Subversion Repositories planix.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
#include <u.h>
2
#include <libc.h>
3
#include <draw.h>
4
#include <memdraw.h>
5
 
6
/* Debug switch; in the code visible here it is referenced only from commented-out diagnostics. */
int drawdebug;
/* Nonzero once mktables has filled in the conversion tables. */
static int	tablesbuilt;
8
 
9
/* perfect approximation to NTSC = .299r+.587g+.114b when 0 ≤ r,g,b < 256 */
#define RGB2K(r,g,b)	((156763*(r)+307758*(g)+59769*(b))>>19)

/*
 * Multiply-and-divide-by-255 helpers used by the alpha calculators.
 *
 * For 16-bit values, x / 255 == (t = x+1, (t+(t>>8)) >> 8).
 * We add another 127 to round to the nearest value rather
 * than truncate.
 *
 * CALCxy does x bytewise calculations on y input images (x=1,4; y=1,2).
 * CALC2x does two parallel 16-bit calculations on y input images (y=1,2).
 *
 * Every macro takes caller-supplied scratch variables (tmp, tmp1, tmp2)
 * which it assigns to; they must be lvalues wide enough to hold the
 * intermediate products.  Value arguments may be evaluated more than
 * once, so they must be free of side effects.
 */

/* one channel, one image: a*v/255, rounded */
#define CALC11(a, v, tmp) \
	(tmp=(a)*(v)+128, (tmp+(tmp>>8))>>8)

/* one channel, two images: (a1*v1 + a2*v2)/255, rounded */
#define CALC12(a1, v1, a2, v2, tmp) \
	(tmp=(a1)*(v1)+(a2)*(v2)+128, (tmp+(tmp>>8))>>8)

/* selects bytes 0 and 2 of a 32-bit word, leaving 8 bits of headroom above each */
#define MASK 0xFF00FF

/* two channels packed as 0x00vv00uu, one image */
#define CALC21(a, vvuu, tmp) \
	(tmp=(a)*(vvuu)+0x00800080, ((tmp+((tmp>>8)&MASK))>>8)&MASK)

/*
 * four channels of one 32-bit pixel: even bytes and odd bytes are
 * processed as two CALC21 halves and recombined.
 * The rgba argument is parenthesized so that expressions such as x|y
 * are masked and shifted as a whole.
 */
#define CALC41(a, rgba, tmp1, tmp2) \
	(CALC21(a, (rgba) & MASK, tmp1) | \
	 (CALC21(a, ((rgba)>>8)&MASK, tmp2)<<8))

/* two channels packed as 0x00vv00uu, two images */
#define CALC22(a1, vvuu1, a2, vvuu2, tmp) \
	(tmp=(a1)*(vvuu1)+(a2)*(vvuu2)+0x00800080, ((tmp+((tmp>>8)&MASK))>>8)&MASK)

/* four channels of a 32-bit pixel, two images; arguments parenthesized as in CALC41 */
#define CALC42(a1, rgba1, a2, rgba2, tmp1, tmp2) \
	(CALC22(a1, (rgba1) & MASK, a2, (rgba2) & MASK, tmp1) | \
	 (CALC22(a1, ((rgba1)>>8) & MASK, a2, ((rgba2)>>8) & MASK, tmp2)<<8))
41
 
42
static void mktables(void);
43
typedef int Subdraw(Memdrawparam*);
44
static Subdraw chardraw, alphadraw, memoptdraw;
45
 
46
static Memimage*	memones;
47
static Memimage*	memzeros;
48
Memimage *memwhite;
49
Memimage *memblack;
50
Memimage *memtransparent;
51
Memimage *memopaque;
52
 
53
int	_ifmt(Fmt*);
54
 
55
void
56
_memimageinit(void)
57
{
58
	static int didinit = 0;
59
 
60
	if(didinit)
61
		return;
62
 
63
	didinit = 1;
64
 
65
	mktables();
66
	_memmkcmap();
67
 
68
	fmtinstall('R', Rfmt); 
69
	fmtinstall('P', Pfmt);
70
 
71
	memones = allocmemimage(Rect(0,0,1,1), GREY1);
72
	memones->flags |= Frepl;
73
	memones->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
74
	*byteaddr(memones, ZP) = ~0;
75
 
76
	memzeros = allocmemimage(Rect(0,0,1,1), GREY1);
77
	memzeros->flags |= Frepl;
78
	memzeros->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
79
	*byteaddr(memzeros, ZP) = 0;
80
 
81
	if(memones == nil || memzeros == nil)
82
		assert(0 /*cannot initialize memimage library */);	/* RSC BUG */
83
 
84
	memwhite = memones;
85
	memblack = memzeros;
86
	memopaque = memones;
87
	memtransparent = memzeros;
88
}
89
 
90
ulong _imgtorgba(Memimage*, ulong);
91
ulong _rgbatoimg(Memimage*, ulong);
92
ulong _pixelbits(Memimage*, Point);
93
 
94
#define DBG if(0)
95
static Memdrawparam par;
96
 
97
Memdrawparam*
98
_memimagedrawsetup(Memimage *dst, Rectangle r, Memimage *src, Point p0, Memimage *mask, Point p1, int op)
99
{
100
 
101
	if(mask == nil)
102
		mask = memopaque;
103
 
104
DBG	print("memimagedraw %p/%luX %R @ %p %p/%luX %P %p/%luX %P... ", dst, dst->chan, r, dst->data->bdata, src, src->chan, p0, mask, mask->chan, p1);
105
 
106
	if(drawclip(dst, &r, src, &p0, mask, &p1, &par.sr, &par.mr) == 0){
107
//		if(drawdebug)
108
//			iprint("empty clipped rectangle\n");
109
		return nil;
110
	}
111
 
112
	if(op < Clear || op > SoverD){
113
//		if(drawdebug)
114
//			iprint("op out of range: %d\n", op);
115
		return nil;
116
	}
117
 
118
	par.op = op;
119
	par.dst = dst;
120
	par.r = r;
121
	par.src = src;
122
	/* par.sr set by drawclip */
123
	par.mask = mask;
124
	/* par.mr set by drawclip */
125
 
126
	par.state = 0;
127
	if(src->flags&Frepl){
128
		par.state |= Replsrc;
129
		if(Dx(src->r)==1 && Dy(src->r)==1){
130
			par.sval = _pixelbits(src, src->r.min);
131
			par.state |= Simplesrc;
132
			par.srgba = _imgtorgba(src, par.sval);
133
			par.sdval = _rgbatoimg(dst, par.srgba);
134
			if((par.srgba&0xFF) == 0 && (op&DoutS)){
135
//				if (drawdebug) iprint("fill with transparent source\n");
136
				return nil;	/* no-op successfully handled */
137
			}
138
		}
139
	}
140
 
141
	if(mask->flags & Frepl){
142
		par.state |= Replmask;
143
		if(Dx(mask->r)==1 && Dy(mask->r)==1){
144
			par.mval = _pixelbits(mask, mask->r.min);
145
			if(par.mval == 0 && (op&DoutS)){
146
//				if(drawdebug) iprint("fill with zero mask\n");
147
				return nil;	/* no-op successfully handled */
148
			}
149
			par.state |= Simplemask;
150
			if(par.mval == ~0)
151
				par.state |= Fullmask;
152
			par.mrgba = _imgtorgba(mask, par.mval);
153
		}
154
	}
155
 
156
//	if(drawdebug)
157
//		iprint("dr %R sr %R mr %R...", r, par.sr, par.mr);
158
DBG print("draw dr %R sr %R mr %R %lux\n", r, par.sr, par.mr, par.state);
159
 
160
	return &par;
161
}
162
 
163
void
164
_memimagedraw(Memdrawparam *par)
165
{
166
	if (par == nil)
167
		return;
168
 
169
	/*
170
	 * Now that we've clipped the parameters down to be consistent, we 
171
	 * simply try sub-drawing routines in order until we find one that was able
172
	 * to handle us.  If the sub-drawing routine returns zero, it means it was
173
	 * unable to satisfy the request, so we do not return.
174
	 */
175
 
176
	/*
177
	 * Hardware support.  Each video driver provides this function,
178
	 * which checks to see if there is anything it can help with.
179
	 * There could be an if around this checking to see if dst is in video memory.
180
	 */
181
DBG print("test hwdraw\n");
182
	if(hwdraw(par)){
183
//if(drawdebug) iprint("hw handled\n");
184
DBG print("hwdraw handled\n");
185
		return;
186
	}
187
	/*
188
	 * Optimizations using memmove and memset.
189
	 */
190
DBG print("test memoptdraw\n");
191
	if(memoptdraw(par)){
192
//if(drawdebug) iprint("memopt handled\n");
193
DBG print("memopt handled\n");
194
		return;
195
	}
196
 
197
	/*
198
	 * Character drawing.
199
	 * Solid source color being painted through a boolean mask onto a high res image.
200
	 */
201
DBG print("test chardraw\n");
202
	if(chardraw(par)){
203
//if(drawdebug) iprint("chardraw handled\n");
204
DBG print("chardraw handled\n");
205
		return;
206
	}
207
 
208
	/*
209
	 * General calculation-laden case that does alpha for each pixel.
210
	 */
211
DBG print("do alphadraw\n");
212
	alphadraw(par);
213
//if(drawdebug) iprint("alphadraw handled\n");
214
DBG print("alphadraw handled\n");
215
}
216
#undef DBG
217
 
218
/*
219
 * Clip the destination rectangle further based on the properties of the 
220
 * source and mask rectangles.  Once the destination rectangle is properly
221
 * clipped, adjust the source and mask rectangles to be the same size.
222
 * Then if source or mask is replicated, move its clipped rectangle
223
 * so that its minimum point falls within the repl rectangle.
224
 *
225
 * Return zero if the final rectangle is null.
226
 */
227
int
228
drawclip(Memimage *dst, Rectangle *r, Memimage *src, Point *p0, Memimage *mask, Point *p1, Rectangle *sr, Rectangle *mr)
229
{
230
	Point rmin, delta;
231
	int splitcoords;
232
	Rectangle omr;
233
 
234
	if(r->min.x>=r->max.x || r->min.y>=r->max.y)
235
		return 0;
236
	splitcoords = (p0->x!=p1->x) || (p0->y!=p1->y);
237
	/* clip to destination */
238
	rmin = r->min;
239
	if(!rectclip(r, dst->r) || !rectclip(r, dst->clipr))
240
		return 0;
241
	/* move mask point */
242
	p1->x += r->min.x-rmin.x;
243
	p1->y += r->min.y-rmin.y;
244
	/* move source point */
245
	p0->x += r->min.x-rmin.x;
246
	p0->y += r->min.y-rmin.y;
247
	/* map destination rectangle into source */
248
	sr->min = *p0;
249
	sr->max.x = p0->x+Dx(*r);
250
	sr->max.y = p0->y+Dy(*r);
251
	/* sr is r in source coordinates; clip to source */
252
	if(!(src->flags&Frepl) && !rectclip(sr, src->r))
253
		return 0;
254
	if(!rectclip(sr, src->clipr))
255
		return 0;
256
	/* compute and clip rectangle in mask */
257
	if(splitcoords){
258
		/* move mask point with source */
259
		p1->x += sr->min.x-p0->x;
260
		p1->y += sr->min.y-p0->y;
261
		mr->min = *p1;
262
		mr->max.x = p1->x+Dx(*sr);
263
		mr->max.y = p1->y+Dy(*sr);
264
		omr = *mr;
265
		/* mr is now rectangle in mask; clip it */
266
		if(!(mask->flags&Frepl) && !rectclip(mr, mask->r))
267
			return 0;
268
		if(!rectclip(mr, mask->clipr))
269
			return 0;
270
		/* reflect any clips back to source */
271
		sr->min.x += mr->min.x-omr.min.x;
272
		sr->min.y += mr->min.y-omr.min.y;
273
		sr->max.x += mr->max.x-omr.max.x;
274
		sr->max.y += mr->max.y-omr.max.y;
275
		*p1 = mr->min;
276
	}else{
277
		if(!(mask->flags&Frepl) && !rectclip(sr, mask->r))
278
			return 0;
279
		if(!rectclip(sr, mask->clipr))
280
			return 0;
281
		*p1 = sr->min;
282
	}
283
 
284
	/* move source clipping back to destination */
285
	delta.x = r->min.x - p0->x;
286
	delta.y = r->min.y - p0->y;
287
	r->min.x = sr->min.x + delta.x;
288
	r->min.y = sr->min.y + delta.y;
289
	r->max.x = sr->max.x + delta.x;
290
	r->max.y = sr->max.y + delta.y;
291
 
292
	/* move source rectangle so sr->min is in src->r */
293
	if(src->flags&Frepl) {
294
		delta.x = drawreplxy(src->r.min.x, src->r.max.x, sr->min.x) - sr->min.x;
295
		delta.y = drawreplxy(src->r.min.y, src->r.max.y, sr->min.y) - sr->min.y;
296
		sr->min.x += delta.x;
297
		sr->min.y += delta.y;
298
		sr->max.x += delta.x;
299
		sr->max.y += delta.y;
300
	}
301
	*p0 = sr->min;
302
 
303
	/* move mask point so it is in mask->r */
304
	*p1 = drawrepl(mask->r, *p1);
305
	mr->min = *p1;
306
	mr->max.x = p1->x+Dx(*sr);
307
	mr->max.y = p1->y+Dy(*sr);
308
 
309
	assert(Dx(*sr) == Dx(*mr) && Dx(*mr) == Dx(*r));
310
	assert(Dy(*sr) == Dy(*mr) && Dy(*mr) == Dy(*r));
311
	assert(ptinrect(*p0, src->r));
312
	assert(ptinrect(*p1, mask->r));
313
	assert(ptinrect(r->min, dst->r));
314
 
315
	return 1;
316
}
317
 
318
/*
319
 * Conversion tables.
320
 */
321
/* replbit[n][v]: the n-bit quantity v replicated to 8-bit depth */
static uchar replbit[1+8][256];
/* conv18[b][k]: the kth depth-1 pixel of byte b, as an 8-bit value */
static uchar conv18[256][8];
/* conv28[b][k]: likewise for the four depth-2 pixels of b */
static uchar conv28[256][4];
/* conv48[b][k]: likewise for the two depth-4 pixels of b */
static uchar conv48[256][2];
325
 
326
/*
327
 * bitmap of how to replicate n bits to fill 8, for 1 ≤ n ≤ 8.
328
 * the X's are where to put the bottom (ones) bit of the n-bit pattern.
329
 * only the top 8 bits of the result are actually used.
330
 * (the lower 8 bits are needed to get bits in the right place
331
 * when n is not a divisor of 8.)
332
 *
333
 * Should check to see if it's easier to just refer to replmul than
334
 * use the precomputed values in replbit.  On PCs it may well
335
 * be; on machines with slow multiply instructions it probably isn't.
336
 */
337
/*
 * replmul[n] is the 16-bit multiplier that replicates an n-bit value;
 * the bit pattern of each entry is shown beside it, most significant
 * bit first.  Entry 0 is unused padding so the index equals n.
 */
static int replmul[1+8] = {
	0,
	0xFFFF,	/* 1111 1111 1111 1111 */
	0x5555,	/* 0101 0101 0101 0101 */
	0x2492,	/* 0010 0100 1001 0010 */
	0x1111,	/* 0001 0001 0001 0001 */
	0x0842,	/* 0000 1000 0100 0010 */
	0x0410,	/* 0000 0100 0001 0000 */
	0x0204,	/* 0000 0010 0000 0100 */
	0x0101,	/* 0000 0001 0000 0001 */
};
354
 
355
static void
356
mktables(void)
357
{
358
	int i, j, mask, sh, small;
359
 
360
	if(tablesbuilt)
361
		return;
362
 
363
	fmtinstall('R', Rfmt);
364
	fmtinstall('P', Pfmt);
365
	tablesbuilt = 1;
366
 
367
	/* bit replication up to 8 bits */
368
	for(i=0; i<256; i++){
369
		for(j=0; j<=8; j++){	/* j <= 8 [sic] */
370
			small = i & ((1<<j)-1);
371
			replbit[j][i] = (small*replmul[j])>>8;
372
		}
373
	}
374
 
375
	/* bit unpacking up to 8 bits, only powers of 2 */
376
	for(i=0; i<256; i++){
377
		for(j=0, sh=7, mask=1; j<8; j++, sh--)
378
			conv18[i][j] = replbit[1][(i>>sh)&mask];
379
 
380
		for(j=0, sh=6, mask=3; j<4; j++, sh-=2)
381
			conv28[i][j] = replbit[2][(i>>sh)&mask];
382
 
383
		for(j=0, sh=4, mask=15; j<2; j++, sh-=4)
384
			conv48[i][j] = replbit[4][(i>>sh)&mask];
385
	}
386
}
387
 
388
/* shared opaque alpha byte: a Buffer whose alpha is &ones has no alpha channel of its own */
static uchar ones = 0xff;
389
 
390
/*
391
 * General alpha drawing case.  Can handle anything.
392
 */
393
typedef struct	Buffer	Buffer;
394
struct Buffer {
395
	/* used by most routines */
396
	uchar	*red;
397
	uchar	*grn;
398
	uchar	*blu;
399
	uchar	*alpha;
400
	uchar	*grey;
401
	ulong	*rgba;
402
	int	delta;	/* number of bytes to add to pointer to get next pixel to the right */
403
 
404
	/* used by boolcalc* for mask data */
405
	uchar	*m;		/* ptr to mask data r.min byte; like p->bytermin */
406
	int		mskip;	/* no. of left bits to skip in *m */
407
	uchar	*bm;		/* ptr to mask data img->r.min byte; like p->bytey0s */
408
	int		bmskip;	/* no. of left bits to skip in *bm */
409
	uchar	*em;		/* ptr to mask data img->r.max.x byte; like p->bytey0e */
410
	int		emskip;	/* no. of right bits to skip in *em */
411
};
412
 
413
typedef struct	Param	Param;
414
typedef Buffer	Readfn(Param*, uchar*, int);
415
typedef void	Writefn(Param*, uchar*, Buffer);
416
typedef Buffer	Calcfn(Buffer, Buffer, Buffer, int, int, int);
417
 
418
enum {
419
	MAXBCACHE = 16
420
};
421
 
422
/* giant rathole to customize functions with */
423
struct Param {
424
	Readfn	*replcall;
425
	Readfn	*greymaskcall;	
426
	Readfn	*convreadcall;
427
	Writefn	*convwritecall;
428
 
429
	Memimage *img;
430
	Rectangle	r;
431
	int	dx;	/* of r */
432
	int	needbuf;
433
	int	convgrey;
434
	int	alphaonly;
435
 
436
	uchar	*bytey0s;		/* byteaddr(Pt(img->r.min.x, img->r.min.y)) */
437
	uchar	*bytermin;	/* byteaddr(Pt(r.min.x, img->r.min.y)) */
438
	uchar	*bytey0e;		/* byteaddr(Pt(img->r.max.x, img->r.min.y)) */
439
	int		bwidth;
440
 
441
	int	replcache;	/* if set, cache buffers */
442
	Buffer	bcache[MAXBCACHE];
443
	ulong	bfilled;
444
	uchar	*bufbase;
445
	int	bufoff;
446
	int	bufdelta;
447
 
448
	int	dir;
449
 
450
	int	convbufoff;
451
	uchar	*convbuf;
452
	Param	*convdpar;
453
	int	convdx;
454
};
455
 
456
static uchar *drawbuf;
457
static int	ndrawbuf;
458
static int	mdrawbuf;
459
static Param spar, mpar, dpar;	/* easier on the stacks */
460
static Readfn	greymaskread, replread, readptr;
461
static Writefn	nullwrite;
462
static Calcfn	alphacalc0, alphacalc14, alphacalc2810, alphacalc3679, alphacalc5, alphacalc11, alphacalcS;
463
static Calcfn	boolcalc14, boolcalc236789, boolcalc1011;
464
 
465
static Readfn*	readfn(Memimage*);
466
static Readfn*	readalphafn(Memimage*);
467
static Writefn*	writefn(Memimage*);
468
 
469
static Calcfn*	boolcopyfn(Memimage*, Memimage*);
470
static Readfn*	convfn(Memimage*, Param*, Memimage*, Param*);
471
 
472
static Calcfn *alphacalc[Ncomp] = 
473
{
474
	alphacalc0,		/* Clear */
475
	alphacalc14,		/* DoutS */
476
	alphacalc2810,		/* SoutD */
477
	alphacalc3679,		/* DxorS */
478
	alphacalc14,		/* DinS */
479
	alphacalc5,		/* D */
480
	alphacalc3679,		/* DatopS */
481
	alphacalc3679,		/* DoverS */
482
	alphacalc2810,		/* SinD */
483
	alphacalc3679,		/* SatopD */
484
	alphacalc2810,		/* S */
485
	alphacalc11,		/* SoverD */
486
};
487
 
488
static Calcfn *boolcalc[Ncomp] =
489
{
490
	alphacalc0,		/* Clear */
491
	boolcalc14,		/* DoutS */
492
	boolcalc236789,		/* SoutD */
493
	boolcalc236789,		/* DxorS */
494
	boolcalc14,		/* DinS */
495
	alphacalc5,		/* D */
496
	boolcalc236789,		/* DatopS */
497
	boolcalc236789,		/* DoverS */
498
	boolcalc236789,		/* SinD */
499
	boolcalc236789,		/* SatopD */
500
	boolcalc1011,		/* S */
501
	boolcalc1011,		/* SoverD */
502
};
503
 
504
static int
505
allocdrawbuf(void)
506
{
507
	uchar *p;
508
 
509
	if(ndrawbuf > mdrawbuf){
510
		p = realloc(drawbuf, ndrawbuf);
511
		if(p == nil){
512
			werrstr("memimagedraw out of memory");
513
			return -1;
514
		}
515
		drawbuf = p;
516
		mdrawbuf = ndrawbuf;
517
	}
518
	return 0;
519
}
520
 
521
static Param
522
getparam(Memimage *img, Rectangle r, int convgrey, int needbuf)
523
{
524
	Param p;
525
	int nbuf;
526
 
527
	memset(&p, 0, sizeof p);
528
 
529
	p.img = img;
530
	p.r = r;
531
	p.dx = Dx(r);
532
	p.needbuf = needbuf;
533
	p.convgrey = convgrey;
534
 
535
	assert(img->r.min.x <= r.min.x && r.min.x < img->r.max.x);
536
 
537
	p.bytey0s = byteaddr(img, Pt(img->r.min.x, img->r.min.y));
538
	p.bytermin = byteaddr(img, Pt(r.min.x, img->r.min.y));
539
	p.bytey0e = byteaddr(img, Pt(img->r.max.x, img->r.min.y));
540
	p.bwidth = sizeof(ulong)*img->width;
541
 
542
	assert(p.bytey0s <= p.bytermin && p.bytermin <= p.bytey0e);
543
 
544
	if(p.r.min.x == p.img->r.min.x)
545
		assert(p.bytermin == p.bytey0s);
546
 
547
	nbuf = 1;
548
	if((img->flags&Frepl) && Dy(img->r) <= MAXBCACHE && Dy(img->r) < Dy(r)){
549
		p.replcache = 1;
550
		nbuf = Dy(img->r);
551
	}
552
	p.bufdelta = 4*p.dx;
553
	p.bufoff = ndrawbuf;
554
	ndrawbuf += p.bufdelta*nbuf;
555
 
556
	return p;
557
}
558
 
559
static void
560
clipy(Memimage *img, int *y)
561
{
562
	int dy;
563
 
564
	dy = Dy(img->r);
565
	if(*y == dy)
566
		*y = 0;
567
	else if(*y == -1)
568
		*y = dy-1;
569
	assert(0 <= *y && *y < dy);
570
}
571
 
572
static void
573
dumpbuf(char *s, Buffer b, int n)
574
{
575
	int i;
576
	uchar *p;
577
 
578
	print("%s", s);
579
	for(i=0; i<n; i++){
580
		print(" ");
581
		if((p=b.grey)){
582
			print(" k%.2uX", *p);
583
			b.grey += b.delta;
584
		}else{	
585
			if((p=b.red)){
586
				print(" r%.2uX", *p);
587
				b.red += b.delta;
588
			}
589
			if((p=b.grn)){
590
				print(" g%.2uX", *p);
591
				b.grn += b.delta;
592
			}
593
			if((p=b.blu)){
594
				print(" b%.2uX", *p);
595
				b.blu += b.delta;
596
			}
597
		}
598
		if((p=b.alpha) != &ones){
599
			print(" α%.2uX", *p);
600
			b.alpha += b.delta;
601
		}
602
	}
603
	print("\n");
604
}
605
 
606
/*
607
 * For each scan line, we expand the pixels from source, mask, and destination
608
 * into byte-aligned red, green, blue, alpha, and grey channels.  If buffering is not
609
 * needed and the channels were already byte-aligned (grey8, rgb24, rgba32, rgb32),
610
 * the readers need not copy the data: they can simply return pointers to the data.
611
 * If the destination image is grey and the source is not, it is converted using the NTSC
612
 * formula.
613
 *
614
 * Once we have all the channels, we call either rgbcalc or greycalc, depending on 
615
 * whether the destination image is color.  This is allowed to overwrite the dst buffer (perhaps
616
 * the actual data, perhaps a copy) with its result.  It should only overwrite the dst buffer
617
 * with the same format (i.e. red bytes with red bytes, etc.)  A new buffer is returned from
618
 * the calculator, and that buffer is passed to a function to write it to the destination.
619
 * If the buffer is already pointing at the destination, the writing function is a no-op.
620
 */
621
#define DBG if(0)
622
static int
623
alphadraw(Memdrawparam *par)
624
{
625
	int isgrey, starty, endy, op;
626
	int needbuf, dsty, srcy, masky;
627
	int y, dir, dx, dy;
628
	Buffer bsrc, bdst, bmask;
629
	Readfn *rdsrc, *rdmask, *rddst;
630
	Calcfn *calc;
631
	Writefn *wrdst;
632
	Memimage *src, *mask, *dst;
633
	Rectangle r, sr, mr;
634
 
635
	r = par->r;
636
	dx = Dx(r);
637
	dy = Dy(r);
638
 
639
	ndrawbuf = 0;
640
 
641
	src = par->src;
642
	mask = par->mask;	
643
	dst = par->dst;
644
	sr = par->sr;
645
	mr = par->mr;
646
	op = par->op;
647
 
648
	isgrey = dst->flags&Fgrey;
649
 
650
	/*
651
	 * Buffering when src and dst are the same bitmap is sufficient but not 
652
	 * necessary.  There are stronger conditions we could use.  We could
653
	 * check to see if the rectangles intersect, and if simply moving in the
654
	 * correct y direction can avoid the need to buffer.
655
	 */
656
	needbuf = (src->data == dst->data);
657
 
658
	spar = getparam(src, sr, isgrey, needbuf);
659
	dpar = getparam(dst, r, isgrey, needbuf);
660
	mpar = getparam(mask, mr, 0, needbuf);
661
 
662
	dir = (needbuf && byteaddr(dst, r.min) > byteaddr(src, sr.min)) ? -1 : 1;
663
	spar.dir = mpar.dir = dpar.dir = dir;
664
 
665
	/*
666
	 * If the mask is purely boolean, we can convert from src to dst format
667
	 * when we read src, and then just copy it to dst where the mask tells us to.
668
	 * This requires a boolean (1-bit grey) mask and lack of a source alpha channel.
669
	 *
670
	 * The computation is accomplished by assigning the function pointers as follows:
671
	 *	rdsrc - read and convert source into dst format in a buffer
672
	 * 	rdmask - convert mask to bytes, set pointer to it
673
	 * 	rddst - fill with pointer to real dst data, but do no reads
674
	 *	calc - copy src onto dst when mask says to.
675
	 *	wrdst - do nothing
676
	 * This is slightly sleazy, since things aren't doing exactly what their names say,
677
	 * but it avoids a fair amount of code duplication to make this a case here
678
	 * rather than have a separate booldraw.
679
	 */
680
//if(drawdebug) iprint("flag %lud mchan %lux=?%x dd %d\n", src->flags&Falpha, mask->chan, GREY1, dst->depth);
681
	if(!(src->flags&Falpha) && mask->chan == GREY1 && dst->depth >= 8 && op == SoverD){
682
//if(drawdebug) iprint("boolcopy...");
683
		rdsrc = convfn(dst, &dpar, src, &spar);
684
		rddst = readptr;
685
		rdmask = readfn(mask);
686
		calc = boolcopyfn(dst, mask);
687
		wrdst = nullwrite;
688
	}else{
689
		/* usual alphadraw parameter fetching */
690
		rdsrc = readfn(src);
691
		rddst = readfn(dst);
692
		wrdst = writefn(dst);
693
		calc = alphacalc[op];
694
 
695
		/*
696
		 * If there is no alpha channel, we'll ask for a grey channel
697
		 * and pretend it is the alpha.
698
		 */
699
		if(mask->flags&Falpha){
700
			rdmask = readalphafn(mask);
701
			mpar.alphaonly = 1;
702
		}else{
703
			mpar.greymaskcall = readfn(mask);
704
			mpar.convgrey = 1;
705
			rdmask = greymaskread;
706
 
707
			/*
708
			 * Should really be above, but then boolcopyfns would have
709
			 * to deal with bit alignment, and I haven't written that.
710
			 *
711
			 * This is a common case for things like ellipse drawing.
712
			 * When there's no alpha involved and the mask is boolean,
713
			 * we can avoid all the division and multiplication.
714
			 */
715
			if(mask->chan == GREY1 && !(src->flags&Falpha))
716
				calc = boolcalc[op];
717
			else if(op == SoverD && !(src->flags&Falpha))
718
				calc = alphacalcS;
719
		}
720
	}
721
 
722
	/*
723
	 * If the image has a small enough repl rectangle,
724
	 * we can just read each line once and cache them.
725
	 */
726
	if(spar.replcache){
727
		spar.replcall = rdsrc;
728
		rdsrc = replread;
729
	}
730
	if(mpar.replcache){
731
		mpar.replcall = rdmask;
732
		rdmask = replread;
733
	}
734
 
735
	if(allocdrawbuf() < 0)
736
		return 0;
737
 
738
	/*
739
	 * Before we were saving only offsets from drawbuf in the parameter
740
	 * structures; now that drawbuf has been grown to accomodate us,
741
	 * we can fill in the pointers.
742
	 */
743
	spar.bufbase = drawbuf+spar.bufoff;
744
	mpar.bufbase = drawbuf+mpar.bufoff;
745
	dpar.bufbase = drawbuf+dpar.bufoff;
746
	spar.convbuf = drawbuf+spar.convbufoff;
747
 
748
	if(dir == 1){
749
		starty = 0;
750
		endy = dy;
751
	}else{
752
		starty = dy-1;
753
		endy = -1;
754
	}
755
 
756
	/*
757
	 * srcy, masky, and dsty are offsets from the top of their
758
	 * respective Rectangles.  they need to be contained within
759
	 * the rectangles, so clipy can keep them there without division.
760
 	 */
761
	srcy = (starty + sr.min.y - src->r.min.y)%Dy(src->r);
762
	masky = (starty + mr.min.y - mask->r.min.y)%Dy(mask->r);
763
	dsty = starty + r.min.y - dst->r.min.y;
764
 
765
	assert(0 <= srcy && srcy < Dy(src->r));
766
	assert(0 <= masky && masky < Dy(mask->r));
767
	assert(0 <= dsty && dsty < Dy(dst->r));
768
 
769
	for(y=starty; y!=endy; y+=dir, srcy+=dir, masky+=dir, dsty+=dir){
770
		clipy(src, &srcy);
771
		clipy(dst, &dsty);
772
		clipy(mask, &masky);
773
 
774
		bsrc = rdsrc(&spar, spar.bufbase, srcy);
775
DBG print("[");
776
		bmask = rdmask(&mpar, mpar.bufbase, masky);
777
DBG print("]\n");
778
		bdst = rddst(&dpar, dpar.bufbase, dsty);
779
DBG		dumpbuf("src", bsrc, dx);
780
DBG		dumpbuf("mask", bmask, dx);
781
DBG		dumpbuf("dst", bdst, dx);
782
		bdst = calc(bdst, bsrc, bmask, dx, isgrey, op);
783
		wrdst(&dpar, dpar.bytermin+dsty*dpar.bwidth, bdst);
784
	}
785
 
786
	return 1;
787
}
788
#undef DBG
789
 
790
static Buffer
791
alphacalc0(Buffer bdst, Buffer b1, Buffer b2, int dx, int grey, int op)
792
{
793
	USED(grey);
794
	USED(op);
795
	memset(bdst.rgba, 0, dx*bdst.delta);
796
	return bdst;
797
}
798
 
799
/*
800
 * Do the channels in the buffers match enough
801
 * that we can do word-at-a-time operations
802
 * on the pixels?
803
 */
804
static int
805
chanmatch(Buffer *bdst, Buffer *bsrc)
806
{
807
	uchar *drgb, *srgb;
808
 
809
	/*
810
	 * first, r, g, b must be in the same place
811
	 * in the rgba word.
812
	 */
813
	drgb = (uchar*)bdst->rgba;
814
	srgb = (uchar*)bsrc->rgba;
815
	if(bdst->red - drgb != bsrc->red - srgb
816
	|| bdst->blu - drgb != bsrc->blu - srgb
817
	|| bdst->grn - drgb != bsrc->grn - srgb)
818
		return 0;
819
 
820
	/*
821
	 * that implies alpha is in the same place,
822
	 * if it is there at all (it might be == &ones).
823
	 * if the destination is &ones, we can scribble
824
	 * over the rgba slot just fine.
825
	 */
826
	if(bdst->alpha == &ones)
827
		return 1;
828
 
829
	/*
830
	 * if the destination is not ones but the src is,
831
	 * then the simultaneous calculation will use
832
	 * bogus bytes from the src's rgba.  no good.
833
	 */
834
	if(bsrc->alpha == &ones)
835
		return 0;
836
 
837
	/*
838
	 * otherwise, alphas are in the same place.
839
	 */
840
	return 1;
841
}
842
 
843
static Buffer
844
alphacalc14(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
845
{
846
	Buffer obdst;
847
	int fd, sadelta;
848
	int i, sa, ma, q;
849
	ulong t, t1;
850
 
851
	obdst = bdst;
852
	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
853
	q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
854
 
855
	for(i=0; i<dx; i++){
856
		sa = *bsrc.alpha;
857
		ma = *bmask.alpha;
858
		fd = CALC11(sa, ma, t);
859
		if(op == DoutS)
860
			fd = 255-fd;
861
 
862
		if(grey){
863
			*bdst.grey = CALC11(fd, *bdst.grey, t);
864
			bsrc.grey += bsrc.delta;
865
			bdst.grey += bdst.delta;
866
		}else{
867
			if(q){
868
				*bdst.rgba = CALC41(fd, *bdst.rgba, t, t1);
869
				bsrc.rgba++;
870
				bdst.rgba++;
871
				bsrc.alpha += sadelta;
872
				bmask.alpha += bmask.delta;
873
				continue;
874
			}
875
			*bdst.red = CALC11(fd, *bdst.red, t);
876
			*bdst.grn = CALC11(fd, *bdst.grn, t);
877
			*bdst.blu = CALC11(fd, *bdst.blu, t);
878
			bsrc.red += bsrc.delta;
879
			bsrc.blu += bsrc.delta;
880
			bsrc.grn += bsrc.delta;
881
			bdst.red += bdst.delta;
882
			bdst.blu += bdst.delta;
883
			bdst.grn += bdst.delta;
884
		}
885
		if(bdst.alpha != &ones){
886
			*bdst.alpha = CALC11(fd, *bdst.alpha, t);
887
			bdst.alpha += bdst.delta;
888
		}
889
		bmask.alpha += bmask.delta;
890
		bsrc.alpha += sadelta;
891
	}
892
	return obdst;
893
}
894
 
895
static Buffer
896
alphacalc2810(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
897
{
898
	Buffer obdst;
899
	int fs, sadelta;
900
	int i, ma, da, q;
901
	ulong t, t1;
902
 
903
	obdst = bdst;
904
	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
905
	q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
906
 
907
	for(i=0; i<dx; i++){
908
		ma = *bmask.alpha;
909
		da = *bdst.alpha;
910
		if(op == SoutD)
911
			da = 255-da;
912
		fs = ma;
913
		if(op != S)
914
			fs = CALC11(fs, da, t);
915
 
916
		if(grey){
917
			*bdst.grey = CALC11(fs, *bsrc.grey, t);
918
			bsrc.grey += bsrc.delta;
919
			bdst.grey += bdst.delta;
920
		}else{
921
			if(q){
922
				*bdst.rgba = CALC41(fs, *bsrc.rgba, t, t1);
923
				bsrc.rgba++;
924
				bdst.rgba++;
925
				bmask.alpha += bmask.delta;
926
				bdst.alpha += bdst.delta;
927
				continue;
928
			}
929
			*bdst.red = CALC11(fs, *bsrc.red, t);
930
			*bdst.grn = CALC11(fs, *bsrc.grn, t);
931
			*bdst.blu = CALC11(fs, *bsrc.blu, t);
932
			bsrc.red += bsrc.delta;
933
			bsrc.blu += bsrc.delta;
934
			bsrc.grn += bsrc.delta;
935
			bdst.red += bdst.delta;
936
			bdst.blu += bdst.delta;
937
			bdst.grn += bdst.delta;
938
		}
939
		if(bdst.alpha != &ones){
940
			*bdst.alpha = CALC11(fs, *bsrc.alpha, t);
941
			bdst.alpha += bdst.delta;
942
		}
943
		bmask.alpha += bmask.delta;
944
		bsrc.alpha += sadelta;
945
	}
946
	return obdst;
947
}
948
 
949
static Buffer
950
alphacalc3679(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
951
{
952
	Buffer obdst;
953
	int fs, fd, sadelta;
954
	int i, sa, ma, da, q;
955
	ulong t, t1;
956
 
957
	obdst = bdst;
958
	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
959
	q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
960
 
961
	for(i=0; i<dx; i++){
962
		sa = *bsrc.alpha;
963
		ma = *bmask.alpha;
964
		da = *bdst.alpha;
965
		if(op == SatopD)
966
			fs = CALC11(ma, da, t);
967
		else
968
			fs = CALC11(ma, 255-da, t);
969
		if(op == DoverS)
970
			fd = 255;
971
		else{
972
			fd = CALC11(sa, ma, t);
973
			if(op != DatopS)
974
				fd = 255-fd;
975
		}
976
 
977
		if(grey){
978
			*bdst.grey = CALC12(fs, *bsrc.grey, fd, *bdst.grey, t);
979
			bsrc.grey += bsrc.delta;
980
			bdst.grey += bdst.delta;
981
		}else{
982
			if(q){
983
				*bdst.rgba = CALC42(fs, *bsrc.rgba, fd, *bdst.rgba, t, t1);
984
				bsrc.rgba++;
985
				bdst.rgba++;
986
				bsrc.alpha += sadelta;
987
				bmask.alpha += bmask.delta;
988
				bdst.alpha += bdst.delta;
989
				continue;
990
			}
991
			*bdst.red = CALC12(fs, *bsrc.red, fd, *bdst.red, t);
992
			*bdst.grn = CALC12(fs, *bsrc.grn, fd, *bdst.grn, t);
993
			*bdst.blu = CALC12(fs, *bsrc.blu, fd, *bdst.blu, t);
994
			bsrc.red += bsrc.delta;
995
			bsrc.blu += bsrc.delta;
996
			bsrc.grn += bsrc.delta;
997
			bdst.red += bdst.delta;
998
			bdst.blu += bdst.delta;
999
			bdst.grn += bdst.delta;
1000
		}
1001
		if(bdst.alpha != &ones){
1002
			*bdst.alpha = CALC12(fs, sa, fd, da, t);
1003
			bdst.alpha += bdst.delta;
1004
		}
1005
		bmask.alpha += bmask.delta;
1006
		bsrc.alpha += sadelta;
1007
	}
1008
	return obdst;
1009
}
1010
 
1011
static Buffer
1012
alphacalc5(Buffer bdst, Buffer b1, Buffer b2, int dx, int grey, int op)
1013
{
1014
	USED(dx);
1015
	USED(grey);
1016
	USED(op);
1017
	return bdst;
1018
}
1019
 
1020
static Buffer
1021
alphacalc11(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1022
{
1023
	Buffer obdst;
1024
	int fd, sadelta;
1025
	int i, sa, ma, q;
1026
	ulong t, t1;
1027
 
1028
	USED(op);
1029
	obdst = bdst;
1030
	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
1031
	q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
1032
 
1033
	for(i=0; i<dx; i++){
1034
		sa = *bsrc.alpha;
1035
		ma = *bmask.alpha;
1036
		fd = 255-CALC11(sa, ma, t);
1037
 
1038
		if(grey){
1039
			*bdst.grey = CALC12(ma, *bsrc.grey, fd, *bdst.grey, t);
1040
			bsrc.grey += bsrc.delta;
1041
			bdst.grey += bdst.delta;
1042
		}else{
1043
			if(q){
1044
				*bdst.rgba = CALC42(ma, *bsrc.rgba, fd, *bdst.rgba, t, t1);
1045
				bsrc.rgba++;
1046
				bdst.rgba++;
1047
				bsrc.alpha += sadelta;
1048
				bmask.alpha += bmask.delta;
1049
				continue;
1050
			}
1051
			*bdst.red = CALC12(ma, *bsrc.red, fd, *bdst.red, t);
1052
			*bdst.grn = CALC12(ma, *bsrc.grn, fd, *bdst.grn, t);
1053
			*bdst.blu = CALC12(ma, *bsrc.blu, fd, *bdst.blu, t);
1054
			bsrc.red += bsrc.delta;
1055
			bsrc.blu += bsrc.delta;
1056
			bsrc.grn += bsrc.delta;
1057
			bdst.red += bdst.delta;
1058
			bdst.blu += bdst.delta;
1059
			bdst.grn += bdst.delta;
1060
		}
1061
		if(bdst.alpha != &ones){
1062
			*bdst.alpha = CALC12(ma, sa, fd, *bdst.alpha, t);
1063
			bdst.alpha += bdst.delta;
1064
		}
1065
		bmask.alpha += bmask.delta;
1066
		bsrc.alpha += sadelta;
1067
	}
1068
	return obdst;
1069
}
1070
 
1071
/*
1072
not used yet
1073
source and mask alpha 1
1074
static Buffer
1075
alphacalcS0(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1076
{
1077
	Buffer obdst;
1078
	int i;
1079
 
1080
	USED(op);
1081
	obdst = bdst;
1082
	if(bsrc.delta == bdst.delta){
1083
		memmove(bdst.rgba, bsrc.rgba, dx*bdst.delta);
1084
		return obdst;
1085
	}
1086
	for(i=0; i<dx; i++){
1087
		if(grey){
1088
			*bdst.grey = *bsrc.grey;
1089
			bsrc.grey += bsrc.delta;
1090
			bdst.grey += bdst.delta;
1091
		}else{
1092
			*bdst.red = *bsrc.red;
1093
			*bdst.grn = *bsrc.grn;
1094
			*bdst.blu = *bsrc.blu;
1095
			bsrc.red += bsrc.delta;
1096
			bsrc.blu += bsrc.delta;
1097
			bsrc.grn += bsrc.delta;
1098
			bdst.red += bdst.delta;
1099
			bdst.blu += bdst.delta;
1100
			bdst.grn += bdst.delta;
1101
		}
1102
		if(bdst.alpha != &ones){
1103
			*bdst.alpha = 255;
1104
			bdst.alpha += bdst.delta;
1105
		}
1106
	}
1107
	return obdst;
1108
}
1109
*/
1110
 
1111
/* source alpha 1 */
1112
static Buffer
1113
alphacalcS(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1114
{
1115
	Buffer obdst;
1116
	int fd;
1117
	int i, ma;
1118
	ulong t;
1119
 
1120
	USED(op);
1121
	obdst = bdst;
1122
 
1123
	for(i=0; i<dx; i++){
1124
		ma = *bmask.alpha;
1125
		fd = 255-ma;
1126
 
1127
		if(grey){
1128
			*bdst.grey = CALC12(ma, *bsrc.grey, fd, *bdst.grey, t);
1129
			bsrc.grey += bsrc.delta;
1130
			bdst.grey += bdst.delta;
1131
		}else{
1132
			*bdst.red = CALC12(ma, *bsrc.red, fd, *bdst.red, t);
1133
			*bdst.grn = CALC12(ma, *bsrc.grn, fd, *bdst.grn, t);
1134
			*bdst.blu = CALC12(ma, *bsrc.blu, fd, *bdst.blu, t);
1135
			bsrc.red += bsrc.delta;
1136
			bsrc.blu += bsrc.delta;
1137
			bsrc.grn += bsrc.delta;
1138
			bdst.red += bdst.delta;
1139
			bdst.blu += bdst.delta;
1140
			bdst.grn += bdst.delta;
1141
		}
1142
		if(bdst.alpha != &ones){
1143
			*bdst.alpha = ma+CALC11(fd, *bdst.alpha, t);
1144
			bdst.alpha += bdst.delta;
1145
		}
1146
		bmask.alpha += bmask.delta;
1147
	}
1148
	return obdst;
1149
}
1150
 
1151
static Buffer
1152
boolcalc14(Buffer bdst, Buffer b1, Buffer bmask, int dx, int grey, int op)
1153
{
1154
	Buffer obdst;
1155
	int i, ma, zero;
1156
 
1157
	obdst = bdst;
1158
 
1159
	for(i=0; i<dx; i++){
1160
		ma = *bmask.alpha;
1161
		zero = ma ? op == DoutS : op == DinS;
1162
 
1163
		if(grey){
1164
			if(zero)
1165
				*bdst.grey = 0;
1166
			bdst.grey += bdst.delta;
1167
		}else{
1168
			if(zero)
1169
				*bdst.red = *bdst.grn = *bdst.blu = 0;
1170
			bdst.red += bdst.delta;
1171
			bdst.blu += bdst.delta;
1172
			bdst.grn += bdst.delta;
1173
		}
1174
		bmask.alpha += bmask.delta;
1175
		if(bdst.alpha != &ones){
1176
			if(zero)
1177
				*bdst.alpha = 0;
1178
			bdst.alpha += bdst.delta;
1179
		}
1180
	}
1181
	return obdst;
1182
}
1183
 
1184
static Buffer
1185
boolcalc236789(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1186
{
1187
	Buffer obdst;
1188
	int fs, fd;
1189
	int i, ma, da, zero;
1190
	ulong t;
1191
 
1192
	obdst = bdst;
1193
	zero = !(op&1);
1194
 
1195
	for(i=0; i<dx; i++){
1196
		ma = *bmask.alpha;
1197
		da = *bdst.alpha;
1198
		fs = da;
1199
		if(op&2)
1200
			fs = 255-da;
1201
		fd = 0;
1202
		if(op&4)
1203
			fd = 255;
1204
 
1205
		if(grey){
1206
			if(ma)
1207
				*bdst.grey = CALC12(fs, *bsrc.grey, fd, *bdst.grey, t);
1208
			else if(zero)
1209
				*bdst.grey = 0;
1210
			bsrc.grey += bsrc.delta;
1211
			bdst.grey += bdst.delta;
1212
		}else{
1213
			if(ma){
1214
				*bdst.red = CALC12(fs, *bsrc.red, fd, *bdst.red, t);
1215
				*bdst.grn = CALC12(fs, *bsrc.grn, fd, *bdst.grn, t);
1216
				*bdst.blu = CALC12(fs, *bsrc.blu, fd, *bdst.blu, t);
1217
			}
1218
			else if(zero)
1219
				*bdst.red = *bdst.grn = *bdst.blu = 0;
1220
			bsrc.red += bsrc.delta;
1221
			bsrc.blu += bsrc.delta;
1222
			bsrc.grn += bsrc.delta;
1223
			bdst.red += bdst.delta;
1224
			bdst.blu += bdst.delta;
1225
			bdst.grn += bdst.delta;
1226
		}
1227
		bmask.alpha += bmask.delta;
1228
		if(bdst.alpha != &ones){
1229
			if(ma)
1230
				*bdst.alpha = fs+CALC11(fd, da, t);
1231
			else if(zero)
1232
				*bdst.alpha = 0;
1233
			bdst.alpha += bdst.delta;
1234
		}
1235
	}
1236
	return obdst;
1237
}
1238
 
1239
static Buffer
1240
boolcalc1011(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1241
{
1242
	Buffer obdst;
1243
	int i, ma, zero;
1244
 
1245
	obdst = bdst;
1246
	zero = !(op&1);
1247
 
1248
	for(i=0; i<dx; i++){
1249
		ma = *bmask.alpha;
1250
 
1251
		if(grey){
1252
			if(ma)
1253
				*bdst.grey = *bsrc.grey;
1254
			else if(zero)
1255
				*bdst.grey = 0;
1256
			bsrc.grey += bsrc.delta;
1257
			bdst.grey += bdst.delta;
1258
		}else{
1259
			if(ma){
1260
				*bdst.red = *bsrc.red;
1261
				*bdst.grn = *bsrc.grn;
1262
				*bdst.blu = *bsrc.blu;
1263
			}
1264
			else if(zero)
1265
				*bdst.red = *bdst.grn = *bdst.blu = 0;
1266
			bsrc.red += bsrc.delta;
1267
			bsrc.blu += bsrc.delta;
1268
			bsrc.grn += bsrc.delta;
1269
			bdst.red += bdst.delta;
1270
			bdst.blu += bdst.delta;
1271
			bdst.grn += bdst.delta;
1272
		}
1273
		bmask.alpha += bmask.delta;
1274
		if(bdst.alpha != &ones){
1275
			if(ma)
1276
				*bdst.alpha = 255;
1277
			else if(zero)
1278
				*bdst.alpha = 0;
1279
			bdst.alpha += bdst.delta;
1280
		}
1281
	}
1282
	return obdst;
1283
}
1284
/*
1285
 * Replicated cached scan line read.  Call the function listed in the Param,
1286
 * but cache the result so that for replicated images we only do the work once.
1287
 */
1288
static Buffer
1289
replread(Param *p, uchar *s, int y)
1290
{
1291
	Buffer *b;
1292
 
1293
	USED(s);
1294
	b = &p->bcache[y];
1295
	if((p->bfilled & (1<<y)) == 0){
1296
		p->bfilled |= 1<<y;
1297
		*b = p->replcall(p, p->bufbase+y*p->bufdelta, y);
1298
	}
1299
	return *b;
1300
}
1301
 
1302
/*
1303
 * Alpha reading function that simply relabels the grey pointer.
1304
 */
1305
static Buffer
1306
greymaskread(Param *p, uchar *buf, int y)
1307
{
1308
	Buffer b;
1309
 
1310
	b = p->greymaskcall(p, buf, y);
1311
	b.alpha = b.grey;
1312
	return b;
1313
}
1314
 
1315
#define DBG if(0)
1316
static Buffer
1317
readnbit(Param *p, uchar *buf, int y)
1318
{
1319
	Buffer b;
1320
	Memimage *img;
1321
	uchar *repl, *r, *w, *ow, bits;
1322
	int i, n, sh, depth, x, dx, npack, nbits;
1323
 
1324
	b.rgba = (ulong*)buf;
1325
	b.grey = w = buf;
1326
	b.red = b.blu = b.grn = w;
1327
	b.alpha = &ones;
1328
	b.delta = 1;
1329
 
1330
	dx = p->dx;
1331
	img = p->img;
1332
	depth = img->depth;
1333
	repl = &replbit[depth][0];
1334
	npack = 8/depth;
1335
	sh = 8-depth;
1336
 
1337
	/* copy from p->r.min.x until end of repl rectangle */
1338
	x = p->r.min.x;
1339
	n = dx;
1340
	if(n > p->img->r.max.x - x)
1341
		n = p->img->r.max.x - x;
1342
 
1343
	r = p->bytermin + y*p->bwidth;
1344
DBG print("readnbit dx %d %p=%p+%d*%d, *r=%d fetch %d ", dx, r, p->bytermin, y, p->bwidth, *r, n);
1345
	bits = *r++;
1346
	nbits = 8;
1347
	if((i=x&(npack-1))){
1348
DBG print("throwaway %d...", i);
1349
		bits <<= depth*i;
1350
		nbits -= depth*i;
1351
	}
1352
	for(i=0; i<n; i++){
1353
		if(nbits == 0){
1354
DBG print("(%.2ux)...", *r);
1355
			bits = *r++;
1356
			nbits = 8;
1357
		}
1358
		*w++ = repl[bits>>sh];
1359
DBG print("bit %x...", repl[bits>>sh]);
1360
		bits <<= depth;
1361
		nbits -= depth;
1362
	}
1363
	dx -= n;
1364
	if(dx == 0)
1365
		return b;
1366
 
1367
	assert(x+i == p->img->r.max.x);
1368
 
1369
	/* copy from beginning of repl rectangle until where we were before. */
1370
	x = p->img->r.min.x;
1371
	n = dx;
1372
	if(n > p->r.min.x - x)
1373
		n = p->r.min.x - x;
1374
 
1375
	r = p->bytey0s + y*p->bwidth;
1376
DBG print("x=%d r=%p...", x, r);
1377
	bits = *r++;
1378
	nbits = 8;
1379
	if((i=x&(npack-1))){
1380
		bits <<= depth*i;
1381
		nbits -= depth*i;
1382
	}
1383
DBG print("nbits=%d...", nbits);
1384
	for(i=0; i<n; i++){
1385
		if(nbits == 0){
1386
			bits = *r++;
1387
			nbits = 8;
1388
		}
1389
		*w++ = repl[bits>>sh];
1390
DBG print("bit %x...", repl[bits>>sh]);
1391
		bits <<= depth;
1392
		nbits -= depth;
1393
DBG print("bits %x nbits %d...", bits, nbits);
1394
	}
1395
	dx -= n;
1396
	if(dx == 0)
1397
		return b;
1398
 
1399
	assert(dx > 0);
1400
	/* now we have exactly one full scan line: just replicate the buffer itself until we are done */
1401
	ow = buf;
1402
	while(dx--)
1403
		*w++ = *ow++;
1404
 
1405
	return b;
1406
}
1407
#undef DBG
1408
 
1409
#define DBG if(0)
1410
static void
1411
writenbit(Param *p, uchar *w, Buffer src)
1412
{
1413
	uchar *r;
1414
	ulong bits;
1415
	int i, sh, depth, npack, nbits, x, ex;
1416
 
1417
	assert(src.grey != nil && src.delta == 1);
1418
 
1419
	x = p->r.min.x;
1420
	ex = x+p->dx;
1421
	depth = p->img->depth;
1422
	npack = 8/depth;
1423
 
1424
	i=x&(npack-1);
1425
	bits = i ? (*w >> (8-depth*i)) : 0;
1426
	nbits = depth*i;
1427
	sh = 8-depth;
1428
	r = src.grey;
1429
 
1430
	for(; x<ex; x++){
1431
		bits <<= depth;
1432
DBG print(" %x", *r);
1433
		bits |= (*r++ >> sh);
1434
		nbits += depth;
1435
		if(nbits == 8){
1436
			*w++ = bits;
1437
			nbits = 0;
1438
		}
1439
	}
1440
 
1441
	if(nbits){
1442
		sh = 8-nbits;
1443
		bits <<= sh;
1444
		bits |= *w & ((1<<sh)-1);
1445
		*w = bits;
1446
	}
1447
DBG print("\n");
1448
	return;
1449
}
1450
#undef DBG
1451
 
1452
static Buffer
1453
readcmap(Param *p, uchar *buf, int y)
1454
{
1455
	Buffer b;
1456
	int a, convgrey, copyalpha, dx, i, m;
1457
	uchar *q, *cmap, *begin, *end, *r, *w;
1458
 
1459
	begin = p->bytey0s + y*p->bwidth;
1460
	r = p->bytermin + y*p->bwidth;
1461
	end = p->bytey0e + y*p->bwidth;
1462
	cmap = p->img->cmap->cmap2rgb;
1463
	convgrey = p->convgrey;
1464
	copyalpha = (p->img->flags&Falpha) ? 1 : 0;
1465
 
1466
	w = buf;
1467
	dx = p->dx;
1468
	if(copyalpha){
1469
		b.alpha = buf++;
1470
		a = p->img->shift[CAlpha]/8;
1471
		m = p->img->shift[CMap]/8;
1472
		for(i=0; i<dx; i++){
1473
			*w++ = r[a];
1474
			q = cmap+r[m]*3;
1475
			r += 2;
1476
			if(r == end)
1477
				r = begin;
1478
			if(convgrey){
1479
				*w++ = RGB2K(q[0], q[1], q[2]);
1480
			}else{
1481
				*w++ = q[2];	/* blue */
1482
				*w++ = q[1];	/* green */
1483
				*w++ = q[0];	/* red */
1484
			}
1485
		}
1486
	}else{
1487
		b.alpha = &ones;
1488
		for(i=0; i<dx; i++){
1489
			q = cmap+*r++*3;
1490
			if(r == end)
1491
				r = begin;
1492
			if(convgrey){
1493
				*w++ = RGB2K(q[0], q[1], q[2]);
1494
			}else{
1495
				*w++ = q[2];	/* blue */
1496
				*w++ = q[1];	/* green */
1497
				*w++ = q[0];	/* red */
1498
			}
1499
		}
1500
	}
1501
 
1502
	b.rgba = (ulong*)(buf-copyalpha);
1503
 
1504
	if(convgrey){
1505
		b.grey = buf;
1506
		b.red = b.blu = b.grn = buf;
1507
		b.delta = 1+copyalpha;
1508
	}else{
1509
		b.blu = buf;
1510
		b.grn = buf+1;
1511
		b.red = buf+2;
1512
		b.grey = nil;
1513
		b.delta = 3+copyalpha;
1514
	}
1515
	return b;
1516
}
1517
 
1518
static void
1519
writecmap(Param *p, uchar *w, Buffer src)
1520
{
1521
	uchar *cmap, *red, *grn, *blu;
1522
	int i, dx, delta;
1523
 
1524
	cmap = p->img->cmap->rgb2cmap;
1525
 
1526
	delta = src.delta;
1527
	red= src.red;
1528
	grn = src.grn;
1529
	blu = src.blu;
1530
 
1531
	dx = p->dx;
1532
	for(i=0; i<dx; i++, red+=delta, grn+=delta, blu+=delta)
1533
		*w++ = cmap[(*red>>4)*256+(*grn>>4)*16+(*blu>>4)];
1534
}
1535
 
1536
#define DBG if(0)
1537
static Buffer
1538
readbyte(Param *p, uchar *buf, int y)
1539
{
1540
	Buffer b;
1541
	Memimage *img;
1542
	int dx, isgrey, convgrey, alphaonly, copyalpha, i, nb;
1543
	uchar *begin, *end, *r, *w, *rrepl, *grepl, *brepl, *arepl, *krepl;
1544
	uchar ured, ugrn, ublu;
1545
	ulong u;
1546
 
1547
	img = p->img;
1548
	begin = p->bytey0s + y*p->bwidth;
1549
	r = p->bytermin + y*p->bwidth;
1550
	end = p->bytey0e + y*p->bwidth;
1551
 
1552
	w = buf;
1553
	dx = p->dx;
1554
	nb = img->depth/8;
1555
 
1556
	convgrey = p->convgrey;	/* convert rgb to grey */
1557
	isgrey = img->flags&Fgrey;
1558
	alphaonly = p->alphaonly;
1559
	copyalpha = (img->flags&Falpha) ? 1 : 0;
1560
 
1561
DBG print("copyalpha %d alphaonly %d convgrey %d isgrey %d\n", copyalpha, alphaonly, convgrey, isgrey);
1562
	/* if we can, avoid processing everything */
1563
	if(!(img->flags&Frepl) && !convgrey && (img->flags&Fbytes)){
1564
		memset(&b, 0, sizeof b);
1565
		if(p->needbuf){
1566
			memmove(buf, r, dx*nb);
1567
			r = buf;
1568
		}
1569
		b.rgba = (ulong*)r;
1570
		if(copyalpha)
1571
			b.alpha = r+img->shift[CAlpha]/8;
1572
		else
1573
			b.alpha = &ones;
1574
		if(isgrey){
1575
			b.grey = r+img->shift[CGrey]/8;
1576
			b.red = b.grn = b.blu = b.grey;
1577
		}else{
1578
			b.red = r+img->shift[CRed]/8;
1579
			b.grn = r+img->shift[CGreen]/8;
1580
			b.blu = r+img->shift[CBlue]/8;
1581
		}
1582
		b.delta = nb;
1583
		return b;
1584
	}
1585
 
1586
DBG print("2\n");
1587
	rrepl = replbit[img->nbits[CRed]];
1588
	grepl = replbit[img->nbits[CGreen]];
1589
	brepl = replbit[img->nbits[CBlue]];
1590
	arepl = replbit[img->nbits[CAlpha]];
1591
	krepl = replbit[img->nbits[CGrey]];
1592
 
1593
	for(i=0; i<dx; i++){
1594
		u = r[0] | (r[1]<<8) | (r[2]<<16) | (r[3]<<24);
1595
		if(copyalpha) {
1596
			*w++ = arepl[(u>>img->shift[CAlpha]) & img->mask[CAlpha]];
1597
DBG print("a %x\n", w[-1]);
1598
		}
1599
 
1600
		if(isgrey)
1601
			*w++ = krepl[(u >> img->shift[CGrey]) & img->mask[CGrey]];
1602
		else if(!alphaonly){
1603
			ured = rrepl[(u >> img->shift[CRed]) & img->mask[CRed]];
1604
			ugrn = grepl[(u >> img->shift[CGreen]) & img->mask[CGreen]];
1605
			ublu = brepl[(u >> img->shift[CBlue]) & img->mask[CBlue]];
1606
			if(convgrey){
1607
DBG print("g %x %x %x\n", ured, ugrn, ublu);
1608
				*w++ = RGB2K(ured, ugrn, ublu);
1609
DBG print("%x\n", w[-1]);
1610
			}else{
1611
				*w++ = brepl[(u >> img->shift[CBlue]) & img->mask[CBlue]];
1612
				*w++ = grepl[(u >> img->shift[CGreen]) & img->mask[CGreen]];
1613
				*w++ = rrepl[(u >> img->shift[CRed]) & img->mask[CRed]];
1614
			}
1615
		}
1616
		r += nb;
1617
		if(r == end)
1618
			r = begin;
1619
	}
1620
 
1621
	b.alpha = copyalpha ? buf : &ones;
1622
	b.rgba = (ulong*)buf;
1623
	if(alphaonly){
1624
		b.red = b.grn = b.blu = b.grey = nil;
1625
		if(!copyalpha)
1626
			b.rgba = nil;
1627
		b.delta = 1;
1628
	}else if(isgrey || convgrey){
1629
		b.grey = buf+copyalpha;
1630
		b.red = b.grn = b.blu = buf+copyalpha;
1631
		b.delta = copyalpha+1;
1632
DBG print("alpha %x grey %x\n", b.alpha ? *b.alpha : 0xFF, *b.grey);
1633
	}else{
1634
		b.blu = buf+copyalpha;
1635
		b.grn = buf+copyalpha+1;
1636
		b.grey = nil;
1637
		b.red = buf+copyalpha+2;
1638
		b.delta = copyalpha+3;
1639
	}
1640
	return b;
1641
}
1642
#undef DBG
1643
 
1644
#define DBG if(0)
1645
static void
1646
writebyte(Param *p, uchar *w, Buffer src)
1647
{
1648
	Memimage *img;
1649
	int i, isalpha, isgrey, nb, delta, dx, adelta;
1650
	uchar ff, *red, *grn, *blu, *grey, *alpha;
1651
	ulong u, mask;
1652
 
1653
	img = p->img;
1654
 
1655
	red = src.red;
1656
	grn = src.grn;
1657
	blu = src.blu;
1658
	alpha = src.alpha;
1659
	delta = src.delta;
1660
	grey = src.grey;
1661
	dx = p->dx;
1662
 
1663
	nb = img->depth/8;
1664
	mask = (nb==4) ? 0 : ~((1<<img->depth)-1);
1665
 
1666
	isalpha = img->flags&Falpha;
1667
	isgrey = img->flags&Fgrey;
1668
	adelta = src.delta;
1669
 
1670
	if(isalpha && (alpha == nil || alpha == &ones)){
1671
		ff = 0xFF;
1672
		alpha = &ff;
1673
		adelta = 0;
1674
	}
1675
 
1676
	for(i=0; i<dx; i++){
1677
		u = w[0] | (w[1]<<8) | (w[2]<<16) | (w[3]<<24);
1678
DBG print("u %.8lux...", u);
1679
		u &= mask;
1680
DBG print("&mask %.8lux...", u);
1681
		if(isgrey){
1682
			u |= ((*grey >> (8-img->nbits[CGrey])) & img->mask[CGrey]) << img->shift[CGrey];
1683
DBG print("|grey %.8lux...", u);
1684
			grey += delta;
1685
		}else{
1686
			u |= ((*red >> (8-img->nbits[CRed])) & img->mask[CRed]) << img->shift[CRed];
1687
			u |= ((*grn >> (8-img->nbits[CGreen])) & img->mask[CGreen]) << img->shift[CGreen];
1688
			u |= ((*blu >> (8-img->nbits[CBlue])) & img->mask[CBlue]) << img->shift[CBlue];
1689
			red += delta;
1690
			grn += delta;
1691
			blu += delta;
1692
DBG print("|rgb %.8lux...", u);
1693
		}
1694
 
1695
		if(isalpha){
1696
			u |= ((*alpha >> (8-img->nbits[CAlpha])) & img->mask[CAlpha]) << img->shift[CAlpha];
1697
			alpha += adelta;
1698
DBG print("|alpha %.8lux...", u);
1699
		}
1700
 
1701
		w[0] = u;
1702
		w[1] = u>>8;
1703
		w[2] = u>>16;
1704
		w[3] = u>>24;
1705
		w += nb;
1706
	}
1707
}
1708
#undef DBG
1709
 
1710
static Readfn*
1711
readfn(Memimage *img)
1712
{
1713
	if(img->depth < 8)
1714
		return readnbit;
1715
	if(img->nbits[CMap] == 8)
1716
		return readcmap;
1717
	return readbyte;
1718
}
1719
 
1720
static Readfn*
1721
readalphafn(Memimage *m)
1722
{
1723
	USED(m);
1724
	return readbyte;
1725
}
1726
 
1727
static Writefn*
1728
writefn(Memimage *img)
1729
{
1730
	if(img->depth < 8)
1731
		return writenbit;
1732
	if(img->chan == CMAP8)
1733
		return writecmap;
1734
	return writebyte;
1735
}
1736
 
1737
static void
1738
nullwrite(Param *p, uchar *s, Buffer b)
1739
{
1740
	USED(p);
1741
	USED(s);
1742
}
1743
 
1744
static Buffer
1745
readptr(Param *p, uchar *s, int y)
1746
{
1747
	Buffer b;
1748
	uchar *q;
1749
 
1750
	USED(s);
1751
	q = p->bytermin + y*p->bwidth;
1752
	b.red = q;	/* ptr to data */
1753
	b.grn = b.blu = b.grey = b.alpha = nil;
1754
	b.rgba = (ulong*)q;
1755
	b.delta = p->img->depth/8;
1756
	return b;
1757
}
1758
 
1759
static Buffer
1760
boolmemmove(Buffer bdst, Buffer bsrc, Buffer b1, int dx, int i, int o)
1761
{
1762
	USED(i);
1763
	USED(o);
1764
	memmove(bdst.red, bsrc.red, dx*bdst.delta);
1765
	return bdst;
1766
}
1767
 
1768
static Buffer
1769
boolcopy8(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1770
{
1771
	uchar *m, *r, *w, *ew;
1772
 
1773
	USED(i);
1774
	USED(o);
1775
	m = bmask.grey;
1776
	w = bdst.red;
1777
	r = bsrc.red;
1778
	ew = w+dx;
1779
	for(; w < ew; w++,r++)
1780
		if(*m++)
1781
			*w = *r;
1782
	return bdst;	/* not used */
1783
}
1784
 
1785
static Buffer
1786
boolcopy16(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1787
{
1788
	uchar *m;
1789
	ushort *r, *w, *ew;
1790
 
1791
	USED(i);
1792
	USED(o);
1793
	m = bmask.grey;
1794
	w = (ushort*)bdst.red;
1795
	r = (ushort*)bsrc.red;
1796
	ew = w+dx;
1797
	for(; w < ew; w++,r++)
1798
		if(*m++)
1799
			*w = *r;
1800
	return bdst;	/* not used */
1801
}
1802
 
1803
static Buffer
1804
boolcopy24(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1805
{
1806
	uchar *m;
1807
	uchar *r, *w, *ew;
1808
 
1809
	USED(i);
1810
	USED(o);
1811
	m = bmask.grey;
1812
	w = bdst.red;
1813
	r = bsrc.red;
1814
	ew = w+dx*3;
1815
	while(w < ew){
1816
		if(*m++){
1817
			*w++ = *r++;
1818
			*w++ = *r++;
1819
			*w++ = *r++;
1820
		}else{
1821
			w += 3;
1822
			r += 3;
1823
		}
1824
	}
1825
	return bdst;	/* not used */
1826
}
1827
 
1828
static Buffer
1829
boolcopy32(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1830
{
1831
	uchar *m;
1832
	ulong *r, *w, *ew;
1833
 
1834
	USED(i);
1835
	USED(o);
1836
	m = bmask.grey;
1837
	w = (ulong*)bdst.red;
1838
	r = (ulong*)bsrc.red;
1839
	ew = w+dx;
1840
	for(; w < ew; w++,r++)
1841
		if(*m++)
1842
			*w = *r;
1843
	return bdst;	/* not used */
1844
}
1845
 
1846
static Buffer
1847
genconv(Param *p, uchar *buf, int y)
1848
{
1849
	Buffer b;
1850
	int nb;
1851
	uchar *r, *w, *ew;
1852
 
1853
	/* read from source into RGB format in convbuf */
1854
	b = p->convreadcall(p, p->convbuf, y);
1855
 
1856
	/* write RGB format into dst format in buf */
1857
	p->convwritecall(p->convdpar, buf, b);
1858
 
1859
	if(p->convdx){
1860
		nb = p->convdpar->img->depth/8;
1861
		r = buf;
1862
		w = buf+nb*p->dx;
1863
		ew = buf+nb*p->convdx;
1864
		while(w<ew)
1865
			*w++ = *r++;
1866
	}
1867
 
1868
	b.red = buf;
1869
	b.blu = b.grn = b.grey = b.alpha = nil;
1870
	b.rgba = (ulong*)buf;
1871
	b.delta = 0;
1872
 
1873
	return b;
1874
}
1875
 
1876
static Readfn*
1877
convfn(Memimage *dst, Param *dpar, Memimage *src, Param *spar)
1878
{
1879
	if(dst->chan == src->chan && !(src->flags&Frepl)){
1880
//if(drawdebug) iprint("readptr...");
1881
		return readptr;
1882
	}
1883
 
1884
	if(dst->chan==CMAP8 && (src->chan==GREY1||src->chan==GREY2||src->chan==GREY4)){
1885
		/* cheat because we know the replicated value is exactly the color map entry. */
1886
//if(drawdebug) iprint("Readnbit...");
1887
		return readnbit;
1888
	}
1889
 
1890
	spar->convreadcall = readfn(src);
1891
	spar->convwritecall = writefn(dst);
1892
	spar->convdpar = dpar;
1893
 
1894
	/* allocate a conversion buffer */
1895
	spar->convbufoff = ndrawbuf;
1896
	ndrawbuf += spar->dx*4;
1897
 
1898
	if(spar->dx > Dx(spar->img->r)){
1899
		spar->convdx = spar->dx;
1900
		spar->dx = Dx(spar->img->r);
1901
	}
1902
 
1903
//if(drawdebug) iprint("genconv...");
1904
	return genconv;
1905
}
1906
 
1907
ulong
1908
_pixelbits(Memimage *i, Point pt)
1909
{
1910
	uchar *p;
1911
	ulong val;
1912
	int off, bpp, npack;
1913
 
1914
	val = 0;
1915
	p = byteaddr(i, pt);
1916
	switch(bpp=i->depth){
1917
	case 1:
1918
	case 2:
1919
	case 4:
1920
		npack = 8/bpp;
1921
		off = pt.x%npack;
1922
		val = p[0] >> bpp*(npack-1-off);
1923
		val &= (1<<bpp)-1;
1924
		break;
1925
	case 8:
1926
		val = p[0];
1927
		break;
1928
	case 16:
1929
		val = p[0]|(p[1]<<8);
1930
		break;
1931
	case 24:
1932
		val = p[0]|(p[1]<<8)|(p[2]<<16);
1933
		break;
1934
	case 32:
1935
		val = p[0]|(p[1]<<8)|(p[2]<<16)|(p[3]<<24);
1936
		break;
1937
	}
1938
	while(bpp<32){
1939
		val |= val<<bpp;
1940
		bpp *= 2;
1941
	}
1942
	return val;
1943
}
1944
 
1945
static Calcfn*
1946
boolcopyfn(Memimage *img, Memimage *mask)
1947
{
1948
	if(mask->flags&Frepl && Dx(mask->r)==1 && Dy(mask->r)==1 && pixelbits(mask, mask->r.min)==~0)
1949
		return boolmemmove;
1950
 
1951
	switch(img->depth){
1952
	case 8:
1953
		return boolcopy8;
1954
	case 16:
1955
		return boolcopy16;
1956
	case 24:
1957
		return boolcopy24;
1958
	case 32:
1959
		return boolcopy32;
1960
	default:
1961
		assert(0 /* boolcopyfn */);
1962
	}
1963
	return nil;
1964
}
1965
 
1966
/*
1967
 * Optimized draw for filling and scrolling; uses memset and memmove.
1968
 *
1969
static void
1970
memsetb(void *vp, uchar val, int n)
1971
{
1972
	uchar *p, *ep;
1973
 
1974
	p = vp;
1975
	ep = p+n;
1976
	while(p<ep)
1977
		*p++ = val;
1978
}
1979
*/
1980
 
1981
/*
 * Store val into n consecutive ushorts starting at vp.
 * Nothing is written when n is zero or negative.
 */
static void
memsets(void *vp, ushort val, int n)
{
	ushort *dst;
	int k;

	dst = vp;
	for(k=0; k<n; k++)
		dst[k] = val;
}
1991
 
1992
/*
 * Store val into n consecutive ulongs starting at vp.
 * Nothing is written when n is zero or negative.
 */
static void
memsetl(void *vp, ulong val, int n)
{
	ulong *dst;
	int k;

	dst = vp;
	for(k=0; k<n; k++)
		dst[k] = val;
}
2002
 
2003
/*
 * Store the low 24 bits of val into n consecutive 3-byte cells
 * starting at vp, least significant byte first.
 * Nothing is written when n is zero or negative.
 */
static void
memset24(void *vp, ulong val, int n)
{
	uchar *dst;
	uchar lo, mid, hi;
	int k;

	lo = val;
	mid = val>>8;
	hi = val>>16;

	dst = vp;
	for(k=0; k<n; k++, dst+=3){
		dst[0] = lo;
		dst[1] = mid;
		dst[2] = hi;
	}
}
2020
 
2021
ulong
2022
_imgtorgba(Memimage *img, ulong val)
2023
{
2024
	uchar r, g, b, a;
2025
	int nb, ov, v;
2026
	ulong chan;
2027
	uchar *p;
2028
 
2029
	a = 0xFF;
2030
	r = g = b = 0xAA;	/* garbage */
2031
	for(chan=img->chan; chan; chan>>=8){
2032
		nb = NBITS(chan);
2033
		ov = v = val&((1<<nb)-1);
2034
		val >>= nb;
2035
 
2036
		while(nb < 8){
2037
			v |= v<<nb;
2038
			nb *= 2;
2039
		}
2040
		v >>= (nb-8);
2041
 
2042
		switch(TYPE(chan)){
2043
		case CRed:
2044
			r = v;
2045
			break;
2046
		case CGreen:
2047
			g = v;
2048
			break;
2049
		case CBlue:
2050
			b = v;
2051
			break;
2052
		case CAlpha:
2053
			a = v;
2054
			break;
2055
		case CGrey:
2056
			r = g = b = v;
2057
			break;
2058
		case CMap:
2059
			p = img->cmap->cmap2rgb+3*ov;
2060
			r = *p++;
2061
			g = *p++;	
2062
			b = *p;
2063
			break;
2064
		}
2065
	}
2066
	return (r<<24)|(g<<16)|(b<<8)|a;	
2067
}
2068
 
2069
ulong
2070
_rgbatoimg(Memimage *img, ulong rgba)
2071
{
2072
	ulong chan;
2073
	int d, nb;
2074
	ulong v;
2075
	uchar *p, r, g, b, a, m;
2076
 
2077
	v = 0;
2078
	r = rgba>>24;
2079
	g = rgba>>16;
2080
	b = rgba>>8;
2081
	a = rgba;
2082
	d = 0;
2083
	for(chan=img->chan; chan; chan>>=8){
2084
		nb = NBITS(chan);
2085
		switch(TYPE(chan)){
2086
		case CRed:
2087
			v |= (r>>(8-nb))<<d;
2088
			break;
2089
		case CGreen:
2090
			v |= (g>>(8-nb))<<d;
2091
			break;
2092
		case CBlue:
2093
			v |= (b>>(8-nb))<<d;
2094
			break;
2095
		case CAlpha:
2096
			v |= (a>>(8-nb))<<d;
2097
			break;
2098
		case CMap:
2099
			p = img->cmap->rgb2cmap;
2100
			m = p[(r>>4)*256+(g>>4)*16+(b>>4)];
2101
			v |= (m>>(8-nb))<<d;
2102
			break;
2103
		case CGrey:
2104
			m = RGB2K(r,g,b);
2105
			v |= (m>>(8-nb))<<d;
2106
			break;
2107
		}
2108
		d += nb;
2109
	}
2110
//	print("rgba2img %.8lux = %.*lux\n", rgba, 2*d/8, v);
2111
	return v;
2112
}
2113
 
2114
#define DBG if(0)
2115
static int
2116
memoptdraw(Memdrawparam *par)
2117
{
2118
	int m, y, dy, dx, op;
2119
	ulong v;
2120
	Memimage *src;
2121
	Memimage *dst;
2122
 
2123
	dx = Dx(par->r);
2124
	dy = Dy(par->r);
2125
	src = par->src;
2126
	dst = par->dst;
2127
	op = par->op;
2128
 
2129
DBG print("state %lux mval %lux dd %d\n", par->state, par->mval, dst->depth);
2130
	/*
2131
	 * If we have an opaque mask and source is one opaque pixel we can convert to the
2132
	 * destination format and just replicate with memset.
2133
	 */
2134
	m = Simplesrc|Simplemask|Fullmask;
2135
	if((par->state&m)==m && (par->srgba&0xFF) == 0xFF && (op ==S || op == SoverD)){
2136
		uchar *dp, p[4];
2137
		int d, dwid, ppb, np, nb;
2138
		uchar lm, rm;
2139
 
2140
DBG print("memopt, dst %p, dst->data->bdata %p\n", dst, dst->data->bdata);
2141
		dwid = dst->width*sizeof(ulong);
2142
		dp = byteaddr(dst, par->r.min);
2143
		v = par->sdval;
2144
DBG print("sdval %lud, depth %d\n", v, dst->depth);
2145
		switch(dst->depth){
2146
		case 1:
2147
		case 2:
2148
		case 4:
2149
			for(d=dst->depth; d<8; d*=2)
2150
				v |= (v<<d);
2151
			ppb = 8/dst->depth;	/* pixels per byte */
2152
			m = ppb-1;
2153
			/* left edge */
2154
			np = par->r.min.x&m;		/* no. pixels unused on left side of word */
2155
			dx -= (ppb-np);
2156
			nb = 8 - np * dst->depth;		/* no. bits used on right side of word */
2157
			lm = (1<<nb)-1;
2158
DBG print("np %d x %d nb %d lm %ux ppb %d m %ux\n", np, par->r.min.x, nb, lm, ppb, m);	
2159
 
2160
			/* right edge */
2161
			np = par->r.max.x&m;	/* no. pixels used on left side of word */
2162
			dx -= np;
2163
			nb = 8 - np * dst->depth;		/* no. bits unused on right side of word */
2164
			rm = ~((1<<nb)-1);
2165
DBG print("np %d x %d nb %d rm %ux ppb %d m %ux\n", np, par->r.max.x, nb, rm, ppb, m);	
2166
 
2167
DBG print("dx %d Dx %d\n", dx, Dx(par->r));
2168
			/* lm, rm are masks that are 1 where we should touch the bits */
2169
			if(dx < 0){	/* just one byte */
2170
				lm &= rm;
2171
				for(y=0; y<dy; y++, dp+=dwid)
2172
					*dp ^= (v ^ *dp) & lm;
2173
			}else if(dx == 0){	/* no full bytes */
2174
				if(lm)
2175
					dwid--;
2176
 
2177
				for(y=0; y<dy; y++, dp+=dwid){
2178
					if(lm){
2179
DBG print("dp %p v %lux lm %ux (v ^ *dp) & lm %lux\n", dp, v, lm, (v^*dp)&lm);
2180
						*dp ^= (v ^ *dp) & lm;
2181
						dp++;
2182
					}
2183
					*dp ^= (v ^ *dp) & rm;
2184
				}
2185
			}else{		/* full bytes in middle */
2186
				dx /= ppb;
2187
				if(lm)
2188
					dwid--;
2189
				dwid -= dx;
2190
 
2191
				for(y=0; y<dy; y++, dp+=dwid){
2192
					if(lm){
2193
						*dp ^= (v ^ *dp) & lm;
2194
						dp++;
2195
					}
2196
					memset(dp, v, dx);
2197
					dp += dx;
2198
					*dp ^= (v ^ *dp) & rm;
2199
				}
2200
			}
2201
			return 1;
2202
		case 8:
2203
			for(y=0; y<dy; y++, dp+=dwid)
2204
				memset(dp, v, dx);
2205
			return 1;
2206
		case 16:
2207
			p[0] = v;		/* make little endian */
2208
			p[1] = v>>8;
2209
			v = *(ushort*)p;
2210
DBG print("dp=%p; dx=%d; for(y=0; y<%d; y++, dp+=%d)\nmemsets(dp, v, dx);\n",
2211
	dp, dx, dy, dwid);
2212
			for(y=0; y<dy; y++, dp+=dwid)
2213
				memsets(dp, v, dx);
2214
			return 1;
2215
		case 24:
2216
			for(y=0; y<dy; y++, dp+=dwid)
2217
				memset24(dp, v, dx);
2218
			return 1;
2219
		case 32:
2220
			p[0] = v;		/* make little endian */
2221
			p[1] = v>>8;
2222
			p[2] = v>>16;
2223
			p[3] = v>>24;
2224
			v = *(ulong*)p;
2225
			for(y=0; y<dy; y++, dp+=dwid)
2226
				memsetl(dp, v, dx);
2227
			return 1;
2228
		default:
2229
			assert(0 /* bad dest depth in memoptdraw */);
2230
		}
2231
	}
2232
 
2233
	/*
2234
	 * If no source alpha, an opaque mask, we can just copy the
2235
	 * source onto the destination.  If the channels are the same and
2236
	 * the source is not replicated, memmove suffices.
2237
	 */
2238
	m = Simplemask|Fullmask;
2239
	if((par->state&(m|Replsrc))==m && src->depth >= 8 
2240
	&& src->chan == dst->chan && !(src->flags&Falpha) && (op == S || op == SoverD)){
2241
		uchar *sp, *dp;
2242
		long swid, dwid, nb;
2243
		int dir;
2244
 
2245
		if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min))
2246
			dir = -1;
2247
		else
2248
			dir = 1;
2249
 
2250
		swid = src->width*sizeof(ulong);
2251
		dwid = dst->width*sizeof(ulong);
2252
		sp = byteaddr(src, par->sr.min);
2253
		dp = byteaddr(dst, par->r.min);
2254
		if(dir == -1){
2255
			sp += (dy-1)*swid;
2256
			dp += (dy-1)*dwid;
2257
			swid = -swid;
2258
			dwid = -dwid;
2259
		}
2260
		nb = (dx*src->depth)/8;
2261
		for(y=0; y<dy; y++, sp+=swid, dp+=dwid)
2262
			memmove(dp, sp, nb);
2263
		return 1;
2264
	}
2265
 
2266
	/*
2267
	 * If we have a 1-bit mask, 1-bit source, and 1-bit destination, and
2268
	 * they're all bit aligned, we can just use bit operators.  This happens
2269
	 * when we're manipulating boolean masks, e.g. in the arc code.
2270
	 */
2271
	if((par->state&(Simplemask|Simplesrc|Replmask|Replsrc))==0 
2272
	&& dst->chan==GREY1 && src->chan==GREY1 && par->mask->chan==GREY1 
2273
	&& (par->r.min.x&7)==(par->sr.min.x&7) && (par->r.min.x&7)==(par->mr.min.x&7)){
2274
		uchar *sp, *dp, *mp;
2275
		uchar lm, rm;
2276
		long swid, dwid, mwid;
2277
		int i, x, dir;
2278
 
2279
		sp = byteaddr(src, par->sr.min);
2280
		dp = byteaddr(dst, par->r.min);
2281
		mp = byteaddr(par->mask, par->mr.min);
2282
		swid = src->width*sizeof(ulong);
2283
		dwid = dst->width*sizeof(ulong);
2284
		mwid = par->mask->width*sizeof(ulong);
2285
 
2286
		if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min)){
2287
			dir = -1;
2288
		}else
2289
			dir = 1;
2290
 
2291
		lm = 0xFF>>(par->r.min.x&7);
2292
		rm = 0xFF<<(8-(par->r.max.x&7));
2293
		dx -= (8-(par->r.min.x&7)) + (par->r.max.x&7);
2294
 
2295
		if(dx < 0){	/* one byte wide */
2296
			lm &= rm;
2297
			if(dir == -1){
2298
				dp += dwid*(dy-1);
2299
				sp += swid*(dy-1);
2300
				mp += mwid*(dy-1);
2301
				dwid = -dwid;
2302
				swid = -swid;
2303
				mwid = -mwid;
2304
			}
2305
			for(y=0; y<dy; y++){
2306
				*dp ^= (*dp ^ *sp) & *mp & lm;
2307
				dp += dwid;
2308
				sp += swid;
2309
				mp += mwid;
2310
			}
2311
			return 1;
2312
		}
2313
 
2314
		dx /= 8;
2315
		if(dir == 1){
2316
			i = (lm!=0)+dx+(rm!=0);
2317
			mwid -= i;
2318
			swid -= i;
2319
			dwid -= i;
2320
			for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
2321
				if(lm){
2322
					*dp ^= (*dp ^ *sp++) & *mp++ & lm;
2323
					dp++;
2324
				}
2325
				for(x=0; x<dx; x++){
2326
					*dp ^= (*dp ^ *sp++) & *mp++;
2327
					dp++;
2328
				}
2329
				if(rm){
2330
					*dp ^= (*dp ^ *sp++) & *mp++ & rm;
2331
					dp++;
2332
				}
2333
			}
2334
			return 1;
2335
		}else{
2336
		/* dir == -1 */
2337
			i = (lm!=0)+dx+(rm!=0);
2338
			dp += dwid*(dy-1)+i-1;
2339
			sp += swid*(dy-1)+i-1;
2340
			mp += mwid*(dy-1)+i-1;
2341
			dwid = -dwid+i;
2342
			swid = -swid+i;
2343
			mwid = -mwid+i;
2344
			for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
2345
				if(rm){
2346
					*dp ^= (*dp ^ *sp--) & *mp-- & rm;
2347
					dp--;
2348
				}
2349
				for(x=0; x<dx; x++){
2350
					*dp ^= (*dp ^ *sp--) & *mp--;
2351
					dp--;
2352
				}
2353
				if(lm){
2354
					*dp ^= (*dp ^ *sp--) & *mp-- & lm;
2355
					dp--;
2356
				}
2357
			}
2358
		}
2359
		return 1;
2360
	}
2361
	return 0;	
2362
}
2363
#undef DBG
2364
 
2365
/*
2366
 * Boolean character drawing.
2367
 * Solid opaque color through a 1-bit greyscale mask.
2368
 */
2369
#define DBG if(0)
2370
static int
2371
chardraw(Memdrawparam *par)
2372
{
2373
	ulong bits;
2374
	int i, ddepth, dy, dx, x, bx, ex, y, npack, bsh, depth, op;
2375
	ulong v, maskwid, dstwid;
2376
	uchar *wp, *rp, *q, *wc;
2377
	ushort *ws;
2378
	ulong *wl;
2379
	uchar sp[4];
2380
	Rectangle r, mr;
2381
	Memimage *mask, *src, *dst;
2382
 
2383
if(0) if(drawdebug) iprint("chardraw? mf %lux md %d sf %lux dxs %d dys %d dd %d ddat %p sdat %p\n",
2384
		par->mask->flags, par->mask->depth, par->src->flags, 
2385
		Dx(par->src->r), Dy(par->src->r), par->dst->depth, par->dst->data, par->src->data);
2386
 
2387
	mask = par->mask;
2388
	src = par->src;
2389
	dst = par->dst;
2390
	r = par->r;
2391
	mr = par->mr;
2392
	op = par->op;
2393
 
2394
	if((par->state&(Replsrc|Simplesrc|Replmask)) != (Replsrc|Simplesrc)
2395
	|| mask->depth != 1 || src->flags&Falpha || dst->depth<8 || dst->data==src->data
2396
	|| op != SoverD)
2397
		return 0;
2398
 
2399
//if(drawdebug) iprint("chardraw...");
2400
 
2401
	depth = mask->depth;
2402
	maskwid = mask->width*sizeof(ulong);
2403
	rp = byteaddr(mask, mr.min);
2404
	npack = 8/depth;
2405
	bsh = (mr.min.x % npack) * depth;
2406
 
2407
	wp = byteaddr(dst, r.min);
2408
	dstwid = dst->width*sizeof(ulong);
2409
DBG print("bsh %d\n", bsh);
2410
	dy = Dy(r);
2411
	dx = Dx(r);
2412
 
2413
	ddepth = dst->depth;
2414
 
2415
	/*
2416
	 * for loop counts from bsh to bsh+dx
2417
	 *
2418
	 * we want the bottom bits to be the amount
2419
	 * to shift the pixels down, so for n≡0 (mod 8) we want 
2420
	 * bottom bits 7.  for n≡1, 6, etc.
2421
	 * the bits come from -n-1.
2422
	 */
2423
 
2424
	bx = -bsh-1;
2425
	ex = -bsh-1-dx;
2426
	SET(bits);
2427
	v = par->sdval;
2428
 
2429
	/* make little endian */
2430
	sp[0] = v;
2431
	sp[1] = v>>8;
2432
	sp[2] = v>>16;
2433
	sp[3] = v>>24;
2434
 
2435
//print("sp %x %x %x %x\n", sp[0], sp[1], sp[2], sp[3]);
2436
	for(y=0; y<dy; y++, rp+=maskwid, wp+=dstwid){
2437
		q = rp;
2438
		if(bsh)
2439
			bits = *q++;
2440
		switch(ddepth){
2441
		case 8:
2442
//if(drawdebug) iprint("8loop...");
2443
			wc = wp;
2444
			for(x=bx; x>ex; x--, wc++){
2445
				i = x&7;
2446
				if(i == 8-1)
2447
					bits = *q++;
2448
DBG print("bits %lux sh %d...", bits, i);
2449
				if((bits>>i)&1)
2450
					*wc = v;
2451
			}
2452
			break;
2453
		case 16:
2454
			ws = (ushort*)wp;
2455
			v = *(ushort*)sp;
2456
			for(x=bx; x>ex; x--, ws++){
2457
				i = x&7;
2458
				if(i == 8-1)
2459
					bits = *q++;
2460
DBG print("bits %lux sh %d...", bits, i);
2461
				if((bits>>i)&1)
2462
					*ws = v;
2463
			}
2464
			break;
2465
		case 24:
2466
			wc = wp;
2467
			for(x=bx; x>ex; x--, wc+=3){
2468
				i = x&7;
2469
				if(i == 8-1)
2470
					bits = *q++;
2471
DBG print("bits %lux sh %d...", bits, i);
2472
				if((bits>>i)&1){
2473
					wc[0] = sp[0];
2474
					wc[1] = sp[1];
2475
					wc[2] = sp[2];
2476
				}
2477
			}
2478
			break;
2479
		case 32:
2480
			wl = (ulong*)wp;
2481
			v = *(ulong*)sp;
2482
			for(x=bx; x>ex; x--, wl++){
2483
				i = x&7;
2484
				if(i == 8-1)
2485
					bits = *q++;
2486
DBG iprint("bits %lux sh %d...", bits, i);
2487
				if((bits>>i)&1)
2488
					*wl = v;
2489
			}
2490
			break;
2491
		}
2492
	}
2493
 
2494
DBG print("\n");	
2495
	return 1;	
2496
}
2497
#undef DBG
2498
 
2499
 
2500
/*
2501
 * Fill entire byte with replicated (if necessary) copy of source pixel,
2502
 * assuming destination ldepth is >= source ldepth.
2503
 *
2504
 * This code is just plain wrong for >8bpp.
2505
 *
2506
ulong
2507
membyteval(Memimage *src)
2508
{
2509
	int i, val, bpp;
2510
	uchar uc;
2511
 
2512
	unloadmemimage(src, src->r, &uc, 1);
2513
	bpp = src->depth;
2514
	uc <<= (src->r.min.x&(7/src->depth))*src->depth;
2515
	uc &= ~(0xFF>>bpp);
2516
	// pixel value is now in high part of byte. repeat throughout byte 
2517
	val = uc;
2518
	for(i=bpp; i<8; i<<=1)
2519
		val |= val>>i;
2520
	return val;
2521
}
2522
 * 
2523
 */
2524
 
2525
void
2526
_memfillcolor(Memimage *i, ulong val)
2527
{
2528
	ulong bits;
2529
	int d, y;
2530
 
2531
	if(val == DNofill)
2532
		return;
2533
 
2534
	bits = _rgbatoimg(i, val);
2535
	switch(i->depth){
2536
	case 24:	/* 24-bit images suck */
2537
		for(y=i->r.min.y; y<i->r.max.y; y++)
2538
			memset24(byteaddr(i, Pt(i->r.min.x, y)), bits, Dx(i->r));
2539
		break;
2540
	default:	/* 1, 2, 4, 8, 16, 32 */
2541
		for(d=i->depth; d<32; d*=2)
2542
			bits = (bits << d) | bits;
2543
		memsetl(wordaddr(i, i->r.min), bits, i->width*Dy(i->r));
2544
		break;
2545
	}
2546
}
2547