Subversion Repositories planix.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
#include "gc.h"
2
 
3
Reg*
4
rega(void)
5
{
6
	Reg *r;
7
 
8
	r = freer;
9
	if(r == R) {
10
		r = alloc(sizeof(*r));
11
	} else
12
		freer = r->link;
13
 
14
	*r = zreg;
15
	return r;
16
}
17
 
18
int
19
rcmp(const void *a1, const void *a2)
20
{
21
	Rgn *p1, *p2;
22
	int c1, c2;
23
 
24
	p1 = (Rgn*)a1;
25
	p2 = (Rgn*)a2;
26
	c1 = p2->cost;
27
	c2 = p1->cost;
28
	if(c1 -= c2)
29
		return c1;
30
	return p2->varno - p1->varno;
31
}
32
 
33
void
34
regopt(Prog *p)
35
{
36
	Reg *r, *r1, *r2;
37
	Prog *p1;
38
	int i, z;
39
	long initpc, val, npc;
40
	ulong vreg;
41
	Bits bit;
42
	struct
43
	{
44
		long	m;
45
		long	c;
46
		Reg*	p;
47
	} log5[6], *lp;
48
 
49
	firstr = R;
50
	lastr = R;
51
	nvar = 0;
52
	regbits = 0;
53
	for(z=0; z<BITS; z++) {
54
		externs.b[z] = 0;
55
		params.b[z] = 0;
56
		consts.b[z] = 0;
57
		addrs.b[z] = 0;
58
	}
59
 
60
	/*
61
	 * pass 1
62
	 * build aux data structure
63
	 * allocate pcs
64
	 * find use and set of variables
65
	 */
66
	val = 5L * 5L * 5L * 5L * 5L;
67
	lp = log5;
68
	for(i=0; i<5; i++) {
69
		lp->m = val;
70
		lp->c = 0;
71
		lp->p = R;
72
		val /= 5L;
73
		lp++;
74
	}
75
	val = 0;
76
	for(; p != P; p = p->link) {
77
		switch(p->as) {
78
		case ADATA:
79
		case AGLOBL:
80
		case ANAME:
81
		case ASIGNAME:
82
			continue;
83
		}
84
		r = rega();
85
		if(firstr == R) {
86
			firstr = r;
87
			lastr = r;
88
		} else {
89
			lastr->link = r;
90
			r->p1 = lastr;
91
			lastr->s1 = r;
92
			lastr = r;
93
		}
94
		r->prog = p;
95
		r->pc = val;
96
		val++;
97
 
98
		lp = log5;
99
		for(i=0; i<5; i++) {
100
			lp->c--;
101
			if(lp->c <= 0) {
102
				lp->c = lp->m;
103
				if(lp->p != R)
104
					lp->p->log5 = r;
105
				lp->p = r;
106
				(lp+1)->c = 0;
107
				break;
108
			}
109
			lp++;
110
		}
111
 
112
		r1 = r->p1;
113
		if(r1 != R)
114
		switch(r1->prog->as) {
115
		case ARETURN:
116
		case AJMP:
117
		case ARETT:
118
			r->p1 = R;
119
			r1->s1 = R;
120
		}
121
 
122
		/*
123
		 * left side always read
124
		 */
125
		bit = mkvar(&p->from, p->as==AMOVW);
126
		for(z=0; z<BITS; z++)
127
			r->use1.b[z] |= bit.b[z];
128
 
129
		/*
130
		 * right side depends on opcode
131
		 */
132
		bit = mkvar(&p->to, 0);
133
		if(bany(&bit))
134
		switch(p->as) {
135
		default:
136
			diag(Z, "reg: unknown asop: %A", p->as);
137
			break;
138
 
139
		/*
140
		 * right side write
141
		 */
142
		case ANOP:
143
		case AMOVB:
144
		case AMOVBU:
145
		case AMOVH:
146
		case AMOVHU:
147
		case AMOVW:
148
		case AFMOVF:
149
		case AFMOVD:
150
			for(z=0; z<BITS; z++)
151
				r->set.b[z] |= bit.b[z];
152
			break;
153
 
154
		/*
155
		 * funny
156
		 */
157
		case AJMPL:
158
			for(z=0; z<BITS; z++)
159
				addrs.b[z] |= bit.b[z];
160
			break;
161
		}
162
	}
163
	if(firstr == R)
164
		return;
165
	initpc = pc - val;
166
	npc = val;
167
 
168
	/*
169
	 * pass 2
170
	 * turn branch references to pointers
171
	 * build back pointers
172
	 */
173
	for(r = firstr; r != R; r = r->link) {
174
		p = r->prog;
175
		if(p->to.type == D_BRANCH) {
176
			val = p->to.offset - initpc;
177
			r1 = firstr;
178
			while(r1 != R) {
179
				r2 = r1->log5;
180
				if(r2 != R && val >= r2->pc) {
181
					r1 = r2;
182
					continue;
183
				}
184
				if(r1->pc == val)
185
					break;
186
				r1 = r1->link;
187
			}
188
			if(r1 == R) {
189
				nearln = p->lineno;
190
				diag(Z, "ref not found\n%P", p);
191
				continue;
192
			}
193
			if(r1 == r) {
194
				nearln = p->lineno;
195
				diag(Z, "ref to self\n%P", p);
196
				continue;
197
			}
198
			r->s2 = r1;
199
			r->p2link = r1->p2;
200
			r1->p2 = r;
201
		}
202
	}
203
	if(debug['R']) {
204
		p = firstr->prog;
205
		print("\n%L %D\n", p->lineno, &p->from);
206
	}
207
 
208
	/*
209
	 * pass 2.5
210
	 * find looping structure
211
	 */
212
	for(r = firstr; r != R; r = r->link)
213
		r->active = 0;
214
	change = 0;
215
	loopit(firstr, npc);
216
	if(debug['R'] && debug['v']) {
217
		print("\nlooping structure:\n");
218
		for(r = firstr; r != R; r = r->link) {
219
			print("%ld:%P", r->loop, r->prog);
220
			for(z=0; z<BITS; z++)
221
				bit.b[z] = r->use1.b[z] |
222
					r->use2.b[z] | r->set.b[z];
223
			if(bany(&bit)) {
224
				print("\t");
225
				if(bany(&r->use1))
226
					print(" u1=%B", r->use1);
227
				if(bany(&r->use2))
228
					print(" u2=%B", r->use2);
229
				if(bany(&r->set))
230
					print(" st=%B", r->set);
231
			}
232
			print("\n");
233
		}
234
	}
235
 
236
	/*
237
	 * pass 3
238
	 * iterate propagating usage
239
	 * 	back until flow graph is complete
240
	 */
241
loop1:
242
	change = 0;
243
	for(r = firstr; r != R; r = r->link)
244
		r->active = 0;
245
	for(r = firstr; r != R; r = r->link)
246
		if(r->prog->as == ARETURN)
247
			prop(r, zbits, zbits);
248
loop11:
249
	/* pick up unreachable code */
250
	i = 0;
251
	for(r = firstr; r != R; r = r1) {
252
		r1 = r->link;
253
		if(r1 && r1->active && !r->active) {
254
			prop(r, zbits, zbits);
255
			i = 1;
256
		}
257
	}
258
	if(i)
259
		goto loop11;
260
	if(change)
261
		goto loop1;
262
 
263
 
264
	/*
265
	 * pass 4
266
	 * iterate propagating register/variable synchrony
267
	 * 	forward until graph is complete
268
	 */
269
loop2:
270
	change = 0;
271
	for(r = firstr; r != R; r = r->link)
272
		r->active = 0;
273
	synch(firstr, zbits);
274
	if(change)
275
		goto loop2;
276
 
277
 
278
	/*
279
	 * pass 5
280
	 * isolate regions
281
	 * calculate costs (paint1)
282
	 */
283
	r = firstr;
284
	if(r) {
285
		for(z=0; z<BITS; z++)
286
			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
287
			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
288
		if(bany(&bit)) {
289
			nearln = r->prog->lineno;
290
			warn(Z, "used and not set: %B", bit);
291
			if(debug['R'] && !debug['w'])
292
				print("used and not set: %B\n", bit);
293
		}
294
	}
295
	if(debug['R'] && debug['v'])
296
		print("\nprop structure:\n");
297
	for(r = firstr; r != R; r = r->link)
298
		r->act = zbits;
299
	rgp = region;
300
	nregion = 0;
301
	for(r = firstr; r != R; r = r->link) {
302
		if(debug['R'] && debug['v'])
303
			print("%P\n	set = %B; rah = %B; cal = %B\n",
304
				r->prog, r->set, r->refahead, r->calahead);
305
		for(z=0; z<BITS; z++)
306
			bit.b[z] = r->set.b[z] &
307
			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
308
		if(bany(&bit)) {
309
			nearln = r->prog->lineno;
310
			warn(Z, "set and not used: %B", bit);
311
			if(debug['R'])
312
				print("set an not used: %B\n", bit);
313
			excise(r);
314
		}
315
		for(z=0; z<BITS; z++)
316
			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
317
		while(bany(&bit)) {
318
			i = bnum(bit);
319
			rgp->enter = r;
320
			rgp->varno = i;
321
			change = 0;
322
			if(debug['R'] && debug['v'])
323
				print("\n");
324
			paint1(r, i);
325
			bit.b[i/32] &= ~(1L<<(i%32));
326
			if(change <= 0) {
327
				if(debug['R'])
328
					print("%L$%d: %B\n",
329
						r->prog->lineno, change, blsh(i));
330
				continue;
331
			}
332
			rgp->cost = change;
333
			nregion++;
334
			if(nregion >= NRGN) {
335
				warn(Z, "too many regions");
336
				goto brk;
337
			}
338
			rgp++;
339
		}
340
	}
341
brk:
342
	qsort(region, nregion, sizeof(region[0]), rcmp);
343
 
344
	/*
345
	 * pass 6
346
	 * determine used registers (paint2)
347
	 * replace code (paint3)
348
	 */
349
	rgp = region;
350
	for(i=0; i<nregion; i++) {
351
		bit = blsh(rgp->varno);
352
		vreg = paint2(rgp->enter, rgp->varno);
353
		vreg = allreg(vreg, rgp);
354
		if(debug['R']) {
355
			if(rgp->regno >= NREG)
356
				print("%L$%d F%d: %B\n",
357
					rgp->enter->prog->lineno,
358
					rgp->cost,
359
					rgp->regno-NREG,
360
					bit);
361
			else
362
				print("%L$%d R%d: %B\n",
363
					rgp->enter->prog->lineno,
364
					rgp->cost,
365
					rgp->regno,
366
					bit);
367
		}
368
		if(rgp->regno != 0)
369
			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
370
		rgp++;
371
	}
372
	/*
373
	 * pass 7
374
	 * peep-hole on basic block
375
	 */
376
	if(!debug['R'] || debug['P'])
377
		peep();
378
 
379
	/*
380
	 * pass 8
381
	 * recalculate pc
382
	 */
383
	val = initpc;
384
	for(r = firstr; r != R; r = r1) {
385
		r->pc = val;
386
		p = r->prog;
387
		p1 = P;
388
		r1 = r->link;
389
		if(r1 != R)
390
			p1 = r1->prog;
391
		for(; p != p1; p = p->link) {
392
			switch(p->as) {
393
			default:
394
				val++;
395
				break;
396
 
397
			case ANOP:
398
			case ADATA:
399
			case AGLOBL:
400
			case ANAME:
401
			case ASIGNAME:
402
				break;
403
			}
404
		}
405
	}
406
	pc = val;
407
 
408
	/*
409
	 * fix up branches
410
	 */
411
	if(debug['R'])
412
		if(bany(&addrs))
413
			print("addrs: %B\n", addrs);
414
 
415
	r1 = 0; /* set */
416
	for(r = firstr; r != R; r = r->link) {
417
		p = r->prog;
418
		if(p->to.type == D_BRANCH)
419
			p->to.offset = r->s2->pc;
420
		r1 = r;
421
	}
422
 
423
	/*
424
	 * last pass
425
	 * eliminate nops
426
	 * free aux structures
427
	 */
428
	for(p = firstr->prog; p != P; p = p->link){
429
		while(p->link && p->link->as == ANOP)
430
			p->link = p->link->link;
431
	}
432
	if(r1 != R) {
433
		r1->link = freer;
434
		freer = firstr;
435
	}
436
}
437
 
438
/*
439
 * add mov b,rn
440
 * just after r
441
 */
442
void
443
addmove(Reg *r, int bn, int rn, int f)
444
{
445
	Prog *p, *p1;
446
	Adr *a;
447
	Var *v;
448
 
449
	p1 = alloc(sizeof(*p1));
450
	*p1 = zprog;
451
	p = r->prog;
452
 
453
	p1->link = p->link;
454
	p->link = p1;
455
	p1->lineno = p->lineno;
456
 
457
	v = var + bn;
458
 
459
	a = &p1->to;
460
	a->sym = v->sym;
461
	a->name = v->name;
462
	a->offset = v->offset;
463
	a->etype = v->etype;
464
	a->type = D_OREG;
465
	if(a->etype == TARRAY || a->sym == S)
466
		a->type = D_CONST;
467
 
468
	p1->as = AMOVW;
469
	if(v->etype == TCHAR || v->etype == TUCHAR)
470
		p1->as = AMOVB;
471
	if(v->etype == TSHORT || v->etype == TUSHORT)
472
		p1->as = AMOVH;
473
	if(v->etype == TFLOAT)
474
		p1->as = AFMOVF;
475
	if(v->etype == TDOUBLE)
476
		p1->as = AFMOVD;
477
 
478
	p1->from.type = D_REG;
479
	p1->from.reg = rn;
480
	if(rn >= NREG) {
481
		p1->from.type = D_FREG;
482
		p1->from.reg = rn-NREG;
483
	}
484
	if(!f) {
485
		p1->from = *a;
486
		*a = zprog.from;
487
		a->type = D_REG;
488
		a->reg = rn;
489
		if(rn >= NREG) {
490
			a->type = D_FREG;
491
			a->reg = rn-NREG;
492
		}
493
		if(v->etype == TUCHAR)
494
			p1->as = AMOVBU;
495
		if(v->etype == TUSHORT)
496
			p1->as = AMOVHU;
497
	}
498
	if(debug['R'])
499
		print("%P\t.a%P\n", p, p1);
500
}
501
 
502
Bits
503
mkvar(Adr *a, int docon)
504
{
505
	Var *v;
506
	int i, t, n, et, z;
507
	long o;
508
	Bits bit;
509
	Sym *s;
510
 
511
	t = a->type;
512
	if(t == D_REG && a->reg != NREG)
513
		regbits |= RtoB(a->reg);
514
	if(t == D_FREG && a->reg != NREG)
515
		regbits |= FtoB(a->reg);
516
	s = a->sym;
517
	o = a->offset;
518
	et = a->etype;
519
	if(s == S) {
520
		if(t != D_CONST || !docon || a->reg != NREG)
521
			goto none;
522
		et = TLONG;
523
	}
524
	if(t == D_CONST) {
525
		if(s == S && sval(o))
526
			goto none;
527
	}
528
	n = a->name;
529
	v = var;
530
	for(i=0; i<nvar; i++) {
531
		if(s == v->sym)
532
		if(n == v->name)
533
		if(o == v->offset)
534
			goto out;
535
		v++;
536
	}
537
	if(s)
538
		if(s->name[0] == '.')
539
			goto none;
540
	if(nvar >= NVAR) {
541
		if(debug['w'] > 1 && s)
542
			warn(Z, "variable not optimized: %s", s->name);
543
		goto none;
544
	}
545
	i = nvar;
546
	nvar++;
547
	v = &var[i];
548
	v->sym = s;
549
	v->offset = o;
550
	v->etype = et;
551
	v->name = n;
552
	if(debug['R'])
553
		print("bit=%2d et=%2d %D\n", i, et, a);
554
out:
555
	bit = blsh(i);
556
	if(n == D_EXTERN || n == D_STATIC)
557
		for(z=0; z<BITS; z++)
558
			externs.b[z] |= bit.b[z];
559
	if(n == D_PARAM)
560
		for(z=0; z<BITS; z++)
561
			params.b[z] |= bit.b[z];
562
	if(v->etype != et || !typechlpfd[et])	/* funny punning */
563
		for(z=0; z<BITS; z++)
564
			addrs.b[z] |= bit.b[z];
565
	if(t == D_CONST) {
566
		if(s == S) {
567
			for(z=0; z<BITS; z++)
568
				consts.b[z] |= bit.b[z];
569
			return bit;
570
		}
571
		if(et != TARRAY)
572
			for(z=0; z<BITS; z++)
573
				addrs.b[z] |= bit.b[z];
574
		for(z=0; z<BITS; z++)
575
			params.b[z] |= bit.b[z];
576
		return bit;
577
	}
578
	if(t == D_OREG)
579
		return bit;
580
 
581
none:
582
	return zbits;
583
}
584
 
585
void
586
prop(Reg *r, Bits ref, Bits cal)
587
{
588
	Reg *r1, *r2;
589
	int z;
590
 
591
	for(r1 = r; r1 != R; r1 = r1->p1) {
592
		for(z=0; z<BITS; z++) {
593
			ref.b[z] |= r1->refahead.b[z];
594
			if(ref.b[z] != r1->refahead.b[z]) {
595
				r1->refahead.b[z] = ref.b[z];
596
				change++;
597
			}
598
			cal.b[z] |= r1->calahead.b[z];
599
			if(cal.b[z] != r1->calahead.b[z]) {
600
				r1->calahead.b[z] = cal.b[z];
601
				change++;
602
			}
603
		}
604
		switch(r1->prog->as) {
605
		case AJMPL:
606
			for(z=0; z<BITS; z++) {
607
				cal.b[z] |= ref.b[z] | externs.b[z];
608
				ref.b[z] = 0;
609
			}
610
			break;
611
 
612
		case ATEXT:
613
			for(z=0; z<BITS; z++) {
614
				cal.b[z] = 0;
615
				ref.b[z] = 0;
616
			}
617
			break;
618
 
619
		case ARETURN:
620
			for(z=0; z<BITS; z++) {
621
				cal.b[z] = externs.b[z];
622
				ref.b[z] = 0;
623
			}
624
		}
625
		for(z=0; z<BITS; z++) {
626
			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
627
				r1->use1.b[z] | r1->use2.b[z];
628
			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
629
			r1->refbehind.b[z] = ref.b[z];
630
			r1->calbehind.b[z] = cal.b[z];
631
		}
632
		if(r1->active)
633
			break;
634
		r1->active = 1;
635
	}
636
	for(; r != r1; r = r->p1)
637
		for(r2 = r->p2; r2 != R; r2 = r2->p2link)
638
			prop(r2, r->refbehind, r->calbehind);
639
}
640
 
641
 
642
/*
643
 * find looping structure
644
 *
645
 * 1) find reverse postordering
646
 * 2) find approximate dominators,
647
 *	the actual dominators if the flow graph is reducible
648
 *	otherwise, dominators plus some other non-dominators.
649
 *	See Matthew S. Hecht and Jeffrey D. Ullman,
650
 *	"Analysis of a Simple Algorithm for Global Data Flow Problems",
651
 *	Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
652
 *	Oct. 1-3, 1973, pp.  207-217.
653
 * 3) find all nodes with a predecessor dominated by the current node.
654
 *	such a node is a loop head.
655
 *	recursively, all preds with a greater rpo number are in the loop
656
 */
657
long
658
postorder(Reg *r, Reg **rpo2r, long n)
659
{
660
	Reg *r1;
661
 
662
	r->rpo = 1;
663
	r1 = r->s1;
664
	if(r1 && !r1->rpo)
665
		n = postorder(r1, rpo2r, n);
666
	r1 = r->s2;
667
	if(r1 && !r1->rpo)
668
		n = postorder(r1, rpo2r, n);
669
	rpo2r[n] = r;
670
	n++;
671
	return n;
672
}
673
 
674
long
675
rpolca(long *idom, long rpo1, long rpo2)
676
{
677
	long t;
678
 
679
	if(rpo1 == -1)
680
		return rpo2;
681
	while(rpo1 != rpo2){
682
		if(rpo1 > rpo2){
683
			t = rpo2;
684
			rpo2 = rpo1;
685
			rpo1 = t;
686
		}
687
		while(rpo1 < rpo2){
688
			t = idom[rpo2];
689
			if(t >= rpo2)
690
				fatal(Z, "bad idom");
691
			rpo2 = t;
692
		}
693
	}
694
	return rpo1;
695
}
696
 
697
int
698
doms(long *idom, long r, long s)
699
{
700
	while(s > r)
701
		s = idom[s];
702
	return s == r;
703
}
704
 
705
int
706
loophead(long *idom, Reg *r)
707
{
708
	long src;
709
 
710
	src = r->rpo;
711
	if(r->p1 != R && doms(idom, src, r->p1->rpo))
712
		return 1;
713
	for(r = r->p2; r != R; r = r->p2link)
714
		if(doms(idom, src, r->rpo))
715
			return 1;
716
	return 0;
717
}
718
 
719
void
720
loopmark(Reg **rpo2r, long head, Reg *r)
721
{
722
	if(r->rpo < head || r->active == head)
723
		return;
724
	r->active = head;
725
	r->loop += LOOP;
726
	if(r->p1 != R)
727
		loopmark(rpo2r, head, r->p1);
728
	for(r = r->p2; r != R; r = r->p2link)
729
		loopmark(rpo2r, head, r);
730
}
731
 
732
void
733
loopit(Reg *r, long nr)
734
{
735
	Reg *r1;
736
	long i, d, me;
737
 
738
	if(nr > maxnr) {
739
		rpo2r = alloc(nr * sizeof(Reg*));
740
		idom = alloc(nr * sizeof(long));
741
		maxnr = nr;
742
	}
743
 
744
	d = postorder(r, rpo2r, 0);
745
	if(d > nr)
746
		fatal(Z, "too many reg nodes");
747
	nr = d;
748
	for(i = 0; i < nr / 2; i++){
749
		r1 = rpo2r[i];
750
		rpo2r[i] = rpo2r[nr - 1 - i];
751
		rpo2r[nr - 1 - i] = r1;
752
	}
753
	for(i = 0; i < nr; i++)
754
		rpo2r[i]->rpo = i;
755
 
756
	idom[0] = 0;
757
	for(i = 0; i < nr; i++){
758
		r1 = rpo2r[i];
759
		me = r1->rpo;
760
		d = -1;
761
		if(r1->p1 != R && r1->p1->rpo < me)
762
			d = r1->p1->rpo;
763
		for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
764
			if(r1->rpo < me)
765
				d = rpolca(idom, d, r1->rpo);
766
		idom[i] = d;
767
	}
768
 
769
	for(i = 0; i < nr; i++){
770
		r1 = rpo2r[i];
771
		r1->loop++;
772
		if(r1->p2 != R && loophead(idom, r1))
773
			loopmark(rpo2r, i, r1);
774
	}
775
}
776
 
777
void
778
synch(Reg *r, Bits dif)
779
{
780
	Reg *r1;
781
	int z;
782
 
783
	for(r1 = r; r1 != R; r1 = r1->s1) {
784
		for(z=0; z<BITS; z++) {
785
			dif.b[z] = (dif.b[z] &
786
				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
787
					r1->set.b[z] | r1->regdiff.b[z];
788
			if(dif.b[z] != r1->regdiff.b[z]) {
789
				r1->regdiff.b[z] = dif.b[z];
790
				change++;
791
			}
792
		}
793
		if(r1->active)
794
			break;
795
		r1->active = 1;
796
		for(z=0; z<BITS; z++)
797
			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
798
		if(r1->s2 != R)
799
			synch(r1->s2, dif);
800
	}
801
}
802
 
803
ulong
804
allreg(ulong b, Rgn *r)
805
{
806
	Var *v;
807
	int i;
808
 
809
	v = var + r->varno;
810
	r->regno = 0;
811
	switch(v->etype) {
812
 
813
	default:
814
		diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
815
		break;
816
 
817
	case TCHAR:
818
	case TUCHAR:
819
	case TSHORT:
820
	case TUSHORT:
821
	case TINT:
822
	case TUINT:
823
	case TLONG:
824
	case TULONG:
825
	case TIND:
826
	case TARRAY:
827
		i = BtoR(~b);
828
		if(i && r->cost > 0) {
829
			r->regno = i;
830
			return RtoB(i);
831
		}
832
		break;
833
 
834
	case TDOUBLE:
835
	case TFLOAT:
836
		i = BtoF(~b);
837
		if(i && r->cost > 0) {
838
			r->regno = i+NREG;
839
			return FtoB(i);
840
		}
841
		break;
842
	}
843
	return 0;
844
}
845
 
846
void
847
paint1(Reg *r, int bn)
848
{
849
	Reg *r1;
850
	Prog *p;
851
	int z;
852
	ulong bb;
853
 
854
	z = bn/32;
855
	bb = 1L<<(bn%32);
856
	if(r->act.b[z] & bb)
857
		return;
858
	for(;;) {
859
		if(!(r->refbehind.b[z] & bb))
860
			break;
861
		r1 = r->p1;
862
		if(r1 == R)
863
			break;
864
		if(!(r1->refahead.b[z] & bb))
865
			break;
866
		if(r1->act.b[z] & bb)
867
			break;
868
		r = r1;
869
	}
870
 
871
	if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
872
		change -= CLOAD * r->loop;
873
		if(debug['R'] && debug['v'])
874
			print("%ld%P\tld %B $%d\n", r->loop,
875
				r->prog, blsh(bn), change);
876
	}
877
	for(;;) {
878
		r->act.b[z] |= bb;
879
		p = r->prog;
880
 
881
		if(r->use1.b[z] & bb) {
882
			change += CREF * r->loop;
883
			if(p->to.type == D_FREG && p->as == AMOVW)
884
				change = -CINF;		/* cant go Rreg to Freg */
885
			if(debug['R'] && debug['v'])
886
				print("%ld%P\tu1 %B $%d\n", r->loop,
887
					p, blsh(bn), change);
888
		}
889
 
890
		if((r->use2.b[z]|r->set.b[z]) & bb) {
891
			change += CREF * r->loop;
892
			if(p->from.type == D_FREG && p->as == AMOVW)
893
				change = -CINF;		/* cant go Rreg to Freg */
894
			if(debug['R'] && debug['v'])
895
				print("%ld%P\tu2 %B $%d\n", r->loop,
896
					p, blsh(bn), change);
897
		}
898
 
899
		if(STORE(r) & r->regdiff.b[z] & bb) {
900
			change -= CLOAD * r->loop;
901
			if(debug['R'] && debug['v'])
902
				print("%ld%P\tst %B $%d\n", r->loop,
903
					p, blsh(bn), change);
904
		}
905
 
906
		if(r->refbehind.b[z] & bb)
907
			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
908
				if(r1->refahead.b[z] & bb)
909
					paint1(r1, bn);
910
 
911
		if(!(r->refahead.b[z] & bb))
912
			break;
913
		r1 = r->s2;
914
		if(r1 != R)
915
			if(r1->refbehind.b[z] & bb)
916
				paint1(r1, bn);
917
		r = r->s1;
918
		if(r == R)
919
			break;
920
		if(r->act.b[z] & bb)
921
			break;
922
		if(!(r->refbehind.b[z] & bb))
923
			break;
924
	}
925
}
926
 
927
ulong
928
paint2(Reg *r, int bn)
929
{
930
	Reg *r1;
931
	int z;
932
	ulong bb, vreg;
933
 
934
	z = bn/32;
935
	bb = 1L << (bn%32);
936
	vreg = regbits;
937
	if(!(r->act.b[z] & bb))
938
		return vreg;
939
	for(;;) {
940
		if(!(r->refbehind.b[z] & bb))
941
			break;
942
		r1 = r->p1;
943
		if(r1 == R)
944
			break;
945
		if(!(r1->refahead.b[z] & bb))
946
			break;
947
		if(!(r1->act.b[z] & bb))
948
			break;
949
		r = r1;
950
	}
951
	for(;;) {
952
		r->act.b[z] &= ~bb;
953
 
954
		vreg |= r->regu;
955
 
956
		if(r->refbehind.b[z] & bb)
957
			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
958
				if(r1->refahead.b[z] & bb)
959
					vreg |= paint2(r1, bn);
960
 
961
		if(!(r->refahead.b[z] & bb))
962
			break;
963
		r1 = r->s2;
964
		if(r1 != R)
965
			if(r1->refbehind.b[z] & bb)
966
				vreg |= paint2(r1, bn);
967
		r = r->s1;
968
		if(r == R)
969
			break;
970
		if(!(r->act.b[z] & bb))
971
			break;
972
		if(!(r->refbehind.b[z] & bb))
973
			break;
974
	}
975
	return vreg;
976
}
977
 
978
void
979
paint3(Reg *r, int bn, long rb, int rn)
980
{
981
	Reg *r1;
982
	Prog *p;
983
	int z;
984
	ulong bb;
985
 
986
	z = bn/32;
987
	bb = 1L << (bn%32);
988
	if(r->act.b[z] & bb)
989
		return;
990
	for(;;) {
991
		if(!(r->refbehind.b[z] & bb))
992
			break;
993
		r1 = r->p1;
994
		if(r1 == R)
995
			break;
996
		if(!(r1->refahead.b[z] & bb))
997
			break;
998
		if(r1->act.b[z] & bb)
999
			break;
1000
		r = r1;
1001
	}
1002
 
1003
	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1004
		addmove(r, bn, rn, 0);
1005
	for(;;) {
1006
		r->act.b[z] |= bb;
1007
		p = r->prog;
1008
 
1009
		if(r->use1.b[z] & bb) {
1010
			if(debug['R'])
1011
				print("%P", p);
1012
			addreg(&p->from, rn);
1013
			if(debug['R'])
1014
				print("\t.c%P\n", p);
1015
		}
1016
		if((r->use2.b[z]|r->set.b[z]) & bb) {
1017
			if(debug['R'])
1018
				print("%P", p);
1019
			addreg(&p->to, rn);
1020
			if(debug['R'])
1021
				print("\t.c%P\n", p);
1022
		}
1023
 
1024
		if(STORE(r) & r->regdiff.b[z] & bb)
1025
			addmove(r, bn, rn, 1);
1026
		r->regu |= rb;
1027
 
1028
		if(r->refbehind.b[z] & bb)
1029
			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1030
				if(r1->refahead.b[z] & bb)
1031
					paint3(r1, bn, rb, rn);
1032
 
1033
		if(!(r->refahead.b[z] & bb))
1034
			break;
1035
		r1 = r->s2;
1036
		if(r1 != R)
1037
			if(r1->refbehind.b[z] & bb)
1038
				paint3(r1, bn, rb, rn);
1039
		r = r->s1;
1040
		if(r == R)
1041
			break;
1042
		if(r->act.b[z] & bb)
1043
			break;
1044
		if(!(r->refbehind.b[z] & bb))
1045
			break;
1046
	}
1047
}
1048
 
1049
void
1050
addreg(Adr *a, int rn)
1051
{
1052
 
1053
	a->sym = 0;
1054
	a->name = D_NONE;
1055
	a->type = D_REG;
1056
	a->reg = rn;
1057
	if(rn >= NREG) {
1058
		a->type = D_FREG;
1059
		a->reg = rn-NREG;
1060
	}
1061
}
1062
 
1063
/*
1064
 *	bit	reg
1065
 *	0	R9
1066
 *	1	R10
1067
 *	...	...
1068
 *	4	R13
1069
 *	5	R16
1070
 *	...	...
1071
 *	20	R31
1072
 */
1073
long
1074
RtoB(int r)
1075
{
1076
 
1077
	if(r >= 9 && r <= 13)
1078
		return 1L << (r-9);
1079
	if(r >= 16 && r <= 31)
1080
		return 1L << (r-11);
1081
	return 0;
1082
}
1083
 
1084
int
1085
BtoR(long b)
1086
{
1087
	int r;
1088
 
1089
	b &= 0x001fffffL;
1090
	if(b == 0)
1091
		return 0;
1092
	r = bitno(b) + 9;
1093
	if(r >= 14)
1094
		r += 2;
1095
	return r;
1096
}
1097
 
1098
/*
1099
 *	bit	reg
1100
 *	22	F4
1101
 *	23	F6
1102
 *	...	...
1103
 *	31	F22
1104
 */
1105
long
1106
FtoB(int f)
1107
{
1108
 
1109
	if(f < 4 || f > 22 || (f&1))
1110
		return 0;
1111
	return 1L << (f/2 + 20);
1112
}
1113
 
1114
BtoF(long b)
1115
{
1116
 
1117
	b &= 0xffc00000L;
1118
	if(b == 0)
1119
		return 0;
1120
	return bitno(b)*2 - 40;
1121
}