Subversion Repositories planix.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
#include "gc.h"
2
 
3
void	addsplits(void);
4
 
5
Reg*
6
rega(void)
7
{
8
	Reg *r;
9
 
10
	r = freer;
11
	if(r == R) {
12
		r = alloc(sizeof(*r));
13
	} else
14
		freer = r->link;
15
 
16
	*r = zreg;
17
	return r;
18
}
19
 
20
int
21
rcmp(const void *a1, const void *a2)
22
{
23
	Rgn *p1, *p2;
24
	int c1, c2;
25
 
26
	p1 = (Rgn*)a1;
27
	p2 = (Rgn*)a2;
28
	c1 = p2->cost;
29
	c2 = p1->cost;
30
	if(c1 -= c2)
31
		return c1;
32
	return p2->varno - p1->varno;
33
}
34
 
35
void
36
regopt(Prog *p)
37
{
38
	Reg *r, *r1, *r2;
39
	Prog *p1;
40
	int i, z;
41
	long initpc, val, npc;
42
	ulong vreg;
43
	Bits bit;
44
	struct
45
	{
46
		long	m;
47
		long	c;
48
		Reg*	p;
49
	} log5[6], *lp;
50
 
51
	firstr = R;
52
	lastr = R;
53
	nvar = 0;
54
	regbits = 0;
55
	for(z=0; z<BITS; z++) {
56
		externs.b[z] = 0;
57
		params.b[z] = 0;
58
		consts.b[z] = 0;
59
		addrs.b[z] = 0;
60
	}
61
 
62
	/*
63
	 * pass 1
64
	 * build aux data structure
65
	 * allocate pcs
66
	 * find use and set of variables
67
	 */
68
	val = 5L * 5L * 5L * 5L * 5L;
69
	lp = log5;
70
	for(i=0; i<5; i++) {
71
		lp->m = val;
72
		lp->c = 0;
73
		lp->p = R;
74
		val /= 5L;
75
		lp++;
76
	}
77
	val = 0;
78
	for(; p != P; p = p->link) {
79
		switch(p->as) {
80
		case ADATA:
81
		case AGLOBL:
82
		case ANAME:
83
		case ASIGNAME:
84
			continue;
85
		}
86
		r = rega();
87
		if(firstr == R) {
88
			firstr = r;
89
			lastr = r;
90
		} else {
91
			lastr->link = r;
92
			r->p1 = lastr;
93
			lastr->s1 = r;
94
			lastr = r;
95
		}
96
		r->prog = p;
97
		r->pc = val;
98
		val++;
99
 
100
		lp = log5;
101
		for(i=0; i<5; i++) {
102
			lp->c--;
103
			if(lp->c <= 0) {
104
				lp->c = lp->m;
105
				if(lp->p != R)
106
					lp->p->log5 = r;
107
				lp->p = r;
108
				(lp+1)->c = 0;
109
				break;
110
			}
111
			lp++;
112
		}
113
 
114
		r1 = r->p1;
115
		if(r1 != R)
116
		switch(r1->prog->as) {
117
		case ARET:
118
		case AB:
119
		case ARFE:
120
			r->p1 = R;
121
			r1->s1 = R;
122
		}
123
 
124
		/*
125
		 * left side always read
126
		 */
127
		bit = mkvar(&p->from, p->as==AMOVW);
128
		for(z=0; z<BITS; z++)
129
			r->use1.b[z] |= bit.b[z];
130
 
131
		/*
132
		 * right side depends on opcode
133
		 */
134
		bit = mkvar(&p->to, 0);
135
		if(bany(&bit))
136
		switch(p->as) {
137
		default:
138
			diag(Z, "reg: unknown asop: %A", p->as);
139
			break;
140
 
141
		/*
142
		 * right side write
143
		 */
144
		case ANOP:
145
		case AMOVB:
146
		case AMOVBU:
147
		case AMOVH:
148
		case AMOVHU:
149
		case AMOVW:
150
		case AMOVF:
151
		case AMOVD:
152
			for(z=0; z<BITS; z++)
153
				r->set.b[z] |= bit.b[z];
154
			break;
155
 
156
		/*
157
		 * funny
158
		 */
159
		case ABL:
160
			for(z=0; z<BITS; z++)
161
				addrs.b[z] |= bit.b[z];
162
			break;
163
		}
164
 
165
		if(p->as == AMOVM) {
166
			if(p->from.type == D_CONST)
167
				z = p->from.offset;
168
			else
169
				z = p->to.offset;
170
			for(i=0; z; i++) {
171
				if(z&1)
172
					regbits |= RtoB(i);
173
				z >>= 1;
174
			}
175
		}
176
	}
177
	if(firstr == R)
178
		return;
179
	initpc = pc - val;
180
	npc = val;
181
 
182
	/*
183
	 * pass 2
184
	 * turn branch references to pointers
185
	 * build back pointers
186
	 */
187
	for(r = firstr; r != R; r = r->link) {
188
		p = r->prog;
189
		if(p->to.type == D_BRANCH) {
190
			val = p->to.offset - initpc;
191
			r1 = firstr;
192
			while(r1 != R) {
193
				r2 = r1->log5;
194
				if(r2 != R && val >= r2->pc) {
195
					r1 = r2;
196
					continue;
197
				}
198
				if(r1->pc == val)
199
					break;
200
				r1 = r1->link;
201
			}
202
			if(r1 == R) {
203
				nearln = p->lineno;
204
				diag(Z, "ref not found\n%P", p);
205
				continue;
206
			}
207
			if(r1 == r) {
208
				nearln = p->lineno;
209
				diag(Z, "ref to self\n%P", p);
210
				continue;
211
			}
212
			r->s2 = r1;
213
			r->p2link = r1->p2;
214
			r1->p2 = r;
215
		}
216
	}
217
	if(debug['R']) {
218
		p = firstr->prog;
219
		print("\n%L %D\n", p->lineno, &p->from);
220
	}
221
 
222
	/*
223
	 * pass 2.5
224
	 * find looping structure
225
	 */
226
	for(r = firstr; r != R; r = r->link)
227
		r->active = 0;
228
	change = 0;
229
	loopit(firstr, npc);
230
 
231
	/*
232
	 * pass 3
233
	 * iterate propagating usage
234
	 * 	back until flow graph is complete
235
	 */
236
loop1:
237
	change = 0;
238
	for(r = firstr; r != R; r = r->link)
239
		r->active = 0;
240
	for(r = firstr; r != R; r = r->link)
241
		if(r->prog->as == ARET)
242
			prop(r, zbits, zbits);
243
loop11:
244
	/* pick up unreachable code */
245
	i = 0;
246
	for(r = firstr; r != R; r = r1) {
247
		r1 = r->link;
248
		if(r1 && r1->active && !r->active) {
249
			prop(r, zbits, zbits);
250
			i = 1;
251
		}
252
	}
253
	if(i)
254
		goto loop11;
255
	if(change)
256
		goto loop1;
257
 
258
 
259
	/*
260
	 * pass 4
261
	 * iterate propagating register/variable synchrony
262
	 * 	forward until graph is complete
263
	 */
264
loop2:
265
	change = 0;
266
	for(r = firstr; r != R; r = r->link)
267
		r->active = 0;
268
	synch(firstr, zbits);
269
	if(change)
270
		goto loop2;
271
 
272
	addsplits();
273
 
274
	if(debug['R'] && debug['v']) {
275
		print("\nprop structure:\n");
276
		for(r = firstr; r != R; r = r->link) {
277
			print("%ld:%P", r->loop, r->prog);
278
			for(z=0; z<BITS; z++)
279
				bit.b[z] = r->set.b[z] |
280
					r->refahead.b[z] | r->calahead.b[z] |
281
					r->refbehind.b[z] | r->calbehind.b[z] |
282
					r->use1.b[z] | r->use2.b[z];
283
			if(bany(&bit)) {
284
				print("\t");
285
				if(bany(&r->use1))
286
					print(" u1=%B", r->use1);
287
				if(bany(&r->use2))
288
					print(" u2=%B", r->use2);
289
				if(bany(&r->set))
290
					print(" st=%B", r->set);
291
				if(bany(&r->refahead))
292
					print(" ra=%B", r->refahead);
293
				if(bany(&r->calahead))
294
					print(" ca=%B", r->calahead);
295
				if(bany(&r->refbehind))
296
					print(" rb=%B", r->refbehind);
297
				if(bany(&r->calbehind))
298
					print(" cb=%B", r->calbehind);
299
			}
300
			print("\n");
301
		}
302
	}
303
 
304
	/*
305
	 * pass 5
306
	 * isolate regions
307
	 * calculate costs (paint1)
308
	 */
309
	r = firstr;
310
	if(r) {
311
		for(z=0; z<BITS; z++)
312
			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
313
			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
314
		if(bany(&bit)) {
315
			nearln = r->prog->lineno;
316
			warn(Z, "used and not set: %B", bit);
317
			if(debug['R'] && !debug['w'])
318
				print("used and not set: %B\n", bit);
319
		}
320
	}
321
 
322
	for(r = firstr; r != R; r = r->link)
323
		r->act = zbits;
324
	rgp = region;
325
	nregion = 0;
326
	for(r = firstr; r != R; r = r->link) {
327
		for(z=0; z<BITS; z++)
328
			bit.b[z] = r->set.b[z] &
329
			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
330
		if(bany(&bit)) {
331
			nearln = r->prog->lineno;
332
			warn(Z, "set and not used: %B", bit);
333
			if(debug['R'])
334
				print("set and not used: %B\n", bit);
335
			excise(r);
336
		}
337
		for(z=0; z<BITS; z++)
338
			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
339
		while(bany(&bit)) {
340
			i = bnum(bit);
341
			rgp->enter = r;
342
			rgp->varno = i;
343
			change = 0;
344
			if(debug['R'] && debug['v'])
345
				print("\n");
346
			paint1(r, i);
347
			bit.b[i/32] &= ~(1L<<(i%32));
348
			if(change <= 0) {
349
				if(debug['R'])
350
					print("%L $%d: %B\n",
351
						r->prog->lineno, change, blsh(i));
352
				continue;
353
			}
354
			rgp->cost = change;
355
			nregion++;
356
			if(nregion >= NRGN) {
357
				warn(Z, "too many regions");
358
				goto brk;
359
			}
360
			rgp++;
361
		}
362
	}
363
brk:
364
	qsort(region, nregion, sizeof(region[0]), rcmp);
365
 
366
	/*
367
	 * pass 6
368
	 * determine used registers (paint2)
369
	 * replace code (paint3)
370
	 */
371
	rgp = region;
372
	for(i=0; i<nregion; i++) {
373
		bit = blsh(rgp->varno);
374
		vreg = paint2(rgp->enter, rgp->varno);
375
		vreg = allreg(vreg, rgp);
376
		if(debug['R']) {
377
			if(rgp->regno >= NREG)
378
				print("%L $%d F%d: %B\n",
379
					rgp->enter->prog->lineno,
380
					rgp->cost,
381
					rgp->regno-NREG,
382
					bit);
383
			else
384
				print("%L $%d R%d: %B\n",
385
					rgp->enter->prog->lineno,
386
					rgp->cost,
387
					rgp->regno,
388
					bit);
389
		}
390
		if(rgp->regno != 0)
391
			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
392
		rgp++;
393
	}
394
	/*
395
	 * pass 7
396
	 * peep-hole on basic block
397
	 */
398
	if(!debug['R'] || debug['P'])
399
		peep();
400
 
401
	/*
402
	 * pass 8
403
	 * recalculate pc
404
	 */
405
	val = initpc;
406
	for(r = firstr; r != R; r = r1) {
407
		r->pc = val;
408
		p = r->prog;
409
		p1 = P;
410
		r1 = r->link;
411
		if(r1 != R)
412
			p1 = r1->prog;
413
		for(; p != p1; p = p->link) {
414
			switch(p->as) {
415
			default:
416
				val++;
417
				break;
418
 
419
			case ANOP:
420
			case ADATA:
421
			case AGLOBL:
422
			case ANAME:
423
			case ASIGNAME:
424
				break;
425
			}
426
		}
427
	}
428
	pc = val;
429
 
430
	/*
431
	 * fix up branches
432
	 */
433
	if(debug['R'])
434
		if(bany(&addrs))
435
			print("addrs: %B\n", addrs);
436
 
437
	r1 = 0; /* set */
438
	for(r = firstr; r != R; r = r->link) {
439
		p = r->prog;
440
		if(p->to.type == D_BRANCH)
441
			p->to.offset = r->s2->pc;
442
		r1 = r;
443
	}
444
 
445
	/*
446
	 * last pass
447
	 * eliminate nops
448
	 * free aux structures
449
	 */
450
	for(p = firstr->prog; p != P; p = p->link){
451
		while(p->link && p->link->as == ANOP)
452
			p->link = p->link->link;
453
	}
454
	if(r1 != R) {
455
		r1->link = freer;
456
		freer = firstr;
457
	}
458
}
459
 
460
void
461
addsplits(void)
462
{
463
	Reg *r, *r1;
464
	int z, i;
465
	Bits bit;
466
 
467
	for(r = firstr; r != R; r = r->link) {
468
		if(r->loop > 1)
469
			continue;
470
		if(r->prog->as == ABL)
471
			continue;
472
		for(r1 = r->p2; r1 != R; r1 = r1->p2link) {
473
			if(r1->loop <= 1)
474
				continue;
475
			for(z=0; z<BITS; z++)
476
				bit.b[z] = r1->calbehind.b[z] &
477
					(r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
478
					~(r->calahead.b[z] & addrs.b[z]);
479
			while(bany(&bit)) {
480
				i = bnum(bit);
481
				bit.b[i/32] &= ~(1L << (i%32));
482
			}
483
		}
484
	}
485
}
486
 
487
/*
488
 * add mov b,rn
489
 * just after r
490
 */
491
void
492
addmove(Reg *r, int bn, int rn, int f)
493
{
494
	Prog *p, *p1;
495
	Adr *a;
496
	Var *v;
497
 
498
	p1 = alloc(sizeof(*p1));
499
	*p1 = zprog;
500
	p = r->prog;
501
 
502
	p1->link = p->link;
503
	p->link = p1;
504
	p1->lineno = p->lineno;
505
 
506
	v = var + bn;
507
 
508
	a = &p1->to;
509
	a->sym = v->sym;
510
	a->name = v->name;
511
	a->offset = v->offset;
512
	a->etype = v->etype;
513
	a->type = D_OREG;
514
	if(a->etype == TARRAY || a->sym == S)
515
		a->type = D_CONST;
516
 
517
	p1->as = AMOVW;
518
	if(v->etype == TCHAR || v->etype == TUCHAR)
519
		p1->as = AMOVB;
520
	if(v->etype == TSHORT || v->etype == TUSHORT)
521
		p1->as = AMOVH;
522
	if(v->etype == TFLOAT)
523
		p1->as = AMOVF;
524
	if(v->etype == TDOUBLE)
525
		p1->as = AMOVD;
526
 
527
	p1->from.type = D_REG;
528
	p1->from.reg = rn;
529
	if(rn >= NREG) {
530
		p1->from.type = D_FREG;
531
		p1->from.reg = rn-NREG;
532
	}
533
	if(!f) {
534
		p1->from = *a;
535
		*a = zprog.from;
536
		a->type = D_REG;
537
		a->reg = rn;
538
		if(rn >= NREG) {
539
			a->type = D_FREG;
540
			a->reg = rn-NREG;
541
		}
542
		if(v->etype == TUCHAR)
543
			p1->as = AMOVBU;
544
		if(v->etype == TUSHORT)
545
			p1->as = AMOVHU;
546
	}
547
	if(debug['R'])
548
		print("%P\t.a%P\n", p, p1);
549
}
550
 
551
Bits
552
mkvar(Adr *a, int docon)
553
{
554
	Var *v;
555
	int i, t, n, et, z;
556
	long o;
557
	Bits bit;
558
	Sym *s;
559
 
560
	t = a->type;
561
	if(t == D_REG && a->reg != NREG)
562
		regbits |= RtoB(a->reg);
563
	if(t == D_FREG && a->reg != NREG)
564
		regbits |= FtoB(a->reg);
565
	s = a->sym;
566
	o = a->offset;
567
	et = a->etype;
568
	if(s == S) {
569
		if(t != D_CONST || !docon || a->reg != NREG)
570
			goto none;
571
		et = TLONG;
572
	}
573
	if(t == D_CONST) {
574
		if(s == S && sval(o))
575
			goto none;
576
	}
577
 
578
	n = a->name;
579
	v = var;
580
	for(i=0; i<nvar; i++) {
581
		if(s == v->sym)
582
		if(n == v->name)
583
		if(o == v->offset)
584
			goto out;
585
		v++;
586
	}
587
	if(s)
588
		if(s->name[0] == '.')
589
			goto none;
590
	if(nvar >= NVAR) {
591
		if(debug['w'] > 1 && s)
592
			warn(Z, "variable not optimized: %s", s->name);
593
		goto none;
594
	}
595
	i = nvar;
596
	nvar++;
597
	v = &var[i];
598
	v->sym = s;
599
	v->offset = o;
600
	v->etype = et;
601
	v->name = n;
602
	if(debug['R'])
603
		print("bit=%2d et=%2d %D\n", i, et, a);
604
out:
605
	bit = blsh(i);
606
	if(n == D_EXTERN || n == D_STATIC)
607
		for(z=0; z<BITS; z++)
608
			externs.b[z] |= bit.b[z];
609
	if(n == D_PARAM)
610
		for(z=0; z<BITS; z++)
611
			params.b[z] |= bit.b[z];
612
	if(v->etype != et || !typechlpfd[et])	/* funny punning */
613
		for(z=0; z<BITS; z++)
614
			addrs.b[z] |= bit.b[z];
615
	if(t == D_CONST) {
616
		if(s == S) {
617
			for(z=0; z<BITS; z++)
618
				consts.b[z] |= bit.b[z];
619
			return bit;
620
		}
621
		if(et != TARRAY)
622
			for(z=0; z<BITS; z++)
623
				addrs.b[z] |= bit.b[z];
624
		for(z=0; z<BITS; z++)
625
			params.b[z] |= bit.b[z];
626
		return bit;
627
	}
628
	if(t == D_OREG)
629
		return bit;
630
 
631
none:
632
	return zbits;
633
}
634
 
635
void
636
prop(Reg *r, Bits ref, Bits cal)
637
{
638
	Reg *r1, *r2;
639
	int z;
640
 
641
	for(r1 = r; r1 != R; r1 = r1->p1) {
642
		for(z=0; z<BITS; z++) {
643
			ref.b[z] |= r1->refahead.b[z];
644
			if(ref.b[z] != r1->refahead.b[z]) {
645
				r1->refahead.b[z] = ref.b[z];
646
				change++;
647
			}
648
			cal.b[z] |= r1->calahead.b[z];
649
			if(cal.b[z] != r1->calahead.b[z]) {
650
				r1->calahead.b[z] = cal.b[z];
651
				change++;
652
			}
653
		}
654
		switch(r1->prog->as) {
655
		case ABL:
656
			for(z=0; z<BITS; z++) {
657
				cal.b[z] |= ref.b[z] | externs.b[z];
658
				ref.b[z] = 0;
659
			}
660
			break;
661
 
662
		case ATEXT:
663
			for(z=0; z<BITS; z++) {
664
				cal.b[z] = 0;
665
				ref.b[z] = 0;
666
			}
667
			break;
668
 
669
		case ARET:
670
			for(z=0; z<BITS; z++) {
671
				cal.b[z] = externs.b[z];
672
				ref.b[z] = 0;
673
			}
674
		}
675
		for(z=0; z<BITS; z++) {
676
			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
677
				r1->use1.b[z] | r1->use2.b[z];
678
			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
679
			r1->refbehind.b[z] = ref.b[z];
680
			r1->calbehind.b[z] = cal.b[z];
681
		}
682
		if(r1->active)
683
			break;
684
		r1->active = 1;
685
	}
686
	for(; r != r1; r = r->p1)
687
		for(r2 = r->p2; r2 != R; r2 = r2->p2link)
688
			prop(r2, r->refbehind, r->calbehind);
689
}
690
 
691
/*
692
 * find looping structure
693
 *
694
 * 1) find reverse postordering
695
 * 2) find approximate dominators,
696
 *	the actual dominators if the flow graph is reducible
697
 *	otherwise, dominators plus some other non-dominators.
698
 *	See Matthew S. Hecht and Jeffrey D. Ullman,
699
 *	"Analysis of a Simple Algorithm for Global Data Flow Problems",
700
 *	Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
701
 *	Oct. 1-3, 1973, pp.  207-217.
702
 * 3) find all nodes with a predecessor dominated by the current node.
703
 *	such a node is a loop head.
704
 *	recursively, all preds with a greater rpo number are in the loop
705
 */
706
long
707
postorder(Reg *r, Reg **rpo2r, long n)
708
{
709
	Reg *r1;
710
 
711
	r->rpo = 1;
712
	r1 = r->s1;
713
	if(r1 && !r1->rpo)
714
		n = postorder(r1, rpo2r, n);
715
	r1 = r->s2;
716
	if(r1 && !r1->rpo)
717
		n = postorder(r1, rpo2r, n);
718
	rpo2r[n] = r;
719
	n++;
720
	return n;
721
}
722
 
723
long
724
rpolca(long *idom, long rpo1, long rpo2)
725
{
726
	long t;
727
 
728
	if(rpo1 == -1)
729
		return rpo2;
730
	while(rpo1 != rpo2){
731
		if(rpo1 > rpo2){
732
			t = rpo2;
733
			rpo2 = rpo1;
734
			rpo1 = t;
735
		}
736
		while(rpo1 < rpo2){
737
			t = idom[rpo2];
738
			if(t >= rpo2)
739
				fatal(Z, "bad idom");
740
			rpo2 = t;
741
		}
742
	}
743
	return rpo1;
744
}
745
 
746
int
747
doms(long *idom, long r, long s)
748
{
749
	while(s > r)
750
		s = idom[s];
751
	return s == r;
752
}
753
 
754
int
755
loophead(long *idom, Reg *r)
756
{
757
	long src;
758
 
759
	src = r->rpo;
760
	if(r->p1 != R && doms(idom, src, r->p1->rpo))
761
		return 1;
762
	for(r = r->p2; r != R; r = r->p2link)
763
		if(doms(idom, src, r->rpo))
764
			return 1;
765
	return 0;
766
}
767
 
768
void
769
loopmark(Reg **rpo2r, long head, Reg *r)
770
{
771
	if(r->rpo < head || r->active == head)
772
		return;
773
	r->active = head;
774
	r->loop += LOOP;
775
	if(r->p1 != R)
776
		loopmark(rpo2r, head, r->p1);
777
	for(r = r->p2; r != R; r = r->p2link)
778
		loopmark(rpo2r, head, r);
779
}
780
 
781
void
782
loopit(Reg *r, long nr)
783
{
784
	Reg *r1;
785
	long i, d, me;
786
 
787
	if(nr > maxnr) {
788
		rpo2r = alloc(nr * sizeof(Reg*));
789
		idom = alloc(nr * sizeof(long));
790
		maxnr = nr;
791
	}
792
 
793
	d = postorder(r, rpo2r, 0);
794
	if(d > nr)
795
		fatal(Z, "too many reg nodes");
796
	nr = d;
797
	for(i = 0; i < nr / 2; i++){
798
		r1 = rpo2r[i];
799
		rpo2r[i] = rpo2r[nr - 1 - i];
800
		rpo2r[nr - 1 - i] = r1;
801
	}
802
	for(i = 0; i < nr; i++)
803
		rpo2r[i]->rpo = i;
804
 
805
	idom[0] = 0;
806
	for(i = 0; i < nr; i++){
807
		r1 = rpo2r[i];
808
		me = r1->rpo;
809
		d = -1;
810
		if(r1->p1 != R && r1->p1->rpo < me)
811
			d = r1->p1->rpo;
812
		for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
813
			if(r1->rpo < me)
814
				d = rpolca(idom, d, r1->rpo);
815
		idom[i] = d;
816
	}
817
 
818
	for(i = 0; i < nr; i++){
819
		r1 = rpo2r[i];
820
		r1->loop++;
821
		if(r1->p2 != R && loophead(idom, r1))
822
			loopmark(rpo2r, i, r1);
823
	}
824
}
825
 
826
void
827
synch(Reg *r, Bits dif)
828
{
829
	Reg *r1;
830
	int z;
831
 
832
	for(r1 = r; r1 != R; r1 = r1->s1) {
833
		for(z=0; z<BITS; z++) {
834
			dif.b[z] = (dif.b[z] &
835
				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
836
					r1->set.b[z] | r1->regdiff.b[z];
837
			if(dif.b[z] != r1->regdiff.b[z]) {
838
				r1->regdiff.b[z] = dif.b[z];
839
				change++;
840
			}
841
		}
842
		if(r1->active)
843
			break;
844
		r1->active = 1;
845
		for(z=0; z<BITS; z++)
846
			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
847
		if(r1->s2 != R)
848
			synch(r1->s2, dif);
849
	}
850
}
851
 
852
ulong
853
allreg(ulong b, Rgn *r)
854
{
855
	Var *v;
856
	int i;
857
 
858
	v = var + r->varno;
859
	r->regno = 0;
860
	switch(v->etype) {
861
 
862
	default:
863
		diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
864
		break;
865
 
866
	case TCHAR:
867
	case TUCHAR:
868
	case TSHORT:
869
	case TUSHORT:
870
	case TINT:
871
	case TUINT:
872
	case TLONG:
873
	case TULONG:
874
	case TIND:
875
	case TARRAY:
876
		i = BtoR(~b);
877
		if(i && r->cost >= 0) {
878
			r->regno = i;
879
			return RtoB(i);
880
		}
881
		break;
882
 
883
	case TVLONG:
884
	case TDOUBLE:
885
	case TFLOAT:
886
		i = BtoF(~b);
887
		if(i && r->cost >= 0) {
888
			r->regno = i+NREG;
889
			return FtoB(i);
890
		}
891
		break;
892
	}
893
	return 0;
894
}
895
 
896
void
897
paint1(Reg *r, int bn)
898
{
899
	Reg *r1;
900
	Prog *p;
901
	int z;
902
	ulong bb;
903
 
904
	z = bn/32;
905
	bb = 1L<<(bn%32);
906
	if(r->act.b[z] & bb)
907
		return;
908
	for(;;) {
909
		if(!(r->refbehind.b[z] & bb))
910
			break;
911
		r1 = r->p1;
912
		if(r1 == R)
913
			break;
914
		if(!(r1->refahead.b[z] & bb))
915
			break;
916
		if(r1->act.b[z] & bb)
917
			break;
918
		r = r1;
919
	}
920
 
921
	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
922
		change -= CLOAD * r->loop;
923
		if(debug['R'] && debug['v'])
924
			print("%ld%P\tld %B $%d\n", r->loop,
925
				r->prog, blsh(bn), change);
926
	}
927
	for(;;) {
928
		r->act.b[z] |= bb;
929
		p = r->prog;
930
 
931
		if(r->use1.b[z] & bb) {
932
			change += CREF * r->loop;
933
			if(debug['R'] && debug['v'])
934
				print("%ld%P\tu1 %B $%d\n", r->loop,
935
					p, blsh(bn), change);
936
		}
937
 
938
		if((r->use2.b[z]|r->set.b[z]) & bb) {
939
			change += CREF * r->loop;
940
			if(debug['R'] && debug['v'])
941
				print("%ld%P\tu2 %B $%d\n", r->loop,
942
					p, blsh(bn), change);
943
		}
944
 
945
		if(STORE(r) & r->regdiff.b[z] & bb) {
946
			change -= CLOAD * r->loop;
947
			if(debug['R'] && debug['v'])
948
				print("%ld%P\tst %B $%d\n", r->loop,
949
					p, blsh(bn), change);
950
		}
951
 
952
		if(r->refbehind.b[z] & bb)
953
			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
954
				if(r1->refahead.b[z] & bb)
955
					paint1(r1, bn);
956
 
957
		if(!(r->refahead.b[z] & bb))
958
			break;
959
		r1 = r->s2;
960
		if(r1 != R)
961
			if(r1->refbehind.b[z] & bb)
962
				paint1(r1, bn);
963
		r = r->s1;
964
		if(r == R)
965
			break;
966
		if(r->act.b[z] & bb)
967
			break;
968
		if(!(r->refbehind.b[z] & bb))
969
			break;
970
	}
971
}
972
 
973
ulong
974
paint2(Reg *r, int bn)
975
{
976
	Reg *r1;
977
	int z;
978
	ulong bb, vreg;
979
 
980
	z = bn/32;
981
	bb = 1L << (bn%32);
982
	vreg = regbits;
983
	if(!(r->act.b[z] & bb))
984
		return vreg;
985
	for(;;) {
986
		if(!(r->refbehind.b[z] & bb))
987
			break;
988
		r1 = r->p1;
989
		if(r1 == R)
990
			break;
991
		if(!(r1->refahead.b[z] & bb))
992
			break;
993
		if(!(r1->act.b[z] & bb))
994
			break;
995
		r = r1;
996
	}
997
	for(;;) {
998
		r->act.b[z] &= ~bb;
999
 
1000
		vreg |= r->regu;
1001
 
1002
		if(r->refbehind.b[z] & bb)
1003
			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1004
				if(r1->refahead.b[z] & bb)
1005
					vreg |= paint2(r1, bn);
1006
 
1007
		if(!(r->refahead.b[z] & bb))
1008
			break;
1009
		r1 = r->s2;
1010
		if(r1 != R)
1011
			if(r1->refbehind.b[z] & bb)
1012
				vreg |= paint2(r1, bn);
1013
		r = r->s1;
1014
		if(r == R)
1015
			break;
1016
		if(!(r->act.b[z] & bb))
1017
			break;
1018
		if(!(r->refbehind.b[z] & bb))
1019
			break;
1020
	}
1021
	return vreg;
1022
}
1023
 
1024
void
1025
paint3(Reg *r, int bn, long rb, int rn)
1026
{
1027
	Reg *r1;
1028
	Prog *p;
1029
	int z;
1030
	ulong bb;
1031
 
1032
	z = bn/32;
1033
	bb = 1L << (bn%32);
1034
	if(r->act.b[z] & bb)
1035
		return;
1036
	for(;;) {
1037
		if(!(r->refbehind.b[z] & bb))
1038
			break;
1039
		r1 = r->p1;
1040
		if(r1 == R)
1041
			break;
1042
		if(!(r1->refahead.b[z] & bb))
1043
			break;
1044
		if(r1->act.b[z] & bb)
1045
			break;
1046
		r = r1;
1047
	}
1048
 
1049
	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1050
		addmove(r, bn, rn, 0);
1051
	for(;;) {
1052
		r->act.b[z] |= bb;
1053
		p = r->prog;
1054
 
1055
		if(r->use1.b[z] & bb) {
1056
			if(debug['R'])
1057
				print("%P", p);
1058
			addreg(&p->from, rn);
1059
			if(debug['R'])
1060
				print("\t.c%P\n", p);
1061
		}
1062
		if((r->use2.b[z]|r->set.b[z]) & bb) {
1063
			if(debug['R'])
1064
				print("%P", p);
1065
			addreg(&p->to, rn);
1066
			if(debug['R'])
1067
				print("\t.c%P\n", p);
1068
		}
1069
 
1070
		if(STORE(r) & r->regdiff.b[z] & bb)
1071
			addmove(r, bn, rn, 1);
1072
		r->regu |= rb;
1073
 
1074
		if(r->refbehind.b[z] & bb)
1075
			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1076
				if(r1->refahead.b[z] & bb)
1077
					paint3(r1, bn, rb, rn);
1078
 
1079
		if(!(r->refahead.b[z] & bb))
1080
			break;
1081
		r1 = r->s2;
1082
		if(r1 != R)
1083
			if(r1->refbehind.b[z] & bb)
1084
				paint3(r1, bn, rb, rn);
1085
		r = r->s1;
1086
		if(r == R)
1087
			break;
1088
		if(r->act.b[z] & bb)
1089
			break;
1090
		if(!(r->refbehind.b[z] & bb))
1091
			break;
1092
	}
1093
}
1094
 
1095
void
1096
addreg(Adr *a, int rn)
1097
{
1098
 
1099
	a->sym = 0;
1100
	a->name = D_NONE;
1101
	a->type = D_REG;
1102
	a->reg = rn;
1103
	if(rn >= NREG) {
1104
		a->type = D_FREG;
1105
		a->reg = rn-NREG;
1106
	}
1107
}
1108
 
1109
/*
1110
 *	bit	reg
1111
 *	0	R0
1112
 *	1	R1
1113
 *	...	...
1114
 *	10	R10
1115
 */
1116
long
1117
RtoB(int r)
1118
{
1119
 
1120
	if(r >= REGMIN && r <= REGMAX)
1121
		return 1L << r;
1122
	return 0;
1123
}
1124
 
1125
int
1126
BtoR(long b)
1127
{
1128
	b &= 0x01fcL;	// excluded R9 and R10 for m and g
1129
	if(b == 0)
1130
		return 0;
1131
	return bitno(b);
1132
}
1133
 
1134
/*
1135
 *	bit	reg
1136
 *	18	F2
1137
 *	19	F3
1138
 *	...	...
1139
 *	23	F7
1140
 */
1141
long
1142
FtoB(int f)
1143
{
1144
 
1145
	if(f < 2 || f > NFREG-1)
1146
		return 0;
1147
	return 1L << (f + 16);
1148
}
1149
 
1150
int
1151
BtoF(long b)
1152
{
1153
 
1154
	b &= 0xfc0000L;
1155
	if(b == 0)
1156
		return 0;
1157
	return bitno(b) - 16;
1158
}