Subversion Repositories planix.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
#include "gc.h"
2
 
3
Reg*
4
rega(void)
5
{
6
	Reg *r;
7
 
8
	r = freer;
9
	if(r == R) {
10
		r = alloc(sizeof(*r));
11
	} else
12
		freer = r->link;
13
 
14
	*r = zreg;
15
	return r;
16
}
17
 
18
int
19
rcmp(const void *a1, const void *a2)
20
{
21
	Rgn *p1, *p2;
22
	int c1, c2;
23
 
24
	p1 = (Rgn*)a1;
25
	p2 = (Rgn*)a2;
26
	c1 = p2->cost;
27
	c2 = p1->cost;
28
	if(c1 -= c2)
29
		return c1;
30
	return p2->varno - p1->varno;
31
}
32
 
33
void
34
regopt(Prog *p)
35
{
36
	Reg *r, *r1, *r2;
37
	Prog *p1;
38
	int i, z;
39
	long initpc, val, npc;
40
	ulong vreg;
41
	Bits bit;
42
	struct
43
	{
44
		long	m;
45
		long	c;
46
		Reg*	p;
47
	} log5[6], *lp;
48
 
49
	firstr = R;
50
	lastr = R;
51
	nvar = 0;
52
	regbits = RtoB(D_SP) | RtoB(D_AX) | RtoB(D_X0);
53
	if(REGEXT)
54
		regbits |= RtoB(REGEXT) | RtoB(REGEXT-1);
55
	for(z=0; z<BITS; z++) {
56
		externs.b[z] = 0;
57
		params.b[z] = 0;
58
		consts.b[z] = 0;
59
		addrs.b[z] = 0;
60
	}
61
 
62
	/*
63
	 * pass 1
64
	 * build aux data structure
65
	 * allocate pcs
66
	 * find use and set of variables
67
	 */
68
	val = 5L * 5L * 5L * 5L * 5L;
69
	lp = log5;
70
	for(i=0; i<5; i++) {
71
		lp->m = val;
72
		lp->c = 0;
73
		lp->p = R;
74
		val /= 5L;
75
		lp++;
76
	}
77
	val = 0;
78
	for(; p != P; p = p->link) {
79
		switch(p->as) {
80
		case ADATA:
81
		case AGLOBL:
82
		case ANAME:
83
		case ASIGNAME:
84
			continue;
85
		}
86
		r = rega();
87
		if(firstr == R) {
88
			firstr = r;
89
			lastr = r;
90
		} else {
91
			lastr->link = r;
92
			r->p1 = lastr;
93
			lastr->s1 = r;
94
			lastr = r;
95
		}
96
		r->prog = p;
97
		r->pc = val;
98
		val++;
99
 
100
		lp = log5;
101
		for(i=0; i<5; i++) {
102
			lp->c--;
103
			if(lp->c <= 0) {
104
				lp->c = lp->m;
105
				if(lp->p != R)
106
					lp->p->log5 = r;
107
				lp->p = r;
108
				(lp+1)->c = 0;
109
				break;
110
			}
111
			lp++;
112
		}
113
 
114
		r1 = r->p1;
115
		if(r1 != R)
116
		switch(r1->prog->as) {
117
		case ARET:
118
		case AJMP:
119
		case AIRETL:
120
		case AIRETQ:
121
			r->p1 = R;
122
			r1->s1 = R;
123
		}
124
 
125
		bit = mkvar(r, &p->from);
126
		if(bany(&bit))
127
		switch(p->as) {
128
		/*
129
		 * funny
130
		 */
131
		case ALEAL:
132
		case ALEAQ:
133
			for(z=0; z<BITS; z++)
134
				addrs.b[z] |= bit.b[z];
135
			break;
136
 
137
		/*
138
		 * left side read
139
		 */
140
		default:
141
			for(z=0; z<BITS; z++)
142
				r->use1.b[z] |= bit.b[z];
143
			break;
144
		}
145
 
146
		bit = mkvar(r, &p->to);
147
		if(bany(&bit))
148
		switch(p->as) {
149
		default:
150
			diag(Z, "reg: unknown op: %A", p->as);
151
			break;
152
 
153
		/*
154
		 * right side read
155
		 */
156
		case ACMPB:
157
		case ACMPL:
158
		case ACMPQ:
159
		case ACMPW:
160
		case ACOMISS:
161
		case ACOMISD:
162
		case AUCOMISS:
163
		case AUCOMISD:
164
			for(z=0; z<BITS; z++)
165
				r->use2.b[z] |= bit.b[z];
166
			break;
167
 
168
		/*
169
		 * right side write
170
		 */
171
		case ANOP:
172
		case AMOVL:
173
		case AMOVQ:
174
		case AMOVB:
175
		case AMOVW:
176
		case AMOVBLSX:
177
		case AMOVBLZX:
178
		case AMOVBQSX:
179
		case AMOVBQZX:
180
		case AMOVLQSX:
181
		case AMOVLQZX:
182
		case AMOVWLSX:
183
		case AMOVWLZX:
184
		case AMOVWQSX:
185
		case AMOVWQZX:
186
 
187
		case AMOVSS:
188
		case AMOVSD:
189
		case ACVTSD2SL:
190
		case ACVTSD2SQ:
191
		case ACVTSD2SS:
192
		case ACVTSL2SD:
193
		case ACVTSL2SS:
194
		case ACVTSQ2SD:
195
		case ACVTSQ2SS:
196
		case ACVTSS2SD:
197
		case ACVTSS2SL:
198
		case ACVTSS2SQ:
199
		case ACVTTSD2SL:
200
		case ACVTTSD2SQ:
201
		case ACVTTSS2SL:
202
		case ACVTTSS2SQ:
203
			for(z=0; z<BITS; z++)
204
				r->set.b[z] |= bit.b[z];
205
			break;
206
 
207
		/*
208
		 * right side read+write
209
		 */
210
		case AADDB:
211
		case AADDL:
212
		case AADDQ:
213
		case AADDW:
214
		case AANDB:
215
		case AANDL:
216
		case AANDQ:
217
		case AANDW:
218
		case ASUBB:
219
		case ASUBL:
220
		case ASUBQ:
221
		case ASUBW:
222
		case AORB:
223
		case AORL:
224
		case AORQ:
225
		case AORW:
226
		case AXORB:
227
		case AXORL:
228
		case AXORQ:
229
		case AXORW:
230
		case ASALB:
231
		case ASALL:
232
		case ASALQ:
233
		case ASALW:
234
		case ASARB:
235
		case ASARL:
236
		case ASARQ:
237
		case ASARW:
238
		case AROLB:
239
		case AROLL:
240
		case AROLQ:
241
		case AROLW:
242
		case ARORB:
243
		case ARORL:
244
		case ARORQ:
245
		case ARORW:
246
		case ASHLB:
247
		case ASHLL:
248
		case ASHLQ:
249
		case ASHLW:
250
		case ASHRB:
251
		case ASHRL:
252
		case ASHRQ:
253
		case ASHRW:
254
		case AIMULL:
255
		case AIMULQ:
256
		case AIMULW:
257
		case ANEGL:
258
		case ANEGQ:
259
		case ANOTL:
260
		case ANOTQ:
261
		case AADCL:
262
		case AADCQ:
263
		case ASBBL:
264
		case ASBBQ:
265
 
266
		case AADDSD:
267
		case AADDSS:
268
		case ACMPSD:
269
		case ACMPSS:
270
		case ADIVSD:
271
		case ADIVSS:
272
		case AMAXSD:
273
		case AMAXSS:
274
		case AMINSD:
275
		case AMINSS:
276
		case AMULSD:
277
		case AMULSS:
278
		case ARCPSS:
279
		case ARSQRTSS:
280
		case ASQRTSD:
281
		case ASQRTSS:
282
		case ASUBSD:
283
		case ASUBSS:
284
		case AXORPD:
285
			for(z=0; z<BITS; z++) {
286
				r->set.b[z] |= bit.b[z];
287
				r->use2.b[z] |= bit.b[z];
288
			}
289
			break;
290
 
291
		/*
292
		 * funny
293
		 */
294
		case ACALL:
295
			for(z=0; z<BITS; z++)
296
				addrs.b[z] |= bit.b[z];
297
			break;
298
		}
299
 
300
		switch(p->as) {
301
		case AIMULL:
302
		case AIMULQ:
303
		case AIMULW:
304
			if(p->to.type != D_NONE)
305
				break;
306
 
307
		case AIDIVB:
308
		case AIDIVL:
309
		case AIDIVQ:
310
		case AIDIVW:
311
		case AIMULB:
312
		case ADIVB:
313
		case ADIVL:
314
		case ADIVQ:
315
		case ADIVW:
316
		case AMULB:
317
		case AMULL:
318
		case AMULQ:
319
		case AMULW:
320
 
321
		case ACWD:
322
		case ACDQ:
323
		case ACQO:
324
			r->regu |= RtoB(D_AX) | RtoB(D_DX);
325
			break;
326
 
327
		case AREP:
328
		case AREPN:
329
		case ALOOP:
330
		case ALOOPEQ:
331
		case ALOOPNE:
332
			r->regu |= RtoB(D_CX);
333
			break;
334
 
335
		case AMOVSB:
336
		case AMOVSL:
337
		case AMOVSQ:
338
		case AMOVSW:
339
		case ACMPSB:
340
		case ACMPSL:
341
		case ACMPSQ:
342
		case ACMPSW:
343
			r->regu |= RtoB(D_SI) | RtoB(D_DI);
344
			break;
345
 
346
		case ASTOSB:
347
		case ASTOSL:
348
		case ASTOSQ:
349
		case ASTOSW:
350
		case ASCASB:
351
		case ASCASL:
352
		case ASCASQ:
353
		case ASCASW:
354
			r->regu |= RtoB(D_AX) | RtoB(D_DI);
355
			break;
356
 
357
		case AINSB:
358
		case AINSL:
359
		case AINSW:
360
		case AOUTSB:
361
		case AOUTSL:
362
		case AOUTSW:
363
			r->regu |= RtoB(D_DI) | RtoB(D_DX);
364
			break;
365
		}
366
	}
367
	if(firstr == R)
368
		return;
369
	initpc = pc - val;
370
	npc = val;
371
 
372
	/*
373
	 * pass 2
374
	 * turn branch references to pointers
375
	 * build back pointers
376
	 */
377
	for(r = firstr; r != R; r = r->link) {
378
		p = r->prog;
379
		if(p->to.type == D_BRANCH) {
380
			val = p->to.offset - initpc;
381
			r1 = firstr;
382
			while(r1 != R) {
383
				r2 = r1->log5;
384
				if(r2 != R && val >= r2->pc) {
385
					r1 = r2;
386
					continue;
387
				}
388
				if(r1->pc == val)
389
					break;
390
				r1 = r1->link;
391
			}
392
			if(r1 == R) {
393
				nearln = p->lineno;
394
				diag(Z, "ref not found\n%P", p);
395
				continue;
396
			}
397
			if(r1 == r) {
398
				nearln = p->lineno;
399
				diag(Z, "ref to self\n%P", p);
400
				continue;
401
			}
402
			r->s2 = r1;
403
			r->p2link = r1->p2;
404
			r1->p2 = r;
405
		}
406
	}
407
	if(debug['R']) {
408
		p = firstr->prog;
409
		print("\n%L %D\n", p->lineno, &p->from);
410
	}
411
 
412
	/*
413
	 * pass 2.5
414
	 * find looping structure
415
	 */
416
	for(r = firstr; r != R; r = r->link)
417
		r->active = 0;
418
	change = 0;
419
	loopit(firstr, npc);
420
	if(debug['R'] && debug['v']) {
421
		print("\nlooping structure:\n");
422
		for(r = firstr; r != R; r = r->link) {
423
			print("%ld:%P", r->loop, r->prog);
424
			for(z=0; z<BITS; z++)
425
				bit.b[z] = r->use1.b[z] |
426
					   r->use2.b[z] |
427
					   r->set.b[z];
428
			if(bany(&bit)) {
429
				print("\t");
430
				if(bany(&r->use1))
431
					print(" u1=%B", r->use1);
432
				if(bany(&r->use2))
433
					print(" u2=%B", r->use2);
434
				if(bany(&r->set))
435
					print(" st=%B", r->set);
436
			}
437
			print("\n");
438
		}
439
	}
440
 
441
	/*
442
	 * pass 3
443
	 * iterate propagating usage
444
	 * 	back until flow graph is complete
445
	 */
446
loop1:
447
	change = 0;
448
	for(r = firstr; r != R; r = r->link)
449
		r->active = 0;
450
	for(r = firstr; r != R; r = r->link)
451
		if(r->prog->as == ARET)
452
			prop(r, zbits, zbits);
453
loop11:
454
	/* pick up unreachable code */
455
	i = 0;
456
	for(r = firstr; r != R; r = r1) {
457
		r1 = r->link;
458
		if(r1 && r1->active && !r->active) {
459
			prop(r, zbits, zbits);
460
			i = 1;
461
		}
462
	}
463
	if(i)
464
		goto loop11;
465
	if(change)
466
		goto loop1;
467
 
468
 
469
	/*
470
	 * pass 4
471
	 * iterate propagating register/variable synchrony
472
	 * 	forward until graph is complete
473
	 */
474
loop2:
475
	change = 0;
476
	for(r = firstr; r != R; r = r->link)
477
		r->active = 0;
478
	synch(firstr, zbits);
479
	if(change)
480
		goto loop2;
481
 
482
 
483
	/*
484
	 * pass 5
485
	 * isolate regions
486
	 * calculate costs (paint1)
487
	 */
488
	r = firstr;
489
	if(r) {
490
		for(z=0; z<BITS; z++)
491
			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
492
			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
493
		if(bany(&bit)) {
494
			nearln = r->prog->lineno;
495
			warn(Z, "used and not set: %B", bit);
496
			if(debug['R'] && !debug['w'])
497
				print("used and not set: %B\n", bit);
498
		}
499
	}
500
	if(debug['R'] && debug['v'])
501
		print("\nprop structure:\n");
502
	for(r = firstr; r != R; r = r->link)
503
		r->act = zbits;
504
	rgp = region;
505
	nregion = 0;
506
	for(r = firstr; r != R; r = r->link) {
507
		if(debug['R'] && debug['v']) {
508
			print("%P\t", r->prog);
509
			if(bany(&r->set))
510
				print("s:%B ", r->set);
511
			if(bany(&r->refahead))
512
				print("ra:%B ", r->refahead);
513
			if(bany(&r->calahead))
514
				print("ca:%B ", r->calahead);
515
			print("\n");
516
		}
517
		for(z=0; z<BITS; z++)
518
			bit.b[z] = r->set.b[z] &
519
			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
520
		if(bany(&bit)) {
521
			nearln = r->prog->lineno;
522
			warn(Z, "set and not used: %B", bit);
523
			if(debug['R'])
524
				print("set and not used: %B\n", bit);
525
			excise(r);
526
		}
527
		for(z=0; z<BITS; z++)
528
			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
529
		while(bany(&bit)) {
530
			i = bnum(bit);
531
			rgp->enter = r;
532
			rgp->varno = i;
533
			change = 0;
534
			if(debug['R'] && debug['v'])
535
				print("\n");
536
			paint1(r, i);
537
			bit.b[i/32] &= ~(1L<<(i%32));
538
			if(change <= 0) {
539
				if(debug['R'])
540
					print("%L$%d: %B\n",
541
						r->prog->lineno, change, blsh(i));
542
				continue;
543
			}
544
			rgp->cost = change;
545
			nregion++;
546
			if(nregion >= NRGN) {
547
				warn(Z, "too many regions");
548
				goto brk;
549
			}
550
			rgp++;
551
		}
552
	}
553
brk:
554
	qsort(region, nregion, sizeof(region[0]), rcmp);
555
 
556
	/*
557
	 * pass 6
558
	 * determine used registers (paint2)
559
	 * replace code (paint3)
560
	 */
561
	rgp = region;
562
	for(i=0; i<nregion; i++) {
563
		bit = blsh(rgp->varno);
564
		vreg = paint2(rgp->enter, rgp->varno);
565
		vreg = allreg(vreg, rgp);
566
		if(debug['R']) {
567
			print("%L$%d %R: %B\n",
568
				rgp->enter->prog->lineno,
569
				rgp->cost,
570
				rgp->regno,
571
				bit);
572
		}
573
		if(rgp->regno != 0)
574
			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
575
		rgp++;
576
	}
577
	/*
578
	 * pass 7
579
	 * peep-hole on basic block
580
	 */
581
	if(!debug['R'] || debug['P'])
582
		peep();
583
 
584
	/*
585
	 * pass 8
586
	 * recalculate pc
587
	 */
588
	val = initpc;
589
	for(r = firstr; r != R; r = r1) {
590
		r->pc = val;
591
		p = r->prog;
592
		p1 = P;
593
		r1 = r->link;
594
		if(r1 != R)
595
			p1 = r1->prog;
596
		for(; p != p1; p = p->link) {
597
			switch(p->as) {
598
			default:
599
				val++;
600
				break;
601
 
602
			case ANOP:
603
			case ADATA:
604
			case AGLOBL:
605
			case ANAME:
606
			case ASIGNAME:
607
				break;
608
			}
609
		}
610
	}
611
	pc = val;
612
 
613
	/*
614
	 * fix up branches
615
	 */
616
	if(debug['R'])
617
		if(bany(&addrs))
618
			print("addrs: %B\n", addrs);
619
 
620
	r1 = 0; /* set */
621
	for(r = firstr; r != R; r = r->link) {
622
		p = r->prog;
623
		if(p->to.type == D_BRANCH)
624
			p->to.offset = r->s2->pc;
625
		r1 = r;
626
	}
627
 
628
	/*
629
	 * last pass
630
	 * eliminate nops
631
	 * free aux structures
632
	 */
633
	for(p = firstr->prog; p != P; p = p->link){
634
		while(p->link && p->link->as == ANOP)
635
			p->link = p->link->link;
636
	}
637
	if(r1 != R) {
638
		r1->link = freer;
639
		freer = firstr;
640
	}
641
}
642
 
643
/*
644
 * add mov b,rn
645
 * just after r
646
 */
647
void
648
addmove(Reg *r, int bn, int rn, int f)
649
{
650
	Prog *p, *p1;
651
	Adr *a;
652
	Var *v;
653
 
654
	p1 = alloc(sizeof(*p1));
655
	*p1 = zprog;
656
	p = r->prog;
657
 
658
	p1->link = p->link;
659
	p->link = p1;
660
	p1->lineno = p->lineno;
661
 
662
	v = var + bn;
663
 
664
	a = &p1->to;
665
	a->sym = v->sym;
666
	a->offset = v->offset;
667
	a->etype = v->etype;
668
	a->type = v->name;
669
 
670
	p1->as = AMOVL;
671
	if(v->etype == TCHAR || v->etype == TUCHAR)
672
		p1->as = AMOVB;
673
	if(v->etype == TSHORT || v->etype == TUSHORT)
674
		p1->as = AMOVW;
675
	if(v->etype == TVLONG || v->etype == TUVLONG || v->etype == TIND)
676
		p1->as = AMOVQ;
677
	if(v->etype == TFLOAT)
678
		p1->as = AMOVSS;
679
	if(v->etype == TDOUBLE)
680
		p1->as = AMOVSD;
681
 
682
	p1->from.type = rn;
683
	if(!f) {
684
		p1->from = *a;
685
		*a = zprog.from;
686
		a->type = rn;
687
		if(v->etype == TUCHAR)
688
			p1->as = AMOVB;
689
		if(v->etype == TUSHORT)
690
			p1->as = AMOVW;
691
	}
692
	if(debug['R'])
693
		print("%P\t.a%P\n", p, p1);
694
}
695
 
696
ulong
697
doregbits(int r)
698
{
699
	ulong b;
700
 
701
	b = 0;
702
	if(r >= D_INDIR)
703
		r -= D_INDIR;
704
	if(r >= D_AX && r <= D_R15)
705
		b |= RtoB(r);
706
	else
707
	if(r >= D_AL && r <= D_R15B)
708
		b |= RtoB(r-D_AL+D_AX);
709
	else
710
	if(r >= D_AH && r <= D_BH)
711
		b |= RtoB(r-D_AH+D_AX);
712
	else
713
	if(r >= D_X0 && r <= D_X0+15)
714
		b |= FtoB(r);
715
	return b;
716
}
717
 
718
Bits
719
mkvar(Reg *r, Adr *a)
720
{
721
	Var *v;
722
	int i, t, n, et, z;
723
	long o;
724
	Bits bit;
725
	Sym *s;
726
 
727
	/*
728
	 * mark registers used
729
	 */
730
	t = a->type;
731
	r->regu |= doregbits(t);
732
	r->regu |= doregbits(a->index);
733
 
734
	switch(t) {
735
	default:
736
		goto none;
737
	case D_ADDR:
738
		a->type = a->index;
739
		bit = mkvar(r, a);
740
		for(z=0; z<BITS; z++)
741
			addrs.b[z] |= bit.b[z];
742
		a->type = t;
743
		goto none;
744
	case D_EXTERN:
745
	case D_STATIC:
746
	case D_PARAM:
747
	case D_AUTO:
748
		n = t;
749
		break;
750
	}
751
	s = a->sym;
752
	if(s == S)
753
		goto none;
754
	if(s->name[0] == '.')
755
		goto none;
756
	et = a->etype;
757
	o = a->offset;
758
	v = var;
759
	for(i=0; i<nvar; i++) {
760
		if(s == v->sym)
761
		if(n == v->name)
762
		if(o == v->offset)
763
			goto out;
764
		v++;
765
	}
766
	if(nvar >= NVAR) {
767
		if(debug['w'] > 1 && s)
768
			warn(Z, "variable not optimized: %s", s->name);
769
		goto none;
770
	}
771
	i = nvar;
772
	nvar++;
773
	v = &var[i];
774
	v->sym = s;
775
	v->offset = o;
776
	v->name = n;
777
	v->etype = et;
778
	if(debug['R'])
779
		print("bit=%2d et=%2d %D\n", i, et, a);
780
 
781
out:
782
	bit = blsh(i);
783
	if(n == D_EXTERN || n == D_STATIC)
784
		for(z=0; z<BITS; z++)
785
			externs.b[z] |= bit.b[z];
786
	if(n == D_PARAM)
787
		for(z=0; z<BITS; z++)
788
			params.b[z] |= bit.b[z];
789
	if(v->etype != et || !(typechlpfd[et] || typev[et]))	/* funny punning */
790
		for(z=0; z<BITS; z++)
791
			addrs.b[z] |= bit.b[z];
792
	return bit;
793
 
794
none:
795
	return zbits;
796
}
797
 
798
void
799
prop(Reg *r, Bits ref, Bits cal)
800
{
801
	Reg *r1, *r2;
802
	int z;
803
 
804
	for(r1 = r; r1 != R; r1 = r1->p1) {
805
		for(z=0; z<BITS; z++) {
806
			ref.b[z] |= r1->refahead.b[z];
807
			if(ref.b[z] != r1->refahead.b[z]) {
808
				r1->refahead.b[z] = ref.b[z];
809
				change++;
810
			}
811
			cal.b[z] |= r1->calahead.b[z];
812
			if(cal.b[z] != r1->calahead.b[z]) {
813
				r1->calahead.b[z] = cal.b[z];
814
				change++;
815
			}
816
		}
817
		switch(r1->prog->as) {
818
		case ACALL:
819
			for(z=0; z<BITS; z++) {
820
				cal.b[z] |= ref.b[z] | externs.b[z];
821
				ref.b[z] = 0;
822
			}
823
			break;
824
 
825
		case ATEXT:
826
			for(z=0; z<BITS; z++) {
827
				cal.b[z] = 0;
828
				ref.b[z] = 0;
829
			}
830
			break;
831
 
832
		case ARET:
833
			for(z=0; z<BITS; z++) {
834
				cal.b[z] = externs.b[z];
835
				ref.b[z] = 0;
836
			}
837
		}
838
		for(z=0; z<BITS; z++) {
839
			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
840
				r1->use1.b[z] | r1->use2.b[z];
841
			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
842
			r1->refbehind.b[z] = ref.b[z];
843
			r1->calbehind.b[z] = cal.b[z];
844
		}
845
		if(r1->active)
846
			break;
847
		r1->active = 1;
848
	}
849
	for(; r != r1; r = r->p1)
850
		for(r2 = r->p2; r2 != R; r2 = r2->p2link)
851
			prop(r2, r->refbehind, r->calbehind);
852
}
853
 
854
/*
855
 * find looping structure
856
 *
857
 * 1) find reverse postordering
858
 * 2) find approximate dominators,
859
 *	the actual dominators if the flow graph is reducible
860
 *	otherwise, dominators plus some other non-dominators.
861
 *	See Matthew S. Hecht and Jeffrey D. Ullman,
862
 *	"Analysis of a Simple Algorithm for Global Data Flow Problems",
863
 *	Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
864
 *	Oct. 1-3, 1973, pp.  207-217.
865
 * 3) find all nodes with a predecessor dominated by the current node.
866
 *	such a node is a loop head.
867
 *	recursively, all preds with a greater rpo number are in the loop
868
 */
869
long
870
postorder(Reg *r, Reg **rpo2r, long n)
871
{
872
	Reg *r1;
873
 
874
	r->rpo = 1;
875
	r1 = r->s1;
876
	if(r1 && !r1->rpo)
877
		n = postorder(r1, rpo2r, n);
878
	r1 = r->s2;
879
	if(r1 && !r1->rpo)
880
		n = postorder(r1, rpo2r, n);
881
	rpo2r[n] = r;
882
	n++;
883
	return n;
884
}
885
 
886
long
887
rpolca(long *idom, long rpo1, long rpo2)
888
{
889
	long t;
890
 
891
	if(rpo1 == -1)
892
		return rpo2;
893
	while(rpo1 != rpo2){
894
		if(rpo1 > rpo2){
895
			t = rpo2;
896
			rpo2 = rpo1;
897
			rpo1 = t;
898
		}
899
		while(rpo1 < rpo2){
900
			t = idom[rpo2];
901
			if(t >= rpo2)
902
				fatal(Z, "bad idom");
903
			rpo2 = t;
904
		}
905
	}
906
	return rpo1;
907
}
908
 
909
int
910
doms(long *idom, long r, long s)
911
{
912
	while(s > r)
913
		s = idom[s];
914
	return s == r;
915
}
916
 
917
int
918
loophead(long *idom, Reg *r)
919
{
920
	long src;
921
 
922
	src = r->rpo;
923
	if(r->p1 != R && doms(idom, src, r->p1->rpo))
924
		return 1;
925
	for(r = r->p2; r != R; r = r->p2link)
926
		if(doms(idom, src, r->rpo))
927
			return 1;
928
	return 0;
929
}
930
 
931
void
932
loopmark(Reg **rpo2r, long head, Reg *r)
933
{
934
	if(r->rpo < head || r->active == head)
935
		return;
936
	r->active = head;
937
	r->loop += LOOP;
938
	if(r->p1 != R)
939
		loopmark(rpo2r, head, r->p1);
940
	for(r = r->p2; r != R; r = r->p2link)
941
		loopmark(rpo2r, head, r);
942
}
943
 
944
void
945
loopit(Reg *r, long nr)
946
{
947
	Reg *r1;
948
	long i, d, me;
949
 
950
	if(nr > maxnr) {
951
		rpo2r = alloc(nr * sizeof(Reg*));
952
		idom = alloc(nr * sizeof(long));
953
		maxnr = nr;
954
	}
955
 
956
	d = postorder(r, rpo2r, 0);
957
	if(d > nr)
958
		fatal(Z, "too many reg nodes");
959
	nr = d;
960
	for(i = 0; i < nr / 2; i++){
961
		r1 = rpo2r[i];
962
		rpo2r[i] = rpo2r[nr - 1 - i];
963
		rpo2r[nr - 1 - i] = r1;
964
	}
965
	for(i = 0; i < nr; i++)
966
		rpo2r[i]->rpo = i;
967
 
968
	idom[0] = 0;
969
	for(i = 0; i < nr; i++){
970
		r1 = rpo2r[i];
971
		me = r1->rpo;
972
		d = -1;
973
		if(r1->p1 != R && r1->p1->rpo < me)
974
			d = r1->p1->rpo;
975
		for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
976
			if(r1->rpo < me)
977
				d = rpolca(idom, d, r1->rpo);
978
		idom[i] = d;
979
	}
980
 
981
	for(i = 0; i < nr; i++){
982
		r1 = rpo2r[i];
983
		r1->loop++;
984
		if(r1->p2 != R && loophead(idom, r1))
985
			loopmark(rpo2r, i, r1);
986
	}
987
}
988
 
989
void
990
synch(Reg *r, Bits dif)
991
{
992
	Reg *r1;
993
	int z;
994
 
995
	for(r1 = r; r1 != R; r1 = r1->s1) {
996
		for(z=0; z<BITS; z++) {
997
			dif.b[z] = (dif.b[z] &
998
				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
999
					r1->set.b[z] | r1->regdiff.b[z];
1000
			if(dif.b[z] != r1->regdiff.b[z]) {
1001
				r1->regdiff.b[z] = dif.b[z];
1002
				change++;
1003
			}
1004
		}
1005
		if(r1->active)
1006
			break;
1007
		r1->active = 1;
1008
		for(z=0; z<BITS; z++)
1009
			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
1010
		if(r1->s2 != R)
1011
			synch(r1->s2, dif);
1012
	}
1013
}
1014
 
1015
ulong
1016
allreg(ulong b, Rgn *r)
1017
{
1018
	Var *v;
1019
	int i;
1020
 
1021
	v = var + r->varno;
1022
	r->regno = 0;
1023
	switch(v->etype) {
1024
 
1025
	default:
1026
		diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
1027
		break;
1028
 
1029
	case TCHAR:
1030
	case TUCHAR:
1031
	case TSHORT:
1032
	case TUSHORT:
1033
	case TINT:
1034
	case TUINT:
1035
	case TLONG:
1036
	case TULONG:
1037
	case TVLONG:
1038
	case TUVLONG:
1039
	case TIND:
1040
	case TARRAY:
1041
		i = BtoR(~b);
1042
		if(i && r->cost > 0) {
1043
			r->regno = i;
1044
			return RtoB(i);
1045
		}
1046
		break;
1047
 
1048
	case TDOUBLE:
1049
	case TFLOAT:
1050
		i = BtoF(~b);
1051
		if(i && r->cost > 0) {
1052
			r->regno = i;
1053
			return FtoB(i);
1054
		}
1055
		break;
1056
	}
1057
	return 0;
1058
}
1059
 
1060
void
1061
paint1(Reg *r, int bn)
1062
{
1063
	Reg *r1;
1064
	Prog *p;
1065
	int z;
1066
	ulong bb;
1067
 
1068
	z = bn/32;
1069
	bb = 1L<<(bn%32);
1070
	if(r->act.b[z] & bb)
1071
		return;
1072
	for(;;) {
1073
		if(!(r->refbehind.b[z] & bb))
1074
			break;
1075
		r1 = r->p1;
1076
		if(r1 == R)
1077
			break;
1078
		if(!(r1->refahead.b[z] & bb))
1079
			break;
1080
		if(r1->act.b[z] & bb)
1081
			break;
1082
		r = r1;
1083
	}
1084
 
1085
	if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
1086
		change -= CLOAD * r->loop;
1087
		if(debug['R'] && debug['v'])
1088
			print("%ld%P\tld %B $%d\n", r->loop,
1089
				r->prog, blsh(bn), change);
1090
	}
1091
	for(;;) {
1092
		r->act.b[z] |= bb;
1093
		p = r->prog;
1094
 
1095
		if(r->use1.b[z] & bb) {
1096
			change += CREF * r->loop;
1097
			if(debug['R'] && debug['v'])
1098
				print("%ld%P\tu1 %B $%d\n", r->loop,
1099
					p, blsh(bn), change);
1100
		}
1101
 
1102
		if((r->use2.b[z]|r->set.b[z]) & bb) {
1103
			change += CREF * r->loop;
1104
			if(debug['R'] && debug['v'])
1105
				print("%ld%P\tu2 %B $%d\n", r->loop,
1106
					p, blsh(bn), change);
1107
		}
1108
 
1109
		if(STORE(r) & r->regdiff.b[z] & bb) {
1110
			change -= CLOAD * r->loop;
1111
			if(debug['R'] && debug['v'])
1112
				print("%ld%P\tst %B $%d\n", r->loop,
1113
					p, blsh(bn), change);
1114
		}
1115
 
1116
		if(r->refbehind.b[z] & bb)
1117
			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1118
				if(r1->refahead.b[z] & bb)
1119
					paint1(r1, bn);
1120
 
1121
		if(!(r->refahead.b[z] & bb))
1122
			break;
1123
		r1 = r->s2;
1124
		if(r1 != R)
1125
			if(r1->refbehind.b[z] & bb)
1126
				paint1(r1, bn);
1127
		r = r->s1;
1128
		if(r == R)
1129
			break;
1130
		if(r->act.b[z] & bb)
1131
			break;
1132
		if(!(r->refbehind.b[z] & bb))
1133
			break;
1134
	}
1135
}
1136
 
1137
ulong
1138
regset(Reg *r, ulong bb)
1139
{
1140
	ulong b, set;
1141
	Adr v;
1142
	int c;
1143
 
1144
	set = 0;
1145
	v = zprog.from;
1146
	while(b = bb & ~(bb-1)) {
1147
		v.type = b & 0xFFFF? BtoR(b): BtoF(b);
1148
		if(v.type == 0)
1149
			diag(Z, "zero v.type for %#lux", b);
1150
		c = copyu(r->prog, &v, A);
1151
		if(c == 3)
1152
			set |= b;
1153
		bb &= ~b;
1154
	}
1155
	return set;
1156
}
1157
 
1158
ulong
1159
reguse(Reg *r, ulong bb)
1160
{
1161
	ulong b, set;
1162
	Adr v;
1163
	int c;
1164
 
1165
	set = 0;
1166
	v = zprog.from;
1167
	while(b = bb & ~(bb-1)) {
1168
		v.type = b & 0xFFFF? BtoR(b): BtoF(b);
1169
		c = copyu(r->prog, &v, A);
1170
		if(c == 1 || c == 2 || c == 4)
1171
			set |= b;
1172
		bb &= ~b;
1173
	}
1174
	return set;
1175
}
1176
 
1177
ulong
1178
paint2(Reg *r, int bn)
1179
{
1180
	Reg *r1;
1181
	int z;
1182
	ulong bb, vreg, x;
1183
 
1184
	z = bn/32;
1185
	bb = 1L << (bn%32);
1186
	vreg = regbits;
1187
	if(!(r->act.b[z] & bb))
1188
		return vreg;
1189
	for(;;) {
1190
		if(!(r->refbehind.b[z] & bb))
1191
			break;
1192
		r1 = r->p1;
1193
		if(r1 == R)
1194
			break;
1195
		if(!(r1->refahead.b[z] & bb))
1196
			break;
1197
		if(!(r1->act.b[z] & bb))
1198
			break;
1199
		r = r1;
1200
	}
1201
	for(;;) {
1202
		r->act.b[z] &= ~bb;
1203
 
1204
		vreg |= r->regu;
1205
 
1206
		if(r->refbehind.b[z] & bb)
1207
			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1208
				if(r1->refahead.b[z] & bb)
1209
					vreg |= paint2(r1, bn);
1210
 
1211
		if(!(r->refahead.b[z] & bb))
1212
			break;
1213
		r1 = r->s2;
1214
		if(r1 != R)
1215
			if(r1->refbehind.b[z] & bb)
1216
				vreg |= paint2(r1, bn);
1217
		r = r->s1;
1218
		if(r == R)
1219
			break;
1220
		if(!(r->act.b[z] & bb))
1221
			break;
1222
		if(!(r->refbehind.b[z] & bb))
1223
			break;
1224
	}
1225
 
1226
	bb = vreg;
1227
	for(; r; r=r->s1) {
1228
		x = r->regu & ~bb;
1229
		if(x) {
1230
			vreg |= reguse(r, x);
1231
			bb |= regset(r, x);
1232
		}
1233
	}
1234
	return vreg;
1235
}
1236
 
1237
void
1238
paint3(Reg *r, int bn, long rb, int rn)
1239
{
1240
	Reg *r1;
1241
	Prog *p;
1242
	int z;
1243
	ulong bb;
1244
 
1245
	z = bn/32;
1246
	bb = 1L << (bn%32);
1247
	if(r->act.b[z] & bb)
1248
		return;
1249
	for(;;) {
1250
		if(!(r->refbehind.b[z] & bb))
1251
			break;
1252
		r1 = r->p1;
1253
		if(r1 == R)
1254
			break;
1255
		if(!(r1->refahead.b[z] & bb))
1256
			break;
1257
		if(r1->act.b[z] & bb)
1258
			break;
1259
		r = r1;
1260
	}
1261
 
1262
	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1263
		addmove(r, bn, rn, 0);
1264
	for(;;) {
1265
		r->act.b[z] |= bb;
1266
		p = r->prog;
1267
 
1268
		if(r->use1.b[z] & bb) {
1269
			if(debug['R'])
1270
				print("%P", p);
1271
			addreg(&p->from, rn);
1272
			if(debug['R'])
1273
				print("\t.c%P\n", p);
1274
		}
1275
		if((r->use2.b[z]|r->set.b[z]) & bb) {
1276
			if(debug['R'])
1277
				print("%P", p);
1278
			addreg(&p->to, rn);
1279
			if(debug['R'])
1280
				print("\t.c%P\n", p);
1281
		}
1282
 
1283
		if(STORE(r) & r->regdiff.b[z] & bb)
1284
			addmove(r, bn, rn, 1);
1285
		r->regu |= rb;
1286
 
1287
		if(r->refbehind.b[z] & bb)
1288
			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1289
				if(r1->refahead.b[z] & bb)
1290
					paint3(r1, bn, rb, rn);
1291
 
1292
		if(!(r->refahead.b[z] & bb))
1293
			break;
1294
		r1 = r->s2;
1295
		if(r1 != R)
1296
			if(r1->refbehind.b[z] & bb)
1297
				paint3(r1, bn, rb, rn);
1298
		r = r->s1;
1299
		if(r == R)
1300
			break;
1301
		if(r->act.b[z] & bb)
1302
			break;
1303
		if(!(r->refbehind.b[z] & bb))
1304
			break;
1305
	}
1306
}
1307
 
1308
void
1309
addreg(Adr *a, int rn)
1310
{
1311
 
1312
	a->sym = 0;
1313
	a->offset = 0;
1314
	a->type = rn;
1315
}
1316
 
1317
long
1318
RtoB(int r)
1319
{
1320
 
1321
	if(r < D_AX || r > D_R15)
1322
		return 0;
1323
	return 1L << (r-D_AX);
1324
}
1325
 
1326
int
1327
BtoR(long b)
1328
{
1329
 
1330
	b &= 0xffffL;
1331
	if(b == 0)
1332
		return 0;
1333
	return bitno(b) + D_AX;
1334
}
1335
 
1336
/*
1337
 *	bit	reg
1338
 *	16	X5
1339
 *	17	X6
1340
 *	18	X7
1341
 */
1342
long
1343
FtoB(int f)
1344
{
1345
	if(f < FREGMIN || f > FREGEXT)
1346
		return 0;
1347
	return 1L << (f - FREGMIN + 16);
1348
}
1349
 
1350
int
1351
BtoF(long b)
1352
{
1353
 
1354
	b &= 0x70000L;
1355
	if(b == 0)
1356
		return 0;
1357
	return bitno(b) - 16 + FREGMIN;
1358
}