Warning: Attempt to read property "date" on null in /usr/local/www/websvn.planix.org/blame.php on line 247

Warning: Attempt to read property "msg" on null in /usr/local/www/websvn.planix.org/blame.php on line 247
WebSVN – planix.SVN – Blame – /os/branches/planix-v0/sys/src/cmd/8c/reg.c – Rev 2

Subversion Repositories planix.SVN

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
#include "gc.h"
2
 
3
Reg*
4
rega(void)
5
{
6
	Reg *r;
7
 
8
	r = freer;
9
	if(r == R) {
10
		r = alloc(sizeof(*r));
11
	} else
12
		freer = r->link;
13
 
14
	*r = zreg;
15
	return r;
16
}
17
 
18
int
19
rcmp(const void *a1, const void *a2)
20
{
21
	Rgn *p1, *p2;
22
	int c1, c2;
23
 
24
	p1 = (Rgn*)a1;
25
	p2 = (Rgn*)a2;
26
	c1 = p2->cost;
27
	c2 = p1->cost;
28
	if(c1 -= c2)
29
		return c1;
30
	return p2->varno - p1->varno;
31
}
32
 
33
void
34
regopt(Prog *p)
35
{
36
	Reg *r, *r1, *r2;
37
	Prog *p1;
38
	int i, z;
39
	long initpc, val, npc;
40
	ulong vreg;
41
	Bits bit;
42
	struct
43
	{
44
		long	m;
45
		long	c;
46
		Reg*	p;
47
	} log5[6], *lp;
48
 
49
	firstr = R;
50
	lastr = R;
51
	nvar = 0;
52
	regbits = RtoB(D_SP) | RtoB(D_AX);
53
	for(z=0; z<BITS; z++) {
54
		externs.b[z] = 0;
55
		params.b[z] = 0;
56
		consts.b[z] = 0;
57
		addrs.b[z] = 0;
58
	}
59
 
60
	/*
61
	 * pass 1
62
	 * build aux data structure
63
	 * allocate pcs
64
	 * find use and set of variables
65
	 */
66
	val = 5L * 5L * 5L * 5L * 5L;
67
	lp = log5;
68
	for(i=0; i<5; i++) {
69
		lp->m = val;
70
		lp->c = 0;
71
		lp->p = R;
72
		val /= 5L;
73
		lp++;
74
	}
75
	val = 0;
76
	for(; p != P; p = p->link) {
77
		switch(p->as) {
78
		case ADATA:
79
		case AGLOBL:
80
		case ANAME:
81
		case ASIGNAME:
82
			continue;
83
		}
84
		r = rega();
85
		if(firstr == R) {
86
			firstr = r;
87
			lastr = r;
88
		} else {
89
			lastr->link = r;
90
			r->p1 = lastr;
91
			lastr->s1 = r;
92
			lastr = r;
93
		}
94
		r->prog = p;
95
		r->pc = val;
96
		val++;
97
 
98
		lp = log5;
99
		for(i=0; i<5; i++) {
100
			lp->c--;
101
			if(lp->c <= 0) {
102
				lp->c = lp->m;
103
				if(lp->p != R)
104
					lp->p->log5 = r;
105
				lp->p = r;
106
				(lp+1)->c = 0;
107
				break;
108
			}
109
			lp++;
110
		}
111
 
112
		r1 = r->p1;
113
		if(r1 != R)
114
		switch(r1->prog->as) {
115
		case ARET:
116
		case AJMP:
117
		case AIRETL:
118
			r->p1 = R;
119
			r1->s1 = R;
120
		}
121
 
122
		bit = mkvar(r, &p->from, p->as==AMOVL);
123
		if(bany(&bit))
124
		switch(p->as) {
125
		/*
126
		 * funny
127
		 */
128
		case ALEAL:
129
			for(z=0; z<BITS; z++)
130
				addrs.b[z] |= bit.b[z];
131
			break;
132
 
133
		/*
134
		 * left side read
135
		 */
136
		default:
137
			for(z=0; z<BITS; z++)
138
				r->use1.b[z] |= bit.b[z];
139
			break;
140
		}
141
 
142
		bit = mkvar(r, &p->to, 0);
143
		if(bany(&bit))
144
		switch(p->as) {
145
		default:
146
			diag(Z, "reg: unknown op: %A", p->as);
147
			break;
148
 
149
		/*
150
		 * right side read
151
		 */
152
		case ACMPB:
153
		case ACMPL:
154
		case ACMPW:
155
			for(z=0; z<BITS; z++)
156
				r->use2.b[z] |= bit.b[z];
157
			break;
158
 
159
		/*
160
		 * right side write
161
		 */
162
		case ANOP:
163
		case AMOVL:
164
		case AMOVB:
165
		case AMOVW:
166
		case AMOVBLSX:
167
		case AMOVBLZX:
168
		case AMOVWLSX:
169
		case AMOVWLZX:
170
			for(z=0; z<BITS; z++)
171
				r->set.b[z] |= bit.b[z];
172
			break;
173
 
174
		/*
175
		 * right side read+write
176
		 */
177
		case AADDB:
178
		case AADDL:
179
		case AADDW:
180
		case AANDB:
181
		case AANDL:
182
		case AANDW:
183
		case ASUBB:
184
		case ASUBL:
185
		case ASUBW:
186
		case AORB:
187
		case AORL:
188
		case AORW:
189
		case AXORB:
190
		case AXORL:
191
		case AXORW:
192
		case ASALB:
193
		case ASALL:
194
		case ASALW:
195
		case ASARB:
196
		case ASARL:
197
		case ASARW:
198
		case AROLB:
199
		case AROLL:
200
		case AROLW:
201
		case ARORB:
202
		case ARORL:
203
		case ARORW:
204
		case ASHLB:
205
		case ASHLL:
206
		case ASHLW:
207
		case ASHRB:
208
		case ASHRL:
209
		case ASHRW:
210
		case AIMULL:
211
		case AIMULW:
212
		case ANEGL:
213
		case ANOTL:
214
		case AADCL:
215
		case ASBBL:
216
			for(z=0; z<BITS; z++) {
217
				r->set.b[z] |= bit.b[z];
218
				r->use2.b[z] |= bit.b[z];
219
			}
220
			break;
221
 
222
		/*
223
		 * funny
224
		 */
225
		case AFMOVDP:
226
		case AFMOVFP:
227
		case AFMOVLP:
228
		case AFMOVVP:
229
		case AFMOVWP:
230
		case ACALL:
231
			for(z=0; z<BITS; z++)
232
				addrs.b[z] |= bit.b[z];
233
			break;
234
		}
235
 
236
		switch(p->as) {
237
		case AIMULL:
238
		case AIMULW:
239
			if(p->to.type != D_NONE)
240
				break;
241
 
242
		case AIDIVB:
243
		case AIDIVL:
244
		case AIDIVW:
245
		case AIMULB:
246
		case ADIVB:
247
		case ADIVL:
248
		case ADIVW:
249
		case AMULB:
250
		case AMULL:
251
		case AMULW:
252
 
253
		case ACWD:
254
		case ACDQ:
255
			r->regu |= RtoB(D_AX) | RtoB(D_DX);
256
			break;
257
 
258
		case AREP:
259
		case AREPN:
260
		case ALOOP:
261
		case ALOOPEQ:
262
		case ALOOPNE:
263
			r->regu |= RtoB(D_CX);
264
			break;
265
 
266
		case AMOVSB:
267
		case AMOVSL:
268
		case AMOVSW:
269
		case ACMPSB:
270
		case ACMPSL:
271
		case ACMPSW:
272
			r->regu |= RtoB(D_SI) | RtoB(D_DI);
273
			break;
274
 
275
		case ASTOSB:
276
		case ASTOSL:
277
		case ASTOSW:
278
		case ASCASB:
279
		case ASCASL:
280
		case ASCASW:
281
			r->regu |= RtoB(D_AX) | RtoB(D_DI);
282
			break;
283
 
284
		case AINSB:
285
		case AINSL:
286
		case AINSW:
287
		case AOUTSB:
288
		case AOUTSL:
289
		case AOUTSW:
290
			r->regu |= RtoB(D_DI) | RtoB(D_DX);
291
			break;
292
 
293
		case AFSTSW:
294
		case ASAHF:
295
			r->regu |= RtoB(D_AX);
296
			break;
297
		}
298
	}
299
	if(firstr == R)
300
		return;
301
	initpc = pc - val;
302
	npc = val;
303
 
304
	/*
305
	 * pass 2
306
	 * turn branch references to pointers
307
	 * build back pointers
308
	 */
309
	for(r = firstr; r != R; r = r->link) {
310
		p = r->prog;
311
		if(p->to.type == D_BRANCH) {
312
			val = p->to.offset - initpc;
313
			r1 = firstr;
314
			while(r1 != R) {
315
				r2 = r1->log5;
316
				if(r2 != R && val >= r2->pc) {
317
					r1 = r2;
318
					continue;
319
				}
320
				if(r1->pc == val)
321
					break;
322
				r1 = r1->link;
323
			}
324
			if(r1 == R) {
325
				nearln = p->lineno;
326
				diag(Z, "ref not found\n%P", p);
327
				continue;
328
			}
329
			if(r1 == r) {
330
				nearln = p->lineno;
331
				diag(Z, "ref to self\n%P", p);
332
				continue;
333
			}
334
			r->s2 = r1;
335
			r->p2link = r1->p2;
336
			r1->p2 = r;
337
		}
338
	}
339
	if(debug['R']) {
340
		p = firstr->prog;
341
		print("\n%L %D\n", p->lineno, &p->from);
342
	}
343
 
344
	/*
345
	 * pass 2.5
346
	 * find looping structure
347
	 */
348
	for(r = firstr; r != R; r = r->link)
349
		r->active = 0;
350
	change = 0;
351
	loopit(firstr, npc);
352
	if(debug['R'] && debug['v']) {
353
		print("\nlooping structure:\n");
354
		for(r = firstr; r != R; r = r->link) {
355
			print("%ld:%P", r->loop, r->prog);
356
			for(z=0; z<BITS; z++)
357
				bit.b[z] = r->use1.b[z] |
358
					   r->use2.b[z] |
359
					   r->set.b[z];
360
			if(bany(&bit)) {
361
				print("\t");
362
				if(bany(&r->use1))
363
					print(" u1=%B", r->use1);
364
				if(bany(&r->use2))
365
					print(" u2=%B", r->use2);
366
				if(bany(&r->set))
367
					print(" st=%B", r->set);
368
			}
369
			print("\n");
370
		}
371
	}
372
 
373
	/*
374
	 * pass 3
375
	 * iterate propagating usage
376
	 * 	back until flow graph is complete
377
	 */
378
loop1:
379
	change = 0;
380
	for(r = firstr; r != R; r = r->link)
381
		r->active = 0;
382
	for(r = firstr; r != R; r = r->link)
383
		if(r->prog->as == ARET)
384
			prop(r, zbits, zbits);
385
loop11:
386
	/* pick up unreachable code */
387
	i = 0;
388
	for(r = firstr; r != R; r = r1) {
389
		r1 = r->link;
390
		if(r1 && r1->active && !r->active) {
391
			prop(r, zbits, zbits);
392
			i = 1;
393
		}
394
	}
395
	if(i)
396
		goto loop11;
397
	if(change)
398
		goto loop1;
399
 
400
 
401
	/*
402
	 * pass 4
403
	 * iterate propagating register/variable synchrony
404
	 * 	forward until graph is complete
405
	 */
406
loop2:
407
	change = 0;
408
	for(r = firstr; r != R; r = r->link)
409
		r->active = 0;
410
	synch(firstr, zbits);
411
	if(change)
412
		goto loop2;
413
 
414
 
415
	/*
416
	 * pass 5
417
	 * isolate regions
418
	 * calculate costs (paint1)
419
	 */
420
	r = firstr;
421
	if(r) {
422
		for(z=0; z<BITS; z++)
423
			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
424
			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
425
		if(bany(&bit)) {
426
			nearln = r->prog->lineno;
427
			warn(Z, "used and not set: %B", bit);
428
			if(debug['R'] && !debug['w'])
429
				print("used and not set: %B\n", bit);
430
		}
431
	}
432
	if(debug['R'] && debug['v'])
433
		print("\nprop structure:\n");
434
	for(r = firstr; r != R; r = r->link)
435
		r->act = zbits;
436
	rgp = region;
437
	nregion = 0;
438
	for(r = firstr; r != R; r = r->link) {
439
		if(debug['R'] && debug['v']) {
440
			print("%P\t", r->prog);
441
			if(bany(&r->set))
442
				print("s:%B ", r->set);
443
			if(bany(&r->refahead))
444
				print("ra:%B ", r->refahead);
445
			if(bany(&r->calahead))
446
				print("ca:%B ", r->calahead);
447
			print("\n");
448
		}
449
		for(z=0; z<BITS; z++)
450
			bit.b[z] = r->set.b[z] &
451
			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
452
		if(bany(&bit)) {
453
			nearln = r->prog->lineno;
454
			warn(Z, "set and not used: %B", bit);
455
			if(debug['R'])
456
				print("set and not used: %B\n", bit);
457
			excise(r);
458
		}
459
		for(z=0; z<BITS; z++)
460
			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
461
		while(bany(&bit)) {
462
			i = bnum(bit);
463
			rgp->enter = r;
464
			rgp->varno = i;
465
			change = 0;
466
			if(debug['R'] && debug['v'])
467
				print("\n");
468
			paint1(r, i);
469
			bit.b[i/32] &= ~(1L<<(i%32));
470
			if(change <= 0) {
471
				if(debug['R'])
472
					print("%L$%d: %B\n",
473
						r->prog->lineno, change, blsh(i));
474
				continue;
475
			}
476
			rgp->cost = change;
477
			nregion++;
478
			if(nregion >= NRGN) {
479
				warn(Z, "too many regions");
480
				goto brk;
481
			}
482
			rgp++;
483
		}
484
	}
485
brk:
486
	qsort(region, nregion, sizeof(region[0]), rcmp);
487
 
488
	/*
489
	 * pass 6
490
	 * determine used registers (paint2)
491
	 * replace code (paint3)
492
	 */
493
	rgp = region;
494
	for(i=0; i<nregion; i++) {
495
		bit = blsh(rgp->varno);
496
		vreg = paint2(rgp->enter, rgp->varno);
497
		vreg = allreg(vreg, rgp);
498
		if(debug['R']) {
499
			print("%L$%d %R: %B\n",
500
				rgp->enter->prog->lineno,
501
				rgp->cost,
502
				rgp->regno,
503
				bit);
504
		}
505
		if(rgp->regno != 0)
506
			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
507
		rgp++;
508
	}
509
	/*
510
	 * pass 7
511
	 * peep-hole on basic block
512
	 */
513
	if(!debug['R'] || debug['P'])
514
		peep();
515
 
516
	/*
517
	 * pass 8
518
	 * recalculate pc
519
	 */
520
	val = initpc;
521
	for(r = firstr; r != R; r = r1) {
522
		r->pc = val;
523
		p = r->prog;
524
		p1 = P;
525
		r1 = r->link;
526
		if(r1 != R)
527
			p1 = r1->prog;
528
		for(; p != p1; p = p->link) {
529
			switch(p->as) {
530
			default:
531
				val++;
532
				break;
533
 
534
			case ANOP:
535
			case ADATA:
536
			case AGLOBL:
537
			case ANAME:
538
			case ASIGNAME:
539
				break;
540
			}
541
		}
542
	}
543
	pc = val;
544
 
545
	/*
546
	 * fix up branches
547
	 */
548
	if(debug['R'])
549
		if(bany(&addrs))
550
			print("addrs: %B\n", addrs);
551
 
552
	r1 = 0; /* set */
553
	for(r = firstr; r != R; r = r->link) {
554
		p = r->prog;
555
		if(p->to.type == D_BRANCH)
556
			p->to.offset = r->s2->pc;
557
		r1 = r;
558
	}
559
 
560
	/*
561
	 * last pass
562
	 * eliminate nops
563
	 * free aux structures
564
	 */
565
	for(p = firstr->prog; p != P; p = p->link){
566
		while(p->link && p->link->as == ANOP)
567
			p->link = p->link->link;
568
	}
569
	if(r1 != R) {
570
		r1->link = freer;
571
		freer = firstr;
572
	}
573
}
574
 
575
/*
576
 * add mov b,rn
577
 * just after r
578
 */
579
void
580
addmove(Reg *r, int bn, int rn, int f)
581
{
582
	Prog *p, *p1;
583
	Adr *a;
584
	Var *v;
585
 
586
	p1 = alloc(sizeof(*p1));
587
	*p1 = zprog;
588
	p = r->prog;
589
 
590
	p1->link = p->link;
591
	p->link = p1;
592
	p1->lineno = p->lineno;
593
 
594
	v = var + bn;
595
 
596
	a = &p1->to;
597
	a->sym = v->sym;
598
	a->offset = v->offset;
599
	a->etype = v->etype;
600
	a->type = v->name;
601
 
602
	p1->as = AMOVL;
603
	if(v->etype == TCHAR || v->etype == TUCHAR)
604
		p1->as = AMOVB;
605
	if(v->etype == TSHORT || v->etype == TUSHORT)
606
		p1->as = AMOVW;
607
 
608
	p1->from.type = rn;
609
	if(!f) {
610
		p1->from = *a;
611
		*a = zprog.from;
612
		a->type = rn;
613
		if(v->etype == TUCHAR)
614
			p1->as = AMOVB;
615
		if(v->etype == TUSHORT)
616
			p1->as = AMOVW;
617
	}
618
	if(debug['R'])
619
		print("%P\t.a%P\n", p, p1);
620
}
621
 
622
ulong
623
doregbits(int r)
624
{
625
	ulong b;
626
 
627
	b = 0;
628
	if(r >= D_INDIR)
629
		r -= D_INDIR;
630
	if(r >= D_AX && r <= D_DI)
631
		b |= RtoB(r);
632
	else
633
	if(r >= D_AL && r <= D_BL)
634
		b |= RtoB(r-D_AL+D_AX);
635
	else
636
	if(r >= D_AH && r <= D_BH)
637
		b |= RtoB(r-D_AH+D_AX);
638
	return b;
639
}
640
 
641
Bits
642
mkvar(Reg *r, Adr *a, int isro)
643
{
644
	Var *v;
645
	int i, t, n, et, z;
646
	long o;
647
	Bits bit;
648
	Sym *s;
649
 
650
	/*
651
	 * mark registers used
652
	 */
653
	t = a->type;
654
	r->regu |= doregbits(t);
655
	r->regu |= doregbits(a->index);
656
	et = a->etype;
657
 
658
	switch(t) {
659
	default:
660
		goto none;
661
	case D_INDIR+D_GS:
662
		if(!isro || 1)
663
			goto none;
664
		n = t;
665
		{static Sym er; a->sym = &er;}
666
		a->sym->name = "$extreg";
667
		break;
668
	case D_ADDR:
669
		a->type = a->index;
670
		bit = mkvar(r, a, 0);
671
		for(z=0; z<BITS; z++)
672
			addrs.b[z] |= bit.b[z];
673
		a->type = t;
674
		goto none;
675
	case D_EXTERN:
676
	case D_STATIC:
677
	case D_PARAM:
678
	case D_AUTO:
679
		n = t;
680
		break;
681
	}
682
	s = a->sym;
683
	if(s == S)
684
		goto none;
685
	if(s->name[0] == '.')
686
		goto none;
687
	o = a->offset;
688
	v = var;
689
	for(i=0; i<nvar; i++) {
690
		if(s == v->sym)
691
		if(n == v->name)
692
		if(o == v->offset)
693
			goto out;
694
		v++;
695
	}
696
	if(nvar >= NVAR) {
697
		if(debug['w'] > 1 && s)
698
			warn(Z, "variable not optimized: %s", s->name);
699
		goto none;
700
	}
701
	i = nvar;
702
	nvar++;
703
	v = &var[i];
704
	v->sym = s;
705
	v->offset = o;
706
	v->name = n;
707
	v->etype = et;
708
	if(debug['R'])
709
		print("bit=%2d et=%2d %D\n", i, et, a);
710
 
711
out:
712
	bit = blsh(i);
713
	if(n == D_EXTERN || n == D_STATIC)
714
		for(z=0; z<BITS; z++)
715
			externs.b[z] |= bit.b[z];
716
	if(n == D_PARAM)
717
		for(z=0; z<BITS; z++)
718
			params.b[z] |= bit.b[z];
719
	if(v->etype != et || !typechlpfd[et])	/* funny punning */
720
		for(z=0; z<BITS; z++)
721
			addrs.b[z] |= bit.b[z];
722
	return bit;
723
 
724
none:
725
	return zbits;
726
}
727
 
728
void
729
prop(Reg *r, Bits ref, Bits cal)
730
{
731
	Reg *r1, *r2;
732
	int z;
733
 
734
	for(r1 = r; r1 != R; r1 = r1->p1) {
735
		for(z=0; z<BITS; z++) {
736
			ref.b[z] |= r1->refahead.b[z];
737
			if(ref.b[z] != r1->refahead.b[z]) {
738
				r1->refahead.b[z] = ref.b[z];
739
				change++;
740
			}
741
			cal.b[z] |= r1->calahead.b[z];
742
			if(cal.b[z] != r1->calahead.b[z]) {
743
				r1->calahead.b[z] = cal.b[z];
744
				change++;
745
			}
746
		}
747
		switch(r1->prog->as) {
748
		case ACALL:
749
			for(z=0; z<BITS; z++) {
750
				cal.b[z] |= ref.b[z] | externs.b[z];
751
				ref.b[z] = 0;
752
			}
753
			break;
754
 
755
		case ATEXT:
756
			for(z=0; z<BITS; z++) {
757
				cal.b[z] = 0;
758
				ref.b[z] = 0;
759
			}
760
			break;
761
 
762
		case ARET:
763
			for(z=0; z<BITS; z++) {
764
				cal.b[z] = externs.b[z];
765
				ref.b[z] = 0;
766
			}
767
		}
768
		for(z=0; z<BITS; z++) {
769
			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
770
				r1->use1.b[z] | r1->use2.b[z];
771
			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
772
			r1->refbehind.b[z] = ref.b[z];
773
			r1->calbehind.b[z] = cal.b[z];
774
		}
775
		if(r1->active)
776
			break;
777
		r1->active = 1;
778
	}
779
	for(; r != r1; r = r->p1)
780
		for(r2 = r->p2; r2 != R; r2 = r2->p2link)
781
			prop(r2, r->refbehind, r->calbehind);
782
}
783
 
784
/*
785
 * find looping structure
786
 *
787
 * 1) find reverse postordering
788
 * 2) find approximate dominators,
789
 *	the actual dominators if the flow graph is reducible
790
 *	otherwise, dominators plus some other non-dominators.
791
 *	See Matthew S. Hecht and Jeffrey D. Ullman,
792
 *	"Analysis of a Simple Algorithm for Global Data Flow Problems",
793
 *	Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
794
 *	Oct. 1-3, 1973, pp.  207-217.
795
 * 3) find all nodes with a predecessor dominated by the current node.
796
 *	such a node is a loop head.
797
 *	recursively, all preds with a greater rpo number are in the loop
798
 */
799
long
800
postorder(Reg *r, Reg **rpo2r, long n)
801
{
802
	Reg *r1;
803
 
804
	r->rpo = 1;
805
	r1 = r->s1;
806
	if(r1 && !r1->rpo)
807
		n = postorder(r1, rpo2r, n);
808
	r1 = r->s2;
809
	if(r1 && !r1->rpo)
810
		n = postorder(r1, rpo2r, n);
811
	rpo2r[n] = r;
812
	n++;
813
	return n;
814
}
815
 
816
long
817
rpolca(long *idom, long rpo1, long rpo2)
818
{
819
	long t;
820
 
821
	if(rpo1 == -1)
822
		return rpo2;
823
	while(rpo1 != rpo2){
824
		if(rpo1 > rpo2){
825
			t = rpo2;
826
			rpo2 = rpo1;
827
			rpo1 = t;
828
		}
829
		while(rpo1 < rpo2){
830
			t = idom[rpo2];
831
			if(t >= rpo2)
832
				fatal(Z, "bad idom");
833
			rpo2 = t;
834
		}
835
	}
836
	return rpo1;
837
}
838
 
839
int
840
doms(long *idom, long r, long s)
841
{
842
	while(s > r)
843
		s = idom[s];
844
	return s == r;
845
}
846
 
847
int
848
loophead(long *idom, Reg *r)
849
{
850
	long src;
851
 
852
	src = r->rpo;
853
	if(r->p1 != R && doms(idom, src, r->p1->rpo))
854
		return 1;
855
	for(r = r->p2; r != R; r = r->p2link)
856
		if(doms(idom, src, r->rpo))
857
			return 1;
858
	return 0;
859
}
860
 
861
void
862
loopmark(Reg **rpo2r, long head, Reg *r)
863
{
864
	if(r->rpo < head || r->active == head)
865
		return;
866
	r->active = head;
867
	r->loop += LOOP;
868
	if(r->p1 != R)
869
		loopmark(rpo2r, head, r->p1);
870
	for(r = r->p2; r != R; r = r->p2link)
871
		loopmark(rpo2r, head, r);
872
}
873
 
874
void
875
loopit(Reg *r, long nr)
876
{
877
	Reg *r1;
878
	long i, d, me;
879
 
880
	if(nr > maxnr) {
881
		rpo2r = alloc(nr * sizeof(Reg*));
882
		idom = alloc(nr * sizeof(long));
883
		maxnr = nr;
884
	}
885
 
886
	d = postorder(r, rpo2r, 0);
887
	if(d > nr)
888
		fatal(Z, "too many reg nodes");
889
	nr = d;
890
	for(i = 0; i < nr / 2; i++){
891
		r1 = rpo2r[i];
892
		rpo2r[i] = rpo2r[nr - 1 - i];
893
		rpo2r[nr - 1 - i] = r1;
894
	}
895
	for(i = 0; i < nr; i++)
896
		rpo2r[i]->rpo = i;
897
 
898
	idom[0] = 0;
899
	for(i = 0; i < nr; i++){
900
		r1 = rpo2r[i];
901
		me = r1->rpo;
902
		d = -1;
903
		if(r1->p1 != R && r1->p1->rpo < me)
904
			d = r1->p1->rpo;
905
		for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
906
			if(r1->rpo < me)
907
				d = rpolca(idom, d, r1->rpo);
908
		idom[i] = d;
909
	}
910
 
911
	for(i = 0; i < nr; i++){
912
		r1 = rpo2r[i];
913
		r1->loop++;
914
		if(r1->p2 != R && loophead(idom, r1))
915
			loopmark(rpo2r, i, r1);
916
	}
917
}
918
 
919
void
920
synch(Reg *r, Bits dif)
921
{
922
	Reg *r1;
923
	int z;
924
 
925
	for(r1 = r; r1 != R; r1 = r1->s1) {
926
		for(z=0; z<BITS; z++) {
927
			dif.b[z] = (dif.b[z] &
928
				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
929
					r1->set.b[z] | r1->regdiff.b[z];
930
			if(dif.b[z] != r1->regdiff.b[z]) {
931
				r1->regdiff.b[z] = dif.b[z];
932
				change++;
933
			}
934
		}
935
		if(r1->active)
936
			break;
937
		r1->active = 1;
938
		for(z=0; z<BITS; z++)
939
			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
940
		if(r1->s2 != R)
941
			synch(r1->s2, dif);
942
	}
943
}
944
 
945
ulong
946
allreg(ulong b, Rgn *r)
947
{
948
	Var *v;
949
	int i;
950
 
951
	v = var + r->varno;
952
	r->regno = 0;
953
	switch(v->etype) {
954
 
955
	default:
956
		diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
957
		break;
958
 
959
	case TCHAR:
960
	case TUCHAR:
961
	case TSHORT:
962
	case TUSHORT:
963
	case TINT:
964
	case TUINT:
965
	case TLONG:
966
	case TULONG:
967
	case TIND:
968
	case TARRAY:
969
		i = BtoR(~b);
970
		if(i && r->cost > 0) {
971
			r->regno = i;
972
			return RtoB(i);
973
		}
974
		break;
975
 
976
	case TDOUBLE:
977
	case TFLOAT:
978
		break;
979
	}
980
	return 0;
981
}
982
 
983
void
984
paint1(Reg *r, int bn)
985
{
986
	Reg *r1;
987
	Prog *p;
988
	int z;
989
	ulong bb;
990
 
991
	z = bn/32;
992
	bb = 1L<<(bn%32);
993
	if(r->act.b[z] & bb)
994
		return;
995
	for(;;) {
996
		if(!(r->refbehind.b[z] & bb))
997
			break;
998
		r1 = r->p1;
999
		if(r1 == R)
1000
			break;
1001
		if(!(r1->refahead.b[z] & bb))
1002
			break;
1003
		if(r1->act.b[z] & bb)
1004
			break;
1005
		r = r1;
1006
	}
1007
 
1008
	if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
1009
		change -= CLOAD * r->loop;
1010
		if(debug['R'] && debug['v'])
1011
			print("%ld%P\tld %B $%d\n", r->loop,
1012
				r->prog, blsh(bn), change);
1013
	}
1014
	for(;;) {
1015
		r->act.b[z] |= bb;
1016
		p = r->prog;
1017
 
1018
		if(r->use1.b[z] & bb) {
1019
			change += CREF * r->loop;
1020
			if(p->as == AFMOVL || p->as == AFMOVW)
1021
				if(BtoR(bb) != D_F0)
1022
					change = -CINF;
1023
			if(debug['R'] && debug['v'])
1024
				print("%ld%P\tu1 %B $%d\n", r->loop,
1025
					p, blsh(bn), change);
1026
		}
1027
 
1028
		if((r->use2.b[z]|r->set.b[z]) & bb) {
1029
			change += CREF * r->loop;
1030
			if(p->as == AFMOVL || p->as == AFMOVW)
1031
				if(BtoR(bb) != D_F0)
1032
					change = -CINF;
1033
			if(debug['R'] && debug['v'])
1034
				print("%ld%P\tu2 %B $%d\n", r->loop,
1035
					p, blsh(bn), change);
1036
		}
1037
 
1038
		if(STORE(r) & r->regdiff.b[z] & bb) {
1039
			change -= CLOAD * r->loop;
1040
			if(p->as == AFMOVL || p->as == AFMOVW)
1041
				if(BtoR(bb) != D_F0)
1042
					change = -CINF;
1043
			if(debug['R'] && debug['v'])
1044
				print("%ld%P\tst %B $%d\n", r->loop,
1045
					p, blsh(bn), change);
1046
		}
1047
 
1048
		if(r->refbehind.b[z] & bb)
1049
			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1050
				if(r1->refahead.b[z] & bb)
1051
					paint1(r1, bn);
1052
 
1053
		if(!(r->refahead.b[z] & bb))
1054
			break;
1055
		r1 = r->s2;
1056
		if(r1 != R)
1057
			if(r1->refbehind.b[z] & bb)
1058
				paint1(r1, bn);
1059
		r = r->s1;
1060
		if(r == R)
1061
			break;
1062
		if(r->act.b[z] & bb)
1063
			break;
1064
		if(!(r->refbehind.b[z] & bb))
1065
			break;
1066
	}
1067
}
1068
 
1069
ulong
1070
regset(Reg *r, ulong bb)
1071
{
1072
	ulong b, set;
1073
	Adr v;
1074
	int c;
1075
 
1076
	set = 0;
1077
	v = zprog.from;
1078
	while(b = bb & ~(bb-1)) {
1079
		v.type = BtoR(b);
1080
		c = copyu(r->prog, &v, A);
1081
		if(c == 3)
1082
			set |= b;
1083
		bb &= ~b;
1084
	}
1085
	return set;
1086
}
1087
 
1088
ulong
1089
reguse(Reg *r, ulong bb)
1090
{
1091
	ulong b, set;
1092
	Adr v;
1093
	int c;
1094
 
1095
	set = 0;
1096
	v = zprog.from;
1097
	while(b = bb & ~(bb-1)) {
1098
		v.type = BtoR(b);
1099
		c = copyu(r->prog, &v, A);
1100
		if(c == 1 || c == 2 || c == 4)
1101
			set |= b;
1102
		bb &= ~b;
1103
	}
1104
	return set;
1105
}
1106
 
1107
ulong
1108
paint2(Reg *r, int bn)
1109
{
1110
	Reg *r1;
1111
	int z;
1112
	ulong bb, vreg, x;
1113
 
1114
	z = bn/32;
1115
	bb = 1L << (bn%32);
1116
	vreg = regbits;
1117
	if(!(r->act.b[z] & bb))
1118
		return vreg;
1119
	for(;;) {
1120
		if(!(r->refbehind.b[z] & bb))
1121
			break;
1122
		r1 = r->p1;
1123
		if(r1 == R)
1124
			break;
1125
		if(!(r1->refahead.b[z] & bb))
1126
			break;
1127
		if(!(r1->act.b[z] & bb))
1128
			break;
1129
		r = r1;
1130
	}
1131
	for(;;) {
1132
		r->act.b[z] &= ~bb;
1133
 
1134
		vreg |= r->regu;
1135
 
1136
		if(r->refbehind.b[z] & bb)
1137
			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1138
				if(r1->refahead.b[z] & bb)
1139
					vreg |= paint2(r1, bn);
1140
 
1141
		if(!(r->refahead.b[z] & bb))
1142
			break;
1143
		r1 = r->s2;
1144
		if(r1 != R)
1145
			if(r1->refbehind.b[z] & bb)
1146
				vreg |= paint2(r1, bn);
1147
		r = r->s1;
1148
		if(r == R)
1149
			break;
1150
		if(!(r->act.b[z] & bb))
1151
			break;
1152
		if(!(r->refbehind.b[z] & bb))
1153
			break;
1154
	}
1155
 
1156
	bb = vreg;
1157
	for(; r; r=r->s1) {
1158
		x = r->regu & ~bb;
1159
		if(x) {
1160
			vreg |= reguse(r, x);
1161
			bb |= regset(r, x);
1162
		}
1163
	}
1164
	return vreg;
1165
}
1166
 
1167
void
1168
paint3(Reg *r, int bn, long rb, int rn)
1169
{
1170
	Reg *r1;
1171
	Prog *p;
1172
	int z;
1173
	ulong bb;
1174
 
1175
	z = bn/32;
1176
	bb = 1L << (bn%32);
1177
	if(r->act.b[z] & bb)
1178
		return;
1179
	for(;;) {
1180
		if(!(r->refbehind.b[z] & bb))
1181
			break;
1182
		r1 = r->p1;
1183
		if(r1 == R)
1184
			break;
1185
		if(!(r1->refahead.b[z] & bb))
1186
			break;
1187
		if(r1->act.b[z] & bb)
1188
			break;
1189
		r = r1;
1190
	}
1191
 
1192
	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1193
		addmove(r, bn, rn, 0);
1194
	for(;;) {
1195
		r->act.b[z] |= bb;
1196
		p = r->prog;
1197
 
1198
		if(r->use1.b[z] & bb) {
1199
			if(debug['R'])
1200
				print("%P", p);
1201
			addreg(&p->from, rn);
1202
			if(debug['R'])
1203
				print("\t.c%P\n", p);
1204
		}
1205
		if((r->use2.b[z]|r->set.b[z]) & bb) {
1206
			if(debug['R'])
1207
				print("%P", p);
1208
			addreg(&p->to, rn);
1209
			if(debug['R'])
1210
				print("\t.c%P\n", p);
1211
		}
1212
 
1213
		if(STORE(r) & r->regdiff.b[z] & bb)
1214
			addmove(r, bn, rn, 1);
1215
		r->regu |= rb;
1216
 
1217
		if(r->refbehind.b[z] & bb)
1218
			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1219
				if(r1->refahead.b[z] & bb)
1220
					paint3(r1, bn, rb, rn);
1221
 
1222
		if(!(r->refahead.b[z] & bb))
1223
			break;
1224
		r1 = r->s2;
1225
		if(r1 != R)
1226
			if(r1->refbehind.b[z] & bb)
1227
				paint3(r1, bn, rb, rn);
1228
		r = r->s1;
1229
		if(r == R)
1230
			break;
1231
		if(r->act.b[z] & bb)
1232
			break;
1233
		if(!(r->refbehind.b[z] & bb))
1234
			break;
1235
	}
1236
}
1237
 
1238
void
1239
addreg(Adr *a, int rn)
1240
{
1241
 
1242
	a->sym = 0;
1243
	a->offset = 0;
1244
	a->type = rn;
1245
}
1246
 
1247
long
1248
RtoB(int r)
1249
{
1250
 
1251
	if(r < D_AX || r > D_DI)
1252
		return 0;
1253
	return 1L << (r-D_AX);
1254
}
1255
 
1256
int
1257
BtoR(long b)
1258
{
1259
 
1260
	b &= 0xffL;
1261
	if(b == 0)
1262
		return 0;
1263
	return bitno(b) + D_AX;
1264
}