Warning: Attempt to read property "date" on null in /usr/local/www/websvn.planix.org/blame.php on line 247

Warning: Attempt to read property "msg" on null in /usr/local/www/websvn.planix.org/blame.php on line 247
WebSVN – planix.SVN – Blame – /os/branches/feature_unix/sys/src/cmd/vc/reg.c – Rev 2

Subversion Repositories planix.SVN

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
#include "gc.h"
2
 
3
void	addsplits(void);
4
 
5
Reg*
6
rega(void)
7
{
8
	Reg *r;
9
 
10
	r = freer;
11
	if(r == R) {
12
		r = alloc(sizeof(*r));
13
	} else
14
		freer = r->link;
15
 
16
	*r = zreg;
17
	return r;
18
}
19
 
20
int
21
rcmp(const void *a1, const void *a2)
22
{
23
	Rgn *p1, *p2;
24
	int c1, c2;
25
 
26
	p1 = (Rgn*)a1;
27
	p2 = (Rgn*)a2;
28
	c1 = p2->cost;
29
	c2 = p1->cost;
30
	if(c1 -= c2)
31
		return c1;
32
	return p2->varno - p1->varno;
33
}
34
 
35
void
36
regopt(Prog *p)
37
{
38
	Reg *r, *r1, *r2;
39
	Prog *p1;
40
	int i, z;
41
	long initpc, val, npc;
42
	ulong vreg;
43
	Bits bit;
44
	struct
45
	{
46
		long	m;
47
		long	c;
48
		Reg*	p;
49
	} log5[6], *lp;
50
 
51
	firstr = R;
52
	lastr = R;
53
	nvar = 0;
54
	regbits = 0;
55
	for(z=0; z<BITS; z++) {
56
		externs.b[z] = 0;
57
		params.b[z] = 0;
58
		consts.b[z] = 0;
59
		addrs.b[z] = 0;
60
	}
61
 
62
	/*
63
	 * pass 1
64
	 * build aux data structure
65
	 * allocate pcs
66
	 * find use and set of variables
67
	 */
68
	val = 5L * 5L * 5L * 5L * 5L;
69
	lp = log5;
70
	for(i=0; i<5; i++) {
71
		lp->m = val;
72
		lp->c = 0;
73
		lp->p = R;
74
		val /= 5L;
75
		lp++;
76
	}
77
	val = 0;
78
	for(; p != P; p = p->link) {
79
		switch(p->as) {
80
		case ADATA:
81
		case AGLOBL:
82
		case ANAME:
83
		case ASIGNAME:
84
			continue;
85
		}
86
		r = rega();
87
		if(firstr == R) {
88
			firstr = r;
89
			lastr = r;
90
		} else {
91
			lastr->link = r;
92
			r->p1 = lastr;
93
			lastr->s1 = r;
94
			lastr = r;
95
		}
96
		r->prog = p;
97
		r->pc = val;
98
		val++;
99
 
100
		lp = log5;
101
		for(i=0; i<5; i++) {
102
			lp->c--;
103
			if(lp->c <= 0) {
104
				lp->c = lp->m;
105
				if(lp->p != R)
106
					lp->p->log5 = r;
107
				lp->p = r;
108
				(lp+1)->c = 0;
109
				break;
110
			}
111
			lp++;
112
		}
113
 
114
		r1 = r->p1;
115
		if(r1 != R)
116
		switch(r1->prog->as) {
117
		case ARET:
118
		case AJMP:
119
		case ARFE:
120
			r->p1 = R;
121
			r1->s1 = R;
122
		}
123
 
124
		/*
125
		 * left side always read
126
		 */
127
		bit = mkvar(&p->from, p->as==AMOVW);
128
		for(z=0; z<BITS; z++)
129
			r->use1.b[z] |= bit.b[z];
130
 
131
		/*
132
		 * right side depends on opcode
133
		 */
134
		bit = mkvar(&p->to, 0);
135
		if(bany(&bit))
136
		switch(p->as) {
137
		default:
138
			diag(Z, "reg: unknown asop: %A", p->as);
139
			break;
140
 
141
		/*
142
		 * right side write
143
		 */
144
		case ANOP:
145
		case AMOVB:
146
		case AMOVBU:
147
		case AMOVH:
148
		case AMOVHU:
149
		case AMOVW:
150
		case AMOVF:
151
		case AMOVD:
152
			for(z=0; z<BITS; z++)
153
				r->set.b[z] |= bit.b[z];
154
			break;
155
 
156
		/*
157
		 * funny
158
		 */
159
		case AJAL:
160
			for(z=0; z<BITS; z++)
161
				addrs.b[z] |= bit.b[z];
162
			break;
163
		}
164
	}
165
	if(firstr == R)
166
		return;
167
	initpc = pc - val;
168
	npc = val;
169
 
170
	/*
171
	 * pass 2
172
	 * turn branch references to pointers
173
	 * build back pointers
174
	 */
175
	for(r = firstr; r != R; r = r->link) {
176
		p = r->prog;
177
		if(p->to.type == D_BRANCH) {
178
			val = p->to.offset - initpc;
179
			r1 = firstr;
180
			while(r1 != R) {
181
				r2 = r1->log5;
182
				if(r2 != R && val >= r2->pc) {
183
					r1 = r2;
184
					continue;
185
				}
186
				if(r1->pc == val)
187
					break;
188
				r1 = r1->link;
189
			}
190
			if(r1 == R) {
191
				nearln = p->lineno;
192
				diag(Z, "ref not found\n%P", p);
193
				continue;
194
			}
195
			if(r1 == r) {
196
				nearln = p->lineno;
197
				diag(Z, "ref to self\n%P", p);
198
				continue;
199
			}
200
			r->s2 = r1;
201
			r->p2link = r1->p2;
202
			r1->p2 = r;
203
		}
204
	}
205
	if(debug['R']) {
206
		p = firstr->prog;
207
		print("\n%L %D\n", p->lineno, &p->from);
208
	}
209
 
210
	/*
211
	 * pass 2.5
212
	 * find looping structure
213
	 */
214
	for(r = firstr; r != R; r = r->link)
215
		r->active = 0;
216
	change = 0;
217
	loopit(firstr, npc);
218
 
219
	/*
220
	 * pass 3
221
	 * iterate propagating usage
222
	 * 	back until flow graph is complete
223
	 */
224
loop1:
225
	change = 0;
226
	for(r = firstr; r != R; r = r->link)
227
		r->active = 0;
228
	for(r = firstr; r != R; r = r->link)
229
		if(r->prog->as == ARET)
230
			prop(r, zbits, zbits);
231
loop11:
232
	/* pick up unreachable code */
233
	i = 0;
234
	for(r = firstr; r != R; r = r1) {
235
		r1 = r->link;
236
		if(r1 && r1->active && !r->active) {
237
			prop(r, zbits, zbits);
238
			i = 1;
239
		}
240
	}
241
	if(i)
242
		goto loop11;
243
	if(change)
244
		goto loop1;
245
 
246
 
247
	/*
248
	 * pass 4
249
	 * iterate propagating register/variable synchrony
250
	 * 	forward until graph is complete
251
	 */
252
loop2:
253
	change = 0;
254
	for(r = firstr; r != R; r = r->link)
255
		r->active = 0;
256
	synch(firstr, zbits);
257
	if(change)
258
		goto loop2;
259
 
260
	addsplits();
261
 
262
	if(debug['R'] && debug['v']) {
263
		print("\nprop structure:\n");
264
		for(r = firstr; r != R; r = r->link) {
265
			print("%ld:%P", r->loop, r->prog);
266
			for(z=0; z<BITS; z++)
267
				bit.b[z] = r->set.b[z] |
268
					r->refahead.b[z] | r->calahead.b[z] |
269
					r->refbehind.b[z] | r->calbehind.b[z] |
270
					r->use1.b[z] | r->use2.b[z];
271
			if(bany(&bit)) {
272
				print("\t");
273
				if(bany(&r->use1))
274
					print(" u1=%B", r->use1);
275
				if(bany(&r->use2))
276
					print(" u2=%B", r->use2);
277
				if(bany(&r->set))
278
					print(" st=%B", r->set);
279
				if(bany(&r->refahead))
280
					print(" ra=%B", r->refahead);
281
				if(bany(&r->calahead))
282
					print(" ca=%B", r->calahead);
283
				if(bany(&r->refbehind))
284
					print(" rb=%B", r->refbehind);
285
				if(bany(&r->calbehind))
286
					print(" cb=%B", r->calbehind);
287
				if(bany(&r->regdiff))
288
					print(" rd=%B", r->regdiff);
289
			}
290
			print("\n");
291
		}
292
	}
293
 
294
	/*
295
	 * pass 5
296
	 * isolate regions
297
	 * calculate costs (paint1)
298
	 */
299
	r = firstr;
300
	if(r) {
301
		for(z=0; z<BITS; z++)
302
			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
303
			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
304
		if(bany(&bit)) {
305
			nearln = r->prog->lineno;
306
			warn(Z, "used and not set: %B", bit);
307
			if(debug['R'] && !debug['w'])
308
				print("used and not set: %B\n", bit);
309
		}
310
	}
311
 
312
	for(r = firstr; r != R; r = r->link)
313
		r->act = zbits;
314
	rgp = region;
315
	nregion = 0;
316
	for(r = firstr; r != R; r = r->link) {
317
		for(z=0; z<BITS; z++)
318
			bit.b[z] = r->set.b[z] &
319
			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
320
		if(bany(&bit)) {
321
			nearln = r->prog->lineno;
322
			warn(Z, "set and not used: %B", bit);
323
			if(debug['R'])
324
				print("set and not used: %B\n", bit);
325
			excise(r);
326
		}
327
		for(z=0; z<BITS; z++)
328
			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
329
		while(bany(&bit)) {
330
			i = bnum(bit);
331
			rgp->enter = r;
332
			rgp->varno = i;
333
			change = 0;
334
			if(debug['R'] && debug['v'])
335
				print("\n");
336
			paint1(r, i);
337
			bit.b[i/32] &= ~(1L<<(i%32));
338
			if(change <= 0) {
339
				if(debug['R'])
340
					print("%L $%d: %B\n",
341
						r->prog->lineno, change, blsh(i));
342
				continue;
343
			}
344
			rgp->cost = change;
345
			nregion++;
346
			if(nregion >= NRGN) {
347
				warn(Z, "too many regions");
348
				goto brk;
349
			}
350
			rgp++;
351
		}
352
	}
353
brk:
354
	qsort(region, nregion, sizeof(region[0]), rcmp);
355
 
356
	/*
357
	 * pass 6
358
	 * determine used registers (paint2)
359
	 * replace code (paint3)
360
	 */
361
	rgp = region;
362
	for(i=0; i<nregion; i++) {
363
		bit = blsh(rgp->varno);
364
		vreg = paint2(rgp->enter, rgp->varno);
365
		vreg = allreg(vreg, rgp);
366
		if(debug['R']) {
367
			if(rgp->regno >= NREG)
368
				print("%L $%d F%d: %B\n",
369
					rgp->enter->prog->lineno,
370
					rgp->cost,
371
					rgp->regno-NREG,
372
					bit);
373
			else
374
				print("%L $%d R%d: %B\n",
375
					rgp->enter->prog->lineno,
376
					rgp->cost,
377
					rgp->regno,
378
					bit);
379
		}
380
		if(rgp->regno != 0)
381
			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
382
		rgp++;
383
	}
384
	/*
385
	 * pass 7
386
	 * peep-hole on basic block
387
	 */
388
	if(!debug['R'] || debug['P'])
389
		peep();
390
 
391
	/*
392
	 * pass 8
393
	 * recalculate pc
394
	 */
395
	val = initpc;
396
	for(r = firstr; r != R; r = r1) {
397
		r->pc = val;
398
		p = r->prog;
399
		p1 = P;
400
		r1 = r->link;
401
		if(r1 != R)
402
			p1 = r1->prog;
403
		for(; p != p1; p = p->link) {
404
			switch(p->as) {
405
			default:
406
				val++;
407
				break;
408
 
409
			case ANOP:
410
			case ADATA:
411
			case AGLOBL:
412
			case ANAME:
413
			case ASIGNAME:
414
				break;
415
			}
416
		}
417
	}
418
	pc = val;
419
 
420
	/*
421
	 * fix up branches
422
	 */
423
	if(debug['R'])
424
		if(bany(&addrs))
425
			print("addrs: %B\n", addrs);
426
 
427
	r1 = 0; /* set */
428
	for(r = firstr; r != R; r = r->link) {
429
		p = r->prog;
430
		if(p->to.type == D_BRANCH)
431
			p->to.offset = r->s2->pc;
432
		r1 = r;
433
	}
434
 
435
	/*
436
	 * last pass
437
	 * eliminate nops
438
	 * free aux structures
439
	 */
440
	for(p = firstr->prog; p != P; p = p->link){
441
		while(p->link && p->link->as == ANOP)
442
			p->link = p->link->link;
443
	}
444
	if(r1 != R) {
445
		r1->link = freer;
446
		freer = firstr;
447
	}
448
}
449
 
450
void
451
addsplits(void)
452
{
453
	Reg *r, *r1;
454
	int z, i;
455
	Bits bit;
456
 
457
	for(r = firstr; r != R; r = r->link) {
458
		if(r->loop > 1)
459
			continue;
460
		if(r->prog->as == AJAL)
461
			continue;
462
		for(r1 = r->p2; r1 != R; r1 = r1->p2link) {
463
			if(r1->loop <= 1)
464
				continue;
465
			for(z=0; z<BITS; z++)
466
				bit.b[z] = r1->calbehind.b[z] &
467
					(r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
468
					~(r->calahead.b[z] & addrs.b[z]);
469
			while(bany(&bit)) {
470
				i = bnum(bit);
471
				bit.b[i/32] &= ~(1L << (i%32));
472
			}
473
		}
474
	}
475
}
476
 
477
/*
478
 * add mov b,rn
479
 * just after r
480
 */
481
void
482
addmove(Reg *r, int bn, int rn, int f)
483
{
484
	Prog *p, *p1;
485
	Adr *a;
486
	Var *v;
487
 
488
	p1 = alloc(sizeof(*p1));
489
	*p1 = zprog;
490
	p = r->prog;
491
 
492
	p1->link = p->link;
493
	p->link = p1;
494
	p1->lineno = p->lineno;
495
 
496
	v = var + bn;
497
 
498
	a = &p1->to;
499
	a->sym = v->sym;
500
	a->name = v->name;
501
	a->offset = v->offset;
502
	a->etype = v->etype;
503
	a->type = D_OREG;
504
	if(a->etype == TARRAY || a->sym == S)
505
		a->type = D_CONST;
506
 
507
	p1->as = AMOVW;
508
	if(v->etype == TCHAR || v->etype == TUCHAR)
509
		p1->as = AMOVB;
510
	if(v->etype == TSHORT || v->etype == TUSHORT)
511
		p1->as = AMOVH;
512
	if(v->etype == TFLOAT)
513
		p1->as = AMOVF;
514
	if(v->etype == TDOUBLE)
515
		p1->as = AMOVD;
516
 
517
	p1->from.type = D_REG;
518
	p1->from.reg = rn;
519
	if(rn >= NREG) {
520
		p1->from.type = D_FREG;
521
		p1->from.reg = rn-NREG;
522
	}
523
	if(!f) {
524
		p1->from = *a;
525
		*a = zprog.from;
526
		a->type = D_REG;
527
		a->reg = rn;
528
		if(rn >= NREG) {
529
			a->type = D_FREG;
530
			a->reg = rn-NREG;
531
		}
532
		if(v->etype == TUCHAR)
533
			p1->as = AMOVBU;
534
		if(v->etype == TUSHORT)
535
			p1->as = AMOVHU;
536
	}
537
	if(debug['R'])
538
		print("%P\t.a%P\n", p, p1);
539
}
540
 
541
Bits
542
mkvar(Adr *a, int docon)
543
{
544
	Var *v;
545
	int i, t, n, et, z;
546
	long o;
547
	Bits bit;
548
	Sym *s;
549
 
550
	t = a->type;
551
	if(t == D_REG && a->reg != NREG)
552
		regbits |= RtoB(a->reg);
553
	if(t == D_FREG && a->reg != NREG)
554
		regbits |= FtoB(a->reg);
555
	s = a->sym;
556
	o = a->offset;
557
	et = a->etype;
558
	if(s == S) {
559
		if(t != D_CONST || !docon || a->reg != NREG)
560
			goto none;
561
		et = TLONG;
562
	}
563
	if(t == D_CONST) {
564
		if(s == S && sval(o))
565
			goto none;
566
	}
567
 
568
	n = a->name;
569
	v = var;
570
	for(i=0; i<nvar; i++) {
571
		if(s == v->sym)
572
		if(n == v->name)
573
		if(o == v->offset)
574
			goto out;
575
		v++;
576
	}
577
	if(s)
578
		if(s->name[0] == '.')
579
			goto none;
580
	if(nvar >= NVAR) {
581
		if(debug['w'] > 1 && s)
582
			warn(Z, "variable not optimized: %s", s->name);
583
		goto none;
584
	}
585
	i = nvar;
586
	nvar++;
587
	v = &var[i];
588
	v->sym = s;
589
	v->offset = o;
590
	v->etype = et;
591
	v->name = n;
592
	if(debug['R'])
593
		print("bit=%2d et=%2d %D\n", i, et, a);
594
out:
595
	bit = blsh(i);
596
	if(n == D_EXTERN || n == D_STATIC)
597
		for(z=0; z<BITS; z++)
598
			externs.b[z] |= bit.b[z];
599
	if(n == D_PARAM)
600
		for(z=0; z<BITS; z++)
601
			params.b[z] |= bit.b[z];
602
	if(v->etype != et || !typechlpfd[et])	/* funny punning */
603
		for(z=0; z<BITS; z++)
604
			addrs.b[z] |= bit.b[z];
605
	if(t == D_CONST) {
606
		if(s == S) {
607
			for(z=0; z<BITS; z++)
608
				consts.b[z] |= bit.b[z];
609
			return bit;
610
		}
611
		if(et != TARRAY)
612
			for(z=0; z<BITS; z++)
613
				addrs.b[z] |= bit.b[z];
614
		for(z=0; z<BITS; z++)
615
			params.b[z] |= bit.b[z];
616
		return bit;
617
	}
618
	if(t == D_OREG)
619
		return bit;
620
 
621
none:
622
	return zbits;
623
}
624
 
625
void
626
prop(Reg *r, Bits ref, Bits cal)
627
{
628
	Reg *r1, *r2;
629
	int z;
630
 
631
	for(r1 = r; r1 != R; r1 = r1->p1) {
632
		for(z=0; z<BITS; z++) {
633
			ref.b[z] |= r1->refahead.b[z];
634
			if(ref.b[z] != r1->refahead.b[z]) {
635
				r1->refahead.b[z] = ref.b[z];
636
				change++;
637
			}
638
			cal.b[z] |= r1->calahead.b[z];
639
			if(cal.b[z] != r1->calahead.b[z]) {
640
				r1->calahead.b[z] = cal.b[z];
641
				change++;
642
			}
643
		}
644
		switch(r1->prog->as) {
645
		case AJAL:
646
			for(z=0; z<BITS; z++) {
647
				cal.b[z] |= ref.b[z] | externs.b[z];
648
				ref.b[z] = 0;
649
			}
650
			break;
651
 
652
		case ATEXT:
653
			for(z=0; z<BITS; z++) {
654
				cal.b[z] = 0;
655
				ref.b[z] = 0;
656
			}
657
			break;
658
 
659
		case ARET:
660
			for(z=0; z<BITS; z++) {
661
				cal.b[z] = externs.b[z];
662
				ref.b[z] = 0;
663
			}
664
		}
665
		for(z=0; z<BITS; z++) {
666
			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
667
				r1->use1.b[z] | r1->use2.b[z];
668
			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
669
			r1->refbehind.b[z] = ref.b[z];
670
			r1->calbehind.b[z] = cal.b[z];
671
		}
672
		if(r1->active)
673
			break;
674
		r1->active = 1;
675
	}
676
	for(; r != r1; r = r->p1)
677
		for(r2 = r->p2; r2 != R; r2 = r2->p2link)
678
			prop(r2, r->refbehind, r->calbehind);
679
}
680
 
681
/*
682
 * find looping structure
683
 *
684
 * 1) find reverse postordering
685
 * 2) find approximate dominators,
686
 *	the actual dominators if the flow graph is reducible
687
 *	otherwise, dominators plus some other non-dominators.
688
 *	See Matthew S. Hecht and Jeffrey D. Ullman,
689
 *	"Analysis of a Simple Algorithm for Global Data Flow Problems",
690
 *	Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
691
 *	Oct. 1-3, 1973, pp.  207-217.
692
 * 3) find all nodes with a predecessor dominated by the current node.
693
 *	such a node is a loop head.
694
 *	recursively, all preds with a greater rpo number are in the loop
695
 */
696
long
697
postorder(Reg *r, Reg **rpo2r, long n)
698
{
699
	Reg *r1;
700
 
701
	r->rpo = 1;
702
	r1 = r->s1;
703
	if(r1 && !r1->rpo)
704
		n = postorder(r1, rpo2r, n);
705
	r1 = r->s2;
706
	if(r1 && !r1->rpo)
707
		n = postorder(r1, rpo2r, n);
708
	rpo2r[n] = r;
709
	n++;
710
	return n;
711
}
712
 
713
long
714
rpolca(long *idom, long rpo1, long rpo2)
715
{
716
	long t;
717
 
718
	if(rpo1 == -1)
719
		return rpo2;
720
	while(rpo1 != rpo2){
721
		if(rpo1 > rpo2){
722
			t = rpo2;
723
			rpo2 = rpo1;
724
			rpo1 = t;
725
		}
726
		while(rpo1 < rpo2){
727
			t = idom[rpo2];
728
			if(t >= rpo2)
729
				fatal(Z, "bad idom");
730
			rpo2 = t;
731
		}
732
	}
733
	return rpo1;
734
}
735
 
736
int
737
doms(long *idom, long r, long s)
738
{
739
	while(s > r)
740
		s = idom[s];
741
	return s == r;
742
}
743
 
744
int
745
loophead(long *idom, Reg *r)
746
{
747
	long src;
748
 
749
	src = r->rpo;
750
	if(r->p1 != R && doms(idom, src, r->p1->rpo))
751
		return 1;
752
	for(r = r->p2; r != R; r = r->p2link)
753
		if(doms(idom, src, r->rpo))
754
			return 1;
755
	return 0;
756
}
757
 
758
void
759
loopmark(Reg **rpo2r, long head, Reg *r)
760
{
761
	if(r->rpo < head || r->active == head)
762
		return;
763
	r->active = head;
764
	r->loop += LOOP;
765
	if(r->p1 != R)
766
		loopmark(rpo2r, head, r->p1);
767
	for(r = r->p2; r != R; r = r->p2link)
768
		loopmark(rpo2r, head, r);
769
}
770
 
771
void
772
loopit(Reg *r, long nr)
773
{
774
	Reg *r1;
775
	long i, d, me;
776
 
777
	if(nr > maxnr) {
778
		rpo2r = alloc(nr * sizeof(Reg*));
779
		idom = alloc(nr * sizeof(long));
780
		maxnr = nr;
781
	}
782
 
783
	d = postorder(r, rpo2r, 0);
784
	if(d > nr)
785
		fatal(Z, "too many reg nodes");
786
	nr = d;
787
	for(i = 0; i < nr / 2; i++){
788
		r1 = rpo2r[i];
789
		rpo2r[i] = rpo2r[nr - 1 - i];
790
		rpo2r[nr - 1 - i] = r1;
791
	}
792
	for(i = 0; i < nr; i++)
793
		rpo2r[i]->rpo = i;
794
 
795
	idom[0] = 0;
796
	for(i = 0; i < nr; i++){
797
		r1 = rpo2r[i];
798
		me = r1->rpo;
799
		d = -1;
800
		if(r1->p1 != R && r1->p1->rpo < me)
801
			d = r1->p1->rpo;
802
		for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
803
			if(r1->rpo < me)
804
				d = rpolca(idom, d, r1->rpo);
805
		idom[i] = d;
806
	}
807
 
808
	for(i = 0; i < nr; i++){
809
		r1 = rpo2r[i];
810
		r1->loop++;
811
		if(r1->p2 != R && loophead(idom, r1))
812
			loopmark(rpo2r, i, r1);
813
	}
814
}
815
 
816
void
817
synch(Reg *r, Bits dif)
818
{
819
	Reg *r1;
820
	int z;
821
 
822
	for(r1 = r; r1 != R; r1 = r1->s1) {
823
		for(z=0; z<BITS; z++) {
824
			dif.b[z] = (dif.b[z] &
825
				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
826
					r1->set.b[z] | r1->regdiff.b[z];
827
			if(dif.b[z] != r1->regdiff.b[z]) {
828
				r1->regdiff.b[z] = dif.b[z];
829
				change++;
830
			}
831
		}
832
		if(r1->active)
833
			break;
834
		r1->active = 1;
835
		for(z=0; z<BITS; z++)
836
			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
837
		if(r1->s2 != R)
838
			synch(r1->s2, dif);
839
	}
840
}
841
 
842
ulong
843
allreg(ulong b, Rgn *r)
844
{
845
	Var *v;
846
	int i;
847
 
848
	v = var + r->varno;
849
	r->regno = 0;
850
	switch(v->etype) {
851
 
852
	default:
853
		diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
854
		break;
855
 
856
	case TCHAR:
857
	case TUCHAR:
858
	case TSHORT:
859
	case TUSHORT:
860
	case TINT:
861
	case TUINT:
862
	case TLONG:
863
	case TULONG:
864
	case TIND:
865
	case TARRAY:
866
		i = BtoR(~b);
867
		if(i && r->cost >= 0) {
868
			r->regno = i;
869
			return RtoB(i);
870
		}
871
		break;
872
 
873
	case TDOUBLE:
874
	case TFLOAT:
875
		i = BtoF(~b);
876
		if(i && r->cost >= 0) {
877
			r->regno = i+NREG;
878
			return FtoB(i);
879
		}
880
		break;
881
	}
882
	return 0;
883
}
884
 
885
void
886
paint1(Reg *r, int bn)
887
{
888
	Reg *r1;
889
	Prog *p;
890
	int z;
891
	ulong bb;
892
 
893
	z = bn/32;
894
	bb = 1L<<(bn%32);
895
	if(r->act.b[z] & bb)
896
		return;
897
	for(;;) {
898
		if(!(r->refbehind.b[z] & bb))
899
			break;
900
		r1 = r->p1;
901
		if(r1 == R)
902
			break;
903
		if(!(r1->refahead.b[z] & bb))
904
			break;
905
		if(r1->act.b[z] & bb)
906
			break;
907
		r = r1;
908
	}
909
 
910
	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
911
		change -= CLOAD * r->loop;
912
		if(debug['R'] && debug['v'])
913
			print("%ld%P\tld %B $%d\n", r->loop,
914
				r->prog, blsh(bn), change);
915
	}
916
	for(;;) {
917
		r->act.b[z] |= bb;
918
		p = r->prog;
919
 
920
		if(r->use1.b[z] & bb) {
921
			change += CREF * r->loop;
922
			if(debug['R'] && debug['v'])
923
				print("%ld%P\tu1 %B $%d\n", r->loop,
924
					p, blsh(bn), change);
925
		}
926
 
927
		if((r->use2.b[z]|r->set.b[z]) & bb) {
928
			change += CREF * r->loop;
929
			if(debug['R'] && debug['v'])
930
				print("%ld%P\tu2 %B $%d\n", r->loop,
931
					p, blsh(bn), change);
932
		}
933
 
934
		if(STORE(r) & r->regdiff.b[z] & bb) {
935
			change -= CLOAD * r->loop;
936
			if(debug['R'] && debug['v'])
937
				print("%ld%P\tst %B $%d\n", r->loop,
938
					p, blsh(bn), change);
939
		}
940
 
941
		if(r->refbehind.b[z] & bb)
942
			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
943
				if(r1->refahead.b[z] & bb)
944
					paint1(r1, bn);
945
 
946
		if(!(r->refahead.b[z] & bb))
947
			break;
948
		r1 = r->s2;
949
		if(r1 != R)
950
			if(r1->refbehind.b[z] & bb)
951
				paint1(r1, bn);
952
		r = r->s1;
953
		if(r == R)
954
			break;
955
		if(r->act.b[z] & bb)
956
			break;
957
		if(!(r->refbehind.b[z] & bb))
958
			break;
959
	}
960
}
961
 
962
ulong
963
paint2(Reg *r, int bn)
964
{
965
	Reg *r1;
966
	int z;
967
	ulong bb, vreg;
968
 
969
	z = bn/32;
970
	bb = 1L << (bn%32);
971
	vreg = regbits;
972
	if(!(r->act.b[z] & bb))
973
		return vreg;
974
	for(;;) {
975
		if(!(r->refbehind.b[z] & bb))
976
			break;
977
		r1 = r->p1;
978
		if(r1 == R)
979
			break;
980
		if(!(r1->refahead.b[z] & bb))
981
			break;
982
		if(!(r1->act.b[z] & bb))
983
			break;
984
		r = r1;
985
	}
986
	for(;;) {
987
		r->act.b[z] &= ~bb;
988
 
989
		vreg |= r->regu;
990
 
991
		if(r->refbehind.b[z] & bb)
992
			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
993
				if(r1->refahead.b[z] & bb)
994
					vreg |= paint2(r1, bn);
995
 
996
		if(!(r->refahead.b[z] & bb))
997
			break;
998
		r1 = r->s2;
999
		if(r1 != R)
1000
			if(r1->refbehind.b[z] & bb)
1001
				vreg |= paint2(r1, bn);
1002
		r = r->s1;
1003
		if(r == R)
1004
			break;
1005
		if(!(r->act.b[z] & bb))
1006
			break;
1007
		if(!(r->refbehind.b[z] & bb))
1008
			break;
1009
	}
1010
	return vreg;
1011
}
1012
 
1013
void
1014
paint3(Reg *r, int bn, long rb, int rn)
1015
{
1016
	Reg *r1;
1017
	Prog *p;
1018
	int z;
1019
	ulong bb;
1020
 
1021
	z = bn/32;
1022
	bb = 1L << (bn%32);
1023
	if(r->act.b[z] & bb)
1024
		return;
1025
	for(;;) {
1026
		if(!(r->refbehind.b[z] & bb))
1027
			break;
1028
		r1 = r->p1;
1029
		if(r1 == R)
1030
			break;
1031
		if(!(r1->refahead.b[z] & bb))
1032
			break;
1033
		if(r1->act.b[z] & bb)
1034
			break;
1035
		r = r1;
1036
	}
1037
 
1038
	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1039
		addmove(r, bn, rn, 0);
1040
	for(;;) {
1041
		r->act.b[z] |= bb;
1042
		p = r->prog;
1043
 
1044
		if(r->use1.b[z] & bb) {
1045
			if(debug['R'])
1046
				print("%P", p);
1047
			addreg(&p->from, rn);
1048
			if(debug['R'])
1049
				print("\t.c%P\n", p);
1050
		}
1051
		if((r->use2.b[z]|r->set.b[z]) & bb) {
1052
			if(debug['R'])
1053
				print("%P", p);
1054
			addreg(&p->to, rn);
1055
			if(debug['R'])
1056
				print("\t.c%P\n", p);
1057
		}
1058
 
1059
		if(STORE(r) & r->regdiff.b[z] & bb)
1060
			addmove(r, bn, rn, 1);
1061
		r->regu |= rb;
1062
 
1063
		if(r->refbehind.b[z] & bb)
1064
			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1065
				if(r1->refahead.b[z] & bb)
1066
					paint3(r1, bn, rb, rn);
1067
 
1068
		if(!(r->refahead.b[z] & bb))
1069
			break;
1070
		r1 = r->s2;
1071
		if(r1 != R)
1072
			if(r1->refbehind.b[z] & bb)
1073
				paint3(r1, bn, rb, rn);
1074
		r = r->s1;
1075
		if(r == R)
1076
			break;
1077
		if(r->act.b[z] & bb)
1078
			break;
1079
		if(!(r->refbehind.b[z] & bb))
1080
			break;
1081
	}
1082
}
1083
 
1084
void
1085
addreg(Adr *a, int rn)
1086
{
1087
 
1088
	a->sym = 0;
1089
	a->name = D_NONE;
1090
	a->type = D_REG;
1091
	a->reg = rn;
1092
	if(rn >= NREG) {
1093
		a->type = D_FREG;
1094
		a->reg = rn-NREG;
1095
	}
1096
}
1097
 
1098
/*
1099
 *	bit	reg
1100
 *	0	R3
1101
 *	1	R4
1102
 *	...	...
1103
 *	19	R22
1104
 *	20	R23
1105
 */
1106
long
1107
RtoB(int r)
1108
{
1109
 
1110
	if(r < 3 || r > 23)
1111
		return 0;
1112
	return 1L << (r-3);
1113
}
1114
 
1115
BtoR(long b)
1116
{
1117
 
1118
	b &= 0x001fffffL;
1119
	if(b == 0)
1120
		return 0;
1121
	return bitno(b) + 3;
1122
}
1123
 
1124
/*
1125
 *	bit	reg
1126
 *	22	F4
1127
 *	23	F6
1128
 *	...	...
1129
 *	31	F22
1130
 */
1131
long
1132
FtoB(int f)
1133
{
1134
 
1135
	if(f < 4 || f > 22 || (f&1))
1136
		return 0;
1137
	return 1L << (f/2 + 20);
1138
}
1139
 
1140
int
1141
BtoF(long b)
1142
{
1143
 
1144
	b &= 0xffc00000L;
1145
	if(b == 0)
1146
		return 0;
1147
	return bitno(b)*2 - 40;
1148
}