Warning: Attempt to read property "date" on null in /usr/local/www/websvn.planix.org/blame.php on line 247

Warning: Attempt to read property "msg" on null in /usr/local/www/websvn.planix.org/blame.php on line 247
WebSVN – planix.SVN – Blame – /os/branches/feature_tlsv12/sys/src/cmd/qc/reg.c – Rev 2

Subversion Repositories planix.SVN

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
#include "gc.h"
2
 
3
Reg*
4
rega(void)
5
{
6
	Reg *r;
7
 
8
	r = freer;
9
	if(r == R) {
10
		r = alloc(sizeof(*r));
11
	} else
12
		freer = r->link;
13
 
14
	*r = zreg;
15
	return r;
16
}
17
 
18
int
19
rcmp(void *a1, void *a2)
20
{
21
	Rgn *p1, *p2;
22
	int c1, c2;
23
 
24
	p1 = a1;
25
	p2 = a2;
26
	c1 = p2->cost;
27
	c2 = p1->cost;
28
	if(c1 -= c2)
29
		return c1;
30
	return p2->varno - p1->varno;
31
}
32
 
33
void
34
regopt(Prog *p)
35
{
36
	Reg *r, *r1, *r2;
37
	Prog *p1;
38
	int i, z;
39
	long initpc, val, npc;
40
	ulong vreg;
41
	Bits bit;
42
	struct
43
	{
44
		long	m;
45
		long	c;
46
		Reg*	p;
47
	} log5[6], *lp;
48
 
49
	firstr = R;
50
	lastr = R;
51
	nvar = 0;
52
	regbits = 0;
53
	for(z=0; z<BITS; z++) {
54
		externs.b[z] = 0;
55
		params.b[z] = 0;
56
		consts.b[z] = 0;
57
		addrs.b[z] = 0;
58
	}
59
 
60
	/*
61
	 * pass 1
62
	 * build aux data structure
63
	 * allocate pcs
64
	 * find use and set of variables
65
	 */
66
	val = 5L * 5L * 5L * 5L * 5L;
67
	lp = log5;
68
	for(i=0; i<5; i++) {
69
		lp->m = val;
70
		lp->c = 0;
71
		lp->p = R;
72
		val /= 5L;
73
		lp++;
74
	}
75
	val = 0;
76
	for(; p != P; p = p->link) {
77
		switch(p->as) {
78
		case ADATA:
79
		case AGLOBL:
80
		case ANAME:
81
		case ASIGNAME:
82
			continue;
83
		}
84
		r = rega();
85
		if(firstr == R) {
86
			firstr = r;
87
			lastr = r;
88
		} else {
89
			lastr->link = r;
90
			r->p1 = lastr;
91
			lastr->s1 = r;
92
			lastr = r;
93
		}
94
		r->prog = p;
95
		r->pc = val;
96
		val++;
97
 
98
		lp = log5;
99
		for(i=0; i<5; i++) {
100
			lp->c--;
101
			if(lp->c <= 0) {
102
				lp->c = lp->m;
103
				if(lp->p != R)
104
					lp->p->log5 = r;
105
				lp->p = r;
106
				(lp+1)->c = 0;
107
				break;
108
			}
109
			lp++;
110
		}
111
 
112
		r1 = r->p1;
113
		if(r1 != R)
114
		switch(r1->prog->as) {
115
		case ARETURN:
116
		case ABR:
117
		case ARFI:
118
			r->p1 = R;
119
			r1->s1 = R;
120
		}
121
 
122
		/*
123
		 * left side always read
124
		 */
125
		bit = mkvar(&p->from, p->as==AMOVW);
126
		for(z=0; z<BITS; z++)
127
			r->use1.b[z] |= bit.b[z];
128
 
129
		/*
130
		 * right side depends on opcode
131
		 */
132
		bit = mkvar(&p->to, 0);
133
		if(bany(&bit))
134
		switch(p->as) {
135
		default:
136
			diag(Z, "reg: unknown asop: %A", p->as);
137
			break;
138
 
139
		/*
140
		 * right side write
141
		 */
142
		case ANOP:
143
		case AMOVB:
144
		case AMOVBU:
145
		case AMOVBZ:
146
		case AMOVBZU:
147
		case AMOVH:
148
		case AMOVHBR:
149
		case AMOVHU:
150
		case AMOVHZ:
151
		case AMOVHZU:
152
		case AMOVW:
153
		case AMOVWU:
154
		case AFMOVD:
155
		case AFMOVDCC:
156
		case AFMOVDU:
157
		case AFMOVS:
158
		case AFMOVSU:
159
		case AFRSP:
160
			for(z=0; z<BITS; z++)
161
				r->set.b[z] |= bit.b[z];
162
			break;
163
 
164
		/*
165
		 * funny
166
		 */
167
		case ABL:
168
			for(z=0; z<BITS; z++)
169
				addrs.b[z] |= bit.b[z];
170
			break;
171
		}
172
	}
173
	if(firstr == R)
174
		return;
175
	initpc = pc - val;
176
	npc = val;
177
 
178
	/*
179
	 * pass 2
180
	 * turn branch references to pointers
181
	 * build back pointers
182
	 */
183
	for(r = firstr; r != R; r = r->link) {
184
		p = r->prog;
185
		if(p->to.type == D_BRANCH) {
186
			val = p->to.offset - initpc;
187
			r1 = firstr;
188
			while(r1 != R) {
189
				r2 = r1->log5;
190
				if(r2 != R && val >= r2->pc) {
191
					r1 = r2;
192
					continue;
193
				}
194
				if(r1->pc == val)
195
					break;
196
				r1 = r1->link;
197
			}
198
			if(r1 == R) {
199
				nearln = p->lineno;
200
				diag(Z, "ref not found\n%P", p);
201
				continue;
202
			}
203
			if(r1 == r) {
204
				nearln = p->lineno;
205
				diag(Z, "ref to self\n%P", p);
206
				continue;
207
			}
208
			r->s2 = r1;
209
			r->p2link = r1->p2;
210
			r1->p2 = r;
211
		}
212
	}
213
	if(debug['R']) {
214
		p = firstr->prog;
215
		print("\n%L %D\n", p->lineno, &p->from);
216
	}
217
 
218
	/*
219
	 * pass 2.5
220
	 * find looping structure
221
	 */
222
	for(r = firstr; r != R; r = r->link)
223
		r->active = 0;
224
	change = 0;
225
	loopit(firstr, npc);
226
	if(debug['R'] && debug['v']) {
227
		print("\nlooping structure:\n");
228
		for(r = firstr; r != R; r = r->link) {
229
			print("%ld:%P", r->loop, r->prog);
230
			for(z=0; z<BITS; z++)
231
				bit.b[z] = r->use1.b[z] |
232
					r->use2.b[z] | r->set.b[z];
233
			if(bany(&bit)) {
234
				print("\t");
235
				if(bany(&r->use1))
236
					print(" u1=%B", r->use1);
237
				if(bany(&r->use2))
238
					print(" u2=%B", r->use2);
239
				if(bany(&r->set))
240
					print(" st=%B", r->set);
241
			}
242
			print("\n");
243
		}
244
	}
245
 
246
	/*
247
	 * pass 3
248
	 * iterate propagating usage
249
	 * 	back until flow graph is complete
250
	 */
251
loop1:
252
	change = 0;
253
	for(r = firstr; r != R; r = r->link)
254
		r->active = 0;
255
	for(r = firstr; r != R; r = r->link)
256
		if(r->prog->as == ARETURN)
257
			prop(r, zbits, zbits);
258
loop11:
259
	/* pick up unreachable code */
260
	i = 0;
261
	for(r = firstr; r != R; r = r1) {
262
		r1 = r->link;
263
		if(r1 && r1->active && !r->active) {
264
			prop(r, zbits, zbits);
265
			i = 1;
266
		}
267
	}
268
	if(i)
269
		goto loop11;
270
	if(change)
271
		goto loop1;
272
 
273
 
274
	/*
275
	 * pass 4
276
	 * iterate propagating register/variable synchrony
277
	 * 	forward until graph is complete
278
	 */
279
loop2:
280
	change = 0;
281
	for(r = firstr; r != R; r = r->link)
282
		r->active = 0;
283
	synch(firstr, zbits);
284
	if(change)
285
		goto loop2;
286
 
287
 
288
	/*
289
	 * pass 5
290
	 * isolate regions
291
	 * calculate costs (paint1)
292
	 */
293
	r = firstr;
294
	if(r) {
295
		for(z=0; z<BITS; z++)
296
			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
297
			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
298
		if(bany(&bit)) {
299
			nearln = r->prog->lineno;
300
			warn(Z, "used and not set: %B", bit);
301
			if(debug['R'] && !debug['w'])
302
				print("used and not set: %B\n", bit);
303
		}
304
	}
305
	if(debug['R'] && debug['v'])
306
		print("\nprop structure:\n");
307
	for(r = firstr; r != R; r = r->link)
308
		r->act = zbits;
309
	rgp = region;
310
	nregion = 0;
311
	for(r = firstr; r != R; r = r->link) {
312
		if(debug['R'] && debug['v'])
313
			print("%P\n	set = %B; rah = %B; cal = %B\n",
314
				r->prog, r->set, r->refahead, r->calahead);
315
		for(z=0; z<BITS; z++)
316
			bit.b[z] = r->set.b[z] &
317
			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
318
		if(bany(&bit)) {
319
			nearln = r->prog->lineno;
320
			warn(Z, "set and not used: %B", bit);
321
			if(debug['R'])
322
				print("set an not used: %B\n", bit);
323
			excise(r);
324
		}
325
		for(z=0; z<BITS; z++)
326
			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
327
		while(bany(&bit)) {
328
			i = bnum(bit);
329
			rgp->enter = r;
330
			rgp->varno = i;
331
			change = 0;
332
			if(debug['R'] && debug['v'])
333
				print("\n");
334
			paint1(r, i);
335
			bit.b[i/32] &= ~(1L<<(i%32));
336
			if(change <= 0) {
337
				if(debug['R'])
338
					print("%L$%d: %B\n",
339
						r->prog->lineno, change, blsh(i));
340
				continue;
341
			}
342
			rgp->cost = change;
343
			nregion++;
344
			if(nregion >= NRGN) {
345
				warn(Z, "too many regions");
346
				goto brk;
347
			}
348
			rgp++;
349
		}
350
	}
351
brk:
352
	qsort(region, nregion, sizeof(region[0]), rcmp);
353
 
354
	/*
355
	 * pass 6
356
	 * determine used registers (paint2)
357
	 * replace code (paint3)
358
	 */
359
	rgp = region;
360
	for(i=0; i<nregion; i++) {
361
		bit = blsh(rgp->varno);
362
		vreg = paint2(rgp->enter, rgp->varno);
363
		vreg = allreg(vreg, rgp);
364
		if(debug['R']) {
365
			if(rgp->regno >= NREG)
366
				print("%L$%d F%d: %B\n",
367
					rgp->enter->prog->lineno,
368
					rgp->cost,
369
					rgp->regno-NREG,
370
					bit);
371
			else
372
				print("%L$%d R%d: %B\n",
373
					rgp->enter->prog->lineno,
374
					rgp->cost,
375
					rgp->regno,
376
					bit);
377
		}
378
		if(rgp->regno != 0)
379
			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
380
		rgp++;
381
	}
382
	/*
383
	 * pass 7
384
	 * peep-hole on basic block
385
	 */
386
	if(!debug['R'] || debug['P'])
387
		peep();
388
 
389
	/*
390
	 * pass 8
391
	 * recalculate pc
392
	 */
393
	val = initpc;
394
	for(r = firstr; r != R; r = r1) {
395
		r->pc = val;
396
		p = r->prog;
397
		p1 = P;
398
		r1 = r->link;
399
		if(r1 != R)
400
			p1 = r1->prog;
401
		for(; p != p1; p = p->link) {
402
			switch(p->as) {
403
			default:
404
				val++;
405
				break;
406
 
407
			case ANOP:
408
			case ADATA:
409
			case AGLOBL:
410
			case ANAME:
411
			case ASIGNAME:
412
				break;
413
			}
414
		}
415
	}
416
	pc = val;
417
 
418
	/*
419
	 * fix up branches
420
	 */
421
	if(debug['R'])
422
		if(bany(&addrs))
423
			print("addrs: %B\n", addrs);
424
 
425
	r1 = 0; /* set */
426
	for(r = firstr; r != R; r = r->link) {
427
		p = r->prog;
428
		if(p->to.type == D_BRANCH)
429
			p->to.offset = r->s2->pc;
430
		r1 = r;
431
	}
432
 
433
	/*
434
	 * last pass
435
	 * eliminate nops
436
	 * free aux structures
437
	 */
438
	for(p = firstr->prog; p != P; p = p->link){
439
		while(p->link && p->link->as == ANOP)
440
			p->link = p->link->link;
441
	}
442
	if(r1 != R) {
443
		r1->link = freer;
444
		freer = firstr;
445
	}
446
}
447
 
448
/*
449
 * add mov b,rn
450
 * just after r
451
 */
452
void
453
addmove(Reg *r, int bn, int rn, int f)
454
{
455
	Prog *p, *p1;
456
	Adr *a;
457
	Var *v;
458
 
459
	p1 = alloc(sizeof(*p1));
460
	*p1 = zprog;
461
	p = r->prog;
462
 
463
	p1->link = p->link;
464
	p->link = p1;
465
	p1->lineno = p->lineno;
466
 
467
	v = var + bn;
468
 
469
	a = &p1->to;
470
	a->sym = v->sym;
471
	a->name = v->name;
472
	a->offset = v->offset;
473
	a->etype = v->etype;
474
	a->type = D_OREG;
475
	if(a->etype == TARRAY || a->sym == S)
476
		a->type = D_CONST;
477
 
478
	p1->as = AMOVW;
479
	if(v->etype == TCHAR || v->etype == TUCHAR)
480
		p1->as = AMOVB;
481
	if(v->etype == TSHORT || v->etype == TUSHORT)
482
		p1->as = AMOVH;
483
	if(v->etype == TFLOAT)
484
		p1->as = AFMOVS;
485
	if(v->etype == TDOUBLE)
486
		p1->as = AFMOVD;
487
 
488
	p1->from.type = D_REG;
489
	p1->from.reg = rn;
490
	if(rn >= NREG) {
491
		p1->from.type = D_FREG;
492
		p1->from.reg = rn-NREG;
493
	}
494
	if(!f) {
495
		p1->from = *a;
496
		*a = zprog.from;
497
		a->type = D_REG;
498
		a->reg = rn;
499
		if(rn >= NREG) {
500
			a->type = D_FREG;
501
			a->reg = rn-NREG;
502
		}
503
		if(v->etype == TUCHAR)
504
			p1->as = AMOVBZ;
505
		if(v->etype == TUSHORT)
506
			p1->as = AMOVHZ;
507
	}
508
	if(debug['R'])
509
		print("%P\t.a%P\n", p, p1);
510
}
511
 
512
Bits
513
mkvar(Adr *a, int docon)
514
{
515
	Var *v;
516
	int i, t, n, et, z;
517
	long o;
518
	Bits bit;
519
	Sym *s;
520
 
521
	t = a->type;
522
	if(t == D_REG && a->reg != NREG)
523
		regbits |= RtoB(a->reg);
524
	if(t == D_FREG && a->reg != NREG)
525
		regbits |= FtoB(a->reg);
526
	s = a->sym;
527
	o = a->offset;
528
	et = a->etype;
529
	if(s == S) {
530
		if(t != D_CONST || !docon || a->reg != NREG)
531
			goto none;
532
		et = TLONG;
533
	}
534
	if(t == D_CONST) {
535
		if(s == S && sval(o))
536
			goto none;
537
	}
538
	n = a->name;
539
	v = var;
540
	for(i=0; i<nvar; i++) {
541
		if(s == v->sym)
542
		if(n == v->name)
543
		if(o == v->offset)
544
			goto out;
545
		v++;
546
	}
547
	if(s)
548
		if(s->name[0] == '.')
549
			goto none;
550
	if(nvar >= NVAR) {
551
		if(debug['w'] > 1 && s)
552
			warn(Z, "variable not optimized: %s", s->name);
553
		goto none;
554
	}
555
	i = nvar;
556
	nvar++;
557
	v = &var[i];
558
	v->sym = s;
559
	v->offset = o;
560
	v->etype = et;
561
	v->name = n;
562
	if(debug['R'])
563
		print("bit=%2d et=%2d %D\n", i, et, a);
564
out:
565
	bit = blsh(i);
566
	if(n == D_EXTERN || n == D_STATIC)
567
		for(z=0; z<BITS; z++)
568
			externs.b[z] |= bit.b[z];
569
	if(n == D_PARAM)
570
		for(z=0; z<BITS; z++)
571
			params.b[z] |= bit.b[z];
572
	if(v->etype != et || !typechlpfd[et])	/* funny punning */
573
		for(z=0; z<BITS; z++)
574
			addrs.b[z] |= bit.b[z];
575
	if(t == D_CONST) {
576
		if(s == S) {
577
			for(z=0; z<BITS; z++)
578
				consts.b[z] |= bit.b[z];
579
			return bit;
580
		}
581
		if(et != TARRAY)
582
			for(z=0; z<BITS; z++)
583
				addrs.b[z] |= bit.b[z];
584
		for(z=0; z<BITS; z++)
585
			params.b[z] |= bit.b[z];
586
		return bit;
587
	}
588
	if(t == D_OREG)
589
		return bit;
590
 
591
none:
592
	return zbits;
593
}
594
 
595
void
596
prop(Reg *r, Bits ref, Bits cal)
597
{
598
	Reg *r1, *r2;
599
	int z;
600
 
601
	for(r1 = r; r1 != R; r1 = r1->p1) {
602
		for(z=0; z<BITS; z++) {
603
			ref.b[z] |= r1->refahead.b[z];
604
			if(ref.b[z] != r1->refahead.b[z]) {
605
				r1->refahead.b[z] = ref.b[z];
606
				change++;
607
			}
608
			cal.b[z] |= r1->calahead.b[z];
609
			if(cal.b[z] != r1->calahead.b[z]) {
610
				r1->calahead.b[z] = cal.b[z];
611
				change++;
612
			}
613
		}
614
		switch(r1->prog->as) {
615
		case ABL:
616
			for(z=0; z<BITS; z++) {
617
				cal.b[z] |= ref.b[z] | externs.b[z];
618
				ref.b[z] = 0;
619
			}
620
			break;
621
 
622
		case ATEXT:
623
			for(z=0; z<BITS; z++) {
624
				cal.b[z] = 0;
625
				ref.b[z] = 0;
626
			}
627
			break;
628
 
629
		case ARETURN:
630
			for(z=0; z<BITS; z++) {
631
				cal.b[z] = externs.b[z];
632
				ref.b[z] = 0;
633
			}
634
		}
635
		for(z=0; z<BITS; z++) {
636
			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
637
				r1->use1.b[z] | r1->use2.b[z];
638
			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
639
			r1->refbehind.b[z] = ref.b[z];
640
			r1->calbehind.b[z] = cal.b[z];
641
		}
642
		if(r1->active)
643
			break;
644
		r1->active = 1;
645
	}
646
	for(; r != r1; r = r->p1)
647
		for(r2 = r->p2; r2 != R; r2 = r2->p2link)
648
			prop(r2, r->refbehind, r->calbehind);
649
}
650
 
651
/*
652
 * find looping structure
653
 *
654
 * 1) find reverse postordering
655
 * 2) find approximate dominators,
656
 *	the actual dominators if the flow graph is reducible
657
 *	otherwise, dominators plus some other non-dominators.
658
 *	See Matthew S. Hecht and Jeffrey D. Ullman,
659
 *	"Analysis of a Simple Algorithm for Global Data Flow Problems",
660
 *	Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
661
 *	Oct. 1-3, 1973, pp.  207-217.
662
 * 3) find all nodes with a predecessor dominated by the current node.
663
 *	such a node is a loop head.
664
 *	recursively, all preds with a greater rpo number are in the loop
665
 */
666
long
667
postorder(Reg *r, Reg **rpo2r, long n)
668
{
669
	Reg *r1;
670
 
671
	r->rpo = 1;
672
	r1 = r->s1;
673
	if(r1 && !r1->rpo)
674
		n = postorder(r1, rpo2r, n);
675
	r1 = r->s2;
676
	if(r1 && !r1->rpo)
677
		n = postorder(r1, rpo2r, n);
678
	rpo2r[n] = r;
679
	n++;
680
	return n;
681
}
682
 
683
long
684
rpolca(long *idom, long rpo1, long rpo2)
685
{
686
	long t;
687
 
688
	if(rpo1 == -1)
689
		return rpo2;
690
	while(rpo1 != rpo2){
691
		if(rpo1 > rpo2){
692
			t = rpo2;
693
			rpo2 = rpo1;
694
			rpo1 = t;
695
		}
696
		while(rpo1 < rpo2){
697
			t = idom[rpo2];
698
			if(t >= rpo2)
699
				fatal(Z, "bad idom");
700
			rpo2 = t;
701
		}
702
	}
703
	return rpo1;
704
}
705
 
706
int
707
doms(long *idom, long r, long s)
708
{
709
	while(s > r)
710
		s = idom[s];
711
	return s == r;
712
}
713
 
714
int
715
loophead(long *idom, Reg *r)
716
{
717
	long src;
718
 
719
	src = r->rpo;
720
	if(r->p1 != R && doms(idom, src, r->p1->rpo))
721
		return 1;
722
	for(r = r->p2; r != R; r = r->p2link)
723
		if(doms(idom, src, r->rpo))
724
			return 1;
725
	return 0;
726
}
727
 
728
void
729
loopmark(Reg **rpo2r, long head, Reg *r)
730
{
731
	if(r->rpo < head || r->active == head)
732
		return;
733
	r->active = head;
734
	r->loop += LOOP;
735
	if(r->p1 != R)
736
		loopmark(rpo2r, head, r->p1);
737
	for(r = r->p2; r != R; r = r->p2link)
738
		loopmark(rpo2r, head, r);
739
}
740
 
741
void
742
loopit(Reg *r, long nr)
743
{
744
	Reg *r1;
745
	long i, d, me;
746
 
747
	if(nr > maxnr) {
748
		rpo2r = alloc(nr * sizeof(Reg*));
749
		idom = alloc(nr * sizeof(long));
750
		maxnr = nr;
751
	}
752
 
753
	d = postorder(r, rpo2r, 0);
754
	if(d > nr)
755
		fatal(Z, "too many reg nodes");
756
	nr = d;
757
	for(i = 0; i < nr / 2; i++){
758
		r1 = rpo2r[i];
759
		rpo2r[i] = rpo2r[nr - 1 - i];
760
		rpo2r[nr - 1 - i] = r1;
761
	}
762
	for(i = 0; i < nr; i++)
763
		rpo2r[i]->rpo = i;
764
 
765
	idom[0] = 0;
766
	for(i = 0; i < nr; i++){
767
		r1 = rpo2r[i];
768
		me = r1->rpo;
769
		d = -1;
770
		if(r1->p1 != R && r1->p1->rpo < me)
771
			d = r1->p1->rpo;
772
		for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
773
			if(r1->rpo < me)
774
				d = rpolca(idom, d, r1->rpo);
775
		idom[i] = d;
776
	}
777
 
778
	for(i = 0; i < nr; i++){
779
		r1 = rpo2r[i];
780
		r1->loop++;
781
		if(r1->p2 != R && loophead(idom, r1))
782
			loopmark(rpo2r, i, r1);
783
	}
784
}
785
 
786
void
787
synch(Reg *r, Bits dif)
788
{
789
	Reg *r1;
790
	int z;
791
 
792
	for(r1 = r; r1 != R; r1 = r1->s1) {
793
		for(z=0; z<BITS; z++) {
794
			dif.b[z] = (dif.b[z] &
795
				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
796
					r1->set.b[z] | r1->regdiff.b[z];
797
			if(dif.b[z] != r1->regdiff.b[z]) {
798
				r1->regdiff.b[z] = dif.b[z];
799
				change++;
800
			}
801
		}
802
		if(r1->active)
803
			break;
804
		r1->active = 1;
805
		for(z=0; z<BITS; z++)
806
			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
807
		if(r1->s2 != R)
808
			synch(r1->s2, dif);
809
	}
810
}
811
 
812
ulong
813
allreg(ulong b, Rgn *r)
814
{
815
	Var *v;
816
	int i;
817
 
818
	v = var + r->varno;
819
	r->regno = 0;
820
	switch(v->etype) {
821
 
822
	default:
823
		diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
824
		break;
825
 
826
	case TCHAR:
827
	case TUCHAR:
828
	case TSHORT:
829
	case TUSHORT:
830
	case TINT:
831
	case TUINT:
832
	case TLONG:
833
	case TULONG:
834
	case TIND:
835
	case TARRAY:
836
		i = BtoR(~b);
837
		if(i && r->cost > 0) {
838
			r->regno = i;
839
			return RtoB(i);
840
		}
841
		break;
842
 
843
	case TDOUBLE:
844
	case TFLOAT:
845
		i = BtoF(~b);
846
		if(i && r->cost > 0) {
847
			r->regno = i+NREG;
848
			return FtoB(i);
849
		}
850
		break;
851
	}
852
	return 0;
853
}
854
 
855
void
856
paint1(Reg *r, int bn)
857
{
858
	Reg *r1;
859
	Prog *p;
860
	int z;
861
	ulong bb;
862
 
863
	z = bn/32;
864
	bb = 1L<<(bn%32);
865
	if(r->act.b[z] & bb)
866
		return;
867
	for(;;) {
868
		if(!(r->refbehind.b[z] & bb))
869
			break;
870
		r1 = r->p1;
871
		if(r1 == R)
872
			break;
873
		if(!(r1->refahead.b[z] & bb))
874
			break;
875
		if(r1->act.b[z] & bb)
876
			break;
877
		r = r1;
878
	}
879
 
880
	if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
881
		change -= CLOAD * r->loop;
882
		if(debug['R'] && debug['v'])
883
			print("%ld%P\tld %B $%d\n", r->loop,
884
				r->prog, blsh(bn), change);
885
	}
886
	for(;;) {
887
		r->act.b[z] |= bb;
888
		p = r->prog;
889
 
890
		if(r->use1.b[z] & bb) {
891
			change += CREF * r->loop;
892
			if(p->to.type == D_FREG && p->as == AMOVW)
893
				change = -CINF;		/* cant go Rreg to Freg */
894
			if(debug['R'] && debug['v'])
895
				print("%ld%P\tu1 %B $%d\n", r->loop,
896
					p, blsh(bn), change);
897
		}
898
 
899
		if((r->use2.b[z]|r->set.b[z]) & bb) {
900
			change += CREF * r->loop;
901
			if(p->from.type == D_FREG && p->as == AMOVW)
902
				change = -CINF;		/* cant go Rreg to Freg */
903
			if(debug['R'] && debug['v'])
904
				print("%ld%P\tu2 %B $%d\n", r->loop,
905
					p, blsh(bn), change);
906
		}
907
 
908
		if(STORE(r) & r->regdiff.b[z] & bb) {
909
			change -= CLOAD * r->loop;
910
			if(debug['R'] && debug['v'])
911
				print("%ld%P\tst %B $%d\n", r->loop,
912
					p, blsh(bn), change);
913
		}
914
 
915
		if(r->refbehind.b[z] & bb)
916
			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
917
				if(r1->refahead.b[z] & bb)
918
					paint1(r1, bn);
919
 
920
		if(!(r->refahead.b[z] & bb))
921
			break;
922
		r1 = r->s2;
923
		if(r1 != R)
924
			if(r1->refbehind.b[z] & bb)
925
				paint1(r1, bn);
926
		r = r->s1;
927
		if(r == R)
928
			break;
929
		if(r->act.b[z] & bb)
930
			break;
931
		if(!(r->refbehind.b[z] & bb))
932
			break;
933
	}
934
}
935
 
936
ulong
937
paint2(Reg *r, int bn)
938
{
939
	Reg *r1;
940
	int z;
941
	ulong bb, vreg;
942
 
943
	z = bn/32;
944
	bb = 1L << (bn%32);
945
	vreg = regbits;
946
	if(!(r->act.b[z] & bb))
947
		return vreg;
948
	for(;;) {
949
		if(!(r->refbehind.b[z] & bb))
950
			break;
951
		r1 = r->p1;
952
		if(r1 == R)
953
			break;
954
		if(!(r1->refahead.b[z] & bb))
955
			break;
956
		if(!(r1->act.b[z] & bb))
957
			break;
958
		r = r1;
959
	}
960
	for(;;) {
961
		r->act.b[z] &= ~bb;
962
 
963
		vreg |= r->regu;
964
 
965
		if(r->refbehind.b[z] & bb)
966
			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
967
				if(r1->refahead.b[z] & bb)
968
					vreg |= paint2(r1, bn);
969
 
970
		if(!(r->refahead.b[z] & bb))
971
			break;
972
		r1 = r->s2;
973
		if(r1 != R)
974
			if(r1->refbehind.b[z] & bb)
975
				vreg |= paint2(r1, bn);
976
		r = r->s1;
977
		if(r == R)
978
			break;
979
		if(!(r->act.b[z] & bb))
980
			break;
981
		if(!(r->refbehind.b[z] & bb))
982
			break;
983
	}
984
	return vreg;
985
}
986
 
987
void
988
paint3(Reg *r, int bn, long rb, int rn)
989
{
990
	Reg *r1;
991
	Prog *p;
992
	int z;
993
	ulong bb;
994
 
995
	z = bn/32;
996
	bb = 1L << (bn%32);
997
	if(r->act.b[z] & bb)
998
		return;
999
	for(;;) {
1000
		if(!(r->refbehind.b[z] & bb))
1001
			break;
1002
		r1 = r->p1;
1003
		if(r1 == R)
1004
			break;
1005
		if(!(r1->refahead.b[z] & bb))
1006
			break;
1007
		if(r1->act.b[z] & bb)
1008
			break;
1009
		r = r1;
1010
	}
1011
 
1012
	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1013
		addmove(r, bn, rn, 0);
1014
	for(;;) {
1015
		r->act.b[z] |= bb;
1016
		p = r->prog;
1017
 
1018
		if(r->use1.b[z] & bb) {
1019
			if(debug['R'])
1020
				print("%P", p);
1021
			addreg(&p->from, rn);
1022
			if(debug['R'])
1023
				print("\t.c%P\n", p);
1024
		}
1025
		if((r->use2.b[z]|r->set.b[z]) & bb) {
1026
			if(debug['R'])
1027
				print("%P", p);
1028
			addreg(&p->to, rn);
1029
			if(debug['R'])
1030
				print("\t.c%P\n", p);
1031
		}
1032
 
1033
		if(STORE(r) & r->regdiff.b[z] & bb)
1034
			addmove(r, bn, rn, 1);
1035
		r->regu |= rb;
1036
 
1037
		if(r->refbehind.b[z] & bb)
1038
			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1039
				if(r1->refahead.b[z] & bb)
1040
					paint3(r1, bn, rb, rn);
1041
 
1042
		if(!(r->refahead.b[z] & bb))
1043
			break;
1044
		r1 = r->s2;
1045
		if(r1 != R)
1046
			if(r1->refbehind.b[z] & bb)
1047
				paint3(r1, bn, rb, rn);
1048
		r = r->s1;
1049
		if(r == R)
1050
			break;
1051
		if(r->act.b[z] & bb)
1052
			break;
1053
		if(!(r->refbehind.b[z] & bb))
1054
			break;
1055
	}
1056
}
1057
 
1058
void
1059
addreg(Adr *a, int rn)
1060
{
1061
 
1062
	a->sym = 0;
1063
	a->name = D_NONE;
1064
	a->type = D_REG;
1065
	a->reg = rn;
1066
	if(rn >= NREG) {
1067
		a->type = D_FREG;
1068
		a->reg = rn-NREG;
1069
	}
1070
}
1071
 
1072
/*
1073
 * track register variables including external registers:
1074
 *	bit	reg
1075
 *	0	R7
1076
 *	1	R8
1077
 *	...	...
1078
 *	21	R28
1079
 */
1080
long
1081
RtoB(int r)
1082
{
1083
 
1084
	if(r >= REGMIN && r <= REGMAX)
1085
		return 1L << (r-REGMIN);
1086
	return 0;
1087
}
1088
 
1089
int
1090
BtoR(long b)
1091
{
1092
	b &= 0x001fffffL;
1093
	if(b == 0)
1094
		return 0;
1095
	return bitno(b) + REGMIN;
1096
}
1097
 
1098
/*
1099
 *	bit	reg
1100
 *	22	F17
1101
 *	23	F18
1102
 *	...	...
1103
 *	31	F26
1104
 */
1105
long
1106
FtoB(int f)
1107
{
1108
	if(f < FREGMIN || f > FREGEXT)
1109
		return 0;
1110
	return 1L << (f - FREGMIN + 22);
1111
}
1112
 
1113
int
1114
BtoF(long b)
1115
{
1116
 
1117
	b &= 0xffc00000L;
1118
	if(b == 0)
1119
		return 0;
1120
	return bitno(b) - 22 + FREGMIN;
1121
}