Subversion Repositories planix.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
#include	"l.h"
2
 
3
enum
4
{
5
	E_HILO	= 1<<0,
6
	E_FCR	= 1<<1,
7
	E_MCR	= 1<<2,
8
	E_MEM	= 1<<3,
9
	E_MEMSP	= 1<<4,	/* uses offset and size */
10
	E_MEMSB	= 1<<5,	/* uses offset and size */
11
	ANYMEM	= E_MEM|E_MEMSP|E_MEMSB,
12
	DELAY	= BRANCH|LOAD|FCMP,
13
};
14
 
15
typedef	struct	Sch	Sch;
16
typedef	struct	Dep	Dep;
17
 
18
struct	Dep
19
{
20
	ulong	ireg;
21
	ulong	freg;
22
	ulong	cc;
23
};
24
struct	Sch
25
{
26
	Prog	p;
27
	Dep	set;
28
	Dep	used;
29
	long	soffset;
30
	char	size;
31
	char	nop;
32
	char	comp;
33
};
34
 
35
void	regsused(Sch*, Prog*);
36
int	depend(Sch*, Sch*);
37
int	conflict(Sch*, Sch*);
38
int	offoverlap(Sch*, Sch*);
39
void	dumpbits(Sch*, Dep*);
40
 
41
void
42
sched(Prog *p0, Prog *pe)
43
{
44
	Prog *p, *q;
45
	Sch sch[NSCHED], *s, *t, *u, *se, stmp;
46
 
47
	/*
48
	 * build side structure
49
	 */
50
	s = sch;
51
	for(p=p0;; p=p->link) {
52
		memset(s, 0, sizeof(*s));
53
		s->p = *p;
54
		regsused(s, p);
55
		if(debug['X']) {
56
			Bprint(&bso, "%P\t\tset", &s->p);
57
			dumpbits(s, &s->set);
58
			Bprint(&bso, "; used");
59
			dumpbits(s, &s->used);
60
			if(s->comp)
61
				Bprint(&bso, "; compound");
62
			if(s->p.mark & LOAD)
63
				Bprint(&bso, "; load");
64
			if(s->p.mark & BRANCH)
65
				Bprint(&bso, "; branch");
66
			if(s->p.mark & FCMP)
67
				Bprint(&bso, "; fcmp");
68
			Bprint(&bso, "\n");
69
		}
70
		if(p == pe)
71
			break;
72
		s++;
73
	}
74
	se = s;
75
 
76
	/*
77
	 * prepass to move things around
78
	 * does nothing, but tries to make
79
	 * the actual scheduler work better
80
	 */
81
	for(s=sch; s<=se; s++) {
82
		if(!(s->p.mark & LOAD))
83
			continue;
84
		/* always good to put nonconflict loads together */
85
		for(t=s+1; t<=se; t++) {
86
			if(!(t->p.mark & LOAD))
87
				continue;
88
			if(t->p.mark & BRANCH)
89
				break;
90
			if(conflict(s, t))
91
				break;
92
			for(u=t-1; u>s; u--)
93
				if(depend(u, t))
94
					goto no11;
95
			u = s+1;
96
			stmp = *t;
97
			memmove(s+2, u, (uchar*)t - (uchar*)u);
98
			*u = stmp;
99
			break;
100
		}
101
	no11:
102
 
103
		/* put schedule fodder above load */
104
		for(t=s+1; t<=se; t++) {
105
			if(t->p.mark & BRANCH)
106
				break;
107
			if(s > sch && conflict(s-1, t))
108
				continue;
109
			for(u=t-1; u>=s; u--)
110
				if(depend(t, u))
111
					goto no1;
112
			stmp = *t;
113
			memmove(s+1, s, (uchar*)t - (uchar*)s);
114
			*s = stmp;
115
			if(!(s->p.mark & LOAD))
116
				break;
117
		no1:;
118
		}
119
	}
120
 
121
	for(s=se; s>=sch; s--) {
122
		if(!(s->p.mark & DELAY))
123
			continue;
124
		if(s < se)
125
			if(!conflict(s, s+1))
126
				goto out3;
127
		/*
128
		 * s is load, s+1 is immediate use of result or end of block
129
		 * t is the trial instruction to insert between s and s+1
130
		 */
131
		if(!debug['Y'])
132
		for(t=s-1; t>=sch; t--) {
133
			if(t->comp)
134
				if(s->p.mark & BRANCH)
135
					goto no2;
136
			if(t->p.mark & DELAY)
137
				if(s >= se || conflict(t, s+1))
138
					goto no2;
139
			for(u=t+1; u<=s; u++)
140
				if(depend(u, t))
141
					goto no2;
142
			goto out2;
143
		no2:;
144
		}
145
		if(debug['X'])
146
			Bprint(&bso, "?l%P\n", &s->p);
147
		s->nop = 1;
148
		if(debug['v']) {
149
			if(s->p.mark & LOAD) {
150
				nop.load.count++;
151
				nop.load.outof++;
152
			}
153
			if(s->p.mark & BRANCH) {
154
				nop.branch.count++;
155
				nop.branch.outof++;
156
			}
157
			if(s->p.mark & FCMP) {
158
				nop.fcmp.count++;
159
				nop.fcmp.outof++;
160
			}
161
		}
162
		continue;
163
 
164
	out2:
165
		if(debug['X']) {
166
			Bprint(&bso, "!l%P\n", &t->p);
167
			Bprint(&bso, "%P\n", &s->p);
168
		}
169
		stmp = *t;
170
		memmove(t, t+1, (uchar*)s - (uchar*)t);
171
		*s = stmp;
172
		s--;
173
 
174
	out3:
175
		if(debug['v']) {
176
			if(s->p.mark & LOAD)
177
				nop.load.outof++;
178
			if(s->p.mark & BRANCH)
179
				nop.branch.outof++;
180
			if(s->p.mark & FCMP)
181
				nop.fcmp.outof++;
182
		}
183
	}
184
 
185
	/* Avoid HI/LO use->set */
186
	t = sch+1;
187
	for(s=sch; s<se-1; s++, t++) {
188
		if((s->used.cc & E_HILO) == 0)
189
			continue;
190
		if(t->set.cc & E_HILO)
191
			s->nop = 2;
192
	}
193
 
194
	/*
195
	 * put it all back
196
	 */
197
	for(s=sch, p=p0; s<=se; s++, p=q) {
198
		q = p->link;
199
		if(q != s->p.link) {
200
			*p = s->p;
201
			p->link = q;
202
		}
203
		while(s->nop--)
204
			addnop(p);
205
	}
206
	if(debug['X']) {
207
		Bprint(&bso, "\n");
208
		Bflush(&bso);
209
	}
210
}
211
 
212
void
213
regsused(Sch *s, Prog *realp)
214
{
215
	int c, ar, ad, ld, sz;
216
	ulong m;
217
	Prog *p;
218
 
219
	p = &s->p;
220
	s->comp = compound(p);
221
	s->nop = 0;
222
	if(s->comp) {
223
		s->set.ireg |= 1<<REGTMP;
224
		s->used.ireg |= 1<<REGTMP;
225
	}
226
 
227
	ar = 0;		/* dest is really reference */
228
	ad = 0;		/* source/dest is really address */
229
	ld = 0;		/* opcode is load instruction */
230
	sz = 20;	/* size of load/store for overlap computation */
231
 
232
/*
233
 * flags based on opcode
234
 */
235
	switch(p->as) {
236
	case ATEXT:
237
		curtext = realp;
238
		autosize = p->to.offset + 4;
239
		ad = 1;
240
		break;
241
	case AJAL:
242
		c = p->reg;
243
		if(c == NREG)
244
			c = REGLINK;
245
		s->set.ireg |= 1<<c;
246
		ar = 1;
247
		ad = 1;
248
		break;
249
	case ABGEZAL:
250
	case ABLTZAL:
251
		s->set.ireg |= 1<<REGLINK;
252
	case ABEQ:
253
	case ABGEZ:
254
	case ABGTZ:
255
	case ABLEZ:
256
	case ABLTZ:
257
	case ABNE:
258
		ar = 1;
259
		ad = 1;
260
		break;
261
	case ABFPT:
262
	case ABFPF:
263
		ad = 1;
264
		s->used.cc |= E_FCR;
265
		break;
266
	case ACMPEQD:
267
	case ACMPEQF:
268
	case ACMPGED:
269
	case ACMPGEF:
270
	case ACMPGTD:
271
	case ACMPGTF:
272
		ar = 1;
273
		s->set.cc |= E_FCR;
274
		p->mark |= FCMP;
275
		break;
276
	case AJMP:
277
		ar = 1;
278
		ad = 1;
279
		break;
280
	case AMOVB:
281
	case AMOVBU:
282
		sz = 1;
283
		ld = 1;
284
		break;
285
	case AMOVH:
286
	case AMOVHU:
287
		sz = 2;
288
		ld = 1;
289
		break;
290
	case AMOVF:
291
	case AMOVW:
292
	case AMOVWL:
293
	case AMOVWR:
294
		sz = 4;
295
		ld = 1;
296
		break;
297
	case AMOVD:
298
	case AMOVV:
299
	case AMOVVL:
300
	case AMOVVR:
301
		sz = 8;
302
		ld = 1;
303
		break;
304
	case ADIV:
305
	case ADIVU:
306
	case AMUL:
307
	case AMULU:
308
	case AREM:
309
	case AREMU:
310
		s->set.cc = E_HILO;
311
	case AADD:
312
	case AADDU:
313
	case AAND:
314
	case ANOR:
315
	case AOR:
316
	case ASGT:
317
	case ASGTU:
318
	case ASLL:
319
	case ASRA:
320
	case ASRL:
321
	case ASUB:
322
	case ASUBU:
323
	case AXOR:
324
 
325
	case AADDD:
326
	case AADDF:
327
	case AADDW:
328
	case ASUBD:
329
	case ASUBF:
330
	case ASUBW:
331
	case AMULF:
332
	case AMULD:
333
	case AMULW:
334
	case ADIVF:
335
	case ADIVD:
336
	case ADIVW:
337
		if(p->reg == NREG) {
338
			if(p->to.type == D_REG || p->to.type == D_FREG)
339
				p->reg = p->to.reg;
340
			if(p->reg == NREG)
341
				print("botch %P\n", p);
342
		}
343
		break;
344
	}
345
 
346
/*
347
 * flags based on 'to' field
348
 */
349
	c = p->to.class;
350
	if(c == 0) {
351
		c = aclass(&p->to) + 1;
352
		p->to.class = c;
353
	}
354
	c--;
355
	switch(c) {
356
	default:
357
		print("unknown class %d %D\n", c, &p->to);
358
 
359
	case C_ZCON:
360
	case C_SCON:
361
	case C_ADD0CON:
362
	case C_AND0CON:
363
	case C_ADDCON:
364
	case C_ANDCON:
365
	case C_UCON:
366
	case C_LCON:
367
	case C_NONE:
368
	case C_SBRA:
369
	case C_LBRA:
370
		break;
371
 
372
	case C_HI:
373
	case C_LO:
374
		s->set.cc |= E_HILO;
375
		break;
376
	case C_FCREG:
377
		s->set.cc |= E_FCR;
378
		break;
379
	case C_MREG:
380
		s->set.cc |= E_MCR;
381
		break;
382
	case C_ZOREG:
383
	case C_SOREG:
384
	case C_LOREG:
385
		c = p->to.reg;
386
		s->used.ireg |= 1<<c;
387
		if(ad)
388
			break;
389
		s->size = sz;
390
		s->soffset = regoff(&p->to);
391
 
392
		m = ANYMEM;
393
		if(c == REGSB)
394
			m = E_MEMSB;
395
		if(c == REGSP)
396
			m = E_MEMSP;
397
 
398
		if(ar)
399
			s->used.cc |= m;
400
		else
401
			s->set.cc |= m;
402
		break;
403
	case C_SACON:
404
	case C_LACON:
405
		s->used.ireg |= 1<<REGSP;
406
		break;
407
	case C_SECON:
408
	case C_LECON:
409
		s->used.ireg |= 1<<REGSB;
410
		break;
411
	case C_REG:
412
		if(ar)
413
			s->used.ireg |= 1<<p->to.reg;
414
		else
415
			s->set.ireg |= 1<<p->to.reg;
416
		break;
417
	case C_FREG:
418
		/* do better -- determine double prec */
419
		if(ar) {
420
			s->used.freg |= 1<<p->to.reg;
421
			s->used.freg |= 1<<(p->to.reg|1);
422
		} else {
423
			s->set.freg |= 1<<p->to.reg;
424
			s->set.freg |= 1<<(p->to.reg|1);
425
		}
426
		if(ld && p->from.type == D_REG)
427
			p->mark |= LOAD;
428
		break;
429
	case C_SAUTO:
430
	case C_LAUTO:
431
		s->used.ireg |= 1<<REGSP;
432
		if(ad)
433
			break;
434
		s->size = sz;
435
		s->soffset = regoff(&p->to);
436
 
437
		if(ar)
438
			s->used.cc |= E_MEMSP;
439
		else
440
			s->set.cc |= E_MEMSP;
441
		break;
442
	case C_SEXT:
443
	case C_LEXT:
444
		s->used.ireg |= 1<<REGSB;
445
		if(ad)
446
			break;
447
		s->size = sz;
448
		s->soffset = regoff(&p->to);
449
 
450
		if(ar)
451
			s->used.cc |= E_MEMSB;
452
		else
453
			s->set.cc |= E_MEMSB;
454
		break;
455
	}
456
 
457
/*
458
 * flags based on 'from' field
459
 */
460
	c = p->from.class;
461
	if(c == 0) {
462
		c = aclass(&p->from) + 1;
463
		p->from.class = c;
464
	}
465
	c--;
466
	switch(c) {
467
	default:
468
		print("unknown class %d %D\n", c, &p->from);
469
 
470
	case C_ZCON:
471
	case C_SCON:
472
	case C_ADD0CON:
473
	case C_AND0CON:
474
	case C_ADDCON:
475
	case C_ANDCON:
476
	case C_UCON:
477
	case C_LCON:
478
	case C_NONE:
479
	case C_SBRA:
480
	case C_LBRA:
481
		break;
482
	case C_HI:
483
	case C_LO:
484
		s->used.cc |= E_HILO;
485
		break;
486
	case C_FCREG:
487
		s->used.cc |= E_FCR;
488
		break;
489
	case C_MREG:
490
		s->used.cc |= E_MCR;
491
		break;
492
	case C_ZOREG:
493
	case C_SOREG:
494
	case C_LOREG:
495
		c = p->from.reg;
496
		s->used.ireg |= 1<<c;
497
		if(ld)
498
			p->mark |= LOAD;
499
		s->size = sz;
500
		s->soffset = regoff(&p->from);
501
 
502
		m = ANYMEM;
503
		if(c == REGSB)
504
			m = E_MEMSB;
505
		if(c == REGSP)
506
			m = E_MEMSP;
507
 
508
		s->used.cc |= m;
509
		break;
510
	case C_SACON:
511
	case C_LACON:
512
		s->used.ireg |= 1<<REGSP;
513
		break;
514
	case C_SECON:
515
	case C_LECON:
516
		s->used.ireg |= 1<<REGSB;
517
		break;
518
	case C_REG:
519
		s->used.ireg |= 1<<p->from.reg;
520
		break;
521
	case C_FREG:
522
		/* do better -- determine double prec */
523
		s->used.freg |= 1<<p->from.reg;
524
		s->used.freg |= 1<<(p->from.reg|1);
525
		if(ld && p->to.type == D_REG)
526
			p->mark |= LOAD;
527
		break;
528
	case C_SAUTO:
529
	case C_LAUTO:
530
		s->used.ireg |= 1<<REGSP;
531
		if(ld)
532
			p->mark |= LOAD;
533
		if(ad)
534
			break;
535
		s->size = sz;
536
		s->soffset = regoff(&p->from);
537
 
538
		s->used.cc |= E_MEMSP;
539
		break;
540
	case C_SEXT:
541
	case C_LEXT:
542
		s->used.ireg |= 1<<REGSB;
543
		if(ld)
544
			p->mark |= LOAD;
545
		if(ad)
546
			break;
547
		s->size = sz;
548
		s->soffset = regoff(&p->from);
549
 
550
		s->used.cc |= E_MEMSB;
551
		break;
552
	}
553
 
554
	c = p->reg;
555
	if(c != NREG) {
556
		if(p->from.type == D_FREG || p->to.type == D_FREG) {
557
			s->used.freg |= 1<<c;
558
			s->used.freg |= 1<<(c|1);
559
		} else
560
			s->used.ireg |= 1<<c;
561
	}
562
	s->set.ireg &= ~(1<<REGZERO);		/* R0 cant be set */
563
}
564
 
565
/*
566
 * test to see if 2 instrictions can be
567
 * interchanged without changing semantics
568
 */
569
int
570
depend(Sch *sa, Sch *sb)
571
{
572
	ulong x;
573
 
574
	if(sa->set.ireg & (sb->set.ireg|sb->used.ireg))
575
		return 1;
576
	if(sb->set.ireg & sa->used.ireg)
577
		return 1;
578
 
579
	if(sa->set.freg & (sb->set.freg|sb->used.freg))
580
		return 1;
581
	if(sb->set.freg & sa->used.freg)
582
		return 1;
583
 
584
	/*
585
	 * special case.
586
	 * loads from same address cannot pass.
587
	 * this is for hardware fifo's and the like
588
	 */
589
	if(sa->used.cc & sb->used.cc & E_MEM)
590
		if(sa->p.reg == sb->p.reg)
591
		if(regoff(&sa->p.from) == regoff(&sb->p.from))
592
			return 1;
593
 
594
	x = (sa->set.cc & (sb->set.cc|sb->used.cc)) |
595
		(sb->set.cc & sa->used.cc);
596
	if(x) {
597
		/*
598
		 * allow SB and SP to pass each other.
599
		 * allow SB to pass SB iff doffsets are ok
600
		 * anything else conflicts
601
		 */
602
		if(x != E_MEMSP && x != E_MEMSB)
603
			return 1;
604
		x = sa->set.cc | sb->set.cc |
605
			sa->used.cc | sb->used.cc;
606
		if(x & E_MEM)
607
			return 1;
608
		if(offoverlap(sa, sb))
609
			return 1;
610
	}
611
 
612
	return 0; 
613
}
614
 
615
int
616
offoverlap(Sch *sa, Sch *sb)
617
{
618
 
619
	if(sa->soffset < sb->soffset) {
620
		if(sa->soffset+sa->size > sb->soffset)
621
			return 1;
622
		return 0;
623
	}
624
	if(sb->soffset+sb->size > sa->soffset)
625
		return 1;
626
	return 0;
627
}
628
 
629
/*
630
 * test 2 adjacent instructions
631
 * and find out if inserted instructions
632
 * are desired to prevent stalls.
633
 */
634
int
635
conflict(Sch *sa, Sch *sb)
636
{
637
 
638
	if(sa->set.ireg & sb->used.ireg)
639
		return 1;
640
	if(sa->set.freg & sb->used.freg)
641
		return 1;
642
	if(sa->set.cc & sb->used.cc)
643
		return 1;
644
 
645
	return 0;
646
}
647
 
648
int
649
compound(Prog *p)
650
{
651
	Optab *o;
652
 
653
	o = oplook(p);
654
	if(o->size != 4)
655
		return 1;
656
	if(p->to.type == D_REG && p->to.reg == REGSB)
657
		return 1;
658
	return 0;
659
}
660
 
661
void
662
dumpbits(Sch *s, Dep *d)
663
{
664
	int i;
665
 
666
	for(i=0; i<32; i++)
667
		if(d->ireg & (1<<i))
668
			Bprint(&bso, " R%d", i);
669
	for(i=0; i<32; i++)
670
		if(d->freg & (1<<i))
671
			Bprint(&bso, " F%d", i);
672
	for(i=0; i<32; i++)
673
		switch(d->cc & (1<<i)) {
674
		default:
675
			break;
676
		case E_HILO:
677
			Bprint(&bso, " HILO");
678
			break;
679
		case E_FCR:
680
			Bprint(&bso, " FCR");
681
			break;
682
		case E_MCR:
683
			Bprint(&bso, " MCR");
684
			break;
685
		case E_MEM:
686
			Bprint(&bso, " MEM%d", s->size);
687
			break;
688
		case E_MEMSB:
689
			Bprint(&bso, " SB%d", s->size);
690
			break;
691
		case E_MEMSP:
692
			Bprint(&bso, " SP%d", s->size);
693
			break;
694
		}
695
}