Subversion Repositories tendra.SVN

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 7u83 1
/*
2
    		 Crown Copyright (c) 1997
3
 
4
    This TenDRA(r) Computer Program is subject to Copyright
5
    owned by the United Kingdom Secretary of State for Defence
6
    acting through the Defence Evaluation and Research Agency
7
    (DERA).  It is made available to Recipients with a
8
    royalty-free licence for its use, reproduction, transfer
9
    to other parties and amendment for any purpose not excluding
10
    product development provided that any such use et cetera
11
    shall be deemed to be acceptance of the following conditions:-
12
 
13
	(1) Its Recipients shall ensure that this Notice is
14
	reproduced upon any copies or amended versions of it;
15
 
16
	(2) Any amended version of it shall be clearly marked to
17
	show both the nature of and the organisation responsible
18
	for the relevant amendment or amendments;
19
 
20
	(3) Its onward transfer from a recipient to another
21
	party shall be deemed to be that party's acceptance of
22
	these conditions;
23
 
24
	(4) DERA gives no warranty or assurance as to its
25
	quality or suitability for any purpose and DERA accepts
26
	no liability whatsoever in relation to any use to which
27
	it may be put.
28
*/
29
 
30
 
31
/*
32
$Log: muldvrem.c,v $
33
 * Revision 1.1.1.1  1998/01/17  15:56:03  release
34
 * First version to be checked into rolling release.
35
 *
36
 * Revision 1.3  1996/03/22  13:34:09  wfs
37
 * Corrections to the dynamic initialization stuff in translat.c + bad
38
 * needscan.c code deleted.
39
 *
40
 * Revision 1.2  1995/12/18  13:12:05  wfs
41
 * Put hppatrans uder cvs control. Major Changes made since last release
42
 * include:
43
 * (i) PIC code generation.
44
 * (ii) Profiling.
45
 * (iii) Dynamic Initialization.
46
 * (iv) Debugging of Exception Handling and Diagnostics.
47
 *
48
 * Revision 5.5  1995/10/30  12:57:05  wfs
49
 * Removed an "if" statement left in by mistake.
50
 *
51
 * Revision 5.4  1995/10/26  15:30:24  wfs
52
 * Had forgotten to check for division by 0 in div's and rem's.
53
 *
54
 * Revision 5.3  1995/10/20  14:06:26  wfs
55
 * gcc compilation changes.
56
 *
57
 * Revision 5.2  1995/10/09  13:01:46  wfs
58
 * Cosmetic changes.
59
 *
60
 * Revision 5.1  1995/09/15  12:56:17  wfs
61
 * Use of "trap_label" defined in "makecode.c". Removed a parameter
62
 * from a call of "rr_ins" which shouldn't have been there.
63
 *
64
 * Revision 5.0  1995/08/25  13:42:58  wfs
65
 * Preperation for August 25 Glue release
66
 *
67
 * Revision 3.4  1995/08/25  09:56:26  wfs
68
 * register synonyms changed. bug fixes to error jump's
69
 *
70
 * Revision 3.4  1995/08/25  09:56:26  wfs
71
 * register synonyms changed. bug fixes to error jump's
72
 *
73
 * Revision 3.1  95/04/10  16:27:27  16:27:27  wfs (William Simmonds)
74
 * Apr95 tape version.
75
 * 
76
 * Revision 3.0  95/03/30  11:18:22  11:18:22  wfs (William Simmonds)
77
 * Mar95 tape version with CRCR95_178 bug fix.
78
 * 
79
 * Revision 2.0  95/03/15  15:28:11  15:28:11  wfs (William Simmonds)
80
 * spec 3.1 changes implemented, tests outstanding.
81
 * 
82
 * Revision 1.3  95/02/02  15:45:02  15:45:02  wfs (William Simmonds)
83
 * Implemented rem1 and div1.
84
 * 
85
 * Revision 1.2  95/01/17  17:29:42  17:29:42  wfs (William Simmonds)
86
 * Changed name of an included header file.
87
 * 
88
 * Revision 1.1  95/01/11  13:13:34  13:13:34  wfs (William Simmonds)
89
 * Initial revision
90
 * 
91
*/
92
 
93
 
94
#define HPPATRANS_CODE
95
#include "config.h"
96
#include "myassert.h"
97
#include "needscan.h"
98
#include "addrtypes.h"
99
#include "tags.h"
100
#include "expmacs.h"
101
#include "installtypes.h"
102
#include "exp.h"
103
#include "exptypes.h"
104
#include "maxminmacs.h"
105
#include "shapemacs.h"
106
#include "proctypes.h"
107
#include "eval.h"
108
#include "move.h"
109
#include "oprators.h"
110
#include "comment.h"
111
#include "getregs.h"
112
#include "guard.h"
113
#include "locate.h"
114
#include "codehere.h"
115
#include "inst_fmt.h"
116
#include "hppains.h"
117
#include "bitsmacs.h"
118
#include "labels.h"
119
#include "regexps.h"
120
#include "special.h"
121
#include "regmacs.h"
122
#include "needscan.h"
123
#include "translat.h"
124
#include "muldvrem.h"
125
#include "proc.h"
126
#include "out.h"
127
 
128
 
129
/* width, in bits, of the machine word the generated code works on */
#define BITS_PER_WORD		32

/*
 * Largest permissible X in "2**n +- X" for which a constant multiply is
 * still expanded as one shift followed by a few adds/subtracts.
 */
#define MAX_MUL_POW2_OFFSET	2

/*
 * Sentinel: any value larger than the permissible X offset in 2**n +- X,
 * used to report "constant is not a simple multiply".
 */
#define NOT_MUL_CONST_SIMPLE	(MAX_MUL_POW2_OFFSET+1)

/* non-zero when exactly one bit of 'c' is set; 'c' is evaluated three times */
#define IS_POW2(c)		((c) != 0 && ((c) & ((c)-1)) == 0)
138
 
139
/* label to branch to when an error-checked operation on the exp fails */
extern long trap_label PROTO_S ((exp));
/*
 * Return type spelt out: the declaration previously relied on implicit
 * int, which is no longer valid C (removed in C99).
 */
extern int comib_ins PROTO_S ((ins_p,int,int,int));
141
/*
142
 * Utility functions.
143
 */
144
 
145
/* return bit number 0..31 from right of word of 'c' which has one bit set */
146
static int bit_no 
147
    PROTO_N ( ( c ) )
148
    PROTO_T ( unsigned long c )
149
{
150
  int shift_const;
151
  unsigned long mask;
152
 
153
  assert(IS_POW2(c));
154
 
155
  for (mask = 1, shift_const = 0; mask != c; mask = mask << 1)
156
  {
157
    shift_const++;
158
  }
159
 
160
  return shift_const;
161
}
162
 
163
 
164
void clear_t_regs
165
    PROTO_Z ()
166
{
167
  /* clear t-regs, i.e. GR2,GR19,GR20..,GR31, which might be modified by a
168
     call */
169
  int r;
170
  clear_reg(GR2);
171
  for (r = GR19; r < GR31+1; r++)
172
  {
173
     clear_reg(r);
174
  }
175
}
176
 
177
 
178
/* call millicode library procedure for complicated operation */
179
int call_muldivrem 
180
    PROTO_N ( ( lhs, rhs, sp, proc ) )
181
    PROTO_T ( exp lhs X exp rhs X space sp X int proc )
182
{
183
    char *stub="ARGW0=GR ARGW1=GR";
184
    reg_operand_here(lhs, sp, ARG0);
185
    sp = needreg(ARG0, sp);
186
 
187
    reg_operand_here(rhs,sp,ARG1);
188
    sp = needreg(ARG1,sp);
189
    call_millicode(proc,RP,stub,1);
190
 
191
    clear_t_regs();
192
 
193
    /* result left in RET1 */
194
    return RET1;
195
}
196
 
197
 
198
/*
199
 * Multiply.
200
 */
201
 
202
 
203
/* generate code for multiply by constant */
204
static void mul_const_complex
205
    PROTO_N ( ( src, constval, dest, sp, sgned ) )
206
    PROTO_T ( int src X long constval X int dest X space sp X bool sgned )
207
{
208
  struct
209
  {
210
    unsigned char bsl;		/* bit-string of 1s length */
211
    unsigned char shift;	/* shift from right of word */
212
  }      bs_tab[BITS_PER_WORD / 2];
213
 
214
  int bs_tab_len = 0;
215
  int bsl_1_tab = -1;
216
  int max_bsl = 0;
217
 
218
  comment1("multiply by %ld", constval);
219
 
220
 
221
  /* special case ~0 (all 1) which cannot be handled by the general algorithm */
222
  if (constval == ~0)
223
  {
224
    if (sgned)
225
    {
226
       assert(constval == -1);
227
       /* X * -1 => -X */
228
       rrr_ins(i_sub,c_,0,src,dest);
229
    }
230
    else
231
    {
232
 
233
      /*
234
       * only 2 non overflowing cases to consider 0 * ~0 == 0 1 * ~0 == ~0 ==
235
       * -1
236
       * 
237
       * negate handles these two correctly
238
       */
239
      rrr_ins(i_sub,c_,0,src,dest);
240
    }
241
    return;
242
  }
243
 
244
 
245
  /* set up bs_tab from constval */
246
  {
247
    unsigned long c = constval;
248
    int bsl = 0;
249
    int shift;
250
 
251
    for (c = constval, shift = 0; shift <= BITS_PER_WORD; shift++, c >>= 1)
252
    {
253
      if (c & 1)
254
      {
255
	bsl++;
256
      }
257
      else if (bsl != 0)
258
      {
259
	/* a complete all-1s bit-string */
260
	assert(bs_tab_len < BITS_PER_WORD / 2);
261
	bs_tab[bs_tab_len].bsl = bsl;
262
	bs_tab[bs_tab_len].shift = shift - bsl;	/* .shift is from right */
263
	if (bsl == 1)
264
	  bsl_1_tab = bs_tab_len;
265
	if (bsl > max_bsl)
266
	  max_bsl = bsl;
267
	comment4("mul_const_complex: bs_tab[%d]=%d,%d c=%d", bs_tab_len, bs_tab[bs_tab_len].bsl, bs_tab[bs_tab_len].shift, c);
268
	bs_tab_len++;
269
	bsl = 0;
270
      }
271
    }
272
  }
273
 
274
  comment2("mul_const_complex: max_bsl=%d bsl_1_tab=%d", max_bsl, bsl_1_tab);
275
 
276
  assert(bs_tab_len > 0);	/* shouldn't be here otherwise */
277
  assert(max_bsl >= 1);
278
  assert(max_bsl <= 31);	/* shifts by 32 don't work */
279
 
280
  /* generate the code */
281
  {
282
    int bsl;
283
    int bsl_laststep_tab;
284
    int tmp = GR1;
285
    int accum;
286
    bool accum_init = 0;	/* set to 1 when 'accum' reg initialised */
287
 
288
 
289
    /* allocate regs */
290
    assert(src != GR1);
291
    assert(dest != GR1);
292
 
293
    if (src != dest)
294
      accum = dest;
295
    else
296
      accum = getreg(sp.fixed);
297
 
298
    assert(src != accum);
299
 
300
 
301
    /* +++ neg */
302
 
303
    /* init accum if useful */
304
    if (bsl_1_tab >= 0 && bs_tab[bsl_1_tab].shift != 0)
305
    {
306
 
307
      /*
308
       * Usefully do one of the 1 bit strings with simple shift to accum. If
309
       * left to general algorithm 2 instructions, shift and move/add, would
310
       * often be used.
311
       */
312
      assert(bs_tab[bsl_1_tab].bsl == 1);
313
      rrir_ins(i_shd,c_,src,0,32-bs_tab[bsl_1_tab].shift,accum);
314
      bs_tab[bsl_1_tab].bsl = 0;/* mark as done */
315
      accum_init = 1;
316
    }
317
 
318
 
319
    /* find last cond generation step, so we can move to dest at that step */
320
    bsl_laststep_tab = -1;
321
 
322
    for (bsl = max_bsl; bsl > 0; bsl--)
323
    {
324
      int i;
325
 
326
      for (i = 0; i < bs_tab_len; i++)
327
      {
328
	if (bs_tab[i].bsl == bsl)
329
	  bsl_laststep_tab = i;
330
      }
331
    }
332
 
333
    assert(bsl_laststep_tab != -1);
334
 
335
 
336
    /*
337
     * accumulate handle all bit strings of same length together, so
338
     * 'src*((2**bsl)-1)' can be shared
339
     */
340
    for (bsl = max_bsl; bsl > 0; bsl--)
341
    {
342
      bool found_bsl = 0;
343
      int tmp_shifted=0;
344
      int i;
345
 
346
      for (i = 0; i < bs_tab_len; i++)
347
      {
348
	if (bs_tab[i].bsl == bsl)
349
	{
350
	  int to_accum_reg;	/* reg to be added to 'accum' at end of step */
351
	  int step_accum_dest = (i == bsl_laststep_tab ? dest : accum);
352
 
353
	  assert(accum != R_NO_REG);
354
 
355
	  /* amount to accum into tmp reg */
356
	  if (bsl == 1)
357
	  {
358
	    /* accumulate src<<shift */
359
	    if (bs_tab[i].shift == 0)
360
	    {
361
	       /* simple add */
362
	       to_accum_reg = src;
363
	       if (accum_init)
364
		  rrr_ins(i_add,c_,accum,to_accum_reg,step_accum_dest);
365
	       else
366
	       {
367
		  rr_ins(i_copy,to_accum_reg,step_accum_dest);
368
		  accum_init = 1;
369
	       }
370
	    }
371
	    else
372
	    {
373
	       /* simple shift and add */
374
	       to_accum_reg = tmp;
375
 	       if (accum_init)
376
	       {
377
		  if (bs_tab[i].shift==1)
378
		     rrr_ins(i_sh1add,c_,src,accum,step_accum_dest);
379
		  else if (bs_tab[i].shift==2)
380
		     rrr_ins(i_sh2add,c_,src,accum,step_accum_dest);
381
		  else if (bs_tab[i].shift==3)
382
		     rrr_ins(i_sh3add,c_,src,accum,step_accum_dest);
383
		  else
384
		  {
385
		     rrir_ins(i_shd,c_,src,0,32-bs_tab[i].shift,tmp);
386
		     rrr_ins(i_add,c_,accum,to_accum_reg,step_accum_dest);
387
		  }
388
	       }
389
	       else
390
	       {
391
		  rrir_ins(i_shd,c_,src,0,32-bs_tab[i].shift,step_accum_dest);
392
		  accum_init = 1;
393
	       }
394
	    }
395
	  }
396
	  else
397
	  {			/* bsl != 1 */
398
	    /* accumulate (src*((2**bsl)-1))<<shift */
399
 
400
	    to_accum_reg = tmp;
401
	    if (!found_bsl)
402
	    {
403
	      if (bsl==2)
404
		 rrr_ins(i_sh1add,c_,src,src,tmp);
405
	      else
406
	      {
407
		 rrir_ins(i_shd,c_,src,0,32-bsl,tmp);
408
		 rrr_ins(i_sub,c_,tmp,src,tmp);
409
	      }
410
	      tmp_shifted = 0;
411
	      found_bsl = 1;
412
	    }
413
 
414
	    if (bs_tab[i].shift != tmp_shifted)
415
	    {
416
	      int extra_shift = bs_tab[i].shift - tmp_shifted;
417
 
418
	      assert(extra_shift > 0 && extra_shift <= 31);
419
	      rrir_ins(i_shd,c_,tmp,0,32-extra_shift,tmp);
420
	      tmp_shifted += extra_shift;
421
	    }
422
	    /* else tmp already shifted to correct position */
423
 
424
	    to_accum_reg = tmp;
425
 
426
	    if (accum_init)
427
	       rrr_ins(i_add,c_,accum,to_accum_reg,step_accum_dest);
428
	    else
429
	    {
430
	       rr_ins(i_copy,to_accum_reg,step_accum_dest);
431
	       accum_init=1;
432
	    }
433
 
434
	  }
435
 
436
	  if (i == bsl_laststep_tab)
437
	    accum = R_NO_REG;	/* error check */
438
	}
439
      }
440
    }
441
 
442
    assert(accum_init);
443
    assert(accum == R_NO_REG);
444
 
445
    /* result in dest, due to step_accum_dest above */
446
  }
447
 
448
  comment1("end multiply by %ld", constval);
449
}
450
 
451
/* is constval +ve const 2**n or 2**(n +- X) where abs(X) <= MAX_MUL_POW2_OFFSET */
452
static int offset_mul_const_simple 
453
    PROTO_N ( ( constval, sgned ) )
454
    PROTO_T ( long constval X bool sgned )
455
{
456
  int i;
457
 
458
  FULLCOMMENT1("offset_mul_const_simple: %ld", constval);
459
 
460
  if (constval < 0)
461
  {
462
    if (sgned)
463
      constval = -constval;
464
    else
465
      return NOT_MUL_CONST_SIMPLE;	/* very rare case */
466
  }
467
 
468
  for (i = 0; i <= MAX_MUL_POW2_OFFSET; i++)
469
  {
470
    long c;			/* power of two close to constval */
471
 
472
    /* check for add offsets, avoiding overflow confusion */
473
    c = constval - i;
474
    if (IS_POW2(c) && c + i == constval)
475
      return i;
476
 
477
    /* check for sub offset of 1 only, avoiding overflow confusion */
478
    if (i == 1)
479
    {
480
      c = constval + i;
481
      if (IS_POW2(c) && c - i == constval)
482
	return -i;
483
    }
484
  }
485
 
486
  return NOT_MUL_CONST_SIMPLE;
487
}
488
 
489
 
490
/* generate code for multiply by constant */
491
static void mul_const_simple
492
    PROTO_N ( ( src, constval, dest, sgned ) )
493
    PROTO_T ( int src X long constval X int dest X bool sgned )
494
{
495
  int shift_const;
496
  long c;			/* power of two close to constval */
497
  int add_sub;			/* difference from power of two: +N add, 0
498
				 * nop, -N sub */
499
 
500
  if (sgned && constval < 0)
501
  {
502
    if (constval == -1)
503
    {
504
      /* X * -1 => -X */
505
      rrr_ins(i_sub,c_,0,src,dest);
506
      return;
507
    }
508
    constval = -constval;
509
    rrr_ins(i_sub,c_,0,src,GR1); /* incorrect to modify source */
510
    src = GR1;
511
  }
512
 
513
  if (constval==1)
514
  {
515
     if (src != dest)
516
	rr_ins(i_copy,src,dest);
517
     return;
518
  }
519
  else if (constval == 2)
520
  {
521
    /* use add, which can be peep-hole optimised to addcc later */
522
    rrr_ins(i_add,c_,src,src,dest);
523
    return;
524
  }
525
 
526
  add_sub = offset_mul_const_simple(constval, sgned);
527
  c = constval - add_sub;
528
 
529
  assert(constval == c + add_sub);
530
 
531
  shift_const = bit_no(c);
532
 
533
  FULLCOMMENT3("mul_const_simple: constval=%#lx shift_const=%d add_sub=%d", constval, shift_const, add_sub);
534
  assert(constval == (1 << shift_const) + add_sub);
535
 
536
  if (add_sub == 0)
537
     rrir_ins(i_shd,c_,src,0,32-shift_const,dest);
538
  else
539
  {
540
    /* add_sub != 0 */
541
    ins_p i_add_sub;
542
    int n;			/* number of add_sub instructions */
543
    int inter_reg;		/* for partial result */
544
    int i;
545
 
546
    if (add_sub > 0)
547
    {
548
      i_add_sub = i_add;
549
      n = add_sub;
550
    }
551
    else
552
    {
553
      i_add_sub = i_sub;
554
      n = -add_sub;
555
    }
556
 
557
    if (src == dest)
558
    {
559
      inter_reg = GR1;	/* must preserve src for add/sub */
560
    }
561
    else
562
    {
563
      inter_reg = dest;
564
    }
565
 
566
    assert(src != inter_reg);
567
 
568
    rrir_ins(i_shd,c_,src,0,32-shift_const,inter_reg);
569
 
570
    if (i_add_sub==i_add)
571
    {
572
       i=1;
573
       while (i<n)
574
       {
575
	  if (i+7<n)
576
	  {
577
	     rrr_ins(i_sh3add,c_,src,inter_reg,inter_reg);
578
	     i+=8;
579
	  }   
580
	  else if (i+3<n)
581
	  {
582
	     rrr_ins(i_sh2add,c_,src,inter_reg,inter_reg);
583
	     i+=4;
584
	  }   
585
	  else if (i+1<n)
586
	  {
587
	     rrr_ins(i_sh1add,c_,src,inter_reg,inter_reg);
588
	     i+=2;
589
	  }   
590
	  else 
591
	  {
592
	     rrr_ins(i_add,c_,src,inter_reg,inter_reg);
593
	     i++;
594
	  }   
595
       }
596
    }
597
    else
598
       for (i = 1; i < n; i++)
599
	   rrr_ins(i_add_sub,c_,inter_reg,src,inter_reg);
600
 
601
    /* final add_sub to dest reg */
602
    rrr_ins(i_add_sub,c_,inter_reg,src,dest);
603
 
604
  }
605
}
606
 
607
 
608
/* generate code for multiply by constant */
609
static void mul_const
610
    PROTO_N ( ( src, constval, dest, sp, sgned ) )
611
    PROTO_T ( int src X long constval X int dest X space sp X bool sgned )
612
{
613
  if (constval == 0)
614
     /* rare case not handled by mul_const_X() */
615
     rr_ins(i_copy,0,dest);
616
  else if (offset_mul_const_simple(constval, sgned) == NOT_MUL_CONST_SIMPLE)
617
    mul_const_complex(src, constval, dest, sp, sgned);
618
  else
619
    mul_const_simple(src, constval, dest, sgned);
620
}
621
 
622
 
623
/*
624
 *   Generate code for multiply .
625
 */
626
static int do_mul_comm 
627
    PROTO_N ( ( e, sp, final_reg, sgned ) )
628
    PROTO_T ( exp e X space sp X int final_reg X bool sgned )
629
{
630
  exp seq = son(e);
631
  exp arg2 = bro(seq);
632
  char *mul_proc;
633
  int arg = 1;
634
  baseoff b;
635
  int v;
636
 
637
  if (name(arg2) == val_tag)
638
  {
639
    /* const optim */
640
    v = reg_operand(seq, sp);
641
    sp = guardreg(v,sp);
642
    assert(last(arg2));	 /* check() & scan() should move const to last */
643
    if (final_reg == R_NO_REG)
644
    {
645
       final_reg = getreg(sp.fixed);
646
       sp = guardreg(final_reg, sp);
647
    }
648
    mul_const(v, no(arg2), final_reg, sp, sgned);
649
    return final_reg;
650
  }
651
 
652
 
653
  /* need to call .mul/.umul */
654
 
655
  mul_proc = (sgned ? "$$mulI" : "$$mulU");
656
 
657
  b=mem_temp(4);
658
  reg_operand_here(seq,sp,ARG0);
659
  st_ins(i_sw,ARG0,b);
660
  b=mem_temp(0);
661
 
662
 
663
  for (;;)
664
  {
665
    assert(!last(seq));		/* should have break out below by now */
666
 
667
    seq = bro(seq);
668
    arg++;
669
 
670
    FULLCOMMENT1("do_mul_comm: name(seq) = %d", name(seq));
671
 
672
    if (name(seq) == val_tag && offset_mul_const_simple(no(seq), sgned) != NOT_MUL_CONST_SIMPLE)
673
    {
674
      /* const optim */
675
      assert(last(seq)); /* check() & scan() should move const to last */
676
 
677
      stf_ins(i_fstw,14,b);
678
      ld_ins(i_lw,SIGNED,b,ARG0);
679
 
680
      if (final_reg == R_NO_REG)
681
	 final_reg = RET0;	
682
	 /* better code from mul_const if src != dest register */
683
 
684
      mul_const(ARG0, no(seq), final_reg, sp, sgned);
685
 
686
      break;
687
    }
688
    else
689
    {
690
       reg_operand_here(seq,sp,ARG0);
691
       if (last(seq) && b.offset<-17)
692
       {
693
	  ld_ins(i_lo,1,b,GR1);
694
	  b.base=GR1;
695
	  b.offset=0;
696
       }
697
       st_ins(i_sw,ARG0,b);
698
       if (arg==2)
699
	  ldf_ins(i_fldd,b,13);
700
       else
701
	  ldf_ins(i_fldw,b,12);
702
       rrrf_ins(i_xmpyu,f_,12,14,13);
703
 
704
       clear_t_regs();
705
 
706
       if (last(seq))
707
       {
708
	  stf_ins(i_fstw,14,b);
709
	  if (final_reg == R_NO_REG || final_reg == RET0)
710
	  {
711
	     ld_ins(i_lw,SIGNED,b,RET0);
712
 	     final_reg = RET0;  
713
	  }
714
	  else
715
	     ld_ins(i_lw,SIGNED,b,final_reg);
716
	  break;
717
       }
718
    }
719
  }
720
  return final_reg;
721
}
722
 
723
 
724
/*
725
 *   Generate code for div0, div1 and div2 by calling divI or divU unless
726
 *   dividing by a simple constant.
727
 */
728
static int do_div 
729
    PROTO_N ( ( e, sp, final_reg, sgned ) )
730
    PROTO_T ( exp e X space sp X int final_reg X bool sgned )
731
{
732
   exp seq = son(e);
733
   exp lhs = seq;
734
   exp rhs = bro(lhs);
735
   space nsp;
736
   int trap = 0;
737
   int sz = shape_size(sh(e));
738
   char *stub="ARGW0=GR ARGW1=GR";
739
   if ( !optop(e) )
740
      trap = trap_label(e);
741
   assert(last(rhs));
742
   /*
743
    *   ov_err can only occur when calculating p div1 q with p == variety's
744
    *   minimum and q==-1 
745
    */
746
   if ( name(rhs)==val_tag )
747
   {
748
      /*   nb. div_by_zero_err handled by common code  */
749
      int n = no(rhs);
750
      if ( n > 0 && IS_POW2(n) )
751
      {
752
	 int lhs_reg = reg_operand(lhs, sp);
753
	 int shift_const = bit_no(n);
754
	 sp = guardreg(lhs_reg, sp);
755
	 if ( final_reg == R_NO_REG )
756
	 {
757
	   final_reg = getreg(sp.fixed);
758
	 }
759
	 if ( n==1)
760
	 {
761
	    /*
762
	     *   div = lhs
763
	     */
764
 	    rr_ins(i_copy,lhs_reg,final_reg);
765
	    return final_reg;
766
	 }
767
	 else 
768
	 if (sgned)
769
	 {
770
	   /* signed, adjust lhs before shift */
771
 	   assert(shift_const > 0);/* assumed below */
772
	   if ( n==-1 && !optop(e) )
773
	   {
774
	      if (sz==8)
775
		 iiir_ins(i_zdepi,c_,-1,24,25,GR1);
776
	      else
777
	      if (sz==16)
778
		 iiir_ins(i_zdepi,c_,-1,16,17,GR1);
779
	      else
780
		 iiir_ins(i_zdepi,c_,-1,0,1,GR1);
781
	      cj_ins(c_eq,lhs_reg,GR1,trap);
782
	   }
783
	   if (shift_const - 1 != 0)
784
	   {
785
	      riir_ins(i_extrs,c_,lhs_reg,32-shift_const,33-shift_const,GR1);
786
	      rrir_ins(i_shd,c_,0,GR1,32-shift_const,GR1);
787
	   }
788
	   else
789
	      rrir_ins(i_shd,c_,0,lhs_reg,32-shift_const,GR1);
790
 	   rrr_ins(i_add,c_,lhs_reg,GR1,GR1);
791
	   riir_ins(i_extrs,c_,GR1,31-shift_const,32-shift_const,final_reg);
792
	 }
793
	 else
794
	    rrir_ins(i_shd,c_,0,lhs_reg,shift_const,final_reg);
795
	 return final_reg;
796
      }
797
   }
798
 
799
   /*  We will have to call divI or divU */
800
 
801
   reg_operand_here(lhs, sp, ARG0);
802
   nsp = guardreg(ARG0,sp);
803
   reg_operand_here(rhs,nsp,ARG1);
804
 
805
 
806
   if ( !optop(e) )
807
   {
808
      cj_ins(c_eq,GR0,ARG1,trap);
809
      if (sgned)
810
      {
811
	 comib_ins(c_neq,-1,ARG1,-16);        
812
	 z_ins(i_nop);
813
	 if (sz==8)
814
	    iiir_ins(i_zdepi,c_,-1,24,25,GR1);
815
	 else
816
	 if (sz==16)
817
	    iiir_ins(i_zdepi,c_,-1,16,17,GR1);
818
	 else
819
	    iiir_ins(i_zdepi,c_,-1,0,1,GR1);
820
	 cj_ins(c_eq,ARG0,GR1,trap);
821
      }
822
   }
823
 
824
   if (name(bro(rhs)) == div1_tag && sgned)
825
   {
826
      int fin = new_label();
827
      baseoff b;
828
      b = mem_temp(0);
829
      rrr_ins(i_or,c_neq,0,ARG0,RET1);
830
      ub_ins(cmplt_N,fin);
831
      st_ins(i_sw,ARG0,b);
832
      b.offset += 4;
833
      st_ins(i_sw,ARG1,b);
834
      call_millicode(MILLI_DIVI,RP,stub,1);
835
      ld_ins(i_lw,1,b,ARG1);
836
      b.offset -= 4;
837
      ld_ins(i_lw,1,b,ARG0);
838
      rrr_ins(i_xor,c_l,ARG0,ARG1,0);
839
      ub_ins(cmplt_N,fin);
840
      ld_ir_ins(i_ldo,cmplt_,fs_,empty_ltrl,b.offset,b.base,GR1);
841
      b.base = GR1; b.offset = 0;
842
      st_ins(i_sw,RET1,b);
843
      ldf_ins(i_fldd,b,13);
844
      rrrf_ins(i_xmpyu,f_,12,14,13);
845
      stf_ins(i_fstw,14,b);
846
      ld_ins(i_lw,1,b,ARG1);
847
      rrr_ins(i_comclr,c_eq,ARG0,ARG1,0);
848
      irr_ins(i_addi,c_,fs_,-1,RET1,RET1);
849
      outlab("L$$",fin);
850
   }
851
   else
852
   {
853
      call_millicode(sgned ? MILLI_DIVI : MILLI_DIVU,RP,stub,1);
854
   }
855
   clear_t_regs();
856
   /* result left in RET1 */
857
   return RET1;
858
}
859
 
860
 
861
/*
862
 *   Generate code for remainder using remI or remU unless  simple constant.
863
 */
864
static int do_rem 
865
    PROTO_N ( ( e, sp, final_reg, sgned ) )
866
    PROTO_T ( exp e X space sp X int final_reg X bool sgned )
867
{
868
   exp seq = son(e);
869
   exp lhs = seq;
870
   exp rhs = bro(lhs);
871
   int p=0;
872
   space nsp;
873
   int trap = 0;
874
   baseoff b;
875
   char *stub="ARGW0=GR ARGW1=GR";
876
   assert(last(rhs));
877
   b = mem_temp(0);
878
   if ( !optop(e) )
879
      trap = trap_label(e);
880
   if (name(rhs) == val_tag)
881
   {
882
      int n = no(rhs);
883
      if ( n==0 )
884
      {
885
	 if ( !optop(e) )
886
	    ub_ins(cmplt_N,trap);        
887
	 return GR0;
888
      }
889
      else 
890
      if ( IS_POW2(n) )
891
      {
892
	 int lhs_reg = reg_operand(lhs, sp);
893
	 sp = guardreg(lhs_reg, sp);
894
	 if (final_reg == R_NO_REG)
895
	 {
896
	    final_reg = getreg(sp.fixed);
897
	 }
898
	 if (n == 1)
899
	 {
900
	    /*
901
	     *   rem = 0
902
	     */
903
 	    rr_ins(i_copy,0,final_reg);
904
	    return final_reg;
905
	 }
906
	 while (((1<<(++p)) & n)==0);
907
	 if (sgned && name(bro(rhs)) == rem2_tag)
908
	 {
909
	    /*
910
	     *   Allow for negative lhs. Calculate lhs % n ( = 2**p ) by
911
	     *   anding lhs with mask, negating lhs before and after anding
912
	     *   if lhs<0.
913
	     */
914
	    if (lhs_reg==final_reg)
915
	    {
916
	       rrr_ins(i_or,c_g,0,lhs_reg,GR1);
917
	       rrr_ins(i_sub,c_,0,lhs_reg,lhs_reg);
918
	       riir_ins(i_dep,c_,0,31-p,32-p,lhs_reg);
919
	       rrr_ins(i_or,c_g,0,GR1,0);
920
	       rrr_ins(i_sub,c_,0,lhs_reg,lhs_reg);
921
	    }
922
	    else
923
	    {
924
	       rrr_ins(i_or,c_g,0,lhs_reg,final_reg);
925
	       rrr_ins(i_sub,c_,0,final_reg,final_reg);
926
	       riir_ins(i_dep,c_,0,31-p,32-p,final_reg);
927
	       rrr_ins(i_or,c_g,0,lhs_reg,0);
928
	       rrr_ins(i_sub,c_,0,final_reg,final_reg);
929
	    }
930
	 }
931
	 else
932
	 {
933
	    /*
934
	     *   Calculate lhs % n ( = 2**p ) by anding with mask.
935
	     */
936
	    if (lhs_reg==final_reg)   
937
	       riir_ins(i_dep,c_,0,31-p,32-p,final_reg);
938
	    else
939
	       riir_ins(i_zdep,c_,lhs_reg,31,p,final_reg);
940
	 }
941
	 return final_reg;
942
      }
943
      else
944
      {
945
	 /*
946
	  *   Need to call remI or remU.
947
	  */
948
	 reg_operand_here(lhs,sp,ARG0);
949
	 imm_to_r(n,ARG1);
950
	 if (sgned)
951
	 {
952
	    call_millicode(MILLI_REMI,RP,stub,1);
953
	    if (name(bro(rhs)) == mod_tag)
954
	    {
955
	       if (SIMM14(n))
956
	       {
957
		  if (n>0)
958
		     rrr_ins(i_comclr,c_geq,RET1,0,0);
959
		  else
960
		  if (n<0)
961
		     rrr_ins(i_comclr,c_leq,RET1,0,0);
962
		  ld_ir_ins(i_ldo,cmplt_,fs_,empty_ltrl,n,RET1,RET1);
963
	       }
964
	       else 
965
	       {
966
		  imm_to_r(n,ARG1);
967
		  if (n>0)
968
		     rrr_ins(i_comclr,c_geq,RET1,0,0);
969
		  else
970
		  if (n<0)
971
		     rrr_ins(i_comclr,c_leq,RET1,0,0);
972
		  rrr_ins(i_add,c_,ARG1,RET1,RET1);
973
	       }
974
	    }
975
	 }
976
	 else
977
	 {
978
	    call_millicode(MILLI_REMU,RP,stub,1);
979
	 }
980
	 clear_t_regs();
981
	 return RET1;  /* result left in RET1 */
982
      }
983
   }
984
   /*
985
    *   Need to call remI/.urem
986
    */
987
   reg_operand_here(lhs, sp, ARG0);
988
   nsp = guardreg(ARG0, sp);
989
   reg_operand_here(rhs, nsp, ARG1);
990
   if ( !optop(e) )
991
      cj_ins(c_eq,GR0,ARG1,trap);
992
 
993
   if (name(bro(rhs)) == mod_tag && sgned)
994
   {
995
      st_ins(i_sw,ARG1,b);
996
      call_millicode(MILLI_REMI,RP,stub,1);
997
      rrr_ins(i_comclr,c_eq,RET1,0,ARG1);
998
      ld_ins(i_lw,1,b,ARG1);
999
      rrr_ins(i_xor,c_geq,RET1,ARG1,0);
1000
      rrr_ins(i_add,c_,RET1,ARG1,RET1);
1001
   }
1002
   else
1003
   {
1004
      call_millicode( sgned ? MILLI_REMI : MILLI_REMU, RP, stub,1 );
1005
   }
1006
   clear_t_regs();
1007
   return RET1;  /* result left in RET1 */
1008
}
1009
 
1010
 
1011
typedef int ( *find_fn ) PROTO_S ( ( exp, space, int, bool ) ) ;
1012
 
1013
/* choose regs and generate code using do_fn */
1014
static int find_reg_and_apply
1015
    PROTO_N ( ( e, sp, dest, sgned, do_fn ) )
1016
    PROTO_T ( exp e X space sp X where dest X bool sgned X find_fn do_fn )
1017
{
1018
  ans a;
1019
  int dest_reg;
1020
 
1021
  /* +++ mips has tidyshort(dest, sh(e)); check not needed on HPPA */
1022
 
1023
  switch (discrim ( dest.answhere ) )
1024
  {
1025
  case inreg:
1026
    dest_reg = (*do_fn) (e, sp, regalt(dest.answhere), sgned);
1027
    break;
1028
 
1029
  case insomereg:
1030
    {
1031
      int *dr = someregalt(dest.answhere);
1032
 
1033
      *dr = (*do_fn) (e, sp, R_NO_REG, sgned);	/* leave (*do_fn)() to
1034
							 * allocate reg */
1035
      return *dr;		/* no need for move */
1036
    }
1037
 
1038
  default:
1039
    dest_reg = (*do_fn) (e, sp, R_NO_REG, sgned);	/* leave (*do_fn)() to
1040
							 * allocate reg */
1041
  }
1042
 
1043
  assert(dest_reg != R_NO_REG);
1044
 
1045
  setregalt(a, dest_reg);
1046
  sp = guardreg(dest_reg, sp);
1047
  move(a, dest, sp.fixed, sgned);
1048
 
1049
  return dest_reg;
1050
}
1051
 
1052
 
1053
 
1054
/* choose regs and generate code for multiply using multiply proc */
1055
int do_mul_comm_op 
1056
    PROTO_N ( ( e, sp, dest, sgned ) )
1057
    PROTO_T ( exp e X space sp X where dest X bool sgned )
1058
{
1059
   return find_reg_and_apply(e, sp, dest, sgned, do_mul_comm);
1060
}
1061
 
1062
 
1063
/* choose regs and generate code for divide using divide proc */
1064
int do_div_op 
1065
    PROTO_N ( ( e, sp, dest, sgned ) )
1066
    PROTO_T ( exp e X space sp X where dest X bool sgned )
1067
{
1068
  return find_reg_and_apply(e, sp, dest, sgned, do_div);
1069
}
1070
 
1071
 
1072
/* choose regs and generate code for rem using rem proc */
1073
int do_rem_op 
1074
    PROTO_N ( ( e, sp, dest, sgned ) )
1075
    PROTO_T ( exp e X space sp X where dest X bool sgned )
1076
{
1077
  return find_reg_and_apply(e, sp, dest, sgned, do_rem);
1078
}
1079
 
1080
 
1081
 
1082
/* is exp mul, div or rem that may call */
1083
bool is_muldivrem_call 
1084
    PROTO_N ( ( e ) )
1085
    PROTO_T ( exp e )
1086
{
1087
 
1088
  switch (name(e))
1089
  {
1090
 
1091
#if use_long_double
1092
     case test_tag:
1093
     case chfl_tag:
1094
     case round_tag:
1095
     {
1096
	 exp s = son(e);
1097
	 if ( name(sh(s))==doublehd )
1098
	    return (1) ;
1099
	 /* FALL THROUGH */
1100
     }      
1101
 
1102
     case fplus_tag:
1103
     case fminus_tag:
1104
     case fmult_tag:
1105
     case fdiv_tag:
1106
     case fneg_tag:
1107
     case fabs_tag:
1108
     case float_tag:
1109
     {
1110
	if ( name(sh(e))==doublehd)
1111
	   return(1);
1112
	else
1113
	   return(0);
1114
     }
1115
#endif
1116
#if 0
1117
   case chvar_tag:
1118
     e = son(e);		/* fall through, look at arg */
1119
#endif
1120
      case mult_tag:
1121
      case offset_mult_tag:
1122
      {
1123
	/*multneeds - simple cases don't need a call */
1124
	exp arg2 = bro(son(e));
1125
	if (last(arg2) && name(arg2) == val_tag)
1126
	{
1127
	  return 0;
1128
	}
1129
	return 1;
1130
      }    
1131
 
1132
    case div0_tag:
1133
    case rem0_tag:
1134
    case div1_tag:
1135
    case div2_tag:
1136
    case mod_tag:
1137
    case rem2_tag:
1138
    case offset_div_tag:
1139
    case offset_div_by_int_tag: 
1140
      {
1141
	/*remneeds, divneeds - simple cases don't need a call */
1142
	exp arg2 = bro(son(e));
1143
 
1144
	if (last(arg2) && name(arg2) == val_tag)
1145
	{
1146
	  long constval = no(arg2);
1147
	  if (constval > 0 && IS_POW2(constval))
1148
	  {
1149
	    return 0;
1150
	  }
1151
	}
1152
	return 1;
1153
      }
1154
  default:
1155
      return 0;
1156
    }
1157
}
1158
 
1159
 
1160
/*
1161
 * Needs estimation
1162
 */
1163
 
1164
 
1165
needs multneeds 
1166
    PROTO_N ( ( e, at ) )
1167
    PROTO_T ( exp * e X exp ** at )
1168
{
1169
  needs n ;
1170
  exp arg1 = son(*(e));
1171
  exp arg2 = bro(arg1);
1172
  n = likeplus(e, at);	/* has had comm_ass() treatment */
1173
 
1174
  /* remember that mult may have more than two args after optimisation */
1175
 
1176
  if (last(arg2) && name(arg2) == val_tag)
1177
  {
1178
 
1179
    /*
1180
     * const optim, additional reg only needed where src and dest are same
1181
     * reg, in which case it has already been allowed for.
1182
     */
1183
    return n;
1184
  }
1185
 
1186
  /* default, call .mul */
1187
  n.fixneeds = maxfix;
1188
#if 1
1189
  n.propsneeds |= hasproccall;
1190
#endif
1191
  return n;
1192
}
1193
 
1194
 
1195
needs divneeds 
1196
    PROTO_N ( ( e, at ) )
1197
    PROTO_T ( exp * e X exp ** at )
1198
{
1199
  needs n ;
1200
  exp lhs = son(*(e));
1201
  exp rhs = bro(lhs);
1202
  n = likediv(e, at);
1203
 
1204
  assert(last(rhs));
1205
 
1206
  if (name(rhs) == val_tag)
1207
  {
1208
    long constval = no(rhs);
1209
 
1210
    if (constval > 0 && IS_POW2(constval))
1211
    {
1212
      /* const optim, replace div by positive, non-zero, 2**n by shift right */
1213
 
1214
      return n;
1215
    }
1216
  }
1217
 
1218
  /* default, call .div */
1219
  n.fixneeds = maxfix;
1220
  n.propsneeds |= hasproccall;
1221
 
1222
  return n;
1223
}
1224
 
1225
 
1226
needs remneeds 
1227
    PROTO_N ( ( e, at ) )
1228
    PROTO_T ( exp * e X exp ** at )
1229
{
1230
  needs n ;
1231
  exp lhs = son(*(e));
1232
  exp rhs = bro(lhs);
1233
  n = likediv(e, at);
1234
 
1235
  assert(last(rhs));
1236
  if (name(rhs) == val_tag)
1237
  {
1238
    long constval = no(rhs);
1239
 
1240
    if (constval > 0 && IS_POW2(constval))
1241
    {
1242
      /* const optim of rem by positive, non-zero, 2**n */
1243
 
1244
      return n;
1245
    }
1246
  }
1247
  /* default, call .rem */
1248
  n.fixneeds = maxfix;
1249
  n.propsneeds |= hasproccall;
1250
  return n;
1251
}
1252
 
1253
 
1254
 
1255
 
1256
 
1257
 
1258
 
1259
 
1260
 
1261
 
1262