Subversion Repositories tendra.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 7u83 1
/*
7 7u83 2
 * Copyright (c) 2002-2005 The TenDRA Project <http://www.tendra.org/>.
3
 * All rights reserved.
4
 *
5
 * Redistribution and use in source and binary forms, with or without
6
 * modification, are permitted provided that the following conditions are met:
7
 *
8
 * 1. Redistributions of source code must retain the above copyright notice,
9
 *    this list of conditions and the following disclaimer.
10
 * 2. Redistributions in binary form must reproduce the above copyright notice,
11
 *    this list of conditions and the following disclaimer in the documentation
12
 *    and/or other materials provided with the distribution.
13
 * 3. Neither the name of The TenDRA Project nor the names of its contributors
14
 *    may be used to endorse or promote products derived from this software
15
 *    without specific, prior written permission.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
18
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
21
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22
 * EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
24
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
27
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 *
29
 * $Id$
30
 */
31
/*
2 7u83 32
    		 Crown Copyright (c) 1997
7 7u83 33
 
2 7u83 34
    This TenDRA(r) Computer Program is subject to Copyright
35
    owned by the United Kingdom Secretary of State for Defence
36
    acting through the Defence Evaluation and Research Agency
37
    (DERA).  It is made available to Recipients with a
38
    royalty-free licence for its use, reproduction, transfer
39
    to other parties and amendment for any purpose not excluding
40
    product development provided that any such use et cetera
41
    shall be deemed to be acceptance of the following conditions:-
7 7u83 42
 
2 7u83 43
	(1) Its Recipients shall ensure that this Notice is
44
	reproduced upon any copies or amended versions of it;
7 7u83 45
 
2 7u83 46
	(2) Any amended version of it shall be clearly marked to
47
	show both the nature of and the organisation responsible
48
	for the relevant amendment or amendments;
7 7u83 49
 
2 7u83 50
	(3) Its onward transfer from a recipient to another
51
	party shall be deemed to be that party's acceptance of
52
	these conditions;
7 7u83 53
 
2 7u83 54
	(4) DERA gives no warranty or assurance as to its
55
	quality or suitability for any purpose and DERA accepts
56
	no liability whatsoever in relation to any use to which
57
	it may be put.
58
*/
59
 
60
 
61
/*
62
$Log: muldvrem.c,v $
63
 * Revision 1.1.1.1  1998/01/17  15:56:03  release
64
 * First version to be checked into rolling release.
65
 *
66
 * Revision 1.3  1996/03/22  13:34:09  wfs
67
 * Corrections to the dynamic initialization stuff in translat.c + bad
68
 * needscan.c code deleted.
69
 *
70
 * Revision 1.2  1995/12/18  13:12:05  wfs
71
 * Put hppatrans under cvs control. Major Changes made since last release
72
 * include:
73
 * (i) PIC code generation.
74
 * (ii) Profiling.
75
 * (iii) Dynamic Initialization.
76
 * (iv) Debugging of Exception Handling and Diagnostics.
77
 *
78
 * Revision 5.5  1995/10/30  12:57:05  wfs
79
 * Removed an "if" statement left in by mistake.
80
 *
81
 * Revision 5.4  1995/10/26  15:30:24  wfs
82
 * Had forgotten to check for division by 0 in div's and rem's.
83
 *
84
 * Revision 5.3  1995/10/20  14:06:26  wfs
85
 * gcc compilation changes.
86
 *
87
 * Revision 5.2  1995/10/09  13:01:46  wfs
88
 * Cosmetic changes.
89
 *
90
 * Revision 5.1  1995/09/15  12:56:17  wfs
91
 * Use of "trap_label" defined in "makecode.c". Removed a parameter
92
 * from a call of "rr_ins" which shouldn't have been there.
93
 *
94
 * Revision 5.0  1995/08/25  13:42:58  wfs
95
 * Preparation for August 25 Glue release
96
 *
97
 * Revision 3.4  1995/08/25  09:56:26  wfs
98
 * register synonyms changed. bug fixes to error jump's
99
 *
100
 * Revision 3.4  1995/08/25  09:56:26  wfs
101
 * register synonyms changed. bug fixes to error jump's
102
 *
103
 * Revision 3.1  95/04/10  16:27:27  16:27:27  wfs (William Simmonds)
104
 * Apr95 tape version.
7 7u83 105
 *
2 7u83 106
 * Revision 3.0  95/03/30  11:18:22  11:18:22  wfs (William Simmonds)
107
 * Mar95 tape version with CRCR95_178 bug fix.
7 7u83 108
 *
2 7u83 109
 * Revision 2.0  95/03/15  15:28:11  15:28:11  wfs (William Simmonds)
110
 * spec 3.1 changes implemented, tests outstanding.
7 7u83 111
 *
2 7u83 112
 * Revision 1.3  95/02/02  15:45:02  15:45:02  wfs (William Simmonds)
113
 * Implemented rem1 and div1.
7 7u83 114
 *
2 7u83 115
 * Revision 1.2  95/01/17  17:29:42  17:29:42  wfs (William Simmonds)
116
 * Changed name of an included header file.
7 7u83 117
 *
2 7u83 118
 * Revision 1.1  95/01/11  13:13:34  13:13:34  wfs (William Simmonds)
119
 * Initial revision
7 7u83 120
 *
2 7u83 121
*/
122
 
123
 
124
#define HPPATRANS_CODE
125
#include "config.h"
126
#include "myassert.h"
127
#include "needscan.h"
128
#include "addrtypes.h"
129
#include "tags.h"
130
#include "expmacs.h"
131
#include "installtypes.h"
132
#include "exp.h"
133
#include "exptypes.h"
134
#include "maxminmacs.h"
135
#include "shapemacs.h"
136
#include "proctypes.h"
137
#include "eval.h"
138
#include "move.h"
139
#include "oprators.h"
140
#include "comment.h"
141
#include "getregs.h"
142
#include "guard.h"
143
#include "locate.h"
144
#include "codehere.h"
145
#include "inst_fmt.h"
146
#include "hppains.h"
147
#include "bitsmacs.h"
148
#include "labels.h"
149
#include "regexps.h"
150
#include "special.h"
151
#include "regmacs.h"
152
#include "needscan.h"
153
#include "translat.h"
154
#include "muldvrem.h"
155
#include "proc.h"
156
#include "out.h"
157
 
158
 
159
#define BITS_PER_WORD		32
160
 
161
#define MAX_MUL_POW2_OFFSET	2	/* max permissable X in 2**n +- X for
162
					 * constant multiply */
163
 
164
#define NOT_MUL_CONST_SIMPLE	(MAX_MUL_POW2_OFFSET+1)
165
 /* any constant larger than permissable X offset in 2**n +- X */
166
 
7 7u83 167
#define IS_POW2(c)		((c)!= 0 && ((c) & ((c) -1)) == 0)
2 7u83 168
 
7 7u83 169
extern long trap_label(exp);
170
extern comib_ins(ins_p,int,int,int);
2 7u83 171
/*
172
 * Utility functions.
173
 */
174
 
175
/*
 * Return the position (0 = least significant bit) of the single bit that is
 * set in 'c'.  'c' must have exactly one bit set.
 */
static int bit_no(unsigned long c)
{
  unsigned long probe = 1;
  int position = 0;

  /* same test as IS_POW2(c): non-zero with exactly one bit set */
  assert(c != 0 && (c & (c - 1)) == 0);

  /* walk a single bit leftwards until it lines up with 'c' */
  while (probe != c)
  {
    probe = probe << 1;
    position++;
  }

  return position;
}
191
 
192
 
193
void clear_t_regs
7 7u83 194
(void)
2 7u83 195
{
196
  /* clear t-regs, i.e. GR2,GR19,GR20..,GR31, which might be modified by a
197
     call */
198
  int r;
199
  clear_reg(GR2);
200
  for (r = GR19; r < GR31+1; r++)
201
  {
202
     clear_reg(r);
203
  }
204
}
205
 
206
 
207
/* call millicode library procedure for complicated operation */
7 7u83 208
int call_muldivrem
209
(exp lhs, exp rhs, space sp, int proc)
2 7u83 210
{
211
    char *stub="ARGW0=GR ARGW1=GR";
212
    reg_operand_here(lhs, sp, ARG0);
213
    sp = needreg(ARG0, sp);
214
 
215
    reg_operand_here(rhs,sp,ARG1);
216
    sp = needreg(ARG1,sp);
217
    call_millicode(proc,RP,stub,1);
218
 
219
    clear_t_regs();
220
 
221
    /* result left in RET1 */
222
    return RET1;
223
}
224
 
225
 
226
/*
227
 * Multiply.
228
 */
229
 
230
 
231
/* generate code for multiply by constant */
232
static void mul_const_complex
7 7u83 233
(int src, long constval, int dest, space sp, bool sgned)
2 7u83 234
{
235
  struct
236
  {
237
    unsigned char bsl;		/* bit-string of 1s length */
238
    unsigned char shift;	/* shift from right of word */
239
  }      bs_tab[BITS_PER_WORD / 2];
240
 
241
  int bs_tab_len = 0;
242
  int bsl_1_tab = -1;
243
  int max_bsl = 0;
244
 
245
  comment1("multiply by %ld", constval);
246
 
247
 
248
  /* special case ~0 (all 1) which cannot be handled by the general algorithm */
249
  if (constval == ~0)
250
  {
251
    if (sgned)
252
    {
253
       assert(constval == -1);
254
       /* X * -1 => -X */
255
       rrr_ins(i_sub,c_,0,src,dest);
256
    }
257
    else
258
    {
259
 
260
      /*
261
       * only 2 non overflowing cases to consider 0 * ~0 == 0 1 * ~0 == ~0 ==
262
       * -1
7 7u83 263
       *
2 7u83 264
       * negate handles these two correctly
265
       */
266
      rrr_ins(i_sub,c_,0,src,dest);
267
    }
268
    return;
269
  }
270
 
271
 
272
  /* set up bs_tab from constval */
273
  {
274
    unsigned long c = constval;
275
    int bsl = 0;
276
    int shift;
277
 
278
    for (c = constval, shift = 0; shift <= BITS_PER_WORD; shift++, c >>= 1)
279
    {
280
      if (c & 1)
281
      {
282
	bsl++;
283
      }
284
      else if (bsl != 0)
285
      {
286
	/* a complete all-1s bit-string */
287
	assert(bs_tab_len < BITS_PER_WORD / 2);
288
	bs_tab[bs_tab_len].bsl = bsl;
289
	bs_tab[bs_tab_len].shift = shift - bsl;	/* .shift is from right */
290
	if (bsl == 1)
291
	  bsl_1_tab = bs_tab_len;
292
	if (bsl > max_bsl)
293
	  max_bsl = bsl;
7 7u83 294
	comment4("mul_const_complex: bs_tab[%d] =%d,%d c=%d", bs_tab_len, bs_tab[bs_tab_len].bsl, bs_tab[bs_tab_len].shift, c);
2 7u83 295
	bs_tab_len++;
296
	bsl = 0;
297
      }
298
    }
299
  }
300
 
301
  comment2("mul_const_complex: max_bsl=%d bsl_1_tab=%d", max_bsl, bsl_1_tab);
302
 
303
  assert(bs_tab_len > 0);	/* shouldn't be here otherwise */
304
  assert(max_bsl >= 1);
305
  assert(max_bsl <= 31);	/* shifts by 32 don't work */
306
 
307
  /* generate the code */
308
  {
309
    int bsl;
310
    int bsl_laststep_tab;
311
    int tmp = GR1;
312
    int accum;
313
    bool accum_init = 0;	/* set to 1 when 'accum' reg initialised */
314
 
315
 
316
    /* allocate regs */
317
    assert(src != GR1);
318
    assert(dest != GR1);
319
 
320
    if (src != dest)
321
      accum = dest;
322
    else
323
      accum = getreg(sp.fixed);
324
 
325
    assert(src != accum);
326
 
327
 
328
    /* +++ neg */
329
 
330
    /* init accum if useful */
331
    if (bsl_1_tab >= 0 && bs_tab[bsl_1_tab].shift != 0)
332
    {
333
 
334
      /*
335
       * Usefully do one of the 1 bit strings with simple shift to accum. If
336
       * left to general algorithm 2 instructions, shift and move/add, would
337
       * often be used.
338
       */
339
      assert(bs_tab[bsl_1_tab].bsl == 1);
340
      rrir_ins(i_shd,c_,src,0,32-bs_tab[bsl_1_tab].shift,accum);
341
      bs_tab[bsl_1_tab].bsl = 0;/* mark as done */
342
      accum_init = 1;
343
    }
344
 
345
 
346
    /* find last cond generation step, so we can move to dest at that step */
347
    bsl_laststep_tab = -1;
348
 
349
    for (bsl = max_bsl; bsl > 0; bsl--)
350
    {
351
      int i;
352
 
353
      for (i = 0; i < bs_tab_len; i++)
354
      {
355
	if (bs_tab[i].bsl == bsl)
356
	  bsl_laststep_tab = i;
357
      }
358
    }
359
 
360
    assert(bsl_laststep_tab != -1);
361
 
362
 
363
    /*
364
     * accumulate handle all bit strings of same length together, so
365
     * 'src*((2**bsl)-1)' can be shared
366
     */
367
    for (bsl = max_bsl; bsl > 0; bsl--)
368
    {
369
      bool found_bsl = 0;
370
      int tmp_shifted=0;
371
      int i;
372
 
373
      for (i = 0; i < bs_tab_len; i++)
374
      {
375
	if (bs_tab[i].bsl == bsl)
376
	{
377
	  int to_accum_reg;	/* reg to be added to 'accum' at end of step */
378
	  int step_accum_dest = (i == bsl_laststep_tab ? dest : accum);
379
 
380
	  assert(accum != R_NO_REG);
381
 
382
	  /* amount to accum into tmp reg */
383
	  if (bsl == 1)
384
	  {
385
	    /* accumulate src<<shift */
386
	    if (bs_tab[i].shift == 0)
387
	    {
388
	       /* simple add */
389
	       to_accum_reg = src;
390
	       if (accum_init)
391
		  rrr_ins(i_add,c_,accum,to_accum_reg,step_accum_dest);
392
	       else
393
	       {
394
		  rr_ins(i_copy,to_accum_reg,step_accum_dest);
395
		  accum_init = 1;
396
	       }
397
	    }
398
	    else
399
	    {
400
	       /* simple shift and add */
401
	       to_accum_reg = tmp;
402
 	       if (accum_init)
403
	       {
404
		  if (bs_tab[i].shift==1)
405
		     rrr_ins(i_sh1add,c_,src,accum,step_accum_dest);
406
		  else if (bs_tab[i].shift==2)
407
		     rrr_ins(i_sh2add,c_,src,accum,step_accum_dest);
408
		  else if (bs_tab[i].shift==3)
409
		     rrr_ins(i_sh3add,c_,src,accum,step_accum_dest);
410
		  else
411
		  {
412
		     rrir_ins(i_shd,c_,src,0,32-bs_tab[i].shift,tmp);
413
		     rrr_ins(i_add,c_,accum,to_accum_reg,step_accum_dest);
414
		  }
415
	       }
416
	       else
417
	       {
418
		  rrir_ins(i_shd,c_,src,0,32-bs_tab[i].shift,step_accum_dest);
419
		  accum_init = 1;
420
	       }
421
	    }
422
	  }
423
	  else
424
	  {			/* bsl != 1 */
425
	    /* accumulate (src*((2**bsl)-1))<<shift */
426
 
427
	    to_accum_reg = tmp;
428
	    if (!found_bsl)
429
	    {
430
	      if (bsl==2)
431
		 rrr_ins(i_sh1add,c_,src,src,tmp);
432
	      else
433
	      {
434
		 rrir_ins(i_shd,c_,src,0,32-bsl,tmp);
435
		 rrr_ins(i_sub,c_,tmp,src,tmp);
436
	      }
437
	      tmp_shifted = 0;
438
	      found_bsl = 1;
439
	    }
440
 
441
	    if (bs_tab[i].shift != tmp_shifted)
442
	    {
443
	      int extra_shift = bs_tab[i].shift - tmp_shifted;
444
 
445
	      assert(extra_shift > 0 && extra_shift <= 31);
446
	      rrir_ins(i_shd,c_,tmp,0,32-extra_shift,tmp);
447
	      tmp_shifted += extra_shift;
448
	    }
449
	    /* else tmp already shifted to correct position */
450
 
451
	    to_accum_reg = tmp;
452
 
453
	    if (accum_init)
454
	       rrr_ins(i_add,c_,accum,to_accum_reg,step_accum_dest);
455
	    else
456
	    {
457
	       rr_ins(i_copy,to_accum_reg,step_accum_dest);
458
	       accum_init=1;
459
	    }
460
 
461
	  }
462
 
463
	  if (i == bsl_laststep_tab)
464
	    accum = R_NO_REG;	/* error check */
465
	}
466
      }
467
    }
468
 
469
    assert(accum_init);
470
    assert(accum == R_NO_REG);
471
 
472
    /* result in dest, due to step_accum_dest above */
473
  }
474
 
475
  comment1("end multiply by %ld", constval);
476
}
477
 
478
/* is constval +ve const 2**n or 2**(n +- X) where abs(X) <= MAX_MUL_POW2_OFFSET */
7 7u83 479
static int offset_mul_const_simple
480
(long constval, bool sgned)
2 7u83 481
{
482
  int i;
483
 
484
  FULLCOMMENT1("offset_mul_const_simple: %ld", constval);
485
 
486
  if (constval < 0)
487
  {
488
    if (sgned)
489
      constval = -constval;
490
    else
491
      return NOT_MUL_CONST_SIMPLE;	/* very rare case */
492
  }
493
 
494
  for (i = 0; i <= MAX_MUL_POW2_OFFSET; i++)
495
  {
496
    long c;			/* power of two close to constval */
497
 
498
    /* check for add offsets, avoiding overflow confusion */
499
    c = constval - i;
500
    if (IS_POW2(c) && c + i == constval)
501
      return i;
502
 
503
    /* check for sub offset of 1 only, avoiding overflow confusion */
504
    if (i == 1)
505
    {
506
      c = constval + i;
507
      if (IS_POW2(c) && c - i == constval)
508
	return -i;
509
    }
510
  }
511
 
512
  return NOT_MUL_CONST_SIMPLE;
513
}
514
 
515
 
516
/* generate code for multiply by constant */
517
static void mul_const_simple
7 7u83 518
(int src, long constval, int dest, bool sgned)
2 7u83 519
{
520
  int shift_const;
521
  long c;			/* power of two close to constval */
522
  int add_sub;			/* difference from power of two: +N add, 0
523
				 * nop, -N sub */
524
 
525
  if (sgned && constval < 0)
526
  {
527
    if (constval == -1)
528
    {
529
      /* X * -1 => -X */
530
      rrr_ins(i_sub,c_,0,src,dest);
531
      return;
532
    }
533
    constval = -constval;
534
    rrr_ins(i_sub,c_,0,src,GR1); /* incorrect to modify source */
535
    src = GR1;
536
  }
537
 
538
  if (constval==1)
539
  {
540
     if (src != dest)
541
	rr_ins(i_copy,src,dest);
542
     return;
543
  }
544
  else if (constval == 2)
545
  {
546
    /* use add, which can be peep-hole optimised to addcc later */
547
    rrr_ins(i_add,c_,src,src,dest);
548
    return;
549
  }
550
 
551
  add_sub = offset_mul_const_simple(constval, sgned);
552
  c = constval - add_sub;
553
 
554
  assert(constval == c + add_sub);
555
 
556
  shift_const = bit_no(c);
557
 
558
  FULLCOMMENT3("mul_const_simple: constval=%#lx shift_const=%d add_sub=%d", constval, shift_const, add_sub);
559
  assert(constval == (1 << shift_const) + add_sub);
560
 
561
  if (add_sub == 0)
562
     rrir_ins(i_shd,c_,src,0,32-shift_const,dest);
563
  else
564
  {
565
    /* add_sub != 0 */
566
    ins_p i_add_sub;
567
    int n;			/* number of add_sub instructions */
568
    int inter_reg;		/* for partial result */
569
    int i;
570
 
571
    if (add_sub > 0)
572
    {
573
      i_add_sub = i_add;
574
      n = add_sub;
575
    }
576
    else
577
    {
578
      i_add_sub = i_sub;
579
      n = -add_sub;
580
    }
581
 
582
    if (src == dest)
583
    {
584
      inter_reg = GR1;	/* must preserve src for add/sub */
585
    }
586
    else
587
    {
588
      inter_reg = dest;
589
    }
590
 
591
    assert(src != inter_reg);
592
 
593
    rrir_ins(i_shd,c_,src,0,32-shift_const,inter_reg);
7 7u83 594
 
2 7u83 595
    if (i_add_sub==i_add)
596
    {
597
       i=1;
598
       while (i<n)
599
       {
600
	  if (i+7<n)
601
	  {
602
	     rrr_ins(i_sh3add,c_,src,inter_reg,inter_reg);
603
	     i+=8;
7 7u83 604
	  }
2 7u83 605
	  else if (i+3<n)
606
	  {
607
	     rrr_ins(i_sh2add,c_,src,inter_reg,inter_reg);
608
	     i+=4;
7 7u83 609
	  }
2 7u83 610
	  else if (i+1<n)
611
	  {
612
	     rrr_ins(i_sh1add,c_,src,inter_reg,inter_reg);
613
	     i+=2;
7 7u83 614
	  }
615
	  else
2 7u83 616
	  {
617
	     rrr_ins(i_add,c_,src,inter_reg,inter_reg);
618
	     i++;
7 7u83 619
	  }
2 7u83 620
       }
621
    }
622
    else
623
       for (i = 1; i < n; i++)
624
	   rrr_ins(i_add_sub,c_,inter_reg,src,inter_reg);
625
 
626
    /* final add_sub to dest reg */
627
    rrr_ins(i_add_sub,c_,inter_reg,src,dest);
628
 
629
  }
630
}
631
 
632
 
633
/* generate code for multiply by constant */
634
static void mul_const
7 7u83 635
(int src, long constval, int dest, space sp, bool sgned)
2 7u83 636
{
637
  if (constval == 0)
638
     /* rare case not handled by mul_const_X() */
639
     rr_ins(i_copy,0,dest);
640
  else if (offset_mul_const_simple(constval, sgned) == NOT_MUL_CONST_SIMPLE)
641
    mul_const_complex(src, constval, dest, sp, sgned);
642
  else
643
    mul_const_simple(src, constval, dest, sgned);
644
}
645
 
646
 
647
/*
648
 *   Generate code for multiply .
649
 */
7 7u83 650
static int do_mul_comm
651
(exp e, space sp, int final_reg, bool sgned)
2 7u83 652
{
653
  exp seq = son(e);
654
  exp arg2 = bro(seq);
655
  char *mul_proc;
656
  int arg = 1;
657
  baseoff b;
658
  int v;
659
 
660
  if (name(arg2) == val_tag)
661
  {
662
    /* const optim */
663
    v = reg_operand(seq, sp);
664
    sp = guardreg(v,sp);
665
    assert(last(arg2));	 /* check() & scan() should move const to last */
666
    if (final_reg == R_NO_REG)
667
    {
668
       final_reg = getreg(sp.fixed);
669
       sp = guardreg(final_reg, sp);
670
    }
671
    mul_const(v, no(arg2), final_reg, sp, sgned);
672
    return final_reg;
673
  }
674
 
675
 
676
  /* need to call .mul/.umul */
677
 
678
  mul_proc = (sgned ? "$$mulI" : "$$mulU");
679
 
680
  b=mem_temp(4);
681
  reg_operand_here(seq,sp,ARG0);
682
  st_ins(i_sw,ARG0,b);
683
  b=mem_temp(0);
684
 
685
 
686
  for (;;)
687
  {
688
    assert(!last(seq));		/* should have break out below by now */
689
 
690
    seq = bro(seq);
691
    arg++;
692
 
693
    FULLCOMMENT1("do_mul_comm: name(seq) = %d", name(seq));
694
 
7 7u83 695
    if (name(seq) == val_tag && offset_mul_const_simple(no(seq), sgned)!= NOT_MUL_CONST_SIMPLE)
2 7u83 696
    {
697
      /* const optim */
698
      assert(last(seq)); /* check() & scan() should move const to last */
699
 
700
      stf_ins(i_fstw,14,b);
701
      ld_ins(i_lw,SIGNED,b,ARG0);
702
 
703
      if (final_reg == R_NO_REG)
7 7u83 704
	 final_reg = RET0;
2 7u83 705
	 /* better code from mul_const if src != dest register */
706
 
707
      mul_const(ARG0, no(seq), final_reg, sp, sgned);
708
 
709
      break;
710
    }
711
    else
712
    {
713
       reg_operand_here(seq,sp,ARG0);
714
       if (last(seq) && b.offset<-17)
715
       {
716
	  ld_ins(i_lo,1,b,GR1);
717
	  b.base=GR1;
718
	  b.offset=0;
719
       }
720
       st_ins(i_sw,ARG0,b);
721
       if (arg==2)
722
	  ldf_ins(i_fldd,b,13);
723
       else
724
	  ldf_ins(i_fldw,b,12);
725
       rrrf_ins(i_xmpyu,f_,12,14,13);
7 7u83 726
 
2 7u83 727
       clear_t_regs();
728
 
729
       if (last(seq))
730
       {
731
	  stf_ins(i_fstw,14,b);
732
	  if (final_reg == R_NO_REG || final_reg == RET0)
733
	  {
734
	     ld_ins(i_lw,SIGNED,b,RET0);
7 7u83 735
 	     final_reg = RET0;
2 7u83 736
	  }
737
	  else
738
	     ld_ins(i_lw,SIGNED,b,final_reg);
739
	  break;
740
       }
741
    }
742
  }
743
  return final_reg;
744
}
745
 
746
 
747
/*
748
 *   Generate code for div0, div1 and div2 by calling divI or divU unless
749
 *   dividing by a simple constant.
750
 */
7 7u83 751
static int do_div
752
(exp e, space sp, int final_reg, bool sgned)
2 7u83 753
{
754
   exp seq = son(e);
755
   exp lhs = seq;
756
   exp rhs = bro(lhs);
757
   space nsp;
758
   int trap = 0;
759
   int sz = shape_size(sh(e));
760
   char *stub="ARGW0=GR ARGW1=GR";
7 7u83 761
   if (!optop(e))
2 7u83 762
      trap = trap_label(e);
763
   assert(last(rhs));
764
   /*
765
    *   ov_err can only occur when calculating p div1 q with p == variety's
7 7u83 766
    *   minimum and q==-1
2 7u83 767
    */
7 7u83 768
   if (name(rhs) ==val_tag)
2 7u83 769
   {
770
      /*   nb. div_by_zero_err handled by common code  */
771
      int n = no(rhs);
7 7u83 772
      if (n > 0 && IS_POW2(n))
2 7u83 773
      {
774
	 int lhs_reg = reg_operand(lhs, sp);
775
	 int shift_const = bit_no(n);
776
	 sp = guardreg(lhs_reg, sp);
7 7u83 777
	 if (final_reg == R_NO_REG)
2 7u83 778
	 {
779
	   final_reg = getreg(sp.fixed);
780
	 }
7 7u83 781
	 if (n==1)
2 7u83 782
	 {
783
	    /*
784
	     *   div = lhs
785
	     */
786
 	    rr_ins(i_copy,lhs_reg,final_reg);
787
	    return final_reg;
788
	 }
7 7u83 789
	 else
2 7u83 790
	 if (sgned)
791
	 {
792
	   /* signed, adjust lhs before shift */
793
 	   assert(shift_const > 0);/* assumed below */
7 7u83 794
	   if (n==-1 && !optop(e))
2 7u83 795
	   {
796
	      if (sz==8)
797
		 iiir_ins(i_zdepi,c_,-1,24,25,GR1);
798
	      else
799
	      if (sz==16)
800
		 iiir_ins(i_zdepi,c_,-1,16,17,GR1);
801
	      else
802
		 iiir_ins(i_zdepi,c_,-1,0,1,GR1);
803
	      cj_ins(c_eq,lhs_reg,GR1,trap);
804
	   }
805
	   if (shift_const - 1 != 0)
806
	   {
807
	      riir_ins(i_extrs,c_,lhs_reg,32-shift_const,33-shift_const,GR1);
808
	      rrir_ins(i_shd,c_,0,GR1,32-shift_const,GR1);
809
	   }
810
	   else
811
	      rrir_ins(i_shd,c_,0,lhs_reg,32-shift_const,GR1);
812
 	   rrr_ins(i_add,c_,lhs_reg,GR1,GR1);
813
	   riir_ins(i_extrs,c_,GR1,31-shift_const,32-shift_const,final_reg);
814
	 }
815
	 else
816
	    rrir_ins(i_shd,c_,0,lhs_reg,shift_const,final_reg);
817
	 return final_reg;
818
      }
819
   }
820
 
821
   /*  We will have to call divI or divU */
822
 
823
   reg_operand_here(lhs, sp, ARG0);
824
   nsp = guardreg(ARG0,sp);
825
   reg_operand_here(rhs,nsp,ARG1);
826
 
827
 
7 7u83 828
   if (!optop(e))
2 7u83 829
   {
830
      cj_ins(c_eq,GR0,ARG1,trap);
831
      if (sgned)
832
      {
7 7u83 833
	 comib_ins(c_neq,-1,ARG1,-16);
2 7u83 834
	 z_ins(i_nop);
835
	 if (sz==8)
836
	    iiir_ins(i_zdepi,c_,-1,24,25,GR1);
837
	 else
838
	 if (sz==16)
839
	    iiir_ins(i_zdepi,c_,-1,16,17,GR1);
840
	 else
841
	    iiir_ins(i_zdepi,c_,-1,0,1,GR1);
842
	 cj_ins(c_eq,ARG0,GR1,trap);
843
      }
844
   }
845
 
846
   if (name(bro(rhs)) == div1_tag && sgned)
847
   {
848
      int fin = new_label();
849
      baseoff b;
850
      b = mem_temp(0);
851
      rrr_ins(i_or,c_neq,0,ARG0,RET1);
852
      ub_ins(cmplt_N,fin);
853
      st_ins(i_sw,ARG0,b);
854
      b.offset += 4;
855
      st_ins(i_sw,ARG1,b);
856
      call_millicode(MILLI_DIVI,RP,stub,1);
857
      ld_ins(i_lw,1,b,ARG1);
858
      b.offset -= 4;
859
      ld_ins(i_lw,1,b,ARG0);
860
      rrr_ins(i_xor,c_l,ARG0,ARG1,0);
861
      ub_ins(cmplt_N,fin);
862
      ld_ir_ins(i_ldo,cmplt_,fs_,empty_ltrl,b.offset,b.base,GR1);
863
      b.base = GR1; b.offset = 0;
864
      st_ins(i_sw,RET1,b);
865
      ldf_ins(i_fldd,b,13);
866
      rrrf_ins(i_xmpyu,f_,12,14,13);
867
      stf_ins(i_fstw,14,b);
868
      ld_ins(i_lw,1,b,ARG1);
869
      rrr_ins(i_comclr,c_eq,ARG0,ARG1,0);
870
      irr_ins(i_addi,c_,fs_,-1,RET1,RET1);
871
      outlab("L$$",fin);
872
   }
873
   else
874
   {
875
      call_millicode(sgned ? MILLI_DIVI : MILLI_DIVU,RP,stub,1);
876
   }
877
   clear_t_regs();
878
   /* result left in RET1 */
879
   return RET1;
880
}
881
 
882
 
883
/*
884
 *   Generate code for remainder using remI or remU unless  simple constant.
885
 */
7 7u83 886
static int do_rem
887
(exp e, space sp, int final_reg, bool sgned)
2 7u83 888
{
889
   exp seq = son(e);
890
   exp lhs = seq;
891
   exp rhs = bro(lhs);
892
   int p=0;
893
   space nsp;
894
   int trap = 0;
895
   baseoff b;
896
   char *stub="ARGW0=GR ARGW1=GR";
897
   assert(last(rhs));
898
   b = mem_temp(0);
7 7u83 899
   if (!optop(e))
2 7u83 900
      trap = trap_label(e);
901
   if (name(rhs) == val_tag)
902
   {
903
      int n = no(rhs);
7 7u83 904
      if (n==0)
2 7u83 905
      {
7 7u83 906
	 if (!optop(e))
907
	    ub_ins(cmplt_N,trap);
2 7u83 908
	 return GR0;
909
      }
7 7u83 910
      else
911
      if (IS_POW2(n))
2 7u83 912
      {
913
	 int lhs_reg = reg_operand(lhs, sp);
914
	 sp = guardreg(lhs_reg, sp);
915
	 if (final_reg == R_NO_REG)
916
	 {
917
	    final_reg = getreg(sp.fixed);
918
	 }
919
	 if (n == 1)
920
	 {
921
	    /*
922
	     *   rem = 0
923
	     */
924
 	    rr_ins(i_copy,0,final_reg);
925
	    return final_reg;
926
	 }
7 7u83 927
	 while (((1<< (++p)) & n) ==0);
2 7u83 928
	 if (sgned && name(bro(rhs)) == rem2_tag)
929
	 {
930
	    /*
931
	     *   Allow for negative lhs. Calculate lhs % n ( = 2**p ) by
932
	     *   anding lhs with mask, negating lhs before and after anding
933
	     *   if lhs<0.
934
	     */
935
	    if (lhs_reg==final_reg)
936
	    {
937
	       rrr_ins(i_or,c_g,0,lhs_reg,GR1);
938
	       rrr_ins(i_sub,c_,0,lhs_reg,lhs_reg);
939
	       riir_ins(i_dep,c_,0,31-p,32-p,lhs_reg);
940
	       rrr_ins(i_or,c_g,0,GR1,0);
941
	       rrr_ins(i_sub,c_,0,lhs_reg,lhs_reg);
942
	    }
943
	    else
944
	    {
945
	       rrr_ins(i_or,c_g,0,lhs_reg,final_reg);
946
	       rrr_ins(i_sub,c_,0,final_reg,final_reg);
947
	       riir_ins(i_dep,c_,0,31-p,32-p,final_reg);
948
	       rrr_ins(i_or,c_g,0,lhs_reg,0);
949
	       rrr_ins(i_sub,c_,0,final_reg,final_reg);
950
	    }
951
	 }
952
	 else
953
	 {
954
	    /*
955
	     *   Calculate lhs % n ( = 2**p ) by anding with mask.
956
	     */
7 7u83 957
	    if (lhs_reg==final_reg)
2 7u83 958
	       riir_ins(i_dep,c_,0,31-p,32-p,final_reg);
959
	    else
960
	       riir_ins(i_zdep,c_,lhs_reg,31,p,final_reg);
961
	 }
962
	 return final_reg;
963
      }
964
      else
965
      {
966
	 /*
967
	  *   Need to call remI or remU.
968
	  */
969
	 reg_operand_here(lhs,sp,ARG0);
970
	 imm_to_r(n,ARG1);
971
	 if (sgned)
972
	 {
973
	    call_millicode(MILLI_REMI,RP,stub,1);
974
	    if (name(bro(rhs)) == mod_tag)
975
	    {
976
	       if (SIMM14(n))
977
	       {
978
		  if (n>0)
979
		     rrr_ins(i_comclr,c_geq,RET1,0,0);
980
		  else
981
		  if (n<0)
982
		     rrr_ins(i_comclr,c_leq,RET1,0,0);
983
		  ld_ir_ins(i_ldo,cmplt_,fs_,empty_ltrl,n,RET1,RET1);
984
	       }
7 7u83 985
	       else
2 7u83 986
	       {
987
		  imm_to_r(n,ARG1);
988
		  if (n>0)
989
		     rrr_ins(i_comclr,c_geq,RET1,0,0);
990
		  else
991
		  if (n<0)
992
		     rrr_ins(i_comclr,c_leq,RET1,0,0);
993
		  rrr_ins(i_add,c_,ARG1,RET1,RET1);
994
	       }
995
	    }
996
	 }
997
	 else
998
	 {
999
	    call_millicode(MILLI_REMU,RP,stub,1);
1000
	 }
1001
	 clear_t_regs();
1002
	 return RET1;  /* result left in RET1 */
1003
      }
1004
   }
1005
   /*
1006
    *   Need to call remI/.urem
1007
    */
1008
   reg_operand_here(lhs, sp, ARG0);
1009
   nsp = guardreg(ARG0, sp);
1010
   reg_operand_here(rhs, nsp, ARG1);
7 7u83 1011
   if (!optop(e))
2 7u83 1012
      cj_ins(c_eq,GR0,ARG1,trap);
1013
 
1014
   if (name(bro(rhs)) == mod_tag && sgned)
1015
   {
1016
      st_ins(i_sw,ARG1,b);
1017
      call_millicode(MILLI_REMI,RP,stub,1);
1018
      rrr_ins(i_comclr,c_eq,RET1,0,ARG1);
1019
      ld_ins(i_lw,1,b,ARG1);
1020
      rrr_ins(i_xor,c_geq,RET1,ARG1,0);
1021
      rrr_ins(i_add,c_,RET1,ARG1,RET1);
1022
   }
1023
   else
1024
   {
7 7u83 1025
      call_millicode(sgned ? MILLI_REMI : MILLI_REMU, RP, stub,1);
2 7u83 1026
   }
1027
   clear_t_regs();
1028
   return RET1;  /* result left in RET1 */
1029
}
1030
 
1031
 
7 7u83 1032
typedef int(*find_fn)(exp, space, int, bool);
2 7u83 1033
 
1034
/* choose regs and generate code using do_fn */
1035
static int find_reg_and_apply
7 7u83 1036
(exp e, space sp, where dest, bool sgned, find_fn do_fn)
2 7u83 1037
{
1038
  ans a;
1039
  int dest_reg;
1040
 
1041
  /* +++ mips has tidyshort(dest, sh(e)); check not needed on HPPA */
1042
 
7 7u83 1043
  switch (discrim(dest.answhere))
2 7u83 1044
  {
1045
  case inreg:
7 7u83 1046
    dest_reg = (*do_fn)(e, sp, regalt(dest.answhere), sgned);
2 7u83 1047
    break;
1048
 
1049
  case insomereg:
1050
    {
1051
      int *dr = someregalt(dest.answhere);
1052
 
1053
      *dr = (*do_fn) (e, sp, R_NO_REG, sgned);	/* leave (*do_fn)() to
1054
							 * allocate reg */
1055
      return *dr;		/* no need for move */
1056
    }
1057
 
1058
  default:
1059
    dest_reg = (*do_fn) (e, sp, R_NO_REG, sgned);	/* leave (*do_fn)() to
1060
							 * allocate reg */
1061
  }
1062
 
1063
  assert(dest_reg != R_NO_REG);
1064
 
1065
  setregalt(a, dest_reg);
1066
  sp = guardreg(dest_reg, sp);
1067
  move(a, dest, sp.fixed, sgned);
1068
 
1069
  return dest_reg;
1070
}
1071
 
1072
 
1073
 
1074
/* choose regs and generate code for multiply using multiply proc */
7 7u83 1075
int do_mul_comm_op
1076
(exp e, space sp, where dest, bool sgned)
2 7u83 1077
{
1078
   return find_reg_and_apply(e, sp, dest, sgned, do_mul_comm);
1079
}
1080
 
1081
 
1082
/* choose regs and generate code for divide using divide proc */
7 7u83 1083
int do_div_op
1084
(exp e, space sp, where dest, bool sgned)
2 7u83 1085
{
1086
  return find_reg_and_apply(e, sp, dest, sgned, do_div);
1087
}
1088
 
1089
 
1090
/* choose regs and generate code for rem using rem proc */
7 7u83 1091
int do_rem_op
1092
(exp e, space sp, where dest, bool sgned)
2 7u83 1093
{
1094
  return find_reg_and_apply(e, sp, dest, sgned, do_rem);
1095
}
1096
 
1097
 
1098
 
1099
/* is exp mul, div or rem that may call */
7 7u83 1100
bool is_muldivrem_call
1101
(exp e)
2 7u83 1102
{
1103
 
1104
  switch (name(e))
1105
  {
1106
 
1107
#if use_long_double
1108
     case test_tag:
1109
     case chfl_tag:
1110
     case round_tag:
1111
     {
1112
	 exp s = son(e);
7 7u83 1113
	 if (name(sh(s)) ==doublehd)
1114
	    return(1);
2 7u83 1115
	 /* FALL THROUGH */
7 7u83 1116
     }
2 7u83 1117
 
1118
     case fplus_tag:
1119
     case fminus_tag:
1120
     case fmult_tag:
1121
     case fdiv_tag:
1122
     case fneg_tag:
1123
     case fabs_tag:
1124
     case float_tag:
1125
     {
7 7u83 1126
	if (name(sh(e)) ==doublehd)
2 7u83 1127
	   return(1);
1128
	else
1129
	   return(0);
1130
     }
1131
#endif
1132
#if 0
1133
   case chvar_tag:
1134
     e = son(e);		/* fall through, look at arg */
1135
#endif
1136
      case mult_tag:
1137
      case offset_mult_tag:
1138
      {
1139
	/*multneeds - simple cases don't need a call */
1140
	exp arg2 = bro(son(e));
1141
	if (last(arg2) && name(arg2) == val_tag)
1142
	{
1143
	  return 0;
1144
	}
1145
	return 1;
7 7u83 1146
      }
2 7u83 1147
 
1148
    case div0_tag:
1149
    case rem0_tag:
1150
    case div1_tag:
1151
    case div2_tag:
1152
    case mod_tag:
1153
    case rem2_tag:
1154
    case offset_div_tag:
7 7u83 1155
    case offset_div_by_int_tag:
2 7u83 1156
      {
1157
	/*remneeds, divneeds - simple cases don't need a call */
1158
	exp arg2 = bro(son(e));
1159
 
1160
	if (last(arg2) && name(arg2) == val_tag)
1161
	{
1162
	  long constval = no(arg2);
1163
	  if (constval > 0 && IS_POW2(constval))
1164
	  {
1165
	    return 0;
1166
	  }
1167
	}
1168
	return 1;
1169
      }
1170
  default:
1171
      return 0;
1172
    }
1173
}
1174
 
1175
 
1176
/*
1177
 * Needs estimation
1178
 */
1179
 
1180
 
7 7u83 1181
needs multneeds
1182
(exp * e, exp ** at)
2 7u83 1183
{
7 7u83 1184
  needs n;
2 7u83 1185
  exp arg1 = son(*(e));
1186
  exp arg2 = bro(arg1);
1187
  n = likeplus(e, at);	/* has had comm_ass() treatment */
1188
 
1189
  /* remember that mult may have more than two args after optimisation */
1190
 
1191
  if (last(arg2) && name(arg2) == val_tag)
1192
  {
1193
 
1194
    /*
1195
     * const optim, additional reg only needed where src and dest are same
1196
     * reg, in which case it has already been allowed for.
1197
     */
1198
    return n;
1199
  }
1200
 
1201
  /* default, call .mul */
1202
  n.fixneeds = maxfix;
1203
#if 1
1204
  n.propsneeds |= hasproccall;
1205
#endif
1206
  return n;
1207
}
1208
 
1209
 
7 7u83 1210
needs divneeds
1211
(exp * e, exp ** at)
2 7u83 1212
{
7 7u83 1213
  needs n;
2 7u83 1214
  exp lhs = son(*(e));
1215
  exp rhs = bro(lhs);
1216
  n = likediv(e, at);
1217
 
1218
  assert(last(rhs));
1219
 
1220
  if (name(rhs) == val_tag)
1221
  {
1222
    long constval = no(rhs);
1223
 
1224
    if (constval > 0 && IS_POW2(constval))
1225
    {
1226
      /* const optim, replace div by positive, non-zero, 2**n by shift right */
1227
 
1228
      return n;
1229
    }
1230
  }
1231
 
1232
  /* default, call .div */
1233
  n.fixneeds = maxfix;
1234
  n.propsneeds |= hasproccall;
1235
 
1236
  return n;
1237
}
1238
 
1239
 
7 7u83 1240
needs remneeds
1241
(exp * e, exp ** at)
2 7u83 1242
{
7 7u83 1243
  needs n;
2 7u83 1244
  exp lhs = son(*(e));
1245
  exp rhs = bro(lhs);
1246
  n = likediv(e, at);
1247
 
1248
  assert(last(rhs));
1249
  if (name(rhs) == val_tag)
1250
  {
1251
    long constval = no(rhs);
1252
 
1253
    if (constval > 0 && IS_POW2(constval))
1254
    {
1255
      /* const optim of rem by positive, non-zero, 2**n */
1256
 
1257
      return n;
1258
    }
1259
  }
1260
  /* default, call .rem */
1261
  n.fixneeds = maxfix;
1262
  n.propsneeds |= hasproccall;
1263
  return n;
1264
}
1265
 
1266
 
1267
 
1268
 
1269
 
1270
 
1271
 
1272
 
1273
 
1274
 
1275