/* BDNZ target: BC with BO=16, BI=0 -- decrement CTR and branch to target while CTR != 0 */
#define	BDNZ	BC	16,0,

/*
 * 64/64 division adapted from powerpc compiler writer's handbook
 *
 * (R3:R4) = (R3:R4) / (R5:R6)		(64b) = (64b / 64b)
 *   quo       dvd       dvs
 *
 * Remainder is left in R7:R8
 *
 * Code comment notation:
 * msw = most-significant (high-order) word, i.e. bits 0..31
 * lsw = least-significant (low-order) word, i.e. bits 32..63
 * LZ = Leading Zeroes
 * SD = Significant Digits
 *
 * R3:R4 = dvd (input dividend); quo (output quotient)
 * R5:R6 = dvs (input divisor)
 *
 * R7:R8 = tmp; rem (output remainder)
 */

/*
 * _divu64: unsigned 64-bit by 64-bit restoring (shift-and-subtract) division.
 *
 * Arguments are read from the frame as 32-bit words, msw first:
 *	a+0(FP), a+4(FP)	dividend (dvd)	-> R3:R4
 *	b+8(FP), b+12(FP)	divisor (dvs)	-> R5:R6
 *	qp+16(FP)		address for the quotient; nothing is stored if 0
 *	rp+20(FP)		address for the remainder; nothing is stored if 0
 *
 * Each result is stored msw at offset 0 and lsw at offset 4.
 * R7-R11, CTR, the carry bit and the condition register are used as scratch.
 * This routine contains no test for a zero divisor.
 */
TEXT	_divu64(SB), $0
	MOVW	a+0(FP), R3		/* dvd.msw */
	MOVW	a+4(FP), R4		/* dvd.lsw */
	MOVW	b+8(FP), R5		/* dvs.msw */
	MOVW	b+12(FP), R6		/* dvs.lsw */

	/* count the number of leading 0s in the dividend */
	CMP	R3, $0			/* dvd.msw == 0? */
	CNTLZW	R3, R11			/* R11 = dvd.msw.LZ */
	CNTLZW	R4, R9			/* R9 = dvd.lsw.LZ */
	BNE	lab1			/* if(dvd.msw != 0) dvd.LZ = dvd.msw.LZ */
	ADD	$32, R9, R11		/* dvd.LZ = dvd.lsw.LZ + 32 */

lab1:
	/* count the number of leading 0s in the divisor */
	CMP	R5, $0			/* dvs.msw == 0? */
	CNTLZW	R5, R9			/* R9 = dvs.msw.LZ */
	CNTLZW	R6, R10			/* R10 = dvs.lsw.LZ */
	BNE	lab2			/* if(dvs.msw != 0) dvs.LZ = dvs.msw.LZ */
	ADD	$32, R10, R9		/* dvs.LZ = dvs.lsw.LZ + 32 */

lab2:
	/* determine shift amounts to minimize the number of iterations */
	CMP	R11, R9			/* compare dvd.LZ to dvs.LZ */
	SUBC	R11, $64, R10		/* R10 = 64 - dvd.LZ = dvd.SD */
	BGT	lab9			/* dvd.LZ > dvs.LZ implies dvs > dvd: quotient = 0 */
	ADD	$1, R9			/* ++dvs.LZ (or --dvs.SD) */
	SUBC	R9, $64, R9		/* R9 = 64 - (dvs.LZ + 1) = dvs.SD - 1 */
	ADD	R9, R11			/* R11 = dvd.LZ + dvs.SD - 1 = left shift of dvd */
					/* for initial dvd */
	SUB	R9, R10, R9		/* R9 = dvd.SD - dvs.SD + 1 = right shift of dvd */
					/* for initial tmp */
	MOVW	R9, CTR			/* number of iterations = dvd.SD - dvs.SD + 1 */

	/* R7:R8 = R3:R4 >> R9 */
	CMP	R9, $32
	ADD	$-32, R9, R7		/* R7 = R9 - 32, used only when R9 >= 32 */
	BLT	lab3			/* if(R9 < 32) jump to lab3 */
	SRW	R7, R3, R8		/* tmp.lsw = dvd.msw >> (R9 - 32) */
	MOVW	$0, R7			/* tmp.msw = 0 */
	BR	lab4
lab3:
	SRW	R9, R4, R8		/* R8 = dvd.lsw >> R9 */
	SUBC	R9, $32, R7		/* R7 = 32 - R9 */
	SLW	R7, R3, R7		/* R7 = dvd.msw << (32 - R9) */
	OR	R7, R8			/* tmp.lsw = R8 | R7 */
	SRW	R9, R3, R7		/* tmp.msw = dvd.msw >> R9 */

lab4:
	/* R3:R4 = R3:R4 << R11 */
	CMP	R11,$32
	ADDC	$-32, R11, R9		/* R9 = R11 - 32, used only when R11 >= 32 */
	BLT	lab5			/* (R11 < 32)? */
	SLW	R9, R4, R3		/* dvd.msw = dvd.lsw << (R11 - 32) */
	MOVW	$0, R4			/* dvd.lsw = 0 */
	BR	lab6

lab5:
	SLW	R11, R3			/* R3 = dvd.msw << R11 */
	SUBC	R11, $32, R9		/* R9 = 32 - R11 */
	SRW	R9, R4, R9		/* R9 = dvd.lsw >> (32 - R11) */
	OR	R9, R3			/* dvd.msw = R3 | R9 */
	SLW	R11, R4			/* dvd.lsw = dvd.lsw << R11 */

lab6:
	/* restoring division shift and subtract loop */
	MOVW	$-1, R10		/* constant: R10 + 1 produces a carry out (see below) */
	ADDC	$0, R7			/* clear carry bit before loop starts */
lab7:
	/* tmp:dvd is considered one large register */
	/* each portion is shifted left 1 bit by adding it to itself */
	/* adde sums the carry from the previous and creates a new carry */
	/* the carry entering here is the quotient bit from the previous pass */
	ADDE	R4,R4			/* shift dvd.lsw left 1 bit */
	ADDE	R3,R3			/* shift dvd.msw to left 1 bit */
	ADDE	R8,R8			/* shift tmp.lsw to left 1 bit */
	ADDE	R7,R7			/* shift tmp.msw to left 1 bit */
	SUBC	R6, R8, R11		/* R11 = tmp.lsw - dvs.lsw */
	SUBECC	R5, R7, R9		/* R9 = tmp.msw - dvs.msw (with borrow), sets CR */
	BLT	lab8			/* if(result < 0) keep tmp; carry bit stays clear */
	MOVW	R11, R8			/* move lsw: tmp = tmp - dvs */
	MOVW	R9, R7			/* move msw */
	ADDC	$1, R10, R11		/* set carry bit (-1 + 1); R11 is a throwaway result */
lab8:
	BDNZ	lab7			/* --CTR; loop while CTR != 0 */

	/* shift the final quotient bit in from the carry */
	ADDE	R4,R4			/* quo.lsw (lsb = CA) */
	ADDE	R3,R3			/* quo.msw (lsb from lsw) */

lab10:
	/* store quotient and remainder through qp and rp, skipping a zero pointer */
	MOVW	qp+16(FP), R9
	MOVW	rp+20(FP), R10
	CMP	R9, $0
	BEQ	lab11			/* qp == 0: caller does not want the quotient */
	MOVW	R3, 0(R9)		/* quo.msw */
	MOVW	R4, 4(R9)		/* quo.lsw */
lab11:
	CMP	R10, $0
	BEQ	lab12			/* rp == 0: caller does not want the remainder */
	MOVW	R7, 0(R10)		/* rem.msw */
	MOVW	R8, 4(R10)		/* rem.lsw */
lab12:
	RETURN

lab9:
	/* Quotient is 0 (dvs > dvd) */
	MOVW	R4, R8			/* rem.lsw = dvd.lsw */
	MOVW	R3, R7			/* rem.msw = dvd.msw */
	MOVW	$0, R4			/* quo.lsw = 0 */
	MOVW	$0, R3			/* quo.msw = 0 */
	BR	lab10