Subversion Repositories planix.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
/*
 * Register numbers used by memcpy/memmove below; each is referenced
 * in the code as R(name).
 */
TS = 0					/* destination start; post-incremented by the forward copy loops */
2
TE = 1					/* destination end (TS + n); pre-decremented by the backward copy loops */
3
FROM = 2				/* source pointer */
4
N = 3					/* byte count */
5
TMP = 3					/* scratch; shares R3 with N: N is last read before TMP is first written */
6
TMP1 = 4				/* scratch for the word-at-a-time copy loops */
7
 
8
/*
 * memcpy has no code of its own: it branches straight to the body of
 * memmove, so both entry points run the same copy.
 */
TEXT memcpy(SB), $0
9
	B	_memmove		/* label placed right after memmove's TEXT line */
10
/*
 * memmove(to, from, n): copy n bytes from `from` to `to`.
 *
 * The first argument is already in R(TS) on entry and is spilled to
 * to+0(FP); `from` and `n` are loaded from from+4(FP) and n+8(FP).
 * The saved `to` is reloaded into R0 as the return value.
 *
 * Direction: when to <= from (unsigned) the copy runs upwards from the
 * start (_forward); otherwise it runs downwards from the end (_back),
 * so a destination that lies above the source is never overwritten
 * before it has been read.
 *
 * Each direction has the same stages:
 *   1. fewer than 4 bytes: byte loop only;
 *   2. byte-copy until the destination is word aligned;
 *   3. if the source is then not word aligned, leave for
 *      _bunaligned/_funaligned, which rejoin at the byte tail;
 *   4. 32-byte chunks through R4-R7, then single words, then bytes.
 *
 * Besides the named registers the 32-byte loops overwrite R4-R7
 * (R4 is also R(TMP1)).
 */
TEXT memmove(SB), $0
11
_memmove:
12
	MOVW	R(TS), to+0(FP)		/* need to save for return value */
13
	MOVW	from+4(FP), R(FROM)
14
	MOVW	n+8(FP), R(N)
15
 
16
	ADD	R(N), R(TS), R(TE)	/* to end pointer */
17
 
18
	CMP	R(FROM), R(TS)
19
	BLS	_forward		/* to <= from: copy upwards */
20
 
21
_back:
22
	ADD	R(N), R(FROM)		/* from end pointer */
23
	CMP	$4, R(N)		/* need at least 4 bytes to copy */
24
	BLT	_b1tail
25
 
26
_b4align:				/* align destination on 4 */
27
	AND.S	$3, R(TE), R(TMP)	/* R(N) is dead from here on; R3 becomes scratch */
28
	BEQ	_b4aligned
29
 
30
	MOVBU.W	-1(R(FROM)), R(TMP)	/* pre-indexed */
31
	MOVBU.W	R(TMP), -1(R(TE))	/* pre-indexed */
32
	B	_b4align
33
 
34
_b4aligned:				/* is source now aligned? */
35
	AND.S	$3, R(FROM), R(TMP)	/* R(TMP) = source offset within its word */
36
	BNE	_bunaligned		/* non-zero offset is passed along in R(TMP) */
37
 
38
	ADD	$31, R(TS), R(TMP)	/* do 32-byte chunks if possible */
39
_b32loop:
40
	CMP	R(TMP), R(TE)
41
	BLS	_b4tail			/* TE <= TS+31: fewer than 32 bytes left */
42
 
43
	MOVM.DB.W (R(FROM)), [R4-R7]	/* 4 words = 16 bytes, pointer written back */
44
	MOVM.DB.W [R4-R7], (R(TE))
45
	MOVM.DB.W (R(FROM)), [R4-R7]	/* second 16 bytes of the 32-byte chunk */
46
	MOVM.DB.W [R4-R7], (R(TE))
47
	B	_b32loop
48
 
49
_b4tail:				/* do remaining words if possible */
50
	ADD	$3, R(TS), R(TMP)
51
_b4loop:
52
	CMP	R(TMP), R(TE)
53
	BLS	_b1tail			/* TE <= TS+3: fewer than 4 bytes left */
54
 
55
	MOVW.W	-4(R(FROM)), R(TMP1)	/* pre-indexed */
56
	MOVW.W	R(TMP1), -4(R(TE))	/* pre-indexed */
57
	B	_b4loop
58
 
59
_b1tail:				/* remaining bytes */
60
	CMP	R(TE), R(TS)
61
	BEQ	_return			/* end pointer has come down to the start: done */
62
 
63
	MOVBU.W	-1(R(FROM)), R(TMP)	/* pre-indexed */
64
	MOVBU.W	R(TMP), -1(R(TE))	/* pre-indexed */
65
	B	_b1tail
66
 
67
_forward:
68
	CMP	$4, R(N)		/* need at least 4 bytes to copy */
69
	BLT	_f1tail
70
 
71
_f4align:				/* align destination on 4 */
72
	AND.S	$3, R(TS), R(TMP)	/* R(N) is dead from here on; R3 becomes scratch */
73
	BEQ	_f4aligned
74
 
75
	MOVBU.P	1(R(FROM)), R(TMP)	/* implicit write back */
76
	MOVBU.P	R(TMP), 1(R(TS))	/* implicit write back */
77
	B	_f4align
78
 
79
_f4aligned:				/* is source now aligned? */
80
	AND.S	$3, R(FROM), R(TMP)	/* R(TMP) = source offset within its word */
81
	BNE	_funaligned		/* non-zero offset is passed along in R(TMP) */
82
 
83
	SUB	$31, R(TE), R(TMP)	/* do 32-byte chunks if possible */
84
_f32loop:
85
	CMP	R(TMP), R(TS)
86
	BHS	_f4tail			/* TS >= TE-31: fewer than 32 bytes left */
87
 
88
	MOVM.IA.W (R(FROM)), [R4-R7] 
89
	MOVM.IA.W [R4-R7], (R(TS))
90
	MOVM.IA.W (R(FROM)), [R4-R7] 
91
	MOVM.IA.W [R4-R7], (R(TS))
92
	B	_f32loop
93
 
94
_f4tail:
95
	SUB	$3, R(TE), R(TMP)	/* do remaining words if possible */
96
_f4loop:
97
	CMP	R(TMP), R(TS)
98
	BHS	_f1tail			/* TS >= TE-3: fewer than 4 bytes left */
99
 
100
	MOVW.P	4(R(FROM)), R(TMP1)	/* implicit write back */
101
	MOVW.P	R(TMP1), 4(R(TS))	/* implicit write back */
102
	B	_f4loop
103
 
104
_f1tail:
105
	CMP	R(TS), R(TE)
106
	BEQ	_return			/* start pointer has reached the end: done */
107
 
108
	MOVBU.P	1(R(FROM)), R(TMP)	/* implicit write back */
109
	MOVBU.P	R(TMP), 1(R(TS))	/* implicit write back */
110
	B	_f1tail
111
 
112
_return:
113
	MOVW	to+0(FP), R0		/* return the original destination pointer */
114
	RET
115
 
116
/*
 * _bunaligned: backward copy for a word-aligned destination end and a
 * source that is not word aligned.  Reached from _b4aligned with
 * R(TMP) = R(FROM) & 3, i.e. 1, 2 or 3.
 *
 * The source pointer is rounded down to a word boundary and each
 * destination word is assembled from two neighbouring source words
 * with a shift pair selected by that offset.  When 8 or fewer bytes
 * remain, the source pointer is moved back to its byte position and
 * _b1tail finishes the copy.
 *
 * Register numbers below are referenced as R(name).  BW0 and BR1 are
 * both R7: BR1 is consumed by the instruction that produces BW0.
 */
RSHIFT = 4				/* count used with >> */
117
LSHIFT = 5				/* count used with << */
118
OFFSET = 11				/* added back to R(FROM) at _bu1tail */
119
 
120
BR0 = 6					/* lower-addressed source word of the pair */
121
BW0 = 7					/* lower-addressed word to store */
122
BR1 = 7					/* higher-addressed source word of the pair */
123
BW1 = 8					/* higher-addressed word to store */
124
 
125
_bunaligned:
126
	CMP	$2, R(TMP)		/* is R(TMP) < 2 ? */
					/* all nine conditional MOVWs below test the flags of this one compare */
127
 
128
	MOVW.LT	$8, R(RSHIFT)		/* (R(n)<<24)|(R(n-1)>>8) */
129
	MOVW.LT	$24, R(LSHIFT)
130
	MOVW.LT	$1, R(OFFSET)		/* source offset was 1 */
131
 
132
	MOVW.EQ	$16, R(RSHIFT)		/* (R(n)<<16)|(R(n-1)>>16) */
133
	MOVW.EQ	$16, R(LSHIFT)
134
	MOVW.EQ	$2, R(OFFSET)		/* source offset was 2 */
135
 
136
	MOVW.GT	$24, R(RSHIFT)		/* (R(n)<<8)|(R(n-1)>>24) */
137
	MOVW.GT	$8, R(LSHIFT)
138
	MOVW.GT	$3, R(OFFSET)		/* source offset was 3 */
139
 
140
	ADD	$8, R(TS), R(TMP)	/* do 8-byte chunks if possible */
141
	CMP	R(TMP), R(TE)
142
	BLS	_b1tail			/* TE <= TS+8: R(FROM) still untouched, use the byte loop */
143
 
144
	BIC	$3, R(FROM)		/* align source */
145
	MOVW	(R(FROM)), R(BR0)	/* prime first block register */
146
 
147
_bu8loop:
148
	CMP	R(TMP), R(TE)
149
	BLS	_bu1tail		/* TE <= TS+8: stop word assembly */
150
 
151
	MOVW	R(BR0)<<R(LSHIFT), R(BW1)	/* high part of upper word, from the word already held */
152
	MOVM.DB.W (R(FROM)), [R(BR0)-R(BR1)]	/* fetch the two words below R(FROM), with write back */
153
	ORR	R(BR1)>>R(RSHIFT), R(BW1)	/* low part of upper word */
154
 
155
	MOVW	R(BR1)<<R(LSHIFT), R(BW0)	/* overwrites BR1 (same register as BW0) */
156
	ORR	R(BR0)>>R(RSHIFT), R(BW0)	/* BR0 stays loaded for the next iteration */
157
 
158
	MOVM.DB.W [R(BW0)-R(BW1)], (R(TE))	/* store both assembled words, with write back */
159
	B	_bu8loop
160
 
161
_bu1tail:
162
	ADD	R(OFFSET), R(FROM)	/* put back the low bits cleared by BIC */
163
	B	_b1tail
164
 
165
/*
 * _funaligned: forward copy for a word-aligned destination and a
 * source that is not word aligned.  Reached from _f4aligned with
 * R(TMP) = R(FROM) & 3, i.e. 1, 2 or 3.
 *
 * The source pointer is rounded down to a word boundary and each
 * destination word is assembled from two neighbouring source words
 * with a shift pair selected by that offset.  When 8 or fewer bytes
 * remain, the source pointer is moved back to its byte position and
 * _f1tail finishes the copy.
 *
 * RSHIFT, LSHIFT and OFFSET are given the same register numbers as in
 * the definitions preceding _bunaligned.  FR0 and FW1 are both R7:
 * FR0 is consumed by the instruction that produces FW1.
 */
RSHIFT = 4				/* count used with >> */
166
LSHIFT = 5				/* count used with << */
167
OFFSET = 11				/* subtracted from R(FROM) at _fu1tail */
168
 
169
FW0 = 6					/* lower-addressed word to store */
170
FR0 = 7					/* lower-addressed source word of the pair */
171
FW1 = 7					/* higher-addressed word to store */
172
FR1 = 8					/* higher-addressed source word of the pair */
173
 
174
_funaligned:
175
	CMP	$2, R(TMP)		/* all nine conditional MOVWs below test the flags of this one compare */
176
 
177
	MOVW.LT	$8, R(RSHIFT)		/* (R(n+1)<<24)|(R(n)>>8) */
178
	MOVW.LT	$24, R(LSHIFT)
179
	MOVW.LT	$3, R(OFFSET)		/* source offset was 1: 4 - 1 */
180
 
181
	MOVW.EQ	$16, R(RSHIFT)		/* (R(n+1)<<16)|(R(n)>>16) */
182
	MOVW.EQ	$16, R(LSHIFT)
183
	MOVW.EQ	$2, R(OFFSET)		/* source offset was 2: 4 - 2 */
184
 
185
	MOVW.GT	$24, R(RSHIFT)		/* (R(n+1)<<8)|(R(n)>>24) */
186
	MOVW.GT	$8, R(LSHIFT)
187
	MOVW.GT	$1, R(OFFSET)		/* source offset was 3: 4 - 3 */
188
 
189
	SUB	$8, R(TE), R(TMP)	/* do 8-byte chunks if possible */
190
	CMP	R(TMP), R(TS)
191
	BHS	_f1tail			/* TS >= TE-8: R(FROM) still untouched, use the byte loop */
192
 
193
	BIC	$3, R(FROM)		/* align source */
194
	MOVW.P	4(R(FROM)), R(FR1)	/* prime last block register, implicit write back */
195
 
196
_fu8loop:
197
	CMP	R(TMP), R(TS)
198
	BHS	_fu1tail		/* TS >= TE-8: stop word assembly */
199
 
200
	MOVW	R(FR1)>>R(RSHIFT), R(FW0)	/* low part of first word, from the word already held */
201
	MOVM.IA.W (R(FROM)), [R(FR0)-R(FR1)]	/* fetch the next two words, with write back */
202
	ORR	R(FR0)<<R(LSHIFT), R(FW0)	/* high part of first word */
203
 
204
	MOVW	R(FR0)>>R(RSHIFT), R(FW1)	/* overwrites FR0 (same register as FW1) */
205
	ORR	R(FR1)<<R(LSHIFT), R(FW1)	/* FR1 stays loaded for the next iteration */
206
 
207
	MOVM.IA.W [R(FW0)-R(FW1)], (R(TS))	/* store both assembled words, with write back */
208
	B	_fu8loop
209
 
210
_fu1tail:
211
	SUB	R(OFFSET), R(FROM)	/* R(FROM) is one word past the last word read; step back to the next unread byte */
212
	B	_f1tail