memcmp.S revision 1.1 1 /* $NetBSD: memcmp.S,v 1.1 2008/02/21 17:35:47 garbled Exp $ */
2
3 /* stropt/memcmp.S, pl_string_common, pl_linux 10/11/04 11:45:35
4 * ==========================================================================
5 * Optimized memcmp implementation for IBM PowerPC 405/440.
6 *
7 * Copyright (c) 2003, IBM Corporation
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * * Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 * * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials
20 * provided with the distribution.
21 * * Neither the name of IBM nor the names of its contributors
22 * may be used to endorse or promote products derived from this
23 * software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
26 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
27 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
28 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
29 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
31 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
34 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
36 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 *
38 * ==========================================================================
39 *
40 * Function: Compare the first n bytes of two memory buffers
41 *
42 * int memcmp(const void *s1, const void *s2, size_t n)
43 *
44 * Input: r3 - buffer 1 address
45 * r4 - buffer 2 address
46 * r5 - maximum characters to compare
47 * Output: r3 <0 (less), 0 (equal), >0 (greater)
48 *
49 * ==========================================================================
50 */
51
52 #define _NOREGNAMES
53 #include <machine/asm.h>
54 #ifdef _KERNEL
55 #include <assym.h>
56 #endif
57
58 .text
59 .align 4
60 /* LINTSTUB: Func: void *memcmp(const void *, const void *, size_t) */
61 ENTRY(memcmp)
62
63 /*
64 * Check count passed in R5. If zero, return 0; otherwise continue.
65 */
66 cmpwi %r5,0
67 beq- ret_0;
68
69 /*
70 * Most of the time the difference is found in the first
71 * several bytes. The following code minimizes the number
72 * of load operations for short compares.
73 */
74
75 mr %r11, %r3 /* Save buffer 1 */
76
77 again:
78
79 not %r10, %r4 /* buffer 2: bytes to page bdy */
80 rlwinm. %r10, %r10,29,23,31 /* buffer 2: dwords to page bdy */
81 beq- bytebybyte /* If < 8 bytes to the page bdy */
82 /* do byte by byte */
83 lwz %r8, 0(%r4) /* load 1st buffer 2 word */
84
85 not %r12, %r11 /* buffer 1: bytes to page bdy */
86 rlwinm. %r12, %r12,29,23,31 /* buffer 1: dwords to page bdy */
87 beq- bytebybyte /* If < 8 bytes to the page bdy */
88 /* do byte by byte */
89 lwz %r6, 0(%r11) /* load 1st buffer 1 word */
90
91 cmpwi %r5, 4 /* If remaining count <= 4 */
92 ble+ first4 /* handle specially. DWG */
93
94 cmplw %r8, %r6 /* compare buffer 2 and buffer 1*/
95 bne+ all_done /* different => we're done */
96
97 lwzu %r9, 4(%r4) /* load 2nd buffer 2 word */
98 lwzu %r7, 4(%r11) /* load 2nd buffer 1 word */
99
100 cmpwi %r5, 8 /* If remaining count <= 8 */
101 ble+ last4 /* handle specially. DWG */
102
103 cmplw %r9, %r7 /* compare buffer 2 and buffer 1*/
104 bne+ all_done /* different => we're done */
105
106 addi %r5, %r5, -8 /* Update character counter DWG */
107 addi %r10, %r4, 0x0004 /* DWG*/
108 not %r10, %r10 /* buffer 2: bytes to page bdy DWG */
109 rlwinm. %r10, %r10,29,23,31 /* buffer 2: dwords to page bdy DWG */
110 addi %r12, %r11, 0x0004 /* DWG */
111 not %r12, %r12 /* buffer 1: bytes to page bdy DWG */
112 rlwinm. %r12, %r12,29,23,31 /* buffer 1: dwords to page bdy DWG */
113
114 /* The following section prior to loop: figures out whether */
115 /* the buffer 1 or buffer 2 is closer to the page boundary. */
116 /* The main loop count is then set up to reflect the number of */
117 /* double words of the buffer that is closest */
118
119 cmpw %r10, %r12 /* Find closest */
120 blt lt
121
122 mr %r10, %r12
123
124 lt:
125
126 srwi %r12, %r5, 3 /* Double check the total count */
127 cmpw %r10, %r12 /* limitation */
128 blt lt2
129
130 mr %r10, %r12 /* DWG */
131 lt2: /* DWG */
132 cmpwi %r10, 0 /* DWG */
133 bne lt3 /* DWG */
134 addi %r4, %r4, 0x0004 /* DWG */
135 addi %r11,%r11,0x0004 /* DWG */
136 b again /* DWG */
137 lt3: /* DWG */
138 mtctr %r10 /* dword count for loop */
139 lwzu %r6, 4(%r11) /* pre-load buffer 1 word */
140
141 b in /* To the loop */
142
143 loop: /* main loop */
144
145 cmplw %r8, %r6 /* Compare first buffer 2 word */
146 bne- all_done /* with first buffer 1 word */
147 /* If different, we're done */
148 cmplw %r9, %r7 /* Compare second buffer 2 word */
149 /* with second buffer 1 word */
150 lwzu %r6, 4(%r11) /* pre-load buffer 1 word */
151
152 bne- all_done /* If different, we're done */
153
154 in:
155
156 lwzu %r7, 4(%r11) /* pre-load buffer 1 word */
157 lwzu %r8, 4(%r4) /* pre-load buffer 2 word */
158 lwzu %r9, 4(%r4) /* pre-load buffer 2 word */
159
160 bdnz+ loop /* Do more DW's if cnt > 0 */
161
162 /*mfctr %r12*/ /*DWG*/ /* number of dwords left */
163 /*subf %r10, %r12, %r10*/ /*DWG*//* number of dwords compared */
164 slwi %r10, %r10, 3
165 subf %r5, %r10, %r5 /* adjust byte counter */
166 /*bne+ partial*/ /*DWG*/ /* If less than 8 bytes, handle */
167 /* specially */
168 /*cmpwi %r5, 8*/ /* Removed. DWG */
169 /*blt partial*/ /* Removed. DWG */
170
171 /*addic %r5, %r5, -8*/ /*DWG*/ /* Subtract two words from count*/
172
173 cmplw %r8, %r6 /* compare last dword */
174 addi %r4, %r4, 4
175 bne- all_done
176
177 cmplw %r9, %r7
178 addi %r11, %r11, 4
179 bne- all_done
180
181 bytebybyte:
182
183 /* We've gotten close to a page boundary: do a byte-byte-byte
184 * compare for the following 8 bytes, and then go back to
185 * the full-word compare loop.
186 */
187
188 li %r3, 8 /* loop count */
189 cmpw %r3, %r5 /* take min(8, counter) */
190 ble f2
191
192 mr. %r3, %r5
193
194 beqlr
195
196 f2:
197
198 mtctr %r3
199 subf %r5, %r3, %r5 /* adjust counter */
200
201 bbb:
202
203 lbz %r6, 0(%r11) /* byte copy loop */
204
205 addi %r11, %r11, 1
206
207 lbz %r8, 0(%r4)
208
209 addi %r4, %r4, 1
210
211 cmplw %r8, %r6
212
213 bdnzt+ eq, bbb
214
215 bne all_done
216
217 cmpwi %r5, 0
218 bgt again /* handle the rest */
219
220 xor %r3,%r3,%r3
221
222 blr
223
224 #if 0 /* Removed code section. DWG */
225 partial:
226
227 mr. %r3, %r5
228
229 beqlr /* If count -> 0, we're done */
230
231 f1:
232
233 subfic %r3, %r3, 4 /* zero/end in first word? */
234 cmpwi %r3, 0
235 blt last4
236 #endif /* DWG */
237
238 first4:
239 subfic %r3, %r5, 4 /* If count <= 4, handle */
240 rlwinm %r3, %r3, 3, 0, 31 /* count *= 8 */
241 srw %r6, %r6, %r3 /* align 1st buffer 1 word */
242 srw %r8, %r8, %r3 /* align 1st buffer 2 word */
243
244 cmplw %r8, %r6 /* get result */
245 bne all_done
246 xor %r3,%r3,%r3
247 blr
248
249 last4:
250 subfic %r10, %r5, 8 /*DWG*/
251 rlwinm %r10, %r10, 3, 0, 31 /* count *= 8 */
252 srw %r7, %r7, %r10 /* align 2nd buffer 1 word */
253 srw %r9, %r9, %r10 /* align 2nd buffer 2 word */
254
255 cmplw %r9, %r7 /* get result */
256 bne all_done
257 ret_0:
258 xor %r3,%r3,%r3 /* Equal result */
259 blr
260
261 all_done:
262
263 blt finish_lt
264
265 addi %r3,0,-1 /* Less than result */
266
267 blr
268
269 finish_lt:
270
271 addi %r3,0,1 /* Greater than result */
272
273 blr
274