/* $NetBSD: memcmp.S,v 1.2 2008/03/06 21:17:17 phx Exp $ */
2
/* stropt/memcmp.S, pl_string_common, pl_linux 10/11/04 11:45:35
 * ==========================================================================
 * Optimized memcmp implementation for IBM PowerPC 405/440.
 *
 * Copyright (c) 2003, IBM Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * * Redistributions of source code must retain the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer.
 * * Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials
 *   provided with the distribution.
 * * Neither the name of IBM nor the names of its contributors
 *   may be used to endorse or promote products derived from this
 *   software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ==========================================================================
 *
 * Function: Compare the first n bytes of two memory buffers
 *
 * int memcmp(const void *s1, const void *s2, size_t n)
 *
 * Input:  r3 - buffer 1 address
 *         r4 - buffer 2 address
 *         r5 - number of bytes to compare
 * Output: r3 <0 (less), 0 (equal), >0 (greater)
 *
 * ==========================================================================
 */
51
52 #define _NOREGNAMES
53 #include <machine/asm.h>
54
/*
 * memcmp - compare the first n bytes of two buffers.
 *
 * In:   r3 = buffer 1 address
 *       r4 = buffer 2 address
 *       r5 = byte count; every test on it is an unsigned compare
 * Out:  r3 =  0  all compared bytes are equal (or the count is zero)
 *       r3 = -1  buffer 1 is lower at the first differing byte
 *       r3 =  1  buffer 1 is higher at the first differing byte
 * Uses: r6/r7 (buffer 1 data), r8/r9 (buffer 2 data), r10, r12 (scratch),
 *       r11 (buffer 1 cursor), ctr, cr0.  r4 is advanced as the buffer 2
 *       cursor.
 *
 * Whole words are ordered with cmplw, which gives the byte-wise result
 * only when the lowest-addressed byte is the most significant one
 * (big-endian).
 *
 * Word loads are issued only while ((~addr >> 3) & 0x1ff) is non-zero for
 * both cursors, i.e. while neither lies in the last 8 bytes of a 4096-byte
 * page.  Inside that window up to 8 bytes are compared one at a time.
 * first4/last4 load a whole word even when fewer bytes remain; the surplus
 * bytes are shifted out before the compare.
 */
	.text
	.align 4
/* LINTSTUB: Func: int memcmp(const void *, const void *, size_t) */
ENTRY(memcmp)

	/*
	 * Check count passed in R5. If zero, return 0; otherwise continue.
	 */
	cmplwi	%r5, 0
	beq-	ret_0

	/*
	 * Most of the time the difference is found in the first
	 * several bytes. The following code minimizes the number
	 * of load operations for short compares.
	 */

	mr	%r11, %r3		/* r11 = buffer 1 cursor; r3 is free */

again:
	/* Invariant: r4/r11 point at the next uncompared byte, r5 > 0. */

	not	%r10, %r4		/* buffer 2: bytes to page bdy */
	rlwinm.	%r10, %r10,29,23,31	/* buffer 2: dwords to page bdy */
	beq-	bytebybyte		/* In the last 8 bytes of the page: */
					/* do byte by byte */
	lwz	%r8, 0(%r4)		/* load 1st buffer 2 word */

	not	%r12, %r11		/* buffer 1: bytes to page bdy */
	rlwinm.	%r12, %r12,29,23,31	/* buffer 1: dwords to page bdy */
	beq-	bytebybyte		/* In the last 8 bytes of the page: */
					/* do byte by byte */
	lwz	%r6, 0(%r11)		/* load 1st buffer 1 word */

	/*
	 * The count is a size_t, so it is compared unsigned: a signed
	 * compare would send counts >= 0x80000000 to first4/last4.
	 */
	cmplwi	%r5, 4			/* If remaining count <= 4 */
	ble+	first4			/* handle specially. DWG */

	cmplw	%r8, %r6		/* compare buffer 2 and buffer 1 */
	bne+	all_done		/* different => we're done */

	lwzu	%r9, 4(%r4)		/* load 2nd buffer 2 word */
	lwzu	%r7, 4(%r11)		/* load 2nd buffer 1 word */

	cmplwi	%r5, 8			/* If remaining count <= 8 */
	ble+	last4			/* handle specially. DWG */

	cmplw	%r9, %r7		/* compare buffer 2 and buffer 1 */
	bne+	all_done		/* different => we're done */

	/*
	 * 8 bytes matched.  Both cursors sit on the second word, so the
	 * next unread byte is at cursor + 4.
	 */
	addi	%r5, %r5, -8		/* Update character counter DWG */
	addi	%r10, %r4, 0x0004	/* DWG */
	not	%r10, %r10		/* buffer 2: bytes to page bdy DWG */
	rlwinm.	%r10, %r10,29,23,31	/* buffer 2: dwords to page bdy DWG */
	addi	%r12, %r11, 0x0004	/* DWG */
	not	%r12, %r12		/* buffer 1: bytes to page bdy DWG */
	rlwinm.	%r12, %r12,29,23,31	/* buffer 1: dwords to page bdy DWG */

	/* The following section prior to loop: figures out whether */
	/* the buffer 1 or buffer 2 is closer to the page boundary. */
	/* The main loop count is then set up to reflect the number of */
	/* double words of the buffer that is closest */

	cmpw	%r10, %r12		/* Find closest (both are 0..511) */
	blt	lt

	mr	%r10, %r12

lt:
	srwi	%r12, %r5, 3		/* Double check the total count */
	cmpw	%r10, %r12		/* limitation (r12 <= 0x1fffffff) */
	blt	lt2

	mr	%r10, %r12		/* DWG */
lt2:					/* DWG */
	/* r10 = min(dwords to either page bdy, whole dwords left) */
	cmpwi	%r10, 0			/* DWG */
	bne	lt3			/* DWG */
	addi	%r4, %r4, 0x0004	/* No full dword possible: step both */
	addi	%r11,%r11,0x0004	/* cursors past the 2nd word and */
	b	again			/* re-dispatch. DWG */
lt3:					/* DWG */
	mtctr	%r10			/* dword count for loop */
	lwzu	%r6, 4(%r11)		/* pre-load buffer 1 word */

	b	in			/* To the loop */

loop:					/* main loop */

	cmplw	%r8, %r6		/* Compare first buffer 2 word */
	bne-	all_done		/* with first buffer 1 word */
					/* If different, we're done */
	cmplw	%r9, %r7		/* Compare second buffer 2 word */
					/* with second buffer 1 word */
	lwzu	%r6, 4(%r11)		/* pre-load buffer 1 word */
					/* (lwzu leaves cr0 untouched) */
	bne-	all_done		/* If different, we're done */

in:

	lwzu	%r7, 4(%r11)		/* pre-load buffer 1 word */
	lwzu	%r8, 4(%r4)		/* pre-load buffer 2 word */
	lwzu	%r9, 4(%r4)		/* pre-load buffer 2 word */

	bdnz+	loop			/* Do more DW's if cnt > 0 */

	/*
	 * ctr reached zero: all r10 dwords have been loaded and every one
	 * except the last has been compared.
	 */
	/*mfctr	%r12*/	/*DWG*/		/* number of dwords left */
	/*subf	%r10, %r12, %r10*/ /*DWG*/ /* number of dwords compared */
	slwi	%r10, %r10, 3
	subf	%r5, %r10, %r5		/* adjust byte counter */
	/*bne+	partial*/ /*DWG*/	/* If less than 8 bytes, handle */
					/* specially */
	/*cmpwi	%r5, 8*/		/* Removed. DWG */
	/*blt	partial*/		/* Removed. DWG */

	/*addic	%r5, %r5, -8*/ /*DWG*/	/* Subtract two words from count */

	cmplw	%r8, %r6		/* compare last dword */
	addi	%r4, %r4, 4		/* r4 -> next unread byte */
	bne-	all_done

	cmplw	%r9, %r7
	addi	%r11, %r11, 4		/* r11 -> next unread byte */
	bne-	all_done

bytebybyte:

	/* We've gotten close to a page boundary: do a byte-byte-byte
	 * compare for the following 8 bytes, and then go back to
	 * the full-word compare loop.
	 */

	li	%r3, 8			/* loop count */
	cmplw	%r3, %r5		/* take min(8, counter), unsigned */
	ble	f2

	mr.	%r3, %r5		/* fewer than 8 left: use them all */

	beqlr				/* nothing left: return r3 = 0 */

f2:

	mtctr	%r3
	subf	%r5, %r3, %r5		/* adjust counter */

bbb:

	lbz	%r6, 0(%r11)		/* byte compare loop */

	addi	%r11, %r11, 1

	lbz	%r8, 0(%r4)

	addi	%r4, %r4, 1

	cmplw	%r8, %r6

	bdnzt+	eq, bbb			/* loop while ctr != 0 and bytes match */

	bne	all_done

	cmplwi	%r5, 0			/* unsigned: any bytes left? */
	bgt	again			/* handle the rest */

	xor	%r3,%r3,%r3

	blr

#if 0 /* Removed code section. DWG */
partial:

	mr.	%r3, %r5

	beqlr				/* If count -> 0, we're done */

f1:

	subfic	%r3, %r3, 4		/* zero/end in first word? */
	cmpwi	%r3, 0
	blt	last4
#endif /* DWG */

first4:
	/* 1..4 bytes left, all within the first word pair (r6, r8). */
	subfic	%r3, %r5, 4		/* If count <= 4, handle */
	rlwinm	%r3, %r3, 3, 0, 31	/* (4 - count) * 8 = bits to drop */
	srw	%r6, %r6, %r3		/* align 1st buffer 1 word */
	srw	%r8, %r8, %r3		/* align 1st buffer 2 word */

	cmplw	%r8, %r6		/* get result */
	bne	all_done
	xor	%r3,%r3,%r3
	blr

last4:
	/* 5..8 bytes left; first word matched, finish in (r7, r9). */
	subfic	%r10, %r5, 8		/* DWG */
	rlwinm	%r10, %r10, 3, 0, 31	/* (8 - count) * 8 = bits to drop */
	srw	%r7, %r7, %r10		/* align 2nd buffer 1 word */
	srw	%r9, %r9, %r10		/* align 2nd buffer 2 word */

	cmplw	%r9, %r7		/* get result */
	bne	all_done
ret_0:
	xor	%r3,%r3,%r3		/* Equal result */
	blr

all_done:
	/*
	 * cr0 holds the last "cmplw buffer2, buffer1" and is known to be
	 * not-equal.  LT there means buffer 2 < buffer 1.
	 */

	blt	finish_lt

	addi	%r3,0,-1		/* Less than result */

	blr

finish_lt:

	addi	%r3,0,1			/* Greater than result */

	blr
271