memcmp.S revision 1.3.6.1 1 /* $NetBSD: memcmp.S,v 1.3.6.1 2014/05/22 11:26:29 yamt Exp $ */
2
3 /* stropt/memcmp.S, pl_string_common, pl_linux 10/11/04 11:45:35
4 * ==========================================================================
5 * Optimized memcmp implementation for IBM PowerPC 405/440.
6 *
7 * Copyright (c) 2003, IBM Corporation
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * * Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 * * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials
20 * provided with the distribution.
21 * * Neither the name of IBM nor the names of its contributors
22 * may be used to endorse or promote products derived from this
23 * software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
26 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
27 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
28 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
29 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
31 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
34 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
36 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 *
38 * ==========================================================================
39 *
40 * Function: Compare two memory buffers (exactly n bytes; NUL bytes
40 * are not terminators)
41 *
42 * int memcmp(const void *s1, const void *s2, size_t n)
43 *
44 * Input: r3 - buffer 1 address
45 * r4 - buffer 2 address
46 * r5 - number of bytes to compare
47 * Output: r3 <0 (less), 0 (equal), >0 (greater)
48 *
49 * ==========================================================================
50 */
51
52 #include <machine/asm.h>
53
/*
 * int memcmp(const void *s1, const void *s2, size_t n)
 *
 * In:     r3 = s1, r4 = s2, r5 = n (unsigned byte count)
 * Out:    r3 = 0 if the first n bytes are equal,
 *              -1 if s1 compares lower, +1 if s1 compares higher
 * Alters: r3-r12, ctr, cr0.  No stack is used (r1 is never referenced).
 *
 * Register roles inside the routine:
 *   r11 = cursor into buffer 1 (copy of s1), r4 = cursor into buffer 2
 *   r5  = bytes still to be compared
 *   r6/r7 = current two words of buffer 1, r8/r9 = same for buffer 2
 *   r10/r12 = scratch: 8-byte units ("dwords") left before a 4 KB boundary
 *
 * Strategy: compare 8 bytes at a time with word loads, but never let a
 * word load run into the next 4 KB page; within 8 bytes of a page end the
 * code falls back to a byte loop.  Word results are compared as unsigned
 * 32-bit values and the tail handling shifts trailing bytes out to the
 * right, i.e. the first byte in memory is treated as most significant
 * (big-endian layout).
 *
 * The byte count is treated as unsigned throughout (cmplwi/cmplw), so
 * counts >= 0x80000000 are not mistaken for negative values.
 */
	.text
	.align 4
/* LINTSTUB: Func: int memcmp(const void *, const void *, size_t) */
ENTRY(memcmp)

	/*
	 * Check count passed in R5. If zero, return 0; otherwise continue.
	 */
	cmpwi	%r5, 0
	beq-	ret_0

	/*
	 * Most of the time the difference is found in the first
	 * several bytes. The following code minimizes the number
	 * of load operations for short compares.
	 */

	mr	%r11, %r3		/* Save buffer 1; r3 is reused below */

again:
	/*
	 * Invariant here: r5 != 0, and r11/r4 point at the next
	 * uncompared byte of buffer 1/buffer 2.
	 *
	 * ((~addr) >> 3) & 0x1ff = number of whole 8-byte units that can
	 * be read from addr without touching the next 4 KB page.
	 */
	not	%r10, %r4		/* buffer 2: bytes to page bdy */
	rlwinm.	%r10, %r10, 29, 23, 31	/* buffer 2: dwords to page bdy */
	beq-	bytebybyte		/* If < 8 bytes to the page bdy */
					/* do byte by byte */
	lwz	%r8, 0(%r4)		/* load 1st buffer 2 word */

	not	%r12, %r11		/* buffer 1: bytes to page bdy */
	rlwinm.	%r12, %r12, 29, 23, 31	/* buffer 1: dwords to page bdy */
	beq-	bytebybyte		/* If < 8 bytes to the page bdy */
					/* do byte by byte */
	lwz	%r6, 0(%r11)		/* load 1st buffer 1 word */

	cmplwi	%r5, 4			/* If remaining count <= 4 (unsigned) */
	ble+	first4			/* handle specially. DWG */

	cmplw	%r8, %r6		/* compare buffer 2 and buffer 1 */
	bne+	all_done		/* different => we're done */

	lwzu	%r9, 4(%r4)		/* load 2nd buffer 2 word, r4 += 4 */
	lwzu	%r7, 4(%r11)		/* load 2nd buffer 1 word, r11 += 4 */

	cmplwi	%r5, 8			/* If remaining count <= 8 (unsigned) */
	ble+	last4			/* handle specially. DWG */

	cmplw	%r9, %r7		/* compare buffer 2 and buffer 1 */
	bne+	all_done		/* different => we're done */

	/*
	 * First 8 bytes matched and more remain.  r4/r11 still point at
	 * the second word, so the next unread byte is at +4.
	 */
	addi	%r5, %r5, -8		/* Update character counter DWG */
	addi	%r10, %r4, 0x0004	/* DWG */
	not	%r10, %r10		/* buffer 2: bytes to page bdy DWG */
	rlwinm.	%r10, %r10, 29, 23, 31	/* buffer 2: dwords to page bdy DWG */
	addi	%r12, %r11, 0x0004	/* DWG */
	not	%r12, %r12		/* buffer 1: bytes to page bdy DWG */
	rlwinm.	%r12, %r12, 29, 23, 31	/* buffer 1: dwords to page bdy DWG */

	/* The following section prior to loop: figures out whether */
	/* the buffer 1 or buffer 2 is closer to the page boundary. */
	/* The main loop count is then set up to reflect the number of */
	/* double words of the buffer that is closest */

	cmpw	%r10, %r12		/* Find closest (both values <= 511) */
	blt	lt

	mr	%r10, %r12

lt:

	srwi	%r12, %r5, 3		/* Double check the total count */
	cmpw	%r10, %r12		/* limitation (r12 < 2^29, so a */
	blt	lt2			/* signed compare is safe here) */

	mr	%r10, %r12		/* DWG */
lt2:					/* DWG */
	cmpwi	%r10, 0			/* DWG: no whole dword available? */
	bne	lt3			/* DWG */
	addi	%r4, %r4, 0x0004	/* DWG: step past the word already */
	addi	%r11, %r11, 0x0004	/* DWG: compared and start over */
	b	again			/* DWG */
lt3:					/* DWG */
	mtctr	%r10			/* dword count for loop */
	lwzu	%r6, 4(%r11)		/* pre-load buffer 1 word */

	b	in			/* To the loop */

	/*
	 * Software-pipelined main loop: each pass compares the dword
	 * loaded on the previous pass while loading the next one.  ctr
	 * counts dwords loaded; the last loaded dword is compared after
	 * the loop.
	 */
loop:					/* main loop */

	cmplw	%r8, %r6		/* Compare first buffer 2 word */
	bne-	all_done		/* with first buffer 1 word */
					/* If different, we're done */
	cmplw	%r9, %r7		/* Compare second buffer 2 word */
					/* with second buffer 1 word */
	lwzu	%r6, 4(%r11)		/* pre-load buffer 1 word (a load */
					/* leaves cr0 untouched) */
	bne-	all_done		/* If different, we're done */

in:

	lwzu	%r7, 4(%r11)		/* pre-load buffer 1 word */
	lwzu	%r8, 4(%r4)		/* pre-load buffer 2 word */
	lwzu	%r9, 4(%r4)		/* pre-load buffer 2 word */

	bdnz+	loop			/* Do more DW's if cnt > 0 */

	/*mfctr	%r12*/	/*DWG*/		/* number of dwords left */
	/*subf	%r10, %r12, %r10*/ /*DWG*/ /* number of dwords compared */
	slwi	%r10, %r10, 3		/* r10 = bytes covered by the loop */
	subf	%r5, %r10, %r5		/* adjust byte counter */
	/*bne+	partial*/ /*DWG*/	/* If less than 8 bytes, handle */
					/* specially */
	/*cmpwi	%r5, 8*/		/* Removed. DWG */
	/*blt	partial*/		/* Removed. DWG */

	/*addic	%r5, %r5, -8*/ /*DWG*/	/* Subtract two words from count */

	cmplw	%r8, %r6		/* compare last dword */
	addi	%r4, %r4, 4		/* r4 -> next unread byte */
	bne-	all_done

	cmplw	%r9, %r7
	addi	%r11, %r11, 4		/* r11 -> next unread byte */
	bne-	all_done

bytebybyte:

	/* We've gotten close to a page boundary: do a byte-byte-byte
	 * compare for the following 8 bytes, and then go back to
	 * the full-word compare loop.
	 *
	 * Also reached by falling out of the main loop, possibly with
	 * r5 == 0; in that case r3 is set to 0 and we return "equal".
	 */

	li	%r3, 8			/* loop count */
	cmplw	%r3, %r5		/* take min(8, counter), unsigned */
	ble	f2

	mr.	%r3, %r5		/* fewer than 8 left: r3 = r5 */

	beqlr				/* nothing left => return 0 */

f2:

	mtctr	%r3
	subf	%r5, %r3, %r5		/* adjust counter */

bbb:

	lbz	%r6, 0(%r11)		/* byte compare loop */

	addi	%r11, %r11, 1

	lbz	%r8, 0(%r4)

	addi	%r4, %r4, 1

	cmplw	%r8, %r6

	bdnzt	eq, bbb			/* loop while bytes equal and ctr != 0 */

	bne	all_done		/* cr0 still holds the last compare */

	cmplwi	%r5, 0
	bne	again			/* handle the rest */

	li	%r3, 0

	blr

#if 0 /* Removed code section. DWG */
partial:

	mr.	%r3, %r5

	beqlr				/* If count -> 0, we're done */

f1:

	subfic	%r3, %r3, 4		/* zero/end in first word? */
	cmpwi	%r3, 0
	blt	last4
#endif /* DWG */

	/*
	 * 1..4 bytes remain and the first words are in r6/r8: shift the
	 * (4 - count) trailing bytes out of both words, then compare.
	 */
first4:
	subfic	%r3, %r5, 4		/* r3 = 4 - count (0..3) */
	rlwinm	%r3, %r3, 3, 0, 31	/* r3 *= 8 (bits to discard) */
	srw	%r6, %r6, %r3		/* align 1st buffer 1 word */
	srw	%r8, %r8, %r3		/* align 1st buffer 2 word */

	cmplw	%r8, %r6		/* get result */
	bne	all_done
	li	%r3, 0
	blr

	/*
	 * 5..8 bytes remain, the first words matched, and the second
	 * words are in r7/r9: discard the (8 - count) trailing bytes.
	 */
last4:
	subfic	%r10, %r5, 8		/* DWG: r10 = 8 - count (0..3) */
	rlwinm	%r10, %r10, 3, 0, 31	/* r10 *= 8 (bits to discard) */
	srw	%r7, %r7, %r10		/* align 2nd buffer 1 word */
	srw	%r9, %r9, %r10		/* align 2nd buffer 2 word */

	cmplw	%r9, %r7		/* get result */
	bne	all_done
ret_0:
	li	%r3, 0			/* Equal result */
	blr

	/*
	 * Entered with cr0 = unsigned compare (buffer 2 value, buffer 1
	 * value) and the values known to differ.  "lt" therefore means
	 * buffer 2 < buffer 1, i.e. s1 is the greater one.
	 */
all_done:

	blt	finish_lt

	li	%r3, -1			/* Less than result */

	blr

finish_lt:

	li	%r3, 1			/* Greater than result */

	blr
END(memcmp)
271