memcmp.S revision 1.2.4.2 1 1.2.4.2 keiichi /* $NetBSD: memcmp.S,v 1.2.4.2 2008/03/24 07:14:29 keiichi Exp $ */
2 1.2.4.2 keiichi
3 1.2.4.2 keiichi /* stropt/memcmp.S, pl_string_common, pl_linux 10/11/04 11:45:35
4 1.2.4.2 keiichi * ==========================================================================
5 1.2.4.2 keiichi * Optimized memcmp implementation for IBM PowerPC 405/440.
6 1.2.4.2 keiichi *
7 1.2.4.2 keiichi * Copyright (c) 2003, IBM Corporation
8 1.2.4.2 keiichi * All rights reserved.
9 1.2.4.2 keiichi *
10 1.2.4.2 keiichi * Redistribution and use in source and binary forms, with or
11 1.2.4.2 keiichi * without modification, are permitted provided that the following
12 1.2.4.2 keiichi * conditions are met:
13 1.2.4.2 keiichi *
14 1.2.4.2 keiichi * * Redistributions of source code must retain the above
15 1.2.4.2 keiichi * copyright notice, this list of conditions and the following
16 1.2.4.2 keiichi * disclaimer.
17 1.2.4.2 keiichi * * Redistributions in binary form must reproduce the above
18 1.2.4.2 keiichi * copyright notice, this list of conditions and the following
19 1.2.4.2 keiichi * disclaimer in the documentation and/or other materials
20 1.2.4.2 keiichi * provided with the distribution.
21 1.2.4.2 keiichi * * Neither the name of IBM nor the names of its contributors
22 1.2.4.2 keiichi * may be used to endorse or promote products derived from this
23 1.2.4.2 keiichi * software without specific prior written permission.
24 1.2.4.2 keiichi *
25 1.2.4.2 keiichi * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
26 1.2.4.2 keiichi * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
27 1.2.4.2 keiichi * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
28 1.2.4.2 keiichi * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
29 1.2.4.2 keiichi * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
30 1.2.4.2 keiichi * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
31 1.2.4.2 keiichi * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 1.2.4.2 keiichi * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 1.2.4.2 keiichi * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
34 1.2.4.2 keiichi * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 1.2.4.2 keiichi * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
36 1.2.4.2 keiichi * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 1.2.4.2 keiichi *
38 1.2.4.2 keiichi * ==========================================================================
39 1.2.4.2 keiichi *
40 1.2.4.2 keiichi * Function: Compare two memory buffers (first n bytes)
41 1.2.4.2 keiichi *
42 1.2.4.2 keiichi * int memcmp(const void *s1, const void *s2, size_t n)
43 1.2.4.2 keiichi *
44 1.2.4.2 keiichi * Input: r3 - buffer 1 address
45 1.2.4.2 keiichi * r4 - buffer 2 address
46 1.2.4.2 keiichi * r5 - number of bytes to compare
47 1.2.4.2 keiichi * Output: r3 <0 (less), 0 (equal), >0 (greater)
48 1.2.4.2 keiichi *
49 1.2.4.2 keiichi * ==========================================================================
50 1.2.4.2 keiichi */
51 1.2.4.2 keiichi
52 1.2.4.2 keiichi #define _NOREGNAMES
53 1.2.4.2 keiichi #include <machine/asm.h>
54 1.2.4.2 keiichi
55 1.2.4.2 keiichi .text
56 1.2.4.2 keiichi .align 4
57 1.2.4.2 keiichi /* LINTSTUB: Func: int memcmp(const void *, const void *, size_t) */
58 1.2.4.2 keiichi ENTRY(memcmp)
59 1.2.4.2 keiichi
60 1.2.4.2 keiichi /*
61 1.2.4.2 keiichi * Check count passed in R5. If zero, return 0; otherwise continue.
62 1.2.4.2 keiichi */
63 1.2.4.2 keiichi cmpwi %r5,0 /* NOTE(review): count is tested with signed compares throughout; counts >= 2^31 look unsupported - confirm */
64 1.2.4.2 keiichi beq- ret_0;
65 1.2.4.2 keiichi
66 1.2.4.2 keiichi /*
67 1.2.4.2 keiichi * Most of the time the difference is found in the first
68 1.2.4.2 keiichi * several bytes. The following code minimizes the number
69 1.2.4.2 keiichi * of load operations for short compares.
70 1.2.4.2 keiichi */
71 1.2.4.2 keiichi
72 1.2.4.2 keiichi mr %r11, %r3 /* r11 = working buffer 1 pointer; r3 is reused for the result */
73 1.2.4.2 keiichi
74 1.2.4.2 keiichi again:
75 1.2.4.2 keiichi
76 1.2.4.2 keiichi not %r10, %r4 /* buffer 2: ~address */
77 1.2.4.2 keiichi rlwinm. %r10, %r10,29,23,31 /* buffer 2: (~addr >> 3) & 0x1ff = dwords (8 bytes) to 4KB page bdy */
78 1.2.4.2 keiichi beq- bytebybyte /* If < 8 bytes to the page bdy */
79 1.2.4.2 keiichi /* do byte by byte */
80 1.2.4.2 keiichi lwz %r8, 0(%r4) /* load 1st buffer 2 word */
81 1.2.4.2 keiichi
82 1.2.4.2 keiichi not %r12, %r11 /* buffer 1: ~address */
83 1.2.4.2 keiichi rlwinm. %r12, %r12,29,23,31 /* buffer 1: (~addr >> 3) & 0x1ff = dwords (8 bytes) to 4KB page bdy */
84 1.2.4.2 keiichi beq- bytebybyte /* If < 8 bytes to the page bdy */
85 1.2.4.2 keiichi /* do byte by byte */
86 1.2.4.2 keiichi lwz %r6, 0(%r11) /* load 1st buffer 1 word */
87 1.2.4.2 keiichi
88 1.2.4.2 keiichi cmpwi %r5, 4 /* If remaining count <= 4 */
89 1.2.4.2 keiichi ble+ first4 /* compare only the leading bytes of the 1st words. DWG */
90 1.2.4.2 keiichi
91 1.2.4.2 keiichi cmplw %r8, %r6 /* unsigned compare: buffer 2 word vs buffer 1 word */
92 1.2.4.2 keiichi bne+ all_done /* different => we're done */
93 1.2.4.2 keiichi
94 1.2.4.2 keiichi lwzu %r9, 4(%r4) /* load 2nd buffer 2 word; r4 += 4 */
95 1.2.4.2 keiichi lwzu %r7, 4(%r11) /* load 2nd buffer 1 word; r11 += 4 */
96 1.2.4.2 keiichi
97 1.2.4.2 keiichi cmpwi %r5, 8 /* If remaining count <= 8 */
98 1.2.4.2 keiichi ble+ last4 /* compare only the leading bytes of the 2nd words. DWG */
99 1.2.4.2 keiichi
100 1.2.4.2 keiichi cmplw %r9, %r7 /* unsigned compare: buffer 2 word vs buffer 1 word */
101 1.2.4.2 keiichi bne+ all_done /* different => we're done */
102 1.2.4.2 keiichi
103 1.2.4.2 keiichi addi %r5, %r5, -8 /* First 8 bytes matched: update byte counter DWG */
104 1.2.4.2 keiichi addi %r10, %r4, 0x0004 /* r10 = address of next unread buffer 2 word DWG */
105 1.2.4.2 keiichi not %r10, %r10 /* buffer 2: ~address DWG */
106 1.2.4.2 keiichi rlwinm. %r10, %r10,29,23,31 /* buffer 2: dwords to page bdy DWG */
107 1.2.4.2 keiichi addi %r12, %r11, 0x0004 /* r12 = address of next unread buffer 1 word DWG */
108 1.2.4.2 keiichi not %r12, %r12 /* buffer 1: ~address DWG */
109 1.2.4.2 keiichi rlwinm. %r12, %r12,29,23,31 /* buffer 1: dwords to page bdy DWG */
110 1.2.4.2 keiichi
111 1.2.4.2 keiichi /* The following section prior to loop: figures out whether */
112 1.2.4.2 keiichi /* the buffer 1 or buffer 2 is closer to the page boundary. */
113 1.2.4.2 keiichi /* The main loop count is then set up to reflect the number of */
114 1.2.4.2 keiichi /* double words of the buffer that is closest */
115 1.2.4.2 keiichi
116 1.2.4.2 keiichi cmpw %r10, %r12 /* Find closest */
117 1.2.4.2 keiichi blt lt
118 1.2.4.2 keiichi
119 1.2.4.2 keiichi mr %r10, %r12 /* r10 = min(buffer 2 dwords, buffer 1 dwords) */
120 1.2.4.2 keiichi
121 1.2.4.2 keiichi lt:
122 1.2.4.2 keiichi
123 1.2.4.2 keiichi srwi %r12, %r5, 3 /* r12 = remaining count / 8: limit the */
124 1.2.4.2 keiichi cmpw %r10, %r12 /* loop to whole dwords left in the count */
125 1.2.4.2 keiichi blt lt2
126 1.2.4.2 keiichi
127 1.2.4.2 keiichi mr %r10, %r12 /* r10 = min(r10, remaining count / 8) DWG */
128 1.2.4.2 keiichi lt2: /* DWG */
129 1.2.4.2 keiichi cmpwi %r10, 0 /* No whole dword for the loop? DWG */
130 1.2.4.2 keiichi bne lt3 /* DWG */
131 1.2.4.2 keiichi addi %r4, %r4, 0x0004 /* Step both pointers past the 2nd word DWG */
132 1.2.4.2 keiichi addi %r11,%r11,0x0004 /* already compared, then DWG */
133 1.2.4.2 keiichi b again /* restart with the reduced count DWG */
134 1.2.4.2 keiichi lt3: /* DWG */
135 1.2.4.2 keiichi mtctr %r10 /* dword count for loop */
136 1.2.4.2 keiichi lwzu %r6, 4(%r11) /* pre-load buffer 1 word */
137 1.2.4.2 keiichi
138 1.2.4.2 keiichi b in /* Enter the loop at the load stage */
139 1.2.4.2 keiichi
140 1.2.4.2 keiichi loop: /* main loop: compare dword loaded on the previous pass */
141 1.2.4.2 keiichi
142 1.2.4.2 keiichi cmplw %r8, %r6 /* Compare first buffer 2 word */
143 1.2.4.2 keiichi bne- all_done /* with first buffer 1 word */
144 1.2.4.2 keiichi /* If different, we're done */
145 1.2.4.2 keiichi cmplw %r9, %r7 /* Compare second buffer 2 word */
146 1.2.4.2 keiichi /* with second buffer 1 word */
147 1.2.4.2 keiichi lwzu %r6, 4(%r11) /* pre-load buffer 1 word (lwzu leaves CR0 intact) */
148 1.2.4.2 keiichi
149 1.2.4.2 keiichi bne- all_done /* If different, we're done */
150 1.2.4.2 keiichi
151 1.2.4.2 keiichi in:
152 1.2.4.2 keiichi
153 1.2.4.2 keiichi lwzu %r7, 4(%r11) /* pre-load buffer 1 word */
154 1.2.4.2 keiichi lwzu %r8, 4(%r4) /* pre-load buffer 2 word */
155 1.2.4.2 keiichi lwzu %r9, 4(%r4) /* pre-load buffer 2 word */
156 1.2.4.2 keiichi
157 1.2.4.2 keiichi bdnz+ loop /* Do more DW's if cnt > 0 */
158 1.2.4.2 keiichi
159 1.2.4.2 keiichi /*mfctr %r12*/ /*DWG*/ /* number of dwords left */
160 1.2.4.2 keiichi /*subf %r10, %r12, %r10*/ /*DWG*//* number of dwords compared */
161 1.2.4.2 keiichi slwi %r10, %r10, 3 /* dwords handled by the loop -> bytes */
162 1.2.4.2 keiichi subf %r5, %r10, %r5 /* adjust byte counter */
163 1.2.4.2 keiichi /*bne+ partial*/ /*DWG*/ /* If less than 8 bytes, handle */
164 1.2.4.2 keiichi /* specially */
165 1.2.4.2 keiichi /*cmpwi %r5, 8*/ /* Removed. DWG */
166 1.2.4.2 keiichi /*blt partial*/ /* Removed. DWG */
167 1.2.4.2 keiichi
168 1.2.4.2 keiichi /*addic %r5, %r5, -8*/ /*DWG*/ /* Subtract two words from count*/
169 1.2.4.2 keiichi
170 1.2.4.2 keiichi cmplw %r8, %r6 /* compare last dword: first words */
171 1.2.4.2 keiichi addi %r4, %r4, 4 /* r4 -> first unread buffer 2 byte */
172 1.2.4.2 keiichi bne- all_done
173 1.2.4.2 keiichi
174 1.2.4.2 keiichi cmplw %r9, %r7 /* compare last dword: second words */
175 1.2.4.2 keiichi addi %r11, %r11, 4 /* r11 -> first unread buffer 1 byte */
176 1.2.4.2 keiichi bne- all_done
177 1.2.4.2 keiichi
178 1.2.4.2 keiichi bytebybyte:
179 1.2.4.2 keiichi
180 1.2.4.2 keiichi /* Near a page boundary (or just past the dword loop): do a byte-by-byte
181 1.2.4.2 keiichi * compare for the following min(8, count) bytes, and then go back to
182 1.2.4.2 keiichi * the full-word compare at "again" if any count remains.
183 1.2.4.2 keiichi */
184 1.2.4.2 keiichi
185 1.2.4.2 keiichi li %r3, 8 /* loop count */
186 1.2.4.2 keiichi cmpw %r3, %r5 /* take min(8, counter) */
187 1.2.4.2 keiichi ble f2
188 1.2.4.2 keiichi
189 1.2.4.2 keiichi mr. %r3, %r5 /* counter < 8: use it as loop count; sets CR0 */
190 1.2.4.2 keiichi
191 1.2.4.2 keiichi beqlr /* counter == 0: return with r3 = 0 (equal) */
192 1.2.4.2 keiichi
193 1.2.4.2 keiichi f2:
194 1.2.4.2 keiichi
195 1.2.4.2 keiichi mtctr %r3 /* byte count for loop */
196 1.2.4.2 keiichi subf %r5, %r3, %r5 /* adjust counter */
197 1.2.4.2 keiichi
198 1.2.4.2 keiichi bbb:
199 1.2.4.2 keiichi
200 1.2.4.2 keiichi lbz %r6, 0(%r11) /* byte compare loop: buffer 1 byte */
201 1.2.4.2 keiichi
202 1.2.4.2 keiichi addi %r11, %r11, 1
203 1.2.4.2 keiichi
204 1.2.4.2 keiichi lbz %r8, 0(%r4) /* buffer 2 byte */
205 1.2.4.2 keiichi
206 1.2.4.2 keiichi addi %r4, %r4, 1
207 1.2.4.2 keiichi
208 1.2.4.2 keiichi cmplw %r8, %r6 /* unsigned compare: buffer 2 byte vs buffer 1 byte */
209 1.2.4.2 keiichi
210 1.2.4.2 keiichi bdnzt+ eq, bbb /* continue while bytes match and ctr != 0 */
211 1.2.4.2 keiichi
212 1.2.4.2 keiichi bne all_done /* last byte compared differs => we're done */
213 1.2.4.2 keiichi
214 1.2.4.2 keiichi cmpwi %r5, 0 /* any count left? */
215 1.2.4.2 keiichi bgt again /* handle the rest */
216 1.2.4.2 keiichi
217 1.2.4.2 keiichi xor %r3,%r3,%r3 /* Equal result */
218 1.2.4.2 keiichi
219 1.2.4.2 keiichi blr
220 1.2.4.2 keiichi
221 1.2.4.2 keiichi #if 0 /* Removed code section. DWG */
222 1.2.4.2 keiichi partial:
223 1.2.4.2 keiichi
224 1.2.4.2 keiichi mr. %r3, %r5
225 1.2.4.2 keiichi
226 1.2.4.2 keiichi beqlr /* If count -> 0, we're done */
227 1.2.4.2 keiichi
228 1.2.4.2 keiichi f1:
229 1.2.4.2 keiichi
230 1.2.4.2 keiichi subfic %r3, %r3, 4 /* zero/end in first word? */
231 1.2.4.2 keiichi cmpwi %r3, 0
232 1.2.4.2 keiichi blt last4
233 1.2.4.2 keiichi #endif /* DWG */
234 1.2.4.2 keiichi
235 1.2.4.2 keiichi first4:
236 1.2.4.2 keiichi subfic %r3, %r5, 4 /* r3 = 4 - count (count <= 4 here) */
237 1.2.4.2 keiichi rlwinm %r3, %r3, 3, 0, 31 /* bytes -> bits (*= 8) */
238 1.2.4.2 keiichi srw %r6, %r6, %r3 /* shift out low-order bytes of 1st buffer 1 word */
239 1.2.4.2 keiichi srw %r8, %r8, %r3 /* shift out low-order bytes of 1st buffer 2 word */
240 1.2.4.2 keiichi
241 1.2.4.2 keiichi cmplw %r8, %r6 /* get result */
242 1.2.4.2 keiichi bne all_done
243 1.2.4.2 keiichi xor %r3,%r3,%r3 /* Equal result */
244 1.2.4.2 keiichi blr
245 1.2.4.2 keiichi
246 1.2.4.2 keiichi last4:
247 1.2.4.2 keiichi subfic %r10, %r5, 8 /* r10 = 8 - count (count <= 8 here) DWG */
248 1.2.4.2 keiichi rlwinm %r10, %r10, 3, 0, 31 /* bytes -> bits (*= 8) */
249 1.2.4.2 keiichi srw %r7, %r7, %r10 /* shift out low-order bytes of 2nd buffer 1 word */
250 1.2.4.2 keiichi srw %r9, %r9, %r10 /* shift out low-order bytes of 2nd buffer 2 word */
251 1.2.4.2 keiichi
252 1.2.4.2 keiichi cmplw %r9, %r7 /* get result */
253 1.2.4.2 keiichi bne all_done
254 1.2.4.2 keiichi ret_0:
255 1.2.4.2 keiichi xor %r3,%r3,%r3 /* Equal result */
256 1.2.4.2 keiichi blr
257 1.2.4.2 keiichi
258 1.2.4.2 keiichi all_done:
259 1.2.4.2 keiichi
260 1.2.4.2 keiichi blt finish_lt /* CR0 from cmplw: buffer 2 < buffer 1 (unsigned)? */
261 1.2.4.2 keiichi
262 1.2.4.2 keiichi addi %r3,0,-1 /* buffer 1 less than buffer 2: return -1 */
263 1.2.4.2 keiichi
264 1.2.4.2 keiichi blr
265 1.2.4.2 keiichi
266 1.2.4.2 keiichi finish_lt:
267 1.2.4.2 keiichi
268 1.2.4.2 keiichi addi %r3,0,1 /* buffer 1 greater than buffer 2: return 1 */
269 1.2.4.2 keiichi
270 1.2.4.2 keiichi blr
271