memcmp.S revision 1.3 1 1.3 matt /* $NetBSD: memcmp.S,v 1.3 2011/01/15 07:31:12 matt Exp $ */
2 1.1 garbled
3 1.1 garbled /* stropt/memcmp.S, pl_string_common, pl_linux 10/11/04 11:45:35
4 1.1 garbled * ==========================================================================
5 1.1 garbled * Optimized memcmp implementation for IBM PowerPC 405/440.
6 1.1 garbled *
7 1.1 garbled * Copyright (c) 2003, IBM Corporation
8 1.1 garbled * All rights reserved.
9 1.1 garbled *
10 1.1 garbled * Redistribution and use in source and binary forms, with or
11 1.1 garbled * without modification, are permitted provided that the following
12 1.1 garbled * conditions are met:
13 1.1 garbled *
14 1.1 garbled * * Redistributions of source code must retain the above
15 1.1 garbled * copyright notice, this list of conditions and the following
16 1.1 garbled * disclaimer.
17 1.1 garbled * * Redistributions in binary form must reproduce the above
18 1.1 garbled * copyright notice, this list of conditions and the following
19 1.1 garbled * disclaimer in the documentation and/or other materials
20 1.1 garbled * provided with the distribution.
21 1.1 garbled * * Neither the name of IBM nor the names of its contributors
22 1.1 garbled * may be used to endorse or promote products derived from this
23 1.1 garbled * software without specific prior written permission.
24 1.1 garbled *
25 1.1 garbled * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
26 1.1 garbled * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
27 1.1 garbled * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
28 1.1 garbled * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
29 1.1 garbled * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
30 1.1 garbled * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
31 1.1 garbled * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 1.1 garbled * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 1.1 garbled * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
34 1.1 garbled * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 1.1 garbled * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
36 1.1 garbled * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 1.1 garbled *
38 1.1 garbled * ==========================================================================
39 1.1 garbled *
40 1.1 garbled * Function: Compare two memory buffers (up to n bytes)
41 1.1 garbled *
42 1.1 garbled * int memcmp(const void *s1, const void *s2, size_t n)
43 1.1 garbled *
44 1.1 garbled * Input: r3 - buffer 1 address
45 1.1 garbled * r4 - buffer 2 address
46 1.1 garbled * r5 - number of bytes to compare
47 1.1 garbled * Output: r3 <0 (less), 0 (equal), >0 (greater)
48 1.1 garbled *
49 1.1 garbled * ==========================================================================
50 1.1 garbled */
51 1.1 garbled
52 1.1 garbled #include <machine/asm.h>
53 1.1 garbled
54 1.1 garbled .text
55 1.1 garbled .align 4
56 1.1 garbled /* LINTSTUB: Func: int memcmp(const void *, const void *, size_t) */
57 1.1 garbled ENTRY(memcmp)
58 1.1 garbled /* Registers: r3 = result, r4 = buffer 2 cursor, r11 = buffer 1 cursor, r5 = bytes left; r6/r7 = buffer 1 words, r8/r9 = buffer 2 words */
59 1.1 garbled /*
60 1.1 garbled * Check count passed in R5. If zero, return 0; otherwise continue.
61 1.1 garbled */
62 1.1 garbled cmpwi %r5,0
63 1.1 garbled beq- ret_0;
64 1.1 garbled
65 1.1 garbled /*
66 1.1 garbled * Most of the time the difference is found in the first
67 1.1 garbled * several bytes. The following code minimizes the number
68 1.1 garbled * of load operations for short compares.
69 1.1 garbled */
70 1.1 garbled
71 1.1 garbled mr %r11, %r3 /* Save buffer 1; r3 is reused for the result */
72 1.1 garbled
73 1.1 garbled again: /* (re)start word-wise compare at the current cursors */
74 1.1 garbled
75 1.1 garbled not %r10, %r4 /* buffer 2: ~address */
76 1.1 garbled rlwinm. %r10, %r10,29,23,31 /* buffer 2: (~address >> 3) & 0x1ff = whole dwords before the next 4 KB boundary */
77 1.1 garbled beq- bytebybyte /* If < 8 bytes to the page bdy */
78 1.1 garbled /* do byte by byte */
79 1.1 garbled lwz %r8, 0(%r4) /* load 1st buffer 2 word */
80 1.1 garbled
81 1.1 garbled not %r12, %r11 /* buffer 1: ~address */
82 1.1 garbled rlwinm. %r12, %r12,29,23,31 /* buffer 1: whole dwords before the next 4 KB boundary */
83 1.1 garbled beq- bytebybyte /* If < 8 bytes to the page bdy */
84 1.1 garbled /* do byte by byte */
85 1.1 garbled lwz %r6, 0(%r11) /* load 1st buffer 1 word */
86 1.1 garbled /* NOTE(review): the count checks below use signed compares (cmpwi); a count with bit 31 set would take the first4 path - confirm callers never pass one */
87 1.1 garbled cmpwi %r5, 4 /* If remaining count <= 4 */
88 1.1 garbled ble+ first4 /* handle specially. DWG */
89 1.1 garbled
90 1.1 garbled cmplw %r8, %r6 /* compare buffer 2 and buffer 1 (unsigned) */
91 1.1 garbled bne+ all_done /* different => we're done */
92 1.1 garbled
93 1.1 garbled lwzu %r9, 4(%r4) /* load 2nd buffer 2 word; r4 += 4 */
94 1.1 garbled lwzu %r7, 4(%r11) /* load 2nd buffer 1 word; r11 += 4 */
95 1.1 garbled
96 1.1 garbled cmpwi %r5, 8 /* If remaining count <= 8 */
97 1.1 garbled ble+ last4 /* handle specially. DWG */
98 1.1 garbled
99 1.1 garbled cmplw %r9, %r7 /* compare buffer 2 and buffer 1 (unsigned) */
100 1.1 garbled bne+ all_done /* different => we're done */
101 1.1 garbled
102 1.1 garbled addi %r5, %r5, -8 /* Update character counter DWG */
103 1.1 garbled addi %r10, %r4, 0x0004 /* address of next unread buffer 2 word DWG */
104 1.1 garbled not %r10, %r10 /* buffer 2: ~address DWG */
105 1.1 garbled rlwinm. %r10, %r10,29,23,31 /* buffer 2: dwords to page bdy DWG */
106 1.1 garbled addi %r12, %r11, 0x0004 /* address of next unread buffer 1 word DWG */
107 1.1 garbled not %r12, %r12 /* buffer 1: ~address DWG */
108 1.1 garbled rlwinm. %r12, %r12,29,23,31 /* buffer 1: dwords to page bdy DWG */
109 1.1 garbled
110 1.1 garbled /* The following section prior to loop: figures out whether */
111 1.1 garbled /* the buffer 1 or buffer 2 is closer to the page boundary. */
112 1.1 garbled /* The main loop count is then set up to reflect the number of */
113 1.1 garbled /* double words of the buffer that is closest */
114 1.1 garbled
115 1.1 garbled cmpw %r10, %r12 /* Find closest */
116 1.1 garbled blt lt /* r10 already the smaller one */
117 1.1 garbled
118 1.1 garbled mr %r10, %r12 /* r10 = min(r10, r12) */
119 1.1 garbled
120 1.1 garbled lt:
121 1.1 garbled
122 1.1 garbled srwi %r12, %r5, 3 /* Double check the total count: r12 = remaining bytes / 8 */
123 1.1 garbled cmpw %r10, %r12 /* limitation */
124 1.1 garbled blt lt2
125 1.1 garbled
126 1.1 garbled mr %r10, %r12 /* r10 = min(r10, remaining dwords) DWG */
127 1.1 garbled lt2: /* DWG */
128 1.1 garbled cmpwi %r10, 0 /* no whole dword can be looped over? DWG */
129 1.1 garbled bne lt3 /* DWG */
130 1.1 garbled addi %r4, %r4, 0x0004 /* step past the 2nd word already compared DWG */
131 1.1 garbled addi %r11,%r11,0x0004 /* DWG */
132 1.1 garbled b again /* DWG */
133 1.1 garbled lt3: /* DWG */
134 1.1 garbled mtctr %r10 /* dword count for loop */
135 1.1 garbled lwzu %r6, 4(%r11) /* pre-load buffer 1 word */
136 1.1 garbled
137 1.1 garbled b in /* To the loop: enter at the load half so the first dword is fetched before any compare */
138 1.1 garbled
139 1.1 garbled loop: /* main loop: compare the dword loaded last pass while loading the next */
140 1.1 garbled
141 1.1 garbled cmplw %r8, %r6 /* Compare first buffer 2 word */
142 1.1 garbled bne- all_done /* with first buffer 1 word */
143 1.1 garbled /* If different, we're done */
144 1.1 garbled cmplw %r9, %r7 /* Compare second buffer 2 word */
145 1.1 garbled /* with second buffer 1 word */
146 1.1 garbled lwzu %r6, 4(%r11) /* pre-load buffer 1 word (lwzu does not alter CR0) */
147 1.1 garbled
148 1.1 garbled bne- all_done /* If different, we're done */
149 1.1 garbled
150 1.1 garbled in:
151 1.1 garbled
152 1.1 garbled lwzu %r7, 4(%r11) /* pre-load buffer 1 word */
153 1.1 garbled lwzu %r8, 4(%r4) /* pre-load buffer 2 word */
154 1.1 garbled lwzu %r9, 4(%r4) /* pre-load buffer 2 word */
155 1.1 garbled
156 1.1 garbled bdnz+ loop /* Do more DW's if cnt > 0 */
157 1.1 garbled
158 1.1 garbled /*mfctr %r12*/ /*DWG*/ /* number of dwords left */
159 1.1 garbled /*subf %r10, %r12, %r10*/ /*DWG*//* number of dwords compared */
160 1.1 garbled slwi %r10, %r10, 3 /* dwords loaded by the loop -> bytes */
161 1.1 garbled subf %r5, %r10, %r5 /* adjust byte counter */
162 1.1 garbled /*bne+ partial*/ /*DWG*/ /* If less than 8 bytes, handle */
163 1.1 garbled /* specially */
164 1.1 garbled /*cmpwi %r5, 8*/ /* Removed. DWG */
165 1.1 garbled /*blt partial*/ /* Removed. DWG */
166 1.1 garbled
167 1.1 garbled /*addic %r5, %r5, -8*/ /*DWG*/ /* Subtract two words from count*/
168 1.1 garbled
169 1.1 garbled cmplw %r8, %r6 /* compare last dword */
170 1.1 garbled addi %r4, %r4, 4 /* point r4 at the first unread byte */
171 1.1 garbled bne- all_done
172 1.1 garbled
173 1.1 garbled cmplw %r9, %r7
174 1.1 garbled addi %r11, %r11, 4 /* point r11 at the first unread byte */
175 1.1 garbled bne- all_done
176 1.1 garbled
177 1.1 garbled bytebybyte:
178 1.1 garbled
179 1.1 garbled /* We've gotten close to a page boundary: do a byte-by-byte
180 1.1 garbled * compare for the following 8 bytes, and then go back to
181 1.1 garbled * the full-word compare loop.
182 1.1 garbled */
183 1.1 garbled
184 1.1 garbled li %r3, 8 /* loop count */
185 1.1 garbled cmpw %r3, %r5 /* take min(8, counter) */
186 1.1 garbled ble f2
187 1.1 garbled
188 1.1 garbled mr. %r3, %r5 /* fewer than 8 bytes left: use the remaining count */
189 1.1 garbled
190 1.1 garbled beqlr /* nothing left: return with r3 == 0 (equal) */
191 1.1 garbled
192 1.1 garbled f2:
193 1.1 garbled
194 1.1 garbled mtctr %r3
195 1.1 garbled subf %r5, %r3, %r5 /* adjust counter */
196 1.1 garbled
197 1.1 garbled bbb:
198 1.1 garbled
199 1.1 garbled lbz %r6, 0(%r11) /* byte compare loop */
200 1.1 garbled
201 1.1 garbled addi %r11, %r11, 1
202 1.1 garbled
203 1.1 garbled lbz %r8, 0(%r4)
204 1.1 garbled
205 1.1 garbled addi %r4, %r4, 1
206 1.1 garbled
207 1.1 garbled cmplw %r8, %r6
208 1.1 garbled
209 1.1 garbled bdnzt+ eq, bbb /* keep going while ctr != 0 and the bytes matched */
210 1.1 garbled
211 1.1 garbled bne all_done /* left the loop on a mismatch */
212 1.1 garbled
213 1.1 garbled cmpwi %r5, 0
214 1.1 garbled bgt again /* handle the rest */
215 1.1 garbled
216 1.1 garbled xor %r3,%r3,%r3 /* everything compared equal: return 0 */
217 1.1 garbled
218 1.1 garbled blr
219 1.1 garbled
220 1.1 garbled #if 0 /* Removed code section. DWG */
221 1.1 garbled partial:
222 1.1 garbled
223 1.1 garbled mr. %r3, %r5
224 1.1 garbled
225 1.1 garbled beqlr /* If count -> 0, we're done */
226 1.1 garbled
227 1.1 garbled f1:
228 1.1 garbled
229 1.1 garbled subfic %r3, %r3, 4 /* zero/end in first word? */
230 1.1 garbled cmpwi %r3, 0
231 1.1 garbled blt last4
232 1.1 garbled #endif /* DWG */
233 1.1 garbled
234 1.1 garbled first4: /* count <= 4: only part of the first word pair is significant */
235 1.1 garbled subfic %r3, %r5, 4 /* If count <= 4, handle: r3 = 4 - count = excess bytes */
236 1.1 garbled rlwinm %r3, %r3, 3, 0, 31 /* excess bytes * 8 = bits to discard */
237 1.1 garbled srw %r6, %r6, %r3 /* align 1st buffer 1 word: shift out the low-order excess bytes */
238 1.1 garbled srw %r8, %r8, %r3 /* align 1st buffer 2 word */
239 1.1 garbled
240 1.1 garbled cmplw %r8, %r6 /* get result */
241 1.1 garbled bne all_done
242 1.1 garbled xor %r3,%r3,%r3 /* equal: return 0 */
243 1.1 garbled blr
244 1.1 garbled
245 1.1 garbled last4: /* count <= 8: first words matched, only part of the second pair is significant */
246 1.1 garbled subfic %r10, %r5, 8 /* r10 = 8 - count = excess bytes DWG */
247 1.1 garbled rlwinm %r10, %r10, 3, 0, 31 /* excess bytes * 8 = bits to discard */
248 1.1 garbled srw %r7, %r7, %r10 /* align 2nd buffer 1 word */
249 1.1 garbled srw %r9, %r9, %r10 /* align 2nd buffer 2 word */
250 1.1 garbled
251 1.1 garbled cmplw %r9, %r7 /* get result */
252 1.1 garbled bne all_done
253 1.1 garbled ret_0:
254 1.1 garbled xor %r3,%r3,%r3 /* Equal result */
255 1.1 garbled blr
256 1.1 garbled
257 1.1 garbled all_done: /* CR0 holds the unsigned compare of the buffer 2 value against the buffer 1 value */
258 1.1 garbled
259 1.1 garbled blt finish_lt /* buffer 2 < buffer 1 */
260 1.1 garbled
261 1.1 garbled addi %r3,0,-1 /* Less than result: buffer 1 < buffer 2, return -1 */
262 1.1 garbled
263 1.1 garbled blr
264 1.1 garbled
265 1.1 garbled finish_lt:
266 1.1 garbled
267 1.1 garbled addi %r3,0,1 /* Greater than result: buffer 1 > buffer 2, return 1 */
268 1.1 garbled
269 1.1 garbled blr
270 1.3 matt END(memcmp)
271