Home | History | Annotate | Line # | Download | only in string
memcmp.S revision 1.2.2.2
      1 /* $NetBSD: memcmp.S,v 1.2.2.2 2008/03/23 00:12:42 matt Exp $ */
      2 
      3 /* stropt/memcmp.S, pl_string_common, pl_linux 10/11/04 11:45:35
      4  * ==========================================================================
      5  * Optimized memcmp implementation for IBM PowerPC 405/440.
      6  *
      7  *	Copyright (c) 2003, IBM Corporation
      8  *	All rights reserved.
      9  *
     10  *	Redistribution and use in source and binary forms, with or
     11  *	without modification, are permitted provided that the following
     12  *	conditions are met:
     13  *
     14  *	* Redistributions of source code must retain the above
     15  *	copyright notice, this list of conditions and the following
     16  *	disclaimer.
     17  *	* Redistributions in binary form must reproduce the above
     18  *	copyright notice, this list of conditions and the following
     19  *	disclaimer in the documentation and/or other materials
     20  *	provided with the distribution.
     21  *	* Neither the name of IBM nor the names of its contributors
     22  *	may be used to endorse or promote products derived from this
     23  *	software without specific prior written permission.
     24  *
     25  *	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
     26  *	CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
     27  *	INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
     28  *	MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     29  *	DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
     30  *	BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
     31  *	OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     32  *	PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     33  *	PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     34  *	OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     35  *	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
     36  *	USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     37  *
     38  * ==========================================================================
     39  *
     40  * Function: Compare two character strings (up to n characters)
     41  *
     42  *		int memcmp(const char *s1, const char *s2, int n)
     43  *
     44  * Input:	r3 - buffer 1 address
     45  *	 	r4 - buffer 2 address
     46  *	 	r5 - maximum characters to compare
     47  * Output: r3 <0 (less), 0 (equal), >0 (greater)
     48  *
     49  * ==========================================================================
     50  */
     51 
     52 #define _NOREGNAMES
     53 #include <machine/asm.h>
     54 
     55         .text
     56         .align 4
     57 /* LINTSTUB: Func: int memcmp(const void *, const void *, size_t) */
ENTRY(memcmp)

	/*
	 * int memcmp(const void *s1, const void *s2, size_t n)
	 *
	 * In:	r3 = s1 ("buffer 1"), r4 = s2 ("buffer 2"), r5 = n
	 * Out:	r3 = 0 if the first n bytes are equal,
	 *	     1 if buffer 1 > buffer 2,
	 *	    -1 if buffer 1 < buffer 2
	 * Uses: r3-r12, CR0, CTR.  No stack, no calls.
	 *
	 * Register roles inside the routine:
	 *	r11	 buffer 1 cursor (r3 is reused as scratch)
	 *	r4	 buffer 2 cursor
	 *	r5	 bytes still to compare
	 *	r6, r7	 current buffer 1 words
	 *	r8, r9	 current buffer 2 words
	 *	r10, r12 dword counts / scratch
	 *
	 * Every compare is "buffer 2 against buffer 1" (cmplw rB2, rB1),
	 * so all_done maps CR0[lt] (buffer 2 < buffer 1) to +1.
	 *
	 * Word loads are issued without any alignment check, and whole
	 * words are compared with cmplw; that only yields memcmp ordering
	 * when the first byte in memory is the most significant byte of
	 * the loaded word (big-endian byte order).
	 *
	 * Words are loaded in full even when fewer bytes remain, so the
	 * code first makes sure more than 8 bytes are left before the next
	 * 4 KB boundary of each buffer; otherwise it falls back to byte
	 * loads (presumably so an over-read never touches the next page).
	 *
	 * n is a size_t: every test of the remaining count is unsigned
	 * (cmplw/cmplwi).  Signed tests would treat n >= 0x80000000 as
	 * negative and report "equal" without comparing anything.
	 */

	cmplwi	%r5, 0			/* n == 0: nothing to compare	*/
	beq-	ret_0

	mr	%r11, %r3		/* r11 = buffer 1 cursor	*/

again:
	/*
	 * (~addr >> 3) & 0x1ff is the number of whole dwords between addr
	 * and the last byte of its 4 KB page.  Zero means at most 8 bytes
	 * remain in the page, so word loads are not attempted.
	 */
	not	%r10, %r4
	rlwinm.	%r10, %r10, 29, 23, 31	/* buffer 2: dwords to page bdy	*/
	beq-	bytebybyte		/* too close: go byte by byte	*/
	lwz	%r8, 0(%r4)		/* 1st buffer 2 word		*/

	not	%r12, %r11
	rlwinm.	%r12, %r12, 29, 23, 31	/* buffer 1: dwords to page bdy	*/
	beq-	bytebybyte		/* too close: go byte by byte	*/
	lwz	%r6, 0(%r11)		/* 1st buffer 1 word		*/

	cmplwi	%r5, 4			/* 1..4 bytes left (unsigned)?	*/
	ble+	first4			/* compare a partial word	*/

	cmplw	%r8, %r6		/* 1st words differ => done	*/
	bne+	all_done

	lwzu	%r9, 4(%r4)		/* 2nd buffer 2 word, r4 += 4	*/
	lwzu	%r7, 4(%r11)		/* 2nd buffer 1 word, r11 += 4	*/

	cmplwi	%r5, 8			/* 5..8 bytes left (unsigned)?	*/
	ble+	last4			/* compare a partial 2nd word	*/

	cmplw	%r9, %r7		/* 2nd words differ => done	*/
	bne+	all_done

	/*
	 * First 8 bytes are equal.  Both cursors currently point at the
	 * second word, so "cursor + 4" is the next unread byte.
	 */
	addi	%r5, %r5, -8		/* 8 bytes consumed		*/
	addi	%r10, %r4, 0x0004
	not	%r10, %r10
	rlwinm.	%r10, %r10, 29, 23, 31	/* buffer 2: dwords to page bdy	*/
	addi	%r12, %r11, 0x0004
	not	%r12, %r12
	rlwinm.	%r12, %r12, 29, 23, 31	/* buffer 1: dwords to page bdy	*/

	/*
	 * Loop count = min(dwords to page boundary of buffer 2,
	 *		    dwords to page boundary of buffer 1,
	 *		    remaining bytes / 8).
	 */
	cmplw	%r10, %r12
	blt	lt
	mr	%r10, %r12
lt:
	srwi	%r12, %r5, 3		/* whole dwords left in count	*/
	cmplw	%r10, %r12
	blt	lt2
	mr	%r10, %r12
lt2:
	cmplwi	%r10, 0			/* no full dword can be done:	*/
	bne	lt3
	addi	%r4, %r4, 0x0004	/* step both cursors past the	*/
	addi	%r11, %r11, 0x0004	/* 2nd word and start over	*/
	b	again
lt3:
	mtctr	%r10			/* dword count for the loop	*/
	lwzu	%r6, 4(%r11)		/* pre-load buffer 1 word	*/
	b	in			/* enter at the load half	*/

	/*
	 * Main loop, software pipelined: each pass compares the dword
	 * loaded by the previous pass while loading the next one.  The
	 * dword loaded by the final pass is compared after the loop.
	 */
loop:
	cmplw	%r8, %r6		/* 1st words of the dword	*/
	bne-	all_done
	cmplw	%r9, %r7		/* 2nd words of the dword	*/
	lwzu	%r6, 4(%r11)		/* pre-load buffer 1 word;	*/
					/* lwzu leaves CR0 untouched	*/
	bne-	all_done
in:
	lwzu	%r7, 4(%r11)		/* pre-load buffer 1 word	*/
	lwzu	%r8, 4(%r4)		/* pre-load buffer 2 word	*/
	lwzu	%r9, 4(%r4)		/* pre-load buffer 2 word	*/
	bdnz+	loop			/* more dwords while CTR != 0	*/

	slwi	%r10, %r10, 3		/* dwords done -> bytes		*/
	subf	%r5, %r10, %r5		/* adjust byte counter		*/

	cmplw	%r8, %r6		/* compare the last dword	*/
	addi	%r4, %r4, 4		/* r4 -> next unread byte	*/
	bne-	all_done

	cmplw	%r9, %r7
	addi	%r11, %r11, 4		/* r11 -> next unread byte	*/
	bne-	all_done

bytebybyte:
	/*
	 * Compare min(8, remaining) bytes one at a time, then go back to
	 * the word-based path if anything is left.  Reached when a buffer
	 * is close to a page boundary and after every pass of the dword
	 * loop.
	 */
	li	%r3, 8			/* r3 = min(8, r5), unsigned	*/
	cmplw	%r3, %r5
	ble	f2
	mr.	%r3, %r5
	beqlr				/* nothing left: return r3 = 0	*/
f2:
	mtctr	%r3
	subf	%r5, %r3, %r5		/* adjust byte counter		*/
bbb:
	lbz	%r6, 0(%r11)		/* buffer 1 byte		*/
	addi	%r11, %r11, 1
	lbz	%r8, 0(%r4)		/* buffer 2 byte		*/
	addi	%r4, %r4, 1
	cmplw	%r8, %r6
	bdnzt+	eq, bbb			/* loop while equal and CTR != 0 */

	bne	all_done		/* stopped on a mismatch	*/

	cmplwi	%r5, 0			/* bytes left (unsigned)?	*/
	bne	again			/* handle the rest		*/

	li	%r3, 0			/* equal			*/
	blr

first4:
	/*
	 * 1..4 bytes left; r6/r8 hold the first word of each buffer.
	 * Shifting right by 8 * (4 - count) bits drops the low-order
	 * bytes, i.e. the ones past the count.
	 */
	subfic	%r3, %r5, 4		/* r3 = 4 - count		*/
	rlwinm	%r3, %r3, 3, 0, 31	/* bytes -> bits		*/
	srw	%r6, %r6, %r3		/* trim buffer 1 word		*/
	srw	%r8, %r8, %r3		/* trim buffer 2 word		*/

	cmplw	%r8, %r6		/* get result			*/
	bne	all_done
	li	%r3, 0			/* equal			*/
	blr

last4:
	/*
	 * 5..8 bytes left and the first words matched; r7/r9 hold the
	 * second word of each buffer.  Same trimming as first4 with
	 * 8 * (8 - count) bits.
	 */
	subfic	%r10, %r5, 8		/* r10 = 8 - count		*/
	rlwinm	%r10, %r10, 3, 0, 31	/* bytes -> bits		*/
	srw	%r7, %r7, %r10		/* trim buffer 1 word		*/
	srw	%r9, %r9, %r10		/* trim buffer 2 word		*/

	cmplw	%r9, %r7		/* get result			*/
	bne	all_done
ret_0:
	li	%r3, 0			/* equal			*/
	blr

all_done:
	/* CR0 holds an unsigned "buffer 2 vs buffer 1" mismatch. */
	blt	finish_lt		/* buffer 2 < buffer 1		*/

	li	%r3, -1			/* buffer 1 < buffer 2		*/
	blr

finish_lt:
	li	%r3, 1			/* buffer 1 > buffer 2		*/
	blr
    271