Home | History | Annotate | Line # | Download | only in string
strcat.S revision 1.2
      1 /*
      2  * Written by J.T. Conklin <jtc (at) acorntoolworks.com>
      3  * Public domain.
      4  */
      5 
      6 #include <machine/asm.h>
      7 
      8 #if defined(LIBC_SCCS)
      9 	RCSID("$NetBSD: strcat.S,v 1.2 2014/03/22 19:38:46 jakllsch Exp $")
     10 #endif
     11 
     12 ENTRY(strcat)
        	/*
        	 * char *strcat(char *dst, const char *src)
        	 *
        	 * Appends the NUL-terminated string src to the end of the
        	 * NUL-terminated string dst and returns dst.  Both arguments
        	 * are read from the stack; once %ebx has been pushed below,
        	 * dst is at 8(%esp) and src is at 12(%esp).
        	 *
        	 * Register use:
        	 *	%ecx	cursor in dst (scan position, then write position)
        	 *	%eax	read cursor in src; reloaded with dst before return
        	 *	%ebx	byte/word currently being examined (saved/restored)
        	 *	%edx	scratch for the word-at-a-time zero-byte test
        	 */
     13 	pushl	%ebx			/* preserved for the caller; restored at .Ldone */
     14 	movl	8(%esp),%ecx		/* %ecx = dst */
     15 	movl	12(%esp),%eax		/* %eax = src */
     16 
     17 	/*
     18 	 * Align destination to word boundary.
     19 	 * Consider unrolling loop?
        	 *
        	 * dst is examined one byte at a time until %ecx is a multiple
        	 * of 4.  If its terminating NUL is found on the way, go
        	 * straight to the copy with %ecx pointing at that NUL.
     20 	 */
     21 .Lscan:
     22 .Lscan_align:
     23 	testb	$3,%cl			/* low two address bits clear? */
     24 	je	.Lscan_aligned
     25 	cmpb	$0,(%ecx)		/* already at the end of dst? */
     26 	je	.Lcopy
     27 	incl	%ecx
     28 	jmp	.Lscan_align
     29 
     30 	_ALIGN_TEXT
     31 .Lscan_aligned:
     32 .Lscan_loop:
        	/*
        	 * Examine dst four bytes at a time.  Subtracting 0x01010101
        	 * leaves the 0x80 bit set in the position of every byte that
        	 * was 0, so a zero test result proves the word holds no NUL.
        	 * A byte of 0x81 or above leaves that bit set as well, so a
        	 * non-zero result has to be confirmed byte by byte below.
        	 */
     33 	movl	(%ecx),%ebx
     34 	addl	$4,%ecx			/* %ecx now points just past this word */
     35 	leal	-0x01010101(%ebx),%edx
     36 	testl	$0x80808080,%edx
     37 	je	.Lscan_loop
     38 
     39 	/*
     40 	 * In rare cases, the above loop may exit prematurely. We must
     41 	 * return to the loop if none of the bytes in the word equal 0.
     42 	 */
     43 
     44 	/*
     45 	 * The optimal code for determining whether each byte is zero
     46 	 * differs by processor.  This space-optimized code should be
     47 	 * acceptable on all, especially since we don't expect it to
     48 	 * be run frequently.
        	 *
        	 * Bytes are tested in address order (%bl holds the byte at
        	 * the lowest address).  Because %ecx has already been
        	 * advanced by 4, it is moved back so that it points at the
        	 * NUL that was found.
     49 	 */
     50 
     51 	testb	%bl,%bl		/* 1st byte == 0? */
     52 	jne	1f
     53 	subl	$4,%ecx		/* back up to the 1st byte */
     54 	jmp	.Lcopy
     55 
     56 1:	testb	%bh,%bh		/* 2nd byte == 0? */
     57 	jne	1f
     58 	subl	$3,%ecx		/* back up to the 2nd byte */
     59 	jmp	.Lcopy
     60 
     61 1:	shrl	$16,%ebx	/* move bytes 3 and 4 down into %bl/%bh */
     62 	testb	%bl,%bl		/* 3rd byte == 0? */
     63 	jne	1f
     64 	subl	$2,%ecx		/* back up to the 3rd byte */
     65 	jmp	.Lcopy
     66 
     67 1:	testb	%bh,%bh		/* 4th byte == 0? */
     68 	jne	.Lscan_loop	/* no NUL in this word after all: keep scanning */
     69 	subl	$1,%ecx		/* back up to the 4th byte */
     70 
     71 	/*
     72 	 * Align source to a word boundary.
     73 	 * Consider unrolling loop?
        	 *
        	 * %ecx points at the terminating NUL of dst.  src is copied
        	 * one byte at a time until %eax is a multiple of 4; if the
        	 * byte just copied was the NUL, the copy is complete.
     74 	 */
     75 .Lcopy:
     76 .Lcopy_align:
     77 	testl	$3,%eax			/* is the source pointer word aligned? */
     78 	je	.Lcopy_aligned
     79 	movb	(%eax),%bl
     80 	incl	%eax
     81 	movb	%bl,(%ecx)
     82 	incl	%ecx
     83 	testb	%bl,%bl			/* was that the terminator? */
     84 	jne	.Lcopy_align
     85 	jmp	.Ldone
     86 
     87 	_ALIGN_TEXT
     88 .Lcopy_loop:
     89 	movl	%ebx,(%ecx)		/* word holds no NUL: store all four bytes */
     90 	addl	$4,%ecx
     91 .Lcopy_aligned:
     92 	movl	(%eax),%ebx		/* next word of src */
     93 	addl	$4,%eax
     94 	leal	-0x01010101(%ebx),%edx	/* same zero-byte test as in .Lscan_loop */
     95 	testl	$0x80808080,%edx
     96 	je	.Lcopy_loop
     97 
     98 	/*
     99 	 * In rare cases, the above loop may exit prematurely. We must
    100 	 * return to the loop if none of the bytes in the word equal 0.
        	 *
        	 * The word is stored one byte at a time in address order,
        	 * stopping once the NUL has been written.  If all four bytes
        	 * turn out to be non-zero, the whole word has been copied and
        	 * the word loop is re-entered.
    101 	 */
    102 
    103 	movb	%bl,(%ecx)		/* 1st byte */
    104 	incl	%ecx
    105 	testb	%bl,%bl
    106 	je	.Ldone
    107 
    108 	movb	%bh,(%ecx)		/* 2nd byte */
    109 	incl	%ecx
    110 	testb	%bh,%bh
    111 	je	.Ldone
    112 
    113 	shrl	$16,%ebx		/* move bytes 3 and 4 down into %bl/%bh */
    114 	movb	%bl,(%ecx)		/* 3rd byte */
    115 	incl	%ecx
    116 	testb	%bl,%bl
    117 	je	.Ldone
    118 
    119 	movb	%bh,(%ecx)		/* 4th byte */
    120 	incl	%ecx
    121 	testb	%bh,%bh
    122 	jne	.Lcopy_aligned		/* no NUL in this word: continue with the next */
    123 
    124 .Ldone:
    125 	movl	8(%esp),%eax		/* return value: the original dst argument */
    126 	popl	%ebx
    127 	ret
    128 END(strcat)
    129