Home | History | Annotate | Line # | Download | only in string
strcat.S revision 1.1
      1  1.1  christos /*
      2  1.1  christos  * Written by J.T. Conklin <jtc (at) acorntoolworks.com>
      3  1.1  christos  * Public domain.
      4  1.1  christos  */
      5  1.1  christos 
      6  1.1  christos #include <machine/asm.h>
      7  1.1  christos 
      8  1.1  christos #if defined(LIBC_SCCS)
      9  1.1  christos 	RCSID("$NetBSD: strcat.S,v 1.1 2005/12/20 19:28:51 christos Exp $")
     10  1.1  christos #endif
     11  1.1  christos 
     12  1.1  christos ENTRY(strcat)
                       	/*
                       	 * Append the NUL-terminated string at %rsi to the end of the
                       	 * NUL-terminated string at %rdi, including the terminator.
                       	 *
                       	 * In:      %rdi = destination string, %rsi = source string
                       	 * Out:     %rax = the value %rdi had on entry
                       	 * Scratch: %rcx, %rdx, %r8, %r9, flags; %rdi and %rsi are
                       	 *          advanced.  No stack is used apart from the return.
                       	 *
                       	 * NOTE(review): the register use is consistent with the SysV
                       	 * AMD64 convention for
                       	 *     char *strcat(char *dst, const char *src);
                       	 * the prototype itself is not visible in this file.
                       	 *
                       	 * Two phases, each working a byte at a time until the pointer
                       	 * being read is 8-byte aligned and then 8 bytes at a time:
                       	 *   1. scan:  find the terminator of the destination;
                       	 *   2. copy:  copy the source over it, terminator included.
                       	 *
                       	 * The 8-byte steps use (word - 0x01..01) & 0x80..80 as a quick
                       	 * filter.  A zero result means the word contains no zero byte.
                       	 * A non-zero result means it contains a zero byte OR a byte in
                       	 * the range 0x81-0xff, so a hit is always rechecked per byte.
                       	 */
     13  1.1  christos 	movq	%rdi,%rax		/* return value: original destination */
     14  1.1  christos 	movabsq	$0x0101010101010101,%r8	/* 0x01 in every byte */
     15  1.1  christos 	movabsq	$0x8080808080808080,%r9	/* 0x80 in every byte */
     16  1.1  christos 
     17  1.1  christos 	/*
     18  1.1  christos 	 * Align destination to word boundary.
     19  1.1  christos 	 * Consider unrolling loop?
                       	 *
                       	 * Bytes are examined singly until the low three bits of %rdi
                       	 * are clear.  If the terminator is met first, the word scan
                       	 * is skipped entirely.  (.Lscan and .Lscan_align label the
                       	 * same address.)
     20  1.1  christos 	 */
     21  1.1  christos .Lscan:
     22  1.1  christos .Lscan_align:
     23  1.1  christos 	testb	$7,%dil			/* %rdi 8-byte aligned yet? */
     24  1.1  christos 	je	.Lscan_aligned
     25  1.1  christos 	cmpb	$0,(%rdi)		/* end of destination? */
     26  1.1  christos 	je	.Lcopy			/* yes: %rdi points at the zero byte */
     27  1.1  christos 	incq	%rdi
     28  1.1  christos 	jmp	.Lscan_align
     29  1.1  christos 
                       	/*
                       	 * Word scan.  %rdi is 8-byte aligned here, so each load covers
                       	 * exactly one aligned 8-byte word; bytes after the terminator
                       	 * within that same word may be read.  %rdi is advanced before
                       	 * the test, so on loop exit it points 8 bytes past the word
                       	 * that triggered the exit.
                       	 */
     30  1.1  christos 	_ALIGN_TEXT
     31  1.1  christos .Lscan_aligned:
     32  1.1  christos .Lscan_loop:
     33  1.1  christos 	movq	(%rdi),%rdx		/* next 8 bytes of destination */
     34  1.1  christos 	addq	$8,%rdi
     35  1.1  christos 	subq	%r8,%rdx		/* subtract 1 from every byte */
     36  1.1  christos 	testq	%r9,%rdx		/* any high bit set afterwards? */
     37  1.1  christos 	je	.Lscan_loop		/* no: no zero byte in this word */
     38  1.1  christos 
     39  1.1  christos 	/*
     40  1.1  christos 	 * In rare cases, the above loop may exit prematurely. We must
     41  1.1  christos 	 * return to the loop if none of the bytes in the word equal 0.
                       	 *
                       	 * "Prematurely" means the word held a byte in 0x81-0xff and no
                       	 * zero byte: never for pure ASCII text, but routinely for 8-bit
                       	 * text.  The word is rechecked from its lowest address upwards;
                       	 * the offsets are negative because %rdi already points past it.
                       	 * On a match %rdi is moved back onto the zero byte.
     42  1.1  christos 	 */
     43  1.1  christos 
     44  1.1  christos 	cmpb	$0,-8(%rdi)	/* 1st byte == 0? */
     45  1.1  christos 	jne	1f
     46  1.1  christos 	subq	$8,%rdi
     47  1.1  christos 	jmp	.Lcopy
     48  1.1  christos 
     49  1.1  christos 1:	cmpb	$0,-7(%rdi)	/* 2nd byte == 0? */
     50  1.1  christos 	jne	1f
     51  1.1  christos 	subq	$7,%rdi
     52  1.1  christos 	jmp	.Lcopy
     53  1.1  christos 
     54  1.1  christos 1:	cmpb	$0,-6(%rdi)	/* 3rd byte == 0? */
     55  1.1  christos 	jne	1f
     56  1.1  christos 	subq	$6,%rdi
     57  1.1  christos 	jmp	.Lcopy
     58  1.1  christos 
     59  1.1  christos 1:	cmpb	$0,-5(%rdi)	/* 4th byte == 0? */
     60  1.1  christos 	jne	1f
     61  1.1  christos 	subq	$5,%rdi
     62  1.1  christos 	jmp	.Lcopy
     63  1.1  christos 
     64  1.1  christos 1:	cmpb	$0,-4(%rdi)	/* 5th byte == 0? */
     65  1.1  christos 	jne	1f
     66  1.1  christos 	subq	$4,%rdi
     67  1.1  christos 	jmp	.Lcopy
     68  1.1  christos 
     69  1.1  christos 1:	cmpb	$0,-3(%rdi)	/* 6th byte == 0? */
     70  1.1  christos 	jne	1f
     71  1.1  christos 	subq	$3,%rdi
     72  1.1  christos 	jmp	.Lcopy
     73  1.1  christos 
     74  1.1  christos 1:	cmpb	$0,-2(%rdi)	/* 7th byte == 0? */
     75  1.1  christos 	jne	1f
     76  1.1  christos 	subq	$2,%rdi
     77  1.1  christos 	jmp	.Lcopy
     78  1.1  christos 
     79  1.1  christos 1:	cmpb	$0,-1(%rdi)	/* 8th byte == 0? */
     80  1.1  christos 	jne	.Lscan_loop	/* false alarm: %rdi is already at the next word */
     81  1.1  christos 	subq	$1,%rdi		/* back onto the zero byte; fall into .Lcopy */
     82  1.1  christos 
     83  1.1  christos 	/*
     84  1.1  christos 	 * Align source to a word boundary.
     85  1.1  christos 	 * Consider unrolling loop?
                       	 *
                       	 * On every path into .Lcopy, %rdi points at the destination's
                       	 * zero byte, which the first byte copied overwrites.  Bytes
                       	 * are copied singly until the low three bits of %rsi are
                       	 * clear; if the source terminator is copied during this phase
                       	 * the routine returns at once.  Only %rsi is aligned: the
                       	 * 8-byte stores through %rdi further down may be unaligned.
                       	 * (.Lcopy and .Lcopy_align label the same address.)
     86  1.1  christos 	 */
     87  1.1  christos .Lcopy:
     88  1.1  christos .Lcopy_align:
     89  1.1  christos 	testb	$7,%sil			/* %rsi 8-byte aligned yet? */
     90  1.1  christos 	je	.Lcopy_aligned
     91  1.1  christos 	movb	(%rsi),%dl
     92  1.1  christos 	incq	%rsi
     93  1.1  christos 	movb	%dl,(%rdi)
     94  1.1  christos 	incq	%rdi
     95  1.1  christos 	testb	%dl,%dl			/* was that the terminator? */
     96  1.1  christos 	jne	.Lcopy_align
     97  1.1  christos 	ret				/* terminator copied; %rax = original destination */
     98  1.1  christos 
                       	/*
                       	 * Word copy.  The loop is entered at .Lcopy_aligned (load and
                       	 * test); .Lcopy_loop is reached only by the backward branch,
                       	 * once the word in %rdx has passed the filter, and stores it.
                       	 * %rcx takes the copy that the filter destroys, so %rdx keeps
                       	 * the data.  As in the scan, bytes after the terminator within
                       	 * the same aligned source word may be read.
                       	 */
     99  1.1  christos 	_ALIGN_TEXT
    100  1.1  christos .Lcopy_loop:
    101  1.1  christos 	movq	%rdx,(%rdi)		/* store the zero-free word loaded below */
    102  1.1  christos 	addq	$8,%rdi
    103  1.1  christos .Lcopy_aligned:
    104  1.1  christos 	movq	(%rsi),%rdx		/* next 8 bytes of source */
    105  1.1  christos 	movq	%rdx,%rcx
    106  1.1  christos 	addq	$8,%rsi
    107  1.1  christos 	subq	%r8,%rcx		/* subtract 1 from every byte */
    108  1.1  christos 	testq	%r9,%rcx		/* any high bit set afterwards? */
    109  1.1  christos 	je	.Lcopy_loop		/* no: no zero byte, store whole word */
    110  1.1  christos 
    111  1.1  christos 	/*
    112  1.1  christos 	 * In rare cases, the above loop may exit prematurely. We must
    113  1.1  christos 	 * return to the loop if none of the bytes in the word equal 0.
                       	 *
                       	 * The word in %rdx has not been stored yet.  It is written out
                       	 * one byte at a time, lowest address first: %dl is stored and
                       	 * tested, then %rdx is shifted right by 8 to bring the next
                       	 * byte into %dl.  The sequence stops right after the zero byte
                       	 * has been stored.  If all eight bytes are non-zero (a byte in
                       	 * 0x81-0xff tripped the filter), the whole word has been
                       	 * stored and %rdi advanced by 8, so the word loop resumes.
    114  1.1  christos 	 */
    115  1.1  christos 
    116  1.1  christos 	movb	%dl,(%rdi)
    117  1.1  christos 	incq	%rdi
    118  1.1  christos 	testb	%dl,%dl		/* 1st byte == 0? */
    119  1.1  christos 	je	.Ldone
    120  1.1  christos 
    121  1.1  christos 	shrq	$8,%rdx
    122  1.1  christos 	movb	%dl,(%rdi)
    123  1.1  christos 	incq	%rdi
    124  1.1  christos 	testb	%dl,%dl		/* 2nd byte == 0? */
    125  1.1  christos 	je	.Ldone
    126  1.1  christos 
    127  1.1  christos 	shrq	$8,%rdx
    128  1.1  christos 	movb	%dl,(%rdi)
    129  1.1  christos 	incq	%rdi
    130  1.1  christos 	testb	%dl,%dl		/* 3rd byte == 0? */
    131  1.1  christos 	je	.Ldone
    132  1.1  christos 
    133  1.1  christos 	shrq	$8,%rdx
    134  1.1  christos 	movb	%dl,(%rdi)
    135  1.1  christos 	incq	%rdi
    136  1.1  christos 	testb	%dl,%dl		/* 4th byte == 0? */
    137  1.1  christos 	je	.Ldone
    138  1.1  christos 
    139  1.1  christos 	shrq	$8,%rdx
    140  1.1  christos 	movb	%dl,(%rdi)
    141  1.1  christos 	incq	%rdi
    142  1.1  christos 	testb	%dl,%dl		/* 5th byte == 0? */
    143  1.1  christos 	je	.Ldone
    144  1.1  christos 
    145  1.1  christos 	shrq	$8,%rdx
    146  1.1  christos 	movb	%dl,(%rdi)
    147  1.1  christos 	incq	%rdi
    148  1.1  christos 	testb	%dl,%dl		/* 6th byte == 0? */
    149  1.1  christos 	je	.Ldone
    150  1.1  christos 
    151  1.1  christos 	shrq	$8,%rdx
    152  1.1  christos 	movb	%dl,(%rdi)
    153  1.1  christos 	incq	%rdi
    154  1.1  christos 	testb	%dl,%dl		/* 7th byte == 0? */
    155  1.1  christos 	je	.Ldone
    156  1.1  christos 
    157  1.1  christos 	shrq	$8,%rdx
    158  1.1  christos 	movb	%dl,(%rdi)
    159  1.1  christos 	incq	%rdi
    160  1.1  christos 	testb	%dl,%dl		/* 8th byte == 0? */
    161  1.1  christos 	jne	.Lcopy_aligned	/* false alarm: all 8 bytes stored, resume */
    162  1.1  christos 
    163  1.1  christos .Ldone:
    164  1.1  christos 	ret			/* %rax still holds the original destination */
    165