/* $NetBSD: memcmp.S,v 1.3 2018/07/09 06:07:06 ryo Exp $ */

/*-
 * Copyright (c) 2014 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas of 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

RCSID("$NetBSD: memcmp.S,v 1.3 2018/07/09 06:07:06 ryo Exp $")

ENTRY(memcmp)
	mov	x9, x0
	mov	x10, x1
	mov	x0, xzr
	cbz	x2, .Lmemcmp_ret
#ifdef _KERNEL
	cmp	x2, #6
	b.eq	.Lmemcmp_6bytes
#endif
	cmp	x2, #8
	b.ls	.Lmemcmp_lessthan8

	ands	x3, x9, #7
	b.eq	.Lmemcmp_dword_loop

	/*
	 * The src1 address is not dword aligned.
	 */
	add	x2, x2, x3		/* add unalignment to length */
	sub	x2, x2, #8		/* now subtract a dword */

	sub	x9, x9, x3		/* dword align src1 */

	ldr	x6, [x10], #8		/* load dword from src2 */
	sub	x10, x10, x3		/* src2 -= x3 */
	lsl	x3, x3, #3		/* convert bytes to bits */
	ldr	x4, [x9], #8		/* load dword from src1 */
#ifdef __AARCH64EB__
	lsl	x4, x4, x3		/* discard leading bytes from data1 */
	lsr	x6, x6, x3		/* discard trailing bytes from data2 */
	lsl	x6, x6, x3		/* restore the byte position */
#else
	lsr	x4, x4, x3		/* discard leading bytes from data1 */
	lsl	x6, x6, x3		/* discard trailing bytes from data2 */
	lsr	x6, x6, x3		/* restore the byte position */
#endif
	subs	x0, x4, x6		/* compare data */
	b.ne	.Lmemcmp_last_compare	/* difference, go find it */
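
	/*
	 * A worked example of the head fixup above (hypothetical values,
	 * little-endian case): if src1 = p + 3 for a dword-aligned p,
	 * then x3 = 3 and the shift count is 24 bits.  "lsr x4, x4, #24"
	 * drops the three bytes that precede src1 from the aligned load,
	 * and the lsl/lsr pair zeroes the top three bytes of the src2
	 * dword, so both registers hold the same five bytes of their
	 * buffers and compare equal exactly when those bytes match.
	 */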

.Lmemcmp_dword_loop:
	subs	x2, x2, #8
	b.mi	.Lmemcmp_finish_dword
	ldr	x4, [x9], #8
	ldr	x6, [x10], #8
	subs	x0, x4, x6
	b.eq	.Lmemcmp_dword_loop	/* no difference, keep looping */
	b	.Lmemcmp_last_compare	/* go find the difference */

.Lmemcmp_finish_dword:
	/*
	 * We might have gotten here with nothing left.  If so, just bail.
	 */
	tst	x2, #7
	b.eq	.Lmemcmp_ret
	mov	x4, xzr
	mov	x6, xzr
	/*
	 * Gather the remaining 1-7 bytes of each buffer into x4 and x6:
	 * a word, a halfword, and a byte, as the low bits of the length
	 * dictate.
	 */
	tbz	x2, #2, .Lmemcmp_finish_word
	ldr	w4, [x9], #4
	ldr	w6, [x10], #4
#ifdef __AARCH64EB__
	lsl	x4, x4, #32		/* move to MSW */
	lsl	x6, x6, #32		/* move to MSW */
#endif

.Lmemcmp_finish_word:
	tbz	x2, #1, .Lmemcmp_finish_hword
	ldrh	w5, [x9], #2
	ldrh	w7, [x10], #2
#ifdef __AARCH64EB__
	orr	x4, x4, x5, lsl #16
	orr	x6, x6, x7, lsl #16
#else
	orr	x4, x4, x5, lsl #32
	orr	x6, x6, x7, lsl #32
#endif

.Lmemcmp_finish_hword:
	tbz	x2, #0, .Lmemcmp_last_compare0

	ldrb	w5, [x9]
	ldrb	w7, [x10]
#ifdef __AARCH64EB__
	orr	x4, x4, x5, lsl #8
	orr	x6, x6, x7, lsl #8
#else
	orr	x4, x4, x5, lsl #48
	orr	x6, x6, x7, lsl #48
#endif
	b	.Lmemcmp_last_compare0	/* go find the difference */

	/*
	 * Eight or fewer bytes: just compare a byte at a time.
	 */
.Lmemcmp_lessthan8:
	sub	x2, x2, #1
1:	ldrb	w4, [x9], #1
	ldrb	w5, [x10], #1
	subs	x2, x2, #1
	ccmp	x4, x5, #0, cs
	b.eq	1b
	sub	x0, x4, x5

.Lmemcmp_ret:
	ret

#ifdef _KERNEL
.Lmemcmp_6bytes:
	ldr	w4, [x9], #4
	ldrh	w5, [x9]
#if __AARCH64EB__
	orr	x4, x4, x5, lsl #48
	rev	x4, x4
#else
	orr	x4, x4, x5, lsl #32
#endif
	ldr	w6, [x10], #4
	ldrh	w7, [x10]
#if __AARCH64EB__
	orr	x6, x6, x7, lsl #48
	rev	x6, x6
#else
	orr	x6, x6, x7, lsl #32
#endif
#endif /* _KERNEL */

	/*
	 * We have loaded the final bytes of each buffer into x4 and x6 in
	 * host-endian order, and x0 holds their difference (x4 - x6).  Every
	 * byte before the first differing byte is 0 in x0, so a rev/clz pair
	 * locates that byte; masking the bit index down to a byte boundary
	 * gives the start of the byte, and shifting the difference right by
	 * that amount leaves the differing byte in the LSB of x1.  We then
	 * byte-reverse the data dwords where needed so that an unsigned
	 * compare of the whole dwords matches memory order, take the sign
	 * from that compare, and insert the differing byte into the sign
	 * mask to form the return value.
	 */
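	/*
	 * A minimal C sketch of that technique (little-endian case;
	 * illustrative only, not part of the build).  first_diff() is a
	 * hypothetical helper; xor stands in for the subtraction used
	 * below, since both leave the leading equal bytes zero:
	 *
	 *	static int
	 *	first_diff(uint64_t data1, uint64_t data2)
	 *	{
	 *		uint64_t diff = data1 ^ data2;
	 *		if (diff == 0)
	 *			return 0;
	 *		int shift = __builtin_ctzll(diff) & ~7;
	 *		return (int)((data1 >> shift) & 0xff) -
	 *		    (int)((data2 >> shift) & 0xff);
	 *	}
	 */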
.Lmemcmp_last_compare0:
	subs	x0, x4, x6
	b.eq	.Lmemcmp_ret
.Lmemcmp_last_compare:
#if __AARCH64EB__
	clz	x1, x0			/* find first non-zero byte */
	rev	x0, x0
#else
	rev	x1, x0
	clz	x1, x1			/* find first non-zero byte */
#endif
	bfi	x1, xzr, #0, #3		/* round down to a byte boundary */
	lsr	x1, x0, x1		/* shift that byte to the LSB */
#if __AARCH64EL__
	rev	x4, x4			/* byte reverse */
	rev	x6, x6			/* byte reverse */
#endif
	subs	x0, x4, x6
	csetm	x0, cc			/* x0 = -1 if x4 < x6, else 0 */
	bfm	x0, x1, #0, #7		/* insert the differing byte */
	ret
END(memcmp)
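
/*
 * Usage note (illustrative only, not part of the build): memcmp
 * guarantees only the sign of the result, never its magnitude, e.g.:
 *
 *	char a[] = "abcdeX", b[] = "abcdeY";
 *	assert(memcmp(a, b, 5) == 0);	(equal prefix)
 *	assert(memcmp(a, b, 6) < 0);	('X' < 'Y')
 *	assert(memcmp(b, a, 6) > 0);
 */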