/*	$NetBSD: memset.S,v 1.2 2008/02/16 17:37:13 apb Exp $	*/

/*-
 * Copyright (c) 2002 SHIMIZU Ryo. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS) && !defined(lint)
	RCSID("$NetBSD: memset.S,v 1.2 2008/02/16 17:37:13 apb Exp $")
#endif

/*
 * memset / bzero for SuperH, GNU as syntax, preprocessed by cpp.
 *
 *	void	*memset(void *dst, int c, size_t len);
 *	void	 bzero(void *dst, size_t len);	(built when BZERO is defined)
 *
 * The prototypes are the standard C ones; the register assignment is the
 * one fixed by the #defines below (arguments in r4, r5, r6).
 *
 * Register roles
 *	r0  REG_PTR	large path: one past the last byte still to fill.
 *			small paths: computed jump target, then the value
 *			being stored (mov.b/mov.w with a displacement can
 *			only store r0).
 *	r1  REG_TMP1	scratch.
 *	r4  REG_DST	destination pointer.
 *   memset only:
 *	r3  REG_DST0	copy of the original dst, returned in r0.
 *	r5  REG_C	fill value (widened to 16 or 32 bits when needed).
 *	r6  REG_LEN	byte count.
 *   bzero only:
 *	r2  REG_C	always 0.
 *	r5  REG_LEN	byte count.
 *
 * Strategy, chosen on len
 *	 0..11	byte loop running from the end backward, unrolled 4 times.
 *	12..27	computed jump into a straight-line table of stores:
 *		byte stores when dst is odd, 16-bit stores when dst is even.
 *	28..	replicate c into all four bytes, store the unaligned head
 *		and tail separately, then fill 32-byte blocks followed by
 *		the remaining 32-bit words, again from the end backward.
 *
 * Instruction order matters everywhere in this file: the instruction
 * that follows bt/s, bf/s, bra, jmp and rts is a delay slot and executes
 * before the branch takes effect, and the jump tables rely on every
 * instruction being 2 bytes long.  Do not reorder without re-checking.
 */

#define	REG_PTR		r0
#define	REG_TMP1	r1

#ifdef BZERO
# define	REG_C		r2
# define	REG_DST		r4
# define	REG_LEN		r5
#else
# define	REG_DST0	r3
# define	REG_DST		r4
# define	REG_C		r5
# define	REG_LEN		r6
#endif

#ifdef BZERO
ENTRY(bzero)
#else
ENTRY(memset)
	mov	REG_DST,REG_DST0	/* for return value */
#endif
	/* small amount to fill ? */
	mov	#28,REG_TMP1
	cmp/hs	REG_TMP1,REG_LEN	/* if (len >= 28) goto large; */
	bt/s	large
	mov	#12,REG_TMP1		/* (delay slot) */
	cmp/hs	REG_TMP1,REG_LEN	/* if (len >= 12) goto small; */
	bt/s	small
	/*
	 * Delay slot of "bt/s small": for bzero it is the mov below, for
	 * memset it is the tst that follows.  The branch decision is
	 * already made, and neither "small" nor the code below depends on
	 * which of the two ran in the slot.
	 */
#ifdef BZERO
	mov	#0,REG_C
#endif
	/* very little fill (0 ~ 11 bytes) */
	tst	REG_LEN,REG_LEN		/* T = (len == 0) */
	add	REG_DST,REG_LEN		/* REG_LEN = dst + len (end pointer) */
	bt/s	done			/* nothing to do for len == 0 */
	add	#1,REG_DST		/* (delay slot) REG_DST = dst + 1 */

	/*
	 * Byte loop, unrolled 4 times, filling from the end backward.
	 * T is computed one step ahead: it is set when exactly one byte
	 * is left, the store then writes that byte, and the following
	 * bt/s leaves the loop.
	 */
	cmp/eq	REG_DST,REG_LEN
1:	mov.b	REG_C,@-REG_LEN		/* *--end = c; */
	bt/s	done
	cmp/eq	REG_DST,REG_LEN		/* (delay slot) */
	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN		/* (delay slot) */
	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN		/* (delay slot) */
	mov.b	REG_C,@-REG_LEN
	bf/s	1b
	cmp/eq	REG_DST,REG_LEN		/* (delay slot) */
done:
#ifdef BZERO
	rts
	nop
#else
	rts
	mov	REG_DST0,r0		/* (delay slot) return dst */
#endif


	/*
	 * 12 <= len < 28.
	 * If dst is even use the 16-bit table in small_aligned, otherwise
	 * jump into the byte-store table below.
	 */
small:
	mov	REG_DST,r0
	tst	#1,r0			/* tst #imm only operates on r0 */
	bt/s	small_aligned
	mov	REG_DST,REG_TMP1	/* (delay slot) */
	shll	REG_LEN			/* len * 2 = bytes of code per len stores */
	mova	1f,r0	/* 1f must be 4bytes aligned! */
	add	#16,REG_TMP1	/* REG_TMP1 = dst+16; */
	sub	REG_LEN,r0		/* enter the table len stores before 1f */
	jmp	@r0
	mov	REG_C,r0		/* (delay slot) value to store */

	/*
	 * The store with offset k writes dst[k]; entering at the store for
	 * offset len-1 and falling through fills dst[len-1] .. dst[0].
	 * The displacement is limited to 15, hence the second base
	 * register for offsets 16..31.
	 */
	.align	2
	mov.b	r0,@(15,REG_TMP1)
	mov.b	r0,@(14,REG_TMP1)
	mov.b	r0,@(13,REG_TMP1)
	mov.b	r0,@(12,REG_TMP1)
	mov.b	r0,@(11,REG_TMP1)
	mov.b	r0,@(10,REG_TMP1)
	mov.b	r0,@(9,REG_TMP1)
	mov.b	r0,@(8,REG_TMP1)
	mov.b	r0,@(7,REG_TMP1)
	mov.b	r0,@(6,REG_TMP1)
	mov.b	r0,@(5,REG_TMP1)
	mov.b	r0,@(4,REG_TMP1)
	mov.b	r0,@(3,REG_TMP1)
	mov.b	r0,@(2,REG_TMP1)
	mov.b	r0,@(1,REG_TMP1)
	mov.b	r0,@REG_TMP1
	mov.b	r0,@(15,REG_DST)
	mov.b	r0,@(14,REG_DST)
	mov.b	r0,@(13,REG_DST)
	mov.b	r0,@(12,REG_DST)
	mov.b	r0,@(11,REG_DST)
	mov.b	r0,@(10,REG_DST)
	mov.b	r0,@(9,REG_DST)
	mov.b	r0,@(8,REG_DST)
	mov.b	r0,@(7,REG_DST)
	mov.b	r0,@(6,REG_DST)
	mov.b	r0,@(5,REG_DST)
	mov.b	r0,@(4,REG_DST)
	mov.b	r0,@(3,REG_DST)
	mov.b	r0,@(2,REG_DST)
	mov.b	r0,@(1,REG_DST)
	/*
	 * bzero puts the final store in the rts delay slot, so the rts
	 * occupies one table position and label 1 sits on the store;
	 * memset needs its delay slot for the return value instead.
	 * Either way label 1 ends up 32 instructions after the .align.
	 */
#ifdef BZERO
	rts
1:	mov.b	r0,@REG_DST
#else
	mov.b	r0,@REG_DST
1:	rts
	mov	REG_DST0,r0		/* (delay slot) return dst */
#endif


/* 2 bytes aligned small fill */
small_aligned:
#ifndef BZERO
	extu.b	REG_C,REG_TMP1	/* REG_C = ??????xx, REG_TMP1 = 000000xx */
	shll8	REG_C		/* REG_C = ????xx00, REG_TMP1 = 000000xx */
	or	REG_TMP1,REG_C	/* REG_C = ????xxxx */
#endif

	mov	REG_LEN,r0
	tst	#1,r0		/* len is aligned? */
	bt/s	1f
	add	#-1,r0		/* (delay slot) r0 = len - 1, used only if odd */
	mov.b	REG_C,@(r0,REG_DST)	/* fill the last byte */
	mov	r0,REG_LEN	/* len is now even */
1:

	mova	1f,r0	/* 1f must be 4bytes aligned! */
	sub	REG_LEN,r0	/* one 2-byte store per 2 bytes: back up by len */
	jmp	@r0
	mov	REG_C,r0	/* (delay slot) value to store */

	/* same scheme as the byte table above, two bytes per store */
	.align	2
	mov.w	r0,@(30,REG_DST)
	mov.w	r0,@(28,REG_DST)
	mov.w	r0,@(26,REG_DST)
	mov.w	r0,@(24,REG_DST)
	mov.w	r0,@(22,REG_DST)
	mov.w	r0,@(20,REG_DST)
	mov.w	r0,@(18,REG_DST)
	mov.w	r0,@(16,REG_DST)
	mov.w	r0,@(14,REG_DST)
	mov.w	r0,@(12,REG_DST)
	mov.w	r0,@(10,REG_DST)
	mov.w	r0,@(8,REG_DST)
	mov.w	r0,@(6,REG_DST)
	mov.w	r0,@(4,REG_DST)
	mov.w	r0,@(2,REG_DST)
#ifdef BZERO
	rts
1:	mov.w	r0,@REG_DST
#else
	mov.w	r0,@REG_DST
1:	rts
	mov	REG_DST0,r0		/* (delay slot) return dst */
#endif



	/* len >= 28 */
	.align	2
large:
#ifdef BZERO
	mov	#0,REG_C
#else
	extu.b	REG_C,REG_TMP1	/* REG_C = ??????xx, REG_TMP1 = 000000xx */
	shll8	REG_C		/* REG_C = ????xx00, REG_TMP1 = 000000xx */
	or	REG_C,REG_TMP1	/* REG_C = ????xx00, REG_TMP1 = ????xxxx */
	swap.w	REG_TMP1,REG_C	/* REG_C = xxxx????, REG_TMP1 = ????xxxx */
	xtrct	REG_TMP1,REG_C	/* REG_C = xxxxxxxx */
#endif

	mov	#3,REG_TMP1
	tst	REG_TMP1,REG_DST	/* T = ((dst & 3) == 0) */
	mov	REG_DST,REG_PTR
	bf/s	unaligned_dst
	add	REG_LEN,REG_PTR	/* (delay slot) REG_PTR = dst + len; */
	tst	REG_TMP1,REG_LEN	/* T = ((len & 3) == 0) */
	bf/s	unaligned_len
	/*
	 * The delay slot of the bf/s above is the "mov #32" at "aligned".
	 * That is harmless: unaligned_len does not read REG_TMP1 and
	 * comes back through "aligned", which loads 32 again.
	 */

aligned:
	/* fill 32*n bytes */
	mov	#32,REG_TMP1
	cmp/hi	REG_LEN,REG_TMP1	/* T = (32 > len) */
	bt	9f
	.align	2
1:	sub	REG_TMP1,REG_PTR	/* ptr -= 32; */
	mov.l	REG_C,@REG_PTR
	sub	REG_TMP1,REG_LEN	/* len -= 32; */
	mov.l	REG_C,@(4,REG_PTR)
	cmp/hi	REG_LEN,REG_TMP1	/* T = (32 > len), tested by bf/s below */
	mov.l	REG_C,@(8,REG_PTR)
	mov.l	REG_C,@(12,REG_PTR)
	mov.l	REG_C,@(16,REG_PTR)
	mov.l	REG_C,@(20,REG_PTR)
	mov.l	REG_C,@(24,REG_PTR)
	bf/s	1b
	mov.l	REG_C,@(28,REG_PTR)	/* (delay slot) */
9:

	/*
	 * fill left 4*n bytes
	 * Same look-ahead scheme as the byte loop at the top: T is set
	 * when exactly one word is left, the store writes it, and the
	 * following bt/s leaves the loop.
	 */
	cmp/eq	REG_DST,REG_PTR
	bt	9f			/* nothing left */
	add	#4,REG_DST
	cmp/eq	REG_DST,REG_PTR
1:	mov.l	REG_C,@-REG_PTR		/* *--(uint32_t *)ptr = c; */
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR		/* (delay slot) */
	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR		/* (delay slot) */
	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR		/* (delay slot) */
	mov.l	REG_C,@-REG_PTR
	bf/s	1b
	cmp/eq	REG_DST,REG_PTR		/* (delay slot) */
9:
#ifdef BZERO
	rts
	nop
#else
	rts
	mov	REG_DST0,r0		/* (delay slot) return dst */
#endif


	/*
	 * Store the 1..3 leading bytes needed to bring dst up to a 4-byte
	 * boundary, then fall into the tail handling.
	 */
unaligned_dst:
	mov	#1,REG_TMP1
	tst	REG_TMP1,REG_DST	/* if (dst & 1) {               */
	add	#1,REG_TMP1		/* REG_TMP1 = 2 (T is unaffected) */
	bt/s	2f
	tst	REG_TMP1,REG_DST	/* (delay slot) T = !(dst & 2)  */
	mov.b	REG_C,@REG_DST		/*    *dst++ = c;               */
	add	#1,REG_DST
	tst	REG_TMP1,REG_DST	/* re-test bit 1 of the new dst */
2:					/* }                            */
					/* if (dst & 2) {               */
	bt	4f
	mov.w	REG_C,@REG_DST		/*    *(uint16_t *)dst = c;     */
	add	#2,REG_DST		/*    dst += 2;                 */
4:					/* }                            */


	tst	#3,REG_PTR		/* if (ptr & 3) {               */
	bt/s	4f			/*                              */
	/*
	 * The tst at unaligned_len doubles as the delay slot of the bt/s
	 * above and as the first instruction when entered from "large".
	 * REG_PTR is r0, which the tst #imm form requires.
	 */
unaligned_len:
	tst	#1,REG_PTR		/*   if (ptr & 1) {             */
	bt/s	2f
	tst	#2,REG_PTR		/* (delay slot) T = !(ptr & 2)  */
	/*
	 * T was taken from ptr before the decrement below; ptr is odd
	 * here, so subtracting 1 cannot change bit 1 and T stays valid.
	 */
	mov.b	REG_C,@-REG_PTR		/*     *--ptr = c;              */
2:					/*   }                          */
					/*   if (ptr & 2) {             */
	bt	4f
	mov.w	REG_C,@-REG_PTR		/*     *--(uint16_t *)ptr = c;  */
4:					/*   }                          */
					/* }                            */

	mov	REG_PTR,REG_LEN
	bra	aligned
	sub	REG_DST,REG_LEN		/* (delay slot) len = ptr - dst; */