/*	$NetBSD: memcpy.S,v 1.3 2024/02/07 04:20:25 msaitoh Exp $	*/

/*
 * Copyright (c) 2000 SHIMIZU Ryo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS) && !defined(lint)
	RCSID("$NetBSD: memcpy.S,v 1.3 2024/02/07 04:20:25 msaitoh Exp $")
#endif

/*
 * One source file, three possible entry points.  Define exactly one of
 * MEMCOPY, MEMMOVE or BCOPY to choose which symbol is emitted; with none
 * defined the file builds memcpy.
 */
#if !defined(MEMCOPY) && !defined(MEMMOVE) && !defined(BCOPY)
#define MEMCOPY
#endif

/*
 * Register roles.
 *
 *   memcpy/memmove:  dst in r4, src in r5   (r3 keeps the original dst so
 *                                            that it can be returned in r0)
 *   bcopy:           src in r4, dst in r5   (nothing is returned)
 *   all variants:    len in r6
 *
 * r0 and r1 are used as scratch, and the T flag is modified throughout.
 */
#if defined(MEMCOPY) || defined(MEMMOVE)
#define REG_DST0	r3
#define REG_SRC		r5
#define REG_DST		r4
#else
#define REG_SRC		r4
#define REG_DST		r5
#endif

#define REG_LEN		r6

/*
 * Copy REG_LEN bytes from REG_SRC to REG_DST.
 *
 * The same body is used for all three entry points, so the memcpy build
 * copes with overlapping buffers exactly as the memmove build does:
 *
 *   src == dst            nothing to do
 *   src >  dst (unsigned) copy forward, lowest address first
 *   src <  dst (unsigned) copy backward, starting from the end of both
 *                         buffers
 *
 * In either direction the transfer width is picked from (src ^ dst) & 3:
 *
 *   0          both pointers can be 4-byte aligned together: align with
 *              a byte and/or a 16-bit copy, then move 32 bits at a time
 *   2          both pointers can be 2-byte aligned together: align with
 *              a byte copy, then move 16 bits at a time
 *   1 or 3     the pointers never share alignment: move single bytes
 *
 * Whatever is left over after the wide loop is finished by the byte loop.
 *
 * Reading note: bt/s, bf/s, bra and rts are delayed branches.  The
 * instruction written right after each of them (the delay slot) is
 * executed before control reaches the branch target, and it is executed
 * whether or not a conditional branch is taken.
 */
#if defined(MEMCOPY)
ENTRY(memcpy)
#elif defined(MEMMOVE)
ENTRY(memmove)
#elif defined(BCOPY)
ENTRY(bcopy)
#endif
#ifdef REG_DST0
	mov	REG_DST,REG_DST0	/* remember dst for the return value */
#endif
	cmp/eq	REG_DST,REG_SRC		/* if (src == dst) */
	bt/s	copy_done		/*	return; */
	cmp/hi	REG_DST,REG_SRC		/* (delay slot) T = (src > dst) */
	bf/s	copy_backward		/* src < dst: copy from the end */

	/*
	 * Forward copy.  The first mov below is the delay slot of the bf/s
	 * above, so it also runs on the way to copy_backward; r0 is
	 * reloaded there before it is used.
	 */
	mov	REG_SRC,r0
	xor	REG_DST,r0
	and	#3,r0
	mov	r0,r1			/* r1 = (src ^ dst) & 3 */
	tst	r0,r0			/* T = (r1 == 0) */
	bf/s	fwd_word		/* alignments differ: try 16-bit */

	/* ---- forward, 32-bit capable: (src ^ dst) & 3 == 0 ---- */
fwd_long:
	tst	REG_LEN,REG_LEN		/* (delay slot of bf/s fwd_word) */
	bt/s	copy_done		/* if (len == 0) return; */

	/* step 1: if (src & 1) copy one byte */
	mov	REG_SRC,r0		/* (delay slot) */
	tst	#1,r0
	bt	1f
	mov.b	@REG_SRC+,r0		/* *dst++ = *src++; */
	add	#-1,REG_LEN		/* len--; */
	mov.b	r0,@REG_DST
	add	#1,REG_DST
1:

	/* step 2: if (len > 1 && (src & 2)) copy one 16-bit word */
	mov	#1,r0
	cmp/hi	r0,REG_LEN		/* T = (len > 1) */
	bf/s	1f
	mov	REG_SRC,r0		/* (delay slot) */
	tst	#2,r0
	bt	1f
	mov.w	@REG_SRC+,r0		/* 16-bit: *dst++ = *src++; */
	add	#-2,REG_LEN		/* len -= 2; */
	mov.w	r0,@REG_DST
	add	#2,REG_DST
1:

	/* step 3: while (len > 3) copy one 32-bit longword */
	mov	#3,r1
	cmp/hi	r1,REG_LEN		/* T = (len > 3) */
	bf/s	fwd_byte_tset
	tst	REG_LEN,REG_LEN		/* (delay slot) T = (len == 0) */
2:
	mov.l	@REG_SRC+,r0		/* 32-bit: *dst++ = *src++; */
	add	#-4,REG_LEN		/* len -= 4; */
	mov.l	r0,@REG_DST
	cmp/hi	r1,REG_LEN		/* T = (len > 3) */
	bt/s	2b
	add	#4,REG_DST		/* (delay slot) */

	bra	fwd_byte_tset		/* finish the 0..3 byte tail */
	tst	REG_LEN,REG_LEN		/* (delay slot) T = (len == 0) */

	/* ---- forward, 16-bit capable or byte only ---- */
fwd_word:
	mov	r1,r0
	tst	#1,r0			/* T = (((src ^ dst) & 1) == 0) */
	bf/s	fwd_byte_tset		/* odd difference: bytes only */
	tst	REG_LEN,REG_LEN		/* (delay slot) T = (len == 0) */
	bt	copy_done		/* if (len == 0) return; */

	/* step 1: if (src & 1) copy one byte */
	mov	REG_SRC,r0
	tst	#1,r0
	bt	1f
	mov.b	@REG_SRC+,r0		/* *dst++ = *src++; */
	add	#-1,REG_LEN		/* len--; */
	mov.b	r0,@REG_DST
	add	#1,REG_DST
1:

	/* step 2: while (len > 1) copy one 16-bit word */
	mov	#1,r1
	cmp/hi	r1,REG_LEN		/* T = (len > 1) */
	bf/s	fwd_byte_tset
	tst	REG_LEN,REG_LEN		/* (delay slot) T = (len == 0) */
2:
	mov.w	@REG_SRC+,r0		/* 16-bit: *dst++ = *src++; */
	add	#-2,REG_LEN		/* len -= 2; */
	mov.w	r0,@REG_DST
	cmp/hi	r1,REG_LEN		/* T = (len > 1) */
	bt/s	2b
	add	#2,REG_DST		/* (delay slot) */

	/* ---- forward byte loop: while (len != 0) ---- */
fwd_byte:
	tst	REG_LEN,REG_LEN		/* T = (len == 0) */
fwd_byte_tset:				/* entry with T = (len == 0) already set */
	bt	copy_done
1:
	mov.b	@REG_SRC+,r0		/* *dst++ = *src++; */
	add	#-1,REG_LEN		/* len--; */
	mov.b	r0,@REG_DST
	tst	REG_LEN,REG_LEN
	bf/s	1b
	add	#1,REG_DST		/* (delay slot) */

copy_done:
	rts
#ifdef REG_DST0
	mov	REG_DST0,r0		/* (delay slot) return original dst */
#else
	nop				/* (delay slot) bcopy returns nothing */
#endif

	/*
	 * Backward copy (src < dst).  Both pointers are first moved one
	 * past the end of their buffers; every transfer then pre-decrements
	 * them.  The alignment tests therefore look at the end address.
	 */
copy_backward:
	add	REG_LEN,REG_SRC		/* src += len; */
	add	REG_LEN,REG_DST		/* dst += len; */

	mov	REG_SRC,r0
	xor	REG_DST,r0
	and	#3,r0
	mov	r0,r1			/* r1 = (src ^ dst) & 3 */
	tst	r0,r0			/* T = (r1 == 0) */
	bf/s	bwd_word		/* alignments differ: try 16-bit */

	/* ---- backward, 32-bit capable: (src ^ dst) & 3 == 0 ---- */
bwd_long:
	tst	REG_LEN,REG_LEN		/* (delay slot of bf/s bwd_word) */
	bt/s	copy_done		/* if (len == 0) return; */

	/* step 1: if (src & 1) copy one byte */
	mov	REG_SRC,r0		/* (delay slot) */
	tst	#1,r0
	bt	1f
	add	#-1,REG_SRC		/* *--dst = *--src; */
	mov.b	@REG_SRC,r0
	mov.b	r0,@-REG_DST
	add	#-1,REG_LEN		/* len--; */
1:

	/* step 2: if (len > 1 && (src & 2)) copy one 16-bit word */
	mov	#1,r0
	cmp/hi	r0,REG_LEN		/* T = (len > 1) */
	bf/s	1f
	mov	REG_SRC,r0		/* (delay slot) */
	tst	#2,r0
	bt	1f
	add	#-2,REG_SRC		/* 16-bit: *--dst = *--src; */
	mov.w	@REG_SRC,r0
	add	#-2,REG_LEN		/* len -= 2; */
	mov.w	r0,@-REG_DST
1:

	/* step 3: while (len > 3) copy one 32-bit longword */
	mov	#3,r1
	cmp/hi	r1,REG_LEN		/* T = (len > 3) */
	bf/s	bwd_byte_tset
	tst	REG_LEN,REG_LEN		/* (delay slot) T = (len == 0) */
2:
	add	#-4,REG_SRC		/* 32-bit: *--dst = *--src; */
	mov.l	@REG_SRC,r0
	add	#-4,REG_LEN		/* len -= 4; */
	cmp/hi	r1,REG_LEN		/* T = (len > 3) */
	bt/s	2b
	mov.l	r0,@-REG_DST		/* (delay slot) store the longword */

	bra	bwd_byte_tset		/* finish the 0..3 byte tail */
	tst	REG_LEN,REG_LEN		/* (delay slot) T = (len == 0) */

	/* ---- backward, 16-bit capable or byte only ---- */
bwd_word:
	mov	r1,r0
	tst	#1,r0			/* T = (((src ^ dst) & 1) == 0) */
	bf/s	bwd_byte_tset		/* odd difference: bytes only */
	tst	REG_LEN,REG_LEN		/* (delay slot) T = (len == 0) */
	bt	copy_done		/* if (len == 0) return; */

	/* step 1: if (src & 1) copy one byte */
	mov	REG_SRC,r0
	tst	#1,r0
	bt	1f
	add	#-1,REG_SRC		/* *--dst = *--src; */
	mov.b	@REG_SRC,r0
	add	#-1,REG_LEN		/* len--; */
	mov.b	r0,@-REG_DST
1:

	/* step 2: while (len > 1) copy one 16-bit word */
	mov	#1,r1
	cmp/hi	r1,REG_LEN		/* T = (len > 1) */
	bf/s	bwd_byte_tset
	tst	REG_LEN,REG_LEN		/* (delay slot) T = (len == 0) */
2:
	add	#-2,REG_SRC		/* 16-bit: *--dst = *--src; */
	mov.w	@REG_SRC,r0
	add	#-2,REG_LEN		/* len -= 2; */
	cmp/hi	r1,REG_LEN		/* T = (len > 1) */
	bt/s	2b
	mov.w	r0,@-REG_DST		/* (delay slot) store the word */

	/* ---- backward byte loop: while (len != 0) ---- */
bwd_byte:
	tst	REG_LEN,REG_LEN		/* T = (len == 0) */
bwd_byte_tset:				/* entry with T = (len == 0) already set */
	bt	9f
1:
	add	#-1,REG_SRC		/* *--dst = *--src; */
	mov.b	@REG_SRC,r0
	add	#-1,REG_LEN		/* len--; */
	tst	REG_LEN,REG_LEN
	bf/s	1b
	mov.b	r0,@-REG_DST		/* (delay slot) store the byte */
9:
	rts
#ifdef REG_DST0
	mov	REG_DST0,r0		/* (delay slot) return original dst */
#else
	nop				/* (delay slot) bcopy returns nothing */
#endif