/*	$NetBSD: memcpy.S,v 1.3 2024/02/07 04:20:25 msaitoh Exp $	*/

/*
 * Copyright (c) 2000 SHIMIZU Ryo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS) && !defined(lint)
	RCSID("$NetBSD: memcpy.S,v 1.3 2024/02/07 04:20:25 msaitoh Exp $")
#endif

/*
 * memcpy / memmove / bcopy from a single source file.
 *
 * Exactly one entry point is assembled, selected by defining MEMCOPY,
 * MEMMOVE or BCOPY; MEMCOPY is assumed when none of them is defined.
 *
 * Arguments (see the REG_* definitions below):
 *	memcpy, memmove:  r4 = dst, r5 = src, r6 = len
 *	bcopy:            r4 = src, r5 = dst, r6 = len
 *
 * Return value:
 *	memcpy, memmove:  r0 = dst as passed in (kept in r3 during the copy)
 *	bcopy:            r0 is not set up
 *
 * Modified: r0, r1, r4, r5, r6, the T bit, and r3 for memcpy/memmove.
 *
 * Copy direction (identical for all three entry points):
 *	src == dst	nothing to do
 *	src >  dst	copy forward, lowest address first
 *	src <  dst	copy backward, starting at src + len / dst + len
 *
 * Transfer width, chosen from (src ^ dst) & 3:
 *	0		align on bytes/words, then copy 32-bit longwords
 *	2		align on a byte, then copy 16-bit words
 *	1 or 3		copy byte by byte
 * Whatever is left over is finished by the byte loop.
 *
 * bt/s, bf/s, bra and rts are delayed branches: the instruction that
 * follows them is executed before the branch takes effect.  Those
 * instructions are marked "(delay slot)" below; several of them set the
 * T bit that the code at the branch target tests, so the instruction
 * order must not be changed.
 */

#if !defined(MEMCOPY) && !defined(MEMMOVE) && !defined(BCOPY)
#define MEMCOPY
#endif

#if defined(MEMCOPY) || defined(MEMMOVE)
#define	REG_DST0	r3	/* copy of the original dst, for the return value */
#define	REG_SRC		r5
#define	REG_DST		r4
#else
#define	REG_SRC		r4
#define	REG_DST		r5
#endif

#define	REG_LEN		r6

#if defined(MEMCOPY)
ENTRY(memcpy)
#elif defined(MEMMOVE)
ENTRY(memmove)
#elif defined(BCOPY)
ENTRY(bcopy)
#endif
#ifdef REG_DST0
	mov	REG_DST,REG_DST0	/* remember dst for the return value */
#endif
	cmp/eq	REG_DST,REG_SRC		/* if ( src == dst ) return; */
	bt/s	bcopy_return
	cmp/hi	REG_DST,REG_SRC		/* (delay slot) T = ( src > dst ), unsigned */
	bf/s	bcopy_overlap		/* if ( src < dst ) copy backward */

	mov	REG_SRC,r0		/* (delay slot) */
	xor	REG_DST,r0
	and	#3,r0
	mov	r0,r1			/* r1 = (src ^ dst) & 3 */
	tst	r0,r0			/* (src ^ dst) & 3 */
	bf/s	word_align		/* non-zero: no common longword alignment */

	/*
	 * Forward copy, src and dst share the same alignment modulo 4.
	 */
longword_align:
	tst	REG_LEN,REG_LEN		/* (delay slot) if ( len == 0 ) return; */
	bt/s	bcopy_return


	mov	REG_SRC,r0		/* (delay slot) */
	tst	#1,r0			/* if ( src & 1 ) { */
	bt	1f
	mov.b	@REG_SRC+,r0		/*	*dst++ = *src++; */
	add	#-1,REG_LEN		/*	len--; */
	mov.b	r0,@REG_DST
	add	#1,REG_DST		/* } */
1:


	mov	#1,r0
	cmp/hi	r0,REG_LEN		/* if ( (len > 1) && */
	bf/s	1f
	mov	REG_SRC,r0		/* (delay slot) */
	tst	#2,r0			/*      (src & 2) ) { */
	bt	1f
	mov.w	@REG_SRC+,r0		/*	*((unsigned short*)dst)++ = *((unsigned short*)src)++; */
	add	#-2,REG_LEN		/*	len -= 2; */
	mov.w	r0,@REG_DST
	add	#2,REG_DST		/* } */
1:


	mov	#3,r1
	cmp/hi	r1,REG_LEN		/* while ( len > 3 ) { */
	bf/s	no_align_delay
	tst	REG_LEN,REG_LEN		/* (delay slot) T = ( len == 0 ) */
2:
	mov.l	@REG_SRC+,r0		/*	*((unsigned long*)dst)++ = *((unsigned long*)src)++; */
	add	#-4,REG_LEN		/*	len -= 4; */
	mov.l	r0,@REG_DST
	cmp/hi	r1,REG_LEN
	bt/s	2b
	add	#4,REG_DST		/* (delay slot) } */

	bra	no_align_delay		/* finish the tail byte by byte */
	tst	REG_LEN,REG_LEN		/* (delay slot) T = ( len == 0 ) */


	/*
	 * Forward copy, src and dst differ in alignment; r1 = (src ^ dst) & 3.
	 */
word_align:
	mov	r1,r0
	tst	#1,r0			/* if ( (src ^ dst) & 1 ) byte copy only */
	bf/s	no_align_delay
	tst	REG_LEN,REG_LEN		/* (delay slot) if ( len == 0 ) return; */
	bt	bcopy_return


	mov	REG_SRC,r0		/* if ( src & 1 ) { */
	tst	#1,r0
	bt	1f
	mov.b	@REG_SRC+,r0		/*	*dst++ = *src++; */
	add	#-1,REG_LEN		/*	len--; */
	mov.b	r0,@REG_DST
	add	#1,REG_DST		/* } */
1:


	mov	#1,r1
	cmp/hi	r1,REG_LEN		/* while ( len > 1 ) { */
	bf/s	no_align_delay
	tst	REG_LEN,REG_LEN		/* (delay slot) T = ( len == 0 ) */
2:
	mov.w	@REG_SRC+,r0		/*	*((unsigned short*)dst)++ = *((unsigned short*)src)++; */
	add	#-2,REG_LEN		/*	len -= 2; */
	mov.w	r0,@REG_DST
	cmp/hi	r1,REG_LEN
	bt/s	2b
	add	#2,REG_DST		/* (delay slot) } */


	/*
	 * Forward byte loop.  no_align_delay is entered with T already
	 * holding ( len == 0 ), set by the delay slot of the branch here.
	 */
no_align:
	tst	REG_LEN,REG_LEN		/* while ( len != 0 ) { */
no_align_delay:
	bt	bcopy_return
1:
	mov.b	@REG_SRC+,r0		/*	*dst++ = *src++; */
	add	#-1,REG_LEN		/*	len--; */
	mov.b	r0,@REG_DST
	tst	REG_LEN,REG_LEN
	bf/s	1b
	add	#1,REG_DST		/* (delay slot) } */
bcopy_return:
	rts
#ifdef REG_DST0
	mov	REG_DST0,r0		/* (delay slot) return the original dst */
#else
	nop				/* (delay slot) */
#endif


	/*
	 * Backward copy, used when src < dst.  Both pointers are first moved
	 * just past the end of their buffers and then walked downward, so the
	 * alignment tests below look at the end addresses.
	 */
bcopy_overlap:
	add	REG_LEN,REG_SRC		/* src += len; */
	add	REG_LEN,REG_DST		/* dst += len; */

	mov	REG_SRC,r0
	xor	REG_DST,r0
	and	#3,r0
	mov	r0,r1			/* r1 = (src ^ dst) & 3 */
	tst	r0,r0			/* (src ^ dst) & 3 */
	bf/s	ov_word_align		/* non-zero: no common longword alignment */

ov_longword_align:
	tst	REG_LEN,REG_LEN		/* (delay slot) if ( len == 0 ) return; */
	bt/s	bcopy_return


	mov	REG_SRC,r0		/* (delay slot) */
	tst	#1,r0			/* if ( src & 1 ) { */
	bt	1f
	add	#-1,REG_SRC		/*	*--dst = *--src; */
	mov.b	@REG_SRC,r0
	mov.b	r0,@-REG_DST
	add	#-1,REG_LEN		/*	len--; */
1:					/* } */


	mov	#1,r0
	cmp/hi	r0,REG_LEN		/* if ( (len > 1) && */
	bf/s	1f
	mov	REG_SRC,r0		/* (delay slot) */
	tst	#2,r0			/*      (src & 2) ) { */
	bt	1f
	add	#-2,REG_SRC		/*	*--((unsigned short*)dst) = *--((unsigned short*)src); */
	mov.w	@REG_SRC,r0
	add	#-2,REG_LEN		/*	len -= 2; */
	mov.w	r0,@-REG_DST		/* } */
1:


	mov	#3,r1
	cmp/hi	r1,REG_LEN		/* while ( len > 3 ) { */
	bf/s	ov_no_align_delay
	tst	REG_LEN,REG_LEN		/* (delay slot) T = ( len == 0 ) */
2:
	add	#-4,REG_SRC
	mov.l	@REG_SRC,r0		/*	*--((unsigned long*)dst) = *--((unsigned long*)src); */
	add	#-4,REG_LEN		/*	len -= 4; */
	cmp/hi	r1,REG_LEN
	bt/s	2b
	mov.l	r0,@-REG_DST		/* (delay slot) } */

	bra	ov_no_align_delay	/* finish the tail byte by byte */
	tst	REG_LEN,REG_LEN		/* (delay slot) T = ( len == 0 ) */


	/*
	 * Backward copy, src and dst differ in alignment; r1 = (src ^ dst) & 3.
	 */
ov_word_align:
	mov	r1,r0
	tst	#1,r0			/* if ( (src ^ dst) & 1 ) byte copy only */
	bf/s	ov_no_align_delay
	tst	REG_LEN,REG_LEN		/* (delay slot) if ( len == 0 ) return; */
	bt	bcopy_return


	mov	REG_SRC,r0		/* if ( src & 1 ) { */
	tst	#1,r0
	bt	1f
	add	#-1,REG_SRC
	mov.b	@REG_SRC,r0		/*	*--dst = *--src; */
	add	#-1,REG_LEN		/*	len--; */
	mov.b	r0,@-REG_DST
1:					/* } */


	mov	#1,r1
	cmp/hi	r1,REG_LEN		/* while ( len > 1 ) { */
	bf/s	ov_no_align_delay
	tst	REG_LEN,REG_LEN		/* (delay slot) T = ( len == 0 ) */
2:
	add	#-2,REG_SRC
	mov.w	@REG_SRC,r0		/*	*--((unsigned short*)dst) = *--((unsigned short*)src); */
	add	#-2,REG_LEN		/*	len -= 2; */
	cmp/hi	r1,REG_LEN
	bt/s	2b
	mov.w	r0,@-REG_DST		/* (delay slot) } */


	/*
	 * Backward byte loop.  ov_no_align_delay is entered with T already
	 * holding ( len == 0 ), set by the delay slot of the branch here.
	 */
ov_no_align:
	tst	REG_LEN,REG_LEN		/* while ( len != 0 ) { */
ov_no_align_delay:
	bt	9f
1:
	add	#-1,REG_SRC
	mov.b	@REG_SRC,r0		/*	*--dst = *--src; */
	add	#-1,REG_LEN		/*	len--; */
	tst	REG_LEN,REG_LEN
	bf/s	1b
	mov.b	r0,@-REG_DST		/* (delay slot) } */
9:
	rts
#ifdef REG_DST0
	mov	REG_DST0,r0		/* (delay slot) return the original dst */
#else
	nop				/* (delay slot) */
#endif