1 1.1 christos /* $NetBSD: bcopy.S,v 1.1 2005/12/20 19:28:49 christos Exp $ */ 2 1.1 christos 3 1.1 christos /* 4 1.1 christos * Copyright (c) 1995 Carnegie-Mellon University. 5 1.1 christos * All rights reserved. 6 1.1 christos * 7 1.1 christos * Author: Trevor Blackwell. Support for use as memcpy() and memmove() 8 1.1 christos * added by Chris Demetriou. 9 1.1 christos * 10 1.1 christos * Permission to use, copy, modify and distribute this software and 11 1.1 christos * its documentation is hereby granted, provided that both the copyright 12 1.1 christos * notice and this permission notice appear in all copies of the 13 1.1 christos * software, derivative works or modified versions, and any portions 14 1.1 christos * thereof, and that both notices appear in supporting documentation. 15 1.1 christos * 16 1.1 christos * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 17 1.1 christos * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 18 1.1 christos * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 19 1.1 christos * 20 1.1 christos * Carnegie Mellon requests users of this software to return to 21 1.1 christos * 22 1.1 christos * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU 23 1.1 christos * School of Computer Science 24 1.1 christos * Carnegie Mellon University 25 1.1 christos * Pittsburgh PA 15213-3890 26 1.1 christos * 27 1.1 christos * any improvements or extensions that they make and grant Carnegie the 28 1.1 christos * rights to redistribute these changes. 
29 1.1 christos */
30 1.1 christos
31 1.1 christos #include <machine/asm.h>
32 1.1 christos /*
 * Build-time configuration.  This one source is assembled as memcpy when
 * MEMCOPY is defined, as memmove when MEMCOPY is not defined but MEMMOVE
 * is, and as bcopy when neither is defined.  bcopy takes (from, to, len)
 * while memcpy and memmove take (to, from, len), so SRCREG and DSTREG
 * swap between a0 and a1 to follow the argument order.  The length is in
 * a2 for all three entry points.
 */
33 1.1 christos #if defined(MEMCOPY) || defined(MEMMOVE)
34 1.1 christos #ifdef MEMCOPY
35 1.1 christos #define FUNCTION memcpy
36 1.1 christos #else
37 1.1 christos #define FUNCTION memmove
38 1.1 christos #endif
39 1.1 christos #define SRCREG a1
40 1.1 christos #define DSTREG a0
41 1.1 christos #else /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
42 1.1 christos #define FUNCTION bcopy
43 1.1 christos #define SRCREG a0
44 1.1 christos #define DSTREG a1
45 1.1 christos #endif /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
46 1.1 christos
47 1.1 christos #define SIZEREG a2
48 1.1 christos
49 1.1 christos /*
50 1.1 christos * Copy bytes.
51 1.1 christos *
52 1.1 christos * void bcopy(char *from, char *to, size_t len);
53 1.1 christos * char *memcpy(void *to, const void *from, size_t len);
54 1.1 christos * char *memmove(void *to, const void *from, size_t len);
55 1.1 christos *
56 1.1 christos * No matter how invoked, the source and destination registers
57 1.1 christos * are used directly for calculation. There's no point in copying
58 1.1 christos * them to "working" registers, since the code uses their values
59 1.1 christos * "in place," and copying them would be slower.
 *
 * All three entry points share the same body, so all three tolerate
 * overlapping regions: when the destination starts inside the source
 * range the copy is done in an overlap-safe order.  A length that is
 * zero, or negative when read as a signed value, copies nothing.  The
 * memcpy and memmove builds return the original destination pointer
 * in v0.
60 1.1 christos */
61 1.1 christos /*
 * Road map of the routine, in the order the paths appear:
 *
 *  - len <= 0 (signed compare): return without touching memory.
 *  - dst lies inside [src, src + len): bcopy_overlap (backwards copy).
 *  - src and dst have the same offset within a quadword: merge the first
 *    partial quadword, then move whole quadwords (bcopy_all_aligned).
 *  - different offsets: bcopy_different_alignment.  Copies of 8 bytes or
 *    fewer go to bcopy_da_finish; longer ones first align dst, then build
 *    each dst quadword from two source quadwords with extql/extqh.
 *
 * ldq_u and stq_u access the aligned quadword that contains the given
 * address (the low three address bits are ignored), which is why they
 * are used with pointers that may still carry a byte offset.
 *
 * Register conventions used between the paths: t2 holds the most recently
 * loaded source quadword; a3 is the source end address and a4 the
 * destination end address on the paths that set them.
 */
62 1.1 christos LEAF(FUNCTION,3) /* LEAF, END and RET come from <machine/asm.h>; presumably this opens a leaf routine named FUNCTION with 3 arguments -- confirm against that header */
63 1.1 christos
64 1.1 christos #if defined(MEMCOPY) || defined(MEMMOVE)
65 1.1 christos /* memcpy/memmove return the destination; capture it in v0 now, because DSTREG is advanced in place below */
66 1.1 christos mov DSTREG,v0
67 1.1 christos #endif
68 1.1 christos
69 1.1 christos /* Nothing to do for a zero length, or for one that is negative as a signed value */
70 1.1 christos ble SIZEREG,bcopy_done
71 1.1 christos
72 1.1 christos /* Check for overlap: unsigned (dst - src) < len means dst starts inside the source range (this includes dst == src) */
73 1.1 christos subq DSTREG,SRCREG,t5
74 1.1 christos cmpult t5,SIZEREG,t5
75 1.1 christos bne t5,bcopy_overlap
76 1.1 christos
77 1.1 christos /* a3 = source end address (src + len); not read on the same-alignment path, and recomputed at bcopy_different_alignment */
78 1.1 christos addq SRCREG,SIZEREG,a3
79 1.1 christos
80 1.1 christos /* Get the first source quadword (the aligned quadword containing the first source byte) */
81 1.1 christos ldq_u t2,0(SRCREG)
82 1.1 christos
83 1.1 christos /* Do they have the same alignment?  t0 = (src ^ dst) & 7 is zero if so; t1 = dst & 7 is the offset of dst within its quadword */
84 1.1 christos xor SRCREG,DSTREG,t0
85 1.1 christos and t0,7,t0
86 1.1 christos and DSTREG,7,t1
87 1.1 christos bne t0,bcopy_different_alignment
88 1.1 christos
89 1.1 christos /* src & dst have same alignment; if that offset is zero there is no partial first word to merge */
90 1.1 christos beq t1,bcopy_all_aligned
91 1.1 christos
92 1.1 christos ldq_u t3,0(DSTREG) /* existing contents of the first destination quadword */
93 1.1 christos addq SIZEREG,t1,SIZEREG /* count the bytes below the start offset too, as if the copy began at the quadword boundary */
94 1.1 christos mskqh t2,SRCREG,t2 /* keep the source bytes at and above the start offset */
95 1.1 christos mskql t3,SRCREG,t3 /* keep the destination bytes below the start offset (same offset as src here) */
96 1.1 christos or t2,t3,t2 /* t2 = merged first quadword, ready to be stored whole */
97 1.1 christos
98 1.1 christos /* From here on the copy proceeds quadword by quadword.  The pointers may still carry the common byte offset; ldq_u/stq_u ignore it */
99 1.1 christos
100 1.1 christos bcopy_all_aligned:
101 1.1 christos /* t0 = (len - 1) & ~7 = bytes stored by the loop (every quadword except the final one); SIZEREG = len & 7 = bytes used in the final quadword, 0 meaning all 8.  If 8 bytes or fewer remain, skip the loop */
102 1.1 christos subq SIZEREG,1,t0
103 1.1 christos and SIZEREG,7,SIZEREG
104 1.1 christos bic t0,7,t0
105 1.1 christos beq t0,bcopy_samealign_lp_end
106 1.1 christos
107 1.1 christos bcopy_samealign_lp: /* store the quadword in t2, then load the next source quadword into t2, until t0 reaches zero */
108 1.1 christos stq_u t2,0(DSTREG)
109 1.1 christos addq DSTREG,8,DSTREG
110 1.1 christos ldq_u t2,8(SRCREG)
111 1.1 christos subq t0,8,t0
112 1.1 christos addq SRCREG,8,SRCREG
113 1.1 christos bne t0,bcopy_samealign_lp
114 1.1 christos
115 1.1 christos bcopy_samealign_lp_end:
116 1.1 christos /* Final quadword is in t2.  If SIZEREG is zero all 8 bytes belong to the copy: store it and exit.  Otherwise only the low SIZEREG bytes do; merge at bcopy_small_left.  The different-alignment path also finishes here */
117 1.1 christos bne SIZEREG,bcopy_small_left
118 1.1 christos stq_u t2,0(DSTREG)
119 1.1 christos RET
120 1.1 christos
121 1.1 christos bcopy_small_left: /* partial final quadword: low SIZEREG bytes from the source, the rest preserved from the destination */
122 1.1 christos mskql t2,SIZEREG,t4 /* t4 = low SIZEREG bytes of the source quadword */
123 1.1 christos ldq_u t3,0(DSTREG)
124 1.1 christos mskqh t3,SIZEREG,t3 /* t3 = destination bytes at and above SIZEREG */
125 1.1 christos or t4,t3,t4
126 1.1 christos stq_u t4,0(DSTREG)
127 1.1 christos RET
128 1.1 christos
129 1.1 christos bcopy_different_alignment:
130 1.1 christos /*
131 1.1 christos * this is the fun part: src and dst sit at different byte offsets,
 * so every destination quadword has to be assembled from two source
 * quadwords.  On entry t2 holds the first source quadword and
 * t1 = dst & 7.
132 1.1 christos */
133 1.1 christos addq SRCREG,SIZEREG,a3 /* a3 = source end address */
134 1.1 christos cmpule SIZEREG,8,t0 /* 8 bytes or fewer: handled in one go at bcopy_da_finish */
135 1.1 christos bne t0,bcopy_da_finish
136 1.1 christos
137 1.1 christos beq t1,bcopy_da_noentry /* dst already 8-byte aligned: no leading partial word */
138 1.1 christos
139 1.1 christos /* Do the initial partial word: fill dst up to its next 8-byte boundary */
140 1.1 christos subq zero,DSTREG,t0
141 1.1 christos and t0,7,t0 /* t0 = (-dst) & 7 = bytes needed to align dst */
142 1.1 christos ldq_u t3,7(SRCREG) /* second source quadword (the same one as t2 if src happens to be aligned) */
143 1.1 christos extql t2,SRCREG,t2
144 1.1 christos extqh t3,SRCREG,t3
145 1.1 christos or t2,t3,t5 /* t5 = the 8 source bytes starting at src */
146 1.1 christos insql t5,DSTREG,t5 /* shift them up to the byte offset of dst */
147 1.1 christos ldq_u t6,0(DSTREG)
148 1.1 christos mskql t6,DSTREG,t6 /* keep the destination bytes below the dst offset */
149 1.1 christos or t5,t6,t5
150 1.1 christos stq_u t5,0(DSTREG)
151 1.1 christos addq SRCREG,t0,SRCREG /* step both pointers past the bytes just written; dst is now aligned */
152 1.1 christos addq DSTREG,t0,DSTREG
153 1.1 christos subq SIZEREG,t0,SIZEREG
154 1.1 christos ldq_u t2,0(SRCREG) /* reload t2 with the quadword containing the new src */
155 1.1 christos
156 1.1 christos bcopy_da_noentry: /* dst is 8-byte aligned here.  t0 = (len - 1) & ~7 = bytes for the loop, SIZEREG = len & 7 = bytes used in the final quadword (0 meaning 8) */
157 1.1 christos subq SIZEREG,1,t0
158 1.1 christos bic t0,7,t0
159 1.1 christos and SIZEREG,7,SIZEREG
160 1.1 christos beq t0,bcopy_da_finish2
161 1.1 christos
162 1.1 christos bcopy_da_lp: /* unrolled twice; t2 and t3 alternate as the previous and the newly loaded source quadword, so no register copy is needed per iteration */
163 1.1 christos ldq_u t3,7(SRCREG)
164 1.1 christos addq SRCREG,8,SRCREG /* adding 8 leaves the low three bits, and so the extract shift, unchanged */
165 1.1 christos extql t2,SRCREG,t4
166 1.1 christos extqh t3,SRCREG,t5
167 1.1 christos subq t0,8,t0
168 1.1 christos or t4,t5,t5
169 1.1 christos stq t5,0(DSTREG) /* plain aligned store: dst is aligned on this path */
170 1.1 christos addq DSTREG,8,DSTREG
171 1.1 christos beq t0,bcopy_da_finish1 /* leaving after the first half: the newest source quadword is already in t3 */
172 1.1 christos ldq_u t2,7(SRCREG)
173 1.1 christos addq SRCREG,8,SRCREG
174 1.1 christos extql t3,SRCREG,t4
175 1.1 christos extqh t2,SRCREG,t5
176 1.1 christos subq t0,8,t0
177 1.1 christos or t4,t5,t5
178 1.1 christos stq t5,0(DSTREG)
179 1.1 christos addq DSTREG,8,DSTREG
180 1.1 christos bne t0,bcopy_da_lp
181 1.1 christos
182 1.1 christos bcopy_da_finish2:
183 1.1 christos /* Reached with the newest source quadword in t2; move it to t3, where bcopy_da_finish1 expects it */
184 1.1 christos mov t2,t3
185 1.1 christos
186 1.1 christos bcopy_da_finish1:
187 1.1 christos /* Do the last word: combine t3 with the quadword holding the final source byte, then let the same-alignment tail store it whole or partially according to SIZEREG */
188 1.1 christos ldq_u t2,-1(a3)
189 1.1 christos extql t3,SRCREG,t3
190 1.1 christos extqh t2,SRCREG,t2
191 1.1 christos or t2,t3,t2
192 1.1 christos br zero,bcopy_samealign_lp_end
193 1.1 christos
194 1.1 christos bcopy_da_finish:
195 1.1 christos /* Short copy of 8 bytes or fewer at any alignment (also used by bcopy_ov_short).  Expects t2 = first source quadword and a3 = source end.  All source bytes are read before anything is stored.  The destination bytes may straddle two quadwords, so a data/mask pair is built for each */
196 1.1 christos ldq_u t3,-1(a3) /* quadword holding the last source byte */
197 1.1 christos extql t2,SRCREG,t2
198 1.1 christos extqh t3,SRCREG,t3
199 1.1 christos or t2,t3,t2 /* t2 = source bytes, first byte in the low position */
200 1.1 christos insqh t2,DSTREG,t3 /* t3 = data spilling into the following destination quadword */
201 1.1 christos insql t2,DSTREG,t2 /* t2 = data positioned for the first destination quadword */
202 1.1 christos lda t4,-1(zero) /* t4 = all ones */
203 1.1 christos mskql t4,SIZEREG,t5 /* t5 = ones in the low (len & 7) bytes; zero when len is 8 */
204 1.1 christos cmovne t5,t5,t4 /* t4 = byte mask covering len bytes (stays all ones for len == 8) */
205 1.1 christos insqh t4,DSTREG,t5 /* t5 = mask for the following destination quadword */
206 1.1 christos insql t4,DSTREG,t4 /* t4 = mask for the first destination quadword */
207 1.1 christos addq DSTREG,SIZEREG,a4 /* a4 = destination end address */
208 1.1 christos ldq_u t6,0(DSTREG)
209 1.1 christos ldq_u t7,-1(a4)
210 1.1 christos bic t6,t4,t6 /* clear the destination bytes that will be replaced */
211 1.1 christos bic t7,t5,t7
212 1.1 christos and t2,t4,t2 /* drop source bytes beyond len */
213 1.1 christos and t3,t5,t3
214 1.1 christos or t2,t6,t2
215 1.1 christos or t3,t7,t3
216 1.1 christos stq_u t3,-1(a4) /* high quadword first: if everything fits in one quadword both stores hit the same location, t5 is zero so t3 is just the old contents, and the merged value stored next must land last */
217 1.1 christos stq_u t2,0(DSTREG)
218 1.1 christos RET
219 1.1 christos
220 1.1 christos bcopy_overlap:
221 1.1 christos /*
222 1.1 christos * Basically equivalent to previous case, only backwards.
223 1.1 christos * Not quite as highly optimized
 *
 * Reached when dst starts inside [src, src + len).  The copy runs
 * from the end of both regions towards the start, so source bytes
 * are read before the stores can reach them.  a3 and a4 walk down
 * from the source and destination end addresses; SRCREG and DSTREG
 * are left untouched for the final leading bytes.
224 1.1 christos */
225 1.1 christos addq SRCREG,SIZEREG,a3
226 1.1 christos addq DSTREG,SIZEREG,a4
227 1.1 christos
228 1.1 christos /* 8 bytes or fewer - don't worry about overlap: bcopy_da_finish reads every source byte before it stores */
229 1.1 christos cmpule SIZEREG,8,t0
230 1.1 christos bne t0,bcopy_ov_short
231 1.1 christos
232 1.1 christos /* Possibly do a partial first word: the t4 = (dst end) & 7 bytes that lie above the last 8-byte boundary of the destination */
233 1.1 christos and a4,7,t4
234 1.1 christos beq t4,bcopy_ov_nostart2
235 1.1 christos subq a3,t4,a3
236 1.1 christos subq a4,t4,a4 /* a4 is now 8-byte aligned */
237 1.1 christos ldq_u t1,0(a3)
238 1.1 christos subq SIZEREG,t4,SIZEREG
239 1.1 christos ldq_u t2,7(a3)
240 1.1 christos ldq t3,0(a4) /* aligned load of the destination quadword being partly overwritten */
241 1.1 christos extql t1,a3,t1
242 1.1 christos extqh t2,a3,t2
243 1.1 christos or t1,t2,t1 /* t1 = the 8 source bytes starting at a3 */
244 1.1 christos mskqh t3,t4,t3 /* keep the destination bytes at and above t4 (past the end of the copy) */
245 1.1 christos mskql t1,t4,t1 /* keep the low t4 source bytes */
246 1.1 christos or t1,t3,t1
247 1.1 christos stq t1,0(a4)
248 1.1 christos
249 1.1 christos bcopy_ov_nostart2: /* t4 = len & ~7 = bytes moved by the loop as whole quadwords; SIZEREG = len & 7 = leading bytes left for afterwards */
250 1.1 christos bic SIZEREG,7,t4
251 1.1 christos and SIZEREG,7,SIZEREG
252 1.1 christos beq t4,bcopy_ov_lp_end
253 1.1 christos
254 1.1 christos bcopy_ov_lp:
255 1.1 christos /* One aligned destination quadword per iteration, walking downwards; both source quadwords are loaded before the store.  This could be more pipelined, but it doesn't seem worth it */
256 1.1 christos ldq_u t0,-8(a3)
257 1.1 christos subq a4,8,a4
258 1.1 christos ldq_u t1,-1(a3)
259 1.1 christos subq a3,8,a3
260 1.1 christos extql t0,a3,t0
261 1.1 christos extqh t1,a3,t1
262 1.1 christos subq t4,8,t4
263 1.1 christos or t0,t1,t0
264 1.1 christos stq t0,0(a4)
265 1.1 christos bne t4,bcopy_ov_lp
266 1.1 christos
267 1.1 christos bcopy_ov_lp_end: /* SIZEREG leading bytes (fewer than 8) remain at the very start of the regions; none means done */
268 1.1 christos beq SIZEREG,bcopy_done
269 1.1 christos
270 1.1 christos ldq_u t0,0(SRCREG)
271 1.1 christos ldq_u t1,7(SRCREG)
272 1.1 christos ldq_u t2,0(DSTREG)
273 1.1 christos extql t0,SRCREG,t0
274 1.1 christos extqh t1,SRCREG,t1
275 1.1 christos or t0,t1,t0 /* t0 = the 8 source bytes starting at src */
276 1.1 christos insql t0,DSTREG,t0 /* shift them up to the dst offset; bytes pushed past the quadword are dropped */
277 1.1 christos mskql t2,DSTREG,t2 /* keep the destination bytes below the dst offset */
278 1.1 christos or t2,t0,t2
279 1.1 christos stq_u t2,0(DSTREG)
280 1.1 christos
281 1.1 christos bcopy_done: /* common exit, also the target of the length check at the top */
282 1.1 christos RET
283 1.1 christos
284 1.1 christos bcopy_ov_short: /* overlapping copy of 8 bytes or fewer: load the first source quadword into t2 as bcopy_da_finish expects (a3 was set at bcopy_overlap) and reuse that path */
285 1.1 christos ldq_u t2,0(SRCREG)
286 1.1 christos br zero,bcopy_da_finish
287 1.1 christos
288 1.1 christos END(FUNCTION)
289