/*	$NetBSD: bcopy.S,v 1.4 2011/08/27 13:23:52 bouyer Exp $	*/

/*
 * Mach Operating System
 * Copyright (c) 1993 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
 * School of Computer Science
 * Carnegie Mellon University
 * Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
 *	File:	mips_bcopy.s
 *	Author:	Chris Maeda
 *	Date:	June 1993
 *
 *	Fast copy routine. Derived from aligned_block_copy.
 */


#include <mips/asm.h>
#ifndef _LOCORE
#define _LOCORE		/* XXX not really, just assembly-code source */
#endif
#include <machine/endian.h>


#if defined(LIBC_SCCS) && !defined(lint)
#if 0
	RCSID("from: @(#)mips_bcopy.s 2.2 CMU 18/06/93")
#else
	RCSID("$NetBSD: bcopy.S,v 1.4 2011/08/27 13:23:52 bouyer Exp $")
#endif
#endif /* LIBC_SCCS and not lint */

/*
 *	bcopy(caddr_t src, caddr_t dst, unsigned int len)
 *
 *	a0	src address
 *	a1	dst address
 *	a2	length
 *
 *	When MEMCOPY or MEMMOVE is defined the same body is emitted under
 *	the name memcpy or memmove instead.  In that case the pointer
 *	arguments are swapped (a0 = dst, a1 = src) and the original dst
 *	is returned in v0.
 *
 *	Every variant copies forwards when src >= dst and backwards
 *	(starting from the end of both buffers) when src < dst.
 *
 *	Modifies a0-a3, t0-t3, v1 and AT (plus v0 for memcpy/memmove).
 *
 *	The body is assembled with .set noreorder, so the instruction
 *	written after each branch is its delay slot and is executed
 *	whether or not the branch is taken.  Do not reorder instructions
 *	without re-checking every delay slot.
 */

#if defined(MEMCOPY) || defined(MEMMOVE)
#ifdef MEMCOPY
#define	FUNCTION	memcpy
#else
#define	FUNCTION	memmove
#endif
#define	SRCREG		a1
#define	DSTREG		a0
#else
#define	FUNCTION	bcopy
#define	SRCREG		a0
#define	DSTREG		a1
#endif

#define	SIZEREG		a2

LEAF(FUNCTION)
	.set	noat
	.set	noreorder

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can */
	move	v0,DSTREG
#endif
	/*
	 *	Make sure we can copy forwards.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 == SRCREG < DSTREG
	bne	t0,zero,6f		# copy backwards

	/*
	 *	There are four alignment cases (with frequency)
	 *	(Based on measurements taken with a DECstation 5000/200
	 *	 inside a Mach kernel.)
	 *
	 *	aligned   -> aligned		(mostly)
	 *	unaligned -> aligned		(sometimes)
	 *	aligned,unaligned -> unaligned	(almost never)
	 *
	 *	Note that we could add another case that checks if
	 *	the destination and source are unaligned but the
	 *	copy is alignable.  eg if src and dest are both
	 *	on a halfword boundary.
	 *
	 *	The andi below sits in the delay slot of the branch to 6
	 *	above; that is harmless because the backward path
	 *	recomputes t1 after adjusting the pointers.
	 */
	andi	t1,DSTREG,(SZREG-1)	# get last bits of dest
	bne	t1,zero,3f		# dest unaligned: byte copy everything
	andi	t0,SRCREG,(SZREG-1)	# (delay slot) get last bits of src
	bne	t0,zero,5f		# src unaligned, dest aligned

	/*
	 *	Forward aligned->aligned copy, 8 words at a time.
	 *	The li is the delay slot of the branch to 5 above.
	 */
98:
	li	AT,-(SZREG*8)
	and	t0,SIZEREG,AT		# count truncated to multiples of 8 words
	PTR_ADDU a3,SRCREG,t0		# run fast loop up to this addr
	sltu	AT,SRCREG,a3		# any work to do?
	beq	AT,zero,2f
	PTR_SUBU SIZEREG,t0		# (delay slot) bytes left after fast loop

	/*
	 *	loop body
	 *
	 *	The first four words use offsets from the old SRCREG/DSTREG;
	 *	each pointer is then advanced by eight words and the last
	 *	four words are addressed with negative offsets.
	 */
1:	# cp
	REG_L	t3,(0*SZREG)(SRCREG)
	REG_L	v1,(1*SZREG)(SRCREG)
	REG_L	t0,(2*SZREG)(SRCREG)
	REG_L	t1,(3*SZREG)(SRCREG)
	PTR_ADDU SRCREG,SZREG*8
	REG_S	t3,(0*SZREG)(DSTREG)
	REG_S	v1,(1*SZREG)(DSTREG)
	REG_S	t0,(2*SZREG)(DSTREG)
	REG_S	t1,(3*SZREG)(DSTREG)
	REG_L	t1,(-1*SZREG)(SRCREG)
	REG_L	t0,(-2*SZREG)(SRCREG)
	REG_L	v1,(-3*SZREG)(SRCREG)
	REG_L	t3,(-4*SZREG)(SRCREG)
	PTR_ADDU DSTREG,SZREG*8
	REG_S	t1,(-1*SZREG)(DSTREG)
	REG_S	t0,(-2*SZREG)(DSTREG)
	REG_S	v1,(-3*SZREG)(DSTREG)
	bne	SRCREG,a3,1b
	REG_S	t3,(-4*SZREG)(DSTREG)	# (delay slot) last store of the group

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,(SZREG-1)	# t2 = byte count mod SZREG
	PTR_SUBU t2,SIZEREG,t2		# t2 = words to copy * SZREG
	beq	t2,zero,3f
	PTR_ADDU t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU SIZEREG,SIZEREG,t2	# bytes left for the byte loop
1:
	REG_L	t3,0(SRCREG)
	PTR_ADDU SRCREG,SZREG
	REG_S	t3,0(DSTREG)
	bne	SRCREG,t0,1b
	PTR_ADDU DSTREG,SZREG		# (delay slot)

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
	/*
	 *	The length is unsigned (see the prototype above), so loop
	 *	until the count reaches exactly zero.  A signed bgtz test
	 *	would end the loop early whenever the remaining count has
	 *	its sign bit set.  The count is non-zero on entry and drops
	 *	by one per pass, so bne terminates.
	 */
1:
	lb	t3,0(SRCREG)
	PTR_ADDU SRCREG,1
	sb	t3,0(DSTREG)
	PTR_SUBU SIZEREG,1
	bne	SIZEREG,zero,1b
	PTR_ADDU DSTREG,1		# (delay slot)

4:	# copydone
	.set	at		#-mfix-loongson2f-btb
	j	ra
	nop
	.set	noat

	/*
	 *	Copy from unaligned source to aligned dest.
	 *
	 *	NOTE(review): REG_LHI/REG_LLO come from <mips/asm.h>; they
	 *	are presumably the unaligned left/right load pair chosen for
	 *	the configured byte order -- confirm against that header.
	 */
5:	# destaligned
	andi	t0,SIZEREG,(SZREG-1)	# t0 = bytecount mod SZREG
	PTR_SUBU a3,SIZEREG,t0		# bytes to transfer as whole words
	beq	a3,zero,3b		# less than one word: byte copy
	nop
	move	SIZEREG,t0		# this many to do after we are done
	PTR_ADDU a3,SRCREG,a3		# stop point

1:
	REG_LHI	t3,0(SRCREG)
	REG_LLO	t3,SZREG-1(SRCREG)
	PTR_ADDI SRCREG,SZREG
	REG_S	t3,0(DSTREG)
	bne	SRCREG,a3,1b
	PTR_ADDI DSTREG,SZREG		# (delay slot)

	b	3b			# finish the tail with the byte loop
	nop

	/*
	 *	Backward copy, used when src < dst.  Both pointers are first
	 *	moved one past the end of their buffers and then walk down.
	 *	Labels 2-5 below are the backward twins of the forward ones.
	 */
6:	# backcopy -- based on above
	PTR_ADDU SRCREG,SIZEREG
	PTR_ADDU DSTREG,SIZEREG
	andi	t1,DSTREG,SZREG-1	# get last bits of dest
	bne	t1,zero,3f		# dest end unaligned: byte copy everything
	andi	t0,SRCREG,SZREG-1	# (delay slot) get last bits of src
	bne	t0,zero,5f		# src end unaligned, dest end aligned

	/*
	 *	Backward aligned->aligned copy, 8 words at a time.
	 *	The li is the delay slot of the branch to 5 above.
	 */
	li	AT,(-8*SZREG)
	and	t0,SIZEREG,AT		# count truncated to multiples of 8 words
	beq	t0,zero,2f		# any work to do?
	PTR_SUBU SIZEREG,t0		# (delay slot) bytes left after fast loop
	PTR_SUBU a3,SRCREG,t0		# run fast loop down to this addr

	/*
	 *	loop body
	 *
	 *	The top four words are copied first using negative offsets
	 *	from the old pointers; each pointer is then lowered by eight
	 *	words and the remaining four words use non-negative offsets.
	 */
1:	# cp
	REG_L	t3,(-4*SZREG)(SRCREG)
	REG_L	v1,(-3*SZREG)(SRCREG)
	REG_L	t0,(-2*SZREG)(SRCREG)
	REG_L	t1,(-1*SZREG)(SRCREG)
	PTR_SUBU SRCREG,8*SZREG
	REG_S	t3,(-4*SZREG)(DSTREG)
	REG_S	v1,(-3*SZREG)(DSTREG)
	REG_S	t0,(-2*SZREG)(DSTREG)
	REG_S	t1,(-1*SZREG)(DSTREG)
	REG_L	t1,(3*SZREG)(SRCREG)
	REG_L	t0,(2*SZREG)(SRCREG)
	REG_L	v1,(1*SZREG)(SRCREG)
	REG_L	t3,(0*SZREG)(SRCREG)
	PTR_SUBU DSTREG,8*SZREG
	REG_S	t1,(3*SZREG)(DSTREG)
	REG_S	t0,(2*SZREG)(DSTREG)
	REG_S	v1,(1*SZREG)(DSTREG)
	bne	SRCREG,a3,1b
	REG_S	t3,(0*SZREG)(DSTREG)	# (delay slot) last store of the group

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,SZREG-1	# t2 = byte count mod SZREG
	PTR_SUBU t2,SIZEREG,t2		# t2 = words to copy * SZREG
	beq	t2,zero,3f
	PTR_SUBU t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU SIZEREG,SIZEREG,t2	# bytes left for the byte loop
1:
	REG_L	t3,-SZREG(SRCREG)
	PTR_SUBU SRCREG,SZREG
	REG_S	t3,-SZREG(DSTREG)
	bne	SRCREG,t0,1b
	PTR_SUBU DSTREG,SZREG		# (delay slot)

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
	/*
	 *	Unsigned count: loop until it is exactly zero (see the
	 *	prototype above) instead of using a signed bgtz test.
	 */
1:
	lb	t3,-1(SRCREG)
	PTR_SUBU SRCREG,1
	sb	t3,-1(DSTREG)
	PTR_SUBU SIZEREG,1
	bne	SIZEREG,zero,1b
	PTR_SUBU DSTREG,1		# (delay slot)

4:	# copydone
	.set	at		#-mfix-loongson2f-btb
	j	ra
	nop
	.set	noat

	/*
	 *	Copy from unaligned source to aligned dest (backwards).
	 */
5:	# destaligned
	andi	t0,SIZEREG,SZREG-1	# t0 = bytecount mod SZREG
	PTR_SUBU a3,SIZEREG,t0		# bytes to transfer as whole words
	beq	a3,zero,3b		# less than one word: byte copy
	nop
	move	SIZEREG,t0		# this many to do after we are done
	PTR_SUBU a3,SRCREG,a3		# stop point

1:
	REG_LHI	t3,-SZREG(SRCREG)
	REG_LLO	t3,-1(SRCREG)
	PTR_SUBU SRCREG,SZREG
	REG_S	t3,-SZREG(DSTREG)
	bne	SRCREG,a3,1b
	PTR_SUBU DSTREG,SZREG		# (delay slot)

	b	3b			# finish the tail with the byte loop
	nop

	.set	reorder
	.set	at
	END(FUNCTION)