1 1.4 bouyer /* $NetBSD: bcopy.S,v 1.4 2011/08/27 13:23:52 bouyer Exp $ */ 2 1.1 christos 3 1.1 christos /* 4 1.1 christos * Mach Operating System 5 1.1 christos * Copyright (c) 1993 Carnegie Mellon University 6 1.1 christos * All Rights Reserved. 7 1.1 christos * 8 1.1 christos * Permission to use, copy, modify and distribute this software and its 9 1.1 christos * documentation is hereby granted, provided that both the copyright 10 1.1 christos * notice and this permission notice appear in all copies of the 11 1.1 christos * software, derivative works or modified versions, and any portions 12 1.1 christos * thereof, and that both notices appear in supporting documentation. 13 1.1 christos * 14 1.1 christos * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 1.1 christos * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR 16 1.1 christos * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 1.1 christos * 18 1.1 christos * Carnegie Mellon requests users of this software to return to 19 1.1 christos * 20 1.1 christos * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU 21 1.1 christos * School of Computer Science 22 1.1 christos * Carnegie Mellon University 23 1.1 christos * Pittsburgh PA 15213-3890 24 1.1 christos * 25 1.1 christos * any improvements or extensions that they make and grant Carnegie Mellon 26 1.1 christos * the rights to redistribute these changes. 27 1.1 christos */ 28 1.1 christos 29 1.1 christos /* 30 1.1 christos * File: mips_bcopy.s 31 1.1 christos * Author: Chris Maeda 32 1.1 christos * Date: June 1993 33 1.1 christos * 34 1.1 christos * Fast copy routine. Derived from aligned_block_copy. 
35 1.1 christos */ 36 1.1 christos 37 1.1 christos 38 1.1 christos #include <mips/asm.h> 39 1.2 tsutsui #ifndef _LOCORE 40 1.1 christos #define _LOCORE /* XXX not really, just assembly-code source */ 41 1.2 tsutsui #endif 42 1.1 christos #include <machine/endian.h> 43 1.1 christos 44 1.1 christos 45 1.1 christos #if defined(LIBC_SCCS) && !defined(lint) 46 1.3 matt #if 0 47 1.3 matt RCSID("from: @(#)mips_bcopy.s 2.2 CMU 18/06/93") 48 1.3 matt #else 49 1.4 bouyer RCSID("$NetBSD: bcopy.S,v 1.4 2011/08/27 13:23:52 bouyer Exp $") 50 1.3 matt #endif 51 1.1 christos #endif /* LIBC_SCCS and not lint */ 52 1.1 christos 53 1.1 christos /* 54 1.1 christos * bcopy(caddr_t src, caddr_t dst, unsigned int len) 55 1.1 christos * 56 1.1 christos * a0 src address 57 1.1 christos * a1 dst address 58 1.1 christos * a2 length * * Copies a2 bytes from src to dst. When MEMCOPY or MEMMOVE is defined the routine is built as memcpy/memmove instead: the pointer arguments swap (a0 = dst, a1 = src, see SRCREG/DSTREG below) and dst is returned in v0. * * The body is assembled under .set noreorder, so the instruction written after each branch is that branch's delay slot; do not reorder instructions around branches. 59 1.1 christos */ 60 1.1 christos 61 1.1 christos #if defined(MEMCOPY) || defined(MEMMOVE) 62 1.1 christos #ifdef MEMCOPY 63 1.1 christos #define FUNCTION memcpy 64 1.1 christos #else 65 1.1 christos #define FUNCTION memmove 66 1.1 christos #endif 67 1.1 christos #define SRCREG a1 68 1.1 christos #define DSTREG a0 69 1.1 christos #else 70 1.1 christos #define FUNCTION bcopy 71 1.1 christos #define SRCREG a0 72 1.1 christos #define DSTREG a1 73 1.1 christos #endif 74 1.1 christos 75 1.1 christos #define SIZEREG a2 76 1.1 christos 77 1.1 christos LEAF(FUNCTION) 78 1.1 christos .set noat 79 1.1 christos .set noreorder 80 1.1 christos 81 1.1 christos #if defined(MEMCOPY) || defined(MEMMOVE) 82 1.1 christos /* set up return value, while we still can (DSTREG is modified by the copy loops below) */ 83 1.1 christos move v0,DSTREG 84 1.1 christos #endif 85 1.1 christos /* 86 1.1 christos * Make sure we can copy forwards.
87 1.1 christos * If SRCREG < DSTREG (unsigned compare) control goes to label 6, which copies backwards starting from the end of both buffers; otherwise the forward code below is used. */ 88 1.1 christos sltu t0,SRCREG,DSTREG # t0 == SRCREG < DSTREG 89 1.1 christos bne t0,zero,6f # copy backwards 90 1.1 christos 91 1.1 christos /* 92 1.1 christos * There are four alignment cases (with frequency) 93 1.1 christos * (Based on measurements taken with a DECstation 5000/200 94 1.1 christos * inside a Mach kernel.) 95 1.1 christos * 96 1.1 christos * aligned -> aligned (mostly) 97 1.1 christos * unaligned -> aligned (sometimes) 98 1.1 christos * aligned,unaligned -> unaligned (almost never) 99 1.1 christos * 100 1.1 christos * Note that we could add another case that checks if 101 1.1 christos * the destination and source are unaligned but the 102 1.1 christos * copy is alignable, e.g. if src and dest are both 103 1.1 christos * on a halfword boundary. * * As written: an unaligned destination branches to label 3 and is copied entirely a byte at a time; an aligned destination with an unaligned source branches to label 5; both aligned falls through to the unrolled loop. Alignment is tested with the mask (SZREG-1). 104 1.1 christos */ 105 1.3 matt andi t1,DSTREG,(SZREG-1) # get last bits of dest 106 1.3 matt bne t1,zero,3f # dest unaligned 107 1.3 matt andi t0,SRCREG,(SZREG-1) # get last bits of src 108 1.3 matt bne t0,zero,5f 109 1.1 christos 110 1.1 christos /* 111 1.3 matt * Forward aligned->aligned copy, 8 words at a time. * t0 = SIZEREG truncated to a multiple of 8*SZREG (bytes handled by the unrolled loop); a3 = src address at which the unrolled loop stops. SIZEREG is reduced by t0 in the delay slot of the beq, so it holds the remaining byte count on both paths. 112 1.1 christos */ 113 1.3 matt 98: 114 1.3 matt li AT,-(SZREG*8) 115 1.3 matt and t0,SIZEREG,AT # count truncated to multiples 116 1.3 matt PTR_ADDU a3,SRCREG,t0 # run fast loop up to this addr 117 1.3 matt sltu AT,SRCREG,a3 # any work to do?
118 1.3 matt beq AT,zero,2f 119 1.3 matt PTR_SUBU SIZEREG,t0 120 1.1 christos 121 1.1 christos /* 122 1.1 christos * loop body: four registers are loaded at offsets 0..3*SZREG, SRCREG is advanced by 8*SZREG and the four are stored; the other four words are then loaded at negative offsets from the advanced SRCREG, DSTREG is advanced, and they are stored at negative offsets. The last store sits after the bne, i.e. in its delay slot. 123 1.1 christos */ 124 1.1 christos 1: # cp 125 1.3 matt REG_L t3,(0*SZREG)(SRCREG) 126 1.3 matt REG_L v1,(1*SZREG)(SRCREG) 127 1.3 matt REG_L t0,(2*SZREG)(SRCREG) 128 1.3 matt REG_L t1,(3*SZREG)(SRCREG) 129 1.3 matt PTR_ADDU SRCREG,SZREG*8 130 1.3 matt REG_S t3,(0*SZREG)(DSTREG) 131 1.3 matt REG_S v1,(1*SZREG)(DSTREG) 132 1.3 matt REG_S t0,(2*SZREG)(DSTREG) 133 1.3 matt REG_S t1,(3*SZREG)(DSTREG) 134 1.3 matt REG_L t1,(-1*SZREG)(SRCREG) 135 1.3 matt REG_L t0,(-2*SZREG)(SRCREG) 136 1.3 matt REG_L v1,(-3*SZREG)(SRCREG) 137 1.3 matt REG_L t3,(-4*SZREG)(SRCREG) 138 1.3 matt PTR_ADDU DSTREG,SZREG*8 139 1.3 matt REG_S t1,(-1*SZREG)(DSTREG) 140 1.3 matt REG_S t0,(-2*SZREG)(DSTREG) 141 1.3 matt REG_S v1,(-3*SZREG)(DSTREG) 142 1.3 matt bne SRCREG,a3,1b 143 1.3 matt REG_S t3,(-4*SZREG)(DSTREG) 144 1.1 christos 145 1.1 christos /* 146 1.1 christos * Copy a word at a time, no loop unrolling. * t2 is a byte count: SIZEREG with the bits of the mask (SZREG-1) cleared. If it is zero the code skips to the byte loop at 3; otherwise t0 = src stop address and SIZEREG keeps only the leftover bytes (SIZEREG masked with SZREG-1). 147 1.1 christos */ 148 1.1 christos 2: # wordcopy 149 1.3 matt andi t2,SIZEREG,(SZREG-1) # get byte count / SZREG 150 1.3 matt PTR_SUBU t2,SIZEREG,t2 # t2 = words to copy * SZREG 151 1.3 matt beq t2,zero,3f 152 1.3 matt PTR_ADDU t0,SRCREG,t2 # stop at t0 153 1.3 matt PTR_SUBU SIZEREG,SIZEREG,t2 154 1.1 christos 1: 155 1.3 matt REG_L t3,0(SRCREG) 156 1.3 matt PTR_ADDU SRCREG,SZREG 157 1.3 matt REG_S t3,0(DSTREG) 158 1.3 matt bne SRCREG,t0,1b 159 1.3 matt PTR_ADDU DSTREG,SZREG 160 1.1 christos 161 1.1 christos 3: # bytecopy 162 1.3 matt beq SIZEREG,zero,4f # nothing left to do?
163 1.1 christos nop 164 1.1 christos 1: 165 1.3 matt lb t3,0(SRCREG) 166 1.3 matt PTR_ADDU SRCREG,1 167 1.3 matt sb t3,0(DSTREG) 168 1.3 matt PTR_SUBU SIZEREG,1 169 1.3 matt bgtz SIZEREG,1b 170 1.3 matt PTR_ADDU DSTREG,1 171 1.1 christos 172 1.1 christos 4: # copydone 173 1.4 bouyer .set at #-mfix-loongson2f-btb 174 1.1 christos j ra 175 1.1 christos nop 176 1.4 bouyer .set noat 177 1.1 christos 178 1.1 christos /* 179 1.1 christos * Copy from unaligned source to aligned dest. * t0 = SIZEREG masked with (SZREG-1), the bytes left for the byte loop at 3; a3 = SIZEREG - t0 is the byte count handled here and then becomes the src stop address. Each word is read with the REG_LHI/REG_LLO pair (macros from <mips/asm.h>; presumably the two halves of an unaligned load -- confirm there) and written with an aligned REG_S. 180 1.1 christos */ 181 1.1 christos 5: # destaligned 182 1.3 matt andi t0,SIZEREG,(SZREG-1) # t0 = bytecount mod SZREG 183 1.3 matt PTR_SUBU a3,SIZEREG,t0 # number of words to transfer 184 1.3 matt beq a3,zero,3b 185 1.1 christos nop 186 1.3 matt move SIZEREG,t0 # this many to do after we are done 187 1.3 matt PTR_ADDU a3,SRCREG,a3 # stop point 188 1.1 christos 189 1.1 christos 1: 190 1.3 matt REG_LHI t3,0(SRCREG) 191 1.3 matt REG_LLO t3,SZREG-1(SRCREG) 192 1.3 matt PTR_ADDI SRCREG,SZREG 193 1.3 matt REG_S t3,0(DSTREG) 194 1.3 matt bne SRCREG,a3,1b 195 1.3 matt PTR_ADDI DSTREG,SZREG 196 1.1 christos 197 1.3 matt b 3b 198 1.1 christos nop 199 1.1 christos 200 1.1 christos 6: # backcopy -- based on above 201 1.3 matt PTR_ADDU SRCREG,SIZEREG 202 1.3 matt PTR_ADDU DSTREG,SIZEREG 203 1.3 matt andi t1,DSTREG,SZREG-1 # get last 3 bits of dest 204 1.3 matt bne t1,zero,3f 205 1.3 matt andi t0,SRCREG,SZREG-1 # get last 3 bits of src 206 1.3 matt bne t0,zero,5f 207 1.1 christos 208 1.1 christos /* 209 1.1 christos * Backward aligned->aligned copy, 8 words (8*SZREG bytes) at a time. * SRCREG and DSTREG were advanced by SIZEREG at label 6, so they point just past the end of each buffer and every access below is at a lower address. The alignment tests above use the mask (SZREG-1) on those end addresses. Numeric labels 1-5 are defined again in this half; the 3f/5f/2f/4f references here resolve to the definitions that follow, not to the forward-copy ones. * t0 = SIZEREG truncated to a multiple of 8*SZREG; a3 = src address at which the unrolled loop stops. 210 1.1 christos */ 211 1.3 matt li AT,(-8*SZREG) 212 1.3 matt and t0,SIZEREG,AT # count truncated to multiple of 32 213 1.3 matt beq t0,zero,2f # any work to do?
214 1.3 matt PTR_SUBU SIZEREG,t0 215 1.3 matt PTR_SUBU a3,SRCREG,t0 216 1.1 christos 217 1.1 christos /* 218 1.1 christos * loop body: same pattern as the forward unrolled loop but moving downwards. Four words are loaded at offsets -4..-1*SZREG, SRCREG is decremented by 8*SZREG, and the next four are addressed at offsets 3..0*SZREG from the decremented pointers. The last store is in the delay slot of the bne. 219 1.1 christos */ 220 1.1 christos 1: # cp 221 1.3 matt REG_L t3,(-4*SZREG)(SRCREG) 222 1.3 matt REG_L v1,(-3*SZREG)(SRCREG) 223 1.3 matt REG_L t0,(-2*SZREG)(SRCREG) 224 1.3 matt REG_L t1,(-1*SZREG)(SRCREG) 225 1.3 matt PTR_SUBU SRCREG,8*SZREG 226 1.3 matt REG_S t3,(-4*SZREG)(DSTREG) 227 1.3 matt REG_S v1,(-3*SZREG)(DSTREG) 228 1.3 matt REG_S t0,(-2*SZREG)(DSTREG) 229 1.3 matt REG_S t1,(-1*SZREG)(DSTREG) 230 1.3 matt REG_L t1,(3*SZREG)(SRCREG) 231 1.3 matt REG_L t0,(2*SZREG)(SRCREG) 232 1.3 matt REG_L v1,(1*SZREG)(SRCREG) 233 1.3 matt REG_L t3,(0*SZREG)(SRCREG) 234 1.3 matt PTR_SUBU DSTREG,8*SZREG 235 1.3 matt REG_S t1,(3*SZREG)(DSTREG) 236 1.3 matt REG_S t0,(2*SZREG)(DSTREG) 237 1.3 matt REG_S v1,(1*SZREG)(DSTREG) 238 1.3 matt bne SRCREG,a3,1b 239 1.3 matt REG_S t3,(0*SZREG)(DSTREG) 240 1.1 christos 241 1.1 christos /* 242 1.1 christos * Copy a word at a time, no loop unrolling (backwards). * t2 is a byte count, not a word count: SIZEREG with the bits of the mask (SZREG-1) cleared. t0 = SRCREG - t2 is the src stop address; SIZEREG keeps only the leftover bytes for the byte loop at 3. 243 1.1 christos */ 244 1.1 christos 2: # wordcopy 245 1.3 matt andi t2,SIZEREG,SZREG-1 # get byte count / 4 246 1.3 matt PTR_SUBU t2,SIZEREG,t2 # t2 = number of words to copy 247 1.3 matt beq t2,zero,3f 248 1.3 matt PTR_SUBU t0,SRCREG,t2 # stop at t0 249 1.3 matt PTR_SUBU SIZEREG,SIZEREG,t2 250 1.1 christos 1: 251 1.3 matt REG_L t3,-SZREG(SRCREG) 252 1.3 matt PTR_SUBU SRCREG,SZREG 253 1.3 matt REG_S t3,-SZREG(DSTREG) 254 1.3 matt bne SRCREG,t0,1b 255 1.3 matt PTR_SUBU DSTREG,SZREG 256 1.1 christos 257 1.1 christos 3: # bytecopy 258 1.3 matt beq SIZEREG,zero,4f # nothing left to do?
259 1.1 christos nop 260 1.1 christos 1: 261 1.3 matt lb t3,-1(SRCREG) 262 1.3 matt PTR_SUBU SRCREG,1 263 1.3 matt sb t3,-1(DSTREG) 264 1.3 matt PTR_SUBU SIZEREG,1 265 1.3 matt bgtz SIZEREG,1b 266 1.3 matt PTR_SUBU DSTREG,1 267 1.1 christos 268 1.1 christos 4: # copydone 269 1.4 bouyer .set at #-mfix-loongson2f-btb 270 1.1 christos j ra 271 1.1 christos nop 272 1.4 bouyer .set noat 273 1.1 christos 274 1.1 christos /* 275 1.1 christos * Copy from unaligned source to aligned dest (backward variant). * t0 = SIZEREG masked with (SZREG-1), the bytes left for the byte loop at 3; a3 = SIZEREG - t0 is the byte count handled here and then becomes the src stop address (SRCREG - a3). Words are read with REG_LHI/REG_LLO at offsets -SZREG and -1 and written with an aligned REG_S at -SZREG. 276 1.1 christos */ 277 1.1 christos 5: # destaligned 278 1.3 matt andi t0,SIZEREG,SZREG-1 # t0 = bytecount mod 4 279 1.3 matt PTR_SUBU a3,SIZEREG,t0 # number of words to transfer 280 1.3 matt beq a3,zero,3b 281 1.1 christos nop 282 1.3 matt move SIZEREG,t0 # this many to do after we are done 283 1.3 matt PTR_SUBU a3,SRCREG,a3 # stop point 284 1.1 christos 285 1.1 christos 1: 286 1.3 matt REG_LHI t3,-SZREG(SRCREG) 287 1.3 matt REG_LLO t3,-1(SRCREG) 288 1.3 matt PTR_SUBU SRCREG,SZREG 289 1.3 matt REG_S t3,-SZREG(DSTREG) 290 1.3 matt bne SRCREG,a3,1b 291 1.3 matt PTR_SUBU DSTREG,SZREG 292 1.1 christos 293 1.3 matt b 3b 294 1.1 christos nop 295 1.1 christos 296 1.1 christos .set reorder 297 1.1 christos .set at 298 1.1 christos END(FUNCTION) 299