1 1.17 andvar /* $NetBSD: bcopy.S,v 1.17 2021/08/09 19:57:58 andvar Exp $ */ 2 1.1 fredette 3 1.1 fredette /* 4 1.1 fredette * Copyright (c) 2002 The NetBSD Foundation, Inc. 5 1.1 fredette * All rights reserved. 6 1.1 fredette * 7 1.1 fredette * This code is derived from software contributed to The NetBSD Foundation 8 1.1 fredette * by Matthew Fredette. 9 1.1 fredette * 10 1.1 fredette * Redistribution and use in source and binary forms, with or without 11 1.1 fredette * modification, are permitted provided that the following conditions 12 1.1 fredette * are met: 13 1.1 fredette * 1. Redistributions of source code must retain the above copyright 14 1.1 fredette * notice, this list of conditions and the following disclaimer. 15 1.1 fredette * 2. Redistributions in binary form must reproduce the above copyright 16 1.1 fredette * notice, this list of conditions and the following disclaimer in the 17 1.1 fredette * documentation and/or other materials provided with the distribution. 18 1.1 fredette * 19 1.1 fredette * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 1.1 fredette * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 1.1 fredette * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 1.1 fredette * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 1.1 fredette * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 1.1 fredette * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 1.1 fredette * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 1.1 fredette * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 1.1 fredette * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 1.1 fredette * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 1.1 fredette * POSSIBILITY OF SUCH DAMAGE. 
30 1.1 fredette */ 31 1.1 fredette 32 1.1 fredette /* 33 1.1 fredette * Copy routines for NetBSD/hppa. 34 1.1 fredette */ 35 1.1 fredette 36 1.1 fredette #undef _LOCORE 37 1.1 fredette #define _LOCORE /* XXX fredette - unfortunate */ 38 1.8 skrll 39 1.14 skrll #if defined(SPCOPY) && !defined(_STANDALONE) 40 1.14 skrll 41 1.14 skrll #include "opt_multiprocessor.h" 42 1.14 skrll 43 1.12 skrll #include <machine/cpu.h> 44 1.14 skrll 45 1.14 skrll #endif 46 1.14 skrll 47 1.1 fredette #include <machine/asm.h> 48 1.1 fredette #include <machine/frame.h> 49 1.8 skrll #include <machine/reg.h> 50 1.4 perry 51 1.4 perry #if defined(LIBC_SCCS) && !defined(lint) 52 1.17 andvar RCSID("$NetBSD: bcopy.S,v 1.17 2021/08/09 19:57:58 andvar Exp $") 53 1.1 fredette #endif /* LIBC_SCCS and not lint */ 54 1.1 fredette 55 1.1 fredette /* 56 1.4 perry * The stbys instruction is a little asymmetric. When (%r2 & 3) 57 1.1 fredette * is zero, stbys,b,m %r1, 4(%r2) works like stws,ma. You 58 1.4 perry * might then wish that when (%r2 & 3) == 0, stbys,e,m %r1, -4(%r2) 59 1.1 fredette * worked like stws,mb. But it doesn't. 60 1.1 fredette * 61 1.1 fredette * This macro works around this problem. It requires that %t2 62 1.1 fredette * hold the number of bytes that will be written by this store 63 1.1 fredette * (meaning that it ranges from one to four). 64 1.1 fredette * 65 1.4 perry * Watch the delay-slot trickery here. The comib is used to set 66 1.4 perry * up which instruction, either the stws or the stbys, is run 67 1.1 fredette * in the delay slot of the b instruction. 68 1.1 fredette */ 69 1.1 fredette #define _STBYS_E_M(r, dst_spc, dst_off) \ 70 1.3 chs comib,<> 4, %t2, 4 ! \ 71 1.1 fredette b 4 ! \ 72 1.1 fredette stws,mb r, -4(dst_spc, dst_off) ! \ 73 1.1 fredette stbys,e,m r, 0(dst_spc, dst_off) 74 1.1 fredette 75 1.1 fredette /* 76 1.4 perry * This macro does a bulk copy with no shifting. 
cmplt and m are 77 1.4 perry * the completer and displacement multiplier, respectively, for 78 1.1 fredette * the load and store instructions. 79 1.1 fredette */ 80 1.1 fredette #define _COPY(src_spc, src_off, dst_spc, dst_off, count, cmplt, m) \ 81 1.1 fredette ! \ 82 1.1 fredette /* ! \ 83 1.1 fredette * Loop storing 16 bytes at a time. Since count ! \ 84 1.1 fredette * may be > INT_MAX, we have to be careful and ! \ 85 1.1 fredette * avoid comparisons that treat it as a signed ! \ 86 1.1 fredette * quantity, until after this loop, when count ! \ 87 1.1 fredette * is guaranteed to be less than 16. ! \ 88 1.1 fredette */ ! \ 89 1.1 fredette comib,>>=,n 15, count, _LABEL(_skip16) ! \ 90 1.1 fredette .label _LABEL(_loop16) ! \ 91 1.1 fredette addi -16, count, count ! \ 92 1.3 chs ldws,cmplt m*4(src_spc, src_off), %t1 ! \ 93 1.3 chs ldws,cmplt m*4(src_spc, src_off), %t2 ! \ 94 1.3 chs ldws,cmplt m*4(src_spc, src_off), %t3 ! \ 95 1.3 chs ldws,cmplt m*4(src_spc, src_off), %t4 ! \ 96 1.3 chs stws,cmplt %t1, m*4(dst_spc, dst_off) ! \ 97 1.3 chs stws,cmplt %t2, m*4(dst_spc, dst_off) ! \ 98 1.3 chs stws,cmplt %t3, m*4(dst_spc, dst_off) ! \ 99 1.1 fredette comib,<< 15, count, _LABEL(_loop16) ! \ 100 1.3 chs stws,cmplt %t4, m*4(dst_spc, dst_off) ! \ 101 1.1 fredette .label _LABEL(_skip16) ! \ 102 1.1 fredette ! \ 103 1.1 fredette /* Loop storing 4 bytes at a time. */ ! \ 104 1.1 fredette addib,<,n -4, count, _LABEL(_skip4) ! \ 105 1.1 fredette .label _LABEL(_loop4) ! \ 106 1.3 chs ldws,cmplt m*4(src_spc, src_off), %t1 ! \ 107 1.1 fredette addib,>= -4, count, _LABEL(_loop4) ! \ 108 1.3 chs stws,cmplt %t1, m*4(dst_spc, dst_off) ! \ 109 1.1 fredette .label _LABEL(_skip4) ! \ 110 1.1 fredette /* Restore the correct count. */ ! \ 111 1.1 fredette addi 4, count, count ! \ 112 1.1 fredette ! \ 113 1.1 fredette .label _LABEL(_do1) ! \ 114 1.1 fredette ! \ 115 1.1 fredette /* Loop storing 1 byte at a time. */ ! \ 116 1.1 fredette addib,<,n -1, count, _LABEL(_skip1) ! 
\ 117 1.1 fredette .label _LABEL(_loop1) ! \ 118 1.3 chs ldbs,cmplt m*1(src_spc, src_off), %t1 ! \ 119 1.1 fredette addib,>= -1, count, _LABEL(_loop1) ! \ 120 1.3 chs stbs,cmplt %t1, m*1(dst_spc, dst_off) ! \ 121 1.1 fredette .label _LABEL(_skip1) ! \ 122 1.1 fredette /* Restore the correct count. */ ! \ 123 1.1 fredette b _LABEL(_done) ! \ 124 1.1 fredette addi 1, count, count 125 1.1 fredette 126 1.1 fredette /* 127 1.1 fredette * This macro is definitely strange. It exists purely to 128 1.4 perry * allow the _COPYS macro to be reused, but because it 129 1.1 fredette * requires this long attempt to explain it, I'm starting 130 1.1 fredette * to doubt the value of that. 131 1.1 fredette * 132 1.1 fredette * Part of the expansion of the _COPYS macro below are loops 133 1.1 fredette * that copy four words or one word at a time, performing shifts 134 1.1 fredette * to get data to line up correctly in the destination buffer. 135 1.1 fredette * 136 1.1 fredette * The _COPYS macro is used when copying backwards, as well 137 1.3 chs * as forwards. The 4-word loop always loads into %t1, %t2, %t3, 138 1.3 chs * and %t4 in that order. This means that when copying forward, 139 1.3 chs * %t1 will have the word from the lowest address, and %t4 will 140 1.4 perry * have the word from the highest address. When copying 141 1.1 fredette * backwards, the opposite is true. 142 1.1 fredette * 143 1.1 fredette * The shift instructions need pairs of registers with adjacent 144 1.4 perry * words, with the register containing the word from the lowest 145 1.17 andvar * address *always* coming first. It is this asymmetry that 146 1.1 fredette * gives rise to this macro - depending on which direction 147 1.1 fredette * we're copying in, these ordered pairs are different. 148 1.1 fredette * 149 1.4 perry * Fortunately, we can compute those register numbers at compile 150 1.4 perry * time, and assemble them manually into a shift instruction. 151 1.1 fredette * That's what this macro does. 
152 1.1 fredette * 153 1.1 fredette * This macro takes three arguments. n ranges from 0 to 3 and 154 1.1 fredette * is the "shift number", i.e., n = 0 means we're doing the 155 1.1 fredette * shift for what will be the first store. 156 1.1 fredette * 157 1.1 fredette * m is the displacement multiplier from the _COPYS macro call. 158 1.1 fredette * This is 1 for a forward copy and -1 for a backwards copy. 159 1.1 fredette * So, the ((m + 1) / 2) term yields 0 for a backwards copy and 160 1.4 perry * 1 for a forward copy, and the ((m - 1) / 2) term yields 161 1.1 fredette * 0 for a forward copy, and -1 for a backwards copy. 162 1.1 fredette * These terms are used to discriminate the register computations 163 1.1 fredette * below. t is the target register number. 164 1.1 fredette * 165 1.1 fredette * When copying forward, then, the first register used with 166 1.3 chs * the first vshd will be 19 + (3 - ((0 - 1) & 3)), or %t4, 167 1.1 fredette * which matches _COPYS' requirement that the word last loaded 168 1.4 perry * be in %t4. The first register used for the second vshd 169 1.3 chs * will then "wrap" around to 19 + (3 - ((1 - 1) & 3)), or %t1. 170 1.3 chs * And so on to %t2 and %t3. 171 1.1 fredette * 172 1.4 perry * When copying forward, the second register used with the first 173 1.4 perry * vshd will be 19 + (3 - ((0 + 0) & 3)), or %t1. It will 174 1.3 chs * continue to be %t2, then %t3, and finally %t4. 175 1.1 fredette * 176 1.4 perry * When copying backwards, the values for the first and second 177 1.4 perry * register for each vshd are reversed from the forwards case. 178 1.4 perry * (Symmetry reclaimed!) Proving this is "left as an exercise 179 1.1 fredette * for the reader" (remember the different discriminating values!) 
180 1.1 fredette */ 181 1.1 fredette #define _VSHD(n, m, t) \ 182 1.1 fredette .word (0xd0000000 | \ 183 1.1 fredette ((19 + (3 - ((n - 1 * ((m + 1) / 2)) & 3))) << 16) | \ 184 1.1 fredette ((19 + (3 - ((n + 1 * ((m - 1) / 2)) & 3))) << 21) | \ 185 1.1 fredette (t)) 186 1.1 fredette 187 1.1 fredette /* 188 1.4 perry * This macro does a bulk copy with shifting. cmplt and m are 189 1.4 perry * the completer and displacement multiplier, respectively, for 190 1.1 fredette * the load and store instructions. It is assumed that the 191 1.3 chs * word last loaded is already in %t4. 192 1.1 fredette */ 193 1.1 fredette #define _COPYS(src_spc, src_off, dst_spc, dst_off, count, cmplt, m) \ 194 1.1 fredette ! \ 195 1.1 fredette /* ! \ 196 1.1 fredette * Loop storing 16 bytes at a time. Since count ! \ 197 1.1 fredette * may be > INT_MAX, we have to be careful and ! \ 198 1.1 fredette * avoid comparisons that treat it as a signed ! \ 199 1.1 fredette * quantity, until after this loop, when count ! \ 200 1.1 fredette * is guaranteed to be less than 16. ! \ 201 1.1 fredette */ ! \ 202 1.1 fredette comib,>>=,n 15, count, _LABEL(S_skip16) ! \ 203 1.1 fredette .label _LABEL(S_loop16) ! \ 204 1.1 fredette addi -16, count, count ! \ 205 1.3 chs ldws,cmplt m*4(src_spc, src_off), %t1 ! \ 206 1.3 chs ldws,cmplt m*4(src_spc, src_off), %t2 ! \ 207 1.3 chs ldws,cmplt m*4(src_spc, src_off), %t3 ! \ 208 1.3 chs _VSHD(0, m, 1) /* vshd %t4, %t1, %r1 */ ! \ 209 1.3 chs ldws,cmplt m*4(src_spc, src_off), %t4 ! \ 210 1.3 chs _VSHD(1, m, 22) /* vshd %t1, %t2, %t1 */ ! \ 211 1.3 chs _VSHD(2, m, 21) /* vshd %t2, %t3, %t2 */ ! \ 212 1.3 chs _VSHD(3, m, 20) /* vshd %t3, %t4, %t3 */ ! \ 213 1.1 fredette stws,cmplt %r1, m*4(dst_spc, dst_off) ! \ 214 1.3 chs stws,cmplt %t1, m*4(dst_spc, dst_off) ! \ 215 1.3 chs stws,cmplt %t2, m*4(dst_spc, dst_off) ! \ 216 1.1 fredette comib,<< 15, count, _LABEL(S_loop16) ! \ 217 1.3 chs stws,cmplt %t3, m*4(dst_spc, dst_off) ! \ 218 1.1 fredette .label _LABEL(S_skip16) ! 
\ 219 1.1 fredette ! \ 220 1.1 fredette /* Loop storing 4 bytes at a time. */ ! \ 221 1.1 fredette addib,<,n -4, count, _LABEL(S_skip4) ! \ 222 1.1 fredette .label _LABEL(S_loop4) ! \ 223 1.3 chs ldws,cmplt m*4(src_spc, src_off), %t1 ! \ 224 1.3 chs _VSHD(0, m, 1) /* into %r1 (1) */ ! \ 225 1.3 chs copy %t1, %t4 ! \ 226 1.1 fredette addib,>= -4, count, _LABEL(S_loop4) ! \ 227 1.1 fredette stws,cmplt %r1, m*4(dst_spc, dst_off) ! \ 228 1.1 fredette .label _LABEL(S_skip4) ! \ 229 1.1 fredette ! \ 230 1.1 fredette /* ! \ 231 1.1 fredette * We now need to "back up" src_off by the ! \ 232 1.1 fredette * number of bytes remaining in the FIFO ! \ 233 1.3 chs * (i.e., the number of bytes remaining in %t4), ! \ 234 1.1 fredette * because (the correct) count still includes ! \ 235 1.1 fredette * these bytes, and we intend to keep it that ! \ 236 1.1 fredette * way, and finish with the single-byte copier. ! \ 237 1.1 fredette * ! \ 238 1.1 fredette * The number of bytes remaining in the FIFO is ! \ 239 1.1 fredette * related to the shift count, so recover it, ! \ 240 1.1 fredette * restoring the correct count at the same time. ! \ 241 1.1 fredette */ ! \ 242 1.3 chs mfctl %cr11, %t1 ! \ 243 1.1 fredette addi 4, count, count ! \ 244 1.3 chs shd %r0, %t1, 3, %t1 ! \ 245 1.1 fredette ! \ 246 1.1 fredette /* ! \ 247 1.1 fredette * If we're copying forward, the shift count ! \ 248 1.1 fredette * is the number of bytes remaining in the ! \ 249 1.1 fredette * FIFO, and we want to subtract it from src_off. ! \ 250 1.1 fredette * If we're copying backwards, (4 - shift count) ! \ 251 1.1 fredette * is the number of bytes remaining in the FIFO, ! \ 252 1.1 fredette * and we want to add it to src_off. ! \ 253 1.1 fredette * ! \ 254 1.1 fredette * We observe that x + (4 - y) = x - (y - 4), ! \ 255 1.1 fredette * and introduce this instruction to add -4 when ! \ 256 1.1 fredette * m is -1, although this does mean one extra ! \ 257 1.1 fredette * instruction in the forward case. ! 
\ 258 1.1 fredette */ ! \ 259 1.3 chs addi 4*((m - 1) / 2), %t1, %t1 ! \ 260 1.1 fredette ! \ 261 1.1 fredette /* Now branch to the byte-at-a-time loop. */ ! \ 262 1.1 fredette b _LABEL(_do1) ! \ 263 1.3 chs sub src_off, %t1, src_off 264 1.1 fredette 265 1.1 fredette /* 266 1.1 fredette * This macro copies a region in the forward direction. 267 1.1 fredette */ 268 1.1 fredette #define _COPY_FORWARD(src_spc, src_off, dst_spc, dst_off, count) \ 269 1.1 fredette ! \ 270 1.1 fredette /* ! \ 271 1.1 fredette * Since in the shifting-left case we will ! \ 272 1.1 fredette * load 8 bytes before checking count, to ! \ 273 1.1 fredette * keep things simple, branch to the byte ! \ 274 1.1 fredette * copier unless we're copying at least 8. ! \ 275 1.1 fredette */ ! \ 276 1.1 fredette comib,>>,n 8, count, _LABEL(_do1) ! \ 277 1.1 fredette ! \ 278 1.1 fredette /* ! \ 279 1.1 fredette * Once we 4-byte align the source offset, ! \ 280 1.1 fredette * figure out how many bytes from the region ! \ 281 1.1 fredette * will be in the first 4-byte word we read. ! \ 282 1.1 fredette * Ditto for writing the destination offset. ! \ 283 1.1 fredette */ ! \ 284 1.3 chs extru src_off, 31, 2, %t1 ! \ 285 1.3 chs extru dst_off, 31, 2, %t2 ! \ 286 1.3 chs subi 4, %t1, %t1 ! \ 287 1.3 chs subi 4, %t2, %t2 ! \ 288 1.1 fredette ! \ 289 1.1 fredette /* ! \ 290 1.1 fredette * Calculate the byte shift required. A ! \ 291 1.1 fredette * positive value means a source 4-byte word ! \ 292 1.1 fredette * has to be shifted to the right to line up ! \ 293 1.1 fredette * as a destination 4-byte word. ! \ 294 1.1 fredette */ ! \ 295 1.3 chs sub %t1, %t2, %t1 ! \ 296 1.1 fredette ! \ 297 1.1 fredette /* 4-byte align src_off. */ ! \ 298 1.1 fredette depi 0, 31, 2, src_off ! \ 299 1.1 fredette ! \ 300 1.1 fredette /* ! \ 301 1.1 fredette * It's somewhat important to note that this ! \ 302 1.1 fredette * code thinks of count as "the number of bytes ! 
\ 303 1.1 fredette * that haven't been stored yet", as opposed to ! \ 304 1.1 fredette * "the number of bytes that haven't been copied ! \ 305 1.1 fredette * yet". The distinction is subtle, but becomes ! \ 306 1.1 fredette * apparent at the end of the shifting code, where ! \ 307 1.1 fredette * we "back up" src_off to correspond to count, ! \ 308 1.1 fredette * as opposed to flushing the FIFO. ! \ 309 1.1 fredette * ! \ 310 1.1 fredette * We calculated above how many bytes our first ! \ 311 1.1 fredette * store will store, so update count now. ! \ 312 1.1 fredette * ! \ 313 1.1 fredette * If the shift is zero, strictly as an optimization ! \ 314 1.1 fredette * we use a copy loop that does no shifting. ! \ 315 1.1 fredette */ ! \ 316 1.3 chs comb,<> %r0, %t1, _LABEL(_shifting) ! \ 317 1.3 chs sub count, %t2, count ! \ 318 1.1 fredette ! \ 319 1.1 fredette /* Load and store the first word. */ ! \ 320 1.3 chs ldws,ma 4(src_spc, src_off), %t4 ! \ 321 1.3 chs stbys,b,m %t4, 4(dst_spc, dst_off) ! \ 322 1.1 fredette ! \ 323 1.1 fredette /* Do the rest of the copy. */ ! \ 324 1.1 fredette _COPY(src_spc,src_off,dst_spc,dst_off,count,ma,1) ! \ 325 1.1 fredette ! \ 326 1.1 fredette .label _LABEL(_shifting) ! \ 327 1.1 fredette ! \ 328 1.1 fredette /* ! \ 329 1.1 fredette * If shift < 0, we need to shift words to the ! \ 330 1.1 fredette * left. Since we can't do this directly, we ! \ 331 1.1 fredette * adjust the shift so it's a shift to the right ! \ 332 1.1 fredette * and load the first word into the high word of ! \ 333 1.1 fredette * the FIFO. Otherwise, we load a zero into the ! \ 334 1.1 fredette * high word of the FIFO. ! \ 335 1.1 fredette */ ! \ 336 1.3 chs comb,<= %r0, %t1, _LABEL(_shiftingrt) ! \ 337 1.3 chs copy %r0, %t3 ! \ 338 1.3 chs addi 4, %t1, %t1 ! \ 339 1.3 chs ldws,ma 4(src_spc, src_off), %t3 ! \ 340 1.1 fredette .label _LABEL(_shiftingrt) ! \ 341 1.1 fredette ! \ 342 1.1 fredette /* ! \ 343 1.1 fredette * Turn the shift byte count into a bit count, ! 
\ 344 1.1 fredette * load the next word, set the Shift Amount ! \ 345 1.1 fredette * Register, and form and store the first word. ! \ 346 1.1 fredette */ ! \ 347 1.3 chs sh3add %t1, %r0, %t1 ! \ 348 1.3 chs ldws,ma 4(src_spc, src_off), %t4 ! \ 349 1.3 chs mtctl %t1, %cr11 ! \ 350 1.3 chs vshd %t3, %t4, %r1 ! \ 351 1.1 fredette stbys,b,m %r1, 4(dst_spc, dst_off) ! \ 352 1.1 fredette ! \ 353 1.1 fredette /* Do the rest of the copy. */ ! \ 354 1.1 fredette _COPYS(src_spc,src_off,dst_spc,dst_off,count,ma,1) 355 1.1 fredette 356 1.1 fredette /* This macro copies a region in the reverse direction. */ 357 1.1 fredette #define _COPY_REVERSE(src_spc, src_off, dst_spc, dst_off, count) \ 358 1.1 fredette ! \ 359 1.1 fredette /* Immediately add count to both offsets. */ ! \ 360 1.1 fredette add src_off, count, src_off ! \ 361 1.1 fredette add dst_off, count, dst_off ! \ 362 1.1 fredette ! \ 363 1.1 fredette /* ! \ 364 1.1 fredette * Since in the shifting-right case we ! \ 365 1.1 fredette * will load 8 bytes before checking ! \ 366 1.1 fredette * count, to keep things simple, branch ! \ 367 1.1 fredette * to the byte copier unless we're ! \ 368 1.1 fredette * copying at least 8 bytes. ! \ 369 1.1 fredette */ ! \ 370 1.1 fredette comib,>>,n 8, count, _LABEL(_do1) ! \ 371 1.1 fredette ! \ 372 1.1 fredette /* ! \ 373 1.1 fredette * Once we 4-byte align the source offset, ! \ 374 1.1 fredette * figure out how many bytes from the region ! \ 375 1.1 fredette * will be in the first 4-byte word we read. ! \ 376 1.1 fredette * Ditto for writing the destination offset. ! \ 377 1.1 fredette */ ! \ 378 1.3 chs extru,<> src_off, 31, 2, %t1 ! \ 379 1.3 chs ldi 4, %t1 ! \ 380 1.3 chs extru,<> dst_off, 31, 2, %t2 ! \ 381 1.3 chs ldi 4, %t2 ! \ 382 1.1 fredette ! \ 383 1.1 fredette /* ! \ 384 1.1 fredette * Calculate the byte shift required. A ! \ 385 1.1 fredette * positive value means a source 4-byte ! \ 386 1.1 fredette * word has to be shifted to the right to ! 
\ 387 1.1 fredette * line up as a destination 4-byte word. ! \ 388 1.1 fredette */ ! \ 389 1.3 chs sub %t2, %t1, %t1 ! \ 390 1.1 fredette ! \ 391 1.1 fredette /* ! \ 392 1.1 fredette * 4-byte align src_off, leaving it pointing ! \ 393 1.1 fredette * to the 4-byte word *after* the next word ! \ 394 1.1 fredette * we intend to load. ! \ 395 1.1 fredette * ! \ 396 1.1 fredette * It's somewhat important to note that this ! \ 397 1.1 fredette * code thinks of count as "the number of bytes ! \ 398 1.1 fredette * that haven't been stored yet", as opposed to ! \ 399 1.1 fredette * "the number of bytes that haven't been copied ! \ 400 1.1 fredette * yet". The distinction is subtle, but becomes ! \ 401 1.1 fredette * apparent at the end of the shifting code, where ! \ 402 1.1 fredette * we "back up" src_off to correspond to count, ! \ 403 1.1 fredette * as opposed to flushing the FIFO. ! \ 404 1.1 fredette * ! \ 405 1.1 fredette * We calculated above how many bytes our first ! \ 406 1.1 fredette * store will store, so update count now. ! \ 407 1.1 fredette * ! \ 408 1.1 fredette * If the shift is zero, we use a copy loop that ! \ 409 1.1 fredette * does no shifting. NB: unlike the forward case, ! \ 410 1.1 fredette * this is NOT strictly an optimization. If the ! \ 411 1.1 fredette * SAR is zero the vshds do NOT do the right thing. ! \ 412 1.17 andvar * This is another asymmetry more or less the "fault" ! \ 413 1.1 fredette * of vshd. ! \ 414 1.1 fredette */ ! \ 415 1.1 fredette addi 3, src_off, src_off ! \ 416 1.3 chs sub count, %t2, count ! \ 417 1.3 chs comb,<> %r0, %t1, _LABEL(_shifting) ! \ 418 1.1 fredette depi 0, 31, 2, src_off ! \ 419 1.1 fredette ! \ 420 1.1 fredette /* Load and store the first word. */ ! \ 421 1.3 chs ldws,mb -4(src_spc, src_off), %t4 ! \ 422 1.3 chs _STBYS_E_M(%t4, dst_spc, dst_off) ! \ 423 1.1 fredette ! \ 424 1.1 fredette /* Do the rest of the copy. */ ! \ 425 1.1 fredette _COPY(src_spc,src_off,dst_spc,dst_off,count,mb,-1) ! \ 426 1.1 fredette ! 
\ 427 1.1 fredette .label _LABEL(_shifting) ! \ 428 1.1 fredette ! \ 429 1.1 fredette /* ! \ 430 1.1 fredette * If shift < 0, we need to shift words to the ! \ 431 1.1 fredette * left. Since we can't do this directly, we ! \ 432 1.1 fredette * adjust the shift so it's a shift to the right ! \ 433 1.1 fredette * and load a zero into the low word of the FIFO. ! \ 434 1.1 fredette * Otherwise, we load the first word into the ! \ 435 1.1 fredette * low word of the FIFO. ! \ 436 1.1 fredette * ! \ 437 1.1 fredette * Note the nullification trickery here. We ! \ 438 1.1 fredette * assume that we're shifting to the left, and ! \ 439 1.1 fredette * load zero into the low word of the FIFO. Then ! \ 440 1.1 fredette * we nullify the addi if we're shifting to the ! \ 441 1.1 fredette * right. If the addi is not nullified, we are ! \ 442 1.1 fredette * shifting to the left, so we nullify the load. ! \ 443 1.1 fredette * The comb branches if we're shifting to the right. ! \ 444 1.1 fredette */ ! \ 445 1.3 chs copy %r0, %t3 ! \ 446 1.3 chs comb,<=,n %r0, %t1, 0 ! \ 447 1.3 chs addi,tr 4, %t1, %t1 ! \ 448 1.3 chs ldws,mb -4(src_spc, src_off), %t3 ! \ 449 1.1 fredette ! \ 450 1.1 fredette /* ! \ 451 1.1 fredette * Turn the shift byte count into a bit count, ! \ 452 1.1 fredette * load the next word, set the Shift Amount ! \ 453 1.1 fredette * Register, and form and store the first word. ! \ 454 1.1 fredette */ ! \ 455 1.3 chs sh3add %t1, %r0, %t1 ! \ 456 1.3 chs ldws,mb -4(src_spc, src_off), %t4 ! \ 457 1.3 chs mtctl %t1, %cr11 ! \ 458 1.3 chs vshd %t4, %t3, %r1 ! \ 459 1.1 fredette _STBYS_E_M(%r1, dst_spc, dst_off) ! \ 460 1.1 fredette ! \ 461 1.1 fredette /* Do the rest of the copy. */ ! \ 462 1.1 fredette _COPYS(src_spc,src_off,dst_spc,dst_off,count,mb,-1) 463 1.1 fredette 464 1.1 fredette /* 465 1.1 fredette * For paranoia, when things aren't going well, enable this 466 1.1 fredette * code to assemble byte-at-a-time-only copying. 
467 1.1 fredette */ 468 1.1 fredette #if 1 469 1.1 fredette #undef _COPY_FORWARD 470 1.1 fredette #define _COPY_FORWARD(src_spc, src_off, dst_spc, dst_off, count) \ 471 1.1 fredette comb,=,n %r0, count, _LABEL(_done) ! \ 472 1.1 fredette ldbs,ma 1(src_spc, src_off), %r1 ! \ 473 1.1 fredette addib,<> -1, count, -12 ! \ 474 1.1 fredette stbs,ma %r1, 1(dst_spc, dst_off) ! \ 475 1.1 fredette b,n _LABEL(_done) 476 1.1 fredette #undef _COPY_REVERSE 477 1.1 fredette #define _COPY_REVERSE(src_spc, src_off, dst_spc, dst_off, count) \ 478 1.1 fredette comb,= %r0, count, _LABEL(_done) ! \ 479 1.1 fredette add src_off, count, src_off ! \ 480 1.1 fredette add dst_off, count, dst_off ! \ 481 1.1 fredette ldbs,mb -1(src_spc, src_off), %r1 ! \ 482 1.1 fredette addib,<> -1, count, -12 ! \ 483 1.1 fredette stbs,mb %r1, -1(dst_spc, dst_off) ! \ 484 1.1 fredette b,n _LABEL(_done) 485 1.1 fredette #endif 486 1.1 fredette 487 1.1 fredette /* 488 1.1 fredette * If none of the following are defined, define BCOPY. 489 1.1 fredette */ 490 1.1 fredette #if !(defined(SPCOPY) || defined(MEMCPY) || defined(MEMMOVE)) 491 1.1 fredette #define BCOPY 492 1.1 fredette #endif 493 1.1 fredette 494 1.1 fredette #if defined(SPCOPY) && !defined(_STANDALONE) 495 1.13 skrll 496 1.1 fredette #include <sys/errno.h> 497 1.1 fredette #include "assym.h" 498 1.1 fredette 499 1.1 fredette /* 500 1.1 fredette * int spcopy(pa_space_t ssp, const void *src, pa_space_t dsp, void *dst, 501 1.1 fredette * size_t len) 502 1.1 fredette * 503 1.1 fredette * We assume that the regions do not overlap. 504 1.1 fredette */ 505 1.1 fredette LEAF_ENTRY(spcopy) 506 1.1 fredette 507 1.1 fredette /* 508 1.11 chs * Setup the fault handler, which will fill in %ret0 if triggered. 
509 1.1 fredette */ 510 1.12 skrll GET_CURLWP(%r31) 511 1.1 fredette #ifdef DIAGNOSTIC 512 1.2 chs comb,<>,n %r0, %r31, Lspcopy_curlwp_ok 513 1.4 perry ldil L%panic, %r1 514 1.2 chs ldil L%Lspcopy_curlwp_bad, %arg0 515 1.1 fredette ldo R%panic(%r1), %r1 516 1.2 chs ldo R%Lspcopy_curlwp_bad(%arg0), %arg0 517 1.1 fredette .call 518 1.1 fredette bv,n %r0(%r1) 519 1.1 fredette nop 520 1.3 chs Lspcopy_curlwp_bad: 521 1.2 chs .asciz "spcopy: curlwp == NULL\n" 522 1.1 fredette .align 8 523 1.3 chs Lspcopy_curlwp_ok: 524 1.1 fredette #endif /* DIAGNOSTIC */ 525 1.2 chs ldil L%spcopy_fault, %r1 526 1.10 skrll ldw L_PCB(%r31), %r31 527 1.2 chs ldo R%spcopy_fault(%r1), %r1 528 1.9 skrll stw %r1, PCB_ONFAULT(%r31) 529 1.1 fredette 530 1.1 fredette /* Setup the space registers. */ 531 1.3 chs mfsp %sr2, %ret1 532 1.3 chs mtsp %arg0, %sr1 533 1.3 chs mtsp %arg2, %sr2 534 1.1 fredette 535 1.1 fredette /* Get the len argument and do the copy. */ 536 1.3 chs ldw HPPA_FRAME_ARG(4)(%sp), %arg0 537 1.1 fredette #define _LABEL(l) __CONCAT(spcopy,l) 538 1.3 chs _COPY_FORWARD(%sr1,%arg1,%sr2,%arg3,%arg0) 539 1.3 chs _LABEL(_done): 540 1.1 fredette 541 1.1 fredette /* Return. */ 542 1.1 fredette copy %r0, %ret0 543 1.1 fredette ALTENTRY(spcopy_fault) 544 1.9 skrll stw %r0, PCB_ONFAULT(%r31) 545 1.1 fredette bv %r0(%rp) 546 1.3 chs mtsp %ret1, %sr2 547 1.1 fredette EXIT(spcopy) 548 1.1 fredette #endif /* SPCOPY && !_STANDALONE */ 549 1.1 fredette 550 1.1 fredette #ifdef MEMCPY 551 1.1 fredette /* 552 1.6 christos * void *memcpy(void *restrict dst, const void *restrict src, size_t len); 553 1.1 fredette * 554 1.1 fredette * memcpy is specifically restricted to working on 555 1.1 fredette * non-overlapping regions, so we can just copy forward. 
556 1.1 fredette */ 557 1.1 fredette LEAF_ENTRY(memcpy) 558 1.1 fredette copy %arg0, %ret0 559 1.1 fredette #define _LABEL(l) __CONCAT(memcpy,l) 560 1.3 chs _COPY_FORWARD(%sr0,%arg1,%sr0,%arg0,%arg2) 561 1.3 chs _LABEL(_done): 562 1.1 fredette bv,n %r0(%rp) 563 1.1 fredette nop 564 1.1 fredette EXIT(memcpy) 565 1.1 fredette #endif /* MEMCPY */ 566 1.1 fredette 567 1.1 fredette #ifdef BCOPY 568 1.1 fredette /* 569 1.1 fredette * void bcopy(const void *src, void *dst, size_t len); 570 1.1 fredette */ 571 1.1 fredette LEAF_ENTRY(bcopy) 572 1.1 fredette copy %arg0, %r1 573 1.1 fredette copy %arg1, %arg0 574 1.1 fredette copy %r1, %arg1 575 1.1 fredette /* FALLTHROUGH */ 576 1.1 fredette #define _LABEL_F(l) __CONCAT(bcopy_F,l) 577 1.1 fredette #define _LABEL_R(l) __CONCAT(bcopy_R,l) 578 1.1 fredette #endif 579 1.1 fredette 580 1.1 fredette #ifdef MEMMOVE 581 1.1 fredette /* 582 1.1 fredette * void *memmove(void *dst, const void *src, size_t len); 583 1.1 fredette */ 584 1.1 fredette LEAF_ENTRY(memmove) 585 1.1 fredette #define _LABEL_F(l) __CONCAT(memmove_F,l) 586 1.1 fredette #define _LABEL_R(l) __CONCAT(memmove_R,l) 587 1.1 fredette copy %arg0, %ret0 588 1.1 fredette #endif /* MEMMOVE */ 589 1.1 fredette 590 1.1 fredette #if defined(BCOPY) || defined(MEMMOVE) 591 1.1 fredette 592 1.1 fredette /* 593 1.1 fredette * If src >= dst or src + len <= dst, we copy 594 1.1 fredette * forward, else we copy in reverse. 
595 1.1 fredette */ 596 1.1 fredette add %arg1, %arg2, %r1 597 1.1 fredette comb,>>=,n %arg1, %arg0, 0 598 1.1 fredette comb,>>,n %r1, %arg0, _LABEL_R(_go) 599 1.1 fredette 600 1.1 fredette #define _LABEL _LABEL_F 601 1.3 chs _COPY_FORWARD(%sr0,%arg1,%sr0,%arg0,%arg2) 602 1.1 fredette #undef _LABEL 603 1.1 fredette 604 1.3 chs _LABEL_R(_go): 605 1.1 fredette #define _LABEL _LABEL_R 606 1.3 chs _COPY_REVERSE(%sr0,%arg1,%sr0,%arg0,%arg2) 607 1.1 fredette #undef _LABEL 608 1.4 perry 609 1.3 chs _LABEL_F(_done): 610 1.3 chs _LABEL_R(_done): 611 1.1 fredette bv,n %r0(%rp) 612 1.1 fredette nop 613 1.1 fredette #ifdef BCOPY 614 1.1 fredette EXIT(bcopy) 615 1.1 fredette #else 616 1.1 fredette EXIT(memmove) 617 1.1 fredette #endif 618 1.1 fredette #endif /* BCOPY || MEMMOVE */ 619