1 1.2 christos /* $NetBSD: memcpy.S,v 1.2 2013/03/17 02:13:10 christos Exp $ */ 2 1.1 christos 3 1.1 christos /* 4 1.1 christos * Copyright (c) 1996-2002 Eduardo Horvath 5 1.1 christos * All rights reserved. 6 1.1 christos * 7 1.1 christos * Redistribution and use in source and binary forms, with or without 8 1.1 christos * modification, are permitted provided that the following conditions 9 1.1 christos * are met: 10 1.1 christos * 1. Redistributions of source code must retain the above copyright 11 1.1 christos * notice, this list of conditions and the following disclaimer. 12 1.1 christos * 13 1.1 christos * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND 14 1.1 christos * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 1.1 christos * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 1.1 christos * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE 17 1.1 christos * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 1.1 christos * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 1.1 christos * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 1.1 christos * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 1.1 christos * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 1.1 christos * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 1.1 christos * SUCH DAMAGE. 24 1.1 christos * 25 1.1 christos */ 26 1.1 christos #include "strmacros.h" 27 1.2 christos #if defined(LIBC_SCCS) && !defined(lint) 28 1.2 christos RCSID("$NetBSD: memcpy.S,v 1.2 2013/03/17 02:13:10 christos Exp $") 29 1.2 christos #endif /* LIBC_SCCS and not lint */ 30 1.1 christos 31 1.1 christos /* 32 1.2 christos * memcpy 33 1.2 christos * Assumes regions do not overlap; 34 1.1 christos * 35 1.1 christos * Must not use %g7 (see copyin/copyout above). 
36 1.1 christos */ 37 1.1 christos ENTRY(memcpy) /* dest, src, size */ 38 1.1 christos /* 39 1.1 christos * Swap args for bcopy. Gcc generates calls to memcpy for 40 1.1 christos * structure assignments. 41 1.1 christos */ 42 1.1 christos mov %o0, %o3 43 1.1 christos mov %o1, %o0 44 1.1 christos mov %o3, %o1 45 1.1 christos #if !defined(_KERNEL) || defined(_RUMPKERNEL) 46 1.1 christos ENTRY(bcopy) /* src, dest, size */ 47 1.1 christos #endif 48 1.1 christos #ifdef DEBUG 49 1.1 christos #if defined(_KERNEL) && !defined(_RUMPKERNEL) 50 1.1 christos set pmapdebug, %o4 51 1.1 christos ld [%o4], %o4 52 1.1 christos btst 0x80, %o4 ! PDB_COPY 53 1.1 christos bz,pt %icc, 3f 54 1.1 christos nop 55 1.1 christos #endif 56 1.1 christos save %sp, -CC64FSZ, %sp 57 1.1 christos mov %i0, %o1 58 1.1 christos set 2f, %o0 59 1.1 christos mov %i1, %o2 60 1.1 christos call printf 61 1.1 christos mov %i2, %o3 62 1.1 christos ! ta 1; nop 63 1.1 christos restore 64 1.1 christos .data 65 1.1 christos 2: .asciz "memcpy(%p<-%p,%x)\n" 66 1.1 christos _ALIGN 67 1.1 christos .text 68 1.1 christos 3: 69 1.1 christos #endif 70 1.1 christos 71 1.1 christos cmp %o2, BCOPY_SMALL 72 1.1 christos 73 1.1 christos Lmemcpy_start: 74 1.1 christos bge,pt CCCR, 2f ! if >= this many, go be fancy. 75 1.1 christos cmp %o2, 256 76 1.1 christos 77 1.1 christos mov %o1, %o5 ! Save memcpy return value 78 1.1 christos /* 79 1.1 christos * Not much to copy, just do it a byte at a time. 80 1.1 christos */ 81 1.1 christos deccc %o2 ! while (--len >= 0) 82 1.1 christos bl 1f 83 1.1 christos .empty 84 1.1 christos 0: 85 1.1 christos inc %o0 86 1.1 christos ldsb [%o0 - 1], %o4 ! (++dst)[-1] = *src++; 87 1.1 christos stb %o4, [%o1] 88 1.1 christos deccc %o2 89 1.1 christos bge 0b 90 1.1 christos inc %o1 91 1.1 christos 1: 92 1.1 christos retl 93 1.1 christos mov %o5, %o0 94 1.1 christos NOTREACHED 95 1.1 christos 96 1.1 christos /* 97 1.1 christos * Plenty of data to copy, so try to do it optimally. 
98 1.1 christos */ 99 1.1 christos 2: 100 1.1 christos #ifdef USE_BLOCK_STORE_LOAD 101 1.1 christos ! If it is big enough, use VIS instructions 102 1.1 christos bge Lmemcpy_block 103 1.1 christos nop 104 1.1 christos #endif /* USE_BLOCK_STORE_LOAD */ 105 1.1 christos Lmemcpy_fancy: 106 1.1 christos 107 1.1 christos !! 108 1.1 christos !! First align the output to a 8-byte entity 109 1.1 christos !! 110 1.1 christos 111 1.1 christos save %sp, -CC64FSZ, %sp 112 1.1 christos 113 1.1 christos mov %i0, %l0 114 1.1 christos mov %i1, %l1 115 1.1 christos 116 1.1 christos mov %i2, %l2 117 1.1 christos btst 1, %l1 118 1.1 christos 119 1.1 christos bz,pt %icc, 4f 120 1.1 christos btst 2, %l1 121 1.1 christos ldub [%l0], %l4 ! Load 1st byte 122 1.1 christos 123 1.1 christos deccc 1, %l2 124 1.1 christos ble,pn CCCR, Lmemcpy_finish ! XXXX 125 1.1 christos inc 1, %l0 126 1.1 christos 127 1.1 christos stb %l4, [%l1] ! Store 1st byte 128 1.1 christos inc 1, %l1 ! Update address 129 1.1 christos btst 2, %l1 130 1.1 christos 4: 131 1.1 christos bz,pt %icc, 4f 132 1.1 christos 133 1.1 christos btst 1, %l0 134 1.1 christos bz,a 1f 135 1.1 christos lduh [%l0], %l4 ! Load short 136 1.1 christos 137 1.1 christos ldub [%l0], %l4 ! Load bytes 138 1.1 christos 139 1.1 christos ldub [%l0+1], %l3 140 1.1 christos sllx %l4, 8, %l4 141 1.1 christos or %l3, %l4, %l4 142 1.1 christos 143 1.1 christos 1: 144 1.1 christos deccc 2, %l2 145 1.1 christos ble,pn CCCR, Lmemcpy_finish ! XXXX 146 1.1 christos inc 2, %l0 147 1.1 christos sth %l4, [%l1] ! Store 1st short 148 1.1 christos 149 1.1 christos inc 2, %l1 150 1.1 christos 4: 151 1.1 christos btst 4, %l1 152 1.1 christos bz,pt CCCR, 4f 153 1.1 christos 154 1.1 christos btst 3, %l0 155 1.1 christos bz,a,pt CCCR, 1f 156 1.1 christos lduw [%l0], %l4 ! 
Load word -1 157 1.1 christos 158 1.1 christos btst 1, %l0 159 1.1 christos bz,a,pt %icc, 2f 160 1.1 christos lduh [%l0], %l4 161 1.1 christos 162 1.1 christos ldub [%l0], %l4 163 1.1 christos 164 1.1 christos lduh [%l0+1], %l3 165 1.1 christos sllx %l4, 16, %l4 166 1.1 christos or %l4, %l3, %l4 167 1.1 christos 168 1.1 christos ldub [%l0+3], %l3 169 1.1 christos sllx %l4, 8, %l4 170 1.1 christos ba,pt %icc, 1f 171 1.1 christos or %l4, %l3, %l4 172 1.1 christos 173 1.1 christos 2: 174 1.1 christos lduh [%l0+2], %l3 175 1.1 christos sllx %l4, 16, %l4 176 1.1 christos or %l4, %l3, %l4 177 1.1 christos 178 1.1 christos 1: 179 1.1 christos deccc 4, %l2 180 1.1 christos ble,pn CCCR, Lmemcpy_finish ! XXXX 181 1.1 christos inc 4, %l0 182 1.1 christos 183 1.1 christos st %l4, [%l1] ! Store word 184 1.1 christos inc 4, %l1 185 1.1 christos 4: 186 1.1 christos !! 187 1.1 christos !! We are now 32-bit aligned in the dest. 188 1.1 christos !! 189 1.1 christos Lmemcpy_common: 190 1.1 christos 191 1.1 christos and %l0, 7, %l4 ! Shift amount 192 1.1 christos andn %l0, 7, %l0 ! Source addr 193 1.1 christos 194 1.1 christos brz,pt %l4, Lmemcpy_noshift8 ! No shift version... 195 1.1 christos 196 1.1 christos sllx %l4, 3, %l4 ! In bits 197 1.1 christos mov 8<<3, %l3 198 1.1 christos 199 1.1 christos ldx [%l0], %o0 ! Load word -1 200 1.1 christos sub %l3, %l4, %l3 ! Reverse shift 201 1.1 christos deccc 12*8, %l2 ! Have enough room? 202 1.1 christos 203 1.1 christos sllx %o0, %l4, %o0 204 1.1 christos bl,pn CCCR, 2f 205 1.1 christos and %l3, 0x38, %l3 206 1.1 christos Lmemcpy_unrolled8: 207 1.1 christos 208 1.1 christos /* 209 1.1 christos * This is about as close to optimal as you can get, since 210 1.1 christos * the shifts require EU0 and cannot be paired, and you have 211 1.1 christos * 3 dependent operations on the data. 212 1.1 christos */ 213 1.1 christos 214 1.1 christos ! ldx [%l0+0*8], %o0 ! Already done 215 1.1 christos ! sllx %o0, %l4, %o0 ! 
Already done 216 1.1 christos ldx [%l0+1*8], %o1 217 1.1 christos ldx [%l0+2*8], %o2 218 1.1 christos ldx [%l0+3*8], %o3 219 1.1 christos ldx [%l0+4*8], %o4 220 1.1 christos ba,pt %icc, 1f 221 1.1 christos ldx [%l0+5*8], %o5 222 1.1 christos .align 8 223 1.1 christos 1: 224 1.1 christos srlx %o1, %l3, %g1 225 1.1 christos inc 6*8, %l0 226 1.1 christos 227 1.1 christos sllx %o1, %l4, %o1 228 1.1 christos or %g1, %o0, %g6 229 1.1 christos ldx [%l0+0*8], %o0 230 1.1 christos 231 1.1 christos stx %g6, [%l1+0*8] 232 1.1 christos srlx %o2, %l3, %g1 233 1.1 christos 234 1.1 christos sllx %o2, %l4, %o2 235 1.1 christos or %g1, %o1, %g6 236 1.1 christos ldx [%l0+1*8], %o1 237 1.1 christos 238 1.1 christos stx %g6, [%l1+1*8] 239 1.1 christos srlx %o3, %l3, %g1 240 1.1 christos 241 1.1 christos sllx %o3, %l4, %o3 242 1.1 christos or %g1, %o2, %g6 243 1.1 christos ldx [%l0+2*8], %o2 244 1.1 christos 245 1.1 christos stx %g6, [%l1+2*8] 246 1.1 christos srlx %o4, %l3, %g1 247 1.1 christos 248 1.1 christos sllx %o4, %l4, %o4 249 1.1 christos or %g1, %o3, %g6 250 1.1 christos ldx [%l0+3*8], %o3 251 1.1 christos 252 1.1 christos stx %g6, [%l1+3*8] 253 1.1 christos srlx %o5, %l3, %g1 254 1.1 christos 255 1.1 christos sllx %o5, %l4, %o5 256 1.1 christos or %g1, %o4, %g6 257 1.1 christos ldx [%l0+4*8], %o4 258 1.1 christos 259 1.1 christos stx %g6, [%l1+4*8] 260 1.1 christos srlx %o0, %l3, %g1 261 1.1 christos deccc 6*8, %l2 ! Have enough room? 262 1.1 christos 263 1.1 christos sllx %o0, %l4, %o0 ! Next loop 264 1.1 christos or %g1, %o5, %g6 265 1.1 christos ldx [%l0+5*8], %o5 266 1.1 christos 267 1.1 christos stx %g6, [%l1+5*8] 268 1.1 christos bge,pt CCCR, 1b 269 1.1 christos inc 6*8, %l1 270 1.1 christos 271 1.1 christos Lmemcpy_unrolled8_cleanup: 272 1.1 christos !! 273 1.1 christos !! Finished 8 byte block, unload the regs. 274 1.1 christos !! 
275 1.1 christos srlx %o1, %l3, %g1 276 1.1 christos inc 5*8, %l0 277 1.1 christos 278 1.1 christos sllx %o1, %l4, %o1 279 1.1 christos or %g1, %o0, %g6 280 1.1 christos 281 1.1 christos stx %g6, [%l1+0*8] 282 1.1 christos srlx %o2, %l3, %g1 283 1.1 christos 284 1.1 christos sllx %o2, %l4, %o2 285 1.1 christos or %g1, %o1, %g6 286 1.1 christos 287 1.1 christos stx %g6, [%l1+1*8] 288 1.1 christos srlx %o3, %l3, %g1 289 1.1 christos 290 1.1 christos sllx %o3, %l4, %o3 291 1.1 christos or %g1, %o2, %g6 292 1.1 christos 293 1.1 christos stx %g6, [%l1+2*8] 294 1.1 christos srlx %o4, %l3, %g1 295 1.1 christos 296 1.1 christos sllx %o4, %l4, %o4 297 1.1 christos or %g1, %o3, %g6 298 1.1 christos 299 1.1 christos stx %g6, [%l1+3*8] 300 1.1 christos srlx %o5, %l3, %g1 301 1.1 christos 302 1.1 christos sllx %o5, %l4, %o5 303 1.1 christos or %g1, %o4, %g6 304 1.1 christos 305 1.1 christos stx %g6, [%l1+4*8] 306 1.1 christos inc 5*8, %l1 307 1.1 christos 308 1.1 christos mov %o5, %o0 ! Save our unused data 309 1.1 christos dec 5*8, %l2 310 1.1 christos 2: 311 1.1 christos inccc 12*8, %l2 312 1.1 christos bz,pn %icc, Lmemcpy_complete 313 1.1 christos 314 1.1 christos !! Unrolled 8 times 315 1.1 christos Lmemcpy_aligned8: 316 1.1 christos ! ldx [%l0], %o0 ! Already done 317 1.1 christos ! sllx %o0, %l4, %o0 ! Shift high word 318 1.1 christos 319 1.1 christos deccc 8, %l2 ! Pre-decrement 320 1.1 christos bl,pn CCCR, Lmemcpy_finish 321 1.1 christos 1: 322 1.1 christos ldx [%l0+8], %o1 ! Load word 0 323 1.1 christos inc 8, %l0 324 1.1 christos 325 1.1 christos srlx %o1, %l3, %g6 326 1.1 christos or %g6, %o0, %g6 ! Combine 327 1.1 christos 328 1.1 christos stx %g6, [%l1] ! Store result 329 1.1 christos inc 8, %l1 330 1.1 christos 331 1.1 christos deccc 8, %l2 332 1.1 christos bge,pn CCCR, 1b 333 1.1 christos sllx %o1, %l4, %o0 334 1.1 christos 335 1.1 christos btst 7, %l2 ! Done? 336 1.1 christos bz,pt CCCR, Lmemcpy_complete 337 1.1 christos 338 1.1 christos !! 339 1.1 christos !! 
Loadup the last dregs into %o0 and shift it into place 340 1.1 christos !! 341 1.1 christos srlx %l3, 3, %g6 ! # bytes in %o0 342 1.1 christos dec 8, %g6 ! - 8 343 1.1 christos !! n-8 - (by - 8) -> n - by 344 1.1 christos subcc %l2, %g6, %g0 ! # bytes we need 345 1.1 christos ble,pt %icc, Lmemcpy_finish 346 1.1 christos nop 347 1.1 christos ldx [%l0+8], %o1 ! Need another word 348 1.1 christos srlx %o1, %l3, %o1 349 1.1 christos ba,pt %icc, Lmemcpy_finish 350 1.1 christos or %o0, %o1, %o0 ! All loaded up. 351 1.1 christos 352 1.1 christos Lmemcpy_noshift8: 353 1.1 christos deccc 6*8, %l2 ! Have enough room? 354 1.1 christos bl,pn CCCR, 2f 355 1.1 christos nop 356 1.1 christos ba,pt %icc, 1f 357 1.1 christos nop 358 1.1 christos .align 32 359 1.1 christos 1: 360 1.1 christos ldx [%l0+0*8], %o0 361 1.1 christos ldx [%l0+1*8], %o1 362 1.1 christos ldx [%l0+2*8], %o2 363 1.1 christos stx %o0, [%l1+0*8] 364 1.1 christos stx %o1, [%l1+1*8] 365 1.1 christos stx %o2, [%l1+2*8] 366 1.1 christos 367 1.1 christos 368 1.1 christos ldx [%l0+3*8], %o3 369 1.1 christos ldx [%l0+4*8], %o4 370 1.1 christos ldx [%l0+5*8], %o5 371 1.1 christos inc 6*8, %l0 372 1.1 christos stx %o3, [%l1+3*8] 373 1.1 christos deccc 6*8, %l2 374 1.1 christos stx %o4, [%l1+4*8] 375 1.1 christos stx %o5, [%l1+5*8] 376 1.1 christos bge,pt CCCR, 1b 377 1.1 christos inc 6*8, %l1 378 1.1 christos 2: 379 1.1 christos inc 6*8, %l2 380 1.1 christos 1: 381 1.1 christos deccc 8, %l2 382 1.1 christos bl,pn %icc, 1f ! < 0 --> sub word 383 1.1 christos nop 384 1.1 christos ldx [%l0], %g6 385 1.1 christos inc 8, %l0 386 1.1 christos stx %g6, [%l1] 387 1.1 christos bg,pt %icc, 1b ! Exactly 0 --> done 388 1.1 christos inc 8, %l1 389 1.1 christos 1: 390 1.1 christos btst 7, %l2 ! Done? 391 1.1 christos bz,pt CCCR, Lmemcpy_complete 392 1.1 christos clr %l4 393 1.1 christos ldx [%l0], %o0 394 1.1 christos Lmemcpy_finish: 395 1.1 christos 396 1.1 christos brz,pn %l2, 2f ! 100% complete? 397 1.1 christos cmp %l2, 8 ! 
Exactly 8 bytes? 398 1.1 christos bz,a,pn CCCR, 2f 399 1.1 christos stx %o0, [%l1] 400 1.1 christos 401 1.1 christos btst 4, %l2 ! Word store? 402 1.1 christos bz CCCR, 1f 403 1.1 christos srlx %o0, 32, %g6 ! Shift high word down 404 1.1 christos stw %g6, [%l1] 405 1.1 christos inc 4, %l1 406 1.1 christos mov %o0, %g6 ! Operate on the low bits 407 1.1 christos 1: 408 1.1 christos btst 2, %l2 409 1.1 christos mov %g6, %o0 410 1.1 christos bz 1f 411 1.1 christos srlx %o0, 16, %g6 412 1.1 christos 413 1.1 christos sth %g6, [%l1] ! Store short 414 1.1 christos inc 2, %l1 415 1.1 christos mov %o0, %g6 ! Operate on low bytes 416 1.1 christos 1: 417 1.1 christos mov %g6, %o0 418 1.1 christos btst 1, %l2 ! Byte aligned? 419 1.1 christos bz 2f 420 1.1 christos srlx %o0, 8, %g6 421 1.1 christos 422 1.1 christos stb %g6, [%l1] ! Store last byte 423 1.1 christos inc 1, %l1 ! Update address 424 1.1 christos 2: 425 1.1 christos Lmemcpy_complete: 426 1.1 christos #if 0 427 1.1 christos !! 428 1.1 christos !! verify copy success. 429 1.1 christos !! 
430 1.1 christos 431 1.1 christos mov %i0, %o2 432 1.1 christos mov %i1, %o4 433 1.1 christos mov %i2, %l4 434 1.1 christos 0: 435 1.1 christos ldub [%o2], %o1 436 1.1 christos inc %o2 437 1.1 christos ldub [%o4], %o3 438 1.1 christos inc %o4 439 1.1 christos cmp %o3, %o1 440 1.1 christos bnz 1f 441 1.1 christos dec %l4 442 1.1 christos brnz %l4, 0b 443 1.1 christos nop 444 1.1 christos ba 2f 445 1.1 christos nop 446 1.1 christos 447 1.1 christos 1: 448 1.1 christos set 0f, %o0 449 1.1 christos call printf 450 1.1 christos sub %i2, %l4, %o5 451 1.1 christos set 1f, %o0 452 1.1 christos mov %i0, %o2 453 1.1 christos mov %i1, %o1 454 1.1 christos call printf 455 1.1 christos mov %i2, %o3 456 1.1 christos ta 1 457 1.1 christos .data 458 1.1 christos 0: .asciz "memcpy failed: %x@%p != %x@%p byte %d\n" 459 1.1 christos 1: .asciz "memcpy(%p, %p, %lx)\n" 460 1.1 christos .align 8 461 1.1 christos .text 462 1.1 christos 2: 463 1.1 christos #endif 464 1.1 christos ret 465 1.1 christos restore %i1, %g0, %o0 466 1.1 christos 467 1.1 christos #ifdef USE_BLOCK_STORE_LOAD 468 1.1 christos 469 1.1 christos /* 470 1.1 christos * Block copy. Useful for >256 byte copies. 471 1.1 christos * 472 1.1 christos * Benchmarking has shown this always seems to be slower than 473 1.1 christos * the integer version, so this is disabled. Maybe someone will 474 1.1 christos * figure out why sometime. 475 1.1 christos */ 476 1.1 christos 477 1.1 christos Lmemcpy_block: 478 1.1 christos sethi %hi(block_disable), %o3 479 1.1 christos ldx [ %o3 + %lo(block_disable) ], %o3 480 1.1 christos brnz,pn %o3, Lmemcpy_fancy 481 1.1 christos !! Make sure our trap table is installed 482 1.1 christos set _C_LABEL(trapbase), %o5 483 1.1 christos rdpr %tba, %o3 484 1.1 christos sub %o3, %o5, %o3 485 1.1 christos brnz,pn %o3, Lmemcpy_fancy ! 
No, then don't use block load/store 486 1.1 christos nop 487 1.1 christos #if defined(_KERNEL) && !defined(_RUMPKERNEL) 488 1.1 christos /* 489 1.1 christos * Kernel: 490 1.1 christos * 491 1.1 christos * Here we use VIS instructions to do a block copy. 492 1.1 christos * But before we can do that we need to save and enable the FPU. 493 1.1 christos * The last owner of the FPU registers is fplwp, and 494 1.1 christos * fplwp->l_md.md_fpstate is the current fpstate. If that's not 495 1.1 christos * null, call savefpstate() with it to store our current fp state. 496 1.1 christos * 497 1.1 christos * Next, allocate an aligned fpstate on the stack. We will properly 498 1.1 christos * nest calls on a particular stack so this should not be a problem. 499 1.1 christos * 500 1.1 christos * Now we grab either curlwp (or if we're on the interrupt stack 501 1.1 christos * lwp0). We stash its existing fpstate in a local register and 502 1.1 christos * put our new fpstate in curlwp->p_md.md_fpstate. We point 503 1.1 christos * fplwp at curlwp (or lwp0) and enable the FPU. 504 1.1 christos * 505 1.1 christos * If we are ever preempted, our FPU state will be saved in our 506 1.1 christos * fpstate. Then, when we're resumed and we take an FPDISABLED 507 1.1 christos * trap, the trap handler will be able to fish our FPU state out 508 1.1 christos * of curlwp (or lwp0). 509 1.1 christos * 510 1.1 christos * On exiting this routine we undo the damage: restore the original 511 1.1 christos * pointer to curlwp->p_md.md_fpstate, clear our fplwp, and disable 512 1.1 christos * the FPU. 
513 1.1 christos * 514 1.1 christos * 515 1.1 christos * Register usage, Kernel only (after save): 516 1.1 christos * 517 1.1 christos * %i0 src 518 1.1 christos * %i1 dest 519 1.1 christos * %i2 size 520 1.1 christos * 521 1.1 christos * %l0 XXXX DEBUG old fpstate 522 1.1 christos * %l1 fplwp (hi bits only) 523 1.1 christos * %l2 orig fplwp 524 1.1 christos * %l3 orig fpstate 525 1.1 christos * %l5 curlwp 526 1.1 christos * %l6 old fpstate 527 1.1 christos * 528 1.1 christos * Register usage, Kernel and user: 529 1.1 christos * 530 1.1 christos * %g1 dest (retval for memcpy) 531 1.1 christos * 532 1.1 christos * %o0 src 533 1.1 christos * %o1 dest 534 1.1 christos * %o2 end dest 535 1.1 christos * %o5 last safe fetchable address 536 1.1 christos */ 537 1.1 christos 538 1.1 christos ENABLE_FPU(0) 539 1.1 christos 540 1.1 christos mov %i0, %o0 ! Src addr. 541 1.1 christos mov %i1, %o1 ! Store our dest ptr here. 542 1.1 christos mov %i2, %o2 ! Len counter 543 1.1 christos #endif /* _KERNEL */ 544 1.1 christos 545 1.1 christos !! 546 1.1 christos !! First align the output to a 64-bit entity 547 1.1 christos !! 548 1.1 christos 549 1.1 christos mov %o1, %g1 ! memcpy retval 550 1.1 christos add %o0, %o2, %o5 ! End of source block 551 1.1 christos 552 1.1 christos andn %o0, 7, %o3 ! Start of block 553 1.1 christos dec %o5 554 1.1 christos fzero %f0 555 1.1 christos 556 1.1 christos andn %o5, BLOCK_ALIGN, %o5 ! Last safe addr. 557 1.1 christos ldd [%o3], %f2 ! Load 1st word 558 1.1 christos 559 1.1 christos dec 8, %o3 ! Move %o3 1 word back 560 1.1 christos btst 1, %o1 561 1.1 christos bz 4f 562 1.1 christos 563 1.1 christos mov -7, %o4 ! Lowest src addr possible 564 1.1 christos alignaddr %o0, %o4, %o4 ! Base addr for load. 565 1.1 christos 566 1.1 christos cmp %o3, %o4 567 1.1 christos be,pt CCCR, 1f ! Already loaded? 568 1.1 christos mov %o4, %o3 569 1.1 christos fmovd %f2, %f0 ! No. Shift 570 1.1 christos ldd [%o3+8], %f2 ! 
And load 571 1.1 christos 1: 572 1.1 christos 573 1.1 christos faligndata %f0, %f2, %f4 ! Isolate 1st byte 574 1.1 christos 575 1.1 christos stda %f4, [%o1] ASI_FL8_P ! Store 1st byte 576 1.1 christos inc 1, %o1 ! Update address 577 1.1 christos inc 1, %o0 578 1.1 christos dec 1, %o2 579 1.1 christos 4: 580 1.1 christos btst 2, %o1 581 1.1 christos bz 4f 582 1.1 christos 583 1.1 christos mov -6, %o4 ! Calculate src - 6 584 1.1 christos alignaddr %o0, %o4, %o4 ! calculate shift mask and dest. 585 1.1 christos 586 1.1 christos cmp %o3, %o4 ! Addresses same? 587 1.1 christos be,pt CCCR, 1f 588 1.1 christos mov %o4, %o3 589 1.1 christos fmovd %f2, %f0 ! Shuffle data 590 1.1 christos ldd [%o3+8], %f2 ! Load word 0 591 1.1 christos 1: 592 1.1 christos faligndata %f0, %f2, %f4 ! Move 1st short low part of f8 593 1.1 christos 594 1.1 christos stda %f4, [%o1] ASI_FL16_P ! Store 1st short 595 1.1 christos dec 2, %o2 596 1.1 christos inc 2, %o1 597 1.1 christos inc 2, %o0 598 1.1 christos 4: 599 1.1 christos brz,pn %o2, Lmemcpy_blockfinish ! XXXX 600 1.1 christos 601 1.1 christos btst 4, %o1 602 1.1 christos bz 4f 603 1.1 christos 604 1.1 christos mov -4, %o4 605 1.1 christos alignaddr %o0, %o4, %o4 ! calculate shift mask and dest. 606 1.1 christos 607 1.1 christos cmp %o3, %o4 ! Addresses same? 608 1.1 christos beq,pt CCCR, 1f 609 1.1 christos mov %o4, %o3 610 1.1 christos fmovd %f2, %f0 ! Shuffle data 611 1.1 christos ldd [%o3+8], %f2 ! Load word 0 612 1.1 christos 1: 613 1.1 christos faligndata %f0, %f2, %f4 ! Move 1st short low part of f8 614 1.1 christos 615 1.1 christos st %f5, [%o1] ! Store word 616 1.1 christos dec 4, %o2 617 1.1 christos inc 4, %o1 618 1.1 christos inc 4, %o0 619 1.1 christos 4: 620 1.1 christos brz,pn %o2, Lmemcpy_blockfinish ! XXXX 621 1.1 christos !! 622 1.1 christos !! We are now 32-bit aligned in the dest. 623 1.1 christos !! 
624 1.1 christos Lmemcpy_block_common: 625 1.1 christos 626 1.1 christos mov -0, %o4 627 1.1 christos alignaddr %o0, %o4, %o4 ! base - shift 628 1.1 christos 629 1.1 christos cmp %o3, %o4 ! Addresses same? 630 1.1 christos beq,pt CCCR, 1f 631 1.1 christos mov %o4, %o3 632 1.1 christos fmovd %f2, %f0 ! Shuffle data 633 1.1 christos ldd [%o3+8], %f2 ! Load word 0 634 1.1 christos 1: 635 1.1 christos add %o3, 8, %o0 ! now use %o0 for src 636 1.1 christos 637 1.1 christos !! 638 1.1 christos !! Continue until our dest is block aligned 639 1.1 christos !! 640 1.1 christos Lmemcpy_block_aligned8: 641 1.1 christos 1: 642 1.1 christos brz %o2, Lmemcpy_blockfinish 643 1.1 christos btst BLOCK_ALIGN, %o1 ! Block aligned? 644 1.1 christos bz 1f 645 1.1 christos 646 1.1 christos faligndata %f0, %f2, %f4 ! Generate result 647 1.1 christos deccc 8, %o2 648 1.1 christos ble,pn %icc, Lmemcpy_blockfinish ! Should never happen 649 1.1 christos fmovd %f4, %f48 650 1.1 christos 651 1.1 christos std %f4, [%o1] ! Store result 652 1.1 christos inc 8, %o1 653 1.1 christos 654 1.1 christos fmovd %f2, %f0 655 1.1 christos inc 8, %o0 656 1.1 christos ba,pt %xcc, 1b ! Not yet. 657 1.1 christos ldd [%o0], %f2 ! Load next part 658 1.1 christos Lmemcpy_block_aligned64: 659 1.1 christos 1: 660 1.1 christos 661 1.1 christos /* 662 1.1 christos * 64-byte aligned -- ready for block operations. 663 1.1 christos * 664 1.1 christos * Here we have the destination block aligned, but the 665 1.1 christos * source pointer may not be. Sub-word alignment will 666 1.1 christos * be handled by faligndata instructions. But the source 667 1.1 christos * can still be potentially aligned to 8 different words 668 1.1 christos * in our 64-byte block, so we have 8 different copy routines. 
669 1.1 christos * 670 1.1 christos * Once we figure out our source alignment, we branch 671 1.1 christos * to the appropriate copy routine, which sets up the 672 1.1 christos * alignment for faligndata and loads (sets) the values 673 1.1 christos * into the source registers and does the copy loop. 674 1.1 christos * 675 1.1 christos * When we're down to less than 1 block to store, we 676 1.1 christos * exit the copy loop and execute cleanup code. 677 1.1 christos * 678 1.1 christos * Block loads and stores are not properly interlocked. 679 1.1 christos * Stores save one reg/cycle, so you can start overwriting 680 1.1 christos * registers the cycle after the store is issued. 681 1.1 christos * 682 1.1 christos * Block loads require a block load to a different register 683 1.1 christos * block or a membar #Sync before accessing the loaded 684 1.1 christos * data. 685 1.1 christos * 686 1.1 christos * Since the faligndata instructions may be offset as far 687 1.1 christos * as 7 registers into a block (if you are shifting source 688 1.1 christos * 7 -> dest 0), you need 3 source register blocks for full 689 1.1 christos * performance: one you are copying, one you are loading, 690 1.1 christos * and one for interlocking. Otherwise, we would need to 691 1.1 christos * sprinkle the code with membar #Sync and lose the advantage 692 1.1 christos * of running faligndata in parallel with block stores. This 693 1.1 christos * means we are fetching a full 128 bytes ahead of the stores. 694 1.1 christos * We need to make sure the prefetch does not inadvertently 695 1.1 christos * cross a page boundary and fault on data that we will never 696 1.1 christos * store. 697 1.1 christos * 698 1.1 christos */ 699 1.1 christos #if 1 700 1.1 christos and %o0, BLOCK_ALIGN, %o3 701 1.1 christos srax %o3, 3, %o3 ! Isolate the offset 702 1.1 christos 703 1.1 christos brz %o3, L100 ! 
0->0 704 1.1 christos btst 4, %o3 705 1.1 christos bnz %xcc, 4f 706 1.1 christos btst 2, %o3 707 1.1 christos bnz %xcc, 2f 708 1.1 christos btst 1, %o3 709 1.1 christos ba,pt %xcc, L101 ! 0->1 710 1.1 christos nop /* XXX spitfire bug */ 711 1.1 christos 2: 712 1.1 christos bz %xcc, L102 ! 0->2 713 1.1 christos nop 714 1.1 christos ba,pt %xcc, L103 ! 0->3 715 1.1 christos nop /* XXX spitfire bug */ 716 1.1 christos 4: 717 1.1 christos bnz %xcc, 2f 718 1.1 christos btst 1, %o3 719 1.1 christos bz %xcc, L104 ! 0->4 720 1.1 christos nop 721 1.1 christos ba,pt %xcc, L105 ! 0->5 722 1.1 christos nop /* XXX spitfire bug */ 723 1.1 christos 2: 724 1.1 christos bz %xcc, L106 ! 0->6 725 1.1 christos nop 726 1.1 christos ba,pt %xcc, L107 ! 0->7 727 1.1 christos nop /* XXX spitfire bug */ 728 1.1 christos #else 729 1.1 christos 730 1.1 christos !! 731 1.1 christos !! Isolate the word offset, which just happens to be 732 1.1 christos !! the slot in our jump table. 733 1.1 christos !! 734 1.1 christos !! This is 6 insns, most of which cannot be paired, 735 1.1 christos !! which is about the same as the above version. 736 1.1 christos !! 737 1.1 christos rd %pc, %o4 738 1.1 christos 1: 739 1.1 christos and %o0, 0x31, %o3 740 1.1 christos add %o3, (Lmemcpy_block_jmp - 1b), %o3 741 1.1 christos jmpl %o4 + %o3, %g0 742 1.1 christos nop 743 1.1 christos 744 1.1 christos !! 745 1.1 christos !! Jump table 746 1.1 christos !! 747 1.1 christos 748 1.1 christos Lmemcpy_block_jmp: 749 1.1 christos ba,a,pt %xcc, L100 750 1.1 christos nop 751 1.1 christos ba,a,pt %xcc, L101 752 1.1 christos nop 753 1.1 christos ba,a,pt %xcc, L102 754 1.1 christos nop 755 1.1 christos ba,a,pt %xcc, L103 756 1.1 christos nop 757 1.1 christos ba,a,pt %xcc, L104 758 1.1 christos nop 759 1.1 christos ba,a,pt %xcc, L105 760 1.1 christos nop 761 1.1 christos ba,a,pt %xcc, L106 762 1.1 christos nop 763 1.1 christos ba,a,pt %xcc, L107 764 1.1 christos nop 765 1.1 christos #endif 766 1.1 christos 767 1.1 christos !! 
768 1.1 christos !! Source is block aligned. 769 1.1 christos !! 770 1.1 christos !! Just load a block and go. 771 1.1 christos !! 772 1.1 christos L100: 773 1.1 christos #ifdef RETURN_NAME 774 1.1 christos sethi %hi(1f), %g1 775 1.1 christos ba,pt %icc, 2f 776 1.1 christos or %g1, %lo(1f), %g1 777 1.1 christos 1: 778 1.1 christos .asciz "L100" 779 1.1 christos .align 8 780 1.1 christos 2: 781 1.1 christos #endif 782 1.1 christos fmovd %f0 , %f62 783 1.1 christos ldda [%o0] ASI_BLK_P, %f0 784 1.1 christos inc BLOCK_SIZE, %o0 785 1.1 christos cmp %o0, %o5 786 1.1 christos bleu,a,pn %icc, 3f 787 1.1 christos ldda [%o0] ASI_BLK_P, %f16 788 1.1 christos ba,pt %icc, 3f 789 1.1 christos membar #Sync 790 1.1 christos 791 1.1 christos .align 32 ! ICache align. 792 1.1 christos 3: 793 1.1 christos faligndata %f62, %f0, %f32 794 1.1 christos inc BLOCK_SIZE, %o0 795 1.1 christos faligndata %f0, %f2, %f34 796 1.1 christos dec BLOCK_SIZE, %o2 797 1.1 christos faligndata %f2, %f4, %f36 798 1.1 christos cmp %o0, %o5 799 1.1 christos faligndata %f4, %f6, %f38 800 1.1 christos faligndata %f6, %f8, %f40 801 1.1 christos faligndata %f8, %f10, %f42 802 1.1 christos faligndata %f10, %f12, %f44 803 1.1 christos brlez,pn %o2, Lmemcpy_blockdone 804 1.1 christos faligndata %f12, %f14, %f46 805 1.1 christos 806 1.1 christos bleu,a,pn %icc, 2f 807 1.1 christos ldda [%o0] ASI_BLK_P, %f48 808 1.1 christos membar #Sync 809 1.1 christos 2: 810 1.1 christos stda %f32, [%o1] ASI_STORE 811 1.1 christos faligndata %f14, %f16, %f32 812 1.1 christos inc BLOCK_SIZE, %o0 813 1.1 christos faligndata %f16, %f18, %f34 814 1.1 christos inc BLOCK_SIZE, %o1 815 1.1 christos faligndata %f18, %f20, %f36 816 1.1 christos dec BLOCK_SIZE, %o2 817 1.1 christos faligndata %f20, %f22, %f38 818 1.1 christos cmp %o0, %o5 819 1.1 christos faligndata %f22, %f24, %f40 820 1.1 christos faligndata %f24, %f26, %f42 821 1.1 christos faligndata %f26, %f28, %f44 822 1.1 christos brlez,pn %o2, Lmemcpy_blockdone 823 1.1 christos 
/*
 * Tail of the preceding alignment case.  Its loop head (numeric label 3)
 * and the instruction in front of the first line below are above this
 * point; the first faligndata is presumably the delay slot of a branch
 * located there.
 */
	faligndata %f28, %f30, %f46

	bleu,a,pn %icc, 2f		! %o0 <= %o5: issue the next block load
	 ldda	[%o0] ASI_BLK_P, %f0	!  (annulled when the branch falls through)
	membar	#Sync			! no load issued: wait for pending ones
2:
	stda	%f32, [%o1] ASI_STORE
	faligndata %f30, %f48, %f32
	inc	BLOCK_SIZE, %o0
	faligndata %f48, %f50, %f34
	inc	BLOCK_SIZE, %o1
	faligndata %f50, %f52, %f36
	dec	BLOCK_SIZE, %o2
	faligndata %f52, %f54, %f38
	cmp	%o0, %o5
	faligndata %f54, %f56, %f40
	faligndata %f56, %f58, %f42
	faligndata %f58, %f60, %f44
	brlez,pn %o2, Lmemcpy_blockdone
	 faligndata %f60, %f62, %f46
	bleu,a,pn %icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16	! Increment is at top
	membar	#Sync
2:
	stda	%f32, [%o1] ASI_STORE
	ba	3b
	 inc	BLOCK_SIZE, %o1

	!!
	!! Source at BLOCK_ALIGN+8
	!!
	!! We need to load almost 1 complete block by hand.
	!!
L101:
/*
 * Register use in this case, as read from the code below:
 *   %o0  source cursor (ldd / block loads)
 *   %o1  destination cursor (block stores)
 *   %o2  byte count, reduced by BLOCK_SIZE per assembled block
 *   %o5  bound: a block load is only issued while %o0 <= %o5
 * %f0 is used as the first input doubleword without being loaded here,
 * so it is presumably filled in before the dispatch to this label.
 * The faligndata shift amount is likewise presumably set up by an
 * earlier alignaddr -- confirm against the dispatch code.
 */
#ifdef RETURN_NAME
	/* Debug aid: leave the address of this case name in %g1. */
	sethi	%hi(1f), %g1
	ba,pt	%icc, 2f
	 or	%g1, %lo(1f), %g1
1:
	.asciz	"L101"
	.align	8
2:
#endif
	/* Hand-load the seven doublewords that follow %f0 in the first block. */
!	fmovd	%f0, %f0		! Hoist fmovd
	ldd	[%o0], %f2
	inc	8, %o0
	ldd	[%o0], %f4
	inc	8, %o0
	ldd	[%o0], %f6
	inc	8, %o0
	ldd	[%o0], %f8
	inc	8, %o0
	ldd	[%o0], %f10
	inc	8, %o0
	ldd	[%o0], %f12
	inc	8, %o0
	ldd	[%o0], %f14
	inc	8, %o0

	cmp	%o0, %o5
	bleu,a,pn %icc, 3f		! in bounds: start loading %f16-%f30
	 ldda	[%o0] ASI_BLK_P, %f16
	membar	#Sync
3:
	/* Stage 1: inputs %f0-%f16, next block goes to %f48-%f62. */
	faligndata %f0, %f2, %f32
	inc	BLOCK_SIZE, %o0
	faligndata %f2, %f4, %f34
	cmp	%o0, %o5
	faligndata %f4, %f6, %f36
	dec	BLOCK_SIZE, %o2
	faligndata %f6, %f8, %f38
	faligndata %f8, %f10, %f40
	faligndata %f10, %f12, %f42
	faligndata %f12, %f14, %f44
	bleu,a,pn %icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f48
	membar	#Sync
2:
	brlez,pn %o2, Lmemcpy_blockdone	! count exhausted: drain %f32-%f46
	 faligndata %f14, %f16, %f46

	stda	%f32, [%o1] ASI_STORE

	/* Stage 2: inputs %f16-%f48, next block goes to %f0-%f14. */
	faligndata %f16, %f18, %f32
	inc	BLOCK_SIZE, %o0
	faligndata %f18, %f20, %f34
	inc	BLOCK_SIZE, %o1
	faligndata %f20, %f22, %f36
	cmp	%o0, %o5
	faligndata %f22, %f24, %f38
	dec	BLOCK_SIZE, %o2
	faligndata %f24, %f26, %f40
	faligndata %f26, %f28, %f42
	faligndata %f28, %f30, %f44
	bleu,a,pn %icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f0
	membar	#Sync
2:
	brlez,pn %o2, Lmemcpy_blockdone
	 faligndata %f30, %f48, %f46

	stda	%f32, [%o1] ASI_STORE

	/* Stage 3: inputs %f48-%f0, next block goes to %f16-%f30. */
	faligndata %f48, %f50, %f32
	inc	BLOCK_SIZE, %o0
	faligndata %f50, %f52, %f34
	inc	BLOCK_SIZE, %o1
	faligndata %f52, %f54, %f36
	cmp	%o0, %o5
	faligndata %f54, %f56, %f38
	dec	BLOCK_SIZE, %o2
	faligndata %f56, %f58, %f40
	faligndata %f58, %f60, %f42
	faligndata %f60, %f62, %f44
	bleu,a,pn %icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar	#Sync
2:
	brlez,pn %o2, Lmemcpy_blockdone
	 faligndata %f62, %f0, %f46

	stda	%f32, [%o1] ASI_STORE
	ba	3b			! back to stage 1
	 inc	BLOCK_SIZE, %o1

	!!
	!! Source at BLOCK_ALIGN+16
	!!
	!! We need to load 6 doubles by hand.
	!!
L102:
/*
 * Same three-stage rotation as the BLOCK_ALIGN+8 case, shifted by one
 * doubleword: the incoming %f0 is moved to %f2 and each stage completes
 * two output registers after its gated block load instead of one.
 */
#ifdef RETURN_NAME
	/* Debug aid: leave the address of this case name in %g1. */
	sethi	%hi(1f), %g1
	ba,pt	%icc, 2f
	 or	%g1, %lo(1f), %g1
1:
	.asciz	"L102"
	.align	8
2:
#endif
	ldd	[%o0], %f4
	inc	8, %o0
	fmovd	%f0, %f2		! Hoist fmovd
	ldd	[%o0], %f6
	inc	8, %o0

	ldd	[%o0], %f8
	inc	8, %o0
	ldd	[%o0], %f10
	inc	8, %o0
	ldd	[%o0], %f12
	inc	8, %o0
	ldd	[%o0], %f14
	inc	8, %o0

	cmp	%o0, %o5
	bleu,a,pn %icc, 3f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar	#Sync
3:
	/* Stage 1: inputs %f2-%f18, next block goes to %f48-%f62. */
	faligndata %f2, %f4, %f32
	inc	BLOCK_SIZE, %o0
	faligndata %f4, %f6, %f34
	cmp	%o0, %o5
	faligndata %f6, %f8, %f36
	dec	BLOCK_SIZE, %o2
	faligndata %f8, %f10, %f38
	faligndata %f10, %f12, %f40
	faligndata %f12, %f14, %f42
	bleu,a,pn %icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f48
	membar	#Sync
2:
	faligndata %f14, %f16, %f44

	brlez,pn %o2, Lmemcpy_blockdone
	 faligndata %f16, %f18, %f46

	stda	%f32, [%o1] ASI_STORE

	/* Stage 2: inputs %f18-%f50, next block goes to %f0-%f14. */
	faligndata %f18, %f20, %f32
	inc	BLOCK_SIZE, %o0
	faligndata %f20, %f22, %f34
	inc	BLOCK_SIZE, %o1
	faligndata %f22, %f24, %f36
	cmp	%o0, %o5
	faligndata %f24, %f26, %f38
	dec	BLOCK_SIZE, %o2
	faligndata %f26, %f28, %f40
	faligndata %f28, %f30, %f42
	bleu,a,pn %icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f0
	membar	#Sync
2:
	faligndata %f30, %f48, %f44
	brlez,pn %o2, Lmemcpy_blockdone
	 faligndata %f48, %f50, %f46

	stda	%f32, [%o1] ASI_STORE

	/* Stage 3: inputs %f50-%f2, next block goes to %f16-%f30. */
	faligndata %f50, %f52, %f32
	inc	BLOCK_SIZE, %o0
	faligndata %f52, %f54, %f34
	inc	BLOCK_SIZE, %o1
	faligndata %f54, %f56, %f36
	cmp	%o0, %o5
	faligndata %f56, %f58, %f38
	dec	BLOCK_SIZE, %o2
	faligndata %f58, %f60, %f40
	faligndata %f60, %f62, %f42
	bleu,a,pn %icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar	#Sync
2:
	faligndata %f62, %f0, %f44
	brlez,pn %o2, Lmemcpy_blockdone
	 faligndata %f0, %f2, %f46

	stda	%f32, [%o1] ASI_STORE
	ba	3b			! back to stage 1
	 inc	BLOCK_SIZE, %o1

	!!
	!! Source at BLOCK_ALIGN+24
	!!
	!! We need to load 5 doubles by hand.
	!!
L103:
/*
 * Block copy for a source that starts 24 bytes into a block.
 *
 * Register use, as read from the code below:
 *   %o0  source cursor (ldd / block loads)
 *   %o1  destination cursor (block stores)
 *   %o2  byte count, reduced by BLOCK_SIZE per assembled block
 *   %o5  bound: a block load is only issued while %o0 <= %o5 (unsigned)
 * %f0 is consumed as the first input doubleword without being loaded
 * here, so it is presumably filled in before the dispatch to this label;
 * the faligndata shift is presumably set by an earlier alignaddr.
 *
 * Output is always assembled in %f32-%f46 and written with one block
 * store.  Input rotates through three register banks (%f0-%f14,
 * %f16-%f30, %f48-%f62), one per unrolled stage.  When %o2 drops to
 * zero or below, control leaves for Lmemcpy_blockdone with the last
 * block still in %f32-%f46.
 */
#ifdef RETURN_NAME
	/* Debug aid: leave the address of this case name in %g1. */
	sethi	%hi(1f), %g1
	ba,pt	%icc, 2f
	 or	%g1, %lo(1f), %g1
1:
	.asciz	"L103"
	.align	8
2:
#endif
	/* Hand-load the five doublewords left in the first block. */
	fmovd	%f0, %f4
	ldd	[%o0], %f6
	inc	8, %o0
	ldd	[%o0], %f8
	inc	8, %o0
	ldd	[%o0], %f10
	inc	8, %o0
	ldd	[%o0], %f12
	inc	8, %o0
	ldd	[%o0], %f14
	inc	8, %o0

	cmp	%o0, %o5
	bleu,a,pn %icc, 2f		! in bounds: start loading %f16-%f30
	 ldda	[%o0] ASI_BLK_P, %f16	!  (annulled when the branch falls through)
	membar	#Sync			! no load issued: wait for pending ones
2:
	inc	BLOCK_SIZE, %o0
3:
	/* Stage 1: inputs %f4-%f20, next block goes to %f48-%f62. */
	faligndata %f4, %f6, %f32
	cmp	%o0, %o5
	faligndata %f6, %f8, %f34
	dec	BLOCK_SIZE, %o2
	faligndata %f8, %f10, %f36
	faligndata %f10, %f12, %f38
	faligndata %f12, %f14, %f40
	bleu,a,pn %icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f48
	membar	#Sync
2:
	faligndata %f14, %f16, %f42
	inc	BLOCK_SIZE, %o0
	faligndata %f16, %f18, %f44
	brlez,pn %o2, Lmemcpy_blockdone	! count exhausted: drain %f32-%f46
	 faligndata %f18, %f20, %f46

	stda	%f32, [%o1] ASI_STORE

	/* Stage 2: inputs %f20-%f52, next block goes to %f0-%f14. */
	faligndata %f20, %f22, %f32
	cmp	%o0, %o5
	faligndata %f22, %f24, %f34
	dec	BLOCK_SIZE, %o2
	faligndata %f24, %f26, %f36
	inc	BLOCK_SIZE, %o1
	faligndata %f26, %f28, %f38
	faligndata %f28, %f30, %f40
	/*
	 * %o0 and %o5 are addresses, so the bound check must be unsigned
	 * like every other load gate in this routine.  This one used the
	 * signed ble, which decides wrongly when the two addresses lie on
	 * opposite sides of the sign bit of the 32-bit condition codes:
	 * the load is then either skipped, so stale %f0-%f14 contents get
	 * copied, or issued beyond the bound.
	 */
	bleu,a,pn %icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f0
	membar	#Sync
2:
	faligndata %f30, %f48, %f42
	inc	BLOCK_SIZE, %o0
	faligndata %f48, %f50, %f44
	brlez,pn %o2, Lmemcpy_blockdone
	 faligndata %f50, %f52, %f46

	stda	%f32, [%o1] ASI_STORE

	/* Stage 3: inputs %f52-%f4, next block goes to %f16-%f30. */
	faligndata %f52, %f54, %f32
	cmp	%o0, %o5
	faligndata %f54, %f56, %f34
	dec	BLOCK_SIZE, %o2
	faligndata %f56, %f58, %f36
	faligndata %f58, %f60, %f38
	inc	BLOCK_SIZE, %o1
	faligndata %f60, %f62, %f40
	bleu,a,pn %icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar	#Sync
2:
	faligndata %f62, %f0, %f42
	inc	BLOCK_SIZE, %o0
	faligndata %f0, %f2, %f44
	brlez,pn %o2, Lmemcpy_blockdone
	 faligndata %f2, %f4, %f46

	stda	%f32, [%o1] ASI_STORE
	ba	3b			! back to stage 1
	 inc	BLOCK_SIZE, %o1

	!!
	!! Source at BLOCK_ALIGN+32
	!!
	!! We need to load 4 doubles by hand.
	!!
L104:
/*
 * Block copy for a source that starts 32 bytes into a block.
 *
 * Register use, as read from the code below:
 *   %o0  source cursor (ldd / block loads)
 *   %o1  destination cursor (block stores)
 *   %o2  byte count, reduced by BLOCK_SIZE per assembled block
 *   %o5  bound: a block load is only issued while %o0 <= %o5
 * %f0 is consumed as the first input doubleword without being loaded
 * here, so it is presumably filled in before the dispatch to this label;
 * the faligndata shift is presumably set by an earlier alignaddr.
 *
 * Output is assembled in %f32-%f46; input rotates through the banks
 * %f0-%f14, %f16-%f30 and %f48-%f62, one per unrolled stage.
 */
#ifdef RETURN_NAME
	/* Debug aid: leave the address of this case name in %g1. */
	sethi	%hi(1f), %g1
	ba,pt	%icc, 2f
	 or	%g1, %lo(1f), %g1
1:
	.asciz	"L104"
	.align	8
2:
#endif
	/* Hand-load the four doublewords left in the first block. */
	fmovd	%f0, %f6
	ldd	[%o0], %f8
	inc	8, %o0
	ldd	[%o0], %f10
	inc	8, %o0
	ldd	[%o0], %f12
	inc	8, %o0
	ldd	[%o0], %f14
	inc	8, %o0

	cmp	%o0, %o5
	bleu,a,pn %icc, 2f		! in bounds: start loading %f16-%f30
	 ldda	[%o0] ASI_BLK_P, %f16	!  (annulled when the branch falls through)
	membar	#Sync			! no load issued: wait for pending ones
2:
	inc	BLOCK_SIZE, %o0
3:
	/* Stage 1: inputs %f6-%f22, next block goes to %f48-%f62. */
	faligndata %f6, %f8, %f32
	cmp	%o0, %o5
	faligndata %f8, %f10, %f34
	dec	BLOCK_SIZE, %o2
	faligndata %f10, %f12, %f36
	faligndata %f12, %f14, %f38
	bleu,a,pn %icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f48
	membar	#Sync
2:
	faligndata %f14, %f16, %f40
	faligndata %f16, %f18, %f42
	inc	BLOCK_SIZE, %o0
	faligndata %f18, %f20, %f44
	brlez,pn %o2, Lmemcpy_blockdone	! count exhausted: drain %f32-%f46
	 faligndata %f20, %f22, %f46

	stda	%f32, [%o1] ASI_STORE

	/* Stage 2: inputs %f22-%f54, next block goes to %f0-%f14. */
	faligndata %f22, %f24, %f32
	cmp	%o0, %o5
	faligndata %f24, %f26, %f34
	faligndata %f26, %f28, %f36
	inc	BLOCK_SIZE, %o1
	faligndata %f28, %f30, %f38
	bleu,a,pn %icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f0
	membar	#Sync
2:
	faligndata %f30, %f48, %f40
	dec	BLOCK_SIZE, %o2
	faligndata %f48, %f50, %f42
	inc	BLOCK_SIZE, %o0
	faligndata %f50, %f52, %f44
	brlez,pn %o2, Lmemcpy_blockdone
	 faligndata %f52, %f54, %f46

	stda	%f32, [%o1] ASI_STORE

	/* Stage 3: inputs %f54-%f6, next block goes to %f16-%f30. */
	faligndata %f54, %f56, %f32
	cmp	%o0, %o5
	faligndata %f56, %f58, %f34
	faligndata %f58, %f60, %f36
	inc	BLOCK_SIZE, %o1
	faligndata %f60, %f62, %f38
	bleu,a,pn %icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar	#Sync
2:
	faligndata %f62, %f0, %f40
	dec	BLOCK_SIZE, %o2
	faligndata %f0, %f2, %f42
	inc	BLOCK_SIZE, %o0
	faligndata %f2, %f4, %f44
	brlez,pn %o2, Lmemcpy_blockdone
	 faligndata %f4, %f6, %f46

	stda	%f32, [%o1] ASI_STORE
	ba	3b			! back to stage 1
	 inc	BLOCK_SIZE, %o1

	!!
	!! Source at BLOCK_ALIGN+40
	!!
	!! We need to load 3 doubles by hand.
	!!
L105:
/*
 * Block copy for a source that starts 40 bytes into a block.
 *
 * Register use, as read from the code below:
 *   %o0  source cursor (ldd / block loads)
 *   %o1  destination cursor (block stores)
 *   %o2  byte count, reduced by BLOCK_SIZE per assembled block
 *   %o5  bound: a block load is only issued while %o0 <= %o5
 * %f0 is consumed as the first input doubleword without being loaded
 * here, so it is presumably filled in before the dispatch to this label;
 * the faligndata shift is presumably set by an earlier alignaddr.
 *
 * Output is assembled in %f32-%f46; input rotates through the banks
 * %f0-%f14, %f16-%f30 and %f48-%f62, one per unrolled stage.
 */
#ifdef RETURN_NAME
	/* Debug aid: leave the address of this case name in %g1. */
	sethi	%hi(1f), %g1
	ba,pt	%icc, 2f
	 or	%g1, %lo(1f), %g1
1:
	.asciz	"L105"
	.align	8
2:
#endif
	/* Hand-load the three doublewords left in the first block. */
	fmovd	%f0, %f8
	ldd	[%o0], %f10
	inc	8, %o0
	ldd	[%o0], %f12
	inc	8, %o0
	ldd	[%o0], %f14
	inc	8, %o0

	cmp	%o0, %o5
	bleu,a,pn %icc, 2f		! in bounds: start loading %f16-%f30
	 ldda	[%o0] ASI_BLK_P, %f16	!  (annulled when the branch falls through)
	membar	#Sync			! no load issued: wait for pending ones
2:
	inc	BLOCK_SIZE, %o0
3:
	/* Stage 1: inputs %f8-%f24, next block goes to %f48-%f62. */
	faligndata %f8, %f10, %f32
	cmp	%o0, %o5
	faligndata %f10, %f12, %f34
	faligndata %f12, %f14, %f36
	bleu,a,pn %icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f48
	membar	#Sync
2:
	faligndata %f14, %f16, %f38
	dec	BLOCK_SIZE, %o2
	faligndata %f16, %f18, %f40
	inc	BLOCK_SIZE, %o0
	faligndata %f18, %f20, %f42
	faligndata %f20, %f22, %f44
	brlez,pn %o2, Lmemcpy_blockdone	! count exhausted: drain %f32-%f46
	 faligndata %f22, %f24, %f46

	stda	%f32, [%o1] ASI_STORE

	/* Stage 2: inputs %f24-%f56, next block goes to %f0-%f14. */
	faligndata %f24, %f26, %f32
	cmp	%o0, %o5
	faligndata %f26, %f28, %f34
	dec	BLOCK_SIZE, %o2
	faligndata %f28, %f30, %f36
	bleu,a,pn %icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f0
	membar	#Sync
2:
	faligndata %f30, %f48, %f38
	inc	BLOCK_SIZE, %o1
	faligndata %f48, %f50, %f40
	inc	BLOCK_SIZE, %o0
	faligndata %f50, %f52, %f42
	faligndata %f52, %f54, %f44
	brlez,pn %o2, Lmemcpy_blockdone
	 faligndata %f54, %f56, %f46

	stda	%f32, [%o1] ASI_STORE

	/* Stage 3: inputs %f56-%f8, next block goes to %f16-%f30. */
	faligndata %f56, %f58, %f32
	cmp	%o0, %o5
	faligndata %f58, %f60, %f34
	dec	BLOCK_SIZE, %o2
	faligndata %f60, %f62, %f36
	bleu,a,pn %icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar	#Sync
2:
	faligndata %f62, %f0, %f38
	inc	BLOCK_SIZE, %o1
	faligndata %f0, %f2, %f40
	inc	BLOCK_SIZE, %o0
	faligndata %f2, %f4, %f42
	faligndata %f4, %f6, %f44
	brlez,pn %o2, Lmemcpy_blockdone
	 faligndata %f6, %f8, %f46

	stda	%f32, [%o1] ASI_STORE
	ba	3b			! back to stage 1
	 inc	BLOCK_SIZE, %o1


	!!
	!! Source at BLOCK_ALIGN+48
	!!
	!! We need to load 2 doubles by hand.
	!!
L106:
/*
 * Block copy for a source that starts 48 bytes into a block.
 *
 * Register use, as read from the code below:
 *   %o0  source cursor (ldd / block loads)
 *   %o1  destination cursor (block stores)
 *   %o2  byte count, reduced by BLOCK_SIZE per assembled block
 *   %o5  bound: a block load is only issued while %o0 <= %o5
 * %f0 is consumed as the first input doubleword without being loaded
 * here, so it is presumably filled in before the dispatch to this label;
 * the faligndata shift is presumably set by an earlier alignaddr.
 *
 * Output is assembled in %f32-%f46; input rotates through the banks
 * %f0-%f14, %f16-%f30 and %f48-%f62, one per unrolled stage.
 */
#ifdef RETURN_NAME
	/* Debug aid: leave the address of this case name in %g1. */
	sethi	%hi(1f), %g1
	ba,pt	%icc, 2f
	 or	%g1, %lo(1f), %g1
1:
	.asciz	"L106"
	.align	8
2:
#endif
	/* Hand-load the two doublewords left in the first block. */
	fmovd	%f0, %f10
	ldd	[%o0], %f12
	inc	8, %o0
	ldd	[%o0], %f14
	inc	8, %o0

	cmp	%o0, %o5
	bleu,a,pn %icc, 2f		! in bounds: start loading %f16-%f30
	 ldda	[%o0] ASI_BLK_P, %f16	!  (annulled when the branch falls through)
	membar	#Sync			! no load issued: wait for pending ones
2:
	inc	BLOCK_SIZE, %o0
3:
	/* Stage 1: inputs %f10-%f26, next block goes to %f48-%f62. */
	faligndata %f10, %f12, %f32
	cmp	%o0, %o5
	faligndata %f12, %f14, %f34
	bleu,a,pn %icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f48
	membar	#Sync
2:
	faligndata %f14, %f16, %f36
	dec	BLOCK_SIZE, %o2
	faligndata %f16, %f18, %f38
	inc	BLOCK_SIZE, %o0
	faligndata %f18, %f20, %f40
	faligndata %f20, %f22, %f42
	faligndata %f22, %f24, %f44
	brlez,pn %o2, Lmemcpy_blockdone	! count exhausted: drain %f32-%f46
	 faligndata %f24, %f26, %f46

	stda	%f32, [%o1] ASI_STORE

	/* Stage 2: inputs %f26-%f58, next block goes to %f0-%f14. */
	faligndata %f26, %f28, %f32
	cmp	%o0, %o5
	faligndata %f28, %f30, %f34
	bleu,a,pn %icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f0
	membar	#Sync
2:
	faligndata %f30, %f48, %f36
	dec	BLOCK_SIZE, %o2
	faligndata %f48, %f50, %f38
	inc	BLOCK_SIZE, %o1
	faligndata %f50, %f52, %f40
	faligndata %f52, %f54, %f42
	inc	BLOCK_SIZE, %o0
	faligndata %f54, %f56, %f44
	brlez,pn %o2, Lmemcpy_blockdone
	 faligndata %f56, %f58, %f46

	stda	%f32, [%o1] ASI_STORE

	/* Stage 3: inputs %f58-%f10, next block goes to %f16-%f30. */
	faligndata %f58, %f60, %f32
	cmp	%o0, %o5
	faligndata %f60, %f62, %f34
	bleu,a,pn %icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar	#Sync
2:
	faligndata %f62, %f0, %f36
	dec	BLOCK_SIZE, %o2
	faligndata %f0, %f2, %f38
	inc	BLOCK_SIZE, %o1
	faligndata %f2, %f4, %f40
	faligndata %f4, %f6, %f42
	inc	BLOCK_SIZE, %o0
	faligndata %f6, %f8, %f44
	brlez,pn %o2, Lmemcpy_blockdone
	 faligndata %f8, %f10, %f46

	stda	%f32, [%o1] ASI_STORE
	ba	3b			! back to stage 1
	 inc	BLOCK_SIZE, %o1


	!!
	!! Source at BLOCK_ALIGN+56
	!!
	!! We need to load 1 double by hand.
	!!
L107:
/*
 * Block copy for a source that starts 56 bytes into a block.
 *
 * Register use, as read from the code below:
 *   %o0  source cursor (ldd / block loads)
 *   %o1  destination cursor (block stores)
 *   %o2  byte count, reduced by BLOCK_SIZE per assembled block
 *   %o5  bound: a block load is only issued while %o0 <= %o5
 * %f0 is consumed as the first input doubleword without being loaded
 * here, so it is presumably filled in before the dispatch to this label;
 * the faligndata shift is presumably set by an earlier alignaddr.
 *
 * Output is assembled in %f32-%f46; input rotates through the banks
 * %f0-%f14, %f16-%f30 and %f48-%f62, one per unrolled stage.
 */
#ifdef RETURN_NAME
	/* Debug aid: leave the address of this case name in %g1. */
	sethi	%hi(1f), %g1
	ba,pt	%icc, 2f
	 or	%g1, %lo(1f), %g1
1:
	.asciz	"L107"
	.align	8
2:
#endif
	/* Hand-load the single doubleword left in the first block. */
	fmovd	%f0, %f12
	ldd	[%o0], %f14
	inc	8, %o0

	cmp	%o0, %o5
	bleu,a,pn %icc, 2f		! in bounds: start loading %f16-%f30
	 ldda	[%o0] ASI_BLK_P, %f16	!  (annulled when the branch falls through)
	membar	#Sync			! no load issued: wait for pending ones
2:
	inc	BLOCK_SIZE, %o0
3:
	/* Stage 1: inputs %f12-%f28, next block goes to %f48-%f62. */
	faligndata %f12, %f14, %f32
	cmp	%o0, %o5
	bleu,a,pn %icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f48
	membar	#Sync
2:
	faligndata %f14, %f16, %f34
	dec	BLOCK_SIZE, %o2
	faligndata %f16, %f18, %f36
	inc	BLOCK_SIZE, %o0
	faligndata %f18, %f20, %f38
	faligndata %f20, %f22, %f40
	faligndata %f22, %f24, %f42
	faligndata %f24, %f26, %f44
	brlez,pn %o2, Lmemcpy_blockdone	! count exhausted: drain %f32-%f46
	 faligndata %f26, %f28, %f46

	stda	%f32, [%o1] ASI_STORE

	/* Stage 2: inputs %f28-%f60, next block goes to %f0-%f14. */
	faligndata %f28, %f30, %f32
	cmp	%o0, %o5
	bleu,a,pn %icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f0
	membar	#Sync
2:
	faligndata %f30, %f48, %f34
	dec	BLOCK_SIZE, %o2
	faligndata %f48, %f50, %f36
	inc	BLOCK_SIZE, %o1
	faligndata %f50, %f52, %f38
	faligndata %f52, %f54, %f40
	inc	BLOCK_SIZE, %o0
	faligndata %f54, %f56, %f42
	faligndata %f56, %f58, %f44
	brlez,pn %o2, Lmemcpy_blockdone
	 faligndata %f58, %f60, %f46

	stda	%f32, [%o1] ASI_STORE

	/* Stage 3: inputs %f60-%f12, next block goes to %f16-%f30. */
	faligndata %f60, %f62, %f32
	cmp	%o0, %o5
	bleu,a,pn %icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar	#Sync
2:
	faligndata %f62, %f0, %f34
	dec	BLOCK_SIZE, %o2
	faligndata %f0, %f2, %f36
	inc	BLOCK_SIZE, %o1
	faligndata %f2, %f4, %f38
	faligndata %f4, %f6, %f40
	inc	BLOCK_SIZE, %o0
	faligndata %f6, %f8, %f42
	faligndata %f8, %f10, %f44

	brlez,pn %o2, Lmemcpy_blockdone
	 faligndata %f10, %f12, %f46

	stda	%f32, [%o1] ASI_STORE
	ba	3b			! back to stage 1
	 inc	BLOCK_SIZE, %o1

/*
 * Common exit of every alignment case.  The last assembled block is
 * still in %f32-%f46 and %o2 has been decremented one BLOCK_SIZE too far.
 */
Lmemcpy_blockdone:
	inc	BLOCK_SIZE, %o2		! Fixup our overcommit
	membar	#Sync			! Finish any pending loads
/*
 * One drain step: take 8 off the count; if that goes negative, park the
 * register in %f48 (annulled delay slot, so only on the taken path) and
 * leave for the sub-doubleword finish; otherwise store all 8 bytes and
 * advance the destination.
 */
#define	FINISH_REG(f)				\
	deccc	8, %o2;				\
	bl,a	Lmemcpy_blockfinish;		\
	 fmovd	f, %f48;			\
	std	f, [%o1];			\
	inc	8, %o1

	FINISH_REG(%f32)
	FINISH_REG(%f34)
	FINISH_REG(%f36)
	FINISH_REG(%f38)
	FINISH_REG(%f40)
	FINISH_REG(%f42)
	FINISH_REG(%f44)
	FINISH_REG(%f46)
	FINISH_REG(%f48)
#undef FINISH_REG
	!!
	!! The low 3 bits have the sub-word bits needed to be
	!! stored [because (x-8)&0x7 == x].
	!!
Lmemcpy_blockfinish:
	/* %f48 holds the doubleword that was not stored whole; work on %f4. */
	brz,pn	%o2, 2f			! 100% complete?
	 fmovd	%f48, %f4
	cmp	%o2, 8			! Exactly 8 bytes?
	bz,a,pn	CCCR, 2f
	 std	%f4, [%o1]

	btst	4, %o2			! Word store?
	bz	CCCR, 1f
	 nop
	st	%f4, [%o1]
	inc	4, %o1
1:
	btst	2, %o2
	fzero	%f0
	bz	1f
	 mov	-6, %o4			! delay slot: runs on both paths
	alignaddr %o1, %o4, %g0

	faligndata %f0, %f4, %f8

	stda	%f8, [%o1] ASI_FL16_P	! Store short
	inc	2, %o1
1:
	btst	1, %o2			! Byte aligned?
	bz	2f
	 mov	-7, %o0			! Calculate dest - 7
	alignaddr %o1, %o0, %g0		! Calculate shift mask and dest.

	faligndata %f0, %f4, %f8	! Move 1st byte to low part of f8

	stda	%f8, [%o1] ASI_FL8_P	! Store 1st byte
	inc	1, %o1			! Update address
2:
	membar	#Sync
#if 0
	!!
	!! verify copy success.
	!!

	mov	%i0, %o2
	mov	%i1, %o4
	mov	%i2, %l4
0:
	ldub	[%o2], %o1
	inc	%o2
	ldub	[%o4], %o3
	inc	%o4
	cmp	%o3, %o1
	bnz	1f
	 dec	%l4
	brnz	%l4, 0b
	 nop
	ba	2f
	 nop

1:
	set	block_disable, %o0
	stx	%o0, [%o0]

	set	0f, %o0
	call	prom_printf
	 sub	%i2, %l4, %o5
	set	1f, %o0
	mov	%i0, %o2
	mov	%i1, %o1
	call	prom_printf
	 mov	%i2, %o3
	ta	1
	.data
	_ALIGN
0:	.asciz	"block memcpy failed: %x@%p != %x@%p byte %d\r\n"
1:	.asciz	"memcpy(%p, %p, %lx)\r\n"
	_ALIGN
	.text
2:
#endif
#if defined(_KERNEL) && !defined(_RUMPKERNEL)

/*
 * We've saved our possible fpstate, now disable the fpu
 * and continue with life.
 */
	RESTORE_FPU
	ret
	 restore %g1, 0, %o0		! Return DEST for memcpy
#endif
	/*
	 * Leaf return with %g1 as the result.  In a kernel build the
	 * ret/restore pair above has already returned, so this pair is
	 * only reached in the other builds.
	 */
	retl
	 mov	%g1, %o0
/*
 * Use block_disable to turn off block insns for
 * memcpy/memset
 */
	.data
	.align	8
	.globl	block_disable
block_disable:	.xword	1
	.text
#endif	/* USE_BLOCK_STORE_LOAD */