/*	$NetBSD: memset.S,v 1.3 2021/08/13 20:47:54 andvar Exp $	*/

/*
 * Copyright (c) 1996-2002 Eduardo Horvath
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
#include "strmacros.h"
#if defined(LIBC_SCCS) && !defined(lint)
RCSID("$NetBSD: memset.S,v 1.3 2021/08/13 20:47:54 andvar Exp $")
#endif  /* LIBC_SCCS and not lint */


/*
 * XXXXXXXXXXXXXXXXXXXX
 * We need to make sure that this doesn't use floating point
 * before our trap handlers are installed or we could panic
 * XXXXXXXXXXXXXXXXXXXX
 */
/*
 * memset(addr, c, len)
 * bzero(addr, len)		(only if !_KERNEL || _RUMPKERNEL)
 *
 * In:	%o0 = addr, %o1 = c (only the low 8 bits are used), %o2 = len.
 *	bzero moves its len into %o2, clears %o1 and falls through
 *	into memset.
 * Out:	%o0 = the original addr (kept in %o4 across the routine).
 * Uses: %o3 and %o4 as scratch; the block-store path additionally uses
 *	%o5 and %f0-%f14.
 *
 * Outline:
 *   1. Store single bytes until addr is 8-byte aligned.
 *   2. Replicate the fill byte into all 8 bytes of %o1.
 *   3. (USE_BLOCK_STORE_LOAD only, len >= 256) 64-byte block stores.
 *   4. Store 8 bytes at a time with stx.
 *   5. Mop up the last 0..7 bytes with one stw/sth/stb each.
 *
 * From step 3 onwards the length register is kept biased: it holds
 * (bytes remaining - 8), so that "negative" means "fewer than 8 bytes
 * left" and the low three bits still give the size of the tail.
 *
 * We want to use VIS instructions if we're clearing out more than
 * 256 bytes, but to do that we need to properly save and restore the
 * FP registers.  Unfortunately the code to do that in the kernel needs
 * to keep track of the current owner of the FPU, hence the different
 * code.
 *
 * XXXXX To produce more efficient code, we do not allow lengths
 * of 0x8000000000000000 or more, which are negative numbers.
 * This should not really be an issue since the VA hole should
 * cause any such ranges to fail anyway.
 */
#if !defined(_KERNEL) || defined(_RUMPKERNEL)
ENTRY(bzero)
	! %o0 = addr, %o1 = len
	mov	%o1, %o2		! len goes where memset expects it
	clr	%o1			! set pattern to zero
#endif
ENTRY(memset)
	! %o0 = addr, %o1 = pattern, %o2 = len
	mov	%o0, %o4		! Save original pointer

Lmemset_internal:
	btst	7, %o0			! 8-byte aligned?
	bz,pn	%xcc, 0f
	 nop
	inc	%o0
	deccc	%o2			! Store up to 7 bytes
	bge,a,pt	CCCR, Lmemset_internal
	 stb	%o1, [%o0 - 1]		! annulled once len is used up

	retl				! Duplicate Lmemset_done
	 mov	%o4, %o0
0:
	/*
	 * Duplicate the pattern so it fills 64-bits.
	 */
	andcc	%o1, 0x0ff, %o1		! No need to extend zero
	bz,pt	%icc, 1f
	 sllx	%o1, 8, %o3		! sigh.  all dependent insns.
	or	%o1, %o3, %o1
	sllx	%o1, 16, %o3
	or	%o1, %o3, %o1
	sllx	%o1, 32, %o3
	or	%o1, %o3, %o1
1:
	/*
	 * The deccc below applies the "remaining - 8" bias to the length.
	 * When USE_BLOCK_STORE_LOAD is defined it is also the delay slot
	 * of the bge, so the bias is already in place (and the condition
	 * codes are those of the deccc) on arrival at Lmemset_block.
	 */
#ifdef USE_BLOCK_STORE_LOAD
	!! Now we are 64-bit aligned
	cmp	%o2, 256		! Use block clear if len >= 256
	bge,pt	CCCR, Lmemset_block	! use block store insns
#endif	/* USE_BLOCK_STORE_LOAD */
	 deccc	8, %o2
Lmemset_longs:
	bl,pn	CCCR, Lmemset_cleanup	! Less than 8 bytes left
	 nop
3:
	inc	8, %o0
	deccc	8, %o2
	bge,pt	CCCR, 3b
	 stx	%o1, [%o0 - 8]		! Do 1 longword at a time

	/*
	 * Len is in [-8..-1] where -8 => done, -7 => 1 byte to store,
	 * -6 => two bytes, etc.  Mop up this remainder, if any.
	 */
Lmemset_cleanup:
	btst	4, %o2
	bz,pt	CCCR, 5f		! if (len & 4) {
	 nop
	stw	%o1, [%o0]		!	*(int *)addr = pattern;
	inc	4, %o0			!	addr += 4;
5:
	btst	2, %o2
	bz,pt	CCCR, 7f		! if (len & 2) {
	 nop
	sth	%o1, [%o0]		!	*(short *)addr = pattern;
	inc	2, %o0			!	addr += 2;
7:
	btst	1, %o2
	bnz,a	%icc, Lmemset_done	! if (len & 1)
	 stb	%o1, [%o0]		!	*addr = pattern;
Lmemset_done:
	retl
	 mov	%o4, %o0		! Restore pointer for memset (ugh)

#ifdef USE_BLOCK_STORE_LOAD
Lmemset_block:
	/*
	 * Both early exits below go to Lmemset_longs, whose first
	 * instruction tests the condition codes left by the deccc in the
	 * delay slot of the bge that brought us here.  Nothing between
	 * here and those exits modifies the condition codes.
	 */
	sethi	%hi(block_disable), %o3
	ldx	[ %o3 + %lo(block_disable) ], %o3
	brnz,pn	%o3, Lmemset_longs	! block stores are disabled
	 nop				! keep the synthetic set below out
					! of the delay slot: it may assemble
					! to more than one instruction
	!! Make sure our trap table is installed
	set	_C_LABEL(trapbase), %o5
	rdpr	%tba, %o3
	sub	%o3, %o5, %o3
	brnz,pn	%o3, Lmemset_longs	! No, so do not use block load/store
	 nop
/*
 * Kernel:
 *
 * Here we use VIS instructions to do a block clear of a page.
 * But before we can do that we need to save and enable the FPU.
 * The last owner of the FPU registers is fplwp, and
 * fplwp->l_md.md_fpstate is the current fpstate.  If that's not
 * null, call savefpstate() with it to store our current fp state.
 *
 * Next, allocate an aligned fpstate on the stack.  We will properly
 * nest calls on a particular stack so this should not be a problem.
 *
 * Now we grab either curlwp (or if we're on the interrupt stack
 * lwp0).  We stash its existing fpstate in a local register and
 * put our new fpstate in curlwp->p_md.md_fpstate.  We point
 * fplwp at curlwp (or lwp0) and enable the FPU.
 *
 * If we are ever preempted, our FPU state will be saved in our
 * fpstate.  Then, when we're resumed and we take an FPDISABLED
 * trap, the trap handler will be able to fish our FPU state out
 * of curlwp (or lwp0).
 *
 * On exiting this routine we undo the damage: restore the original
 * pointer to curlwp->p_md.md_fpstate, clear our fplwp, and disable
 * the FPU.
 *
 * ENABLE_FPU and RESTORE_FPU are defined outside this file.  The code
 * between them addresses the arguments as %i0-%i2 and the sequence
 * ends with a `restore', so ENABLE_FPU evidently opens a new register
 * window.
 */

	ENABLE_FPU(0)

	!! We are now 8-byte aligned.  We need to become 64-byte aligned.
	btst	63, %i0
	bz,pt	CCCR, 2f
	 nop
1:
	stx	%i1, [%i0]
	inc	8, %i0
	btst	63, %i0
	bnz,pt	%xcc, 1b
	 dec	8, %i2			! runs for every stx, taken or not

2:
	brz	%i1, 3f			! Skip the memory op
	 fzero	%f0			! if pattern is 0

	/*
	 * Non-zero pattern: bounce it through memory at the destination
	 * to get it into %f0.  The block stores below overwrite the same
	 * location with the same pattern afterwards.
	 */
#ifdef _LP64
	stx	%i1, [%i0]		! Flush this puppy to RAM
	membar	#StoreLoad
	ldd	[%i0], %f0
#else
	stw	%i1, [%i0]		! Flush this puppy to RAM
	membar	#StoreLoad
	ld	[%i0], %f0
	fmovsa	%icc, %f0, %f1
#endif

3:
	fmovd	%f0, %f2		! Duplicate the pattern
	fmovd	%f0, %f4
	fmovd	%f0, %f6
	fmovd	%f0, %f8
	fmovd	%f0, %f10
	fmovd	%f0, %f12
	fmovd	%f0, %f14

	!! Remember: the count is already biased by -8, so taking off
	!! another 56 leaves it holding (bytes remaining - 64).
	dec	56, %i2			! Go one iteration too far
5:
	stda	%f0, [%i0] ASI_STORE	! Store 64 bytes
	deccc	BLOCK_SIZE, %i2
	bg,pt	%icc, 5b
	 inc	BLOCK_SIZE, %i0

	membar	#Sync
/*
 * We've saved our possible fpstate, now disable the fpu
 * and continue with life.
 */
	RESTORE_FPU
	addcc	%i2, 56, %i2		! Restore the count to the -8 bias
					! and set cc for Lmemset_longs
	ba,pt	%xcc, Lmemset_longs	! Finish up the remainder
	 restore
#endif	/* USE_BLOCK_STORE_LOAD */