memset.S revision 1.1 1 1.1 christos /* $NetBSD: memset.S,v 1.1 2013/03/17 00:42:32 christos Exp $ */
2 1.1 christos
3 1.1 christos /*
4 1.1 christos * Copyright (c) 1996-2002 Eduardo Horvath
5 1.1 christos * All rights reserved.
6 1.1 christos *
7 1.1 christos * Redistribution and use in source and binary forms, with or without
8 1.1 christos * modification, are permitted provided that the following conditions
9 1.1 christos * are met:
10 1.1 christos * 1. Redistributions of source code must retain the above copyright
11 1.1 christos * notice, this list of conditions and the following disclaimer.
12 1.1 christos *
13 1.1 christos * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
14 1.1 christos * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 1.1 christos * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 1.1 christos * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
17 1.1 christos * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 1.1 christos * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 1.1 christos * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 1.1 christos * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 1.1 christos * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 1.1 christos * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 1.1 christos * SUCH DAMAGE.
24 1.1 christos *
25 1.1 christos */
26 1.1 christos #include "strmacros.h"
27 1.1 christos
28 1.1 christos /*
29 1.1 christos * XXXXXXXXXXXXXXXXXXXX
30 1.1 christos * We need to make sure that this doesn't use floating point
31 1.1 christos * before our trap handlers are installed or we could panic
32 1.1 christos * XXXXXXXXXXXXXXXXXXXX
33 1.1 christos */
34 1.1 christos /*
35 1.1 christos * memset(addr, c, len)
36 1.1 christos *
37 1.1 christos * We want to use VIS instructions if we're clearing out more than
38 1.1 christos * 256 bytes, but to do that we need to properly save and restore the
39 1.1 christos * FP registers. Unfortunately the code to do that in the kernel needs
40 1.1 christos * to keep track of the current owner of the FPU, hence the different
41 1.1 christos * code.
42 1.1 christos *
43 1.1 christos * XXXXX To produce more efficient code, we do not allow lengths
44 1.1 christos * of 0x8000000000000000 or greater, which are negative numbers.
45 1.1 christos * This should not really be an issue since the VA hole should
46 1.1 christos * cause any such ranges to fail anyway.
47 1.1 christos */
48 1.1 christos #if !defined(_KERNEL) || defined(_RUMPKERNEL)
/*
 * bzero(addr, len)
 *
 * Only assembled when _KERNEL is undefined or _RUMPKERNEL is defined.
 * Moves len into %o2 and loads a zero pattern into %o1, i.e. the
 * register layout memset documents for its own arguments.  There is
 * no return or branch here: execution falls through into the memset
 * entry that follows.
 */
49 1.1 christos ENTRY(bzero)
50 1.1 christos ! %o0 = addr, %o1 = len
51 1.1 christos mov %o1, %o2 ! len -> %o2
52 1.1 christos mov 0, %o1 ! pattern = 0
53 1.1 christos #endif
54 1.1 christos ENTRY(memset)
/*
 * Integer-register fill path.
 *
 * In:   %o0 = addr, %o1 = pattern (masked to its low 8 bits below),
 *       %o2 = len
 * Out:  %o0 = original addr
 * Uses: %o3 (scratch), %o4 (saved addr), condition codes
 *
 * Steps:
 *  1. Store single bytes until addr is 8-byte aligned (or len runs out).
 *  2. Replicate the pattern byte across all 64 bits of %o1.
 *  3. Store 8 bytes at a time while at least 8 bytes remain.
 *  4. Finish with a 4/2/1-byte tail chosen by the low bits of the
 *     (negative) residual count.
 *
 * Length tests use signed branches (bge/bl), so len is treated as a
 * signed quantity.
 */
55 1.1 christos ! %o0 = addr, %o1 = pattern, %o2 = len
56 1.1 christos mov %o0, %o4 ! Save original pointer
57 1.1 christos
58 1.1 christos Lmemset_internal:
59 1.1 christos btst 7, %o0 ! 8-byte aligned?
60 1.1 christos bz,pn %xcc, 0f
61 1.1 christos nop
62 1.1 christos inc %o0
63 1.1 christos deccc %o2 ! Store up to 7 bytes
64 1.1 christos bge,a,pt CCCR, Lmemset_internal
65 1.1 christos stb %o1, [%o0 - 1] ! Annulled slot: runs only when the branch is taken
66 1.1 christos
67 1.1 christos retl ! Duplicate Lmemset_done
68 1.1 christos mov %o4, %o0
69 1.1 christos 0:
70 1.1 christos /*
71 1.1 christos * Duplicate the pattern so it fills 64-bits.
72 1.1 christos */
73 1.1 christos andcc %o1, 0x0ff, %o1 ! No need to extend zero
74 1.1 christos bz,pt %icc, 1f
75 1.1 christos sllx %o1, 8, %o3 ! sigh. all dependent insns.
76 1.1 christos or %o1, %o3, %o1
77 1.1 christos sllx %o1, 16, %o3
78 1.1 christos or %o1, %o3, %o1
79 1.1 christos sllx %o1, 32, %o3
80 1.1 christos or %o1, %o3, %o1
81 1.1 christos 1:
82 1.1 christos #ifdef USE_BLOCK_STORE_LOAD
83 1.1 christos !! Now we are 64-bit aligned
84 1.1 christos cmp %o2, 256 ! Use block clear if len >= 256
85 1.1 christos bge,pt CCCR, Lmemset_block ! use block store insns
86 1.1 christos #endif /* USE_BLOCK_STORE_LOAD */
87 1.1 christos deccc 8, %o2 ! len -= 8; sits in the bge delay slot when USE_BLOCK_STORE_LOAD is defined
88 1.1 christos Lmemset_longs:
89 1.1 christos bl,pn CCCR, Lmemset_cleanup ! Less than 8 bytes left
90 1.1 christos nop
91 1.1 christos 3:
92 1.1 christos inc 8, %o0
93 1.1 christos deccc 8, %o2
94 1.1 christos bge,pt CCCR, 3b
95 1.1 christos stx %o1, [%o0 - 8] ! Do 1 longword at a time
96 1.1 christos
97 1.1 christos /*
98 1.1 christos * Len is in [-8..-1] where -8 => done, -7 => 1 byte to set,
99 1.1 christos * -6 => two bytes, etc. Mop up this remainder, if any.
100 1.1 christos */
101 1.1 christos Lmemset_cleanup:
102 1.1 christos btst 4, %o2
103 1.1 christos bz,pt CCCR, 5f ! if (len & 4) {
104 1.1 christos nop
105 1.1 christos stw %o1, [%o0] ! *(int *)addr = pattern;
106 1.1 christos inc 4, %o0 ! addr += 4;
107 1.1 christos 5:
108 1.1 christos btst 2, %o2
109 1.1 christos bz,pt CCCR, 7f ! if (len & 2) {
110 1.1 christos nop
111 1.1 christos sth %o1, [%o0] ! *(short *)addr = pattern;
112 1.1 christos inc 2, %o0 ! addr += 2;
113 1.1 christos 7:
114 1.1 christos btst 1, %o2
115 1.1 christos bnz,a %icc, Lmemset_done ! if (len & 1)
116 1.1 christos stb %o1, [%o0] ! *addr = pattern;
117 1.1 christos Lmemset_done:
118 1.1 christos retl
119 1.1 christos mov %o4, %o0 ! Restore pointer for memset (ugh)
120 1.1 christos
121 1.1 christos #ifdef USE_BLOCK_STORE_LOAD
/*
 * Lmemset_block: fill using FP-register block stores.
 *
 * Reached from memset's `bge ... Lmemset_block` with %o0 8-byte
 * aligned, %o1 = pattern byte replicated across 64 bits, and
 * %o2 = len - 8 (the deccc in that branch's delay slot has already
 * executed).  Falls back to Lmemset_longs when block_disable is
 * non-zero or %tba differs from trapbase; nothing before those
 * fallback branches is a cc-setting instruction, so Lmemset_longs
 * still tests the condition codes left by that deccc.
 */
122 1.1 christos Lmemset_block:
123 1.1 christos sethi %hi(block_disable), %o3
124 1.1 christos ldx [ %o3 + %lo(block_disable) ], %o3
125 1.1 christos brnz,pn %o3, Lmemset_longs
126 1.1 christos !! Make sure our trap table is installed
127 1.1 christos set _C_LABEL(trapbase), %o5 ! NOTE(review): synthetic `set` in the brnz delay slot; %o5 is not read on the Lmemset_longs path -- confirm expansion
128 1.1 christos rdpr %tba, %o3
129 1.1 christos sub %o3, %o5, %o3
130 1.1 christos brnz,pn %o3, Lmemset_longs ! No, then don't use block load/store
131 1.1 christos nop
132 1.1 christos /*
133 1.1 christos * Kernel:
134 1.1 christos *
135 1.1 christos * Here we use VIS instructions to do a block clear of a page.
136 1.1 christos * But before we can do that we need to save and enable the FPU.
137 1.1 christos * The last owner of the FPU registers is fplwp, and
138 1.1 christos * fplwp->l_md.md_fpstate is the current fpstate. If that's not
139 1.1 christos * null, call savefpstate() with it to store our current fp state.
140 1.1 christos *
141 1.1 christos * Next, allocate an aligned fpstate on the stack. We will properly
142 1.1 christos * nest calls on a particular stack so this should not be a problem.
143 1.1 christos *
144 1.1 christos * Now we grab either curlwp (or if we're on the interrupt stack
145 1.1 christos * lwp0). We stash its existing fpstate in a local register and
146 1.1 christos * put our new fpstate in curlwp->l_md.md_fpstate. We point
147 1.1 christos * fplwp at curlwp (or lwp0) and enable the FPU.
148 1.1 christos *
149 1.1 christos * If we are ever preempted, our FPU state will be saved in our
150 1.1 christos * fpstate. Then, when we're resumed and we take an FPDISABLED
151 1.1 christos * trap, the trap handler will be able to fish our FPU state out
152 1.1 christos * of curlwp (or lwp0).
153 1.1 christos *
154 1.1 christos * On exiting this routine we undo the damage: restore the original
155 1.1 christos * pointer to curlwp->l_md.md_fpstate, clear our fplwp, and disable
156 1.1 christos * the FPU.
157 1.1 christos *
158 1.1 christos */
159 1.1 christos
/*
 * NOTE(review): ENABLE_FPU and RESTORE_FPU are defined outside this
 * section.  The code below addresses the arguments as %i0-%i2 and ends
 * with `restore`, so ENABLE_FPU presumably opens a new register
 * window -- confirm against the macro definition.
 */
160 1.1 christos ENABLE_FPU(0)
161 1.1 christos
162 1.1 christos !! We are now 8-byte aligned. We need to become 64-byte aligned.
163 1.1 christos btst 63, %i0
164 1.1 christos bz,pt CCCR, 2f
165 1.1 christos nop
166 1.1 christos 1:
167 1.1 christos stx %i1, [%i0]
168 1.1 christos inc 8, %i0
169 1.1 christos btst 63, %i0
170 1.1 christos bnz,pt %xcc, 1b
171 1.1 christos dec 8, %i2 ! Delay slot: count the 8 bytes just stored, every pass
172 1.1 christos
173 1.1 christos 2:
174 1.1 christos brz %i1, 3f ! Skip the memory op
175 1.1 christos fzero %f0 ! if pattern is 0
176 1.1 christos
/* Non-zero pattern: bounce it through memory at [%i0] to load it into %f0. */
177 1.1 christos #ifdef _LP64
178 1.1 christos stx %i1, [%i0] ! Flush this puppy to RAM
179 1.1 christos membar #StoreLoad
180 1.1 christos ldd [%i0], %f0
181 1.1 christos #else
182 1.1 christos stw %i1, [%i0] ! Flush this puppy to RAM
183 1.1 christos membar #StoreLoad
184 1.1 christos ld [%i0], %f0
185 1.1 christos fmovsa %icc, %f0, %f1
186 1.1 christos #endif
187 1.1 christos
188 1.1 christos 3:
189 1.1 christos fmovd %f0, %f2 ! Duplicate the pattern
190 1.1 christos fmovd %f0, %f4
191 1.1 christos fmovd %f0, %f6
192 1.1 christos fmovd %f0, %f8
193 1.1 christos fmovd %f0, %f10
194 1.1 christos fmovd %f0, %f12
195 1.1 christos fmovd %f0, %f14
196 1.1 christos
197 1.1 christos !! Remember: we were 8 bytes too far
198 1.1 christos dec 56, %i2 ! Go one iteration too far
199 1.1 christos 5:
200 1.1 christos stda %f0, [%i0] ASI_STORE ! Store 64 bytes
201 1.1 christos deccc BLOCK_SIZE, %i2
202 1.1 christos bg,pt %icc, 5b ! NOTE(review): tests %icc while the other length branches use CCCR -- confirm this is intended
203 1.1 christos inc BLOCK_SIZE, %i0
204 1.1 christos
205 1.1 christos membar #Sync
206 1.1 christos /*
207 1.1 christos * We've saved our possible fpstate, now disable the fpu
208 1.1 christos * and continue with life.
209 1.1 christos */
210 1.1 christos RESTORE_FPU
211 1.1 christos addcc %i2, 56, %i2 ! Restore the count
212 1.1 christos ba,pt %xcc, Lmemset_longs ! Finish up the remainder
213 1.1 christos restore ! Delay slot; Lmemset_longs then branches on the cc set by the addcc above
214 1.1 christos #endif /* USE_BLOCK_STORE_LOAD */
215