/*	$NetBSD: memset.S,v 1.2.12.2 2014/05/22 11:26:30 yamt Exp $	*/

/*
 * Copyright (c) 1996-2002 Eduardo Horvath
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
#include "strmacros.h"
#if defined(LIBC_SCCS) && !defined(lint)
	RCSID("$NetBSD: memset.S,v 1.2.12.2 2014/05/22 11:26:30 yamt Exp $")
#endif	/* LIBC_SCCS and not lint */


/*
 * XXXXXXXXXXXXXXXXXXXX
 * We need to make sure that this doesn't use floating point
 * before our trap handlers are installed or we could panic
 * XXXXXXXXXXXXXXXXXXXX
 */
/*
 * memset(addr, c, len)
 *
 * Register usage (SPARC64 ABI):
 *	%o0 = addr (returned unchanged), %o1 = fill pattern,
 *	%o2 = remaining length, %o3 = scratch, %o4 = saved addr.
 *
 * We want to use VIS instructions if we're clearing out more than
 * 256 bytes, but to do that we need to properly save and restore the
 * FP registers.  Unfortunately the code to do that in the kernel needs
 * to keep track of the current owner of the FPU, hence the different
 * code.
 *
 * XXXXX To produce more efficient code, we do not allow lengths
 * greater than 0x8000000000000000, which are negative numbers.
 * This should not really be an issue since the VA hole should
 * cause any such ranges to fail anyway.
 */
#if !defined(_KERNEL) || defined(_RUMPKERNEL)
ENTRY(bzero)
	! %o0 = addr, %o1 = len
	mov	%o1, %o2		! Move len into memset's len slot ...
	clr	%o1			! ... set pattern = 0, fall into memset
#endif
ENTRY(memset)
	! %o0 = addr, %o1 = pattern, %o2 = len
	mov	%o0, %o4		! Save original pointer

Lmemset_internal:
	btst	7, %o0			! Word aligned?
	bz,pn	%xcc, 0f
	 nop
	inc	%o0
	deccc	%o2			! Store up to 7 bytes
	bge,a,pt	CCCR, Lmemset_internal
	 stb	%o1, [%o0 - 1]		! (annulled unless branch taken)

	retl				! Duplicate Lmemset_done
	 mov	%o4, %o0
0:
	/*
	 * Duplicate the pattern so it fills 64-bits.
	 */
	andcc	%o1, 0x0ff, %o1		! No need to extend zero
	bz,pt	%icc, 1f
	 sllx	%o1, 8, %o3		! sigh.  all dependent insns.
	or	%o1, %o3, %o1		! Pattern now fills 16 bits
	sllx	%o1, 16, %o3
	or	%o1, %o3, %o1		! ... 32 bits
	sllx	%o1, 32, %o3
	or	%o1, %o3, %o1		! ... all 64 bits
1:
#ifdef USE_BLOCK_STORE_LOAD
	!! Now we are 64-bit aligned
	cmp	%o2, 256		! Use block clear if len > 256
	bge,pt	CCCR, Lmemset_block	! use block store insns
#endif	/* USE_BLOCK_STORE_LOAD */
	 deccc	8, %o2
Lmemset_longs:
	bl,pn	CCCR, Lmemset_cleanup	! Less than 8 bytes left
	 nop
3:
	inc	8, %o0
	deccc	8, %o2
	bge,pt	CCCR, 3b
	 stx	%o1, [%o0 - 8]		! Do 1 longword at a time

	/*
	 * Len is in [-8..-1] where -8 => done, -7 => 1 byte to zero,
	 * -6 => two bytes, etc.  Mop up this remainder, if any.
	 */
Lmemset_cleanup:
	btst	4, %o2
	bz,pt	CCCR, 5f		! if (len & 4) {
	 nop
	stw	%o1, [%o0]		!	*(int *)addr = 0;
	inc	4, %o0			!	addr += 4;
5:
	btst	2, %o2
	bz,pt	CCCR, 7f		! if (len & 2) {
	 nop
	sth	%o1, [%o0]		!	*(short *)addr = 0;
	inc	2, %o0			!	addr += 2;
7:
	btst	1, %o2
	bnz,a	%icc, Lmemset_done	! if (len & 1)
	 stb	%o1, [%o0]		!	*addr = 0;  (annulled if not taken)
Lmemset_done:
	retl
	 mov	%o4, %o0		! Restore pointer for memset (ugh)

#ifdef USE_BLOCK_STORE_LOAD
Lmemset_block:
	sethi	%hi(block_disable), %o3
	ldx	[ %o3 + %lo(block_disable) ], %o3
	brnz,pn	%o3, Lmemset_longs	! Block ops administratively disabled
	!! Make sure our trap table is installed
	 set	_C_LABEL(trapbase), %o5
	rdpr	%tba, %o3
	sub	%o3, %o5, %o3
	brnz,pn	%o3, Lmemset_longs	! No, then don't use block load/store
	 nop
/*
 * Kernel:
 *
 * Here we use VIS instructions to do a block clear of a page.
 * But before we can do that we need to save and enable the FPU.
 * The last owner of the FPU registers is fplwp, and
 * fplwp->l_md.md_fpstate is the current fpstate.  If that's not
 * null, call savefpstate() with it to store our current fp state.
 *
 * Next, allocate an aligned fpstate on the stack.  We will properly
 * nest calls on a particular stack so this should not be a problem.
 *
 * Now we grab either curlwp (or if we're on the interrupt stack
 * lwp0).  We stash its existing fpstate in a local register and
 * put our new fpstate in curlwp->p_md.md_fpstate.  We point
 * fplwp at curlwp (or lwp0) and enable the FPU.
 *
 * If we are ever preempted, our FPU state will be saved in our
 * fpstate.  Then, when we're resumed and we take an FPDISABLED
 * trap, the trap handler will be able to fish our FPU state out
 * of curlwp (or lwp0).
 *
 * On exiting this routine we undo the damage: restore the original
 * pointer to curlwp->p_md.md_fpstate, clear our fplwp, and disable
 * the FPU.
 *
 */

	ENABLE_FPU(0)

	!! We are now 8-byte aligned.  We need to become 64-byte aligned.
	btst	63, %i0
	bz,pt	CCCR, 2f
	 nop
1:
	stx	%i1, [%i0]		! Store longwords until 64-byte aligned
	inc	8, %i0
	btst	63, %i0
	bnz,pt	%xcc, 1b
	 dec	8, %i2

2:
	brz	%i1, 3f			! Skip the memory op
	 fzero	%f0			! if pattern is 0

#ifdef _LP64
	stx	%i1, [%i0]		! Flush this puppy to RAM
	membar	#StoreLoad
	ldd	[%i0], %f0		! Load the 64-bit pattern into %f0:%f1
#else
	stw	%i1, [%i0]		! Flush this puppy to RAM
	membar	#StoreLoad
	ld	[%i0], %f0
	fmovsa	%icc, %f0, %f1		! Copy the high word to the low half
#endif

3:
	fmovd	%f0, %f2		! Duplicate the pattern
	fmovd	%f0, %f4		! ... until %f0-%f15 hold a full
	fmovd	%f0, %f6		! 64-byte block's worth
	fmovd	%f0, %f8
	fmovd	%f0, %f10
	fmovd	%f0, %f12
	fmovd	%f0, %f14

	!! Remember: we were 8 bytes too far
	dec	56, %i2			! Go one iteration too far
5:
	stda	%f0, [%i0] ASI_STORE	! Store 64 bytes
	deccc	BLOCK_SIZE, %i2
	bg,pt	%icc, 5b
	 inc	BLOCK_SIZE, %i0

	membar	#Sync
/*
 * We've saved our possible fpstate, now disable the fpu
 * and continue with life.
 */
	RESTORE_FPU
	addcc	%i2, 56, %i2		! Restore the count
	ba,pt	%xcc, Lmemset_longs	! Finish up the remainder
	 restore
#endif /* USE_BLOCK_STORE_LOAD */
219