/*	$NetBSD: altivec.c,v 1.1 2003/02/03 17:10:09 matt Exp $	*/

/*
 * Copyright (C) 1996 Wolfgang Solfrank.
 * Copyright (C) 1996 TooLs GmbH.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by TooLs GmbH.
 * 4. The name of TooLs GmbH may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/sa.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/malloc.h>
#include <sys/pool.h>

#include <powerpc/altivec.h>
#include <powerpc/spr.h>
#include <powerpc/psl.h>

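/*
 * Pool from which each LWP's AltiVec save area (struct vreg) is allocated
 * the first time the LWP uses the vector unit.
 */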
struct pool vecpool;

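/*
 * Give the AltiVec unit to the current LWP (presumably reached when a
 * thread takes an AltiVec-unavailable exception): allocate and initialize
 * a save area on first use, flush any state a previous owner still has
 * live on this CPU, reload this LWP's vector registers, VSCR and VRSAVE,
 * and set PSL_VEC in the saved SRR1 so the unit is usable on return to
 * user-mode.
 */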
void
enable_vec(void)
{
	struct cpu_info *ci = curcpu();
	struct lwp *l = curlwp;
	struct pcb *pcb = &l->l_addr->u_pcb;
	struct trapframe *tf = trapframe(l);
	struct vreg *vr = pcb->pcb_vr;
	int msr, scratch;

	KASSERT(pcb->pcb_veccpu == NULL);

	/*
	 * Allocate a vreg structure if we haven't already done so.
	 */
	if (!(pcb->pcb_flags & PCB_ALTIVEC)) {
		vr = pcb->pcb_vr = pool_get(&vecpool, PR_WAITOK);
		pcb->pcb_flags |= PCB_ALTIVEC;

		/*
		 * Initialize the vectors with NaNs.
		 */
		for (scratch = 0; scratch < 32; scratch++) {
			vr->vreg[scratch][0] = 0x7FFFDEAD;
			vr->vreg[scratch][1] = 0x7FFFDEAD;
			vr->vreg[scratch][2] = 0x7FFFDEAD;
			vr->vreg[scratch][3] = 0x7FFFDEAD;
		}
		vr->vscr = 0;
		vr->vrsave = tf->tf_xtra[TF_VRSAVE];
	}

	/*
	 * Enable AltiVec temporarily (and disable interrupts).
	 */
	msr = mfmsr();
	mtmsr((msr & ~PSL_EE) | PSL_VEC);
	__asm __volatile ("isync");
	if (ci->ci_veclwp) {
		save_vec_cpu();
	}
	KASSERT(curcpu()->ci_veclwp == NULL);

	/*
	 * Restore VSCR by first loading it into a vector register and then
	 * moving it into VSCR.  (This needs to be done before loading the
	 * user's vector registers, since we need to use a scratch vector
	 * register.)
	 */
	__asm __volatile("vxor %2,%2,%2; lvewx %2,%0,%1; mtvscr %2" \
	    :: "r"(vr), "r"(offsetof(struct vreg, vscr)), "n"(0));

	/*
	 * VRSAVE will be restored when the trap frame is restored on return
	 * to user-mode.
	 */
	tf->tf_xtra[TF_VRSAVE] = vr->vrsave;

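	/*
	 * LVX(n,vr) reloads vector register n from the save area slot
	 * vr->vreg[n]; the "n" constraint emits the register number as a
	 * literal operand in the lvx instruction.
	 */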
#define LVX(n,vr) __asm /*__volatile*/("lvx %2,%0,%1" \
	    :: "r"(vr), "r"(offsetof(struct vreg, vreg[n])), "n"(n));

	/*
	 * Load all 32 vector registers.
	 */
	LVX( 0,vr); LVX( 1,vr); LVX( 2,vr); LVX( 3,vr);
	LVX( 4,vr); LVX( 5,vr); LVX( 6,vr); LVX( 7,vr);
	LVX( 8,vr); LVX( 9,vr); LVX(10,vr); LVX(11,vr);
	LVX(12,vr); LVX(13,vr); LVX(14,vr); LVX(15,vr);

	LVX(16,vr); LVX(17,vr); LVX(18,vr); LVX(19,vr);
	LVX(20,vr); LVX(21,vr); LVX(22,vr); LVX(23,vr);
	LVX(24,vr); LVX(25,vr); LVX(26,vr); LVX(27,vr);
	LVX(28,vr); LVX(29,vr); LVX(30,vr); LVX(31,vr);
	__asm __volatile ("isync");

	/*
	 * Enable AltiVec when we return to user-mode.
	 * Record the new ownership of the AltiVec unit.
	 */
	tf->srr1 |= PSL_VEC;
	curcpu()->ci_veclwp = l;
	pcb->pcb_veccpu = curcpu();
	__asm __volatile ("sync");

	/*
	 * Restore MSR (turn off AltiVec).
	 */
	mtmsr(msr);
}

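/*
 * Save the AltiVec state of whichever LWP currently owns the vector unit
 * on this CPU into that LWP's PCB, then release ownership and disable the
 * unit in the LWP's saved SRR1.
 */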
void
save_vec_cpu(void)
{
	struct cpu_info *ci = curcpu();
	struct lwp *l;
	struct pcb *pcb;
	struct vreg *vr;
	struct trapframe *tf;
	int msr;

	/*
	 * Turn on AltiVec, turn off interrupts.
	 */
	msr = mfmsr();
	mtmsr((msr & ~PSL_EE) | PSL_VEC);
	__asm __volatile ("isync");
	l = ci->ci_veclwp;
	if (l == NULL) {
		goto out;
	}
	pcb = &l->l_addr->u_pcb;
	vr = pcb->pcb_vr;
	tf = trapframe(l);

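	/*
	 * STVX(n,vr) is the inverse of LVX above: it stores vector
	 * register n into the save area slot vr->vreg[n].
	 */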
#define STVX(n,vr) __asm /*__volatile*/("stvx %2,%0,%1" \
	    :: "r"(vr), "r"(offsetof(struct vreg, vreg[n])), "n"(n));

	/*
	 * Save the vector registers.
	 */
	STVX( 0,vr); STVX( 1,vr); STVX( 2,vr); STVX( 3,vr);
	STVX( 4,vr); STVX( 5,vr); STVX( 6,vr); STVX( 7,vr);
	STVX( 8,vr); STVX( 9,vr); STVX(10,vr); STVX(11,vr);
	STVX(12,vr); STVX(13,vr); STVX(14,vr); STVX(15,vr);

	STVX(16,vr); STVX(17,vr); STVX(18,vr); STVX(19,vr);
	STVX(20,vr); STVX(21,vr); STVX(22,vr); STVX(23,vr);
	STVX(24,vr); STVX(25,vr); STVX(26,vr); STVX(27,vr);
	STVX(28,vr); STVX(29,vr); STVX(30,vr); STVX(31,vr);

	/*
	 * Save VSCR (this needs to be done after saving the vector
	 * registers, since we need to use one of them as scratch).
	 */
	__asm __volatile("mfvscr %2; stvewx %2,%0,%1" \
	    :: "r"(vr), "r"(offsetof(struct vreg, vscr)), "n"(0));

	/*
	 * Save VRSAVE.
	 */
	vr->vrsave = tf->tf_xtra[TF_VRSAVE];

	/*
	 * Record that no LWP's vector state is live on this CPU any more,
	 * clear PSL_VEC in the saved SRR1, and stop any active data streams.
	 */
	tf->srr1 &= ~PSL_VEC;
	pcb->pcb_veccpu = NULL;
	ci->ci_veclwp = NULL;
	__asm __volatile ("dssall; sync");

 out:

	/*
	 * Restore MSR (turn off AltiVec).
	 */
	mtmsr(msr);
}

/*
 * Save a process's AltiVec state to its PCB.  The state may be in any CPU.
 * The process must either be curproc or traced by curproc (and stopped).
 * (The point is that the process must not run on another CPU while this
 * function executes.)
 */
void
save_vec_lwp(struct lwp *l)
{
	struct pcb *pcb = &l->l_addr->u_pcb;
	struct cpu_info *ci = curcpu();

	/*
	 * If it's already in the PCB, there's nothing to do.
	 */
	if (pcb->pcb_veccpu == NULL) {
		return;
	}

	/*
	 * If the state is in the current CPU, just flush the current CPU's
	 * state.
	 */
	if (l == ci->ci_veclwp) {
		save_vec_cpu();
		return;
	}

#ifdef MULTIPROCESSOR
	/*
	 * It must be on another CPU; flush it from there.
	 */
	mp_save_vec_lwp(l);
#endif
}

#define ZERO_VEC	19

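/*
 * Zero the physical page at `pa' using AltiVec stores.  AltiVec is enabled
 * and data relocation is turned off so the physical address can be used
 * directly; the single vector register used (ZERO_VEC) is saved on the
 * stack beforehand and restored afterwards.
 */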
void
vzeropage(paddr_t pa)
{
	const paddr_t ea = pa + NBPG;
	uint32_t vec[7], *vp = (void *) roundup((uintptr_t) vec, 16);
	uint32_t omsr, msr;

	__asm __volatile("mfmsr %0" : "=r"(omsr) :);

	/*
	 * Turn on AltiVec, turn off interrupts.
	 */
	msr = (omsr & ~PSL_EE) | PSL_VEC;
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(msr));

	/*
	 * Save the VEC register we are going to use before we disable
	 * relocation.
	 */
	__asm("stvx %1,0,%0" :: "r"(vp), "n"(ZERO_VEC));
	__asm("vxor %0,%0,%0" :: "n"(ZERO_VEC));

	/*
	 * Turn off data relocation (DMMU off).
	 */
	msr &= ~PSL_DR;
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(msr));

	/*
	 * Zero the page, 64 bytes at a time.
	 */
	do {
		__asm("stvx %2,%0,%1" :: "r"(pa), "r"( 0), "n"(ZERO_VEC));
		__asm("stvxl %2,%0,%1" :: "r"(pa), "r"(16), "n"(ZERO_VEC));
		__asm("stvx %2,%0,%1" :: "r"(pa), "r"(32), "n"(ZERO_VEC));
		__asm("stvxl %2,%0,%1" :: "r"(pa), "r"(48), "n"(ZERO_VEC));
		pa += 64;
	} while (pa < ea);

	/*
	 * Restore data relocation (DMMU on).
	 */
	msr |= PSL_DR;
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(msr));

	/*
	 * Restore VEC register (now that we can access the stack again).
	 */
	__asm("lvx %1,0,%0" :: "r"(vp), "n"(ZERO_VEC));

	/*
	 * Restore old MSR (AltiVec OFF).
	 */
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(omsr));
}

#define LO_VEC	16
#define HI_VEC	17

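/*
 * Copy the physical page at `src' to the one at `dst' using AltiVec.
 * As in vzeropage(), data relocation is disabled so the physical addresses
 * can be used directly, and the two vector registers used (LO_VEC and
 * HI_VEC) are preserved across the copy.
 */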
void
vcopypage(paddr_t dst, paddr_t src)
{
	const paddr_t edst = dst + NBPG;
	uint32_t vec[11], *vp = (void *) roundup((uintptr_t) vec, 16);
	uint32_t omsr, msr;

	__asm __volatile("mfmsr %0" : "=r"(omsr) :);

	/*
	 * Turn on AltiVec, turn off interrupts.
	 */
	msr = (omsr & ~PSL_EE) | PSL_VEC;
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(msr));

	/*
	 * Save the VEC registers we will be using before we disable
	 * relocation.
	 */
	__asm("stvx %2,%1,%0" :: "r"(vp), "r"( 0), "n"(LO_VEC));
	__asm("stvx %2,%1,%0" :: "r"(vp), "r"(16), "n"(HI_VEC));

	/*
	 * Turn off data relocation (DMMU off).
	 */
	msr &= ~PSL_DR;
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(msr));

	/*
	 * Copy the page, 32 bytes at a time.  On most PPCs, two vector
	 * registers (32 bytes) occupy one cache line.
	 */
	do {
		__asm("lvx %2,%0,%1" :: "r"(src), "r"( 0), "n"(LO_VEC));
		__asm("stvx %2,%0,%1" :: "r"(dst), "r"( 0), "n"(LO_VEC));
		__asm("lvxl %2,%0,%1" :: "r"(src), "r"(16), "n"(HI_VEC));
		__asm("stvxl %2,%0,%1" :: "r"(dst), "r"(16), "n"(HI_VEC));
		src += 32;
		dst += 32;
	} while (dst < edst);

	/*
	 * Restore data relocation (DMMU on).
	 */
	msr |= PSL_DR;
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(msr));

	/*
	 * Restore VEC registers (now that we can access the stack again).
	 */
	__asm("lvx %2,%1,%0" :: "r"(vp), "r"( 0), "n"(LO_VEC));
	__asm("lvx %2,%1,%0" :: "r"(vp), "r"(16), "n"(HI_VEC));

	/*
	 * Restore old MSR (AltiVec OFF).
	 */
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(omsr));
}

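/*
 * One-time setup, presumably called during machine-dependent startup:
 * create the pool of AltiVec save areas.  The 16-byte alignment matches
 * what lvx/stvx expect of the save area.
 */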
void
init_vec(void)
{
	pool_init(&vecpool, sizeof(struct vreg), 16, 0, 0, "vecpl", NULL);
}