/*	$NetBSD: xen_pmap.c,v 1.10.2.1 2012/02/18 07:33:45 mrg Exp $	*/

/*
 * Copyright (c) 2007 Manuel Bouyer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 * Copyright (c) 2006 Mathieu Ropert <mro (at) adviseo.fr>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright 2001 (c) Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Frank van der Linden for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: xen_pmap.c,v 1.10.2.1 2012/02/18 07:33:45 mrg Exp $");

#include "opt_user_ldt.h"
#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"
#include "opt_xen.h"
#if !defined(__x86_64__)
#include "opt_kstack_dr0.h"
#endif /* !defined(__x86_64__) */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/pool.h>
#include <sys/kernel.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/intr.h>
#include <sys/xcall.h>

#include <uvm/uvm.h>

#include <dev/isa/isareg.h>

#include <machine/specialreg.h>
#include <machine/gdt.h>
#include <machine/isa_machdep.h>
#include <machine/cpuvar.h>

#include <x86/pmap.h>
#include <x86/pmap_pv.h>

#include <x86/i82489reg.h>
#include <x86/i82489var.h>

#include <xen/xen-public/xen.h>
#include <xen/hypervisor.h>
#include <xen/xenpmap.h>

#define COUNT(x) /* nothing */

extern pd_entry_t * const normal_pdes[];

extern paddr_t pmap_pa_start; /* PA of first physical page for this domain */
extern paddr_t pmap_pa_end;   /* PA of last physical page for this domain */

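/*
 * pmap_enter: enter a physical -> virtual mapping
 *
 * => converts the physical address to a machine address with xpmap_ptom()
 *    when it belongs to this domain; addresses outside
 *    [pmap_pa_start, pmap_pa_end) are passed through unchanged
 * => the actual work is done by pmap_enter_ma() with DOMID_SELF
 */
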
int
pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
{
	paddr_t ma;

	if (__predict_false(pa < pmap_pa_start || pmap_pa_end <= pa)) {
		ma = pa; /* XXX hack */
	} else {
		ma = xpmap_ptom(pa);
	}

	return pmap_enter_ma(pmap, va, ma, pa, prot, flags, DOMID_SELF);
}

/*
 * pmap_kenter_ma: enter a kernel mapping without R/M (pv_entry) tracking
 *
 * => no need to lock anything, assume va is already allocated
 * => should be faster than normal pmap enter function
 * => we expect a MACHINE address
 */

void
pmap_kenter_ma(vaddr_t va, paddr_t ma, vm_prot_t prot, u_int flags)
{
	pt_entry_t *pte, opte, npte;

	if (va < VM_MIN_KERNEL_ADDRESS)
		pte = vtopte(va);
	else
		pte = kvtopte(va);

	npte = ma | ((prot & VM_PROT_WRITE) ? PG_RW : PG_RO) |
	     PG_V | PG_k;
	if (flags & PMAP_NOCACHE)
		npte |= PG_N;

	if ((cpu_feature[2] & CPUID_NOX) && !(prot & VM_PROT_EXECUTE))
		npte |= PG_NX;

	opte = pmap_pte_testset(pte, npte); /* zap! */

	if (pmap_valid_entry(opte)) {
#if defined(MULTIPROCESSOR)
		kpreempt_disable();
		pmap_tlb_shootdown(pmap_kernel(), va, opte, TLBSHOOT_KENTER);
		kpreempt_enable();
#else
		/* Don't bother deferring in the single CPU case. */
		pmap_update_pg(va);
#endif
	}
}

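/*
 * Illustrative sketch (not part of the original code, assumptions noted):
 * a caller holding a pre-allocated kernel VA could map one machine frame
 * handed out by Xen, read/write and uncached, roughly like this.  The
 * pmap_update() call afterwards follows the usual pmap_kenter convention
 * and is an assumption here, not something this file requires:
 *
 *	pmap_kenter_ma(va, ma, VM_PROT_READ | VM_PROT_WRITE, PMAP_NOCACHE);
 *	pmap_update(pmap_kernel());
 */
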
/*
 * pmap_extract_ma: extract a MA for the given VA
 */

bool
pmap_extract_ma(struct pmap *pmap, vaddr_t va, paddr_t *pap)
{
	pt_entry_t *ptes, pte;
	pd_entry_t pde;
	pd_entry_t * const *pdes;
	struct pmap *pmap2;

	kpreempt_disable();
	pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
	if (!pmap_pdes_valid(va, pdes, &pde)) {
		pmap_unmap_ptes(pmap, pmap2);
		kpreempt_enable();
		return false;
	}

	pte = ptes[pl1_i(va)];
	pmap_unmap_ptes(pmap, pmap2);
	kpreempt_enable();

	if (__predict_true((pte & PG_V) != 0)) {
		if (pap != NULL)
			*pap = (pte & PG_FRAME) | (va & (NBPD_L1 - 1));
		return true;
	}

	return false;
}

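/*
 * Illustrative sketch (not part of the original code): looking up the
 * machine address backing a kernel virtual address, e.g. before passing
 * it to the hypervisor:
 *
 *	paddr_t ma;
 *
 *	if (pmap_extract_ma(pmap_kernel(), va, &ma))
 *		... ma is the machine address of the mapping at va ...
 */
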
/*
 * Flush all APDP entries found in pmaps.
 * Required during Xen save/restore operations, as Xen does not
 * handle alternative recursive mappings properly.
 */
void
pmap_xen_suspend(void)
{
	int s;

	s = splvm();
	xpq_flush_queue();
	splx(s);

#ifdef PAE
	pmap_unmap_recursive_entries();
#endif
}

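/*
 * pmap_xen_resume: counterpart of pmap_xen_suspend(); on PAE it
 * re-establishes the recursive mapping entries once the domain has
 * been restored.
 */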
void
pmap_xen_resume(void)
{
#ifdef PAE
	pmap_map_recursive_entries();
#endif
}

#ifdef PAE
/*
 * NetBSD uses L2 shadow pages to support PAE with Xen. However, Xen does not
 * handle them correctly during save/restore, leading to incorrect page
 * tracking and pinning during restore.
 * For save/restore to succeed, two functions are introduced:
 * - pmap_map_recursive_entries(), used by resume code to set the recursive
 *   mapping entries to their correct value
 * - pmap_unmap_recursive_entries(), used by suspend code to clear all
 *   PDIR_SLOT_PTE entries
 */
void
pmap_map_recursive_entries(void)
{
	int i;
	struct pmap *pm;

	mutex_enter(&pmaps_lock);
	LIST_FOREACH(pm, &pmaps, pm_list) {
		for (i = 0; i < PDP_SIZE; i++) {
			xpq_queue_pte_update(
			    xpmap_ptom(pmap_pdirpa(pm, PDIR_SLOT_PTE + i)),
			    xpmap_ptom((pm)->pm_pdirpa[i]) | PG_V);
		}
	}
	mutex_exit(&pmaps_lock);

	for (i = 0; i < PDP_SIZE; i++) {
		xpq_queue_pte_update(
		    xpmap_ptom(pmap_pdirpa(pmap_kernel(), PDIR_SLOT_PTE + i)),
		    xpmap_ptom(pmap_kernel()->pm_pdirpa[i]) | PG_V);
	}

	xpq_flush_queue();
}

void
pmap_unmap_recursive_entries(void)
{
	int i;
	struct pmap *pm;

	/*
	 * Invalidate pmap_pdp_cache as it contains L2-pinned objects with
	 * recursive entries.
	 * XXX jym@ : find a way to drain per-CPU caches too;
	 * pool_cache_invalidate() does not do that.
	 */
	pool_cache_invalidate(&pmap_pdp_cache);

	mutex_enter(&pmaps_lock);
	LIST_FOREACH(pm, &pmaps, pm_list) {
		for (i = 0; i < PDP_SIZE; i++) {
			xpq_queue_pte_update(
			    xpmap_ptom(pmap_pdirpa(pm, PDIR_SLOT_PTE + i)), 0);
		}
	}
	mutex_exit(&pmaps_lock);

	/* do it for pmap_kernel() too! */
	for (i = 0; i < PDP_SIZE; i++)
		xpq_queue_pte_update(
		    xpmap_ptom(pmap_pdirpa(pmap_kernel(), PDIR_SLOT_PTE + i)),
		    0);

	xpq_flush_queue();
}
#endif /* PAE */

#if defined(PAE) || defined(__x86_64__)

extern struct cpu_info * (*xpq_cpu)(void);
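
/*
 * pmap_kpm_setpte: queue an update of the per-CPU shadow kernel page
 * directory (ci_kpm_pdir) so that its entry matches the given pmap's
 * PDE at 'index' (translated with l2tol2() on PAE), then flush the
 * hypervisor update queue.
 */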
static __inline void
pmap_kpm_setpte(struct cpu_info *ci, struct pmap *pmap, int index)
{
	if (pmap == pmap_kernel()) {
		KASSERT(index >= PDIR_SLOT_KERN);
	}
#ifdef PAE
	xpq_queue_pte_update(
	    xpmap_ptetomach(&ci->ci_kpm_pdir[l2tol2(index)]),
	    pmap->pm_pdir[index]);
#elif defined(__x86_64__)
	xpq_queue_pte_update(
	    xpmap_ptetomach(&ci->ci_kpm_pdir[index]),
	    pmap->pm_pdir[index]);
#endif /* PAE */
	xpq_flush_queue();
}

/*
 * Synchronise shadow pdir with the pmap on all cpus on which it is
 * loaded.
 */
void
xen_kpm_sync(struct pmap *pmap, int index)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	KASSERT(pmap != NULL);

	pmap_pte_flush();

	for (CPU_INFO_FOREACH(cii, ci)) {
		if (ci == NULL) {
			continue;
		}
		if (pmap != pmap_kernel() &&
		    (ci->ci_cpumask & pmap->pm_xen_ptp_cpus) == 0)
			continue;

		/* take the lock and check again */
		mutex_enter(&ci->ci_kpm_mtx);
		if (pmap == pmap_kernel() ||
		    (ci->ci_cpumask & pmap->pm_xen_ptp_cpus) != 0) {
			pmap_kpm_setpte(ci, pmap, index);
		}
		mutex_exit(&ci->ci_kpm_mtx);
	}
	return;
}

#endif /* PAE || __x86_64__ */