/*	$NetBSD: hypervisor_machdep.c,v 1.46 2023/03/01 08:13:44 riastradh Exp $	*/

/*
 *
 * Copyright (c) 2004 Christian Limpach.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/******************************************************************************
 * hypervisor.c
 *
 * Communication to/from hypervisor.
 *
 * Copyright (c) 2002-2004, K A Fraser
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */


#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: hypervisor_machdep.c,v 1.46 2023/03/01 08:13:44 riastradh Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/cpu.h>
#include <sys/ksyms.h>

#include <uvm/uvm_extern.h>

#include <machine/vmparam.h>
#include <machine/pmap.h>
#include <machine/pmap_private.h>

#include <x86/machdep.h>
#include <x86/cpuvar.h>

#include <xen/xen.h>
#include <xen/intr.h>
#include <xen/hypervisor.h>
#include <xen/evtchn.h>
#include <xen/xenpmap.h>

#include "opt_xen.h"
#include "opt_modular.h"
#include "opt_ddb.h"
#include "isa.h"
#include "pci.h"
#include "ksyms.h"

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_extern.h>
#include <ddb/db_output.h>
#include <ddb/db_interface.h>
#endif

#ifdef XENPV
/*
 * arch-dependent p2m frame lists list (L3 and L2)
 * used by Xen for save/restore mappings
 */
static unsigned long * l3_p2m_page;
static unsigned long * l2_p2m_page;
static int l2_p2m_page_size; /* size of L2 page, in pages */

static void build_p2m_frame_list_list(void);
static void update_p2m_frame_list_list(void);

#endif

// #define PORT_DEBUG 4
// #define EARLY_DEBUG_EVENT

/* callback function type */
typedef void (*iterate_func_t)(unsigned int, unsigned int,
    unsigned int, void *);
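/*
 * Scan a two-level event bitmap and invoke the callback once for each
 * set bit that is enabled for this CPU.  The L1 selector word is
 * atomically fetched and cleared; for each set L1 bit, the matching L2
 * word yields the pending ports (port = (l1i << LONG_SHIFT) + l2i).
 * If a mask array is given, the bits being handled are set there
 * before they are cleared from pendingl2.
 */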
static inline void
evt_iterate_bits(volatile unsigned long *pendingl1,
    volatile unsigned long *pendingl2,
    volatile unsigned long *mask,
    iterate_func_t iterate_pending, void *iterate_args)
{

	KASSERT(pendingl1 != NULL);
	KASSERT(pendingl2 != NULL);

	unsigned long l1, l2;
	unsigned int l1i, l2i, port;

	l1 = xen_atomic_xchg(pendingl1, 0);
	while ((l1i = xen_ffs(l1)) != 0) {
		l1i--;
		l1 &= ~(1UL << l1i);

		l2 = pendingl2[l1i] & (mask != NULL ? ~mask[l1i] : -1UL);
		l2 &= curcpu()->ci_evtmask[l1i];

		if (mask != NULL) xen_atomic_setbits_l(&mask[l1i], l2);
		xen_atomic_clearbits_l(&pendingl2[l1i], l2);

		while ((l2i = xen_ffs(l2)) != 0) {
			l2i--;
			l2 &= ~(1UL << l2i);

			port = (l1i << LONG_SHIFT) + l2i;

			iterate_pending(port, l1i, l2i, iterate_args);
		}
	}
}

/*
 * Set per-cpu "pending" information for outstanding events that
 * cannot be processed now.
 */

static inline void
evt_set_pending(unsigned int port, unsigned int l1i,
    unsigned int l2i, void *args)
{

	KASSERT(args != NULL);

	int *ret = args;
	struct intrhand *ih;

	if (evtsource[port]) {
		hypervisor_set_ipending(evtsource[port]->ev_imask, l1i, l2i);
		evtsource[port]->ev_evcnt.ev_count++;
		ih = evtsource[port]->ev_handlers;
		while (ih != NULL) {
			ih->ih_pending++;
			ih = ih->ih_evt_next;
		}

		if (*ret == 0 && curcpu()->ci_ilevel <
		    evtsource[port]->ev_maxlevel)
			*ret = 1;
	}
#ifdef DOM0OPS
	else {
		/* set pending event */
		xenevt_setipending(l1i, l2i);
	}
#endif
}
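/*
 * stipending: check for event-channel upcalls that were posted while
 * the CPU was not accepting them.  Each pending event is recorded in
 * per-cpu pending-interrupt state (via evt_set_pending) rather than
 * handled directly.  Returns 1 if a recorded event requires processing
 * above the current IPL, 0 otherwise.
 */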
int stipending(void);
int
stipending(void)
{
	volatile shared_info_t *s = HYPERVISOR_shared_info;
	struct cpu_info *ci;
	volatile struct vcpu_info *vci;
	int ret;

	kpreempt_disable();

	ret = 0;
	ci = curcpu();
	vci = ci->ci_vcpu;

#if 0
	if (HYPERVISOR_shared_info->events)
		printf("stipending events %08lx mask %08lx ilevel %d\n",
		    HYPERVISOR_shared_info->events,
		    HYPERVISOR_shared_info->events_mask, ci->ci_ilevel);
#endif

#ifdef EARLY_DEBUG_EVENT
	if (xen_atomic_test_bit(&s->evtchn_pending[0], debug_port)) {
		xen_debug_handler(NULL);
		xen_atomic_clear_bit(&s->evtchn_pending[0], debug_port);
	}
#endif

	/*
	 * we're only called after STIC, so we know that we'll have to
	 * STI at the end
	 */

	while (vci->evtchn_upcall_pending) {
		x86_disable_intr();

		vci->evtchn_upcall_pending = 0;

		evt_iterate_bits(&vci->evtchn_pending_sel,
		    s->evtchn_pending, s->evtchn_mask,
		    evt_set_pending, &ret);

		x86_enable_intr();
	}

	kpreempt_enable();

	return (ret);
}

/* Iterate through pending events and call the event handler */

static inline void
evt_do_hypervisor_callback(unsigned int port, unsigned int l1i,
    unsigned int l2i, void *args)
{
	KASSERT(args != NULL);

#ifdef DOM0OPS
	struct cpu_info *ci = curcpu();
#endif
	struct intrframe *regs = args;

#ifdef PORT_DEBUG
	if (port == PORT_DEBUG)
		printf("do_hypervisor_callback event %d\n", port);
#endif
	if (evtsource[port]) {
		KASSERT(cpu_intr_p());
		evtchn_do_event(port, regs);
	}
#ifdef DOM0OPS
	else {
		if (ci->ci_ilevel < IPL_HIGH) {
			/* fast path */
			int oipl = ci->ci_ilevel;
			ci->ci_ilevel = IPL_HIGH;
			KASSERT(cpu_intr_p());
			xenevt_event(port);
			ci->ci_ilevel = oipl;
		} else {
			/* set pending event */
			xenevt_setipending(l1i, l2i);
		}
	}
#endif
}

void
do_hypervisor_callback(struct intrframe *regs)
{
	volatile shared_info_t *s = HYPERVISOR_shared_info;
	struct cpu_info *ci;
	volatile struct vcpu_info *vci;
	uint64_t level __diagused;

	ci = curcpu();
	vci = ci->ci_vcpu;
	level = ci->ci_ilevel;

	/* Save trapframe for clock handler */
	KASSERT(regs != NULL);
	ci->ci_xen_clockf_usermode = USERMODE(regs->_INTRFRAME_CS);
	ci->ci_xen_clockf_pc = regs->_INTRFRAME_IP;

	// DDD printf("do_hypervisor_callback\n");

#ifdef EARLY_DEBUG_EVENT
	if (xen_atomic_test_bit(&s->evtchn_pending[0], debug_port)) {
		xen_debug_handler(NULL);
		xen_atomic_clear_bit(&s->evtchn_pending[0], debug_port);
	}
#endif

	while (vci->evtchn_upcall_pending) {
		vci->evtchn_upcall_pending = 0;

		evt_iterate_bits(&vci->evtchn_pending_sel,
		    s->evtchn_pending, s->evtchn_mask,
		    evt_do_hypervisor_callback, regs);
	}

#ifdef DIAGNOSTIC
	if (level != ci->ci_ilevel)
		printf("hypervisor done %08x level %" PRIu64 "/%" PRIu64
		    " ipending %0" PRIx64 "\n",
		    (uint)vci->evtchn_pending_sel,
		    level, (uint64_t)ci->ci_ilevel, (uint64_t)ci->ci_ipending);
#endif
}
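/*
 * hypervisor_send_event: mark an event as pending for a CPU by hand
 * and kick its event callback.  Kept for reference, but currently
 * compiled out (#if 0).
 */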
#if 0
void
hypervisor_send_event(struct cpu_info *ci, unsigned int ev)
{
	KASSERT(ci != NULL);

	volatile shared_info_t *s = HYPERVISOR_shared_info;
	volatile struct vcpu_info *vci = ci->ci_vcpu;

#ifdef PORT_DEBUG
	if (ev == PORT_DEBUG)
		printf("hypervisor_send_event %d\n", ev);
#endif

	xen_atomic_set_bit(&s->evtchn_pending[0], ev);

	if (__predict_false(ci == curcpu())) {
		xen_atomic_set_bit(&vci->evtchn_pending_sel,
		    ev >> LONG_SHIFT);
		xen_atomic_set_bit(&vci->evtchn_upcall_pending, 0);
	}

	xen_atomic_clear_bit(&s->evtchn_mask[0], ev);

	if (__predict_true(ci == curcpu())) {
		hypervisor_force_callback();
	} else {
		if (__predict_false(xen_send_ipi(ci, XEN_IPI_HVCB))) {
			panic("xen_send_ipi(cpu%d id %d, XEN_IPI_HVCB) failed\n",
			    (int) ci->ci_cpuid, ci->ci_vcpuid);
		}
	}
}
#endif

void
hypervisor_unmask_event(unsigned int ev)
{

	KASSERT(ev > 0 && ev < NR_EVENT_CHANNELS);

#ifdef PORT_DEBUG
	if (ev == PORT_DEBUG)
		printf("hypervisor_unmask_event %d\n", ev);
#endif

	/* Xen unmasks the evtchn_mask[0]:ev bit for us. */
	evtchn_op_t op;
	op.cmd = EVTCHNOP_unmask;
	op.u.unmask.port = ev;
	if (HYPERVISOR_event_channel_op(&op) != 0)
		panic("Failed to unmask event %d\n", ev);

	return;
}

void
hypervisor_mask_event(unsigned int ev)
{
	volatile shared_info_t *s = HYPERVISOR_shared_info;
#ifdef PORT_DEBUG
	if (ev == PORT_DEBUG)
		printf("hypervisor_mask_event %d\n", ev);
#endif

	xen_atomic_set_bit(&s->evtchn_mask[0], ev);
}

void
hypervisor_clear_event(unsigned int ev)
{
	volatile shared_info_t *s = HYPERVISOR_shared_info;
#ifdef PORT_DEBUG
	if (ev == PORT_DEBUG)
		printf("hypervisor_clear_event %d\n", ev);
#endif

	xen_atomic_clear_bit(&s->evtchn_pending[0], ev);
}

static inline void
evt_enable_event(unsigned int port, unsigned int l1i,
    unsigned int l2i, void *args)
{
	KASSERT(args == NULL);
	hypervisor_unmask_event(port);
#if defined(XENPV) && (NPCI > 0 || NISA > 0)
	hypervisor_ack_pirq_event(port);
#endif /* NPCI > 0 || NISA > 0 */
}
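/*
 * hypervisor_enable_sir: re-enable every event channel recorded in the
 * pending-event bitmaps of the given soft interrupt source.  Events
 * are unmasked (and, on XENPV with PCI or ISA, the pirq is acked) once
 * all of their callbacks have run.
 */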
void
hypervisor_enable_sir(unsigned int sir)
{
	struct cpu_info *ci = curcpu();

	/*
	 * enable all events for ipl. As we only set an event in ipl_evt_mask
	 * for its lowest IPL, and pending IPLs are processed high to low,
	 * we know that all callbacks for this event have been processed.
	 */

	evt_iterate_bits(&ci->ci_isources[sir]->ipl_evt_mask1,
	    ci->ci_isources[sir]->ipl_evt_mask2, NULL,
	    evt_enable_event, NULL);

}
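/*
 * hypervisor_set_ipending: mark the soft interrupts in imask pending
 * on the current CPU, and record the event's l1/l2 bit coordinates in
 * the lowest of those soft interrupt sources, so that
 * hypervisor_enable_sir() can re-enable the event once it has been
 * serviced.
 */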
void
hypervisor_set_ipending(uint64_t imask, int l1, int l2)
{

	/* This function is not re-entrant */
	KASSERT(x86_read_psl() != 0);

	int sir;
	struct cpu_info *ci = curcpu();

	/* set pending bit for the appropriate IPLs */
	ci->ci_ipending |= imask;

	/*
	 * And set event pending bit for the lowest IPL. As IPLs are handled
	 * from high to low, this ensures that all callbacks will have been
	 * called when we ack the event
	 */
	sir = ffs(imask);
	KASSERT(sir > SIR_XENIPL_VM);
	sir--;
	KASSERT(sir <= SIR_XENIPL_HIGH);
	KASSERT(ci->ci_isources[sir] != NULL);
	ci->ci_isources[sir]->ipl_evt_mask1 |= 1UL << l1;
	ci->ci_isources[sir]->ipl_evt_mask2[l1] |= 1UL << l2;
	KASSERT(ci == curcpu());
#if 0
	if (__predict_false(ci != curcpu())) {
		if (xen_send_ipi(ci, XEN_IPI_HVCB)) {
			panic("hypervisor_set_ipending: "
			    "xen_send_ipi(cpu%d id %d, XEN_IPI_HVCB) failed\n",
			    (int) ci->ci_cpuid, ci->ci_vcpuid);
		}
	}
#endif
}

void
hypervisor_machdep_attach(void)
{
#ifdef XENPV
	/* dom0 does not require the arch-dependent P2M translation table */
	if (!xendomain_is_dom0()) {
		build_p2m_frame_list_list();
		sysctl_xen_suspend_setup();
	}
#endif
}

void
hypervisor_machdep_resume(void)
{
#ifdef XENPV
	/* dom0 does not require the arch-dependent P2M translation table */
	if (!xendomain_is_dom0())
		update_p2m_frame_list_list();
#endif
}

/*
 * idle_block()
 *
 *	Called from the idle loop when we have nothing to do but wait
 *	for an interrupt.
 */
static void
idle_block(void)
{
	KASSERT(curcpu()->ci_ipending == 0);
	HYPERVISOR_block();
	KASSERT(curcpu()->ci_ipending == 0);
}

void
x86_cpu_idle_xen(void)
{
	struct cpu_info *ci = curcpu();

	KASSERT(ci->ci_ilevel == IPL_NONE);

	x86_disable_intr();
	if (__predict_false(!ci->ci_want_resched)) {
		idle_block();
	} else {
		x86_enable_intr();
	}
}

#ifdef XENPV
/*
 * Generate the p2m_frame_list_list table,
 * needed for guest save/restore
 */
static void
build_p2m_frame_list_list(void)
{
	int fpp; /* number of page (frame) pointers per page */
	unsigned long max_pfn;
	/*
	 * The p2m list is composed of three levels of indirection,
	 * each layer containing MFNs pointing to lower level pages
	 * The indirection is used to convert a given PFN to its MFN
	 * Each N level page can point to @fpp (N-1) level pages
	 * For example, for x86 32bit, we have:
	 * - PAGE_SIZE: 4096 bytes
	 * - fpp: 1024 (one L3 page can address 1024 L2 pages)
	 * A L1 page contains the list of MFN we are looking for
	 */
	max_pfn = xen_start_info.nr_pages;
	fpp = PAGE_SIZE / sizeof(xen_pfn_t);

	/* we only need one L3 page */
	l3_p2m_page = (vaddr_t *)uvm_km_alloc(kernel_map, PAGE_SIZE,
	    PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_NOWAIT);
	if (l3_p2m_page == NULL)
		panic("could not allocate memory for l3_p2m_page");

	/*
	 * Determine how many L2 pages we need for the mapping
	 * Each L2 can map a total of @fpp L1 pages
	 */
	l2_p2m_page_size = howmany(max_pfn, fpp);

	l2_p2m_page = (vaddr_t *)uvm_km_alloc(kernel_map,
	    l2_p2m_page_size * PAGE_SIZE,
	    PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_NOWAIT);
	if (l2_p2m_page == NULL)
		panic("could not allocate memory for l2_p2m_page");

	/* We now have L3 and L2 pages ready, update L1 mapping */
	update_p2m_frame_list_list();

}
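/*
 * Likewise, for a 64-bit PV guest:
 * - PAGE_SIZE: 4096 bytes, sizeof(xen_pfn_t): 8 bytes, so fpp = 512
 * - each L1 page lists 512 MFNs, each L2 page addresses 512 L1 pages
 * - the single L3 page can therefore cover up to 512*512*512 = 2^27
 *   PFNs, i.e. 512GB of guest memory
 */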
/*
 * Update the L1 p2m_frame_list_list mapping (during guest boot or resume)
 */
static void
update_p2m_frame_list_list(void)
{
	int i;
	int fpp; /* number of page (frame) pointers per page */
	unsigned long max_pfn;

	max_pfn = xen_start_info.nr_pages;
	fpp = PAGE_SIZE / sizeof(xen_pfn_t);

	for (i = 0; i < l2_p2m_page_size; i++) {
		/*
		 * Each time we start a new L2 page,
		 * store its MFN in the L3 page
		 */
		if ((i % fpp) == 0) {
			l3_p2m_page[i/fpp] = vtomfn(
			    (vaddr_t)&l2_p2m_page[i]);
		}
		/*
		 * we use a shortcut:
		 * since the @xpmap_phys_to_machine_mapping array
		 * already contains the PFN to MFN mapping, we just
		 * set the l2_p2m_page MFN pointer to the MFN of the
		 * corresponding frame of @xpmap_phys_to_machine_mapping
		 */
		l2_p2m_page[i] = vtomfn((vaddr_t)
		    &xpmap_phys_to_machine_mapping[i*fpp]);
	}

	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
	    vtomfn((vaddr_t)l3_p2m_page);
	HYPERVISOR_shared_info->arch.max_pfn = max_pfn;

}
#endif /* XENPV */

void
xen_init_ksyms(void)
{
#if NKSYMS || defined(DDB) || defined(MODULAR)
	extern int end;
	extern int *esym;
#ifdef DDB
	db_machine_init();
#endif

#ifdef XENPV
	esym = xen_start_info.mod_start ?
	    (void *)xen_start_info.mod_start :
	    (void *)xen_start_info.mfn_list;
#endif /* XENPV */
	/* for PVH, esym is set in locore.S */
	ksyms_addsyms_elf(*(int *)(void *)&end,
	    ((int *)(void *)&end) + 1, esym);
#endif
}