Home | History | Annotate | Line # | Download | only in x86
      1 /*	$NetBSD: lapic.c,v 1.93 2025/05/02 07:08:11 imil Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2000, 2008, 2020 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by RedBack Networks Inc.
      9  *
     10  * Author: Bill Sommerfeld
     11  *
     12  * Redistribution and use in source and binary forms, with or without
     13  * modification, are permitted provided that the following conditions
     14  * are met:
     15  * 1. Redistributions of source code must retain the above copyright
     16  *    notice, this list of conditions and the following disclaimer.
     17  * 2. Redistributions in binary form must reproduce the above copyright
     18  *    notice, this list of conditions and the following disclaimer in the
     19  *    documentation and/or other materials provided with the distribution.
     20  *
     21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     31  * POSSIBILITY OF SUCH DAMAGE.
     32  */
     33 
     34 #include <sys/cdefs.h>
     35 __KERNEL_RCSID(0, "$NetBSD: lapic.c,v 1.93 2025/05/02 07:08:11 imil Exp $");
     36 
     37 #include "acpica.h"
     38 #include "ioapic.h"
     39 #include "opt_acpi.h"
     40 #include "opt_ddb.h"
     41 #include "opt_mpbios.h"		/* for MPDEBUG */
     42 #include "opt_multiprocessor.h"
     43 #include "opt_ntp.h"
     44 #include "opt_xen.h"
     45 
     46 
     47 #include <sys/param.h>
     48 #include <sys/proc.h>
     49 #include <sys/systm.h>
     50 #include <sys/device.h>
     51 #include <sys/timetc.h>
     52 
     53 #include <uvm/uvm_extern.h>
     54 
     55 #include <dev/ic/i8253reg.h>
     56 
     57 #include <x86/machdep.h>
     58 #include <machine/cpu.h>
     59 #include <machine/cpu_counter.h>
     60 #include <machine/cpufunc.h>
     61 #include <machine/cpuvar.h>
     62 #include <machine/pmap.h>
     63 #include <machine/vmparam.h>
     64 #include <machine/mpacpi.h>
     65 #include <machine/mpbiosvar.h>
     66 #include <machine/pcb.h>
     67 #include <machine/pmap_private.h>
     68 #include <machine/specialreg.h>
     69 #include <machine/segments.h>
     70 #include <x86/x86/tsc.h>
     71 #include <x86/i82093var.h>
     72 
     73 #include <machine/apicvar.h>
     74 #include <machine/i82489reg.h>
     75 #include <machine/i82489var.h>
     76 
     77 #ifndef XENPV
     78 #if NACPICA > 0
     79 #include <dev/acpi/acpica.h>
     80 #include <dev/acpi/acpivar.h>
     81 #endif
     82 
     83 #ifdef DDB
     84 #include <machine/db_machdep.h>
     85 #ifdef MULTIPROCESSOR
     86 #ifdef __x86_64__
     87 typedef void (vector)(void);
     88 extern vector Xintr_x2apic_ddbipi;
     89 extern int ddb_vec;
     90 #endif
     91 #endif
     92 #endif
     93 
     94 #include <dev/vmt/vmtreg.h>	/* for vmt_hvcall() */
     95 #include <dev/vmt/vmtvar.h>	/* for vmt_hvcall() */
     96 
/*
 * Timer interrupt entry point.  Not static: referenced from vector.S.
 */
void		lapic_clockintr(void *, struct intrframe *);

/* File-local timer/delay helpers and BSP bring-up routines. */
static void	lapic_delay(unsigned int);
static uint32_t lapic_gettick(void);
static void	lapic_setup_bsp(paddr_t);
static void	lapic_map(paddr_t);

/* Callbacks installed in the local_pic descriptor. */
static void lapic_hwmask(struct pic *, int);
static void lapic_hwunmask(struct pic *, int);
static void lapic_setup(struct pic *, struct cpu_info *, int, int, int);
/* Make it public to call via ddb */
void	lapic_dump(void);
    110 
    111 struct pic local_pic = {
    112 	.pic_name = "lapic",
    113 	.pic_type = PIC_LAPIC,
    114 	.pic_lock = __SIMPLELOCK_UNLOCKED,
    115 	.pic_hwmask = lapic_hwmask,
    116 	.pic_hwunmask = lapic_hwunmask,
    117 	.pic_addroute = lapic_setup,
    118 	.pic_delroute = lapic_setup,
    119 	.pic_intr_get_devname = x86_intr_get_devname,
    120 	.pic_intr_get_assigned = x86_intr_get_assigned,
    121 	.pic_intr_get_count = x86_intr_get_count,
    122 };
    123 
/* IPI senders for the memory-mapped and the MSR-based access modes. */
static int i82489_ipi(int vec, int target, int dl);
static int x2apic_ipi(int vec, int target, int dl);
/* Starts as the i82489 sender; lapic_setup_bsp() may switch it. */
int (*x86_ipi)(int, int, int) = i82489_ipi;

/* Set by lapic_setup_bsp() when the x2APIC access path is selected. */
bool x2apic_mode __read_mostly;
/* Requested x2APIC setting; default depends on LAPIC_ENABLE_X2APIC. */
#ifdef LAPIC_ENABLE_X2APIC
bool x2apic_enable = true;
#else
bool x2apic_enable = false;
#endif
/* When true, lapic_initclock() skips the second calibration pass. */
bool lapic_from_cpuid = false;

/* Set by lapic_calibrate_timer() under KVM; checked by lapic_delay(). */
static bool lapic_broken_periodic __read_mostly;
    137 
    138 static uint32_t
    139 i82489_readreg(u_int reg)
    140 {
    141 	return *((volatile uint32_t *)(local_apic_va + reg));
    142 }
    143 
    144 static void
    145 i82489_writereg(u_int reg, uint32_t val)
    146 {
    147 	*((volatile uint32_t *)(local_apic_va + reg)) = val;
    148 }
    149 
    150 static uint32_t
    151 i82489_cpu_number(void)
    152 {
    153 	return i82489_readreg(LAPIC_ID) >> LAPIC_ID_SHIFT;
    154 }
    155 
    156 static uint32_t
    157 x2apic_readreg(u_int reg)
    158 {
    159 	return rdmsr(MSR_X2APIC_BASE + (reg >> 4));
    160 }
    161 
    162 static void
    163 x2apic_writereg(u_int reg, uint32_t val)
    164 {
    165 	x86_mfence();
    166 	wrmsr(MSR_X2APIC_BASE + (reg >> 4), val);
    167 }
    168 
    169 static void
    170 x2apic_writereg64(u_int reg, uint64_t val)
    171 {
    172 	KDASSERT(reg == LAPIC_ICRLO);
    173 	x86_mfence();
    174 	wrmsr(MSR_X2APIC_BASE + (reg >> 4), val);
    175 }
    176 
    177 static void
    178 x2apic_write_icr(uint32_t hi, uint32_t lo)
    179 {
    180 	x2apic_writereg64(LAPIC_ICRLO, ((uint64_t)hi << 32) | lo);
    181 }
    182 
    183 static uint32_t
    184 x2apic_cpu_number(void)
    185 {
    186 	return x2apic_readreg(LAPIC_ID);
    187 }
    188 
    189 uint32_t
    190 lapic_readreg(u_int reg)
    191 {
    192 	if (x2apic_mode)
    193 		return x2apic_readreg(reg);
    194 	return i82489_readreg(reg);
    195 }
    196 
    197 void
    198 lapic_writereg(u_int reg, uint32_t val)
    199 {
    200 	if (x2apic_mode)
    201 		x2apic_writereg(reg, val);
    202 	else
    203 		i82489_writereg(reg, val);
    204 }
    205 
    206 void
    207 lapic_write_tpri(uint32_t val)
    208 {
    209 
    210 	val &= LAPIC_TPRI_MASK;
    211 #ifdef i386
    212 	lapic_writereg(LAPIC_TPRI, val);
    213 #else
    214 	lcr8(val >> 4);
    215 #endif
    216 }
    217 
    218 uint32_t
    219 lapic_cpu_number(void)
    220 {
    221 	if (x2apic_mode)
    222 		return x2apic_cpu_number();
    223 	return i82489_cpu_number();
    224 }
    225 
    226 static void
    227 lapic_enable_x2apic(void)
    228 {
    229 	uint64_t apicbase;
    230 
    231 	apicbase = rdmsr(MSR_APICBASE);
    232 	if (!ISSET(apicbase, APICBASE_EN)) {
    233 		apicbase |= APICBASE_EN;
    234 		wrmsr(MSR_APICBASE, apicbase);
    235 	}
    236 	apicbase |= APICBASE_EXTD;
    237 	wrmsr(MSR_APICBASE, apicbase);
    238 }
    239 
    240 bool
    241 lapic_is_x2apic(void)
    242 {
    243 	uint64_t msr;
    244 
    245 	if (!ISSET(cpu_feature[0], CPUID_APIC) ||
    246 	    rdmsr_safe(MSR_APICBASE, &msr) == EFAULT)
    247 		return false;
    248 	return (msr & (APICBASE_EN | APICBASE_EXTD)) ==
    249 	    (APICBASE_EN | APICBASE_EXTD);
    250 }
    251 
/*
 * Initialize the local APIC on the BSP.
 *
 * Chooses between x2APIC (MSR) access and memory-mapped access.  When
 * the CPU advertises x2APIC, a "reason" string is collected for each
 * known-problematic environment; the final decision also takes the
 * BIOS state and the x2apic_enable tunable into account.  In x2APIC
 * mode the IPI sender, ioapic stubs and (amd64, DDB) the DDB IPI gate
 * are switched to their x2APIC variants and the mode is enabled;
 * otherwise the register page is mapped by lapic_map().
 *
 * lapic_base: physical address of the local APIC; only passed on to
 * lapic_map().
 */
static void
lapic_setup_bsp(paddr_t lapic_base)
{
	u_int regs[6];
	const char *reason = NULL;	/* non-NULL: why x2APIC is vetoed */
	const char *hw_vendor;
	bool bios_x2apic;

	if (ISSET(cpu_feature[1], CPUID2_X2APIC)) {
#if NACPICA > 0
		if (acpi_present) {
			ACPI_TABLE_DMAR *dmar;
			ACPI_STATUS status;

			/*
			 * Automatically detect several configurations where
			 * x2APIC mode is known to cause troubles.  User can
			 * override the setting with hw.x2apic_enable tunable.
			 */
			status = AcpiGetTable(ACPI_SIG_DMAR, 1,
			    (ACPI_TABLE_HEADER **)&dmar);
			if (ACPI_SUCCESS(status)) {
				if (ISSET(dmar->Flags, ACPI_DMAR_X2APIC_OPT_OUT)) {
					reason = "by DMAR table";
				}
				AcpiPutTable(&dmar->Header);
			}
		}
#endif	/* NACPICA > 0 */
		/* Hypervisor- and platform-specific vetoes. */
		if (vm_guest == VM_GUEST_VMWARE) {
			vmt_hvcall(VM_CMD_GET_VCPU_INFO, regs);
			if (ISSET(regs[0], VCPUINFO_VCPU_RESERVED) ||
			    !ISSET(regs[0], VCPUINFO_LEGACY_X2APIC))
				reason = "inside VMWare without intr "
				    "redirection";
		} else if (vm_guest == VM_GUEST_XENHVM) {
			reason = "due to running under XEN";
		} else if (vm_guest == VM_GUEST_NO &&
		    CPUID_TO_FAMILY(curcpu()->ci_signature) == 6 &&
		    CPUID_TO_MODEL(curcpu()->ci_signature) == 0x2a) {
			hw_vendor = pmf_get_platform("board-vendor");
			if (hw_vendor != NULL) {
				/*
				 * It seems that some Lenovo and ASUS
				 * SandyBridge-based notebook BIOSes have a bug
				 * which prevents booting AP in x2APIC mode.
				 * Since the only way to detect mobile CPU is
				 * to check northbridge pci id, which cannot be
				 * done that early, disable x2APIC for all
				 * Lenovo and ASUS SandyBridge machines.
				 */
				if (strcmp(hw_vendor, "LENOVO") == 0 ||
				    strcmp(hw_vendor, "ASUSTeK Computer Inc.") == 0) {
					reason = "for a suspected SandyBridge "
					    "BIOS bug";
				}
			}
		}
		/* A BIOS that already enabled x2APIC overrides any veto. */
		bios_x2apic = lapic_is_x2apic();
		if (reason != NULL && bios_x2apic) {
			aprint_verbose("x2APIC should be disabled %s but "
			    "already enabled by BIOS; enabling.\n", reason);
			reason = NULL;
		}
		if (reason == NULL)
			x2apic_mode = true;
		else
			aprint_verbose("x2APIC available but disabled %s\n",
			    reason);
		/*
		 * Apply the user setting when it differs from the automatic
		 * choice, except that it cannot turn off an x2APIC mode the
		 * BIOS has already enabled.
		 */
		if (x2apic_enable != x2apic_mode) {
			if (bios_x2apic && !x2apic_enable)
				aprint_verbose("x2APIC disabled by user and "
				    "enabled by BIOS; ignoring user setting.\n");
			else
				x2apic_mode = x2apic_enable;
		}
	}
	if (x2apic_mode) {
		/* Route IPIs through the MSR-based sender. */
		x86_ipi = x2apic_ipi;
#if NIOAPIC > 0
		/* Point every attached ioapic at the x2APIC stubs. */
		struct ioapic_softc *ioapic;
		for (ioapic = ioapics; ioapic != NULL; ioapic = ioapic->sc_next) {
			ioapic->sc_pic.pic_edge_stubs = x2apic_edge_stubs;
			ioapic->sc_pic.pic_level_stubs = x2apic_level_stubs;
		}
#endif
#if defined(DDB) && defined(MULTIPROCESSOR)
#ifdef __x86_64__
		/* Install the x2APIC flavour of the DDB IPI handler. */
		struct idt_vec *iv = &(cpu_info_primary.ci_idtvec);
		idt_descriptor_t *idt = iv->iv_idt;
		set_idtgate(&idt[ddb_vec], &Xintr_x2apic_ddbipi, 1,
		    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
#else
		/*
		 * Set DDB IPI handler in cpu_set_tss_gates() when cpu0 is
		 * attached.
		 */
#endif
#endif

		/* Switch modes with interrupts off. */
		x86_disable_intr();
		lapic_enable_x2apic();
#ifdef MULTIPROCESSOR
		cpu_init_first();	/* Catch up to changed cpu_number() */
#endif
		lapic_write_tpri(0);
		x86_enable_intr();
	} else
		lapic_map(lapic_base);
}
    365 
/*
 * Map the local APIC register page at local_apic_va and set the task
 * priority to 0.
 *
 * lapic_base: physical address supplied by the caller; replaced by the
 * address from MSR_APICBASE on CPU family 6 and later.
 */
static void
lapic_map(paddr_t lapic_base)
{
	pt_entry_t *pte;
	vaddr_t va = local_apic_va;

	/*
	 * If the CPU has an APIC MSR, use it and ignore the supplied value:
	 * some ACPI implementations have been observed to pass bad values.
	 * Additionally, ensure that the lapic is enabled as we are committed
	 * to using it at this point.  Be conservative and assume that the MSR
	 * is not present on the Pentium (is it?).
	 */
	if (CPUID_TO_FAMILY(curcpu()->ci_signature) >= 6) {
		lapic_base = (paddr_t)rdmsr(MSR_APICBASE);
		/* No address in the MSR: fall back to LAPIC_BASE. */
		if ((lapic_base & APICBASE_PHYSADDR) == 0) {
			lapic_base |= LAPIC_BASE;
		}
		wrmsr(MSR_APICBASE, lapic_base | APICBASE_EN);
		/* Keep only the physical address bits for the PTE below. */
		lapic_base &= APICBASE_PHYSADDR;
	}

	x86_disable_intr();

	/*
	 * Map local apic.  If we have a local apic, it's safe to assume
	 * we're on a 486 or better and can use invlpg and non-cacheable PTE's
	 *
	 * Whap the PTE "by hand" rather than calling pmap_kenter_pa because
	 * the latter will attempt to invoke TLB shootdown code just as we
	 * might have changed the value of cpu_number()..
	 */

	pte = kvtopte(va);
	*pte = lapic_base | PTE_W | PTE_P | PTE_PCD | pmap_pg_g | pmap_pg_nx;
	invlpg(va);

#ifdef MULTIPROCESSOR
	cpu_init_first();	/* Catch up to changed cpu_number() */
#endif

	lapic_write_tpri(0);
	x86_enable_intr();
}
    410 
    411 /*
    412  * enable local apic
    413  */
    414 void
    415 lapic_enable(void)
    416 {
    417 	lapic_writereg(LAPIC_SVR, LAPIC_SVR_ENABLE | LAPIC_SPURIOUS_VECTOR);
    418 }
    419 
    420 void
    421 lapic_set_lvt(void)
    422 {
    423 	struct cpu_info *ci = curcpu();
    424 	int i;
    425 	struct mp_intr_map *mpi;
    426 	uint32_t lint0, lint1;
    427 
    428 #ifdef MULTIPROCESSOR
    429 	if (mp_verbose) {
    430 		apic_format_redir(device_xname(ci->ci_dev), "prelint", 0,
    431 		    APIC_VECTYPE_LAPIC_LVT, 0, lapic_readreg(LAPIC_LVT_LINT0));
    432 		apic_format_redir(device_xname(ci->ci_dev), "prelint", 1,
    433 		    APIC_VECTYPE_LAPIC_LVT, 0, lapic_readreg(LAPIC_LVT_LINT1));
    434 	}
    435 #endif
    436 
    437 	/*
    438 	 * If an I/O APIC has been attached, assume that it is used instead of
    439 	 * the 8259A for interrupt delivery.  Otherwise request the LAPIC to
    440 	 * get external interrupts via LINT0 for the primary CPU.
    441 	 */
    442 	lint0 = LAPIC_DLMODE_EXTINT;
    443 	if (nioapics > 0 || !CPU_IS_PRIMARY(curcpu()))
    444 		lint0 |= LAPIC_LVT_MASKED;
    445 	lapic_writereg(LAPIC_LVT_LINT0, lint0);
    446 
    447 	/*
    448 	 * Non Maskable Interrupts are to be delivered to the primary CPU.
    449 	 */
    450 	lint1 = LAPIC_DLMODE_NMI;
    451 	if (!CPU_IS_PRIMARY(curcpu()))
    452 		lint1 |= LAPIC_LVT_MASKED;
    453 	lapic_writereg(LAPIC_LVT_LINT1, lint1);
    454 
    455 	for (i = 0; i < mp_nintr; i++) {
    456 		mpi = &mp_intrs[i];
    457 		if (mpi->ioapic == NULL && (mpi->cpu_id == MPS_ALL_APICS ||
    458 		    mpi->cpu_id == ci->ci_cpuid)) {
    459 			if (mpi->ioapic_pin > 1)
    460 				aprint_error_dev(ci->ci_dev,
    461 				    "%s: WARNING: bad pin value %d\n",
    462 				    __func__, mpi->ioapic_pin);
    463 			if (mpi->ioapic_pin == 0)
    464 				lapic_writereg(LAPIC_LVT_LINT0, mpi->redir);
    465 			else
    466 				lapic_writereg(LAPIC_LVT_LINT1, mpi->redir);
    467 		}
    468 	}
    469 
    470 #ifdef MULTIPROCESSOR
    471 	if (mp_verbose)
    472 		lapic_dump();
    473 #endif
    474 }
    475 
    476 /*
    477  * Initialize fixed idt vectors for use by local apic.
    478  */
    479 void
    480 lapic_boot_init(paddr_t lapic_base)
    481 {
    482 	struct idt_vec *iv = &(cpu_info_primary.ci_idtvec);
    483 
    484 	lapic_setup_bsp(lapic_base);
    485 
    486 #ifdef MULTIPROCESSOR
    487 	idt_vec_reserve(iv, LAPIC_IPI_VECTOR);
    488 	idt_vec_set(iv, LAPIC_IPI_VECTOR,
    489 	    x2apic_mode ? Xintr_x2apic_ipi : Xintr_lapic_ipi);
    490 
    491 	idt_vec_reserve(iv, LAPIC_TLB_VECTOR);
    492 	idt_vec_set(iv, LAPIC_TLB_VECTOR,
    493 	    x2apic_mode ? Xintr_x2apic_tlb : Xintr_lapic_tlb);
    494 #endif
    495 	idt_vec_reserve(iv, LAPIC_SPURIOUS_VECTOR);
    496 	idt_vec_set(iv, LAPIC_SPURIOUS_VECTOR, Xintrspurious);
    497 
    498 	idt_vec_reserve(iv, LAPIC_TIMER_VECTOR);
    499 	idt_vec_set(iv, LAPIC_TIMER_VECTOR,
    500 	    x2apic_mode ? Xintr_x2apic_ltimer : Xintr_lapic_ltimer);
    501 }
    502 
    503 static uint32_t
    504 lapic_gettick(void)
    505 {
    506 	return lapic_readreg(LAPIC_CCR_TIMER);
    507 }
    508 
    509 #include <sys/kernel.h>		/* for hz */
    510 
/* Timer initial count for one clock tick; set by lapic_calibrate_timer(). */
uint32_t lapic_tval;

/*
 * this gets us up to a 4GHz busclock....
 */
/* Calibrated timer frequency in counts per second. */
uint32_t lapic_per_second;
/* 32.32 fixed-point ratios computed by lapic_calibrate_timer(). */
uint32_t lapic_frac_usec_per_cycle;
uint64_t lapic_frac_cycle_per_usec;
/* Precomputed cycle counts for delays of 0..25 usec; see lapic_delay(). */
uint32_t lapic_delaytab[26];
    520 
/*
 * Timecounter read routine.
 *
 * Returns the per-CPU ci_lapic_counter minus the timer's current count.
 * The counter is advanced by lapic_tval in lapic_clockintr(), so a timer
 * interrupt that has fired but not yet been handled is compensated for
 * by subtracting lapic_tval from the sampled count.
 *
 * tc: unused.
 */
static u_int
lapic_get_timecount(struct timecounter *tc)
{
	struct cpu_info *ci;
	uint32_t cur_timer;
	int s;

	s = splhigh();
	ci = curcpu();

	/*
	 * Check for a race against the clockinterrupt.
	 * The update of ci_lapic_counter is blocked by splhigh() and
	 * the check for a pending clockinterrupt compensates for that.
	 *
	 * If the current tick is almost the Initial Counter, explicitly
	 * check for the pending interrupt bit as the interrupt delivery
	 * could be asynchronous and compensate as well.
	 *
	 * This can't be done without splhigh() as the calling code might
	 * have masked the clockinterrupt already.
	 *
	 * This code assumes that clockinterrupts are not missed.
	 */
	cur_timer = lapic_gettick();
	if (cur_timer >= lapic_tval - 1) {
		/* IRR word that holds the timer vector's bit. */
		uint16_t reg = LAPIC_IRR + LAPIC_TIMER_VECTOR / 32 * 16;

		if (lapic_readreg(reg) & (1 << (LAPIC_TIMER_VECTOR % 32))) {
			cur_timer -= lapic_tval;
		}
	} else if (ci->ci_ipending & (1ULL << LIR_TIMER))
		/* Interrupt recorded as pending: resample and compensate. */
		cur_timer = lapic_gettick() - lapic_tval;
	/* The timer counts down, so subtract it from the running total. */
	cur_timer = ci->ci_lapic_counter - cur_timer;
	splx(s);

	return cur_timer;
}
    559 
/*
 * Timecounter backed by the local APIC timer.  tc_frequency is filled
 * in by lapic_initclock() before registration.  The quality is negative
 * on MULTIPROCESSOR kernels because the counter state is per CPU.
 */
static struct timecounter lapic_timecounter = {
	.tc_get_timecount = lapic_get_timecount,
	.tc_counter_mask = ~0u,
	.tc_name = "lapic",
	.tc_quality =
#ifndef MULTIPROCESSOR
	    2100,
#else
	    -100, /* per CPU state */
#endif
};
    571 
/* NOTE(review): declared here but not referenced in the visible code. */
extern u_int i8254_get_timecount(struct timecounter *);
    573 
    574 void
    575 lapic_clockintr(void *arg, struct intrframe *frame)
    576 {
    577 	struct cpu_info *ci = curcpu();
    578 
    579 	ci->ci_lapic_counter += lapic_tval;
    580 	ci->ci_isources[LIR_TIMER]->is_evcnt.ev_count++;
    581 	hardclock((struct clockframe *)frame);
    582 }
    583 
    584 void
    585 lapic_reset(void)
    586 {
    587 
    588 	/*
    589 	 * Mask the clock interrupt and set mode,
    590 	 * then set divisor,
    591 	 * then unmask and set the vector.
    592 	 */
    593 	lapic_writereg(LAPIC_LVT_TIMER,
    594 	    LAPIC_LVT_TMM_PERIODIC | LAPIC_LVT_MASKED);
    595 	lapic_writereg(LAPIC_DCR_TIMER, LAPIC_DCRT_DIV1);
    596 	lapic_writereg(LAPIC_ICR_TIMER, lapic_tval);
    597 	lapic_writereg(LAPIC_LVT_TIMER,
    598 	    LAPIC_LVT_TMM_PERIODIC | LAPIC_TIMER_VECTOR);
    599 	lapic_writereg(LAPIC_EOI, 0);
    600 }
    601 
    602 static void
    603 lapic_initclock(void)
    604 {
    605 
    606 	if (curcpu() == &cpu_info_primary) {
    607 		/*
    608 		 * Recalibrate the timer using the cycle counter, now that
    609 		 * the cycle counter itself has been recalibrated.
    610 		 *
    611 		 * Not needed when lapic_per_second is read from CPUID.
    612 		 */
    613 		if (!lapic_from_cpuid)
    614 			lapic_calibrate_timer(true);
    615 		/*
    616 		 * Hook up time counter.  This assume that all LAPICs have
    617 		 * the same frequency.
    618 		 */
    619 		lapic_timecounter.tc_frequency = lapic_per_second;
    620 		tc_init(&lapic_timecounter);
    621 	}
    622 
    623 	/* Start local apic countdown timer running, in repeated mode. */
    624 	lapic_reset();
    625 }
    626 
    627 /*
    628  * Calibrate the local apic count-down timer (which is running at
    629  * bus-clock speed) vs. the i8254 counter/timer (which is running at
    630  * a fixed rate).
    631  *
    632  * The Intel MP spec says: "An MP operating system may use the IRQ8
    633  * real-time clock as a reference to determine the actual APIC timer clock
    634  * speed."
    635  *
    636  * We're actually using the IRQ0 timer.  Hmm.
    637  */
    638 void
    639 lapic_calibrate_timer(bool secondpass)
    640 {
    641 	struct cpu_info *ci = curcpu();
    642 	uint64_t tmp;
    643 	int i;
    644 	char tbuf[9];
    645 
    646 	KASSERT(ci == &cpu_info_primary);
    647 
    648 	aprint_debug_dev(ci->ci_dev, "[re]calibrating local timer\n");
    649 
    650 	/*
    651 	 * Configure timer to one-shot, interrupt masked,
    652 	 * large positive number.
    653 	 */
    654 	x86_disable_intr();
    655 	lapic_writereg(LAPIC_LVT_TIMER, LAPIC_LVT_MASKED);
    656 	lapic_writereg(LAPIC_DCR_TIMER, LAPIC_DCRT_DIV1);
    657 	lapic_writereg(LAPIC_ICR_TIMER, 0x80000000);
    658 	(void)lapic_gettick();
    659 
    660 	if (secondpass && cpu_hascounter()) {
    661 		/*
    662 		 * Second pass calibration, using the TSC which has ideally
    663 		 * been calibrated using the HPET or information gleaned
    664 		 * from MSRs by this point.
    665 		 */
    666 		uint64_t l0, l1, t0, t1;
    667 
    668 		(void)cpu_counter();
    669 		t0 = cpu_counter();
    670 		l0 = lapic_gettick();
    671 		t0 += cpu_counter();
    672 		DELAY(50000);
    673 		t1 = cpu_counter();
    674 		l1 = lapic_gettick();
    675 		t1 += cpu_counter();
    676 
    677 		tmp = (l0 - l1) * cpu_frequency(ci) / ((t1 - t0 + 1) / 2);
    678 		lapic_per_second = rounddown(tmp + 500, 1000);
    679 	} else if (lapic_per_second == 0) {
    680 		/*
    681 		 * Inaccurate first pass calibration using the i8254.
    682 		 */
    683 		unsigned int seen, delta, initial_i8254, initial_lapic;
    684 		unsigned int cur_i8254, cur_lapic;
    685 
    686 		(void)gettick();
    687 		initial_lapic = lapic_gettick();
    688 		initial_i8254 = gettick();
    689 		for (seen = 0; seen < TIMER_FREQ / 100; seen += delta) {
    690 			cur_i8254 = gettick();
    691 			if (cur_i8254 > initial_i8254)
    692 				delta = x86_rtclock_tval - (cur_i8254 - initial_i8254);
    693 			else
    694 				delta = initial_i8254 - cur_i8254;
    695 			initial_i8254 = cur_i8254;
    696 		}
    697 		cur_lapic = lapic_gettick();
    698 		tmp = initial_lapic - cur_lapic;
    699 		lapic_per_second = (tmp * TIMER_FREQ + seen / 2) / seen;
    700 	}
    701 	x86_enable_intr();
    702 
    703 	humanize_number(tbuf, sizeof(tbuf), lapic_per_second, "Hz", 1000);
    704 	aprint_debug_dev(ci->ci_dev, "apic clock running at %s\n", tbuf);
    705 
    706 	if (lapic_per_second != 0) {
    707 		/*
    708 		 * reprogram the apic timer to run in periodic mode.
    709 		 * XXX need to program timer on other CPUs, too.
    710 		 */
    711 		lapic_tval = (lapic_per_second * 2) / hz;
    712 		lapic_tval = (lapic_tval / 2) + (lapic_tval & 0x1);
    713 
    714 		lapic_writereg(LAPIC_LVT_TIMER, LAPIC_LVT_TMM_PERIODIC
    715 		    | LAPIC_LVT_MASKED | LAPIC_TIMER_VECTOR);
    716 		lapic_writereg(LAPIC_DCR_TIMER, LAPIC_DCRT_DIV1);
    717 		lapic_writereg(LAPIC_ICR_TIMER, lapic_tval);
    718 
    719 		/*
    720 		 * Compute fixed-point ratios between cycles and
    721 		 * microseconds to avoid having to do any division
    722 		 * in lapic_delay.
    723 		 */
    724 
    725 		tmp = (1000000 * (uint64_t)1 << 32) / lapic_per_second;
    726 		lapic_frac_usec_per_cycle = tmp;
    727 
    728 		tmp = (lapic_per_second * (uint64_t)1 << 32) / 1000000;
    729 
    730 		lapic_frac_cycle_per_usec = tmp;
    731 
    732 		/*
    733 		 * Compute delay in cycles for likely short delays in usec.
    734 		 */
    735 		for (i = 0; i < 26; i++)
    736 			lapic_delaytab[i] = (lapic_frac_cycle_per_usec * i) >>
    737 			    32;
    738 
    739 		/*
    740 		 * Apply workaround for broken periodic timer under KVM
    741 		 */
    742 		if (vm_guest == VM_GUEST_KVM) {
    743 			lapic_broken_periodic = true;
    744 			lapic_timecounter.tc_quality = -100;
    745 			aprint_debug_dev(ci->ci_dev,
    746 			    "applying KVM timer workaround\n");
    747 		}
    748 
    749 		/*
    750 		 * Now that the timer's calibrated, use the apic timer routines
    751 		 * for all our timing needs..
    752 		 */
    753 		if (!secondpass) {
    754 			delay_func = lapic_delay;
    755 			x86_initclock_func = lapic_initclock;
    756 			initrtclock(0);
    757 		}
    758 	}
    759 }
    760 
    761 /*
    762  * delay for N usec.
    763  */
    764 
    765 static void
    766 lapic_delay(unsigned int usec)
    767 {
    768 	int32_t xtick, otick;
    769 	int64_t deltat;
    770 
    771 	/* XXX Bad to disable preemption, but it's tied to the cpu. */
    772 	kpreempt_disable();
    773 	otick = lapic_gettick();
    774 
    775 	if (usec <= 0) {
    776 		kpreempt_enable();
    777 		return;
    778 	}
    779 
    780 	if (usec <= 25)
    781 		deltat = lapic_delaytab[usec];
    782 	else
    783 		deltat = (lapic_frac_cycle_per_usec * usec) >> 32;
    784 
    785 	while (deltat > 0) {
    786 		xtick = lapic_gettick();
    787 		if (lapic_broken_periodic && xtick == 0 && otick == 0) {
    788 			lapic_reset();
    789 			xtick = lapic_gettick();
    790 			if (xtick == 0)
    791 				panic("lapic timer stopped ticking");
    792 		}
    793 		if (xtick > otick)
    794 			deltat -= lapic_tval - (xtick - otick);
    795 		else
    796 			deltat -= otick - xtick;
    797 		otick = xtick;
    798 
    799 		x86_pause();
    800 	}
    801 	kpreempt_enable();
    802 }
    803 
    804 /*
    805  * XXX the following belong mostly or partly elsewhere..
    806  */
    807 
    808 static void
    809 i82489_icr_wait(void)
    810 {
    811 #ifdef DIAGNOSTIC
    812 	unsigned j = 100000;
    813 #endif /* DIAGNOSTIC */
    814 
    815 	while ((i82489_readreg(LAPIC_ICRLO) & LAPIC_DLSTAT_BUSY) != 0) {
    816 		x86_pause();
    817 #ifdef DIAGNOSTIC
    818 		j--;
    819 		if (j == 0)
    820 			panic("i82489_icr_wait: busy");
    821 #endif /* DIAGNOSTIC */
    822 	}
    823 }
    824 
    825 static int
    826 i82489_ipi_init(int target)
    827 {
    828 	uint32_t esr;
    829 
    830 	i82489_writereg(LAPIC_ESR, 0);
    831 	(void)i82489_readreg(LAPIC_ESR);
    832 
    833 	i82489_writereg(LAPIC_ICRHI, target << LAPIC_ID_SHIFT);
    834 
    835 	i82489_writereg(LAPIC_ICRLO, LAPIC_DLMODE_INIT | LAPIC_LEVEL_ASSERT);
    836 	i82489_icr_wait();
    837 	delay_func(10000);
    838 	i82489_writereg(LAPIC_ICRLO,
    839 	    LAPIC_DLMODE_INIT | LAPIC_TRIGMODE_LEVEL | LAPIC_LEVEL_DEASSERT);
    840 	i82489_icr_wait();
    841 
    842 	if ((i82489_readreg(LAPIC_ICRLO) & LAPIC_DLSTAT_BUSY) != 0)
    843 		return EBUSY;
    844 
    845 	esr = i82489_readreg(LAPIC_ESR);
    846 	if (esr != 0)
    847 		aprint_debug("%s: ESR %08x\n", __func__, esr);
    848 
    849 	return 0;
    850 }
    851 
    852 static int
    853 i82489_ipi_startup(int target, int vec)
    854 {
    855 	uint32_t esr;
    856 
    857 	i82489_writereg(LAPIC_ESR, 0);
    858 	(void)i82489_readreg(LAPIC_ESR);
    859 
    860 	i82489_icr_wait();
    861 	i82489_writereg(LAPIC_ICRHI, target << LAPIC_ID_SHIFT);
    862 	i82489_writereg(LAPIC_ICRLO, vec | LAPIC_DLMODE_STARTUP |
    863 	    LAPIC_LEVEL_ASSERT);
    864 	i82489_icr_wait();
    865 
    866 	if ((i82489_readreg(LAPIC_ICRLO) & LAPIC_DLSTAT_BUSY) != 0)
    867 		return EBUSY;
    868 
    869 	esr = i82489_readreg(LAPIC_ESR);
    870 	if (esr != 0)
    871 		aprint_debug("%s: ESR %08x\n", __func__, esr);
    872 
    873 	return 0;
    874 }
    875 
    876 static int
    877 i82489_ipi(int vec, int target, int dl)
    878 {
    879 	int result, s;
    880 
    881 	s = splhigh();
    882 
    883 	i82489_icr_wait();
    884 
    885 	if ((target & LAPIC_DEST_MASK) == 0)
    886 		i82489_writereg(LAPIC_ICRHI, target << LAPIC_ID_SHIFT);
    887 
    888 	i82489_writereg(LAPIC_ICRLO,
    889 	    (target & LAPIC_DEST_MASK) | vec | dl | LAPIC_LEVEL_ASSERT);
    890 
    891 #ifdef DIAGNOSTIC
    892 	i82489_icr_wait();
    893 	result = (i82489_readreg(LAPIC_ICRLO) & LAPIC_DLSTAT_BUSY) ? EBUSY : 0;
    894 #else
    895 	/* Don't wait - if it doesn't go, we're in big trouble anyway. */
    896 	result = 0;
    897 #endif
    898 	splx(s);
    899 
    900 	return result;
    901 }
    902 
    903 static int
    904 x2apic_ipi_init(int target)
    905 {
    906 
    907 	x2apic_write_icr(target, LAPIC_DLMODE_INIT | LAPIC_LEVEL_ASSERT);
    908 
    909 	delay_func(10000);
    910 
    911 	x2apic_write_icr(0,
    912 	    LAPIC_DLMODE_INIT | LAPIC_TRIGMODE_LEVEL | LAPIC_LEVEL_DEASSERT);
    913 
    914 	return 0;
    915 }
    916 
    917 static int
    918 x2apic_ipi_startup(int target, int vec)
    919 {
    920 
    921 	x2apic_write_icr(target,
    922 	    vec | LAPIC_DLMODE_STARTUP | LAPIC_LEVEL_ASSERT);
    923 
    924 	return 0;
    925 }
    926 
    927 static int
    928 x2apic_ipi(int vec, int target, int dl)
    929 {
    930 	uint32_t dest_id = 0;
    931 
    932 	if ((target & LAPIC_DEST_MASK) == 0)
    933 		dest_id = target;
    934 
    935 	x2apic_write_icr(dest_id,
    936 	    (target & LAPIC_DEST_MASK) | vec | dl | LAPIC_LEVEL_ASSERT);
    937 
    938 	return 0;
    939 }
    940 
    941 int
    942 x86_ipi_init(int target)
    943 {
    944 	if (x2apic_mode)
    945 		return x2apic_ipi_init(target);
    946 	return i82489_ipi_init(target);
    947 }
    948 
    949 int
    950 x86_ipi_startup(int target, int vec)
    951 {
    952 	if (x2apic_mode)
    953 		return x2apic_ipi_startup(target, vec);
    954 	return i82489_ipi_startup(target, vec);
    955 }
    956 
    957 /*
    958  * Using 'pin numbers' as:
    959  * 0 - timer
    960  * 1 - thermal
    961  * 2 - PCINT
    962  * 3 - LVINT0
    963  * 4 - LVINT1
    964  * 5 - LVERR
    965  */
    966 
    967 static void
    968 lapic_hwmask(struct pic *pic, int pin)
    969 {
    970 	int reg;
    971 	uint32_t val;
    972 
    973 	reg = LAPIC_LVT_TIMER + (pin << 4);
    974 	val = lapic_readreg(reg);
    975 	val |= LAPIC_LVT_MASKED;
    976 	lapic_writereg(reg, val);
    977 }
    978 
    979 static void
    980 lapic_hwunmask(struct pic *pic, int pin)
    981 {
    982 	int reg;
    983 	uint32_t val;
    984 
    985 	reg = LAPIC_LVT_TIMER + (pin << 4);
    986 	val = lapic_readreg(reg);
    987 	val &= ~LAPIC_LVT_MASKED;
    988 	lapic_writereg(reg, val);
    989 }
    990 
/*
 * Route add/delete callback for local_pic.  Intentionally does nothing.
 */
static void
lapic_setup(struct pic *pic, struct cpu_info *ci, int pin, int idtvec,
    int type)
{
	/* nothing to do */
}
    996 
    997 void
    998 lapic_dump(void)
    999 {
   1000 	struct cpu_info *ci = curcpu();
   1001 
   1002 #define APIC_LVT_PRINT(ci, where, idx, lvtreg)				\
   1003 	apic_format_redir(device_xname(ci->ci_dev), where, (idx),	\
   1004 	    APIC_VECTYPE_LAPIC_LVT, 0, lapic_readreg(lvtreg))
   1005 
   1006 	APIC_LVT_PRINT(ci, "cmci", 0, LAPIC_LVT_CMCI);
   1007 	APIC_LVT_PRINT(ci, "timer", 0, LAPIC_LVT_TIMER);
   1008 	APIC_LVT_PRINT(ci, "thermal", 0, LAPIC_LVT_THERM);
   1009 	APIC_LVT_PRINT(ci, "pcint", 0, LAPIC_LVT_PCINT);
   1010 	APIC_LVT_PRINT(ci, "lint", 0, LAPIC_LVT_LINT0);
   1011 	APIC_LVT_PRINT(ci, "lint", 1, LAPIC_LVT_LINT1);
   1012 	APIC_LVT_PRINT(ci, "err", 0, LAPIC_LVT_ERR);
   1013 
   1014 #undef APIC_LVT_PRINT
   1015 }
   1016 #else /* XENPV */
   1017 void
   1018 lapic_boot_init(paddr_t lapic_base)
   1019 {
   1020 }
   1021 #endif /* XENPV */
   1022