/* intr.c, revision 1.95 (NetBSD, sys/arch/x86) */
      1 /*	$NetBSD: intr.c,v 1.94 2016/07/11 23:09:34 knakahara Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Andrew Doran.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * Copyright 2002 (c) Wasabi Systems, Inc.
     34  * All rights reserved.
     35  *
     36  * Written by Frank van der Linden for Wasabi Systems, Inc.
     37  *
     38  * Redistribution and use in source and binary forms, with or without
     39  * modification, are permitted provided that the following conditions
     40  * are met:
     41  * 1. Redistributions of source code must retain the above copyright
     42  *    notice, this list of conditions and the following disclaimer.
     43  * 2. Redistributions in binary form must reproduce the above copyright
     44  *    notice, this list of conditions and the following disclaimer in the
     45  *    documentation and/or other materials provided with the distribution.
     46  * 3. All advertising materials mentioning features or use of this software
     47  *    must display the following acknowledgement:
     48  *      This product includes software developed for the NetBSD Project by
     49  *      Wasabi Systems, Inc.
     50  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
     51  *    or promote products derived from this software without specific prior
     52  *    written permission.
     53  *
     54  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
     55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     56  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     57  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
     58  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     59  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     60  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     61  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     62  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     63  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     64  * POSSIBILITY OF SUCH DAMAGE.
     65  */
     66 
     67 /*-
     68  * Copyright (c) 1991 The Regents of the University of California.
     69  * All rights reserved.
     70  *
     71  * This code is derived from software contributed to Berkeley by
     72  * William Jolitz.
     73  *
     74  * Redistribution and use in source and binary forms, with or without
     75  * modification, are permitted provided that the following conditions
     76  * are met:
     77  * 1. Redistributions of source code must retain the above copyright
     78  *    notice, this list of conditions and the following disclaimer.
     79  * 2. Redistributions in binary form must reproduce the above copyright
     80  *    notice, this list of conditions and the following disclaimer in the
     81  *    documentation and/or other materials provided with the distribution.
     82  * 3. Neither the name of the University nor the names of its contributors
     83  *    may be used to endorse or promote products derived from this software
     84  *    without specific prior written permission.
     85  *
     86  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     87  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     88  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     89  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     90  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     91  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     92  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     93  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     94  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     95  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     96  * SUCH DAMAGE.
     97  *
     98  *	@(#)isa.c	7.2 (Berkeley) 5/13/91
     99  */
    100 
    101 /*-
    102  * Copyright (c) 1993, 1994 Charles Hannum.
    103  *
    104  * Redistribution and use in source and binary forms, with or without
    105  * modification, are permitted provided that the following conditions
    106  * are met:
    107  * 1. Redistributions of source code must retain the above copyright
    108  *    notice, this list of conditions and the following disclaimer.
    109  * 2. Redistributions in binary form must reproduce the above copyright
    110  *    notice, this list of conditions and the following disclaimer in the
    111  *    documentation and/or other materials provided with the distribution.
    112  * 3. All advertising materials mentioning features or use of this software
    113  *    must display the following acknowledgement:
    114  *	This product includes software developed by the University of
    115  *	California, Berkeley and its contributors.
    116  * 4. Neither the name of the University nor the names of its contributors
    117  *    may be used to endorse or promote products derived from this software
    118  *    without specific prior written permission.
    119  *
    120  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
    121  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    122  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    123  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
    124  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    125  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
    126  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    127  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    128  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    129  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    130  * SUCH DAMAGE.
    131  *
    132  *	@(#)isa.c	7.2 (Berkeley) 5/13/91
    133  */
    134 
    135 #include <sys/cdefs.h>
    136 __KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.94 2016/07/11 23:09:34 knakahara Exp $");
    137 
    138 #include "opt_intrdebug.h"
    139 #include "opt_multiprocessor.h"
    140 #include "opt_acpi.h"
    141 
    142 #include <sys/param.h>
    143 #include <sys/systm.h>
    144 #include <sys/kernel.h>
    145 #include <sys/syslog.h>
    146 #include <sys/device.h>
    147 #include <sys/kmem.h>
    148 #include <sys/proc.h>
    149 #include <sys/errno.h>
    150 #include <sys/intr.h>
    151 #include <sys/cpu.h>
    152 #include <sys/atomic.h>
    153 #include <sys/xcall.h>
    154 #include <sys/interrupt.h>
    155 
    156 #include <sys/kauth.h>
    157 #include <sys/conf.h>
    158 
    159 #include <uvm/uvm_extern.h>
    160 
    161 #include <machine/i8259.h>
    162 #include <machine/pio.h>
    163 
    164 #include "ioapic.h"
    165 #include "lapic.h"
    166 #include "pci.h"
    167 #include "acpica.h"
    168 
    169 #if NIOAPIC > 0 || NACPICA > 0
    170 #include <machine/i82093var.h>
    171 #include <machine/mpbiosvar.h>
    172 #include <machine/mpacpi.h>
    173 #endif
    174 
    175 #if NLAPIC > 0
    176 #include <machine/i82489var.h>
    177 #endif
    178 
    179 #if NPCI > 0
    180 #include <dev/pci/ppbreg.h>
    181 #endif
    182 
    183 #include <x86/pci/msipic.h>
    184 #include <x86/pci/pci_msi_machdep.h>
    185 
    186 #if NPCI == 0
    187 #define msipic_is_msi_pic(PIC)	(false)
    188 #endif
    189 
    190 #ifdef DDB
    191 #include <ddb/db_output.h>
    192 #endif
    193 
    194 #ifdef INTRDEBUG
    195 #define DPRINTF(msg) printf msg
    196 #else
    197 #define DPRINTF(msg)
    198 #endif
    199 
/*
 * Fake PIC that anchors software interrupt sources.  There is no
 * hardware behind it (PIC_SOFT), so the vector base and APIC id are 0.
 */
struct pic softintr_pic = {
	.pic_name = "softintr_fakepic",
	.pic_type = PIC_SOFT,
	.pic_vecbase = 0,
	.pic_apicid = 0,
	.pic_lock = __SIMPLELOCK_UNLOCKED,
};
    207 
    208 static void intr_calculatemasks(struct cpu_info *);
    209 
    210 static SIMPLEQ_HEAD(, intrsource) io_interrupt_sources =
    211 	SIMPLEQ_HEAD_INITIALIZER(io_interrupt_sources);
    212 
    213 static kmutex_t intr_distribute_lock;
    214 
    215 #if NIOAPIC > 0 || NACPICA > 0
    216 static int intr_scan_bus(int, int, intr_handle_t *);
    217 #if NPCI > 0
    218 static int intr_find_pcibridge(int, pcitag_t *, pci_chipset_tag_t *);
    219 #endif
    220 #endif
    221 
    222 static int intr_allocate_slot_cpu(struct cpu_info *, struct pic *, int, int *,
    223 				  struct intrsource *);
    224 static int __noinline intr_allocate_slot(struct pic *, int, int,
    225 					 struct cpu_info **, int *, int *,
    226 					 struct intrsource *);
    227 
    228 static void intr_source_free(struct cpu_info *, int, struct pic *, int);
    229 
    230 static void intr_establish_xcall(void *, void *);
    231 static void intr_disestablish_xcall(void *, void *);
    232 
    233 static const char *legacy_intr_string(int, char *, size_t, struct pic *);
    234 
    235 static inline bool redzone_const_or_false(bool);
    236 static inline int redzone_const_or_zero(int);
    237 
    238 static void intr_redistribute_xc_t(void *, void *);
    239 static void intr_redistribute_xc_s1(void *, void *);
    240 static void intr_redistribute_xc_s2(void *, void *);
    241 static bool intr_redistribute(struct cpu_info *);
    242 
    243 static const char *create_intrid(int, struct pic *, int, char *, size_t);
    244 
    245 static struct intrsource *intr_get_io_intrsource(const char *);
    246 static void intr_free_io_intrsource_direct(struct intrsource *);
    247 static int intr_num_handlers(struct intrsource *);
    248 
    249 static const char *legacy_intr_string(int, char *, size_t, struct pic *);
    250 
    251 static int intr_find_unused_slot(struct cpu_info *, int *);
    252 static void intr_activate_xcall(void *, void *);
    253 static void intr_deactivate_xcall(void *, void *);
    254 static void intr_get_affinity(struct intrsource *, kcpuset_t *);
    255 static int intr_set_affinity(struct intrsource *, const kcpuset_t *);
    256 
    257 /*
    258  * Fill in default interrupt table (in case of spurious interrupt
    259  * during configuration of kernel), setup interrupt control unit
    260  */
    261 void
    262 intr_default_setup(void)
    263 {
    264 	int i;
    265 
    266 	/* icu vectors */
    267 	for (i = 0; i < NUM_LEGACY_IRQS; i++) {
    268 		idt_vec_reserve(ICU_OFFSET + i);
    269 		setgate(&idt[ICU_OFFSET + i],
    270 		    i8259_stubs[i].ist_entry, 0, SDT_SYS386IGT,
    271 		    SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
    272 	}
    273 
    274 	/*
    275 	 * Eventually might want to check if it's actually there.
    276 	 */
    277 	i8259_default_setup();
    278 
    279 	mutex_init(&intr_distribute_lock, MUTEX_DEFAULT, IPL_NONE);
    280 }
    281 
/*
 * Handle an NMI, possibly a machine check.  Only logs the system-port
 * state; any panic/ignore decision is up to the caller.
 * NOTE(review): the previous comment claimed a true/false return, but
 * this function is void.
 */
void
x86_nmi(void)
{

	log(LOG_CRIT, "NMI port 61 %x, port 70 %x\n", inb(0x61), inb(0x70));
}
    292 
/*
 * Recalculate the interrupt masks from scratch.
 * During early boot, anything goes and we are always called on the BP.
 * When the system is up and running:
 *
 * => called with ci == curcpu()
 * => cpu_lock held by the initiator
 * => interrupts disabled on-chip (PSL_I)
 *
 * Do not call printf(), kmem_free() or other "heavyweight" routines
 * from here.  This routine must be quick and must not block.
 */
static void
intr_calculatemasks(struct cpu_info *ci)
{
	int irq, level, unusedirqs, intrlevel[MAX_INTR_SOURCES];
	struct intrhand *q;

	/*
	 * First, figure out which levels each IRQ uses.
	 * intrlevel[irq] collects one bit per IPL used by irq's handlers;
	 * unusedirqs keeps a 1 bit for every source with no handlers.
	 */
	unusedirqs = 0xffffffff;
	for (irq = 0; irq < MAX_INTR_SOURCES; irq++) {
		int levels = 0;

		if (ci->ci_isources[irq] == NULL) {
			intrlevel[irq] = 0;
			continue;
		}
		for (q = ci->ci_isources[irq]->is_handlers; q; q = q->ih_next)
			levels |= 1 << q->ih_level;
		intrlevel[irq] = levels;
		if (levels)
			unusedirqs &= ~(1 << irq);
	}

	/*
	 * Then figure out which IRQs use each level.  Unused IRQs stay
	 * masked at every level (hence "| unusedirqs").
	 */
	for (level = 0; level < NIPL; level++) {
		int irqs = 0;
		for (irq = 0; irq < MAX_INTR_SOURCES; irq++)
			if (intrlevel[irq] & (1 << level))
				irqs |= 1 << irq;
		ci->ci_imask[level] = irqs | unusedirqs;
	}

	/* Make each mask a superset of the levels below it (IPLs nest). */
	for (level = 0; level<(NIPL-1); level++)
		ci->ci_imask[level+1] |= ci->ci_imask[level];

	/* Record the min/max IPL among each source's handlers. */
	for (irq = 0; irq < MAX_INTR_SOURCES; irq++) {
		int maxlevel = IPL_NONE;
		int minlevel = IPL_HIGH;

		if (ci->ci_isources[irq] == NULL)
			continue;
		for (q = ci->ci_isources[irq]->is_handlers; q;
		     q = q->ih_next) {
			if (q->ih_level < minlevel)
				minlevel = q->ih_level;
			if (q->ih_level > maxlevel)
				maxlevel = q->ih_level;
		}
		ci->ci_isources[irq]->is_maxlevel = maxlevel;
		ci->ci_isources[irq]->is_minlevel = minlevel;
	}

	/* The unmask set is the complement of the mask set. */
	for (level = 0; level < NIPL; level++)
		ci->ci_iunmask[level] = ~ci->ci_imask[level];
}
    359 
/*
 * List to keep track of PCI buses that are probed but not known
 * to the firmware.  Used by intr_find_pcibridge() to locate the
 * parent bridge of such a bus when routing interrupts.
 *
 * XXX should maintain one list, not an array and a linked list.
 */
#if (NPCI > 0) && ((NIOAPIC > 0) || NACPICA > 0)
struct intr_extra_bus {
	int bus;			/* PCI bus number */
	pcitag_t *pci_bridge_tag;	/* parent bridge; NULL if unknown */
	pci_chipset_tag_t pci_chipset_tag;
	LIST_ENTRY(intr_extra_bus) list;
};

LIST_HEAD(, intr_extra_bus) intr_extra_buses =
    LIST_HEAD_INITIALIZER(intr_extra_buses);
    376 
    377 
    378 void
    379 intr_add_pcibus(struct pcibus_attach_args *pba)
    380 {
    381 	struct intr_extra_bus *iebp;
    382 
    383 	iebp = kmem_alloc(sizeof(*iebp), KM_SLEEP);
    384 	iebp->bus = pba->pba_bus;
    385 	iebp->pci_chipset_tag = pba->pba_pc;
    386 	iebp->pci_bridge_tag = pba->pba_bridgetag;
    387 	LIST_INSERT_HEAD(&intr_extra_buses, iebp, list);
    388 }
    389 
/*
 * Look up the parent PCI bridge tag and chipset tag for 'bus'.
 * Firmware-enumerated buses are found in mp_busses[]; buses discovered
 * later (recorded by intr_add_pcibus()) are searched in intr_extra_buses.
 *
 * Returns 0 on success, ENOENT if the bus or its bridge is unknown.
 */
static int
intr_find_pcibridge(int bus, pcitag_t *pci_bridge_tag,
		    pci_chipset_tag_t *pc)
{
	struct intr_extra_bus *iebp;
	struct mp_bus *mpb;

	if (bus < 0)
		return ENOENT;

	/* Firmware-known buses live in the mp_busses[] array. */
	if (bus < mp_nbus) {
		mpb = &mp_busses[bus];
		if (mpb->mb_pci_bridge_tag == NULL)
			return ENOENT;
		*pci_bridge_tag = *mpb->mb_pci_bridge_tag;
		*pc = mpb->mb_pci_chipset_tag;
		return 0;
	}

	/* Otherwise check the dynamically added buses. */
	LIST_FOREACH(iebp, &intr_extra_buses, list) {
		if (iebp->bus == bus) {
			if (iebp->pci_bridge_tag == NULL)
				return ENOENT;
			*pci_bridge_tag = *iebp->pci_bridge_tag;
			*pc = iebp->pci_chipset_tag;
			return 0;
		}
	}
	return ENOENT;
}
    420 #endif
    421 
    422 #if NIOAPIC > 0 || NACPICA > 0
/*
 * Find the MP-table interrupt mapping for a (bus, pin) pair.  The 'pin'
 * argument is the PCI bus_pin encoding of a device/pin combination
 * (device number in the upper bits, INTx pin in bits 0-1).  If the bus
 * has no direct mapping, walk up through PCI-PCI bridges, swizzling the
 * pin at each hop, until one is found.
 *
 * Returns 0 and fills *handle on success, ENOENT otherwise.
 */
int
intr_find_mpmapping(int bus, int pin, intr_handle_t *handle)
{

#if NPCI > 0
	while (intr_scan_bus(bus, pin, handle) != 0) {
		int dev, func;
		pcitag_t pci_bridge_tag;
		pci_chipset_tag_t pc;

		if (intr_find_pcibridge(bus, &pci_bridge_tag, &pc) != 0)
			return ENOENT;
		/* Split the bus_pin encoding into device and pin. */
		dev = pin >> 2;
		pin = pin & 3;
		/* The swizzle is defined on 1-based INTx pins: hence +1/-1. */
		pin = PPB_INTERRUPT_SWIZZLE(pin + 1, dev) - 1;
		pci_decompose_tag(pc, pci_bridge_tag, &bus, &dev, &func);
		/* Re-encode with the bridge's device number for the next hop. */
		pin |= (dev << 2);
	}
	return 0;
#else
	return intr_scan_bus(bus, pin, handle);
#endif
}
    449 
/*
 * Scan bus 'bus's MP-table interrupt list for an entry matching 'pin'.
 * Returns 0 and stores the ioapic interrupt handle in *handle, or
 * ENOENT if the bus is unknown or has no matching entry.
 */
static int
intr_scan_bus(int bus, int pin, intr_handle_t *handle)
{
	struct mp_intr_map *mip, *intrs;

	if (bus < 0 || bus >= mp_nbus)
		return ENOENT;

	intrs = mp_busses[bus].mb_intrs;
	if (intrs == NULL)
		return ENOENT;

	for (mip = intrs; mip != NULL; mip = mip->next) {
		if (mip->bus_pin == pin) {
#if NACPICA > 0
			/*
			 * Entries routed through an ACPI link device must
			 * resolve to a real interrupt first; skip them if
			 * that fails.
			 */
			if (mip->linkdev != NULL)
				if (mpacpi_findintr_linkdev(mip) != 0)
					continue;
#endif
			*handle = mip->ioapic_ih;
			return 0;
		}
	}
	return ENOENT;
}
    475 #endif
    476 
/*
 * Create an interrupt id such as "ioapic0 pin 9".  This interrupt id is
 * used by MI code and intrctl(8).
 *
 * Returns a pointer into 'buf' (of size 'len') holding the id string.
 */
static const char *
create_intrid(int legacy_irq, struct pic *pic, int pin, char *buf, size_t len)
{
	int ih;

#if NPCI > 0
	/* MSI/MSI-X: encode device id and vector into a 64-bit handle. */
	if ((pic->pic_type == PIC_MSI) || (pic->pic_type == PIC_MSIX)) {
		uint64_t pih;
		int dev, vec;

		dev = msipic_get_devid(pic);
		vec = pin;
		pih = __SHIFTIN((uint64_t)dev, MSI_INT_DEV_MASK)
			| __SHIFTIN((uint64_t)vec, MSI_INT_VEC_MASK)
			| APIC_INT_VIA_MSI;
		if (pic->pic_type == PIC_MSI)
			MSI_INT_MAKE_MSI(pih);
		else if (pic->pic_type == PIC_MSIX)
			MSI_INT_MAKE_MSIX(pih);

		return x86_pci_msi_string(NULL, pih, buf, len);
	}
#endif

	/*
	 * If the device is pci, "legacy_irq" is always -1.  The least
	 * significant 8 bits of "ih" are only used in intr_string() to
	 * show the irq number.  If the device is "legacy" (such as a
	 * floppy), it should not use intr_string().
	 */
	if (pic->pic_type == PIC_I8259) {
		ih = legacy_irq;
		return legacy_intr_string(ih, buf, len, pic);
	}

	/* APIC case: pack the apic id and pin into the handle. */
	ih = ((pic->pic_apicid << APIC_INT_APIC_SHIFT) & APIC_INT_APIC_MASK)
	    | ((pin << APIC_INT_PIN_SHIFT) & APIC_INT_PIN_MASK);
	if (pic->pic_type == PIC_IOAPIC) {
		ih |= APIC_INT_VIA_APIC;
	}
	ih |= pin;
	return intr_string(ih, buf, len);
}
    524 
    525 /*
    526  * Find intrsource from io_interrupt_sources list.
    527  */
    528 static struct intrsource *
    529 intr_get_io_intrsource(const char *intrid)
    530 {
    531 	struct intrsource *isp;
    532 
    533 	KASSERT(mutex_owned(&cpu_lock));
    534 
    535 	SIMPLEQ_FOREACH(isp, &io_interrupt_sources, is_list) {
    536 		KASSERT(isp->is_intrid != NULL);
    537 		if (strncmp(intrid, isp->is_intrid, INTRIDBUF - 1) == 0)
    538 			return isp;
    539 	}
    540 	return NULL;
    541 }
    542 
/*
 * Allocate an intrsource for 'intrid' and add it to the
 * io_interrupt_sources list, along with its per-CPU saved event
 * counters (one per CPU, pre-filled with each CPU's id).
 *
 * => cpu_lock must be held by the caller.
 * => returns NULL when intrid is NULL.
 *
 * NOTE(review): the NULL checks after kmem_zalloc(..., KM_SLEEP) appear
 * to be dead code, since KM_SLEEP allocations do not fail -- confirm
 * before relying on those error paths.
 */
struct intrsource *
intr_allocate_io_intrsource(const char *intrid)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct intrsource *isp;
	struct percpu_evcnt *pep;

	KASSERT(mutex_owned(&cpu_lock));

	if (intrid == NULL)
		return NULL;

	isp = kmem_zalloc(sizeof(*isp), KM_SLEEP);
	if (isp == NULL) {
		return NULL;
	}

	pep = kmem_zalloc(sizeof(*pep) * ncpu, KM_SLEEP);
	if (pep == NULL) {
		kmem_free(isp, sizeof(*isp));
		return NULL;
	}
	isp->is_saved_evcnt = pep;
	/* Record each CPU's id in its saved-evcnt slot. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		pep->cpuid = ci->ci_cpuid;
		pep++;
	}
	strlcpy(isp->is_intrid, intrid, sizeof(isp->is_intrid));

	SIMPLEQ_INSERT_TAIL(&io_interrupt_sources, isp, is_list);

	return isp;
}
    580 
/*
 * Remove 'isp' from the io_interrupt_sources list and free it, along
 * with its per-CPU saved event counters.
 *
 * => cpu_lock must be held by the caller.
 */
static void
intr_free_io_intrsource_direct(struct intrsource *isp)
{
	KASSERT(mutex_owned(&cpu_lock));

	SIMPLEQ_REMOVE(&io_interrupt_sources, isp, intrsource, is_list);

	/* Is this interrupt established?  (is_evname is set at attach.) */
	if (isp->is_evname[0] != '\0')
		evcnt_detach(&isp->is_evcnt);

	kmem_free(isp->is_saved_evcnt,
	    sizeof(*(isp->is_saved_evcnt)) * ncpu);

	kmem_free(isp, sizeof(*isp));
}
    600 
    601 /*
    602  * Remove from io_interrupt_sources list and free by the interrupt id.
    603  * This function can be used by MI code.
    604  */
    605 void
    606 intr_free_io_intrsource(const char *intrid)
    607 {
    608 	struct intrsource *isp;
    609 
    610 	KASSERT(mutex_owned(&cpu_lock));
    611 
    612 	if (intrid == NULL)
    613 		return;
    614 
    615 	if ((isp = intr_get_io_intrsource(intrid)) == NULL) {
    616 		return;
    617 	}
    618 
    619 	/* If the interrupt uses shared IRQ, don't free yet. */
    620 	if (isp->is_handlers != NULL) {
    621 		return;
    622 	}
    623 
    624 	intr_free_io_intrsource_direct(isp);
    625 }
    626 
/*
 * Allocate (or find) a slot on CPU 'ci' for a (pic, pin) source and
 * store its index via 'index'.  For the i8259 the slot number is the
 * pin itself (on the primary CPU); otherwise the first free slot is
 * used.  When the slot was empty, 'chained' (the pre-allocated
 * intrsource) is installed there and its event counter attached.
 *
 * => cpu_lock must be held by the caller.
 * => returns 0 on success, EBUSY when no free slot is available.
 */
static int
intr_allocate_slot_cpu(struct cpu_info *ci, struct pic *pic, int pin,
		       int *index, struct intrsource *chained)
{
	int slot, i;
	struct intrsource *isp;

	KASSERT(mutex_owned(&cpu_lock));

	if (pic == &i8259_pic) {
		/* i8259 slots are fixed and live on the boot processor. */
		KASSERT(CPU_IS_PRIMARY(ci));
		slot = pin;
	} else {
		int start = 0;
		slot = -1;

		/* avoid reserved slots for legacy interrupts. */
		if (CPU_IS_PRIMARY(ci) && msipic_is_msi_pic(pic))
			start = NUM_LEGACY_IRQS;
		/*
		 * intr_allocate_slot has checked for an existing mapping.
		 * Now look for a free slot.
		 */
		for (i = start; i < MAX_INTR_SOURCES ; i++) {
			if (ci->ci_isources[i] == NULL) {
				slot = i;
				break;
			}
		}
		if (slot == -1) {
			return EBUSY;
		}
	}

	isp = ci->ci_isources[slot];
	if (isp == NULL) {
		const char *via;

		/* First user of this slot: install the chained source. */
		isp = chained;
		KASSERT(isp != NULL);
		/* MSI/MSI-X sources count "vectors", others count "pins". */
		if (pic->pic_type == PIC_MSI || pic->pic_type == PIC_MSIX)
			via = "vec";
		else
			via = "pin";
		snprintf(isp->is_evname, sizeof (isp->is_evname),
		    "%s %d", via, pin);
		evcnt_attach_dynamic(&isp->is_evcnt, EVCNT_TYPE_INTR, NULL,
		    pic->pic_name, isp->is_evname);
		isp->is_active_cpu = ci->ci_cpuid;
		ci->ci_isources[slot] = isp;
	}

	*index = slot;
	return 0;
}
    682 
/*
 * A simple round-robin allocator to assign interrupts to CPUs.
 * Picks a CPU, a per-CPU slot and an IDT vector for a (pic, pin)
 * source; an existing mapping for the same pair is reused when found.
 *
 * => cpu_lock must be held by the caller.
 * => on success, returns 0 and fills *cip, *index and *idt_slot.
 */
static int __noinline
intr_allocate_slot(struct pic *pic, int pin, int level,
		   struct cpu_info **cip, int *index, int *idt_slot,
		   struct intrsource *chained)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci, *lci;
	struct intrsource *isp;
	int slot = 0, idtvec, error;

	KASSERT(mutex_owned(&cpu_lock));

	/* First check if this pin is already used by an interrupt vector. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		for (slot = 0 ; slot < MAX_INTR_SOURCES ; slot++) {
			if ((isp = ci->ci_isources[slot]) == NULL) {
				continue;
			}
			if (isp->is_pic == pic &&
			    pin != -1 && isp->is_pin == pin) {
				*idt_slot = isp->is_idtvec;
				*index = slot;
				*cip = ci;
				return 0;
			}
		}
	}

	/*
	 * The pic/pin combination doesn't have an existing mapping.
	 * Find a slot for a new interrupt source.  For the i8259 case,
	 * we always use reserved slots of the primary CPU.  Otherwise,
	 * we make an attempt to balance the interrupt load.
	 *
	 * PIC and APIC usage are essentially exclusive, so the reservation
	 * of the ISA slots is ignored when assigning IOAPIC slots.
	 */
	if (pic == &i8259_pic) {
		/*
		 * Must be directed to BP.
		 */
		ci = &cpu_info_primary;
		error = intr_allocate_slot_cpu(ci, pic, pin, &slot, chained);
	} else {
		/*
		 * Find least loaded AP/BP and try to allocate there.
		 * NOTE(review): the load-balancing branch is compiled out
		 * via "#if 0" below, so every interrupt currently lands
		 * on the primary CPU here.
		 */
		ci = NULL;
		for (CPU_INFO_FOREACH(cii, lci)) {
			if ((lci->ci_schedstate.spc_flags & SPCF_NOINTR) != 0) {
				continue;
			}
#if 0
			if (ci == NULL ||
			    ci->ci_nintrhand > lci->ci_nintrhand) {
			    	ci = lci;
			}
#else
			ci = &cpu_info_primary;
#endif
		}
		KASSERT(ci != NULL);
		error = intr_allocate_slot_cpu(ci, pic, pin, &slot, chained);

		/*
		 * If that did not work, allocate anywhere.
		 */
		if (error != 0) {
			for (CPU_INFO_FOREACH(cii, ci)) {
				if ((ci->ci_schedstate.spc_flags &
				    SPCF_NOINTR) != 0) {
					continue;
				}
				error = intr_allocate_slot_cpu(ci, pic,
				    pin, &slot, chained);
				if (error == 0) {
					break;
				}
			}
		}
	}
	if (error != 0) {
		return error;
	}
	KASSERT(ci != NULL);

	/*
	 * Now allocate an IDT vector.
	 * For the 8259 these are reserved up front.
	 */
	if (pic == &i8259_pic) {
		idtvec = ICU_OFFSET + pin;
	} else {
		idtvec = idt_vec_alloc(APIC_LEVEL(level), IDT_INTR_HIGH);
	}
	if (idtvec == 0) {
		/* No free vector: undo the slot allocation above. */
		evcnt_detach(&ci->ci_isources[slot]->is_evcnt);
		ci->ci_isources[slot] = NULL;
		return EBUSY;
	}
	ci->ci_isources[slot]->is_idtvec = idtvec;
	*idt_slot = idtvec;
	*index = slot;
	*cip = ci;
	return 0;
}
    792 
    793 static void
    794 intr_source_free(struct cpu_info *ci, int slot, struct pic *pic, int idtvec)
    795 {
    796 	struct intrsource *isp;
    797 
    798 	isp = ci->ci_isources[slot];
    799 
    800 	if (isp->is_handlers != NULL)
    801 		return;
    802 	ci->ci_isources[slot] = NULL;
    803 	if (pic != &i8259_pic)
    804 		idt_vec_free(idtvec);
    805 }
    806 
#ifdef MULTIPROCESSOR
static int intr_biglock_wrapper(void *);

/*
 * intr_biglock_wrapper: take the big kernel lock around a non-MPSAFE
 * interrupt handler and forward its return value.
 */
static int
intr_biglock_wrapper(void *cookie)
{
	struct intrhand *handler = cookie;
	int rv;

	KERNEL_LOCK(1, NULL);
	rv = (*handler->ih_realfun)(handler->ih_realarg);
	KERNEL_UNLOCK_ONE(NULL);

	return rv;
}
#endif /* MULTIPROCESSOR */
    829 
    830 struct pic *
    831 intr_findpic(int num)
    832 {
    833 #if NIOAPIC > 0
    834 	struct ioapic_softc *pic;
    835 
    836 	pic = ioapic_find_bybase(num);
    837 	if (pic != NULL)
    838 		return &pic->sc_pic;
    839 #endif
    840 	if (num < NUM_LEGACY_IRQS)
    841 		return &i8259_pic;
    842 
    843 	return NULL;
    844 }
    845 
    846 /*
    847  * Append device name to intrsource. If device A and device B share IRQ number,
    848  * the device name of the interrupt id is "device A, device B".
    849  */
    850 static void
    851 intr_append_intrsource_xname(struct intrsource *isp, const char *xname)
    852 {
    853 
    854 	if (isp->is_xname[0] != '\0')
    855 		strlcat(isp->is_xname, ", ", sizeof(isp->is_xname));
    856 	strlcat(isp->is_xname, xname, sizeof(isp->is_xname));
    857 }
    858 
/*
 * Handle per-CPU component of interrupt establish.
 *
 * => caller (on initiating CPU) holds cpu_lock on our behalf
 * => arg1: struct intrhand *ih
 * => arg2: int idt_vec
 */
static void
intr_establish_xcall(void *arg1, void *arg2)
{
	struct intrsource *source;
	struct intrstub *stubp;
	struct intrhand *ih;
	struct cpu_info *ci;
	int idt_vec;
	u_long psl;

	ih = arg1;

	/* Must run on the target CPU (or before the APs are online). */
	KASSERT(ih->ih_cpu == curcpu() || !mp_online);

	ci = ih->ih_cpu;
	source = ci->ci_isources[ih->ih_slot];
	idt_vec = (int)(intptr_t)arg2;

	/* Disable interrupts locally. */
	psl = x86_read_psl();
	x86_disable_intr();

	/* Link in the handler and re-calculate masks. */
	*(ih->ih_prevp) = ih;
	intr_calculatemasks(ci);

	/* Hook in new IDT vector and SPL state. */
	if (source->is_resume == NULL || source->is_idtvec != idt_vec) {
		/* Release a previously used, different vector. */
		if (source->is_idtvec != 0 && source->is_idtvec != idt_vec)
			idt_vec_free(source->is_idtvec);
		source->is_idtvec = idt_vec;
		/* Level- and edge-triggered sources use different stubs. */
		if (source->is_type == IST_LEVEL) {
			stubp = &source->is_pic->pic_level_stubs[ih->ih_slot];
		} else {
			stubp = &source->is_pic->pic_edge_stubs[ih->ih_slot];
		}
		source->is_resume = stubp->ist_resume;
		source->is_recurse = stubp->ist_recurse;
		setgate(&idt[idt_vec], stubp->ist_entry, 0, SDT_SYS386IGT,
		    SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	}

	/* Re-enable interrupts locally. */
	x86_write_psl(psl);
}
    911 
    912 void *
    913 intr_establish_xname(int legacy_irq, struct pic *pic, int pin, int type,
    914 		     int level, int (*handler)(void *), void *arg,
    915 		     bool known_mpsafe, const char *xname)
    916 {
    917 	struct intrhand **p, *q, *ih;
    918 	struct cpu_info *ci;
    919 	int slot, error, idt_vec;
    920 	struct intrsource *chained, *source;
    921 #ifdef MULTIPROCESSOR
    922 	bool mpsafe = (known_mpsafe || level != IPL_VM);
    923 #endif /* MULTIPROCESSOR */
    924 	uint64_t where;
    925 	const char *intrstr;
    926 	char intrstr_buf[INTRIDBUF];
    927 
    928 #ifdef DIAGNOSTIC
    929 	if (legacy_irq != -1 && (legacy_irq < 0 || legacy_irq > 15))
    930 		panic("%s: bad legacy IRQ value", __func__);
    931 
    932 	if (legacy_irq == -1 && pic == &i8259_pic)
    933 		panic("intr_establish: non-legacy IRQ on i8259");
    934 #endif
    935 
    936 	ih = kmem_alloc(sizeof(*ih), KM_SLEEP);
    937 	if (ih == NULL) {
    938 		printf("%s: can't allocate handler info\n", __func__);
    939 		return NULL;
    940 	}
    941 
    942 	intrstr = create_intrid(legacy_irq, pic, pin, intrstr_buf,
    943 	    sizeof(intrstr_buf));
    944 	KASSERT(intrstr != NULL);
    945 
    946 	mutex_enter(&cpu_lock);
    947 
    948 	/* allocate intrsource pool, if not yet. */
    949 	chained = intr_get_io_intrsource(intrstr);
    950 	if (chained == NULL) {
    951 		if (msipic_is_msi_pic(pic)) {
    952 			mutex_exit(&cpu_lock);
    953 			printf("%s: %s has no intrsource\n", __func__, intrstr);
    954 			return NULL;
    955 		}
    956 		chained = intr_allocate_io_intrsource(intrstr);
    957 		if (chained == NULL) {
    958 			mutex_exit(&cpu_lock);
    959 			printf("%s: can't allocate io_intersource\n", __func__);
    960 			return NULL;
    961 		}
    962 	}
    963 
    964 	error = intr_allocate_slot(pic, pin, level, &ci, &slot, &idt_vec,
    965 	    chained);
    966 	if (error != 0) {
    967 		intr_free_io_intrsource_direct(chained);
    968 		mutex_exit(&cpu_lock);
    969 		kmem_free(ih, sizeof(*ih));
    970 		printf("failed to allocate interrupt slot for PIC %s pin %d\n",
    971 		    pic->pic_name, pin);
    972 		return NULL;
    973 	}
    974 
    975 	source = ci->ci_isources[slot];
    976 
    977 	if (source->is_handlers != NULL &&
    978 	    source->is_pic->pic_type != pic->pic_type) {
    979 		intr_free_io_intrsource_direct(chained);
    980 		mutex_exit(&cpu_lock);
    981 		kmem_free(ih, sizeof(*ih));
    982 		printf("%s: can't share intr source between "
    983 		       "different PIC types (legacy_irq %d pin %d slot %d)\n",
    984 		    __func__, legacy_irq, pin, slot);
    985 		return NULL;
    986 	}
    987 
    988 	source->is_pin = pin;
    989 	source->is_pic = pic;
    990 	intr_append_intrsource_xname(source, xname);
    991 	switch (source->is_type) {
    992 	case IST_NONE:
    993 		source->is_type = type;
    994 		break;
    995 	case IST_EDGE:
    996 	case IST_LEVEL:
    997 		if (source->is_type == type)
    998 			break;
    999 		/* FALLTHROUGH */
   1000 	case IST_PULSE:
   1001 		if (type != IST_NONE) {
   1002 			intr_source_free(ci, slot, pic, idt_vec);
   1003 			intr_free_io_intrsource_direct(chained);
   1004 			mutex_exit(&cpu_lock);
   1005 			kmem_free(ih, sizeof(*ih));
   1006 			printf("%s: pic %s pin %d: can't share "
   1007 			       "type %d with %d\n",
   1008 				__func__, pic->pic_name, pin,
   1009 				source->is_type, type);
   1010 			return NULL;
   1011 		}
   1012 		break;
   1013 	default:
   1014 		panic("%s: bad intr type %d for pic %s pin %d\n",
   1015 		    __func__, source->is_type, pic->pic_name, pin);
   1016 		/* NOTREACHED */
   1017 	}
   1018 
   1019         /*
   1020 	 * If the establishing interrupt uses shared IRQ, the interrupt uses
   1021 	 * "ci->ci_isources[slot]" instead of allocated by the establishing
   1022 	 * device's pci_intr_alloc() or this function.
   1023 	 */
   1024 	if (source->is_handlers != NULL) {
   1025 		struct intrsource *isp, *nisp;
   1026 
   1027 		SIMPLEQ_FOREACH_SAFE(isp, &io_interrupt_sources,
   1028 		    is_list, nisp) {
   1029 			if (strncmp(intrstr, isp->is_intrid, INTRIDBUF - 1) == 0
   1030 			    && isp->is_handlers == NULL)
   1031 				intr_free_io_intrsource_direct(isp);
   1032 		}
   1033 	}
   1034 
   1035 	/*
   1036 	 * We're now committed.  Mask the interrupt in hardware and
   1037 	 * count it for load distribution.
   1038 	 */
   1039 	(*pic->pic_hwmask)(pic, pin);
   1040 	(ci->ci_nintrhand)++;
   1041 
   1042 	/*
   1043 	 * Figure out where to put the handler.
   1044 	 * This is O(N^2), but we want to preserve the order, and N is
   1045 	 * generally small.
   1046 	 */
   1047 	for (p = &ci->ci_isources[slot]->is_handlers;
   1048 	     (q = *p) != NULL && q->ih_level > level;
   1049 	     p = &q->ih_next) {
   1050 		/* nothing */;
   1051 	}
   1052 
   1053 	ih->ih_fun = ih->ih_realfun = handler;
   1054 	ih->ih_arg = ih->ih_realarg = arg;
   1055 	ih->ih_prevp = p;
   1056 	ih->ih_next = *p;
   1057 	ih->ih_level = level;
   1058 	ih->ih_pin = pin;
   1059 	ih->ih_cpu = ci;
   1060 	ih->ih_slot = slot;
   1061 #ifdef MULTIPROCESSOR
   1062 	if (!mpsafe) {
   1063 		ih->ih_fun = intr_biglock_wrapper;
   1064 		ih->ih_arg = ih;
   1065 	}
   1066 #endif /* MULTIPROCESSOR */
   1067 
   1068 	/*
   1069 	 * Call out to the remote CPU to update its interrupt state.
   1070 	 * Only make RPCs if the APs are up and running.
   1071 	 */
   1072 	if (ci == curcpu() || !mp_online) {
   1073 		intr_establish_xcall(ih, (void *)(intptr_t)idt_vec);
   1074 	} else {
   1075 		where = xc_unicast(0, intr_establish_xcall, ih,
   1076 		    (void *)(intptr_t)idt_vec, ci);
   1077 		xc_wait(where);
   1078 	}
   1079 
   1080 	/* All set up, so add a route for the interrupt and unmask it. */
   1081 	(*pic->pic_addroute)(pic, ci, pin, idt_vec, type);
   1082 	(*pic->pic_hwunmask)(pic, pin);
   1083 	mutex_exit(&cpu_lock);
   1084 
   1085 #ifdef INTRDEBUG
   1086 	printf("allocated pic %s type %s pin %d level %d to %s slot %d "
   1087 	    "idt entry %d\n",
   1088 	    pic->pic_name, type == IST_EDGE ? "edge" : "level", pin, level,
   1089 	    device_xname(ci->ci_dev), slot, idt_vec);
   1090 #endif
   1091 
   1092 	return (ih);
   1093 }
   1094 
   1095 void *
   1096 intr_establish(int legacy_irq, struct pic *pic, int pin, int type, int level,
   1097 	       int (*handler)(void *), void *arg, bool known_mpsafe)
   1098 {
   1099 
   1100 	return intr_establish_xname(legacy_irq, pic, pin, type,
   1101 	    level, handler, arg, known_mpsafe, "unknown");
   1102 }
   1103 
   1104 /*
   1105  * Called on bound CPU to handle intr_disestablish().
   1106  *
   1107  * => caller (on initiating CPU) holds cpu_lock on our behalf
   1108  * => arg1: struct intrhand *ih
   1109  * => arg2: unused
   1110  */
static void
intr_disestablish_xcall(void *arg1, void *arg2)
{
	struct intrhand **p, *q;
	struct cpu_info *ci;
	struct pic *pic;
	struct intrsource *source;
	struct intrhand *ih;
	u_long psl;
	int idtvec;

	ih = arg1;
	ci = ih->ih_cpu;

	/* Must run on the handler's CPU, except during early boot. */
	KASSERT(ci == curcpu() || !mp_online);

	/* Disable interrupts locally. */
	psl = x86_read_psl();
	x86_disable_intr();

	pic = ci->ci_isources[ih->ih_slot]->is_pic;
	source = ci->ci_isources[ih->ih_slot];
	idtvec = source->is_idtvec;

	/* Mask the pin and clear the slot's pending bit. */
	(*pic->pic_hwmask)(pic, ih->ih_pin);
	atomic_and_32(&ci->ci_ipending, ~(1 << ih->ih_slot));

	/*
	 * Remove the handler from the chain.
	 */
	for (p = &source->is_handlers; (q = *p) != NULL && q != ih;
	     p = &q->ih_next)
		;
	if (q == NULL) {
		x86_write_psl(psl);
		panic("%s: handler not registered", __func__);
		/* NOTREACHED */
	}

	*p = q->ih_next;

	intr_calculatemasks(ci);
	/*
	 * If the source now has no handlers: 1) delete the hardware route,
	 * since nothing is left to service the interrupt, and 2) keep it
	 * masked to prevent spurious interrupts.
	 *
	 * If handlers remain: 1) keep the route, and 2) unmask so the
	 * remaining handlers continue to receive interrupts.
	 */
	if (source->is_handlers == NULL)
		(*pic->pic_delroute)(pic, ci, ih->ih_pin, idtvec, source->is_type);
	else
		(*pic->pic_hwunmask)(pic, ih->ih_pin);

	/* Re-enable interrupts. */
	x86_write_psl(psl);

	/* If the source is free we can drop it now. */
	intr_source_free(ci, ih->ih_slot, pic, idtvec);

#ifdef INTRDEBUG
	printf("%s: remove slot %d (pic %s pin %d vec %d)\n",
	    device_xname(ci->ci_dev), ih->ih_slot, pic->pic_name,
	    ih->ih_pin, idtvec);
#endif
}
   1178 
   1179 static int
   1180 intr_num_handlers(struct intrsource *isp)
   1181 {
   1182 	struct intrhand *ih;
   1183 	int num;
   1184 
   1185 	num = 0;
   1186 	for (ih = isp->is_handlers; ih != NULL; ih = ih->ih_next)
   1187 		num++;
   1188 
   1189 	return num;
   1190 }
   1191 
   1192 /*
   1193  * Deregister an interrupt handler.
   1194  */
void
intr_disestablish(struct intrhand *ih)
{
	struct cpu_info *ci;
	struct intrsource *isp;
	uint64_t where;

	/*
	 * Count the removal for load balancing.
	 * Call out to the remote CPU to update its interrupt state.
	 * Only make RPCs if the APs are up and running.
	 */
	mutex_enter(&cpu_lock);
	ci = ih->ih_cpu;
	(ci->ci_nintrhand)--;
	KASSERT(ci->ci_nintrhand >= 0);
	isp = ci->ci_isources[ih->ih_slot];
	if (ci == curcpu() || !mp_online) {
		intr_disestablish_xcall(ih, NULL);
	} else {
		where = xc_unicast(0, intr_disestablish_xcall, ih, NULL, ci);
		xc_wait(where);
	}
	/* Non-MSI sources with no handlers left can be freed right away. */
	if (!msipic_is_msi_pic(isp->is_pic) && intr_num_handlers(isp) < 1) {
		intr_free_io_intrsource_direct(isp);
	}
	mutex_exit(&cpu_lock);
	/* The handler cookie was allocated by intr_establish_xname(). */
	kmem_free(ih, sizeof(*ih));
}
   1224 
   1225 static const char *
   1226 legacy_intr_string(int ih, char *buf, size_t len, struct pic *pic)
   1227 {
   1228 	int legacy_irq;
   1229 
   1230 	KASSERT(pic->pic_type == PIC_I8259);
   1231 	KASSERT(APIC_IRQ_ISLEGACY(ih));
   1232 
   1233 	legacy_irq = APIC_IRQ_LEGACY_IRQ(ih);
   1234 	KASSERT(legacy_irq >= 0 && legacy_irq < 16);
   1235 
   1236 	snprintf(buf, len, "%s pin %d", pic->pic_name, legacy_irq);
   1237 
   1238 	return buf;
   1239 }
   1240 
   1241 const char *
   1242 intr_string(intr_handle_t ih, char *buf, size_t len)
   1243 {
   1244 #if NIOAPIC > 0
   1245 	struct ioapic_softc *pic;
   1246 #endif
   1247 
   1248 	if (ih == 0)
   1249 		panic("%s: bogus handle 0x%" PRIx64, __func__, ih);
   1250 
   1251 #if NIOAPIC > 0
   1252 	if (ih & APIC_INT_VIA_APIC) {
   1253 		pic = ioapic_find(APIC_IRQ_APIC(ih));
   1254 		if (pic != NULL) {
   1255 			snprintf(buf, len, "%s pin %d",
   1256 			    device_xname(pic->sc_dev), APIC_IRQ_PIN(ih));
   1257 		} else {
   1258 			snprintf(buf, len,
   1259 			    "apic %d int %d (irq %d)",
   1260 			    APIC_IRQ_APIC(ih),
   1261 			    APIC_IRQ_PIN(ih),
   1262 			    APIC_IRQ_LEGACY_IRQ(ih));
   1263 		}
   1264 	} else
   1265 		snprintf(buf, len, "irq %d", APIC_IRQ_LEGACY_IRQ(ih));
   1266 #else
   1267 
   1268 	snprintf(buf, len, "irq %d" APIC_IRQ_LEGACY_IRQ(ih));
   1269 #endif
   1270 	return buf;
   1271 
   1272 }
   1273 
   1274 /*
   1275  * Fake interrupt handler structures for the benefit of symmetry with
   1276  * other interrupt sources, and the benefit of intr_calculatemasks()
   1277  */
struct intrhand fake_softclock_intrhand;
struct intrhand fake_softnet_intrhand;
struct intrhand fake_softserial_intrhand;
struct intrhand fake_softbio_intrhand;
struct intrhand fake_timer_intrhand;
struct intrhand fake_ipi_intrhand;
struct intrhand fake_preempt_intrhand;

#if NLAPIC > 0 && defined(MULTIPROCESSOR)
/* Printable IPI names, used below when attaching per-CPU event counters. */
static const char *x86_ipi_names[X86_NIPI] = X86_IPI_NAMES;
#endif
   1289 
/*
 * Pass the caller's value through on DIAGNOSTIC kernels; otherwise the
 * red zone is compiled out and this is constant false.
 */
static inline bool
redzone_const_or_false(bool x)
{
#ifndef DIAGNOSTIC
	return false;
#else
	return x;
#endif /* DIAGNOSTIC */
}
   1299 
   1300 static inline int
   1301 redzone_const_or_zero(int x)
   1302 {
   1303 	return redzone_const_or_false(true) ? x : 0;
   1304 }
   1305 
   1306 /*
   1307  * Initialize all handlers that aren't dynamically allocated, and exist
   1308  * for each CPU.
   1309  */
   1310 void
   1311 cpu_intr_init(struct cpu_info *ci)
   1312 {
   1313 	struct intrsource *isp;
   1314 #if NLAPIC > 0 && defined(MULTIPROCESSOR)
   1315 	int i;
   1316 	static int first = 1;
   1317 #endif
   1318 #ifdef INTRSTACKSIZE
   1319 	vaddr_t istack;
   1320 #endif
   1321 
   1322 #if NLAPIC > 0
   1323 	isp = kmem_zalloc(sizeof(*isp), KM_SLEEP);
   1324 	KASSERT(isp != NULL);
   1325 	isp->is_recurse = Xrecurse_lapic_ltimer;
   1326 	isp->is_resume = Xresume_lapic_ltimer;
   1327 	fake_timer_intrhand.ih_level = IPL_CLOCK;
   1328 	isp->is_handlers = &fake_timer_intrhand;
   1329 	isp->is_pic = &local_pic;
   1330 	ci->ci_isources[LIR_TIMER] = isp;
   1331 	evcnt_attach_dynamic(&isp->is_evcnt,
   1332 	    first ? EVCNT_TYPE_INTR : EVCNT_TYPE_MISC, NULL,
   1333 	    device_xname(ci->ci_dev), "timer");
   1334 	first = 0;
   1335 
   1336 #ifdef MULTIPROCESSOR
   1337 	isp = kmem_zalloc(sizeof(*isp), KM_SLEEP);
   1338 	KASSERT(isp != NULL);
   1339 	isp->is_recurse = Xrecurse_lapic_ipi;
   1340 	isp->is_resume = Xresume_lapic_ipi;
   1341 	fake_ipi_intrhand.ih_level = IPL_HIGH;
   1342 	isp->is_handlers = &fake_ipi_intrhand;
   1343 	isp->is_pic = &local_pic;
   1344 	ci->ci_isources[LIR_IPI] = isp;
   1345 
   1346 	for (i = 0; i < X86_NIPI; i++)
   1347 		evcnt_attach_dynamic(&ci->ci_ipi_events[i], EVCNT_TYPE_MISC,
   1348 		    NULL, device_xname(ci->ci_dev), x86_ipi_names[i]);
   1349 #endif
   1350 #endif
   1351 
   1352 	isp = kmem_zalloc(sizeof(*isp), KM_SLEEP);
   1353 	KASSERT(isp != NULL);
   1354 	isp->is_recurse = Xpreemptrecurse;
   1355 	isp->is_resume = Xpreemptresume;
   1356 	fake_preempt_intrhand.ih_level = IPL_PREEMPT;
   1357 	isp->is_handlers = &fake_preempt_intrhand;
   1358 	isp->is_pic = &softintr_pic;
   1359 	ci->ci_isources[SIR_PREEMPT] = isp;
   1360 
   1361 	intr_calculatemasks(ci);
   1362 
   1363 #if defined(INTRSTACKSIZE)
   1364 	/*
   1365 	 * If the red zone is activated, protect both the top and
   1366 	 * the bottom of the stack with an unmapped page.
   1367 	 */
   1368 	istack = uvm_km_alloc(kernel_map,
   1369 	    INTRSTACKSIZE + redzone_const_or_zero(2 * PAGE_SIZE), 0,
   1370 	    UVM_KMF_WIRED);
   1371 	if (redzone_const_or_false(true)) {
   1372 		pmap_kremove(istack, PAGE_SIZE);
   1373 		pmap_kremove(istack + INTRSTACKSIZE + PAGE_SIZE, PAGE_SIZE);
   1374 		pmap_update(pmap_kernel());
   1375 	}
   1376 	/* 33 used to be 1.  Arbitrarily reserve 32 more register_t's
   1377 	 * of space for ddb(4) to examine some subroutine arguments
   1378 	 * and to hunt for the next stack frame.
   1379 	 */
   1380 	ci->ci_intrstack = (char *)istack + redzone_const_or_zero(PAGE_SIZE) +
   1381 	    INTRSTACKSIZE - 33 * sizeof(register_t);
   1382 #if defined(__x86_64__)
   1383 	ci->ci_tss.tss_ist[0] = (uintptr_t)ci->ci_intrstack & ~0xf;
   1384 #endif /* defined(__x86_64__) */
   1385 #endif /* defined(INTRSTACKSIZE) */
   1386 	ci->ci_idepth = -1;
   1387 }
   1388 
   1389 #if defined(INTRDEBUG) || defined(DDB)
   1390 
/*
 * Dump the interrupt configuration of every CPU: the per-IPL masks and
 * each established interrupt source with its chain of handlers.
 */
void
intr_printconfig(void)
{
	int i;
	struct intrhand *ih;
	struct intrsource *isp;
	struct cpu_info *ci;
	CPU_INFO_ITERATOR cii;
	void (*pr)(const char *, ...);

	/* Use db_printf() instead of printf() when called from ddb. */
	pr = printf;
#ifdef DDB
	extern int db_active;
	if (db_active) {
		pr = db_printf;
	}
#endif

	for (CPU_INFO_FOREACH(cii, ci)) {
		(*pr)("%s: interrupt masks:\n", device_xname(ci->ci_dev));
		for (i = 0; i < NIPL; i++)
			(*pr)("IPL %d mask %08lx unmask %08lx\n", i,
			    (u_long)ci->ci_imask[i], (u_long)ci->ci_iunmask[i]);
		for (i = 0; i < MAX_INTR_SOURCES; i++) {
			isp = ci->ci_isources[i];
			if (isp == NULL)
				continue;
			(*pr)("%s source %d is pin %d from pic %s type %d maxlevel %d\n",
			    device_xname(ci->ci_dev), i, isp->is_pin,
			    isp->is_pic->pic_name, isp->is_type, isp->is_maxlevel);
			for (ih = isp->is_handlers; ih != NULL;
			     ih = ih->ih_next)
				(*pr)("\thandler %p level %d\n",
				    ih->ih_fun, ih->ih_level);
#if NIOAPIC > 0
			/* Also show the raw I/O APIC redirection entry. */
			if (isp->is_pic->pic_type == PIC_IOAPIC) {
				struct ioapic_softc *sc;
				sc = isp->is_pic->pic_ioapic;
				(*pr)("\tioapic redir 0x%x\n",
				    sc->sc_pins[isp->is_pin].ip_map->redir);
			}
#endif

		}
	}
}
   1437 
   1438 #endif
   1439 
/*
 * MD setup for a software interrupt LWP: allocate an interrupt source
 * for the given softint level, attach it to the LWP's CPU, and return
 * the slot bit to MI code via *machdep.
 */
void
softint_init_md(lwp_t *l, u_int level, uintptr_t *machdep)
{
	struct intrsource *isp;
	struct cpu_info *ci;
	u_int sir;

	ci = l->l_cpu;

	isp = kmem_zalloc(sizeof(*isp), KM_SLEEP);
	KASSERT(isp != NULL);
	isp->is_recurse = Xsoftintr;
	isp->is_resume = Xsoftintr;
	isp->is_pic = &softintr_pic;

	/* Map the MI softint level to its slot and fake handler. */
	switch (level) {
	case SOFTINT_BIO:
		sir = SIR_BIO;
		fake_softbio_intrhand.ih_level = IPL_SOFTBIO;
		isp->is_handlers = &fake_softbio_intrhand;
		break;
	case SOFTINT_NET:
		sir = SIR_NET;
		fake_softnet_intrhand.ih_level = IPL_SOFTNET;
		isp->is_handlers = &fake_softnet_intrhand;
		break;
	case SOFTINT_SERIAL:
		sir = SIR_SERIAL;
		fake_softserial_intrhand.ih_level = IPL_SOFTSERIAL;
		isp->is_handlers = &fake_softserial_intrhand;
		break;
	case SOFTINT_CLOCK:
		sir = SIR_CLOCK;
		fake_softclock_intrhand.ih_level = IPL_SOFTCLOCK;
		isp->is_handlers = &fake_softclock_intrhand;
		break;
	default:
		panic("softint_init_md");
	}

	KASSERT(ci->ci_isources[sir] == NULL);

	*machdep = (1 << sir);
	ci->ci_isources[sir] = isp;
	ci->ci_isources[sir]->is_lwp = l;

	intr_calculatemasks(ci);
}
   1488 
   1489 /*
   1490  * Save current affinitied cpu's interrupt count.
   1491  */
   1492 static void
   1493 intr_save_evcnt(struct intrsource *source, cpuid_t cpuid)
   1494 {
   1495 	struct percpu_evcnt *pep;
   1496 	uint64_t curcnt;
   1497 	int i;
   1498 
   1499 	curcnt = source->is_evcnt.ev_count;
   1500 	pep = source->is_saved_evcnt;
   1501 
   1502 	for (i = 0; i < ncpu; i++) {
   1503 		if (pep[i].cpuid == cpuid) {
   1504 			pep[i].count = curcnt;
   1505 			break;
   1506 		}
   1507 	}
   1508 }
   1509 
   1510 /*
   1511  * Restore current affinitied cpu's interrupt count.
   1512  */
   1513 static void
   1514 intr_restore_evcnt(struct intrsource *source, cpuid_t cpuid)
   1515 {
   1516 	struct percpu_evcnt *pep;
   1517 	int i;
   1518 
   1519 	pep = source->is_saved_evcnt;
   1520 
   1521 	for (i = 0; i < ncpu; i++) {
   1522 		if (pep[i].cpuid == cpuid) {
   1523 			source->is_evcnt.ev_count = pep[i].count;
   1524 			break;
   1525 		}
   1526 	}
   1527 }
   1528 
/*
 * Cross-call handler (target side of redistribution): install interrupt
 * source arg1 at slot arg2 on the current CPU.
 */
static void
intr_redistribute_xc_t(void *arg1, void *arg2)
{
	struct cpu_info *ci;
	struct intrsource *isp;
	int slot;
	u_long psl;

	ci = curcpu();
	isp = arg1;
	slot = (int)(intptr_t)arg2;

	/* Disable interrupts locally. */
	psl = x86_read_psl();
	x86_disable_intr();

	/* Hook it in and re-calculate masks. */
	ci->ci_isources[slot] = isp;
	intr_calculatemasks(curcpu());

	/* Re-enable interrupts locally. */
	x86_write_psl(psl);
}
   1552 
/*
 * Cross-call handler (source side, stage 1): mask the pin of interrupt
 * source arg1 and reroute it to destination CPU arg2.
 */
static void
intr_redistribute_xc_s1(void *arg1, void *arg2)
{
	struct pic *pic;
	struct intrsource *isp;
	struct cpu_info *nci;
	u_long psl;

	isp = arg1;
	nci = arg2;

	/*
	 * Disable interrupts on-chip and mask the pin.  Back out
	 * and let the interrupt be processed if one is pending.
	 */
	pic = isp->is_pic;
	for (;;) {
		psl = x86_read_psl();
		x86_disable_intr();
		if ((*pic->pic_trymask)(pic, isp->is_pin)) {
			break;
		}
		/* Masking failed; re-enable, wait, and retry. */
		x86_write_psl(psl);
		DELAY(1000);
	}

	/* pic_addroute will unmask the interrupt. */
	(*pic->pic_addroute)(pic, nci, isp->is_pin, isp->is_idtvec,
	    isp->is_type);
	x86_write_psl(psl);
}
   1584 
/*
 * Cross-call handler (source side, stage 2): remove the migrated source
 * from slot arg1 on the current CPU and recompute masks.
 */
static void
intr_redistribute_xc_s2(void *arg1, void *arg2)
{
	struct cpu_info *ci;
	u_long psl;
	int slot;

	ci = curcpu();
	slot = (int)(uintptr_t)arg1;

	/* Disable interrupts locally. */
	psl = x86_read_psl();
	x86_disable_intr();

	/* Patch out the source and re-calculate masks. */
	ci->ci_isources[slot] = NULL;
	intr_calculatemasks(ci);

	/* Re-enable interrupts locally. */
	x86_write_psl(psl);
}
   1606 
/*
 * Try to migrate one I/O APIC interrupt source (and all handlers
 * sharing it) away from CPU "oci" to another unshielded CPU.  Returns
 * true if a source was moved, false when nothing (more) can be moved.
 *
 * => caller holds cpu_lock
 */
static bool
intr_redistribute(struct cpu_info *oci)
{
	struct intrsource *isp;
	struct intrhand *ih;
	CPU_INFO_ITERATOR cii;
	struct cpu_info *nci, *ici;
	int oslot, nslot;
	uint64_t where;

	KASSERT(mutex_owned(&cpu_lock));

	/* Look for an interrupt source that we can migrate. */
	for (oslot = 0; oslot < MAX_INTR_SOURCES; oslot++) {
		if ((isp = oci->ci_isources[oslot]) == NULL) {
			continue;
		}
		/* Only I/O APIC sources can be rerouted. */
		if (isp->is_pic->pic_type == PIC_IOAPIC) {
			break;
		}
	}
	if (oslot == MAX_INTR_SOURCES) {
		return false;
	}

	/* Find least loaded CPU and try to move there. */
	nci = NULL;
	for (CPU_INFO_FOREACH(cii, ici)) {
		if ((ici->ci_schedstate.spc_flags & SPCF_NOINTR) != 0) {
			continue;
		}
		KASSERT(ici != oci);
		if (nci == NULL || nci->ci_nintrhand > ici->ci_nintrhand) {
			nci = ici;
		}
	}
	if (nci == NULL) {
		return false;
	}
	for (nslot = 0; nslot < MAX_INTR_SOURCES; nslot++) {
		if (nci->ci_isources[nslot] == NULL) {
			break;
		}
	}

	/* If that did not work, allocate anywhere. */
	if (nslot == MAX_INTR_SOURCES) {
		for (CPU_INFO_FOREACH(cii, nci)) {
			if ((nci->ci_schedstate.spc_flags & SPCF_NOINTR) != 0) {
				continue;
			}
			KASSERT(nci != oci);
			for (nslot = 0; nslot < MAX_INTR_SOURCES; nslot++) {
				if (nci->ci_isources[nslot] == NULL) {
					break;
				}
			}
			if (nslot != MAX_INTR_SOURCES) {
				break;
			}
		}
	}
	if (nslot == MAX_INTR_SOURCES) {
		return false;
	}

	/*
	 * Now we have new CPU and new slot.  Run a cross-call to set up
	 * the new vector on the target CPU.
	 */
	where = xc_unicast(0, intr_redistribute_xc_t, isp,
	    (void *)(intptr_t)nslot, nci);
	xc_wait(where);

	/*
	 * We're ready to go on the target CPU.  Run a cross call to
	 * reroute the interrupt away from the source CPU.
	 */
	where = xc_unicast(0, intr_redistribute_xc_s1, isp, nci, oci);
	xc_wait(where);

	/* Sleep for (at least) 10ms to allow the change to take hold. */
	(void)kpause("intrdist", false, mstohz(10), NULL);

	/* Complete removal from the source CPU. */
	where = xc_unicast(0, intr_redistribute_xc_s2,
	    (void *)(uintptr_t)oslot, NULL, oci);
	xc_wait(where);

	/* Finally, take care of book-keeping. */
	for (ih = isp->is_handlers; ih != NULL; ih = ih->ih_next) {
		oci->ci_nintrhand--;
		nci->ci_nintrhand++;
		ih->ih_cpu = nci;
	}
	/* Move the event counter state along with the source. */
	intr_save_evcnt(isp, oci->ci_cpuid);
	intr_restore_evcnt(isp, nci->ci_cpuid);
	isp->is_active_cpu = nci->ci_cpuid;

	return true;
}
   1708 
   1709 void
   1710 cpu_intr_redistribute(void)
   1711 {
   1712 	CPU_INFO_ITERATOR cii;
   1713 	struct cpu_info *ci;
   1714 
   1715 	KASSERT(mutex_owned(&cpu_lock));
   1716 	KASSERT(mp_online);
   1717 
   1718 	/* Direct interrupts away from shielded CPUs. */
   1719 	for (CPU_INFO_FOREACH(cii, ci)) {
   1720 		if ((ci->ci_schedstate.spc_flags & SPCF_NOINTR) == 0) {
   1721 			continue;
   1722 		}
   1723 		while (intr_redistribute(ci)) {
   1724 			/* nothing */
   1725 		}
   1726 	}
   1727 
   1728 	/* XXX should now re-balance */
   1729 }
   1730 
   1731 u_int
   1732 cpu_intr_count(struct cpu_info *ci)
   1733 {
   1734 
   1735 	KASSERT(ci->ci_nintrhand >= 0);
   1736 
   1737 	return ci->ci_nintrhand;
   1738 }
   1739 
   1740 static int
   1741 intr_find_unused_slot(struct cpu_info *ci, int *index)
   1742 {
   1743 	int slot, i;
   1744 
   1745 	KASSERT(mutex_owned(&cpu_lock));
   1746 
   1747 	slot = -1;
   1748 	for (i = 0; i < MAX_INTR_SOURCES ; i++) {
   1749 		if (ci->ci_isources[i] == NULL) {
   1750 			slot = i;
   1751 			break;
   1752 		}
   1753 	}
   1754 	if (slot == -1) {
   1755 		DPRINTF(("cannot allocate ci_isources\n"));
   1756 		return EBUSY;
   1757 	}
   1758 
   1759 	*index = slot;
   1760 	return 0;
   1761 }
   1762 
   1763 /*
   1764  * Let cpu_info ready to accept the interrupt.
   1765  */
   1766 static void
   1767 intr_activate_xcall(void *arg1, void *arg2)
   1768 {
   1769 	struct cpu_info *ci;
   1770 	struct intrsource *source;
   1771 	struct intrstub *stubp;
   1772 	struct intrhand *ih;
   1773 	u_long psl;
   1774 	int idt_vec;
   1775 	int slot;
   1776 
   1777 	ih = arg1;
   1778 
   1779 	kpreempt_disable();
   1780 
   1781 	KASSERT(ih->ih_cpu == curcpu() || !mp_online);
   1782 
   1783 	ci = ih->ih_cpu;
   1784 	slot = ih->ih_slot;
   1785 	source = ci->ci_isources[slot];
   1786 	idt_vec = source->is_idtvec;
   1787 
   1788 	psl = x86_read_psl();
   1789 	x86_disable_intr();
   1790 
   1791 	intr_calculatemasks(ci);
   1792 
   1793 	if (source->is_type == IST_LEVEL) {
   1794 		stubp = &source->is_pic->pic_level_stubs[slot];
   1795 	} else {
   1796 		stubp = &source->is_pic->pic_edge_stubs[slot];
   1797 	}
   1798 	source->is_resume = stubp->ist_resume;
   1799 	source->is_recurse = stubp->ist_recurse;
   1800 	setgate(&idt[idt_vec], stubp->ist_entry, 0, SDT_SYS386IGT,
   1801 	    SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
   1802 
   1803 	x86_write_psl(psl);
   1804 
   1805 	kpreempt_enable();
   1806 }
   1807 
   1808 /*
   1809  * Let cpu_info not accept the interrupt.
   1810  */
   1811 static void
   1812 intr_deactivate_xcall(void *arg1, void *arg2)
   1813 {
   1814 	struct cpu_info *ci;
   1815 	struct intrhand *ih, *lih;
   1816 	u_long psl;
   1817 	int slot;
   1818 
   1819 	ih = arg1;
   1820 
   1821 	kpreempt_disable();
   1822 
   1823 	KASSERT(ih->ih_cpu == curcpu() || !mp_online);
   1824 
   1825 	ci = ih->ih_cpu;
   1826 	slot = ih->ih_slot;
   1827 
   1828 	psl = x86_read_psl();
   1829 	x86_disable_intr();
   1830 
   1831 	/* Move all devices sharing IRQ number. */
   1832 	ci->ci_isources[slot] = NULL;
   1833 	for (lih = ih; lih != NULL; lih = lih->ih_next) {
   1834 		ci->ci_nintrhand--;
   1835 	}
   1836 
   1837 	intr_calculatemasks(ci);
   1838 
   1839 	/*
   1840 	 * Skip unsetgate(), because the same itd[] entry is overwritten in
   1841 	 * intr_activate_xcall().
   1842 	 */
   1843 
   1844 	x86_write_psl(psl);
   1845 
   1846 	kpreempt_enable();
   1847 }
   1848 
   1849 static void
   1850 intr_get_affinity(struct intrsource *isp, kcpuset_t *cpuset)
   1851 {
   1852 	struct cpu_info *ci;
   1853 
   1854 	KASSERT(mutex_owned(&cpu_lock));
   1855 
   1856 	if (isp == NULL) {
   1857 		kcpuset_zero(cpuset);
   1858 		return;
   1859 	}
   1860 
   1861 	ci = isp->is_handlers->ih_cpu;
   1862 	if (ci == NULL) {
   1863 		kcpuset_zero(cpuset);
   1864 		return;
   1865 	}
   1866 
   1867 	kcpuset_set(cpuset, cpu_index(ci));
   1868 	return;
   1869 }
   1870 
/*
 * Move interrupt source "isp" (and all handlers sharing it) to the
 * lowest numbered CPU in "cpuset".  Returns 0 on success or an errno.
 *
 * => caller holds intr_distribute_lock and cpu_lock
 */
static int
intr_set_affinity(struct intrsource *isp, const kcpuset_t *cpuset)
{
	struct cpu_info *oldci, *newci;
	struct intrhand *ih, *lih;
	struct pic *pic;
	u_int cpu_idx;
	int idt_vec;
	int oldslot, newslot;
	int err;
	int pin;

	KASSERT(mutex_owned(&intr_distribute_lock));
	KASSERT(mutex_owned(&cpu_lock));

	/* XXX
	 * logical destination mode is not supported, use lowest index cpu.
	 */
	cpu_idx = kcpuset_ffs(cpuset) - 1;
	newci = cpu_lookup(cpu_idx);
	if (newci == NULL) {
		DPRINTF(("invalid cpu index: %u\n", cpu_idx));
		return EINVAL;
	}
	if ((newci->ci_schedstate.spc_flags & SPCF_NOINTR) != 0) {
		DPRINTF(("the cpu is set nointr shield. index:%u\n", cpu_idx));
		return EINVAL;
	}

	if (isp == NULL) {
		DPRINTF(("invalid intrctl handler\n"));
		return EINVAL;
	}

	/* i8259_pic supports only primary cpu, see i8259.c. */
	pic = isp->is_pic;
	if (pic == &i8259_pic) {
		DPRINTF(("i8259 pic does not support set_affinity\n"));
		return ENOTSUP;
	}

	ih = isp->is_handlers;
	oldci = ih->ih_cpu;
	if (newci == oldci) /* nothing to do */
		return 0;

	oldslot = ih->ih_slot;
	idt_vec = isp->is_idtvec;

	err = intr_find_unused_slot(newci, &newslot);
	if (err) {
		DPRINTF(("failed to allocate interrupt slot for PIC %s intrid %s\n",
			isp->is_pic->pic_name, isp->is_intrid));
		return err;
	}

	/* Mask the pin, then wait out any pending instance on the old CPU. */
	pin = isp->is_pin;
	(*pic->pic_hwmask)(pic, pin); /* for ci_ipending check */
	while(oldci->ci_ipending & (1 << oldslot))
		(void)kpause("intrdist", false, 1, &cpu_lock);

	kpreempt_disable();

	/* deactivate old interrupt setting */
	if (oldci == curcpu() || !mp_online) {
		intr_deactivate_xcall(ih, NULL);
	} else {
		uint64_t where;
		where = xc_unicast(0, intr_deactivate_xcall, ih,
				   NULL, oldci);
		xc_wait(where);
	}
	intr_save_evcnt(isp, oldci->ci_cpuid);
	(*pic->pic_delroute)(pic, oldci, pin, idt_vec, isp->is_type);

	/* activate new interrupt setting */
	newci->ci_isources[newslot] = isp;
	for (lih = ih; lih != NULL; lih = lih->ih_next) {
		newci->ci_nintrhand++;
		lih->ih_cpu = newci;
		lih->ih_slot = newslot;
	}
	if (newci == curcpu() || !mp_online) {
		intr_activate_xcall(ih, NULL);
	} else {
		uint64_t where;
		where = xc_unicast(0, intr_activate_xcall, ih,
				   NULL, newci);
		xc_wait(where);
	}
	intr_restore_evcnt(isp, newci->ci_cpuid);
	isp->is_active_cpu = newci->ci_cpuid;
	(*pic->pic_addroute)(pic, newci, pin, idt_vec, isp->is_type);

	kpreempt_enable();

	/* Re-enable delivery now that the new route is in place. */
	(*pic->pic_hwunmask)(pic, pin);

	return err;
}
   1971 
   1972 static bool
   1973 intr_is_affinity_intrsource(struct intrsource *isp, const kcpuset_t *cpuset)
   1974 {
   1975 	struct cpu_info *ci;
   1976 
   1977 	KASSERT(mutex_owned(&cpu_lock));
   1978 
   1979 	ci = isp->is_handlers->ih_cpu;
   1980 	KASSERT(ci != NULL);
   1981 
   1982 	return kcpuset_isset(cpuset, cpu_index(ci));
   1983 }
   1984 
   1985 static struct intrhand *
   1986 intr_get_handler(const char *intrid)
   1987 {
   1988 	struct intrsource *isp;
   1989 
   1990 	KASSERT(mutex_owned(&cpu_lock));
   1991 
   1992 	isp = intr_get_io_intrsource(intrid);
   1993 	if (isp == NULL)
   1994 		return NULL;
   1995 
   1996 	return isp->is_handlers;
   1997 }
   1998 
   1999 /*
   2000  * MI interface for subr_interrupt.c
   2001  */
   2002 uint64_t
   2003 interrupt_get_count(const char *intrid, u_int cpu_idx)
   2004 {
   2005 	struct cpu_info *ci;
   2006 	struct intrsource *isp;
   2007 	struct intrhand *ih;
   2008 	struct percpu_evcnt pep;
   2009 	cpuid_t cpuid;
   2010 	int i, slot;
   2011 	uint64_t count = 0;
   2012 
   2013 	ci = cpu_lookup(cpu_idx);
   2014 	cpuid = ci->ci_cpuid;
   2015 
   2016 	mutex_enter(&cpu_lock);
   2017 
   2018 	ih = intr_get_handler(intrid);
   2019 	if (ih == NULL) {
   2020 		count = 0;
   2021 		goto out;
   2022 	}
   2023 	slot = ih->ih_slot;
   2024 	isp = ih->ih_cpu->ci_isources[slot];
   2025 
   2026 	for (i = 0; i < ncpu; i++) {
   2027 		pep = isp->is_saved_evcnt[i];
   2028 		if (cpuid == pep.cpuid) {
   2029 			if (isp->is_active_cpu == pep.cpuid) {
   2030 				count = isp->is_evcnt.ev_count;
   2031 				goto out;
   2032 			} else {
   2033 				count = pep.count;
   2034 				goto out;
   2035 			}
   2036 		}
   2037 	}
   2038 
   2039  out:
   2040 	mutex_exit(&cpu_lock);
   2041 	return count;
   2042 }
   2043 
   2044 /*
   2045  * MI interface for subr_interrupt.c
   2046  */
   2047 void
   2048 interrupt_get_assigned(const char *intrid, kcpuset_t *cpuset)
   2049 {
   2050 	struct cpu_info *ci;
   2051 	struct intrhand *ih;
   2052 
   2053 	kcpuset_zero(cpuset);
   2054 
   2055 	mutex_enter(&cpu_lock);
   2056 
   2057 	ih = intr_get_handler(intrid);
   2058 	if (ih == NULL)
   2059 		goto out;
   2060 
   2061 	ci = ih->ih_cpu;
   2062 	kcpuset_set(cpuset, cpu_index(ci));
   2063 
   2064  out:
   2065 	mutex_exit(&cpu_lock);
   2066 }
   2067 
   2068 /*
   2069  * MI interface for subr_interrupt.c
   2070  */
   2071 void
   2072 interrupt_get_available(kcpuset_t *cpuset)
   2073 {
   2074 	CPU_INFO_ITERATOR cii;
   2075 	struct cpu_info *ci;
   2076 
   2077 	kcpuset_zero(cpuset);
   2078 
   2079 	mutex_enter(&cpu_lock);
   2080 	for (CPU_INFO_FOREACH(cii, ci)) {
   2081 		if ((ci->ci_schedstate.spc_flags & SPCF_NOINTR) == 0) {
   2082 			kcpuset_set(cpuset, cpu_index(ci));
   2083 		}
   2084 	}
   2085 	mutex_exit(&cpu_lock);
   2086 }
   2087 
   2088 /*
   2089  * MI interface for subr_interrupt.c
   2090  */
   2091 void
   2092 interrupt_get_devname(const char *intrid, char *buf, size_t len)
   2093 {
   2094 	struct intrsource *isp;
   2095 	struct intrhand *ih;
   2096 	int slot;
   2097 
   2098 	mutex_enter(&cpu_lock);
   2099 
   2100 	ih = intr_get_handler(intrid);
   2101 	if (ih == NULL) {
   2102 		buf[0] = '\0';
   2103 		goto out;
   2104 	}
   2105 	slot = ih->ih_slot;
   2106 	isp = ih->ih_cpu->ci_isources[slot];
   2107 	strlcpy(buf, isp->is_xname, len);
   2108 
   2109  out:
   2110 	mutex_exit(&cpu_lock);
   2111 }
   2112 
   2113 static int
   2114 intr_distribute_locked(struct intrhand *ih, const kcpuset_t *newset,
   2115     kcpuset_t *oldset)
   2116 {
   2117 	struct intrsource *isp;
   2118 	int slot;
   2119 
   2120 	KASSERT(mutex_owned(&intr_distribute_lock));
   2121 	KASSERT(mutex_owned(&cpu_lock));
   2122 
   2123 	if (ih == NULL)
   2124 		return EINVAL;
   2125 
   2126 	slot = ih->ih_slot;
   2127 	isp = ih->ih_cpu->ci_isources[slot];
   2128 	KASSERT(isp != NULL);
   2129 
   2130 	if (oldset != NULL)
   2131 		intr_get_affinity(isp, oldset);
   2132 
   2133 	return intr_set_affinity(isp, newset);
   2134 }
   2135 
   2136 /*
   2137  * MI interface for subr_interrupt.c
   2138  */
   2139 int
   2140 interrupt_distribute(void *cookie, const kcpuset_t *newset, kcpuset_t *oldset)
   2141 {
   2142 	int error;
   2143 	struct intrhand *ih = cookie;
   2144 
   2145 	mutex_enter(&intr_distribute_lock);
   2146 	mutex_enter(&cpu_lock);
   2147 	error = intr_distribute_locked(ih, newset, oldset);
   2148 	mutex_exit(&cpu_lock);
   2149 	mutex_exit(&intr_distribute_lock);
   2150 
   2151 	return error;
   2152 }
   2153 
   2154 /*
   2155  * MI interface for subr_interrupt.c
   2156  */
   2157 int
   2158 interrupt_distribute_handler(const char *intrid, const kcpuset_t *newset,
   2159     kcpuset_t *oldset)
   2160 {
   2161 	int error;
   2162 	struct intrhand *ih;
   2163 
   2164 	mutex_enter(&intr_distribute_lock);
   2165 	mutex_enter(&cpu_lock);
   2166 
   2167 	ih = intr_get_handler(intrid);
   2168 	if (ih == NULL) {
   2169 		error = ENOENT;
   2170 		goto out;
   2171 	}
   2172 	error = intr_distribute_locked(ih, newset, oldset);
   2173 
   2174  out:
   2175 	mutex_exit(&cpu_lock);
   2176 	mutex_exit(&intr_distribute_lock);
   2177 	return error;
   2178 }
   2179 
   2180 /*
   2181  * MI interface for subr_interrupt.c
   2182  */
   2183 struct intrids_handler *
   2184 interrupt_construct_intrids(const kcpuset_t *cpuset)
   2185 {
   2186 	struct intrsource *isp;
   2187 	struct intrids_handler *ii_handler;
   2188 	intrid_t *ids;
   2189 	int i, count;
   2190 
   2191 	if (kcpuset_iszero(cpuset))
   2192 		return 0;
   2193 
   2194 	/*
   2195 	 * Count the number of interrupts which affinity to any cpu of "cpuset".
   2196 	 */
   2197 	count = 0;
   2198 	mutex_enter(&cpu_lock);
   2199 	SIMPLEQ_FOREACH(isp, &io_interrupt_sources, is_list) {
   2200 		if (intr_is_affinity_intrsource(isp, cpuset))
   2201 			count++;
   2202 	}
   2203 	mutex_exit(&cpu_lock);
   2204 
   2205 	ii_handler = kmem_zalloc(sizeof(int) + sizeof(intrid_t) * count,
   2206 	    KM_SLEEP);
   2207 	if (ii_handler == NULL)
   2208 		return NULL;
   2209 	ii_handler->iih_nids = count;
   2210 	if (count == 0)
   2211 		return ii_handler;
   2212 
   2213 	ids = ii_handler->iih_intrids;
   2214 	i = 0;
   2215 	mutex_enter(&cpu_lock);
   2216 	SIMPLEQ_FOREACH(isp, &io_interrupt_sources, is_list) {
   2217 		/* Ignore devices attached after counting "count". */
   2218 		if (i >= count) {
   2219 			DPRINTF(("New devices are attached after counting.\n"));
   2220 			break;
   2221 		}
   2222 
   2223 		if (!intr_is_affinity_intrsource(isp, cpuset))
   2224 			continue;
   2225 
   2226 		strncpy(ids[i], isp->is_intrid, sizeof(intrid_t));
   2227 		i++;
   2228 	}
   2229 	mutex_exit(&cpu_lock);
   2230 
   2231 	return ii_handler;
   2232 }
   2233 
   2234 /*
   2235  * MI interface for subr_interrupt.c
   2236  */
   2237 void
   2238 interrupt_destruct_intrids(struct intrids_handler *ii_handler)
   2239 {
   2240 	size_t iih_size;
   2241 
   2242 	if (ii_handler == NULL)
   2243 		return;
   2244 
   2245 	iih_size = sizeof(int) + sizeof(intrid_t) * ii_handler->iih_nids;
   2246 	kmem_free(ii_handler, iih_size);
   2247 }
   2248