      1 /*      $NetBSD: xenevt.c,v 1.68 2022/09/22 16:21:34 bouyer Exp $      */
      2 
      3 /*
      4  * Copyright (c) 2005 Manuel Bouyer.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  *
     26  */
     27 
     28 #include <sys/cdefs.h>
     29 __KERNEL_RCSID(0, "$NetBSD: xenevt.c,v 1.68 2022/09/22 16:21:34 bouyer Exp $");
     30 
     31 #include "opt_xen.h"
     32 #include <sys/param.h>
     33 #include <sys/kernel.h>
     34 #include <sys/mutex.h>
     35 #include <sys/systm.h>
     36 #include <sys/device.h>
     37 #include <sys/file.h>
     38 #include <sys/filedesc.h>
     39 #include <sys/poll.h>
     40 #include <sys/select.h>
     41 #include <sys/proc.h>
     42 #include <sys/conf.h>
     43 #include <sys/intr.h>
     44 #include <sys/kmem.h>
     45 
     46 #include <uvm/uvm_extern.h>
     47 
     48 #include <xen/hypervisor.h>
     49 #include <xen/evtchn.h>
     50 #include <xen/intr.h>
     51 #ifdef XENPV
     52 #include <xen/xenpmap.h>
     53 #endif
     54 #include <xen/xenio.h>
     55 #include <xen/xenio3.h>
     56 #include <xen/xen.h>
     57 
     58 #include "ioconf.h"
     59 
     60 /*
     61  * Interface between the event channel and userland.
     62  * Each process with a xenevt device instance open can register events it
     63  * wants to receive. It gets pending events by read(), possibly blocking
     64  * until some event is available. Pending events are acknowledged by
     65  * write()ing their port numbers back to the device. Some special
     66  * operations (such as event binding) are done through ioctl().
     67  * Processes get a device instance by opening a cloning device.
     68  */
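
        /*
         * Illustrative userland sketch (hypothetical, not part of the kernel
         * build), assuming the cloning device node is /dev/xenevt and the
         * xenio ioctl definitions (xenio.h/xenio3.h) are available:
         *
         *	int fd = open("/dev/xenevt", O_RDWR);
         *	struct ioctl_evtchn_bind_interdomain bind = {
         *		.remote_domain = 1,	// hypothetical domain id
         *		.remote_port = 7,	// hypothetical remote port
         *	};
         *	ioctl(fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind);
         *	// bind.port is now the local port, delivered through this fd
         *	evtchn_port_t ports[8];
         *	ssize_t n = read(fd, ports, sizeof(ports));	// may block
         *	// each ports[i] is a pending port; acknowledge it (allowing
         *	// further delivery) by writing the 16-bit port number back
         *	uint16_t ack = (uint16_t)ports[0];
         *	write(fd, &ack, sizeof(ack));
         */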
     69 
     70 static int	xenevt_fread(struct file *, off_t *, struct uio *,
     71     kauth_cred_t, int);
     72 static int	xenevt_fwrite(struct file *, off_t *, struct uio *,
     73     kauth_cred_t, int);
     74 static int	xenevt_fioctl(struct file *, u_long, void *);
     75 static int	xenevt_fpoll(struct file *, int);
     76 static int	xenevt_fclose(struct file *);
     77 /* static int	xenevt_fkqfilter(struct file *, struct knote *); */
     78 
     79 static const struct fileops xenevt_fileops = {
     80 	.fo_name = "xenevt",
     81 	.fo_read = xenevt_fread,
     82 	.fo_write = xenevt_fwrite,
     83 	.fo_ioctl = xenevt_fioctl,
     84 	.fo_fcntl = fnullop_fcntl,
     85 	.fo_poll = xenevt_fpoll,
     86 	.fo_stat = fbadop_stat,
     87 	.fo_close = xenevt_fclose,
     88 	.fo_kqfilter = /* xenevt_fkqfilter */ fnullop_kqfilter,
     89 	.fo_restart = fnullop_restart,
     90 };
     91 
     92 dev_type_open(xenevtopen);
     93 dev_type_read(xenevtread);
     94 dev_type_mmap(xenevtmmap);
     95 const struct cdevsw xenevt_cdevsw = {
     96 	.d_open = xenevtopen,
     97 	.d_close = nullclose,
     98 	.d_read = xenevtread,
     99 	.d_write = nowrite,
    100 	.d_ioctl = noioctl,
    101 	.d_stop = nostop,
    102 	.d_tty = notty,
    103 	.d_poll = nopoll,
    104 	.d_mmap = xenevtmmap,
    105 	.d_kqfilter = nokqfilter,
    106 	.d_discard = nodiscard,
    107 	.d_flag = D_OTHER
    108 };
    109 
    110 /* minor numbers */
    111 #define DEV_EVT 0
    112 #define DEV_XSD 1
    113 
    114 /* per-instance data */
    115 #define XENEVT_RING_SIZE 2048
    116 #define XENEVT_RING_MASK 2047
    117 
    118 #define BYTES_PER_PORT (sizeof(evtchn_port_t) / sizeof(uint8_t))
    119 
    120 struct xenevt_d {
    121 	kmutex_t lock;
    122 	kcondvar_t cv;
    123 	STAILQ_ENTRY(xenevt_d) pendingq;
    124 	bool pending;
    125 	evtchn_port_t ring[XENEVT_RING_SIZE];
    126 	u_int ring_read; /* read index into the ring */
    127 	u_int ring_write; /* write index into the ring */
    128 	u_int flags;
    129 #define XENEVT_F_OVERFLOW 0x01 /* ring overflow */
    130 #define XENEVT_F_FREE 0x02 /* free entry */
    131 	struct selinfo sel; /* used by poll */
    132 	struct cpu_info *ci; /* preferred CPU for events for this device */
    133 };
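
        /*
         * Ring discipline (see xenevt_record() and xenevt_fread()): the ring
         * is empty when ring_read == ring_write, and treated as full when
         * ((ring_write + 1) & XENEVT_RING_MASK) == ring_read, so at most
         * XENEVT_RING_SIZE - 1 events can be buffered at any time.
         */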
    134 
    135 static struct intrhand *xenevt_ih;
    136 static evtchn_port_t xenevt_ev;
    137 
    138 /* event -> user device mapping */
    139 static struct xenevt_d *devevent[NR_EVENT_CHANNELS];
    140 
    141 /* pending events */
    142 static void *devevent_sih;
    143 static kmutex_t devevent_lock;
    144 static STAILQ_HEAD(, xenevt_d) devevent_pending;
    145 
    146 static void xenevt_record(struct xenevt_d *, evtchn_port_t);
    147 static void xenevt_free(struct xenevt_d *);
    148 
    149 /* pending events */
    150 long xenevt_ev1;
    151 long xenevt_ev2[NR_EVENT_CHANNELS];
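        /*
         * The two variables above form a two-level bitmap of pending user
         * events: bit l1 of xenevt_ev1 marks word xenevt_ev2[l1] as non-empty,
         * and bit l2 of that word marks port (l1 << LONG_SHIFT) + l2 as
         * pending (see xenevt_setipending() and xenevt_processevt()).
         */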
    152 static int xenevt_processevt(void *);
    153 
    154 static evtchn_port_t xenevt_alloc_event(void)
    155 {
    156 	evtchn_op_t op;
    157 	op.cmd = EVTCHNOP_alloc_unbound;
    158 	op.u.alloc_unbound.dom = DOMID_SELF;
    159 	op.u.alloc_unbound.remote_dom = DOMID_SELF;
    160 	if (HYPERVISOR_event_channel_op(&op) != 0)
    161 		panic("%s: Failed to allocate loopback event\n", __func__);
    162 
    163 	return op.u.alloc_unbound.port;
    164 }
    165 
    166 /* called at boot time */
    167 void
    168 xenevtattach(int n)
    169 {
    170 	int level = IPL_HIGH;
    171 
    172 	if (!xendomain_is_privileged())
    173 		return;
    174 #ifndef XENPV
    175 	if (vm_guest != VM_GUEST_XENPVH)
    176 		return;
    177 #endif
    178 
    179 	mutex_init(&devevent_lock, MUTEX_DEFAULT, IPL_HIGH);
    180 	STAILQ_INIT(&devevent_pending);
    181 
    182 	devevent_sih = softint_establish(SOFTINT_SERIAL,
    183 	    (void (*)(void *))xenevt_notify, NULL);
    184 	memset(devevent, 0, sizeof(devevent));
    185 	xenevt_ev1 = 0;
    186 	memset(xenevt_ev2, 0, sizeof(xenevt_ev2));
    187 
    188 	/*
    189 	 * Allocate a loopback event port.
    190 	 * It won't be used by itself, but will help register IPL
    191 	 * handlers.
    192 	 */
    193 	xenevt_ev = xenevt_alloc_event();
    194 
    195 	/*
    196 	 * The real objective here is to wiggle into the interrupt handler
    197 	 * callchain for this IPL on vCPU 0 (events are bound to it by default).
    198 	 */
    199 	xenevt_ih = event_set_handler(xenevt_ev, xenevt_processevt, NULL,
    200 	    level, NULL, "xenevt", true, &cpu_info_primary);
    201 
    202 	KASSERT(xenevt_ih != NULL);
    203 }
    204 
    205 /* register a pending event - always called with interrupts disabled */
    206 void
    207 xenevt_setipending(int l1, int l2)
    208 {
    209 	KASSERT(curcpu() == xenevt_ih->ih_cpu);
    210 	KASSERT(xenevt_ih->ih_cpu->ci_ilevel >= IPL_HIGH);
    211 	atomic_or_ulong(&xenevt_ev1, 1UL << l1);
    212 	atomic_or_ulong(&xenevt_ev2[l1], 1UL << l2);
    213 	atomic_or_64(&xenevt_ih->ih_cpu->ci_istate, (1ULL << SIR_XENIPL_HIGH));
    214 	atomic_add_int(&xenevt_ih->ih_pending, 1);
    215 	evtsource[xenevt_ev]->ev_evcnt.ev_count++;
    216 }
    217 
    218 /* process pending events */
    219 static int
    220 xenevt_processevt(void *v)
    221 {
    222 	long l1, l2;
    223 	int l1i, l2i;
    224 	int port;
    225 
    226 	l1 = xen_atomic_xchg(&xenevt_ev1, 0);
    227 	while ((l1i = xen_ffs(l1)) != 0) {
    228 		l1i--;
    229 		l1 &= ~(1UL << l1i);
    230 		l2 = xen_atomic_xchg(&xenevt_ev2[l1i], 0);
    231 		while ((l2i = xen_ffs(l2)) != 0) {
    232 			l2i--;
    233 			l2 &= ~(1UL << l2i);
    234 			port = (l1i << LONG_SHIFT) + l2i;
    235 			xenevt_event(port);
    236 		}
    237 	}
    238 
    239 	return 0;
    240 }
    241 
    242 
    243 /* event callback, called at splhigh() */
    244 void
    245 xenevt_event(int port)
    246 {
    247 	struct xenevt_d *d;
    248 
    249 	mutex_enter(&devevent_lock);
    250 	d = devevent[port];
    251 	if (d != NULL) {
    252 		xenevt_record(d, port);
    253 
    254 		if (d->pending == false) {
    255 			STAILQ_INSERT_TAIL(&devevent_pending, d, pendingq);
    256 			d->pending = true;
    257 			mutex_exit(&devevent_lock);
    258 			softint_schedule(devevent_sih);
    259 			return;
    260 		}
    261 	}
    262 	mutex_exit(&devevent_lock);
    263 }
    264 
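        /*
         * xenevt_event() above runs at splhigh() from the event callback, so
         * it only records the port and queues the device on devevent_pending;
         * the actual wakeups (selnotify()/cv_broadcast()) are performed here,
         * from the SOFTINT_SERIAL soft interrupt set up in xenevtattach().
         */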
    265 void
    266 xenevt_notify(void)
    267 {
    268 	struct xenevt_d *d;
    269 
    270 	for (;;) {
    271 		mutex_enter(&devevent_lock);
    272 		d = STAILQ_FIRST(&devevent_pending);
    273 		if (d == NULL) {
    274 			mutex_exit(&devevent_lock);
    275 			break;
    276 		}
    277 		STAILQ_REMOVE_HEAD(&devevent_pending, pendingq);
    278 		d->pending = false;
    279 		mutex_enter(&d->lock);
    280 		if (d->flags & XENEVT_F_FREE) {
    281 			xenevt_free(d); /* releases devevent_lock */
    282 		} else {
    283 			mutex_exit(&devevent_lock);
    284 			selnotify(&d->sel, 0, 1);
    285 			cv_broadcast(&d->cv);
    286 			mutex_exit(&d->lock);
    287 		}
    288 	}
    289 }
    290 
    291 static void
    292 xenevt_record(struct xenevt_d *d, evtchn_port_t port)
    293 {
    294 
    295 	/*
    296 	 * This algorithm declares an overflow while one slot is still free,
    297 	 * i.e. at most XENEVT_RING_SIZE - 1 events can be buffered. Not really
    298 	 * an issue, and an exact algorithm would be more complex.
    299 	 */
    300 
    301 	mutex_enter(&d->lock);
    302 	if (d->ring_read ==
    303 	    ((d->ring_write + 1) & XENEVT_RING_MASK)) {
    304 		d->flags |= XENEVT_F_OVERFLOW;
    305 		printf("xenevt_record: ring overflow port %d\n", port);
    306 	} else {
    307 		d->ring[d->ring_write] = port;
    308 		d->ring_write = (d->ring_write + 1) & XENEVT_RING_MASK;
    309 	}
    310 	mutex_exit(&d->lock);
    311 }
    312 
    313 /* open the xenevt device; this is where we clone */
    314 int
    315 xenevtopen(dev_t dev, int flags, int mode, struct lwp *l)
    316 {
    317 	struct xenevt_d *d;
    318 	struct file *fp;
    319 	int fd, error;
    320 
    321 	switch(minor(dev)) {
    322 	case DEV_EVT:
    323 		/* fd_allocfile() will fill in the descriptor for us. */
    324 		if ((error = fd_allocfile(&fp, &fd)) != 0)
    325 			return error;
    326 
    327 		d = kmem_zalloc(sizeof(*d), KM_SLEEP);
    328 		d->ci = xenevt_ih->ih_cpu;
    329 		mutex_init(&d->lock, MUTEX_DEFAULT, IPL_HIGH);
    330 		cv_init(&d->cv, "xenevt");
    331 		selinit(&d->sel);
    332 		return fd_clone(fp, fd, flags, &xenevt_fileops, d);
    333 	case DEV_XSD:
    334 		/* no clone for /dev/xsd_kva */
    335 		return (0);
    336 	default:
    337 		break;
    338 	}
    339 	return ENODEV;
    340 }
    341 
    342 /* read from device: only for /dev/xsd_kva, xenevt is done through fread */
    343 int
    344 xenevtread(dev_t dev, struct uio *uio, int flags)
    345 {
    346 #define LD_STRLEN 21 /* a 64bit integer needs 20 digits in base10 */
    347 	if (minor(dev) == DEV_XSD) {
    348 		char strbuf[LD_STRLEN], *bf;
    349 		int off, error;
    350 		size_t len;
    351 
    352 		off = (int)uio->uio_offset;
    353 		if (off < 0)
    354 			return EINVAL;
    355 		len  = snprintf(strbuf, sizeof(strbuf), "%ld\n",
    356 		    xen_start_info.store_mfn);
    357 		if (off >= len) {
    358 			bf = strbuf;
    359 			len = 0;
    360 		} else {
    361 			bf = &strbuf[off];
    362 			len -= off;
    363 		}
    364 		error = uiomove(bf, len, uio);
    365 		return error;
    366 	}
    367 	return ENODEV;
    368 }
    369 
    370 /* mmap: only for xsd_kva */
    371 paddr_t
    372 xenevtmmap(dev_t dev, off_t off, int prot)
    373 {
    374 	if (minor(dev) == DEV_XSD) {
    375 		/* only one page, so off is always 0 */
    376 		if (off != 0)
    377 			return -1;
    378 #ifdef XENPV
    379 		return x86_btop(
    380 		   xpmap_mtop((paddr_t)xen_start_info.store_mfn << PAGE_SHIFT));
    381 #else
    382 		return x86_btop(
    383 		   (paddr_t)xen_start_info.store_mfn << PAGE_SHIFT);
    384 #endif
    385 	}
    386 	return -1;
    387 }
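
        /*
         * Illustrative sketch (hypothetical, not part of this file): userland,
         * typically the xenstore daemon, is expected to use the two entry
         * points above roughly as follows, assuming the device node is
         * /dev/xsd_kva:
         *
         *	int fd = open("/dev/xsd_kva", O_RDWR);
         *	char buf[32];
         *	read(fd, buf, sizeof(buf));	// decimal MFN of the xenstore page
         *	void *ring = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
         *	    MAP_SHARED, fd, 0);	// map that single page
         */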
    388 
    389 static void
    390 xenevt_free(struct xenevt_d *d)
    391 {
    392 	int i;
    393 	KASSERT(mutex_owned(&devevent_lock));
    394 	KASSERT(mutex_owned(&d->lock));
    395 
    396 	for (i = 0; i < NR_EVENT_CHANNELS; i++ ) {
    397 		if (devevent[i] == d) {
    398 			evtchn_op_t op = { .cmd = 0 };
    399 			int error;
    400 
    401 			hypervisor_mask_event(i);
    402 			xen_atomic_clear_bit(&d->ci->ci_evtmask[0], i);
    403 			devevent[i] = NULL;
    404 			op.cmd = EVTCHNOP_close;
    405 			op.u.close.port = i;
    406 			if ((error = HYPERVISOR_event_channel_op(&op))) {
    407 				printf("xenevt_free: error %d from "
    408 				    "hypervisor\n", -error);
    409 			}
    410 		}
    411 	}
    412 	mutex_exit(&d->lock);
    413 	mutex_exit(&devevent_lock);
    414 	seldestroy(&d->sel);
    415 	cv_destroy(&d->cv);
    416 	mutex_destroy(&d->lock);
    417 	kmem_free(d, sizeof(*d));
    418 }
    419 
    420 static int
    421 xenevt_fclose(struct file *fp)
    422 {
    423 	struct xenevt_d *d = fp->f_data;
    424 
    425 	mutex_enter(&devevent_lock);
    426 	mutex_enter(&d->lock);
    427 	if (d->pending) {
    428 		d->flags |= XENEVT_F_FREE;
    429 		mutex_exit(&d->lock);
    430 		mutex_exit(&devevent_lock);
    431 	} else {
    432 		xenevt_free(d); /* releases devevent_lock */
    433 	}
    434 
    435 	fp->f_data = NULL;
    436 	return (0);
    437 }
    438 
    439 static int
    440 xenevt_fread(struct file *fp, off_t *offp, struct uio *uio,
    441     kauth_cred_t cred, int flags)
    442 {
    443 	struct xenevt_d *d = fp->f_data;
    444 	int error, ring_read, ring_write;
    445 	size_t len, uio_len;
    446 
    447 	error = 0;
    448 	mutex_enter(&d->lock);
    449 	while (error == 0) {
    450 		ring_read = d->ring_read;
    451 		ring_write = d->ring_write;
    452 		if (ring_read != ring_write) {
    453 			break;
    454 		}
    455 		if (d->flags & XENEVT_F_OVERFLOW) {
    456 			break;
    457 		}
    458 
    459 		/* nothing to read */
    460 		if ((fp->f_flag & FNONBLOCK) == 0) {
    461 			error = cv_wait_sig(&d->cv, &d->lock);
    462 		} else {
    463 			error = EAGAIN;
    464 		}
    465 	}
    466 	if (error == 0 && (d->flags & XENEVT_F_OVERFLOW)) {
    467 		error = EFBIG;
    468 	}
    469 	mutex_exit(&d->lock);
    470 
    471 	if (error) {
    472 		return error;
    473 	}
    474 
    475 	uio_len = uio->uio_resid / BYTES_PER_PORT;
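        	/*
        	 * First copy: from ring_read up to ring_write, or up to the end
        	 * of the ring array if the valid region wraps around; the second
        	 * copy below handles the wrapped part.
        	 */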
    476 	if (ring_read <= ring_write)
    477 		len = ring_write - ring_read;
    478 	else
    479 		len = XENEVT_RING_SIZE - ring_read;
    480 	if (len > uio_len)
    481 		len = uio_len;
    482 	error = uiomove(&d->ring[ring_read], len * BYTES_PER_PORT, uio);
    483 	if (error)
    484 		return error;
    485 	ring_read = (ring_read + len) & XENEVT_RING_MASK;
    486 	uio_len = uio->uio_resid / BYTES_PER_PORT;
    487 	if (uio_len == 0)
    488 		goto done;
    489 	/* ring wrapped, read the second part */
    490 	len = ring_write - ring_read;
    491 	if (len > uio_len)
    492 		len = uio_len;
    493 	error = uiomove(&d->ring[ring_read], len * BYTES_PER_PORT, uio);
    494 	if (error)
    495 		return error;
    496 	ring_read = (ring_read + len) & XENEVT_RING_MASK;
    497 
    498 done:
    499 	mutex_enter(&d->lock);
    500 	d->ring_read = ring_read;
    501 	mutex_exit(&d->lock);
    502 
    503 	return 0;
    504 }
    505 
    506 static int
    507 xenevt_fwrite(struct file *fp, off_t *offp, struct uio *uio,
    508     kauth_cred_t cred, int flags)
    509 {
    510 	struct xenevt_d *d = fp->f_data;
    511 	uint16_t *chans;
    512 	int i, nentries, error;
    513 
    514 	if (uio->uio_resid == 0)
    515 		return (0);
    516 	nentries = uio->uio_resid / sizeof(uint16_t);
    517 	if (nentries >= NR_EVENT_CHANNELS)
    518 		return EMSGSIZE;
    519 	chans = kmem_alloc(nentries * sizeof(uint16_t), KM_SLEEP);
    520 	error = uiomove(chans, uio->uio_resid, uio);
    521 	if (error)
    522 		goto out;
    523 	mutex_enter(&devevent_lock);
    524 	for (i = 0; i < nentries; i++) {
    525 		if (chans[i] < NR_EVENT_CHANNELS &&
    526 		    devevent[chans[i]] == d) {
    527 			hypervisor_unmask_event(chans[i]);
    528 		}
    529 	}
    530 	mutex_exit(&devevent_lock);
    531 out:
    532 	kmem_free(chans, nentries * sizeof(uint16_t));
    533 	return error;
    534 }
    535 
    536 static int
    537 xenevt_fioctl(struct file *fp, u_long cmd, void *addr)
    538 {
    539 	struct xenevt_d *d = fp->f_data;
    540 	evtchn_op_t op = { .cmd = 0 };
    541 	int error;
    542 
    543 	switch(cmd) {
    544 	case EVTCHN_RESET:
    545 	case IOCTL_EVTCHN_RESET:
    546 		mutex_enter(&d->lock);
    547 		d->ring_read = d->ring_write = 0;
    548 		d->flags = 0;
    549 		mutex_exit(&d->lock);
    550 		break;
    551 	case IOCTL_EVTCHN_BIND_VIRQ:
    552 	{
    553 		struct ioctl_evtchn_bind_virq *bind_virq = addr;
    554 		op.cmd = EVTCHNOP_bind_virq;
    555 		op.u.bind_virq.virq = bind_virq->virq;
    556 		op.u.bind_virq.vcpu = 0;
    557 		if ((error = HYPERVISOR_event_channel_op(&op))) {
    558 			printf("IOCTL_EVTCHN_BIND_VIRQ failed: virq %d "
        			    "error %d\n", bind_virq->virq, error);
    559 			return -error;
    560 		}
    561 		bind_virq->port = op.u.bind_virq.port;
    562 		mutex_enter(&devevent_lock);
    563 		KASSERT(devevent[bind_virq->port] == NULL);
    564 		devevent[bind_virq->port] = d;
    565 		mutex_exit(&devevent_lock);
    566 		xen_atomic_set_bit(&d->ci->ci_evtmask[0], bind_virq->port);
    567 		hypervisor_unmask_event(bind_virq->port);
    568 		break;
    569 	}
    570 	case IOCTL_EVTCHN_BIND_INTERDOMAIN:
    571 	{
    572 		struct ioctl_evtchn_bind_interdomain *bind_intd = addr;
    573 		op.cmd = EVTCHNOP_bind_interdomain;
    574 		op.u.bind_interdomain.remote_dom = bind_intd->remote_domain;
    575 		op.u.bind_interdomain.remote_port = bind_intd->remote_port;
    576 		if ((error = HYPERVISOR_event_channel_op(&op)))
    577 			return -error;
    578 		bind_intd->port = op.u.bind_interdomain.local_port;
    579 		mutex_enter(&devevent_lock);
    580 		KASSERT(devevent[bind_intd->port] == NULL);
    581 		devevent[bind_intd->port] = d;
    582 		mutex_exit(&devevent_lock);
    583 		xen_atomic_set_bit(&d->ci->ci_evtmask[0], bind_intd->port);
    584 		hypervisor_unmask_event(bind_intd->port);
    585 		break;
    586 	}
    587 	case IOCTL_EVTCHN_BIND_UNBOUND_PORT:
    588 	{
    589 		struct ioctl_evtchn_bind_unbound_port *bind_unbound = addr;
    590 		op.cmd = EVTCHNOP_alloc_unbound;
    591 		op.u.alloc_unbound.dom = DOMID_SELF;
    592 		op.u.alloc_unbound.remote_dom = bind_unbound->remote_domain;
    593 		if ((error = HYPERVISOR_event_channel_op(&op)))
    594 			return -error;
    595 		bind_unbound->port = op.u.alloc_unbound.port;
    596 		mutex_enter(&devevent_lock);
    597 		KASSERT(devevent[bind_unbound->port] == NULL);
    598 		devevent[bind_unbound->port] = d;
    599 		mutex_exit(&devevent_lock);
    600 		xen_atomic_set_bit(&d->ci->ci_evtmask[0], bind_unbound->port);
    601 		hypervisor_unmask_event(bind_unbound->port);
    602 		break;
    603 	}
    604 	case IOCTL_EVTCHN_UNBIND:
    605 	{
    606 		struct ioctl_evtchn_unbind *unbind = addr;
    607 
    608 		if (unbind->port >= NR_EVENT_CHANNELS)
    609 			return EINVAL;
    610 		mutex_enter(&devevent_lock);
    611 		if (devevent[unbind->port] != d) {
    612 			mutex_exit(&devevent_lock);
    613 			return ENOTCONN;
    614 		}
    615 		devevent[unbind->port] = NULL;
    616 		mutex_exit(&devevent_lock);
    617 		hypervisor_mask_event(unbind->port);
    618 		xen_atomic_clear_bit(&d->ci->ci_evtmask[0], unbind->port);
    619 		op.cmd = EVTCHNOP_close;
    620 		op.u.close.port = unbind->port;
    621 		if ((error = HYPERVISOR_event_channel_op(&op)))
    622 			return -error;
    623 		break;
    624 	}
    625 	case IOCTL_EVTCHN_NOTIFY:
    626 	{
    627 		struct ioctl_evtchn_notify *notify = addr;
    628 
    629 		if (notify->port >= NR_EVENT_CHANNELS)
    630 			return EINVAL;
    631 		mutex_enter(&devevent_lock);
    632 		if (devevent[notify->port] != d) {
    633 			mutex_exit(&devevent_lock);
    634 			return ENOTCONN;
    635 		}
    636 		hypervisor_notify_via_evtchn(notify->port);
    637 		mutex_exit(&devevent_lock);
    638 		break;
    639 	}
    640 	case FIONBIO:
    641 		break;
    642 	default:
    643 		return EINVAL;
    644 	}
    645 	return 0;
    646 }
    647 
    648 /*
    649  * Support for poll() system call
    650  *
    651  * Return true if the specific operation will not block indefinitely.
    652  */
    653 
    654 static int
    655 xenevt_fpoll(struct file *fp, int events)
    656 {
    657 	struct xenevt_d *d = fp->f_data;
    658 	int revents = events & (POLLOUT | POLLWRNORM); /* we can always write */
    659 
    660 	mutex_enter(&d->lock);
    661 	if (events & (POLLIN | POLLRDNORM)) {
    662 		if (d->ring_read != d->ring_write) {
    663 			revents |= events & (POLLIN | POLLRDNORM);
    664 		} else {
    665 			/* Record that someone is waiting */
    666 			selrecord(curlwp, &d->sel);
    667 		}
    668 	}
    669 	mutex_exit(&d->lock);
    670 	return (revents);
    671 }
    672