Home | History | Annotate | Line # | Download | only in kern
sys_eventfd.c revision 1.9.4.1
      1  1.9.4.1   bouyer /*	$NetBSD: sys_eventfd.c,v 1.9.4.1 2023/11/26 12:33:19 bouyer Exp $	*/
      2      1.2  thorpej 
      3      1.2  thorpej /*-
      4      1.2  thorpej  * Copyright (c) 2020 The NetBSD Foundation, Inc.
      5      1.2  thorpej  * All rights reserved.
      6      1.2  thorpej  *
      7      1.2  thorpej  * This code is derived from software contributed to The NetBSD Foundation
      8      1.2  thorpej  * by Jason R. Thorpe.
      9      1.2  thorpej  *
     10      1.2  thorpej  * Redistribution and use in source and binary forms, with or without
     11      1.2  thorpej  * modification, are permitted provided that the following conditions
     12      1.2  thorpej  * are met:
     13      1.2  thorpej  * 1. Redistributions of source code must retain the above copyright
     14      1.2  thorpej  *    notice, this list of conditions and the following disclaimer.
     15      1.2  thorpej  * 2. Redistributions in binary form must reproduce the above copyright
     16      1.2  thorpej  *    notice, this list of conditions and the following disclaimer in the
     17      1.2  thorpej  *    documentation and/or other materials provided with the distribution.
     18      1.2  thorpej  *
     19      1.2  thorpej  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20      1.2  thorpej  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21      1.2  thorpej  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22      1.2  thorpej  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23      1.2  thorpej  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24      1.2  thorpej  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25      1.2  thorpej  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26      1.2  thorpej  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27      1.2  thorpej  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28      1.2  thorpej  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29      1.2  thorpej  * POSSIBILITY OF SUCH DAMAGE.
     30      1.2  thorpej  */
     31      1.2  thorpej 
     32      1.2  thorpej #include <sys/cdefs.h>
     33  1.9.4.1   bouyer __KERNEL_RCSID(0, "$NetBSD: sys_eventfd.c,v 1.9.4.1 2023/11/26 12:33:19 bouyer Exp $");
     34      1.2  thorpej 
     35      1.2  thorpej /*
     36      1.2  thorpej  * eventfd
     37      1.2  thorpej  *
     38      1.2  thorpej  * Eventfd objects present a simple counting object associated with a
     39      1.2  thorpej  * file descriptor.  Writes and reads to this file descriptor increment
     40      1.2  thorpej  * and decrement the count, respectively.  When the count is non-zero,
     41      1.2  thorpej  * the descriptor is considered "readable", and when less than the max
     42      1.2  thorpej  * value (EVENTFD_MAXVAL), is considered "writable".
     43      1.2  thorpej  *
     44      1.2  thorpej  * This implementation is API compatible with the Linux eventfd(2)
     45      1.2  thorpej  * interface.
     46      1.2  thorpej  */
     47      1.2  thorpej 
     48      1.3    skrll #include <sys/param.h>
     49      1.2  thorpej #include <sys/types.h>
     50      1.2  thorpej #include <sys/condvar.h>
     51      1.2  thorpej #include <sys/eventfd.h>
     52      1.2  thorpej #include <sys/file.h>
     53      1.2  thorpej #include <sys/filedesc.h>
     54      1.2  thorpej #include <sys/kauth.h>
     55      1.2  thorpej #include <sys/mutex.h>
     56      1.2  thorpej #include <sys/poll.h>
     57      1.2  thorpej #include <sys/proc.h>
     58      1.2  thorpej #include <sys/select.h>
     59      1.2  thorpej #include <sys/stat.h>
     60      1.2  thorpej #include <sys/syscallargs.h>
     61      1.2  thorpej #include <sys/uio.h>
     62      1.2  thorpej 
     63      1.2  thorpej struct eventfd {
     64      1.2  thorpej 	kmutex_t	efd_lock;
     65      1.2  thorpej 	kcondvar_t	efd_read_wait;
     66      1.2  thorpej 	kcondvar_t	efd_write_wait;
     67      1.2  thorpej 	struct selinfo	efd_read_sel;
     68      1.2  thorpej 	struct selinfo	efd_write_sel;
     69      1.2  thorpej 	eventfd_t	efd_val;
     70      1.2  thorpej 	int64_t		efd_nwaiters;
     71      1.2  thorpej 	bool		efd_restarting;
     72      1.2  thorpej 	bool		efd_is_semaphore;
     73      1.2  thorpej 
     74      1.2  thorpej 	/*
     75      1.2  thorpej 	 * Information kept for stat(2).
     76      1.2  thorpej 	 */
     77      1.2  thorpej 	struct timespec efd_btime;	/* time created */
     78      1.2  thorpej 	struct timespec	efd_mtime;	/* last write */
     79      1.2  thorpej 	struct timespec	efd_atime;	/* last read */
     80      1.2  thorpej };
     81      1.2  thorpej 
/*
 * Largest value the counter may hold.  Writes of anything larger are
 * rejected with EINVAL, and a write that would push the counter past
 * this limit has to wait (or fail with EAGAIN when non-blocking).
 */
#define	EVENTFD_MAXVAL	(UINT64_MAX - 1)
     83      1.2  thorpej 
     84      1.2  thorpej /*
     85      1.2  thorpej  * eventfd_create:
     86      1.2  thorpej  *
     87      1.2  thorpej  *	Create an eventfd object.
     88      1.2  thorpej  */
     89      1.2  thorpej static struct eventfd *
     90      1.2  thorpej eventfd_create(unsigned int const val, int const flags)
     91      1.2  thorpej {
     92      1.2  thorpej 	struct eventfd * const efd = kmem_zalloc(sizeof(*efd), KM_SLEEP);
     93      1.2  thorpej 
     94      1.2  thorpej 	mutex_init(&efd->efd_lock, MUTEX_DEFAULT, IPL_NONE);
     95      1.2  thorpej 	cv_init(&efd->efd_read_wait, "efdread");
     96      1.2  thorpej 	cv_init(&efd->efd_write_wait, "efdwrite");
     97      1.2  thorpej 	selinit(&efd->efd_read_sel);
     98      1.2  thorpej 	selinit(&efd->efd_write_sel);
     99      1.2  thorpej 	efd->efd_val = val;
    100      1.2  thorpej 	efd->efd_is_semaphore = !!(flags & EFD_SEMAPHORE);
    101      1.2  thorpej 	getnanotime(&efd->efd_btime);
    102      1.2  thorpej 
    103      1.2  thorpej 	/* Caller deals with EFD_CLOEXEC and EFD_NONBLOCK. */
    104      1.2  thorpej 
    105      1.2  thorpej 	return efd;
    106      1.2  thorpej }
    107      1.2  thorpej 
    108      1.2  thorpej /*
    109      1.2  thorpej  * eventfd_destroy:
    110      1.2  thorpej  *
    111      1.2  thorpej  *	Destroy an eventfd object.
    112      1.2  thorpej  */
    113      1.2  thorpej static void
    114      1.2  thorpej eventfd_destroy(struct eventfd * const efd)
    115      1.2  thorpej {
    116      1.2  thorpej 
    117      1.2  thorpej 	KASSERT(efd->efd_nwaiters == 0);
    118      1.2  thorpej 
    119      1.2  thorpej 	cv_destroy(&efd->efd_read_wait);
    120      1.2  thorpej 	cv_destroy(&efd->efd_write_wait);
    121      1.2  thorpej 
    122      1.2  thorpej 	seldestroy(&efd->efd_read_sel);
    123      1.2  thorpej 	seldestroy(&efd->efd_write_sel);
    124      1.2  thorpej 
    125      1.2  thorpej 	mutex_destroy(&efd->efd_lock);
    126      1.4  thorpej 
    127      1.4  thorpej 	kmem_free(efd, sizeof(*efd));
    128      1.2  thorpej }
    129      1.2  thorpej 
    130      1.2  thorpej /*
    131      1.2  thorpej  * eventfd_wait:
    132      1.2  thorpej  *
    133      1.2  thorpej  *	Block on an eventfd.  Handles non-blocking, as well as
    134      1.2  thorpej  *	the restart cases.
    135      1.2  thorpej  */
    136      1.2  thorpej static int
    137      1.2  thorpej eventfd_wait(struct eventfd * const efd, int const fflag, bool const is_write)
    138      1.2  thorpej {
    139      1.2  thorpej 	kcondvar_t *waitcv;
    140      1.2  thorpej 	int error;
    141      1.2  thorpej 
    142      1.2  thorpej 	if (fflag & FNONBLOCK) {
    143      1.2  thorpej 		return EAGAIN;
    144      1.2  thorpej 	}
    145      1.2  thorpej 
    146      1.2  thorpej 	/*
    147      1.8  thorpej 	 * We're going to block.  Check if we need to return ERESTART.
    148      1.2  thorpej 	 */
    149      1.8  thorpej 	if (efd->efd_restarting) {
    150      1.8  thorpej 		return ERESTART;
    151      1.2  thorpej 	}
    152      1.2  thorpej 
    153      1.2  thorpej 	if (is_write) {
    154      1.2  thorpej 		waitcv = &efd->efd_write_wait;
    155      1.2  thorpej 	} else {
    156      1.2  thorpej 		waitcv = &efd->efd_read_wait;
    157      1.2  thorpej 	}
    158      1.2  thorpej 
    159      1.2  thorpej 	efd->efd_nwaiters++;
    160      1.2  thorpej 	KASSERT(efd->efd_nwaiters > 0);
    161      1.2  thorpej 	error = cv_wait_sig(waitcv, &efd->efd_lock);
    162      1.2  thorpej 	efd->efd_nwaiters--;
    163      1.2  thorpej 	KASSERT(efd->efd_nwaiters >= 0);
    164      1.2  thorpej 
    165      1.2  thorpej 	/*
    166      1.2  thorpej 	 * If a restart was triggered while we were asleep, we need
    167      1.8  thorpej 	 * to return ERESTART if no other error was returned.
    168      1.2  thorpej 	 */
    169      1.2  thorpej 	if (efd->efd_restarting) {
    170      1.2  thorpej 		if (error == 0) {
    171      1.2  thorpej 			error = ERESTART;
    172      1.2  thorpej 		}
    173      1.2  thorpej 	}
    174      1.2  thorpej 
    175      1.2  thorpej 	return error;
    176      1.2  thorpej }
    177      1.2  thorpej 
    178      1.2  thorpej /*
    179      1.2  thorpej  * eventfd_wake:
    180      1.2  thorpej  *
    181      1.2  thorpej  *	Wake LWPs block on an eventfd.
    182      1.2  thorpej  */
    183      1.2  thorpej static void
    184      1.2  thorpej eventfd_wake(struct eventfd * const efd, bool const is_write)
    185      1.2  thorpej {
    186      1.2  thorpej 	kcondvar_t *waitcv = NULL;
    187      1.2  thorpej 	struct selinfo *sel;
    188      1.2  thorpej 	int pollev;
    189      1.2  thorpej 
    190      1.2  thorpej 	if (is_write) {
    191  1.9.4.1   bouyer 		waitcv = &efd->efd_read_wait;
    192      1.2  thorpej 		sel = &efd->efd_read_sel;
    193      1.2  thorpej 		pollev = POLLIN | POLLRDNORM;
    194      1.2  thorpej 	} else {
    195  1.9.4.1   bouyer 		waitcv = &efd->efd_write_wait;
    196      1.2  thorpej 		sel = &efd->efd_write_sel;
    197      1.2  thorpej 		pollev = POLLOUT | POLLWRNORM;
    198      1.2  thorpej 	}
    199      1.2  thorpej 	if (waitcv != NULL) {
    200      1.2  thorpej 		cv_broadcast(waitcv);
    201      1.2  thorpej 	}
    202      1.2  thorpej 	selnotify(sel, pollev, NOTE_SUBMIT);
    203      1.2  thorpej }
    204      1.2  thorpej 
    205      1.2  thorpej /*
    206      1.2  thorpej  * eventfd file operations
    207      1.2  thorpej  */
    208      1.2  thorpej 
    209      1.2  thorpej static int
    210      1.2  thorpej eventfd_fop_read(file_t * const fp, off_t * const offset,
    211      1.2  thorpej     struct uio * const uio, kauth_cred_t const cred, int const flags)
    212      1.2  thorpej {
    213      1.2  thorpej 	struct eventfd * const efd = fp->f_eventfd;
    214      1.2  thorpej 	int const fflag = fp->f_flag;
    215      1.2  thorpej 	eventfd_t return_value;
    216      1.2  thorpej 	int error;
    217      1.2  thorpej 
    218      1.2  thorpej 	if (uio->uio_resid < sizeof(eventfd_t)) {
    219      1.2  thorpej 		return EINVAL;
    220      1.2  thorpej 	}
    221      1.2  thorpej 
    222      1.2  thorpej 	mutex_enter(&efd->efd_lock);
    223      1.2  thorpej 
    224      1.2  thorpej 	while (efd->efd_val == 0) {
    225      1.2  thorpej 		if ((error = eventfd_wait(efd, fflag, false)) != 0) {
    226      1.2  thorpej 			mutex_exit(&efd->efd_lock);
    227      1.2  thorpej 			return error;
    228      1.2  thorpej 		}
    229      1.2  thorpej 	}
    230      1.2  thorpej 
    231      1.2  thorpej 	if (efd->efd_is_semaphore) {
    232      1.2  thorpej 		return_value = 1;
    233      1.2  thorpej 		efd->efd_val--;
    234      1.2  thorpej 	} else {
    235      1.2  thorpej 		return_value = efd->efd_val;
    236      1.2  thorpej 		efd->efd_val = 0;
    237      1.2  thorpej 	}
    238      1.2  thorpej 
    239      1.2  thorpej 	getnanotime(&efd->efd_atime);
    240      1.2  thorpej 	eventfd_wake(efd, false);
    241      1.2  thorpej 
    242      1.2  thorpej 	mutex_exit(&efd->efd_lock);
    243      1.2  thorpej 
    244      1.2  thorpej 	error = uiomove(&return_value, sizeof(return_value), uio);
    245      1.2  thorpej 
    246      1.2  thorpej 	return error;
    247      1.2  thorpej }
    248      1.2  thorpej 
    249      1.2  thorpej static int
    250      1.2  thorpej eventfd_fop_write(file_t * const fp, off_t * const offset,
    251      1.2  thorpej     struct uio * const uio, kauth_cred_t const cred, int const flags)
    252      1.2  thorpej {
    253      1.2  thorpej 	struct eventfd * const efd = fp->f_eventfd;
    254      1.2  thorpej 	int const fflag = fp->f_flag;
    255      1.2  thorpej 	eventfd_t write_value;
    256      1.2  thorpej 	int error;
    257      1.2  thorpej 
    258      1.2  thorpej 	if (uio->uio_resid < sizeof(eventfd_t)) {
    259      1.2  thorpej 		return EINVAL;
    260      1.2  thorpej 	}
    261      1.2  thorpej 
    262      1.2  thorpej 	if ((error = uiomove(&write_value, sizeof(write_value), uio)) != 0) {
    263      1.2  thorpej 		return error;
    264      1.2  thorpej 	}
    265      1.2  thorpej 
    266      1.2  thorpej 	if (write_value > EVENTFD_MAXVAL) {
    267      1.2  thorpej 		error = EINVAL;
    268      1.2  thorpej 		goto out;
    269      1.2  thorpej 	}
    270      1.2  thorpej 
    271      1.2  thorpej 	mutex_enter(&efd->efd_lock);
    272      1.2  thorpej 
    273      1.2  thorpej 	KASSERT(efd->efd_val <= EVENTFD_MAXVAL);
    274      1.2  thorpej 	while ((EVENTFD_MAXVAL - efd->efd_val) < write_value) {
    275      1.2  thorpej 		if ((error = eventfd_wait(efd, fflag, true)) != 0) {
    276      1.2  thorpej 			mutex_exit(&efd->efd_lock);
    277      1.2  thorpej 			goto out;
    278      1.2  thorpej 		}
    279      1.2  thorpej 	}
    280      1.2  thorpej 
    281      1.2  thorpej 	efd->efd_val += write_value;
    282      1.2  thorpej 	KASSERT(efd->efd_val <= EVENTFD_MAXVAL);
    283      1.2  thorpej 
    284      1.2  thorpej 	getnanotime(&efd->efd_mtime);
    285      1.2  thorpej 	eventfd_wake(efd, true);
    286      1.2  thorpej 
    287      1.2  thorpej 	mutex_exit(&efd->efd_lock);
    288      1.2  thorpej 
    289      1.2  thorpej  out:
    290      1.2  thorpej 	if (error) {
    291      1.2  thorpej 		/*
    292      1.2  thorpej 		 * Undo the effect of uiomove() so that the error
    293      1.2  thorpej 		 * gets reported correctly; see dofilewrite().
    294      1.2  thorpej 		 */
    295      1.2  thorpej 		uio->uio_resid += sizeof(write_value);
    296      1.2  thorpej 	}
    297      1.2  thorpej 	return error;
    298      1.2  thorpej }
    299      1.2  thorpej 
    300      1.2  thorpej static int
    301      1.9  thorpej eventfd_ioctl(file_t * const fp, u_long const cmd, void * const data)
    302      1.9  thorpej {
    303      1.9  thorpej 	struct eventfd * const efd = fp->f_eventfd;
    304      1.9  thorpej 
    305      1.9  thorpej 	switch (cmd) {
    306      1.9  thorpej 	case FIONBIO:
    307      1.9  thorpej 		return 0;
    308      1.9  thorpej 
    309      1.9  thorpej 	case FIONREAD:
    310      1.9  thorpej 		mutex_enter(&efd->efd_lock);
    311      1.9  thorpej 		*(int *)data = efd->efd_val != 0 ? sizeof(eventfd_t) : 0;
    312      1.9  thorpej 		mutex_exit(&efd->efd_lock);
    313      1.9  thorpej 		return 0;
    314      1.9  thorpej 
    315      1.9  thorpej 	case FIONWRITE:
    316      1.9  thorpej 		*(int *)data = 0;
    317      1.9  thorpej 		return 0;
    318      1.9  thorpej 
    319      1.9  thorpej 	case FIONSPACE:
    320      1.9  thorpej 		/*
    321      1.9  thorpej 		 * FIONSPACE doesn't really work for eventfd, because the
    322      1.9  thorpej 		 * writability depends on the contents (value) being written.
    323      1.9  thorpej 		 */
    324      1.9  thorpej 		break;
    325      1.9  thorpej 
    326      1.9  thorpej 	default:
    327      1.9  thorpej 		break;
    328      1.9  thorpej 	}
    329      1.9  thorpej 
    330      1.9  thorpej 	return EPASSTHROUGH;
    331      1.9  thorpej }
    332      1.9  thorpej 
    333      1.9  thorpej static int
    334      1.2  thorpej eventfd_fop_poll(file_t * const fp, int const events)
    335      1.2  thorpej {
    336      1.2  thorpej 	struct eventfd * const efd = fp->f_eventfd;
    337      1.2  thorpej 	int revents = 0;
    338      1.2  thorpej 
    339      1.2  thorpej 	/*
    340      1.2  thorpej 	 * Note that Linux will return POLLERR if the eventfd count
    341      1.2  thorpej 	 * overflows, but that is not possible in the normal read/write
    342      1.2  thorpej 	 * API, only with Linux kernel-internal interfaces.  So, this
    343      1.2  thorpej 	 * implementation never returns POLLERR.
    344      1.2  thorpej 	 *
    345      1.2  thorpej 	 * Also note that the Linux eventfd(2) man page does not
    346      1.2  thorpej 	 * specifically discuss returning POLLRDNORM, but we check
    347      1.2  thorpej 	 * for that event in addition to POLLIN.
    348      1.2  thorpej 	 */
    349      1.2  thorpej 
    350      1.2  thorpej 	mutex_enter(&efd->efd_lock);
    351      1.2  thorpej 
    352      1.2  thorpej 	if (events & (POLLIN | POLLRDNORM)) {
    353      1.2  thorpej 		if (efd->efd_val != 0) {
    354      1.2  thorpej 			revents |= events & (POLLIN | POLLRDNORM);
    355      1.2  thorpej 		} else {
    356      1.2  thorpej 			selrecord(curlwp, &efd->efd_read_sel);
    357      1.2  thorpej 		}
    358      1.2  thorpej 	}
    359      1.2  thorpej 
    360      1.2  thorpej 	if (events & (POLLOUT | POLLWRNORM)) {
    361      1.2  thorpej 		if (efd->efd_val < EVENTFD_MAXVAL) {
    362      1.2  thorpej 			revents |= events & (POLLOUT | POLLWRNORM);
    363      1.2  thorpej 		} else {
    364      1.2  thorpej 			selrecord(curlwp, &efd->efd_write_sel);
    365      1.2  thorpej 		}
    366      1.2  thorpej 	}
    367      1.2  thorpej 
    368      1.2  thorpej 	mutex_exit(&efd->efd_lock);
    369      1.2  thorpej 
    370      1.2  thorpej 	return revents;
    371      1.2  thorpej }
    372      1.2  thorpej 
    373      1.2  thorpej static int
    374      1.2  thorpej eventfd_fop_stat(file_t * const fp, struct stat * const st)
    375      1.2  thorpej {
    376      1.2  thorpej 	struct eventfd * const efd = fp->f_eventfd;
    377      1.2  thorpej 
    378      1.2  thorpej 	memset(st, 0, sizeof(*st));
    379      1.2  thorpej 
    380      1.2  thorpej 	mutex_enter(&efd->efd_lock);
    381      1.2  thorpej 	st->st_size = (off_t)efd->efd_val;
    382      1.2  thorpej 	st->st_blksize = sizeof(eventfd_t);
    383      1.2  thorpej 	st->st_mode = S_IFIFO | S_IRUSR | S_IWUSR;
    384      1.2  thorpej 	st->st_blocks = 1;
    385      1.2  thorpej 	st->st_birthtimespec = st->st_ctimespec = efd->efd_btime;
    386      1.2  thorpej 	st->st_atimespec = efd->efd_atime;
    387      1.2  thorpej 	st->st_mtimespec = efd->efd_mtime;
    388      1.2  thorpej 	st->st_uid = kauth_cred_geteuid(fp->f_cred);
    389      1.2  thorpej 	st->st_gid = kauth_cred_getegid(fp->f_cred);
    390      1.2  thorpej 	mutex_exit(&efd->efd_lock);
    391      1.2  thorpej 
    392      1.2  thorpej 	return 0;
    393      1.2  thorpej }
    394      1.2  thorpej 
    395      1.2  thorpej static int
    396      1.2  thorpej eventfd_fop_close(file_t * const fp)
    397      1.2  thorpej {
    398      1.2  thorpej 	struct eventfd * const efd = fp->f_eventfd;
    399      1.2  thorpej 
    400      1.2  thorpej 	fp->f_eventfd = NULL;
    401      1.2  thorpej 	eventfd_destroy(efd);
    402      1.2  thorpej 
    403      1.2  thorpej 	return 0;
    404      1.2  thorpej }
    405      1.2  thorpej 
    406      1.2  thorpej static void
    407      1.2  thorpej eventfd_filt_read_detach(struct knote * const kn)
    408      1.2  thorpej {
    409      1.2  thorpej 	struct eventfd * const efd = ((file_t *)kn->kn_obj)->f_eventfd;
    410      1.2  thorpej 
    411      1.2  thorpej 	mutex_enter(&efd->efd_lock);
    412      1.2  thorpej 	KASSERT(kn->kn_hook == efd);
    413      1.2  thorpej 	selremove_knote(&efd->efd_read_sel, kn);
    414      1.2  thorpej 	mutex_exit(&efd->efd_lock);
    415      1.2  thorpej }
    416      1.2  thorpej 
    417      1.2  thorpej static int
    418      1.2  thorpej eventfd_filt_read(struct knote * const kn, long const hint)
    419      1.2  thorpej {
    420      1.2  thorpej 	struct eventfd * const efd = ((file_t *)kn->kn_obj)->f_eventfd;
    421      1.7  thorpej 	int rv;
    422      1.2  thorpej 
    423      1.2  thorpej 	if (hint & NOTE_SUBMIT) {
    424      1.2  thorpej 		KASSERT(mutex_owned(&efd->efd_lock));
    425      1.2  thorpej 	} else {
    426      1.2  thorpej 		mutex_enter(&efd->efd_lock);
    427      1.2  thorpej 	}
    428      1.2  thorpej 
    429      1.2  thorpej 	kn->kn_data = (int64_t)efd->efd_val;
    430      1.7  thorpej 	rv = (eventfd_t)kn->kn_data > 0;
    431      1.2  thorpej 
    432      1.2  thorpej 	if ((hint & NOTE_SUBMIT) == 0) {
    433      1.2  thorpej 		mutex_exit(&efd->efd_lock);
    434      1.2  thorpej 	}
    435      1.2  thorpej 
    436      1.7  thorpej 	return rv;
    437      1.2  thorpej }
    438      1.2  thorpej 
    439      1.2  thorpej static const struct filterops eventfd_read_filterops = {
    440      1.6  thorpej 	.f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
    441      1.2  thorpej 	.f_detach = eventfd_filt_read_detach,
    442      1.2  thorpej 	.f_event = eventfd_filt_read,
    443      1.2  thorpej };
    444      1.2  thorpej 
    445      1.2  thorpej static void
    446      1.2  thorpej eventfd_filt_write_detach(struct knote * const kn)
    447      1.2  thorpej {
    448      1.2  thorpej 	struct eventfd * const efd = ((file_t *)kn->kn_obj)->f_eventfd;
    449      1.2  thorpej 
    450      1.2  thorpej 	mutex_enter(&efd->efd_lock);
    451      1.2  thorpej 	KASSERT(kn->kn_hook == efd);
    452      1.2  thorpej 	selremove_knote(&efd->efd_write_sel, kn);
    453      1.2  thorpej 	mutex_exit(&efd->efd_lock);
    454      1.2  thorpej }
    455      1.2  thorpej 
    456      1.2  thorpej static int
    457      1.2  thorpej eventfd_filt_write(struct knote * const kn, long const hint)
    458      1.2  thorpej {
    459      1.2  thorpej 	struct eventfd * const efd = ((file_t *)kn->kn_obj)->f_eventfd;
    460      1.7  thorpej 	int rv;
    461      1.2  thorpej 
    462      1.2  thorpej 	if (hint & NOTE_SUBMIT) {
    463      1.2  thorpej 		KASSERT(mutex_owned(&efd->efd_lock));
    464      1.2  thorpej 	} else {
    465      1.2  thorpej 		mutex_enter(&efd->efd_lock);
    466      1.2  thorpej 	}
    467      1.2  thorpej 
    468      1.2  thorpej 	kn->kn_data = (int64_t)efd->efd_val;
    469      1.7  thorpej 	rv = (eventfd_t)kn->kn_data < EVENTFD_MAXVAL;
    470      1.2  thorpej 
    471      1.2  thorpej 	if ((hint & NOTE_SUBMIT) == 0) {
    472      1.2  thorpej 		mutex_exit(&efd->efd_lock);
    473      1.2  thorpej 	}
    474      1.2  thorpej 
    475      1.7  thorpej 	return rv;
    476      1.2  thorpej }
    477      1.2  thorpej 
    478      1.2  thorpej static const struct filterops eventfd_write_filterops = {
    479      1.6  thorpej 	.f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
    480      1.2  thorpej 	.f_detach = eventfd_filt_write_detach,
    481      1.2  thorpej 	.f_event = eventfd_filt_write,
    482      1.2  thorpej };
    483      1.2  thorpej 
    484      1.2  thorpej static int
    485      1.2  thorpej eventfd_fop_kqfilter(file_t * const fp, struct knote * const kn)
    486      1.2  thorpej {
    487      1.2  thorpej 	struct eventfd * const efd = ((file_t *)kn->kn_obj)->f_eventfd;
    488      1.2  thorpej 	struct selinfo *sel;
    489      1.2  thorpej 
    490      1.2  thorpej 	switch (kn->kn_filter) {
    491      1.2  thorpej 	case EVFILT_READ:
    492      1.2  thorpej 		sel = &efd->efd_read_sel;
    493      1.2  thorpej 		kn->kn_fop = &eventfd_read_filterops;
    494      1.2  thorpej 		break;
    495      1.2  thorpej 
    496      1.2  thorpej 	case EVFILT_WRITE:
    497      1.2  thorpej 		sel = &efd->efd_write_sel;
    498      1.2  thorpej 		kn->kn_fop = &eventfd_write_filterops;
    499      1.2  thorpej 		break;
    500      1.2  thorpej 
    501      1.2  thorpej 	default:
    502      1.2  thorpej 		return EINVAL;
    503      1.2  thorpej 	}
    504      1.2  thorpej 
    505      1.2  thorpej 	kn->kn_hook = efd;
    506      1.2  thorpej 
    507      1.2  thorpej 	mutex_enter(&efd->efd_lock);
    508      1.2  thorpej 	selrecord_knote(sel, kn);
    509      1.2  thorpej 	mutex_exit(&efd->efd_lock);
    510      1.2  thorpej 
    511      1.2  thorpej 	return 0;
    512      1.2  thorpej }
    513      1.2  thorpej 
    514      1.2  thorpej static void
    515      1.2  thorpej eventfd_fop_restart(file_t * const fp)
    516      1.2  thorpej {
    517      1.2  thorpej 	struct eventfd * const efd = fp->f_eventfd;
    518      1.2  thorpej 
    519      1.2  thorpej 	/*
    520      1.2  thorpej 	 * Unblock blocked reads/writes in order to allow close() to complete.
    521      1.2  thorpej 	 * System calls return ERESTART so that the fd is revalidated.
    522      1.2  thorpej 	 */
    523      1.2  thorpej 
    524      1.2  thorpej 	mutex_enter(&efd->efd_lock);
    525      1.2  thorpej 
    526      1.2  thorpej 	if (efd->efd_nwaiters != 0) {
    527      1.2  thorpej 		efd->efd_restarting = true;
    528  1.9.4.1   bouyer 		cv_broadcast(&efd->efd_read_wait);
    529  1.9.4.1   bouyer 		cv_broadcast(&efd->efd_write_wait);
    530      1.2  thorpej 	}
    531      1.2  thorpej 
    532      1.2  thorpej 	mutex_exit(&efd->efd_lock);
    533      1.2  thorpej }
    534      1.2  thorpej 
    535      1.2  thorpej static const struct fileops eventfd_fileops = {
    536      1.2  thorpej 	.fo_name = "eventfd",
    537      1.2  thorpej 	.fo_read = eventfd_fop_read,
    538      1.2  thorpej 	.fo_write = eventfd_fop_write,
    539      1.9  thorpej 	.fo_ioctl = eventfd_ioctl,
    540      1.2  thorpej 	.fo_fcntl = fnullop_fcntl,
    541      1.2  thorpej 	.fo_poll = eventfd_fop_poll,
    542      1.2  thorpej 	.fo_stat = eventfd_fop_stat,
    543      1.2  thorpej 	.fo_close = eventfd_fop_close,
    544      1.2  thorpej 	.fo_kqfilter = eventfd_fop_kqfilter,
    545      1.2  thorpej 	.fo_restart = eventfd_fop_restart,
    546      1.2  thorpej };
    547      1.2  thorpej 
    548      1.2  thorpej /*
    549      1.2  thorpej  * eventfd(2) system call
    550      1.2  thorpej  */
    551      1.2  thorpej int
    552      1.2  thorpej do_eventfd(struct lwp * const l, unsigned int const val, int const flags,
    553      1.2  thorpej     register_t *retval)
    554      1.2  thorpej {
    555      1.2  thorpej 	file_t *fp;
    556      1.2  thorpej 	int fd, error;
    557      1.2  thorpej 
    558      1.2  thorpej 	if (flags & ~(EFD_CLOEXEC | EFD_NONBLOCK | EFD_SEMAPHORE)) {
    559      1.2  thorpej 		return EINVAL;
    560      1.2  thorpej 	}
    561      1.2  thorpej 
    562      1.2  thorpej 	if ((error = fd_allocfile(&fp, &fd)) != 0) {
    563      1.2  thorpej 		return error;
    564      1.2  thorpej 	}
    565      1.2  thorpej 
    566      1.2  thorpej 	fp->f_flag = FREAD | FWRITE;
    567      1.2  thorpej 	if (flags & EFD_NONBLOCK) {
    568      1.2  thorpej 		fp->f_flag |= FNONBLOCK;
    569      1.2  thorpej 	}
    570      1.2  thorpej 	fp->f_type = DTYPE_EVENTFD;
    571      1.2  thorpej 	fp->f_ops = &eventfd_fileops;
    572      1.2  thorpej 	fp->f_eventfd = eventfd_create(val, flags);
    573      1.2  thorpej 	fd_set_exclose(l, fd, !!(flags & EFD_CLOEXEC));
    574      1.2  thorpej 	fd_affix(curproc, fp, fd);
    575      1.2  thorpej 
    576      1.2  thorpej 	*retval = fd;
    577      1.2  thorpej 	return 0;
    578      1.2  thorpej }
    579      1.2  thorpej 
    580      1.2  thorpej int
    581      1.2  thorpej sys_eventfd(struct lwp *l, const struct sys_eventfd_args *uap,
    582      1.2  thorpej     register_t *retval)
    583      1.2  thorpej {
    584      1.2  thorpej 	/* {
    585      1.2  thorpej 		syscallarg(unsigned int) val;
    586      1.2  thorpej 		syscallarg(int) flags;
    587      1.2  thorpej 	} */
    588      1.2  thorpej 
    589      1.2  thorpej 	return do_eventfd(l, SCARG(uap, val), SCARG(uap, flags), retval);
    590      1.2  thorpej }
    591