/*	$NetBSD: linux_inotify.c,v 1.5 2023/08/24 19:51:24 christos Exp $	*/

/*-
 * Copyright (c) 2023 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Theodore Preduta.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_inotify.c,v 1.5 2023/08/24 19:51:24 christos Exp $");

#include <sys/param.h>
#include <sys/types.h>
#include <sys/bitops.h>
#include <sys/dirent.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/errno.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/fcntl.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/selinfo.h>
#include <sys/select.h>
#include <sys/signal.h>
#include <sys/vnode.h>

#include <sys/syscallargs.h>

#include <compat/linux/common/linux_machdep.h>
#include <compat/linux/common/linux_fcntl.h>
#include <compat/linux/common/linux_inotify.h>
#include <compat/linux/common/linux_ipc.h>
#include <compat/linux/common/linux_sched.h>
#include <compat/linux/common/linux_sem.h>
#include <compat/linux/common/linux_signal.h>

#include <compat/linux/linux_syscallargs.h>

/*
 * inotify(2). This interface allows the user to get file system
 * events and (unlike kqueue(2)) their order is strictly preserved.
 * While nice, the API has sufficient gotchas that mean we don't want
 * to add native entry points for it. They are:
 *
 * - Because data is returned via read(2), this API is prone to
 *   unaligned memory accesses. There is a note in the Linux man page
 *   that says the name field of struct linux_inotify_event *can* be
 *   used for alignment purposes. In practice, even Linux doesn't
 *   always do this, so for simplicity, we don't ever do this.
 */
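
/*
 * For reference, a Linux program typically consumes these events
 * roughly as follows (an illustrative userland sketch only, not part
 * of this module; inotify_event is the Linux userland structure):
 *
 *	char buf[4096];
 *	ssize_t n = read(ifd, buf, sizeof(buf));
 *	for (char *p = buf; p < buf + n; ) {
 *		struct inotify_event *ie = (struct inotify_event *)p;
 *		// ... use ie->wd, ie->mask, ie->len, ie->name ...
 *		p += sizeof(*ie) + ie->len;
 *	}
 *
 * Since ie_event.len below is set to strlen(name) + 1 with no padding,
 * consecutive events are packed back to back and the cast above is
 * exactly the kind of unaligned access described above.
 */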

#define LINUX_INOTIFY_MAX_QUEUED	16384
#define LINUX_INOTIFY_MAX_FROM_KEVENT	3

#if DEBUG_LINUX
#define DPRINTF(x) uprintf x
#else
#define DPRINTF(x) __nothing
#endif

struct inotify_entry {
	TAILQ_ENTRY(inotify_entry) ie_entries;
	char ie_name[NAME_MAX + 1];
	struct linux_inotify_event ie_event;
};

struct inotify_dir_entries {
	size_t ide_count;
	struct inotify_dir_entry {
		char name[NAME_MAX + 1];
		ino_t fileno;
	} ide_entries[];
};
#define INOTIFY_DIR_ENTRIES_SIZE(count) (sizeof(struct inotify_dir_entries) \
    + (count) * sizeof(struct inotify_dir_entry))

struct inotifyfd {
	int ifd_kqfd;			/* kqueue fd used by this inotify */
					/* instance */
	struct selinfo ifd_sel;		/* for EVFILT_READ by epoll */
	kmutex_t ifd_lock;		/* lock for ifd_sel, ifd_wds and */
					/* ifd_nwds */

	struct inotify_dir_entries **ifd_wds;
					/* keeps track of watch descriptors */
					/* for directories: snapshot of the */
					/* directory state */
					/* for files: an inotify_dir_entries */
					/* with ide_count == 0 */
	size_t ifd_nwds;		/* max watch descriptor that can be */
					/* stored in ifd_wds + 1 */

	TAILQ_HEAD(, inotify_entry) ifd_qhead;	/* queue of pending events */
	size_t ifd_qcount;		/* number of pending events */
	kcondvar_t ifd_qcv;		/* condvar for blocking reads */
	kmutex_t ifd_qlock;		/* lock for ifd_q* and interlock */
					/* for ifd_qcv */
};

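/*
 * A single entry in the translation tables below: an inotify mask bit
 * and the EVFILT_VNODE fflags bit(s) it is translated to/from.
 */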
struct inotify_kevent_mask_pair {
	uint32_t inotify;
	uint32_t kevent;
};

static int inotify_kev_fetch_changes(void *, const struct kevent *,
    struct kevent *, size_t, int);
static int do_inotify_init(struct lwp *, register_t *, int);
static int inotify_close_wd(struct inotifyfd *, int);
static uint32_t inotify_mask_to_kevent_fflags(uint32_t, enum vtype);
static void do_kevent_to_inotify(int32_t, uint32_t, uint32_t,
    struct inotify_entry *, size_t *, char *);
static int kevent_to_inotify(struct inotifyfd *, int, enum vtype, uint32_t,
    uint32_t, struct inotify_entry *, size_t *);
static int inotify_readdir(file_t *, struct dirent *, int *, bool);
static struct inotify_dir_entries *get_inotify_dir_entries(int, bool);

static int inotify_filt_attach(struct knote *);
static void inotify_filt_detach(struct knote *);
static int inotify_filt_event(struct knote *, long);
static void inotify_read_filt_detach(struct knote *);
static int inotify_read_filt_event(struct knote *, long);

static int inotify_read(file_t *, off_t *, struct uio *, kauth_cred_t, int);
static int inotify_close(file_t *);
static int inotify_poll(file_t *, int);
static int inotify_kqfilter(file_t *, struct knote *);
static void inotify_restart(file_t *);

static const char inotify_filtname[] = "LINUX_INOTIFY";
static int inotify_filtid;

/* "fake" EVFILT_VNODE that gets attached to ifd_deps */
static const struct filterops inotify_filtops = {
	.f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach = inotify_filt_attach,
	.f_detach = inotify_filt_detach,
	.f_event = inotify_filt_event,
	.f_touch = NULL,
};

/* EVFILT_READ attached to inotifyfd (to support watching via epoll) */
static const struct filterops inotify_read_filtops = {
	.f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach = NULL,	/* attached via .fo_kqfilter */
	.f_detach = inotify_read_filt_detach,
	.f_event = inotify_read_filt_event,
	.f_touch = NULL,
};

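/*
 * File operations for the descriptor handed back to the emulated
 * process by inotify_init(2)/inotify_init1(2).
 */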
static const struct fileops inotify_fileops = {
	.fo_name = "inotify",
	.fo_read = inotify_read,
	.fo_write = fbadop_write,
	.fo_ioctl = fbadop_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = inotify_poll,
	.fo_stat = fbadop_stat,
	.fo_close = inotify_close,
	.fo_kqfilter = inotify_kqfilter,
	.fo_restart = inotify_restart,
	.fo_fpathconf = (void *)eopnotsupp,
};

/* basic flag translations */
static const struct inotify_kevent_mask_pair common_inotify_to_kevent[] = {
	{ .inotify = LINUX_IN_ATTRIB,		.kevent = NOTE_ATTRIB, },
	{ .inotify = LINUX_IN_CLOSE_NOWRITE,	.kevent = NOTE_CLOSE, },
	{ .inotify = LINUX_IN_OPEN,		.kevent = NOTE_OPEN, },
	{ .inotify = LINUX_IN_MOVE_SELF,	.kevent = NOTE_RENAME, },
};
static const size_t common_inotify_to_kevent_len =
    __arraycount(common_inotify_to_kevent);

static const struct inotify_kevent_mask_pair vreg_inotify_to_kevent[] = {
	{ .inotify = LINUX_IN_ACCESS,		.kevent = NOTE_READ, },
	{ .inotify = LINUX_IN_ATTRIB,		.kevent = NOTE_ATTRIB|NOTE_LINK, },
	{ .inotify = LINUX_IN_CLOSE_WRITE,	.kevent = NOTE_CLOSE_WRITE, },
	{ .inotify = LINUX_IN_MODIFY,		.kevent = NOTE_WRITE, },
};
static const size_t vreg_inotify_to_kevent_len =
    __arraycount(vreg_inotify_to_kevent);

static const struct inotify_kevent_mask_pair vdir_inotify_to_kevent[] = {
	{ .inotify = LINUX_IN_ACCESS,		.kevent = NOTE_READ, },
	{ .inotify = LINUX_IN_CREATE,		.kevent = NOTE_WRITE, },
	{ .inotify = LINUX_IN_DELETE,		.kevent = NOTE_WRITE, },
	{ .inotify = LINUX_IN_MOVED_FROM,	.kevent = NOTE_WRITE, },
	{ .inotify = LINUX_IN_MOVED_TO,		.kevent = NOTE_WRITE, },
};
static const size_t vdir_inotify_to_kevent_len =
    __arraycount(vdir_inotify_to_kevent);

static const struct inotify_kevent_mask_pair common_kevent_to_inotify[] = {
	{ .kevent = NOTE_ATTRIB,	.inotify = LINUX_IN_ATTRIB, },
	{ .kevent = NOTE_CLOSE,		.inotify = LINUX_IN_CLOSE_NOWRITE, },
	{ .kevent = NOTE_CLOSE_WRITE,	.inotify = LINUX_IN_CLOSE_WRITE, },
	{ .kevent = NOTE_OPEN,		.inotify = LINUX_IN_OPEN, },
	{ .kevent = NOTE_READ,		.inotify = LINUX_IN_ACCESS, },
	{ .kevent = NOTE_RENAME,	.inotify = LINUX_IN_MOVE_SELF, },
	{ .kevent = NOTE_REVOKE,	.inotify = LINUX_IN_UNMOUNT, },
};
static const size_t common_kevent_to_inotify_len =
    __arraycount(common_kevent_to_inotify);

static const struct inotify_kevent_mask_pair vreg_kevent_to_inotify[] = {
	{ .kevent = NOTE_DELETE|NOTE_LINK,	.inotify = LINUX_IN_ATTRIB, },
	{ .kevent = NOTE_WRITE,			.inotify = LINUX_IN_MODIFY, },
};
static const size_t vreg_kevent_to_inotify_len =
    __arraycount(vreg_kevent_to_inotify);

/*
 * Register the custom kfilter for inotify.
 */
int
linux_inotify_init(void)
{
	return kfilter_register(inotify_filtname, &inotify_filtops,
	    &inotify_filtid);
}

/*
 * Unregister the custom kfilter for inotify.
 */
int
linux_inotify_fini(void)
{
	return kfilter_unregister(inotify_filtname);
}

/*
 * Copyin callback used by kevent. This copies already converted
 * filters from kernel memory to the kevent internal kernel memory.
 * Hence the memcpy instead of copyin.
 */
static int
inotify_kev_fetch_changes(void *ctx, const struct kevent *changelist,
    struct kevent *changes, size_t index, int n)
{
	memcpy(changes, changelist + index, n * sizeof(*changes));

	return 0;
}

/*
 * Initialize a new inotify fd.
 */
static int
do_inotify_init(struct lwp *l, register_t *retval, int flags)
{
	file_t *fp;
	int error, fd;
	struct proc *p = l->l_proc;
	struct inotifyfd *ifd;
	struct sys_kqueue1_args kqa;

	if (flags & ~(LINUX_IN_ALL_FLAGS))
		return EINVAL;

	ifd = kmem_zalloc(sizeof(*ifd), KM_SLEEP);
	mutex_init(&ifd->ifd_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&ifd->ifd_qlock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&ifd->ifd_qcv, "inotify");
	selinit(&ifd->ifd_sel);
	TAILQ_INIT(&ifd->ifd_qhead);

	ifd->ifd_nwds = 1;
	ifd->ifd_wds = kmem_zalloc(ifd->ifd_nwds * sizeof(*ifd->ifd_wds),
	    KM_SLEEP);

	SCARG(&kqa, flags) = 0;
	if (flags & LINUX_IN_NONBLOCK)
		SCARG(&kqa, flags) |= O_NONBLOCK;
	error = sys_kqueue1(l, &kqa, retval);
	if (error != 0)
		goto leave0;
	ifd->ifd_kqfd = *retval;

	error = fd_allocfile(&fp, &fd);
	if (error != 0)
		goto leave1;

	fp->f_flag = FREAD;
	if (flags & LINUX_IN_NONBLOCK)
		fp->f_flag |= FNONBLOCK;
	fp->f_type = DTYPE_MISC;
	fp->f_ops = &inotify_fileops;
	fp->f_data = ifd;
	fd_set_exclose(l, fd, (flags & LINUX_IN_CLOEXEC) != 0);
	fd_affix(p, fp, fd);

	*retval = fd;
	return 0;

leave1:
	KASSERT(fd_getfile(ifd->ifd_kqfd) != NULL);
	fd_close(ifd->ifd_kqfd);
leave0:
	mutex_destroy(&ifd->ifd_lock);
	mutex_destroy(&ifd->ifd_qlock);
	cv_destroy(&ifd->ifd_qcv);
	seldestroy(&ifd->ifd_sel);

	kmem_free(ifd->ifd_wds, ifd->ifd_nwds * sizeof(*ifd->ifd_wds));
	kmem_free(ifd, sizeof(*ifd));

	return error;
}

#ifndef __aarch64__
/*
 * inotify_init(2). Initialize a new inotify fd with flags=0.
 */
int
linux_sys_inotify_init(struct lwp *l, const void *v, register_t *retval)
{
	return do_inotify_init(l, retval, 0);
}
#endif

/*
 * inotify_init1(2). Initialize a new inotify fd with the given flags.
 */
int
linux_sys_inotify_init1(struct lwp *l,
    const struct linux_sys_inotify_init1_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) flags;
	} */

	return do_inotify_init(l, retval, SCARG(uap, flags));
}

/*
 * Convert inotify mask to the fflags of an equivalent kevent.
 */
static uint32_t
inotify_mask_to_kevent_fflags(uint32_t mask, enum vtype type)
{
	const struct inotify_kevent_mask_pair *type_inotify_to_kevent;
	uint32_t fflags;
	size_t i, type_inotify_to_kevent_len;

	switch (type) {
	case VREG:
	case VDIR:
	case VLNK:
		break;

	default:
		return 0;
	}

	/* flags that all watches could have */
	fflags = NOTE_DELETE|NOTE_REVOKE;
	for (i = 0; i < common_inotify_to_kevent_len; i++)
		if (mask & common_inotify_to_kevent[i].inotify)
			fflags |= common_inotify_to_kevent[i].kevent;

	/* flags that depend on type */
	switch (type) {
	case VREG:
		type_inotify_to_kevent = vreg_inotify_to_kevent;
		type_inotify_to_kevent_len = vreg_inotify_to_kevent_len;
		break;

	case VDIR:
		type_inotify_to_kevent = vdir_inotify_to_kevent;
		type_inotify_to_kevent_len = vdir_inotify_to_kevent_len;
		break;

	default:
		type_inotify_to_kevent_len = 0;
		break;
	}
	for (i = 0; i < type_inotify_to_kevent_len; i++)
		if (mask & type_inotify_to_kevent[i].inotify)
			fflags |= type_inotify_to_kevent[i].kevent;

	return fflags;
}
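
/*
 * For example (illustrative only): a watch on a regular file with
 * mask = LINUX_IN_MODIFY|LINUX_IN_ATTRIB translates to
 * NOTE_DELETE|NOTE_REVOKE (always monitored), NOTE_ATTRIB (common
 * table) and NOTE_ATTRIB|NOTE_LINK|NOTE_WRITE (VREG table).
 */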

/*
 * inotify_add_watch(2). Open a fd for pathname (if desired by mask),
 * track it and add an equivalent kqueue event for it in
 * ifd->ifd_kqfd.
 */
int
linux_sys_inotify_add_watch(struct lwp *l,
    const struct linux_sys_inotify_add_watch_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) fd;
		syscallarg(const char *) pathname;
		syscallarg(uint32_t) mask;
	} */
	int wd, i, error = 0;
	file_t *fp, *wp, *cur_fp;
	struct inotifyfd *ifd;
	struct inotify_dir_entries **new_wds;
	struct knote *kn, *tmpkn;
	struct sys_open_args oa;
	struct kevent kev;
	struct vnode *wvp;
	namei_simple_flags_t sflags;
	struct kevent_ops k_ops = {
		.keo_private = NULL,
		.keo_fetch_timeout = NULL,
		.keo_fetch_changes = inotify_kev_fetch_changes,
		.keo_put_events = NULL,
	};
	const int fd = SCARG(uap, fd);
	const uint32_t mask = SCARG(uap, mask);

	if (mask & ~LINUX_IN_ADD_KNOWN)
		return EINVAL;

	fp = fd_getfile(fd);
	if (fp == NULL)
		return EBADF;

	if (fp->f_ops != &inotify_fileops) {
		/* not an inotify fd */
		error = EBADF;
		goto leave0;
	}

	ifd = fp->f_data;

	mutex_enter(&ifd->ifd_lock);

	if (mask & LINUX_IN_DONT_FOLLOW)
		sflags = NSM_NOFOLLOW_TRYEMULROOT;
	else
		sflags = NSM_FOLLOW_TRYEMULROOT;
	error = namei_simple_user(SCARG(uap, pathname), sflags, &wvp);
	if (error != 0)
		goto leave1;

	/* Check to see if we already have a descriptor to wd's file. */
	wd = -1;
	for (i = 0; i < ifd->ifd_nwds; i++) {
		if (ifd->ifd_wds[i] != NULL) {
			cur_fp = fd_getfile(i);
			if (cur_fp == NULL) {
				DPRINTF(("%s: wd=%d was closed externally\n",
				    __func__, i));
				error = EBADF;
				goto leave1;
			}
			if (cur_fp->f_type != DTYPE_VNODE) {
				DPRINTF(("%s: wd=%d was replaced "
				    "with a non-vnode\n", __func__, i));
				error = EBADF;
			}
			if (error == 0 && cur_fp->f_vnode == wvp)
				wd = i;
			fd_putfile(i);
			if (error != 0)
				goto leave1;

			if (wd != -1)
				break;
		}
	}

	if (wd == -1) {
		/*
		 * If we do not have a descriptor to wd's file, we
		 * need to open the watch descriptor.
		 */
		SCARG(&oa, path) = SCARG(uap, pathname);
		SCARG(&oa, mode) = 0;
		SCARG(&oa, flags) = O_RDONLY;
		if (mask & LINUX_IN_DONT_FOLLOW)
			SCARG(&oa, flags) |= O_NOFOLLOW;
		if (mask & LINUX_IN_ONLYDIR)
			SCARG(&oa, flags) |= O_DIRECTORY;

		error = sys_open(l, &oa, retval);
		if (error != 0)
			goto leave1;
		wd = *retval;
		wp = fd_getfile(wd);
		KASSERT(wp != NULL);
		KASSERT(wp->f_type == DTYPE_VNODE);

		/* translate the flags */
		memset(&kev, 0, sizeof(kev));
		EV_SET(&kev, wd, inotify_filtid, EV_ADD|EV_ENABLE,
		    NOTE_DELETE|NOTE_REVOKE, 0, ifd);
		if (mask & LINUX_IN_ONESHOT)
			kev.flags |= EV_ONESHOT;
		kev.fflags |= inotify_mask_to_kevent_fflags(mask,
		    wp->f_vnode->v_type);

		error = kevent1(retval, ifd->ifd_kqfd, &kev, 1, NULL, 0, NULL,
		    &k_ops);
		if (error != 0) {
			KASSERT(fd_getfile(wd) != NULL);
			fd_close(wd);
		} else {
			/* Success! */
			*retval = wd;

			/* Resize ifd_nwds to accommodate wd. */
			if (wd+1 > ifd->ifd_nwds) {
				new_wds = kmem_zalloc(
				    (wd+1) * sizeof(*ifd->ifd_wds), KM_SLEEP);
				memcpy(new_wds, ifd->ifd_wds,
				    ifd->ifd_nwds * sizeof(*ifd->ifd_wds));

				kmem_free(ifd->ifd_wds,
				    ifd->ifd_nwds * sizeof(*ifd->ifd_wds));

				ifd->ifd_wds = new_wds;
				ifd->ifd_nwds = wd+1;
			}

			ifd->ifd_wds[wd] = get_inotify_dir_entries(wd, true);
		}
	} else {
		/*
		 * If we do have a descriptor to wd's file, try to edit
		 * the relevant knote.
		 */
		if (mask & LINUX_IN_MASK_CREATE) {
			error = EEXIST;
			goto leave1;
		}

		wp = fd_getfile(wd);
		if (wp == NULL) {
			DPRINTF(("%s: wd=%d was closed externally "
			    "(race, probably)\n", __func__, wd));
			error = EBADF;
			goto leave1;
		}
		if (wp->f_type != DTYPE_VNODE) {
			DPRINTF(("%s: wd=%d was replaced with a non-vnode "
566 "(race, probably)\n", __func__, wd));
567 error = EBADF;
568 goto leave2;
569 }
570
571 kev.fflags = NOTE_DELETE | NOTE_REVOKE
572 | inotify_mask_to_kevent_fflags(mask, wp->f_vnode->v_type);
573
574 mutex_enter(wp->f_vnode->v_interlock);
575
576 /*
577 * XXX We are forced to find the appropriate knote
578 * manually because we cannot create a custom f_touch
579 * function for inotify_filtops. See filter_touch()
580 * in kern_event.c for details.
581 */
582 SLIST_FOREACH_SAFE(kn, &wp->f_vnode->v_klist->vk_klist,
583 kn_selnext, tmpkn) {
584 if (kn->kn_fop == &inotify_filtops
585 && ifd == kn->kn_kevent.udata) {
586 mutex_enter(&kn->kn_kq->kq_lock);
587 if (mask & LINUX_IN_MASK_ADD)
588 kn->kn_sfflags |= kev.fflags;
589 else
590 kn->kn_sfflags = kev.fflags;
591 wp->f_vnode->v_klist->vk_interest |=
592 kn->kn_sfflags;
593 mutex_exit(&kn->kn_kq->kq_lock);
594 }
595 }
596
597 mutex_exit(wp->f_vnode->v_interlock);
598
599 /* Success! */
600 *retval = wd;
601 }
602
603 leave2:
604 fd_putfile(wd);
605 leave1:
606 mutex_exit(&ifd->ifd_lock);
607 leave0:
608 fd_putfile(fd);
609 return error;
610 }
611
612 /*
613 * Remove a wd from ifd and close wd.
614 */
615 static int
616 inotify_close_wd(struct inotifyfd *ifd, int wd)
617 {
618 file_t *wp;
619 int error;
620 register_t retval;
621 struct kevent kev;
622 struct kevent_ops k_ops = {
623 .keo_private = NULL,
624 .keo_fetch_timeout = NULL,
625 .keo_fetch_changes = inotify_kev_fetch_changes,
626 .keo_put_events = NULL,
627 };
628
629 mutex_enter(&ifd->ifd_lock);
630
631 KASSERT(0 <= wd && wd < ifd->ifd_nwds && ifd->ifd_wds[wd] != NULL);
632
633 kmem_free(ifd->ifd_wds[wd],
634 INOTIFY_DIR_ENTRIES_SIZE(ifd->ifd_wds[wd]->ide_count));
635 ifd->ifd_wds[wd] = NULL;
636
637 mutex_exit(&ifd->ifd_lock);
638
639 wp = fd_getfile(wd);
640 if (wp == NULL) {
641 DPRINTF(("%s: wd=%d is already closed\n", __func__, wd));
642 return 0;
643 }
644 KASSERT(!mutex_owned(wp->f_vnode->v_interlock));
645
646 memset(&kev, 0, sizeof(kev));
647 EV_SET(&kev, wd, EVFILT_VNODE, EV_DELETE, 0, 0, 0);
648 error = kevent1(&retval, ifd->ifd_kqfd, &kev, 1, NULL, 0, NULL, &k_ops);
649 if (error != 0)
650 DPRINTF(("%s: attempt to disable all events for wd=%d "
651 "had error=%d\n", __func__, wd, error));
652
653 return fd_close(wd);
654 }
655
656 /*
657 * inotify_rm_watch(2). Close wd and remove it from ifd->ifd_wds.
658 */
659 int
660 linux_sys_inotify_rm_watch(struct lwp *l,
661 const struct linux_sys_inotify_rm_watch_args *uap, register_t *retval)
662 {
663 /* {
664 syscallarg(int) fd;
665 syscallarg(int) wd;
666 } */
667 struct inotifyfd *ifd;
668 file_t *fp;
669 int error = 0;
670 const int fd = SCARG(uap, fd);
671 const int wd = SCARG(uap, wd);
672
673 fp = fd_getfile(fd);
674 if (fp == NULL)
675 return EBADF;
676 if (fp->f_ops != &inotify_fileops) {
677 /* not an inotify fd */
678 error = EINVAL;
679 goto leave;
680 }
681
682 ifd = fp->f_data;
683 if (wd < 0 || wd >= ifd->ifd_nwds || ifd->ifd_wds[wd] == NULL) {
684 error = EINVAL;
685 goto leave;
686 }
687
688 error = inotify_close_wd(ifd, wd);
689
690 leave:
691 fd_putfile(fd);
692 return error;
693 }
694
695 /*
696 * Attach the inotify filter.
697 */
698 static int
699 inotify_filt_attach(struct knote *kn)
700 {
701 file_t *fp = kn->kn_obj;
702 struct vnode *vp;
703
704 KASSERT(fp->f_type == DTYPE_VNODE);
705 vp = fp->f_vnode;
706
707 /*
708 * Needs to be set so that we get the same event handling as
709 * EVFILT_VNODE. Otherwise we don't get any events.
710 *
711 * A consequence of this is that modifications/removals of
712 * this knote need to specify EVFILT_VNODE rather than
713 * inotify_filtid.
714 */
715 kn->kn_filter = EVFILT_VNODE;
716
717 kn->kn_fop = &inotify_filtops;
718 kn->kn_hook = vp;
719 vn_knote_attach(vp, kn);
720
721 return 0;
722 }
723
724 /*
725 * Detach the inotify filter.
726 */
727 static void
728 inotify_filt_detach(struct knote *kn)
729 {
730 struct vnode *vp = (struct vnode *)kn->kn_hook;
731
732 vn_knote_detach(vp, kn);
733 }
734
735 /*
736 * Create a single inotify event.
737 */
738 static void
739 do_kevent_to_inotify(int32_t wd, uint32_t mask, uint32_t cookie,
740 struct inotify_entry *buf, size_t *nbuf, char *name)
741 {
742 KASSERT(*nbuf < LINUX_INOTIFY_MAX_FROM_KEVENT);
743
744 buf += *nbuf;
745
746 memset(buf, 0, sizeof(*buf));
747
748 buf->ie_event.wd = wd;
749 buf->ie_event.mask = mask;
750 buf->ie_event.cookie = cookie;
751
752 if (name != NULL) {
753 buf->ie_event.len = strlen(name) + 1;
754 KASSERT(buf->ie_event.len < sizeof(buf->ie_name));
755 strcpy(buf->ie_name, name);
756 }
757
758 ++(*nbuf);
759 }
760
761 /*
762 * Like vn_readdir(), but with vnode locking only if needs_lock is
763 * true (to avoid double locking in some situations).
764 */
765 static int
766 inotify_readdir(file_t *fp, struct dirent *dep, int *done, bool needs_lock)
767 {
768 struct vnode *vp;
769 struct iovec iov;
770 struct uio uio;
771 int error, eofflag;
772
773 KASSERT(fp->f_type == DTYPE_VNODE);
774 vp = fp->f_vnode;
775 KASSERT(vp->v_type == VDIR);
776
777 iov.iov_base = dep;
778 iov.iov_len = sizeof(*dep);
779
780 uio.uio_iov = &iov;
781 uio.uio_iovcnt = 1;
782 uio.uio_rw = UIO_READ;
783 uio.uio_resid = sizeof(*dep);
784 UIO_SETUP_SYSSPACE(&uio);
785
786 mutex_enter(&fp->f_lock);
787 uio.uio_offset = fp->f_offset;
788 mutex_exit(&fp->f_lock);
789
790 /* XXX: should pass whether to lock or not */
791 if (needs_lock)
792 vn_lock(vp, LK_SHARED | LK_RETRY);
793 else
794 /*
		 * XXX We need to temporarily drop v_interlock because
		 * it may be temporarily acquired by biowait().
		 */
		mutex_exit(vp->v_interlock);
	KASSERT(!mutex_owned(vp->v_interlock));
	error = VOP_READDIR(vp, &uio, fp->f_cred, &eofflag, NULL, NULL);
	if (needs_lock)
		VOP_UNLOCK(vp);
	else
		mutex_enter(vp->v_interlock);

	mutex_enter(&fp->f_lock);
	fp->f_offset = uio.uio_offset;
	mutex_exit(&fp->f_lock);

	*done = sizeof(*dep) - uio.uio_resid;
	return error;
}

/*
 * Create (and allocate) an appropriate inotify_dir_entries struct for wd to be
 * used on ifd_wds of inotifyfd. If the entries on a directory fail to be read,
 * NULL is returned. needs_lock indicates whether the vnode's lock still needs
 * to be taken (i.e. it is not already held).
 */
static struct inotify_dir_entries *
get_inotify_dir_entries(int wd, bool needs_lock)
{
	struct dirent de;
	struct dirent *currdep;
	struct inotify_dir_entries *idep = NULL;
	file_t *wp;
	int done, error;
	size_t i, decount;

	wp = fd_getfile(wd);
	if (wp == NULL)
		return NULL;
	if (wp->f_type != DTYPE_VNODE)
		goto leave;

	/* for non-directories, we have 0 entries. */
	if (wp->f_vnode->v_type != VDIR) {
		idep = kmem_zalloc(INOTIFY_DIR_ENTRIES_SIZE(0), KM_SLEEP);
		goto leave;
	}

	mutex_enter(&wp->f_lock);
	wp->f_offset = 0;
	mutex_exit(&wp->f_lock);
	decount = 0;
	for (;;) {
		error = inotify_readdir(wp, &de, &done, needs_lock);
		if (error != 0)
			goto leave;
		if (done == 0)
			break;

		currdep = &de;
		while ((char *)currdep < ((char *)&de) + done) {
			decount++;
			currdep = _DIRENT_NEXT(currdep);
		}
	}

	idep = kmem_zalloc(INOTIFY_DIR_ENTRIES_SIZE(decount), KM_SLEEP);
	idep->ide_count = decount;

	mutex_enter(&wp->f_lock);
	wp->f_offset = 0;
	mutex_exit(&wp->f_lock);
	for (i = 0; i < decount;) {
		error = inotify_readdir(wp, &de, &done, needs_lock);
		if (error != 0 || done == 0) {
			kmem_free(idep, INOTIFY_DIR_ENTRIES_SIZE(decount));
			idep = NULL;
			goto leave;
		}

		currdep = &de;
		while ((char *)currdep < ((char *)&de) + done) {
			idep->ide_entries[i].fileno = currdep->d_fileno;
			strcpy(idep->ide_entries[i].name, currdep->d_name);

			currdep = _DIRENT_NEXT(currdep);
			i++;
		}
	}

leave:
	fd_putfile(wd);
	return idep;
}

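/*
 * Find the index at which two directory snapshots first differ.  i2 is
 * expected to be the smaller snapshot; if no difference is found in the
 * common prefix, the extra entry is the last one of the larger
 * snapshot i1 and i2->ide_count is returned.
 */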
static size_t
find_entry(struct inotify_dir_entries *i1, struct inotify_dir_entries *i2)
{
	for (size_t i = 0; i < i2->ide_count; i++)
		if (i2->ide_entries[i].fileno != i1->ide_entries[i].fileno)
			return i;

	/*
	 * No mismatch within the smaller snapshot i2, so the extra
	 * entry must be the last entry of the larger snapshot i1.
	 */
	return i2->ide_count;
}

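/*
 * Translate a NOTE_WRITE on a watched directory into inotify events by
 * comparing the stored snapshot of the directory's entries against a
 * fresh one: one more entry means LINUX_IN_CREATE, one fewer means
 * LINUX_IN_DELETE, and an unchanged count is treated as a rename
 * within the directory (LINUX_IN_MOVED_FROM/LINUX_IN_MOVED_TO).
 */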
static void
handle_write(struct inotifyfd *ifd, int wd, struct inotify_entry *buf,
    size_t *nbuf)
{
	struct inotify_dir_entries *old_idep, *new_idep;
	size_t i;

	mutex_enter(&ifd->ifd_lock);

	old_idep = ifd->ifd_wds[wd];
	KASSERT(old_idep != NULL);
	new_idep = get_inotify_dir_entries(wd, false);
	if (new_idep == NULL) {
		DPRINTF(("%s: directory for wd=%d could not be read\n",
		    __func__, wd));
		mutex_exit(&ifd->ifd_lock);
		return;
	}

	if (old_idep->ide_count < new_idep->ide_count) {
		KASSERT(old_idep->ide_count + 1 == new_idep->ide_count);

		/* Find the new entry. */
		i = find_entry(new_idep, old_idep);
		do_kevent_to_inotify(wd, LINUX_IN_CREATE, 0,
		    buf, nbuf, new_idep->ide_entries[i].name);
		goto out;
	}

	if (old_idep->ide_count > new_idep->ide_count) {
		KASSERT(old_idep->ide_count == new_idep->ide_count + 1);

		/* Find the deleted entry. */
		i = find_entry(old_idep, new_idep);

		do_kevent_to_inotify(wd, LINUX_IN_DELETE, 0,
		    buf, nbuf, old_idep->ide_entries[i].name);
		goto out;
	}

	/*
	 * XXX Because we are not watching the entire
	 * file system, the only time we know for sure
	 * that the event is a LINUX_IN_MOVED_FROM/
	 * LINUX_IN_MOVED_TO is when the move happens
	 * within a single directory... ie. the number
	 * of directory entries has not changed.
	 *
	 * Otherwise all we can say for sure is that
	 * something was created/deleted. So we issue a
	 * LINUX_IN_CREATE/LINUX_IN_DELETE.
	 */
	ino_t changed = new_idep->ide_entries[new_idep->ide_count - 1].fileno;

	/* Find the deleted entry. */
	for (i = 0; i < old_idep->ide_count; i++)
		if (old_idep->ide_entries[i].fileno == changed)
			break;
	KASSERT(i != old_idep->ide_count);

	do_kevent_to_inotify(wd, LINUX_IN_MOVED_FROM, changed, buf, nbuf,
	    old_idep->ide_entries[i].name);

	do_kevent_to_inotify(wd, LINUX_IN_MOVED_TO, changed, buf, nbuf,
	    new_idep->ide_entries[new_idep->ide_count - 1].name);

out:
	ifd->ifd_wds[wd] = new_idep;
	mutex_exit(&ifd->ifd_lock);
}

/*
 * Convert a kevent flags and fflags for EVFILT_VNODE to some number
 * of inotify events.
 */
static int
kevent_to_inotify(struct inotifyfd *ifd, int wd, enum vtype wtype,
    uint32_t flags, uint32_t fflags, struct inotify_entry *buf,
    size_t *nbuf)
{
	struct stat st;
	file_t *wp;
	size_t i;
	int error = 0;

	for (i = 0; i < common_kevent_to_inotify_len; i++)
		if (fflags & common_kevent_to_inotify[i].kevent)
			do_kevent_to_inotify(wd,
			    common_kevent_to_inotify[i].inotify, 0, buf, nbuf,
			    NULL);

	if (wtype == VREG) {
		for (i = 0; i < vreg_kevent_to_inotify_len; i++)
			if (fflags & vreg_kevent_to_inotify[i].kevent)
				do_kevent_to_inotify(wd,
				    vreg_kevent_to_inotify[i].inotify, 0,
				    buf, nbuf, NULL);
	} else if (wtype == VDIR) {
		for (i = 0; i < *nbuf; i++)
			if (buf[i].ie_event.mask &
			    (LINUX_IN_ACCESS|LINUX_IN_ATTRIB
			    |LINUX_IN_CLOSE|LINUX_IN_OPEN))
				buf[i].ie_event.mask |= LINUX_IN_ISDIR;

		/* Need to disambiguate the possible NOTE_WRITEs. */
		if (fflags & NOTE_WRITE)
			handle_write(ifd, wd, buf, nbuf);
	}

	/*
	 * Need to check whether wd actually has a link count of 0 to issue a
	 * LINUX_IN_DELETE_SELF.
	 */
	if (fflags & NOTE_DELETE) {
		wp = fd_getfile(wd);
		KASSERT(wp != NULL);
		KASSERT(wp->f_type == DTYPE_VNODE);
		vn_stat(wp->f_vnode, &st);
		fd_putfile(wd);

		if (st.st_nlink == 0)
			do_kevent_to_inotify(wd, LINUX_IN_DELETE_SELF, 0,
			    buf, nbuf, NULL);
	}

	/* LINUX_IN_IGNORED must be the last event issued for wd. */
	if ((flags & EV_ONESHOT) || (fflags & (NOTE_REVOKE|NOTE_DELETE))) {
		do_kevent_to_inotify(wd, LINUX_IN_IGNORED, 0, buf, nbuf, NULL);
		/*
		 * XXX in theory we could call inotify_close_wd(ifd, wd) but if
		 * we get here we must already be holding v_interlock for
		 * wd... so we can't.
		 *
		 * For simplicity we do nothing, and so wd will only be closed
		 * when the inotify fd is closed.
		 */
	}

	return error;
}

/*
 * Handle an event. Unlike EVFILT_VNODE, we translate the event to a
 * linux_inotify_event and put it in our own custom queue.
 */
static int
inotify_filt_event(struct knote *kn, long hint)
{
	struct vnode *vp = (struct vnode *)kn->kn_hook;
	struct inotifyfd *ifd;
	struct inotify_entry *cur_ie;
	size_t nbuf, i;
	uint32_t status;
	struct inotify_entry buf[LINUX_INOTIFY_MAX_FROM_KEVENT];

	/*
	 * If KN_WILLDETACH is set then
	 * 1. kn->kn_kevent.udata has already been trashed with a
	 *    struct lwp *, so we don't have access to a real ifd
	 *    anymore, and
	 * 2. we're about to detach anyways, so we don't really care
	 *    about the events.
	 * (Also because of this we need to get ifd under the same
	 * lock as kn->kn_status.)
	 */
	mutex_enter(&kn->kn_kq->kq_lock);
	status = kn->kn_status;
	ifd = kn->kn_kevent.udata;
	mutex_exit(&kn->kn_kq->kq_lock);
	if (status & KN_WILLDETACH)
		return 0;

	/*
	 * If we don't care about the NOTEs in hint, we don't generate
	 * any events.
	 */
	hint &= kn->kn_sfflags;
	if (hint == 0)
		return 0;

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT(!mutex_owned(&ifd->ifd_lock));

	mutex_enter(&ifd->ifd_qlock);

	/*
	 * early out: there's no point even translating the event if we
	 * have nowhere to put it (and a LINUX_IN_Q_OVERFLOW has
	 * already been added).
	 */
	if (ifd->ifd_qcount >= LINUX_INOTIFY_MAX_QUEUED)
		goto leave;

	nbuf = 0;
	(void)kevent_to_inotify(ifd, kn->kn_id, vp->v_type, kn->kn_flags,
	    hint, buf, &nbuf);
	for (i = 0; i < nbuf && ifd->ifd_qcount < LINUX_INOTIFY_MAX_QUEUED-1;
	    i++) {
		cur_ie = kmem_zalloc(sizeof(*cur_ie), KM_SLEEP);
		memcpy(cur_ie, &buf[i], sizeof(*cur_ie));

		TAILQ_INSERT_TAIL(&ifd->ifd_qhead, cur_ie, ie_entries);
		ifd->ifd_qcount++;
	}
	/* handle early overflow, by adding an overflow event to the end */
	if (i != nbuf) {
		nbuf = 0;
		cur_ie = kmem_zalloc(sizeof(*cur_ie), KM_SLEEP);
		do_kevent_to_inotify(-1, LINUX_IN_Q_OVERFLOW, 0,
		    cur_ie, &nbuf, NULL);

		TAILQ_INSERT_TAIL(&ifd->ifd_qhead, cur_ie, ie_entries);
		ifd->ifd_qcount++;
	}

	if (nbuf > 0) {
		cv_signal(&ifd->ifd_qcv);

		mutex_enter(&ifd->ifd_lock);
		selnotify(&ifd->ifd_sel, 0, NOTE_LOWAT);
		mutex_exit(&ifd->ifd_lock);
	} else
		DPRINTF(("%s: hint=%lx resulted in 0 inotify events\n",
		    __func__, hint));

leave:
	mutex_exit(&ifd->ifd_qlock);
	return 0;
}

/*
 * Read inotify events from the queue.
 */
static int
inotify_read(file_t *fp, off_t *offp, struct uio *uio, kauth_cred_t cred,
    int flags)
{
	struct inotify_entry *cur_iep;
	size_t cur_size, nread;
	int error = 0;
	struct inotifyfd *ifd = fp->f_data;

	mutex_enter(&ifd->ifd_qlock);

	if (ifd->ifd_qcount == 0) {
		if (fp->f_flag & O_NONBLOCK) {
			error = EAGAIN;
			goto leave;
		}

		while (ifd->ifd_qcount == 0) {
			/* wait until there is an event to read */
			error = cv_wait_sig(&ifd->ifd_qcv, &ifd->ifd_qlock);
			if (error != 0) {
				error = EINTR;
				goto leave;
			}
		}
	}

	KASSERT(ifd->ifd_qcount > 0);
	KASSERT(mutex_owned(&ifd->ifd_qlock));

	nread = 0;
	while (ifd->ifd_qcount > 0) {
		cur_iep = TAILQ_FIRST(&ifd->ifd_qhead);
		KASSERT(cur_iep != NULL);

		cur_size = sizeof(cur_iep->ie_event) + cur_iep->ie_event.len;
		if (cur_size > uio->uio_resid) {
			if (nread == 0)
				error = EINVAL;
			break;
		}

		error = uiomove(&cur_iep->ie_event, sizeof(cur_iep->ie_event),
		    uio);
		if (error != 0)
			break;
		error = uiomove(&cur_iep->ie_name, cur_iep->ie_event.len, uio);
		if (error != 0)
			break;

		/* cleanup */
		TAILQ_REMOVE(&ifd->ifd_qhead, cur_iep, ie_entries);
		kmem_free(cur_iep, sizeof(*cur_iep));

		nread++;
		ifd->ifd_qcount--;
	}

leave:
	/* Wake up the next reader, if the queue is not empty. */
	if (ifd->ifd_qcount > 0)
		cv_signal(&ifd->ifd_qcv);

	mutex_exit(&ifd->ifd_qlock);
	return error;
}

/*
 * Close all the file descriptors associated with fp.
 */
static int
inotify_close(file_t *fp)
{
	int error;
	size_t i;
	file_t *kqfp;
	struct inotifyfd *ifd = fp->f_data;

	for (i = 0; i < ifd->ifd_nwds; i++) {
		if (ifd->ifd_wds[i] != NULL) {
			error = inotify_close_wd(ifd, i);
			if (error != 0)
				return error;
		}
	}

	/* the reference we need to hold is ifd->ifd_kqfp */
	kqfp = fd_getfile(ifd->ifd_kqfd);
	if (kqfp == NULL) {
		DPRINTF(("%s: kqfp=%d is already closed\n", __func__,
		    ifd->ifd_kqfd));
	} else {
		error = fd_close(ifd->ifd_kqfd);
		if (error != 0)
			return error;
	}

	mutex_destroy(&ifd->ifd_lock);
	mutex_destroy(&ifd->ifd_qlock);
	cv_destroy(&ifd->ifd_qcv);
	seldestroy(&ifd->ifd_sel);

	kmem_free(ifd->ifd_wds, ifd->ifd_nwds * sizeof(*ifd->ifd_wds));
	kmem_free(ifd, sizeof(*ifd));
	fp->f_data = NULL;

	return 0;
}

/*
 * Check if there are pending read events.
 */
static int
inotify_poll(file_t *fp, int events)
{
	int revents;
	struct inotifyfd *ifd = fp->f_data;

	revents = 0;
	if (events & (POLLIN|POLLRDNORM)) {
		mutex_enter(&ifd->ifd_qlock);

		if (ifd->ifd_qcount > 0)
			revents |= events & (POLLIN|POLLRDNORM);

		mutex_exit(&ifd->ifd_qlock);
	}

	return revents;
}

/*
 * Attach EVFILT_READ to the inotify instance in fp.
 *
 * This is so you can watch inotify with epoll. No other kqueue
 * filter needs to be supported.
 */
static int
inotify_kqfilter(file_t *fp, struct knote *kn)
{
	struct inotifyfd *ifd = fp->f_data;

	KASSERT(fp == kn->kn_obj);

	if (kn->kn_filter != EVFILT_READ)
		return EINVAL;

	kn->kn_fop = &inotify_read_filtops;
	mutex_enter(&ifd->ifd_lock);
	selrecord_knote(&ifd->ifd_sel, kn);
	mutex_exit(&ifd->ifd_lock);

	return 0;
}

/*
 * Detach a filter from an inotify instance.
 */
static void
inotify_read_filt_detach(struct knote *kn)
{
	struct inotifyfd *ifd = ((file_t *)kn->kn_obj)->f_data;

	mutex_enter(&ifd->ifd_lock);
	selremove_knote(&ifd->ifd_sel, kn);
	mutex_exit(&ifd->ifd_lock);
}

/*
 * Handle EVFILT_READ events.  Note that kn_data is only updated when we
 * are called with a hint (i.e. via selnotify()).
 */
static int
inotify_read_filt_event(struct knote *kn, long hint)
{
	struct inotifyfd *ifd = ((file_t *)kn->kn_obj)->f_data;

	if (hint != 0) {
		KASSERT(mutex_owned(&ifd->ifd_lock));
		KASSERT(mutex_owned(&ifd->ifd_qlock));
		KASSERT(hint == NOTE_LOWAT);

		kn->kn_data = ifd->ifd_qcount;
	}

	return kn->kn_data > 0;
}

/*
 * Restart the inotify instance.
 */
static void
inotify_restart(file_t *fp)
{
	struct inotifyfd *ifd = fp->f_data;

	mutex_enter(&ifd->ifd_qlock);
	cv_broadcast(&ifd->ifd_qcv);
	mutex_exit(&ifd->ifd_qlock);
}