genfs_vnops.c revision 1.218 1 /* $NetBSD: genfs_vnops.c,v 1.218 2022/03/27 16:23:08 christos Exp $ */
2
3 /*-
4 * Copyright (c) 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. Neither the name of the University nor the names of its contributors
42 * may be used to endorse or promote products derived from this software
43 * without specific prior written permission.
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * SUCH DAMAGE.
56 *
57 */
58
59 #include <sys/cdefs.h>
60 __KERNEL_RCSID(0, "$NetBSD: genfs_vnops.c,v 1.218 2022/03/27 16:23:08 christos Exp $");
61
62 #include <sys/param.h>
63 #include <sys/systm.h>
64 #include <sys/proc.h>
65 #include <sys/kernel.h>
66 #include <sys/mount.h>
67 #include <sys/fstrans.h>
68 #include <sys/namei.h>
69 #include <sys/vnode_impl.h>
70 #include <sys/fcntl.h>
71 #include <sys/kmem.h>
72 #include <sys/poll.h>
73 #include <sys/mman.h>
74 #include <sys/file.h>
75 #include <sys/kauth.h>
76 #include <sys/stat.h>
77 #include <sys/extattr.h>
78
79 #include <miscfs/genfs/genfs.h>
80 #include <miscfs/genfs/genfs_node.h>
81 #include <miscfs/specfs/specdev.h>
82
/*
 * Forward declarations for the kqueue filter callbacks defined in this
 * file.  filt_genfswrite is listed with its siblings so that all four
 * callbacks are declared in one place.
 */
static void	filt_genfsdetach(struct knote *);
static int	filt_genfsread(struct knote *, long);
static int	filt_genfswrite(struct knote *, long);
static int	filt_genfsvnode(struct knote *, long);
86
87 /*
88 * Find the end of the first path component in NAME and return its
89 * length.
90 */
91 int
92 genfs_parsepath(void *v)
93 {
94 struct vop_parsepath_args /* {
95 struct vnode *a_dvp;
96 const char *a_name;
97 size_t *a_ret;
98 } */ *ap = v;
99 const char *name = ap->a_name;
100 size_t pos;
101
102 (void)ap->a_dvp;
103
104 pos = 0;
105 while (name[pos] != '\0' && name[pos] != '/') {
106 pos++;
107 }
108 *ap->a_retval = pos;
109 return 0;
110 }
111
112 int
113 genfs_poll(void *v)
114 {
115 struct vop_poll_args /* {
116 struct vnode *a_vp;
117 int a_events;
118 struct lwp *a_l;
119 } */ *ap = v;
120
121 return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
122 }
123
124 int
125 genfs_seek(void *v)
126 {
127 struct vop_seek_args /* {
128 struct vnode *a_vp;
129 off_t a_oldoff;
130 off_t a_newoff;
131 kauth_cred_t cred;
132 } */ *ap = v;
133
134 if (ap->a_newoff < 0)
135 return (EINVAL);
136
137 return (0);
138 }
139
/*
 * Generic abortop: there is nothing to undo here, so the arguments are
 * ignored and success (0) is returned.
 */
int
genfs_abortop(void *v)
{
	struct vop_abortop_args /* {
		struct vnode *a_dvp;
		struct componentname *a_cnp;
	} */ *ap = v;

	/* Arguments are intentionally unused. */
	(void)ap;
	return 0;
}
152
153 int
154 genfs_fcntl(void *v)
155 {
156 struct vop_fcntl_args /* {
157 struct vnode *a_vp;
158 u_int a_command;
159 void *a_data;
160 int a_fflag;
161 kauth_cred_t a_cred;
162 struct lwp *a_l;
163 } */ *ap = v;
164
165 if (ap->a_command == F_SETFL)
166 return (0);
167 else
168 return (EOPNOTSUPP);
169 }
170
/*
 * Vnode operation that must never be invoked: calling it panics with
 * the message "genfs: bad op".  The argument is ignored.
 */
/*ARGSUSED*/
int
genfs_badop(void *v)
{

	panic("genfs: bad op");
}
178
/*
 * Vnode operation that does nothing and reports success (0).
 * The argument is ignored.
 */
/*ARGSUSED*/
int
genfs_nullop(void *v)
{
	const int success = 0;

	return success;
}
186
/*
 * Vnode operation that unconditionally fails with EINVAL.
 * The argument is ignored.
 */
/*ARGSUSED*/
int
genfs_einval(void *v)
{
	const int error = EINVAL;

	return error;
}
194
195 /*
196 * Called when an fs doesn't support a particular vop.
197 * This takes care to vrele, vput, or vunlock passed in vnodes
198 * and calls VOP_ABORTOP for a componentname (in non-rename VOP).
199 */
200 int
201 genfs_eopnotsupp(void *v)
202 {
203 struct vop_generic_args /*
204 struct vnodeop_desc *a_desc;
205 / * other random data follows, presumably * /
206 } */ *ap = v;
207 struct vnodeop_desc *desc = ap->a_desc;
208 struct vnode *vp, *vp_last = NULL;
209 int flags, i, j, offset_cnp, offset_vp;
210
211 KASSERT(desc->vdesc_offset != VOP_LOOKUP_DESCOFFSET);
212 KASSERT(desc->vdesc_offset != VOP_ABORTOP_DESCOFFSET);
213
214 /*
215 * Abort any componentname that lookup potentially left state in.
216 *
217 * As is logical, componentnames for VOP_RENAME are handled by
218 * the caller of VOP_RENAME. Yay, rename!
219 */
220 if (desc->vdesc_offset != VOP_RENAME_DESCOFFSET &&
221 (offset_vp = desc->vdesc_vp_offsets[0]) != VDESC_NO_OFFSET &&
222 (offset_cnp = desc->vdesc_componentname_offset) != VDESC_NO_OFFSET){
223 struct componentname *cnp;
224 struct vnode *dvp;
225
226 dvp = *VOPARG_OFFSETTO(struct vnode **, offset_vp, ap);
227 cnp = *VOPARG_OFFSETTO(struct componentname **, offset_cnp, ap);
228
229 VOP_ABORTOP(dvp, cnp);
230 }
231
232 flags = desc->vdesc_flags;
233 for (i = 0; i < VDESC_MAX_VPS; flags >>=1, i++) {
234 if ((offset_vp = desc->vdesc_vp_offsets[i]) == VDESC_NO_OFFSET)
235 break; /* stop at end of list */
236 if ((j = flags & VDESC_VP0_WILLPUT)) {
237 vp = *VOPARG_OFFSETTO(struct vnode **, offset_vp, ap);
238
239 /* Skip if NULL */
240 if (!vp)
241 continue;
242
243 switch (j) {
244 case VDESC_VP0_WILLPUT:
245 /* Check for dvp == vp cases */
246 if (vp == vp_last)
247 vrele(vp);
248 else {
249 vput(vp);
250 vp_last = vp;
251 }
252 break;
253 case VDESC_VP0_WILLRELE:
254 vrele(vp);
255 break;
256 }
257 }
258 }
259
260 return (EOPNOTSUPP);
261 }
262
/*
 * Vnode operation that unconditionally fails with EBADF.
 * The argument is ignored.
 */
/*ARGSUSED*/
int
genfs_ebadf(void *v)
{
	const int error = EBADF;

	return error;
}
270
271 /* ARGSUSED */
272 int
273 genfs_enoioctl(void *v)
274 {
275
276 return (EPASSTHROUGH);
277 }
278
279
280 /*
281 * Eliminate all activity associated with the requested vnode
282 * and with all vnodes aliased to the requested vnode.
283 */
284 int
285 genfs_revoke(void *v)
286 {
287 struct vop_revoke_args /* {
288 struct vnode *a_vp;
289 int a_flags;
290 } */ *ap = v;
291
292 #ifdef DIAGNOSTIC
293 if ((ap->a_flags & REVOKEALL) == 0)
294 panic("genfs_revoke: not revokeall");
295 #endif
296 vrevoke(ap->a_vp);
297 return (0);
298 }
299
300 /*
301 * Lock the node (for deadfs).
302 */
303 int
304 genfs_deadlock(void *v)
305 {
306 struct vop_lock_args /* {
307 struct vnode *a_vp;
308 int a_flags;
309 } */ *ap = v;
310 vnode_t *vp = ap->a_vp;
311 vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
312 int flags = ap->a_flags;
313 krw_t op;
314
315 if (! ISSET(flags, LK_RETRY))
316 return ENOENT;
317
318 if (ISSET(flags, LK_DOWNGRADE)) {
319 rw_downgrade(&vip->vi_lock);
320 } else if (ISSET(flags, LK_UPGRADE)) {
321 KASSERT(ISSET(flags, LK_NOWAIT));
322 if (!rw_tryupgrade(&vip->vi_lock)) {
323 return EBUSY;
324 }
325 } else if ((flags & (LK_EXCLUSIVE | LK_SHARED)) != 0) {
326 op = (ISSET(flags, LK_EXCLUSIVE) ? RW_WRITER : RW_READER);
327 if (ISSET(flags, LK_NOWAIT)) {
328 if (!rw_tryenter(&vip->vi_lock, op))
329 return EBUSY;
330 } else {
331 rw_enter(&vip->vi_lock, op);
332 }
333 }
334 VSTATE_ASSERT_UNLOCKED(vp, VS_RECLAIMED);
335 return 0;
336 }
337
338 /*
339 * Unlock the node (for deadfs).
340 */
341 int
342 genfs_deadunlock(void *v)
343 {
344 struct vop_unlock_args /* {
345 struct vnode *a_vp;
346 } */ *ap = v;
347 vnode_t *vp = ap->a_vp;
348 vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
349
350 rw_exit(&vip->vi_lock);
351
352 return 0;
353 }
354
355 /*
356 * Lock the node.
357 */
358 int
359 genfs_lock(void *v)
360 {
361 struct vop_lock_args /* {
362 struct vnode *a_vp;
363 int a_flags;
364 } */ *ap = v;
365 vnode_t *vp = ap->a_vp;
366 vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
367 int flags = ap->a_flags;
368 krw_t op;
369
370 if (ISSET(flags, LK_DOWNGRADE)) {
371 rw_downgrade(&vip->vi_lock);
372 } else if (ISSET(flags, LK_UPGRADE)) {
373 KASSERT(ISSET(flags, LK_NOWAIT));
374 if (!rw_tryupgrade(&vip->vi_lock)) {
375 return EBUSY;
376 }
377 } else if ((flags & (LK_EXCLUSIVE | LK_SHARED)) != 0) {
378 op = (ISSET(flags, LK_EXCLUSIVE) ? RW_WRITER : RW_READER);
379 if (ISSET(flags, LK_NOWAIT)) {
380 if (!rw_tryenter(&vip->vi_lock, op))
381 return EBUSY;
382 } else {
383 rw_enter(&vip->vi_lock, op);
384 }
385 }
386 VSTATE_ASSERT_UNLOCKED(vp, VS_ACTIVE);
387 return 0;
388 }
389
390 /*
391 * Unlock the node.
392 */
393 int
394 genfs_unlock(void *v)
395 {
396 struct vop_unlock_args /* {
397 struct vnode *a_vp;
398 } */ *ap = v;
399 vnode_t *vp = ap->a_vp;
400 vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
401
402 rw_exit(&vip->vi_lock);
403
404 return 0;
405 }
406
407 /*
408 * Return whether or not the node is locked.
409 */
410 int
411 genfs_islocked(void *v)
412 {
413 struct vop_islocked_args /* {
414 struct vnode *a_vp;
415 } */ *ap = v;
416 vnode_t *vp = ap->a_vp;
417 vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
418
419 if (rw_write_held(&vip->vi_lock))
420 return LK_EXCLUSIVE;
421
422 if (rw_read_held(&vip->vi_lock))
423 return LK_SHARED;
424
425 return 0;
426 }
427
/*
 * Generic mmap: nothing to do, always succeeds (returns 0).
 * The argument is ignored.
 */
int
genfs_mmap(void *v)
{
	const int success = 0;

	return success;
}
434
435 /*
436 * VOP_PUTPAGES() for vnodes which never have pages.
437 */
438
439 int
440 genfs_null_putpages(void *v)
441 {
442 struct vop_putpages_args /* {
443 struct vnode *a_vp;
444 voff_t a_offlo;
445 voff_t a_offhi;
446 int a_flags;
447 } */ *ap = v;
448 struct vnode *vp = ap->a_vp;
449
450 KASSERT(vp->v_uobj.uo_npages == 0);
451 rw_exit(vp->v_uobj.vmobjlock);
452 return (0);
453 }
454
455 void
456 genfs_node_init(struct vnode *vp, const struct genfs_ops *ops)
457 {
458 struct genfs_node *gp = VTOG(vp);
459
460 rw_init(&gp->g_glock);
461 gp->g_op = ops;
462 }
463
464 void
465 genfs_node_destroy(struct vnode *vp)
466 {
467 struct genfs_node *gp = VTOG(vp);
468
469 rw_destroy(&gp->g_glock);
470 }
471
472 void
473 genfs_size(struct vnode *vp, off_t size, off_t *eobp, int flags)
474 {
475 int bsize;
476
477 bsize = 1 << vp->v_mount->mnt_fs_bshift;
478 *eobp = (size + bsize - 1) & ~(bsize - 1);
479 }
480
481 static void
482 filt_genfsdetach(struct knote *kn)
483 {
484 struct vnode *vp = (struct vnode *)kn->kn_hook;
485
486 vn_knote_detach(vp, kn);
487 }
488
489 static int
490 filt_genfsread(struct knote *kn, long hint)
491 {
492 struct vnode *vp = (struct vnode *)kn->kn_hook;
493 int rv;
494
495 /*
496 * filesystem is gone, so set the EOF flag and schedule
497 * the knote for deletion.
498 */
499 switch (hint) {
500 case NOTE_REVOKE:
501 KASSERT(mutex_owned(vp->v_interlock));
502 knote_set_eof(kn, EV_ONESHOT);
503 return (1);
504 case 0:
505 mutex_enter(vp->v_interlock);
506 kn->kn_data = vp->v_size - ((file_t *)kn->kn_obj)->f_offset;
507 rv = (kn->kn_data != 0);
508 mutex_exit(vp->v_interlock);
509 return rv;
510 default:
511 KASSERT(mutex_owned(vp->v_interlock));
512 kn->kn_data = vp->v_size - ((file_t *)kn->kn_obj)->f_offset;
513 return (kn->kn_data != 0);
514 }
515 }
516
517 static int
518 filt_genfswrite(struct knote *kn, long hint)
519 {
520 struct vnode *vp = (struct vnode *)kn->kn_hook;
521
522 /*
523 * filesystem is gone, so set the EOF flag and schedule
524 * the knote for deletion.
525 */
526 switch (hint) {
527 case NOTE_REVOKE:
528 KASSERT(mutex_owned(vp->v_interlock));
529 knote_set_eof(kn, EV_ONESHOT);
530 return (1);
531 case 0:
532 mutex_enter(vp->v_interlock);
533 kn->kn_data = 0;
534 mutex_exit(vp->v_interlock);
535 return 1;
536 default:
537 KASSERT(mutex_owned(vp->v_interlock));
538 kn->kn_data = 0;
539 return 1;
540 }
541 }
542
543 static int
544 filt_genfsvnode(struct knote *kn, long hint)
545 {
546 struct vnode *vp = (struct vnode *)kn->kn_hook;
547 int fflags;
548
549 switch (hint) {
550 case NOTE_REVOKE:
551 KASSERT(mutex_owned(vp->v_interlock));
552 knote_set_eof(kn, 0);
553 if ((kn->kn_sfflags & hint) != 0)
554 kn->kn_fflags |= hint;
555 return (1);
556 case 0:
557 mutex_enter(vp->v_interlock);
558 fflags = kn->kn_fflags;
559 mutex_exit(vp->v_interlock);
560 break;
561 default:
562 KASSERT(mutex_owned(vp->v_interlock));
563 if ((kn->kn_sfflags & hint) != 0)
564 kn->kn_fflags |= hint;
565 fflags = kn->kn_fflags;
566 break;
567 }
568
569 return (fflags != 0);
570 }
571
572 static const struct filterops genfsread_filtops = {
573 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
574 .f_attach = NULL,
575 .f_detach = filt_genfsdetach,
576 .f_event = filt_genfsread,
577 };
578
579 static const struct filterops genfswrite_filtops = {
580 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
581 .f_attach = NULL,
582 .f_detach = filt_genfsdetach,
583 .f_event = filt_genfswrite,
584 };
585
586 static const struct filterops genfsvnode_filtops = {
587 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
588 .f_attach = NULL,
589 .f_detach = filt_genfsdetach,
590 .f_event = filt_genfsvnode,
591 };
592
593 int
594 genfs_kqfilter(void *v)
595 {
596 struct vop_kqfilter_args /* {
597 struct vnode *a_vp;
598 struct knote *a_kn;
599 } */ *ap = v;
600 struct vnode *vp;
601 struct knote *kn;
602
603 vp = ap->a_vp;
604 kn = ap->a_kn;
605 switch (kn->kn_filter) {
606 case EVFILT_READ:
607 kn->kn_fop = &genfsread_filtops;
608 break;
609 case EVFILT_WRITE:
610 kn->kn_fop = &genfswrite_filtops;
611 break;
612 case EVFILT_VNODE:
613 kn->kn_fop = &genfsvnode_filtops;
614 break;
615 default:
616 return (EINVAL);
617 }
618
619 kn->kn_hook = vp;
620
621 vn_knote_attach(vp, kn);
622
623 return (0);
624 }
625
626 void
627 genfs_node_wrlock(struct vnode *vp)
628 {
629 struct genfs_node *gp = VTOG(vp);
630
631 rw_enter(&gp->g_glock, RW_WRITER);
632 }
633
634 void
635 genfs_node_rdlock(struct vnode *vp)
636 {
637 struct genfs_node *gp = VTOG(vp);
638
639 rw_enter(&gp->g_glock, RW_READER);
640 }
641
642 int
643 genfs_node_rdtrylock(struct vnode *vp)
644 {
645 struct genfs_node *gp = VTOG(vp);
646
647 return rw_tryenter(&gp->g_glock, RW_READER);
648 }
649
650 void
651 genfs_node_unlock(struct vnode *vp)
652 {
653 struct genfs_node *gp = VTOG(vp);
654
655 rw_exit(&gp->g_glock);
656 }
657
658 int
659 genfs_node_wrlocked(struct vnode *vp)
660 {
661 struct genfs_node *gp = VTOG(vp);
662
663 return rw_write_held(&gp->g_glock);
664 }
665
666 /*
667 * Common filesystem object access control check routine. Accepts a
668 * vnode, cred, uid, gid, mode, acl, requested access mode.
669 * Returns 0 on success, or an errno on failure.
670 */
671 int
672 genfs_can_access(vnode_t *vp, kauth_cred_t cred, uid_t file_uid, gid_t file_gid,
673 mode_t file_mode, struct acl *acl, accmode_t accmode)
674 {
675 accmode_t dac_granted;
676 int error;
677
678 KASSERT((accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND)) == 0);
679 KASSERT((accmode & VAPPEND) == 0 || (accmode & VWRITE));
680
681 /*
682 * Look for a normal, non-privileged way to access the file/directory
683 * as requested. If it exists, go with that.
684 */
685
686 dac_granted = 0;
687
688 /* Check the owner. */
689 if (kauth_cred_geteuid(cred) == file_uid) {
690 dac_granted |= VADMIN;
691 if (file_mode & S_IXUSR)
692 dac_granted |= VEXEC;
693 if (file_mode & S_IRUSR)
694 dac_granted |= VREAD;
695 if (file_mode & S_IWUSR)
696 dac_granted |= (VWRITE | VAPPEND);
697
698 goto privchk;
699 }
700
701 /* Otherwise, check the groups (first match) */
702 /* Otherwise, check the groups. */
703 error = kauth_cred_groupmember(cred, file_gid);
704 if (error > 0)
705 return error;
706 if (error == 0) {
707 if (file_mode & S_IXGRP)
708 dac_granted |= VEXEC;
709 if (file_mode & S_IRGRP)
710 dac_granted |= VREAD;
711 if (file_mode & S_IWGRP)
712 dac_granted |= (VWRITE | VAPPEND);
713
714 goto privchk;
715 }
716
717 /* Otherwise, check everyone else. */
718 if (file_mode & S_IXOTH)
719 dac_granted |= VEXEC;
720 if (file_mode & S_IROTH)
721 dac_granted |= VREAD;
722 if (file_mode & S_IWOTH)
723 dac_granted |= (VWRITE | VAPPEND);
724
725 privchk:
726 if ((accmode & dac_granted) == accmode)
727 return 0;
728
729 return (accmode & VADMIN) ? EPERM : EACCES;
730 }
731
732 /*
733 * Implement a version of genfs_can_access() that understands POSIX.1e ACL
734 * semantics;
735 * the access ACL has already been prepared for evaluation by the file system
736 * and is passed via 'uid', 'gid', and 'acl'. Return 0 on success, else an
737 * errno value.
738 */
739 int
740 genfs_can_access_acl_posix1e(vnode_t *vp, kauth_cred_t cred, uid_t file_uid,
741 gid_t file_gid, mode_t file_mode, struct acl *acl, accmode_t accmode)
742 {
743 struct acl_entry *acl_other, *acl_mask;
744 accmode_t dac_granted;
745 accmode_t acl_mask_granted;
746 int group_matched, i;
747 int error;
748
749 KASSERT((accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND)) == 0);
750 KASSERT((accmode & VAPPEND) == 0 || (accmode & VWRITE));
751
752 /*
753 * The owner matches if the effective uid associated with the
754 * credential matches that of the ACL_USER_OBJ entry. While we're
755 * doing the first scan, also cache the location of the ACL_MASK and
756 * ACL_OTHER entries, preventing some future iterations.
757 */
758 acl_mask = acl_other = NULL;
759 for (i = 0; i < acl->acl_cnt; i++) {
760 struct acl_entry *ae = &acl->acl_entry[i];
761 switch (ae->ae_tag) {
762 case ACL_USER_OBJ:
763 if (kauth_cred_geteuid(cred) != file_uid)
764 break;
765 dac_granted = 0;
766 dac_granted |= VADMIN;
767 if (ae->ae_perm & ACL_EXECUTE)
768 dac_granted |= VEXEC;
769 if (ae->ae_perm & ACL_READ)
770 dac_granted |= VREAD;
771 if (ae->ae_perm & ACL_WRITE)
772 dac_granted |= (VWRITE | VAPPEND);
773 goto out;
774
775 case ACL_MASK:
776 acl_mask = ae;
777 break;
778
779 case ACL_OTHER:
780 acl_other = ae;
781 break;
782
783 default:
784 break;
785 }
786 }
787
788 /*
789 * An ACL_OTHER entry should always exist in a valid access ACL. If
790 * it doesn't, then generate a serious failure. For now, this means
791 * a debugging message and EPERM, but in the future should probably
792 * be a panic.
793 */
794 if (acl_other == NULL) {
795 /*
796 * XXX This should never happen
797 */
798 printf("%s: ACL_OTHER missing\n", __func__);
799 return EPERM;
800 }
801
802 /*
803 * Checks against ACL_USER, ACL_GROUP_OBJ, and ACL_GROUP fields are
804 * masked by an ACL_MASK entry, if any. As such, first identify the
805 * ACL_MASK field, then iterate through identifying potential user
806 * matches, then group matches. If there is no ACL_MASK, assume that
807 * the mask allows all requests to succeed.
808 */
809 if (acl_mask != NULL) {
810 acl_mask_granted = 0;
811 if (acl_mask->ae_perm & ACL_EXECUTE)
812 acl_mask_granted |= VEXEC;
813 if (acl_mask->ae_perm & ACL_READ)
814 acl_mask_granted |= VREAD;
815 if (acl_mask->ae_perm & ACL_WRITE)
816 acl_mask_granted |= (VWRITE | VAPPEND);
817 } else
818 acl_mask_granted = VEXEC | VREAD | VWRITE | VAPPEND;
819
820 /*
821 * Check ACL_USER ACL entries. There will either be one or no
822 * matches; if there is one, we accept or rejected based on the
823 * match; otherwise, we continue on to groups.
824 */
825 for (i = 0; i < acl->acl_cnt; i++) {
826 struct acl_entry *ae = &acl->acl_entry[i];
827 switch (ae->ae_tag) {
828 case ACL_USER:
829 if (kauth_cred_geteuid(cred) != ae->ae_id)
830 break;
831 dac_granted = 0;
832 if (ae->ae_perm & ACL_EXECUTE)
833 dac_granted |= VEXEC;
834 if (ae->ae_perm & ACL_READ)
835 dac_granted |= VREAD;
836 if (ae->ae_perm & ACL_WRITE)
837 dac_granted |= (VWRITE | VAPPEND);
838 dac_granted &= acl_mask_granted;
839 goto out;
840 }
841 }
842
843 /*
844 * Group match is best-match, not first-match, so find a "best"
845 * match. Iterate across, testing each potential group match. Make
846 * sure we keep track of whether we found a match or not, so that we
847 * know if we should try again with any available privilege, or if we
848 * should move on to ACL_OTHER.
849 */
850 group_matched = 0;
851 for (i = 0; i < acl->acl_cnt; i++) {
852 struct acl_entry *ae = &acl->acl_entry[i];
853 switch (ae->ae_tag) {
854 case ACL_GROUP_OBJ:
855 error = kauth_cred_groupmember(cred, file_gid);
856 if (error > 0)
857 return error;
858 if (error)
859 break;
860 dac_granted = 0;
861 if (ae->ae_perm & ACL_EXECUTE)
862 dac_granted |= VEXEC;
863 if (ae->ae_perm & ACL_READ)
864 dac_granted |= VREAD;
865 if (ae->ae_perm & ACL_WRITE)
866 dac_granted |= (VWRITE | VAPPEND);
867 dac_granted &= acl_mask_granted;
868
869 if ((accmode & dac_granted) == accmode)
870 return 0;
871
872 group_matched = 1;
873 break;
874
875 case ACL_GROUP:
876 error = kauth_cred_groupmember(cred, ae->ae_id);
877 if (error > 0)
878 return error;
879 if (error)
880 break;
881 dac_granted = 0;
882 if (ae->ae_perm & ACL_EXECUTE)
883 dac_granted |= VEXEC;
884 if (ae->ae_perm & ACL_READ)
885 dac_granted |= VREAD;
886 if (ae->ae_perm & ACL_WRITE)
887 dac_granted |= (VWRITE | VAPPEND);
888 dac_granted &= acl_mask_granted;
889
890 if ((accmode & dac_granted) == accmode)
891 return 0;
892
893 group_matched = 1;
894 break;
895
896 default:
897 break;
898 }
899 }
900
901 if (group_matched == 1) {
902 /*
903 * There was a match, but it did not grant rights via pure
904 * DAC. Try again, this time with privilege.
905 */
906 for (i = 0; i < acl->acl_cnt; i++) {
907 struct acl_entry *ae = &acl->acl_entry[i];
908 switch (ae->ae_tag) {
909 case ACL_GROUP_OBJ:
910 error = kauth_cred_groupmember(cred, file_gid);
911 if (error > 0)
912 return error;
913 if (error)
914 break;
915 dac_granted = 0;
916 if (ae->ae_perm & ACL_EXECUTE)
917 dac_granted |= VEXEC;
918 if (ae->ae_perm & ACL_READ)
919 dac_granted |= VREAD;
920 if (ae->ae_perm & ACL_WRITE)
921 dac_granted |= (VWRITE | VAPPEND);
922 dac_granted &= acl_mask_granted;
923 goto out;
924
925 case ACL_GROUP:
926 error = kauth_cred_groupmember(cred, ae->ae_id);
927 if (error > 0)
928 return error;
929 if (error)
930 break;
931 dac_granted = 0;
932 if (ae->ae_perm & ACL_EXECUTE)
933 dac_granted |= VEXEC;
934 if (ae->ae_perm & ACL_READ)
935 dac_granted |= VREAD;
936 if (ae->ae_perm & ACL_WRITE)
937 dac_granted |= (VWRITE | VAPPEND);
938 dac_granted &= acl_mask_granted;
939
940 goto out;
941 default:
942 break;
943 }
944 }
945 /*
946 * Even with privilege, group membership was not sufficient.
947 * Return failure.
948 */
949 dac_granted = 0;
950 goto out;
951 }
952
953 /*
954 * Fall back on ACL_OTHER. ACL_MASK is not applied to ACL_OTHER.
955 */
956 dac_granted = 0;
957 if (acl_other->ae_perm & ACL_EXECUTE)
958 dac_granted |= VEXEC;
959 if (acl_other->ae_perm & ACL_READ)
960 dac_granted |= VREAD;
961 if (acl_other->ae_perm & ACL_WRITE)
962 dac_granted |= (VWRITE | VAPPEND);
963
964 out:
965 if ((accmode & dac_granted) == accmode)
966 return 0;
967 return (accmode & VADMIN) ? EPERM : EACCES;
968 }
969
970 static struct {
971 accmode_t accmode;
972 int mask;
973 } accmode2mask[] = {
974 { VREAD, ACL_READ_DATA },
975 { VWRITE, ACL_WRITE_DATA },
976 { VAPPEND, ACL_APPEND_DATA },
977 { VEXEC, ACL_EXECUTE },
978 { VREAD_NAMED_ATTRS, ACL_READ_NAMED_ATTRS },
979 { VWRITE_NAMED_ATTRS, ACL_WRITE_NAMED_ATTRS },
980 { VDELETE_CHILD, ACL_DELETE_CHILD },
981 { VREAD_ATTRIBUTES, ACL_READ_ATTRIBUTES },
982 { VWRITE_ATTRIBUTES, ACL_WRITE_ATTRIBUTES },
983 { VDELETE, ACL_DELETE },
984 { VREAD_ACL, ACL_READ_ACL },
985 { VWRITE_ACL, ACL_WRITE_ACL },
986 { VWRITE_OWNER, ACL_WRITE_OWNER },
987 { VSYNCHRONIZE, ACL_SYNCHRONIZE },
988 { 0, 0 },
989 };
990
991 static int
992 _access_mask_from_accmode(accmode_t accmode)
993 {
994 int access_mask = 0, i;
995
996 for (i = 0; accmode2mask[i].accmode != 0; i++) {
997 if (accmode & accmode2mask[i].accmode)
998 access_mask |= accmode2mask[i].mask;
999 }
1000
1001 /*
1002 * VAPPEND is just a modifier for VWRITE; if the caller asked
1003 * for 'VAPPEND | VWRITE', we want to check for ACL_APPEND_DATA only.
1004 */
1005 if (access_mask & ACL_APPEND_DATA)
1006 access_mask &= ~ACL_WRITE_DATA;
1007
1008 return (access_mask);
1009 }
1010
1011 /*
1012 * Return 0, iff access is allowed, 1 otherwise.
1013 */
1014 static int
1015 _acl_denies(const struct acl *aclp, int access_mask, kauth_cred_t cred,
1016 int file_uid, int file_gid, int *denied_explicitly)
1017 {
1018 int i, error;
1019 const struct acl_entry *ae;
1020
1021 if (denied_explicitly != NULL)
1022 *denied_explicitly = 0;
1023
1024 KASSERT(aclp->acl_cnt <= ACL_MAX_ENTRIES);
1025
1026 for (i = 0; i < aclp->acl_cnt; i++) {
1027 ae = &(aclp->acl_entry[i]);
1028
1029 if (ae->ae_entry_type != ACL_ENTRY_TYPE_ALLOW &&
1030 ae->ae_entry_type != ACL_ENTRY_TYPE_DENY)
1031 continue;
1032 if (ae->ae_flags & ACL_ENTRY_INHERIT_ONLY)
1033 continue;
1034 switch (ae->ae_tag) {
1035 case ACL_USER_OBJ:
1036 if (kauth_cred_geteuid(cred) != file_uid)
1037 continue;
1038 break;
1039 case ACL_USER:
1040 if (kauth_cred_geteuid(cred) != ae->ae_id)
1041 continue;
1042 break;
1043 case ACL_GROUP_OBJ:
1044 error = kauth_cred_groupmember(cred, file_gid);
1045 if (error > 0)
1046 return error;
1047 if (error != 0)
1048 continue;
1049 break;
1050 case ACL_GROUP:
1051 error = kauth_cred_groupmember(cred, ae->ae_id);
1052 if (error > 0)
1053 return error;
1054 if (error != 0)
1055 continue;
1056 break;
1057 default:
1058 KASSERT(ae->ae_tag == ACL_EVERYONE);
1059 }
1060
1061 if (ae->ae_entry_type == ACL_ENTRY_TYPE_DENY) {
1062 if (ae->ae_perm & access_mask) {
1063 if (denied_explicitly != NULL)
1064 *denied_explicitly = 1;
1065 return (1);
1066 }
1067 }
1068
1069 access_mask &= ~(ae->ae_perm);
1070 if (access_mask == 0)
1071 return (0);
1072 }
1073
1074 if (access_mask == 0)
1075 return (0);
1076
1077 return (1);
1078 }
1079
1080 int
1081 genfs_can_access_acl_nfs4(vnode_t *vp, kauth_cred_t cred, uid_t file_uid,
1082 gid_t file_gid, mode_t file_mode, struct acl *aclp, accmode_t accmode)
1083 {
1084 int denied, explicitly_denied, access_mask, is_directory,
1085 must_be_owner = 0;
1086 file_mode = 0;
1087
1088 KASSERT((accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND |
1089 VEXPLICIT_DENY | VREAD_NAMED_ATTRS | VWRITE_NAMED_ATTRS |
1090 VDELETE_CHILD | VREAD_ATTRIBUTES | VWRITE_ATTRIBUTES | VDELETE |
1091 VREAD_ACL | VWRITE_ACL | VWRITE_OWNER | VSYNCHRONIZE)) == 0);
1092 KASSERT((accmode & VAPPEND) == 0 || (accmode & VWRITE));
1093
1094 if (accmode & VADMIN)
1095 must_be_owner = 1;
1096
1097 /*
1098 * Ignore VSYNCHRONIZE permission.
1099 */
1100 accmode &= ~VSYNCHRONIZE;
1101
1102 access_mask = _access_mask_from_accmode(accmode);
1103
1104 if (vp && vp->v_type == VDIR)
1105 is_directory = 1;
1106 else
1107 is_directory = 0;
1108
1109 /*
1110 * File owner is always allowed to read and write the ACL
1111 * and basic attributes. This is to prevent a situation
1112 * where user would change ACL in a way that prevents him
1113 * from undoing the change.
1114 */
1115 if (kauth_cred_geteuid(cred) == file_uid)
1116 access_mask &= ~(ACL_READ_ACL | ACL_WRITE_ACL |
1117 ACL_READ_ATTRIBUTES | ACL_WRITE_ATTRIBUTES);
1118
1119 /*
1120 * Ignore append permission for regular files; use write
1121 * permission instead.
1122 */
1123 if (!is_directory && (access_mask & ACL_APPEND_DATA)) {
1124 access_mask &= ~ACL_APPEND_DATA;
1125 access_mask |= ACL_WRITE_DATA;
1126 }
1127
1128 denied = _acl_denies(aclp, access_mask, cred, file_uid, file_gid,
1129 &explicitly_denied);
1130
1131 if (must_be_owner) {
1132 if (kauth_cred_geteuid(cred) != file_uid)
1133 denied = EPERM;
1134 }
1135
1136 /*
1137 * For VEXEC, ensure that at least one execute bit is set for
1138 * non-directories. We have to check the mode here to stay
1139 * consistent with execve(2). See the test in
1140 * exec_check_permissions().
1141 */
1142 __acl_nfs4_sync_mode_from_acl(&file_mode, aclp);
1143 if (!denied && !is_directory && (accmode & VEXEC) &&
1144 (file_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)
1145 denied = EACCES;
1146
1147 if (!denied)
1148 return (0);
1149
1150 /*
1151 * Access failed. Iff it was not denied explicitly and
1152 * VEXPLICIT_DENY flag was specified, allow access.
1153 */
1154 if ((accmode & VEXPLICIT_DENY) && explicitly_denied == 0)
1155 return (0);
1156
1157 accmode &= ~VEXPLICIT_DENY;
1158
1159 if (accmode & (VADMIN_PERMS | VDELETE_CHILD | VDELETE))
1160 denied = EPERM;
1161 else
1162 denied = EACCES;
1163
1164 return (denied);
1165 }
1166
1167 /*
1168 * Common routine to check if chmod() is allowed.
1169 *
1170 * Policy:
1171 * - You must own the file, and
1172 * - You must not set the "sticky" bit (meaningless, see chmod(2))
1173 * - You must be a member of the group if you're trying to set the
1174 * SGIDf bit
1175 *
1176 * vp - vnode of the file-system object
1177 * cred - credentials of the invoker
1178 * cur_uid, cur_gid - current uid/gid of the file-system object
1179 * new_mode - new mode for the file-system object
1180 *
1181 * Returns 0 if the change is allowed, or an error value otherwise.
1182 */
1183 int
1184 genfs_can_chmod(vnode_t *vp, kauth_cred_t cred, uid_t cur_uid,
1185 gid_t cur_gid, mode_t new_mode)
1186 {
1187 int error;
1188
1189 /*
1190 * To modify the permissions on a file, must possess VADMIN
1191 * for that file.
1192 */
1193 if ((error = VOP_ACCESSX(vp, VWRITE_ACL, cred)) != 0)
1194 return (error);
1195
1196 /*
1197 * Unprivileged users can't set the sticky bit on files.
1198 */
1199 if ((vp->v_type != VDIR) && (new_mode & S_ISTXT))
1200 return (EFTYPE);
1201
1202 /*
1203 * If the invoker is trying to set the SGID bit on the file,
1204 * check group membership.
1205 */
1206 if (new_mode & S_ISGID) {
1207 int ismember;
1208
1209 error = kauth_cred_ismember_gid(cred, cur_gid,
1210 &ismember);
1211 if (error || !ismember)
1212 return (EPERM);
1213 }
1214
1215 /*
1216 * Deny setting setuid if we are not the file owner.
1217 */
1218 if ((new_mode & S_ISUID) && cur_uid != kauth_cred_geteuid(cred))
1219 return (EPERM);
1220
1221 return (0);
1222 }
1223
1224 /*
1225 * Common routine to check if chown() is allowed.
1226 *
1227 * Policy:
1228 * - You must own the file, and
1229 * - You must not try to change ownership, and
1230 * - You must be member of the new group
1231 *
1232 * vp - vnode
1233 * cred - credentials of the invoker
1234 * cur_uid, cur_gid - current uid/gid of the file-system object
1235 * new_uid, new_gid - target uid/gid of the file-system object
1236 *
1237 * Returns 0 if the change is allowed, or an error value otherwise.
1238 */
1239 int
1240 genfs_can_chown(vnode_t *vp, kauth_cred_t cred, uid_t cur_uid,
1241 gid_t cur_gid, uid_t new_uid, gid_t new_gid)
1242 {
1243 int error, ismember;
1244
1245 /*
1246 * To modify the ownership of a file, must possess VADMIN for that
1247 * file.
1248 */
1249 if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred)) != 0)
1250 return (error);
1251
1252 /*
1253 * You can only change ownership of a file if:
1254 * You own the file and...
1255 */
1256 if (kauth_cred_geteuid(cred) == cur_uid) {
1257 /*
1258 * You don't try to change ownership, and...
1259 */
1260 if (new_uid != cur_uid)
1261 return (EPERM);
1262
1263 /*
1264 * You don't try to change group (no-op), or...
1265 */
1266 if (new_gid == cur_gid)
1267 return (0);
1268
1269 /*
1270 * Your effective gid is the new gid, or...
1271 */
1272 if (kauth_cred_getegid(cred) == new_gid)
1273 return (0);
1274
1275 /*
1276 * The new gid is one you're a member of.
1277 */
1278 ismember = 0;
1279 error = kauth_cred_ismember_gid(cred, new_gid,
1280 &ismember);
1281 if (!error && ismember)
1282 return (0);
1283 }
1284
1285 return (EPERM);
1286 }
1287
1288 int
1289 genfs_can_chtimes(vnode_t *vp, kauth_cred_t cred, uid_t owner_uid,
1290 u_int vaflags)
1291 {
1292 int error;
1293 /*
1294 * Grant permission if the caller is the owner of the file, or
1295 * the super-user, or has ACL_WRITE_ATTRIBUTES permission on
1296 * on the file. If the time pointer is null, then write
1297 * permission on the file is also sufficient.
1298 *
1299 * From NFSv4.1, draft 21, 6.2.1.3.1, Discussion of Mask Attributes:
1300 * A user having ACL_WRITE_DATA or ACL_WRITE_ATTRIBUTES
1301 * will be allowed to set the times [..] to the current
1302 * server time.
1303 */
1304 if ((error = VOP_ACCESSX(vp, VWRITE_ATTRIBUTES, cred)) != 0)
1305 return (vaflags & VA_UTIMES_NULL) == 0 ? EPERM : EACCES;
1306
1307 /* Must be owner, or... */
1308 if (kauth_cred_geteuid(cred) == owner_uid)
1309 return (0);
1310
1311 /* set the times to the current time, and... */
1312 if ((vaflags & VA_UTIMES_NULL) == 0)
1313 return (EPERM);
1314
1315 /* have write access. */
1316 error = VOP_ACCESS(vp, VWRITE, cred);
1317 if (error)
1318 return (error);
1319
1320 return (0);
1321 }
1322
1323 /*
1324 * Common routine to check if chflags() is allowed.
1325 *
1326 * Policy:
1327 * - You must own the file, and
1328 * - You must not change system flags, and
1329 * - You must not change flags on character/block devices.
1330 *
1331 * vp - vnode
1332 * cred - credentials of the invoker
1333 * owner_uid - uid of the file-system object
1334 * changing_sysflags - true if the invoker wants to change system flags
1335 */
1336 int
1337 genfs_can_chflags(vnode_t *vp, kauth_cred_t cred,
1338 uid_t owner_uid, bool changing_sysflags)
1339 {
1340
1341 /* The user must own the file. */
1342 if (kauth_cred_geteuid(cred) != owner_uid) {
1343 return EPERM;
1344 }
1345
1346 if (changing_sysflags) {
1347 return EPERM;
1348 }
1349
1350 /*
1351 * Unprivileged users cannot change the flags on devices, even if they
1352 * own them.
1353 */
1354 if (vp->v_type == VCHR || vp->v_type == VBLK) {
1355 return EPERM;
1356 }
1357
1358 return 0;
1359 }
1360
1361 /*
1362 * Common "sticky" policy.
1363 *
1364 * When a directory is "sticky" (as determined by the caller), this
1365 * function may help implementing the following policy:
1366 * - Renaming a file in it is only possible if the user owns the directory
1367 * or the file being renamed.
1368 * - Deleting a file from it is only possible if the user owns the
1369 * directory or the file being deleted.
1370 */
1371 int
1372 genfs_can_sticky(vnode_t *vp, kauth_cred_t cred, uid_t dir_uid, uid_t file_uid)
1373 {
1374 if (kauth_cred_geteuid(cred) != dir_uid &&
1375 kauth_cred_geteuid(cred) != file_uid)
1376 return EPERM;
1377
1378 return 0;
1379 }
1380
1381 int
1382 genfs_can_extattr(vnode_t *vp, kauth_cred_t cred, accmode_t accmode,
1383 int attrnamespace)
1384 {
1385 /*
1386 * Kernel-invoked always succeeds.
1387 */
1388 if (cred == NOCRED)
1389 return 0;
1390
1391 switch (attrnamespace) {
1392 case EXTATTR_NAMESPACE_SYSTEM:
1393 return kauth_authorize_system(cred, KAUTH_SYSTEM_FS_EXTATTR,
1394 0, vp->v_mount, NULL, NULL);
1395 case EXTATTR_NAMESPACE_USER:
1396 return VOP_ACCESS(vp, accmode, cred);
1397 default:
1398 return EPERM;
1399 }
1400 }
1401
1402 int
1403 genfs_access(void *v)
1404 {
1405 struct vop_access_args *ap = v;
1406
1407 KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN |
1408 VAPPEND)) == 0);
1409
1410 return VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred);
1411 }
1412
1413 int
1414 genfs_accessx(void *v)
1415 {
1416 struct vop_accessx_args *ap = v;
1417 int error;
1418 accmode_t accmode = ap->a_accmode;
1419 error = vfs_unixify_accmode(&accmode);
1420 if (error != 0)
1421 return error;
1422
1423 if (accmode == 0)
1424 return 0;
1425
1426 return VOP_ACCESS(ap->a_vp, accmode, ap->a_cred);
1427 }
1428
1429 /*
1430 * genfs_pathconf:
1431 *
1432 * Standard implementation of POSIX pathconf, to get information about limits
1433 * for a filesystem.
1434 * Override per filesystem for the case where the filesystem has smaller
1435 * limits.
1436 */
1437 int
1438 genfs_pathconf(void *v)
1439 {
1440 struct vop_pathconf_args *ap = v;
1441
1442 switch (ap->a_name) {
1443 case _PC_PATH_MAX:
1444 *ap->a_retval = PATH_MAX;
1445 return 0;
1446 case _PC_ACL_EXTENDED:
1447 case _PC_ACL_NFS4:
1448 *ap->a_retval = 0;
1449 return 0;
1450 default:
1451 return EINVAL;
1452 }
1453 }
1454