genfs_vnops.c revision 1.204 1 /* $NetBSD: genfs_vnops.c,v 1.204 2020/05/16 18:31:51 christos Exp $ */
2
3 /*-
4 * Copyright (c) 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. Neither the name of the University nor the names of its contributors
42 * may be used to endorse or promote products derived from this software
43 * without specific prior written permission.
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * SUCH DAMAGE.
56 *
57 */
58
59 #include <sys/cdefs.h>
60 __KERNEL_RCSID(0, "$NetBSD: genfs_vnops.c,v 1.204 2020/05/16 18:31:51 christos Exp $");
61
62 #include <sys/param.h>
63 #include <sys/systm.h>
64 #include <sys/proc.h>
65 #include <sys/kernel.h>
66 #include <sys/mount.h>
67 #include <sys/fstrans.h>
68 #include <sys/namei.h>
69 #include <sys/vnode_impl.h>
70 #include <sys/fcntl.h>
71 #include <sys/kmem.h>
72 #include <sys/poll.h>
73 #include <sys/mman.h>
74 #include <sys/file.h>
75 #include <sys/kauth.h>
76 #include <sys/stat.h>
77 #include <sys/extattr.h>
78
79 #include <miscfs/genfs/genfs.h>
80 #include <miscfs/genfs/genfs_node.h>
81 #include <miscfs/specfs/specdev.h>
82
83 #include <uvm/uvm.h>
84 #include <uvm/uvm_pager.h>
85
/*
 * Forward declarations for the kqueue filter callbacks used by the
 * filterops tables in this file.
 */
static void	filt_genfsdetach(struct knote *);
static int	filt_genfsread(struct knote *, long);
static int	filt_genfswrite(struct knote *, long);
static int	filt_genfsvnode(struct knote *, long);
89
90 int
91 genfs_poll(void *v)
92 {
93 struct vop_poll_args /* {
94 struct vnode *a_vp;
95 int a_events;
96 struct lwp *a_l;
97 } */ *ap = v;
98
99 return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
100 }
101
102 int
103 genfs_seek(void *v)
104 {
105 struct vop_seek_args /* {
106 struct vnode *a_vp;
107 off_t a_oldoff;
108 off_t a_newoff;
109 kauth_cred_t cred;
110 } */ *ap = v;
111
112 if (ap->a_newoff < 0)
113 return (EINVAL);
114
115 return (0);
116 }
117
/*
 * Generic abortop: nothing to undo here, so simply report success.
 */
int
genfs_abortop(void *v)
{
	struct vop_abortop_args /* {
		struct vnode *a_dvp;
		struct componentname *a_cnp;
	} */ *args = v;

	/* The arguments are intentionally unused. */
	(void)args;

	return 0;
}
130
131 int
132 genfs_fcntl(void *v)
133 {
134 struct vop_fcntl_args /* {
135 struct vnode *a_vp;
136 u_int a_command;
137 void *a_data;
138 int a_fflag;
139 kauth_cred_t a_cred;
140 struct lwp *a_l;
141 } */ *ap = v;
142
143 if (ap->a_command == F_SETFL)
144 return (0);
145 else
146 return (EOPNOTSUPP);
147 }
148
/*
 * Placeholder for an operation that must never be invoked: panics.
 */
/*ARGSUSED*/
int
genfs_badop(void *v)
{
	panic("genfs: bad op");
}
156
/*
 * Operation that does nothing and always succeeds.
 */
/*ARGSUSED*/
int
genfs_nullop(void *v)
{
	(void)v;
	return 0;
}
164
/*
 * Operation that always fails with EINVAL.
 */
/*ARGSUSED*/
int
genfs_einval(void *v)
{
	(void)v;
	return EINVAL;
}
172
173 /*
174 * Called when an fs doesn't support a particular vop.
175 * This takes care to vrele, vput, or vunlock passed in vnodes
176 * and calls VOP_ABORTOP for a componentname (in non-rename VOP).
177 */
178 int
179 genfs_eopnotsupp(void *v)
180 {
181 struct vop_generic_args /*
182 struct vnodeop_desc *a_desc;
183 / * other random data follows, presumably * /
184 } */ *ap = v;
185 struct vnodeop_desc *desc = ap->a_desc;
186 struct vnode *vp, *vp_last = NULL;
187 int flags, i, j, offset_cnp, offset_vp;
188
189 KASSERT(desc->vdesc_offset != VOP_LOOKUP_DESCOFFSET);
190 KASSERT(desc->vdesc_offset != VOP_ABORTOP_DESCOFFSET);
191
192 /*
193 * Abort any componentname that lookup potentially left state in.
194 *
195 * As is logical, componentnames for VOP_RENAME are handled by
196 * the caller of VOP_RENAME. Yay, rename!
197 */
198 if (desc->vdesc_offset != VOP_RENAME_DESCOFFSET &&
199 (offset_vp = desc->vdesc_vp_offsets[0]) != VDESC_NO_OFFSET &&
200 (offset_cnp = desc->vdesc_componentname_offset) != VDESC_NO_OFFSET){
201 struct componentname *cnp;
202 struct vnode *dvp;
203
204 dvp = *VOPARG_OFFSETTO(struct vnode **, offset_vp, ap);
205 cnp = *VOPARG_OFFSETTO(struct componentname **, offset_cnp, ap);
206
207 VOP_ABORTOP(dvp, cnp);
208 }
209
210 flags = desc->vdesc_flags;
211 for (i = 0; i < VDESC_MAX_VPS; flags >>=1, i++) {
212 if ((offset_vp = desc->vdesc_vp_offsets[i]) == VDESC_NO_OFFSET)
213 break; /* stop at end of list */
214 if ((j = flags & VDESC_VP0_WILLPUT)) {
215 vp = *VOPARG_OFFSETTO(struct vnode **, offset_vp, ap);
216
217 /* Skip if NULL */
218 if (!vp)
219 continue;
220
221 switch (j) {
222 case VDESC_VP0_WILLPUT:
223 /* Check for dvp == vp cases */
224 if (vp == vp_last)
225 vrele(vp);
226 else {
227 vput(vp);
228 vp_last = vp;
229 }
230 break;
231 case VDESC_VP0_WILLRELE:
232 vrele(vp);
233 break;
234 }
235 }
236 }
237
238 return (EOPNOTSUPP);
239 }
240
/*
 * Operation that always fails with EBADF.
 */
/*ARGSUSED*/
int
genfs_ebadf(void *v)
{
	(void)v;
	return EBADF;
}
248
249 /* ARGSUSED */
250 int
251 genfs_enoioctl(void *v)
252 {
253
254 return (EPASSTHROUGH);
255 }
256
257
258 /*
259 * Eliminate all activity associated with the requested vnode
260 * and with all vnodes aliased to the requested vnode.
261 */
262 int
263 genfs_revoke(void *v)
264 {
265 struct vop_revoke_args /* {
266 struct vnode *a_vp;
267 int a_flags;
268 } */ *ap = v;
269
270 #ifdef DIAGNOSTIC
271 if ((ap->a_flags & REVOKEALL) == 0)
272 panic("genfs_revoke: not revokeall");
273 #endif
274 vrevoke(ap->a_vp);
275 return (0);
276 }
277
278 /*
279 * Lock the node (for deadfs).
280 */
281 int
282 genfs_deadlock(void *v)
283 {
284 struct vop_lock_args /* {
285 struct vnode *a_vp;
286 int a_flags;
287 } */ *ap = v;
288 vnode_t *vp = ap->a_vp;
289 vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
290 int flags = ap->a_flags;
291 krw_t op;
292
293 if (! ISSET(flags, LK_RETRY))
294 return ENOENT;
295
296 if (ISSET(flags, LK_DOWNGRADE)) {
297 rw_downgrade(&vip->vi_lock);
298 } else if (ISSET(flags, LK_UPGRADE)) {
299 KASSERT(ISSET(flags, LK_NOWAIT));
300 if (!rw_tryupgrade(&vip->vi_lock)) {
301 return EBUSY;
302 }
303 } else if ((flags & (LK_EXCLUSIVE | LK_SHARED)) != 0) {
304 op = (ISSET(flags, LK_EXCLUSIVE) ? RW_WRITER : RW_READER);
305 if (ISSET(flags, LK_NOWAIT)) {
306 if (!rw_tryenter(&vip->vi_lock, op))
307 return EBUSY;
308 } else {
309 rw_enter(&vip->vi_lock, op);
310 }
311 }
312 VSTATE_ASSERT_UNLOCKED(vp, VS_RECLAIMED);
313 return 0;
314 }
315
316 /*
317 * Unlock the node (for deadfs).
318 */
319 int
320 genfs_deadunlock(void *v)
321 {
322 struct vop_unlock_args /* {
323 struct vnode *a_vp;
324 } */ *ap = v;
325 vnode_t *vp = ap->a_vp;
326 vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
327
328 rw_exit(&vip->vi_lock);
329
330 return 0;
331 }
332
333 /*
334 * Lock the node.
335 */
336 int
337 genfs_lock(void *v)
338 {
339 struct vop_lock_args /* {
340 struct vnode *a_vp;
341 int a_flags;
342 } */ *ap = v;
343 vnode_t *vp = ap->a_vp;
344 vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
345 int flags = ap->a_flags;
346 krw_t op;
347
348 if (ISSET(flags, LK_DOWNGRADE)) {
349 rw_downgrade(&vip->vi_lock);
350 } else if (ISSET(flags, LK_UPGRADE)) {
351 KASSERT(ISSET(flags, LK_NOWAIT));
352 if (!rw_tryupgrade(&vip->vi_lock)) {
353 return EBUSY;
354 }
355 } else if ((flags & (LK_EXCLUSIVE | LK_SHARED)) != 0) {
356 op = (ISSET(flags, LK_EXCLUSIVE) ? RW_WRITER : RW_READER);
357 if (ISSET(flags, LK_NOWAIT)) {
358 if (!rw_tryenter(&vip->vi_lock, op))
359 return EBUSY;
360 } else {
361 rw_enter(&vip->vi_lock, op);
362 }
363 }
364 VSTATE_ASSERT_UNLOCKED(vp, VS_ACTIVE);
365 return 0;
366 }
367
368 /*
369 * Unlock the node.
370 */
371 int
372 genfs_unlock(void *v)
373 {
374 struct vop_unlock_args /* {
375 struct vnode *a_vp;
376 } */ *ap = v;
377 vnode_t *vp = ap->a_vp;
378 vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
379
380 rw_exit(&vip->vi_lock);
381
382 return 0;
383 }
384
385 /*
386 * Return whether or not the node is locked.
387 */
388 int
389 genfs_islocked(void *v)
390 {
391 struct vop_islocked_args /* {
392 struct vnode *a_vp;
393 } */ *ap = v;
394 vnode_t *vp = ap->a_vp;
395 vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
396
397 if (rw_write_held(&vip->vi_lock))
398 return LK_EXCLUSIVE;
399
400 if (rw_read_held(&vip->vi_lock))
401 return LK_SHARED;
402
403 return 0;
404 }
405
/*
 * Lock/unlock/islocked stubs for objects that need no locking.
 * This one is the lock stub: it does nothing and succeeds.
 */
int
genfs_nolock(void *v)
{
	(void)v;
	return 0;
}
415
/*
 * Unlock stub for objects that need no locking: does nothing and
 * succeeds.
 */
int
genfs_nounlock(void *v)
{
	(void)v;
	return 0;
}
422
/*
 * Islocked stub for objects that need no locking: always returns 0.
 */
int
genfs_noislocked(void *v)
{
	(void)v;
	return 0;
}
429
/*
 * Generic mmap operation: nothing to do, always succeeds.
 */
int
genfs_mmap(void *v)
{
	(void)v;
	return 0;
}
436
437 /*
438 * VOP_PUTPAGES() for vnodes which never have pages.
439 */
440
441 int
442 genfs_null_putpages(void *v)
443 {
444 struct vop_putpages_args /* {
445 struct vnode *a_vp;
446 voff_t a_offlo;
447 voff_t a_offhi;
448 int a_flags;
449 } */ *ap = v;
450 struct vnode *vp = ap->a_vp;
451
452 KASSERT(vp->v_uobj.uo_npages == 0);
453 rw_exit(vp->v_uobj.vmobjlock);
454 return (0);
455 }
456
457 void
458 genfs_node_init(struct vnode *vp, const struct genfs_ops *ops)
459 {
460 struct genfs_node *gp = VTOG(vp);
461
462 rw_init(&gp->g_glock);
463 gp->g_op = ops;
464 }
465
466 void
467 genfs_node_destroy(struct vnode *vp)
468 {
469 struct genfs_node *gp = VTOG(vp);
470
471 rw_destroy(&gp->g_glock);
472 }
473
474 void
475 genfs_size(struct vnode *vp, off_t size, off_t *eobp, int flags)
476 {
477 int bsize;
478
479 bsize = 1 << vp->v_mount->mnt_fs_bshift;
480 *eobp = (size + bsize - 1) & ~(bsize - 1);
481 }
482
483 static void
484 filt_genfsdetach(struct knote *kn)
485 {
486 struct vnode *vp = (struct vnode *)kn->kn_hook;
487
488 mutex_enter(vp->v_interlock);
489 SLIST_REMOVE(&vp->v_klist, kn, knote, kn_selnext);
490 mutex_exit(vp->v_interlock);
491 }
492
493 static int
494 filt_genfsread(struct knote *kn, long hint)
495 {
496 struct vnode *vp = (struct vnode *)kn->kn_hook;
497 int rv;
498
499 /*
500 * filesystem is gone, so set the EOF flag and schedule
501 * the knote for deletion.
502 */
503 switch (hint) {
504 case NOTE_REVOKE:
505 KASSERT(mutex_owned(vp->v_interlock));
506 kn->kn_flags |= (EV_EOF | EV_ONESHOT);
507 return (1);
508 case 0:
509 mutex_enter(vp->v_interlock);
510 kn->kn_data = vp->v_size - ((file_t *)kn->kn_obj)->f_offset;
511 rv = (kn->kn_data != 0);
512 mutex_exit(vp->v_interlock);
513 return rv;
514 default:
515 KASSERT(mutex_owned(vp->v_interlock));
516 kn->kn_data = vp->v_size - ((file_t *)kn->kn_obj)->f_offset;
517 return (kn->kn_data != 0);
518 }
519 }
520
521 static int
522 filt_genfswrite(struct knote *kn, long hint)
523 {
524 struct vnode *vp = (struct vnode *)kn->kn_hook;
525
526 /*
527 * filesystem is gone, so set the EOF flag and schedule
528 * the knote for deletion.
529 */
530 switch (hint) {
531 case NOTE_REVOKE:
532 KASSERT(mutex_owned(vp->v_interlock));
533 kn->kn_flags |= (EV_EOF | EV_ONESHOT);
534 return (1);
535 case 0:
536 mutex_enter(vp->v_interlock);
537 kn->kn_data = 0;
538 mutex_exit(vp->v_interlock);
539 return 1;
540 default:
541 KASSERT(mutex_owned(vp->v_interlock));
542 kn->kn_data = 0;
543 return 1;
544 }
545 }
546
547 static int
548 filt_genfsvnode(struct knote *kn, long hint)
549 {
550 struct vnode *vp = (struct vnode *)kn->kn_hook;
551 int fflags;
552
553 switch (hint) {
554 case NOTE_REVOKE:
555 KASSERT(mutex_owned(vp->v_interlock));
556 kn->kn_flags |= EV_EOF;
557 if ((kn->kn_sfflags & hint) != 0)
558 kn->kn_fflags |= hint;
559 return (1);
560 case 0:
561 mutex_enter(vp->v_interlock);
562 fflags = kn->kn_fflags;
563 mutex_exit(vp->v_interlock);
564 break;
565 default:
566 KASSERT(mutex_owned(vp->v_interlock));
567 if ((kn->kn_sfflags & hint) != 0)
568 kn->kn_fflags |= hint;
569 fflags = kn->kn_fflags;
570 break;
571 }
572
573 return (fflags != 0);
574 }
575
576 static const struct filterops genfsread_filtops = {
577 .f_isfd = 1,
578 .f_attach = NULL,
579 .f_detach = filt_genfsdetach,
580 .f_event = filt_genfsread,
581 };
582
583 static const struct filterops genfswrite_filtops = {
584 .f_isfd = 1,
585 .f_attach = NULL,
586 .f_detach = filt_genfsdetach,
587 .f_event = filt_genfswrite,
588 };
589
590 static const struct filterops genfsvnode_filtops = {
591 .f_isfd = 1,
592 .f_attach = NULL,
593 .f_detach = filt_genfsdetach,
594 .f_event = filt_genfsvnode,
595 };
596
597 int
598 genfs_kqfilter(void *v)
599 {
600 struct vop_kqfilter_args /* {
601 struct vnode *a_vp;
602 struct knote *a_kn;
603 } */ *ap = v;
604 struct vnode *vp;
605 struct knote *kn;
606
607 vp = ap->a_vp;
608 kn = ap->a_kn;
609 switch (kn->kn_filter) {
610 case EVFILT_READ:
611 kn->kn_fop = &genfsread_filtops;
612 break;
613 case EVFILT_WRITE:
614 kn->kn_fop = &genfswrite_filtops;
615 break;
616 case EVFILT_VNODE:
617 kn->kn_fop = &genfsvnode_filtops;
618 break;
619 default:
620 return (EINVAL);
621 }
622
623 kn->kn_hook = vp;
624
625 mutex_enter(vp->v_interlock);
626 SLIST_INSERT_HEAD(&vp->v_klist, kn, kn_selnext);
627 mutex_exit(vp->v_interlock);
628
629 return (0);
630 }
631
632 void
633 genfs_node_wrlock(struct vnode *vp)
634 {
635 struct genfs_node *gp = VTOG(vp);
636
637 rw_enter(&gp->g_glock, RW_WRITER);
638 }
639
640 void
641 genfs_node_rdlock(struct vnode *vp)
642 {
643 struct genfs_node *gp = VTOG(vp);
644
645 rw_enter(&gp->g_glock, RW_READER);
646 }
647
648 int
649 genfs_node_rdtrylock(struct vnode *vp)
650 {
651 struct genfs_node *gp = VTOG(vp);
652
653 return rw_tryenter(&gp->g_glock, RW_READER);
654 }
655
656 void
657 genfs_node_unlock(struct vnode *vp)
658 {
659 struct genfs_node *gp = VTOG(vp);
660
661 rw_exit(&gp->g_glock);
662 }
663
664 int
665 genfs_node_wrlocked(struct vnode *vp)
666 {
667 struct genfs_node *gp = VTOG(vp);
668
669 return rw_write_held(&gp->g_glock);
670 }
671
672 static int
673 groupmember(gid_t gid, kauth_cred_t cred)
674 {
675 int ismember;
676 int error = kauth_cred_ismember_gid(cred, gid, &ismember);
677 if (error)
678 return error;
679 if (kauth_cred_getegid(cred) == gid || ismember)
680 return 0;
681 return -1;
682 }
683
684 /*
685 * Common filesystem object access control check routine. Accepts a
686 * vnode, cred, uid, gid, mode, acl, requested access mode.
687 * Returns 0 on success, or an errno on failure.
688 */
689 int
690 genfs_can_access(vnode_t *vp, kauth_cred_t cred, uid_t file_uid, gid_t file_gid,
691 mode_t file_mode, struct acl *acl, accmode_t accmode)
692 {
693 accmode_t dac_granted;
694 int error;
695
696 KASSERT((accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND)) == 0);
697 KASSERT((accmode & VAPPEND) == 0 || (accmode & VWRITE));
698
699 /*
700 * Look for a normal, non-privileged way to access the file/directory
701 * as requested. If it exists, go with that.
702 */
703
704 dac_granted = 0;
705
706 /* Check the owner. */
707 if (kauth_cred_geteuid(cred) == file_uid) {
708 dac_granted |= VADMIN;
709 if (file_mode & S_IXUSR)
710 dac_granted |= VEXEC;
711 if (file_mode & S_IRUSR)
712 dac_granted |= VREAD;
713 if (file_mode & S_IWUSR)
714 dac_granted |= (VWRITE | VAPPEND);
715
716 return (accmode & dac_granted) == accmode ? 0 : EPERM;
717 }
718
719 /* Otherwise, check the groups (first match) */
720 /* Otherwise, check the groups. */
721 error = groupmember(file_gid, cred);
722 if (error > 0)
723 return error;
724 if (error == 0) {
725 if (file_mode & S_IXGRP)
726 dac_granted |= VEXEC;
727 if (file_mode & S_IRGRP)
728 dac_granted |= VREAD;
729 if (file_mode & S_IWGRP)
730 dac_granted |= (VWRITE | VAPPEND);
731
732 return (accmode & dac_granted) == accmode ? 0 : EACCES;
733 }
734
735 /* Otherwise, check everyone else. */
736 if (file_mode & S_IXOTH)
737 dac_granted |= VEXEC;
738 if (file_mode & S_IROTH)
739 dac_granted |= VREAD;
740 if (file_mode & S_IWOTH)
741 dac_granted |= (VWRITE | VAPPEND);
742 return (accmode & dac_granted) == accmode ? 0 : EACCES;
743 return (0);
744 }
745
746 /*
747 * Implement a version of genfs_can_access() that understands POSIX.1e ACL
748 * semantics;
749 * the access ACL has already been prepared for evaluation by the file system
750 * and is passed via 'uid', 'gid', and 'acl'. Return 0 on success, else an
751 * errno value.
752 */
753 int
754 genfs_can_access_acl_posix1e(vnode_t *vp, kauth_cred_t cred, uid_t file_uid,
755 gid_t file_gid, mode_t file_mode, struct acl *acl, accmode_t accmode)
756 {
757 struct acl_entry *acl_other, *acl_mask;
758 accmode_t dac_granted;
759 accmode_t acl_mask_granted;
760 int group_matched, i;
761 int error;
762
763 KASSERT((accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND)) == 0);
764 KASSERT((accmode & VAPPEND) == 0 || (accmode & VWRITE));
765
766 /*
767 * The owner matches if the effective uid associated with the
768 * credential matches that of the ACL_USER_OBJ entry. While we're
769 * doing the first scan, also cache the location of the ACL_MASK and
770 * ACL_OTHER entries, preventing some future iterations.
771 */
772 acl_mask = acl_other = NULL;
773 for (i = 0; i < acl->acl_cnt; i++) {
774 struct acl_entry *ae = &acl->acl_entry[i];
775 switch (ae->ae_tag) {
776 case ACL_USER_OBJ:
777 if (kauth_cred_geteuid(cred) != file_uid)
778 break;
779 dac_granted = 0;
780 dac_granted |= VADMIN;
781 if (ae->ae_perm & ACL_EXECUTE)
782 dac_granted |= VEXEC;
783 if (ae->ae_perm & ACL_READ)
784 dac_granted |= VREAD;
785 if (ae->ae_perm & ACL_WRITE)
786 dac_granted |= (VWRITE | VAPPEND);
787 goto out;
788
789 case ACL_MASK:
790 acl_mask = ae;
791 break;
792
793 case ACL_OTHER:
794 acl_other = ae;
795 break;
796
797 default:
798 break;
799 }
800 }
801
802 /*
803 * An ACL_OTHER entry should always exist in a valid access ACL. If
804 * it doesn't, then generate a serious failure. For now, this means
805 * a debugging message and EPERM, but in the future should probably
806 * be a panic.
807 */
808 if (acl_other == NULL) {
809 /*
810 * XXX This should never happen
811 */
812 printf("%s: ACL_OTHER missing\n", __func__);
813 return EPERM;
814 }
815
816 /*
817 * Checks against ACL_USER, ACL_GROUP_OBJ, and ACL_GROUP fields are
818 * masked by an ACL_MASK entry, if any. As such, first identify the
819 * ACL_MASK field, then iterate through identifying potential user
820 * matches, then group matches. If there is no ACL_MASK, assume that
821 * the mask allows all requests to succeed.
822 */
823 if (acl_mask != NULL) {
824 acl_mask_granted = 0;
825 if (acl_mask->ae_perm & ACL_EXECUTE)
826 acl_mask_granted |= VEXEC;
827 if (acl_mask->ae_perm & ACL_READ)
828 acl_mask_granted |= VREAD;
829 if (acl_mask->ae_perm & ACL_WRITE)
830 acl_mask_granted |= (VWRITE | VAPPEND);
831 } else
832 acl_mask_granted = VEXEC | VREAD | VWRITE | VAPPEND;
833
834 /*
835 * Check ACL_USER ACL entries. There will either be one or no
836 * matches; if there is one, we accept or rejected based on the
837 * match; otherwise, we continue on to groups.
838 */
839 for (i = 0; i < acl->acl_cnt; i++) {
840 struct acl_entry *ae = &acl->acl_entry[i];
841 switch (ae->ae_tag) {
842 case ACL_USER:
843 if (kauth_cred_geteuid(cred) != ae->ae_id)
844 break;
845 dac_granted = 0;
846 if (ae->ae_perm & ACL_EXECUTE)
847 dac_granted |= VEXEC;
848 if (ae->ae_perm & ACL_READ)
849 dac_granted |= VREAD;
850 if (ae->ae_perm & ACL_WRITE)
851 dac_granted |= (VWRITE | VAPPEND);
852 dac_granted &= acl_mask_granted;
853 goto out;
854 }
855 }
856
857 /*
858 * Group match is best-match, not first-match, so find a "best"
859 * match. Iterate across, testing each potential group match. Make
860 * sure we keep track of whether we found a match or not, so that we
861 * know if we should try again with any available privilege, or if we
862 * should move on to ACL_OTHER.
863 */
864 group_matched = 0;
865 for (i = 0; i < acl->acl_cnt; i++) {
866 struct acl_entry *ae = &acl->acl_entry[i];
867 switch (ae->ae_tag) {
868 case ACL_GROUP_OBJ:
869 error = groupmember(file_gid, cred);
870 if (error > 0)
871 return error;
872 if (error)
873 break;
874 dac_granted = 0;
875 if (ae->ae_perm & ACL_EXECUTE)
876 dac_granted |= VEXEC;
877 if (ae->ae_perm & ACL_READ)
878 dac_granted |= VREAD;
879 if (ae->ae_perm & ACL_WRITE)
880 dac_granted |= (VWRITE | VAPPEND);
881 dac_granted &= acl_mask_granted;
882
883 if ((accmode & dac_granted) == accmode)
884 return 0;
885
886 group_matched = 1;
887 break;
888
889 case ACL_GROUP:
890 error = groupmember(ae->ae_id, cred);
891 if (error > 0)
892 return error;
893 if (error)
894 break;
895 dac_granted = 0;
896 if (ae->ae_perm & ACL_EXECUTE)
897 dac_granted |= VEXEC;
898 if (ae->ae_perm & ACL_READ)
899 dac_granted |= VREAD;
900 if (ae->ae_perm & ACL_WRITE)
901 dac_granted |= (VWRITE | VAPPEND);
902 dac_granted &= acl_mask_granted;
903
904 if ((accmode & dac_granted) == accmode)
905 return 0;
906
907 group_matched = 1;
908 break;
909
910 default:
911 break;
912 }
913 }
914
915 if (group_matched == 1) {
916 /*
917 * There was a match, but it did not grant rights via pure
918 * DAC. Try again, this time with privilege.
919 */
920 for (i = 0; i < acl->acl_cnt; i++) {
921 struct acl_entry *ae = &acl->acl_entry[i];
922 switch (ae->ae_tag) {
923 case ACL_GROUP_OBJ:
924 error = groupmember(file_gid, cred);
925 if (error > 0)
926 return error;
927 if (error)
928 break;
929 dac_granted = 0;
930 if (ae->ae_perm & ACL_EXECUTE)
931 dac_granted |= VEXEC;
932 if (ae->ae_perm & ACL_READ)
933 dac_granted |= VREAD;
934 if (ae->ae_perm & ACL_WRITE)
935 dac_granted |= (VWRITE | VAPPEND);
936 dac_granted &= acl_mask_granted;
937 goto out;
938
939 case ACL_GROUP:
940 error = groupmember(ae->ae_id, cred);
941 if (error > 0)
942 return error;
943 if (error)
944 break;
945 dac_granted = 0;
946 if (ae->ae_perm & ACL_EXECUTE)
947 dac_granted |= VEXEC;
948 if (ae->ae_perm & ACL_READ)
949 dac_granted |= VREAD;
950 if (ae->ae_perm & ACL_WRITE)
951 dac_granted |= (VWRITE | VAPPEND);
952 dac_granted &= acl_mask_granted;
953
954 goto out;
955 default:
956 break;
957 }
958 }
959 /*
960 * Even with privilege, group membership was not sufficient.
961 * Return failure.
962 */
963 dac_granted = 0;
964 goto out;
965 }
966
967 /*
968 * Fall back on ACL_OTHER. ACL_MASK is not applied to ACL_OTHER.
969 */
970 dac_granted = 0;
971 if (acl_other->ae_perm & ACL_EXECUTE)
972 dac_granted |= VEXEC;
973 if (acl_other->ae_perm & ACL_READ)
974 dac_granted |= VREAD;
975 if (acl_other->ae_perm & ACL_WRITE)
976 dac_granted |= (VWRITE | VAPPEND);
977
978 out:
979 if ((accmode & dac_granted) == accmode)
980 return 0;
981 return (accmode & VADMIN) ? EPERM : EACCES;
982 }
983
984 static struct {
985 accmode_t accmode;
986 int mask;
987 } accmode2mask[] = {
988 { VREAD, ACL_READ_DATA },
989 { VWRITE, ACL_WRITE_DATA },
990 { VAPPEND, ACL_APPEND_DATA },
991 { VEXEC, ACL_EXECUTE },
992 { VREAD_NAMED_ATTRS, ACL_READ_NAMED_ATTRS },
993 { VWRITE_NAMED_ATTRS, ACL_WRITE_NAMED_ATTRS },
994 { VDELETE_CHILD, ACL_DELETE_CHILD },
995 { VREAD_ATTRIBUTES, ACL_READ_ATTRIBUTES },
996 { VWRITE_ATTRIBUTES, ACL_WRITE_ATTRIBUTES },
997 { VDELETE, ACL_DELETE },
998 { VREAD_ACL, ACL_READ_ACL },
999 { VWRITE_ACL, ACL_WRITE_ACL },
1000 { VWRITE_OWNER, ACL_WRITE_OWNER },
1001 { VSYNCHRONIZE, ACL_SYNCHRONIZE },
1002 { 0, 0 },
1003 };
1004
1005 static int
1006 _access_mask_from_accmode(accmode_t accmode)
1007 {
1008 int access_mask = 0, i;
1009
1010 for (i = 0; accmode2mask[i].accmode != 0; i++) {
1011 if (accmode & accmode2mask[i].accmode)
1012 access_mask |= accmode2mask[i].mask;
1013 }
1014
1015 /*
1016 * VAPPEND is just a modifier for VWRITE; if the caller asked
1017 * for 'VAPPEND | VWRITE', we want to check for ACL_APPEND_DATA only.
1018 */
1019 if (access_mask & ACL_APPEND_DATA)
1020 access_mask &= ~ACL_WRITE_DATA;
1021
1022 return (access_mask);
1023 }
1024
1025 /*
1026 * Return 0, iff access is allowed, 1 otherwise.
1027 */
1028 static int
1029 _acl_denies(const struct acl *aclp, int access_mask, kauth_cred_t cred,
1030 int file_uid, int file_gid, int *denied_explicitly)
1031 {
1032 int i, error;
1033 const struct acl_entry *ae;
1034
1035 if (denied_explicitly != NULL)
1036 *denied_explicitly = 0;
1037
1038 KASSERT(aclp->acl_cnt <= ACL_MAX_ENTRIES);
1039
1040 for (i = 0; i < aclp->acl_cnt; i++) {
1041 ae = &(aclp->acl_entry[i]);
1042
1043 if (ae->ae_entry_type != ACL_ENTRY_TYPE_ALLOW &&
1044 ae->ae_entry_type != ACL_ENTRY_TYPE_DENY)
1045 continue;
1046 if (ae->ae_flags & ACL_ENTRY_INHERIT_ONLY)
1047 continue;
1048 switch (ae->ae_tag) {
1049 case ACL_USER_OBJ:
1050 if (kauth_cred_geteuid(cred) != file_uid)
1051 continue;
1052 break;
1053 case ACL_USER:
1054 if (kauth_cred_geteuid(cred) != ae->ae_id)
1055 continue;
1056 break;
1057 case ACL_GROUP_OBJ:
1058 error = groupmember(file_gid, cred);
1059 if (error > 0)
1060 return error;
1061 if (error != 0)
1062 continue;
1063 break;
1064 case ACL_GROUP:
1065 error = groupmember(ae->ae_id, cred);
1066 if (error > 0)
1067 return error;
1068 if (error != 0)
1069 continue;
1070 break;
1071 default:
1072 KASSERT(ae->ae_tag == ACL_EVERYONE);
1073 }
1074
1075 if (ae->ae_entry_type == ACL_ENTRY_TYPE_DENY) {
1076 if (ae->ae_perm & access_mask) {
1077 if (denied_explicitly != NULL)
1078 *denied_explicitly = 1;
1079 return (1);
1080 }
1081 }
1082
1083 access_mask &= ~(ae->ae_perm);
1084 if (access_mask == 0)
1085 return (0);
1086 }
1087
1088 if (access_mask == 0)
1089 return (0);
1090
1091 return (1);
1092 }
1093
1094 int
1095 genfs_can_access_acl_nfs4(vnode_t *vp, kauth_cred_t cred, uid_t file_uid,
1096 gid_t file_gid, mode_t file_mode, struct acl *aclp, accmode_t accmode)
1097 {
1098 int denied, explicitly_denied, access_mask, is_directory,
1099 must_be_owner = 0;
1100 file_mode = 0;
1101
1102 KASSERT((accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND |
1103 VEXPLICIT_DENY | VREAD_NAMED_ATTRS | VWRITE_NAMED_ATTRS |
1104 VDELETE_CHILD | VREAD_ATTRIBUTES | VWRITE_ATTRIBUTES | VDELETE |
1105 VREAD_ACL | VWRITE_ACL | VWRITE_OWNER | VSYNCHRONIZE)) == 0);
1106 KASSERT((accmode & VAPPEND) == 0 || (accmode & VWRITE));
1107
1108 #ifdef ACL_DEBUG
1109 char buf[128];
1110 snprintb(buf, sizeof(buf), __VNODE_PERM_BITS, accmode);
1111 printf("%s: %s uid=%d gid=%d\n", __func__, buf, file_uid, file_gid);
1112 #endif
1113
1114 if (accmode & VADMIN)
1115 must_be_owner = 1;
1116
1117 /*
1118 * Ignore VSYNCHRONIZE permission.
1119 */
1120 accmode &= ~VSYNCHRONIZE;
1121
1122 access_mask = _access_mask_from_accmode(accmode);
1123
1124 if (vp && vp->v_type == VDIR)
1125 is_directory = 1;
1126 else
1127 is_directory = 0;
1128
1129 /*
1130 * File owner is always allowed to read and write the ACL
1131 * and basic attributes. This is to prevent a situation
1132 * where user would change ACL in a way that prevents him
1133 * from undoing the change.
1134 */
1135 if (kauth_cred_geteuid(cred) == file_uid)
1136 access_mask &= ~(ACL_READ_ACL | ACL_WRITE_ACL |
1137 ACL_READ_ATTRIBUTES | ACL_WRITE_ATTRIBUTES);
1138
1139 /*
1140 * Ignore append permission for regular files; use write
1141 * permission instead.
1142 */
1143 if (!is_directory && (access_mask & ACL_APPEND_DATA)) {
1144 access_mask &= ~ACL_APPEND_DATA;
1145 access_mask |= ACL_WRITE_DATA;
1146 }
1147
1148 denied = _acl_denies(aclp, access_mask, cred, file_uid, file_gid,
1149 &explicitly_denied);
1150
1151 if (must_be_owner) {
1152 if (kauth_cred_geteuid(cred) != file_uid)
1153 denied = EPERM;
1154 }
1155
1156 /*
1157 * For VEXEC, ensure that at least one execute bit is set for
1158 * non-directories. We have to check the mode here to stay
1159 * consistent with execve(2). See the test in
1160 * exec_check_permissions().
1161 */
1162 __acl_nfs4_sync_mode_from_acl(&file_mode, aclp);
1163 if (!denied && !is_directory && (accmode & VEXEC) &&
1164 (file_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)
1165 denied = EACCES;
1166
1167 if (!denied)
1168 return (0);
1169
1170 /*
1171 * Access failed. Iff it was not denied explicitly and
1172 * VEXPLICIT_DENY flag was specified, allow access.
1173 */
1174 if ((accmode & VEXPLICIT_DENY) && explicitly_denied == 0)
1175 return (0);
1176
1177 accmode &= ~VEXPLICIT_DENY;
1178
1179 if (accmode & (VADMIN_PERMS | VDELETE_CHILD | VDELETE))
1180 denied = EPERM;
1181 else
1182 denied = EACCES;
1183
1184 return (denied);
1185 }
1186
1187 /*
1188 * Common routine to check if chmod() is allowed.
1189 *
1190 * Policy:
1191 * - You must own the file, and
1192 * - You must not set the "sticky" bit (meaningless, see chmod(2))
1193 * - You must be a member of the group if you're trying to set the
1194 * SGIDf bit
1195 *
1196 * vp - vnode of the file-system object
1197 * cred - credentials of the invoker
1198 * cur_uid, cur_gid - current uid/gid of the file-system object
1199 * new_mode - new mode for the file-system object
1200 *
1201 * Returns 0 if the change is allowed, or an error value otherwise.
1202 */
1203 int
1204 genfs_can_chmod(vnode_t *vp, kauth_cred_t cred, uid_t cur_uid,
1205 gid_t cur_gid, mode_t new_mode)
1206 {
1207 int error;
1208
1209 /*
1210 * To modify the permissions on a file, must possess VADMIN
1211 * for that file.
1212 */
1213 if ((error = VOP_ACCESSX(vp, VWRITE_ACL, cred)) != 0)
1214 return (error);
1215
1216 /*
1217 * Unprivileged users can't set the sticky bit on files.
1218 */
1219 if ((vp->v_type != VDIR) && (new_mode & S_ISTXT))
1220 return (EFTYPE);
1221
1222 /*
1223 * If the invoker is trying to set the SGID bit on the file,
1224 * check group membership.
1225 */
1226 if (new_mode & S_ISGID) {
1227 int ismember;
1228
1229 error = kauth_cred_ismember_gid(cred, cur_gid,
1230 &ismember);
1231 if (error || !ismember)
1232 return (EPERM);
1233 }
1234
1235 /*
1236 * Deny setting setuid if we are not the file owner.
1237 */
1238 if ((new_mode & S_ISUID) && cur_uid != kauth_cred_geteuid(cred))
1239 return (EPERM);
1240
1241 return (0);
1242 }
1243
1244 /*
1245 * Common routine to check if chown() is allowed.
1246 *
1247 * Policy:
1248 * - You must own the file, and
1249 * - You must not try to change ownership, and
1250 * - You must be member of the new group
1251 *
1252 * vp - vnode
1253 * cred - credentials of the invoker
1254 * cur_uid, cur_gid - current uid/gid of the file-system object
1255 * new_uid, new_gid - target uid/gid of the file-system object
1256 *
1257 * Returns 0 if the change is allowed, or an error value otherwise.
1258 */
1259 int
1260 genfs_can_chown(vnode_t *vp, kauth_cred_t cred, uid_t cur_uid,
1261 gid_t cur_gid, uid_t new_uid, gid_t new_gid)
1262 {
1263 int error, ismember;
1264
1265 /*
1266 * To modify the ownership of a file, must possess VADMIN for that
1267 * file.
1268 */
1269 if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred)) != 0)
1270 return (error);
1271
1272 /*
1273 * You can only change ownership of a file if:
1274 * You own the file and...
1275 */
1276 if (kauth_cred_geteuid(cred) == cur_uid) {
1277 /*
1278 * You don't try to change ownership, and...
1279 */
1280 if (new_uid != cur_uid)
1281 return (EPERM);
1282
1283 /*
1284 * You don't try to change group (no-op), or...
1285 */
1286 if (new_gid == cur_gid)
1287 return (0);
1288
1289 /*
1290 * Your effective gid is the new gid, or...
1291 */
1292 if (kauth_cred_getegid(cred) == new_gid)
1293 return (0);
1294
1295 /*
1296 * The new gid is one you're a member of.
1297 */
1298 ismember = 0;
1299 error = kauth_cred_ismember_gid(cred, new_gid,
1300 &ismember);
1301 if (!error && ismember)
1302 return (0);
1303 }
1304
1305 return (EPERM);
1306 }
1307
1308 int
1309 genfs_can_chtimes(vnode_t *vp, kauth_cred_t cred, uid_t owner_uid,
1310 u_int vaflags)
1311 {
1312 int error;
1313 /*
1314 * Grant permission if the caller is the owner of the file, or
1315 * the super-user, or has ACL_WRITE_ATTRIBUTES permission on
1316 * on the file. If the time pointer is null, then write
1317 * permission on the file is also sufficient.
1318 *
1319 * From NFSv4.1, draft 21, 6.2.1.3.1, Discussion of Mask Attributes:
1320 * A user having ACL_WRITE_DATA or ACL_WRITE_ATTRIBUTES
1321 * will be allowed to set the times [..] to the current
1322 * server time.
1323 */
1324 if ((error = VOP_ACCESSX(vp, VWRITE_ATTRIBUTES, cred)) != 0)
1325 return (error);
1326
1327 /* Must be owner, or... */
1328 if (kauth_cred_geteuid(cred) == owner_uid)
1329 return (0);
1330
1331 /* set the times to the current time, and... */
1332 if ((vaflags & VA_UTIMES_NULL) == 0)
1333 return (EPERM);
1334
1335 /* have write access. */
1336 error = VOP_ACCESS(vp, VWRITE, cred);
1337 if (error)
1338 return (error);
1339
1340 return (0);
1341 }
1342
1343 /*
1344 * Common routine to check if chflags() is allowed.
1345 *
1346 * Policy:
1347 * - You must own the file, and
1348 * - You must not change system flags, and
1349 * - You must not change flags on character/block devices.
1350 *
1351 * vp - vnode
1352 * cred - credentials of the invoker
1353 * owner_uid - uid of the file-system object
1354 * changing_sysflags - true if the invoker wants to change system flags
1355 */
1356 int
1357 genfs_can_chflags(vnode_t *vp, kauth_cred_t cred,
1358 uid_t owner_uid, bool changing_sysflags)
1359 {
1360
1361 /* The user must own the file. */
1362 if (kauth_cred_geteuid(cred) != owner_uid) {
1363 return EPERM;
1364 }
1365
1366 if (changing_sysflags) {
1367 return EPERM;
1368 }
1369
1370 /*
1371 * Unprivileged users cannot change the flags on devices, even if they
1372 * own them.
1373 */
1374 if (vp->v_type == VCHR || vp->v_type == VBLK) {
1375 return EPERM;
1376 }
1377
1378 return 0;
1379 }
1380
1381 /*
1382 * Common "sticky" policy.
1383 *
1384 * When a directory is "sticky" (as determined by the caller), this
1385 * function may help implementing the following policy:
1386 * - Renaming a file in it is only possible if the user owns the directory
1387 * or the file being renamed.
1388 * - Deleting a file from it is only possible if the user owns the
1389 * directory or the file being deleted.
1390 */
1391 int
1392 genfs_can_sticky(vnode_t *vp, kauth_cred_t cred, uid_t dir_uid, uid_t file_uid)
1393 {
1394 if (kauth_cred_geteuid(cred) != dir_uid &&
1395 kauth_cred_geteuid(cred) != file_uid)
1396 return EPERM;
1397
1398 return 0;
1399 }
1400
1401 int
1402 genfs_can_extattr(vnode_t *vp, kauth_cred_t cred, int accmode,
1403 int attrnamespace)
1404 {
1405 /*
1406 * Kernel-invoked always succeeds.
1407 */
1408 if (cred == NOCRED)
1409 return 0;
1410
1411 switch (attrnamespace) {
1412 case EXTATTR_NAMESPACE_SYSTEM:
1413 return kauth_authorize_system(cred, KAUTH_SYSTEM_FS_EXTATTR,
1414 0, vp->v_mount, NULL, NULL);
1415 case EXTATTR_NAMESPACE_USER:
1416 return VOP_ACCESS(vp, accmode, cred);
1417 default:
1418 return EPERM;
1419 }
1420 }
1421
1422 int
1423 genfs_access(void *v)
1424 {
1425 struct vop_access_args *ap = v;
1426
1427 KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN |
1428 VAPPEND)) == 0);
1429
1430 return VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred);
1431 }
1432
1433 int
1434 genfs_accessx(void *v)
1435 {
1436 struct vop_accessx_args *ap = v;
1437 int error;
1438 accmode_t accmode = ap->a_accmode;
1439 error = vfs_unixify_accmode(&accmode);
1440 if (error != 0)
1441 return error;
1442
1443 if (accmode == 0)
1444 return 0;
1445
1446 return VOP_ACCESS(ap->a_vp, accmode, ap->a_cred);
1447 }
1448