Home | History | Annotate | Line # | Download | only in genfs
genfs_vnops.c revision 1.213
      1 /*	$NetBSD: genfs_vnops.c,v 1.213 2021/10/10 23:46:23 thorpej Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2008 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  * POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 /*
     30  * Copyright (c) 1982, 1986, 1989, 1993
     31  *	The Regents of the University of California.  All rights reserved.
     32  *
     33  * Redistribution and use in source and binary forms, with or without
     34  * modification, are permitted provided that the following conditions
     35  * are met:
     36  * 1. Redistributions of source code must retain the above copyright
     37  *    notice, this list of conditions and the following disclaimer.
     38  * 2. Redistributions in binary form must reproduce the above copyright
     39  *    notice, this list of conditions and the following disclaimer in the
     40  *    documentation and/or other materials provided with the distribution.
     41  * 3. Neither the name of the University nor the names of its contributors
     42  *    may be used to endorse or promote products derived from this software
     43  *    without specific prior written permission.
     44  *
     45  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     46  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     47  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     48  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     49  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     50  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     51  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     52  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     53  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     54  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     55  * SUCH DAMAGE.
     56  *
     57  */
     58 
     59 #include <sys/cdefs.h>
     60 __KERNEL_RCSID(0, "$NetBSD: genfs_vnops.c,v 1.213 2021/10/10 23:46:23 thorpej Exp $");
     61 
     62 #include <sys/param.h>
     63 #include <sys/systm.h>
     64 #include <sys/proc.h>
     65 #include <sys/kernel.h>
     66 #include <sys/mount.h>
     67 #include <sys/fstrans.h>
     68 #include <sys/namei.h>
     69 #include <sys/vnode_impl.h>
     70 #include <sys/fcntl.h>
     71 #include <sys/eventvar.h>		/* XXX for kq->kq_lock */
     72 #include <sys/kmem.h>
     73 #include <sys/poll.h>
     74 #include <sys/mman.h>
     75 #include <sys/file.h>
     76 #include <sys/kauth.h>
     77 #include <sys/stat.h>
     78 #include <sys/extattr.h>
     79 
     80 #include <miscfs/genfs/genfs.h>
     81 #include <miscfs/genfs/genfs_node.h>
     82 #include <miscfs/specfs/specdev.h>
     83 
/* Forward declarations for the kqueue filter callbacks defined below. */
static void filt_genfsdetach(struct knote *);
static int filt_genfsread(struct knote *, long);
static int filt_genfswrite(struct knote *, long);
static int filt_genfsvnode(struct knote *, long);
     87 
     88 /*
     89  * Find the end of the first path component in NAME and return its
     90  * length.
     91  */
     92 int
     93 genfs_parsepath(void *v)
     94 {
     95 	struct vop_parsepath_args /* {
     96 		struct vnode *a_dvp;
     97 		const char *a_name;
     98 		size_t *a_ret;
     99 	} */ *ap = v;
    100 	const char *name = ap->a_name;
    101 	size_t pos;
    102 
    103 	(void)ap->a_dvp;
    104 
    105 	pos = 0;
    106 	while (name[pos] != '\0' && name[pos] != '/') {
    107 		pos++;
    108 	}
    109 	*ap->a_retval = pos;
    110 	return 0;
    111 }
    112 
    113 int
    114 genfs_poll(void *v)
    115 {
    116 	struct vop_poll_args /* {
    117 		struct vnode *a_vp;
    118 		int a_events;
    119 		struct lwp *a_l;
    120 	} */ *ap = v;
    121 
    122 	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
    123 }
    124 
    125 int
    126 genfs_seek(void *v)
    127 {
    128 	struct vop_seek_args /* {
    129 		struct vnode *a_vp;
    130 		off_t a_oldoff;
    131 		off_t a_newoff;
    132 		kauth_cred_t cred;
    133 	} */ *ap = v;
    134 
    135 	if (ap->a_newoff < 0)
    136 		return (EINVAL);
    137 
    138 	return (0);
    139 }
    140 
/*
 * Generic abortop: the arguments are not examined and 0 is returned.
 */
int
genfs_abortop(void *v)
{
	struct vop_abortop_args /* {
		struct vnode *a_dvp;
		struct componentname *a_cnp;
	} */ *ap = v;

	/* Nothing to undo; reference ap only to keep it "used". */
	(void)ap;
	return 0;
}
    153 
    154 int
    155 genfs_fcntl(void *v)
    156 {
    157 	struct vop_fcntl_args /* {
    158 		struct vnode *a_vp;
    159 		u_int a_command;
    160 		void *a_data;
    161 		int a_fflag;
    162 		kauth_cred_t a_cred;
    163 		struct lwp *a_l;
    164 	} */ *ap = v;
    165 
    166 	if (ap->a_command == F_SETFL)
    167 		return (0);
    168 	else
    169 		return (EOPNOTSUPP);
    170 }
    171 
/*
 * Vnode operation that must never be reached: panics unconditionally
 * with the message "genfs: bad op".
 */
/*ARGSUSED*/
int
genfs_badop(void *v)
{
	panic("genfs: bad op");
}
    179 
/*
 * Generic no-op vnode operation: the argument is ignored and 0 is
 * returned.
 */
/*ARGSUSED*/
int
genfs_nullop(void *v)
{
	(void)v;
	return 0;
}
    187 
/*
 * Generic vnode operation that always fails with EINVAL; the argument
 * is ignored.
 */
/*ARGSUSED*/
int
genfs_einval(void *v)
{
	(void)v;
	return EINVAL;
}
    195 
    196 /*
    197  * Called when an fs doesn't support a particular vop.
    198  * This takes care to vrele, vput, or vunlock passed in vnodes
    199  * and calls VOP_ABORTOP for a componentname (in non-rename VOP).
    200  */
    201 int
    202 genfs_eopnotsupp(void *v)
    203 {
    204 	struct vop_generic_args /*
    205 		struct vnodeop_desc *a_desc;
    206 		/ * other random data follows, presumably * /
    207 	} */ *ap = v;
    208 	struct vnodeop_desc *desc = ap->a_desc;
    209 	struct vnode *vp, *vp_last = NULL;
    210 	int flags, i, j, offset_cnp, offset_vp;
    211 
    212 	KASSERT(desc->vdesc_offset != VOP_LOOKUP_DESCOFFSET);
    213 	KASSERT(desc->vdesc_offset != VOP_ABORTOP_DESCOFFSET);
    214 
    215 	/*
    216 	 * Abort any componentname that lookup potentially left state in.
    217 	 *
    218 	 * As is logical, componentnames for VOP_RENAME are handled by
    219 	 * the caller of VOP_RENAME.  Yay, rename!
    220 	 */
    221 	if (desc->vdesc_offset != VOP_RENAME_DESCOFFSET &&
    222 	    (offset_vp = desc->vdesc_vp_offsets[0]) != VDESC_NO_OFFSET &&
    223 	    (offset_cnp = desc->vdesc_componentname_offset) != VDESC_NO_OFFSET){
    224 		struct componentname *cnp;
    225 		struct vnode *dvp;
    226 
    227 		dvp = *VOPARG_OFFSETTO(struct vnode **, offset_vp, ap);
    228 		cnp = *VOPARG_OFFSETTO(struct componentname **, offset_cnp, ap);
    229 
    230 		VOP_ABORTOP(dvp, cnp);
    231 	}
    232 
    233 	flags = desc->vdesc_flags;
    234 	for (i = 0; i < VDESC_MAX_VPS; flags >>=1, i++) {
    235 		if ((offset_vp = desc->vdesc_vp_offsets[i]) == VDESC_NO_OFFSET)
    236 			break;	/* stop at end of list */
    237 		if ((j = flags & VDESC_VP0_WILLPUT)) {
    238 			vp = *VOPARG_OFFSETTO(struct vnode **, offset_vp, ap);
    239 
    240 			/* Skip if NULL */
    241 			if (!vp)
    242 				continue;
    243 
    244 			switch (j) {
    245 			case VDESC_VP0_WILLPUT:
    246 				/* Check for dvp == vp cases */
    247 				if (vp == vp_last)
    248 					vrele(vp);
    249 				else {
    250 					vput(vp);
    251 					vp_last = vp;
    252 				}
    253 				break;
    254 			case VDESC_VP0_WILLRELE:
    255 				vrele(vp);
    256 				break;
    257 			}
    258 		}
    259 	}
    260 
    261 	return (EOPNOTSUPP);
    262 }
    263 
/*
 * Generic vnode operation that always fails with EBADF; the argument
 * is ignored.
 */
/*ARGSUSED*/
int
genfs_ebadf(void *v)
{
	(void)v;
	return EBADF;
}
    271 
    272 /* ARGSUSED */
    273 int
    274 genfs_enoioctl(void *v)
    275 {
    276 
    277 	return (EPASSTHROUGH);
    278 }
    279 
    280 
    281 /*
    282  * Eliminate all activity associated with the requested vnode
    283  * and with all vnodes aliased to the requested vnode.
    284  */
    285 int
    286 genfs_revoke(void *v)
    287 {
    288 	struct vop_revoke_args /* {
    289 		struct vnode *a_vp;
    290 		int a_flags;
    291 	} */ *ap = v;
    292 
    293 #ifdef DIAGNOSTIC
    294 	if ((ap->a_flags & REVOKEALL) == 0)
    295 		panic("genfs_revoke: not revokeall");
    296 #endif
    297 	vrevoke(ap->a_vp);
    298 	return (0);
    299 }
    300 
    301 /*
    302  * Lock the node (for deadfs).
    303  */
    304 int
    305 genfs_deadlock(void *v)
    306 {
    307 	struct vop_lock_args /* {
    308 		struct vnode *a_vp;
    309 		int a_flags;
    310 	} */ *ap = v;
    311 	vnode_t *vp = ap->a_vp;
    312 	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
    313 	int flags = ap->a_flags;
    314 	krw_t op;
    315 
    316 	if (! ISSET(flags, LK_RETRY))
    317 		return ENOENT;
    318 
    319 	if (ISSET(flags, LK_DOWNGRADE)) {
    320 		rw_downgrade(&vip->vi_lock);
    321 	} else if (ISSET(flags, LK_UPGRADE)) {
    322 		KASSERT(ISSET(flags, LK_NOWAIT));
    323 		if (!rw_tryupgrade(&vip->vi_lock)) {
    324 			return EBUSY;
    325 		}
    326 	} else if ((flags & (LK_EXCLUSIVE | LK_SHARED)) != 0) {
    327 		op = (ISSET(flags, LK_EXCLUSIVE) ? RW_WRITER : RW_READER);
    328 		if (ISSET(flags, LK_NOWAIT)) {
    329 			if (!rw_tryenter(&vip->vi_lock, op))
    330 				return EBUSY;
    331 		} else {
    332 			rw_enter(&vip->vi_lock, op);
    333 		}
    334 	}
    335 	VSTATE_ASSERT_UNLOCKED(vp, VS_RECLAIMED);
    336 	return 0;
    337 }
    338 
    339 /*
    340  * Unlock the node (for deadfs).
    341  */
    342 int
    343 genfs_deadunlock(void *v)
    344 {
    345 	struct vop_unlock_args /* {
    346 		struct vnode *a_vp;
    347 	} */ *ap = v;
    348 	vnode_t *vp = ap->a_vp;
    349 	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
    350 
    351 	rw_exit(&vip->vi_lock);
    352 
    353 	return 0;
    354 }
    355 
    356 /*
    357  * Lock the node.
    358  */
    359 int
    360 genfs_lock(void *v)
    361 {
    362 	struct vop_lock_args /* {
    363 		struct vnode *a_vp;
    364 		int a_flags;
    365 	} */ *ap = v;
    366 	vnode_t *vp = ap->a_vp;
    367 	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
    368 	int flags = ap->a_flags;
    369 	krw_t op;
    370 
    371 	if (ISSET(flags, LK_DOWNGRADE)) {
    372 		rw_downgrade(&vip->vi_lock);
    373 	} else if (ISSET(flags, LK_UPGRADE)) {
    374 		KASSERT(ISSET(flags, LK_NOWAIT));
    375 		if (!rw_tryupgrade(&vip->vi_lock)) {
    376 			return EBUSY;
    377 		}
    378 	} else if ((flags & (LK_EXCLUSIVE | LK_SHARED)) != 0) {
    379 		op = (ISSET(flags, LK_EXCLUSIVE) ? RW_WRITER : RW_READER);
    380 		if (ISSET(flags, LK_NOWAIT)) {
    381 			if (!rw_tryenter(&vip->vi_lock, op))
    382 				return EBUSY;
    383 		} else {
    384 			rw_enter(&vip->vi_lock, op);
    385 		}
    386 	}
    387 	VSTATE_ASSERT_UNLOCKED(vp, VS_ACTIVE);
    388 	return 0;
    389 }
    390 
    391 /*
    392  * Unlock the node.
    393  */
    394 int
    395 genfs_unlock(void *v)
    396 {
    397 	struct vop_unlock_args /* {
    398 		struct vnode *a_vp;
    399 	} */ *ap = v;
    400 	vnode_t *vp = ap->a_vp;
    401 	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
    402 
    403 	rw_exit(&vip->vi_lock);
    404 
    405 	return 0;
    406 }
    407 
    408 /*
    409  * Return whether or not the node is locked.
    410  */
    411 int
    412 genfs_islocked(void *v)
    413 {
    414 	struct vop_islocked_args /* {
    415 		struct vnode *a_vp;
    416 	} */ *ap = v;
    417 	vnode_t *vp = ap->a_vp;
    418 	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
    419 
    420 	if (rw_write_held(&vip->vi_lock))
    421 		return LK_EXCLUSIVE;
    422 
    423 	if (rw_read_held(&vip->vi_lock))
    424 		return LK_SHARED;
    425 
    426 	return 0;
    427 }
    428 
/*
 * Stubs to use when there is no locking to be done on the underlying object.
 */

/* Lock stub: ignores its argument and returns 0. */
int
genfs_nolock(void *v)
{
	(void)v;
	return 0;
}
    438 
/* Unlock stub: ignores its argument and returns 0. */
int
genfs_nounlock(void *v)
{
	(void)v;
	return 0;
}
    445 
/* Islocked stub: ignores its argument and always returns 0. */
int
genfs_noislocked(void *v)
{
	(void)v;
	return 0;
}
    452 
/* Generic mmap: ignores its argument and returns 0. */
int
genfs_mmap(void *v)
{
	(void)v;
	return 0;
}
    459 
    460 /*
    461  * VOP_PUTPAGES() for vnodes which never have pages.
    462  */
    463 
    464 int
    465 genfs_null_putpages(void *v)
    466 {
    467 	struct vop_putpages_args /* {
    468 		struct vnode *a_vp;
    469 		voff_t a_offlo;
    470 		voff_t a_offhi;
    471 		int a_flags;
    472 	} */ *ap = v;
    473 	struct vnode *vp = ap->a_vp;
    474 
    475 	KASSERT(vp->v_uobj.uo_npages == 0);
    476 	rw_exit(vp->v_uobj.vmobjlock);
    477 	return (0);
    478 }
    479 
    480 void
    481 genfs_node_init(struct vnode *vp, const struct genfs_ops *ops)
    482 {
    483 	struct genfs_node *gp = VTOG(vp);
    484 
    485 	rw_init(&gp->g_glock);
    486 	gp->g_op = ops;
    487 }
    488 
    489 void
    490 genfs_node_destroy(struct vnode *vp)
    491 {
    492 	struct genfs_node *gp = VTOG(vp);
    493 
    494 	rw_destroy(&gp->g_glock);
    495 }
    496 
    497 void
    498 genfs_size(struct vnode *vp, off_t size, off_t *eobp, int flags)
    499 {
    500 	int bsize;
    501 
    502 	bsize = 1 << vp->v_mount->mnt_fs_bshift;
    503 	*eobp = (size + bsize - 1) & ~(bsize - 1);
    504 }
    505 
    506 static void
    507 filt_genfsdetach(struct knote *kn)
    508 {
    509 	struct vnode *vp = (struct vnode *)kn->kn_hook;
    510 
    511 	mutex_enter(vp->v_interlock);
    512 	SLIST_REMOVE(&vp->v_klist, kn, knote, kn_selnext);
    513 	mutex_exit(vp->v_interlock);
    514 }
    515 
    516 static int
    517 filt_genfsread(struct knote *kn, long hint)
    518 {
    519 	struct vnode *vp = (struct vnode *)kn->kn_hook;
    520 	int rv;
    521 
    522 	/*
    523 	 * filesystem is gone, so set the EOF flag and schedule
    524 	 * the knote for deletion.
    525 	 */
    526 	switch (hint) {
    527 	case NOTE_REVOKE:
    528 		KASSERT(mutex_owned(vp->v_interlock));
    529 		mutex_spin_enter(&kn->kn_kq->kq_lock);
    530 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
    531 		mutex_spin_exit(&kn->kn_kq->kq_lock);
    532 		return (1);
    533 	case 0:
    534 		mutex_enter(vp->v_interlock);
    535 		kn->kn_data = vp->v_size - ((file_t *)kn->kn_obj)->f_offset;
    536 		rv = (kn->kn_data != 0);
    537 		mutex_exit(vp->v_interlock);
    538 		return rv;
    539 	default:
    540 		KASSERT(mutex_owned(vp->v_interlock));
    541 		kn->kn_data = vp->v_size - ((file_t *)kn->kn_obj)->f_offset;
    542 		return (kn->kn_data != 0);
    543 	}
    544 }
    545 
    546 static int
    547 filt_genfswrite(struct knote *kn, long hint)
    548 {
    549 	struct vnode *vp = (struct vnode *)kn->kn_hook;
    550 
    551 	/*
    552 	 * filesystem is gone, so set the EOF flag and schedule
    553 	 * the knote for deletion.
    554 	 */
    555 	switch (hint) {
    556 	case NOTE_REVOKE:
    557 		KASSERT(mutex_owned(vp->v_interlock));
    558 		mutex_spin_enter(&kn->kn_kq->kq_lock);
    559 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
    560 		mutex_spin_exit(&kn->kn_kq->kq_lock);
    561 		return (1);
    562 	case 0:
    563 		mutex_enter(vp->v_interlock);
    564 		kn->kn_data = 0;
    565 		mutex_exit(vp->v_interlock);
    566 		return 1;
    567 	default:
    568 		KASSERT(mutex_owned(vp->v_interlock));
    569 		kn->kn_data = 0;
    570 		return 1;
    571 	}
    572 }
    573 
    574 static int
    575 filt_genfsvnode(struct knote *kn, long hint)
    576 {
    577 	struct vnode *vp = (struct vnode *)kn->kn_hook;
    578 	int fflags;
    579 
    580 	switch (hint) {
    581 	case NOTE_REVOKE:
    582 		KASSERT(mutex_owned(vp->v_interlock));
    583 		mutex_spin_enter(&kn->kn_kq->kq_lock);
    584 		kn->kn_flags |= EV_EOF;
    585 		mutex_spin_exit(&kn->kn_kq->kq_lock);
    586 		if ((kn->kn_sfflags & hint) != 0)
    587 			kn->kn_fflags |= hint;
    588 		return (1);
    589 	case 0:
    590 		mutex_enter(vp->v_interlock);
    591 		fflags = kn->kn_fflags;
    592 		mutex_exit(vp->v_interlock);
    593 		break;
    594 	default:
    595 		KASSERT(mutex_owned(vp->v_interlock));
    596 		if ((kn->kn_sfflags & hint) != 0)
    597 			kn->kn_fflags |= hint;
    598 		fflags = kn->kn_fflags;
    599 		break;
    600 	}
    601 
    602 	return (fflags != 0);
    603 }
    604 
    605 static const struct filterops genfsread_filtops = {
    606 	.f_flags = FILTEROP_ISFD,
    607 	.f_attach = NULL,
    608 	.f_detach = filt_genfsdetach,
    609 	.f_event = filt_genfsread,
    610 };
    611 
    612 static const struct filterops genfswrite_filtops = {
    613 	.f_flags = FILTEROP_ISFD,
    614 	.f_attach = NULL,
    615 	.f_detach = filt_genfsdetach,
    616 	.f_event = filt_genfswrite,
    617 };
    618 
    619 static const struct filterops genfsvnode_filtops = {
    620 	.f_flags = FILTEROP_ISFD,
    621 	.f_attach = NULL,
    622 	.f_detach = filt_genfsdetach,
    623 	.f_event = filt_genfsvnode,
    624 };
    625 
    626 int
    627 genfs_kqfilter(void *v)
    628 {
    629 	struct vop_kqfilter_args /* {
    630 		struct vnode	*a_vp;
    631 		struct knote	*a_kn;
    632 	} */ *ap = v;
    633 	struct vnode *vp;
    634 	struct knote *kn;
    635 
    636 	vp = ap->a_vp;
    637 	kn = ap->a_kn;
    638 	switch (kn->kn_filter) {
    639 	case EVFILT_READ:
    640 		kn->kn_fop = &genfsread_filtops;
    641 		break;
    642 	case EVFILT_WRITE:
    643 		kn->kn_fop = &genfswrite_filtops;
    644 		break;
    645 	case EVFILT_VNODE:
    646 		kn->kn_fop = &genfsvnode_filtops;
    647 		break;
    648 	default:
    649 		return (EINVAL);
    650 	}
    651 
    652 	kn->kn_hook = vp;
    653 
    654 	mutex_enter(vp->v_interlock);
    655 	SLIST_INSERT_HEAD(&vp->v_klist, kn, kn_selnext);
    656 	mutex_exit(vp->v_interlock);
    657 
    658 	return (0);
    659 }
    660 
    661 void
    662 genfs_node_wrlock(struct vnode *vp)
    663 {
    664 	struct genfs_node *gp = VTOG(vp);
    665 
    666 	rw_enter(&gp->g_glock, RW_WRITER);
    667 }
    668 
    669 void
    670 genfs_node_rdlock(struct vnode *vp)
    671 {
    672 	struct genfs_node *gp = VTOG(vp);
    673 
    674 	rw_enter(&gp->g_glock, RW_READER);
    675 }
    676 
    677 int
    678 genfs_node_rdtrylock(struct vnode *vp)
    679 {
    680 	struct genfs_node *gp = VTOG(vp);
    681 
    682 	return rw_tryenter(&gp->g_glock, RW_READER);
    683 }
    684 
    685 void
    686 genfs_node_unlock(struct vnode *vp)
    687 {
    688 	struct genfs_node *gp = VTOG(vp);
    689 
    690 	rw_exit(&gp->g_glock);
    691 }
    692 
    693 int
    694 genfs_node_wrlocked(struct vnode *vp)
    695 {
    696 	struct genfs_node *gp = VTOG(vp);
    697 
    698 	return rw_write_held(&gp->g_glock);
    699 }
    700 
    701 static int
    702 groupmember(gid_t gid, kauth_cred_t cred)
    703 {
    704 	int ismember;
    705 	int error = kauth_cred_ismember_gid(cred, gid, &ismember);
    706 	if (error)
    707 		return error;
    708 	if (kauth_cred_getegid(cred) == gid || ismember)
    709 		return 0;
    710 	return -1;
    711 }
    712 
    713 /*
    714  * Common filesystem object access control check routine.  Accepts a
    715  * vnode, cred, uid, gid, mode, acl, requested access mode.
    716  * Returns 0 on success, or an errno on failure.
    717  */
    718 int
    719 genfs_can_access(vnode_t *vp, kauth_cred_t cred, uid_t file_uid, gid_t file_gid,
    720     mode_t file_mode, struct acl *acl, accmode_t accmode)
    721 {
    722 	accmode_t dac_granted;
    723 	int error;
    724 
    725 	KASSERT((accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND)) == 0);
    726 	KASSERT((accmode & VAPPEND) == 0 || (accmode & VWRITE));
    727 
    728 	/*
    729 	 * Look for a normal, non-privileged way to access the file/directory
    730 	 * as requested.  If it exists, go with that.
    731 	 */
    732 
    733 	dac_granted = 0;
    734 
    735 	/* Check the owner. */
    736 	if (kauth_cred_geteuid(cred) == file_uid) {
    737 		dac_granted |= VADMIN;
    738 		if (file_mode & S_IXUSR)
    739 			dac_granted |= VEXEC;
    740 		if (file_mode & S_IRUSR)
    741 			dac_granted |= VREAD;
    742 		if (file_mode & S_IWUSR)
    743 			dac_granted |= (VWRITE | VAPPEND);
    744 
    745 		goto privchk;
    746 	}
    747 
    748 	/* Otherwise, check the groups (first match) */
    749 	/* Otherwise, check the groups. */
    750 	error = groupmember(file_gid, cred);
    751 	if (error > 0)
    752 		return error;
    753 	if (error == 0) {
    754 		if (file_mode & S_IXGRP)
    755 			dac_granted |= VEXEC;
    756 		if (file_mode & S_IRGRP)
    757 			dac_granted |= VREAD;
    758 		if (file_mode & S_IWGRP)
    759 			dac_granted |= (VWRITE | VAPPEND);
    760 
    761 		goto privchk;
    762 	}
    763 
    764 	/* Otherwise, check everyone else. */
    765 	if (file_mode & S_IXOTH)
    766 		dac_granted |= VEXEC;
    767 	if (file_mode & S_IROTH)
    768 		dac_granted |= VREAD;
    769 	if (file_mode & S_IWOTH)
    770 		dac_granted |= (VWRITE | VAPPEND);
    771 
    772 privchk:
    773 	if ((accmode & dac_granted) == accmode)
    774 		return 0;
    775 
    776 	return (accmode & VADMIN) ? EPERM : EACCES;
    777 }
    778 
    779 /*
    780  * Implement a version of genfs_can_access() that understands POSIX.1e ACL
    781  * semantics;
    782  * the access ACL has already been prepared for evaluation by the file system
    783  * and is passed via 'uid', 'gid', and 'acl'.  Return 0 on success, else an
    784  * errno value.
    785  */
    786 int
    787 genfs_can_access_acl_posix1e(vnode_t *vp, kauth_cred_t cred, uid_t file_uid,
    788     gid_t file_gid, mode_t file_mode, struct acl *acl, accmode_t accmode)
    789 {
    790 	struct acl_entry *acl_other, *acl_mask;
    791 	accmode_t dac_granted;
    792 	accmode_t acl_mask_granted;
    793 	int group_matched, i;
    794 	int error;
    795 
    796 	KASSERT((accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND)) == 0);
    797 	KASSERT((accmode & VAPPEND) == 0 || (accmode & VWRITE));
    798 
    799 	/*
    800 	 * The owner matches if the effective uid associated with the
    801 	 * credential matches that of the ACL_USER_OBJ entry.  While we're
    802 	 * doing the first scan, also cache the location of the ACL_MASK and
    803 	 * ACL_OTHER entries, preventing some future iterations.
    804 	 */
    805 	acl_mask = acl_other = NULL;
    806 	for (i = 0; i < acl->acl_cnt; i++) {
    807 		struct acl_entry *ae = &acl->acl_entry[i];
    808 		switch (ae->ae_tag) {
    809 		case ACL_USER_OBJ:
    810 			if (kauth_cred_geteuid(cred) != file_uid)
    811 				break;
    812 			dac_granted = 0;
    813 			dac_granted |= VADMIN;
    814 			if (ae->ae_perm & ACL_EXECUTE)
    815 				dac_granted |= VEXEC;
    816 			if (ae->ae_perm & ACL_READ)
    817 				dac_granted |= VREAD;
    818 			if (ae->ae_perm & ACL_WRITE)
    819 				dac_granted |= (VWRITE | VAPPEND);
    820 			goto out;
    821 
    822 		case ACL_MASK:
    823 			acl_mask = ae;
    824 			break;
    825 
    826 		case ACL_OTHER:
    827 			acl_other = ae;
    828 			break;
    829 
    830 		default:
    831 			break;
    832 		}
    833 	}
    834 
    835 	/*
    836 	 * An ACL_OTHER entry should always exist in a valid access ACL.  If
    837 	 * it doesn't, then generate a serious failure.	 For now, this means
    838 	 * a debugging message and EPERM, but in the future should probably
    839 	 * be a panic.
    840 	 */
    841 	if (acl_other == NULL) {
    842 		/*
    843 		 * XXX This should never happen
    844 		 */
    845 		printf("%s: ACL_OTHER missing\n", __func__);
    846 		return EPERM;
    847 	}
    848 
    849 	/*
    850 	 * Checks against ACL_USER, ACL_GROUP_OBJ, and ACL_GROUP fields are
    851 	 * masked by an ACL_MASK entry, if any.	 As such, first identify the
    852 	 * ACL_MASK field, then iterate through identifying potential user
    853 	 * matches, then group matches.	 If there is no ACL_MASK, assume that
    854 	 * the mask allows all requests to succeed.
    855 	 */
    856 	if (acl_mask != NULL) {
    857 		acl_mask_granted = 0;
    858 		if (acl_mask->ae_perm & ACL_EXECUTE)
    859 			acl_mask_granted |= VEXEC;
    860 		if (acl_mask->ae_perm & ACL_READ)
    861 			acl_mask_granted |= VREAD;
    862 		if (acl_mask->ae_perm & ACL_WRITE)
    863 			acl_mask_granted |= (VWRITE | VAPPEND);
    864 	} else
    865 		acl_mask_granted = VEXEC | VREAD | VWRITE | VAPPEND;
    866 
    867 	/*
    868 	 * Check ACL_USER ACL entries.	There will either be one or no
    869 	 * matches; if there is one, we accept or rejected based on the
    870 	 * match; otherwise, we continue on to groups.
    871 	 */
    872 	for (i = 0; i < acl->acl_cnt; i++) {
    873 		struct acl_entry *ae = &acl->acl_entry[i];
    874 		switch (ae->ae_tag) {
    875 		case ACL_USER:
    876 			if (kauth_cred_geteuid(cred) != ae->ae_id)
    877 				break;
    878 			dac_granted = 0;
    879 			if (ae->ae_perm & ACL_EXECUTE)
    880 				dac_granted |= VEXEC;
    881 			if (ae->ae_perm & ACL_READ)
    882 				dac_granted |= VREAD;
    883 			if (ae->ae_perm & ACL_WRITE)
    884 				dac_granted |= (VWRITE | VAPPEND);
    885 			dac_granted &= acl_mask_granted;
    886 			goto out;
    887 		}
    888 	}
    889 
    890 	/*
    891 	 * Group match is best-match, not first-match, so find a "best"
    892 	 * match.  Iterate across, testing each potential group match.	Make
    893 	 * sure we keep track of whether we found a match or not, so that we
    894 	 * know if we should try again with any available privilege, or if we
    895 	 * should move on to ACL_OTHER.
    896 	 */
    897 	group_matched = 0;
    898 	for (i = 0; i < acl->acl_cnt; i++) {
    899 		struct acl_entry *ae = &acl->acl_entry[i];
    900 		switch (ae->ae_tag) {
    901 		case ACL_GROUP_OBJ:
    902 			error = groupmember(file_gid, cred);
    903 			if (error > 0)
    904 				return error;
    905 			if (error)
    906 				break;
    907 			dac_granted = 0;
    908 			if (ae->ae_perm & ACL_EXECUTE)
    909 				dac_granted |= VEXEC;
    910 			if (ae->ae_perm & ACL_READ)
    911 				dac_granted |= VREAD;
    912 			if (ae->ae_perm & ACL_WRITE)
    913 				dac_granted |= (VWRITE | VAPPEND);
    914 			dac_granted  &= acl_mask_granted;
    915 
    916 			if ((accmode & dac_granted) == accmode)
    917 				return 0;
    918 
    919 			group_matched = 1;
    920 			break;
    921 
    922 		case ACL_GROUP:
    923 			error = groupmember(ae->ae_id, cred);
    924 			if (error > 0)
    925 				return error;
    926 			if (error)
    927 				break;
    928 			dac_granted = 0;
    929 			if (ae->ae_perm & ACL_EXECUTE)
    930 				dac_granted |= VEXEC;
    931 			if (ae->ae_perm & ACL_READ)
    932 				dac_granted |= VREAD;
    933 			if (ae->ae_perm & ACL_WRITE)
    934 				dac_granted |= (VWRITE | VAPPEND);
    935 			dac_granted  &= acl_mask_granted;
    936 
    937 			if ((accmode & dac_granted) == accmode)
    938 				return 0;
    939 
    940 			group_matched = 1;
    941 			break;
    942 
    943 		default:
    944 			break;
    945 		}
    946 	}
    947 
    948 	if (group_matched == 1) {
    949 		/*
    950 		 * There was a match, but it did not grant rights via pure
    951 		 * DAC.	 Try again, this time with privilege.
    952 		 */
    953 		for (i = 0; i < acl->acl_cnt; i++) {
    954 			struct acl_entry *ae = &acl->acl_entry[i];
    955 			switch (ae->ae_tag) {
    956 			case ACL_GROUP_OBJ:
    957 				error = groupmember(file_gid, cred);
    958 				if (error > 0)
    959 					return error;
    960 				if (error)
    961 					break;
    962 				dac_granted = 0;
    963 				if (ae->ae_perm & ACL_EXECUTE)
    964 					dac_granted |= VEXEC;
    965 				if (ae->ae_perm & ACL_READ)
    966 					dac_granted |= VREAD;
    967 				if (ae->ae_perm & ACL_WRITE)
    968 					dac_granted |= (VWRITE | VAPPEND);
    969 				dac_granted &= acl_mask_granted;
    970 				goto out;
    971 
    972 			case ACL_GROUP:
    973 				error = groupmember(ae->ae_id, cred);
    974 				if (error > 0)
    975 					return error;
    976 				if (error)
    977 					break;
    978 				dac_granted = 0;
    979 				if (ae->ae_perm & ACL_EXECUTE)
    980 				dac_granted |= VEXEC;
    981 				if (ae->ae_perm & ACL_READ)
    982 					dac_granted |= VREAD;
    983 				if (ae->ae_perm & ACL_WRITE)
    984 					dac_granted |= (VWRITE | VAPPEND);
    985 				dac_granted &= acl_mask_granted;
    986 
    987 				goto out;
    988 			default:
    989 				break;
    990 			}
    991 		}
    992 		/*
    993 		 * Even with privilege, group membership was not sufficient.
    994 		 * Return failure.
    995 		 */
    996 		dac_granted = 0;
    997 		goto out;
    998 	}
    999 
   1000 	/*
   1001 	 * Fall back on ACL_OTHER.  ACL_MASK is not applied to ACL_OTHER.
   1002 	 */
   1003 	dac_granted = 0;
   1004 	if (acl_other->ae_perm & ACL_EXECUTE)
   1005 		dac_granted |= VEXEC;
   1006 	if (acl_other->ae_perm & ACL_READ)
   1007 		dac_granted |= VREAD;
   1008 	if (acl_other->ae_perm & ACL_WRITE)
   1009 		dac_granted |= (VWRITE | VAPPEND);
   1010 
   1011 out:
   1012 	if ((accmode & dac_granted) == accmode)
   1013 		return 0;
   1014 	return (accmode & VADMIN) ? EPERM : EACCES;
   1015 }
   1016 
   1017 static struct {
   1018 	accmode_t accmode;
   1019 	int mask;
   1020 } accmode2mask[] = {
   1021 	{ VREAD, ACL_READ_DATA },
   1022 	{ VWRITE, ACL_WRITE_DATA },
   1023 	{ VAPPEND, ACL_APPEND_DATA },
   1024 	{ VEXEC, ACL_EXECUTE },
   1025 	{ VREAD_NAMED_ATTRS, ACL_READ_NAMED_ATTRS },
   1026 	{ VWRITE_NAMED_ATTRS, ACL_WRITE_NAMED_ATTRS },
   1027 	{ VDELETE_CHILD, ACL_DELETE_CHILD },
   1028 	{ VREAD_ATTRIBUTES, ACL_READ_ATTRIBUTES },
   1029 	{ VWRITE_ATTRIBUTES, ACL_WRITE_ATTRIBUTES },
   1030 	{ VDELETE, ACL_DELETE },
   1031 	{ VREAD_ACL, ACL_READ_ACL },
   1032 	{ VWRITE_ACL, ACL_WRITE_ACL },
   1033 	{ VWRITE_OWNER, ACL_WRITE_OWNER },
   1034 	{ VSYNCHRONIZE, ACL_SYNCHRONIZE },
   1035 	{ 0, 0 },
   1036 };
   1037 
   1038 static int
   1039 _access_mask_from_accmode(accmode_t accmode)
   1040 {
   1041 	int access_mask = 0, i;
   1042 
   1043 	for (i = 0; accmode2mask[i].accmode != 0; i++) {
   1044 		if (accmode & accmode2mask[i].accmode)
   1045 			access_mask |= accmode2mask[i].mask;
   1046 	}
   1047 
   1048 	/*
   1049 	 * VAPPEND is just a modifier for VWRITE; if the caller asked
   1050 	 * for 'VAPPEND | VWRITE', we want to check for ACL_APPEND_DATA only.
   1051 	 */
   1052 	if (access_mask & ACL_APPEND_DATA)
   1053 		access_mask &= ~ACL_WRITE_DATA;
   1054 
   1055 	return (access_mask);
   1056 }
   1057 
   1058 /*
   1059  * Return 0, iff access is allowed, 1 otherwise.
   1060  */
   1061 static int
   1062 _acl_denies(const struct acl *aclp, int access_mask, kauth_cred_t cred,
   1063     int file_uid, int file_gid, int *denied_explicitly)
   1064 {
   1065 	int i, error;
   1066 	const struct acl_entry *ae;
   1067 
   1068 	if (denied_explicitly != NULL)
   1069 		*denied_explicitly = 0;
   1070 
   1071 	KASSERT(aclp->acl_cnt <= ACL_MAX_ENTRIES);
   1072 
   1073 	for (i = 0; i < aclp->acl_cnt; i++) {
   1074 		ae = &(aclp->acl_entry[i]);
   1075 
   1076 		if (ae->ae_entry_type != ACL_ENTRY_TYPE_ALLOW &&
   1077 		    ae->ae_entry_type != ACL_ENTRY_TYPE_DENY)
   1078 			continue;
   1079 		if (ae->ae_flags & ACL_ENTRY_INHERIT_ONLY)
   1080 			continue;
   1081 		switch (ae->ae_tag) {
   1082 		case ACL_USER_OBJ:
   1083 			if (kauth_cred_geteuid(cred) != file_uid)
   1084 				continue;
   1085 			break;
   1086 		case ACL_USER:
   1087 			if (kauth_cred_geteuid(cred) != ae->ae_id)
   1088 				continue;
   1089 			break;
   1090 		case ACL_GROUP_OBJ:
   1091 			error = groupmember(file_gid, cred);
   1092 			if (error > 0)
   1093 				return error;
   1094 			if (error != 0)
   1095 				continue;
   1096 			break;
   1097 		case ACL_GROUP:
   1098 			error = groupmember(ae->ae_id, cred);
   1099 			if (error > 0)
   1100 				return error;
   1101 			if (error != 0)
   1102 				continue;
   1103 			break;
   1104 		default:
   1105 			KASSERT(ae->ae_tag == ACL_EVERYONE);
   1106 		}
   1107 
   1108 		if (ae->ae_entry_type == ACL_ENTRY_TYPE_DENY) {
   1109 			if (ae->ae_perm & access_mask) {
   1110 				if (denied_explicitly != NULL)
   1111 					*denied_explicitly = 1;
   1112 				return (1);
   1113 			}
   1114 		}
   1115 
   1116 		access_mask &= ~(ae->ae_perm);
   1117 		if (access_mask == 0)
   1118 			return (0);
   1119 	}
   1120 
   1121 	if (access_mask == 0)
   1122 		return (0);
   1123 
   1124 	return (1);
   1125 }
   1126 
   1127 int
   1128 genfs_can_access_acl_nfs4(vnode_t *vp, kauth_cred_t cred, uid_t file_uid,
   1129     gid_t file_gid, mode_t file_mode, struct acl *aclp, accmode_t accmode)
   1130 {
   1131 	int denied, explicitly_denied, access_mask, is_directory,
   1132 	    must_be_owner = 0;
   1133 	file_mode = 0;
   1134 
   1135 	KASSERT((accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND |
   1136 	    VEXPLICIT_DENY | VREAD_NAMED_ATTRS | VWRITE_NAMED_ATTRS |
   1137 	    VDELETE_CHILD | VREAD_ATTRIBUTES | VWRITE_ATTRIBUTES | VDELETE |
   1138 	    VREAD_ACL | VWRITE_ACL | VWRITE_OWNER | VSYNCHRONIZE)) == 0);
   1139 	KASSERT((accmode & VAPPEND) == 0 || (accmode & VWRITE));
   1140 
   1141 	if (accmode & VADMIN)
   1142 		must_be_owner = 1;
   1143 
   1144 	/*
   1145 	 * Ignore VSYNCHRONIZE permission.
   1146 	 */
   1147 	accmode &= ~VSYNCHRONIZE;
   1148 
   1149 	access_mask = _access_mask_from_accmode(accmode);
   1150 
   1151 	if (vp && vp->v_type == VDIR)
   1152 		is_directory = 1;
   1153 	else
   1154 		is_directory = 0;
   1155 
   1156 	/*
   1157 	 * File owner is always allowed to read and write the ACL
   1158 	 * and basic attributes.  This is to prevent a situation
   1159 	 * where user would change ACL in a way that prevents him
   1160 	 * from undoing the change.
   1161 	 */
   1162 	if (kauth_cred_geteuid(cred) == file_uid)
   1163 		access_mask &= ~(ACL_READ_ACL | ACL_WRITE_ACL |
   1164 		    ACL_READ_ATTRIBUTES | ACL_WRITE_ATTRIBUTES);
   1165 
   1166 	/*
   1167 	 * Ignore append permission for regular files; use write
   1168 	 * permission instead.
   1169 	 */
   1170 	if (!is_directory && (access_mask & ACL_APPEND_DATA)) {
   1171 		access_mask &= ~ACL_APPEND_DATA;
   1172 		access_mask |= ACL_WRITE_DATA;
   1173 	}
   1174 
   1175 	denied = _acl_denies(aclp, access_mask, cred, file_uid, file_gid,
   1176 	    &explicitly_denied);
   1177 
   1178 	if (must_be_owner) {
   1179 		if (kauth_cred_geteuid(cred) != file_uid)
   1180 			denied = EPERM;
   1181 	}
   1182 
   1183 	/*
   1184 	 * For VEXEC, ensure that at least one execute bit is set for
   1185 	 * non-directories. We have to check the mode here to stay
   1186 	 * consistent with execve(2). See the test in
   1187 	 * exec_check_permissions().
   1188 	 */
   1189 	__acl_nfs4_sync_mode_from_acl(&file_mode, aclp);
   1190 	if (!denied && !is_directory && (accmode & VEXEC) &&
   1191 	    (file_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)
   1192 		denied = EACCES;
   1193 
   1194 	if (!denied)
   1195 		return (0);
   1196 
   1197 	/*
   1198 	 * Access failed.  Iff it was not denied explicitly and
   1199 	 * VEXPLICIT_DENY flag was specified, allow access.
   1200 	 */
   1201 	if ((accmode & VEXPLICIT_DENY) && explicitly_denied == 0)
   1202 		return (0);
   1203 
   1204 	accmode &= ~VEXPLICIT_DENY;
   1205 
   1206 	if (accmode & (VADMIN_PERMS | VDELETE_CHILD | VDELETE))
   1207 		denied = EPERM;
   1208 	else
   1209 		denied = EACCES;
   1210 
   1211 	return (denied);
   1212 }
   1213 
   1214 /*
   1215  * Common routine to check if chmod() is allowed.
   1216  *
   1217  * Policy:
   1218  *   - You must own the file, and
   1219  *     - You must not set the "sticky" bit (meaningless, see chmod(2))
   1220  *     - You must be a member of the group if you're trying to set the
   1221  *	 SGIDf bit
   1222  *
   1223  * vp - vnode of the file-system object
   1224  * cred - credentials of the invoker
   1225  * cur_uid, cur_gid - current uid/gid of the file-system object
   1226  * new_mode - new mode for the file-system object
   1227  *
   1228  * Returns 0 if the change is allowed, or an error value otherwise.
   1229  */
   1230 int
   1231 genfs_can_chmod(vnode_t *vp, kauth_cred_t cred, uid_t cur_uid,
   1232     gid_t cur_gid, mode_t new_mode)
   1233 {
   1234 	int error;
   1235 
   1236 	/*
   1237 	 * To modify the permissions on a file, must possess VADMIN
   1238 	 * for that file.
   1239 	 */
   1240 	if ((error = VOP_ACCESSX(vp, VWRITE_ACL, cred)) != 0)
   1241 		return (error);
   1242 
   1243 	/*
   1244 	 * Unprivileged users can't set the sticky bit on files.
   1245 	 */
   1246 	if ((vp->v_type != VDIR) && (new_mode & S_ISTXT))
   1247 		return (EFTYPE);
   1248 
   1249 	/*
   1250 	 * If the invoker is trying to set the SGID bit on the file,
   1251 	 * check group membership.
   1252 	 */
   1253 	if (new_mode & S_ISGID) {
   1254 		int ismember;
   1255 
   1256 		error = kauth_cred_ismember_gid(cred, cur_gid,
   1257 		    &ismember);
   1258 		if (error || !ismember)
   1259 			return (EPERM);
   1260 	}
   1261 
   1262 	/*
   1263 	 * Deny setting setuid if we are not the file owner.
   1264 	 */
   1265 	if ((new_mode & S_ISUID) && cur_uid != kauth_cred_geteuid(cred))
   1266 		return (EPERM);
   1267 
   1268 	return (0);
   1269 }
   1270 
   1271 /*
   1272  * Common routine to check if chown() is allowed.
   1273  *
   1274  * Policy:
   1275  *   - You must own the file, and
   1276  *     - You must not try to change ownership, and
   1277  *     - You must be member of the new group
   1278  *
   1279  * vp - vnode
   1280  * cred - credentials of the invoker
   1281  * cur_uid, cur_gid - current uid/gid of the file-system object
   1282  * new_uid, new_gid - target uid/gid of the file-system object
   1283  *
   1284  * Returns 0 if the change is allowed, or an error value otherwise.
   1285  */
   1286 int
   1287 genfs_can_chown(vnode_t *vp, kauth_cred_t cred, uid_t cur_uid,
   1288     gid_t cur_gid, uid_t new_uid, gid_t new_gid)
   1289 {
   1290 	int error, ismember;
   1291 
   1292 	/*
   1293 	 * To modify the ownership of a file, must possess VADMIN for that
   1294 	 * file.
   1295 	 */
   1296 	if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred)) != 0)
   1297 		return (error);
   1298 
   1299 	/*
   1300 	 * You can only change ownership of a file if:
   1301 	 * You own the file and...
   1302 	 */
   1303 	if (kauth_cred_geteuid(cred) == cur_uid) {
   1304 		/*
   1305 		 * You don't try to change ownership, and...
   1306 		 */
   1307 		if (new_uid != cur_uid)
   1308 			return (EPERM);
   1309 
   1310 		/*
   1311 		 * You don't try to change group (no-op), or...
   1312 		 */
   1313 		if (new_gid == cur_gid)
   1314 			return (0);
   1315 
   1316 		/*
   1317 		 * Your effective gid is the new gid, or...
   1318 		 */
   1319 		if (kauth_cred_getegid(cred) == new_gid)
   1320 			return (0);
   1321 
   1322 		/*
   1323 		 * The new gid is one you're a member of.
   1324 		 */
   1325 		ismember = 0;
   1326 		error = kauth_cred_ismember_gid(cred, new_gid,
   1327 		    &ismember);
   1328 		if (!error && ismember)
   1329 			return (0);
   1330 	}
   1331 
   1332 	return (EPERM);
   1333 }
   1334 
   1335 int
   1336 genfs_can_chtimes(vnode_t *vp, kauth_cred_t cred, uid_t owner_uid,
   1337     u_int vaflags)
   1338 {
   1339 	int error;
   1340 	/*
   1341 	 * Grant permission if the caller is the owner of the file, or
   1342 	 * the super-user, or has ACL_WRITE_ATTRIBUTES permission on
   1343 	 * on the file.	 If the time pointer is null, then write
   1344 	 * permission on the file is also sufficient.
   1345 	 *
   1346 	 * From NFSv4.1, draft 21, 6.2.1.3.1, Discussion of Mask Attributes:
   1347 	 * A user having ACL_WRITE_DATA or ACL_WRITE_ATTRIBUTES
   1348 	 * will be allowed to set the times [..] to the current
   1349 	 * server time.
   1350 	 */
   1351 	if ((error = VOP_ACCESSX(vp, VWRITE_ATTRIBUTES, cred)) != 0)
   1352 		return (vaflags & VA_UTIMES_NULL) == 0 ? EPERM : EACCES;
   1353 
   1354 	/* Must be owner, or... */
   1355 	if (kauth_cred_geteuid(cred) == owner_uid)
   1356 		return (0);
   1357 
   1358 	/* set the times to the current time, and... */
   1359 	if ((vaflags & VA_UTIMES_NULL) == 0)
   1360 		return (EPERM);
   1361 
   1362 	/* have write access. */
   1363 	error = VOP_ACCESS(vp, VWRITE, cred);
   1364 	if (error)
   1365 		return (error);
   1366 
   1367 	return (0);
   1368 }
   1369 
   1370 /*
   1371  * Common routine to check if chflags() is allowed.
   1372  *
   1373  * Policy:
   1374  *   - You must own the file, and
   1375  *   - You must not change system flags, and
   1376  *   - You must not change flags on character/block devices.
   1377  *
   1378  * vp - vnode
   1379  * cred - credentials of the invoker
   1380  * owner_uid - uid of the file-system object
   1381  * changing_sysflags - true if the invoker wants to change system flags
   1382  */
   1383 int
   1384 genfs_can_chflags(vnode_t *vp, kauth_cred_t cred,
   1385      uid_t owner_uid, bool changing_sysflags)
   1386 {
   1387 
   1388 	/* The user must own the file. */
   1389 	if (kauth_cred_geteuid(cred) != owner_uid) {
   1390 		return EPERM;
   1391 	}
   1392 
   1393 	if (changing_sysflags) {
   1394 		return EPERM;
   1395 	}
   1396 
   1397 	/*
   1398 	 * Unprivileged users cannot change the flags on devices, even if they
   1399 	 * own them.
   1400 	 */
   1401 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
   1402 		return EPERM;
   1403 	}
   1404 
   1405 	return 0;
   1406 }
   1407 
   1408 /*
   1409  * Common "sticky" policy.
   1410  *
   1411  * When a directory is "sticky" (as determined by the caller), this
   1412  * function may help implementing the following policy:
   1413  * - Renaming a file in it is only possible if the user owns the directory
   1414  *   or the file being renamed.
   1415  * - Deleting a file from it is only possible if the user owns the
   1416  *   directory or the file being deleted.
   1417  */
   1418 int
   1419 genfs_can_sticky(vnode_t *vp, kauth_cred_t cred, uid_t dir_uid, uid_t file_uid)
   1420 {
   1421 	if (kauth_cred_geteuid(cred) != dir_uid &&
   1422 	    kauth_cred_geteuid(cred) != file_uid)
   1423 		return EPERM;
   1424 
   1425 	return 0;
   1426 }
   1427 
   1428 int
   1429 genfs_can_extattr(vnode_t *vp, kauth_cred_t cred, accmode_t accmode,
   1430     int attrnamespace)
   1431 {
   1432 	/*
   1433 	 * Kernel-invoked always succeeds.
   1434 	 */
   1435 	if (cred == NOCRED)
   1436 		return 0;
   1437 
   1438 	switch (attrnamespace) {
   1439 	case EXTATTR_NAMESPACE_SYSTEM:
   1440 		return kauth_authorize_system(cred, KAUTH_SYSTEM_FS_EXTATTR,
   1441 		    0, vp->v_mount, NULL, NULL);
   1442 	case EXTATTR_NAMESPACE_USER:
   1443 		return VOP_ACCESS(vp, accmode, cred);
   1444 	default:
   1445 		return EPERM;
   1446 	}
   1447 }
   1448 
   1449 int
   1450 genfs_access(void *v)
   1451 {
   1452 	struct vop_access_args *ap = v;
   1453 
   1454 	KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN |
   1455 	    VAPPEND)) == 0);
   1456 
   1457 	return VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred);
   1458 }
   1459 
   1460 int
   1461 genfs_accessx(void *v)
   1462 {
   1463 	struct vop_accessx_args *ap = v;
   1464 	int error;
   1465 	accmode_t accmode = ap->a_accmode;
   1466 	error = vfs_unixify_accmode(&accmode);
   1467 	if (error != 0)
   1468 		return error;
   1469 
   1470 	if (accmode == 0)
   1471 		return 0;
   1472 
   1473 	return VOP_ACCESS(ap->a_vp, accmode, ap->a_cred);
   1474 }
   1475 
   1476 /*
   1477  * genfs_pathconf:
   1478  *
   1479  * Standard implementation of POSIX pathconf, to get information about limits
   1480  * for a filesystem.
   1481  * Override per filesystem for the case where the filesystem has smaller
   1482  * limits.
   1483  */
   1484 int
   1485 genfs_pathconf(void *v)
   1486 {
   1487 	struct vop_pathconf_args *ap = v;
   1488 
   1489 	switch (ap->a_name) {
   1490 	case _PC_PATH_MAX:
   1491 		*ap->a_retval = PATH_MAX;
   1492 		return 0;
   1493 	case _PC_ACL_EXTENDED:
   1494 	case _PC_ACL_NFS4:
   1495 		*ap->a_retval = 0;
   1496 		return 0;
   1497 	default:
   1498 		return EINVAL;
   1499 	}
   1500 }
   1501