Home | History | Annotate | Line # | Download | only in union
union_vnops.c revision 1.56
      1 /*	$NetBSD: union_vnops.c,v 1.56 2014/02/16 09:50:25 hannken Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1992, 1993, 1994, 1995
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * This code is derived from software contributed to Berkeley by
      8  * Jan-Simon Pendry.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. Neither the name of the University nor the names of its contributors
     19  *    may be used to endorse or promote products derived from this software
     20  *    without specific prior written permission.
     21  *
     22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     32  * SUCH DAMAGE.
     33  *
     34  *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
     35  */
     36 
     37 /*
     38  * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
     39  *
     40  * This code is derived from software contributed to Berkeley by
     41  * Jan-Simon Pendry.
     42  *
     43  * Redistribution and use in source and binary forms, with or without
     44  * modification, are permitted provided that the following conditions
     45  * are met:
     46  * 1. Redistributions of source code must retain the above copyright
     47  *    notice, this list of conditions and the following disclaimer.
     48  * 2. Redistributions in binary form must reproduce the above copyright
     49  *    notice, this list of conditions and the following disclaimer in the
     50  *    documentation and/or other materials provided with the distribution.
     51  * 3. All advertising materials mentioning features or use of this software
     52  *    must display the following acknowledgement:
     53  *	This product includes software developed by the University of
     54  *	California, Berkeley and its contributors.
     55  * 4. Neither the name of the University nor the names of its contributors
     56  *    may be used to endorse or promote products derived from this software
     57  *    without specific prior written permission.
     58  *
     59  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     60  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     61  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     62  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     63  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     64  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     65  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     66  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     67  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     68  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     69  * SUCH DAMAGE.
     70  *
     71  *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
     72  */
     73 
     74 #include <sys/cdefs.h>
     75 __KERNEL_RCSID(0, "$NetBSD: union_vnops.c,v 1.56 2014/02/16 09:50:25 hannken Exp $");
     76 
     77 #include <sys/param.h>
     78 #include <sys/systm.h>
     79 #include <sys/proc.h>
     80 #include <sys/file.h>
     81 #include <sys/time.h>
     82 #include <sys/stat.h>
     83 #include <sys/vnode.h>
     84 #include <sys/mount.h>
     85 #include <sys/namei.h>
     86 #include <sys/malloc.h>
     87 #include <sys/buf.h>
     88 #include <sys/queue.h>
     89 #include <sys/lock.h>
     90 #include <sys/kauth.h>
     91 
     92 #include <fs/union/union.h>
     93 #include <miscfs/genfs/genfs.h>
     94 #include <miscfs/specfs/specdev.h>
     95 
     96 int union_lookup(void *);
     97 int union_create(void *);
     98 int union_whiteout(void *);
     99 int union_mknod(void *);
    100 int union_open(void *);
    101 int union_close(void *);
    102 int union_access(void *);
    103 int union_getattr(void *);
    104 int union_setattr(void *);
    105 int union_read(void *);
    106 int union_write(void *);
    107 int union_ioctl(void *);
    108 int union_poll(void *);
    109 int union_revoke(void *);
    110 int union_mmap(void *);
    111 int union_fsync(void *);
    112 int union_seek(void *);
    113 int union_remove(void *);
    114 int union_link(void *);
    115 int union_rename(void *);
    116 int union_mkdir(void *);
    117 int union_rmdir(void *);
    118 int union_symlink(void *);
    119 int union_readdir(void *);
    120 int union_readlink(void *);
    121 int union_abortop(void *);
    122 int union_inactive(void *);
    123 int union_reclaim(void *);
    124 int union_lock(void *);
    125 int union_unlock(void *);
    126 int union_bmap(void *);
    127 int union_print(void *);
    128 int union_islocked(void *);
    129 int union_pathconf(void *);
    130 int union_advlock(void *);
    131 int union_strategy(void *);
    132 int union_bwrite(void *);
    133 int union_getpages(void *);
    134 int union_putpages(void *);
    135 int union_kqfilter(void *);
    136 
    137 static int union_lookup1(struct vnode *, struct vnode **,
    138 			      struct vnode **, struct componentname *);
    139 
    140 
    141 /*
    142  * Global vfs data structures.
         *
         * union_vnodeop_entries pairs each vnode operation descriptor with the
         * union routine that implements it and is terminated by a NULL/NULL
         * entry.  The vop_default_desc entry names vn_default_error, presumably
         * the fallback for operations not listed here (confirm against the
         * vnode operation vector setup code).  union_vnodeop_opv_desc ties the
         * table to the union_vnodeop_p operation vector pointer.
    143  */
    144 int (**union_vnodeop_p)(void *);
    145 const struct vnodeopv_entry_desc union_vnodeop_entries[] = {
    146 	{ &vop_default_desc, vn_default_error },
    147 	{ &vop_lookup_desc, union_lookup },		/* lookup */
    148 	{ &vop_create_desc, union_create },		/* create */
    149 	{ &vop_whiteout_desc, union_whiteout },		/* whiteout */
    150 	{ &vop_mknod_desc, union_mknod },		/* mknod */
    151 	{ &vop_open_desc, union_open },			/* open */
    152 	{ &vop_close_desc, union_close },		/* close */
    153 	{ &vop_access_desc, union_access },		/* access */
    154 	{ &vop_getattr_desc, union_getattr },		/* getattr */
    155 	{ &vop_setattr_desc, union_setattr },		/* setattr */
    156 	{ &vop_read_desc, union_read },			/* read */
    157 	{ &vop_write_desc, union_write },		/* write */
    158 	{ &vop_ioctl_desc, union_ioctl },		/* ioctl */
    159 	{ &vop_poll_desc, union_poll },			/* poll */
    160 	{ &vop_revoke_desc, union_revoke },		/* revoke */
    161 	{ &vop_mmap_desc, union_mmap },			/* mmap */
    162 	{ &vop_fsync_desc, union_fsync },		/* fsync */
    163 	{ &vop_seek_desc, union_seek },			/* seek */
    164 	{ &vop_remove_desc, union_remove },		/* remove */
    165 	{ &vop_link_desc, union_link },			/* link */
    166 	{ &vop_rename_desc, union_rename },		/* rename */
    167 	{ &vop_mkdir_desc, union_mkdir },		/* mkdir */
    168 	{ &vop_rmdir_desc, union_rmdir },		/* rmdir */
    169 	{ &vop_symlink_desc, union_symlink },		/* symlink */
    170 	{ &vop_readdir_desc, union_readdir },		/* readdir */
    171 	{ &vop_readlink_desc, union_readlink },		/* readlink */
    172 	{ &vop_abortop_desc, union_abortop },		/* abortop */
    173 	{ &vop_inactive_desc, union_inactive },		/* inactive */
    174 	{ &vop_reclaim_desc, union_reclaim },		/* reclaim */
    175 	{ &vop_lock_desc, union_lock },			/* lock */
    176 	{ &vop_unlock_desc, union_unlock },		/* unlock */
    177 	{ &vop_bmap_desc, union_bmap },			/* bmap */
    178 	{ &vop_strategy_desc, union_strategy },		/* strategy */
    179 	{ &vop_bwrite_desc, union_bwrite },		/* bwrite */
    180 	{ &vop_print_desc, union_print },		/* print */
    181 	{ &vop_islocked_desc, union_islocked },		/* islocked */
    182 	{ &vop_pathconf_desc, union_pathconf },		/* pathconf */
    183 	{ &vop_advlock_desc, union_advlock },		/* advlock */
    184 	{ &vop_getpages_desc, union_getpages },		/* getpages */
    185 	{ &vop_putpages_desc, union_putpages },		/* putpages */
    186 	{ &vop_kqfilter_desc, union_kqfilter },		/* kqfilter */
    187 	{ NULL, NULL }
    188 };
    189 const struct vnodeopv_desc union_vnodeop_opv_desc =
    190 	{ &union_vnodeop_p, union_vnodeop_entries };
    191 
    /*
     * True when the vnode's type is a block device, character device,
     * socket or FIFO.  The argument is evaluated up to four times, so it
     * must be an expression without side effects.
     */
    192 #define NODE_IS_SPECIAL(vp) \
    193 	((vp)->v_type == VBLK || (vp)->v_type == VCHR || \
    194 	(vp)->v_type == VSOCK || (vp)->v_type == VFIFO)
    195 
    /*
     * union_lookup1: look up one pathname component inside a single layer.
     *
     *	udvp	root vnode of the layer; mount points are never crossed
     *		at this vnode.
     *	dvpp	in: directory to search.  For a `..' lookup *dvpp is
     *		replaced by the covered vnode whenever the search steps
     *		back across a mount point.
     *	vpp	out: the vnode that was found, set only on success.
     *	cnp	the component being looked up.
     *
     * Returns 0 on success, otherwise the error from VOP_LOOKUP(),
     * vn_lock() or VFS_ROOT().  On success the result has been locked
     * with vn_lock(LK_EXCLUSIVE) when it differs from the directory; a
     * vnode obtained through VFS_ROOT() is returned as that call left it
     * (presumably locked -- confirm against VFS_ROOT's contract).
     */
    196 static int
    197 union_lookup1(struct vnode *udvp, struct vnode **dvpp, struct vnode **vpp,
    198 	struct componentname *cnp)
    199 {
    200 	int error;
    201 	struct vnode *tdvp;
    202 	struct vnode *dvp;
    203 	struct mount *mp;
    204 
    205 	dvp = *dvpp;
    206 
    207 	/*
    208 	 * If stepping up the directory tree, check for going
    209 	 * back across the mount point, in which case do what
    210 	 * lookup would do by stepping back down the mount
    211 	 * hierarchy.
    212 	 */
    213 	if (cnp->cn_flags & ISDOTDOT) {
    214 		while ((dvp != udvp) && (dvp->v_vflag & VV_ROOT)) {
    215 			/*
    216 			 * Don't do the NOCROSSMOUNT check
    217 			 * at this level.  By definition,
    218 			 * union fs deals with namespaces, not
    219 			 * filesystems.
    220 			 */
    221 			tdvp = dvp;
    222 			*dvpp = dvp = dvp->v_mount->mnt_vnodecovered;
        			/* Hand the lock over from the mount root to the covered vnode. */
    223 			VOP_UNLOCK(tdvp);
    224 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
    225 		}
    226 	}
    227 
    228         error = VOP_LOOKUP(dvp, &tdvp, cnp);
    229 	if (error)
    230 		return (error);
        	/*
        	 * Lock the result ourselves when it is not the directory itself.
        	 * For `..' the directory lock is dropped around the vn_lock() call
        	 * and retaken afterwards, presumably to avoid a parent/child lock
        	 * order reversal.  A failed vn_lock() drops the reference on the
        	 * result.
        	 */
    231 	if (dvp != tdvp) {
    232 		if (cnp->cn_flags & ISDOTDOT)
    233 			VOP_UNLOCK(dvp);
    234 		error = vn_lock(tdvp, LK_EXCLUSIVE);
    235 		if (cnp->cn_flags & ISDOTDOT)
    236 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
    237 		if (error) {
    238 			vrele(tdvp);
    239 			return error;
    240 		}
    241 		dvp = tdvp;
    242 	}
    243 
    244 	/*
    245 	 * Lastly check if the current node is a mount point in
    246 	 * which case walk up the mount hierarchy making sure not to
    247 	 * bump into the root of the mount tree (ie. dvp != udvp).
    248 	 */
    249 	while (dvp != udvp && (dvp->v_type == VDIR) &&
    250 	       (mp = dvp->v_mountedhere)) {
        		/*
        		 * NOTE(review): a failing vfs_busy() just re-evaluates the
        		 * loop condition, so this retries without sleeping until the
        		 * mount can be busied or is no longer mounted here.
        		 */
    251 		if (vfs_busy(mp, NULL))
    252 			continue;
        		/* Swap the covered vnode for the root of the mounted file system. */
    253 		vput(dvp);
    254 		error = VFS_ROOT(mp, &tdvp);
    255 		vfs_unbusy(mp, false, NULL);
    256 		if (error) {
    257 			return (error);
    258 		}
    259 		dvp = tdvp;
    260 	}
    261 
    262 	*vpp = dvp;
    263 	return (0);
    264 }
    265 
    /*
     * union_lookup: lookup operation for the union layer.
     *
     * Looks the component up in the upper directory and, unless a whiteout
     * or an OPAQUE upper directory hides it, in the lower directory as
     * well (always with nameiop LOOKUP).  The results are combined into a
     * union vnode by union_allocvp().  When only a lower directory exists,
     * a shadow directory is created in the upper layer through
     * union_mkshadow().
     *
     * Returns 0 with the resulting vnode in *a_vpp, or an errno value.
     * DELETE and RENAME of the last component are refused with EROFS on a
     * read-only mount before any lookup is done.
     */
    266 int
    267 union_lookup(void *v)
    268 {
    269 	struct vop_lookup_v2_args /* {
    270 		struct vnodeop_desc *a_desc;
    271 		struct vnode *a_dvp;
    272 		struct vnode **a_vpp;
    273 		struct componentname *a_cnp;
    274 	} */ *ap = v;
    275 	int error;
    276 	int uerror, lerror;
    277 	struct vnode *uppervp, *lowervp;
    278 	struct vnode *upperdvp, *lowerdvp;
    279 	struct vnode *dvp = ap->a_dvp;
    280 	struct union_node *dun = VTOUNION(dvp);
    281 	struct componentname *cnp = ap->a_cnp;
    282 	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
    283 	kauth_cred_t saved_cred = NULL;
    284 	int iswhiteout;
    285 	struct vattr va;
    286 
    287 #ifdef notyet
    288 	if (cnp->cn_namelen == 3 &&
    289 			cnp->cn_nameptr[2] == '.' &&
    290 			cnp->cn_nameptr[1] == '.' &&
    291 			cnp->cn_nameptr[0] == '.') {
    292 		dvp = *ap->a_vpp = LOWERVP(ap->a_dvp);
    293 		if (dvp == NULLVP)
    294 			return (ENOENT);
    295 		vref(dvp);
    296 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
    297 		return (0);
    298 	}
    299 #endif
    300 
    301 	if ((cnp->cn_flags & ISLASTCN) &&
    302 	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
    303 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
    304 		return (EROFS);
    305 
        /*
         * Restart point: the layer directories are re-read and the per-attempt
         * state is reset.  Re-entered after a shadow directory has been created
         * for an operation other than LOOKUP.
         */
    306 start:
    307 	upperdvp = dun->un_uppervp;
    308 	lowerdvp = dun->un_lowervp;
    309 	uppervp = NULLVP;
    310 	lowervp = NULLVP;
    311 	iswhiteout = 0;
    312 
    313 	/*
    314 	 * do the lookup in the upper level.
    315 	 * if that level consumes additional pathnames,
    316 	 * then assume that something special is going
    317 	 * on and just return that vnode.
    318 	 */
    319 	if (upperdvp != NULLVP) {
    320 		uerror = union_lookup1(um->um_uppervp, &upperdvp,
    321 					&uppervp, cnp);
    322 		if (cnp->cn_consume != 0) {
    323 			if (uppervp != upperdvp)
    324 				VOP_UNLOCK(uppervp);
    325 			*ap->a_vpp = uppervp;
    326 			return (uerror);
    327 		}
        		/*
        		 * Nothing in the upper layer: the lower layer is hidden when
        		 * the lookup flagged a whiteout, or when the upper directory
        		 * itself carries the OPAQUE flag.
        		 */
    328 		if (uerror == ENOENT || uerror == EJUSTRETURN) {
    329 			if (cnp->cn_flags & ISWHITEOUT) {
    330 				iswhiteout = 1;
    331 			} else if (lowerdvp != NULLVP) {
    332 				lerror = VOP_GETATTR(upperdvp, &va,
    333 					cnp->cn_cred);
    334 				if (lerror == 0 && (va.va_flags & OPAQUE))
    335 					iswhiteout = 1;
    336 			}
    337 		}
    338 	} else {
    339 		uerror = ENOENT;
    340 	}
    341 
    342 	/*
    343 	 * in a similar way to the upper layer, do the lookup
    344 	 * in the lower layer.   this time, if there is some
    345 	 * component magic going on, then vput whatever we got
    346 	 * back from the upper layer and return the lower vnode
    347 	 * instead.
    348 	 */
    349 	if (lowerdvp != NULLVP && !iswhiteout) {
    350 		int nameiop;
    351 
    352 		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY);
    353 
    354 		/*
    355 		 * Only do a LOOKUP on the bottom node, since
    356 		 * we won't be making changes to it anyway.
    357 		 */
    358 		nameiop = cnp->cn_nameiop;
    359 		cnp->cn_nameiop = LOOKUP;
        		/* For UNMNT_BELOW mounts the lower lookup runs with the mount's credentials. */
    360 		if (um->um_op == UNMNT_BELOW) {
    361 			saved_cred = cnp->cn_cred;
    362 			cnp->cn_cred = um->um_cred;
    363 		}
    364 
    365 		/*
    366 		 * we shouldn't have to worry about locking interactions
    367 		 * between the lower layer and our union layer (w.r.t.
    368 		 * `..' processing) because we don't futz with lowervp
    369 		 * locks in the union-node instantiation code path.
    370 		 */
    371 		lerror = union_lookup1(um->um_lowervp, &lowerdvp,
    372 				&lowervp, cnp);
        		/* Put back the caller's credentials and operation. */
    373 		if (um->um_op == UNMNT_BELOW)
    374 			cnp->cn_cred = saved_cred;
    375 		cnp->cn_nameiop = nameiop;
    376 
    377 		if (lowervp != lowerdvp)
    378 			VOP_UNLOCK(lowerdvp);
    379 
    380 		if (cnp->cn_consume != 0) {
    381 			if (uppervp != NULLVP) {
    382 				if (uppervp == upperdvp)
    383 					vrele(uppervp);
    384 				else
    385 					vput(uppervp);
    386 				uppervp = NULLVP;
    387 			}
    388 			*ap->a_vpp = lowervp;
    389 			return (lerror);
    390 		}
    391 	} else {
    392 		lerror = ENOENT;
        		/*
        		 * No lower lookup was done.  For `..' fall back to the lower
        		 * vnode of the recorded parent union node, if there is one.
        		 */
    393 		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
    394 			lowervp = LOWERVP(dun->un_pvp);
    395 			if (lowervp != NULLVP) {
    396 				vref(lowervp);
    397 				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
    398 				lerror = 0;
    399 			}
    400 		}
    401 	}
    402 
    403 	/*
    404 	 * EJUSTRETURN is used by underlying filesystems to indicate that
    405 	 * a directory modification op was started successfully.
    406 	 * This will only happen in the upper layer, since
    407 	 * the lower layer only does LOOKUPs.
    408 	 * If this union is mounted read-only, bounce it now.
    409 	 */
    410 
    411 	if ((uerror == EJUSTRETURN) && (cnp->cn_flags & ISLASTCN) &&
    412 	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
    413 	    ((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)))
    414 		uerror = EROFS;
    415 
    416 	/*
    417 	 * at this point, we have uerror and lerror indicating
    418 	 * possible errors with the lookups in the upper and lower
    419 	 * layers.  additionally, uppervp and lowervp are (locked)
    420 	 * references to existing vnodes in the upper and lower layers.
    421 	 *
    422 	 * there are now three cases to consider.
    423 	 * 1. if both layers returned an error, then return whatever
    424 	 *    error the upper layer generated.
    425 	 *
    426 	 * 2. if the top layer failed and the bottom layer succeeded
    427 	 *    then two subcases occur.
    428 	 *    a.  the bottom vnode is not a directory, in which
    429 	 *	  case just return a new union vnode referencing
    430 	 *	  an empty top layer and the existing bottom layer.
    431 	 *    b.  the bottom vnode is a directory, in which case
    432 	 *	  create a new directory in the top-level and
    433 	 *	  continue as in case 3.
    434 	 *
    435 	 * 3. if the top layer succeeded then return a new union
    436 	 *    vnode referencing whatever the new top layer and
    437 	 *    whatever the bottom layer returned.
    438 	 */
    439 
    440 	*ap->a_vpp = NULLVP;
    441 
    442 
    443 	/* case 1. */
    444 	if ((uerror != 0) && (lerror != 0)) {
    445 		return (uerror);
    446 	}
    447 
    448 	/* case 2. */
    449 	if (uerror != 0 /* && (lerror == 0) */ ) {
    450 		if (lowervp->v_type == VDIR) { /* case 2b. */
    451 			/*
    452 			 * We may be racing another process to make the
    453 			 * upper-level shadow directory.  Be careful with
    454 			 * locks/etc!
    455 			 * If we have to create a shadow directory and want
    456 			 * to commit the node we have to restart the lookup
    457 			 * to get the componentname right.
    458 			 */
    459 			if (upperdvp) {
        				/* union_mkshadow() is called with the upper directory unlocked. */
    460 				VOP_UNLOCK(upperdvp);
    461 				uerror = union_mkshadow(um, upperdvp, cnp,
    462 				    &uppervp);
    463 				vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY);
    464 				if (uerror == 0 && cnp->cn_nameiop != LOOKUP) {
    465 					vrele(uppervp);
    466 					if (lowervp != NULLVP)
    467 						vput(lowervp);
    468 					goto start;
    469 				}
    470 			}
    471 			if (uerror) {
    472 				if (lowervp != NULLVP) {
    473 					vput(lowervp);
    474 					lowervp = NULLVP;
    475 				}
    476 				return (uerror);
    477 			}
    478 		}
    479 	} else { /* uerror == 0 */
    480 		if (uppervp != upperdvp)
    481 			VOP_UNLOCK(uppervp);
    482 	}
    483 
    484 	if (lowervp != NULLVP)
    485 		VOP_UNLOCK(lowervp);
    486 
        	/* Both layer vnodes are unlocked references here; a failed allocation drops them. */
    487 	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
    488 			      uppervp, lowervp, 1);
    489 
    490 	if (error) {
    491 		if (uppervp != NULLVP)
    492 			vrele(uppervp);
    493 		if (lowervp != NULLVP)
    494 			vrele(lowervp);
    495 		return error;
    496 	}
    497 
    498 	return 0;
    499 }
    500 
    501 int
    502 union_create(void *v)
    503 {
    504 	struct vop_create_v3_args /* {
    505 		struct vnode *a_dvp;
    506 		struct vnode **a_vpp;
    507 		struct componentname *a_cnp;
    508 		struct vattr *a_vap;
    509 	} */ *ap = v;
    510 	struct union_node *un = VTOUNION(ap->a_dvp);
    511 	struct vnode *dvp = un->un_uppervp;
    512 	struct componentname *cnp = ap->a_cnp;
    513 
    514 	if (dvp != NULLVP) {
    515 		int error;
    516 		struct vnode *vp;
    517 		struct mount *mp;
    518 
    519 		mp = ap->a_dvp->v_mount;
    520 		error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
    521 		if (error)
    522 			return (error);
    523 
    524 		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp,
    525 				NULLVP, 1);
    526 		if (error)
    527 			vrele(vp);
    528 		return (error);
    529 	}
    530 
    531 	return (EROFS);
    532 }
    533 
    534 int
    535 union_whiteout(void *v)
    536 {
    537 	struct vop_whiteout_args /* {
    538 		struct vnode *a_dvp;
    539 		struct componentname *a_cnp;
    540 		int a_flags;
    541 	} */ *ap = v;
    542 	struct union_node *un = VTOUNION(ap->a_dvp);
    543 	struct componentname *cnp = ap->a_cnp;
    544 
    545 	if (un->un_uppervp == NULLVP)
    546 		return (EOPNOTSUPP);
    547 
    548 	return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags));
    549 }
    550 
    551 int
    552 union_mknod(void *v)
    553 {
    554 	struct vop_mknod_v3_args /* {
    555 		struct vnode *a_dvp;
    556 		struct vnode **a_vpp;
    557 		struct componentname *a_cnp;
    558 		struct vattr *a_vap;
    559 	} */ *ap = v;
    560 	struct union_node *un = VTOUNION(ap->a_dvp);
    561 	struct vnode *dvp = un->un_uppervp;
    562 	struct componentname *cnp = ap->a_cnp;
    563 
    564 	if (dvp != NULLVP) {
    565 		int error;
    566 		struct vnode *vp;
    567 		struct mount *mp;
    568 
    569 		mp = ap->a_dvp->v_mount;
    570 		error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap);
    571 		if (error)
    572 			return (error);
    573 
    574 		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
    575 				      cnp, vp, NULLVP, 1);
    576 		if (error)
    577 			vrele(vp);
    578 		return (error);
    579 	}
    580 
    581 	return (EROFS);
    582 }
    583 
    584 int
    585 union_open(void *v)
    586 {
    587 	struct vop_open_args /* {
    588 		struct vnodeop_desc *a_desc;
    589 		struct vnode *a_vp;
    590 		int a_mode;
    591 		kauth_cred_t a_cred;
    592 	} */ *ap = v;
    593 	struct union_node *un = VTOUNION(ap->a_vp);
    594 	struct vnode *tvp;
    595 	int mode = ap->a_mode;
    596 	kauth_cred_t cred = ap->a_cred;
    597 	struct lwp *l = curlwp;
    598 	int error;
    599 
    600 	/*
    601 	 * If there is an existing upper vp then simply open that.
    602 	 */
    603 	tvp = un->un_uppervp;
    604 	if (tvp == NULLVP) {
    605 		/*
    606 		 * If the lower vnode is being opened for writing, then
    607 		 * copy the file contents to the upper vnode and open that,
    608 		 * otherwise can simply open the lower vnode.
    609 		 */
    610 		tvp = un->un_lowervp;
    611 		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
    612 			error = union_copyup(un, (mode&O_TRUNC) == 0, cred, l);
    613 			if (error == 0)
    614 				error = VOP_OPEN(un->un_uppervp, mode, cred);
    615 			return (error);
    616 		}
    617 
    618 		/*
    619 		 * Just open the lower vnode, but check for nodev mount flag
    620 		 */
    621 		if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
    622 		    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
    623 			return ENXIO;
    624 		un->un_openl++;
    625 		vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
    626 		error = VOP_OPEN(tvp, mode, cred);
    627 		VOP_UNLOCK(tvp);
    628 
    629 		return (error);
    630 	}
    631 	/*
    632 	 * Just open the upper vnode, checking for nodev mount flag first
    633 	 */
    634 	if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
    635 	    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
    636 		return ENXIO;
    637 
    638 	error = VOP_OPEN(tvp, mode, cred);
    639 
    640 	return (error);
    641 }
    642 
    643 int
    644 union_close(void *v)
    645 {
    646 	struct vop_close_args /* {
    647 		struct vnode *a_vp;
    648 		int  a_fflag;
    649 		kauth_cred_t a_cred;
    650 	} */ *ap = v;
    651 	struct union_node *un = VTOUNION(ap->a_vp);
    652 	struct vnode *vp;
    653 	int error;
    654 	bool do_lock;
    655 
    656 	vp = un->un_uppervp;
    657 	if (vp != NULLVP) {
    658 		do_lock = false;
    659 	} else {
    660 		KASSERT(un->un_openl > 0);
    661 		--un->un_openl;
    662 		vp = un->un_lowervp;
    663 		do_lock = true;
    664 	}
    665 
    666 	KASSERT(vp != NULLVP);
    667 	ap->a_vp = vp;
    668 	if (do_lock)
    669 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
    670 	error = VCALL(vp, VOFFSET(vop_close), ap);
    671 	if (do_lock)
    672 		VOP_UNLOCK(vp);
    673 
    674 	return error;
    675 }
    676 
    677 /*
    678  * Check access permission on the union vnode.
    679  * The access check being enforced is to check
    680  * against both the underlying vnode, and any
    681  * copied vnode.  This ensures that no additional
    682  * file permissions are given away simply because
    683  * the user caused an implicit file copy.
    684  */
    685 int
    686 union_access(void *v)
    687 {
    688 	struct vop_access_args /* {
    689 		struct vnodeop_desc *a_desc;
    690 		struct vnode *a_vp;
    691 		int a_mode;
    692 		kauth_cred_t a_cred;
    693 	} */ *ap = v;
    694 	struct vnode *vp = ap->a_vp;
    695 	struct union_node *un = VTOUNION(vp);
    696 	int error = EACCES;
    697 	struct union_mount *um = MOUNTTOUNIONMOUNT(vp->v_mount);
    698 
    699 	/*
    700 	 * Disallow write attempts on read-only file systems;
    701 	 * unless the file is a socket, fifo, or a block or
    702 	 * character device resident on the file system.
    703 	 */
    704 	if (ap->a_mode & VWRITE) {
    705 		switch (vp->v_type) {
    706 		case VDIR:
    707 		case VLNK:
    708 		case VREG:
    709 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
    710 				return (EROFS);
    711 			break;
    712 		case VBAD:
    713 		case VBLK:
    714 		case VCHR:
    715 		case VSOCK:
    716 		case VFIFO:
    717 		case VNON:
    718 		default:
    719 			break;
    720 		}
    721 	}
    722 
    723 
    724 	if ((vp = un->un_uppervp) != NULLVP) {
    725 		ap->a_vp = vp;
    726 		return (VCALL(vp, VOFFSET(vop_access), ap));
    727 	}
    728 
    729 	if ((vp = un->un_lowervp) != NULLVP) {
    730 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
    731 		ap->a_vp = vp;
    732 		error = VCALL(vp, VOFFSET(vop_access), ap);
    733 		if (error == 0) {
    734 			if (um->um_op == UNMNT_BELOW) {
    735 				ap->a_cred = um->um_cred;
    736 				error = VCALL(vp, VOFFSET(vop_access), ap);
    737 			}
    738 		}
    739 		VOP_UNLOCK(vp);
    740 		if (error)
    741 			return (error);
    742 	}
    743 
    744 	return (error);
    745 }
    746 
    747 /*
    748  * We handle getattr only to change the fsid and
    749  * track object sizes
    750  */
    751 int
    752 union_getattr(void *v)
    753 {
    754 	struct vop_getattr_args /* {
    755 		struct vnode *a_vp;
    756 		struct vattr *a_vap;
    757 		kauth_cred_t a_cred;
    758 	} */ *ap = v;
    759 	int error;
    760 	struct union_node *un = VTOUNION(ap->a_vp);
    761 	struct vnode *vp = un->un_uppervp;
    762 	struct vattr *vap;
    763 	struct vattr va;
    764 
    765 
    766 	/*
    767 	 * Some programs walk the filesystem hierarchy by counting
    768 	 * links to directories to avoid stat'ing all the time.
    769 	 * This means the link count on directories needs to be "correct".
    770 	 * The only way to do that is to call getattr on both layers
    771 	 * and fix up the link count.  The link count will not necessarily
    772 	 * be accurate but will be large enough to defeat the tree walkers.
    773 	 *
    774 	 * To make life more interesting, some filesystems don't keep
    775 	 * track of link counts in the expected way, and return a
    776 	 * link count of `1' for those directories; if either of the
    777 	 * component directories returns a link count of `1', we return a 1.
    778 	 */
    779 
    780 	vap = ap->a_vap;
    781 
    782 	vp = un->un_uppervp;
    783 	if (vp != NULLVP) {
    784 		error = VOP_GETATTR(vp, vap, ap->a_cred);
    785 		if (error)
    786 			return (error);
    787 		mutex_enter(&un->un_lock);
    788 		union_newsize(ap->a_vp, vap->va_size, VNOVAL);
    789 	}
    790 
    791 	if (vp == NULLVP) {
    792 		vp = un->un_lowervp;
    793 	} else if (vp->v_type == VDIR) {
    794 		vp = un->un_lowervp;
    795 		if (vp != NULLVP)
    796 			vap = &va;
    797 	} else {
    798 		vp = NULLVP;
    799 	}
    800 
    801 	if (vp != NULLVP) {
    802 		if (vp == un->un_lowervp)
    803 			vn_lock(vp, LK_SHARED | LK_RETRY);
    804 		error = VOP_GETATTR(vp, vap, ap->a_cred);
    805 		if (vp == un->un_lowervp)
    806 			VOP_UNLOCK(vp);
    807 		if (error)
    808 			return (error);
    809 		mutex_enter(&un->un_lock);
    810 		union_newsize(ap->a_vp, VNOVAL, vap->va_size);
    811 	}
    812 
    813 	if ((vap != ap->a_vap) && (vap->va_type == VDIR)) {
    814 		/*
    815 		 * Link count manipulation:
    816 		 *	- If both return "2", return 2 (no subdirs)
    817 		 *	- If one or the other return "1", return "1" (ENOCLUE)
    818 		 */
    819 		if ((ap->a_vap->va_nlink == 2) &&
    820 		    (vap->va_nlink == 2))
    821 			;
    822 		else if (ap->a_vap->va_nlink != 1) {
    823 			if (vap->va_nlink == 1)
    824 				ap->a_vap->va_nlink = 1;
    825 			else
    826 				ap->a_vap->va_nlink += vap->va_nlink;
    827 		}
    828 	}
    829 	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
    830 	return (0);
    831 }
    832 
    833 int
    834 union_setattr(void *v)
    835 {
    836 	struct vop_setattr_args /* {
    837 		struct vnode *a_vp;
    838 		struct vattr *a_vap;
    839 		kauth_cred_t a_cred;
    840 	} */ *ap = v;
    841 	struct vattr *vap = ap->a_vap;
    842 	struct vnode *vp = ap->a_vp;
    843 	struct union_node *un = VTOUNION(vp);
    844 	bool size_only;		/* All but va_size are VNOVAL. */
    845 	int error;
    846 
    847 	size_only = (vap->va_flags == VNOVAL && vap->va_uid == (uid_t)VNOVAL &&
    848 	    vap->va_gid == (gid_t)VNOVAL && vap->va_atime.tv_sec == VNOVAL &&
    849 	    vap->va_mtime.tv_sec == VNOVAL && vap->va_mode == (mode_t)VNOVAL);
    850 
    851 	if (!size_only && (vp->v_mount->mnt_flag & MNT_RDONLY))
    852 		return (EROFS);
    853 	if (vap->va_size != VNOVAL) {
    854  		switch (vp->v_type) {
    855  		case VDIR:
    856  			return (EISDIR);
    857  		case VCHR:
    858  		case VBLK:
    859  		case VSOCK:
    860  		case VFIFO:
    861 			break;
    862 		case VREG:
    863 		case VLNK:
    864  		default:
    865 			/*
    866 			 * Disallow write attempts if the filesystem is
    867 			 * mounted read-only.
    868 			 */
    869 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
    870 				return (EROFS);
    871 		}
    872 	}
    873 
    874 	/*
    875 	 * Handle case of truncating lower object to zero size,
    876 	 * by creating a zero length upper object.  This is to
    877 	 * handle the case of open with O_TRUNC and O_CREAT.
    878 	 */
    879 	if ((un->un_uppervp == NULLVP) &&
    880 	    /* assert(un->un_lowervp != NULLVP) */
    881 	    (un->un_lowervp->v_type == VREG)) {
    882 		error = union_copyup(un, (vap->va_size != 0),
    883 						ap->a_cred, curlwp);
    884 		if (error)
    885 			return (error);
    886 	}
    887 
    888 	/*
    889 	 * Try to set attributes in upper layer, ignore size change to zero
    890 	 * for devices to handle O_TRUNC and return read-only filesystem error
    891 	 * otherwise.
    892 	 */
    893 	if (un->un_uppervp != NULLVP) {
    894 		error = VOP_SETATTR(un->un_uppervp, vap, ap->a_cred);
    895 		if ((error == 0) && (vap->va_size != VNOVAL)) {
    896 			mutex_enter(&un->un_lock);
    897 			union_newsize(ap->a_vp, vap->va_size, VNOVAL);
    898 		}
    899 	} else {
    900 		KASSERT(un->un_lowervp != NULLVP);
    901 		if (NODE_IS_SPECIAL(un->un_lowervp)) {
    902 			if (size_only &&
    903 			    (vap->va_size == 0 || vap->va_size == VNOVAL))
    904 				error = 0;
    905 			else
    906 				error = EROFS;
    907 		} else {
    908 			error = EROFS;
    909 		}
    910 	}
    911 
    912 	return (error);
    913 }
    914 
    915 int
    916 union_read(void *v)
    917 {
    918 	struct vop_read_args /* {
    919 		struct vnode *a_vp;
    920 		struct uio *a_uio;
    921 		int  a_ioflag;
    922 		kauth_cred_t a_cred;
    923 	} */ *ap = v;
    924 	int error;
    925 	struct vnode *vp = OTHERVP(ap->a_vp);
    926 	int dolock = (vp == LOWERVP(ap->a_vp));
    927 
    928 	if (dolock)
    929 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
    930 	error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
    931 	if (dolock)
    932 		VOP_UNLOCK(vp);
    933 
    934 	/*
    935 	 * XXX
    936 	 * perhaps the size of the underlying object has changed under
    937 	 * our feet.  take advantage of the offset information present
    938 	 * in the uio structure.
    939 	 */
    940 	if (error == 0) {
    941 		struct union_node *un = VTOUNION(ap->a_vp);
    942 		off_t cur = ap->a_uio->uio_offset;
    943 		off_t usz = VNOVAL, lsz = VNOVAL;
    944 
    945 		mutex_enter(&un->un_lock);
    946 		if (vp == un->un_uppervp) {
    947 			if (cur > un->un_uppersz)
    948 				usz = cur;
    949 		} else {
    950 			if (cur > un->un_lowersz)
    951 				lsz = cur;
    952 		}
    953 
    954 		if (usz != VNOVAL || lsz != VNOVAL)
    955 			union_newsize(ap->a_vp, usz, lsz);
    956 		else
    957 			mutex_exit(&un->un_lock);
    958 	}
    959 
    960 	return (error);
    961 }
    962 
    963 int
    964 union_write(void *v)
    965 {
    966 	struct vop_read_args /* {
    967 		struct vnode *a_vp;
    968 		struct uio *a_uio;
    969 		int  a_ioflag;
    970 		kauth_cred_t a_cred;
    971 	} */ *ap = v;
    972 	int error;
    973 	struct vnode *vp;
    974 	struct union_node *un = VTOUNION(ap->a_vp);
    975 
    976 	vp = UPPERVP(ap->a_vp);
    977 	if (vp == NULLVP) {
    978 		vp = LOWERVP(ap->a_vp);
    979 		if (NODE_IS_SPECIAL(vp)) {
    980 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
    981 			error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag,
    982 			    ap->a_cred);
    983 			VOP_UNLOCK(vp);
    984 			return error;
    985 		}
    986 		panic("union: missing upper layer in write");
    987 	}
    988 
    989 	error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
    990 
    991 	/*
    992 	 * the size of the underlying object may be changed by the
    993 	 * write.
    994 	 */
    995 	if (error == 0) {
    996 		off_t cur = ap->a_uio->uio_offset;
    997 
    998 		mutex_enter(&un->un_lock);
    999 		if (cur > un->un_uppersz)
   1000 			union_newsize(ap->a_vp, cur, VNOVAL);
   1001 		else
   1002 			mutex_exit(&un->un_lock);
   1003 	}
   1004 
   1005 	return (error);
   1006 }
   1007 
   1008 int
   1009 union_ioctl(void *v)
   1010 {
   1011 	struct vop_ioctl_args /* {
   1012 		struct vnode *a_vp;
   1013 		int  a_command;
   1014 		void *a_data;
   1015 		int  a_fflag;
   1016 		kauth_cred_t a_cred;
   1017 	} */ *ap = v;
   1018 	struct vnode *ovp = OTHERVP(ap->a_vp);
   1019 
   1020 	ap->a_vp = ovp;
   1021 	return (VCALL(ovp, VOFFSET(vop_ioctl), ap));
   1022 }
   1023 
   1024 int
   1025 union_poll(void *v)
   1026 {
   1027 	struct vop_poll_args /* {
   1028 		struct vnode *a_vp;
   1029 		int a_events;
   1030 	} */ *ap = v;
   1031 	struct vnode *ovp = OTHERVP(ap->a_vp);
   1032 
   1033 	ap->a_vp = ovp;
   1034 	return (VCALL(ovp, VOFFSET(vop_poll), ap));
   1035 }
   1036 
   1037 int
   1038 union_revoke(void *v)
   1039 {
   1040 	struct vop_revoke_args /* {
   1041 		struct vnode *a_vp;
   1042 		int a_flags;
   1043 		struct proc *a_p;
   1044 	} */ *ap = v;
   1045 	struct vnode *vp = ap->a_vp;
   1046 
   1047 	if (UPPERVP(vp))
   1048 		VOP_REVOKE(UPPERVP(vp), ap->a_flags);
   1049 	if (LOWERVP(vp))
   1050 		VOP_REVOKE(LOWERVP(vp), ap->a_flags);
   1051 	vgone(vp);	/* XXXAD?? */
   1052 	return (0);
   1053 }
   1054 
   1055 int
   1056 union_mmap(void *v)
   1057 {
   1058 	struct vop_mmap_args /* {
   1059 		struct vnode *a_vp;
   1060 		vm_prot_t a_prot;
   1061 		kauth_cred_t a_cred;
   1062 	} */ *ap = v;
   1063 	struct vnode *ovp = OTHERVP(ap->a_vp);
   1064 
   1065 	ap->a_vp = ovp;
   1066 	return (VCALL(ovp, VOFFSET(vop_mmap), ap));
   1067 }
   1068 
   1069 int
   1070 union_fsync(void *v)
   1071 {
   1072 	struct vop_fsync_args /* {
   1073 		struct vnode *a_vp;
   1074 		kauth_cred_t a_cred;
   1075 		int  a_flags;
   1076 		off_t offhi;
   1077 		off_t offlo;
   1078 	} */ *ap = v;
   1079 	int error = 0;
   1080 	struct vnode *targetvp;
   1081 
   1082 	/*
   1083 	 * If vinvalbuf is calling us, it's a "shallow fsync" -- don't
   1084 	 * bother syncing the underlying vnodes, since (a) they'll be
   1085 	 * fsync'ed when reclaimed and (b) we could deadlock if
   1086 	 * they're locked; otherwise, pass it through to the
   1087 	 * underlying layer.
   1088 	 */
   1089 	if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) {
   1090 		error = spec_fsync(v);
   1091 		if (error)
   1092 			return error;
   1093 	}
   1094 
   1095 	if (ap->a_flags & FSYNC_RECLAIM)
   1096 		return 0;
   1097 
   1098 	targetvp = OTHERVP(ap->a_vp);
   1099 	if (targetvp != NULLVP) {
   1100 		int dolock = (targetvp == LOWERVP(ap->a_vp));
   1101 
   1102 		if (dolock)
   1103 			vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY);
   1104 		error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_flags,
   1105 			    ap->a_offlo, ap->a_offhi);
   1106 		if (dolock)
   1107 			VOP_UNLOCK(targetvp);
   1108 	}
   1109 
   1110 	return (error);
   1111 }
   1112 
   1113 int
   1114 union_seek(void *v)
   1115 {
   1116 	struct vop_seek_args /* {
   1117 		struct vnode *a_vp;
   1118 		off_t  a_oldoff;
   1119 		off_t  a_newoff;
   1120 		kauth_cred_t a_cred;
   1121 	} */ *ap = v;
   1122 	struct vnode *ovp = OTHERVP(ap->a_vp);
   1123 
   1124 	ap->a_vp = ovp;
   1125 	return (VCALL(ovp, VOFFSET(vop_seek), ap));
   1126 }
   1127 
   1128 int
   1129 union_remove(void *v)
   1130 {
   1131 	struct vop_remove_args /* {
   1132 		struct vnode *a_dvp;
   1133 		struct vnode *a_vp;
   1134 		struct componentname *a_cnp;
   1135 	} */ *ap = v;
   1136 	int error;
   1137 	struct union_node *dun = VTOUNION(ap->a_dvp);
   1138 	struct union_node *un = VTOUNION(ap->a_vp);
   1139 	struct componentname *cnp = ap->a_cnp;
   1140 
   1141 	if (dun->un_uppervp == NULLVP)
   1142 		panic("union remove: null upper vnode");
   1143 
   1144 	if (un->un_uppervp != NULLVP) {
   1145 		struct vnode *dvp = dun->un_uppervp;
   1146 		struct vnode *vp = un->un_uppervp;
   1147 
   1148 		/*
   1149 		 * Account for VOP_REMOVE to vrele dvp and vp.
   1150 		 * Note: VOP_REMOVE will unlock dvp and vp.
   1151 		 */
   1152 		vref(dvp);
   1153 		vref(vp);
   1154 		if (union_dowhiteout(un, cnp->cn_cred))
   1155 			cnp->cn_flags |= DOWHITEOUT;
   1156 		error = VOP_REMOVE(dvp, vp, cnp);
   1157 		if (!error)
   1158 			union_removed_upper(un);
   1159 		vrele(ap->a_dvp);
   1160 		vrele(ap->a_vp);
   1161 	} else {
   1162 		error = union_mkwhiteout(
   1163 			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
   1164 			dun->un_uppervp, ap->a_cnp, un);
   1165 		vput(ap->a_dvp);
   1166 		vput(ap->a_vp);
   1167 	}
   1168 
   1169 	return (error);
   1170 }
   1171 
   1172 int
   1173 union_link(void *v)
   1174 {
   1175 	struct vop_link_args /* {
   1176 		struct vnode *a_dvp;
   1177 		struct vnode *a_vp;
   1178 		struct componentname *a_cnp;
   1179 	} */ *ap = v;
   1180 	int error = 0;
   1181 	struct componentname *cnp = ap->a_cnp;
   1182 	struct union_node *dun;
   1183 	struct vnode *vp;
   1184 	struct vnode *dvp;
   1185 
   1186 	dun = VTOUNION(ap->a_dvp);
   1187 
   1188 	KASSERT((ap->a_cnp->cn_flags & LOCKPARENT) != 0);
   1189 
   1190 	if (ap->a_dvp->v_op != ap->a_vp->v_op) {
   1191 		vp = ap->a_vp;
   1192 	} else {
   1193 		struct union_node *un = VTOUNION(ap->a_vp);
   1194 		if (un->un_uppervp == NULLVP) {
   1195 			const bool droplock = (dun->un_uppervp == un->un_dirvp);
   1196 
   1197 			/*
   1198 			 * Needs to be copied before we can link it.
   1199 			 */
   1200 			vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
   1201 			if (droplock)
   1202 				VOP_UNLOCK(dun->un_uppervp);
   1203 			error = union_copyup(un, 1, cnp->cn_cred, curlwp);
   1204 			if (droplock) {
   1205 				vn_lock(dun->un_uppervp,
   1206 				    LK_EXCLUSIVE | LK_RETRY);
   1207 				/*
   1208 				 * During copyup, we dropped the lock on the
   1209 				 * dir and invalidated any saved namei lookup
   1210 				 * state for the directory we'll be entering
   1211 				 * the link in.  We need to re-run the lookup
   1212 				 * in that directory to reset any state needed
   1213 				 * for VOP_LINK.
   1214 				 * Call relookup on the union-layer to reset
   1215 				 * the state.
   1216 				 */
   1217 				vp  = NULLVP;
   1218 				if (dun->un_uppervp == NULLVP)
   1219 					 panic("union: null upperdvp?");
   1220 				error = relookup(ap->a_dvp, &vp, ap->a_cnp, 0);
   1221 				if (error) {
   1222 					VOP_UNLOCK(ap->a_vp);
   1223 					return EROFS;	/* ? */
   1224 				}
   1225 				if (vp != NULLVP) {
   1226 					/*
   1227 					 * The name we want to create has
   1228 					 * mysteriously appeared (a race?)
   1229 					 */
   1230 					error = EEXIST;
   1231 					VOP_UNLOCK(ap->a_vp);
   1232 					vput(ap->a_dvp);
   1233 					vput(vp);
   1234 					return (error);
   1235 				}
   1236 			}
   1237 			VOP_UNLOCK(ap->a_vp);
   1238 		}
   1239 		vp = un->un_uppervp;
   1240 	}
   1241 
   1242 	dvp = dun->un_uppervp;
   1243 	if (dvp == NULLVP)
   1244 		error = EROFS;
   1245 
   1246 	if (error) {
   1247 		vput(ap->a_dvp);
   1248 		return (error);
   1249 	}
   1250 
   1251 	/*
   1252 	 * Account for VOP_LINK to vrele dvp.
   1253 	 * Note: VOP_LINK will unlock dvp.
   1254 	 */
   1255 	vref(dvp);
   1256 	error = VOP_LINK(dvp, vp, cnp);
   1257 	vrele(ap->a_dvp);
   1258 
   1259 	return error;
   1260 }
   1261 
   1262 int
   1263 union_rename(void *v)
   1264 {
   1265 	struct vop_rename_args  /* {
   1266 		struct vnode *a_fdvp;
   1267 		struct vnode *a_fvp;
   1268 		struct componentname *a_fcnp;
   1269 		struct vnode *a_tdvp;
   1270 		struct vnode *a_tvp;
   1271 		struct componentname *a_tcnp;
   1272 	} */ *ap = v;
   1273 	int error;
   1274 
   1275 	struct vnode *fdvp = ap->a_fdvp;
   1276 	struct vnode *fvp = ap->a_fvp;
   1277 	struct vnode *tdvp = ap->a_tdvp;
   1278 	struct vnode *tvp = ap->a_tvp;
   1279 
   1280 	/*
   1281 	 * Account for VOP_RENAME to vrele all nodes.
   1282 	 * Note: VOP_RENAME will unlock tdvp.
   1283 	 */
   1284 
   1285 	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
   1286 		struct union_node *un = VTOUNION(fdvp);
   1287 		if (un->un_uppervp == NULLVP) {
   1288 			/*
   1289 			 * this should never happen in normal
   1290 			 * operation but might if there was
   1291 			 * a problem creating the top-level shadow
   1292 			 * directory.
   1293 			 */
   1294 			error = EXDEV;
   1295 			goto bad;
   1296 		}
   1297 
   1298 		fdvp = un->un_uppervp;
   1299 		vref(fdvp);
   1300 	}
   1301 
   1302 	if (fvp->v_op == union_vnodeop_p) {	/* always true */
   1303 		struct union_node *un = VTOUNION(fvp);
   1304 		if (un->un_uppervp == NULLVP) {
   1305 			/* XXX: should do a copyup */
   1306 			error = EXDEV;
   1307 			goto bad;
   1308 		}
   1309 
   1310 		if (un->un_lowervp != NULLVP)
   1311 			ap->a_fcnp->cn_flags |= DOWHITEOUT;
   1312 
   1313 		fvp = un->un_uppervp;
   1314 		vref(fvp);
   1315 	}
   1316 
   1317 	if (tdvp->v_op == union_vnodeop_p) {
   1318 		struct union_node *un = VTOUNION(tdvp);
   1319 		if (un->un_uppervp == NULLVP) {
   1320 			/*
   1321 			 * this should never happen in normal
   1322 			 * operation but might if there was
   1323 			 * a problem creating the top-level shadow
   1324 			 * directory.
   1325 			 */
   1326 			error = EXDEV;
   1327 			goto bad;
   1328 		}
   1329 
   1330 		tdvp = un->un_uppervp;
   1331 		vref(tdvp);
   1332 	}
   1333 
   1334 	if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
   1335 		struct union_node *un = VTOUNION(tvp);
   1336 
   1337 		tvp = un->un_uppervp;
   1338 		if (tvp != NULLVP) {
   1339 			vref(tvp);
   1340 		}
   1341 	}
   1342 
   1343 	error = VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp);
   1344 	goto out;
   1345 
   1346 bad:
   1347 	vput(tdvp);
   1348 	if (tvp != NULLVP)
   1349 		vput(tvp);
   1350 	vrele(fdvp);
   1351 	vrele(fvp);
   1352 
   1353 out:
   1354 	if (fdvp != ap->a_fdvp) {
   1355 		vrele(ap->a_fdvp);
   1356 	}
   1357 	if (fvp != ap->a_fvp) {
   1358 		vrele(ap->a_fvp);
   1359 	}
   1360 	if (tdvp != ap->a_tdvp) {
   1361 		vrele(ap->a_tdvp);
   1362 	}
   1363 	if (tvp != ap->a_tvp) {
   1364 		vrele(ap->a_tvp);
   1365 	}
   1366 	return (error);
   1367 }
   1368 
   1369 int
   1370 union_mkdir(void *v)
   1371 {
   1372 	struct vop_mkdir_v3_args /* {
   1373 		struct vnode *a_dvp;
   1374 		struct vnode **a_vpp;
   1375 		struct componentname *a_cnp;
   1376 		struct vattr *a_vap;
   1377 	} */ *ap = v;
   1378 	struct union_node *un = VTOUNION(ap->a_dvp);
   1379 	struct vnode *dvp = un->un_uppervp;
   1380 	struct componentname *cnp = ap->a_cnp;
   1381 
   1382 	if (dvp != NULLVP) {
   1383 		int error;
   1384 		struct vnode *vp;
   1385 
   1386 		error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap);
   1387 		if (error) {
   1388 			vrele(ap->a_dvp);
   1389 			return (error);
   1390 		}
   1391 
   1392 		error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp,
   1393 				NULLVP, cnp, vp, NULLVP, 1);
   1394 		if (error)
   1395 			vrele(vp);
   1396 		return (error);
   1397 	}
   1398 
   1399 	return (EROFS);
   1400 }
   1401 
   1402 int
   1403 union_rmdir(void *v)
   1404 {
   1405 	struct vop_rmdir_args /* {
   1406 		struct vnode *a_dvp;
   1407 		struct vnode *a_vp;
   1408 		struct componentname *a_cnp;
   1409 	} */ *ap = v;
   1410 	int error;
   1411 	struct union_node *dun = VTOUNION(ap->a_dvp);
   1412 	struct union_node *un = VTOUNION(ap->a_vp);
   1413 	struct componentname *cnp = ap->a_cnp;
   1414 
   1415 	if (dun->un_uppervp == NULLVP)
   1416 		panic("union rmdir: null upper vnode");
   1417 
   1418 	error = union_check_rmdir(un, cnp->cn_cred);
   1419 	if (error) {
   1420 		vput(ap->a_dvp);
   1421 		vput(ap->a_vp);
   1422 		return error;
   1423 	}
   1424 
   1425 	if (un->un_uppervp != NULLVP) {
   1426 		struct vnode *dvp = dun->un_uppervp;
   1427 		struct vnode *vp = un->un_uppervp;
   1428 
   1429 		/*
   1430 		 * Account for VOP_RMDIR to vrele dvp and vp.
   1431 		 * Note: VOP_RMDIR will unlock dvp and vp.
   1432 		 */
   1433 		vref(dvp);
   1434 		vref(vp);
   1435 		if (union_dowhiteout(un, cnp->cn_cred))
   1436 			cnp->cn_flags |= DOWHITEOUT;
   1437 		error = VOP_RMDIR(dvp, vp, ap->a_cnp);
   1438 		if (!error)
   1439 			union_removed_upper(un);
   1440 		vrele(ap->a_dvp);
   1441 		vrele(ap->a_vp);
   1442 	} else {
   1443 		error = union_mkwhiteout(
   1444 			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
   1445 			dun->un_uppervp, ap->a_cnp, un);
   1446 		vput(ap->a_dvp);
   1447 		vput(ap->a_vp);
   1448 	}
   1449 
   1450 	return (error);
   1451 }
   1452 
   1453 int
   1454 union_symlink(void *v)
   1455 {
   1456 	struct vop_symlink_v3_args /* {
   1457 		struct vnode *a_dvp;
   1458 		struct vnode **a_vpp;
   1459 		struct componentname *a_cnp;
   1460 		struct vattr *a_vap;
   1461 		char *a_target;
   1462 	} */ *ap = v;
   1463 	struct union_node *un = VTOUNION(ap->a_dvp);
   1464 	struct vnode *dvp = un->un_uppervp;
   1465 	struct componentname *cnp = ap->a_cnp;
   1466 
   1467 	if (dvp != NULLVP) {
   1468 		int error;
   1469 
   1470 		error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
   1471 				    ap->a_target);
   1472 		return (error);
   1473 	}
   1474 
   1475 	return (EROFS);
   1476 }
   1477 
   1478 /*
   1479  * union_readdir works in concert with getdirentries and
   1480  * readdir(3) to provide a list of entries in the unioned
   1481  * directories.  getdirentries is responsible for walking
   1482  * down the union stack.  readdir(3) is responsible for
   1483  * eliminating duplicate names from the returned data stream.
   1484  */
   1485 int
   1486 union_readdir(void *v)
   1487 {
   1488 	struct vop_readdir_args /* {
   1489 		struct vnodeop_desc *a_desc;
   1490 		struct vnode *a_vp;
   1491 		struct uio *a_uio;
   1492 		kauth_cred_t a_cred;
   1493 		int *a_eofflag;
   1494 		u_long *a_cookies;
   1495 		int a_ncookies;
   1496 	} */ *ap = v;
   1497 	struct union_node *un = VTOUNION(ap->a_vp);
   1498 	struct vnode *uvp = un->un_uppervp;
   1499 
   1500 	if (uvp == NULLVP)
   1501 		return (0);
   1502 
   1503 	ap->a_vp = uvp;
   1504 	return (VCALL(uvp, VOFFSET(vop_readdir), ap));
   1505 }
   1506 
   1507 int
   1508 union_readlink(void *v)
   1509 {
   1510 	struct vop_readlink_args /* {
   1511 		struct vnode *a_vp;
   1512 		struct uio *a_uio;
   1513 		kauth_cred_t a_cred;
   1514 	} */ *ap = v;
   1515 	int error;
   1516 	struct vnode *vp = OTHERVP(ap->a_vp);
   1517 	int dolock = (vp == LOWERVP(ap->a_vp));
   1518 
   1519 	if (dolock)
   1520 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   1521 	ap->a_vp = vp;
   1522 	error = VCALL(vp, VOFFSET(vop_readlink), ap);
   1523 	if (dolock)
   1524 		VOP_UNLOCK(vp);
   1525 
   1526 	return (error);
   1527 }
   1528 
   1529 int
   1530 union_abortop(void *v)
   1531 {
   1532 	struct vop_abortop_args /* {
   1533 		struct vnode *a_dvp;
   1534 		struct componentname *a_cnp;
   1535 	} */ *ap = v;
   1536 
   1537 	KASSERT(UPPERVP(ap->a_dvp) != NULL);
   1538 
   1539 	ap->a_dvp = UPPERVP(ap->a_dvp);
   1540 	return VCALL(ap->a_dvp, VOFFSET(vop_abortop), ap);
   1541 }
   1542 
   1543 int
   1544 union_inactive(void *v)
   1545 {
   1546 	struct vop_inactive_args /* {
   1547 		const struct vnodeop_desc *a_desc;
   1548 		struct vnode *a_vp;
   1549 		bool *a_recycle;
   1550 	} */ *ap = v;
   1551 	struct vnode *vp = ap->a_vp;
   1552 	struct union_node *un = VTOUNION(vp);
   1553 	struct vnode **vpp;
   1554 
   1555 	/*
   1556 	 * Do nothing (and _don't_ bypass).
   1557 	 * Wait to vrele lowervp until reclaim,
   1558 	 * so that until then our union_node is in the
   1559 	 * cache and reusable.
   1560 	 *
   1561 	 * NEEDSWORK: Someday, consider inactive'ing
   1562 	 * the lowervp and then trying to reactivate it
   1563 	 * with capabilities (v_id)
   1564 	 * like they do in the name lookup cache code.
   1565 	 * That's too much work for now.
   1566 	 */
   1567 
   1568 	if (un->un_dircache != 0) {
   1569 		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
   1570 			vrele(*vpp);
   1571 		free(un->un_dircache, M_TEMP);
   1572 		un->un_dircache = 0;
   1573 	}
   1574 
   1575 	*ap->a_recycle = ((un->un_cflags & UN_CACHED) == 0);
   1576 	VOP_UNLOCK(vp);
   1577 
   1578 	return (0);
   1579 }
   1580 
   1581 int
   1582 union_reclaim(void *v)
   1583 {
   1584 	struct vop_reclaim_args /* {
   1585 		struct vnode *a_vp;
   1586 	} */ *ap = v;
   1587 
   1588 	union_freevp(ap->a_vp);
   1589 
   1590 	return (0);
   1591 }
   1592 
   1593 int
   1594 union_lock(void *v)
   1595 {
   1596 	struct vop_lock_args /* {
   1597 		struct vnode *a_vp;
   1598 		int a_flags;
   1599 	} */ *ap = v;
   1600 	struct vnode *vp;
   1601 	struct union_node *un;
   1602 	int error;
   1603 
   1604 	un = VTOUNION(ap->a_vp);
   1605 	mutex_enter(&un->un_lock);
   1606 	for (;;) {
   1607 		vp = LOCKVP(ap->a_vp);
   1608 		mutex_exit(&un->un_lock);
   1609 		if (vp == ap->a_vp)
   1610 			error = genfs_lock(ap);
   1611 		else
   1612 			error = VOP_LOCK(vp, ap->a_flags);
   1613 		if (error != 0)
   1614 			return error;
   1615 		mutex_enter(&un->un_lock);
   1616 		if (vp == LOCKVP(ap->a_vp))
   1617 			break;
   1618 		if (vp == ap->a_vp)
   1619 			genfs_unlock(ap);
   1620 		else
   1621 			VOP_UNLOCK(vp);
   1622 	}
   1623 	mutex_exit(&un->un_lock);
   1624 
   1625 	return error;
   1626 }
   1627 
   1628 int
   1629 union_unlock(void *v)
   1630 {
   1631 	struct vop_unlock_args /* {
   1632 		struct vnode *a_vp;
   1633 		int a_flags;
   1634 	} */ *ap = v;
   1635 	struct vnode *vp;
   1636 
   1637 	vp = LOCKVP(ap->a_vp);
   1638 	if (vp == ap->a_vp)
   1639 		genfs_unlock(ap);
   1640 	else
   1641 		VOP_UNLOCK(vp);
   1642 
   1643 	return 0;
   1644 }
   1645 
   1646 int
   1647 union_bmap(void *v)
   1648 {
   1649 	struct vop_bmap_args /* {
   1650 		struct vnode *a_vp;
   1651 		daddr_t  a_bn;
   1652 		struct vnode **a_vpp;
   1653 		daddr_t *a_bnp;
   1654 		int *a_runp;
   1655 	} */ *ap = v;
   1656 	int error;
   1657 	struct vnode *vp = OTHERVP(ap->a_vp);
   1658 	int dolock = (vp == LOWERVP(ap->a_vp));
   1659 
   1660 	if (dolock)
   1661 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   1662 	ap->a_vp = vp;
   1663 	error = VCALL(vp, VOFFSET(vop_bmap), ap);
   1664 	if (dolock)
   1665 		VOP_UNLOCK(vp);
   1666 
   1667 	return (error);
   1668 }
   1669 
   1670 int
   1671 union_print(void *v)
   1672 {
   1673 	struct vop_print_args /* {
   1674 		struct vnode *a_vp;
   1675 	} */ *ap = v;
   1676 	struct vnode *vp = ap->a_vp;
   1677 
   1678 	printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
   1679 			vp, UPPERVP(vp), LOWERVP(vp));
   1680 	if (UPPERVP(vp) != NULLVP)
   1681 		vprint("union: upper", UPPERVP(vp));
   1682 	if (LOWERVP(vp) != NULLVP)
   1683 		vprint("union: lower", LOWERVP(vp));
   1684 	if (VTOUNION(vp)->un_dircache) {
   1685 		struct vnode **vpp;
   1686 		for (vpp = VTOUNION(vp)->un_dircache; *vpp != NULLVP; vpp++)
   1687 			vprint("dircache:", *vpp);
   1688 	}
   1689 
   1690 	return (0);
   1691 }
   1692 
   1693 int
   1694 union_islocked(void *v)
   1695 {
   1696 	struct vop_islocked_args /* {
   1697 		struct vnode *a_vp;
   1698 	} */ *ap = v;
   1699 	struct vnode *vp;
   1700 	struct union_node *un;
   1701 
   1702 	un = VTOUNION(ap->a_vp);
   1703 	mutex_enter(&un->un_lock);
   1704 	vp = LOCKVP(ap->a_vp);
   1705 	mutex_exit(&un->un_lock);
   1706 
   1707 	if (vp == ap->a_vp)
   1708 		return genfs_islocked(ap);
   1709 	else
   1710 		return VOP_ISLOCKED(vp);
   1711 }
   1712 
   1713 int
   1714 union_pathconf(void *v)
   1715 {
   1716 	struct vop_pathconf_args /* {
   1717 		struct vnode *a_vp;
   1718 		int a_name;
   1719 		int *a_retval;
   1720 	} */ *ap = v;
   1721 	int error;
   1722 	struct vnode *vp = OTHERVP(ap->a_vp);
   1723 	int dolock = (vp == LOWERVP(ap->a_vp));
   1724 
   1725 	if (dolock)
   1726 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   1727 	ap->a_vp = vp;
   1728 	error = VCALL(vp, VOFFSET(vop_pathconf), ap);
   1729 	if (dolock)
   1730 		VOP_UNLOCK(vp);
   1731 
   1732 	return (error);
   1733 }
   1734 
   1735 int
   1736 union_advlock(void *v)
   1737 {
   1738 	struct vop_advlock_args /* {
   1739 		struct vnode *a_vp;
   1740 		void *a_id;
   1741 		int  a_op;
   1742 		struct flock *a_fl;
   1743 		int  a_flags;
   1744 	} */ *ap = v;
   1745 	struct vnode *ovp = OTHERVP(ap->a_vp);
   1746 
   1747 	ap->a_vp = ovp;
   1748 	return (VCALL(ovp, VOFFSET(vop_advlock), ap));
   1749 }
   1750 
   1751 int
   1752 union_strategy(void *v)
   1753 {
   1754 	struct vop_strategy_args /* {
   1755 		struct vnode *a_vp;
   1756 		struct buf *a_bp;
   1757 	} */ *ap = v;
   1758 	struct vnode *ovp = OTHERVP(ap->a_vp);
   1759 	struct buf *bp = ap->a_bp;
   1760 
   1761 	KASSERT(ovp != NULLVP);
   1762 	if (!NODE_IS_SPECIAL(ovp))
   1763 		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
   1764 
   1765 	return (VOP_STRATEGY(ovp, bp));
   1766 }
   1767 
   1768 int
   1769 union_bwrite(void *v)
   1770 {
   1771 	struct vop_bwrite_args /* {
   1772 		struct vnode *a_vp;
   1773 		struct buf *a_bp;
   1774 	} */ *ap = v;
   1775 	struct vnode *ovp = OTHERVP(ap->a_vp);
   1776 	struct buf *bp = ap->a_bp;
   1777 
   1778 	KASSERT(ovp != NULLVP);
   1779 	if (!NODE_IS_SPECIAL(ovp))
   1780 		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
   1781 
   1782 	return (VOP_BWRITE(ovp, bp));
   1783 }
   1784 
   1785 int
   1786 union_getpages(void *v)
   1787 {
   1788 	struct vop_getpages_args /* {
   1789 		struct vnode *a_vp;
   1790 		voff_t a_offset;
   1791 		struct vm_page **a_m;
   1792 		int *a_count;
   1793 		int a_centeridx;
   1794 		vm_prot_t a_access_type;
   1795 		int a_advice;
   1796 		int a_flags;
   1797 	} */ *ap = v;
   1798 	struct vnode *vp = ap->a_vp;
   1799 
   1800 	KASSERT(mutex_owned(vp->v_interlock));
   1801 
   1802 	if (ap->a_flags & PGO_LOCKED) {
   1803 		return EBUSY;
   1804 	}
   1805 	ap->a_vp = OTHERVP(vp);
   1806 	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);
   1807 
   1808 	/* Just pass the request on to the underlying layer. */
   1809 	return VCALL(ap->a_vp, VOFFSET(vop_getpages), ap);
   1810 }
   1811 
   1812 int
   1813 union_putpages(void *v)
   1814 {
   1815 	struct vop_putpages_args /* {
   1816 		struct vnode *a_vp;
   1817 		voff_t a_offlo;
   1818 		voff_t a_offhi;
   1819 		int a_flags;
   1820 	} */ *ap = v;
   1821 	struct vnode *vp = ap->a_vp;
   1822 
   1823 	KASSERT(mutex_owned(vp->v_interlock));
   1824 
   1825 	ap->a_vp = OTHERVP(vp);
   1826 	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);
   1827 
   1828 	if (ap->a_flags & PGO_RECLAIM) {
   1829 		mutex_exit(vp->v_interlock);
   1830 		return 0;
   1831 	}
   1832 
   1833 	/* Just pass the request on to the underlying layer. */
   1834 	return VCALL(ap->a_vp, VOFFSET(vop_putpages), ap);
   1835 }
   1836 
   1837 int
   1838 union_kqfilter(void *v)
   1839 {
   1840 	struct vop_kqfilter_args /* {
   1841 		struct vnode	*a_vp;
   1842 		struct knote	*a_kn;
   1843 	} */ *ap = v;
   1844 	int error;
   1845 
   1846 	/*
   1847 	 * We watch either the upper layer file (if it already exists),
   1848 	 * or the lower layer one. If there is lower layer file only
   1849 	 * at this moment, we will keep watching that lower layer file
   1850 	 * even if upper layer file would be created later on.
   1851 	 */
   1852 	if (UPPERVP(ap->a_vp))
   1853 		error = VOP_KQFILTER(UPPERVP(ap->a_vp), ap->a_kn);
   1854 	else if (LOWERVP(ap->a_vp))
   1855 		error = VOP_KQFILTER(LOWERVP(ap->a_vp), ap->a_kn);
   1856 	else {
   1857 		/* panic? */
   1858 		error = EOPNOTSUPP;
   1859 	}
   1860 
   1861 	return (error);
   1862 }
   1863