Home | History | Annotate | Line # | Download | only in union
union_vnops.c revision 1.50
      1 /*	$NetBSD: union_vnops.c,v 1.50 2014/01/17 10:55:02 hannken Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1992, 1993, 1994, 1995
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * This code is derived from software contributed to Berkeley by
      8  * Jan-Simon Pendry.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. Neither the name of the University nor the names of its contributors
     19  *    may be used to endorse or promote products derived from this software
     20  *    without specific prior written permission.
     21  *
     22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     32  * SUCH DAMAGE.
     33  *
     34  *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
     35  */
     36 
     37 /*
     38  * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
     39  *
     40  * This code is derived from software contributed to Berkeley by
     41  * Jan-Simon Pendry.
     42  *
     43  * Redistribution and use in source and binary forms, with or without
     44  * modification, are permitted provided that the following conditions
     45  * are met:
     46  * 1. Redistributions of source code must retain the above copyright
     47  *    notice, this list of conditions and the following disclaimer.
     48  * 2. Redistributions in binary form must reproduce the above copyright
     49  *    notice, this list of conditions and the following disclaimer in the
     50  *    documentation and/or other materials provided with the distribution.
     51  * 3. All advertising materials mentioning features or use of this software
     52  *    must display the following acknowledgement:
     53  *	This product includes software developed by the University of
     54  *	California, Berkeley and its contributors.
     55  * 4. Neither the name of the University nor the names of its contributors
     56  *    may be used to endorse or promote products derived from this software
     57  *    without specific prior written permission.
     58  *
     59  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     60  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     61  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     62  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     63  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     64  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     65  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     66  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     67  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     68  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     69  * SUCH DAMAGE.
     70  *
     71  *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
     72  */
     73 
     74 #include <sys/cdefs.h>
     75 __KERNEL_RCSID(0, "$NetBSD: union_vnops.c,v 1.50 2014/01/17 10:55:02 hannken Exp $");
     76 
     77 #include <sys/param.h>
     78 #include <sys/systm.h>
     79 #include <sys/proc.h>
     80 #include <sys/file.h>
     81 #include <sys/time.h>
     82 #include <sys/stat.h>
     83 #include <sys/vnode.h>
     84 #include <sys/mount.h>
     85 #include <sys/namei.h>
     86 #include <sys/malloc.h>
     87 #include <sys/buf.h>
     88 #include <sys/queue.h>
     89 #include <sys/lock.h>
     90 #include <sys/kauth.h>
     91 
     92 #include <fs/union/union.h>
     93 #include <miscfs/genfs/genfs.h>
     94 #include <miscfs/specfs/specdev.h>
     95 
/*
 * Vnode operation entry points of the union file system.  Every one
 * takes a single (void *) argument and is registered in
 * union_vnodeop_entries[] further down in this file.
 */

/* Name lookup and directory manipulation. */
int union_lookup(void *);
int union_create(void *);
int union_whiteout(void *);
int union_mknod(void *);
int union_remove(void *);
int union_link(void *);
int union_rename(void *);
int union_mkdir(void *);
int union_rmdir(void *);
int union_symlink(void *);
int union_readdir(void *);
int union_readlink(void *);
int union_abortop(void *);

/* Open/close, attributes and access checks. */
int union_open(void *);
int union_close(void *);
int union_access(void *);
int union_getattr(void *);
int union_setattr(void *);
int union_pathconf(void *);

/* Data transfer and related per-file operations. */
int union_read(void *);
int union_write(void *);
int union_ioctl(void *);
int union_poll(void *);
int union_kqfilter(void *);
int union_revoke(void *);
int union_mmap(void *);
int union_fsync(void *);
int union_seek(void *);
int union_advlock(void *);
int union_bmap(void *);
int union_strategy(void *);
int union_bwrite(void *);
int union_getpages(void *);
int union_putpages(void *);

/* Vnode life cycle, locking and debugging. */
int union_inactive(void *);
int union_reclaim(void *);
int union_lock(void *);
int union_unlock(void *);
int union_islocked(void *);
int union_print(void *);

/* Single-layer lookup helper used by union_lookup(). */
static int union_lookup1(struct vnode *, struct vnode **, struct vnode **,
    struct componentname *);
    139 
    140 
    141 /*
    142  * Global vfs data structures
    143  */
    144 int (**union_vnodeop_p)(void *);
    145 const struct vnodeopv_entry_desc union_vnodeop_entries[] = {
    146 	{ &vop_default_desc, vn_default_error },
    147 	{ &vop_lookup_desc, union_lookup },		/* lookup */
    148 	{ &vop_create_desc, union_create },		/* create */
    149 	{ &vop_whiteout_desc, union_whiteout },		/* whiteout */
    150 	{ &vop_mknod_desc, union_mknod },		/* mknod */
    151 	{ &vop_open_desc, union_open },			/* open */
    152 	{ &vop_close_desc, union_close },		/* close */
    153 	{ &vop_access_desc, union_access },		/* access */
    154 	{ &vop_getattr_desc, union_getattr },		/* getattr */
    155 	{ &vop_setattr_desc, union_setattr },		/* setattr */
    156 	{ &vop_read_desc, union_read },			/* read */
    157 	{ &vop_write_desc, union_write },		/* write */
    158 	{ &vop_ioctl_desc, union_ioctl },		/* ioctl */
    159 	{ &vop_poll_desc, union_poll },			/* select */
    160 	{ &vop_revoke_desc, union_revoke },		/* revoke */
    161 	{ &vop_mmap_desc, union_mmap },			/* mmap */
    162 	{ &vop_fsync_desc, union_fsync },		/* fsync */
    163 	{ &vop_seek_desc, union_seek },			/* seek */
    164 	{ &vop_remove_desc, union_remove },		/* remove */
    165 	{ &vop_link_desc, union_link },			/* link */
    166 	{ &vop_rename_desc, union_rename },		/* rename */
    167 	{ &vop_mkdir_desc, union_mkdir },		/* mkdir */
    168 	{ &vop_rmdir_desc, union_rmdir },		/* rmdir */
    169 	{ &vop_symlink_desc, union_symlink },		/* symlink */
    170 	{ &vop_readdir_desc, union_readdir },		/* readdir */
    171 	{ &vop_readlink_desc, union_readlink },		/* readlink */
    172 	{ &vop_abortop_desc, union_abortop },		/* abortop */
    173 	{ &vop_inactive_desc, union_inactive },		/* inactive */
    174 	{ &vop_reclaim_desc, union_reclaim },		/* reclaim */
    175 	{ &vop_lock_desc, union_lock },			/* lock */
    176 	{ &vop_unlock_desc, union_unlock },		/* unlock */
    177 	{ &vop_bmap_desc, union_bmap },			/* bmap */
    178 	{ &vop_strategy_desc, union_strategy },		/* strategy */
    179 	{ &vop_bwrite_desc, union_bwrite },		/* bwrite */
    180 	{ &vop_print_desc, union_print },		/* print */
    181 	{ &vop_islocked_desc, union_islocked },		/* islocked */
    182 	{ &vop_pathconf_desc, union_pathconf },		/* pathconf */
    183 	{ &vop_advlock_desc, union_advlock },		/* advlock */
    184 	{ &vop_getpages_desc, union_getpages },		/* getpages */
    185 	{ &vop_putpages_desc, union_putpages },		/* putpages */
    186 	{ &vop_kqfilter_desc, union_kqfilter },		/* kqfilter */
    187 	{ NULL, NULL }
    188 };
    189 const struct vnodeopv_desc union_vnodeop_opv_desc =
    190 	{ &union_vnodeop_p, union_vnodeop_entries };
    191 
/*
 * Non-zero when the vnode's type is a block device, character device,
 * socket or FIFO.  The argument is evaluated up to four times, so it
 * must not have side effects.
 */
#define NODE_IS_SPECIAL(vp)			\
	((vp)->v_type == VBLK ||		\
	 (vp)->v_type == VCHR ||		\
	 (vp)->v_type == VSOCK ||		\
	 (vp)->v_type == VFIFO)
    195 
    196 static int
    197 union_lookup1(struct vnode *udvp, struct vnode **dvpp, struct vnode **vpp,
    198 	struct componentname *cnp)
    199 {
    200 	int error;
    201 	struct vnode *tdvp;
    202 	struct vnode *dvp;
    203 	struct mount *mp;
    204 
    205 	dvp = *dvpp;
    206 
    207 	/*
    208 	 * If stepping up the directory tree, check for going
    209 	 * back across the mount point, in which case do what
    210 	 * lookup would do by stepping back down the mount
    211 	 * hierarchy.
    212 	 */
    213 	if (cnp->cn_flags & ISDOTDOT) {
    214 		while ((dvp != udvp) && (dvp->v_vflag & VV_ROOT)) {
    215 			/*
    216 			 * Don't do the NOCROSSMOUNT check
    217 			 * at this level.  By definition,
    218 			 * union fs deals with namespaces, not
    219 			 * filesystems.
    220 			 */
    221 			tdvp = dvp;
    222 			*dvpp = dvp = dvp->v_mount->mnt_vnodecovered;
    223 			VOP_UNLOCK(tdvp);
    224 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
    225 		}
    226 	}
    227 
    228         error = VOP_LOOKUP(dvp, &tdvp, cnp);
    229 	if (error)
    230 		return (error);
    231 
    232 	dvp = tdvp;
    233 
    234 	/*
    235 	 * Lastly check if the current node is a mount point in
    236 	 * which case walk up the mount hierarchy making sure not to
    237 	 * bump into the root of the mount tree (ie. dvp != udvp).
    238 	 */
    239 	while (dvp != udvp && (dvp->v_type == VDIR) &&
    240 	       (mp = dvp->v_mountedhere)) {
    241 		if (vfs_busy(mp, NULL))
    242 			continue;
    243 		vput(dvp);
    244 		error = VFS_ROOT(mp, &tdvp);
    245 		vfs_unbusy(mp, false, NULL);
    246 		if (error) {
    247 			return (error);
    248 		}
    249 		dvp = tdvp;
    250 	}
    251 
    252 	*vpp = dvp;
    253 	return (0);
    254 }
    255 
    256 int
    257 union_lookup(void *v)
    258 {
    259 	struct vop_lookup_args /* {
    260 		struct vnodeop_desc *a_desc;
    261 		struct vnode *a_dvp;
    262 		struct vnode **a_vpp;
    263 		struct componentname *a_cnp;
    264 	} */ *ap = v;
    265 	int error;
    266 	int uerror, lerror;
    267 	struct vnode *uppervp, *lowervp;
    268 	struct vnode *upperdvp, *lowerdvp;
    269 	struct vnode *dvp = ap->a_dvp;
    270 	struct union_node *dun = VTOUNION(dvp);
    271 	struct componentname *cnp = ap->a_cnp;
    272 	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
    273 	kauth_cred_t saved_cred = NULL;
    274 	int iswhiteout;
    275 	struct vattr va;
    276 
    277 #ifdef notyet
    278 	if (cnp->cn_namelen == 3 &&
    279 			cnp->cn_nameptr[2] == '.' &&
    280 			cnp->cn_nameptr[1] == '.' &&
    281 			cnp->cn_nameptr[0] == '.') {
    282 		dvp = *ap->a_vpp = LOWERVP(ap->a_dvp);
    283 		if (dvp == NULLVP)
    284 			return (ENOENT);
    285 		vref(dvp);
    286 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
    287 		return (0);
    288 	}
    289 #endif
    290 
    291 	if ((cnp->cn_flags & ISLASTCN) &&
    292 	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
    293 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
    294 		return (EROFS);
    295 
    296 start:
    297 	upperdvp = dun->un_uppervp;
    298 	lowerdvp = dun->un_lowervp;
    299 	uppervp = NULLVP;
    300 	lowervp = NULLVP;
    301 	iswhiteout = 0;
    302 
    303 	/*
    304 	 * do the lookup in the upper level.
    305 	 * if that level comsumes additional pathnames,
    306 	 * then assume that something special is going
    307 	 * on and just return that vnode.
    308 	 */
    309 	if (upperdvp != NULLVP) {
    310 		uerror = union_lookup1(um->um_uppervp, &upperdvp,
    311 					&uppervp, cnp);
    312 		if (cnp->cn_consume != 0) {
    313 			*ap->a_vpp = uppervp;
    314 			return (uerror);
    315 		}
    316 		if (uerror == ENOENT || uerror == EJUSTRETURN) {
    317 			if (cnp->cn_flags & ISWHITEOUT) {
    318 				iswhiteout = 1;
    319 			} else if (lowerdvp != NULLVP) {
    320 				lerror = VOP_GETATTR(upperdvp, &va,
    321 					cnp->cn_cred);
    322 				if (lerror == 0 && (va.va_flags & OPAQUE))
    323 					iswhiteout = 1;
    324 			}
    325 		}
    326 	} else {
    327 		uerror = ENOENT;
    328 	}
    329 
    330 	/*
    331 	 * in a similar way to the upper layer, do the lookup
    332 	 * in the lower layer.   this time, if there is some
    333 	 * component magic going on, then vput whatever we got
    334 	 * back from the upper layer and return the lower vnode
    335 	 * instead.
    336 	 */
    337 	if (lowerdvp != NULLVP && !iswhiteout) {
    338 		int nameiop;
    339 
    340 		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY);
    341 
    342 		/*
    343 		 * Only do a LOOKUP on the bottom node, since
    344 		 * we won't be making changes to it anyway.
    345 		 */
    346 		nameiop = cnp->cn_nameiop;
    347 		cnp->cn_nameiop = LOOKUP;
    348 		if (um->um_op == UNMNT_BELOW) {
    349 			saved_cred = cnp->cn_cred;
    350 			cnp->cn_cred = um->um_cred;
    351 		}
    352 
    353 		/*
    354 		 * we shouldn't have to worry about locking interactions
    355 		 * between the lower layer and our union layer (w.r.t.
    356 		 * `..' processing) because we don't futz with lowervp
    357 		 * locks in the union-node instantiation code path.
    358 		 */
    359 		lerror = union_lookup1(um->um_lowervp, &lowerdvp,
    360 				&lowervp, cnp);
    361 		if (um->um_op == UNMNT_BELOW)
    362 			cnp->cn_cred = saved_cred;
    363 		cnp->cn_nameiop = nameiop;
    364 
    365 		if (lowervp != lowerdvp)
    366 			VOP_UNLOCK(lowerdvp);
    367 
    368 		if (cnp->cn_consume != 0) {
    369 			if (uppervp != NULLVP) {
    370 				if (uppervp == upperdvp)
    371 					vrele(uppervp);
    372 				else
    373 					vput(uppervp);
    374 				uppervp = NULLVP;
    375 			}
    376 			*ap->a_vpp = lowervp;
    377 			return (lerror);
    378 		}
    379 	} else {
    380 		lerror = ENOENT;
    381 		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
    382 			lowervp = LOWERVP(dun->un_pvp);
    383 			if (lowervp != NULLVP) {
    384 				vref(lowervp);
    385 				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
    386 				lerror = 0;
    387 			}
    388 		}
    389 	}
    390 
    391 	/*
    392 	 * EJUSTRETURN is used by underlying filesystems to indicate that
    393 	 * a directory modification op was started successfully.
    394 	 * This will only happen in the upper layer, since
    395 	 * the lower layer only does LOOKUPs.
    396 	 * If this union is mounted read-only, bounce it now.
    397 	 */
    398 
    399 	if ((uerror == EJUSTRETURN) && (cnp->cn_flags & ISLASTCN) &&
    400 	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
    401 	    ((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)))
    402 		uerror = EROFS;
    403 
    404 	/*
    405 	 * at this point, we have uerror and lerror indicating
    406 	 * possible errors with the lookups in the upper and lower
    407 	 * layers.  additionally, uppervp and lowervp are (locked)
    408 	 * references to existing vnodes in the upper and lower layers.
    409 	 *
    410 	 * there are now three cases to consider.
    411 	 * 1. if both layers returned an error, then return whatever
    412 	 *    error the upper layer generated.
    413 	 *
    414 	 * 2. if the top layer failed and the bottom layer succeeded
    415 	 *    then two subcases occur.
    416 	 *    a.  the bottom vnode is not a directory, in which
    417 	 *	  case just return a new union vnode referencing
    418 	 *	  an empty top layer and the existing bottom layer.
    419 	 *    b.  the bottom vnode is a directory, in which case
    420 	 *	  create a new directory in the top-level and
    421 	 *	  continue as in case 3.
    422 	 *
    423 	 * 3. if the top layer succeeded then return a new union
    424 	 *    vnode referencing whatever the new top layer and
    425 	 *    whatever the bottom layer returned.
    426 	 */
    427 
    428 	*ap->a_vpp = NULLVP;
    429 
    430 
    431 	/* case 1. */
    432 	if ((uerror != 0) && (lerror != 0)) {
    433 		return (uerror);
    434 	}
    435 
    436 	/* case 2. */
    437 	if (uerror != 0 /* && (lerror == 0) */ ) {
    438 		if (lowervp->v_type == VDIR) { /* case 2b. */
    439 			/*
    440 			 * We may be racing another process to make the
    441 			 * upper-level shadow directory.  Be careful with
    442 			 * locks/etc!
    443 			 * If we have to create a shadow directory and want
    444 			 * to commit the node we have to restart the lookup
    445 			 * to get the componentname right.
    446 			 */
    447 			if (upperdvp) {
    448 				VOP_UNLOCK(upperdvp);
    449 				uerror = union_mkshadow(um, upperdvp, cnp,
    450 				    &uppervp);
    451 				vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY);
    452 				if (uerror == 0 && cnp->cn_nameiop != LOOKUP) {
    453 					vput(uppervp);
    454 					if (lowervp != NULLVP)
    455 						vput(lowervp);
    456 					goto start;
    457 				}
    458 			}
    459 			if (uerror) {
    460 				if (lowervp != NULLVP) {
    461 					vput(lowervp);
    462 					lowervp = NULLVP;
    463 				}
    464 				return (uerror);
    465 			}
    466 		}
    467 	}
    468 
    469 	if (lowervp != NULLVP)
    470 		VOP_UNLOCK(lowervp);
    471 
    472 	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
    473 			      uppervp, lowervp, 1);
    474 
    475 	if (error) {
    476 		if (uppervp != NULLVP)
    477 			vput(uppervp);
    478 		if (lowervp != NULLVP)
    479 			vrele(lowervp);
    480 	}
    481 
    482 	return (error);
    483 }
    484 
    485 int
    486 union_create(void *v)
    487 {
    488 	struct vop_create_v2_args /* {
    489 		struct vnode *a_dvp;
    490 		struct vnode **a_vpp;
    491 		struct componentname *a_cnp;
    492 		struct vattr *a_vap;
    493 	} */ *ap = v;
    494 	struct union_node *un = VTOUNION(ap->a_dvp);
    495 	struct vnode *dvp = un->un_uppervp;
    496 	struct componentname *cnp = ap->a_cnp;
    497 
    498 	if (dvp != NULLVP) {
    499 		int error;
    500 		struct vnode *vp;
    501 		struct mount *mp;
    502 
    503 		mp = ap->a_dvp->v_mount;
    504 		error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
    505 		if (error)
    506 			return (error);
    507 
    508 		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp,
    509 				NULLVP, 1);
    510 		if (error)
    511 			vput(vp);
    512 		return (error);
    513 	}
    514 
    515 	return (EROFS);
    516 }
    517 
    518 int
    519 union_whiteout(void *v)
    520 {
    521 	struct vop_whiteout_args /* {
    522 		struct vnode *a_dvp;
    523 		struct componentname *a_cnp;
    524 		int a_flags;
    525 	} */ *ap = v;
    526 	struct union_node *un = VTOUNION(ap->a_dvp);
    527 	struct componentname *cnp = ap->a_cnp;
    528 
    529 	if (un->un_uppervp == NULLVP)
    530 		return (EOPNOTSUPP);
    531 
    532 	return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags));
    533 }
    534 
    535 int
    536 union_mknod(void *v)
    537 {
    538 	struct vop_mknod_v2_args /* {
    539 		struct vnode *a_dvp;
    540 		struct vnode **a_vpp;
    541 		struct componentname *a_cnp;
    542 		struct vattr *a_vap;
    543 	} */ *ap = v;
    544 	struct union_node *un = VTOUNION(ap->a_dvp);
    545 	struct vnode *dvp = un->un_uppervp;
    546 	struct componentname *cnp = ap->a_cnp;
    547 
    548 	if (dvp != NULLVP) {
    549 		int error;
    550 		struct vnode *vp;
    551 		struct mount *mp;
    552 
    553 		mp = ap->a_dvp->v_mount;
    554 		error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap);
    555 		if (error)
    556 			return (error);
    557 
    558 		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
    559 				      cnp, vp, NULLVP, 1);
    560 		if (error)
    561 		    vput(vp);
    562 		return (error);
    563 	}
    564 
    565 	return (EROFS);
    566 }
    567 
    568 int
    569 union_open(void *v)
    570 {
    571 	struct vop_open_args /* {
    572 		struct vnodeop_desc *a_desc;
    573 		struct vnode *a_vp;
    574 		int a_mode;
    575 		kauth_cred_t a_cred;
    576 	} */ *ap = v;
    577 	struct union_node *un = VTOUNION(ap->a_vp);
    578 	struct vnode *tvp;
    579 	int mode = ap->a_mode;
    580 	kauth_cred_t cred = ap->a_cred;
    581 	struct lwp *l = curlwp;
    582 	int error;
    583 
    584 	/*
    585 	 * If there is an existing upper vp then simply open that.
    586 	 */
    587 	tvp = un->un_uppervp;
    588 	if (tvp == NULLVP) {
    589 		/*
    590 		 * If the lower vnode is being opened for writing, then
    591 		 * copy the file contents to the upper vnode and open that,
    592 		 * otherwise can simply open the lower vnode.
    593 		 */
    594 		tvp = un->un_lowervp;
    595 		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
    596 			error = union_copyup(un, (mode&O_TRUNC) == 0, cred, l);
    597 			if (error == 0)
    598 				error = VOP_OPEN(un->un_uppervp, mode, cred);
    599 			return (error);
    600 		}
    601 
    602 		/*
    603 		 * Just open the lower vnode, but check for nodev mount flag
    604 		 */
    605 		if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
    606 		    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
    607 			return ENXIO;
    608 		un->un_openl++;
    609 		vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
    610 		error = VOP_OPEN(tvp, mode, cred);
    611 		VOP_UNLOCK(tvp);
    612 
    613 		return (error);
    614 	}
    615 	/*
    616 	 * Just open the upper vnode, checking for nodev mount flag first
    617 	 */
    618 	if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
    619 	    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
    620 		return ENXIO;
    621 
    622 	error = VOP_OPEN(tvp, mode, cred);
    623 
    624 	return (error);
    625 }
    626 
    627 int
    628 union_close(void *v)
    629 {
    630 	struct vop_close_args /* {
    631 		struct vnode *a_vp;
    632 		int  a_fflag;
    633 		kauth_cred_t a_cred;
    634 	} */ *ap = v;
    635 	struct union_node *un = VTOUNION(ap->a_vp);
    636 	struct vnode *vp;
    637 	int error;
    638 	bool do_lock;
    639 
    640 	vp = un->un_uppervp;
    641 	if (vp != NULLVP) {
    642 		do_lock = false;
    643 	} else {
    644 		KASSERT(un->un_openl > 0);
    645 		--un->un_openl;
    646 		vp = un->un_lowervp;
    647 		do_lock = true;
    648 	}
    649 
    650 	KASSERT(vp != NULLVP);
    651 	ap->a_vp = vp;
    652 	if (do_lock)
    653 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
    654 	error = VCALL(vp, VOFFSET(vop_close), ap);
    655 	if (do_lock)
    656 		VOP_UNLOCK(vp);
    657 
    658 	return error;
    659 }
    660 
    661 /*
    662  * Check access permission on the union vnode.
    663  * The access check being enforced is to check
    664  * against both the underlying vnode, and any
    665  * copied vnode.  This ensures that no additional
    666  * file permissions are given away simply because
    667  * the user caused an implicit file copy.
    668  */
    669 int
    670 union_access(void *v)
    671 {
    672 	struct vop_access_args /* {
    673 		struct vnodeop_desc *a_desc;
    674 		struct vnode *a_vp;
    675 		int a_mode;
    676 		kauth_cred_t a_cred;
    677 	} */ *ap = v;
    678 	struct vnode *vp = ap->a_vp;
    679 	struct union_node *un = VTOUNION(vp);
    680 	int error = EACCES;
    681 	struct union_mount *um = MOUNTTOUNIONMOUNT(vp->v_mount);
    682 
    683 	/*
    684 	 * Disallow write attempts on read-only file systems;
    685 	 * unless the file is a socket, fifo, or a block or
    686 	 * character device resident on the file system.
    687 	 */
    688 	if (ap->a_mode & VWRITE) {
    689 		switch (vp->v_type) {
    690 		case VDIR:
    691 		case VLNK:
    692 		case VREG:
    693 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
    694 				return (EROFS);
    695 			break;
    696 		case VBAD:
    697 		case VBLK:
    698 		case VCHR:
    699 		case VSOCK:
    700 		case VFIFO:
    701 		case VNON:
    702 		default:
    703 			break;
    704 		}
    705 	}
    706 
    707 
    708 	if ((vp = un->un_uppervp) != NULLVP) {
    709 		ap->a_vp = vp;
    710 		return (VCALL(vp, VOFFSET(vop_access), ap));
    711 	}
    712 
    713 	if ((vp = un->un_lowervp) != NULLVP) {
    714 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
    715 		ap->a_vp = vp;
    716 		error = VCALL(vp, VOFFSET(vop_access), ap);
    717 		if (error == 0) {
    718 			if (um->um_op == UNMNT_BELOW) {
    719 				ap->a_cred = um->um_cred;
    720 				error = VCALL(vp, VOFFSET(vop_access), ap);
    721 			}
    722 		}
    723 		VOP_UNLOCK(vp);
    724 		if (error)
    725 			return (error);
    726 	}
    727 
    728 	return (error);
    729 }
    730 
    731 /*
    732  * We handle getattr only to change the fsid and
    733  * track object sizes
    734  */
    735 int
    736 union_getattr(void *v)
    737 {
    738 	struct vop_getattr_args /* {
    739 		struct vnode *a_vp;
    740 		struct vattr *a_vap;
    741 		kauth_cred_t a_cred;
    742 	} */ *ap = v;
    743 	int error;
    744 	struct union_node *un = VTOUNION(ap->a_vp);
    745 	struct vnode *vp = un->un_uppervp;
    746 	struct vattr *vap;
    747 	struct vattr va;
    748 
    749 
    750 	/*
    751 	 * Some programs walk the filesystem hierarchy by counting
    752 	 * links to directories to avoid stat'ing all the time.
    753 	 * This means the link count on directories needs to be "correct".
    754 	 * The only way to do that is to call getattr on both layers
    755 	 * and fix up the link count.  The link count will not necessarily
    756 	 * be accurate but will be large enough to defeat the tree walkers.
    757 	 *
    758 	 * To make life more interesting, some filesystems don't keep
    759 	 * track of link counts in the expected way, and return a
    760 	 * link count of `1' for those directories; if either of the
    761 	 * component directories returns a link count of `1', we return a 1.
    762 	 */
    763 
    764 	vap = ap->a_vap;
    765 
    766 	vp = un->un_uppervp;
    767 	if (vp != NULLVP) {
    768 		error = VOP_GETATTR(vp, vap, ap->a_cred);
    769 		if (error)
    770 			return (error);
    771 		mutex_enter(&un->un_lock);
    772 		union_newsize(ap->a_vp, vap->va_size, VNOVAL);
    773 	}
    774 
    775 	if (vp == NULLVP) {
    776 		vp = un->un_lowervp;
    777 	} else if (vp->v_type == VDIR) {
    778 		vp = un->un_lowervp;
    779 		if (vp != NULLVP)
    780 			vap = &va;
    781 	} else {
    782 		vp = NULLVP;
    783 	}
    784 
    785 	if (vp != NULLVP) {
    786 		if (vp == un->un_lowervp)
    787 			vn_lock(vp, LK_SHARED | LK_RETRY);
    788 		error = VOP_GETATTR(vp, vap, ap->a_cred);
    789 		if (vp == un->un_lowervp)
    790 			VOP_UNLOCK(vp);
    791 		if (error)
    792 			return (error);
    793 		mutex_enter(&un->un_lock);
    794 		union_newsize(ap->a_vp, VNOVAL, vap->va_size);
    795 	}
    796 
    797 	if ((vap != ap->a_vap) && (vap->va_type == VDIR)) {
    798 		/*
    799 		 * Link count manipulation:
    800 		 *	- If both return "2", return 2 (no subdirs)
    801 		 *	- If one or the other return "1", return "1" (ENOCLUE)
    802 		 */
    803 		if ((ap->a_vap->va_nlink == 2) &&
    804 		    (vap->va_nlink == 2))
    805 			;
    806 		else if (ap->a_vap->va_nlink != 1) {
    807 			if (vap->va_nlink == 1)
    808 				ap->a_vap->va_nlink = 1;
    809 			else
    810 				ap->a_vap->va_nlink += vap->va_nlink;
    811 		}
    812 	}
    813 	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
    814 	return (0);
    815 }
    816 
    817 int
    818 union_setattr(void *v)
    819 {
    820 	struct vop_setattr_args /* {
    821 		struct vnode *a_vp;
    822 		struct vattr *a_vap;
    823 		kauth_cred_t a_cred;
    824 	} */ *ap = v;
    825 	struct vattr *vap = ap->a_vap;
    826 	struct vnode *vp = ap->a_vp;
    827 	struct union_node *un = VTOUNION(vp);
    828 	bool size_only;		/* All but va_size are VNOVAL. */
    829 	int error;
    830 
    831 	size_only = (vap->va_flags == VNOVAL && vap->va_uid == (uid_t)VNOVAL &&
    832 	    vap->va_gid == (gid_t)VNOVAL && vap->va_atime.tv_sec == VNOVAL &&
    833 	    vap->va_mtime.tv_sec == VNOVAL && vap->va_mode == (mode_t)VNOVAL);
    834 
    835 	if (!size_only && (vp->v_mount->mnt_flag & MNT_RDONLY))
    836 		return (EROFS);
    837 	if (vap->va_size != VNOVAL) {
    838  		switch (vp->v_type) {
    839  		case VDIR:
    840  			return (EISDIR);
    841  		case VCHR:
    842  		case VBLK:
    843  		case VSOCK:
    844  		case VFIFO:
    845 			break;
    846 		case VREG:
    847 		case VLNK:
    848  		default:
    849 			/*
    850 			 * Disallow write attempts if the filesystem is
    851 			 * mounted read-only.
    852 			 */
    853 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
    854 				return (EROFS);
    855 		}
    856 	}
    857 
    858 	/*
    859 	 * Handle case of truncating lower object to zero size,
    860 	 * by creating a zero length upper object.  This is to
    861 	 * handle the case of open with O_TRUNC and O_CREAT.
    862 	 */
    863 	if ((un->un_uppervp == NULLVP) &&
    864 	    /* assert(un->un_lowervp != NULLVP) */
    865 	    (un->un_lowervp->v_type == VREG)) {
    866 		error = union_copyup(un, (vap->va_size != 0),
    867 						ap->a_cred, curlwp);
    868 		if (error)
    869 			return (error);
    870 	}
    871 
    872 	/*
    873 	 * Try to set attributes in upper layer, ignore size change to zero
    874 	 * for devices to handle O_TRUNC and return read-only filesystem error
    875 	 * otherwise.
    876 	 */
    877 	if (un->un_uppervp != NULLVP) {
    878 		error = VOP_SETATTR(un->un_uppervp, vap, ap->a_cred);
    879 		if ((error == 0) && (vap->va_size != VNOVAL)) {
    880 			mutex_enter(&un->un_lock);
    881 			union_newsize(ap->a_vp, vap->va_size, VNOVAL);
    882 		}
    883 	} else {
    884 		KASSERT(un->un_lowervp != NULLVP);
    885 		if (NODE_IS_SPECIAL(un->un_lowervp)) {
    886 			if (size_only &&
    887 			    (vap->va_size == 0 || vap->va_size == VNOVAL))
    888 				error = 0;
    889 			else
    890 				error = EROFS;
    891 		} else {
    892 			error = EROFS;
    893 		}
    894 	}
    895 
    896 	return (error);
    897 }
    898 
    899 int
    900 union_read(void *v)
    901 {
    902 	struct vop_read_args /* {
    903 		struct vnode *a_vp;
    904 		struct uio *a_uio;
    905 		int  a_ioflag;
    906 		kauth_cred_t a_cred;
    907 	} */ *ap = v;
    908 	int error;
    909 	struct vnode *vp = OTHERVP(ap->a_vp);
    910 	int dolock = (vp == LOWERVP(ap->a_vp));
    911 
    912 	if (dolock)
    913 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
    914 	error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
    915 	if (dolock)
    916 		VOP_UNLOCK(vp);
    917 
    918 	/*
    919 	 * XXX
    920 	 * perhaps the size of the underlying object has changed under
    921 	 * our feet.  take advantage of the offset information present
    922 	 * in the uio structure.
    923 	 */
    924 	if (error == 0) {
    925 		struct union_node *un = VTOUNION(ap->a_vp);
    926 		off_t cur = ap->a_uio->uio_offset;
    927 		off_t usz = VNOVAL, lsz = VNOVAL;
    928 
    929 		mutex_enter(&un->un_lock);
    930 		if (vp == un->un_uppervp) {
    931 			if (cur > un->un_uppersz)
    932 				usz = cur;
    933 		} else {
    934 			if (cur > un->un_lowersz)
    935 				lsz = cur;
    936 		}
    937 
    938 		if (usz != VNOVAL || lsz != VNOVAL)
    939 			union_newsize(ap->a_vp, usz, lsz);
    940 		else
    941 			mutex_exit(&un->un_lock);
    942 	}
    943 
    944 	return (error);
    945 }
    946 
    947 int
    948 union_write(void *v)
    949 {
    950 	struct vop_read_args /* {
    951 		struct vnode *a_vp;
    952 		struct uio *a_uio;
    953 		int  a_ioflag;
    954 		kauth_cred_t a_cred;
    955 	} */ *ap = v;
    956 	int error;
    957 	struct vnode *vp;
    958 	struct union_node *un = VTOUNION(ap->a_vp);
    959 
    960 	vp = UPPERVP(ap->a_vp);
    961 	if (vp == NULLVP) {
    962 		vp = LOWERVP(ap->a_vp);
    963 		if (NODE_IS_SPECIAL(vp)) {
    964 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
    965 			error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag,
    966 			    ap->a_cred);
    967 			VOP_UNLOCK(vp);
    968 			return error;
    969 		}
    970 		panic("union: missing upper layer in write");
    971 	}
    972 
    973 	error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
    974 
    975 	/*
    976 	 * the size of the underlying object may be changed by the
    977 	 * write.
    978 	 */
    979 	if (error == 0) {
    980 		off_t cur = ap->a_uio->uio_offset;
    981 
    982 		mutex_enter(&un->un_lock);
    983 		if (cur > un->un_uppersz)
    984 			union_newsize(ap->a_vp, cur, VNOVAL);
    985 		else
    986 			mutex_exit(&un->un_lock);
    987 	}
    988 
    989 	return (error);
    990 }
    991 
    992 int
    993 union_ioctl(void *v)
    994 {
    995 	struct vop_ioctl_args /* {
    996 		struct vnode *a_vp;
    997 		int  a_command;
    998 		void *a_data;
    999 		int  a_fflag;
   1000 		kauth_cred_t a_cred;
   1001 	} */ *ap = v;
   1002 	struct vnode *ovp = OTHERVP(ap->a_vp);
   1003 
   1004 	ap->a_vp = ovp;
   1005 	return (VCALL(ovp, VOFFSET(vop_ioctl), ap));
   1006 }
   1007 
   1008 int
   1009 union_poll(void *v)
   1010 {
   1011 	struct vop_poll_args /* {
   1012 		struct vnode *a_vp;
   1013 		int a_events;
   1014 	} */ *ap = v;
   1015 	struct vnode *ovp = OTHERVP(ap->a_vp);
   1016 
   1017 	ap->a_vp = ovp;
   1018 	return (VCALL(ovp, VOFFSET(vop_poll), ap));
   1019 }
   1020 
   1021 int
   1022 union_revoke(void *v)
   1023 {
   1024 	struct vop_revoke_args /* {
   1025 		struct vnode *a_vp;
   1026 		int a_flags;
   1027 		struct proc *a_p;
   1028 	} */ *ap = v;
   1029 	struct vnode *vp = ap->a_vp;
   1030 
   1031 	if (UPPERVP(vp))
   1032 		VOP_REVOKE(UPPERVP(vp), ap->a_flags);
   1033 	if (LOWERVP(vp))
   1034 		VOP_REVOKE(LOWERVP(vp), ap->a_flags);
   1035 	vgone(vp);	/* XXXAD?? */
   1036 	return (0);
   1037 }
   1038 
   1039 int
   1040 union_mmap(void *v)
   1041 {
   1042 	struct vop_mmap_args /* {
   1043 		struct vnode *a_vp;
   1044 		vm_prot_t a_prot;
   1045 		kauth_cred_t a_cred;
   1046 	} */ *ap = v;
   1047 	struct vnode *ovp = OTHERVP(ap->a_vp);
   1048 
   1049 	ap->a_vp = ovp;
   1050 	return (VCALL(ovp, VOFFSET(vop_mmap), ap));
   1051 }
   1052 
   1053 int
   1054 union_fsync(void *v)
   1055 {
   1056 	struct vop_fsync_args /* {
   1057 		struct vnode *a_vp;
   1058 		kauth_cred_t a_cred;
   1059 		int  a_flags;
   1060 		off_t offhi;
   1061 		off_t offlo;
   1062 	} */ *ap = v;
   1063 	int error = 0;
   1064 	struct vnode *targetvp;
   1065 
   1066 	/*
   1067 	 * If vinvalbuf is calling us, it's a "shallow fsync" -- don't
   1068 	 * bother syncing the underlying vnodes, since (a) they'll be
   1069 	 * fsync'ed when reclaimed and (b) we could deadlock if
   1070 	 * they're locked; otherwise, pass it through to the
   1071 	 * underlying layer.
   1072 	 */
   1073 	if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) {
   1074 		error = spec_fsync(v);
   1075 		if (error)
   1076 			return error;
   1077 	}
   1078 
   1079 	if (ap->a_flags & FSYNC_RECLAIM)
   1080 		return 0;
   1081 
   1082 	targetvp = OTHERVP(ap->a_vp);
   1083 	if (targetvp != NULLVP) {
   1084 		int dolock = (targetvp == LOWERVP(ap->a_vp));
   1085 
   1086 		if (dolock)
   1087 			vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY);
   1088 		error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_flags,
   1089 			    ap->a_offlo, ap->a_offhi);
   1090 		if (dolock)
   1091 			VOP_UNLOCK(targetvp);
   1092 	}
   1093 
   1094 	return (error);
   1095 }
   1096 
   1097 int
   1098 union_seek(void *v)
   1099 {
   1100 	struct vop_seek_args /* {
   1101 		struct vnode *a_vp;
   1102 		off_t  a_oldoff;
   1103 		off_t  a_newoff;
   1104 		kauth_cred_t a_cred;
   1105 	} */ *ap = v;
   1106 	struct vnode *ovp = OTHERVP(ap->a_vp);
   1107 
   1108 	ap->a_vp = ovp;
   1109 	return (VCALL(ovp, VOFFSET(vop_seek), ap));
   1110 }
   1111 
   1112 int
   1113 union_remove(void *v)
   1114 {
   1115 	struct vop_remove_args /* {
   1116 		struct vnode *a_dvp;
   1117 		struct vnode *a_vp;
   1118 		struct componentname *a_cnp;
   1119 	} */ *ap = v;
   1120 	int error;
   1121 	struct union_node *dun = VTOUNION(ap->a_dvp);
   1122 	struct union_node *un = VTOUNION(ap->a_vp);
   1123 	struct componentname *cnp = ap->a_cnp;
   1124 
   1125 	if (dun->un_uppervp == NULLVP)
   1126 		panic("union remove: null upper vnode");
   1127 
   1128 	if (un->un_uppervp != NULLVP) {
   1129 		struct vnode *dvp = dun->un_uppervp;
   1130 		struct vnode *vp = un->un_uppervp;
   1131 
   1132 		vref(dvp);
   1133 		dun->un_flags |= UN_KLOCK;
   1134 		vput(ap->a_dvp);
   1135 		vref(vp);
   1136 		un->un_flags |= UN_KLOCK;
   1137 		vput(ap->a_vp);
   1138 
   1139 		if (union_dowhiteout(un, cnp->cn_cred))
   1140 			cnp->cn_flags |= DOWHITEOUT;
   1141 		error = VOP_REMOVE(dvp, vp, cnp);
   1142 		if (!error)
   1143 			union_removed_upper(un);
   1144 	} else {
   1145 		error = union_mkwhiteout(
   1146 			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
   1147 			dun->un_uppervp, ap->a_cnp, un);
   1148 		vput(ap->a_dvp);
   1149 		vput(ap->a_vp);
   1150 	}
   1151 
   1152 	return (error);
   1153 }
   1154 
   1155 int
   1156 union_link(void *v)
   1157 {
   1158 	struct vop_link_args /* {
   1159 		struct vnode *a_dvp;
   1160 		struct vnode *a_vp;
   1161 		struct componentname *a_cnp;
   1162 	} */ *ap = v;
   1163 	int error = 0;
   1164 	struct componentname *cnp = ap->a_cnp;
   1165 	struct union_node *dun;
   1166 	struct vnode *vp;
   1167 	struct vnode *dvp;
   1168 
   1169 	dun = VTOUNION(ap->a_dvp);
   1170 
   1171 	KASSERT((ap->a_cnp->cn_flags & LOCKPARENT) != 0);
   1172 
   1173 	if (ap->a_dvp->v_op != ap->a_vp->v_op) {
   1174 		vp = ap->a_vp;
   1175 	} else {
   1176 		struct union_node *un = VTOUNION(ap->a_vp);
   1177 		if (un->un_uppervp == NULLVP) {
   1178 			const bool droplock = (dun->un_uppervp == un->un_dirvp);
   1179 
   1180 			/*
   1181 			 * Needs to be copied before we can link it.
   1182 			 */
   1183 			vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
   1184 			if (droplock)
   1185 				VOP_UNLOCK(dun->un_uppervp);
   1186 			error = union_copyup(un, 1, cnp->cn_cred, curlwp);
   1187 			if (droplock) {
   1188 				vn_lock(dun->un_uppervp,
   1189 				    LK_EXCLUSIVE | LK_RETRY);
   1190 				/*
   1191 				 * During copyup, we dropped the lock on the
   1192 				 * dir and invalidated any saved namei lookup
   1193 				 * state for the directory we'll be entering
   1194 				 * the link in.  We need to re-run the lookup
   1195 				 * in that directory to reset any state needed
   1196 				 * for VOP_LINK.
   1197 				 * Call relookup on the union-layer to reset
   1198 				 * the state.
   1199 				 */
   1200 				vp  = NULLVP;
   1201 				if (dun->un_uppervp == NULLVP)
   1202 					 panic("union: null upperdvp?");
   1203 				error = relookup(ap->a_dvp, &vp, ap->a_cnp, 0);
   1204 				if (error) {
   1205 					VOP_UNLOCK(ap->a_vp);
   1206 					return EROFS;	/* ? */
   1207 				}
   1208 				if (vp != NULLVP) {
   1209 					/*
   1210 					 * The name we want to create has
   1211 					 * mysteriously appeared (a race?)
   1212 					 */
   1213 					error = EEXIST;
   1214 					VOP_UNLOCK(ap->a_vp);
   1215 					vput(ap->a_dvp);
   1216 					vput(vp);
   1217 					return (error);
   1218 				}
   1219 			}
   1220 			VOP_UNLOCK(ap->a_vp);
   1221 		}
   1222 		vp = un->un_uppervp;
   1223 	}
   1224 
   1225 	dvp = dun->un_uppervp;
   1226 	if (dvp == NULLVP)
   1227 		error = EROFS;
   1228 
   1229 	if (error) {
   1230 		vput(ap->a_dvp);
   1231 		return (error);
   1232 	}
   1233 
   1234 	vref(dvp);
   1235 	dun->un_flags |= UN_KLOCK;
   1236 	vput(ap->a_dvp);
   1237 
   1238 	return (VOP_LINK(dvp, vp, cnp));
   1239 }
   1240 
   1241 int
   1242 union_rename(void *v)
   1243 {
   1244 	struct vop_rename_args  /* {
   1245 		struct vnode *a_fdvp;
   1246 		struct vnode *a_fvp;
   1247 		struct componentname *a_fcnp;
   1248 		struct vnode *a_tdvp;
   1249 		struct vnode *a_tvp;
   1250 		struct componentname *a_tcnp;
   1251 	} */ *ap = v;
   1252 	int error;
   1253 
   1254 	struct vnode *fdvp = ap->a_fdvp;
   1255 	struct vnode *fvp = ap->a_fvp;
   1256 	struct vnode *tdvp = ap->a_tdvp;
   1257 	struct vnode *tvp = ap->a_tvp;
   1258 
   1259 	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
   1260 		struct union_node *un = VTOUNION(fdvp);
   1261 		if (un->un_uppervp == NULLVP) {
   1262 			/*
   1263 			 * this should never happen in normal
   1264 			 * operation but might if there was
   1265 			 * a problem creating the top-level shadow
   1266 			 * directory.
   1267 			 */
   1268 			error = EXDEV;
   1269 			goto bad;
   1270 		}
   1271 
   1272 		fdvp = un->un_uppervp;
   1273 		vref(fdvp);
   1274 	}
   1275 
   1276 	if (fvp->v_op == union_vnodeop_p) {	/* always true */
   1277 		struct union_node *un = VTOUNION(fvp);
   1278 		if (un->un_uppervp == NULLVP) {
   1279 			/* XXX: should do a copyup */
   1280 			error = EXDEV;
   1281 			goto bad;
   1282 		}
   1283 
   1284 		if (un->un_lowervp != NULLVP)
   1285 			ap->a_fcnp->cn_flags |= DOWHITEOUT;
   1286 
   1287 		fvp = un->un_uppervp;
   1288 		vref(fvp);
   1289 	}
   1290 
   1291 	if (tdvp->v_op == union_vnodeop_p) {
   1292 		struct union_node *un = VTOUNION(tdvp);
   1293 		if (un->un_uppervp == NULLVP) {
   1294 			/*
   1295 			 * this should never happen in normal
   1296 			 * operation but might if there was
   1297 			 * a problem creating the top-level shadow
   1298 			 * directory.
   1299 			 */
   1300 			error = EXDEV;
   1301 			goto bad;
   1302 		}
   1303 
   1304 		tdvp = un->un_uppervp;
   1305 		vref(tdvp);
   1306 		un->un_flags |= UN_KLOCK;
   1307 		vput(ap->a_tdvp);
   1308 	}
   1309 
   1310 	if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
   1311 		struct union_node *un = VTOUNION(tvp);
   1312 
   1313 		tvp = un->un_uppervp;
   1314 		if (tvp != NULLVP) {
   1315 			vref(tvp);
   1316 			un->un_flags |= UN_KLOCK;
   1317 		}
   1318 		vput(ap->a_tvp);
   1319 	}
   1320 
   1321 	error = VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp);
   1322 	goto out;
   1323 
   1324 bad:
   1325 	vput(tdvp);
   1326 	if (tvp != NULLVP)
   1327 		vput(tvp);
   1328 	vrele(fdvp);
   1329 	vrele(fvp);
   1330 
   1331 out:
   1332 	if (fdvp != ap->a_fdvp) {
   1333 		vrele(ap->a_fdvp);
   1334 	}
   1335 	if (fvp != ap->a_fvp) {
   1336 		vrele(ap->a_fvp);
   1337 	}
   1338 	return (error);
   1339 }
   1340 
   1341 int
   1342 union_mkdir(void *v)
   1343 {
   1344 	struct vop_mkdir_v2_args /* {
   1345 		struct vnode *a_dvp;
   1346 		struct vnode **a_vpp;
   1347 		struct componentname *a_cnp;
   1348 		struct vattr *a_vap;
   1349 	} */ *ap = v;
   1350 	struct union_node *un = VTOUNION(ap->a_dvp);
   1351 	struct vnode *dvp = un->un_uppervp;
   1352 	struct componentname *cnp = ap->a_cnp;
   1353 
   1354 	if (dvp != NULLVP) {
   1355 		int error;
   1356 		struct vnode *vp;
   1357 
   1358 		error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap);
   1359 		if (error) {
   1360 			vrele(ap->a_dvp);
   1361 			return (error);
   1362 		}
   1363 
   1364 		error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp,
   1365 				NULLVP, cnp, vp, NULLVP, 1);
   1366 		if (error)
   1367 			vput(vp);
   1368 		return (error);
   1369 	}
   1370 
   1371 	return (EROFS);
   1372 }
   1373 
   1374 int
   1375 union_rmdir(void *v)
   1376 {
   1377 	struct vop_rmdir_args /* {
   1378 		struct vnode *a_dvp;
   1379 		struct vnode *a_vp;
   1380 		struct componentname *a_cnp;
   1381 	} */ *ap = v;
   1382 	int error;
   1383 	struct union_node *dun = VTOUNION(ap->a_dvp);
   1384 	struct union_node *un = VTOUNION(ap->a_vp);
   1385 	struct componentname *cnp = ap->a_cnp;
   1386 
   1387 	if (dun->un_uppervp == NULLVP)
   1388 		panic("union rmdir: null upper vnode");
   1389 
   1390 	error = union_check_rmdir(un, cnp->cn_cred);
   1391 	if (error) {
   1392 		vput(ap->a_dvp);
   1393 		vput(ap->a_vp);
   1394 		return error;
   1395 	}
   1396 
   1397 	if (un->un_uppervp != NULLVP) {
   1398 		struct vnode *dvp = dun->un_uppervp;
   1399 		struct vnode *vp = un->un_uppervp;
   1400 
   1401 		vref(dvp);
   1402 		dun->un_flags |= UN_KLOCK;
   1403 		vput(ap->a_dvp);
   1404 		vref(vp);
   1405 		un->un_flags |= UN_KLOCK;
   1406 		vput(ap->a_vp);
   1407 
   1408 		if (union_dowhiteout(un, cnp->cn_cred))
   1409 			cnp->cn_flags |= DOWHITEOUT;
   1410 		error = VOP_RMDIR(dvp, vp, ap->a_cnp);
   1411 		if (!error)
   1412 			union_removed_upper(un);
   1413 	} else {
   1414 		error = union_mkwhiteout(
   1415 			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
   1416 			dun->un_uppervp, ap->a_cnp, un);
   1417 		vput(ap->a_dvp);
   1418 		vput(ap->a_vp);
   1419 	}
   1420 
   1421 	return (error);
   1422 }
   1423 
   1424 int
   1425 union_symlink(void *v)
   1426 {
   1427 	struct vop_symlink_v2_args /* {
   1428 		struct vnode *a_dvp;
   1429 		struct vnode **a_vpp;
   1430 		struct componentname *a_cnp;
   1431 		struct vattr *a_vap;
   1432 		char *a_target;
   1433 	} */ *ap = v;
   1434 	struct union_node *un = VTOUNION(ap->a_dvp);
   1435 	struct vnode *dvp = un->un_uppervp;
   1436 	struct componentname *cnp = ap->a_cnp;
   1437 
   1438 	if (dvp != NULLVP) {
   1439 		int error;
   1440 
   1441 		error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
   1442 				    ap->a_target);
   1443 		return (error);
   1444 	}
   1445 
   1446 	return (EROFS);
   1447 }
   1448 
   1449 /*
   1450  * union_readdir works in concert with getdirentries and
   1451  * readdir(3) to provide a list of entries in the unioned
   1452  * directories.  getdirentries is responsible for walking
   1453  * down the union stack.  readdir(3) is responsible for
   1454  * eliminating duplicate names from the returned data stream.
   1455  */
   1456 int
   1457 union_readdir(void *v)
   1458 {
   1459 	struct vop_readdir_args /* {
   1460 		struct vnodeop_desc *a_desc;
   1461 		struct vnode *a_vp;
   1462 		struct uio *a_uio;
   1463 		kauth_cred_t a_cred;
   1464 		int *a_eofflag;
   1465 		u_long *a_cookies;
   1466 		int a_ncookies;
   1467 	} */ *ap = v;
   1468 	struct union_node *un = VTOUNION(ap->a_vp);
   1469 	struct vnode *uvp = un->un_uppervp;
   1470 
   1471 	if (uvp == NULLVP)
   1472 		return (0);
   1473 
   1474 	ap->a_vp = uvp;
   1475 	return (VCALL(uvp, VOFFSET(vop_readdir), ap));
   1476 }
   1477 
   1478 int
   1479 union_readlink(void *v)
   1480 {
   1481 	struct vop_readlink_args /* {
   1482 		struct vnode *a_vp;
   1483 		struct uio *a_uio;
   1484 		kauth_cred_t a_cred;
   1485 	} */ *ap = v;
   1486 	int error;
   1487 	struct vnode *vp = OTHERVP(ap->a_vp);
   1488 	int dolock = (vp == LOWERVP(ap->a_vp));
   1489 
   1490 	if (dolock)
   1491 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   1492 	ap->a_vp = vp;
   1493 	error = VCALL(vp, VOFFSET(vop_readlink), ap);
   1494 	if (dolock)
   1495 		VOP_UNLOCK(vp);
   1496 
   1497 	return (error);
   1498 }
   1499 
   1500 int
   1501 union_abortop(void *v)
   1502 {
   1503 	struct vop_abortop_args /* {
   1504 		struct vnode *a_dvp;
   1505 		struct componentname *a_cnp;
   1506 	} */ *ap = v;
   1507 
   1508 	KASSERT(UPPERVP(ap->a_dvp) != NULL);
   1509 
   1510 	ap->a_dvp = UPPERVP(ap->a_dvp);
   1511 	return VCALL(ap->a_dvp, VOFFSET(vop_abortop), ap);
   1512 }
   1513 
   1514 int
   1515 union_inactive(void *v)
   1516 {
   1517 	struct vop_inactive_args /* {
   1518 		const struct vnodeop_desc *a_desc;
   1519 		struct vnode *a_vp;
   1520 		bool *a_recycle;
   1521 	} */ *ap = v;
   1522 	struct vnode *vp = ap->a_vp;
   1523 	struct union_node *un = VTOUNION(vp);
   1524 	struct vnode **vpp;
   1525 
   1526 	/*
   1527 	 * Do nothing (and _don't_ bypass).
   1528 	 * Wait to vrele lowervp until reclaim,
   1529 	 * so that until then our union_node is in the
   1530 	 * cache and reusable.
   1531 	 *
   1532 	 * NEEDSWORK: Someday, consider inactive'ing
   1533 	 * the lowervp and then trying to reactivate it
   1534 	 * with capabilities (v_id)
   1535 	 * like they do in the name lookup cache code.
   1536 	 * That's too much work for now.
   1537 	 */
   1538 
   1539 	if (un->un_dircache != 0) {
   1540 		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
   1541 			vrele(*vpp);
   1542 		free(un->un_dircache, M_TEMP);
   1543 		un->un_dircache = 0;
   1544 	}
   1545 
   1546 	*ap->a_recycle = ((un->un_cflags & UN_CACHED) == 0);
   1547 	VOP_UNLOCK(vp);
   1548 
   1549 	return (0);
   1550 }
   1551 
   1552 int
   1553 union_reclaim(void *v)
   1554 {
   1555 	struct vop_reclaim_args /* {
   1556 		struct vnode *a_vp;
   1557 	} */ *ap = v;
   1558 
   1559 	union_freevp(ap->a_vp);
   1560 
   1561 	return (0);
   1562 }
   1563 
   1564 int
   1565 union_lock(void *v)
   1566 {
   1567 	struct vop_lock_args /* {
   1568 		struct vnode *a_vp;
   1569 		int a_flags;
   1570 	} */ *ap = v;
   1571 	struct vnode *vp;
   1572 	struct union_node *un;
   1573 	int error;
   1574 
   1575 	un = VTOUNION(ap->a_vp);
   1576 	mutex_enter(&un->un_lock);
   1577 	for (;;) {
   1578 		vp = LOCKVP(ap->a_vp);
   1579 		mutex_exit(&un->un_lock);
   1580 		if (vp == ap->a_vp)
   1581 			error = genfs_lock(ap);
   1582 		else
   1583 			error = VOP_LOCK(vp, ap->a_flags);
   1584 		if (error != 0)
   1585 			return error;
   1586 		mutex_enter(&un->un_lock);
   1587 		if (vp == LOCKVP(ap->a_vp))
   1588 			break;
   1589 		if (vp == ap->a_vp)
   1590 			genfs_unlock(ap);
   1591 		else
   1592 			VOP_UNLOCK(vp);
   1593 	}
   1594 	KASSERT((un->un_flags & UN_KLOCK) == 0);
   1595 	mutex_exit(&un->un_lock);
   1596 
   1597 	return error;
   1598 }
   1599 
   1600 /*
   1601  * When operations want to vput() a union node yet retain a lock on
   1602  * the upper vnode (say, to do some further operations like link(),
   1603  * mkdir(), ...), they set UN_KLOCK on the union node, then call
   1604  * vput() which calls VOP_UNLOCK() and comes here.  union_unlock()
   1605  * unlocks the union node (leaving the upper vnode alone), clears the
   1606  * KLOCK flag, and then returns to vput().  The caller then does whatever
   1607  * is left to do with the upper vnode, and ensures that it gets unlocked.
   1608  *
   1609  * If UN_KLOCK isn't set, then the upper vnode is unlocked here.
   1610  */
   1611 int
   1612 union_unlock(void *v)
   1613 {
   1614 	struct vop_unlock_args /* {
   1615 		struct vnode *a_vp;
   1616 		int a_flags;
   1617 	} */ *ap = v;
   1618 	struct vnode *vp;
   1619 	struct union_node *un;
   1620 
   1621 	un = VTOUNION(ap->a_vp);
   1622 	vp = LOCKVP(ap->a_vp);
   1623 	if ((un->un_flags & UN_KLOCK) == UN_KLOCK) {
   1624 		KASSERT(vp != ap->a_vp);
   1625 		un->un_flags &= ~UN_KLOCK;
   1626 		return 0;
   1627 	}
   1628 	if (vp == ap->a_vp)
   1629 		genfs_unlock(ap);
   1630 	else
   1631 		VOP_UNLOCK(vp);
   1632 
   1633 	return 0;
   1634 }
   1635 
   1636 int
   1637 union_bmap(void *v)
   1638 {
   1639 	struct vop_bmap_args /* {
   1640 		struct vnode *a_vp;
   1641 		daddr_t  a_bn;
   1642 		struct vnode **a_vpp;
   1643 		daddr_t *a_bnp;
   1644 		int *a_runp;
   1645 	} */ *ap = v;
   1646 	int error;
   1647 	struct vnode *vp = OTHERVP(ap->a_vp);
   1648 	int dolock = (vp == LOWERVP(ap->a_vp));
   1649 
   1650 	if (dolock)
   1651 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   1652 	ap->a_vp = vp;
   1653 	error = VCALL(vp, VOFFSET(vop_bmap), ap);
   1654 	if (dolock)
   1655 		VOP_UNLOCK(vp);
   1656 
   1657 	return (error);
   1658 }
   1659 
   1660 int
   1661 union_print(void *v)
   1662 {
   1663 	struct vop_print_args /* {
   1664 		struct vnode *a_vp;
   1665 	} */ *ap = v;
   1666 	struct vnode *vp = ap->a_vp;
   1667 
   1668 	printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
   1669 			vp, UPPERVP(vp), LOWERVP(vp));
   1670 	if (UPPERVP(vp) != NULLVP)
   1671 		vprint("union: upper", UPPERVP(vp));
   1672 	if (LOWERVP(vp) != NULLVP)
   1673 		vprint("union: lower", LOWERVP(vp));
   1674 	if (VTOUNION(vp)->un_dircache) {
   1675 		struct vnode **vpp;
   1676 		for (vpp = VTOUNION(vp)->un_dircache; *vpp != NULLVP; vpp++)
   1677 			vprint("dircache:", *vpp);
   1678 	}
   1679 
   1680 	return (0);
   1681 }
   1682 
   1683 int
   1684 union_islocked(void *v)
   1685 {
   1686 	struct vop_islocked_args /* {
   1687 		struct vnode *a_vp;
   1688 	} */ *ap = v;
   1689 	struct vnode *vp;
   1690 	struct union_node *un;
   1691 
   1692 	un = VTOUNION(ap->a_vp);
   1693 	mutex_enter(&un->un_lock);
   1694 	vp = LOCKVP(ap->a_vp);
   1695 	mutex_exit(&un->un_lock);
   1696 
   1697 	if (vp == ap->a_vp)
   1698 		return genfs_islocked(ap);
   1699 	else
   1700 		return VOP_ISLOCKED(vp);
   1701 }
   1702 
   1703 int
   1704 union_pathconf(void *v)
   1705 {
   1706 	struct vop_pathconf_args /* {
   1707 		struct vnode *a_vp;
   1708 		int a_name;
   1709 		int *a_retval;
   1710 	} */ *ap = v;
   1711 	int error;
   1712 	struct vnode *vp = OTHERVP(ap->a_vp);
   1713 	int dolock = (vp == LOWERVP(ap->a_vp));
   1714 
   1715 	if (dolock)
   1716 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   1717 	ap->a_vp = vp;
   1718 	error = VCALL(vp, VOFFSET(vop_pathconf), ap);
   1719 	if (dolock)
   1720 		VOP_UNLOCK(vp);
   1721 
   1722 	return (error);
   1723 }
   1724 
   1725 int
   1726 union_advlock(void *v)
   1727 {
   1728 	struct vop_advlock_args /* {
   1729 		struct vnode *a_vp;
   1730 		void *a_id;
   1731 		int  a_op;
   1732 		struct flock *a_fl;
   1733 		int  a_flags;
   1734 	} */ *ap = v;
   1735 	struct vnode *ovp = OTHERVP(ap->a_vp);
   1736 
   1737 	ap->a_vp = ovp;
   1738 	return (VCALL(ovp, VOFFSET(vop_advlock), ap));
   1739 }
   1740 
   1741 int
   1742 union_strategy(void *v)
   1743 {
   1744 	struct vop_strategy_args /* {
   1745 		struct vnode *a_vp;
   1746 		struct buf *a_bp;
   1747 	} */ *ap = v;
   1748 	struct vnode *ovp = OTHERVP(ap->a_vp);
   1749 	struct buf *bp = ap->a_bp;
   1750 
   1751 	KASSERT(ovp != NULLVP);
   1752 	if (!NODE_IS_SPECIAL(ovp))
   1753 		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
   1754 
   1755 	return (VOP_STRATEGY(ovp, bp));
   1756 }
   1757 
   1758 int
   1759 union_bwrite(void *v)
   1760 {
   1761 	struct vop_bwrite_args /* {
   1762 		struct vnode *a_vp;
   1763 		struct buf *a_bp;
   1764 	} */ *ap = v;
   1765 	struct vnode *ovp = OTHERVP(ap->a_vp);
   1766 	struct buf *bp = ap->a_bp;
   1767 
   1768 	KASSERT(ovp != NULLVP);
   1769 	if (!NODE_IS_SPECIAL(ovp))
   1770 		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
   1771 
   1772 	return (VOP_BWRITE(ovp, bp));
   1773 }
   1774 
   1775 int
   1776 union_getpages(void *v)
   1777 {
   1778 	struct vop_getpages_args /* {
   1779 		struct vnode *a_vp;
   1780 		voff_t a_offset;
   1781 		struct vm_page **a_m;
   1782 		int *a_count;
   1783 		int a_centeridx;
   1784 		vm_prot_t a_access_type;
   1785 		int a_advice;
   1786 		int a_flags;
   1787 	} */ *ap = v;
   1788 	struct vnode *vp = ap->a_vp;
   1789 
   1790 	KASSERT(mutex_owned(vp->v_interlock));
   1791 
   1792 	if (ap->a_flags & PGO_LOCKED) {
   1793 		return EBUSY;
   1794 	}
   1795 	ap->a_vp = OTHERVP(vp);
   1796 	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);
   1797 
   1798 	/* Just pass the request on to the underlying layer. */
   1799 	return VCALL(ap->a_vp, VOFFSET(vop_getpages), ap);
   1800 }
   1801 
   1802 int
   1803 union_putpages(void *v)
   1804 {
   1805 	struct vop_putpages_args /* {
   1806 		struct vnode *a_vp;
   1807 		voff_t a_offlo;
   1808 		voff_t a_offhi;
   1809 		int a_flags;
   1810 	} */ *ap = v;
   1811 	struct vnode *vp = ap->a_vp;
   1812 
   1813 	KASSERT(mutex_owned(vp->v_interlock));
   1814 
   1815 	ap->a_vp = OTHERVP(vp);
   1816 	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);
   1817 
   1818 	if (ap->a_flags & PGO_RECLAIM) {
   1819 		mutex_exit(vp->v_interlock);
   1820 		return 0;
   1821 	}
   1822 
   1823 	/* Just pass the request on to the underlying layer. */
   1824 	return VCALL(ap->a_vp, VOFFSET(vop_putpages), ap);
   1825 }
   1826 
   1827 int
   1828 union_kqfilter(void *v)
   1829 {
   1830 	struct vop_kqfilter_args /* {
   1831 		struct vnode	*a_vp;
   1832 		struct knote	*a_kn;
   1833 	} */ *ap = v;
   1834 	int error;
   1835 
   1836 	/*
   1837 	 * We watch either the upper layer file (if it already exists),
   1838 	 * or the lower layer one. If there is lower layer file only
   1839 	 * at this moment, we will keep watching that lower layer file
   1840 	 * even if upper layer file would be created later on.
   1841 	 */
   1842 	if (UPPERVP(ap->a_vp))
   1843 		error = VOP_KQFILTER(UPPERVP(ap->a_vp), ap->a_kn);
   1844 	else if (LOWERVP(ap->a_vp))
   1845 		error = VOP_KQFILTER(LOWERVP(ap->a_vp), ap->a_kn);
   1846 	else {
   1847 		/* panic? */
   1848 		error = EOPNOTSUPP;
   1849 	}
   1850 
   1851 	return (error);
   1852 }
   1853