/*	$NetBSD: union_subr.c,v 1.1 2003/03/16 08:26:52 jdolecek Exp $	*/

/*
 * Copyright (c) 1994 Jan-Simon Pendry
 * Copyright (c) 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Jan-Simon Pendry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: union_subr.c,v 1.1 2003/03/16 08:26:52 jdolecek Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/queue.h>
#include <sys/mount.h>
#include <sys/stat.h>

#include <uvm/uvm_extern.h>

#include <fs/union/union.h>

#ifdef DIAGNOSTIC
#include <sys/proc.h>
#endif

/* must be power of two, otherwise change UNION_HASH() */
#define NHASH 32

/* unsigned int ... */
#define UNION_HASH(u, l) \
	(((((unsigned long) (u)) + ((unsigned long) l)) >> 8) & (NHASH-1))
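
/*
 * Worked example (addresses hypothetical): for vnode pointers
 * u = 0xc0a01200 and l = 0xc0a01500,
 *
 *	UNION_HASH(u, l) == ((0xc0a01200 + 0xc0a01500) >> 8) & 31
 *	                 == 0x01814027 & 31
 *	                 == 7
 *
 * The >> 8 discards the low-order address bits, which carry little
 * entropy for pool-allocated vnodes; NHASH must remain a power of
 * two for the & (NHASH-1) masking to select a chain.
 */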

static LIST_HEAD(unhead, union_node) unhead[NHASH];
static int unvplock[NHASH];

static int union_list_lock __P((int));
static void union_list_unlock __P((int));
void union_updatevp __P((struct union_node *, struct vnode *, struct vnode *));
static int union_relookup __P((struct union_mount *, struct vnode *,
			       struct vnode **, struct componentname *,
			       struct componentname *, const char *, int));
int union_vn_close __P((struct vnode *, int, struct ucred *, struct proc *));
static void union_dircache_r __P((struct vnode *, struct vnode ***, int *));
struct vnode *union_dircache __P((struct vnode *, struct proc *));

void
union_init()
{
	int i;

	for (i = 0; i < NHASH; i++)
		LIST_INIT(&unhead[i]);
	memset((caddr_t) unvplock, 0, sizeof(unvplock));
}

/*
 * Free global unionfs resources.
 */
void
union_done()
{
	/* Nothing */
}

static int
union_list_lock(ix)
	int ix;
{

	if (unvplock[ix] & UN_LOCKED) {
		unvplock[ix] |= UN_WANTED;
		(void) tsleep(&unvplock[ix], PINOD, "unionlk", 0);
		return (1);
	}

	unvplock[ix] |= UN_LOCKED;

	return (0);
}

static void
union_list_unlock(ix)
	int ix;
{

	unvplock[ix] &= ~UN_LOCKED;

	if (unvplock[ix] & UN_WANTED) {
		unvplock[ix] &= ~UN_WANTED;
		wakeup((caddr_t) &unvplock[ix]);
	}
}
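
/*
 * Note that union_list_lock() returns 1 after sleeping, not after
 * acquiring the lock, so every caller must retry until it returns 0:
 *
 *	while (union_list_lock(hash))
 *		continue;
 *
 * This is a hand-rolled sleep lock: the hash chain stays consistent
 * only while the holder does not sleep, and UN_WANTED suppresses
 * needless wakeup() calls.
 */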

void
union_updatevp(un, uppervp, lowervp)
	struct union_node *un;
	struct vnode *uppervp;
	struct vnode *lowervp;
{
	int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
	int nhash = UNION_HASH(uppervp, lowervp);
	int docache = (lowervp != NULLVP || uppervp != NULLVP);
	int lhash, uhash;

	/*
	 * Ensure locking is ordered from lower to higher
	 * to avoid deadlocks.
	 */
	if (nhash < ohash) {
		lhash = nhash;
		uhash = ohash;
	} else {
		lhash = ohash;
		uhash = nhash;
	}

	if (lhash != uhash)
		while (union_list_lock(lhash))
			continue;

	while (union_list_lock(uhash))
		continue;

	if (ohash != nhash || !docache) {
		if (un->un_flags & UN_CACHED) {
			un->un_flags &= ~UN_CACHED;
			LIST_REMOVE(un, un_cache);
		}
	}

	if (ohash != nhash)
		union_list_unlock(ohash);

	if (un->un_lowervp != lowervp) {
		if (un->un_lowervp) {
			vrele(un->un_lowervp);
			if (un->un_path) {
				free(un->un_path, M_TEMP);
				un->un_path = 0;
			}
			if (un->un_dirvp) {
				vrele(un->un_dirvp);
				un->un_dirvp = NULLVP;
			}
		}
		un->un_lowervp = lowervp;
		un->un_lowersz = VNOVAL;
	}

	if (un->un_uppervp != uppervp) {
		if (un->un_uppervp)
			vrele(un->un_uppervp);

		un->un_uppervp = uppervp;
		un->un_uppersz = VNOVAL;
	}

	if (docache && (ohash != nhash)) {
		LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

	union_list_unlock(nhash);
}

void
union_newlower(un, lowervp)
	struct union_node *un;
	struct vnode *lowervp;
{

	union_updatevp(un, un->un_uppervp, lowervp);
}

void
union_newupper(un, uppervp)
	struct union_node *un;
	struct vnode *uppervp;
{

	union_updatevp(un, uppervp, un->un_lowervp);
}
/*
 * Keep track of size changes in the underlying vnodes.
 * If the size changes, then call back to the VM layer
 * giving priority to the upper layer size.
 */
void
union_newsize(vp, uppersz, lowersz)
	struct vnode *vp;
	off_t uppersz, lowersz;
{
	struct union_node *un;
	off_t sz;

	/* only interested in regular files */
	if (vp->v_type != VREG)
		return;

	un = VTOUNION(vp);
	sz = VNOVAL;

	if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) {
		un->un_uppersz = uppersz;
		if (sz == VNOVAL)
			sz = un->un_uppersz;
	}

	if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) {
		un->un_lowersz = lowersz;
		if (sz == VNOVAL)
			sz = un->un_lowersz;
	}

	if (sz != VNOVAL) {
#ifdef UNION_DIAGNOSTIC
		printf("union: %s size now %qd\n",
		    uppersz != VNOVAL ? "upper" : "lower", sz);
#endif
		uvm_vnp_setsize(vp, sz);
	}
}
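
/*
 * Because the upper size is tested first, a call that changes both
 * layers at once, e.g. union_newsize(vp, 4096, 512) (values
 * hypothetical), reports 4096 to uvm_vnp_setsize(); the lower size
 * is recorded in un_lowersz but does not win.
 */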

/*
 * allocate a union_node/vnode pair.  the vnode is
 * referenced and locked.  the new vnode is returned
 * via (vpp).  (mp) is the mountpoint of the union filesystem,
 * (dvp) is the parent directory where the upper layer object
 * should exist (but doesn't) and (cnp) is the componentname
 * information which is partially copied to allow the upper
 * layer object to be created at a later time.  (uppervp)
 * and (lowervp) reference the upper and lower layer objects
 * being mapped.  either, but not both, can be nil.
 * if supplied, (uppervp) is locked.
 * the reference is either maintained in the new union_node
 * object which is allocated, or they are vrele'd.
 *
 * all union_nodes are maintained on a singly-linked
 * list.  new nodes are only allocated when they cannot
 * be found on this list.  entries on the list are
 * removed when the vfs reclaim entry is called.
 *
 * a single lock is kept for the entire list.  this is
 * needed because the getnewvnode() function can block
 * waiting for a vnode to become free, in which case there
 * may be more than one process trying to get the same
 * vnode.  this lock is only taken if we are going to
 * call getnewvnode, since the kernel itself is single-threaded.
 *
 * if an entry is found on the list, then call vget() to
 * take a reference.  this is done because there may be
 * zero references to it and so it needs to be removed from
 * the vnode free list.
 */
int
union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
	struct vnode **vpp;
	struct mount *mp;
	struct vnode *undvp;		/* parent union vnode */
	struct vnode *dvp;		/* may be null */
	struct componentname *cnp;	/* may be null */
	struct vnode *uppervp;		/* may be null */
	struct vnode *lowervp;		/* may be null */
	int docache;
{
	int error;
	struct union_node *un = NULL;
	struct vnode *xlowervp = NULLVP;
	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
	int hash = 0;
	int vflag;
	int try;

	if (uppervp == NULLVP && lowervp == NULLVP)
		panic("union: unidentifiable allocation");

	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
		xlowervp = lowervp;
		lowervp = NULLVP;
	}

	/* detect the root vnode (and aliases) */
	vflag = VLAYER;
	if ((uppervp == um->um_uppervp) &&
	    ((lowervp == NULLVP) || lowervp == um->um_lowervp)) {
		if (lowervp == NULLVP) {
			lowervp = um->um_lowervp;
			if (lowervp != NULLVP)
				VREF(lowervp);
		}
		vflag = VROOT;
	}

loop:
	if (!docache) {
		un = 0;
	} else for (try = 0; try < 3; try++) {
		switch (try) {
		case 0:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, lowervp);
			break;

		case 1:
			if (uppervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, NULLVP);
			break;

		case 2:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(NULLVP, lowervp);
			break;
		}

		while (union_list_lock(hash))
			continue;

		for (un = unhead[hash].lh_first; un != 0;
					un = un->un_cache.le_next) {
			if ((un->un_lowervp == lowervp ||
			     un->un_lowervp == NULLVP) &&
			    (un->un_uppervp == uppervp ||
			     un->un_uppervp == NULLVP) &&
			    (UNIONTOV(un)->v_mount == mp)) {
				if (vget(UNIONTOV(un), 0)) {
					union_list_unlock(hash);
					goto loop;
				}
				break;
			}
		}

		union_list_unlock(hash);

		if (un)
			break;
	}

	if (un) {
		/*
		 * Obtain a lock on the union_node.
		 * uppervp is locked, though un->un_uppervp
		 * may not be.  this doesn't break the locking
		 * hierarchy since in the case that un->un_uppervp
		 * is not yet locked it will be vrele'd and replaced
		 * with uppervp.
		 */

		if ((dvp != NULLVP) && (uppervp == dvp)) {
			/*
			 * Access ``.'', so (un) will already
			 * be locked.  Since this process has
			 * the lock on (uppervp) no other
			 * process can hold the lock on (un).
			 */
#ifdef DIAGNOSTIC
			if ((un->un_flags & UN_LOCKED) == 0)
				panic("union: . not locked");
			else if (curproc && un->un_pid != curproc->p_pid &&
				    un->un_pid > -1 && curproc->p_pid > -1)
				panic("union: allocvp not lock owner");
#endif
		} else {
			if (un->un_flags & UN_LOCKED) {
				vrele(UNIONTOV(un));
				un->un_flags |= UN_WANTED;
				(void) tsleep(&un->un_flags, PINOD,
				    "unionalloc", 0);
				goto loop;
			}
			un->un_flags |= UN_LOCKED;

#ifdef DIAGNOSTIC
			if (curproc)
				un->un_pid = curproc->p_pid;
			else
				un->un_pid = -1;
#endif
		}

		/*
		 * At this point, the union_node is locked,
		 * un->un_uppervp may not be locked, and uppervp
		 * is locked or nil.
		 */

		/*
		 * Save information about the upper layer.
		 */
		if (uppervp != un->un_uppervp) {
			union_newupper(un, uppervp);
		} else if (uppervp) {
			vrele(uppervp);
		}

		if (un->un_uppervp) {
			un->un_flags |= UN_ULOCK;
			un->un_flags &= ~UN_KLOCK;
		}

		/*
		 * Save information about the lower layer.
		 * This needs to keep track of pathname
		 * and directory information which union_vn_create
		 * might need.
		 */
		if (lowervp != un->un_lowervp) {
			union_newlower(un, lowervp);
			if (cnp && (lowervp != NULLVP)) {
				un->un_hash = cnp->cn_hash;
				un->un_path = malloc(cnp->cn_namelen+1,
						M_TEMP, M_WAITOK);
				memcpy(un->un_path, cnp->cn_nameptr,
						cnp->cn_namelen);
				un->un_path[cnp->cn_namelen] = '\0';
				VREF(dvp);
				un->un_dirvp = dvp;
			}
		} else if (lowervp) {
			vrele(lowervp);
		}
		*vpp = UNIONTOV(un);
		return (0);
	}

	if (docache) {
		/*
		 * otherwise lock the vp list while we call getnewvnode
		 * since that can block.
		 */
		hash = UNION_HASH(uppervp, lowervp);

		if (union_list_lock(hash))
			goto loop;
	}

	error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp);
	if (error) {
		if (uppervp) {
			if (dvp == uppervp)
				vrele(uppervp);
			else
				vput(uppervp);
		}
		if (lowervp)
			vrele(lowervp);

		goto out;
	}

	MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
		M_TEMP, M_WAITOK);

	(*vpp)->v_flag |= vflag;
	(*vpp)->v_vnlock = NULL;	/* Make upper layers call VOP_LOCK */
	if (uppervp)
		(*vpp)->v_type = uppervp->v_type;
	else
		(*vpp)->v_type = lowervp->v_type;
	un = VTOUNION(*vpp);
	un->un_vnode = *vpp;
	un->un_uppervp = uppervp;
	un->un_uppersz = VNOVAL;
	un->un_lowervp = lowervp;
	un->un_lowersz = VNOVAL;
	un->un_pvp = undvp;
	if (undvp != NULLVP)
		VREF(undvp);
	un->un_dircache = 0;
	un->un_openl = 0;
	un->un_flags = UN_LOCKED;
	if (un->un_uppervp)
		un->un_flags |= UN_ULOCK;
#ifdef DIAGNOSTIC
	if (curproc)
		un->un_pid = curproc->p_pid;
	else
		un->un_pid = -1;
#endif
	if (cnp && (lowervp != NULLVP)) {
		un->un_hash = cnp->cn_hash;
		un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
		memcpy(un->un_path, cnp->cn_nameptr, cnp->cn_namelen);
		un->un_path[cnp->cn_namelen] = '\0';
		VREF(dvp);
		un->un_dirvp = dvp;
	} else {
		un->un_hash = 0;
		un->un_path = 0;
		un->un_dirvp = 0;
	}

	if (docache) {
		LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

	if (xlowervp)
		vrele(xlowervp);

out:
	if (docache)
		union_list_unlock(hash);

	return (error);
}
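
/*
 * Cache lookup sketch: union_allocvp() probes up to three hash
 * chains for an existing node, in this order:
 *
 *	try 0: UNION_HASH(uppervp, lowervp)	both layers known
 *	try 1: UNION_HASH(uppervp, NULLVP)	upper-only node
 *	try 2: UNION_HASH(NULLVP, lowervp)	lower-only node
 *
 * because a node created when only one layer existed hashes
 * differently from the same pair once both layers are known;
 * union_updatevp() rehashes the node when a layer is added.
 */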

int
union_freevp(vp)
	struct vnode *vp;
{
	struct union_node *un = VTOUNION(vp);

	if (un->un_flags & UN_CACHED) {
		un->un_flags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
	}

	if (un->un_pvp != NULLVP)
		vrele(un->un_pvp);
	if (un->un_uppervp != NULLVP)
		vrele(un->un_uppervp);
	if (un->un_lowervp != NULLVP)
		vrele(un->un_lowervp);
	if (un->un_dirvp != NULLVP)
		vrele(un->un_dirvp);
	if (un->un_path)
		free(un->un_path, M_TEMP);

	FREE(vp->v_data, M_TEMP);
	vp->v_data = 0;

	return (0);
}

/*
 * copyfile.  copy the vnode (fvp) to the vnode (tvp)
 * using a sequence of reads and writes.  both (fvp)
 * and (tvp) are locked on entry and exit.
 */
int
union_copyfile(fvp, tvp, cred, p)
	struct vnode *fvp;
	struct vnode *tvp;
	struct ucred *cred;
	struct proc *p;
{
	char *buf;
	struct uio uio;
	struct iovec iov;
	int error = 0;

	/*
	 * strategy:
	 * allocate a buffer of size MAXBSIZE.
	 * loop doing reads and writes, keeping track
	 * of the current uio offset.
	 * give up at the first sign of trouble.
	 */

	uio.uio_procp = p;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_offset = 0;

	VOP_UNLOCK(fvp, 0);			/* XXX */
	VOP_LEASE(fvp, p, cred, LEASE_READ);
	vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY);	/* XXX */
	VOP_UNLOCK(tvp, 0);			/* XXX */
	VOP_LEASE(tvp, p, cred, LEASE_WRITE);
	vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);	/* XXX */

	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);

	/* ugly loop follows... */
	do {
		off_t offset = uio.uio_offset;

		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		iov.iov_base = buf;
		iov.iov_len = MAXBSIZE;
		uio.uio_resid = iov.iov_len;
		uio.uio_rw = UIO_READ;
		error = VOP_READ(fvp, &uio, 0, cred);

		if (error == 0) {
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			iov.iov_base = buf;
			iov.iov_len = MAXBSIZE - uio.uio_resid;
			uio.uio_offset = offset;
			uio.uio_rw = UIO_WRITE;
			uio.uio_resid = iov.iov_len;

			if (uio.uio_resid == 0)
				break;

			do {
				error = VOP_WRITE(tvp, &uio, 0, cred);
			} while ((uio.uio_resid > 0) && (error == 0));
		}

	} while (error == 0);

	free(buf, M_TEMP);
	return (error);
}
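
/*
 * The uio bookkeeping above is easy to misread: after the read,
 * uio_resid holds the number of bytes NOT read, so
 * MAXBSIZE - uio_resid is the number of bytes actually in (buf).
 * Rewinding uio_offset to the saved (offset) makes the write land
 * at the same file position just read, and the inner loop retries
 * partial writes until resid drains.  EOF shows up as a zero-length
 * read, i.e. uio_resid == 0 after the reset, which breaks the loop.
 */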

/*
 * (un) is assumed to be locked on entry and remains
 * locked on exit.
 */
int
union_copyup(un, docopy, cred, p)
	struct union_node *un;
	int docopy;
	struct ucred *cred;
	struct proc *p;
{
	int error;
	struct vnode *lvp, *uvp;
	struct vattr lvattr, uvattr;

	error = union_vn_create(&uvp, un, p);
	if (error)
		return (error);

	/* at this point, uppervp is locked */
	union_newupper(un, uvp);
	un->un_flags |= UN_ULOCK;

	lvp = un->un_lowervp;

	if (docopy) {
		/*
		 * XXX - should not ignore errors
		 * from VOP_CLOSE
		 */
		vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY);

		error = VOP_GETATTR(lvp, &lvattr, cred, p);
		if (error == 0)
			error = VOP_OPEN(lvp, FREAD, cred, p);
		if (error == 0) {
			error = union_copyfile(lvp, uvp, cred, p);
			(void) VOP_CLOSE(lvp, FREAD, cred, p);
		}
		if (error == 0) {
			/* Copy permissions up too */
			VATTR_NULL(&uvattr);
			uvattr.va_mode = lvattr.va_mode;
			uvattr.va_flags = lvattr.va_flags;
			error = VOP_SETATTR(uvp, &uvattr, cred, p);
		}
		VOP_UNLOCK(lvp, 0);
#ifdef UNION_DIAGNOSTIC
		if (error == 0)
			uprintf("union: copied up %s\n", un->un_path);
#endif

	}
	union_vn_close(uvp, FWRITE, cred, p);

	/*
	 * Subsequent IOs will go to the top layer, so
	 * call close on the lower vnode and open on the
	 * upper vnode to ensure that the filesystem keeps
	 * its references counts right.  This doesn't do
	 * the right thing with (cred) and (FREAD) though.
	 * Ignoring error returns is not right, either.
	 */
	if (error == 0) {
		int i;

		vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY);
		for (i = 0; i < un->un_openl; i++) {
			(void) VOP_CLOSE(lvp, FREAD, cred, p);
			(void) VOP_OPEN(uvp, FREAD, cred, p);
		}
		un->un_openl = 0;
		VOP_UNLOCK(lvp, 0);
	}

	return (error);
}

static int
union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
	struct union_mount *um;
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
	struct componentname *cn;
	const char *path;
	int pathlen;
{
	int error;

	/*
	 * A new componentname structure must be faked up because
	 * there is no way to know where the upper level cnp came
	 * from or what it is being used for.  This must duplicate
	 * some of the work done by NDINIT, some of the work done
	 * by namei, some of the work done by lookup and some of
	 * the work done by VOP_LOOKUP when given a CREATE flag.
	 * Conclusion: Horrible.
	 *
	 * The pathname buffer will be PNBUF_PUT'd by VOP_MKDIR.
	 */
	cn->cn_namelen = pathlen;
	if ((cn->cn_namelen + 1) > MAXPATHLEN)
		return (ENAMETOOLONG);
	cn->cn_pnbuf = PNBUF_GET();
	memcpy(cn->cn_pnbuf, path, cn->cn_namelen);
	cn->cn_pnbuf[cn->cn_namelen] = '\0';

	cn->cn_nameiop = CREATE;
	cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
	cn->cn_proc = cnp->cn_proc;
	if (um->um_op == UNMNT_ABOVE)
		cn->cn_cred = cnp->cn_cred;
	else
		cn->cn_cred = um->um_cred;
	cn->cn_nameptr = cn->cn_pnbuf;
	cn->cn_hash = cnp->cn_hash;
	cn->cn_consume = cnp->cn_consume;

	VREF(dvp);
	error = relookup(dvp, vpp, cn);
	if (!error)
		vrele(dvp);
	else {
		PNBUF_PUT(cn->cn_pnbuf);
		cn->cn_pnbuf = 0;
	}

	return (error);
}

/*
 * Create a shadow directory in the upper layer.
 * The new vnode is returned locked.
 *
 * (um) points to the union mount structure for access to
 * the mounting process's credentials.
 * (dvp) is the directory in which to create the shadow directory.
 * it is unlocked on entry and exit.
 * (cnp) is the componentname to be created.
 * (vpp) is the returned newly created shadow directory, which
 * is returned locked.
 *
 * N.B. We still attempt to create shadow directories even if the union
 * is mounted read-only, which is a little nonintuitive.
 */
int
union_mkshadow(um, dvp, cnp, vpp)
	struct union_mount *um;
	struct vnode *dvp;
	struct componentname *cnp;
	struct vnode **vpp;
{
	int error;
	struct vattr va;
	struct proc *p = cnp->cn_proc;
	struct componentname cn;

	error = union_relookup(um, dvp, vpp, cnp, &cn,
			cnp->cn_nameptr, cnp->cn_namelen);
	if (error)
		return (error);

	if (*vpp) {
		VOP_ABORTOP(dvp, &cn);
		VOP_UNLOCK(dvp, 0);
		vrele(*vpp);
		*vpp = NULLVP;
		return (EEXIST);
	}

	/*
	 * policy: when creating the shadow directory in the
	 * upper layer, create it owned by the user who did
	 * the mount, group from parent directory, and mode
	 * 777 modified by umask (i.e. mostly identical to the
	 * mkdir syscall).  (jsp, kb)
	 */

	VATTR_NULL(&va);
	va.va_type = VDIR;
	va.va_mode = um->um_cmode;

	/* VOP_LEASE: dvp is locked */
	VOP_LEASE(dvp, p, cn.cn_cred, LEASE_WRITE);

	error = VOP_MKDIR(dvp, vpp, &cn, &va);
	return (error);
}
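
/*
 * Typical use (hypothetical sketch, not a verbatim caller): when a
 * lookup finds a directory only in the lower layer, the union lookup
 * path materializes the upper twin before any copy-up can happen:
 *
 *	if (uppervp == NULLVP && lowervp->v_type == VDIR) {
 *		error = union_mkshadow(um, upperdvp, cnp, &uppervp);
 *		if (error && error != EEXIST)
 *			return (error);
 *	}
 *
 * EEXIST is tolerable there because another process may have raced
 * us to create the same shadow directory.
 */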

/*
 * Create a whiteout entry in the upper layer.
 *
 * (um) points to the union mount structure for access to
 * the mounting process's credentials.
 * (dvp) is the directory in which to create the whiteout.
 * it is locked on entry and exit.
 * (cnp) is the componentname to be created.
 */
int
union_mkwhiteout(um, dvp, cnp, path)
	struct union_mount *um;
	struct vnode *dvp;
	struct componentname *cnp;
	char *path;
{
	int error;
	struct proc *p = cnp->cn_proc;
	struct vnode *wvp;
	struct componentname cn;

	VOP_UNLOCK(dvp, 0);
	error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
	if (error) {
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		return (error);
	}

	if (wvp) {
		VOP_ABORTOP(dvp, &cn);
		vrele(dvp);
		vrele(wvp);
		return (EEXIST);
	}

	/* VOP_LEASE: dvp is locked */
	VOP_LEASE(dvp, p, p->p_ucred, LEASE_WRITE);

	error = VOP_WHITEOUT(dvp, &cn, CREATE);
	if (error)
		VOP_ABORTOP(dvp, &cn);

	vrele(dvp);

	return (error);
}

/*
 * union_vn_create: creates and opens a new shadow file
 * on the upper union layer.  this function is similar
 * in spirit to calling vn_open but it avoids calling namei().
 * the problem with calling namei is that a) it locks too many
 * things, and b) it doesn't start at the "right" directory,
 * whereas relookup is told where to start.
 */
int
union_vn_create(vpp, un, p)
	struct vnode **vpp;
	struct union_node *un;
	struct proc *p;
{
	struct vnode *vp;
	struct ucred *cred = p->p_ucred;
	struct vattr vat;
	struct vattr *vap = &vat;
	int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
	int error;
	int cmode = UN_FILEMODE & ~p->p_cwdi->cwdi_cmask;
	struct componentname cn;

	*vpp = NULLVP;

	/*
	 * Build a new componentname structure (for the same
	 * reasons outlined in union_mkshadow).
	 * The difference here is that the file is owned by
	 * the current user, rather than by the person who
	 * did the mount, since the current user needs to be
	 * able to write the file (that's why it is being
	 * copied in the first place).
	 */
	cn.cn_namelen = strlen(un->un_path);
	if ((cn.cn_namelen + 1) > MAXPATHLEN)
		return (ENAMETOOLONG);
	cn.cn_pnbuf = PNBUF_GET();
	memcpy(cn.cn_pnbuf, un->un_path, cn.cn_namelen+1);
	cn.cn_nameiop = CREATE;
	cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
	cn.cn_proc = p;
	cn.cn_cred = p->p_ucred;
	cn.cn_nameptr = cn.cn_pnbuf;
	cn.cn_hash = un->un_hash;
	cn.cn_consume = 0;

	VREF(un->un_dirvp);
	if ((error = relookup(un->un_dirvp, &vp, &cn)) != 0) {
		/*
		 * XXX assumed, mirroring union_relookup()'s error
		 * path: relookup() consumed the dirvp reference on
		 * failure, but the pathname buffer is ours to free.
		 */
		PNBUF_PUT(cn.cn_pnbuf);
		return (error);
	}
	vrele(un->un_dirvp);

	if (vp) {
		VOP_ABORTOP(un->un_dirvp, &cn);
		if (un->un_dirvp == vp)
			vrele(un->un_dirvp);
		else
			vput(un->un_dirvp);
		vrele(vp);
		return (EEXIST);
	}

	/*
	 * Good - there was no race to create the file
	 * so go ahead and create it.  The permissions
	 * on the file will be 0666 modified by the
	 * current user's umask.  Access to the file, while
	 * it is unioned, will require access to the top *and*
	 * bottom files.  Access when not unioned will simply
	 * require access to the top-level file.
	 * TODO: confirm choice of access permissions.
	 */
	VATTR_NULL(vap);
	vap->va_type = VREG;
	vap->va_mode = cmode;
	VOP_LEASE(un->un_dirvp, p, cred, LEASE_WRITE);
	if ((error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap)) != 0)
		return (error);

	if ((error = VOP_OPEN(vp, fmode, cred, p)) != 0) {
		vput(vp);
		return (error);
	}

	vp->v_writecount++;
	*vpp = vp;
	return (0);
}

int
union_vn_close(vp, fmode, cred, p)
	struct vnode *vp;
	int fmode;
	struct ucred *cred;
	struct proc *p;
{

	if (fmode & FWRITE)
		--vp->v_writecount;
	return (VOP_CLOSE(vp, fmode, cred, p));
}

void
union_removed_upper(un)
	struct union_node *un;
{
#if 1
	/*
	 * We do not set the uppervp to NULLVP here, because lowervp
	 * may also be NULLVP, so this routine would end up creating
	 * a bogus union node with no upper or lower VP (that causes
	 * pain in many places that assume at least one VP exists).
	 * Since we've removed this node from the cache hash chains,
	 * it won't be found again.  When all current holders
	 * release it, union_inactive() will vgone() it.
	 */
	union_diruncache(un);
#else
	union_newupper(un, NULLVP);
#endif

	if (un->un_flags & UN_CACHED) {
		un->un_flags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
	}

	if (un->un_flags & UN_ULOCK) {
		un->un_flags &= ~UN_ULOCK;
		VOP_UNLOCK(un->un_uppervp, 0);
	}
}

#if 0
struct vnode *
union_lowervp(vp)
	struct vnode *vp;
{
	struct union_node *un = VTOUNION(vp);

	if ((un->un_lowervp != NULLVP) &&
	    (vp->v_type == un->un_lowervp->v_type)) {
		if (vget(un->un_lowervp, 0) == 0)
			return (un->un_lowervp);
	}

	return (NULLVP);
}
#endif

/*
 * determine whether a whiteout is needed
 * during a remove/rmdir operation.
 */
int
union_dowhiteout(un, cred, p)
	struct union_node *un;
	struct ucred *cred;
	struct proc *p;
{
	struct vattr va;

	if (un->un_lowervp != NULLVP)
		return (1);

	if (VOP_GETATTR(un->un_uppervp, &va, cred, p) == 0 &&
	    (va.va_flags & OPAQUE))
		return (1);

	return (0);
}
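
/*
 * In table form, the whiteout decision above is:
 *
 *	lower layer object exists	-> whiteout needed (1)
 *	upper dir marked OPAQUE		-> whiteout needed (1)
 *	otherwise (incl. GETATTR error)	-> plain remove (0)
 *
 * so removing a file that only ever existed in the upper layer
 * leaves no whiteout behind.
 */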

static void
union_dircache_r(vp, vppp, cntp)
	struct vnode *vp;
	struct vnode ***vppp;
	int *cntp;
{
	struct union_node *un;

	if (vp->v_op != union_vnodeop_p) {
		if (vppp) {
			VREF(vp);
			*(*vppp)++ = vp;
			if (--(*cntp) == 0)
				panic("union: dircache table too small");
		} else {
			(*cntp)++;
		}

		return;
	}

	un = VTOUNION(vp);
	if (un->un_uppervp != NULLVP)
		union_dircache_r(un->un_uppervp, vppp, cntp);
	if (un->un_lowervp != NULLVP)
		union_dircache_r(un->un_lowervp, vppp, cntp);
}

struct vnode *
union_dircache(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	int cnt;
	struct vnode *nvp = NULLVP;
	struct vnode **vpp;
	struct vnode **dircache;
	int error;

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	dircache = VTOUNION(vp)->un_dircache;

	nvp = NULLVP;

	if (dircache == 0) {
		cnt = 0;
		union_dircache_r(vp, 0, &cnt);
		cnt++;
		dircache = (struct vnode **)
				malloc(cnt * sizeof(struct vnode *),
					M_TEMP, M_WAITOK);
		vpp = dircache;
		union_dircache_r(vp, &vpp, &cnt);
		VTOUNION(vp)->un_dircache = dircache;
		*vpp = NULLVP;
		vpp = dircache + 1;
	} else {
		vpp = dircache;
		do {
			if (*vpp++ == VTOUNION(vp)->un_uppervp)
				break;
		} while (*vpp != NULLVP);
	}

	if (*vpp == NULLVP)
		goto out;

	vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
	VREF(*vpp);
	error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0,
	    *vpp, NULLVP, 0);
	if (!error) {
		VTOUNION(vp)->un_dircache = 0;
		VTOUNION(nvp)->un_dircache = dircache;
	}

out:
	VOP_UNLOCK(vp, 0);
	return (nvp);
}
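
/*
 * Layout sketch of the un_dircache array built above (entry count
 * hypothetical): for a two-layer union directory it is a
 * NULLVP-terminated vector of referenced non-union vnodes,
 *
 *	dircache[0] = upper layer directory vnode
 *	dircache[1] = lower layer directory vnode
 *	dircache[2] = NULLVP
 *
 * and each union_dircache() call hands back a union node wrapping
 * the entry after the current uppervp, letting readdir descend the
 * layers one at a time.
 */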

void
union_diruncache(un)
	struct union_node *un;
{
	struct vnode **vpp;

	if (un->un_dircache != 0) {
		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
			vrele(*vpp);
		free(un->un_dircache, M_TEMP);
		un->un_dircache = 0;
	}
}
   1156