/*	$NetBSD: union_subr.c,v 1.4 2003/06/29 15:11:48 thorpej Exp $	*/

/*
 * Copyright (c) 1994 Jan-Simon Pendry
 * Copyright (c) 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Jan-Simon Pendry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: union_subr.c,v 1.4 2003/06/29 15:11:48 thorpej Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/queue.h>
#include <sys/mount.h>
#include <sys/stat.h>

#include <uvm/uvm_extern.h>

#include <fs/union/union.h>
/* must be power of two, otherwise change UNION_HASH() */
#define NHASH 32

/* yields an unsigned int in the range [0, NHASH) */
#define UNION_HASH(u, l) \
	(((((unsigned long) (u)) + ((unsigned long) l)) >> 8) & (NHASH-1))
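
/*
 * Illustrative only: for two hypothetical vnode addresses such as
 * u = 0xc0a01200 and l = 0xc0a01800, the macro computes
 * ((0xc0a01200 + 0xc0a01800) >> 8) & (NHASH-1) == 10, so the pair
 * hashes to bucket 10 (the same answer whether or not the 32-bit sum
 * wraps).  The shift discards the low-order bits, which carry little
 * entropy because of allocator alignment.
 */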

static LIST_HEAD(unhead, union_node) unhead[NHASH];
static int unvplock[NHASH];

static int union_list_lock __P((int));
static void union_list_unlock __P((int));
void union_updatevp __P((struct union_node *, struct vnode *, struct vnode *));
static int union_relookup __P((struct union_mount *, struct vnode *,
			       struct vnode **, struct componentname *,
			       struct componentname *, const char *, int));
int union_vn_close __P((struct vnode *, int, struct ucred *, struct lwp *));
static void union_dircache_r __P((struct vnode *, struct vnode ***, int *));
struct vnode *union_dircache __P((struct vnode *, struct lwp *));

void
union_init()
{
	int i;

	for (i = 0; i < NHASH; i++)
		LIST_INIT(&unhead[i]);
	memset((caddr_t) unvplock, 0, sizeof(unvplock));
}

/*
 * Free global unionfs resources.
 */
void
union_done()
{

	/* Make sure to unset the readdir hook. */
	vn_union_readdir_hook = NULL;
}

static int
union_list_lock(ix)
	int ix;
{

	if (unvplock[ix] & UN_LOCKED) {
		unvplock[ix] |= UN_WANTED;
		(void) tsleep(&unvplock[ix], PINOD, "unionlk", 0);
		return (1);
	}

	unvplock[ix] |= UN_LOCKED;

	return (0);
}

static void
union_list_unlock(ix)
	int ix;
{

	unvplock[ix] &= ~UN_LOCKED;

	if (unvplock[ix] & UN_WANTED) {
		unvplock[ix] &= ~UN_WANTED;
		wakeup((caddr_t) &unvplock[ix]);
	}
}
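
/*
 * Sketch of how the two routines above are used by the code below:
 * union_list_lock() returns non-zero when it had to sleep, so callers
 * must retry until the lock is actually held:
 *
 *	while (union_list_lock(hash))
 *		continue;
 *	... walk or modify unhead[hash] ...
 *	union_list_unlock(hash);
 */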

void
union_updatevp(un, uppervp, lowervp)
	struct union_node *un;
	struct vnode *uppervp;
	struct vnode *lowervp;
{
	int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
	int nhash = UNION_HASH(uppervp, lowervp);
	int docache = (lowervp != NULLVP || uppervp != NULLVP);
	int lhash, uhash;

	/*
	 * Ensure locking is ordered from lower to higher
	 * to avoid deadlocks.
	 */
	if (nhash < ohash) {
		lhash = nhash;
		uhash = ohash;
	} else {
		lhash = ohash;
		uhash = nhash;
	}

	if (lhash != uhash)
		while (union_list_lock(lhash))
			continue;

	while (union_list_lock(uhash))
		continue;

	if (ohash != nhash || !docache) {
		if (un->un_flags & UN_CACHED) {
			un->un_flags &= ~UN_CACHED;
			LIST_REMOVE(un, un_cache);
		}
	}

	if (ohash != nhash)
		union_list_unlock(ohash);

	if (un->un_lowervp != lowervp) {
		if (un->un_lowervp) {
			vrele(un->un_lowervp);
			if (un->un_path) {
				free(un->un_path, M_TEMP);
				un->un_path = 0;
			}
			if (un->un_dirvp) {
				vrele(un->un_dirvp);
				un->un_dirvp = NULLVP;
			}
		}
		un->un_lowervp = lowervp;
		un->un_lowersz = VNOVAL;
	}

	if (un->un_uppervp != uppervp) {
		if (un->un_uppervp)
			vrele(un->un_uppervp);

		un->un_uppervp = uppervp;
		un->un_uppersz = VNOVAL;
	}

	if (docache && (ohash != nhash)) {
		LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

	union_list_unlock(nhash);
}
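
/*
 * Worked example of the ordering in union_updatevp(): if the old pair
 * hashes to bucket 5 and the new pair to bucket 2, bucket 2 is locked
 * before bucket 5.  Two threads moving nodes between the same two
 * buckets therefore always acquire the locks in the same order and
 * cannot deadlock against each other.
 */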

void
union_newlower(un, lowervp)
	struct union_node *un;
	struct vnode *lowervp;
{

	union_updatevp(un, un->un_uppervp, lowervp);
}

void
union_newupper(un, uppervp)
	struct union_node *un;
	struct vnode *uppervp;
{

	union_updatevp(un, uppervp, un->un_lowervp);
}

/*
 * Keep track of size changes in the underlying vnodes.
 * If the size changes, then callback to the vm layer
 * giving priority to the upper layer size.
 */
void
union_newsize(vp, uppersz, lowersz)
	struct vnode *vp;
	off_t uppersz, lowersz;
{
	struct union_node *un;
	off_t sz;

	/* only interested in regular files */
	if (vp->v_type != VREG)
		return;

	un = VTOUNION(vp);
	sz = VNOVAL;

	if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) {
		un->un_uppersz = uppersz;
		if (sz == VNOVAL)
			sz = un->un_uppersz;
	}

	if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) {
		un->un_lowersz = lowersz;
		if (sz == VNOVAL)
			sz = un->un_lowersz;
	}

	if (sz != VNOVAL) {
#ifdef UNION_DIAGNOSTIC
		printf("union: %s size now %qd\n",
		    uppersz != VNOVAL ? "upper" : "lower", sz);
#endif
		uvm_vnp_setsize(vp, sz);
	}
}
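
/*
 * Example of the upper-layer priority in union_newsize(): if one call
 * reports uppersz = 4096 and lowersz = 8192 (both changed), the upper
 * size is checked first, so uvm_vnp_setsize() runs with 4096.  The
 * lower size is still recorded in un_lowersz, but it only reaches the
 * vm layer when no upper size change is seen in the same call.
 */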

/*
 * allocate a union_node/vnode pair.  the vnode is
 * referenced and locked.  the new vnode is returned
 * via (vpp).  (mp) is the mountpoint of the union filesystem,
 * (dvp) is the parent directory where the upper layer object
 * should exist (but doesn't) and (cnp) is the componentname
 * information which is partially copied to allow the upper
 * layer object to be created at a later time.  (uppervp)
 * and (lowervp) reference the upper and lower layer objects
 * being mapped.  either, but not both, can be nil.
 * if supplied, (uppervp) is locked.
 * the references are either maintained in the newly allocated
 * union_node object, or they are vrele'd.
 *
 * all union_nodes are maintained on a singly-linked
 * list.  new nodes are only allocated when they cannot
 * be found on this list.  entries on the list are
 * removed when the vfs reclaim entry is called.
 *
 * a single lock is kept for the entire list.  this is
 * needed because the getnewvnode() function can block
 * waiting for a vnode to become free, in which case there
 * may be more than one process trying to get the same
 * vnode.  this lock is only taken if we are going to
 * call getnewvnode(), since the kernel itself is single-threaded.
 *
 * if an entry is found on the list, then call vget() to
 * take a reference.  this is done because there may be
 * zero references to it and so it needs to be removed from
 * the vnode free list.
 */
int
union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache, l)
	struct vnode **vpp;
	struct mount *mp;
	struct vnode *undvp;		/* parent union vnode */
	struct vnode *dvp;		/* may be null */
	struct componentname *cnp;	/* may be null */
	struct vnode *uppervp;		/* may be null */
	struct vnode *lowervp;		/* may be null */
	int docache;
	struct lwp *l;
{
	int error;
	struct union_node *un = NULL;
	struct vnode *xlowervp = NULLVP;
	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
	int hash = 0;
	int vflag;
	int try;

	if (uppervp == NULLVP && lowervp == NULLVP)
		panic("union: unidentifiable allocation");

	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
		xlowervp = lowervp;
		lowervp = NULLVP;
	}

	/* detect the root vnode (and aliases) */
	vflag = VLAYER;
	if ((uppervp == um->um_uppervp) &&
	    ((lowervp == NULLVP) || lowervp == um->um_lowervp)) {
		if (lowervp == NULLVP) {
			lowervp = um->um_lowervp;
			if (lowervp != NULLVP)
				VREF(lowervp);
		}
		vflag = VROOT;
	}

loop:
	if (!docache) {
		un = 0;
	} else for (try = 0; try < 3; try++) {
		switch (try) {
		case 0:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, lowervp);
			break;

		case 1:
			if (uppervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, NULLVP);
			break;

		case 2:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(NULLVP, lowervp);
			break;
		}

		while (union_list_lock(hash))
			continue;

		for (un = unhead[hash].lh_first; un != 0;
					un = un->un_cache.le_next) {
			if ((un->un_lowervp == lowervp ||
			     un->un_lowervp == NULLVP) &&
			    (un->un_uppervp == uppervp ||
			     un->un_uppervp == NULLVP) &&
			    (UNIONTOV(un)->v_mount == mp)) {
				if (vget(UNIONTOV(un), 0, l)) {
					union_list_unlock(hash);
					goto loop;
				}
				break;
			}
		}

		union_list_unlock(hash);

		if (un)
			break;
	}

	if (un) {
		/*
		 * Obtain a lock on the union_node.
		 * uppervp is locked, though un->un_uppervp
		 * may not be.  this doesn't break the locking
		 * hierarchy since in the case that un->un_uppervp
		 * is not yet locked it will be vrele'd and replaced
		 * with uppervp.
		 */

		if ((dvp != NULLVP) && (uppervp == dvp)) {
			/*
			 * Access ``.'', so (un) will already
			 * be locked.  Since this process has
			 * the lock on (uppervp) no other
			 * process can hold the lock on (un).
			 */
#ifdef DIAGNOSTIC
			if ((un->un_flags & UN_LOCKED) == 0)
				panic("union: . not locked");
			else if (curproc && un->un_pid != curproc->p_pid &&
				    un->un_pid > -1 && curproc->p_pid > -1)
				panic("union: allocvp not lock owner");
#endif
		} else {
			if (un->un_flags & UN_LOCKED) {
				vrele(UNIONTOV(un));
				un->un_flags |= UN_WANTED;
				(void) tsleep(&un->un_flags, PINOD,
				    "unionalloc", 0);
				goto loop;
			}
			un->un_flags |= UN_LOCKED;

#ifdef DIAGNOSTIC
			if (curproc)
				un->un_pid = curproc->p_pid;
			else
				un->un_pid = -1;
#endif
		}

		/*
		 * At this point, the union_node is locked,
		 * un->un_uppervp may not be locked, and uppervp
		 * is locked or nil.
		 */

		/*
		 * Save information about the upper layer.
		 */
		if (uppervp != un->un_uppervp) {
			union_newupper(un, uppervp);
		} else if (uppervp) {
			vrele(uppervp);
		}

		if (un->un_uppervp) {
			un->un_flags |= UN_ULOCK;
			un->un_flags &= ~UN_KLOCK;
		}

		/*
		 * Save information about the lower layer.
		 * This needs to keep track of pathname
		 * and directory information which union_vn_create
		 * might need.
		 */
		if (lowervp != un->un_lowervp) {
			union_newlower(un, lowervp);
			if (cnp && (lowervp != NULLVP)) {
				un->un_hash = cnp->cn_hash;
				un->un_path = malloc(cnp->cn_namelen+1,
						M_TEMP, M_WAITOK);
				memcpy(un->un_path, cnp->cn_nameptr,
						cnp->cn_namelen);
				un->un_path[cnp->cn_namelen] = '\0';
				VREF(dvp);
				un->un_dirvp = dvp;
			}
		} else if (lowervp) {
			vrele(lowervp);
		}
		*vpp = UNIONTOV(un);
		return (0);
	}

	if (docache) {
		/*
		 * otherwise lock the vp list while we call getnewvnode
		 * since that can block.
		 */
		hash = UNION_HASH(uppervp, lowervp);

		if (union_list_lock(hash))
			goto loop;
	}

	error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp);
	if (error) {
		if (uppervp) {
			if (dvp == uppervp)
				vrele(uppervp);
			else
				vput(uppervp);
		}
		if (lowervp)
			vrele(lowervp);

		goto out;
	}

	MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
		M_TEMP, M_WAITOK);

	(*vpp)->v_flag |= vflag;
	(*vpp)->v_vnlock = NULL;	/* Make upper layers call VOP_LOCK */
	if (uppervp)
		(*vpp)->v_type = uppervp->v_type;
	else
		(*vpp)->v_type = lowervp->v_type;
	un = VTOUNION(*vpp);
	un->un_vnode = *vpp;
	un->un_uppervp = uppervp;
	un->un_uppersz = VNOVAL;
	un->un_lowervp = lowervp;
	un->un_lowersz = VNOVAL;
	un->un_pvp = undvp;
	if (undvp != NULLVP)
		VREF(undvp);
	un->un_dircache = 0;
	un->un_openl = 0;
	un->un_flags = UN_LOCKED;
	if (un->un_uppervp)
		un->un_flags |= UN_ULOCK;
#ifdef DIAGNOSTIC
	if (curproc)
		un->un_pid = curproc->p_pid;
	else
		un->un_pid = -1;
#endif
	if (cnp && (lowervp != NULLVP)) {
		un->un_hash = cnp->cn_hash;
		un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
		memcpy(un->un_path, cnp->cn_nameptr, cnp->cn_namelen);
		un->un_path[cnp->cn_namelen] = '\0';
		VREF(dvp);
		un->un_dirvp = dvp;
	} else {
		un->un_hash = 0;
		un->un_path = 0;
		un->un_dirvp = 0;
	}

	if (docache) {
		LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

	if (xlowervp)
		vrele(xlowervp);

out:
	if (docache)
		union_list_unlock(hash);

	return (error);
}
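
/*
 * Illustrative only (the real callers live in the unionfs lookup,
 * mount and copy-up paths): a caller that has resolved a name in both
 * layers maps the pair of layer vnodes to a single union vnode with
 * something like
 *
 *	error = union_allocvp(&vp, mp, undvp, upperdvp, cnp,
 *	    uvp, lvp, 1, l);
 *
 * where docache = 1 enters the node in the hash table above, so a
 * later allocation for the same (uvp, lvp) pair returns the same
 * vnode.  The names here are hypothetical locals, not fixed API.
 */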

int
union_freevp(vp)
	struct vnode *vp;
{
	struct union_node *un = VTOUNION(vp);

	if (un->un_flags & UN_CACHED) {
		un->un_flags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
	}

	if (un->un_pvp != NULLVP)
		vrele(un->un_pvp);
	if (un->un_uppervp != NULLVP)
		vrele(un->un_uppervp);
	if (un->un_lowervp != NULLVP)
		vrele(un->un_lowervp);
	if (un->un_dirvp != NULLVP)
		vrele(un->un_dirvp);
	if (un->un_path)
		free(un->un_path, M_TEMP);

	FREE(vp->v_data, M_TEMP);
	vp->v_data = 0;

	return (0);
}

/*
 * copyfile.  copy the vnode (fvp) to the vnode (tvp)
 * using a sequence of reads and writes.  both (fvp)
 * and (tvp) are locked on entry and exit.
 */
int
union_copyfile(fvp, tvp, cred, l)
	struct vnode *fvp;
	struct vnode *tvp;
	struct ucred *cred;
	struct lwp *l;
{
	char *buf;
	struct uio uio;
	struct iovec iov;
	int error = 0;

	/*
	 * strategy:
	 * allocate a buffer of size MAXBSIZE.
	 * loop doing reads and writes, keeping track
	 * of the current uio offset.
	 * give up at the first sign of trouble.
	 */

	uio.uio_lwp = l;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_offset = 0;

	VOP_UNLOCK(fvp, 0);			/* XXX */
	VOP_LEASE(fvp, l, cred, LEASE_READ);
	vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY);	/* XXX */
	VOP_UNLOCK(tvp, 0);			/* XXX */
	VOP_LEASE(tvp, l, cred, LEASE_WRITE);
	vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);	/* XXX */

	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);

	/* ugly loop follows... */
	do {
		off_t offset = uio.uio_offset;

		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		iov.iov_base = buf;
		iov.iov_len = MAXBSIZE;
		uio.uio_resid = iov.iov_len;
		uio.uio_rw = UIO_READ;
		error = VOP_READ(fvp, &uio, 0, cred);

		if (error == 0) {
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			iov.iov_base = buf;
			iov.iov_len = MAXBSIZE - uio.uio_resid;
			uio.uio_offset = offset;
			uio.uio_rw = UIO_WRITE;
			uio.uio_resid = iov.iov_len;

			if (uio.uio_resid == 0)
				break;

			do {
				error = VOP_WRITE(tvp, &uio, 0, cred);
			} while ((uio.uio_resid > 0) && (error == 0));
		}

	} while (error == 0);

	free(buf, M_TEMP);
	return (error);
}
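
/*
 * Worked example of the copy loop above: copying a 5000-byte file
 * with MAXBSIZE = 65536.  The first VOP_READ returns with
 * uio_resid = 60536, so a 5000-byte write is issued at offset 0.
 * The second read leaves uio_resid = 65536; the write length then
 * computes to 0, which is taken as end-of-file and ends the loop.
 */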

/*
 * (un) is assumed to be locked on entry and remains
 * locked on exit.
 */
int
union_copyup(un, docopy, cred, l)
	struct union_node *un;
	int docopy;
	struct ucred *cred;
	struct lwp *l;
{
	int error;
	struct vnode *lvp, *uvp;
	struct vattr lvattr, uvattr;

	error = union_vn_create(&uvp, un, l);
	if (error)
		return (error);

	/* at this point, uppervp is locked */
	union_newupper(un, uvp);
	un->un_flags |= UN_ULOCK;

	lvp = un->un_lowervp;

	if (docopy) {
		/*
		 * XXX - should not ignore errors
		 * from VOP_CLOSE
		 */
		vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY);

		error = VOP_GETATTR(lvp, &lvattr, cred, l);
		if (error == 0)
			error = VOP_OPEN(lvp, FREAD, cred, l);
		if (error == 0) {
			error = union_copyfile(lvp, uvp, cred, l);
			(void) VOP_CLOSE(lvp, FREAD, cred, l);
		}
		if (error == 0) {
			/* Copy permissions up too */
			VATTR_NULL(&uvattr);
			uvattr.va_mode = lvattr.va_mode;
			uvattr.va_flags = lvattr.va_flags;
			error = VOP_SETATTR(uvp, &uvattr, cred, l);
		}
		VOP_UNLOCK(lvp, 0);
#ifdef UNION_DIAGNOSTIC
		if (error == 0)
			uprintf("union: copied up %s\n", un->un_path);
#endif

	}
	union_vn_close(uvp, FWRITE, cred, l);

	/*
	 * Subsequent IOs will go to the top layer, so
	 * call close on the lower vnode and open on the
	 * upper vnode to ensure that the filesystem keeps
	 * its reference counts right.  This doesn't do
	 * the right thing with (cred) and (FREAD) though.
	 * Ignoring error returns is not right, either.
	 */
	if (error == 0) {
		int i;

		vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY);
		for (i = 0; i < un->un_openl; i++) {
			(void) VOP_CLOSE(lvp, FREAD, cred, l);
			(void) VOP_OPEN(uvp, FREAD, cred, l);
		}
		un->un_openl = 0;
		VOP_UNLOCK(lvp, 0);
	}

	return (error);
}

static int
union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
	struct union_mount *um;
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
	struct componentname *cn;
	const char *path;
	int pathlen;
{
	int error;

	/*
	 * A new componentname structure must be faked up because
	 * there is no way to know where the upper level cnp came
	 * from or what it is being used for.  This must duplicate
	 * some of the work done by NDINIT, some of the work done
	 * by namei, some of the work done by lookup and some of
	 * the work done by VOP_LOOKUP when given a CREATE flag.
	 * Conclusion: Horrible.
	 *
	 * The pathname buffer will be PNBUF_PUT'd by VOP_MKDIR.
	 */
	cn->cn_namelen = pathlen;
	if ((cn->cn_namelen + 1) > MAXPATHLEN)
		return (ENAMETOOLONG);
	cn->cn_pnbuf = PNBUF_GET();
	memcpy(cn->cn_pnbuf, path, cn->cn_namelen);
	cn->cn_pnbuf[cn->cn_namelen] = '\0';

	cn->cn_nameiop = CREATE;
	cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
	cn->cn_lwp = cnp->cn_lwp;
	if (um->um_op == UNMNT_ABOVE)
		cn->cn_cred = cnp->cn_cred;
	else
		cn->cn_cred = um->um_cred;
	cn->cn_nameptr = cn->cn_pnbuf;
	cn->cn_hash = cnp->cn_hash;
	cn->cn_consume = cnp->cn_consume;

	VREF(dvp);
	error = relookup(dvp, vpp, cn);
	if (!error)
		vrele(dvp);
	else {
		PNBUF_PUT(cn->cn_pnbuf);
		cn->cn_pnbuf = 0;
	}

	return (error);
}

/*
 * Create a shadow directory in the upper layer.
 * The new vnode is returned locked.
 *
 * (um) points to the union mount structure for access to
 * the mounting process's credentials.
 * (dvp) is the directory in which to create the shadow directory.
 * it is unlocked on entry and exit.
 * (cnp) is the componentname to be created.
 * (vpp) is the returned newly created shadow directory, which
 * is returned locked.
 *
 * N.B. We still attempt to create shadow directories even if the union
 * is mounted read-only, which is a little nonintuitive.
 */
int
union_mkshadow(um, dvp, cnp, vpp)
	struct union_mount *um;
	struct vnode *dvp;
	struct componentname *cnp;
	struct vnode **vpp;
{
	int error;
	struct vattr va;
	struct lwp *l = cnp->cn_lwp;
	struct componentname cn;

	error = union_relookup(um, dvp, vpp, cnp, &cn,
			cnp->cn_nameptr, cnp->cn_namelen);
	if (error)
		return (error);

	if (*vpp) {
		VOP_ABORTOP(dvp, &cn);
		VOP_UNLOCK(dvp, 0);
		vrele(*vpp);
		*vpp = NULLVP;
		return (EEXIST);
	}

	/*
	 * policy: when creating the shadow directory in the
	 * upper layer, create it owned by the user who did
	 * the mount, group from parent directory, and mode
	 * 777 modified by umask (i.e. mostly identical to the
	 * mkdir syscall).  (jsp, kb)
	 */

	VATTR_NULL(&va);
	va.va_type = VDIR;
	va.va_mode = um->um_cmode;

	/* VOP_LEASE: dvp is locked */
	VOP_LEASE(dvp, l, cn.cn_cred, LEASE_WRITE);

	error = VOP_MKDIR(dvp, vpp, &cn, &va);
	return (error);
}
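
/*
 * Example with a hypothetical umask: a union mounted by a process
 * with umask 022 gets um_cmode = 0777 & ~022 = 0755, so shadow
 * directories appear as rwxr-xr-x, owned by the user who did the
 * mount.
 */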

/*
 * Create a whiteout entry in the upper layer.
 *
 * (um) points to the union mount structure for access to
 * the mounting process's credentials.
 * (dvp) is the directory in which to create the whiteout.
 * it is locked on entry and exit.
 * (cnp) is the componentname to be created.
 */
int
union_mkwhiteout(um, dvp, cnp, path)
	struct union_mount *um;
	struct vnode *dvp;
	struct componentname *cnp;
	char *path;
{
	int error;
	struct lwp *l = cnp->cn_lwp;
	struct vnode *wvp;
	struct componentname cn;

	VOP_UNLOCK(dvp, 0);
	error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
	if (error) {
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		return (error);
	}

	if (wvp) {
		VOP_ABORTOP(dvp, &cn);
		vrele(dvp);
		vrele(wvp);
		return (EEXIST);
	}

	/* VOP_LEASE: dvp is locked */
	VOP_LEASE(dvp, l, l->l_proc->p_ucred, LEASE_WRITE);

	error = VOP_WHITEOUT(dvp, &cn, CREATE);
	if (error)
		VOP_ABORTOP(dvp, &cn);

	vrele(dvp);

	return (error);
}

/*
 * union_vn_create: creates and opens a new shadow file
 * on the upper union layer.  this function is similar
 * in spirit to calling vn_open but it avoids calling namei().
 * the problem with calling namei is that a) it locks too many
 * things, and b) it doesn't start at the "right" directory,
 * whereas relookup is told where to start.
 */
int
union_vn_create(vpp, un, l)
	struct vnode **vpp;
	struct union_node *un;
	struct lwp *l;
{
	struct proc *p = l->l_proc;
	struct vnode *vp;
	struct ucred *cred = p->p_ucred;
	struct vattr vat;
	struct vattr *vap = &vat;
	int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
	int error;
	int cmode = UN_FILEMODE & ~p->p_cwdi->cwdi_cmask;
	struct componentname cn;

	*vpp = NULLVP;

	/*
	 * Build a new componentname structure (for the same
	 * reasons outlined in union_mkshadow).
	 * The difference here is that the file is owned by
	 * the current user, rather than by the person who
	 * did the mount, since the current user needs to be
	 * able to write the file (that's why it is being
	 * copied in the first place).
	 */
	cn.cn_namelen = strlen(un->un_path);
	if ((cn.cn_namelen + 1) > MAXPATHLEN)
		return (ENAMETOOLONG);
	cn.cn_pnbuf = PNBUF_GET();
	memcpy(cn.cn_pnbuf, un->un_path, cn.cn_namelen+1);
	cn.cn_nameiop = CREATE;
	cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
	cn.cn_lwp = l;
	cn.cn_cred = l->l_proc->p_ucred;
	cn.cn_nameptr = cn.cn_pnbuf;
	cn.cn_hash = un->un_hash;
	cn.cn_consume = 0;

	VREF(un->un_dirvp);
	if ((error = relookup(un->un_dirvp, &vp, &cn)) != 0)
		return (error);
	vrele(un->un_dirvp);

	if (vp) {
		VOP_ABORTOP(un->un_dirvp, &cn);
		if (un->un_dirvp == vp)
			vrele(un->un_dirvp);
		else
			vput(un->un_dirvp);
		vrele(vp);
		return (EEXIST);
	}

	/*
	 * Good - there was no race to create the file
	 * so go ahead and create it.  The permissions
	 * on the file will be 0666 modified by the
	 * current user's umask.  Access to the file, while
	 * it is unioned, will require access to the top *and*
	 * bottom files.  Access when not unioned will simply
	 * require access to the top-level file.
	 * TODO: confirm choice of access permissions.
	 */
	VATTR_NULL(vap);
	vap->va_type = VREG;
	vap->va_mode = cmode;
	VOP_LEASE(un->un_dirvp, l, cred, LEASE_WRITE);
	if ((error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap)) != 0)
		return (error);

	if ((error = VOP_OPEN(vp, fmode, cred, l)) != 0) {
		vput(vp);
		return (error);
	}

	vp->v_writecount++;
	*vpp = vp;
	return (0);
}
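
/*
 * Example, again with a hypothetical umask of 022: cmode above
 * evaluates to UN_FILEMODE & ~022 = 0666 & ~022 = 0644, so the
 * copied-up file appears as rw-r--r--, owned by the user forcing the
 * copy rather than by the user who mounted the union.
 */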

int
union_vn_close(vp, fmode, cred, l)
	struct vnode *vp;
	int fmode;
	struct ucred *cred;
	struct lwp *l;
{

	if (fmode & FWRITE)
		--vp->v_writecount;
	return (VOP_CLOSE(vp, fmode, cred, l));
}

void
union_removed_upper(un)
	struct union_node *un;
{
#if 1
	/*
	 * We do not set the uppervp to NULLVP here, because lowervp
	 * may also be NULLVP, so this routine would end up creating
	 * a bogus union node with no upper or lower VP (that causes
	 * pain in many places that assume at least one VP exists).
	 * Since we've removed this node from the cache hash chains,
	 * it won't be found again.  When all current holders
	 * release it, union_inactive() will vgone() it.
	 */
	union_diruncache(un);
#else
	union_newupper(un, NULLVP);
#endif

	if (un->un_flags & UN_CACHED) {
		un->un_flags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
	}

	if (un->un_flags & UN_ULOCK) {
		un->un_flags &= ~UN_ULOCK;
		VOP_UNLOCK(un->un_uppervp, 0);
	}
}

#if 0
struct vnode *
union_lowervp(vp)
	struct vnode *vp;
{
	struct union_node *un = VTOUNION(vp);

	if ((un->un_lowervp != NULLVP) &&
	    (vp->v_type == un->un_lowervp->v_type)) {
		if (vget(un->un_lowervp, 0) == 0)
			return (un->un_lowervp);
	}

	return (NULLVP);
}
#endif

/*
 * determine whether a whiteout is needed
 * during a remove/rmdir operation.
 */
int
union_dowhiteout(un, cred, l)
	struct union_node *un;
	struct ucred *cred;
	struct lwp *l;
{
	struct vattr va;

	if (un->un_lowervp != NULLVP)
		return (1);

	if (VOP_GETATTR(un->un_uppervp, &va, cred, l) == 0 &&
	    (va.va_flags & OPAQUE))
		return (1);

	return (0);
}

static void
union_dircache_r(vp, vppp, cntp)
	struct vnode *vp;
	struct vnode ***vppp;
	int *cntp;
{
	struct union_node *un;

	if (vp->v_op != union_vnodeop_p) {
		if (vppp) {
			VREF(vp);
			*(*vppp)++ = vp;
			if (--(*cntp) == 0)
				panic("union: dircache table too small");
		} else {
			(*cntp)++;
		}

		return;
	}

	un = VTOUNION(vp);
	if (un->un_uppervp != NULLVP)
		union_dircache_r(un->un_uppervp, vppp, cntp);
	if (un->un_lowervp != NULLVP)
		union_dircache_r(un->un_lowervp, vppp, cntp);
}
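
/*
 * union_dircache_r() is used in two passes, as union_dircache() below
 * shows: first with vppp == 0, where it merely counts the non-union
 * vnodes reachable from (vp) into *cntp, and then with a real array
 * pointer, where it VREFs each such vnode and stores it through
 * *vppp.
 */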

struct vnode *
union_dircache(vp, l)
	struct vnode *vp;
	struct lwp *l;
{
	int cnt;
	struct vnode *nvp = NULLVP;
	struct vnode **vpp;
	struct vnode **dircache;
	int error;

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	dircache = VTOUNION(vp)->un_dircache;

	nvp = NULLVP;

	if (dircache == 0) {
		cnt = 0;
		union_dircache_r(vp, 0, &cnt);
		cnt++;
		dircache = (struct vnode **)
				malloc(cnt * sizeof(struct vnode *),
					M_TEMP, M_WAITOK);
		vpp = dircache;
		union_dircache_r(vp, &vpp, &cnt);
		VTOUNION(vp)->un_dircache = dircache;
		*vpp = NULLVP;
		vpp = dircache + 1;
	} else {
		vpp = dircache;
		do {
			if (*vpp++ == VTOUNION(vp)->un_uppervp)
				break;
		} while (*vpp != NULLVP);
	}

	if (*vpp == NULLVP)
		goto out;

	vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
	VREF(*vpp);
	error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0,
	    *vpp, NULLVP, 0, l);
	if (!error) {
		VTOUNION(vp)->un_dircache = 0;
		VTOUNION(nvp)->un_dircache = dircache;
	}

out:
	VOP_UNLOCK(vp, 0);
	return (nvp);
}

void
union_diruncache(un)
	struct union_node *un;
{
	struct vnode **vpp;

	if (un->un_dircache != 0) {
		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
			vrele(*vpp);
		free(un->un_dircache, M_TEMP);
		un->un_dircache = 0;
	}
}

/*
 * This hook is called from vn_readdir() to switch to the lower
 * directory after the upper directory has been read out.
 */
int
union_readdirhook(struct vnode **vpp, struct file *fp, struct lwp *l)
{
	struct vnode *vp = *vpp, *lvp;
	struct vattr va;
	int error;

	if (vp->v_op != union_vnodeop_p)
		return (0);

	if ((lvp = union_dircache(vp, l)) == NULLVP)
		return (0);

	/*
	 * If the directory is opaque,
	 * then don't show lower entries
	 */
	error = VOP_GETATTR(vp, &va, fp->f_cred, l);
	if (error || (va.va_flags & OPAQUE)) {
		vput(lvp);
		return (error);
	}

	error = VOP_OPEN(lvp, FREAD, fp->f_cred, l);
	if (error) {
		vput(lvp);
		return (error);
	}
	VOP_UNLOCK(lvp, 0);
	fp->f_data = (caddr_t) lvp;
	fp->f_offset = 0;
	error = vn_close(vp, FREAD, fp->f_cred, l);
	if (error)
		return (error);
	*vpp = lvp;
	return (0);
}
   1202