/*	$NetBSD: union_subr.c,v 1.45 2011/08/10 06:19:54 hannken Exp $	*/
2
3 /*
4 * Copyright (c) 1994
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Jan-Simon Pendry.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * @(#)union_subr.c 8.20 (Berkeley) 5/20/95
35 */
36
37 /*
38 * Copyright (c) 1994 Jan-Simon Pendry
39 *
40 * This code is derived from software contributed to Berkeley by
41 * Jan-Simon Pendry.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 * notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 * must display the following acknowledgement:
53 * This product includes software developed by the University of
54 * California, Berkeley and its contributors.
55 * 4. Neither the name of the University nor the names of its contributors
56 * may be used to endorse or promote products derived from this software
57 * without specific prior written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69 * SUCH DAMAGE.
70 *
71 * @(#)union_subr.c 8.20 (Berkeley) 5/20/95
72 */
73
74 #include <sys/cdefs.h>
75 __KERNEL_RCSID(0, "$NetBSD: union_subr.c,v 1.45 2011/08/10 06:19:54 hannken Exp $");
76
77 #include <sys/param.h>
78 #include <sys/systm.h>
79 #include <sys/proc.h>
80 #include <sys/time.h>
81 #include <sys/kernel.h>
82 #include <sys/vnode.h>
83 #include <sys/namei.h>
84 #include <sys/malloc.h>
85 #include <sys/dirent.h>
86 #include <sys/file.h>
87 #include <sys/filedesc.h>
88 #include <sys/queue.h>
89 #include <sys/mount.h>
90 #include <sys/stat.h>
91 #include <sys/kauth.h>
92
93 #include <uvm/uvm_extern.h>
94
95 #include <fs/union/union.h>
96
/* Number of hash buckets; must be power of two, otherwise change UNION_HASH() */
#define NHASH 32

/*
 * Hash an (upper, lower) vnode-pointer pair into a bucket index.
 * unsigned int ...
 */
#define UNION_HASH(u, l) \
	(((((unsigned long) (u)) + ((unsigned long) l)) >> 8) & (NHASH-1))

/* Per-bucket chains of cached union nodes, and their homemade sleep locks. */
static LIST_HEAD(unhead, union_node) unhead[NHASH];
static int unvplock[NHASH];

/* Forward declarations for this file's helpers. */
static int union_list_lock(int);
static void union_list_unlock(int);
void union_updatevp(struct union_node *, struct vnode *, struct vnode *);
static int union_relookup(struct union_mount *, struct vnode *,
	struct vnode **, struct componentname *,
	struct componentname *, char **,
	const char *, int);
int union_vn_close(struct vnode *, int, kauth_cred_t, struct lwp *);
static void union_dircache_r(struct vnode *, struct vnode ***, int *);
struct vnode *union_dircache(struct vnode *, struct lwp *);
117
118 void
119 union_init(void)
120 {
121 int i;
122
123 for (i = 0; i < NHASH; i++)
124 LIST_INIT(&unhead[i]);
125 memset(unvplock, 0, sizeof(unvplock));
126 }
127
128 /*
129 * Free global unionfs resources.
130 */
/*
 * Free global unionfs resources.  The hash chains themselves hold no
 * storage of their own; only the readdir hook needs to be cleared.
 */
void
union_done(void)
{

	/* Make sure to unset the readdir hook. */
	vn_union_readdir_hook = NULL;
}
138
139 static int
140 union_list_lock(int ix)
141 {
142
143 if (unvplock[ix] & UN_LOCKED) {
144 unvplock[ix] |= UN_WANTED;
145 (void) tsleep(&unvplock[ix], PINOD, "unionlk", 0);
146 return (1);
147 }
148
149 unvplock[ix] |= UN_LOCKED;
150
151 return (0);
152 }
153
154 static void
155 union_list_unlock(int ix)
156 {
157
158 unvplock[ix] &= ~UN_LOCKED;
159
160 if (unvplock[ix] & UN_WANTED) {
161 unvplock[ix] &= ~UN_WANTED;
162 wakeup(&unvplock[ix]);
163 }
164 }
165
/*
 * Install a new (uppervp, lowervp) pair in (un), re-hashing the node
 * onto the chain matching the new pair.  Replaced layer vnodes are
 * vrele'd.  Both the old and new chains are locked across the move.
 */
void
union_updatevp(struct union_node *un, struct vnode *uppervp,
	struct vnode *lowervp)
{
	int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
	int nhash = UNION_HASH(uppervp, lowervp);
	int docache = (lowervp != NULLVP || uppervp != NULLVP);
	int lhash, uhash;

	/*
	 * Ensure locking is ordered from lower to higher
	 * to avoid deadlocks.
	 */
	if (nhash < ohash) {
		lhash = nhash;
		uhash = ohash;
	} else {
		lhash = ohash;
		uhash = nhash;
	}

	if (lhash != uhash)
		while (union_list_lock(lhash))
			continue;

	while (union_list_lock(uhash))
		continue;

	/* Unhash the node if it changes chains or loses both layers. */
	if (ohash != nhash || !docache) {
		if (un->un_flags & UN_CACHED) {
			un->un_flags &= ~UN_CACHED;
			LIST_REMOVE(un, un_cache);
		}
	}

	if (ohash != nhash)
		union_list_unlock(ohash);

	if (un->un_lowervp != lowervp) {
		if (un->un_lowervp) {
			vrele(un->un_lowervp);
			/* Saved pathname/directory refer to the old lower. */
			if (un->un_path) {
				free(un->un_path, M_TEMP);
				un->un_path = 0;
			}
			if (un->un_dirvp) {
				vrele(un->un_dirvp);
				un->un_dirvp = NULLVP;
			}
		}
		un->un_lowervp = lowervp;
		un->un_lowersz = VNOVAL;	/* size unknown for new layer */
	}

	if (un->un_uppervp != uppervp) {
		if (un->un_uppervp)
			vrele(un->un_uppervp);

		un->un_uppervp = uppervp;
		un->un_uppersz = VNOVAL;	/* size unknown for new layer */
	}

	/* Re-insert on the new chain, which is still locked. */
	if (docache && (ohash != nhash)) {
		LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

	union_list_unlock(nhash);
}
235
236 void
237 union_newlower(struct union_node *un, struct vnode *lowervp)
238 {
239
240 union_updatevp(un, un->un_uppervp, lowervp);
241 }
242
243 void
244 union_newupper(struct union_node *un, struct vnode *uppervp)
245 {
246
247 union_updatevp(un, uppervp, un->un_lowervp);
248 }
249
250 /*
251 * Keep track of size changes in the underlying vnodes.
252 * If the size changes, then callback to the vm layer
253 * giving priority to the upper layer size.
254 */
255 void
256 union_newsize(struct vnode *vp, off_t uppersz, off_t lowersz)
257 {
258 struct union_node *un;
259 off_t sz;
260
261 /* only interested in regular files */
262 if (vp->v_type != VREG) {
263 uvm_vnp_setsize(vp, 0);
264 return;
265 }
266
267 un = VTOUNION(vp);
268 sz = VNOVAL;
269
270 if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) {
271 un->un_uppersz = uppersz;
272 if (sz == VNOVAL)
273 sz = un->un_uppersz;
274 }
275
276 if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) {
277 un->un_lowersz = lowersz;
278 if (sz == VNOVAL)
279 sz = un->un_lowersz;
280 }
281
282 if (sz != VNOVAL) {
283 #ifdef UNION_DIAGNOSTIC
284 printf("union: %s size now %qd\n",
285 uppersz != VNOVAL ? "upper" : "lower", sz);
286 #endif
287 uvm_vnp_setsize(vp, sz);
288 }
289 }
290
/*
 * allocate a union_node/vnode pair. the vnode is
 * referenced and locked. the new vnode is returned
 * via (vpp). (mp) is the mountpoint of the union filesystem,
 * (dvp) is the parent directory where the upper layer object
 * should exist (but doesn't) and (cnp) is the componentname
 * information which is partially copied to allow the upper
 * layer object to be created at a later time. (uppervp)
 * and (lowervp) reference the upper and lower layer objects
 * being mapped. either, but not both, can be nil.
 * if supplied, (uppervp) is locked.
 * the reference is either maintained in the new union_node
 * object which is allocated, or they are vrele'd.
 *
 * all union_nodes are maintained on a singly-linked
 * list. new nodes are only allocated when they cannot
 * be found on this list. entries on the list are
 * removed when the vfs reclaim entry is called.
 *
 * a single lock is kept for the entire list. this is
 * needed because the getnewvnode() function can block
 * waiting for a vnode to become free, in which case there
 * may be more than one process trying to get the same
 * vnode. this lock is only taken if we are going to
 * call getnewvnode, since the kernel itself is single-threaded.
 *
 * if an entry is found on the list, then call vget() to
 * take a reference. this is done because there may be
 * zero references to it and so it needs to removed from
 * the vnode free list.
 */
int
union_allocvp(
	struct vnode **vpp,
	struct mount *mp,
	struct vnode *undvp,		/* parent union vnode */
	struct vnode *dvp,		/* may be null */
	struct componentname *cnp,	/* may be null */
	struct vnode *uppervp,		/* may be null */
	struct vnode *lowervp,		/* may be null */
	int docache)
{
	int error;
	struct vattr va;
	struct union_node *un = NULL, *un1;
	struct vnode *vp, *xlowervp = NULLVP;
	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
	voff_t uppersz, lowersz;
	int hash = 0;
	int vflag, iflag;
	int try;

	if (uppervp == NULLVP && lowervp == NULLVP)
		panic("union: unidentifiable allocation");

	/*
	 * If the layers disagree on type, hide the lower layer; remember
	 * it in xlowervp so the reference can be dropped on the way out.
	 */
	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
		xlowervp = lowervp;
		lowervp = NULLVP;
	}

	/* detect the root vnode (and aliases) */
	iflag = VI_LAYER;
	vflag = 0;
	if ((uppervp == um->um_uppervp) &&
	    ((lowervp == NULLVP) || lowervp == um->um_lowervp)) {
		if (lowervp == NULLVP) {
			lowervp = um->um_lowervp;
			if (lowervp != NULLVP)
				vref(lowervp);
		}
		iflag = 0;
		vflag = VV_ROOT;
	}

loop:
	if (!docache) {
		un = 0;
	} else for (try = 0; try < 3; try++) {
		/*
		 * Probe up to three chains: the exact pair first, then
		 * upper-only and lower-only aliases.
		 */
		switch (try) {
		case 0:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, lowervp);
			break;

		case 1:
			if (uppervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, NULLVP);
			break;

		case 2:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(NULLVP, lowervp);
			break;
		}

		while (union_list_lock(hash))
			continue;

		for (un = unhead[hash].lh_first; un != 0;
			un = un->un_cache.le_next) {
			if ((un->un_lowervp == lowervp ||
			     un->un_lowervp == NULLVP) &&
			    (un->un_uppervp == uppervp ||
			     un->un_uppervp == NULLVP) &&
			    (UNIONTOV(un)->v_mount == mp)) {
				vp = UNIONTOV(un);
				mutex_enter(vp->v_interlock);
				/* vget failure means the vnode is dying:
				 * restart the whole lookup. */
				if (vget(vp, 0)) {
					union_list_unlock(hash);
					goto loop;
				}
				break;
			}
		}

		union_list_unlock(hash);

		if (un)
			break;
	}

	if (un) {
		/*
		 * Obtain a lock on the union_node.
		 * uppervp is locked, though un->un_uppervp
		 * may not be. this doesn't break the locking
		 * hierarchy since in the case that un->un_uppervp
		 * is not yet locked it will be vrele'd and replaced
		 * with uppervp.
		 */

		if ((dvp != NULLVP) && (uppervp == dvp)) {
			/*
			 * Access ``.'', so (un) will already
			 * be locked. Since this process has
			 * the lock on (uppervp) no other
			 * process can hold the lock on (un).
			 */
#ifdef DIAGNOSTIC
			if ((un->un_flags & UN_LOCKED) == 0)
				panic("union: . not locked");
			else if (curproc && un->un_pid != curproc->p_pid &&
				    un->un_pid > -1 && curproc->p_pid > -1)
				panic("union: allocvp not lock owner");
#endif
		} else {
			if (un->un_flags & UN_LOCKED) {
				/* Someone else holds the node: wait and retry. */
				vrele(UNIONTOV(un));
				un->un_flags |= UN_WANTED;
				(void) tsleep(&un->un_flags, PINOD,
				    "unionalloc", 0);
				goto loop;
			}
			un->un_flags |= UN_LOCKED;

#ifdef DIAGNOSTIC
			if (curproc)
				un->un_pid = curproc->p_pid;
			else
				un->un_pid = -1;
#endif
		}

		/*
		 * At this point, the union_node is locked,
		 * un->un_uppervp may not be locked, and uppervp
		 * is locked or nil.
		 */

		/*
		 * Save information about the upper layer.
		 */
		if (uppervp != un->un_uppervp) {
			union_newupper(un, uppervp);
		} else if (uppervp) {
			vrele(uppervp);
		}

		if (un->un_uppervp) {
			un->un_flags |= UN_ULOCK;
			un->un_flags &= ~UN_KLOCK;
		}

		/*
		 * Save information about the lower layer.
		 * This needs to keep track of pathname
		 * and directory information which union_vn_create
		 * might need.
		 */
		if (lowervp != un->un_lowervp) {
			union_newlower(un, lowervp);
			if (cnp && (lowervp != NULLVP)) {
				un->un_hash = cnp->cn_hash;
				un->un_path = malloc(cnp->cn_namelen+1,
						M_TEMP, M_WAITOK);
				memcpy(un->un_path, cnp->cn_nameptr,
						cnp->cn_namelen);
				un->un_path[cnp->cn_namelen] = '\0';
				vref(dvp);
				un->un_dirvp = dvp;
			}
		} else if (lowervp) {
			vrele(lowervp);
		}
		*vpp = UNIONTOV(un);
		return (0);
	}

	/* No cached node: record the layer sizes before allocating. */
	uppersz = lowersz = VNOVAL;
	if (uppervp != NULLVP)
		if (VOP_GETATTR(uppervp, &va, FSCRED) == 0)
			uppersz = va.va_size;
	if (lowervp != NULLVP)
		if (VOP_GETATTR(lowervp, &va, FSCRED) == 0)
			lowersz = va.va_size;
	hash = UNION_HASH(uppervp, lowervp);

	/*
	 * Get a new vnode and share the lock with upper layer vnode,
	 * unless layers are inverted.
	 */
	vnode_t *svp = (uppervp != NULLVP) ? uppervp : lowervp;
	error = getnewvnode(VT_UNION, mp, union_vnodeop_p,
	    svp->v_interlock, vpp);
	if (error) {
		/* Drop the layer references/locks we were handed. */
		if (uppervp) {
			if (dvp == uppervp)
				vrele(uppervp);
			else
				vput(uppervp);
		}
		if (lowervp)
			vrele(lowervp);

		goto out;
	}

	if (docache) {
		while (union_list_lock(hash))
			continue;
		LIST_FOREACH(un1, &unhead[hash], un_cache) {
			if (un1->un_lowervp == lowervp &&
			    un1->un_uppervp == uppervp &&
			    UNIONTOV(un1)->v_mount == mp) {
				/*
				 * Another thread beat us, push back freshly
				 * allocated vnode and retry.
				 */
				union_list_unlock(hash);
				ungetnewvnode(*vpp);
				goto loop;
			}
		}
	}

	(*vpp)->v_data = malloc(sizeof(struct union_node), M_TEMP, M_WAITOK);

	(*vpp)->v_vflag |= vflag;
	(*vpp)->v_iflag |= iflag;
	if (uppervp)
		(*vpp)->v_type = uppervp->v_type;
	else
		(*vpp)->v_type = lowervp->v_type;
	un = VTOUNION(*vpp);
	un->un_vnode = *vpp;
	un->un_uppervp = uppervp;
	un->un_lowervp = lowervp;
	un->un_pvp = undvp;
	if (undvp != NULLVP)
		vref(undvp);
	un->un_dircache = 0;
	un->un_openl = 0;
	un->un_flags = UN_LOCKED;

	un->un_uppersz = VNOVAL;
	un->un_lowersz = VNOVAL;
	union_newsize(*vpp, uppersz, lowersz);

	if (un->un_uppervp)
		un->un_flags |= UN_ULOCK;
#ifdef DIAGNOSTIC
	if (curproc)
		un->un_pid = curproc->p_pid;
	else
		un->un_pid = -1;
#endif
	/* Remember name/parent so a shadow file can be created later. */
	if (dvp && cnp && (lowervp != NULLVP)) {
		un->un_hash = cnp->cn_hash;
		un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
		memcpy(un->un_path, cnp->cn_nameptr, cnp->cn_namelen);
		un->un_path[cnp->cn_namelen] = '\0';
		vref(dvp);
		un->un_dirvp = dvp;
	} else {
		un->un_hash = 0;
		un->un_path = 0;
		un->un_dirvp = 0;
	}

	if (docache) {
		LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

	/* Drop the lower vnode hidden earlier due to a type mismatch. */
	if (xlowervp)
		vrele(xlowervp);

out:
	if (docache)
		union_list_unlock(hash);

	return (error);
}
607
608 int
609 union_freevp(struct vnode *vp)
610 {
611 int hash;
612 struct union_node *un = VTOUNION(vp);
613
614 hash = UNION_HASH(un->un_uppervp, un->un_lowervp);
615
616 while (union_list_lock(hash))
617 continue;
618 if (un->un_flags & UN_CACHED) {
619 un->un_flags &= ~UN_CACHED;
620 LIST_REMOVE(un, un_cache);
621 }
622 union_list_unlock(hash);
623
624 if (un->un_pvp != NULLVP)
625 vrele(un->un_pvp);
626 if (un->un_uppervp != NULLVP)
627 vrele(un->un_uppervp);
628 if (un->un_lowervp != NULLVP)
629 vrele(un->un_lowervp);
630 if (un->un_dirvp != NULLVP)
631 vrele(un->un_dirvp);
632 if (un->un_path)
633 free(un->un_path, M_TEMP);
634
635 free(vp->v_data, M_TEMP);
636 vp->v_data = NULL;
637
638 return (0);
639 }
640
/*
 * copyfile. copy the vnode (fvp) to the vnode (tvp)
 * using a sequence of reads and writes. both (fvp)
 * and (tvp) are locked on entry and exit.
 */
int
union_copyfile(struct vnode *fvp, struct vnode *tvp, kauth_cred_t cred,
	struct lwp *l)
{
	char *tbuf;
	struct uio uio;
	struct iovec iov;
	int error = 0;

	/*
	 * strategy:
	 * allocate a buffer of size MAXBSIZE.
	 * loop doing reads and writes, keeping track
	 * of the current uio offset.
	 * give up at the first sign of trouble.
	 */

	uio.uio_offset = 0;
	UIO_SETUP_SYSSPACE(&uio);

	/* Cycle both vnode locks to reacquire them exclusively. */
	VOP_UNLOCK(fvp);				/* XXX */
	vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY);		/* XXX */
	VOP_UNLOCK(tvp);				/* XXX */
	vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);		/* XXX */

	tbuf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);

	/* ugly loop follows... */
	do {
		off_t offset = uio.uio_offset;

		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		iov.iov_base = tbuf;
		iov.iov_len = MAXBSIZE;
		uio.uio_resid = iov.iov_len;
		uio.uio_rw = UIO_READ;
		error = VOP_READ(fvp, &uio, 0, cred);

		if (error == 0) {
			/* Write back exactly as many bytes as were read. */
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			iov.iov_base = tbuf;
			iov.iov_len = MAXBSIZE - uio.uio_resid;
			uio.uio_offset = offset;
			uio.uio_rw = UIO_WRITE;
			uio.uio_resid = iov.iov_len;

			/* A zero-length read means EOF: copy is complete. */
			if (uio.uio_resid == 0)
				break;

			do {
				error = VOP_WRITE(tvp, &uio, 0, cred);
			} while ((uio.uio_resid > 0) && (error == 0));
		}

	} while (error == 0);

	free(tbuf, M_TEMP);
	return (error);
}
707
/*
 * Copy the lower layer object of (un) up to a freshly created upper
 * layer shadow file, optionally copying data and permissions (docopy),
 * and transfer any outstanding lower-layer opens to the upper vnode.
 *
 * (un) is assumed to be locked on entry and remains
 * locked on exit.
 */
int
union_copyup(struct union_node *un, int docopy, kauth_cred_t cred,
	struct lwp *l)
{
	int error;
	struct vnode *lvp, *uvp;
	struct vattr lvattr, uvattr;

	error = union_vn_create(&uvp, un, l);
	if (error)
		return (error);

	/* at this point, uppervp is locked */
	union_newupper(un, uvp);
	un->un_flags |= UN_ULOCK;

	lvp = un->un_lowervp;

	if (docopy) {
		/*
		 * XX - should not ignore errors
		 * from VOP_CLOSE
		 */
		vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY);

		error = VOP_GETATTR(lvp, &lvattr, cred);
		if (error == 0)
			error = VOP_OPEN(lvp, FREAD, cred);
		if (error == 0) {
			error = union_copyfile(lvp, uvp, cred, l);
			(void) VOP_CLOSE(lvp, FREAD, cred);
		}
		if (error == 0) {
			/* Copy permissions up too */
			vattr_null(&uvattr);
			uvattr.va_mode = lvattr.va_mode;
			uvattr.va_flags = lvattr.va_flags;
			error = VOP_SETATTR(uvp, &uvattr, cred);
		}
		VOP_UNLOCK(lvp);
#ifdef UNION_DIAGNOSTIC
		if (error == 0)
			uprintf("union: copied up %s\n", un->un_path);
#endif

	}
	union_vn_close(uvp, FWRITE, cred, l);

	/*
	 * Subsequent IOs will go to the top layer, so
	 * call close on the lower vnode and open on the
	 * upper vnode to ensure that the filesystem keeps
	 * its references counts right. This doesn't do
	 * the right thing with (cred) and (FREAD) though.
	 * Ignoring error returns is not right, either.
	 */
	if (error == 0) {
		int i;

		vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY);
		for (i = 0; i < un->un_openl; i++) {
			(void) VOP_CLOSE(lvp, FREAD, cred);
			(void) VOP_OPEN(uvp, FREAD, cred);
		}
		un->un_openl = 0;
		VOP_UNLOCK(lvp);
	}

	return (error);
}
783
784 static int
785 union_relookup(
786 struct union_mount *um,
787 struct vnode *dvp,
788 struct vnode **vpp,
789 struct componentname *cnp,
790 struct componentname *cn,
791 char **pnbuf_ret,
792 const char *path,
793 int pathlen)
794 {
795 int error;
796 char *pnbuf;
797
798 /*
799 * A new componentname structure must be faked up because
800 * there is no way to know where the upper level cnp came
801 * from or what it is being used for. This must duplicate
802 * some of the work done by NDINIT, some of the work done
803 * by namei, some of the work done by lookup and some of
804 * the work done by VOP_LOOKUP when given a CREATE flag.
805 * Conclusion: Horrible.
806 */
807 cn->cn_namelen = pathlen;
808 if ((cn->cn_namelen + 1) > MAXPATHLEN)
809 return (ENAMETOOLONG);
810 pnbuf = PNBUF_GET();
811 memcpy(pnbuf, path, cn->cn_namelen);
812 pnbuf[cn->cn_namelen] = '\0';
813 *pnbuf_ret = pnbuf;
814
815 cn->cn_nameiop = CREATE;
816 cn->cn_flags = (LOCKPARENT|ISLASTCN);
817 if (um->um_op == UNMNT_ABOVE)
818 cn->cn_cred = cnp->cn_cred;
819 else
820 cn->cn_cred = um->um_cred;
821 cn->cn_nameptr = pnbuf;
822 cn->cn_hash = cnp->cn_hash;
823 cn->cn_consume = cnp->cn_consume;
824
825 error = relookup(dvp, vpp, cn, 0);
826 if (error) {
827 PNBUF_PUT(pnbuf);
828 *pnbuf_ret = NULL;
829 }
830
831 return (error);
832 }
833
/*
 * Create a shadow directory in the upper layer.
 * The new vnode is returned locked.
 *
 * (um) points to the union mount structure for access to the
 * the mounting process's credentials.
 * (dvp) is the directory in which to create the shadow directory.
 * it is unlocked on entry and exit.
 * (cnp) is the componentname to be created.
 * (vpp) is the returned newly created shadow directory, which
 * is returned locked.
 *
 * N.B. We still attempt to create shadow directories even if the union
 * is mounted read-only, which is a little nonintuitive.
 */
int
union_mkshadow(struct union_mount *um, struct vnode *dvp,
	struct componentname *cnp, struct vnode **vpp)
{
	int error;
	struct vattr va;
	struct componentname cn;
	char *pnbuf;

	vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
	error = union_relookup(um, dvp, vpp, cnp, &cn, &pnbuf,
			cnp->cn_nameptr, cnp->cn_namelen);
	if (error) {
		VOP_UNLOCK(dvp);
		return (error);
	}

	if (*vpp) {
		/* The name already exists in the upper layer. */
		VOP_ABORTOP(dvp, &cn);
		PNBUF_PUT(pnbuf);
		if (dvp != *vpp)
			VOP_UNLOCK(dvp);
		vput(*vpp);
		*vpp = NULLVP;
		return (EEXIST);
	}

	/*
	 * policy: when creating the shadow directory in the
	 * upper layer, create it owned by the user who did
	 * the mount, group from parent directory, and mode
	 * 777 modified by umask (ie mostly identical to the
	 * mkdir syscall). (jsp, kb)
	 */

	vattr_null(&va);
	va.va_type = VDIR;
	va.va_mode = um->um_cmode;

	/* NOTE(review): the extra reference (and the lock on dvp) appear
	 * to be consumed by VOP_MKDIR under the old vnodeop contract --
	 * confirm against vnodeops(9). */
	vref(dvp);
	error = VOP_MKDIR(dvp, vpp, &cn, &va);
	PNBUF_PUT(pnbuf);
	return (error);
}
893
/*
 * Create a whiteout entry in the upper layer.
 *
 * (um) points to the union mount structure for access to the
 * the mounting process's credentials.
 * (dvp) is the directory in which to create the whiteout.
 * it is locked on entry and exit.
 * (cnp) is the componentname to be created.
 */
int
union_mkwhiteout(struct union_mount *um, struct vnode *dvp,
	struct componentname *cnp, char *path)
{
	int error;
	struct vnode *wvp;
	struct componentname cn;
	char *pnbuf;

	/* Cycle the lock on dvp before relooking up the name.  XXX */
	VOP_UNLOCK(dvp);
	vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
	error = union_relookup(um, dvp, &wvp, cnp, &cn, &pnbuf,
	    path, strlen(path));
	if (error)
		return (error);

	if (wvp) {
		/* An object with that name already exists: nothing to hide. */
		VOP_ABORTOP(dvp, &cn);
		PNBUF_PUT(pnbuf);
		if (dvp != wvp)
			VOP_UNLOCK(dvp);
		vput(wvp);
		return (EEXIST);
	}

	error = VOP_WHITEOUT(dvp, &cn, CREATE);
	if (error) {
		VOP_ABORTOP(dvp, &cn);
	}

	PNBUF_PUT(pnbuf);
	return (error);
}
936
/*
 * union_vn_create: creates and opens a new shadow file
 * on the upper union layer. this function is similar
 * in spirit to calling vn_open but it avoids calling namei().
 * the problem with calling namei is that a) it locks too many
 * things, and b) it doesn't start at the "right" directory,
 * whereas relookup is told where to start.
 *
 * On success, the new vnode is returned open and locked in (*vpp)
 * with v_writecount bumped; union_vn_close() undoes both.
 */
int
union_vn_create(struct vnode **vpp, struct union_node *un, struct lwp *l)
{
	struct vnode *vp;
	kauth_cred_t cred = l->l_cred;
	struct vattr vat;
	struct vattr *vap = &vat;
	int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
	int error;
	int cmode = UN_FILEMODE & ~l->l_proc->p_cwdi->cwdi_cmask;
	struct componentname cn;
	char *pnbuf;

	*vpp = NULLVP;

	/*
	 * Build a new componentname structure (for the same
	 * reasons outlined in union_mkshadow).
	 * The difference here is that the file is owned by
	 * the current user, rather than by the person who
	 * did the mount, since the current user needs to be
	 * able to write the file (that's why it is being
	 * copied in the first place).
	 */
	cn.cn_namelen = strlen(un->un_path);
	if ((cn.cn_namelen + 1) > MAXPATHLEN)
		return (ENAMETOOLONG);
	pnbuf = PNBUF_GET();
	memcpy(pnbuf, un->un_path, cn.cn_namelen+1);
	cn.cn_nameiop = CREATE;
	cn.cn_flags = (LOCKPARENT|ISLASTCN);
	cn.cn_cred = l->l_cred;
	cn.cn_nameptr = pnbuf;
	cn.cn_hash = un->un_hash;
	cn.cn_consume = 0;

	vn_lock(un->un_dirvp, LK_EXCLUSIVE | LK_RETRY);
	error = relookup(un->un_dirvp, &vp, &cn, 0);
	if (error) {
		PNBUF_PUT(pnbuf);
		VOP_UNLOCK(un->un_dirvp);
		return (error);
	}

	if (vp) {
		/* Lost the race: the file already exists. */
		VOP_ABORTOP(un->un_dirvp, &cn);
		PNBUF_PUT(pnbuf);
		if (un->un_dirvp != vp)
			VOP_UNLOCK(un->un_dirvp);
		vput(vp);
		return (EEXIST);
	}

	/*
	 * Good - there was no race to create the file
	 * so go ahead and create it. The permissions
	 * on the file will be 0666 modified by the
	 * current user's umask. Access to the file, while
	 * it is unioned, will require access to the top *and*
	 * bottom files. Access when not unioned will simply
	 * require access to the top-level file.
	 * TODO: confirm choice of access permissions.
	 */
	vattr_null(vap);
	vap->va_type = VREG;
	vap->va_mode = cmode;
	/* NOTE(review): reference and lock on un_dirvp look like they are
	 * consumed by VOP_CREATE per the old vnodeop contract -- confirm. */
	vref(un->un_dirvp);
	if ((error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap)) != 0) {
		PNBUF_PUT(pnbuf);
		return (error);
	}

	if ((error = VOP_OPEN(vp, fmode, cred)) != 0) {
		vput(vp);
		PNBUF_PUT(pnbuf);
		return (error);
	}

	/* Account for our writer; union_vn_close() decrements this. */
	vp->v_writecount++;
	*vpp = vp;
	PNBUF_PUT(pnbuf);
	return (0);
}
1028
1029 int
1030 union_vn_close(struct vnode *vp, int fmode, kauth_cred_t cred, struct lwp *l)
1031 {
1032
1033 if (fmode & FWRITE)
1034 --vp->v_writecount;
1035 return (VOP_CLOSE(vp, fmode, cred));
1036 }
1037
/*
 * The upper-layer object of (un) has been removed.  Flush the
 * directory cache, unhash the node so it cannot be found again,
 * and release the upper lock if this node was holding it.
 */
void
union_removed_upper(struct union_node *un)
{
	int hash;

#if 1
	/*
	 * We do not set the uppervp to NULLVP here, because lowervp
	 * may also be NULLVP, so this routine would end up creating
	 * a bogus union node with no upper or lower VP (that causes
	 * pain in many places that assume at least one VP exists).
	 * Since we've removed this node from the cache hash chains,
	 * it won't be found again. When all current holders
	 * release it, union_inactive() will vgone() it.
	 */
	union_diruncache(un);
#else
	union_newupper(un, NULLVP);
#endif

	hash = UNION_HASH(un->un_uppervp, un->un_lowervp);

	while (union_list_lock(hash))
		continue;
	if (un->un_flags & UN_CACHED) {
		un->un_flags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
	}
	union_list_unlock(hash);

	/* Drop the upper vnode's lock if this node held it. */
	if (un->un_flags & UN_ULOCK) {
		un->un_flags &= ~UN_ULOCK;
		VOP_UNLOCK(un->un_uppervp);
	}
}
1073
#if 0
/*
 * Return a referenced lower vnode for (vp) if one exists with the
 * same type, else NULLVP.  Currently compiled out.
 */
struct vnode *
union_lowervp(struct vnode *vp)
{
	struct union_node *un = VTOUNION(vp);

	if ((un->un_lowervp != NULLVP) &&
	    (vp->v_type == un->un_lowervp->v_type)) {
		if (vget(un->un_lowervp, 0) == 0)
			return (un->un_lowervp);
	}

	return (NULLVP);
}
#endif
1089
1090 /*
1091 * determine whether a whiteout is needed
1092 * during a remove/rmdir operation.
1093 */
1094 int
1095 union_dowhiteout(struct union_node *un, kauth_cred_t cred)
1096 {
1097 struct vattr va;
1098
1099 if (un->un_lowervp != NULLVP)
1100 return (1);
1101
1102 if (VOP_GETATTR(un->un_uppervp, &va, cred) == 0 &&
1103 (va.va_flags & OPAQUE))
1104 return (1);
1105
1106 return (0);
1107 }
1108
1109 static void
1110 union_dircache_r(struct vnode *vp, struct vnode ***vppp, int *cntp)
1111 {
1112 struct union_node *un;
1113
1114 if (vp->v_op != union_vnodeop_p) {
1115 if (vppp) {
1116 vref(vp);
1117 *(*vppp)++ = vp;
1118 if (--(*cntp) == 0)
1119 panic("union: dircache table too small");
1120 } else {
1121 (*cntp)++;
1122 }
1123
1124 return;
1125 }
1126
1127 un = VTOUNION(vp);
1128 if (un->un_uppervp != NULLVP)
1129 union_dircache_r(un->un_uppervp, vppp, cntp);
1130 if (un->un_lowervp != NULLVP)
1131 union_dircache_r(un->un_lowervp, vppp, cntp);
1132 }
1133
/*
 * Build (or advance through) the table of constituent directory vnodes
 * for union directory (vp) and return a fresh union vnode stacked on
 * the next one, for use by the readdir hook.  Returns NULLVP when the
 * table is exhausted.  The returned vnode, if any, is locked.
 */
struct vnode *
union_dircache(struct vnode *vp, struct lwp *l)
{
	int cnt;
	struct vnode *nvp = NULLVP;
	struct vnode **vpp;
	struct vnode **dircache;
	int error;

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	dircache = VTOUNION(vp)->un_dircache;

	nvp = NULLVP;

	if (dircache == 0) {
		/* First call: size the table, then fill it (NULL-terminated). */
		cnt = 0;
		union_dircache_r(vp, 0, &cnt);
		cnt++;
		dircache = (struct vnode **)
				malloc(cnt * sizeof(struct vnode *),
					M_TEMP, M_WAITOK);
		vpp = dircache;
		union_dircache_r(vp, &vpp, &cnt);
		VTOUNION(vp)->un_dircache = dircache;
		*vpp = NULLVP;
		vpp = dircache + 1;
	} else {
		/* Subsequent call: resume just past the current upper vnode. */
		vpp = dircache;
		do {
			if (*vpp++ == VTOUNION(vp)->un_uppervp)
				break;
		} while (*vpp != NULLVP);
	}

	if (*vpp == NULLVP)
		goto out;

	vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
	vref(*vpp);
	error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0, *vpp, NULLVP, 0);
	if (!error) {
		/* Hand the dircache table over to the new vnode. */
		VTOUNION(vp)->un_dircache = 0;
		VTOUNION(nvp)->un_dircache = dircache;
	}

out:
	VOP_UNLOCK(vp);
	return (nvp);
}
1183
1184 void
1185 union_diruncache(struct union_node *un)
1186 {
1187 struct vnode **vpp;
1188
1189 if (un->un_dircache != 0) {
1190 for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
1191 vrele(*vpp);
1192 free(un->un_dircache, M_TEMP);
1193 un->un_dircache = 0;
1194 }
1195 }
1196
1197 /*
1198 * Check whether node can rmdir (check empty).
1199 */
1200 int
1201 union_check_rmdir(struct union_node *un, kauth_cred_t cred)
1202 {
1203 int dirlen, eofflag, error;
1204 char *dirbuf;
1205 struct vattr va;
1206 struct vnode *tvp;
1207 struct dirent *dp, *edp;
1208 struct componentname cn;
1209 struct iovec aiov;
1210 struct uio auio;
1211
1212 KASSERT(un->un_uppervp != NULL);
1213
1214 /* Check upper for being opaque. */
1215 KASSERT(VOP_ISLOCKED(un->un_uppervp));
1216 error = VOP_GETATTR(un->un_uppervp, &va, cred);
1217 if (error || (va.va_flags & OPAQUE))
1218 return error;
1219
1220 if (un->un_lowervp == NULL)
1221 return 0;
1222
1223 /* Check lower for being empty. */
1224 vn_lock(un->un_lowervp, LK_SHARED | LK_RETRY);
1225 error = VOP_GETATTR(un->un_lowervp, &va, cred);
1226 if (error) {
1227 VOP_UNLOCK(un->un_lowervp);
1228 return error;
1229 }
1230 dirlen = va.va_blocksize;
1231 dirbuf = kmem_alloc(dirlen, KM_SLEEP);
1232 if (dirbuf == NULL) {
1233 VOP_UNLOCK(un->un_lowervp);
1234 return ENOMEM;
1235 }
1236 /* error = 0; */
1237 eofflag = 0;
1238 auio.uio_offset = 0;
1239 do {
1240 aiov.iov_len = dirlen;
1241 aiov.iov_base = dirbuf;
1242 auio.uio_iov = &aiov;
1243 auio.uio_iovcnt = 1;
1244 auio.uio_resid = aiov.iov_len;
1245 auio.uio_rw = UIO_READ;
1246 UIO_SETUP_SYSSPACE(&auio);
1247 error = VOP_READDIR(un->un_lowervp, &auio, cred, &eofflag,
1248 NULL, NULL);
1249 if (error)
1250 break;
1251 edp = (struct dirent *)&dirbuf[dirlen - auio.uio_resid];
1252 for (dp = (struct dirent *)dirbuf;
1253 error == 0 && dp < edp;
1254 dp = (struct dirent *)((char *)dp + dp->d_reclen)) {
1255 if (dp->d_reclen == 0) {
1256 error = ENOTEMPTY;
1257 break;
1258 }
1259 if (dp->d_type == DT_WHT ||
1260 (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
1261 (dp->d_namlen == 2 && !memcmp(dp->d_name, "..", 2)))
1262 continue;
1263 /* Check for presence in the upper layer. */
1264 cn.cn_nameiop = LOOKUP;
1265 cn.cn_flags = ISLASTCN | RDONLY;
1266 cn.cn_cred = cred;
1267 cn.cn_nameptr = dp->d_name;
1268 cn.cn_namelen = dp->d_namlen;
1269 cn.cn_hash = 0;
1270 cn.cn_consume = 0;
1271 error = VOP_LOOKUP(un->un_uppervp, &tvp, &cn);
1272 if (error == ENOENT && (cn.cn_flags & ISWHITEOUT)) {
1273 error = 0;
1274 continue;
1275 }
1276 if (error == 0)
1277 vput(tvp);
1278 error = ENOTEMPTY;
1279 }
1280 } while (error == 0 && !eofflag);
1281 kmem_free(dirbuf, dirlen);
1282 VOP_UNLOCK(un->un_lowervp);
1283
1284 return error;
1285 }
1286
/*
 * This hook is called from vn_readdir() to switch to lower directory
 * entry after the upper directory is read.  On success the file's
 * backing vnode and offset are swapped to the next constituent
 * directory and *vpp is updated to match.
 */
int
union_readdirhook(struct vnode **vpp, struct file *fp, struct lwp *l)
{
	struct vnode *vp = *vpp, *lvp;
	struct vattr va;
	int error;

	/* Only union vnodes have further layers to read. */
	if (vp->v_op != union_vnodeop_p)
		return (0);

	if ((lvp = union_dircache(vp, l)) == NULLVP)
		return (0);

	/*
	 * If the directory is opaque,
	 * then don't show lower entries
	 */
	error = VOP_GETATTR(vp, &va, fp->f_cred);
	if (error || (va.va_flags & OPAQUE)) {
		vput(lvp);
		return (error);
	}

	error = VOP_OPEN(lvp, FREAD, fp->f_cred);
	if (error) {
		vput(lvp);
		return (error);
	}
	VOP_UNLOCK(lvp);
	/* Point the open file at the lower directory, rewound to 0. */
	fp->f_data = lvp;
	fp->f_offset = 0;
	error = vn_close(vp, FREAD, fp->f_cred);
	if (error)
		return (error);
	*vpp = lvp;
	return (0);
}
1328