/*	$NetBSD: union_subr.c,v 1.2 2003/03/17 09:11:30 jdolecek Exp $	*/

/*
 * Copyright (c) 1994 Jan-Simon Pendry
 * Copyright (c) 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Jan-Simon Pendry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: union_subr.c,v 1.2 2003/03/17 09:11:30 jdolecek Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/queue.h>
#include <sys/mount.h>
#include <sys/stat.h>

#include <uvm/uvm_extern.h>

#include <fs/union/union.h>

#ifdef DIAGNOSTIC
#include <sys/proc.h>
#endif
/* must be power of two, otherwise change UNION_HASH() */
#define NHASH 32

/* unsigned int ... */
#define UNION_HASH(u, l) \
	(((((unsigned long) (u)) + ((unsigned long) l)) >> 8) & (NHASH-1))
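
/*
 * UNION_HASH() sums the upper and lower vnode addresses and discards
 * the low 8 bits, which carry little entropy since vnodes are
 * allocated at aligned addresses.  The final mask relies on NHASH
 * being a power of two, per the note above.
 */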

static LIST_HEAD(unhead, union_node) unhead[NHASH];
static int unvplock[NHASH];

static int union_list_lock __P((int));
static void union_list_unlock __P((int));
void union_updatevp __P((struct union_node *, struct vnode *, struct vnode *));
static int union_relookup __P((struct union_mount *, struct vnode *,
			       struct vnode **, struct componentname *,
			       struct componentname *, const char *, int));
int union_vn_close __P((struct vnode *, int, struct ucred *, struct proc *));
static void union_dircache_r __P((struct vnode *, struct vnode ***, int *));
struct vnode *union_dircache __P((struct vnode *, struct proc *));

void
union_init()
{
	int i;

	for (i = 0; i < NHASH; i++)
		LIST_INIT(&unhead[i]);
	memset((caddr_t) unvplock, 0, sizeof(unvplock));
}

/*
 * Free global unionfs resources.
 */
void
union_done()
{

	/* Make sure to unset the readdir hook. */
	vn_union_readdir_hook = NULL;
}

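/*
 * Lock/unlock a hash chain.  union_list_lock() returns non-zero if it
 * had to sleep waiting for the lock, in which case the caller must
 * re-evaluate its state and retry, typically with
 * "while (union_list_lock(hash)) continue;".
 */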
static int
union_list_lock(ix)
	int ix;
{

	if (unvplock[ix] & UN_LOCKED) {
		unvplock[ix] |= UN_WANTED;
		(void) tsleep(&unvplock[ix], PINOD, "unionlk", 0);
		return (1);
	}

	unvplock[ix] |= UN_LOCKED;

	return (0);
}

static void
union_list_unlock(ix)
	int ix;
{

	unvplock[ix] &= ~UN_LOCKED;

	if (unvplock[ix] & UN_WANTED) {
		unvplock[ix] &= ~UN_WANTED;
		wakeup((caddr_t) &unvplock[ix]);
	}
}

void
union_updatevp(un, uppervp, lowervp)
	struct union_node *un;
	struct vnode *uppervp;
	struct vnode *lowervp;
{
	int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
	int nhash = UNION_HASH(uppervp, lowervp);
	int docache = (lowervp != NULLVP || uppervp != NULLVP);
	int lhash, uhash;

	/*
	 * Ensure locking is ordered from lower to higher
	 * to avoid deadlocks.
	 */
	if (nhash < ohash) {
		lhash = nhash;
		uhash = ohash;
	} else {
		lhash = ohash;
		uhash = nhash;
	}

	if (lhash != uhash)
		while (union_list_lock(lhash))
			continue;

	while (union_list_lock(uhash))
		continue;

	if (ohash != nhash || !docache) {
		if (un->un_flags & UN_CACHED) {
			un->un_flags &= ~UN_CACHED;
			LIST_REMOVE(un, un_cache);
		}
	}

	if (ohash != nhash)
		union_list_unlock(ohash);

	if (un->un_lowervp != lowervp) {
		if (un->un_lowervp) {
			vrele(un->un_lowervp);
			if (un->un_path) {
				free(un->un_path, M_TEMP);
				un->un_path = 0;
			}
			if (un->un_dirvp) {
				vrele(un->un_dirvp);
				un->un_dirvp = NULLVP;
			}
		}
		un->un_lowervp = lowervp;
		un->un_lowersz = VNOVAL;
	}

	if (un->un_uppervp != uppervp) {
		if (un->un_uppervp)
			vrele(un->un_uppervp);

		un->un_uppervp = uppervp;
		un->un_uppersz = VNOVAL;
	}

	if (docache && (ohash != nhash)) {
		LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

	union_list_unlock(nhash);
}

void
union_newlower(un, lowervp)
	struct union_node *un;
	struct vnode *lowervp;
{

	union_updatevp(un, un->un_uppervp, lowervp);
}

void
union_newupper(un, uppervp)
	struct union_node *un;
	struct vnode *uppervp;
{

	union_updatevp(un, uppervp, un->un_lowervp);
}

/*
 * Keep track of size changes in the underlying vnodes.
 * If the size changes, then callback to the vm layer
 * giving priority to the upper layer size.
 */
void
union_newsize(vp, uppersz, lowersz)
	struct vnode *vp;
	off_t uppersz, lowersz;
{
	struct union_node *un;
	off_t sz;

	/* only interested in regular files */
	if (vp->v_type != VREG)
		return;

	un = VTOUNION(vp);
	sz = VNOVAL;

	if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) {
		un->un_uppersz = uppersz;
		if (sz == VNOVAL)
			sz = un->un_uppersz;
	}

	if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) {
		un->un_lowersz = lowersz;
		if (sz == VNOVAL)
			sz = un->un_lowersz;
	}

	if (sz != VNOVAL) {
#ifdef UNION_DIAGNOSTIC
		printf("union: %s size now %qd\n",
		    uppersz != VNOVAL ? "upper" : "lower", sz);
#endif
		uvm_vnp_setsize(vp, sz);
	}
}

/*
 * allocate a union_node/vnode pair. the vnode is
 * referenced and locked. the new vnode is returned
 * via (vpp). (mp) is the mountpoint of the union filesystem,
 * (dvp) is the parent directory where the upper layer object
 * should exist (but doesn't) and (cnp) is the componentname
 * information which is partially copied to allow the upper
 * layer object to be created at a later time. (uppervp)
 * and (lowervp) reference the upper and lower layer objects
 * being mapped. either, but not both, can be nil.
 * if supplied, (uppervp) is locked.
 * the reference is either maintained in the new union_node
 * object which is allocated, or they are vrele'd.
 *
 * all union_nodes are maintained on hash chains (see unhead[]).
 * new nodes are only allocated when they cannot
 * be found on a chain. entries on a chain are
 * removed when the vfs reclaim entry is called.
 *
 * a lock is kept on each hash chain. this is
 * needed because the getnewvnode() function can block
 * waiting for a vnode to become free, in which case there
 * may be more than one process trying to get the same
 * vnode. this lock is only taken if we are going to
 * call getnewvnode, since the kernel itself is single-threaded.
 *
 * if an entry is found on a chain, then call vget() to
 * take a reference. this is done because there may be
 * zero references to it and so it needs to be removed from
 * the vnode free list.
 */
int
union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
	struct vnode **vpp;
	struct mount *mp;
	struct vnode *undvp;		/* parent union vnode */
	struct vnode *dvp;		/* may be null */
	struct componentname *cnp;	/* may be null */
	struct vnode *uppervp;		/* may be null */
	struct vnode *lowervp;		/* may be null */
	int docache;
{
	int error;
	struct union_node *un = NULL;
	struct vnode *xlowervp = NULLVP;
	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
	int hash = 0;
	int vflag;
	int try;

	if (uppervp == NULLVP && lowervp == NULLVP)
		panic("union: unidentifiable allocation");

	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
		xlowervp = lowervp;
		lowervp = NULLVP;
	}

	/* detect the root vnode (and aliases) */
	vflag = VLAYER;
	if ((uppervp == um->um_uppervp) &&
	    ((lowervp == NULLVP) || lowervp == um->um_lowervp)) {
		if (lowervp == NULLVP) {
			lowervp = um->um_lowervp;
			if (lowervp != NULLVP)
				VREF(lowervp);
		}
		vflag = VROOT;
	}

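	/*
	 * The cache search below probes up to three hash chains, since
	 * an existing node may be hashed under (upper,lower),
	 * (upper,NULL) or (NULL,lower), depending on which layers it
	 * had when it was created.
	 */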
loop:
	if (!docache) {
		un = 0;
	} else for (try = 0; try < 3; try++) {
		switch (try) {
		case 0:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, lowervp);
			break;

		case 1:
			if (uppervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, NULLVP);
			break;

		case 2:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(NULLVP, lowervp);
			break;
		}

		while (union_list_lock(hash))
			continue;

		for (un = unhead[hash].lh_first; un != 0;
		     un = un->un_cache.le_next) {
			if ((un->un_lowervp == lowervp ||
			     un->un_lowervp == NULLVP) &&
			    (un->un_uppervp == uppervp ||
			     un->un_uppervp == NULLVP) &&
			    (UNIONTOV(un)->v_mount == mp)) {
				if (vget(UNIONTOV(un), 0)) {
					union_list_unlock(hash);
					goto loop;
				}
				break;
			}
		}

		union_list_unlock(hash);

		if (un)
			break;
	}

	if (un) {
		/*
		 * Obtain a lock on the union_node.
		 * uppervp is locked, though un->un_uppervp
		 * may not be. this doesn't break the locking
		 * hierarchy since in the case that un->un_uppervp
		 * is not yet locked it will be vrele'd and replaced
		 * with uppervp.
		 */

		if ((dvp != NULLVP) && (uppervp == dvp)) {
			/*
			 * Access ``.'', so (un) will already
			 * be locked. Since this process has
			 * the lock on (uppervp) no other
			 * process can hold the lock on (un).
			 */
#ifdef DIAGNOSTIC
			if ((un->un_flags & UN_LOCKED) == 0)
				panic("union: . not locked");
			else if (curproc && un->un_pid != curproc->p_pid &&
			    un->un_pid > -1 && curproc->p_pid > -1)
				panic("union: allocvp not lock owner");
#endif
		} else {
			if (un->un_flags & UN_LOCKED) {
				vrele(UNIONTOV(un));
				un->un_flags |= UN_WANTED;
				(void) tsleep(&un->un_flags, PINOD,
				    "unionalloc", 0);
				goto loop;
			}
			un->un_flags |= UN_LOCKED;

#ifdef DIAGNOSTIC
			if (curproc)
				un->un_pid = curproc->p_pid;
			else
				un->un_pid = -1;
#endif
		}

		/*
		 * At this point, the union_node is locked,
		 * un->un_uppervp may not be locked, and uppervp
		 * is locked or nil.
		 */

		/*
		 * Save information about the upper layer.
		 */
		if (uppervp != un->un_uppervp) {
			union_newupper(un, uppervp);
		} else if (uppervp) {
			vrele(uppervp);
		}

		if (un->un_uppervp) {
			un->un_flags |= UN_ULOCK;
			un->un_flags &= ~UN_KLOCK;
		}

		/*
		 * Save information about the lower layer.
		 * This needs to keep track of pathname
		 * and directory information which union_vn_create
		 * might need.
		 */
		if (lowervp != un->un_lowervp) {
			union_newlower(un, lowervp);
			if (cnp && (lowervp != NULLVP)) {
				un->un_hash = cnp->cn_hash;
				un->un_path = malloc(cnp->cn_namelen+1,
				    M_TEMP, M_WAITOK);
				memcpy(un->un_path, cnp->cn_nameptr,
				    cnp->cn_namelen);
				un->un_path[cnp->cn_namelen] = '\0';
				VREF(dvp);
				un->un_dirvp = dvp;
			}
		} else if (lowervp) {
			vrele(lowervp);
		}
		*vpp = UNIONTOV(un);
		return (0);
	}

	if (docache) {
		/*
		 * otherwise lock the vp list while we call getnewvnode
		 * since that can block.
		 */
		hash = UNION_HASH(uppervp, lowervp);

		if (union_list_lock(hash))
			goto loop;
	}

	error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp);
	if (error) {
		if (uppervp) {
			if (dvp == uppervp)
				vrele(uppervp);
			else
				vput(uppervp);
		}
		if (lowervp)
			vrele(lowervp);

		goto out;
	}

	MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
	    M_TEMP, M_WAITOK);

	(*vpp)->v_flag |= vflag;
	(*vpp)->v_vnlock = NULL;	/* Make upper layers call VOP_LOCK */
	if (uppervp)
		(*vpp)->v_type = uppervp->v_type;
	else
		(*vpp)->v_type = lowervp->v_type;
	un = VTOUNION(*vpp);
	un->un_vnode = *vpp;
	un->un_uppervp = uppervp;
	un->un_uppersz = VNOVAL;
	un->un_lowervp = lowervp;
	un->un_lowersz = VNOVAL;
	un->un_pvp = undvp;
	if (undvp != NULLVP)
		VREF(undvp);
	un->un_dircache = 0;
	un->un_openl = 0;
	un->un_flags = UN_LOCKED;
	if (un->un_uppervp)
		un->un_flags |= UN_ULOCK;
#ifdef DIAGNOSTIC
	if (curproc)
		un->un_pid = curproc->p_pid;
	else
		un->un_pid = -1;
#endif
	if (cnp && (lowervp != NULLVP)) {
		un->un_hash = cnp->cn_hash;
		un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
		memcpy(un->un_path, cnp->cn_nameptr, cnp->cn_namelen);
		un->un_path[cnp->cn_namelen] = '\0';
		VREF(dvp);
		un->un_dirvp = dvp;
	} else {
		un->un_hash = 0;
		un->un_path = 0;
		un->un_dirvp = 0;
	}

	if (docache) {
		LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

	if (xlowervp)
		vrele(xlowervp);

out:
	if (docache)
		union_list_unlock(hash);

	return (error);
}
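
#if 0
/*
 * Illustrative sketch only (never compiled): the typical call pattern
 * for union_allocvp() from a lookup-style path.  The caller holds a
 * reference on each layer (and the lock on the upper vnode); on
 * success those references are owned by the returned union vnode,
 * which comes back referenced and locked.  All names below are
 * hypothetical.
 */
static int
union_allocvp_usage(struct mount *mp, struct vnode *undvp,
	struct vnode *dvp, struct componentname *cnp,
	struct vnode *uppervp, struct vnode *lowervp)
{
	struct vnode *vp;
	int error;

	error = union_allocvp(&vp, mp, undvp, dvp, cnp,
	    uppervp, lowervp, 1);
	if (error)
		return (error);
	/* ... use vp ... then drop the lock and reference */
	vput(vp);
	return (0);
}
#endif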

int
union_freevp(vp)
	struct vnode *vp;
{
	struct union_node *un = VTOUNION(vp);

	if (un->un_flags & UN_CACHED) {
		un->un_flags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
	}

	if (un->un_pvp != NULLVP)
		vrele(un->un_pvp);
	if (un->un_uppervp != NULLVP)
		vrele(un->un_uppervp);
	if (un->un_lowervp != NULLVP)
		vrele(un->un_lowervp);
	if (un->un_dirvp != NULLVP)
		vrele(un->un_dirvp);
	if (un->un_path)
		free(un->un_path, M_TEMP);

	FREE(vp->v_data, M_TEMP);
	vp->v_data = 0;

	return (0);
}

/*
 * copyfile. copy the vnode (fvp) to the vnode (tvp)
 * using a sequence of reads and writes. both (fvp)
 * and (tvp) are locked on entry and exit.
 */
int
union_copyfile(fvp, tvp, cred, p)
	struct vnode *fvp;
	struct vnode *tvp;
	struct ucred *cred;
	struct proc *p;
{
	char *buf;
	struct uio uio;
	struct iovec iov;
	int error = 0;

	/*
	 * strategy:
	 * allocate a buffer of size MAXBSIZE.
	 * loop doing reads and writes, keeping track
	 * of the current uio offset.
	 * give up at the first sign of trouble.
	 */

	uio.uio_procp = p;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_offset = 0;

	VOP_UNLOCK(fvp, 0);				/* XXX */
	VOP_LEASE(fvp, p, cred, LEASE_READ);
	vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY);		/* XXX */
	VOP_UNLOCK(tvp, 0);				/* XXX */
	VOP_LEASE(tvp, p, cred, LEASE_WRITE);
	vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);		/* XXX */

	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);

	/* ugly loop follows... */
	do {
		off_t offset = uio.uio_offset;

		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		iov.iov_base = buf;
		iov.iov_len = MAXBSIZE;
		uio.uio_resid = iov.iov_len;
		uio.uio_rw = UIO_READ;
		error = VOP_READ(fvp, &uio, 0, cred);

		if (error == 0) {
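			/*
			 * Set up to write back exactly the bytes the
			 * read returned: MAXBSIZE - uio_resid is the
			 * number of bytes actually read.
			 */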
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			iov.iov_base = buf;
			iov.iov_len = MAXBSIZE - uio.uio_resid;
			uio.uio_offset = offset;
			uio.uio_rw = UIO_WRITE;
			uio.uio_resid = iov.iov_len;

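			/* a zero-length read means end of file */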
			if (uio.uio_resid == 0)
				break;

			do {
				error = VOP_WRITE(tvp, &uio, 0, cred);
			} while ((uio.uio_resid > 0) && (error == 0));
		}

	} while (error == 0);

	free(buf, M_TEMP);
	return (error);
}

/*
 * (un) is assumed to be locked on entry and remains
 * locked on exit.
 */
int
union_copyup(un, docopy, cred, p)
	struct union_node *un;
	int docopy;
	struct ucred *cred;
	struct proc *p;
{
	int error;
	struct vnode *lvp, *uvp;
	struct vattr lvattr, uvattr;

	error = union_vn_create(&uvp, un, p);
	if (error)
		return (error);

	/* at this point, uppervp is locked */
	union_newupper(un, uvp);
	un->un_flags |= UN_ULOCK;

	lvp = un->un_lowervp;

	if (docopy) {
		/*
		 * XXX - should not ignore errors
		 * from VOP_CLOSE
		 */
		vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY);

		error = VOP_GETATTR(lvp, &lvattr, cred, p);
		if (error == 0)
			error = VOP_OPEN(lvp, FREAD, cred, p);
		if (error == 0) {
			error = union_copyfile(lvp, uvp, cred, p);
			(void) VOP_CLOSE(lvp, FREAD, cred, p);
		}
		if (error == 0) {
			/* Copy permissions up too */
			VATTR_NULL(&uvattr);
			uvattr.va_mode = lvattr.va_mode;
			uvattr.va_flags = lvattr.va_flags;
			error = VOP_SETATTR(uvp, &uvattr, cred, p);
		}
		VOP_UNLOCK(lvp, 0);
#ifdef UNION_DIAGNOSTIC
		if (error == 0)
			uprintf("union: copied up %s\n", un->un_path);
#endif

	}
	union_vn_close(uvp, FWRITE, cred, p);

	/*
	 * Subsequent IOs will go to the top layer, so
	 * call close on the lower vnode and open on the
	 * upper vnode to ensure that the filesystem keeps
	 * its reference counts right. This doesn't do
	 * the right thing with (cred) and (FREAD) though.
	 * Ignoring error returns is not right, either.
	 */
	if (error == 0) {
		int i;

		vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY);
		for (i = 0; i < un->un_openl; i++) {
			(void) VOP_CLOSE(lvp, FREAD, cred, p);
			(void) VOP_OPEN(uvp, FREAD, cred, p);
		}
		un->un_openl = 0;
		VOP_UNLOCK(lvp, 0);
	}

	return (error);
}

static int
union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
	struct union_mount *um;
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
	struct componentname *cn;
	const char *path;
	int pathlen;
{
	int error;

	/*
	 * A new componentname structure must be faked up because
	 * there is no way to know where the upper level cnp came
	 * from or what it is being used for. This must duplicate
	 * some of the work done by NDINIT, some of the work done
	 * by namei, some of the work done by lookup and some of
	 * the work done by VOP_LOOKUP when given a CREATE flag.
	 * Conclusion: Horrible.
	 *
	 * The pathname buffer will be PNBUF_PUT'd by VOP_MKDIR.
	 */
	cn->cn_namelen = pathlen;
	if ((cn->cn_namelen + 1) > MAXPATHLEN)
		return (ENAMETOOLONG);
	cn->cn_pnbuf = PNBUF_GET();
	memcpy(cn->cn_pnbuf, path, cn->cn_namelen);
	cn->cn_pnbuf[cn->cn_namelen] = '\0';

	cn->cn_nameiop = CREATE;
	cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
	cn->cn_proc = cnp->cn_proc;
	if (um->um_op == UNMNT_ABOVE)
		cn->cn_cred = cnp->cn_cred;
	else
		cn->cn_cred = um->um_cred;
	cn->cn_nameptr = cn->cn_pnbuf;
	cn->cn_hash = cnp->cn_hash;
	cn->cn_consume = cnp->cn_consume;

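	/*
	 * (dvp) is VREF'd for the benefit of relookup().  On success
	 * relookup() retains its own reference (SAVESTART), so the
	 * extra one is dropped here; on failure relookup() has already
	 * consumed it, and only the pathname buffer must be reclaimed.
	 */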
	VREF(dvp);
	error = relookup(dvp, vpp, cn);
	if (!error)
		vrele(dvp);
	else {
		PNBUF_PUT(cn->cn_pnbuf);
		cn->cn_pnbuf = 0;
	}

	return (error);
}

/*
 * Create a shadow directory in the upper layer.
 * The new vnode is returned locked.
 *
 * (um) points to the union mount structure for access to
 * the mounting process's credentials.
 * (dvp) is the directory in which to create the shadow directory.
 * it is unlocked on entry and exit.
 * (cnp) is the componentname to be created.
 * (vpp) is the returned newly created shadow directory, which
 * is returned locked.
 *
 * N.B. We still attempt to create shadow directories even if the union
 * is mounted read-only, which is a little nonintuitive.
 */
int
union_mkshadow(um, dvp, cnp, vpp)
	struct union_mount *um;
	struct vnode *dvp;
	struct componentname *cnp;
	struct vnode **vpp;
{
	int error;
	struct vattr va;
	struct proc *p = cnp->cn_proc;
	struct componentname cn;

	error = union_relookup(um, dvp, vpp, cnp, &cn,
	    cnp->cn_nameptr, cnp->cn_namelen);
	if (error)
		return (error);

	if (*vpp) {
		VOP_ABORTOP(dvp, &cn);
		VOP_UNLOCK(dvp, 0);
		vrele(*vpp);
		*vpp = NULLVP;
		return (EEXIST);
	}

	/*
	 * policy: when creating the shadow directory in the
	 * upper layer, create it owned by the user who did
	 * the mount, group from parent directory, and mode
	 * 777 modified by umask (ie mostly identical to the
	 * mkdir syscall). (jsp, kb)
	 */

	VATTR_NULL(&va);
	va.va_type = VDIR;
	va.va_mode = um->um_cmode;

	/* VOP_LEASE: dvp is locked */
	VOP_LEASE(dvp, p, cn.cn_cred, LEASE_WRITE);

	error = VOP_MKDIR(dvp, vpp, &cn, &va);
	return (error);
}

/*
 * Create a whiteout entry in the upper layer.
 *
 * (um) points to the union mount structure for access to
 * the mounting process's credentials.
 * (dvp) is the directory in which to create the whiteout.
 * it is locked on entry and exit.
 * (cnp) is the componentname to be created.
 */
int
union_mkwhiteout(um, dvp, cnp, path)
	struct union_mount *um;
	struct vnode *dvp;
	struct componentname *cnp;
	char *path;
{
	int error;
	struct proc *p = cnp->cn_proc;
	struct vnode *wvp;
	struct componentname cn;

	VOP_UNLOCK(dvp, 0);
	error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
	if (error) {
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		return (error);
	}

	if (wvp) {
		VOP_ABORTOP(dvp, &cn);
		vrele(dvp);
		vrele(wvp);
		return (EEXIST);
	}

	/* VOP_LEASE: dvp is locked */
	VOP_LEASE(dvp, p, p->p_ucred, LEASE_WRITE);

	error = VOP_WHITEOUT(dvp, &cn, CREATE);
	if (error)
		VOP_ABORTOP(dvp, &cn);

	vrele(dvp);

	return (error);
}

/*
 * union_vn_create: creates and opens a new shadow file
 * on the upper union layer. this function is similar
 * in spirit to calling vn_open but it avoids calling namei().
 * the problem with calling namei is that a) it locks too many
 * things, and b) it doesn't start at the "right" directory,
 * whereas relookup is told where to start.
 */
int
union_vn_create(vpp, un, p)
	struct vnode **vpp;
	struct union_node *un;
	struct proc *p;
{
	struct vnode *vp;
	struct ucred *cred = p->p_ucred;
	struct vattr vat;
	struct vattr *vap = &vat;
	int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
	int error;
	int cmode = UN_FILEMODE & ~p->p_cwdi->cwdi_cmask;
	struct componentname cn;

	*vpp = NULLVP;

	/*
	 * Build a new componentname structure (for the same
	 * reasons outlined in union_mkshadow).
	 * The difference here is that the file is owned by
	 * the current user, rather than by the person who
	 * did the mount, since the current user needs to be
	 * able to write the file (that's why it is being
	 * copied in the first place).
	 */
	cn.cn_namelen = strlen(un->un_path);
	if ((cn.cn_namelen + 1) > MAXPATHLEN)
		return (ENAMETOOLONG);
	cn.cn_pnbuf = PNBUF_GET();
	memcpy(cn.cn_pnbuf, un->un_path, cn.cn_namelen+1);
	cn.cn_nameiop = CREATE;
	cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
	cn.cn_proc = p;
	cn.cn_cred = p->p_ucred;
	cn.cn_nameptr = cn.cn_pnbuf;
	cn.cn_hash = un->un_hash;
	cn.cn_consume = 0;

	VREF(un->un_dirvp);
	if ((error = relookup(un->un_dirvp, &vp, &cn)) != 0) {
		/* relookup() does not free the pathname buffer on error */
		PNBUF_PUT(cn.cn_pnbuf);
		return (error);
	}
	vrele(un->un_dirvp);

	if (vp) {
		VOP_ABORTOP(un->un_dirvp, &cn);
		if (un->un_dirvp == vp)
			vrele(un->un_dirvp);
		else
			vput(un->un_dirvp);
		vrele(vp);
		return (EEXIST);
	}

	/*
	 * Good - there was no race to create the file
	 * so go ahead and create it. The permissions
	 * on the file will be 0666 modified by the
	 * current user's umask. Access to the file, while
	 * it is unioned, will require access to the top *and*
	 * bottom files. Access when not unioned will simply
	 * require access to the top-level file.
	 * TODO: confirm choice of access permissions.
	 */
	VATTR_NULL(vap);
	vap->va_type = VREG;
	vap->va_mode = cmode;
	VOP_LEASE(un->un_dirvp, p, cred, LEASE_WRITE);
	if ((error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap)) != 0)
		return (error);

	if ((error = VOP_OPEN(vp, fmode, cred, p)) != 0) {
		vput(vp);
		return (error);
	}

	vp->v_writecount++;
	*vpp = vp;
	return (0);
}

int
union_vn_close(vp, fmode, cred, p)
	struct vnode *vp;
	int fmode;
	struct ucred *cred;
	struct proc *p;
{

	if (fmode & FWRITE)
		--vp->v_writecount;
	return (VOP_CLOSE(vp, fmode, cred, p));
}

void
union_removed_upper(un)
	struct union_node *un;
{
#if 1
	/*
	 * We do not set the uppervp to NULLVP here, because lowervp
	 * may also be NULLVP, so this routine would end up creating
	 * a bogus union node with no upper or lower VP (that causes
	 * pain in many places that assume at least one VP exists).
	 * Since we've removed this node from the cache hash chains,
	 * it won't be found again.  When all current holders
	 * release it, union_inactive() will vgone() it.
	 */
	union_diruncache(un);
#else
	union_newupper(un, NULLVP);
#endif

	if (un->un_flags & UN_CACHED) {
		un->un_flags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
	}

	if (un->un_flags & UN_ULOCK) {
		un->un_flags &= ~UN_ULOCK;
		VOP_UNLOCK(un->un_uppervp, 0);
	}
}

#if 0
struct vnode *
union_lowervp(vp)
	struct vnode *vp;
{
	struct union_node *un = VTOUNION(vp);

	if ((un->un_lowervp != NULLVP) &&
	    (vp->v_type == un->un_lowervp->v_type)) {
		if (vget(un->un_lowervp, 0) == 0)
			return (un->un_lowervp);
	}

	return (NULLVP);
}
#endif

/*
 * determine whether a whiteout is needed
 * during a remove/rmdir operation.
 */
int
union_dowhiteout(un, cred, p)
	struct union_node *un;
	struct ucred *cred;
	struct proc *p;
{
	struct vattr va;

	if (un->un_lowervp != NULLVP)
		return (1);

	if (VOP_GETATTR(un->un_uppervp, &va, cred, p) == 0 &&
	    (va.va_flags & OPAQUE))
		return (1);

	return (0);
}

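/*
 * Collect the constituent (non-union) vnodes of a union directory
 * into a table.  union_dircache_r() is called twice: first with
 * (vppp) nil, just to count the entries, then with a table to fill;
 * each vnode stored in the table gains a reference.
 */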
static void
union_dircache_r(vp, vppp, cntp)
	struct vnode *vp;
	struct vnode ***vppp;
	int *cntp;
{
	struct union_node *un;

	if (vp->v_op != union_vnodeop_p) {
		if (vppp) {
			VREF(vp);
			*(*vppp)++ = vp;
			if (--(*cntp) == 0)
				panic("union: dircache table too small");
		} else {
			(*cntp)++;
		}

		return;
	}

	un = VTOUNION(vp);
	if (un->un_uppervp != NULLVP)
		union_dircache_r(un->un_uppervp, vppp, cntp);
	if (un->un_lowervp != NULLVP)
		union_dircache_r(un->un_lowervp, vppp, cntp);
}

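/*
 * Return a union vnode for the next constituent directory in (vp)'s
 * dircache, building the cache on the first call.  Used by
 * union_readdirhook() below to step from the upper directory to the
 * lower one.  The result, if any, is returned referenced and locked.
 */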
struct vnode *
union_dircache(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	int cnt;
	struct vnode *nvp = NULLVP;
	struct vnode **vpp;
	struct vnode **dircache;
	int error;

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	dircache = VTOUNION(vp)->un_dircache;

	nvp = NULLVP;

	if (dircache == 0) {
		cnt = 0;
		union_dircache_r(vp, 0, &cnt);
		cnt++;
		dircache = (struct vnode **)
		    malloc(cnt * sizeof(struct vnode *),
			M_TEMP, M_WAITOK);
		vpp = dircache;
		union_dircache_r(vp, &vpp, &cnt);
		VTOUNION(vp)->un_dircache = dircache;
		*vpp = NULLVP;
		vpp = dircache + 1;
	} else {
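		/*
		 * On subsequent calls, advance past the constituent
		 * vnode currently installed as the top layer, so the
		 * next entry in the table is used.
		 */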
		vpp = dircache;
		do {
			if (*vpp++ == VTOUNION(vp)->un_uppervp)
				break;
		} while (*vpp != NULLVP);
	}

	if (*vpp == NULLVP)
		goto out;

	vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
	VREF(*vpp);
	error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0,
	    *vpp, NULLVP, 0);
	if (!error) {
		VTOUNION(vp)->un_dircache = 0;
		VTOUNION(nvp)->un_dircache = dircache;
	}

out:
	VOP_UNLOCK(vp, 0);
	return (nvp);
}

void
union_diruncache(un)
	struct union_node *un;
{
	struct vnode **vpp;

	if (un->un_dircache != 0) {
		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
			vrele(*vpp);
		free(un->un_dircache, M_TEMP);
		un->un_dircache = 0;
	}
}

/*
 * This hook is called from vn_readdir() to switch to the lower
 * directory after the upper directory has been read.
 */
int
union_readdirhook(struct vnode **vpp, struct file *fp, struct proc *p)
{
	struct vnode *vp = *vpp, *lvp;
	struct vattr va;
	int error;

	if (vp->v_op != union_vnodeop_p)
		return (0);

	if ((lvp = union_dircache(vp, p)) == NULLVP)
		return (0);

	/*
	 * If the directory is opaque,
	 * then don't show lower entries
	 */
	error = VOP_GETATTR(vp, &va, fp->f_cred, p);
	if (error || (va.va_flags & OPAQUE)) {
		vput(lvp);
		return (error);
	}

	error = VOP_OPEN(lvp, FREAD, fp->f_cred, p);
	if (error) {
		vput(lvp);
		return (error);
	}
	VOP_UNLOCK(lvp, 0);
	fp->f_data = (caddr_t) lvp;
	fp->f_offset = 0;
	error = vn_close(vp, FREAD, fp->f_cred, p);
	if (error)
		return (error);
	*vpp = lvp;
	return (0);
}