/*	$NetBSD: union_subr.c,v 1.1 2003/03/16 08:26:52 jdolecek Exp $	*/

/*
 * Copyright (c) 1994 Jan-Simon Pendry
 * Copyright (c) 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Jan-Simon Pendry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: union_subr.c,v 1.1 2003/03/16 08:26:52 jdolecek Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/queue.h>
#include <sys/mount.h>
#include <sys/stat.h>

#include <uvm/uvm_extern.h>

#include <fs/union/union.h>

#ifdef DIAGNOSTIC
#include <sys/proc.h>
#endif

/* must be a power of two, otherwise change UNION_HASH() */
#define NHASH 32

/* hash the addresses of the two layer vnodes into an unsigned bucket index */
#define UNION_HASH(u, l) \
	(((((unsigned long) (u)) + ((unsigned long) l)) >> 8) & (NHASH-1))

static LIST_HEAD(unhead, union_node) unhead[NHASH];
static int unvplock[NHASH];

static int union_list_lock __P((int));
static void union_list_unlock __P((int));
void union_updatevp __P((struct union_node *, struct vnode *, struct vnode *));
static int union_relookup __P((struct union_mount *, struct vnode *,
			       struct vnode **, struct componentname *,
			       struct componentname *, const char *, int));
int union_vn_close __P((struct vnode *, int, struct ucred *, struct proc *));
static void union_dircache_r __P((struct vnode *, struct vnode ***, int *));
struct vnode *union_dircache __P((struct vnode *, struct proc *));

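/*
 * Initialize the global union filesystem state: the hash
 * chains used to cache union nodes and the locks which
 * serialize access to those chains.
 */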
void
union_init()
{
	int i;

	for (i = 0; i < NHASH; i++)
		LIST_INIT(&unhead[i]);
	memset((caddr_t) unvplock, 0, sizeof(unvplock));
}

/*
 * Free global unionfs resources.
 */
void
union_done()
{
	/* Nothing */
}

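/*
 * Try to lock the hash chain (ix).  If the chain is already
 * locked, note that the lock is wanted, sleep until it is
 * released, and return 1 so the caller knows to retry; return
 * 0 once the lock has been acquired.
 */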
static int
union_list_lock(ix)
	int ix;
{

	if (unvplock[ix] & UN_LOCKED) {
		unvplock[ix] |= UN_WANTED;
		(void) tsleep(&unvplock[ix], PINOD, "unionlk", 0);
		return (1);
	}

	unvplock[ix] |= UN_LOCKED;

	return (0);
}

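/*
 * Unlock the hash chain (ix), waking up anyone who slept in
 * union_list_lock() while waiting for it.
 */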
static void
union_list_unlock(ix)
	int ix;
{

	unvplock[ix] &= ~UN_LOCKED;

	if (unvplock[ix] & UN_WANTED) {
		unvplock[ix] &= ~UN_WANTED;
		wakeup((caddr_t) &unvplock[ix]);
	}
}

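/*
 * Install new upper and lower vnodes in (un).  Since changing
 * either layer can move the node to a different hash chain,
 * the node is unhooked from its old chain and reinserted on
 * the new one, and the displaced vnodes are released.  The two
 * chains are locked in ascending order to avoid deadlock with
 * a process doing the opposite update.
 */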
void
union_updatevp(un, uppervp, lowervp)
	struct union_node *un;
	struct vnode *uppervp;
	struct vnode *lowervp;
{
	int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
	int nhash = UNION_HASH(uppervp, lowervp);
	int docache = (lowervp != NULLVP || uppervp != NULLVP);
	int lhash, uhash;

	/*
	 * Ensure locking is ordered from lower to higher
	 * to avoid deadlocks.
	 */
	if (nhash < ohash) {
		lhash = nhash;
		uhash = ohash;
	} else {
		lhash = ohash;
		uhash = nhash;
	}

	if (lhash != uhash)
		while (union_list_lock(lhash))
			continue;

	while (union_list_lock(uhash))
		continue;

	if (ohash != nhash || !docache) {
		if (un->un_flags & UN_CACHED) {
			un->un_flags &= ~UN_CACHED;
			LIST_REMOVE(un, un_cache);
		}
	}

	if (ohash != nhash)
		union_list_unlock(ohash);

	if (un->un_lowervp != lowervp) {
		if (un->un_lowervp) {
			vrele(un->un_lowervp);
			if (un->un_path) {
				free(un->un_path, M_TEMP);
				un->un_path = 0;
			}
			if (un->un_dirvp) {
				vrele(un->un_dirvp);
				un->un_dirvp = NULLVP;
			}
		}
		un->un_lowervp = lowervp;
		un->un_lowersz = VNOVAL;
	}

	if (un->un_uppervp != uppervp) {
		if (un->un_uppervp)
			vrele(un->un_uppervp);

		un->un_uppervp = uppervp;
		un->un_uppersz = VNOVAL;
	}

	if (docache && (ohash != nhash)) {
		LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

	union_list_unlock(nhash);
}

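/*
 * Set (lowervp) as the lower layer of (un), keeping the
 * existing upper layer.
 */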
void
union_newlower(un, lowervp)
	struct union_node *un;
	struct vnode *lowervp;
{

	union_updatevp(un, un->un_uppervp, lowervp);
}

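/*
 * Set (uppervp) as the upper layer of (un), keeping the
 * existing lower layer.
 */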
void
union_newupper(un, uppervp)
	struct union_node *un;
	struct vnode *uppervp;
{

	union_updatevp(un, uppervp, un->un_lowervp);
}

/*
 * Keep track of size changes in the underlying vnodes.
 * If the size changes, then call back to the vm layer
 * giving priority to the upper layer size.
 */
void
union_newsize(vp, uppersz, lowersz)
	struct vnode *vp;
	off_t uppersz, lowersz;
{
	struct union_node *un;
	off_t sz;

	/* only interested in regular files */
	if (vp->v_type != VREG)
		return;

	un = VTOUNION(vp);
	sz = VNOVAL;

	if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) {
		un->un_uppersz = uppersz;
		if (sz == VNOVAL)
			sz = un->un_uppersz;
	}

	if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) {
		un->un_lowersz = lowersz;
		if (sz == VNOVAL)
			sz = un->un_lowersz;
	}

	if (sz != VNOVAL) {
#ifdef UNION_DIAGNOSTIC
		printf("union: %s size now %qd\n",
		    uppersz != VNOVAL ? "upper" : "lower", sz);
#endif
		uvm_vnp_setsize(vp, sz);
	}
}

/*
 * allocate a union_node/vnode pair. the vnode is
 * referenced and locked. the new vnode is returned
 * via (vpp). (mp) is the mountpoint of the union filesystem,
 * (dvp) is the parent directory where the upper layer object
 * should exist (but doesn't) and (cnp) is the componentname
 * information which is partially copied to allow the upper
 * layer object to be created at a later time. (uppervp)
 * and (lowervp) reference the upper and lower layer objects
 * being mapped. either, but not both, can be nil.
 * if supplied, (uppervp) is locked.
 * the references are either maintained in the new union_node
 * object which is allocated, or they are vrele'd.
 *
 * all union_nodes are maintained on a singly-linked
 * list. new nodes are only allocated when they cannot
 * be found on this list. entries on the list are
 * removed when the vfs reclaim entry is called.
 *
 * a single lock is kept for the entire list. this is
 * needed because the getnewvnode() function can block
 * waiting for a vnode to become free, in which case there
 * may be more than one process trying to get the same
 * vnode. this lock is only taken if we are going to
 * call getnewvnode(), since the kernel itself is single-threaded.
 *
 * if an entry is found on the list, then call vget() to
 * take a reference. this is done because there may be
 * zero references to it and so it needs to be removed from
 * the vnode free list.
 */
int
union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
	struct vnode **vpp;
	struct mount *mp;
	struct vnode *undvp;		/* parent union vnode */
	struct vnode *dvp;		/* may be null */
	struct componentname *cnp;	/* may be null */
	struct vnode *uppervp;		/* may be null */
	struct vnode *lowervp;		/* may be null */
	int docache;
{
	int error;
	struct union_node *un = NULL;
	struct vnode *xlowervp = NULLVP;
	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
	int hash = 0;
	int vflag;
	int try;

	if (uppervp == NULLVP && lowervp == NULLVP)
		panic("union: unidentifiable allocation");

	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
		xlowervp = lowervp;
		lowervp = NULLVP;
	}

	/* detect the root vnode (and aliases) */
	vflag = VLAYER;
	if ((uppervp == um->um_uppervp) &&
	    ((lowervp == NULLVP) || lowervp == um->um_lowervp)) {
		if (lowervp == NULLVP) {
			lowervp = um->um_lowervp;
			if (lowervp != NULLVP)
				VREF(lowervp);
		}
		vflag = VROOT;
	}

loop:
	if (!docache) {
		un = 0;
	} else for (try = 0; try < 3; try++) {
		switch (try) {
		case 0:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, lowervp);
			break;

		case 1:
			if (uppervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, NULLVP);
			break;

		case 2:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(NULLVP, lowervp);
			break;
		}

		while (union_list_lock(hash))
			continue;

		for (un = unhead[hash].lh_first; un != 0;
		     un = un->un_cache.le_next) {
			if ((un->un_lowervp == lowervp ||
			     un->un_lowervp == NULLVP) &&
			    (un->un_uppervp == uppervp ||
			     un->un_uppervp == NULLVP) &&
			    (UNIONTOV(un)->v_mount == mp)) {
				if (vget(UNIONTOV(un), 0)) {
					union_list_unlock(hash);
					goto loop;
				}
				break;
			}
		}

		union_list_unlock(hash);

		if (un)
			break;
	}

	if (un) {
		/*
		 * Obtain a lock on the union_node.
		 * uppervp is locked, though un->un_uppervp
		 * may not be. this doesn't break the locking
		 * hierarchy since in the case that un->un_uppervp
		 * is not yet locked it will be vrele'd and replaced
		 * with uppervp.
		 */

		if ((dvp != NULLVP) && (uppervp == dvp)) {
			/*
			 * Access ``.'', so (un) will already
			 * be locked. Since this process has
			 * the lock on (uppervp) no other
			 * process can hold the lock on (un).
			 */
#ifdef DIAGNOSTIC
			if ((un->un_flags & UN_LOCKED) == 0)
				panic("union: . not locked");
			else if (curproc && un->un_pid != curproc->p_pid &&
			    un->un_pid > -1 && curproc->p_pid > -1)
				panic("union: allocvp not lock owner");
#endif
		} else {
			if (un->un_flags & UN_LOCKED) {
				vrele(UNIONTOV(un));
				un->un_flags |= UN_WANTED;
				(void) tsleep(&un->un_flags, PINOD,
				    "unionalloc", 0);
				goto loop;
			}
			un->un_flags |= UN_LOCKED;

#ifdef DIAGNOSTIC
			if (curproc)
				un->un_pid = curproc->p_pid;
			else
				un->un_pid = -1;
#endif
		}

		/*
		 * At this point, the union_node is locked,
		 * un->un_uppervp may not be locked, and uppervp
		 * is locked or nil.
		 */

		/*
		 * Save information about the upper layer.
		 */
		if (uppervp != un->un_uppervp) {
			union_newupper(un, uppervp);
		} else if (uppervp) {
			vrele(uppervp);
		}

		if (un->un_uppervp) {
			un->un_flags |= UN_ULOCK;
			un->un_flags &= ~UN_KLOCK;
		}

		/*
		 * Save information about the lower layer.
		 * This needs to keep track of pathname
		 * and directory information which union_vn_create
		 * might need.
		 */
		if (lowervp != un->un_lowervp) {
			union_newlower(un, lowervp);
			if (cnp && (lowervp != NULLVP)) {
				un->un_hash = cnp->cn_hash;
				un->un_path = malloc(cnp->cn_namelen+1,
				    M_TEMP, M_WAITOK);
				memcpy(un->un_path, cnp->cn_nameptr,
				    cnp->cn_namelen);
				un->un_path[cnp->cn_namelen] = '\0';
				VREF(dvp);
				un->un_dirvp = dvp;
			}
		} else if (lowervp) {
			vrele(lowervp);
		}
		*vpp = UNIONTOV(un);
		return (0);
	}

	if (docache) {
		/*
		 * otherwise lock the vp list while we call getnewvnode()
		 * since that can block.
		 */
		hash = UNION_HASH(uppervp, lowervp);

		if (union_list_lock(hash))
			goto loop;
	}

	error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp);
	if (error) {
		if (uppervp) {
			if (dvp == uppervp)
				vrele(uppervp);
			else
				vput(uppervp);
		}
		if (lowervp)
			vrele(lowervp);

		goto out;
	}

	MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
	    M_TEMP, M_WAITOK);

	(*vpp)->v_flag |= vflag;
	(*vpp)->v_vnlock = NULL;	/* Make upper layers call VOP_LOCK */
	if (uppervp)
		(*vpp)->v_type = uppervp->v_type;
	else
		(*vpp)->v_type = lowervp->v_type;
	un = VTOUNION(*vpp);
	un->un_vnode = *vpp;
	un->un_uppervp = uppervp;
	un->un_uppersz = VNOVAL;
	un->un_lowervp = lowervp;
	un->un_lowersz = VNOVAL;
	un->un_pvp = undvp;
	if (undvp != NULLVP)
		VREF(undvp);
	un->un_dircache = 0;
	un->un_openl = 0;
	un->un_flags = UN_LOCKED;
	if (un->un_uppervp)
		un->un_flags |= UN_ULOCK;
#ifdef DIAGNOSTIC
	if (curproc)
		un->un_pid = curproc->p_pid;
	else
		un->un_pid = -1;
#endif
	if (cnp && (lowervp != NULLVP)) {
		un->un_hash = cnp->cn_hash;
		un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
		memcpy(un->un_path, cnp->cn_nameptr, cnp->cn_namelen);
		un->un_path[cnp->cn_namelen] = '\0';
		VREF(dvp);
		un->un_dirvp = dvp;
	} else {
		un->un_hash = 0;
		un->un_path = 0;
		un->un_dirvp = 0;
	}

	if (docache) {
		LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

	if (xlowervp)
		vrele(xlowervp);

out:
	if (docache)
		union_list_unlock(hash);

	return (error);
}

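/*
 * Free the union node (vp): unhook it from the cache hash
 * chain and release its references to the parent, upper,
 * lower and saved directory vnodes.  This is the inverse of
 * union_allocvp() and is called from the vfs reclaim entry.
 */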
int
union_freevp(vp)
	struct vnode *vp;
{
	struct union_node *un = VTOUNION(vp);

	if (un->un_flags & UN_CACHED) {
		un->un_flags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
	}

	if (un->un_pvp != NULLVP)
		vrele(un->un_pvp);
	if (un->un_uppervp != NULLVP)
		vrele(un->un_uppervp);
	if (un->un_lowervp != NULLVP)
		vrele(un->un_lowervp);
	if (un->un_dirvp != NULLVP)
		vrele(un->un_dirvp);
	if (un->un_path)
		free(un->un_path, M_TEMP);

	FREE(vp->v_data, M_TEMP);
	vp->v_data = 0;

	return (0);
}

/*
 * copyfile. copy the vnode (fvp) to the vnode (tvp)
 * using a sequence of reads and writes. both (fvp)
 * and (tvp) are locked on entry and exit.
 */
int
union_copyfile(fvp, tvp, cred, p)
	struct vnode *fvp;
	struct vnode *tvp;
	struct ucred *cred;
	struct proc *p;
{
	char *buf;
	struct uio uio;
	struct iovec iov;
	int error = 0;

	/*
	 * strategy:
	 * allocate a buffer of size MAXBSIZE.
	 * loop doing reads and writes, keeping track
	 * of the current uio offset.
	 * give up at the first sign of trouble.
	 */

	uio.uio_procp = p;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_offset = 0;

	VOP_UNLOCK(fvp, 0);				/* XXX */
	VOP_LEASE(fvp, p, cred, LEASE_READ);
	vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY);		/* XXX */
	VOP_UNLOCK(tvp, 0);				/* XXX */
	VOP_LEASE(tvp, p, cred, LEASE_WRITE);
	vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);		/* XXX */

	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);

	/* ugly loop follows... */
	do {
		off_t offset = uio.uio_offset;

		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		iov.iov_base = buf;
		iov.iov_len = MAXBSIZE;
		uio.uio_resid = iov.iov_len;
		uio.uio_rw = UIO_READ;
		error = VOP_READ(fvp, &uio, 0, cred);

		if (error == 0) {
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			iov.iov_base = buf;
			iov.iov_len = MAXBSIZE - uio.uio_resid;
			uio.uio_offset = offset;
			uio.uio_rw = UIO_WRITE;
			uio.uio_resid = iov.iov_len;

			if (uio.uio_resid == 0)
				break;

			do {
				error = VOP_WRITE(tvp, &uio, 0, cred);
			} while ((uio.uio_resid > 0) && (error == 0));
		}

	} while (error == 0);

	free(buf, M_TEMP);
	return (error);
}

/*
 * (un) is assumed to be locked on entry and remains
 * locked on exit.
 */
int
union_copyup(un, docopy, cred, p)
	struct union_node *un;
	int docopy;
	struct ucred *cred;
	struct proc *p;
{
	int error;
	struct vnode *lvp, *uvp;
	struct vattr lvattr, uvattr;

	error = union_vn_create(&uvp, un, p);
	if (error)
		return (error);

	/* at this point, uppervp is locked */
	union_newupper(un, uvp);
	un->un_flags |= UN_ULOCK;

	lvp = un->un_lowervp;

	if (docopy) {
		/*
		 * XXX - should not ignore errors
		 * from VOP_CLOSE
		 */
		vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY);

		error = VOP_GETATTR(lvp, &lvattr, cred, p);
		if (error == 0)
			error = VOP_OPEN(lvp, FREAD, cred, p);
		if (error == 0) {
			error = union_copyfile(lvp, uvp, cred, p);
			(void) VOP_CLOSE(lvp, FREAD, cred, p);
		}
		if (error == 0) {
			/* Copy permissions up too */
			VATTR_NULL(&uvattr);
			uvattr.va_mode = lvattr.va_mode;
			uvattr.va_flags = lvattr.va_flags;
			error = VOP_SETATTR(uvp, &uvattr, cred, p);
		}
		VOP_UNLOCK(lvp, 0);
#ifdef UNION_DIAGNOSTIC
		if (error == 0)
			uprintf("union: copied up %s\n", un->un_path);
#endif

	}
	union_vn_close(uvp, FWRITE, cred, p);

	/*
	 * Subsequent IOs will go to the top layer, so
	 * call close on the lower vnode and open on the
	 * upper vnode to ensure that the filesystem keeps
	 * its reference counts right. This doesn't do
	 * the right thing with (cred) and (FREAD) though.
	 * Ignoring error returns is not right, either.
	 */
	if (error == 0) {
		int i;

		vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY);
		for (i = 0; i < un->un_openl; i++) {
			(void) VOP_CLOSE(lvp, FREAD, cred, p);
			(void) VOP_OPEN(uvp, FREAD, cred, p);
		}
		un->un_openl = 0;
		VOP_UNLOCK(lvp, 0);
	}

	return (error);
}

static int
union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
	struct union_mount *um;
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
	struct componentname *cn;
	const char *path;
	int pathlen;
{
	int error;

	/*
	 * A new componentname structure must be faked up because
	 * there is no way to know where the upper level cnp came
	 * from or what it is being used for. This must duplicate
	 * some of the work done by NDINIT, some of the work done
	 * by namei, some of the work done by lookup and some of
	 * the work done by VOP_LOOKUP when given a CREATE flag.
	 * Conclusion: Horrible.
	 *
	 * The pathname buffer will be PNBUF_PUT'd by VOP_MKDIR.
	 */
	cn->cn_namelen = pathlen;
	if ((cn->cn_namelen + 1) > MAXPATHLEN)
		return (ENAMETOOLONG);
	cn->cn_pnbuf = PNBUF_GET();
	memcpy(cn->cn_pnbuf, path, cn->cn_namelen);
	cn->cn_pnbuf[cn->cn_namelen] = '\0';

	cn->cn_nameiop = CREATE;
	cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
	cn->cn_proc = cnp->cn_proc;
	if (um->um_op == UNMNT_ABOVE)
		cn->cn_cred = cnp->cn_cred;
	else
		cn->cn_cred = um->um_cred;
	cn->cn_nameptr = cn->cn_pnbuf;
	cn->cn_hash = cnp->cn_hash;
	cn->cn_consume = cnp->cn_consume;

	VREF(dvp);
	error = relookup(dvp, vpp, cn);
	if (!error)
		vrele(dvp);
	else {
		PNBUF_PUT(cn->cn_pnbuf);
		cn->cn_pnbuf = 0;
	}

	return (error);
}

/*
 * Create a shadow directory in the upper layer.
 * The new vnode is returned locked.
 *
 * (um) points to the union mount structure for access to
 * the mounting process's credentials.
 * (dvp) is the directory in which to create the shadow directory.
 * it is unlocked on entry and exit.
 * (cnp) is the componentname to be created.
 * (vpp) is the returned newly created shadow directory, which
 * is returned locked.
 *
 * N.B. We still attempt to create shadow directories even if the union
 * is mounted read-only, which is a little nonintuitive.
 */
int
union_mkshadow(um, dvp, cnp, vpp)
	struct union_mount *um;
	struct vnode *dvp;
	struct componentname *cnp;
	struct vnode **vpp;
{
	int error;
	struct vattr va;
	struct proc *p = cnp->cn_proc;
	struct componentname cn;

	error = union_relookup(um, dvp, vpp, cnp, &cn,
	    cnp->cn_nameptr, cnp->cn_namelen);
	if (error)
		return (error);

	if (*vpp) {
		VOP_ABORTOP(dvp, &cn);
		VOP_UNLOCK(dvp, 0);
		vrele(*vpp);
		*vpp = NULLVP;
		return (EEXIST);
	}

	/*
	 * policy: when creating the shadow directory in the
	 * upper layer, create it owned by the user who did
	 * the mount, group from parent directory, and mode
	 * 777 modified by umask (i.e. mostly identical to the
	 * mkdir syscall). (jsp, kb)
	 */

	VATTR_NULL(&va);
	va.va_type = VDIR;
	va.va_mode = um->um_cmode;

	/* VOP_LEASE: dvp is locked */
	VOP_LEASE(dvp, p, cn.cn_cred, LEASE_WRITE);

	error = VOP_MKDIR(dvp, vpp, &cn, &va);
	return (error);
}

/*
 * Create a whiteout entry in the upper layer.
 *
 * (um) points to the union mount structure for access to
 * the mounting process's credentials.
 * (dvp) is the directory in which to create the whiteout.
 * it is locked on entry and exit.
 * (cnp) is the componentname to be created.
 */
int
union_mkwhiteout(um, dvp, cnp, path)
	struct union_mount *um;
	struct vnode *dvp;
	struct componentname *cnp;
	char *path;
{
	int error;
	struct proc *p = cnp->cn_proc;
	struct vnode *wvp;
	struct componentname cn;

	VOP_UNLOCK(dvp, 0);
	error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
	if (error) {
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		return (error);
	}

	if (wvp) {
		VOP_ABORTOP(dvp, &cn);
		vrele(dvp);
		vrele(wvp);
		return (EEXIST);
	}

	/* VOP_LEASE: dvp is locked */
	VOP_LEASE(dvp, p, p->p_ucred, LEASE_WRITE);

	error = VOP_WHITEOUT(dvp, &cn, CREATE);
	if (error)
		VOP_ABORTOP(dvp, &cn);

	vrele(dvp);

	return (error);
}

/*
 * union_vn_create: creates and opens a new shadow file
 * on the upper union layer. this function is similar
 * in spirit to calling vn_open() but it avoids calling namei().
 * the problem with calling namei() is that a) it locks too many
 * things, and b) it doesn't start at the "right" directory,
 * whereas relookup() is told where to start.
 */
int
union_vn_create(vpp, un, p)
	struct vnode **vpp;
	struct union_node *un;
	struct proc *p;
{
	struct vnode *vp;
	struct ucred *cred = p->p_ucred;
	struct vattr vat;
	struct vattr *vap = &vat;
	int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
	int error;
	int cmode = UN_FILEMODE & ~p->p_cwdi->cwdi_cmask;
	struct componentname cn;

	*vpp = NULLVP;

	/*
	 * Build a new componentname structure (for the same
	 * reasons outlined in union_mkshadow()).
	 * The difference here is that the file is owned by
	 * the current user, rather than by the person who
	 * did the mount, since the current user needs to be
	 * able to write the file (that's why it is being
	 * copied in the first place).
	 */
	cn.cn_namelen = strlen(un->un_path);
	if ((cn.cn_namelen + 1) > MAXPATHLEN)
		return (ENAMETOOLONG);
	cn.cn_pnbuf = PNBUF_GET();
	memcpy(cn.cn_pnbuf, un->un_path, cn.cn_namelen+1);
	cn.cn_nameiop = CREATE;
	cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
	cn.cn_proc = p;
	cn.cn_cred = p->p_ucred;
	cn.cn_nameptr = cn.cn_pnbuf;
	cn.cn_hash = un->un_hash;
	cn.cn_consume = 0;

	VREF(un->un_dirvp);
	if ((error = relookup(un->un_dirvp, &vp, &cn)) != 0)
		return (error);
	vrele(un->un_dirvp);

	if (vp) {
		VOP_ABORTOP(un->un_dirvp, &cn);
		if (un->un_dirvp == vp)
			vrele(un->un_dirvp);
		else
			vput(un->un_dirvp);
		vrele(vp);
		return (EEXIST);
	}

	/*
	 * Good - there was no race to create the file
	 * so go ahead and create it. The permissions
	 * on the file will be 0666 modified by the
	 * current user's umask. Access to the file, while
	 * it is unioned, will require access to the top *and*
	 * bottom files. Access when not unioned will simply
	 * require access to the top-level file.
	 * TODO: confirm choice of access permissions.
	 */
	VATTR_NULL(vap);
	vap->va_type = VREG;
	vap->va_mode = cmode;
	VOP_LEASE(un->un_dirvp, p, cred, LEASE_WRITE);
	if ((error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap)) != 0)
		return (error);

	if ((error = VOP_OPEN(vp, fmode, cred, p)) != 0) {
		vput(vp);
		return (error);
	}

	vp->v_writecount++;
	*vpp = vp;
	return (0);
}

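/*
 * Close a vnode opened by union_vn_create(), dropping the
 * write count which was taken when the shadow file was
 * created and opened.
 */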
int
union_vn_close(vp, fmode, cred, p)
	struct vnode *vp;
	int fmode;
	struct ucred *cred;
	struct proc *p;
{

	if (fmode & FWRITE)
		--vp->v_writecount;
	return (VOP_CLOSE(vp, fmode, cred, p));
}

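/*
 * The upper layer object of (un) has been removed from the
 * upper filesystem.  Discard the directory vnode list, unhook
 * the node from the cache hash chains so it cannot be found
 * again, and release the upper vnode lock if this node holds it.
 */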
void
union_removed_upper(un)
	struct union_node *un;
{
#if 1
	/*
	 * We do not set the uppervp to NULLVP here, because lowervp
	 * may also be NULLVP, so this routine would end up creating
	 * a bogus union node with no upper or lower VP (that causes
	 * pain in many places that assume at least one VP exists).
	 * Since we've removed this node from the cache hash chains,
	 * it won't be found again. When all current holders
	 * release it, union_inactive() will vgone() it.
	 */
	union_diruncache(un);
#else
	union_newupper(un, NULLVP);
#endif

	if (un->un_flags & UN_CACHED) {
		un->un_flags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
	}

	if (un->un_flags & UN_ULOCK) {
		un->un_flags &= ~UN_ULOCK;
		VOP_UNLOCK(un->un_uppervp, 0);
	}
}

#if 0
struct vnode *
union_lowervp(vp)
	struct vnode *vp;
{
	struct union_node *un = VTOUNION(vp);

	if ((un->un_lowervp != NULLVP) &&
	    (vp->v_type == un->un_lowervp->v_type)) {
		if (vget(un->un_lowervp, 0) == 0)
			return (un->un_lowervp);
	}

	return (NULLVP);
}
#endif

/*
 * determine whether a whiteout is needed
 * during a remove/rmdir operation.
 */
int
union_dowhiteout(un, cred, p)
	struct union_node *un;
	struct ucred *cred;
	struct proc *p;
{
	struct vattr va;

	if (un->un_lowervp != NULLVP)
		return (1);

	if (VOP_GETATTR(un->un_uppervp, &va, cred, p) == 0 &&
	    (va.va_flags & OPAQUE))
		return (1);

	return (0);
}

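/*
 * Recursively descend through the layers of the union vnode
 * (vp).  With a null (vppp) this just counts the non-union
 * vnodes found; otherwise it stores a referenced pointer to
 * each one in the table at (*vppp), panicking if the table
 * fills up.
 */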
static void
union_dircache_r(vp, vppp, cntp)
	struct vnode *vp;
	struct vnode ***vppp;
	int *cntp;
{
	struct union_node *un;

	if (vp->v_op != union_vnodeop_p) {
		if (vppp) {
			VREF(vp);
			*(*vppp)++ = vp;
			if (--(*cntp) == 0)
				panic("union: dircache table too small");
		} else {
			(*cntp)++;
		}

		return;
	}

	un = VTOUNION(vp);
	if (un->un_uppervp != NULLVP)
		union_dircache_r(un->un_uppervp, vppp, cntp);
	if (un->un_lowervp != NULLVP)
		union_dircache_r(un->un_lowervp, vppp, cntp);
}

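/*
 * Build (on first use) or advance through the cached list of
 * directory vnodes which make up the union directory (vp).
 * Each call allocates a fresh union node stacked over the next
 * directory in the list, so that the layers can be visited one
 * at a time; NULLVP is returned once the list is exhausted.
 */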
struct vnode *
union_dircache(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	int cnt;
	struct vnode *nvp = NULLVP;
	struct vnode **vpp;
	struct vnode **dircache;
	int error;

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	dircache = VTOUNION(vp)->un_dircache;

	nvp = NULLVP;

	if (dircache == 0) {
		cnt = 0;
		union_dircache_r(vp, 0, &cnt);
		cnt++;
		dircache = (struct vnode **)
		    malloc(cnt * sizeof(struct vnode *),
			M_TEMP, M_WAITOK);
		vpp = dircache;
		union_dircache_r(vp, &vpp, &cnt);
		VTOUNION(vp)->un_dircache = dircache;
		*vpp = NULLVP;
		vpp = dircache + 1;
	} else {
		vpp = dircache;
		do {
			if (*vpp++ == VTOUNION(vp)->un_uppervp)
				break;
		} while (*vpp != NULLVP);
	}

	if (*vpp == NULLVP)
		goto out;

	vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
	VREF(*vpp);
	error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0,
	    *vpp, NULLVP, 0);
	if (!error) {
		VTOUNION(vp)->un_dircache = 0;
		VTOUNION(nvp)->un_dircache = dircache;
	}

out:
	VOP_UNLOCK(vp, 0);
	return (nvp);
}

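/*
 * Release the directory vnode list built by union_dircache(),
 * if (un) has one.
 */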
void
union_diruncache(un)
	struct union_node *un;
{
	struct vnode **vpp;

	if (un->un_dircache != 0) {
		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
			vrele(*vpp);
		free(un->un_dircache, M_TEMP);
		un->un_dircache = 0;
	}
}