union_vnops.c revision 1.45 1 /* $NetBSD: union_vnops.c,v 1.45 2011/08/12 17:41:17 hannken Exp $ */
2
3 /*
4 * Copyright (c) 1992, 1993, 1994, 1995
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Jan-Simon Pendry.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * @(#)union_vnops.c 8.33 (Berkeley) 7/31/95
35 */
36
37 /*
38 * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
39 *
40 * This code is derived from software contributed to Berkeley by
41 * Jan-Simon Pendry.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 * notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 * must display the following acknowledgement:
53 * This product includes software developed by the University of
54 * California, Berkeley and its contributors.
55 * 4. Neither the name of the University nor the names of its contributors
56 * may be used to endorse or promote products derived from this software
57 * without specific prior written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69 * SUCH DAMAGE.
70 *
71 * @(#)union_vnops.c 8.33 (Berkeley) 7/31/95
72 */
73
74 #include <sys/cdefs.h>
75 __KERNEL_RCSID(0, "$NetBSD: union_vnops.c,v 1.45 2011/08/12 17:41:17 hannken Exp $");
76
77 #include <sys/param.h>
78 #include <sys/systm.h>
79 #include <sys/proc.h>
80 #include <sys/file.h>
81 #include <sys/time.h>
82 #include <sys/stat.h>
83 #include <sys/vnode.h>
84 #include <sys/mount.h>
85 #include <sys/namei.h>
86 #include <sys/malloc.h>
87 #include <sys/buf.h>
88 #include <sys/queue.h>
89 #include <sys/lock.h>
90 #include <sys/kauth.h>
91
92 #include <fs/union/union.h>
93 #include <miscfs/genfs/genfs.h>
94 #include <miscfs/specfs/specdev.h>
95
/*
 * Vnode operation entry points.  Every handler receives the
 * operation-specific argument structure through an opaque pointer.
 */

/* Name lookup and directory entry manipulation. */
int	union_lookup(void *);
int	union_create(void *);
int	union_whiteout(void *);
int	union_mknod(void *);
int	union_remove(void *);
int	union_link(void *);
int	union_rename(void *);
int	union_mkdir(void *);
int	union_rmdir(void *);
int	union_symlink(void *);
int	union_readdir(void *);
int	union_readlink(void *);
int	union_abortop(void *);

/* Open file handling and attributes. */
int	union_open(void *);
int	union_close(void *);
int	union_access(void *);
int	union_getattr(void *);
int	union_setattr(void *);
int	union_pathconf(void *);
int	union_advlock(void *);

/* Data transfer and related operations. */
int	union_read(void *);
int	union_write(void *);
int	union_ioctl(void *);
int	union_poll(void *);
int	union_kqfilter(void *);
int	union_revoke(void *);
int	union_mmap(void *);
int	union_fsync(void *);
int	union_seek(void *);
int	union_bmap(void *);
int	union_strategy(void *);
int	union_bwrite(void *);
int	union_getpages(void *);
int	union_putpages(void *);

/* Vnode life cycle, locking and debugging. */
int	union_inactive(void *);
int	union_reclaim(void *);
int	union_lock(void *);
int	union_unlock(void *);
int	union_islocked(void *);
int	union_print(void *);

/* File-local helpers. */
static void	union_fixup(struct union_node *un);
static int	union_lookup1(struct vnode *udvp, struct vnode **dvpp,
		    struct vnode **vpp, struct componentname *cnp);
140
141
142 /*
143 * Global vfs data structures
144 */
145 int (**union_vnodeop_p)(void *);
146 const struct vnodeopv_entry_desc union_vnodeop_entries[] = {
147 { &vop_default_desc, vn_default_error },
148 { &vop_lookup_desc, union_lookup }, /* lookup */
149 { &vop_create_desc, union_create }, /* create */
150 { &vop_whiteout_desc, union_whiteout }, /* whiteout */
151 { &vop_mknod_desc, union_mknod }, /* mknod */
152 { &vop_open_desc, union_open }, /* open */
153 { &vop_close_desc, union_close }, /* close */
154 { &vop_access_desc, union_access }, /* access */
155 { &vop_getattr_desc, union_getattr }, /* getattr */
156 { &vop_setattr_desc, union_setattr }, /* setattr */
157 { &vop_read_desc, union_read }, /* read */
158 { &vop_write_desc, union_write }, /* write */
159 { &vop_ioctl_desc, union_ioctl }, /* ioctl */
160 { &vop_poll_desc, union_poll }, /* select */
161 { &vop_revoke_desc, union_revoke }, /* revoke */
162 { &vop_mmap_desc, union_mmap }, /* mmap */
163 { &vop_fsync_desc, union_fsync }, /* fsync */
164 { &vop_seek_desc, union_seek }, /* seek */
165 { &vop_remove_desc, union_remove }, /* remove */
166 { &vop_link_desc, union_link }, /* link */
167 { &vop_rename_desc, union_rename }, /* rename */
168 { &vop_mkdir_desc, union_mkdir }, /* mkdir */
169 { &vop_rmdir_desc, union_rmdir }, /* rmdir */
170 { &vop_symlink_desc, union_symlink }, /* symlink */
171 { &vop_readdir_desc, union_readdir }, /* readdir */
172 { &vop_readlink_desc, union_readlink }, /* readlink */
173 { &vop_abortop_desc, union_abortop }, /* abortop */
174 { &vop_inactive_desc, union_inactive }, /* inactive */
175 { &vop_reclaim_desc, union_reclaim }, /* reclaim */
176 { &vop_lock_desc, union_lock }, /* lock */
177 { &vop_unlock_desc, union_unlock }, /* unlock */
178 { &vop_bmap_desc, union_bmap }, /* bmap */
179 { &vop_strategy_desc, union_strategy }, /* strategy */
180 { &vop_bwrite_desc, union_bwrite }, /* bwrite */
181 { &vop_print_desc, union_print }, /* print */
182 { &vop_islocked_desc, union_islocked }, /* islocked */
183 { &vop_pathconf_desc, union_pathconf }, /* pathconf */
184 { &vop_advlock_desc, union_advlock }, /* advlock */
185 { &vop_getpages_desc, union_getpages }, /* getpages */
186 { &vop_putpages_desc, union_putpages }, /* putpages */
187 { &vop_kqfilter_desc, union_kqfilter }, /* kqfilter */
188 { NULL, NULL }
189 };
190 const struct vnodeopv_desc union_vnodeop_opv_desc =
191 { &union_vnodeop_p, union_vnodeop_entries };
192
/*
 * FIXUP(un): make sure the upper vnode of union node `un' is locked.
 * If UN_ULOCK is not set in un->un_flags, union_fixup() is called; it
 * locks un->un_uppervp and sets the flag.
 *
 * The expansion is wrapped in do { } while (0) so that `FIXUP(x);' is
 * exactly one statement and may be used as the unbraced body of an
 * if/else.  The argument is evaluated only once.
 */
#define FIXUP(un) do {							\
	struct union_node *fixup_un_ = (un);				\
	if ((fixup_un_->un_flags & UN_ULOCK) == 0) {			\
		union_fixup(fixup_un_);					\
	}								\
} while (/*CONSTCOND*/ 0)

/*
 * NODE_IS_SPECIAL(vp): true when vp is a block device, character
 * device, socket or fifo.  Note that `vp' is evaluated several times,
 * so the argument must be free of side effects.
 */
#define NODE_IS_SPECIAL(vp)						\
	((vp)->v_type == VBLK || (vp)->v_type == VCHR ||		\
	 (vp)->v_type == VSOCK || (vp)->v_type == VFIFO)
201
202 static void
203 union_fixup(struct union_node *un)
204 {
205
206 vn_lock(un->un_uppervp, LK_EXCLUSIVE | LK_RETRY);
207 un->un_flags |= UN_ULOCK;
208 }
209
210 static int
211 union_lookup1(struct vnode *udvp, struct vnode **dvpp, struct vnode **vpp,
212 struct componentname *cnp)
213 {
214 int error;
215 struct vnode *tdvp;
216 struct vnode *dvp;
217 struct mount *mp;
218
219 dvp = *dvpp;
220
221 /*
222 * If stepping up the directory tree, check for going
223 * back across the mount point, in which case do what
224 * lookup would do by stepping back down the mount
225 * hierarchy.
226 */
227 if (cnp->cn_flags & ISDOTDOT) {
228 while ((dvp != udvp) && (dvp->v_vflag & VV_ROOT)) {
229 /*
230 * Don't do the NOCROSSMOUNT check
231 * at this level. By definition,
232 * union fs deals with namespaces, not
233 * filesystems.
234 */
235 tdvp = dvp;
236 *dvpp = dvp = dvp->v_mount->mnt_vnodecovered;
237 VOP_UNLOCK(tdvp);
238 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
239 }
240 }
241
242 error = VOP_LOOKUP(dvp, &tdvp, cnp);
243 if (error)
244 return (error);
245
246 dvp = tdvp;
247
248 /*
249 * Lastly check if the current node is a mount point in
250 * which case walk up the mount hierarchy making sure not to
251 * bump into the root of the mount tree (ie. dvp != udvp).
252 */
253 while (dvp != udvp && (dvp->v_type == VDIR) &&
254 (mp = dvp->v_mountedhere)) {
255 if (vfs_busy(mp, NULL))
256 continue;
257 vput(dvp);
258 error = VFS_ROOT(mp, &tdvp);
259 vfs_unbusy(mp, false, NULL);
260 if (error) {
261 return (error);
262 }
263 dvp = tdvp;
264 }
265
266 *vpp = dvp;
267 return (0);
268 }
269
270 int
271 union_lookup(void *v)
272 {
273 struct vop_lookup_args /* {
274 struct vnodeop_desc *a_desc;
275 struct vnode *a_dvp;
276 struct vnode **a_vpp;
277 struct componentname *a_cnp;
278 } */ *ap = v;
279 int error;
280 int uerror, lerror;
281 struct vnode *uppervp, *lowervp;
282 struct vnode *upperdvp, *lowerdvp;
283 struct vnode *dvp = ap->a_dvp;
284 struct union_node *dun = VTOUNION(dvp);
285 struct componentname *cnp = ap->a_cnp;
286 struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
287 kauth_cred_t saved_cred = NULL;
288 int iswhiteout;
289 struct vattr va;
290
291 #ifdef notyet
292 if (cnp->cn_namelen == 3 &&
293 cnp->cn_nameptr[2] == '.' &&
294 cnp->cn_nameptr[1] == '.' &&
295 cnp->cn_nameptr[0] == '.') {
296 dvp = *ap->a_vpp = LOWERVP(ap->a_dvp);
297 if (dvp == NULLVP)
298 return (ENOENT);
299 vref(dvp);
300 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
301 return (0);
302 }
303 #endif
304
305 if ((cnp->cn_flags & ISLASTCN) &&
306 (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
307 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
308 return (EROFS);
309
310 start:
311 upperdvp = dun->un_uppervp;
312 lowerdvp = dun->un_lowervp;
313 uppervp = NULLVP;
314 lowervp = NULLVP;
315 iswhiteout = 0;
316
317 /*
318 * do the lookup in the upper level.
319 * if that level comsumes additional pathnames,
320 * then assume that something special is going
321 * on and just return that vnode.
322 */
323 if (upperdvp != NULLVP) {
324 FIXUP(dun);
325 /*
326 * If we're doing `..' in the underlying filesystem,
327 * we must drop our lock on the union node before
328 * going up the tree in the lower file system--if we block
329 * on the lowervp lock, and that's held by someone else
330 * coming down the tree and who's waiting for our lock,
331 * we would be hosed.
332 */
333 if (cnp->cn_flags & ISDOTDOT) {
334 /* retain lock on underlying VP */
335 dun->un_flags |= UN_KLOCK;
336 VOP_UNLOCK(dvp);
337 }
338 uerror = union_lookup1(um->um_uppervp, &upperdvp,
339 &uppervp, cnp);
340
341 if (cnp->cn_flags & ISDOTDOT) {
342 if (dun->un_uppervp == upperdvp) {
343 /*
344 * we got the underlying bugger back locked...
345 * now take back the union node lock. Since we
346 * hold the uppervp lock, we can diddle union
347 * locking flags at will. :)
348 */
349 dun->un_flags |= UN_ULOCK;
350 }
351 /*
352 * if upperdvp got swapped out, it means we did
353 * some mount point magic, and we do not have
354 * dun->un_uppervp locked currently--so we get it
355 * locked here (don't set the UN_ULOCK flag).
356 */
357 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
358 }
359 if (cnp->cn_consume != 0) {
360 *ap->a_vpp = uppervp;
361 return (uerror);
362 }
363 if (uerror == ENOENT || uerror == EJUSTRETURN) {
364 if (cnp->cn_flags & ISWHITEOUT) {
365 iswhiteout = 1;
366 } else if (lowerdvp != NULLVP) {
367 lerror = VOP_GETATTR(upperdvp, &va,
368 cnp->cn_cred);
369 if (lerror == 0 && (va.va_flags & OPAQUE))
370 iswhiteout = 1;
371 }
372 }
373 } else {
374 uerror = ENOENT;
375 }
376
377 /*
378 * in a similar way to the upper layer, do the lookup
379 * in the lower layer. this time, if there is some
380 * component magic going on, then vput whatever we got
381 * back from the upper layer and return the lower vnode
382 * instead.
383 */
384 if (lowerdvp != NULLVP && !iswhiteout) {
385 int nameiop;
386
387 vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY);
388
389 /*
390 * Only do a LOOKUP on the bottom node, since
391 * we won't be making changes to it anyway.
392 */
393 nameiop = cnp->cn_nameiop;
394 cnp->cn_nameiop = LOOKUP;
395 if (um->um_op == UNMNT_BELOW) {
396 saved_cred = cnp->cn_cred;
397 cnp->cn_cred = um->um_cred;
398 }
399
400 /*
401 * we shouldn't have to worry about locking interactions
402 * between the lower layer and our union layer (w.r.t.
403 * `..' processing) because we don't futz with lowervp
404 * locks in the union-node instantiation code path.
405 */
406 lerror = union_lookup1(um->um_lowervp, &lowerdvp,
407 &lowervp, cnp);
408 if (um->um_op == UNMNT_BELOW)
409 cnp->cn_cred = saved_cred;
410 cnp->cn_nameiop = nameiop;
411
412 if (lowervp != lowerdvp)
413 VOP_UNLOCK(lowerdvp);
414
415 if (cnp->cn_consume != 0) {
416 if (uppervp != NULLVP) {
417 if (uppervp == upperdvp)
418 vrele(uppervp);
419 else
420 vput(uppervp);
421 uppervp = NULLVP;
422 }
423 *ap->a_vpp = lowervp;
424 return (lerror);
425 }
426 } else {
427 lerror = ENOENT;
428 if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
429 lowervp = LOWERVP(dun->un_pvp);
430 if (lowervp != NULLVP) {
431 vref(lowervp);
432 vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
433 lerror = 0;
434 }
435 }
436 }
437
438 /*
439 * EJUSTRETURN is used by underlying filesystems to indicate that
440 * a directory modification op was started successfully.
441 * This will only happen in the upper layer, since
442 * the lower layer only does LOOKUPs.
443 * If this union is mounted read-only, bounce it now.
444 */
445
446 if ((uerror == EJUSTRETURN) && (cnp->cn_flags & ISLASTCN) &&
447 (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
448 ((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)))
449 uerror = EROFS;
450
451 /*
452 * at this point, we have uerror and lerror indicating
453 * possible errors with the lookups in the upper and lower
454 * layers. additionally, uppervp and lowervp are (locked)
455 * references to existing vnodes in the upper and lower layers.
456 *
457 * there are now three cases to consider.
458 * 1. if both layers returned an error, then return whatever
459 * error the upper layer generated.
460 *
461 * 2. if the top layer failed and the bottom layer succeeded
462 * then two subcases occur.
463 * a. the bottom vnode is not a directory, in which
464 * case just return a new union vnode referencing
465 * an empty top layer and the existing bottom layer.
466 * b. the bottom vnode is a directory, in which case
467 * create a new directory in the top-level and
468 * continue as in case 3.
469 *
470 * 3. if the top layer succeeded then return a new union
471 * vnode referencing whatever the new top layer and
472 * whatever the bottom layer returned.
473 */
474
475 *ap->a_vpp = NULLVP;
476
477
478 /* case 1. */
479 if ((uerror != 0) && (lerror != 0)) {
480 return (uerror);
481 }
482
483 /* case 2. */
484 if (uerror != 0 /* && (lerror == 0) */ ) {
485 if (lowervp->v_type == VDIR) { /* case 2b. */
486 /*
487 * We may be racing another process to make the
488 * upper-level shadow directory. Be careful with
489 * locks/etc!
490 * If we have to create a shadow directory and want
491 * to commit the node we have to restart the lookup
492 * to get the componentname right.
493 */
494 if (upperdvp) {
495 dun->un_flags &= ~UN_ULOCK;
496 VOP_UNLOCK(upperdvp);
497 uerror = union_mkshadow(um, upperdvp, cnp,
498 &uppervp);
499 vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY);
500 dun->un_flags |= UN_ULOCK;
501 if (uerror == 0 && cnp->cn_nameiop != LOOKUP) {
502 vput(uppervp);
503 if (lowervp != NULLVP)
504 vput(lowervp);
505 goto start;
506 }
507 }
508 if (uerror) {
509 if (lowervp != NULLVP) {
510 vput(lowervp);
511 lowervp = NULLVP;
512 }
513 return (uerror);
514 }
515 }
516 }
517
518 if (lowervp != NULLVP)
519 VOP_UNLOCK(lowervp);
520
521 error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
522 uppervp, lowervp, 1);
523
524 if (error) {
525 if (uppervp != NULLVP)
526 vput(uppervp);
527 if (lowervp != NULLVP)
528 vrele(lowervp);
529 }
530
531 return (error);
532 }
533
534 int
535 union_create(void *v)
536 {
537 struct vop_create_args /* {
538 struct vnode *a_dvp;
539 struct vnode **a_vpp;
540 struct componentname *a_cnp;
541 struct vattr *a_vap;
542 } */ *ap = v;
543 struct union_node *un = VTOUNION(ap->a_dvp);
544 struct vnode *dvp = un->un_uppervp;
545 struct componentname *cnp = ap->a_cnp;
546
547 if (dvp != NULLVP) {
548 int error;
549 struct vnode *vp;
550 struct mount *mp;
551
552 FIXUP(un);
553
554 vref(dvp);
555 un->un_flags |= UN_KLOCK;
556 mp = ap->a_dvp->v_mount;
557 vput(ap->a_dvp);
558 error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
559 if (error)
560 return (error);
561
562 error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp,
563 NULLVP, 1);
564 if (error)
565 vput(vp);
566 return (error);
567 }
568
569 vput(ap->a_dvp);
570 return (EROFS);
571 }
572
573 int
574 union_whiteout(void *v)
575 {
576 struct vop_whiteout_args /* {
577 struct vnode *a_dvp;
578 struct componentname *a_cnp;
579 int a_flags;
580 } */ *ap = v;
581 struct union_node *un = VTOUNION(ap->a_dvp);
582 struct componentname *cnp = ap->a_cnp;
583
584 if (un->un_uppervp == NULLVP)
585 return (EOPNOTSUPP);
586
587 FIXUP(un);
588 return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags));
589 }
590
591 int
592 union_mknod(void *v)
593 {
594 struct vop_mknod_args /* {
595 struct vnode *a_dvp;
596 struct vnode **a_vpp;
597 struct componentname *a_cnp;
598 struct vattr *a_vap;
599 } */ *ap = v;
600 struct union_node *un = VTOUNION(ap->a_dvp);
601 struct vnode *dvp = un->un_uppervp;
602 struct componentname *cnp = ap->a_cnp;
603
604 if (dvp != NULLVP) {
605 int error;
606 struct vnode *vp;
607 struct mount *mp;
608
609 FIXUP(un);
610
611 vref(dvp);
612 un->un_flags |= UN_KLOCK;
613 mp = ap->a_dvp->v_mount;
614 vput(ap->a_dvp);
615 error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap);
616 if (error)
617 return (error);
618
619 error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
620 cnp, vp, NULLVP, 1);
621 if (error)
622 vput(vp);
623 return (error);
624 }
625
626 vput(ap->a_dvp);
627 return (EROFS);
628 }
629
630 int
631 union_open(void *v)
632 {
633 struct vop_open_args /* {
634 struct vnodeop_desc *a_desc;
635 struct vnode *a_vp;
636 int a_mode;
637 kauth_cred_t a_cred;
638 } */ *ap = v;
639 struct union_node *un = VTOUNION(ap->a_vp);
640 struct vnode *tvp;
641 int mode = ap->a_mode;
642 kauth_cred_t cred = ap->a_cred;
643 struct lwp *l = curlwp;
644 int error;
645
646 /*
647 * If there is an existing upper vp then simply open that.
648 */
649 tvp = un->un_uppervp;
650 if (tvp == NULLVP) {
651 /*
652 * If the lower vnode is being opened for writing, then
653 * copy the file contents to the upper vnode and open that,
654 * otherwise can simply open the lower vnode.
655 */
656 tvp = un->un_lowervp;
657 if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
658 error = union_copyup(un, (mode&O_TRUNC) == 0, cred, l);
659 if (error == 0)
660 error = VOP_OPEN(un->un_uppervp, mode, cred);
661 return (error);
662 }
663
664 /*
665 * Just open the lower vnode, but check for nodev mount flag
666 */
667 if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
668 (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
669 return ENXIO;
670 un->un_openl++;
671 vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
672 error = VOP_OPEN(tvp, mode, cred);
673 VOP_UNLOCK(tvp);
674
675 return (error);
676 }
677 /*
678 * Just open the upper vnode, checking for nodev mount flag first
679 */
680 if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
681 (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
682 return ENXIO;
683
684 FIXUP(un);
685
686 error = VOP_OPEN(tvp, mode, cred);
687
688 return (error);
689 }
690
691 int
692 union_close(void *v)
693 {
694 struct vop_close_args /* {
695 struct vnode *a_vp;
696 int a_fflag;
697 kauth_cred_t a_cred;
698 } */ *ap = v;
699 struct union_node *un = VTOUNION(ap->a_vp);
700 struct vnode *vp;
701 int error;
702 bool do_lock;
703
704 vp = un->un_uppervp;
705 if (vp != NULLVP) {
706 do_lock = false;
707 } else {
708 KASSERT(un->un_openl > 0);
709 --un->un_openl;
710 vp = un->un_lowervp;
711 do_lock = true;
712 }
713
714 KASSERT(vp != NULLVP);
715 ap->a_vp = vp;
716 if (do_lock)
717 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
718 error = VCALL(vp, VOFFSET(vop_close), ap);
719 if (do_lock)
720 VOP_UNLOCK(vp);
721
722 return error;
723 }
724
725 /*
726 * Check access permission on the union vnode.
727 * The access check being enforced is to check
728 * against both the underlying vnode, and any
729 * copied vnode. This ensures that no additional
730 * file permissions are given away simply because
731 * the user caused an implicit file copy.
732 */
733 int
734 union_access(void *v)
735 {
736 struct vop_access_args /* {
737 struct vnodeop_desc *a_desc;
738 struct vnode *a_vp;
739 int a_mode;
740 kauth_cred_t a_cred;
741 } */ *ap = v;
742 struct vnode *vp = ap->a_vp;
743 struct union_node *un = VTOUNION(vp);
744 int error = EACCES;
745 struct union_mount *um = MOUNTTOUNIONMOUNT(vp->v_mount);
746
747 /*
748 * Disallow write attempts on read-only file systems;
749 * unless the file is a socket, fifo, or a block or
750 * character device resident on the file system.
751 */
752 if (ap->a_mode & VWRITE) {
753 switch (vp->v_type) {
754 case VDIR:
755 case VLNK:
756 case VREG:
757 if (vp->v_mount->mnt_flag & MNT_RDONLY)
758 return (EROFS);
759 break;
760 case VBAD:
761 case VBLK:
762 case VCHR:
763 case VSOCK:
764 case VFIFO:
765 case VNON:
766 default:
767 break;
768 }
769 }
770
771
772 if ((vp = un->un_uppervp) != NULLVP) {
773 FIXUP(un);
774 ap->a_vp = vp;
775 return (VCALL(vp, VOFFSET(vop_access), ap));
776 }
777
778 if ((vp = un->un_lowervp) != NULLVP) {
779 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
780 ap->a_vp = vp;
781 error = VCALL(vp, VOFFSET(vop_access), ap);
782 if (error == 0) {
783 if (um->um_op == UNMNT_BELOW) {
784 ap->a_cred = um->um_cred;
785 error = VCALL(vp, VOFFSET(vop_access), ap);
786 }
787 }
788 VOP_UNLOCK(vp);
789 if (error)
790 return (error);
791 }
792
793 return (error);
794 }
795
796 /*
797 * We handle getattr only to change the fsid and
798 * track object sizes
799 */
800 int
801 union_getattr(void *v)
802 {
803 struct vop_getattr_args /* {
804 struct vnode *a_vp;
805 struct vattr *a_vap;
806 kauth_cred_t a_cred;
807 } */ *ap = v;
808 int error;
809 struct union_node *un = VTOUNION(ap->a_vp);
810 struct vnode *vp = un->un_uppervp;
811 struct vattr *vap;
812 struct vattr va;
813
814
815 /*
816 * Some programs walk the filesystem hierarchy by counting
817 * links to directories to avoid stat'ing all the time.
818 * This means the link count on directories needs to be "correct".
819 * The only way to do that is to call getattr on both layers
820 * and fix up the link count. The link count will not necessarily
821 * be accurate but will be large enough to defeat the tree walkers.
822 *
823 * To make life more interesting, some filesystems don't keep
824 * track of link counts in the expected way, and return a
825 * link count of `1' for those directories; if either of the
826 * component directories returns a link count of `1', we return a 1.
827 */
828
829 vap = ap->a_vap;
830
831 vp = un->un_uppervp;
832 if (vp != NULLVP) {
833 /*
834 * It's not clear whether VOP_GETATTR is to be
835 * called with the vnode locked or not. stat() calls
836 * it with (vp) locked, and fstat calls it with
837 * (vp) unlocked.
838 * In the mean time, compensate here by checking
839 * the union_node's lock flag.
840 */
841 if (un->un_flags & UN_LOCKED)
842 FIXUP(un);
843
844 error = VOP_GETATTR(vp, vap, ap->a_cred);
845 if (error)
846 return (error);
847 union_newsize(ap->a_vp, vap->va_size, VNOVAL);
848 }
849
850 if (vp == NULLVP) {
851 vp = un->un_lowervp;
852 } else if (vp->v_type == VDIR) {
853 vp = un->un_lowervp;
854 if (vp != NULLVP)
855 vap = &va;
856 } else {
857 vp = NULLVP;
858 }
859
860 if (vp != NULLVP) {
861 error = VOP_GETATTR(vp, vap, ap->a_cred);
862 if (error)
863 return (error);
864 union_newsize(ap->a_vp, VNOVAL, vap->va_size);
865 }
866
867 if ((vap != ap->a_vap) && (vap->va_type == VDIR)) {
868 /*
869 * Link count manipulation:
870 * - If both return "2", return 2 (no subdirs)
871 * - If one or the other return "1", return "1" (ENOCLUE)
872 */
873 if ((ap->a_vap->va_nlink == 2) &&
874 (vap->va_nlink == 2))
875 ;
876 else if (ap->a_vap->va_nlink != 1) {
877 if (vap->va_nlink == 1)
878 ap->a_vap->va_nlink = 1;
879 else
880 ap->a_vap->va_nlink += vap->va_nlink;
881 }
882 }
883 ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
884 return (0);
885 }
886
887 int
888 union_setattr(void *v)
889 {
890 struct vop_setattr_args /* {
891 struct vnode *a_vp;
892 struct vattr *a_vap;
893 kauth_cred_t a_cred;
894 } */ *ap = v;
895 struct vattr *vap = ap->a_vap;
896 struct vnode *vp = ap->a_vp;
897 struct union_node *un = VTOUNION(vp);
898 bool size_only; /* All but va_size are VNOVAL. */
899 int error;
900
901 size_only = (vap->va_flags == VNOVAL && vap->va_uid == (uid_t)VNOVAL &&
902 vap->va_gid == (gid_t)VNOVAL && vap->va_atime.tv_sec == VNOVAL &&
903 vap->va_mtime.tv_sec == VNOVAL && vap->va_mode == (mode_t)VNOVAL);
904
905 if (!size_only && (vp->v_mount->mnt_flag & MNT_RDONLY))
906 return (EROFS);
907 if (vap->va_size != VNOVAL) {
908 switch (vp->v_type) {
909 case VDIR:
910 return (EISDIR);
911 case VCHR:
912 case VBLK:
913 case VSOCK:
914 case VFIFO:
915 break;
916 case VREG:
917 case VLNK:
918 default:
919 /*
920 * Disallow write attempts if the filesystem is
921 * mounted read-only.
922 */
923 if (vp->v_mount->mnt_flag & MNT_RDONLY)
924 return (EROFS);
925 }
926 }
927
928 /*
929 * Handle case of truncating lower object to zero size,
930 * by creating a zero length upper object. This is to
931 * handle the case of open with O_TRUNC and O_CREAT.
932 */
933 if ((un->un_uppervp == NULLVP) &&
934 /* assert(un->un_lowervp != NULLVP) */
935 (un->un_lowervp->v_type == VREG)) {
936 error = union_copyup(un, (vap->va_size != 0),
937 ap->a_cred, curlwp);
938 if (error)
939 return (error);
940 }
941
942 /*
943 * Try to set attributes in upper layer, ignore size change to zero
944 * for devices to handle O_TRUNC and return read-only filesystem error
945 * otherwise.
946 */
947 if (un->un_uppervp != NULLVP) {
948 FIXUP(un);
949 error = VOP_SETATTR(un->un_uppervp, vap, ap->a_cred);
950 if ((error == 0) && (vap->va_size != VNOVAL))
951 union_newsize(ap->a_vp, vap->va_size, VNOVAL);
952 } else {
953 KASSERT(un->un_lowervp != NULLVP);
954 if (NODE_IS_SPECIAL(un->un_lowervp)) {
955 if (size_only &&
956 (vap->va_size == 0 || vap->va_size == VNOVAL))
957 error = 0;
958 else
959 error = EROFS;
960 } else {
961 error = EROFS;
962 }
963 }
964
965 return (error);
966 }
967
968 int
969 union_read(void *v)
970 {
971 struct vop_read_args /* {
972 struct vnode *a_vp;
973 struct uio *a_uio;
974 int a_ioflag;
975 kauth_cred_t a_cred;
976 } */ *ap = v;
977 int error;
978 struct vnode *vp = OTHERVP(ap->a_vp);
979 int dolock = (vp == LOWERVP(ap->a_vp));
980
981 if (dolock)
982 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
983 else
984 FIXUP(VTOUNION(ap->a_vp));
985 error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
986 if (dolock)
987 VOP_UNLOCK(vp);
988
989 /*
990 * XXX
991 * perhaps the size of the underlying object has changed under
992 * our feet. take advantage of the offset information present
993 * in the uio structure.
994 */
995 if (error == 0) {
996 struct union_node *un = VTOUNION(ap->a_vp);
997 off_t cur = ap->a_uio->uio_offset;
998
999 if (vp == un->un_uppervp) {
1000 if (cur > un->un_uppersz)
1001 union_newsize(ap->a_vp, cur, VNOVAL);
1002 } else {
1003 if (cur > un->un_lowersz)
1004 union_newsize(ap->a_vp, VNOVAL, cur);
1005 }
1006 }
1007
1008 return (error);
1009 }
1010
1011 int
1012 union_write(void *v)
1013 {
1014 struct vop_read_args /* {
1015 struct vnode *a_vp;
1016 struct uio *a_uio;
1017 int a_ioflag;
1018 kauth_cred_t a_cred;
1019 } */ *ap = v;
1020 int error;
1021 struct vnode *vp;
1022 struct union_node *un = VTOUNION(ap->a_vp);
1023
1024 vp = UPPERVP(ap->a_vp);
1025 if (vp == NULLVP) {
1026 vp = LOWERVP(ap->a_vp);
1027 if (NODE_IS_SPECIAL(vp)) {
1028 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1029 error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag,
1030 ap->a_cred);
1031 VOP_UNLOCK(vp);
1032 return error;
1033 }
1034 panic("union: missing upper layer in write");
1035 }
1036
1037 FIXUP(un);
1038 error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
1039
1040 /*
1041 * the size of the underlying object may be changed by the
1042 * write.
1043 */
1044 if (error == 0) {
1045 off_t cur = ap->a_uio->uio_offset;
1046
1047 if (cur > un->un_uppersz)
1048 union_newsize(ap->a_vp, cur, VNOVAL);
1049 }
1050
1051 return (error);
1052 }
1053
1054 int
1055 union_ioctl(void *v)
1056 {
1057 struct vop_ioctl_args /* {
1058 struct vnode *a_vp;
1059 int a_command;
1060 void *a_data;
1061 int a_fflag;
1062 kauth_cred_t a_cred;
1063 } */ *ap = v;
1064 struct vnode *ovp = OTHERVP(ap->a_vp);
1065
1066 ap->a_vp = ovp;
1067 return (VCALL(ovp, VOFFSET(vop_ioctl), ap));
1068 }
1069
1070 int
1071 union_poll(void *v)
1072 {
1073 struct vop_poll_args /* {
1074 struct vnode *a_vp;
1075 int a_events;
1076 } */ *ap = v;
1077 struct vnode *ovp = OTHERVP(ap->a_vp);
1078
1079 ap->a_vp = ovp;
1080 return (VCALL(ovp, VOFFSET(vop_poll), ap));
1081 }
1082
1083 int
1084 union_revoke(void *v)
1085 {
1086 struct vop_revoke_args /* {
1087 struct vnode *a_vp;
1088 int a_flags;
1089 struct proc *a_p;
1090 } */ *ap = v;
1091 struct vnode *vp = ap->a_vp;
1092
1093 if (UPPERVP(vp))
1094 VOP_REVOKE(UPPERVP(vp), ap->a_flags);
1095 if (LOWERVP(vp))
1096 VOP_REVOKE(LOWERVP(vp), ap->a_flags);
1097 vgone(vp); /* XXXAD?? */
1098 return (0);
1099 }
1100
1101 int
1102 union_mmap(void *v)
1103 {
1104 struct vop_mmap_args /* {
1105 struct vnode *a_vp;
1106 vm_prot_t a_prot;
1107 kauth_cred_t a_cred;
1108 } */ *ap = v;
1109 struct vnode *ovp = OTHERVP(ap->a_vp);
1110
1111 ap->a_vp = ovp;
1112 return (VCALL(ovp, VOFFSET(vop_mmap), ap));
1113 }
1114
1115 int
1116 union_fsync(void *v)
1117 {
1118 struct vop_fsync_args /* {
1119 struct vnode *a_vp;
1120 kauth_cred_t a_cred;
1121 int a_flags;
1122 off_t offhi;
1123 off_t offlo;
1124 } */ *ap = v;
1125 int error = 0;
1126 struct vnode *targetvp;
1127
1128 /*
1129 * If vinvalbuf is calling us, it's a "shallow fsync" -- don't
1130 * bother syncing the underlying vnodes, since (a) they'll be
1131 * fsync'ed when reclaimed and (b) we could deadlock if
1132 * they're locked; otherwise, pass it through to the
1133 * underlying layer.
1134 */
1135 if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) {
1136 error = spec_fsync(v);
1137 if (error)
1138 return error;
1139 }
1140
1141 if (ap->a_flags & FSYNC_RECLAIM)
1142 return 0;
1143
1144 targetvp = OTHERVP(ap->a_vp);
1145 if (targetvp != NULLVP) {
1146 int dolock = (targetvp == LOWERVP(ap->a_vp));
1147
1148 if (dolock)
1149 vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY);
1150 else
1151 FIXUP(VTOUNION(ap->a_vp));
1152 error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_flags,
1153 ap->a_offlo, ap->a_offhi);
1154 if (dolock)
1155 VOP_UNLOCK(targetvp);
1156 }
1157
1158 return (error);
1159 }
1160
1161 int
1162 union_seek(void *v)
1163 {
1164 struct vop_seek_args /* {
1165 struct vnode *a_vp;
1166 off_t a_oldoff;
1167 off_t a_newoff;
1168 kauth_cred_t a_cred;
1169 } */ *ap = v;
1170 struct vnode *ovp = OTHERVP(ap->a_vp);
1171
1172 ap->a_vp = ovp;
1173 return (VCALL(ovp, VOFFSET(vop_seek), ap));
1174 }
1175
1176 int
1177 union_remove(void *v)
1178 {
1179 struct vop_remove_args /* {
1180 struct vnode *a_dvp;
1181 struct vnode *a_vp;
1182 struct componentname *a_cnp;
1183 } */ *ap = v;
1184 int error;
1185 struct union_node *dun = VTOUNION(ap->a_dvp);
1186 struct union_node *un = VTOUNION(ap->a_vp);
1187 struct componentname *cnp = ap->a_cnp;
1188
1189 if (dun->un_uppervp == NULLVP)
1190 panic("union remove: null upper vnode");
1191
1192 if (un->un_uppervp != NULLVP) {
1193 struct vnode *dvp = dun->un_uppervp;
1194 struct vnode *vp = un->un_uppervp;
1195
1196 FIXUP(dun);
1197 vref(dvp);
1198 dun->un_flags |= UN_KLOCK;
1199 vput(ap->a_dvp);
1200 FIXUP(un);
1201 vref(vp);
1202 un->un_flags |= UN_KLOCK;
1203 vput(ap->a_vp);
1204
1205 if (union_dowhiteout(un, cnp->cn_cred))
1206 cnp->cn_flags |= DOWHITEOUT;
1207 error = VOP_REMOVE(dvp, vp, cnp);
1208 if (!error)
1209 union_removed_upper(un);
1210 } else {
1211 FIXUP(dun);
1212 error = union_mkwhiteout(
1213 MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
1214 dun->un_uppervp, ap->a_cnp, un->un_path);
1215 vput(ap->a_dvp);
1216 vput(ap->a_vp);
1217 }
1218
1219 return (error);
1220 }
1221
1222 int
1223 union_link(void *v)
1224 {
1225 struct vop_link_args /* {
1226 struct vnode *a_dvp;
1227 struct vnode *a_vp;
1228 struct componentname *a_cnp;
1229 } */ *ap = v;
1230 int error = 0;
1231 struct componentname *cnp = ap->a_cnp;
1232 struct union_node *dun;
1233 struct vnode *vp;
1234 struct vnode *dvp;
1235
1236 dun = VTOUNION(ap->a_dvp);
1237
1238 KASSERT((ap->a_cnp->cn_flags & LOCKPARENT) != 0);
1239
1240 if (ap->a_dvp->v_op != ap->a_vp->v_op) {
1241 vp = ap->a_vp;
1242 } else {
1243 struct union_node *un = VTOUNION(ap->a_vp);
1244 if (un->un_uppervp == NULLVP) {
1245 /*
1246 * Needs to be copied before we can link it.
1247 */
1248 vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
1249 if (dun->un_uppervp == un->un_dirvp) {
1250 dun->un_flags &= ~UN_ULOCK;
1251 VOP_UNLOCK(dun->un_uppervp);
1252 }
1253 error = union_copyup(un, 1, cnp->cn_cred, curlwp);
1254 if (dun->un_uppervp == un->un_dirvp) {
1255 /*
1256 * During copyup, we dropped the lock on the
1257 * dir and invalidated any saved namei lookup
1258 * state for the directory we'll be entering
1259 * the link in. We need to re-run the lookup
1260 * in that directory to reset any state needed
1261 * for VOP_LINK.
1262 * Call relookup on the union-layer to reset
1263 * the state.
1264 */
1265 vp = NULLVP;
1266 if (dun->un_uppervp == NULLVP)
1267 panic("union: null upperdvp?");
1268 error = relookup(ap->a_dvp, &vp, ap->a_cnp, 0);
1269 if (error) {
1270 VOP_UNLOCK(ap->a_vp);
1271 return EROFS; /* ? */
1272 }
1273 if (vp != NULLVP) {
1274 /*
1275 * The name we want to create has
1276 * mysteriously appeared (a race?)
1277 */
1278 error = EEXIST;
1279 VOP_UNLOCK(ap->a_vp);
1280 vput(ap->a_dvp);
1281 vput(vp);
1282 return (error);
1283 }
1284 }
1285 VOP_UNLOCK(ap->a_vp);
1286 }
1287 vp = un->un_uppervp;
1288 }
1289
1290 dvp = dun->un_uppervp;
1291 if (dvp == NULLVP)
1292 error = EROFS;
1293
1294 if (error) {
1295 vput(ap->a_dvp);
1296 return (error);
1297 }
1298
1299 FIXUP(dun);
1300 vref(dvp);
1301 dun->un_flags |= UN_KLOCK;
1302 vput(ap->a_dvp);
1303
1304 return (VOP_LINK(dvp, vp, cnp));
1305 }
1306
1307 int
1308 union_rename(void *v)
1309 {
1310 struct vop_rename_args /* {
1311 struct vnode *a_fdvp;
1312 struct vnode *a_fvp;
1313 struct componentname *a_fcnp;
1314 struct vnode *a_tdvp;
1315 struct vnode *a_tvp;
1316 struct componentname *a_tcnp;
1317 } */ *ap = v;
1318 int error;
1319
1320 struct vnode *fdvp = ap->a_fdvp;
1321 struct vnode *fvp = ap->a_fvp;
1322 struct vnode *tdvp = ap->a_tdvp;
1323 struct vnode *tvp = ap->a_tvp;
1324
1325 if (fdvp->v_op == union_vnodeop_p) { /* always true */
1326 struct union_node *un = VTOUNION(fdvp);
1327 if (un->un_uppervp == NULLVP) {
1328 /*
1329 * this should never happen in normal
1330 * operation but might if there was
1331 * a problem creating the top-level shadow
1332 * directory.
1333 */
1334 error = EXDEV;
1335 goto bad;
1336 }
1337
1338 fdvp = un->un_uppervp;
1339 vref(fdvp);
1340 }
1341
1342 if (fvp->v_op == union_vnodeop_p) { /* always true */
1343 struct union_node *un = VTOUNION(fvp);
1344 if (un->un_uppervp == NULLVP) {
1345 /* XXX: should do a copyup */
1346 error = EXDEV;
1347 goto bad;
1348 }
1349
1350 if (un->un_lowervp != NULLVP)
1351 ap->a_fcnp->cn_flags |= DOWHITEOUT;
1352
1353 fvp = un->un_uppervp;
1354 vref(fvp);
1355 }
1356
1357 if (tdvp->v_op == union_vnodeop_p) {
1358 struct union_node *un = VTOUNION(tdvp);
1359 if (un->un_uppervp == NULLVP) {
1360 /*
1361 * this should never happen in normal
1362 * operation but might if there was
1363 * a problem creating the top-level shadow
1364 * directory.
1365 */
1366 error = EXDEV;
1367 goto bad;
1368 }
1369
1370 tdvp = un->un_uppervp;
1371 vref(tdvp);
1372 un->un_flags |= UN_KLOCK;
1373 vput(ap->a_tdvp);
1374 }
1375
1376 if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
1377 struct union_node *un = VTOUNION(tvp);
1378
1379 tvp = un->un_uppervp;
1380 if (tvp != NULLVP) {
1381 vref(tvp);
1382 un->un_flags |= UN_KLOCK;
1383 }
1384 vput(ap->a_tvp);
1385 }
1386
1387 error = VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp);
1388 goto out;
1389
1390 bad:
1391 vput(tdvp);
1392 if (tvp != NULLVP)
1393 vput(tvp);
1394 vrele(fdvp);
1395 vrele(fvp);
1396
1397 out:
1398 if (fdvp != ap->a_fdvp) {
1399 vrele(ap->a_fdvp);
1400 }
1401 if (fvp != ap->a_fvp) {
1402 vrele(ap->a_fvp);
1403 }
1404 return (error);
1405 }
1406
1407 int
1408 union_mkdir(void *v)
1409 {
1410 struct vop_mkdir_args /* {
1411 struct vnode *a_dvp;
1412 struct vnode **a_vpp;
1413 struct componentname *a_cnp;
1414 struct vattr *a_vap;
1415 } */ *ap = v;
1416 struct union_node *un = VTOUNION(ap->a_dvp);
1417 struct vnode *dvp = un->un_uppervp;
1418 struct componentname *cnp = ap->a_cnp;
1419
1420 if (dvp != NULLVP) {
1421 int error;
1422 struct vnode *vp;
1423
1424 FIXUP(un);
1425 vref(dvp);
1426 un->un_flags |= UN_KLOCK;
1427 VOP_UNLOCK(ap->a_dvp);
1428 error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap);
1429 if (error) {
1430 vrele(ap->a_dvp);
1431 return (error);
1432 }
1433
1434 error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp,
1435 NULLVP, cnp, vp, NULLVP, 1);
1436 if (error)
1437 vput(vp);
1438 vrele(ap->a_dvp);
1439 return (error);
1440 }
1441
1442 vput(ap->a_dvp);
1443 return (EROFS);
1444 }
1445
1446 int
1447 union_rmdir(void *v)
1448 {
1449 struct vop_rmdir_args /* {
1450 struct vnode *a_dvp;
1451 struct vnode *a_vp;
1452 struct componentname *a_cnp;
1453 } */ *ap = v;
1454 int error;
1455 struct union_node *dun = VTOUNION(ap->a_dvp);
1456 struct union_node *un = VTOUNION(ap->a_vp);
1457 struct componentname *cnp = ap->a_cnp;
1458
1459 if (dun->un_uppervp == NULLVP)
1460 panic("union rmdir: null upper vnode");
1461
1462 error = union_check_rmdir(un, cnp->cn_cred);
1463 if (error) {
1464 vput(ap->a_dvp);
1465 vput(ap->a_vp);
1466 return error;
1467 }
1468
1469 if (un->un_uppervp != NULLVP) {
1470 struct vnode *dvp = dun->un_uppervp;
1471 struct vnode *vp = un->un_uppervp;
1472
1473 FIXUP(dun);
1474 vref(dvp);
1475 dun->un_flags |= UN_KLOCK;
1476 vput(ap->a_dvp);
1477 FIXUP(un);
1478 vref(vp);
1479 un->un_flags |= UN_KLOCK;
1480 vput(ap->a_vp);
1481
1482 if (union_dowhiteout(un, cnp->cn_cred))
1483 cnp->cn_flags |= DOWHITEOUT;
1484 error = VOP_RMDIR(dvp, vp, ap->a_cnp);
1485 if (!error)
1486 union_removed_upper(un);
1487 } else {
1488 FIXUP(dun);
1489 error = union_mkwhiteout(
1490 MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
1491 dun->un_uppervp, ap->a_cnp, un->un_path);
1492 vput(ap->a_dvp);
1493 vput(ap->a_vp);
1494 }
1495
1496 return (error);
1497 }
1498
1499 int
1500 union_symlink(void *v)
1501 {
1502 struct vop_symlink_args /* {
1503 struct vnode *a_dvp;
1504 struct vnode **a_vpp;
1505 struct componentname *a_cnp;
1506 struct vattr *a_vap;
1507 char *a_target;
1508 } */ *ap = v;
1509 struct union_node *un = VTOUNION(ap->a_dvp);
1510 struct vnode *dvp = un->un_uppervp;
1511 struct componentname *cnp = ap->a_cnp;
1512
1513 if (dvp != NULLVP) {
1514 int error;
1515
1516 FIXUP(un);
1517 vref(dvp);
1518 un->un_flags |= UN_KLOCK;
1519 vput(ap->a_dvp);
1520 error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
1521 ap->a_target);
1522 return (error);
1523 }
1524
1525 vput(ap->a_dvp);
1526 return (EROFS);
1527 }
1528
1529 /*
1530 * union_readdir works in concert with getdirentries and
1531 * readdir(3) to provide a list of entries in the unioned
1532 * directories. getdirentries is responsible for walking
1533 * down the union stack. readdir(3) is responsible for
1534 * eliminating duplicate names from the returned data stream.
1535 */
1536 int
1537 union_readdir(void *v)
1538 {
1539 struct vop_readdir_args /* {
1540 struct vnodeop_desc *a_desc;
1541 struct vnode *a_vp;
1542 struct uio *a_uio;
1543 kauth_cred_t a_cred;
1544 int *a_eofflag;
1545 u_long *a_cookies;
1546 int a_ncookies;
1547 } */ *ap = v;
1548 struct union_node *un = VTOUNION(ap->a_vp);
1549 struct vnode *uvp = un->un_uppervp;
1550
1551 if (uvp == NULLVP)
1552 return (0);
1553
1554 FIXUP(un);
1555 ap->a_vp = uvp;
1556 return (VCALL(uvp, VOFFSET(vop_readdir), ap));
1557 }
1558
1559 int
1560 union_readlink(void *v)
1561 {
1562 struct vop_readlink_args /* {
1563 struct vnode *a_vp;
1564 struct uio *a_uio;
1565 kauth_cred_t a_cred;
1566 } */ *ap = v;
1567 int error;
1568 struct vnode *vp = OTHERVP(ap->a_vp);
1569 int dolock = (vp == LOWERVP(ap->a_vp));
1570
1571 if (dolock)
1572 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1573 else
1574 FIXUP(VTOUNION(ap->a_vp));
1575 ap->a_vp = vp;
1576 error = VCALL(vp, VOFFSET(vop_readlink), ap);
1577 if (dolock)
1578 VOP_UNLOCK(vp);
1579
1580 return (error);
1581 }
1582
1583 int
1584 union_abortop(void *v)
1585 {
1586 struct vop_abortop_args /* {
1587 struct vnode *a_dvp;
1588 struct componentname *a_cnp;
1589 } */ *ap = v;
1590 int error;
1591 struct vnode *vp = OTHERVP(ap->a_dvp);
1592 struct union_node *un = VTOUNION(ap->a_dvp);
1593 int islocked = un->un_flags & UN_LOCKED;
1594 int dolock = (vp == LOWERVP(ap->a_dvp));
1595
1596 if (islocked) {
1597 if (dolock)
1598 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1599 else
1600 FIXUP(VTOUNION(ap->a_dvp));
1601 }
1602 ap->a_dvp = vp;
1603 error = VCALL(vp, VOFFSET(vop_abortop), ap);
1604 if (islocked && dolock)
1605 VOP_UNLOCK(vp);
1606
1607 return (error);
1608 }
1609
1610 int
1611 union_inactive(void *v)
1612 {
1613 struct vop_inactive_args /* {
1614 const struct vnodeop_desc *a_desc;
1615 struct vnode *a_vp;
1616 bool *a_recycle;
1617 } */ *ap = v;
1618 struct vnode *vp = ap->a_vp;
1619 struct union_node *un = VTOUNION(vp);
1620 struct vnode **vpp;
1621
1622 /*
1623 * Do nothing (and _don't_ bypass).
1624 * Wait to vrele lowervp until reclaim,
1625 * so that until then our union_node is in the
1626 * cache and reusable.
1627 *
1628 * NEEDSWORK: Someday, consider inactive'ing
1629 * the lowervp and then trying to reactivate it
1630 * with capabilities (v_id)
1631 * like they do in the name lookup cache code.
1632 * That's too much work for now.
1633 */
1634
1635 if (un->un_dircache != 0) {
1636 for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
1637 vrele(*vpp);
1638 free(un->un_dircache, M_TEMP);
1639 un->un_dircache = 0;
1640 }
1641
1642 *ap->a_recycle = ((un->un_flags & UN_CACHED) == 0);
1643 VOP_UNLOCK(vp);
1644
1645 return (0);
1646 }
1647
1648 int
1649 union_reclaim(void *v)
1650 {
1651 struct vop_reclaim_args /* {
1652 struct vnode *a_vp;
1653 } */ *ap = v;
1654
1655 union_freevp(ap->a_vp);
1656
1657 return (0);
1658 }
1659
1660 int
1661 union_lock(void *v)
1662 {
1663 struct vop_lock_args /* {
1664 struct vnode *a_vp;
1665 int a_flags;
1666 } */ *ap = v;
1667 struct vnode *vp = ap->a_vp;
1668 int flags = ap->a_flags;
1669 struct union_node *un;
1670 int error;
1671
1672 /* XXX unionfs can't handle shared locks yet */
1673 if ((flags & LK_SHARED) != 0) {
1674 flags = (flags & ~LK_SHARED) | LK_EXCLUSIVE;
1675 }
1676
1677 start:
1678 un = VTOUNION(vp);
1679
1680 if (un->un_uppervp != NULLVP) {
1681 if (((un->un_flags & UN_ULOCK) == 0) &&
1682 (vp->v_usecount != 0)) {
1683 /*
1684 * We MUST always use the order of: take upper
1685 * vp lock, manipulate union node flags, drop
1686 * upper vp lock. This code must not be an
1687 * exception.
1688 */
1689 error = vn_lock(un->un_uppervp, flags);
1690 if (error)
1691 return (error);
1692 un->un_flags |= UN_ULOCK;
1693 }
1694 #ifdef DIAGNOSTIC
1695 if (un->un_flags & UN_KLOCK) {
1696 vprint("union: dangling klock", vp);
1697 panic("union: dangling upper lock (%p)", vp);
1698 }
1699 #endif
1700 }
1701
1702 /* XXX ignores LK_NOWAIT */
1703 if (un->un_flags & UN_LOCKED) {
1704 KASSERT(curlwp == NULL || un->un_lwp == NULL ||
1705 un->un_lwp != curlwp);
1706 un->un_flags |= UN_WANTED;
1707 tsleep(&un->un_flags, PINOD, "unionlk2", 0);
1708 goto start;
1709 }
1710
1711 un->un_lwp = curlwp;
1712
1713 un->un_flags |= UN_LOCKED;
1714 return (0);
1715 }
1716
1717 /*
1718 * When operations want to vput() a union node yet retain a lock on
1719 * the upper vnode (say, to do some further operations like link(),
1720 * mkdir(), ...), they set UN_KLOCK on the union node, then call
1721 * vput() which calls VOP_UNLOCK() and comes here. union_unlock()
1722 * unlocks the union node (leaving the upper vnode alone), clears the
1723 * KLOCK flag, and then returns to vput(). The caller then does whatever
1724 * is left to do with the upper vnode, and ensures that it gets unlocked.
1725 *
1726 * If UN_KLOCK isn't set, then the upper vnode is unlocked here.
1727 */
1728 int
1729 union_unlock(void *v)
1730 {
1731 struct vop_unlock_args /* {
1732 struct vnode *a_vp;
1733 int a_flags;
1734 } */ *ap = v;
1735 struct union_node *un = VTOUNION(ap->a_vp);
1736
1737 KASSERT((un->un_flags & UN_LOCKED) != 0);
1738 KASSERT(curlwp == NULL || un->un_lwp == NULL ||
1739 un->un_lwp == curlwp);
1740
1741 un->un_flags &= ~UN_LOCKED;
1742
1743 if ((un->un_flags & (UN_ULOCK|UN_KLOCK)) == UN_ULOCK)
1744 VOP_UNLOCK(un->un_uppervp);
1745
1746 un->un_flags &= ~(UN_ULOCK|UN_KLOCK);
1747
1748 if (un->un_flags & UN_WANTED) {
1749 un->un_flags &= ~UN_WANTED;
1750 wakeup( &un->un_flags);
1751 }
1752
1753 un->un_lwp = NULL;
1754
1755 return (0);
1756 }
1757
1758 int
1759 union_bmap(void *v)
1760 {
1761 struct vop_bmap_args /* {
1762 struct vnode *a_vp;
1763 daddr_t a_bn;
1764 struct vnode **a_vpp;
1765 daddr_t *a_bnp;
1766 int *a_runp;
1767 } */ *ap = v;
1768 int error;
1769 struct vnode *vp = OTHERVP(ap->a_vp);
1770 int dolock = (vp == LOWERVP(ap->a_vp));
1771
1772 if (dolock)
1773 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1774 else
1775 FIXUP(VTOUNION(ap->a_vp));
1776 ap->a_vp = vp;
1777 error = VCALL(vp, VOFFSET(vop_bmap), ap);
1778 if (dolock)
1779 VOP_UNLOCK(vp);
1780
1781 return (error);
1782 }
1783
1784 int
1785 union_print(void *v)
1786 {
1787 struct vop_print_args /* {
1788 struct vnode *a_vp;
1789 } */ *ap = v;
1790 struct vnode *vp = ap->a_vp;
1791
1792 printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
1793 vp, UPPERVP(vp), LOWERVP(vp));
1794 if (UPPERVP(vp) != NULLVP)
1795 vprint("union: upper", UPPERVP(vp));
1796 if (LOWERVP(vp) != NULLVP)
1797 vprint("union: lower", LOWERVP(vp));
1798 if (VTOUNION(vp)->un_dircache) {
1799 struct vnode **vpp;
1800 for (vpp = VTOUNION(vp)->un_dircache; *vpp != NULLVP; vpp++)
1801 vprint("dircache:", *vpp);
1802 }
1803
1804 return (0);
1805 }
1806
1807 int
1808 union_islocked(void *v)
1809 {
1810 struct vop_islocked_args /* {
1811 struct vnode *a_vp;
1812 } */ *ap = v;
1813
1814 return ((VTOUNION(ap->a_vp)->un_flags & UN_LOCKED) ? LK_EXCLUSIVE : 0);
1815 }
1816
1817 int
1818 union_pathconf(void *v)
1819 {
1820 struct vop_pathconf_args /* {
1821 struct vnode *a_vp;
1822 int a_name;
1823 int *a_retval;
1824 } */ *ap = v;
1825 int error;
1826 struct vnode *vp = OTHERVP(ap->a_vp);
1827 int dolock = (vp == LOWERVP(ap->a_vp));
1828
1829 if (dolock)
1830 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1831 else
1832 FIXUP(VTOUNION(ap->a_vp));
1833 ap->a_vp = vp;
1834 error = VCALL(vp, VOFFSET(vop_pathconf), ap);
1835 if (dolock)
1836 VOP_UNLOCK(vp);
1837
1838 return (error);
1839 }
1840
1841 int
1842 union_advlock(void *v)
1843 {
1844 struct vop_advlock_args /* {
1845 struct vnode *a_vp;
1846 void *a_id;
1847 int a_op;
1848 struct flock *a_fl;
1849 int a_flags;
1850 } */ *ap = v;
1851 struct vnode *ovp = OTHERVP(ap->a_vp);
1852
1853 ap->a_vp = ovp;
1854 return (VCALL(ovp, VOFFSET(vop_advlock), ap));
1855 }
1856
1857 int
1858 union_strategy(void *v)
1859 {
1860 struct vop_strategy_args /* {
1861 struct vnode *a_vp;
1862 struct buf *a_bp;
1863 } */ *ap = v;
1864 struct vnode *ovp = OTHERVP(ap->a_vp);
1865 struct buf *bp = ap->a_bp;
1866
1867 KASSERT(ovp != NULLVP);
1868 if (!NODE_IS_SPECIAL(ovp))
1869 KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1870
1871 return (VOP_STRATEGY(ovp, bp));
1872 }
1873
1874 int
1875 union_bwrite(void *v)
1876 {
1877 struct vop_bwrite_args /* {
1878 struct vnode *a_vp;
1879 struct buf *a_bp;
1880 } */ *ap = v;
1881 struct vnode *ovp = OTHERVP(ap->a_vp);
1882 struct buf *bp = ap->a_bp;
1883
1884 KASSERT(ovp != NULLVP);
1885 if (!NODE_IS_SPECIAL(ovp))
1886 KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1887
1888 return (VOP_BWRITE(ovp, bp));
1889 }
1890
1891 int
1892 union_getpages(void *v)
1893 {
1894 struct vop_getpages_args /* {
1895 struct vnode *a_vp;
1896 voff_t a_offset;
1897 struct vm_page **a_m;
1898 int *a_count;
1899 int a_centeridx;
1900 vm_prot_t a_access_type;
1901 int a_advice;
1902 int a_flags;
1903 } */ *ap = v;
1904 struct vnode *vp = ap->a_vp;
1905
1906 KASSERT(mutex_owned(vp->v_interlock));
1907
1908 if (ap->a_flags & PGO_LOCKED) {
1909 return EBUSY;
1910 }
1911 ap->a_vp = OTHERVP(vp);
1912 KASSERT(vp->v_interlock == ap->a_vp->v_interlock);
1913
1914 /* Just pass the request on to the underlying layer. */
1915 return VCALL(ap->a_vp, VOFFSET(vop_getpages), ap);
1916 }
1917
1918 int
1919 union_putpages(void *v)
1920 {
1921 struct vop_putpages_args /* {
1922 struct vnode *a_vp;
1923 voff_t a_offlo;
1924 voff_t a_offhi;
1925 int a_flags;
1926 } */ *ap = v;
1927 struct vnode *vp = ap->a_vp;
1928
1929 KASSERT(mutex_owned(vp->v_interlock));
1930
1931 ap->a_vp = OTHERVP(vp);
1932 KASSERT(vp->v_interlock == ap->a_vp->v_interlock);
1933
1934 if (ap->a_flags & PGO_RECLAIM) {
1935 mutex_exit(vp->v_interlock);
1936 return 0;
1937 }
1938
1939 /* Just pass the request on to the underlying layer. */
1940 return VCALL(ap->a_vp, VOFFSET(vop_putpages), ap);
1941 }
1942
1943 int
1944 union_kqfilter(void *v)
1945 {
1946 struct vop_kqfilter_args /* {
1947 struct vnode *a_vp;
1948 struct knote *a_kn;
1949 } */ *ap = v;
1950 int error;
1951
1952 /*
1953 * We watch either the upper layer file (if it already exists),
1954 * or the lower layer one. If there is lower layer file only
1955 * at this moment, we will keep watching that lower layer file
1956 * even if upper layer file would be created later on.
1957 */
1958 if (UPPERVP(ap->a_vp))
1959 error = VOP_KQFILTER(UPPERVP(ap->a_vp), ap->a_kn);
1960 else if (LOWERVP(ap->a_vp))
1961 error = VOP_KQFILTER(LOWERVP(ap->a_vp), ap->a_kn);
1962 else {
1963 /* panic? */
1964 error = EOPNOTSUPP;
1965 }
1966
1967 return (error);
1968 }
1969