union_vnops.c revision 1.76 1 /* $NetBSD: union_vnops.c,v 1.76 2021/06/29 22:38:46 dholland Exp $ */
2
3 /*
4 * Copyright (c) 1992, 1993, 1994, 1995
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Jan-Simon Pendry.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * @(#)union_vnops.c 8.33 (Berkeley) 7/31/95
35 */
36
37 /*
38 * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
39 *
40 * This code is derived from software contributed to Berkeley by
41 * Jan-Simon Pendry.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 * notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 * must display the following acknowledgement:
53 * This product includes software developed by the University of
54 * California, Berkeley and its contributors.
55 * 4. Neither the name of the University nor the names of its contributors
56 * may be used to endorse or promote products derived from this software
57 * without specific prior written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69 * SUCH DAMAGE.
70 *
71 * @(#)union_vnops.c 8.33 (Berkeley) 7/31/95
72 */
73
74 #include <sys/cdefs.h>
75 __KERNEL_RCSID(0, "$NetBSD: union_vnops.c,v 1.76 2021/06/29 22:38:46 dholland Exp $");
76
77 #include <sys/param.h>
78 #include <sys/systm.h>
79 #include <sys/proc.h>
80 #include <sys/file.h>
81 #include <sys/time.h>
82 #include <sys/stat.h>
83 #include <sys/vnode.h>
84 #include <sys/mount.h>
85 #include <sys/namei.h>
86 #include <sys/malloc.h>
87 #include <sys/buf.h>
88 #include <sys/queue.h>
89 #include <sys/lock.h>
90 #include <sys/kauth.h>
91
92 #include <fs/union/union.h>
93 #include <miscfs/genfs/genfs.h>
94 #include <miscfs/specfs/specdev.h>
95
/*
 * Vnode operation entry points for the union file system.
 *
 * Every handler receives its operation-specific argument structure
 * through an untyped pointer and returns an errno-style int.
 */
int union_parsepath(void *);
int union_lookup(void *);
int union_create(void *);
int union_whiteout(void *);
int union_mknod(void *);
int union_open(void *);
int union_close(void *);
int union_access(void *);
int union_getattr(void *);
int union_setattr(void *);
int union_read(void *);
int union_write(void *);
int union_ioctl(void *);
int union_poll(void *);
int union_revoke(void *);
int union_mmap(void *);
int union_fsync(void *);
int union_seek(void *);
int union_remove(void *);
int union_link(void *);
int union_rename(void *);
int union_mkdir(void *);
int union_rmdir(void *);
int union_symlink(void *);
int union_readdir(void *);
int union_readlink(void *);
int union_abortop(void *);
int union_inactive(void *);
int union_reclaim(void *);
int union_lock(void *);
int union_unlock(void *);
int union_bmap(void *);
int union_print(void *);
int union_islocked(void *);
int union_pathconf(void *);
int union_advlock(void *);
int union_strategy(void *);
int union_bwrite(void *);
int union_getpages(void *);
int union_putpages(void *);
int union_kqfilter(void *);

/*
 * Single-layer lookup helper used by union_lookup(); private to this file.
 */
static int union_lookup1(struct vnode *, struct vnode **,
			    struct vnode **, struct componentname *);
140
141
/*
 * Global vfs data structures
 *
 * union_vnodeop_entries pairs each vnode operation descriptor with the
 * function that implements it for union vnodes.  Operations that are
 * not listed fall through to vn_default_error; a few are routed to
 * generic genfs handlers.  The table is terminated by a NULL pair.
 */
int (**union_vnodeop_p)(void *);
const struct vnodeopv_entry_desc union_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_parsepath_desc, union_parsepath },	/* parsepath */
	{ &vop_lookup_desc, union_lookup },		/* lookup */
	{ &vop_create_desc, union_create },		/* create */
	{ &vop_whiteout_desc, union_whiteout },		/* whiteout */
	{ &vop_mknod_desc, union_mknod },		/* mknod */
	{ &vop_open_desc, union_open },			/* open */
	{ &vop_close_desc, union_close },		/* close */
	{ &vop_access_desc, union_access },		/* access */
	{ &vop_accessx_desc, genfs_accessx },		/* accessx */
	{ &vop_getattr_desc, union_getattr },		/* getattr */
	{ &vop_setattr_desc, union_setattr },		/* setattr */
	{ &vop_read_desc, union_read },			/* read */
	{ &vop_write_desc, union_write },		/* write */
	{ &vop_fallocate_desc, genfs_eopnotsupp },	/* fallocate */
	{ &vop_fdiscard_desc, genfs_eopnotsupp },	/* fdiscard */
	{ &vop_ioctl_desc, union_ioctl },		/* ioctl */
	{ &vop_poll_desc, union_poll },			/* poll */
	{ &vop_revoke_desc, union_revoke },		/* revoke */
	{ &vop_mmap_desc, union_mmap },			/* mmap */
	{ &vop_fsync_desc, union_fsync },		/* fsync */
	{ &vop_seek_desc, union_seek },			/* seek */
	{ &vop_remove_desc, union_remove },		/* remove */
	{ &vop_link_desc, union_link },			/* link */
	{ &vop_rename_desc, union_rename },		/* rename */
	{ &vop_mkdir_desc, union_mkdir },		/* mkdir */
	{ &vop_rmdir_desc, union_rmdir },		/* rmdir */
	{ &vop_symlink_desc, union_symlink },		/* symlink */
	{ &vop_readdir_desc, union_readdir },		/* readdir */
	{ &vop_readlink_desc, union_readlink },		/* readlink */
	{ &vop_abortop_desc, union_abortop },		/* abortop */
	{ &vop_inactive_desc, union_inactive },		/* inactive */
	{ &vop_reclaim_desc, union_reclaim },		/* reclaim */
	{ &vop_lock_desc, union_lock },			/* lock */
	{ &vop_unlock_desc, union_unlock },		/* unlock */
	{ &vop_bmap_desc, union_bmap },			/* bmap */
	{ &vop_strategy_desc, union_strategy },		/* strategy */
	{ &vop_bwrite_desc, union_bwrite },		/* bwrite */
	{ &vop_print_desc, union_print },		/* print */
	{ &vop_islocked_desc, union_islocked },		/* islocked */
	{ &vop_pathconf_desc, union_pathconf },		/* pathconf */
	{ &vop_advlock_desc, union_advlock },		/* advlock */
	{ &vop_getpages_desc, union_getpages },		/* getpages */
	{ &vop_putpages_desc, union_putpages },		/* putpages */
	{ &vop_kqfilter_desc, union_kqfilter },		/* kqfilter */
	{ NULL, NULL }
};
/* Ties the operations vector pointer to the entry table above. */
const struct vnodeopv_desc union_vnodeop_opv_desc =
	{ &union_vnodeop_p, union_vnodeop_entries };
196
/*
 * True if vp is a block device, character device, socket or fifo.
 * The argument is evaluated more than once, so it must be free of
 * side effects.
 */
#define NODE_IS_SPECIAL(vp) \
	((vp)->v_type == VBLK || (vp)->v_type == VCHR || \
	(vp)->v_type == VSOCK || (vp)->v_type == VFIFO)
200
201 int
202 union_parsepath(void *v)
203 {
204 struct vop_parsepath_args /* {
205 struct vnode *a_dvp;
206 const char *a_name;
207 size_t *a_retval;
208 } */ *ap = v;
209 struct vnode *upperdvp, *lowerdvp;
210 size_t upper, lower;
211 int error;
212
213 upperdvp = UPPERVP(ap->a_dvp);
214 lowerdvp = LOWERVP(ap->a_dvp);
215
216 error = VOP_PARSEPATH(upperdvp, ap->a_name, &upper);
217 if (error) {
218 return error;
219 }
220
221 error = VOP_PARSEPATH(lowerdvp, ap->a_name, &lower);
222 if (error) {
223 return error;
224 }
225
226 /*
227 * If they're different, use the larger one. This is not a
228 * comprehensive solution, but it's sufficient for the
229 * non-default cases of parsepath that currently exist.
230 */
231 *ap->a_retval = MAX(upper, lower);
232 return 0;
233 }
234
/*
 * Look up one pathname component in a single layer of the union.
 *
 *	udvp	root vnode of the layer; used as the boundary that the
 *		mount-point traversals below must not step across.
 *	dvpp	in/out: directory to search.  Updated when a `..' lookup
 *		first has to step from a mounted file system's root to
 *		the vnode it covers.
 *	vpp	out: on success, the vnode found.
 *	cnp	component name being looked up.
 *
 * Returns 0 on success, otherwise the error from VOP_LOOKUP, vn_lock
 * or VFS_ROOT.  On success a result that differs from the directory
 * has been locked exclusively here (via vn_lock or VFS_ROOT).
 */
static int
union_lookup1(struct vnode *udvp, struct vnode **dvpp, struct vnode **vpp,
	struct componentname *cnp)
{
	int error;
	struct vnode *tdvp;
	struct vnode *dvp;
	struct mount *mp;

	dvp = *dvpp;

	/*
	 * If stepping up the directory tree, check for going
	 * back across the mount point, in which case do what
	 * lookup would do by stepping back down the mount
	 * hierarchy.
	 */
	if (cnp->cn_flags & ISDOTDOT) {
		while ((dvp != udvp) && (dvp->v_vflag & VV_ROOT)) {
			/*
			 * Don't do the NOCROSSMOUNT check
			 * at this level. By definition,
			 * union fs deals with namespaces, not
			 * filesystems.
			 */
			tdvp = dvp;
			*dvpp = dvp = dvp->v_mount->mnt_vnodecovered;
			/* Hand the lock over from the root to the covered vnode. */
			VOP_UNLOCK(tdvp);
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		}
	}

	error = VOP_LOOKUP(dvp, &tdvp, cnp);
	if (error)
		return (error);
	if (dvp != tdvp) {
		/*
		 * Lock the result.  For `..' the directory is unlocked
		 * around the attempt and relocked afterwards, whether or
		 * not locking the result succeeded.
		 */
		if (cnp->cn_flags & ISDOTDOT)
			VOP_UNLOCK(dvp);
		error = vn_lock(tdvp, LK_EXCLUSIVE);
		if (cnp->cn_flags & ISDOTDOT)
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		if (error) {
			vrele(tdvp);
			return error;
		}
		dvp = tdvp;
	}

	/*
	 * Lastly check if the current node is a mount point in
	 * which case walk up the mount hierarchy making sure not to
	 * bump into the root of the mount tree (ie. dvp != udvp).
	 */
	while (dvp != udvp && (dvp->v_type == VDIR) &&
	    (mp = dvp->v_mountedhere)) {
		/* Retry the loop condition until the mount can be busied. */
		if (vfs_busy(mp))
			continue;
		vput(dvp);
		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdvp);
		vfs_unbusy(mp);
		if (error) {
			return (error);
		}
		dvp = tdvp;
	}

	*vpp = dvp;
	return (0);
}
304
/*
 * Look up a pathname component in a union directory.
 *
 * The component is searched for in the upper layer and, unless it is
 * hidden by a whiteout or an opaque upper directory, in the lower
 * layer as well.  The results are combined into a union vnode
 * obtained from union_allocvp() and returned through *a_vpp.  When
 * only the lower layer has the name and it is a directory, a shadow
 * directory is first created in the upper layer with union_mkshadow().
 *
 * Returns 0 on success or an errno value; EROFS is returned for
 * modifying lookups of the last component on a read-only mount.
 */
int
union_lookup(void *v)
{
	struct vop_lookup_v2_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	int uerror, lerror;
	struct vnode *uppervp, *lowervp;
	struct vnode *upperdvp, *lowerdvp;
	struct vnode *dvp = ap->a_dvp;
	struct union_node *dun = VTOUNION(dvp);
	struct componentname *cnp = ap->a_cnp;
	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
	kauth_cred_t saved_cred = NULL;
	int iswhiteout;
	struct vattr va;

#ifdef notyet
	if (cnp->cn_namelen == 3 &&
	    cnp->cn_nameptr[2] == '.' &&
	    cnp->cn_nameptr[1] == '.' &&
	    cnp->cn_nameptr[0] == '.') {
		dvp = *ap->a_vpp = LOWERVP(ap->a_dvp);
		if (dvp == NULLVP)
			return (ENOENT);
		vref(dvp);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		return (0);
	}
#endif

	/* Deleting or renaming the final component needs a writable mount. */
	if ((cnp->cn_flags & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);

	/* Re-entered after a shadow directory has been created (case 2b). */
start:
	upperdvp = dun->un_uppervp;
	lowerdvp = dun->un_lowervp;
	uppervp = NULLVP;
	lowervp = NULLVP;
	iswhiteout = 0;

	/*
	 * do the lookup in the upper level.
	 * if that level consumes additional pathnames,
	 * then assume that something special is going
	 * on and just return that vnode.
	 */
	if (upperdvp != NULLVP) {
		uerror = union_lookup1(um->um_uppervp, &upperdvp,
		    &uppervp, cnp);
		if (cnp->cn_consume != 0) {
			/*
			 * NOTE(review): if union_lookup1 failed, uppervp
			 * is still NULLVP here and is handed to
			 * VOP_UNLOCK -- confirm cn_consume can only be
			 * non-zero after a successful lookup.
			 */
			if (uppervp != upperdvp)
				VOP_UNLOCK(uppervp);
			*ap->a_vpp = uppervp;
			return (uerror);
		}
		if (uerror == ENOENT || uerror == EJUSTRETURN) {
			/*
			 * The name is absent above: it hides the lower
			 * layer if it is whited out or if the upper
			 * directory carries the OPAQUE flag.
			 */
			if (cnp->cn_flags & ISWHITEOUT) {
				iswhiteout = 1;
			} else if (lowerdvp != NULLVP) {
				lerror = VOP_GETATTR(upperdvp, &va,
				    cnp->cn_cred);
				if (lerror == 0 && (va.va_flags & OPAQUE))
					iswhiteout = 1;
			}
		}
	} else {
		uerror = ENOENT;
	}

	/*
	 * in a similar way to the upper layer, do the lookup
	 * in the lower layer. this time, if there is some
	 * component magic going on, then vput whatever we got
	 * back from the upper layer and return the lower vnode
	 * instead.
	 */
	if (lowerdvp != NULLVP && !iswhiteout) {
		int nameiop;

		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY);

		/*
		 * Only do a LOOKUP on the bottom node, since
		 * we won't be making changes to it anyway.
		 */
		nameiop = cnp->cn_nameiop;
		cnp->cn_nameiop = LOOKUP;
		/* For UNMNT_BELOW mounts search with the mount's credentials. */
		if (um->um_op == UNMNT_BELOW) {
			saved_cred = cnp->cn_cred;
			cnp->cn_cred = um->um_cred;
		}

		/*
		 * we shouldn't have to worry about locking interactions
		 * between the lower layer and our union layer (w.r.t.
		 * `..' processing) because we don't futz with lowervp
		 * locks in the union-node instantiation code path.
		 */
		lerror = union_lookup1(um->um_lowervp, &lowerdvp,
		    &lowervp, cnp);
		/* Restore the caller's credentials and operation. */
		if (um->um_op == UNMNT_BELOW)
			cnp->cn_cred = saved_cred;
		cnp->cn_nameiop = nameiop;

		if (lowervp != lowerdvp)
			VOP_UNLOCK(lowerdvp);

		if (cnp->cn_consume != 0) {
			if (uppervp != NULLVP) {
				/* Only unlock uppervp if it is not the directory itself. */
				if (uppervp == upperdvp)
					vrele(uppervp);
				else
					vput(uppervp);
				uppervp = NULLVP;
			}
			*ap->a_vpp = lowervp;
			return (lerror);
		}
	} else {
		lerror = ENOENT;
		/*
		 * No lower lookup was done; for `..' fall back to the
		 * lower vnode of the recorded parent union node, if any.
		 */
		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
			lowervp = LOWERVP(dun->un_pvp);
			if (lowervp != NULLVP) {
				vref(lowervp);
				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
				lerror = 0;
			}
		}
	}

	/*
	 * EJUSTRETURN is used by underlying filesystems to indicate that
	 * a directory modification op was started successfully.
	 * This will only happen in the upper layer, since
	 * the lower layer only does LOOKUPs.
	 * If this union is mounted read-only, bounce it now.
	 */

	if ((uerror == EJUSTRETURN) && (cnp->cn_flags & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    ((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)))
		uerror = EROFS;

	/*
	 * at this point, we have uerror and lerror indicating
	 * possible errors with the lookups in the upper and lower
	 * layers. additionally, uppervp and lowervp are (locked)
	 * references to existing vnodes in the upper and lower layers.
	 *
	 * there are now three cases to consider.
	 * 1. if both layers returned an error, then return whatever
	 *    error the upper layer generated.
	 *
	 * 2. if the top layer failed and the bottom layer succeeded
	 *    then two subcases occur.
	 *    a. the bottom vnode is not a directory, in which
	 *       case just return a new union vnode referencing
	 *       an empty top layer and the existing bottom layer.
	 *    b. the bottom vnode is a directory, in which case
	 *       create a new directory in the top-level and
	 *       continue as in case 3.
	 *
	 * 3. if the top layer succeeded then return a new union
	 *    vnode referencing whatever the new top layer and
	 *    whatever the bottom layer returned.
	 */

	*ap->a_vpp = NULLVP;


	/* case 1. */
	if ((uerror != 0) && (lerror != 0)) {
		return (uerror);
	}

	/* case 2. */
	if (uerror != 0 /* && (lerror == 0) */ ) {
		if (lowervp->v_type == VDIR) { /* case 2b. */
			/*
			 * We may be racing another process to make the
			 * upper-level shadow directory. Be careful with
			 * locks/etc!
			 * If we have to create a shadow directory and want
			 * to commit the node we have to restart the lookup
			 * to get the componentname right.
			 */
			if (upperdvp) {
				/* upperdvp is unlocked across union_mkshadow(). */
				VOP_UNLOCK(upperdvp);
				uerror = union_mkshadow(um, upperdvp, cnp,
				    &uppervp);
				vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY);
				if (uerror == 0 && cnp->cn_nameiop != LOOKUP) {
					/* Drop what we hold and look up again. */
					vrele(uppervp);
					if (lowervp != NULLVP)
						vput(lowervp);
					goto start;
				}
			}
			if (uerror) {
				if (lowervp != NULLVP) {
					vput(lowervp);
					lowervp = NULLVP;
				}
				return (uerror);
			}
		}
	} else { /* uerror == 0 */
		if (uppervp != upperdvp)
			VOP_UNLOCK(uppervp);
	}

	if (lowervp != NULLVP)
		VOP_UNLOCK(lowervp);

	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
	    uppervp, lowervp, 1);

	if (error) {
		/* No union vnode was produced: release the layer references. */
		if (uppervp != NULLVP)
			vrele(uppervp);
		if (lowervp != NULLVP)
			vrele(lowervp);
		return error;
	}

	return 0;
}
539
540 int
541 union_create(void *v)
542 {
543 struct vop_create_v3_args /* {
544 struct vnode *a_dvp;
545 struct vnode **a_vpp;
546 struct componentname *a_cnp;
547 struct vattr *a_vap;
548 } */ *ap = v;
549 struct union_node *un = VTOUNION(ap->a_dvp);
550 struct vnode *dvp = un->un_uppervp;
551 struct componentname *cnp = ap->a_cnp;
552
553 if (dvp != NULLVP) {
554 int error;
555 struct vnode *vp;
556 struct mount *mp;
557
558 mp = ap->a_dvp->v_mount;
559
560 vp = NULL;
561 error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
562 if (error)
563 return (error);
564
565 error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp,
566 NULLVP, 1);
567 if (error)
568 vrele(vp);
569 return (error);
570 }
571
572 return (EROFS);
573 }
574
575 int
576 union_whiteout(void *v)
577 {
578 struct vop_whiteout_args /* {
579 struct vnode *a_dvp;
580 struct componentname *a_cnp;
581 int a_flags;
582 } */ *ap = v;
583 struct union_node *un = VTOUNION(ap->a_dvp);
584 struct componentname *cnp = ap->a_cnp;
585
586 if (un->un_uppervp == NULLVP)
587 return (EOPNOTSUPP);
588
589 return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags));
590 }
591
592 int
593 union_mknod(void *v)
594 {
595 struct vop_mknod_v3_args /* {
596 struct vnode *a_dvp;
597 struct vnode **a_vpp;
598 struct componentname *a_cnp;
599 struct vattr *a_vap;
600 } */ *ap = v;
601 struct union_node *un = VTOUNION(ap->a_dvp);
602 struct vnode *dvp = un->un_uppervp;
603 struct componentname *cnp = ap->a_cnp;
604
605 if (dvp != NULLVP) {
606 int error;
607 struct vnode *vp;
608 struct mount *mp;
609
610 mp = ap->a_dvp->v_mount;
611 error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap);
612 if (error)
613 return (error);
614
615 error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
616 cnp, vp, NULLVP, 1);
617 if (error)
618 vrele(vp);
619 return (error);
620 }
621
622 return (EROFS);
623 }
624
625 int
626 union_open(void *v)
627 {
628 struct vop_open_args /* {
629 struct vnodeop_desc *a_desc;
630 struct vnode *a_vp;
631 int a_mode;
632 kauth_cred_t a_cred;
633 } */ *ap = v;
634 struct union_node *un = VTOUNION(ap->a_vp);
635 struct vnode *tvp;
636 int mode = ap->a_mode;
637 kauth_cred_t cred = ap->a_cred;
638 struct lwp *l = curlwp;
639 int error;
640
641 /*
642 * If there is an existing upper vp then simply open that.
643 */
644 tvp = un->un_uppervp;
645 if (tvp == NULLVP) {
646 /*
647 * If the lower vnode is being opened for writing, then
648 * copy the file contents to the upper vnode and open that,
649 * otherwise can simply open the lower vnode.
650 */
651 tvp = un->un_lowervp;
652 if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
653 error = union_copyup(un, (mode&O_TRUNC) == 0, cred, l);
654 if (error == 0)
655 error = VOP_OPEN(un->un_uppervp, mode, cred);
656 if (error == 0) {
657 mutex_enter(un->un_uppervp->v_interlock);
658 un->un_uppervp->v_writecount++;
659 mutex_exit(un->un_uppervp->v_interlock);
660 }
661 return (error);
662 }
663
664 /*
665 * Just open the lower vnode, but check for nodev mount flag
666 */
667 if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
668 (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
669 return ENXIO;
670 un->un_openl++;
671 vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
672 error = VOP_OPEN(tvp, mode, cred);
673 VOP_UNLOCK(tvp);
674
675 return (error);
676 }
677 /*
678 * Just open the upper vnode, checking for nodev mount flag first
679 */
680 if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
681 (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
682 return ENXIO;
683
684 error = VOP_OPEN(tvp, mode, cred);
685 if (error == 0 && (ap->a_mode & FWRITE)) {
686 mutex_enter(tvp->v_interlock);
687 tvp->v_writecount++;
688 mutex_exit(tvp->v_interlock);
689 }
690
691 return (error);
692 }
693
694 int
695 union_close(void *v)
696 {
697 struct vop_close_args /* {
698 struct vnode *a_vp;
699 int a_fflag;
700 kauth_cred_t a_cred;
701 } */ *ap = v;
702 struct union_node *un = VTOUNION(ap->a_vp);
703 struct vnode *vp;
704 int error;
705 bool do_lock;
706
707 vp = un->un_uppervp;
708 if (vp != NULLVP) {
709 do_lock = false;
710 } else {
711 KASSERT(un->un_openl > 0);
712 --un->un_openl;
713 vp = un->un_lowervp;
714 do_lock = true;
715 }
716
717 KASSERT(vp != NULLVP);
718 ap->a_vp = vp;
719 if ((ap->a_fflag & FWRITE)) {
720 KASSERT(vp == un->un_uppervp);
721 mutex_enter(vp->v_interlock);
722 vp->v_writecount--;
723 mutex_exit(vp->v_interlock);
724 }
725 if (do_lock)
726 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
727 error = VCALL(vp, VOFFSET(vop_close), ap);
728 if (do_lock)
729 VOP_UNLOCK(vp);
730
731 return error;
732 }
733
734 /*
735 * Check access permission on the union vnode.
736 * The access check being enforced is to check
737 * against both the underlying vnode, and any
738 * copied vnode. This ensures that no additional
739 * file permissions are given away simply because
740 * the user caused an implicit file copy.
741 */
742 int
743 union_access(void *v)
744 {
745 struct vop_access_args /* {
746 struct vnodeop_desc *a_desc;
747 struct vnode *a_vp;
748 accmode_t a_accmode;
749 kauth_cred_t a_cred;
750 } */ *ap = v;
751 struct vnode *vp = ap->a_vp;
752 struct union_node *un = VTOUNION(vp);
753 int error = EACCES;
754 struct union_mount *um = MOUNTTOUNIONMOUNT(vp->v_mount);
755
756 /*
757 * Disallow write attempts on read-only file systems;
758 * unless the file is a socket, fifo, or a block or
759 * character device resident on the file system.
760 */
761 if (ap->a_accmode & VWRITE) {
762 switch (vp->v_type) {
763 case VDIR:
764 case VLNK:
765 case VREG:
766 if (vp->v_mount->mnt_flag & MNT_RDONLY)
767 return (EROFS);
768 break;
769 case VBAD:
770 case VBLK:
771 case VCHR:
772 case VSOCK:
773 case VFIFO:
774 case VNON:
775 default:
776 break;
777 }
778 }
779
780
781 if ((vp = un->un_uppervp) != NULLVP) {
782 ap->a_vp = vp;
783 return (VCALL(vp, VOFFSET(vop_access), ap));
784 }
785
786 if ((vp = un->un_lowervp) != NULLVP) {
787 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
788 ap->a_vp = vp;
789 error = VCALL(vp, VOFFSET(vop_access), ap);
790 if (error == 0) {
791 if (um->um_op == UNMNT_BELOW) {
792 ap->a_cred = um->um_cred;
793 error = VCALL(vp, VOFFSET(vop_access), ap);
794 }
795 }
796 VOP_UNLOCK(vp);
797 if (error)
798 return (error);
799 }
800
801 return (error);
802 }
803
804 /*
805 * We handle getattr only to change the fsid and
806 * track object sizes
807 */
808 int
809 union_getattr(void *v)
810 {
811 struct vop_getattr_args /* {
812 struct vnode *a_vp;
813 struct vattr *a_vap;
814 kauth_cred_t a_cred;
815 } */ *ap = v;
816 int error;
817 struct union_node *un = VTOUNION(ap->a_vp);
818 struct vnode *vp = un->un_uppervp;
819 struct vattr *vap;
820 struct vattr va;
821
822
823 /*
824 * Some programs walk the filesystem hierarchy by counting
825 * links to directories to avoid stat'ing all the time.
826 * This means the link count on directories needs to be "correct".
827 * The only way to do that is to call getattr on both layers
828 * and fix up the link count. The link count will not necessarily
829 * be accurate but will be large enough to defeat the tree walkers.
830 *
831 * To make life more interesting, some filesystems don't keep
832 * track of link counts in the expected way, and return a
833 * link count of `1' for those directories; if either of the
834 * component directories returns a link count of `1', we return a 1.
835 */
836
837 vap = ap->a_vap;
838
839 vp = un->un_uppervp;
840 if (vp != NULLVP) {
841 error = VOP_GETATTR(vp, vap, ap->a_cred);
842 if (error)
843 return (error);
844 mutex_enter(&un->un_lock);
845 union_newsize(ap->a_vp, vap->va_size, VNOVAL);
846 }
847
848 if (vp == NULLVP) {
849 vp = un->un_lowervp;
850 } else if (vp->v_type == VDIR) {
851 vp = un->un_lowervp;
852 if (vp != NULLVP)
853 vap = &va;
854 } else {
855 vp = NULLVP;
856 }
857
858 if (vp != NULLVP) {
859 if (vp == un->un_lowervp)
860 vn_lock(vp, LK_SHARED | LK_RETRY);
861 error = VOP_GETATTR(vp, vap, ap->a_cred);
862 if (vp == un->un_lowervp)
863 VOP_UNLOCK(vp);
864 if (error)
865 return (error);
866 mutex_enter(&un->un_lock);
867 union_newsize(ap->a_vp, VNOVAL, vap->va_size);
868 }
869
870 if ((vap != ap->a_vap) && (vap->va_type == VDIR)) {
871 /*
872 * Link count manipulation:
873 * - If both return "2", return 2 (no subdirs)
874 * - If one or the other return "1", return "1" (ENOCLUE)
875 */
876 if ((ap->a_vap->va_nlink == 2) &&
877 (vap->va_nlink == 2))
878 ;
879 else if (ap->a_vap->va_nlink != 1) {
880 if (vap->va_nlink == 1)
881 ap->a_vap->va_nlink = 1;
882 else
883 ap->a_vap->va_nlink += vap->va_nlink;
884 }
885 }
886 ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
887 return (0);
888 }
889
890 int
891 union_setattr(void *v)
892 {
893 struct vop_setattr_args /* {
894 struct vnode *a_vp;
895 struct vattr *a_vap;
896 kauth_cred_t a_cred;
897 } */ *ap = v;
898 struct vattr *vap = ap->a_vap;
899 struct vnode *vp = ap->a_vp;
900 struct union_node *un = VTOUNION(vp);
901 bool size_only; /* All but va_size are VNOVAL. */
902 int error;
903
904 size_only = (vap->va_flags == VNOVAL && vap->va_uid == (uid_t)VNOVAL &&
905 vap->va_gid == (gid_t)VNOVAL && vap->va_atime.tv_sec == VNOVAL &&
906 vap->va_mtime.tv_sec == VNOVAL && vap->va_mode == (mode_t)VNOVAL);
907
908 if (!size_only && (vp->v_mount->mnt_flag & MNT_RDONLY))
909 return (EROFS);
910 if (vap->va_size != VNOVAL) {
911 switch (vp->v_type) {
912 case VDIR:
913 return (EISDIR);
914 case VCHR:
915 case VBLK:
916 case VSOCK:
917 case VFIFO:
918 break;
919 case VREG:
920 case VLNK:
921 default:
922 /*
923 * Disallow write attempts if the filesystem is
924 * mounted read-only.
925 */
926 if (vp->v_mount->mnt_flag & MNT_RDONLY)
927 return (EROFS);
928 }
929 }
930
931 /*
932 * Handle case of truncating lower object to zero size,
933 * by creating a zero length upper object. This is to
934 * handle the case of open with O_TRUNC and O_CREAT.
935 */
936 if ((un->un_uppervp == NULLVP) &&
937 /* assert(un->un_lowervp != NULLVP) */
938 (un->un_lowervp->v_type == VREG)) {
939 error = union_copyup(un, (vap->va_size != 0),
940 ap->a_cred, curlwp);
941 if (error)
942 return (error);
943 }
944
945 /*
946 * Try to set attributes in upper layer, ignore size change to zero
947 * for devices to handle O_TRUNC and return read-only filesystem error
948 * otherwise.
949 */
950 if (un->un_uppervp != NULLVP) {
951 error = VOP_SETATTR(un->un_uppervp, vap, ap->a_cred);
952 if ((error == 0) && (vap->va_size != VNOVAL)) {
953 mutex_enter(&un->un_lock);
954 union_newsize(ap->a_vp, vap->va_size, VNOVAL);
955 }
956 } else {
957 KASSERT(un->un_lowervp != NULLVP);
958 if (NODE_IS_SPECIAL(un->un_lowervp)) {
959 if (size_only &&
960 (vap->va_size == 0 || vap->va_size == VNOVAL))
961 error = 0;
962 else
963 error = EROFS;
964 } else {
965 error = EROFS;
966 }
967 }
968
969 return (error);
970 }
971
972 int
973 union_read(void *v)
974 {
975 struct vop_read_args /* {
976 struct vnode *a_vp;
977 struct uio *a_uio;
978 int a_ioflag;
979 kauth_cred_t a_cred;
980 } */ *ap = v;
981 int error;
982 struct vnode *vp = OTHERVP(ap->a_vp);
983 int dolock = (vp == LOWERVP(ap->a_vp));
984
985 if (dolock)
986 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
987 error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
988 if (dolock)
989 VOP_UNLOCK(vp);
990
991 /*
992 * XXX
993 * perhaps the size of the underlying object has changed under
994 * our feet. take advantage of the offset information present
995 * in the uio structure.
996 */
997 if (error == 0) {
998 struct union_node *un = VTOUNION(ap->a_vp);
999 off_t cur = ap->a_uio->uio_offset;
1000 off_t usz = VNOVAL, lsz = VNOVAL;
1001
1002 mutex_enter(&un->un_lock);
1003 if (vp == un->un_uppervp) {
1004 if (cur > un->un_uppersz)
1005 usz = cur;
1006 } else {
1007 if (cur > un->un_lowersz)
1008 lsz = cur;
1009 }
1010
1011 if (usz != VNOVAL || lsz != VNOVAL)
1012 union_newsize(ap->a_vp, usz, lsz);
1013 else
1014 mutex_exit(&un->un_lock);
1015 }
1016
1017 return (error);
1018 }
1019
1020 int
1021 union_write(void *v)
1022 {
1023 struct vop_read_args /* {
1024 struct vnode *a_vp;
1025 struct uio *a_uio;
1026 int a_ioflag;
1027 kauth_cred_t a_cred;
1028 } */ *ap = v;
1029 int error;
1030 struct vnode *vp;
1031 struct union_node *un = VTOUNION(ap->a_vp);
1032
1033 vp = UPPERVP(ap->a_vp);
1034 if (vp == NULLVP) {
1035 vp = LOWERVP(ap->a_vp);
1036 if (NODE_IS_SPECIAL(vp)) {
1037 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1038 error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag,
1039 ap->a_cred);
1040 VOP_UNLOCK(vp);
1041 return error;
1042 }
1043 panic("union: missing upper layer in write");
1044 }
1045
1046 error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
1047
1048 /*
1049 * the size of the underlying object may be changed by the
1050 * write.
1051 */
1052 if (error == 0) {
1053 off_t cur = ap->a_uio->uio_offset;
1054
1055 mutex_enter(&un->un_lock);
1056 if (cur > un->un_uppersz)
1057 union_newsize(ap->a_vp, cur, VNOVAL);
1058 else
1059 mutex_exit(&un->un_lock);
1060 }
1061
1062 return (error);
1063 }
1064
1065 int
1066 union_ioctl(void *v)
1067 {
1068 struct vop_ioctl_args /* {
1069 struct vnode *a_vp;
1070 int a_command;
1071 void *a_data;
1072 int a_fflag;
1073 kauth_cred_t a_cred;
1074 } */ *ap = v;
1075 struct vnode *ovp = OTHERVP(ap->a_vp);
1076
1077 ap->a_vp = ovp;
1078 return (VCALL(ovp, VOFFSET(vop_ioctl), ap));
1079 }
1080
1081 int
1082 union_poll(void *v)
1083 {
1084 struct vop_poll_args /* {
1085 struct vnode *a_vp;
1086 int a_events;
1087 } */ *ap = v;
1088 struct vnode *ovp = OTHERVP(ap->a_vp);
1089
1090 ap->a_vp = ovp;
1091 return (VCALL(ovp, VOFFSET(vop_poll), ap));
1092 }
1093
1094 int
1095 union_revoke(void *v)
1096 {
1097 struct vop_revoke_args /* {
1098 struct vnode *a_vp;
1099 int a_flags;
1100 struct proc *a_p;
1101 } */ *ap = v;
1102 struct vnode *vp = ap->a_vp;
1103
1104 if (UPPERVP(vp))
1105 VOP_REVOKE(UPPERVP(vp), ap->a_flags);
1106 if (LOWERVP(vp))
1107 VOP_REVOKE(LOWERVP(vp), ap->a_flags);
1108 vgone(vp); /* XXXAD?? */
1109 return (0);
1110 }
1111
1112 int
1113 union_mmap(void *v)
1114 {
1115 struct vop_mmap_args /* {
1116 struct vnode *a_vp;
1117 vm_prot_t a_prot;
1118 kauth_cred_t a_cred;
1119 } */ *ap = v;
1120 struct vnode *ovp = OTHERVP(ap->a_vp);
1121
1122 ap->a_vp = ovp;
1123 return (VCALL(ovp, VOFFSET(vop_mmap), ap));
1124 }
1125
1126 int
1127 union_fsync(void *v)
1128 {
1129 struct vop_fsync_args /* {
1130 struct vnode *a_vp;
1131 kauth_cred_t a_cred;
1132 int a_flags;
1133 off_t offhi;
1134 off_t offlo;
1135 } */ *ap = v;
1136 int error = 0;
1137 struct vnode *targetvp;
1138
1139 /*
1140 * If vinvalbuf is calling us, it's a "shallow fsync" -- don't
1141 * bother syncing the underlying vnodes, since (a) they'll be
1142 * fsync'ed when reclaimed and (b) we could deadlock if
1143 * they're locked; otherwise, pass it through to the
1144 * underlying layer.
1145 */
1146 if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) {
1147 error = spec_fsync(v);
1148 if (error)
1149 return error;
1150 }
1151
1152 if (ap->a_flags & FSYNC_RECLAIM)
1153 return 0;
1154
1155 targetvp = OTHERVP(ap->a_vp);
1156 if (targetvp != NULLVP) {
1157 int dolock = (targetvp == LOWERVP(ap->a_vp));
1158
1159 if (dolock)
1160 vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY);
1161 error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_flags,
1162 ap->a_offlo, ap->a_offhi);
1163 if (dolock)
1164 VOP_UNLOCK(targetvp);
1165 }
1166
1167 return (error);
1168 }
1169
1170 int
1171 union_seek(void *v)
1172 {
1173 struct vop_seek_args /* {
1174 struct vnode *a_vp;
1175 off_t a_oldoff;
1176 off_t a_newoff;
1177 kauth_cred_t a_cred;
1178 } */ *ap = v;
1179 struct vnode *ovp = OTHERVP(ap->a_vp);
1180
1181 ap->a_vp = ovp;
1182 return (VCALL(ovp, VOFFSET(vop_seek), ap));
1183 }
1184
1185 int
1186 union_remove(void *v)
1187 {
1188 struct vop_remove_v2_args /* {
1189 struct vnode *a_dvp;
1190 struct vnode *a_vp;
1191 struct componentname *a_cnp;
1192 } */ *ap = v;
1193 int error;
1194 struct union_node *dun = VTOUNION(ap->a_dvp);
1195 struct union_node *un = VTOUNION(ap->a_vp);
1196 struct componentname *cnp = ap->a_cnp;
1197
1198 if (dun->un_uppervp == NULLVP)
1199 panic("union remove: null upper vnode");
1200
1201 if (un->un_uppervp != NULLVP) {
1202 struct vnode *dvp = dun->un_uppervp;
1203 struct vnode *vp = un->un_uppervp;
1204
1205 /* Account for VOP_REMOVE to vrele vp. */
1206 vref(vp);
1207 if (union_dowhiteout(un, cnp->cn_cred))
1208 cnp->cn_flags |= DOWHITEOUT;
1209 error = VOP_REMOVE(dvp, vp, cnp);
1210 if (!error)
1211 union_removed_upper(un);
1212 vrele(ap->a_vp);
1213 } else {
1214 error = union_mkwhiteout(
1215 MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
1216 dun->un_uppervp, ap->a_cnp, un);
1217 vput(ap->a_vp);
1218 }
1219
1220 return (error);
1221 }
1222
1223 int
1224 union_link(void *v)
1225 {
1226 struct vop_link_v2_args /* {
1227 struct vnode *a_dvp;
1228 struct vnode *a_vp;
1229 struct componentname *a_cnp;
1230 } */ *ap = v;
1231 int error = 0;
1232 struct componentname *cnp = ap->a_cnp;
1233 struct union_node *dun;
1234 struct vnode *vp;
1235 struct vnode *dvp;
1236
1237 dun = VTOUNION(ap->a_dvp);
1238
1239 KASSERT((ap->a_cnp->cn_flags & LOCKPARENT) != 0);
1240
1241 if (ap->a_dvp->v_op != ap->a_vp->v_op) {
1242 vp = ap->a_vp;
1243 } else {
1244 struct union_node *un = VTOUNION(ap->a_vp);
1245 if (un->un_uppervp == NULLVP) {
1246 const bool droplock = (dun->un_uppervp == un->un_dirvp);
1247
1248 /*
1249 * Needs to be copied before we can link it.
1250 */
1251 vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
1252 if (droplock)
1253 VOP_UNLOCK(dun->un_uppervp);
1254 error = union_copyup(un, 1, cnp->cn_cred, curlwp);
1255 if (droplock) {
1256 vn_lock(dun->un_uppervp,
1257 LK_EXCLUSIVE | LK_RETRY);
1258 /*
1259 * During copyup, we dropped the lock on the
1260 * dir and invalidated any saved namei lookup
1261 * state for the directory we'll be entering
1262 * the link in. We need to re-run the lookup
1263 * in that directory to reset any state needed
1264 * for VOP_LINK.
1265 * Call relookup on the union-layer to reset
1266 * the state.
1267 */
1268 vp = NULLVP;
1269 if (dun->un_uppervp == NULLVP)
1270 panic("union: null upperdvp?");
1271 error = relookup(ap->a_dvp, &vp, ap->a_cnp, 0);
1272 if (error) {
1273 VOP_UNLOCK(ap->a_vp);
1274 return EROFS; /* ? */
1275 }
1276 if (vp != NULLVP) {
1277 /*
1278 * The name we want to create has
1279 * mysteriously appeared (a race?)
1280 */
1281 error = EEXIST;
1282 VOP_UNLOCK(ap->a_vp);
1283 vput(vp);
1284 return (error);
1285 }
1286 }
1287 VOP_UNLOCK(ap->a_vp);
1288 }
1289 vp = un->un_uppervp;
1290 }
1291
1292 dvp = dun->un_uppervp;
1293 if (dvp == NULLVP)
1294 error = EROFS;
1295
1296 if (error)
1297 return (error);
1298
1299 return VOP_LINK(dvp, vp, cnp);
1300 }
1301
1302 int
1303 union_rename(void *v)
1304 {
1305 struct vop_rename_args /* {
1306 struct vnode *a_fdvp;
1307 struct vnode *a_fvp;
1308 struct componentname *a_fcnp;
1309 struct vnode *a_tdvp;
1310 struct vnode *a_tvp;
1311 struct componentname *a_tcnp;
1312 } */ *ap = v;
1313 int error;
1314
1315 struct vnode *fdvp = ap->a_fdvp;
1316 struct vnode *fvp = ap->a_fvp;
1317 struct vnode *tdvp = ap->a_tdvp;
1318 struct vnode *tvp = ap->a_tvp;
1319
1320 /*
1321 * Account for VOP_RENAME to vrele all nodes.
1322 * Note: VOP_RENAME will unlock tdvp.
1323 */
1324
1325 if (fdvp->v_op == union_vnodeop_p) { /* always true */
1326 struct union_node *un = VTOUNION(fdvp);
1327 if (un->un_uppervp == NULLVP) {
1328 /*
1329 * this should never happen in normal
1330 * operation but might if there was
1331 * a problem creating the top-level shadow
1332 * directory.
1333 */
1334 error = EXDEV;
1335 goto bad;
1336 }
1337
1338 fdvp = un->un_uppervp;
1339 vref(fdvp);
1340 }
1341
1342 if (fvp->v_op == union_vnodeop_p) { /* always true */
1343 struct union_node *un = VTOUNION(fvp);
1344 if (un->un_uppervp == NULLVP) {
1345 /* XXX: should do a copyup */
1346 error = EXDEV;
1347 goto bad;
1348 }
1349
1350 if (un->un_lowervp != NULLVP)
1351 ap->a_fcnp->cn_flags |= DOWHITEOUT;
1352
1353 fvp = un->un_uppervp;
1354 vref(fvp);
1355 }
1356
1357 if (tdvp->v_op == union_vnodeop_p) {
1358 struct union_node *un = VTOUNION(tdvp);
1359 if (un->un_uppervp == NULLVP) {
1360 /*
1361 * this should never happen in normal
1362 * operation but might if there was
1363 * a problem creating the top-level shadow
1364 * directory.
1365 */
1366 error = EXDEV;
1367 goto bad;
1368 }
1369
1370 tdvp = un->un_uppervp;
1371 vref(tdvp);
1372 }
1373
1374 if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
1375 struct union_node *un = VTOUNION(tvp);
1376
1377 tvp = un->un_uppervp;
1378 if (tvp != NULLVP) {
1379 vref(tvp);
1380 }
1381 }
1382
1383 error = VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp);
1384 goto out;
1385
1386 bad:
1387 vput(tdvp);
1388 if (tvp != NULLVP)
1389 vput(tvp);
1390 vrele(fdvp);
1391 vrele(fvp);
1392
1393 out:
1394 if (fdvp != ap->a_fdvp) {
1395 vrele(ap->a_fdvp);
1396 }
1397 if (fvp != ap->a_fvp) {
1398 vrele(ap->a_fvp);
1399 }
1400 if (tdvp != ap->a_tdvp) {
1401 vrele(ap->a_tdvp);
1402 }
1403 if (tvp != ap->a_tvp) {
1404 vrele(ap->a_tvp);
1405 }
1406 return (error);
1407 }
1408
1409 int
1410 union_mkdir(void *v)
1411 {
1412 struct vop_mkdir_v3_args /* {
1413 struct vnode *a_dvp;
1414 struct vnode **a_vpp;
1415 struct componentname *a_cnp;
1416 struct vattr *a_vap;
1417 } */ *ap = v;
1418 struct union_node *un = VTOUNION(ap->a_dvp);
1419 struct vnode *dvp = un->un_uppervp;
1420 struct componentname *cnp = ap->a_cnp;
1421
1422 if (dvp != NULLVP) {
1423 int error;
1424 struct vnode *vp;
1425
1426 vp = NULL;
1427 error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap);
1428 if (error) {
1429 vrele(ap->a_dvp);
1430 return (error);
1431 }
1432
1433 error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp,
1434 NULLVP, cnp, vp, NULLVP, 1);
1435 if (error)
1436 vrele(vp);
1437 return (error);
1438 }
1439
1440 return (EROFS);
1441 }
1442
1443 int
1444 union_rmdir(void *v)
1445 {
1446 struct vop_rmdir_v2_args /* {
1447 struct vnode *a_dvp;
1448 struct vnode *a_vp;
1449 struct componentname *a_cnp;
1450 } */ *ap = v;
1451 int error;
1452 struct union_node *dun = VTOUNION(ap->a_dvp);
1453 struct union_node *un = VTOUNION(ap->a_vp);
1454 struct componentname *cnp = ap->a_cnp;
1455
1456 if (dun->un_uppervp == NULLVP)
1457 panic("union rmdir: null upper vnode");
1458
1459 error = union_check_rmdir(un, cnp->cn_cred);
1460 if (error) {
1461 vput(ap->a_vp);
1462 return error;
1463 }
1464
1465 if (un->un_uppervp != NULLVP) {
1466 struct vnode *dvp = dun->un_uppervp;
1467 struct vnode *vp = un->un_uppervp;
1468
1469 /* Account for VOP_RMDIR to vrele vp. */
1470 vref(vp);
1471 if (union_dowhiteout(un, cnp->cn_cred))
1472 cnp->cn_flags |= DOWHITEOUT;
1473 error = VOP_RMDIR(dvp, vp, ap->a_cnp);
1474 if (!error)
1475 union_removed_upper(un);
1476 vrele(ap->a_vp);
1477 } else {
1478 error = union_mkwhiteout(
1479 MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
1480 dun->un_uppervp, ap->a_cnp, un);
1481 vput(ap->a_vp);
1482 }
1483
1484 return (error);
1485 }
1486
1487 int
1488 union_symlink(void *v)
1489 {
1490 struct vop_symlink_v3_args /* {
1491 struct vnode *a_dvp;
1492 struct vnode **a_vpp;
1493 struct componentname *a_cnp;
1494 struct vattr *a_vap;
1495 char *a_target;
1496 } */ *ap = v;
1497 struct union_node *un = VTOUNION(ap->a_dvp);
1498 struct vnode *dvp = un->un_uppervp;
1499 struct componentname *cnp = ap->a_cnp;
1500
1501 if (dvp != NULLVP) {
1502 int error;
1503
1504 error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
1505 ap->a_target);
1506 return (error);
1507 }
1508
1509 return (EROFS);
1510 }
1511
1512 /*
1513 * union_readdir works in concert with getdirentries and
1514 * readdir(3) to provide a list of entries in the unioned
1515 * directories. getdirentries is responsible for walking
1516 * down the union stack. readdir(3) is responsible for
1517 * eliminating duplicate names from the returned data stream.
1518 */
1519 int
1520 union_readdir(void *v)
1521 {
1522 struct vop_readdir_args /* {
1523 struct vnodeop_desc *a_desc;
1524 struct vnode *a_vp;
1525 struct uio *a_uio;
1526 kauth_cred_t a_cred;
1527 int *a_eofflag;
1528 u_long *a_cookies;
1529 int a_ncookies;
1530 } */ *ap = v;
1531 struct union_node *un = VTOUNION(ap->a_vp);
1532 struct vnode *vp;
1533 int dolock, error;
1534
1535 if (un->un_hooknode) {
1536 KASSERT(un->un_uppervp == NULLVP);
1537 KASSERT(un->un_lowervp != NULLVP);
1538 vp = un->un_lowervp;
1539 dolock = 1;
1540 } else {
1541 vp = un->un_uppervp;
1542 dolock = 0;
1543 }
1544 if (vp == NULLVP)
1545 return 0;
1546
1547 if (dolock)
1548 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1549 ap->a_vp = vp;
1550 error = VCALL(vp, VOFFSET(vop_readdir), ap);
1551 if (dolock)
1552 VOP_UNLOCK(vp);
1553
1554 return error;
1555 }
1556
1557 int
1558 union_readlink(void *v)
1559 {
1560 struct vop_readlink_args /* {
1561 struct vnode *a_vp;
1562 struct uio *a_uio;
1563 kauth_cred_t a_cred;
1564 } */ *ap = v;
1565 int error;
1566 struct vnode *vp = OTHERVP(ap->a_vp);
1567 int dolock = (vp == LOWERVP(ap->a_vp));
1568
1569 if (dolock)
1570 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1571 ap->a_vp = vp;
1572 error = VCALL(vp, VOFFSET(vop_readlink), ap);
1573 if (dolock)
1574 VOP_UNLOCK(vp);
1575
1576 return (error);
1577 }
1578
1579 int
1580 union_abortop(void *v)
1581 {
1582 struct vop_abortop_args /* {
1583 struct vnode *a_dvp;
1584 struct componentname *a_cnp;
1585 } */ *ap = v;
1586
1587 KASSERT(UPPERVP(ap->a_dvp) != NULL);
1588
1589 ap->a_dvp = UPPERVP(ap->a_dvp);
1590 return VCALL(ap->a_dvp, VOFFSET(vop_abortop), ap);
1591 }
1592
1593 int
1594 union_inactive(void *v)
1595 {
1596 struct vop_inactive_v2_args /* {
1597 const struct vnodeop_desc *a_desc;
1598 struct vnode *a_vp;
1599 bool *a_recycle;
1600 } */ *ap = v;
1601 struct vnode *vp = ap->a_vp;
1602 struct union_node *un = VTOUNION(vp);
1603 struct vnode **vpp;
1604
1605 /*
1606 * Do nothing (and _don't_ bypass).
1607 * Wait to vrele lowervp until reclaim,
1608 * so that until then our union_node is in the
1609 * cache and reusable.
1610 *
1611 * NEEDSWORK: Someday, consider inactive'ing
1612 * the lowervp and then trying to reactivate it
1613 * with capabilities (v_id)
1614 * like they do in the name lookup cache code.
1615 * That's too much work for now.
1616 */
1617
1618 if (un->un_dircache != 0) {
1619 for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
1620 vrele(*vpp);
1621 free(un->un_dircache, M_TEMP);
1622 un->un_dircache = 0;
1623 }
1624
1625 *ap->a_recycle = ((un->un_cflags & UN_CACHED) == 0);
1626
1627 return (0);
1628 }
1629
1630 int
1631 union_reclaim(void *v)
1632 {
1633 struct vop_reclaim_v2_args /* {
1634 struct vnode *a_vp;
1635 } */ *ap = v;
1636 struct vnode *vp = ap->a_vp;
1637 struct vnode *uvp = UPPERVP(vp);
1638
1639 VOP_UNLOCK(vp);
1640
1641 if (uvp != NULL) {
1642 mutex_enter(uvp->v_interlock);
1643 KASSERT(vp->v_interlock == uvp->v_interlock);
1644 uvp->v_writecount -= vp->v_writecount;
1645 mutex_exit(uvp->v_interlock);
1646 }
1647
1648 union_freevp(vp);
1649
1650 return (0);
1651 }
1652
1653 static int
1654 union_lock1(struct vnode *vp, struct vnode *lockvp, int flags)
1655 {
1656 struct vop_lock_args ap;
1657
1658 ap.a_desc = VDESC(vop_lock);
1659 ap.a_vp = lockvp;
1660 ap.a_flags = flags;
1661
1662 if (lockvp == vp)
1663 return genfs_lock(&ap);
1664 else
1665 return VCALL(ap.a_vp, VOFFSET(vop_lock), &ap);
1666 }
1667
1668 static int
1669 union_unlock1(struct vnode *vp, struct vnode *lockvp)
1670 {
1671 struct vop_unlock_args ap;
1672
1673 ap.a_desc = VDESC(vop_unlock);
1674 ap.a_vp = lockvp;
1675
1676 if (lockvp == vp)
1677 return genfs_unlock(&ap);
1678 else
1679 return VCALL(ap.a_vp, VOFFSET(vop_unlock), &ap);
1680 }
1681
1682 int
1683 union_lock(void *v)
1684 {
1685 struct vop_lock_args /* {
1686 struct vnode *a_vp;
1687 int a_flags;
1688 } */ *ap = v;
1689 struct vnode *vp = ap->a_vp, *lockvp;
1690 struct union_node *un = VTOUNION(vp);
1691 int flags = ap->a_flags;
1692 int error;
1693
1694 if ((flags & LK_NOWAIT) != 0) {
1695 if (!mutex_tryenter(&un->un_lock))
1696 return EBUSY;
1697 lockvp = LOCKVP(vp);
1698 error = union_lock1(vp, lockvp, flags);
1699 mutex_exit(&un->un_lock);
1700 if (error)
1701 return error;
1702 if (mutex_tryenter(vp->v_interlock)) {
1703 error = vdead_check(vp, VDEAD_NOWAIT);
1704 mutex_exit(vp->v_interlock);
1705 } else
1706 error = EBUSY;
1707 if (error)
1708 union_unlock1(vp, lockvp);
1709 return error;
1710 }
1711
1712 mutex_enter(&un->un_lock);
1713 for (;;) {
1714 lockvp = LOCKVP(vp);
1715 mutex_exit(&un->un_lock);
1716 error = union_lock1(vp, lockvp, flags);
1717 if (error != 0)
1718 return error;
1719 mutex_enter(&un->un_lock);
1720 if (lockvp == LOCKVP(vp))
1721 break;
1722 union_unlock1(vp, lockvp);
1723 }
1724 mutex_exit(&un->un_lock);
1725
1726 mutex_enter(vp->v_interlock);
1727 error = vdead_check(vp, VDEAD_NOWAIT);
1728 if (error) {
1729 union_unlock1(vp, lockvp);
1730 error = vdead_check(vp, 0);
1731 KASSERT(error == ENOENT);
1732 }
1733 mutex_exit(vp->v_interlock);
1734 return error;
1735 }
1736
1737 int
1738 union_unlock(void *v)
1739 {
1740 struct vop_unlock_args /* {
1741 struct vnode *a_vp;
1742 int a_flags;
1743 } */ *ap = v;
1744 struct vnode *vp = ap->a_vp, *lockvp;
1745
1746 lockvp = LOCKVP(vp);
1747 union_unlock1(vp, lockvp);
1748
1749 return 0;
1750 }
1751
1752 int
1753 union_bmap(void *v)
1754 {
1755 struct vop_bmap_args /* {
1756 struct vnode *a_vp;
1757 daddr_t a_bn;
1758 struct vnode **a_vpp;
1759 daddr_t *a_bnp;
1760 int *a_runp;
1761 } */ *ap = v;
1762 int error;
1763 struct vnode *vp = OTHERVP(ap->a_vp);
1764 int dolock = (vp == LOWERVP(ap->a_vp));
1765
1766 if (dolock)
1767 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1768 ap->a_vp = vp;
1769 error = VCALL(vp, VOFFSET(vop_bmap), ap);
1770 if (dolock)
1771 VOP_UNLOCK(vp);
1772
1773 return (error);
1774 }
1775
1776 int
1777 union_print(void *v)
1778 {
1779 struct vop_print_args /* {
1780 struct vnode *a_vp;
1781 } */ *ap = v;
1782 struct vnode *vp = ap->a_vp;
1783
1784 printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
1785 vp, UPPERVP(vp), LOWERVP(vp));
1786 if (UPPERVP(vp) != NULLVP)
1787 vprint("union: upper", UPPERVP(vp));
1788 if (LOWERVP(vp) != NULLVP)
1789 vprint("union: lower", LOWERVP(vp));
1790 if (VTOUNION(vp)->un_dircache) {
1791 struct vnode **vpp;
1792 for (vpp = VTOUNION(vp)->un_dircache; *vpp != NULLVP; vpp++)
1793 vprint("dircache:", *vpp);
1794 }
1795
1796 return (0);
1797 }
1798
1799 int
1800 union_islocked(void *v)
1801 {
1802 struct vop_islocked_args /* {
1803 struct vnode *a_vp;
1804 } */ *ap = v;
1805 struct vnode *vp;
1806 struct union_node *un;
1807
1808 un = VTOUNION(ap->a_vp);
1809 mutex_enter(&un->un_lock);
1810 vp = LOCKVP(ap->a_vp);
1811 mutex_exit(&un->un_lock);
1812
1813 if (vp == ap->a_vp)
1814 return genfs_islocked(ap);
1815 else
1816 return VOP_ISLOCKED(vp);
1817 }
1818
1819 int
1820 union_pathconf(void *v)
1821 {
1822 struct vop_pathconf_args /* {
1823 struct vnode *a_vp;
1824 int a_name;
1825 int *a_retval;
1826 } */ *ap = v;
1827 int error;
1828 struct vnode *vp = OTHERVP(ap->a_vp);
1829 int dolock = (vp == LOWERVP(ap->a_vp));
1830
1831 if (dolock)
1832 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1833 ap->a_vp = vp;
1834 error = VCALL(vp, VOFFSET(vop_pathconf), ap);
1835 if (dolock)
1836 VOP_UNLOCK(vp);
1837
1838 return (error);
1839 }
1840
1841 int
1842 union_advlock(void *v)
1843 {
1844 struct vop_advlock_args /* {
1845 struct vnode *a_vp;
1846 void *a_id;
1847 int a_op;
1848 struct flock *a_fl;
1849 int a_flags;
1850 } */ *ap = v;
1851 struct vnode *ovp = OTHERVP(ap->a_vp);
1852
1853 ap->a_vp = ovp;
1854 return (VCALL(ovp, VOFFSET(vop_advlock), ap));
1855 }
1856
1857 int
1858 union_strategy(void *v)
1859 {
1860 struct vop_strategy_args /* {
1861 struct vnode *a_vp;
1862 struct buf *a_bp;
1863 } */ *ap = v;
1864 struct vnode *ovp = OTHERVP(ap->a_vp);
1865 struct buf *bp = ap->a_bp;
1866
1867 KASSERT(ovp != NULLVP);
1868 if (!NODE_IS_SPECIAL(ovp))
1869 KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1870
1871 return (VOP_STRATEGY(ovp, bp));
1872 }
1873
1874 int
1875 union_bwrite(void *v)
1876 {
1877 struct vop_bwrite_args /* {
1878 struct vnode *a_vp;
1879 struct buf *a_bp;
1880 } */ *ap = v;
1881 struct vnode *ovp = OTHERVP(ap->a_vp);
1882 struct buf *bp = ap->a_bp;
1883
1884 KASSERT(ovp != NULLVP);
1885 if (!NODE_IS_SPECIAL(ovp))
1886 KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1887
1888 return (VOP_BWRITE(ovp, bp));
1889 }
1890
1891 int
1892 union_getpages(void *v)
1893 {
1894 struct vop_getpages_args /* {
1895 struct vnode *a_vp;
1896 voff_t a_offset;
1897 struct vm_page **a_m;
1898 int *a_count;
1899 int a_centeridx;
1900 vm_prot_t a_access_type;
1901 int a_advice;
1902 int a_flags;
1903 } */ *ap = v;
1904 struct vnode *vp = ap->a_vp;
1905
1906 KASSERT(rw_lock_held(vp->v_uobj.vmobjlock));
1907
1908 if (ap->a_flags & PGO_LOCKED) {
1909 return EBUSY;
1910 }
1911 ap->a_vp = OTHERVP(vp);
1912 KASSERT(vp->v_uobj.vmobjlock == ap->a_vp->v_uobj.vmobjlock);
1913
1914 /* Just pass the request on to the underlying layer. */
1915 return VCALL(ap->a_vp, VOFFSET(vop_getpages), ap);
1916 }
1917
1918 int
1919 union_putpages(void *v)
1920 {
1921 struct vop_putpages_args /* {
1922 struct vnode *a_vp;
1923 voff_t a_offlo;
1924 voff_t a_offhi;
1925 int a_flags;
1926 } */ *ap = v;
1927 struct vnode *vp = ap->a_vp;
1928
1929 KASSERT(rw_lock_held(vp->v_uobj.vmobjlock));
1930
1931 ap->a_vp = OTHERVP(vp);
1932 KASSERT(vp->v_uobj.vmobjlock == ap->a_vp->v_uobj.vmobjlock);
1933
1934 if (ap->a_flags & PGO_RECLAIM) {
1935 rw_exit(vp->v_uobj.vmobjlock);
1936 return 0;
1937 }
1938
1939 /* Just pass the request on to the underlying layer. */
1940 return VCALL(ap->a_vp, VOFFSET(vop_putpages), ap);
1941 }
1942
1943 int
1944 union_kqfilter(void *v)
1945 {
1946 struct vop_kqfilter_args /* {
1947 struct vnode *a_vp;
1948 struct knote *a_kn;
1949 } */ *ap = v;
1950 int error;
1951
1952 /*
1953 * We watch either the upper layer file (if it already exists),
1954 * or the lower layer one. If there is lower layer file only
1955 * at this moment, we will keep watching that lower layer file
1956 * even if upper layer file would be created later on.
1957 */
1958 if (UPPERVP(ap->a_vp))
1959 error = VOP_KQFILTER(UPPERVP(ap->a_vp), ap->a_kn);
1960 else if (LOWERVP(ap->a_vp))
1961 error = VOP_KQFILTER(LOWERVP(ap->a_vp), ap->a_kn);
1962 else {
1963 /* panic? */
1964 error = EOPNOTSUPP;
1965 }
1966
1967 return (error);
1968 }
1969