null_vnops.c revision 1.2 1 /* $NetBSD: null_vnops.c,v 1.2 1994/06/29 06:34:35 cgd Exp $ */
2
3 /*
4 * Copyright (c) 1992, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * John Heidemann of the UCLA Ficus project.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)null_vnops.c 8.1 (Berkeley) 6/10/93
39 *
40 * Ancestors:
41 * @(#)lofs_vnops.c 1.2 (Berkeley) 6/18/92
42 * Id: lofs_vnops.c,v 1.11 1992/05/30 10:05:43 jsp Exp
43 * ...and...
44 * @(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
45 */
46
47 /*
48 * Null Layer
49 *
50 * (See mount_null(8) for more information.)
51 *
52 * The null layer duplicates a portion of the file system
53 * name space under a new name. In this respect, it is
54 * similar to the loopback file system. It differs from
55 * the loopback fs in two respects: it is implemented using
56 * stackable-layer techniques, and its "null-node"s stack above
57 * all lower-layer vnodes, not just over directory vnodes.
58 *
59 * The null layer has two purposes. First, it serves as a demonstration
60 * of layering by providing a layer which does nothing. (It actually
61 * does everything the loopback file system does, which is slightly
62 * more than nothing.) Second, the null layer can serve as a prototype
63 * layer. Since it provides all necessary layer framework,
64 * new file system layers can be created very easily by starting
65 * with a null layer.
66 *
67 * The remainder of this man page examines the null layer as a basis
68 * for constructing new layers.
69 *
70 *
71 * INSTANTIATING NEW NULL LAYERS
72 *
73 * New null layers are created with mount_null(8).
74 * Mount_null(8) takes two arguments, the pathname
75 * of the lower vfs (target-pn) and the pathname where the null
76 * layer will appear in the namespace (alias-pn). After
77 * the null layer is put into place, the contents
78 * of target-pn subtree will be aliased under alias-pn.
79 *
80 *
81 * OPERATION OF A NULL LAYER
82 *
83 * The null layer is the minimum file system layer,
84 * simply bypassing all possible operations to the lower layer
85 * for processing there. The majority of its activity centers
86 * on the bypass routine, through which nearly all vnode operations
87 * pass.
88 *
89 * The bypass routine accepts arbitrary vnode operations for
90 * handling by the lower layer. It begins by examining vnode
91 * operation arguments and replacing any null-nodes by their
92 * lower-layer equivalents. It then invokes the operation
93 * on the lower layer. Finally, it replaces the null-nodes
94 * in the arguments and, if a vnode is returned by the operation,
95 * stacks a null-node on top of the returned vnode.
96 *
97 * Although bypass handles most operations,
98 * vop_getattr, _inactive, _reclaim, and _print are not bypassed.
99 * Vop_getattr must change the fsid being returned.
100 * Vop_inactive and vop_reclaim are not bypassed so that
101 * they can handle freeing null-layer specific data.
102 * Vop_print is not bypassed to avoid excessive debugging
103 * information.
104 *
105 *
106 * INSTANTIATING VNODE STACKS
107 *
108 * Mounting associates the null layer with a lower layer,
109 * in effect stacking two VFSes. Vnode stacks are instead
110 * created on demand as files are accessed.
111 *
112 * The initial mount creates a single vnode stack for the
113 * root of the new null layer. All other vnode stacks
114 * are created as a result of vnode operations on
115 * this or other null vnode stacks.
116 *
117 * New vnode stacks come into existence as a result of
118 * an operation which returns a vnode.
119 * The bypass routine stacks a null-node above the new
120 * vnode before returning it to the caller.
121 *
122 * For example, imagine mounting a null layer with
123 * "mount_null /usr/include /dev/layer/null".
124 * Changing directory to /dev/layer/null will assign
125 * the root null-node (which was created when the null layer was mounted).
126 * Now consider opening "sys". A vop_lookup would be
127 * done on the root null-node. This operation would bypass through
128 * to the lower layer which would return a vnode representing
129 * the UFS "sys". Null_bypass then builds a null-node
130 * aliasing the UFS "sys" and returns this to the caller.
131 * Later operations on the null-node "sys" will repeat this
132 * process when constructing other vnode stacks.
133 *
134 *
135 * CREATING OTHER FILE SYSTEM LAYERS
136 *
137 * One of the easiest ways to construct new file system layers is to make
138 * a copy of the null layer, rename all files and variables, and
139 * then begin modifying the copy. Sed can be used to easily rename
140 * all variables.
141 *
142 * The umap layer is an example of a layer descended from the
143 * null layer.
144 *
145 *
146 * INVOKING OPERATIONS ON LOWER LAYERS
147 *
148 * There are two techniques to invoke operations on a lower layer
149 * when the operation cannot be completely bypassed. Each method
150 * is appropriate in different situations. In both cases,
151 * it is the responsibility of the aliasing layer to make
152 * the operation arguments "correct" for the lower layer
153 * by mapping any vnode arguments to the lower layer.
154 *
155 * The first approach is to call the aliasing layer's bypass routine.
156 * This method is most suitable when you wish to invoke the operation
157 * currently being handled on the lower layer. It has the advantage
158 * that the bypass routine already must do argument mapping.
159 * An example of this is null_getattr in the null layer.
160 *
161 * A second approach is to directly invoke vnode operations on
162 * the lower layer with the VOP_OPERATIONNAME interface.
163 * The advantage of this method is that it is easy to invoke
164 * arbitrary operations on the lower layer. The disadvantage
165 * is that vnode arguments must be manually mapped.
166 *
167 */
168
169 #include <sys/param.h>
170 #include <sys/systm.h>
171 #include <sys/proc.h>
172 #include <sys/time.h>
173 #include <sys/types.h>
174 #include <sys/vnode.h>
175 #include <sys/mount.h>
176 #include <sys/namei.h>
177 #include <sys/malloc.h>
178 #include <sys/buf.h>
179 #include <miscfs/nullfs/null.h>
180
181
/*
 * Debugging knob: when set non-zero, null_bypass printf's the name of
 * every operation it passes through.  Off by default.
 */
int null_bug_bypass = 0;
183
184 /*
185 * This is the 10-Apr-92 bypass routine.
186 * This version has been optimized for speed, throwing away some
187 * safety checks. It should still always work, but it's not as
188 * robust to programmer errors.
189 * Define SAFETY to include some error checking code.
190 *
191 * In general, we map all vnodes going down and unmap them on the way back.
192 * As an exception to this, vnodes can be marked "unmapped" by setting
193 * the Nth bit in operation's vdesc_flags.
194 *
195 * Also, some BSD vnode operations have the side effect of vrele'ing
196 * their arguments. With stacking, the reference counts are held
197 * by the upper node, not the lower one, so we must handle these
198 * side-effects here. This is not of concern in Sun-derived systems
199 * since there are no such side-effects.
200 *
201 * This makes the following assumptions:
202 * - only one returned vpp
203 * - no INOUT vpp's (Sun's vop_open has one of these)
204 * - the vnode operation vector of the first vnode should be used
205 * to determine what implementation of the op should be invoked
206 * - all mapped vnodes are of our vnode-type (NEEDSWORK:
207 * problems on rmdir'ing mount points and renaming?)
208 */
209 int
210 null_bypass(ap)
211 struct vop_generic_args /* {
212 struct vnodeop_desc *a_desc;
213 <other random data follows, presumably>
214 } */ *ap;
215 {
216 extern int (**null_vnodeop_p)(); /* not extern, really "forward" */
217 register struct vnode **this_vp_p;
218 int error;
219 struct vnode *old_vps[VDESC_MAX_VPS];
220 struct vnode **vps_p[VDESC_MAX_VPS];
221 struct vnode ***vppp;
222 struct vnodeop_desc *descp = ap->a_desc;
223 int reles, i;
224
225 if (null_bug_bypass)
226 printf ("null_bypass: %s\n", descp->vdesc_name);
227
228 #ifdef SAFETY
229 /*
230 * We require at least one vp.
231 */
232 if (descp->vdesc_vp_offsets == NULL ||
233 descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
234 panic ("null_bypass: no vp's in map.\n");
235 #endif
236
237 /*
238 * Map the vnodes going in.
239 * Later, we'll invoke the operation based on
240 * the first mapped vnode's operation vector.
241 */
242 reles = descp->vdesc_flags;
243 for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
244 if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
245 break; /* bail out at end of list */
246 vps_p[i] = this_vp_p =
247 VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap);
248 /*
249 * We're not guaranteed that any but the first vnode
250 * are of our type. Check for and don't map any
251 * that aren't. (We must always map first vp or vclean fails.)
252 */
253 if (i && (*this_vp_p)->v_op != null_vnodeop_p) {
254 old_vps[i] = NULL;
255 } else {
256 old_vps[i] = *this_vp_p;
257 *(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p);
258 /*
259 * XXX - Several operations have the side effect
260 * of vrele'ing their vp's. We must account for
261 * that. (This should go away in the future.)
262 */
263 if (reles & 1)
264 VREF(*this_vp_p);
265 }
266
267 }
268
269 /*
270 * Call the operation on the lower layer
271 * with the modified argument structure.
272 */
273 error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
274
275 /*
276 * Maintain the illusion of call-by-value
277 * by restoring vnodes in the argument structure
278 * to their original value.
279 */
280 reles = descp->vdesc_flags;
281 for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
282 if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
283 break; /* bail out at end of list */
284 if (old_vps[i]) {
285 *(vps_p[i]) = old_vps[i];
286 if (reles & 1)
287 vrele(*(vps_p[i]));
288 }
289 }
290
291 /*
292 * Map the possible out-going vpp
293 * (Assumes that the lower layer always returns
294 * a VREF'ed vpp unless it gets an error.)
295 */
296 if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
297 !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
298 !error) {
299 /*
300 * XXX - even though some ops have vpp returned vp's,
301 * several ops actually vrele this before returning.
302 * We must avoid these ops.
303 * (This should go away when these ops are regularized.)
304 */
305 if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
306 goto out;
307 vppp = VOPARG_OFFSETTO(struct vnode***,
308 descp->vdesc_vpp_offset,ap);
309 error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
310 }
311
312 out:
313 return (error);
314 }
315
316
317 /*
318 * We handle getattr only to change the fsid.
319 */
320 int
321 null_getattr(ap)
322 struct vop_getattr_args /* {
323 struct vnode *a_vp;
324 struct vattr *a_vap;
325 struct ucred *a_cred;
326 struct proc *a_p;
327 } */ *ap;
328 {
329 int error;
330 if (error = null_bypass(ap))
331 return (error);
332 /* Requires that arguments be restored. */
333 ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
334 return (0);
335 }
336
337
/*
 * null_inactive: deliberately a no-op that is NOT bypassed to the
 * lower layer.
 *
 * The reference on the lower vnode is kept until reclaim, so that
 * until then the null_node stays in the cache and can be reused.
 *
 * NEEDSWORK: Someday, consider inactive'ing the lowervp and then
 * trying to reactivate it with capabilities (v_id), the way the name
 * lookup cache code does.  That's too much work for now.
 *
 * Always returns 0.
 */
int
null_inactive(ap)
	struct vop_inactive_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	return (0);
}
358
359 int
360 null_reclaim(ap)
361 struct vop_reclaim_args /* {
362 struct vnode *a_vp;
363 } */ *ap;
364 {
365 struct vnode *vp = ap->a_vp;
366 struct null_node *xp = VTONULL(vp);
367 struct vnode *lowervp = xp->null_lowervp;
368
369 /*
370 * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p,
371 * so we can't call VOPs on ourself.
372 */
373 /* After this assignment, this node will not be re-used. */
374 xp->null_lowervp = NULL;
375 remque(xp);
376 FREE(vp->v_data, M_TEMP);
377 vp->v_data = NULL;
378 vrele (lowervp);
379 return (0);
380 }
381
382
383 int
384 null_print(ap)
385 struct vop_print_args /* {
386 struct vnode *a_vp;
387 } */ *ap;
388 {
389 register struct vnode *vp = ap->a_vp;
390 printf ("\ttag VT_NULLFS, vp=%x, lowervp=%x\n", vp, NULLVPTOLOWERVP(vp));
391 return (0);
392 }
393
394
395 /*
396 * XXX - vop_strategy must be hand coded because it has no
397 * vnode in its arguments.
398 * This goes away with a merged VM/buffer cache.
399 */
400 int
401 null_strategy(ap)
402 struct vop_strategy_args /* {
403 struct buf *a_bp;
404 } */ *ap;
405 {
406 struct buf *bp = ap->a_bp;
407 int error;
408 struct vnode *savedvp;
409
410 savedvp = bp->b_vp;
411 bp->b_vp = NULLVPTOLOWERVP(bp->b_vp);
412
413 error = VOP_STRATEGY(bp);
414
415 bp->b_vp = savedvp;
416
417 return (error);
418 }
419
420
421 /*
422 * XXX - like vop_strategy, vop_bwrite must be hand coded because it has no
423 * vnode in its arguments.
424 * This goes away with a merged VM/buffer cache.
425 */
426 int
427 null_bwrite(ap)
428 struct vop_bwrite_args /* {
429 struct buf *a_bp;
430 } */ *ap;
431 {
432 struct buf *bp = ap->a_bp;
433 int error;
434 struct vnode *savedvp;
435
436 savedvp = bp->b_vp;
437 bp->b_vp = NULLVPTOLOWERVP(bp->b_vp);
438
439 error = VOP_BWRITE(bp);
440
441 bp->b_vp = savedvp;
442
443 return (error);
444 }
445
446 /*
447 * Global vfs data structures
448 */
449 int (**null_vnodeop_p)();
450 struct vnodeopv_entry_desc null_vnodeop_entries[] = {
451 { &vop_default_desc, null_bypass },
452
453 { &vop_getattr_desc, null_getattr },
454 { &vop_inactive_desc, null_inactive },
455 { &vop_reclaim_desc, null_reclaim },
456 { &vop_print_desc, null_print },
457
458 { &vop_strategy_desc, null_strategy },
459 { &vop_bwrite_desc, null_bwrite },
460
461 { (struct vnodeop_desc*)NULL, (int(*)())NULL }
462 };
463 struct vnodeopv_desc null_vnodeop_opv_desc =
464 { &null_vnodeop_p, null_vnodeop_entries };
465