/*	$NetBSD: null_vnops.c,v 1.2.2.2 1994/08/19 12:13:37 mycroft Exp $	*/

/*
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * John Heidemann of the UCLA Ficus project.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)null_vnops.c	8.1 (Berkeley) 6/10/93
 *
 * Ancestors:
 *	@(#)lofs_vnops.c	1.2 (Berkeley) 6/18/92
 *	Id: lofs_vnops.c,v 1.11 1992/05/30 10:05:43 jsp Exp
 *	...and...
 *	@(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
 */

/*
 * Null Layer
 *
 * (See mount_null(8) for more information.)
 *
 * The null layer duplicates a portion of the file system
 * name space under a new name.  In this respect, it is
 * similar to the loopback file system.  It differs from
 * the loopback fs in two respects: it is implemented using
 * a stackable-layers technique, and its "null-nodes" stack above
 * all lower-layer vnodes, not just over directory vnodes.
 *
 * The null layer has two purposes.  First, it serves as a demonstration
 * of layering by providing a layer which does nothing.  (It actually
 * does everything the loopback file system does, which is slightly
 * more than nothing.)  Second, the null layer can serve as a prototype
 * layer.  Since it provides all necessary layer framework,
 * new file system layers can be created very easily by starting
 * with a null layer.
 *
 * The remainder of this man page examines the null layer as a basis
 * for constructing new layers.
 *
 *
 * INSTANTIATING NEW NULL LAYERS
 *
 * New null layers are created with mount_null(8).
 * Mount_null(8) takes two arguments: the pathname
 * of the lower vfs (target-pn) and the pathname where the null
 * layer will appear in the namespace (alias-pn).  After
 * the null layer is put into place, the contents
 * of the target-pn subtree will be aliased under alias-pn.
 *
 *
 * OPERATION OF A NULL LAYER
 *
 * The null layer is the minimum file system layer,
 * simply bypassing all possible operations to the lower layer
 * for processing there.  The majority of its activity centers
 * on the bypass routine, through which nearly all vnode operations
 * pass.
 *
 * The bypass routine accepts arbitrary vnode operations for
 * handling by the lower layer.  It begins by examining vnode
 * operation arguments and replacing any null-nodes by their
 * lower-layer equivalents.  It then invokes the operation
 * on the lower layer.  Finally, it restores the null-nodes
 * in the arguments and, if a vnode is returned by the operation,
 * stacks a null-node on top of the returned vnode.
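 *
 * In outline, each bypassed operation goes through roughly the
 * following steps (a simplified sketch of null_bypass below; the
 * names vp_p, vpp_ret, and returns_a_vnode are placeholders, and
 * the real routine also handles multiple vnode arguments, vrele
 * side effects, and errors):
 *
 *	savedvp = *vp_p;
 *	*vp_p = NULLVPTOLOWERVP(savedvp);
 *	error = VCALL(*vp_p, descp->vdesc_offset, ap);
 *	*vp_p = savedvp;
 *	if (!error && returns_a_vnode)
 *		error = null_node_create(savedvp->v_mount, *vpp_ret, vpp_ret);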
 *
 * Although bypass handles most operations,
 * vop_getattr, _inactive, _reclaim, and _print are not bypassed.
 * Vop_getattr must change the fsid being returned.
 * Vop_inactive and vop_reclaim are not bypassed so that
 * they can handle freeing null-layer specific data.
 * Vop_print is not bypassed to avoid excessive debugging
 * information.
 *
 *
 * INSTANTIATING VNODE STACKS
 *
 * Mounting associates the null layer with a lower layer,
 * in effect stacking two VFSes.  Vnode stacks are instead
 * created on demand as files are accessed.
 *
 * The initial mount creates a single vnode stack for the
 * root of the new null layer.  All other vnode stacks
 * are created as a result of vnode operations on
 * this or other null vnode stacks.
 *
 * New vnode stacks come into existence as a result of
 * an operation which returns a vnode.
 * The bypass routine stacks a null-node above the new
 * vnode before returning it to the caller.
 *
 * For example, imagine mounting a null layer with
 * "mount_null /usr/include /dev/layer/null".
 * Changing directory to /dev/layer/null will assign
 * the root null-node (which was created when the null layer was mounted).
 * Now consider opening "sys".  A vop_lookup would be
 * done on the root null-node.  This operation would bypass through
 * to the lower layer which would return a vnode representing
 * the UFS "sys".  Null_bypass then builds a null-node
 * aliasing the UFS "sys" and returns this to the caller.
 * Later operations on the null-node "sys" will repeat this
 * process when constructing other vnode stacks.
 *
 *
 * CREATING OTHER FILE SYSTEM LAYERS
 *
 * One of the easiest ways to construct new file system layers is to make
 * a copy of the null layer, rename all files and variables, and
 * then begin modifying the copy.  Sed can be used to easily rename
 * all variables.
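 *
 * For example (the layer name "newlayer" here is hypothetical), a rough
 * first pass over this file might be something like
 *	sed -e 's/null_/newlayer_/g' null_vnops.c > newlayer_vnops.c
 * with the remaining identifiers (NULLVPTOLOWERVP, VTONULL, etc.)
 * renamed by hand.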
 *
 * The umap layer is an example of a layer descended from the
 * null layer.
 *
 *
 * INVOKING OPERATIONS ON LOWER LAYERS
 *
 * There are two techniques to invoke operations on a lower layer
 * when the operation cannot be completely bypassed.  Each method
 * is appropriate in different situations.  In both cases,
 * it is the responsibility of the aliasing layer to make
 * the operation arguments "correct" for the lower layer
 * by mapping any vnode arguments to the lower layer.
 *
 * The first approach is to call the aliasing layer's bypass routine.
 * This method is most suitable when you wish to invoke the operation
 * currently being handled on the lower layer.  It has the advantage
 * that the bypass routine already must do argument mapping.
 * An example of this is null_getattr in the null layer.
 *
 * A second approach is to invoke vnode operations directly on
 * the lower layer with the VOP_OPERATIONNAME interface.
 * The advantage of this method is that it is easy to invoke
 * arbitrary operations on the lower layer.  The disadvantage
 * is that vnode arguments must be manually mapped.
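 *
 * As a sketch of the two styles, using getattr (error handling is
 * elided, and the direct call assumes the argument order shown in
 * the vop_getattr_args structure below):
 *
 *	First style, as in null_getattr: let the bypass routine map
 *	the arguments and then adjust the result it leaves behind:
 *
 *		error = null_bypass(ap);
 *		ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
 *
 *	Second style: map the vnode argument by hand and invoke the
 *	lower layer directly:
 *
 *		lowervp = NULLVPTOLOWERVP(ap->a_vp);
 *		error = VOP_GETATTR(lowervp, ap->a_vap, ap->a_cred, ap->a_p);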
 *
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/buf.h>
#include <miscfs/nullfs/null.h>


int null_bug_bypass = 0;	/* for debugging: enables bypass printf'ing */

/*
 * This is the 10-Apr-92 bypass routine.
 * This version has been optimized for speed, throwing away some
 * safety checks.  It should still always work, but it's not as
 * robust to programmer errors.
 * Define SAFETY to include some error checking code.
 *
 * In general, we map all vnodes going down and unmap them on the way back.
 * As an exception to this, vnodes can be marked "unmapped" by setting
 * the Nth bit in the operation's vdesc_flags.
 *
 * Also, some BSD vnode operations have the side effect of vrele'ing
 * their arguments.  With stacking, the reference counts are held
 * by the upper node, not the lower one, so we must handle these
 * side effects here.  This is not of concern in Sun-derived systems
 * since there are no such side effects.
 *
 * This makes the following assumptions:
 * - only one returned vpp
 * - no INOUT vpp's (Sun's vop_open has one of these)
 * - the vnode operation vector of the first vnode should be used
 *   to determine what implementation of the op should be invoked
 * - all mapped vnodes are of our vnode-type (NEEDSWORK:
 *   problems on rmdir'ing mount points and renaming?)
 */
int
null_bypass(ap)
	struct vop_generic_args /* {
		struct vnodeop_desc *a_desc;
		<other random data follows, presumably>
	} */ *ap;
{
	extern int (**null_vnodeop_p)();	/* not extern, really "forward" */
	register struct vnode **this_vp_p;
	int error;
	struct vnode *old_vps[VDESC_MAX_VPS];
	struct vnode **vps_p[VDESC_MAX_VPS];
	struct vnode ***vppp;
	struct vnodeop_desc *descp = ap->a_desc;
	int reles, i;

	if (null_bug_bypass)
		printf ("null_bypass: %s\n", descp->vdesc_name);

#ifdef SAFETY
	/*
	 * We require at least one vp.
	 */
	if (descp->vdesc_vp_offsets == NULL ||
	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
		panic ("null_bypass: no vp's in map.\n");
#endif

	/*
	 * Map the vnodes going in.
	 * Later, we'll invoke the operation based on
	 * the first mapped vnode's operation vector.
	 */
	reles = descp->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
			break;	/* bail out at end of list */
		vps_p[i] = this_vp_p =
			VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap);
		/*
		 * We're not guaranteed that any but the first vnode
		 * are of our type.  Check for and don't map any
		 * that aren't.  (We must always map first vp or vclean fails.)
		 */
		if (i && (*this_vp_p == NULLVP ||
		    (*this_vp_p)->v_op != null_vnodeop_p)) {
			old_vps[i] = NULLVP;
		} else {
			old_vps[i] = *this_vp_p;
			*(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p);
			/*
			 * XXX - Several operations have the side effect
			 * of vrele'ing their vp's.  We must account for
			 * that.  (This should go away in the future.)
			 */
			if (reles & 1)
				VREF(*this_vp_p);
		}
	}

	/*
	 * Call the operation on the lower layer
	 * with the modified argument structure.
	 */
	error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);

	/*
	 * Maintain the illusion of call-by-value
	 * by restoring vnodes in the argument structure
	 * to their original value.
	 */
	reles = descp->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
			break;	/* bail out at end of list */
		if (old_vps[i] != NULLVP) {
			*(vps_p[i]) = old_vps[i];
			if (reles & 1)
				vrele(*(vps_p[i]));
		}
	}

	/*
	 * Map the possible out-going vpp
	 * (Assumes that the lower layer always returns
	 * a VREF'ed vpp unless it gets an error.)
	 */
	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
	    !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
	    !error) {
		/*
		 * XXX - even though some ops have vpp returned vp's,
		 * several ops actually vrele this before returning.
		 * We must avoid these ops.
		 * (This should go away when these ops are regularized.)
		 */
		if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
			goto out;
		vppp = VOPARG_OFFSETTO(struct vnode***,
		    descp->vdesc_vpp_offset,ap);
		error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
	}

out:
	return (error);
}

/*
 * We handle getattr only to change the fsid.
 */
int
null_getattr(ap)
	struct vop_getattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	int error;
	if (error = null_bypass(ap))
		return (error);
	/* Requires that arguments be restored. */
	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
	return (0);
}


int
null_inactive(ap)
	struct vop_inactive_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	/*
	 * Do nothing (and _don't_ bypass).
	 * Wait to vrele lowervp until reclaim,
	 * so that until then our null_node is in the
	 * cache and reusable.
	 *
	 * NEEDSWORK: Someday, consider inactive'ing
	 * the lowervp and then trying to reactivate it
	 * with capabilities (v_id)
	 * like they do in the name lookup cache code.
	 * That's too much work for now.
	 */
	return (0);
}

int
null_reclaim(ap)
	struct vop_reclaim_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct null_node *xp = VTONULL(vp);
	struct vnode *lowervp = xp->null_lowervp;

	/*
	 * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p,
	 * so we can't call VOPs on ourself.
	 */
	/* After this assignment, this node will not be re-used. */
	xp->null_lowervp = NULL;
	LIST_REMOVE(xp, null_hash);
	FREE(vp->v_data, M_TEMP);
	vp->v_data = NULL;
	vrele (lowervp);
	return (0);
}


int
null_print(ap)
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	printf ("\ttag VT_NULLFS, vp=%x, lowervp=%x\n", vp, NULLVPTOLOWERVP(vp));
	return (0);
}

/*
 * XXX - vop_strategy must be hand coded because it has no
 * vnode in its arguments.
 * This goes away with a merged VM/buffer cache.
 */
int
null_strategy(ap)
	struct vop_strategy_args /* {
		struct buf *a_bp;
	} */ *ap;
{
	struct buf *bp = ap->a_bp;
	int error;
	struct vnode *savedvp;

	savedvp = bp->b_vp;
	bp->b_vp = NULLVPTOLOWERVP(bp->b_vp);

	error = VOP_STRATEGY(bp);

	bp->b_vp = savedvp;

	return (error);
}


/*
 * XXX - like vop_strategy, vop_bwrite must be hand coded because it has no
 * vnode in its arguments.
 * This goes away with a merged VM/buffer cache.
 */
int
null_bwrite(ap)
	struct vop_bwrite_args /* {
		struct buf *a_bp;
	} */ *ap;
{
	struct buf *bp = ap->a_bp;
	int error;
	struct vnode *savedvp;

	savedvp = bp->b_vp;
	bp->b_vp = NULLVPTOLOWERVP(bp->b_vp);

	error = VOP_BWRITE(bp);

	bp->b_vp = savedvp;

	return (error);
}

/*
 * Global vfs data structures
 */
int (**null_vnodeop_p)();
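/*
 * The default entry routes every operation without an explicit
 * entry below through null_bypass.
 */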
struct vnodeopv_entry_desc null_vnodeop_entries[] = {
	{ &vop_default_desc, null_bypass },

	{ &vop_getattr_desc, null_getattr },
	{ &vop_inactive_desc, null_inactive },
	{ &vop_reclaim_desc, null_reclaim },
	{ &vop_print_desc, null_print },

	{ &vop_strategy_desc, null_strategy },
	{ &vop_bwrite_desc, null_bwrite },

	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
};
struct vnodeopv_desc null_vnodeop_opv_desc =
	{ &null_vnodeop_p, null_vnodeop_entries };