/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *      The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vn.c 1.13 94/04/02$
 *
 *      @(#)vn.c        8.6 (Berkeley) 4/1/94
 */

/*
 * Vnode disk driver.
 *
 * Block/character interface to a vnode.  Allows one to treat a file
 * as a disk (e.g. build a filesystem in it, mount it, etc.).
 *
 * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode
 * instead of a simple VOP_RDWR.  We do this to avoid distorting the
 * local buffer cache.
 *
 * NOTE 2: There is a security issue involved with this driver.
 * Once mounted, all access to the contents of the "mapped" file via
 * the special file is controlled by the permissions on the special
 * file; the protection of the mapped file is ignored (effectively,
 * by using root credentials in all transactions).
 *
 * NOTE 3: Doesn't interact with leases; should it?
 */
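/*
 * Illustrative sketch (not part of the driver): a privileged user-level
 * program would typically associate a file with a unit through the
 * VNIOCSET ioctl, roughly as below.  The special file path is hypothetical
 * and depends on how the device nodes were created.
 *
 *      struct vn_ioctl vio;
 *      int fd = open("/dev/rvn0c", O_RDWR);
 *
 *      vio.vn_file = "/var/tmp/diskimage";
 *      if (fd >= 0 && ioctl(fd, VNIOCSET, &vio) == 0)
 *              printf("configured, %d bytes\n", vio.vn_size);
 */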
#include "vn.h"
#if NVN > 0

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/dkstat.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/ioctl.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/uio.h>

#include <miscfs/specfs/specdev.h>

#include <dev/vnioctl.h>

#ifdef DEBUG
int dovncluster = 1;
int vndebug = 0x00;
#define VDB_FOLLOW      0x01
#define VDB_INIT        0x02
#define VDB_IO          0x04
#endif

#define b_cylin         b_resid

#define vnunit(x)       ((minor(x) >> 3) & 0x7) /* for consistency */

#define getvnbuf()      \
        ((struct buf *)malloc(sizeof(struct buf), M_DEVBUF, M_WAITOK))
#define putvnbuf(bp)    \
        free((caddr_t)(bp), M_DEVBUF)

struct vn_softc {
        int              sc_flags;      /* flags */
        size_t           sc_size;       /* size of vn */
        struct vnode    *sc_vp;         /* vnode */
        struct ucred    *sc_cred;       /* credentials */
        int              sc_maxactive;  /* max # of active requests */
        struct buf       sc_tab;        /* transfer queue */
};

/* sc_flags */
#define VNF_ALIVE       0x01
#define VNF_INITED      0x02

#if 0   /* if you need static allocation */
struct vn_softc vn_softc[NVN];
int numvnd = NVN;
#else
struct vn_softc *vn_softc;
int numvnd;
#endif

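/*
 * Allocate and zero the soft state for "num" units.
 */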
void
vnattach(num)
        int num;
{
        char *mem;
        register u_long size;

        if (num <= 0)
                return;
        size = num * sizeof(struct vn_softc);
        mem = malloc(size, M_DEVBUF, M_NOWAIT);
        if (mem == NULL) {
                printf("WARNING: no memory for vnode disks\n");
                return;
        }
        bzero(mem, size);
        vn_softc = (struct vn_softc *)mem;
        numvnd = num;
}

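/*
 * Open: just verify that the unit number is in range; a unit is not
 * usable until it has been configured with the VNIOCSET ioctl.
 */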
int
vnopen(dev, flags, mode, p)
        dev_t dev;
        int flags, mode;
        struct proc *p;
{
        int unit = vnunit(dev);

#ifdef DEBUG
        if (vndebug & VDB_FOLLOW)
                printf("vnopen(%x, %x, %x, %x)\n", dev, flags, mode, p);
#endif
        if (unit >= numvnd)
                return(ENXIO);
        return(0);
}

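/*
 * Close: nothing to release; the unit stays configured until VNIOCCLR.
 */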
int
vnclose(dev, flags, mode, p)
        dev_t dev;
        int flags, mode;
        struct proc *p;
{
#ifdef DEBUG
        if (vndebug & VDB_FOLLOW)
                printf("vnclose(%x, %x, %x, %x)\n", dev, flags, mode, p);
#endif
        return 0;
}

/*
 * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY.
 * Note that this driver can only be used for swapping over NFS on the hp
 * since nfs_strategy on the vax cannot handle u-areas and page tables.
 */
void
vnstrategy(bp)
        register struct buf *bp;
{
        int unit = vnunit(bp->b_dev);
        register struct vn_softc *vn = &vn_softc[unit];
        register struct buf *nbp;
        register int bn, bsize, resid;
        register caddr_t addr;
        int sz, flags, error;
        extern void vniodone();

#ifdef DEBUG
        if (vndebug & VDB_FOLLOW)
                printf("vnstrategy(%x): unit %d\n", bp, unit);
#endif
        if ((vn->sc_flags & VNF_INITED) == 0) {
                bp->b_error = ENXIO;
                bp->b_flags |= B_ERROR;
                biodone(bp);
                return;
        }
        bn = bp->b_blkno;
        sz = howmany(bp->b_bcount, DEV_BSIZE);
        bp->b_resid = bp->b_bcount;
        if (bn < 0 || bn + sz > vn->sc_size) {
                if (bn != vn->sc_size) {
                        bp->b_error = EINVAL;
                        bp->b_flags |= B_ERROR;
                }
                biodone(bp);
                return;
        }
        bn = dbtob(bn);
        bsize = vn->sc_vp->v_mount->mnt_stat.f_iosize;
        addr = bp->b_data;
        flags = bp->b_flags | B_CALL;
        for (resid = bp->b_resid; resid; resid -= sz) {
                struct vnode *vp;
                daddr_t nbn;
                int off, s, nra;

                nra = 0;
#if (BSD > 199103)
                error = VOP_BMAP(vn->sc_vp, bn / bsize, &vp, &nbn, &nra);
#else
                error = VOP_BMAP(vn->sc_vp, bn / bsize, &vp, &nbn);
#endif
                if (error == 0 && (long)nbn == -1)
                        error = EIO;
#ifdef DEBUG
                if (!dovncluster)
                        nra = 0;
#endif

                if (off = bn % bsize)
                        sz = bsize - off;
                else
                        sz = (1 + nra) * bsize;
                if (resid < sz)
                        sz = resid;
#ifdef DEBUG
                if (vndebug & VDB_IO)
                        printf("vnstrategy: vp %x/%x bn %x/%x sz %x\n",
                               vn->sc_vp, vp, bn, nbn, sz);
#endif

                nbp = getvnbuf();
                nbp->b_flags = flags;
                nbp->b_bcount = sz;
                nbp->b_bufsize = bp->b_bufsize;
                nbp->b_error = 0;
                if (vp->v_type == VBLK || vp->v_type == VCHR)
                        nbp->b_dev = vp->v_rdev;
                else
                        nbp->b_dev = NODEV;
                nbp->b_data = addr;
                nbp->b_blkno = nbn + btodb(off);
                nbp->b_proc = bp->b_proc;
                nbp->b_iodone = vniodone;
                nbp->b_vp = vp;
                nbp->b_pfcent = (int) bp;       /* XXX */
                nbp->b_rcred = vn->sc_cred;     /* XXX crdup? */
                nbp->b_wcred = vn->sc_cred;     /* XXX crdup? */
                nbp->b_dirtyoff = bp->b_dirtyoff;
                nbp->b_dirtyend = bp->b_dirtyend;
                nbp->b_validoff = bp->b_validoff;
                nbp->b_validend = bp->b_validend;
                /*
                 * If there was an error or a hole in the file...punt.
                 * Note that we deal with this after the nbp allocation.
                 * This ensures that we properly clean up any operations
                 * that we have already fired off.
                 *
                 * XXX we could deal with holes here but it would be
                 * a hassle (in the write case).
                 */
                if (error) {
                        nbp->b_error = error;
                        nbp->b_flags |= B_ERROR;
                        bp->b_resid -= (resid - sz);
                        biodone(nbp);
                        return;
                }
                /*
                 * Just sort by block number
                 */
                nbp->b_cylin = nbp->b_blkno;
                s = splbio();
                disksort(&vn->sc_tab, nbp);
                if (vn->sc_tab.b_active < vn->sc_maxactive) {
                        vn->sc_tab.b_active++;
                        vnstart(vn);
                }
                splx(s);
                bn += sz;
                addr += sz;
        }
}

/*
 * Feed requests sequentially.
 * We do it this way to keep from flooding NFS servers if we are connected
 * to an NFS file.  This places the burden on the client rather than the
 * server.
 */
vnstart(vn)
        register struct vn_softc *vn;
{
        register struct buf *bp;

        /*
         * Dequeue now since lower level strategy routine might
         * queue using same links
         */
        bp = vn->sc_tab.b_actf;
        vn->sc_tab.b_actf = bp->b_actf;
#ifdef DEBUG
        if (vndebug & VDB_IO)
                printf("vnstart(%d): bp %x vp %x blkno %x addr %x cnt %x\n",
                       vn-vn_softc, bp, bp->b_vp, bp->b_blkno, bp->b_data,
                       bp->b_bcount);
#endif
        if ((bp->b_flags & B_READ) == 0)
                bp->b_vp->v_numoutput++;
        VOP_STRATEGY(bp);
}

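/*
 * Completion routine for the component buffers issued by vnstrategy():
 * propagate any error to the original buffer, account for the bytes
 * transferred, finish the original buffer once all pieces are done,
 * and keep the per-unit queue moving.
 */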
void
vniodone(bp)
        register struct buf *bp;
{
        register struct buf *pbp = (struct buf *)bp->b_pfcent;  /* XXX */
        register struct vn_softc *vn = &vn_softc[vnunit(pbp->b_dev)];
        int s;

        s = splbio();
#ifdef DEBUG
        if (vndebug & VDB_IO)
                printf("vniodone(%d): bp %x vp %x blkno %x addr %x cnt %x\n",
                       vn-vn_softc, bp, bp->b_vp, bp->b_blkno, bp->b_data,
                       bp->b_bcount);
#endif
        if (bp->b_error) {
#ifdef DEBUG
                if (vndebug & VDB_IO)
                        printf("vniodone: bp %x error %d\n", bp, bp->b_error);
#endif
                pbp->b_flags |= B_ERROR;
                pbp->b_error = biowait(bp);
        }
        pbp->b_resid -= bp->b_bcount;
        putvnbuf(bp);
        if (pbp->b_resid == 0) {
#ifdef DEBUG
                if (vndebug & VDB_IO)
                        printf("vniodone: pbp %x iodone\n", pbp);
#endif
                biodone(pbp);
        }
        if (vn->sc_tab.b_actf)
                vnstart(vn);
        else
                vn->sc_tab.b_active--;
        splx(s);
}

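/*
 * Raw (character) read: pass the request to vnstrategy() via physio().
 */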
vnread(dev, uio, flags, p)
        dev_t dev;
        struct uio *uio;
        int flags;
        struct proc *p;
{

#ifdef DEBUG
        if (vndebug & VDB_FOLLOW)
                printf("vnread(%x, %x, %x, %x)\n", dev, uio, flags, p);
#endif
        return(physio(vnstrategy, NULL, dev, B_READ, minphys, uio));
}

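/*
 * Raw (character) write: pass the request to vnstrategy() via physio().
 */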
vnwrite(dev, uio, flags, p)
        dev_t dev;
        struct uio *uio;
        int flags;
        struct proc *p;
{

#ifdef DEBUG
        if (vndebug & VDB_FOLLOW)
                printf("vnwrite(%x, %x, %x, %x)\n", dev, uio, flags, p);
#endif
        return(physio(vnstrategy, NULL, dev, B_WRITE, minphys, uio));
}

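/*
 * Control ioctls, restricted to the super-user: VNIOCSET associates the
 * named file with a unit and reports its size, VNIOCCLR tears the
 * association down again.
 */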
/* ARGSUSED */
vnioctl(dev, cmd, data, flag, p)
        dev_t dev;
        u_long cmd;
        caddr_t data;
        int flag;
        struct proc *p;
{
        int unit = vnunit(dev);
        register struct vn_softc *vn;
        struct vn_ioctl *vio;
        struct vattr vattr;
        struct nameidata nd;
        int error;

#ifdef DEBUG
        if (vndebug & VDB_FOLLOW)
                printf("vnioctl(%x, %x, %x, %x, %x): unit %d\n",
                       dev, cmd, data, flag, p, unit);
#endif
        error = suser(p->p_ucred, &p->p_acflag);
        if (error)
                return (error);
        if (unit >= numvnd)
                return (ENXIO);

        vn = &vn_softc[unit];
        vio = (struct vn_ioctl *)data;
        switch (cmd) {

        case VNIOCSET:
                if (vn->sc_flags & VNF_INITED)
                        return(EBUSY);
                /*
                 * Always open for read and write.
                 * This is probably bogus, but it lets vn_open()
                 * weed out directories, sockets, etc. so we don't
                 * have to worry about them.
                 */
#if (BSD > 199103)
                NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vn_file, p);
                if (error = vn_open(&nd, FREAD|FWRITE, 0))
                        return(error);
#else
                nd.ni_nameiop = LOOKUP | FOLLOW;
                nd.ni_segflg = UIO_USERSPACE;
                nd.ni_dirp = vio->vn_file;
                if (error = vn_open(&nd, p, FREAD|FWRITE, 0))
                        return(error);
#endif
                if (error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p)) {
                        VOP_UNLOCK(nd.ni_vp);
                        (void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
                        return(error);
                }
                VOP_UNLOCK(nd.ni_vp);
                vn->sc_vp = nd.ni_vp;
                vn->sc_size = btodb(vattr.va_size);     /* note truncation */
                if (error = vnsetcred(vn, p->p_ucred)) {
                        (void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
                        return(error);
                }
                vnthrottle(vn, vn->sc_vp);
                vio->vn_size = dbtob(vn->sc_size);
                vn->sc_flags |= VNF_INITED;
#ifdef DEBUG
                if (vndebug & VDB_INIT)
                        printf("vnioctl: SET vp %x size %x\n",
                               vn->sc_vp, vn->sc_size);
#endif
                break;

        case VNIOCCLR:
                if ((vn->sc_flags & VNF_INITED) == 0)
                        return(ENXIO);
                vnclear(vn);
#ifdef DEBUG
                if (vndebug & VDB_INIT)
                        printf("vnioctl: CLRed\n");
#endif
                break;

        default:
                return(ENXIO);
        }
        return(0);
}

/*
 * Duplicate the current process's credentials.  Since we are called only
 * as the result of a SET ioctl and only root can do that, any future access
 * to this "disk" is essentially as root.  Note that credentials may change
 * if some other uid can write directly to the mapped file (NFS).
 */
vnsetcred(vn, cred)
        register struct vn_softc *vn;
        struct ucred *cred;
{
        struct uio auio;
        struct iovec aiov;
        char *tmpbuf;
        int error;

        vn->sc_cred = crdup(cred);
        tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);

        /* XXX: Horrible kludge to establish credentials for NFS */
        aiov.iov_base = tmpbuf;
        aiov.iov_len = min(DEV_BSIZE, dbtob(vn->sc_size));
        auio.uio_iov = &aiov;
        auio.uio_iovcnt = 1;
        auio.uio_offset = 0;
        auio.uio_rw = UIO_READ;
        auio.uio_segflg = UIO_SYSSPACE;
        auio.uio_resid = aiov.iov_len;
        error = VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred);

        free(tmpbuf, M_TEMP);
        return (error);
}

/*
 * Set maxactive based on FS type
 */
vnthrottle(vn, vp)
        register struct vn_softc *vn;
        struct vnode *vp;
{
#ifdef NFSCLIENT
#if (BSD > 199103)
        extern int (**nfsv2_vnodeop_p)();

        if (vp->v_op == nfsv2_vnodeop_p)
#else
        extern struct vnodeops nfsv2_vnodeops;

        if (vp->v_op == &nfsv2_vnodeops)
#endif
                vn->sc_maxactive = 2;
        else
#endif
                vn->sc_maxactive = 8;

        if (vn->sc_maxactive < 1)
                vn->sc_maxactive = 1;
}

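/*
 * Deconfigure every initialized unit (e.g. when the system is going down).
 */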
vnshutdown()
{
        register struct vn_softc *vn;

        for (vn = &vn_softc[0]; vn < &vn_softc[numvnd]; vn++)
                if (vn->sc_flags & VNF_INITED)
                        vnclear(vn);
}

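/*
 * Deconfigure a unit: close the underlying vnode, release the credentials
 * saved at configuration time, and mark the unit uninitialized.
 */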
vnclear(vn)
        register struct vn_softc *vn;
{
        register struct vnode *vp = vn->sc_vp;
        struct proc *p = curproc;               /* XXX */

#ifdef DEBUG
        if (vndebug & VDB_FOLLOW)
                printf("vnclear(%x): vp %x\n", vn, vp);
#endif
        vn->sc_flags &= ~VNF_INITED;
        if (vp == (struct vnode *)0)
                panic("vnioctl: null vp");
        (void) vn_close(vp, FREAD|FWRITE, vn->sc_cred, p);
        crfree(vn->sc_cred);
        vn->sc_vp = (struct vnode *)0;
        vn->sc_cred = (struct ucred *)0;
        vn->sc_size = 0;
}

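/*
 * Return the size of a configured unit in DEV_BSIZE sectors, or -1 if the
 * unit does not exist or is not configured.
 */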
vnsize(dev)
        dev_t dev;
{
        int unit = vnunit(dev);
        register struct vn_softc *vn = &vn_softc[unit];

        if (unit >= numvnd || (vn->sc_flags & VNF_INITED) == 0)
                return(-1);
        return(vn->sc_size);
}

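/*
 * Crash dumps to a vnode disk are not supported.
 */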
vndump(dev)
{
        return(ENXIO);
}
#endif