vnd.c revision 1.3 1 /*
2 * Copyright (c) 1988 University of Utah.
3 * Copyright (c) 1990, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * the Systems Programming Group of the University of Utah Computer
8 * Science Department.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * from: Utah $Hdr: vn.c 1.8 92/12/20$
39 *
40 * @(#)vn.c 8.1 (Berkeley) 6/10/93
41 */
42
43 /*
44 * Vnode disk driver.
45 *
46 * Block/character interface to a vnode. Allows one to treat a file
47 * as a disk (e.g. build a filesystem in it, mount it, etc.).
48 *
49 * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode
50 * instead of a simple VOP_RDWR. We do this to avoid distorting the
51 * local buffer cache.
52 *
53 * NOTE 2: There is a security issue involved with this driver.
54 * Once mounted all access to the contents of the "mapped" file via
55 * the special file is controlled by the permissions on the special
56 * file, the protection of the mapped file is ignored (effectively,
57 * by using root credentials in all transactions).
58 *
59 * NOTE 3: Doesn't interact with leases, should it?
60 */
61 #include "vn.h"
62 #if NVN > 0
63
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/namei.h>
67 #include <sys/proc.h>
68 #include <sys/errno.h>
69 #include <sys/dkstat.h>
70 #include <sys/buf.h>
71 #include <sys/malloc.h>
72 #include <sys/ioctl.h>
73 #include <sys/mount.h>
74 #include <sys/vnode.h>
75 #include <sys/file.h>
76 #include <sys/uio.h>
77
78 #include <miscfs/specfs/specdev.h>
79
80 #include <dev/vnioctl.h>
81
/*
 * Debug tracing.  vndebug is a bitmask of the VDB_* classes below and is
 * only compiled in (and only consulted) when DEBUG is defined.
 */
#ifdef DEBUG
#define	VDB_FOLLOW	0x01	/* trace entry into the driver routines */
#define	VDB_INIT	0x02	/* trace VNIOCSET/VNIOCCLR handling */
#define	VDB_IO		0x04	/* trace individual transfers */
int	vndebug = 0x00;
#endif

/*
 * b_cylin is an alias for the b_resid field of struct buf; vnstrategy()
 * stores the block number there before handing a buffer to disksort().
 */
#define	b_cylin		b_resid

/* Unit number: three bits of the minor number, starting at bit 3. */
#define	vnunit(x)	((minor(x) >> 3) & 0x7)	/* for consistency */

/*
 * Allocate and release the buffer headers vnstrategy() uses for the
 * individual pieces of a request.  The header is NOT zeroed on allocation.
 */
#define	getvnbuf() \
	((struct buf *)malloc(sizeof(struct buf), M_DEVBUF, M_WAITOK))
#define	putvnbuf(bp) \
	free((caddr_t)(bp), M_DEVBUF)
97
98 struct vn_softc {
99 int sc_flags; /* flags */
100 size_t sc_size; /* size of vn */
101 struct vnode *sc_vp; /* vnode */
102 struct ucred *sc_cred; /* credentials */
103 int sc_maxactive; /* max # of active requests */
104 struct buf sc_tab; /* transfer queue */
105 };
106
107 /* sc_flags */
108 #define VNF_ALIVE 0x01
109 #define VNF_INITED 0x02
110
111 #if 0 /* if you need static allocation */
112 struct vn_softc vn_softc[NVN];
113 int numvnd = NVN;
114 #else
115 struct vn_softc *vn_softc;
116 int numvnd;
117 #endif
118
119 void
120 vnattach(num)
121 int num;
122 {
123 char *mem;
124 register u_long size;
125
126 if (num <= 0)
127 return;
128 size = num * sizeof(struct vn_softc);
129 mem = malloc(size, M_DEVBUF, M_NOWAIT);
130 if (mem == NULL) {
131 printf("WARNING: no memory for vnode disks\n");
132 return;
133 }
134 bzero(mem, size);
135 vn_softc = (struct vn_softc *)mem;
136 numvnd = num;
137 }
138
139 int
140 vnopen(dev, flags, mode, p)
141 dev_t dev;
142 int flags, mode;
143 struct proc *p;
144 {
145 int unit = vnunit(dev);
146
147 #ifdef DEBUG
148 if (vndebug & VDB_FOLLOW)
149 printf("vnopen(%x, %x, %x, %x)\n", dev, flags, mode, p);
150 #endif
151 if (unit >= numvnd)
152 return(ENXIO);
153 return(0);
154 }
155
156 /*
157 * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY.
158 * Note that this driver can only be used for swapping over NFS on the hp
159 * since nfs_strategy on the vax cannot handle u-areas and page tables.
160 */
161 vnstrategy(bp)
162 register struct buf *bp;
163 {
164 int unit = vnunit(bp->b_dev);
165 register struct vn_softc *vn = &vn_softc[unit];
166 register struct buf *nbp;
167 register int bn, bsize, resid;
168 register caddr_t addr;
169 int sz, flags;
170 extern void vniodone();
171
172 #ifdef DEBUG
173 if (vndebug & VDB_FOLLOW)
174 printf("vnstrategy(%x): unit %d\n", bp, unit);
175 #endif
176 if ((vn->sc_flags & VNF_INITED) == 0) {
177 bp->b_error = ENXIO;
178 bp->b_flags |= B_ERROR;
179 biodone(bp);
180 return;
181 }
182 bn = bp->b_blkno;
183 sz = howmany(bp->b_bcount, DEV_BSIZE);
184 bp->b_resid = bp->b_bcount;
185 if (bn < 0 || bn + sz > vn->sc_size) {
186 if (bn != vn->sc_size) {
187 bp->b_error = EINVAL;
188 bp->b_flags |= B_ERROR;
189 }
190 biodone(bp);
191 return;
192 }
193 bn = dbtob(bn);
194 #if (BSD > 199103)
195 bsize = vn->sc_vp->v_mount->mnt_stat.f_iosize;
196 #else
197 bsize = vn->sc_vp->v_mount->mnt_stat.f_bsize;
198 #endif
199 addr = bp->b_un.b_addr;
200 flags = bp->b_flags | B_CALL;
201 for (resid = bp->b_resid; resid; resid -= sz) {
202 struct vnode *vp;
203 daddr_t nbn;
204 int off, s;
205
206 nbp = getvnbuf();
207 off = bn % bsize;
208 sz = min(bsize - off, resid);
209 #if (BSD > 199103)
210 (void) VOP_BMAP(vn->sc_vp, bn / bsize, &vp, &nbn, NULL);
211 #else
212 (void) VOP_BMAP(vn->sc_vp, bn / bsize, &vp, &nbn);
213 #endif
214 #ifdef DEBUG
215 if (vndebug & VDB_IO)
216 printf("vnstrategy: vp %x/%x bn %x/%x\n",
217 vn->sc_vp, vp, bn, nbn);
218 #endif
219 nbp->b_flags = flags;
220 nbp->b_bcount = sz;
221 nbp->b_bufsize = bp->b_bufsize;
222 nbp->b_error = 0;
223 if (vp->v_type == VBLK || vp->v_type == VCHR)
224 nbp->b_dev = vp->v_rdev;
225 else
226 nbp->b_dev = NODEV;
227 nbp->b_un.b_addr = addr;
228 nbp->b_blkno = nbn + btodb(off);
229 nbp->b_proc = bp->b_proc;
230 nbp->b_iodone = vniodone;
231 nbp->b_vp = vp;
232 nbp->b_pfcent = (int) bp; /* XXX */
233 nbp->b_rcred = vn->sc_cred; /* XXX crdup? */
234 nbp->b_wcred = vn->sc_cred; /* XXX crdup? */
235 nbp->b_dirtyoff = bp->b_dirtyoff;
236 nbp->b_dirtyend = bp->b_dirtyend;
237 #if (BSD > 199103)
238 nbp->b_validoff = bp->b_validoff;
239 nbp->b_validend = bp->b_validend;
240 #endif
241 /*
242 * There is a hole in the file...punt.
243 * Note that we deal with this after the nbp allocation.
244 * This ensures that we properly clean up any operations
245 * that we have already fired off.
246 *
247 * XXX we could deal with this but it would be
248 * a hassle (in the write case).
249 */
250 if ((long)nbn == -1) {
251 nbp->b_error = EIO;
252 nbp->b_flags |= B_ERROR;
253 bp->b_resid -= (resid - sz);
254 biodone(nbp);
255 return;
256 }
257 /*
258 * Just sort by block number
259 */
260 nbp->b_cylin = nbp->b_blkno;
261 s = splbio();
262 disksort(&vn->sc_tab, nbp);
263 if (vn->sc_tab.b_active < vn->sc_maxactive) {
264 vn->sc_tab.b_active++;
265 vnstart(vn);
266 }
267 splx(s);
268 bn += sz;
269 addr += sz;
270 }
271 }
272
273 /*
274 * Feed requests sequentially.
275 * We do it this way to keep from flooding NFS servers if we are connected
276 * to an NFS file. This places the burden on the client rather than the
277 * server.
278 */
279 vnstart(vn)
280 register struct vn_softc *vn;
281 {
282 register struct buf *bp;
283
284 /*
285 * Dequeue now since lower level strategy routine might
286 * queue using same links
287 */
288 bp = vn->sc_tab.b_actf;
289 vn->sc_tab.b_actf = bp->b_actf;
290 #ifdef DEBUG
291 if (vndebug & VDB_IO)
292 printf("vnstart(%d): bp %x vp %x blkno %x addr %x cnt %x\n",
293 vn-vn_softc, bp, bp->b_vp, bp->b_blkno, bp->b_un.b_addr,
294 bp->b_bcount);
295 #endif
296 if ((bp->b_flags & B_READ) == 0)
297 bp->b_vp->v_numoutput++;
298 VOP_STRATEGY(bp);
299 }
300
301 void
302 vniodone(bp)
303 register struct buf *bp;
304 {
305 register struct buf *pbp = (struct buf *)bp->b_pfcent; /* XXX */
306 register struct vn_softc *vn = &vn_softc[vnunit(pbp->b_dev)];
307 int s;
308
309 s = splbio();
310 #ifdef DEBUG
311 if (vndebug & VDB_IO)
312 printf("vniodone(%d): bp %x vp %x blkno %x addr %x cnt %x\n",
313 vn-vn_softc, bp, bp->b_vp, bp->b_blkno, bp->b_un.b_addr,
314 bp->b_bcount);
315 #endif
316 if (bp->b_error) {
317 #ifdef DEBUG
318 if (vndebug & VDB_IO)
319 printf("vniodone: bp %x error %d\n", bp, bp->b_error);
320 #endif
321 pbp->b_flags |= B_ERROR;
322 pbp->b_error = biowait(bp);
323 }
324 pbp->b_resid -= bp->b_bcount;
325 putvnbuf(bp);
326 if (pbp->b_resid == 0) {
327 #ifdef DEBUG
328 if (vndebug & VDB_IO)
329 printf("vniodone: pbp %x iodone\n", pbp);
330 #endif
331 biodone(pbp);
332 }
333 if (vn->sc_tab.b_actf)
334 vnstart(vn);
335 else
336 vn->sc_tab.b_active--;
337 splx(s);
338 }
339
340 vnread(dev, uio, flags, p)
341 dev_t dev;
342 struct uio *uio;
343 int flags;
344 struct proc *p;
345 {
346
347 #ifdef DEBUG
348 if (vndebug & VDB_FOLLOW)
349 printf("vnread(%x, %x, %x, %x)\n", dev, uio, flags, p);
350 #endif
351 return(physio(vnstrategy, NULL, dev, B_READ, minphys, uio));
352 }
353
354 vnwrite(dev, uio, flags, p)
355 dev_t dev;
356 struct uio *uio;
357 int flags;
358 struct proc *p;
359 {
360
361 #ifdef DEBUG
362 if (vndebug & VDB_FOLLOW)
363 printf("vnwrite(%x, %x, %x, %x)\n", dev, uio, flags, p);
364 #endif
365 return(physio(vnstrategy, NULL, dev, B_WRITE, minphys, uio));
366 }
367
368 /* ARGSUSED */
369 vnioctl(dev, cmd, data, flag, p)
370 dev_t dev;
371 u_long cmd;
372 caddr_t data;
373 int flag;
374 struct proc *p;
375 {
376 int unit = vnunit(dev);
377 register struct vn_softc *vn;
378 struct vn_ioctl *vio;
379 struct vattr vattr;
380 struct nameidata nd;
381 int error;
382
383 #ifdef DEBUG
384 if (vndebug & VDB_FOLLOW)
385 printf("vnioctl(%x, %x, %x, %x, %x): unit %d\n",
386 dev, cmd, data, flag, p, unit);
387 #endif
388 error = suser(p->p_ucred, &p->p_acflag);
389 if (error)
390 return (error);
391 if (unit >= numvnd)
392 return (ENXIO);
393
394 vn = &vn_softc[unit];
395 vio = (struct vn_ioctl *)data;
396 switch (cmd) {
397
398 case VNIOCSET:
399 if (vn->sc_flags & VNF_INITED)
400 return(EBUSY);
401 /*
402 * Always open for read and write.
403 * This is probably bogus, but it lets vn_open()
404 * weed out directories, sockets, etc. so we don't
405 * have to worry about them.
406 */
407 #if (BSD > 199103)
408 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vn_file, p);
409 if (error = vn_open(&nd, FREAD|FWRITE, 0))
410 return(error);
411 #else
412 nd.ni_nameiop = LOOKUP | FOLLOW;
413 nd.ni_segflg = UIO_USERSPACE;
414 nd.ni_dirp = vio->vn_file;
415 if (error = vn_open(&nd, p, FREAD|FWRITE, 0))
416 return(error);
417 #endif
418 if (error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p)) {
419 VOP_UNLOCK(nd.ni_vp);
420 (void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
421 return(error);
422 }
423 VOP_UNLOCK(nd.ni_vp);
424 vn->sc_vp = nd.ni_vp;
425 vn->sc_size = btodb(vattr.va_size); /* note truncation */
426 if (error = vnsetcred(vn, p->p_ucred)) {
427 (void) vn_close(vn->sc_vp, FREAD|FWRITE, p->p_ucred, p);
428 return(error);
429 }
430 vnthrottle(vn, vn->sc_vp);
431 vio->vn_size = dbtob(vn->sc_size);
432 vn->sc_flags |= VNF_INITED;
433 #ifdef DEBUG
434 if (vndebug & VDB_INIT)
435 printf("vnioctl: SET vp %x size %x\n",
436 vn->sc_vp, vn->sc_size);
437 #endif
438 break;
439
440 case VNIOCCLR:
441 if ((vn->sc_flags & VNF_INITED) == 0)
442 return(ENXIO);
443 vnclear(vn);
444 #ifdef DEBUG
445 if (vndebug & VDB_INIT)
446 printf("vnioctl: CLRed\n");
447 #endif
448 break;
449
450 default:
451 return(ENXIO);
452 }
453 return(0);
454 }
455
456 /*
457 * Duplicate the current processes' credentials. Since we are called only
458 * as the result of a SET ioctl and only root can do that, any future access
459 * to this "disk" is essentially as root. Note that credentials may change
460 * if some other uid can write directly to the mapped file (NFS).
461 */
462 vnsetcred(vn, cred)
463 register struct vn_softc *vn;
464 struct ucred *cred;
465 {
466 struct uio auio;
467 struct iovec aiov;
468 char tmpbuf[DEV_BSIZE];
469
470 vn->sc_cred = crdup(cred);
471 /* XXX: Horrible kludge to establish credentials for NFS */
472 aiov.iov_base = tmpbuf;
473 aiov.iov_len = min(DEV_BSIZE, dbtob(vn->sc_size));
474 auio.uio_iov = &aiov;
475 auio.uio_iovcnt = 1;
476 auio.uio_offset = 0;
477 auio.uio_rw = UIO_READ;
478 auio.uio_segflg = UIO_SYSSPACE;
479 auio.uio_resid = aiov.iov_len;
480 return(VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred));
481 }
482
483 /*
484 * Set maxactive based on FS type
485 */
486 vnthrottle(vn, vp)
487 register struct vn_softc *vn;
488 struct vnode *vp;
489 {
490 #ifdef NFSCLIENT
491 #if (BSD > 199103)
492 extern int (**nfsv2_vnodeop_p)();
493 if (vp->v_op == nfsv2_vnodeop_p)
494 #else
495 extern struct vnodeops nfsv2_vnodeops;
496 if (vp->v_op == &nfsv2_vnodeops)
497 #endif
498 vn->sc_maxactive = 2;
499 else
500 #endif
501 vn->sc_maxactive = 8;
502
503 if (vn->sc_maxactive < 1)
504 vn->sc_maxactive = 1;
505 }
506
507 vnshutdown()
508 {
509 register struct vn_softc *vn;
510
511 for (vn = &vn_softc[0]; vn < &vn_softc[numvnd]; vn++)
512 if (vn->sc_flags & VNF_INITED)
513 vnclear(vn);
514 }
515
516 vnclear(vn)
517 register struct vn_softc *vn;
518 {
519 register struct vnode *vp = vn->sc_vp;
520 struct proc *p = curproc; /* XXX */
521
522 #ifdef DEBUG
523 if (vndebug & VDB_FOLLOW)
524 printf("vnclear(%x): vp %x\n", vp);
525 #endif
526 vn->sc_flags &= ~VNF_INITED;
527 if (vp == (struct vnode *)0)
528 panic("vnioctl: null vp");
529 #if 0
530 /* XXX - this doesn't work right now */
531 (void) VOP_FSYNC(vp, 0, vn->sc_cred, MNT_WAIT, p);
532 #endif
533 (void) vn_close(vp, FREAD|FWRITE, vn->sc_cred, p);
534 crfree(vn->sc_cred);
535 vn->sc_vp = (struct vnode *)0;
536 vn->sc_cred = (struct ucred *)0;
537 vn->sc_size = 0;
538 }
539
540 vnsize(dev)
541 dev_t dev;
542 {
543 int unit = vnunit(dev);
544 register struct vn_softc *vn = &vn_softc[unit];
545
546 if (unit >= numvnd || (vn->sc_flags & VNF_INITED) == 0)
547 return(-1);
548 return(vn->sc_size);
549 }
550
551 vndump(dev)
552 {
553 return(ENXIO);
554 }
555 #endif
556