/*	$NetBSD: vnd.c,v 1.21 1995/10/05 06:20:57 mycroft Exp $	*/

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vn.c 1.13 94/04/02$
 *
 *	@(#)vn.c	8.6 (Berkeley) 4/1/94
 */

/*
 * Vnode disk driver.
 *
 * Block/character interface to a vnode.  Allows one to treat a file
 * as a disk (e.g. build a filesystem in it, mount it, etc.).
 *
 * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode
 * instead of a simple VOP_RDWR.  We do this to avoid distorting the
 * local buffer cache.
 *
 * NOTE 2: There is a security issue involved with this driver.
 * Once mounted all access to the contents of the "mapped" file via
 * the special file is controlled by the permissions on the special
 * file, the protection of the mapped file is ignored (effectively,
 * by using root credentials in all transactions).
 *
 * NOTE 3: Doesn't interact with leases, should it?
 */
#include "vnd.h"
#if NVND > 0

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/dkstat.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/ioctl.h>
#include <sys/disklabel.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/uio.h>

#include <miscfs/specfs/specdev.h>

#include <dev/vndioctl.h>

#ifdef DEBUG
int dovndcluster = 1;
int vnddebug = 0x00;
#define VDB_FOLLOW	0x01
#define VDB_INIT	0x02
#define VDB_IO		0x04
#endif

#define	b_cylin	b_resid

#define	vndunit(x)	DISKUNIT(x)

struct vndbuf {
	struct buf	vb_buf;
	struct buf	*vb_obp;
};

#define	getvndbuf() \
	((struct vndbuf *)malloc(sizeof(struct vndbuf), M_DEVBUF, M_WAITOK))
#define	putvndbuf(vbp) \
	free((caddr_t)(vbp), M_DEVBUF)

struct vnd_softc {
	int		 sc_flags;	/* flags */
	size_t		 sc_size;	/* size of vnd */
	struct vnode	*sc_vp;		/* vnode */
	struct ucred	*sc_cred;	/* credentials */
	int		 sc_maxactive;	/* max # of active requests */
	struct buf	 sc_tab;	/* transfer queue */
};

/* sc_flags */
#define	VNF_ALIVE	0x01
#define	VNF_INITED	0x02

#if 0	/* if you need static allocation */
struct vnd_softc vn_softc[NVND];
int numvnd = NVND;
#else
struct vnd_softc *vnd_softc;
int numvnd;
#endif

void	vndclear __P((struct vnd_softc *));
void	vndstart __P((struct vnd_softc *));
int	vndsetcred __P((struct vnd_softc *, struct ucred *));
void	vndthrottle __P((struct vnd_softc *, struct vnode *));

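/*
 * Allocate and zero the soft state for "num" units; on allocation
 * failure the driver is simply left with no units configured.
 */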
void
vndattach(num)
	int num;
{
	char *mem;
	register u_long size;

	if (num <= 0)
		return;
	size = num * sizeof(struct vnd_softc);
	mem = malloc(size, M_DEVBUF, M_NOWAIT);
	if (mem == NULL) {
		printf("WARNING: no memory for vnode disks\n");
		return;
	}
	bzero(mem, size);
	vnd_softc = (struct vnd_softc *)mem;
	numvnd = num;
}

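/*
 * Open is a no-op beyond validating the unit number; the real work of
 * associating a file with a unit is done by the VNDIOCSET ioctl.
 */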
int
vndopen(dev, flags, mode, p)
	dev_t dev;
	int flags, mode;
	struct proc *p;
{
	int unit = vndunit(dev);

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndopen(%x, %x, %x, %x)\n", dev, flags, mode, p);
#endif
	if (unit >= numvnd)
		return(ENXIO);
	return(0);
}

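/*
 * Close is likewise a no-op; the unit stays configured until VNDIOCCLR.
 */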
int
vndclose(dev, flags, mode, p)
	dev_t dev;
	int flags, mode;
	struct proc *p;
{
#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclose(%x, %x, %x, %x)\n", dev, flags, mode, p);
#endif
	return 0;
}

/*
 * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY.
 * Note that this driver can only be used for swapping over NFS on the hp
 * since nfs_strategy on the vax cannot handle u-areas and page tables.
 */
void
vndstrategy(bp)
	register struct buf *bp;
{
	int unit = vndunit(bp->b_dev);
	register struct vnd_softc *vnd = &vnd_softc[unit];
	register struct vndbuf *nbp;
	register int bn, bsize, resid;
	register caddr_t addr;
	int sz, flags, error;
	extern void vndiodone();

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndstrategy(%x): unit %d\n", bp, unit);
#endif
	if ((vnd->sc_flags & VNF_INITED) == 0) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		biodone(bp);
		return;
	}
	bn = bp->b_blkno;
	sz = howmany(bp->b_bcount, DEV_BSIZE);
	bp->b_resid = bp->b_bcount;
	if (bn < 0 || bn + sz > vnd->sc_size) {
		if (bn != vnd->sc_size) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
		}
		biodone(bp);
		return;
	}
	bn = dbtob(bn);
	bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
	addr = bp->b_data;
	flags = bp->b_flags | B_CALL;
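	/*
	 * Map each filesystem-block-sized chunk of the request to its
	 * underlying device block with VOP_BMAP and hand it off to the
	 * target vnode as a separate child buffer.
	 */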
	for (resid = bp->b_resid; resid; resid -= sz) {
		struct vnode *vp;
		daddr_t nbn;
		int off, s, nra;

		nra = 0;
		VOP_LOCK(vnd->sc_vp);
		error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
		VOP_UNLOCK(vnd->sc_vp);
		if (error == 0 && (long)nbn == -1)
			error = EIO;
#ifdef DEBUG
		if (!dovndcluster)
			nra = 0;
#endif

		if (off = bn % bsize)
			sz = bsize - off;
		else
			sz = (1 + nra) * bsize;
		if (resid < sz)
			sz = resid;
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndstrategy: vp %x/%x bn %x/%x sz %x\n",
			       vnd->sc_vp, vp, bn, nbn, sz);
#endif

		nbp = getvndbuf();
		nbp->vb_buf.b_flags = flags;
		nbp->vb_buf.b_bcount = sz;
		nbp->vb_buf.b_bufsize = bp->b_bufsize;
		nbp->vb_buf.b_error = 0;
		if (vp->v_type == VBLK || vp->v_type == VCHR)
			nbp->vb_buf.b_dev = vp->v_rdev;
		else
			nbp->vb_buf.b_dev = NODEV;
		nbp->vb_buf.b_data = addr;
		nbp->vb_buf.b_blkno = nbn + btodb(off);
		nbp->vb_buf.b_proc = bp->b_proc;
		nbp->vb_buf.b_iodone = vndiodone;
		nbp->vb_buf.b_vp = vp;
		nbp->vb_buf.b_rcred = vnd->sc_cred;	/* XXX crdup? */
		nbp->vb_buf.b_wcred = vnd->sc_cred;	/* XXX crdup? */
		nbp->vb_buf.b_dirtyoff = bp->b_dirtyoff;
		nbp->vb_buf.b_dirtyend = bp->b_dirtyend;
		nbp->vb_buf.b_validoff = bp->b_validoff;
		nbp->vb_buf.b_validend = bp->b_validend;

		/* save a reference to the old buffer */
		nbp->vb_obp = bp;

		/*
		 * If there was an error or a hole in the file...punt.
		 * Note that we deal with this after the nbp allocation.
		 * This ensures that we properly clean up any operations
		 * that we have already fired off.
		 *
		 * XXX we could deal with holes here but it would be
		 * a hassle (in the write case).
		 */
		if (error) {
			nbp->vb_buf.b_error = error;
			nbp->vb_buf.b_flags |= B_ERROR;
			bp->b_resid -= (resid - sz);
			biodone(&nbp->vb_buf);
			return;
		}
		/*
		 * Just sort by block number
		 */
		nbp->vb_buf.b_cylin = nbp->vb_buf.b_blkno;
		s = splbio();
		disksort(&vnd->sc_tab, &nbp->vb_buf);
		if (vnd->sc_tab.b_active < vnd->sc_maxactive) {
			vnd->sc_tab.b_active++;
			vndstart(vnd);
		}
		splx(s);
		bn += sz;
		addr += sz;
	}
}

/*
 * Feed requests sequentially.
 * We do it this way to keep from flooding NFS servers if we are connected
 * to an NFS file.  This places the burden on the client rather than the
 * server.
 */
void
vndstart(vnd)
	register struct vnd_softc *vnd;
{
	register struct buf *bp;

	/*
	 * Dequeue now since lower level strategy routine might
	 * queue using same links
	 */
	bp = vnd->sc_tab.b_actf;
	vnd->sc_tab.b_actf = bp->b_actf;
#ifdef DEBUG
	if (vnddebug & VDB_IO)
		printf("vndstart(%d): bp %x vp %x blkno %x addr %x cnt %x\n",
		       vnd-vnd_softc, bp, bp->b_vp, bp->b_blkno, bp->b_data,
		       bp->b_bcount);
#endif
	if ((bp->b_flags & B_READ) == 0)
		bp->b_vp->v_numoutput++;
	VOP_STRATEGY(bp);
}

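/*
 * Completion routine for a child buffer: propagate any error and the
 * completed byte count to the original request, free the child, finish
 * the original buffer when nothing remains, and either start the next
 * queued transfer or drop the active count.
 */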
void
vndiodone(vbp)
	register struct vndbuf *vbp;
{
	register struct buf *pbp = vbp->vb_obp;
	register struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)];
	int s;

	s = splbio();
#ifdef DEBUG
	if (vnddebug & VDB_IO)
		printf("vndiodone(%d): vbp %x vp %x blkno %x addr %x cnt %x\n",
		       vnd-vnd_softc, vbp, vbp->vb_buf.b_vp, vbp->vb_buf.b_blkno,
		       vbp->vb_buf.b_data, vbp->vb_buf.b_bcount);
#endif
	if (vbp->vb_buf.b_error) {
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndiodone: vbp %x error %d\n", vbp,
			       vbp->vb_buf.b_error);
#endif
		pbp->b_flags |= B_ERROR;
		pbp->b_error = biowait(&vbp->vb_buf);
	}
	pbp->b_resid -= vbp->vb_buf.b_bcount;
	putvndbuf(vbp);
	if (pbp->b_resid == 0) {
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndiodone: pbp %x iodone\n", pbp);
#endif
		biodone(pbp);
	}
	if (vnd->sc_tab.b_actf)
		vndstart(vnd);
	else
		vnd->sc_tab.b_active--;
	splx(s);
}

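/*
 * Raw (character device) read and write simply funnel the user request
 * through physio() into vndstrategy().
 */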
int
vndread(dev, uio)
	dev_t dev;
	struct uio *uio;
{

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndread(%x, %x)\n", dev, uio);
#endif
	return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio));
}

int
vndwrite(dev, uio)
	dev_t dev;
	struct uio *uio;
{

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndwrite(%x, %x)\n", dev, uio);
#endif
	return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio));
}

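/*
 * Ioctl handler, restricted to the super-user.  VNDIOCSET binds a file
 * to a unit (opening it, sizing it, and saving credentials); VNDIOCCLR
 * tears the association down again.
 */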
/* ARGSUSED */
int
vndioctl(dev, cmd, data, flag, p)
	dev_t dev;
	u_long cmd;
	caddr_t data;
	int flag;
	struct proc *p;
{
	int unit = vndunit(dev);
	register struct vnd_softc *vnd;
	struct vnd_ioctl *vio;
	struct vattr vattr;
	struct nameidata nd;
	int error;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndioctl(%x, %lx, %x, %x, %x): unit %d\n",
		       dev, cmd, data, flag, p, unit);
#endif
	error = suser(p->p_ucred, &p->p_acflag);
	if (error)
		return (error);
	if (unit >= numvnd)
		return (ENXIO);

	vnd = &vnd_softc[unit];
	vio = (struct vnd_ioctl *)data;
	switch (cmd) {

	case VNDIOCSET:
		if (vnd->sc_flags & VNF_INITED)
			return(EBUSY);
		/*
		 * Always open for read and write.
		 * This is probably bogus, but it lets vn_open()
		 * weed out directories, sockets, etc. so we don't
		 * have to worry about them.
		 */
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p);
		if (error = vn_open(&nd, FREAD|FWRITE, 0))
			return(error);
		if (error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p)) {
			VOP_UNLOCK(nd.ni_vp);
			(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
			return(error);
		}
		VOP_UNLOCK(nd.ni_vp);
		vnd->sc_vp = nd.ni_vp;
		vnd->sc_size = btodb(vattr.va_size);	/* note truncation */
		if (error = vndsetcred(vnd, p->p_ucred)) {
			(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
			return(error);
		}
		vndthrottle(vnd, vnd->sc_vp);
		vio->vnd_size = dbtob(vnd->sc_size);
		vnd->sc_flags |= VNF_INITED;
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: SET vp %x size %x\n",
			       vnd->sc_vp, vnd->sc_size);
#endif
		break;

	case VNDIOCCLR:
		if ((vnd->sc_flags & VNF_INITED) == 0)
			return(ENXIO);
		vndclear(vnd);
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: CLRed\n");
#endif
		break;

	default:
		return(ENOTTY);
	}
	return(0);
}

/*
 * Duplicate the current process's credentials.  Since we are called only
 * as the result of a SET ioctl and only root can do that, any future access
 * to this "disk" is essentially as root.  Note that credentials may change
 * if some other uid can write directly to the mapped file (NFS).
 */
int
vndsetcred(vnd, cred)
	register struct vnd_softc *vnd;
	struct ucred *cred;
{
	struct uio auio;
	struct iovec aiov;
	char *tmpbuf;
	int error;

	vnd->sc_cred = crdup(cred);
	tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);

	/* XXX: Horrible kludge to establish credentials for NFS */
	aiov.iov_base = tmpbuf;
	aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size));
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_resid = aiov.iov_len;
	VOP_LOCK(vnd->sc_vp);
	error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
	VOP_UNLOCK(vnd->sc_vp);

	free(tmpbuf, M_TEMP);
	return (error);
}


/*
 * Set maxactive based on FS type
 */
void
vndthrottle(vnd, vp)
	register struct vnd_softc *vnd;
	struct vnode *vp;
{
#ifdef NFSCLIENT
	extern int (**nfsv2_vnodeop_p)();

	if (vp->v_op == nfsv2_vnodeop_p)
		vnd->sc_maxactive = 2;
	else
#endif
		vnd->sc_maxactive = 8;

	if (vnd->sc_maxactive < 1)
		vnd->sc_maxactive = 1;
}

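/*
 * Clear out every configured unit, e.g. when the system is going down.
 */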
void
vndshutdown()
{
	register struct vnd_softc *vnd;

	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
		if (vnd->sc_flags & VNF_INITED)
			vndclear(vnd);
}

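/*
 * Detach the underlying vnode from a unit: close the file, release the
 * saved credentials, and mark the unit unconfigured.
 */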
void
vndclear(vnd)
	register struct vnd_softc *vnd;
{
	register struct vnode *vp = vnd->sc_vp;
	struct proc *p = curproc;		/* XXX */

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclear(%x): vp %x\n", vnd, vp);
#endif
	vnd->sc_flags &= ~VNF_INITED;
	if (vp == (struct vnode *)0)
		panic("vndioctl: null vp");
	(void) vn_close(vp, FREAD|FWRITE, vnd->sc_cred, p);
	crfree(vnd->sc_cred);
	vnd->sc_vp = (struct vnode *)0;
	vnd->sc_cred = (struct ucred *)0;
	vnd->sc_size = 0;
}

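/*
 * Return the size of the unit in DEV_BSIZE blocks, or -1 if it is not
 * configured.
 */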
int
vndsize(dev)
	dev_t dev;
{
	int unit = vndunit(dev);
	register struct vnd_softc *vnd = &vnd_softc[unit];

	if (unit >= numvnd || (vnd->sc_flags & VNF_INITED) == 0)
		return(-1);
	return(vnd->sc_size);
}

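/*
 * Crash dumps to vnode disks are not supported.
 */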
int
vnddump(dev, blkno, va, size)
	dev_t dev;
	daddr_t blkno;
	caddr_t va;
	size_t size;
{

	/* Not implemented. */
	return ENXIO;
}
#endif