vnd.c revision 1.74
1 /* $NetBSD: vnd.c,v 1.74 2001/10/22 16:43:55 mrg Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 /*
40 * Copyright (c) 1988 University of Utah.
41 * Copyright (c) 1990, 1993
42 * The Regents of the University of California. All rights reserved.
43 *
44 * This code is derived from software contributed to Berkeley by
45 * the Systems Programming Group of the University of Utah Computer
46 * Science Department.
47 *
48 * Redistribution and use in source and binary forms, with or without
49 * modification, are permitted provided that the following conditions
50 * are met:
51 * 1. Redistributions of source code must retain the above copyright
52 * notice, this list of conditions and the following disclaimer.
53 * 2. Redistributions in binary form must reproduce the above copyright
54 * notice, this list of conditions and the following disclaimer in the
55 * documentation and/or other materials provided with the distribution.
56 * 3. All advertising materials mentioning features or use of this software
57 * must display the following acknowledgement:
58 * This product includes software developed by the University of
59 * California, Berkeley and its contributors.
60 * 4. Neither the name of the University nor the names of its contributors
61 * may be used to endorse or promote products derived from this software
62 * without specific prior written permission.
63 *
64 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
65 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
66 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
67 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
68 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
69 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
70 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
71 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
72 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
73 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
74 * SUCH DAMAGE.
75 *
76 * from: Utah $Hdr: vn.c 1.13 94/04/02$
77 *
78 * @(#)vn.c 8.9 (Berkeley) 5/14/95
79 */
80
81 /*
82 * Vnode disk driver.
83 *
84 * Block/character interface to a vnode. Allows one to treat a file
85 * as a disk (e.g. build a filesystem in it, mount it, etc.).
86 *
87 * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode
88 * instead of a simple VOP_RDWR. We do this to avoid distorting the
89 * local buffer cache.
90 *
91 * NOTE 2: There is a security issue involved with this driver.
92 * Once mounted, all access to the contents of the "mapped" file via
93 * the special file is controlled by the permissions on the special
94 * file; the protection of the mapped file is ignored (effectively,
95 * by using root credentials in all transactions).
96 *
97 * NOTE 3: Doesn't interact with leases, should it?
98 */
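
/*
 * A minimal sketch of how a unit is typically configured from userland:
 * open the raw partition and issue VNDIOCSET with the path of the file
 * to use as backing store.  The device path, the header list and the use
 * of err(3) below are assumptions; vnconfig(8) is the usual front end.
 *
 *	(needs <fcntl.h>, <string.h>, <err.h>, <sys/ioctl.h>, <dev/vndvar.h>)
 *
 *	struct vnd_ioctl vio;
 *	int fd = open("/dev/rvnd0d", O_RDWR);	-- raw partition; letter varies
 *	memset(&vio, 0, sizeof(vio));
 *	vio.vnd_file = "/var/tmp/diskimage";	-- file to expose as a disk
 *	if (ioctl(fd, VNDIOCSET, &vio) == -1)	-- requires root and FWRITE
 *		err(1, "VNDIOCSET");
 *
 * VNDIOCCLR on the same descriptor unconfigures the unit again.
 */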
99
100 #if defined(_KERNEL_OPT)
101 #include "fs_nfs.h"
102 #endif
103
104 #include <sys/param.h>
105 #include <sys/systm.h>
106 #include <sys/namei.h>
107 #include <sys/proc.h>
108 #include <sys/errno.h>
109 #include <sys/buf.h>
110 #include <sys/malloc.h>
111 #include <sys/ioctl.h>
112 #include <sys/disklabel.h>
113 #include <sys/device.h>
114 #include <sys/disk.h>
115 #include <sys/stat.h>
116 #include <sys/mount.h>
117 #include <sys/vnode.h>
118 #include <sys/file.h>
119 #include <sys/uio.h>
120 #include <sys/conf.h>
121
122 #include <miscfs/specfs/specdev.h>
123
124 #include <dev/vndvar.h>
125
126 #if defined(VNDDEBUG) && !defined(DEBUG)
127 #define DEBUG
128 #endif
129
130 #ifdef DEBUG
131 int dovndcluster = 1;
132 #define VDB_FOLLOW 0x01
133 #define VDB_INIT 0x02
134 #define VDB_IO 0x04
135 #define VDB_LABEL 0x08
136 int vnddebug = 0x00;
137 #endif
138
139 #define vndunit(x) DISKUNIT(x)
140
141 struct vndxfer {
142 struct buf *vx_bp; /* Pointer to parent buffer */
143 int vx_error;
144 int vx_pending; /* # of pending aux buffers */
145 int vx_flags;
146 #define VX_BUSY 1
147 };
148
149 struct vndbuf {
150 struct buf vb_buf;
151 struct vndxfer *vb_xfer;
152 };
153
154 #define VND_GETXFER(vnd) pool_get(&(vnd)->sc_vxpool, PR_NOWAIT)
155 #define VND_PUTXFER(vnd, vx) pool_put(&(vnd)->sc_vxpool, (vx))
156
157 #define VND_GETBUF(vnd) pool_get(&(vnd)->sc_vbpool, PR_NOWAIT)
158 #define VND_PUTBUF(vnd, vb) pool_put(&(vnd)->sc_vbpool, (vb))
159
160 struct vnd_softc *vnd_softc;
161 int numvnd = 0;
162
163 #define VNDLABELDEV(dev) \
164 (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART))
165
166 /* called by main() at boot time */
167 void vndattach __P((int));
168
169 void vndclear __P((struct vnd_softc *));
170 void vndstart __P((struct vnd_softc *));
171 int vndsetcred __P((struct vnd_softc *, struct ucred *));
172 void vndthrottle __P((struct vnd_softc *, struct vnode *));
173 void vndiodone __P((struct buf *));
174 void vndshutdown __P((void));
175
176 void vndgetdefaultlabel __P((struct vnd_softc *, struct disklabel *));
177 void vndgetdisklabel __P((dev_t));
178
179 static int vndlock __P((struct vnd_softc *));
180 static void vndunlock __P((struct vnd_softc *));
181
182 void
183 vndattach(num)
184 int num;
185 {
186 int i;
187 char *mem;
188
189 if (num <= 0)
190 return;
191 i = num * sizeof(struct vnd_softc);
192 mem = malloc(i, M_DEVBUF, M_NOWAIT);
193 if (mem == NULL) {
194 printf("WARNING: no memory for vnode disks\n");
195 return;
196 }
197 memset(mem, 0, i);
198 vnd_softc = (struct vnd_softc *)mem;
199 numvnd = num;
200
201 for (i = 0; i < numvnd; i++)
202 BUFQ_INIT(&vnd_softc[i].sc_tab);
203 }
204
205 int
206 vndopen(dev, flags, mode, p)
207 dev_t dev;
208 int flags, mode;
209 struct proc *p;
210 {
211 int unit = vndunit(dev);
212 struct vnd_softc *sc;
213 int error = 0, part, pmask;
214 struct disklabel *lp;
215
216 #ifdef DEBUG
217 if (vnddebug & VDB_FOLLOW)
218 printf("vndopen(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, p);
219 #endif
220 if (unit >= numvnd)
221 return (ENXIO);
222 sc = &vnd_softc[unit];
223
224 if ((error = vndlock(sc)) != 0)
225 return (error);
226
227 lp = sc->sc_dkdev.dk_label;
228
229 part = DISKPART(dev);
230 pmask = (1 << part);
231
232 /*
233 * If we're initialized, check to see if there are any other
234 * open partitions. If not, then it's safe to update the
235 * in-core disklabel.
236 */
237 if ((sc->sc_flags & VNF_INITED) && (sc->sc_dkdev.dk_openmask == 0))
238 vndgetdisklabel(dev);
239
240 /* Check that the partition exists. */
241 if (part != RAW_PART) {
242 if (((sc->sc_flags & VNF_INITED) == 0) ||
243 ((part >= lp->d_npartitions) ||
244 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
245 error = ENXIO;
246 goto done;
247 }
248 }
249
250 /* Prevent our unit from being unconfigured while open. */
251 switch (mode) {
252 case S_IFCHR:
253 sc->sc_dkdev.dk_copenmask |= pmask;
254 break;
255
256 case S_IFBLK:
257 sc->sc_dkdev.dk_bopenmask |= pmask;
258 break;
259 }
260 sc->sc_dkdev.dk_openmask =
261 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;
262
263 done:
264 vndunlock(sc);
265 return (error);
266 }
267
268 int
269 vndclose(dev, flags, mode, p)
270 dev_t dev;
271 int flags, mode;
272 struct proc *p;
273 {
274 int unit = vndunit(dev);
275 struct vnd_softc *sc;
276 int error = 0, part;
277
278 #ifdef DEBUG
279 if (vnddebug & VDB_FOLLOW)
280 printf("vndclose(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, p);
281 #endif
282
283 if (unit >= numvnd)
284 return (ENXIO);
285 sc = &vnd_softc[unit];
286
287 if ((error = vndlock(sc)) != 0)
288 return (error);
289
290 part = DISKPART(dev);
291
292 /* ...that much closer to allowing unconfiguration... */
293 switch (mode) {
294 case S_IFCHR:
295 sc->sc_dkdev.dk_copenmask &= ~(1 << part);
296 break;
297
298 case S_IFBLK:
299 sc->sc_dkdev.dk_bopenmask &= ~(1 << part);
300 break;
301 }
302 sc->sc_dkdev.dk_openmask =
303 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;
304
305 vndunlock(sc);
306 return (0);
307 }
308
309 /*
310 * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY.
311 */
312 void
313 vndstrategy(bp)
314 struct buf *bp;
315 {
316 int unit = vndunit(bp->b_dev);
317 struct vnd_softc *vnd = &vnd_softc[unit];
318 struct vndxfer *vnx;
319 int s, bsize, resid;
320 off_t bn;
321 caddr_t addr;
322 int sz, flags, error, wlabel;
323 struct disklabel *lp;
324 struct partition *pp;
325
326 #ifdef DEBUG
327 if (vnddebug & VDB_FOLLOW)
328 printf("vndstrategy(%p): unit %d\n", bp, unit);
329 #endif
330 if ((vnd->sc_flags & VNF_INITED) == 0) {
331 bp->b_error = ENXIO;
332 bp->b_flags |= B_ERROR;
333 goto done;
334 }
335
336 /* If it's a nil transfer, wake up the top half now. */
337 if (bp->b_bcount == 0)
338 goto done;
339
340 lp = vnd->sc_dkdev.dk_label;
341
342 /*
343 * The transfer must be a whole number of blocks.
344 */
345 if ((bp->b_bcount % lp->d_secsize) != 0) {
346 bp->b_error = EINVAL;
347 bp->b_flags |= B_ERROR;
348 goto done;
349 }
350
351 /*
352 * Do bounds checking and adjust transfer. If there's an error,
353 * the bounds check will flag that for us.
354 */
355 wlabel = vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING);
356 if (DISKPART(bp->b_dev) != RAW_PART)
357 if (bounds_check_with_label(bp, lp, wlabel) <= 0)
358 goto done;
359
360 bp->b_resid = bp->b_bcount;
361
362 /*
363 * Put the block number in terms of the logical blocksize
364 * of the "device".
365 */
366 bn = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);
367
368 /*
369 * Translate the partition-relative block number to an absolute.
370 */
371 if (DISKPART(bp->b_dev) != RAW_PART) {
372 pp = &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
373 bn += pp->p_offset;
374 }
375
376 /* ...and convert to a byte offset within the file. */
377 bn *= lp->d_secsize;
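
	/*
	 * Worked example (figures chosen purely for illustration): with a
	 * d_secsize of 2048 (four DEV_BSIZE blocks), a request at b_blkno
	 * 100 on a partition whose p_offset is 1000 sectors yields
	 * bn = 100 / 4 = 25, then 25 + 1000 = 1025, and finally
	 * 1025 * 2048 = 2099200 bytes into the backing file.
	 */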
378
379 if (vnd->sc_vp->v_mount == NULL) {
380 bp->b_error = ENXIO;
381 bp->b_flags |= B_ERROR;
382 goto done;
383 }
384 bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
385 addr = bp->b_data;
386 flags = (bp->b_flags & (B_READ|B_ASYNC)) | B_CALL;
387
388 /* Allocate a header for this transfer and link it to the buffer */
389 s = splbio();
390 vnx = VND_GETXFER(vnd);
391 splx(s);
392 vnx->vx_flags = VX_BUSY;
393 vnx->vx_error = 0;
394 vnx->vx_pending = 0;
395 vnx->vx_bp = bp;
396
397 for (resid = bp->b_resid; resid; resid -= sz) {
398 struct vndbuf *nbp;
399 struct vnode *vp;
400 daddr_t nbn;
401 int off, nra;
402
403 nra = 0;
404 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
405 error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
406 VOP_UNLOCK(vnd->sc_vp, 0);
407
408 if (error == 0 && (long)nbn == -1)
409 error = EIO;
410
411 /*
412 * If there was an error or a hole in the file...punt.
413 * Note that we may have to wait for any operations
414 * that we have already fired off before releasing
415 * the buffer.
416 *
417 * XXX we could deal with holes here but it would be
418 * a hassle (in the write case).
419 */
420 if (error) {
421 s = splbio();
422 vnx->vx_error = error;
423 goto out;
424 }
425
426 #ifdef DEBUG
427 if (!dovndcluster)
428 nra = 0;
429 #endif
430
431 if ((off = bn % bsize) != 0)
432 sz = bsize - off;
433 else
434 sz = (1 + nra) * bsize;
435 if (resid < sz)
436 sz = resid;
437 #ifdef DEBUG
438 if (vnddebug & VDB_IO)
439 printf("vndstrategy: vp %p/%p bn 0x%qx/0x%x sz 0x%x\n",
440 vnd->sc_vp, vp, (long long)bn, nbn, sz);
441 #endif
442
443 s = splbio();
444 nbp = VND_GETBUF(vnd);
445 splx(s);
446 nbp->vb_buf.b_flags = flags;
447 nbp->vb_buf.b_bcount = sz;
448 nbp->vb_buf.b_bufsize = bp->b_bufsize;
449 nbp->vb_buf.b_error = 0;
450 nbp->vb_buf.b_data = addr;
451 nbp->vb_buf.b_blkno = nbp->vb_buf.b_rawblkno = nbn + btodb(off);
452 nbp->vb_buf.b_proc = bp->b_proc;
453 nbp->vb_buf.b_iodone = vndiodone;
454 nbp->vb_buf.b_vp = NULLVP;
455 LIST_INIT(&nbp->vb_buf.b_dep);
456
457 nbp->vb_xfer = vnx;
458
459 /*
460 * Just sort by block number
461 */
462 s = splbio();
463 if (vnx->vx_error != 0) {
464 VND_PUTBUF(vnd, nbp);
465 goto out;
466 }
467 vnx->vx_pending++;
468 bgetvp(vp, &nbp->vb_buf);
469 disksort_blkno(&vnd->sc_tab, &nbp->vb_buf);
470 vndstart(vnd);
471 splx(s);
472 bn += sz;
473 addr += sz;
474 }
475
476 s = splbio();
477
478 out: /* Arrive here at splbio */
479 vnx->vx_flags &= ~VX_BUSY;
480 if (vnx->vx_pending == 0) {
481 if (vnx->vx_error != 0) {
482 bp->b_error = vnx->vx_error;
483 bp->b_flags |= B_ERROR;
484 }
485 VND_PUTXFER(vnd, vnx);
486 biodone(bp);
487 }
488 splx(s);
489 return;
490
491 done:
492 biodone(bp);
493 }
494
495 /*
496 * Feed requests sequentially.
497 * We do it this way to keep from flooding NFS servers if we are connected
498 * to an NFS file. This places the burden on the client rather than the
499 * server.
500 */
501 void
502 vndstart(vnd)
503 struct vnd_softc *vnd;
504 {
505 struct buf *bp;
506
507 /*
508 * Dequeue now, since the lower-level strategy routine might
509 * queue using the same links.
510 */
511
512 if ((vnd->sc_flags & VNF_BUSY) != 0)
513 return;
514
515 vnd->sc_flags |= VNF_BUSY;
516
517 while (vnd->sc_active < vnd->sc_maxactive) {
518 bp = BUFQ_FIRST(&vnd->sc_tab);
519 if (bp == NULL)
520 break;
521 BUFQ_REMOVE(&vnd->sc_tab, bp);
522 vnd->sc_active++;
523 #ifdef DEBUG
524 if (vnddebug & VDB_IO)
525 printf("vndstart(%ld): bp %p vp %p blkno 0x%x"
526 " flags %lx addr %p cnt 0x%lx\n",
527 (long) (vnd-vnd_softc), bp, bp->b_vp, bp->b_blkno,
528 bp->b_flags, bp->b_data, bp->b_bcount);
529 #endif
530
531 /* Instrumentation. */
532 disk_busy(&vnd->sc_dkdev);
533
534 if ((bp->b_flags & B_READ) == 0)
535 bp->b_vp->v_numoutput++;
536 VOP_STRATEGY(bp);
537 }
538 vnd->sc_flags &= ~VNF_BUSY;
539 }
540
541 void
542 vndiodone(bp)
543 struct buf *bp;
544 {
545 struct vndbuf *vbp = (struct vndbuf *) bp;
546 struct vndxfer *vnx = (struct vndxfer *)vbp->vb_xfer;
547 struct buf *pbp = vnx->vx_bp;
548 struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)];
549 int s, resid;
550
551 s = splbio();
552 #ifdef DEBUG
553 if (vnddebug & VDB_IO)
554 printf("vndiodone(%ld): vbp %p vp %p blkno 0x%x addr %p cnt 0x%lx\n",
555 (long) (vnd-vnd_softc), vbp, vbp->vb_buf.b_vp,
556 vbp->vb_buf.b_blkno, vbp->vb_buf.b_data,
557 vbp->vb_buf.b_bcount);
558 #endif
559
560 resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid;
561 pbp->b_resid -= resid;
562 disk_unbusy(&vnd->sc_dkdev, resid);
563 vnx->vx_pending--;
564
565 if (vbp->vb_buf.b_error) {
566 #ifdef DEBUG
567 if (vnddebug & VDB_IO)
568 printf("vndiodone: vbp %p error %d\n", vbp,
569 vbp->vb_buf.b_error);
570 #endif
571 vnx->vx_error = vbp->vb_buf.b_error;
572 }
573
574 if (vbp->vb_buf.b_vp != NULLVP)
575 brelvp(&vbp->vb_buf);
576
577 VND_PUTBUF(vnd, vbp);
578
579 /*
580 * Wrap up this transaction if it has run to completion or, in
581 * case of an error, when all auxiliary buffers have returned.
582 */
583 if (vnx->vx_error != 0) {
584 pbp->b_flags |= B_ERROR;
585 pbp->b_error = vnx->vx_error;
586 if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) {
587
588 #ifdef DEBUG
589 if (vnddebug & VDB_IO)
590 printf("vndiodone: pbp %p iodone: error %d\n",
591 pbp, vnx->vx_error);
592 #endif
593 VND_PUTXFER(vnd, vnx);
594 biodone(pbp);
595 }
596 } else if (pbp->b_resid == 0) {
597
598 #ifdef DIAGNOSTIC
599 if (vnx->vx_pending != 0)
600 panic("vndiodone: vnx pending: %d", vnx->vx_pending);
601 #endif
602
603 if ((vnx->vx_flags & VX_BUSY) == 0) {
604 #ifdef DEBUG
605 if (vnddebug & VDB_IO)
606 printf("vndiodone: pbp %p iodone\n", pbp);
607 #endif
608 VND_PUTXFER(vnd, vnx);
609 biodone(pbp);
610 }
611 }
612
613 vnd->sc_active--;
614 vndstart(vnd);
615 splx(s);
616 }
617
618 /* ARGSUSED */
619 int
620 vndread(dev, uio, flags)
621 dev_t dev;
622 struct uio *uio;
623 int flags;
624 {
625 int unit = vndunit(dev);
626 struct vnd_softc *sc;
627
628 #ifdef DEBUG
629 if (vnddebug & VDB_FOLLOW)
630 printf("vndread(0x%x, %p)\n", dev, uio);
631 #endif
632
633 if (unit >= numvnd)
634 return (ENXIO);
635 sc = &vnd_softc[unit];
636
637 if ((sc->sc_flags & VNF_INITED) == 0)
638 return (ENXIO);
639
640 return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio));
641 }
642
643 /* ARGSUSED */
644 int
645 vndwrite(dev, uio, flags)
646 dev_t dev;
647 struct uio *uio;
648 int flags;
649 {
650 int unit = vndunit(dev);
651 struct vnd_softc *sc;
652
653 #ifdef DEBUG
654 if (vnddebug & VDB_FOLLOW)
655 printf("vndwrite(0x%x, %p)\n", dev, uio);
656 #endif
657
658 if (unit >= numvnd)
659 return (ENXIO);
660 sc = &vnd_softc[unit];
661
662 if ((sc->sc_flags & VNF_INITED) == 0)
663 return (ENXIO);
664
665 return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio));
666 }
667
668 /* ARGSUSED */
669 int
670 vndioctl(dev, cmd, data, flag, p)
671 dev_t dev;
672 u_long cmd;
673 caddr_t data;
674 int flag;
675 struct proc *p;
676 {
677 int unit = vndunit(dev);
678 struct vnd_softc *vnd;
679 struct vnd_ioctl *vio;
680 struct vattr vattr;
681 struct nameidata nd;
682 int error, part, pmask;
683 size_t geomsize;
684 #ifdef __HAVE_OLD_DISKLABEL
685 struct disklabel newlabel;
686 #endif
687
688 #ifdef DEBUG
689 if (vnddebug & VDB_FOLLOW)
690 printf("vndioctl(0x%x, 0x%lx, %p, 0x%x, %p): unit %d\n",
691 dev, cmd, data, flag, p, unit);
692 #endif
693 error = suser(p->p_ucred, &p->p_acflag);
694 if (error)
695 return (error);
696 if (unit >= numvnd)
697 return (ENXIO);
698
699 vnd = &vnd_softc[unit];
700 vio = (struct vnd_ioctl *)data;
701
702 /* Must be open for writes for these commands... */
703 switch (cmd) {
704 case VNDIOCSET:
705 case VNDIOCCLR:
706 case DIOCSDINFO:
707 case DIOCWDINFO:
708 #ifdef __HAVE_OLD_DISKLABEL
709 case ODIOCSDINFO:
710 case ODIOCWDINFO:
711 #endif
712 case DIOCWLABEL:
713 if ((flag & FWRITE) == 0)
714 return (EBADF);
715 }
716
717 /* Must be initialized for these... */
718 switch (cmd) {
719 case VNDIOCCLR:
720 case DIOCGDINFO:
721 case DIOCSDINFO:
722 case DIOCWDINFO:
723 case DIOCGPART:
724 case DIOCWLABEL:
725 case DIOCGDEFLABEL:
726 #ifdef __HAVE_OLD_DISKLABEL
727 case ODIOCGDINFO:
728 case ODIOCSDINFO:
729 case ODIOCWDINFO:
730 case ODIOCGDEFLABEL:
731 #endif
732 if ((vnd->sc_flags & VNF_INITED) == 0)
733 return (ENXIO);
734 }
735
736 switch (cmd) {
737 case VNDIOCSET:
738 if (vnd->sc_flags & VNF_INITED)
739 return (EBUSY);
740
741 if ((error = vndlock(vnd)) != 0)
742 return (error);
743
744 /*
745 * Always open for read and write.
746 * This is probably bogus, but it lets vn_open()
747 * weed out directories, sockets, etc. so we don't
748 * have to worry about them.
749 */
750 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p);
751 if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
752 vndunlock(vnd);
753 return(error);
754 }
755 error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p);
756 if (error) {
757 VOP_UNLOCK(nd.ni_vp, 0);
758 (void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
759 vndunlock(vnd);
760 return(error);
761 }
762 VOP_UNLOCK(nd.ni_vp, 0);
763 vnd->sc_vp = nd.ni_vp;
764 vnd->sc_size = btodb(vattr.va_size); /* note truncation */
765
766 /*
767 * Use the pseudo-geometry specified. If none was provided,
768 * use the "standard" Adaptec fictitious geometry.
769 */
770 if (vio->vnd_flags & VNDIOF_HASGEOM) {
771
772 memcpy(&vnd->sc_geom, &vio->vnd_geom,
773 sizeof(vio->vnd_geom));
774
775 /*
776 * Sanity-check the sector size.
777 * XXX Don't allow secsize < DEV_BSIZE. Should
778 * XXX we?
779 */
780 if (vnd->sc_geom.vng_secsize < DEV_BSIZE ||
781 (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0) {
782 (void) vn_close(nd.ni_vp, FREAD|FWRITE,
783 p->p_ucred, p);
784 vndunlock(vnd);
785 return (EINVAL);
786 }
787
788 /*
789 * Compute the size (in DEV_BSIZE blocks) specified
790 * by the geometry.
791 */
792 geomsize = (vnd->sc_geom.vng_nsectors *
793 vnd->sc_geom.vng_ntracks *
794 vnd->sc_geom.vng_ncylinders) *
795 (vnd->sc_geom.vng_secsize / DEV_BSIZE);
796
797 /*
798 * Sanity-check the size against the specified
799 * geometry.
800 */
801 if (vnd->sc_size < geomsize) {
802 (void) vn_close(nd.ni_vp, FREAD|FWRITE,
803 p->p_ucred, p);
804 vndunlock(vnd);
805 return (EINVAL);
806 }
807 } else {
808 /*
809 * Size must be at least 2048 DEV_BSIZE blocks
810 * (1M) in order to use this geometry.
811 */
812 if (vnd->sc_size < (32 * 64)) {
813 vndunlock(vnd);
814 return (EINVAL);
815 }
816
817 vnd->sc_geom.vng_secsize = DEV_BSIZE;
818 vnd->sc_geom.vng_nsectors = 32;
819 vnd->sc_geom.vng_ntracks = 64;
820 vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32);
821
822 /*
823 * Compute the actual size allowed by this geometry.
824 */
825 geomsize = 32 * 64 * vnd->sc_geom.vng_ncylinders;
826 }
827
828 /*
829 * Truncate the size to that specified by
830 * the geometry.
831 * XXX Should we even bother with this?
832 */
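		/*
		 * Worked example (size chosen purely for illustration): a
		 * 10.5 MB backing file gives sc_size = 21504 DEV_BSIZE blocks;
		 * with the default geometry above, vng_ncylinders =
		 * 21504 / 2048 = 10 and geomsize = 32 * 64 * 10 = 20480, so
		 * sc_size is truncated from 21504 to 20480 blocks below.
		 */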
833 vnd->sc_size = geomsize;
834
835 if ((error = vndsetcred(vnd, p->p_ucred)) != 0) {
836 (void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
837 vndunlock(vnd);
838 return(error);
839 }
840 vndthrottle(vnd, vnd->sc_vp);
841 vio->vnd_size = dbtob(vnd->sc_size);
842 vnd->sc_flags |= VNF_INITED;
843 #ifdef DEBUG
844 if (vnddebug & VDB_INIT)
845 printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n",
846 vnd->sc_vp, (unsigned long) vnd->sc_size,
847 vnd->sc_geom.vng_secsize,
848 vnd->sc_geom.vng_nsectors,
849 vnd->sc_geom.vng_ntracks,
850 vnd->sc_geom.vng_ncylinders);
851 #endif
852
853 /* Attach the disk. */
854 memset(vnd->sc_xname, 0, sizeof(vnd->sc_xname)); /* XXX */
855 sprintf(vnd->sc_xname, "vnd%d", unit); /* XXX */
856 vnd->sc_dkdev.dk_name = vnd->sc_xname;
857 disk_attach(&vnd->sc_dkdev);
858
859 /* Initialize the xfer and buffer pools. */
860 pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0,
861 0, 0, "vndxpl", 0, NULL, NULL, M_DEVBUF);
862 pool_init(&vnd->sc_vbpool, sizeof(struct vndbuf), 0,
863 0, 0, "vndbpl", 0, NULL, NULL, M_DEVBUF);
864
865 /* Try and read the disklabel. */
866 vndgetdisklabel(dev);
867
868 vndunlock(vnd);
869
870 break;
871
872 case VNDIOCCLR:
873 if ((error = vndlock(vnd)) != 0)
874 return (error);
875
876 /*
877 * Don't unconfigure if any other partitions are open
878 * or if both the character and block flavors of this
879 * partition are open.
880 */
881 part = DISKPART(dev);
882 pmask = (1 << part);
883 if ((vnd->sc_dkdev.dk_openmask & ~pmask) ||
884 ((vnd->sc_dkdev.dk_bopenmask & pmask) &&
885 (vnd->sc_dkdev.dk_copenmask & pmask))) {
886 vndunlock(vnd);
887 return (EBUSY);
888 }
889
890 vndclear(vnd);
891 #ifdef DEBUG
892 if (vnddebug & VDB_INIT)
893 printf("vndioctl: CLRed\n");
894 #endif
895
896 /* Destroy the xfer and buffer pools. */
897 pool_destroy(&vnd->sc_vxpool);
898 pool_destroy(&vnd->sc_vbpool);
899
900 /* Detach the disk. */
901 disk_detach(&vnd->sc_dkdev);
902
903 vndunlock(vnd);
904
905 break;
906
907 case DIOCGDINFO:
908 *(struct disklabel *)data = *(vnd->sc_dkdev.dk_label);
909 break;
910
911 #ifdef __HAVE_OLD_DISKLABEL
912 case ODIOCGDINFO:
913 newlabel = *(vnd->sc_dkdev.dk_label);
914 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
915 return ENOTTY;
916 memcpy(data, &newlabel, sizeof (struct olddisklabel));
917 break;
918 #endif
919
920 case DIOCGPART:
921 ((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label;
922 ((struct partinfo *)data)->part =
923 &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
924 break;
925
926 case DIOCWDINFO:
927 case DIOCSDINFO:
928 #ifdef __HAVE_OLD_DISKLABEL
929 case ODIOCWDINFO:
930 case ODIOCSDINFO:
931 #endif
932 {
933 struct disklabel *lp;
934
935 if ((error = vndlock(vnd)) != 0)
936 return (error);
937
938 vnd->sc_flags |= VNF_LABELLING;
939
940 #ifdef __HAVE_OLD_DISKLABEL
941 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
942 memset(&newlabel, 0, sizeof newlabel);
943 memcpy(&newlabel, data, sizeof (struct olddisklabel));
944 lp = &newlabel;
945 } else
946 #endif
947 lp = (struct disklabel *)data;
948
949 error = setdisklabel(vnd->sc_dkdev.dk_label,
950 lp, 0, vnd->sc_dkdev.dk_cpulabel);
951 if (error == 0) {
952 if (cmd == DIOCWDINFO
953 #ifdef __HAVE_OLD_DISKLABEL
954 || cmd == ODIOCWDINFO
955 #endif
956 )
957 error = writedisklabel(VNDLABELDEV(dev),
958 vndstrategy, vnd->sc_dkdev.dk_label,
959 vnd->sc_dkdev.dk_cpulabel);
960 }
961
962 vnd->sc_flags &= ~VNF_LABELLING;
963
964 vndunlock(vnd);
965
966 if (error)
967 return (error);
968 break;
969 }
970
971 case DIOCWLABEL:
972 if (*(int *)data != 0)
973 vnd->sc_flags |= VNF_WLABEL;
974 else
975 vnd->sc_flags &= ~VNF_WLABEL;
976 break;
977
978 case DIOCGDEFLABEL:
979 vndgetdefaultlabel(vnd, (struct disklabel *)data);
980 break;
981
982 #ifdef __HAVE_OLD_DISKLABEL
983 case ODIOCGDEFLABEL:
984 vndgetdefaultlabel(vnd, &newlabel);
985 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
986 return ENOTTY;
987 memcpy(data, &newlabel, sizeof (struct olddisklabel));
988 break;
989 #endif
990
991 default:
992 return (ENOTTY);
993 }
994
995 return (0);
996 }
997
998 /*
999 * Duplicate the current process's credentials. Since we are called only
1000 * as the result of a SET ioctl and only root can do that, any future access
1001 * to this "disk" is essentially as root. Note that credentials may change
1002 * if some other uid can write directly to the mapped file (NFS).
1003 */
1004 int
1005 vndsetcred(vnd, cred)
1006 struct vnd_softc *vnd;
1007 struct ucred *cred;
1008 {
1009 struct uio auio;
1010 struct iovec aiov;
1011 char *tmpbuf;
1012 int error;
1013
1014 vnd->sc_cred = crdup(cred);
1015 tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);
1016
1017 /* XXX: Horrible kludge to establish credentials for NFS */
1018 aiov.iov_base = tmpbuf;
1019 aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size));
1020 auio.uio_iov = &aiov;
1021 auio.uio_iovcnt = 1;
1022 auio.uio_offset = 0;
1023 auio.uio_rw = UIO_READ;
1024 auio.uio_segflg = UIO_SYSSPACE;
1025 auio.uio_resid = aiov.iov_len;
1026 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
1027 error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
1028 if (error == 0) {
1029 /*
1030 * Because vnd does all I/O directly through the vnode,
1031 * we need to flush (at least) the buffer from the above
1032 * VOP_READ from the buffer cache to prevent cache
1033 * incoherencies. Also, be careful to write dirty
1034 * buffers back to stable storage.
1035 */
1036 error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred,
1037 curproc, 0, 0);
1038 }
1039 VOP_UNLOCK(vnd->sc_vp, 0);
1040
1041 free(tmpbuf, M_TEMP);
1042 return (error);
1043 }
1044
1045 /*
1046 * Set maxactive based on FS type
1047 */
1048 void
1049 vndthrottle(vnd, vp)
1050 struct vnd_softc *vnd;
1051 struct vnode *vp;
1052 {
1053 #ifdef NFS
1054 extern int (**nfsv2_vnodeop_p) __P((void *));
1055
1056 if (vp->v_op == nfsv2_vnodeop_p)
1057 vnd->sc_maxactive = 2;
1058 else
1059 #endif
1060 vnd->sc_maxactive = 8;
1061
1062 if (vnd->sc_maxactive < 1)
1063 vnd->sc_maxactive = 1;
1064 }
1065
1066 void
1067 vndshutdown()
1068 {
1069 struct vnd_softc *vnd;
1070
1071 for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
1072 if (vnd->sc_flags & VNF_INITED)
1073 vndclear(vnd);
1074 }
1075
1076 void
1077 vndclear(vnd)
1078 struct vnd_softc *vnd;
1079 {
1080 struct vnode *vp = vnd->sc_vp;
1081 struct proc *p = curproc; /* XXX */
1082
1083 #ifdef DEBUG
1084 if (vnddebug & VDB_FOLLOW)
1085 printf("vndclear(%p): vp %p\n", vnd, vp);
1086 #endif
1087 vnd->sc_flags &= ~VNF_INITED;
1088 if (vp == (struct vnode *)0)
1089 panic("vndioctl: null vp");
1090 (void) vn_close(vp, FREAD|FWRITE, vnd->sc_cred, p);
1091 crfree(vnd->sc_cred);
1092 vnd->sc_vp = (struct vnode *)0;
1093 vnd->sc_cred = (struct ucred *)0;
1094 vnd->sc_size = 0;
1095 }
1096
1097 int
1098 vndsize(dev)
1099 dev_t dev;
1100 {
1101 struct vnd_softc *sc;
1102 struct disklabel *lp;
1103 int part, unit, omask;
1104 int size;
1105
1106 unit = vndunit(dev);
1107 if (unit >= numvnd)
1108 return (-1);
1109 sc = &vnd_softc[unit];
1110
1111 if ((sc->sc_flags & VNF_INITED) == 0)
1112 return (-1);
1113
1114 part = DISKPART(dev);
1115 omask = sc->sc_dkdev.dk_openmask & (1 << part);
1116 lp = sc->sc_dkdev.dk_label;
1117
1118 if (omask == 0 && vndopen(dev, 0, S_IFBLK, curproc))
1119 return (-1);
1120
1121 if (lp->d_partitions[part].p_fstype != FS_SWAP)
1122 size = -1;
1123 else
1124 size = lp->d_partitions[part].p_size *
1125 (lp->d_secsize / DEV_BSIZE);
1126
1127 if (omask == 0 && vndclose(dev, 0, S_IFBLK, curproc))
1128 return (-1);
1129
1130 return (size);
1131 }
1132
1133 int
1134 vnddump(dev, blkno, va, size)
1135 dev_t dev;
1136 daddr_t blkno;
1137 caddr_t va;
1138 size_t size;
1139 {
1140
1141 /* Not implemented. */
1142 return ENXIO;
1143 }
1144
1145 void
1146 vndgetdefaultlabel(sc, lp)
1147 struct vnd_softc *sc;
1148 struct disklabel *lp;
1149 {
1150 struct vndgeom *vng = &sc->sc_geom;
1151 struct partition *pp;
1152
1153 memset(lp, 0, sizeof(*lp));
1154
1155 lp->d_secperunit = sc->sc_size / (vng->vng_secsize / DEV_BSIZE);
1156 lp->d_secsize = vng->vng_secsize;
1157 lp->d_nsectors = vng->vng_nsectors;
1158 lp->d_ntracks = vng->vng_ntracks;
1159 lp->d_ncylinders = vng->vng_ncylinders;
1160 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1161
1162 strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename));
1163 lp->d_type = DTYPE_VND;
1164 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1165 lp->d_rpm = 3600;
1166 lp->d_interleave = 1;
1167 lp->d_flags = 0;
1168
1169 pp = &lp->d_partitions[RAW_PART];
1170 pp->p_offset = 0;
1171 pp->p_size = lp->d_secperunit;
1172 pp->p_fstype = FS_UNUSED;
1173 lp->d_npartitions = RAW_PART + 1;
1174
1175 lp->d_magic = DISKMAGIC;
1176 lp->d_magic2 = DISKMAGIC;
1177 lp->d_checksum = dkcksum(lp);
1178 }
1179
1180 /*
1181 * Read the disklabel from a vnd. If one is not present, create a fake one.
1182 */
1183 void
1184 vndgetdisklabel(dev)
1185 dev_t dev;
1186 {
1187 struct vnd_softc *sc = &vnd_softc[vndunit(dev)];
1188 char *errstring;
1189 struct disklabel *lp = sc->sc_dkdev.dk_label;
1190 struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel;
1191 int i;
1192
1193 memset(clp, 0, sizeof(*clp));
1194
1195 vndgetdefaultlabel(sc, lp);
1196
1197 /*
1198 * Call the generic disklabel extraction routine.
1199 */
1200 errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp);
1201 if (errstring) {
1202 /*
1203 * Lack of a disklabel is common, but we print the warning
1204 * anyway, since it might contain other useful information.
1205 */
1206 printf("%s: %s\n", sc->sc_xname, errstring);
1207
1208 /*
1209 * For historical reasons, if there's no disklabel
1210 * present, all partitions must be FS_BSDFFS and
1211 * occupy the entire disk.
1212 */
1213 for (i = 0; i < MAXPARTITIONS; i++) {
1214 /*
1215 * Don't wipe out a port-specific hack (such as
1216 * the DOS partition hack of the i386 port).
1217 */
1218 if (lp->d_partitions[i].p_fstype != FS_UNUSED)
1219 continue;
1220
1221 lp->d_partitions[i].p_size = lp->d_secperunit;
1222 lp->d_partitions[i].p_offset = 0;
1223 lp->d_partitions[i].p_fstype = FS_BSDFFS;
1224 }
1225
1226 strncpy(lp->d_packname, "default label",
1227 sizeof(lp->d_packname));
1228
1229 lp->d_checksum = dkcksum(lp);
1230 }
1231 }
1232
1233 /*
1234 * Wait interruptibly for an exclusive lock.
1235 *
1236 * XXX
1237 * Several drivers do this; it should be abstracted and made MP-safe.
1238 */
1239 static int
1240 vndlock(sc)
1241 struct vnd_softc *sc;
1242 {
1243 int error;
1244
1245 while ((sc->sc_flags & VNF_LOCKED) != 0) {
1246 sc->sc_flags |= VNF_WANTED;
1247 if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
1248 return (error);
1249 }
1250 sc->sc_flags |= VNF_LOCKED;
1251 return (0);
1252 }
1253
1254 /*
1255 * Unlock and wake up any waiters.
1256 */
1257 static void
1258 vndunlock(sc)
1259 struct vnd_softc *sc;
1260 {
1261
1262 sc->sc_flags &= ~VNF_LOCKED;
1263 if ((sc->sc_flags & VNF_WANTED) != 0) {
1264 sc->sc_flags &= ~VNF_WANTED;
1265 wakeup(sc);
1266 }
1267 }
1268