/*	$NetBSD: vnd.c,v 1.157 2006/11/09 15:27:40 jmmv Exp $	*/
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 /*
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. Neither the name of the University nor the names of its contributors
56 * may be used to endorse or promote products derived from this software
57 * without specific prior written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69 * SUCH DAMAGE.
70 *
71 * from: Utah $Hdr: vn.c 1.13 94/04/02$
72 *
73 * @(#)vn.c 8.9 (Berkeley) 5/14/95
74 */
75
76 /*
77 * Copyright (c) 1988 University of Utah.
78 *
79 * This code is derived from software contributed to Berkeley by
80 * the Systems Programming Group of the University of Utah Computer
81 * Science Department.
82 *
83 * Redistribution and use in source and binary forms, with or without
84 * modification, are permitted provided that the following conditions
85 * are met:
86 * 1. Redistributions of source code must retain the above copyright
87 * notice, this list of conditions and the following disclaimer.
88 * 2. Redistributions in binary form must reproduce the above copyright
89 * notice, this list of conditions and the following disclaimer in the
90 * documentation and/or other materials provided with the distribution.
91 * 3. All advertising materials mentioning features or use of this software
92 * must display the following acknowledgement:
93 * This product includes software developed by the University of
94 * California, Berkeley and its contributors.
95 * 4. Neither the name of the University nor the names of its contributors
96 * may be used to endorse or promote products derived from this software
97 * without specific prior written permission.
98 *
99 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
100 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
101 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
102 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
103 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
104 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
105 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
106 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
107 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
108 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
109 * SUCH DAMAGE.
110 *
111 * from: Utah $Hdr: vn.c 1.13 94/04/02$
112 *
113 * @(#)vn.c 8.9 (Berkeley) 5/14/95
114 */
115
116 /*
117 * Vnode disk driver.
118 *
119 * Block/character interface to a vnode. Allows one to treat a file
120 * as a disk (e.g. build a filesystem in it, mount it, etc.).
121 *
122 * NOTE 1: If the vnode supports the VOP_BMAP and VOP_STRATEGY operations,
123 * this uses them to avoid distorting the local buffer cache. If those
124 * block-level operations are not available, this falls back to the regular
125 * read and write calls. Using these may distort the cache in some cases
 * but it is better to have the driver working than to prevent it from
 * working on file
127 * systems where the block-level operations are not implemented for
128 * whatever reason.
129 *
130 * NOTE 2: There is a security issue involved with this driver.
131 * Once mounted all access to the contents of the "mapped" file via
132 * the special file is controlled by the permissions on the special
133 * file, the protection of the mapped file is ignored (effectively,
134 * by using root credentials in all transactions).
135 *
136 * NOTE 3: Doesn't interact with leases, should it?
137 */
138
139 #include <sys/cdefs.h>
140 __KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.157 2006/11/09 15:27:40 jmmv Exp $");
141
142 #if defined(_KERNEL_OPT)
143 #include "fs_nfs.h"
144 #include "opt_vnd.h"
145 #endif
146
147 #include <sys/param.h>
148 #include <sys/systm.h>
149 #include <sys/namei.h>
150 #include <sys/proc.h>
151 #include <sys/kthread.h>
152 #include <sys/errno.h>
153 #include <sys/buf.h>
154 #include <sys/bufq.h>
155 #include <sys/malloc.h>
156 #include <sys/ioctl.h>
157 #include <sys/disklabel.h>
158 #include <sys/device.h>
159 #include <sys/disk.h>
160 #include <sys/stat.h>
161 #include <sys/mount.h>
162 #include <sys/vnode.h>
163 #include <sys/file.h>
164 #include <sys/uio.h>
165 #include <sys/conf.h>
166 #include <sys/kauth.h>
167
168 #include <net/zlib.h>
169
170 #include <miscfs/genfs/genfs.h>
171 #include <miscfs/specfs/specdev.h>
172
173 #include <dev/vndvar.h>
174
#if defined(VNDDEBUG) && !defined(DEBUG)
#define DEBUG
#endif

#ifdef DEBUG
int dovndcluster = 1;		/* permit multi-block transfers via BMAP read-ahead */
#define VDB_FOLLOW	0x01	/* trace routine entry */
#define VDB_INIT	0x02	/* trace initialization */
#define VDB_IO		0x04	/* trace I/O operations */
#define VDB_LABEL	0x08	/* trace disklabel handling */
int vnddebug = 0x00;		/* bitmask of enabled VDB_* flags */
#endif

/* Extract the unit number from a vnd dev_t. */
#define vndunit(x)	DISKUNIT(x)

/*
 * Per-transfer state: a shadow struct buf handed to the backing vnode
 * plus a back-pointer to the owning softc.  Allocated from the softc's
 * vxpool (see VND_GETXFER/VND_PUTXFER below).
 */
struct vndxfer {
	struct buf vx_buf;
	struct vnd_softc *vx_vnd;
};
/* vx_buf is the first member, so a buf pointer converts back to its xfer. */
#define	VND_BUFTOXFER(bp) ((struct vndxfer *)(void *)bp)

#define VND_GETXFER(vnd)	pool_get(&(vnd)->sc_vxpool, PR_WAITOK)
#define VND_PUTXFER(vnd, vx)	pool_put(&(vnd)->sc_vxpool, (vx))

/* The device that carries the disklabel: the raw partition of this unit. */
#define VNDLABELDEV(dev) \
    (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART))

/* called by main() at boot time (XXX: and the LKM driver) */
void	vndattach(int);

static void	vndclear(struct vnd_softc *, int);
static int	vndsetcred(struct vnd_softc *, kauth_cred_t);
static void	vndthrottle(struct vnd_softc *, struct vnode *);
static void	vndiodone(struct buf *);
#if 0
static void	vndshutdown(void);
#endif

static void	vndgetdefaultlabel(struct vnd_softc *, struct disklabel *);
static void	vndgetdisklabel(dev_t, struct vnd_softc *);

static int	vndlock(struct vnd_softc *);
static void	vndunlock(struct vnd_softc *);
#ifdef VND_COMPRESSION
static void	compstrategy(struct buf *, off_t);
static void	*vnd_alloc(void *, u_int, u_int);
static void	vnd_free(void *, void *);
#endif /* VND_COMPRESSION */

static void	vndthread(void *);
static boolean_t vnode_has_op(const struct vnode *, int);
static void	handle_with_rdwr(struct vnd_softc *, const struct buf *,
		    struct buf *);
static void	handle_with_strategy(struct vnd_softc *, const struct buf *,
		    struct buf *);

static dev_type_open(vndopen);
static dev_type_close(vndclose);
static dev_type_read(vndread);
static dev_type_write(vndwrite);
static dev_type_ioctl(vndioctl);
static dev_type_strategy(vndstrategy);
static dev_type_dump(vnddump);
static dev_type_size(vndsize);

/* Block-device switch entries. */
const struct bdevsw vnd_bdevsw = {
	vndopen, vndclose, vndstrategy, vndioctl, vnddump, vndsize, D_DISK
};

/* Character-device switch entries. */
const struct cdevsw vnd_cdevsw = {
	vndopen, vndclose, vndread, vndwrite, vndioctl,
	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
};

static int	vnd_match(struct device *, struct cfdata *, void *);
static void	vnd_attach(struct device *, struct device *, void *);
static int	vnd_detach(struct device *, int);

CFATTACH_DECL(vnd, sizeof(struct vnd_softc),
    vnd_match, vnd_attach, vnd_detach, NULL);
extern struct cfdriver vnd_cd;

static struct vnd_softc	*vnd_spawn(int);
int	vnd_destroy(struct device *);
259
260 void
261 vndattach(int num __unused)
262 {
263 int error;
264
265 error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
266 if (error)
267 aprint_error("%s: unable to register cfattach\n",
268 vnd_cd.cd_name);
269 }
270
271 static int
272 vnd_match(struct device *self __unused, struct cfdata *cfdata __unused,
273 void *aux __unused)
274 {
275 return 1;
276 }
277
/*
 * Autoconfiguration attach hook: initialize a freshly created softc.
 * No backing file is associated yet; that happens via VNDIOCSET.
 */
static void
vnd_attach(struct device *parent __unused, struct device *self,
    void *aux __unused)
{
	struct vnd_softc *sc = (struct vnd_softc *)self;

	/* No compressed-image state until VNDIOCSET installs one. */
	sc->sc_comp_offsets = NULL;
	sc->sc_comp_buff = NULL;
	sc->sc_comp_decombuf = NULL;
	/* Queue requests sorted by raw block number. */
	bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK);
	pseudo_disk_init(&sc->sc_dkdev);
}
290
291 static int
292 vnd_detach(struct device *self, int flags __unused)
293 {
294 struct vnd_softc *sc = (struct vnd_softc *)self;
295 if (sc->sc_flags & VNF_INITED)
296 return EBUSY;
297
298 bufq_free(sc->sc_tab);
299
300 return 0;
301 }
302
/*
 * Attach a new pseudo-device instance for 'unit' on demand (first open).
 * The cfdata is allocated here and freed again by vnd_destroy().
 * Returns NULL if config_attach_pseudo() fails.
 */
static struct vnd_softc *
vnd_spawn(int unit)
{
	struct cfdata *cf;

	cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK);	/* M_WAITOK: cannot fail */
	cf->cf_name = vnd_cd.cd_name;
	cf->cf_atname = vnd_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;	/* wildcard ("starred") unit */

	return (struct vnd_softc *)config_attach_pseudo(cf);
}
316
317 int
318 vnd_destroy(struct device *dev)
319 {
320 int error;
321 struct cfdata *cf;
322
323 cf = device_cfdata(dev);
324 error = config_detach(dev, DETACH_QUIET);
325 if (error)
326 return error;
327 free(cf, M_DEVBUF);
328 return 0;
329 }
330
/*
 * Open the unit 'dev' refers to, attaching the pseudo-device on first
 * use.  Validates the requested partition and records the open in the
 * disk open masks so the unit cannot be unconfigured while in use.
 * Returns 0 or an errno (ENOMEM, ENXIO, or a vndlock() error).
 */
static int
vndopen(dev_t dev, int flags __unused, int mode, struct lwp *l __unused)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part, pmask;
	struct disklabel *lp;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndopen(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
#endif
	/* Clone the unit on first open. */
	sc = device_lookup(&vnd_cd, unit);
	if (sc == NULL) {
		sc = vnd_spawn(unit);
		if (sc == NULL)
			return ENOMEM;
	}

	if ((error = vndlock(sc)) != 0)
		return (error);

	lp = sc->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	/*
	 * If we're initialized, check to see if there are any other
	 * open partitions.  If not, then it's safe to update the
	 * in-core disklabel.  Only read the disklabel if it is
	 * not already valid.
	 */
	if ((sc->sc_flags & (VNF_INITED|VNF_VLABEL)) == VNF_INITED &&
	    sc->sc_dkdev.dk_openmask == 0)
		vndgetdisklabel(dev, sc);

	/* Check that the partition exists (the raw partition always does). */
	if (part != RAW_PART) {
		if (((sc->sc_flags & VNF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		     (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto done;
		}
	}

	/* Prevent our unit from being unconfigured while open. */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}
	/* Combined mask of all open partitions, both flavors. */
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

 done:
	vndunlock(sc);
	return (error);
}
395
/*
 * Close one partition of the unit: clear its bit in the appropriate
 * open mask.  If the unit is no longer configured (VNF_INITED clear),
 * the pseudo-device instance itself is destroyed on last close.
 */
static int
vndclose(dev_t dev, int flags __unused, int mode, struct lwp *l __unused)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclose(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
#endif
	sc = device_lookup(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((error = vndlock(sc)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

	vndunlock(sc);

	/* Unconfigured unit: tear down the cloned instance. */
	if ((sc->sc_flags & VNF_INITED) == 0) {
		if ((error = vnd_destroy((struct device *)sc)) != 0) {
			aprint_error("%s: unable to detach instance\n",
			    sc->sc_dev.dv_xname);
			return error;
		}
	}

	return (0);
}
441
442 /*
443 * Queue the request, and wakeup the kernel thread to handle it.
444 */
445 static void
446 vndstrategy(struct buf *bp)
447 {
448 int unit = vndunit(bp->b_dev);
449 struct vnd_softc *vnd =
450 (struct vnd_softc *)device_lookup(&vnd_cd, unit);
451 struct disklabel *lp = vnd->sc_dkdev.dk_label;
452 daddr_t blkno;
453 int s = splbio();
454
455 bp->b_resid = bp->b_bcount;
456
457 if ((vnd->sc_flags & VNF_INITED) == 0) {
458 bp->b_error = ENXIO;
459 bp->b_flags |= B_ERROR;
460 goto done;
461 }
462
463 /*
464 * The transfer must be a whole number of blocks.
465 */
466 if ((bp->b_bcount % lp->d_secsize) != 0) {
467 bp->b_error = EINVAL;
468 bp->b_flags |= B_ERROR;
469 goto done;
470 }
471
472 /*
473 * check if we're read-only.
474 */
475 if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) {
476 bp->b_error = EACCES;
477 bp->b_flags |= B_ERROR;
478 goto done;
479 }
480
481 /*
482 * Do bounds checking and adjust transfer. If there's an error,
483 * the bounds check will flag that for us.
484 */
485 if (DISKPART(bp->b_dev) == RAW_PART) {
486 if (bounds_check_with_mediasize(bp, DEV_BSIZE,
487 vnd->sc_size) <= 0)
488 goto done;
489 } else {
490 if (bounds_check_with_label(&vnd->sc_dkdev,
491 bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0)
492 goto done;
493 }
494
495 /* If it's a nil transfer, wake up the top half now. */
496 if (bp->b_bcount == 0)
497 goto done;
498
499 /*
500 * Put the block number in terms of the logical blocksize
501 * of the "device".
502 */
503
504 blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);
505
506 /*
507 * Translate the partition-relative block number to an absolute.
508 */
509 if (DISKPART(bp->b_dev) != RAW_PART) {
510 struct partition *pp;
511
512 pp = &vnd->sc_dkdev.dk_label->d_partitions[
513 DISKPART(bp->b_dev)];
514 blkno += pp->p_offset;
515 }
516 bp->b_rawblkno = blkno;
517
518 #ifdef DEBUG
519 if (vnddebug & VDB_FOLLOW)
520 printf("vndstrategy(%p): unit %d\n", bp, unit);
521 #endif
522 BUFQ_PUT(vnd->sc_tab, bp);
523 wakeup(&vnd->sc_tab);
524 splx(s);
525 return;
526 done:
527 biodone(bp);
528 splx(s);
529 }
530
/*
 * Per-unit kernel thread: dequeues requests from sc_tab and services
 * them against the backing vnode, either via VOP_BMAP/VOP_STRATEGY or
 * via VOP_READ/VOP_WRITE (see NOTE 1 at the top of the file).  Runs
 * until VNF_VUNCONF is set by the unconfigure path.
 */
static void
vndthread(void *arg)
{
	struct vnd_softc *vnd = arg;
	boolean_t usestrategy;
	int s;

	/* Determine whether we can use VOP_BMAP and VOP_STRATEGY to
	 * directly access the backing vnode. If we can, use these two
	 * operations to avoid messing with the local buffer cache.
	 * Otherwise fall back to regular VOP_READ/VOP_WRITE operations
	 * which are guaranteed to work with any file system. */
	usestrategy = vnode_has_op(vnd->sc_vp, VOFFSET(vop_bmap)) &&
	    vnode_has_op(vnd->sc_vp, VOFFSET(vop_strategy));

#ifdef DEBUG
	if (vnddebug & VDB_INIT)
		printf("vndthread: vp %p, %s\n", vnd->sc_vp,
		    usestrategy ?
		    "using bmap/strategy operations" :
		    "using read/write operations");
#endif

	s = splbio();
	/* Announce that the thread is up; the configure path sleeps on
	 * sc_kthread waiting for this. */
	vnd->sc_flags |= VNF_KTHREAD;
	wakeup(&vnd->sc_kthread);

	/*
	 * Dequeue requests and serve them depending on the available
	 * vnode operations.
	 */
	while ((vnd->sc_flags & VNF_VUNCONF) == 0) {
		struct vndxfer *vnx;
		int flags;
		struct buf *obp;	/* original request from vndstrategy */
		struct buf *bp;		/* shadow buf sent to the backing vnode */

		obp = BUFQ_GET(vnd->sc_tab);
		if (obp == NULL) {
			/* Queue empty: sleep until vndstrategy queues work. */
			tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0);
			continue;
		};
		splx(s);
		flags = obp->b_flags;
#ifdef DEBUG
		if (vnddebug & VDB_FOLLOW)
			printf("vndthread(%p\n", obp);
#endif

		/* The backing file system went away underneath us. */
		if (vnd->sc_vp->v_mount == NULL) {
			obp->b_error = ENXIO;
			obp->b_flags |= B_ERROR;
			goto done;
		}
#ifdef VND_COMPRESSION
		/* handle a compressed read */
		if ((flags & B_READ) != 0 && (vnd->sc_flags & VNF_COMP)) {
			off_t bn;

			/* Convert to a byte offset within the file. */
			bn = obp->b_rawblkno *
			    vnd->sc_dkdev.dk_label->d_secsize;

			compstrategy(obp, bn);
			goto done;
		}
#endif /* VND_COMPRESSION */

		/*
		 * Allocate a header for this transfer and link it to the
		 * buffer
		 */
		s = splbio();
		vnx = VND_GETXFER(vnd);
		splx(s);
		vnx->vx_vnd = vnd;

		/* Throttle: limit the number of transfers in flight;
		 * vndiodone wakes us when sc_active drops. */
		s = splbio();
		while (vnd->sc_active >= vnd->sc_maxactive) {
			tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0);
		}
		vnd->sc_active++;
		splx(s);

		/* Instrumentation. */
		disk_busy(&vnd->sc_dkdev);

		bp = &vnx->vx_buf;
		BUF_INIT(bp);
		bp->b_flags = (obp->b_flags & B_READ) | B_CALL;
		bp->b_iodone = vndiodone;	/* completion callback (B_CALL) */
		bp->b_private = obp;		/* remember the original request */
		bp->b_vp = vnd->sc_vp;
		bp->b_data = obp->b_data;	/* share the caller's data buffer */
		bp->b_bcount = bp->b_resid = obp->b_bcount;
		BIO_COPYPRIO(bp, obp);

		/* Handle the request using the appropriate operations. */
		if (usestrategy)
			handle_with_strategy(vnd, obp, bp);
		else
			handle_with_rdwr(vnd, obp, bp);

		s = splbio();
		continue;

	done:
		biodone(obp);
		s = splbio();
	}

	/*
	 * NOTE(review): "(~VNF_KTHREAD | VNF_VUNCONF)" evaluates to
	 * ~VNF_KTHREAD, so only VNF_KTHREAD is cleared here and
	 * VNF_VUNCONF stays set; ~(VNF_KTHREAD | VNF_VUNCONF) may have
	 * been intended -- confirm against the unconfigure path before
	 * changing.
	 */
	vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF);
	wakeup(&vnd->sc_kthread);
	splx(s);
	kthread_exit(0);
}
647
648 /*
649 * Checks if the given vnode supports the requested operation.
650 * The operation is specified the offset returned by VOFFSET.
651 *
652 * XXX The test below used to determine this is quite fragile
653 * because it relies on the file system to use genfs to specify
654 * unimplemented operations. There might be another way to do
655 * it more cleanly.
656 */
657 static boolean_t
658 vnode_has_op(const struct vnode *vp, int opoffset)
659 {
660 int (*defaultp)(void *);
661 int (*opp)(void *);
662
663 defaultp = vp->v_op[VOFFSET(vop_default)];
664 opp = vp->v_op[opoffset];
665
666 return opp != defaultp && opp != genfs_eopnotsupp &&
667 opp != genfs_badop && opp != genfs_nullop;
668 }
669
/*
 * Handles the read/write request given in 'bp' using the vnode's VOP_READ
 * and VOP_WRITE operations.
 *
 * 'obp' is a pointer to the original request fed to the vnd device.
 * On return the request has been completed via biodone(bp); vndiodone
 * then propagates the result back to 'obp'.
 */
static void
handle_with_rdwr(struct vnd_softc *vnd, const struct buf *obp, struct buf *bp)
{
	boolean_t doread;
	off_t offset;
	struct vnode *vp;

	doread = bp->b_flags & B_READ;
	/* Byte offset of the request within the backing file. */
	offset = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;
	vp = vnd->sc_vp;

#if defined(DEBUG)
	if (vnddebug & VDB_IO)
		printf("vnd (rdwr): vp %p, %s, rawblkno 0x%" PRIx64
		    ", secsize %d, offset %" PRIu64
		    ", bcount %d, resid %d\n",
		    vp, doread ? "read" : "write", obp->b_rawblkno,
		    vnd->sc_dkdev.dk_label->d_secsize, offset,
		    bp->b_bcount, bp->b_resid);
#endif

	/* Issue the read or write operation.  vn_rdwr() updates b_resid
	 * with the amount not transferred and returns 0 or an errno. */
	bp->b_error =
	    vn_rdwr(doread ? UIO_READ : UIO_WRITE,
	    vp, bp->b_data, bp->b_bcount, offset,
	    UIO_SYSSPACE, 0, vnd->sc_cred, &bp->b_resid, NULL);
	if (bp->b_error != 0)
		bp->b_flags |= B_ERROR;
	else
		KASSERT(!(bp->b_flags & B_ERROR));

	/* Flush the vnode if requested. */
	if (obp->b_flags & B_VFLUSH) {
		if (vn_lock(vp, LK_EXCLUSIVE | LK_RETRY) == 0) {
			VOP_FSYNC(vp, vnd->sc_cred,
			    FSYNC_WAIT | FSYNC_DATAONLY, 0, 0, NULL);
			VOP_UNLOCK(vp, 0);
		}
	}

	/* We need to increase the number of outputs on the vnode if
	 * there was any write to it (either due to a real write or due
	 * to a flush). */
	if (!doread || obp->b_flags & B_VFLUSH)
		vp->v_numoutput++;

	biodone(bp);
}
724
/*
 * Handles the read/write request given in 'bp' using the vnode's VOP_BMAP
 * and VOP_STRATEGY operations.
 *
 * 'obp' is a pointer to the original request fed to the vnd device.
 * The request is split into file-system-block-sized nested bufs; the
 * last nestiobuf completion triggers bp's iodone (vndiodone).
 */
static void
handle_with_strategy(struct vnd_softc *vnd, const struct buf *obp,
    struct buf *bp)
{
	int bsize, error, flags, skipped;
	size_t resid, sz;
	off_t bn, offset;
	struct mount *mp;

	flags = obp->b_flags;

	mp = NULL;
	if (!(flags & B_READ)) {
		int s;

		s = splbio();
		V_INCR_NUMOUTPUT(bp->b_vp);
		splx(s);

		/* Gate the write against file-system suspension. */
		vn_start_write(vnd->sc_vp, &mp, V_WAIT);
		KASSERT(mp != NULL);
	}

	/* convert to a byte offset within the file. */
	bn = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;

	bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
	skipped = 0;

	/*
	 * Break the request into bsize pieces and feed them
	 * sequentially using VOP_BMAP/VOP_STRATEGY.
	 * We do it this way to keep from flooding NFS servers if we
	 * are connected to an NFS file.  This places the burden on
	 * the client rather than the server.
	 */
	error = 0;
	for (offset = 0, resid = bp->b_resid; resid;
	    resid -= sz, offset += sz) {
		struct buf *nbp;
		struct vnode *vp;	/* device vnode returned by VOP_BMAP */
		daddr_t nbn;		/* physical block number */
		int off, nra;		/* nra: contiguous blocks after nbn */

		nra = 0;
		vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
		error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
		VOP_UNLOCK(vnd->sc_vp, 0);

		/* nbn == -1 marks a hole in the file. */
		if (error == 0 && (long)nbn == -1)
			error = EIO;

		/*
		 * If there was an error or a hole in the file...punt.
		 * Note that we may have to wait for any operations
		 * that we have already fired off before releasing
		 * the buffer.
		 *
		 * XXX we could deal with holes here but it would be
		 * a hassle (in the write case).
		 */
		if (error) {
			skipped += resid;
			break;
		}

#ifdef DEBUG
		if (!dovndcluster)
			nra = 0;
#endif

		/* Transfer up to (1 + nra) contiguous blocks at once. */
		off = bn % bsize;
		sz = MIN(((off_t)1 + nra) * bsize - off, resid);
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64
			    " sz 0x%x\n",
			    vnd->sc_vp, vp, (long long)bn, nbn, sz);
#endif

		/* Nested buf covering [offset, offset+sz) of bp's data. */
		nbp = getiobuf();
		nestiobuf_setup(bp, nbp, offset, sz);
		nbp->b_blkno = nbn + btodb(off);

#if 0 /* XXX #ifdef DEBUG */
		if (vnddebug & VDB_IO)
			printf("vndstart(%ld): bp %p vp %p blkno "
			    "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n",
			    (long) (vnd-vnd_softc), &nbp->vb_buf,
			    nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno,
			    nbp->vb_buf.b_flags, nbp->vb_buf.b_data,
			    nbp->vb_buf.b_bcount);
#endif
		VOP_STRATEGY(vp, nbp);
		bn += sz;
	}
	/* Account for the unissued remainder; completes bp when all
	 * nested bufs are done. */
	nestiobuf_done(bp, skipped, error);

	if (!(flags & B_READ)) {
		KASSERT(mp != NULL);
		vn_finished_write(mp, 0);
	}
}
834
/*
 * Completion routine for the shadow buf built in vndthread, invoked
 * via b_iodone (B_CALL) from biodone().  Propagates status to the
 * original request, updates disk accounting, releases the transfer
 * header back to the pool, and wakes the throttle sleeper in
 * vndthread when the last transfer drains.
 */
static void
vndiodone(struct buf *bp)
{
	struct vndxfer *vnx = VND_BUFTOXFER(bp);
	struct vnd_softc *vnd = vnx->vx_vnd;
	struct buf *obp = bp->b_private;	/* original request */

	KASSERT(&vnx->vx_buf == bp);
	KASSERT(vnd->sc_active > 0);
#ifdef DEBUG
	if (vnddebug & VDB_IO) {
		printf("vndiodone1: bp %p iodone: error %d\n",
		    bp, (bp->b_flags & B_ERROR) != 0 ? bp->b_error : 0);
	}
#endif
	disk_unbusy(&vnd->sc_dkdev, bp->b_bcount - bp->b_resid,
	    (bp->b_flags & B_READ));
	vnd->sc_active--;
	if (vnd->sc_active == 0) {
		/* Wake vndthread's "vndac" throttle sleep. */
		wakeup(&vnd->sc_tab);
	}
	/* Copy error status and residual count back to the original buf. */
	obp->b_flags |= bp->b_flags & B_ERROR;
	obp->b_error = bp->b_error;
	obp->b_resid = bp->b_resid;
	VND_PUTXFER(vnd, vnx);
	biodone(obp);
}
862
/* ARGSUSED */
/*
 * Character-device read entry point: raw read through physio(),
 * which feeds the request to vndstrategy.  ENXIO if the unit does
 * not exist or is not configured.
 */
static int
vndread(dev_t dev, struct uio *uio, int flags __unused)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndread(0x%x, %p)\n", dev, uio);
#endif

	sc = device_lookup(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio));
}
884
/* ARGSUSED */
/*
 * Character-device write entry point: raw write through physio(),
 * which feeds the request to vndstrategy.  ENXIO if the unit does
 * not exist or is not configured.
 */
static int
vndwrite(dev_t dev, struct uio *uio, int flags __unused)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndwrite(0x%x, %p)\n", dev, uio);
#endif

	sc = device_lookup(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio));
}
906
/*
 * Helper for the "get unit info" ioctls: resolve the target unit
 * (*un == -1 means "use 'unit'") and fetch the attributes of its
 * backing file into *va.
 *
 * Returns 0 or an errno from VOP_GETATTR; EINVAL for a negative unit;
 * ENXIO for a unit number past the configured range.  Returns -1 (not
 * an errno) when the unit slot exists but is not configured --
 * NOTE(review): presumably the VNDIOCGET caller treats -1 as
 * "skip/keep searching"; confirm against the ioctl loop.
 */
static int
vnd_cget(struct lwp *l, int unit, int *un, struct vattr *va)
{
	struct vnd_softc *vnd;

	if (*un == -1)
		*un = unit;
	if (*un < 0)
		return EINVAL;

	vnd = device_lookup(&vnd_cd, *un);
	if (vnd == NULL)
		return (*un >= vnd_cd.cd_ndevs) ? ENXIO : -1;

	if ((vnd->sc_flags & VNF_INITED) == 0)
		return -1;

	return VOP_GETATTR(vnd->sc_vp, va, l->l_cred, l);
}
926
927 /* ARGSUSED */
928 static int
929 vndioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l)
930 {
931 int unit = vndunit(dev);
932 struct vnd_softc *vnd;
933 struct vnd_ioctl *vio;
934 struct vattr vattr;
935 struct nameidata nd;
936 int error, part, pmask;
937 size_t geomsize;
938 int fflags;
939 #ifdef __HAVE_OLD_DISKLABEL
940 struct disklabel newlabel;
941 #endif
942
943 #ifdef DEBUG
944 if (vnddebug & VDB_FOLLOW)
945 printf("vndioctl(0x%x, 0x%lx, %p, 0x%x, %p): unit %d\n",
946 dev, cmd, data, flag, l->l_proc, unit);
947 #endif
948 vnd = device_lookup(&vnd_cd, unit);
949 if (vnd == NULL &&
950 #ifdef COMPAT_30
951 cmd != VNDIOOCGET &&
952 #endif
953 cmd != VNDIOCGET)
954 return ENXIO;
955 vio = (struct vnd_ioctl *)data;
956
957 /* Must be open for writes for these commands... */
958 switch (cmd) {
959 case VNDIOCSET:
960 case VNDIOCCLR:
961 case DIOCSDINFO:
962 case DIOCWDINFO:
963 #ifdef __HAVE_OLD_DISKLABEL
964 case ODIOCSDINFO:
965 case ODIOCWDINFO:
966 #endif
967 case DIOCKLABEL:
968 case DIOCWLABEL:
969 if ((flag & FWRITE) == 0)
970 return (EBADF);
971 }
972
973 /* Must be initialized for these... */
974 switch (cmd) {
975 case VNDIOCCLR:
976 case DIOCGDINFO:
977 case DIOCSDINFO:
978 case DIOCWDINFO:
979 case DIOCGPART:
980 case DIOCKLABEL:
981 case DIOCWLABEL:
982 case DIOCGDEFLABEL:
983 #ifdef __HAVE_OLD_DISKLABEL
984 case ODIOCGDINFO:
985 case ODIOCSDINFO:
986 case ODIOCWDINFO:
987 case ODIOCGDEFLABEL:
988 #endif
989 if ((vnd->sc_flags & VNF_INITED) == 0)
990 return (ENXIO);
991 }
992
993 switch (cmd) {
994 case VNDIOCSET:
995 if (vnd->sc_flags & VNF_INITED)
996 return (EBUSY);
997
998 if ((error = vndlock(vnd)) != 0)
999 return (error);
1000
1001 fflags = FREAD;
1002 if ((vio->vnd_flags & VNDIOF_READONLY) == 0)
1003 fflags |= FWRITE;
1004 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, l);
1005 if ((error = vn_open(&nd, fflags, 0)) != 0)
1006 goto unlock_and_exit;
1007 KASSERT(l);
1008 error = VOP_GETATTR(nd.ni_vp, &vattr, l->l_cred, l);
1009 if (!error && nd.ni_vp->v_type != VREG)
1010 error = EOPNOTSUPP;
1011 if (error) {
1012 VOP_UNLOCK(nd.ni_vp, 0);
1013 goto close_and_exit;
1014 }
1015
1016 /* If using a compressed file, initialize its info */
1017 /* (or abort with an error if kernel has no compression) */
1018 if (vio->vnd_flags & VNF_COMP) {
1019 #ifdef VND_COMPRESSION
1020 struct vnd_comp_header *ch;
1021 int i;
1022 u_int32_t comp_size;
1023 u_int32_t comp_maxsize;
1024
1025 /* allocate space for compresed file header */
1026 ch = malloc(sizeof(struct vnd_comp_header),
1027 M_TEMP, M_WAITOK);
1028
1029 /* read compressed file header */
1030 error = vn_rdwr(UIO_READ, nd.ni_vp, (caddr_t)ch,
1031 sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE,
1032 IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
1033 if(error) {
1034 free(ch, M_TEMP);
1035 VOP_UNLOCK(nd.ni_vp, 0);
1036 goto close_and_exit;
1037 }
1038
1039 /* save some header info */
1040 vnd->sc_comp_blksz = ntohl(ch->block_size);
1041 /* note last offset is the file byte size */
1042 vnd->sc_comp_numoffs = ntohl(ch->num_blocks)+1;
1043 free(ch, M_TEMP);
1044 if (vnd->sc_comp_blksz == 0 ||
1045 vnd->sc_comp_blksz % DEV_BSIZE !=0) {
1046 VOP_UNLOCK(nd.ni_vp, 0);
1047 error = EINVAL;
1048 goto close_and_exit;
1049 }
1050 if(sizeof(struct vnd_comp_header) +
1051 sizeof(u_int64_t) * vnd->sc_comp_numoffs >
1052 vattr.va_size) {
1053 VOP_UNLOCK(nd.ni_vp, 0);
1054 error = EINVAL;
1055 goto close_and_exit;
1056 }
1057
1058 /* set decompressed file size */
1059 vattr.va_size =
1060 ((u_quad_t)vnd->sc_comp_numoffs - 1) *
1061 (u_quad_t)vnd->sc_comp_blksz;
1062
1063 /* allocate space for all the compressed offsets */
1064 vnd->sc_comp_offsets =
1065 malloc(sizeof(u_int64_t) * vnd->sc_comp_numoffs,
1066 M_DEVBUF, M_WAITOK);
1067
1068 /* read in the offsets */
1069 error = vn_rdwr(UIO_READ, nd.ni_vp,
1070 (caddr_t)vnd->sc_comp_offsets,
1071 sizeof(u_int64_t) * vnd->sc_comp_numoffs,
1072 sizeof(struct vnd_comp_header), UIO_SYSSPACE,
1073 IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
1074 if(error) {
1075 VOP_UNLOCK(nd.ni_vp, 0);
1076 goto close_and_exit;
1077 }
1078 /*
1079 * find largest block size (used for allocation limit).
1080 * Also convert offset to native byte order.
1081 */
1082 comp_maxsize = 0;
1083 for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) {
1084 vnd->sc_comp_offsets[i] =
1085 be64toh(vnd->sc_comp_offsets[i]);
1086 comp_size = be64toh(vnd->sc_comp_offsets[i + 1])
1087 - vnd->sc_comp_offsets[i];
1088 if (comp_size > comp_maxsize)
1089 comp_maxsize = comp_size;
1090 }
1091 vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] =
1092 be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1]);
1093
1094 /* create compressed data buffer */
1095 vnd->sc_comp_buff = malloc(comp_maxsize,
1096 M_DEVBUF, M_WAITOK);
1097
1098 /* create decompressed buffer */
1099 vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz,
1100 M_DEVBUF, M_WAITOK);
1101 vnd->sc_comp_buffblk = -1;
1102
1103 /* Initialize decompress stream */
1104 bzero(&vnd->sc_comp_stream, sizeof(z_stream));
1105 vnd->sc_comp_stream.zalloc = vnd_alloc;
1106 vnd->sc_comp_stream.zfree = vnd_free;
1107 error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS);
1108 if(error) {
1109 if(vnd->sc_comp_stream.msg)
1110 printf("vnd%d: compressed file, %s\n",
1111 unit, vnd->sc_comp_stream.msg);
1112 VOP_UNLOCK(nd.ni_vp, 0);
1113 error = EINVAL;
1114 goto close_and_exit;
1115 }
1116
1117 vnd->sc_flags |= VNF_COMP | VNF_READONLY;
1118 #else /* !VND_COMPRESSION */
1119 VOP_UNLOCK(nd.ni_vp, 0);
1120 error = EOPNOTSUPP;
1121 goto close_and_exit;
1122 #endif /* VND_COMPRESSION */
1123 }
1124
1125 VOP_UNLOCK(nd.ni_vp, 0);
1126 vnd->sc_vp = nd.ni_vp;
1127 vnd->sc_size = btodb(vattr.va_size); /* note truncation */
1128
1129 /*
1130 * Use pseudo-geometry specified. If none was provided,
1131 * use "standard" Adaptec fictitious geometry.
1132 */
1133 if (vio->vnd_flags & VNDIOF_HASGEOM) {
1134
1135 memcpy(&vnd->sc_geom, &vio->vnd_geom,
1136 sizeof(vio->vnd_geom));
1137
1138 /*
1139 * Sanity-check the sector size.
1140 * XXX Don't allow secsize < DEV_BSIZE. Should
1141 * XXX we?
1142 */
1143 if (vnd->sc_geom.vng_secsize < DEV_BSIZE ||
1144 (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0 ||
1145 vnd->sc_geom.vng_ncylinders == 0 ||
1146 (vnd->sc_geom.vng_ntracks *
1147 vnd->sc_geom.vng_nsectors) == 0) {
1148 error = EINVAL;
1149 goto close_and_exit;
1150 }
1151
1152 /*
1153 * Compute the size (in DEV_BSIZE blocks) specified
1154 * by the geometry.
1155 */
1156 geomsize = (vnd->sc_geom.vng_nsectors *
1157 vnd->sc_geom.vng_ntracks *
1158 vnd->sc_geom.vng_ncylinders) *
1159 (vnd->sc_geom.vng_secsize / DEV_BSIZE);
1160
1161 /*
1162 * Sanity-check the size against the specified
1163 * geometry.
1164 */
1165 if (vnd->sc_size < geomsize) {
1166 error = EINVAL;
1167 goto close_and_exit;
1168 }
1169 } else if (vnd->sc_size >= (32 * 64)) {
1170 /*
1171 * Size must be at least 2048 DEV_BSIZE blocks
1172 * (1M) in order to use this geometry.
1173 */
1174 vnd->sc_geom.vng_secsize = DEV_BSIZE;
1175 vnd->sc_geom.vng_nsectors = 32;
1176 vnd->sc_geom.vng_ntracks = 64;
1177 vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32);
1178 } else {
1179 vnd->sc_geom.vng_secsize = DEV_BSIZE;
1180 vnd->sc_geom.vng_nsectors = 1;
1181 vnd->sc_geom.vng_ntracks = 1;
1182 vnd->sc_geom.vng_ncylinders = vnd->sc_size;
1183 }
1184
1185 if (vio->vnd_flags & VNDIOF_READONLY) {
1186 vnd->sc_flags |= VNF_READONLY;
1187 }
1188
1189 if ((error = vndsetcred(vnd, l->l_cred)) != 0)
1190 goto close_and_exit;
1191
1192 vndthrottle(vnd, vnd->sc_vp);
1193 vio->vnd_size = dbtob(vnd->sc_size);
1194 vnd->sc_flags |= VNF_INITED;
1195
1196 /* create the kernel thread, wait for it to be up */
1197 error = kthread_create1(vndthread, vnd, &vnd->sc_kthread,
1198 vnd->sc_dev.dv_xname);
1199 if (error)
1200 goto close_and_exit;
1201 while ((vnd->sc_flags & VNF_KTHREAD) == 0) {
1202 tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0);
1203 }
1204 #ifdef DEBUG
1205 if (vnddebug & VDB_INIT)
1206 printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n",
1207 vnd->sc_vp, (unsigned long) vnd->sc_size,
1208 vnd->sc_geom.vng_secsize,
1209 vnd->sc_geom.vng_nsectors,
1210 vnd->sc_geom.vng_ntracks,
1211 vnd->sc_geom.vng_ncylinders);
1212 #endif
1213
1214 /* Attach the disk. */
1215 vnd->sc_dkdev.dk_name = vnd->sc_dev.dv_xname;
1216 pseudo_disk_attach(&vnd->sc_dkdev);
1217
1218 /* Initialize the xfer and buffer pools. */
1219 pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0,
1220 0, 0, "vndxpl", NULL);
1221
1222 /* Try and read the disklabel. */
1223 vndgetdisklabel(dev, vnd);
1224
1225 vndunlock(vnd);
1226
1227 break;
1228
1229 close_and_exit:
1230 (void) vn_close(nd.ni_vp, fflags, l->l_cred, l);
1231 unlock_and_exit:
1232 #ifdef VND_COMPRESSION
1233 /* free any allocated memory (for compressed file) */
1234 if(vnd->sc_comp_offsets) {
1235 free(vnd->sc_comp_offsets, M_DEVBUF);
1236 vnd->sc_comp_offsets = NULL;
1237 }
1238 if(vnd->sc_comp_buff) {
1239 free(vnd->sc_comp_buff, M_DEVBUF);
1240 vnd->sc_comp_buff = NULL;
1241 }
1242 if(vnd->sc_comp_decombuf) {
1243 free(vnd->sc_comp_decombuf, M_DEVBUF);
1244 vnd->sc_comp_decombuf = NULL;
1245 }
1246 #endif /* VND_COMPRESSION */
1247 vndunlock(vnd);
1248 return (error);
1249
1250 case VNDIOCCLR:
1251 if ((error = vndlock(vnd)) != 0)
1252 return (error);
1253
1254 /*
1255 * Don't unconfigure if any other partitions are open
1256 * or if both the character and block flavors of this
1257 * partition are open.
1258 */
1259 part = DISKPART(dev);
1260 pmask = (1 << part);
1261 if (((vnd->sc_dkdev.dk_openmask & ~pmask) ||
1262 ((vnd->sc_dkdev.dk_bopenmask & pmask) &&
1263 (vnd->sc_dkdev.dk_copenmask & pmask))) &&
1264 !(vio->vnd_flags & VNDIOF_FORCE)) {
1265 vndunlock(vnd);
1266 return (EBUSY);
1267 }
1268
1269 /*
1270 * XXX vndclear() might call vndclose() implicitely;
1271 * release lock to avoid recursion
1272 */
1273 vndunlock(vnd);
1274 vndclear(vnd, minor(dev));
1275 #ifdef DEBUG
1276 if (vnddebug & VDB_INIT)
1277 printf("vndioctl: CLRed\n");
1278 #endif
1279
1280 /* Destroy the xfer and buffer pools. */
1281 pool_destroy(&vnd->sc_vxpool);
1282
1283 /* Detatch the disk. */
1284 pseudo_disk_detach(&vnd->sc_dkdev);
1285 break;
1286
1287 #ifdef COMPAT_30
1288 case VNDIOOCGET: {
1289 struct vnd_ouser *vnu;
1290 struct vattr va;
1291 vnu = (struct vnd_ouser *)data;
1292 KASSERT(l);
1293 switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) {
1294 case 0:
1295 vnu->vnu_dev = va.va_fsid;
1296 vnu->vnu_ino = va.va_fileid;
1297 break;
1298 case -1:
1299 /* unused is not an error */
1300 vnu->vnu_dev = 0;
1301 vnu->vnu_ino = 0;
1302 break;
1303 default:
1304 return error;
1305 }
1306 break;
1307 }
1308 #endif
1309 case VNDIOCGET: {
1310 struct vnd_user *vnu;
1311 struct vattr va;
1312 vnu = (struct vnd_user *)data;
1313 KASSERT(l);
1314 switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) {
1315 case 0:
1316 vnu->vnu_dev = va.va_fsid;
1317 vnu->vnu_ino = va.va_fileid;
1318 break;
1319 case -1:
1320 /* unused is not an error */
1321 vnu->vnu_dev = 0;
1322 vnu->vnu_ino = 0;
1323 break;
1324 default:
1325 return error;
1326 }
1327 break;
1328 }
1329
1330 case DIOCGDINFO:
1331 *(struct disklabel *)data = *(vnd->sc_dkdev.dk_label);
1332 break;
1333
1334 #ifdef __HAVE_OLD_DISKLABEL
1335 case ODIOCGDINFO:
1336 newlabel = *(vnd->sc_dkdev.dk_label);
1337 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1338 return ENOTTY;
1339 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1340 break;
1341 #endif
1342
1343 case DIOCGPART:
1344 ((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label;
1345 ((struct partinfo *)data)->part =
1346 &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1347 break;
1348
1349 case DIOCWDINFO:
1350 case DIOCSDINFO:
1351 #ifdef __HAVE_OLD_DISKLABEL
1352 case ODIOCWDINFO:
1353 case ODIOCSDINFO:
1354 #endif
1355 {
1356 struct disklabel *lp;
1357
1358 if ((error = vndlock(vnd)) != 0)
1359 return (error);
1360
1361 vnd->sc_flags |= VNF_LABELLING;
1362
1363 #ifdef __HAVE_OLD_DISKLABEL
1364 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1365 memset(&newlabel, 0, sizeof newlabel);
1366 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1367 lp = &newlabel;
1368 } else
1369 #endif
1370 lp = (struct disklabel *)data;
1371
1372 error = setdisklabel(vnd->sc_dkdev.dk_label,
1373 lp, 0, vnd->sc_dkdev.dk_cpulabel);
1374 if (error == 0) {
1375 if (cmd == DIOCWDINFO
1376 #ifdef __HAVE_OLD_DISKLABEL
1377 || cmd == ODIOCWDINFO
1378 #endif
1379 )
1380 error = writedisklabel(VNDLABELDEV(dev),
1381 vndstrategy, vnd->sc_dkdev.dk_label,
1382 vnd->sc_dkdev.dk_cpulabel);
1383 }
1384
1385 vnd->sc_flags &= ~VNF_LABELLING;
1386
1387 vndunlock(vnd);
1388
1389 if (error)
1390 return (error);
1391 break;
1392 }
1393
1394 case DIOCKLABEL:
1395 if (*(int *)data != 0)
1396 vnd->sc_flags |= VNF_KLABEL;
1397 else
1398 vnd->sc_flags &= ~VNF_KLABEL;
1399 break;
1400
1401 case DIOCWLABEL:
1402 if (*(int *)data != 0)
1403 vnd->sc_flags |= VNF_WLABEL;
1404 else
1405 vnd->sc_flags &= ~VNF_WLABEL;
1406 break;
1407
1408 case DIOCGDEFLABEL:
1409 vndgetdefaultlabel(vnd, (struct disklabel *)data);
1410 break;
1411
1412 #ifdef __HAVE_OLD_DISKLABEL
1413 case ODIOCGDEFLABEL:
1414 vndgetdefaultlabel(vnd, &newlabel);
1415 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1416 return ENOTTY;
1417 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1418 break;
1419 #endif
1420
1421 default:
1422 return (ENOTTY);
1423 }
1424
1425 return (0);
1426 }
1427
1428 /*
1429 * Duplicate the current processes' credentials. Since we are called only
1430 * as the result of a SET ioctl and only root can do that, any future access
1431 * to this "disk" is essentially as root. Note that credentials may change
1432 * if some other uid can write directly to the mapped file (NFS).
1433 */
1434 static int
1435 vndsetcred(struct vnd_softc *vnd, kauth_cred_t cred)
1436 {
1437 struct uio auio;
1438 struct iovec aiov;
1439 char *tmpbuf;
1440 int error;
1441
1442 vnd->sc_cred = kauth_cred_dup(cred);
1443 tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);
1444
1445 /* XXX: Horrible kludge to establish credentials for NFS */
1446 aiov.iov_base = tmpbuf;
1447 aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size));
1448 auio.uio_iov = &aiov;
1449 auio.uio_iovcnt = 1;
1450 auio.uio_offset = 0;
1451 auio.uio_rw = UIO_READ;
1452 auio.uio_resid = aiov.iov_len;
1453 UIO_SETUP_SYSSPACE(&auio);
1454 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
1455 error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
1456 if (error == 0) {
1457 /*
1458 * Because vnd does all IO directly through the vnode
1459 * we need to flush (at least) the buffer from the above
1460 * VOP_READ from the buffer cache to prevent cache
1461 * incoherencies. Also, be careful to write dirty
1462 * buffers back to stable storage.
1463 */
1464 error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred,
1465 curlwp, 0, 0);
1466 }
1467 VOP_UNLOCK(vnd->sc_vp, 0);
1468
1469 free(tmpbuf, M_TEMP);
1470 return (error);
1471 }
1472
1473 /*
1474 * Set maxactive based on FS type
1475 */
1476 static void
1477 vndthrottle(struct vnd_softc *vnd, struct vnode *vp)
1478 {
1479 #ifdef NFS
1480 extern int (**nfsv2_vnodeop_p)(void *);
1481
1482 if (vp->v_op == nfsv2_vnodeop_p)
1483 vnd->sc_maxactive = 2;
1484 else
1485 #endif
1486 vnd->sc_maxactive = 8;
1487
1488 if (vnd->sc_maxactive < 1)
1489 vnd->sc_maxactive = 1;
1490 }
1491
#if 0
/*
 * Tear down every configured vnd unit at shutdown time.
 * Currently disabled.  NOTE(review): this call to vndclear() has gone
 * stale -- vndclear() now takes a second "myminor" argument -- so this
 * code would need updating before being re-enabled.
 */
static void
vndshutdown(void)
{
	struct vnd_softc *vnd;

	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
		if (vnd->sc_flags & VNF_INITED)
			vndclear(vnd);
}
#endif
1503
/*
 * Unconfigure a vnd unit: revoke any open device vnodes, drain queued
 * I/O, stop the worker kthread, free compression state, and close the
 * backing vnode.  "myminor" is the minor number through which this
 * request arrived; its character vnode is deliberately left alone so we
 * do not revoke the caller's own open descriptor.
 */
static void
vndclear(struct vnd_softc *vnd, int myminor)
{
	struct vnode *vp = vnd->sc_vp;
	struct lwp *l = curlwp;
	int fflags = FREAD;
	int bmaj, cmaj, i, mn;
	int s;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclear(%p): vp %p\n", vnd, vp);
#endif
	/* locate the major number */
	bmaj = bdevsw_lookup_major(&vnd_bdevsw);
	cmaj = cdevsw_lookup_major(&vnd_cdevsw);

	/* Nuke the vnodes for any open instances */
	for (i = 0; i < MAXPARTITIONS; i++) {
		mn = DISKMINOR(device_unit(&vnd->sc_dev), i);
		vdevgone(bmaj, mn, mn, VBLK);
		if (mn != myminor) /* XXX avoid to kill own vnode */
			vdevgone(cmaj, mn, mn, VCHR);
	}

	/* Close with FWRITE only if the unit was configured writable. */
	if ((vnd->sc_flags & VNF_READONLY) == 0)
		fflags |= FWRITE;

	/* Throw away any queued-but-unstarted buffers. */
	s = splbio();
	bufq_drain(vnd->sc_tab);
	splx(s);

	/* Ask the worker kthread to exit and wait until it has done so. */
	vnd->sc_flags |= VNF_VUNCONF;
	wakeup(&vnd->sc_tab);
	while (vnd->sc_flags & VNF_KTHREAD)
		tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0);

#ifdef VND_COMPRESSION
	/* free the compressed file buffers */
	if(vnd->sc_flags & VNF_COMP) {
		if(vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if(vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if(vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
	}
#endif /* VND_COMPRESSION */
	/* Clear configuration state; keep lock/label flags intact. */
	vnd->sc_flags &=
	    ~(VNF_INITED | VNF_READONLY | VNF_VLABEL
	      | VNF_VUNCONF | VNF_COMP);
	if (vp == (struct vnode *)0)
		panic("vndclear: null vp");
	(void) vn_close(vp, fflags, vnd->sc_cred, l);
	kauth_cred_free(vnd->sc_cred);
	vnd->sc_vp = (struct vnode *)0;
	vnd->sc_cred = (kauth_cred_t)0;
	vnd->sc_size = 0;
}
1569
1570 static int
1571 vndsize(dev_t dev)
1572 {
1573 struct vnd_softc *sc;
1574 struct disklabel *lp;
1575 int part, unit, omask;
1576 int size;
1577
1578 unit = vndunit(dev);
1579 sc = (struct vnd_softc *)device_lookup(&vnd_cd, unit);
1580 if (sc == NULL)
1581 return -1;
1582
1583 if ((sc->sc_flags & VNF_INITED) == 0)
1584 return (-1);
1585
1586 part = DISKPART(dev);
1587 omask = sc->sc_dkdev.dk_openmask & (1 << part);
1588 lp = sc->sc_dkdev.dk_label;
1589
1590 if (omask == 0 && vndopen(dev, 0, S_IFBLK, curlwp)) /* XXX */
1591 return (-1);
1592
1593 if (lp->d_partitions[part].p_fstype != FS_SWAP)
1594 size = -1;
1595 else
1596 size = lp->d_partitions[part].p_size *
1597 (lp->d_secsize / DEV_BSIZE);
1598
1599 if (omask == 0 && vndclose(dev, 0, S_IFBLK, curlwp)) /* XXX */
1600 return (-1);
1601
1602 return (size);
1603 }
1604
/*
 * Crash-dump entry point.  Dumping a kernel core to a vnd is not
 * supported, so always fail with ENXIO.
 */
static int
vnddump(dev_t dev __unused, daddr_t blkno __unused, caddr_t va __unused,
    size_t size __unused)
{

	/* Not implemented. */
	return ENXIO;
}
1613
1614 static void
1615 vndgetdefaultlabel(struct vnd_softc *sc, struct disklabel *lp)
1616 {
1617 struct vndgeom *vng = &sc->sc_geom;
1618 struct partition *pp;
1619
1620 memset(lp, 0, sizeof(*lp));
1621
1622 lp->d_secperunit = sc->sc_size / (vng->vng_secsize / DEV_BSIZE);
1623 lp->d_secsize = vng->vng_secsize;
1624 lp->d_nsectors = vng->vng_nsectors;
1625 lp->d_ntracks = vng->vng_ntracks;
1626 lp->d_ncylinders = vng->vng_ncylinders;
1627 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1628
1629 strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename));
1630 lp->d_type = DTYPE_VND;
1631 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1632 lp->d_rpm = 3600;
1633 lp->d_interleave = 1;
1634 lp->d_flags = 0;
1635
1636 pp = &lp->d_partitions[RAW_PART];
1637 pp->p_offset = 0;
1638 pp->p_size = lp->d_secperunit;
1639 pp->p_fstype = FS_UNUSED;
1640 lp->d_npartitions = RAW_PART + 1;
1641
1642 lp->d_magic = DISKMAGIC;
1643 lp->d_magic2 = DISKMAGIC;
1644 lp->d_checksum = dkcksum(lp);
1645 }
1646
1647 /*
1648 * Read the disklabel from a vnd. If one is not present, create a fake one.
1649 */
1650 static void
1651 vndgetdisklabel(dev_t dev, struct vnd_softc *sc)
1652 {
1653 const char *errstring;
1654 struct disklabel *lp = sc->sc_dkdev.dk_label;
1655 struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel;
1656 int i;
1657
1658 memset(clp, 0, sizeof(*clp));
1659
1660 vndgetdefaultlabel(sc, lp);
1661
1662 /*
1663 * Call the generic disklabel extraction routine.
1664 */
1665 errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp);
1666 if (errstring) {
1667 /*
1668 * Lack of disklabel is common, but we print the warning
1669 * anyway, since it might contain other useful information.
1670 */
1671 printf("%s: %s\n", sc->sc_dev.dv_xname, errstring);
1672
1673 /*
1674 * For historical reasons, if there's no disklabel
1675 * present, all partitions must be FS_BSDFFS and
1676 * occupy the entire disk.
1677 */
1678 for (i = 0; i < MAXPARTITIONS; i++) {
1679 /*
1680 * Don't wipe out port specific hack (such as
1681 * dos partition hack of i386 port).
1682 */
1683 if (lp->d_partitions[i].p_size != 0)
1684 continue;
1685
1686 lp->d_partitions[i].p_size = lp->d_secperunit;
1687 lp->d_partitions[i].p_offset = 0;
1688 lp->d_partitions[i].p_fstype = FS_BSDFFS;
1689 }
1690
1691 strncpy(lp->d_packname, "default label",
1692 sizeof(lp->d_packname));
1693
1694 lp->d_npartitions = MAXPARTITIONS;
1695 lp->d_checksum = dkcksum(lp);
1696 }
1697
1698 /* In-core label now valid. */
1699 sc->sc_flags |= VNF_VLABEL;
1700 }
1701
1702 /*
1703 * Wait interruptibly for an exclusive lock.
1704 *
1705 * XXX
1706 * Several drivers do this; it should be abstracted and made MP-safe.
1707 */
1708 static int
1709 vndlock(struct vnd_softc *sc)
1710 {
1711 int error;
1712
1713 while ((sc->sc_flags & VNF_LOCKED) != 0) {
1714 sc->sc_flags |= VNF_WANTED;
1715 if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
1716 return (error);
1717 }
1718 sc->sc_flags |= VNF_LOCKED;
1719 return (0);
1720 }
1721
1722 /*
1723 * Unlock and wake up any waiters.
1724 */
1725 static void
1726 vndunlock(struct vnd_softc *sc)
1727 {
1728
1729 sc->sc_flags &= ~VNF_LOCKED;
1730 if ((sc->sc_flags & VNF_WANTED) != 0) {
1731 sc->sc_flags &= ~VNF_WANTED;
1732 wakeup(sc);
1733 }
1734 }
1735
1736 #ifdef VND_COMPRESSION
/*
 * Strategy routine for reads from a compressed vnd image.
 *
 * Satisfies the transfer described by "bp" starting at byte offset
 * "bn" of the decompressed image: for each chunk, locate the
 * compressed block, read and inflate it (with one-block caching in
 * sc_comp_buffblk), then copy the relevant bytes out via uiomove().
 * On failure, sets B_ERROR and bp->b_error and returns early.
 */
static void
compstrategy(struct buf *bp, off_t bn)
{
	int error;
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd =
	    (struct vnd_softc *)device_lookup(&vnd_cd, unit);
	u_int32_t comp_block;
	struct uio auio;
	caddr_t addr;
	int s;

	/* set up constants for data move */
	auio.uio_rw = UIO_READ;
	UIO_SETUP_SYSSPACE(&auio);

	/* read, and transfer the data */
	addr = bp->b_data;
	s = splbio();
	while (bp->b_resid > 0) {
		unsigned length;
		size_t length_in_buffer;
		u_int32_t offset_in_buffer;
		struct iovec aiov;

		/* calculate the compressed block number */
		comp_block = bn / (off_t)vnd->sc_comp_blksz;

		/* check for good block number */
		if (comp_block >= vnd->sc_comp_numoffs) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			splx(s);
			return;
		}

		/* read in the compressed block, if not in buffer */
		if (comp_block != vnd->sc_comp_buffblk) {
			/* compressed length = delta between offsets */
			length = vnd->sc_comp_offsets[comp_block + 1] -
			    vnd->sc_comp_offsets[comp_block];
			vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
			error = vn_rdwr(UIO_READ, vnd->sc_vp, vnd->sc_comp_buff,
			    length, vnd->sc_comp_offsets[comp_block],
			    UIO_SYSSPACE, IO_UNIT, vnd->sc_cred, NULL, NULL);
			if (error) {
				bp->b_error = error;
				bp->b_flags |= B_ERROR;
				VOP_UNLOCK(vnd->sc_vp, 0);
				splx(s);
				return;
			}
			/* uncompress the buffer */
			vnd->sc_comp_stream.next_in = vnd->sc_comp_buff;
			vnd->sc_comp_stream.avail_in = length;
			vnd->sc_comp_stream.next_out = vnd->sc_comp_decombuf;
			vnd->sc_comp_stream.avail_out = vnd->sc_comp_blksz;
			inflateReset(&vnd->sc_comp_stream);
			error = inflate(&vnd->sc_comp_stream, Z_FINISH);
			if (error != Z_STREAM_END) {
				if (vnd->sc_comp_stream.msg)
					printf("%s: compressed file, %s\n",
					    vnd->sc_dev.dv_xname,
					    vnd->sc_comp_stream.msg);
				bp->b_error = EBADMSG;
				bp->b_flags |= B_ERROR;
				VOP_UNLOCK(vnd->sc_vp, 0);
				splx(s);
				return;
			}
			/* remember which block the cache now holds */
			vnd->sc_comp_buffblk = comp_block;
			VOP_UNLOCK(vnd->sc_vp, 0);
		}

		/* transfer the usable uncompressed data */
		offset_in_buffer = bn % (off_t)vnd->sc_comp_blksz;
		length_in_buffer = vnd->sc_comp_blksz - offset_in_buffer;
		if (length_in_buffer > bp->b_resid)
			length_in_buffer = bp->b_resid;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		aiov.iov_base = addr;
		aiov.iov_len = length_in_buffer;
		auio.uio_resid = aiov.iov_len;
		auio.uio_offset = 0;
		error = uiomove(vnd->sc_comp_decombuf + offset_in_buffer,
		    length_in_buffer, &auio);
		if (error) {
			bp->b_error = error;
			bp->b_flags |= B_ERROR;
			splx(s);
			return;
		}

		/* advance to the next chunk of the request */
		bn += length_in_buffer;
		addr += length_in_buffer;
		bp->b_resid -= length_in_buffer;
	}
	splx(s);
}
1837
1838 /* compression memory allocation routines */
1839 static void *
1840 vnd_alloc(void *aux __unused, u_int items, u_int siz)
1841 {
1842 return malloc(items * siz, M_TEMP, M_NOWAIT);
1843 }
1844
/*
 * zlib zfree callback: release memory obtained from vnd_alloc().
 */
static void
vnd_free(void *aux __unused, void *ptr)
{
	free(ptr, M_TEMP);
}
1850 #endif /* VND_COMPRESSION */
1851