/*	$NetBSD: vnd.c,v 1.271 2019/01/27 02:08:41 pgoyette Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 1998, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vn.c 1.13 94/04/02$
 *
 *	@(#)vn.c	8.9 (Berkeley) 5/14/95
 */

/*
 * Vnode disk driver.
 *
 * Block/character interface to a vnode.  Allows one to treat a file
 * as a disk (e.g. build a filesystem in it, mount it, etc.).
 *
 * NOTE 1: If the vnode supports the VOP_BMAP and VOP_STRATEGY operations,
 * this uses them to avoid distorting the local buffer cache.  If those
 * block-level operations are not available, this falls back to the regular
 * read and write calls.  Using these may distort the cache in some cases,
 * but it is better to have the driver working than to prevent it from
 * working on file systems where the block-level operations are not
 * implemented for whatever reason.
 *
 * NOTE 2: There is a security issue involved with this driver.
 * Once mounted, all access to the contents of the "mapped" file via
 * the special file is controlled by the permissions on the special
 * file; the protection of the mapped file is ignored (effectively,
 * by using root credentials in all transactions).
 *
 * NOTE 3: Doesn't interact with leases, should it?
 */
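
/*
 * Illustrative userland usage (see vndconfig(8)):
 *
 *	vndconfig vnd0 /var/tmp/disk.img	# attach the image to vnd0
 *	mount /dev/vnd0a /mnt			# use it like any other disk
 *	vndconfig -u vnd0			# detach it again
 */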

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.271 2019/01/27 02:08:41 pgoyette Exp $");

#if defined(_KERNEL_OPT)
#include "opt_vnd.h"
#include "opt_compat_netbsd.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/malloc.h>
#include <sys/ioctl.h>
#include <sys/disklabel.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/fstrans.h>
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/conf.h>
#include <sys/kauth.h>
#include <sys/module.h>
#include <sys/compat_stub.h>

#include <net/zlib.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

#include <dev/dkvar.h>
#include <dev/vndvar.h>

#include "ioconf.h"

#if defined(VNDDEBUG) && !defined(DEBUG)
#define	DEBUG
#endif

#ifdef DEBUG
int dovndcluster = 1;
#define	VDB_FOLLOW	0x01
#define	VDB_INIT	0x02
#define	VDB_IO		0x04
#define	VDB_LABEL	0x08
int vnddebug = 0;
#endif

#define	vndunit(x)	DISKUNIT(x)

struct vndxfer {
	struct buf vx_buf;
	struct vnd_softc *vx_vnd;
};
#define	VND_BUFTOXFER(bp) ((struct vndxfer *)(void *)bp)
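/* The cast works only because vx_buf is the first member of struct vndxfer. */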

#define	VND_GETXFER(vnd)	pool_get(&(vnd)->sc_vxpool, PR_WAITOK)
#define	VND_PUTXFER(vnd, vx)	pool_put(&(vnd)->sc_vxpool, (vx))

#define	VNDLABELDEV(dev) \
    (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART))

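/*
 * Bound the number of requests queued to the worker thread, and cap the
 * pages cached per backing vnode at 1 MiB worth (note that the vnd
 * argument of VND_MAXPAGES is currently unused).
 */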
#define	VND_MAXPENDING(vnd)	((vnd)->sc_maxactive * 4)
#define	VND_MAXPAGES(vnd)	(1024 * 1024 / PAGE_SIZE)

static void	vndclear(struct vnd_softc *, int);
static int	vnddoclear(struct vnd_softc *, int, int, bool);
static int	vndsetcred(struct vnd_softc *, kauth_cred_t);
static void	vndthrottle(struct vnd_softc *, struct vnode *);
static void	vndiodone(struct buf *);
#if 0
static void	vndshutdown(void);
#endif

static void	vndgetdefaultlabel(struct vnd_softc *, struct disklabel *);
static void	vndgetdisklabel(dev_t, struct vnd_softc *);

static int	vndlock(struct vnd_softc *);
static void	vndunlock(struct vnd_softc *);
#ifdef VND_COMPRESSION
static void	compstrategy(struct buf *, off_t);
static void	*vnd_alloc(void *, u_int, u_int);
static void	vnd_free(void *, void *);
#endif /* VND_COMPRESSION */

static void	vndthread(void *);
static bool	vnode_has_op(const struct vnode *, int);
static void	handle_with_rdwr(struct vnd_softc *, const struct buf *,
		    struct buf *);
static void	handle_with_strategy(struct vnd_softc *, const struct buf *,
		    struct buf *);
static void	vnd_set_geometry(struct vnd_softc *);

static dev_type_open(vndopen);
static dev_type_close(vndclose);
static dev_type_read(vndread);
static dev_type_write(vndwrite);
static dev_type_ioctl(vndioctl);
static dev_type_strategy(vndstrategy);
static dev_type_dump(vnddump);
static dev_type_size(vndsize);

const struct bdevsw vnd_bdevsw = {
	.d_open = vndopen,
	.d_close = vndclose,
	.d_strategy = vndstrategy,
	.d_ioctl = vndioctl,
	.d_dump = vnddump,
	.d_psize = vndsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

const struct cdevsw vnd_cdevsw = {
	.d_open = vndopen,
	.d_close = vndclose,
	.d_read = vndread,
	.d_write = vndwrite,
	.d_ioctl = vndioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

static int	vnd_match(device_t, cfdata_t, void *);
static void	vnd_attach(device_t, device_t, void *);
static int	vnd_detach(device_t, int);

CFATTACH_DECL3_NEW(vnd, sizeof(struct vnd_softc),
    vnd_match, vnd_attach, vnd_detach, NULL, NULL, NULL, DVF_DETACH_SHUTDOWN);

static struct vnd_softc	*vnd_spawn(int);
int	vnd_destroy(device_t);

static struct dkdriver vnddkdriver = {
	.d_strategy = vndstrategy,
	.d_minphys = minphys
};

void
vndattach(int num)
{
	int error;

	error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
	if (error)
		aprint_error("%s: unable to register cfattach, error = %d\n",
		    vnd_cd.cd_name, error);
}

static int
vnd_match(device_t self, cfdata_t cfdata, void *aux)
{

	return 1;
}

static void
vnd_attach(device_t parent, device_t self, void *aux)
{
	struct vnd_softc *sc = device_private(self);

	sc->sc_dev = self;
	sc->sc_comp_offsets = NULL;
	sc->sc_comp_buff = NULL;
	sc->sc_comp_decombuf = NULL;
	bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK);
	disk_init(&sc->sc_dkdev, device_xname(self), &vnddkdriver);
	if (!pmf_device_register(self, NULL, NULL))
		aprint_error_dev(self, "couldn't establish power handler\n");
}

static int
vnd_detach(device_t self, int flags)
{
	int error;
	struct vnd_softc *sc = device_private(self);

	if (sc->sc_flags & VNF_INITED) {
		error = vnddoclear(sc, 0, -1, (flags & DETACH_FORCE) != 0);
		if (error != 0)
			return error;
	}

	pmf_device_deregister(self);
	bufq_free(sc->sc_tab);
	disk_destroy(&sc->sc_dkdev);

	return 0;
}

static struct vnd_softc *
vnd_spawn(int unit)
{
	cfdata_t cf;

	cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK);
	cf->cf_name = vnd_cd.cd_name;
	cf->cf_atname = vnd_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	return device_private(config_attach_pseudo(cf));
}

int
vnd_destroy(device_t dev)
{
	int error;
	cfdata_t cf;

	cf = device_cfdata(dev);
	error = config_detach(dev, DETACH_QUIET);
	if (error)
		return error;
	free(cf, M_DEVBUF);
	return 0;
}

static int
vndopen(dev_t dev, int flags, int mode, struct lwp *l)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part, pmask;
	struct disklabel *lp;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndopen(0x%"PRIx64", 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
#endif
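	/*
	 * Units are created on demand: the first open of an unconfigured
	 * unit attaches a pseudo-device instance via vnd_spawn() below.
	 */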
	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL) {
		sc = vnd_spawn(unit);
		if (sc == NULL)
			return ENOMEM;

		/* compatibility, keep disklabel after close */
		sc->sc_flags = VNF_KLABEL;
	}

	if ((error = vndlock(sc)) != 0)
		return error;

	mutex_enter(&sc->sc_dkdev.dk_openlock);

	if ((sc->sc_flags & VNF_CLEARING) != 0) {
		error = ENXIO;
		goto done;
	}

	lp = sc->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	if (sc->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto done;
	}

	if (sc->sc_flags & VNF_INITED) {
		if ((sc->sc_dkdev.dk_openmask & ~(1<<RAW_PART)) != 0) {
			/*
			 * If any non-raw partition is open, but the disk
			 * has been invalidated, disallow further opens.
			 */
			if ((sc->sc_flags & VNF_VLABEL) == 0) {
				error = EIO;
				goto done;
			}
		} else {
			/*
			 * Load the partition info if not already loaded.
			 */
			if ((sc->sc_flags & VNF_VLABEL) == 0) {
				sc->sc_flags |= VNF_VLABEL;
				vndgetdisklabel(dev, sc);
			}
		}
	}

	/* Check that the partition exists. */
	if (part != RAW_PART) {
		if (((sc->sc_flags & VNF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		    (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto done;
		}
	}

	/* Prevent our unit from being unconfigured while open. */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

done:
	mutex_exit(&sc->sc_dkdev.dk_openlock);
	vndunlock(sc);
	return error;
}

static int
vndclose(dev_t dev, int flags, int mode, struct lwp *l)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclose(0x%"PRIx64", 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
#endif
	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((error = vndlock(sc)) != 0)
		return error;

	mutex_enter(&sc->sc_dkdev.dk_openlock);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

	/* Are we the last opener? */
	if (sc->sc_dkdev.dk_openmask == 0) {
		if ((sc->sc_flags & VNF_KLABEL) == 0)
			sc->sc_flags &= ~VNF_VLABEL;
	}

	mutex_exit(&sc->sc_dkdev.dk_openlock);

	vndunlock(sc);

	if ((sc->sc_flags & VNF_INITED) == 0) {
		if ((error = vnd_destroy(sc->sc_dev)) != 0) {
			aprint_error_dev(sc->sc_dev,
			    "unable to detach instance\n");
			return error;
		}
	}

	return 0;
}

/*
 * Queue the request, and wakeup the kernel thread to handle it.
 */
static void
vndstrategy(struct buf *bp)
{
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd =
	    device_lookup_private(&vnd_cd, unit);
	struct disklabel *lp;
	daddr_t blkno;
	int s = splbio();

	if (vnd == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	lp = vnd->sc_dkdev.dk_label;

	if ((vnd->sc_flags & VNF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}

	/*
	 * The transfer must be a whole number of blocks.
	 */
	if ((bp->b_bcount % lp->d_secsize) != 0) {
		bp->b_error = EINVAL;
		goto done;
	}

	/*
	 * check if we're read-only.
	 */
	if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) {
		bp->b_error = EACCES;
		goto done;
	}

	/* If it's a nil transfer, wake up the top half now. */
	if (bp->b_bcount == 0) {
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an error,
	 * the bounds check will flag that for us.
	 */
	if (DISKPART(bp->b_dev) == RAW_PART) {
		if (bounds_check_with_mediasize(bp, DEV_BSIZE,
		    vnd->sc_size) <= 0)
			goto done;
	} else {
		if (bounds_check_with_label(&vnd->sc_dkdev,
		    bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0)
			goto done;
	}

	/*
	 * Put the block number in terms of the logical blocksize
	 * of the "device".
	 */

	blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);

	/*
	 * Translate the partition-relative block number to an absolute.
	 */
	if (DISKPART(bp->b_dev) != RAW_PART) {
		struct partition *pp;

		pp = &vnd->sc_dkdev.dk_label->d_partitions[
		    DISKPART(bp->b_dev)];
		blkno += pp->p_offset;
	}
	bp->b_rawblkno = blkno;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndstrategy(%p): unit %d\n", bp, unit);
#endif
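	/*
	 * In read/write mode, throttle producers so that no more than
	 * VND_MAXPENDING(vnd) requests are queued at any one time.
	 */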
	if ((vnd->sc_flags & VNF_USE_VN_RDWR)) {
		KASSERT(vnd->sc_pending >= 0 &&
		    vnd->sc_pending <= VND_MAXPENDING(vnd));
		while (vnd->sc_pending == VND_MAXPENDING(vnd))
			tsleep(&vnd->sc_pending, PRIBIO, "vndpc", 0);
		vnd->sc_pending++;
	}
	bufq_put(vnd->sc_tab, bp);
	wakeup(&vnd->sc_tab);
	splx(s);
	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
	splx(s);
}

static bool
vnode_has_strategy(struct vnd_softc *vnd)
{
	return vnode_has_op(vnd->sc_vp, VOFFSET(vop_bmap)) &&
	    vnode_has_op(vnd->sc_vp, VOFFSET(vop_strategy));
}

/*
 * Check whether the backend can be driven at the vnd sector size:
 * return true if the smallest I/O size supported by the backend does
 * not evenly divide the vnd sector size, i.e. if I/O requests could
 * end up smaller than what the backend supports.
 */
static bool
vnode_has_large_blocks(struct vnd_softc *vnd)
{
	u_int32_t vnd_secsize, iosize;

	iosize = vnd->sc_iosize;
	vnd_secsize = vnd->sc_geom.vng_secsize;

	return vnd_secsize % iosize != 0;
}

/* XXX this function needs a reliable check to detect
 * sparse files.  Otherwise, bmap/strategy may be used
 * and fail on non-allocated blocks.  VOP_READ/VOP_WRITE
 * work on sparse files.
 */
#if notyet
static bool
vnode_strategy_probe(struct vnd_softc *vnd)
{
	int error;
	daddr_t nbn;

	if (!vnode_has_strategy(vnd))
		return false;

	if (vnode_has_large_blocks(vnd))
		return false;

	/* Convert the first logical block number to its
	 * physical block number.
	 */
	error = 0;
	vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_BMAP(vnd->sc_vp, 0, NULL, &nbn, NULL);
	VOP_UNLOCK(vnd->sc_vp);

	/* Test if that worked. */
	if (error == 0 && (long)nbn == -1)
		return false;

	return true;
}
#endif

static void
vndthread(void *arg)
{
	struct vnd_softc *vnd = arg;
	int s;

	/* Determine whether we can *use* VOP_BMAP and VOP_STRATEGY to
	 * directly access the backing vnode.  If we can, use these two
	 * operations to avoid messing with the local buffer cache.
	 * Otherwise fall back to regular VOP_READ/VOP_WRITE operations
	 * which are guaranteed to work with any file system. */
	if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0 &&
	    ! vnode_has_strategy(vnd))
		vnd->sc_flags |= VNF_USE_VN_RDWR;

	/* VOP_STRATEGY can only be used if the backing vnode allows
	 * access to blocks as small as those defined by the vnd geometry.
	 */
	if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0 &&
	    vnode_has_large_blocks(vnd))
		vnd->sc_flags |= VNF_USE_VN_RDWR;

#ifdef DEBUG
	if (vnddebug & VDB_INIT)
		printf("vndthread: vp %p, %s\n", vnd->sc_vp,
		    (vnd->sc_flags & VNF_USE_VN_RDWR) == 0 ?
		    "using bmap/strategy operations" :
		    "using read/write operations");
#endif

	s = splbio();
	vnd->sc_flags |= VNF_KTHREAD;
	wakeup(&vnd->sc_kthread);

	/*
	 * Dequeue requests and serve them depending on the available
	 * vnode operations.
	 */
	while ((vnd->sc_flags & VNF_VUNCONF) == 0) {
		struct vndxfer *vnx;
		struct buf *obp;
		struct buf *bp;

		obp = bufq_get(vnd->sc_tab);
		if (obp == NULL) {
			tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0);
			continue;
		}
		if ((vnd->sc_flags & VNF_USE_VN_RDWR)) {
			KASSERT(vnd->sc_pending > 0 &&
			    vnd->sc_pending <= VND_MAXPENDING(vnd));
			if (vnd->sc_pending-- == VND_MAXPENDING(vnd))
				wakeup(&vnd->sc_pending);
		}
		splx(s);
#ifdef DEBUG
		if (vnddebug & VDB_FOLLOW)
			printf("vndthread(%p)\n", obp);
#endif

		if (vnd->sc_vp->v_mount == NULL) {
			obp->b_error = ENXIO;
			goto done;
		}
#ifdef VND_COMPRESSION
		/* handle a compressed read */
		if ((obp->b_flags & B_READ) != 0 && (vnd->sc_flags & VNF_COMP)) {
			off_t bn;

			/* Convert to a byte offset within the file. */
			bn = obp->b_rawblkno *
			    vnd->sc_dkdev.dk_label->d_secsize;

			compstrategy(obp, bn);
			goto done;
		}
#endif /* VND_COMPRESSION */

		/*
		 * Allocate a header for this transfer and link it to the
		 * buffer
		 */
		s = splbio();
		vnx = VND_GETXFER(vnd);
		splx(s);
		vnx->vx_vnd = vnd;

		s = splbio();
		while (vnd->sc_active >= vnd->sc_maxactive) {
			tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0);
		}
		vnd->sc_active++;
		splx(s);

		/* Instrumentation. */
		disk_busy(&vnd->sc_dkdev);

		bp = &vnx->vx_buf;
		buf_init(bp);
		bp->b_flags = (obp->b_flags & B_READ);
		bp->b_oflags = obp->b_oflags;
		bp->b_cflags = obp->b_cflags;
		bp->b_iodone = vndiodone;
		bp->b_private = obp;
		bp->b_vp = vnd->sc_vp;
		bp->b_objlock = bp->b_vp->v_interlock;
		bp->b_data = obp->b_data;
		bp->b_bcount = obp->b_bcount;
		BIO_COPYPRIO(bp, obp);

		/* Make sure the request succeeds while suspending this fs. */
		fstrans_start_lazy(vnd->sc_vp->v_mount);

		/* Handle the request using the appropriate operations. */
		if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0)
			handle_with_strategy(vnd, obp, bp);
		else
			handle_with_rdwr(vnd, obp, bp);

		fstrans_done(vnd->sc_vp->v_mount);

		s = splbio();
		continue;

done:
		biodone(obp);
		s = splbio();
	}

	vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF);
	wakeup(&vnd->sc_kthread);
	splx(s);
	kthread_exit(0);
}

/*
 * Checks if the given vnode supports the requested operation.
 * The operation is specified by the offset returned by VOFFSET.
 *
 * XXX The test below used to determine this is quite fragile
 * because it relies on the file system to use genfs to specify
 * unimplemented operations.  There might be another way to do
 * it more cleanly.
 */
static bool
vnode_has_op(const struct vnode *vp, int opoffset)
{
	int (*defaultp)(void *);
	int (*opp)(void *);

	defaultp = vp->v_op[VOFFSET(vop_default)];
	opp = vp->v_op[opoffset];

	return opp != defaultp && opp != genfs_eopnotsupp &&
	    opp != genfs_badop && opp != genfs_nullop;
}

/*
 * Handles the read/write request given in 'bp' using the vnode's VOP_READ
 * and VOP_WRITE operations.
 *
 * 'obp' is a pointer to the original request fed to the vnd device.
 */
static void
handle_with_rdwr(struct vnd_softc *vnd, const struct buf *obp, struct buf *bp)
{
	bool doread;
	off_t offset;
	size_t len, resid;
	struct vnode *vp;

	doread = bp->b_flags & B_READ;
	offset = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;
	len = bp->b_bcount;
	vp = vnd->sc_vp;

#if defined(DEBUG)
	if (vnddebug & VDB_IO)
		printf("vnd (rdwr): vp %p, %s, rawblkno 0x%" PRIx64
		    ", secsize %d, offset %" PRIu64
		    ", bcount %d\n",
		    vp, doread ? "read" : "write", obp->b_rawblkno,
		    vnd->sc_dkdev.dk_label->d_secsize, offset,
		    bp->b_bcount);
#endif

	/* Issue the read or write operation. */
	bp->b_error =
	    vn_rdwr(doread ? UIO_READ : UIO_WRITE,
	    vp, bp->b_data, len, offset, UIO_SYSSPACE,
	    IO_ADV_ENCODE(POSIX_FADV_NOREUSE) | IO_DIRECT,
	    vnd->sc_cred, &resid, NULL);
	bp->b_resid = resid;

	/*
	 * Avoid caching too many pages, the vnd user
	 * is usually a filesystem and caches itself.
	 * We need some amount of caching to not hinder
	 * read-ahead and write-behind operations.
	 */
	mutex_enter(vp->v_interlock);
	if (vp->v_uobj.uo_npages > VND_MAXPAGES(vnd))
		(void) VOP_PUTPAGES(vp, 0, 0,
		    PGO_ALLPAGES | PGO_CLEANIT | PGO_FREE);
	else
		mutex_exit(vp->v_interlock);

	/* We need to increase the number of outputs on the vnode if
	 * there was any write to it. */
	if (!doread) {
		mutex_enter(vp->v_interlock);
		vp->v_numoutput++;
		mutex_exit(vp->v_interlock);
	}

	biodone(bp);
}

/*
 * Handles the read/write request given in 'bp' using the vnode's VOP_BMAP
 * and VOP_STRATEGY operations.
 *
 * 'obp' is a pointer to the original request fed to the vnd device.
 */
static void
handle_with_strategy(struct vnd_softc *vnd, const struct buf *obp,
    struct buf *bp)
{
	int bsize, error, flags, skipped;
	size_t resid, sz;
	off_t bn, offset;
	struct vnode *vp;
	struct buf *nbp = NULL;

	flags = obp->b_flags;

	/* convert to a byte offset within the file. */
	bn = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;

	bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
	skipped = 0;

	/*
	 * Break the request into bsize pieces and feed them
	 * sequentially using VOP_BMAP/VOP_STRATEGY.
	 * We do it this way to keep from flooding NFS servers if we
	 * are connected to an NFS file.  This places the burden on
	 * the client rather than the server.
	 */
	error = 0;
	bp->b_resid = bp->b_bcount;
	for (offset = 0, resid = bp->b_resid; /* true */;
	    resid -= sz, offset += sz) {
		daddr_t nbn;
		int off, nra;

		nra = 0;
		vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
		VOP_UNLOCK(vnd->sc_vp);

		if (error == 0 && (long)nbn == -1)
			error = EIO;

		/*
		 * If there was an error or a hole in the file...punt.
		 * Note that we may have to wait for any operations
		 * that we have already fired off before releasing
		 * the buffer.
		 *
		 * XXX we could deal with holes here but it would be
		 * a hassle (in the write case).
		 */
		if (error) {
			skipped += resid;
			break;
		}

#ifdef DEBUG
		if (!dovndcluster)
			nra = 0;
#endif

		off = bn % bsize;
		sz = MIN(((off_t)1 + nra) * bsize - off, resid);
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64
			    " sz 0x%zx\n", vnd->sc_vp, vp, (long long)bn,
			    nbn, sz);
#endif

		nbp = getiobuf(vp, true);
		nestiobuf_setup(bp, nbp, offset, sz);
		nbp->b_blkno = nbn + btodb(off);

#if 0 /* XXX #ifdef DEBUG */
		if (vnddebug & VDB_IO)
			printf("vndstart(%ld): bp %p vp %p blkno "
			    "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n",
			    (long) (vnd-vnd_softc), &nbp->vb_buf,
			    nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno,
			    nbp->vb_buf.b_flags, nbp->vb_buf.b_data,
			    nbp->vb_buf.b_bcount);
#endif
		if (resid == sz) {
			break;
		}
		VOP_STRATEGY(vp, nbp);
		bn += sz;
	}
	if (!(flags & B_READ)) {
		struct vnode *w_vp;
		/*
		 * this is the last nested buf, account for
		 * the parent buf write too.
		 * This has to be done last, so that
		 * fsync won't wait for this write which
		 * has no chance to complete before all nested bufs
		 * have been queued.  But it has to be done
		 * before the last VOP_STRATEGY()
		 * or the call to nestiobuf_done().
		 */
		w_vp = bp->b_vp;
		mutex_enter(w_vp->v_interlock);
		w_vp->v_numoutput++;
		mutex_exit(w_vp->v_interlock);
	}
	KASSERT(skipped != 0 || nbp != NULL);
	if (skipped)
		nestiobuf_done(bp, skipped, error);
	else
		VOP_STRATEGY(vp, nbp);
}

static void
vndiodone(struct buf *bp)
{
	struct vndxfer *vnx = VND_BUFTOXFER(bp);
	struct vnd_softc *vnd = vnx->vx_vnd;
	struct buf *obp = bp->b_private;
	int s = splbio();

	KASSERT(&vnx->vx_buf == bp);
	KASSERT(vnd->sc_active > 0);
#ifdef DEBUG
	if (vnddebug & VDB_IO) {
		printf("vndiodone1: bp %p iodone: error %d\n",
		    bp, bp->b_error);
	}
#endif
	disk_unbusy(&vnd->sc_dkdev, bp->b_bcount - bp->b_resid,
	    (bp->b_flags & B_READ));
	vnd->sc_active--;
	if (vnd->sc_active == 0) {
		wakeup(&vnd->sc_tab);
	}
	splx(s);
	obp->b_error = bp->b_error;
	obp->b_resid = bp->b_resid;
	buf_destroy(bp);
	VND_PUTXFER(vnd, vnx);
	biodone(obp);
}

/* ARGSUSED */
static int
vndread(dev_t dev, struct uio *uio, int flags)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndread(0x%"PRIx64", %p)\n", dev, uio);
#endif

	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return ENXIO;

	return physio(vndstrategy, NULL, dev, B_READ, minphys, uio);
}

/* ARGSUSED */
static int
vndwrite(dev_t dev, struct uio *uio, int flags)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndwrite(0x%"PRIx64", %p)\n", dev, uio);
#endif

	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return ENXIO;

	return physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio);
}

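/*
 * Fetch the attributes of the file backing unit *un (or `unit' if *un
 * is -1).  Returns 0 on success, -1 if the unit is unconfigured (which
 * vndioctl_get() does not treat as an error), or an errno from
 * VOP_GETATTR().
 */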
static int
vnd_cget(struct lwp *l, int unit, int *un, struct vattr *va)
{
	int error;
	struct vnd_softc *vnd;

	if (*un == -1)
		*un = unit;
	if (*un < 0)
		return EINVAL;

	vnd = device_lookup_private(&vnd_cd, *un);
	if (vnd == NULL)
		return -1;

	if ((vnd->sc_flags & VNF_INITED) == 0)
		return -1;

	vn_lock(vnd->sc_vp, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(vnd->sc_vp, va, l->l_cred);
	VOP_UNLOCK(vnd->sc_vp);
	return error;
}

static int
vnddoclear(struct vnd_softc *vnd, int pmask, int minor, bool force)
{
	int error;

	if ((error = vndlock(vnd)) != 0)
		return error;

	/*
	 * Don't unconfigure if any other partitions are open
	 * or if both the character and block flavors of this
	 * partition are open.
	 */
	if (DK_BUSY(vnd, pmask) && !force) {
		vndunlock(vnd);
		return EBUSY;
	}

	/* Delete all of our wedges */
	dkwedge_delall(&vnd->sc_dkdev);

	/*
	 * XXX vndclear() might call vndclose() implicitly;
	 * release lock to avoid recursion
	 *
	 * Set VNF_CLEARING to prevent vndopen() from
	 * sneaking in after we vndunlock().
	 */
	vnd->sc_flags |= VNF_CLEARING;
	vndunlock(vnd);
	vndclear(vnd, minor);
#ifdef DEBUG
	if (vnddebug & VDB_INIT)
		printf("%s: CLRed\n", __func__);
#endif

	/* Destroy the xfer and buffer pools. */
	pool_destroy(&vnd->sc_vxpool);

	/* Detach the disk. */
	disk_detach(&vnd->sc_dkdev);

	return 0;
}

static int
vndioctl_get(struct lwp *l, void *data, int unit, struct vattr *va)
{
	int error;

	KASSERT(l);

	/* the first member is always int vnd_unit in all the versions */
	if (*(int *)data >= vnd_cd.cd_ndevs)
		return ENXIO;

	switch (error = vnd_cget(l, unit, (int *)data, va)) {
	case -1:
		/* unused is not an error */
		memset(va, 0, sizeof(*va));
		/*FALLTHROUGH*/
	case 0:
		return 0;
	default:
		return error;
	}
}

/* ARGSUSED */
static int
vndioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	bool force;
	int unit = vndunit(dev);
	struct vnd_softc *vnd;
	struct vnd_ioctl *vio;
	struct vattr vattr;
	struct pathbuf *pb;
	struct nameidata nd;
	int error, part, pmask;
	uint64_t geomsize;
	int fflags;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndioctl(0x%"PRIx64", 0x%lx, %p, 0x%x, %p): unit %d\n",
		    dev, cmd, data, flag, l->l_proc, unit);
#endif
	/* Do the gets first; they don't need initialization or verification */
	switch (cmd) {
	case VNDIOCGET:
		if ((error = vndioctl_get(l, data, unit, &vattr)) != 0)
			return error;

		struct vnd_user *vnu = data;
		vnu->vnu_dev = vattr.va_fsid;
		vnu->vnu_ino = vattr.va_fileid;
		return 0;

	default:
		/* First check for COMPAT_50 hook */
		MODULE_CALL_HOOK(compat_vndioctl_50_hook,
		    (cmd, l, data, unit, &vattr, vndioctl_get),
		    enosys(), error);

		/*
		 * If not present, then COMPAT_30 hook also not
		 * present, so just continue with checks for the
		 * "write" commands
		 */
		if (error == ENOSYS) {
			error = 0;
			break;
		}

		/* If not already handled, try the COMPAT_30 hook */
		if (error == EPASSTHROUGH)
			MODULE_CALL_HOOK(compat_vndioctl_30_hook,
			    (cmd, l, data, unit, &vattr, vndioctl_get),
			    enosys(), error);

		/* If no COMPAT_30 module, or not handled, check writes */
		if (error == ENOSYS || error == EPASSTHROUGH) {
			error = 0;
			break;
		}
		return error;
	}

	vnd = device_lookup_private(&vnd_cd, unit);
	if (vnd == NULL)
		return ENXIO;
	vio = (struct vnd_ioctl *)data;

	/* Must be open for writes for these commands... */
	switch (cmd) {
	case VNDIOCSET50:
	case VNDIOCCLR50:
		if (!compat_vndioctl_50_hook.hooked)
			return EINVAL;
		/* FALLTHROUGH */
	case VNDIOCSET:
	case VNDIOCCLR:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCSDINFO:
	case ODIOCWDINFO:
#endif
	case DIOCKLABEL:
	case DIOCWLABEL:
		if ((flag & FWRITE) == 0)
			return EBADF;
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case VNDIOCCLR:
	case VNDIOCCLR50:
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
	case DIOCGPARTINFO:
	case DIOCKLABEL:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
	case DIOCCACHESYNC:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCSDINFO:
	case ODIOCWDINFO:
	case ODIOCGDEFLABEL:
#endif
		if ((vnd->sc_flags & VNF_INITED) == 0)
			return ENXIO;
	}

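	/*
	 * Give the common disk(9) code first crack at the ioctl;
	 * it returns EPASSTHROUGH for anything it does not handle.
	 */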
	error = disk_ioctl(&vnd->sc_dkdev, dev, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return error;

	switch (cmd) {
	case VNDIOCSET50:
	case VNDIOCSET:
		if (vnd->sc_flags & VNF_INITED)
			return EBUSY;

		if ((error = vndlock(vnd)) != 0)
			return error;

		fflags = FREAD;
		if ((vio->vnd_flags & VNDIOF_READONLY) == 0)
			fflags |= FWRITE;
		if ((vio->vnd_flags & VNDIOF_FILEIO) != 0)
			vnd->sc_flags |= VNF_USE_VN_RDWR;
		error = pathbuf_copyin(vio->vnd_file, &pb);
		if (error) {
			goto unlock_and_exit;
		}
		NDINIT(&nd, LOOKUP, FOLLOW, pb);
		if ((error = vn_open(&nd, fflags, 0)) != 0) {
			pathbuf_destroy(pb);
			goto unlock_and_exit;
		}
		KASSERT(l);
		error = VOP_GETATTR(nd.ni_vp, &vattr, l->l_cred);
		if (!error && nd.ni_vp->v_type != VREG)
			error = EOPNOTSUPP;
		if (!error && vattr.va_bytes < vattr.va_size)
			/* File is definitely sparse, use vn_rdwr() */
			vnd->sc_flags |= VNF_USE_VN_RDWR;
		if (error) {
			VOP_UNLOCK(nd.ni_vp);
			goto close_and_exit;
		}

		/* If using a compressed file, initialize its info */
		/* (or abort with an error if kernel has no compression) */
		if (vio->vnd_flags & VNDIOF_COMP) {
#ifdef VND_COMPRESSION
			struct vnd_comp_header *ch;
			int i;
			uint32_t comp_size;
			uint32_t comp_maxsize;

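			/*
			 * On-disk layout of a compressed image, as read
			 * below: a vnd_comp_header whose block_size and
			 * num_blocks fields are big-endian, followed by
			 * num_blocks + 1 big-endian uint64 offsets (the
			 * last one is the total file size), followed by
			 * the zlib-compressed blocks themselves.
			 */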
			/* allocate space for compressed file header */
			ch = malloc(sizeof(struct vnd_comp_header),
			    M_TEMP, M_WAITOK);

			/* read compressed file header */
			error = vn_rdwr(UIO_READ, nd.ni_vp, (void *)ch,
			    sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE,
			    IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
			if (error) {
				free(ch, M_TEMP);
				VOP_UNLOCK(nd.ni_vp);
				goto close_and_exit;
			}

			if (be32toh(ch->block_size) == 0 ||
			    be32toh(ch->num_blocks) > UINT32_MAX - 1) {
				free(ch, M_TEMP);
				VOP_UNLOCK(nd.ni_vp);
				goto close_and_exit;
			}

			/* save some header info */
			vnd->sc_comp_blksz = be32toh(ch->block_size);
			/* note last offset is the file byte size */
			vnd->sc_comp_numoffs = be32toh(ch->num_blocks) + 1;
			free(ch, M_TEMP);
			if (!DK_DEV_BSIZE_OK(vnd->sc_comp_blksz)) {
				VOP_UNLOCK(nd.ni_vp);
				error = EINVAL;
				goto close_and_exit;
			}
			KASSERT(0 < vnd->sc_comp_blksz);
			KASSERT(0 < vnd->sc_comp_numoffs);
			/*
			 * @#^@!$& gcc -Wtype-limits refuses to let me
			 * write SIZE_MAX/sizeof(uint64_t) < numoffs,
			 * because the range of the type on amd64 makes
			 * the comparisons always false.
			 */
#if SIZE_MAX <= UINT32_MAX*(64/CHAR_BIT)
			if (SIZE_MAX/sizeof(uint64_t) < vnd->sc_comp_numoffs) {
				VOP_UNLOCK(nd.ni_vp);
				error = EINVAL;
				goto close_and_exit;
			}
#endif
			if ((vattr.va_size < sizeof(struct vnd_comp_header)) ||
			    (vattr.va_size - sizeof(struct vnd_comp_header) <
			    sizeof(uint64_t)*vnd->sc_comp_numoffs) ||
			    (UQUAD_MAX/vnd->sc_comp_blksz <
			    vnd->sc_comp_numoffs - 1)) {
				VOP_UNLOCK(nd.ni_vp);
				error = EINVAL;
				goto close_and_exit;
			}

			/* set decompressed file size */
			KASSERT(vnd->sc_comp_numoffs - 1 <=
			    UQUAD_MAX/vnd->sc_comp_blksz);
			vattr.va_size =
			    ((u_quad_t)vnd->sc_comp_numoffs - 1) *
			    (u_quad_t)vnd->sc_comp_blksz;

			/* allocate space for all the compressed offsets */
			__CTASSERT(UINT32_MAX <= UQUAD_MAX/sizeof(uint64_t));
			vnd->sc_comp_offsets =
			    malloc(sizeof(uint64_t) * vnd->sc_comp_numoffs,
			    M_DEVBUF, M_WAITOK);

			/* read in the offsets */
			error = vn_rdwr(UIO_READ, nd.ni_vp,
			    (void *)vnd->sc_comp_offsets,
			    sizeof(uint64_t) * vnd->sc_comp_numoffs,
			    sizeof(struct vnd_comp_header), UIO_SYSSPACE,
			    IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
			if (error) {
				VOP_UNLOCK(nd.ni_vp);
				goto close_and_exit;
			}
			/*
			 * find largest block size (used for allocation limit).
			 * Also convert offsets to native byte order.
			 */
			comp_maxsize = 0;
			for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) {
				vnd->sc_comp_offsets[i] =
				    be64toh(vnd->sc_comp_offsets[i]);
				comp_size =
				    be64toh(vnd->sc_comp_offsets[i + 1])
				    - vnd->sc_comp_offsets[i];
				if (comp_size > comp_maxsize)
					comp_maxsize = comp_size;
			}
			vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] =
			    be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs
			    - 1]);

			/* create compressed data buffer */
			vnd->sc_comp_buff = malloc(comp_maxsize,
			    M_DEVBUF, M_WAITOK);

			/* create decompressed buffer */
			vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz,
			    M_DEVBUF, M_WAITOK);
			vnd->sc_comp_buffblk = -1;

			/* Initialize decompress stream */
			memset(&vnd->sc_comp_stream, 0, sizeof(z_stream));
			vnd->sc_comp_stream.zalloc = vnd_alloc;
			vnd->sc_comp_stream.zfree = vnd_free;
			error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS);
			if (error) {
				if (vnd->sc_comp_stream.msg)
					printf("vnd%d: compressed file, %s\n",
					    unit, vnd->sc_comp_stream.msg);
				VOP_UNLOCK(nd.ni_vp);
				error = EINVAL;
				goto close_and_exit;
			}

			vnd->sc_flags |= VNF_COMP | VNF_READONLY;
#else /* !VND_COMPRESSION */
			VOP_UNLOCK(nd.ni_vp);
			error = EOPNOTSUPP;
			goto close_and_exit;
#endif /* VND_COMPRESSION */
		}

		VOP_UNLOCK(nd.ni_vp);
		vnd->sc_vp = nd.ni_vp;
		vnd->sc_size = btodb(vattr.va_size);	/* note truncation */

		/* get smallest I/O size for underlying device, fall back to
		 * fundamental I/O size of underlying filesystem
		 */
		error = bdev_ioctl(vattr.va_fsid, DIOCGSECTORSIZE, &vnd->sc_iosize, FKIOCTL, l);
		if (error)
			vnd->sc_iosize = vnd->sc_vp->v_mount->mnt_stat.f_frsize;

		/*
		 * Use pseudo-geometry specified.  If none was provided,
		 * use "standard" Adaptec fictitious geometry.
		 */
		if (vio->vnd_flags & VNDIOF_HASGEOM) {

			memcpy(&vnd->sc_geom, &vio->vnd_geom,
			    sizeof(vio->vnd_geom));

			/*
			 * Sanity-check the sector size.
			 */
			if (!DK_DEV_BSIZE_OK(vnd->sc_geom.vng_secsize) ||
			    vnd->sc_geom.vng_ntracks == 0 ||
			    vnd->sc_geom.vng_nsectors == 0) {
				error = EINVAL;
				goto close_and_exit;
			}

			/*
			 * Compute missing cylinder count from size
			 */
			if (vnd->sc_geom.vng_ncylinders == 0)
				vnd->sc_geom.vng_ncylinders = vnd->sc_size /
				    (vnd->sc_geom.vng_ntracks *
				    vnd->sc_geom.vng_nsectors);

			/*
			 * Compute the size (in DEV_BSIZE blocks) specified
			 * by the geometry.
			 */
			geomsize = (int64_t)vnd->sc_geom.vng_nsectors *
			    vnd->sc_geom.vng_ntracks *
			    vnd->sc_geom.vng_ncylinders *
			    (vnd->sc_geom.vng_secsize / DEV_BSIZE);

			/*
			 * Sanity-check the size against the specified
			 * geometry.
			 */
			if (vnd->sc_size < geomsize) {
				error = EINVAL;
				goto close_and_exit;
			}
		} else if (vnd->sc_size >= (32 * 64)) {
			/*
			 * Size must be at least 2048 DEV_BSIZE blocks
			 * (1M) in order to use this geometry.
			 */
			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 32;
			vnd->sc_geom.vng_ntracks = 64;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32);
		} else {
			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 1;
			vnd->sc_geom.vng_ntracks = 1;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size;
		}

		vnd_set_geometry(vnd);

		if (vio->vnd_flags & VNDIOF_READONLY) {
			vnd->sc_flags |= VNF_READONLY;
		}

		if ((error = vndsetcred(vnd, l->l_cred)) != 0)
			goto close_and_exit;

		vndthrottle(vnd, vnd->sc_vp);
		vio->vnd_osize = dbtob(vnd->sc_size);
		if (cmd != VNDIOCSET50)
			vio->vnd_size = dbtob(vnd->sc_size);
		vnd->sc_flags |= VNF_INITED;

		/* create the kernel thread, wait for it to be up */
		error = kthread_create(PRI_NONE, 0, NULL, vndthread, vnd,
		    &vnd->sc_kthread, "%s", device_xname(vnd->sc_dev));
		if (error)
			goto close_and_exit;
		while ((vnd->sc_flags & VNF_KTHREAD) == 0) {
			tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0);
		}
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n",
			    vnd->sc_vp, (unsigned long) vnd->sc_size,
			    vnd->sc_geom.vng_secsize,
			    vnd->sc_geom.vng_nsectors,
			    vnd->sc_geom.vng_ntracks,
			    vnd->sc_geom.vng_ncylinders);
#endif

		/* Attach the disk. */
		disk_attach(&vnd->sc_dkdev);

		/* Initialize the xfer and buffer pools. */
		pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0,
		    0, 0, "vndxpl", NULL, IPL_BIO);

		vndunlock(vnd);

		pathbuf_destroy(pb);

		/* Discover wedges on this disk */
		dkwedge_discover(&vnd->sc_dkdev);

		break;

close_and_exit:
		(void) vn_close(nd.ni_vp, fflags, l->l_cred);
		pathbuf_destroy(pb);
unlock_and_exit:
#ifdef VND_COMPRESSION
		/* free any allocated memory (for compressed file) */
		if (vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if (vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if (vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
#endif /* VND_COMPRESSION */
		vndunlock(vnd);
		return error;

	case VNDIOCCLR50:
	case VNDIOCCLR:
		part = DISKPART(dev);
		pmask = (1 << part);
		force = (vio->vnd_flags & VNDIOF_FORCE) != 0;

		if ((error = vnddoclear(vnd, pmask, minor(dev), force)) != 0)
			return error;

		break;

	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;

		if ((error = vndlock(vnd)) != 0)
			return error;

		vnd->sc_flags |= VNF_LABELLING;

#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		error = setdisklabel(vnd->sc_dkdev.dk_label,
		    lp, 0, vnd->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			    )
				error = writedisklabel(VNDLABELDEV(dev),
				    vndstrategy, vnd->sc_dkdev.dk_label,
				    vnd->sc_dkdev.dk_cpulabel);
		}

		vnd->sc_flags &= ~VNF_LABELLING;

		vndunlock(vnd);

		if (error)
			return error;
		break;
	}

	case DIOCKLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_KLABEL;
		else
			vnd->sc_flags &= ~VNF_KLABEL;
		break;

	case DIOCWLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_WLABEL;
		else
			vnd->sc_flags &= ~VNF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, (struct disklabel *)data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCCACHESYNC:
		vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_FSYNC(vnd->sc_vp, vnd->sc_cred,
		    FSYNC_WAIT | FSYNC_DATAONLY | FSYNC_CACHE, 0, 0);
		VOP_UNLOCK(vnd->sc_vp);
		return error;

	default:
		return ENOTTY;
	}

	return 0;
}

/*
 * Duplicate the current process's credentials.  Since we are called only
 * as the result of a SET ioctl and only root can do that, any future access
 * to this "disk" is essentially as root.  Note that credentials may change
 * if some other uid can write directly to the mapped file (NFS).
 */
static int
vndsetcred(struct vnd_softc *vnd, kauth_cred_t cred)
{
	struct uio auio;
	struct iovec aiov;
	char *tmpbuf;
	int error;

	vnd->sc_cred = kauth_cred_dup(cred);
	tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);

	/* XXX: Horrible kludge to establish credentials for NFS */
	aiov.iov_base = tmpbuf;
	aiov.iov_len = uimin(DEV_BSIZE, dbtob(vnd->sc_size));
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_rw = UIO_READ;
	auio.uio_resid = aiov.iov_len;
	UIO_SETUP_SYSSPACE(&auio);
	vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
	if (error == 0) {
		/*
		 * Because vnd does all IO directly through the vnode
		 * we need to flush (at least) the buffer from the above
		 * VOP_READ from the buffer cache to prevent cache
		 * incoherencies.  Also, be careful to write dirty
		 * buffers back to stable storage.
		 */
		error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred,
		    curlwp, 0, 0);
	}
	VOP_UNLOCK(vnd->sc_vp);

	free(tmpbuf, M_TEMP);
	return error;
}

/*
 * Set maxactive based on FS type
 */
static void
vndthrottle(struct vnd_softc *vnd, struct vnode *vp)
{

	if (vp->v_tag == VT_NFS)
		vnd->sc_maxactive = 2;
	else
		vnd->sc_maxactive = 8;

	if (vnd->sc_maxactive < 1)
		vnd->sc_maxactive = 1;
}

#if 0
static void
vndshutdown(void)
{
	struct vnd_softc *vnd;

	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
		if (vnd->sc_flags & VNF_INITED)
			vndclear(vnd);
}
#endif

static void
vndclear(struct vnd_softc *vnd, int myminor)
{
	struct vnode *vp = vnd->sc_vp;
	int fflags = FREAD;
	int bmaj, cmaj, i, mn;
	int s;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclear(%p): vp %p\n", vnd, vp);
#endif
	/* locate the major number */
	bmaj = bdevsw_lookup_major(&vnd_bdevsw);
	cmaj = cdevsw_lookup_major(&vnd_cdevsw);

	/* Nuke the vnodes for any open instances */
	for (i = 0; i < MAXPARTITIONS; i++) {
		mn = DISKMINOR(device_unit(vnd->sc_dev), i);
		vdevgone(bmaj, mn, mn, VBLK);
		if (mn != myminor) /* XXX avoid to kill own vnode */
			vdevgone(cmaj, mn, mn, VCHR);
	}

	if ((vnd->sc_flags & VNF_READONLY) == 0)
		fflags |= FWRITE;

	s = splbio();
	bufq_drain(vnd->sc_tab);
	splx(s);

	vnd->sc_flags |= VNF_VUNCONF;
	wakeup(&vnd->sc_tab);
	while (vnd->sc_flags & VNF_KTHREAD)
		tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0);

#ifdef VND_COMPRESSION
	/* free the compressed file buffers */
	if (vnd->sc_flags & VNF_COMP) {
		if (vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if (vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if (vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
	}
#endif /* VND_COMPRESSION */
	vnd->sc_flags &=
	    ~(VNF_INITED | VNF_READONLY | VNF_KLABEL | VNF_VLABEL
	      | VNF_VUNCONF | VNF_COMP | VNF_CLEARING);
	if (vp == NULL)
		panic("vndclear: null vp");
	(void) vn_close(vp, fflags, vnd->sc_cred);
	kauth_cred_free(vnd->sc_cred);
	vnd->sc_vp = NULL;
	vnd->sc_cred = NULL;
	vnd->sc_size = 0;
}

static int
vndsize(dev_t dev)
{
	struct vnd_softc *sc;
	struct disklabel *lp;
	int part, unit, omask;
	int size;

	unit = vndunit(dev);
	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return -1;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return -1;

	part = DISKPART(dev);
	omask = sc->sc_dkdev.dk_openmask & (1 << part);
	lp = sc->sc_dkdev.dk_label;

	if (omask == 0 && vndopen(dev, 0, S_IFBLK, curlwp))	/* XXX */
		return -1;

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && vndclose(dev, 0, S_IFBLK, curlwp))	/* XXX */
		return -1;

	return size;
}

static int
vnddump(dev_t dev, daddr_t blkno, void *va,
    size_t size)
{

	/* Not implemented. */
	return ENXIO;
}

static void
vndgetdefaultlabel(struct vnd_softc *sc, struct disklabel *lp)
{
	struct vndgeom *vng = &sc->sc_geom;
	struct partition *pp;
	unsigned spb;

	memset(lp, 0, sizeof(*lp));

	spb = vng->vng_secsize / DEV_BSIZE;
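	/* Clamp to the 32-bit d_secperunit field of the disklabel. */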
	if (sc->sc_size / spb > UINT32_MAX)
		lp->d_secperunit = UINT32_MAX;
	else
		lp->d_secperunit = sc->sc_size / spb;
	lp->d_secsize = vng->vng_secsize;
	lp->d_nsectors = vng->vng_nsectors;
	lp->d_ntracks = vng->vng_ntracks;
	lp->d_ncylinders = vng->vng_ncylinders;
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename));
	lp->d_type = DKTYPE_VND;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	pp = &lp->d_partitions[RAW_PART];
	pp->p_offset = 0;
	pp->p_size = lp->d_secperunit;
	pp->p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(lp);
}

/*
 * Read the disklabel from a vnd.  If one is not present, create a fake one.
 */
static void
vndgetdisklabel(dev_t dev, struct vnd_softc *sc)
{
	const char *errstring;
	struct disklabel *lp = sc->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel;
	int i;

	memset(clp, 0, sizeof(*clp));

	vndgetdefaultlabel(sc, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp);
	if (errstring) {
		/*
		 * Lack of disklabel is common, but we print the warning
		 * anyway, since it might contain other useful information.
		 */
		aprint_normal_dev(sc->sc_dev, "%s\n", errstring);

		/*
		 * For historical reasons, if there's no disklabel
		 * present, all partitions must be FS_BSDFFS and
		 * occupy the entire disk.
		 */
		for (i = 0; i < MAXPARTITIONS; i++) {
			/*
			 * Don't wipe out port specific hack (such as
			 * dos partition hack of i386 port).
			 */
			if (lp->d_partitions[i].p_size != 0)
				continue;

			lp->d_partitions[i].p_size = lp->d_secperunit;
			lp->d_partitions[i].p_offset = 0;
			lp->d_partitions[i].p_fstype = FS_BSDFFS;
		}

		strncpy(lp->d_packname, "default label",
		    sizeof(lp->d_packname));

		lp->d_npartitions = MAXPARTITIONS;
		lp->d_checksum = dkcksum(lp);
	}
}

/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 */
static int
vndlock(struct vnd_softc *sc)
{
	int error;

	while ((sc->sc_flags & VNF_LOCKED) != 0) {
		sc->sc_flags |= VNF_WANTED;
		if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
			return error;
	}
	sc->sc_flags |= VNF_LOCKED;
	return 0;
}

/*
 * Unlock and wake up any waiters.
 */
static void
vndunlock(struct vnd_softc *sc)
{

	sc->sc_flags &= ~VNF_LOCKED;
	if ((sc->sc_flags & VNF_WANTED) != 0) {
		sc->sc_flags &= ~VNF_WANTED;
		wakeup(sc);
	}
}

#ifdef VND_COMPRESSION
/* compressed file read */
static void
compstrategy(struct buf *bp, off_t bn)
{
	int error;
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd =
	    device_lookup_private(&vnd_cd, unit);
	u_int32_t comp_block;
	struct uio auio;
	char *addr;
	int s;

	/* set up constants for data move */
	auio.uio_rw = UIO_READ;
	UIO_SETUP_SYSSPACE(&auio);

	/* read, and transfer the data */
	addr = bp->b_data;
	bp->b_resid = bp->b_bcount;
	s = splbio();
	while (bp->b_resid > 0) {
		unsigned length;
		size_t length_in_buffer;
		u_int32_t offset_in_buffer;
		struct iovec aiov;

		/* calculate the compressed block number */
		comp_block = bn / (off_t)vnd->sc_comp_blksz;

		/* check for good block number */
		if (comp_block >= vnd->sc_comp_numoffs) {
			bp->b_error = EINVAL;
			splx(s);
			return;
		}

		/* read in the compressed block, if not in buffer */
		if (comp_block != vnd->sc_comp_buffblk) {
			length = vnd->sc_comp_offsets[comp_block + 1] -
			    vnd->sc_comp_offsets[comp_block];
			vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
			error = vn_rdwr(UIO_READ, vnd->sc_vp, vnd->sc_comp_buff,
			    length, vnd->sc_comp_offsets[comp_block],
			    UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vnd->sc_cred,
			    NULL, NULL);
			if (error) {
				bp->b_error = error;
				VOP_UNLOCK(vnd->sc_vp);
				splx(s);
				return;
			}
			/* uncompress the buffer */
			vnd->sc_comp_stream.next_in = vnd->sc_comp_buff;
			vnd->sc_comp_stream.avail_in = length;
			vnd->sc_comp_stream.next_out = vnd->sc_comp_decombuf;
			vnd->sc_comp_stream.avail_out = vnd->sc_comp_blksz;
			inflateReset(&vnd->sc_comp_stream);
			error = inflate(&vnd->sc_comp_stream, Z_FINISH);
			if (error != Z_STREAM_END) {
				if (vnd->sc_comp_stream.msg)
					aprint_normal_dev(vnd->sc_dev,
					    "compressed file, %s\n",
					    vnd->sc_comp_stream.msg);
				bp->b_error = EBADMSG;
				VOP_UNLOCK(vnd->sc_vp);
				splx(s);
				return;
			}
			vnd->sc_comp_buffblk = comp_block;
			VOP_UNLOCK(vnd->sc_vp);
		}

		/* transfer the usable uncompressed data */
		offset_in_buffer = bn % (off_t)vnd->sc_comp_blksz;
		length_in_buffer = vnd->sc_comp_blksz - offset_in_buffer;
		if (length_in_buffer > bp->b_resid)
			length_in_buffer = bp->b_resid;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		aiov.iov_base = addr;
		aiov.iov_len = length_in_buffer;
		auio.uio_resid = aiov.iov_len;
		auio.uio_offset = 0;
		error = uiomove(vnd->sc_comp_decombuf + offset_in_buffer,
		    length_in_buffer, &auio);
		if (error) {
			bp->b_error = error;
			splx(s);
			return;
		}

		bn += length_in_buffer;
		addr += length_in_buffer;
		bp->b_resid -= length_in_buffer;
	}
	splx(s);
}

/* compression memory allocation routines */
static void *
vnd_alloc(void *aux, u_int items, u_int siz)
{
	return malloc(items * siz, M_TEMP, M_NOWAIT);
}

static void
vnd_free(void *aux, void *ptr)
{
	free(ptr, M_TEMP);
}
#endif /* VND_COMPRESSION */

static void
vnd_set_geometry(struct vnd_softc *vnd)
{
	struct disk_geom *dg = &vnd->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = (int64_t)vnd->sc_geom.vng_nsectors *
	    vnd->sc_geom.vng_ntracks * vnd->sc_geom.vng_ncylinders;
	dg->dg_secsize = vnd->sc_geom.vng_secsize;
	dg->dg_nsectors = vnd->sc_geom.vng_nsectors;
	dg->dg_ntracks = vnd->sc_geom.vng_ntracks;
	dg->dg_ncylinders = vnd->sc_geom.vng_ncylinders;

#ifdef DEBUG
	if (vnddebug & VDB_LABEL) {
		printf("dg->dg_secperunit: %" PRId64 "\n", dg->dg_secperunit);
		printf("dg->dg_ncylinders: %u\n", dg->dg_ncylinders);
	}
#endif
	disk_set_info(vnd->sc_dev, &vnd->sc_dkdev, NULL);
}

#ifdef VND_COMPRESSION
#define	VND_DEPENDS "zlib"
#else
#define	VND_DEPENDS NULL
#endif
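/* Compressed-image support uses zlib, hence the module dependency. */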

MODULE(MODULE_CLASS_DRIVER, vnd, VND_DEPENDS);

#ifdef _MODULE
int vnd_bmajor = -1, vnd_cmajor = -1;

CFDRIVER_DECL(vnd, DV_DISK, NULL);
#endif

static int
vnd_modcmd(modcmd_t cmd, void *arg)
{
	int error = 0;

	switch (cmd) {
	case MODULE_CMD_INIT:
#ifdef _MODULE
		error = config_cfdriver_attach(&vnd_cd);
		if (error)
			break;

		error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
		if (error) {
			config_cfdriver_detach(&vnd_cd);
#ifdef DIAGNOSTIC
			aprint_error("%s: unable to register cfattach for "
			    "%s, error %d\n", __func__, vnd_cd.cd_name, error);
#endif
			break;
		}

		/*
		 * Attach the {b,c}devsw's
		 */
		error = devsw_attach("vnd", &vnd_bdevsw, &vnd_bmajor,
		    &vnd_cdevsw, &vnd_cmajor);
		/*
		 * If devsw_attach fails, remove from autoconf database
		 */
		if (error) {
			config_cfattach_detach(vnd_cd.cd_name, &vnd_ca);
			config_cfdriver_detach(&vnd_cd);
#ifdef DIAGNOSTIC
			aprint_error("%s: unable to attach %s devsw, "
			    "error %d\n", __func__, vnd_cd.cd_name, error);
#endif
			break;
		}
#endif
		break;

	case MODULE_CMD_FINI:
#ifdef _MODULE
		/*
		 * Remove {b,c}devsw's
		 */
		devsw_detach(&vnd_bdevsw, &vnd_cdevsw);

		/*
		 * Now remove device from autoconf database
		 */
		error = config_cfattach_detach(vnd_cd.cd_name, &vnd_ca);
		if (error) {
			(void)devsw_attach("vnd", &vnd_bdevsw, &vnd_bmajor,
			    &vnd_cdevsw, &vnd_cmajor);
#ifdef DIAGNOSTIC
			aprint_error("%s: failed to detach %s cfattach, "
			    "error %d\n", __func__, vnd_cd.cd_name, error);
#endif
			break;
		}
		error = config_cfdriver_detach(&vnd_cd);
		if (error) {
			(void)config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
			(void)devsw_attach("vnd", &vnd_bdevsw, &vnd_bmajor,
			    &vnd_cdevsw, &vnd_cmajor);
#ifdef DIAGNOSTIC
			aprint_error("%s: failed to detach %s cfdriver, "
			    "error %d\n", __func__, vnd_cd.cd_name, error);
#endif
			break;
		}
#endif
		break;

	case MODULE_CMD_STAT:
		return ENOTTY;

	default:
		return ENOTTY;
	}

	return error;
}