/*	$NetBSD: vnd.c,v 1.281 2021/06/13 10:01:43 mlelstv Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 1998, 2008, 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vn.c 1.13 94/04/02$
 *
 *	@(#)vn.c	8.9 (Berkeley) 5/14/95
 */

/*
 * Vnode disk driver.
 *
 * Block/character interface to a vnode.  Allows one to treat a file
 * as a disk (e.g. build a filesystem in it, mount it, etc.).
 *
 * NOTE 1: If the vnode supports the VOP_BMAP and VOP_STRATEGY operations,
 * this uses them to avoid distorting the local buffer cache.  If those
 * block-level operations are not available, this falls back to the regular
 * read and write calls.  Using these may distort the cache in some cases,
 * but it is better to have the driver working than to prevent it from
 * working on file systems where the block-level operations are not
 * implemented for whatever reason.
 *
 * NOTE 2: There is a security issue involved with this driver.
 * Once mounted all access to the contents of the "mapped" file via
 * the special file is controlled by the permissions on the special
 * file, the protection of the mapped file is ignored (effectively,
 * by using root credentials in all transactions).
 *
 * NOTE 3: Doesn't interact with leases, should it?
 */
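
/*
 * Illustrative userland usage, via vnconfig(8) and the default label
 * created by vndgetdisklabel() below; a sketch for orientation only,
 * not part of the driver proper:
 *
 *	vnconfig vnd0 /tmp/diskimage	# map the image file onto vnd0
 *	newfs /dev/rvnd0a		# build a file system in it
 *	mount /dev/vnd0a /mnt		# ... and mount it
 *	umount /mnt
 *	vnconfig -u vnd0		# unconfigure the unit again
 */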

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.281 2021/06/13 10:01:43 mlelstv Exp $");

#if defined(_KERNEL_OPT)
#include "opt_vnd.h"
#include "opt_compat_netbsd.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/malloc.h>
#include <sys/ioctl.h>
#include <sys/disklabel.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/fstrans.h>
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/conf.h>
#include <sys/kauth.h>
#include <sys/module.h>
#include <sys/compat_stub.h>
#include <sys/atomic.h>

#include <net/zlib.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

#include <dev/dkvar.h>
#include <dev/vndvar.h>

#include "ioconf.h"

#if defined(VNDDEBUG) && !defined(DEBUG)
#define DEBUG
#endif

#ifdef DEBUG
int dovndcluster = 1;
#define VDB_FOLLOW	0x01
#define VDB_INIT	0x02
#define VDB_IO		0x04
#define VDB_LABEL	0x08
int vnddebug = 0;
#endif

#define vndunit(x)	DISKUNIT(x)

struct vndxfer {
	struct buf vx_buf;
	struct vnd_softc *vx_vnd;
};
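
/*
 * vx_buf is the first member of struct vndxfer, so a pointer to the
 * embedded buf can be cast back to the enclosing transfer header.
 */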
#define VND_BUFTOXFER(bp)	((struct vndxfer *)(void *)bp)

#define VND_GETXFER(vnd)	pool_get(&(vnd)->sc_vxpool, PR_WAITOK)
#define VND_PUTXFER(vnd, vx)	pool_put(&(vnd)->sc_vxpool, (vx))

#define VNDLABELDEV(dev) \
    (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART))

#define	VND_MAXPENDING(vnd)	((vnd)->sc_maxactive * 4)
#define	VND_MAXPAGES(vnd)	(1024 * 1024 / PAGE_SIZE)
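
/*
 * VND_MAXPENDING bounds the number of requests queued ahead of the
 * worker thread in read/write mode; VND_MAXPAGES caps how many pages
 * (about 1 MB worth) the backing vnode may cache before we flush it.
 */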

static void	vndclear(struct vnd_softc *, int);
static int	vnddoclear(struct vnd_softc *, int, int, bool);
static int	vndsetcred(struct vnd_softc *, kauth_cred_t);
static void	vndthrottle(struct vnd_softc *, struct vnode *);
static void	vndiodone(struct buf *);
#if 0
static void	vndshutdown(void);
#endif

static void	vndgetdefaultlabel(struct vnd_softc *, struct disklabel *);
static void	vndgetdisklabel(dev_t, struct vnd_softc *);

static int	vndlock(struct vnd_softc *);
static void	vndunlock(struct vnd_softc *);
#ifdef VND_COMPRESSION
static void	compstrategy(struct buf *, off_t);
static void	*vnd_alloc(void *, u_int, u_int);
static void	vnd_free(void *, void *);
#endif /* VND_COMPRESSION */

static void	vndthread(void *);
static bool	vnode_has_op(const struct vnode *, int);
static void	handle_with_rdwr(struct vnd_softc *, const struct buf *,
		    struct buf *);
static void	handle_with_strategy(struct vnd_softc *, const struct buf *,
		    struct buf *);
static void	vnd_set_geometry(struct vnd_softc *);

static dev_type_open(vndopen);
static dev_type_close(vndclose);
static dev_type_read(vndread);
static dev_type_write(vndwrite);
static dev_type_ioctl(vndioctl);
static dev_type_strategy(vndstrategy);
static dev_type_dump(vnddump);
static dev_type_size(vndsize);

const struct bdevsw vnd_bdevsw = {
	.d_open = vndopen,
	.d_close = vndclose,
	.d_strategy = vndstrategy,
	.d_ioctl = vndioctl,
	.d_dump = vnddump,
	.d_psize = vndsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

const struct cdevsw vnd_cdevsw = {
	.d_open = vndopen,
	.d_close = vndclose,
	.d_read = vndread,
	.d_write = vndwrite,
	.d_ioctl = vndioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

static int	vnd_match(device_t, cfdata_t, void *);
static void	vnd_attach(device_t, device_t, void *);
static int	vnd_detach(device_t, int);

CFATTACH_DECL3_NEW(vnd, sizeof(struct vnd_softc),
    vnd_match, vnd_attach, vnd_detach, NULL, NULL, NULL, DVF_DETACH_SHUTDOWN);

static struct vnd_softc	*vnd_spawn(int);
static int	vnd_destroy(device_t);

static const struct dkdriver vnddkdriver = {
	.d_strategy = vndstrategy,
	.d_minphys = minphys
};

void
vndattach(int num)
{
	int error;

	error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
	if (error)
		aprint_error("%s: unable to register cfattach, error = %d\n",
		    vnd_cd.cd_name, error);
}

static int
vnd_match(device_t self, cfdata_t cfdata, void *aux)
{

	return 1;
}

static void
vnd_attach(device_t parent, device_t self, void *aux)
{
	struct vnd_softc *sc = device_private(self);

	sc->sc_dev = self;
	sc->sc_comp_offsets = NULL;
	sc->sc_comp_buff = NULL;
	sc->sc_comp_decombuf = NULL;
	bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK);
	disk_init(&sc->sc_dkdev, device_xname(self), &vnddkdriver);
	if (!pmf_device_register(self, NULL, NULL))
		aprint_error_dev(self, "couldn't establish power handler\n");
}

static int
vnd_detach(device_t self, int flags)
{
	int error;
	struct vnd_softc *sc = device_private(self);

	if (sc->sc_flags & VNF_INITED) {
		error = vnddoclear(sc, 0, -1, (flags & DETACH_FORCE) != 0);
		if (error != 0)
			return error;
	}

	pmf_device_deregister(self);
	bufq_free(sc->sc_tab);
	disk_destroy(&sc->sc_dkdev);

	return 0;
}

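/*
 * vnd is a pseudo-device: a unit's softc is created on demand the
 * first time the unit is opened (see vndopen() below) and destroyed
 * again on last close once the unit has been unconfigured.
 */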
static struct vnd_softc *
vnd_spawn(int unit)
{
	cfdata_t cf;

	cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK);
	cf->cf_name = vnd_cd.cd_name;
	cf->cf_atname = vnd_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	return device_private(config_attach_pseudo(cf));
}

static int
vnd_destroy(device_t dev)
{
	int error;
	cfdata_t cf;

	cf = device_cfdata(dev);
	error = config_detach(dev, DETACH_QUIET);
	if (error)
		return error;
	free(cf, M_DEVBUF);
	return 0;
}

static int
vndopen(dev_t dev, int flags, int mode, struct lwp *l)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part, pmask;
	struct disklabel *lp;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndopen(0x%"PRIx64", 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
#endif
	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL) {
		sc = vnd_spawn(unit);
		if (sc == NULL)
			return ENOMEM;

		/* compatibility, keep disklabel after close */
		sc->sc_flags = VNF_KLABEL;
	}

	if ((error = vndlock(sc)) != 0)
		return error;

	mutex_enter(&sc->sc_dkdev.dk_openlock);

	if ((sc->sc_flags & VNF_CLEARING) != 0) {
		error = ENXIO;
		goto done;
	}

	lp = sc->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	if (sc->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto done;
	}

	if ((flags & FWRITE) && (sc->sc_flags & VNF_READONLY)) {
		error = EROFS;
		goto done;
	}

	if (sc->sc_flags & VNF_INITED) {
		if ((sc->sc_dkdev.dk_openmask & ~(1<<RAW_PART)) != 0) {
			/*
			 * If any non-raw partition is open, but the disk
			 * has been invalidated, disallow further opens.
			 */
			if ((sc->sc_flags & VNF_VLABEL) == 0) {
				error = EIO;
				goto done;
			}
		} else {
			/*
			 * Load the partition info if not already loaded.
			 */
			if ((sc->sc_flags & VNF_VLABEL) == 0) {
				sc->sc_flags |= VNF_VLABEL;
				vndgetdisklabel(dev, sc);
			}
		}
	}

	/* Check that the partition exists. */
	if (part != RAW_PART) {
		if (((sc->sc_flags & VNF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		     (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto done;
		}
	}

	/* Prevent our unit from being unconfigured while open. */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

 done:
	mutex_exit(&sc->sc_dkdev.dk_openlock);
	vndunlock(sc);
	return error;
}

static int
vndclose(dev_t dev, int flags, int mode, struct lwp *l)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclose(0x%"PRIx64", 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
#endif
	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((error = vndlock(sc)) != 0)
		return error;

	mutex_enter(&sc->sc_dkdev.dk_openlock);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

	/* are we the last opener? */
	if (sc->sc_dkdev.dk_openmask == 0) {
		if ((sc->sc_flags & VNF_KLABEL) == 0)
			sc->sc_flags &= ~VNF_VLABEL;
	}

	mutex_exit(&sc->sc_dkdev.dk_openlock);

	vndunlock(sc);

	if ((sc->sc_flags & VNF_INITED) == 0) {
		if ((error = vnd_destroy(sc->sc_dev)) != 0) {
			aprint_error_dev(sc->sc_dev,
			    "unable to detach instance\n");
			return error;
		}
	}

	return 0;
}

/*
 * Queue the request, and wakeup the kernel thread to handle it.
 */
static void
vndstrategy(struct buf *bp)
{
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd =
	    device_lookup_private(&vnd_cd, unit);
	struct disklabel *lp;
	daddr_t blkno;
	int s = splbio();

	if (vnd == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	lp = vnd->sc_dkdev.dk_label;

	if ((vnd->sc_flags & VNF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}

	/*
	 * The transfer must be a whole number of blocks.
	 */
	if ((bp->b_bcount % lp->d_secsize) != 0) {
		bp->b_error = EINVAL;
		goto done;
	}

	/*
	 * check if we're read-only.
	 */
	if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) {
		bp->b_error = EACCES;
		goto done;
	}

	/* If it's a nil transfer, wake up the top half now. */
	if (bp->b_bcount == 0) {
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an error,
	 * the bounds check will flag that for us.
	 */
	if (DISKPART(bp->b_dev) == RAW_PART) {
		if (bounds_check_with_mediasize(bp, DEV_BSIZE,
		    vnd->sc_size) <= 0)
			goto done;
	} else {
		if (bounds_check_with_label(&vnd->sc_dkdev,
		    bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0)
			goto done;
	}

	/*
	 * Put the block number in terms of the logical blocksize
	 * of the "device".
	 */

	blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);

	/*
	 * Translate the partition-relative block number to an absolute.
	 */
	if (DISKPART(bp->b_dev) != RAW_PART) {
		struct partition *pp;

		pp = &vnd->sc_dkdev.dk_label->d_partitions[
		    DISKPART(bp->b_dev)];
		blkno += pp->p_offset;
	}
	bp->b_rawblkno = blkno;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndstrategy(%p): unit %d\n", bp, unit);
#endif
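	/*
	 * In read/write mode, throttle producers so that at most
	 * VND_MAXPENDING(vnd) requests sit in the queue; the worker
	 * thread wakes us as it dequeues them.
	 */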
	if ((vnd->sc_flags & VNF_USE_VN_RDWR)) {
		KASSERT(vnd->sc_pending >= 0 &&
		    vnd->sc_pending <= VND_MAXPENDING(vnd));
		while (vnd->sc_pending == VND_MAXPENDING(vnd))
			tsleep(&vnd->sc_pending, PRIBIO, "vndpc", 0);
		vnd->sc_pending++;
	}
	bufq_put(vnd->sc_tab, bp);
	wakeup(&vnd->sc_tab);
	splx(s);
	return;

 done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
	splx(s);
}

static bool
vnode_has_strategy(struct vnd_softc *vnd)
{
	return vnode_has_op(vnd->sc_vp, VOFFSET(vop_bmap)) &&
	    vnode_has_op(vnd->sc_vp, VOFFSET(vop_strategy));
}

/*
 * Check whether the backend's smallest supported I/O size evenly
 * divides the vnd sector size.  If it does not, requests could end
 * up smaller than the backend can handle, so bmap/strategy must not
 * be used.
 */
static bool
vnode_has_large_blocks(struct vnd_softc *vnd)
{
	u_int32_t vnd_secsize, iosize;

	iosize = vnd->sc_iosize;
	vnd_secsize = vnd->sc_geom.vng_secsize;

	return vnd_secsize % iosize != 0;
}
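
/*
 * For example, a vnd configured with 512-byte sectors over a backend
 * whose smallest I/O size is 4096 bytes yields 512 % 4096 != 0, so
 * bmap/strategy is rejected and the read/write path is used instead.
 */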

/* XXX this function needs a reliable check to detect
 * sparse files.  Otherwise, bmap/strategy may be used
 * and fail on non-allocated blocks.  VOP_READ/VOP_WRITE
 * works on sparse files.
 */
#if notyet
static bool
vnode_strategy_probe(struct vnd_softc *vnd)
{
	int error;
	daddr_t nbn;

	if (!vnode_has_strategy(vnd))
		return false;

	if (vnode_has_large_blocks(vnd))
		return false;

	/* Convert the first logical block number to its
	 * physical block number.
	 */
	error = 0;
	vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_BMAP(vnd->sc_vp, 0, NULL, &nbn, NULL);
	VOP_UNLOCK(vnd->sc_vp);

	/* Test if that worked. */
	if (error == 0 && (long)nbn == -1)
		return false;

	return true;
}
#endif

static void
vndthread(void *arg)
{
	struct vnd_softc *vnd = arg;
	int s;

	/* Determine whether we can *use* VOP_BMAP and VOP_STRATEGY to
	 * directly access the backing vnode.  If we can, use these two
	 * operations to avoid messing with the local buffer cache.
	 * Otherwise fall back to regular VOP_READ/VOP_WRITE operations
	 * which are guaranteed to work with any file system. */
	if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0 &&
	    ! vnode_has_strategy(vnd))
		vnd->sc_flags |= VNF_USE_VN_RDWR;

	/* VOP_STRATEGY can only be used if the backing vnode allows
	 * access to blocks as small as defined by the vnd geometry.
	 */
	if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0 &&
	    vnode_has_large_blocks(vnd))
		vnd->sc_flags |= VNF_USE_VN_RDWR;

#ifdef DEBUG
	if (vnddebug & VDB_INIT)
		printf("vndthread: vp %p, %s\n", vnd->sc_vp,
		    (vnd->sc_flags & VNF_USE_VN_RDWR) == 0 ?
		    "using bmap/strategy operations" :
		    "using read/write operations");
#endif

	s = splbio();
	vnd->sc_flags |= VNF_KTHREAD;
	wakeup(&vnd->sc_kthread);

	/*
	 * Dequeue requests and serve them depending on the available
	 * vnode operations.
	 */
	while ((vnd->sc_flags & VNF_VUNCONF) == 0) {
		struct vndxfer *vnx;
		struct buf *obp;
		struct buf *bp;

		obp = bufq_get(vnd->sc_tab);
		if (obp == NULL) {
			tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0);
			continue;
		}
		if ((vnd->sc_flags & VNF_USE_VN_RDWR)) {
			KASSERT(vnd->sc_pending > 0 &&
			    vnd->sc_pending <= VND_MAXPENDING(vnd));
			if (vnd->sc_pending-- == VND_MAXPENDING(vnd))
				wakeup(&vnd->sc_pending);
		}
		splx(s);
#ifdef DEBUG
		if (vnddebug & VDB_FOLLOW)
			printf("vndthread(%p)\n", obp);
#endif

		if (vnd->sc_vp->v_mount == NULL) {
			obp->b_error = ENXIO;
			goto done;
		}
#ifdef VND_COMPRESSION
		/* handle a compressed read */
		if ((obp->b_flags & B_READ) != 0 && (vnd->sc_flags & VNF_COMP)) {
			off_t bn;

			/* Convert to a byte offset within the file. */
			bn = obp->b_rawblkno *
			    vnd->sc_dkdev.dk_label->d_secsize;

			compstrategy(obp, bn);
			goto done;
		}
#endif /* VND_COMPRESSION */

		/*
		 * Allocate a header for this transfer and link it to the
		 * buffer
		 */
		s = splbio();
		vnx = VND_GETXFER(vnd);
		splx(s);
		vnx->vx_vnd = vnd;

		s = splbio();
		while (vnd->sc_active >= vnd->sc_maxactive) {
			tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0);
		}
		vnd->sc_active++;
		splx(s);

		/* Instrumentation. */
		disk_busy(&vnd->sc_dkdev);

		bp = &vnx->vx_buf;
		buf_init(bp);
		bp->b_flags = (obp->b_flags & (B_READ | B_PHYS | B_RAW));
		bp->b_oflags = obp->b_oflags;
		bp->b_cflags = obp->b_cflags;
		bp->b_iodone = vndiodone;
		bp->b_private = obp;
		bp->b_vp = vnd->sc_vp;
		bp->b_objlock = bp->b_vp->v_interlock;
		bp->b_data = obp->b_data;
		bp->b_bcount = obp->b_bcount;
		BIO_COPYPRIO(bp, obp);

		/* Make sure the request succeeds while suspending this fs. */
		fstrans_start_lazy(vnd->sc_vp->v_mount);

		/* Handle the request using the appropriate operations. */
		if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0)
			handle_with_strategy(vnd, obp, bp);
		else
			handle_with_rdwr(vnd, obp, bp);

		fstrans_done(vnd->sc_vp->v_mount);

		s = splbio();
		continue;

 done:
		biodone(obp);
		s = splbio();
	}

	vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF);
	wakeup(&vnd->sc_kthread);
	splx(s);
	kthread_exit(0);
}

/*
 * Checks if the given vnode supports the requested operation.
 * The operation is specified by the offset returned by VOFFSET.
 *
 * XXX The test below used to determine this is quite fragile
 * because it relies on the file system to use genfs to specify
 * unimplemented operations.  There might be another way to do
 * it more cleanly.
 */
static bool
vnode_has_op(const struct vnode *vp, int opoffset)
{
	int (*defaultp)(void *);
	int (*opp)(void *);

	defaultp = vp->v_op[VOFFSET(vop_default)];
	opp = vp->v_op[opoffset];

	return opp != defaultp && opp != genfs_eopnotsupp &&
	    opp != genfs_badop && opp != genfs_nullop;
}

/*
 * Handles the read/write request given in 'bp' using the vnode's VOP_READ
 * and VOP_WRITE operations.
 *
 * 'obp' is a pointer to the original request fed to the vnd device.
 */
static void
handle_with_rdwr(struct vnd_softc *vnd, const struct buf *obp, struct buf *bp)
{
	bool doread;
	off_t offset;
	size_t len, resid;
	struct vnode *vp;
	int npages;

	doread = bp->b_flags & B_READ;
	offset = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;
	len = bp->b_bcount;
	vp = vnd->sc_vp;

#if defined(DEBUG)
	if (vnddebug & VDB_IO)
		printf("vnd (rdwr): vp %p, %s, rawblkno 0x%" PRIx64
		    ", secsize %d, offset %" PRIu64
		    ", bcount %d\n",
		    vp, doread ? "read" : "write", obp->b_rawblkno,
		    vnd->sc_dkdev.dk_label->d_secsize, offset,
		    bp->b_bcount);
#endif

	/* Issue the read or write operation. */
	bp->b_error =
	    vn_rdwr(doread ? UIO_READ : UIO_WRITE,
	    vp, bp->b_data, len, offset, UIO_SYSSPACE,
	    IO_ADV_ENCODE(POSIX_FADV_NOREUSE) | IO_DIRECT,
	    vnd->sc_cred, &resid, NULL);
	bp->b_resid = resid;

	/*
	 * Avoid caching too many pages, the vnd user
	 * is usually a filesystem and caches itself.
	 * We need some amount of caching to not hinder
	 * read-ahead and write-behind operations.
	 */
	npages = atomic_load_relaxed(&vp->v_uobj.uo_npages);
	if (npages > VND_MAXPAGES(vnd)) {
		rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
		(void) VOP_PUTPAGES(vp, 0, 0,
		    PGO_ALLPAGES | PGO_CLEANIT | PGO_FREE);
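		/* Note: VOP_PUTPAGES releases vmobjlock for us. */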
	}

	/* We need to increase the number of outputs on the vnode if
	 * there was any write to it. */
	if (!doread) {
		mutex_enter(vp->v_interlock);
		vp->v_numoutput++;
		mutex_exit(vp->v_interlock);
	}

	biodone(bp);
}

/*
 * Handles the read/write request given in 'bp' using the vnode's VOP_BMAP
 * and VOP_STRATEGY operations.
 *
 * 'obp' is a pointer to the original request fed to the vnd device.
 */
static void
handle_with_strategy(struct vnd_softc *vnd, const struct buf *obp,
    struct buf *bp)
{
	int bsize, error, flags, skipped;
	size_t resid, sz;
	off_t bn, offset;
	struct vnode *vp;
	struct buf *nbp = NULL;

	flags = obp->b_flags;


	/* convert to a byte offset within the file. */
	bn = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;

	bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
	skipped = 0;

	/*
	 * Break the request into bsize pieces and feed them
	 * sequentially using VOP_BMAP/VOP_STRATEGY.
	 * We do it this way to keep from flooding NFS servers if we
	 * are connected to an NFS file.  This places the burden on
	 * the client rather than the server.
	 */
	error = 0;
	bp->b_resid = bp->b_bcount;
	for (offset = 0, resid = bp->b_resid; /* true */;
	    resid -= sz, offset += sz) {
		daddr_t nbn;
		int off, nra;

		nra = 0;
		vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
		VOP_UNLOCK(vnd->sc_vp);

		if (error == 0 && (long)nbn == -1)
			error = EIO;

		/*
		 * If there was an error or a hole in the file...punt.
		 * Note that we may have to wait for any operations
		 * that we have already fired off before releasing
		 * the buffer.
		 *
		 * XXX we could deal with holes here but it would be
		 * a hassle (in the write case).
		 */
		if (error) {
			skipped += resid;
			break;
		}

#ifdef DEBUG
		if (!dovndcluster)
			nra = 0;
#endif

		off = bn % bsize;
		sz = MIN(((off_t)1 + nra) * bsize - off, resid);
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64
			    " sz 0x%zx\n", vnd->sc_vp, vp, (long long)bn,
			    nbn, sz);
#endif

		nbp = getiobuf(vp, true);
		nestiobuf_setup(bp, nbp, offset, sz);
		nbp->b_blkno = nbn + btodb(off);
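
		/*
		 * nestiobuf_setup() made nbp a child of bp covering
		 * [offset, offset + sz); once all children (plus any
		 * nestiobuf_done() accounting for skipped bytes) have
		 * completed, the parent completes via vndiodone().
		 */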

#if 0 /* XXX #ifdef DEBUG */
		if (vnddebug & VDB_IO)
			printf("vndstart(%ld): bp %p vp %p blkno "
			    "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n",
			    (long) (vnd-vnd_softc), &nbp->vb_buf,
			    nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno,
			    nbp->vb_buf.b_flags, nbp->vb_buf.b_data,
			    nbp->vb_buf.b_bcount);
#endif
		if (resid == sz) {
			break;
		}
		VOP_STRATEGY(vp, nbp);
		bn += sz;
	}
	if (!(flags & B_READ)) {
		struct vnode *w_vp;
		/*
		 * this is the last nested buf, account for
		 * the parent buf write too.
		 * This has to be done last, so that
		 * fsync won't wait for this write which
		 * has no chance to complete before all nested bufs
		 * have been queued.  But it has to be done
		 * before the last VOP_STRATEGY()
		 * or the call to nestiobuf_done().
		 */
		w_vp = bp->b_vp;
		mutex_enter(w_vp->v_interlock);
		w_vp->v_numoutput++;
		mutex_exit(w_vp->v_interlock);
	}
	KASSERT(skipped != 0 || nbp != NULL);
	if (skipped)
		nestiobuf_done(bp, skipped, error);
	else
		VOP_STRATEGY(vp, nbp);
}

static void
vndiodone(struct buf *bp)
{
	struct vndxfer *vnx = VND_BUFTOXFER(bp);
	struct vnd_softc *vnd = vnx->vx_vnd;
	struct buf *obp = bp->b_private;
	int s = splbio();

	KERNEL_LOCK(1, NULL);		/* XXXSMP */
	KASSERT(&vnx->vx_buf == bp);
	KASSERT(vnd->sc_active > 0);
#ifdef DEBUG
	if (vnddebug & VDB_IO) {
		printf("vndiodone1: bp %p iodone: error %d\n",
		    bp, bp->b_error);
	}
#endif
	disk_unbusy(&vnd->sc_dkdev, bp->b_bcount - bp->b_resid,
	    (bp->b_flags & B_READ));
	vnd->sc_active--;
	if (vnd->sc_active == 0) {
		wakeup(&vnd->sc_tab);
	}
	KERNEL_UNLOCK_ONE(NULL);	/* XXXSMP */
	splx(s);
	obp->b_error = bp->b_error;
	obp->b_resid = bp->b_resid;
	buf_destroy(bp);
	VND_PUTXFER(vnd, vnx);
	biodone(obp);
}

/* ARGSUSED */
static int
vndread(dev_t dev, struct uio *uio, int flags)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndread(0x%"PRIx64", %p)\n", dev, uio);
#endif

	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return ENXIO;

	return physio(vndstrategy, NULL, dev, B_READ, minphys, uio);
}

/* ARGSUSED */
static int
vndwrite(dev_t dev, struct uio *uio, int flags)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndwrite(0x%"PRIx64", %p)\n", dev, uio);
#endif

	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return ENXIO;

	return physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio);
}

static int
vnd_cget(struct lwp *l, int unit, int *un, struct vattr *va)
{
	int error;
	struct vnd_softc *vnd;

	if (*un == -1)
		*un = unit;
	if (*un < 0)
		return EINVAL;

	vnd = device_lookup_private(&vnd_cd, *un);
	if (vnd == NULL)
		return -1;

	if ((vnd->sc_flags & VNF_INITED) == 0)
		return -1;

	vn_lock(vnd->sc_vp, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(vnd->sc_vp, va, l->l_cred);
	VOP_UNLOCK(vnd->sc_vp);
	return error;
}

static int
vnddoclear(struct vnd_softc *vnd, int pmask, int minor, bool force)
{
	int error;

	if ((error = vndlock(vnd)) != 0)
		return error;

	/*
	 * Don't unconfigure if any other partitions are open
	 * or if both the character and block flavors of this
	 * partition are open.
	 */
	if (DK_BUSY(vnd, pmask) && !force) {
		vndunlock(vnd);
		return EBUSY;
	}

	/* Delete all of our wedges */
	dkwedge_delall(&vnd->sc_dkdev);

	/*
	 * XXX vndclear() might call vndclose() implicitly;
	 * release lock to avoid recursion
	 *
	 * Set VNF_CLEARING to prevent vndopen() from
	 * sneaking in after we vndunlock().
	 */
	vnd->sc_flags |= VNF_CLEARING;
	vndunlock(vnd);
	vndclear(vnd, minor);
#ifdef DEBUG
	if (vnddebug & VDB_INIT)
		printf("%s: CLRed\n", __func__);
#endif

	/* Destroy the xfer and buffer pools. */
	pool_destroy(&vnd->sc_vxpool);

	/* Detach the disk. */
	disk_detach(&vnd->sc_dkdev);

	return 0;
}

static int
vndioctl_get(struct lwp *l, void *data, int unit, struct vattr *va)
{
	int error;

	KASSERT(l);

	/* the first member is always int vnd_unit in all the versions */
	if (*(int *)data >= vnd_cd.cd_ndevs)
		return ENXIO;

	switch (error = vnd_cget(l, unit, (int *)data, va)) {
	case -1:
		/* unused is not an error */
		memset(va, 0, sizeof(*va));
		/*FALLTHROUGH*/
	case 0:
		return 0;
	default:
		return error;
	}
}

/* ARGSUSED */
static int
vndioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	bool force;
	int unit = vndunit(dev);
	struct vnd_softc *vnd;
	struct vnd_ioctl *vio;
	struct vattr vattr;
	struct pathbuf *pb;
	struct nameidata nd;
	int error, part, pmask;
	uint64_t geomsize;
	int fflags;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndioctl(0x%"PRIx64", 0x%lx, %p, 0x%x, %p): unit %d\n",
		    dev, cmd, data, flag, l->l_proc, unit);
#endif
	/* Do the gets first; they don't need initialization or verification */
	switch (cmd) {
	case VNDIOCGET:
		if ((error = vndioctl_get(l, data, unit, &vattr)) != 0)
			return error;

		struct vnd_user *vnu = data;
		vnu->vnu_dev = vattr.va_fsid;
		vnu->vnu_ino = vattr.va_fileid;
		return 0;

	default:
		/* First check for COMPAT_50 hook */
		MODULE_HOOK_CALL(compat_vndioctl_50_hook,
		    (cmd, l, data, unit, &vattr, vndioctl_get),
		    enosys(), error);

		/*
		 * If not present, then COMPAT_30 hook also not
		 * present, so just continue with checks for the
		 * "write" commands
		 */
		if (error == ENOSYS) {
			error = 0;
			break;
		}

		/* If not already handled, try the COMPAT_30 hook */
		if (error == EPASSTHROUGH)
			MODULE_HOOK_CALL(compat_vndioctl_30_hook,
			    (cmd, l, data, unit, &vattr, vndioctl_get),
			    enosys(), error);

		/* If no COMPAT_30 module, or not handled, check writes */
		if (error == ENOSYS || error == EPASSTHROUGH) {
			error = 0;
			break;
		}
		return error;
	}

	vnd = device_lookup_private(&vnd_cd, unit);
	if (vnd == NULL)
		return ENXIO;
	vio = (struct vnd_ioctl *)data;

	/* Must be open for writes for these commands... */
	switch (cmd) {
	case VNDIOCSET50:
	case VNDIOCCLR50:
		if (!compat_vndioctl_50_hook.hooked)
			return EINVAL;
		/* FALLTHROUGH */
	case VNDIOCSET:
	case VNDIOCCLR:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCSDINFO:
	case ODIOCWDINFO:
#endif
	case DIOCKLABEL:
	case DIOCWLABEL:
	case DIOCCACHESYNC:
		if ((flag & FWRITE) == 0)
			return EBADF;
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case VNDIOCCLR:
	case VNDIOCCLR50:
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
	case DIOCGPARTINFO:
	case DIOCKLABEL:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
	case DIOCGCACHE:
	case DIOCGSTRATEGY:
	case DIOCCACHESYNC:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCSDINFO:
	case ODIOCWDINFO:
	case ODIOCGDEFLABEL:
#endif
		if ((vnd->sc_flags & VNF_INITED) == 0)
			return ENXIO;
	}

	error = disk_ioctl(&vnd->sc_dkdev, dev, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return error;


	switch (cmd) {
	case VNDIOCSET50:
	case VNDIOCSET:
		if (vnd->sc_flags & VNF_INITED)
			return EBUSY;

		if ((error = vndlock(vnd)) != 0)
			return error;

		fflags = FREAD;
		if ((vio->vnd_flags & VNDIOF_READONLY) == 0)
			fflags |= FWRITE;
		if ((vio->vnd_flags & VNDIOF_FILEIO) != 0)
			vnd->sc_flags |= VNF_USE_VN_RDWR;
		error = pathbuf_copyin(vio->vnd_file, &pb);
		if (error) {
			goto unlock_and_exit;
		}
		NDINIT(&nd, LOOKUP, FOLLOW, pb);
		if ((error = vn_open(&nd, fflags, 0)) != 0) {
			pathbuf_destroy(pb);
			goto unlock_and_exit;
		}
		KASSERT(l);
		error = VOP_GETATTR(nd.ni_vp, &vattr, l->l_cred);
		if (!error && nd.ni_vp->v_type != VREG)
			error = EOPNOTSUPP;
		if (!error && vattr.va_bytes < vattr.va_size)
			/* File is definitely sparse, use vn_rdwr() */
			vnd->sc_flags |= VNF_USE_VN_RDWR;
		if (error) {
			VOP_UNLOCK(nd.ni_vp);
			goto close_and_exit;
		}

		/* If using a compressed file, initialize its info */
		/* (or abort with an error if kernel has no compression) */
		if (vio->vnd_flags & VNDIOF_COMP) {
#ifdef VND_COMPRESSION
			struct vnd_comp_header *ch;
			int i;
			uint32_t comp_size;
			uint32_t comp_maxsize;

			/* allocate space for compressed file header */
			ch = malloc(sizeof(struct vnd_comp_header),
			    M_TEMP, M_WAITOK);

			/* read compressed file header */
			error = vn_rdwr(UIO_READ, nd.ni_vp, (void *)ch,
			    sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE,
			    IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
			if (error) {
				free(ch, M_TEMP);
				VOP_UNLOCK(nd.ni_vp);
				goto close_and_exit;
			}

			if (be32toh(ch->block_size) == 0 ||
			    be32toh(ch->num_blocks) > UINT32_MAX - 1) {
				free(ch, M_TEMP);
				VOP_UNLOCK(nd.ni_vp);
				error = EINVAL;
				goto close_and_exit;
			}

			/* save some header info */
			vnd->sc_comp_blksz = be32toh(ch->block_size);
			/* note last offset is the file byte size */
			vnd->sc_comp_numoffs = be32toh(ch->num_blocks) + 1;
			free(ch, M_TEMP);
			if (!DK_DEV_BSIZE_OK(vnd->sc_comp_blksz)) {
				VOP_UNLOCK(nd.ni_vp);
				error = EINVAL;
				goto close_and_exit;
			}
			KASSERT(0 < vnd->sc_comp_blksz);
			KASSERT(0 < vnd->sc_comp_numoffs);
			/*
			 * @#^@!$& gcc -Wtype-limits refuses to let me
			 * write SIZE_MAX/sizeof(uint64_t) < numoffs,
			 * because the range of the type on amd64 makes
			 * the comparisons always false.
			 */
#if SIZE_MAX <= UINT32_MAX*(64/CHAR_BIT)
			if (SIZE_MAX/sizeof(uint64_t) < vnd->sc_comp_numoffs) {
				VOP_UNLOCK(nd.ni_vp);
				error = EINVAL;
				goto close_and_exit;
			}
#endif
			if ((vattr.va_size < sizeof(struct vnd_comp_header)) ||
			    (vattr.va_size - sizeof(struct vnd_comp_header) <
			     sizeof(uint64_t)*vnd->sc_comp_numoffs) ||
			    (UQUAD_MAX/vnd->sc_comp_blksz <
			     vnd->sc_comp_numoffs - 1)) {
				VOP_UNLOCK(nd.ni_vp);
				error = EINVAL;
				goto close_and_exit;
			}

			/* set decompressed file size */
			KASSERT(vnd->sc_comp_numoffs - 1 <=
			    UQUAD_MAX/vnd->sc_comp_blksz);
			vattr.va_size =
			    ((u_quad_t)vnd->sc_comp_numoffs - 1) *
			    (u_quad_t)vnd->sc_comp_blksz;

			/* allocate space for all the compressed offsets */
			__CTASSERT(UINT32_MAX <= UQUAD_MAX/sizeof(uint64_t));
			vnd->sc_comp_offsets =
			    malloc(sizeof(uint64_t) * vnd->sc_comp_numoffs,
			    M_DEVBUF, M_WAITOK);

			/* read in the offsets */
			error = vn_rdwr(UIO_READ, nd.ni_vp,
			    (void *)vnd->sc_comp_offsets,
			    sizeof(uint64_t) * vnd->sc_comp_numoffs,
			    sizeof(struct vnd_comp_header), UIO_SYSSPACE,
			    IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
			if (error) {
				VOP_UNLOCK(nd.ni_vp);
				goto close_and_exit;
			}
			/*
			 * find largest block size (used for allocation limit).
			 * Also convert offset to native byte order.
			 */
			comp_maxsize = 0;
			for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) {
				vnd->sc_comp_offsets[i] =
				    be64toh(vnd->sc_comp_offsets[i]);
				comp_size =
				    be64toh(vnd->sc_comp_offsets[i + 1])
				    - vnd->sc_comp_offsets[i];
				if (comp_size > comp_maxsize)
					comp_maxsize = comp_size;
			}
			vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] =
			    be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs
			    - 1]);

			/* create compressed data buffer */
			vnd->sc_comp_buff = malloc(comp_maxsize,
			    M_DEVBUF, M_WAITOK);

			/* create decompressed buffer */
			vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz,
			    M_DEVBUF, M_WAITOK);
			vnd->sc_comp_buffblk = -1;

			/* Initialize decompress stream */
			memset(&vnd->sc_comp_stream, 0, sizeof(z_stream));
			vnd->sc_comp_stream.zalloc = vnd_alloc;
			vnd->sc_comp_stream.zfree = vnd_free;
			error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS);
			if (error) {
				if (vnd->sc_comp_stream.msg)
					printf("vnd%d: compressed file, %s\n",
					    unit, vnd->sc_comp_stream.msg);
				VOP_UNLOCK(nd.ni_vp);
				error = EINVAL;
				goto close_and_exit;
			}

			vnd->sc_flags |= VNF_COMP | VNF_READONLY;
#else /* !VND_COMPRESSION */
			VOP_UNLOCK(nd.ni_vp);
			error = EOPNOTSUPP;
			goto close_and_exit;
#endif /* VND_COMPRESSION */
		}

		VOP_UNLOCK(nd.ni_vp);
		vnd->sc_vp = nd.ni_vp;
		vnd->sc_size = btodb(vattr.va_size);	/* note truncation */

		/* get smallest I/O size for underlying device, fall back to
		 * fundamental I/O size of underlying filesystem
		 */
		error = bdev_ioctl(vattr.va_fsid, DIOCGSECTORSIZE,
		    &vnd->sc_iosize, FKIOCTL, l);
		if (error)
			vnd->sc_iosize = vnd->sc_vp->v_mount->mnt_stat.f_frsize;

		/* Default I/O size to DEV_BSIZE */
		if (vnd->sc_iosize == 0)
			vnd->sc_iosize = DEV_BSIZE;

		/*
		 * Use pseudo-geometry specified.  If none was provided,
		 * use "standard" Adaptec fictitious geometry.
		 */
		if (vio->vnd_flags & VNDIOF_HASGEOM) {

			memcpy(&vnd->sc_geom, &vio->vnd_geom,
			    sizeof(vio->vnd_geom));

			/*
			 * Sanity-check the sector size.
			 */
			if (!DK_DEV_BSIZE_OK(vnd->sc_geom.vng_secsize) ||
			    vnd->sc_geom.vng_ntracks == 0 ||
			    vnd->sc_geom.vng_nsectors == 0) {
				error = EINVAL;
				goto close_and_exit;
			}

			/*
			 * Compute missing cylinder count from size
			 */
			if (vnd->sc_geom.vng_ncylinders == 0)
				vnd->sc_geom.vng_ncylinders = vnd->sc_size / (
				    (vnd->sc_geom.vng_secsize / DEV_BSIZE) *
				    vnd->sc_geom.vng_ntracks *
				    vnd->sc_geom.vng_nsectors);

			/*
			 * Compute the size (in DEV_BSIZE blocks) specified
			 * by the geometry.
			 */
			geomsize = (int64_t)vnd->sc_geom.vng_nsectors *
			    vnd->sc_geom.vng_ntracks *
			    vnd->sc_geom.vng_ncylinders *
			    (vnd->sc_geom.vng_secsize / DEV_BSIZE);

			/*
			 * Sanity-check the size against the specified
			 * geometry.
			 */
			if (vnd->sc_size < geomsize) {
				error = EINVAL;
				goto close_and_exit;
			}
		} else if (vnd->sc_size >= (32 * 64)) {
			/*
			 * Size must be at least 2048 DEV_BSIZE blocks
			 * (1M) in order to use this geometry.
			 */
			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 32;
			vnd->sc_geom.vng_ntracks = 64;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32);
		} else {
			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 1;
			vnd->sc_geom.vng_ntracks = 1;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size;
		}
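
		/*
		 * Example: a 1 GB image is 2097152 DEV_BSIZE blocks; with
		 * the default 32 sectors/track and 64 tracks/cylinder this
		 * gives 2097152 / 2048 = 1024 cylinders.
		 */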

		vnd_set_geometry(vnd);

		if (vio->vnd_flags & VNDIOF_READONLY) {
			vnd->sc_flags |= VNF_READONLY;
		}

		if ((error = vndsetcred(vnd, l->l_cred)) != 0)
			goto close_and_exit;

		vndthrottle(vnd, vnd->sc_vp);
		vio->vnd_osize = dbtob(vnd->sc_size);
		if (cmd != VNDIOCSET50)
			vio->vnd_size = dbtob(vnd->sc_size);
		vnd->sc_flags |= VNF_INITED;

		/* create the kernel thread, wait for it to be up */
		error = kthread_create(PRI_NONE, 0, NULL, vndthread, vnd,
		    &vnd->sc_kthread, "%s", device_xname(vnd->sc_dev));
		if (error)
			goto close_and_exit;
		while ((vnd->sc_flags & VNF_KTHREAD) == 0) {
			tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0);
		}
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n",
			    vnd->sc_vp, (unsigned long) vnd->sc_size,
			    vnd->sc_geom.vng_secsize,
			    vnd->sc_geom.vng_nsectors,
			    vnd->sc_geom.vng_ntracks,
			    vnd->sc_geom.vng_ncylinders);
#endif

		/* Attach the disk. */
		disk_attach(&vnd->sc_dkdev);

		/* Initialize the xfer and buffer pools. */
		pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0,
		    0, 0, "vndxpl", NULL, IPL_BIO);

		vndunlock(vnd);

		pathbuf_destroy(pb);

		/* Discover wedges on this disk */
		dkwedge_discover(&vnd->sc_dkdev);

		break;

 close_and_exit:
		(void) vn_close(nd.ni_vp, fflags, l->l_cred);
		pathbuf_destroy(pb);
 unlock_and_exit:
#ifdef VND_COMPRESSION
		/* free any allocated memory (for compressed file) */
		if (vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if (vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if (vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
#endif /* VND_COMPRESSION */
		vndunlock(vnd);
		return error;

	case VNDIOCCLR50:
	case VNDIOCCLR:
		part = DISKPART(dev);
		pmask = (1 << part);
		force = (vio->vnd_flags & VNDIOF_FORCE) != 0;

		if ((error = vnddoclear(vnd, pmask, minor(dev), force)) != 0)
			return error;

		break;


	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;

		if ((error = vndlock(vnd)) != 0)
			return error;

		vnd->sc_flags |= VNF_LABELLING;

#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		error = setdisklabel(vnd->sc_dkdev.dk_label,
		    lp, 0, vnd->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(VNDLABELDEV(dev),
				    vndstrategy, vnd->sc_dkdev.dk_label,
				    vnd->sc_dkdev.dk_cpulabel);
		}

		vnd->sc_flags &= ~VNF_LABELLING;

		vndunlock(vnd);

		if (error)
			return error;
		break;
	}

	case DIOCKLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_KLABEL;
		else
			vnd->sc_flags &= ~VNF_KLABEL;
		break;

	case DIOCWLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_WLABEL;
		else
			vnd->sc_flags &= ~VNF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, (struct disklabel *)data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCGSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)data;

		/* No lock needed, never changed */
		strlcpy(dks->dks_name,
		    bufq_getstrategyname(vnd->sc_tab),
		    sizeof(dks->dks_name));
		dks->dks_paramlen = 0;
		break;
	    }
	case DIOCGCACHE:
	    {
		int *bits = (int *)data;
		*bits |= DKCACHE_READ | DKCACHE_WRITE;
		break;
	    }
	case DIOCCACHESYNC:
		vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_FSYNC(vnd->sc_vp, vnd->sc_cred,
		    FSYNC_WAIT | FSYNC_DATAONLY | FSYNC_CACHE, 0, 0);
		VOP_UNLOCK(vnd->sc_vp);
		return error;

	default:
		return ENOTTY;
	}

	return 0;
}

/*
 * Duplicate the current process's credentials.  Since we are called only
 * as the result of a SET ioctl and only root can do that, any future access
 * to this "disk" is essentially as root.  Note that credentials may change
 * if some other uid can write directly to the mapped file (NFS).
 */
static int
vndsetcred(struct vnd_softc *vnd, kauth_cred_t cred)
{
	struct uio auio;
	struct iovec aiov;
	char *tmpbuf;
	int error;

	vnd->sc_cred = kauth_cred_dup(cred);
	tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);

	/* XXX: Horrible kludge to establish credentials for NFS */
	aiov.iov_base = tmpbuf;
	aiov.iov_len = uimin(DEV_BSIZE, dbtob(vnd->sc_size));
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_rw = UIO_READ;
	auio.uio_resid = aiov.iov_len;
	UIO_SETUP_SYSSPACE(&auio);
	vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
	if (error == 0) {
		/*
		 * Because vnd does all IO directly through the vnode
		 * we need to flush (at least) the buffer from the above
		 * VOP_READ from the buffer cache to prevent cache
		 * incoherencies.  Also, be careful to write dirty
		 * buffers back to stable storage.
		 */
		error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred,
		    curlwp, 0, 0);
	}
	VOP_UNLOCK(vnd->sc_vp);

	free(tmpbuf, M_TEMP);
	return error;
}

/*
 * Set maxactive based on FS type: NFS backends get a lower
 * concurrency limit to avoid flooding the server.
 */
static void
vndthrottle(struct vnd_softc *vnd, struct vnode *vp)
{

	if (vp->v_tag == VT_NFS)
		vnd->sc_maxactive = 2;
	else
		vnd->sc_maxactive = 8;

	if (vnd->sc_maxactive < 1)
		vnd->sc_maxactive = 1;
}

#if 0
static void
vndshutdown(void)
{
	struct vnd_softc *vnd;

	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
		if (vnd->sc_flags & VNF_INITED)
			vndclear(vnd);
}
#endif

static void
vndclear(struct vnd_softc *vnd, int myminor)
{
	struct vnode *vp = vnd->sc_vp;
	int fflags = FREAD;
	int bmaj, cmaj, i, mn;
	int s;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclear(%p): vp %p\n", vnd, vp);
#endif
	/* locate the major number */
	bmaj = bdevsw_lookup_major(&vnd_bdevsw);
	cmaj = cdevsw_lookup_major(&vnd_cdevsw);

	/* Nuke the vnodes for any open instances */
	for (i = 0; i < MAXPARTITIONS; i++) {
		mn = DISKMINOR(device_unit(vnd->sc_dev), i);
		vdevgone(bmaj, mn, mn, VBLK);
		if (mn != myminor) /* XXX avoid killing our own vnode */
			vdevgone(cmaj, mn, mn, VCHR);
	}

	if ((vnd->sc_flags & VNF_READONLY) == 0)
		fflags |= FWRITE;

	s = splbio();
	bufq_drain(vnd->sc_tab);
	splx(s);

	vnd->sc_flags |= VNF_VUNCONF;
	wakeup(&vnd->sc_tab);
	while (vnd->sc_flags & VNF_KTHREAD)
		tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0);

#ifdef VND_COMPRESSION
	/* free the compressed file buffers */
	if (vnd->sc_flags & VNF_COMP) {
		if (vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if (vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if (vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
	}
#endif /* VND_COMPRESSION */
	vnd->sc_flags &=
	    ~(VNF_INITED | VNF_READONLY | VNF_KLABEL | VNF_VLABEL
	      | VNF_VUNCONF | VNF_COMP | VNF_CLEARING);
	if (vp == NULL)
		panic("vndclear: null vp");
	(void) vn_close(vp, fflags, vnd->sc_cred);
	kauth_cred_free(vnd->sc_cred);
	vnd->sc_vp = NULL;
	vnd->sc_cred = NULL;
	vnd->sc_size = 0;
}

static int
vndsize(dev_t dev)
{
	struct vnd_softc *sc;
	struct disklabel *lp;
	int part, unit, omask;
	int size;

	unit = vndunit(dev);
	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return -1;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return -1;

	part = DISKPART(dev);
	omask = sc->sc_dkdev.dk_openmask & (1 << part);
	lp = sc->sc_dkdev.dk_label;

	if (omask == 0 && vndopen(dev, 0, S_IFBLK, curlwp))	/* XXX */
		return -1;

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && vndclose(dev, 0, S_IFBLK, curlwp))	/* XXX */
		return -1;

	return size;
}

static int
vnddump(dev_t dev, daddr_t blkno, void *va,
    size_t size)
{

	/* Not implemented. */
	return ENXIO;
}

static void
vndgetdefaultlabel(struct vnd_softc *sc, struct disklabel *lp)
{
	struct vndgeom *vng = &sc->sc_geom;
	struct partition *pp;
	unsigned spb;

	memset(lp, 0, sizeof(*lp));

	spb = vng->vng_secsize / DEV_BSIZE;
	if (sc->sc_size / spb > UINT32_MAX)
		lp->d_secperunit = UINT32_MAX;
	else
		lp->d_secperunit = sc->sc_size / spb;
	lp->d_secsize = vng->vng_secsize;
	lp->d_nsectors = vng->vng_nsectors;
	lp->d_ntracks = vng->vng_ntracks;
	lp->d_ncylinders = vng->vng_ncylinders;
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename));
	lp->d_type = DKTYPE_VND;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	pp = &lp->d_partitions[RAW_PART];
	pp->p_offset = 0;
	pp->p_size = lp->d_secperunit;
	pp->p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(lp);
}

/*
 * Read the disklabel from a vnd.  If one is not present, create a fake one.
 */
static void
vndgetdisklabel(dev_t dev, struct vnd_softc *sc)
{
	const char *errstring;
	struct disklabel *lp = sc->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel;
	int i;

	memset(clp, 0, sizeof(*clp));

	vndgetdefaultlabel(sc, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp);
	if (errstring) {
		/*
		 * Lack of disklabel is common, but we print the warning
		 * anyway, since it might contain other useful information.
		 */
		aprint_normal_dev(sc->sc_dev, "%s\n", errstring);

		/*
		 * For historical reasons, if there's no disklabel
		 * present, all partitions must be FS_BSDFFS and
		 * occupy the entire disk.
		 */
		for (i = 0; i < MAXPARTITIONS; i++) {
			/*
			 * Don't wipe out port specific hack (such as
			 * dos partition hack of i386 port).
			 */
			if (lp->d_partitions[i].p_size != 0)
				continue;

			lp->d_partitions[i].p_size = lp->d_secperunit;
			lp->d_partitions[i].p_offset = 0;
			lp->d_partitions[i].p_fstype = FS_BSDFFS;
		}

		strncpy(lp->d_packname, "default label",
		    sizeof(lp->d_packname));

		lp->d_npartitions = MAXPARTITIONS;
		lp->d_checksum = dkcksum(lp);
	}
}

/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 */
static int
vndlock(struct vnd_softc *sc)
{
	int error;

	while ((sc->sc_flags & VNF_LOCKED) != 0) {
		sc->sc_flags |= VNF_WANTED;
		if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
			return error;
	}
	sc->sc_flags |= VNF_LOCKED;
	return 0;
}

/*
 * Unlock and wake up any waiters.
 */
static void
vndunlock(struct vnd_softc *sc)
{

	sc->sc_flags &= ~VNF_LOCKED;
	if ((sc->sc_flags & VNF_WANTED) != 0) {
		sc->sc_flags &= ~VNF_WANTED;
		wakeup(sc);
	}
}

#ifdef VND_COMPRESSION
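/*
 * On-disk layout of a compressed vnd image, as parsed in VNDIOCSET
 * above: a vnd_comp_header (block_size and num_blocks, big-endian),
 * followed by num_blocks + 1 big-endian 64-bit file offsets of the
 * zlib-compressed blocks; the final offset is the file's byte size.
 */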
/* compressed file read */
static void
compstrategy(struct buf *bp, off_t bn)
{
	int error;
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd =
	    device_lookup_private(&vnd_cd, unit);
	u_int32_t comp_block;
	struct uio auio;
	char *addr;
	int s;

	/* set up constants for data move */
	auio.uio_rw = UIO_READ;
	UIO_SETUP_SYSSPACE(&auio);

	/* read, and transfer the data */
	addr = bp->b_data;
	bp->b_resid = bp->b_bcount;
	s = splbio();
	while (bp->b_resid > 0) {
		unsigned length;
		size_t length_in_buffer;
		u_int32_t offset_in_buffer;
		struct iovec aiov;

		/* calculate the compressed block number */
		comp_block = bn / (off_t)vnd->sc_comp_blksz;

		/* check for good block number */
		if (comp_block >= vnd->sc_comp_numoffs) {
			bp->b_error = EINVAL;
			splx(s);
			return;
		}

		/* read in the compressed block, if not in buffer */
		if (comp_block != vnd->sc_comp_buffblk) {
			length = vnd->sc_comp_offsets[comp_block + 1] -
			    vnd->sc_comp_offsets[comp_block];
			vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
			error = vn_rdwr(UIO_READ, vnd->sc_vp, vnd->sc_comp_buff,
			    length, vnd->sc_comp_offsets[comp_block],
			    UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vnd->sc_cred,
			    NULL, NULL);
			if (error) {
				bp->b_error = error;
				VOP_UNLOCK(vnd->sc_vp);
				splx(s);
				return;
			}
			/* uncompress the buffer */
			vnd->sc_comp_stream.next_in = vnd->sc_comp_buff;
			vnd->sc_comp_stream.avail_in = length;
			vnd->sc_comp_stream.next_out = vnd->sc_comp_decombuf;
			vnd->sc_comp_stream.avail_out = vnd->sc_comp_blksz;
			inflateReset(&vnd->sc_comp_stream);
			error = inflate(&vnd->sc_comp_stream, Z_FINISH);
			if (error != Z_STREAM_END) {
				if (vnd->sc_comp_stream.msg)
					aprint_normal_dev(vnd->sc_dev,
					    "compressed file, %s\n",
					    vnd->sc_comp_stream.msg);
				bp->b_error = EBADMSG;
				VOP_UNLOCK(vnd->sc_vp);
				splx(s);
				return;
			}
			vnd->sc_comp_buffblk = comp_block;
			VOP_UNLOCK(vnd->sc_vp);
		}

		/* transfer the usable uncompressed data */
		offset_in_buffer = bn % (off_t)vnd->sc_comp_blksz;
		length_in_buffer = vnd->sc_comp_blksz - offset_in_buffer;
		if (length_in_buffer > bp->b_resid)
			length_in_buffer = bp->b_resid;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		aiov.iov_base = addr;
		aiov.iov_len = length_in_buffer;
		auio.uio_resid = aiov.iov_len;
		auio.uio_offset = 0;
		error = uiomove(vnd->sc_comp_decombuf + offset_in_buffer,
		    length_in_buffer, &auio);
		if (error) {
			bp->b_error = error;
			splx(s);
			return;
		}

		bn += length_in_buffer;
		addr += length_in_buffer;
		bp->b_resid -= length_in_buffer;
	}
	splx(s);
}

/* compression memory allocation routines */
static void *
vnd_alloc(void *aux, u_int items, u_int siz)
{
	return malloc(items * siz, M_TEMP, M_NOWAIT);
}

static void
vnd_free(void *aux, void *ptr)
{
	free(ptr, M_TEMP);
}
#endif /* VND_COMPRESSION */

static void
vnd_set_geometry(struct vnd_softc *vnd)
{
	struct disk_geom *dg = &vnd->sc_dkdev.dk_geom;
	unsigned spb;

	memset(dg, 0, sizeof(*dg));

	spb = vnd->sc_geom.vng_secsize / DEV_BSIZE;
	dg->dg_secperunit = vnd->sc_size / spb;
	dg->dg_secsize = vnd->sc_geom.vng_secsize;
	dg->dg_nsectors = vnd->sc_geom.vng_nsectors;
	dg->dg_ntracks = vnd->sc_geom.vng_ntracks;
	dg->dg_ncylinders = vnd->sc_geom.vng_ncylinders;

#ifdef DEBUG
	if (vnddebug & VDB_LABEL) {
		printf("dg->dg_secperunit: %" PRId64 "\n", dg->dg_secperunit);
		printf("dg->dg_ncylinders: %u\n", dg->dg_ncylinders);
	}
#endif
	disk_set_info(vnd->sc_dev, &vnd->sc_dkdev, NULL);
}

#ifdef VND_COMPRESSION
#define VND_DEPENDS "zlib"
#else
#define VND_DEPENDS NULL
#endif

MODULE(MODULE_CLASS_DRIVER, vnd, VND_DEPENDS);

#ifdef _MODULE
int vnd_bmajor = -1, vnd_cmajor = -1;

CFDRIVER_DECL(vnd, DV_DISK, NULL);
#endif

static int
vnd_modcmd(modcmd_t cmd, void *arg)
{
	int error = 0;

	switch (cmd) {
	case MODULE_CMD_INIT:
#ifdef _MODULE
		error = config_cfdriver_attach(&vnd_cd);
		if (error)
			break;

		error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
		if (error) {
			config_cfdriver_detach(&vnd_cd);
#ifdef DIAGNOSTIC
			aprint_error("%s: unable to register cfattach for "
			    "%s, error %d\n", __func__, vnd_cd.cd_name, error);
#endif
			break;
		}

		/*
		 * Attach the {b,c}devsw's
		 */
		error = devsw_attach("vnd", &vnd_bdevsw, &vnd_bmajor,
		    &vnd_cdevsw, &vnd_cmajor);
		/*
		 * If devsw_attach fails, remove from autoconf database
		 */
		if (error) {
			config_cfattach_detach(vnd_cd.cd_name, &vnd_ca);
			config_cfdriver_detach(&vnd_cd);
#ifdef DIAGNOSTIC
			aprint_error("%s: unable to attach %s devsw, "
			    "error %d\n", __func__, vnd_cd.cd_name, error);
#endif
			break;
		}
#endif
		break;

	case MODULE_CMD_FINI:
#ifdef _MODULE
		/*
		 * Remove {b,c}devsw's
		 */
		devsw_detach(&vnd_bdevsw, &vnd_cdevsw);

		/*
		 * Now remove device from autoconf database
		 */
		error = config_cfattach_detach(vnd_cd.cd_name, &vnd_ca);
		if (error) {
			(void)devsw_attach("vnd", &vnd_bdevsw, &vnd_bmajor,
			    &vnd_cdevsw, &vnd_cmajor);
#ifdef DIAGNOSTIC
			aprint_error("%s: failed to detach %s cfattach, "
			    "error %d\n", __func__, vnd_cd.cd_name, error);
#endif
			break;
		}
		error = config_cfdriver_detach(&vnd_cd);
		if (error) {
			(void)config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
			(void)devsw_attach("vnd", &vnd_bdevsw, &vnd_bmajor,
			    &vnd_cdevsw, &vnd_cmajor);
#ifdef DIAGNOSTIC
			aprint_error("%s: failed to detach %s cfdriver, "
			    "error %d\n", __func__, vnd_cd.cd_name, error);
#endif
			break;
		}
#endif
		break;

	case MODULE_CMD_STAT:
		return ENOTTY;

	default:
		return ENOTTY;
	}

	return error;
}