vnd.c revision 1.133 1 /* $NetBSD: vnd.c,v 1.133 2006/02/01 05:05:22 cube Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 /*
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. Neither the name of the University nor the names of its contributors
56 * may be used to endorse or promote products derived from this software
57 * without specific prior written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69 * SUCH DAMAGE.
70 *
71 * from: Utah $Hdr: vn.c 1.13 94/04/02$
72 *
73 * @(#)vn.c 8.9 (Berkeley) 5/14/95
74 */
75
76 /*
77 * Copyright (c) 1988 University of Utah.
78 *
79 * This code is derived from software contributed to Berkeley by
80 * the Systems Programming Group of the University of Utah Computer
81 * Science Department.
82 *
83 * Redistribution and use in source and binary forms, with or without
84 * modification, are permitted provided that the following conditions
85 * are met:
86 * 1. Redistributions of source code must retain the above copyright
87 * notice, this list of conditions and the following disclaimer.
88 * 2. Redistributions in binary form must reproduce the above copyright
89 * notice, this list of conditions and the following disclaimer in the
90 * documentation and/or other materials provided with the distribution.
91 * 3. All advertising materials mentioning features or use of this software
92 * must display the following acknowledgement:
93 * This product includes software developed by the University of
94 * California, Berkeley and its contributors.
95 * 4. Neither the name of the University nor the names of its contributors
96 * may be used to endorse or promote products derived from this software
97 * without specific prior written permission.
98 *
99 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
100 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
101 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
102 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
103 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
104 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
105 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
106 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
107 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
108 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
109 * SUCH DAMAGE.
110 *
111 * from: Utah $Hdr: vn.c 1.13 94/04/02$
112 *
113 * @(#)vn.c 8.9 (Berkeley) 5/14/95
114 */
115
116 /*
117 * Vnode disk driver.
118 *
119 * Block/character interface to a vnode. Allows one to treat a file
120 * as a disk (e.g. build a filesystem in it, mount it, etc.).
121 *
122 * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode
123 * instead of a simple VOP_RDWR. We do this to avoid distorting the
124 * local buffer cache.
125 *
126 * NOTE 2: There is a security issue involved with this driver.
127 * Once mounted all access to the contents of the "mapped" file via
128 * the special file is controlled by the permissions on the special
129 * file, the protection of the mapped file is ignored (effectively,
130 * by using root credentials in all transactions).
131 *
132 * NOTE 3: Doesn't interact with leases, should it?
133 */
134
135 #include <sys/cdefs.h>
136 __KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.133 2006/02/01 05:05:22 cube Exp $");
137
138 #if defined(_KERNEL_OPT)
139 #include "fs_nfs.h"
140 #include "opt_vnd.h"
141 #endif
142
143 #include <sys/param.h>
144 #include <sys/systm.h>
145 #include <sys/namei.h>
146 #include <sys/proc.h>
147 #include <sys/kthread.h>
148 #include <sys/errno.h>
149 #include <sys/buf.h>
150 #include <sys/bufq.h>
151 #include <sys/malloc.h>
152 #include <sys/ioctl.h>
153 #include <sys/disklabel.h>
154 #include <sys/device.h>
155 #include <sys/disk.h>
156 #include <sys/stat.h>
157 #include <sys/mount.h>
158 #include <sys/vnode.h>
159 #include <sys/file.h>
160 #include <sys/uio.h>
161 #include <sys/conf.h>
162 #include <net/zlib.h>
163
164 #include <miscfs/specfs/specdev.h>
165
166 #include <dev/vndvar.h>
167
168 #if defined(VNDDEBUG) && !defined(DEBUG)
169 #define DEBUG
170 #endif
171
#ifdef DEBUG
/*
 * When zero, vndthread() discards the read-ahead count returned by
 * VOP_BMAP, so every piece it issues covers at most one filesystem block.
 */
int dovndcluster = 1;
/* Bits for vnddebug; each one enables a class of debug printf()s. */
#define VDB_FOLLOW	0x01	/* trace entry into the driver entry points */
#define VDB_INIT	0x02	/* trace VNDIOCSET/VNDIOCCLR in vndioctl() */
#define VDB_IO		0x04	/* trace individual I/O pieces and completions */
#define VDB_LABEL	0x08	/* presumably disklabel tracing; no user is visible in this part of the file */
/* Debug mask (OR of VDB_* above); all tracing is off by default. */
int vnddebug = 0x00;
#endif
180
181 #define vndunit(x) DISKUNIT(x)
182
183 struct vndxfer {
184 struct buf vx_buf;
185 struct vnd_softc *vx_vnd;
186 };
187 #define VND_BUFTOXFER(bp) ((struct vndxfer *)(void *)bp)
188
189 #define VND_GETXFER(vnd) pool_get(&(vnd)->sc_vxpool, PR_WAITOK)
190 #define VND_PUTXFER(vnd, vx) pool_put(&(vnd)->sc_vxpool, (vx))
191
192 #define VNDLABELDEV(dev) \
193 (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART))
194
195 /* called by main() at boot time (XXX: and the LKM driver) */
196 void vndattach(int);
197
198 static void vndclear(struct vnd_softc *, int);
199 static int vndsetcred(struct vnd_softc *, struct ucred *);
200 static void vndthrottle(struct vnd_softc *, struct vnode *);
201 static void vndiodone(struct buf *);
202 #if 0
203 static void vndshutdown(void);
204 #endif
205
206 static void vndgetdefaultlabel(struct vnd_softc *, struct disklabel *);
207 static void vndgetdisklabel(dev_t, struct vnd_softc *);
208
209 static int vndlock(struct vnd_softc *);
210 static void vndunlock(struct vnd_softc *);
211 #ifdef VND_COMPRESSION
212 static void compstrategy(struct buf *, off_t);
213 static void *vnd_alloc(void *, u_int, u_int);
214 static void vnd_free(void *, void *);
215 #endif /* VND_COMPRESSION */
216
217 static void vndthread(void *);
218
219 static dev_type_open(vndopen);
220 static dev_type_close(vndclose);
221 static dev_type_read(vndread);
222 static dev_type_write(vndwrite);
223 static dev_type_ioctl(vndioctl);
224 static dev_type_strategy(vndstrategy);
225 static dev_type_dump(vnddump);
226 static dev_type_size(vndsize);
227
/*
 * Block device switch entry: routes block-device operations on vnd
 * units to the handlers in this file.  The initializer is positional.
 */
const struct bdevsw vnd_bdevsw = {
	vndopen, vndclose, vndstrategy, vndioctl, vnddump, vndsize, D_DISK
};

/*
 * Character (raw) device switch entry.  Read and write go through
 * vndread()/vndwrite(); the stop, tty, poll, mmap and kqfilter slots are
 * filled with the no* placeholders.
 */
const struct cdevsw vnd_cdevsw = {
	vndopen, vndclose, vndread, vndwrite, vndioctl,
	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
};
236
/* Autoconfiguration hooks for a vnd instance (defined below). */
static int vnd_match(struct device *, struct cfdata *, void *);
static void vnd_attach(struct device *, struct device *, void *);
static int vnd_detach(struct device *, int);

/*
 * Attachment declaration: softc size plus the match/attach/detach hooks.
 * The resulting vnd_ca object is what vndattach() registers.
 */
CFATTACH_DECL(vnd, sizeof(struct vnd_softc),
    vnd_match, vnd_attach, vnd_detach, NULL);
/* Driver record, used here for its name and for unit lookups. */
extern struct cfdriver vnd_cd;

/* Create / tear down a unit on demand (see vndopen() and vndioctl()). */
static struct vnd_softc *vnd_spawn(int);
int vnd_destroy(struct device *);
247
248 void
249 vndattach(int num)
250 {
251 int error;
252
253 error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
254 if (error)
255 aprint_error("%s: unable to register cfattach\n",
256 vnd_cd.cd_name);
257 }
258
/*
 * Autoconf match hook: there is nothing to probe, so every candidate
 * matches.
 */
static int
vnd_match(struct device *self, struct cfdata *cfdata, void *aux)
{

	return (1);
}
264
265 static void
266 vnd_attach(struct device *parent, struct device *self, void *aux)
267 {
268 struct vnd_softc *sc = (struct vnd_softc *)self;
269
270 sc->sc_comp_offsets = NULL;
271 sc->sc_comp_buff = NULL;
272 sc->sc_comp_decombuf = NULL;
273 bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK);
274 pseudo_disk_init(&sc->sc_dkdev);
275
276 aprint_normal("%s: vnode disk driver\n", self->dv_xname);
277 }
278
279 static int
280 vnd_detach(struct device *self, int flags)
281 {
282 struct vnd_softc *sc = (struct vnd_softc *)self;
283 if (sc->sc_flags & VNF_INITED)
284 return EBUSY;
285
286 bufq_free(sc->sc_tab);
287
288 return 0;
289 }
290
291 static struct vnd_softc *
292 vnd_spawn(int unit)
293 {
294 struct cfdata *cf;
295
296 cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK);
297 cf->cf_name = vnd_cd.cd_name;
298 cf->cf_atname = vnd_cd.cd_name;
299 cf->cf_unit = unit;
300 cf->cf_fstate = FSTATE_STAR;
301
302 return (struct vnd_softc *)config_attach_pseudo(cf);
303 }
304
305 int
306 vnd_destroy(struct device *dev)
307 {
308 int error;
309
310 error = config_detach(dev, 0);
311 if (error)
312 return error;
313 free(dev->dv_cfdata, M_DEVBUF);
314 return 0;
315 }
316
317 static int
318 vndopen(dev_t dev, int flags, int mode, struct lwp *l)
319 {
320 int unit = vndunit(dev);
321 struct vnd_softc *sc;
322 int error = 0, part, pmask;
323 struct disklabel *lp;
324
325 #ifdef DEBUG
326 if (vnddebug & VDB_FOLLOW)
327 printf("vndopen(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
328 #endif
329 sc = device_lookup(&vnd_cd, unit);
330 if (sc == NULL) {
331 sc = vnd_spawn(unit);
332 if (sc == NULL)
333 return ENOMEM;
334 }
335
336 if ((error = vndlock(sc)) != 0)
337 return (error);
338
339 lp = sc->sc_dkdev.dk_label;
340
341 part = DISKPART(dev);
342 pmask = (1 << part);
343
344 /*
345 * If we're initialized, check to see if there are any other
346 * open partitions. If not, then it's safe to update the
347 * in-core disklabel. Only read the disklabel if it is
348 * not already valid.
349 */
350 if ((sc->sc_flags & (VNF_INITED|VNF_VLABEL)) == VNF_INITED &&
351 sc->sc_dkdev.dk_openmask == 0)
352 vndgetdisklabel(dev, sc);
353
354 /* Check that the partitions exists. */
355 if (part != RAW_PART) {
356 if (((sc->sc_flags & VNF_INITED) == 0) ||
357 ((part >= lp->d_npartitions) ||
358 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
359 error = ENXIO;
360 goto done;
361 }
362 }
363
364 /* Prevent our unit from being unconfigured while open. */
365 switch (mode) {
366 case S_IFCHR:
367 sc->sc_dkdev.dk_copenmask |= pmask;
368 break;
369
370 case S_IFBLK:
371 sc->sc_dkdev.dk_bopenmask |= pmask;
372 break;
373 }
374 sc->sc_dkdev.dk_openmask =
375 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;
376
377 done:
378 vndunlock(sc);
379 return (error);
380 }
381
382 static int
383 vndclose(dev_t dev, int flags, int mode, struct lwp *l)
384 {
385 int unit = vndunit(dev);
386 struct vnd_softc *sc;
387 int error = 0, part;
388
389 #ifdef DEBUG
390 if (vnddebug & VDB_FOLLOW)
391 printf("vndclose(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
392 #endif
393 sc = device_lookup(&vnd_cd, unit);
394 if (sc == NULL)
395 return ENXIO;
396
397 if ((error = vndlock(sc)) != 0)
398 return (error);
399
400 part = DISKPART(dev);
401
402 /* ...that much closer to allowing unconfiguration... */
403 switch (mode) {
404 case S_IFCHR:
405 sc->sc_dkdev.dk_copenmask &= ~(1 << part);
406 break;
407
408 case S_IFBLK:
409 sc->sc_dkdev.dk_bopenmask &= ~(1 << part);
410 break;
411 }
412 sc->sc_dkdev.dk_openmask =
413 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;
414
415 vndunlock(sc);
416 return (0);
417 }
418
419 /*
420 * Qeue the request, and wakeup the kernel thread to handle it.
421 */
422 static void
423 vndstrategy(struct buf *bp)
424 {
425 int unit = vndunit(bp->b_dev);
426 struct vnd_softc *vnd =
427 (struct vnd_softc *)device_lookup(&vnd_cd, unit);
428 struct disklabel *lp = vnd->sc_dkdev.dk_label;
429 daddr_t blkno;
430 int s = splbio();
431
432 bp->b_resid = bp->b_bcount;
433
434 if ((vnd->sc_flags & VNF_INITED) == 0) {
435 bp->b_error = ENXIO;
436 bp->b_flags |= B_ERROR;
437 goto done;
438 }
439
440 /*
441 * The transfer must be a whole number of blocks.
442 */
443 if ((bp->b_bcount % lp->d_secsize) != 0) {
444 bp->b_error = EINVAL;
445 bp->b_flags |= B_ERROR;
446 goto done;
447 }
448
449 /*
450 * check if we're read-only.
451 */
452 if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) {
453 bp->b_error = EACCES;
454 bp->b_flags |= B_ERROR;
455 goto done;
456 }
457
458 /*
459 * Do bounds checking and adjust transfer. If there's an error,
460 * the bounds check will flag that for us.
461 */
462 if (DISKPART(bp->b_dev) != RAW_PART) {
463 if (bounds_check_with_label(&vnd->sc_dkdev,
464 bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0)
465 goto done;
466 }
467
468 /* If it's a nil transfer, wake up the top half now. */
469 if (bp->b_bcount == 0)
470 goto done;
471
472 /*
473 * Put the block number in terms of the logical blocksize
474 * of the "device".
475 */
476
477 blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);
478
479 /*
480 * Translate the partition-relative block number to an absolute.
481 */
482 if (DISKPART(bp->b_dev) != RAW_PART) {
483 struct partition *pp;
484
485 pp = &vnd->sc_dkdev.dk_label->d_partitions[
486 DISKPART(bp->b_dev)];
487 blkno += pp->p_offset;
488 }
489 bp->b_rawblkno = blkno;
490
491 #ifdef DEBUG
492 if (vnddebug & VDB_FOLLOW)
493 printf("vndstrategy(%p): unit %d\n", bp, unit);
494 #endif
495 BUFQ_PUT(vnd->sc_tab, bp);
496 wakeup(&vnd->sc_tab);
497 splx(s);
498 return;
499 done:
500 biodone(bp);
501 splx(s);
502 }
503
/*
 * Per-unit worker thread; `arg' is the unit's softc.
 *
 * Announces itself by setting VNF_KTHREAD and waking sleepers on
 * &vnd->sc_kthread, then services the requests queued on sc_tab by
 * vndstrategy() until VNF_VUNCONF is set.  Each request is split into
 * pieces that are mapped with VOP_BMAP and issued with VOP_STRATEGY.
 * On exit VNF_KTHREAD is cleared and &vnd->sc_kthread is woken again.
 *
 * The loop condition and the queue are examined at splbio(); the level
 * is dropped while an individual request is being processed.
 */
static void
vndthread(void *arg)
{
	struct vnd_softc *vnd = arg;
	struct mount *mp;	/* set by vn_start_write(); writes only */
	int s, bsize;
	int sz, error;
	struct disklabel *lp;

	s = splbio();
	vnd->sc_flags |= VNF_KTHREAD;
	wakeup(&vnd->sc_kthread);

	/*
	 * Dequeue requests, break them into bsize pieces and submit using
	 * VOP_BMAP/VOP_STRATEGY.
	 */
	while ((vnd->sc_flags & VNF_VUNCONF) == 0) {
		struct vndxfer *vnx;
		off_t offset;		/* byte offset of the piece within bp */
		int resid;		/* bytes of bp not yet issued */
		int skipped = 0;	/* bytes given up on after an error */
		off_t bn;		/* byte offset within the backing file */
		int flags;		/* snapshot of obp->b_flags */
		struct buf *obp;	/* request as queued by vndstrategy() */
		struct buf *bp;		/* our own buf, embedded in vnx */

		obp = BUFQ_GET(vnd->sc_tab);
		if (obp == NULL) {
			/* Queue empty: sleep until something wakes sc_tab. */
			tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0);
			continue;
		};
		splx(s);
		flags = obp->b_flags;
#ifdef DEBUG
		/* NOTE(review): the format string lacks its closing ')'. */
		if (vnddebug & VDB_FOLLOW)
			printf("vndthread(%p\n", obp);
#endif
		lp = vnd->sc_dkdev.dk_label;

		/* convert to a byte offset within the file. */
		bn = obp->b_rawblkno * lp->d_secsize;

		/* The backing vnode has no mount any more: fail the request. */
		if (vnd->sc_vp->v_mount == NULL) {
			obp->b_error = ENXIO;
			obp->b_flags |= B_ERROR;
			goto done;
		}
#ifdef VND_COMPRESSION
		/* handle a compressed read */
		if ((flags & B_READ) != 0 && (vnd->sc_flags & VNF_COMP)) {
			compstrategy(obp, bn);
			goto done;
		}
#endif /* VND_COMPRESSION */

		/* Pieces are sized in units of the filesystem's I/O size. */
		bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;

		/*
		 * Allocate a header for this transfer and link it to the
		 * buffer
		 */
		s = splbio();
		vnx = VND_GETXFER(vnd);
		splx(s);
		vnx->vx_vnd = vnd;

		/*
		 * bp mirrors obp (direction, data pointer, length); its
		 * completion callback, vndiodone(), finds obp via b_private.
		 */
		bp = &vnx->vx_buf;
		BUF_INIT(bp);
		bp->b_flags = (obp->b_flags & B_READ) | B_CALL;
		bp->b_iodone = vndiodone;
		bp->b_private = obp;
		bp->b_vp = NULL;
		bp->b_data = obp->b_data;
		bp->b_bcount = bp->b_resid = obp->b_bcount;
		BIO_COPYPRIO(bp, obp);

		/*
		 * Throttle: wait while sc_maxactive transfers are already
		 * outstanding.  vndiodone() wakes sc_tab when sc_active
		 * drops to zero.
		 */
		s = splbio();
		while (vnd->sc_active >= vnd->sc_maxactive) {
			tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0);
		}
		vnd->sc_active++;
		splx(s);

		/* Writes are bracketed by vn_start_write/vn_finished_write. */
		if ((flags & B_READ) == 0)
			vn_start_write(vnd->sc_vp, &mp, V_WAIT);

		/* Instrumentation. */
		disk_busy(&vnd->sc_dkdev);

		/*
		 * Feed requests sequentially.
		 * We do it this way to keep from flooding NFS servers if we
		 * are connected to an NFS file.  This places the burden on
		 * the client rather than the server.
		 */
		error = 0;
		for (offset = 0, resid = bp->b_resid; resid;
		    resid -= sz, offset += sz) {
			struct buf *nbp;	/* buf for this piece */
			struct vnode *vp;	/* vnode returned by VOP_BMAP */
			daddr_t nbn;		/* block number returned by VOP_BMAP */
			int off, nra;		/* offset in block, read-ahead count */

			nra = 0;
			vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
			error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
			VOP_UNLOCK(vnd->sc_vp, 0);

			/* A mapping of -1 is treated as an I/O error. */
			if (error == 0 && (long)nbn == -1)
				error = EIO;

			/*
			 * If there was an error or a hole in the file...punt.
			 * Note that we may have to wait for any operations
			 * that we have already fired off before releasing
			 * the buffer.
			 *
			 * XXX we could deal with holes here but it would be
			 * a hassle (in the write case).
			 */
			if (error) {
				skipped += resid;
				break;
			}

#ifdef DEBUG
			if (!dovndcluster)
				nra = 0;
#endif

			/*
			 * An unaligned start only runs to the end of its
			 * block; an aligned one covers the block plus the
			 * read-ahead blocks.  Never exceed what is left.
			 */
			if ((off = bn % bsize) != 0)
				sz = bsize - off;
			else
				sz = (1 + nra) * bsize;
			if (resid < sz)
				sz = resid;
#ifdef DEBUG
			if (vnddebug & VDB_IO)
				printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64
				    " sz 0x%x\n",
				    vnd->sc_vp, vp, (long long)bn, nbn, sz);
#endif

			/*
			 * Set up a buf for this piece under bp (presumably
			 * bp completes once all nested pieces have) and
			 * point it at the mapped block.
			 */
			nbp = getiobuf();
			nestiobuf_setup(bp, nbp, offset, sz);
			nbp->b_blkno = nbn + btodb(off);

#if 0 /* XXX #ifdef DEBUG */
			if (vnddebug & VDB_IO)
				printf("vndstart(%ld): bp %p vp %p blkno "
				    "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n",
				    (long) (vnd-vnd_softc), &nbp->vb_buf,
				    nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno,
				    nbp->vb_buf.b_flags, nbp->vb_buf.b_data,
				    nbp->vb_buf.b_bcount);
#endif
			VOP_STRATEGY(vp, nbp);
			bn += sz;
		}
		/* Report the bytes that were never issued and any error. */
		nestiobuf_done(bp, skipped, error);

		if ((flags & B_READ) == 0)
			vn_finished_write(mp, 0);

		/* Back to splbio() before re-testing the loop condition. */
		s = splbio();
		continue;
done:
		/* Requests finished without a nested transfer end up here. */
		biodone(obp);
		s = splbio();
	}

	/*
	 * NOTE(review): if VNF_VUNCONF and VNF_KTHREAD are distinct bits,
	 * OR-ing VNF_VUNCONF into the mask is a no-op and this clears
	 * VNF_KTHREAD only -- confirm that this is the intent.
	 */
	vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF);
	wakeup(&vnd->sc_kthread);
	splx(s);
	kthread_exit(0);
}
681
682 static void
683 vndiodone(struct buf *bp)
684 {
685 struct vndxfer *vnx = VND_BUFTOXFER(bp);
686 struct vnd_softc *vnd = vnx->vx_vnd;
687 struct buf *obp = bp->b_private;
688
689 KASSERT(&vnx->vx_buf == bp);
690 KASSERT(vnd->sc_active > 0);
691 #ifdef DEBUG
692 if (vnddebug & VDB_IO) {
693 printf("vndiodone1: bp %p iodone: error %d\n",
694 bp, (bp->b_flags & B_ERROR) != 0 ? bp->b_error : 0);
695 }
696 #endif
697 disk_unbusy(&vnd->sc_dkdev, bp->b_bcount - bp->b_resid,
698 (bp->b_flags & B_READ));
699 vnd->sc_active--;
700 if (vnd->sc_active == 0) {
701 wakeup(&vnd->sc_tab);
702 }
703 obp->b_flags |= bp->b_flags & B_ERROR;
704 obp->b_error = bp->b_error;
705 obp->b_resid = bp->b_resid;
706 VND_PUTXFER(vnd, vnx);
707 biodone(obp);
708 }
709
710 /* ARGSUSED */
711 static int
712 vndread(dev_t dev, struct uio *uio, int flags)
713 {
714 int unit = vndunit(dev);
715 struct vnd_softc *sc;
716
717 #ifdef DEBUG
718 if (vnddebug & VDB_FOLLOW)
719 printf("vndread(0x%x, %p)\n", dev, uio);
720 #endif
721
722 sc = device_lookup(&vnd_cd, unit);
723 if (sc == NULL)
724 return ENXIO;
725
726 if ((sc->sc_flags & VNF_INITED) == 0)
727 return (ENXIO);
728
729 return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio));
730 }
731
732 /* ARGSUSED */
733 static int
734 vndwrite(dev_t dev, struct uio *uio, int flags)
735 {
736 int unit = vndunit(dev);
737 struct vnd_softc *sc;
738
739 #ifdef DEBUG
740 if (vnddebug & VDB_FOLLOW)
741 printf("vndwrite(0x%x, %p)\n", dev, uio);
742 #endif
743
744 sc = device_lookup(&vnd_cd, unit);
745 if (sc == NULL)
746 return ENXIO;
747
748 if ((sc->sc_flags & VNF_INITED) == 0)
749 return (ENXIO);
750
751 return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio));
752 }
753
754 static int
755 vnd_cget(struct lwp *l, int unit, int *un, struct vattr *va)
756 {
757 struct vnd_softc *vnd;
758
759 if (*un == -1)
760 *un = unit;
761 if (*un < 0)
762 return EINVAL;
763
764 vnd = device_lookup(&vnd_cd, *un);
765 if (vnd == NULL)
766 /*
767 * vnconfig(8) has weird expectations to list the
768 * devices.
769 * It will stop as soon as it gets ENXIO, but
770 * will continue if it gets something else...
771 */
772 return (*un >= vnd_cd.cd_ndevs) ? ENXIO : -1;
773
774 if ((vnd->sc_flags & VNF_INITED) == 0)
775 return -1;
776
777 return VOP_GETATTR(vnd->sc_vp, va, l->l_proc->p_ucred, l);
778 }
779
780 /* ARGSUSED */
781 static int
782 vndioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l)
783 {
784 int unit = vndunit(dev);
785 struct vnd_softc *vnd;
786 struct vnd_ioctl *vio;
787 struct vattr vattr;
788 struct nameidata nd;
789 int error, part, pmask;
790 size_t geomsize;
791 struct proc *p = (l != NULL) ? l->l_proc : NULL;
792 int fflags;
793 #ifdef __HAVE_OLD_DISKLABEL
794 struct disklabel newlabel;
795 #endif
796
797 #ifdef DEBUG
798 if (vnddebug & VDB_FOLLOW)
799 printf("vndioctl(0x%x, 0x%lx, %p, 0x%x, %p): unit %d\n",
800 dev, cmd, data, flag, p, unit);
801 #endif
802 vnd = device_lookup(&vnd_cd, unit);
803 if (vnd == NULL &&
804 #ifdef COMPAT_30
805 cmd != VNDIOOCGET &&
806 #endif
807 cmd != VNDIOCGET)
808 return ENXIO;
809 vio = (struct vnd_ioctl *)data;
810
811 /* Must be open for writes for these commands... */
812 switch (cmd) {
813 case VNDIOCSET:
814 case VNDIOCCLR:
815 case DIOCSDINFO:
816 case DIOCWDINFO:
817 #ifdef __HAVE_OLD_DISKLABEL
818 case ODIOCSDINFO:
819 case ODIOCWDINFO:
820 #endif
821 case DIOCKLABEL:
822 case DIOCWLABEL:
823 if ((flag & FWRITE) == 0)
824 return (EBADF);
825 }
826
827 /* Must be initialized for these... */
828 switch (cmd) {
829 case VNDIOCCLR:
830 case DIOCGDINFO:
831 case DIOCSDINFO:
832 case DIOCWDINFO:
833 case DIOCGPART:
834 case DIOCKLABEL:
835 case DIOCWLABEL:
836 case DIOCGDEFLABEL:
837 #ifdef __HAVE_OLD_DISKLABEL
838 case ODIOCGDINFO:
839 case ODIOCSDINFO:
840 case ODIOCWDINFO:
841 case ODIOCGDEFLABEL:
842 #endif
843 if ((vnd->sc_flags & VNF_INITED) == 0)
844 return (ENXIO);
845 }
846
847 switch (cmd) {
848 case VNDIOCSET:
849 if (vnd->sc_flags & VNF_INITED)
850 return (EBUSY);
851
852 if ((error = vndlock(vnd)) != 0)
853 return (error);
854
855 fflags = FREAD;
856 if ((vio->vnd_flags & VNDIOF_READONLY) == 0)
857 fflags |= FWRITE;
858 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, l);
859 if ((error = vn_open(&nd, fflags, 0)) != 0)
860 goto unlock_and_exit;
861 error = VOP_GETATTR(nd.ni_vp, &vattr, l->l_proc->p_ucred, l);
862 if (!error && nd.ni_vp->v_type != VREG)
863 error = EOPNOTSUPP;
864 if (error) {
865 VOP_UNLOCK(nd.ni_vp, 0);
866 goto close_and_exit;
867 }
868
869 /* If using a compressed file, initialize its info */
870 /* (or abort with an error if kernel has no compression) */
871 if (vio->vnd_flags & VNF_COMP) {
872 #ifdef VND_COMPRESSION
873 struct vnd_comp_header *ch;
874 int i;
875 u_int32_t comp_size;
876 u_int32_t comp_maxsize;
877
878 /* allocate space for compresed file header */
879 ch = malloc(sizeof(struct vnd_comp_header),
880 M_TEMP, M_WAITOK);
881
882 /* read compressed file header */
883 error = vn_rdwr(UIO_READ, nd.ni_vp, (caddr_t)ch,
884 sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE,
885 IO_UNIT|IO_NODELOCKED, p->p_ucred, NULL, NULL);
886 if(error) {
887 free(ch, M_TEMP);
888 VOP_UNLOCK(nd.ni_vp, 0);
889 goto close_and_exit;
890 }
891
892 /* save some header info */
893 vnd->sc_comp_blksz = ntohl(ch->block_size);
894 /* note last offset is the file byte size */
895 vnd->sc_comp_numoffs = ntohl(ch->num_blocks)+1;
896 free(ch, M_TEMP);
897 if(vnd->sc_comp_blksz % DEV_BSIZE !=0) {
898 VOP_UNLOCK(nd.ni_vp, 0);
899 error = EINVAL;
900 goto close_and_exit;
901 }
902 if(sizeof(struct vnd_comp_header) +
903 sizeof(u_int64_t) * vnd->sc_comp_numoffs >
904 vattr.va_size) {
905 VOP_UNLOCK(nd.ni_vp, 0);
906 error = EINVAL;
907 goto close_and_exit;
908 }
909
910 /* set decompressed file size */
911 vattr.va_size =
912 (vnd->sc_comp_numoffs - 1) * vnd->sc_comp_blksz;
913
914 /* allocate space for all the compressed offsets */
915 vnd->sc_comp_offsets =
916 malloc(sizeof(u_int64_t) * vnd->sc_comp_numoffs,
917 M_DEVBUF, M_WAITOK);
918
919 /* read in the offsets */
920 error = vn_rdwr(UIO_READ, nd.ni_vp,
921 (caddr_t)vnd->sc_comp_offsets,
922 sizeof(u_int64_t) * vnd->sc_comp_numoffs,
923 sizeof(struct vnd_comp_header), UIO_SYSSPACE,
924 IO_UNIT|IO_NODELOCKED, p->p_ucred, NULL, NULL);
925 if(error) {
926 VOP_UNLOCK(nd.ni_vp, 0);
927 goto close_and_exit;
928 }
929 /*
930 * find largest block size (used for allocation limit).
931 * Also convert offset to native byte order.
932 */
933 comp_maxsize = 0;
934 for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) {
935 vnd->sc_comp_offsets[i] =
936 be64toh(vnd->sc_comp_offsets[i]);
937 comp_size = be64toh(vnd->sc_comp_offsets[i + 1])
938 - vnd->sc_comp_offsets[i];
939 if (comp_size > comp_maxsize)
940 comp_maxsize = comp_size;
941 }
942 vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] =
943 be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1]);
944
945 /* create compressed data buffer */
946 vnd->sc_comp_buff = malloc(comp_maxsize,
947 M_DEVBUF, M_WAITOK);
948
949 /* create decompressed buffer */
950 vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz,
951 M_DEVBUF, M_WAITOK);
952 vnd->sc_comp_buffblk = -1;
953
954 /* Initialize decompress stream */
955 bzero(&vnd->sc_comp_stream, sizeof(z_stream));
956 vnd->sc_comp_stream.zalloc = vnd_alloc;
957 vnd->sc_comp_stream.zfree = vnd_free;
958 error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS);
959 if(error) {
960 if(vnd->sc_comp_stream.msg)
961 printf("vnd%d: compressed file, %s\n",
962 unit, vnd->sc_comp_stream.msg);
963 VOP_UNLOCK(nd.ni_vp, 0);
964 error = EINVAL;
965 goto close_and_exit;
966 }
967
968 vnd->sc_flags |= VNF_COMP | VNF_READONLY;
969 #else /* !VND_COMPRESSION */
970 VOP_UNLOCK(nd.ni_vp, 0);
971 error = EOPNOTSUPP;
972 goto close_and_exit;
973 #endif /* VND_COMPRESSION */
974 }
975
976 VOP_UNLOCK(nd.ni_vp, 0);
977 vnd->sc_vp = nd.ni_vp;
978 vnd->sc_size = btodb(vattr.va_size); /* note truncation */
979
980 /*
981 * Use pseudo-geometry specified. If none was provided,
982 * use "standard" Adaptec fictitious geometry.
983 */
984 if (vio->vnd_flags & VNDIOF_HASGEOM) {
985
986 memcpy(&vnd->sc_geom, &vio->vnd_geom,
987 sizeof(vio->vnd_geom));
988
989 /*
990 * Sanity-check the sector size.
991 * XXX Don't allow secsize < DEV_BSIZE. Should
992 * XXX we?
993 */
994 if (vnd->sc_geom.vng_secsize < DEV_BSIZE ||
995 (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0 ||
996 vnd->sc_geom.vng_ncylinders == 0 ||
997 (vnd->sc_geom.vng_ntracks *
998 vnd->sc_geom.vng_nsectors) == 0) {
999 error = EINVAL;
1000 goto close_and_exit;
1001 }
1002
1003 /*
1004 * Compute the size (in DEV_BSIZE blocks) specified
1005 * by the geometry.
1006 */
1007 geomsize = (vnd->sc_geom.vng_nsectors *
1008 vnd->sc_geom.vng_ntracks *
1009 vnd->sc_geom.vng_ncylinders) *
1010 (vnd->sc_geom.vng_secsize / DEV_BSIZE);
1011
1012 /*
1013 * Sanity-check the size against the specified
1014 * geometry.
1015 */
1016 if (vnd->sc_size < geomsize) {
1017 error = EINVAL;
1018 goto close_and_exit;
1019 }
1020 } else if (vnd->sc_size >= (32 * 64)) {
1021 /*
1022 * Size must be at least 2048 DEV_BSIZE blocks
1023 * (1M) in order to use this geometry.
1024 */
1025 vnd->sc_geom.vng_secsize = DEV_BSIZE;
1026 vnd->sc_geom.vng_nsectors = 32;
1027 vnd->sc_geom.vng_ntracks = 64;
1028 vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32);
1029 } else {
1030 vnd->sc_geom.vng_secsize = DEV_BSIZE;
1031 vnd->sc_geom.vng_nsectors = 1;
1032 vnd->sc_geom.vng_ntracks = 1;
1033 vnd->sc_geom.vng_ncylinders = vnd->sc_size;
1034 }
1035
1036 if (vio->vnd_flags & VNDIOF_READONLY) {
1037 vnd->sc_flags |= VNF_READONLY;
1038 }
1039
1040 if ((error = vndsetcred(vnd, p->p_ucred)) != 0)
1041 goto close_and_exit;
1042
1043 vndthrottle(vnd, vnd->sc_vp);
1044 vio->vnd_size = dbtob(vnd->sc_size);
1045 vnd->sc_flags |= VNF_INITED;
1046
1047 /* create the kernel thread, wait for it to be up */
1048 error = kthread_create1(vndthread, vnd, &vnd->sc_kthread,
1049 vnd->sc_dev.dv_xname);
1050 if (error)
1051 goto close_and_exit;
1052 while ((vnd->sc_flags & VNF_KTHREAD) == 0) {
1053 tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0);
1054 }
1055 #ifdef DEBUG
1056 if (vnddebug & VDB_INIT)
1057 printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n",
1058 vnd->sc_vp, (unsigned long) vnd->sc_size,
1059 vnd->sc_geom.vng_secsize,
1060 vnd->sc_geom.vng_nsectors,
1061 vnd->sc_geom.vng_ntracks,
1062 vnd->sc_geom.vng_ncylinders);
1063 #endif
1064
1065 /* Attach the disk. */
1066 vnd->sc_dkdev.dk_name = vnd->sc_dev.dv_xname;
1067 pseudo_disk_attach(&vnd->sc_dkdev);
1068
1069 /* Initialize the xfer and buffer pools. */
1070 pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0,
1071 0, 0, "vndxpl", NULL);
1072
1073 /* Try and read the disklabel. */
1074 vndgetdisklabel(dev, vnd);
1075
1076 vndunlock(vnd);
1077
1078 break;
1079
1080 close_and_exit:
1081 (void) vn_close(nd.ni_vp, fflags, p->p_ucred, l);
1082 unlock_and_exit:
1083 #ifdef VND_COMPRESSION
1084 /* free any allocated memory (for compressed file) */
1085 if(vnd->sc_comp_offsets) {
1086 free(vnd->sc_comp_offsets, M_DEVBUF);
1087 vnd->sc_comp_offsets = NULL;
1088 }
1089 if(vnd->sc_comp_buff) {
1090 free(vnd->sc_comp_buff, M_DEVBUF);
1091 vnd->sc_comp_buff = NULL;
1092 }
1093 if(vnd->sc_comp_decombuf) {
1094 free(vnd->sc_comp_decombuf, M_DEVBUF);
1095 vnd->sc_comp_decombuf = NULL;
1096 }
1097 #endif /* VND_COMPRESSION */
1098 vndunlock(vnd);
1099 return (error);
1100
1101 case VNDIOCCLR:
1102 if ((error = vndlock(vnd)) != 0)
1103 return (error);
1104
1105 /*
1106 * Don't unconfigure if any other partitions are open
1107 * or if both the character and block flavors of this
1108 * partition are open.
1109 */
1110 part = DISKPART(dev);
1111 pmask = (1 << part);
1112 if (((vnd->sc_dkdev.dk_openmask & ~pmask) ||
1113 ((vnd->sc_dkdev.dk_bopenmask & pmask) &&
1114 (vnd->sc_dkdev.dk_copenmask & pmask))) &&
1115 !(vio->vnd_flags & VNDIOF_FORCE)) {
1116 vndunlock(vnd);
1117 return (EBUSY);
1118 }
1119
1120 /*
1121 * XXX vndclear() might call vndclose() implicitly;
1122 * release lock to avoid recursion
1123 */
1124 vndunlock(vnd);
1125 vndclear(vnd, minor(dev));
1126 #ifdef DEBUG
1127 if (vnddebug & VDB_INIT)
1128 printf("vndioctl: CLRed\n");
1129 #endif
1130
1131 /* Destroy the xfer and buffer pools. */
1132 pool_destroy(&vnd->sc_vxpool);
1133
1134 /* Detach the disk. */
1135 pseudo_disk_detach(&vnd->sc_dkdev);
1136 if ((error = vnd_destroy((struct device *)vnd)) != 0) {
1137 aprint_error("%s: unable to detach instance\n",
1138 vnd->sc_dev.dv_xname);
1139 return error;
1140 }
1141
1142 break;
1143
1144 #ifdef COMPAT_30
1145 case VNDIOOCGET: {
1146 struct vnd_ouser *vnu;
1147 struct vattr va;
1148 vnu = (struct vnd_ouser *)data;
1149 switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) {
1150 case 0:
1151 vnu->vnu_dev = va.va_fsid;
1152 vnu->vnu_ino = va.va_fileid;
1153 break;
1154 case -1:
1155 /* unused is not an error */
1156 vnu->vnu_dev = 0;
1157 vnu->vnu_ino = 0;
1158 break;
1159 default:
1160 return error;
1161 }
1162 break;
1163 }
1164 #endif
1165 case VNDIOCGET: {
1166 struct vnd_user *vnu;
1167 struct vattr va;
1168 vnu = (struct vnd_user *)data;
1169 switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) {
1170 case 0:
1171 vnu->vnu_dev = va.va_fsid;
1172 vnu->vnu_ino = va.va_fileid;
1173 break;
1174 case -1:
1175 /* unused is not an error */
1176 vnu->vnu_dev = 0;
1177 vnu->vnu_ino = 0;
1178 break;
1179 default:
1180 return error;
1181 }
1182 break;
1183 }
1184
1185 case DIOCGDINFO:
1186 *(struct disklabel *)data = *(vnd->sc_dkdev.dk_label);
1187 break;
1188
1189 #ifdef __HAVE_OLD_DISKLABEL
1190 case ODIOCGDINFO:
1191 newlabel = *(vnd->sc_dkdev.dk_label);
1192 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1193 return ENOTTY;
1194 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1195 break;
1196 #endif
1197
1198 case DIOCGPART:
1199 ((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label;
1200 ((struct partinfo *)data)->part =
1201 &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1202 break;
1203
1204 case DIOCWDINFO:
1205 case DIOCSDINFO:
1206 #ifdef __HAVE_OLD_DISKLABEL
1207 case ODIOCWDINFO:
1208 case ODIOCSDINFO:
1209 #endif
1210 {
1211 struct disklabel *lp;
1212
1213 if ((error = vndlock(vnd)) != 0)
1214 return (error);
1215
1216 vnd->sc_flags |= VNF_LABELLING;
1217
1218 #ifdef __HAVE_OLD_DISKLABEL
1219 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1220 memset(&newlabel, 0, sizeof newlabel);
1221 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1222 lp = &newlabel;
1223 } else
1224 #endif
1225 lp = (struct disklabel *)data;
1226
1227 error = setdisklabel(vnd->sc_dkdev.dk_label,
1228 lp, 0, vnd->sc_dkdev.dk_cpulabel);
1229 if (error == 0) {
1230 if (cmd == DIOCWDINFO
1231 #ifdef __HAVE_OLD_DISKLABEL
1232 || cmd == ODIOCWDINFO
1233 #endif
1234 )
1235 error = writedisklabel(VNDLABELDEV(dev),
1236 vndstrategy, vnd->sc_dkdev.dk_label,
1237 vnd->sc_dkdev.dk_cpulabel);
1238 }
1239
1240 vnd->sc_flags &= ~VNF_LABELLING;
1241
1242 vndunlock(vnd);
1243
1244 if (error)
1245 return (error);
1246 break;
1247 }
1248
1249 case DIOCKLABEL:
1250 if (*(int *)data != 0)
1251 vnd->sc_flags |= VNF_KLABEL;
1252 else
1253 vnd->sc_flags &= ~VNF_KLABEL;
1254 break;
1255
1256 case DIOCWLABEL:
1257 if (*(int *)data != 0)
1258 vnd->sc_flags |= VNF_WLABEL;
1259 else
1260 vnd->sc_flags &= ~VNF_WLABEL;
1261 break;
1262
1263 case DIOCGDEFLABEL:
1264 vndgetdefaultlabel(vnd, (struct disklabel *)data);
1265 break;
1266
1267 #ifdef __HAVE_OLD_DISKLABEL
1268 case ODIOCGDEFLABEL:
1269 vndgetdefaultlabel(vnd, &newlabel);
1270 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1271 return ENOTTY;
1272 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1273 break;
1274 #endif
1275
1276 default:
1277 return (ENOTTY);
1278 }
1279
1280 return (0);
1281 }
1282
1283 /*
1284 * Duplicate the current processes' credentials. Since we are called only
1285 * as the result of a SET ioctl and only root can do that, any future access
1286 * to this "disk" is essentially as root. Note that credentials may change
1287 * if some other uid can write directly to the mapped file (NFS).
1288 */
1289 static int
1290 vndsetcred(struct vnd_softc *vnd, struct ucred *cred)
1291 {
1292 struct uio auio;
1293 struct iovec aiov;
1294 char *tmpbuf;
1295 int error;
1296
1297 vnd->sc_cred = crdup(cred);
1298 tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);
1299
1300 /* XXX: Horrible kludge to establish credentials for NFS */
1301 aiov.iov_base = tmpbuf;
1302 aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size));
1303 auio.uio_iov = &aiov;
1304 auio.uio_iovcnt = 1;
1305 auio.uio_offset = 0;
1306 auio.uio_rw = UIO_READ;
1307 auio.uio_segflg = UIO_SYSSPACE;
1308 auio.uio_resid = aiov.iov_len;
1309 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
1310 error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
1311 if (error == 0) {
1312 /*
1313 * Because vnd does all IO directly through the vnode
1314 * we need to flush (at least) the buffer from the above
1315 * VOP_READ from the buffer cache to prevent cache
1316 * incoherencies. Also, be careful to write dirty
1317 * buffers back to stable storage.
1318 */
1319 error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred,
1320 curlwp, 0, 0);
1321 }
1322 VOP_UNLOCK(vnd->sc_vp, 0);
1323
1324 free(tmpbuf, M_TEMP);
1325 return (error);
1326 }
1327
1328 /*
1329 * Set maxactive based on FS type
1330 */
1331 static void
1332 vndthrottle(struct vnd_softc *vnd, struct vnode *vp)
1333 {
1334 #ifdef NFS
1335 extern int (**nfsv2_vnodeop_p)(void *);
1336
1337 if (vp->v_op == nfsv2_vnodeop_p)
1338 vnd->sc_maxactive = 2;
1339 else
1340 #endif
1341 vnd->sc_maxactive = 8;
1342
1343 if (vnd->sc_maxactive < 1)
1344 vnd->sc_maxactive = 1;
1345 }
1346
#if 0
/*
 * Unconfigure every unit that is currently initialized.
 *
 * NOTE: this code is compiled out.  It walks a vnd_softc[] array bounded
 * by numvnd and calls vndclear() with a single argument, whereas the
 * vndclear() defined in this file takes (vnd, myminor); it must be
 * brought up to date before it can be re-enabled.
 */
static void
vndshutdown(void)
{
	int unit;

	for (unit = 0; unit < numvnd; unit++) {
		struct vnd_softc *vnd = &vnd_softc[unit];

		if ((vnd->sc_flags & VNF_INITED) != 0)
			vndclear(vnd);
	}
}
#endif
1358
1359 static void
1360 vndclear(struct vnd_softc *vnd, int myminor)
1361 {
1362 struct vnode *vp = vnd->sc_vp;
1363 struct lwp *l = curlwp;
1364 int fflags = FREAD;
1365 int bmaj, cmaj, i, mn;
1366 int s;
1367
1368 #ifdef DEBUG
1369 if (vnddebug & VDB_FOLLOW)
1370 printf("vndclear(%p): vp %p\n", vnd, vp);
1371 #endif
1372 /* locate the major number */
1373 bmaj = bdevsw_lookup_major(&vnd_bdevsw);
1374 cmaj = cdevsw_lookup_major(&vnd_cdevsw);
1375
1376 /* Nuke the vnodes for any open instances */
1377 for (i = 0; i < MAXPARTITIONS; i++) {
1378 mn = DISKMINOR(vnd->sc_dev.dv_unit, i);
1379 vdevgone(bmaj, mn, mn, VBLK);
1380 if (mn != myminor) /* XXX avoid to kill own vnode */
1381 vdevgone(cmaj, mn, mn, VCHR);
1382 }
1383
1384 if ((vnd->sc_flags & VNF_READONLY) == 0)
1385 fflags |= FWRITE;
1386
1387 s = splbio();
1388 bufq_drain(vnd->sc_tab);
1389 splx(s);
1390
1391 vnd->sc_flags |= VNF_VUNCONF;
1392 wakeup(&vnd->sc_tab);
1393 while (vnd->sc_flags & VNF_KTHREAD)
1394 tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0);
1395
1396 #ifdef VND_COMPRESSION
1397 /* free the compressed file buffers */
1398 if(vnd->sc_flags & VNF_COMP) {
1399 if(vnd->sc_comp_offsets) {
1400 free(vnd->sc_comp_offsets, M_DEVBUF);
1401 vnd->sc_comp_offsets = NULL;
1402 }
1403 if(vnd->sc_comp_buff) {
1404 free(vnd->sc_comp_buff, M_DEVBUF);
1405 vnd->sc_comp_buff = NULL;
1406 }
1407 if(vnd->sc_comp_decombuf) {
1408 free(vnd->sc_comp_decombuf, M_DEVBUF);
1409 vnd->sc_comp_decombuf = NULL;
1410 }
1411 }
1412 #endif /* VND_COMPRESSION */
1413 vnd->sc_flags &=
1414 ~(VNF_INITED | VNF_READONLY | VNF_VLABEL
1415 | VNF_VUNCONF | VNF_COMP);
1416 if (vp == (struct vnode *)0)
1417 panic("vndclear: null vp");
1418 (void) vn_close(vp, fflags, vnd->sc_cred, l);
1419 crfree(vnd->sc_cred);
1420 vnd->sc_vp = (struct vnode *)0;
1421 vnd->sc_cred = (struct ucred *)0;
1422 vnd->sc_size = 0;
1423 }
1424
1425 static int
1426 vndsize(dev_t dev)
1427 {
1428 struct vnd_softc *sc;
1429 struct disklabel *lp;
1430 int part, unit, omask;
1431 int size;
1432
1433 unit = vndunit(dev);
1434 sc = (struct vnd_softc *)device_lookup(&vnd_cd, unit);
1435 if (sc == NULL)
1436 return -1;
1437
1438 if ((sc->sc_flags & VNF_INITED) == 0)
1439 return (-1);
1440
1441 part = DISKPART(dev);
1442 omask = sc->sc_dkdev.dk_openmask & (1 << part);
1443 lp = sc->sc_dkdev.dk_label;
1444
1445 if (omask == 0 && vndopen(dev, 0, S_IFBLK, curlwp)) /* XXX */
1446 return (-1);
1447
1448 if (lp->d_partitions[part].p_fstype != FS_SWAP)
1449 size = -1;
1450 else
1451 size = lp->d_partitions[part].p_size *
1452 (lp->d_secsize / DEV_BSIZE);
1453
1454 if (omask == 0 && vndclose(dev, 0, S_IFBLK, curlwp)) /* XXX */
1455 return (-1);
1456
1457 return (size);
1458 }
1459
1460 static int
1461 vnddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
1462 {
1463
1464 /* Not implemented. */
1465 return ENXIO;
1466 }
1467
1468 static void
1469 vndgetdefaultlabel(struct vnd_softc *sc, struct disklabel *lp)
1470 {
1471 struct vndgeom *vng = &sc->sc_geom;
1472 struct partition *pp;
1473
1474 memset(lp, 0, sizeof(*lp));
1475
1476 lp->d_secperunit = sc->sc_size / (vng->vng_secsize / DEV_BSIZE);
1477 lp->d_secsize = vng->vng_secsize;
1478 lp->d_nsectors = vng->vng_nsectors;
1479 lp->d_ntracks = vng->vng_ntracks;
1480 lp->d_ncylinders = vng->vng_ncylinders;
1481 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1482
1483 strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename));
1484 lp->d_type = DTYPE_VND;
1485 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1486 lp->d_rpm = 3600;
1487 lp->d_interleave = 1;
1488 lp->d_flags = 0;
1489
1490 pp = &lp->d_partitions[RAW_PART];
1491 pp->p_offset = 0;
1492 pp->p_size = lp->d_secperunit;
1493 pp->p_fstype = FS_UNUSED;
1494 lp->d_npartitions = RAW_PART + 1;
1495
1496 lp->d_magic = DISKMAGIC;
1497 lp->d_magic2 = DISKMAGIC;
1498 lp->d_checksum = dkcksum(lp);
1499 }
1500
1501 /*
1502 * Read the disklabel from a vnd. If one is not present, create a fake one.
1503 */
1504 static void
1505 vndgetdisklabel(dev_t dev, struct vnd_softc *sc)
1506 {
1507 const char *errstring;
1508 struct disklabel *lp = sc->sc_dkdev.dk_label;
1509 struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel;
1510 int i;
1511
1512 memset(clp, 0, sizeof(*clp));
1513
1514 vndgetdefaultlabel(sc, lp);
1515
1516 /*
1517 * Call the generic disklabel extraction routine.
1518 */
1519 errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp);
1520 if (errstring) {
1521 /*
1522 * Lack of disklabel is common, but we print the warning
1523 * anyway, since it might contain other useful information.
1524 */
1525 printf("%s: %s\n", sc->sc_dev.dv_xname, errstring);
1526
1527 /*
1528 * For historical reasons, if there's no disklabel
1529 * present, all partitions must be FS_BSDFFS and
1530 * occupy the entire disk.
1531 */
1532 for (i = 0; i < MAXPARTITIONS; i++) {
1533 /*
1534 * Don't wipe out port specific hack (such as
1535 * dos partition hack of i386 port).
1536 */
1537 if (lp->d_partitions[i].p_size != 0)
1538 continue;
1539
1540 lp->d_partitions[i].p_size = lp->d_secperunit;
1541 lp->d_partitions[i].p_offset = 0;
1542 lp->d_partitions[i].p_fstype = FS_BSDFFS;
1543 }
1544
1545 strncpy(lp->d_packname, "default label",
1546 sizeof(lp->d_packname));
1547
1548 lp->d_npartitions = MAXPARTITIONS;
1549 lp->d_checksum = dkcksum(lp);
1550 }
1551
1552 /* In-core label now valid. */
1553 sc->sc_flags |= VNF_VLABEL;
1554 }
1555
1556 /*
1557 * Wait interruptibly for an exclusive lock.
1558 *
1559 * XXX
1560 * Several drivers do this; it should be abstracted and made MP-safe.
1561 */
1562 static int
1563 vndlock(struct vnd_softc *sc)
1564 {
1565 int error;
1566
1567 while ((sc->sc_flags & VNF_LOCKED) != 0) {
1568 sc->sc_flags |= VNF_WANTED;
1569 if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
1570 return (error);
1571 }
1572 sc->sc_flags |= VNF_LOCKED;
1573 return (0);
1574 }
1575
1576 /*
1577 * Unlock and wake up any waiters.
1578 */
1579 static void
1580 vndunlock(struct vnd_softc *sc)
1581 {
1582
1583 sc->sc_flags &= ~VNF_LOCKED;
1584 if ((sc->sc_flags & VNF_WANTED) != 0) {
1585 sc->sc_flags &= ~VNF_WANTED;
1586 wakeup(sc);
1587 }
1588 }
1589
1590 #ifdef VND_COMPRESSION
1591 /* compressed file read */
1592 static void
1593 compstrategy(struct buf *bp, off_t bn)
1594 {
1595 int error;
1596 int unit = vndunit(bp->b_dev);
1597 struct vnd_softc *vnd =
1598 (struct vnd_softc *)device_lookup(&vnd_cd, unit);
1599 u_int32_t comp_block;
1600 struct uio auio;
1601 caddr_t addr;
1602 int s;
1603
1604 /* set up constants for data move */
1605 auio.uio_rw = UIO_READ;
1606 auio.uio_segflg = UIO_SYSSPACE;
1607
1608 /* read, and transfer the data */
1609 addr = bp->b_data;
1610 s = splbio();
1611 while (bp->b_resid > 0) {
1612 unsigned length;
1613 size_t length_in_buffer;
1614 u_int32_t offset_in_buffer;
1615 struct iovec aiov;
1616
1617 /* calculate the compressed block number */
1618 comp_block = bn / (off_t)vnd->sc_comp_blksz;
1619
1620 /* check for good block number */
1621 if (comp_block >= vnd->sc_comp_numoffs) {
1622 bp->b_error = EINVAL;
1623 bp->b_flags |= B_ERROR;
1624 splx(s);
1625 return;
1626 }
1627
1628 /* read in the compressed block, if not in buffer */
1629 if (comp_block != vnd->sc_comp_buffblk) {
1630 length = vnd->sc_comp_offsets[comp_block + 1] -
1631 vnd->sc_comp_offsets[comp_block];
1632 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
1633 error = vn_rdwr(UIO_READ, vnd->sc_vp, vnd->sc_comp_buff,
1634 length, vnd->sc_comp_offsets[comp_block],
1635 UIO_SYSSPACE, IO_UNIT, vnd->sc_cred, NULL, NULL);
1636 if (error) {
1637 bp->b_error = error;
1638 bp->b_flags |= B_ERROR;
1639 VOP_UNLOCK(vnd->sc_vp, 0);
1640 splx(s);
1641 return;
1642 }
1643 /* uncompress the buffer */
1644 vnd->sc_comp_stream.next_in = vnd->sc_comp_buff;
1645 vnd->sc_comp_stream.avail_in = length;
1646 vnd->sc_comp_stream.next_out = vnd->sc_comp_decombuf;
1647 vnd->sc_comp_stream.avail_out = vnd->sc_comp_blksz;
1648 inflateReset(&vnd->sc_comp_stream);
1649 error = inflate(&vnd->sc_comp_stream, Z_FINISH);
1650 if (error != Z_STREAM_END) {
1651 if (vnd->sc_comp_stream.msg)
1652 printf("%s: compressed file, %s\n",
1653 vnd->sc_dev.dv_xname,
1654 vnd->sc_comp_stream.msg);
1655 bp->b_error = EBADMSG;
1656 bp->b_flags |= B_ERROR;
1657 VOP_UNLOCK(vnd->sc_vp, 0);
1658 splx(s);
1659 return;
1660 }
1661 vnd->sc_comp_buffblk = comp_block;
1662 VOP_UNLOCK(vnd->sc_vp, 0);
1663 }
1664
1665 /* transfer the usable uncompressed data */
1666 offset_in_buffer = bn % (off_t)vnd->sc_comp_blksz;
1667 length_in_buffer = vnd->sc_comp_blksz - offset_in_buffer;
1668 if (length_in_buffer > bp->b_resid)
1669 length_in_buffer = bp->b_resid;
1670 auio.uio_iov = &aiov;
1671 auio.uio_iovcnt = 1;
1672 aiov.iov_base = addr;
1673 aiov.iov_len = length_in_buffer;
1674 auio.uio_resid = aiov.iov_len;
1675 auio.uio_offset = 0;
1676 error = uiomove(vnd->sc_comp_decombuf + offset_in_buffer,
1677 length_in_buffer, &auio);
1678 if (error) {
1679 bp->b_error = error;
1680 bp->b_flags |= B_ERROR;
1681 splx(s);
1682 return;
1683 }
1684
1685 bn += length_in_buffer;
1686 addr += length_in_buffer;
1687 bp->b_resid -= length_in_buffer;
1688 }
1689 splx(s);
1690 }
1691
1692 /* compression memory allocation routines */
1693 static void *
1694 vnd_alloc(void *aux, u_int items, u_int siz)
1695 {
1696 return malloc(items * siz, M_TEMP, M_NOWAIT);
1697 }
1698
1699 static void
1700 vnd_free(void *aux, void *ptr)
1701 {
1702 free(ptr, M_TEMP);
1703 }
1704 #endif /* VND_COMPRESSION */
1705