spec_vnops.c revision 1.126 1 /* $NetBSD: spec_vnops.c,v 1.126 2009/10/06 04:28:10 elad Exp $ */
2
3 /*-
4 * Copyright (c) 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * Copyright (c) 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. Neither the name of the University nor the names of its contributors
42 * may be used to endorse or promote products derived from this software
43 * without specific prior written permission.
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * SUCH DAMAGE.
56 *
57 * @(#)spec_vnops.c 8.15 (Berkeley) 7/14/95
58 */
59
60 #include <sys/cdefs.h>
61 __KERNEL_RCSID(0, "$NetBSD: spec_vnops.c,v 1.126 2009/10/06 04:28:10 elad Exp $");
62
63 #include <sys/param.h>
64 #include <sys/proc.h>
65 #include <sys/systm.h>
66 #include <sys/kernel.h>
67 #include <sys/conf.h>
68 #include <sys/buf.h>
69 #include <sys/mount.h>
70 #include <sys/namei.h>
71 #include <sys/vnode.h>
72 #include <sys/stat.h>
73 #include <sys/errno.h>
74 #include <sys/ioctl.h>
75 #include <sys/poll.h>
76 #include <sys/file.h>
77 #include <sys/disklabel.h>
78 #include <sys/lockf.h>
79 #include <sys/tty.h>
80 #include <sys/kauth.h>
81 #include <sys/fstrans.h>
82 #include <sys/module.h>
83
84 #include <miscfs/genfs/genfs.h>
85 #include <miscfs/specfs/specdev.h>
86
/* symbolic sleep message strings for devices */
const char devopn[] = "devopn";		/* device open */
const char devio[] = "devio";		/* device I/O */
const char devwait[] = "devwait";	/* waiting for device */
const char devin[] = "devin";		/* device input */
const char devout[] = "devout";		/* device output */
const char devioc[] = "devioc";		/* device ioctl */
const char devcls[] = "devcls";		/* device close */

/*
 * Hash table of device vnodes, indexed by SPECHASH(rdev) and chained
 * through v_specnext.  Every access in this file is made while
 * holding device_lock.
 */
vnode_t *specfs_hash[SPECHSZ];
97
/*
 * This vnode operations vector is used for special device nodes
 * created from whole cloth by the kernel.  For the ops vector for
 * vnodes built from special devices found in a filesystem, see (e.g)
 * ffs_specop_entries[] in ffs_vnops.c or the equivalent for other
 * filesystems.
 */

/* Filled in by vfs_opv_init() from the table below. */
int (**spec_vnodeop_p)(void *);
const struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, spec_lookup },		/* lookup */
	{ &vop_create_desc, spec_create },		/* create */
	{ &vop_mknod_desc, spec_mknod },		/* mknod */
	{ &vop_open_desc, spec_open },			/* open */
	{ &vop_close_desc, spec_close },		/* close */
	{ &vop_access_desc, spec_access },		/* access */
	{ &vop_getattr_desc, spec_getattr },		/* getattr */
	{ &vop_setattr_desc, spec_setattr },		/* setattr */
	{ &vop_read_desc, spec_read },			/* read */
	{ &vop_write_desc, spec_write },		/* write */
	{ &vop_fcntl_desc, spec_fcntl },		/* fcntl */
	{ &vop_ioctl_desc, spec_ioctl },		/* ioctl */
	{ &vop_poll_desc, spec_poll },			/* poll */
	{ &vop_kqfilter_desc, spec_kqfilter },		/* kqfilter */
	{ &vop_revoke_desc, spec_revoke },		/* revoke */
	{ &vop_mmap_desc, spec_mmap },			/* mmap */
	{ &vop_fsync_desc, spec_fsync },		/* fsync */
	{ &vop_seek_desc, spec_seek },			/* seek */
	{ &vop_remove_desc, spec_remove },		/* remove */
	{ &vop_link_desc, spec_link },			/* link */
	{ &vop_rename_desc, spec_rename },		/* rename */
	{ &vop_mkdir_desc, spec_mkdir },		/* mkdir */
	{ &vop_rmdir_desc, spec_rmdir },		/* rmdir */
	{ &vop_symlink_desc, spec_symlink },		/* symlink */
	{ &vop_readdir_desc, spec_readdir },		/* readdir */
	{ &vop_readlink_desc, spec_readlink },		/* readlink */
	{ &vop_abortop_desc, spec_abortop },		/* abortop */
	{ &vop_inactive_desc, spec_inactive },		/* inactive */
	{ &vop_reclaim_desc, spec_reclaim },		/* reclaim */
	{ &vop_lock_desc, spec_lock },			/* lock */
	{ &vop_unlock_desc, spec_unlock },		/* unlock */
	{ &vop_bmap_desc, spec_bmap },			/* bmap */
	{ &vop_strategy_desc, spec_strategy },		/* strategy */
	{ &vop_print_desc, spec_print },		/* print */
	{ &vop_islocked_desc, spec_islocked },		/* islocked */
	{ &vop_pathconf_desc, spec_pathconf },		/* pathconf */
	{ &vop_advlock_desc, spec_advlock },		/* advlock */
	{ &vop_bwrite_desc, spec_bwrite },		/* bwrite */
	{ &vop_getpages_desc, spec_getpages },		/* getpages */
	{ &vop_putpages_desc, spec_putpages },		/* putpages */
	{ NULL, NULL }					/* end marker */
};
const struct vnodeopv_desc spec_vnodeop_opv_desc =
	{ &spec_vnodeop_p, spec_vnodeop_entries };
153
154 /* Returns true if vnode is /dev/mem or /dev/kmem. */
155 bool
156 iskmemvp(struct vnode *vp)
157 {
158 return ((vp->v_type == VCHR) && iskmemdev(vp->v_rdev));
159 }
160
161 /*
162 * Returns true if dev is /dev/mem or /dev/kmem.
163 */
164 int
165 iskmemdev(dev_t dev)
166 {
167 /* mem_no is emitted by config(8) to generated devsw.c */
168 extern const int mem_no;
169
170 /* minor 14 is /dev/io on i386 with COMPAT_10 */
171 return (major(dev) == mem_no && (minor(dev) < 2 || minor(dev) == 14));
172 }
173
174 /*
175 * Initialize a vnode that represents a device.
176 */
177 void
178 spec_node_init(vnode_t *vp, dev_t rdev)
179 {
180 specnode_t *sn;
181 specdev_t *sd;
182 vnode_t *vp2;
183 vnode_t **vpp;
184
185 KASSERT(vp->v_type == VBLK || vp->v_type == VCHR);
186 KASSERT(vp->v_specnode == NULL);
187
188 /*
189 * Search the hash table for this device. If known, add a
190 * reference to the device structure. If not known, create
191 * a new entry to represent the device. In all cases add
192 * the vnode to the hash table.
193 */
194 sn = kmem_alloc(sizeof(*sn), KM_SLEEP);
195 if (sn == NULL) {
196 /* XXX */
197 panic("spec_node_init: unable to allocate memory");
198 }
199 sd = kmem_alloc(sizeof(*sd), KM_SLEEP);
200 if (sd == NULL) {
201 /* XXX */
202 panic("spec_node_init: unable to allocate memory");
203 }
204 mutex_enter(&device_lock);
205 vpp = &specfs_hash[SPECHASH(rdev)];
206 for (vp2 = *vpp; vp2 != NULL; vp2 = vp2->v_specnext) {
207 KASSERT(vp2->v_specnode != NULL);
208 if (rdev == vp2->v_rdev && vp->v_type == vp2->v_type) {
209 break;
210 }
211 }
212 if (vp2 == NULL) {
213 /* No existing record, create a new one. */
214 sd->sd_rdev = rdev;
215 sd->sd_mountpoint = NULL;
216 sd->sd_lockf = NULL;
217 sd->sd_refcnt = 1;
218 sd->sd_opencnt = 0;
219 sd->sd_bdevvp = NULL;
220 sn->sn_dev = sd;
221 sd = NULL;
222 } else {
223 /* Use the existing record. */
224 sn->sn_dev = vp2->v_specnode->sn_dev;
225 sn->sn_dev->sd_refcnt++;
226 }
227 /* Insert vnode into the hash chain. */
228 sn->sn_opencnt = 0;
229 sn->sn_rdev = rdev;
230 sn->sn_gone = false;
231 vp->v_specnode = sn;
232 vp->v_specnext = *vpp;
233 *vpp = vp;
234 mutex_exit(&device_lock);
235
236 /* Free the record we allocated if unused. */
237 if (sd != NULL) {
238 kmem_free(sd, sizeof(*sd));
239 }
240 }
241
/*
 * A vnode representing a special device is going away.  Close
 * the device if the vnode holds it open.
 */
void
spec_node_revoke(vnode_t *vp)
{
	specnode_t *sn;
	specdev_t *sd;

	sn = vp->v_specnode;
	sd = sn->sn_dev;

	/* Caller must hold the vnode revoked (VI_XLOCK set). */
	KASSERT(vp->v_type == VBLK || vp->v_type == VCHR);
	KASSERT(vp->v_specnode != NULL);
	KASSERT((vp->v_iflag & VI_XLOCK) != 0);
	KASSERT(sn->sn_gone == false);

	mutex_enter(&device_lock);
	KASSERT(sn->sn_opencnt <= sd->sd_opencnt);
	if (sn->sn_opencnt != 0) {
		/*
		 * This vnode holds the device open.  Fold all but one
		 * of its opens out of the device-wide count, leave
		 * exactly one open on this node, and mark it gone so
		 * no new opens can start.  Then issue the final close
		 * with device_lock dropped, since the driver's close
		 * routine may sleep.
		 */
		sd->sd_opencnt -= (sn->sn_opencnt - 1);
		sn->sn_opencnt = 1;
		sn->sn_gone = true;
		mutex_exit(&device_lock);

		VOP_CLOSE(vp, FNONBLOCK, NOCRED);

		/* The close must have zeroed our open count. */
		mutex_enter(&device_lock);
		KASSERT(sn->sn_opencnt == 0);
	}
	mutex_exit(&device_lock);
}
275
276 /*
277 * A vnode representing a special device is being recycled.
278 * Destroy the specfs component.
279 */
280 void
281 spec_node_destroy(vnode_t *vp)
282 {
283 specnode_t *sn;
284 specdev_t *sd;
285 vnode_t **vpp, *vp2;
286 int refcnt;
287
288 sn = vp->v_specnode;
289 sd = sn->sn_dev;
290
291 KASSERT(vp->v_type == VBLK || vp->v_type == VCHR);
292 KASSERT(vp->v_specnode != NULL);
293 KASSERT(sn->sn_opencnt == 0);
294
295 mutex_enter(&device_lock);
296 /* Remove from the hash and destroy the node. */
297 vpp = &specfs_hash[SPECHASH(vp->v_rdev)];
298 for (vp2 = *vpp;; vp2 = vp2->v_specnext) {
299 if (vp2 == NULL) {
300 panic("spec_node_destroy: corrupt hash");
301 }
302 if (vp2 == vp) {
303 KASSERT(vp == *vpp);
304 *vpp = vp->v_specnext;
305 break;
306 }
307 if (vp2->v_specnext == vp) {
308 vp2->v_specnext = vp->v_specnext;
309 break;
310 }
311 }
312 sn = vp->v_specnode;
313 vp->v_specnode = NULL;
314 refcnt = sd->sd_refcnt--;
315 KASSERT(refcnt > 0);
316 mutex_exit(&device_lock);
317
318 /* If the device is no longer in use, destroy our record. */
319 if (refcnt == 1) {
320 KASSERT(sd->sd_opencnt == 0);
321 KASSERT(sd->sd_bdevvp == NULL);
322 kmem_free(sd, sizeof(*sd));
323 }
324 kmem_free(sn, sizeof(*sn));
325 }
326
327 /*
328 * Trivial lookup routine that always fails.
329 */
330 int
331 spec_lookup(void *v)
332 {
333 struct vop_lookup_args /* {
334 struct vnode *a_dvp;
335 struct vnode **a_vpp;
336 struct componentname *a_cnp;
337 } */ *ap = v;
338
339 *ap->a_vpp = NULL;
340 return (ENOTDIR);
341 }
342
343 /*
344 * Open a special file.
345 */
346 /* ARGSUSED */
347 int
348 spec_open(void *v)
349 {
350 struct vop_open_args /* {
351 struct vnode *a_vp;
352 int a_mode;
353 kauth_cred_t a_cred;
354 } */ *ap = v;
355 struct lwp *l;
356 struct vnode *vp;
357 dev_t dev;
358 int error;
359 struct partinfo pi;
360 enum kauth_device_req req;
361 specnode_t *sn;
362 specdev_t *sd;
363
364 u_int gen;
365 const char *name;
366
367 l = curlwp;
368 vp = ap->a_vp;
369 dev = vp->v_rdev;
370 sn = vp->v_specnode;
371 sd = sn->sn_dev;
372 name = NULL;
373 gen = 0;
374
375 /*
376 * Don't allow open if fs is mounted -nodev.
377 */
378 if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
379 return (ENXIO);
380
381 switch (ap->a_mode & (FREAD | FWRITE)) {
382 case FREAD | FWRITE:
383 req = KAUTH_REQ_DEVICE_RAWIO_SPEC_RW;
384 break;
385 case FWRITE:
386 req = KAUTH_REQ_DEVICE_RAWIO_SPEC_WRITE;
387 break;
388 default:
389 req = KAUTH_REQ_DEVICE_RAWIO_SPEC_READ;
390 break;
391 }
392
393 switch (vp->v_type) {
394 case VCHR:
395 error = kauth_authorize_device_spec(ap->a_cred, req, vp);
396 if (error != 0)
397 return (error);
398
399 /*
400 * Character devices can accept opens from multiple
401 * vnodes.
402 */
403 mutex_enter(&device_lock);
404 if (sn->sn_gone) {
405 mutex_exit(&device_lock);
406 return (EBADF);
407 }
408 sd->sd_opencnt++;
409 sn->sn_opencnt++;
410 mutex_exit(&device_lock);
411 if (cdev_type(dev) == D_TTY)
412 vp->v_vflag |= VV_ISTTY;
413 VOP_UNLOCK(vp, 0);
414 do {
415 const struct cdevsw *cdev;
416
417 gen = module_gen;
418 error = cdev_open(dev, ap->a_mode, S_IFCHR, l);
419 if (error != ENXIO)
420 break;
421
422 /* Check if we already have a valid driver */
423 mutex_enter(&device_lock);
424 cdev = cdevsw_lookup(dev);
425 mutex_exit(&device_lock);
426 if (cdev != NULL)
427 break;
428
429 /* Get device name from devsw_conv array */
430 if ((name = cdevsw_getname(major(dev))) == NULL)
431 break;
432
433 /* Try to autoload device module */
434 mutex_enter(&module_lock);
435 (void) module_autoload(name, MODULE_CLASS_DRIVER);
436 mutex_exit(&module_lock);
437 } while (gen != module_gen);
438
439 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
440 break;
441
442 case VBLK:
443 error = kauth_authorize_device_spec(ap->a_cred, req, vp);
444 if (error != 0)
445 return (error);
446
447 /*
448 * For block devices, permit only one open. The buffer
449 * cache cannot remain self-consistent with multiple
450 * vnodes holding a block device open.
451 */
452 mutex_enter(&device_lock);
453 if (sn->sn_gone) {
454 mutex_exit(&device_lock);
455 return (EBADF);
456 }
457 if (sd->sd_opencnt != 0) {
458 mutex_exit(&device_lock);
459 return EBUSY;
460 }
461 sn->sn_opencnt = 1;
462 sd->sd_opencnt = 1;
463 sd->sd_bdevvp = vp;
464 mutex_exit(&device_lock);
465 do {
466 const struct bdevsw *bdev;
467
468 gen = module_gen;
469 error = bdev_open(dev, ap->a_mode, S_IFBLK, l);
470 if (error != ENXIO)
471 break;
472
473 /* Check if we already have a valid driver */
474 mutex_enter(&device_lock);
475 bdev = bdevsw_lookup(dev);
476 mutex_exit(&device_lock);
477 if (bdev != NULL)
478 break;
479
480 /* Get device name from devsw_conv array */
481 if ((name = bdevsw_getname(major(dev))) == NULL)
482 break;
483
484 VOP_UNLOCK(vp, 0);
485
486 /* Try to autoload device module */
487 mutex_enter(&module_lock);
488 (void) module_autoload(name, MODULE_CLASS_DRIVER);
489 mutex_exit(&module_lock);
490
491 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
492 } while (gen != module_gen);
493
494 break;
495
496 case VNON:
497 case VLNK:
498 case VDIR:
499 case VREG:
500 case VBAD:
501 case VFIFO:
502 case VSOCK:
503 default:
504 return 0;
505 }
506
507 mutex_enter(&device_lock);
508 if (sn->sn_gone) {
509 if (error == 0)
510 error = EBADF;
511 } else if (error != 0) {
512 sd->sd_opencnt--;
513 sn->sn_opencnt--;
514 if (vp->v_type == VBLK)
515 sd->sd_bdevvp = NULL;
516
517 }
518 mutex_exit(&device_lock);
519
520 if (cdev_type(dev) != D_DISK || error != 0)
521 return error;
522
523 if (vp->v_type == VCHR)
524 error = cdev_ioctl(vp->v_rdev, DIOCGPART, &pi, FREAD, curlwp);
525 else
526 error = bdev_ioctl(vp->v_rdev, DIOCGPART, &pi, FREAD, curlwp);
527 if (error == 0)
528 uvm_vnp_setsize(vp,
529 (voff_t)pi.disklab->d_secsize * pi.part->p_size);
530 return 0;
531 }
532
/*
 * Vnode op for read
 */
/* ARGSUSED */
int
spec_read(void *v)
{
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct lwp *l = curlwp;
	struct buf *bp;
	daddr_t bn;
	int bsize, bscale;
	struct partinfo dpart;
	int n, on;
	int error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("spec_read mode");
	if (&uio->uio_vmspace->vm_map != kernel_map &&
	    uio->uio_vmspace != curproc->p_vmspace)
		panic("spec_read proc");
#endif
	if (uio->uio_resid == 0)
		return (0);

	switch (vp->v_type) {

	case VCHR:
		/*
		 * Character devices read directly through the driver.
		 * Drop the vnode lock across the call, since the
		 * driver may sleep indefinitely (e.g. a tty).
		 */
		VOP_UNLOCK(vp, 0);
		error = cdev_read(vp->v_rdev, uio, ap->a_ioflag);
		vn_lock(vp, LK_SHARED | LK_RETRY);
		return (error);

	case VBLK:
		KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);
		if (uio->uio_offset < 0)
			return (EINVAL);
		/*
		 * Default to BLKDEV_IOSIZE; if the partition holds an
		 * FFS file system, use its fragment size instead.
		 */
		bsize = BLKDEV_IOSIZE;
		if (bdev_ioctl(vp->v_rdev, DIOCGPART, &dpart, FREAD, l) == 0) {
			if (dpart.part->p_fstype == FS_BSDFFS &&
			    dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
				bsize = dpart.part->p_frag *
				    dpart.part->p_fsize;
		}
		bscale = bsize >> DEV_BSHIFT;
		/*
		 * Read through the buffer cache one bsize block at a
		 * time: bn is the bsize-aligned device block number,
		 * on is the byte offset within that block.
		 */
		do {
			bn = (uio->uio_offset >> DEV_BSHIFT) &~ (bscale - 1);
			on = uio->uio_offset % bsize;
			n = min((unsigned)(bsize - on), uio->uio_resid);
			error = bread(vp, bn, bsize, NOCRED, 0, &bp);
			/* Clamp to what the device actually delivered. */
			n = min(n, bsize - bp->b_resid);
			if (error) {
				brelse(bp, 0);
				return (error);
			}
			error = uiomove((char *)bp->b_data + on, n, uio);
			brelse(bp, 0);
		} while (error == 0 && uio->uio_resid > 0 && n != 0);
		return (error);

	default:
		panic("spec_read type");
	}
	/* NOTREACHED */
}
606
/*
 * Vnode op for write
 */
/* ARGSUSED */
int
spec_write(void *v)
{
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct lwp *l = curlwp;
	struct buf *bp;
	daddr_t bn;
	int bsize, bscale;
	struct partinfo dpart;
	int n, on;
	int error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("spec_write mode");
	if (&uio->uio_vmspace->vm_map != kernel_map &&
	    uio->uio_vmspace != curproc->p_vmspace)
		panic("spec_write proc");
#endif

	switch (vp->v_type) {

	case VCHR:
		/*
		 * Character devices write directly through the driver;
		 * drop the vnode lock while the driver may sleep.
		 */
		VOP_UNLOCK(vp, 0);
		error = cdev_write(vp->v_rdev, uio, ap->a_ioflag);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		return (error);

	case VBLK:
		KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);
		if (uio->uio_resid == 0)
			return (0);
		if (uio->uio_offset < 0)
			return (EINVAL);
		/*
		 * Choose the block size exactly as spec_read() does:
		 * BLKDEV_IOSIZE, or the FFS fragment size if the
		 * partition holds an FFS file system.
		 */
		bsize = BLKDEV_IOSIZE;
		if (bdev_ioctl(vp->v_rdev, DIOCGPART, &dpart, FREAD, l) == 0) {
			if (dpart.part->p_fstype == FS_BSDFFS &&
			    dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
				bsize = dpart.part->p_frag *
				    dpart.part->p_fsize;
		}
		bscale = bsize >> DEV_BSHIFT;
		do {
			bn = (uio->uio_offset >> DEV_BSHIFT) &~ (bscale - 1);
			on = uio->uio_offset % bsize;
			n = min((unsigned)(bsize - on), uio->uio_resid);
			/*
			 * A whole-block write need not read the old
			 * contents first; a partial write must.
			 */
			if (n == bsize)
				bp = getblk(vp, bn, bsize, 0, 0);
			else
				error = bread(vp, bn, bsize, NOCRED,
				    B_MODIFY, &bp);
			if (error) {
				brelse(bp, 0);
				return (error);
			}
			n = min(n, bsize - bp->b_resid);
			error = uiomove((char *)bp->b_data + on, n, uio);
			if (error)
				brelse(bp, 0);
			else {
				/*
				 * Full blocks go out asynchronously at
				 * once; partial blocks are delayed.
				 *
				 * NOTE(review): bp->b_error is read
				 * after bawrite()/bdwrite() release the
				 * buffer — confirm this access is still
				 * safe at that point.
				 */
				if (n + on == bsize)
					bawrite(bp);
				else
					bdwrite(bp);
				error = bp->b_error;
			}
		} while (error == 0 && uio->uio_resid > 0 && n != 0);
		return (error);

	default:
		panic("spec_write type");
	}
	/* NOTREACHED */
}
692
693 /*
694 * Device ioctl operation.
695 */
696 /* ARGSUSED */
697 int
698 spec_ioctl(void *v)
699 {
700 struct vop_ioctl_args /* {
701 struct vnode *a_vp;
702 u_long a_command;
703 void *a_data;
704 int a_fflag;
705 kauth_cred_t a_cred;
706 } */ *ap = v;
707 struct vnode *vp;
708 dev_t dev;
709
710 /*
711 * Extract all the info we need from the vnode, taking care to
712 * avoid a race with VOP_REVOKE().
713 */
714
715 vp = ap->a_vp;
716 dev = NODEV;
717 mutex_enter(&vp->v_interlock);
718 if ((vp->v_iflag & VI_XLOCK) == 0 && vp->v_specnode) {
719 dev = vp->v_rdev;
720 }
721 mutex_exit(&vp->v_interlock);
722 if (dev == NODEV) {
723 return ENXIO;
724 }
725
726 switch (vp->v_type) {
727
728 case VCHR:
729 return cdev_ioctl(dev, ap->a_command, ap->a_data,
730 ap->a_fflag, curlwp);
731
732 case VBLK:
733 KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);
734 return bdev_ioctl(dev, ap->a_command, ap->a_data,
735 ap->a_fflag, curlwp);
736
737 default:
738 panic("spec_ioctl");
739 /* NOTREACHED */
740 }
741 }
742
743 /* ARGSUSED */
744 int
745 spec_poll(void *v)
746 {
747 struct vop_poll_args /* {
748 struct vnode *a_vp;
749 int a_events;
750 } */ *ap = v;
751 struct vnode *vp;
752 dev_t dev;
753
754 /*
755 * Extract all the info we need from the vnode, taking care to
756 * avoid a race with VOP_REVOKE().
757 */
758
759 vp = ap->a_vp;
760 dev = NODEV;
761 mutex_enter(&vp->v_interlock);
762 if ((vp->v_iflag & VI_XLOCK) == 0 && vp->v_specnode) {
763 dev = vp->v_rdev;
764 }
765 mutex_exit(&vp->v_interlock);
766 if (dev == NODEV) {
767 return POLLERR;
768 }
769
770 switch (vp->v_type) {
771
772 case VCHR:
773 return cdev_poll(dev, ap->a_events, curlwp);
774
775 default:
776 return (genfs_poll(v));
777 }
778 }
779
780 /* ARGSUSED */
781 int
782 spec_kqfilter(void *v)
783 {
784 struct vop_kqfilter_args /* {
785 struct vnode *a_vp;
786 struct proc *a_kn;
787 } */ *ap = v;
788 dev_t dev;
789
790 switch (ap->a_vp->v_type) {
791
792 case VCHR:
793 dev = ap->a_vp->v_rdev;
794 return cdev_kqfilter(dev, ap->a_kn);
795 default:
796 /*
797 * Block devices don't support kqfilter, and refuse it
798 * for any other files (like those vflush()ed) too.
799 */
800 return (EOPNOTSUPP);
801 }
802 }
803
804 /*
805 * Allow mapping of only D_DISK. This is called only for VBLK.
806 */
807 int
808 spec_mmap(void *v)
809 {
810 struct vop_mmap_args /* {
811 struct vnode *a_vp;
812 vm_prot_t a_prot;
813 kauth_cred_t a_cred;
814 } */ *ap = v;
815 struct vnode *vp = ap->a_vp;
816
817 KASSERT(vp->v_type == VBLK);
818 if (bdev_type(vp->v_rdev) != D_DISK)
819 return EINVAL;
820
821 return 0;
822 }
823
824 /*
825 * Synch buffers associated with a block device
826 */
827 /* ARGSUSED */
828 int
829 spec_fsync(void *v)
830 {
831 struct vop_fsync_args /* {
832 struct vnode *a_vp;
833 kauth_cred_t a_cred;
834 int a_flags;
835 off_t offlo;
836 off_t offhi;
837 } */ *ap = v;
838 struct vnode *vp = ap->a_vp;
839 struct mount *mp;
840 int error;
841
842 if (vp->v_type == VBLK) {
843 if ((mp = vp->v_specmountpoint) != NULL) {
844 error = VFS_FSYNC(mp, vp, ap->a_flags | FSYNC_VFS);
845 if (error != EOPNOTSUPP)
846 return error;
847 }
848 vflushbuf(vp, (ap->a_flags & FSYNC_WAIT) != 0);
849 }
850 return (0);
851 }
852
853 /*
854 * Just call the device strategy routine
855 */
856 int
857 spec_strategy(void *v)
858 {
859 struct vop_strategy_args /* {
860 struct vnode *a_vp;
861 struct buf *a_bp;
862 } */ *ap = v;
863 struct vnode *vp = ap->a_vp;
864 struct buf *bp = ap->a_bp;
865 int error;
866
867 KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);
868
869 error = 0;
870 bp->b_dev = vp->v_rdev;
871
872 if (!(bp->b_flags & B_READ))
873 error = fscow_run(bp, false);
874
875 if (error) {
876 bp->b_error = error;
877 biodone(bp);
878 return (error);
879 }
880
881 bdev_strategy(bp);
882
883 return (0);
884 }
885
886 int
887 spec_inactive(void *v)
888 {
889 struct vop_inactive_args /* {
890 struct vnode *a_vp;
891 struct proc *a_l;
892 } */ *ap = v;
893
894 VOP_UNLOCK(ap->a_vp, 0);
895 return (0);
896 }
897
898 /*
899 * This is a noop, simply returning what one has been given.
900 */
901 int
902 spec_bmap(void *v)
903 {
904 struct vop_bmap_args /* {
905 struct vnode *a_vp;
906 daddr_t a_bn;
907 struct vnode **a_vpp;
908 daddr_t *a_bnp;
909 int *a_runp;
910 } */ *ap = v;
911
912 if (ap->a_vpp != NULL)
913 *ap->a_vpp = ap->a_vp;
914 if (ap->a_bnp != NULL)
915 *ap->a_bnp = ap->a_bn;
916 if (ap->a_runp != NULL)
917 *ap->a_runp = (MAXBSIZE >> DEV_BSHIFT) - 1;
918 return (0);
919 }
920
/*
 * Device close routine.
 *
 * Handles the controlling-terminal session hack for character
 * devices, invalidates cached buffers for block devices, drops
 * the open counts, and calls the driver's close routine when the
 * device-wide open count reaches zero.
 */
/* ARGSUSED */
int
spec_close(void *v)
{
	struct vop_close_args /* {
		struct vnode *a_vp;
		int a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct session *sess;
	dev_t dev = vp->v_rdev;
	int mode, error, flags, flags1, count;
	specnode_t *sn;
	specdev_t *sd;

	flags = vp->v_iflag;
	sn = vp->v_specnode;
	sd = sn->sn_dev;

	switch (vp->v_type) {

	case VCHR:
		/*
		 * Hack: a tty device that is a controlling terminal
		 * has a reference from the session structure.  We
		 * cannot easily tell that a character device is a
		 * controlling terminal, unless it is the closing
		 * process' controlling terminal.  In that case, if the
		 * open count is 1 release the reference from the
		 * session.  Also, remove the link from the tty back to
		 * the session and pgrp.
		 *
		 * XXX V. fishy.
		 */
		mutex_enter(proc_lock);
		sess = curlwp->l_proc->p_session;
		if (sn->sn_opencnt == 1 && vp == sess->s_ttyvp) {
			mutex_spin_enter(&tty_lock);
			sess->s_ttyvp = NULL;
			if (sess->s_ttyp->t_session != NULL) {
				sess->s_ttyp->t_pgrp = NULL;
				sess->s_ttyp->t_session = NULL;
				mutex_spin_exit(&tty_lock);
				/* Releases proc_lock. */
				proc_sessrele(sess);
			} else {
				mutex_spin_exit(&tty_lock);
				if (sess->s_ttyp->t_pgrp != NULL)
					panic("spec_close: spurious pgrp ref");
				mutex_exit(proc_lock);
			}
			/* Drop the session's reference on the vnode. */
			vrele(vp);
		} else
			mutex_exit(proc_lock);

		/*
		 * If the vnode is locked, then we are in the midst
		 * of forcably closing the device, otherwise we only
		 * close on last reference.
		 */
		mode = S_IFCHR;
		break;

	case VBLK:
		KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);
		/*
		 * On last close of a block device (that isn't mounted)
		 * we must invalidate any in core blocks, so that
		 * we can, for instance, change floppy disks.
		 */
		error = vinvalbuf(vp, V_SAVE, ap->a_cred, curlwp, 0, 0);
		if (error)
			return (error);
		/*
		 * We do not want to really close the device if it
		 * is still in use unless we are trying to close it
		 * forcibly. Since every use (buffer, vnode, swap, cmap)
		 * holds a reference to the vnode, and because we mark
		 * any other vnodes that alias this device, when the
		 * sum of the reference counts on all the aliased
		 * vnodes descends to one, we are on last close.
		 */
		mode = S_IFBLK;
		break;

	default:
		panic("spec_close: not special");
	}

	/* Drop this vnode's open and the device-wide open count. */
	mutex_enter(&device_lock);
	sn->sn_opencnt--;
	count = --sd->sd_opencnt;
	if (vp->v_type == VBLK)
		sd->sd_bdevvp = NULL;
	mutex_exit(&device_lock);

	/* Not the last close of the device: the driver is not called. */
	if (count != 0)
		return 0;

	flags1 = ap->a_fflag;

	/*
	 * if VI_XLOCK is set, then we're going away soon, so make this
	 * non-blocking. Also ensures that we won't wedge in vn_lock below.
	 */
	if (flags & VI_XLOCK)
		flags1 |= FNONBLOCK;

	/*
	 * If we're able to block, release the vnode lock & reacquire. We
	 * might end up sleeping for someone else who wants our queues. They
	 * won't get them if we hold the vnode locked. Also, if VI_XLOCK is
	 * set, don't release the lock as we won't be able to regain it.
	 */
	if (!(flags1 & FNONBLOCK))
		VOP_UNLOCK(vp, 0);

	if (vp->v_type == VBLK)
		error = bdev_close(dev, flags1, mode, curlwp);
	else
		error = cdev_close(dev, flags1, mode, curlwp);

	if (!(flags1 & FNONBLOCK))
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	return (error);
}
1052
1053 /*
1054 * Print out the contents of a special device vnode.
1055 */
1056 int
1057 spec_print(void *v)
1058 {
1059 struct vop_print_args /* {
1060 struct vnode *a_vp;
1061 } */ *ap = v;
1062
1063 printf("dev %llu, %llu\n", (unsigned long long)major(ap->a_vp->v_rdev),
1064 (unsigned long long)minor(ap->a_vp->v_rdev));
1065 return 0;
1066 }
1067
1068 /*
1069 * Return POSIX pathconf information applicable to special devices.
1070 */
1071 int
1072 spec_pathconf(void *v)
1073 {
1074 struct vop_pathconf_args /* {
1075 struct vnode *a_vp;
1076 int a_name;
1077 register_t *a_retval;
1078 } */ *ap = v;
1079
1080 switch (ap->a_name) {
1081 case _PC_LINK_MAX:
1082 *ap->a_retval = LINK_MAX;
1083 return (0);
1084 case _PC_MAX_CANON:
1085 *ap->a_retval = MAX_CANON;
1086 return (0);
1087 case _PC_MAX_INPUT:
1088 *ap->a_retval = MAX_INPUT;
1089 return (0);
1090 case _PC_PIPE_BUF:
1091 *ap->a_retval = PIPE_BUF;
1092 return (0);
1093 case _PC_CHOWN_RESTRICTED:
1094 *ap->a_retval = 1;
1095 return (0);
1096 case _PC_VDISABLE:
1097 *ap->a_retval = _POSIX_VDISABLE;
1098 return (0);
1099 case _PC_SYNC_IO:
1100 *ap->a_retval = 1;
1101 return (0);
1102 default:
1103 return (EINVAL);
1104 }
1105 /* NOTREACHED */
1106 }
1107
1108 /*
1109 * Advisory record locking support.
1110 */
1111 int
1112 spec_advlock(void *v)
1113 {
1114 struct vop_advlock_args /* {
1115 struct vnode *a_vp;
1116 void *a_id;
1117 int a_op;
1118 struct flock *a_fl;
1119 int a_flags;
1120 } */ *ap = v;
1121 struct vnode *vp = ap->a_vp;
1122
1123 return lf_advlock(ap, &vp->v_speclockf, (off_t)0);
1124 }
1125