/*	$NetBSD: spec_vnops.c,v 1.136 2012/12/20 08:03:43 hannken Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)spec_vnops.c	8.15 (Berkeley) 7/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: spec_vnops.c,v 1.136 2012/12/20 08:03:43 hannken Exp $");

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/errno.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/file.h>
#include <sys/disklabel.h>
#include <sys/lockf.h>
#include <sys/tty.h>
#include <sys/kauth.h>
#include <sys/kmem.h>	/* for kmem_alloc()/kmem_free() used below */
#include <sys/fstrans.h>
#include <sys/module.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

/* symbolic sleep message strings for devices */
const char	devopn[] = "devopn";
const char	devio[] = "devio";
const char	devwait[] = "devwait";
const char	devin[] = "devin";
const char	devout[] = "devout";
const char	devioc[] = "devioc";
const char	devcls[] = "devcls";

vnode_t	*specfs_hash[SPECHSZ];

/*
 * This vnode operations vector is used for special device nodes
 * created from whole cloth by the kernel.  For the ops vector for
 * vnodes built from special devices found in a filesystem, see (e.g.)
 * ffs_specop_entries[] in ffs_vnops.c or the equivalent for other
 * filesystems.
 */

int (**spec_vnodeop_p)(void *);
const struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, spec_lookup },		/* lookup */
	{ &vop_create_desc, spec_create },		/* create */
	{ &vop_mknod_desc, spec_mknod },		/* mknod */
	{ &vop_open_desc, spec_open },			/* open */
	{ &vop_close_desc, spec_close },		/* close */
	{ &vop_access_desc, spec_access },		/* access */
	{ &vop_getattr_desc, spec_getattr },		/* getattr */
	{ &vop_setattr_desc, spec_setattr },		/* setattr */
	{ &vop_read_desc, spec_read },			/* read */
	{ &vop_write_desc, spec_write },		/* write */
	{ &vop_fcntl_desc, spec_fcntl },		/* fcntl */
	{ &vop_ioctl_desc, spec_ioctl },		/* ioctl */
	{ &vop_poll_desc, spec_poll },			/* poll */
	{ &vop_kqfilter_desc, spec_kqfilter },		/* kqfilter */
	{ &vop_revoke_desc, spec_revoke },		/* revoke */
	{ &vop_mmap_desc, spec_mmap },			/* mmap */
	{ &vop_fsync_desc, spec_fsync },		/* fsync */
	{ &vop_seek_desc, spec_seek },			/* seek */
	{ &vop_remove_desc, spec_remove },		/* remove */
	{ &vop_link_desc, spec_link },			/* link */
	{ &vop_rename_desc, spec_rename },		/* rename */
	{ &vop_mkdir_desc, spec_mkdir },		/* mkdir */
	{ &vop_rmdir_desc, spec_rmdir },		/* rmdir */
	{ &vop_symlink_desc, spec_symlink },		/* symlink */
	{ &vop_readdir_desc, spec_readdir },		/* readdir */
	{ &vop_readlink_desc, spec_readlink },		/* readlink */
	{ &vop_abortop_desc, spec_abortop },		/* abortop */
	{ &vop_inactive_desc, spec_inactive },		/* inactive */
	{ &vop_reclaim_desc, spec_reclaim },		/* reclaim */
	{ &vop_lock_desc, spec_lock },			/* lock */
	{ &vop_unlock_desc, spec_unlock },		/* unlock */
	{ &vop_bmap_desc, spec_bmap },			/* bmap */
	{ &vop_strategy_desc, spec_strategy },		/* strategy */
	{ &vop_print_desc, spec_print },		/* print */
	{ &vop_islocked_desc, spec_islocked },		/* islocked */
	{ &vop_pathconf_desc, spec_pathconf },		/* pathconf */
	{ &vop_advlock_desc, spec_advlock },		/* advlock */
	{ &vop_bwrite_desc, spec_bwrite },		/* bwrite */
	{ &vop_getpages_desc, spec_getpages },		/* getpages */
	{ &vop_putpages_desc, spec_putpages },		/* putpages */
	{ NULL, NULL }
};
const struct vnodeopv_desc spec_vnodeop_opv_desc =
	{ &spec_vnodeop_p, spec_vnodeop_entries };
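
/*
 * The vector above is registered once during VFS initialization (see
 * vfs_init.c); individual filesystems do not list it themselves, they
 * supply their own spec ops tables as noted above.
 */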

static kauth_listener_t rawio_listener;

/* Returns true if vnode is /dev/mem or /dev/kmem. */
bool
iskmemvp(struct vnode *vp)
{
	return ((vp->v_type == VCHR) && iskmemdev(vp->v_rdev));
}

/*
 * Returns true if dev is /dev/mem or /dev/kmem.
 */
int
iskmemdev(dev_t dev)
{
	/* mem_no is emitted by config(8) to generated devsw.c */
	extern const int mem_no;

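	/* minors 0 and 1 are /dev/mem and /dev/kmem respectively; */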
	/* minor 14 is /dev/io on i386 with COMPAT_10 */
	return (major(dev) == mem_no && (minor(dev) < 2 || minor(dev) == 14));
}

static int
rawio_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
    void *arg0, void *arg1, void *arg2, void *arg3)
{
	int result;

	result = KAUTH_RESULT_DEFER;

	if ((action != KAUTH_DEVICE_RAWIO_SPEC) &&
	    (action != KAUTH_DEVICE_RAWIO_PASSTHRU))
		return result;

	/* Access is mandated by permissions. */
	result = KAUTH_RESULT_ALLOW;

	return result;
}

void
spec_init(void)
{

	rawio_listener = kauth_listen_scope(KAUTH_SCOPE_DEVICE,
	    rawio_listener_cb, NULL);
}

/*
 * Initialize a vnode that represents a device.
 */
void
spec_node_init(vnode_t *vp, dev_t rdev)
{
	specnode_t *sn;
	specdev_t *sd;
	vnode_t *vp2;
	vnode_t **vpp;

	KASSERT(vp->v_type == VBLK || vp->v_type == VCHR);
	KASSERT(vp->v_specnode == NULL);

	/*
	 * Search the hash table for this device.  If known, add a
	 * reference to the device structure.  If not known, create
	 * a new entry to represent the device.  In all cases add
	 * the vnode to the hash table.
	 */
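	/*
	 * The device record is allocated before device_lock is taken so
	 * that no allocation happens under the lock; if an existing
	 * entry is found, the fresh record is freed again at the bottom
	 * of this function.
	 */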
	/* KM_SLEEP allocations never fail, so no NULL check is needed. */
	sn = kmem_alloc(sizeof(*sn), KM_SLEEP);
	sd = kmem_alloc(sizeof(*sd), KM_SLEEP);
	mutex_enter(&device_lock);
	vpp = &specfs_hash[SPECHASH(rdev)];
	for (vp2 = *vpp; vp2 != NULL; vp2 = vp2->v_specnext) {
		KASSERT(vp2->v_specnode != NULL);
		if (rdev == vp2->v_rdev && vp->v_type == vp2->v_type) {
			break;
		}
	}
	if (vp2 == NULL) {
		/* No existing record, create a new one. */
		sd->sd_rdev = rdev;
		sd->sd_mountpoint = NULL;
		sd->sd_lockf = NULL;
		sd->sd_refcnt = 1;
		sd->sd_opencnt = 0;
		sd->sd_bdevvp = NULL;
		sn->sn_dev = sd;
		sd = NULL;
	} else {
		/* Use the existing record. */
		sn->sn_dev = vp2->v_specnode->sn_dev;
		sn->sn_dev->sd_refcnt++;
	}
	/* Insert vnode into the hash chain. */
	sn->sn_opencnt = 0;
	sn->sn_rdev = rdev;
	sn->sn_gone = false;
	vp->v_specnode = sn;
	vp->v_specnext = *vpp;
	*vpp = vp;
	mutex_exit(&device_lock);

	/* Free the record we allocated if unused. */
	if (sd != NULL) {
		kmem_free(sd, sizeof(*sd));
	}
}

/*
 * A vnode representing a special device is going away.  Close
 * the device if the vnode holds it open.
 */
void
spec_node_revoke(vnode_t *vp)
{
	specnode_t *sn;
	specdev_t *sd;

	sn = vp->v_specnode;
	sd = sn->sn_dev;

	KASSERT(vp->v_type == VBLK || vp->v_type == VCHR);
	KASSERT(vp->v_specnode != NULL);
	KASSERT((vp->v_iflag & VI_XLOCK) != 0);
	KASSERT(sn->sn_gone == false);

	mutex_enter(&device_lock);
	KASSERT(sn->sn_opencnt <= sd->sd_opencnt);
	if (sn->sn_opencnt != 0) {
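		/*
		 * Collapse this node's opens down to a single one and
		 * mark the node gone; the VOP_CLOSE() below then drops
		 * that final open on the device.
		 */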
		sd->sd_opencnt -= (sn->sn_opencnt - 1);
		sn->sn_opencnt = 1;
		sn->sn_gone = true;
		mutex_exit(&device_lock);

		VOP_CLOSE(vp, FNONBLOCK, NOCRED);

		mutex_enter(&device_lock);
		KASSERT(sn->sn_opencnt == 0);
	}
	mutex_exit(&device_lock);
}

/*
 * A vnode representing a special device is being recycled.
 * Destroy the specfs component.
 */
void
spec_node_destroy(vnode_t *vp)
{
	specnode_t *sn;
	specdev_t *sd;
	vnode_t **vpp, *vp2;
	int refcnt;

	sn = vp->v_specnode;
	sd = sn->sn_dev;

	KASSERT(vp->v_type == VBLK || vp->v_type == VCHR);
	KASSERT(vp->v_specnode != NULL);
	KASSERT(sn->sn_opencnt == 0);

	mutex_enter(&device_lock);
	/* Remove from the hash and destroy the node. */
	vpp = &specfs_hash[SPECHASH(vp->v_rdev)];
	for (vp2 = *vpp;; vp2 = vp2->v_specnext) {
		if (vp2 == NULL) {
			panic("spec_node_destroy: corrupt hash");
		}
		if (vp2 == vp) {
			KASSERT(vp == *vpp);
			*vpp = vp->v_specnext;
			break;
		}
		if (vp2->v_specnext == vp) {
			vp2->v_specnext = vp->v_specnext;
			break;
		}
	}
	sn = vp->v_specnode;
	vp->v_specnode = NULL;
	refcnt = sd->sd_refcnt--;
	KASSERT(refcnt > 0);
	mutex_exit(&device_lock);

	/* If the device is no longer in use, destroy our record. */
	if (refcnt == 1) {
		KASSERT(sd->sd_opencnt == 0);
		KASSERT(sd->sd_bdevvp == NULL);
		kmem_free(sd, sizeof(*sd));
	}
	kmem_free(sn, sizeof(*sn));
}

/*
 * Trivial lookup routine that always fails.
 */
int
spec_lookup(void *v)
{
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap = v;

	*ap->a_vpp = NULL;
	return (ENOTDIR);
}

/*
 * Open a special file.
 */
/* ARGSUSED */
int
spec_open(void *v)
{
	struct vop_open_args /* {
		struct vnode *a_vp;
		int  a_mode;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct lwp *l;
	struct vnode *vp;
	dev_t dev;
	int error;
	struct partinfo pi;
	enum kauth_device_req req;
	specnode_t *sn;
	specdev_t *sd;

	u_int gen;
	const char *name;

	l = curlwp;
	vp = ap->a_vp;
	dev = vp->v_rdev;
	sn = vp->v_specnode;
	sd = sn->sn_dev;
	name = NULL;
	gen = 0;

	/*
	 * Don't allow open if fs is mounted -nodev.
	 */
	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
		return (ENXIO);

	switch (ap->a_mode & (FREAD | FWRITE)) {
	case FREAD | FWRITE:
		req = KAUTH_REQ_DEVICE_RAWIO_SPEC_RW;
		break;
	case FWRITE:
		req = KAUTH_REQ_DEVICE_RAWIO_SPEC_WRITE;
		break;
	default:
		req = KAUTH_REQ_DEVICE_RAWIO_SPEC_READ;
		break;
	}

	switch (vp->v_type) {
	case VCHR:
		error = kauth_authorize_device_spec(ap->a_cred, req, vp);
		if (error != 0)
			return (error);

		/*
		 * Character devices can accept opens from multiple
		 * vnodes.
		 */
		mutex_enter(&device_lock);
		if (sn->sn_gone) {
			mutex_exit(&device_lock);
			return (EBADF);
		}
		sd->sd_opencnt++;
		sn->sn_opencnt++;
		mutex_exit(&device_lock);
		if (cdev_type(dev) == D_TTY)
			vp->v_vflag |= VV_ISTTY;
		VOP_UNLOCK(vp);
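		/*
		 * Retry the open for as long as the set of loaded
		 * modules may have changed: module_gen is bumped on
		 * every module load and unload, so a successful
		 * autoload below makes the loop try cdev_open() once
		 * more with the freshly attached driver.
		 */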
		do {
			const struct cdevsw *cdev;

			gen = module_gen;
			error = cdev_open(dev, ap->a_mode, S_IFCHR, l);
			if (error != ENXIO)
				break;

			/* Check if we already have a valid driver */
			mutex_enter(&device_lock);
			cdev = cdevsw_lookup(dev);
			mutex_exit(&device_lock);
			if (cdev != NULL)
				break;

			/* Get device name from devsw_conv array */
			if ((name = cdevsw_getname(major(dev))) == NULL)
				break;

			/* Try to autoload device module */
			(void) module_autoload(name, MODULE_CLASS_DRIVER);
		} while (gen != module_gen);

		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		break;

	case VBLK:
		error = kauth_authorize_device_spec(ap->a_cred, req, vp);
		if (error != 0)
			return (error);

		/*
		 * For block devices, permit only one open.  The buffer
		 * cache cannot remain self-consistent with multiple
		 * vnodes holding a block device open.
		 */
		mutex_enter(&device_lock);
		if (sn->sn_gone) {
			mutex_exit(&device_lock);
			return (EBADF);
		}
		if (sd->sd_opencnt != 0) {
			mutex_exit(&device_lock);
			return EBUSY;
		}
		sn->sn_opencnt = 1;
		sd->sd_opencnt = 1;
		sd->sd_bdevvp = vp;
		mutex_exit(&device_lock);
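		/* Same autoload-and-retry dance as in the VCHR case above. */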
		do {
			const struct bdevsw *bdev;

			gen = module_gen;
			error = bdev_open(dev, ap->a_mode, S_IFBLK, l);
			if (error != ENXIO)
				break;

			/* Check if we already have a valid driver */
			mutex_enter(&device_lock);
			bdev = bdevsw_lookup(dev);
			mutex_exit(&device_lock);
			if (bdev != NULL)
				break;

			/* Get device name from devsw_conv array */
			if ((name = bdevsw_getname(major(dev))) == NULL)
				break;

			VOP_UNLOCK(vp);

			/* Try to autoload device module */
			(void) module_autoload(name, MODULE_CLASS_DRIVER);

			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		} while (gen != module_gen);

		break;

	case VNON:
	case VLNK:
	case VDIR:
	case VREG:
	case VBAD:
	case VFIFO:
	case VSOCK:
	default:
		return 0;
	}

	mutex_enter(&device_lock);
	if (sn->sn_gone) {
		if (error == 0)
			error = EBADF;
	} else if (error != 0) {
		sd->sd_opencnt--;
		sn->sn_opencnt--;
		if (vp->v_type == VBLK)
			sd->sd_bdevvp = NULL;
	}
	mutex_exit(&device_lock);

	/*
	 * Consult the device switch that matches the vnode type; for a
	 * VBLK node the major number indexes bdevsw, so cdev_type()
	 * would look in the wrong table.
	 */
	if ((vp->v_type == VCHR ? cdev_type(dev) : bdev_type(dev)) != D_DISK ||
	    error != 0)
		return error;

	if (vp->v_type == VCHR)
		error = cdev_ioctl(vp->v_rdev, DIOCGPART, &pi, FREAD, curlwp);
	else
		error = bdev_ioctl(vp->v_rdev, DIOCGPART, &pi, FREAD, curlwp);
	if (error == 0)
		uvm_vnp_setsize(vp,
		    (voff_t)pi.disklab->d_secsize * pi.part->p_size);
	return 0;
}

/*
 * Vnode op for read
 */
/* ARGSUSED */
int
spec_read(void *v)
{
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct lwp *l = curlwp;
	struct buf *bp;
	daddr_t bn;
	int bsize, bscale;
	struct partinfo dpart;
	int n, on;
	int error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("spec_read mode");
	if (&uio->uio_vmspace->vm_map != kernel_map &&
	    uio->uio_vmspace != curproc->p_vmspace)
		panic("spec_read proc");
#endif
	if (uio->uio_resid == 0)
		return (0);

	switch (vp->v_type) {

	case VCHR:
		VOP_UNLOCK(vp);
		error = cdev_read(vp->v_rdev, uio, ap->a_ioflag);
		vn_lock(vp, LK_SHARED | LK_RETRY);
		return (error);

	case VBLK:
		KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);
		if (uio->uio_offset < 0)
			return (EINVAL);
		bsize = BLKDEV_IOSIZE;
		if (bdev_ioctl(vp->v_rdev, DIOCGPART, &dpart, FREAD, l) == 0) {
			if (dpart.part->p_fstype == FS_BSDFFS &&
			    dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
				bsize = dpart.part->p_frag *
				    dpart.part->p_fsize;
		}
		bscale = bsize >> DEV_BSHIFT;
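		/*
		 * Transfer one bsize chunk per iteration: bn is the
		 * DEV_BSIZE block number rounded down to a chunk
		 * boundary, on the byte offset within that chunk, and
		 * n the number of bytes to move this pass.
		 */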
		do {
			bn = (uio->uio_offset >> DEV_BSHIFT) &~ (bscale - 1);
			on = uio->uio_offset % bsize;
			n = min((unsigned)(bsize - on), uio->uio_resid);
			error = bread(vp, bn, bsize, NOCRED, 0, &bp);
			if (error) {
				return (error);
			}
			n = min(n, bsize - bp->b_resid);
			error = uiomove((char *)bp->b_data + on, n, uio);
			brelse(bp, 0);
		} while (error == 0 && uio->uio_resid > 0 && n != 0);
		return (error);

	default:
		panic("spec_read type");
	}
	/* NOTREACHED */
}

/*
 * Vnode op for write
 */
/* ARGSUSED */
int
spec_write(void *v)
{
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct lwp *l = curlwp;
	struct buf *bp;
	daddr_t bn;
	int bsize, bscale;
	struct partinfo dpart;
	int n, on;
	int error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("spec_write mode");
	if (&uio->uio_vmspace->vm_map != kernel_map &&
	    uio->uio_vmspace != curproc->p_vmspace)
		panic("spec_write proc");
#endif

	switch (vp->v_type) {

	case VCHR:
		VOP_UNLOCK(vp);
		error = cdev_write(vp->v_rdev, uio, ap->a_ioflag);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		return (error);

	case VBLK:
		KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);
		if (uio->uio_resid == 0)
			return (0);
		if (uio->uio_offset < 0)
			return (EINVAL);
		bsize = BLKDEV_IOSIZE;
		if (bdev_ioctl(vp->v_rdev, DIOCGPART, &dpart, FREAD, l) == 0) {
			if (dpart.part->p_fstype == FS_BSDFFS &&
			    dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
				bsize = dpart.part->p_frag *
				    dpart.part->p_fsize;
		}
		bscale = bsize >> DEV_BSHIFT;
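		/*
		 * As in spec_read(), move one bsize chunk per pass.
		 * A chunk that will be overwritten in full is obtained
		 * with getblk() to skip the needless read; partial
		 * chunks must first be read in with bread().
		 */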
		do {
			bn = (uio->uio_offset >> DEV_BSHIFT) &~ (bscale - 1);
			on = uio->uio_offset % bsize;
			n = min((unsigned)(bsize - on), uio->uio_resid);
			if (n == bsize)
				bp = getblk(vp, bn, bsize, 0, 0);
			else
				error = bread(vp, bn, bsize, NOCRED,
				    B_MODIFY, &bp);
			if (error) {
				return (error);
			}
			n = min(n, bsize - bp->b_resid);
			error = uiomove((char *)bp->b_data + on, n, uio);
			if (error)
				brelse(bp, 0);
			else {
				if (n + on == bsize)
					bawrite(bp);
				else
					bdwrite(bp);
				error = bp->b_error;
			}
		} while (error == 0 && uio->uio_resid > 0 && n != 0);
		return (error);

	default:
		panic("spec_write type");
	}
	/* NOTREACHED */
}

/*
 * Device ioctl operation.
 */
/* ARGSUSED */
int
spec_ioctl(void *v)
{
	struct vop_ioctl_args /* {
		struct vnode *a_vp;
		u_long a_command;
		void  *a_data;
		int  a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp;
	dev_t dev;

	/*
	 * Extract all the info we need from the vnode, taking care to
	 * avoid a race with VOP_REVOKE().
	 */

	vp = ap->a_vp;
	dev = NODEV;
	mutex_enter(vp->v_interlock);
	if ((vp->v_iflag & VI_XLOCK) == 0 && vp->v_specnode) {
		dev = vp->v_rdev;
	}
	mutex_exit(vp->v_interlock);
	if (dev == NODEV) {
		return ENXIO;
	}

	switch (vp->v_type) {

	case VCHR:
		return cdev_ioctl(dev, ap->a_command, ap->a_data,
		    ap->a_fflag, curlwp);

	case VBLK:
		KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);
		return bdev_ioctl(dev, ap->a_command, ap->a_data,
		    ap->a_fflag, curlwp);

	default:
		panic("spec_ioctl");
		/* NOTREACHED */
	}
}

/* ARGSUSED */
int
spec_poll(void *v)
{
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
	} */ *ap = v;
	struct vnode *vp;
	dev_t dev;

	/*
	 * Extract all the info we need from the vnode, taking care to
	 * avoid a race with VOP_REVOKE().
	 */

	vp = ap->a_vp;
	dev = NODEV;
	mutex_enter(vp->v_interlock);
	if ((vp->v_iflag & VI_XLOCK) == 0 && vp->v_specnode) {
		dev = vp->v_rdev;
	}
	mutex_exit(vp->v_interlock);
	if (dev == NODEV) {
		return POLLERR;
	}

	switch (vp->v_type) {

	case VCHR:
		return cdev_poll(dev, ap->a_events, curlwp);

	default:
		return (genfs_poll(v));
	}
}

/* ARGSUSED */
int
spec_kqfilter(void *v)
{
	struct vop_kqfilter_args /* {
		struct vnode *a_vp;
		struct knote *a_kn;
	} */ *ap = v;
	dev_t dev;

	switch (ap->a_vp->v_type) {

	case VCHR:
		dev = ap->a_vp->v_rdev;
		return cdev_kqfilter(dev, ap->a_kn);
	default:
		/*
		 * Block devices don't support kqfilter, and it is
		 * refused for any other vnodes (such as those being
		 * vflush()ed) as well.
		 */
		return (EOPNOTSUPP);
	}
}

/*
 * Allow mapping of only D_DISK.  This is called only for VBLK.
 */
int
spec_mmap(void *v)
{
	struct vop_mmap_args /* {
		struct vnode *a_vp;
		vm_prot_t a_prot;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	KASSERT(vp->v_type == VBLK);
	if (bdev_type(vp->v_rdev) != D_DISK)
		return EINVAL;

	return 0;
}

/*
 * Synch buffers associated with a block device
 */
/* ARGSUSED */
int
spec_fsync(void *v)
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		kauth_cred_t a_cred;
		int  a_flags;
		off_t a_offlo;
		off_t a_offhi;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct mount *mp;
	int error;

	if (vp->v_type == VBLK) {
		if ((mp = vp->v_specmountpoint) != NULL) {
			error = VFS_FSYNC(mp, vp, ap->a_flags);
			if (error != EOPNOTSUPP)
				return error;
		}
		return vflushbuf(vp, ap->a_flags);
	}
	return (0);
}

/*
 * Just call the device strategy routine
 */
int
spec_strategy(void *v)
{
	struct vop_strategy_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct buf *bp = ap->a_bp;
	int error;

	KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);

	error = 0;
	bp->b_dev = vp->v_rdev;

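	/*
	 * For writes, run any registered copy-on-write hooks first so
	 * that e.g. fss(4) snapshots can preserve the old contents.
	 */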
	if (!(bp->b_flags & B_READ))
		error = fscow_run(bp, false);

	if (error) {
		bp->b_error = error;
		biodone(bp);
		return (error);
	}

	bdev_strategy(bp);

	return (0);
}

int
spec_inactive(void *v)
{
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		bool *a_recycle;
	} */ *ap = v;

	VOP_UNLOCK(ap->a_vp);
	return (0);
}

/*
 * This is a noop, simply returning what one has been given.
 */
int
spec_bmap(void *v)
{
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t  a_bn;
		struct vnode **a_vpp;
		daddr_t *a_bnp;
		int *a_runp;
	} */ *ap = v;

	if (ap->a_vpp != NULL)
		*ap->a_vpp = ap->a_vp;
	if (ap->a_bnp != NULL)
		*ap->a_bnp = ap->a_bn;
	if (ap->a_runp != NULL)
		*ap->a_runp = (MAXBSIZE >> DEV_BSHIFT) - 1;
	return (0);
}

/*
 * Device close routine
 */
/* ARGSUSED */
int
spec_close(void *v)
{
	struct vop_close_args /* {
		struct vnode *a_vp;
		int  a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct session *sess;
	dev_t dev = vp->v_rdev;
	int mode, error, flags, flags1, count;
	specnode_t *sn;
	specdev_t *sd;

	flags = vp->v_iflag;
	sn = vp->v_specnode;
	sd = sn->sn_dev;

	switch (vp->v_type) {

	case VCHR:
		/*
		 * Hack: a tty device that is a controlling terminal
		 * has a reference from the session structure.  We
		 * cannot easily tell that a character device is a
		 * controlling terminal, unless it is the closing
		 * process' controlling terminal.  In that case, if the
		 * open count is 1 release the reference from the
		 * session.  Also, remove the link from the tty back to
		 * the session and pgrp.
		 *
		 * XXX V. fishy.
		 */
		mutex_enter(proc_lock);
		sess = curlwp->l_proc->p_session;
		if (sn->sn_opencnt == 1 && vp == sess->s_ttyvp) {
			mutex_spin_enter(&tty_lock);
			sess->s_ttyvp = NULL;
			if (sess->s_ttyp->t_session != NULL) {
				sess->s_ttyp->t_pgrp = NULL;
				sess->s_ttyp->t_session = NULL;
				mutex_spin_exit(&tty_lock);
				/* Releases proc_lock. */
				proc_sessrele(sess);
			} else {
				mutex_spin_exit(&tty_lock);
				if (sess->s_ttyp->t_pgrp != NULL)
					panic("spec_close: spurious pgrp ref");
				mutex_exit(proc_lock);
			}
			vrele(vp);
		} else
			mutex_exit(proc_lock);

		/*
		 * If the vnode is locked, then we are in the midst
		 * of forcibly closing the device, otherwise we only
		 * close on last reference.
		 */
		mode = S_IFCHR;
		break;

	case VBLK:
		KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);
		/*
		 * On last close of a block device (that isn't mounted)
		 * we must invalidate any in core blocks, so that
		 * we can, for instance, change floppy disks.
		 */
		error = vinvalbuf(vp, V_SAVE, ap->a_cred, curlwp, 0, 0);
		if (error)
			return (error);
		/*
		 * We do not want to really close the device if it
		 * is still in use unless we are trying to close it
		 * forcibly.  Since every use (buffer, vnode, swap, cmap)
		 * holds a reference to the vnode, and because we mark
		 * any other vnodes that alias this device, when the
		 * sum of the reference counts on all the aliased
		 * vnodes descends to one, we are on last close.
		 */
		mode = S_IFBLK;
		break;

	default:
		panic("spec_close: not special");
	}

	mutex_enter(&device_lock);
	sn->sn_opencnt--;
	count = --sd->sd_opencnt;
	if (vp->v_type == VBLK)
		sd->sd_bdevvp = NULL;
	mutex_exit(&device_lock);

	if (count != 0)
		return 0;

	flags1 = ap->a_fflag;

	/*
	 * if VI_XLOCK is set, then we're going away soon, so make this
	 * non-blocking.  Also ensures that we won't wedge in vn_lock below.
	 */
	if (flags & VI_XLOCK)
		flags1 |= FNONBLOCK;

	/*
	 * If we're able to block, release the vnode lock & reacquire.  We
	 * might end up sleeping for someone else who wants our queues.  They
	 * won't get them if we hold the vnode locked.  Also, if VI_XLOCK is
	 * set, don't release the lock as we won't be able to regain it.
	 */
	if (!(flags1 & FNONBLOCK))
		VOP_UNLOCK(vp);

	if (vp->v_type == VBLK)
		error = bdev_close(dev, flags1, mode, curlwp);
	else
		error = cdev_close(dev, flags1, mode, curlwp);

	if (!(flags1 & FNONBLOCK))
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	return (error);
}

/*
 * Print out the contents of a special device vnode.
 */
int
spec_print(void *v)
{
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap = v;

	printf("dev %llu, %llu\n", (unsigned long long)major(ap->a_vp->v_rdev),
	    (unsigned long long)minor(ap->a_vp->v_rdev));
	return 0;
}

/*
 * Return POSIX pathconf information applicable to special devices.
 */
int
spec_pathconf(void *v)
{
	struct vop_pathconf_args /* {
		struct vnode *a_vp;
		int a_name;
		register_t *a_retval;
	} */ *ap = v;

	switch (ap->a_name) {
	case _PC_LINK_MAX:
		*ap->a_retval = LINK_MAX;
		return (0);
	case _PC_MAX_CANON:
		*ap->a_retval = MAX_CANON;
		return (0);
	case _PC_MAX_INPUT:
		*ap->a_retval = MAX_INPUT;
		return (0);
	case _PC_PIPE_BUF:
		*ap->a_retval = PIPE_BUF;
		return (0);
	case _PC_CHOWN_RESTRICTED:
		*ap->a_retval = 1;
		return (0);
	case _PC_VDISABLE:
		*ap->a_retval = _POSIX_VDISABLE;
		return (0);
	case _PC_SYNC_IO:
		*ap->a_retval = 1;
		return (0);
	default:
		return (EINVAL);
	}
	/* NOTREACHED */
}

/*
 * Advisory record locking support.
 */
int
spec_advlock(void *v)
{
	struct vop_advlock_args /* {
		struct vnode *a_vp;
		void *a_id;
		int a_op;
		struct flock *a_fl;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

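	/*
	 * v_speclockf resolves to the per-device sd_lockf, so advisory
	 * locks are shared by every vnode aliasing this device.
	 */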
	return lf_advlock(ap, &vp->v_speclockf, (off_t)0);
}