/*	$NetBSD: spec_vnops.c,v 1.142 2014/02/07 15:29:22 hannken Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)spec_vnops.c	8.15 (Berkeley) 7/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: spec_vnops.c,v 1.142 2014/02/07 15:29:22 hannken Exp $");

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/errno.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/file.h>
#include <sys/disklabel.h>
#include <sys/lockf.h>
#include <sys/tty.h>
#include <sys/kauth.h>
#include <sys/fstrans.h>
#include <sys/module.h>
#include <sys/kmem.h>	/* kmem_alloc()/kmem_free() used below */

#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

/* symbolic sleep message strings for devices */
const char	devopn[] = "devopn";
const char	devio[] = "devio";
const char	devwait[] = "devwait";
const char	devin[] = "devin";
const char	devout[] = "devout";
const char	devioc[] = "devioc";
const char	devcls[] = "devcls";

#define	SPECHSZ	64
#if	((SPECHSZ&(SPECHSZ-1)) == 0)
#define	SPECHASH(rdev)	(((rdev>>5)+(rdev))&(SPECHSZ-1))
#else
#define	SPECHASH(rdev)	(((unsigned)((rdev>>5)+(rdev)))%SPECHSZ)
#endif

static	vnode_t	*specfs_hash[SPECHSZ];
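
/*
 * Illustrative sketch (not part of the kernel build): how SPECHASH
 * spreads device numbers over the SPECHSZ buckets.  Since SPECHSZ is
 * 64, a power of two, the first definition above is selected and the
 * modulo reduces to a simple mask.  The rdev value here is made up.
 */
#if 0
	dev_t rdev = makedev(8, 3);		/* hypothetical major/minor */
	int bucket = SPECHASH(rdev);		/* ((rdev>>5)+rdev) & 63 */
	vnode_t *vp = specfs_hash[bucket];	/* head of that hash chain */
#endif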

/*
 * This vnode operations vector is used for special device nodes
 * created from whole cloth by the kernel.  For the ops vector for
 * vnodes built from special devices found in a filesystem, see (e.g.)
 * ffs_specop_entries[] in ffs_vnops.c or the equivalent for other
 * filesystems.
 */

int (**spec_vnodeop_p)(void *);
const struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, spec_lookup },		/* lookup */
	{ &vop_create_desc, spec_create },		/* create */
	{ &vop_mknod_desc, spec_mknod },		/* mknod */
	{ &vop_open_desc, spec_open },			/* open */
	{ &vop_close_desc, spec_close },		/* close */
	{ &vop_access_desc, spec_access },		/* access */
	{ &vop_getattr_desc, spec_getattr },		/* getattr */
	{ &vop_setattr_desc, spec_setattr },		/* setattr */
	{ &vop_read_desc, spec_read },			/* read */
	{ &vop_write_desc, spec_write },		/* write */
	{ &vop_fcntl_desc, spec_fcntl },		/* fcntl */
	{ &vop_ioctl_desc, spec_ioctl },		/* ioctl */
	{ &vop_poll_desc, spec_poll },			/* poll */
	{ &vop_kqfilter_desc, spec_kqfilter },		/* kqfilter */
	{ &vop_revoke_desc, spec_revoke },		/* revoke */
	{ &vop_mmap_desc, spec_mmap },			/* mmap */
	{ &vop_fsync_desc, spec_fsync },		/* fsync */
	{ &vop_seek_desc, spec_seek },			/* seek */
	{ &vop_remove_desc, spec_remove },		/* remove */
	{ &vop_link_desc, spec_link },			/* link */
	{ &vop_rename_desc, spec_rename },		/* rename */
	{ &vop_mkdir_desc, spec_mkdir },		/* mkdir */
	{ &vop_rmdir_desc, spec_rmdir },		/* rmdir */
	{ &vop_symlink_desc, spec_symlink },		/* symlink */
	{ &vop_readdir_desc, spec_readdir },		/* readdir */
	{ &vop_readlink_desc, spec_readlink },		/* readlink */
	{ &vop_abortop_desc, spec_abortop },		/* abortop */
	{ &vop_inactive_desc, spec_inactive },		/* inactive */
	{ &vop_reclaim_desc, spec_reclaim },		/* reclaim */
	{ &vop_lock_desc, spec_lock },			/* lock */
	{ &vop_unlock_desc, spec_unlock },		/* unlock */
	{ &vop_bmap_desc, spec_bmap },			/* bmap */
	{ &vop_strategy_desc, spec_strategy },		/* strategy */
	{ &vop_print_desc, spec_print },		/* print */
	{ &vop_islocked_desc, spec_islocked },		/* islocked */
	{ &vop_pathconf_desc, spec_pathconf },		/* pathconf */
	{ &vop_advlock_desc, spec_advlock },		/* advlock */
	{ &vop_bwrite_desc, spec_bwrite },		/* bwrite */
	{ &vop_getpages_desc, spec_getpages },		/* getpages */
	{ &vop_putpages_desc, spec_putpages },		/* putpages */
	{ NULL, NULL }
};
const struct vnodeopv_desc spec_vnodeop_opv_desc =
	{ &spec_vnodeop_p, spec_vnodeop_entries };

static kauth_listener_t rawio_listener;

/* Returns true if vnode is /dev/mem or /dev/kmem. */
bool
iskmemvp(struct vnode *vp)
{
	return ((vp->v_type == VCHR) && iskmemdev(vp->v_rdev));
}

/*
 * Returns true if dev is /dev/mem or /dev/kmem.
 */
int
iskmemdev(dev_t dev)
{
	/* mem_no is emitted by config(8) to generated devsw.c */
	extern const int mem_no;

	/* minor 14 is /dev/io on i386 with COMPAT_10 */
	return (major(dev) == mem_no && (minor(dev) < 2 || minor(dev) == 14));
}

static int
rawio_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
    void *arg0, void *arg1, void *arg2, void *arg3)
{
	int result;

	result = KAUTH_RESULT_DEFER;

	if ((action != KAUTH_DEVICE_RAWIO_SPEC) &&
	    (action != KAUTH_DEVICE_RAWIO_PASSTHRU))
		return result;

	/* Access is mandated by permissions. */
	result = KAUTH_RESULT_ALLOW;

	return result;
}

void
spec_init(void)
{

	rawio_listener = kauth_listen_scope(KAUTH_SCOPE_DEVICE,
	    rawio_listener_cb, NULL);
}
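
/*
 * Hedged sketch (illustrative only, registered nowhere): the listener
 * above defers on everything except raw-I/O requests, which it allows
 * so that ordinary file permissions decide.  Under kauth(9) semantics
 * a single deny wins, so a stricter security model could veto raw
 * device access with a listener along these lines:
 */
#if 0
static int
strict_rawio_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
    void *arg0, void *arg1, void *arg2, void *arg3)
{
	if (action == KAUTH_DEVICE_RAWIO_SPEC ||
	    action == KAUTH_DEVICE_RAWIO_PASSTHRU)
		return KAUTH_RESULT_DENY;	/* forbid raw device I/O */
	return KAUTH_RESULT_DEFER;		/* no opinion otherwise */
}
#endif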

/*
 * Initialize a vnode that represents a device.
 */
void
spec_node_init(vnode_t *vp, dev_t rdev)
{
	specnode_t *sn;
	specdev_t *sd;
	vnode_t *vp2;
	vnode_t **vpp;

	KASSERT(vp->v_type == VBLK || vp->v_type == VCHR);
	KASSERT(vp->v_specnode == NULL);

	/*
	 * Search the hash table for this device.  If known, add a
	 * reference to the device structure.  If not known, create
	 * a new entry to represent the device.  In all cases add
	 * the vnode to the hash table.
	 */
	sn = kmem_alloc(sizeof(*sn), KM_SLEEP);
	if (sn == NULL) {
		/* XXX */
		panic("spec_node_init: unable to allocate memory");
	}
	sd = kmem_alloc(sizeof(*sd), KM_SLEEP);
	if (sd == NULL) {
		/* XXX */
		panic("spec_node_init: unable to allocate memory");
	}
	mutex_enter(&device_lock);
	vpp = &specfs_hash[SPECHASH(rdev)];
	for (vp2 = *vpp; vp2 != NULL; vp2 = vp2->v_specnext) {
		KASSERT(vp2->v_specnode != NULL);
		if (rdev == vp2->v_rdev && vp->v_type == vp2->v_type) {
			break;
		}
	}
	if (vp2 == NULL) {
		/* No existing record, create a new one. */
		sd->sd_rdev = rdev;
		sd->sd_mountpoint = NULL;
		sd->sd_lockf = NULL;
		sd->sd_refcnt = 1;
		sd->sd_opencnt = 0;
		sd->sd_bdevvp = NULL;
		sn->sn_dev = sd;
		sd = NULL;
	} else {
		/* Use the existing record. */
		sn->sn_dev = vp2->v_specnode->sn_dev;
		sn->sn_dev->sd_refcnt++;
	}
	/* Insert vnode into the hash chain. */
	sn->sn_opencnt = 0;
	sn->sn_rdev = rdev;
	sn->sn_gone = false;
	vp->v_specnode = sn;
	vp->v_specnext = *vpp;
	*vpp = vp;
	mutex_exit(&device_lock);

	/* Free the record we allocated if unused. */
	if (sd != NULL) {
		kmem_free(sd, sizeof(*sd));
	}
}
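
/*
 * Hedged usage sketch, modelled loosely on bdevvp()/cdevvp() in
 * vfs_subr.c: a caller first obtains a fresh vnode carrying the
 * spec_vnodeop_p ops vector, sets its type, and only then calls
 * spec_node_init() to attach it to the device hash.  The function
 * name and error handling below are made up for illustration, and
 * the getnewvnode() signature is assumed, not quoted.
 */
#if 0
static int
example_getdevvp(dev_t dev, enum vtype type, vnode_t **vpp)
{
	vnode_t *vp;
	int error;

	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, NULL, &vp);
	if (error != 0)
		return error;
	vp->v_type = type;
	spec_node_init(vp, dev);	/* links vp into specfs_hash */
	*vpp = vp;
	return 0;
}
#endif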

/*
 * Lookup a vnode by device number and return it referenced.
 */
int
spec_node_lookup_by_dev(enum vtype type, dev_t dev, vnode_t **vpp)
{
	int error;
	vnode_t *vp;

	mutex_enter(&device_lock);
	for (vp = specfs_hash[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (type == vp->v_type && dev == vp->v_rdev) {
			mutex_enter(vp->v_interlock);
			/* If clean or being cleaned, then ignore it. */
			if ((vp->v_iflag & (VI_CLEAN | VI_XLOCK)) == 0)
				break;
			mutex_exit(vp->v_interlock);
		}
	}
	KASSERT(vp == NULL || mutex_owned(vp->v_interlock));
	if (vp == NULL) {
		mutex_exit(&device_lock);
		return ENOENT;
	}
	/*
	 * If it is an opened block device, return the opened vnode.
	 */
	if (type == VBLK && vp->v_specnode->sn_dev->sd_bdevvp != NULL) {
		mutex_exit(vp->v_interlock);
		vp = vp->v_specnode->sn_dev->sd_bdevvp;
		mutex_enter(vp->v_interlock);
	}
	mutex_exit(&device_lock);
	error = vget(vp, 0);
	if (error != 0)
		return error;
	*vpp = vp;

	return 0;
}
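
/*
 * Hedged usage sketch: the vnode returned above is referenced but not
 * locked, so a caller drops it with vrele() when done.  The device
 * number here is hypothetical.
 */
#if 0
	vnode_t *devvp;

	if (spec_node_lookup_by_dev(VBLK, makedev(0, 0), &devvp) == 0) {
		/* ... use devvp ... */
		vrele(devvp);
	}
#endif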

/*
 * Lookup the vnode of the block device a given file system is
 * mounted on and return it referenced.
 */
int
spec_node_lookup_by_mount(struct mount *mp, vnode_t **vpp)
{
	int i, error;
	vnode_t *vp, *vq;

	mutex_enter(&device_lock);
	for (i = 0, vq = NULL; i < SPECHSZ && vq == NULL; i++) {
		for (vp = specfs_hash[i]; vp; vp = vp->v_specnext) {
			if (vp->v_type != VBLK)
				continue;
			vq = vp->v_specnode->sn_dev->sd_bdevvp;
			if (vq != NULL &&
			    vq->v_specnode->sn_dev->sd_mountpoint == mp)
				break;
			vq = NULL;
		}
	}
	if (vq == NULL) {
		mutex_exit(&device_lock);
		return ENOENT;
	}
	mutex_enter(vq->v_interlock);
	mutex_exit(&device_lock);
	error = vget(vq, 0);
	if (error != 0)
		return error;
	*vpp = vq;

	return 0;
}

/*
 * Get the file system mounted on this block device.
 */
struct mount *
spec_node_getmountedfs(vnode_t *devvp)
{
	struct mount *mp;

	KASSERT(devvp->v_type == VBLK);
	mp = devvp->v_specnode->sn_dev->sd_mountpoint;

	return mp;
}

/*
 * Set the file system mounted on this block device.
 */
void
spec_node_setmountedfs(vnode_t *devvp, struct mount *mp)
{

	KASSERT(devvp->v_type == VBLK);
	KASSERT(devvp->v_specnode->sn_dev->sd_mountpoint == NULL || mp == NULL);
	devvp->v_specnode->sn_dev->sd_mountpoint = mp;
}
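
/*
 * Hedged sketch of the expected pairing: a file system records itself
 * on the device at mount time and clears the field again at unmount.
 * The KASSERT above enforces that an already-claimed device can only
 * be set back to NULL, never silently re-claimed.
 */
#if 0
	spec_node_setmountedfs(devvp, mp);	/* while mounting */
	/* ... file system lives on the device ... */
	spec_node_setmountedfs(devvp, NULL);	/* while unmounting */
#endif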

/*
 * A vnode representing a special device is going away.  Close
 * the device if the vnode holds it open.
 */
void
spec_node_revoke(vnode_t *vp)
{
	specnode_t *sn;
	specdev_t *sd;

	sn = vp->v_specnode;
	sd = sn->sn_dev;

	KASSERT(vp->v_type == VBLK || vp->v_type == VCHR);
	KASSERT(vp->v_specnode != NULL);
	KASSERT((vp->v_iflag & VI_XLOCK) != 0);
	KASSERT(sn->sn_gone == false);

	mutex_enter(&device_lock);
	KASSERT(sn->sn_opencnt <= sd->sd_opencnt);
	if (sn->sn_opencnt != 0) {
		sd->sd_opencnt -= (sn->sn_opencnt - 1);
		sn->sn_opencnt = 1;
		sn->sn_gone = true;
		mutex_exit(&device_lock);

		VOP_CLOSE(vp, FNONBLOCK, NOCRED);

		mutex_enter(&device_lock);
		KASSERT(sn->sn_opencnt == 0);
	}
	mutex_exit(&device_lock);
}

/*
 * A vnode representing a special device is being recycled.
 * Destroy the specfs component.
 */
void
spec_node_destroy(vnode_t *vp)
{
	specnode_t *sn;
	specdev_t *sd;
	vnode_t **vpp, *vp2;
	int refcnt;

	sn = vp->v_specnode;
	sd = sn->sn_dev;

	KASSERT(vp->v_type == VBLK || vp->v_type == VCHR);
	KASSERT(vp->v_specnode != NULL);
	KASSERT(sn->sn_opencnt == 0);

	mutex_enter(&device_lock);
	/* Remove from the hash and destroy the node. */
	vpp = &specfs_hash[SPECHASH(vp->v_rdev)];
	for (vp2 = *vpp;; vp2 = vp2->v_specnext) {
		if (vp2 == NULL) {
			panic("spec_node_destroy: corrupt hash");
		}
		if (vp2 == vp) {
			KASSERT(vp == *vpp);
			*vpp = vp->v_specnext;
			break;
		}
		if (vp2->v_specnext == vp) {
			vp2->v_specnext = vp->v_specnext;
			break;
		}
	}
	sn = vp->v_specnode;
	vp->v_specnode = NULL;
	refcnt = sd->sd_refcnt--;
	KASSERT(refcnt > 0);
	mutex_exit(&device_lock);

	/* If the device is no longer in use, destroy our record. */
	if (refcnt == 1) {
		KASSERT(sd->sd_opencnt == 0);
		KASSERT(sd->sd_bdevvp == NULL);
		kmem_free(sd, sizeof(*sd));
	}
	kmem_free(sn, sizeof(*sn));
}

/*
 * Trivial lookup routine that always fails.
 */
int
spec_lookup(void *v)
{
	struct vop_lookup_v2_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap = v;

	*ap->a_vpp = NULL;
	return (ENOTDIR);
}

/*
 * Open a special file.
 */
/* ARGSUSED */
int
spec_open(void *v)
{
	struct vop_open_args /* {
		struct vnode *a_vp;
		int a_mode;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct lwp *l;
	struct vnode *vp;
	dev_t dev;
	int error;
	struct partinfo pi;
	enum kauth_device_req req;
	specnode_t *sn;
	specdev_t *sd;

	u_int gen;
	const char *name;

	l = curlwp;
	vp = ap->a_vp;
	dev = vp->v_rdev;
	sn = vp->v_specnode;
	sd = sn->sn_dev;
	name = NULL;
	gen = 0;

	/*
	 * Don't allow open if fs is mounted -nodev.
	 */
	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
		return (ENXIO);

	switch (ap->a_mode & (FREAD | FWRITE)) {
	case FREAD | FWRITE:
		req = KAUTH_REQ_DEVICE_RAWIO_SPEC_RW;
		break;
	case FWRITE:
		req = KAUTH_REQ_DEVICE_RAWIO_SPEC_WRITE;
		break;
	default:
		req = KAUTH_REQ_DEVICE_RAWIO_SPEC_READ;
		break;
	}

	switch (vp->v_type) {
	case VCHR:
		error = kauth_authorize_device_spec(ap->a_cred, req, vp);
		if (error != 0)
			return (error);

		/*
		 * Character devices can accept opens from multiple
		 * vnodes.
		 */
		mutex_enter(&device_lock);
		if (sn->sn_gone) {
			mutex_exit(&device_lock);
			return (EBADF);
		}
		sd->sd_opencnt++;
		sn->sn_opencnt++;
		mutex_exit(&device_lock);
		if (cdev_type(dev) == D_TTY)
			vp->v_vflag |= VV_ISTTY;
		VOP_UNLOCK(vp);
		do {
			const struct cdevsw *cdev;

			gen = module_gen;
			error = cdev_open(dev, ap->a_mode, S_IFCHR, l);
			if (error != ENXIO)
				break;

			/* Check if we already have a valid driver */
			mutex_enter(&device_lock);
			cdev = cdevsw_lookup(dev);
			mutex_exit(&device_lock);
			if (cdev != NULL)
				break;

			/* Get device name from devsw_conv array */
			if ((name = cdevsw_getname(major(dev))) == NULL)
				break;

			/* Try to autoload device module */
			(void) module_autoload(name, MODULE_CLASS_DRIVER);
		} while (gen != module_gen);

		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		break;

	case VBLK:
		error = kauth_authorize_device_spec(ap->a_cred, req, vp);
		if (error != 0)
			return (error);

		/*
		 * For block devices, permit only one open.  The buffer
		 * cache cannot remain self-consistent with multiple
		 * vnodes holding a block device open.
		 */
		mutex_enter(&device_lock);
		if (sn->sn_gone) {
			mutex_exit(&device_lock);
			return (EBADF);
		}
		if (sd->sd_opencnt != 0) {
			mutex_exit(&device_lock);
			return EBUSY;
		}
		sn->sn_opencnt = 1;
		sd->sd_opencnt = 1;
		sd->sd_bdevvp = vp;
		mutex_exit(&device_lock);
		do {
			const struct bdevsw *bdev;

			gen = module_gen;
			error = bdev_open(dev, ap->a_mode, S_IFBLK, l);
			if (error != ENXIO)
				break;

			/* Check if we already have a valid driver */
			mutex_enter(&device_lock);
			bdev = bdevsw_lookup(dev);
			mutex_exit(&device_lock);
			if (bdev != NULL)
				break;

			/* Get device name from devsw_conv array */
			if ((name = bdevsw_getname(major(dev))) == NULL)
				break;

			VOP_UNLOCK(vp);

			/* Try to autoload device module */
			(void) module_autoload(name, MODULE_CLASS_DRIVER);

			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		} while (gen != module_gen);

		break;

	case VNON:
	case VLNK:
	case VDIR:
	case VREG:
	case VBAD:
	case VFIFO:
	case VSOCK:
	default:
		return 0;
	}

	mutex_enter(&device_lock);
	if (sn->sn_gone) {
		if (error == 0)
			error = EBADF;
	} else if (error != 0) {
		sd->sd_opencnt--;
		sn->sn_opencnt--;
		if (vp->v_type == VBLK)
			sd->sd_bdevvp = NULL;
	}
	mutex_exit(&device_lock);

	if (cdev_type(dev) != D_DISK || error != 0)
		return error;

	if (vp->v_type == VCHR)
		error = cdev_ioctl(vp->v_rdev, DIOCGPART, &pi, FREAD, curlwp);
	else
		error = bdev_ioctl(vp->v_rdev, DIOCGPART, &pi, FREAD, curlwp);
	if (error == 0)
		uvm_vnp_setsize(vp,
		    (voff_t)pi.disklab->d_secsize * pi.part->p_size);
	return 0;
}
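
/*
 * Hedged worked example for the uvm_vnp_setsize() call above, with
 * made-up numbers: a D_DISK device whose label reports d_secsize ==
 * 512 and whose partition holds p_size == 2097152 sectors has its
 * vnode size set to (voff_t)512 * 2097152 == 1 GiB, so the UVM layer
 * knows how far page-level I/O on the device may extend.
 */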

/*
 * Vnode op for read
 */
/* ARGSUSED */
int
spec_read(void *v)
{
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct lwp *l = curlwp;
	struct buf *bp;
	daddr_t bn;
	int bsize, bscale;
	struct partinfo dpart;
	int n, on;
	int error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("spec_read mode");
	if (&uio->uio_vmspace->vm_map != kernel_map &&
	    uio->uio_vmspace != curproc->p_vmspace)
		panic("spec_read proc");
#endif
	if (uio->uio_resid == 0)
		return (0);

	switch (vp->v_type) {

	case VCHR:
		VOP_UNLOCK(vp);
		error = cdev_read(vp->v_rdev, uio, ap->a_ioflag);
		vn_lock(vp, LK_SHARED | LK_RETRY);
		return (error);

	case VBLK:
		KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);
		if (uio->uio_offset < 0)
			return (EINVAL);
		bsize = BLKDEV_IOSIZE;

		/*
		 * dholland 20130616: XXX this logic should not be
		 * here. It is here because the old buffer cache
		 * demands that all accesses to the same blocks need
		 * to be the same size; but it only works for FFS and
		 * nowadays I think it'll fail silently if the size
		 * info in the disklabel is wrong. (Or missing.) The
		 * buffer cache needs to be smarter; or failing that
		 * we need a reliable way here to get the right block
		 * size; or a reliable way to guarantee that (a) the
		 * fs is not mounted when we get here and (b) any
		 * buffers generated here will get purged when the fs
		 * does get mounted.
		 */
		if (bdev_ioctl(vp->v_rdev, DIOCGPART, &dpart, FREAD, l) == 0) {
			if (dpart.part->p_fstype == FS_BSDFFS &&
			    dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
				bsize = dpart.part->p_frag *
				    dpart.part->p_fsize;
		}

		bscale = bsize >> DEV_BSHIFT;
		do {
			bn = (uio->uio_offset >> DEV_BSHIFT) &~ (bscale - 1);
			on = uio->uio_offset % bsize;
			n = min((unsigned)(bsize - on), uio->uio_resid);
			error = bread(vp, bn, bsize, NOCRED, 0, &bp);
			if (error) {
				return (error);
			}
			n = min(n, bsize - bp->b_resid);
			error = uiomove((char *)bp->b_data + on, n, uio);
			brelse(bp, 0);
		} while (error == 0 && uio->uio_resid > 0 && n != 0);
		return (error);

	default:
		panic("spec_read type");
	}
	/* NOTREACHED */
}
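
/*
 * Hedged worked example of the block arithmetic in the VBLK loop
 * above, assuming DEV_BSHIFT == 9 (512-byte device blocks) and
 * bsize == 2048:
 *
 *	bscale = 2048 >> 9 = 4
 *	for uio_offset = 5000:
 *	bn = (5000 >> 9) & ~(4 - 1) = 9 & ~3 = 8   (block at byte 4096)
 *	on = 5000 % 2048 = 904                     (offset within block)
 *	n  = min(2048 - 904, uio_resid)            (at most 1144 bytes)
 *
 * so each pass reads one aligned bsize block and copies out just the
 * portion the caller asked for.
 */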

/*
 * Vnode op for write
 */
/* ARGSUSED */
int
spec_write(void *v)
{
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct lwp *l = curlwp;
	struct buf *bp;
	daddr_t bn;
	int bsize, bscale;
	struct partinfo dpart;
	int n, on;
	int error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("spec_write mode");
	if (&uio->uio_vmspace->vm_map != kernel_map &&
	    uio->uio_vmspace != curproc->p_vmspace)
		panic("spec_write proc");
#endif

	switch (vp->v_type) {

	case VCHR:
		VOP_UNLOCK(vp);
		error = cdev_write(vp->v_rdev, uio, ap->a_ioflag);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		return (error);

	case VBLK:
		KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);
		if (uio->uio_resid == 0)
			return (0);
		if (uio->uio_offset < 0)
			return (EINVAL);
		bsize = BLKDEV_IOSIZE;
		if (bdev_ioctl(vp->v_rdev, DIOCGPART, &dpart, FREAD, l) == 0) {
			if (dpart.part->p_fstype == FS_BSDFFS &&
			    dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
				bsize = dpart.part->p_frag *
				    dpart.part->p_fsize;
		}
		bscale = bsize >> DEV_BSHIFT;
		do {
			bn = (uio->uio_offset >> DEV_BSHIFT) &~ (bscale - 1);
			on = uio->uio_offset % bsize;
			n = min((unsigned)(bsize - on), uio->uio_resid);
			if (n == bsize)
				bp = getblk(vp, bn, bsize, 0, 0);
			else
				error = bread(vp, bn, bsize, NOCRED,
				    B_MODIFY, &bp);
			if (error) {
				return (error);
			}
			n = min(n, bsize - bp->b_resid);
			error = uiomove((char *)bp->b_data + on, n, uio);
			if (error)
				brelse(bp, 0);
			else {
				if (n + on == bsize)
					bawrite(bp);
				else
					bdwrite(bp);
				error = bp->b_error;
			}
		} while (error == 0 && uio->uio_resid > 0 && n != 0);
		return (error);

	default:
		panic("spec_write type");
	}
	/* NOTREACHED */
}
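
/*
 * Hedged note on the getblk()/bread() split above: a write covering a
 * whole block (n == bsize) takes a buffer with getblk() and overwrites
 * it outright, while a partial write must first bread() the block
 * (read-modify-write) so the bytes it does not touch survive; B_MODIFY
 * tells the buffer cache the data is about to be dirtied.
 */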

/*
 * Device ioctl operation.
 */
/* ARGSUSED */
int
spec_ioctl(void *v)
{
	struct vop_ioctl_args /* {
		struct vnode *a_vp;
		u_long a_command;
		void *a_data;
		int a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp;
	dev_t dev;

	/*
	 * Extract all the info we need from the vnode, taking care to
	 * avoid a race with VOP_REVOKE().
	 */

	vp = ap->a_vp;
	dev = NODEV;
	mutex_enter(vp->v_interlock);
	if ((vp->v_iflag & VI_XLOCK) == 0 && vp->v_specnode) {
		dev = vp->v_rdev;
	}
	mutex_exit(vp->v_interlock);
	if (dev == NODEV) {
		return ENXIO;
	}

	switch (vp->v_type) {

	case VCHR:
		return cdev_ioctl(dev, ap->a_command, ap->a_data,
		    ap->a_fflag, curlwp);

	case VBLK:
		KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);
		return bdev_ioctl(dev, ap->a_command, ap->a_data,
		    ap->a_fflag, curlwp);

	default:
		panic("spec_ioctl");
		/* NOTREACHED */
	}
}

/* ARGSUSED */
int
spec_poll(void *v)
{
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
	} */ *ap = v;
	struct vnode *vp;
	dev_t dev;

	/*
	 * Extract all the info we need from the vnode, taking care to
	 * avoid a race with VOP_REVOKE().
	 */

	vp = ap->a_vp;
	dev = NODEV;
	mutex_enter(vp->v_interlock);
	if ((vp->v_iflag & VI_XLOCK) == 0 && vp->v_specnode) {
		dev = vp->v_rdev;
	}
	mutex_exit(vp->v_interlock);
	if (dev == NODEV) {
		return POLLERR;
	}

	switch (vp->v_type) {

	case VCHR:
		return cdev_poll(dev, ap->a_events, curlwp);

	default:
		return (genfs_poll(v));
	}
}

/* ARGSUSED */
int
spec_kqfilter(void *v)
{
	struct vop_kqfilter_args /* {
		struct vnode *a_vp;
		struct knote *a_kn;
	} */ *ap = v;
	dev_t dev;

	switch (ap->a_vp->v_type) {

	case VCHR:
		dev = ap->a_vp->v_rdev;
		return cdev_kqfilter(dev, ap->a_kn);
	default:
		/*
		 * Block devices don't support kqfilter, and we refuse
		 * it for any other file types (such as vnodes being
		 * vflush()ed) as well.
		 */
		return (EOPNOTSUPP);
	}
}

/*
 * Allow mapping only of D_DISK devices.  This is called only for VBLK.
 */
int
spec_mmap(void *v)
{
	struct vop_mmap_args /* {
		struct vnode *a_vp;
		vm_prot_t a_prot;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	KASSERT(vp->v_type == VBLK);
	if (bdev_type(vp->v_rdev) != D_DISK)
		return EINVAL;

	return 0;
}

/*
 * Synch buffers associated with a block device
 */
/* ARGSUSED */
int
spec_fsync(void *v)
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		kauth_cred_t a_cred;
		int a_flags;
		off_t a_offlo;
		off_t a_offhi;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct mount *mp;
	int error;

	if (vp->v_type == VBLK) {
		if ((mp = spec_node_getmountedfs(vp)) != NULL) {
			error = VFS_FSYNC(mp, vp, ap->a_flags);
			if (error != EOPNOTSUPP)
				return error;
		}
		return vflushbuf(vp, ap->a_flags);
	}
	return (0);
}

/*
 * Just call the device strategy routine
 */
int
spec_strategy(void *v)
{
	struct vop_strategy_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct buf *bp = ap->a_bp;
	int error;

	KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);

	error = 0;
	bp->b_dev = vp->v_rdev;

	if (!(bp->b_flags & B_READ))
		error = fscow_run(bp, false);

	if (error) {
		bp->b_error = error;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (error);
	}

	bdev_strategy(bp);

	return (0);
}

int
spec_inactive(void *v)
{
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		bool *a_recycle;
	} */ *ap = v;

	VOP_UNLOCK(ap->a_vp);
	return (0);
}

/*
 * This is a noop, simply returning what one has been given.
 */
int
spec_bmap(void *v)
{
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t a_bn;
		struct vnode **a_vpp;
		daddr_t *a_bnp;
		int *a_runp;
	} */ *ap = v;

	if (ap->a_vpp != NULL)
		*ap->a_vpp = ap->a_vp;
	if (ap->a_bnp != NULL)
		*ap->a_bnp = ap->a_bn;
	if (ap->a_runp != NULL)
		*ap->a_runp = (MAXBSIZE >> DEV_BSHIFT) - 1;
	return (0);
}
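
/*
 * Hedged worked example: assuming the common values MAXBSIZE == 65536
 * and DEV_BSHIFT == 9, *ap->a_runp above becomes (65536 >> 9) - 1 =
 * 127, i.e. a caller may cluster up to 127 further DEV_BSIZE blocks
 * beyond a_bn, which is always safe because a device's block space is
 * contiguous by construction.
 */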

/*
 * Device close routine
 */
/* ARGSUSED */
int
spec_close(void *v)
{
	struct vop_close_args /* {
		struct vnode *a_vp;
		int a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct session *sess;
	dev_t dev = vp->v_rdev;
	int mode, error, flags, flags1, count;
	specnode_t *sn;
	specdev_t *sd;

	flags = vp->v_iflag;
	sn = vp->v_specnode;
	sd = sn->sn_dev;

	switch (vp->v_type) {

	case VCHR:
		/*
		 * Hack: a tty device that is a controlling terminal
		 * has a reference from the session structure.  We
		 * cannot easily tell that a character device is a
		 * controlling terminal, unless it is the closing
		 * process' controlling terminal.  In that case, if the
		 * open count is 1 release the reference from the
		 * session.  Also, remove the link from the tty back to
		 * the session and pgrp.
		 *
		 * XXX V. fishy.
		 */
		mutex_enter(proc_lock);
		sess = curlwp->l_proc->p_session;
		if (sn->sn_opencnt == 1 && vp == sess->s_ttyvp) {
			mutex_spin_enter(&tty_lock);
			sess->s_ttyvp = NULL;
			if (sess->s_ttyp->t_session != NULL) {
				sess->s_ttyp->t_pgrp = NULL;
				sess->s_ttyp->t_session = NULL;
				mutex_spin_exit(&tty_lock);
				/* Releases proc_lock. */
				proc_sessrele(sess);
			} else {
				mutex_spin_exit(&tty_lock);
				if (sess->s_ttyp->t_pgrp != NULL)
					panic("spec_close: spurious pgrp ref");
				mutex_exit(proc_lock);
			}
			vrele(vp);
		} else
			mutex_exit(proc_lock);

		/*
		 * If the vnode is locked, then we are in the midst
		 * of forcibly closing the device, otherwise we only
		 * close on last reference.
		 */
		mode = S_IFCHR;
		break;

	case VBLK:
		KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);
		/*
		 * On last close of a block device (that isn't mounted)
		 * we must invalidate any in core blocks, so that
		 * we can, for instance, change floppy disks.
		 */
		error = vinvalbuf(vp, V_SAVE, ap->a_cred, curlwp, 0, 0);
		if (error)
			return (error);
		/*
		 * We do not want to really close the device if it
		 * is still in use unless we are trying to close it
		 * forcibly.  Since every use (buffer, vnode, swap, cmap)
		 * holds a reference to the vnode, and because we mark
		 * any other vnodes that alias this device, when the
		 * sum of the reference counts on all the aliased
		 * vnodes descends to one, we are on last close.
		 */
		mode = S_IFBLK;
		break;

	default:
		panic("spec_close: not special");
	}

	mutex_enter(&device_lock);
	sn->sn_opencnt--;
	count = --sd->sd_opencnt;
	if (vp->v_type == VBLK)
		sd->sd_bdevvp = NULL;
	mutex_exit(&device_lock);

	if (count != 0)
		return 0;

	flags1 = ap->a_fflag;

	/*
	 * if VI_XLOCK is set, then we're going away soon, so make this
	 * non-blocking.  Also ensures that we won't wedge in vn_lock below.
	 */
	if (flags & VI_XLOCK)
		flags1 |= FNONBLOCK;

	/*
	 * If we're able to block, release the vnode lock & reacquire.  We
	 * might end up sleeping for someone else who wants our queues.  They
	 * won't get them if we hold the vnode locked.  Also, if VI_XLOCK is
	 * set, don't release the lock as we won't be able to regain it.
	 */
	if (!(flags1 & FNONBLOCK))
		VOP_UNLOCK(vp);

	if (vp->v_type == VBLK)
		error = bdev_close(dev, flags1, mode, curlwp);
	else
		error = cdev_close(dev, flags1, mode, curlwp);

	if (!(flags1 & FNONBLOCK))
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	return (error);
}

/*
 * Print out the contents of a special device vnode.
 */
int
spec_print(void *v)
{
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap = v;

	printf("dev %llu, %llu\n", (unsigned long long)major(ap->a_vp->v_rdev),
	    (unsigned long long)minor(ap->a_vp->v_rdev));
	return 0;
}

/*
 * Return POSIX pathconf information applicable to special devices.
 */
int
spec_pathconf(void *v)
{
	struct vop_pathconf_args /* {
		struct vnode *a_vp;
		int a_name;
		register_t *a_retval;
	} */ *ap = v;

	switch (ap->a_name) {
	case _PC_LINK_MAX:
		*ap->a_retval = LINK_MAX;
		return (0);
	case _PC_MAX_CANON:
		*ap->a_retval = MAX_CANON;
		return (0);
	case _PC_MAX_INPUT:
		*ap->a_retval = MAX_INPUT;
		return (0);
	case _PC_PIPE_BUF:
		*ap->a_retval = PIPE_BUF;
		return (0);
	case _PC_CHOWN_RESTRICTED:
		*ap->a_retval = 1;
		return (0);
	case _PC_VDISABLE:
		*ap->a_retval = _POSIX_VDISABLE;
		return (0);
	case _PC_SYNC_IO:
		*ap->a_retval = 1;
		return (0);
	default:
		return (EINVAL);
	}
	/* NOTREACHED */
}

/*
 * Advisory record locking support.
 */
int
spec_advlock(void *v)
{
	struct vop_advlock_args /* {
		struct vnode *a_vp;
		void *a_id;
		int a_op;
		struct flock *a_fl;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	return lf_advlock(ap, &vp->v_speclockf, (off_t)0);
}
