spec_vnops.c revision 1.38 1 /* $NetBSD: spec_vnops.c,v 1.38 1997/10/16 23:57:51 christos Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)spec_vnops.c 8.8 (Berkeley) 11/21/94
36 */
37
38 #include <sys/param.h>
39 #include <sys/proc.h>
40 #include <sys/systm.h>
41 #include <sys/kernel.h>
42 #include <sys/conf.h>
43 #include <sys/buf.h>
44 #include <sys/mount.h>
45 #include <sys/namei.h>
46 #include <sys/vnode.h>
47 #include <sys/stat.h>
48 #include <sys/errno.h>
49 #include <sys/ioctl.h>
50 #include <sys/file.h>
51 #include <sys/disklabel.h>
52 #include <sys/lockf.h>
53
54 #include <miscfs/genfs/genfs.h>
55 #include <miscfs/specfs/specdev.h>
56
/*
 * Symbolic sleep message strings for devices.
 * Each array holds a string identical to its own name.
 */
const char	devopn[]  = "devopn";
const char	devio[]   = "devio";
const char	devwait[] = "devwait";
const char	devin[]   = "devin";
const char	devout[]  = "devout";
const char	devioc[]  = "devioc";
const char	devcls[]  = "devcls";
65
66 int (**spec_vnodeop_p) __P((void *));
67 struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
68 { &vop_default_desc, vn_default_error },
69 { &vop_lookup_desc, spec_lookup }, /* lookup */
70 { &vop_create_desc, spec_create }, /* create */
71 { &vop_mknod_desc, spec_mknod }, /* mknod */
72 { &vop_open_desc, spec_open }, /* open */
73 { &vop_close_desc, spec_close }, /* close */
74 { &vop_access_desc, spec_access }, /* access */
75 { &vop_getattr_desc, spec_getattr }, /* getattr */
76 { &vop_setattr_desc, spec_setattr }, /* setattr */
77 { &vop_read_desc, spec_read }, /* read */
78 { &vop_write_desc, spec_write }, /* write */
79 { &vop_lease_desc, spec_lease_check }, /* lease */
80 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */
81 { &vop_poll_desc, spec_poll }, /* poll */
82 { &vop_mmap_desc, spec_mmap }, /* mmap */
83 { &vop_fsync_desc, spec_fsync }, /* fsync */
84 { &vop_seek_desc, spec_seek }, /* seek */
85 { &vop_remove_desc, spec_remove }, /* remove */
86 { &vop_link_desc, spec_link }, /* link */
87 { &vop_rename_desc, spec_rename }, /* rename */
88 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */
89 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */
90 { &vop_symlink_desc, spec_symlink }, /* symlink */
91 { &vop_readdir_desc, spec_readdir }, /* readdir */
92 { &vop_readlink_desc, spec_readlink }, /* readlink */
93 { &vop_abortop_desc, spec_abortop }, /* abortop */
94 { &vop_inactive_desc, spec_inactive }, /* inactive */
95 { &vop_reclaim_desc, spec_reclaim }, /* reclaim */
96 { &vop_lock_desc, spec_lock }, /* lock */
97 { &vop_unlock_desc, spec_unlock }, /* unlock */
98 { &vop_bmap_desc, spec_bmap }, /* bmap */
99 { &vop_strategy_desc, spec_strategy }, /* strategy */
100 { &vop_print_desc, spec_print }, /* print */
101 { &vop_islocked_desc, spec_islocked }, /* islocked */
102 { &vop_pathconf_desc, spec_pathconf }, /* pathconf */
103 { &vop_advlock_desc, spec_advlock }, /* advlock */
104 { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */
105 { &vop_valloc_desc, spec_valloc }, /* valloc */
106 { &vop_vfree_desc, spec_vfree }, /* vfree */
107 { &vop_truncate_desc, spec_truncate }, /* truncate */
108 { &vop_update_desc, spec_update }, /* update */
109 { &vop_bwrite_desc, spec_bwrite }, /* bwrite */
110 { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL }
111 };
112 struct vnodeopv_desc spec_vnodeop_opv_desc =
113 { &spec_vnodeop_p, spec_vnodeop_entries };
114
115 /*
116 * Trivial lookup routine that always fails.
117 */
118 int
119 spec_lookup(v)
120 void *v;
121 {
122 struct vop_lookup_args /* {
123 struct vnode *a_dvp;
124 struct vnode **a_vpp;
125 struct componentname *a_cnp;
126 } */ *ap = v;
127
128 *ap->a_vpp = NULL;
129 return (ENOTDIR);
130 }
131
132 /*
133 * Open a special file.
134 */
135 /* ARGSUSED */
136 int
137 spec_open(v)
138 void *v;
139 {
140 struct vop_open_args /* {
141 struct vnode *a_vp;
142 int a_mode;
143 struct ucred *a_cred;
144 struct proc *a_p;
145 } */ *ap = v;
146 struct vnode *bvp, *vp = ap->a_vp;
147 dev_t bdev, dev = (dev_t)vp->v_rdev;
148 register int maj = major(dev);
149 int error;
150
151 /*
152 * Don't allow open if fs is mounted -nodev.
153 */
154 if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
155 return (ENXIO);
156
157 switch (vp->v_type) {
158
159 case VCHR:
160 if ((u_int)maj >= nchrdev)
161 return (ENXIO);
162 if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
163 /*
164 * When running in very secure mode, do not allow
165 * opens for writing of any disk character devices.
166 */
167 if (securelevel >= 2 && cdevsw[maj].d_type == D_DISK)
168 return (EPERM);
169 /*
170 * When running in secure mode, do not allow opens
171 * for writing of /dev/mem, /dev/kmem, or character
172 * devices whose corresponding block devices are
173 * currently mounted.
174 */
175 if (securelevel >= 1) {
176 if ((bdev = chrtoblk(dev)) != (dev_t)NODEV &&
177 vfinddev(bdev, VBLK, &bvp) &&
178 bvp->v_usecount > 0 &&
179 (error = vfs_mountedon(bvp)))
180 return (error);
181 if (iskmemdev(dev))
182 return (EPERM);
183 }
184 }
185 if (cdevsw[maj].d_type == D_TTY)
186 vp->v_flag |= VISTTY;
187 VOP_UNLOCK(vp);
188 error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, ap->a_p);
189 VOP_LOCK(vp);
190 return (error);
191
192 case VBLK:
193 if ((u_int)maj >= nblkdev)
194 return (ENXIO);
195 /*
196 * When running in very secure mode, do not allow
197 * opens for writing of any disk block devices.
198 */
199 if (securelevel >= 2 && ap->a_cred != FSCRED &&
200 (ap->a_mode & FWRITE) && bdevsw[maj].d_type == D_DISK)
201 return (EPERM);
202 /*
203 * Do not allow opens of block devices that are
204 * currently mounted.
205 */
206 if ((error = vfs_mountedon(vp)) != 0)
207 return (error);
208 return ((*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, ap->a_p));
209 case VNON:
210 case VLNK:
211 case VDIR:
212 case VREG:
213 case VBAD:
214 case VFIFO:
215 case VSOCK:
216 break;
217 }
218 return (0);
219 }
220
221 /*
222 * Vnode op for read
223 */
224 /* ARGSUSED */
225 int
226 spec_read(v)
227 void *v;
228 {
229 struct vop_read_args /* {
230 struct vnode *a_vp;
231 struct uio *a_uio;
232 int a_ioflag;
233 struct ucred *a_cred;
234 } */ *ap = v;
235 register struct vnode *vp = ap->a_vp;
236 register struct uio *uio = ap->a_uio;
237 struct proc *p = uio->uio_procp;
238 struct buf *bp;
239 daddr_t bn, nextbn;
240 long bsize, bscale, ssize;
241 struct partinfo dpart;
242 int n, on, majordev;
243 int (*ioctl) __P((dev_t, u_long, caddr_t, int, struct proc *));
244 int error = 0;
245
246 #ifdef DIAGNOSTIC
247 if (uio->uio_rw != UIO_READ)
248 panic("spec_read mode");
249 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
250 panic("spec_read proc");
251 #endif
252 if (uio->uio_resid == 0)
253 return (0);
254
255 switch (vp->v_type) {
256
257 case VCHR:
258 VOP_UNLOCK(vp);
259 error = (*cdevsw[major(vp->v_rdev)].d_read)
260 (vp->v_rdev, uio, ap->a_ioflag);
261 VOP_LOCK(vp);
262 return (error);
263
264 case VBLK:
265 if (uio->uio_offset < 0)
266 return (EINVAL);
267 bsize = BLKDEV_IOSIZE;
268 ssize = DEV_BSIZE;
269 if ((majordev = major(vp->v_rdev)) < nblkdev &&
270 (ioctl = bdevsw[majordev].d_ioctl) != NULL &&
271 (*ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0) {
272 if (dpart.part->p_fstype == FS_BSDFFS &&
273 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
274 bsize = dpart.part->p_frag *
275 dpart.part->p_fsize;
276 if (dpart.disklab->d_secsize != 0)
277 ssize = dpart.disklab->d_secsize;
278 }
279 bscale = bsize / ssize;
280 do {
281 bn = (uio->uio_offset / ssize) &~ (bscale - 1);
282 on = uio->uio_offset % bsize;
283 n = min((unsigned)(bsize - on), uio->uio_resid);
284 if (vp->v_lastr + bscale == bn) {
285 nextbn = bn + bscale;
286 error = breadn(vp, bn, (int)bsize, &nextbn,
287 (int *)&bsize, 1, NOCRED, &bp);
288 } else
289 error = bread(vp, bn, (int)bsize, NOCRED, &bp);
290 vp->v_lastr = bn;
291 n = min(n, bsize - bp->b_resid);
292 if (error) {
293 brelse(bp);
294 return (error);
295 }
296 error = uiomove((char *)bp->b_data + on, n, uio);
297 brelse(bp);
298 } while (error == 0 && uio->uio_resid > 0 && n != 0);
299 return (error);
300
301 default:
302 panic("spec_read type");
303 }
304 /* NOTREACHED */
305 }
306
307 /*
308 * Vnode op for write
309 */
310 /* ARGSUSED */
311 int
312 spec_write(v)
313 void *v;
314 {
315 struct vop_write_args /* {
316 struct vnode *a_vp;
317 struct uio *a_uio;
318 int a_ioflag;
319 struct ucred *a_cred;
320 } */ *ap = v;
321 register struct vnode *vp = ap->a_vp;
322 register struct uio *uio = ap->a_uio;
323 struct proc *p = uio->uio_procp;
324 struct buf *bp;
325 daddr_t bn;
326 long bsize, bscale, ssize;
327 struct partinfo dpart;
328 int n, on, majordev;
329 int (*ioctl) __P((dev_t, u_long, caddr_t, int, struct proc *));
330 int error = 0;
331
332 #ifdef DIAGNOSTIC
333 if (uio->uio_rw != UIO_WRITE)
334 panic("spec_write mode");
335 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
336 panic("spec_write proc");
337 #endif
338
339 switch (vp->v_type) {
340
341 case VCHR:
342 VOP_UNLOCK(vp);
343 error = (*cdevsw[major(vp->v_rdev)].d_write)
344 (vp->v_rdev, uio, ap->a_ioflag);
345 VOP_LOCK(vp);
346 return (error);
347
348 case VBLK:
349 if (uio->uio_resid == 0)
350 return (0);
351 if (uio->uio_offset < 0)
352 return (EINVAL);
353 bsize = BLKDEV_IOSIZE;
354 ssize = DEV_BSIZE;
355 if ((majordev = major(vp->v_rdev)) < nblkdev &&
356 (ioctl = bdevsw[majordev].d_ioctl) != NULL &&
357 (*ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0) {
358 if (dpart.part->p_fstype == FS_BSDFFS &&
359 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
360 bsize = dpart.part->p_frag *
361 dpart.part->p_fsize;
362 if (dpart.disklab->d_secsize != 0)
363 ssize = dpart.disklab->d_secsize;
364 }
365 bscale = bsize / ssize;
366 do {
367 bn = (uio->uio_offset / ssize) &~ (bscale - 1);
368 on = uio->uio_offset % bsize;
369 n = min((unsigned)(bsize - on), uio->uio_resid);
370 if (n == bsize)
371 bp = getblk(vp, bn, bsize, 0, 0);
372 else
373 error = bread(vp, bn, bsize, NOCRED, &bp);
374 n = min(n, bsize - bp->b_resid);
375 if (error) {
376 brelse(bp);
377 return (error);
378 }
379 error = uiomove((char *)bp->b_data + on, n, uio);
380 if (n + on == bsize)
381 bawrite(bp);
382 else
383 bdwrite(bp);
384 } while (error == 0 && uio->uio_resid > 0 && n != 0);
385 return (error);
386
387 default:
388 panic("spec_write type");
389 }
390 /* NOTREACHED */
391 }
392
393 /*
394 * Device ioctl operation.
395 */
396 /* ARGSUSED */
397 int
398 spec_ioctl(v)
399 void *v;
400 {
401 struct vop_ioctl_args /* {
402 struct vnode *a_vp;
403 u_long a_command;
404 caddr_t a_data;
405 int a_fflag;
406 struct ucred *a_cred;
407 struct proc *a_p;
408 } */ *ap = v;
409 dev_t dev = ap->a_vp->v_rdev;
410 int maj = major(dev);
411
412 switch (ap->a_vp->v_type) {
413
414 case VCHR:
415 return ((*cdevsw[maj].d_ioctl)(dev, ap->a_command, ap->a_data,
416 ap->a_fflag, ap->a_p));
417
418 case VBLK:
419 if (ap->a_command == 0 && (long)ap->a_data == B_TAPE)
420 if (bdevsw[maj].d_type == D_TAPE)
421 return (0);
422 else
423 return (1);
424 return ((*bdevsw[maj].d_ioctl)(dev, ap->a_command, ap->a_data,
425 ap->a_fflag, ap->a_p));
426
427 default:
428 panic("spec_ioctl");
429 /* NOTREACHED */
430 }
431 }
432
433 /* ARGSUSED */
434 int
435 spec_poll(v)
436 void *v;
437 {
438 struct vop_poll_args /* {
439 struct vnode *a_vp;
440 int a_events;
441 struct proc *a_p;
442 } */ *ap = v;
443 register dev_t dev;
444
445 switch (ap->a_vp->v_type) {
446
447 case VCHR:
448 dev = ap->a_vp->v_rdev;
449 return (*cdevsw[major(dev)].d_poll)(dev, ap->a_events, ap->a_p);
450
451 default:
452 return (genfs_poll(v));
453 }
454 }
455 /*
456 * Synch buffers associated with a block device
457 */
458 /* ARGSUSED */
459 int
460 spec_fsync(v)
461 void *v;
462 {
463 struct vop_fsync_args /* {
464 struct vnode *a_vp;
465 struct ucred *a_cred;
466 int a_waitfor;
467 struct proc *a_p;
468 } */ *ap = v;
469 register struct vnode *vp = ap->a_vp;
470
471 if (vp->v_type == VBLK)
472 vflushbuf(vp, ap->a_waitfor == MNT_WAIT);
473 return (0);
474 }
475
476 /*
477 * Just call the device strategy routine
478 */
479 int
480 spec_strategy(v)
481 void *v;
482 {
483 struct vop_strategy_args /* {
484 struct buf *a_bp;
485 } */ *ap = v;
486
487 (*bdevsw[major(ap->a_bp->b_dev)].d_strategy)(ap->a_bp);
488 return (0);
489 }
490
491 /*
492 * This is a noop, simply returning what one has been given.
493 */
494 int
495 spec_bmap(v)
496 void *v;
497 {
498 struct vop_bmap_args /* {
499 struct vnode *a_vp;
500 daddr_t a_bn;
501 struct vnode **a_vpp;
502 daddr_t *a_bnp;
503 } */ *ap = v;
504
505 if (ap->a_vpp != NULL)
506 *ap->a_vpp = ap->a_vp;
507 if (ap->a_bnp != NULL)
508 *ap->a_bnp = ap->a_bn;
509 return (0);
510 }
511
/*
 * Lock operation for special files.
 * No locking is performed at present: the argument is ignored and
 * 0 is returned.
 */
/* ARGSUSED */
int
spec_lock(v)
	void *v;
{
	(void)v;
	return (0);
}
523
/*
 * Unlock operation for special files.
 * Nothing is done: the argument is ignored and 0 is returned.
 */
/* ARGSUSED */
int
spec_unlock(v)
	void *v;
{
	(void)v;
	return (0);
}
532
533 /*
534 * Device close routine
535 */
536 /* ARGSUSED */
537 int
538 spec_close(v)
539 void *v;
540 {
541 struct vop_close_args /* {
542 struct vnode *a_vp;
543 int a_fflag;
544 struct ucred *a_cred;
545 struct proc *a_p;
546 } */ *ap = v;
547 register struct vnode *vp = ap->a_vp;
548 dev_t dev = vp->v_rdev;
549 int (*devclose) __P((dev_t, int, int, struct proc *));
550 int mode, error;
551
552 switch (vp->v_type) {
553
554 case VCHR:
555 /*
556 * Hack: a tty device that is a controlling terminal
557 * has a reference from the session structure.
558 * We cannot easily tell that a character device is
559 * a controlling terminal, unless it is the closing
560 * process' controlling terminal. In that case,
561 * if the reference count is 2 (this last descriptor
562 * plus the session), release the reference from the session.
563 */
564 if (vcount(vp) == 2 && ap->a_p &&
565 vp == ap->a_p->p_session->s_ttyvp) {
566 vrele(vp);
567 ap->a_p->p_session->s_ttyvp = NULL;
568 }
569 /*
570 * If the vnode is locked, then we are in the midst
571 * of forcably closing the device, otherwise we only
572 * close on last reference.
573 */
574 if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
575 return (0);
576 devclose = cdevsw[major(dev)].d_close;
577 mode = S_IFCHR;
578 break;
579
580 case VBLK:
581 /*
582 * On last close of a block device (that isn't mounted)
583 * we must invalidate any in core blocks, so that
584 * we can, for instance, change floppy disks.
585 */
586 error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0);
587 if (error)
588 return (error);
589 /*
590 * We do not want to really close the device if it
591 * is still in use unless we are trying to close it
592 * forcibly. Since every use (buffer, vnode, swap, cmap)
593 * holds a reference to the vnode, and because we mark
594 * any other vnodes that alias this device, when the
595 * sum of the reference counts on all the aliased
596 * vnodes descends to one, we are on last close.
597 */
598 if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
599 return (0);
600 devclose = bdevsw[major(dev)].d_close;
601 mode = S_IFBLK;
602 break;
603
604 default:
605 panic("spec_close: not special");
606 }
607
608 return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p));
609 }
610
611 /*
612 * Print out the contents of a special device vnode.
613 */
614 int
615 spec_print(v)
616 void *v;
617 {
618 struct vop_print_args /* {
619 struct vnode *a_vp;
620 } */ *ap = v;
621
622 printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev),
623 minor(ap->a_vp->v_rdev));
624 return 0;
625 }
626
627 /*
628 * Return POSIX pathconf information applicable to special devices.
629 */
630 int
631 spec_pathconf(v)
632 void *v;
633 {
634 struct vop_pathconf_args /* {
635 struct vnode *a_vp;
636 int a_name;
637 register_t *a_retval;
638 } */ *ap = v;
639
640 switch (ap->a_name) {
641 case _PC_LINK_MAX:
642 *ap->a_retval = LINK_MAX;
643 return (0);
644 case _PC_MAX_CANON:
645 *ap->a_retval = MAX_CANON;
646 return (0);
647 case _PC_MAX_INPUT:
648 *ap->a_retval = MAX_INPUT;
649 return (0);
650 case _PC_PIPE_BUF:
651 *ap->a_retval = PIPE_BUF;
652 return (0);
653 case _PC_CHOWN_RESTRICTED:
654 *ap->a_retval = 1;
655 return (0);
656 case _PC_VDISABLE:
657 *ap->a_retval = _POSIX_VDISABLE;
658 return (0);
659 default:
660 return (EINVAL);
661 }
662 /* NOTREACHED */
663 }
664
665 /*
666 * Advisory record locking support.
667 */
668 int
669 spec_advlock(v)
670 void *v;
671 {
672 struct vop_advlock_args /* {
673 struct vnode *a_vp;
674 caddr_t a_id;
675 int a_op;
676 struct flock *a_fl;
677 int a_flags;
678 } */ *ap = v;
679 register struct vnode *vp = ap->a_vp;
680
681 return (lf_advlock(&vp->v_speclockf, (off_t)0, ap->a_id, ap->a_op,
682 ap->a_fl, ap->a_flags));
683 }
684