spec_vnops.c revision 1.35 1 /* $NetBSD: spec_vnops.c,v 1.35 1997/04/02 17:09:47 kleink Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)spec_vnops.c 8.8 (Berkeley) 11/21/94
36 */
37
38 #include <sys/param.h>
39 #include <sys/proc.h>
40 #include <sys/systm.h>
41 #include <sys/kernel.h>
42 #include <sys/conf.h>
43 #include <sys/buf.h>
44 #include <sys/mount.h>
45 #include <sys/namei.h>
46 #include <sys/vnode.h>
47 #include <sys/stat.h>
48 #include <sys/errno.h>
49 #include <sys/ioctl.h>
50 #include <sys/file.h>
51 #include <sys/disklabel.h>
52 #include <sys/lockf.h>
53
54 #include <miscfs/genfs/genfs.h>
55 #include <miscfs/specfs/specdev.h>
56
/*
 * Symbolic sleep message strings for devices.
 * Each array holds its own name as a NUL-terminated string.
 */
char	devopn[]  = "devopn";
char	devio[]   = "devio";
char	devwait[] = "devwait";
char	devin[]   = "devin";
char	devout[]  = "devout";
char	devioc[]  = "devioc";
char	devcls[]  = "devcls";
65
66 int (**spec_vnodeop_p) __P((void *));
67 struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
68 { &vop_default_desc, vn_default_error },
69 { &vop_lookup_desc, spec_lookup }, /* lookup */
70 { &vop_create_desc, spec_create }, /* create */
71 { &vop_mknod_desc, spec_mknod }, /* mknod */
72 { &vop_open_desc, spec_open }, /* open */
73 { &vop_close_desc, spec_close }, /* close */
74 { &vop_access_desc, spec_access }, /* access */
75 { &vop_getattr_desc, spec_getattr }, /* getattr */
76 { &vop_setattr_desc, spec_setattr }, /* setattr */
77 { &vop_read_desc, spec_read }, /* read */
78 { &vop_write_desc, spec_write }, /* write */
79 { &vop_lease_desc, spec_lease_check }, /* lease */
80 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */
81 { &vop_poll_desc, spec_poll }, /* poll */
82 { &vop_mmap_desc, spec_mmap }, /* mmap */
83 { &vop_fsync_desc, spec_fsync }, /* fsync */
84 { &vop_seek_desc, spec_seek }, /* seek */
85 { &vop_remove_desc, spec_remove }, /* remove */
86 { &vop_link_desc, spec_link }, /* link */
87 { &vop_rename_desc, spec_rename }, /* rename */
88 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */
89 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */
90 { &vop_symlink_desc, spec_symlink }, /* symlink */
91 { &vop_readdir_desc, spec_readdir }, /* readdir */
92 { &vop_readlink_desc, spec_readlink }, /* readlink */
93 { &vop_abortop_desc, spec_abortop }, /* abortop */
94 { &vop_inactive_desc, spec_inactive }, /* inactive */
95 { &vop_reclaim_desc, spec_reclaim }, /* reclaim */
96 { &vop_lock_desc, spec_lock }, /* lock */
97 { &vop_unlock_desc, spec_unlock }, /* unlock */
98 { &vop_bmap_desc, spec_bmap }, /* bmap */
99 { &vop_strategy_desc, spec_strategy }, /* strategy */
100 { &vop_print_desc, spec_print }, /* print */
101 { &vop_islocked_desc, spec_islocked }, /* islocked */
102 { &vop_pathconf_desc, spec_pathconf }, /* pathconf */
103 { &vop_advlock_desc, spec_advlock }, /* advlock */
104 { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */
105 { &vop_valloc_desc, spec_valloc }, /* valloc */
106 { &vop_vfree_desc, spec_vfree }, /* vfree */
107 { &vop_truncate_desc, spec_truncate }, /* truncate */
108 { &vop_update_desc, spec_update }, /* update */
109 { &vop_bwrite_desc, spec_bwrite }, /* bwrite */
110 { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL }
111 };
112 struct vnodeopv_desc spec_vnodeop_opv_desc =
113 { &spec_vnodeop_p, spec_vnodeop_entries };
114
115 /*
116 * Trivial lookup routine that always fails.
117 */
118 int
119 spec_lookup(v)
120 void *v;
121 {
122 struct vop_lookup_args /* {
123 struct vnode *a_dvp;
124 struct vnode **a_vpp;
125 struct componentname *a_cnp;
126 } */ *ap = v;
127
128 *ap->a_vpp = NULL;
129 return (ENOTDIR);
130 }
131
132 /*
133 * Open a special file.
134 */
135 /* ARGSUSED */
136 int
137 spec_open(v)
138 void *v;
139 {
140 struct vop_open_args /* {
141 struct vnode *a_vp;
142 int a_mode;
143 struct ucred *a_cred;
144 struct proc *a_p;
145 } */ *ap = v;
146 struct vnode *bvp, *vp = ap->a_vp;
147 dev_t bdev, dev = (dev_t)vp->v_rdev;
148 register int maj = major(dev);
149 int error;
150
151 /*
152 * Don't allow open if fs is mounted -nodev.
153 */
154 if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
155 return (ENXIO);
156
157 switch (vp->v_type) {
158
159 case VCHR:
160 if ((u_int)maj >= nchrdev)
161 return (ENXIO);
162 if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
163 /*
164 * When running in very secure mode, do not allow
165 * opens for writing of any disk character devices.
166 */
167 if (securelevel >= 2 && cdevsw[maj].d_type == D_DISK)
168 return (EPERM);
169 /*
170 * When running in secure mode, do not allow opens
171 * for writing of /dev/mem, /dev/kmem, or character
172 * devices whose corresponding block devices are
173 * currently mounted.
174 */
175 if (securelevel >= 1) {
176 if ((bdev = chrtoblk(dev)) != NODEV &&
177 vfinddev(bdev, VBLK, &bvp) &&
178 bvp->v_usecount > 0 &&
179 (error = vfs_mountedon(bvp)))
180 return (error);
181 if (iskmemdev(dev))
182 return (EPERM);
183 }
184 }
185 if (cdevsw[maj].d_type == D_TTY)
186 vp->v_flag |= VISTTY;
187 VOP_UNLOCK(vp);
188 error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, ap->a_p);
189 VOP_LOCK(vp);
190 return (error);
191
192 case VBLK:
193 if ((u_int)maj >= nblkdev)
194 return (ENXIO);
195 /*
196 * When running in very secure mode, do not allow
197 * opens for writing of any disk block devices.
198 */
199 if (securelevel >= 2 && ap->a_cred != FSCRED &&
200 (ap->a_mode & FWRITE) && bdevsw[maj].d_type == D_DISK)
201 return (EPERM);
202 /*
203 * Do not allow opens of block devices that are
204 * currently mounted.
205 */
206 if ((error = vfs_mountedon(vp)) != 0)
207 return (error);
208 return ((*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, ap->a_p));
209 case VNON:
210 case VLNK:
211 case VDIR:
212 case VREG:
213 case VBAD:
214 case VFIFO:
215 case VSOCK:
216 break;
217 }
218 return (0);
219 }
220
221 /*
222 * Vnode op for read
223 */
224 /* ARGSUSED */
225 int
226 spec_read(v)
227 void *v;
228 {
229 struct vop_read_args /* {
230 struct vnode *a_vp;
231 struct uio *a_uio;
232 int a_ioflag;
233 struct ucred *a_cred;
234 } */ *ap = v;
235 register struct vnode *vp = ap->a_vp;
236 register struct uio *uio = ap->a_uio;
237 struct proc *p = uio->uio_procp;
238 struct buf *bp;
239 daddr_t bn, nextbn;
240 long bsize, bscale, ssize;
241 struct partinfo dpart;
242 int n, on, majordev;
243 int (*ioctl) __P((dev_t, u_long, caddr_t, int, struct proc *));
244 int error = 0;
245
246 #ifdef DIAGNOSTIC
247 if (uio->uio_rw != UIO_READ)
248 panic("spec_read mode");
249 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
250 panic("spec_read proc");
251 #endif
252 if (uio->uio_resid == 0)
253 return (0);
254
255 switch (vp->v_type) {
256
257 case VCHR:
258 VOP_UNLOCK(vp);
259 error = (*cdevsw[major(vp->v_rdev)].d_read)
260 (vp->v_rdev, uio, ap->a_ioflag);
261 VOP_LOCK(vp);
262 return (error);
263
264 case VBLK:
265 if (uio->uio_resid == 0)
266 return (0);
267 if (uio->uio_offset < 0)
268 return (EINVAL);
269 bsize = BLKDEV_IOSIZE;
270 ssize = DEV_BSIZE;
271 if ((majordev = major(vp->v_rdev)) < nblkdev &&
272 (ioctl = bdevsw[majordev].d_ioctl) != NULL &&
273 (*ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0) {
274 if (dpart.part->p_fstype == FS_BSDFFS &&
275 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
276 bsize = dpart.part->p_frag *
277 dpart.part->p_fsize;
278 if (dpart.disklab->d_secsize != 0)
279 ssize = dpart.disklab->d_secsize;
280 }
281 bscale = bsize / ssize;
282 do {
283 bn = (uio->uio_offset / ssize) &~ (bscale - 1);
284 on = uio->uio_offset % bsize;
285 n = min((unsigned)(bsize - on), uio->uio_resid);
286 if (vp->v_lastr + bscale == bn) {
287 nextbn = bn + bscale;
288 error = breadn(vp, bn, (int)bsize, &nextbn,
289 (int *)&bsize, 1, NOCRED, &bp);
290 } else
291 error = bread(vp, bn, (int)bsize, NOCRED, &bp);
292 vp->v_lastr = bn;
293 n = min(n, bsize - bp->b_resid);
294 if (error) {
295 brelse(bp);
296 return (error);
297 }
298 error = uiomove((char *)bp->b_data + on, n, uio);
299 brelse(bp);
300 } while (error == 0 && uio->uio_resid > 0 && n != 0);
301 return (error);
302
303 default:
304 panic("spec_read type");
305 }
306 /* NOTREACHED */
307 }
308
309 /*
310 * Vnode op for write
311 */
312 /* ARGSUSED */
313 int
314 spec_write(v)
315 void *v;
316 {
317 struct vop_write_args /* {
318 struct vnode *a_vp;
319 struct uio *a_uio;
320 int a_ioflag;
321 struct ucred *a_cred;
322 } */ *ap = v;
323 register struct vnode *vp = ap->a_vp;
324 register struct uio *uio = ap->a_uio;
325 struct proc *p = uio->uio_procp;
326 struct buf *bp;
327 daddr_t bn;
328 long bsize, bscale, ssize;
329 struct partinfo dpart;
330 int n, on, majordev;
331 int (*ioctl) __P((dev_t, u_long, caddr_t, int, struct proc *));
332 int error = 0;
333
334 #ifdef DIAGNOSTIC
335 if (uio->uio_rw != UIO_WRITE)
336 panic("spec_write mode");
337 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
338 panic("spec_write proc");
339 #endif
340
341 switch (vp->v_type) {
342
343 case VCHR:
344 VOP_UNLOCK(vp);
345 error = (*cdevsw[major(vp->v_rdev)].d_write)
346 (vp->v_rdev, uio, ap->a_ioflag);
347 VOP_LOCK(vp);
348 return (error);
349
350 case VBLK:
351 if (uio->uio_resid == 0)
352 return (0);
353 if (uio->uio_offset < 0)
354 return (EINVAL);
355 bsize = BLKDEV_IOSIZE;
356 ssize = DEV_BSIZE;
357 if ((majordev = major(vp->v_rdev)) < nblkdev &&
358 (ioctl = bdevsw[majordev].d_ioctl) != NULL &&
359 (*ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0) {
360 if (dpart.part->p_fstype == FS_BSDFFS &&
361 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
362 bsize = dpart.part->p_frag *
363 dpart.part->p_fsize;
364 if (dpart.disklab->d_secsize != 0)
365 ssize = dpart.disklab->d_secsize;
366 }
367 bscale = bsize / ssize;
368 do {
369 bn = (uio->uio_offset / ssize) &~ (bscale - 1);
370 on = uio->uio_offset % bsize;
371 n = min((unsigned)(bsize - on), uio->uio_resid);
372 if (n == bsize)
373 bp = getblk(vp, bn, bsize, 0, 0);
374 else
375 error = bread(vp, bn, bsize, NOCRED, &bp);
376 n = min(n, bsize - bp->b_resid);
377 if (error) {
378 brelse(bp);
379 return (error);
380 }
381 error = uiomove((char *)bp->b_data + on, n, uio);
382 if (n + on == bsize)
383 bawrite(bp);
384 else
385 bdwrite(bp);
386 } while (error == 0 && uio->uio_resid > 0 && n != 0);
387 return (error);
388
389 default:
390 panic("spec_write type");
391 }
392 /* NOTREACHED */
393 }
394
395 /*
396 * Device ioctl operation.
397 */
398 /* ARGSUSED */
399 int
400 spec_ioctl(v)
401 void *v;
402 {
403 struct vop_ioctl_args /* {
404 struct vnode *a_vp;
405 u_long a_command;
406 caddr_t a_data;
407 int a_fflag;
408 struct ucred *a_cred;
409 struct proc *a_p;
410 } */ *ap = v;
411 dev_t dev = ap->a_vp->v_rdev;
412 int maj = major(dev);
413
414 switch (ap->a_vp->v_type) {
415
416 case VCHR:
417 return ((*cdevsw[maj].d_ioctl)(dev, ap->a_command, ap->a_data,
418 ap->a_fflag, ap->a_p));
419
420 case VBLK:
421 if (ap->a_command == 0 && (long)ap->a_data == B_TAPE)
422 if (bdevsw[maj].d_type == D_TAPE)
423 return (0);
424 else
425 return (1);
426 return ((*bdevsw[maj].d_ioctl)(dev, ap->a_command, ap->a_data,
427 ap->a_fflag, ap->a_p));
428
429 default:
430 panic("spec_ioctl");
431 /* NOTREACHED */
432 }
433 }
434
435 /* ARGSUSED */
436 int
437 spec_poll(v)
438 void *v;
439 {
440 struct vop_poll_args /* {
441 struct vnode *a_vp;
442 int a_events;
443 struct proc *a_p;
444 } */ *ap = v;
445 register dev_t dev;
446
447 switch (ap->a_vp->v_type) {
448
449 case VCHR:
450 dev = ap->a_vp->v_rdev;
451 return (*cdevsw[major(dev)].d_poll)(dev, ap->a_events, ap->a_p);
452
453 default:
454 return (genfs_poll(v));
455 }
456 }
457 /*
458 * Synch buffers associated with a block device
459 */
460 /* ARGSUSED */
461 int
462 spec_fsync(v)
463 void *v;
464 {
465 struct vop_fsync_args /* {
466 struct vnode *a_vp;
467 struct ucred *a_cred;
468 int a_waitfor;
469 struct proc *a_p;
470 } */ *ap = v;
471 register struct vnode *vp = ap->a_vp;
472
473 if (vp->v_type == VBLK)
474 vflushbuf(vp, ap->a_waitfor == MNT_WAIT);
475 return (0);
476 }
477
478 /*
479 * Just call the device strategy routine
480 */
481 int
482 spec_strategy(v)
483 void *v;
484 {
485 struct vop_strategy_args /* {
486 struct buf *a_bp;
487 } */ *ap = v;
488
489 (*bdevsw[major(ap->a_bp->b_dev)].d_strategy)(ap->a_bp);
490 return (0);
491 }
492
493 /*
494 * This is a noop, simply returning what one has been given.
495 */
496 int
497 spec_bmap(v)
498 void *v;
499 {
500 struct vop_bmap_args /* {
501 struct vnode *a_vp;
502 daddr_t a_bn;
503 struct vnode **a_vpp;
504 daddr_t *a_bnp;
505 } */ *ap = v;
506
507 if (ap->a_vpp != NULL)
508 *ap->a_vpp = ap->a_vp;
509 if (ap->a_bnp != NULL)
510 *ap->a_bnp = ap->a_bn;
511 return (0);
512 }
513
/*
 * Lock operation for special files.
 * No locking is done at the moment: the argument is ignored and 0 is
 * returned.
 */
/* ARGSUSED */
int
spec_lock(v)
	void *v;
{

	return (0);
}
525
/*
 * Unlock operation for special files.
 * Nothing is locked, so there is nothing to release: the argument is
 * ignored and 0 is returned.
 */
/* ARGSUSED */
int
spec_unlock(v)
	void *v;
{

	return (0);
}
534
535 /*
536 * Device close routine
537 */
538 /* ARGSUSED */
539 int
540 spec_close(v)
541 void *v;
542 {
543 struct vop_close_args /* {
544 struct vnode *a_vp;
545 int a_fflag;
546 struct ucred *a_cred;
547 struct proc *a_p;
548 } */ *ap = v;
549 register struct vnode *vp = ap->a_vp;
550 dev_t dev = vp->v_rdev;
551 int (*devclose) __P((dev_t, int, int, struct proc *));
552 int mode, error;
553
554 switch (vp->v_type) {
555
556 case VCHR:
557 /*
558 * Hack: a tty device that is a controlling terminal
559 * has a reference from the session structure.
560 * We cannot easily tell that a character device is
561 * a controlling terminal, unless it is the closing
562 * process' controlling terminal. In that case,
563 * if the reference count is 2 (this last descriptor
564 * plus the session), release the reference from the session.
565 */
566 if (vcount(vp) == 2 && ap->a_p &&
567 vp == ap->a_p->p_session->s_ttyvp) {
568 vrele(vp);
569 ap->a_p->p_session->s_ttyvp = NULL;
570 }
571 /*
572 * If the vnode is locked, then we are in the midst
573 * of forcably closing the device, otherwise we only
574 * close on last reference.
575 */
576 if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
577 return (0);
578 devclose = cdevsw[major(dev)].d_close;
579 mode = S_IFCHR;
580 break;
581
582 case VBLK:
583 /*
584 * On last close of a block device (that isn't mounted)
585 * we must invalidate any in core blocks, so that
586 * we can, for instance, change floppy disks.
587 */
588 error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0);
589 if (error)
590 return (error);
591 /*
592 * We do not want to really close the device if it
593 * is still in use unless we are trying to close it
594 * forcibly. Since every use (buffer, vnode, swap, cmap)
595 * holds a reference to the vnode, and because we mark
596 * any other vnodes that alias this device, when the
597 * sum of the reference counts on all the aliased
598 * vnodes descends to one, we are on last close.
599 */
600 if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
601 return (0);
602 devclose = bdevsw[major(dev)].d_close;
603 mode = S_IFBLK;
604 break;
605
606 default:
607 panic("spec_close: not special");
608 }
609
610 return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p));
611 }
612
613 /*
614 * Print out the contents of a special device vnode.
615 */
616 int
617 spec_print(v)
618 void *v;
619 {
620 struct vop_print_args /* {
621 struct vnode *a_vp;
622 } */ *ap = v;
623
624 printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev),
625 minor(ap->a_vp->v_rdev));
626 return 0;
627 }
628
629 /*
630 * Return POSIX pathconf information applicable to special devices.
631 */
632 int
633 spec_pathconf(v)
634 void *v;
635 {
636 struct vop_pathconf_args /* {
637 struct vnode *a_vp;
638 int a_name;
639 register_t *a_retval;
640 } */ *ap = v;
641
642 switch (ap->a_name) {
643 case _PC_LINK_MAX:
644 *ap->a_retval = LINK_MAX;
645 return (0);
646 case _PC_MAX_CANON:
647 *ap->a_retval = MAX_CANON;
648 return (0);
649 case _PC_MAX_INPUT:
650 *ap->a_retval = MAX_INPUT;
651 return (0);
652 case _PC_PIPE_BUF:
653 *ap->a_retval = PIPE_BUF;
654 return (0);
655 case _PC_CHOWN_RESTRICTED:
656 *ap->a_retval = 1;
657 return (0);
658 case _PC_VDISABLE:
659 *ap->a_retval = _POSIX_VDISABLE;
660 return (0);
661 default:
662 return (EINVAL);
663 }
664 /* NOTREACHED */
665 }
666
667 /*
668 * Advisory record locking support.
669 */
670 int
671 spec_advlock(v)
672 void *v;
673 {
674 struct vop_advlock_args /* {
675 struct vnode *a_vp;
676 caddr_t a_id;
677 int a_op;
678 struct flock *a_fl;
679 int a_flags;
680 } */ *ap = v;
681 register struct vnode *vp = ap->a_vp;
682
683 return (lf_advlock(&vp->v_speclockf, (off_t)0, ap->a_id, ap->a_op,
684 ap->a_fl, ap->a_flags));
685 }
686