/*	$NetBSD: spec_vnops.c,v 1.43 1998/10/02 00:21:39 ross Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)spec_vnops.c	8.15 (Berkeley) 7/14/95
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/errno.h>
#include <sys/ioctl.h>
#include <sys/file.h>
#include <sys/disklabel.h>
#include <sys/lockf.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

/* symbolic sleep message strings for devices */
const char	devopn[] = "devopn";
const char	devio[] = "devio";
const char	devwait[] = "devwait";
const char	devin[] = "devin";
const char	devout[] = "devout";
const char	devioc[] = "devioc";
const char	devcls[] = "devcls";

int (**spec_vnodeop_p) __P((void *));
struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, spec_lookup },		/* lookup */
	{ &vop_create_desc, spec_create },		/* create */
	{ &vop_mknod_desc, spec_mknod },		/* mknod */
	{ &vop_open_desc, spec_open },			/* open */
	{ &vop_close_desc, spec_close },		/* close */
	{ &vop_access_desc, spec_access },		/* access */
	{ &vop_getattr_desc, spec_getattr },		/* getattr */
	{ &vop_setattr_desc, spec_setattr },		/* setattr */
	{ &vop_read_desc, spec_read },			/* read */
	{ &vop_write_desc, spec_write },		/* write */
	{ &vop_lease_desc, spec_lease_check },		/* lease */
	{ &vop_ioctl_desc, spec_ioctl },		/* ioctl */
	{ &vop_poll_desc, spec_poll },			/* poll */
	{ &vop_revoke_desc, spec_revoke },		/* revoke */
	{ &vop_mmap_desc, spec_mmap },			/* mmap */
	{ &vop_fsync_desc, spec_fsync },		/* fsync */
	{ &vop_seek_desc, spec_seek },			/* seek */
	{ &vop_remove_desc, spec_remove },		/* remove */
	{ &vop_link_desc, spec_link },			/* link */
	{ &vop_rename_desc, spec_rename },		/* rename */
	{ &vop_mkdir_desc, spec_mkdir },		/* mkdir */
	{ &vop_rmdir_desc, spec_rmdir },		/* rmdir */
	{ &vop_symlink_desc, spec_symlink },		/* symlink */
	{ &vop_readdir_desc, spec_readdir },		/* readdir */
	{ &vop_readlink_desc, spec_readlink },		/* readlink */
	{ &vop_abortop_desc, spec_abortop },		/* abortop */
	{ &vop_inactive_desc, spec_inactive },		/* inactive */
	{ &vop_reclaim_desc, spec_reclaim },		/* reclaim */
	{ &vop_lock_desc, spec_lock },			/* lock */
	{ &vop_unlock_desc, spec_unlock },		/* unlock */
	{ &vop_bmap_desc, spec_bmap },			/* bmap */
	{ &vop_strategy_desc, spec_strategy },		/* strategy */
	{ &vop_print_desc, spec_print },		/* print */
	{ &vop_islocked_desc, spec_islocked },		/* islocked */
	{ &vop_pathconf_desc, spec_pathconf },		/* pathconf */
	{ &vop_advlock_desc, spec_advlock },		/* advlock */
	{ &vop_blkatoff_desc, spec_blkatoff },		/* blkatoff */
	{ &vop_valloc_desc, spec_valloc },		/* valloc */
	{ &vop_vfree_desc, spec_vfree },		/* vfree */
	{ &vop_truncate_desc, spec_truncate },		/* truncate */
	{ &vop_update_desc, spec_update },		/* update */
	{ &vop_bwrite_desc, spec_bwrite },		/* bwrite */
	{ (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL }
};
struct vnodeopv_desc spec_vnodeop_opv_desc =
	{ &spec_vnodeop_p, spec_vnodeop_entries };

/*
 * Trivial lookup routine that always fails.
 */
int
spec_lookup(v)
	void *v;
{
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap = v;

	*ap->a_vpp = NULL;
	return (ENOTDIR);
}

/*
 * Open a special file.
 */
/* ARGSUSED */
int
spec_open(v)
	void *v;
{
	struct vop_open_args /* {
		struct vnode *a_vp;
		int a_mode;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap = v;
	struct proc *p = ap->a_p;
	struct vnode *bvp, *vp = ap->a_vp;
	dev_t bdev, dev = (dev_t)vp->v_rdev;
	register int maj = major(dev);
	int error;

	/*
	 * Don't allow open if fs is mounted -nodev.
	 */
	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
		return (ENXIO);

	switch (vp->v_type) {

	case VCHR:
		if ((u_int)maj >= nchrdev)
			return (ENXIO);
		if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
			/*
			 * When running in very secure mode, do not allow
			 * opens for writing of any disk character devices.
			 */
			if (securelevel >= 2 && cdevsw[maj].d_type == D_DISK)
				return (EPERM);
			/*
			 * When running in secure mode, do not allow opens
			 * for writing of /dev/mem, /dev/kmem, or character
			 * devices whose corresponding block devices are
			 * currently mounted.
			 */
			if (securelevel >= 1) {
				if ((bdev = chrtoblk(dev)) != (dev_t)NODEV &&
				    vfinddev(bdev, VBLK, &bvp) &&
				    bvp->v_usecount > 0 &&
				    (error = vfs_mountedon(bvp)))
					return (error);
				if (iskmemdev(dev))
					return (EPERM);
			}
		}
		if (cdevsw[maj].d_type == D_TTY)
			vp->v_flag |= VISTTY;
		VOP_UNLOCK(vp, 0);
		error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, p);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		return (error);

	case VBLK:
		if ((u_int)maj >= nblkdev)
			return (ENXIO);
		/*
		 * When running in very secure mode, do not allow
		 * opens for writing of any disk block devices.
		 */
		if (securelevel >= 2 && ap->a_cred != FSCRED &&
		    (ap->a_mode & FWRITE) && bdevsw[maj].d_type == D_DISK)
			return (EPERM);
		/*
		 * Do not allow opens of block devices that are
		 * currently mounted.
		 */
		if ((error = vfs_mountedon(vp)) != 0)
			return (error);
		return ((*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, p));
	case VNON:
	case VLNK:
	case VDIR:
	case VREG:
	case VBAD:
	case VFIFO:
	case VSOCK:
		break;
	}
	return (0);
}

/*
 * Vnode op for read
 */
/* ARGSUSED */
int
spec_read(v)
	void *v;
{
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap = v;
	register struct vnode *vp = ap->a_vp;
	register struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	struct buf *bp;
	daddr_t bn, nextbn;
	long bsize, bscale, ssize;
	struct partinfo dpart;
	int n, on, majordev;
	int (*ioctl) __P((dev_t, u_long, caddr_t, int, struct proc *));
	int error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("spec_read mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("spec_read proc");
#endif
	if (uio->uio_resid == 0)
		return (0);

	switch (vp->v_type) {

	case VCHR:
		VOP_UNLOCK(vp, 0);
		error = (*cdevsw[major(vp->v_rdev)].d_read)
			(vp->v_rdev, uio, ap->a_ioflag);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		return (error);

	case VBLK:
		if (uio->uio_offset < 0)
			return (EINVAL);
		bsize = BLKDEV_IOSIZE;
		ssize = DEV_BSIZE;
		if ((majordev = major(vp->v_rdev)) < nblkdev &&
		    (ioctl = bdevsw[majordev].d_ioctl) != NULL &&
		    (*ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0) {
			if (dpart.part->p_fstype == FS_BSDFFS &&
			    dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
				bsize = dpart.part->p_frag *
				    dpart.part->p_fsize;
			if (dpart.disklab->d_secsize != 0)
				ssize = dpart.disklab->d_secsize;
		}
		bscale = bsize / ssize;
		do {
			bn = (uio->uio_offset / ssize) &~ (bscale - 1);
			on = uio->uio_offset % bsize;
			n = min((unsigned)(bsize - on), uio->uio_resid);
			if (vp->v_lastr + bscale == bn) {
				nextbn = bn + bscale;
				error = breadn(vp, bn, (int)bsize, &nextbn,
					(int *)&bsize, 1, NOCRED, &bp);
			} else
				error = bread(vp, bn, (int)bsize, NOCRED, &bp);
			vp->v_lastr = bn;
			n = min(n, bsize - bp->b_resid);
			if (error) {
				brelse(bp);
				return (error);
			}
			error = uiomove((char *)bp->b_data + on, n, uio);
			brelse(bp);
		} while (error == 0 && uio->uio_resid > 0 && n != 0);
		return (error);

	default:
		panic("spec_read type");
	}
	/* NOTREACHED */
}

/*
 * Vnode op for write
 */
/* ARGSUSED */
int
spec_write(v)
	void *v;
{
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap = v;
	register struct vnode *vp = ap->a_vp;
	register struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	struct buf *bp;
	daddr_t bn;
	long bsize, bscale, ssize;
	struct partinfo dpart;
	int n, on, majordev;
	int (*ioctl) __P((dev_t, u_long, caddr_t, int, struct proc *));
	int error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("spec_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("spec_write proc");
#endif

	switch (vp->v_type) {

	case VCHR:
		VOP_UNLOCK(vp, 0);
		error = (*cdevsw[major(vp->v_rdev)].d_write)
			(vp->v_rdev, uio, ap->a_ioflag);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		return (error);

	case VBLK:
		if (uio->uio_resid == 0)
			return (0);
		if (uio->uio_offset < 0)
			return (EINVAL);
		bsize = BLKDEV_IOSIZE;
		ssize = DEV_BSIZE;
		if ((majordev = major(vp->v_rdev)) < nblkdev &&
		    (ioctl = bdevsw[majordev].d_ioctl) != NULL &&
		    (*ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0) {
			if (dpart.part->p_fstype == FS_BSDFFS &&
			    dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
				bsize = dpart.part->p_frag *
				    dpart.part->p_fsize;
			if (dpart.disklab->d_secsize != 0)
				ssize = dpart.disklab->d_secsize;
		}
		bscale = bsize / ssize;
		do {
			bn = (uio->uio_offset / ssize) &~ (bscale - 1);
			on = uio->uio_offset % bsize;
			n = min((unsigned)(bsize - on), uio->uio_resid);
			if (n == bsize)
				bp = getblk(vp, bn, bsize, 0, 0);
			else
				error = bread(vp, bn, bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			n = min(n, bsize - bp->b_resid);
			error = uiomove((char *)bp->b_data + on, n, uio);
			if (error)
				brelse(bp);
			else {
				if (n + on == bsize)
					bawrite(bp);
				else
					bdwrite(bp);
				if (bp->b_flags & B_ERROR)
					error = bp->b_error;
			}
		} while (error == 0 && uio->uio_resid > 0 && n != 0);
		return (error);

	default:
		panic("spec_write type");
	}
	/* NOTREACHED */
}

/*
 * Device ioctl operation.
 */
/* ARGSUSED */
int
spec_ioctl(v)
	void *v;
{
	struct vop_ioctl_args /* {
		struct vnode *a_vp;
		u_long a_command;
		caddr_t a_data;
		int a_fflag;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap = v;
	dev_t dev = ap->a_vp->v_rdev;
	int maj = major(dev);

	switch (ap->a_vp->v_type) {

	case VCHR:
		return ((*cdevsw[maj].d_ioctl)(dev, ap->a_command, ap->a_data,
		    ap->a_fflag, ap->a_p));

	case VBLK:
		if (ap->a_command == 0 && (long)ap->a_data == B_TAPE) {
			if (bdevsw[maj].d_type == D_TAPE)
				return (0);
			else
				return (1);
		}
		return ((*bdevsw[maj].d_ioctl)(dev, ap->a_command, ap->a_data,
		    ap->a_fflag, ap->a_p));

	default:
		panic("spec_ioctl");
		/* NOTREACHED */
	}
}

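/*
 * Poll a special device; character devices are handed to the driver's
 * d_poll entry point, everything else falls back to genfs_poll.
 */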
/* ARGSUSED */
int
spec_poll(v)
	void *v;
{
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
		struct proc *a_p;
	} */ *ap = v;
	register dev_t dev;

	switch (ap->a_vp->v_type) {

	case VCHR:
		dev = ap->a_vp->v_rdev;
		return (*cdevsw[major(dev)].d_poll)(dev, ap->a_events, ap->a_p);

	default:
		return (genfs_poll(v));
	}
}
/*
 * Synch buffers associated with a block device
 */
/* ARGSUSED */
int
spec_fsync(v)
	void *v;
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_flags;
		struct proc *a_p;
	} */ *ap = v;
	register struct vnode *vp = ap->a_vp;

	if (vp->v_type == VBLK)
		vflushbuf(vp, (ap->a_flags & FSYNC_WAIT) != 0);
	return (0);
}

/*
 * Just call the device strategy routine
 */
int
spec_strategy(v)
	void *v;
{
	struct vop_strategy_args /* {
		struct buf *a_bp;
	} */ *ap = v;

	(*bdevsw[major(ap->a_bp->b_dev)].d_strategy)(ap->a_bp);
	return (0);
}

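/*
 * Last reference to a special device vnode has gone away;
 * there is nothing to release beyond the vnode lock itself.
 */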
int
spec_inactive(v)
	void *v;
{
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		struct proc *a_p;
	} */ *ap = v;

	VOP_UNLOCK(ap->a_vp, 0);
	return (0);
}

/*
 * This is a noop, simply returning what one has been given.
 */
int
spec_bmap(v)
	void *v;
{
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t a_bn;
		struct vnode **a_vpp;
		daddr_t *a_bnp;
		int *a_runp;
	} */ *ap = v;

	if (ap->a_vpp != NULL)
		*ap->a_vpp = ap->a_vp;
	if (ap->a_bnp != NULL)
		*ap->a_bnp = ap->a_bn;
	if (ap->a_runp != NULL)
		*ap->a_runp = 0;
	return (0);
}

/*
 * Device close routine
 */
/* ARGSUSED */
int
spec_close(v)
	void *v;
{
	struct vop_close_args /* {
		struct vnode *a_vp;
		int a_fflag;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap = v;
	register struct vnode *vp = ap->a_vp;
	dev_t dev = vp->v_rdev;
	int (*devclose) __P((dev_t, int, int, struct proc *));
	int mode, error;

	switch (vp->v_type) {

	case VCHR:
		/*
		 * Hack: a tty device that is a controlling terminal
		 * has a reference from the session structure.
		 * We cannot easily tell that a character device is
		 * a controlling terminal, unless it is the closing
		 * process' controlling terminal.  In that case,
		 * if the reference count is 2 (this last descriptor
		 * plus the session), release the reference from the session.
		 */
		if (vcount(vp) == 2 && ap->a_p &&
		    vp == ap->a_p->p_session->s_ttyvp) {
			vrele(vp);
			ap->a_p->p_session->s_ttyvp = NULL;
		}
		/*
		 * If the vnode is locked, then we are in the midst
		 * of forcably closing the device, otherwise we only
		 * close on last reference.
		 */
		if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
			return (0);
		devclose = cdevsw[major(dev)].d_close;
		mode = S_IFCHR;
		break;

	case VBLK:
		/*
		 * On last close of a block device (that isn't mounted)
		 * we must invalidate any in core blocks, so that
		 * we can, for instance, change floppy disks.
		 */
		error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0);
		if (error)
			return (error);
		/*
		 * We do not want to really close the device if it
		 * is still in use unless we are trying to close it
		 * forcibly. Since every use (buffer, vnode, swap, cmap)
		 * holds a reference to the vnode, and because we mark
		 * any other vnodes that alias this device, when the
		 * sum of the reference counts on all the aliased
		 * vnodes descends to one, we are on last close.
		 */
		if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
			return (0);
		devclose = bdevsw[major(dev)].d_close;
		mode = S_IFBLK;
		break;

	default:
		panic("spec_close: not special");
	}

	return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p));
}

/*
 * Print out the contents of a special device vnode.
 */
int
spec_print(v)
	void *v;
{
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap = v;

	printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev),
	    minor(ap->a_vp->v_rdev));
	return 0;
}

/*
 * Return POSIX pathconf information applicable to special devices.
 */
int
spec_pathconf(v)
	void *v;
{
	struct vop_pathconf_args /* {
		struct vnode *a_vp;
		int a_name;
		register_t *a_retval;
	} */ *ap = v;

	switch (ap->a_name) {
	case _PC_LINK_MAX:
		*ap->a_retval = LINK_MAX;
		return (0);
	case _PC_MAX_CANON:
		*ap->a_retval = MAX_CANON;
		return (0);
	case _PC_MAX_INPUT:
		*ap->a_retval = MAX_INPUT;
		return (0);
	case _PC_PIPE_BUF:
		*ap->a_retval = PIPE_BUF;
		return (0);
	case _PC_CHOWN_RESTRICTED:
		*ap->a_retval = 1;
		return (0);
	case _PC_VDISABLE:
		*ap->a_retval = _POSIX_VDISABLE;
		return (0);
	case _PC_SYNC_IO:
		*ap->a_retval = 1;
		return (0);
	default:
		return (EINVAL);
	}
	/* NOTREACHED */
}

/*
 * Advisory record locking support.
 */
int
spec_advlock(v)
	void *v;
{
	struct vop_advlock_args /* {
		struct vnode *a_vp;
		caddr_t a_id;
		int a_op;
		struct flock *a_fl;
		int a_flags;
	} */ *ap = v;
	register struct vnode *vp = ap->a_vp;

	return (lf_advlock(&vp->v_speclockf, (off_t)0, ap->a_id, ap->a_op,
	    ap->a_fl, ap->a_flags));
}