/*	$NetBSD: spec_vnops.c,v 1.18 1994/10/20 04:26:38 cgd Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)spec_vnops.c	8.6 (Berkeley) 4/9/94
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/errno.h>
#include <sys/ioctl.h>
#include <sys/file.h>
#include <sys/disklabel.h>
#include <miscfs/specfs/specdev.h>

/* symbolic sleep message strings for devices */
char	devopn[] = "devopn";
char	devio[] = "devio";
char	devwait[] = "devwait";
char	devin[] = "devin";
char	devout[] = "devout";
char	devioc[] = "devioc";
char	devcls[] = "devcls";

int (**spec_vnodeop_p)();
struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, spec_lookup },		/* lookup */
	{ &vop_create_desc, spec_create },		/* create */
	{ &vop_mknod_desc, spec_mknod },		/* mknod */
	{ &vop_open_desc, spec_open },			/* open */
	{ &vop_close_desc, spec_close },		/* close */
	{ &vop_access_desc, spec_access },		/* access */
	{ &vop_getattr_desc, spec_getattr },		/* getattr */
	{ &vop_setattr_desc, spec_setattr },		/* setattr */
	{ &vop_read_desc, spec_read },			/* read */
	{ &vop_write_desc, spec_write },		/* write */
	{ &vop_ioctl_desc, spec_ioctl },		/* ioctl */
	{ &vop_select_desc, spec_select },		/* select */
	{ &vop_mmap_desc, spec_mmap },			/* mmap */
	{ &vop_fsync_desc, spec_fsync },		/* fsync */
	{ &vop_seek_desc, spec_seek },			/* seek */
	{ &vop_remove_desc, spec_remove },		/* remove */
	{ &vop_link_desc, spec_link },			/* link */
	{ &vop_rename_desc, spec_rename },		/* rename */
	{ &vop_mkdir_desc, spec_mkdir },		/* mkdir */
	{ &vop_rmdir_desc, spec_rmdir },		/* rmdir */
	{ &vop_symlink_desc, spec_symlink },		/* symlink */
	{ &vop_readdir_desc, spec_readdir },		/* readdir */
	{ &vop_readlink_desc, spec_readlink },		/* readlink */
	{ &vop_abortop_desc, spec_abortop },		/* abortop */
	{ &vop_inactive_desc, spec_inactive },		/* inactive */
	{ &vop_reclaim_desc, spec_reclaim },		/* reclaim */
	{ &vop_lock_desc, spec_lock },			/* lock */
	{ &vop_unlock_desc, spec_unlock },		/* unlock */
	{ &vop_bmap_desc, spec_bmap },			/* bmap */
	{ &vop_strategy_desc, spec_strategy },		/* strategy */
	{ &vop_print_desc, spec_print },		/* print */
	{ &vop_islocked_desc, spec_islocked },		/* islocked */
	{ &vop_pathconf_desc, spec_pathconf },		/* pathconf */
	{ &vop_advlock_desc, spec_advlock },		/* advlock */
	{ &vop_blkatoff_desc, spec_blkatoff },		/* blkatoff */
	{ &vop_valloc_desc, spec_valloc },		/* valloc */
	{ &vop_vfree_desc, spec_vfree },		/* vfree */
	{ &vop_truncate_desc, spec_truncate },		/* truncate */
	{ &vop_update_desc, spec_update },		/* update */
	{ &vop_bwrite_desc, spec_bwrite },		/* bwrite */
	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
};
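/*
 * The descriptor below ties the table above to spec_vnodeop_p; the
 * operations vector is filled in from these entries when the vnode
 * operation vectors are initialized at boot (vfs_opv_init()).  The
 * directory-style operations that make no sense on a device node
 * (create, mkdir, rename, and so on) are presumed to resolve to the
 * error stubs declared in specdev.h, e.g. spec_badop and spec_ebadf.
 */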
struct vnodeopv_desc spec_vnodeop_opv_desc =
	{ &spec_vnodeop_p, spec_vnodeop_entries };

/*
 * Trivial lookup routine that always fails.
 */
int
spec_lookup(ap)
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{

	*ap->a_vpp = NULL;
	return (ENOTDIR);
}

/*
 * Open a special file.
 */
/* ARGSUSED */
spec_open(ap)
	struct vop_open_args /* {
		struct vnode *a_vp;
		int a_mode;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *bvp, *vp = ap->a_vp;
	dev_t bdev, dev = (dev_t)vp->v_rdev;
	register int maj = major(dev);
	int error;

	/*
	 * Don't allow open if fs is mounted -nodev.
	 */
	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
		return (ENXIO);

	switch (vp->v_type) {

	case VCHR:
		if ((u_int)maj >= nchrdev)
			return (ENXIO);
		if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
			/*
			 * When running in very secure mode, do not allow
			 * opens for writing of any disk character devices.
			 */
			if (securelevel >= 2 && isdisk(dev, VCHR))
				return (EPERM);
			/*
			 * When running in secure mode, do not allow opens
			 * for writing of /dev/mem, /dev/kmem, or character
			 * devices whose corresponding block devices are
			 * currently mounted.
			 */
			if (securelevel >= 1) {
				if ((bdev = chrtoblk(dev)) != NODEV &&
				    vfinddev(bdev, VBLK, &bvp) &&
				    bvp->v_usecount > 0 &&
				    (error = vfs_mountedon(bvp)))
					return (error);
				if (iskmemdev(dev))
					return (EPERM);
			}
		}
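		/*
		 * The vnode lock is dropped around the driver's open
		 * routine, since the open may sleep for a long time
		 * (for example, a tty open waiting for carrier).
		 */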
		VOP_UNLOCK(vp);
		error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, ap->a_p);
		VOP_LOCK(vp);
		return (error);

	case VBLK:
		if ((u_int)maj >= nblkdev)
			return (ENXIO);
		/*
		 * When running in very secure mode, do not allow
		 * opens for writing of any disk block devices.
		 */
		if (securelevel >= 2 && ap->a_cred != FSCRED &&
		    (ap->a_mode & FWRITE) && isdisk(dev, VBLK))
			return (EPERM);
		/*
		 * Do not allow opens of block devices that are
		 * currently mounted.
		 */
		if (error = vfs_mountedon(vp))
			return (error);
		return ((*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, ap->a_p));
	}
	return (0);
}

/*
 * Vnode op for read
 */
/* ARGSUSED */
spec_read(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	struct buf *bp;
	daddr_t bn, nextbn;
	long bsize, bscale, ssize;
	struct partinfo dpart;
	int n, on, majordev, (*ioctl)();
	int error = 0;
	dev_t dev;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("spec_read mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("spec_read proc");
#endif
	if (uio->uio_resid == 0)
		return (0);

	switch (vp->v_type) {

	case VCHR:
		VOP_UNLOCK(vp);
		error = (*cdevsw[major(vp->v_rdev)].d_read)
			(vp->v_rdev, uio, ap->a_ioflag);
		VOP_LOCK(vp);
		return (error);

	case VBLK:
		if (uio->uio_offset < 0)
			return (EINVAL);
		bsize = BLKDEV_IOSIZE;
		ssize = DEV_BSIZE;
		dev = vp->v_rdev;
		if ((majordev = major(dev)) < nblkdev &&
		    (ioctl = bdevsw[majordev].d_ioctl) != NULL &&
		    (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0) {
			if (dpart.part->p_fstype == FS_BSDFFS &&
			    dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
				bsize = dpart.part->p_frag *
				    dpart.part->p_fsize;
			if (dpart.disklab->d_secsize != 0)
				ssize = dpart.disklab->d_secsize;
		}
		bscale = bsize / ssize;
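		/*
		 * Transfer in whole blocks of bsize bytes: bn is the
		 * starting sector rounded down to a block boundary and
		 * on is the offset within that block.  When the access
		 * is sequential (the previous read ended in the
		 * preceding block), breadn() is used to start a
		 * read-ahead of the next block.
		 */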
		do {
			bn = (uio->uio_offset / ssize) &~ (bscale - 1);
			on = uio->uio_offset % bsize;
			n = min((unsigned)(bsize - on), uio->uio_resid);
			if (vp->v_lastr + bscale == bn) {
				nextbn = bn + bscale;
				error = breadn(vp, bn, (int)bsize, &nextbn,
					(int *)&bsize, 1, NOCRED, &bp);
			} else
				error = bread(vp, bn, (int)bsize, NOCRED, &bp);
			vp->v_lastr = bn;
			n = min(n, bsize - bp->b_resid);
			if (error) {
				brelse(bp);
				return (error);
			}
			error = uiomove((char *)bp->b_data + on, n, uio);
			if (n + on == bsize)
				bp->b_flags |= B_AGE;
			brelse(bp);
		} while (error == 0 && uio->uio_resid > 0 && n != 0);
		return (error);

	default:
		panic("spec_read type");
	}
	/* NOTREACHED */
}

/*
 * Vnode op for write
 */
/* ARGSUSED */
spec_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	struct buf *bp;
	daddr_t bn;
	int bsize, blkmask, ssize;
	struct partinfo dpart;
	register int n, on;
	int error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("spec_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("spec_write proc");
#endif

	switch (vp->v_type) {

	case VCHR:
		VOP_UNLOCK(vp);
		error = (*cdevsw[major(vp->v_rdev)].d_write)
			(vp->v_rdev, uio, ap->a_ioflag);
		VOP_LOCK(vp);
		return (error);

	case VBLK:
		if (uio->uio_resid == 0)
			return (0);
		if (uio->uio_offset < 0)
			return (EINVAL);
		bsize = BLKDEV_IOSIZE;
		ssize = DEV_BSIZE;
		if ((*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev, DIOCGPART,
		    (caddr_t)&dpart, FREAD, p) == 0) {
			if (dpart.part->p_fstype == FS_BSDFFS &&
			    dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
				bsize = dpart.part->p_frag *
				    dpart.part->p_fsize;
			if (dpart.disklab->d_secsize != 0)
				ssize = dpart.disklab->d_secsize;
		}
		blkmask = (bsize / ssize) - 1;
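		/*
		 * A write that covers an entire block can simply claim
		 * a buffer with getblk(); a partial-block write must
		 * first read the block in with bread() so the untouched
		 * portion is preserved (read-modify-write).  Full blocks
		 * are pushed out asynchronously, partial blocks are
		 * delayed-written.
		 */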
		do {
			bn = (uio->uio_offset / ssize) &~ blkmask;
			on = uio->uio_offset % bsize;
			n = min((unsigned)(bsize - on), uio->uio_resid);
			if (n == bsize)
				bp = getblk(vp, bn, bsize, 0, 0);
			else
				error = bread(vp, bn, bsize, NOCRED, &bp);
			n = min(n, bsize - bp->b_resid);
			if (error) {
				brelse(bp);
				return (error);
			}
			error = uiomove((char *)bp->b_data + on, n, uio);
			if (n + on == bsize) {
				bp->b_flags |= B_AGE;
				bawrite(bp);
			} else
				bdwrite(bp);
		} while (error == 0 && uio->uio_resid > 0 && n != 0);
		return (error);

	default:
		panic("spec_write type");
	}
	/* NOTREACHED */
}

/*
 * Device ioctl operation.
 */
/* ARGSUSED */
spec_ioctl(ap)
	struct vop_ioctl_args /* {
		struct vnode *a_vp;
		int a_command;
		caddr_t a_data;
		int a_fflag;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	dev_t dev = ap->a_vp->v_rdev;

	switch (ap->a_vp->v_type) {

	case VCHR:
		return ((*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data,
		    ap->a_fflag, ap->a_p));

	case VBLK:
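		/*
		 * Command 0 with B_TAPE as the data is answered here as
		 * a query: return 0 if the underlying block device is a
		 * tape (as recorded in its bdevsw d_flags), 1 otherwise,
		 * without calling into the driver.
		 */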
		if (ap->a_command == 0 && (int)ap->a_data == B_TAPE)
			if (bdevsw[major(dev)].d_flags & B_TAPE)
				return (0);
			else
				return (1);
		return ((*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data,
		    ap->a_fflag, ap->a_p));

	default:
		panic("spec_ioctl");
		/* NOTREACHED */
	}
}

/* ARGSUSED */
spec_select(ap)
	struct vop_select_args /* {
		struct vnode *a_vp;
		int a_which;
		int a_fflags;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	register dev_t dev;

	switch (ap->a_vp->v_type) {

	default:
		return (1);		/* XXX */

	case VCHR:
		dev = ap->a_vp->v_rdev;
		return (*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_p);
	}
}
/*
 * Synch buffers associated with a block device
 */
/* ARGSUSED */
int
spec_fsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct proc *a_p;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register struct buf *bp;
	struct buf *nbp;
	int s;

	if (vp->v_type == VCHR)
		return (0);
	/*
	 * Flush all dirty buffers associated with a block device.
	 */
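	/*
	 * bawrite() may sleep and the dirty list can change while the
	 * buffer lock is not held, so the scan restarts from the head
	 * of the list after each buffer is written.
	 */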
loop:
	s = splbio();
	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
		nbp = bp->b_vnbufs.le_next;
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("spec_fsync: not dirty");
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		bawrite(bp);
		goto loop;
	}
	if (ap->a_waitfor == MNT_WAIT) {
		while (vp->v_numoutput) {
			vp->v_flag |= VBWAIT;
			sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1);
		}
#ifdef DIAGNOSTIC
		if (vp->v_dirtyblkhd.lh_first) {
			vprint("spec_fsync: dirty", vp);
			goto loop;
		}
#endif
	}
	splx(s);
	return (0);
}

/*
 * Just call the device strategy routine
 */
spec_strategy(ap)
	struct vop_strategy_args /* {
		struct buf *a_bp;
	} */ *ap;
{

	(*bdevsw[major(ap->a_bp->b_dev)].d_strategy)(ap->a_bp);
	return (0);
}

/*
 * This is a noop, simply returning what one has been given.
 */
spec_bmap(ap)
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t a_bn;
		struct vnode **a_vpp;
		daddr_t *a_bnp;
	} */ *ap;
{

	if (ap->a_vpp != NULL)
		*ap->a_vpp = ap->a_vp;
	if (ap->a_bnp != NULL)
		*ap->a_bnp = ap->a_bn;
	return (0);
}

/*
 * At the moment we do not do any locking.
 */
/* ARGSUSED */
spec_lock(ap)
	struct vop_lock_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	return (0);
}

/* ARGSUSED */
spec_unlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	return (0);
}

/*
 * Device close routine
 */
/* ARGSUSED */
spec_close(ap)
	struct vop_close_args /* {
		struct vnode *a_vp;
		int a_fflag;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	dev_t dev = vp->v_rdev;
	int (*devclose) __P((dev_t, int, int, struct proc *));
	int mode, error;

	switch (vp->v_type) {

	case VCHR:
		/*
		 * Hack: a tty device that is a controlling terminal
		 * has a reference from the session structure.
		 * We cannot easily tell that a character device is
		 * a controlling terminal, unless it is the closing
		 * process' controlling terminal.  In that case,
		 * if the reference count is 2 (this last descriptor
		 * plus the session), release the reference from the session.
		 */
		if (vcount(vp) == 2 && ap->a_p &&
		    vp == ap->a_p->p_session->s_ttyvp) {
			vrele(vp);
			ap->a_p->p_session->s_ttyvp = NULL;
		}
		/*
		 * If the vnode is locked, then we are in the midst
		 * of forcibly closing the device, otherwise we only
		 * close on last reference.
		 */
		if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
			return (0);
		devclose = cdevsw[major(dev)].d_close;
		mode = S_IFCHR;
		break;

	case VBLK:
		/*
		 * On last close of a block device (that isn't mounted)
		 * we must invalidate any in core blocks, so that
		 * we can, for instance, change floppy disks.
		 */
		if (error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0))
			return (error);
		/*
		 * We do not want to really close the device if it
		 * is still in use unless we are trying to close it
		 * forcibly. Since every use (buffer, vnode, swap, cmap)
		 * holds a reference to the vnode, and because we mark
		 * any other vnodes that alias this device, when the
		 * sum of the reference counts on all the aliased
		 * vnodes descends to one, we are on last close.
		 */
		if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
			return (0);
		devclose = bdevsw[major(dev)].d_close;
		mode = S_IFBLK;
		break;

	default:
		panic("spec_close: not special");
	}

	return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p));
}

/*
 * Print out the contents of a special device vnode.
 */
spec_print(ap)
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev),
	    minor(ap->a_vp->v_rdev));
}

/*
 * Return POSIX pathconf information applicable to special devices.
 */
spec_pathconf(ap)
	struct vop_pathconf_args /* {
		struct vnode *a_vp;
		int a_name;
		register_t *a_retval;
	} */ *ap;
{

	switch (ap->a_name) {
	case _PC_LINK_MAX:
		*ap->a_retval = LINK_MAX;
		return (0);
	case _PC_MAX_CANON:
		*ap->a_retval = MAX_CANON;
		return (0);
	case _PC_MAX_INPUT:
		*ap->a_retval = MAX_INPUT;
		return (0);
	case _PC_PIPE_BUF:
		*ap->a_retval = PIPE_BUF;
		return (0);
	case _PC_CHOWN_RESTRICTED:
		*ap->a_retval = 1;
		return (0);
	case _PC_VDISABLE:
		*ap->a_retval = _POSIX_VDISABLE;
		return (0);
	default:
		return (EINVAL);
	}
	/* NOTREACHED */
}

/*
 * Special device advisory byte-level locks.
 */
/* ARGSUSED */
spec_advlock(ap)
	struct vop_advlock_args /* {
		struct vnode *a_vp;
		caddr_t a_id;
		int a_op;
		struct flock *a_fl;
		int a_flags;
	} */ *ap;
{

	return (EOPNOTSUPP);
}

/*
 * Special device failed operation
 */
spec_ebadf()
{

	return (EBADF);
}

/*
 * Special device bad operation
 */
spec_badop()
{

	panic("spec_badop called");
	/* NOTREACHED */
}