spec_vnops.c revision 1.16 1 /* $NetBSD: spec_vnops.c,v 1.16 1994/06/29 06:35:03 cgd Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)spec_vnops.c 8.6 (Berkeley) 4/9/94
36 */
37
38 #include <sys/param.h>
39 #include <sys/proc.h>
40 #include <sys/systm.h>
41 #include <sys/kernel.h>
42 #include <sys/conf.h>
43 #include <sys/buf.h>
44 #include <sys/mount.h>
45 #include <sys/namei.h>
46 #include <sys/vnode.h>
47 #include <sys/stat.h>
48 #include <sys/errno.h>
49 #include <sys/ioctl.h>
50 #include <sys/file.h>
51 #include <sys/disklabel.h>
52 #include <miscfs/specfs/specdev.h>
53
/*
 * Symbolic sleep message strings for devices.
 * Each array holds its own name as a NUL-terminated string.
 */
char devopn[]  = "devopn";
char devio[]   = "devio";
char devwait[] = "devwait";
char devin[]   = "devin";
char devout[]  = "devout";
char devioc[]  = "devioc";
char devcls[]  = "devcls";
62
63 int (**spec_vnodeop_p)();
64 struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
65 { &vop_default_desc, vn_default_error },
66 { &vop_lookup_desc, spec_lookup }, /* lookup */
67 { &vop_create_desc, spec_create }, /* create */
68 { &vop_mknod_desc, spec_mknod }, /* mknod */
69 { &vop_open_desc, spec_open }, /* open */
70 { &vop_close_desc, spec_close }, /* close */
71 { &vop_access_desc, spec_access }, /* access */
72 { &vop_getattr_desc, spec_getattr }, /* getattr */
73 { &vop_setattr_desc, spec_setattr }, /* setattr */
74 { &vop_read_desc, spec_read }, /* read */
75 { &vop_write_desc, spec_write }, /* write */
76 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */
77 { &vop_select_desc, spec_select }, /* select */
78 { &vop_mmap_desc, spec_mmap }, /* mmap */
79 { &vop_fsync_desc, spec_fsync }, /* fsync */
80 { &vop_seek_desc, spec_seek }, /* seek */
81 { &vop_remove_desc, spec_remove }, /* remove */
82 { &vop_link_desc, spec_link }, /* link */
83 { &vop_rename_desc, spec_rename }, /* rename */
84 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */
85 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */
86 { &vop_symlink_desc, spec_symlink }, /* symlink */
87 { &vop_readdir_desc, spec_readdir }, /* readdir */
88 { &vop_readlink_desc, spec_readlink }, /* readlink */
89 { &vop_abortop_desc, spec_abortop }, /* abortop */
90 { &vop_inactive_desc, spec_inactive }, /* inactive */
91 { &vop_reclaim_desc, spec_reclaim }, /* reclaim */
92 { &vop_lock_desc, spec_lock }, /* lock */
93 { &vop_unlock_desc, spec_unlock }, /* unlock */
94 { &vop_bmap_desc, spec_bmap }, /* bmap */
95 { &vop_strategy_desc, spec_strategy }, /* strategy */
96 { &vop_print_desc, spec_print }, /* print */
97 { &vop_islocked_desc, spec_islocked }, /* islocked */
98 { &vop_pathconf_desc, spec_pathconf }, /* pathconf */
99 { &vop_advlock_desc, spec_advlock }, /* advlock */
100 { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */
101 { &vop_valloc_desc, spec_valloc }, /* valloc */
102 { &vop_vfree_desc, spec_vfree }, /* vfree */
103 { &vop_truncate_desc, spec_truncate }, /* truncate */
104 { &vop_update_desc, spec_update }, /* update */
105 { &vop_bwrite_desc, spec_bwrite }, /* bwrite */
106 { (struct vnodeop_desc*)NULL, (int(*)())NULL }
107 };
108 struct vnodeopv_desc spec_vnodeop_opv_desc =
109 { &spec_vnodeop_p, spec_vnodeop_entries };
110
111 /*
112 * Trivial lookup routine that always fails.
113 */
114 int
115 spec_lookup(ap)
116 struct vop_lookup_args /* {
117 struct vnode *a_dvp;
118 struct vnode **a_vpp;
119 struct componentname *a_cnp;
120 } */ *ap;
121 {
122
123 *ap->a_vpp = NULL;
124 return (ENOTDIR);
125 }
126
127 /*
128 * Open a special file.
129 */
130 /* ARGSUSED */
131 spec_open(ap)
132 struct vop_open_args /* {
133 struct vnode *a_vp;
134 int a_mode;
135 struct ucred *a_cred;
136 struct proc *a_p;
137 } */ *ap;
138 {
139 struct vnode *bvp, *vp = ap->a_vp;
140 dev_t bdev, dev = (dev_t)vp->v_rdev;
141 register int maj = major(dev);
142 int error;
143
144 /*
145 * Don't allow open if fs is mounted -nodev.
146 */
147 if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
148 return (ENXIO);
149
150 switch (vp->v_type) {
151
152 case VCHR:
153 if ((u_int)maj >= nchrdev)
154 return (ENXIO);
155 if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
156 /*
157 * When running in very secure mode, do not allow
158 * opens for writing of any disk character devices.
159 */
160 if (securelevel >= 2 && isdisk(dev, VCHR))
161 return (EPERM);
162 /*
163 * When running in secure mode, do not allow opens
164 * for writing of /dev/mem, /dev/kmem, or character
165 * devices whose corresponding block devices are
166 * currently mounted.
167 */
168 if (securelevel >= 1) {
169 if ((bdev = chrtoblk(dev)) != NODEV &&
170 vfinddev(bdev, VBLK, &bvp) &&
171 bvp->v_usecount > 0 &&
172 (error = vfs_mountedon(bvp)))
173 return (error);
174 if (iskmemdev(dev))
175 return (EPERM);
176 }
177 }
178 VOP_UNLOCK(vp);
179 error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, ap->a_p);
180 VOP_LOCK(vp);
181 return (error);
182
183 case VBLK:
184 if ((u_int)maj >= nblkdev)
185 return (ENXIO);
186 /*
187 * When running in very secure mode, do not allow
188 * opens for writing of any disk block devices.
189 */
190 if (securelevel >= 2 && ap->a_cred != FSCRED &&
191 (ap->a_mode & FWRITE) && isdisk(dev, VBLK))
192 return (EPERM);
193 /*
194 * Do not allow opens of block devices that are
195 * currently mounted.
196 */
197 if (error = vfs_mountedon(vp))
198 return (error);
199 return ((*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, ap->a_p));
200 }
201 return (0);
202 }
203
204 /*
205 * Vnode op for read
206 */
207 /* ARGSUSED */
208 spec_read(ap)
209 struct vop_read_args /* {
210 struct vnode *a_vp;
211 struct uio *a_uio;
212 int a_ioflag;
213 struct ucred *a_cred;
214 } */ *ap;
215 {
216 register struct vnode *vp = ap->a_vp;
217 register struct uio *uio = ap->a_uio;
218 struct proc *p = uio->uio_procp;
219 struct buf *bp;
220 daddr_t bn, nextbn;
221 long bsize, bscale;
222 struct partinfo dpart;
223 int n, on, majordev, (*ioctl)();
224 int error = 0;
225 dev_t dev;
226
227 #ifdef DIAGNOSTIC
228 if (uio->uio_rw != UIO_READ)
229 panic("spec_read mode");
230 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
231 panic("spec_read proc");
232 #endif
233 if (uio->uio_resid == 0)
234 return (0);
235
236 switch (vp->v_type) {
237
238 case VCHR:
239 VOP_UNLOCK(vp);
240 error = (*cdevsw[major(vp->v_rdev)].d_read)
241 (vp->v_rdev, uio, ap->a_ioflag);
242 VOP_LOCK(vp);
243 return (error);
244
245 case VBLK:
246 if (uio->uio_offset < 0)
247 return (EINVAL);
248 bsize = BLKDEV_IOSIZE;
249 dev = vp->v_rdev;
250 if ((majordev = major(dev)) < nblkdev &&
251 (ioctl = bdevsw[majordev].d_ioctl) != NULL &&
252 (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 &&
253 dpart.part->p_fstype == FS_BSDFFS &&
254 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
255 bsize = dpart.part->p_frag * dpart.part->p_fsize;
256 bscale = bsize / DEV_BSIZE;
257 do {
258 bn = (uio->uio_offset / DEV_BSIZE) &~ (bscale - 1);
259 on = uio->uio_offset % bsize;
260 n = min((unsigned)(bsize - on), uio->uio_resid);
261 if (vp->v_lastr + bscale == bn) {
262 nextbn = bn + bscale;
263 error = breadn(vp, bn, (int)bsize, &nextbn,
264 (int *)&bsize, 1, NOCRED, &bp);
265 } else
266 error = bread(vp, bn, (int)bsize, NOCRED, &bp);
267 vp->v_lastr = bn;
268 n = min(n, bsize - bp->b_resid);
269 if (error) {
270 brelse(bp);
271 return (error);
272 }
273 error = uiomove((char *)bp->b_data + on, n, uio);
274 if (n + on == bsize)
275 bp->b_flags |= B_AGE;
276 brelse(bp);
277 } while (error == 0 && uio->uio_resid > 0 && n != 0);
278 return (error);
279
280 default:
281 panic("spec_read type");
282 }
283 /* NOTREACHED */
284 }
285
286 /*
287 * Vnode op for write
288 */
289 /* ARGSUSED */
290 spec_write(ap)
291 struct vop_write_args /* {
292 struct vnode *a_vp;
293 struct uio *a_uio;
294 int a_ioflag;
295 struct ucred *a_cred;
296 } */ *ap;
297 {
298 register struct vnode *vp = ap->a_vp;
299 register struct uio *uio = ap->a_uio;
300 struct proc *p = uio->uio_procp;
301 struct buf *bp;
302 daddr_t bn;
303 int bsize, blkmask;
304 struct partinfo dpart;
305 register int n, on;
306 int error = 0;
307
308 #ifdef DIAGNOSTIC
309 if (uio->uio_rw != UIO_WRITE)
310 panic("spec_write mode");
311 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
312 panic("spec_write proc");
313 #endif
314
315 switch (vp->v_type) {
316
317 case VCHR:
318 VOP_UNLOCK(vp);
319 error = (*cdevsw[major(vp->v_rdev)].d_write)
320 (vp->v_rdev, uio, ap->a_ioflag);
321 VOP_LOCK(vp);
322 return (error);
323
324 case VBLK:
325 if (uio->uio_resid == 0)
326 return (0);
327 if (uio->uio_offset < 0)
328 return (EINVAL);
329 bsize = BLKDEV_IOSIZE;
330 if ((*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev, DIOCGPART,
331 (caddr_t)&dpart, FREAD, p) == 0) {
332 if (dpart.part->p_fstype == FS_BSDFFS &&
333 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
334 bsize = dpart.part->p_frag *
335 dpart.part->p_fsize;
336 }
337 blkmask = (bsize / DEV_BSIZE) - 1;
338 do {
339 bn = (uio->uio_offset / DEV_BSIZE) &~ blkmask;
340 on = uio->uio_offset % bsize;
341 n = min((unsigned)(bsize - on), uio->uio_resid);
342 if (n == bsize)
343 bp = getblk(vp, bn, bsize, 0, 0);
344 else
345 error = bread(vp, bn, bsize, NOCRED, &bp);
346 n = min(n, bsize - bp->b_resid);
347 if (error) {
348 brelse(bp);
349 return (error);
350 }
351 error = uiomove((char *)bp->b_data + on, n, uio);
352 if (n + on == bsize) {
353 bp->b_flags |= B_AGE;
354 bawrite(bp);
355 } else
356 bdwrite(bp);
357 } while (error == 0 && uio->uio_resid > 0 && n != 0);
358 return (error);
359
360 default:
361 panic("spec_write type");
362 }
363 /* NOTREACHED */
364 }
365
366 /*
367 * Device ioctl operation.
368 */
369 /* ARGSUSED */
370 spec_ioctl(ap)
371 struct vop_ioctl_args /* {
372 struct vnode *a_vp;
373 int a_command;
374 caddr_t a_data;
375 int a_fflag;
376 struct ucred *a_cred;
377 struct proc *a_p;
378 } */ *ap;
379 {
380 dev_t dev = ap->a_vp->v_rdev;
381
382 switch (ap->a_vp->v_type) {
383
384 case VCHR:
385 return ((*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data,
386 ap->a_fflag, ap->a_p));
387
388 case VBLK:
389 if (ap->a_command == 0 && (int)ap->a_data == B_TAPE)
390 if (bdevsw[major(dev)].d_flags & B_TAPE)
391 return (0);
392 else
393 return (1);
394 return ((*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data,
395 ap->a_fflag, ap->a_p));
396
397 default:
398 panic("spec_ioctl");
399 /* NOTREACHED */
400 }
401 }
402
403 /* ARGSUSED */
404 spec_select(ap)
405 struct vop_select_args /* {
406 struct vnode *a_vp;
407 int a_which;
408 int a_fflags;
409 struct ucred *a_cred;
410 struct proc *a_p;
411 } */ *ap;
412 {
413 register dev_t dev;
414
415 switch (ap->a_vp->v_type) {
416
417 default:
418 return (1); /* XXX */
419
420 case VCHR:
421 dev = ap->a_vp->v_rdev;
422 return (*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_p);
423 }
424 }
425 /*
426 * Synch buffers associated with a block device
427 */
428 /* ARGSUSED */
429 int
430 spec_fsync(ap)
431 struct vop_fsync_args /* {
432 struct vnode *a_vp;
433 struct ucred *a_cred;
434 int a_waitfor;
435 struct proc *a_p;
436 } */ *ap;
437 {
438 register struct vnode *vp = ap->a_vp;
439 register struct buf *bp;
440 struct buf *nbp;
441 int s;
442
443 if (vp->v_type == VCHR)
444 return (0);
445 /*
446 * Flush all dirty buffers associated with a block device.
447 */
448 loop:
449 s = splbio();
450 for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
451 nbp = bp->b_vnbufs.le_next;
452 if ((bp->b_flags & B_BUSY))
453 continue;
454 if ((bp->b_flags & B_DELWRI) == 0)
455 panic("spec_fsync: not dirty");
456 bremfree(bp);
457 bp->b_flags |= B_BUSY;
458 splx(s);
459 bawrite(bp);
460 goto loop;
461 }
462 if (ap->a_waitfor == MNT_WAIT) {
463 while (vp->v_numoutput) {
464 vp->v_flag |= VBWAIT;
465 sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1);
466 }
467 #ifdef DIAGNOSTIC
468 if (vp->v_dirtyblkhd.lh_first) {
469 vprint("spec_fsync: dirty", vp);
470 goto loop;
471 }
472 #endif
473 }
474 splx(s);
475 return (0);
476 }
477
478 /*
479 * Just call the device strategy routine
480 */
481 spec_strategy(ap)
482 struct vop_strategy_args /* {
483 struct buf *a_bp;
484 } */ *ap;
485 {
486
487 (*bdevsw[major(ap->a_bp->b_dev)].d_strategy)(ap->a_bp);
488 return (0);
489 }
490
491 /*
492 * This is a noop, simply returning what one has been given.
493 */
494 spec_bmap(ap)
495 struct vop_bmap_args /* {
496 struct vnode *a_vp;
497 daddr_t a_bn;
498 struct vnode **a_vpp;
499 daddr_t *a_bnp;
500 } */ *ap;
501 {
502
503 if (ap->a_vpp != NULL)
504 *ap->a_vpp = ap->a_vp;
505 if (ap->a_bnp != NULL)
506 *ap->a_bnp = ap->a_bn;
507 return (0);
508 }
509
/*
 * Lock operation.  At the moment we do not do any locking, so this
 * ignores its argument and returns 0.
 */
/* ARGSUSED */
int
spec_lock(ap)
	struct vop_lock_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	return (0);
}
522
/*
 * Unlock operation.  Counterpart of spec_lock(): nothing is locked, so
 * this ignores its argument and returns 0.
 */
/* ARGSUSED */
int
spec_unlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	return (0);
}
532
533 /*
534 * Device close routine
535 */
536 /* ARGSUSED */
537 spec_close(ap)
538 struct vop_close_args /* {
539 struct vnode *a_vp;
540 int a_fflag;
541 struct ucred *a_cred;
542 struct proc *a_p;
543 } */ *ap;
544 {
545 register struct vnode *vp = ap->a_vp;
546 dev_t dev = vp->v_rdev;
547 int (*devclose) __P((dev_t, int, int, struct proc *));
548 int mode, error;
549
550 switch (vp->v_type) {
551
552 case VCHR:
553 /*
554 * Hack: a tty device that is a controlling terminal
555 * has a reference from the session structure.
556 * We cannot easily tell that a character device is
557 * a controlling terminal, unless it is the closing
558 * process' controlling terminal. In that case,
559 * if the reference count is 2 (this last descriptor
560 * plus the session), release the reference from the session.
561 */
562 if (vcount(vp) == 2 && ap->a_p &&
563 vp == ap->a_p->p_session->s_ttyvp) {
564 vrele(vp);
565 ap->a_p->p_session->s_ttyvp = NULL;
566 }
567 /*
568 * If the vnode is locked, then we are in the midst
569 * of forcably closing the device, otherwise we only
570 * close on last reference.
571 */
572 if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
573 return (0);
574 devclose = cdevsw[major(dev)].d_close;
575 mode = S_IFCHR;
576 break;
577
578 case VBLK:
579 /*
580 * On last close of a block device (that isn't mounted)
581 * we must invalidate any in core blocks, so that
582 * we can, for instance, change floppy disks.
583 */
584 if (error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0))
585 return (error);
586 /*
587 * We do not want to really close the device if it
588 * is still in use unless we are trying to close it
589 * forcibly. Since every use (buffer, vnode, swap, cmap)
590 * holds a reference to the vnode, and because we mark
591 * any other vnodes that alias this device, when the
592 * sum of the reference counts on all the aliased
593 * vnodes descends to one, we are on last close.
594 */
595 if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
596 return (0);
597 devclose = bdevsw[major(dev)].d_close;
598 mode = S_IFBLK;
599 break;
600
601 default:
602 panic("spec_close: not special");
603 }
604
605 return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p));
606 }
607
608 /*
609 * Print out the contents of a special device vnode.
610 */
611 spec_print(ap)
612 struct vop_print_args /* {
613 struct vnode *a_vp;
614 } */ *ap;
615 {
616
617 printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev),
618 minor(ap->a_vp->v_rdev));
619 }
620
621 /*
622 * Return POSIX pathconf information applicable to special devices.
623 */
624 spec_pathconf(ap)
625 struct vop_pathconf_args /* {
626 struct vnode *a_vp;
627 int a_name;
628 int *a_retval;
629 } */ *ap;
630 {
631
632 switch (ap->a_name) {
633 case _PC_LINK_MAX:
634 *ap->a_retval = LINK_MAX;
635 return (0);
636 case _PC_MAX_CANON:
637 *ap->a_retval = MAX_CANON;
638 return (0);
639 case _PC_MAX_INPUT:
640 *ap->a_retval = MAX_INPUT;
641 return (0);
642 case _PC_PIPE_BUF:
643 *ap->a_retval = PIPE_BUF;
644 return (0);
645 case _PC_CHOWN_RESTRICTED:
646 *ap->a_retval = 1;
647 return (0);
648 case _PC_VDISABLE:
649 *ap->a_retval = _POSIX_VDISABLE;
650 return (0);
651 default:
652 return (EINVAL);
653 }
654 /* NOTREACHED */
655 }
656
/*
 * Special device advisory byte-level locks.  The request is ignored
 * and EOPNOTSUPP is always returned.
 */
/* ARGSUSED */
int
spec_advlock(ap)
	struct vop_advlock_args /* {
		struct vnode *a_vp;
		caddr_t  a_id;
		int  a_op;
		struct flock *a_fl;
		int  a_flags;
	} */ *ap;
{

	return (EOPNOTSUPP);
}
673
/*
 * Special device failed operation: always returns EBADF.
 */
int
spec_ebadf()
{

	return (EBADF);
}
682
/*
 * Special device bad operation: calling this is treated as fatal and
 * panics the system.
 */
int
spec_badop()
{

	panic("spec_badop called");
	/* NOTREACHED */
}
692