/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)spec_vnops.c	8.6 (Berkeley) 4/9/94
 *	$Id: spec_vnops.c,v 1.15 1994/06/08 11:33:47 mycroft Exp $
 */
36
37 #include <sys/param.h>
38 #include <sys/proc.h>
39 #include <sys/systm.h>
40 #include <sys/kernel.h>
41 #include <sys/conf.h>
42 #include <sys/buf.h>
43 #include <sys/mount.h>
44 #include <sys/namei.h>
45 #include <sys/vnode.h>
46 #include <sys/stat.h>
47 #include <sys/errno.h>
48 #include <sys/ioctl.h>
49 #include <sys/file.h>
50 #include <sys/disklabel.h>
51 #include <miscfs/specfs/specdev.h>
52
/*
 * Symbolic sleep message strings for devices.
 *
 * These are global (non-static) definitions and are not referenced by
 * the code visible in this part of the file.
 * NOTE(review): presumably device code elsewhere passes them as the wait
 * message when sleeping -- confirm against the users before renaming or
 * removing any of them.
 */
char devopn[] = "devopn";
char devio[] = "devio";
char devwait[] = "devwait";
char devin[] = "devin";
char devout[] = "devout";
char devioc[] = "devioc";
char devcls[] = "devcls";
61
62 int (**spec_vnodeop_p)();
63 struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
64 { &vop_default_desc, vn_default_error },
65 { &vop_lookup_desc, spec_lookup }, /* lookup */
66 { &vop_create_desc, spec_create }, /* create */
67 { &vop_mknod_desc, spec_mknod }, /* mknod */
68 { &vop_open_desc, spec_open }, /* open */
69 { &vop_close_desc, spec_close }, /* close */
70 { &vop_access_desc, spec_access }, /* access */
71 { &vop_getattr_desc, spec_getattr }, /* getattr */
72 { &vop_setattr_desc, spec_setattr }, /* setattr */
73 { &vop_read_desc, spec_read }, /* read */
74 { &vop_write_desc, spec_write }, /* write */
75 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */
76 { &vop_select_desc, spec_select }, /* select */
77 { &vop_mmap_desc, spec_mmap }, /* mmap */
78 { &vop_fsync_desc, spec_fsync }, /* fsync */
79 { &vop_seek_desc, spec_seek }, /* seek */
80 { &vop_remove_desc, spec_remove }, /* remove */
81 { &vop_link_desc, spec_link }, /* link */
82 { &vop_rename_desc, spec_rename }, /* rename */
83 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */
84 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */
85 { &vop_symlink_desc, spec_symlink }, /* symlink */
86 { &vop_readdir_desc, spec_readdir }, /* readdir */
87 { &vop_readlink_desc, spec_readlink }, /* readlink */
88 { &vop_abortop_desc, spec_abortop }, /* abortop */
89 { &vop_inactive_desc, spec_inactive }, /* inactive */
90 { &vop_reclaim_desc, spec_reclaim }, /* reclaim */
91 { &vop_lock_desc, spec_lock }, /* lock */
92 { &vop_unlock_desc, spec_unlock }, /* unlock */
93 { &vop_bmap_desc, spec_bmap }, /* bmap */
94 { &vop_strategy_desc, spec_strategy }, /* strategy */
95 { &vop_print_desc, spec_print }, /* print */
96 { &vop_islocked_desc, spec_islocked }, /* islocked */
97 { &vop_pathconf_desc, spec_pathconf }, /* pathconf */
98 { &vop_advlock_desc, spec_advlock }, /* advlock */
99 { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */
100 { &vop_valloc_desc, spec_valloc }, /* valloc */
101 { &vop_vfree_desc, spec_vfree }, /* vfree */
102 { &vop_truncate_desc, spec_truncate }, /* truncate */
103 { &vop_update_desc, spec_update }, /* update */
104 { &vop_bwrite_desc, spec_bwrite }, /* bwrite */
105 { (struct vnodeop_desc*)NULL, (int(*)())NULL }
106 };
107 struct vnodeopv_desc spec_vnodeop_opv_desc =
108 { &spec_vnodeop_p, spec_vnodeop_entries };
109
110 /*
111 * Trivial lookup routine that always fails.
112 */
113 int
114 spec_lookup(ap)
115 struct vop_lookup_args /* {
116 struct vnode *a_dvp;
117 struct vnode **a_vpp;
118 struct componentname *a_cnp;
119 } */ *ap;
120 {
121
122 *ap->a_vpp = NULL;
123 return (ENOTDIR);
124 }
125
126 /*
127 * Open a special file.
128 */
129 /* ARGSUSED */
130 spec_open(ap)
131 struct vop_open_args /* {
132 struct vnode *a_vp;
133 int a_mode;
134 struct ucred *a_cred;
135 struct proc *a_p;
136 } */ *ap;
137 {
138 struct vnode *bvp, *vp = ap->a_vp;
139 dev_t bdev, dev = (dev_t)vp->v_rdev;
140 register int maj = major(dev);
141 int error;
142
143 /*
144 * Don't allow open if fs is mounted -nodev.
145 */
146 if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
147 return (ENXIO);
148
149 switch (vp->v_type) {
150
151 case VCHR:
152 if ((u_int)maj >= nchrdev)
153 return (ENXIO);
154 if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
155 /*
156 * When running in very secure mode, do not allow
157 * opens for writing of any disk character devices.
158 */
159 if (securelevel >= 2 && isdisk(dev, VCHR))
160 return (EPERM);
161 /*
162 * When running in secure mode, do not allow opens
163 * for writing of /dev/mem, /dev/kmem, or character
164 * devices whose corresponding block devices are
165 * currently mounted.
166 */
167 if (securelevel >= 1) {
168 if ((bdev = chrtoblk(dev)) != NODEV &&
169 vfinddev(bdev, VBLK, &bvp) &&
170 bvp->v_usecount > 0 &&
171 (error = vfs_mountedon(bvp)))
172 return (error);
173 if (iskmemdev(dev))
174 return (EPERM);
175 }
176 }
177 VOP_UNLOCK(vp);
178 error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, ap->a_p);
179 VOP_LOCK(vp);
180 return (error);
181
182 case VBLK:
183 if ((u_int)maj >= nblkdev)
184 return (ENXIO);
185 /*
186 * When running in very secure mode, do not allow
187 * opens for writing of any disk block devices.
188 */
189 if (securelevel >= 2 && ap->a_cred != FSCRED &&
190 (ap->a_mode & FWRITE) && isdisk(dev, VBLK))
191 return (EPERM);
192 /*
193 * Do not allow opens of block devices that are
194 * currently mounted.
195 */
196 if (error = vfs_mountedon(vp))
197 return (error);
198 return ((*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, ap->a_p));
199 }
200 return (0);
201 }
202
203 /*
204 * Vnode op for read
205 */
206 /* ARGSUSED */
207 spec_read(ap)
208 struct vop_read_args /* {
209 struct vnode *a_vp;
210 struct uio *a_uio;
211 int a_ioflag;
212 struct ucred *a_cred;
213 } */ *ap;
214 {
215 register struct vnode *vp = ap->a_vp;
216 register struct uio *uio = ap->a_uio;
217 struct proc *p = uio->uio_procp;
218 struct buf *bp;
219 daddr_t bn, nextbn;
220 long bsize, bscale;
221 struct partinfo dpart;
222 int n, on, majordev, (*ioctl)();
223 int error = 0;
224 dev_t dev;
225
226 #ifdef DIAGNOSTIC
227 if (uio->uio_rw != UIO_READ)
228 panic("spec_read mode");
229 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
230 panic("spec_read proc");
231 #endif
232 if (uio->uio_resid == 0)
233 return (0);
234
235 switch (vp->v_type) {
236
237 case VCHR:
238 VOP_UNLOCK(vp);
239 error = (*cdevsw[major(vp->v_rdev)].d_read)
240 (vp->v_rdev, uio, ap->a_ioflag);
241 VOP_LOCK(vp);
242 return (error);
243
244 case VBLK:
245 if (uio->uio_offset < 0)
246 return (EINVAL);
247 bsize = BLKDEV_IOSIZE;
248 dev = vp->v_rdev;
249 if ((majordev = major(dev)) < nblkdev &&
250 (ioctl = bdevsw[majordev].d_ioctl) != NULL &&
251 (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 &&
252 dpart.part->p_fstype == FS_BSDFFS &&
253 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
254 bsize = dpart.part->p_frag * dpart.part->p_fsize;
255 bscale = bsize / DEV_BSIZE;
256 do {
257 bn = (uio->uio_offset / DEV_BSIZE) &~ (bscale - 1);
258 on = uio->uio_offset % bsize;
259 n = min((unsigned)(bsize - on), uio->uio_resid);
260 if (vp->v_lastr + bscale == bn) {
261 nextbn = bn + bscale;
262 error = breadn(vp, bn, (int)bsize, &nextbn,
263 (int *)&bsize, 1, NOCRED, &bp);
264 } else
265 error = bread(vp, bn, (int)bsize, NOCRED, &bp);
266 vp->v_lastr = bn;
267 n = min(n, bsize - bp->b_resid);
268 if (error) {
269 brelse(bp);
270 return (error);
271 }
272 error = uiomove((char *)bp->b_data + on, n, uio);
273 if (n + on == bsize)
274 bp->b_flags |= B_AGE;
275 brelse(bp);
276 } while (error == 0 && uio->uio_resid > 0 && n != 0);
277 return (error);
278
279 default:
280 panic("spec_read type");
281 }
282 /* NOTREACHED */
283 }
284
285 /*
286 * Vnode op for write
287 */
288 /* ARGSUSED */
289 spec_write(ap)
290 struct vop_write_args /* {
291 struct vnode *a_vp;
292 struct uio *a_uio;
293 int a_ioflag;
294 struct ucred *a_cred;
295 } */ *ap;
296 {
297 register struct vnode *vp = ap->a_vp;
298 register struct uio *uio = ap->a_uio;
299 struct proc *p = uio->uio_procp;
300 struct buf *bp;
301 daddr_t bn;
302 int bsize, blkmask;
303 struct partinfo dpart;
304 register int n, on;
305 int error = 0;
306
307 #ifdef DIAGNOSTIC
308 if (uio->uio_rw != UIO_WRITE)
309 panic("spec_write mode");
310 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
311 panic("spec_write proc");
312 #endif
313
314 switch (vp->v_type) {
315
316 case VCHR:
317 VOP_UNLOCK(vp);
318 error = (*cdevsw[major(vp->v_rdev)].d_write)
319 (vp->v_rdev, uio, ap->a_ioflag);
320 VOP_LOCK(vp);
321 return (error);
322
323 case VBLK:
324 if (uio->uio_resid == 0)
325 return (0);
326 if (uio->uio_offset < 0)
327 return (EINVAL);
328 bsize = BLKDEV_IOSIZE;
329 if ((*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev, DIOCGPART,
330 (caddr_t)&dpart, FREAD, p) == 0) {
331 if (dpart.part->p_fstype == FS_BSDFFS &&
332 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
333 bsize = dpart.part->p_frag *
334 dpart.part->p_fsize;
335 }
336 blkmask = (bsize / DEV_BSIZE) - 1;
337 do {
338 bn = (uio->uio_offset / DEV_BSIZE) &~ blkmask;
339 on = uio->uio_offset % bsize;
340 n = min((unsigned)(bsize - on), uio->uio_resid);
341 if (n == bsize)
342 bp = getblk(vp, bn, bsize, 0, 0);
343 else
344 error = bread(vp, bn, bsize, NOCRED, &bp);
345 n = min(n, bsize - bp->b_resid);
346 if (error) {
347 brelse(bp);
348 return (error);
349 }
350 error = uiomove((char *)bp->b_data + on, n, uio);
351 if (n + on == bsize) {
352 bp->b_flags |= B_AGE;
353 bawrite(bp);
354 } else
355 bdwrite(bp);
356 } while (error == 0 && uio->uio_resid > 0 && n != 0);
357 return (error);
358
359 default:
360 panic("spec_write type");
361 }
362 /* NOTREACHED */
363 }
364
365 /*
366 * Device ioctl operation.
367 */
368 /* ARGSUSED */
369 spec_ioctl(ap)
370 struct vop_ioctl_args /* {
371 struct vnode *a_vp;
372 int a_command;
373 caddr_t a_data;
374 int a_fflag;
375 struct ucred *a_cred;
376 struct proc *a_p;
377 } */ *ap;
378 {
379 dev_t dev = ap->a_vp->v_rdev;
380
381 switch (ap->a_vp->v_type) {
382
383 case VCHR:
384 return ((*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data,
385 ap->a_fflag, ap->a_p));
386
387 case VBLK:
388 if (ap->a_command == 0 && (int)ap->a_data == B_TAPE)
389 if (bdevsw[major(dev)].d_flags & B_TAPE)
390 return (0);
391 else
392 return (1);
393 return ((*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data,
394 ap->a_fflag, ap->a_p));
395
396 default:
397 panic("spec_ioctl");
398 /* NOTREACHED */
399 }
400 }
401
402 /* ARGSUSED */
403 spec_select(ap)
404 struct vop_select_args /* {
405 struct vnode *a_vp;
406 int a_which;
407 int a_fflags;
408 struct ucred *a_cred;
409 struct proc *a_p;
410 } */ *ap;
411 {
412 register dev_t dev;
413
414 switch (ap->a_vp->v_type) {
415
416 default:
417 return (1); /* XXX */
418
419 case VCHR:
420 dev = ap->a_vp->v_rdev;
421 return (*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_p);
422 }
423 }
424 /*
425 * Synch buffers associated with a block device
426 */
427 /* ARGSUSED */
428 int
429 spec_fsync(ap)
430 struct vop_fsync_args /* {
431 struct vnode *a_vp;
432 struct ucred *a_cred;
433 int a_waitfor;
434 struct proc *a_p;
435 } */ *ap;
436 {
437 register struct vnode *vp = ap->a_vp;
438 register struct buf *bp;
439 struct buf *nbp;
440 int s;
441
442 if (vp->v_type == VCHR)
443 return (0);
444 /*
445 * Flush all dirty buffers associated with a block device.
446 */
447 loop:
448 s = splbio();
449 for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
450 nbp = bp->b_vnbufs.le_next;
451 if ((bp->b_flags & B_BUSY))
452 continue;
453 if ((bp->b_flags & B_DELWRI) == 0)
454 panic("spec_fsync: not dirty");
455 bremfree(bp);
456 bp->b_flags |= B_BUSY;
457 splx(s);
458 bawrite(bp);
459 goto loop;
460 }
461 if (ap->a_waitfor == MNT_WAIT) {
462 while (vp->v_numoutput) {
463 vp->v_flag |= VBWAIT;
464 sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1);
465 }
466 #ifdef DIAGNOSTIC
467 if (vp->v_dirtyblkhd.lh_first) {
468 vprint("spec_fsync: dirty", vp);
469 goto loop;
470 }
471 #endif
472 }
473 splx(s);
474 return (0);
475 }
476
477 /*
478 * Just call the device strategy routine
479 */
480 spec_strategy(ap)
481 struct vop_strategy_args /* {
482 struct buf *a_bp;
483 } */ *ap;
484 {
485
486 (*bdevsw[major(ap->a_bp->b_dev)].d_strategy)(ap->a_bp);
487 return (0);
488 }
489
490 /*
491 * This is a noop, simply returning what one has been given.
492 */
493 spec_bmap(ap)
494 struct vop_bmap_args /* {
495 struct vnode *a_vp;
496 daddr_t a_bn;
497 struct vnode **a_vpp;
498 daddr_t *a_bnp;
499 } */ *ap;
500 {
501
502 if (ap->a_vpp != NULL)
503 *ap->a_vpp = ap->a_vp;
504 if (ap->a_bnp != NULL)
505 *ap->a_bnp = ap->a_bn;
506 return (0);
507 }
508
/*
 * At the moment we do not do any locking.
 *
 * The argument is ignored and 0 is always returned.
 */
/* ARGSUSED */
int
spec_lock(ap)
	struct vop_lock_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	return (0);
}
521
/*
 * Counterpart of spec_lock: no locking is done for special files, so
 * the argument is ignored and 0 is always returned.
 */
/* ARGSUSED */
int
spec_unlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	return (0);
}
531
532 /*
533 * Device close routine
534 */
535 /* ARGSUSED */
536 spec_close(ap)
537 struct vop_close_args /* {
538 struct vnode *a_vp;
539 int a_fflag;
540 struct ucred *a_cred;
541 struct proc *a_p;
542 } */ *ap;
543 {
544 register struct vnode *vp = ap->a_vp;
545 dev_t dev = vp->v_rdev;
546 int (*devclose) __P((dev_t, int, int, struct proc *));
547 int mode, error;
548
549 switch (vp->v_type) {
550
551 case VCHR:
552 /*
553 * Hack: a tty device that is a controlling terminal
554 * has a reference from the session structure.
555 * We cannot easily tell that a character device is
556 * a controlling terminal, unless it is the closing
557 * process' controlling terminal. In that case,
558 * if the reference count is 2 (this last descriptor
559 * plus the session), release the reference from the session.
560 */
561 if (vcount(vp) == 2 && ap->a_p &&
562 vp == ap->a_p->p_session->s_ttyvp) {
563 vrele(vp);
564 ap->a_p->p_session->s_ttyvp = NULL;
565 }
566 /*
567 * If the vnode is locked, then we are in the midst
568 * of forcably closing the device, otherwise we only
569 * close on last reference.
570 */
571 if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
572 return (0);
573 devclose = cdevsw[major(dev)].d_close;
574 mode = S_IFCHR;
575 break;
576
577 case VBLK:
578 /*
579 * On last close of a block device (that isn't mounted)
580 * we must invalidate any in core blocks, so that
581 * we can, for instance, change floppy disks.
582 */
583 if (error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0))
584 return (error);
585 /*
586 * We do not want to really close the device if it
587 * is still in use unless we are trying to close it
588 * forcibly. Since every use (buffer, vnode, swap, cmap)
589 * holds a reference to the vnode, and because we mark
590 * any other vnodes that alias this device, when the
591 * sum of the reference counts on all the aliased
592 * vnodes descends to one, we are on last close.
593 */
594 if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
595 return (0);
596 devclose = bdevsw[major(dev)].d_close;
597 mode = S_IFBLK;
598 break;
599
600 default:
601 panic("spec_close: not special");
602 }
603
604 return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p));
605 }
606
607 /*
608 * Print out the contents of a special device vnode.
609 */
610 spec_print(ap)
611 struct vop_print_args /* {
612 struct vnode *a_vp;
613 } */ *ap;
614 {
615
616 printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev),
617 minor(ap->a_vp->v_rdev));
618 }
619
620 /*
621 * Return POSIX pathconf information applicable to special devices.
622 */
623 spec_pathconf(ap)
624 struct vop_pathconf_args /* {
625 struct vnode *a_vp;
626 int a_name;
627 int *a_retval;
628 } */ *ap;
629 {
630
631 switch (ap->a_name) {
632 case _PC_LINK_MAX:
633 *ap->a_retval = LINK_MAX;
634 return (0);
635 case _PC_MAX_CANON:
636 *ap->a_retval = MAX_CANON;
637 return (0);
638 case _PC_MAX_INPUT:
639 *ap->a_retval = MAX_INPUT;
640 return (0);
641 case _PC_PIPE_BUF:
642 *ap->a_retval = PIPE_BUF;
643 return (0);
644 case _PC_CHOWN_RESTRICTED:
645 *ap->a_retval = 1;
646 return (0);
647 case _PC_VDISABLE:
648 *ap->a_retval = _POSIX_VDISABLE;
649 return (0);
650 default:
651 return (EINVAL);
652 }
653 /* NOTREACHED */
654 }
655
/*
 * Special device advisory byte-level locks.
 *
 * Not supported: the argument is ignored and EOPNOTSUPP is always
 * returned.
 */
/* ARGSUSED */
int
spec_advlock(ap)
	struct vop_advlock_args /* {
		struct vnode *a_vp;
		caddr_t  a_id;
		int  a_op;
		struct flock *a_fl;
		int  a_flags;
	} */ *ap;
{

	return (EOPNOTSUPP);
}
672
/*
 * Special device failed operation.
 *
 * Takes no arguments and always returns EBADF.
 */
int
spec_ebadf()
{

	return (EBADF);
}
681
/*
 * Special device bad operation.
 *
 * Panics unconditionally with the message "spec_badop called".  The
 * NOTREACHED marker records that panic() is not expected to return, so
 * no value is returned.
 */
int
spec_badop()
{

	panic("spec_badop called");
	/* NOTREACHED */
}
691