/*	$NetBSD: spec_vnops.c,v 1.55 2001/08/17 05:51:53 chs Exp $	*/
2
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)spec_vnops.c 8.15 (Berkeley) 7/14/95
36 */
37
38 #include <sys/param.h>
39 #include <sys/proc.h>
40 #include <sys/systm.h>
41 #include <sys/kernel.h>
42 #include <sys/conf.h>
43 #include <sys/buf.h>
44 #include <sys/mount.h>
45 #include <sys/namei.h>
46 #include <sys/vnode.h>
47 #include <sys/stat.h>
48 #include <sys/errno.h>
49 #include <sys/ioctl.h>
50 #include <sys/file.h>
51 #include <sys/disklabel.h>
52 #include <sys/lockf.h>
53
54 #include <miscfs/genfs/genfs.h>
55 #include <miscfs/specfs/specdev.h>
56
/*
 * Symbolic sleep message strings for devices.  Each array holds its own
 * name as text.
 */
const char devopn[]  = "devopn";
const char devio[]   = "devio";
const char devwait[] = "devwait";
const char devin[]   = "devin";
const char devout[]  = "devout";
const char devioc[]  = "devioc";
const char devcls[]  = "devcls";
65
66 /*
67 * This vnode operations vector is used for two things only:
68 * - special device nodes created from whole cloth by the kernel.
69 * - as a temporary vnodeops replacement for vnodes which were found to
70 * be aliased by callers of checkalias().
71 * For the ops vector for vnodes built from special devices found in a
72 * filesystem, see (e.g) ffs_specop_entries[] in ffs_vnops.c or the
73 * equivalent for other filesystems.
74 */
75
76 int (**spec_vnodeop_p) __P((void *));
77 const struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
78 { &vop_default_desc, vn_default_error },
79 { &vop_lookup_desc, spec_lookup }, /* lookup */
80 { &vop_create_desc, spec_create }, /* create */
81 { &vop_mknod_desc, spec_mknod }, /* mknod */
82 { &vop_open_desc, spec_open }, /* open */
83 { &vop_close_desc, spec_close }, /* close */
84 { &vop_access_desc, spec_access }, /* access */
85 { &vop_getattr_desc, spec_getattr }, /* getattr */
86 { &vop_setattr_desc, spec_setattr }, /* setattr */
87 { &vop_read_desc, spec_read }, /* read */
88 { &vop_write_desc, spec_write }, /* write */
89 { &vop_lease_desc, spec_lease_check }, /* lease */
90 { &vop_fcntl_desc, spec_fcntl }, /* fcntl */
91 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */
92 { &vop_poll_desc, spec_poll }, /* poll */
93 { &vop_revoke_desc, spec_revoke }, /* revoke */
94 { &vop_mmap_desc, spec_mmap }, /* mmap */
95 { &vop_fsync_desc, spec_fsync }, /* fsync */
96 { &vop_seek_desc, spec_seek }, /* seek */
97 { &vop_remove_desc, spec_remove }, /* remove */
98 { &vop_link_desc, spec_link }, /* link */
99 { &vop_rename_desc, spec_rename }, /* rename */
100 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */
101 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */
102 { &vop_symlink_desc, spec_symlink }, /* symlink */
103 { &vop_readdir_desc, spec_readdir }, /* readdir */
104 { &vop_readlink_desc, spec_readlink }, /* readlink */
105 { &vop_abortop_desc, spec_abortop }, /* abortop */
106 { &vop_inactive_desc, spec_inactive }, /* inactive */
107 { &vop_reclaim_desc, spec_reclaim }, /* reclaim */
108 { &vop_lock_desc, spec_lock }, /* lock */
109 { &vop_unlock_desc, spec_unlock }, /* unlock */
110 { &vop_bmap_desc, spec_bmap }, /* bmap */
111 { &vop_strategy_desc, spec_strategy }, /* strategy */
112 { &vop_print_desc, spec_print }, /* print */
113 { &vop_islocked_desc, spec_islocked }, /* islocked */
114 { &vop_pathconf_desc, spec_pathconf }, /* pathconf */
115 { &vop_advlock_desc, spec_advlock }, /* advlock */
116 { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */
117 { &vop_valloc_desc, spec_valloc }, /* valloc */
118 { &vop_vfree_desc, spec_vfree }, /* vfree */
119 { &vop_truncate_desc, spec_truncate }, /* truncate */
120 { &vop_update_desc, spec_update }, /* update */
121 { &vop_bwrite_desc, spec_bwrite }, /* bwrite */
122 { &vop_getpages_desc, spec_getpages }, /* getpages */
123 { &vop_putpages_desc, spec_putpages }, /* putpages */
124 { NULL, NULL }
125 };
126 const struct vnodeopv_desc spec_vnodeop_opv_desc =
127 { &spec_vnodeop_p, spec_vnodeop_entries };
128
129 /*
130 * Trivial lookup routine that always fails.
131 */
132 int
133 spec_lookup(v)
134 void *v;
135 {
136 struct vop_lookup_args /* {
137 struct vnode *a_dvp;
138 struct vnode **a_vpp;
139 struct componentname *a_cnp;
140 } */ *ap = v;
141
142 *ap->a_vpp = NULL;
143 return (ENOTDIR);
144 }
145
146 /*
147 * Open a special file.
148 */
149 /* ARGSUSED */
150 int
151 spec_open(v)
152 void *v;
153 {
154 struct vop_open_args /* {
155 struct vnode *a_vp;
156 int a_mode;
157 struct ucred *a_cred;
158 struct proc *a_p;
159 } */ *ap = v;
160 struct proc *p = ap->a_p;
161 struct vnode *bvp, *vp = ap->a_vp;
162 dev_t bdev, dev = (dev_t)vp->v_rdev;
163 int maj = major(dev);
164 int error;
165 struct partinfo pi;
166
167 /*
168 * Don't allow open if fs is mounted -nodev.
169 */
170 if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
171 return (ENXIO);
172
173 switch (vp->v_type) {
174
175 case VCHR:
176 if ((u_int)maj >= nchrdev)
177 return (ENXIO);
178 if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
179 /*
180 * When running in very secure mode, do not allow
181 * opens for writing of any disk character devices.
182 */
183 if (securelevel >= 2 && cdevsw[maj].d_type == D_DISK)
184 return (EPERM);
185 /*
186 * When running in secure mode, do not allow opens
187 * for writing of /dev/mem, /dev/kmem, or character
188 * devices whose corresponding block devices are
189 * currently mounted.
190 */
191 if (securelevel >= 1) {
192 if ((bdev = chrtoblk(dev)) != (dev_t)NODEV &&
193 vfinddev(bdev, VBLK, &bvp) &&
194 (error = vfs_mountedon(bvp)))
195 return (error);
196 if (iskmemdev(dev))
197 return (EPERM);
198 }
199 }
200 if (cdevsw[maj].d_type == D_TTY)
201 vp->v_flag |= VISTTY;
202 VOP_UNLOCK(vp, 0);
203 error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, p);
204 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
205 return (error);
206
207 case VBLK:
208 if ((u_int)maj >= nblkdev)
209 return (ENXIO);
210 /*
211 * When running in very secure mode, do not allow
212 * opens for writing of any disk block devices.
213 */
214 if (securelevel >= 2 && ap->a_cred != FSCRED &&
215 (ap->a_mode & FWRITE) && bdevsw[maj].d_type == D_DISK)
216 return (EPERM);
217 /*
218 * Do not allow opens of block devices that are
219 * currently mounted.
220 */
221 if ((error = vfs_mountedon(vp)) != 0)
222 return (error);
223 error = (*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, p);
224 if (error) {
225 return error;
226 }
227 error = (*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev,
228 DIOCGPART, (caddr_t)&pi, FREAD, curproc);
229 if (error == 0) {
230 vp->v_uvm.u_size = (voff_t)pi.disklab->d_secsize *
231 pi.part->p_size;
232 }
233 return 0;
234
235 case VNON:
236 case VLNK:
237 case VDIR:
238 case VREG:
239 case VBAD:
240 case VFIFO:
241 case VSOCK:
242 break;
243 }
244 return (0);
245 }
246
247 /*
248 * Vnode op for read
249 */
250 /* ARGSUSED */
251 int
252 spec_read(v)
253 void *v;
254 {
255 struct vop_read_args /* {
256 struct vnode *a_vp;
257 struct uio *a_uio;
258 int a_ioflag;
259 struct ucred *a_cred;
260 } */ *ap = v;
261 struct vnode *vp = ap->a_vp;
262 struct uio *uio = ap->a_uio;
263 void *win;
264 vsize_t bytelen;
265 int error = 0;
266
267 #ifdef DIAGNOSTIC
268 if (uio->uio_rw != UIO_READ)
269 panic("spec_read mode");
270 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
271 panic("spec_read proc");
272 #endif
273 if (uio->uio_resid == 0)
274 return (0);
275
276 if (vp->v_type == VCHR) {
277 VOP_UNLOCK(vp, 0);
278 error = (*cdevsw[major(vp->v_rdev)].d_read)
279 (vp->v_rdev, uio, ap->a_ioflag);
280 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
281 return (error);
282 }
283 KASSERT(vp->v_type == VBLK);
284
285 if (uio->uio_offset < 0) {
286 return (EINVAL);
287 }
288 do {
289 bytelen = uio->uio_resid;
290 win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset, &bytelen,
291 UBC_READ);
292 error = uiomove(win, bytelen, uio);
293 ubc_release(win, 0);
294 } while (error == 0 && uio->uio_resid > 0);
295 return (error);
296 }
297
298 /*
299 * Vnode op for write
300 */
301 /* ARGSUSED */
302 int
303 spec_write(v)
304 void *v;
305 {
306 struct vop_write_args /* {
307 struct vnode *a_vp;
308 struct uio *a_uio;
309 int a_ioflag;
310 struct ucred *a_cred;
311 } */ *ap = v;
312 struct vnode *vp = ap->a_vp;
313 struct uio *uio = ap->a_uio;
314 void *win;
315 vsize_t bytelen;
316 int error = 0;
317
318 #ifdef DIAGNOSTIC
319 if (uio->uio_rw != UIO_WRITE)
320 panic("spec_write mode");
321 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
322 panic("spec_write proc");
323 #endif
324
325 if (vp->v_type == VCHR) {
326 VOP_UNLOCK(vp, 0);
327 error = (*cdevsw[major(vp->v_rdev)].d_write)
328 (vp->v_rdev, uio, ap->a_ioflag);
329 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
330 return (error);
331 }
332 KASSERT(vp->v_type == VBLK);
333
334 if (uio->uio_resid == 0)
335 return (0);
336 if (uio->uio_offset < 0)
337 return (EINVAL);
338 do {
339 bytelen = uio->uio_resid;
340 win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset, &bytelen,
341 UBC_WRITE);
342 error = uiomove(win, bytelen, uio);
343 ubc_release(win, 0);
344 } while (error == 0 && uio->uio_resid > 0);
345 return (error);
346 }
347
348 /*
349 * Device ioctl operation.
350 */
351 /* ARGSUSED */
352 int
353 spec_ioctl(v)
354 void *v;
355 {
356 struct vop_ioctl_args /* {
357 struct vnode *a_vp;
358 u_long a_command;
359 caddr_t a_data;
360 int a_fflag;
361 struct ucred *a_cred;
362 struct proc *a_p;
363 } */ *ap = v;
364 dev_t dev = ap->a_vp->v_rdev;
365 int maj = major(dev);
366
367 switch (ap->a_vp->v_type) {
368
369 case VCHR:
370 return ((*cdevsw[maj].d_ioctl)(dev, ap->a_command, ap->a_data,
371 ap->a_fflag, ap->a_p));
372
373 case VBLK:
374 if (ap->a_command == 0 && (long)ap->a_data == B_TAPE) {
375 if (bdevsw[maj].d_type == D_TAPE)
376 return (0);
377 else
378 return (1);
379 }
380 return ((*bdevsw[maj].d_ioctl)(dev, ap->a_command, ap->a_data,
381 ap->a_fflag, ap->a_p));
382
383 default:
384 panic("spec_ioctl");
385 /* NOTREACHED */
386 }
387 }
388
389 /* ARGSUSED */
390 int
391 spec_poll(v)
392 void *v;
393 {
394 struct vop_poll_args /* {
395 struct vnode *a_vp;
396 int a_events;
397 struct proc *a_p;
398 } */ *ap = v;
399 dev_t dev;
400
401 switch (ap->a_vp->v_type) {
402
403 case VCHR:
404 dev = ap->a_vp->v_rdev;
405 return (*cdevsw[major(dev)].d_poll)(dev, ap->a_events, ap->a_p);
406
407 default:
408 return (genfs_poll(v));
409 }
410 }
411 /*
412 * Synch buffers associated with a block device
413 */
414 /* ARGSUSED */
415 int
416 spec_fsync(v)
417 void *v;
418 {
419 struct vop_fsync_args /* {
420 struct vnode *a_vp;
421 struct ucred *a_cred;
422 int a_flags;
423 off_t offlo;
424 off_t offhi;
425 struct proc *a_p;
426 } */ *ap = v;
427 struct vnode *vp = ap->a_vp;
428
429 if (vp->v_type == VBLK)
430 vflushbuf(vp, (ap->a_flags & FSYNC_WAIT) != 0);
431 return (0);
432 }
433
434 /*
435 * Just call the device strategy routine
436 */
437 int
438 spec_strategy(v)
439 void *v;
440 {
441 struct vop_strategy_args /* {
442 struct buf *a_bp;
443 } */ *ap = v;
444 struct buf *bp;
445
446 bp = ap->a_bp;
447 if (!(bp->b_flags & B_READ) &&
448 (LIST_FIRST(&bp->b_dep)) != NULL && bioops.io_start)
449 (*bioops.io_start)(bp);
450 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
451 return (0);
452 }
453
454 int
455 spec_inactive(v)
456 void *v;
457 {
458 struct vop_inactive_args /* {
459 struct vnode *a_vp;
460 struct proc *a_p;
461 } */ *ap = v;
462
463 VOP_UNLOCK(ap->a_vp, 0);
464 return (0);
465 }
466
467 /*
468 * This is a noop, simply returning what one has been given.
469 */
470 int
471 spec_bmap(v)
472 void *v;
473 {
474 struct vop_bmap_args /* {
475 struct vnode *a_vp;
476 daddr_t a_bn;
477 struct vnode **a_vpp;
478 daddr_t *a_bnp;
479 int *a_runp;
480 } */ *ap = v;
481
482 if (ap->a_vpp != NULL)
483 *ap->a_vpp = ap->a_vp;
484 if (ap->a_bnp != NULL)
485 *ap->a_bnp = ap->a_bn;
486 if (ap->a_runp != NULL)
487 *ap->a_runp = (MAXBSIZE >> DEV_BSHIFT) - 1;
488 return (0);
489 }
490
491 /*
492 * Device close routine
493 */
494 /* ARGSUSED */
495 int
496 spec_close(v)
497 void *v;
498 {
499 struct vop_close_args /* {
500 struct vnode *a_vp;
501 int a_fflag;
502 struct ucred *a_cred;
503 struct proc *a_p;
504 } */ *ap = v;
505 struct vnode *vp = ap->a_vp;
506 dev_t dev = vp->v_rdev;
507 int (*devclose) __P((dev_t, int, int, struct proc *));
508 int mode, error, count, flags, flags1;
509
510 count = vcount(vp);
511 simple_lock(&vp->v_interlock);
512 flags = vp->v_flag;
513 simple_unlock(&vp->v_interlock);
514
515 switch (vp->v_type) {
516
517 case VCHR:
518 /*
519 * Hack: a tty device that is a controlling terminal
520 * has a reference from the session structure.
521 * We cannot easily tell that a character device is
522 * a controlling terminal, unless it is the closing
523 * process' controlling terminal. In that case,
524 * if the reference count is 2 (this last descriptor
525 * plus the session), release the reference from the session.
526 */
527 if (count == 2 && ap->a_p &&
528 vp == ap->a_p->p_session->s_ttyvp) {
529 vrele(vp);
530 count--;
531 ap->a_p->p_session->s_ttyvp = NULL;
532 }
533 /*
534 * If the vnode is locked, then we are in the midst
535 * of forcably closing the device, otherwise we only
536 * close on last reference.
537 */
538 if (count > 1 && (flags & VXLOCK) == 0)
539 return (0);
540 devclose = cdevsw[major(dev)].d_close;
541 mode = S_IFCHR;
542 break;
543
544 case VBLK:
545 /*
546 * On last close of a block device (that isn't mounted)
547 * we must invalidate any in core blocks, so that
548 * we can, for instance, change floppy disks.
549 */
550 error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0);
551 if (error)
552 return (error);
553 /*
554 * We do not want to really close the device if it
555 * is still in use unless we are trying to close it
556 * forcibly. Since every use (buffer, vnode, swap, cmap)
557 * holds a reference to the vnode, and because we mark
558 * any other vnodes that alias this device, when the
559 * sum of the reference counts on all the aliased
560 * vnodes descends to one, we are on last close.
561 */
562 if (count > 1 && (flags & VXLOCK) == 0)
563 return (0);
564 devclose = bdevsw[major(dev)].d_close;
565 mode = S_IFBLK;
566 break;
567
568 default:
569 panic("spec_close: not special");
570 }
571
572 flags1 = ap->a_fflag;
573
574 /*
575 * if VXLOCK is set, then we're going away soon, so make this
576 * non-blocking. Also ensures that we won't wedge in vn_lock below.
577 */
578 if (flags & VXLOCK)
579 flags1 |= FNONBLOCK;
580
581 /*
582 * If we're able to block, release the vnode lock & reaquire. We
583 * might end up sleaping for someone else who wants our queues. They
584 * won't get them if we hold the vnode locked. Also, if VXLOCK is set,
585 * don't release the lock as we won't be able to regain it.
586 */
587 if (!(flags1 & FNONBLOCK))
588 VOP_UNLOCK(vp, 0);
589
590 error = (*devclose)(dev, flags1, mode, ap->a_p);
591
592 if (!(flags1 & FNONBLOCK))
593 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
594
595 return (error);
596 }
597
598 /*
599 * Print out the contents of a special device vnode.
600 */
601 int
602 spec_print(v)
603 void *v;
604 {
605 struct vop_print_args /* {
606 struct vnode *a_vp;
607 } */ *ap = v;
608
609 printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev),
610 minor(ap->a_vp->v_rdev));
611 return 0;
612 }
613
614 /*
615 * Return POSIX pathconf information applicable to special devices.
616 */
617 int
618 spec_pathconf(v)
619 void *v;
620 {
621 struct vop_pathconf_args /* {
622 struct vnode *a_vp;
623 int a_name;
624 register_t *a_retval;
625 } */ *ap = v;
626
627 switch (ap->a_name) {
628 case _PC_LINK_MAX:
629 *ap->a_retval = LINK_MAX;
630 return (0);
631 case _PC_MAX_CANON:
632 *ap->a_retval = MAX_CANON;
633 return (0);
634 case _PC_MAX_INPUT:
635 *ap->a_retval = MAX_INPUT;
636 return (0);
637 case _PC_PIPE_BUF:
638 *ap->a_retval = PIPE_BUF;
639 return (0);
640 case _PC_CHOWN_RESTRICTED:
641 *ap->a_retval = 1;
642 return (0);
643 case _PC_VDISABLE:
644 *ap->a_retval = _POSIX_VDISABLE;
645 return (0);
646 case _PC_SYNC_IO:
647 *ap->a_retval = 1;
648 return (0);
649 default:
650 return (EINVAL);
651 }
652 /* NOTREACHED */
653 }
654
655 /*
656 * Advisory record locking support.
657 */
658 int
659 spec_advlock(v)
660 void *v;
661 {
662 struct vop_advlock_args /* {
663 struct vnode *a_vp;
664 caddr_t a_id;
665 int a_op;
666 struct flock *a_fl;
667 int a_flags;
668 } */ *ap = v;
669 struct vnode *vp = ap->a_vp;
670
671 return lf_advlock(ap, &vp->v_speclockf, (off_t)0);
672 }
673
674 /*
675 * glue for genfs_{get,put}pages()
676 */
677 int
678 spec_size(v)
679 void *v;
680 {
681 struct vop_size_args /* {
682 struct vnode *a_vp;
683 off_t a_size;
684 off_t *a_eobp;
685 } */ *ap = v;
686
687 *ap->a_eobp = ap->a_size;
688 return 0;
689 }
690