procfs_vnops.c revision 1.9 1 /*
2 * Copyright (c) 1993 The Regents of the University of California.
3 * Copyright (c) 1993 Jan-Simon Pendry
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Jan-Simon Pendry.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * From:
38 * Id: procfs_vnops.c,v 4.2 1994/01/02 15:28:44 jsp Exp
39 *
40 * $Id: procfs_vnops.c,v 1.9 1994/01/05 07:51:34 cgd Exp $
41 */
42
43 /*
44 * procfs vnode interface
45 */
46
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/time.h>
50 #include <sys/kernel.h>
51 #include <sys/file.h>
52 #include <sys/proc.h>
53 #include <sys/vnode.h>
54 #include <sys/namei.h>
55 #include <sys/malloc.h>
56 #include <sys/resourcevar.h>
57 #include <miscfs/procfs/procfs.h>
58 #include <vm/vm.h> /* for page_size */
59
60 /*
61 * Vnode Operations.
62 *
63 */
64
65 /*
66 * This is a list of the valid names in the
67 * process-specific sub-directories. It is
68 * used in procfs_lookup and procfs_readdir
69 */
70 static struct pfsnames {
71 u_short d_namlen;
72 char d_name[PROCFS_NAMELEN];
73 pfstype d_pfstype;
74 } procent[] = {
75 #define N(s) sizeof(s)-1, s
76 /* namlen, nam, type */
77 { N("file"), Pfile },
78 { N("mem"), Pmem },
79 { N("regs"), Pregs },
80 { N("ctl"), Pctl },
81 { N("status"), Pstatus },
82 { N("note"), Pnote },
83 { N("notepg"), Pnotepg },
84 #undef N
85 };
86 #define Nprocent (sizeof(procent)/sizeof(procent[0]))
87
88 static pid_t atopid __P((const char *, u_int));
89
90 /*
91 * set things up for doing i/o on
92 * the pfsnode (vp). (vp) is locked
93 * on entry, and should be left locked
94 * on exit.
95 *
96 * for procfs we don't need to do anything
97 * in particular for i/o. all that is done
98 * is to support exclusive open on process
99 * memory images.
100 */
101 procfs_open(vp, mode, cred, p)
102 struct vnode *vp;
103 int mode;
104 struct ucred *cred;
105 struct proc *p;
106 {
107 struct pfsnode *pfs = VTOPFS(vp);
108
109 switch (pfs->pfs_type) {
110 case Pmem:
111 if (PFIND(pfs->pfs_pid) == 0)
112 return (ENOENT); /* was ESRCH, jsp */
113
114 if ((pfs->pfs_flags & FWRITE) && (mode & O_EXCL) ||
115 (pfs->pfs_flags & O_EXCL) && (mode & FWRITE))
116 return (EBUSY);
117
118
119 if (mode & FWRITE)
120 pfs->pfs_flags = (mode & (FWRITE|O_EXCL));
121
122 return (0);
123
124 default:
125 break;
126 }
127
128 return (0);
129 }
130
131 /*
132 * close the pfsnode (vp) after doing i/o.
133 * (vp) is not locked on entry or exit.
134 *
135 * nothing to do for procfs other than undo
136 * any exclusive open flag (see _open above).
137 */
138 procfs_close(vp, flag, cred, p)
139 struct vnode *vp;
140 int flag;
141 struct ucred *cred;
142 struct proc *p;
143 {
144 struct pfsnode *pfs = VTOPFS(vp);
145
146 switch (pfs->pfs_type) {
147 case Pmem:
148 if ((flag & FWRITE) && (pfs->pfs_flags & O_EXCL))
149 pfs->pfs_flags &= ~(FWRITE|O_EXCL);
150 break;
151 }
152
153 return (0);
154 }
155
156 /*
157 * do an ioctl operation on pfsnode (vp).
158 * (vp) is not locked on entry or exit.
159 */
160 procfs_ioctl(vp, com, data, fflag, cred, p)
161 struct vnode *vp;
162 int com;
163 caddr_t data;
164 int fflag;
165 struct ucred *cred;
166 struct proc *p;
167 {
168
169 return (ENOTTY);
170 }
171
172 /*
173 * do block mapping for pfsnode (vp).
174 * since we don't use the buffer cache
175 * for procfs this function should never
176 * be called. in any case, it's not clear
177 * what part of the kernel ever makes use
178 * of this function. for sanity, this is the
179 * usual no-op bmap, although returning
180 * (EIO) would be a reasonable alternative.
181 */
182 procfs_bmap(vp, bn, vpp, bnp)
183 struct vnode *vp;
184 daddr_t bn;
185 struct vnode **vpp;
186 daddr_t *bnp;
187 {
188
189 if (vpp != NULL)
190 *vpp = vp;
191 if (bnp != NULL)
192 *bnp = bn;
193 return (0);
194 }
195
196 /*
197 * _inactive is called when the pfsnode
198 * is vrele'd and the reference count goes
199 * to zero. (vp) will be on the vnode free
200 * list, so to get it back vget() must be
201 * used.
202 *
203 * for procfs, check if the process is still
204 * alive and if it isn't then just throw away
205 * the vnode by calling vgone(). this may
206 * be overkill and a waste of time since the
207 * chances are that the process will still be
208 * there and PFIND is not free.
209 *
210 * (vp) is not locked on entry or exit.
211 */
212 procfs_inactive(vp, p)
213 struct vnode *vp;
214 struct proc *p;
215 {
216 struct pfsnode *pfs = VTOPFS(vp);
217
218 if (PFIND(pfs->pfs_pid) == 0)
219 vgone(vp);
220
221 return (0);
222 }
223
/*
 * Called when getnewvnode() wants to reuse an entry from
 * the vnode free list.  The filesystem must release its
 * private data and unhook the node from any private lists
 * at this point; procfs hands that job to procfs_freevp()
 * and passes its result straight back to the caller.
 */
procfs_reclaim(vp)
	struct vnode *vp;
{

	return (procfs_freevp(vp));
}
239
240 /*
241 * _print is used for debugging.
242 * just print a readable description
243 * of (vp).
244 */
245 procfs_print(vp)
246 struct vnode *vp;
247 {
248 struct pfsnode *pfs = VTOPFS(vp);
249
250 printf("tag VT_PROCFS, pid %d, mode %x, flags %x\n",
251 pfs->pfs_pid,
252 pfs->pfs_mode, pfs->pfs_flags);
253 }
254
255 /*
256 * _abortop is called when operations such as
257 * rename and create fail. this entry is responsible
258 * for undoing any side-effects caused by the lookup.
259 * this will always include freeing the pathname buffer.
260 */
261 procfs_abortop(ndp)
262 struct nameidata *ndp;
263 {
264
265 if ((ndp->ni_nameiop & (HASBUF | SAVESTART)) == HASBUF)
266 FREE(ndp->ni_pnbuf, M_NAMEI);
267 return (0);
268 }
269
/*
 * Catch-all entry point for operations that procfs
 * does not support: unconditionally fails with EIO.
 */
procfs_badop()
{

	/* whatever was asked for, it cannot be done here */
	return (EIO);
}
278
279 /*
280 * Invent attributes for pfsnode (vp) and store
281 * them in (vap).
282 * Directories lengths are returned as zero since
283 * any real length would require the genuine size
284 * to be computed, and nothing cares anyway.
285 *
286 * this is relatively minimal for procfs.
287 */
288 procfs_getattr(vp, vap, cred, p)
289 struct vnode *vp;
290 struct vattr *vap;
291 struct ucred *cred;
292 struct proc *p;
293 {
294 struct pfsnode *pfs = VTOPFS(vp);
295 struct proc *procp;
296 int error;
297
298 /* first check the process still exists */
299 procp = PFIND(pfs->pfs_pid);
300 if (procp == 0)
301 return (ENOENT);
302
303 error = 0;
304
305 /* start by zeroing out the attributes */
306 VATTR_NULL(vap);
307
308 /* next do all the common fields */
309 vap->va_type = vp->v_type;
310 vap->va_mode = pfs->pfs_mode;
311 vap->va_fileid = pfs->pfs_fileno;
312 vap->va_flags = 0;
313 vap->va_blocksize = page_size;
314 vap->va_bytes = vap->va_size = 0;
315
316 /*
317 * Make all times be current TOD.
318 * It would be possible to get the process start
319 * time from the p_stat structure, but there's
320 * no "file creation" time stamp anyway, and the
321 * p_stat structure is not addressible if u. gets
322 * swapped out for that process.
323 */
324 microtime(&vap->va_ctime);
325 vap->va_atime = vap->va_mtime = vap->va_ctime;
326
327 /*
328 * now do the object specific fields
329 *
330 * The size could be set from struct reg, but it's hardly
331 * worth the trouble, and it puts some (potentially) machine
332 * dependent data into this machine-independent code. If it
333 * becomes important then this function should break out into
334 * a per-file stat function in the corresponding .c file.
335 */
336
337 switch (pfs->pfs_type) {
338 case Proot:
339 vap->va_nlink = 2;
340 vap->va_uid = 0;
341 vap->va_gid = 0;
342 break;
343
344 case Pproc:
345 vap->va_nlink = 2;
346 vap->va_uid = procp->p_ucred->cr_uid;
347 vap->va_gid = procp->p_ucred->cr_gid;
348 break;
349
350 case Pfile:
351 error = EOPNOTSUPP;
352 break;
353
354 case Pmem:
355 vap->va_nlink = 1;
356 vap->va_bytes = vap->va_size =
357 ctob(procp->p_vmspace->vm_tsize +
358 procp->p_vmspace->vm_dsize +
359 procp->p_vmspace->vm_ssize);
360 vap->va_uid = procp->p_ucred->cr_uid;
361 vap->va_gid = procp->p_ucred->cr_gid;
362 break;
363
364 case Pregs:
365 case Pctl:
366 case Pstatus:
367 case Pnote:
368 case Pnotepg:
369 vap->va_nlink = 1;
370 vap->va_uid = procp->p_ucred->cr_uid;
371 vap->va_gid = procp->p_ucred->cr_gid;
372 break;
373
374 default:
375 panic("procfs_getattr");
376 }
377
378 return (error);
379 }
380
/*
 * Pretend to set attributes on the pfsnode (vp).
 *
 * Nothing is actually changed, but success is reported
 * on purpose: returning an error would make things like
 * creat() fail when they try to truncate the file to
 * length 0, and, worse, would break
 *	echo $note > /proc/$pid/note
 */
procfs_setattr(vp, vap, cred, p)
	struct vnode *vp;
	struct vattr *vap;
	struct ucred *cred;
	struct proc *p;
{

	/* silently accept the request */
	return (0);
}
398
399 /*
400 * implement access checking.
401 *
402 * something very similar to this code is duplicated
403 * throughout the 4bsd kernel and should be moved
404 * into kern/vfs_subr.c sometime.
405 *
406 * actually, the check for super-user is slightly
407 * broken since it will allow read access to write-only
408 * objects. this doesn't cause any particular trouble
409 * but does mean that the i/o entry points need to check
410 * that the operation really does make sense.
411 */
412 procfs_access(vp, mode, cred, p)
413 struct vnode *vp;
414 int mode;
415 struct ucred *cred;
416 struct proc *p;
417 {
418 struct vattr *vap;
419 struct vattr vattr;
420 int error;
421
422 /*
423 * If you're the super-user,
424 * you always get access.
425 */
426 if (cred->cr_uid == (uid_t) 0)
427 return (0);
428 vap = &vattr;
429 if (error = VOP_GETATTR(vp, vap, cred, p))
430 return (error);
431
432 /*
433 * Access check is based on only one of owner, group, public.
434 * If not owner, then check group. If not a member of the
435 * group, then check public access.
436 */
437 if (cred->cr_uid != vap->va_uid) {
438 gid_t *gp;
439 int i;
440
441 mode >>= 3;
442 gp = cred->cr_groups;
443 for (i = 0; i < cred->cr_ngroups; i++, gp++)
444 if (vap->va_gid == *gp)
445 goto found;
446 mode >>= 3;
447 found:
448 ;
449 }
450
451 if ((vap->va_mode & mode) == mode)
452 return (0);
453
454 return (EACCES);
455 }
456
457 /*
458 * lookup. this is incredibly complicated in the
459 * general case, however for most pseudo-filesystems
460 * very little needs to be done.
461 *
462 * (dvp) is the directory in which the lookup takes place.
463 * (ndp) contains all the information about the type of
464 * lookup being done.
465 *
466 * (dvp) is locked on entry.
467 * the job of lookup is to set ndp->ni_dvp, and ndp->ni_vp.
468 * (this changes in 4.4 where all we want is the equivalent
469 * of ndp->ni_vp.)
470 *
471 * unless you want to get a migraine, just make sure your
472 * filesystem doesn't do any locking of its own. otherwise
473 * read and inwardly digest ufs_lookup().
474 */
475 procfs_lookup(dvp, ndp, p)
476 struct vnode *dvp;
477 struct nameidata *ndp;
478 struct proc *p;
479 {
480 char *pname = ndp->ni_ptr;
481 int error = 0;
482 int flag;
483 pid_t pid;
484 struct vnode *nvp;
485 struct pfsnode *pfs;
486 struct proc *procp;
487 int mode;
488 pfstype pfs_type;
489 int i;
490
491 if (ndp->ni_namelen == 1 && *pname == '.') {
492 ndp->ni_vp = dvp;
493 ndp->ni_dvp = dvp;
494 VREF(dvp);
495 return (0);
496 }
497
498 ndp->ni_dvp = dvp;
499 ndp->ni_vp = NULL;
500
501 pfs = VTOPFS(dvp);
502 switch (pfs->pfs_type) {
503 case Proot:
504 if (ndp->ni_isdotdot)
505 return (EIO);
506
507 if (NDEQ(ndp, "curproc", 7))
508 pid = p->p_pid;
509 else
510 pid = atopid(pname, ndp->ni_namelen);
511 if (pid == NO_PID)
512 return (ENOENT);
513
514 procp = PFIND(pid);
515 if (procp == 0)
516 return (ENOENT);
517
518 error = procfs_allocvp(dvp->v_mount, &nvp, pid, Pproc);
519 if (error)
520 return (error);
521
522 nvp->v_type = VDIR;
523 pfs = VTOPFS(nvp);
524
525 ndp->ni_vp = nvp;
526 return (0);
527
528 case Pproc:
529 if (ndp->ni_isdotdot) {
530 ndp->ni_dvp = dvp;
531 error = procfs_root(dvp->v_mount, &ndp->ni_vp);
532 return (error);
533 }
534
535 procp = PFIND(pfs->pfs_pid);
536 if (procp == 0)
537 return (ENOENT);
538
539 for (i = 0; i < Nprocent; i++) {
540 struct pfsnames *dp = &procent[i];
541
542 if (ndp->ni_namelen == dp->d_namlen &&
543 bcmp(pname, dp->d_name, dp->d_namlen) == 0) {
544 pfs_type = dp->d_pfstype;
545 goto found;
546 }
547 }
548 return (ENOENT);
549
550 found:
551 if (pfs_type == Pfile) {
552 nvp = procfs_findtextvp(procp);
553 if (nvp) {
554 VREF(nvp);
555 VOP_LOCK(nvp);
556 } else {
557 error = ENXIO;
558 }
559 } else {
560 error = procfs_allocvp(dvp->v_mount, &nvp,
561 pfs->pfs_pid, pfs_type);
562 if (error)
563 return (error);
564
565 nvp->v_type = VREG;
566 pfs = VTOPFS(nvp);
567 }
568 ndp->ni_vp = nvp;
569 return (error);
570
571 default:
572 return (ENOTDIR);
573 }
574 }
575
576 /*
577 * readdir returns directory entries from pfsnode (vp).
578 *
579 * the strategy here with procfs is to generate a single
580 * directory entry at a time (struct pfsdent) and then
581 * copy that out to userland using uiomove. a more efficent
582 * though more complex implementation, would try to minimize
583 * the number of calls to uiomove(). for procfs, this is
584 * hardly worth the added code complexity.
585 *
586 * this should just be done through read()
587 */
588 procfs_readdir(vp, uio, cred, eofflagp, cookies, ncookies)
589 struct vnode *vp;
590 struct uio *uio;
591 struct ucred *cred;
592 int *eofflagp;
593 u_int *cookies;
594 int ncookies;
595 {
596 struct pfsdent d;
597 struct pfsdent *dp = &d;
598 struct pfsnode *pfs;
599 int error;
600 int count;
601 int i;
602
603 pfs = VTOPFS(vp);
604
605 if (uio->uio_resid < UIO_MX)
606 return (EINVAL);
607 if (uio->uio_offset & (UIO_MX-1))
608 return (EINVAL);
609 if (uio->uio_offset < 0)
610 return (EINVAL);
611
612 error = 0;
613 count = 0;
614 i = uio->uio_offset / UIO_MX;
615
616 switch (pfs->pfs_type) {
617 /*
618 * this is for the process-specific sub-directories.
619 * all that is needed to is copy out all the entries
620 * from the procent[] table (top of this file).
621 */
622 case Pproc: {
623 while (uio->uio_resid >= UIO_MX) {
624 struct pfsnames *dt;
625
626 if (i >= Nprocent) {
627 *eofflagp = 1;
628 break;
629 }
630
631 dt = &procent[i];
632 dp->d_reclen = UIO_MX;
633 dp->d_fileno = PROCFS_FILENO(pfs->pfs_pid, dt->d_pfstype);
634 dp->d_namlen = dt->d_namlen;
635 bcopy(dt->d_name, dp->d_name, sizeof(dt->d_name)-1);
636 error = uiomove((caddr_t) dp, UIO_MX, uio);
637 if (error)
638 break;
639 count += UIO_MX;
640 i++;
641 }
642
643 break;
644
645 }
646
647 /*
648 * this is for the root of the procfs filesystem
649 * what is needed is a special entry for "curproc"
650 * followed by an entry for each process on allproc
651 #ifdef PROCFS_ZOMBIE
652 * and zombproc.
653 #endif
654 */
655
656 case Proot: {
657 int pcnt;
658 #ifdef PROCFS_ZOMBIE
659 int doingzomb = 0;
660 #endif
661 struct proc *p;
662
663 p = allproc;
664
665 #define PROCFS_XFILES 1 /* number of other entries, like "curproc" */
666 pcnt = PROCFS_XFILES;
667
668 while (p && uio->uio_resid >= UIO_MX) {
669 bzero((char *) dp, UIO_MX);
670 dp->d_reclen = UIO_MX;
671
672 switch (i) {
673 case 0:
674 /* ship out entry for "curproc" */
675 dp->d_fileno = PROCFS_FILENO(PID_MAX+1, Pproc);
676 dp->d_namlen = 7;
677 bcopy("curproc", dp->d_name, dp->d_namlen+1);
678 break;
679
680 default:
681 if (pcnt >= i) {
682 dp->d_fileno = PROCFS_FILENO(p->p_pid, Pproc);
683 dp->d_namlen = sprintf(dp->d_name, "%ld", (long) p->p_pid);
684 }
685
686 p = p->p_nxt;
687
688 #ifdef PROCFS_ZOMBIE
689 if (p == 0 && doingzomb == 0) {
690 doingzomb = 1;
691 p = zombproc;
692 }
693 #endif
694
695 if (pcnt++ < i)
696 continue;
697
698 break;
699 }
700 error = uiomove((caddr_t) dp, UIO_MX, uio);
701 if (error)
702 break;
703 count += UIO_MX;
704 i++;
705 }
706
707 break;
708
709 }
710
711 default:
712 error = ENOTDIR;
713 break;
714 }
715
716 uio->uio_offset = i * UIO_MX;
717 if (count == 0)
718 *eofflagp = 1;
719
720 return (error);
721 }
722
723 /*
724 * convert decimal ascii to pid_t
725 */
726 static pid_t
727 atopid(b, len)
728 const char *b;
729 u_int len;
730 {
731 pid_t p = 0;
732
733 while (len--) {
734 char c = *b++;
735 if (c < '0' || c > '9')
736 return (NO_PID);
737 p = 10 * p + (c - '0');
738 if (p > PID_MAX)
739 return (NO_PID);
740 }
741
742 return (p);
743 }
744
745 /*
746 * procfs vnode operations.
747 */
748 struct vnodeops procfs_vnodeops = {
749 procfs_lookup, /* lookup */
750 procfs_create, /* create */
751 procfs_mknod, /* mknod */
752 procfs_open, /* open */
753 procfs_close, /* close */
754 procfs_access, /* access */
755 procfs_getattr, /* getattr */
756 procfs_setattr, /* setattr */
757 procfs_read, /* read */
758 procfs_write, /* write */
759 procfs_ioctl, /* ioctl */
760 procfs_select, /* select */
761 procfs_mmap, /* mmap */
762 procfs_fsync, /* fsync */
763 procfs_seek, /* seek */
764 procfs_remove, /* remove */
765 procfs_link, /* link */
766 procfs_rename, /* rename */
767 procfs_mkdir, /* mkdir */
768 procfs_rmdir, /* rmdir */
769 procfs_symlink, /* symlink */
770 procfs_readdir, /* readdir */
771 procfs_readlink, /* readlink */
772 procfs_abortop, /* abortop */
773 procfs_inactive, /* inactive */
774 procfs_reclaim, /* reclaim */
775 procfs_lock, /* lock */
776 procfs_unlock, /* unlock */
777 procfs_bmap, /* bmap */
778 procfs_strategy, /* strategy */
779 procfs_print, /* print */
780 procfs_islocked, /* islocked */
781 procfs_advlock, /* advlock */
782 };
783