procfs_vnops.c revision 1.70 1 /* $NetBSD: procfs_vnops.c,v 1.70 2000/03/30 02:20:14 simonb Exp $ */
2
3 /*
4 * Copyright (c) 1993 Jan-Simon Pendry
5 * Copyright (c) 1993, 1995
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Jan-Simon Pendry.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95
40 */
41
42 /*
43 * procfs vnode interface
44 */
45
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/time.h>
49 #include <sys/kernel.h>
50 #include <sys/file.h>
51 #include <sys/proc.h>
52 #include <sys/vnode.h>
53 #include <sys/namei.h>
54 #include <sys/malloc.h>
55 #include <sys/dirent.h>
56 #include <sys/resourcevar.h>
57 #include <sys/ptrace.h>
58 #include <sys/stat.h>
59
60 #include <vm/vm.h> /* for PAGE_SIZE */
61
62 #include <machine/reg.h>
63
64 #include <miscfs/genfs/genfs.h>
65 #include <miscfs/procfs/procfs.h>
66
67 /*
68 * Vnode Operations.
69 *
70 */
71
72 /*
73 * This is a list of the valid names in the
74 * process-specific sub-directories. It is
75 * used in procfs_lookup and procfs_readdir
76 */
77 struct proc_target {
78 u_char pt_type;
79 u_char pt_namlen;
80 char *pt_name;
81 pfstype pt_pfstype;
82 int (*pt_valid) __P((struct proc *p));
83 } proc_targets[] = {
84 #define N(s) sizeof(s)-1, s
85 /* name type validp */
86 { DT_DIR, N("."), Pproc, NULL },
87 { DT_DIR, N(".."), Proot, NULL },
88 { DT_REG, N("file"), Pfile, procfs_validfile },
89 { DT_REG, N("mem"), Pmem, NULL },
90 { DT_REG, N("regs"), Pregs, procfs_validregs },
91 { DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs },
92 { DT_REG, N("ctl"), Pctl, NULL },
93 { DT_REG, N("status"), Pstatus, NULL },
94 { DT_REG, N("note"), Pnote, NULL },
95 { DT_REG, N("notepg"), Pnotepg, NULL },
96 { DT_REG, N("map"), Pmap, procfs_validmap },
97 { DT_REG, N("cmdline"), Pcmdline, NULL },
98 #undef N
99 };
100 static int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]);
101
102 int procfs_lookup __P((void *));
103 #define procfs_create genfs_eopnotsupp_rele
104 #define procfs_mknod genfs_eopnotsupp_rele
105 int procfs_open __P((void *));
106 int procfs_close __P((void *));
107 int procfs_access __P((void *));
108 int procfs_getattr __P((void *));
109 int procfs_setattr __P((void *));
110 #define procfs_read procfs_rw
111 #define procfs_write procfs_rw
112 #define procfs_fcntl genfs_fcntl
113 #define procfs_ioctl genfs_enoioctl
114 #define procfs_poll genfs_poll
115 #define procfs_revoke genfs_revoke
116 #define procfs_mmap genfs_eopnotsupp
117 #define procfs_fsync genfs_nullop
118 #define procfs_seek genfs_nullop
119 #define procfs_remove genfs_eopnotsupp_rele
120 int procfs_link __P((void *));
121 #define procfs_rename genfs_eopnotsupp_rele
122 #define procfs_mkdir genfs_eopnotsupp_rele
123 #define procfs_rmdir genfs_eopnotsupp_rele
124 int procfs_symlink __P((void *));
125 int procfs_readdir __P((void *));
126 int procfs_readlink __P((void *));
127 #define procfs_abortop genfs_abortop
128 int procfs_inactive __P((void *));
129 int procfs_reclaim __P((void *));
130 #define procfs_lock genfs_lock
131 #define procfs_unlock genfs_unlock
132 int procfs_bmap __P((void *));
133 #define procfs_strategy genfs_badop
134 int procfs_print __P((void *));
135 int procfs_pathconf __P((void *));
136 #define procfs_islocked genfs_islocked
137 #define procfs_advlock genfs_einval
138 #define procfs_blkatoff genfs_eopnotsupp
139 #define procfs_valloc genfs_eopnotsupp
140 #define procfs_vfree genfs_nullop
141 #define procfs_truncate genfs_eopnotsupp
142 #define procfs_update genfs_nullop
143 #define procfs_bwrite genfs_eopnotsupp
144
145 static pid_t atopid __P((const char *, u_int));
146
147 /*
148 * procfs vnode operations.
149 */
150 int (**procfs_vnodeop_p) __P((void *));
151 struct vnodeopv_entry_desc procfs_vnodeop_entries[] = {
152 { &vop_default_desc, vn_default_error },
153 { &vop_lookup_desc, procfs_lookup }, /* lookup */
154 { &vop_create_desc, procfs_create }, /* create */
155 { &vop_mknod_desc, procfs_mknod }, /* mknod */
156 { &vop_open_desc, procfs_open }, /* open */
157 { &vop_close_desc, procfs_close }, /* close */
158 { &vop_access_desc, procfs_access }, /* access */
159 { &vop_getattr_desc, procfs_getattr }, /* getattr */
160 { &vop_setattr_desc, procfs_setattr }, /* setattr */
161 { &vop_read_desc, procfs_read }, /* read */
162 { &vop_write_desc, procfs_write }, /* write */
163 { &vop_fcntl_desc, procfs_fcntl }, /* fcntl */
164 { &vop_ioctl_desc, procfs_ioctl }, /* ioctl */
165 { &vop_poll_desc, procfs_poll }, /* poll */
166 { &vop_revoke_desc, procfs_revoke }, /* revoke */
167 { &vop_mmap_desc, procfs_mmap }, /* mmap */
168 { &vop_fsync_desc, procfs_fsync }, /* fsync */
169 { &vop_seek_desc, procfs_seek }, /* seek */
170 { &vop_remove_desc, procfs_remove }, /* remove */
171 { &vop_link_desc, procfs_link }, /* link */
172 { &vop_rename_desc, procfs_rename }, /* rename */
173 { &vop_mkdir_desc, procfs_mkdir }, /* mkdir */
174 { &vop_rmdir_desc, procfs_rmdir }, /* rmdir */
175 { &vop_symlink_desc, procfs_symlink }, /* symlink */
176 { &vop_readdir_desc, procfs_readdir }, /* readdir */
177 { &vop_readlink_desc, procfs_readlink }, /* readlink */
178 { &vop_abortop_desc, procfs_abortop }, /* abortop */
179 { &vop_inactive_desc, procfs_inactive }, /* inactive */
180 { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */
181 { &vop_lock_desc, procfs_lock }, /* lock */
182 { &vop_unlock_desc, procfs_unlock }, /* unlock */
183 { &vop_bmap_desc, procfs_bmap }, /* bmap */
184 { &vop_strategy_desc, procfs_strategy }, /* strategy */
185 { &vop_print_desc, procfs_print }, /* print */
186 { &vop_islocked_desc, procfs_islocked }, /* islocked */
187 { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */
188 { &vop_advlock_desc, procfs_advlock }, /* advlock */
189 { &vop_blkatoff_desc, procfs_blkatoff }, /* blkatoff */
190 { &vop_valloc_desc, procfs_valloc }, /* valloc */
191 { &vop_vfree_desc, procfs_vfree }, /* vfree */
192 { &vop_truncate_desc, procfs_truncate }, /* truncate */
193 { &vop_update_desc, procfs_update }, /* update */
194 { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL }
195 };
196 struct vnodeopv_desc procfs_vnodeop_opv_desc =
197 { &procfs_vnodeop_p, procfs_vnodeop_entries };
198 /*
199 * set things up for doing i/o on
200 * the pfsnode (vp). (vp) is locked
201 * on entry, and should be left locked
202 * on exit.
203 *
204 * for procfs we don't need to do anything
205 * in particular for i/o. all that is done
206 * is to support exclusive open on process
207 * memory images.
208 */
209 int
210 procfs_open(v)
211 void *v;
212 {
213 struct vop_open_args /* {
214 struct vnode *a_vp;
215 int a_mode;
216 struct ucred *a_cred;
217 struct proc *a_p;
218 } */ *ap = v;
219 struct pfsnode *pfs = VTOPFS(ap->a_vp);
220 struct proc *p1, *p2;
221 int error;
222
223 p1 = ap->a_p; /* tracer */
224 p2 = PFIND(pfs->pfs_pid); /* traced */
225
226 if (p2 == NULL)
227 return (ENOENT); /* was ESRCH, jsp */
228
229 switch (pfs->pfs_type) {
230 case Pmem:
231 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) ||
232 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE)))
233 return (EBUSY);
234
235 if ((error = procfs_checkioperm(p1, p2)) != 0)
236 return (EPERM);
237
238 if (ap->a_mode & FWRITE)
239 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL);
240
241 return (0);
242
243 default:
244 break;
245 }
246
247 return (0);
248 }
249
250 /*
251 * close the pfsnode (vp) after doing i/o.
252 * (vp) is not locked on entry or exit.
253 *
254 * nothing to do for procfs other than undo
255 * any exclusive open flag (see _open above).
256 */
257 int
258 procfs_close(v)
259 void *v;
260 {
261 struct vop_close_args /* {
262 struct vnode *a_vp;
263 int a_fflag;
264 struct ucred *a_cred;
265 struct proc *a_p;
266 } */ *ap = v;
267 struct pfsnode *pfs = VTOPFS(ap->a_vp);
268
269 switch (pfs->pfs_type) {
270 case Pmem:
271 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL))
272 pfs->pfs_flags &= ~(FWRITE|O_EXCL);
273 break;
274
275 default:
276 break;
277 }
278
279 return (0);
280 }
281
282 /*
283 * do block mapping for pfsnode (vp).
284 * since we don't use the buffer cache
285 * for procfs this function should never
286 * be called. in any case, it's not clear
287 * what part of the kernel ever makes use
288 * of this function. for sanity, this is the
289 * usual no-op bmap, although returning
290 * (EIO) would be a reasonable alternative.
291 */
292 int
293 procfs_bmap(v)
294 void *v;
295 {
296 struct vop_bmap_args /* {
297 struct vnode *a_vp;
298 daddr_t a_bn;
299 struct vnode **a_vpp;
300 daddr_t *a_bnp;
301 int * a_runp;
302 } */ *ap = v;
303
304 if (ap->a_vpp != NULL)
305 *ap->a_vpp = ap->a_vp;
306 if (ap->a_bnp != NULL)
307 *ap->a_bnp = ap->a_bn;
308 if (ap->a_runp != NULL)
309 *ap->a_runp = 0;
310 return (0);
311 }
312
313 /*
314 * _inactive is called when the pfsnode
315 * is vrele'd and the reference count goes
316 * to zero. (vp) will be on the vnode free
317 * list, so to get it back vget() must be
318 * used.
319 *
320 * for procfs, check if the process is still
321 * alive and if it isn't then just throw away
322 * the vnode by calling vgone(). this may
323 * be overkill and a waste of time since the
324 * chances are that the process will still be
325 * there and PFIND is not free.
326 *
327 * (vp) is locked on entry, but must be unlocked on exit.
328 */
329 int
330 procfs_inactive(v)
331 void *v;
332 {
333 struct vop_inactive_args /* {
334 struct vnode *a_vp;
335 struct proc *a_p;
336 } */ *ap = v;
337 struct pfsnode *pfs = VTOPFS(ap->a_vp);
338
339 VOP_UNLOCK(ap->a_vp, 0);
340 if (PFIND(pfs->pfs_pid) == 0)
341 vgone(ap->a_vp);
342
343 return (0);
344 }
345
346 /*
347 * _reclaim is called when getnewvnode()
348 * wants to make use of an entry on the vnode
349 * free list. at this time the filesystem needs
350 * to free any private data and remove the node
351 * from any private lists.
352 */
353 int
354 procfs_reclaim(v)
355 void *v;
356 {
357 struct vop_reclaim_args /* {
358 struct vnode *a_vp;
359 } */ *ap = v;
360
361 return (procfs_freevp(ap->a_vp));
362 }
363
364 /*
365 * Return POSIX pathconf information applicable to special devices.
366 */
367 int
368 procfs_pathconf(v)
369 void *v;
370 {
371 struct vop_pathconf_args /* {
372 struct vnode *a_vp;
373 int a_name;
374 register_t *a_retval;
375 } */ *ap = v;
376
377 switch (ap->a_name) {
378 case _PC_LINK_MAX:
379 *ap->a_retval = LINK_MAX;
380 return (0);
381 case _PC_MAX_CANON:
382 *ap->a_retval = MAX_CANON;
383 return (0);
384 case _PC_MAX_INPUT:
385 *ap->a_retval = MAX_INPUT;
386 return (0);
387 case _PC_PIPE_BUF:
388 *ap->a_retval = PIPE_BUF;
389 return (0);
390 case _PC_CHOWN_RESTRICTED:
391 *ap->a_retval = 1;
392 return (0);
393 case _PC_VDISABLE:
394 *ap->a_retval = _POSIX_VDISABLE;
395 return (0);
396 case _PC_SYNC_IO:
397 *ap->a_retval = 1;
398 return (0);
399 default:
400 return (EINVAL);
401 }
402 /* NOTREACHED */
403 }
404
405 /*
406 * _print is used for debugging.
407 * just print a readable description
408 * of (vp).
409 */
410 int
411 procfs_print(v)
412 void *v;
413 {
414 struct vop_print_args /* {
415 struct vnode *a_vp;
416 } */ *ap = v;
417 struct pfsnode *pfs = VTOPFS(ap->a_vp);
418
419 printf("tag VT_PROCFS, type %d, pid %d, mode %x, flags %lx\n",
420 pfs->pfs_type, pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags);
421 return 0;
422 }
423
424 int
425 procfs_link(v)
426 void *v;
427 {
428 struct vop_link_args /* {
429 struct vnode *a_dvp;
430 struct vnode *a_vp;
431 struct componentname *a_cnp;
432 } */ *ap = v;
433
434 VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
435 vput(ap->a_dvp);
436 return (EROFS);
437 }
438
439 int
440 procfs_symlink(v)
441 void *v;
442 {
443 struct vop_symlink_args /* {
444 struct vnode *a_dvp;
445 struct vnode **a_vpp;
446 struct componentname *a_cnp;
447 struct vattr *a_vap;
448 char *a_target;
449 } */ *ap = v;
450
451 VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
452 vput(ap->a_dvp);
453 return (EROFS);
454 }
455
456 /*
457 * Invent attributes for pfsnode (vp) and store
458 * them in (vap).
459 * Directories lengths are returned as zero since
460 * any real length would require the genuine size
461 * to be computed, and nothing cares anyway.
462 *
463 * this is relatively minimal for procfs.
464 */
465 int
466 procfs_getattr(v)
467 void *v;
468 {
469 struct vop_getattr_args /* {
470 struct vnode *a_vp;
471 struct vattr *a_vap;
472 struct ucred *a_cred;
473 struct proc *a_p;
474 } */ *ap = v;
475 struct pfsnode *pfs = VTOPFS(ap->a_vp);
476 struct vattr *vap = ap->a_vap;
477 struct proc *procp;
478 struct timeval tv;
479 int error;
480
481 /* first check the process still exists */
482 switch (pfs->pfs_type) {
483 case Proot:
484 case Pcurproc:
485 case Pself:
486 procp = 0;
487 break;
488
489 default:
490 procp = PFIND(pfs->pfs_pid);
491 if (procp == 0)
492 return (ENOENT);
493 break;
494 }
495
496 error = 0;
497
498 /* start by zeroing out the attributes */
499 VATTR_NULL(vap);
500
501 /* next do all the common fields */
502 vap->va_type = ap->a_vp->v_type;
503 vap->va_mode = pfs->pfs_mode;
504 vap->va_fileid = pfs->pfs_fileno;
505 vap->va_flags = 0;
506 vap->va_blocksize = PAGE_SIZE;
507
508 /*
509 * Make all times be current TOD.
510 * It would be possible to get the process start
511 * time from the p_stat structure, but there's
512 * no "file creation" time stamp anyway, and the
513 * p_stat structure is not addressible if u. gets
514 * swapped out for that process.
515 */
516 microtime(&tv);
517 TIMEVAL_TO_TIMESPEC(&tv, &vap->va_ctime);
518 vap->va_atime = vap->va_mtime = vap->va_ctime;
519
520 switch (pfs->pfs_type) {
521 case Pmem:
522 case Pregs:
523 case Pfpregs:
524 /*
525 * If the process has exercised some setuid or setgid
526 * privilege, then rip away read/write permission so
527 * that only root can gain access.
528 */
529 if (procp->p_flag & P_SUGID)
530 vap->va_mode &= ~(S_IRUSR|S_IWUSR);
531 /* FALLTHROUGH */
532 case Pctl:
533 case Pstatus:
534 case Pnote:
535 case Pnotepg:
536 case Pmap:
537 case Pcmdline:
538 vap->va_nlink = 1;
539 vap->va_uid = procp->p_ucred->cr_uid;
540 vap->va_gid = procp->p_ucred->cr_gid;
541 break;
542
543 default:
544 break;
545 }
546
547 /*
548 * now do the object specific fields
549 *
550 * The size could be set from struct reg, but it's hardly
551 * worth the trouble, and it puts some (potentially) machine
552 * dependent data into this machine-independent code. If it
553 * becomes important then this function should break out into
554 * a per-file stat function in the corresponding .c file.
555 */
556
557 switch (pfs->pfs_type) {
558 case Proot:
559 /*
560 * Set nlink to 1 to tell fts(3) we don't actually know.
561 */
562 vap->va_nlink = 1;
563 vap->va_uid = 0;
564 vap->va_gid = 0;
565 vap->va_bytes = vap->va_size = DEV_BSIZE;
566 break;
567
568 case Pcurproc: {
569 char buf[16]; /* should be enough */
570 vap->va_nlink = 1;
571 vap->va_uid = 0;
572 vap->va_gid = 0;
573 vap->va_bytes = vap->va_size =
574 sprintf(buf, "%ld", (long)curproc->p_pid);
575 break;
576 }
577
578 case Pself:
579 vap->va_nlink = 1;
580 vap->va_uid = 0;
581 vap->va_gid = 0;
582 vap->va_bytes = vap->va_size = sizeof("curproc");
583 break;
584
585 case Pproc:
586 vap->va_nlink = 2;
587 vap->va_uid = procp->p_ucred->cr_uid;
588 vap->va_gid = procp->p_ucred->cr_gid;
589 vap->va_bytes = vap->va_size = DEV_BSIZE;
590 break;
591
592 case Pfile:
593 error = EOPNOTSUPP;
594 break;
595
596 case Pmem:
597 vap->va_bytes = vap->va_size =
598 ctob(procp->p_vmspace->vm_tsize +
599 procp->p_vmspace->vm_dsize +
600 procp->p_vmspace->vm_ssize);
601 break;
602
603 #if defined(PT_GETREGS) || defined(PT_SETREGS)
604 case Pregs:
605 vap->va_bytes = vap->va_size = sizeof(struct reg);
606 break;
607 #endif
608
609 #if defined(PT_GETFPREGS) || defined(PT_SETFPREGS)
610 case Pfpregs:
611 vap->va_bytes = vap->va_size = sizeof(struct fpreg);
612 break;
613 #endif
614
615 case Pctl:
616 case Pstatus:
617 case Pnote:
618 case Pnotepg:
619 case Pmap:
620 case Pcmdline:
621 vap->va_bytes = vap->va_size = 0;
622 break;
623
624 default:
625 panic("procfs_getattr");
626 }
627
628 return (error);
629 }
630
/*
 * Attribute setting is accepted and silently ignored.
 *
 * Reporting an error here would break callers such as creat(), which
 * sets the file length to 0 after opening; as a consequence something
 * like  echo $note > /proc/$pid/note  would fail.
 */
/*ARGSUSED*/
int
procfs_setattr(v)
	void *v;
{

	return (0);
}
647
648 /*
649 * implement access checking.
650 *
651 * actually, the check for super-user is slightly
652 * broken since it will allow read access to write-only
653 * objects. this doesn't cause any particular trouble
654 * but does mean that the i/o entry points need to check
655 * that the operation really does make sense.
656 */
657 int
658 procfs_access(v)
659 void *v;
660 {
661 struct vop_access_args /* {
662 struct vnode *a_vp;
663 int a_mode;
664 struct ucred *a_cred;
665 struct proc *a_p;
666 } */ *ap = v;
667 struct vattr va;
668 int error;
669
670 if ((error = VOP_GETATTR(ap->a_vp, &va, ap->a_cred, ap->a_p)) != 0)
671 return (error);
672
673 return (vaccess(va.va_type, va.va_mode,
674 va.va_uid, va.va_gid, ap->a_mode, ap->a_cred));
675 }
676
677 /*
678 * lookup. this is incredibly complicated in the
679 * general case, however for most pseudo-filesystems
680 * very little needs to be done.
681 *
682 * Locking isn't hard here, just poorly documented.
683 *
684 * If we're looking up ".", just vref the parent & return it.
685 *
686 * If we're looking up "..", unlock the parent, and lock "..". If everything
687 * went ok, and we're on the last component and the caller requested the
688 * parent locked, try to re-lock the parent. We do this to prevent lock
689 * races.
690 *
691 * For anything else, get the needed node. Then unlock the parent if not
692 * the last component or not LOCKPARENT (i.e. if we wouldn't re-lock the
693 * parent in the .. case).
694 *
695 * We try to exit with the parent locked in error cases.
696 */
697 int
698 procfs_lookup(v)
699 void *v;
700 {
701 struct vop_lookup_args /* {
702 struct vnode * a_dvp;
703 struct vnode ** a_vpp;
704 struct componentname * a_cnp;
705 } */ *ap = v;
706 struct componentname *cnp = ap->a_cnp;
707 struct vnode **vpp = ap->a_vpp;
708 struct vnode *dvp = ap->a_dvp;
709 const char *pname = cnp->cn_nameptr;
710 struct proc_target *pt;
711 struct vnode *fvp;
712 pid_t pid;
713 struct pfsnode *pfs;
714 struct proc *p;
715 int i, error, wantpunlock, iscurproc = 0, isself = 0;
716
717 *vpp = NULL;
718 cnp->cn_flags &= ~PDIRUNLOCK;
719
720 if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
721 return (EROFS);
722
723 if (cnp->cn_namelen == 1 && *pname == '.') {
724 *vpp = dvp;
725 VREF(dvp);
726 return (0);
727 }
728
729 wantpunlock = (~cnp->cn_flags & (LOCKPARENT | ISLASTCN));
730 pfs = VTOPFS(dvp);
731 switch (pfs->pfs_type) {
732 case Proot:
733 /*
734 * Shouldn't get here with .. in the root node.
735 */
736 if (cnp->cn_flags & ISDOTDOT)
737 return (EIO);
738
739 iscurproc = CNEQ(cnp, "curproc", 7);
740 isself = CNEQ(cnp, "self", 4);
741
742 if (iscurproc || isself) {
743 error = procfs_allocvp(dvp->v_mount, vpp, 0,
744 iscurproc ? Pcurproc : Pself);
745 if ((error == 0) && (wantpunlock)) {
746 VOP_UNLOCK(dvp, 0);
747 cnp->cn_flags |= PDIRUNLOCK;
748 }
749 return (error);
750 }
751
752 pid = atopid(pname, cnp->cn_namelen);
753 if (pid == NO_PID)
754 break;
755
756 p = PFIND(pid);
757 if (p == 0)
758 break;
759
760 error = procfs_allocvp(dvp->v_mount, vpp, pid, Pproc);
761 if ((error == 0) && (wantpunlock)) {
762 VOP_UNLOCK(dvp, 0);
763 cnp->cn_flags |= PDIRUNLOCK;
764 }
765 return (error);
766
767 case Pproc:
768 /*
769 * do the .. dance. We unlock the directory, and then
770 * get the root dir. That will automatically return ..
771 * locked. Then if the caller wanted dvp locked, we
772 * re-lock.
773 */
774 if (cnp->cn_flags & ISDOTDOT) {
775 VOP_UNLOCK(dvp, 0);
776 cnp->cn_flags |= PDIRUNLOCK;
777 error = procfs_root(dvp->v_mount, vpp);
778 if ((error == 0) && (wantpunlock == 0) &&
779 ((error = vn_lock(dvp, LK_EXCLUSIVE)) == 0))
780 cnp->cn_flags &= ~PDIRUNLOCK;
781 return (error);
782 }
783
784 p = PFIND(pfs->pfs_pid);
785 if (p == 0)
786 break;
787
788 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) {
789 if (cnp->cn_namelen == pt->pt_namlen &&
790 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
791 (pt->pt_valid == NULL || (*pt->pt_valid)(p)))
792 goto found;
793 }
794 break;
795
796 found:
797 if (pt->pt_pfstype == Pfile) {
798 fvp = procfs_findtextvp(p);
799 /* We already checked that it exists. */
800 VREF(fvp);
801 vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY);
802 if (wantpunlock) {
803 VOP_UNLOCK(dvp, 0);
804 cnp->cn_flags |= PDIRUNLOCK;
805 }
806 *vpp = fvp;
807 return (0);
808 }
809
810 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
811 pt->pt_pfstype);
812 if ((error == 0) && (wantpunlock)) {
813 VOP_UNLOCK(dvp, 0);
814 cnp->cn_flags |= PDIRUNLOCK;
815 }
816 return (error);
817
818 default:
819 return (ENOTDIR);
820 }
821
822 return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS);
823 }
824
825 int
826 procfs_validfile(p)
827 struct proc *p;
828 {
829
830 return (procfs_findtextvp(p) != NULLVP);
831 }
832
833 /*
834 * readdir returns directory entries from pfsnode (vp).
835 *
836 * the strategy here with procfs is to generate a single
837 * directory entry at a time (struct dirent) and then
838 * copy that out to userland using uiomove. a more efficent
839 * though more complex implementation, would try to minimize
840 * the number of calls to uiomove(). for procfs, this is
841 * hardly worth the added code complexity.
842 *
843 * this should just be done through read()
844 */
845 int
846 procfs_readdir(v)
847 void *v;
848 {
849 struct vop_readdir_args /* {
850 struct vnode *a_vp;
851 struct uio *a_uio;
852 struct ucred *a_cred;
853 int *a_eofflag;
854 off_t **a_cookies;
855 int *a_ncookies;
856 } */ *ap = v;
857 struct uio *uio = ap->a_uio;
858 struct dirent d;
859 struct pfsnode *pfs;
860 off_t i;
861 int error;
862 off_t *cookies = NULL;
863 int ncookies;
864
865 pfs = VTOPFS(ap->a_vp);
866
867 if (uio->uio_resid < UIO_MX)
868 return (EINVAL);
869 if (uio->uio_offset < 0)
870 return (EINVAL);
871
872 error = 0;
873 i = uio->uio_offset;
874 memset((caddr_t)&d, 0, UIO_MX);
875 d.d_reclen = UIO_MX;
876 ncookies = uio->uio_resid / UIO_MX;
877
878 switch (pfs->pfs_type) {
879 /*
880 * this is for the process-specific sub-directories.
881 * all that is needed to is copy out all the entries
882 * from the procent[] table (top of this file).
883 */
884 case Pproc: {
885 struct proc *p;
886 struct proc_target *pt;
887
888 if (i >= nproc_targets)
889 return 0;
890
891 p = PFIND(pfs->pfs_pid);
892 if (p == NULL)
893 break;
894
895 if (ap->a_ncookies) {
896 ncookies = min(ncookies, (nproc_targets - i));
897 MALLOC(cookies, off_t *, ncookies * sizeof (off_t),
898 M_TEMP, M_WAITOK);
899 *ap->a_cookies = cookies;
900 }
901
902 for (pt = &proc_targets[i];
903 uio->uio_resid >= UIO_MX && i < nproc_targets; pt++, i++) {
904 if (pt->pt_valid && (*pt->pt_valid)(p) == 0)
905 continue;
906
907 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype);
908 d.d_namlen = pt->pt_namlen;
909 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1);
910 d.d_type = pt->pt_type;
911
912 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
913 break;
914 if (cookies)
915 *cookies++ = i + 1;
916 }
917
918 break;
919 }
920
921 /*
922 * this is for the root of the procfs filesystem
923 * what is needed are special entries for "curproc"
924 * and "self" followed by an entry for each process
925 * on allproc
926 #ifdef PROCFS_ZOMBIE
927 * and deadproc and zombproc.
928 #endif
929 */
930
931 case Proot: {
932 int pcnt = i, nc = 0;
933 const struct proclist_desc *pd;
934 volatile struct proc *p;
935
936 if (pcnt > 3)
937 pcnt = 3;
938 if (ap->a_ncookies) {
939 /*
940 * XXX Potentially allocating too much space here,
941 * but I'm lazy. This loop needs some work.
942 */
943 MALLOC(cookies, off_t *, ncookies * sizeof (off_t),
944 M_TEMP, M_WAITOK);
945 *ap->a_cookies = cookies;
946 }
947 /*
948 * XXX: THIS LOOP ASSUMES THAT allproc IS THE FIRST
949 * PROCLIST IN THE proclists!
950 */
951 proclist_lock_read();
952 pd = proclists;
953 #ifdef PROCFS_ZOMBIE
954 again:
955 #endif
956 for (p = LIST_FIRST(pd->pd_list);
957 p != NULL && uio->uio_resid >= UIO_MX; i++, pcnt++) {
958 switch (i) {
959 case 0: /* `.' */
960 case 1: /* `..' */
961 d.d_fileno = PROCFS_FILENO(0, Proot);
962 d.d_namlen = i + 1;
963 memcpy(d.d_name, "..", d.d_namlen);
964 d.d_name[i + 1] = '\0';
965 d.d_type = DT_DIR;
966 break;
967
968 case 2:
969 d.d_fileno = PROCFS_FILENO(0, Pcurproc);
970 d.d_namlen = sizeof("curproc") - 1;
971 memcpy(d.d_name, "curproc", sizeof("curproc"));
972 d.d_type = DT_LNK;
973 break;
974
975 case 3:
976 d.d_fileno = PROCFS_FILENO(0, Pself);
977 d.d_namlen = sizeof("self") - 1;
978 memcpy(d.d_name, "self", sizeof("self"));
979 d.d_type = DT_LNK;
980 break;
981
982 default:
983 while (pcnt < i) {
984 pcnt++;
985 p = LIST_NEXT(p, p_list);
986 if (!p)
987 goto done;
988 }
989 d.d_fileno = PROCFS_FILENO(p->p_pid, Pproc);
990 d.d_namlen = sprintf(d.d_name, "%ld",
991 (long)p->p_pid);
992 d.d_type = DT_REG;
993 p = p->p_list.le_next;
994 break;
995 }
996
997 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
998 break;
999 nc++;
1000 if (cookies)
1001 *cookies++ = i + 1;
1002 }
1003 done:
1004
1005 #ifdef PROCFS_ZOMBIE
1006 pd++;
1007 if (p == NULL && pd->pd_list != NULL)
1008 goto again;
1009 #endif
1010 proclist_unlock_read();
1011 ncookies = nc;
1012
1013 break;
1014
1015 }
1016
1017 default:
1018 error = ENOTDIR;
1019 break;
1020 }
1021
1022 if (ap->a_ncookies) {
1023 if (error) {
1024 if (cookies)
1025 FREE(*ap->a_cookies, M_TEMP);
1026 *ap->a_ncookies = 0;
1027 *ap->a_cookies = NULL;
1028 } else
1029 *ap->a_ncookies = ncookies;
1030 }
1031 uio->uio_offset = i;
1032 return (error);
1033 }
1034
1035 /*
1036 * readlink reads the link of `curproc'
1037 */
1038 int
1039 procfs_readlink(v)
1040 void *v;
1041 {
1042 struct vop_readlink_args *ap = v;
1043 char buf[16]; /* should be enough */
1044 int len;
1045
1046 if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pcurproc))
1047 len = sprintf(buf, "%ld", (long)curproc->p_pid);
1048 else if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pself))
1049 len = sprintf(buf, "%s", "curproc");
1050 else
1051 return (EINVAL);
1052
1053 return (uiomove((caddr_t)buf, len, ap->a_uio));
1054 }
1055
1056 /*
1057 * convert decimal ascii to pid_t
1058 */
1059 static pid_t
1060 atopid(b, len)
1061 const char *b;
1062 u_int len;
1063 {
1064 pid_t p = 0;
1065
1066 while (len--) {
1067 char c = *b++;
1068 if (c < '0' || c > '9')
1069 return (NO_PID);
1070 p = 10 * p + (c - '0');
1071 if (p > PID_MAX)
1072 return (NO_PID);
1073 }
1074
1075 return (p);
1076 }
1077