procfs_vnops.c revision 1.64 1 /* $NetBSD: procfs_vnops.c,v 1.64 1999/07/25 18:33:47 thorpej Exp $ */
2
3 /*
4 * Copyright (c) 1993 Jan-Simon Pendry
5 * Copyright (c) 1993, 1995
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Jan-Simon Pendry.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95
40 */
41
42 /*
43 * procfs vnode interface
44 */
45
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/time.h>
49 #include <sys/kernel.h>
50 #include <sys/file.h>
51 #include <sys/proc.h>
52 #include <sys/vnode.h>
53 #include <sys/namei.h>
54 #include <sys/malloc.h>
55 #include <sys/dirent.h>
56 #include <sys/resourcevar.h>
57 #include <sys/ptrace.h>
58 #include <sys/stat.h>
59
60 #include <vm/vm.h> /* for PAGE_SIZE */
61
62 #include <machine/reg.h>
63
64 #include <miscfs/genfs/genfs.h>
65 #include <miscfs/procfs/procfs.h>
66
67 /*
68 * Vnode Operations.
69 *
70 */
71
72 /*
73 * This is a list of the valid names in the
74 * process-specific sub-directories. It is
75 * used in procfs_lookup and procfs_readdir
76 */
77 struct proc_target {
78 u_char pt_type;
79 u_char pt_namlen;
80 char *pt_name;
81 pfstype pt_pfstype;
82 int (*pt_valid) __P((struct proc *p));
83 } proc_targets[] = {
84 #define N(s) sizeof(s)-1, s
85 /* name type validp */
86 { DT_DIR, N("."), Pproc, NULL },
87 { DT_DIR, N(".."), Proot, NULL },
88 { DT_REG, N("file"), Pfile, procfs_validfile },
89 { DT_REG, N("mem"), Pmem, NULL },
90 { DT_REG, N("regs"), Pregs, procfs_validregs },
91 { DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs },
92 { DT_REG, N("ctl"), Pctl, NULL },
93 { DT_REG, N("status"), Pstatus, NULL },
94 { DT_REG, N("note"), Pnote, NULL },
95 { DT_REG, N("notepg"), Pnotepg, NULL },
96 { DT_REG, N("map"), Pmap, procfs_validmap },
97 { DT_REG, N("cmdline"), Pcmdline, NULL },
98 #undef N
99 };
100 static int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]);
101
102 static pid_t atopid __P((const char *, u_int));
103
104 int procfs_lookup __P((void *));
105 #define procfs_create genfs_eopnotsupp_rele
106 #define procfs_mknod genfs_eopnotsupp_rele
107 int procfs_open __P((void *));
108 int procfs_close __P((void *));
109 int procfs_access __P((void *));
110 int procfs_getattr __P((void *));
111 int procfs_setattr __P((void *));
112 #define procfs_read procfs_rw
113 #define procfs_write procfs_rw
114 #define procfs_ioctl genfs_enoioctl
115 #define procfs_poll genfs_poll
116 #define procfs_revoke genfs_revoke
117 #define procfs_mmap genfs_eopnotsupp
118 #define procfs_fsync genfs_nullop
119 #define procfs_seek genfs_nullop
120 #define procfs_remove genfs_eopnotsupp_rele
121 int procfs_link __P((void *));
122 #define procfs_rename genfs_eopnotsupp_rele
123 #define procfs_mkdir genfs_eopnotsupp_rele
124 #define procfs_rmdir genfs_eopnotsupp_rele
125 int procfs_symlink __P((void *));
126 int procfs_readdir __P((void *));
127 int procfs_readlink __P((void *));
128 #define procfs_abortop genfs_abortop
129 int procfs_inactive __P((void *));
130 int procfs_reclaim __P((void *));
131 #define procfs_lock genfs_lock
132 #define procfs_unlock genfs_unlock
133 int procfs_bmap __P((void *));
134 #define procfs_strategy genfs_badop
135 int procfs_print __P((void *));
136 int procfs_pathconf __P((void *));
137 #define procfs_islocked genfs_islocked
138 #define procfs_advlock genfs_einval
139 #define procfs_blkatoff genfs_eopnotsupp
140 #define procfs_valloc genfs_eopnotsupp
141 #define procfs_vfree genfs_nullop
142 #define procfs_truncate genfs_eopnotsupp
143 #define procfs_update genfs_nullop
144 #define procfs_bwrite genfs_eopnotsupp
145
146 static pid_t atopid __P((const char *, u_int));
147
148 /*
149 * procfs vnode operations.
150 */
151 int (**procfs_vnodeop_p) __P((void *));
152 struct vnodeopv_entry_desc procfs_vnodeop_entries[] = {
153 { &vop_default_desc, vn_default_error },
154 { &vop_lookup_desc, procfs_lookup }, /* lookup */
155 { &vop_create_desc, procfs_create }, /* create */
156 { &vop_mknod_desc, procfs_mknod }, /* mknod */
157 { &vop_open_desc, procfs_open }, /* open */
158 { &vop_close_desc, procfs_close }, /* close */
159 { &vop_access_desc, procfs_access }, /* access */
160 { &vop_getattr_desc, procfs_getattr }, /* getattr */
161 { &vop_setattr_desc, procfs_setattr }, /* setattr */
162 { &vop_read_desc, procfs_read }, /* read */
163 { &vop_write_desc, procfs_write }, /* write */
164 { &vop_ioctl_desc, procfs_ioctl }, /* ioctl */
165 { &vop_poll_desc, procfs_poll }, /* poll */
166 { &vop_revoke_desc, procfs_revoke }, /* revoke */
167 { &vop_mmap_desc, procfs_mmap }, /* mmap */
168 { &vop_fsync_desc, procfs_fsync }, /* fsync */
169 { &vop_seek_desc, procfs_seek }, /* seek */
170 { &vop_remove_desc, procfs_remove }, /* remove */
171 { &vop_link_desc, procfs_link }, /* link */
172 { &vop_rename_desc, procfs_rename }, /* rename */
173 { &vop_mkdir_desc, procfs_mkdir }, /* mkdir */
174 { &vop_rmdir_desc, procfs_rmdir }, /* rmdir */
175 { &vop_symlink_desc, procfs_symlink }, /* symlink */
176 { &vop_readdir_desc, procfs_readdir }, /* readdir */
177 { &vop_readlink_desc, procfs_readlink }, /* readlink */
178 { &vop_abortop_desc, procfs_abortop }, /* abortop */
179 { &vop_inactive_desc, procfs_inactive }, /* inactive */
180 { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */
181 { &vop_lock_desc, procfs_lock }, /* lock */
182 { &vop_unlock_desc, procfs_unlock }, /* unlock */
183 { &vop_bmap_desc, procfs_bmap }, /* bmap */
184 { &vop_strategy_desc, procfs_strategy }, /* strategy */
185 { &vop_print_desc, procfs_print }, /* print */
186 { &vop_islocked_desc, procfs_islocked }, /* islocked */
187 { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */
188 { &vop_advlock_desc, procfs_advlock }, /* advlock */
189 { &vop_blkatoff_desc, procfs_blkatoff }, /* blkatoff */
190 { &vop_valloc_desc, procfs_valloc }, /* valloc */
191 { &vop_vfree_desc, procfs_vfree }, /* vfree */
192 { &vop_truncate_desc, procfs_truncate }, /* truncate */
193 { &vop_update_desc, procfs_update }, /* update */
194 { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL }
195 };
196 struct vnodeopv_desc procfs_vnodeop_opv_desc =
197 { &procfs_vnodeop_p, procfs_vnodeop_entries };
198 /*
199 * set things up for doing i/o on
200 * the pfsnode (vp). (vp) is locked
201 * on entry, and should be left locked
202 * on exit.
203 *
204 * for procfs we don't need to do anything
205 * in particular for i/o. all that is done
206 * is to support exclusive open on process
207 * memory images.
208 */
209 int
210 procfs_open(v)
211 void *v;
212 {
213 struct vop_open_args /* {
214 struct vnode *a_vp;
215 int a_mode;
216 struct ucred *a_cred;
217 struct proc *a_p;
218 } */ *ap = v;
219 struct pfsnode *pfs = VTOPFS(ap->a_vp);
220 struct proc *p1, *p2;
221 int error;
222
223 p1 = ap->a_p; /* tracer */
224 p2 = PFIND(pfs->pfs_pid); /* traced */
225
226 if (p2 == NULL)
227 return (ENOENT); /* was ESRCH, jsp */
228
229 switch (pfs->pfs_type) {
230 case Pmem:
231 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) ||
232 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE)))
233 return (EBUSY);
234
235 if ((error = procfs_checkioperm(p1, p2)) != 0)
236 return (EPERM);
237
238 if (ap->a_mode & FWRITE)
239 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL);
240
241 return (0);
242
243 default:
244 break;
245 }
246
247 return (0);
248 }
249
250 /*
251 * close the pfsnode (vp) after doing i/o.
252 * (vp) is not locked on entry or exit.
253 *
254 * nothing to do for procfs other than undo
255 * any exclusive open flag (see _open above).
256 */
257 int
258 procfs_close(v)
259 void *v;
260 {
261 struct vop_close_args /* {
262 struct vnode *a_vp;
263 int a_fflag;
264 struct ucred *a_cred;
265 struct proc *a_p;
266 } */ *ap = v;
267 struct pfsnode *pfs = VTOPFS(ap->a_vp);
268
269 switch (pfs->pfs_type) {
270 case Pmem:
271 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL))
272 pfs->pfs_flags &= ~(FWRITE|O_EXCL);
273 break;
274
275 default:
276 break;
277 }
278
279 return (0);
280 }
281
282 /*
283 * do block mapping for pfsnode (vp).
284 * since we don't use the buffer cache
285 * for procfs this function should never
286 * be called. in any case, it's not clear
287 * what part of the kernel ever makes use
288 * of this function. for sanity, this is the
289 * usual no-op bmap, although returning
290 * (EIO) would be a reasonable alternative.
291 */
292 int
293 procfs_bmap(v)
294 void *v;
295 {
296 struct vop_bmap_args /* {
297 struct vnode *a_vp;
298 daddr_t a_bn;
299 struct vnode **a_vpp;
300 daddr_t *a_bnp;
301 int * a_runp;
302 } */ *ap = v;
303
304 if (ap->a_vpp != NULL)
305 *ap->a_vpp = ap->a_vp;
306 if (ap->a_bnp != NULL)
307 *ap->a_bnp = ap->a_bn;
308 if (ap->a_runp != NULL)
309 *ap->a_runp = 0;
310 return (0);
311 }
312
313 /*
314 * _inactive is called when the pfsnode
315 * is vrele'd and the reference count goes
316 * to zero. (vp) will be on the vnode free
317 * list, so to get it back vget() must be
318 * used.
319 *
320 * for procfs, check if the process is still
321 * alive and if it isn't then just throw away
322 * the vnode by calling vgone(). this may
323 * be overkill and a waste of time since the
324 * chances are that the process will still be
325 * there and PFIND is not free.
326 *
327 * (vp) is locked on entry, but must be unlocked on exit.
328 */
329 int
330 procfs_inactive(v)
331 void *v;
332 {
333 struct vop_inactive_args /* {
334 struct vnode *a_vp;
335 struct proc *a_p;
336 } */ *ap = v;
337 struct pfsnode *pfs = VTOPFS(ap->a_vp);
338
339 VOP_UNLOCK(ap->a_vp, 0);
340 if (PFIND(pfs->pfs_pid) == 0)
341 vgone(ap->a_vp);
342
343 return (0);
344 }
345
346 /*
347 * _reclaim is called when getnewvnode()
348 * wants to make use of an entry on the vnode
349 * free list. at this time the filesystem needs
350 * to free any private data and remove the node
351 * from any private lists.
352 */
353 int
354 procfs_reclaim(v)
355 void *v;
356 {
357 struct vop_reclaim_args /* {
358 struct vnode *a_vp;
359 } */ *ap = v;
360
361 return (procfs_freevp(ap->a_vp));
362 }
363
364 /*
365 * Return POSIX pathconf information applicable to special devices.
366 */
367 int
368 procfs_pathconf(v)
369 void *v;
370 {
371 struct vop_pathconf_args /* {
372 struct vnode *a_vp;
373 int a_name;
374 register_t *a_retval;
375 } */ *ap = v;
376
377 switch (ap->a_name) {
378 case _PC_LINK_MAX:
379 *ap->a_retval = LINK_MAX;
380 return (0);
381 case _PC_MAX_CANON:
382 *ap->a_retval = MAX_CANON;
383 return (0);
384 case _PC_MAX_INPUT:
385 *ap->a_retval = MAX_INPUT;
386 return (0);
387 case _PC_PIPE_BUF:
388 *ap->a_retval = PIPE_BUF;
389 return (0);
390 case _PC_CHOWN_RESTRICTED:
391 *ap->a_retval = 1;
392 return (0);
393 case _PC_VDISABLE:
394 *ap->a_retval = _POSIX_VDISABLE;
395 return (0);
396 case _PC_SYNC_IO:
397 *ap->a_retval = 1;
398 return (0);
399 default:
400 return (EINVAL);
401 }
402 /* NOTREACHED */
403 }
404
405 /*
406 * _print is used for debugging.
407 * just print a readable description
408 * of (vp).
409 */
410 int
411 procfs_print(v)
412 void *v;
413 {
414 struct vop_print_args /* {
415 struct vnode *a_vp;
416 } */ *ap = v;
417 struct pfsnode *pfs = VTOPFS(ap->a_vp);
418
419 printf("tag VT_PROCFS, type %d, pid %d, mode %x, flags %lx\n",
420 pfs->pfs_type, pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags);
421 return 0;
422 }
423
424 int
425 procfs_link(v)
426 void *v;
427 {
428 struct vop_link_args /* {
429 struct vnode *a_dvp;
430 struct vnode *a_vp;
431 struct componentname *a_cnp;
432 } */ *ap = v;
433
434 VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
435 vput(ap->a_dvp);
436 return (EROFS);
437 }
438
439 int
440 procfs_symlink(v)
441 void *v;
442 {
443 struct vop_symlink_args /* {
444 struct vnode *a_dvp;
445 struct vnode **a_vpp;
446 struct componentname *a_cnp;
447 struct vattr *a_vap;
448 char *a_target;
449 } */ *ap = v;
450
451 VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
452 vput(ap->a_dvp);
453 return (EROFS);
454 }
455
456 /*
457 * Invent attributes for pfsnode (vp) and store
458 * them in (vap).
459 * Directories lengths are returned as zero since
460 * any real length would require the genuine size
461 * to be computed, and nothing cares anyway.
462 *
463 * this is relatively minimal for procfs.
464 */
465 int
466 procfs_getattr(v)
467 void *v;
468 {
469 struct vop_getattr_args /* {
470 struct vnode *a_vp;
471 struct vattr *a_vap;
472 struct ucred *a_cred;
473 struct proc *a_p;
474 } */ *ap = v;
475 struct pfsnode *pfs = VTOPFS(ap->a_vp);
476 struct vattr *vap = ap->a_vap;
477 struct proc *procp;
478 struct timeval tv;
479 int error;
480
481 /* first check the process still exists */
482 switch (pfs->pfs_type) {
483 case Proot:
484 case Pcurproc:
485 procp = 0;
486 break;
487
488 default:
489 procp = PFIND(pfs->pfs_pid);
490 if (procp == 0)
491 return (ENOENT);
492 break;
493 }
494
495 error = 0;
496
497 /* start by zeroing out the attributes */
498 VATTR_NULL(vap);
499
500 /* next do all the common fields */
501 vap->va_type = ap->a_vp->v_type;
502 vap->va_mode = pfs->pfs_mode;
503 vap->va_fileid = pfs->pfs_fileno;
504 vap->va_flags = 0;
505 vap->va_blocksize = PAGE_SIZE;
506
507 /*
508 * Make all times be current TOD.
509 * It would be possible to get the process start
510 * time from the p_stat structure, but there's
511 * no "file creation" time stamp anyway, and the
512 * p_stat structure is not addressible if u. gets
513 * swapped out for that process.
514 */
515 microtime(&tv);
516 TIMEVAL_TO_TIMESPEC(&tv, &vap->va_ctime);
517 vap->va_atime = vap->va_mtime = vap->va_ctime;
518
519 switch (pfs->pfs_type) {
520 case Pmem:
521 case Pregs:
522 case Pfpregs:
523 /*
524 * If the process has exercised some setuid or setgid
525 * privilege, then rip away read/write permission so
526 * that only root can gain access.
527 */
528 if (procp->p_flag & P_SUGID)
529 vap->va_mode &= ~(S_IRUSR|S_IWUSR);
530 /* FALLTHROUGH */
531 case Pctl:
532 case Pstatus:
533 case Pnote:
534 case Pnotepg:
535 case Pmap:
536 case Pcmdline:
537 vap->va_nlink = 1;
538 vap->va_uid = procp->p_ucred->cr_uid;
539 vap->va_gid = procp->p_ucred->cr_gid;
540 break;
541
542 default:
543 break;
544 }
545
546 /*
547 * now do the object specific fields
548 *
549 * The size could be set from struct reg, but it's hardly
550 * worth the trouble, and it puts some (potentially) machine
551 * dependent data into this machine-independent code. If it
552 * becomes important then this function should break out into
553 * a per-file stat function in the corresponding .c file.
554 */
555
556 switch (pfs->pfs_type) {
557 case Proot:
558 /*
559 * Set nlink to 1 to tell fts(3) we don't actually know.
560 */
561 vap->va_nlink = 1;
562 vap->va_uid = 0;
563 vap->va_gid = 0;
564 vap->va_bytes = vap->va_size = DEV_BSIZE;
565 break;
566
567 case Pcurproc: {
568 char buf[16]; /* should be enough */
569 vap->va_nlink = 1;
570 vap->va_uid = 0;
571 vap->va_gid = 0;
572 vap->va_bytes = vap->va_size =
573 sprintf(buf, "%ld", (long)curproc->p_pid);
574 break;
575 }
576
577 case Pproc:
578 vap->va_nlink = 2;
579 vap->va_uid = procp->p_ucred->cr_uid;
580 vap->va_gid = procp->p_ucred->cr_gid;
581 vap->va_bytes = vap->va_size = DEV_BSIZE;
582 break;
583
584 case Pfile:
585 error = EOPNOTSUPP;
586 break;
587
588 case Pmem:
589 vap->va_bytes = vap->va_size =
590 ctob(procp->p_vmspace->vm_tsize +
591 procp->p_vmspace->vm_dsize +
592 procp->p_vmspace->vm_ssize);
593 break;
594
595 #if defined(PT_GETREGS) || defined(PT_SETREGS)
596 case Pregs:
597 vap->va_bytes = vap->va_size = sizeof(struct reg);
598 break;
599 #endif
600
601 #if defined(PT_GETFPREGS) || defined(PT_SETFPREGS)
602 case Pfpregs:
603 vap->va_bytes = vap->va_size = sizeof(struct fpreg);
604 break;
605 #endif
606
607 case Pctl:
608 case Pstatus:
609 case Pnote:
610 case Pnotepg:
611 case Pmap:
612 case Pcmdline:
613 vap->va_bytes = vap->va_size = 0;
614 break;
615
616 default:
617 panic("procfs_getattr");
618 }
619
620 return (error);
621 }
622
/*
 * Attribute setting is faked: nothing is changed and success is
 * always reported.  Returning an error is not an option, because
 * creat() would then fail when it tries to set the file length to 0,
 * and so would something like "echo $note > /proc/$pid/note".
 */
/*ARGSUSED*/
int
procfs_setattr(v)
	void *v;
{

	return (0);
}
639
640 /*
641 * implement access checking.
642 *
643 * actually, the check for super-user is slightly
644 * broken since it will allow read access to write-only
645 * objects. this doesn't cause any particular trouble
646 * but does mean that the i/o entry points need to check
647 * that the operation really does make sense.
648 */
649 int
650 procfs_access(v)
651 void *v;
652 {
653 struct vop_access_args /* {
654 struct vnode *a_vp;
655 int a_mode;
656 struct ucred *a_cred;
657 struct proc *a_p;
658 } */ *ap = v;
659 struct vattr va;
660 int error;
661
662 if ((error = VOP_GETATTR(ap->a_vp, &va, ap->a_cred, ap->a_p)) != 0)
663 return (error);
664
665 return (vaccess(va.va_type, va.va_mode,
666 va.va_uid, va.va_gid, ap->a_mode, ap->a_cred));
667 }
668
669 /*
670 * lookup. this is incredibly complicated in the
671 * general case, however for most pseudo-filesystems
672 * very little needs to be done.
673 *
674 * Locking isn't hard here, just poorly documented.
675 *
676 * If we're looking up ".", just vref the parent & return it.
677 *
678 * If we're looking up "..", unlock the parent, and lock "..". If everything
679 * went ok, and we're on the last component and the caller requested the
680 * parent locked, try to re-lock the parent. We do this to prevent lock
681 * races.
682 *
683 * For anything else, get the needed node. Then unlock the parent if not
684 * the last component or not LOCKPARENT (i.e. if we wouldn't re-lock the
685 * parent in the .. case).
686 *
687 * We try to exit with the parent locked in error cases.
688 */
689 int
690 procfs_lookup(v)
691 void *v;
692 {
693 struct vop_lookup_args /* {
694 struct vnode * a_dvp;
695 struct vnode ** a_vpp;
696 struct componentname * a_cnp;
697 } */ *ap = v;
698 struct componentname *cnp = ap->a_cnp;
699 struct vnode **vpp = ap->a_vpp;
700 struct vnode *dvp = ap->a_dvp;
701 const char *pname = cnp->cn_nameptr;
702 struct proc_target *pt;
703 struct vnode *fvp;
704 pid_t pid;
705 struct pfsnode *pfs;
706 struct proc *p;
707 int i, error, wantpunlock;
708
709 *vpp = NULL;
710 cnp->cn_flags &= ~PDIRUNLOCK;
711
712 if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
713 return (EROFS);
714
715 if (cnp->cn_namelen == 1 && *pname == '.') {
716 *vpp = dvp;
717 VREF(dvp);
718 return (0);
719 }
720
721 wantpunlock = (~cnp->cn_flags & (LOCKPARENT | ISLASTCN));
722 pfs = VTOPFS(dvp);
723 switch (pfs->pfs_type) {
724 case Proot:
725 /*
726 * Shouldn't get here with .. in the root node.
727 */
728 if (cnp->cn_flags & ISDOTDOT)
729 return (EIO);
730
731 if (CNEQ(cnp, "curproc", 7)) {
732 error = procfs_allocvp(dvp->v_mount, vpp, 0, Pcurproc);
733 if ((error == 0) && (wantpunlock)) {
734 VOP_UNLOCK(dvp, 0);
735 cnp->cn_flags |= PDIRUNLOCK;
736 }
737 return (error);
738 }
739
740 pid = atopid(pname, cnp->cn_namelen);
741 if (pid == NO_PID)
742 break;
743
744 p = PFIND(pid);
745 if (p == 0)
746 break;
747
748 error = procfs_allocvp(dvp->v_mount, vpp, pid, Pproc);
749 if ((error == 0) && (wantpunlock)) {
750 VOP_UNLOCK(dvp, 0);
751 cnp->cn_flags |= PDIRUNLOCK;
752 }
753 return (error);
754
755 case Pproc:
756 /*
757 * do the .. dance. We unlock the directory, and then
758 * get the root dir. That will automatically return ..
759 * locked. Then if the caller wanted dvp locked, we
760 * re-lock.
761 */
762 if (cnp->cn_flags & ISDOTDOT) {
763 VOP_UNLOCK(dvp, 0);
764 cnp->cn_flags |= PDIRUNLOCK;
765 error = procfs_root(dvp->v_mount, vpp);
766 if ((error == 0) && (wantpunlock == 0) &&
767 ((error = vn_lock(dvp, LK_EXCLUSIVE)) == 0))
768 cnp->cn_flags &= ~PDIRUNLOCK;
769 return (error);
770 }
771
772 p = PFIND(pfs->pfs_pid);
773 if (p == 0)
774 break;
775
776 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) {
777 if (cnp->cn_namelen == pt->pt_namlen &&
778 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
779 (pt->pt_valid == NULL || (*pt->pt_valid)(p)))
780 goto found;
781 }
782 break;
783
784 found:
785 if (pt->pt_pfstype == Pfile) {
786 fvp = procfs_findtextvp(p);
787 /* We already checked that it exists. */
788 VREF(fvp);
789 vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY);
790 if (wantpunlock) {
791 VOP_UNLOCK(dvp, 0);
792 cnp->cn_flags |= PDIRUNLOCK;
793 }
794 *vpp = fvp;
795 return (0);
796 }
797
798 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
799 pt->pt_pfstype);
800 if ((error == 0) && (wantpunlock)) {
801 VOP_UNLOCK(dvp, 0);
802 cnp->cn_flags |= PDIRUNLOCK;
803 }
804 return (error);
805
806 default:
807 return (ENOTDIR);
808 }
809
810 return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS);
811 }
812
813 int
814 procfs_validfile(p)
815 struct proc *p;
816 {
817
818 return (procfs_findtextvp(p) != NULLVP);
819 }
820
821 /*
822 * readdir returns directory entries from pfsnode (vp).
823 *
824 * the strategy here with procfs is to generate a single
825 * directory entry at a time (struct dirent) and then
826 * copy that out to userland using uiomove. a more efficent
827 * though more complex implementation, would try to minimize
828 * the number of calls to uiomove(). for procfs, this is
829 * hardly worth the added code complexity.
830 *
831 * this should just be done through read()
832 */
833 int
834 procfs_readdir(v)
835 void *v;
836 {
837 struct vop_readdir_args /* {
838 struct vnode *a_vp;
839 struct uio *a_uio;
840 struct ucred *a_cred;
841 int *a_eofflag;
842 off_t **a_cookies;
843 int *a_ncookies;
844 } */ *ap = v;
845 struct uio *uio = ap->a_uio;
846 struct dirent d;
847 struct pfsnode *pfs;
848 int i;
849 int error;
850 off_t *cookies = NULL;
851 int ncookies;
852
853 pfs = VTOPFS(ap->a_vp);
854
855 if (uio->uio_resid < UIO_MX)
856 return (EINVAL);
857 if (uio->uio_offset < 0)
858 return (EINVAL);
859
860 error = 0;
861 i = uio->uio_offset;
862 memset((caddr_t)&d, 0, UIO_MX);
863 d.d_reclen = UIO_MX;
864 ncookies = uio->uio_resid / UIO_MX;
865
866 switch (pfs->pfs_type) {
867 /*
868 * this is for the process-specific sub-directories.
869 * all that is needed to is copy out all the entries
870 * from the procent[] table (top of this file).
871 */
872 case Pproc: {
873 struct proc *p;
874 struct proc_target *pt;
875
876 p = PFIND(pfs->pfs_pid);
877 if (p == NULL)
878 break;
879
880 if (ap->a_ncookies) {
881 ncookies = min(ncookies, (nproc_targets - i));
882 MALLOC(cookies, off_t *, ncookies * sizeof (off_t),
883 M_TEMP, M_WAITOK);
884 *ap->a_cookies = cookies;
885 }
886
887 for (pt = &proc_targets[i];
888 uio->uio_resid >= UIO_MX && i < nproc_targets; pt++, i++) {
889 if (pt->pt_valid && (*pt->pt_valid)(p) == 0)
890 continue;
891
892 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype);
893 d.d_namlen = pt->pt_namlen;
894 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1);
895 d.d_type = pt->pt_type;
896
897 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
898 break;
899 if (cookies)
900 *cookies++ = i + 1;
901 }
902
903 break;
904 }
905
906 /*
907 * this is for the root of the procfs filesystem
908 * what is needed is a special entry for "curproc"
909 * followed by an entry for each process on allproc
910 #ifdef PROCFS_ZOMBIE
911 * and deadproc and zombproc.
912 #endif
913 */
914
915 case Proot: {
916 int pcnt = i, nc = 0;
917 const struct proclist_desc *pd;
918 volatile struct proc *p;
919
920 if (pcnt > 3)
921 pcnt = 3;
922 if (ap->a_ncookies) {
923 /*
924 * XXX Potentially allocating too much space here,
925 * but I'm lazy. This loop needs some work.
926 */
927 MALLOC(cookies, off_t *, ncookies * sizeof (off_t),
928 M_TEMP, M_WAITOK);
929 *ap->a_cookies = cookies;
930 }
931 /*
932 * XXX: THIS LOOP ASSUMES THAT allproc IS THE FIRST
933 * PROCLIST IN THE proclists!
934 */
935 proclist_lock_read();
936 pd = proclists;
937 #ifdef PROCFS_ZOMBIE
938 again:
939 #endif
940 for (p = LIST_FIRST(pd->pd_list);
941 p != NULL && uio->uio_resid >= UIO_MX; i++, pcnt++) {
942 switch (i) {
943 case 0: /* `.' */
944 case 1: /* `..' */
945 d.d_fileno = PROCFS_FILENO(0, Proot);
946 d.d_namlen = i + 1;
947 memcpy(d.d_name, "..", d.d_namlen);
948 d.d_name[i + 1] = '\0';
949 d.d_type = DT_DIR;
950 break;
951
952 case 2:
953 d.d_fileno = PROCFS_FILENO(0, Pcurproc);
954 d.d_namlen = 7;
955 memcpy(d.d_name, "curproc", 8);
956 d.d_type = DT_LNK;
957 break;
958
959 default:
960 while (pcnt < i) {
961 pcnt++;
962 p = LIST_NEXT(p, p_list);
963 if (!p)
964 goto done;
965 }
966 d.d_fileno = PROCFS_FILENO(p->p_pid, Pproc);
967 d.d_namlen = sprintf(d.d_name, "%ld",
968 (long)p->p_pid);
969 d.d_type = DT_REG;
970 p = p->p_list.le_next;
971 break;
972 }
973
974 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
975 break;
976 nc++;
977 if (cookies)
978 *cookies++ = i + 1;
979 }
980 done:
981
982 #ifdef PROCFS_ZOMBIE
983 pd++;
984 if (p == NULL && pd->pd_list != NULL)
985 goto again;
986 #endif
987 proclist_unlock_read();
988 ncookies = nc;
989
990 break;
991
992 }
993
994 default:
995 error = ENOTDIR;
996 break;
997 }
998
999 if (ap->a_ncookies) {
1000 if (error) {
1001 if (cookies)
1002 FREE(*ap->a_cookies, M_TEMP);
1003 *ap->a_ncookies = 0;
1004 *ap->a_cookies = NULL;
1005 } else
1006 *ap->a_ncookies = ncookies;
1007 }
1008 uio->uio_offset = i;
1009 return (error);
1010 }
1011
1012 /*
1013 * readlink reads the link of `curproc'
1014 */
1015 int
1016 procfs_readlink(v)
1017 void *v;
1018 {
1019 struct vop_readlink_args *ap = v;
1020 char buf[16]; /* should be enough */
1021 int len;
1022
1023 if (VTOPFS(ap->a_vp)->pfs_fileno != PROCFS_FILENO(0, Pcurproc))
1024 return (EINVAL);
1025
1026 len = sprintf(buf, "%ld", (long)curproc->p_pid);
1027
1028 return (uiomove((caddr_t)buf, len, ap->a_uio));
1029 }
1030
1031 /*
1032 * convert decimal ascii to pid_t
1033 */
1034 static pid_t
1035 atopid(b, len)
1036 const char *b;
1037 u_int len;
1038 {
1039 pid_t p = 0;
1040
1041 while (len--) {
1042 char c = *b++;
1043 if (c < '0' || c > '9')
1044 return (NO_PID);
1045 p = 10 * p + (c - '0');
1046 if (p > PID_MAX)
1047 return (NO_PID);
1048 }
1049
1050 return (p);
1051 }
1052