procfs_vnops.c revision 1.73 1 /* $NetBSD: procfs_vnops.c,v 1.73 2000/08/09 21:08:11 tv Exp $ */
2
3 /*
4 * Copyright (c) 1993 Jan-Simon Pendry
5 * Copyright (c) 1993, 1995
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Jan-Simon Pendry.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95
40 */
41
42 /*
43 * procfs vnode interface
44 */
45
46 #if defined(_KERNEL) && !defined(_LKM)
47 #include "opt_compat_linux.h"
48 #endif
49
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/time.h>
53 #include <sys/kernel.h>
54 #include <sys/file.h>
55 #include <sys/proc.h>
56 #include <sys/vnode.h>
57 #include <sys/namei.h>
58 #include <sys/malloc.h>
59 #include <sys/dirent.h>
60 #include <sys/resourcevar.h>
61 #include <sys/ptrace.h>
62 #include <sys/stat.h>
63
64 #include <uvm/uvm_extern.h> /* for PAGE_SIZE */
65
66 #include <machine/reg.h>
67
68 #include <miscfs/genfs/genfs.h>
69 #include <miscfs/procfs/procfs.h>
70
71 /*
72 * Vnode Operations.
73 *
74 */
75
76 /*
77 * This is a list of the valid names in the
78 * process-specific sub-directories. It is
79 * used in procfs_lookup and procfs_readdir
80 */
81 struct proc_target {
82 u_char pt_type;
83 u_char pt_namlen;
84 char *pt_name;
85 pfstype pt_pfstype;
86 int (*pt_valid) __P((struct proc *p));
87 } proc_targets[] = {
88 #define N(s) sizeof(s)-1, s
89 /* name type validp */
90 { DT_DIR, N("."), Pproc, NULL },
91 { DT_DIR, N(".."), Proot, NULL },
92 { DT_REG, N("file"), Pfile, procfs_validfile },
93 { DT_REG, N("mem"), Pmem, NULL },
94 { DT_REG, N("regs"), Pregs, procfs_validregs },
95 { DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs },
96 { DT_REG, N("ctl"), Pctl, NULL },
97 { DT_REG, N("status"), Pstatus, NULL },
98 { DT_REG, N("note"), Pnote, NULL },
99 { DT_REG, N("notepg"), Pnotepg, NULL },
100 { DT_REG, N("map"), Pmap, procfs_validmap },
101 { DT_REG, N("cmdline"), Pcmdline, NULL },
102 #ifdef COMPAT_LINUX
103 { DT_REG, N("exe"), Pfile, procfs_validfile },
104 #endif
105 #undef N
106 };
107 static int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]);
108
109 int procfs_lookup __P((void *));
110 #define procfs_create genfs_eopnotsupp_rele
111 #define procfs_mknod genfs_eopnotsupp_rele
112 int procfs_open __P((void *));
113 int procfs_close __P((void *));
114 int procfs_access __P((void *));
115 int procfs_getattr __P((void *));
116 int procfs_setattr __P((void *));
117 #define procfs_read procfs_rw
118 #define procfs_write procfs_rw
119 #define procfs_fcntl genfs_fcntl
120 #define procfs_ioctl genfs_enoioctl
121 #define procfs_poll genfs_poll
122 #define procfs_revoke genfs_revoke
123 #define procfs_mmap genfs_eopnotsupp
124 #define procfs_fsync genfs_nullop
125 #define procfs_seek genfs_nullop
126 #define procfs_remove genfs_eopnotsupp_rele
127 int procfs_link __P((void *));
128 #define procfs_rename genfs_eopnotsupp_rele
129 #define procfs_mkdir genfs_eopnotsupp_rele
130 #define procfs_rmdir genfs_eopnotsupp_rele
131 int procfs_symlink __P((void *));
132 int procfs_readdir __P((void *));
133 int procfs_readlink __P((void *));
134 #define procfs_abortop genfs_abortop
135 int procfs_inactive __P((void *));
136 int procfs_reclaim __P((void *));
137 #define procfs_lock genfs_lock
138 #define procfs_unlock genfs_unlock
139 int procfs_bmap __P((void *));
140 #define procfs_strategy genfs_badop
141 int procfs_print __P((void *));
142 int procfs_pathconf __P((void *));
143 #define procfs_islocked genfs_islocked
144 #define procfs_advlock genfs_einval
145 #define procfs_blkatoff genfs_eopnotsupp
146 #define procfs_valloc genfs_eopnotsupp
147 #define procfs_vfree genfs_nullop
148 #define procfs_truncate genfs_eopnotsupp
149 #define procfs_update genfs_nullop
150 #define procfs_bwrite genfs_eopnotsupp
151
152 static pid_t atopid __P((const char *, u_int));
153
154 /*
155 * procfs vnode operations.
156 */
157 int (**procfs_vnodeop_p) __P((void *));
158 struct vnodeopv_entry_desc procfs_vnodeop_entries[] = {
159 { &vop_default_desc, vn_default_error },
160 { &vop_lookup_desc, procfs_lookup }, /* lookup */
161 { &vop_create_desc, procfs_create }, /* create */
162 { &vop_mknod_desc, procfs_mknod }, /* mknod */
163 { &vop_open_desc, procfs_open }, /* open */
164 { &vop_close_desc, procfs_close }, /* close */
165 { &vop_access_desc, procfs_access }, /* access */
166 { &vop_getattr_desc, procfs_getattr }, /* getattr */
167 { &vop_setattr_desc, procfs_setattr }, /* setattr */
168 { &vop_read_desc, procfs_read }, /* read */
169 { &vop_write_desc, procfs_write }, /* write */
170 { &vop_fcntl_desc, procfs_fcntl }, /* fcntl */
171 { &vop_ioctl_desc, procfs_ioctl }, /* ioctl */
172 { &vop_poll_desc, procfs_poll }, /* poll */
173 { &vop_revoke_desc, procfs_revoke }, /* revoke */
174 { &vop_mmap_desc, procfs_mmap }, /* mmap */
175 { &vop_fsync_desc, procfs_fsync }, /* fsync */
176 { &vop_seek_desc, procfs_seek }, /* seek */
177 { &vop_remove_desc, procfs_remove }, /* remove */
178 { &vop_link_desc, procfs_link }, /* link */
179 { &vop_rename_desc, procfs_rename }, /* rename */
180 { &vop_mkdir_desc, procfs_mkdir }, /* mkdir */
181 { &vop_rmdir_desc, procfs_rmdir }, /* rmdir */
182 { &vop_symlink_desc, procfs_symlink }, /* symlink */
183 { &vop_readdir_desc, procfs_readdir }, /* readdir */
184 { &vop_readlink_desc, procfs_readlink }, /* readlink */
185 { &vop_abortop_desc, procfs_abortop }, /* abortop */
186 { &vop_inactive_desc, procfs_inactive }, /* inactive */
187 { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */
188 { &vop_lock_desc, procfs_lock }, /* lock */
189 { &vop_unlock_desc, procfs_unlock }, /* unlock */
190 { &vop_bmap_desc, procfs_bmap }, /* bmap */
191 { &vop_strategy_desc, procfs_strategy }, /* strategy */
192 { &vop_print_desc, procfs_print }, /* print */
193 { &vop_islocked_desc, procfs_islocked }, /* islocked */
194 { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */
195 { &vop_advlock_desc, procfs_advlock }, /* advlock */
196 { &vop_blkatoff_desc, procfs_blkatoff }, /* blkatoff */
197 { &vop_valloc_desc, procfs_valloc }, /* valloc */
198 { &vop_vfree_desc, procfs_vfree }, /* vfree */
199 { &vop_truncate_desc, procfs_truncate }, /* truncate */
200 { &vop_update_desc, procfs_update }, /* update */
201 { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL }
202 };
203 struct vnodeopv_desc procfs_vnodeop_opv_desc =
204 { &procfs_vnodeop_p, procfs_vnodeop_entries };
205 /*
206 * set things up for doing i/o on
207 * the pfsnode (vp). (vp) is locked
208 * on entry, and should be left locked
209 * on exit.
210 *
211 * for procfs we don't need to do anything
212 * in particular for i/o. all that is done
213 * is to support exclusive open on process
214 * memory images.
215 */
216 int
217 procfs_open(v)
218 void *v;
219 {
220 struct vop_open_args /* {
221 struct vnode *a_vp;
222 int a_mode;
223 struct ucred *a_cred;
224 struct proc *a_p;
225 } */ *ap = v;
226 struct pfsnode *pfs = VTOPFS(ap->a_vp);
227 struct proc *p1, *p2;
228 int error;
229
230 p1 = ap->a_p; /* tracer */
231 p2 = PFIND(pfs->pfs_pid); /* traced */
232
233 if (p2 == NULL)
234 return (ENOENT); /* was ESRCH, jsp */
235
236 switch (pfs->pfs_type) {
237 case Pmem:
238 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) ||
239 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE)))
240 return (EBUSY);
241
242 if ((error = procfs_checkioperm(p1, p2)) != 0)
243 return (EPERM);
244
245 if (ap->a_mode & FWRITE)
246 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL);
247
248 return (0);
249
250 default:
251 break;
252 }
253
254 return (0);
255 }
256
257 /*
258 * close the pfsnode (vp) after doing i/o.
259 * (vp) is not locked on entry or exit.
260 *
261 * nothing to do for procfs other than undo
262 * any exclusive open flag (see _open above).
263 */
264 int
265 procfs_close(v)
266 void *v;
267 {
268 struct vop_close_args /* {
269 struct vnode *a_vp;
270 int a_fflag;
271 struct ucred *a_cred;
272 struct proc *a_p;
273 } */ *ap = v;
274 struct pfsnode *pfs = VTOPFS(ap->a_vp);
275
276 switch (pfs->pfs_type) {
277 case Pmem:
278 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL))
279 pfs->pfs_flags &= ~(FWRITE|O_EXCL);
280 break;
281
282 default:
283 break;
284 }
285
286 return (0);
287 }
288
289 /*
290 * do block mapping for pfsnode (vp).
291 * since we don't use the buffer cache
292 * for procfs this function should never
293 * be called. in any case, it's not clear
294 * what part of the kernel ever makes use
295 * of this function. for sanity, this is the
296 * usual no-op bmap, although returning
297 * (EIO) would be a reasonable alternative.
298 */
299 int
300 procfs_bmap(v)
301 void *v;
302 {
303 struct vop_bmap_args /* {
304 struct vnode *a_vp;
305 daddr_t a_bn;
306 struct vnode **a_vpp;
307 daddr_t *a_bnp;
308 int * a_runp;
309 } */ *ap = v;
310
311 if (ap->a_vpp != NULL)
312 *ap->a_vpp = ap->a_vp;
313 if (ap->a_bnp != NULL)
314 *ap->a_bnp = ap->a_bn;
315 if (ap->a_runp != NULL)
316 *ap->a_runp = 0;
317 return (0);
318 }
319
320 /*
321 * _inactive is called when the pfsnode
322 * is vrele'd and the reference count goes
323 * to zero. (vp) will be on the vnode free
324 * list, so to get it back vget() must be
325 * used.
326 *
327 * for procfs, check if the process is still
328 * alive and if it isn't then just throw away
329 * the vnode by calling vgone(). this may
330 * be overkill and a waste of time since the
331 * chances are that the process will still be
332 * there and PFIND is not free.
333 *
334 * (vp) is locked on entry, but must be unlocked on exit.
335 */
336 int
337 procfs_inactive(v)
338 void *v;
339 {
340 struct vop_inactive_args /* {
341 struct vnode *a_vp;
342 struct proc *a_p;
343 } */ *ap = v;
344 struct pfsnode *pfs = VTOPFS(ap->a_vp);
345
346 VOP_UNLOCK(ap->a_vp, 0);
347 if (PFIND(pfs->pfs_pid) == 0)
348 vgone(ap->a_vp);
349
350 return (0);
351 }
352
353 /*
354 * _reclaim is called when getnewvnode()
355 * wants to make use of an entry on the vnode
356 * free list. at this time the filesystem needs
357 * to free any private data and remove the node
358 * from any private lists.
359 */
360 int
361 procfs_reclaim(v)
362 void *v;
363 {
364 struct vop_reclaim_args /* {
365 struct vnode *a_vp;
366 } */ *ap = v;
367
368 return (procfs_freevp(ap->a_vp));
369 }
370
371 /*
372 * Return POSIX pathconf information applicable to special devices.
373 */
374 int
375 procfs_pathconf(v)
376 void *v;
377 {
378 struct vop_pathconf_args /* {
379 struct vnode *a_vp;
380 int a_name;
381 register_t *a_retval;
382 } */ *ap = v;
383
384 switch (ap->a_name) {
385 case _PC_LINK_MAX:
386 *ap->a_retval = LINK_MAX;
387 return (0);
388 case _PC_MAX_CANON:
389 *ap->a_retval = MAX_CANON;
390 return (0);
391 case _PC_MAX_INPUT:
392 *ap->a_retval = MAX_INPUT;
393 return (0);
394 case _PC_PIPE_BUF:
395 *ap->a_retval = PIPE_BUF;
396 return (0);
397 case _PC_CHOWN_RESTRICTED:
398 *ap->a_retval = 1;
399 return (0);
400 case _PC_VDISABLE:
401 *ap->a_retval = _POSIX_VDISABLE;
402 return (0);
403 case _PC_SYNC_IO:
404 *ap->a_retval = 1;
405 return (0);
406 default:
407 return (EINVAL);
408 }
409 /* NOTREACHED */
410 }
411
412 /*
413 * _print is used for debugging.
414 * just print a readable description
415 * of (vp).
416 */
417 int
418 procfs_print(v)
419 void *v;
420 {
421 struct vop_print_args /* {
422 struct vnode *a_vp;
423 } */ *ap = v;
424 struct pfsnode *pfs = VTOPFS(ap->a_vp);
425
426 printf("tag VT_PROCFS, type %d, pid %d, mode %x, flags %lx\n",
427 pfs->pfs_type, pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags);
428 return 0;
429 }
430
431 int
432 procfs_link(v)
433 void *v;
434 {
435 struct vop_link_args /* {
436 struct vnode *a_dvp;
437 struct vnode *a_vp;
438 struct componentname *a_cnp;
439 } */ *ap = v;
440
441 VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
442 vput(ap->a_dvp);
443 return (EROFS);
444 }
445
446 int
447 procfs_symlink(v)
448 void *v;
449 {
450 struct vop_symlink_args /* {
451 struct vnode *a_dvp;
452 struct vnode **a_vpp;
453 struct componentname *a_cnp;
454 struct vattr *a_vap;
455 char *a_target;
456 } */ *ap = v;
457
458 VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
459 vput(ap->a_dvp);
460 return (EROFS);
461 }
462
463 /*
464 * Invent attributes for pfsnode (vp) and store
465 * them in (vap).
466 * Directories lengths are returned as zero since
467 * any real length would require the genuine size
468 * to be computed, and nothing cares anyway.
469 *
470 * this is relatively minimal for procfs.
471 */
472 int
473 procfs_getattr(v)
474 void *v;
475 {
476 struct vop_getattr_args /* {
477 struct vnode *a_vp;
478 struct vattr *a_vap;
479 struct ucred *a_cred;
480 struct proc *a_p;
481 } */ *ap = v;
482 struct pfsnode *pfs = VTOPFS(ap->a_vp);
483 struct vattr *vap = ap->a_vap;
484 struct proc *procp;
485 struct timeval tv;
486 int error;
487
488 /* first check the process still exists */
489 switch (pfs->pfs_type) {
490 case Proot:
491 case Pcurproc:
492 case Pself:
493 procp = 0;
494 break;
495
496 default:
497 procp = PFIND(pfs->pfs_pid);
498 if (procp == 0)
499 return (ENOENT);
500 break;
501 }
502
503 error = 0;
504
505 /* start by zeroing out the attributes */
506 VATTR_NULL(vap);
507
508 /* next do all the common fields */
509 vap->va_type = ap->a_vp->v_type;
510 vap->va_mode = pfs->pfs_mode;
511 vap->va_fileid = pfs->pfs_fileno;
512 vap->va_flags = 0;
513 vap->va_blocksize = PAGE_SIZE;
514
515 /*
516 * Make all times be current TOD.
517 * It would be possible to get the process start
518 * time from the p_stat structure, but there's
519 * no "file creation" time stamp anyway, and the
520 * p_stat structure is not addressible if u. gets
521 * swapped out for that process.
522 */
523 microtime(&tv);
524 TIMEVAL_TO_TIMESPEC(&tv, &vap->va_ctime);
525 vap->va_atime = vap->va_mtime = vap->va_ctime;
526
527 switch (pfs->pfs_type) {
528 case Pmem:
529 case Pregs:
530 case Pfpregs:
531 /*
532 * If the process has exercised some setuid or setgid
533 * privilege, then rip away read/write permission so
534 * that only root can gain access.
535 */
536 if (procp->p_flag & P_SUGID)
537 vap->va_mode &= ~(S_IRUSR|S_IWUSR);
538 /* FALLTHROUGH */
539 case Pctl:
540 case Pstatus:
541 case Pnote:
542 case Pnotepg:
543 case Pmap:
544 case Pcmdline:
545 vap->va_nlink = 1;
546 vap->va_uid = procp->p_ucred->cr_uid;
547 vap->va_gid = procp->p_ucred->cr_gid;
548 break;
549
550 default:
551 break;
552 }
553
554 /*
555 * now do the object specific fields
556 *
557 * The size could be set from struct reg, but it's hardly
558 * worth the trouble, and it puts some (potentially) machine
559 * dependent data into this machine-independent code. If it
560 * becomes important then this function should break out into
561 * a per-file stat function in the corresponding .c file.
562 */
563
564 switch (pfs->pfs_type) {
565 case Proot:
566 /*
567 * Set nlink to 1 to tell fts(3) we don't actually know.
568 */
569 vap->va_nlink = 1;
570 vap->va_uid = 0;
571 vap->va_gid = 0;
572 vap->va_bytes = vap->va_size = DEV_BSIZE;
573 break;
574
575 case Pcurproc: {
576 char buf[16]; /* should be enough */
577 vap->va_nlink = 1;
578 vap->va_uid = 0;
579 vap->va_gid = 0;
580 vap->va_bytes = vap->va_size =
581 sprintf(buf, "%ld", (long)curproc->p_pid);
582 break;
583 }
584
585 case Pself:
586 vap->va_nlink = 1;
587 vap->va_uid = 0;
588 vap->va_gid = 0;
589 vap->va_bytes = vap->va_size = sizeof("curproc");
590 break;
591
592 case Pproc:
593 vap->va_nlink = 2;
594 vap->va_uid = procp->p_ucred->cr_uid;
595 vap->va_gid = procp->p_ucred->cr_gid;
596 vap->va_bytes = vap->va_size = DEV_BSIZE;
597 break;
598
599 case Pfile:
600 error = EOPNOTSUPP;
601 break;
602
603 case Pmem:
604 vap->va_bytes = vap->va_size =
605 ctob(procp->p_vmspace->vm_tsize +
606 procp->p_vmspace->vm_dsize +
607 procp->p_vmspace->vm_ssize);
608 break;
609
610 #if defined(PT_GETREGS) || defined(PT_SETREGS)
611 case Pregs:
612 vap->va_bytes = vap->va_size = sizeof(struct reg);
613 break;
614 #endif
615
616 #if defined(PT_GETFPREGS) || defined(PT_SETFPREGS)
617 case Pfpregs:
618 vap->va_bytes = vap->va_size = sizeof(struct fpreg);
619 break;
620 #endif
621
622 case Pctl:
623 case Pstatus:
624 case Pnote:
625 case Pnotepg:
626 case Pmap:
627 case Pcmdline:
628 vap->va_bytes = vap->va_size = 0;
629 break;
630
631 default:
632 panic("procfs_getattr");
633 }
634
635 return (error);
636 }
637
/*
 * Pretend to set attributes: the request is ignored and success is
 * reported.
 *
 * Returning an error here is not an option.  creat(), for one, tries
 * to set the file length to 0 and would fail; worse, so would
 *	echo $note > /proc/$pid/note
 */
/*ARGSUSED*/
int
procfs_setattr(v)
	void *v;
{

	return (0);
}
654
655 /*
656 * implement access checking.
657 *
658 * actually, the check for super-user is slightly
659 * broken since it will allow read access to write-only
660 * objects. this doesn't cause any particular trouble
661 * but does mean that the i/o entry points need to check
662 * that the operation really does make sense.
663 */
664 int
665 procfs_access(v)
666 void *v;
667 {
668 struct vop_access_args /* {
669 struct vnode *a_vp;
670 int a_mode;
671 struct ucred *a_cred;
672 struct proc *a_p;
673 } */ *ap = v;
674 struct vattr va;
675 int error;
676
677 if ((error = VOP_GETATTR(ap->a_vp, &va, ap->a_cred, ap->a_p)) != 0)
678 return (error);
679
680 return (vaccess(va.va_type, va.va_mode,
681 va.va_uid, va.va_gid, ap->a_mode, ap->a_cred));
682 }
683
684 /*
685 * lookup. this is incredibly complicated in the
686 * general case, however for most pseudo-filesystems
687 * very little needs to be done.
688 *
689 * Locking isn't hard here, just poorly documented.
690 *
691 * If we're looking up ".", just vref the parent & return it.
692 *
693 * If we're looking up "..", unlock the parent, and lock "..". If everything
694 * went ok, and we're on the last component and the caller requested the
695 * parent locked, try to re-lock the parent. We do this to prevent lock
696 * races.
697 *
698 * For anything else, get the needed node. Then unlock the parent if not
699 * the last component or not LOCKPARENT (i.e. if we wouldn't re-lock the
700 * parent in the .. case).
701 *
702 * We try to exit with the parent locked in error cases.
703 */
704 int
705 procfs_lookup(v)
706 void *v;
707 {
708 struct vop_lookup_args /* {
709 struct vnode * a_dvp;
710 struct vnode ** a_vpp;
711 struct componentname * a_cnp;
712 } */ *ap = v;
713 struct componentname *cnp = ap->a_cnp;
714 struct vnode **vpp = ap->a_vpp;
715 struct vnode *dvp = ap->a_dvp;
716 const char *pname = cnp->cn_nameptr;
717 struct proc_target *pt;
718 struct vnode *fvp;
719 pid_t pid;
720 struct pfsnode *pfs;
721 struct proc *p;
722 int i, error, wantpunlock, iscurproc = 0, isself = 0;
723
724 *vpp = NULL;
725 cnp->cn_flags &= ~PDIRUNLOCK;
726
727 if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
728 return (EROFS);
729
730 if (cnp->cn_namelen == 1 && *pname == '.') {
731 *vpp = dvp;
732 VREF(dvp);
733 return (0);
734 }
735
736 wantpunlock = (~cnp->cn_flags & (LOCKPARENT | ISLASTCN));
737 pfs = VTOPFS(dvp);
738 switch (pfs->pfs_type) {
739 case Proot:
740 /*
741 * Shouldn't get here with .. in the root node.
742 */
743 if (cnp->cn_flags & ISDOTDOT)
744 return (EIO);
745
746 iscurproc = CNEQ(cnp, "curproc", 7);
747 isself = CNEQ(cnp, "self", 4);
748
749 if (iscurproc || isself) {
750 error = procfs_allocvp(dvp->v_mount, vpp, 0,
751 iscurproc ? Pcurproc : Pself);
752 if ((error == 0) && (wantpunlock)) {
753 VOP_UNLOCK(dvp, 0);
754 cnp->cn_flags |= PDIRUNLOCK;
755 }
756 return (error);
757 }
758
759 pid = atopid(pname, cnp->cn_namelen);
760 if (pid == NO_PID)
761 break;
762
763 p = PFIND(pid);
764 if (p == 0)
765 break;
766
767 error = procfs_allocvp(dvp->v_mount, vpp, pid, Pproc);
768 if ((error == 0) && (wantpunlock)) {
769 VOP_UNLOCK(dvp, 0);
770 cnp->cn_flags |= PDIRUNLOCK;
771 }
772 return (error);
773
774 case Pproc:
775 /*
776 * do the .. dance. We unlock the directory, and then
777 * get the root dir. That will automatically return ..
778 * locked. Then if the caller wanted dvp locked, we
779 * re-lock.
780 */
781 if (cnp->cn_flags & ISDOTDOT) {
782 VOP_UNLOCK(dvp, 0);
783 cnp->cn_flags |= PDIRUNLOCK;
784 error = procfs_root(dvp->v_mount, vpp);
785 if ((error == 0) && (wantpunlock == 0) &&
786 ((error = vn_lock(dvp, LK_EXCLUSIVE)) == 0))
787 cnp->cn_flags &= ~PDIRUNLOCK;
788 return (error);
789 }
790
791 p = PFIND(pfs->pfs_pid);
792 if (p == 0)
793 break;
794
795 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) {
796 if (cnp->cn_namelen == pt->pt_namlen &&
797 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
798 (pt->pt_valid == NULL || (*pt->pt_valid)(p)))
799 goto found;
800 }
801 break;
802
803 found:
804 if (pt->pt_pfstype == Pfile) {
805 fvp = procfs_findtextvp(p);
806 /* We already checked that it exists. */
807 VREF(fvp);
808 vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY);
809 if (wantpunlock) {
810 VOP_UNLOCK(dvp, 0);
811 cnp->cn_flags |= PDIRUNLOCK;
812 }
813 *vpp = fvp;
814 return (0);
815 }
816
817 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
818 pt->pt_pfstype);
819 if ((error == 0) && (wantpunlock)) {
820 VOP_UNLOCK(dvp, 0);
821 cnp->cn_flags |= PDIRUNLOCK;
822 }
823 return (error);
824
825 default:
826 return (ENOTDIR);
827 }
828
829 return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS);
830 }
831
832 int
833 procfs_validfile(p)
834 struct proc *p;
835 {
836
837 return (procfs_findtextvp(p) != NULLVP);
838 }
839
840 /*
841 * readdir returns directory entries from pfsnode (vp).
842 *
843 * the strategy here with procfs is to generate a single
844 * directory entry at a time (struct dirent) and then
845 * copy that out to userland using uiomove. a more efficent
846 * though more complex implementation, would try to minimize
847 * the number of calls to uiomove(). for procfs, this is
848 * hardly worth the added code complexity.
849 *
850 * this should just be done through read()
851 */
852 int
853 procfs_readdir(v)
854 void *v;
855 {
856 struct vop_readdir_args /* {
857 struct vnode *a_vp;
858 struct uio *a_uio;
859 struct ucred *a_cred;
860 int *a_eofflag;
861 off_t **a_cookies;
862 int *a_ncookies;
863 } */ *ap = v;
864 struct uio *uio = ap->a_uio;
865 struct dirent d;
866 struct pfsnode *pfs;
867 off_t i;
868 int error;
869 off_t *cookies = NULL;
870 int ncookies;
871
872 pfs = VTOPFS(ap->a_vp);
873
874 if (uio->uio_resid < UIO_MX)
875 return (EINVAL);
876 if (uio->uio_offset < 0)
877 return (EINVAL);
878
879 error = 0;
880 i = uio->uio_offset;
881 memset((caddr_t)&d, 0, UIO_MX);
882 d.d_reclen = UIO_MX;
883 ncookies = uio->uio_resid / UIO_MX;
884
885 switch (pfs->pfs_type) {
886 /*
887 * this is for the process-specific sub-directories.
888 * all that is needed to is copy out all the entries
889 * from the procent[] table (top of this file).
890 */
891 case Pproc: {
892 struct proc *p;
893 struct proc_target *pt;
894
895 if (i >= nproc_targets)
896 return 0;
897
898 p = PFIND(pfs->pfs_pid);
899 if (p == NULL)
900 break;
901
902 if (ap->a_ncookies) {
903 ncookies = min(ncookies, (nproc_targets - i));
904 cookies = malloc(ncookies * sizeof (off_t),
905 M_TEMP, M_WAITOK);
906 *ap->a_cookies = cookies;
907 }
908
909 for (pt = &proc_targets[i];
910 uio->uio_resid >= UIO_MX && i < nproc_targets; pt++, i++) {
911 if (pt->pt_valid && (*pt->pt_valid)(p) == 0)
912 continue;
913
914 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype);
915 d.d_namlen = pt->pt_namlen;
916 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1);
917 d.d_type = pt->pt_type;
918
919 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
920 break;
921 if (cookies)
922 *cookies++ = i + 1;
923 }
924
925 break;
926 }
927
928 /*
929 * this is for the root of the procfs filesystem
930 * what is needed are special entries for "curproc"
931 * and "self" followed by an entry for each process
932 * on allproc
933 #ifdef PROCFS_ZOMBIE
934 * and deadproc and zombproc.
935 #endif
936 */
937
938 case Proot: {
939 int pcnt = i, nc = 0;
940 const struct proclist_desc *pd;
941 volatile struct proc *p;
942
943 if (pcnt > 3)
944 pcnt = 3;
945 if (ap->a_ncookies) {
946 /*
947 * XXX Potentially allocating too much space here,
948 * but I'm lazy. This loop needs some work.
949 */
950 cookies = malloc(ncookies * sizeof (off_t),
951 M_TEMP, M_WAITOK);
952 *ap->a_cookies = cookies;
953 }
954 /*
955 * XXX: THIS LOOP ASSUMES THAT allproc IS THE FIRST
956 * PROCLIST IN THE proclists!
957 */
958 proclist_lock_read();
959 pd = proclists;
960 #ifdef PROCFS_ZOMBIE
961 again:
962 #endif
963 for (p = LIST_FIRST(pd->pd_list);
964 p != NULL && uio->uio_resid >= UIO_MX; i++, pcnt++) {
965 switch (i) {
966 case 0: /* `.' */
967 case 1: /* `..' */
968 d.d_fileno = PROCFS_FILENO(0, Proot);
969 d.d_namlen = i + 1;
970 memcpy(d.d_name, "..", d.d_namlen);
971 d.d_name[i + 1] = '\0';
972 d.d_type = DT_DIR;
973 break;
974
975 case 2:
976 d.d_fileno = PROCFS_FILENO(0, Pcurproc);
977 d.d_namlen = sizeof("curproc") - 1;
978 memcpy(d.d_name, "curproc", sizeof("curproc"));
979 d.d_type = DT_LNK;
980 break;
981
982 case 3:
983 d.d_fileno = PROCFS_FILENO(0, Pself);
984 d.d_namlen = sizeof("self") - 1;
985 memcpy(d.d_name, "self", sizeof("self"));
986 d.d_type = DT_LNK;
987 break;
988
989 default:
990 while (pcnt < i) {
991 pcnt++;
992 p = LIST_NEXT(p, p_list);
993 if (!p)
994 goto done;
995 }
996 d.d_fileno = PROCFS_FILENO(p->p_pid, Pproc);
997 d.d_namlen = sprintf(d.d_name, "%ld",
998 (long)p->p_pid);
999 d.d_type = DT_REG;
1000 p = p->p_list.le_next;
1001 break;
1002 }
1003
1004 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
1005 break;
1006 nc++;
1007 if (cookies)
1008 *cookies++ = i + 1;
1009 }
1010 done:
1011
1012 #ifdef PROCFS_ZOMBIE
1013 pd++;
1014 if (p == NULL && pd->pd_list != NULL)
1015 goto again;
1016 #endif
1017 proclist_unlock_read();
1018 ncookies = nc;
1019
1020 break;
1021
1022 }
1023
1024 default:
1025 error = ENOTDIR;
1026 break;
1027 }
1028
1029 if (ap->a_ncookies) {
1030 if (error) {
1031 if (cookies)
1032 free(*ap->a_cookies, M_TEMP);
1033 *ap->a_ncookies = 0;
1034 *ap->a_cookies = NULL;
1035 } else
1036 *ap->a_ncookies = ncookies;
1037 }
1038 uio->uio_offset = i;
1039 return (error);
1040 }
1041
1042 /*
1043 * readlink reads the link of `curproc'
1044 */
1045 int
1046 procfs_readlink(v)
1047 void *v;
1048 {
1049 struct vop_readlink_args *ap = v;
1050 char buf[16]; /* should be enough */
1051 int len;
1052
1053 if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pcurproc))
1054 len = sprintf(buf, "%ld", (long)curproc->p_pid);
1055 else if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pself))
1056 len = sprintf(buf, "%s", "curproc");
1057 else
1058 return (EINVAL);
1059
1060 return (uiomove((caddr_t)buf, len, ap->a_uio));
1061 }
1062
1063 /*
1064 * convert decimal ascii to pid_t
1065 */
1066 static pid_t
1067 atopid(b, len)
1068 const char *b;
1069 u_int len;
1070 {
1071 pid_t p = 0;
1072
1073 while (len--) {
1074 char c = *b++;
1075 if (c < '0' || c > '9')
1076 return (NO_PID);
1077 p = 10 * p + (c - '0');
1078 if (p > PID_MAX)
1079 return (NO_PID);
1080 }
1081
1082 return (p);
1083 }
1084