procfs_vnops.c revision 1.83 1 /* $NetBSD: procfs_vnops.c,v 1.83 2001/08/31 04:56:26 chs Exp $ */
2
3 /*
4 * Copyright (c) 1993 Jan-Simon Pendry
5 * Copyright (c) 1993, 1995
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Jan-Simon Pendry.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95
40 */
41
42 /*
43 * procfs vnode interface
44 */
45
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/time.h>
49 #include <sys/kernel.h>
50 #include <sys/file.h>
51 #include <sys/proc.h>
52 #include <sys/vnode.h>
53 #include <sys/namei.h>
54 #include <sys/malloc.h>
55 #include <sys/mount.h>
56 #include <sys/dirent.h>
57 #include <sys/resourcevar.h>
58 #include <sys/ptrace.h>
59 #include <sys/stat.h>
60
61 #include <uvm/uvm_extern.h> /* for PAGE_SIZE */
62
63 #include <machine/reg.h>
64
65 #include <miscfs/genfs/genfs.h>
66 #include <miscfs/procfs/procfs.h>
67
68 /*
69 * Vnode Operations.
70 *
71 */
72
73 static int procfs_validfile_linux __P((struct proc *, struct mount *));
74
75 /*
76 * This is a list of the valid names in the
77 * process-specific sub-directories. It is
78 * used in procfs_lookup and procfs_readdir
79 */
80 const struct proc_target {
81 u_char pt_type;
82 u_char pt_namlen;
83 char *pt_name;
84 pfstype pt_pfstype;
85 int (*pt_valid) __P((struct proc *, struct mount *));
86 } proc_targets[] = {
87 #define N(s) sizeof(s)-1, s
88 /* name type validp */
89 { DT_DIR, N("."), Pproc, NULL },
90 { DT_DIR, N(".."), Proot, NULL },
91 { DT_REG, N("file"), Pfile, procfs_validfile },
92 { DT_REG, N("mem"), Pmem, NULL },
93 { DT_REG, N("regs"), Pregs, procfs_validregs },
94 { DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs },
95 { DT_REG, N("ctl"), Pctl, NULL },
96 { DT_REG, N("status"), Pstatus, NULL },
97 { DT_REG, N("note"), Pnote, NULL },
98 { DT_REG, N("notepg"), Pnotepg, NULL },
99 { DT_REG, N("map"), Pmap, procfs_validmap },
100 { DT_REG, N("maps"), Pmaps, procfs_validmap },
101 { DT_REG, N("cmdline"), Pcmdline, NULL },
102 { DT_REG, N("exe"), Pfile, procfs_validfile_linux },
103 #undef N
104 };
105 static int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]);
106
107 /*
108 * List of files in the root directory. Note: the validate function will
109 * be called with p == NULL for these ones.
110 */
111 struct proc_target proc_root_targets[] = {
112 #define N(s) sizeof(s)-1, s
113 /* name type validp */
114 { DT_REG, N("meminfo"), Pmeminfo, procfs_validfile_linux },
115 { DT_REG, N("cpuinfo"), Pcpuinfo, procfs_validfile_linux },
116 #undef N
117 };
118 static int nproc_root_targets =
119 sizeof(proc_root_targets) / sizeof(proc_root_targets[0]);
120
121 int procfs_lookup __P((void *));
122 #define procfs_create genfs_eopnotsupp_rele
123 #define procfs_mknod genfs_eopnotsupp_rele
124 int procfs_open __P((void *));
125 int procfs_close __P((void *));
126 int procfs_access __P((void *));
127 int procfs_getattr __P((void *));
128 int procfs_setattr __P((void *));
129 #define procfs_read procfs_rw
130 #define procfs_write procfs_rw
131 #define procfs_fcntl genfs_fcntl
132 #define procfs_ioctl genfs_enoioctl
133 #define procfs_poll genfs_poll
134 #define procfs_revoke genfs_revoke
135 #define procfs_fsync genfs_nullop
136 #define procfs_seek genfs_nullop
137 #define procfs_remove genfs_eopnotsupp_rele
138 int procfs_link __P((void *));
139 #define procfs_rename genfs_eopnotsupp_rele
140 #define procfs_mkdir genfs_eopnotsupp_rele
141 #define procfs_rmdir genfs_eopnotsupp_rele
142 int procfs_symlink __P((void *));
143 int procfs_readdir __P((void *));
144 int procfs_readlink __P((void *));
145 #define procfs_abortop genfs_abortop
146 int procfs_inactive __P((void *));
147 int procfs_reclaim __P((void *));
148 #define procfs_lock genfs_lock
149 #define procfs_unlock genfs_unlock
150 #define procfs_bmap genfs_badop
151 #define procfs_strategy genfs_badop
152 int procfs_print __P((void *));
153 int procfs_pathconf __P((void *));
154 #define procfs_islocked genfs_islocked
155 #define procfs_advlock genfs_einval
156 #define procfs_blkatoff genfs_eopnotsupp
157 #define procfs_valloc genfs_eopnotsupp
158 #define procfs_vfree genfs_nullop
159 #define procfs_truncate genfs_eopnotsupp
160 #define procfs_update genfs_nullop
161 #define procfs_bwrite genfs_eopnotsupp
162
163 static pid_t atopid __P((const char *, u_int));
164
165 /*
166 * procfs vnode operations.
167 */
168 int (**procfs_vnodeop_p) __P((void *));
169 const struct vnodeopv_entry_desc procfs_vnodeop_entries[] = {
170 { &vop_default_desc, vn_default_error },
171 { &vop_lookup_desc, procfs_lookup }, /* lookup */
172 { &vop_create_desc, procfs_create }, /* create */
173 { &vop_mknod_desc, procfs_mknod }, /* mknod */
174 { &vop_open_desc, procfs_open }, /* open */
175 { &vop_close_desc, procfs_close }, /* close */
176 { &vop_access_desc, procfs_access }, /* access */
177 { &vop_getattr_desc, procfs_getattr }, /* getattr */
178 { &vop_setattr_desc, procfs_setattr }, /* setattr */
179 { &vop_read_desc, procfs_read }, /* read */
180 { &vop_write_desc, procfs_write }, /* write */
181 { &vop_fcntl_desc, procfs_fcntl }, /* fcntl */
182 { &vop_ioctl_desc, procfs_ioctl }, /* ioctl */
183 { &vop_poll_desc, procfs_poll }, /* poll */
184 { &vop_revoke_desc, procfs_revoke }, /* revoke */
185 { &vop_fsync_desc, procfs_fsync }, /* fsync */
186 { &vop_seek_desc, procfs_seek }, /* seek */
187 { &vop_remove_desc, procfs_remove }, /* remove */
188 { &vop_link_desc, procfs_link }, /* link */
189 { &vop_rename_desc, procfs_rename }, /* rename */
190 { &vop_mkdir_desc, procfs_mkdir }, /* mkdir */
191 { &vop_rmdir_desc, procfs_rmdir }, /* rmdir */
192 { &vop_symlink_desc, procfs_symlink }, /* symlink */
193 { &vop_readdir_desc, procfs_readdir }, /* readdir */
194 { &vop_readlink_desc, procfs_readlink }, /* readlink */
195 { &vop_abortop_desc, procfs_abortop }, /* abortop */
196 { &vop_inactive_desc, procfs_inactive }, /* inactive */
197 { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */
198 { &vop_lock_desc, procfs_lock }, /* lock */
199 { &vop_unlock_desc, procfs_unlock }, /* unlock */
200 { &vop_bmap_desc, procfs_bmap }, /* bmap */
201 { &vop_strategy_desc, procfs_strategy }, /* strategy */
202 { &vop_print_desc, procfs_print }, /* print */
203 { &vop_islocked_desc, procfs_islocked }, /* islocked */
204 { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */
205 { &vop_advlock_desc, procfs_advlock }, /* advlock */
206 { &vop_blkatoff_desc, procfs_blkatoff }, /* blkatoff */
207 { &vop_valloc_desc, procfs_valloc }, /* valloc */
208 { &vop_vfree_desc, procfs_vfree }, /* vfree */
209 { &vop_truncate_desc, procfs_truncate }, /* truncate */
210 { &vop_update_desc, procfs_update }, /* update */
211 { NULL, NULL }
212 };
213 const struct vnodeopv_desc procfs_vnodeop_opv_desc =
214 { &procfs_vnodeop_p, procfs_vnodeop_entries };
215 /*
216 * set things up for doing i/o on
217 * the pfsnode (vp). (vp) is locked
218 * on entry, and should be left locked
219 * on exit.
220 *
221 * for procfs we don't need to do anything
222 * in particular for i/o. all that is done
223 * is to support exclusive open on process
224 * memory images.
225 */
226 int
227 procfs_open(v)
228 void *v;
229 {
230 struct vop_open_args /* {
231 struct vnode *a_vp;
232 int a_mode;
233 struct ucred *a_cred;
234 struct proc *a_p;
235 } */ *ap = v;
236 struct pfsnode *pfs = VTOPFS(ap->a_vp);
237 struct proc *p1, *p2;
238 int error;
239
240 p1 = ap->a_p; /* tracer */
241 p2 = PFIND(pfs->pfs_pid); /* traced */
242
243 if (p2 == NULL)
244 return (ENOENT); /* was ESRCH, jsp */
245
246 switch (pfs->pfs_type) {
247 case Pmem:
248 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) ||
249 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE)))
250 return (EBUSY);
251
252 if ((error = procfs_checkioperm(p1, p2)) != 0)
253 return (EPERM);
254
255 if (ap->a_mode & FWRITE)
256 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL);
257
258 return (0);
259
260 default:
261 break;
262 }
263
264 return (0);
265 }
266
267 /*
268 * close the pfsnode (vp) after doing i/o.
269 * (vp) is not locked on entry or exit.
270 *
271 * nothing to do for procfs other than undo
272 * any exclusive open flag (see _open above).
273 */
274 int
275 procfs_close(v)
276 void *v;
277 {
278 struct vop_close_args /* {
279 struct vnode *a_vp;
280 int a_fflag;
281 struct ucred *a_cred;
282 struct proc *a_p;
283 } */ *ap = v;
284 struct pfsnode *pfs = VTOPFS(ap->a_vp);
285
286 switch (pfs->pfs_type) {
287 case Pmem:
288 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL))
289 pfs->pfs_flags &= ~(FWRITE|O_EXCL);
290 break;
291
292 default:
293 break;
294 }
295
296 return (0);
297 }
298
299 /*
300 * _inactive is called when the pfsnode
301 * is vrele'd and the reference count goes
302 * to zero. (vp) will be on the vnode free
303 * list, so to get it back vget() must be
304 * used.
305 *
306 * for procfs, check if the process is still
307 * alive and if it isn't then just throw away
308 * the vnode by calling vgone(). this may
309 * be overkill and a waste of time since the
310 * chances are that the process will still be
311 * there and PFIND is not free.
312 *
313 * (vp) is locked on entry, but must be unlocked on exit.
314 */
315 int
316 procfs_inactive(v)
317 void *v;
318 {
319 struct vop_inactive_args /* {
320 struct vnode *a_vp;
321 struct proc *a_p;
322 } */ *ap = v;
323 struct pfsnode *pfs = VTOPFS(ap->a_vp);
324
325 VOP_UNLOCK(ap->a_vp, 0);
326 if (PFIND(pfs->pfs_pid) == 0)
327 vgone(ap->a_vp);
328
329 return (0);
330 }
331
332 /*
333 * _reclaim is called when getnewvnode()
334 * wants to make use of an entry on the vnode
335 * free list. at this time the filesystem needs
336 * to free any private data and remove the node
337 * from any private lists.
338 */
339 int
340 procfs_reclaim(v)
341 void *v;
342 {
343 struct vop_reclaim_args /* {
344 struct vnode *a_vp;
345 } */ *ap = v;
346
347 return (procfs_freevp(ap->a_vp));
348 }
349
350 /*
351 * Return POSIX pathconf information applicable to special devices.
352 */
353 int
354 procfs_pathconf(v)
355 void *v;
356 {
357 struct vop_pathconf_args /* {
358 struct vnode *a_vp;
359 int a_name;
360 register_t *a_retval;
361 } */ *ap = v;
362
363 switch (ap->a_name) {
364 case _PC_LINK_MAX:
365 *ap->a_retval = LINK_MAX;
366 return (0);
367 case _PC_MAX_CANON:
368 *ap->a_retval = MAX_CANON;
369 return (0);
370 case _PC_MAX_INPUT:
371 *ap->a_retval = MAX_INPUT;
372 return (0);
373 case _PC_PIPE_BUF:
374 *ap->a_retval = PIPE_BUF;
375 return (0);
376 case _PC_CHOWN_RESTRICTED:
377 *ap->a_retval = 1;
378 return (0);
379 case _PC_VDISABLE:
380 *ap->a_retval = _POSIX_VDISABLE;
381 return (0);
382 case _PC_SYNC_IO:
383 *ap->a_retval = 1;
384 return (0);
385 default:
386 return (EINVAL);
387 }
388 /* NOTREACHED */
389 }
390
391 /*
392 * _print is used for debugging.
393 * just print a readable description
394 * of (vp).
395 */
396 int
397 procfs_print(v)
398 void *v;
399 {
400 struct vop_print_args /* {
401 struct vnode *a_vp;
402 } */ *ap = v;
403 struct pfsnode *pfs = VTOPFS(ap->a_vp);
404
405 printf("tag VT_PROCFS, type %d, pid %d, mode %x, flags %lx\n",
406 pfs->pfs_type, pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags);
407 return 0;
408 }
409
410 int
411 procfs_link(v)
412 void *v;
413 {
414 struct vop_link_args /* {
415 struct vnode *a_dvp;
416 struct vnode *a_vp;
417 struct componentname *a_cnp;
418 } */ *ap = v;
419
420 VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
421 vput(ap->a_dvp);
422 return (EROFS);
423 }
424
425 int
426 procfs_symlink(v)
427 void *v;
428 {
429 struct vop_symlink_args /* {
430 struct vnode *a_dvp;
431 struct vnode **a_vpp;
432 struct componentname *a_cnp;
433 struct vattr *a_vap;
434 char *a_target;
435 } */ *ap = v;
436
437 VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
438 vput(ap->a_dvp);
439 return (EROFS);
440 }
441
442 /*
443 * Invent attributes for pfsnode (vp) and store
444 * them in (vap).
445 * Directories lengths are returned as zero since
446 * any real length would require the genuine size
447 * to be computed, and nothing cares anyway.
448 *
449 * this is relatively minimal for procfs.
450 */
451 int
452 procfs_getattr(v)
453 void *v;
454 {
455 struct vop_getattr_args /* {
456 struct vnode *a_vp;
457 struct vattr *a_vap;
458 struct ucred *a_cred;
459 struct proc *a_p;
460 } */ *ap = v;
461 struct pfsnode *pfs = VTOPFS(ap->a_vp);
462 struct vattr *vap = ap->a_vap;
463 struct proc *procp;
464 struct timeval tv;
465 int error;
466
467 /* first check the process still exists */
468 switch (pfs->pfs_type) {
469 case Proot:
470 case Pcurproc:
471 case Pself:
472 procp = 0;
473 break;
474
475 default:
476 procp = PFIND(pfs->pfs_pid);
477 if (procp == 0)
478 return (ENOENT);
479 break;
480 }
481
482 error = 0;
483
484 /* start by zeroing out the attributes */
485 VATTR_NULL(vap);
486
487 /* next do all the common fields */
488 vap->va_type = ap->a_vp->v_type;
489 vap->va_mode = pfs->pfs_mode;
490 vap->va_fileid = pfs->pfs_fileno;
491 vap->va_flags = 0;
492 vap->va_blocksize = PAGE_SIZE;
493
494 /*
495 * Make all times be current TOD.
496 * It would be possible to get the process start
497 * time from the p_stat structure, but there's
498 * no "file creation" time stamp anyway, and the
499 * p_stat structure is not addressible if u. gets
500 * swapped out for that process.
501 */
502 microtime(&tv);
503 TIMEVAL_TO_TIMESPEC(&tv, &vap->va_ctime);
504 vap->va_atime = vap->va_mtime = vap->va_ctime;
505
506 switch (pfs->pfs_type) {
507 case Pmem:
508 case Pregs:
509 case Pfpregs:
510 /*
511 * If the process has exercised some setuid or setgid
512 * privilege, then rip away read/write permission so
513 * that only root can gain access.
514 */
515 if (procp->p_flag & P_SUGID)
516 vap->va_mode &= ~(S_IRUSR|S_IWUSR);
517 /* FALLTHROUGH */
518 case Pctl:
519 case Pstatus:
520 case Pnote:
521 case Pnotepg:
522 case Pmap:
523 case Pmaps:
524 case Pcmdline:
525 vap->va_nlink = 1;
526 vap->va_uid = procp->p_ucred->cr_uid;
527 vap->va_gid = procp->p_ucred->cr_gid;
528 break;
529 case Pmeminfo:
530 case Pcpuinfo:
531 vap->va_nlink = 1;
532 vap->va_uid = vap->va_gid = 0;
533 break;
534
535 default:
536 break;
537 }
538
539 /*
540 * now do the object specific fields
541 *
542 * The size could be set from struct reg, but it's hardly
543 * worth the trouble, and it puts some (potentially) machine
544 * dependent data into this machine-independent code. If it
545 * becomes important then this function should break out into
546 * a per-file stat function in the corresponding .c file.
547 */
548
549 switch (pfs->pfs_type) {
550 case Proot:
551 /*
552 * Set nlink to 1 to tell fts(3) we don't actually know.
553 */
554 vap->va_nlink = 1;
555 vap->va_uid = 0;
556 vap->va_gid = 0;
557 vap->va_bytes = vap->va_size = DEV_BSIZE;
558 break;
559
560 case Pcurproc: {
561 char buf[16]; /* should be enough */
562 vap->va_nlink = 1;
563 vap->va_uid = 0;
564 vap->va_gid = 0;
565 vap->va_bytes = vap->va_size =
566 sprintf(buf, "%ld", (long)curproc->p_pid);
567 break;
568 }
569
570 case Pself:
571 vap->va_nlink = 1;
572 vap->va_uid = 0;
573 vap->va_gid = 0;
574 vap->va_bytes = vap->va_size = sizeof("curproc");
575 break;
576
577 case Pproc:
578 vap->va_nlink = 2;
579 vap->va_uid = procp->p_ucred->cr_uid;
580 vap->va_gid = procp->p_ucred->cr_gid;
581 vap->va_bytes = vap->va_size = DEV_BSIZE;
582 break;
583
584 case Pfile:
585 error = EOPNOTSUPP;
586 break;
587
588 case Pmem:
589 vap->va_bytes = vap->va_size =
590 ctob(procp->p_vmspace->vm_tsize +
591 procp->p_vmspace->vm_dsize +
592 procp->p_vmspace->vm_ssize);
593 break;
594
595 #if defined(PT_GETREGS) || defined(PT_SETREGS)
596 case Pregs:
597 vap->va_bytes = vap->va_size = sizeof(struct reg);
598 break;
599 #endif
600
601 #if defined(PT_GETFPREGS) || defined(PT_SETFPREGS)
602 case Pfpregs:
603 vap->va_bytes = vap->va_size = sizeof(struct fpreg);
604 break;
605 #endif
606
607 case Pctl:
608 case Pstatus:
609 case Pnote:
610 case Pnotepg:
611 case Pcmdline:
612 case Pmeminfo:
613 case Pcpuinfo:
614 vap->va_bytes = vap->va_size = 0;
615 break;
616 case Pmap:
617 case Pmaps:
618 /*
619 * Advise a larger blocksize for the map files, so that
620 * they may be read in one pass.
621 */
622 vap->va_blocksize = 4 * PAGE_SIZE;
623 vap->va_bytes = vap->va_size = 0;
624 break;
625
626 default:
627 panic("procfs_getattr");
628 }
629
630 return (error);
631 }
632
633 /*ARGSUSED*/
int
procfs_setattr(v)
	void *v;
{
	/*
	 * Attribute changes are accepted and silently dropped.
	 * Reporting an error here would break callers such as
	 * creat(), which truncates the file to length 0, and with
	 * it things like: echo $note > /proc/$pid/note
	 */
	return (0);
}
649
650 /*
651 * implement access checking.
652 *
653 * actually, the check for super-user is slightly
654 * broken since it will allow read access to write-only
655 * objects. this doesn't cause any particular trouble
656 * but does mean that the i/o entry points need to check
657 * that the operation really does make sense.
658 */
659 int
660 procfs_access(v)
661 void *v;
662 {
663 struct vop_access_args /* {
664 struct vnode *a_vp;
665 int a_mode;
666 struct ucred *a_cred;
667 struct proc *a_p;
668 } */ *ap = v;
669 struct vattr va;
670 int error;
671
672 if ((error = VOP_GETATTR(ap->a_vp, &va, ap->a_cred, ap->a_p)) != 0)
673 return (error);
674
675 return (vaccess(va.va_type, va.va_mode,
676 va.va_uid, va.va_gid, ap->a_mode, ap->a_cred));
677 }
678
679 /*
680 * lookup. this is incredibly complicated in the
681 * general case, however for most pseudo-filesystems
682 * very little needs to be done.
683 *
684 * Locking isn't hard here, just poorly documented.
685 *
686 * If we're looking up ".", just vref the parent & return it.
687 *
688 * If we're looking up "..", unlock the parent, and lock "..". If everything
689 * went ok, and we're on the last component and the caller requested the
690 * parent locked, try to re-lock the parent. We do this to prevent lock
691 * races.
692 *
693 * For anything else, get the needed node. Then unlock the parent if not
694 * the last component or not LOCKPARENT (i.e. if we wouldn't re-lock the
695 * parent in the .. case).
696 *
697 * We try to exit with the parent locked in error cases.
698 */
699 int
700 procfs_lookup(v)
701 void *v;
702 {
703 struct vop_lookup_args /* {
704 struct vnode * a_dvp;
705 struct vnode ** a_vpp;
706 struct componentname * a_cnp;
707 } */ *ap = v;
708 struct componentname *cnp = ap->a_cnp;
709 struct vnode **vpp = ap->a_vpp;
710 struct vnode *dvp = ap->a_dvp;
711 const char *pname = cnp->cn_nameptr;
712 const struct proc_target *pt = NULL;
713 struct vnode *fvp;
714 pid_t pid;
715 struct pfsnode *pfs;
716 struct proc *p = NULL;
717 int i, error, wantpunlock, iscurproc = 0, isself = 0;
718
719 *vpp = NULL;
720 cnp->cn_flags &= ~PDIRUNLOCK;
721
722 if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
723 return (EROFS);
724
725 if (cnp->cn_namelen == 1 && *pname == '.') {
726 *vpp = dvp;
727 VREF(dvp);
728 return (0);
729 }
730
731 wantpunlock = (~cnp->cn_flags & (LOCKPARENT | ISLASTCN));
732 pfs = VTOPFS(dvp);
733 switch (pfs->pfs_type) {
734 case Proot:
735 /*
736 * Shouldn't get here with .. in the root node.
737 */
738 if (cnp->cn_flags & ISDOTDOT)
739 return (EIO);
740
741 iscurproc = CNEQ(cnp, "curproc", 7);
742 isself = CNEQ(cnp, "self", 4);
743
744 if (iscurproc || isself) {
745 error = procfs_allocvp(dvp->v_mount, vpp, 0,
746 iscurproc ? Pcurproc : Pself);
747 if ((error == 0) && (wantpunlock)) {
748 VOP_UNLOCK(dvp, 0);
749 cnp->cn_flags |= PDIRUNLOCK;
750 }
751 return (error);
752 }
753
754 for (i = 0; i < nproc_root_targets; i++) {
755 pt = &proc_root_targets[i];
756 if (cnp->cn_namelen == pt->pt_namlen &&
757 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
758 (pt->pt_valid == NULL ||
759 (*pt->pt_valid)(p, dvp->v_mount)))
760 break;
761 }
762
763 if (i != nproc_root_targets) {
764 error = procfs_allocvp(dvp->v_mount, vpp, 0,
765 pt->pt_pfstype);
766 if ((error == 0) && (wantpunlock)) {
767 VOP_UNLOCK(dvp, 0);
768 cnp->cn_flags |= PDIRUNLOCK;
769 }
770 return (error);
771 }
772
773 pid = atopid(pname, cnp->cn_namelen);
774 if (pid == NO_PID)
775 break;
776
777 p = PFIND(pid);
778 if (p == 0)
779 break;
780
781 error = procfs_allocvp(dvp->v_mount, vpp, pid, Pproc);
782 if ((error == 0) && (wantpunlock)) {
783 VOP_UNLOCK(dvp, 0);
784 cnp->cn_flags |= PDIRUNLOCK;
785 }
786 return (error);
787
788 case Pproc:
789 /*
790 * do the .. dance. We unlock the directory, and then
791 * get the root dir. That will automatically return ..
792 * locked. Then if the caller wanted dvp locked, we
793 * re-lock.
794 */
795 if (cnp->cn_flags & ISDOTDOT) {
796 VOP_UNLOCK(dvp, 0);
797 cnp->cn_flags |= PDIRUNLOCK;
798 error = procfs_root(dvp->v_mount, vpp);
799 if ((error == 0) && (wantpunlock == 0) &&
800 ((error = vn_lock(dvp, LK_EXCLUSIVE)) == 0))
801 cnp->cn_flags &= ~PDIRUNLOCK;
802 return (error);
803 }
804
805 p = PFIND(pfs->pfs_pid);
806 if (p == 0)
807 break;
808
809 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) {
810 if (cnp->cn_namelen == pt->pt_namlen &&
811 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
812 (pt->pt_valid == NULL ||
813 (*pt->pt_valid)(p, dvp->v_mount)))
814 goto found;
815 }
816 break;
817
818 found:
819 if (pt->pt_pfstype == Pfile) {
820 fvp = p->p_textvp;
821 /* We already checked that it exists. */
822 VREF(fvp);
823 vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY);
824 if (wantpunlock) {
825 VOP_UNLOCK(dvp, 0);
826 cnp->cn_flags |= PDIRUNLOCK;
827 }
828 *vpp = fvp;
829 return (0);
830 }
831
832 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
833 pt->pt_pfstype);
834 if ((error == 0) && (wantpunlock)) {
835 VOP_UNLOCK(dvp, 0);
836 cnp->cn_flags |= PDIRUNLOCK;
837 }
838 return (error);
839
840 default:
841 return (ENOTDIR);
842 }
843
844 return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS);
845 }
846
847 int
848 procfs_validfile(p, mp)
849 struct proc *p;
850 struct mount *mp;
851 {
852 return (p->p_textvp != NULL);
853 }
854
855 static int
856 procfs_validfile_linux(p, mp)
857 struct proc *p;
858 struct mount *mp;
859 {
860 int flags;
861
862 flags = VFSTOPROC(mp)->pmnt_flags;
863 return ((flags & PROCFSMNT_LINUXCOMPAT) &&
864 (p == NULL || procfs_validfile(p, mp)));
865 }
866
867 /*
868 * readdir returns directory entries from pfsnode (vp).
869 *
870 * the strategy here with procfs is to generate a single
871 * directory entry at a time (struct dirent) and then
872 * copy that out to userland using uiomove. a more efficient
873 * though more complex implementation, would try to minimize
874 * the number of calls to uiomove(). for procfs, this is
875 * hardly worth the added code complexity.
876 *
877 * this should just be done through read()
878 */
879 int
880 procfs_readdir(v)
881 void *v;
882 {
883 struct vop_readdir_args /* {
884 struct vnode *a_vp;
885 struct uio *a_uio;
886 struct ucred *a_cred;
887 int *a_eofflag;
888 off_t **a_cookies;
889 int *a_ncookies;
890 } */ *ap = v;
891 struct uio *uio = ap->a_uio;
892 struct dirent d;
893 struct pfsnode *pfs;
894 off_t i;
895 int error;
896 off_t *cookies = NULL;
897 int ncookies, left, skip, j;
898 struct vnode *vp;
899 const struct proc_target *pt;
900
901 vp = ap->a_vp;
902 pfs = VTOPFS(vp);
903
904 if (uio->uio_resid < UIO_MX)
905 return (EINVAL);
906 if (uio->uio_offset < 0)
907 return (EINVAL);
908
909 error = 0;
910 i = uio->uio_offset;
911 memset((caddr_t)&d, 0, UIO_MX);
912 d.d_reclen = UIO_MX;
913 ncookies = uio->uio_resid / UIO_MX;
914
915 switch (pfs->pfs_type) {
916 /*
917 * this is for the process-specific sub-directories.
918 * all that is needed to is copy out all the entries
919 * from the procent[] table (top of this file).
920 */
921 case Pproc: {
922 struct proc *p;
923
924 if (i >= nproc_targets)
925 return 0;
926
927 p = PFIND(pfs->pfs_pid);
928 if (p == NULL)
929 break;
930
931 if (ap->a_ncookies) {
932 ncookies = min(ncookies, (nproc_targets - i));
933 cookies = malloc(ncookies * sizeof (off_t),
934 M_TEMP, M_WAITOK);
935 *ap->a_cookies = cookies;
936 }
937
938 for (pt = &proc_targets[i];
939 uio->uio_resid >= UIO_MX && i < nproc_targets; pt++, i++) {
940 if (pt->pt_valid &&
941 (*pt->pt_valid)(p, vp->v_mount) == 0)
942 continue;
943
944 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype);
945 d.d_namlen = pt->pt_namlen;
946 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1);
947 d.d_type = pt->pt_type;
948
949 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
950 break;
951 if (cookies)
952 *cookies++ = i + 1;
953 }
954
955 break;
956 }
957
958 /*
959 * this is for the root of the procfs filesystem
960 * what is needed are special entries for "curproc"
961 * and "self" followed by an entry for each process
962 * on allproc
963 #ifdef PROCFS_ZOMBIE
964 * and deadproc and zombproc.
965 #endif
966 */
967
968 case Proot: {
969 int pcnt = i, nc = 0;
970 const struct proclist_desc *pd;
971 volatile struct proc *p;
972
973 if (pcnt > 3)
974 pcnt = 3;
975 if (ap->a_ncookies) {
976 /*
977 * XXX Potentially allocating too much space here,
978 * but I'm lazy. This loop needs some work.
979 */
980 cookies = malloc(ncookies * sizeof (off_t),
981 M_TEMP, M_WAITOK);
982 *ap->a_cookies = cookies;
983 }
984 /*
985 * XXX: THIS LOOP ASSUMES THAT allproc IS THE FIRST
986 * PROCLIST IN THE proclists!
987 */
988 proclist_lock_read();
989 pd = proclists;
990 #ifdef PROCFS_ZOMBIE
991 again:
992 #endif
993 for (p = LIST_FIRST(pd->pd_list);
994 p != NULL && uio->uio_resid >= UIO_MX; i++, pcnt++) {
995 switch (i) {
996 case 0: /* `.' */
997 case 1: /* `..' */
998 d.d_fileno = PROCFS_FILENO(0, Proot);
999 d.d_namlen = i + 1;
1000 memcpy(d.d_name, "..", d.d_namlen);
1001 d.d_name[i + 1] = '\0';
1002 d.d_type = DT_DIR;
1003 break;
1004
1005 case 2:
1006 d.d_fileno = PROCFS_FILENO(0, Pcurproc);
1007 d.d_namlen = sizeof("curproc") - 1;
1008 memcpy(d.d_name, "curproc", sizeof("curproc"));
1009 d.d_type = DT_LNK;
1010 break;
1011
1012 case 3:
1013 d.d_fileno = PROCFS_FILENO(0, Pself);
1014 d.d_namlen = sizeof("self") - 1;
1015 memcpy(d.d_name, "self", sizeof("self"));
1016 d.d_type = DT_LNK;
1017 break;
1018
1019 default:
1020 while (pcnt < i) {
1021 pcnt++;
1022 p = LIST_NEXT(p, p_list);
1023 if (!p)
1024 goto done;
1025 }
1026 d.d_fileno = PROCFS_FILENO(p->p_pid, Pproc);
1027 d.d_namlen = sprintf(d.d_name, "%ld",
1028 (long)p->p_pid);
1029 d.d_type = DT_DIR;
1030 p = p->p_list.le_next;
1031 break;
1032 }
1033
1034 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
1035 break;
1036 nc++;
1037 if (cookies)
1038 *cookies++ = i + 1;
1039 }
1040 done:
1041
1042 #ifdef PROCFS_ZOMBIE
1043 pd++;
1044 if (p == NULL && pd->pd_list != NULL)
1045 goto again;
1046 #endif
1047 proclist_unlock_read();
1048
1049 skip = i - pcnt;
1050 if (skip >= nproc_root_targets)
1051 break;
1052 left = nproc_root_targets - skip;
1053 for (j = 0, pt = &proc_root_targets[0];
1054 uio->uio_resid >= UIO_MX && j < left;
1055 pt++, j++, i++) {
1056 if (pt->pt_valid &&
1057 (*pt->pt_valid)(NULL, vp->v_mount) == 0)
1058 continue;
1059 d.d_fileno = PROCFS_FILENO(0, pt->pt_pfstype);
1060 d.d_namlen = pt->pt_namlen;
1061 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1);
1062 d.d_type = pt->pt_type;
1063
1064 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
1065 break;
1066 nc++;
1067 if (cookies)
1068 *cookies++ = i + 1;
1069 }
1070
1071 ncookies = nc;
1072 break;
1073 }
1074
1075 default:
1076 error = ENOTDIR;
1077 break;
1078 }
1079
1080 if (ap->a_ncookies) {
1081 if (error) {
1082 if (cookies)
1083 free(*ap->a_cookies, M_TEMP);
1084 *ap->a_ncookies = 0;
1085 *ap->a_cookies = NULL;
1086 } else
1087 *ap->a_ncookies = ncookies;
1088 }
1089 uio->uio_offset = i;
1090 return (error);
1091 }
1092
1093 /*
1094 * readlink reads the link of `curproc'
1095 */
1096 int
1097 procfs_readlink(v)
1098 void *v;
1099 {
1100 struct vop_readlink_args *ap = v;
1101 char buf[16]; /* should be enough */
1102 int len;
1103
1104 if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pcurproc))
1105 len = sprintf(buf, "%ld", (long)curproc->p_pid);
1106 else if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pself))
1107 len = sprintf(buf, "%s", "curproc");
1108 else
1109 return (EINVAL);
1110
1111 return (uiomove((caddr_t)buf, len, ap->a_uio));
1112 }
1113
1114 /*
1115 * convert decimal ascii to pid_t
1116 */
1117 static pid_t
1118 atopid(b, len)
1119 const char *b;
1120 u_int len;
1121 {
1122 pid_t p = 0;
1123
1124 while (len--) {
1125 char c = *b++;
1126 if (c < '0' || c > '9')
1127 return (NO_PID);
1128 p = 10 * p + (c - '0');
1129 if (p > PID_MAX)
1130 return (NO_PID);
1131 }
1132
1133 return (p);
1134 }
1135