procfs_vnops.c revision 1.84 1 /* $NetBSD: procfs_vnops.c,v 1.84 2001/11/06 07:20:37 simonb Exp $ */
2
3 /*
4 * Copyright (c) 1993 Jan-Simon Pendry
5 * Copyright (c) 1993, 1995
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Jan-Simon Pendry.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95
40 */
41
42 /*
43 * procfs vnode interface
44 */
45
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/time.h>
49 #include <sys/kernel.h>
50 #include <sys/file.h>
51 #include <sys/proc.h>
52 #include <sys/vnode.h>
53 #include <sys/namei.h>
54 #include <sys/malloc.h>
55 #include <sys/mount.h>
56 #include <sys/dirent.h>
57 #include <sys/resourcevar.h>
58 #include <sys/ptrace.h>
59 #include <sys/stat.h>
60
61 #include <uvm/uvm_extern.h> /* for PAGE_SIZE */
62
63 #include <machine/reg.h>
64
65 #include <miscfs/genfs/genfs.h>
66 #include <miscfs/procfs/procfs.h>
67
68 /*
69 * Vnode Operations.
70 *
71 */
72
73 static int procfs_validfile_linux __P((struct proc *, struct mount *));
74
75 /*
76 * This is a list of the valid names in the
77 * process-specific sub-directories. It is
78 * used in procfs_lookup and procfs_readdir
79 */
80 const struct proc_target {
81 u_char pt_type;
82 u_char pt_namlen;
83 char *pt_name;
84 pfstype pt_pfstype;
85 int (*pt_valid) __P((struct proc *, struct mount *));
86 } proc_targets[] = {
87 #define N(s) sizeof(s)-1, s
88 /* name type validp */
89 { DT_DIR, N("."), Pproc, NULL },
90 { DT_DIR, N(".."), Proot, NULL },
91 { DT_REG, N("file"), Pfile, procfs_validfile },
92 { DT_REG, N("mem"), Pmem, NULL },
93 { DT_REG, N("regs"), Pregs, procfs_validregs },
94 { DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs },
95 { DT_REG, N("ctl"), Pctl, NULL },
96 { DT_REG, N("status"), Pstatus, NULL },
97 { DT_REG, N("note"), Pnote, NULL },
98 { DT_REG, N("notepg"), Pnotepg, NULL },
99 { DT_REG, N("map"), Pmap, procfs_validmap },
100 { DT_REG, N("maps"), Pmaps, procfs_validmap },
101 { DT_REG, N("cmdline"), Pcmdline, NULL },
102 { DT_REG, N("exe"), Pfile, procfs_validfile_linux },
103 #undef N
104 };
105 static int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]);
106
107 /*
108 * List of files in the root directory. Note: the validate function will
109 * be called with p == NULL for these ones.
110 */
111 struct proc_target proc_root_targets[] = {
112 #define N(s) sizeof(s)-1, s
113 /* name type validp */
114 { DT_REG, N("meminfo"), Pmeminfo, procfs_validfile_linux },
115 { DT_REG, N("cpuinfo"), Pcpuinfo, procfs_validfile_linux },
116 #undef N
117 };
118 static int nproc_root_targets =
119 sizeof(proc_root_targets) / sizeof(proc_root_targets[0]);
120
121 int procfs_lookup __P((void *));
122 #define procfs_create genfs_eopnotsupp_rele
123 #define procfs_mknod genfs_eopnotsupp_rele
124 int procfs_open __P((void *));
125 int procfs_close __P((void *));
126 int procfs_access __P((void *));
127 int procfs_getattr __P((void *));
128 int procfs_setattr __P((void *));
129 #define procfs_read procfs_rw
130 #define procfs_write procfs_rw
131 #define procfs_fcntl genfs_fcntl
132 #define procfs_ioctl genfs_enoioctl
133 #define procfs_poll genfs_poll
134 #define procfs_revoke genfs_revoke
135 #define procfs_fsync genfs_nullop
136 #define procfs_seek genfs_nullop
137 #define procfs_remove genfs_eopnotsupp_rele
138 int procfs_link __P((void *));
139 #define procfs_rename genfs_eopnotsupp_rele
140 #define procfs_mkdir genfs_eopnotsupp_rele
141 #define procfs_rmdir genfs_eopnotsupp_rele
142 int procfs_symlink __P((void *));
143 int procfs_readdir __P((void *));
144 int procfs_readlink __P((void *));
145 #define procfs_abortop genfs_abortop
146 int procfs_inactive __P((void *));
147 int procfs_reclaim __P((void *));
148 #define procfs_lock genfs_lock
149 #define procfs_unlock genfs_unlock
150 #define procfs_bmap genfs_badop
151 #define procfs_strategy genfs_badop
152 int procfs_print __P((void *));
153 int procfs_pathconf __P((void *));
154 #define procfs_islocked genfs_islocked
155 #define procfs_advlock genfs_einval
156 #define procfs_blkatoff genfs_eopnotsupp
157 #define procfs_valloc genfs_eopnotsupp
158 #define procfs_vfree genfs_nullop
159 #define procfs_truncate genfs_eopnotsupp
160 #define procfs_update genfs_nullop
161 #define procfs_bwrite genfs_eopnotsupp
162
163 static pid_t atopid __P((const char *, u_int));
164
165 /*
166 * procfs vnode operations.
167 */
168 int (**procfs_vnodeop_p) __P((void *));
169 const struct vnodeopv_entry_desc procfs_vnodeop_entries[] = {
170 { &vop_default_desc, vn_default_error },
171 { &vop_lookup_desc, procfs_lookup }, /* lookup */
172 { &vop_create_desc, procfs_create }, /* create */
173 { &vop_mknod_desc, procfs_mknod }, /* mknod */
174 { &vop_open_desc, procfs_open }, /* open */
175 { &vop_close_desc, procfs_close }, /* close */
176 { &vop_access_desc, procfs_access }, /* access */
177 { &vop_getattr_desc, procfs_getattr }, /* getattr */
178 { &vop_setattr_desc, procfs_setattr }, /* setattr */
179 { &vop_read_desc, procfs_read }, /* read */
180 { &vop_write_desc, procfs_write }, /* write */
181 { &vop_fcntl_desc, procfs_fcntl }, /* fcntl */
182 { &vop_ioctl_desc, procfs_ioctl }, /* ioctl */
183 { &vop_poll_desc, procfs_poll }, /* poll */
184 { &vop_revoke_desc, procfs_revoke }, /* revoke */
185 { &vop_fsync_desc, procfs_fsync }, /* fsync */
186 { &vop_seek_desc, procfs_seek }, /* seek */
187 { &vop_remove_desc, procfs_remove }, /* remove */
188 { &vop_link_desc, procfs_link }, /* link */
189 { &vop_rename_desc, procfs_rename }, /* rename */
190 { &vop_mkdir_desc, procfs_mkdir }, /* mkdir */
191 { &vop_rmdir_desc, procfs_rmdir }, /* rmdir */
192 { &vop_symlink_desc, procfs_symlink }, /* symlink */
193 { &vop_readdir_desc, procfs_readdir }, /* readdir */
194 { &vop_readlink_desc, procfs_readlink }, /* readlink */
195 { &vop_abortop_desc, procfs_abortop }, /* abortop */
196 { &vop_inactive_desc, procfs_inactive }, /* inactive */
197 { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */
198 { &vop_lock_desc, procfs_lock }, /* lock */
199 { &vop_unlock_desc, procfs_unlock }, /* unlock */
200 { &vop_bmap_desc, procfs_bmap }, /* bmap */
201 { &vop_strategy_desc, procfs_strategy }, /* strategy */
202 { &vop_print_desc, procfs_print }, /* print */
203 { &vop_islocked_desc, procfs_islocked }, /* islocked */
204 { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */
205 { &vop_advlock_desc, procfs_advlock }, /* advlock */
206 { &vop_blkatoff_desc, procfs_blkatoff }, /* blkatoff */
207 { &vop_valloc_desc, procfs_valloc }, /* valloc */
208 { &vop_vfree_desc, procfs_vfree }, /* vfree */
209 { &vop_truncate_desc, procfs_truncate }, /* truncate */
210 { &vop_update_desc, procfs_update }, /* update */
211 { NULL, NULL }
212 };
213 const struct vnodeopv_desc procfs_vnodeop_opv_desc =
214 { &procfs_vnodeop_p, procfs_vnodeop_entries };
215 /*
216 * set things up for doing i/o on
217 * the pfsnode (vp). (vp) is locked
218 * on entry, and should be left locked
219 * on exit.
220 *
221 * for procfs we don't need to do anything
222 * in particular for i/o. all that is done
223 * is to support exclusive open on process
224 * memory images.
225 */
226 int
227 procfs_open(v)
228 void *v;
229 {
230 struct vop_open_args /* {
231 struct vnode *a_vp;
232 int a_mode;
233 struct ucred *a_cred;
234 struct proc *a_p;
235 } */ *ap = v;
236 struct pfsnode *pfs = VTOPFS(ap->a_vp);
237 struct proc *p1, *p2;
238
239 p1 = ap->a_p; /* tracer */
240 p2 = PFIND(pfs->pfs_pid); /* traced */
241
242 if (p2 == NULL)
243 return (ENOENT); /* was ESRCH, jsp */
244
245 switch (pfs->pfs_type) {
246 case Pmem:
247 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) ||
248 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE)))
249 return (EBUSY);
250
251 if (procfs_checkioperm(p1, p2) != 0)
252 return (EPERM);
253
254 if (ap->a_mode & FWRITE)
255 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL);
256
257 return (0);
258
259 default:
260 break;
261 }
262
263 return (0);
264 }
265
266 /*
267 * close the pfsnode (vp) after doing i/o.
268 * (vp) is not locked on entry or exit.
269 *
270 * nothing to do for procfs other than undo
271 * any exclusive open flag (see _open above).
272 */
273 int
274 procfs_close(v)
275 void *v;
276 {
277 struct vop_close_args /* {
278 struct vnode *a_vp;
279 int a_fflag;
280 struct ucred *a_cred;
281 struct proc *a_p;
282 } */ *ap = v;
283 struct pfsnode *pfs = VTOPFS(ap->a_vp);
284
285 switch (pfs->pfs_type) {
286 case Pmem:
287 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL))
288 pfs->pfs_flags &= ~(FWRITE|O_EXCL);
289 break;
290
291 default:
292 break;
293 }
294
295 return (0);
296 }
297
298 /*
299 * _inactive is called when the pfsnode
300 * is vrele'd and the reference count goes
301 * to zero. (vp) will be on the vnode free
302 * list, so to get it back vget() must be
303 * used.
304 *
305 * for procfs, check if the process is still
306 * alive and if it isn't then just throw away
307 * the vnode by calling vgone(). this may
308 * be overkill and a waste of time since the
309 * chances are that the process will still be
310 * there and PFIND is not free.
311 *
312 * (vp) is locked on entry, but must be unlocked on exit.
313 */
314 int
315 procfs_inactive(v)
316 void *v;
317 {
318 struct vop_inactive_args /* {
319 struct vnode *a_vp;
320 struct proc *a_p;
321 } */ *ap = v;
322 struct pfsnode *pfs = VTOPFS(ap->a_vp);
323
324 VOP_UNLOCK(ap->a_vp, 0);
325 if (PFIND(pfs->pfs_pid) == 0)
326 vgone(ap->a_vp);
327
328 return (0);
329 }
330
331 /*
332 * _reclaim is called when getnewvnode()
333 * wants to make use of an entry on the vnode
334 * free list. at this time the filesystem needs
335 * to free any private data and remove the node
336 * from any private lists.
337 */
338 int
339 procfs_reclaim(v)
340 void *v;
341 {
342 struct vop_reclaim_args /* {
343 struct vnode *a_vp;
344 } */ *ap = v;
345
346 return (procfs_freevp(ap->a_vp));
347 }
348
349 /*
350 * Return POSIX pathconf information applicable to special devices.
351 */
352 int
353 procfs_pathconf(v)
354 void *v;
355 {
356 struct vop_pathconf_args /* {
357 struct vnode *a_vp;
358 int a_name;
359 register_t *a_retval;
360 } */ *ap = v;
361
362 switch (ap->a_name) {
363 case _PC_LINK_MAX:
364 *ap->a_retval = LINK_MAX;
365 return (0);
366 case _PC_MAX_CANON:
367 *ap->a_retval = MAX_CANON;
368 return (0);
369 case _PC_MAX_INPUT:
370 *ap->a_retval = MAX_INPUT;
371 return (0);
372 case _PC_PIPE_BUF:
373 *ap->a_retval = PIPE_BUF;
374 return (0);
375 case _PC_CHOWN_RESTRICTED:
376 *ap->a_retval = 1;
377 return (0);
378 case _PC_VDISABLE:
379 *ap->a_retval = _POSIX_VDISABLE;
380 return (0);
381 case _PC_SYNC_IO:
382 *ap->a_retval = 1;
383 return (0);
384 default:
385 return (EINVAL);
386 }
387 /* NOTREACHED */
388 }
389
390 /*
391 * _print is used for debugging.
392 * just print a readable description
393 * of (vp).
394 */
395 int
396 procfs_print(v)
397 void *v;
398 {
399 struct vop_print_args /* {
400 struct vnode *a_vp;
401 } */ *ap = v;
402 struct pfsnode *pfs = VTOPFS(ap->a_vp);
403
404 printf("tag VT_PROCFS, type %d, pid %d, mode %x, flags %lx\n",
405 pfs->pfs_type, pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags);
406 return 0;
407 }
408
409 int
410 procfs_link(v)
411 void *v;
412 {
413 struct vop_link_args /* {
414 struct vnode *a_dvp;
415 struct vnode *a_vp;
416 struct componentname *a_cnp;
417 } */ *ap = v;
418
419 VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
420 vput(ap->a_dvp);
421 return (EROFS);
422 }
423
424 int
425 procfs_symlink(v)
426 void *v;
427 {
428 struct vop_symlink_args /* {
429 struct vnode *a_dvp;
430 struct vnode **a_vpp;
431 struct componentname *a_cnp;
432 struct vattr *a_vap;
433 char *a_target;
434 } */ *ap = v;
435
436 VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
437 vput(ap->a_dvp);
438 return (EROFS);
439 }
440
441 /*
442 * Invent attributes for pfsnode (vp) and store
443 * them in (vap).
444 * Directories lengths are returned as zero since
445 * any real length would require the genuine size
446 * to be computed, and nothing cares anyway.
447 *
448 * this is relatively minimal for procfs.
449 */
450 int
451 procfs_getattr(v)
452 void *v;
453 {
454 struct vop_getattr_args /* {
455 struct vnode *a_vp;
456 struct vattr *a_vap;
457 struct ucred *a_cred;
458 struct proc *a_p;
459 } */ *ap = v;
460 struct pfsnode *pfs = VTOPFS(ap->a_vp);
461 struct vattr *vap = ap->a_vap;
462 struct proc *procp;
463 struct timeval tv;
464 int error;
465
466 /* first check the process still exists */
467 switch (pfs->pfs_type) {
468 case Proot:
469 case Pcurproc:
470 case Pself:
471 procp = 0;
472 break;
473
474 default:
475 procp = PFIND(pfs->pfs_pid);
476 if (procp == 0)
477 return (ENOENT);
478 break;
479 }
480
481 error = 0;
482
483 /* start by zeroing out the attributes */
484 VATTR_NULL(vap);
485
486 /* next do all the common fields */
487 vap->va_type = ap->a_vp->v_type;
488 vap->va_mode = pfs->pfs_mode;
489 vap->va_fileid = pfs->pfs_fileno;
490 vap->va_flags = 0;
491 vap->va_blocksize = PAGE_SIZE;
492
493 /*
494 * Make all times be current TOD.
495 * It would be possible to get the process start
496 * time from the p_stat structure, but there's
497 * no "file creation" time stamp anyway, and the
498 * p_stat structure is not addressible if u. gets
499 * swapped out for that process.
500 */
501 microtime(&tv);
502 TIMEVAL_TO_TIMESPEC(&tv, &vap->va_ctime);
503 vap->va_atime = vap->va_mtime = vap->va_ctime;
504
505 switch (pfs->pfs_type) {
506 case Pmem:
507 case Pregs:
508 case Pfpregs:
509 /*
510 * If the process has exercised some setuid or setgid
511 * privilege, then rip away read/write permission so
512 * that only root can gain access.
513 */
514 if (procp->p_flag & P_SUGID)
515 vap->va_mode &= ~(S_IRUSR|S_IWUSR);
516 /* FALLTHROUGH */
517 case Pctl:
518 case Pstatus:
519 case Pnote:
520 case Pnotepg:
521 case Pmap:
522 case Pmaps:
523 case Pcmdline:
524 vap->va_nlink = 1;
525 vap->va_uid = procp->p_ucred->cr_uid;
526 vap->va_gid = procp->p_ucred->cr_gid;
527 break;
528 case Pmeminfo:
529 case Pcpuinfo:
530 vap->va_nlink = 1;
531 vap->va_uid = vap->va_gid = 0;
532 break;
533
534 default:
535 break;
536 }
537
538 /*
539 * now do the object specific fields
540 *
541 * The size could be set from struct reg, but it's hardly
542 * worth the trouble, and it puts some (potentially) machine
543 * dependent data into this machine-independent code. If it
544 * becomes important then this function should break out into
545 * a per-file stat function in the corresponding .c file.
546 */
547
548 switch (pfs->pfs_type) {
549 case Proot:
550 /*
551 * Set nlink to 1 to tell fts(3) we don't actually know.
552 */
553 vap->va_nlink = 1;
554 vap->va_uid = 0;
555 vap->va_gid = 0;
556 vap->va_bytes = vap->va_size = DEV_BSIZE;
557 break;
558
559 case Pcurproc: {
560 char buf[16]; /* should be enough */
561 vap->va_nlink = 1;
562 vap->va_uid = 0;
563 vap->va_gid = 0;
564 vap->va_bytes = vap->va_size =
565 sprintf(buf, "%ld", (long)curproc->p_pid);
566 break;
567 }
568
569 case Pself:
570 vap->va_nlink = 1;
571 vap->va_uid = 0;
572 vap->va_gid = 0;
573 vap->va_bytes = vap->va_size = sizeof("curproc");
574 break;
575
576 case Pproc:
577 vap->va_nlink = 2;
578 vap->va_uid = procp->p_ucred->cr_uid;
579 vap->va_gid = procp->p_ucred->cr_gid;
580 vap->va_bytes = vap->va_size = DEV_BSIZE;
581 break;
582
583 case Pfile:
584 error = EOPNOTSUPP;
585 break;
586
587 case Pmem:
588 vap->va_bytes = vap->va_size =
589 ctob(procp->p_vmspace->vm_tsize +
590 procp->p_vmspace->vm_dsize +
591 procp->p_vmspace->vm_ssize);
592 break;
593
594 #if defined(PT_GETREGS) || defined(PT_SETREGS)
595 case Pregs:
596 vap->va_bytes = vap->va_size = sizeof(struct reg);
597 break;
598 #endif
599
600 #if defined(PT_GETFPREGS) || defined(PT_SETFPREGS)
601 case Pfpregs:
602 vap->va_bytes = vap->va_size = sizeof(struct fpreg);
603 break;
604 #endif
605
606 case Pctl:
607 case Pstatus:
608 case Pnote:
609 case Pnotepg:
610 case Pcmdline:
611 case Pmeminfo:
612 case Pcpuinfo:
613 vap->va_bytes = vap->va_size = 0;
614 break;
615 case Pmap:
616 case Pmaps:
617 /*
618 * Advise a larger blocksize for the map files, so that
619 * they may be read in one pass.
620 */
621 vap->va_blocksize = 4 * PAGE_SIZE;
622 vap->va_bytes = vap->va_size = 0;
623 break;
624
625 default:
626 panic("procfs_getattr");
627 }
628
629 return (error);
630 }
631
/*
 * Pretend that setting attributes worked.
 *
 * Reporting an error here would make creat() fail when it tries to
 * truncate the file to length 0, which in turn would break things
 * like "echo $note > /proc/$pid/note".  The argument is ignored and
 * 0 is always returned.
 */
/*ARGSUSED*/
int
procfs_setattr(v)
	void *v;
{

	return (0);
}
648
649 /*
650 * implement access checking.
651 *
652 * actually, the check for super-user is slightly
653 * broken since it will allow read access to write-only
654 * objects. this doesn't cause any particular trouble
655 * but does mean that the i/o entry points need to check
656 * that the operation really does make sense.
657 */
658 int
659 procfs_access(v)
660 void *v;
661 {
662 struct vop_access_args /* {
663 struct vnode *a_vp;
664 int a_mode;
665 struct ucred *a_cred;
666 struct proc *a_p;
667 } */ *ap = v;
668 struct vattr va;
669 int error;
670
671 if ((error = VOP_GETATTR(ap->a_vp, &va, ap->a_cred, ap->a_p)) != 0)
672 return (error);
673
674 return (vaccess(va.va_type, va.va_mode,
675 va.va_uid, va.va_gid, ap->a_mode, ap->a_cred));
676 }
677
678 /*
679 * lookup. this is incredibly complicated in the
680 * general case, however for most pseudo-filesystems
681 * very little needs to be done.
682 *
683 * Locking isn't hard here, just poorly documented.
684 *
685 * If we're looking up ".", just vref the parent & return it.
686 *
687 * If we're looking up "..", unlock the parent, and lock "..". If everything
688 * went ok, and we're on the last component and the caller requested the
689 * parent locked, try to re-lock the parent. We do this to prevent lock
690 * races.
691 *
692 * For anything else, get the needed node. Then unlock the parent if not
693 * the last component or not LOCKPARENT (i.e. if we wouldn't re-lock the
694 * parent in the .. case).
695 *
696 * We try to exit with the parent locked in error cases.
697 */
698 int
699 procfs_lookup(v)
700 void *v;
701 {
702 struct vop_lookup_args /* {
703 struct vnode * a_dvp;
704 struct vnode ** a_vpp;
705 struct componentname * a_cnp;
706 } */ *ap = v;
707 struct componentname *cnp = ap->a_cnp;
708 struct vnode **vpp = ap->a_vpp;
709 struct vnode *dvp = ap->a_dvp;
710 const char *pname = cnp->cn_nameptr;
711 const struct proc_target *pt = NULL;
712 struct vnode *fvp;
713 pid_t pid;
714 struct pfsnode *pfs;
715 struct proc *p = NULL;
716 int i, error, wantpunlock, iscurproc = 0, isself = 0;
717
718 *vpp = NULL;
719 cnp->cn_flags &= ~PDIRUNLOCK;
720
721 if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
722 return (EROFS);
723
724 if (cnp->cn_namelen == 1 && *pname == '.') {
725 *vpp = dvp;
726 VREF(dvp);
727 return (0);
728 }
729
730 wantpunlock = (~cnp->cn_flags & (LOCKPARENT | ISLASTCN));
731 pfs = VTOPFS(dvp);
732 switch (pfs->pfs_type) {
733 case Proot:
734 /*
735 * Shouldn't get here with .. in the root node.
736 */
737 if (cnp->cn_flags & ISDOTDOT)
738 return (EIO);
739
740 iscurproc = CNEQ(cnp, "curproc", 7);
741 isself = CNEQ(cnp, "self", 4);
742
743 if (iscurproc || isself) {
744 error = procfs_allocvp(dvp->v_mount, vpp, 0,
745 iscurproc ? Pcurproc : Pself);
746 if ((error == 0) && (wantpunlock)) {
747 VOP_UNLOCK(dvp, 0);
748 cnp->cn_flags |= PDIRUNLOCK;
749 }
750 return (error);
751 }
752
753 for (i = 0; i < nproc_root_targets; i++) {
754 pt = &proc_root_targets[i];
755 if (cnp->cn_namelen == pt->pt_namlen &&
756 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
757 (pt->pt_valid == NULL ||
758 (*pt->pt_valid)(p, dvp->v_mount)))
759 break;
760 }
761
762 if (i != nproc_root_targets) {
763 error = procfs_allocvp(dvp->v_mount, vpp, 0,
764 pt->pt_pfstype);
765 if ((error == 0) && (wantpunlock)) {
766 VOP_UNLOCK(dvp, 0);
767 cnp->cn_flags |= PDIRUNLOCK;
768 }
769 return (error);
770 }
771
772 pid = atopid(pname, cnp->cn_namelen);
773 if (pid == NO_PID)
774 break;
775
776 p = PFIND(pid);
777 if (p == 0)
778 break;
779
780 error = procfs_allocvp(dvp->v_mount, vpp, pid, Pproc);
781 if ((error == 0) && (wantpunlock)) {
782 VOP_UNLOCK(dvp, 0);
783 cnp->cn_flags |= PDIRUNLOCK;
784 }
785 return (error);
786
787 case Pproc:
788 /*
789 * do the .. dance. We unlock the directory, and then
790 * get the root dir. That will automatically return ..
791 * locked. Then if the caller wanted dvp locked, we
792 * re-lock.
793 */
794 if (cnp->cn_flags & ISDOTDOT) {
795 VOP_UNLOCK(dvp, 0);
796 cnp->cn_flags |= PDIRUNLOCK;
797 error = procfs_root(dvp->v_mount, vpp);
798 if ((error == 0) && (wantpunlock == 0) &&
799 ((error = vn_lock(dvp, LK_EXCLUSIVE)) == 0))
800 cnp->cn_flags &= ~PDIRUNLOCK;
801 return (error);
802 }
803
804 p = PFIND(pfs->pfs_pid);
805 if (p == 0)
806 break;
807
808 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) {
809 if (cnp->cn_namelen == pt->pt_namlen &&
810 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
811 (pt->pt_valid == NULL ||
812 (*pt->pt_valid)(p, dvp->v_mount)))
813 goto found;
814 }
815 break;
816
817 found:
818 if (pt->pt_pfstype == Pfile) {
819 fvp = p->p_textvp;
820 /* We already checked that it exists. */
821 VREF(fvp);
822 vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY);
823 if (wantpunlock) {
824 VOP_UNLOCK(dvp, 0);
825 cnp->cn_flags |= PDIRUNLOCK;
826 }
827 *vpp = fvp;
828 return (0);
829 }
830
831 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
832 pt->pt_pfstype);
833 if ((error == 0) && (wantpunlock)) {
834 VOP_UNLOCK(dvp, 0);
835 cnp->cn_flags |= PDIRUNLOCK;
836 }
837 return (error);
838
839 default:
840 return (ENOTDIR);
841 }
842
843 return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS);
844 }
845
846 int
847 procfs_validfile(p, mp)
848 struct proc *p;
849 struct mount *mp;
850 {
851 return (p->p_textvp != NULL);
852 }
853
854 static int
855 procfs_validfile_linux(p, mp)
856 struct proc *p;
857 struct mount *mp;
858 {
859 int flags;
860
861 flags = VFSTOPROC(mp)->pmnt_flags;
862 return ((flags & PROCFSMNT_LINUXCOMPAT) &&
863 (p == NULL || procfs_validfile(p, mp)));
864 }
865
866 /*
867 * readdir returns directory entries from pfsnode (vp).
868 *
869 * the strategy here with procfs is to generate a single
870 * directory entry at a time (struct dirent) and then
871 * copy that out to userland using uiomove. a more efficent
872 * though more complex implementation, would try to minimize
873 * the number of calls to uiomove(). for procfs, this is
874 * hardly worth the added code complexity.
875 *
876 * this should just be done through read()
877 */
878 int
879 procfs_readdir(v)
880 void *v;
881 {
882 struct vop_readdir_args /* {
883 struct vnode *a_vp;
884 struct uio *a_uio;
885 struct ucred *a_cred;
886 int *a_eofflag;
887 off_t **a_cookies;
888 int *a_ncookies;
889 } */ *ap = v;
890 struct uio *uio = ap->a_uio;
891 struct dirent d;
892 struct pfsnode *pfs;
893 off_t i;
894 int error;
895 off_t *cookies = NULL;
896 int ncookies, left, skip, j;
897 struct vnode *vp;
898 const struct proc_target *pt;
899
900 vp = ap->a_vp;
901 pfs = VTOPFS(vp);
902
903 if (uio->uio_resid < UIO_MX)
904 return (EINVAL);
905 if (uio->uio_offset < 0)
906 return (EINVAL);
907
908 error = 0;
909 i = uio->uio_offset;
910 memset((caddr_t)&d, 0, UIO_MX);
911 d.d_reclen = UIO_MX;
912 ncookies = uio->uio_resid / UIO_MX;
913
914 switch (pfs->pfs_type) {
915 /*
916 * this is for the process-specific sub-directories.
917 * all that is needed to is copy out all the entries
918 * from the procent[] table (top of this file).
919 */
920 case Pproc: {
921 struct proc *p;
922
923 if (i >= nproc_targets)
924 return 0;
925
926 p = PFIND(pfs->pfs_pid);
927 if (p == NULL)
928 break;
929
930 if (ap->a_ncookies) {
931 ncookies = min(ncookies, (nproc_targets - i));
932 cookies = malloc(ncookies * sizeof (off_t),
933 M_TEMP, M_WAITOK);
934 *ap->a_cookies = cookies;
935 }
936
937 for (pt = &proc_targets[i];
938 uio->uio_resid >= UIO_MX && i < nproc_targets; pt++, i++) {
939 if (pt->pt_valid &&
940 (*pt->pt_valid)(p, vp->v_mount) == 0)
941 continue;
942
943 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype);
944 d.d_namlen = pt->pt_namlen;
945 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1);
946 d.d_type = pt->pt_type;
947
948 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
949 break;
950 if (cookies)
951 *cookies++ = i + 1;
952 }
953
954 break;
955 }
956
957 /*
958 * this is for the root of the procfs filesystem
959 * what is needed are special entries for "curproc"
960 * and "self" followed by an entry for each process
961 * on allproc
962 #ifdef PROCFS_ZOMBIE
963 * and deadproc and zombproc.
964 #endif
965 */
966
967 case Proot: {
968 int pcnt = i, nc = 0;
969 const struct proclist_desc *pd;
970 volatile struct proc *p;
971
972 if (pcnt > 3)
973 pcnt = 3;
974 if (ap->a_ncookies) {
975 /*
976 * XXX Potentially allocating too much space here,
977 * but I'm lazy. This loop needs some work.
978 */
979 cookies = malloc(ncookies * sizeof (off_t),
980 M_TEMP, M_WAITOK);
981 *ap->a_cookies = cookies;
982 }
983 /*
984 * XXX: THIS LOOP ASSUMES THAT allproc IS THE FIRST
985 * PROCLIST IN THE proclists!
986 */
987 proclist_lock_read();
988 pd = proclists;
989 #ifdef PROCFS_ZOMBIE
990 again:
991 #endif
992 for (p = LIST_FIRST(pd->pd_list);
993 p != NULL && uio->uio_resid >= UIO_MX; i++, pcnt++) {
994 switch (i) {
995 case 0: /* `.' */
996 case 1: /* `..' */
997 d.d_fileno = PROCFS_FILENO(0, Proot);
998 d.d_namlen = i + 1;
999 memcpy(d.d_name, "..", d.d_namlen);
1000 d.d_name[i + 1] = '\0';
1001 d.d_type = DT_DIR;
1002 break;
1003
1004 case 2:
1005 d.d_fileno = PROCFS_FILENO(0, Pcurproc);
1006 d.d_namlen = sizeof("curproc") - 1;
1007 memcpy(d.d_name, "curproc", sizeof("curproc"));
1008 d.d_type = DT_LNK;
1009 break;
1010
1011 case 3:
1012 d.d_fileno = PROCFS_FILENO(0, Pself);
1013 d.d_namlen = sizeof("self") - 1;
1014 memcpy(d.d_name, "self", sizeof("self"));
1015 d.d_type = DT_LNK;
1016 break;
1017
1018 default:
1019 while (pcnt < i) {
1020 pcnt++;
1021 p = LIST_NEXT(p, p_list);
1022 if (!p)
1023 goto done;
1024 }
1025 d.d_fileno = PROCFS_FILENO(p->p_pid, Pproc);
1026 d.d_namlen = sprintf(d.d_name, "%ld",
1027 (long)p->p_pid);
1028 d.d_type = DT_DIR;
1029 p = p->p_list.le_next;
1030 break;
1031 }
1032
1033 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
1034 break;
1035 nc++;
1036 if (cookies)
1037 *cookies++ = i + 1;
1038 }
1039 done:
1040
1041 #ifdef PROCFS_ZOMBIE
1042 pd++;
1043 if (p == NULL && pd->pd_list != NULL)
1044 goto again;
1045 #endif
1046 proclist_unlock_read();
1047
1048 skip = i - pcnt;
1049 if (skip >= nproc_root_targets)
1050 break;
1051 left = nproc_root_targets - skip;
1052 for (j = 0, pt = &proc_root_targets[0];
1053 uio->uio_resid >= UIO_MX && j < left;
1054 pt++, j++, i++) {
1055 if (pt->pt_valid &&
1056 (*pt->pt_valid)(NULL, vp->v_mount) == 0)
1057 continue;
1058 d.d_fileno = PROCFS_FILENO(0, pt->pt_pfstype);
1059 d.d_namlen = pt->pt_namlen;
1060 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1);
1061 d.d_type = pt->pt_type;
1062
1063 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
1064 break;
1065 nc++;
1066 if (cookies)
1067 *cookies++ = i + 1;
1068 }
1069
1070 ncookies = nc;
1071 break;
1072 }
1073
1074 default:
1075 error = ENOTDIR;
1076 break;
1077 }
1078
1079 if (ap->a_ncookies) {
1080 if (error) {
1081 if (cookies)
1082 free(*ap->a_cookies, M_TEMP);
1083 *ap->a_ncookies = 0;
1084 *ap->a_cookies = NULL;
1085 } else
1086 *ap->a_ncookies = ncookies;
1087 }
1088 uio->uio_offset = i;
1089 return (error);
1090 }
1091
1092 /*
1093 * readlink reads the link of `curproc'
1094 */
1095 int
1096 procfs_readlink(v)
1097 void *v;
1098 {
1099 struct vop_readlink_args *ap = v;
1100 char buf[16]; /* should be enough */
1101 int len;
1102
1103 if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pcurproc))
1104 len = sprintf(buf, "%ld", (long)curproc->p_pid);
1105 else if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pself))
1106 len = sprintf(buf, "%s", "curproc");
1107 else
1108 return (EINVAL);
1109
1110 return (uiomove((caddr_t)buf, len, ap->a_uio));
1111 }
1112
1113 /*
1114 * convert decimal ascii to pid_t
1115 */
1116 static pid_t
1117 atopid(b, len)
1118 const char *b;
1119 u_int len;
1120 {
1121 pid_t p = 0;
1122
1123 while (len--) {
1124 char c = *b++;
1125 if (c < '0' || c > '9')
1126 return (NO_PID);
1127 p = 10 * p + (c - '0');
1128 if (p > PID_MAX)
1129 return (NO_PID);
1130 }
1131
1132 return (p);
1133 }
1134