procfs_vnops.c revision 1.78.2.5 1 /* $NetBSD: procfs_vnops.c,v 1.78.2.5 2001/11/14 19:17:12 nathanw Exp $ */
2
3 /*
4 * Copyright (c) 1993 Jan-Simon Pendry
5 * Copyright (c) 1993, 1995
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Jan-Simon Pendry.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95
40 */
41
42 /*
43 * procfs vnode interface
44 */
45
46 #include <sys/cdefs.h>
47 __KERNEL_RCSID(0, "$NetBSD: procfs_vnops.c,v 1.78.2.5 2001/11/14 19:17:12 nathanw Exp $");
48
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/time.h>
52 #include <sys/kernel.h>
53 #include <sys/file.h>
54 #include <sys/lwp.h>
55 #include <sys/proc.h>
56 #include <sys/vnode.h>
57 #include <sys/namei.h>
58 #include <sys/malloc.h>
59 #include <sys/mount.h>
60 #include <sys/dirent.h>
61 #include <sys/resourcevar.h>
62 #include <sys/ptrace.h>
63 #include <sys/stat.h>
64
65 #include <uvm/uvm_extern.h> /* for PAGE_SIZE */
66
67 #include <machine/reg.h>
68
69 #include <miscfs/genfs/genfs.h>
70 #include <miscfs/procfs/procfs.h>
71
72 /*
73 * Vnode Operations.
74 *
75 */
76
77 static int procfs_validfile_linux __P((struct lwp *, struct mount *));
78
79 /*
80 * This is a list of the valid names in the
81 * process-specific sub-directories. It is
82 * used in procfs_lookup and procfs_readdir
83 */
84 const struct proc_target {
85 u_char pt_type;
86 u_char pt_namlen;
87 char *pt_name;
88 pfstype pt_pfstype;
89 int (*pt_valid) __P((struct lwp *, struct mount *));
90 } proc_targets[] = {
91 #define N(s) sizeof(s)-1, s
92 /* name type validp */
93 { DT_DIR, N("."), Pproc, NULL },
94 { DT_DIR, N(".."), Proot, NULL },
95 { DT_REG, N("file"), Pfile, procfs_validfile },
96 { DT_REG, N("mem"), Pmem, NULL },
97 { DT_REG, N("regs"), Pregs, procfs_validregs },
98 { DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs },
99 { DT_REG, N("ctl"), Pctl, NULL },
100 { DT_REG, N("status"), Pstatus, NULL },
101 { DT_REG, N("note"), Pnote, NULL },
102 { DT_REG, N("notepg"), Pnotepg, NULL },
103 { DT_REG, N("map"), Pmap, procfs_validmap },
104 { DT_REG, N("maps"), Pmaps, procfs_validmap },
105 { DT_REG, N("cmdline"), Pcmdline, NULL },
106 { DT_REG, N("exe"), Pfile, procfs_validfile_linux },
107 #undef N
108 };
109 static int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]);
110
111 /*
112 * List of files in the root directory. Note: the validate function will
113 * be called with p == NULL for these ones.
114 */
115 struct proc_target proc_root_targets[] = {
116 #define N(s) sizeof(s)-1, s
117 /* name type validp */
118 { DT_REG, N("meminfo"), Pmeminfo, procfs_validfile_linux },
119 { DT_REG, N("cpuinfo"), Pcpuinfo, procfs_validfile_linux },
120 #undef N
121 };
122 static int nproc_root_targets =
123 sizeof(proc_root_targets) / sizeof(proc_root_targets[0]);
124
125 int procfs_lookup __P((void *));
126 #define procfs_create genfs_eopnotsupp_rele
127 #define procfs_mknod genfs_eopnotsupp_rele
128 int procfs_open __P((void *));
129 int procfs_close __P((void *));
130 int procfs_access __P((void *));
131 int procfs_getattr __P((void *));
132 int procfs_setattr __P((void *));
133 #define procfs_read procfs_rw
134 #define procfs_write procfs_rw
135 #define procfs_fcntl genfs_fcntl
136 #define procfs_ioctl genfs_enoioctl
137 #define procfs_poll genfs_poll
138 #define procfs_revoke genfs_revoke
139 #define procfs_fsync genfs_nullop
140 #define procfs_seek genfs_nullop
141 #define procfs_remove genfs_eopnotsupp_rele
142 int procfs_link __P((void *));
143 #define procfs_rename genfs_eopnotsupp_rele
144 #define procfs_mkdir genfs_eopnotsupp_rele
145 #define procfs_rmdir genfs_eopnotsupp_rele
146 int procfs_symlink __P((void *));
147 int procfs_readdir __P((void *));
148 int procfs_readlink __P((void *));
149 #define procfs_abortop genfs_abortop
150 int procfs_inactive __P((void *));
151 int procfs_reclaim __P((void *));
152 #define procfs_lock genfs_lock
153 #define procfs_unlock genfs_unlock
154 #define procfs_bmap genfs_badop
155 #define procfs_strategy genfs_badop
156 int procfs_print __P((void *));
157 int procfs_pathconf __P((void *));
158 #define procfs_islocked genfs_islocked
159 #define procfs_advlock genfs_einval
160 #define procfs_blkatoff genfs_eopnotsupp
161 #define procfs_valloc genfs_eopnotsupp
162 #define procfs_vfree genfs_nullop
163 #define procfs_truncate genfs_eopnotsupp
164 #define procfs_update genfs_nullop
165 #define procfs_bwrite genfs_eopnotsupp
166
167 static pid_t atopid __P((const char *, u_int));
168
169 /*
170 * procfs vnode operations.
171 */
172 int (**procfs_vnodeop_p) __P((void *));
173 const struct vnodeopv_entry_desc procfs_vnodeop_entries[] = {
174 { &vop_default_desc, vn_default_error },
175 { &vop_lookup_desc, procfs_lookup }, /* lookup */
176 { &vop_create_desc, procfs_create }, /* create */
177 { &vop_mknod_desc, procfs_mknod }, /* mknod */
178 { &vop_open_desc, procfs_open }, /* open */
179 { &vop_close_desc, procfs_close }, /* close */
180 { &vop_access_desc, procfs_access }, /* access */
181 { &vop_getattr_desc, procfs_getattr }, /* getattr */
182 { &vop_setattr_desc, procfs_setattr }, /* setattr */
183 { &vop_read_desc, procfs_read }, /* read */
184 { &vop_write_desc, procfs_write }, /* write */
185 { &vop_fcntl_desc, procfs_fcntl }, /* fcntl */
186 { &vop_ioctl_desc, procfs_ioctl }, /* ioctl */
187 { &vop_poll_desc, procfs_poll }, /* poll */
188 { &vop_revoke_desc, procfs_revoke }, /* revoke */
189 { &vop_fsync_desc, procfs_fsync }, /* fsync */
190 { &vop_seek_desc, procfs_seek }, /* seek */
191 { &vop_remove_desc, procfs_remove }, /* remove */
192 { &vop_link_desc, procfs_link }, /* link */
193 { &vop_rename_desc, procfs_rename }, /* rename */
194 { &vop_mkdir_desc, procfs_mkdir }, /* mkdir */
195 { &vop_rmdir_desc, procfs_rmdir }, /* rmdir */
196 { &vop_symlink_desc, procfs_symlink }, /* symlink */
197 { &vop_readdir_desc, procfs_readdir }, /* readdir */
198 { &vop_readlink_desc, procfs_readlink }, /* readlink */
199 { &vop_abortop_desc, procfs_abortop }, /* abortop */
200 { &vop_inactive_desc, procfs_inactive }, /* inactive */
201 { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */
202 { &vop_lock_desc, procfs_lock }, /* lock */
203 { &vop_unlock_desc, procfs_unlock }, /* unlock */
204 { &vop_bmap_desc, procfs_bmap }, /* bmap */
205 { &vop_strategy_desc, procfs_strategy }, /* strategy */
206 { &vop_print_desc, procfs_print }, /* print */
207 { &vop_islocked_desc, procfs_islocked }, /* islocked */
208 { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */
209 { &vop_advlock_desc, procfs_advlock }, /* advlock */
210 { &vop_blkatoff_desc, procfs_blkatoff }, /* blkatoff */
211 { &vop_valloc_desc, procfs_valloc }, /* valloc */
212 { &vop_vfree_desc, procfs_vfree }, /* vfree */
213 { &vop_truncate_desc, procfs_truncate }, /* truncate */
214 { &vop_update_desc, procfs_update }, /* update */
215 { NULL, NULL }
216 };
217 const struct vnodeopv_desc procfs_vnodeop_opv_desc =
218 { &procfs_vnodeop_p, procfs_vnodeop_entries };
219 /*
220 * set things up for doing i/o on
221 * the pfsnode (vp). (vp) is locked
222 * on entry, and should be left locked
223 * on exit.
224 *
225 * for procfs we don't need to do anything
226 * in particular for i/o. all that is done
227 * is to support exclusive open on process
228 * memory images.
229 */
230 int
231 procfs_open(v)
232 void *v;
233 {
234 struct vop_open_args /* {
235 struct vnode *a_vp;
236 int a_mode;
237 struct ucred *a_cred;
238 struct proc *a_p;
239 } */ *ap = v;
240 struct pfsnode *pfs = VTOPFS(ap->a_vp);
241 struct proc *p1, *p2;
242
243 p1 = ap->a_p; /* tracer */
244 p2 = PFIND(pfs->pfs_pid); /* traced */
245
246 if (p2 == NULL)
247 return (ENOENT); /* was ESRCH, jsp */
248
249 switch (pfs->pfs_type) {
250 case Pmem:
251 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) ||
252 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE)))
253 return (EBUSY);
254
255 if (procfs_checkioperm(p1, p2) != 0)
256 return (EPERM);
257
258 if (ap->a_mode & FWRITE)
259 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL);
260
261 return (0);
262
263 default:
264 break;
265 }
266
267 return (0);
268 }
269
270 /*
271 * close the pfsnode (vp) after doing i/o.
272 * (vp) is not locked on entry or exit.
273 *
274 * nothing to do for procfs other than undo
275 * any exclusive open flag (see _open above).
276 */
277 int
278 procfs_close(v)
279 void *v;
280 {
281 struct vop_close_args /* {
282 struct vnode *a_vp;
283 int a_fflag;
284 struct ucred *a_cred;
285 struct proc *a_p;
286 } */ *ap = v;
287 struct pfsnode *pfs = VTOPFS(ap->a_vp);
288
289 switch (pfs->pfs_type) {
290 case Pmem:
291 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL))
292 pfs->pfs_flags &= ~(FWRITE|O_EXCL);
293 break;
294
295 default:
296 break;
297 }
298
299 return (0);
300 }
301
302 /*
303 * _inactive is called when the pfsnode
304 * is vrele'd and the reference count goes
305 * to zero. (vp) will be on the vnode free
306 * list, so to get it back vget() must be
307 * used.
308 *
309 * for procfs, check if the process is still
310 * alive and if it isn't then just throw away
311 * the vnode by calling vgone(). this may
312 * be overkill and a waste of time since the
313 * chances are that the process will still be
314 * there and PFIND is not free.
315 *
316 * (vp) is locked on entry, but must be unlocked on exit.
317 */
318 int
319 procfs_inactive(v)
320 void *v;
321 {
322 struct vop_inactive_args /* {
323 struct vnode *a_vp;
324 struct proc *a_p;
325 } */ *ap = v;
326 struct pfsnode *pfs = VTOPFS(ap->a_vp);
327
328 VOP_UNLOCK(ap->a_vp, 0);
329 if (PFIND(pfs->pfs_pid) == 0)
330 vgone(ap->a_vp);
331
332 return (0);
333 }
334
335 /*
336 * _reclaim is called when getnewvnode()
337 * wants to make use of an entry on the vnode
338 * free list. at this time the filesystem needs
339 * to free any private data and remove the node
340 * from any private lists.
341 */
342 int
343 procfs_reclaim(v)
344 void *v;
345 {
346 struct vop_reclaim_args /* {
347 struct vnode *a_vp;
348 } */ *ap = v;
349
350 return (procfs_freevp(ap->a_vp));
351 }
352
353 /*
354 * Return POSIX pathconf information applicable to special devices.
355 */
356 int
357 procfs_pathconf(v)
358 void *v;
359 {
360 struct vop_pathconf_args /* {
361 struct vnode *a_vp;
362 int a_name;
363 register_t *a_retval;
364 } */ *ap = v;
365
366 switch (ap->a_name) {
367 case _PC_LINK_MAX:
368 *ap->a_retval = LINK_MAX;
369 return (0);
370 case _PC_MAX_CANON:
371 *ap->a_retval = MAX_CANON;
372 return (0);
373 case _PC_MAX_INPUT:
374 *ap->a_retval = MAX_INPUT;
375 return (0);
376 case _PC_PIPE_BUF:
377 *ap->a_retval = PIPE_BUF;
378 return (0);
379 case _PC_CHOWN_RESTRICTED:
380 *ap->a_retval = 1;
381 return (0);
382 case _PC_VDISABLE:
383 *ap->a_retval = _POSIX_VDISABLE;
384 return (0);
385 case _PC_SYNC_IO:
386 *ap->a_retval = 1;
387 return (0);
388 default:
389 return (EINVAL);
390 }
391 /* NOTREACHED */
392 }
393
394 /*
395 * _print is used for debugging.
396 * just print a readable description
397 * of (vp).
398 */
399 int
400 procfs_print(v)
401 void *v;
402 {
403 struct vop_print_args /* {
404 struct vnode *a_vp;
405 } */ *ap = v;
406 struct pfsnode *pfs = VTOPFS(ap->a_vp);
407
408 printf("tag VT_PROCFS, type %d, pid %d, mode %x, flags %lx\n",
409 pfs->pfs_type, pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags);
410 return 0;
411 }
412
413 int
414 procfs_link(v)
415 void *v;
416 {
417 struct vop_link_args /* {
418 struct vnode *a_dvp;
419 struct vnode *a_vp;
420 struct componentname *a_cnp;
421 } */ *ap = v;
422
423 VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
424 vput(ap->a_dvp);
425 return (EROFS);
426 }
427
428 int
429 procfs_symlink(v)
430 void *v;
431 {
432 struct vop_symlink_args /* {
433 struct vnode *a_dvp;
434 struct vnode **a_vpp;
435 struct componentname *a_cnp;
436 struct vattr *a_vap;
437 char *a_target;
438 } */ *ap = v;
439
440 VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
441 vput(ap->a_dvp);
442 return (EROFS);
443 }
444
445 /*
446 * Invent attributes for pfsnode (vp) and store
447 * them in (vap).
448 * Directories lengths are returned as zero since
449 * any real length would require the genuine size
450 * to be computed, and nothing cares anyway.
451 *
452 * this is relatively minimal for procfs.
453 */
454 int
455 procfs_getattr(v)
456 void *v;
457 {
458 struct vop_getattr_args /* {
459 struct vnode *a_vp;
460 struct vattr *a_vap;
461 struct ucred *a_cred;
462 struct proc *a_p;
463 } */ *ap = v;
464 struct pfsnode *pfs = VTOPFS(ap->a_vp);
465 struct vattr *vap = ap->a_vap;
466 struct proc *procp;
467 struct timeval tv;
468 int error;
469
470 /* first check the process still exists */
471 switch (pfs->pfs_type) {
472 case Proot:
473 case Pcurproc:
474 case Pself:
475 procp = 0;
476 break;
477
478 default:
479 procp = PFIND(pfs->pfs_pid);
480 if (procp == 0)
481 return (ENOENT);
482 break;
483 }
484
485 error = 0;
486
487 /* start by zeroing out the attributes */
488 VATTR_NULL(vap);
489
490 /* next do all the common fields */
491 vap->va_type = ap->a_vp->v_type;
492 vap->va_mode = pfs->pfs_mode;
493 vap->va_fileid = pfs->pfs_fileno;
494 vap->va_flags = 0;
495 vap->va_blocksize = PAGE_SIZE;
496
497 /*
498 * Make all times be current TOD.
499 * It would be possible to get the process start
500 * time from the p_stat structure, but there's
501 * no "file creation" time stamp anyway, and the
502 * p_stat structure is not addressible if u. gets
503 * swapped out for that process.
504 */
505 microtime(&tv);
506 TIMEVAL_TO_TIMESPEC(&tv, &vap->va_ctime);
507 vap->va_atime = vap->va_mtime = vap->va_ctime;
508
509 switch (pfs->pfs_type) {
510 case Pmem:
511 case Pregs:
512 case Pfpregs:
513 /*
514 * If the process has exercised some setuid or setgid
515 * privilege, then rip away read/write permission so
516 * that only root can gain access.
517 */
518 if (procp->p_flag & P_SUGID)
519 vap->va_mode &= ~(S_IRUSR|S_IWUSR);
520 /* FALLTHROUGH */
521 case Pctl:
522 case Pstatus:
523 case Pnote:
524 case Pnotepg:
525 case Pmap:
526 case Pmaps:
527 case Pcmdline:
528 vap->va_nlink = 1;
529 vap->va_uid = procp->p_ucred->cr_uid;
530 vap->va_gid = procp->p_ucred->cr_gid;
531 break;
532 case Pmeminfo:
533 case Pcpuinfo:
534 vap->va_nlink = 1;
535 vap->va_uid = vap->va_gid = 0;
536 break;
537
538 default:
539 break;
540 }
541
542 /*
543 * now do the object specific fields
544 *
545 * The size could be set from struct reg, but it's hardly
546 * worth the trouble, and it puts some (potentially) machine
547 * dependent data into this machine-independent code. If it
548 * becomes important then this function should break out into
549 * a per-file stat function in the corresponding .c file.
550 */
551
552 switch (pfs->pfs_type) {
553 case Proot:
554 /*
555 * Set nlink to 1 to tell fts(3) we don't actually know.
556 */
557 vap->va_nlink = 1;
558 vap->va_uid = 0;
559 vap->va_gid = 0;
560 vap->va_bytes = vap->va_size = DEV_BSIZE;
561 break;
562
563 case Pcurproc: {
564 char buf[16]; /* should be enough */
565 vap->va_nlink = 1;
566 vap->va_uid = 0;
567 vap->va_gid = 0;
568 vap->va_bytes = vap->va_size =
569 sprintf(buf, "%ld", (long)curproc->l_proc->p_pid);
570 break;
571 }
572
573 case Pself:
574 vap->va_nlink = 1;
575 vap->va_uid = 0;
576 vap->va_gid = 0;
577 vap->va_bytes = vap->va_size = sizeof("curproc");
578 break;
579
580 case Pproc:
581 vap->va_nlink = 2;
582 vap->va_uid = procp->p_ucred->cr_uid;
583 vap->va_gid = procp->p_ucred->cr_gid;
584 vap->va_bytes = vap->va_size = DEV_BSIZE;
585 break;
586
587 case Pfile:
588 error = EOPNOTSUPP;
589 break;
590
591 case Pmem:
592 vap->va_bytes = vap->va_size =
593 ctob(procp->p_vmspace->vm_tsize +
594 procp->p_vmspace->vm_dsize +
595 procp->p_vmspace->vm_ssize);
596 break;
597
598 #if defined(PT_GETREGS) || defined(PT_SETREGS)
599 case Pregs:
600 vap->va_bytes = vap->va_size = sizeof(struct reg);
601 break;
602 #endif
603
604 #if defined(PT_GETFPREGS) || defined(PT_SETFPREGS)
605 case Pfpregs:
606 vap->va_bytes = vap->va_size = sizeof(struct fpreg);
607 break;
608 #endif
609
610 case Pctl:
611 case Pstatus:
612 case Pnote:
613 case Pnotepg:
614 case Pcmdline:
615 case Pmeminfo:
616 case Pcpuinfo:
617 vap->va_bytes = vap->va_size = 0;
618 break;
619 case Pmap:
620 case Pmaps:
621 /*
622 * Advise a larger blocksize for the map files, so that
623 * they may be read in one pass.
624 */
625 vap->va_blocksize = 4 * PAGE_SIZE;
626 vap->va_bytes = vap->va_size = 0;
627 break;
628
629 default:
630 panic("procfs_getattr");
631 }
632
633 return (error);
634 }
635
/*
 * Attribute setting is accepted but ignored.
 *
 * Returning an error here would be harmful: creat() would fail
 * when it tries to truncate the file to length 0, and as a
 * consequence "echo $note > /proc/$pid/note" would fail too.
 * So simply report success.
 */
/*ARGSUSED*/
int
procfs_setattr(v)
	void *v;
{

	return (0);
}
652
653 /*
654 * implement access checking.
655 *
656 * actually, the check for super-user is slightly
657 * broken since it will allow read access to write-only
658 * objects. this doesn't cause any particular trouble
659 * but does mean that the i/o entry points need to check
660 * that the operation really does make sense.
661 */
662 int
663 procfs_access(v)
664 void *v;
665 {
666 struct vop_access_args /* {
667 struct vnode *a_vp;
668 int a_mode;
669 struct ucred *a_cred;
670 struct proc *a_p;
671 } */ *ap = v;
672 struct vattr va;
673 int error;
674
675 if ((error = VOP_GETATTR(ap->a_vp, &va, ap->a_cred, ap->a_p)) != 0)
676 return (error);
677
678 return (vaccess(va.va_type, va.va_mode,
679 va.va_uid, va.va_gid, ap->a_mode, ap->a_cred));
680 }
681
682 /*
683 * lookup. this is incredibly complicated in the
684 * general case, however for most pseudo-filesystems
685 * very little needs to be done.
686 *
687 * Locking isn't hard here, just poorly documented.
688 *
689 * If we're looking up ".", just vref the parent & return it.
690 *
691 * If we're looking up "..", unlock the parent, and lock "..". If everything
692 * went ok, and we're on the last component and the caller requested the
693 * parent locked, try to re-lock the parent. We do this to prevent lock
694 * races.
695 *
696 * For anything else, get the needed node. Then unlock the parent if not
697 * the last component or not LOCKPARENT (i.e. if we wouldn't re-lock the
698 * parent in the .. case).
699 *
700 * We try to exit with the parent locked in error cases.
701 */
702 int
703 procfs_lookup(v)
704 void *v;
705 {
706 struct vop_lookup_args /* {
707 struct vnode * a_dvp;
708 struct vnode ** a_vpp;
709 struct componentname * a_cnp;
710 } */ *ap = v;
711 struct componentname *cnp = ap->a_cnp;
712 struct vnode **vpp = ap->a_vpp;
713 struct vnode *dvp = ap->a_dvp;
714 const char *pname = cnp->cn_nameptr;
715 const struct proc_target *pt = NULL;
716 struct vnode *fvp;
717 pid_t pid;
718 struct pfsnode *pfs;
719 struct proc *p = NULL;
720 int i, error, wantpunlock, iscurproc = 0, isself = 0;
721
722 *vpp = NULL;
723 cnp->cn_flags &= ~PDIRUNLOCK;
724
725 if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
726 return (EROFS);
727
728 if (cnp->cn_namelen == 1 && *pname == '.') {
729 *vpp = dvp;
730 VREF(dvp);
731 return (0);
732 }
733
734 wantpunlock = (~cnp->cn_flags & (LOCKPARENT | ISLASTCN));
735 pfs = VTOPFS(dvp);
736 switch (pfs->pfs_type) {
737 case Proot:
738 /*
739 * Shouldn't get here with .. in the root node.
740 */
741 if (cnp->cn_flags & ISDOTDOT)
742 return (EIO);
743
744 iscurproc = CNEQ(cnp, "curproc", 7);
745 isself = CNEQ(cnp, "self", 4);
746
747 if (iscurproc || isself) {
748 error = procfs_allocvp(dvp->v_mount, vpp, 0,
749 iscurproc ? Pcurproc : Pself);
750 if ((error == 0) && (wantpunlock)) {
751 VOP_UNLOCK(dvp, 0);
752 cnp->cn_flags |= PDIRUNLOCK;
753 }
754 return (error);
755 }
756
757 for (i = 0; i < nproc_root_targets; i++) {
758 pt = &proc_root_targets[i];
759 if (cnp->cn_namelen == pt->pt_namlen &&
760 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
761 (pt->pt_valid == NULL ||
762 (*pt->pt_valid)(LIST_FIRST(&p->p_lwps),
763 dvp->v_mount)))
764 break;
765 }
766
767 if (i != nproc_root_targets) {
768 error = procfs_allocvp(dvp->v_mount, vpp, 0,
769 pt->pt_pfstype);
770 if ((error == 0) && (wantpunlock)) {
771 VOP_UNLOCK(dvp, 0);
772 cnp->cn_flags |= PDIRUNLOCK;
773 }
774 return (error);
775 }
776
777 pid = atopid(pname, cnp->cn_namelen);
778 if (pid == NO_PID)
779 break;
780
781 p = PFIND(pid);
782 if (p == 0)
783 break;
784
785 error = procfs_allocvp(dvp->v_mount, vpp, pid, Pproc);
786 if ((error == 0) && (wantpunlock)) {
787 VOP_UNLOCK(dvp, 0);
788 cnp->cn_flags |= PDIRUNLOCK;
789 }
790 return (error);
791
792 case Pproc:
793 /*
794 * do the .. dance. We unlock the directory, and then
795 * get the root dir. That will automatically return ..
796 * locked. Then if the caller wanted dvp locked, we
797 * re-lock.
798 */
799 if (cnp->cn_flags & ISDOTDOT) {
800 VOP_UNLOCK(dvp, 0);
801 cnp->cn_flags |= PDIRUNLOCK;
802 error = procfs_root(dvp->v_mount, vpp);
803 if ((error == 0) && (wantpunlock == 0) &&
804 ((error = vn_lock(dvp, LK_EXCLUSIVE)) == 0))
805 cnp->cn_flags &= ~PDIRUNLOCK;
806 return (error);
807 }
808
809 p = PFIND(pfs->pfs_pid);
810 if (p == 0)
811 break;
812
813 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) {
814 if (cnp->cn_namelen == pt->pt_namlen &&
815 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
816 (pt->pt_valid == NULL ||
817 (*pt->pt_valid)(LIST_FIRST(&p->p_lwps),
818 dvp->v_mount)))
819 goto found;
820 }
821 break;
822
823 found:
824 if (pt->pt_pfstype == Pfile) {
825 fvp = p->p_textvp;
826 /* We already checked that it exists. */
827 VREF(fvp);
828 vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY);
829 if (wantpunlock) {
830 VOP_UNLOCK(dvp, 0);
831 cnp->cn_flags |= PDIRUNLOCK;
832 }
833 *vpp = fvp;
834 return (0);
835 }
836
837 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
838 pt->pt_pfstype);
839 if ((error == 0) && (wantpunlock)) {
840 VOP_UNLOCK(dvp, 0);
841 cnp->cn_flags |= PDIRUNLOCK;
842 }
843 return (error);
844
845 default:
846 return (ENOTDIR);
847 }
848
849 return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS);
850 }
851
852 int
853 procfs_validfile(l, mp)
854 struct lwp *l;
855 struct mount *mp;
856 {
857 return (l->l_proc->p_textvp != NULL);
858 }
859
860 static int
861 procfs_validfile_linux(l, mp)
862 struct lwp *l;
863 struct mount *mp;
864 {
865 int flags;
866
867 flags = VFSTOPROC(mp)->pmnt_flags;
868 return ((flags & PROCFSMNT_LINUXCOMPAT) &&
869 (l == NULL || procfs_validfile(l, mp)));
870 }
871
872 /*
873 * readdir returns directory entries from pfsnode (vp).
874 *
875 * the strategy here with procfs is to generate a single
876 * directory entry at a time (struct dirent) and then
877 * copy that out to userland using uiomove. a more efficent
878 * though more complex implementation, would try to minimize
879 * the number of calls to uiomove(). for procfs, this is
880 * hardly worth the added code complexity.
881 *
882 * this should just be done through read()
883 */
884 int
885 procfs_readdir(v)
886 void *v;
887 {
888 struct vop_readdir_args /* {
889 struct vnode *a_vp;
890 struct uio *a_uio;
891 struct ucred *a_cred;
892 int *a_eofflag;
893 off_t **a_cookies;
894 int *a_ncookies;
895 } */ *ap = v;
896 struct uio *uio = ap->a_uio;
897 struct dirent d;
898 struct pfsnode *pfs;
899 off_t i;
900 int error;
901 off_t *cookies = NULL;
902 int ncookies, left, skip, j;
903 struct vnode *vp;
904 const struct proc_target *pt;
905
906 vp = ap->a_vp;
907 pfs = VTOPFS(vp);
908
909 if (uio->uio_resid < UIO_MX)
910 return (EINVAL);
911 if (uio->uio_offset < 0)
912 return (EINVAL);
913
914 error = 0;
915 i = uio->uio_offset;
916 memset((caddr_t)&d, 0, UIO_MX);
917 d.d_reclen = UIO_MX;
918 ncookies = uio->uio_resid / UIO_MX;
919
920 switch (pfs->pfs_type) {
921 /*
922 * this is for the process-specific sub-directories.
923 * all that is needed to is copy out all the entries
924 * from the procent[] table (top of this file).
925 */
926 case Pproc: {
927 struct proc *p;
928
929 if (i >= nproc_targets)
930 return 0;
931
932 p = PFIND(pfs->pfs_pid);
933 if (p == NULL)
934 break;
935
936 if (ap->a_ncookies) {
937 ncookies = min(ncookies, (nproc_targets - i));
938 cookies = malloc(ncookies * sizeof (off_t),
939 M_TEMP, M_WAITOK);
940 *ap->a_cookies = cookies;
941 }
942
943 for (pt = &proc_targets[i];
944 uio->uio_resid >= UIO_MX && i < nproc_targets; pt++, i++) {
945 if (pt->pt_valid &&
946 (*pt->pt_valid)(LIST_FIRST(&p->p_lwps),
947 vp->v_mount) == 0)
948 continue;
949
950 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype);
951 d.d_namlen = pt->pt_namlen;
952 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1);
953 d.d_type = pt->pt_type;
954
955 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
956 break;
957 if (cookies)
958 *cookies++ = i + 1;
959 }
960
961 break;
962 }
963
964 /*
965 * this is for the root of the procfs filesystem
966 * what is needed are special entries for "curproc"
967 * and "self" followed by an entry for each process
968 * on allproc
969 #ifdef PROCFS_ZOMBIE
970 * and deadproc and zombproc.
971 #endif
972 */
973
974 case Proot: {
975 int pcnt = i, nc = 0;
976 const struct proclist_desc *pd;
977 volatile struct proc *p;
978
979 if (pcnt > 3)
980 pcnt = 3;
981 if (ap->a_ncookies) {
982 /*
983 * XXX Potentially allocating too much space here,
984 * but I'm lazy. This loop needs some work.
985 */
986 cookies = malloc(ncookies * sizeof (off_t),
987 M_TEMP, M_WAITOK);
988 *ap->a_cookies = cookies;
989 }
990 /*
991 * XXX: THIS LOOP ASSUMES THAT allproc IS THE FIRST
992 * PROCLIST IN THE proclists!
993 */
994 proclist_lock_read();
995 pd = proclists;
996 #ifdef PROCFS_ZOMBIE
997 again:
998 #endif
999 for (p = LIST_FIRST(pd->pd_list);
1000 p != NULL && uio->uio_resid >= UIO_MX; i++, pcnt++) {
1001 switch (i) {
1002 case 0: /* `.' */
1003 case 1: /* `..' */
1004 d.d_fileno = PROCFS_FILENO(0, Proot);
1005 d.d_namlen = i + 1;
1006 memcpy(d.d_name, "..", d.d_namlen);
1007 d.d_name[i + 1] = '\0';
1008 d.d_type = DT_DIR;
1009 break;
1010
1011 case 2:
1012 d.d_fileno = PROCFS_FILENO(0, Pcurproc);
1013 d.d_namlen = sizeof("curproc") - 1;
1014 memcpy(d.d_name, "curproc", sizeof("curproc"));
1015 d.d_type = DT_LNK;
1016 break;
1017
1018 case 3:
1019 d.d_fileno = PROCFS_FILENO(0, Pself);
1020 d.d_namlen = sizeof("self") - 1;
1021 memcpy(d.d_name, "self", sizeof("self"));
1022 d.d_type = DT_LNK;
1023 break;
1024
1025 default:
1026 while (pcnt < i) {
1027 pcnt++;
1028 p = LIST_NEXT(p, p_list);
1029 if (!p)
1030 goto done;
1031 }
1032 d.d_fileno = PROCFS_FILENO(p->p_pid, Pproc);
1033 d.d_namlen = sprintf(d.d_name, "%ld",
1034 (long)p->p_pid);
1035 d.d_type = DT_DIR;
1036 p = p->p_list.le_next;
1037 break;
1038 }
1039
1040 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
1041 break;
1042 nc++;
1043 if (cookies)
1044 *cookies++ = i + 1;
1045 }
1046 done:
1047
1048 #ifdef PROCFS_ZOMBIE
1049 pd++;
1050 if (p == NULL && pd->pd_list != NULL)
1051 goto again;
1052 #endif
1053 proclist_unlock_read();
1054
1055 skip = i - pcnt;
1056 if (skip >= nproc_root_targets)
1057 break;
1058 left = nproc_root_targets - skip;
1059 for (j = 0, pt = &proc_root_targets[0];
1060 uio->uio_resid >= UIO_MX && j < left;
1061 pt++, j++, i++) {
1062 if (pt->pt_valid &&
1063 (*pt->pt_valid)(NULL, vp->v_mount) == 0)
1064 continue;
1065 d.d_fileno = PROCFS_FILENO(0, pt->pt_pfstype);
1066 d.d_namlen = pt->pt_namlen;
1067 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1);
1068 d.d_type = pt->pt_type;
1069
1070 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
1071 break;
1072 nc++;
1073 if (cookies)
1074 *cookies++ = i + 1;
1075 }
1076
1077 ncookies = nc;
1078 break;
1079 }
1080
1081 default:
1082 error = ENOTDIR;
1083 break;
1084 }
1085
1086 if (ap->a_ncookies) {
1087 if (error) {
1088 if (cookies)
1089 free(*ap->a_cookies, M_TEMP);
1090 *ap->a_ncookies = 0;
1091 *ap->a_cookies = NULL;
1092 } else
1093 *ap->a_ncookies = ncookies;
1094 }
1095 uio->uio_offset = i;
1096 return (error);
1097 }
1098
1099 /*
1100 * readlink reads the link of `curproc'
1101 */
1102 int
1103 procfs_readlink(v)
1104 void *v;
1105 {
1106 struct vop_readlink_args *ap = v;
1107 char buf[16]; /* should be enough */
1108 int len;
1109
1110 if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pcurproc))
1111 len = sprintf(buf, "%ld", (long)curproc->l_proc->p_pid);
1112 else if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pself))
1113 len = sprintf(buf, "%s", "curproc");
1114 else
1115 return (EINVAL);
1116
1117 return (uiomove((caddr_t)buf, len, ap->a_uio));
1118 }
1119
/*
 * Convert the first (len) characters of (b), which must all be
 * decimal digits, to a pid_t.
 *
 * Returns NO_PID if a non-digit is encountered or if the value
 * grows beyond PID_MAX.  A zero-length input converts to 0.
 */
static pid_t
atopid(b, len)
	const char *b;
	u_int len;
{
	pid_t pid = 0;
	u_int idx;

	for (idx = 0; idx < len; idx++) {
		int digit = b[idx] - '0';

		if (digit < 0 || digit > 9)
			return (NO_PID);

		pid = pid * 10 + digit;
		/* Checked every round, so pid can never overflow. */
		if (pid > PID_MAX)
			return (NO_PID);
	}

	return (pid);
}
1141