procfs_vnops.c revision 1.85 1 /* $NetBSD: procfs_vnops.c,v 1.85 2001/11/10 13:33:44 lukem Exp $ */
2
3 /*
4 * Copyright (c) 1993 Jan-Simon Pendry
5 * Copyright (c) 1993, 1995
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Jan-Simon Pendry.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95
40 */
41
42 /*
43 * procfs vnode interface
44 */
45
46 #include <sys/cdefs.h>
47 __KERNEL_RCSID(0, "$NetBSD: procfs_vnops.c,v 1.85 2001/11/10 13:33:44 lukem Exp $");
48
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/time.h>
52 #include <sys/kernel.h>
53 #include <sys/file.h>
54 #include <sys/proc.h>
55 #include <sys/vnode.h>
56 #include <sys/namei.h>
57 #include <sys/malloc.h>
58 #include <sys/mount.h>
59 #include <sys/dirent.h>
60 #include <sys/resourcevar.h>
61 #include <sys/ptrace.h>
62 #include <sys/stat.h>
63
64 #include <uvm/uvm_extern.h> /* for PAGE_SIZE */
65
66 #include <machine/reg.h>
67
68 #include <miscfs/genfs/genfs.h>
69 #include <miscfs/procfs/procfs.h>
70
71 /*
72 * Vnode Operations.
73 *
74 */
75
76 static int procfs_validfile_linux __P((struct proc *, struct mount *));
77
78 /*
79 * This is a list of the valid names in the
80 * process-specific sub-directories. It is
81 * used in procfs_lookup and procfs_readdir
82 */
83 const struct proc_target {
84 u_char pt_type;
85 u_char pt_namlen;
86 char *pt_name;
87 pfstype pt_pfstype;
88 int (*pt_valid) __P((struct proc *, struct mount *));
89 } proc_targets[] = {
90 #define N(s) sizeof(s)-1, s
91 /* name type validp */
92 { DT_DIR, N("."), Pproc, NULL },
93 { DT_DIR, N(".."), Proot, NULL },
94 { DT_REG, N("file"), Pfile, procfs_validfile },
95 { DT_REG, N("mem"), Pmem, NULL },
96 { DT_REG, N("regs"), Pregs, procfs_validregs },
97 { DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs },
98 { DT_REG, N("ctl"), Pctl, NULL },
99 { DT_REG, N("status"), Pstatus, NULL },
100 { DT_REG, N("note"), Pnote, NULL },
101 { DT_REG, N("notepg"), Pnotepg, NULL },
102 { DT_REG, N("map"), Pmap, procfs_validmap },
103 { DT_REG, N("maps"), Pmaps, procfs_validmap },
104 { DT_REG, N("cmdline"), Pcmdline, NULL },
105 { DT_REG, N("exe"), Pfile, procfs_validfile_linux },
106 #undef N
107 };
108 static int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]);
109
110 /*
111 * List of files in the root directory. Note: the validate function will
112 * be called with p == NULL for these ones.
113 */
114 struct proc_target proc_root_targets[] = {
115 #define N(s) sizeof(s)-1, s
116 /* name type validp */
117 { DT_REG, N("meminfo"), Pmeminfo, procfs_validfile_linux },
118 { DT_REG, N("cpuinfo"), Pcpuinfo, procfs_validfile_linux },
119 #undef N
120 };
121 static int nproc_root_targets =
122 sizeof(proc_root_targets) / sizeof(proc_root_targets[0]);
123
124 int procfs_lookup __P((void *));
125 #define procfs_create genfs_eopnotsupp_rele
126 #define procfs_mknod genfs_eopnotsupp_rele
127 int procfs_open __P((void *));
128 int procfs_close __P((void *));
129 int procfs_access __P((void *));
130 int procfs_getattr __P((void *));
131 int procfs_setattr __P((void *));
132 #define procfs_read procfs_rw
133 #define procfs_write procfs_rw
134 #define procfs_fcntl genfs_fcntl
135 #define procfs_ioctl genfs_enoioctl
136 #define procfs_poll genfs_poll
137 #define procfs_revoke genfs_revoke
138 #define procfs_fsync genfs_nullop
139 #define procfs_seek genfs_nullop
140 #define procfs_remove genfs_eopnotsupp_rele
141 int procfs_link __P((void *));
142 #define procfs_rename genfs_eopnotsupp_rele
143 #define procfs_mkdir genfs_eopnotsupp_rele
144 #define procfs_rmdir genfs_eopnotsupp_rele
145 int procfs_symlink __P((void *));
146 int procfs_readdir __P((void *));
147 int procfs_readlink __P((void *));
148 #define procfs_abortop genfs_abortop
149 int procfs_inactive __P((void *));
150 int procfs_reclaim __P((void *));
151 #define procfs_lock genfs_lock
152 #define procfs_unlock genfs_unlock
153 #define procfs_bmap genfs_badop
154 #define procfs_strategy genfs_badop
155 int procfs_print __P((void *));
156 int procfs_pathconf __P((void *));
157 #define procfs_islocked genfs_islocked
158 #define procfs_advlock genfs_einval
159 #define procfs_blkatoff genfs_eopnotsupp
160 #define procfs_valloc genfs_eopnotsupp
161 #define procfs_vfree genfs_nullop
162 #define procfs_truncate genfs_eopnotsupp
163 #define procfs_update genfs_nullop
164 #define procfs_bwrite genfs_eopnotsupp
165
166 static pid_t atopid __P((const char *, u_int));
167
168 /*
169 * procfs vnode operations.
170 */
171 int (**procfs_vnodeop_p) __P((void *));
172 const struct vnodeopv_entry_desc procfs_vnodeop_entries[] = {
173 { &vop_default_desc, vn_default_error },
174 { &vop_lookup_desc, procfs_lookup }, /* lookup */
175 { &vop_create_desc, procfs_create }, /* create */
176 { &vop_mknod_desc, procfs_mknod }, /* mknod */
177 { &vop_open_desc, procfs_open }, /* open */
178 { &vop_close_desc, procfs_close }, /* close */
179 { &vop_access_desc, procfs_access }, /* access */
180 { &vop_getattr_desc, procfs_getattr }, /* getattr */
181 { &vop_setattr_desc, procfs_setattr }, /* setattr */
182 { &vop_read_desc, procfs_read }, /* read */
183 { &vop_write_desc, procfs_write }, /* write */
184 { &vop_fcntl_desc, procfs_fcntl }, /* fcntl */
185 { &vop_ioctl_desc, procfs_ioctl }, /* ioctl */
186 { &vop_poll_desc, procfs_poll }, /* poll */
187 { &vop_revoke_desc, procfs_revoke }, /* revoke */
188 { &vop_fsync_desc, procfs_fsync }, /* fsync */
189 { &vop_seek_desc, procfs_seek }, /* seek */
190 { &vop_remove_desc, procfs_remove }, /* remove */
191 { &vop_link_desc, procfs_link }, /* link */
192 { &vop_rename_desc, procfs_rename }, /* rename */
193 { &vop_mkdir_desc, procfs_mkdir }, /* mkdir */
194 { &vop_rmdir_desc, procfs_rmdir }, /* rmdir */
195 { &vop_symlink_desc, procfs_symlink }, /* symlink */
196 { &vop_readdir_desc, procfs_readdir }, /* readdir */
197 { &vop_readlink_desc, procfs_readlink }, /* readlink */
198 { &vop_abortop_desc, procfs_abortop }, /* abortop */
199 { &vop_inactive_desc, procfs_inactive }, /* inactive */
200 { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */
201 { &vop_lock_desc, procfs_lock }, /* lock */
202 { &vop_unlock_desc, procfs_unlock }, /* unlock */
203 { &vop_bmap_desc, procfs_bmap }, /* bmap */
204 { &vop_strategy_desc, procfs_strategy }, /* strategy */
205 { &vop_print_desc, procfs_print }, /* print */
206 { &vop_islocked_desc, procfs_islocked }, /* islocked */
207 { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */
208 { &vop_advlock_desc, procfs_advlock }, /* advlock */
209 { &vop_blkatoff_desc, procfs_blkatoff }, /* blkatoff */
210 { &vop_valloc_desc, procfs_valloc }, /* valloc */
211 { &vop_vfree_desc, procfs_vfree }, /* vfree */
212 { &vop_truncate_desc, procfs_truncate }, /* truncate */
213 { &vop_update_desc, procfs_update }, /* update */
214 { NULL, NULL }
215 };
216 const struct vnodeopv_desc procfs_vnodeop_opv_desc =
217 { &procfs_vnodeop_p, procfs_vnodeop_entries };
218 /*
219 * set things up for doing i/o on
220 * the pfsnode (vp). (vp) is locked
221 * on entry, and should be left locked
222 * on exit.
223 *
224 * for procfs we don't need to do anything
225 * in particular for i/o. all that is done
226 * is to support exclusive open on process
227 * memory images.
228 */
229 int
230 procfs_open(v)
231 void *v;
232 {
233 struct vop_open_args /* {
234 struct vnode *a_vp;
235 int a_mode;
236 struct ucred *a_cred;
237 struct proc *a_p;
238 } */ *ap = v;
239 struct pfsnode *pfs = VTOPFS(ap->a_vp);
240 struct proc *p1, *p2;
241
242 p1 = ap->a_p; /* tracer */
243 p2 = PFIND(pfs->pfs_pid); /* traced */
244
245 if (p2 == NULL)
246 return (ENOENT); /* was ESRCH, jsp */
247
248 switch (pfs->pfs_type) {
249 case Pmem:
250 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) ||
251 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE)))
252 return (EBUSY);
253
254 if (procfs_checkioperm(p1, p2) != 0)
255 return (EPERM);
256
257 if (ap->a_mode & FWRITE)
258 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL);
259
260 return (0);
261
262 default:
263 break;
264 }
265
266 return (0);
267 }
268
269 /*
270 * close the pfsnode (vp) after doing i/o.
271 * (vp) is not locked on entry or exit.
272 *
273 * nothing to do for procfs other than undo
274 * any exclusive open flag (see _open above).
275 */
276 int
277 procfs_close(v)
278 void *v;
279 {
280 struct vop_close_args /* {
281 struct vnode *a_vp;
282 int a_fflag;
283 struct ucred *a_cred;
284 struct proc *a_p;
285 } */ *ap = v;
286 struct pfsnode *pfs = VTOPFS(ap->a_vp);
287
288 switch (pfs->pfs_type) {
289 case Pmem:
290 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL))
291 pfs->pfs_flags &= ~(FWRITE|O_EXCL);
292 break;
293
294 default:
295 break;
296 }
297
298 return (0);
299 }
300
301 /*
302 * _inactive is called when the pfsnode
303 * is vrele'd and the reference count goes
304 * to zero. (vp) will be on the vnode free
305 * list, so to get it back vget() must be
306 * used.
307 *
308 * for procfs, check if the process is still
309 * alive and if it isn't then just throw away
310 * the vnode by calling vgone(). this may
311 * be overkill and a waste of time since the
312 * chances are that the process will still be
313 * there and PFIND is not free.
314 *
315 * (vp) is locked on entry, but must be unlocked on exit.
316 */
317 int
318 procfs_inactive(v)
319 void *v;
320 {
321 struct vop_inactive_args /* {
322 struct vnode *a_vp;
323 struct proc *a_p;
324 } */ *ap = v;
325 struct pfsnode *pfs = VTOPFS(ap->a_vp);
326
327 VOP_UNLOCK(ap->a_vp, 0);
328 if (PFIND(pfs->pfs_pid) == 0)
329 vgone(ap->a_vp);
330
331 return (0);
332 }
333
334 /*
335 * _reclaim is called when getnewvnode()
336 * wants to make use of an entry on the vnode
337 * free list. at this time the filesystem needs
338 * to free any private data and remove the node
339 * from any private lists.
340 */
341 int
342 procfs_reclaim(v)
343 void *v;
344 {
345 struct vop_reclaim_args /* {
346 struct vnode *a_vp;
347 } */ *ap = v;
348
349 return (procfs_freevp(ap->a_vp));
350 }
351
352 /*
353 * Return POSIX pathconf information applicable to special devices.
354 */
355 int
356 procfs_pathconf(v)
357 void *v;
358 {
359 struct vop_pathconf_args /* {
360 struct vnode *a_vp;
361 int a_name;
362 register_t *a_retval;
363 } */ *ap = v;
364
365 switch (ap->a_name) {
366 case _PC_LINK_MAX:
367 *ap->a_retval = LINK_MAX;
368 return (0);
369 case _PC_MAX_CANON:
370 *ap->a_retval = MAX_CANON;
371 return (0);
372 case _PC_MAX_INPUT:
373 *ap->a_retval = MAX_INPUT;
374 return (0);
375 case _PC_PIPE_BUF:
376 *ap->a_retval = PIPE_BUF;
377 return (0);
378 case _PC_CHOWN_RESTRICTED:
379 *ap->a_retval = 1;
380 return (0);
381 case _PC_VDISABLE:
382 *ap->a_retval = _POSIX_VDISABLE;
383 return (0);
384 case _PC_SYNC_IO:
385 *ap->a_retval = 1;
386 return (0);
387 default:
388 return (EINVAL);
389 }
390 /* NOTREACHED */
391 }
392
393 /*
394 * _print is used for debugging.
395 * just print a readable description
396 * of (vp).
397 */
398 int
399 procfs_print(v)
400 void *v;
401 {
402 struct vop_print_args /* {
403 struct vnode *a_vp;
404 } */ *ap = v;
405 struct pfsnode *pfs = VTOPFS(ap->a_vp);
406
407 printf("tag VT_PROCFS, type %d, pid %d, mode %x, flags %lx\n",
408 pfs->pfs_type, pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags);
409 return 0;
410 }
411
412 int
413 procfs_link(v)
414 void *v;
415 {
416 struct vop_link_args /* {
417 struct vnode *a_dvp;
418 struct vnode *a_vp;
419 struct componentname *a_cnp;
420 } */ *ap = v;
421
422 VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
423 vput(ap->a_dvp);
424 return (EROFS);
425 }
426
427 int
428 procfs_symlink(v)
429 void *v;
430 {
431 struct vop_symlink_args /* {
432 struct vnode *a_dvp;
433 struct vnode **a_vpp;
434 struct componentname *a_cnp;
435 struct vattr *a_vap;
436 char *a_target;
437 } */ *ap = v;
438
439 VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
440 vput(ap->a_dvp);
441 return (EROFS);
442 }
443
444 /*
445 * Invent attributes for pfsnode (vp) and store
446 * them in (vap).
447 * Directories lengths are returned as zero since
448 * any real length would require the genuine size
449 * to be computed, and nothing cares anyway.
450 *
451 * this is relatively minimal for procfs.
452 */
453 int
454 procfs_getattr(v)
455 void *v;
456 {
457 struct vop_getattr_args /* {
458 struct vnode *a_vp;
459 struct vattr *a_vap;
460 struct ucred *a_cred;
461 struct proc *a_p;
462 } */ *ap = v;
463 struct pfsnode *pfs = VTOPFS(ap->a_vp);
464 struct vattr *vap = ap->a_vap;
465 struct proc *procp;
466 struct timeval tv;
467 int error;
468
469 /* first check the process still exists */
470 switch (pfs->pfs_type) {
471 case Proot:
472 case Pcurproc:
473 case Pself:
474 procp = 0;
475 break;
476
477 default:
478 procp = PFIND(pfs->pfs_pid);
479 if (procp == 0)
480 return (ENOENT);
481 break;
482 }
483
484 error = 0;
485
486 /* start by zeroing out the attributes */
487 VATTR_NULL(vap);
488
489 /* next do all the common fields */
490 vap->va_type = ap->a_vp->v_type;
491 vap->va_mode = pfs->pfs_mode;
492 vap->va_fileid = pfs->pfs_fileno;
493 vap->va_flags = 0;
494 vap->va_blocksize = PAGE_SIZE;
495
496 /*
497 * Make all times be current TOD.
498 * It would be possible to get the process start
499 * time from the p_stat structure, but there's
500 * no "file creation" time stamp anyway, and the
501 * p_stat structure is not addressible if u. gets
502 * swapped out for that process.
503 */
504 microtime(&tv);
505 TIMEVAL_TO_TIMESPEC(&tv, &vap->va_ctime);
506 vap->va_atime = vap->va_mtime = vap->va_ctime;
507
508 switch (pfs->pfs_type) {
509 case Pmem:
510 case Pregs:
511 case Pfpregs:
512 /*
513 * If the process has exercised some setuid or setgid
514 * privilege, then rip away read/write permission so
515 * that only root can gain access.
516 */
517 if (procp->p_flag & P_SUGID)
518 vap->va_mode &= ~(S_IRUSR|S_IWUSR);
519 /* FALLTHROUGH */
520 case Pctl:
521 case Pstatus:
522 case Pnote:
523 case Pnotepg:
524 case Pmap:
525 case Pmaps:
526 case Pcmdline:
527 vap->va_nlink = 1;
528 vap->va_uid = procp->p_ucred->cr_uid;
529 vap->va_gid = procp->p_ucred->cr_gid;
530 break;
531 case Pmeminfo:
532 case Pcpuinfo:
533 vap->va_nlink = 1;
534 vap->va_uid = vap->va_gid = 0;
535 break;
536
537 default:
538 break;
539 }
540
541 /*
542 * now do the object specific fields
543 *
544 * The size could be set from struct reg, but it's hardly
545 * worth the trouble, and it puts some (potentially) machine
546 * dependent data into this machine-independent code. If it
547 * becomes important then this function should break out into
548 * a per-file stat function in the corresponding .c file.
549 */
550
551 switch (pfs->pfs_type) {
552 case Proot:
553 /*
554 * Set nlink to 1 to tell fts(3) we don't actually know.
555 */
556 vap->va_nlink = 1;
557 vap->va_uid = 0;
558 vap->va_gid = 0;
559 vap->va_bytes = vap->va_size = DEV_BSIZE;
560 break;
561
562 case Pcurproc: {
563 char buf[16]; /* should be enough */
564 vap->va_nlink = 1;
565 vap->va_uid = 0;
566 vap->va_gid = 0;
567 vap->va_bytes = vap->va_size =
568 sprintf(buf, "%ld", (long)curproc->p_pid);
569 break;
570 }
571
572 case Pself:
573 vap->va_nlink = 1;
574 vap->va_uid = 0;
575 vap->va_gid = 0;
576 vap->va_bytes = vap->va_size = sizeof("curproc");
577 break;
578
579 case Pproc:
580 vap->va_nlink = 2;
581 vap->va_uid = procp->p_ucred->cr_uid;
582 vap->va_gid = procp->p_ucred->cr_gid;
583 vap->va_bytes = vap->va_size = DEV_BSIZE;
584 break;
585
586 case Pfile:
587 error = EOPNOTSUPP;
588 break;
589
590 case Pmem:
591 vap->va_bytes = vap->va_size =
592 ctob(procp->p_vmspace->vm_tsize +
593 procp->p_vmspace->vm_dsize +
594 procp->p_vmspace->vm_ssize);
595 break;
596
597 #if defined(PT_GETREGS) || defined(PT_SETREGS)
598 case Pregs:
599 vap->va_bytes = vap->va_size = sizeof(struct reg);
600 break;
601 #endif
602
603 #if defined(PT_GETFPREGS) || defined(PT_SETFPREGS)
604 case Pfpregs:
605 vap->va_bytes = vap->va_size = sizeof(struct fpreg);
606 break;
607 #endif
608
609 case Pctl:
610 case Pstatus:
611 case Pnote:
612 case Pnotepg:
613 case Pcmdline:
614 case Pmeminfo:
615 case Pcpuinfo:
616 vap->va_bytes = vap->va_size = 0;
617 break;
618 case Pmap:
619 case Pmaps:
620 /*
621 * Advise a larger blocksize for the map files, so that
622 * they may be read in one pass.
623 */
624 vap->va_blocksize = 4 * PAGE_SIZE;
625 vap->va_bytes = vap->va_size = 0;
626 break;
627
628 default:
629 panic("procfs_getattr");
630 }
631
632 return (error);
633 }
634
/*
 * Pretend that attributes were set.
 *
 * Nothing is stored, but success is reported on purpose: returning
 * an error would make creat() fail when it tries to truncate the
 * file to length 0, and worse, would break
 * echo $note > /proc/$pid/note.
 */
/*ARGSUSED*/
int
procfs_setattr(v)
	void *v;
{

	return (0);
}
651
652 /*
653 * implement access checking.
654 *
655 * actually, the check for super-user is slightly
656 * broken since it will allow read access to write-only
657 * objects. this doesn't cause any particular trouble
658 * but does mean that the i/o entry points need to check
659 * that the operation really does make sense.
660 */
661 int
662 procfs_access(v)
663 void *v;
664 {
665 struct vop_access_args /* {
666 struct vnode *a_vp;
667 int a_mode;
668 struct ucred *a_cred;
669 struct proc *a_p;
670 } */ *ap = v;
671 struct vattr va;
672 int error;
673
674 if ((error = VOP_GETATTR(ap->a_vp, &va, ap->a_cred, ap->a_p)) != 0)
675 return (error);
676
677 return (vaccess(va.va_type, va.va_mode,
678 va.va_uid, va.va_gid, ap->a_mode, ap->a_cred));
679 }
680
681 /*
682 * lookup. this is incredibly complicated in the
683 * general case, however for most pseudo-filesystems
684 * very little needs to be done.
685 *
686 * Locking isn't hard here, just poorly documented.
687 *
688 * If we're looking up ".", just vref the parent & return it.
689 *
690 * If we're looking up "..", unlock the parent, and lock "..". If everything
691 * went ok, and we're on the last component and the caller requested the
692 * parent locked, try to re-lock the parent. We do this to prevent lock
693 * races.
694 *
695 * For anything else, get the needed node. Then unlock the parent if not
696 * the last component or not LOCKPARENT (i.e. if we wouldn't re-lock the
697 * parent in the .. case).
698 *
699 * We try to exit with the parent locked in error cases.
700 */
701 int
702 procfs_lookup(v)
703 void *v;
704 {
705 struct vop_lookup_args /* {
706 struct vnode * a_dvp;
707 struct vnode ** a_vpp;
708 struct componentname * a_cnp;
709 } */ *ap = v;
710 struct componentname *cnp = ap->a_cnp;
711 struct vnode **vpp = ap->a_vpp;
712 struct vnode *dvp = ap->a_dvp;
713 const char *pname = cnp->cn_nameptr;
714 const struct proc_target *pt = NULL;
715 struct vnode *fvp;
716 pid_t pid;
717 struct pfsnode *pfs;
718 struct proc *p = NULL;
719 int i, error, wantpunlock, iscurproc = 0, isself = 0;
720
721 *vpp = NULL;
722 cnp->cn_flags &= ~PDIRUNLOCK;
723
724 if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
725 return (EROFS);
726
727 if (cnp->cn_namelen == 1 && *pname == '.') {
728 *vpp = dvp;
729 VREF(dvp);
730 return (0);
731 }
732
733 wantpunlock = (~cnp->cn_flags & (LOCKPARENT | ISLASTCN));
734 pfs = VTOPFS(dvp);
735 switch (pfs->pfs_type) {
736 case Proot:
737 /*
738 * Shouldn't get here with .. in the root node.
739 */
740 if (cnp->cn_flags & ISDOTDOT)
741 return (EIO);
742
743 iscurproc = CNEQ(cnp, "curproc", 7);
744 isself = CNEQ(cnp, "self", 4);
745
746 if (iscurproc || isself) {
747 error = procfs_allocvp(dvp->v_mount, vpp, 0,
748 iscurproc ? Pcurproc : Pself);
749 if ((error == 0) && (wantpunlock)) {
750 VOP_UNLOCK(dvp, 0);
751 cnp->cn_flags |= PDIRUNLOCK;
752 }
753 return (error);
754 }
755
756 for (i = 0; i < nproc_root_targets; i++) {
757 pt = &proc_root_targets[i];
758 if (cnp->cn_namelen == pt->pt_namlen &&
759 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
760 (pt->pt_valid == NULL ||
761 (*pt->pt_valid)(p, dvp->v_mount)))
762 break;
763 }
764
765 if (i != nproc_root_targets) {
766 error = procfs_allocvp(dvp->v_mount, vpp, 0,
767 pt->pt_pfstype);
768 if ((error == 0) && (wantpunlock)) {
769 VOP_UNLOCK(dvp, 0);
770 cnp->cn_flags |= PDIRUNLOCK;
771 }
772 return (error);
773 }
774
775 pid = atopid(pname, cnp->cn_namelen);
776 if (pid == NO_PID)
777 break;
778
779 p = PFIND(pid);
780 if (p == 0)
781 break;
782
783 error = procfs_allocvp(dvp->v_mount, vpp, pid, Pproc);
784 if ((error == 0) && (wantpunlock)) {
785 VOP_UNLOCK(dvp, 0);
786 cnp->cn_flags |= PDIRUNLOCK;
787 }
788 return (error);
789
790 case Pproc:
791 /*
792 * do the .. dance. We unlock the directory, and then
793 * get the root dir. That will automatically return ..
794 * locked. Then if the caller wanted dvp locked, we
795 * re-lock.
796 */
797 if (cnp->cn_flags & ISDOTDOT) {
798 VOP_UNLOCK(dvp, 0);
799 cnp->cn_flags |= PDIRUNLOCK;
800 error = procfs_root(dvp->v_mount, vpp);
801 if ((error == 0) && (wantpunlock == 0) &&
802 ((error = vn_lock(dvp, LK_EXCLUSIVE)) == 0))
803 cnp->cn_flags &= ~PDIRUNLOCK;
804 return (error);
805 }
806
807 p = PFIND(pfs->pfs_pid);
808 if (p == 0)
809 break;
810
811 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) {
812 if (cnp->cn_namelen == pt->pt_namlen &&
813 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
814 (pt->pt_valid == NULL ||
815 (*pt->pt_valid)(p, dvp->v_mount)))
816 goto found;
817 }
818 break;
819
820 found:
821 if (pt->pt_pfstype == Pfile) {
822 fvp = p->p_textvp;
823 /* We already checked that it exists. */
824 VREF(fvp);
825 vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY);
826 if (wantpunlock) {
827 VOP_UNLOCK(dvp, 0);
828 cnp->cn_flags |= PDIRUNLOCK;
829 }
830 *vpp = fvp;
831 return (0);
832 }
833
834 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
835 pt->pt_pfstype);
836 if ((error == 0) && (wantpunlock)) {
837 VOP_UNLOCK(dvp, 0);
838 cnp->cn_flags |= PDIRUNLOCK;
839 }
840 return (error);
841
842 default:
843 return (ENOTDIR);
844 }
845
846 return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS);
847 }
848
849 int
850 procfs_validfile(p, mp)
851 struct proc *p;
852 struct mount *mp;
853 {
854 return (p->p_textvp != NULL);
855 }
856
857 static int
858 procfs_validfile_linux(p, mp)
859 struct proc *p;
860 struct mount *mp;
861 {
862 int flags;
863
864 flags = VFSTOPROC(mp)->pmnt_flags;
865 return ((flags & PROCFSMNT_LINUXCOMPAT) &&
866 (p == NULL || procfs_validfile(p, mp)));
867 }
868
869 /*
870 * readdir returns directory entries from pfsnode (vp).
871 *
872 * the strategy here with procfs is to generate a single
873 * directory entry at a time (struct dirent) and then
874 * copy that out to userland using uiomove. a more efficent
875 * though more complex implementation, would try to minimize
876 * the number of calls to uiomove(). for procfs, this is
877 * hardly worth the added code complexity.
878 *
879 * this should just be done through read()
880 */
881 int
882 procfs_readdir(v)
883 void *v;
884 {
885 struct vop_readdir_args /* {
886 struct vnode *a_vp;
887 struct uio *a_uio;
888 struct ucred *a_cred;
889 int *a_eofflag;
890 off_t **a_cookies;
891 int *a_ncookies;
892 } */ *ap = v;
893 struct uio *uio = ap->a_uio;
894 struct dirent d;
895 struct pfsnode *pfs;
896 off_t i;
897 int error;
898 off_t *cookies = NULL;
899 int ncookies, left, skip, j;
900 struct vnode *vp;
901 const struct proc_target *pt;
902
903 vp = ap->a_vp;
904 pfs = VTOPFS(vp);
905
906 if (uio->uio_resid < UIO_MX)
907 return (EINVAL);
908 if (uio->uio_offset < 0)
909 return (EINVAL);
910
911 error = 0;
912 i = uio->uio_offset;
913 memset((caddr_t)&d, 0, UIO_MX);
914 d.d_reclen = UIO_MX;
915 ncookies = uio->uio_resid / UIO_MX;
916
917 switch (pfs->pfs_type) {
918 /*
919 * this is for the process-specific sub-directories.
920 * all that is needed to is copy out all the entries
921 * from the procent[] table (top of this file).
922 */
923 case Pproc: {
924 struct proc *p;
925
926 if (i >= nproc_targets)
927 return 0;
928
929 p = PFIND(pfs->pfs_pid);
930 if (p == NULL)
931 break;
932
933 if (ap->a_ncookies) {
934 ncookies = min(ncookies, (nproc_targets - i));
935 cookies = malloc(ncookies * sizeof (off_t),
936 M_TEMP, M_WAITOK);
937 *ap->a_cookies = cookies;
938 }
939
940 for (pt = &proc_targets[i];
941 uio->uio_resid >= UIO_MX && i < nproc_targets; pt++, i++) {
942 if (pt->pt_valid &&
943 (*pt->pt_valid)(p, vp->v_mount) == 0)
944 continue;
945
946 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype);
947 d.d_namlen = pt->pt_namlen;
948 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1);
949 d.d_type = pt->pt_type;
950
951 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
952 break;
953 if (cookies)
954 *cookies++ = i + 1;
955 }
956
957 break;
958 }
959
960 /*
961 * this is for the root of the procfs filesystem
962 * what is needed are special entries for "curproc"
963 * and "self" followed by an entry for each process
964 * on allproc
965 #ifdef PROCFS_ZOMBIE
966 * and deadproc and zombproc.
967 #endif
968 */
969
970 case Proot: {
971 int pcnt = i, nc = 0;
972 const struct proclist_desc *pd;
973 volatile struct proc *p;
974
975 if (pcnt > 3)
976 pcnt = 3;
977 if (ap->a_ncookies) {
978 /*
979 * XXX Potentially allocating too much space here,
980 * but I'm lazy. This loop needs some work.
981 */
982 cookies = malloc(ncookies * sizeof (off_t),
983 M_TEMP, M_WAITOK);
984 *ap->a_cookies = cookies;
985 }
986 /*
987 * XXX: THIS LOOP ASSUMES THAT allproc IS THE FIRST
988 * PROCLIST IN THE proclists!
989 */
990 proclist_lock_read();
991 pd = proclists;
992 #ifdef PROCFS_ZOMBIE
993 again:
994 #endif
995 for (p = LIST_FIRST(pd->pd_list);
996 p != NULL && uio->uio_resid >= UIO_MX; i++, pcnt++) {
997 switch (i) {
998 case 0: /* `.' */
999 case 1: /* `..' */
1000 d.d_fileno = PROCFS_FILENO(0, Proot);
1001 d.d_namlen = i + 1;
1002 memcpy(d.d_name, "..", d.d_namlen);
1003 d.d_name[i + 1] = '\0';
1004 d.d_type = DT_DIR;
1005 break;
1006
1007 case 2:
1008 d.d_fileno = PROCFS_FILENO(0, Pcurproc);
1009 d.d_namlen = sizeof("curproc") - 1;
1010 memcpy(d.d_name, "curproc", sizeof("curproc"));
1011 d.d_type = DT_LNK;
1012 break;
1013
1014 case 3:
1015 d.d_fileno = PROCFS_FILENO(0, Pself);
1016 d.d_namlen = sizeof("self") - 1;
1017 memcpy(d.d_name, "self", sizeof("self"));
1018 d.d_type = DT_LNK;
1019 break;
1020
1021 default:
1022 while (pcnt < i) {
1023 pcnt++;
1024 p = LIST_NEXT(p, p_list);
1025 if (!p)
1026 goto done;
1027 }
1028 d.d_fileno = PROCFS_FILENO(p->p_pid, Pproc);
1029 d.d_namlen = sprintf(d.d_name, "%ld",
1030 (long)p->p_pid);
1031 d.d_type = DT_DIR;
1032 p = p->p_list.le_next;
1033 break;
1034 }
1035
1036 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
1037 break;
1038 nc++;
1039 if (cookies)
1040 *cookies++ = i + 1;
1041 }
1042 done:
1043
1044 #ifdef PROCFS_ZOMBIE
1045 pd++;
1046 if (p == NULL && pd->pd_list != NULL)
1047 goto again;
1048 #endif
1049 proclist_unlock_read();
1050
1051 skip = i - pcnt;
1052 if (skip >= nproc_root_targets)
1053 break;
1054 left = nproc_root_targets - skip;
1055 for (j = 0, pt = &proc_root_targets[0];
1056 uio->uio_resid >= UIO_MX && j < left;
1057 pt++, j++, i++) {
1058 if (pt->pt_valid &&
1059 (*pt->pt_valid)(NULL, vp->v_mount) == 0)
1060 continue;
1061 d.d_fileno = PROCFS_FILENO(0, pt->pt_pfstype);
1062 d.d_namlen = pt->pt_namlen;
1063 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1);
1064 d.d_type = pt->pt_type;
1065
1066 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
1067 break;
1068 nc++;
1069 if (cookies)
1070 *cookies++ = i + 1;
1071 }
1072
1073 ncookies = nc;
1074 break;
1075 }
1076
1077 default:
1078 error = ENOTDIR;
1079 break;
1080 }
1081
1082 if (ap->a_ncookies) {
1083 if (error) {
1084 if (cookies)
1085 free(*ap->a_cookies, M_TEMP);
1086 *ap->a_ncookies = 0;
1087 *ap->a_cookies = NULL;
1088 } else
1089 *ap->a_ncookies = ncookies;
1090 }
1091 uio->uio_offset = i;
1092 return (error);
1093 }
1094
1095 /*
1096 * readlink reads the link of `curproc'
1097 */
1098 int
1099 procfs_readlink(v)
1100 void *v;
1101 {
1102 struct vop_readlink_args *ap = v;
1103 char buf[16]; /* should be enough */
1104 int len;
1105
1106 if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pcurproc))
1107 len = sprintf(buf, "%ld", (long)curproc->p_pid);
1108 else if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pself))
1109 len = sprintf(buf, "%s", "curproc");
1110 else
1111 return (EINVAL);
1112
1113 return (uiomove((caddr_t)buf, len, ap->a_uio));
1114 }
1115
1116 /*
1117 * convert decimal ascii to pid_t
1118 */
1119 static pid_t
1120 atopid(b, len)
1121 const char *b;
1122 u_int len;
1123 {
1124 pid_t p = 0;
1125
1126 while (len--) {
1127 char c = *b++;
1128 if (c < '0' || c > '9')
1129 return (NO_PID);
1130 p = 10 * p + (c - '0');
1131 if (p > PID_MAX)
1132 return (NO_PID);
1133 }
1134
1135 return (p);
1136 }
1137