procfs_vnops.c revision 1.70.4.2 1 /* $NetBSD: procfs_vnops.c,v 1.70.4.2 2002/01/14 10:59:32 he Exp $ */
2
3 /*
4 * Copyright (c) 1993 Jan-Simon Pendry
5 * Copyright (c) 1993, 1995
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Jan-Simon Pendry.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95
40 */
41
42 /*
43 * procfs vnode interface
44 */
45
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/time.h>
49 #include <sys/kernel.h>
50 #include <sys/file.h>
51 #include <sys/proc.h>
52 #include <sys/vnode.h>
53 #include <sys/namei.h>
54 #include <sys/malloc.h>
55 #include <sys/mount.h>
56 #include <sys/dirent.h>
57 #include <sys/resourcevar.h>
58 #include <sys/ptrace.h>
59 #include <sys/stat.h>
60
61 #include <vm/vm.h> /* for PAGE_SIZE */
62
63 #include <machine/reg.h>
64
65 #include <miscfs/genfs/genfs.h>
66 #include <miscfs/procfs/procfs.h>
67
68 /*
69 * Vnode Operations.
70 *
71 */
72
73 static int procfs_validfile_linux __P((struct proc *, struct mount *));
74
75 /*
76 * This is a list of the valid names in the
77 * process-specific sub-directories. It is
78 * used in procfs_lookup and procfs_readdir
79 */
80 struct proc_target {
81 u_char pt_type;
82 u_char pt_namlen;
83 char *pt_name;
84 pfstype pt_pfstype;
85 int (*pt_valid) __P((struct proc *, struct mount *));
86 } proc_targets[] = {
87 #define N(s) sizeof(s)-1, s
88 /* name type validp */
89 { DT_DIR, N("."), Pproc, NULL },
90 { DT_DIR, N(".."), Proot, NULL },
91 { DT_REG, N("file"), Pfile, procfs_validfile },
92 { DT_REG, N("mem"), Pmem, NULL },
93 { DT_REG, N("regs"), Pregs, procfs_validregs },
94 { DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs },
95 { DT_REG, N("ctl"), Pctl, NULL },
96 { DT_REG, N("status"), Pstatus, NULL },
97 { DT_REG, N("note"), Pnote, NULL },
98 { DT_REG, N("notepg"), Pnotepg, NULL },
99 { DT_REG, N("map"), Pmap, procfs_validmap },
100 { DT_REG, N("cmdline"), Pcmdline, NULL },
101 { DT_REG, N("exe"), Pfile, procfs_validfile_linux },
102 #undef N
103 };
104 static int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]);
105
106 /*
107 * List of files in the root directory. Note: the validate function will
108 * be called with p == NULL for these ones.
109 */
110 struct proc_target proc_root_targets[] = {
111 #define N(s) sizeof(s)-1, s
112 /* name type validp */
113 { DT_REG, N("meminfo"), Pmeminfo, procfs_validfile_linux },
114 { DT_REG, N("cpuinfo"), Pcpuinfo, procfs_validfile_linux },
115 #undef N
116 };
117 static int nproc_root_targets =
118 sizeof(proc_root_targets) / sizeof(proc_root_targets[0]);
119
120
121
122 int procfs_lookup __P((void *));
123 #define procfs_create genfs_eopnotsupp_rele
124 #define procfs_mknod genfs_eopnotsupp_rele
125 int procfs_open __P((void *));
126 int procfs_close __P((void *));
127 int procfs_access __P((void *));
128 int procfs_getattr __P((void *));
129 int procfs_setattr __P((void *));
130 #define procfs_read procfs_rw
131 #define procfs_write procfs_rw
132 #define procfs_fcntl genfs_fcntl
133 #define procfs_ioctl genfs_enoioctl
134 #define procfs_poll genfs_poll
135 #define procfs_revoke genfs_revoke
136 #define procfs_mmap genfs_eopnotsupp
137 #define procfs_fsync genfs_nullop
138 #define procfs_seek genfs_nullop
139 #define procfs_remove genfs_eopnotsupp_rele
140 int procfs_link __P((void *));
141 #define procfs_rename genfs_eopnotsupp_rele
142 #define procfs_mkdir genfs_eopnotsupp_rele
143 #define procfs_rmdir genfs_eopnotsupp_rele
144 int procfs_symlink __P((void *));
145 int procfs_readdir __P((void *));
146 int procfs_readlink __P((void *));
147 #define procfs_abortop genfs_abortop
148 int procfs_inactive __P((void *));
149 int procfs_reclaim __P((void *));
150 #define procfs_lock genfs_lock
151 #define procfs_unlock genfs_unlock
152 int procfs_bmap __P((void *));
153 #define procfs_strategy genfs_badop
154 int procfs_print __P((void *));
155 int procfs_pathconf __P((void *));
156 #define procfs_islocked genfs_islocked
157 #define procfs_advlock genfs_einval
158 #define procfs_blkatoff genfs_eopnotsupp
159 #define procfs_valloc genfs_eopnotsupp
160 #define procfs_vfree genfs_nullop
161 #define procfs_truncate genfs_eopnotsupp
162 #define procfs_update genfs_nullop
163 #define procfs_bwrite genfs_eopnotsupp
164
165 static pid_t atopid __P((const char *, u_int));
166
167 /*
168 * procfs vnode operations.
169 */
170 int (**procfs_vnodeop_p) __P((void *));
171 struct vnodeopv_entry_desc procfs_vnodeop_entries[] = {
172 { &vop_default_desc, vn_default_error },
173 { &vop_lookup_desc, procfs_lookup }, /* lookup */
174 { &vop_create_desc, procfs_create }, /* create */
175 { &vop_mknod_desc, procfs_mknod }, /* mknod */
176 { &vop_open_desc, procfs_open }, /* open */
177 { &vop_close_desc, procfs_close }, /* close */
178 { &vop_access_desc, procfs_access }, /* access */
179 { &vop_getattr_desc, procfs_getattr }, /* getattr */
180 { &vop_setattr_desc, procfs_setattr }, /* setattr */
181 { &vop_read_desc, procfs_read }, /* read */
182 { &vop_write_desc, procfs_write }, /* write */
183 { &vop_fcntl_desc, procfs_fcntl }, /* fcntl */
184 { &vop_ioctl_desc, procfs_ioctl }, /* ioctl */
185 { &vop_poll_desc, procfs_poll }, /* poll */
186 { &vop_revoke_desc, procfs_revoke }, /* revoke */
187 { &vop_mmap_desc, procfs_mmap }, /* mmap */
188 { &vop_fsync_desc, procfs_fsync }, /* fsync */
189 { &vop_seek_desc, procfs_seek }, /* seek */
190 { &vop_remove_desc, procfs_remove }, /* remove */
191 { &vop_link_desc, procfs_link }, /* link */
192 { &vop_rename_desc, procfs_rename }, /* rename */
193 { &vop_mkdir_desc, procfs_mkdir }, /* mkdir */
194 { &vop_rmdir_desc, procfs_rmdir }, /* rmdir */
195 { &vop_symlink_desc, procfs_symlink }, /* symlink */
196 { &vop_readdir_desc, procfs_readdir }, /* readdir */
197 { &vop_readlink_desc, procfs_readlink }, /* readlink */
198 { &vop_abortop_desc, procfs_abortop }, /* abortop */
199 { &vop_inactive_desc, procfs_inactive }, /* inactive */
200 { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */
201 { &vop_lock_desc, procfs_lock }, /* lock */
202 { &vop_unlock_desc, procfs_unlock }, /* unlock */
203 { &vop_bmap_desc, procfs_bmap }, /* bmap */
204 { &vop_strategy_desc, procfs_strategy }, /* strategy */
205 { &vop_print_desc, procfs_print }, /* print */
206 { &vop_islocked_desc, procfs_islocked }, /* islocked */
207 { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */
208 { &vop_advlock_desc, procfs_advlock }, /* advlock */
209 { &vop_blkatoff_desc, procfs_blkatoff }, /* blkatoff */
210 { &vop_valloc_desc, procfs_valloc }, /* valloc */
211 { &vop_vfree_desc, procfs_vfree }, /* vfree */
212 { &vop_truncate_desc, procfs_truncate }, /* truncate */
213 { &vop_update_desc, procfs_update }, /* update */
214 { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL }
215 };
216 struct vnodeopv_desc procfs_vnodeop_opv_desc =
217 { &procfs_vnodeop_p, procfs_vnodeop_entries };
218 /*
219 * set things up for doing i/o on
220 * the pfsnode (vp). (vp) is locked
221 * on entry, and should be left locked
222 * on exit.
223 *
224 * for procfs we don't need to do anything
225 * in particular for i/o. all that is done
226 * is to support exclusive open on process
227 * memory images.
228 */
229 int
230 procfs_open(v)
231 void *v;
232 {
233 struct vop_open_args /* {
234 struct vnode *a_vp;
235 int a_mode;
236 struct ucred *a_cred;
237 struct proc *a_p;
238 } */ *ap = v;
239 struct pfsnode *pfs = VTOPFS(ap->a_vp);
240 struct proc *p1, *p2;
241 int error;
242
243 p1 = ap->a_p; /* tracer */
244 p2 = PFIND(pfs->pfs_pid); /* traced */
245
246 if (p2 == NULL)
247 return (ENOENT); /* was ESRCH, jsp */
248
249 switch (pfs->pfs_type) {
250 case Pmem:
251 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) ||
252 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE)))
253 return (EBUSY);
254
255 if ((error = procfs_checkioperm(p1, p2)) != 0)
256 return (error);
257
258 if (ap->a_mode & FWRITE)
259 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL);
260
261 return (0);
262
263 default:
264 break;
265 }
266
267 return (0);
268 }
269
270 /*
271 * close the pfsnode (vp) after doing i/o.
272 * (vp) is not locked on entry or exit.
273 *
274 * nothing to do for procfs other than undo
275 * any exclusive open flag (see _open above).
276 */
277 int
278 procfs_close(v)
279 void *v;
280 {
281 struct vop_close_args /* {
282 struct vnode *a_vp;
283 int a_fflag;
284 struct ucred *a_cred;
285 struct proc *a_p;
286 } */ *ap = v;
287 struct pfsnode *pfs = VTOPFS(ap->a_vp);
288
289 switch (pfs->pfs_type) {
290 case Pmem:
291 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL))
292 pfs->pfs_flags &= ~(FWRITE|O_EXCL);
293 break;
294
295 default:
296 break;
297 }
298
299 return (0);
300 }
301
302 /*
303 * do block mapping for pfsnode (vp).
304 * since we don't use the buffer cache
305 * for procfs this function should never
306 * be called. in any case, it's not clear
307 * what part of the kernel ever makes use
308 * of this function. for sanity, this is the
309 * usual no-op bmap, although returning
310 * (EIO) would be a reasonable alternative.
311 */
312 int
313 procfs_bmap(v)
314 void *v;
315 {
316 struct vop_bmap_args /* {
317 struct vnode *a_vp;
318 daddr_t a_bn;
319 struct vnode **a_vpp;
320 daddr_t *a_bnp;
321 int * a_runp;
322 } */ *ap = v;
323
324 if (ap->a_vpp != NULL)
325 *ap->a_vpp = ap->a_vp;
326 if (ap->a_bnp != NULL)
327 *ap->a_bnp = ap->a_bn;
328 if (ap->a_runp != NULL)
329 *ap->a_runp = 0;
330 return (0);
331 }
332
333 /*
334 * _inactive is called when the pfsnode
335 * is vrele'd and the reference count goes
336 * to zero. (vp) will be on the vnode free
337 * list, so to get it back vget() must be
338 * used.
339 *
340 * for procfs, check if the process is still
341 * alive and if it isn't then just throw away
342 * the vnode by calling vgone(). this may
343 * be overkill and a waste of time since the
344 * chances are that the process will still be
345 * there and PFIND is not free.
346 *
347 * (vp) is locked on entry, but must be unlocked on exit.
348 */
349 int
350 procfs_inactive(v)
351 void *v;
352 {
353 struct vop_inactive_args /* {
354 struct vnode *a_vp;
355 struct proc *a_p;
356 } */ *ap = v;
357 struct pfsnode *pfs = VTOPFS(ap->a_vp);
358
359 VOP_UNLOCK(ap->a_vp, 0);
360 if (PFIND(pfs->pfs_pid) == 0)
361 vgone(ap->a_vp);
362
363 return (0);
364 }
365
366 /*
367 * _reclaim is called when getnewvnode()
368 * wants to make use of an entry on the vnode
369 * free list. at this time the filesystem needs
370 * to free any private data and remove the node
371 * from any private lists.
372 */
373 int
374 procfs_reclaim(v)
375 void *v;
376 {
377 struct vop_reclaim_args /* {
378 struct vnode *a_vp;
379 } */ *ap = v;
380
381 return (procfs_freevp(ap->a_vp));
382 }
383
384 /*
385 * Return POSIX pathconf information applicable to special devices.
386 */
387 int
388 procfs_pathconf(v)
389 void *v;
390 {
391 struct vop_pathconf_args /* {
392 struct vnode *a_vp;
393 int a_name;
394 register_t *a_retval;
395 } */ *ap = v;
396
397 switch (ap->a_name) {
398 case _PC_LINK_MAX:
399 *ap->a_retval = LINK_MAX;
400 return (0);
401 case _PC_MAX_CANON:
402 *ap->a_retval = MAX_CANON;
403 return (0);
404 case _PC_MAX_INPUT:
405 *ap->a_retval = MAX_INPUT;
406 return (0);
407 case _PC_PIPE_BUF:
408 *ap->a_retval = PIPE_BUF;
409 return (0);
410 case _PC_CHOWN_RESTRICTED:
411 *ap->a_retval = 1;
412 return (0);
413 case _PC_VDISABLE:
414 *ap->a_retval = _POSIX_VDISABLE;
415 return (0);
416 case _PC_SYNC_IO:
417 *ap->a_retval = 1;
418 return (0);
419 default:
420 return (EINVAL);
421 }
422 /* NOTREACHED */
423 }
424
425 /*
426 * _print is used for debugging.
427 * just print a readable description
428 * of (vp).
429 */
430 int
431 procfs_print(v)
432 void *v;
433 {
434 struct vop_print_args /* {
435 struct vnode *a_vp;
436 } */ *ap = v;
437 struct pfsnode *pfs = VTOPFS(ap->a_vp);
438
439 printf("tag VT_PROCFS, type %d, pid %d, mode %x, flags %lx\n",
440 pfs->pfs_type, pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags);
441 return 0;
442 }
443
444 int
445 procfs_link(v)
446 void *v;
447 {
448 struct vop_link_args /* {
449 struct vnode *a_dvp;
450 struct vnode *a_vp;
451 struct componentname *a_cnp;
452 } */ *ap = v;
453
454 VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
455 vput(ap->a_dvp);
456 return (EROFS);
457 }
458
459 int
460 procfs_symlink(v)
461 void *v;
462 {
463 struct vop_symlink_args /* {
464 struct vnode *a_dvp;
465 struct vnode **a_vpp;
466 struct componentname *a_cnp;
467 struct vattr *a_vap;
468 char *a_target;
469 } */ *ap = v;
470
471 VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
472 vput(ap->a_dvp);
473 return (EROFS);
474 }
475
476 /*
477 * Invent attributes for pfsnode (vp) and store
478 * them in (vap).
479 * Directories lengths are returned as zero since
480 * any real length would require the genuine size
481 * to be computed, and nothing cares anyway.
482 *
483 * this is relatively minimal for procfs.
484 */
485 int
486 procfs_getattr(v)
487 void *v;
488 {
489 struct vop_getattr_args /* {
490 struct vnode *a_vp;
491 struct vattr *a_vap;
492 struct ucred *a_cred;
493 struct proc *a_p;
494 } */ *ap = v;
495 struct pfsnode *pfs = VTOPFS(ap->a_vp);
496 struct vattr *vap = ap->a_vap;
497 struct proc *procp;
498 struct timeval tv;
499 int error;
500
501 /* first check the process still exists */
502 switch (pfs->pfs_type) {
503 case Proot:
504 case Pcurproc:
505 case Pself:
506 procp = 0;
507 break;
508
509 default:
510 procp = PFIND(pfs->pfs_pid);
511 if (procp == 0)
512 return (ENOENT);
513 break;
514 }
515
516 error = 0;
517
518 /* start by zeroing out the attributes */
519 VATTR_NULL(vap);
520
521 /* next do all the common fields */
522 vap->va_type = ap->a_vp->v_type;
523 vap->va_mode = pfs->pfs_mode;
524 vap->va_fileid = pfs->pfs_fileno;
525 vap->va_flags = 0;
526 vap->va_blocksize = PAGE_SIZE;
527
528 /*
529 * Make all times be current TOD.
530 * It would be possible to get the process start
531 * time from the p_stat structure, but there's
532 * no "file creation" time stamp anyway, and the
533 * p_stat structure is not addressible if u. gets
534 * swapped out for that process.
535 */
536 microtime(&tv);
537 TIMEVAL_TO_TIMESPEC(&tv, &vap->va_ctime);
538 vap->va_atime = vap->va_mtime = vap->va_ctime;
539
540 switch (pfs->pfs_type) {
541 case Pmem:
542 case Pregs:
543 case Pfpregs:
544 /*
545 * If the process has exercised some setuid or setgid
546 * privilege, then rip away read/write permission so
547 * that only root can gain access.
548 */
549 if (procp->p_flag & P_SUGID)
550 vap->va_mode &= ~(S_IRUSR|S_IWUSR);
551 /* FALLTHROUGH */
552 case Pctl:
553 case Pstatus:
554 case Pnote:
555 case Pnotepg:
556 case Pmap:
557 case Pcmdline:
558 vap->va_nlink = 1;
559 vap->va_uid = procp->p_ucred->cr_uid;
560 vap->va_gid = procp->p_ucred->cr_gid;
561 break;
562 case Pmeminfo:
563 case Pcpuinfo:
564 vap->va_nlink = 1;
565 vap->va_uid = vap->va_gid = 0;
566 break;
567
568 default:
569 break;
570 }
571
572 /*
573 * now do the object specific fields
574 *
575 * The size could be set from struct reg, but it's hardly
576 * worth the trouble, and it puts some (potentially) machine
577 * dependent data into this machine-independent code. If it
578 * becomes important then this function should break out into
579 * a per-file stat function in the corresponding .c file.
580 */
581
582 switch (pfs->pfs_type) {
583 case Proot:
584 /*
585 * Set nlink to 1 to tell fts(3) we don't actually know.
586 */
587 vap->va_nlink = 1;
588 vap->va_uid = 0;
589 vap->va_gid = 0;
590 vap->va_bytes = vap->va_size = DEV_BSIZE;
591 break;
592
593 case Pcurproc: {
594 char buf[16]; /* should be enough */
595 vap->va_nlink = 1;
596 vap->va_uid = 0;
597 vap->va_gid = 0;
598 vap->va_bytes = vap->va_size =
599 sprintf(buf, "%ld", (long)curproc->p_pid);
600 break;
601 }
602
603 case Pself:
604 vap->va_nlink = 1;
605 vap->va_uid = 0;
606 vap->va_gid = 0;
607 vap->va_bytes = vap->va_size = sizeof("curproc");
608 break;
609
610 case Pproc:
611 vap->va_nlink = 2;
612 vap->va_uid = procp->p_ucred->cr_uid;
613 vap->va_gid = procp->p_ucred->cr_gid;
614 vap->va_bytes = vap->va_size = DEV_BSIZE;
615 break;
616
617 case Pfile:
618 error = EOPNOTSUPP;
619 break;
620
621 case Pmem:
622 vap->va_bytes = vap->va_size =
623 ctob(procp->p_vmspace->vm_tsize +
624 procp->p_vmspace->vm_dsize +
625 procp->p_vmspace->vm_ssize);
626 break;
627
628 #if defined(PT_GETREGS) || defined(PT_SETREGS)
629 case Pregs:
630 vap->va_bytes = vap->va_size = sizeof(struct reg);
631 break;
632 #endif
633
634 #if defined(PT_GETFPREGS) || defined(PT_SETFPREGS)
635 case Pfpregs:
636 vap->va_bytes = vap->va_size = sizeof(struct fpreg);
637 break;
638 #endif
639
640 case Pctl:
641 case Pstatus:
642 case Pnote:
643 case Pnotepg:
644 case Pmap:
645 case Pcmdline:
646 case Pmeminfo:
647 case Pcpuinfo:
648 vap->va_bytes = vap->va_size = 0;
649 break;
650
651 default:
652 panic("procfs_getattr");
653 }
654
655 return (error);
656 }
657
/*
 * Attribute setting is silently accepted and ignored.
 *
 * Reporting an error here would break callers such as creat(),
 * which try to set the file length to 0; it would also make
 * "echo $note > /proc/$pid/note" fail.
 */
/*ARGSUSED*/
int
procfs_setattr(v)
	void *v;
{
	(void)v;		/* arguments are deliberately unused */

	return (0);
}
674
675 /*
676 * implement access checking.
677 *
678 * actually, the check for super-user is slightly
679 * broken since it will allow read access to write-only
680 * objects. this doesn't cause any particular trouble
681 * but does mean that the i/o entry points need to check
682 * that the operation really does make sense.
683 */
684 int
685 procfs_access(v)
686 void *v;
687 {
688 struct vop_access_args /* {
689 struct vnode *a_vp;
690 int a_mode;
691 struct ucred *a_cred;
692 struct proc *a_p;
693 } */ *ap = v;
694 struct vattr va;
695 int error;
696
697 if ((error = VOP_GETATTR(ap->a_vp, &va, ap->a_cred, ap->a_p)) != 0)
698 return (error);
699
700 return (vaccess(va.va_type, va.va_mode,
701 va.va_uid, va.va_gid, ap->a_mode, ap->a_cred));
702 }
703
704 /*
705 * lookup. this is incredibly complicated in the
706 * general case, however for most pseudo-filesystems
707 * very little needs to be done.
708 *
709 * Locking isn't hard here, just poorly documented.
710 *
711 * If we're looking up ".", just vref the parent & return it.
712 *
713 * If we're looking up "..", unlock the parent, and lock "..". If everything
714 * went ok, and we're on the last component and the caller requested the
715 * parent locked, try to re-lock the parent. We do this to prevent lock
716 * races.
717 *
718 * For anything else, get the needed node. Then unlock the parent if not
719 * the last component or not LOCKPARENT (i.e. if we wouldn't re-lock the
720 * parent in the .. case).
721 *
722 * We try to exit with the parent locked in error cases.
723 */
724 int
725 procfs_lookup(v)
726 void *v;
727 {
728 struct vop_lookup_args /* {
729 struct vnode * a_dvp;
730 struct vnode ** a_vpp;
731 struct componentname * a_cnp;
732 } */ *ap = v;
733 struct componentname *cnp = ap->a_cnp;
734 struct vnode **vpp = ap->a_vpp;
735 struct vnode *dvp = ap->a_dvp;
736 const char *pname = cnp->cn_nameptr;
737 struct proc_target *pt = NULL;
738 struct vnode *fvp;
739 pid_t pid;
740 struct pfsnode *pfs;
741 struct proc *p = NULL;
742 int i, error, wantpunlock, iscurproc = 0, isself = 0;
743
744 *vpp = NULL;
745 cnp->cn_flags &= ~PDIRUNLOCK;
746
747 if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
748 return (EROFS);
749
750 if (cnp->cn_namelen == 1 && *pname == '.') {
751 *vpp = dvp;
752 VREF(dvp);
753 return (0);
754 }
755
756 wantpunlock = (~cnp->cn_flags & (LOCKPARENT | ISLASTCN));
757 pfs = VTOPFS(dvp);
758 switch (pfs->pfs_type) {
759 case Proot:
760 /*
761 * Shouldn't get here with .. in the root node.
762 */
763 if (cnp->cn_flags & ISDOTDOT)
764 return (EIO);
765
766 iscurproc = CNEQ(cnp, "curproc", 7);
767 isself = CNEQ(cnp, "self", 4);
768
769 if (iscurproc || isself) {
770 error = procfs_allocvp(dvp->v_mount, vpp, 0,
771 iscurproc ? Pcurproc : Pself);
772 if ((error == 0) && (wantpunlock)) {
773 VOP_UNLOCK(dvp, 0);
774 cnp->cn_flags |= PDIRUNLOCK;
775 }
776 return (error);
777 }
778
779 for (i = 0; i < nproc_root_targets; i++) {
780 pt = &proc_root_targets[i];
781 if (cnp->cn_namelen == pt->pt_namlen &&
782 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
783 (pt->pt_valid == NULL ||
784 (*pt->pt_valid)(p, dvp->v_mount)))
785 break;
786 }
787
788 if (i != nproc_root_targets) {
789 error = procfs_allocvp(dvp->v_mount, vpp, 0,
790 pt->pt_pfstype);
791 if ((error == 0) && (wantpunlock)) {
792 VOP_UNLOCK(dvp, 0);
793 cnp->cn_flags |= PDIRUNLOCK;
794 }
795 return (error);
796 }
797
798 pid = atopid(pname, cnp->cn_namelen);
799 if (pid == NO_PID)
800 break;
801
802 p = PFIND(pid);
803 if (p == 0)
804 break;
805
806 error = procfs_allocvp(dvp->v_mount, vpp, pid, Pproc);
807 if ((error == 0) && (wantpunlock)) {
808 VOP_UNLOCK(dvp, 0);
809 cnp->cn_flags |= PDIRUNLOCK;
810 }
811 return (error);
812
813 case Pproc:
814 /*
815 * do the .. dance. We unlock the directory, and then
816 * get the root dir. That will automatically return ..
817 * locked. Then if the caller wanted dvp locked, we
818 * re-lock.
819 */
820 if (cnp->cn_flags & ISDOTDOT) {
821 VOP_UNLOCK(dvp, 0);
822 cnp->cn_flags |= PDIRUNLOCK;
823 error = procfs_root(dvp->v_mount, vpp);
824 if ((error == 0) && (wantpunlock == 0) &&
825 ((error = vn_lock(dvp, LK_EXCLUSIVE)) == 0))
826 cnp->cn_flags &= ~PDIRUNLOCK;
827 return (error);
828 }
829
830 p = PFIND(pfs->pfs_pid);
831 if (p == 0)
832 break;
833
834 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) {
835 if (cnp->cn_namelen == pt->pt_namlen &&
836 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
837 (pt->pt_valid == NULL ||
838 (*pt->pt_valid)(p, dvp->v_mount)))
839 goto found;
840 }
841 break;
842
843 found:
844 if (pt->pt_pfstype == Pfile) {
845 fvp = p->p_textvp;
846 /* We already checked that it exists. */
847 VREF(fvp);
848 vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY);
849 if (wantpunlock) {
850 VOP_UNLOCK(dvp, 0);
851 cnp->cn_flags |= PDIRUNLOCK;
852 }
853 *vpp = fvp;
854 return (0);
855 }
856
857 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
858 pt->pt_pfstype);
859 if ((error == 0) && (wantpunlock)) {
860 VOP_UNLOCK(dvp, 0);
861 cnp->cn_flags |= PDIRUNLOCK;
862 }
863 return (error);
864
865 default:
866 return (ENOTDIR);
867 }
868
869 return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS);
870 }
871
872 int
873 procfs_validfile(p, mp)
874 struct proc *p;
875 struct mount *mp;
876 {
877 return (p->p_textvp != NULL);
878 }
879
880 static int
881 procfs_validfile_linux(p, mp)
882 struct proc *p;
883 struct mount *mp;
884 {
885 int flags;
886
887 flags = VFSTOPROC(mp)->pmnt_flags;
888 return ((flags & PROCFSMNT_LINUXCOMPAT) &&
889 (p == NULL || procfs_validfile(p, mp)));
890 }
891
892 /*
893 * readdir returns directory entries from pfsnode (vp).
894 *
895 * the strategy here with procfs is to generate a single
896 * directory entry at a time (struct dirent) and then
897 * copy that out to userland using uiomove. a more efficent
898 * though more complex implementation, would try to minimize
899 * the number of calls to uiomove(). for procfs, this is
900 * hardly worth the added code complexity.
901 *
902 * this should just be done through read()
903 */
904 int
905 procfs_readdir(v)
906 void *v;
907 {
908 struct vop_readdir_args /* {
909 struct vnode *a_vp;
910 struct uio *a_uio;
911 struct ucred *a_cred;
912 int *a_eofflag;
913 off_t **a_cookies;
914 int *a_ncookies;
915 } */ *ap = v;
916 struct uio *uio = ap->a_uio;
917 struct dirent d;
918 struct pfsnode *pfs;
919 off_t i;
920 int error;
921 off_t *cookies = NULL;
922 int ncookies, left, skip, j;
923 struct vnode *vp;
924 struct proc_target *pt;
925
926 vp = ap->a_vp;
927 pfs = VTOPFS(vp);
928
929 if (uio->uio_resid < UIO_MX)
930 return (EINVAL);
931 if (uio->uio_offset < 0)
932 return (EINVAL);
933
934 error = 0;
935 i = uio->uio_offset;
936 memset((caddr_t)&d, 0, UIO_MX);
937 d.d_reclen = UIO_MX;
938 ncookies = uio->uio_resid / UIO_MX;
939
940 switch (pfs->pfs_type) {
941 /*
942 * this is for the process-specific sub-directories.
943 * all that is needed to is copy out all the entries
944 * from the procent[] table (top of this file).
945 */
946 case Pproc: {
947 struct proc *p;
948
949 if (i >= nproc_targets)
950 return 0;
951
952 p = PFIND(pfs->pfs_pid);
953 if (p == NULL)
954 break;
955
956 if (ap->a_ncookies) {
957 ncookies = min(ncookies, (nproc_targets - i));
958 MALLOC(cookies, off_t *, ncookies * sizeof (off_t),
959 M_TEMP, M_WAITOK);
960 *ap->a_cookies = cookies;
961 }
962
963 for (pt = &proc_targets[i];
964 uio->uio_resid >= UIO_MX && i < nproc_targets; pt++, i++) {
965 if (pt->pt_valid &&
966 (*pt->pt_valid)(p, vp->v_mount) == 0)
967 continue;
968
969 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype);
970 d.d_namlen = pt->pt_namlen;
971 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1);
972 d.d_type = pt->pt_type;
973
974 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
975 break;
976 if (cookies)
977 *cookies++ = i + 1;
978 }
979
980 break;
981 }
982
983 /*
984 * this is for the root of the procfs filesystem
985 * what is needed are special entries for "curproc"
986 * and "self" followed by an entry for each process
987 * on allproc
988 #ifdef PROCFS_ZOMBIE
989 * and deadproc and zombproc.
990 #endif
991 */
992
993 case Proot: {
994 int pcnt = i, nc = 0;
995 const struct proclist_desc *pd;
996 volatile struct proc *p;
997
998 if (pcnt > 3)
999 pcnt = 3;
1000 if (ap->a_ncookies) {
1001 /*
1002 * XXX Potentially allocating too much space here,
1003 * but I'm lazy. This loop needs some work.
1004 */
1005 MALLOC(cookies, off_t *, ncookies * sizeof (off_t),
1006 M_TEMP, M_WAITOK);
1007 *ap->a_cookies = cookies;
1008 }
1009 /*
1010 * XXX: THIS LOOP ASSUMES THAT allproc IS THE FIRST
1011 * PROCLIST IN THE proclists!
1012 */
1013 proclist_lock_read();
1014 pd = proclists;
1015 #ifdef PROCFS_ZOMBIE
1016 again:
1017 #endif
1018 for (p = LIST_FIRST(pd->pd_list);
1019 p != NULL && uio->uio_resid >= UIO_MX; i++, pcnt++) {
1020 switch (i) {
1021 case 0: /* `.' */
1022 case 1: /* `..' */
1023 d.d_fileno = PROCFS_FILENO(0, Proot);
1024 d.d_namlen = i + 1;
1025 memcpy(d.d_name, "..", d.d_namlen);
1026 d.d_name[i + 1] = '\0';
1027 d.d_type = DT_DIR;
1028 break;
1029
1030 case 2:
1031 d.d_fileno = PROCFS_FILENO(0, Pcurproc);
1032 d.d_namlen = sizeof("curproc") - 1;
1033 memcpy(d.d_name, "curproc", sizeof("curproc"));
1034 d.d_type = DT_LNK;
1035 break;
1036
1037 case 3:
1038 d.d_fileno = PROCFS_FILENO(0, Pself);
1039 d.d_namlen = sizeof("self") - 1;
1040 memcpy(d.d_name, "self", sizeof("self"));
1041 d.d_type = DT_LNK;
1042 break;
1043
1044 default:
1045 while (pcnt < i) {
1046 pcnt++;
1047 p = LIST_NEXT(p, p_list);
1048 if (!p)
1049 goto done;
1050 }
1051 d.d_fileno = PROCFS_FILENO(p->p_pid, Pproc);
1052 d.d_namlen = sprintf(d.d_name, "%ld",
1053 (long)p->p_pid);
1054 d.d_type = DT_REG;
1055 p = p->p_list.le_next;
1056 break;
1057 }
1058
1059 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
1060 break;
1061 nc++;
1062 if (cookies)
1063 *cookies++ = i + 1;
1064 }
1065 done:
1066
1067 #ifdef PROCFS_ZOMBIE
1068 pd++;
1069 if (p == NULL && pd->pd_list != NULL)
1070 goto again;
1071 #endif
1072 proclist_unlock_read();
1073
1074 skip = i - pcnt;
1075 if (skip >= nproc_root_targets)
1076 break;
1077 left = nproc_root_targets - skip;
1078 for (j = 0, pt = &proc_root_targets[0];
1079 uio->uio_resid >= UIO_MX && j < left;
1080 pt++, j++, i++) {
1081 if (pt->pt_valid &&
1082 (*pt->pt_valid)(NULL, vp->v_mount) == 0)
1083 continue;
1084 d.d_fileno = PROCFS_FILENO(0, pt->pt_pfstype);
1085 d.d_namlen = pt->pt_namlen;
1086 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1);
1087 d.d_type = pt->pt_type;
1088
1089 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
1090 break;
1091 nc++;
1092 if (cookies)
1093 *cookies++ = i + 1;
1094 }
1095
1096 ncookies = nc;
1097 break;
1098 }
1099
1100 default:
1101 error = ENOTDIR;
1102 break;
1103 }
1104
1105 if (ap->a_ncookies) {
1106 if (error) {
1107 if (cookies)
1108 FREE(*ap->a_cookies, M_TEMP);
1109 *ap->a_ncookies = 0;
1110 *ap->a_cookies = NULL;
1111 } else
1112 *ap->a_ncookies = ncookies;
1113 }
1114 uio->uio_offset = i;
1115 return (error);
1116 }
1117
1118 /*
1119 * readlink reads the link of `curproc'
1120 */
1121 int
1122 procfs_readlink(v)
1123 void *v;
1124 {
1125 struct vop_readlink_args *ap = v;
1126 char buf[16]; /* should be enough */
1127 int len;
1128
1129 if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pcurproc))
1130 len = sprintf(buf, "%ld", (long)curproc->p_pid);
1131 else if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pself))
1132 len = sprintf(buf, "%s", "curproc");
1133 else
1134 return (EINVAL);
1135
1136 return (uiomove((caddr_t)buf, len, ap->a_uio));
1137 }
1138
1139 /*
1140 * convert decimal ascii to pid_t
1141 */
1142 static pid_t
1143 atopid(b, len)
1144 const char *b;
1145 u_int len;
1146 {
1147 pid_t p = 0;
1148
1149 while (len--) {
1150 char c = *b++;
1151 if (c < '0' || c > '9')
1152 return (NO_PID);
1153 p = 10 * p + (c - '0');
1154 if (p > PID_MAX)
1155 return (NO_PID);
1156 }
1157
1158 return (p);
1159 }
1160