procfs_vnops.c revision 1.106.2.1 1 /* $NetBSD: procfs_vnops.c,v 1.106.2.1 2003/07/02 15:26:54 darrenr Exp $ */
2
3 /*
4 * Copyright (c) 1993 Jan-Simon Pendry
5 * Copyright (c) 1993, 1995
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Jan-Simon Pendry.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95
40 */
41
42 /*
43 * procfs vnode interface
44 */
45
46 #include <sys/cdefs.h>
47 __KERNEL_RCSID(0, "$NetBSD: procfs_vnops.c,v 1.106.2.1 2003/07/02 15:26:54 darrenr Exp $");
48
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/time.h>
52 #include <sys/kernel.h>
53 #include <sys/file.h>
54 #include <sys/filedesc.h>
55 #include <sys/proc.h>
56 #include <sys/vnode.h>
57 #include <sys/namei.h>
58 #include <sys/malloc.h>
59 #include <sys/mount.h>
60 #include <sys/dirent.h>
61 #include <sys/resourcevar.h>
62 #include <sys/stat.h>
63 #include <sys/ptrace.h>
64
65 #include <uvm/uvm_extern.h> /* for PAGE_SIZE */
66
67 #include <machine/reg.h>
68
69 #include <miscfs/genfs/genfs.h>
70 #include <miscfs/procfs/procfs.h>
71
72 /*
73 * Vnode Operations.
74 *
75 */
76
77 static int procfs_validfile_linux __P((struct lwp *, struct mount *));
78
79 /*
80 * This is a list of the valid names in the
81 * process-specific sub-directories. It is
82 * used in procfs_lookup and procfs_readdir
83 */
84 static const struct proc_target {
85 u_char pt_type;
86 u_char pt_namlen;
87 const char *pt_name;
88 pfstype pt_pfstype;
89 int (*pt_valid) __P((struct lwp *, struct mount *));
90 } proc_targets[] = {
91 #define N(s) sizeof(s)-1, s
92 /* name type validp */
93 { DT_DIR, N("."), Pproc, NULL },
94 { DT_DIR, N(".."), Proot, NULL },
95 { DT_DIR, N("fd"), Pfd, NULL },
96 { DT_REG, N("file"), Pfile, procfs_validfile },
97 { DT_REG, N("mem"), Pmem, NULL },
98 { DT_REG, N("regs"), Pregs, procfs_validregs },
99 { DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs },
100 { DT_REG, N("ctl"), Pctl, NULL },
101 { DT_REG, N("stat"), Pstat, procfs_validfile_linux },
102 { DT_REG, N("status"), Pstatus, NULL },
103 { DT_REG, N("note"), Pnote, NULL },
104 { DT_REG, N("notepg"), Pnotepg, NULL },
105 { DT_REG, N("map"), Pmap, procfs_validmap },
106 { DT_REG, N("maps"), Pmaps, procfs_validmap },
107 { DT_REG, N("cmdline"), Pcmdline, NULL },
108 { DT_REG, N("exe"), Pfile, procfs_validfile_linux },
109 #ifdef __HAVE_PROCFS_MACHDEP
110 PROCFS_MACHDEP_NODETYPE_DEFNS
111 #endif
112 #undef N
113 };
114 static const int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]);
115
116 /*
117 * List of files in the root directory. Note: the validate function will
118 * be called with p == NULL for these ones.
119 */
120 static const struct proc_target proc_root_targets[] = {
121 #define N(s) sizeof(s)-1, s
122 /* name type validp */
123 { DT_REG, N("meminfo"), Pmeminfo, procfs_validfile_linux },
124 { DT_REG, N("cpuinfo"), Pcpuinfo, procfs_validfile_linux },
125 { DT_REG, N("uptime"), Puptime, procfs_validfile_linux },
126 #undef N
127 };
128 static const int nproc_root_targets =
129 sizeof(proc_root_targets) / sizeof(proc_root_targets[0]);
130
131 int procfs_lookup __P((void *));
132 #define procfs_create genfs_eopnotsupp
133 #define procfs_mknod genfs_eopnotsupp
134 int procfs_open __P((void *));
135 int procfs_close __P((void *));
136 int procfs_access __P((void *));
137 int procfs_getattr __P((void *));
138 int procfs_setattr __P((void *));
139 #define procfs_read procfs_rw
140 #define procfs_write procfs_rw
141 #define procfs_fcntl genfs_fcntl
142 #define procfs_ioctl genfs_enoioctl
143 #define procfs_poll genfs_poll
144 #define procfs_revoke genfs_revoke
145 #define procfs_fsync genfs_nullop
146 #define procfs_seek genfs_nullop
147 #define procfs_remove genfs_eopnotsupp
148 int procfs_link __P((void *));
149 #define procfs_rename genfs_eopnotsupp
150 #define procfs_mkdir genfs_eopnotsupp
151 #define procfs_rmdir genfs_eopnotsupp
152 int procfs_symlink __P((void *));
153 int procfs_readdir __P((void *));
154 int procfs_readlink __P((void *));
155 #define procfs_abortop genfs_abortop
156 int procfs_inactive __P((void *));
157 int procfs_reclaim __P((void *));
158 #define procfs_lock genfs_lock
159 #define procfs_unlock genfs_unlock
160 #define procfs_bmap genfs_badop
161 #define procfs_strategy genfs_badop
162 int procfs_print __P((void *));
163 int procfs_pathconf __P((void *));
164 #define procfs_islocked genfs_islocked
165 #define procfs_advlock genfs_einval
166 #define procfs_blkatoff genfs_eopnotsupp
167 #define procfs_valloc genfs_eopnotsupp
168 #define procfs_vfree genfs_nullop
169 #define procfs_truncate genfs_eopnotsupp
170 #define procfs_update genfs_nullop
171 #define procfs_bwrite genfs_eopnotsupp
172 #define procfs_putpages genfs_null_putpages
173
174 static int atoi __P((const char *, size_t));
175
176 /*
177 * procfs vnode operations.
178 */
179 int (**procfs_vnodeop_p) __P((void *));
180 const struct vnodeopv_entry_desc procfs_vnodeop_entries[] = {
181 { &vop_default_desc, vn_default_error },
182 { &vop_lookup_desc, procfs_lookup }, /* lookup */
183 { &vop_create_desc, procfs_create }, /* create */
184 { &vop_mknod_desc, procfs_mknod }, /* mknod */
185 { &vop_open_desc, procfs_open }, /* open */
186 { &vop_close_desc, procfs_close }, /* close */
187 { &vop_access_desc, procfs_access }, /* access */
188 { &vop_getattr_desc, procfs_getattr }, /* getattr */
189 { &vop_setattr_desc, procfs_setattr }, /* setattr */
190 { &vop_read_desc, procfs_read }, /* read */
191 { &vop_write_desc, procfs_write }, /* write */
192 { &vop_fcntl_desc, procfs_fcntl }, /* fcntl */
193 { &vop_ioctl_desc, procfs_ioctl }, /* ioctl */
194 { &vop_poll_desc, procfs_poll }, /* poll */
195 { &vop_revoke_desc, procfs_revoke }, /* revoke */
196 { &vop_fsync_desc, procfs_fsync }, /* fsync */
197 { &vop_seek_desc, procfs_seek }, /* seek */
198 { &vop_remove_desc, procfs_remove }, /* remove */
199 { &vop_link_desc, procfs_link }, /* link */
200 { &vop_rename_desc, procfs_rename }, /* rename */
201 { &vop_mkdir_desc, procfs_mkdir }, /* mkdir */
202 { &vop_rmdir_desc, procfs_rmdir }, /* rmdir */
203 { &vop_symlink_desc, procfs_symlink }, /* symlink */
204 { &vop_readdir_desc, procfs_readdir }, /* readdir */
205 { &vop_readlink_desc, procfs_readlink }, /* readlink */
206 { &vop_abortop_desc, procfs_abortop }, /* abortop */
207 { &vop_inactive_desc, procfs_inactive }, /* inactive */
208 { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */
209 { &vop_lock_desc, procfs_lock }, /* lock */
210 { &vop_unlock_desc, procfs_unlock }, /* unlock */
211 { &vop_bmap_desc, procfs_bmap }, /* bmap */
212 { &vop_strategy_desc, procfs_strategy }, /* strategy */
213 { &vop_print_desc, procfs_print }, /* print */
214 { &vop_islocked_desc, procfs_islocked }, /* islocked */
215 { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */
216 { &vop_advlock_desc, procfs_advlock }, /* advlock */
217 { &vop_blkatoff_desc, procfs_blkatoff }, /* blkatoff */
218 { &vop_valloc_desc, procfs_valloc }, /* valloc */
219 { &vop_vfree_desc, procfs_vfree }, /* vfree */
220 { &vop_truncate_desc, procfs_truncate }, /* truncate */
221 { &vop_update_desc, procfs_update }, /* update */
222 { &vop_putpages_desc, procfs_putpages }, /* putpages */
223 { NULL, NULL }
224 };
225 const struct vnodeopv_desc procfs_vnodeop_opv_desc =
226 { &procfs_vnodeop_p, procfs_vnodeop_entries };
227 /*
228 * set things up for doing i/o on
229 * the pfsnode (vp). (vp) is locked
230 * on entry, and should be left locked
231 * on exit.
232 *
233 * for procfs we don't need to do anything
234 * in particular for i/o. all that is done
235 * is to support exclusive open on process
236 * memory images.
237 */
238 int
239 procfs_open(v)
240 void *v;
241 {
242 struct vop_open_args /* {
243 struct vnode *a_vp;
244 int a_mode;
245 struct ucred *a_cred;
246 struct proc *a_p;
247 } */ *ap = v;
248 struct pfsnode *pfs = VTOPFS(ap->a_vp);
249 struct lwp *l1;
250 struct proc *p2;
251 int error;
252
253 l1 = ap->a_l; /* tracer */
254 p2 = PFIND(pfs->pfs_pid); /* traced */
255
256 if (p2 == NULL)
257 return (ENOENT); /* was ESRCH, jsp */
258
259 switch (pfs->pfs_type) {
260 case Pmem:
261 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) ||
262 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE)))
263 return (EBUSY);
264
265 if ((error = process_checkioperm(l1, p2)) != 0)
266 return (error);
267
268 if (ap->a_mode & FWRITE)
269 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL);
270
271 return (0);
272
273 default:
274 break;
275 }
276
277 return (0);
278 }
279
280 /*
281 * close the pfsnode (vp) after doing i/o.
282 * (vp) is not locked on entry or exit.
283 *
284 * nothing to do for procfs other than undo
285 * any exclusive open flag (see _open above).
286 */
287 int
288 procfs_close(v)
289 void *v;
290 {
291 struct vop_close_args /* {
292 struct vnode *a_vp;
293 int a_fflag;
294 struct ucred *a_cred;
295 struct proc *a_p;
296 } */ *ap = v;
297 struct pfsnode *pfs = VTOPFS(ap->a_vp);
298
299 switch (pfs->pfs_type) {
300 case Pmem:
301 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL))
302 pfs->pfs_flags &= ~(FWRITE|O_EXCL);
303 break;
304
305 default:
306 break;
307 }
308
309 return (0);
310 }
311
312 /*
313 * _inactive is called when the pfsnode
314 * is vrele'd and the reference count goes
315 * to zero. (vp) will be on the vnode free
316 * list, so to get it back vget() must be
317 * used.
318 *
319 * for procfs, check if the process is still
320 * alive and if it isn't then just throw away
321 * the vnode by calling vgone(). this may
322 * be overkill and a waste of time since the
323 * chances are that the process will still be
324 * there and PFIND is not free.
325 *
326 * (vp) is locked on entry, but must be unlocked on exit.
327 */
328 int
329 procfs_inactive(v)
330 void *v;
331 {
332 struct vop_inactive_args /* {
333 struct vnode *a_vp;
334 struct proc *a_p;
335 } */ *ap = v;
336 struct pfsnode *pfs = VTOPFS(ap->a_vp);
337
338 VOP_UNLOCK(ap->a_vp, 0);
339 if (PFIND(pfs->pfs_pid) == NULL)
340 vgone(ap->a_vp);
341
342 return (0);
343 }
344
345 /*
346 * _reclaim is called when getnewvnode()
347 * wants to make use of an entry on the vnode
348 * free list. at this time the filesystem needs
349 * to free any private data and remove the node
350 * from any private lists.
351 */
352 int
353 procfs_reclaim(v)
354 void *v;
355 {
356 struct vop_reclaim_args /* {
357 struct vnode *a_vp;
358 } */ *ap = v;
359
360 return (procfs_freevp(ap->a_vp));
361 }
362
363 /*
364 * Return POSIX pathconf information applicable to special devices.
365 */
366 int
367 procfs_pathconf(v)
368 void *v;
369 {
370 struct vop_pathconf_args /* {
371 struct vnode *a_vp;
372 int a_name;
373 register_t *a_retval;
374 } */ *ap = v;
375
376 switch (ap->a_name) {
377 case _PC_LINK_MAX:
378 *ap->a_retval = LINK_MAX;
379 return (0);
380 case _PC_MAX_CANON:
381 *ap->a_retval = MAX_CANON;
382 return (0);
383 case _PC_MAX_INPUT:
384 *ap->a_retval = MAX_INPUT;
385 return (0);
386 case _PC_PIPE_BUF:
387 *ap->a_retval = PIPE_BUF;
388 return (0);
389 case _PC_CHOWN_RESTRICTED:
390 *ap->a_retval = 1;
391 return (0);
392 case _PC_VDISABLE:
393 *ap->a_retval = _POSIX_VDISABLE;
394 return (0);
395 case _PC_SYNC_IO:
396 *ap->a_retval = 1;
397 return (0);
398 default:
399 return (EINVAL);
400 }
401 /* NOTREACHED */
402 }
403
404 /*
405 * _print is used for debugging.
406 * just print a readable description
407 * of (vp).
408 */
409 int
410 procfs_print(v)
411 void *v;
412 {
413 struct vop_print_args /* {
414 struct vnode *a_vp;
415 } */ *ap = v;
416 struct pfsnode *pfs = VTOPFS(ap->a_vp);
417
418 printf("tag VT_PROCFS, type %d, pid %d, mode %x, flags %lx\n",
419 pfs->pfs_type, pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags);
420 return 0;
421 }
422
423 int
424 procfs_link(v)
425 void *v;
426 {
427 struct vop_link_args /* {
428 struct vnode *a_dvp;
429 struct vnode *a_vp;
430 struct componentname *a_cnp;
431 } */ *ap = v;
432
433 VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
434 vput(ap->a_dvp);
435 return (EROFS);
436 }
437
438 int
439 procfs_symlink(v)
440 void *v;
441 {
442 struct vop_symlink_args /* {
443 struct vnode *a_dvp;
444 struct vnode **a_vpp;
445 struct componentname *a_cnp;
446 struct vattr *a_vap;
447 char *a_target;
448 } */ *ap = v;
449
450 VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
451 vput(ap->a_dvp);
452 return (EROFS);
453 }
454
455 /*
456 * Invent attributes for pfsnode (vp) and store
457 * them in (vap).
458 * Directories lengths are returned as zero since
459 * any real length would require the genuine size
460 * to be computed, and nothing cares anyway.
461 *
462 * this is relatively minimal for procfs.
463 */
464 int
465 procfs_getattr(v)
466 void *v;
467 {
468 struct vop_getattr_args /* {
469 struct vnode *a_vp;
470 struct vattr *a_vap;
471 struct ucred *a_cred;
472 struct proc *a_p;
473 } */ *ap = v;
474 struct pfsnode *pfs = VTOPFS(ap->a_vp);
475 struct vattr *vap = ap->a_vap;
476 struct proc *procp;
477 int error;
478
479 /* first check the process still exists */
480 switch (pfs->pfs_type) {
481 case Proot:
482 case Pcurproc:
483 case Pself:
484 procp = 0;
485 break;
486
487 default:
488 procp = PFIND(pfs->pfs_pid);
489 if (procp == NULL)
490 return (ENOENT);
491 break;
492 }
493
494 error = 0;
495
496 /* start by zeroing out the attributes */
497 VATTR_NULL(vap);
498
499 /* next do all the common fields */
500 vap->va_type = ap->a_vp->v_type;
501 vap->va_mode = pfs->pfs_mode;
502 vap->va_fileid = pfs->pfs_fileno;
503 vap->va_flags = 0;
504 vap->va_blocksize = PAGE_SIZE;
505
506 /*
507 * Make all times be current TOD. Avoid microtime(9), it's slow.
508 * We don't guard the read from time(9) with splclock(9) since we
509 * don't actually need to be THAT sure the access is atomic.
510 *
511 * It would be possible to get the process start
512 * time from the p_stat structure, but there's
513 * no "file creation" time stamp anyway, and the
514 * p_stat structure is not addressible if u. gets
515 * swapped out for that process.
516 */
517 TIMEVAL_TO_TIMESPEC(&time, &vap->va_ctime);
518 vap->va_atime = vap->va_mtime = vap->va_ctime;
519
520 switch (pfs->pfs_type) {
521 case Pmem:
522 case Pregs:
523 case Pfpregs:
524 #if defined(__HAVE_PROCFS_MACHDEP) && defined(PROCFS_MACHDEP_PROTECT_CASES)
525 PROCFS_MACHDEP_PROTECT_CASES
526 #endif
527 /*
528 * If the process has exercised some setuid or setgid
529 * privilege, then rip away read/write permission so
530 * that only root can gain access.
531 */
532 if (procp->p_flag & P_SUGID)
533 vap->va_mode &= ~(S_IRUSR|S_IWUSR);
534 /* FALLTHROUGH */
535 case Pctl:
536 case Pstatus:
537 case Pstat:
538 case Pnote:
539 case Pnotepg:
540 case Pmap:
541 case Pmaps:
542 case Pcmdline:
543 vap->va_nlink = 1;
544 vap->va_uid = procp->p_ucred->cr_uid;
545 vap->va_gid = procp->p_ucred->cr_gid;
546 break;
547 case Pmeminfo:
548 case Pcpuinfo:
549 case Puptime:
550 vap->va_nlink = 1;
551 vap->va_uid = vap->va_gid = 0;
552 break;
553
554 default:
555 break;
556 }
557
558 /*
559 * now do the object specific fields
560 *
561 * The size could be set from struct reg, but it's hardly
562 * worth the trouble, and it puts some (potentially) machine
563 * dependent data into this machine-independent code. If it
564 * becomes important then this function should break out into
565 * a per-file stat function in the corresponding .c file.
566 */
567
568 switch (pfs->pfs_type) {
569 case Proot:
570 /*
571 * Set nlink to 1 to tell fts(3) we don't actually know.
572 */
573 vap->va_nlink = 1;
574 vap->va_uid = 0;
575 vap->va_gid = 0;
576 vap->va_bytes = vap->va_size = DEV_BSIZE;
577 break;
578
579 case Pcurproc: {
580 char buf[16]; /* should be enough */
581 vap->va_nlink = 1;
582 vap->va_uid = 0;
583 vap->va_gid = 0;
584 vap->va_bytes = vap->va_size =
585 sprintf(buf, "%ld", (long)curproc->p_pid);
586 break;
587 }
588
589 case Pself:
590 vap->va_nlink = 1;
591 vap->va_uid = 0;
592 vap->va_gid = 0;
593 vap->va_bytes = vap->va_size = sizeof("curproc");
594 break;
595
596 case Pfd:
597 if (pfs->pfs_fd != -1) {
598 struct file *fp;
599 struct proc *pown;
600
601 if ((error = procfs_getfp(pfs, &pown, &fp)) != 0)
602 return error;
603 FILE_USE(fp);
604 vap->va_nlink = 1;
605 vap->va_uid = fp->f_cred->cr_uid;
606 vap->va_gid = fp->f_cred->cr_gid;
607 switch (fp->f_type) {
608 case DTYPE_VNODE:
609 vap->va_bytes = vap->va_size =
610 ((struct vnode *)fp->f_data)->v_size;
611 break;
612 default:
613 vap->va_bytes = vap->va_size = 0;
614 break;
615 }
616 FILE_UNUSE(fp, proc_representative_lwp(pown));
617 break;
618 }
619 /*FALLTHROUGH*/
620 case Pproc:
621 vap->va_nlink = 2;
622 vap->va_uid = procp->p_ucred->cr_uid;
623 vap->va_gid = procp->p_ucred->cr_gid;
624 vap->va_bytes = vap->va_size = DEV_BSIZE;
625 break;
626
627 case Pfile:
628 error = EOPNOTSUPP;
629 break;
630
631 case Pmem:
632 vap->va_bytes = vap->va_size =
633 ctob(procp->p_vmspace->vm_tsize +
634 procp->p_vmspace->vm_dsize +
635 procp->p_vmspace->vm_ssize);
636 break;
637
638 #if defined(PT_GETREGS) || defined(PT_SETREGS)
639 case Pregs:
640 vap->va_bytes = vap->va_size = sizeof(struct reg);
641 break;
642 #endif
643
644 #if defined(PT_GETFPREGS) || defined(PT_SETFPREGS)
645 case Pfpregs:
646 vap->va_bytes = vap->va_size = sizeof(struct fpreg);
647 break;
648 #endif
649
650 case Pctl:
651 case Pstatus:
652 case Pstat:
653 case Pnote:
654 case Pnotepg:
655 case Pcmdline:
656 case Pmeminfo:
657 case Pcpuinfo:
658 case Puptime:
659 vap->va_bytes = vap->va_size = 0;
660 break;
661 case Pmap:
662 case Pmaps:
663 /*
664 * Advise a larger blocksize for the map files, so that
665 * they may be read in one pass.
666 */
667 vap->va_blocksize = 4 * PAGE_SIZE;
668 vap->va_bytes = vap->va_size = 0;
669 break;
670
671 #ifdef __HAVE_PROCFS_MACHDEP
672 PROCFS_MACHDEP_NODETYPE_CASES
673 error = procfs_machdep_getattr(ap->a_vp, vap, procp);
674 break;
675 #endif
676
677 default:
678 panic("procfs_getattr");
679 }
680
681 return (error);
682 }
683
/*
 * Pretend that attributes were set.
 *
 * Nothing is changed, but success is reported on purpose: returning
 * an error would break callers such as creat(), which try to set the
 * file length to 0.  Worse, "echo $note > /proc/$pid/note" would fail.
 */
/*ARGSUSED*/
int
procfs_setattr(v)
	void *v;
{

	return (0);
}
700
701 /*
702 * implement access checking.
703 *
704 * actually, the check for super-user is slightly
705 * broken since it will allow read access to write-only
706 * objects. this doesn't cause any particular trouble
707 * but does mean that the i/o entry points need to check
708 * that the operation really does make sense.
709 */
710 int
711 procfs_access(v)
712 void *v;
713 {
714 struct vop_access_args /* {
715 struct vnode *a_vp;
716 int a_mode;
717 struct ucred *a_cred;
718 struct lwp *a_l;
719 } */ *ap = v;
720 struct vattr va;
721 int error;
722
723 if ((error = VOP_GETATTR(ap->a_vp, &va, ap->a_cred, ap->a_l)) != 0)
724 return (error);
725
726 return (vaccess(va.va_type, va.va_mode,
727 va.va_uid, va.va_gid, ap->a_mode, ap->a_cred));
728 }
729
730 /*
731 * lookup. this is incredibly complicated in the
732 * general case, however for most pseudo-filesystems
733 * very little needs to be done.
734 *
735 * Locking isn't hard here, just poorly documented.
736 *
737 * If we're looking up ".", just vref the parent & return it.
738 *
739 * If we're looking up "..", unlock the parent, and lock "..". If everything
740 * went ok, and we're on the last component and the caller requested the
741 * parent locked, try to re-lock the parent. We do this to prevent lock
742 * races.
743 *
744 * For anything else, get the needed node. Then unlock the parent if not
745 * the last component or not LOCKPARENT (i.e. if we wouldn't re-lock the
746 * parent in the .. case).
747 *
748 * We try to exit with the parent locked in error cases.
749 */
750 int
751 procfs_lookup(v)
752 void *v;
753 {
754 struct vop_lookup_args /* {
755 struct vnode * a_dvp;
756 struct vnode ** a_vpp;
757 struct componentname * a_cnp;
758 } */ *ap = v;
759 struct componentname *cnp = ap->a_cnp;
760 struct vnode **vpp = ap->a_vpp;
761 struct vnode *dvp = ap->a_dvp;
762 const char *pname = cnp->cn_nameptr;
763 const struct proc_target *pt = NULL;
764 struct vnode *fvp;
765 pid_t pid;
766 struct pfsnode *pfs;
767 struct proc *p = NULL;
768 struct lwp *l = NULL;
769 int i, error, wantpunlock, iscurproc = 0, isself = 0;
770
771 *vpp = NULL;
772 cnp->cn_flags &= ~PDIRUNLOCK;
773
774 if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
775 return (EROFS);
776
777 if (cnp->cn_namelen == 1 && *pname == '.') {
778 *vpp = dvp;
779 VREF(dvp);
780 return (0);
781 }
782
783 wantpunlock = (~cnp->cn_flags & (LOCKPARENT | ISLASTCN));
784 pfs = VTOPFS(dvp);
785 switch (pfs->pfs_type) {
786 case Proot:
787 /*
788 * Shouldn't get here with .. in the root node.
789 */
790 if (cnp->cn_flags & ISDOTDOT)
791 return (EIO);
792
793 iscurproc = CNEQ(cnp, "curproc", 7);
794 isself = CNEQ(cnp, "self", 4);
795
796 if (iscurproc || isself) {
797 error = procfs_allocvp(dvp->v_mount, vpp, 0,
798 iscurproc ? Pcurproc : Pself, -1);
799 if ((error == 0) && (wantpunlock)) {
800 VOP_UNLOCK(dvp, 0);
801 cnp->cn_flags |= PDIRUNLOCK;
802 }
803 return (error);
804 }
805
806 for (i = 0; i < nproc_root_targets; i++) {
807 pt = &proc_root_targets[i];
808 if (cnp->cn_namelen == pt->pt_namlen &&
809 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
810 (pt->pt_valid == NULL ||
811 (*pt->pt_valid)(cnp->cn_lwp, dvp->v_mount)))
812 break;
813 }
814
815 if (i != nproc_root_targets) {
816 error = procfs_allocvp(dvp->v_mount, vpp, 0,
817 pt->pt_pfstype, -1);
818 if ((error == 0) && (wantpunlock)) {
819 VOP_UNLOCK(dvp, 0);
820 cnp->cn_flags |= PDIRUNLOCK;
821 }
822 return (error);
823 }
824
825 pid = (pid_t)atoi(pname, cnp->cn_namelen);
826
827 p = PFIND(pid);
828 if (p == NULL)
829 break;
830
831 error = procfs_allocvp(dvp->v_mount, vpp, pid, Pproc, -1);
832 if ((error == 0) && (wantpunlock)) {
833 VOP_UNLOCK(dvp, 0);
834 cnp->cn_flags |= PDIRUNLOCK;
835 }
836 return (error);
837
838 case Pproc:
839 /*
840 * do the .. dance. We unlock the directory, and then
841 * get the root dir. That will automatically return ..
842 * locked. Then if the caller wanted dvp locked, we
843 * re-lock.
844 */
845 if (cnp->cn_flags & ISDOTDOT) {
846 VOP_UNLOCK(dvp, 0);
847 cnp->cn_flags |= PDIRUNLOCK;
848 error = procfs_root(dvp->v_mount, vpp, cnp->cn_lwp);
849 if ((error == 0) && (wantpunlock == 0) &&
850 ((error = vn_lock(dvp, LK_EXCLUSIVE)) == 0))
851 cnp->cn_flags &= ~PDIRUNLOCK;
852 return (error);
853 }
854
855 p = PFIND(pfs->pfs_pid);
856 if (p == NULL)
857 break;
858 l = proc_representative_lwp(p);
859
860 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) {
861 if (cnp->cn_namelen == pt->pt_namlen &&
862 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
863 (pt->pt_valid == NULL ||
864 (*pt->pt_valid)(cnp->cn_lwp, dvp->v_mount)))
865 goto found;
866 }
867 break;
868
869 found:
870 if (pt->pt_pfstype == Pfile) {
871 fvp = p->p_textvp;
872 /* We already checked that it exists. */
873 VREF(fvp);
874 vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY);
875 if (wantpunlock) {
876 VOP_UNLOCK(dvp, 0);
877 cnp->cn_flags |= PDIRUNLOCK;
878 }
879 *vpp = fvp;
880 return (0);
881 }
882
883 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
884 pt->pt_pfstype, -1);
885 if ((error == 0) && (wantpunlock)) {
886 VOP_UNLOCK(dvp, 0);
887 cnp->cn_flags |= PDIRUNLOCK;
888 }
889 return (error);
890
891 case Pfd: {
892 int fd;
893 struct file *fp;
894 /*
895 * do the .. dance. We unlock the directory, and then
896 * get the proc dir. That will automatically return ..
897 * locked. Then if the caller wanted dvp locked, we
898 * re-lock.
899 */
900 if (cnp->cn_flags & ISDOTDOT) {
901 VOP_UNLOCK(dvp, 0);
902 cnp->cn_flags |= PDIRUNLOCK;
903 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
904 Pproc, -1);
905 if ((error == 0) && (wantpunlock == 0) &&
906 ((error = vn_lock(dvp, LK_EXCLUSIVE)) == 0))
907 cnp->cn_flags &= ~PDIRUNLOCK;
908 return (error);
909 }
910 fd = atoi(pname, cnp->cn_namelen);
911 p = PFIND(pfs->pfs_pid);
912 if (p == NULL || (fp = fd_getfile(p->p_fd, fd)) == NULL)
913 return ENOENT;
914 FILE_USE(fp);
915
916 switch (fp->f_type) {
917 case DTYPE_VNODE:
918 fvp = (struct vnode *)fp->f_data;
919
920 /* Don't show directories */
921 if (fvp->v_type == VDIR)
922 goto symlink;
923
924 VREF(fvp);
925 FILE_UNUSE(fp, l);
926 vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY |
927 (p == curproc ? LK_CANRECURSE : 0));
928 *vpp = fvp;
929 error = 0;
930 break;
931 default:
932 symlink:
933 FILE_UNUSE(fp, l);
934 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
935 Pfd, fd);
936 break;
937 }
938 if ((error == 0) && (wantpunlock)) {
939 VOP_UNLOCK(dvp, 0);
940 cnp->cn_flags |= PDIRUNLOCK;
941 }
942 return error;
943 }
944 default:
945 return (ENOTDIR);
946 }
947
948 return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS);
949 }
950
951 int
952 procfs_validfile(l, mp)
953 struct lwp *l;
954 struct mount *mp;
955 {
956 return (l->l_proc->p_textvp != NULL);
957 }
958
959 static int
960 procfs_validfile_linux(l, mp)
961 struct lwp *l;
962 struct mount *mp;
963 {
964 int flags;
965
966 flags = VFSTOPROC(mp)->pmnt_flags;
967 return ((flags & PROCFSMNT_LINUXCOMPAT) &&
968 (l == NULL || l->l_proc == NULL || procfs_validfile(l, mp)));
969 }
970
971 /*
972 * readdir returns directory entries from pfsnode (vp).
973 *
974 * the strategy here with procfs is to generate a single
975 * directory entry at a time (struct dirent) and then
976 * copy that out to userland using uiomove. a more efficient
977 * though more complex implementation, would try to minimize
978 * the number of calls to uiomove(). for procfs, this is
979 * hardly worth the added code complexity.
980 *
981 * this should just be done through read()
982 */
983 int
984 procfs_readdir(v)
985 void *v;
986 {
987 struct vop_readdir_args /* {
988 struct vnode *a_vp;
989 struct uio *a_uio;
990 struct ucred *a_cred;
991 int *a_eofflag;
992 off_t **a_cookies;
993 int *a_ncookies;
994 } */ *ap = v;
995 struct uio *uio = ap->a_uio;
996 struct dirent d;
997 struct pfsnode *pfs;
998 off_t i;
999 int error;
1000 off_t *cookies = NULL;
1001 int ncookies, left, skip, j;
1002 struct vnode *vp;
1003 const struct proc_target *pt;
1004
1005 vp = ap->a_vp;
1006 pfs = VTOPFS(vp);
1007
1008 if (uio->uio_resid < UIO_MX)
1009 return (EINVAL);
1010 if (uio->uio_offset < 0)
1011 return (EINVAL);
1012
1013 error = 0;
1014 i = uio->uio_offset;
1015 memset((caddr_t)&d, 0, UIO_MX);
1016 d.d_reclen = UIO_MX;
1017 ncookies = uio->uio_resid / UIO_MX;
1018
1019 switch (pfs->pfs_type) {
1020 /*
1021 * this is for the process-specific sub-directories.
1022 * all that is needed to is copy out all the entries
1023 * from the procent[] table (top of this file).
1024 */
1025 case Pproc: {
1026 struct proc *p;
1027 struct lwp *l;
1028
1029 if (i >= nproc_targets)
1030 return 0;
1031
1032 p = PFIND(pfs->pfs_pid);
1033 if (p == NULL)
1034 break;
1035
1036 if (ap->a_ncookies) {
1037 ncookies = min(ncookies, (nproc_targets - i));
1038 cookies = malloc(ncookies * sizeof (off_t),
1039 M_TEMP, M_WAITOK);
1040 *ap->a_cookies = cookies;
1041 }
1042
1043 for (pt = &proc_targets[i];
1044 uio->uio_resid >= UIO_MX && i < nproc_targets; pt++, i++) {
1045 if (pt->pt_valid &&
1046 (*pt->pt_valid)(l, vp->v_mount) == 0)
1047 continue;
1048
1049 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid,
1050 pt->pt_pfstype, -1);
1051 d.d_namlen = pt->pt_namlen;
1052 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1);
1053 d.d_type = pt->pt_type;
1054
1055 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
1056 break;
1057 if (cookies)
1058 *cookies++ = i + 1;
1059 }
1060
1061 break;
1062 }
1063 case Pfd: {
1064 struct proc *p;
1065 struct filedesc *fdp;
1066 struct file *fp;
1067 int lim, last, nc = 0;
1068
1069 p = PFIND(pfs->pfs_pid);
1070 if (p == NULL)
1071 return ESRCH;
1072
1073 fdp = p->p_fd;
1074
1075 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
1076 last = min(fdp->fd_nfiles, lim);
1077 if (i >= lim)
1078 return 0;
1079
1080 if (ap->a_ncookies) {
1081 ncookies = min(ncookies, (fdp->fd_nfiles + 2 - i));
1082 cookies = malloc(ncookies * sizeof (off_t),
1083 M_TEMP, M_WAITOK);
1084 *ap->a_cookies = cookies;
1085 }
1086
1087 for (; i < 2 && uio->uio_resid >= UIO_MX; i++) {
1088 pt = &proc_targets[i];
1089 d.d_fileno = (pfs->pfs_pid << 8) + i;
1090 d.d_namlen = pt->pt_namlen;
1091 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid,
1092 pt->pt_pfstype, -1);
1093 (void)memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1);
1094 d.d_type = pt->pt_type;
1095 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
1096 break;
1097 if (cookies)
1098 *cookies++ = i + 1;
1099 nc++;
1100 }
1101 if (error) {
1102 ncookies = nc;
1103 break;
1104 }
1105 for (; uio->uio_resid >= UIO_MX && i < fdp->fd_nfiles; i++) {
1106 if ((fp = fd_getfile(fdp, i - 2)) == NULL)
1107 continue;
1108 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, Pfd, i - 2);
1109 d.d_namlen = snprintf(d.d_name, sizeof(d.d_name),
1110 "%lld", (long long)(i - 2));
1111 d.d_type = VREG;
1112 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
1113 break;
1114 if (cookies)
1115 *cookies++ = i + 1;
1116 nc++;
1117 }
1118 ncookies = nc;
1119 break;
1120 }
1121
1122 /*
1123 * this is for the root of the procfs filesystem
1124 * what is needed are special entries for "curproc"
1125 * and "self" followed by an entry for each process
1126 * on allproc
1127 #ifdef PROCFS_ZOMBIE
1128 * and deadproc and zombproc.
1129 #endif
1130 */
1131
1132 case Proot: {
1133 int pcnt = i, nc = 0;
1134 const struct proclist_desc *pd;
1135 volatile struct proc *p;
1136
1137 if (pcnt > 3)
1138 pcnt = 3;
1139 if (ap->a_ncookies) {
1140 /*
1141 * XXX Potentially allocating too much space here,
1142 * but I'm lazy. This loop needs some work.
1143 */
1144 cookies = malloc(ncookies * sizeof (off_t),
1145 M_TEMP, M_WAITOK);
1146 *ap->a_cookies = cookies;
1147 }
1148 /*
1149 * XXX: THIS LOOP ASSUMES THAT allproc IS THE FIRST
1150 * PROCLIST IN THE proclists!
1151 */
1152 proclist_lock_read();
1153 pd = proclists;
1154 #ifdef PROCFS_ZOMBIE
1155 again:
1156 #endif
1157 for (p = LIST_FIRST(pd->pd_list);
1158 p != NULL && uio->uio_resid >= UIO_MX; i++, pcnt++) {
1159 switch (i) {
1160 case 0: /* `.' */
1161 case 1: /* `..' */
1162 d.d_fileno = PROCFS_FILENO(0, Proot, -1);
1163 d.d_namlen = i + 1;
1164 memcpy(d.d_name, "..", d.d_namlen);
1165 d.d_name[i + 1] = '\0';
1166 d.d_type = DT_DIR;
1167 break;
1168
1169 case 2:
1170 d.d_fileno = PROCFS_FILENO(0, Pcurproc, -1);
1171 d.d_namlen = sizeof("curproc") - 1;
1172 memcpy(d.d_name, "curproc", sizeof("curproc"));
1173 d.d_type = DT_LNK;
1174 break;
1175
1176 case 3:
1177 d.d_fileno = PROCFS_FILENO(0, Pself, -1);
1178 d.d_namlen = sizeof("self") - 1;
1179 memcpy(d.d_name, "self", sizeof("self"));
1180 d.d_type = DT_LNK;
1181 break;
1182
1183 default:
1184 while (pcnt < i) {
1185 pcnt++;
1186 p = LIST_NEXT(p, p_list);
1187 if (!p)
1188 goto done;
1189 }
1190 d.d_fileno = PROCFS_FILENO(p->p_pid, Pproc, -1);
1191 d.d_namlen = sprintf(d.d_name, "%ld",
1192 (long)p->p_pid);
1193 d.d_type = DT_DIR;
1194 p = p->p_list.le_next;
1195 break;
1196 }
1197
1198 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
1199 break;
1200 nc++;
1201 if (cookies)
1202 *cookies++ = i + 1;
1203 }
1204 done:
1205
1206 #ifdef PROCFS_ZOMBIE
1207 pd++;
1208 if (p == NULL && pd->pd_list != NULL)
1209 goto again;
1210 #endif
1211 proclist_unlock_read();
1212
1213 skip = i - pcnt;
1214 if (skip >= nproc_root_targets)
1215 break;
1216 left = nproc_root_targets - skip;
1217 for (j = 0, pt = &proc_root_targets[0];
1218 uio->uio_resid >= UIO_MX && j < left;
1219 pt++, j++, i++) {
1220 if (pt->pt_valid &&
1221 (*pt->pt_valid)(NULL, vp->v_mount) == 0)
1222 continue;
1223 d.d_fileno = PROCFS_FILENO(0, pt->pt_pfstype, -1);
1224 d.d_namlen = pt->pt_namlen;
1225 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1);
1226 d.d_type = pt->pt_type;
1227
1228 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
1229 break;
1230 nc++;
1231 if (cookies)
1232 *cookies++ = i + 1;
1233 }
1234
1235 ncookies = nc;
1236 break;
1237 }
1238
1239 default:
1240 error = ENOTDIR;
1241 break;
1242 }
1243
1244 if (ap->a_ncookies) {
1245 if (error) {
1246 if (cookies)
1247 free(*ap->a_cookies, M_TEMP);
1248 *ap->a_ncookies = 0;
1249 *ap->a_cookies = NULL;
1250 } else
1251 *ap->a_ncookies = ncookies;
1252 }
1253 uio->uio_offset = i;
1254 return (error);
1255 }
1256
1257 /*
1258 * readlink reads the link of `curproc'
1259 */
1260 int
1261 procfs_readlink(v)
1262 void *v;
1263 {
1264 struct vop_readlink_args *ap = v;
1265 char buf[16]; /* should be enough */
1266 char *bp = buf;
1267 char *path = NULL;
1268 int len;
1269 int error = 0;
1270 struct pfsnode *pfs = VTOPFS(ap->a_vp);
1271
1272 if (pfs->pfs_fileno == PROCFS_FILENO(0, Pcurproc, -1))
1273 len = sprintf(buf, "%ld", (long)curproc->p_pid);
1274 else if (pfs->pfs_fileno == PROCFS_FILENO(0, Pself, -1))
1275 len = sprintf(buf, "%s", "curproc");
1276 else {
1277 struct file *fp;
1278 struct proc *pown;
1279 struct vnode *vxp, *vp;
1280
1281 if ((error = procfs_getfp(pfs, &pown, &fp)) != 0)
1282 return error;
1283 FILE_USE(fp);
1284 switch (fp->f_type) {
1285 case DTYPE_VNODE:
1286 vxp = (struct vnode *)fp->f_data;
1287 if (vxp->v_type != VDIR) {
1288 FILE_UNUSE(fp, proc_representative_lwp(pown));
1289 return EINVAL;
1290 }
1291 if ((path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK))
1292 == NULL) {
1293 FILE_UNUSE(fp, proc_representative_lwp(pown));
1294 return ENOMEM;
1295 }
1296 bp = path + MAXPATHLEN;
1297 *--bp = '\0';
1298 vp = curproc->p_cwdi->cwdi_rdir;
1299 if (vp == NULL)
1300 vp = rootvnode;
1301 error = getcwd_common(vxp, vp, &bp, path,
1302 MAXPATHLEN / 2, 0, curlwp);
1303 FILE_UNUSE(fp, proc_representative_lwp(pown));
1304 if (error) {
1305 free(path, M_TEMP);
1306 return error;
1307 }
1308 len = strlen(bp);
1309 break;
1310
1311 case DTYPE_MISC:
1312 len = sprintf(buf, "%s", "[misc]");
1313 break;
1314
1315 case DTYPE_KQUEUE:
1316 len = sprintf(buf, "%s", "[kqueue]");
1317 break;
1318
1319 default:
1320 return EINVAL;
1321 }
1322 }
1323
1324 error = uiomove((caddr_t)bp, len, ap->a_uio);
1325 if (path)
1326 free(path, M_TEMP);
1327 return error;
1328 }
1329
/*
 * convert decimal ascii to int
 *
 * Parses exactly `len' bytes starting at `b' (the input need not be
 * NUL-terminated) as an unsigned decimal number.
 *
 * Returns the parsed value, or -1 if any byte is not a decimal digit or
 * if the value does not fit in an int.  An empty input (len == 0)
 * yields 0.
 */
static int
atoi(b, len)
	const char *b;
	size_t len;
{
	/* Largest int value, computed locally to avoid needing <limits.h>. */
	const int int_max = (int)(~0U >> 1);
	int p = 0;

	while (len--) {
		char c = *b++;
		int digit;

		if (c < '0' || c > '9')
			return -1;
		digit = c - '0';
		/*
		 * Reject the digit before 10 * p + digit can overflow:
		 * signed overflow is undefined, and a wrapped result could
		 * be mistaken for a different, valid number.
		 */
		if (p > (int_max - digit) / 10)
			return -1;
		p = 10 * p + digit;
	}

	return p;
}
1349