procfs_vnops.c revision 1.165 1 /* $NetBSD: procfs_vnops.c,v 1.165 2008/01/23 15:04:40 elad Exp $ */
2
3 /*-
4 * Copyright (c) 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 /*
40 * Copyright (c) 1993, 1995
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * Jan-Simon Pendry.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95
71 */
72
73 /*
74 * Copyright (c) 1993 Jan-Simon Pendry
75 *
76 * This code is derived from software contributed to Berkeley by
77 * Jan-Simon Pendry.
78 *
79 * Redistribution and use in source and binary forms, with or without
80 * modification, are permitted provided that the following conditions
81 * are met:
82 * 1. Redistributions of source code must retain the above copyright
83 * notice, this list of conditions and the following disclaimer.
84 * 2. Redistributions in binary form must reproduce the above copyright
85 * notice, this list of conditions and the following disclaimer in the
86 * documentation and/or other materials provided with the distribution.
87 * 3. All advertising materials mentioning features or use of this software
88 * must display the following acknowledgement:
89 * This product includes software developed by the University of
90 * California, Berkeley and its contributors.
91 * 4. Neither the name of the University nor the names of its contributors
92 * may be used to endorse or promote products derived from this software
93 * without specific prior written permission.
94 *
95 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
96 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
97 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
98 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
99 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
100 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
101 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
102 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
103 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
104 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
105 * SUCH DAMAGE.
106 *
107 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95
108 */
109
110 /*
111 * procfs vnode interface
112 */
113
114 #include <sys/cdefs.h>
115 __KERNEL_RCSID(0, "$NetBSD: procfs_vnops.c,v 1.165 2008/01/23 15:04:40 elad Exp $");
116
117 #include <sys/param.h>
118 #include <sys/systm.h>
119 #include <sys/time.h>
120 #include <sys/kernel.h>
121 #include <sys/file.h>
122 #include <sys/filedesc.h>
123 #include <sys/proc.h>
124 #include <sys/vnode.h>
125 #include <sys/namei.h>
126 #include <sys/malloc.h>
127 #include <sys/mount.h>
128 #include <sys/dirent.h>
129 #include <sys/resourcevar.h>
130 #include <sys/stat.h>
131 #include <sys/ptrace.h>
132 #include <sys/kauth.h>
133
134 #include <uvm/uvm_extern.h> /* for PAGE_SIZE */
135
136 #include <machine/reg.h>
137
138 #include <miscfs/genfs/genfs.h>
139 #include <miscfs/procfs/procfs.h>
140
141 /*
142 * Vnode Operations.
143 *
144 */
145
146 static int procfs_validfile_linux(struct lwp *, struct mount *);
147 static int procfs_root_readdir_callback(struct proc *, void *);
148 static void procfs_dir(pfstype, struct lwp *, struct proc *, char **, char *,
149 size_t);
150
151 /*
152 * This is a list of the valid names in the
153 * process-specific sub-directories. It is
154 * used in procfs_lookup and procfs_readdir
155 */
156 static const struct proc_target {
157 u_char pt_type;
158 u_char pt_namlen;
159 const char *pt_name;
160 pfstype pt_pfstype;
161 int (*pt_valid)(struct lwp *, struct mount *);
162 } proc_targets[] = {
163 #define N(s) sizeof(s)-1, s
164 /* name type validp */
165 { DT_DIR, N("."), PFSproc, NULL },
166 { DT_DIR, N(".."), PFSroot, NULL },
167 { DT_DIR, N("fd"), PFSfd, NULL },
168 { DT_REG, N("file"), PFSfile, procfs_validfile },
169 { DT_REG, N("mem"), PFSmem, NULL },
170 { DT_REG, N("regs"), PFSregs, procfs_validregs },
171 { DT_REG, N("fpregs"), PFSfpregs, procfs_validfpregs },
172 { DT_REG, N("ctl"), PFSctl, NULL },
173 { DT_REG, N("stat"), PFSstat, procfs_validfile_linux },
174 { DT_REG, N("status"), PFSstatus, NULL },
175 { DT_REG, N("note"), PFSnote, NULL },
176 { DT_REG, N("notepg"), PFSnotepg, NULL },
177 { DT_REG, N("map"), PFSmap, procfs_validmap },
178 { DT_REG, N("maps"), PFSmaps, procfs_validmap },
179 { DT_REG, N("cmdline"), PFScmdline, NULL },
180 { DT_REG, N("exe"), PFSexe, procfs_validfile },
181 { DT_LNK, N("cwd"), PFScwd, NULL },
182 { DT_LNK, N("root"), PFSchroot, NULL },
183 { DT_LNK, N("emul"), PFSemul, NULL },
184 { DT_REG, N("statm"), PFSstatm, procfs_validfile_linux },
185 #ifdef __HAVE_PROCFS_MACHDEP
186 PROCFS_MACHDEP_NODETYPE_DEFNS
187 #endif
188 #undef N
189 };
190 static const int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]);
191
192 /*
193 * List of files in the root directory. Note: the validate function will
194 * be called with p == NULL for these ones.
195 */
196 static const struct proc_target proc_root_targets[] = {
197 #define N(s) sizeof(s)-1, s
198 /* name type validp */
199 { DT_REG, N("meminfo"), PFSmeminfo, procfs_validfile_linux },
200 { DT_REG, N("cpuinfo"), PFScpuinfo, procfs_validfile_linux },
201 { DT_REG, N("uptime"), PFSuptime, procfs_validfile_linux },
202 { DT_REG, N("mounts"), PFSmounts, procfs_validfile_linux },
203 { DT_REG, N("devices"), PFSdevices, procfs_validfile_linux },
204 { DT_REG, N("stat"), PFScpustat, procfs_validfile_linux },
205 { DT_REG, N("loadavg"), PFSloadavg, procfs_validfile_linux },
206 #undef N
207 };
208 static const int nproc_root_targets =
209 sizeof(proc_root_targets) / sizeof(proc_root_targets[0]);
210
211 int procfs_lookup(void *);
212 #define procfs_create genfs_eopnotsupp
213 #define procfs_mknod genfs_eopnotsupp
214 int procfs_open(void *);
215 int procfs_close(void *);
216 int procfs_access(void *);
217 int procfs_getattr(void *);
218 int procfs_setattr(void *);
219 #define procfs_read procfs_rw
220 #define procfs_write procfs_rw
221 #define procfs_fcntl genfs_fcntl
222 #define procfs_ioctl genfs_enoioctl
223 #define procfs_poll genfs_poll
224 #define procfs_revoke genfs_revoke
225 #define procfs_fsync genfs_nullop
226 #define procfs_seek genfs_nullop
227 #define procfs_remove genfs_eopnotsupp
228 int procfs_link(void *);
229 #define procfs_rename genfs_eopnotsupp
230 #define procfs_mkdir genfs_eopnotsupp
231 #define procfs_rmdir genfs_eopnotsupp
232 int procfs_symlink(void *);
233 int procfs_readdir(void *);
234 int procfs_readlink(void *);
235 #define procfs_abortop genfs_abortop
236 int procfs_inactive(void *);
237 int procfs_reclaim(void *);
238 #define procfs_lock genfs_lock
239 #define procfs_unlock genfs_unlock
240 #define procfs_bmap genfs_badop
241 #define procfs_strategy genfs_badop
242 int procfs_print(void *);
243 int procfs_pathconf(void *);
244 #define procfs_islocked genfs_islocked
245 #define procfs_advlock genfs_einval
246 #define procfs_bwrite genfs_eopnotsupp
247 #define procfs_putpages genfs_null_putpages
248
249 static int atoi(const char *, size_t);
250
251 /*
252 * procfs vnode operations.
253 */
254 int (**procfs_vnodeop_p)(void *);
255 const struct vnodeopv_entry_desc procfs_vnodeop_entries[] = {
256 { &vop_default_desc, vn_default_error },
257 { &vop_lookup_desc, procfs_lookup }, /* lookup */
258 { &vop_create_desc, procfs_create }, /* create */
259 { &vop_mknod_desc, procfs_mknod }, /* mknod */
260 { &vop_open_desc, procfs_open }, /* open */
261 { &vop_close_desc, procfs_close }, /* close */
262 { &vop_access_desc, procfs_access }, /* access */
263 { &vop_getattr_desc, procfs_getattr }, /* getattr */
264 { &vop_setattr_desc, procfs_setattr }, /* setattr */
265 { &vop_read_desc, procfs_read }, /* read */
266 { &vop_write_desc, procfs_write }, /* write */
267 { &vop_fcntl_desc, procfs_fcntl }, /* fcntl */
268 { &vop_ioctl_desc, procfs_ioctl }, /* ioctl */
269 { &vop_poll_desc, procfs_poll }, /* poll */
270 { &vop_revoke_desc, procfs_revoke }, /* revoke */
271 { &vop_fsync_desc, procfs_fsync }, /* fsync */
272 { &vop_seek_desc, procfs_seek }, /* seek */
273 { &vop_remove_desc, procfs_remove }, /* remove */
274 { &vop_link_desc, procfs_link }, /* link */
275 { &vop_rename_desc, procfs_rename }, /* rename */
276 { &vop_mkdir_desc, procfs_mkdir }, /* mkdir */
277 { &vop_rmdir_desc, procfs_rmdir }, /* rmdir */
278 { &vop_symlink_desc, procfs_symlink }, /* symlink */
279 { &vop_readdir_desc, procfs_readdir }, /* readdir */
280 { &vop_readlink_desc, procfs_readlink }, /* readlink */
281 { &vop_abortop_desc, procfs_abortop }, /* abortop */
282 { &vop_inactive_desc, procfs_inactive }, /* inactive */
283 { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */
284 { &vop_lock_desc, procfs_lock }, /* lock */
285 { &vop_unlock_desc, procfs_unlock }, /* unlock */
286 { &vop_bmap_desc, procfs_bmap }, /* bmap */
287 { &vop_strategy_desc, procfs_strategy }, /* strategy */
288 { &vop_print_desc, procfs_print }, /* print */
289 { &vop_islocked_desc, procfs_islocked }, /* islocked */
290 { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */
291 { &vop_advlock_desc, procfs_advlock }, /* advlock */
292 { &vop_putpages_desc, procfs_putpages }, /* putpages */
293 { NULL, NULL }
294 };
295 const struct vnodeopv_desc procfs_vnodeop_opv_desc =
296 { &procfs_vnodeop_p, procfs_vnodeop_entries };
297 /*
298 * set things up for doing i/o on
299 * the pfsnode (vp). (vp) is locked
300 * on entry, and should be left locked
301 * on exit.
302 *
303 * for procfs we don't need to do anything
304 * in particular for i/o. all that is done
305 * is to support exclusive open on process
306 * memory images.
307 */
308 int
309 procfs_open(v)
310 void *v;
311 {
312 struct vop_open_args /* {
313 struct vnode *a_vp;
314 int a_mode;
315 kauth_cred_t a_cred;
316 } */ *ap = v;
317 struct pfsnode *pfs = VTOPFS(ap->a_vp);
318 struct lwp *l1;
319 struct proc *p2;
320 int error;
321
322 if ((error = procfs_proc_lock(pfs->pfs_pid, &p2, ENOENT)) != 0)
323 return error;
324
325 l1 = curlwp; /* tracer */
326
327 #define M2K(m) (((m) & FREAD) && ((m) & FWRITE) ? \
328 KAUTH_REQ_PROCESS_PROCFS_RW : \
329 (m) & FWRITE ? KAUTH_REQ_PROCESS_PROCFS_WRITE : \
330 KAUTH_REQ_PROCESS_PROCFS_READ)
331
332 mutex_enter(&p2->p_mutex);
333 error = kauth_authorize_process(l1->l_cred, KAUTH_PROCESS_PROCFS,
334 p2, pfs, KAUTH_ARG(M2K(ap->a_mode)), NULL);
335 mutex_exit(&p2->p_mutex);
336 if (error) {
337 procfs_proc_unlock(p2);
338 return (error);
339 }
340
341 #undef M2K
342
343 switch (pfs->pfs_type) {
344 case PFSmem:
345 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) ||
346 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE))) {
347 error = EBUSY;
348 break;
349 }
350
351 if (!proc_isunder(p2, l1)) {
352 error = EPERM;
353 break;
354 }
355
356 if (ap->a_mode & FWRITE)
357 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL);
358
359 break;
360
361 case PFSregs:
362 case PFSfpregs:
363 if (!proc_isunder(p2, l1)) {
364 error = EPERM;
365 break;
366 }
367 break;
368
369 default:
370 break;
371 }
372
373 procfs_proc_unlock(p2);
374 return (error);
375 }
376
377 /*
378 * close the pfsnode (vp) after doing i/o.
379 * (vp) is not locked on entry or exit.
380 *
381 * nothing to do for procfs other than undo
382 * any exclusive open flag (see _open above).
383 */
384 int
385 procfs_close(v)
386 void *v;
387 {
388 struct vop_close_args /* {
389 struct vnode *a_vp;
390 int a_fflag;
391 kauth_cred_t a_cred;
392 } */ *ap = v;
393 struct pfsnode *pfs = VTOPFS(ap->a_vp);
394
395 switch (pfs->pfs_type) {
396 case PFSmem:
397 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL))
398 pfs->pfs_flags &= ~(FWRITE|O_EXCL);
399 break;
400
401 default:
402 break;
403 }
404
405 return (0);
406 }
407
408 /*
409 * _inactive is called when the pfsnode
410 * is vrele'd and the reference count goes
411 * to zero. (vp) will be on the vnode free
412 * list, so to get it back vget() must be
413 * used.
414 *
415 * (vp) is locked on entry, but must be unlocked on exit.
416 */
417 int
418 procfs_inactive(v)
419 void *v;
420 {
421 struct vop_inactive_args /* {
422 struct vnode *a_vp;
423 bool *a_recycle;
424 } */ *ap = v;
425 struct vnode *vp = ap->a_vp;
426 struct pfsnode *pfs = VTOPFS(vp);
427
428 mutex_enter(&proclist_lock);
429 *ap->a_recycle = (p_find(pfs->pfs_pid, PFIND_LOCKED) == NULL);
430 mutex_exit(&proclist_lock);
431
432 VOP_UNLOCK(vp, 0);
433
434 return (0);
435 }
436
437 /*
438 * _reclaim is called when getnewvnode()
439 * wants to make use of an entry on the vnode
440 * free list. at this time the filesystem needs
441 * to free any private data and remove the node
442 * from any private lists.
443 */
444 int
445 procfs_reclaim(v)
446 void *v;
447 {
448 struct vop_reclaim_args /* {
449 struct vnode *a_vp;
450 } */ *ap = v;
451
452 return (procfs_freevp(ap->a_vp));
453 }
454
455 /*
456 * Return POSIX pathconf information applicable to special devices.
457 */
458 int
459 procfs_pathconf(v)
460 void *v;
461 {
462 struct vop_pathconf_args /* {
463 struct vnode *a_vp;
464 int a_name;
465 register_t *a_retval;
466 } */ *ap = v;
467
468 switch (ap->a_name) {
469 case _PC_LINK_MAX:
470 *ap->a_retval = LINK_MAX;
471 return (0);
472 case _PC_MAX_CANON:
473 *ap->a_retval = MAX_CANON;
474 return (0);
475 case _PC_MAX_INPUT:
476 *ap->a_retval = MAX_INPUT;
477 return (0);
478 case _PC_PIPE_BUF:
479 *ap->a_retval = PIPE_BUF;
480 return (0);
481 case _PC_CHOWN_RESTRICTED:
482 *ap->a_retval = 1;
483 return (0);
484 case _PC_VDISABLE:
485 *ap->a_retval = _POSIX_VDISABLE;
486 return (0);
487 case _PC_SYNC_IO:
488 *ap->a_retval = 1;
489 return (0);
490 default:
491 return (EINVAL);
492 }
493 /* NOTREACHED */
494 }
495
496 /*
497 * _print is used for debugging.
498 * just print a readable description
499 * of (vp).
500 */
501 int
502 procfs_print(v)
503 void *v;
504 {
505 struct vop_print_args /* {
506 struct vnode *a_vp;
507 } */ *ap = v;
508 struct pfsnode *pfs = VTOPFS(ap->a_vp);
509
510 printf("tag VT_PROCFS, type %d, pid %d, mode %x, flags %lx\n",
511 pfs->pfs_type, pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags);
512 return 0;
513 }
514
515 int
516 procfs_link(v)
517 void *v;
518 {
519 struct vop_link_args /* {
520 struct vnode *a_dvp;
521 struct vnode *a_vp;
522 struct componentname *a_cnp;
523 } */ *ap = v;
524
525 VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
526 vput(ap->a_dvp);
527 return (EROFS);
528 }
529
530 int
531 procfs_symlink(v)
532 void *v;
533 {
534 struct vop_symlink_args /* {
535 struct vnode *a_dvp;
536 struct vnode **a_vpp;
537 struct componentname *a_cnp;
538 struct vattr *a_vap;
539 char *a_target;
540 } */ *ap = v;
541
542 VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
543 vput(ap->a_dvp);
544 return (EROFS);
545 }
546
547 /*
548 * Works out the path to (and vnode of) the target process's current
549 * working directory or chroot. If the caller is in a chroot and
550 * can't "reach" the target's cwd or root (or some other error
551 * occurs), a "/" is returned for the path and a NULL pointer is
552 * returned for the vnode.
553 */
554 static void
555 procfs_dir(pfstype t, struct lwp *caller, struct proc *target, char **bpp,
556 char *path, size_t len)
557 {
558 struct cwdinfo *cwdi;
559 struct vnode *vp, *rvp;
560 char *bp;
561
562 cwdi = caller->l_proc->p_cwdi;
563 rw_enter(&cwdi->cwdi_lock, RW_READER);
564
565 rvp = cwdi->cwdi_rdir;
566 bp = bpp ? *bpp : NULL;
567
568 switch (t) {
569 case PFScwd:
570 vp = target->p_cwdi->cwdi_cdir;
571 break;
572 case PFSchroot:
573 vp = target->p_cwdi->cwdi_rdir;
574 break;
575 case PFSexe:
576 vp = target->p_textvp;
577 break;
578 default:
579 rw_exit(&cwdi->cwdi_lock);
580 return;
581 }
582
583 /*
584 * XXX: this horrible kludge avoids locking panics when
585 * attempting to lookup links that point to within procfs
586 */
587 if (vp != NULL && vp->v_tag == VT_PROCFS) {
588 if (bpp) {
589 *--bp = '/';
590 *bpp = bp;
591 }
592 rw_exit(&cwdi->cwdi_lock);
593 return;
594 }
595
596 if (rvp == NULL)
597 rvp = rootvnode;
598 if (vp == NULL || getcwd_common(vp, rvp, bp ? &bp : NULL, path,
599 len / 2, 0, caller) != 0) {
600 vp = NULL;
601 if (bpp) {
602 /*
603 if (t == PFSexe) {
604 snprintf(path, len, "%s/%d/file"
605 mp->mnt_stat.f_mntonname, pfs->pfs_pid);
606 } else */ {
607 bp = *bpp;
608 *--bp = '/';
609 }
610 }
611 }
612
613 if (bpp)
614 *bpp = bp;
615
616 rw_exit(&cwdi->cwdi_lock);
617 }
618
619 /*
620 * Invent attributes for pfsnode (vp) and store
621 * them in (vap).
622 * Directories lengths are returned as zero since
623 * any real length would require the genuine size
624 * to be computed, and nothing cares anyway.
625 *
626 * this is relatively minimal for procfs.
627 */
628 int
629 procfs_getattr(v)
630 void *v;
631 {
632 struct vop_getattr_args /* {
633 struct vnode *a_vp;
634 struct vattr *a_vap;
635 kauth_cred_t a_cred;
636 } */ *ap = v;
637 struct pfsnode *pfs = VTOPFS(ap->a_vp);
638 struct vattr *vap = ap->a_vap;
639 struct proc *procp;
640 char *path;
641 int error;
642
643 /* first check the process still exists */
644 switch (pfs->pfs_type) {
645 case PFSroot:
646 case PFScurproc:
647 case PFSself:
648 procp = NULL;
649 break;
650
651 default:
652 error = procfs_proc_lock(pfs->pfs_pid, &procp, ENOENT);
653 if (error != 0)
654 return (error);
655 break;
656 }
657
658 switch (pfs->pfs_type) {
659 case PFScwd:
660 case PFSchroot:
661 case PFSexe:
662 MALLOC(path, char *, MAXPATHLEN + 4, M_TEMP,
663 M_WAITOK|M_CANFAIL);
664 if (path == NULL && procp != NULL) {
665 procfs_proc_unlock(procp);
666 return (ENOMEM);
667 }
668 break;
669
670 default:
671 path = NULL;
672 break;
673 }
674
675 if (procp != NULL) {
676 mutex_enter(&procp->p_mutex);
677 error = kauth_authorize_process(kauth_cred_get(),
678 KAUTH_PROCESS_CANSEE, procp,
679 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
680 mutex_exit(&procp->p_mutex);
681 if (error != 0) {
682 procfs_proc_unlock(procp);
683 if (path != NULL)
684 free(path, M_TEMP);
685 return (ENOENT);
686 }
687 }
688
689 error = 0;
690
691 /* start by zeroing out the attributes */
692 VATTR_NULL(vap);
693
694 /* next do all the common fields */
695 vap->va_type = ap->a_vp->v_type;
696 vap->va_mode = pfs->pfs_mode;
697 vap->va_fileid = pfs->pfs_fileno;
698 vap->va_flags = 0;
699 vap->va_blocksize = PAGE_SIZE;
700
701 /*
702 * Make all times be current TOD.
703 *
704 * It would be possible to get the process start
705 * time from the p_stats structure, but there's
706 * no "file creation" time stamp anyway, and the
707 * p_stats structure is not addressable if u. gets
708 * swapped out for that process.
709 */
710 getnanotime(&vap->va_ctime);
711 vap->va_atime = vap->va_mtime = vap->va_ctime;
712 if (procp)
713 TIMEVAL_TO_TIMESPEC(&procp->p_stats->p_start,
714 &vap->va_birthtime);
715 else
716 getnanotime(&vap->va_birthtime);
717
718 switch (pfs->pfs_type) {
719 case PFSmem:
720 case PFSregs:
721 case PFSfpregs:
722 #if defined(__HAVE_PROCFS_MACHDEP) && defined(PROCFS_MACHDEP_PROTECT_CASES)
723 PROCFS_MACHDEP_PROTECT_CASES
724 #endif
725 /*
726 * If the process has exercised some setuid or setgid
727 * privilege, then rip away read/write permission so
728 * that only root can gain access.
729 */
730 if (procp->p_flag & PK_SUGID)
731 vap->va_mode &= ~(S_IRUSR|S_IWUSR);
732 /* FALLTHROUGH */
733 case PFSctl:
734 case PFSstatus:
735 case PFSstat:
736 case PFSnote:
737 case PFSnotepg:
738 case PFSmap:
739 case PFSmaps:
740 case PFScmdline:
741 case PFSemul:
742 case PFSstatm:
743 vap->va_nlink = 1;
744 vap->va_uid = kauth_cred_geteuid(procp->p_cred);
745 vap->va_gid = kauth_cred_getegid(procp->p_cred);
746 break;
747 case PFSmeminfo:
748 case PFSdevices:
749 case PFScpuinfo:
750 case PFSuptime:
751 case PFSmounts:
752 case PFScpustat:
753 case PFSloadavg:
754 vap->va_nlink = 1;
755 vap->va_uid = vap->va_gid = 0;
756 break;
757
758 default:
759 break;
760 }
761
762 /*
763 * now do the object specific fields
764 *
765 * The size could be set from struct reg, but it's hardly
766 * worth the trouble, and it puts some (potentially) machine
767 * dependent data into this machine-independent code. If it
768 * becomes important then this function should break out into
769 * a per-file stat function in the corresponding .c file.
770 */
771
772 switch (pfs->pfs_type) {
773 case PFSroot:
774 /*
775 * Set nlink to 1 to tell fts(3) we don't actually know.
776 */
777 vap->va_nlink = 1;
778 vap->va_uid = 0;
779 vap->va_gid = 0;
780 vap->va_bytes = vap->va_size = DEV_BSIZE;
781 break;
782
783 case PFSself:
784 case PFScurproc: {
785 char bf[16]; /* should be enough */
786 vap->va_nlink = 1;
787 vap->va_uid = 0;
788 vap->va_gid = 0;
789 vap->va_bytes = vap->va_size =
790 snprintf(bf, sizeof(bf), "%ld", (long)curproc->p_pid);
791 break;
792 }
793
794 case PFSfd:
795 if (pfs->pfs_fd != -1) {
796 struct file *fp;
797
798 fp = fd_getfile(procp->p_fd, pfs->pfs_fd);
799 if (fp == NULL) {
800 error = EBADF;
801 break;
802 }
803 FILE_USE(fp);
804 vap->va_nlink = 1;
805 vap->va_uid = kauth_cred_geteuid(fp->f_cred);
806 vap->va_gid = kauth_cred_getegid(fp->f_cred);
807 switch (fp->f_type) {
808 case DTYPE_VNODE:
809 vap->va_bytes = vap->va_size =
810 ((struct vnode *)fp->f_data)->v_size;
811 break;
812 default:
813 vap->va_bytes = vap->va_size = 0;
814 break;
815 }
816 FILE_UNUSE(fp, curlwp);
817 break;
818 }
819 /*FALLTHROUGH*/
820 case PFSproc:
821 vap->va_nlink = 2;
822 vap->va_uid = kauth_cred_geteuid(procp->p_cred);
823 vap->va_gid = kauth_cred_getegid(procp->p_cred);
824 vap->va_bytes = vap->va_size = DEV_BSIZE;
825 break;
826
827 case PFSfile:
828 error = EOPNOTSUPP;
829 break;
830
831 case PFSmem:
832 vap->va_bytes = vap->va_size =
833 ctob(procp->p_vmspace->vm_tsize +
834 procp->p_vmspace->vm_dsize +
835 procp->p_vmspace->vm_ssize);
836 break;
837
838 #if defined(PT_GETREGS) || defined(PT_SETREGS)
839 case PFSregs:
840 vap->va_bytes = vap->va_size = sizeof(struct reg);
841 break;
842 #endif
843
844 #if defined(PT_GETFPREGS) || defined(PT_SETFPREGS)
845 case PFSfpregs:
846 vap->va_bytes = vap->va_size = sizeof(struct fpreg);
847 break;
848 #endif
849
850 case PFSctl:
851 case PFSstatus:
852 case PFSstat:
853 case PFSnote:
854 case PFSnotepg:
855 case PFScmdline:
856 case PFSmeminfo:
857 case PFSdevices:
858 case PFScpuinfo:
859 case PFSuptime:
860 case PFSmounts:
861 case PFScpustat:
862 case PFSloadavg:
863 case PFSstatm:
864 vap->va_bytes = vap->va_size = 0;
865 break;
866 case PFSmap:
867 case PFSmaps:
868 /*
869 * Advise a larger blocksize for the map files, so that
870 * they may be read in one pass.
871 */
872 vap->va_blocksize = 4 * PAGE_SIZE;
873 vap->va_bytes = vap->va_size = 0;
874 break;
875
876 case PFScwd:
877 case PFSchroot:
878 case PFSexe: {
879 char *bp;
880
881 vap->va_nlink = 1;
882 vap->va_uid = 0;
883 vap->va_gid = 0;
884 bp = path + MAXPATHLEN;
885 *--bp = '\0';
886 procfs_dir(pfs->pfs_type, curlwp, procp, &bp, path,
887 MAXPATHLEN);
888 vap->va_bytes = vap->va_size = strlen(bp);
889 break;
890 }
891
892 case PFSemul:
893 vap->va_bytes = vap->va_size = strlen(procp->p_emul->e_name);
894 break;
895
896 #ifdef __HAVE_PROCFS_MACHDEP
897 PROCFS_MACHDEP_NODETYPE_CASES
898 error = procfs_machdep_getattr(ap->a_vp, vap, procp);
899 break;
900 #endif
901
902 default:
903 panic("procfs_getattr");
904 }
905
906 if (procp != NULL)
907 procfs_proc_unlock(procp);
908 if (path != NULL)
909 free(path, M_TEMP);
910
911 return (error);
912 }
913
/*
 * Attribute setting is faked: the request is ignored and success is
 * reported.  Returning an error here is undesirable because things
 * like creat() would then fail when they try to set the file length
 * to 0; worse, "echo $note > /proc/$pid/note" would fail as well.
 */
/*ARGSUSED*/
int
procfs_setattr(void *v)
{
	(void)v;	/* the arguments are deliberately ignored */

	return (0);
}
929
930 /*
931 * implement access checking.
932 *
933 * actually, the check for super-user is slightly
934 * broken since it will allow read access to write-only
935 * objects. this doesn't cause any particular trouble
936 * but does mean that the i/o entry points need to check
937 * that the operation really does make sense.
938 */
939 int
940 procfs_access(v)
941 void *v;
942 {
943 struct vop_access_args /* {
944 struct vnode *a_vp;
945 int a_mode;
946 kauth_cred_t a_cred;
947 } */ *ap = v;
948 struct vattr va;
949 int error;
950
951 if ((error = VOP_GETATTR(ap->a_vp, &va, ap->a_cred)) != 0)
952 return (error);
953
954 return (vaccess(va.va_type, va.va_mode,
955 va.va_uid, va.va_gid, ap->a_mode, ap->a_cred));
956 }
957
958 /*
959 * lookup. this is incredibly complicated in the
960 * general case, however for most pseudo-filesystems
961 * very little needs to be done.
962 *
963 * Locking isn't hard here, just poorly documented.
964 *
965 * If we're looking up ".", just vref the parent & return it.
966 *
967 * If we're looking up "..", unlock the parent, and lock "..". If everything
968 * went ok, and we're on the last component and the caller requested the
969 * parent locked, try to re-lock the parent. We do this to prevent lock
970 * races.
971 *
972 * For anything else, get the needed node. Then unlock the parent if not
973 * the last component or not LOCKPARENT (i.e. if we wouldn't re-lock the
974 * parent in the .. case).
975 *
976 * We try to exit with the parent locked in error cases.
977 */
978 int
979 procfs_lookup(v)
980 void *v;
981 {
982 struct vop_lookup_args /* {
983 struct vnode * a_dvp;
984 struct vnode ** a_vpp;
985 struct componentname * a_cnp;
986 } */ *ap = v;
987 struct componentname *cnp = ap->a_cnp;
988 struct vnode **vpp = ap->a_vpp;
989 struct vnode *dvp = ap->a_dvp;
990 const char *pname = cnp->cn_nameptr;
991 const struct proc_target *pt = NULL;
992 struct vnode *fvp;
993 pid_t pid, vnpid;
994 struct pfsnode *pfs;
995 struct proc *p = NULL;
996 struct lwp *l = NULL;
997 int i, error;
998 pfstype type;
999
1000 *vpp = NULL;
1001
1002 if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
1003 return (EROFS);
1004
1005 if (cnp->cn_namelen == 1 && *pname == '.') {
1006 *vpp = dvp;
1007 VREF(dvp);
1008 return (0);
1009 }
1010
1011 pfs = VTOPFS(dvp);
1012 switch (pfs->pfs_type) {
1013 case PFSroot:
1014 /*
1015 * Shouldn't get here with .. in the root node.
1016 */
1017 if (cnp->cn_flags & ISDOTDOT)
1018 return (EIO);
1019
1020 for (i = 0; i < nproc_root_targets; i++) {
1021 pt = &proc_root_targets[i];
1022 /*
1023 * check for node match. proc is always NULL here,
1024 * so call pt_valid with constant NULL lwp.
1025 */
1026 if (cnp->cn_namelen == pt->pt_namlen &&
1027 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
1028 (pt->pt_valid == NULL ||
1029 (*pt->pt_valid)(NULL, dvp->v_mount)))
1030 break;
1031 }
1032
1033 if (i != nproc_root_targets) {
1034 error = procfs_allocvp(dvp->v_mount, vpp, 0,
1035 pt->pt_pfstype, -1, NULL);
1036 return (error);
1037 }
1038
1039 if (CNEQ(cnp, "curproc", 7)) {
1040 pid = curproc->p_pid;
1041 vnpid = 0;
1042 type = PFScurproc;
1043 } else if (CNEQ(cnp, "self", 4)) {
1044 pid = curproc->p_pid;
1045 vnpid = 0;
1046 type = PFSself;
1047 } else {
1048 pid = (pid_t)atoi(pname, cnp->cn_namelen);
1049 vnpid = pid;
1050 type = PFSproc;
1051 }
1052
1053 if (procfs_proc_lock(pid, &p, ESRCH) != 0)
1054 break;
1055 error = procfs_allocvp(dvp->v_mount, vpp, vnpid, type, -1, p);
1056 procfs_proc_unlock(p);
1057 return (error);
1058
1059 case PFSproc:
1060 /*
1061 * do the .. dance. We unlock the directory, and then
1062 * get the root dir. That will automatically return ..
1063 * locked. Then if the caller wanted dvp locked, we
1064 * re-lock.
1065 */
1066 if (cnp->cn_flags & ISDOTDOT) {
1067 VOP_UNLOCK(dvp, 0);
1068 error = procfs_root(dvp->v_mount, vpp);
1069 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
1070 return (error);
1071 }
1072
1073 if (procfs_proc_lock(pfs->pfs_pid, &p, ESRCH) != 0)
1074 break;
1075
1076 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) {
1077 struct lwp *plwp;
1078 int found;
1079
1080 mutex_enter(&p->p_smutex);
1081 plwp = proc_representative_lwp(p, NULL, 1);
1082 lwp_addref(plwp);
1083 mutex_exit(&p->p_smutex);
1084 found = cnp->cn_namelen == pt->pt_namlen &&
1085 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
1086 (pt->pt_valid == NULL
1087 || (*pt->pt_valid)(plwp, dvp->v_mount));
1088 lwp_delref(plwp);
1089 if (found)
1090 break;
1091 }
1092 if (i == nproc_targets) {
1093 procfs_proc_unlock(p);
1094 break;
1095 }
1096 if (pt->pt_pfstype == PFSfile) {
1097 fvp = p->p_textvp;
1098 /* We already checked that it exists. */
1099 VREF(fvp);
1100 procfs_proc_unlock(p);
1101 vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY);
1102 *vpp = fvp;
1103 return (0);
1104 }
1105
1106 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
1107 pt->pt_pfstype, -1, p);
1108 procfs_proc_unlock(p);
1109 return (error);
1110
1111 case PFSfd: {
1112 int fd;
1113 struct file *fp;
1114
1115 if ((error = procfs_proc_lock(pfs->pfs_pid, &p, ENOENT)) != 0)
1116 return error;
1117
1118 /*
1119 * do the .. dance. We unlock the directory, and then
1120 * get the proc dir. That will automatically return ..
1121 * locked. Then re-lock the directory.
1122 */
1123 if (cnp->cn_flags & ISDOTDOT) {
1124 VOP_UNLOCK(dvp, 0);
1125 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
1126 PFSproc, -1, p);
1127 procfs_proc_unlock(p);
1128 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
1129 return (error);
1130 }
1131 fd = atoi(pname, cnp->cn_namelen);
1132
1133 fp = fd_getfile(p->p_fd, fd);
1134 if (fp == NULL) {
1135 procfs_proc_unlock(p);
1136 return ENOENT;
1137 }
1138
1139 FILE_USE(fp);
1140 fvp = (struct vnode *)fp->f_data;
1141
1142 /* Don't show directories */
1143 if (fp->f_type == DTYPE_VNODE && fvp->v_type != VDIR) {
1144 VREF(fvp);
1145 FILE_UNUSE(fp, l);
1146 procfs_proc_unlock(p);
1147 vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY |
1148 (p == curproc ? LK_CANRECURSE : 0));
1149 *vpp = fvp;
1150 return 0;
1151 }
1152
1153 FILE_UNUSE(fp, l);
1154 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
1155 PFSfd, fd, p);
1156 procfs_proc_unlock(p);
1157 return error;
1158 }
1159 default:
1160 return (ENOTDIR);
1161 }
1162
1163 return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS);
1164 }
1165
1166 int
1167 procfs_validfile(struct lwp *l, struct mount *mp)
1168 {
1169 return l != NULL && l->l_proc != NULL && l->l_proc->p_textvp != NULL;
1170 }
1171
1172 static int
1173 procfs_validfile_linux(l, mp)
1174 struct lwp *l;
1175 struct mount *mp;
1176 {
1177 int flags;
1178
1179 flags = VFSTOPROC(mp)->pmnt_flags;
1180 return (flags & PROCFSMNT_LINUXCOMPAT) &&
1181 (l == NULL || l->l_proc == NULL || procfs_validfile(l, mp));
1182 }
1183
1184 struct procfs_root_readdir_ctx {
1185 struct uio *uiop;
1186 off_t *cookies;
1187 int ncookies;
1188 off_t off;
1189 off_t startoff;
1190 int error;
1191 };
1192
1193 static int
1194 procfs_root_readdir_callback(struct proc *p, void *arg)
1195 {
1196 struct procfs_root_readdir_ctx *ctxp = arg;
1197 struct dirent d;
1198 struct uio *uiop;
1199 int error;
1200
1201 uiop = ctxp->uiop;
1202 if (uiop->uio_resid < UIO_MX)
1203 return -1; /* no space */
1204
1205 if (ctxp->off < ctxp->startoff) {
1206 ctxp->off++;
1207 return 0;
1208 }
1209
1210 if (kauth_authorize_process(kauth_cred_get(),
1211 KAUTH_PROCESS_CANSEE, p,
1212 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL) != 0)
1213 return 0;
1214
1215 memset(&d, 0, UIO_MX);
1216 d.d_reclen = UIO_MX;
1217 d.d_fileno = PROCFS_FILENO(p->p_pid, PFSproc, -1);
1218 d.d_namlen = snprintf(d.d_name,
1219 UIO_MX - offsetof(struct dirent, d_name), "%ld", (long)p->p_pid);
1220 d.d_type = DT_DIR;
1221
1222 mutex_exit(&proclist_lock);
1223 error = uiomove(&d, UIO_MX, uiop);
1224 mutex_enter(&proclist_lock);
1225 if (error) {
1226 ctxp->error = error;
1227 return -1;
1228 }
1229
1230 ctxp->ncookies++;
1231 if (ctxp->cookies)
1232 *(ctxp->cookies)++ = ctxp->off + 1;
1233 ctxp->off++;
1234
1235 return 0;
1236 }
1237
1238 /*
1239 * readdir returns directory entries from pfsnode (vp).
1240 *
1241 * the strategy here with procfs is to generate a single
1242 * directory entry at a time (struct dirent) and then
1243 * copy that out to userland using uiomove. a more efficent
1244 * though more complex implementation, would try to minimize
1245 * the number of calls to uiomove(). for procfs, this is
1246 * hardly worth the added code complexity.
1247 *
1248 * this should just be done through read()
1249 */
1250 int
1251 procfs_readdir(v)
1252 void *v;
1253 {
1254 struct vop_readdir_args /* {
1255 struct vnode *a_vp;
1256 struct uio *a_uio;
1257 kauth_cred_t a_cred;
1258 int *a_eofflag;
1259 off_t **a_cookies;
1260 int *a_ncookies;
1261 } */ *ap = v;
1262 struct uio *uio = ap->a_uio;
1263 struct dirent d;
1264 struct pfsnode *pfs;
1265 off_t i;
1266 int error;
1267 off_t *cookies = NULL;
1268 int ncookies;
1269 struct vnode *vp;
1270 const struct proc_target *pt;
1271 struct procfs_root_readdir_ctx ctx;
1272 struct lwp *l;
1273 int nfd;
1274
1275 vp = ap->a_vp;
1276 pfs = VTOPFS(vp);
1277
1278 if (uio->uio_resid < UIO_MX)
1279 return (EINVAL);
1280 if (uio->uio_offset < 0)
1281 return (EINVAL);
1282
1283 error = 0;
1284 i = uio->uio_offset;
1285 memset(&d, 0, UIO_MX);
1286 d.d_reclen = UIO_MX;
1287 ncookies = uio->uio_resid / UIO_MX;
1288
1289 switch (pfs->pfs_type) {
1290 /*
1291 * this is for the process-specific sub-directories.
1292 * all that is needed to is copy out all the entries
1293 * from the procent[] table (top of this file).
1294 */
1295 case PFSproc: {
1296 struct proc *p;
1297
1298 if (i >= nproc_targets)
1299 return 0;
1300
1301 if (procfs_proc_lock(pfs->pfs_pid, &p, ESRCH) != 0)
1302 break;
1303
1304 if (ap->a_ncookies) {
1305 ncookies = min(ncookies, (nproc_targets - i));
1306 cookies = malloc(ncookies * sizeof (off_t),
1307 M_TEMP, M_WAITOK);
1308 *ap->a_cookies = cookies;
1309 }
1310
1311 for (pt = &proc_targets[i];
1312 uio->uio_resid >= UIO_MX && i < nproc_targets; pt++, i++) {
1313 if (pt->pt_valid) {
1314 /* XXX LWP can disappear */
1315 mutex_enter(&p->p_smutex);
1316 l = proc_representative_lwp(p, NULL, 1);
1317 mutex_exit(&p->p_smutex);
1318 if ((*pt->pt_valid)(l, vp->v_mount) == 0)
1319 continue;
1320 }
1321
1322 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid,
1323 pt->pt_pfstype, -1);
1324 d.d_namlen = pt->pt_namlen;
1325 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1);
1326 d.d_type = pt->pt_type;
1327
1328 if ((error = uiomove(&d, UIO_MX, uio)) != 0)
1329 break;
1330 if (cookies)
1331 *cookies++ = i + 1;
1332 }
1333
1334 procfs_proc_unlock(p);
1335 break;
1336 }
1337 case PFSfd: {
1338 struct proc *p;
1339 struct filedesc *fdp;
1340 struct file *fp;
1341 int lim, nc = 0;
1342
1343 if ((error = procfs_proc_lock(pfs->pfs_pid, &p, ESRCH)) != 0)
1344 return error;
1345
1346 /* XXX Should this be by file as well? */
1347 if (kauth_authorize_process(kauth_cred_get(),
1348 KAUTH_PROCESS_CANSEE, p,
1349 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_OPENFILES), NULL,
1350 NULL) != 0) {
1351 procfs_proc_unlock(p);
1352 return ESRCH;
1353 }
1354
1355 fdp = p->p_fd;
1356 nfd = fdp->fd_nfiles;
1357
1358 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
1359 if (i >= lim) {
1360 procfs_proc_unlock(p);
1361 return 0;
1362 }
1363
1364 if (ap->a_ncookies) {
1365 ncookies = min(ncookies, (nfd + 2 - i));
1366 cookies = malloc(ncookies * sizeof (off_t),
1367 M_TEMP, M_WAITOK);
1368 *ap->a_cookies = cookies;
1369 }
1370
1371 for (; i < 2 && uio->uio_resid >= UIO_MX; i++) {
1372 pt = &proc_targets[i];
1373 d.d_namlen = pt->pt_namlen;
1374 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid,
1375 pt->pt_pfstype, -1);
1376 (void)memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1);
1377 d.d_type = pt->pt_type;
1378 if ((error = uiomove(&d, UIO_MX, uio)) != 0)
1379 break;
1380 if (cookies)
1381 *cookies++ = i + 1;
1382 nc++;
1383 }
1384 if (error) {
1385 ncookies = nc;
1386 break;
1387 }
1388 for (; uio->uio_resid >= UIO_MX && i < nfd; i++) {
1389 /* check the descriptor exists */
1390 if ((fp = fd_getfile(fdp, i - 2)) == NULL)
1391 continue;
1392 mutex_exit(&fp->f_lock);
1393
1394 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, PFSfd, i - 2);
1395 d.d_namlen = snprintf(d.d_name, sizeof(d.d_name),
1396 "%lld", (long long)(i - 2));
1397 d.d_type = VREG;
1398 if ((error = uiomove(&d, UIO_MX, uio)) != 0)
1399 break;
1400 if (cookies)
1401 *cookies++ = i + 1;
1402 nc++;
1403 }
1404 ncookies = nc;
1405 procfs_proc_unlock(p);
1406 break;
1407 }
1408
1409 /*
1410 * this is for the root of the procfs filesystem
1411 * what is needed are special entries for "curproc"
1412 * and "self" followed by an entry for each process
1413 * on allproc.
1414 */
1415
1416 case PFSroot: {
1417 int nc = 0;
1418
1419 if (ap->a_ncookies) {
1420 /*
1421 * XXX Potentially allocating too much space here,
1422 * but I'm lazy. This loop needs some work.
1423 */
1424 cookies = malloc(ncookies * sizeof (off_t),
1425 M_TEMP, M_WAITOK);
1426 *ap->a_cookies = cookies;
1427 }
1428 error = 0;
1429 /* 0 ... 3 are static entries. */
1430 for (; i <= 3 && uio->uio_resid >= UIO_MX; i++) {
1431 switch (i) {
1432 case 0: /* `.' */
1433 case 1: /* `..' */
1434 d.d_fileno = PROCFS_FILENO(0, PFSroot, -1);
1435 d.d_namlen = i + 1;
1436 memcpy(d.d_name, "..", d.d_namlen);
1437 d.d_name[i + 1] = '\0';
1438 d.d_type = DT_DIR;
1439 break;
1440
1441 case 2:
1442 d.d_fileno = PROCFS_FILENO(0, PFScurproc, -1);
1443 d.d_namlen = sizeof("curproc") - 1;
1444 memcpy(d.d_name, "curproc", sizeof("curproc"));
1445 d.d_type = DT_LNK;
1446 break;
1447
1448 case 3:
1449 d.d_fileno = PROCFS_FILENO(0, PFSself, -1);
1450 d.d_namlen = sizeof("self") - 1;
1451 memcpy(d.d_name, "self", sizeof("self"));
1452 d.d_type = DT_LNK;
1453 break;
1454 }
1455
1456 if ((error = uiomove(&d, UIO_MX, uio)) != 0)
1457 break;
1458 nc++;
1459 if (cookies)
1460 *cookies++ = i + 1;
1461 }
1462 /* 4 ... are process entries. */
1463 ctx.uiop = uio;
1464 ctx.error = 0;
1465 ctx.off = 4;
1466 ctx.startoff = i;
1467 ctx.cookies = cookies;
1468 ctx.ncookies = nc;
1469 proclist_foreach_call(&allproc,
1470 procfs_root_readdir_callback, &ctx);
1471 cookies = ctx.cookies;
1472 nc = ctx.ncookies;
1473 error = ctx.error;
1474 if (error)
1475 break;
1476
1477 /* misc entries. */
1478 if (i < ctx.off)
1479 i = ctx.off;
1480 if (i >= ctx.off + nproc_root_targets)
1481 break;
1482 for (pt = &proc_root_targets[i - ctx.off];
1483 uio->uio_resid >= UIO_MX &&
1484 pt < &proc_root_targets[nproc_root_targets];
1485 pt++, i++) {
1486 if (pt->pt_valid &&
1487 (*pt->pt_valid)(NULL, vp->v_mount) == 0)
1488 continue;
1489 d.d_fileno = PROCFS_FILENO(0, pt->pt_pfstype, -1);
1490 d.d_namlen = pt->pt_namlen;
1491 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1);
1492 d.d_type = pt->pt_type;
1493
1494 if ((error = uiomove(&d, UIO_MX, uio)) != 0)
1495 break;
1496 nc++;
1497 if (cookies)
1498 *cookies++ = i + 1;
1499 }
1500
1501 ncookies = nc;
1502 break;
1503 }
1504
1505 default:
1506 error = ENOTDIR;
1507 break;
1508 }
1509
1510 if (ap->a_ncookies) {
1511 if (error) {
1512 if (cookies)
1513 free(*ap->a_cookies, M_TEMP);
1514 *ap->a_ncookies = 0;
1515 *ap->a_cookies = NULL;
1516 } else
1517 *ap->a_ncookies = ncookies;
1518 }
1519 uio->uio_offset = i;
1520 return (error);
1521 }
1522
1523 /*
1524 * readlink reads the link of `curproc' and others
1525 */
1526 int
1527 procfs_readlink(v)
1528 void *v;
1529 {
1530 struct vop_readlink_args *ap = v;
1531 char bf[16]; /* should be enough */
1532 char *bp = bf;
1533 char *path = NULL;
1534 int len = 0;
1535 int error = 0;
1536 struct pfsnode *pfs = VTOPFS(ap->a_vp);
1537 struct proc *pown;
1538
1539 if (pfs->pfs_fileno == PROCFS_FILENO(0, PFScurproc, -1))
1540 len = snprintf(bf, sizeof(bf), "%ld", (long)curproc->p_pid);
1541 else if (pfs->pfs_fileno == PROCFS_FILENO(0, PFSself, -1))
1542 len = snprintf(bf, sizeof(bf), "%s", "curproc");
1543 else if (pfs->pfs_fileno == PROCFS_FILENO(pfs->pfs_pid, PFScwd, -1) ||
1544 pfs->pfs_fileno == PROCFS_FILENO(pfs->pfs_pid, PFSchroot, -1) ||
1545 pfs->pfs_fileno == PROCFS_FILENO(pfs->pfs_pid, PFSexe, -1)) {
1546 if ((error = procfs_proc_lock(pfs->pfs_pid, &pown, ESRCH)) != 0)
1547 return error;
1548 MALLOC(path, char *, MAXPATHLEN + 4, M_TEMP,
1549 M_WAITOK|M_CANFAIL);
1550 if (path == NULL) {
1551 procfs_proc_unlock(pown);
1552 return (ENOMEM);
1553 }
1554 bp = path + MAXPATHLEN;
1555 *--bp = '\0';
1556 procfs_dir(PROCFS_TYPE(pfs->pfs_fileno), curlwp, pown,
1557 &bp, path, MAXPATHLEN);
1558 procfs_proc_unlock(pown);
1559 len = strlen(bp);
1560 } else {
1561 struct file *fp;
1562 struct vnode *vxp, *vp;
1563
1564 if ((error = procfs_proc_lock(pfs->pfs_pid, &pown, ESRCH)) != 0)
1565 return error;
1566
1567 fp = fd_getfile(pown->p_fd, pfs->pfs_fd);
1568 if (fp == NULL) {
1569 procfs_proc_unlock(pown);
1570 return EBADF;
1571 }
1572
1573 FILE_USE(fp);
1574 switch (fp->f_type) {
1575 case DTYPE_VNODE:
1576 vxp = (struct vnode *)fp->f_data;
1577 if (vxp->v_type != VDIR) {
1578 FILE_UNUSE(fp, curlwp);
1579 error = EINVAL;
1580 break;
1581 }
1582 if ((path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK))
1583 == NULL) {
1584 FILE_UNUSE(fp, curlwp);
1585 error = ENOMEM;
1586 break;
1587 }
1588 bp = path + MAXPATHLEN;
1589 *--bp = '\0';
1590
1591 /*
1592 * XXX: kludge to avoid locking against ourselves
1593 * in getcwd()
1594 */
1595 if (vxp->v_tag == VT_PROCFS) {
1596 *--bp = '/';
1597 } else {
1598 rw_enter(&curproc->p_cwdi->cwdi_lock, RW_READER);
1599 vp = curproc->p_cwdi->cwdi_rdir;
1600 if (vp == NULL)
1601 vp = rootvnode;
1602 error = getcwd_common(vxp, vp, &bp, path,
1603 MAXPATHLEN / 2, 0, curlwp);
1604 rw_exit(&curproc->p_cwdi->cwdi_lock);
1605 }
1606 if (error)
1607 break;
1608 len = strlen(bp);
1609 break;
1610
1611 case DTYPE_MISC:
1612 len = snprintf(bf, sizeof(bf), "%s", "[misc]");
1613 break;
1614
1615 case DTYPE_KQUEUE:
1616 len = snprintf(bf, sizeof(bf), "%s", "[kqueue]");
1617 break;
1618
1619 default:
1620 error = EINVAL;
1621 break;
1622 }
1623 FILE_UNUSE(fp, curlwp);
1624 procfs_proc_unlock(pown);
1625 }
1626
1627 if (error == 0)
1628 error = uiomove(bp, len, ap->a_uio);
1629 if (path)
1630 free(path, M_TEMP);
1631 return error;
1632 }
1633
/*
 * convert decimal ascii to int
 *
 * Parses exactly `len' bytes starting at `b'; no NUL terminator is
 * required and none is looked for.  Returns the parsed value, or -1 if
 * any byte is not a decimal digit or the value does not fit in an int.
 * An empty string (len == 0) yields 0.
 */
static int
atoi(const char *b, size_t len)
{
	/* Largest int value, derived locally so no extra header is needed. */
	const int int_max = (int)(~0U >> 1);
	int p = 0;

	while (len--) {
		char c = *b++;
		int digit;

		if (c < '0' || c > '9')
			return -1;
		digit = c - '0';

		/*
		 * Refuse before 10 * p + digit can overflow: signed
		 * overflow is undefined, and a wrapped result could alias
		 * the number of an existing process or descriptor.
		 */
		if (p > (int_max - digit) / 10)
			return -1;
		p = 10 * p + digit;
	}

	return p;
}
1653