procfs_vnops.c revision 1.203.2.1 1 /* $NetBSD: procfs_vnops.c,v 1.203.2.1 2019/06/10 22:09:06 christos Exp $ */
2
3 /*-
4 * Copyright (c) 2006, 2007, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1993, 1995
34 * The Regents of the University of California. All rights reserved.
35 *
36 * This code is derived from software contributed to Berkeley by
37 * Jan-Simon Pendry.
38 *
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
41 * are met:
42 * 1. Redistributions of source code must retain the above copyright
43 * notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 * notice, this list of conditions and the following disclaimer in the
46 * documentation and/or other materials provided with the distribution.
47 * 3. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95
64 */
65
66 /*
67 * Copyright (c) 1993 Jan-Simon Pendry
68 *
69 * This code is derived from software contributed to Berkeley by
70 * Jan-Simon Pendry.
71 *
72 * Redistribution and use in source and binary forms, with or without
73 * modification, are permitted provided that the following conditions
74 * are met:
75 * 1. Redistributions of source code must retain the above copyright
76 * notice, this list of conditions and the following disclaimer.
77 * 2. Redistributions in binary form must reproduce the above copyright
78 * notice, this list of conditions and the following disclaimer in the
79 * documentation and/or other materials provided with the distribution.
80 * 3. All advertising materials mentioning features or use of this software
81 * must display the following acknowledgement:
82 * This product includes software developed by the University of
83 * California, Berkeley and its contributors.
84 * 4. Neither the name of the University nor the names of its contributors
85 * may be used to endorse or promote products derived from this software
86 * without specific prior written permission.
87 *
88 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
89 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
90 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
91 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
92 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
93 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
94 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
95 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
96 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
97 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
98 * SUCH DAMAGE.
99 *
100 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95
101 */
102
103 /*
104 * procfs vnode interface
105 */
106
107 #include <sys/cdefs.h>
108 __KERNEL_RCSID(0, "$NetBSD: procfs_vnops.c,v 1.203.2.1 2019/06/10 22:09:06 christos Exp $");
109
110 #include <sys/param.h>
111 #include <sys/systm.h>
112 #include <sys/time.h>
113 #include <sys/kernel.h>
114 #include <sys/file.h>
115 #include <sys/filedesc.h>
116 #include <sys/proc.h>
117 #include <sys/vnode.h>
118 #include <sys/namei.h>
119 #include <sys/malloc.h>
120 #include <sys/mount.h>
121 #include <sys/dirent.h>
122 #include <sys/resourcevar.h>
123 #include <sys/stat.h>
124 #include <sys/ptrace.h>
125 #include <sys/kauth.h>
126 #include <sys/exec.h>
127
128 #include <uvm/uvm_extern.h> /* for PAGE_SIZE */
129
130 #include <machine/reg.h>
131
132 #include <miscfs/genfs/genfs.h>
133 #include <miscfs/procfs/procfs.h>
134
135 /*
136 * Vnode Operations.
137 *
138 */
139
140 static int procfs_validfile_linux(struct lwp *, struct mount *);
141 static int procfs_root_readdir_callback(struct proc *, void *);
142 static void procfs_dir(pfstype, struct lwp *, struct proc *, char **, char *,
143 size_t);
144
145 /*
146 * This is a list of the valid names in the
147 * process-specific sub-directories. It is
148 * used in procfs_lookup and procfs_readdir
149 */
150 static const struct proc_target {
151 u_char pt_type;
152 u_char pt_namlen;
153 const char *pt_name;
154 pfstype pt_pfstype;
155 int (*pt_valid)(struct lwp *, struct mount *);
156 } proc_targets[] = {
157 #define N(s) sizeof(s)-1, s
158 /* name type validp */
159 { DT_DIR, N("."), PFSproc, NULL },
160 { DT_DIR, N(".."), PFSroot, NULL },
161 { DT_DIR, N("fd"), PFSfd, NULL },
162 { DT_DIR, N("task"), PFStask, procfs_validfile_linux },
163 { DT_LNK, N("cwd"), PFScwd, NULL },
164 { DT_LNK, N("emul"), PFSemul, NULL },
165 { DT_LNK, N("root"), PFSchroot, NULL },
166 { DT_REG, N("auxv"), PFSauxv, procfs_validauxv },
167 { DT_REG, N("cmdline"), PFScmdline, NULL },
168 { DT_REG, N("environ"), PFSenviron, NULL },
169 { DT_REG, N("exe"), PFSexe, procfs_validfile },
170 { DT_REG, N("file"), PFSfile, procfs_validfile },
171 { DT_REG, N("fpregs"), PFSfpregs, procfs_validfpregs },
172 { DT_REG, N("limit"), PFSlimit, NULL },
173 { DT_REG, N("map"), PFSmap, procfs_validmap },
174 { DT_REG, N("maps"), PFSmaps, procfs_validmap },
175 { DT_REG, N("mem"), PFSmem, NULL },
176 { DT_REG, N("note"), PFSnote, NULL },
177 { DT_REG, N("notepg"), PFSnotepg, NULL },
178 { DT_REG, N("regs"), PFSregs, procfs_validregs },
179 { DT_REG, N("stat"), PFSstat, procfs_validfile_linux },
180 { DT_REG, N("statm"), PFSstatm, procfs_validfile_linux },
181 { DT_REG, N("status"), PFSstatus, NULL },
182 #ifdef __HAVE_PROCFS_MACHDEP
183 PROCFS_MACHDEP_NODETYPE_DEFNS
184 #endif
185 #undef N
186 };
187 static const int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]);
188
189 /*
190 * List of files in the root directory. Note: the validate function will
191 * be called with p == NULL for these ones.
192 */
193 static const struct proc_target proc_root_targets[] = {
194 #define N(s) sizeof(s)-1, s
195 /* name type validp */
196 { DT_REG, N("meminfo"), PFSmeminfo, procfs_validfile_linux },
197 { DT_REG, N("cpuinfo"), PFScpuinfo, procfs_validfile_linux },
198 { DT_REG, N("uptime"), PFSuptime, procfs_validfile_linux },
199 { DT_REG, N("mounts"), PFSmounts, procfs_validfile_linux },
200 { DT_REG, N("devices"), PFSdevices, procfs_validfile_linux },
201 { DT_REG, N("stat"), PFScpustat, procfs_validfile_linux },
202 { DT_REG, N("loadavg"), PFSloadavg, procfs_validfile_linux },
203 { DT_REG, N("version"), PFSversion, procfs_validfile_linux },
204 #undef N
205 };
206 static const int nproc_root_targets =
207 sizeof(proc_root_targets) / sizeof(proc_root_targets[0]);
208
209 int procfs_lookup(void *);
210 #define procfs_create genfs_eopnotsupp
211 #define procfs_mknod genfs_eopnotsupp
212 int procfs_open(void *);
213 int procfs_close(void *);
214 int procfs_access(void *);
215 int procfs_getattr(void *);
216 int procfs_setattr(void *);
217 #define procfs_read procfs_rw
218 #define procfs_write procfs_rw
219 #define procfs_fcntl genfs_fcntl
220 #define procfs_ioctl genfs_enoioctl
221 #define procfs_poll genfs_poll
222 #define procfs_kqfilter genfs_kqfilter
223 #define procfs_revoke genfs_revoke
224 #define procfs_fsync genfs_nullop
225 #define procfs_seek genfs_nullop
226 #define procfs_remove genfs_eopnotsupp
227 int procfs_link(void *);
228 #define procfs_rename genfs_eopnotsupp
229 #define procfs_mkdir genfs_eopnotsupp
230 #define procfs_rmdir genfs_eopnotsupp
231 int procfs_symlink(void *);
232 int procfs_readdir(void *);
233 int procfs_readlink(void *);
234 #define procfs_abortop genfs_abortop
235 int procfs_inactive(void *);
236 int procfs_reclaim(void *);
237 #define procfs_lock genfs_lock
238 #define procfs_unlock genfs_unlock
239 #define procfs_bmap genfs_badop
240 #define procfs_strategy genfs_badop
241 int procfs_print(void *);
242 int procfs_pathconf(void *);
243 #define procfs_islocked genfs_islocked
244 #define procfs_advlock genfs_einval
245 #define procfs_bwrite genfs_eopnotsupp
246 #define procfs_putpages genfs_null_putpages
247
248 static int atoi(const char *, size_t);
249
250 /*
251 * procfs vnode operations.
252 */
253 int (**procfs_vnodeop_p)(void *);
254 const struct vnodeopv_entry_desc procfs_vnodeop_entries[] = {
255 { &vop_default_desc, vn_default_error },
256 { &vop_lookup_desc, procfs_lookup }, /* lookup */
257 { &vop_create_desc, procfs_create }, /* create */
258 { &vop_mknod_desc, procfs_mknod }, /* mknod */
259 { &vop_open_desc, procfs_open }, /* open */
260 { &vop_close_desc, procfs_close }, /* close */
261 { &vop_access_desc, procfs_access }, /* access */
262 { &vop_getattr_desc, procfs_getattr }, /* getattr */
263 { &vop_setattr_desc, procfs_setattr }, /* setattr */
264 { &vop_read_desc, procfs_read }, /* read */
265 { &vop_write_desc, procfs_write }, /* write */
266 { &vop_fallocate_desc, genfs_eopnotsupp }, /* fallocate */
267 { &vop_fdiscard_desc, genfs_eopnotsupp }, /* fdiscard */
268 { &vop_fcntl_desc, procfs_fcntl }, /* fcntl */
269 { &vop_ioctl_desc, procfs_ioctl }, /* ioctl */
270 { &vop_poll_desc, procfs_poll }, /* poll */
271 { &vop_kqfilter_desc, procfs_kqfilter }, /* kqfilter */
272 { &vop_revoke_desc, procfs_revoke }, /* revoke */
273 { &vop_fsync_desc, procfs_fsync }, /* fsync */
274 { &vop_seek_desc, procfs_seek }, /* seek */
275 { &vop_remove_desc, procfs_remove }, /* remove */
276 { &vop_link_desc, procfs_link }, /* link */
277 { &vop_rename_desc, procfs_rename }, /* rename */
278 { &vop_mkdir_desc, procfs_mkdir }, /* mkdir */
279 { &vop_rmdir_desc, procfs_rmdir }, /* rmdir */
280 { &vop_symlink_desc, procfs_symlink }, /* symlink */
281 { &vop_readdir_desc, procfs_readdir }, /* readdir */
282 { &vop_readlink_desc, procfs_readlink }, /* readlink */
283 { &vop_abortop_desc, procfs_abortop }, /* abortop */
284 { &vop_inactive_desc, procfs_inactive }, /* inactive */
285 { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */
286 { &vop_lock_desc, procfs_lock }, /* lock */
287 { &vop_unlock_desc, procfs_unlock }, /* unlock */
288 { &vop_bmap_desc, procfs_bmap }, /* bmap */
289 { &vop_strategy_desc, procfs_strategy }, /* strategy */
290 { &vop_print_desc, procfs_print }, /* print */
291 { &vop_islocked_desc, procfs_islocked }, /* islocked */
292 { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */
293 { &vop_advlock_desc, procfs_advlock }, /* advlock */
294 { &vop_putpages_desc, procfs_putpages }, /* putpages */
295 { NULL, NULL }
296 };
297 const struct vnodeopv_desc procfs_vnodeop_opv_desc =
298 { &procfs_vnodeop_p, procfs_vnodeop_entries };
299 /*
300 * set things up for doing i/o on
301 * the pfsnode (vp). (vp) is locked
302 * on entry, and should be left locked
303 * on exit.
304 *
305 * for procfs we don't need to do anything
306 * in particular for i/o. all that is done
307 * is to support exclusive open on process
308 * memory images.
309 */
310 int
311 procfs_open(void *v)
312 {
313 struct vop_open_args /* {
314 struct vnode *a_vp;
315 int a_mode;
316 kauth_cred_t a_cred;
317 } */ *ap = v;
318 struct pfsnode *pfs = VTOPFS(ap->a_vp);
319 struct lwp *l1;
320 struct proc *p2;
321 int error;
322
323 if ((error = procfs_proc_lock(pfs->pfs_pid, &p2, ENOENT)) != 0)
324 return error;
325
326 l1 = curlwp; /* tracer */
327
328 #define M2K(m) (((m) & FREAD) && ((m) & FWRITE) ? \
329 KAUTH_REQ_PROCESS_PROCFS_RW : \
330 (m) & FWRITE ? KAUTH_REQ_PROCESS_PROCFS_WRITE : \
331 KAUTH_REQ_PROCESS_PROCFS_READ)
332
333 mutex_enter(p2->p_lock);
334 error = kauth_authorize_process(l1->l_cred, KAUTH_PROCESS_PROCFS,
335 p2, pfs, KAUTH_ARG(M2K(ap->a_mode)), NULL);
336 mutex_exit(p2->p_lock);
337 if (error) {
338 procfs_proc_unlock(p2);
339 return (error);
340 }
341
342 #undef M2K
343
344 switch (pfs->pfs_type) {
345 case PFSmem:
346 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) ||
347 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE))) {
348 error = EBUSY;
349 break;
350 }
351
352 if (!proc_isunder(p2, l1)) {
353 error = EPERM;
354 break;
355 }
356
357 if (ap->a_mode & FWRITE)
358 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL);
359
360 break;
361
362 case PFSregs:
363 case PFSfpregs:
364 if (!proc_isunder(p2, l1)) {
365 error = EPERM;
366 break;
367 }
368 break;
369
370 default:
371 break;
372 }
373
374 procfs_proc_unlock(p2);
375 return (error);
376 }
377
378 /*
379 * close the pfsnode (vp) after doing i/o.
380 * (vp) is not locked on entry or exit.
381 *
382 * nothing to do for procfs other than undo
383 * any exclusive open flag (see _open above).
384 */
385 int
386 procfs_close(void *v)
387 {
388 struct vop_close_args /* {
389 struct vnode *a_vp;
390 int a_fflag;
391 kauth_cred_t a_cred;
392 } */ *ap = v;
393 struct pfsnode *pfs = VTOPFS(ap->a_vp);
394
395 switch (pfs->pfs_type) {
396 case PFSmem:
397 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL))
398 pfs->pfs_flags &= ~(FWRITE|O_EXCL);
399 break;
400
401 default:
402 break;
403 }
404
405 return (0);
406 }
407
408 /*
409 * _inactive is called when the pfsnode
410 * is vrele'd and the reference count goes
411 * to zero. (vp) will be on the vnode free
412 * list, so to get it back vget() must be
413 * used.
414 *
415 * (vp) is locked on entry, but must be unlocked on exit.
416 */
417 int
418 procfs_inactive(void *v)
419 {
420 struct vop_inactive_v2_args /* {
421 struct vnode *a_vp;
422 bool *a_recycle;
423 } */ *ap = v;
424 struct vnode *vp = ap->a_vp;
425 struct pfsnode *pfs = VTOPFS(vp);
426
427 mutex_enter(proc_lock);
428 *ap->a_recycle = (proc_find(pfs->pfs_pid) == NULL);
429 mutex_exit(proc_lock);
430
431 return (0);
432 }
433
434 /*
435 * _reclaim is called when getnewvnode()
436 * wants to make use of an entry on the vnode
437 * free list. at this time the filesystem needs
438 * to free any private data and remove the node
439 * from any private lists.
440 */
441 int
442 procfs_reclaim(void *v)
443 {
444 struct vop_reclaim_v2_args /* {
445 struct vnode *a_vp;
446 } */ *ap = v;
447 struct vnode *vp = ap->a_vp;
448 struct pfsnode *pfs = VTOPFS(vp);
449
450 VOP_UNLOCK(vp);
451
452 /*
453 * To interlock with procfs_revoke_vnodes().
454 */
455 mutex_enter(vp->v_interlock);
456 vp->v_data = NULL;
457 mutex_exit(vp->v_interlock);
458 kmem_free(pfs, sizeof(*pfs));
459 return 0;
460 }
461
462 /*
463 * Return POSIX pathconf information applicable to special devices.
464 */
465 int
466 procfs_pathconf(void *v)
467 {
468 struct vop_pathconf_args /* {
469 struct vnode *a_vp;
470 int a_name;
471 register_t *a_retval;
472 } */ *ap = v;
473
474 switch (ap->a_name) {
475 case _PC_LINK_MAX:
476 *ap->a_retval = LINK_MAX;
477 return (0);
478 case _PC_MAX_CANON:
479 *ap->a_retval = MAX_CANON;
480 return (0);
481 case _PC_MAX_INPUT:
482 *ap->a_retval = MAX_INPUT;
483 return (0);
484 case _PC_PIPE_BUF:
485 *ap->a_retval = PIPE_BUF;
486 return (0);
487 case _PC_CHOWN_RESTRICTED:
488 *ap->a_retval = 1;
489 return (0);
490 case _PC_VDISABLE:
491 *ap->a_retval = _POSIX_VDISABLE;
492 return (0);
493 case _PC_SYNC_IO:
494 *ap->a_retval = 1;
495 return (0);
496 default:
497 return (EINVAL);
498 }
499 /* NOTREACHED */
500 }
501
502 /*
503 * _print is used for debugging.
504 * just print a readable description
505 * of (vp).
506 */
507 int
508 procfs_print(void *v)
509 {
510 struct vop_print_args /* {
511 struct vnode *a_vp;
512 } */ *ap = v;
513 struct pfsnode *pfs = VTOPFS(ap->a_vp);
514
515 printf("tag VT_PROCFS, type %d, pid %d, mode %x, flags %lx\n",
516 pfs->pfs_type, pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags);
517 return 0;
518 }
519
520 int
521 procfs_link(void *v)
522 {
523 struct vop_link_v2_args /* {
524 struct vnode *a_dvp;
525 struct vnode *a_vp;
526 struct componentname *a_cnp;
527 } */ *ap = v;
528
529 VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
530 return (EROFS);
531 }
532
533 int
534 procfs_symlink(void *v)
535 {
536 struct vop_symlink_v3_args /* {
537 struct vnode *a_dvp;
538 struct vnode **a_vpp;
539 struct componentname *a_cnp;
540 struct vattr *a_vap;
541 char *a_target;
542 } */ *ap = v;
543
544 VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
545 return (EROFS);
546 }
547
548 /*
549 * Works out the path to the target process's current
550 * working directory or chroot. If the caller is in a chroot and
551 * can't "reach" the target's cwd or root (or some other error
552 * occurs), a "/" is returned for the path.
553 */
554 static void
555 procfs_dir(pfstype t, struct lwp *caller, struct proc *target, char **bpp,
556 char *path, size_t len)
557 {
558 struct cwdinfo *cwdi;
559 struct vnode *vp, *rvp;
560 char *bp;
561
562 /*
563 * Lock target cwdi and take a reference to the vnode
564 * we are interested in to prevent it from disappearing
565 * before getcwd_common() below.
566 */
567 rw_enter(&target->p_cwdi->cwdi_lock, RW_READER);
568 switch (t) {
569 case PFScwd:
570 vp = target->p_cwdi->cwdi_cdir;
571 break;
572 case PFSchroot:
573 vp = target->p_cwdi->cwdi_rdir;
574 break;
575 default:
576 rw_exit(&target->p_cwdi->cwdi_lock);
577 return;
578 }
579 if (vp != NULL)
580 vref(vp);
581 rw_exit(&target->p_cwdi->cwdi_lock);
582
583 cwdi = caller->l_proc->p_cwdi;
584 rw_enter(&cwdi->cwdi_lock, RW_READER);
585
586 rvp = cwdi->cwdi_rdir;
587 bp = bpp ? *bpp : NULL;
588
589 /*
590 * XXX: this horrible kludge avoids locking panics when
591 * attempting to lookup links that point to within procfs
592 */
593 if (vp != NULL && vp->v_tag == VT_PROCFS) {
594 if (bpp) {
595 *--bp = '/';
596 *bpp = bp;
597 }
598 vrele(vp);
599 rw_exit(&cwdi->cwdi_lock);
600 return;
601 }
602
603 if (rvp == NULL)
604 rvp = rootvnode;
605 if (vp == NULL || getcwd_common(vp, rvp, bp ? &bp : NULL, path,
606 len / 2, 0, caller) != 0) {
607 if (bpp) {
608 bp = *bpp;
609 *--bp = '/';
610 }
611 }
612
613 if (bpp)
614 *bpp = bp;
615
616 if (vp != NULL)
617 vrele(vp);
618 rw_exit(&cwdi->cwdi_lock);
619 }
620
621 /*
622 * Invent attributes for pfsnode (vp) and store
623 * them in (vap).
624 * Directories lengths are returned as zero since
625 * any real length would require the genuine size
626 * to be computed, and nothing cares anyway.
627 *
628 * this is relatively minimal for procfs.
629 */
630 int
631 procfs_getattr(void *v)
632 {
633 struct vop_getattr_args /* {
634 struct vnode *a_vp;
635 struct vattr *a_vap;
636 kauth_cred_t a_cred;
637 } */ *ap = v;
638 struct pfsnode *pfs = VTOPFS(ap->a_vp);
639 struct vattr *vap = ap->a_vap;
640 struct proc *procp;
641 char *path, *bp, bf[16];
642 int error;
643
644 /* first check the process still exists */
645 switch (pfs->pfs_type) {
646 case PFSroot:
647 case PFScurproc:
648 case PFSself:
649 procp = NULL;
650 break;
651
652 default:
653 error = procfs_proc_lock(pfs->pfs_pid, &procp, ENOENT);
654 if (error != 0)
655 return (error);
656 break;
657 }
658
659 switch (pfs->pfs_type) {
660 case PFStask:
661 if (pfs->pfs_fd == -1) {
662 path = NULL;
663 break;
664 }
665 /*FALLTHROUGH*/
666 case PFScwd:
667 case PFSchroot:
668 path = malloc(MAXPATHLEN + 4, M_TEMP, M_WAITOK);
669 if (path == NULL && procp != NULL) {
670 procfs_proc_unlock(procp);
671 return (ENOMEM);
672 }
673 break;
674
675 default:
676 path = NULL;
677 break;
678 }
679
680 if (procp != NULL) {
681 mutex_enter(procp->p_lock);
682 error = kauth_authorize_process(kauth_cred_get(),
683 KAUTH_PROCESS_CANSEE, procp,
684 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
685 mutex_exit(procp->p_lock);
686 if (error != 0) {
687 procfs_proc_unlock(procp);
688 if (path != NULL)
689 free(path, M_TEMP);
690 return (ENOENT);
691 }
692 }
693
694 error = 0;
695
696 /* start by zeroing out the attributes */
697 vattr_null(vap);
698
699 /* next do all the common fields */
700 vap->va_type = ap->a_vp->v_type;
701 vap->va_mode = pfs->pfs_mode;
702 vap->va_fileid = pfs->pfs_fileno;
703 vap->va_flags = 0;
704 vap->va_blocksize = PAGE_SIZE;
705
706 /*
707 * Make all times be current TOD.
708 *
709 * It would be possible to get the process start
710 * time from the p_stats structure, but there's
711 * no "file creation" time stamp anyway, and the
712 * p_stats structure is not addressable if u. gets
713 * swapped out for that process.
714 */
715 getnanotime(&vap->va_ctime);
716 vap->va_atime = vap->va_mtime = vap->va_ctime;
717 if (procp)
718 TIMEVAL_TO_TIMESPEC(&procp->p_stats->p_start,
719 &vap->va_birthtime);
720 else
721 getnanotime(&vap->va_birthtime);
722
723 switch (pfs->pfs_type) {
724 case PFSmem:
725 case PFSregs:
726 case PFSfpregs:
727 #if defined(__HAVE_PROCFS_MACHDEP) && defined(PROCFS_MACHDEP_PROTECT_CASES)
728 PROCFS_MACHDEP_PROTECT_CASES
729 #endif
730 /*
731 * If the process has exercised some setuid or setgid
732 * privilege, then rip away read/write permission so
733 * that only root can gain access.
734 */
735 if (procp->p_flag & PK_SUGID)
736 vap->va_mode &= ~(S_IRUSR|S_IWUSR);
737 /* FALLTHROUGH */
738 case PFSstatus:
739 case PFSstat:
740 case PFSnote:
741 case PFSnotepg:
742 case PFScmdline:
743 case PFSenviron:
744 case PFSemul:
745 case PFSstatm:
746
747 case PFSmap:
748 case PFSmaps:
749 case PFSlimit:
750 case PFSauxv:
751 vap->va_nlink = 1;
752 vap->va_uid = kauth_cred_geteuid(procp->p_cred);
753 vap->va_gid = kauth_cred_getegid(procp->p_cred);
754 break;
755 case PFScwd:
756 case PFSchroot:
757 case PFSmeminfo:
758 case PFSdevices:
759 case PFScpuinfo:
760 case PFSuptime:
761 case PFSmounts:
762 case PFScpustat:
763 case PFSloadavg:
764 case PFSversion:
765 case PFSexe:
766 case PFSself:
767 case PFScurproc:
768 case PFSroot:
769 vap->va_nlink = 1;
770 vap->va_uid = vap->va_gid = 0;
771 break;
772
773 case PFSproc:
774 case PFStask:
775 case PFSfile:
776 case PFSfd:
777 break;
778
779 default:
780 panic("%s: %d/1", __func__, pfs->pfs_type);
781 }
782
783 /*
784 * now do the object specific fields
785 *
786 * The size could be set from struct reg, but it's hardly
787 * worth the trouble, and it puts some (potentially) machine
788 * dependent data into this machine-independent code. If it
789 * becomes important then this function should break out into
790 * a per-file stat function in the corresponding .c file.
791 */
792
793 switch (pfs->pfs_type) {
794 case PFSroot:
795 vap->va_bytes = vap->va_size = DEV_BSIZE;
796 break;
797
798 case PFSself:
799 case PFScurproc:
800 vap->va_bytes = vap->va_size =
801 snprintf(bf, sizeof(bf), "%ld", (long)curproc->p_pid);
802 break;
803 case PFStask:
804 if (pfs->pfs_fd != -1) {
805 vap->va_nlink = 1;
806 vap->va_uid = 0;
807 vap->va_gid = 0;
808 vap->va_bytes = vap->va_size =
809 snprintf(bf, sizeof(bf), "..");
810 break;
811 }
812 /*FALLTHROUGH*/
813 case PFSfd:
814 if (pfs->pfs_fd != -1) {
815 file_t *fp;
816
817 fp = fd_getfile2(procp, pfs->pfs_fd);
818 if (fp == NULL) {
819 error = EBADF;
820 break;
821 }
822 vap->va_nlink = 1;
823 vap->va_uid = kauth_cred_geteuid(fp->f_cred);
824 vap->va_gid = kauth_cred_getegid(fp->f_cred);
825 switch (fp->f_type) {
826 case DTYPE_VNODE:
827 vap->va_bytes = vap->va_size =
828 fp->f_vnode->v_size;
829 break;
830 default:
831 vap->va_bytes = vap->va_size = 0;
832 break;
833 }
834 closef(fp);
835 break;
836 }
837 /*FALLTHROUGH*/
838 case PFSproc:
839 vap->va_nlink = 2;
840 vap->va_uid = kauth_cred_geteuid(procp->p_cred);
841 vap->va_gid = kauth_cred_getegid(procp->p_cred);
842 vap->va_bytes = vap->va_size = DEV_BSIZE;
843 break;
844
845 case PFSfile:
846 error = EOPNOTSUPP;
847 break;
848
849 case PFSmem:
850 vap->va_bytes = vap->va_size =
851 ctob(procp->p_vmspace->vm_tsize +
852 procp->p_vmspace->vm_dsize +
853 procp->p_vmspace->vm_ssize);
854 break;
855
856 case PFSauxv:
857 vap->va_bytes = vap->va_size = procp->p_execsw->es_arglen;
858 break;
859
860 #if defined(PT_GETREGS) || defined(PT_SETREGS)
861 case PFSregs:
862 vap->va_bytes = vap->va_size = sizeof(struct reg);
863 break;
864 #endif
865
866 #if defined(PT_GETFPREGS) || defined(PT_SETFPREGS)
867 case PFSfpregs:
868 vap->va_bytes = vap->va_size = sizeof(struct fpreg);
869 break;
870 #endif
871
872 case PFSstatus:
873 case PFSstat:
874 case PFSnote:
875 case PFSnotepg:
876 case PFScmdline:
877 case PFSenviron:
878 case PFSmeminfo:
879 case PFSdevices:
880 case PFScpuinfo:
881 case PFSuptime:
882 case PFSmounts:
883 case PFScpustat:
884 case PFSloadavg:
885 case PFSstatm:
886 case PFSversion:
887 vap->va_bytes = vap->va_size = 0;
888 break;
889 case PFSlimit:
890 case PFSmap:
891 case PFSmaps:
892 /*
893 * Advise a larger blocksize for the map files, so that
894 * they may be read in one pass.
895 */
896 vap->va_blocksize = 4 * PAGE_SIZE;
897 vap->va_bytes = vap->va_size = 0;
898 break;
899
900 case PFScwd:
901 case PFSchroot:
902 bp = path + MAXPATHLEN;
903 *--bp = '\0';
904 procfs_dir(pfs->pfs_type, curlwp, procp, &bp, path,
905 MAXPATHLEN);
906 vap->va_bytes = vap->va_size = strlen(bp);
907 break;
908
909 case PFSexe:
910 vap->va_bytes = vap->va_size = strlen(procp->p_path);
911 break;
912
913 case PFSemul:
914 vap->va_bytes = vap->va_size = strlen(procp->p_emul->e_name);
915 break;
916
917 #ifdef __HAVE_PROCFS_MACHDEP
918 PROCFS_MACHDEP_NODETYPE_CASES
919 error = procfs_machdep_getattr(ap->a_vp, vap, procp);
920 break;
921 #endif
922
923 default:
924 panic("%s: %d/2", __func__, pfs->pfs_type);
925 }
926
927 if (procp != NULL)
928 procfs_proc_unlock(procp);
929 if (path != NULL)
930 free(path, M_TEMP);
931
932 return (error);
933 }
934
/*
 * Pretend that setting attributes succeeded.
 *
 * Returning an error here is not a good idea: things like creat()
 * would fail when they try to set the file length to 0, and so would
 * "echo $note > /proc/$pid/note".  The request is therefore ignored
 * and success is reported.
 */
/*ARGSUSED*/
int
procfs_setattr(void *v)
{

	return 0;
}
950
951 /*
952 * implement access checking.
953 *
954 * actually, the check for super-user is slightly
955 * broken since it will allow read access to write-only
956 * objects. this doesn't cause any particular trouble
957 * but does mean that the i/o entry points need to check
958 * that the operation really does make sense.
959 */
960 int
961 procfs_access(void *v)
962 {
963 struct vop_access_args /* {
964 struct vnode *a_vp;
965 int a_mode;
966 kauth_cred_t a_cred;
967 } */ *ap = v;
968 struct vattr va;
969 int error;
970
971 if ((error = VOP_GETATTR(ap->a_vp, &va, ap->a_cred)) != 0)
972 return (error);
973
974 return kauth_authorize_vnode(ap->a_cred,
975 KAUTH_ACCESS_ACTION(ap->a_mode, ap->a_vp->v_type, va.va_mode),
976 ap->a_vp, NULL, genfs_can_access(va.va_type, va.va_mode,
977 va.va_uid, va.va_gid, ap->a_mode, ap->a_cred));
978 }
979
980 /*
981 * lookup. this is incredibly complicated in the
982 * general case, however for most pseudo-filesystems
983 * very little needs to be done.
984 *
985 * Locking isn't hard here, just poorly documented.
986 *
987 * If we're looking up ".", just vref the parent & return it.
988 *
989 * If we're looking up "..", unlock the parent, and lock "..". If everything
990 * went ok, and we're on the last component and the caller requested the
991 * parent locked, try to re-lock the parent. We do this to prevent lock
992 * races.
993 *
994 * For anything else, get the needed node. Then unlock the parent if not
995 * the last component or not LOCKPARENT (i.e. if we wouldn't re-lock the
996 * parent in the .. case).
997 *
998 * We try to exit with the parent locked in error cases.
999 */
1000 int
1001 procfs_lookup(void *v)
1002 {
1003 struct vop_lookup_v2_args /* {
1004 struct vnode * a_dvp;
1005 struct vnode ** a_vpp;
1006 struct componentname * a_cnp;
1007 } */ *ap = v;
1008 struct componentname *cnp = ap->a_cnp;
1009 struct vnode **vpp = ap->a_vpp;
1010 struct vnode *dvp = ap->a_dvp;
1011 const char *pname = cnp->cn_nameptr;
1012 const struct proc_target *pt = NULL;
1013 struct vnode *fvp;
1014 pid_t pid, vnpid;
1015 struct pfsnode *pfs;
1016 struct proc *p = NULL;
1017 struct lwp *plwp;
1018 int i, error;
1019 pfstype type;
1020
1021 *vpp = NULL;
1022
1023 if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
1024 return (EROFS);
1025
1026 if (cnp->cn_namelen == 1 && *pname == '.') {
1027 *vpp = dvp;
1028 vref(dvp);
1029 return (0);
1030 }
1031
1032 pfs = VTOPFS(dvp);
1033 switch (pfs->pfs_type) {
1034 case PFSroot:
1035 /*
1036 * Shouldn't get here with .. in the root node.
1037 */
1038 if (cnp->cn_flags & ISDOTDOT)
1039 return (EIO);
1040
1041 for (i = 0; i < nproc_root_targets; i++) {
1042 pt = &proc_root_targets[i];
1043 /*
1044 * check for node match. proc is always NULL here,
1045 * so call pt_valid with constant NULL lwp.
1046 */
1047 if (cnp->cn_namelen == pt->pt_namlen &&
1048 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
1049 (pt->pt_valid == NULL ||
1050 (*pt->pt_valid)(NULL, dvp->v_mount)))
1051 break;
1052 }
1053
1054 if (i != nproc_root_targets) {
1055 error = procfs_allocvp(dvp->v_mount, vpp, 0,
1056 pt->pt_pfstype, -1);
1057 return (error);
1058 }
1059
1060 if (CNEQ(cnp, "curproc", 7)) {
1061 pid = curproc->p_pid;
1062 vnpid = 0;
1063 type = PFScurproc;
1064 } else if (CNEQ(cnp, "self", 4)) {
1065 pid = curproc->p_pid;
1066 vnpid = 0;
1067 type = PFSself;
1068 } else {
1069 pid = (pid_t)atoi(pname, cnp->cn_namelen);
1070 vnpid = pid;
1071 type = PFSproc;
1072 }
1073
1074 if (procfs_proc_lock(pid, &p, ESRCH) != 0)
1075 break;
1076 error = procfs_allocvp(dvp->v_mount, vpp, vnpid, type, -1);
1077 procfs_proc_unlock(p);
1078 return (error);
1079
1080 case PFSproc:
1081 if (cnp->cn_flags & ISDOTDOT) {
1082 error = procfs_allocvp(dvp->v_mount, vpp, 0, PFSroot,
1083 -1);
1084 return (error);
1085 }
1086
1087 if (procfs_proc_lock(pfs->pfs_pid, &p, ESRCH) != 0)
1088 break;
1089
1090 mutex_enter(p->p_lock);
1091 LIST_FOREACH(plwp, &p->p_lwps, l_sibling) {
1092 if (plwp->l_stat != LSZOMB)
1093 break;
1094 }
1095 /* Process is exiting if no-LWPS or all LWPs are LSZOMB */
1096 if (plwp == NULL) {
1097 mutex_exit(p->p_lock);
1098 procfs_proc_unlock(p);
1099 return ESRCH;
1100 }
1101
1102 lwp_addref(plwp);
1103 mutex_exit(p->p_lock);
1104
1105 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) {
1106 int found;
1107
1108 found = cnp->cn_namelen == pt->pt_namlen &&
1109 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
1110 (pt->pt_valid == NULL
1111 || (*pt->pt_valid)(plwp, dvp->v_mount));
1112 if (found)
1113 break;
1114 }
1115 lwp_delref(plwp);
1116
1117 if (i == nproc_targets) {
1118 procfs_proc_unlock(p);
1119 break;
1120 }
1121 if (pt->pt_pfstype == PFSfile) {
1122 fvp = p->p_textvp;
1123 /* We already checked that it exists. */
1124 vref(fvp);
1125 procfs_proc_unlock(p);
1126 *vpp = fvp;
1127 return (0);
1128 }
1129
1130 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
1131 pt->pt_pfstype, -1);
1132 procfs_proc_unlock(p);
1133 return (error);
1134
1135 case PFSfd: {
1136 int fd;
1137 file_t *fp;
1138
1139 if ((error = procfs_proc_lock(pfs->pfs_pid, &p, ENOENT)) != 0)
1140 return error;
1141
1142 if (cnp->cn_flags & ISDOTDOT) {
1143 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
1144 PFSproc, -1);
1145 procfs_proc_unlock(p);
1146 return (error);
1147 }
1148 fd = atoi(pname, cnp->cn_namelen);
1149
1150 fp = fd_getfile2(p, fd);
1151 if (fp == NULL) {
1152 procfs_proc_unlock(p);
1153 return ENOENT;
1154 }
1155 fvp = fp->f_vnode;
1156
1157 /* Don't show directories */
1158 if (fp->f_type == DTYPE_VNODE && fvp->v_type != VDIR) {
1159 vref(fvp);
1160 closef(fp);
1161 procfs_proc_unlock(p);
1162 *vpp = fvp;
1163 return 0;
1164 }
1165
1166 closef(fp);
1167 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
1168 PFSfd, fd);
1169 procfs_proc_unlock(p);
1170 return error;
1171 }
1172 case PFStask: {
1173 int xpid;
1174
1175 if ((error = procfs_proc_lock(pfs->pfs_pid, &p, ENOENT)) != 0)
1176 return error;
1177
1178 if (cnp->cn_flags & ISDOTDOT) {
1179 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
1180 PFSproc, -1);
1181 procfs_proc_unlock(p);
1182 return (error);
1183 }
1184 xpid = atoi(pname, cnp->cn_namelen);
1185
1186 if (xpid != pfs->pfs_pid) {
1187 procfs_proc_unlock(p);
1188 return ENOENT;
1189 }
1190 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
1191 PFStask, 0);
1192 procfs_proc_unlock(p);
1193 return error;
1194 }
1195 default:
1196 return (ENOTDIR);
1197 }
1198
1199 return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS);
1200 }
1201
1202 int
1203 procfs_validfile(struct lwp *l, struct mount *mp)
1204 {
1205 return l != NULL && l->l_proc != NULL && l->l_proc->p_textvp != NULL;
1206 }
1207
1208 static int
1209 procfs_validfile_linux(struct lwp *l, struct mount *mp)
1210 {
1211 int flags;
1212
1213 flags = VFSTOPROC(mp)->pmnt_flags;
1214 return (flags & PROCFSMNT_LINUXCOMPAT) &&
1215 (l == NULL || l->l_proc == NULL || procfs_validfile(l, mp));
1216 }
1217
/*
 * State shared between procfs_readdir() and
 * procfs_root_readdir_callback() while the process entries of the
 * procfs root directory are being emitted.
 */
struct procfs_root_readdir_ctx {
	struct uio *uiop;	/* destination of the generated dirents */
	off_t *cookies;		/* next free cookie slot, or NULL if unused */
	int ncookies;		/* number of entries emitted so far */
	off_t off, startoff;	/* current entry index / index to resume at */
	int error;		/* uiomove() failure recorded by the callback */
};
1226
1227 static int
1228 procfs_root_readdir_callback(struct proc *p, void *arg)
1229 {
1230 struct procfs_root_readdir_ctx *ctxp = arg;
1231 struct dirent d;
1232 struct uio *uiop;
1233 int error;
1234
1235 uiop = ctxp->uiop;
1236 if (uiop->uio_resid < UIO_MX)
1237 return -1; /* no space */
1238
1239 if (ctxp->off < ctxp->startoff) {
1240 ctxp->off++;
1241 return 0;
1242 }
1243
1244 if (kauth_authorize_process(kauth_cred_get(),
1245 KAUTH_PROCESS_CANSEE, p,
1246 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL) != 0)
1247 return 0;
1248
1249 memset(&d, 0, UIO_MX);
1250 d.d_reclen = UIO_MX;
1251 d.d_fileno = PROCFS_FILENO(p->p_pid, PFSproc, -1);
1252 d.d_namlen = snprintf(d.d_name,
1253 UIO_MX - offsetof(struct dirent, d_name), "%ld", (long)p->p_pid);
1254 d.d_type = DT_DIR;
1255
1256 mutex_exit(proc_lock);
1257 error = uiomove(&d, UIO_MX, uiop);
1258 mutex_enter(proc_lock);
1259 if (error) {
1260 ctxp->error = error;
1261 return -1;
1262 }
1263
1264 ctxp->ncookies++;
1265 if (ctxp->cookies)
1266 *(ctxp->cookies)++ = ctxp->off + 1;
1267 ctxp->off++;
1268
1269 return 0;
1270 }
1271
1272 /*
1273 * readdir returns directory entries from pfsnode (vp).
1274 *
1275 * the strategy here with procfs is to generate a single
1276 * directory entry at a time (struct dirent) and then
1277 * copy that out to userland using uiomove. a more efficent
1278 * though more complex implementation, would try to minimize
1279 * the number of calls to uiomove(). for procfs, this is
1280 * hardly worth the added code complexity.
1281 *
1282 * this should just be done through read()
1283 */
1284 int
1285 procfs_readdir(void *v)
1286 {
1287 struct vop_readdir_args /* {
1288 struct vnode *a_vp;
1289 struct uio *a_uio;
1290 kauth_cred_t a_cred;
1291 int *a_eofflag;
1292 off_t **a_cookies;
1293 int *a_ncookies;
1294 } */ *ap = v;
1295 struct uio *uio = ap->a_uio;
1296 struct dirent d;
1297 struct pfsnode *pfs;
1298 off_t i;
1299 int error;
1300 off_t *cookies = NULL;
1301 int ncookies;
1302 struct vnode *vp;
1303 const struct proc_target *pt;
1304 struct procfs_root_readdir_ctx ctx;
1305 struct lwp *l;
1306 int nfd;
1307
1308 vp = ap->a_vp;
1309 pfs = VTOPFS(vp);
1310
1311 if (uio->uio_resid < UIO_MX)
1312 return (EINVAL);
1313 if (uio->uio_offset < 0)
1314 return (EINVAL);
1315
1316 error = 0;
1317 i = uio->uio_offset;
1318 memset(&d, 0, UIO_MX);
1319 d.d_reclen = UIO_MX;
1320 ncookies = uio->uio_resid / UIO_MX;
1321
1322 switch (pfs->pfs_type) {
1323 /*
1324 * this is for the process-specific sub-directories.
1325 * all that is needed to is copy out all the entries
1326 * from the procent[] table (top of this file).
1327 */
1328 case PFSproc: {
1329 struct proc *p;
1330
1331 if (i >= nproc_targets)
1332 return 0;
1333
1334 if (procfs_proc_lock(pfs->pfs_pid, &p, ESRCH) != 0)
1335 break;
1336
1337 if (ap->a_ncookies) {
1338 ncookies = uimin(ncookies, (nproc_targets - i));
1339 cookies = malloc(ncookies * sizeof (off_t),
1340 M_TEMP, M_WAITOK);
1341 *ap->a_cookies = cookies;
1342 }
1343
1344 for (pt = &proc_targets[i];
1345 uio->uio_resid >= UIO_MX && i < nproc_targets; pt++, i++) {
1346 if (pt->pt_valid) {
1347 /* XXXSMP LWP can disappear */
1348 mutex_enter(p->p_lock);
1349 l = LIST_FIRST(&p->p_lwps);
1350 KASSERT(l != NULL);
1351 mutex_exit(p->p_lock);
1352 if ((*pt->pt_valid)(l, vp->v_mount) == 0)
1353 continue;
1354 }
1355
1356 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid,
1357 pt->pt_pfstype, -1);
1358 d.d_namlen = pt->pt_namlen;
1359 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1);
1360 d.d_type = pt->pt_type;
1361
1362 if ((error = uiomove(&d, UIO_MX, uio)) != 0)
1363 break;
1364 if (cookies)
1365 *cookies++ = i + 1;
1366 }
1367
1368 procfs_proc_unlock(p);
1369 break;
1370 }
1371 case PFSfd: {
1372 struct proc *p;
1373 file_t *fp;
1374 int lim, nc = 0;
1375
1376 if ((error = procfs_proc_lock(pfs->pfs_pid, &p, ESRCH)) != 0)
1377 return error;
1378
1379 /* XXX Should this be by file as well? */
1380 if (kauth_authorize_process(kauth_cred_get(),
1381 KAUTH_PROCESS_CANSEE, p,
1382 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_OPENFILES), NULL,
1383 NULL) != 0) {
1384 procfs_proc_unlock(p);
1385 return ESRCH;
1386 }
1387
1388 nfd = p->p_fd->fd_dt->dt_nfiles;
1389
1390 lim = uimin((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
1391 if (i >= lim) {
1392 procfs_proc_unlock(p);
1393 return 0;
1394 }
1395
1396 if (ap->a_ncookies) {
1397 ncookies = uimin(ncookies, (nfd + 2 - i));
1398 cookies = malloc(ncookies * sizeof (off_t),
1399 M_TEMP, M_WAITOK);
1400 *ap->a_cookies = cookies;
1401 }
1402
1403 for (; i < 2 && uio->uio_resid >= UIO_MX; i++) {
1404 pt = &proc_targets[i];
1405 d.d_namlen = pt->pt_namlen;
1406 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid,
1407 pt->pt_pfstype, -1);
1408 (void)memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1);
1409 d.d_type = pt->pt_type;
1410 if ((error = uiomove(&d, UIO_MX, uio)) != 0)
1411 break;
1412 if (cookies)
1413 *cookies++ = i + 1;
1414 nc++;
1415 }
1416 if (error) {
1417 ncookies = nc;
1418 break;
1419 }
1420 for (; uio->uio_resid >= UIO_MX && i < nfd; i++) {
1421 /* check the descriptor exists */
1422 if ((fp = fd_getfile2(p, i - 2)) == NULL)
1423 continue;
1424 closef(fp);
1425
1426 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, PFSfd, i - 2);
1427 d.d_namlen = snprintf(d.d_name, sizeof(d.d_name),
1428 "%lld", (long long)(i - 2));
1429 d.d_type = VREG;
1430 if ((error = uiomove(&d, UIO_MX, uio)) != 0)
1431 break;
1432 if (cookies)
1433 *cookies++ = i + 1;
1434 nc++;
1435 }
1436 ncookies = nc;
1437 procfs_proc_unlock(p);
1438 break;
1439 }
1440 case PFStask: {
1441 struct proc *p;
1442 int nc = 0;
1443
1444 if ((error = procfs_proc_lock(pfs->pfs_pid, &p, ESRCH)) != 0)
1445 return error;
1446
1447 nfd = 3; /* ., .., pid */
1448
1449 if (ap->a_ncookies) {
1450 ncookies = uimin(ncookies, (nfd + 2 - i));
1451 cookies = malloc(ncookies * sizeof (off_t),
1452 M_TEMP, M_WAITOK);
1453 *ap->a_cookies = cookies;
1454 }
1455
1456 for (; i < 2 && uio->uio_resid >= UIO_MX; i++) {
1457 pt = &proc_targets[i];
1458 d.d_namlen = pt->pt_namlen;
1459 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid,
1460 pt->pt_pfstype, -1);
1461 (void)memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1);
1462 d.d_type = pt->pt_type;
1463 if ((error = uiomove(&d, UIO_MX, uio)) != 0)
1464 break;
1465 if (cookies)
1466 *cookies++ = i + 1;
1467 nc++;
1468 }
1469 if (error) {
1470 ncookies = nc;
1471 break;
1472 }
1473 for (; uio->uio_resid >= UIO_MX && i < nfd; i++) {
1474 /* check the descriptor exists */
1475 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, PFStask,
1476 i - 2);
1477 d.d_namlen = snprintf(d.d_name, sizeof(d.d_name),
1478 "%ld", (long)pfs->pfs_pid);
1479 d.d_type = DT_LNK;
1480 if ((error = uiomove(&d, UIO_MX, uio)) != 0)
1481 break;
1482 if (cookies)
1483 *cookies++ = i + 1;
1484 nc++;
1485 }
1486 ncookies = nc;
1487 procfs_proc_unlock(p);
1488 break;
1489 }
1490
1491 /*
1492 * this is for the root of the procfs filesystem
1493 * what is needed are special entries for "curproc"
1494 * and "self" followed by an entry for each process
1495 * on allproc.
1496 */
1497
1498 case PFSroot: {
1499 int nc = 0;
1500
1501 if (ap->a_ncookies) {
1502 /*
1503 * XXX Potentially allocating too much space here,
1504 * but I'm lazy. This loop needs some work.
1505 */
1506 cookies = malloc(ncookies * sizeof (off_t),
1507 M_TEMP, M_WAITOK);
1508 *ap->a_cookies = cookies;
1509 }
1510 error = 0;
1511 /* 0 ... 3 are static entries. */
1512 for (; i <= 3 && uio->uio_resid >= UIO_MX; i++) {
1513 switch (i) {
1514 case 0: /* `.' */
1515 case 1: /* `..' */
1516 d.d_fileno = PROCFS_FILENO(0, PFSroot, -1);
1517 d.d_namlen = i + 1;
1518 memcpy(d.d_name, "..", d.d_namlen);
1519 d.d_name[i + 1] = '\0';
1520 d.d_type = DT_DIR;
1521 break;
1522
1523 case 2:
1524 d.d_fileno = PROCFS_FILENO(0, PFScurproc, -1);
1525 d.d_namlen = sizeof("curproc") - 1;
1526 memcpy(d.d_name, "curproc", sizeof("curproc"));
1527 d.d_type = DT_LNK;
1528 break;
1529
1530 case 3:
1531 d.d_fileno = PROCFS_FILENO(0, PFSself, -1);
1532 d.d_namlen = sizeof("self") - 1;
1533 memcpy(d.d_name, "self", sizeof("self"));
1534 d.d_type = DT_LNK;
1535 break;
1536 }
1537
1538 if ((error = uiomove(&d, UIO_MX, uio)) != 0)
1539 break;
1540 nc++;
1541 if (cookies)
1542 *cookies++ = i + 1;
1543 }
1544 /* 4 ... are process entries. */
1545 ctx.uiop = uio;
1546 ctx.error = 0;
1547 ctx.off = 4;
1548 ctx.startoff = i;
1549 ctx.cookies = cookies;
1550 ctx.ncookies = nc;
1551 proclist_foreach_call(&allproc,
1552 procfs_root_readdir_callback, &ctx);
1553 cookies = ctx.cookies;
1554 nc = ctx.ncookies;
1555 error = ctx.error;
1556 if (error)
1557 break;
1558
1559 /* misc entries. */
1560 if (i < ctx.off)
1561 i = ctx.off;
1562 if (i >= ctx.off + nproc_root_targets)
1563 break;
1564 for (pt = &proc_root_targets[i - ctx.off];
1565 uio->uio_resid >= UIO_MX &&
1566 pt < &proc_root_targets[nproc_root_targets];
1567 pt++, i++) {
1568 if (pt->pt_valid &&
1569 (*pt->pt_valid)(NULL, vp->v_mount) == 0)
1570 continue;
1571 d.d_fileno = PROCFS_FILENO(0, pt->pt_pfstype, -1);
1572 d.d_namlen = pt->pt_namlen;
1573 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1);
1574 d.d_type = pt->pt_type;
1575
1576 if ((error = uiomove(&d, UIO_MX, uio)) != 0)
1577 break;
1578 nc++;
1579 if (cookies)
1580 *cookies++ = i + 1;
1581 }
1582
1583 ncookies = nc;
1584 break;
1585 }
1586
1587 default:
1588 error = ENOTDIR;
1589 break;
1590 }
1591
1592 if (ap->a_ncookies) {
1593 if (error) {
1594 if (cookies)
1595 free(*ap->a_cookies, M_TEMP);
1596 *ap->a_ncookies = 0;
1597 *ap->a_cookies = NULL;
1598 } else
1599 *ap->a_ncookies = ncookies;
1600 }
1601 uio->uio_offset = i;
1602 return (error);
1603 }
1604
1605 /*
1606 * readlink reads the link of `curproc' and others
1607 */
1608 int
1609 procfs_readlink(void *v)
1610 {
1611 struct vop_readlink_args *ap = v;
1612 char bf[16]; /* should be enough */
1613 char *bp = bf;
1614 char *path = NULL;
1615 int len = 0;
1616 int error = 0;
1617 struct pfsnode *pfs = VTOPFS(ap->a_vp);
1618 struct proc *pown = NULL;
1619
1620 if (pfs->pfs_fileno == PROCFS_FILENO(0, PFScurproc, -1))
1621 len = snprintf(bf, sizeof(bf), "%ld", (long)curproc->p_pid);
1622 else if (pfs->pfs_fileno == PROCFS_FILENO(0, PFSself, -1))
1623 len = snprintf(bf, sizeof(bf), "%s", "curproc");
1624 else if (pfs->pfs_fileno == PROCFS_FILENO(pfs->pfs_pid, PFStask, 0))
1625 len = snprintf(bf, sizeof(bf), "..");
1626 else if (pfs->pfs_fileno == PROCFS_FILENO(pfs->pfs_pid, PFSexe, -1)) {
1627 if ((error = procfs_proc_lock(pfs->pfs_pid, &pown, ESRCH)) != 0)
1628 return error;
1629 bp = pown->p_path;
1630 len = strlen(bp);
1631 } else if (pfs->pfs_fileno == PROCFS_FILENO(pfs->pfs_pid, PFScwd, -1) ||
1632 pfs->pfs_fileno == PROCFS_FILENO(pfs->pfs_pid, PFSchroot, -1)) {
1633 if ((error = procfs_proc_lock(pfs->pfs_pid, &pown, ESRCH)) != 0)
1634 return error;
1635 path = malloc(MAXPATHLEN + 4, M_TEMP, M_WAITOK);
1636 if (path == NULL) {
1637 procfs_proc_unlock(pown);
1638 return (ENOMEM);
1639 }
1640 bp = path + MAXPATHLEN;
1641 *--bp = '\0';
1642 procfs_dir(PROCFS_TYPE(pfs->pfs_fileno), curlwp, pown,
1643 &bp, path, MAXPATHLEN);
1644 len = strlen(bp);
1645 } else {
1646 file_t *fp;
1647 struct vnode *vxp, *vp;
1648
1649 if ((error = procfs_proc_lock(pfs->pfs_pid, &pown, ESRCH)) != 0)
1650 return error;
1651
1652 fp = fd_getfile2(pown, pfs->pfs_fd);
1653 if (fp == NULL) {
1654 procfs_proc_unlock(pown);
1655 return EBADF;
1656 }
1657
1658 switch (fp->f_type) {
1659 case DTYPE_VNODE:
1660 vxp = fp->f_vnode;
1661 if (vxp->v_type != VDIR) {
1662 error = EINVAL;
1663 break;
1664 }
1665 if ((path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK))
1666 == NULL) {
1667 error = ENOMEM;
1668 break;
1669 }
1670 bp = path + MAXPATHLEN;
1671 *--bp = '\0';
1672
1673 /*
1674 * XXX: kludge to avoid locking against ourselves
1675 * in getcwd()
1676 */
1677 if (vxp->v_tag == VT_PROCFS) {
1678 *--bp = '/';
1679 } else {
1680 rw_enter(&curproc->p_cwdi->cwdi_lock,
1681 RW_READER);
1682 vp = curproc->p_cwdi->cwdi_rdir;
1683 if (vp == NULL)
1684 vp = rootvnode;
1685 error = getcwd_common(vxp, vp, &bp, path,
1686 MAXPATHLEN / 2, 0, curlwp);
1687 rw_exit(&curproc->p_cwdi->cwdi_lock);
1688 }
1689 if (error)
1690 break;
1691 len = strlen(bp);
1692 break;
1693
1694 case DTYPE_MISC:
1695 len = snprintf(bf, sizeof(bf), "%s", "[misc]");
1696 break;
1697
1698 case DTYPE_KQUEUE:
1699 len = snprintf(bf, sizeof(bf), "%s", "[kqueue]");
1700 break;
1701
1702 case DTYPE_SEM:
1703 len = snprintf(bf, sizeof(bf), "%s", "[ksem]");
1704 break;
1705
1706 default:
1707 error = EINVAL;
1708 break;
1709 }
1710 closef(fp);
1711 }
1712
1713 if (error == 0)
1714 error = uiomove(bp, len, ap->a_uio);
1715 if (pown)
1716 procfs_proc_unlock(pown);
1717 if (path)
1718 free(path, M_TEMP);
1719 return error;
1720 }
1721
/*
 * convert decimal ascii to int
 *
 * b need not be NUL-terminated; exactly len bytes are examined.
 * Returns the value, or -1 if any byte is not a decimal digit or the
 * value does not fit in an int.  An empty string (len == 0) yields 0.
 *
 * Rejecting overflow matters: signed overflow is undefined behaviour,
 * and a wrapped result could make an overlong name alias a valid
 * pid or descriptor number.
 */
static int
atoi(const char *b, size_t len)
{
	/* largest int, spelled without relying on a limits header */
	const int int_max = (int)(~0U >> 1);
	int p = 0;

	while (len--) {
		char c = *b++;
		int digit;

		if (c < '0' || c > '9')
			return -1;
		digit = c - '0';
		/* would 10 * p + digit exceed int_max? */
		if (p > (int_max - digit) / 10)
			return -1;
		p = 10 * p + digit;
	}

	return p;
}
1739