procfs_vnops.c revision 1.215 1 /* $NetBSD: procfs_vnops.c,v 1.215 2020/06/27 17:29:19 christos Exp $ */
2
3 /*-
4 * Copyright (c) 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1993, 1995
34 * The Regents of the University of California. All rights reserved.
35 *
36 * This code is derived from software contributed to Berkeley by
37 * Jan-Simon Pendry.
38 *
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
41 * are met:
42 * 1. Redistributions of source code must retain the above copyright
43 * notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 * notice, this list of conditions and the following disclaimer in the
46 * documentation and/or other materials provided with the distribution.
47 * 3. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95
64 */
65
66 /*
67 * Copyright (c) 1993 Jan-Simon Pendry
68 *
69 * This code is derived from software contributed to Berkeley by
70 * Jan-Simon Pendry.
71 *
72 * Redistribution and use in source and binary forms, with or without
73 * modification, are permitted provided that the following conditions
74 * are met:
75 * 1. Redistributions of source code must retain the above copyright
76 * notice, this list of conditions and the following disclaimer.
77 * 2. Redistributions in binary form must reproduce the above copyright
78 * notice, this list of conditions and the following disclaimer in the
79 * documentation and/or other materials provided with the distribution.
80 * 3. All advertising materials mentioning features or use of this software
81 * must display the following acknowledgement:
82 * This product includes software developed by the University of
83 * California, Berkeley and its contributors.
84 * 4. Neither the name of the University nor the names of its contributors
85 * may be used to endorse or promote products derived from this software
86 * without specific prior written permission.
87 *
88 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
89 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
90 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
91 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
92 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
93 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
94 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
95 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
96 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
97 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
98 * SUCH DAMAGE.
99 *
100 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95
101 */
102
103 /*
104 * procfs vnode interface
105 */
106
107 #include <sys/cdefs.h>
108 __KERNEL_RCSID(0, "$NetBSD: procfs_vnops.c,v 1.215 2020/06/27 17:29:19 christos Exp $");
109
110 #include <sys/param.h>
111 #include <sys/atomic.h>
112 #include <sys/systm.h>
113 #include <sys/time.h>
114 #include <sys/kernel.h>
115 #include <sys/file.h>
116 #include <sys/filedesc.h>
117 #include <sys/proc.h>
118 #include <sys/vnode.h>
119 #include <sys/namei.h>
120 #include <sys/malloc.h>
121 #include <sys/mount.h>
122 #include <sys/dirent.h>
123 #include <sys/resourcevar.h>
124 #include <sys/stat.h>
125 #include <sys/ptrace.h>
126 #include <sys/kauth.h>
127 #include <sys/exec.h>
128
129 #include <uvm/uvm_extern.h> /* for PAGE_SIZE */
130
131 #include <machine/reg.h>
132
133 #include <miscfs/genfs/genfs.h>
134 #include <miscfs/procfs/procfs.h>
135
136 /*
137 * Vnode Operations.
138 *
139 */
140
141 static int procfs_validfile_linux(struct lwp *, struct mount *);
142 static int procfs_root_readdir_callback(struct proc *, void *);
143 static void procfs_dir(pfstype, struct lwp *, struct proc *, char **, char *,
144 size_t);
145
146 /*
147 * This is a list of the valid names in the
148 * process-specific sub-directories. It is
149 * used in procfs_lookup and procfs_readdir
150 */
151 static const struct proc_target {
152 u_char pt_type;
153 u_char pt_namlen;
154 const char *pt_name;
155 pfstype pt_pfstype;
156 int (*pt_valid)(struct lwp *, struct mount *);
157 } proc_targets[] = {
158 #define N(s) sizeof(s)-1, s
159 /* name type validp */
160 { DT_DIR, N("."), PFSproc, NULL },
161 { DT_DIR, N(".."), PFSroot, NULL },
162 { DT_DIR, N("fd"), PFSfd, NULL },
163 { DT_DIR, N("task"), PFStask, procfs_validfile_linux },
164 { DT_LNK, N("cwd"), PFScwd, NULL },
165 { DT_LNK, N("emul"), PFSemul, NULL },
166 { DT_LNK, N("root"), PFSchroot, NULL },
167 { DT_REG, N("auxv"), PFSauxv, procfs_validauxv },
168 { DT_REG, N("cmdline"), PFScmdline, NULL },
169 { DT_REG, N("environ"), PFSenviron, NULL },
170 { DT_REG, N("exe"), PFSexe, procfs_validfile },
171 { DT_REG, N("file"), PFSfile, procfs_validfile },
172 { DT_REG, N("fpregs"), PFSfpregs, procfs_validfpregs },
173 { DT_REG, N("limit"), PFSlimit, NULL },
174 { DT_REG, N("map"), PFSmap, procfs_validmap },
175 { DT_REG, N("maps"), PFSmaps, procfs_validmap },
176 { DT_REG, N("mem"), PFSmem, NULL },
177 { DT_REG, N("note"), PFSnote, NULL },
178 { DT_REG, N("notepg"), PFSnotepg, NULL },
179 { DT_REG, N("regs"), PFSregs, procfs_validregs },
180 { DT_REG, N("stat"), PFSstat, procfs_validfile_linux },
181 { DT_REG, N("statm"), PFSstatm, procfs_validfile_linux },
182 { DT_REG, N("status"), PFSstatus, NULL },
183 #ifdef __HAVE_PROCFS_MACHDEP
184 PROCFS_MACHDEP_NODETYPE_DEFNS
185 #endif
186 #undef N
187 };
188 static const int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]);
189
190 /*
191 * List of files in the root directory. Note: the validate function will
192 * be called with p == NULL for these ones.
193 */
194 static const struct proc_target proc_root_targets[] = {
195 #define N(s) sizeof(s)-1, s
196 /* name type validp */
197 { DT_REG, N("meminfo"), PFSmeminfo, procfs_validfile_linux },
198 { DT_REG, N("cpuinfo"), PFScpuinfo, procfs_validfile_linux },
199 { DT_REG, N("uptime"), PFSuptime, procfs_validfile_linux },
200 { DT_REG, N("mounts"), PFSmounts, procfs_validfile_linux },
201 { DT_REG, N("devices"), PFSdevices, procfs_validfile_linux },
202 { DT_REG, N("stat"), PFScpustat, procfs_validfile_linux },
203 { DT_REG, N("loadavg"), PFSloadavg, procfs_validfile_linux },
204 { DT_REG, N("version"), PFSversion, procfs_validfile_linux },
205 #undef N
206 };
207 static const int nproc_root_targets =
208 sizeof(proc_root_targets) / sizeof(proc_root_targets[0]);
209
/*
 * Vnode operations that procfs implements itself.  Each takes the
 * operation's argument structure as an opaque pointer.
 */
int	procfs_lookup(void *);
int	procfs_open(void *);
int	procfs_close(void *);
int	procfs_access(void *);
int	procfs_getattr(void *);
int	procfs_setattr(void *);
int	procfs_link(void *);
int	procfs_symlink(void *);
int	procfs_readdir(void *);
int	procfs_readlink(void *);
int	procfs_inactive(void *);
int	procfs_reclaim(void *);
int	procfs_print(void *);
int	procfs_pathconf(void *);
int	procfs_getpages(void *);

/*
 * Operations with no procfs-specific implementation are aliased to the
 * generic genfs handlers; read and write both map onto procfs_rw.
 */
#define	procfs_create	genfs_eopnotsupp
#define	procfs_mknod	genfs_eopnotsupp
#define	procfs_read	procfs_rw
#define	procfs_write	procfs_rw
#define	procfs_fcntl	genfs_fcntl
#define	procfs_ioctl	genfs_enoioctl
#define	procfs_poll	genfs_poll
#define	procfs_kqfilter	genfs_kqfilter
#define	procfs_revoke	genfs_revoke
#define	procfs_fsync	genfs_nullop
#define	procfs_seek	genfs_nullop
#define	procfs_remove	genfs_eopnotsupp
#define	procfs_rename	genfs_eopnotsupp
#define	procfs_mkdir	genfs_eopnotsupp
#define	procfs_rmdir	genfs_eopnotsupp
#define	procfs_abortop	genfs_abortop
#define	procfs_lock	genfs_lock
#define	procfs_unlock	genfs_unlock
#define	procfs_bmap	genfs_badop
#define	procfs_strategy	genfs_badop
#define	procfs_islocked	genfs_islocked
#define	procfs_advlock	genfs_einval
#define	procfs_bwrite	genfs_eopnotsupp
#define	procfs_putpages	genfs_null_putpages

/*
 * File-local bounded string-to-integer helper (defined later in this
 * file); procfs_lookup() passes it a name pointer and the name length.
 */
static int	atoi(const char *, size_t);
251
252 /*
253 * procfs vnode operations.
254 */
255 int (**procfs_vnodeop_p)(void *);
256 const struct vnodeopv_entry_desc procfs_vnodeop_entries[] = {
257 { &vop_default_desc, vn_default_error },
258 { &vop_lookup_desc, procfs_lookup }, /* lookup */
259 { &vop_create_desc, procfs_create }, /* create */
260 { &vop_mknod_desc, procfs_mknod }, /* mknod */
261 { &vop_open_desc, procfs_open }, /* open */
262 { &vop_close_desc, procfs_close }, /* close */
263 { &vop_access_desc, procfs_access }, /* access */
264 { &vop_accessx_desc, genfs_accessx }, /* accessx */
265 { &vop_getattr_desc, procfs_getattr }, /* getattr */
266 { &vop_setattr_desc, procfs_setattr }, /* setattr */
267 { &vop_read_desc, procfs_read }, /* read */
268 { &vop_write_desc, procfs_write }, /* write */
269 { &vop_fallocate_desc, genfs_eopnotsupp }, /* fallocate */
270 { &vop_fdiscard_desc, genfs_eopnotsupp }, /* fdiscard */
271 { &vop_fcntl_desc, procfs_fcntl }, /* fcntl */
272 { &vop_ioctl_desc, procfs_ioctl }, /* ioctl */
273 { &vop_poll_desc, procfs_poll }, /* poll */
274 { &vop_kqfilter_desc, procfs_kqfilter }, /* kqfilter */
275 { &vop_revoke_desc, procfs_revoke }, /* revoke */
276 { &vop_fsync_desc, procfs_fsync }, /* fsync */
277 { &vop_seek_desc, procfs_seek }, /* seek */
278 { &vop_remove_desc, procfs_remove }, /* remove */
279 { &vop_link_desc, procfs_link }, /* link */
280 { &vop_rename_desc, procfs_rename }, /* rename */
281 { &vop_mkdir_desc, procfs_mkdir }, /* mkdir */
282 { &vop_rmdir_desc, procfs_rmdir }, /* rmdir */
283 { &vop_symlink_desc, procfs_symlink }, /* symlink */
284 { &vop_readdir_desc, procfs_readdir }, /* readdir */
285 { &vop_readlink_desc, procfs_readlink }, /* readlink */
286 { &vop_abortop_desc, procfs_abortop }, /* abortop */
287 { &vop_inactive_desc, procfs_inactive }, /* inactive */
288 { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */
289 { &vop_lock_desc, procfs_lock }, /* lock */
290 { &vop_unlock_desc, procfs_unlock }, /* unlock */
291 { &vop_bmap_desc, procfs_bmap }, /* bmap */
292 { &vop_strategy_desc, procfs_strategy }, /* strategy */
293 { &vop_print_desc, procfs_print }, /* print */
294 { &vop_islocked_desc, procfs_islocked }, /* islocked */
295 { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */
296 { &vop_advlock_desc, procfs_advlock }, /* advlock */
297 { &vop_getpages_desc, procfs_getpages }, /* getpages */
298 { &vop_putpages_desc, procfs_putpages }, /* putpages */
299 { NULL, NULL }
300 };
301 const struct vnodeopv_desc procfs_vnodeop_opv_desc =
302 { &procfs_vnodeop_p, procfs_vnodeop_entries };
303 /*
304 * set things up for doing i/o on
305 * the pfsnode (vp). (vp) is locked
306 * on entry, and should be left locked
307 * on exit.
308 *
309 * for procfs we don't need to do anything
310 * in particular for i/o. all that is done
311 * is to support exclusive open on process
312 * memory images.
313 */
314 int
315 procfs_open(void *v)
316 {
317 struct vop_open_args /* {
318 struct vnode *a_vp;
319 int a_mode;
320 kauth_cred_t a_cred;
321 } */ *ap = v;
322 struct vnode *vp = ap->a_vp;
323 struct pfsnode *pfs = VTOPFS(vp);
324 struct lwp *l1;
325 struct proc *p2;
326 int error;
327
328 if ((error =
329 procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &p2, ENOENT)) != 0)
330 return error;
331
332 l1 = curlwp; /* tracer */
333
334 #define M2K(m) (((m) & FREAD) && ((m) & FWRITE) ? \
335 KAUTH_REQ_PROCESS_PROCFS_RW : \
336 (m) & FWRITE ? KAUTH_REQ_PROCESS_PROCFS_WRITE : \
337 KAUTH_REQ_PROCESS_PROCFS_READ)
338
339 mutex_enter(p2->p_lock);
340 error = kauth_authorize_process(l1->l_cred, KAUTH_PROCESS_PROCFS,
341 p2, pfs, KAUTH_ARG(M2K(ap->a_mode)), NULL);
342 mutex_exit(p2->p_lock);
343 if (error) {
344 procfs_proc_unlock(p2);
345 return (error);
346 }
347
348 #undef M2K
349
350 switch (pfs->pfs_type) {
351 case PFSmem:
352 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) ||
353 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE))) {
354 error = EBUSY;
355 break;
356 }
357
358 if (!proc_isunder(p2, l1)) {
359 error = EPERM;
360 break;
361 }
362
363 if (ap->a_mode & FWRITE)
364 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL);
365
366 break;
367
368 case PFSregs:
369 case PFSfpregs:
370 if (!proc_isunder(p2, l1)) {
371 error = EPERM;
372 break;
373 }
374 break;
375
376 default:
377 break;
378 }
379
380 procfs_proc_unlock(p2);
381 return (error);
382 }
383
384 /*
385 * close the pfsnode (vp) after doing i/o.
386 * (vp) is not locked on entry or exit.
387 *
388 * nothing to do for procfs other than undo
389 * any exclusive open flag (see _open above).
390 */
391 int
392 procfs_close(void *v)
393 {
394 struct vop_close_args /* {
395 struct vnode *a_vp;
396 int a_fflag;
397 kauth_cred_t a_cred;
398 } */ *ap = v;
399 struct pfsnode *pfs = VTOPFS(ap->a_vp);
400
401 switch (pfs->pfs_type) {
402 case PFSmem:
403 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL))
404 pfs->pfs_flags &= ~(FWRITE|O_EXCL);
405 break;
406
407 default:
408 break;
409 }
410
411 return (0);
412 }
413
414 /*
415 * _inactive is called when the pfsnode
416 * is vrele'd and the reference count goes
417 * to zero. (vp) will be on the vnode free
418 * list, so to get it back vget() must be
419 * used.
420 *
421 * (vp) is locked on entry, but must be unlocked on exit.
422 */
423 int
424 procfs_inactive(void *v)
425 {
426 struct vop_inactive_v2_args /* {
427 struct vnode *a_vp;
428 bool *a_recycle;
429 } */ *ap = v;
430 struct vnode *vp = ap->a_vp;
431 struct pfsnode *pfs = VTOPFS(vp);
432
433 mutex_enter(&proc_lock);
434 *ap->a_recycle = (procfs_proc_find(vp->v_mount, pfs->pfs_pid) == NULL);
435 mutex_exit(&proc_lock);
436
437 return (0);
438 }
439
440 /*
441 * _reclaim is called when getnewvnode()
442 * wants to make use of an entry on the vnode
443 * free list. at this time the filesystem needs
444 * to free any private data and remove the node
445 * from any private lists.
446 */
447 int
448 procfs_reclaim(void *v)
449 {
450 struct vop_reclaim_v2_args /* {
451 struct vnode *a_vp;
452 } */ *ap = v;
453 struct vnode *vp = ap->a_vp;
454 struct pfsnode *pfs = VTOPFS(vp);
455
456 VOP_UNLOCK(vp);
457
458 /*
459 * To interlock with procfs_revoke_vnodes().
460 */
461 mutex_enter(vp->v_interlock);
462 vp->v_data = NULL;
463 mutex_exit(vp->v_interlock);
464 kmem_free(pfs, sizeof(*pfs));
465 return 0;
466 }
467
468 /*
469 * Return POSIX pathconf information applicable to special devices.
470 */
471 int
472 procfs_pathconf(void *v)
473 {
474 struct vop_pathconf_args /* {
475 struct vnode *a_vp;
476 int a_name;
477 register_t *a_retval;
478 } */ *ap = v;
479
480 switch (ap->a_name) {
481 case _PC_LINK_MAX:
482 *ap->a_retval = LINK_MAX;
483 return (0);
484 case _PC_MAX_CANON:
485 *ap->a_retval = MAX_CANON;
486 return (0);
487 case _PC_MAX_INPUT:
488 *ap->a_retval = MAX_INPUT;
489 return (0);
490 case _PC_PIPE_BUF:
491 *ap->a_retval = PIPE_BUF;
492 return (0);
493 case _PC_CHOWN_RESTRICTED:
494 *ap->a_retval = 1;
495 return (0);
496 case _PC_VDISABLE:
497 *ap->a_retval = _POSIX_VDISABLE;
498 return (0);
499 case _PC_SYNC_IO:
500 *ap->a_retval = 1;
501 return (0);
502 default:
503 return genfs_pathconf(ap);
504 }
505 /* NOTREACHED */
506 }
507
508 /*
509 * _print is used for debugging.
510 * just print a readable description
511 * of (vp).
512 */
513 int
514 procfs_print(void *v)
515 {
516 struct vop_print_args /* {
517 struct vnode *a_vp;
518 } */ *ap = v;
519 struct pfsnode *pfs = VTOPFS(ap->a_vp);
520
521 printf("tag VT_PROCFS, type %d, pid %d, mode %x, flags %lx\n",
522 pfs->pfs_type, pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags);
523 return 0;
524 }
525
526 int
527 procfs_link(void *v)
528 {
529 struct vop_link_v2_args /* {
530 struct vnode *a_dvp;
531 struct vnode *a_vp;
532 struct componentname *a_cnp;
533 } */ *ap = v;
534
535 VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
536 return (EROFS);
537 }
538
539 int
540 procfs_symlink(void *v)
541 {
542 struct vop_symlink_v3_args /* {
543 struct vnode *a_dvp;
544 struct vnode **a_vpp;
545 struct componentname *a_cnp;
546 struct vattr *a_vap;
547 char *a_target;
548 } */ *ap = v;
549
550 VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
551 return (EROFS);
552 }
553
554 /*
555 * Works out the path to the target process's current
556 * working directory or chroot. If the caller is in a chroot and
557 * can't "reach" the target's cwd or root (or some other error
558 * occurs), a "/" is returned for the path.
559 */
560 static void
561 procfs_dir(pfstype t, struct lwp *caller, struct proc *target, char **bpp,
562 char *path, size_t len)
563 {
564 struct cwdinfo *cwdi;
565 struct vnode *vp, *rvp;
566 char *bp;
567
568 /*
569 * Lock target cwdi and take a reference to the vnode
570 * we are interested in to prevent it from disappearing
571 * before getcwd_common() below.
572 */
573 rw_enter(&target->p_cwdi->cwdi_lock, RW_READER);
574 switch (t) {
575 case PFScwd:
576 vp = target->p_cwdi->cwdi_cdir;
577 break;
578 case PFSchroot:
579 vp = target->p_cwdi->cwdi_rdir;
580 break;
581 default:
582 rw_exit(&target->p_cwdi->cwdi_lock);
583 return;
584 }
585 if (vp != NULL)
586 vref(vp);
587 rw_exit(&target->p_cwdi->cwdi_lock);
588
589 cwdi = caller->l_proc->p_cwdi;
590 rw_enter(&cwdi->cwdi_lock, RW_READER);
591
592 rvp = cwdi->cwdi_rdir;
593 bp = bpp ? *bpp : NULL;
594
595 /*
596 * XXX: this horrible kludge avoids locking panics when
597 * attempting to lookup links that point to within procfs
598 */
599 if (vp != NULL && vp->v_tag == VT_PROCFS) {
600 if (bpp) {
601 *--bp = '/';
602 *bpp = bp;
603 }
604 vrele(vp);
605 rw_exit(&cwdi->cwdi_lock);
606 return;
607 }
608
609 if (rvp == NULL)
610 rvp = rootvnode;
611 if (vp == NULL || getcwd_common(vp, rvp, bp ? &bp : NULL, path,
612 len / 2, 0, caller) != 0) {
613 if (bpp) {
614 bp = *bpp;
615 *--bp = '/';
616 }
617 }
618
619 if (bpp)
620 *bpp = bp;
621
622 if (vp != NULL)
623 vrele(vp);
624 rw_exit(&cwdi->cwdi_lock);
625 }
626
627 /*
628 * Invent attributes for pfsnode (vp) and store
629 * them in (vap).
630 * Directories lengths are returned as zero since
631 * any real length would require the genuine size
632 * to be computed, and nothing cares anyway.
633 *
634 * this is relatively minimal for procfs.
635 */
636 int
637 procfs_getattr(void *v)
638 {
639 struct vop_getattr_args /* {
640 struct vnode *a_vp;
641 struct vattr *a_vap;
642 kauth_cred_t a_cred;
643 } */ *ap = v;
644 struct vnode *vp = ap->a_vp;
645 struct pfsnode *pfs = VTOPFS(vp);
646 struct vattr *vap = ap->a_vap;
647 struct proc *procp;
648 char *path, *bp, bf[16];
649 int error;
650
651 /* first check the process still exists */
652 switch (pfs->pfs_type) {
653 case PFSroot:
654 case PFScurproc:
655 case PFSself:
656 procp = NULL;
657 break;
658
659 default:
660 error =
661 procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &procp, ENOENT);
662 if (error != 0)
663 return (error);
664 break;
665 }
666
667 switch (pfs->pfs_type) {
668 case PFStask:
669 if (pfs->pfs_fd == -1) {
670 path = NULL;
671 break;
672 }
673 /*FALLTHROUGH*/
674 case PFScwd:
675 case PFSchroot:
676 path = malloc(MAXPATHLEN + 4, M_TEMP, M_WAITOK);
677 if (path == NULL && procp != NULL) {
678 procfs_proc_unlock(procp);
679 return (ENOMEM);
680 }
681 break;
682
683 default:
684 path = NULL;
685 break;
686 }
687
688 if (procp != NULL) {
689 mutex_enter(procp->p_lock);
690 error = kauth_authorize_process(kauth_cred_get(),
691 KAUTH_PROCESS_CANSEE, procp,
692 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
693 mutex_exit(procp->p_lock);
694 if (error != 0) {
695 procfs_proc_unlock(procp);
696 if (path != NULL)
697 free(path, M_TEMP);
698 return (ENOENT);
699 }
700 }
701
702 error = 0;
703
704 /* start by zeroing out the attributes */
705 vattr_null(vap);
706
707 /* next do all the common fields */
708 vap->va_type = ap->a_vp->v_type;
709 vap->va_mode = pfs->pfs_mode;
710 vap->va_fileid = pfs->pfs_fileno;
711 vap->va_flags = 0;
712 vap->va_blocksize = PAGE_SIZE;
713
714 /*
715 * Make all times be current TOD.
716 *
717 * It would be possible to get the process start
718 * time from the p_stats structure, but there's
719 * no "file creation" time stamp anyway, and the
720 * p_stats structure is not addressable if u. gets
721 * swapped out for that process.
722 */
723 getnanotime(&vap->va_ctime);
724 vap->va_atime = vap->va_mtime = vap->va_ctime;
725 if (procp)
726 TIMEVAL_TO_TIMESPEC(&procp->p_stats->p_start,
727 &vap->va_birthtime);
728 else
729 getnanotime(&vap->va_birthtime);
730
731 switch (pfs->pfs_type) {
732 case PFSmem:
733 case PFSregs:
734 case PFSfpregs:
735 #if defined(__HAVE_PROCFS_MACHDEP) && defined(PROCFS_MACHDEP_PROTECT_CASES)
736 PROCFS_MACHDEP_PROTECT_CASES
737 #endif
738 /*
739 * If the process has exercised some setuid or setgid
740 * privilege, then rip away read/write permission so
741 * that only root can gain access.
742 */
743 if (procp->p_flag & PK_SUGID)
744 vap->va_mode &= ~(S_IRUSR|S_IWUSR);
745 /* FALLTHROUGH */
746 case PFSstatus:
747 case PFSstat:
748 case PFSnote:
749 case PFSnotepg:
750 case PFScmdline:
751 case PFSenviron:
752 case PFSemul:
753 case PFSstatm:
754
755 case PFSmap:
756 case PFSmaps:
757 case PFSlimit:
758 case PFSauxv:
759 vap->va_nlink = 1;
760 vap->va_uid = kauth_cred_geteuid(procp->p_cred);
761 vap->va_gid = kauth_cred_getegid(procp->p_cred);
762 break;
763 case PFScwd:
764 case PFSchroot:
765 case PFSmeminfo:
766 case PFSdevices:
767 case PFScpuinfo:
768 case PFSuptime:
769 case PFSmounts:
770 case PFScpustat:
771 case PFSloadavg:
772 case PFSversion:
773 case PFSexe:
774 case PFSself:
775 case PFScurproc:
776 case PFSroot:
777 vap->va_nlink = 1;
778 vap->va_uid = vap->va_gid = 0;
779 break;
780
781 case PFSproc:
782 case PFStask:
783 case PFSfile:
784 case PFSfd:
785 break;
786
787 default:
788 panic("%s: %d/1", __func__, pfs->pfs_type);
789 }
790
791 /*
792 * now do the object specific fields
793 *
794 * The size could be set from struct reg, but it's hardly
795 * worth the trouble, and it puts some (potentially) machine
796 * dependent data into this machine-independent code. If it
797 * becomes important then this function should break out into
798 * a per-file stat function in the corresponding .c file.
799 */
800
801 switch (pfs->pfs_type) {
802 case PFSroot:
803 vap->va_bytes = vap->va_size = DEV_BSIZE;
804 break;
805
806 case PFSself:
807 case PFScurproc:
808 vap->va_bytes = vap->va_size =
809 snprintf(bf, sizeof(bf), "%ld", (long)curproc->p_pid);
810 break;
811 case PFStask:
812 if (pfs->pfs_fd != -1) {
813 vap->va_nlink = 1;
814 vap->va_uid = 0;
815 vap->va_gid = 0;
816 vap->va_bytes = vap->va_size =
817 snprintf(bf, sizeof(bf), "..");
818 break;
819 }
820 /*FALLTHROUGH*/
821 case PFSfd:
822 if (pfs->pfs_fd != -1) {
823 file_t *fp;
824
825 fp = fd_getfile2(procp, pfs->pfs_fd);
826 if (fp == NULL) {
827 error = EBADF;
828 break;
829 }
830 vap->va_nlink = 1;
831 vap->va_uid = kauth_cred_geteuid(fp->f_cred);
832 vap->va_gid = kauth_cred_getegid(fp->f_cred);
833 switch (fp->f_type) {
834 case DTYPE_VNODE:
835 vap->va_bytes = vap->va_size =
836 fp->f_vnode->v_size;
837 break;
838 default:
839 vap->va_bytes = vap->va_size = 0;
840 break;
841 }
842 closef(fp);
843 break;
844 }
845 /*FALLTHROUGH*/
846 case PFSproc:
847 vap->va_nlink = 2;
848 vap->va_uid = kauth_cred_geteuid(procp->p_cred);
849 vap->va_gid = kauth_cred_getegid(procp->p_cred);
850 vap->va_bytes = vap->va_size = DEV_BSIZE;
851 break;
852
853 case PFSfile:
854 error = EOPNOTSUPP;
855 break;
856
857 case PFSmem:
858 vap->va_bytes = vap->va_size =
859 ctob(procp->p_vmspace->vm_tsize +
860 procp->p_vmspace->vm_dsize +
861 procp->p_vmspace->vm_ssize);
862 break;
863
864 case PFSauxv:
865 vap->va_bytes = vap->va_size = procp->p_execsw->es_arglen;
866 break;
867
868 #if defined(PT_GETREGS) || defined(PT_SETREGS)
869 case PFSregs:
870 vap->va_bytes = vap->va_size = sizeof(struct reg);
871 break;
872 #endif
873
874 #if defined(PT_GETFPREGS) || defined(PT_SETFPREGS)
875 case PFSfpregs:
876 vap->va_bytes = vap->va_size = sizeof(struct fpreg);
877 break;
878 #endif
879
880 case PFSstatus:
881 case PFSstat:
882 case PFSnote:
883 case PFSnotepg:
884 case PFScmdline:
885 case PFSenviron:
886 case PFSmeminfo:
887 case PFSdevices:
888 case PFScpuinfo:
889 case PFSuptime:
890 case PFSmounts:
891 case PFScpustat:
892 case PFSloadavg:
893 case PFSstatm:
894 case PFSversion:
895 vap->va_bytes = vap->va_size = 0;
896 break;
897 case PFSlimit:
898 case PFSmap:
899 case PFSmaps:
900 /*
901 * Advise a larger blocksize for the map files, so that
902 * they may be read in one pass.
903 */
904 vap->va_blocksize = 4 * PAGE_SIZE;
905 vap->va_bytes = vap->va_size = 0;
906 break;
907
908 case PFScwd:
909 case PFSchroot:
910 bp = path + MAXPATHLEN;
911 *--bp = '\0';
912 procfs_dir(pfs->pfs_type, curlwp, procp, &bp, path,
913 MAXPATHLEN);
914 vap->va_bytes = vap->va_size = strlen(bp);
915 break;
916
917 case PFSexe:
918 vap->va_bytes = vap->va_size = strlen(procp->p_path);
919 break;
920
921 case PFSemul:
922 vap->va_bytes = vap->va_size = strlen(procp->p_emul->e_name);
923 break;
924
925 #ifdef __HAVE_PROCFS_MACHDEP
926 PROCFS_MACHDEP_NODETYPE_CASES
927 error = procfs_machdep_getattr(ap->a_vp, vap, procp);
928 break;
929 #endif
930
931 default:
932 panic("%s: %d/2", __func__, pfs->pfs_type);
933 }
934
935 if (procp != NULL)
936 procfs_proc_unlock(procp);
937 if (path != NULL)
938 free(path, M_TEMP);
939
940 return (error);
941 }
942
/*
 * Attribute changes on procfs nodes are accepted and silently ignored.
 *
 * Reporting an error here would break callers that set attributes as a
 * side effect of another operation: creat() sets the file length to 0,
 * so something like "echo $note > /proc/$pid/note" would fail.
 *
 * Always returns 0.
 */
/*ARGSUSED*/
int
procfs_setattr(void *v)
{

	return 0;
}
958
959 /*
960 * implement access checking.
961 *
962 * actually, the check for super-user is slightly
963 * broken since it will allow read access to write-only
964 * objects. this doesn't cause any particular trouble
965 * but does mean that the i/o entry points need to check
966 * that the operation really does make sense.
967 */
968 int
969 procfs_access(void *v)
970 {
971 struct vop_access_args /* {
972 struct vnode *a_vp;
973 accmode_t a_accmode;
974 kauth_cred_t a_cred;
975 } */ *ap = v;
976 struct vattr va;
977 int error;
978
979 if ((error = VOP_GETATTR(ap->a_vp, &va, ap->a_cred)) != 0)
980 return (error);
981
982 return kauth_authorize_vnode(ap->a_cred,
983 KAUTH_ACCESS_ACTION(ap->a_accmode, ap->a_vp->v_type, va.va_mode),
984 ap->a_vp, NULL, genfs_can_access(ap->a_vp, ap->a_cred,
985 va.va_uid, va.va_gid, va.va_mode, NULL, ap->a_accmode));
986 }
987
988 /*
989 * lookup. this is incredibly complicated in the
990 * general case, however for most pseudo-filesystems
991 * very little needs to be done.
992 *
993 * Locking isn't hard here, just poorly documented.
994 *
995 * If we're looking up ".", just vref the parent & return it.
996 *
997 * If we're looking up "..", unlock the parent, and lock "..". If everything
998 * went ok, and we're on the last component and the caller requested the
999 * parent locked, try to re-lock the parent. We do this to prevent lock
1000 * races.
1001 *
1002 * For anything else, get the needed node. Then unlock the parent if not
1003 * the last component or not LOCKPARENT (i.e. if we wouldn't re-lock the
1004 * parent in the .. case).
1005 *
1006 * We try to exit with the parent locked in error cases.
1007 */
1008 int
1009 procfs_lookup(void *v)
1010 {
1011 struct vop_lookup_v2_args /* {
1012 struct vnode * a_dvp;
1013 struct vnode ** a_vpp;
1014 struct componentname * a_cnp;
1015 } */ *ap = v;
1016 struct componentname *cnp = ap->a_cnp;
1017 struct vnode **vpp = ap->a_vpp;
1018 struct vnode *dvp = ap->a_dvp;
1019 const char *pname = cnp->cn_nameptr;
1020 const struct proc_target *pt = NULL;
1021 struct vnode *fvp;
1022 pid_t pid, vnpid;
1023 struct pfsnode *pfs;
1024 struct proc *p = NULL;
1025 struct lwp *plwp;
1026 int i, error;
1027 pfstype type;
1028
1029 *vpp = NULL;
1030
1031 if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
1032 return (EROFS);
1033
1034 if (cnp->cn_namelen == 1 && *pname == '.') {
1035 *vpp = dvp;
1036 vref(dvp);
1037 return (0);
1038 }
1039
1040 pfs = VTOPFS(dvp);
1041 switch (pfs->pfs_type) {
1042 case PFSroot:
1043 /*
1044 * Shouldn't get here with .. in the root node.
1045 */
1046 if (cnp->cn_flags & ISDOTDOT)
1047 return (EIO);
1048
1049 for (i = 0; i < nproc_root_targets; i++) {
1050 pt = &proc_root_targets[i];
1051 /*
1052 * check for node match. proc is always NULL here,
1053 * so call pt_valid with constant NULL lwp.
1054 */
1055 if (cnp->cn_namelen == pt->pt_namlen &&
1056 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
1057 (pt->pt_valid == NULL ||
1058 (*pt->pt_valid)(NULL, dvp->v_mount)))
1059 break;
1060 }
1061
1062 if (i != nproc_root_targets) {
1063 error = procfs_allocvp(dvp->v_mount, vpp, 0,
1064 pt->pt_pfstype, -1);
1065 return (error);
1066 }
1067
1068 if (CNEQ(cnp, "curproc", 7)) {
1069 pid = curproc->p_pid;
1070 vnpid = 0;
1071 type = PFScurproc;
1072 } else if (CNEQ(cnp, "self", 4)) {
1073 pid = curproc->p_pid;
1074 vnpid = 0;
1075 type = PFSself;
1076 } else {
1077 pid = (pid_t)atoi(pname, cnp->cn_namelen);
1078 vnpid = pid;
1079 type = PFSproc;
1080 }
1081
1082 if (procfs_proc_lock(dvp->v_mount, pid, &p, ESRCH) != 0)
1083 break;
1084 error = procfs_allocvp(dvp->v_mount, vpp, vnpid, type, -1);
1085 procfs_proc_unlock(p);
1086 return (error);
1087
1088 case PFSproc:
1089 if (cnp->cn_flags & ISDOTDOT) {
1090 error = procfs_allocvp(dvp->v_mount, vpp, 0, PFSroot,
1091 -1);
1092 return (error);
1093 }
1094
1095 if (procfs_proc_lock(dvp->v_mount, pfs->pfs_pid, &p,
1096 ESRCH) != 0)
1097 break;
1098
1099 mutex_enter(p->p_lock);
1100 LIST_FOREACH(plwp, &p->p_lwps, l_sibling) {
1101 if (plwp->l_stat != LSZOMB)
1102 break;
1103 }
1104 /* Process is exiting if no-LWPS or all LWPs are LSZOMB */
1105 if (plwp == NULL) {
1106 mutex_exit(p->p_lock);
1107 procfs_proc_unlock(p);
1108 return ESRCH;
1109 }
1110
1111 lwp_addref(plwp);
1112 mutex_exit(p->p_lock);
1113
1114 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) {
1115 int found;
1116
1117 found = cnp->cn_namelen == pt->pt_namlen &&
1118 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
1119 (pt->pt_valid == NULL
1120 || (*pt->pt_valid)(plwp, dvp->v_mount));
1121 if (found)
1122 break;
1123 }
1124 lwp_delref(plwp);
1125
1126 if (i == nproc_targets) {
1127 procfs_proc_unlock(p);
1128 break;
1129 }
1130 if (pt->pt_pfstype == PFSfile) {
1131 fvp = p->p_textvp;
1132 /* We already checked that it exists. */
1133 vref(fvp);
1134 procfs_proc_unlock(p);
1135 *vpp = fvp;
1136 return (0);
1137 }
1138
1139 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
1140 pt->pt_pfstype, -1);
1141 procfs_proc_unlock(p);
1142 return (error);
1143
1144 case PFSfd: {
1145 int fd;
1146 file_t *fp;
1147
1148 if ((error = procfs_proc_lock(dvp->v_mount, pfs->pfs_pid, &p,
1149 ENOENT)) != 0)
1150 return error;
1151
1152 if (cnp->cn_flags & ISDOTDOT) {
1153 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
1154 PFSproc, -1);
1155 procfs_proc_unlock(p);
1156 return (error);
1157 }
1158 fd = atoi(pname, cnp->cn_namelen);
1159
1160 fp = fd_getfile2(p, fd);
1161 if (fp == NULL) {
1162 procfs_proc_unlock(p);
1163 return ENOENT;
1164 }
1165 fvp = fp->f_vnode;
1166
1167 /* Don't show directories */
1168 if (fp->f_type == DTYPE_VNODE && fvp->v_type != VDIR) {
1169 vref(fvp);
1170 closef(fp);
1171 procfs_proc_unlock(p);
1172 *vpp = fvp;
1173 return 0;
1174 }
1175
1176 closef(fp);
1177 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
1178 PFSfd, fd);
1179 procfs_proc_unlock(p);
1180 return error;
1181 }
1182 case PFStask: {
1183 int xpid;
1184
1185 if ((error = procfs_proc_lock(dvp->v_mount, pfs->pfs_pid, &p,
1186 ENOENT)) != 0)
1187 return error;
1188
1189 if (cnp->cn_flags & ISDOTDOT) {
1190 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
1191 PFSproc, -1);
1192 procfs_proc_unlock(p);
1193 return (error);
1194 }
1195 xpid = atoi(pname, cnp->cn_namelen);
1196
1197 if (xpid != pfs->pfs_pid) {
1198 procfs_proc_unlock(p);
1199 return ENOENT;
1200 }
1201 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
1202 PFStask, 0);
1203 procfs_proc_unlock(p);
1204 return error;
1205 }
1206 default:
1207 return (ENOTDIR);
1208 }
1209
1210 return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS);
1211 }
1212
1213 int
1214 procfs_validfile(struct lwp *l, struct mount *mp)
1215 {
1216 return l != NULL && l->l_proc != NULL && l->l_proc->p_textvp != NULL;
1217 }
1218
1219 static int
1220 procfs_validfile_linux(struct lwp *l, struct mount *mp)
1221 {
1222 return procfs_use_linux_compat(mp) &&
1223 (l == NULL || l->l_proc == NULL || procfs_validfile(l, mp));
1224 }
1225
/*
 * State shared between procfs_readdir() and
 * procfs_root_readdir_callback() while the per-process entries of the
 * procfs root directory are generated.
 */
struct procfs_root_readdir_ctx {
	struct uio *uiop;	/* destination of the generated dirents */
	off_t *cookies;		/* next free cookie slot, or NULL if unused */
	int ncookies;		/* number of entries emitted so far */
	off_t off;		/* directory offset of the next process entry */
	off_t startoff;		/* first directory offset to be emitted */
	int error;		/* uiomove() failure reported by the callback */
};
1234
1235 static int
1236 procfs_root_readdir_callback(struct proc *p, void *arg)
1237 {
1238 struct procfs_root_readdir_ctx *ctxp = arg;
1239 struct dirent d;
1240 struct uio *uiop;
1241 int error;
1242
1243 uiop = ctxp->uiop;
1244 if (uiop->uio_resid < UIO_MX)
1245 return -1; /* no space */
1246
1247 if (ctxp->off < ctxp->startoff) {
1248 ctxp->off++;
1249 return 0;
1250 }
1251
1252 if (kauth_authorize_process(kauth_cred_get(),
1253 KAUTH_PROCESS_CANSEE, p,
1254 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL) != 0)
1255 return 0;
1256
1257 memset(&d, 0, UIO_MX);
1258 d.d_reclen = UIO_MX;
1259 d.d_fileno = PROCFS_FILENO(p->p_pid, PFSproc, -1);
1260 d.d_namlen = snprintf(d.d_name,
1261 UIO_MX - offsetof(struct dirent, d_name), "%ld", (long)p->p_pid);
1262 d.d_type = DT_DIR;
1263
1264 mutex_exit(&proc_lock);
1265 error = uiomove(&d, UIO_MX, uiop);
1266 mutex_enter(&proc_lock);
1267 if (error) {
1268 ctxp->error = error;
1269 return -1;
1270 }
1271
1272 ctxp->ncookies++;
1273 if (ctxp->cookies)
1274 *(ctxp->cookies)++ = ctxp->off + 1;
1275 ctxp->off++;
1276
1277 return 0;
1278 }
1279
1280 /*
1281 * readdir returns directory entries from pfsnode (vp).
1282 *
1283 * the strategy here with procfs is to generate a single
1284 * directory entry at a time (struct dirent) and then
1285 * copy that out to userland using uiomove. a more efficent
1286 * though more complex implementation, would try to minimize
1287 * the number of calls to uiomove(). for procfs, this is
1288 * hardly worth the added code complexity.
1289 *
1290 * this should just be done through read()
1291 */
1292 int
1293 procfs_readdir(void *v)
1294 {
1295 struct vop_readdir_args /* {
1296 struct vnode *a_vp;
1297 struct uio *a_uio;
1298 kauth_cred_t a_cred;
1299 int *a_eofflag;
1300 off_t **a_cookies;
1301 int *a_ncookies;
1302 } */ *ap = v;
1303 struct uio *uio = ap->a_uio;
1304 struct dirent d;
1305 struct pfsnode *pfs;
1306 off_t i;
1307 int error;
1308 off_t *cookies = NULL;
1309 int ncookies;
1310 struct vnode *vp;
1311 const struct proc_target *pt;
1312 struct procfs_root_readdir_ctx ctx;
1313 struct lwp *l;
1314 int nfd;
1315
1316 vp = ap->a_vp;
1317 pfs = VTOPFS(vp);
1318
1319 if (uio->uio_resid < UIO_MX)
1320 return (EINVAL);
1321 if (uio->uio_offset < 0)
1322 return (EINVAL);
1323
1324 error = 0;
1325 i = uio->uio_offset;
1326 memset(&d, 0, UIO_MX);
1327 d.d_reclen = UIO_MX;
1328 ncookies = uio->uio_resid / UIO_MX;
1329
1330 switch (pfs->pfs_type) {
1331 /*
1332 * this is for the process-specific sub-directories.
1333 * all that is needed to is copy out all the entries
1334 * from the procent[] table (top of this file).
1335 */
1336 case PFSproc: {
1337 struct proc *p;
1338
1339 if (i >= nproc_targets)
1340 return 0;
1341
1342 if (procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &p, ESRCH) != 0)
1343 break;
1344
1345 if (ap->a_ncookies) {
1346 ncookies = uimin(ncookies, (nproc_targets - i));
1347 cookies = malloc(ncookies * sizeof (off_t),
1348 M_TEMP, M_WAITOK);
1349 *ap->a_cookies = cookies;
1350 }
1351
1352 for (pt = &proc_targets[i];
1353 uio->uio_resid >= UIO_MX && i < nproc_targets; pt++, i++) {
1354 if (pt->pt_valid) {
1355 /* XXXSMP LWP can disappear */
1356 mutex_enter(p->p_lock);
1357 l = LIST_FIRST(&p->p_lwps);
1358 KASSERT(l != NULL);
1359 mutex_exit(p->p_lock);
1360 if ((*pt->pt_valid)(l, vp->v_mount) == 0)
1361 continue;
1362 }
1363
1364 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid,
1365 pt->pt_pfstype, -1);
1366 d.d_namlen = pt->pt_namlen;
1367 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1);
1368 d.d_type = pt->pt_type;
1369
1370 if ((error = uiomove(&d, UIO_MX, uio)) != 0)
1371 break;
1372 if (cookies)
1373 *cookies++ = i + 1;
1374 }
1375
1376 procfs_proc_unlock(p);
1377 break;
1378 }
1379 case PFSfd: {
1380 struct proc *p;
1381 file_t *fp;
1382 int lim, nc = 0;
1383
1384 if ((error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &p,
1385 ESRCH)) != 0)
1386 return error;
1387
1388 /* XXX Should this be by file as well? */
1389 if (kauth_authorize_process(kauth_cred_get(),
1390 KAUTH_PROCESS_CANSEE, p,
1391 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_OPENFILES), NULL,
1392 NULL) != 0) {
1393 procfs_proc_unlock(p);
1394 return ESRCH;
1395 }
1396
1397 nfd = atomic_load_consume(&p->p_fd->fd_dt)->dt_nfiles;
1398
1399 lim = uimin((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
1400 if (i >= lim) {
1401 procfs_proc_unlock(p);
1402 return 0;
1403 }
1404
1405 if (ap->a_ncookies) {
1406 ncookies = uimin(ncookies, (nfd + 2 - i));
1407 cookies = malloc(ncookies * sizeof (off_t),
1408 M_TEMP, M_WAITOK);
1409 *ap->a_cookies = cookies;
1410 }
1411
1412 for (; i < 2 && uio->uio_resid >= UIO_MX; i++) {
1413 pt = &proc_targets[i];
1414 d.d_namlen = pt->pt_namlen;
1415 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid,
1416 pt->pt_pfstype, -1);
1417 (void)memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1);
1418 d.d_type = pt->pt_type;
1419 if ((error = uiomove(&d, UIO_MX, uio)) != 0)
1420 break;
1421 if (cookies)
1422 *cookies++ = i + 1;
1423 nc++;
1424 }
1425 if (error) {
1426 ncookies = nc;
1427 break;
1428 }
1429 for (; uio->uio_resid >= UIO_MX && i < nfd; i++) {
1430 /* check the descriptor exists */
1431 if ((fp = fd_getfile2(p, i - 2)) == NULL)
1432 continue;
1433 closef(fp);
1434
1435 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, PFSfd, i - 2);
1436 d.d_namlen = snprintf(d.d_name, sizeof(d.d_name),
1437 "%lld", (long long)(i - 2));
1438 d.d_type = VREG;
1439 if ((error = uiomove(&d, UIO_MX, uio)) != 0)
1440 break;
1441 if (cookies)
1442 *cookies++ = i + 1;
1443 nc++;
1444 }
1445 ncookies = nc;
1446 procfs_proc_unlock(p);
1447 break;
1448 }
1449 case PFStask: {
1450 struct proc *p;
1451 int nc = 0;
1452
1453 if ((error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &p,
1454 ESRCH)) != 0)
1455 return error;
1456
1457 nfd = 3; /* ., .., pid */
1458
1459 if (ap->a_ncookies) {
1460 ncookies = uimin(ncookies, (nfd + 2 - i));
1461 cookies = malloc(ncookies * sizeof (off_t),
1462 M_TEMP, M_WAITOK);
1463 *ap->a_cookies = cookies;
1464 }
1465
1466 for (; i < 2 && uio->uio_resid >= UIO_MX; i++) {
1467 pt = &proc_targets[i];
1468 d.d_namlen = pt->pt_namlen;
1469 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid,
1470 pt->pt_pfstype, -1);
1471 (void)memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1);
1472 d.d_type = pt->pt_type;
1473 if ((error = uiomove(&d, UIO_MX, uio)) != 0)
1474 break;
1475 if (cookies)
1476 *cookies++ = i + 1;
1477 nc++;
1478 }
1479 if (error) {
1480 ncookies = nc;
1481 break;
1482 }
1483 for (; uio->uio_resid >= UIO_MX && i < nfd; i++) {
1484 /* check the descriptor exists */
1485 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, PFStask,
1486 i - 2);
1487 d.d_namlen = snprintf(d.d_name, sizeof(d.d_name),
1488 "%ld", (long)pfs->pfs_pid);
1489 d.d_type = DT_LNK;
1490 if ((error = uiomove(&d, UIO_MX, uio)) != 0)
1491 break;
1492 if (cookies)
1493 *cookies++ = i + 1;
1494 nc++;
1495 }
1496 ncookies = nc;
1497 procfs_proc_unlock(p);
1498 break;
1499 }
1500
1501 /*
1502 * this is for the root of the procfs filesystem
1503 * what is needed are special entries for "curproc"
1504 * and "self" followed by an entry for each process
1505 * on allproc.
1506 */
1507
1508 case PFSroot: {
1509 int nc = 0;
1510
1511 if (ap->a_ncookies) {
1512 /*
1513 * XXX Potentially allocating too much space here,
1514 * but I'm lazy. This loop needs some work.
1515 */
1516 cookies = malloc(ncookies * sizeof (off_t),
1517 M_TEMP, M_WAITOK);
1518 *ap->a_cookies = cookies;
1519 }
1520 error = 0;
1521 /* 0 ... 3 are static entries. */
1522 for (; i <= 3 && uio->uio_resid >= UIO_MX; i++) {
1523 switch (i) {
1524 case 0: /* `.' */
1525 case 1: /* `..' */
1526 d.d_fileno = PROCFS_FILENO(0, PFSroot, -1);
1527 d.d_namlen = i + 1;
1528 memcpy(d.d_name, "..", d.d_namlen);
1529 d.d_name[i + 1] = '\0';
1530 d.d_type = DT_DIR;
1531 break;
1532
1533 case 2:
1534 d.d_fileno = PROCFS_FILENO(0, PFScurproc, -1);
1535 d.d_namlen = sizeof("curproc") - 1;
1536 memcpy(d.d_name, "curproc", sizeof("curproc"));
1537 d.d_type = DT_LNK;
1538 break;
1539
1540 case 3:
1541 d.d_fileno = PROCFS_FILENO(0, PFSself, -1);
1542 d.d_namlen = sizeof("self") - 1;
1543 memcpy(d.d_name, "self", sizeof("self"));
1544 d.d_type = DT_LNK;
1545 break;
1546 }
1547
1548 if ((error = uiomove(&d, UIO_MX, uio)) != 0)
1549 break;
1550 nc++;
1551 if (cookies)
1552 *cookies++ = i + 1;
1553 }
1554 /* 4 ... are process entries. */
1555 ctx.uiop = uio;
1556 ctx.error = 0;
1557 ctx.off = 4;
1558 ctx.startoff = i;
1559 ctx.cookies = cookies;
1560 ctx.ncookies = nc;
1561 proclist_foreach_call(&allproc,
1562 procfs_root_readdir_callback, &ctx);
1563 cookies = ctx.cookies;
1564 nc = ctx.ncookies;
1565 error = ctx.error;
1566 if (error)
1567 break;
1568
1569 /* misc entries. */
1570 if (i < ctx.off)
1571 i = ctx.off;
1572 if (i >= ctx.off + nproc_root_targets)
1573 break;
1574 for (pt = &proc_root_targets[i - ctx.off];
1575 uio->uio_resid >= UIO_MX &&
1576 pt < &proc_root_targets[nproc_root_targets];
1577 pt++, i++) {
1578 if (pt->pt_valid &&
1579 (*pt->pt_valid)(NULL, vp->v_mount) == 0)
1580 continue;
1581 d.d_fileno = PROCFS_FILENO(0, pt->pt_pfstype, -1);
1582 d.d_namlen = pt->pt_namlen;
1583 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1);
1584 d.d_type = pt->pt_type;
1585
1586 if ((error = uiomove(&d, UIO_MX, uio)) != 0)
1587 break;
1588 nc++;
1589 if (cookies)
1590 *cookies++ = i + 1;
1591 }
1592
1593 ncookies = nc;
1594 break;
1595 }
1596
1597 default:
1598 error = ENOTDIR;
1599 break;
1600 }
1601
1602 if (ap->a_ncookies) {
1603 if (error) {
1604 if (cookies)
1605 free(*ap->a_cookies, M_TEMP);
1606 *ap->a_ncookies = 0;
1607 *ap->a_cookies = NULL;
1608 } else
1609 *ap->a_ncookies = ncookies;
1610 }
1611 uio->uio_offset = i;
1612 return (error);
1613 }
1614
1615 /*
1616 * readlink reads the link of `curproc' and others
1617 */
1618 int
1619 procfs_readlink(void *v)
1620 {
1621 struct vop_readlink_args *ap = v;
1622 char bf[16]; /* should be enough */
1623 char *bp = bf;
1624 char *path = NULL;
1625 int len = 0;
1626 int error = 0;
1627 struct vnode *vp = ap->a_vp;
1628 struct pfsnode *pfs = VTOPFS(vp);
1629 struct proc *pown = NULL;
1630
1631 if (pfs->pfs_fileno == PROCFS_FILENO(0, PFScurproc, -1))
1632 len = snprintf(bf, sizeof(bf), "%ld", (long)curproc->p_pid);
1633 else if (pfs->pfs_fileno == PROCFS_FILENO(0, PFSself, -1))
1634 len = snprintf(bf, sizeof(bf), "%s", "curproc");
1635 else if (pfs->pfs_fileno == PROCFS_FILENO(pfs->pfs_pid, PFStask, 0))
1636 len = snprintf(bf, sizeof(bf), "..");
1637 else if (pfs->pfs_fileno == PROCFS_FILENO(pfs->pfs_pid, PFSexe, -1)) {
1638 if ((error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &pown,
1639 ESRCH)) != 0)
1640 return error;
1641 bp = pown->p_path;
1642 len = strlen(bp);
1643 } else if (pfs->pfs_fileno == PROCFS_FILENO(pfs->pfs_pid, PFScwd, -1) ||
1644 pfs->pfs_fileno == PROCFS_FILENO(pfs->pfs_pid, PFSchroot, -1)) {
1645 if ((error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &pown,
1646 ESRCH)) != 0)
1647 return error;
1648 path = malloc(MAXPATHLEN + 4, M_TEMP, M_WAITOK);
1649 if (path == NULL) {
1650 procfs_proc_unlock(pown);
1651 return (ENOMEM);
1652 }
1653 bp = path + MAXPATHLEN;
1654 *--bp = '\0';
1655 procfs_dir(PROCFS_TYPE(pfs->pfs_fileno), curlwp, pown,
1656 &bp, path, MAXPATHLEN);
1657 len = strlen(bp);
1658 } else {
1659 file_t *fp;
1660 struct vnode *vxp;
1661
1662 if ((error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &pown,
1663 ESRCH)) != 0)
1664 return error;
1665
1666 fp = fd_getfile2(pown, pfs->pfs_fd);
1667 if (fp == NULL) {
1668 procfs_proc_unlock(pown);
1669 return EBADF;
1670 }
1671
1672 switch (fp->f_type) {
1673 case DTYPE_VNODE:
1674 vxp = fp->f_vnode;
1675 if (vxp->v_type != VDIR) {
1676 error = EINVAL;
1677 break;
1678 }
1679 if ((path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK))
1680 == NULL) {
1681 error = ENOMEM;
1682 break;
1683 }
1684 bp = path + MAXPATHLEN;
1685 *--bp = '\0';
1686
1687 /*
1688 * XXX: kludge to avoid locking against ourselves
1689 * in getcwd()
1690 */
1691 if (vxp->v_tag == VT_PROCFS) {
1692 *--bp = '/';
1693 } else {
1694 rw_enter(&curproc->p_cwdi->cwdi_lock,
1695 RW_READER);
1696 vp = curproc->p_cwdi->cwdi_rdir;
1697 if (vp == NULL)
1698 vp = rootvnode;
1699 error = getcwd_common(vxp, vp, &bp, path,
1700 MAXPATHLEN / 2, 0, curlwp);
1701 rw_exit(&curproc->p_cwdi->cwdi_lock);
1702 }
1703 if (error)
1704 break;
1705 len = strlen(bp);
1706 break;
1707
1708 case DTYPE_MISC:
1709 len = snprintf(bf, sizeof(bf), "%s", "[misc]");
1710 break;
1711
1712 case DTYPE_KQUEUE:
1713 len = snprintf(bf, sizeof(bf), "%s", "[kqueue]");
1714 break;
1715
1716 case DTYPE_SEM:
1717 len = snprintf(bf, sizeof(bf), "%s", "[ksem]");
1718 break;
1719
1720 default:
1721 error = EINVAL;
1722 break;
1723 }
1724 closef(fp);
1725 }
1726
1727 if (error == 0)
1728 error = uiomove(bp, len, ap->a_uio);
1729 if (pown)
1730 procfs_proc_unlock(pown);
1731 if (path)
1732 free(path, M_TEMP);
1733 return error;
1734 }
1735
1736 int
1737 procfs_getpages(void *v)
1738 {
1739 struct vop_getpages_args /* {
1740 struct vnode *a_vp;
1741 voff_t a_offset;
1742 struct vm_page **a_m;
1743 int *a_count;
1744 int a_centeridx;
1745 vm_prot_t a_access_type;
1746 int a_advice;
1747 int a_flags;
1748 } */ *ap = v;
1749
1750 if ((ap->a_flags & PGO_LOCKED) == 0)
1751 rw_exit(ap->a_vp->v_uobj.vmobjlock);
1752
1753 return (EFAULT);
1754 }
1755
/*
 * convert decimal ascii to int
 *
 * `b' points to `len' characters that need not be NUL-terminated.
 * Returns the non-negative value, or -1 if any character is not a
 * decimal digit or the value does not fit in an int.  An empty string
 * (len == 0) yields 0.
 */
static int
atoi(const char *b, size_t len)
{
	/* Largest int value, derived locally so no extra header is needed. */
	const int int_max = (int)(~0U >> 1);
	int p = 0;

	while (len--) {
		char c = *b++;
		int digit;

		if (c < '0' || c > '9')
			return -1;
		digit = c - '0';

		/*
		 * Refuse values that would overflow: signed overflow is
		 * undefined, and a wrapped result could alias a valid
		 * pid or descriptor number.
		 */
		if (p > (int_max - digit) / 10)
			return -1;
		p = 10 * p + digit;
	}

	return p;
}
1773