procfs_vfsops.c revision 1.115 1 /* $NetBSD: procfs_vfsops.c,v 1.115 2024/05/12 17:22:29 christos Exp $ */
2
3 /*
4 * Copyright (c) 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Jan-Simon Pendry.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * @(#)procfs_vfsops.c 8.7 (Berkeley) 5/10/95
35 */
36
37 /*
38 * Copyright (c) 1993 Jan-Simon Pendry
39 *
40 * This code is derived from software contributed to Berkeley by
41 * Jan-Simon Pendry.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 * notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 * must display the following acknowledgement:
53 * This product includes software developed by the University of
54 * California, Berkeley and its contributors.
55 * 4. Neither the name of the University nor the names of its contributors
56 * may be used to endorse or promote products derived from this software
57 * without specific prior written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69 * SUCH DAMAGE.
70 *
71 * @(#)procfs_vfsops.c 8.7 (Berkeley) 5/10/95
72 */
73
74 /*
75 * procfs VFS interface
76 */
77
78 #include <sys/cdefs.h>
79 __KERNEL_RCSID(0, "$NetBSD: procfs_vfsops.c,v 1.115 2024/05/12 17:22:29 christos Exp $");
80
81 #if defined(_KERNEL_OPT)
82 #include "opt_compat_netbsd.h"
83 #endif
84
85 #include <sys/param.h>
86 #include <sys/atomic.h>
87 #include <sys/buf.h>
88 #include <sys/dirent.h>
89 #include <sys/file.h>
90 #include <sys/filedesc.h>
91 #include <sys/fstrans.h>
92 #include <sys/kauth.h>
93 #include <sys/kernel.h>
94 #include <sys/module.h>
95 #include <sys/mount.h>
96 #include <sys/proc.h>
97 #include <sys/signalvar.h>
98 #include <sys/sysctl.h>
99 #include <sys/syslog.h>
100 #include <sys/systm.h>
101 #include <sys/time.h>
102 #include <sys/vnode.h>
103
104 #include <miscfs/genfs/genfs.h>
105
106 #include <miscfs/procfs/procfs.h>
107
108 #include <uvm/uvm_extern.h> /* for PAGE_SIZE */
109
110 MODULE(MODULE_CLASS_VFS, procfs, "ptrace_common");
111
112 VFS_PROTOS(procfs);
113
114 #define PROCFS_HASHSIZE 256
115 #define PROCFS_EXEC_HOOK ((void *)1)
116 #define PROCFS_EXIT_HOOK ((void *)2)
117
118 static kauth_listener_t procfs_listener;
119 static void *procfs_exechook;
120 static void *procfs_exithook;
121 LIST_HEAD(hashhead, pfsnode);
122 static u_long procfs_hashmask;
123 static struct hashhead *procfs_hashtab;
124 static kmutex_t procfs_hashlock;
125
126 static struct hashhead *
127 procfs_hashhead(pid_t pid)
128 {
129
130 return &procfs_hashtab[pid & procfs_hashmask];
131 }
132
133 void
134 procfs_hashrem(struct pfsnode *pfs)
135 {
136
137 mutex_enter(&procfs_hashlock);
138 LIST_REMOVE(pfs, pfs_hash);
139 mutex_exit(&procfs_hashlock);
140 }
141
142 /*
143 * VFS Operations.
144 *
145 * mount system call
146 */
147 /* ARGSUSED */
148 int
149 procfs_mount(
150 struct mount *mp,
151 const char *path,
152 void *data,
153 size_t *data_len)
154 {
155 struct lwp *l = curlwp;
156 struct procfsmount *pmnt;
157 struct procfs_args *args = data;
158 int error;
159
160 if (args == NULL)
161 return EINVAL;
162
163 if (UIO_MX & (UIO_MX-1)) {
164 log(LOG_ERR, "procfs: invalid directory entry size");
165 return (EINVAL);
166 }
167
168 if (mp->mnt_flag & MNT_GETARGS) {
169 if (*data_len < sizeof *args)
170 return EINVAL;
171
172 pmnt = VFSTOPROC(mp);
173 if (pmnt == NULL)
174 return EIO;
175 args->version = PROCFS_ARGSVERSION;
176 args->flags = pmnt->pmnt_flags;
177 *data_len = sizeof *args;
178 return 0;
179 }
180
181 if (mp->mnt_flag & MNT_UPDATE)
182 return (EOPNOTSUPP);
183
184 if (*data_len >= sizeof *args && args->version != PROCFS_ARGSVERSION)
185 return EINVAL;
186
187 pmnt = kmem_zalloc(sizeof(struct procfsmount), KM_SLEEP);
188
189 mp->mnt_stat.f_namemax = PROCFS_MAXNAMLEN;
190 mp->mnt_flag |= MNT_LOCAL;
191 mp->mnt_data = pmnt;
192 vfs_getnewfsid(mp);
193
194 error = set_statvfs_info(path, UIO_USERSPACE, "procfs", UIO_SYSSPACE,
195 mp->mnt_op->vfs_name, mp, l);
196 if (*data_len >= sizeof *args)
197 pmnt->pmnt_flags = args->flags;
198 else
199 pmnt->pmnt_flags = 0;
200
201 mp->mnt_iflag |= IMNT_MPSAFE | IMNT_SHRLOOKUP;
202 return error;
203 }
204
205 /*
206 * unmount system call
207 */
208 int
209 procfs_unmount(struct mount *mp, int mntflags)
210 {
211 int error;
212 int flags = 0;
213
214 if (mntflags & MNT_FORCE)
215 flags |= FORCECLOSE;
216
217 if ((error = vflush(mp, 0, flags)) != 0)
218 return (error);
219
220 kmem_free(mp->mnt_data, sizeof(struct procfsmount));
221 mp->mnt_data = NULL;
222
223 return 0;
224 }
225
226 int
227 procfs_root(struct mount *mp, int lktype, struct vnode **vpp)
228 {
229 int error;
230
231 error = procfs_allocvp(mp, vpp, 0, PFSroot, -1);
232 if (error == 0) {
233 error = vn_lock(*vpp, lktype);
234 if (error != 0) {
235 vrele(*vpp);
236 *vpp = NULL;
237 }
238 }
239
240 return error;
241 }
242
/*
 * Start hook: procfs has nothing to do here, always succeeds.
 */
/* ARGSUSED */
int
procfs_start(struct mount *mp, int flags)
{

	return 0;
}
250
251 /*
252 * Get file system statistics.
253 */
254 int
255 procfs_statvfs(struct mount *mp, struct statvfs *sbp)
256 {
257
258 genfs_statvfs(mp, sbp);
259
260 sbp->f_bsize = PAGE_SIZE;
261 sbp->f_frsize = PAGE_SIZE;
262 sbp->f_iosize = PAGE_SIZE;
263 sbp->f_blocks = 1;
264 sbp->f_files = maxproc; /* approx */
265 sbp->f_ffree = maxproc - atomic_load_relaxed(&nprocs); /* approx */
266 sbp->f_favail = maxproc - atomic_load_relaxed(&nprocs); /* approx */
267
268 return (0);
269 }
270
271 /*ARGSUSED*/
272 int
273 procfs_sync(
274 struct mount *mp,
275 int waitfor,
276 kauth_cred_t uc)
277 {
278
279 return (0);
280 }
281
/*
 * Lookup by inode number is not supported; always fails with
 * EOPNOTSUPP and leaves *vpp untouched.
 */
/*ARGSUSED*/
int
procfs_vget(struct mount *mp, ino_t ino, int lktype, struct vnode **vpp)
{

	return EOPNOTSUPP;
}
289
290 int
291 procfs_loadvnode(struct mount *mp, struct vnode *vp,
292 const void *key, size_t key_len, const void **new_key)
293 {
294 int error;
295 struct pfskey pfskey;
296 struct pfsnode *pfs;
297
298 KASSERT(key_len == sizeof(pfskey));
299 memcpy(&pfskey, key, key_len);
300
301 pfs = kmem_alloc(sizeof(*pfs), KM_SLEEP);
302 pfs->pfs_pid = pfskey.pk_pid;
303 pfs->pfs_type = pfskey.pk_type;
304 pfs->pfs_fd = pfskey.pk_fd;
305 pfs->pfs_vnode = vp;
306 pfs->pfs_mount = mp;
307 pfs->pfs_flags = 0;
308 pfs->pfs_fileno =
309 PROCFS_FILENO(pfs->pfs_pid, pfs->pfs_type, pfs->pfs_fd);
310 vp->v_tag = VT_PROCFS;
311 vp->v_op = procfs_vnodeop_p;
312 vp->v_data = pfs;
313
314 switch (pfs->pfs_type) {
315 case PFSroot: /* /proc = dr-xr-xr-x */
316 vp->v_vflag |= VV_ROOT;
317 /*FALLTHROUGH*/
318 case PFSproc: /* /proc/N = dr-xr-xr-x */
319 pfs->pfs_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
320 vp->v_type = VDIR;
321 break;
322
323 case PFStask: /* /proc/N/task = dr-xr-xr-x */
324 if (pfs->pfs_fd == -1) {
325 pfs->pfs_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|
326 S_IROTH|S_IXOTH;
327 vp->v_type = VDIR;
328 break;
329 }
330 /*FALLTHROUGH*/
331 case PFScurproc: /* /proc/curproc = lr-xr-xr-x */
332 case PFSself: /* /proc/self = lr-xr-xr-x */
333 case PFScwd: /* /proc/N/cwd = lr-xr-xr-x */
334 case PFSchroot: /* /proc/N/chroot = lr-xr-xr-x */
335 case PFSexe: /* /proc/N/exe = lr-xr-xr-x */
336 pfs->pfs_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
337 vp->v_type = VLNK;
338 break;
339
340 case PFSfd:
341 if (pfs->pfs_fd == -1) { /* /proc/N/fd = dr-x------ */
342 pfs->pfs_mode = S_IRUSR|S_IXUSR;
343 vp->v_type = VDIR;
344 } else { /* /proc/N/fd/M = [ps-]rw------- */
345 file_t *fp;
346 vnode_t *vxp;
347 struct proc *p;
348
349 mutex_enter(&proc_lock);
350 p = procfs_proc_find(mp, pfs->pfs_pid);
351 mutex_exit(&proc_lock);
352 if (p == NULL) {
353 error = ENOENT;
354 goto bad;
355 }
356 KASSERT(rw_read_held(&p->p_reflock));
357 if ((fp = fd_getfile2(p, pfs->pfs_fd)) == NULL) {
358 error = EBADF;
359 goto bad;
360 }
361
362 pfs->pfs_mode = S_IRUSR|S_IWUSR;
363 switch (fp->f_type) {
364 case DTYPE_VNODE:
365 vxp = fp->f_vnode;
366
367 /*
368 * We make symlinks for directories
369 * to avoid cycles.
370 */
371 if (vxp->v_type == VDIR ||
372 procfs_proc_is_linux_compat())
373 goto symlink;
374 vp->v_type = vxp->v_type;
375 break;
376 case DTYPE_PIPE:
377 vp->v_type = VFIFO;
378 break;
379 case DTYPE_SOCKET:
380 vp->v_type = VSOCK;
381 break;
382 case DTYPE_KQUEUE:
383 case DTYPE_MISC:
384 case DTYPE_SEM:
385 symlink:
386 pfs->pfs_mode = S_IRUSR|S_IXUSR|S_IRGRP|
387 S_IXGRP|S_IROTH|S_IXOTH;
388 vp->v_type = VLNK;
389 break;
390 default:
391 error = EOPNOTSUPP;
392 closef(fp);
393 goto bad;
394 }
395 closef(fp);
396 }
397 break;
398
399 case PFSfile: /* /proc/N/file = -rw------- */
400 case PFSmem: /* /proc/N/mem = -rw------- */
401 case PFSregs: /* /proc/N/regs = -rw------- */
402 case PFSfpregs: /* /proc/N/fpregs = -rw------- */
403 pfs->pfs_mode = S_IRUSR|S_IWUSR;
404 vp->v_type = VREG;
405 break;
406
407 case PFSnote: /* /proc/N/note = --w------ */
408 case PFSnotepg: /* /proc/N/notepg = --w------ */
409 pfs->pfs_mode = S_IWUSR;
410 vp->v_type = VREG;
411 break;
412
413 case PFSmap: /* /proc/N/map = -r-------- */
414 case PFSmaps: /* /proc/N/maps = -r-------- */
415 case PFSauxv: /* /proc/N/auxv = -r-------- */
416 case PFSenviron: /* /proc/N/environ = -r-------- */
417 pfs->pfs_mode = S_IRUSR;
418 vp->v_type = VREG;
419 break;
420
421 case PFSstatus: /* /proc/N/status = -r--r--r-- */
422 case PFSstat: /* /proc/N/stat = -r--r--r-- */
423 case PFScmdline: /* /proc/N/cmdline = -r--r--r-- */
424 case PFSemul: /* /proc/N/emul = -r--r--r-- */
425 case PFSmeminfo: /* /proc/meminfo = -r--r--r-- */
426 case PFScpustat: /* /proc/stat = -r--r--r-- */
427 case PFSdevices: /* /proc/devices = -r--r--r-- */
428 case PFScpuinfo: /* /proc/cpuinfo = -r--r--r-- */
429 case PFSuptime: /* /proc/uptime = -r--r--r-- */
430 case PFSmounts: /* /proc/mounts = -r--r--r-- */
431 case PFSloadavg: /* /proc/loadavg = -r--r--r-- */
432 case PFSstatm: /* /proc/N/statm = -r--r--r-- */
433 case PFSversion: /* /proc/version = -r--r--r-- */
434 case PFSlimit: /* /proc/N/limit = -r--r--r-- */
435 case PFSlimits: /* /proc/N/limits = -r--r--r-- */
436 pfs->pfs_mode = S_IRUSR|S_IRGRP|S_IROTH;
437 vp->v_type = VREG;
438 break;
439
440 #ifdef __HAVE_PROCFS_MACHDEP
441 PROCFS_MACHDEP_NODETYPE_CASES
442 procfs_machdep_allocvp(vp);
443 break;
444 #endif
445
446 default:
447 panic("procfs_allocvp");
448 }
449
450 mutex_enter(&procfs_hashlock);
451 LIST_INSERT_HEAD(procfs_hashhead(pfs->pfs_pid), pfs, pfs_hash);
452 mutex_exit(&procfs_hashlock);
453
454 uvm_vnp_setsize(vp, 0);
455 *new_key = &pfs->pfs_key;
456
457 return 0;
458
459 bad:
460 vp->v_tag =VT_NON;
461 vp->v_type = VNON;
462 vp->v_op = NULL;
463 vp->v_data = NULL;
464 kmem_free(pfs, sizeof(*pfs));
465 return error;
466 }
467
/*
 * vfs_init hook.  Intentionally empty: the hash table and lock are set
 * up in procfs_modcmd().
 */
void
procfs_init(void)
{
}
473
/*
 * vfs_reinit hook.  Intentionally empty.
 */
void
procfs_reinit(void)
{
}
479
/*
 * vfs_done hook.  Intentionally empty: teardown is done in
 * procfs_modcmd().
 */
void
procfs_done(void)
{
}
485
486 extern const struct vnodeopv_desc procfs_vnodeop_opv_desc;
487
488 const struct vnodeopv_desc * const procfs_vnodeopv_descs[] = {
489 &procfs_vnodeop_opv_desc,
490 NULL,
491 };
492
493 struct vfsops procfs_vfsops = {
494 .vfs_name = MOUNT_PROCFS,
495 .vfs_min_mount_data = sizeof (struct procfs_args),
496 .vfs_mount = procfs_mount,
497 .vfs_start = procfs_start,
498 .vfs_unmount = procfs_unmount,
499 .vfs_root = procfs_root,
500 .vfs_quotactl = (void *)eopnotsupp,
501 .vfs_statvfs = procfs_statvfs,
502 .vfs_sync = procfs_sync,
503 .vfs_vget = procfs_vget,
504 .vfs_loadvnode = procfs_loadvnode,
505 .vfs_fhtovp = (void *)eopnotsupp,
506 .vfs_vptofh = (void *)eopnotsupp,
507 .vfs_init = procfs_init,
508 .vfs_reinit = procfs_reinit,
509 .vfs_done = procfs_done,
510 .vfs_snapshot = (void *)eopnotsupp,
511 .vfs_extattrctl = vfs_stdextattrctl,
512 .vfs_suspendctl = genfs_suspendctl,
513 .vfs_renamelock_enter = genfs_renamelock_enter,
514 .vfs_renamelock_exit = genfs_renamelock_exit,
515 .vfs_fsync = (void *)eopnotsupp,
516 .vfs_opv_descs = procfs_vnodeopv_descs
517 };
518
519 static void
520 procfs_exechook_cb(struct proc *p, void *arg)
521 {
522 struct hashhead *head;
523 struct pfsnode *pfs;
524 struct mount *mp;
525 struct pfskey key;
526 struct vnode *vp;
527 int error;
528
529 if (arg == PROCFS_EXEC_HOOK && !(p->p_flag & PK_SUGID))
530 return;
531
532 head = procfs_hashhead(p->p_pid);
533
534 again:
535 mutex_enter(&procfs_hashlock);
536 LIST_FOREACH(pfs, head, pfs_hash) {
537 if (pfs->pfs_pid != p->p_pid)
538 continue;
539 mp = pfs->pfs_mount;
540 key = pfs->pfs_key;
541 vfs_ref(mp);
542 mutex_exit(&procfs_hashlock);
543
544 error = vcache_get(mp, &key, sizeof(key), &vp);
545 vfs_rele(mp);
546 if (error != 0)
547 goto again;
548 if (vrecycle(vp))
549 goto again;
550 do {
551 error = vfs_suspend(mp, 0);
552 } while (error == EINTR || error == ERESTART);
553 vgone(vp);
554 if (error == 0)
555 vfs_resume(mp);
556 goto again;
557 }
558 mutex_exit(&procfs_hashlock);
559 }
560
561 static int
562 procfs_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
563 void *arg0, void *arg1, void *arg2, void *arg3)
564 {
565 struct proc *p;
566 struct pfsnode *pfs;
567 int result;
568
569 result = KAUTH_RESULT_DEFER;
570 p = arg0;
571 pfs = arg1;
572
573 if (action != KAUTH_PROCESS_PROCFS)
574 return result;
575
576 switch (pfs->pfs_type) {
577 case PFSregs:
578 case PFSfpregs:
579 case PFSmem:
580 if (kauth_cred_getuid(cred) != kauth_cred_getuid(p->p_cred) ||
581 ISSET(p->p_flag, PK_SUGID))
582 break;
583
584 /*FALLTHROUGH*/
585 default:
586 result = KAUTH_RESULT_ALLOW;
587 break;
588 }
589
590 return result;
591 }
592
593 SYSCTL_SETUP(procfs_sysctl_setup, "procfs sysctl")
594 {
595
596 sysctl_createv(clog, 0, NULL, NULL,
597 CTLFLAG_PERMANENT,
598 CTLTYPE_NODE, "procfs",
599 SYSCTL_DESCR("Process file system"),
600 NULL, 0, NULL, 0,
601 CTL_VFS, 12, CTL_EOL);
602 /*
603 * XXX the "12" above could be dynamic, thereby eliminating
604 * one more instance of the "number to vfs" mapping problem,
605 * but "12" is the order as taken from sys/mount.h
606 */
607 }
608
609 static int
610 procfs_modcmd(modcmd_t cmd, void *arg)
611 {
612 int error;
613
614 switch (cmd) {
615 case MODULE_CMD_INIT:
616 error = vfs_attach(&procfs_vfsops);
617 if (error != 0)
618 break;
619
620 procfs_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
621 procfs_listener_cb, NULL);
622
623 procfs_exechook = exechook_establish(procfs_exechook_cb,
624 PROCFS_EXEC_HOOK);
625 procfs_exithook = exithook_establish(procfs_exechook_cb,
626 PROCFS_EXIT_HOOK);
627
628 mutex_init(&procfs_hashlock, MUTEX_DEFAULT, IPL_NONE);
629 procfs_hashtab = hashinit(PROCFS_HASHSIZE, HASH_LIST, true,
630 &procfs_hashmask);
631
632 break;
633 case MODULE_CMD_FINI:
634 error = vfs_detach(&procfs_vfsops);
635 if (error != 0)
636 break;
637 kauth_unlisten_scope(procfs_listener);
638 exechook_disestablish(procfs_exechook);
639 exithook_disestablish(procfs_exithook);
640 mutex_destroy(&procfs_hashlock);
641 hashdone(procfs_hashtab, HASH_LIST, procfs_hashmask);
642 break;
643 default:
644 error = ENOTTY;
645 break;
646 }
647
648 return (error);
649 }
650