Home | History | Annotate | Line # | Download | only in procfs
      1 /*      $NetBSD: procfs_linux.c,v 1.90 2024/09/14 01:37:42 pgoyette Exp $      */
      2 
      3 /*
      4  * Copyright (c) 2001 Wasabi Systems, Inc.
      5  * All rights reserved.
      6  *
      7  * Written by Frank van der Linden for Wasabi Systems, Inc.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *      This product includes software developed for the NetBSD Project by
     20  *      Wasabi Systems, Inc.
     21  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
     22  *    or promote products derived from this software without specific prior
     23  *    written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
     26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 #include <sys/cdefs.h>
     39 __KERNEL_RCSID(0, "$NetBSD: procfs_linux.c,v 1.90 2024/09/14 01:37:42 pgoyette Exp $");
     40 
     41 #if defined(_KERNEL_OPT)
     42 #include "opt_sysv.h"
     43 #include "opt_mqueue.h"
     44 #endif
     45 
     46 #include <sys/param.h>
     47 #include <sys/systm.h>
     48 #include <sys/atomic.h>
     49 #include <sys/time.h>
     50 #include <sys/cpu.h>
     51 #include <sys/kernel.h>
     52 #include <sys/proc.h>
     53 #include <sys/vnode.h>
     54 #include <sys/exec.h>
     55 #include <sys/resource.h>
     56 #include <sys/resourcevar.h>
     57 #include <sys/signal.h>
     58 #include <sys/signalvar.h>
     59 #include <sys/tty.h>
     60 #include <sys/malloc.h>
     61 #include <sys/mount.h>
     62 #include <sys/conf.h>
     63 #include <sys/sysctl.h>
     64 #include <sys/kauth.h>
     65 #include <sys/filedesc.h>
     66 #ifdef SYSVMSG
     67 #include <sys/msg.h>
     68 #endif
     69 #ifdef SYSVSEM
     70 #include <sys/sem.h>
     71 #endif
     72 #ifdef SYSVSHM
     73 #include <sys/shm.h>
     74 #endif
     75 #ifdef MQUEUE
     76 #include <sys/mqueue.h>
     77 #endif
     78 
     79 #include <miscfs/procfs/procfs.h>
     80 
     81 #include <compat/linux/common/linux_exec.h>
     82 #include <compat/linux32/common/linux32_sysctl.h>
     83 
     84 #include <uvm/uvm.h>
     85 #include <uvm/uvm_extern.h>
     86 
/*
 * Table of device names with their character/block major numbers, and the
 * number of entries in it; both are walked by procfs_dodevices().
 */
extern struct devsw_conv *devsw_conv;
extern int max_devsw_convs;
#ifdef MQUEUE
/* Message queue limits reported by the procfs_domq_*() handlers. */
extern u_int mq_open_max;
extern u_int mq_max_msgsize;
extern u_int mq_def_maxmsg;
extern u_int mq_max_maxmsg;
#endif


/* Convert a page count to bytes. */
#define PGTOB(p)	((unsigned long)(p) << PAGE_SHIFT)
/* Convert a page count to kilobytes; only valid when PAGE_SHIFT >= 10. */
#define PGTOKB(p)	((unsigned long)(p) << (PAGE_SHIFT - 10))

/* Size in bytes of the scratch buffer the handlers format their text into. */
#define LBFSZ (8 * 1024)
    101 
    102 static void
    103 get_proc_size_info(struct proc *p, struct vm_map *map, unsigned long *stext,
    104     unsigned long *etext, unsigned long *sstack)
    105 {
    106 	struct vm_map_entry *entry;
    107 
    108 	*stext = 0;
    109 	*etext = 0;
    110 	*sstack = 0;
    111 
    112 	vm_map_lock_read(map);
    113 
    114 	for (entry = map->header.next; entry != &map->header;
    115 	    entry = entry->next) {
    116 		if (UVM_ET_ISSUBMAP(entry))
    117 			continue;
    118 		/* assume text is the first entry */
    119 		if (*stext == *etext) {
    120 			*stext = entry->start;
    121 			*etext = entry->end;
    122 			break;
    123 		}
    124 	}
    125 #if defined(LINUX_USRSTACK32) && defined(USRSTACK32)
    126 	if (strcmp(p->p_emul->e_name, "linux32") == 0 &&
    127 	    LINUX_USRSTACK32 < USRSTACK32)
    128 		*sstack = (unsigned long)LINUX_USRSTACK32;
    129 	else
    130 #endif
    131 #ifdef LINUX_USRSTACK
    132 	if (strcmp(p->p_emul->e_name, "linux") == 0 &&
    133 	    LINUX_USRSTACK < USRSTACK)
    134 		*sstack = (unsigned long)LINUX_USRSTACK;
    135 	else
    136 #endif
    137 #ifdef	USRSTACK32
    138 	if (strstr(p->p_emul->e_name, "32") != NULL)
    139 		*sstack = (unsigned long)USRSTACK32;
    140 	else
    141 #endif
    142 		*sstack = (unsigned long)USRSTACK;
    143 
    144 	/*
    145 	 * jdk 1.6 compares low <= addr && addr < high
    146 	 * if we put addr == high, then the test fails
    147 	 * so eat one page.
    148 	 */
    149 	*sstack -= PAGE_SIZE;
    150 
    151 	vm_map_unlock_read(map);
    152 }
    153 
    154 /*
    155  * Linux compatible /proc/meminfo. Only active when the -o linux
    156  * mountflag is used.
    157  */
    158 int
    159 procfs_domeminfo(struct lwp *curl, struct proc *p,
    160     struct pfsnode *pfs, struct uio *uio)
    161 {
    162 	char *bf;
    163 	int len;
    164 	int error = 0;
    165 	long filepg, anonpg, execpg, freepg;
    166 
    167 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
    168 
    169 	/* uvm_availmem() will sync the counters if needed. */
    170 	freepg = (long)uvm_availmem(true);
    171 	filepg = (long)(cpu_count_get(CPU_COUNT_FILECLEAN) +
    172 	    cpu_count_get(CPU_COUNT_FILEDIRTY) +
    173 	    cpu_count_get(CPU_COUNT_FILEUNKNOWN) -
    174 	    cpu_count_get(CPU_COUNT_EXECPAGES));
    175 	anonpg = (long)(cpu_count_get(CPU_COUNT_ANONCLEAN) +
    176 	    cpu_count_get(CPU_COUNT_ANONDIRTY) +
    177 	    cpu_count_get(CPU_COUNT_ANONUNKNOWN));
    178 	execpg = (long)cpu_count_get(CPU_COUNT_EXECPAGES);
    179 
    180 	len = snprintf(bf, LBFSZ,
    181 		"        total:    used:    free:  shared: buffers: cached:\n"
    182 		"Mem:  %8lu %8lu %8lu %8lu %8lu %8lu\n"
    183 		"Swap: %8lu %8lu %8lu\n"
    184 		"MemTotal:  %8lu kB\n"
    185 		"MemFree:   %8lu kB\n"
    186 		"MemShared: %8lu kB\n"
    187 		"Buffers:   %8lu kB\n"
    188 		"Cached:    %8lu kB\n"
    189 		"SwapTotal: %8lu kB\n"
    190 		"SwapFree:  %8lu kB\n",
    191 		PGTOB(uvmexp.npages),
    192 		PGTOB(uvmexp.npages - freepg),
    193 		PGTOB(freepg),
    194 		0L,
    195 		PGTOB(filepg),
    196 		PGTOB(anonpg + filepg + execpg),
    197 		PGTOB(uvmexp.swpages),
    198 		PGTOB(uvmexp.swpginuse),
    199 		PGTOB(uvmexp.swpages - uvmexp.swpginuse),
    200 		PGTOKB(uvmexp.npages),
    201 		PGTOKB(freepg),
    202 		0L,
    203 		PGTOKB(freepg),
    204 		PGTOKB(anonpg + filepg + execpg),
    205 		PGTOKB(uvmexp.swpages),
    206 		PGTOKB(uvmexp.swpages - uvmexp.swpginuse));
    207 
    208 	if (len == 0)
    209 		goto out;
    210 
    211 	error = uiomove_frombuf(bf, len, uio);
    212 out:
    213 	free(bf, M_TEMP);
    214 	return error;
    215 }
    216 
    217 /*
    218  * Linux compatible /proc/devices. Only active when the -o linux
    219  * mountflag is used.
    220  */
    221 int
    222 procfs_dodevices(struct lwp *curl, struct proc *p,
    223     struct pfsnode *pfs, struct uio *uio)
    224 {
    225 	char *bf;
    226 	int offset = 0;
    227 	int i, error = ENAMETOOLONG;
    228 
    229 	/* XXX elad - may need filtering. */
    230 
    231 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
    232 
    233 	offset += snprintf(&bf[offset], LBFSZ - offset, "Character devices:\n");
    234 	if (offset >= LBFSZ)
    235 		goto out;
    236 
    237 	mutex_enter(&device_lock);
    238 	for (i = 0; i < max_devsw_convs; i++) {
    239 		if ((devsw_conv[i].d_name == NULL) ||
    240 		    (devsw_conv[i].d_cmajor == -1))
    241 			continue;
    242 
    243 		offset += snprintf(&bf[offset], LBFSZ - offset,
    244 		    "%3d %s\n", devsw_conv[i].d_cmajor, devsw_conv[i].d_name);
    245 		if (offset >= LBFSZ) {
    246 			mutex_exit(&device_lock);
    247 			goto out;
    248 		}
    249 	}
    250 
    251 	offset += snprintf(&bf[offset], LBFSZ - offset, "\nBlock devices:\n");
    252 	if (offset >= LBFSZ) {
    253 		mutex_exit(&device_lock);
    254 		goto out;
    255 	}
    256 
    257 	for (i = 0; i < max_devsw_convs; i++) {
    258 		if ((devsw_conv[i].d_name == NULL) ||
    259 		    (devsw_conv[i].d_bmajor == -1))
    260 			continue;
    261 
    262 		offset += snprintf(&bf[offset], LBFSZ - offset,
    263 		    "%3d %s\n", devsw_conv[i].d_bmajor, devsw_conv[i].d_name);
    264 		if (offset >= LBFSZ) {
    265 			mutex_exit(&device_lock);
    266 			goto out;
    267 		}
    268 	}
    269 	mutex_exit(&device_lock);
    270 
    271 	error = uiomove_frombuf(bf, offset, uio);
    272 out:
    273 	free(bf, M_TEMP);
    274 	return error;
    275 }
    276 
    277 /*
    278  * Linux compatible /proc/stat. Only active when the -o linux
    279  * mountflag is used.
    280  */
    281 int
    282 procfs_docpustat(struct lwp *curl, struct proc *p,
    283     struct pfsnode *pfs, struct uio *uio)
    284 {
    285 	char		*bf;
    286 	int	 	 error;
    287 	int	 	 len;
    288 #if defined(MULTIPROCESSOR)
    289         struct cpu_info *ci;
    290         CPU_INFO_ITERATOR cii;
    291 #endif
    292 	int	 	 i;
    293 
    294 	error = ENAMETOOLONG;
    295 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
    296 
    297 	len = snprintf(bf, LBFSZ,
    298 		"cpu %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
    299 		curcpu()->ci_schedstate.spc_cp_time[CP_USER],
    300 		curcpu()->ci_schedstate.spc_cp_time[CP_NICE],
    301 		curcpu()->ci_schedstate.spc_cp_time[CP_SYS] /*+ [CP_INTR]*/,
    302 		curcpu()->ci_schedstate.spc_cp_time[CP_IDLE]);
    303 	if (len == 0)
    304 		goto out;
    305 
    306 #if defined(MULTIPROCESSOR)
    307 #define ALLCPUS	CPU_INFO_FOREACH(cii, ci)
    308 #define CPUNAME	ci
    309 #else
    310 #define ALLCPUS	; i < 1 ;
    311 #define CPUNAME	curcpu()
    312 #endif
    313 
    314 	i = 0;
    315 	for (ALLCPUS) {
    316 		len += snprintf(&bf[len], LBFSZ - len,
    317 			"cpu%d %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64
    318 			"\n", i,
    319 			CPUNAME->ci_schedstate.spc_cp_time[CP_USER],
    320 			CPUNAME->ci_schedstate.spc_cp_time[CP_NICE],
    321 			CPUNAME->ci_schedstate.spc_cp_time[CP_SYS],
    322 			CPUNAME->ci_schedstate.spc_cp_time[CP_IDLE]);
    323 		if (len >= LBFSZ)
    324 			goto out;
    325 		i += 1;
    326 	}
    327 
    328 	cpu_count_sync(true);
    329 
    330 	struct timeval btv;
    331 	getmicroboottime(&btv);
    332 
    333 	len += snprintf(&bf[len], LBFSZ - len,
    334 			"disk 0 0 0 0\n"
    335 			"page %u %u\n"
    336 			"swap %u %u\n"
    337 			"intr %"PRId64"\n"
    338 			"ctxt %"PRId64"\n"
    339 			"btime %"PRId64"\n",
    340 			uvmexp.pageins, uvmexp.pdpageouts,
    341 			uvmexp.pgswapin, uvmexp.pgswapout,
    342 			cpu_count_get(CPU_COUNT_NINTR),
    343 			cpu_count_get(CPU_COUNT_NSWTCH),
    344 			btv.tv_sec);
    345 	if (len >= LBFSZ)
    346 		goto out;
    347 
    348 	error = uiomove_frombuf(bf, len, uio);
    349 out:
    350 	free(bf, M_TEMP);
    351 	return error;
    352 }
    353 
    354 /*
    355  * Linux compatible /proc/loadavg. Only active when the -o linux
    356  * mountflag is used.
    357  */
    358 int
    359 procfs_doloadavg(struct lwp *curl, struct proc *p,
    360     struct pfsnode *pfs, struct uio *uio)
    361 {
    362 	char	*bf;
    363 	int 	 error;
    364 	int 	 len;
    365 
    366 	error = ENAMETOOLONG;
    367 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
    368 
    369 	averunnable.fscale = FSCALE;
    370 	len = snprintf(bf, LBFSZ,
    371 	        "%d.%02d %d.%02d %d.%02d %d/%d %d\n",
    372 		(int)(averunnable.ldavg[0] / averunnable.fscale),
    373 		(int)(averunnable.ldavg[0] * 100 / averunnable.fscale % 100),
    374 		(int)(averunnable.ldavg[1] / averunnable.fscale),
    375 		(int)(averunnable.ldavg[1] * 100 / averunnable.fscale % 100),
    376 		(int)(averunnable.ldavg[2] / averunnable.fscale),
    377 		(int)(averunnable.ldavg[2] * 100 / averunnable.fscale % 100),
    378 		1,		/* number of ONPROC processes */
    379 		atomic_load_relaxed(&nprocs),
    380 		30000);		/* last pid */
    381 	if (len == 0)
    382 		goto out;
    383 
    384 	error = uiomove_frombuf(bf, len, uio);
    385 out:
    386 	free(bf, M_TEMP);
    387 	return error;
    388 }
    389 
    390 /*
    391  * Linux compatible /proc/<pid>/statm. Only active when the -o linux
    392  * mountflag is used.
    393  */
    394 int
    395 procfs_do_pid_statm(struct lwp *curl, struct lwp *l,
    396     struct pfsnode *pfs, struct uio *uio)
    397 {
    398 	struct vmspace	*vm;
    399 	struct proc	*p = l->l_proc;
    400 	char		*bf;
    401 	int	 	 error;
    402 	int	 	 len;
    403 	struct kinfo_proc2 ki;
    404 
    405 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
    406 
    407 	/* XXX - we use values from vmspace, since dsl says that ru figures
    408 	   are always 0 except for zombies. See kvm_proc.c::kvm_getproc2() */
    409 	if ((error = proc_vmspace_getref(p, &vm)) != 0) {
    410 		goto out;
    411 	}
    412 
    413 	mutex_enter(&proc_lock);
    414 	mutex_enter(p->p_lock);
    415 
    416 	/* retrieve RSS size */
    417 	memset(&ki, 0, sizeof(ki));
    418 	fill_kproc2(p, &ki, false, false);
    419 
    420 	mutex_exit(p->p_lock);
    421 	mutex_exit(&proc_lock);
    422 
    423 	uvmspace_free(vm);
    424 
    425 	len = snprintf(bf, LBFSZ,
    426 	        "%lu %lu %lu %lu %lu %lu %lu\n",
    427 		(unsigned long)(ki.p_vm_msize),	/* size */
    428 		(unsigned long)(ki.p_vm_rssize),/* resident */
    429 		(unsigned long)(ki.p_uru_ixrss),/* shared */
    430 		(unsigned long)(ki.p_vm_tsize),	/* text */
    431 		(unsigned long) 0,		/* library (unused) */
    432 		(unsigned long)(ki.p_vm_dsize + ki.p_vm_ssize),	/* data+stack */
    433 		(unsigned long) 0);		/* dirty */
    434 
    435 	if (len == 0)
    436 		goto out;
    437 
    438 	error = uiomove_frombuf(bf, len, uio);
    439 out:
    440 	free(bf, M_TEMP);
    441 	return error;
    442 }
    443 
    444 #define UTIME2TICKS(s,u)	(((uint64_t)(s) * 1000000 + (u)) / 10000)
    445 
    446 /*
    447  * Linux compatible /proc/<pid>/stat. Only active when the -o linux
    448  * mountflag is used.
    449  */
    450 int
    451 procfs_do_pid_stat(struct lwp *curl, struct lwp *l,
    452     struct pfsnode *pfs, struct uio *uio)
    453 {
    454 	char *bf;
    455 	struct proc *p = l->l_proc;
    456 	int len;
    457 	struct rusage *cru = &p->p_stats->p_cru;
    458 	unsigned long stext = 0, etext = 0, sstack = 0;
    459 	struct timeval rt;
    460 	struct vmspace	*vm;
    461 	struct kinfo_proc2 ki;
    462 	int error;
    463 
    464 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
    465 
    466 	if ((error = proc_vmspace_getref(p, &vm)) != 0) {
    467 		goto out;
    468 	}
    469 
    470 	get_proc_size_info(p, &vm->vm_map, &stext, &etext, &sstack);
    471 
    472 	mutex_enter(&proc_lock);
    473 	mutex_enter(p->p_lock);
    474 
    475 	memset(&ki, 0, sizeof(ki));
    476 	fill_kproc2(p, &ki, false, false);
    477 	calcru(p, NULL, NULL, NULL, &rt);
    478 
    479 	len = snprintf(bf, LBFSZ,
    480 	    "%d (%s) %c %d %d %d %u %d "
    481 	    "%u "
    482 	    "%"PRIu64" %lu %"PRIu64" %lu %"PRIu64" %"PRIu64" %"PRIu64" %"PRIu64" "
    483 	    "%d %d %"PRIu64" "
    484 	    "%lld %"PRIu64" %"PRId64" %lu %"PRIu64" "
    485 	    "%lu %lu %lu "
    486 	    "%u %u "
    487 	    "%u %u %u %u "
    488 	    "%"PRIu64" %"PRIu64" %"PRIu64" %d %"PRIu64"\n",
    489 
    490 	    ki.p_pid,						/* 1 pid */
    491 	    ki.p_comm,						/* 2 tcomm */
    492 	    "0RRSTZXR8"[(ki.p_stat > 8) ? 0 : (int)ki.p_stat],	/* 3 state */
    493 	    ki.p_ppid,						/* 4 ppid */
    494 	    ki.p__pgid,						/* 5 pgrp */
    495 	    ki.p_sid,						/* 6 sid */
    496 	    (ki.p_tdev != (uint32_t)NODEV) ? ki.p_tdev : 0,	/* 7 tty_nr */
    497 	    ki.p_tpgid,						/* 8 tty_pgrp */
    498 
    499 	    ki.p_flag,						/* 9 flags */
    500 
    501 	    ki.p_uru_minflt,					/* 10 min_flt */
    502 	    cru->ru_minflt,
    503 	    ki.p_uru_majflt,					/* 12 maj_flt */
    504 	    cru->ru_majflt,
    505 	    UTIME2TICKS(ki.p_uutime_sec, ki.p_uutime_usec),	/* 14 utime */
    506 	    UTIME2TICKS(ki.p_ustime_sec, ki.p_ustime_usec),	/* 15 stime */
    507 	    UTIME2TICKS(cru->ru_utime.tv_sec, cru->ru_utime.tv_usec), /* 16 cutime */
    508 	    UTIME2TICKS(cru->ru_stime.tv_sec, cru->ru_stime.tv_usec), /* 17 cstime */
    509 
    510 	    ki.p_priority,				/* XXX: 18 priority */
    511 	    ki.p_nice - NZERO,				/* 19 nice */
    512 	    ki.p_nlwps,					/* 20 num_threads */
    513 
    514 	    (long long)rt.tv_sec,
    515 	    UTIME2TICKS(ki.p_ustart_sec, ki.p_ustart_usec), /* 22 start_time */
    516 	    ki.p_vm_msize,				/* 23 vsize */
    517 	    PGTOKB(ki.p_vm_rssize),			/* 24 rss */
    518 	    p->p_rlimit[RLIMIT_RSS].rlim_cur,		/* 25 rsslim */
    519 
    520 	    stext,					/* 26 start_code */
    521 	    etext,					/* 27 end_code */
    522 	    sstack,					/* 28 start_stack */
    523 
    524 	    0,						/* XXX: 29 esp */
    525 	    0,						/* XXX: 30 eip */
    526 
    527 	    ki.p_siglist.__bits[0],			/* XXX: 31 pending */
    528 	    0,						/* XXX: 32 blocked */
    529 	    ki.p_sigignore.__bits[0],		/* 33 sigign */
    530 	    ki.p_sigcatch.__bits[0],		/* 34 sigcatch */
    531 
    532 	    ki.p_wchan,					/* 35 wchan */
    533 	    ki.p_uru_nvcsw,
    534 	    ki.p_uru_nivcsw,
    535 	    ki.p_exitsig,				/* 38 exit_signal */
    536 	    ki.p_cpuid);				/* 39 task_cpu */
    537 
    538 	mutex_exit(p->p_lock);
    539 	mutex_exit(&proc_lock);
    540 
    541 	uvmspace_free(vm);
    542 
    543 	if (len == 0)
    544 		goto out;
    545 
    546 	error = uiomove_frombuf(bf, len, uio);
    547 out:
    548 	free(bf, M_TEMP);
    549 	return error;
    550 }
    551 
    552 int
    553 procfs_docpuinfo(struct lwp *curl, struct proc *p,
    554     struct pfsnode *pfs, struct uio *uio)
    555 {
    556 	size_t len = LBFSZ;
    557 	char *bf = NULL;
    558 	int error;
    559 
    560 	do {
    561 		if (bf)
    562 			free(bf, M_TEMP);
    563 		bf = malloc(len, M_TEMP, M_WAITOK);
    564 	} while (procfs_getcpuinfstr(bf, &len) < 0);
    565 
    566 	if (len == 0) {
    567 		error = 0;
    568 		goto done;
    569 	}
    570 
    571 	error = uiomove_frombuf(bf, len, uio);
    572 done:
    573 	free(bf, M_TEMP);
    574 	return error;
    575 }
    576 
    577 int
    578 procfs_douptime(struct lwp *curl, struct proc *p,
    579     struct pfsnode *pfs, struct uio *uio)
    580 {
    581 	char *bf;
    582 	int len;
    583 	struct timeval runtime;
    584 	u_int64_t idle;
    585 	int error = 0;
    586 
    587 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
    588 
    589 	microuptime(&runtime);
    590 	idle = curcpu()->ci_schedstate.spc_cp_time[CP_IDLE];
    591 	len = snprintf(bf, LBFSZ,
    592 	    "%lld.%02lu %" PRIu64 ".%02" PRIu64 "\n",
    593 	    (long long)runtime.tv_sec, (long)runtime.tv_usec / 10000,
    594 	    idle / hz, (((idle % hz) * 100) / hz) % 100);
    595 
    596 	if (len == 0)
    597 		goto out;
    598 
    599 	error = uiomove_frombuf(bf, len, uio);
    600 out:
    601 	free(bf, M_TEMP);
    602 	return error;
    603 }
    604 
    605 static int
    606 procfs_format_sfs(char **mtab, size_t *mlen, char *buf, size_t blen,
    607     const struct statvfs *sfs, struct lwp *curl, int suser)
    608 {
    609 	const char *fsname;
    610 
    611 	/* Linux uses different names for some filesystems */
    612 	fsname = sfs->f_fstypename;
    613 	if (strcmp(fsname, "procfs") == 0)
    614 		fsname = "proc";
    615 	else if (strcmp(fsname, "ext2fs") == 0)
    616 		fsname = "ext2";
    617 
    618 	blen = snprintf(buf, blen, "%s %s %s %s%s%s%s%s%s 0 0\n",
    619 	    sfs->f_mntfromname, sfs->f_mntonname, fsname,
    620 	    (sfs->f_flag & ST_RDONLY) ? "ro" : "rw",
    621 	    (sfs->f_flag & ST_NOSUID) ? ",nosuid" : "",
    622 	    (sfs->f_flag & ST_NOEXEC) ? ",noexec" : "",
    623 	    (sfs->f_flag & ST_NODEV) ? ",nodev" : "",
    624 	    (sfs->f_flag & ST_SYNCHRONOUS) ? ",sync" : "",
    625 	    (sfs->f_flag & ST_NOATIME) ? ",noatime" : "");
    626 
    627 	*mtab = realloc(*mtab, *mlen + blen, M_TEMP, M_WAITOK);
    628 	memcpy(*mtab + *mlen, buf, blen);
    629 	*mlen += blen;
    630 	return sfs->f_mntonname[0] == '/' && sfs->f_mntonname[1] == '\0';
    631 }
    632 
    633 int
    634 procfs_domounts(struct lwp *curl, struct proc *p,
    635     struct pfsnode *pfs, struct uio *uio)
    636 {
    637 	char *bf, *mtab = NULL;
    638 	size_t mtabsz = 0;
    639 	mount_iterator_t *iter;
    640 	struct mount *mp;
    641 	int error = 0, root = 0;
    642 	struct cwdinfo *cwdi = curl->l_proc->p_cwdi;
    643 	struct statvfs *sfs;
    644 
    645 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
    646 
    647 	sfs = malloc(sizeof(*sfs), M_TEMP, M_WAITOK);
    648 	mountlist_iterator_init(&iter);
    649 	while ((mp = mountlist_iterator_next(iter)) != NULL) {
    650 		if ((error = dostatvfs(mp, sfs, curl, MNT_WAIT, 0)) == 0)
    651 			root |= procfs_format_sfs(&mtab, &mtabsz, bf, LBFSZ,
    652 			    sfs, curl, 0);
    653 	}
    654 	mountlist_iterator_destroy(iter);
    655 	free(sfs, M_TEMP);
    656 
    657 	/*
    658 	 * If we are inside a chroot that is not itself a mount point,
    659 	 * fake a root entry.
    660 	 */
    661 	if (!root && cwdi->cwdi_rdir)
    662 		(void)procfs_format_sfs(&mtab, &mtabsz, bf, LBFSZ,
    663 		    &cwdi->cwdi_rdir->v_mount->mnt_stat, curl, 1);
    664 
    665 	free(bf, M_TEMP);
    666 
    667 	if (mtabsz > 0) {
    668 		error = uiomove_frombuf(mtab, mtabsz, uio);
    669 		free(mtab, M_TEMP);
    670 	}
    671 
    672 	return error;
    673 }
    674 
    675 /*
    676  * Linux compatible /proc/version. Only active when the -o linux
    677  * mountflag is used.
    678  */
    679 int
    680 procfs_doversion(struct lwp *curl, struct proc *p,
    681     struct pfsnode *pfs, struct uio *uio)
    682 {
    683 	char *bf;
    684 	char lostype[20], losrelease[20], lversion[80];
    685 	const char *postype, *posrelease, *pversion;
    686 	const char *emulname = curlwp->l_proc->p_emul->e_name;
    687 	int len;
    688 	int error = 0;
    689 	int nm[4];
    690 	size_t buflen;
    691 
    692 	CTASSERT(EMUL_LINUX_KERN_OSTYPE == EMUL_LINUX32_KERN_OSTYPE);
    693 	CTASSERT(EMUL_LINUX_KERN_OSRELEASE == EMUL_LINUX32_KERN_OSRELEASE);
    694 	CTASSERT(EMUL_LINUX_KERN_VERSION == EMUL_LINUX32_KERN_VERSION);
    695 
    696 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
    697 
    698 	sysctl_lock(false);
    699 
    700 	if (strncmp(emulname, "linux", 5) == 0) {
    701 		/*
    702 		 * Lookup the emulation ostype, osrelease, and version.
    703 		 * Since compat_linux and compat_linux32 can be built as
    704 		 * modules, we use sysctl to obtain the values instead of
    705 		 * using the symbols directly.
    706 		 */
    707 
    708 		if (strcmp(emulname, "linux32") == 0) {
    709 			nm[0] = CTL_EMUL;
    710 			nm[1] = EMUL_LINUX32;
    711 			nm[2] = EMUL_LINUX32_KERN;
    712 		} else {
    713 			nm[0] = CTL_EMUL;
    714 			nm[1] = EMUL_LINUX;
    715 			nm[2] = EMUL_LINUX_KERN;
    716 		}
    717 
    718 		nm[3] = EMUL_LINUX_KERN_OSTYPE;
    719 		buflen = sizeof(lostype);
    720 		error = sysctl_dispatch(nm, __arraycount(nm),
    721 		    lostype, &buflen,
    722 		    NULL, 0, NULL, NULL, NULL);
    723 		if (error)
    724 			goto out;
    725 
    726 		nm[3] = EMUL_LINUX_KERN_OSRELEASE;
    727 		buflen = sizeof(losrelease);
    728 		error = sysctl_dispatch(nm, __arraycount(nm),
    729 		    losrelease, &buflen,
    730 		    NULL, 0, NULL, NULL, NULL);
    731 		if (error)
    732 			goto out;
    733 
    734 		nm[3] = EMUL_LINUX_KERN_VERSION;
    735 		buflen = sizeof(lversion);
    736 		error = sysctl_dispatch(nm, __arraycount(nm),
    737 		    lversion, &buflen,
    738 		    NULL, 0, NULL, NULL, NULL);
    739 		if (error)
    740 			goto out;
    741 
    742 		postype = lostype;
    743 		posrelease = losrelease;
    744 		pversion = lversion;
    745 	} else {
    746 		postype = ostype;
    747 		posrelease = osrelease;
    748 		strlcpy(lversion, version, sizeof(lversion));
    749 		if (strchr(lversion, '\n'))
    750 			*strchr(lversion, '\n') = '\0';
    751 		pversion = lversion;
    752 	}
    753 
    754 	len = snprintf(bf, LBFSZ,
    755 		"%s version %s (%s@localhost) (gcc version %s) %s\n",
    756 		postype, posrelease, emulname,
    757 #ifdef __VERSION__
    758 		__VERSION__,
    759 #else
    760 		"unknown",
    761 #endif
    762 		pversion);
    763 
    764 	if (len == 0)
    765 		goto out;
    766 
    767 	error = uiomove_frombuf(bf, len, uio);
    768 out:
    769 	free(bf, M_TEMP);
    770 	sysctl_unlock();
    771 	return error;
    772 }
    773 
    774 int
    775 procfs_dosysvipc_msg(struct lwp *curl, struct proc *p,
    776     struct pfsnode *pfs, struct uio *uio)
    777 {
    778 	char *bf;
    779 	int offset = 0;
    780 	int error = EFBIG;
    781 
    782 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
    783 
    784 	offset += snprintf(bf, LBFSZ,
    785 	    "%10s %10s %4s  %10s %10s %5s %5s %5s %5s %5s %5s %10s %10s %10s\n",
    786 	    "key", "msqid", "perms", "cbytes", "qnum", "lspid", "lrpid",
    787 	    "uid", "gid", "cuid", "cgid", "stime", "rtime", "ctime");
    788 	if (offset >= LBFSZ)
    789 		goto out;
    790 
    791 #ifdef SYSVMSG
    792 	for (int id = 0; id < msginfo.msgmni; id++)
    793 		if (msqs[id].msq_u.msg_qbytes > 0) {
    794 			offset += snprintf(&bf[offset], LBFSZ - offset,
    795 			    "%10d %10d  %4o  %10zu %10lu %5u %5u %5u %5u %5u %5u %10lld %10lld %10lld\n",
    796 			    (int) msqs[id].msq_u.msg_perm._key,
    797 			    IXSEQ_TO_IPCID(id, msqs[id].msq_u.msg_perm),
    798 			    msqs[id].msq_u.msg_perm.mode,
    799 			    msqs[id].msq_u._msg_cbytes,
    800 			    msqs[id].msq_u.msg_qnum,
    801 			    msqs[id].msq_u.msg_lspid,
    802 			    msqs[id].msq_u.msg_lrpid,
    803 			    msqs[id].msq_u.msg_perm.uid,
    804 			    msqs[id].msq_u.msg_perm.gid,
    805 			    msqs[id].msq_u.msg_perm.cuid,
    806 			    msqs[id].msq_u.msg_perm.cgid,
    807 			    (long long)msqs[id].msq_u.msg_stime,
    808 			    (long long)msqs[id].msq_u.msg_rtime,
    809 			    (long long)msqs[id].msq_u.msg_ctime);
    810 			if (offset >= LBFSZ)
    811 				goto out;
    812 		}
    813 #endif
    814 
    815 	error = uiomove_frombuf(bf, offset, uio);
    816 out:
    817 	free(bf, M_TEMP);
    818 	return error;
    819 }
    820 
    821 int
    822 procfs_dosysvipc_sem(struct lwp *curl, struct proc *p,
    823     struct pfsnode *pfs, struct uio *uio)
    824 {
    825 	char *bf;
    826 	int offset = 0;
    827 	int error = EFBIG;
    828 
    829 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
    830 
    831 	offset += snprintf(bf, LBFSZ,
    832 	    "%10s %10s %4s %10s %5s %5s %5s %5s %10s %10s\n",
    833 	    "key", "semid", "perms", "nsems", "uid", "gid", "cuid", "cgid",
    834 	    "otime", "ctime");
    835 	if (offset >= LBFSZ)
    836 		goto out;
    837 
    838 #ifdef SYSVSEM
    839 	for (int id = 0; id < seminfo.semmni; id++)
    840 		if ((sema[id].sem_perm.mode & SEM_ALLOC) != 0) {
    841 			offset += snprintf(&bf[offset], LBFSZ - offset,
    842 			    "%10d %10d  %4o %10u %5u %5u %5u %5u %10lld %10lld\n",
    843 			    (int) sema[id].sem_perm._key,
    844 			    IXSEQ_TO_IPCID(id, sema[id].sem_perm),
    845 			    sema[id].sem_perm.mode,
    846 			    sema[id].sem_nsems,
    847 			    sema[id].sem_perm.uid,
    848 			    sema[id].sem_perm.gid,
    849 			    sema[id].sem_perm.cuid,
    850 			    sema[id].sem_perm.cgid,
    851 			    (long long)sema[id].sem_otime,
    852 			    (long long)sema[id].sem_ctime);
    853 			if (offset >= LBFSZ)
    854 				goto out;
    855 		}
    856 #endif
    857 
    858 	error = uiomove_frombuf(bf, offset, uio);
    859 out:
    860 	free(bf, M_TEMP);
    861 	return error;
    862 }
    863 
    864 int
    865 procfs_dosysvipc_shm(struct lwp *curl, struct proc *p,
    866     struct pfsnode *pfs, struct uio *uio)
    867 {
    868 	char *bf;
    869 	int offset = 0;
    870 	int error = EFBIG;
    871 
    872 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
    873 
    874 	offset += snprintf(bf, LBFSZ,
    875 	    "%10s %10s %s %21s %5s %5s %5s %5s %5s %5s %5s %10s %10s %10s %21s %21s\n",
    876 	    "key", "shmid", "perms", "size", "cpid", "lpid", "nattch", "uid",
    877 	    "gid", "cuid", "cgid", "atime", "dtime", "ctime", "rss", "swap");
    878 	if (offset >= LBFSZ)
    879 		goto out;
    880 
    881 #ifdef SYSVSHM
    882 	for (unsigned int id = 0; id < shminfo.shmmni; id++)
    883 		if ((shmsegs[id].shm_perm.mode & SHMSEG_ALLOCATED) != 0) {
    884 			offset += snprintf(&bf[offset], LBFSZ - offset,
    885 			    "%10d %10d  %4o %21zu %5u %5u  %5u %5u %5u %5u %5u %10lld %10lld %10lld %21d %21d\n",
    886 			    (int) shmsegs[id].shm_perm._key,
    887 			    IXSEQ_TO_IPCID(id, shmsegs[id].shm_perm),
    888 			    shmsegs[id].shm_perm.mode,
    889 			    shmsegs[id].shm_segsz,
    890 			    shmsegs[id].shm_cpid,
    891 			    shmsegs[id].shm_lpid,
    892 			    shmsegs[id].shm_nattch,
    893 			    shmsegs[id].shm_perm.uid,
    894 			    shmsegs[id].shm_perm.gid,
    895 			    shmsegs[id].shm_perm.cuid,
    896 			    shmsegs[id].shm_perm.cgid,
    897 			    (long long)shmsegs[id].shm_atime,
    898 			    (long long)shmsegs[id].shm_dtime,
    899 			    (long long)shmsegs[id].shm_ctime,
    900 			    0, 0);	/* XXX rss & swp are not supported */
    901 			if (offset >= LBFSZ)
    902 				goto out;
    903 		}
    904 #endif
    905 
    906 	error = uiomove_frombuf(bf, offset, uio);
    907 out:
    908 	free(bf, M_TEMP);
    909 	return error;
    910 }
    911 
/*
 * print_uint(value, uio) is an expression that writes "value\n" to uio and
 * yields an errno-style result.  Without MQUEUE it ignores both arguments
 * and yields EINVAL.
 *
 * Neither definition ends in a semicolon, so the macro can be used anywhere
 * an expression is allowed.
 */
#ifdef MQUEUE
#define print_uint(value, uio) PFS_print_uint(value, uio)

/*
 * Format value as an unsigned decimal followed by a newline and copy it out
 * through uio.
 *
 * Returns EFBIG if the text does not fit the local buffer, otherwise the
 * result of uiomove_frombuf().
 */
static int
PFS_print_uint(unsigned int value, struct uio *uio)
{
	/* At most 3 digits per byte of value, plus the newline and the NUL. */
	char bf[3 * sizeof(unsigned int) + 2];
	int len;

	len = snprintf(bf, sizeof(bf), "%u\n", value);
	if (len < 0 || (size_t)len >= sizeof(bf))
		return EFBIG;

	return uiomove_frombuf(bf, len, uio);
}
#else

#define print_uint(value, uio) EINVAL

#endif
    937 
/*
 * Report mq_def_maxmsg through print_uint(); when MQUEUE is not configured
 * print_uint() expands to EINVAL and the variable is never referenced.
 * curl, p and pfs are not used.
 */
int
procfs_domq_msg_def(struct lwp *curl, struct proc *p,
    struct pfsnode *pfs, struct uio *uio)
{
	return print_uint(mq_def_maxmsg, uio);
}
    944 
/*
 * Report mq_max_maxmsg through print_uint(); when MQUEUE is not configured
 * print_uint() expands to EINVAL and the variable is never referenced.
 * curl, p and pfs are not used.
 */
int
procfs_domq_msg_max(struct lwp *curl, struct proc *p,
    struct pfsnode *pfs, struct uio *uio)
{
	return print_uint(mq_max_maxmsg, uio);
}
    951 
/*
 * Report the constant MQ_DEF_MSGSIZE through print_uint(); when MQUEUE is
 * not configured print_uint() expands to EINVAL and the constant is never
 * referenced.  curl, p and pfs are not used.
 */
int
procfs_domq_siz_def(struct lwp *curl, struct proc *p,
    struct pfsnode *pfs, struct uio *uio)
{
	return print_uint(MQ_DEF_MSGSIZE, uio);
}
    958 
/*
 * Report mq_max_msgsize through print_uint(); when MQUEUE is not configured
 * print_uint() expands to EINVAL and the variable is never referenced.
 * curl, p and pfs are not used.
 */
int
procfs_domq_siz_max(struct lwp *curl, struct proc *p,
    struct pfsnode *pfs, struct uio *uio)
{
	return print_uint(mq_max_msgsize, uio);
}
    965 
/*
 * Report mq_open_max through print_uint(); when MQUEUE is not configured
 * print_uint() expands to EINVAL and the variable is never referenced.
 * curl, p and pfs are not used.
 */
int
procfs_domq_qmax(struct lwp *curl, struct proc *p,
    struct pfsnode *pfs, struct uio *uio)
{
	return print_uint(mq_open_max, uio);
}
    972