procfs_linux.c revision 1.54.14.2 1 /* $NetBSD: procfs_linux.c,v 1.54.14.2 2011/02/05 06:00:15 cliff Exp $ */
2
3 /*
4 * Copyright (c) 2001 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Frank van der Linden for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
23 * written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: procfs_linux.c,v 1.54.14.2 2011/02/05 06:00:15 cliff Exp $");
40
41 #include "opt_multiprocessor.h"
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/time.h>
46 #include <sys/kernel.h>
47 #include <sys/proc.h>
48 #include <sys/vnode.h>
49 #include <sys/exec.h>
50 #include <sys/resource.h>
51 #include <sys/resourcevar.h>
52 #include <sys/signal.h>
53 #include <sys/signalvar.h>
54 #include <sys/tty.h>
55 #include <sys/malloc.h>
56 #include <sys/mount.h>
57 #include <sys/conf.h>
58
59 #include <miscfs/procfs/procfs.h>
60 #include <miscfs/specfs/specdev.h>
61
62 #include <compat/linux/common/linux_exec.h>
63
64 #include <uvm/uvm_extern.h>
65 #include <uvm/uvm.h>
66
/* Device switch conversion table and its entry count (declared extern here). */
extern struct devsw_conv *devsw_conv;
extern int max_devsw_convs;

/* Convert a page count to bytes, or to kilobytes, as an unsigned long. */
#define	PGTOB(npg)	((unsigned long)(npg) << PAGE_SHIFT)
#define	PGTOKB(npg)	((unsigned long)(npg) << (PAGE_SHIFT - 10))

/* Size in bytes of the scratch buffer the handlers format their output into. */
#define	LBFSZ		(8 * 1024)
74
75 static void
76 get_proc_size_info(struct lwp *l, unsigned long *stext, unsigned long *etext, unsigned long *sstack)
77 {
78 struct proc *p = l->l_proc;
79 struct vmspace *vm;
80 struct vm_map *map;
81 struct vm_map_entry *entry;
82
83 *stext = 0;
84 *etext = 0;
85 *sstack = 0;
86
87 proc_vmspace_getref(p, &vm);
88 map = &vm->vm_map;
89 vm_map_lock_read(map);
90
91 for (entry = map->header.next; entry != &map->header;
92 entry = entry->next) {
93 if (UVM_ET_ISSUBMAP(entry))
94 continue;
95 /* assume text is the first entry */
96 if (*stext == *etext) {
97 *stext = entry->start;
98 *etext = entry->end;
99 break;
100 }
101 }
102 #ifdef LINUX_USRSTACK32
103 if (strcmp(p->p_emul->e_name, "linux32") == 0 &&
104 LINUX_USRSTACK32 < USRSTACK32)
105 *sstack = (unsigned long)LINUX_USRSTACK32;
106 else
107 #endif
108 #ifdef LINUX_USRSTACK
109 if (strcmp(p->p_emul->e_name, "linux") == 0 &&
110 LINUX_USRSTACK < USRSTACK)
111 *sstack = (unsigned long)LINUX_USRSTACK;
112 else
113 #endif
114 #ifdef USRSTACK32
115 if (strstr(p->p_emul->e_name, "32") != NULL)
116 *sstack = (unsigned long)USRSTACK32;
117 else
118 #endif
119 *sstack = (unsigned long)USRSTACK;
120
121 /*
122 * jdk 1.6 compares low <= addr && addr < high
123 * if we put addr == high, then the test fails
124 * so eat one page.
125 */
126 *sstack -= PAGE_SIZE;
127
128 vm_map_unlock_read(map);
129 uvmspace_free(vm);
130 }
131
132 /*
133 * Linux compatible /proc/meminfo. Only active when the -o linux
134 * mountflag is used.
135 */
136 int
137 procfs_domeminfo(struct lwp *curl, struct proc *p,
138 struct pfsnode *pfs, struct uio *uio)
139 {
140 char *bf;
141 int len;
142 int error = 0;
143
144 bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
145
146 len = snprintf(bf, LBFSZ,
147 " total: used: free: shared: buffers: cached:\n"
148 "Mem: %8lu %8lu %8lu %8lu %8lu %8lu\n"
149 "Swap: %8lu %8lu %8lu\n"
150 "MemTotal: %8lu kB\n"
151 "MemFree: %8lu kB\n"
152 "MemShared: %8lu kB\n"
153 "Buffers: %8lu kB\n"
154 "Cached: %8lu kB\n"
155 "SwapTotal: %8lu kB\n"
156 "SwapFree: %8lu kB\n",
157 PGTOB(uvmexp.npages),
158 PGTOB(uvmexp.npages - uvmexp.free),
159 PGTOB(uvmexp.free),
160 0L,
161 PGTOB(uvmexp.filepages),
162 PGTOB(uvmexp.anonpages + uvmexp.filepages + uvmexp.execpages),
163 PGTOB(uvmexp.swpages),
164 PGTOB(uvmexp.swpginuse),
165 PGTOB(uvmexp.swpages - uvmexp.swpginuse),
166 PGTOKB(uvmexp.npages),
167 PGTOKB(uvmexp.free),
168 0L,
169 PGTOKB(uvmexp.filepages),
170 PGTOKB(uvmexp.anonpages + uvmexp.filepages + uvmexp.execpages),
171 PGTOKB(uvmexp.swpages),
172 PGTOKB(uvmexp.swpages - uvmexp.swpginuse));
173
174 if (len == 0)
175 goto out;
176
177 error = uiomove_frombuf(bf, len, uio);
178 out:
179 free(bf, M_TEMP);
180 return error;
181 }
182
183 /*
184 * Linux compatible /proc/devices. Only active when the -o linux
185 * mountflag is used.
186 */
187 int
188 procfs_dodevices(struct lwp *curl, struct proc *p,
189 struct pfsnode *pfs, struct uio *uio)
190 {
191 char *bf;
192 int offset = 0;
193 int i, error = ENAMETOOLONG;
194
195 /* XXX elad - may need filtering. */
196
197 bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
198
199 offset += snprintf(&bf[offset], LBFSZ - offset, "Character devices:\n");
200 if (offset >= LBFSZ)
201 goto out;
202
203 mutex_enter(&specfs_lock);
204 for (i = 0; i < max_devsw_convs; i++) {
205 if ((devsw_conv[i].d_name == NULL) ||
206 (devsw_conv[i].d_cmajor == -1))
207 continue;
208
209 offset += snprintf(&bf[offset], LBFSZ - offset,
210 "%3d %s\n", devsw_conv[i].d_cmajor, devsw_conv[i].d_name);
211 if (offset >= LBFSZ) {
212 mutex_exit(&specfs_lock);
213 goto out;
214 }
215 }
216
217 offset += snprintf(&bf[offset], LBFSZ - offset, "\nBlock devices:\n");
218 if (offset >= LBFSZ) {
219 mutex_exit(&specfs_lock);
220 goto out;
221 }
222
223 for (i = 0; i < max_devsw_convs; i++) {
224 if ((devsw_conv[i].d_name == NULL) ||
225 (devsw_conv[i].d_bmajor == -1))
226 continue;
227
228 offset += snprintf(&bf[offset], LBFSZ - offset,
229 "%3d %s\n", devsw_conv[i].d_bmajor, devsw_conv[i].d_name);
230 if (offset >= LBFSZ) {
231 mutex_exit(&specfs_lock);
232 goto out;
233 }
234 }
235 mutex_exit(&specfs_lock);
236
237 error = uiomove_frombuf(bf, offset, uio);
238 out:
239 free(bf, M_TEMP);
240 return error;
241 }
242
243 /*
244 * Linux compatible /proc/stat. Only active when the -o linux
245 * mountflag is used.
246 */
247 int
248 procfs_docpustat(struct lwp *curl, struct proc *p,
249 struct pfsnode *pfs, struct uio *uio)
250 {
251 char *bf;
252 int error;
253 int len;
254 #if defined(MULTIPROCESSOR)
255 struct cpu_info *ci;
256 CPU_INFO_ITERATOR cii;
257 #endif
258 int i;
259
260 error = ENAMETOOLONG;
261 bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
262
263 len = snprintf(bf, LBFSZ,
264 "cpu %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
265 curcpu()->ci_schedstate.spc_cp_time[CP_USER],
266 curcpu()->ci_schedstate.spc_cp_time[CP_NICE],
267 curcpu()->ci_schedstate.spc_cp_time[CP_SYS] /*+ [CP_INTR]*/,
268 curcpu()->ci_schedstate.spc_cp_time[CP_IDLE]);
269 if (len == 0)
270 goto out;
271
272 #if defined(MULTIPROCESSOR)
273 #define ALLCPUS CPU_INFO_FOREACH(cii, ci)
274 #define CPUNAME ci
275 #else
276 #define ALLCPUS ; i < 1 ;
277 #define CPUNAME curcpu()
278 #endif
279
280 i = 0;
281 for (ALLCPUS) {
282 len += snprintf(&bf[len], LBFSZ - len,
283 "cpu%d %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64
284 "\n", i,
285 CPUNAME->ci_schedstate.spc_cp_time[CP_USER],
286 CPUNAME->ci_schedstate.spc_cp_time[CP_NICE],
287 CPUNAME->ci_schedstate.spc_cp_time[CP_SYS],
288 CPUNAME->ci_schedstate.spc_cp_time[CP_IDLE]);
289 if (len >= LBFSZ)
290 goto out;
291 i += 1;
292 }
293
294 len += snprintf(&bf[len], LBFSZ - len,
295 "disk 0 0 0 0\n"
296 "page %u %u\n"
297 "swap %u %u\n"
298 "intr %u\n"
299 "ctxt %u\n"
300 "btime %lld\n",
301 uvmexp.pageins, uvmexp.pdpageouts,
302 uvmexp.pgswapin, uvmexp.pgswapout,
303 uvmexp.intrs,
304 uvmexp.swtch,
305 (long long)boottime.tv_sec);
306 if (len >= LBFSZ)
307 goto out;
308
309 error = uiomove_frombuf(bf, len, uio);
310 out:
311 free(bf, M_TEMP);
312 return error;
313 }
314
315 /*
316 * Linux compatible /proc/loadavg. Only active when the -o linux
317 * mountflag is used.
318 */
319 int
320 procfs_doloadavg(struct lwp *curl, struct proc *p,
321 struct pfsnode *pfs, struct uio *uio)
322 {
323 char *bf;
324 int error;
325 int len;
326
327 error = ENAMETOOLONG;
328 bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
329
330 averunnable.fscale = FSCALE;
331 len = snprintf(bf, LBFSZ,
332 "%d.%02d %d.%02d %d.%02d %d/%d %d\n",
333 (int)(averunnable.ldavg[0] / averunnable.fscale),
334 (int)(averunnable.ldavg[0] * 100 / averunnable.fscale % 100),
335 (int)(averunnable.ldavg[1] / averunnable.fscale),
336 (int)(averunnable.ldavg[1] * 100 / averunnable.fscale % 100),
337 (int)(averunnable.ldavg[2] / averunnable.fscale),
338 (int)(averunnable.ldavg[2] * 100 / averunnable.fscale % 100),
339 1, /* number of ONPROC processes */
340 nprocs,
341 30000); /* last pid */
342 if (len == 0)
343 goto out;
344
345 error = uiomove_frombuf(bf, len, uio);
346 out:
347 free(bf, M_TEMP);
348 return error;
349 }
350
351 /*
352 * Linux compatible /proc/<pid>/statm. Only active when the -o linux
353 * mountflag is used.
354 */
355 int
356 procfs_do_pid_statm(struct lwp *curl, struct lwp *l,
357 struct pfsnode *pfs, struct uio *uio)
358 {
359 struct vmspace *vm;
360 struct proc *p = l->l_proc;
361 struct rusage *ru = &p->p_stats->p_ru;
362 char *bf;
363 int error;
364 int len;
365
366 error = ENAMETOOLONG;
367 bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
368
369 /* XXX - we use values from vmspace, since dsl says that ru figures
370 are always 0 except for zombies. See kvm_proc.c::kvm_getproc2() */
371 if ((error = proc_vmspace_getref(p, &vm)) != 0) {
372 goto out;
373 }
374
375 len = snprintf(bf, LBFSZ,
376 "%lu %lu %lu %lu %lu %lu %lu\n",
377 (unsigned long)(vm->vm_tsize + vm->vm_dsize + vm->vm_ssize), /* size */
378 (unsigned long)(vm->vm_rssize), /* resident */
379 (unsigned long)(ru->ru_ixrss), /* shared */
380 (unsigned long)(vm->vm_tsize), /* text size in pages */
381 (unsigned long)(vm->vm_dsize), /* data size in pages */
382 (unsigned long)(vm->vm_ssize), /* stack size in pages */
383 (unsigned long) 0);
384
385 uvmspace_free(vm);
386
387 if (len == 0)
388 goto out;
389
390 error = uiomove_frombuf(bf, len, uio);
391 out:
392 free(bf, M_TEMP);
393 return error;
394 }
395
396 #define USEC_2_TICKS(x) ((x) / 10000)
397
398 /*
399 * Linux compatible /proc/<pid>/stat. Only active when the -o linux
400 * mountflag is used.
401 */
402 int
403 procfs_do_pid_stat(struct lwp *curl, struct lwp *l,
404 struct pfsnode *pfs, struct uio *uio)
405 {
406 char *bf;
407 struct proc *p = l->l_proc;
408 int len;
409 struct tty *tty = p->p_session->s_ttyp;
410 struct rusage *ru = &p->p_stats->p_ru;
411 struct rusage *cru = &p->p_stats->p_cru;
412 unsigned long stext = 0, etext = 0, sstack = 0;
413 struct timeval rt;
414 struct vmspace *vm;
415 int error = 0;
416
417 bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
418
419 if ((error = proc_vmspace_getref(p, &vm)) != 0) {
420 goto out;
421 }
422
423 get_proc_size_info(l, &stext, &etext, &sstack);
424
425 mutex_enter(proc_lock);
426 mutex_enter(p->p_lock);
427
428 calcru(p, NULL, NULL, NULL, &rt);
429
430 len = snprintf(bf, LBFSZ,
431 "%d (%s) %c %d %d %d %d %d "
432 "%u "
433 "%lu %lu %lu %lu %lu %lu %lu %lu "
434 "%d %d %d "
435 "%lu %lu %lu %lu %" PRIu64 " "
436 "%lu %lu %lu "
437 "%u %u "
438 "%u %u %u %u "
439 "%lu %lu %lu %d %d\n",
440
441 p->p_pid,
442 p->p_comm,
443 "0IR3SZD"[(p->p_stat > 6) ? 0 : (int)p->p_stat],
444 (p->p_pptr != NULL) ? p->p_pptr->p_pid : 0,
445
446 p->p_pgid,
447 p->p_session->s_sid,
448 tty ? tty->t_dev : 0,
449 (tty && tty->t_pgrp) ? tty->t_pgrp->pg_id : 0,
450
451 p->p_flag,
452
453 ru->ru_minflt,
454 cru->ru_minflt,
455 ru->ru_majflt,
456 cru->ru_majflt,
457 USEC_2_TICKS(ru->ru_utime.tv_usec),
458 USEC_2_TICKS(ru->ru_stime.tv_usec),
459 USEC_2_TICKS(cru->ru_utime.tv_usec),
460 USEC_2_TICKS(cru->ru_stime.tv_usec),
461
462 l->l_priority, /* XXX: priority */
463 p->p_nice - 20,
464 0,
465
466 rt.tv_sec,
467 p->p_stats->p_start.tv_sec,
468 (unsigned long)(vm->vm_tsize + vm->vm_dsize + vm->vm_ssize), /* size */
469 (unsigned long)(vm->vm_rssize), /* resident */
470 p->p_rlimit[RLIMIT_RSS].rlim_cur,
471
472 stext, /* start code */
473 etext, /* end code */
474 sstack, /* mm start stack */
475 0, /* XXX: pc */
476 0, /* XXX: sp */
477 p->p_sigpend.sp_set.__bits[0], /* XXX: pending */
478 0, /* XXX: held */
479 p->p_sigctx.ps_sigignore.__bits[0], /* ignored */
480 p->p_sigctx.ps_sigcatch.__bits[0], /* caught */
481
482 (unsigned long)(intptr_t)l->l_wchan,
483 ru->ru_nvcsw,
484 ru->ru_nivcsw,
485 p->p_exitsig,
486 0); /* XXX: processor */
487
488 mutex_exit(p->p_lock);
489 mutex_exit(proc_lock);
490
491 uvmspace_free(vm);
492
493 if (len == 0)
494 goto out;
495
496 error = uiomove_frombuf(bf, len, uio);
497 out:
498 free(bf, M_TEMP);
499 return error;
500 }
501
502 int
503 procfs_docpuinfo(struct lwp *curl, struct proc *p,
504 struct pfsnode *pfs, struct uio *uio)
505 {
506 int len = LBFSZ;
507 char *bf = malloc(len, M_TEMP, M_WAITOK);
508 int error;
509
510 if (procfs_getcpuinfstr(bf, &len) < 0) {
511 error = ENOSPC;
512 goto done;
513 }
514
515 if (len == 0) {
516 error = 0;
517 goto done;
518 }
519
520 error = uiomove_frombuf(bf, len, uio);
521 done:
522 free(bf, M_TEMP);
523 return error;
524 }
525
526 int
527 procfs_douptime(struct lwp *curl, struct proc *p,
528 struct pfsnode *pfs, struct uio *uio)
529 {
530 char *bf;
531 int len;
532 struct timeval runtime;
533 u_int64_t idle;
534 int error = 0;
535
536 bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
537
538 microuptime(&runtime);
539 idle = curcpu()->ci_schedstate.spc_cp_time[CP_IDLE];
540 len = snprintf(bf, LBFSZ,
541 "%lu.%02lu %" PRIu64 ".%02" PRIu64 "\n",
542 runtime.tv_sec, runtime.tv_usec / 10000,
543 idle / hz, (((idle % hz) * 100) / hz) % 100);
544
545 if (len == 0)
546 goto out;
547
548 error = uiomove_frombuf(bf, len, uio);
549 out:
550 free(bf, M_TEMP);
551 return error;
552 }
553
554 int
555 procfs_domounts(struct lwp *curl, struct proc *p,
556 struct pfsnode *pfs, struct uio *uio)
557 {
558 char *bf, *mtab = NULL;
559 const char *fsname;
560 size_t len, mtabsz = 0;
561 struct mount *mp, *nmp;
562 struct statvfs *sfs;
563 int error = 0;
564
565 bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
566 mutex_enter(&mountlist_lock);
567 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
568 mp = nmp) {
569 if (vfs_busy(mp, &nmp)) {
570 continue;
571 }
572
573 sfs = &mp->mnt_stat;
574
575 /* Linux uses different names for some filesystems */
576 fsname = sfs->f_fstypename;
577 if (strcmp(fsname, "procfs") == 0)
578 fsname = "proc";
579 else if (strcmp(fsname, "ext2fs") == 0)
580 fsname = "ext2";
581
582 len = snprintf(bf, LBFSZ, "%s %s %s %s%s%s%s%s%s 0 0\n",
583 sfs->f_mntfromname,
584 sfs->f_mntonname,
585 fsname,
586 (mp->mnt_flag & MNT_RDONLY) ? "ro" : "rw",
587 (mp->mnt_flag & MNT_NOSUID) ? ",nosuid" : "",
588 (mp->mnt_flag & MNT_NOEXEC) ? ",noexec" : "",
589 (mp->mnt_flag & MNT_NODEV) ? ",nodev" : "",
590 (mp->mnt_flag & MNT_SYNCHRONOUS) ? ",sync" : "",
591 (mp->mnt_flag & MNT_NOATIME) ? ",noatime" : ""
592 );
593
594 mtab = realloc(mtab, mtabsz + len, M_TEMP, M_WAITOK);
595 memcpy(mtab + mtabsz, bf, len);
596 mtabsz += len;
597
598 vfs_unbusy(mp, false, &nmp);
599 }
600 mutex_exit(&mountlist_lock);
601 free(bf, M_TEMP);
602
603 if (mtabsz > 0) {
604 error = uiomove_frombuf(mtab, mtabsz, uio);
605 free(mtab, M_TEMP);
606 }
607
608 return error;
609 }
610