Home | History | Annotate | Line # | Download | only in kern
      1 /*	$NetBSD: kern_history.c,v 1.21 2026/01/04 01:34:29 riastradh Exp $	 */
      2 
      3 /*
      4  * Copyright (c) 1997 Charles D. Cranor and Washington University.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  *
     27  * from: NetBSD: uvm_stat.c,v 1.36 2011/02/02 15:13:34 chuck Exp
     28  * from: Id: uvm_stat.c,v 1.1.2.3 1997/12/19 15:01:00 mrg Exp
     29  */
     30 
     31 /*
     32  * kern_history.c
     33  */
     34 
     35 #include <sys/cdefs.h>
     36 __KERNEL_RCSID(0, "$NetBSD: kern_history.c,v 1.21 2026/01/04 01:34:29 riastradh Exp $");
     37 
     38 #include "opt_ddb.h"
     39 #include "opt_kernhist.h"
     40 #include "opt_syscall_debug.h"
     41 #include "opt_usb.h"
     42 #include "opt_uvmhist.h"
     43 #include "opt_biohist.h"
     44 #include "opt_sysctl.h"
     45 
     46 #include <sys/param.h>
     47 #include <sys/types.h>
     48 
     49 #include <sys/atomic.h>
     50 #include <sys/cpu.h>
     51 #include <sys/kernhist.h>
     52 #include <sys/kmem.h>
     53 #include <sys/sdt.h>
     54 #include <sys/sysctl.h>
     55 #include <sys/systm.h>
     56 
     57 #ifdef UVMHIST
     58 #include <uvm/uvm.h>
     59 #endif
     60 
     61 #ifdef USB_DEBUG
     62 #include <dev/usb/usbhist.h>
     63 #endif
     64 
     65 #ifdef BIOHIST
     66 #include <sys/biohist.h>
     67 #endif
     68 
     69 #ifdef SYSCALL_DEBUG
     70 KERNHIST_DECL(scdebughist);
     71 #endif
     72 
/*
 * One row of the address-to-offset translation table that
 * sysctl_kernhist_helper() builds while exporting a history.
 */
struct addr_xlt {
	const char *addr;	/* kernel address of the string */
	size_t len;		/* bytes to copy; a NUL is appended on export */
	uint32_t offset;	/* position of the string in the exported text area */
};
     78 
     79 /*
     80  * globals
     81  */
     82 
     83 struct kern_history_head kern_histories;
     84 bool kernhist_sysctl_ready = 0;
     85 
     86 int kernhist_print_enabled = 1;
     87 
     88 int sysctl_hist_node;
     89 
     90 static int sysctl_kernhist_helper(SYSCTLFN_PROTO);
     91 
     92 #ifdef DDB
     93 
     94 /*
     95  * prototypes
     96  */
     97 
     98 void kernhist_dump(struct kern_history *, size_t count,
     99     void (*)(const char *, ...) __printflike(1, 2));
    100 static void kernhist_info(struct kern_history *,
    101     void (*)(const char *, ...));
    102 void kernhist_dumpmask(uint32_t);
    103 static void kernhist_dump_histories(struct kern_history *[], size_t count,
    104     void (*)(const char *, ...) __printflike(1, 2));
    105 
    106 /* display info about one kernhist */
    107 static void
    108 kernhist_info(struct kern_history *l, void (*pr)(const char *, ...))
    109 {
    110 
    111 	pr("kernhist '%s': at %p total %u next free %u\n",
    112 	    l->name, l, l->n, l->f);
    113 }
    114 
    115 /*
    116  * call this from ddb
    117  *
    118  * expects the system to be quiesced, no locking
    119  */
    120 void
    121 kernhist_dump(struct kern_history *l, size_t count,
    122     void (*pr)(const char *, ...))
    123 {
    124 	int lcv;
    125 
    126 	lcv = l->f;
    127 	if (count > l->n)
    128 		pr("%s: count %zu > size %u\n", __func__, count, l->n);
    129 	else if (count)
    130 		lcv = (lcv - count) % l->n;
    131 
    132 	do {
    133 		if (l->e[lcv].fmt)
    134 			kernhist_entry_print(&l->e[lcv], pr);
    135 		lcv = (lcv + 1) % l->n;
    136 	} while (lcv != l->f);
    137 }
    138 
    139 /*
    140  * print a merged list of kern_history structures.  count is unused so far.
    141  */
    142 static void
    143 kernhist_dump_histories(struct kern_history *hists[], size_t count,
    144     void (*pr)(const char *, ...))
    145 {
    146 	struct bintime	bt;
    147 	int	cur[MAXHISTS];
    148 	int	lcv, hi;
    149 
    150 	/* find the first of each list */
    151 	for (lcv = 0; hists[lcv]; lcv++)
    152 		 cur[lcv] = hists[lcv]->f;
    153 
    154 	/*
    155 	 * here we loop "forever", finding the next earliest
    156 	 * history entry and printing it.  cur[X] is the current
    157 	 * entry to test for the history in hists[X].  if it is
    158 	 * -1, then this history is finished.
    159 	 */
    160 	for (;;) {
    161 		hi = -1;
    162 		bt.sec = 0; bt.frac = 0;
    163 
    164 		/* loop over each history */
    165 		for (lcv = 0; hists[lcv]; lcv++) {
    166 restart:
    167 			if (cur[lcv] == -1)
    168 				continue;
    169 			if (!hists[lcv]->e)
    170 				continue;
    171 
    172 			/*
    173 			 * if the format is empty, go to the next entry
    174 			 * and retry.
    175 			 */
    176 			if (hists[lcv]->e[cur[lcv]].fmt == NULL) {
    177 				cur[lcv] = (cur[lcv] + 1) % (hists[lcv]->n);
    178 				if (cur[lcv] == hists[lcv]->f)
    179 					cur[lcv] = -1;
    180 				goto restart;
    181 			}
    182 
    183 			/*
    184 			 * if the time hasn't been set yet, or this entry is
    185 			 * earlier than the current bt, set the time and history
    186 			 * index.
    187 			 */
    188 			if (bt.sec == 0 ||
    189 			    bintimecmp(&hists[lcv]->e[cur[lcv]].bt, &bt, <)) {
    190 				bt = hists[lcv]->e[cur[lcv]].bt;
    191 				hi = lcv;
    192 			}
    193 		}
    194 
    195 		/* if we didn't find any entries, we must be done */
    196 		if (hi == -1)
    197 			break;
    198 
    199 		/* print and move to the next entry */
    200 		kernhist_entry_print(&hists[hi]->e[cur[hi]], pr);
    201 
    202 		cur[hi] = (cur[hi] + 1) % (hists[hi]->n);
    203 		if (cur[hi] == hists[hi]->f)
    204 			cur[hi] = -1;
    205 	}
    206 }
    207 
    208 /*
    209  * call this from ddb.  `bitmask' is from <sys/kernhist.h>.  it
    210  * merges the named histories.
    211  *
    212  * expects the system to be quiesced, no locking
    213  */
    214 void
    215 kernhist_dumpmask(uint32_t bitmask)	/* XXX only support 32 hists */
    216 {
    217 	struct kern_history *hists[MAXHISTS + 1];
    218 	int i = 0;
    219 
    220 #ifdef UVMHIST
    221 	if ((bitmask & KERNHIST_UVMMAPHIST) || bitmask == 0)
    222 		hists[i++] = &maphist;
    223 
    224 	if ((bitmask & KERNHIST_UVMPDHIST) || bitmask == 0)
    225 		hists[i++] = &pdhist;
    226 
    227 	if ((bitmask & KERNHIST_UVMUBCHIST) || bitmask == 0)
    228 		hists[i++] = &ubchist;
    229 
    230 	if ((bitmask & KERNHIST_UVMLOANHIST) || bitmask == 0)
    231 		hists[i++] = &loanhist;
    232 #endif
    233 
    234 #ifdef USB_DEBUG
    235 	if ((bitmask & KERNHIST_USBHIST) || bitmask == 0)
    236 		hists[i++] = &usbhist;
    237 #endif
    238 
    239 #ifdef SYSCALL_DEBUG
    240 	if ((bitmask & KERNHIST_SCDEBUGHIST) || bitmask == 0)
    241 		hists[i++] = &scdebughist;
    242 #endif
    243 
    244 #ifdef BIOHIST
    245 	if ((bitmask & KERNHIST_BIOHIST) || bitmask == 0)
    246 		hists[i++] = &biohist;
    247 #endif
    248 
    249 	hists[i] = NULL;
    250 
    251 	kernhist_dump_histories(hists, 0, printf);
    252 }
    253 
    254 /*
    255  * kernhist_print: ddb hook to print kern history.
    256  */
    257 void
    258 kernhist_print(void *addr, size_t count, const char *modif,
    259     void (*pr)(const char *, ...) __printflike(1,2))
    260 {
    261 	struct kern_history *h;
    262 
    263 	LIST_FOREACH(h, &kern_histories, list) {
    264 		if (h == addr)
    265 			break;
    266 	}
    267 
    268 	if (h == NULL) {
    269 		struct kern_history *hists[MAXHISTS + 1];
    270 		int i = 0;
    271 #ifdef UVMHIST
    272 		hists[i++] = &maphist;
    273 		hists[i++] = &pdhist;
    274 		hists[i++] = &ubchist;
    275 		hists[i++] = &loanhist;
    276 #endif
    277 #ifdef USB_DEBUG
    278 		hists[i++] = &usbhist;
    279 #endif
    280 
    281 #ifdef SYSCALL_DEBUG
    282 		hists[i++] = &scdebughist;
    283 #endif
    284 #ifdef BIOHIST
    285 		hists[i++] = &biohist;
    286 #endif
    287 		hists[i] = NULL;
    288 
    289 		if (*modif == 'i') {
    290 			int lcv;
    291 
    292 			for (lcv = 0; hists[lcv]; lcv++)
    293 				kernhist_info(hists[lcv], pr);
    294 		} else {
    295 			kernhist_dump_histories(hists, count, pr);
    296 		}
    297 	} else {
    298 		if (*modif == 'i')
    299 			kernhist_info(h, pr);
    300 		else
    301 			kernhist_dump(h, count, pr);
    302 	}
    303 }
    304 
    305 #endif
    306 
    307 /*
    308  * sysctl interface
    309  */
    310 
    311 /*
    312  * sysctl_kernhist_new()
    313  *
    314  *	If the specified history (or, if no history is specified, any
    315  *	history) does not already have a sysctl node (under kern.hist)
    316  *	we create a new one and record it's node number.
    317  */
    318 void
    319 sysctl_kernhist_new(struct kern_history *hist)
    320 {
    321 	int error;
    322 	struct kern_history *h;
    323 	const struct sysctlnode *rnode = NULL;
    324 
    325 	membar_consumer();
    326 	if (kernhist_sysctl_ready == 0)
    327 		return;
    328 
    329 	LIST_FOREACH(h, &kern_histories, list) {
    330 		if (hist && h != hist)
    331 			continue;
    332 		if (h->s != 0)
    333 			continue;
    334 		error = sysctl_createv(NULL, 0, NULL, &rnode,
    335 			    CTLFLAG_PERMANENT,
    336 			    CTLTYPE_STRUCT, h->name,
    337 			    SYSCTL_DESCR("history data"),
    338 			    sysctl_kernhist_helper, 0, NULL, 0,
    339 			    CTL_KERN, sysctl_hist_node, CTL_CREATE, CTL_EOL);
    340 		if (error == 0)
    341 			h->s = rnode->sysctl_num;
    342 		if (hist == h)
    343 			break;
    344 	}
    345 }
    346 
    347 /*
    348  * sysctl_kerhnist_init()
    349  *
    350  *	Create the 2nd level "hw.hist" sysctl node
    351  */
    352 void
    353 sysctl_kernhist_init(void)
    354 {
    355 	const struct sysctlnode *rnode = NULL;
    356 
    357 	sysctl_createv(NULL, 0, NULL, &rnode,
    358 			CTLFLAG_PERMANENT,
    359 			CTLTYPE_NODE, "hist",
    360 			SYSCTL_DESCR("kernel history tables"),
    361 			sysctl_kernhist_helper, 0, NULL, 0,
    362 			CTL_KERN, CTL_CREATE, CTL_EOL);
    363 	sysctl_hist_node = rnode->sysctl_num;
    364 
    365 	kernhist_sysctl_ready = 1;
    366 	membar_producer();
    367 
    368 	sysctl_kernhist_new(NULL);
    369 }
    370 
    371 /*
    372  * find_string()
    373  *
    374  *	Search the address-to-offset translation table for matching an
    375  *	address and len, and return the index of the entry we found.  If
    376  *	not found, returns index 0 which points to the "?" entry.  (We
    377  *	start matching at index 1, ignoring any matches of the "?" entry
    378  *	itself.)
    379  */
    380 static int
    381 find_string(struct addr_xlt table[], size_t *count, const char *string,
    382 	    size_t len)
    383 {
    384 	int i;
    385 
    386 	for (i = 1; i < *count; i++)
    387 		if (string == table[i].addr && len == table[i].len)
    388 			return i;
    389 
    390 	return 0;
    391 }
    392 
    393 /*
    394  * add_string()
    395  *
    396  *	If the string and len are unique, add a new address-to-offset
    397  *	entry in the translation table and set the offset of the next
    398  *	entry.
    399  */
    400 static void
    401 add_string(struct addr_xlt table[], size_t *count, const char *string,
    402 	   size_t len)
    403 {
    404 
    405 	if (find_string(table, count, string, len) == 0) {
    406 		table[*count].addr = string;
    407 		table[*count].len = len;
    408 		table[*count + 1].offset = table[*count].offset + len + 1;
    409 		(*count)++;
    410 	}
    411 }
    412 
    413 /*
    414  * sysctl_kernhist_helper
    415  *
    416  *	This helper routine is called for all accesses to the kern.hist
    417  *	hierarchy.
    418  */
    419 static int
    420 sysctl_kernhist_helper(SYSCTLFN_ARGS)
    421 {
    422 	struct kern_history *h;
    423 	struct kern_history_ent *in_evt;
    424 	struct sysctl_history_event *out_evt;
    425 	struct sysctl_history *buf;
    426 	struct addr_xlt *xlate_t, *xlt;
    427 	size_t bufsize, xlate_s;
    428 	size_t xlate_c;
    429 	const char *strp __diagused;
    430 	char *next;
    431 	int i, j;
    432 	int error;
    433 
    434 	if (namelen == 1 && name[0] == CTL_QUERY)
    435 		return sysctl_query(SYSCTLFN_CALL(rnode));
    436 
    437 	/*
    438 	 * Disallow userland updates, verify that we arrived at a
    439 	 * valid history rnode
    440 	 */
    441 	if (newp)
    442 		return SET_ERROR(EPERM);
    443 	if (namelen != 1 || name[0] != CTL_EOL)
    444 		return SET_ERROR(EINVAL);
    445 
    446 	/* Find the correct kernhist for this sysctl node */
    447 	LIST_FOREACH(h, &kern_histories, list) {
    448 		if (h->s == rnode->sysctl_num)
    449 			break;
    450 	}
    451 	if (h == NULL)
    452 		return SET_ERROR(ENOENT);
    453 
    454 	/*
    455 	 * Worst case is two string pointers per history entry, plus
    456 	 * two for the history name and "?" string; allocate an extra
    457 	 * entry since we pre-set the "next" entry's offset member.
    458 	 */
    459 	xlate_s = sizeof(struct addr_xlt) * h->n * 2 + 3;
    460 	xlate_t = kmem_alloc(xlate_s, KM_SLEEP);
    461 	xlate_c = 0;
    462 
    463 	/* offset 0 reserved for NULL pointer, ie unused history entry */
    464 	xlate_t[0].offset = 1;
    465 
    466 	/*
    467 	 * If the history gets updated and an unexpected string is
    468 	 * found later, we'll point it here.  Otherwise, we'd have to
    469 	 * repeat this process iteratively, and it could take multiple
    470 	 * iterations before terminating.
    471 	 */
    472 	add_string(xlate_t, &xlate_c, "?", 0);
    473 
    474 	/* Copy the history name itself to the export structure */
    475 	add_string(xlate_t, &xlate_c, h->name, h->namelen);
    476 
    477 	/*
    478 	 * Loop through all used history entries to find the unique
    479 	 * fn and fmt strings
    480 	 */
    481 	for (i = 0, in_evt = h->e; i < h->n; i++, in_evt++) {
    482 		if (in_evt->fn == NULL)
    483 			continue;
    484 		add_string(xlate_t, &xlate_c, in_evt->fn, in_evt->fnlen);
    485 		add_string(xlate_t, &xlate_c, in_evt->fmt, in_evt->fmtlen);
    486 	}
    487 
    488 	/* Total buffer size includes header, events, and string table */
    489 	bufsize = sizeof(struct sysctl_history) +
    490 	    h->n * sizeof(struct sysctl_history_event) +
    491 	    xlate_t[xlate_c].offset;
    492 	buf = kmem_alloc(bufsize, KM_SLEEP);
    493 
    494 	/*
    495 	 * Copy history header info to the export structure
    496 	 */
    497 	j = find_string(xlate_t, &xlate_c, h->name, h->namelen);
    498 	buf->sh_nameoffset = xlate_t[j].offset;
    499 	buf->sh_numentries = h->n;
    500 	buf->sh_nextfree = h->f;
    501 
    502 	/*
    503 	 * Loop through the history events again, copying the data to
    504 	 * the export structure
    505 	 */
    506 	for (i = 0, in_evt = h->e, out_evt = buf->sh_events; i < h->n;
    507 	    i++, in_evt++, out_evt++) {
    508 		if (in_evt->fn == NULL) {	/* skip unused entries */
    509 			out_evt->she_funcoffset = 0;
    510 			out_evt->she_fmtoffset = 0;
    511 			continue;
    512 		}
    513 		out_evt->she_bintime = in_evt->bt;
    514 		out_evt->she_callnumber = in_evt->call;
    515 		out_evt->she_cpunum = in_evt->cpunum;
    516 		out_evt->she_values[0] = in_evt->v[0];
    517 		out_evt->she_values[1] = in_evt->v[1];
    518 		out_evt->she_values[2] = in_evt->v[2];
    519 		out_evt->she_values[3] = in_evt->v[3];
    520 		j = find_string(xlate_t, &xlate_c, in_evt->fn, in_evt->fnlen);
    521 		out_evt->she_funcoffset = xlate_t[j].offset;
    522 		j = find_string(xlate_t, &xlate_c, in_evt->fmt, in_evt->fmtlen);
    523 		out_evt->she_fmtoffset = xlate_t[j].offset;
    524 	}
    525 
    526 	/*
    527 	 * Finally, fill the text string area with all the unique
    528 	 * strings we found earlier.
    529 	 *
    530 	 * Skip the initial byte, since we use an offset of 0 to mean
    531 	 * a NULL pointer (which means an unused history event).
    532 	 */
    533 	strp = next = (char *)(&buf->sh_events[h->n]);
    534 	*next++ = '\0';
    535 
    536 	/*
    537 	 * Then copy each string into the export structure, making
    538 	 * sure to terminate each string with a '\0' character
    539 	 */
    540 	for (i = 0, xlt = xlate_t; i < xlate_c; i++, xlt++) {
    541 		KASSERTMSG((next - strp) == xlt->offset,
    542 		    "entry %d at wrong offset %"PRIu32, i, xlt->offset);
    543 		memcpy(next, xlt->addr, xlt->len);
    544 		next += xlt->len;
    545 		*next++ = '\0';
    546 	}
    547 
    548 	/* Copy data to userland */
    549 	error = copyout(buf, oldp, uimin(bufsize, *oldlenp));
    550 
    551 	/* If copyout was successful but only partial, report ENOMEM */
    552 	if (error == 0 && *oldlenp < bufsize)
    553 		error = SET_ERROR(ENOMEM);
    554 
    555 	*oldlenp = bufsize;	/* inform userland of space requirements */
    556 
    557 	/* Free up the stuff we allocated */
    558 	kmem_free(buf, bufsize);
    559 	kmem_free(xlate_t, xlate_s);
    560 
    561 	return error;
    562 }
    563