Home | History | Annotate | Line # | Download | only in kern
      1 /*	$NetBSD: kern_ksyms.c,v 1.111 2026/01/04 01:34:57 riastradh Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2008 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software developed for The NetBSD Foundation
      8  * by Andrew Doran.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * Copyright (c) 2001, 2003 Anders Magnusson (ragge (at) ludd.luth.se).
     34  * All rights reserved.
     35  *
     36  * Redistribution and use in source and binary forms, with or without
     37  * modification, are permitted provided that the following conditions
     38  * are met:
     39  * 1. Redistributions of source code must retain the above copyright
     40  *    notice, this list of conditions and the following disclaimer.
     41  * 2. Redistributions in binary form must reproduce the above copyright
     42  *    notice, this list of conditions and the following disclaimer in the
     43  *    documentation and/or other materials provided with the distribution.
     44  * 3. The name of the author may not be used to endorse or promote products
     45  *    derived from this software without specific prior written permission
     46  *
     47  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     48  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     49  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     50  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     51  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     52  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     53  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     54  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     55  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     56  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     57  */
     58 
     59 /*
     60  * Code to deal with in-kernel symbol table management + /dev/ksyms.
     61  *
     62  * For each loaded module the symbol table info is kept track of by a
     63  * struct, placed in a circular list. The first entry is the kernel
     64  * symbol table.
     65  */
     66 
     67 /*
     68  * TODO:
     69  *
     70  *	Add support for mmap, poll.
     71  *	Constify tables.
     72  *	Constify db_symtab and move it to .rodata.
     73  */
     74 
     75 #define _KSYMS_PRIVATE
     76 
     77 #include <sys/cdefs.h>
     78 __KERNEL_RCSID(0, "$NetBSD: kern_ksyms.c,v 1.111 2026/01/04 01:34:57 riastradh Exp $");
     79 
     80 #if defined(_KERNEL) && defined(_KERNEL_OPT)
     81 #include "opt_copy_symtab.h"
     82 #include "opt_ddb.h"
     83 #include "opt_dtrace.h"
     84 #endif
     85 
     86 #include <sys/param.h>
     87 #include <sys/types.h>
     88 
     89 #include <sys/atomic.h>
     90 #include <sys/conf.h>
     91 #include <sys/exec.h>
     92 #include <sys/file.h>
     93 #include <sys/filedesc.h>
     94 #include <sys/intr.h>
     95 #include <sys/kauth.h>
     96 #include <sys/kernel.h>
     97 #include <sys/kmem.h>
     98 #include <sys/ksyms.h>
     99 #include <sys/proc.h>
    100 #include <sys/pserialize.h>
    101 #include <sys/queue.h>
    102 #include <sys/sdt.h>
    103 #include <sys/stat.h>
    104 #include <sys/systm.h>
    105 
    106 #include <uvm/uvm_extern.h>
    107 
    108 #ifdef DDB
    109 #include <ddb/db_output.h>
    110 #endif
    111 
    112 #include "ksyms.h"
    113 #if NKSYMS > 0
    114 #include "ioconf.h"
    115 #endif
    116 
/*
 * Descriptor for a snapshot of the symbol tables.  Only the type is
 * declared here; the code that fills in and consumes these fields is
 * not visible in this part of the file, so the per-field notes are
 * inferred from the names -- TODO confirm against
 * ksyms_snapshot_alloc() and its users.
 */
struct ksyms_snapshot {
	uint64_t		ks_refcnt;	/* presumably: reference count */
	uint64_t		ks_gen;		/* presumably: generation number */
	struct uvm_object	*ks_uobj;	/* presumably: backing UVM object */
	size_t			ks_size;	/* presumably: size in bytes */
	dev_t			ks_dev;		/* presumably: device number */
	int			ks_maxlen;	/* presumably: copy of ksyms_maxlen */
};
    125 
/*
 * Number of entries in the statically allocated name map.  addsymtab()
 * checks the symbol count against this limit when it is handed
 * ksyms_nmap as the map to fill.
 */
#define KSYMS_MAX_ID	98304
#ifdef KDTRACE_HOOKS
static uint32_t ksyms_nmap[KSYMS_MAX_ID];	/* sorted symbol table map */
#else
static uint32_t *ksyms_nmap = NULL;
#endif

/* Longest symbol name seen by addsymtab(), counting the terminating NUL. */
static int ksyms_maxlen;
/* Set once ksyms_init() has created the lock, condvar and pserialize. */
static bool ksyms_initted;
/* Set by addsymtab() once a symbol table has been published. */
static bool ksyms_loaded;
/* Serializes changes to ksyms_symtabs and the global snapshot pointer. */
static kmutex_t ksyms_lock __cacheline_aligned;
/* Symbol table record used for the kernel itself ("netbsd"). */
static struct ksyms_symtab kernel_symtab;
/* ksyms_modunload() waits on this while ksyms_snapshotting is set. */
static kcondvar_t ksyms_cv;
/* Nonnull while a snapshot is in progress (see ksyms_modunload()). */
static struct lwp *ksyms_snapshotting;
/* Current global snapshot; cleared on module load and unload. */
static struct ksyms_snapshot *ksyms_snapshot;
/* NOTE(review): presumably a snapshot generation counter -- users not visible here. */
static uint64_t ksyms_snapshot_gen;
/* Created in ksyms_init(); used for the grace period in ksyms_modunload(). */
static pserialize_t ksyms_psz __read_mostly;

static void ksyms_hdr_init(const void *);
static void ksyms_sizes_calc(void);
static struct ksyms_snapshot *ksyms_snapshot_alloc(int, size_t, dev_t,
    uint64_t);
static void ksyms_snapshot_release(struct ksyms_snapshot *);

#ifdef KSYMS_DEBUG
/* Bits tested against ksyms_debug to enable debugging printfs. */
#define	FOLLOW_CALLS		1
#define	FOLLOW_MORE_CALLS	2
#define	FOLLOW_DEVKSYMS		4
static int ksyms_debug;
#endif

/*
 * ksyms_init() compares the start of db_symtab against this string and
 * only loads db_symtab when the two differ.
 */
#define		SYMTAB_FILLER	"|This is the symbol table!"

#ifdef makeoptions_COPY_SYMTAB
extern char db_symtab[];
extern int db_symtabsize;
#endif

/*
 * used by savecore(8) so non-static
 */
struct ksyms_hdr ksyms_hdr;
int ksyms_symsz;
int ksyms_strsz;
int ksyms_ctfsz;	/* this is not currently used by savecore(8) */
/*
 * All registered symbol tables, kept twice: as a TAILQ for writers and
 * lock holders, and as a pslist for pserialized readers.  addsymtab()
 * and ksyms_modunload() update both together at splhigh.
 */
TAILQ_HEAD(ksyms_symtab_queue, ksyms_symtab) ksyms_symtabs =
    TAILQ_HEAD_INITIALIZER(ksyms_symtabs);
static struct pslist_head ksyms_symtabs_psz = PSLIST_INITIALIZER;
    174 
/*
 * Check that both a symbol table and a string table were found.
 * Returns 1 if both pointers are nonnull, 0 otherwise.  DIAGNOSTIC and
 * DEBUG kernels also print which of the two is missing.
 */
static int
ksyms_verify(const void *symstart, const void *strstart)
{
	const int have_sym = (symstart != NULL);
	const int have_str = (strstart != NULL);

#if defined(DIAGNOSTIC) || defined(DEBUG)
	if (!have_sym)
		printf("ksyms: Symbol table not found\n");
	if (!have_str)
		printf("ksyms: String table not found\n");
	if (!(have_sym && have_str))
		printf("ksyms: Perhaps the kernel is stripped?\n");
#endif
	return have_sym && have_str;
}
    190 
    191 /*
    192  * Finds a certain symbol name in a certain symbol table.
    193  */
    194 static Elf_Sym *
    195 findsym(const char *name, struct ksyms_symtab *table, int type)
    196 {
    197 	Elf_Sym *sym, *maxsym;
    198 	int low, mid, high, nglob;
    199 	char *str, *cmp;
    200 
    201 	sym = table->sd_symstart;
    202 	str = table->sd_strstart - table->sd_usroffset;
    203 	nglob = table->sd_nglob;
    204 	low = 0;
    205 	high = nglob;
    206 
    207 	/*
    208 	 * Start with a binary search of all global symbols in this table.
    209 	 * Global symbols must have unique names.
    210 	 */
    211 	while (low < high) {
    212 		mid = (low + high) >> 1;
    213 		cmp = sym[mid].st_name + str;
    214 		if (cmp[0] < name[0] || strcmp(cmp, name) < 0) {
    215 			low = mid + 1;
    216 		} else {
    217 			high = mid;
    218 		}
    219 	}
    220 	KASSERT(low == high);
    221 	if (__predict_true(low < nglob &&
    222 	    strcmp(sym[low].st_name + str, name) == 0)) {
    223 		KASSERT(ELF_ST_BIND(sym[low].st_info) == STB_GLOBAL);
    224 		return &sym[low];
    225 	}
    226 
    227 	/*
    228 	 * Perform a linear search of local symbols (rare).  Many local
    229 	 * symbols with the same name can exist so are not included in
    230 	 * the binary search.
    231 	 */
    232 	if (type != KSYMS_EXTERN) {
    233 		maxsym = sym + table->sd_symsize / sizeof(Elf_Sym);
    234 		for (sym += nglob; sym < maxsym; sym++) {
    235 			if (strcmp(name, sym->st_name + str) == 0) {
    236 				return sym;
    237 			}
    238 		}
    239 	}
    240 	return NULL;
    241 }
    242 
    243 /*
    244  * The "attach" is in reality done in ksyms_init().
    245  */
    246 #if NKSYMS > 0
    247 /*
    248  * ksyms can be loaded even if the kernel has a missing "pseudo-device ksyms"
    249  * statement because ddb and modules require it. Fixing it properly requires
    250  * fixing config to warn about required, but missing pseudo-devices. For now,
    251  * if we don't have the pseudo-device we don't need the attach function; this
    252  * is fine, as it does nothing.
    253  */
void
ksymsattach(int arg)
{
	/* Intentionally empty; arg is not used. */
}
    258 #endif
    259 
    260 void
    261 ksyms_init(void)
    262 {
    263 
    264 #ifdef makeoptions_COPY_SYMTAB
    265 	if (!ksyms_loaded &&
    266 	    strncmp(db_symtab, SYMTAB_FILLER, sizeof(SYMTAB_FILLER))) {
    267 		ksyms_addsyms_elf(db_symtabsize, db_symtab,
    268 		    db_symtab + db_symtabsize);
    269 	}
    270 #endif
    271 
    272 	if (!ksyms_initted) {
    273 		mutex_init(&ksyms_lock, MUTEX_DEFAULT, IPL_NONE);
    274 		cv_init(&ksyms_cv, "ksyms");
    275 		ksyms_psz = pserialize_create();
    276 		ksyms_initted = true;
    277 	}
    278 }
    279 
    280 /*
    281  * Are any symbols available?
    282  */
    283 bool
    284 ksyms_available(void)
    285 {
    286 
    287 	return ksyms_loaded;
    288 }
    289 
/*
 * Add a symbol table.
 * This is intended for use when the symbol table and its corresponding
 * string table are easily available.  If they are embedded in an ELF
 * image, use ksyms_addsyms_elf() instead.
 *
 * name - Name of the symbol table.
 * symstart, symsize - Address and size of the symbol table.
 * strstart, strsize - Address and size of the string table.
 * tab - Symbol table to be updated with this information.
 * newstart - Address to which the symbol table is copied while it is
 *            packed.  Pass symstart to pack the table in place.
 */
    303 static const char *addsymtab_strstart;
    304 
    305 static int
    306 addsymtab_compar(const void *a, const void *b)
    307 {
    308 	const Elf_Sym *sa, *sb;
    309 
    310 	sa = a;
    311 	sb = b;
    312 
    313 	/*
    314 	 * Split the symbol table into two, with globals at the start
    315 	 * and locals at the end.
    316 	 */
    317 	if (ELF_ST_BIND(sa->st_info) != ELF_ST_BIND(sb->st_info)) {
    318 		if (ELF_ST_BIND(sa->st_info) == STB_GLOBAL) {
    319 			return -1;
    320 		}
    321 		if (ELF_ST_BIND(sb->st_info) == STB_GLOBAL) {
    322 			return 1;
    323 		}
    324 	}
    325 
    326 	/* Within each band, sort by name. */
    327 	return strcmp(sa->st_name + addsymtab_strstart,
    328 	    sb->st_name + addsymtab_strstart);
    329 }
    330 
    331 static void
    332 addsymtab(const char *name, void *symstart, size_t symsize,
    333 	  void *strstart, size_t strsize, struct ksyms_symtab *tab,
    334 	  void *newstart, void *ctfstart, size_t ctfsize, uint32_t *nmap)
    335 {
    336 	Elf_Sym *sym, *nsym, ts;
    337 	int i, j, n, nglob;
    338 	char *str;
    339 	int nsyms = symsize / sizeof(Elf_Sym);
    340 	int s;
    341 
    342 	/* Sanity check for pre-allocated map table used during startup. */
    343 	if ((nmap == ksyms_nmap) && (nsyms >= KSYMS_MAX_ID)) {
    344 		printf("kern_ksyms: ERROR %d > %d, increase KSYMS_MAX_ID\n",
    345 		    nsyms, KSYMS_MAX_ID);
    346 
    347 		/* truncate for now */
    348 		nsyms = KSYMS_MAX_ID - 1;
    349 	}
    350 
    351 	tab->sd_symstart = symstart;
    352 	tab->sd_symsize = symsize;
    353 	tab->sd_strstart = strstart;
    354 	tab->sd_strsize = strsize;
    355 	tab->sd_name = name;
    356 	tab->sd_minsym = UINTPTR_MAX;
    357 	tab->sd_maxsym = 0;
    358 	tab->sd_usroffset = 0;
    359 	tab->sd_ctfstart = ctfstart;
    360 	tab->sd_ctfsize = ctfsize;
    361 	tab->sd_nmap = nmap;
    362 	tab->sd_nmapsize = nsyms;
    363 #ifdef KSYMS_DEBUG
    364 	printf("newstart %p sym %p ksyms_symsz %zu str %p strsz %zu send %p\n",
    365 	    newstart, symstart, symsize, strstart, strsize,
    366 	    tab->sd_strstart + tab->sd_strsize);
    367 #endif
    368 
    369 	if (nmap) {
    370 		memset(nmap, 0, nsyms * sizeof(uint32_t));
    371 	}
    372 
    373 	/* Pack symbol table by removing all file name references. */
    374 	sym = tab->sd_symstart;
    375 	nsym = (Elf_Sym *)newstart;
    376 	str = tab->sd_strstart;
    377 	nglob = 0;
    378 	for (i = n = 0; i < nsyms; i++) {
    379 
    380 		/*
    381 		 * This breaks CTF mapping, so don't do it when
    382 		 * DTrace is enabled.
    383 		 */
    384 #ifndef KDTRACE_HOOKS
    385 		/*
    386 		 * Remove useless symbols.
    387 		 * Should actually remove all typeless symbols.
    388 		 */
    389 		if (sym[i].st_name == 0)
    390 			continue; /* Skip nameless entries */
    391 		if (sym[i].st_shndx == SHN_UNDEF)
    392 			continue; /* Skip external references */
    393 		if (ELF_ST_TYPE(sym[i].st_info) == STT_FILE)
    394 			continue; /* Skip filenames */
    395 		if (ELF_ST_TYPE(sym[i].st_info) == STT_NOTYPE &&
    396 		    sym[i].st_value == 0 &&
    397 		    strcmp(str + sym[i].st_name, "*ABS*") == 0)
    398 			continue; /* XXX */
    399 		if (ELF_ST_TYPE(sym[i].st_info) == STT_NOTYPE &&
    400 		    strcmp(str + sym[i].st_name, "gcc2_compiled.") == 0)
    401 			continue; /* XXX */
    402 #endif
    403 
    404 		/* Save symbol. Set it as an absolute offset */
    405 		nsym[n] = sym[i];
    406 
    407 #ifdef KDTRACE_HOOKS
    408 		if (nmap != NULL) {
    409 			/*
    410 			 * Save the size, replace it with the symbol id so
    411 			 * the mapping can be done after the cleanup and sort.
    412 			 */
    413 			nmap[i] = nsym[n].st_size;
    414 			nsym[n].st_size = i + 1;	/* zero is reserved */
    415 		}
    416 #endif
    417 
    418 		if (sym[i].st_shndx != SHN_ABS) {
    419 			nsym[n].st_shndx = SHBSS;
    420 		} else {
    421 			/* SHN_ABS is a magic value, don't overwrite it */
    422 		}
    423 
    424 		j = strlen(nsym[n].st_name + str) + 1;
    425 		if (j > ksyms_maxlen)
    426 			ksyms_maxlen = j;
    427 		nglob += (ELF_ST_BIND(nsym[n].st_info) == STB_GLOBAL);
    428 
    429 		/* Compute min and max symbols. */
    430 		if (strcmp(str + sym[i].st_name, "*ABS*") != 0
    431 		    && ELF_ST_TYPE(nsym[n].st_info) != STT_NOTYPE) {
    432 			if (nsym[n].st_value < tab->sd_minsym) {
    433 				tab->sd_minsym = nsym[n].st_value;
    434 			}
    435 			if (nsym[n].st_value > tab->sd_maxsym) {
    436 				tab->sd_maxsym = nsym[n].st_value;
    437 			}
    438 		}
    439 		n++;
    440 	}
    441 
    442 	/* Fill the rest of the record, and sort the symbols. */
    443 	tab->sd_symstart = nsym;
    444 	tab->sd_symsize = n * sizeof(Elf_Sym);
    445 	tab->sd_nglob = nglob;
    446 
    447 	addsymtab_strstart = str;
    448 	if (kheapsort(nsym, n, sizeof(Elf_Sym), addsymtab_compar, &ts) != 0)
    449 		panic("addsymtab");
    450 
    451 #ifdef KDTRACE_HOOKS
    452 	/*
    453 	 * Build the mapping from original symbol id to new symbol table.
    454 	 * Deleted symbols will have a zero map, indices will be one based
    455 	 * instead of zero based.
    456 	 * Resulting map is sd_nmap[original_index] = new_index + 1
    457 	 */
    458 	if (nmap != NULL) {
    459 		int new;
    460 		for (new = 0; new < n; new++) {
    461 			uint32_t orig = nsym[new].st_size - 1;
    462 			uint32_t size = nmap[orig];
    463 
    464 			nmap[orig] = new + 1;
    465 
    466 			/* restore the size */
    467 			nsym[new].st_size = size;
    468 		}
    469 	}
    470 #endif
    471 
    472 	KASSERT(strcmp(name, "netbsd") == 0 || mutex_owned(&ksyms_lock));
    473 	KASSERT(cold || mutex_owned(&ksyms_lock));
    474 
    475 	/*
    476 	 * Publish the symtab.  Do this at splhigh to ensure ddb never
    477 	 * witnesses an inconsistent state of the queue, unless memory
    478 	 * is so corrupt that we crash in PSLIST_WRITER_INSERT_AFTER or
    479 	 * TAILQ_INSERT_TAIL.
    480 	 */
    481 	PSLIST_ENTRY_INIT(tab, sd_pslist);
    482 	s = splhigh();
    483 	if (TAILQ_EMPTY(&ksyms_symtabs)) {
    484 		PSLIST_WRITER_INSERT_HEAD(&ksyms_symtabs_psz, tab, sd_pslist);
    485 	} else {
    486 		struct ksyms_symtab *last;
    487 
    488 		last = TAILQ_LAST(&ksyms_symtabs, ksyms_symtab_queue);
    489 		PSLIST_WRITER_INSERT_AFTER(last, tab, sd_pslist);
    490 	}
    491 	TAILQ_INSERT_TAIL(&ksyms_symtabs, tab, sd_queue);
    492 	splx(s);
    493 
    494 	ksyms_sizes_calc();
    495 	ksyms_loaded = true;
    496 }
    497 
    498 /*
    499  * Setup the kernel symbol table stuff.
    500  */
    501 void
    502 ksyms_addsyms_elf(int symsize, void *start, void *end)
    503 {
    504 	int i, j;
    505 	Elf_Shdr *shdr;
    506 	char *symstart = NULL, *strstart = NULL;
    507 	size_t strsize = 0;
    508 	Elf_Ehdr *ehdr;
    509 	char *ctfstart = NULL;
    510 	size_t ctfsize = 0;
    511 
    512 	if (symsize <= 0) {
    513 		printf("[ Kernel symbol table missing! ]\n");
    514 		return;
    515 	}
    516 
    517 	/* Sanity check */
    518 	if (ALIGNED_POINTER(start, long) == 0) {
    519 		printf("[ Kernel symbol table has bad start address %p ]\n",
    520 		    start);
    521 		return;
    522 	}
    523 
    524 	ehdr = (Elf_Ehdr *)start;
    525 
    526 	/* check if this is a valid ELF header */
    527 	/* No reason to verify arch type, the kernel is actually running! */
    528 	if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) ||
    529 	    ehdr->e_ident[EI_CLASS] != ELFCLASS ||
    530 	    ehdr->e_version > 1) {
    531 		printf("[ Kernel symbol table invalid! ]\n");
    532 		return; /* nothing to do */
    533 	}
    534 
    535 	/* Loaded header will be scratched in addsymtab */
    536 	ksyms_hdr_init(start);
    537 
    538 	/* Find the symbol table and the corresponding string table. */
    539 	shdr = (Elf_Shdr *)((uint8_t *)start + ehdr->e_shoff);
    540 	for (i = 1; i < ehdr->e_shnum; i++) {
    541 		if (shdr[i].sh_type != SHT_SYMTAB)
    542 			continue;
    543 		if (shdr[i].sh_offset == 0)
    544 			continue;
    545 		symstart = (uint8_t *)start + shdr[i].sh_offset;
    546 		symsize = shdr[i].sh_size;
    547 		j = shdr[i].sh_link;
    548 		if (shdr[j].sh_offset == 0)
    549 			continue; /* Can this happen? */
    550 		strstart = (uint8_t *)start + shdr[j].sh_offset;
    551 		strsize = shdr[j].sh_size;
    552 		break;
    553 	}
    554 
    555 #ifdef KDTRACE_HOOKS
    556 	/* Find the CTF section */
    557 	shdr = (Elf_Shdr *)((uint8_t *)start + ehdr->e_shoff);
    558 	if (ehdr->e_shstrndx != 0) {
    559 		char *shstr = (uint8_t *)start +
    560 		    shdr[ehdr->e_shstrndx].sh_offset;
    561 		for (i = 1; i < ehdr->e_shnum; i++) {
    562 #ifdef KSYMS_DEBUG
    563 			printf("ksyms: checking %s\n", &shstr[shdr[i].sh_name]);
    564 #endif
    565 			if (shdr[i].sh_type != SHT_PROGBITS)
    566 				continue;
    567 			if (strncmp(".SUNW_ctf", &shstr[shdr[i].sh_name], 10)
    568 			    != 0)
    569 				continue;
    570 			ctfstart = (uint8_t *)start + shdr[i].sh_offset;
    571 			ctfsize = shdr[i].sh_size;
    572 			ksyms_ctfsz = ctfsize;
    573 #ifdef DEBUG
    574 			aprint_normal("Found CTF at %p, size 0x%zx\n",
    575 			    ctfstart, ctfsize);
    576 #endif
    577 			break;
    578 		}
    579 #ifdef DEBUG
    580 	} else {
    581 		printf("ksyms: e_shstrndx == 0\n");
    582 #endif
    583 	}
    584 #endif
    585 
    586 	if (!ksyms_verify(symstart, strstart))
    587 		return;
    588 
    589 	addsymtab("netbsd", symstart, symsize, strstart, strsize,
    590 	    &kernel_symtab, symstart, ctfstart, ctfsize, ksyms_nmap);
    591 
    592 #ifdef DEBUG
    593 	aprint_normal("Loaded initial symtab at %p, strtab at %p, # entries %ld\n",
    594 	    kernel_symtab.sd_symstart, kernel_symtab.sd_strstart,
    595 	    (long)kernel_symtab.sd_symsize/sizeof(Elf_Sym));
    596 #endif
    597 
    598 	/* Should be no snapshot to invalidate yet.  */
    599 	KASSERT(ksyms_snapshot == NULL);
    600 }
    601 
    602 /*
    603  * Setup the kernel symbol table stuff.
    604  * Use this when the address of the symbol and string tables are known;
    605  * otherwise use ksyms_init with an ELF image.
    606  * We need to pass a minimal ELF header which will later be completed by
    607  * ksyms_hdr_init and handed off to userland through /dev/ksyms.  We use
    608  * a void *rather than a pointer to avoid exposing the Elf_Ehdr type.
    609  */
    610 void
    611 ksyms_addsyms_explicit(void *ehdr, void *symstart, size_t symsize,
    612     void *strstart, size_t strsize)
    613 {
    614 	if (!ksyms_verify(symstart, strstart))
    615 		return;
    616 
    617 	ksyms_hdr_init(ehdr);
    618 	addsymtab("netbsd", symstart, symsize, strstart, strsize,
    619 	    &kernel_symtab, symstart, NULL, 0, ksyms_nmap);
    620 
    621 	/* Should be no snapshot to invalidate yet.  */
    622 	KASSERT(ksyms_snapshot == NULL);
    623 }
    624 
    625 /*
    626  * Get the value associated with a symbol.
    627  * "mod" is the module name, or null if any module.
    628  * "sym" is the symbol name.
    629  * "val" is a pointer to the corresponding value, if call succeeded.
    630  * Returns 0 if success or ENOENT if no such entry.
    631  *
    632  * If symp is nonnull, caller must hold ksyms_lock or module_lock, have
    633  * ksyms_opencnt nonzero, be in a pserialize read section, be in ddb
    634  * with all other CPUs quiescent.
    635  */
    636 int
    637 ksyms_getval_unlocked(const char *mod, const char *sym, Elf_Sym **symp,
    638     unsigned long *val, int type)
    639 {
    640 	struct ksyms_symtab *st;
    641 	Elf_Sym *es;
    642 	int s, error = ENOENT;
    643 
    644 #ifdef KSYMS_DEBUG
    645 	if (ksyms_debug & FOLLOW_CALLS)
    646 		printf("%s: mod %s sym %s valp %p\n", __func__, mod, sym, val);
    647 #endif
    648 
    649 	s = pserialize_read_enter();
    650 	PSLIST_READER_FOREACH(st, &ksyms_symtabs_psz, struct ksyms_symtab,
    651 	    sd_pslist) {
    652 		if (mod != NULL && strcmp(st->sd_name, mod))
    653 			continue;
    654 		if ((es = findsym(sym, st, type)) != NULL) {
    655 			*val = es->st_value;
    656 			if (symp)
    657 				*symp = es;
    658 			error = 0;
    659 			break;
    660 		}
    661 	}
    662 	pserialize_read_exit(s);
    663 	return error ? SET_ERROR(error) : 0;
    664 }
    665 
    666 int
    667 ksyms_getval(const char *mod, const char *sym, unsigned long *val, int type)
    668 {
    669 
    670 	if (!ksyms_loaded)
    671 		return SET_ERROR(ENOENT);
    672 
    673 	/* No locking needed -- we read the table pserialized.  */
    674 	return ksyms_getval_unlocked(mod, sym, NULL, val, type);
    675 }
    676 
    677 /*
    678  * ksyms_get_mod(mod)
    679  *
    680  * Return the symtab for the given module name.  Caller must ensure
    681  * that the module cannot be unloaded until after this returns.
    682  */
    683 struct ksyms_symtab *
    684 ksyms_get_mod(const char *mod)
    685 {
    686 	struct ksyms_symtab *st;
    687 	int s;
    688 
    689 	s = pserialize_read_enter();
    690 	PSLIST_READER_FOREACH(st, &ksyms_symtabs_psz, struct ksyms_symtab,
    691 	    sd_pslist) {
    692 		if (mod != NULL && strcmp(st->sd_name, mod))
    693 			continue;
    694 		break;
    695 	}
    696 	pserialize_read_exit(s);
    697 
    698 	return st;
    699 }
    700 
    701 
    702 /*
    703  * ksyms_mod_foreach()
    704  *
    705  * Iterate over the symbol table of the specified module, calling the callback
    706  * handler for each symbol. Stop iterating if the handler return is non-zero.
    707  *
    708  */
    709 
    710 int
    711 ksyms_mod_foreach(const char *mod, ksyms_callback_t callback, void *opaque)
    712 {
    713 	struct ksyms_symtab *st;
    714 	Elf_Sym *sym, *maxsym;
    715 	char *str;
    716 	int symindx;
    717 
    718 	if (!ksyms_loaded)
    719 		return SET_ERROR(ENOENT);
    720 
    721 	mutex_enter(&ksyms_lock);
    722 
    723 	/* find the module */
    724 	TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
    725 		if (mod != NULL && strcmp(st->sd_name, mod))
    726 			continue;
    727 
    728 		sym = st->sd_symstart;
    729 		str = st->sd_strstart - st->sd_usroffset;
    730 
    731 		/* now iterate through the symbols */
    732 		maxsym = sym + st->sd_symsize / sizeof(Elf_Sym);
    733 		for (symindx = 0; sym < maxsym; sym++, symindx++) {
    734 			if (callback(str + sym->st_name, symindx,
    735 			    (void *)sym->st_value,
    736 			    sym->st_size,
    737 			    sym->st_info,
    738 			    opaque) != 0) {
    739 				break;
    740 			}
    741 		}
    742 	}
    743 	mutex_exit(&ksyms_lock);
    744 
    745 	return 0;
    746 }
    747 
    748 /*
    749  * Get "mod" and "symbol" associated with an address.
    750  * Returns 0 if success or ENOENT if no such entry.
    751  *
    752  * Caller must hold ksyms_lock or module_lock, have ksyms_opencnt
    753  * nonzero, be in a pserialize read section, or be in ddb with all
    754  * other CPUs quiescent.
    755  */
    756 int
    757 ksyms_getname(const char **mod, const char **sym, vaddr_t v, int f)
    758 {
    759 	struct ksyms_symtab *st;
    760 	Elf_Sym *les, *es = NULL;
    761 	vaddr_t laddr = 0;
    762 	const char *lmod = NULL;
    763 	char *stable = NULL;
    764 	int type, i, sz;
    765 
    766 	if (!ksyms_loaded)
    767 		return SET_ERROR(ENOENT);
    768 
    769 	PSLIST_READER_FOREACH(st, &ksyms_symtabs_psz, struct ksyms_symtab,
    770 	    sd_pslist) {
    771 		if (v < st->sd_minsym || v > st->sd_maxsym)
    772 			continue;
    773 		sz = st->sd_symsize/sizeof(Elf_Sym);
    774 		for (i = 0; i < sz; i++) {
    775 			les = st->sd_symstart + i;
    776 			type = ELF_ST_TYPE(les->st_info);
    777 
    778 			if ((f & KSYMS_PROC) && (type != STT_FUNC))
    779 				continue;
    780 
    781 			if (type == STT_NOTYPE)
    782 				continue;
    783 
    784 			if (((f & KSYMS_ANY) == 0) &&
    785 			    (type != STT_FUNC) && (type != STT_OBJECT))
    786 				continue;
    787 
    788 			if ((les->st_value <= v) && (les->st_value > laddr)) {
    789 				laddr = les->st_value;
    790 				es = les;
    791 				lmod = st->sd_name;
    792 				stable = st->sd_strstart - st->sd_usroffset;
    793 			}
    794 		}
    795 	}
    796 	if (es == NULL)
    797 		return SET_ERROR(ENOENT);
    798 	if ((f & KSYMS_EXACT) && (v != es->st_value))
    799 		return SET_ERROR(ENOENT);
    800 	if (mod)
    801 		*mod = lmod;
    802 	if (sym)
    803 		*sym = stable + es->st_name;
    804 	return 0;
    805 }
    806 
    807 /*
    808  * Add a symbol table from a loadable module.
    809  */
    810 void
    811 ksyms_modload(const char *name, void *symstart, vsize_t symsize,
    812     char *strstart, vsize_t strsize)
    813 {
    814 	struct ksyms_symtab *st;
    815 	struct ksyms_snapshot *ks;
    816 	void *nmap;
    817 
    818 	st = kmem_zalloc(sizeof(*st), KM_SLEEP);
    819 	nmap = kmem_zalloc(symsize / sizeof(Elf_Sym) * sizeof (uint32_t),
    820 			   KM_SLEEP);
    821 	mutex_enter(&ksyms_lock);
    822 	addsymtab(name, symstart, symsize, strstart, strsize, st, symstart,
    823 	    NULL, 0, nmap);
    824 	ks = ksyms_snapshot;
    825 	ksyms_snapshot = NULL;
    826 	mutex_exit(&ksyms_lock);
    827 
    828 	if (ks)
    829 		ksyms_snapshot_release(ks);
    830 }
    831 
    832 /*
    833  * Remove a symbol table from a loadable module.
    834  */
    835 void
    836 ksyms_modunload(const char *name)
    837 {
    838 	struct ksyms_symtab *st;
    839 	struct ksyms_snapshot *ks;
    840 	int s;
    841 
    842 	mutex_enter(&ksyms_lock);
    843 	TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
    844 		if (strcmp(name, st->sd_name) != 0)
    845 			continue;
    846 		break;
    847 	}
    848 	KASSERT(st != NULL);
    849 
    850 	/* Wait for any snapshot in progress to complete.  */
    851 	while (ksyms_snapshotting)
    852 		cv_wait(&ksyms_cv, &ksyms_lock);
    853 
    854 	/*
    855 	 * Remove the symtab.  Do this at splhigh to ensure ddb never
    856 	 * witnesses an inconsistent state of the queue, unless memory
    857 	 * is so corrupt that we crash in TAILQ_REMOVE or
    858 	 * PSLIST_WRITER_REMOVE.
    859 	 */
    860 	s = splhigh();
    861 	TAILQ_REMOVE(&ksyms_symtabs, st, sd_queue);
    862 	PSLIST_WRITER_REMOVE(st, sd_pslist);
    863 	splx(s);
    864 
    865 	/*
    866 	 * And wait a grace period, in case there are any pserialized
    867 	 * readers in flight.
    868 	 */
    869 	pserialize_perform(ksyms_psz);
    870 	PSLIST_ENTRY_DESTROY(st, sd_pslist);
    871 
    872 	/* Recompute the ksyms sizes now that we've removed st.  */
    873 	ksyms_sizes_calc();
    874 
    875 	/* Invalidate the global ksyms snapshot.  */
    876 	ks = ksyms_snapshot;
    877 	ksyms_snapshot = NULL;
    878 	mutex_exit(&ksyms_lock);
    879 
    880 	/*
    881 	 * No more references are possible.  Free the name map and the
    882 	 * symtab itself, which we had allocated in ksyms_modload.
    883 	 */
    884 	kmem_free(st->sd_nmap, st->sd_nmapsize * sizeof(uint32_t));
    885 	kmem_free(st, sizeof(*st));
    886 
    887 	/* Release the formerly global ksyms snapshot, if any.  */
    888 	if (ks)
    889 		ksyms_snapshot_release(ks);
    890 }
    891 
    892 #ifdef DDB
    893 /*
    894  * Keep sifting stuff here, to avoid export of ksyms internals.
    895  *
    896  * Systems is expected to be quiescent, so no locking done.
    897  */
    898 int
    899 ksyms_sift(char *mod, char *sym, int mode)
    900 {
    901 	struct ksyms_symtab *st;
    902 	char *sb;
    903 	int i, sz;
    904 
    905 	if (!ksyms_loaded)
    906 		return SET_ERROR(ENOENT);
    907 
    908 	TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
    909 		if (mod && strcmp(mod, st->sd_name))
    910 			continue;
    911 		sb = st->sd_strstart - st->sd_usroffset;
    912 
    913 		sz = st->sd_symsize/sizeof(Elf_Sym);
    914 		for (i = 0; i < sz; i++) {
    915 			Elf_Sym *les = st->sd_symstart + i;
    916 			char c;
    917 
    918 			if (strstr(sb + les->st_name, sym) == NULL)
    919 				continue;
    920 
    921 			if (mode == 'F') {
    922 				switch (ELF_ST_TYPE(les->st_info)) {
    923 				case STT_OBJECT:
    924 					c = '+';
    925 					break;
    926 				case STT_FUNC:
    927 					c = '*';
    928 					break;
    929 				case STT_SECTION:
    930 					c = '&';
    931 					break;
    932 				case STT_FILE:
    933 					c = '/';
    934 					break;
    935 				default:
    936 					c = ' ';
    937 					break;
    938 				}
    939 				db_printf("%s%c ", sb + les->st_name, c);
    940 			} else
    941 				db_printf("%s ", sb + les->st_name);
    942 		}
    943 	}
    944 	return SET_ERROR(ENOENT);
    945 }
    946 #endif /* DDB */
    947 
    948 /*
    949  * In case we exposing the symbol table to the userland using the pseudo-
    950  * device /dev/ksyms, it is easier to provide all the tables as one.
    951  * However, it means we have to change all the st_name fields for the
    952  * symbols so they match the ELF image that the userland will read
    953  * through the device.
    954  *
    955  * The actual (correct) value of st_name is preserved through a global
    956  * offset stored in the symbol table structure.
    957  *
    958  * Call with ksyms_lock held.
    959  */
    960 static void
    961 ksyms_sizes_calc(void)
    962 {
    963 	struct ksyms_symtab *st;
    964 	int i, delta;
    965 
    966 	KASSERT(cold || mutex_owned(&ksyms_lock));
    967 
    968 	ksyms_symsz = ksyms_strsz = 0;
    969 	TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
    970 		delta = ksyms_strsz - st->sd_usroffset;
    971 		if (delta != 0) {
    972 			for (i = 0; i < st->sd_symsize/sizeof(Elf_Sym); i++)
    973 				st->sd_symstart[i].st_name += delta;
    974 			st->sd_usroffset = ksyms_strsz;
    975 		}
    976 		ksyms_symsz += st->sd_symsize;
    977 		ksyms_strsz += st->sd_strsize;
    978 	}
    979 }
    980 
    981 static void
    982 ksyms_fill_note(void)
    983 {
    984 	int32_t *note = ksyms_hdr.kh_note;
    985 	note[0] = ELF_NOTE_NETBSD_NAMESZ;
    986 	note[1] = ELF_NOTE_NETBSD_DESCSZ;
    987 	note[2] = ELF_NOTE_TYPE_NETBSD_TAG;
    988 	memcpy(&note[3],  "NetBSD\0", 8);
    989 	note[5] = __NetBSD_Version__;
    990 }
    991 
    992 static void
    993 ksyms_hdr_init(const void *hdraddr)
    994 {
    995 	/* Copy the loaded elf exec header */
    996 	memcpy(&ksyms_hdr.kh_ehdr, hdraddr, sizeof(Elf_Ehdr));
    997 
    998 	/* Set correct program/section header sizes, offsets and numbers */
    999 	ksyms_hdr.kh_ehdr.e_phoff = offsetof(struct ksyms_hdr, kh_phdr[0]);
   1000 	ksyms_hdr.kh_ehdr.e_phentsize = sizeof(Elf_Phdr);
   1001 	ksyms_hdr.kh_ehdr.e_phnum = NPRGHDR;
   1002 	ksyms_hdr.kh_ehdr.e_shoff = offsetof(struct ksyms_hdr, kh_shdr[0]);
   1003 	ksyms_hdr.kh_ehdr.e_shentsize = sizeof(Elf_Shdr);
   1004 	ksyms_hdr.kh_ehdr.e_shnum = NSECHDR;
   1005 	ksyms_hdr.kh_ehdr.e_shstrndx = SHSTRTAB;
   1006 
   1007 	/* Text/data - fake */
   1008 	ksyms_hdr.kh_phdr[0].p_type = PT_LOAD;
   1009 	ksyms_hdr.kh_phdr[0].p_memsz = (unsigned long)-1L;
   1010 	ksyms_hdr.kh_phdr[0].p_flags = PF_R | PF_X | PF_W;
   1011 
   1012 #define SHTCOPY(name)  strlcpy(&ksyms_hdr.kh_strtab[offs], (name), \
   1013     sizeof(ksyms_hdr.kh_strtab) - offs), offs += sizeof(name)
   1014 
   1015 	uint32_t offs = 1;
   1016 	/* First section header ".note.netbsd.ident" */
   1017 	ksyms_hdr.kh_shdr[SHNOTE].sh_name = offs;
   1018 	ksyms_hdr.kh_shdr[SHNOTE].sh_type = SHT_NOTE;
   1019 	ksyms_hdr.kh_shdr[SHNOTE].sh_offset =
   1020 	    offsetof(struct ksyms_hdr, kh_note[0]);
   1021 	ksyms_hdr.kh_shdr[SHNOTE].sh_size = sizeof(ksyms_hdr.kh_note);
   1022 	ksyms_hdr.kh_shdr[SHNOTE].sh_addralign = sizeof(int);
   1023 	SHTCOPY(".note.netbsd.ident");
   1024 	ksyms_fill_note();
   1025 
   1026 	/* Second section header; ".symtab" */
   1027 	ksyms_hdr.kh_shdr[SYMTAB].sh_name = offs;
   1028 	ksyms_hdr.kh_shdr[SYMTAB].sh_type = SHT_SYMTAB;
   1029 	ksyms_hdr.kh_shdr[SYMTAB].sh_offset = sizeof(struct ksyms_hdr);
   1030 /*	ksyms_hdr.kh_shdr[SYMTAB].sh_size = filled in at open */
   1031 	ksyms_hdr.kh_shdr[SYMTAB].sh_link = STRTAB; /* Corresponding strtab */
   1032 	ksyms_hdr.kh_shdr[SYMTAB].sh_addralign = sizeof(long);
   1033 	ksyms_hdr.kh_shdr[SYMTAB].sh_entsize = sizeof(Elf_Sym);
   1034 	SHTCOPY(".symtab");
   1035 
   1036 	/* Third section header; ".strtab" */
   1037 	ksyms_hdr.kh_shdr[STRTAB].sh_name = offs;
   1038 	ksyms_hdr.kh_shdr[STRTAB].sh_type = SHT_STRTAB;
   1039 /*	ksyms_hdr.kh_shdr[STRTAB].sh_offset = filled in at open */
   1040 /*	ksyms_hdr.kh_shdr[STRTAB].sh_size = filled in at open */
   1041 	ksyms_hdr.kh_shdr[STRTAB].sh_addralign = sizeof(char);
   1042 	SHTCOPY(".strtab");
   1043 
   1044 	/* Fourth section, ".shstrtab" */
   1045 	ksyms_hdr.kh_shdr[SHSTRTAB].sh_name = offs;
   1046 	ksyms_hdr.kh_shdr[SHSTRTAB].sh_type = SHT_STRTAB;
   1047 	ksyms_hdr.kh_shdr[SHSTRTAB].sh_offset =
   1048 	    offsetof(struct ksyms_hdr, kh_strtab);
   1049 	ksyms_hdr.kh_shdr[SHSTRTAB].sh_size = SHSTRSIZ;
   1050 	ksyms_hdr.kh_shdr[SHSTRTAB].sh_addralign = sizeof(char);
   1051 	SHTCOPY(".shstrtab");
   1052 
   1053 	/* Fifth section, ".bss". All symbols reside here. */
   1054 	ksyms_hdr.kh_shdr[SHBSS].sh_name = offs;
   1055 	ksyms_hdr.kh_shdr[SHBSS].sh_type = SHT_NOBITS;
   1056 	ksyms_hdr.kh_shdr[SHBSS].sh_offset = 0;
   1057 	ksyms_hdr.kh_shdr[SHBSS].sh_size = (unsigned long)-1L;
   1058 	ksyms_hdr.kh_shdr[SHBSS].sh_addralign = PAGE_SIZE;
   1059 	ksyms_hdr.kh_shdr[SHBSS].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
   1060 	SHTCOPY(".bss");
   1061 
   1062 	/* Sixth section header; ".SUNW_ctf" */
   1063 	ksyms_hdr.kh_shdr[SHCTF].sh_name = offs;
   1064 	ksyms_hdr.kh_shdr[SHCTF].sh_type = SHT_PROGBITS;
   1065 /*	ksyms_hdr.kh_shdr[SHCTF].sh_offset = filled in at open */
   1066 /*	ksyms_hdr.kh_shdr[SHCTF].sh_size = filled in at open */
   1067 	ksyms_hdr.kh_shdr[SHCTF].sh_link = SYMTAB; /* Corresponding symtab */
   1068 	ksyms_hdr.kh_shdr[SHCTF].sh_addralign = sizeof(char);
   1069 	SHTCOPY(".SUNW_ctf");
   1070 }
   1071 
   1072 static struct ksyms_snapshot *
   1073 ksyms_snapshot_alloc(int maxlen, size_t size, dev_t dev, uint64_t gen)
   1074 {
   1075 	struct ksyms_snapshot *ks;
   1076 
   1077 	ks = kmem_zalloc(sizeof(*ks), KM_SLEEP);
   1078 	ks->ks_refcnt = 1;
   1079 	ks->ks_gen = gen;
   1080 	ks->ks_uobj = uao_create(size, 0);
   1081 	ks->ks_size = size;
   1082 	ks->ks_dev = dev;
   1083 	ks->ks_maxlen = maxlen;
   1084 
   1085 	return ks;
   1086 }
   1087 
   1088 static void
   1089 ksyms_snapshot_release(struct ksyms_snapshot *ks)
   1090 {
   1091 	uint64_t refcnt;
   1092 
   1093 	mutex_enter(&ksyms_lock);
   1094 	refcnt = --ks->ks_refcnt;
   1095 	mutex_exit(&ksyms_lock);
   1096 
   1097 	if (refcnt)
   1098 		return;
   1099 
   1100 	uao_detach(ks->ks_uobj);
   1101 	kmem_free(ks, sizeof(*ks));
   1102 }
   1103 
   1104 static int
   1105 ubc_copyfrombuf(struct uvm_object *uobj, struct uio *uio, const void *buf,
   1106     size_t n)
   1107 {
   1108 	struct iovec iov = { .iov_base = __UNCONST(buf), .iov_len = n };
   1109 
   1110 	uio->uio_iov = &iov;
   1111 	uio->uio_iovcnt = 1;
   1112 	uio->uio_resid = n;
   1113 
   1114 	return ubc_uiomove(uobj, uio, n, UVM_ADV_SEQUENTIAL, UBC_WRITE);
   1115 }
   1116 
   1117 static int
   1118 ksyms_take_snapshot(struct ksyms_snapshot *ks, struct ksyms_symtab *last)
   1119 {
   1120 	struct uvm_object *uobj = ks->ks_uobj;
   1121 	struct uio uio;
   1122 	struct ksyms_symtab *st;
   1123 	int error;
   1124 
   1125 	/* Caller must have initiated snapshotting.  */
   1126 	KASSERT(ksyms_snapshotting == curlwp);
   1127 
   1128 	/* Start a uio transfer to reuse incrementally.  */
   1129 	uio.uio_offset = 0;
   1130 	uio.uio_rw = UIO_WRITE; /* write from buffer to uobj */
   1131 	UIO_SETUP_SYSSPACE(&uio);
   1132 
   1133 	/*
   1134 	 * First: Copy out the ELF header.
   1135 	 */
   1136 	error = ubc_copyfrombuf(uobj, &uio, &ksyms_hdr, sizeof(ksyms_hdr));
   1137 	if (error)
   1138 		return error;
   1139 
   1140 	/*
   1141 	 * Copy out the symbol table.  The list of symtabs is
   1142 	 * guaranteed to be nonempty because we always have an entry
   1143 	 * for the main kernel.  We stop at last, not at the end of the
   1144 	 * tailq or NULL, because entries beyond last are not included
   1145 	 * in this snapshot (and may not be fully initialized memory as
   1146 	 * we witness it).
   1147 	 */
   1148 	KASSERT(uio.uio_offset == sizeof(struct ksyms_hdr));
   1149 	for (st = TAILQ_FIRST(&ksyms_symtabs);
   1150 	     ;
   1151 	     st = TAILQ_NEXT(st, sd_queue)) {
   1152 		error = ubc_copyfrombuf(uobj, &uio, st->sd_symstart,
   1153 		    st->sd_symsize);
   1154 		if (error)
   1155 			return error;
   1156 		if (st == last)
   1157 			break;
   1158 	}
   1159 
   1160 	/*
   1161 	 * Copy out the string table
   1162 	 */
   1163 	KASSERT(uio.uio_offset == sizeof(struct ksyms_hdr) +
   1164 	    ksyms_hdr.kh_shdr[SYMTAB].sh_size);
   1165 	for (st = TAILQ_FIRST(&ksyms_symtabs);
   1166 	     ;
   1167 	     st = TAILQ_NEXT(st, sd_queue)) {
   1168 		error = ubc_copyfrombuf(uobj, &uio, st->sd_strstart,
   1169 		    st->sd_strsize);
   1170 		if (error)
   1171 			return error;
   1172 		if (st == last)
   1173 			break;
   1174 	}
   1175 
   1176 	/*
   1177 	 * Copy out the CTF table.
   1178 	 */
   1179 	KASSERT(uio.uio_offset == sizeof(struct ksyms_hdr) +
   1180 	    ksyms_hdr.kh_shdr[SYMTAB].sh_size +
   1181 	    ksyms_hdr.kh_shdr[STRTAB].sh_size);
   1182 	st = TAILQ_FIRST(&ksyms_symtabs);
   1183 	if (st->sd_ctfstart != NULL) {
   1184 		error = ubc_copyfrombuf(uobj, &uio, st->sd_ctfstart,
   1185 		    st->sd_ctfsize);
   1186 		if (error)
   1187 			return error;
   1188 	}
   1189 
   1190 	KASSERT(uio.uio_offset == sizeof(struct ksyms_hdr) +
   1191 	    ksyms_hdr.kh_shdr[SYMTAB].sh_size +
   1192 	    ksyms_hdr.kh_shdr[STRTAB].sh_size +
   1193 	    ksyms_hdr.kh_shdr[SHCTF].sh_size);
   1194 	KASSERT(uio.uio_offset == ks->ks_size);
   1195 
   1196 	return 0;
   1197 }
   1198 
   1199 static const struct fileops ksyms_fileops;
   1200 
   1201 static int
   1202 ksymsopen(dev_t dev, int flags, int devtype, struct lwp *l)
   1203 {
   1204 	struct file *fp = NULL;
   1205 	int fd = -1;
   1206 	struct ksyms_snapshot *ks = NULL;
   1207 	size_t size;
   1208 	struct ksyms_symtab *last;
   1209 	int maxlen;
   1210 	uint64_t gen;
   1211 	int error;
   1212 
   1213 	if (minor(dev) != 0 || !ksyms_loaded)
   1214 		return SET_ERROR(ENXIO);
   1215 
   1216 	/* Allocate a private file.  */
   1217 	error = fd_allocfile(&fp, &fd);
   1218 	if (error)
   1219 		return error;
   1220 
   1221 	mutex_enter(&ksyms_lock);
   1222 
   1223 	/*
   1224 	 * Wait until we have a snapshot, or until there is no snapshot
   1225 	 * being taken right now so we can take one.
   1226 	 */
   1227 	while ((ks = ksyms_snapshot) == NULL && ksyms_snapshotting) {
   1228 		error = cv_wait_sig(&ksyms_cv, &ksyms_lock);
   1229 		if (error)
   1230 			goto out;
   1231 	}
   1232 
   1233 	/*
   1234 	 * If there's a usable snapshot, increment its reference count
   1235 	 * (can't overflow, 64-bit) and just reuse it.
   1236 	 */
   1237 	if (ks) {
   1238 		ks->ks_refcnt++;
   1239 		goto out;
   1240 	}
   1241 
   1242 	/* Find the current length of the symtab object. */
   1243 	size = sizeof(struct ksyms_hdr);
   1244 	size += ksyms_strsz;
   1245 	size += ksyms_symsz;
   1246 	size += ksyms_ctfsz;
   1247 
   1248 	/* Start a new snapshot.  */
   1249 	ksyms_hdr.kh_shdr[SYMTAB].sh_size = ksyms_symsz;
   1250 	ksyms_hdr.kh_shdr[SYMTAB].sh_info = ksyms_symsz / sizeof(Elf_Sym);
   1251 	ksyms_hdr.kh_shdr[STRTAB].sh_offset = ksyms_symsz +
   1252 	    ksyms_hdr.kh_shdr[SYMTAB].sh_offset;
   1253 	ksyms_hdr.kh_shdr[STRTAB].sh_size = ksyms_strsz;
   1254 	ksyms_hdr.kh_shdr[SHCTF].sh_offset = ksyms_strsz +
   1255 	    ksyms_hdr.kh_shdr[STRTAB].sh_offset;
   1256 	ksyms_hdr.kh_shdr[SHCTF].sh_size = ksyms_ctfsz;
   1257 	last = TAILQ_LAST(&ksyms_symtabs, ksyms_symtab_queue);
   1258 	maxlen = ksyms_maxlen;
   1259 	gen = ksyms_snapshot_gen++;
   1260 
   1261 	/*
   1262 	 * Prevent ksyms entries from being removed while we take the
   1263 	 * snapshot.
   1264 	 */
   1265 	KASSERT(ksyms_snapshotting == NULL);
   1266 	ksyms_snapshotting = curlwp;
   1267 	mutex_exit(&ksyms_lock);
   1268 
   1269 	/* Create a snapshot and write the symtab to it.  */
   1270 	ks = ksyms_snapshot_alloc(maxlen, size, dev, gen);
   1271 	error = ksyms_take_snapshot(ks, last);
   1272 
   1273 	/*
   1274 	 * Snapshot creation is done.  Wake up anyone waiting to remove
   1275 	 * entries (module unload).
   1276 	 */
   1277 	mutex_enter(&ksyms_lock);
   1278 	KASSERTMSG(ksyms_snapshotting == curlwp, "lwp %p stole snapshot",
   1279 	    ksyms_snapshotting);
   1280 	ksyms_snapshotting = NULL;
   1281 	cv_broadcast(&ksyms_cv);
   1282 
   1283 	/* If we failed, give up.  */
   1284 	if (error)
   1285 		goto out;
   1286 
   1287 	/* Cache the snapshot for the next reader.  */
   1288 	KASSERT(ksyms_snapshot == NULL);
   1289 	ksyms_snapshot = ks;
   1290 	ks->ks_refcnt++;
   1291 	KASSERT(ks->ks_refcnt == 2);
   1292 
   1293 out:	mutex_exit(&ksyms_lock);
   1294 	if (error) {
   1295 		if (fp)
   1296 			fd_abort(curproc, fp, fd);
   1297 		if (ks)
   1298 			ksyms_snapshot_release(ks);
   1299 	} else {
   1300 		KASSERT(fp);
   1301 		KASSERT(ks);
   1302 		error = fd_clone(fp, fd, flags, &ksyms_fileops, ks);
   1303 		KASSERTMSG(error == EMOVEFD, "error=%d", error);
   1304 	}
   1305 	return error;
   1306 }
   1307 
   1308 static int
   1309 ksymsclose(struct file *fp)
   1310 {
   1311 	struct ksyms_snapshot *ks = fp->f_data;
   1312 
   1313 	ksyms_snapshot_release(ks);
   1314 
   1315 	return 0;
   1316 }
   1317 
   1318 static int
   1319 ksymsread(struct file *fp, off_t *offp, struct uio *uio, kauth_cred_t cred,
   1320     int flags)
   1321 {
   1322 	const struct ksyms_snapshot *ks = fp->f_data;
   1323 	size_t count;
   1324 	int error;
   1325 
   1326 	/*
   1327 	 * Since we don't have a per-object lock, we might as well use
   1328 	 * the struct file lock to serialize access to fp->f_offset --
   1329 	 * but if the caller isn't relying on or updating fp->f_offset,
   1330 	 * there's no need to do even that.  We could use ksyms_lock,
   1331 	 * but why bother with a global lock if not needed?  Either
   1332 	 * way, the lock we use here must agree with what ksymsseek
   1333 	 * takes (nothing else in ksyms uses fp->f_offset).
   1334 	 */
   1335 	if (offp == &fp->f_offset)
   1336 		mutex_enter(&fp->f_lock);
   1337 
   1338 	/* Refuse negative offsets.  */
   1339 	if (*offp < 0) {
   1340 		error = SET_ERROR(EINVAL);
   1341 		goto out;
   1342 	}
   1343 
   1344 	/* Return nothing at or past end of file.  */
   1345 	if (*offp >= ks->ks_size) {
   1346 		error = 0;
   1347 		goto out;
   1348 	}
   1349 
   1350 	/*
   1351 	 * 1. Set up the uio to transfer from offset *offp.
   1352 	 * 2. Transfer as many bytes as we can (at most uio->uio_resid
   1353 	 *    or what's left in the ksyms).
   1354 	 * 3. If requested, update *offp to reflect the number of bytes
   1355 	 *    transferred.
   1356 	 */
   1357 	uio->uio_offset = *offp;
   1358 	count = uio->uio_resid;
   1359 	error = ubc_uiomove(ks->ks_uobj, uio, MIN(count, ks->ks_size - *offp),
   1360 	    UVM_ADV_SEQUENTIAL, UBC_READ|UBC_PARTIALOK);
   1361 	if (flags & FOF_UPDATE_OFFSET)
   1362 		*offp += count - uio->uio_resid;
   1363 
   1364 out:	if (offp == &fp->f_offset)
   1365 		mutex_exit(&fp->f_lock);
   1366 	return error;
   1367 }
   1368 
   1369 static int
   1370 ksymsstat(struct file *fp, struct stat *st)
   1371 {
   1372 	const struct ksyms_snapshot *ks = fp->f_data;
   1373 
   1374 	memset(st, 0, sizeof(*st));
   1375 
   1376 	st->st_dev = NODEV;
   1377 	st->st_ino = 0;
   1378 	st->st_mode = S_IFCHR;
   1379 	st->st_nlink = 1;
   1380 	st->st_uid = kauth_cred_geteuid(fp->f_cred);
   1381 	st->st_gid = kauth_cred_getegid(fp->f_cred);
   1382 	st->st_rdev = ks->ks_dev;
   1383 	st->st_size = ks->ks_size;
   1384 	/* zero time */
   1385 	st->st_blksize = MAXPHYS; /* XXX arbitrary */
   1386 	st->st_blocks = 0;
   1387 	st->st_gen = ks->ks_gen;
   1388 
   1389 	return 0;
   1390 }
   1391 
   1392 static int
   1393 ksymsmmap(struct file *fp, off_t *offp, size_t nbytes, int prot, int *flagsp,
   1394     int *advicep, struct uvm_object **uobjp, int *maxprotp)
   1395 {
   1396 	const struct ksyms_snapshot *ks = fp->f_data;
   1397 
   1398 	/* uvm_mmap guarantees page-aligned offset and size.  */
   1399 	KASSERT(*offp == round_page(*offp));
   1400 	KASSERT(nbytes == round_page(nbytes));
   1401 	KASSERT(nbytes > 0);
   1402 
   1403 	/* Refuse negative offsets.  */
   1404 	if (*offp < 0)
   1405 		return SET_ERROR(EINVAL);
   1406 
   1407 	/* Refuse mappings that pass the end of file.  */
   1408 	if (nbytes > round_page(ks->ks_size) ||
   1409 	    *offp > round_page(ks->ks_size) - nbytes)
   1410 		return SET_ERROR(EINVAL);	/* XXX ??? */
   1411 
   1412 	/* Success!  */
   1413 	uao_reference(ks->ks_uobj);
   1414 	*advicep = UVM_ADV_SEQUENTIAL;
   1415 	*uobjp = ks->ks_uobj;
   1416 	*maxprotp = prot & VM_PROT_READ;
   1417 	return 0;
   1418 }
   1419 
   1420 static int
   1421 ksymsseek(struct file *fp, off_t delta, int whence, off_t *newoffp, int flags)
   1422 {
   1423 	const off_t OFF_MAX = __type_max(off_t);
   1424 	struct ksyms_snapshot *ks = fp->f_data;
   1425 	off_t base, newoff;
   1426 	int error;
   1427 
   1428 	mutex_enter(&fp->f_lock);
   1429 
   1430 	switch (whence) {
   1431 	case SEEK_CUR:
   1432 		base = fp->f_offset;
   1433 		break;
   1434 	case SEEK_END:
   1435 		base = ks->ks_size;
   1436 		break;
   1437 	case SEEK_SET:
   1438 		base = 0;
   1439 		break;
   1440 	default:
   1441 		error = SET_ERROR(EINVAL);
   1442 		goto out;
   1443 	}
   1444 
   1445 	/* Check for arithmetic overflow and reject negative offsets.  */
   1446 	if (base < 0 || delta > OFF_MAX - base || base + delta < 0) {
   1447 		error = SET_ERROR(EINVAL);
   1448 		goto out;
   1449 	}
   1450 
   1451 	/* Compute the new offset.  */
   1452 	newoff = base + delta;
   1453 
   1454 	/* Success!  */
   1455 	if (newoffp)
   1456 		*newoffp = newoff;
   1457 	if (flags & FOF_UPDATE_OFFSET)
   1458 		fp->f_offset = newoff;
   1459 	error = 0;
   1460 
   1461 out:	mutex_exit(&fp->f_lock);
   1462 	return error;
   1463 }
   1464 
   1465 __CTASSERT(offsetof(struct ksyms_ogsymbol, kg_name) == offsetof(struct ksyms_gsymbol, kg_name));
   1466 __CTASSERT(offsetof(struct ksyms_gvalue, kv_name) == offsetof(struct ksyms_gsymbol, kg_name));
   1467 
   1468 static int
   1469 ksymsioctl(struct file *fp, u_long cmd, void *data)
   1470 {
   1471 	struct ksyms_snapshot *ks = fp->f_data;
   1472 	struct ksyms_ogsymbol *okg = (struct ksyms_ogsymbol *)data;
   1473 	struct ksyms_gsymbol *kg = (struct ksyms_gsymbol *)data;
   1474 	struct ksyms_gvalue *kv = (struct ksyms_gvalue *)data;
   1475 	struct ksyms_symtab *st;
   1476 	Elf_Sym *sym = NULL, copy;
   1477 	unsigned long val;
   1478 	int error = 0;
   1479 	char *str = NULL;
   1480 	int len, s;
   1481 
   1482 	/* Read cached ksyms_maxlen.  */
   1483 	len = ks->ks_maxlen;
   1484 
   1485 	if (cmd == OKIOCGVALUE || cmd == OKIOCGSYMBOL ||
   1486 	    cmd == KIOCGVALUE || cmd == KIOCGSYMBOL) {
   1487 		str = kmem_alloc(len, KM_SLEEP);
   1488 		if ((error = copyinstr(kg->kg_name, str, len, NULL)) != 0) {
   1489 			kmem_free(str, len);
   1490 			return error;
   1491 		}
   1492 	}
   1493 
   1494 	switch (cmd) {
   1495 	case OKIOCGVALUE:
   1496 		/*
   1497 		 * Use the in-kernel symbol lookup code for fast
   1498 		 * retreival of a value.
   1499 		 */
   1500 		error = ksyms_getval(NULL, str, &val, KSYMS_EXTERN);
   1501 		if (error == 0)
   1502 			error = copyout(&val, okg->kg_value, sizeof(long));
   1503 		kmem_free(str, len);
   1504 		break;
   1505 
   1506 	case OKIOCGSYMBOL:
   1507 		/*
   1508 		 * Use the in-kernel symbol lookup code for fast
   1509 		 * retreival of a symbol.
   1510 		 */
   1511 		s = pserialize_read_enter();
   1512 		PSLIST_READER_FOREACH(st, &ksyms_symtabs_psz,
   1513 		    struct ksyms_symtab, sd_pslist) {
   1514 			if ((sym = findsym(str, st, KSYMS_ANY)) == NULL)
   1515 				continue;
   1516 #ifdef notdef
   1517 			/* Skip if bad binding */
   1518 			if (ELF_ST_BIND(sym->st_info) != STB_GLOBAL) {
   1519 				sym = NULL;
   1520 				continue;
   1521 			}
   1522 #endif
   1523 			break;
   1524 		}
   1525 		if (sym != NULL) {
   1526 			memcpy(&copy, sym, sizeof(copy));
   1527 			pserialize_read_exit(s);
   1528 			error = copyout(&copy, okg->kg_sym, sizeof(Elf_Sym));
   1529 		} else {
   1530 			pserialize_read_exit(s);
   1531 			error = SET_ERROR(ENOENT);
   1532 		}
   1533 		kmem_free(str, len);
   1534 		break;
   1535 
   1536 	case KIOCGVALUE:
   1537 		/*
   1538 		 * Use the in-kernel symbol lookup code for fast
   1539 		 * retreival of a value.
   1540 		 */
   1541 		error = ksyms_getval(NULL, str, &val, KSYMS_EXTERN);
   1542 		if (error == 0)
   1543 			kv->kv_value = val;
   1544 		kmem_free(str, len);
   1545 		break;
   1546 
   1547 	case KIOCGSYMBOL:
   1548 		/*
   1549 		 * Use the in-kernel symbol lookup code for fast
   1550 		 * retreival of a symbol.
   1551 		 */
   1552 		s = pserialize_read_enter();
   1553 		PSLIST_READER_FOREACH(st, &ksyms_symtabs_psz,
   1554 		    struct ksyms_symtab, sd_pslist) {
   1555 			if ((sym = findsym(str, st, KSYMS_ANY)) == NULL)
   1556 				continue;
   1557 #ifdef notdef
   1558 			/* Skip if bad binding */
   1559 			if (ELF_ST_BIND(sym->st_info) != STB_GLOBAL) {
   1560 				sym = NULL;
   1561 				continue;
   1562 			}
   1563 #endif
   1564 			break;
   1565 		}
   1566 		if (sym != NULL) {
   1567 			kg->kg_sym = *sym;
   1568 		} else {
   1569 			error = SET_ERROR(ENOENT);
   1570 		}
   1571 		pserialize_read_exit(s);
   1572 		kmem_free(str, len);
   1573 		break;
   1574 
   1575 	case KIOCGSIZE:
   1576 		/*
   1577 		 * Get total size of symbol table.
   1578 		 */
   1579 		*(int *)data = ks->ks_size;
   1580 		break;
   1581 
   1582 	default:
   1583 		error = SET_ERROR(ENOTTY);
   1584 		break;
   1585 	}
   1586 
   1587 	return error;
   1588 }
   1589 
   1590 const struct cdevsw ksyms_cdevsw = {
   1591 	.d_open = ksymsopen,
   1592 	.d_close = noclose,
   1593 	.d_read = noread,
   1594 	.d_write = nowrite,
   1595 	.d_ioctl = noioctl,
   1596 	.d_stop = nostop,
   1597 	.d_tty = notty,
   1598 	.d_poll = nopoll,
   1599 	.d_mmap = nommap,
   1600 	.d_kqfilter = nokqfilter,
   1601 	.d_discard = nodiscard,
   1602 	.d_flag = D_OTHER | D_MPSAFE
   1603 };
   1604 
   1605 static const struct fileops ksyms_fileops = {
   1606 	.fo_name = "ksyms",
   1607 	.fo_read = ksymsread,
   1608 	.fo_write = fbadop_write,
   1609 	.fo_ioctl = ksymsioctl,
   1610 	.fo_fcntl = fnullop_fcntl,
   1611 	.fo_poll = fnullop_poll,
   1612 	.fo_stat = ksymsstat,
   1613 	.fo_close = ksymsclose,
   1614 	.fo_kqfilter = fnullop_kqfilter,
   1615 	.fo_restart = fnullop_restart,
   1616 	.fo_mmap = ksymsmmap,
   1617 	.fo_seek = ksymsseek,
   1618 };
   1619