Home | History | Annotate | Line # | Download | only in kern
      1 /*	$NetBSD: kern_ksyms.c,v 1.109 2024/10/03 20:19:55 andvar Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2008 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software developed for The NetBSD Foundation
      8  * by Andrew Doran.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * Copyright (c) 2001, 2003 Anders Magnusson (ragge (at) ludd.luth.se).
     34  * All rights reserved.
     35  *
     36  * Redistribution and use in source and binary forms, with or without
     37  * modification, are permitted provided that the following conditions
     38  * are met:
     39  * 1. Redistributions of source code must retain the above copyright
     40  *    notice, this list of conditions and the following disclaimer.
     41  * 2. Redistributions in binary form must reproduce the above copyright
     42  *    notice, this list of conditions and the following disclaimer in the
     43  *    documentation and/or other materials provided with the distribution.
     44  * 3. The name of the author may not be used to endorse or promote products
     45  *    derived from this software without specific prior written permission
     46  *
     47  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     48  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     49  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     50  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     51  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     52  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     53  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     54  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     55  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     56  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     57  */
     58 
     59 /*
     60  * Code to deal with in-kernel symbol table management + /dev/ksyms.
     61  *
     62  * For each loaded module the symbol table info is kept track of by a
     63  * struct, placed in a circular list. The first entry is the kernel
     64  * symbol table.
     65  */
     66 
     67 /*
     68  * TODO:
     69  *
     70  *	Add support for mmap, poll.
     71  *	Constify tables.
     72  *	Constify db_symtab and move it to .rodata.
     73  */
     74 
     75 #include <sys/cdefs.h>
     76 __KERNEL_RCSID(0, "$NetBSD: kern_ksyms.c,v 1.109 2024/10/03 20:19:55 andvar Exp $");
     77 
     78 #if defined(_KERNEL) && defined(_KERNEL_OPT)
     79 #include "opt_copy_symtab.h"
     80 #include "opt_ddb.h"
     81 #include "opt_dtrace.h"
     82 #endif
     83 
     84 #define _KSYMS_PRIVATE
     85 
     86 #include <sys/param.h>
     87 #include <sys/queue.h>
     88 #include <sys/exec.h>
     89 #include <sys/file.h>
     90 #include <sys/filedesc.h>
     91 #include <sys/kauth.h>
     92 #include <sys/systm.h>
     93 #include <sys/conf.h>
     94 #include <sys/kmem.h>
     95 #include <sys/proc.h>
     96 #include <sys/atomic.h>
     97 #include <sys/ksyms.h>
     98 #include <sys/kernel.h>
     99 #include <sys/intr.h>
    100 #include <sys/pserialize.h>
    101 #include <sys/stat.h>
    102 
    103 #include <uvm/uvm_extern.h>
    104 
    105 #ifdef DDB
    106 #include <ddb/db_output.h>
    107 #endif
    108 
    109 #include "ksyms.h"
    110 #if NKSYMS > 0
    111 #include "ioconf.h"
    112 #endif
    113 
/*
 * Descriptor for a ksyms snapshot.  Instances are handed out by
 * ksyms_snapshot_alloc() and dropped with ksyms_snapshot_release().
 *
 * NOTE(review): the per-field notes below are inferred from the field
 * names and from the ksyms_snapshot_alloc(int, size_t, dev_t, uint64_t)
 * prototype; confirm against the code that fills the structure in.
 */
struct ksyms_snapshot {
	uint64_t		ks_refcnt;	/* presumably a reference count */
	uint64_t		ks_gen;		/* presumably a generation number */
	struct uvm_object	*ks_uobj;	/* presumably the backing object */
	size_t			ks_size;	/* presumably the size in bytes */
	dev_t			ks_dev;		/* presumably the device number */
	int			ks_maxlen;	/* presumably ksyms_maxlen at creation */
};
    122 
/*
 * Capacity of the pre-allocated map used for the kernel's own symbol
 * table.  addsymtab() truncates a table that has this many symbols or
 * more when it is handed ksyms_nmap as its map.
 */
#define KSYMS_MAX_ID	98304
#ifdef KDTRACE_HOOKS
static uint32_t ksyms_nmap[KSYMS_MAX_ID];	/* sorted symbol table map */
#else
/* Without KDTRACE_HOOKS no map is kept; the kernel table gets a NULL map. */
static uint32_t *ksyms_nmap = NULL;
#endif
    129 
/* Longest symbol name seen so far, counting the NUL (see addsymtab()). */
static int ksyms_maxlen;
/* Set once ksyms_init() has created the lock, condvar and pserialize. */
static bool ksyms_initted;
/* Set by addsymtab() once at least one symbol table is registered. */
static bool ksyms_loaded;
/* Serializes changes to the symtab lists and to the cached snapshot. */
static kmutex_t ksyms_lock __cacheline_aligned;
/* Symbol table record used for the kernel itself ("netbsd"). */
static struct ksyms_symtab kernel_symtab;
/* Waited on, with ksyms_lock held, while ksyms_snapshotting is set. */
static kcondvar_t ksyms_cv;
/* Non-NULL while a snapshot is being taken; ksyms_modunload() waits. */
static struct lwp *ksyms_snapshotting;
/* Cached snapshot; cleared and released on module load and unload. */
static struct ksyms_snapshot *ksyms_snapshot;
/* NOTE(review): presumably a snapshot generation counter -- confirm. */
static uint64_t ksyms_snapshot_gen;
/* Used for the grace period after unlinking a table from the pslist. */
static pserialize_t ksyms_psz __read_mostly;

/* Forward declarations of helpers defined later in this file. */
static void ksyms_hdr_init(const void *);
static void ksyms_sizes_calc(void);
static struct ksyms_snapshot *ksyms_snapshot_alloc(int, size_t, dev_t,
    uint64_t);
static void ksyms_snapshot_release(struct ksyms_snapshot *);
    146 
#ifdef KSYMS_DEBUG
/* Bit flags tested against ksyms_debug to enable debug printfs. */
#define	FOLLOW_CALLS		1
#define	FOLLOW_MORE_CALLS	2
#define	FOLLOW_DEVKSYMS		4
static int ksyms_debug;
#endif

/*
 * Placeholder text; ksyms_init() compares the start of db_symtab with
 * it and only loads db_symtab when the two differ.
 */
#define		SYMTAB_FILLER	"|This is the symbol table!"

#ifdef makeoptions_COPY_SYMTAB
/* Symbol table image and its size, defined outside this file. */
extern char db_symtab[];
extern int db_symtabsize;
#endif
    160 
/*
 * used by savecore(8) so non-static
 */
struct ksyms_hdr ksyms_hdr;
/* Totals over all registered tables, recomputed by ksyms_sizes_calc(). */
int ksyms_symsz;
int ksyms_strsz;
int ksyms_ctfsz;	/* this is not currently used by savecore(8) */
/*
 * Every registered symbol table is linked on both lists below:
 * ksyms_symtabs is walked by writers and ddb, ksyms_symtabs_psz by
 * PSLIST_READER_FOREACH lookups.  addsymtab() and ksyms_modunload()
 * update the two together at splhigh.
 */
TAILQ_HEAD(ksyms_symtab_queue, ksyms_symtab) ksyms_symtabs =
    TAILQ_HEAD_INITIALIZER(ksyms_symtabs);
static struct pslist_head ksyms_symtabs_psz = PSLIST_INITIALIZER;
    171 
/*
 * Check that both a symbol table and a string table were located.
 * Returns 1 when both pointers are non-NULL and 0 otherwise.  On
 * DIAGNOSTIC or DEBUG kernels a message is printed for each missing
 * table.
 */
static int
ksyms_verify(const void *symstart, const void *strstart)
{
	const int have_sym = (symstart != NULL);
	const int have_str = (strstart != NULL);

#if defined(DIAGNOSTIC) || defined(DEBUG)
	if (!have_sym)
		printf("ksyms: Symbol table not found\n");
	if (!have_str)
		printf("ksyms: String table not found\n");
	if (!(have_sym && have_str))
		printf("ksyms: Perhaps the kernel is stripped?\n");
#endif
	return (have_sym && have_str) ? 1 : 0;
}
    187 
    188 /*
    189  * Finds a certain symbol name in a certain symbol table.
    190  */
    191 static Elf_Sym *
    192 findsym(const char *name, struct ksyms_symtab *table, int type)
    193 {
    194 	Elf_Sym *sym, *maxsym;
    195 	int low, mid, high, nglob;
    196 	char *str, *cmp;
    197 
    198 	sym = table->sd_symstart;
    199 	str = table->sd_strstart - table->sd_usroffset;
    200 	nglob = table->sd_nglob;
    201 	low = 0;
    202 	high = nglob;
    203 
    204 	/*
    205 	 * Start with a binary search of all global symbols in this table.
    206 	 * Global symbols must have unique names.
    207 	 */
    208 	while (low < high) {
    209 		mid = (low + high) >> 1;
    210 		cmp = sym[mid].st_name + str;
    211 		if (cmp[0] < name[0] || strcmp(cmp, name) < 0) {
    212 			low = mid + 1;
    213 		} else {
    214 			high = mid;
    215 		}
    216 	}
    217 	KASSERT(low == high);
    218 	if (__predict_true(low < nglob &&
    219 	    strcmp(sym[low].st_name + str, name) == 0)) {
    220 		KASSERT(ELF_ST_BIND(sym[low].st_info) == STB_GLOBAL);
    221 		return &sym[low];
    222 	}
    223 
    224 	/*
    225 	 * Perform a linear search of local symbols (rare).  Many local
    226 	 * symbols with the same name can exist so are not included in
    227 	 * the binary search.
    228 	 */
    229 	if (type != KSYMS_EXTERN) {
    230 		maxsym = sym + table->sd_symsize / sizeof(Elf_Sym);
    231 		for (sym += nglob; sym < maxsym; sym++) {
    232 			if (strcmp(name, sym->st_name + str) == 0) {
    233 				return sym;
    234 			}
    235 		}
    236 	}
    237 	return NULL;
    238 }
    239 
/*
 * The "attach" is in reality done in ksyms_init().
 */
#if NKSYMS > 0
/*
 * ksyms can be loaded even if the kernel has a missing "pseudo-device ksyms"
 * statement because ddb and modules require it. Fixing it properly requires
 * fixing config to warn about required, but missing pseudo-devices. For now,
 * if we don't have the pseudo-device we don't need the attach function; this
 * is fine, as it does nothing.
 */
/* Attach routine: intentionally empty, and arg is not used. */
void
ksymsattach(int arg)
{
}
#endif
    256 
    257 void
    258 ksyms_init(void)
    259 {
    260 
    261 #ifdef makeoptions_COPY_SYMTAB
    262 	if (!ksyms_loaded &&
    263 	    strncmp(db_symtab, SYMTAB_FILLER, sizeof(SYMTAB_FILLER))) {
    264 		ksyms_addsyms_elf(db_symtabsize, db_symtab,
    265 		    db_symtab + db_symtabsize);
    266 	}
    267 #endif
    268 
    269 	if (!ksyms_initted) {
    270 		mutex_init(&ksyms_lock, MUTEX_DEFAULT, IPL_NONE);
    271 		cv_init(&ksyms_cv, "ksyms");
    272 		ksyms_psz = pserialize_create();
    273 		ksyms_initted = true;
    274 	}
    275 }
    276 
    277 /*
    278  * Are any symbols available?
    279  */
    280 bool
    281 ksyms_available(void)
    282 {
    283 
    284 	return ksyms_loaded;
    285 }
    286 
/*
 * Add a symbol table.
 * This is intended for use when the symbol table and its corresponding
 * string table are easily available.  If they are embedded in an ELF
 * image, use ksyms_addsyms_elf() instead.
 *
 * name - Symbol table's name.
 * symstart, symsize - Address and size of the symbol table.
 * strstart, strsize - Address and size of the string table.
 * tab - Symbol table to be updated with this information.
 * newstart - Address to which the symbol table has to be copied during
 *            shrinking.  If NULL, it is not moved.
 */
    300 static const char *addsymtab_strstart;
    301 
    302 static int
    303 addsymtab_compar(const void *a, const void *b)
    304 {
    305 	const Elf_Sym *sa, *sb;
    306 
    307 	sa = a;
    308 	sb = b;
    309 
    310 	/*
    311 	 * Split the symbol table into two, with globals at the start
    312 	 * and locals at the end.
    313 	 */
    314 	if (ELF_ST_BIND(sa->st_info) != ELF_ST_BIND(sb->st_info)) {
    315 		if (ELF_ST_BIND(sa->st_info) == STB_GLOBAL) {
    316 			return -1;
    317 		}
    318 		if (ELF_ST_BIND(sb->st_info) == STB_GLOBAL) {
    319 			return 1;
    320 		}
    321 	}
    322 
    323 	/* Within each band, sort by name. */
    324 	return strcmp(sa->st_name + addsymtab_strstart,
    325 	    sb->st_name + addsymtab_strstart);
    326 }
    327 
/*
 * Fill in *tab from the given symbol and string tables, pack and sort
 * the symbols, and publish the table on the global lists.
 *
 * name		- stored in tab->sd_name (the pointer, not a copy).
 * symstart	- source Elf_Sym array; symsize is its size in bytes.
 * strstart	- string table; strsize is its size in bytes.
 * tab		- record to initialize and link in.
 * newstart	- destination of the packed Elf_Sym array.  The callers
 *		  in this file pass symstart, so packing is done in
 *		  place; that works because the write index n never
 *		  exceeds the read index i.
 * ctfstart, ctfsize - recorded in tab unchanged.
 * nmap		- optional map of nsyms entries, or NULL.
 *
 * Also raises ksyms_maxlen, recomputes the global sizes through
 * ksyms_sizes_calc() and sets ksyms_loaded.  Unless the table is named
 * "netbsd" and the system is cold, ksyms_lock must be held.
 */
static void
addsymtab(const char *name, void *symstart, size_t symsize,
	  void *strstart, size_t strsize, struct ksyms_symtab *tab,
	  void *newstart, void *ctfstart, size_t ctfsize, uint32_t *nmap)
{
	Elf_Sym *sym, *nsym, ts;
	int i, j, n, nglob;
	char *str;
	int nsyms = symsize / sizeof(Elf_Sym);
	int s;

	/* Sanity check for pre-allocated map table used during startup. */
	if ((nmap == ksyms_nmap) && (nsyms >= KSYMS_MAX_ID)) {
		printf("kern_ksyms: ERROR %d > %d, increase KSYMS_MAX_ID\n",
		    nsyms, KSYMS_MAX_ID);

		/* truncate for now */
		nsyms = KSYMS_MAX_ID - 1;
	}

	/*
	 * Provisional values; sd_symstart and sd_symsize are rewritten
	 * after packing, sd_minsym/sd_maxsym are narrowed in the loop.
	 */
	tab->sd_symstart = symstart;
	tab->sd_symsize = symsize;
	tab->sd_strstart = strstart;
	tab->sd_strsize = strsize;
	tab->sd_name = name;
	tab->sd_minsym = UINTPTR_MAX;
	tab->sd_maxsym = 0;
	tab->sd_usroffset = 0;
	tab->sd_ctfstart = ctfstart;
	tab->sd_ctfsize = ctfsize;
	tab->sd_nmap = nmap;
	tab->sd_nmapsize = nsyms;
#ifdef KSYMS_DEBUG
	printf("newstart %p sym %p ksyms_symsz %zu str %p strsz %zu send %p\n",
	    newstart, symstart, symsize, strstart, strsize,
	    tab->sd_strstart + tab->sd_strsize);
#endif

	if (nmap) {
		memset(nmap, 0, nsyms * sizeof(uint32_t));
	}

	/* Pack symbol table by removing all file name references. */
	sym = tab->sd_symstart;
	nsym = (Elf_Sym *)newstart;
	str = tab->sd_strstart;
	nglob = 0;
	/* i indexes the source entries, n counts the entries kept. */
	for (i = n = 0; i < nsyms; i++) {

		/*
		 * This breaks CTF mapping, so don't do it when
		 * DTrace is enabled.
		 */
#ifndef KDTRACE_HOOKS
		/*
		 * Remove useless symbols.
		 * Should actually remove all typeless symbols.
		 */
		if (sym[i].st_name == 0)
			continue; /* Skip nameless entries */
		if (sym[i].st_shndx == SHN_UNDEF)
			continue; /* Skip external references */
		if (ELF_ST_TYPE(sym[i].st_info) == STT_FILE)
			continue; /* Skip filenames */
		if (ELF_ST_TYPE(sym[i].st_info) == STT_NOTYPE &&
		    sym[i].st_value == 0 &&
		    strcmp(str + sym[i].st_name, "*ABS*") == 0)
			continue; /* XXX */
		if (ELF_ST_TYPE(sym[i].st_info) == STT_NOTYPE &&
		    strcmp(str + sym[i].st_name, "gcc2_compiled.") == 0)
			continue; /* XXX */
#endif

		/* Save symbol. Set it as an absolute offset */
		nsym[n] = sym[i];

#ifdef KDTRACE_HOOKS
		if (nmap != NULL) {
			/*
			 * Save the size, replace it with the symbol id so
			 * the mapping can be done after the cleanup and sort.
			 */
			nmap[i] = nsym[n].st_size;
			nsym[n].st_size = i + 1;	/* zero is reserved */
		}
#endif

		if (sym[i].st_shndx != SHN_ABS) {
			nsym[n].st_shndx = SHBSS;
		} else {
			/* SHN_ABS is a magic value, don't overwrite it */
		}

		/* Track the longest name, counting the terminating NUL. */
		j = strlen(nsym[n].st_name + str) + 1;
		if (j > ksyms_maxlen)
			ksyms_maxlen = j;
		nglob += (ELF_ST_BIND(nsym[n].st_info) == STB_GLOBAL);

		/* Compute min and max symbols. */
		if (strcmp(str + sym[i].st_name, "*ABS*") != 0
		    && ELF_ST_TYPE(nsym[n].st_info) != STT_NOTYPE) {
			if (nsym[n].st_value < tab->sd_minsym) {
				tab->sd_minsym = nsym[n].st_value;
			}
			if (nsym[n].st_value > tab->sd_maxsym) {
				tab->sd_maxsym = nsym[n].st_value;
			}
		}
		n++;
	}

	/* Fill the rest of the record, and sort the symbols. */
	tab->sd_symstart = nsym;
	tab->sd_symsize = n * sizeof(Elf_Sym);
	tab->sd_nglob = nglob;

	/* The comparator reads names through this file-scope pointer. */
	addsymtab_strstart = str;
	if (kheapsort(nsym, n, sizeof(Elf_Sym), addsymtab_compar, &ts) != 0)
		panic("addsymtab");

#ifdef KDTRACE_HOOKS
	/*
	 * Build the mapping from original symbol id to new symbol table.
	 * Deleted symbols will have a zero map, indices will be one based
	 * instead of zero based.
	 * Resulting map is sd_nmap[original_index] = new_index + 1
	 */
	if (nmap != NULL) {
		int new;
		for (new = 0; new < n; new++) {
			uint32_t orig = nsym[new].st_size - 1;
			uint32_t size = nmap[orig];

			nmap[orig] = new + 1;

			/* restore the size */
			nsym[new].st_size = size;
		}
	}
#endif

	KASSERT(strcmp(name, "netbsd") == 0 || mutex_owned(&ksyms_lock));
	KASSERT(cold || mutex_owned(&ksyms_lock));

	/*
	 * Publish the symtab.  Do this at splhigh to ensure ddb never
	 * witnesses an inconsistent state of the queue, unless memory
	 * is so corrupt that we crash in PSLIST_WRITER_INSERT_AFTER or
	 * TAILQ_INSERT_TAIL.
	 */
	PSLIST_ENTRY_INIT(tab, sd_pslist);
	s = splhigh();
	/* Append to the pslist, mirroring the tail insert on the TAILQ. */
	if (TAILQ_EMPTY(&ksyms_symtabs)) {
		PSLIST_WRITER_INSERT_HEAD(&ksyms_symtabs_psz, tab, sd_pslist);
	} else {
		struct ksyms_symtab *last;

		last = TAILQ_LAST(&ksyms_symtabs, ksyms_symtab_queue);
		PSLIST_WRITER_INSERT_AFTER(last, tab, sd_pslist);
	}
	TAILQ_INSERT_TAIL(&ksyms_symtabs, tab, sd_queue);
	splx(s);

	ksyms_sizes_calc();
	ksyms_loaded = true;
}
    494 
    495 /*
    496  * Setup the kernel symbol table stuff.
    497  */
    498 void
    499 ksyms_addsyms_elf(int symsize, void *start, void *end)
    500 {
    501 	int i, j;
    502 	Elf_Shdr *shdr;
    503 	char *symstart = NULL, *strstart = NULL;
    504 	size_t strsize = 0;
    505 	Elf_Ehdr *ehdr;
    506 	char *ctfstart = NULL;
    507 	size_t ctfsize = 0;
    508 
    509 	if (symsize <= 0) {
    510 		printf("[ Kernel symbol table missing! ]\n");
    511 		return;
    512 	}
    513 
    514 	/* Sanity check */
    515 	if (ALIGNED_POINTER(start, long) == 0) {
    516 		printf("[ Kernel symbol table has bad start address %p ]\n",
    517 		    start);
    518 		return;
    519 	}
    520 
    521 	ehdr = (Elf_Ehdr *)start;
    522 
    523 	/* check if this is a valid ELF header */
    524 	/* No reason to verify arch type, the kernel is actually running! */
    525 	if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) ||
    526 	    ehdr->e_ident[EI_CLASS] != ELFCLASS ||
    527 	    ehdr->e_version > 1) {
    528 		printf("[ Kernel symbol table invalid! ]\n");
    529 		return; /* nothing to do */
    530 	}
    531 
    532 	/* Loaded header will be scratched in addsymtab */
    533 	ksyms_hdr_init(start);
    534 
    535 	/* Find the symbol table and the corresponding string table. */
    536 	shdr = (Elf_Shdr *)((uint8_t *)start + ehdr->e_shoff);
    537 	for (i = 1; i < ehdr->e_shnum; i++) {
    538 		if (shdr[i].sh_type != SHT_SYMTAB)
    539 			continue;
    540 		if (shdr[i].sh_offset == 0)
    541 			continue;
    542 		symstart = (uint8_t *)start + shdr[i].sh_offset;
    543 		symsize = shdr[i].sh_size;
    544 		j = shdr[i].sh_link;
    545 		if (shdr[j].sh_offset == 0)
    546 			continue; /* Can this happen? */
    547 		strstart = (uint8_t *)start + shdr[j].sh_offset;
    548 		strsize = shdr[j].sh_size;
    549 		break;
    550 	}
    551 
    552 #ifdef KDTRACE_HOOKS
    553 	/* Find the CTF section */
    554 	shdr = (Elf_Shdr *)((uint8_t *)start + ehdr->e_shoff);
    555 	if (ehdr->e_shstrndx != 0) {
    556 		char *shstr = (uint8_t *)start +
    557 		    shdr[ehdr->e_shstrndx].sh_offset;
    558 		for (i = 1; i < ehdr->e_shnum; i++) {
    559 #ifdef KSYMS_DEBUG
    560 			printf("ksyms: checking %s\n", &shstr[shdr[i].sh_name]);
    561 #endif
    562 			if (shdr[i].sh_type != SHT_PROGBITS)
    563 				continue;
    564 			if (strncmp(".SUNW_ctf", &shstr[shdr[i].sh_name], 10)
    565 			    != 0)
    566 				continue;
    567 			ctfstart = (uint8_t *)start + shdr[i].sh_offset;
    568 			ctfsize = shdr[i].sh_size;
    569 			ksyms_ctfsz = ctfsize;
    570 #ifdef DEBUG
    571 			aprint_normal("Found CTF at %p, size 0x%zx\n",
    572 			    ctfstart, ctfsize);
    573 #endif
    574 			break;
    575 		}
    576 #ifdef DEBUG
    577 	} else {
    578 		printf("ksyms: e_shstrndx == 0\n");
    579 #endif
    580 	}
    581 #endif
    582 
    583 	if (!ksyms_verify(symstart, strstart))
    584 		return;
    585 
    586 	addsymtab("netbsd", symstart, symsize, strstart, strsize,
    587 	    &kernel_symtab, symstart, ctfstart, ctfsize, ksyms_nmap);
    588 
    589 #ifdef DEBUG
    590 	aprint_normal("Loaded initial symtab at %p, strtab at %p, # entries %ld\n",
    591 	    kernel_symtab.sd_symstart, kernel_symtab.sd_strstart,
    592 	    (long)kernel_symtab.sd_symsize/sizeof(Elf_Sym));
    593 #endif
    594 
    595 	/* Should be no snapshot to invalidate yet.  */
    596 	KASSERT(ksyms_snapshot == NULL);
    597 }
    598 
    599 /*
    600  * Setup the kernel symbol table stuff.
    601  * Use this when the address of the symbol and string tables are known;
    602  * otherwise use ksyms_init with an ELF image.
    603  * We need to pass a minimal ELF header which will later be completed by
    604  * ksyms_hdr_init and handed off to userland through /dev/ksyms.  We use
    605  * a void *rather than a pointer to avoid exposing the Elf_Ehdr type.
    606  */
    607 void
    608 ksyms_addsyms_explicit(void *ehdr, void *symstart, size_t symsize,
    609     void *strstart, size_t strsize)
    610 {
    611 	if (!ksyms_verify(symstart, strstart))
    612 		return;
    613 
    614 	ksyms_hdr_init(ehdr);
    615 	addsymtab("netbsd", symstart, symsize, strstart, strsize,
    616 	    &kernel_symtab, symstart, NULL, 0, ksyms_nmap);
    617 
    618 	/* Should be no snapshot to invalidate yet.  */
    619 	KASSERT(ksyms_snapshot == NULL);
    620 }
    621 
    622 /*
    623  * Get the value associated with a symbol.
    624  * "mod" is the module name, or null if any module.
    625  * "sym" is the symbol name.
    626  * "val" is a pointer to the corresponding value, if call succeeded.
    627  * Returns 0 if success or ENOENT if no such entry.
    628  *
    629  * If symp is nonnull, caller must hold ksyms_lock or module_lock, have
    630  * ksyms_opencnt nonzero, be in a pserialize read section, be in ddb
    631  * with all other CPUs quiescent.
    632  */
    633 int
    634 ksyms_getval_unlocked(const char *mod, const char *sym, Elf_Sym **symp,
    635     unsigned long *val, int type)
    636 {
    637 	struct ksyms_symtab *st;
    638 	Elf_Sym *es;
    639 	int s, error = ENOENT;
    640 
    641 #ifdef KSYMS_DEBUG
    642 	if (ksyms_debug & FOLLOW_CALLS)
    643 		printf("%s: mod %s sym %s valp %p\n", __func__, mod, sym, val);
    644 #endif
    645 
    646 	s = pserialize_read_enter();
    647 	PSLIST_READER_FOREACH(st, &ksyms_symtabs_psz, struct ksyms_symtab,
    648 	    sd_pslist) {
    649 		if (mod != NULL && strcmp(st->sd_name, mod))
    650 			continue;
    651 		if ((es = findsym(sym, st, type)) != NULL) {
    652 			*val = es->st_value;
    653 			if (symp)
    654 				*symp = es;
    655 			error = 0;
    656 			break;
    657 		}
    658 	}
    659 	pserialize_read_exit(s);
    660 	return error;
    661 }
    662 
    663 int
    664 ksyms_getval(const char *mod, const char *sym, unsigned long *val, int type)
    665 {
    666 
    667 	if (!ksyms_loaded)
    668 		return ENOENT;
    669 
    670 	/* No locking needed -- we read the table pserialized.  */
    671 	return ksyms_getval_unlocked(mod, sym, NULL, val, type);
    672 }
    673 
    674 /*
    675  * ksyms_get_mod(mod)
    676  *
    677  * Return the symtab for the given module name.  Caller must ensure
    678  * that the module cannot be unloaded until after this returns.
    679  */
    680 struct ksyms_symtab *
    681 ksyms_get_mod(const char *mod)
    682 {
    683 	struct ksyms_symtab *st;
    684 	int s;
    685 
    686 	s = pserialize_read_enter();
    687 	PSLIST_READER_FOREACH(st, &ksyms_symtabs_psz, struct ksyms_symtab,
    688 	    sd_pslist) {
    689 		if (mod != NULL && strcmp(st->sd_name, mod))
    690 			continue;
    691 		break;
    692 	}
    693 	pserialize_read_exit(s);
    694 
    695 	return st;
    696 }
    697 
    698 
    699 /*
    700  * ksyms_mod_foreach()
    701  *
    702  * Iterate over the symbol table of the specified module, calling the callback
    703  * handler for each symbol. Stop iterating if the handler return is non-zero.
    704  *
    705  */
    706 
    707 int
    708 ksyms_mod_foreach(const char *mod, ksyms_callback_t callback, void *opaque)
    709 {
    710 	struct ksyms_symtab *st;
    711 	Elf_Sym *sym, *maxsym;
    712 	char *str;
    713 	int symindx;
    714 
    715 	if (!ksyms_loaded)
    716 		return ENOENT;
    717 
    718 	mutex_enter(&ksyms_lock);
    719 
    720 	/* find the module */
    721 	TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
    722 		if (mod != NULL && strcmp(st->sd_name, mod))
    723 			continue;
    724 
    725 		sym = st->sd_symstart;
    726 		str = st->sd_strstart - st->sd_usroffset;
    727 
    728 		/* now iterate through the symbols */
    729 		maxsym = sym + st->sd_symsize / sizeof(Elf_Sym);
    730 		for (symindx = 0; sym < maxsym; sym++, symindx++) {
    731 			if (callback(str + sym->st_name, symindx,
    732 			    (void *)sym->st_value,
    733 			    sym->st_size,
    734 			    sym->st_info,
    735 			    opaque) != 0) {
    736 				break;
    737 			}
    738 		}
    739 	}
    740 	mutex_exit(&ksyms_lock);
    741 
    742 	return 0;
    743 }
    744 
    745 /*
    746  * Get "mod" and "symbol" associated with an address.
    747  * Returns 0 if success or ENOENT if no such entry.
    748  *
    749  * Caller must hold ksyms_lock or module_lock, have ksyms_opencnt
    750  * nonzero, be in a pserialize read section, or be in ddb with all
    751  * other CPUs quiescent.
    752  */
    753 int
    754 ksyms_getname(const char **mod, const char **sym, vaddr_t v, int f)
    755 {
    756 	struct ksyms_symtab *st;
    757 	Elf_Sym *les, *es = NULL;
    758 	vaddr_t laddr = 0;
    759 	const char *lmod = NULL;
    760 	char *stable = NULL;
    761 	int type, i, sz;
    762 
    763 	if (!ksyms_loaded)
    764 		return ENOENT;
    765 
    766 	PSLIST_READER_FOREACH(st, &ksyms_symtabs_psz, struct ksyms_symtab,
    767 	    sd_pslist) {
    768 		if (v < st->sd_minsym || v > st->sd_maxsym)
    769 			continue;
    770 		sz = st->sd_symsize/sizeof(Elf_Sym);
    771 		for (i = 0; i < sz; i++) {
    772 			les = st->sd_symstart + i;
    773 			type = ELF_ST_TYPE(les->st_info);
    774 
    775 			if ((f & KSYMS_PROC) && (type != STT_FUNC))
    776 				continue;
    777 
    778 			if (type == STT_NOTYPE)
    779 				continue;
    780 
    781 			if (((f & KSYMS_ANY) == 0) &&
    782 			    (type != STT_FUNC) && (type != STT_OBJECT))
    783 				continue;
    784 
    785 			if ((les->st_value <= v) && (les->st_value > laddr)) {
    786 				laddr = les->st_value;
    787 				es = les;
    788 				lmod = st->sd_name;
    789 				stable = st->sd_strstart - st->sd_usroffset;
    790 			}
    791 		}
    792 	}
    793 	if (es == NULL)
    794 		return ENOENT;
    795 	if ((f & KSYMS_EXACT) && (v != es->st_value))
    796 		return ENOENT;
    797 	if (mod)
    798 		*mod = lmod;
    799 	if (sym)
    800 		*sym = stable + es->st_name;
    801 	return 0;
    802 }
    803 
    804 /*
    805  * Add a symbol table from a loadable module.
    806  */
    807 void
    808 ksyms_modload(const char *name, void *symstart, vsize_t symsize,
    809     char *strstart, vsize_t strsize)
    810 {
    811 	struct ksyms_symtab *st;
    812 	struct ksyms_snapshot *ks;
    813 	void *nmap;
    814 
    815 	st = kmem_zalloc(sizeof(*st), KM_SLEEP);
    816 	nmap = kmem_zalloc(symsize / sizeof(Elf_Sym) * sizeof (uint32_t),
    817 			   KM_SLEEP);
    818 	mutex_enter(&ksyms_lock);
    819 	addsymtab(name, symstart, symsize, strstart, strsize, st, symstart,
    820 	    NULL, 0, nmap);
    821 	ks = ksyms_snapshot;
    822 	ksyms_snapshot = NULL;
    823 	mutex_exit(&ksyms_lock);
    824 
    825 	if (ks)
    826 		ksyms_snapshot_release(ks);
    827 }
    828 
    829 /*
    830  * Remove a symbol table from a loadable module.
    831  */
    832 void
    833 ksyms_modunload(const char *name)
    834 {
    835 	struct ksyms_symtab *st;
    836 	struct ksyms_snapshot *ks;
    837 	int s;
    838 
    839 	mutex_enter(&ksyms_lock);
    840 	TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
    841 		if (strcmp(name, st->sd_name) != 0)
    842 			continue;
    843 		break;
    844 	}
    845 	KASSERT(st != NULL);
    846 
    847 	/* Wait for any snapshot in progress to complete.  */
    848 	while (ksyms_snapshotting)
    849 		cv_wait(&ksyms_cv, &ksyms_lock);
    850 
    851 	/*
    852 	 * Remove the symtab.  Do this at splhigh to ensure ddb never
    853 	 * witnesses an inconsistent state of the queue, unless memory
    854 	 * is so corrupt that we crash in TAILQ_REMOVE or
    855 	 * PSLIST_WRITER_REMOVE.
    856 	 */
    857 	s = splhigh();
    858 	TAILQ_REMOVE(&ksyms_symtabs, st, sd_queue);
    859 	PSLIST_WRITER_REMOVE(st, sd_pslist);
    860 	splx(s);
    861 
    862 	/*
    863 	 * And wait a grace period, in case there are any pserialized
    864 	 * readers in flight.
    865 	 */
    866 	pserialize_perform(ksyms_psz);
    867 	PSLIST_ENTRY_DESTROY(st, sd_pslist);
    868 
    869 	/* Recompute the ksyms sizes now that we've removed st.  */
    870 	ksyms_sizes_calc();
    871 
    872 	/* Invalidate the global ksyms snapshot.  */
    873 	ks = ksyms_snapshot;
    874 	ksyms_snapshot = NULL;
    875 	mutex_exit(&ksyms_lock);
    876 
    877 	/*
    878 	 * No more references are possible.  Free the name map and the
    879 	 * symtab itself, which we had allocated in ksyms_modload.
    880 	 */
    881 	kmem_free(st->sd_nmap, st->sd_nmapsize * sizeof(uint32_t));
    882 	kmem_free(st, sizeof(*st));
    883 
    884 	/* Release the formerly global ksyms snapshot, if any.  */
    885 	if (ks)
    886 		ksyms_snapshot_release(ks);
    887 }
    888 
#ifdef DDB
/*
 * Keep sifting stuff here, to avoid export of ksyms internals.
 *
 * Print every symbol whose name contains the string "sym", restricted
 * to module "mod" unless mod is NULL.  With mode 'F' each name is
 * followed by a type marker: '+' object, '*' function, '&' section,
 * '/' file, ' ' anything else.  Always returns ENOENT.
 *
 * The system is expected to be quiescent, so no locking is done.
 */
int
ksyms_sift(char *mod, char *sym, int mode)
{
	struct ksyms_symtab *tab;
	char *strs;
	int idx, count;

	if (!ksyms_loaded)
		return ENOENT;

	TAILQ_FOREACH(tab, &ksyms_symtabs, sd_queue) {
		if (mod && strcmp(mod, tab->sd_name))
			continue;
		strs = tab->sd_strstart - tab->sd_usroffset;

		count = tab->sd_symsize/sizeof(Elf_Sym);
		for (idx = 0; idx < count; idx++) {
			Elf_Sym *ent = tab->sd_symstart + idx;
			char *symname = strs + ent->st_name;
			char marker;

			if (strstr(symname, sym) == NULL)
				continue;

			/* Plain listing: the name only. */
			if (mode != 'F') {
				db_printf("%s ", symname);
				continue;
			}

			switch (ELF_ST_TYPE(ent->st_info)) {
			case STT_OBJECT:
				marker = '+';
				break;
			case STT_FUNC:
				marker = '*';
				break;
			case STT_SECTION:
				marker = '&';
				break;
			case STT_FILE:
				marker = '/';
				break;
			default:
				marker = ' ';
				break;
			}
			db_printf("%s%c ", symname, marker);
		}
	}
	return ENOENT;
}
#endif /* DDB */
    944 
    945 /*
    946  * In case we exposing the symbol table to the userland using the pseudo-
    947  * device /dev/ksyms, it is easier to provide all the tables as one.
    948  * However, it means we have to change all the st_name fields for the
    949  * symbols so they match the ELF image that the userland will read
    950  * through the device.
    951  *
    952  * The actual (correct) value of st_name is preserved through a global
    953  * offset stored in the symbol table structure.
    954  *
    955  * Call with ksyms_lock held.
    956  */
    957 static void
    958 ksyms_sizes_calc(void)
    959 {
    960 	struct ksyms_symtab *st;
    961 	int i, delta;
    962 
    963 	KASSERT(cold || mutex_owned(&ksyms_lock));
    964 
    965 	ksyms_symsz = ksyms_strsz = 0;
    966 	TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
    967 		delta = ksyms_strsz - st->sd_usroffset;
    968 		if (delta != 0) {
    969 			for (i = 0; i < st->sd_symsize/sizeof(Elf_Sym); i++)
    970 				st->sd_symstart[i].st_name += delta;
    971 			st->sd_usroffset = ksyms_strsz;
    972 		}
    973 		ksyms_symsz += st->sd_symsize;
    974 		ksyms_strsz += st->sd_strsize;
    975 	}
    976 }
    977 
    978 static void
    979 ksyms_fill_note(void)
    980 {
    981 	int32_t *note = ksyms_hdr.kh_note;
    982 	note[0] = ELF_NOTE_NETBSD_NAMESZ;
    983 	note[1] = ELF_NOTE_NETBSD_DESCSZ;
    984 	note[2] = ELF_NOTE_TYPE_NETBSD_TAG;
    985 	memcpy(&note[3],  "NetBSD\0", 8);
    986 	note[5] = __NetBSD_Version__;
    987 }
    988 
    989 static void
    990 ksyms_hdr_init(const void *hdraddr)
    991 {
    992 	/* Copy the loaded elf exec header */
    993 	memcpy(&ksyms_hdr.kh_ehdr, hdraddr, sizeof(Elf_Ehdr));
    994 
    995 	/* Set correct program/section header sizes, offsets and numbers */
    996 	ksyms_hdr.kh_ehdr.e_phoff = offsetof(struct ksyms_hdr, kh_phdr[0]);
    997 	ksyms_hdr.kh_ehdr.e_phentsize = sizeof(Elf_Phdr);
    998 	ksyms_hdr.kh_ehdr.e_phnum = NPRGHDR;
    999 	ksyms_hdr.kh_ehdr.e_shoff = offsetof(struct ksyms_hdr, kh_shdr[0]);
   1000 	ksyms_hdr.kh_ehdr.e_shentsize = sizeof(Elf_Shdr);
   1001 	ksyms_hdr.kh_ehdr.e_shnum = NSECHDR;
   1002 	ksyms_hdr.kh_ehdr.e_shstrndx = SHSTRTAB;
   1003 
   1004 	/* Text/data - fake */
   1005 	ksyms_hdr.kh_phdr[0].p_type = PT_LOAD;
   1006 	ksyms_hdr.kh_phdr[0].p_memsz = (unsigned long)-1L;
   1007 	ksyms_hdr.kh_phdr[0].p_flags = PF_R | PF_X | PF_W;
   1008 
   1009 #define SHTCOPY(name)  strlcpy(&ksyms_hdr.kh_strtab[offs], (name), \
   1010     sizeof(ksyms_hdr.kh_strtab) - offs), offs += sizeof(name)
   1011 
   1012 	uint32_t offs = 1;
   1013 	/* First section header ".note.netbsd.ident" */
   1014 	ksyms_hdr.kh_shdr[SHNOTE].sh_name = offs;
   1015 	ksyms_hdr.kh_shdr[SHNOTE].sh_type = SHT_NOTE;
   1016 	ksyms_hdr.kh_shdr[SHNOTE].sh_offset =
   1017 	    offsetof(struct ksyms_hdr, kh_note[0]);
   1018 	ksyms_hdr.kh_shdr[SHNOTE].sh_size = sizeof(ksyms_hdr.kh_note);
   1019 	ksyms_hdr.kh_shdr[SHNOTE].sh_addralign = sizeof(int);
   1020 	SHTCOPY(".note.netbsd.ident");
   1021 	ksyms_fill_note();
   1022 
   1023 	/* Second section header; ".symtab" */
   1024 	ksyms_hdr.kh_shdr[SYMTAB].sh_name = offs;
   1025 	ksyms_hdr.kh_shdr[SYMTAB].sh_type = SHT_SYMTAB;
   1026 	ksyms_hdr.kh_shdr[SYMTAB].sh_offset = sizeof(struct ksyms_hdr);
   1027 /*	ksyms_hdr.kh_shdr[SYMTAB].sh_size = filled in at open */
   1028 	ksyms_hdr.kh_shdr[SYMTAB].sh_link = STRTAB; /* Corresponding strtab */
   1029 	ksyms_hdr.kh_shdr[SYMTAB].sh_addralign = sizeof(long);
   1030 	ksyms_hdr.kh_shdr[SYMTAB].sh_entsize = sizeof(Elf_Sym);
   1031 	SHTCOPY(".symtab");
   1032 
   1033 	/* Third section header; ".strtab" */
   1034 	ksyms_hdr.kh_shdr[STRTAB].sh_name = offs;
   1035 	ksyms_hdr.kh_shdr[STRTAB].sh_type = SHT_STRTAB;
   1036 /*	ksyms_hdr.kh_shdr[STRTAB].sh_offset = filled in at open */
   1037 /*	ksyms_hdr.kh_shdr[STRTAB].sh_size = filled in at open */
   1038 	ksyms_hdr.kh_shdr[STRTAB].sh_addralign = sizeof(char);
   1039 	SHTCOPY(".strtab");
   1040 
   1041 	/* Fourth section, ".shstrtab" */
   1042 	ksyms_hdr.kh_shdr[SHSTRTAB].sh_name = offs;
   1043 	ksyms_hdr.kh_shdr[SHSTRTAB].sh_type = SHT_STRTAB;
   1044 	ksyms_hdr.kh_shdr[SHSTRTAB].sh_offset =
   1045 	    offsetof(struct ksyms_hdr, kh_strtab);
   1046 	ksyms_hdr.kh_shdr[SHSTRTAB].sh_size = SHSTRSIZ;
   1047 	ksyms_hdr.kh_shdr[SHSTRTAB].sh_addralign = sizeof(char);
   1048 	SHTCOPY(".shstrtab");
   1049 
   1050 	/* Fifth section, ".bss". All symbols reside here. */
   1051 	ksyms_hdr.kh_shdr[SHBSS].sh_name = offs;
   1052 	ksyms_hdr.kh_shdr[SHBSS].sh_type = SHT_NOBITS;
   1053 	ksyms_hdr.kh_shdr[SHBSS].sh_offset = 0;
   1054 	ksyms_hdr.kh_shdr[SHBSS].sh_size = (unsigned long)-1L;
   1055 	ksyms_hdr.kh_shdr[SHBSS].sh_addralign = PAGE_SIZE;
   1056 	ksyms_hdr.kh_shdr[SHBSS].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
   1057 	SHTCOPY(".bss");
   1058 
   1059 	/* Sixth section header; ".SUNW_ctf" */
   1060 	ksyms_hdr.kh_shdr[SHCTF].sh_name = offs;
   1061 	ksyms_hdr.kh_shdr[SHCTF].sh_type = SHT_PROGBITS;
   1062 /*	ksyms_hdr.kh_shdr[SHCTF].sh_offset = filled in at open */
   1063 /*	ksyms_hdr.kh_shdr[SHCTF].sh_size = filled in at open */
   1064 	ksyms_hdr.kh_shdr[SHCTF].sh_link = SYMTAB; /* Corresponding symtab */
   1065 	ksyms_hdr.kh_shdr[SHCTF].sh_addralign = sizeof(char);
   1066 	SHTCOPY(".SUNW_ctf");
   1067 }
   1068 
   1069 static struct ksyms_snapshot *
   1070 ksyms_snapshot_alloc(int maxlen, size_t size, dev_t dev, uint64_t gen)
   1071 {
   1072 	struct ksyms_snapshot *ks;
   1073 
   1074 	ks = kmem_zalloc(sizeof(*ks), KM_SLEEP);
   1075 	ks->ks_refcnt = 1;
   1076 	ks->ks_gen = gen;
   1077 	ks->ks_uobj = uao_create(size, 0);
   1078 	ks->ks_size = size;
   1079 	ks->ks_dev = dev;
   1080 	ks->ks_maxlen = maxlen;
   1081 
   1082 	return ks;
   1083 }
   1084 
   1085 static void
   1086 ksyms_snapshot_release(struct ksyms_snapshot *ks)
   1087 {
   1088 	uint64_t refcnt;
   1089 
   1090 	mutex_enter(&ksyms_lock);
   1091 	refcnt = --ks->ks_refcnt;
   1092 	mutex_exit(&ksyms_lock);
   1093 
   1094 	if (refcnt)
   1095 		return;
   1096 
   1097 	uao_detach(ks->ks_uobj);
   1098 	kmem_free(ks, sizeof(*ks));
   1099 }
   1100 
   1101 static int
   1102 ubc_copyfrombuf(struct uvm_object *uobj, struct uio *uio, const void *buf,
   1103     size_t n)
   1104 {
   1105 	struct iovec iov = { .iov_base = __UNCONST(buf), .iov_len = n };
   1106 
   1107 	uio->uio_iov = &iov;
   1108 	uio->uio_iovcnt = 1;
   1109 	uio->uio_resid = n;
   1110 
   1111 	return ubc_uiomove(uobj, uio, n, UVM_ADV_SEQUENTIAL, UBC_WRITE);
   1112 }
   1113 
   1114 static int
   1115 ksyms_take_snapshot(struct ksyms_snapshot *ks, struct ksyms_symtab *last)
   1116 {
   1117 	struct uvm_object *uobj = ks->ks_uobj;
   1118 	struct uio uio;
   1119 	struct ksyms_symtab *st;
   1120 	int error;
   1121 
   1122 	/* Caller must have initiated snapshotting.  */
   1123 	KASSERT(ksyms_snapshotting == curlwp);
   1124 
   1125 	/* Start a uio transfer to reuse incrementally.  */
   1126 	uio.uio_offset = 0;
   1127 	uio.uio_rw = UIO_WRITE; /* write from buffer to uobj */
   1128 	UIO_SETUP_SYSSPACE(&uio);
   1129 
   1130 	/*
   1131 	 * First: Copy out the ELF header.
   1132 	 */
   1133 	error = ubc_copyfrombuf(uobj, &uio, &ksyms_hdr, sizeof(ksyms_hdr));
   1134 	if (error)
   1135 		return error;
   1136 
   1137 	/*
   1138 	 * Copy out the symbol table.  The list of symtabs is
   1139 	 * guaranteed to be nonempty because we always have an entry
   1140 	 * for the main kernel.  We stop at last, not at the end of the
   1141 	 * tailq or NULL, because entries beyond last are not included
   1142 	 * in this snapshot (and may not be fully initialized memory as
   1143 	 * we witness it).
   1144 	 */
   1145 	KASSERT(uio.uio_offset == sizeof(struct ksyms_hdr));
   1146 	for (st = TAILQ_FIRST(&ksyms_symtabs);
   1147 	     ;
   1148 	     st = TAILQ_NEXT(st, sd_queue)) {
   1149 		error = ubc_copyfrombuf(uobj, &uio, st->sd_symstart,
   1150 		    st->sd_symsize);
   1151 		if (error)
   1152 			return error;
   1153 		if (st == last)
   1154 			break;
   1155 	}
   1156 
   1157 	/*
   1158 	 * Copy out the string table
   1159 	 */
   1160 	KASSERT(uio.uio_offset == sizeof(struct ksyms_hdr) +
   1161 	    ksyms_hdr.kh_shdr[SYMTAB].sh_size);
   1162 	for (st = TAILQ_FIRST(&ksyms_symtabs);
   1163 	     ;
   1164 	     st = TAILQ_NEXT(st, sd_queue)) {
   1165 		error = ubc_copyfrombuf(uobj, &uio, st->sd_strstart,
   1166 		    st->sd_strsize);
   1167 		if (error)
   1168 			return error;
   1169 		if (st == last)
   1170 			break;
   1171 	}
   1172 
   1173 	/*
   1174 	 * Copy out the CTF table.
   1175 	 */
   1176 	KASSERT(uio.uio_offset == sizeof(struct ksyms_hdr) +
   1177 	    ksyms_hdr.kh_shdr[SYMTAB].sh_size +
   1178 	    ksyms_hdr.kh_shdr[STRTAB].sh_size);
   1179 	st = TAILQ_FIRST(&ksyms_symtabs);
   1180 	if (st->sd_ctfstart != NULL) {
   1181 		error = ubc_copyfrombuf(uobj, &uio, st->sd_ctfstart,
   1182 		    st->sd_ctfsize);
   1183 		if (error)
   1184 			return error;
   1185 	}
   1186 
   1187 	KASSERT(uio.uio_offset == sizeof(struct ksyms_hdr) +
   1188 	    ksyms_hdr.kh_shdr[SYMTAB].sh_size +
   1189 	    ksyms_hdr.kh_shdr[STRTAB].sh_size +
   1190 	    ksyms_hdr.kh_shdr[SHCTF].sh_size);
   1191 	KASSERT(uio.uio_offset == ks->ks_size);
   1192 
   1193 	return 0;
   1194 }
   1195 
   1196 static const struct fileops ksyms_fileops;
   1197 
   1198 static int
   1199 ksymsopen(dev_t dev, int flags, int devtype, struct lwp *l)
   1200 {
   1201 	struct file *fp = NULL;
   1202 	int fd = -1;
   1203 	struct ksyms_snapshot *ks = NULL;
   1204 	size_t size;
   1205 	struct ksyms_symtab *last;
   1206 	int maxlen;
   1207 	uint64_t gen;
   1208 	int error;
   1209 
   1210 	if (minor(dev) != 0 || !ksyms_loaded)
   1211 		return ENXIO;
   1212 
   1213 	/* Allocate a private file.  */
   1214 	error = fd_allocfile(&fp, &fd);
   1215 	if (error)
   1216 		return error;
   1217 
   1218 	mutex_enter(&ksyms_lock);
   1219 
   1220 	/*
   1221 	 * Wait until we have a snapshot, or until there is no snapshot
   1222 	 * being taken right now so we can take one.
   1223 	 */
   1224 	while ((ks = ksyms_snapshot) == NULL && ksyms_snapshotting) {
   1225 		error = cv_wait_sig(&ksyms_cv, &ksyms_lock);
   1226 		if (error)
   1227 			goto out;
   1228 	}
   1229 
   1230 	/*
   1231 	 * If there's a usable snapshot, increment its reference count
   1232 	 * (can't overflow, 64-bit) and just reuse it.
   1233 	 */
   1234 	if (ks) {
   1235 		ks->ks_refcnt++;
   1236 		goto out;
   1237 	}
   1238 
   1239 	/* Find the current length of the symtab object. */
   1240 	size = sizeof(struct ksyms_hdr);
   1241 	size += ksyms_strsz;
   1242 	size += ksyms_symsz;
   1243 	size += ksyms_ctfsz;
   1244 
   1245 	/* Start a new snapshot.  */
   1246 	ksyms_hdr.kh_shdr[SYMTAB].sh_size = ksyms_symsz;
   1247 	ksyms_hdr.kh_shdr[SYMTAB].sh_info = ksyms_symsz / sizeof(Elf_Sym);
   1248 	ksyms_hdr.kh_shdr[STRTAB].sh_offset = ksyms_symsz +
   1249 	    ksyms_hdr.kh_shdr[SYMTAB].sh_offset;
   1250 	ksyms_hdr.kh_shdr[STRTAB].sh_size = ksyms_strsz;
   1251 	ksyms_hdr.kh_shdr[SHCTF].sh_offset = ksyms_strsz +
   1252 	    ksyms_hdr.kh_shdr[STRTAB].sh_offset;
   1253 	ksyms_hdr.kh_shdr[SHCTF].sh_size = ksyms_ctfsz;
   1254 	last = TAILQ_LAST(&ksyms_symtabs, ksyms_symtab_queue);
   1255 	maxlen = ksyms_maxlen;
   1256 	gen = ksyms_snapshot_gen++;
   1257 
   1258 	/*
   1259 	 * Prevent ksyms entries from being removed while we take the
   1260 	 * snapshot.
   1261 	 */
   1262 	KASSERT(ksyms_snapshotting == NULL);
   1263 	ksyms_snapshotting = curlwp;
   1264 	mutex_exit(&ksyms_lock);
   1265 
   1266 	/* Create a snapshot and write the symtab to it.  */
   1267 	ks = ksyms_snapshot_alloc(maxlen, size, dev, gen);
   1268 	error = ksyms_take_snapshot(ks, last);
   1269 
   1270 	/*
   1271 	 * Snapshot creation is done.  Wake up anyone waiting to remove
   1272 	 * entries (module unload).
   1273 	 */
   1274 	mutex_enter(&ksyms_lock);
   1275 	KASSERTMSG(ksyms_snapshotting == curlwp, "lwp %p stole snapshot",
   1276 	    ksyms_snapshotting);
   1277 	ksyms_snapshotting = NULL;
   1278 	cv_broadcast(&ksyms_cv);
   1279 
   1280 	/* If we failed, give up.  */
   1281 	if (error)
   1282 		goto out;
   1283 
   1284 	/* Cache the snapshot for the next reader.  */
   1285 	KASSERT(ksyms_snapshot == NULL);
   1286 	ksyms_snapshot = ks;
   1287 	ks->ks_refcnt++;
   1288 	KASSERT(ks->ks_refcnt == 2);
   1289 
   1290 out:	mutex_exit(&ksyms_lock);
   1291 	if (error) {
   1292 		if (fp)
   1293 			fd_abort(curproc, fp, fd);
   1294 		if (ks)
   1295 			ksyms_snapshot_release(ks);
   1296 	} else {
   1297 		KASSERT(fp);
   1298 		KASSERT(ks);
   1299 		error = fd_clone(fp, fd, flags, &ksyms_fileops, ks);
   1300 		KASSERTMSG(error == EMOVEFD, "error=%d", error);
   1301 	}
   1302 	return error;
   1303 }
   1304 
   1305 static int
   1306 ksymsclose(struct file *fp)
   1307 {
   1308 	struct ksyms_snapshot *ks = fp->f_data;
   1309 
   1310 	ksyms_snapshot_release(ks);
   1311 
   1312 	return 0;
   1313 }
   1314 
   1315 static int
   1316 ksymsread(struct file *fp, off_t *offp, struct uio *uio, kauth_cred_t cred,
   1317     int flags)
   1318 {
   1319 	const struct ksyms_snapshot *ks = fp->f_data;
   1320 	size_t count;
   1321 	int error;
   1322 
   1323 	/*
   1324 	 * Since we don't have a per-object lock, we might as well use
   1325 	 * the struct file lock to serialize access to fp->f_offset --
   1326 	 * but if the caller isn't relying on or updating fp->f_offset,
   1327 	 * there's no need to do even that.  We could use ksyms_lock,
   1328 	 * but why bother with a global lock if not needed?  Either
   1329 	 * way, the lock we use here must agree with what ksymsseek
   1330 	 * takes (nothing else in ksyms uses fp->f_offset).
   1331 	 */
   1332 	if (offp == &fp->f_offset)
   1333 		mutex_enter(&fp->f_lock);
   1334 
   1335 	/* Refuse negative offsets.  */
   1336 	if (*offp < 0) {
   1337 		error = EINVAL;
   1338 		goto out;
   1339 	}
   1340 
   1341 	/* Return nothing at or past end of file.  */
   1342 	if (*offp >= ks->ks_size) {
   1343 		error = 0;
   1344 		goto out;
   1345 	}
   1346 
   1347 	/*
   1348 	 * 1. Set up the uio to transfer from offset *offp.
   1349 	 * 2. Transfer as many bytes as we can (at most uio->uio_resid
   1350 	 *    or what's left in the ksyms).
   1351 	 * 3. If requested, update *offp to reflect the number of bytes
   1352 	 *    transferred.
   1353 	 */
   1354 	uio->uio_offset = *offp;
   1355 	count = uio->uio_resid;
   1356 	error = ubc_uiomove(ks->ks_uobj, uio, MIN(count, ks->ks_size - *offp),
   1357 	    UVM_ADV_SEQUENTIAL, UBC_READ|UBC_PARTIALOK);
   1358 	if (flags & FOF_UPDATE_OFFSET)
   1359 		*offp += count - uio->uio_resid;
   1360 
   1361 out:	if (offp == &fp->f_offset)
   1362 		mutex_exit(&fp->f_lock);
   1363 	return error;
   1364 }
   1365 
   1366 static int
   1367 ksymsstat(struct file *fp, struct stat *st)
   1368 {
   1369 	const struct ksyms_snapshot *ks = fp->f_data;
   1370 
   1371 	memset(st, 0, sizeof(*st));
   1372 
   1373 	st->st_dev = NODEV;
   1374 	st->st_ino = 0;
   1375 	st->st_mode = S_IFCHR;
   1376 	st->st_nlink = 1;
   1377 	st->st_uid = kauth_cred_geteuid(fp->f_cred);
   1378 	st->st_gid = kauth_cred_getegid(fp->f_cred);
   1379 	st->st_rdev = ks->ks_dev;
   1380 	st->st_size = ks->ks_size;
   1381 	/* zero time */
   1382 	st->st_blksize = MAXPHYS; /* XXX arbitrary */
   1383 	st->st_blocks = 0;
   1384 	st->st_gen = ks->ks_gen;
   1385 
   1386 	return 0;
   1387 }
   1388 
   1389 static int
   1390 ksymsmmap(struct file *fp, off_t *offp, size_t nbytes, int prot, int *flagsp,
   1391     int *advicep, struct uvm_object **uobjp, int *maxprotp)
   1392 {
   1393 	const struct ksyms_snapshot *ks = fp->f_data;
   1394 
   1395 	/* uvm_mmap guarantees page-aligned offset and size.  */
   1396 	KASSERT(*offp == round_page(*offp));
   1397 	KASSERT(nbytes == round_page(nbytes));
   1398 	KASSERT(nbytes > 0);
   1399 
   1400 	/* Refuse negative offsets.  */
   1401 	if (*offp < 0)
   1402 		return EINVAL;
   1403 
   1404 	/* Refuse mappings that pass the end of file.  */
   1405 	if (nbytes > round_page(ks->ks_size) ||
   1406 	    *offp > round_page(ks->ks_size) - nbytes)
   1407 		return EINVAL;	/* XXX ??? */
   1408 
   1409 	/* Success!  */
   1410 	uao_reference(ks->ks_uobj);
   1411 	*advicep = UVM_ADV_SEQUENTIAL;
   1412 	*uobjp = ks->ks_uobj;
   1413 	*maxprotp = prot & VM_PROT_READ;
   1414 	return 0;
   1415 }
   1416 
   1417 static int
   1418 ksymsseek(struct file *fp, off_t delta, int whence, off_t *newoffp, int flags)
   1419 {
   1420 	const off_t OFF_MAX = __type_max(off_t);
   1421 	struct ksyms_snapshot *ks = fp->f_data;
   1422 	off_t base, newoff;
   1423 	int error;
   1424 
   1425 	mutex_enter(&fp->f_lock);
   1426 
   1427 	switch (whence) {
   1428 	case SEEK_CUR:
   1429 		base = fp->f_offset;
   1430 		break;
   1431 	case SEEK_END:
   1432 		base = ks->ks_size;
   1433 		break;
   1434 	case SEEK_SET:
   1435 		base = 0;
   1436 		break;
   1437 	default:
   1438 		error = EINVAL;
   1439 		goto out;
   1440 	}
   1441 
   1442 	/* Check for arithmetic overflow and reject negative offsets.  */
   1443 	if (base < 0 || delta > OFF_MAX - base || base + delta < 0) {
   1444 		error = EINVAL;
   1445 		goto out;
   1446 	}
   1447 
   1448 	/* Compute the new offset.  */
   1449 	newoff = base + delta;
   1450 
   1451 	/* Success!  */
   1452 	if (newoffp)
   1453 		*newoffp = newoff;
   1454 	if (flags & FOF_UPDATE_OFFSET)
   1455 		fp->f_offset = newoff;
   1456 	error = 0;
   1457 
   1458 out:	mutex_exit(&fp->f_lock);
   1459 	return error;
   1460 }
   1461 
   1462 __CTASSERT(offsetof(struct ksyms_ogsymbol, kg_name) == offsetof(struct ksyms_gsymbol, kg_name));
   1463 __CTASSERT(offsetof(struct ksyms_gvalue, kv_name) == offsetof(struct ksyms_gsymbol, kg_name));
   1464 
   1465 static int
   1466 ksymsioctl(struct file *fp, u_long cmd, void *data)
   1467 {
   1468 	struct ksyms_snapshot *ks = fp->f_data;
   1469 	struct ksyms_ogsymbol *okg = (struct ksyms_ogsymbol *)data;
   1470 	struct ksyms_gsymbol *kg = (struct ksyms_gsymbol *)data;
   1471 	struct ksyms_gvalue *kv = (struct ksyms_gvalue *)data;
   1472 	struct ksyms_symtab *st;
   1473 	Elf_Sym *sym = NULL, copy;
   1474 	unsigned long val;
   1475 	int error = 0;
   1476 	char *str = NULL;
   1477 	int len, s;
   1478 
   1479 	/* Read cached ksyms_maxlen.  */
   1480 	len = ks->ks_maxlen;
   1481 
   1482 	if (cmd == OKIOCGVALUE || cmd == OKIOCGSYMBOL ||
   1483 	    cmd == KIOCGVALUE || cmd == KIOCGSYMBOL) {
   1484 		str = kmem_alloc(len, KM_SLEEP);
   1485 		if ((error = copyinstr(kg->kg_name, str, len, NULL)) != 0) {
   1486 			kmem_free(str, len);
   1487 			return error;
   1488 		}
   1489 	}
   1490 
   1491 	switch (cmd) {
   1492 	case OKIOCGVALUE:
   1493 		/*
   1494 		 * Use the in-kernel symbol lookup code for fast
   1495 		 * retreival of a value.
   1496 		 */
   1497 		error = ksyms_getval(NULL, str, &val, KSYMS_EXTERN);
   1498 		if (error == 0)
   1499 			error = copyout(&val, okg->kg_value, sizeof(long));
   1500 		kmem_free(str, len);
   1501 		break;
   1502 
   1503 	case OKIOCGSYMBOL:
   1504 		/*
   1505 		 * Use the in-kernel symbol lookup code for fast
   1506 		 * retreival of a symbol.
   1507 		 */
   1508 		s = pserialize_read_enter();
   1509 		PSLIST_READER_FOREACH(st, &ksyms_symtabs_psz,
   1510 		    struct ksyms_symtab, sd_pslist) {
   1511 			if ((sym = findsym(str, st, KSYMS_ANY)) == NULL)
   1512 				continue;
   1513 #ifdef notdef
   1514 			/* Skip if bad binding */
   1515 			if (ELF_ST_BIND(sym->st_info) != STB_GLOBAL) {
   1516 				sym = NULL;
   1517 				continue;
   1518 			}
   1519 #endif
   1520 			break;
   1521 		}
   1522 		if (sym != NULL) {
   1523 			memcpy(&copy, sym, sizeof(copy));
   1524 			pserialize_read_exit(s);
   1525 			error = copyout(&copy, okg->kg_sym, sizeof(Elf_Sym));
   1526 		} else {
   1527 			pserialize_read_exit(s);
   1528 			error = ENOENT;
   1529 		}
   1530 		kmem_free(str, len);
   1531 		break;
   1532 
   1533 	case KIOCGVALUE:
   1534 		/*
   1535 		 * Use the in-kernel symbol lookup code for fast
   1536 		 * retreival of a value.
   1537 		 */
   1538 		error = ksyms_getval(NULL, str, &val, KSYMS_EXTERN);
   1539 		if (error == 0)
   1540 			kv->kv_value = val;
   1541 		kmem_free(str, len);
   1542 		break;
   1543 
   1544 	case KIOCGSYMBOL:
   1545 		/*
   1546 		 * Use the in-kernel symbol lookup code for fast
   1547 		 * retreival of a symbol.
   1548 		 */
   1549 		s = pserialize_read_enter();
   1550 		PSLIST_READER_FOREACH(st, &ksyms_symtabs_psz,
   1551 		    struct ksyms_symtab, sd_pslist) {
   1552 			if ((sym = findsym(str, st, KSYMS_ANY)) == NULL)
   1553 				continue;
   1554 #ifdef notdef
   1555 			/* Skip if bad binding */
   1556 			if (ELF_ST_BIND(sym->st_info) != STB_GLOBAL) {
   1557 				sym = NULL;
   1558 				continue;
   1559 			}
   1560 #endif
   1561 			break;
   1562 		}
   1563 		if (sym != NULL) {
   1564 			kg->kg_sym = *sym;
   1565 		} else {
   1566 			error = ENOENT;
   1567 		}
   1568 		pserialize_read_exit(s);
   1569 		kmem_free(str, len);
   1570 		break;
   1571 
   1572 	case KIOCGSIZE:
   1573 		/*
   1574 		 * Get total size of symbol table.
   1575 		 */
   1576 		*(int *)data = ks->ks_size;
   1577 		break;
   1578 
   1579 	default:
   1580 		error = ENOTTY;
   1581 		break;
   1582 	}
   1583 
   1584 	return error;
   1585 }
   1586 
   1587 const struct cdevsw ksyms_cdevsw = {
   1588 	.d_open = ksymsopen,
   1589 	.d_close = noclose,
   1590 	.d_read = noread,
   1591 	.d_write = nowrite,
   1592 	.d_ioctl = noioctl,
   1593 	.d_stop = nostop,
   1594 	.d_tty = notty,
   1595 	.d_poll = nopoll,
   1596 	.d_mmap = nommap,
   1597 	.d_kqfilter = nokqfilter,
   1598 	.d_discard = nodiscard,
   1599 	.d_flag = D_OTHER | D_MPSAFE
   1600 };
   1601 
   1602 static const struct fileops ksyms_fileops = {
   1603 	.fo_name = "ksyms",
   1604 	.fo_read = ksymsread,
   1605 	.fo_write = fbadop_write,
   1606 	.fo_ioctl = ksymsioctl,
   1607 	.fo_fcntl = fnullop_fcntl,
   1608 	.fo_poll = fnullop_poll,
   1609 	.fo_stat = ksymsstat,
   1610 	.fo_close = ksymsclose,
   1611 	.fo_kqfilter = fnullop_kqfilter,
   1612 	.fo_restart = fnullop_restart,
   1613 	.fo_mmap = ksymsmmap,
   1614 	.fo_seek = ksymsseek,
   1615 };
   1616