Home | History | Annotate | Line # | Download | only in kern
kern_subr.c revision 1.103
      1 /*	$NetBSD: kern_subr.c,v 1.103 2003/08/07 16:31:49 agc Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1997, 1998, 1999, 2002 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
      9  * NASA Ames Research Center, and by Luke Mewburn.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  * 3. All advertising materials mentioning features or use of this software
     20  *    must display the following acknowledgement:
     21  *	This product includes software developed by the NetBSD
     22  *	Foundation, Inc. and its contributors.
     23  * 4. Neither the name of The NetBSD Foundation nor the names of its
     24  *    contributors may be used to endorse or promote products derived
     25  *    from this software without specific prior written permission.
     26  *
     27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     37  * POSSIBILITY OF SUCH DAMAGE.
     38  */
     39 
     40 /*
     41  * Copyright (c) 1982, 1986, 1991, 1993
     42  *	The Regents of the University of California.  All rights reserved.
     43  * (c) UNIX System Laboratories, Inc.
     44  * All or some portions of this file are derived from material licensed
     45  * to the University of California by American Telephone and Telegraph
     46  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
     47  * the permission of UNIX System Laboratories, Inc.
     48  *
     49  * Copyright (c) 1992, 1993
     50  *	The Regents of the University of California.  All rights reserved.
     51  *
     52  * This software was developed by the Computer Systems Engineering group
     53  * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
     54  * contributed to Berkeley.
     55  *
     56  * All advertising materials mentioning features or use of this software
     57  * must display the following acknowledgement:
     58  *	This product includes software developed by the University of
     59  *	California, Lawrence Berkeley Laboratory.
     60  *
     61  * Redistribution and use in source and binary forms, with or without
     62  * modification, are permitted provided that the following conditions
     63  * are met:
     64  * 1. Redistributions of source code must retain the above copyright
     65  *    notice, this list of conditions and the following disclaimer.
     66  * 2. Redistributions in binary form must reproduce the above copyright
     67  *    notice, this list of conditions and the following disclaimer in the
     68  *    documentation and/or other materials provided with the distribution.
     69  * 3. Neither the name of the University nor the names of its contributors
     70  *    may be used to endorse or promote products derived from this software
     71  *    without specific prior written permission.
     72  *
     73  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     74  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     75  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     76  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     77  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     78  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     79  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     80  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     81  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     82  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     83  * SUCH DAMAGE.
     84  *
     85  *	@(#)kern_subr.c	8.4 (Berkeley) 2/14/95
     86  */
     87 
     88 #include <sys/cdefs.h>
     89 __KERNEL_RCSID(0, "$NetBSD: kern_subr.c,v 1.103 2003/08/07 16:31:49 agc Exp $");
     90 
     91 #include "opt_ddb.h"
     92 #include "opt_md.h"
     93 #include "opt_syscall_debug.h"
     94 #include "opt_ktrace.h"
     95 #include "opt_systrace.h"
     96 
     97 #include <sys/param.h>
     98 #include <sys/systm.h>
     99 #include <sys/proc.h>
    100 #include <sys/malloc.h>
    101 #include <sys/mount.h>
    102 #include <sys/device.h>
    103 #include <sys/reboot.h>
    104 #include <sys/conf.h>
    105 #include <sys/disklabel.h>
    106 #include <sys/queue.h>
    107 #include <sys/systrace.h>
    108 #include <sys/ktrace.h>
    109 
    110 #include <uvm/uvm_extern.h>
    111 
    112 #include <dev/cons.h>
    113 
    114 #include <net/if.h>
    115 
    116 /* XXX these should eventually move to subr_autoconf.c */
    117 static struct device *finddevice __P((const char *));
    118 static struct device *getdisk __P((char *, int, int, dev_t *, int));
    119 static struct device *parsedisk __P((char *, int, int, dev_t *));
    120 
/*
 * A generic linear hook.
 *
 * One hook_desc is allocated per registered callback and linked onto
 * a hook_list_t.  The shutdown, mountroot, exec, exit and fork hook
 * lists in this file are all built from this type.
 */
struct hook_desc {
	LIST_ENTRY(hook_desc) hk_list;	/* linkage on the owning list */
	/*
	 * Callback.  Stored with a one-argument signature; some lists
	 * cast it back to a different signature before calling it
	 * (see hook_proc_run() and doforkhooks()).
	 */
	void	(*hk_fn) __P((void *));
	void	*hk_arg;		/* opaque argument kept with hk_fn */
};
typedef LIST_HEAD(, hook_desc) hook_list_t;

/* Helpers shared by the individual hook lists defined below. */
static void *hook_establish __P((hook_list_t *, void (*)(void *), void *));
static void hook_disestablish __P((hook_list_t *, void *));
static void hook_destroy __P((hook_list_t *));
static void hook_proc_run __P((hook_list_t *, struct proc *));

MALLOC_DEFINE(M_IOV, "iov", "large iov's");
    137 
    138 int
    139 uiomove(buf, n, uio)
    140 	void *buf;
    141 	size_t n;
    142 	struct uio *uio;
    143 {
    144 	struct iovec *iov;
    145 	u_int cnt;
    146 	int error = 0;
    147 	char *cp = buf;
    148 	struct proc *p = uio->uio_procp;
    149 
    150 #ifdef DIAGNOSTIC
    151 	if (uio->uio_rw != UIO_READ && uio->uio_rw != UIO_WRITE)
    152 		panic("uiomove: mode");
    153 #endif
    154 	while (n > 0 && uio->uio_resid) {
    155 		iov = uio->uio_iov;
    156 		cnt = iov->iov_len;
    157 		if (cnt == 0) {
    158 			uio->uio_iov++;
    159 			uio->uio_iovcnt--;
    160 			continue;
    161 		}
    162 		if (cnt > n)
    163 			cnt = n;
    164 		switch (uio->uio_segflg) {
    165 
    166 		case UIO_USERSPACE:
    167 			if (curlwp->l_cpu->ci_schedstate.spc_flags &
    168 			    SPCF_SHOULDYIELD)
    169 				preempt(1);
    170 			if (__predict_true(p == curproc)) {
    171 				if (uio->uio_rw == UIO_READ)
    172 					error = copyout(cp, iov->iov_base, cnt);
    173 				else
    174 					error = copyin(iov->iov_base, cp, cnt);
    175 			} else {
    176 				if (uio->uio_rw == UIO_READ)
    177 					error = copyout_proc(p, cp,
    178 					    iov->iov_base, cnt);
    179 				else
    180 					error = copyin_proc(p, iov->iov_base,
    181 					    cp, cnt);
    182 			}
    183 			if (error)
    184 				return (error);
    185 			break;
    186 
    187 		case UIO_SYSSPACE:
    188 			if (uio->uio_rw == UIO_READ)
    189 				error = kcopy(cp, iov->iov_base, cnt);
    190 			else
    191 				error = kcopy(iov->iov_base, cp, cnt);
    192 			if (error)
    193 				return (error);
    194 			break;
    195 		}
    196 		iov->iov_base = (caddr_t)iov->iov_base + cnt;
    197 		iov->iov_len -= cnt;
    198 		uio->uio_resid -= cnt;
    199 		uio->uio_offset += cnt;
    200 		cp += cnt;
    201 		KDASSERT(cnt <= n);
    202 		n -= cnt;
    203 	}
    204 	return (error);
    205 }
    206 
    207 /*
    208  * Give next character to user as result of read.
    209  */
    210 int
    211 ureadc(c, uio)
    212 	int c;
    213 	struct uio *uio;
    214 {
    215 	struct iovec *iov;
    216 
    217 	if (uio->uio_resid <= 0)
    218 		panic("ureadc: non-positive resid");
    219 again:
    220 	if (uio->uio_iovcnt <= 0)
    221 		panic("ureadc: non-positive iovcnt");
    222 	iov = uio->uio_iov;
    223 	if (iov->iov_len <= 0) {
    224 		uio->uio_iovcnt--;
    225 		uio->uio_iov++;
    226 		goto again;
    227 	}
    228 	switch (uio->uio_segflg) {
    229 
    230 	case UIO_USERSPACE:
    231 		if (subyte(iov->iov_base, c) < 0)
    232 			return (EFAULT);
    233 		break;
    234 
    235 	case UIO_SYSSPACE:
    236 		*(char *)iov->iov_base = c;
    237 		break;
    238 	}
    239 	iov->iov_base = (caddr_t)iov->iov_base + 1;
    240 	iov->iov_len--;
    241 	uio->uio_resid--;
    242 	uio->uio_offset++;
    243 	return (0);
    244 }
    245 
    246 /*
    247  * Like copyin(), but operates on an arbitrary process.
    248  */
    249 int
    250 copyin_proc(struct proc *p, const void *uaddr, void *kaddr, size_t len)
    251 {
    252 	struct iovec iov;
    253 	struct uio uio;
    254 	int error;
    255 
    256 	if (len == 0)
    257 		return (0);
    258 
    259 	iov.iov_base = kaddr;
    260 	iov.iov_len = len;
    261 	uio.uio_iov = &iov;
    262 	uio.uio_iovcnt = 1;
    263 	uio.uio_offset = (off_t)(intptr_t)uaddr;
    264 	uio.uio_resid = len;
    265 	uio.uio_segflg = UIO_SYSSPACE;
    266 	uio.uio_rw = UIO_READ;
    267 	uio.uio_procp = NULL;
    268 
    269 	/* XXXCDC: how should locking work here? */
    270 	if ((p->p_flag & P_WEXIT) || (p->p_vmspace->vm_refcnt < 1))
    271 		return (EFAULT);
    272 	p->p_vmspace->vm_refcnt++;	/* XXX */
    273 	error = uvm_io(&p->p_vmspace->vm_map, &uio);
    274 	uvmspace_free(p->p_vmspace);
    275 
    276 	return (error);
    277 }
    278 
    279 /*
    280  * Like copyout(), but operates on an arbitrary process.
    281  */
    282 int
    283 copyout_proc(struct proc *p, const void *kaddr, void *uaddr, size_t len)
    284 {
    285 	struct iovec iov;
    286 	struct uio uio;
    287 	int error;
    288 
    289 	if (len == 0)
    290 		return (0);
    291 
    292 	iov.iov_base = (void *) kaddr;	/* XXX cast away const */
    293 	iov.iov_len = len;
    294 	uio.uio_iov = &iov;
    295 	uio.uio_iovcnt = 1;
    296 	uio.uio_offset = (off_t)(intptr_t)uaddr;
    297 	uio.uio_resid = len;
    298 	uio.uio_segflg = UIO_SYSSPACE;
    299 	uio.uio_rw = UIO_WRITE;
    300 	uio.uio_procp = NULL;
    301 
    302 	/* XXXCDC: how should locking work here? */
    303 	if ((p->p_flag & P_WEXIT) || (p->p_vmspace->vm_refcnt < 1))
    304 		return (EFAULT);
    305 	p->p_vmspace->vm_refcnt++;	/* XXX */
    306 	error = uvm_io(&p->p_vmspace->vm_map, &uio);
    307 	uvmspace_free(p->p_vmspace);
    308 
    309 	return (error);
    310 }
    311 
    312 /*
    313  * General routine to allocate a hash table.
    314  * Allocate enough memory to hold at least `elements' list-head pointers.
    315  * Return a pointer to the allocated space and set *hashmask to a pattern
    316  * suitable for masking a value to use as an index into the returned array.
    317  */
    318 void *
    319 hashinit(elements, htype, mtype, mflags, hashmask)
    320 	u_int elements;
    321 	enum hashtype htype;
    322 	struct malloc_type *mtype;
    323 	int mflags;
    324 	u_long *hashmask;
    325 {
    326 	u_long hashsize, i;
    327 	LIST_HEAD(, generic) *hashtbl_list;
    328 	TAILQ_HEAD(, generic) *hashtbl_tailq;
    329 	size_t esize;
    330 	void *p;
    331 
    332 	if (elements == 0)
    333 		panic("hashinit: bad cnt");
    334 	for (hashsize = 1; hashsize < elements; hashsize <<= 1)
    335 		continue;
    336 
    337 	switch (htype) {
    338 	case HASH_LIST:
    339 		esize = sizeof(*hashtbl_list);
    340 		break;
    341 	case HASH_TAILQ:
    342 		esize = sizeof(*hashtbl_tailq);
    343 		break;
    344 #ifdef DIAGNOSTIC
    345 	default:
    346 		panic("hashinit: invalid table type");
    347 #endif
    348 	}
    349 
    350 	if ((p = malloc(hashsize * esize, mtype, mflags)) == NULL)
    351 		return (NULL);
    352 
    353 	switch (htype) {
    354 	case HASH_LIST:
    355 		hashtbl_list = p;
    356 		for (i = 0; i < hashsize; i++)
    357 			LIST_INIT(&hashtbl_list[i]);
    358 		break;
    359 	case HASH_TAILQ:
    360 		hashtbl_tailq = p;
    361 		for (i = 0; i < hashsize; i++)
    362 			TAILQ_INIT(&hashtbl_tailq[i]);
    363 		break;
    364 	}
    365 	*hashmask = hashsize - 1;
    366 	return (p);
    367 }
    368 
    369 /*
    370  * Free memory from hash table previosly allocated via hashinit().
    371  */
    372 void
    373 hashdone(hashtbl, mtype)
    374 	void *hashtbl;
    375 	struct malloc_type *mtype;
    376 {
    377 
    378 	free(hashtbl, mtype);
    379 }
    380 
    381 
    382 static void *
    383 hook_establish(list, fn, arg)
    384 	hook_list_t *list;
    385 	void (*fn) __P((void *));
    386 	void *arg;
    387 {
    388 	struct hook_desc *hd;
    389 
    390 	hd = malloc(sizeof(*hd), M_DEVBUF, M_NOWAIT);
    391 	if (hd == NULL)
    392 		return (NULL);
    393 
    394 	hd->hk_fn = fn;
    395 	hd->hk_arg = arg;
    396 	LIST_INSERT_HEAD(list, hd, hk_list);
    397 
    398 	return (hd);
    399 }
    400 
    401 static void
    402 hook_disestablish(list, vhook)
    403 	hook_list_t *list;
    404 	void *vhook;
    405 {
    406 #ifdef DIAGNOSTIC
    407 	struct hook_desc *hd;
    408 
    409 	LIST_FOREACH(hd, list, hk_list) {
    410                 if (hd == vhook)
    411 			break;
    412 	}
    413 
    414 	if (hd == NULL)
    415 		panic("hook_disestablish: hook %p not established", vhook);
    416 #endif
    417 	LIST_REMOVE((struct hook_desc *)vhook, hk_list);
    418 	free(vhook, M_DEVBUF);
    419 }
    420 
    421 static void
    422 hook_destroy(list)
    423 	hook_list_t *list;
    424 {
    425 	struct hook_desc *hd;
    426 
    427 	while ((hd = LIST_FIRST(list)) != NULL) {
    428 		LIST_REMOVE(hd, hk_list);
    429 		free(hd, M_DEVBUF);
    430 	}
    431 }
    432 
    433 static void
    434 hook_proc_run(list, p)
    435 	hook_list_t *list;
    436 	struct proc *p;
    437 {
    438 	struct hook_desc *hd;
    439 
    440 	for (hd = LIST_FIRST(list); hd != NULL; hd = LIST_NEXT(hd, hk_list)) {
    441 		((void (*) __P((struct proc *, void *)))*hd->hk_fn)(p,
    442 		    hd->hk_arg);
    443 	}
    444 }
    445 
    446 /*
    447  * "Shutdown hook" types, functions, and variables.
    448  *
    449  * Should be invoked immediately before the
    450  * system is halted or rebooted, i.e. after file systems unmounted,
    451  * after crash dump done, etc.
    452  *
    453  * Each shutdown hook is removed from the list before it's run, so that
    454  * it won't be run again.
    455  */
    456 
    457 hook_list_t shutdownhook_list;
    458 
    459 void *
    460 shutdownhook_establish(fn, arg)
    461 	void (*fn) __P((void *));
    462 	void *arg;
    463 {
    464 	return hook_establish(&shutdownhook_list, fn, arg);
    465 }
    466 
    467 void
    468 shutdownhook_disestablish(vhook)
    469 	void *vhook;
    470 {
    471 	hook_disestablish(&shutdownhook_list, vhook);
    472 }
    473 
    474 /*
    475  * Run shutdown hooks.  Should be invoked immediately before the
    476  * system is halted or rebooted, i.e. after file systems unmounted,
    477  * after crash dump done, etc.
    478  *
    479  * Each shutdown hook is removed from the list before it's run, so that
    480  * it won't be run again.
    481  */
    482 void
    483 doshutdownhooks()
    484 {
    485 	struct hook_desc *dp;
    486 
    487 	while ((dp = LIST_FIRST(&shutdownhook_list)) != NULL) {
    488 		LIST_REMOVE(dp, hk_list);
    489 		(*dp->hk_fn)(dp->hk_arg);
    490 #if 0
    491 		/*
    492 		 * Don't bother freeing the hook structure,, since we may
    493 		 * be rebooting because of a memory corruption problem,
    494 		 * and this might only make things worse.  It doesn't
    495 		 * matter, anyway, since the system is just about to
    496 		 * reboot.
    497 		 */
    498 		free(dp, M_DEVBUF);
    499 #endif
    500 	}
    501 }
    502 
    503 /*
    504  * "Mountroot hook" types, functions, and variables.
    505  */
    506 
    507 hook_list_t mountroothook_list;
    508 
    509 void *
    510 mountroothook_establish(fn, dev)
    511 	void (*fn) __P((struct device *));
    512 	struct device *dev;
    513 {
    514 	return hook_establish(&mountroothook_list, (void (*)__P((void *)))fn,
    515 	    dev);
    516 }
    517 
    518 void
    519 mountroothook_disestablish(vhook)
    520 	void *vhook;
    521 {
    522 	hook_disestablish(&mountroothook_list, vhook);
    523 }
    524 
    525 void
    526 mountroothook_destroy()
    527 {
    528 	hook_destroy(&mountroothook_list);
    529 }
    530 
    531 void
    532 domountroothook()
    533 {
    534 	struct hook_desc *hd;
    535 
    536 	LIST_FOREACH(hd, &mountroothook_list, hk_list) {
    537 		if (hd->hk_arg == (void *)root_device) {
    538 			(*hd->hk_fn)(hd->hk_arg);
    539 			return;
    540 		}
    541 	}
    542 }
    543 
    544 hook_list_t exechook_list;
    545 
    546 void *
    547 exechook_establish(fn, arg)
    548 	void (*fn) __P((struct proc *, void *));
    549 	void *arg;
    550 {
    551 	return hook_establish(&exechook_list, (void (*) __P((void *)))fn, arg);
    552 }
    553 
    554 void
    555 exechook_disestablish(vhook)
    556 	void *vhook;
    557 {
    558 	hook_disestablish(&exechook_list, vhook);
    559 }
    560 
    561 /*
    562  * Run exec hooks.
    563  */
    564 void
    565 doexechooks(p)
    566 	struct proc *p;
    567 {
    568 	hook_proc_run(&exechook_list, p);
    569 }
    570 
    571 hook_list_t exithook_list;
    572 
    573 void *
    574 exithook_establish(fn, arg)
    575 	void (*fn) __P((struct proc *, void *));
    576 	void *arg;
    577 {
    578 	return hook_establish(&exithook_list, (void (*) __P((void *)))fn, arg);
    579 }
    580 
    581 void
    582 exithook_disestablish(vhook)
    583 	void *vhook;
    584 {
    585 	hook_disestablish(&exithook_list, vhook);
    586 }
    587 
    588 /*
    589  * Run exit hooks.
    590  */
    591 void
    592 doexithooks(p)
    593 	struct proc *p;
    594 {
    595 	hook_proc_run(&exithook_list, p);
    596 }
    597 
    598 hook_list_t forkhook_list;
    599 
    600 void *
    601 forkhook_establish(fn)
    602 	void (*fn) __P((struct proc *, struct proc *));
    603 {
    604 	return hook_establish(&forkhook_list, (void (*) __P((void *)))fn, NULL);
    605 }
    606 
    607 void
    608 forkhook_disestablish(vhook)
    609 	void *vhook;
    610 {
    611 	hook_disestablish(&forkhook_list, vhook);
    612 }
    613 
    614 /*
    615  * Run fork hooks.
    616  */
    617 void
    618 doforkhooks(p2, p1)
    619 	struct proc *p2, *p1;
    620 {
    621 	struct hook_desc *hd;
    622 
    623 	LIST_FOREACH(hd, &forkhook_list, hk_list) {
    624 		((void (*) __P((struct proc *, struct proc *)))*hd->hk_fn)
    625 		    (p2, p1);
    626 	}
    627 }
    628 
/*
 * "Power hook" types, functions, and variables.
 * The list of power hooks is kept ordered with the last registered hook
 * first.
 * When running the hooks on power down the hooks are called in reverse
 * registration order, when powering up in registration order.
 */
struct powerhook_desc {
	CIRCLEQ_ENTRY(powerhook_desc) sfd_list;	/* linkage on powerhook_list */
	void	(*sfd_fn) __P((int, void *));	/* called as sfd_fn(why, sfd_arg) */
	void	*sfd_arg;			/* opaque argument for sfd_fn */
};

/* A circular queue, so dopowerhooks() can walk it in either direction. */
CIRCLEQ_HEAD(, powerhook_desc) powerhook_list =
	CIRCLEQ_HEAD_INITIALIZER(powerhook_list);
    644 
    645 void *
    646 powerhook_establish(fn, arg)
    647 	void (*fn) __P((int, void *));
    648 	void *arg;
    649 {
    650 	struct powerhook_desc *ndp;
    651 
    652 	ndp = (struct powerhook_desc *)
    653 	    malloc(sizeof(*ndp), M_DEVBUF, M_NOWAIT);
    654 	if (ndp == NULL)
    655 		return (NULL);
    656 
    657 	ndp->sfd_fn = fn;
    658 	ndp->sfd_arg = arg;
    659 	CIRCLEQ_INSERT_HEAD(&powerhook_list, ndp, sfd_list);
    660 
    661 	return (ndp);
    662 }
    663 
    664 void
    665 powerhook_disestablish(vhook)
    666 	void *vhook;
    667 {
    668 #ifdef DIAGNOSTIC
    669 	struct powerhook_desc *dp;
    670 
    671 	CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list)
    672                 if (dp == vhook)
    673 			goto found;
    674 	panic("powerhook_disestablish: hook %p not established", vhook);
    675  found:
    676 #endif
    677 
    678 	CIRCLEQ_REMOVE(&powerhook_list, (struct powerhook_desc *)vhook,
    679 	    sfd_list);
    680 	free(vhook, M_DEVBUF);
    681 }
    682 
    683 /*
    684  * Run power hooks.
    685  */
    686 void
    687 dopowerhooks(why)
    688 	int why;
    689 {
    690 	struct powerhook_desc *dp;
    691 
    692 	if (why == PWR_RESUME || why == PWR_SOFTRESUME) {
    693 		CIRCLEQ_FOREACH_REVERSE(dp, &powerhook_list, sfd_list) {
    694 			(*dp->sfd_fn)(why, dp->sfd_arg);
    695 		}
    696 	} else {
    697 		CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list) {
    698 			(*dp->sfd_fn)(why, dp->sfd_arg);
    699 		}
    700 	}
    701 }
    702 
    703 /*
    704  * Determine the root device and, if instructed to, the root file system.
    705  */
    706 
#include "md.h"
/* With no md units configured, the memory disk hooks are disabled. */
#if NMD == 0
#undef MEMORY_DISK_HOOKS
#endif

#ifdef MEMORY_DISK_HOOKS
/* One placeholder device per md unit; filled in by setroot(). */
static struct device fakemdrootdev[NMD];
#endif

#include "raid.h"
/* The raid root hooks are enabled only when NRAID is exactly 1. */
#if NRAID == 1
#define BOOT_FROM_RAID_HOOKS 1
#endif

#ifdef BOOT_FROM_RAID_HOOKS
/* Defined elsewhere; searched by name in finddevice(). */
extern int numraid;
extern struct device *raidrootdev;
#endif
    725 
/*
 * Choose the root device, the dump device and, when the operator is
 * asked, the root file system type.
 *
 * `bootdv' is the device the system was booted from (may be NULL) and
 * `bootpartition' the partition on it.  Depending on boothowto,
 * rootspec and dumpspec, this sets root_device, rootdev, dumpdev and
 * mountroot, and prints the "root on ... dumps on ..." line.
 *
 * If RB_ASKNAME is set in boothowto, or no usable root can be derived
 * automatically, the operator is prompted on the console until valid
 * answers are given.
 */
void
setroot(bootdv, bootpartition)
	struct device *bootdv;
	int bootpartition;
{
	struct device *dv;
	int len;
#ifdef MEMORY_DISK_HOOKS
	int i;
#endif
	dev_t nrootdev;
	dev_t ndumpdev = NODEV;
	char buf[128];
	const char *rootdevname;
	const char *dumpdevname;
	struct device *rootdv = NULL;		/* XXX gcc -Wuninitialized */
	struct device *dumpdv = NULL;
	struct ifnet *ifp;
	const char *deffsname;
	struct vfsops *vops;

#ifdef MEMORY_DISK_HOOKS
	/* Fill in the placeholder devices "md0" .. "md<NMD-1>". */
	for (i = 0; i < NMD; i++) {
		fakemdrootdev[i].dv_class  = DV_DISK;
		fakemdrootdev[i].dv_cfdata = NULL;
		fakemdrootdev[i].dv_unit   = i;
		fakemdrootdev[i].dv_parent = NULL;
		sprintf(fakemdrootdev[i].dv_xname, "md%d", i);
	}
#endif /* MEMORY_DISK_HOOKS */

#ifdef MEMORY_DISK_IS_ROOT
	/* Override whatever the caller passed: boot from md0a. */
	bootdv = &fakemdrootdev[0];
	bootpartition = 0;
#endif

	/*
	 * If NFS is specified as the file system, and the boot device
	 * is not a network interface (or there is no boot device at
	 * all), then find a reasonable network interface for "rootspec".
	 */
	vops = vfs_getopsbyname("nfs");
	if (vops != NULL && vops->vfs_mountroot == mountroot &&
	    rootspec == NULL &&
	    (bootdv == NULL || bootdv->dv_class != DV_IFNET)) {
		/* Take the first interface that is neither loopback nor p2p. */
		TAILQ_FOREACH(ifp, &ifnet, if_list) {
			if ((ifp->if_flags &
			     (IFF_LOOPBACK|IFF_POINTOPOINT)) == 0)
				break;
		}
		if (ifp == NULL) {
			/*
			 * Can't find a suitable interface; ask the
			 * user.
			 */
			boothowto |= RB_ASKNAME;
		} else {
			/*
			 * Have a suitable interface; behave as if
			 * the user specified this interface.
			 */
			rootspec = (const char *)ifp->if_xname;
		}
	}

	/*
	 * If the root is wildcarded and the boot device wasn't
	 * determined, ask the user.
	 */
	if (rootspec == NULL && bootdv == NULL)
		boothowto |= RB_ASKNAME;

	/*
	 * Re-entered via "goto top" with RB_ASKNAME set whenever the
	 * automatic choice below turns out to be unusable.
	 */
 top:
	if (boothowto & RB_ASKNAME) {
		struct device *defdumpdv;

		/* Prompt for the root device until getdisk() accepts it. */
		for (;;) {
			printf("root device");
			if (bootdv != NULL) {
				printf(" (default %s", bootdv->dv_xname);
				if (bootdv->dv_class == DV_DISK)
					printf("%c", bootpartition + 'a');
				printf(")");
			}
			printf(": ");
			len = cngetsn(buf, sizeof(buf));
			/* Empty answer: fall back to the boot device name. */
			if (len == 0 && bootdv != NULL) {
				strlcpy(buf, bootdv->dv_xname, sizeof(buf));
				len = strlen(buf);
			}
			/*
			 * A trailing '*' is stripped and the name is
			 * first tried with default partition 1 instead
			 * of bootpartition.
			 */
			if (len > 0 && buf[len - 1] == '*') {
				buf[--len] = '\0';
				dv = getdisk(buf, len, 1, &nrootdev, 0);
				if (dv != NULL) {
					rootdv = dv;
					break;
				}
			}
			dv = getdisk(buf, len, bootpartition, &nrootdev, 0);
			if (dv != NULL) {
				rootdv = dv;
				break;
			}
		}

		/*
		 * Set up the default dump device.  If root is on
		 * a network device, there is no default dump
		 * device, since we don't support dumps to the
		 * network.
		 */
		if (rootdv->dv_class == DV_IFNET)
			defdumpdv = NULL;
		else
			defdumpdv = rootdv;

		/* Prompt for the dump device; "none" disables dumps. */
		for (;;) {
			printf("dump device");
			if (defdumpdv != NULL) {
				/*
				 * Note, we know it's a disk if we get here.
				 */
				printf(" (default %sb)", defdumpdv->dv_xname);
			}
			printf(": ");
			len = cngetsn(buf, sizeof(buf));
			if (len == 0) {
				/* Default: partition 1 ('b') of the root disk. */
				if (defdumpdv != NULL) {
					ndumpdev = MAKEDISKDEV(major(nrootdev),
					    DISKUNIT(nrootdev), 1);
				}
				dumpdv = defdumpdv;
				break;
			}
			if (len == 4 && strcmp(buf, "none") == 0) {
				dumpdv = NULL;
				break;
			}
			dv = getdisk(buf, len, 1, &ndumpdev, 1);
			if (dv != NULL) {
				dumpdv = dv;
				break;
			}
		}

		rootdev = nrootdev;
		dumpdev = ndumpdev;

		/*
		 * Find the file system whose mountroot routine is the
		 * currently selected one; its name is offered as the
		 * default below.
		 */
		for (vops = LIST_FIRST(&vfs_list); vops != NULL;
		     vops = LIST_NEXT(vops, vfs_list)) {
			if (vops->vfs_mountroot != NULL &&
			    vops->vfs_mountroot == mountroot)
			break;
		}

		if (vops == NULL) {
			mountroot = NULL;
			deffsname = "generic";
		} else
			deffsname = vops->vfs_name;

		/*
		 * Prompt for the file system type.  "halt", "reboot" and
		 * (with DDB) "ddb" are accepted as commands here.
		 */
		for (;;) {
			printf("file system (default %s): ", deffsname);
			len = cngetsn(buf, sizeof(buf));
			if (len == 0)
				break;
			if (len == 4 && strcmp(buf, "halt") == 0)
				cpu_reboot(RB_HALT, NULL);
			else if (len == 6 && strcmp(buf, "reboot") == 0)
				cpu_reboot(0, NULL);
#if defined(DDB)
			else if (len == 3 && strcmp(buf, "ddb") == 0) {
				console_debugger();
			}
#endif
			else if (len == 7 && strcmp(buf, "generic") == 0) {
				mountroot = NULL;
				break;
			}
			/*
			 * Anything that did not break out above, including
			 * "ddb" once console_debugger() returns, is looked
			 * up as a file system name.
			 */
			vops = vfs_getopsbyname(buf);
			if (vops == NULL || vops->vfs_mountroot == NULL) {
				/* Unknown or not root-capable: list the choices. */
				printf("use one of: generic");
				for (vops = LIST_FIRST(&vfs_list);
				     vops != NULL;
				     vops = LIST_NEXT(vops, vfs_list)) {
					if (vops->vfs_mountroot != NULL)
						printf(" %s", vops->vfs_name);
				}
#if defined(DDB)
				printf(" ddb");
#endif
				printf(" halt reboot\n");
			} else {
				mountroot = vops->vfs_mountroot;
				break;
			}
		}

	} else if (rootspec == NULL) {
		int majdev;

		/*
		 * Wildcarded root; use the boot device.
		 * (bootdv is non-NULL here: the NULL case set
		 * RB_ASKNAME above.)
		 */
		rootdv = bootdv;

		majdev = devsw_name2blk(bootdv->dv_xname, NULL, 0);
		if (majdev >= 0) {
			/*
			 * Root is on a disk.  `bootpartition' is root.
			 */
			rootdev = MAKEDISKDEV(majdev, bootdv->dv_unit,
			    bootpartition);
		}
	} else {

		/*
		 * `root on <dev> ...'
		 */

		/*
		 * If it's a network interface, we can bail out
		 * early.
		 */
		dv = finddevice(rootspec);
		if (dv != NULL && dv->dv_class == DV_IFNET) {
			rootdv = dv;
			goto haveroot;
		}

		/* Otherwise derive the device name from rootdev. */
		rootdevname = devsw_blk2name(major(rootdev));
		if (rootdevname == NULL) {
			printf("unknown device major 0x%x\n", rootdev);
			boothowto |= RB_ASKNAME;
			goto top;
		}
		memset(buf, 0, sizeof(buf));
		sprintf(buf, "%s%d", rootdevname, DISKUNIT(rootdev));

		rootdv = finddevice(buf);
		if (rootdv == NULL) {
			printf("device %s (0x%x) not configured\n",
			    buf, rootdev);
			boothowto |= RB_ASKNAME;
			goto top;
		}
	}

 haveroot:

	root_device = rootdv;

	switch (rootdv->dv_class) {
	case DV_IFNET:
		aprint_normal("root on %s", rootdv->dv_xname);
		break;

	case DV_DISK:
		aprint_normal("root on %s%c", rootdv->dv_xname,
		    DISKPART(rootdev) + 'a');
		break;

	default:
		/* Neither a disk nor a network interface: ask again. */
		printf("can't determine root device\n");
		boothowto |= RB_ASKNAME;
		goto top;
	}

	/*
	 * Now configure the dump device.
	 *
	 * If we haven't figured out the dump device, do so, with
	 * the following rules:
	 *
	 *	(a) We already know dumpdv in the RB_ASKNAME case.
	 *
	 *	(b) If dumpspec is set, try to use it.  If the device
	 *	    is not available, punt.
	 *
	 *	(c) If dumpspec is not set, the dump device is
	 *	    wildcarded or unspecified.  If the root device
	 *	    is DV_IFNET, punt.  Otherwise, use partition b
	 *	    of the root device.
	 */

	if (boothowto & RB_ASKNAME) {		/* (a) */
		if (dumpdv == NULL)
			goto nodumpdev;
	} else if (dumpspec != NULL) {		/* (b) */
		if (strcmp(dumpspec, "none") == 0 || dumpdev == NODEV) {
			/*
			 * Operator doesn't want a dump device.
			 * Or looks like they tried to pick a network
			 * device.  Oops.
			 */
			goto nodumpdev;
		}

		dumpdevname = devsw_blk2name(major(dumpdev));
		if (dumpdevname == NULL)
			goto nodumpdev;
		memset(buf, 0, sizeof(buf));
		sprintf(buf, "%s%d", dumpdevname, DISKUNIT(dumpdev));

		dumpdv = finddevice(buf);
		if (dumpdv == NULL) {
			/*
			 * Device not configured.
			 */
			goto nodumpdev;
		}
	} else {				/* (c) */
		if (rootdv->dv_class == DV_IFNET)
			goto nodumpdev;
		else {
			dumpdv = rootdv;
			dumpdev = MAKEDISKDEV(major(rootdev),
			    dumpdv->dv_unit, 1);
		}
	}

	/* Finish the "root on ..." line started above. */
	aprint_normal(" dumps on %s%c\n", dumpdv->dv_xname,
	    DISKPART(dumpdev) + 'a');
	return;

 nodumpdev:
	/* No dump device: record that and just terminate the line. */
	dumpdev = NODEV;
	aprint_normal("\n");
}
   1055 
   1056 static struct device *
   1057 finddevice(name)
   1058 	const char *name;
   1059 {
   1060 	struct device *dv;
   1061 #ifdef BOOT_FROM_RAID_HOOKS
   1062 	int j;
   1063 
   1064 	for (j = 0; j < numraid; j++) {
   1065 		if (strcmp(name, raidrootdev[j].dv_xname) == 0) {
   1066 			dv = &raidrootdev[j];
   1067 			return (dv);
   1068 		}
   1069 	}
   1070 #endif
   1071 
   1072 	for (dv = TAILQ_FIRST(&alldevs); dv != NULL;
   1073 	    dv = TAILQ_NEXT(dv, dv_list))
   1074 		if (strcmp(dv->dv_xname, name) == 0)
   1075 			break;
   1076 	return (dv);
   1077 }
   1078 
   1079 static struct device *
   1080 getdisk(str, len, defpart, devp, isdump)
   1081 	char *str;
   1082 	int len, defpart;
   1083 	dev_t *devp;
   1084 	int isdump;
   1085 {
   1086 	struct device	*dv;
   1087 #ifdef MEMORY_DISK_HOOKS
   1088 	int		i;
   1089 #endif
   1090 #ifdef BOOT_FROM_RAID_HOOKS
   1091 	int 		j;
   1092 #endif
   1093 
   1094 	if ((dv = parsedisk(str, len, defpart, devp)) == NULL) {
   1095 		printf("use one of:");
   1096 #ifdef MEMORY_DISK_HOOKS
   1097 		if (isdump == 0)
   1098 			for (i = 0; i < NMD; i++)
   1099 				printf(" %s[a-%c]", fakemdrootdev[i].dv_xname,
   1100 				    'a' + MAXPARTITIONS - 1);
   1101 #endif
   1102 #ifdef BOOT_FROM_RAID_HOOKS
   1103 		if (isdump == 0)
   1104 			for (j = 0; j < numraid; j++)
   1105 				printf(" %s[a-%c]", raidrootdev[j].dv_xname,
   1106 				    'a' + MAXPARTITIONS - 1);
   1107 #endif
   1108 		TAILQ_FOREACH(dv, &alldevs, dv_list) {
   1109 			if (dv->dv_class == DV_DISK)
   1110 				printf(" %s[a-%c]", dv->dv_xname,
   1111 				    'a' + MAXPARTITIONS - 1);
   1112 			if (isdump == 0 && dv->dv_class == DV_IFNET)
   1113 				printf(" %s", dv->dv_xname);
   1114 		}
   1115 		if (isdump)
   1116 			printf(" none");
   1117 #if defined(DDB)
   1118 		printf(" ddb");
   1119 #endif
   1120 		printf(" halt reboot\n");
   1121 	}
   1122 	return (dv);
   1123 }
   1124 
   1125 static struct device *
   1126 parsedisk(str, len, defpart, devp)
   1127 	char *str;
   1128 	int len, defpart;
   1129 	dev_t *devp;
   1130 {
   1131 	struct device *dv;
   1132 	char *cp, c;
   1133 	int majdev, part;
   1134 #ifdef MEMORY_DISK_HOOKS
   1135 	int i;
   1136 #endif
   1137 	if (len == 0)
   1138 		return (NULL);
   1139 
   1140 	if (len == 4 && strcmp(str, "halt") == 0)
   1141 		cpu_reboot(RB_HALT, NULL);
   1142 	else if (len == 6 && strcmp(str, "reboot") == 0)
   1143 		cpu_reboot(0, NULL);
   1144 #if defined(DDB)
   1145 	else if (len == 3 && strcmp(str, "ddb") == 0)
   1146 		console_debugger();
   1147 #endif
   1148 
   1149 	cp = str + len - 1;
   1150 	c = *cp;
   1151 	if (c >= 'a' && c <= ('a' + MAXPARTITIONS - 1)) {
   1152 		part = c - 'a';
   1153 		*cp = '\0';
   1154 	} else
   1155 		part = defpart;
   1156 
   1157 #ifdef MEMORY_DISK_HOOKS
   1158 	for (i = 0; i < NMD; i++)
   1159 		if (strcmp(str, fakemdrootdev[i].dv_xname) == 0) {
   1160 			dv = &fakemdrootdev[i];
   1161 			goto gotdisk;
   1162 		}
   1163 #endif
   1164 
   1165 	dv = finddevice(str);
   1166 	if (dv != NULL) {
   1167 		if (dv->dv_class == DV_DISK) {
   1168 #ifdef MEMORY_DISK_HOOKS
   1169  gotdisk:
   1170 #endif
   1171 			majdev = devsw_name2blk(dv->dv_xname, NULL, 0);
   1172 			if (majdev < 0)
   1173 				panic("parsedisk");
   1174 			*devp = MAKEDISKDEV(majdev, dv->dv_unit, part);
   1175 		}
   1176 
   1177 		if (dv->dv_class == DV_IFNET)
   1178 			*devp = NODEV;
   1179 	}
   1180 
   1181 	*cp = c;
   1182 	return (dv);
   1183 }
   1184 
/*
 * snprintf() `bytes' into `buf', scaling it down by `divisor' until the
 * number (plus a possible ` x' prefix and the suffix) fits into `len'
 * bytes (including the terminating NUL).
 *
 * A divisor of 1024 selects the prefixes " KMGTPE"; any other divisor
 * selects the SI prefixes " kMGTPE".  The divisor is used as given and
 * is not validated.
 *
 * Returns the value returned by snprintf(), or -1 if `buf' or `suffix'
 * is NULL or `len' is too small to hold `x y' + suffix + NUL.  Whenever
 * `buf' is non-NULL and `len' is non-zero, buf is set to the empty
 * string before anything else is checked.
 *
 * E.g, given a len of 9, a suffix of `B' and a divisor of 1024:
 *	bytes		result
 *	-----		------
 *	99999		`99999 B'
 *	100000		`97 KB'
 *	66715648	`65152 KB'
 *	252215296	`240 MB'
 */
int
humanize_number(char *buf, size_t len, u_int64_t bytes, const char *suffix,
    int divisor)
{
	/* prefixes are: (none), kilo, Mega, Giga, Tera, Peta, Exa */
	const char	*prefixes;
	const u_int64_t	 u64max = ~(u_int64_t)0;
	u_int64_t	 max;
	size_t		 i, digits, suffixlen;
	int		 r, unbounded;

	if (buf == NULL || suffix == NULL)
		return (-1);
	if (len > 0)
		buf[0] = '\0';
	suffixlen = strlen(suffix);
	/* check if enough room for `x y' + suffix + `\0' */
	if (len < 4 + suffixlen)
		return (-1);

	if (divisor == 1024) {
		/*
		 * binary multiplies
		 * XXX IEC 60027-2 recommends Ki, Mi, Gi...
		 */
		prefixes = " KMGTPE";
	} else
		prefixes = " kMGTPE"; /* SI for decimal multiplies */

	/*
	 * max = 10^digits is the smallest value that no longer fits.
	 * If that power of ten cannot be represented in 64 bits, every
	 * possible value fits and no scaling is needed; multiplying on
	 * would wrap around and force bogus scaling of small numbers.
	 */
	digits = len - suffixlen - 3;
	max = 1;
	unbounded = 0;
	for (i = 0; i < digits; i++) {
		if (max > u64max / 10) {
			unbounded = 1;
			break;
		}
		max *= 10;
	}

	/* i ends up as the index of the prefix to print (0 == none). */
	for (i = 0; !unbounded && bytes >= max && prefixes[i + 1]; i++)
		bytes /= divisor;

	r = snprintf(buf, len, "%qu%s%c%s", (unsigned long long)bytes,
	    i == 0 ? "" : " ", prefixes[i], suffix);

	return (r);
}
   1241 
/*
 * Format `bytes' into `buf' (of size `len') via humanize_number() with
 * a `B' suffix and a divisor of 1024, then drop a trailing ` B' so that
 * unscaled values are printed as a bare number.
 *
 * Returns whatever humanize_number() returned; the value is not
 * adjusted when the trailing ` B' is removed.
 */
int
format_bytes(char *buf, size_t len, u_int64_t bytes)
{
	size_t	tail;
	int	rv;

	rv = humanize_number(buf, len, bytes, "B", 1024);
	if (rv == -1)
		return (rv);

	/* nuke the trailing ` B' if it exists */
	tail = strlen(buf) - 2;
	if (buf[tail] == ' ' && buf[tail + 1] == 'B')
		buf[tail] = '\0';

	return (rv);
}
   1260 
   1261 /*
   1262  * Start trace of particular system call. If process is being traced,
   1263  * this routine is called by MD syscall dispatch code just before
   1264  * a system call is actually executed.
   1265  * MD caller guarantees the passed 'code' is within the supported
   1266  * system call number range for emulation the process runs under.
   1267  */
   1268 int
   1269 trace_enter(struct lwp *l, register_t code,
   1270 	register_t realcode, const struct sysent *callp, void *args,
   1271 	register_t rval[])
   1272 {
   1273 #if defined(KTRACE) || defined(SYSTRACE)
   1274 	struct proc *p = l->l_proc;
   1275 #endif
   1276 
   1277 #ifdef SYSCALL_DEBUG
   1278 	scdebug_call(l, code, args);
   1279 #endif /* SYSCALL_DEBUG */
   1280 
   1281 #ifdef KTRACE
   1282 	if (KTRPOINT(p, KTR_SYSCALL))
   1283 		ktrsyscall(p, code, realcode, callp, args);
   1284 #endif /* KTRACE */
   1285 
   1286 #ifdef SYSTRACE
   1287 	if (ISSET(p->p_flag, P_SYSTRACE))
   1288 		return systrace_enter(p, code, args, rval);
   1289 #endif
   1290 	return 0;
   1291 }
   1292 
   1293 /*
   1294  * End trace of particular system call. If process is being traced,
   1295  * this routine is called by MD syscall dispatch code just after
   1296  * a system call finishes.
   1297  * MD caller guarantees the passed 'code' is within the supported
   1298  * system call number range for emulation the process runs under.
   1299  */
   1300 void
   1301 trace_exit(struct lwp *l, register_t code, void *args, register_t rval[],
   1302     int error)
   1303 {
   1304 #if defined(KTRACE) || defined(SYSTRACE)
   1305 	struct proc *p = l->l_proc;
   1306 #endif
   1307 
   1308 #ifdef SYSCALL_DEBUG
   1309 	scdebug_ret(l, code, error, rval);
   1310 #endif /* SYSCALL_DEBUG */
   1311 
   1312 #ifdef KTRACE
   1313 	if (KTRPOINT(p, KTR_SYSRET)) {
   1314 		KERNEL_PROC_LOCK(l);
   1315 		ktrsysret(p, code, error, rval);
   1316 		KERNEL_PROC_UNLOCK(l);
   1317 	}
   1318 #endif /* KTRACE */
   1319 
   1320 #ifdef SYSTRACE
   1321 	if (ISSET(p->p_flag, P_SYSTRACE))
   1322 		systrace_exit(p, code, args, rval, error);
   1323 #endif
   1324 }
   1325