kern_subr.c revision 1.113 1 /* $NetBSD: kern_subr.c,v 1.113 2004/10/23 17:14:11 thorpej Exp $ */
2
3 /*-
4 * Copyright (c) 1997, 1998, 1999, 2002 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center, and by Luke Mewburn.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the NetBSD
22 * Foundation, Inc. and its contributors.
23 * 4. Neither the name of The NetBSD Foundation nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
40 /*
41 * Copyright (c) 1982, 1986, 1991, 1993
42 * The Regents of the University of California. All rights reserved.
43 * (c) UNIX System Laboratories, Inc.
44 * All or some portions of this file are derived from material licensed
45 * to the University of California by American Telephone and Telegraph
46 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
47 * the permission of UNIX System Laboratories, Inc.
48 *
49 * Copyright (c) 1992, 1993
50 * The Regents of the University of California. All rights reserved.
51 *
52 * This software was developed by the Computer Systems Engineering group
53 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
54 * contributed to Berkeley.
55 *
56 * All advertising materials mentioning features or use of this software
57 * must display the following acknowledgement:
58 * This product includes software developed by the University of
59 * California, Lawrence Berkeley Laboratory.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 * 1. Redistributions of source code must retain the above copyright
65 * notice, this list of conditions and the following disclaimer.
66 * 2. Redistributions in binary form must reproduce the above copyright
67 * notice, this list of conditions and the following disclaimer in the
68 * documentation and/or other materials provided with the distribution.
69 * 3. Neither the name of the University nor the names of its contributors
70 * may be used to endorse or promote products derived from this software
71 * without specific prior written permission.
72 *
73 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
74 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
75 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
76 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
77 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
78 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
79 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
80 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
81 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
82 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
83 * SUCH DAMAGE.
84 *
85 * @(#)kern_subr.c 8.4 (Berkeley) 2/14/95
86 */
87
88 #include <sys/cdefs.h>
89 __KERNEL_RCSID(0, "$NetBSD: kern_subr.c,v 1.113 2004/10/23 17:14:11 thorpej Exp $");
90
91 #include "opt_ddb.h"
92 #include "opt_md.h"
93 #include "opt_syscall_debug.h"
94 #include "opt_ktrace.h"
95 #include "opt_systrace.h"
96
97 #include <sys/param.h>
98 #include <sys/systm.h>
99 #include <sys/proc.h>
100 #include <sys/malloc.h>
101 #include <sys/mount.h>
102 #include <sys/device.h>
103 #include <sys/reboot.h>
104 #include <sys/conf.h>
105 #include <sys/disklabel.h>
106 #include <sys/queue.h>
107 #include <sys/systrace.h>
108 #include <sys/ktrace.h>
109
110 #include <uvm/uvm_extern.h>
111
112 #include <dev/cons.h>
113
114 #include <net/if.h>
115
116 /* XXX these should eventually move to subr_autoconf.c */
117 static struct device *finddevice(const char *);
118 static struct device *getdisk(char *, int, int, dev_t *, int);
119 static struct device *parsedisk(char *, int, int, dev_t *);
120
121 /*
122 * A generic linear hook.
123 */
/*
 * One registered hook: a callback plus the argument saved for it, linked
 * into a hook_list_t by hook_establish().
 */
124 struct hook_desc {
125 LIST_ENTRY(hook_desc) hk_list; /* linkage within the owning hook list */
126 void (*hk_fn)(void *); /* callback; some lists store other signatures cast to this type */
127 void *hk_arg; /* argument saved at establish time */
128 };
/* Head of a list of hook_desc entries. */
129 typedef LIST_HEAD(, hook_desc) hook_list_t;
130
131 static void *hook_establish(hook_list_t *, void (*)(void *), void *);
132 static void hook_disestablish(hook_list_t *, void *);
133 static void hook_destroy(hook_list_t *);
134 static void hook_proc_run(hook_list_t *, struct proc *);
135
136 MALLOC_DEFINE(M_IOV, "iov", "large iov's");
137
138 int
139 uiomove(buf, n, uio)
140 void *buf;
141 size_t n;
142 struct uio *uio;
143 {
144 struct iovec *iov;
145 u_int cnt;
146 int error = 0;
147 char *cp = buf;
148 struct proc *p = uio->uio_procp;
149 int hold_count;
150
151 hold_count = KERNEL_LOCK_RELEASE_ALL();
152
153 #if defined(LOCKDEBUG) || defined(DIAGNOSTIC)
154 spinlock_switchcheck();
155 #endif
156 #ifdef LOCKDEBUG
157 simple_lock_only_held(NULL, "uiomove");
158 #endif
159
160 #ifdef DIAGNOSTIC
161 if (uio->uio_rw != UIO_READ && uio->uio_rw != UIO_WRITE)
162 panic("uiomove: mode");
163 #endif
164 while (n > 0 && uio->uio_resid) {
165 iov = uio->uio_iov;
166 cnt = iov->iov_len;
167 if (cnt == 0) {
168 KASSERT(uio->uio_iovcnt > 0);
169 uio->uio_iov++;
170 uio->uio_iovcnt--;
171 continue;
172 }
173 if (cnt > n)
174 cnt = n;
175 switch (uio->uio_segflg) {
176
177 case UIO_USERSPACE:
178 if (curcpu()->ci_schedstate.spc_flags &
179 SPCF_SHOULDYIELD)
180 preempt(1);
181 if (__predict_true(p == curproc)) {
182 if (uio->uio_rw == UIO_READ)
183 error = copyout(cp, iov->iov_base, cnt);
184 else
185 error = copyin(iov->iov_base, cp, cnt);
186 } else {
187 if (uio->uio_rw == UIO_READ)
188 error = copyout_proc(p, cp,
189 iov->iov_base, cnt);
190 else
191 error = copyin_proc(p, iov->iov_base,
192 cp, cnt);
193 }
194 if (error)
195 goto out;
196 break;
197
198 case UIO_SYSSPACE:
199 if (uio->uio_rw == UIO_READ)
200 error = kcopy(cp, iov->iov_base, cnt);
201 else
202 error = kcopy(iov->iov_base, cp, cnt);
203 if (error)
204 goto out;
205 break;
206 }
207 iov->iov_base = (caddr_t)iov->iov_base + cnt;
208 iov->iov_len -= cnt;
209 uio->uio_resid -= cnt;
210 uio->uio_offset += cnt;
211 cp += cnt;
212 KDASSERT(cnt <= n);
213 n -= cnt;
214 }
215 out:
216 KERNEL_LOCK_ACQUIRE_COUNT(hold_count);
217 return (error);
218 }
219
220 /*
221 * Wrapper for uiomove() that validates the arguments against a known-good
222 * kernel buffer.
223 */
224 int
225 uiomove_frombuf(void *buf, size_t buflen, struct uio *uio)
226 {
227 size_t offset;
228
229 if (uio->uio_offset < 0 || uio->uio_resid < 0 ||
230 (offset = uio->uio_offset) != uio->uio_offset)
231 return (EINVAL);
232 if (offset >= buflen)
233 return (0);
234 return (uiomove((char *)buf + offset, buflen - offset, uio));
235 }
236
237 /*
238 * Give next character to user as result of read.
239 */
240 int
241 ureadc(c, uio)
242 int c;
243 struct uio *uio;
244 {
245 struct iovec *iov;
246
247 if (uio->uio_resid <= 0)
248 panic("ureadc: non-positive resid");
249 again:
250 if (uio->uio_iovcnt <= 0)
251 panic("ureadc: non-positive iovcnt");
252 iov = uio->uio_iov;
253 if (iov->iov_len <= 0) {
254 uio->uio_iovcnt--;
255 uio->uio_iov++;
256 goto again;
257 }
258 switch (uio->uio_segflg) {
259
260 case UIO_USERSPACE:
261 if (subyte(iov->iov_base, c) < 0)
262 return (EFAULT);
263 break;
264
265 case UIO_SYSSPACE:
266 *(char *)iov->iov_base = c;
267 break;
268 }
269 iov->iov_base = (caddr_t)iov->iov_base + 1;
270 iov->iov_len--;
271 uio->uio_resid--;
272 uio->uio_offset++;
273 return (0);
274 }
275
276 /*
277 * Like copyin(), but operates on an arbitrary process.
278 */
279 int
280 copyin_proc(struct proc *p, const void *uaddr, void *kaddr, size_t len)
281 {
282 struct iovec iov;
283 struct uio uio;
284 int error;
285
286 if (len == 0)
287 return (0);
288
289 iov.iov_base = kaddr;
290 iov.iov_len = len;
291 uio.uio_iov = &iov;
292 uio.uio_iovcnt = 1;
293 uio.uio_offset = (off_t)(intptr_t)uaddr;
294 uio.uio_resid = len;
295 uio.uio_segflg = UIO_SYSSPACE;
296 uio.uio_rw = UIO_READ;
297 uio.uio_procp = NULL;
298
299 /* XXXCDC: how should locking work here? */
300 if ((p->p_flag & P_WEXIT) || (p->p_vmspace->vm_refcnt < 1))
301 return (EFAULT);
302 p->p_vmspace->vm_refcnt++; /* XXX */
303 error = uvm_io(&p->p_vmspace->vm_map, &uio);
304 uvmspace_free(p->p_vmspace);
305
306 return (error);
307 }
308
309 /*
310 * Like copyout(), but operates on an arbitrary process.
311 */
312 int
313 copyout_proc(struct proc *p, const void *kaddr, void *uaddr, size_t len)
314 {
315 struct iovec iov;
316 struct uio uio;
317 int error;
318
319 if (len == 0)
320 return (0);
321
322 iov.iov_base = (void *) kaddr; /* XXX cast away const */
323 iov.iov_len = len;
324 uio.uio_iov = &iov;
325 uio.uio_iovcnt = 1;
326 uio.uio_offset = (off_t)(intptr_t)uaddr;
327 uio.uio_resid = len;
328 uio.uio_segflg = UIO_SYSSPACE;
329 uio.uio_rw = UIO_WRITE;
330 uio.uio_procp = NULL;
331
332 /* XXXCDC: how should locking work here? */
333 if ((p->p_flag & P_WEXIT) || (p->p_vmspace->vm_refcnt < 1))
334 return (EFAULT);
335 p->p_vmspace->vm_refcnt++; /* XXX */
336 error = uvm_io(&p->p_vmspace->vm_map, &uio);
337 uvmspace_free(p->p_vmspace);
338
339 return (error);
340 }
341
342 /*
343 * General routine to allocate a hash table.
344 * Allocate enough memory to hold at least `elements' list-head pointers.
345 * Return a pointer to the allocated space and set *hashmask to a pattern
346 * suitable for masking a value to use as an index into the returned array.
347 */
348 void *
349 hashinit(elements, htype, mtype, mflags, hashmask)
350 u_int elements;
351 enum hashtype htype;
352 struct malloc_type *mtype;
353 int mflags;
354 u_long *hashmask;
355 {
356 u_long hashsize, i;
357 LIST_HEAD(, generic) *hashtbl_list;
358 TAILQ_HEAD(, generic) *hashtbl_tailq;
359 size_t esize;
360 void *p;
361
362 if (elements == 0)
363 panic("hashinit: bad cnt");
364 for (hashsize = 1; hashsize < elements; hashsize <<= 1)
365 continue;
366
367 switch (htype) {
368 case HASH_LIST:
369 esize = sizeof(*hashtbl_list);
370 break;
371 case HASH_TAILQ:
372 esize = sizeof(*hashtbl_tailq);
373 break;
374 default:
375 #ifdef DIAGNOSTIC
376 panic("hashinit: invalid table type");
377 #else
378 return NULL;
379 #endif
380 }
381
382 if ((p = malloc(hashsize * esize, mtype, mflags)) == NULL)
383 return (NULL);
384
385 switch (htype) {
386 case HASH_LIST:
387 hashtbl_list = p;
388 for (i = 0; i < hashsize; i++)
389 LIST_INIT(&hashtbl_list[i]);
390 break;
391 case HASH_TAILQ:
392 hashtbl_tailq = p;
393 for (i = 0; i < hashsize; i++)
394 TAILQ_INIT(&hashtbl_tailq[i]);
395 break;
396 }
397 *hashmask = hashsize - 1;
398 return (p);
399 }
400
401 /*
402 * Free memory from hash table previosly allocated via hashinit().
403 */
404 void
405 hashdone(hashtbl, mtype)
406 void *hashtbl;
407 struct malloc_type *mtype;
408 {
409
410 free(hashtbl, mtype);
411 }
412
413
414 static void *
415 hook_establish(list, fn, arg)
416 hook_list_t *list;
417 void (*fn)(void *);
418 void *arg;
419 {
420 struct hook_desc *hd;
421
422 hd = malloc(sizeof(*hd), M_DEVBUF, M_NOWAIT);
423 if (hd == NULL)
424 return (NULL);
425
426 hd->hk_fn = fn;
427 hd->hk_arg = arg;
428 LIST_INSERT_HEAD(list, hd, hk_list);
429
430 return (hd);
431 }
432
433 static void
434 hook_disestablish(list, vhook)
435 hook_list_t *list;
436 void *vhook;
437 {
438 #ifdef DIAGNOSTIC
439 struct hook_desc *hd;
440
441 LIST_FOREACH(hd, list, hk_list) {
442 if (hd == vhook)
443 break;
444 }
445
446 if (hd == NULL)
447 panic("hook_disestablish: hook %p not established", vhook);
448 #endif
449 LIST_REMOVE((struct hook_desc *)vhook, hk_list);
450 free(vhook, M_DEVBUF);
451 }
452
453 static void
454 hook_destroy(list)
455 hook_list_t *list;
456 {
457 struct hook_desc *hd;
458
459 while ((hd = LIST_FIRST(list)) != NULL) {
460 LIST_REMOVE(hd, hk_list);
461 free(hd, M_DEVBUF);
462 }
463 }
464
465 static void
466 hook_proc_run(list, p)
467 hook_list_t *list;
468 struct proc *p;
469 {
470 struct hook_desc *hd;
471
472 for (hd = LIST_FIRST(list); hd != NULL; hd = LIST_NEXT(hd, hk_list)) {
473 ((void (*)(struct proc *, void *))*hd->hk_fn)(p,
474 hd->hk_arg);
475 }
476 }
477
478 /*
479 * "Shutdown hook" types, functions, and variables.
480 *
481 * Should be invoked immediately before the
482 * system is halted or rebooted, i.e. after file systems unmounted,
483 * after crash dump done, etc.
484 *
485 * Each shutdown hook is removed from the list before it's run, so that
486 * it won't be run again.
487 */
488
489 hook_list_t shutdownhook_list;
490
491 void *
492 shutdownhook_establish(fn, arg)
493 void (*fn)(void *);
494 void *arg;
495 {
496 return hook_establish(&shutdownhook_list, fn, arg);
497 }
498
499 void
500 shutdownhook_disestablish(vhook)
501 void *vhook;
502 {
503 hook_disestablish(&shutdownhook_list, vhook);
504 }
505
506 /*
507 * Run shutdown hooks. Should be invoked immediately before the
508 * system is halted or rebooted, i.e. after file systems unmounted,
509 * after crash dump done, etc.
510 *
511 * Each shutdown hook is removed from the list before it's run, so that
512 * it won't be run again.
513 */
514 void
515 doshutdownhooks()
516 {
517 struct hook_desc *dp;
518
519 while ((dp = LIST_FIRST(&shutdownhook_list)) != NULL) {
520 LIST_REMOVE(dp, hk_list);
521 (*dp->hk_fn)(dp->hk_arg);
522 #if 0
523 /*
524 * Don't bother freeing the hook structure,, since we may
525 * be rebooting because of a memory corruption problem,
526 * and this might only make things worse. It doesn't
527 * matter, anyway, since the system is just about to
528 * reboot.
529 */
530 free(dp, M_DEVBUF);
531 #endif
532 }
533 }
534
535 /*
536 * "Mountroot hook" types, functions, and variables.
537 */
538
539 hook_list_t mountroothook_list;
540
541 void *
542 mountroothook_establish(fn, dev)
543 void (*fn)(struct device *);
544 struct device *dev;
545 {
546 return hook_establish(&mountroothook_list, (void (*)(void *))fn, dev);
547 }
548
549 void
550 mountroothook_disestablish(vhook)
551 void *vhook;
552 {
553 hook_disestablish(&mountroothook_list, vhook);
554 }
555
556 void
557 mountroothook_destroy()
558 {
559 hook_destroy(&mountroothook_list);
560 }
561
562 void
563 domountroothook()
564 {
565 struct hook_desc *hd;
566
567 LIST_FOREACH(hd, &mountroothook_list, hk_list) {
568 if (hd->hk_arg == (void *)root_device) {
569 (*hd->hk_fn)(hd->hk_arg);
570 return;
571 }
572 }
573 }
574
575 hook_list_t exechook_list;
576
577 void *
578 exechook_establish(fn, arg)
579 void (*fn)(struct proc *, void *);
580 void *arg;
581 {
582 return hook_establish(&exechook_list, (void (*)(void *))fn, arg);
583 }
584
585 void
586 exechook_disestablish(vhook)
587 void *vhook;
588 {
589 hook_disestablish(&exechook_list, vhook);
590 }
591
592 /*
593 * Run exec hooks.
594 */
595 void
596 doexechooks(p)
597 struct proc *p;
598 {
599 hook_proc_run(&exechook_list, p);
600 }
601
602 hook_list_t exithook_list;
603
604 void *
605 exithook_establish(fn, arg)
606 void (*fn)(struct proc *, void *);
607 void *arg;
608 {
609 return hook_establish(&exithook_list, (void (*)(void *))fn, arg);
610 }
611
612 void
613 exithook_disestablish(vhook)
614 void *vhook;
615 {
616 hook_disestablish(&exithook_list, vhook);
617 }
618
619 /*
620 * Run exit hooks.
621 */
622 void
623 doexithooks(p)
624 struct proc *p;
625 {
626 hook_proc_run(&exithook_list, p);
627 }
628
629 hook_list_t forkhook_list;
630
631 void *
632 forkhook_establish(fn)
633 void (*fn)(struct proc *, struct proc *);
634 {
635 return hook_establish(&forkhook_list, (void (*)(void *))fn, NULL);
636 }
637
638 void
639 forkhook_disestablish(vhook)
640 void *vhook;
641 {
642 hook_disestablish(&forkhook_list, vhook);
643 }
644
645 /*
646 * Run fork hooks.
647 */
648 void
649 doforkhooks(p2, p1)
650 struct proc *p2, *p1;
651 {
652 struct hook_desc *hd;
653
654 LIST_FOREACH(hd, &forkhook_list, hk_list) {
655 ((void (*)(struct proc *, struct proc *))*hd->hk_fn)
656 (p2, p1);
657 }
658 }
659
660 /*
661 * "Power hook" types, functions, and variables.
662 * The list of power hooks is kept ordered with the last registered hook
663 * first.
664 * When running the hooks on power down the hooks are called in reverse
665 * registration order, when powering up in registration order.
666 */
/* One registered power hook; entries are linked on powerhook_list. */
667 struct powerhook_desc {
668 CIRCLEQ_ENTRY(powerhook_desc) sfd_list; /* linkage on powerhook_list */
669 void (*sfd_fn)(int, void *); /* callback, invoked as (*sfd_fn)(why, sfd_arg) */
670 void *sfd_arg; /* argument saved by powerhook_establish() */
671 };
672
673 CIRCLEQ_HEAD(, powerhook_desc) powerhook_list =
674 CIRCLEQ_HEAD_INITIALIZER(powerhook_list);
675
676 void *
677 powerhook_establish(fn, arg)
678 void (*fn)(int, void *);
679 void *arg;
680 {
681 struct powerhook_desc *ndp;
682
683 ndp = (struct powerhook_desc *)
684 malloc(sizeof(*ndp), M_DEVBUF, M_NOWAIT);
685 if (ndp == NULL)
686 return (NULL);
687
688 ndp->sfd_fn = fn;
689 ndp->sfd_arg = arg;
690 CIRCLEQ_INSERT_HEAD(&powerhook_list, ndp, sfd_list);
691
692 return (ndp);
693 }
694
695 void
696 powerhook_disestablish(vhook)
697 void *vhook;
698 {
699 #ifdef DIAGNOSTIC
700 struct powerhook_desc *dp;
701
702 CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list)
703 if (dp == vhook)
704 goto found;
705 panic("powerhook_disestablish: hook %p not established", vhook);
706 found:
707 #endif
708
709 CIRCLEQ_REMOVE(&powerhook_list, (struct powerhook_desc *)vhook,
710 sfd_list);
711 free(vhook, M_DEVBUF);
712 }
713
714 /*
715 * Run power hooks.
716 */
717 void
718 dopowerhooks(why)
719 int why;
720 {
721 struct powerhook_desc *dp;
722
723 if (why == PWR_RESUME || why == PWR_SOFTRESUME) {
724 CIRCLEQ_FOREACH_REVERSE(dp, &powerhook_list, sfd_list) {
725 (*dp->sfd_fn)(why, dp->sfd_arg);
726 }
727 } else {
728 CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list) {
729 (*dp->sfd_fn)(why, dp->sfd_arg);
730 }
731 }
732 }
733
734 /*
735 * Determine the root device and, if instructed to, the root file system.
736 */
737
738 #include "md.h"
739 #if NMD == 0
740 #undef MEMORY_DISK_HOOKS
741 #endif
742
743 #ifdef MEMORY_DISK_HOOKS
744 static struct device fakemdrootdev[NMD];
745 #endif
746
747 #ifdef MEMORY_DISK_IS_ROOT
748 #define BOOT_FROM_MEMORY_HOOKS 1
749 #endif
750
751 #include "raid.h"
752 #if NRAID == 1
753 #define BOOT_FROM_RAID_HOOKS 1
754 #endif
755
756 #ifdef BOOT_FROM_RAID_HOOKS
757 extern int numraid;
758 extern struct device *raidrootdev;
759 #endif
760
761 /*
762 * The device and wedge that we booted from. If booted_wedge is NULL,
763 * then we might consult booted_partition.
764 */
765 struct device *booted_device;
766 struct device *booted_wedge;
767 int booted_partition;
768
/*
 * Use partition letters if it's a disk class but not a wedge ("dk").
 * XXX Check for wedge is kinda gross.
 *
 * A disk without config data (dv_cfdata == NULL) cannot be a wedge and
 * is treated as using partitions.  The fake memory-disk root devices
 * set up by setroot() are DV_DISK with a NULL dv_cfdata, so the pointer
 * must be checked before it is dereferenced.
 */
#define	DEV_USES_PARTITIONS(dv)						\
	((dv)->dv_class == DV_DISK &&					\
	 ((dv)->dv_cfdata == NULL ||					\
	  strcmp((dv)->dv_cfdata->cf_name, "dk") != 0))
776
/*
 * Decide which device holds the root file system and which device
 * receives crash dumps.
 *
 * bootdv        device the caller believes we booted from (may be NULL)
 * bootpartition partition on bootdv; used when the device uses partitions
 *
 * Depending on boothowto (RB_ASKNAME) and rootspec, the answer comes from
 * an interactive console dialogue, from the boot device, or from the
 * already configured rootdev.  On the way out root_device is set, and
 * rootdev, dumpdev, mountroot, rootspec and boothowto may be updated.
 * Whenever a non-interactive path cannot resolve the root device,
 * RB_ASKNAME is forced on and control returns to `top' to ask the user.
 */
777 void
778 setroot(bootdv, bootpartition)
779 struct device *bootdv;
780 int bootpartition;
781 {
782 struct device *dv;
783 int len;
784 #ifdef MEMORY_DISK_HOOKS
785 int i;
786 #endif
787 dev_t nrootdev;
788 dev_t ndumpdev = NODEV;
789 char buf[128];
790 const char *rootdevname;
791 const char *dumpdevname;
792 struct device *rootdv = NULL; /* XXX gcc -Wuninitialized */
793 struct device *dumpdv = NULL;
794 struct ifnet *ifp;
795 const char *deffsname;
796 struct vfsops *vops;
797
/* Fill in the fake memory-disk device entries ("md0", "md1", ...). */
798 #ifdef MEMORY_DISK_HOOKS
799 for (i = 0; i < NMD; i++) {
800 fakemdrootdev[i].dv_class = DV_DISK;
801 fakemdrootdev[i].dv_cfdata = NULL;
802 fakemdrootdev[i].dv_unit = i;
803 fakemdrootdev[i].dv_parent = NULL;
804 snprintf(fakemdrootdev[i].dv_xname,
805 sizeof(fakemdrootdev[i].dv_xname), "md%d", i);
806 }
807 #endif /* MEMORY_DISK_HOOKS */
808
/* Memory disk is configured as root: override what the caller passed in. */
809 #ifdef MEMORY_DISK_IS_ROOT
810 bootdv = &fakemdrootdev[0];
811 bootpartition = 0;
812 #endif
813
814 /*
815 * If NFS is specified as the file system, and we found
816 * a DV_DISK boot device (or no boot device at all), then
817 * find a reasonable network interface for "rootspec".
818 */
819 vops = vfs_getopsbyname("nfs");
820 if (vops != NULL && vops->vfs_mountroot == mountroot &&
821 rootspec == NULL &&
822 (bootdv == NULL || bootdv->dv_class != DV_IFNET)) {
/* Pick the first interface that is neither loopback nor point-to-point. */
823 TAILQ_FOREACH(ifp, &ifnet, if_list) {
824 if ((ifp->if_flags &
825 (IFF_LOOPBACK|IFF_POINTOPOINT)) == 0)
826 break;
827 }
828 if (ifp == NULL) {
829 /*
830 * Can't find a suitable interface; ask the
831 * user.
832 */
833 boothowto |= RB_ASKNAME;
834 } else {
835 /*
836 * Have a suitable interface; behave as if
837 * the user specified this interface.
838 */
839 rootspec = (const char *)ifp->if_xname;
840 }
841 }
842
843 /*
844 * If wildcarded root and the boot device wasn't determined,
845 * ask the user.
846 */
847 if (rootspec == NULL && bootdv == NULL)
848 boothowto |= RB_ASKNAME;
849
/*
 * Re-entered via `goto top' (with RB_ASKNAME set) whenever a later step
 * fails to resolve the root device.
 */
850 top:
851 if (boothowto & RB_ASKNAME) {
852 struct device *defdumpdv;
853
/* Prompt until getdisk() resolves a root device. */
854 for (;;) {
855 printf("root device");
856 if (bootdv != NULL) {
857 printf(" (default %s", bootdv->dv_xname);
858 if (DEV_USES_PARTITIONS(bootdv))
859 printf("%c", bootpartition + 'a');
860 printf(")");
861 }
862 printf(": ");
863 len = cngetsn(buf, sizeof(buf));
/* Empty answer: take the boot device name as if it had been typed. */
864 if (len == 0 && bootdv != NULL) {
865 strlcpy(buf, bootdv->dv_xname, sizeof(buf));
866 len = strlen(buf);
867 }
/* A trailing `*' is stripped and the name is tried with default partition 1. */
868 if (len > 0 && buf[len - 1] == '*') {
869 buf[--len] = '\0';
870 dv = getdisk(buf, len, 1, &nrootdev, 0);
871 if (dv != NULL) {
872 rootdv = dv;
873 break;
874 }
875 }
876 dv = getdisk(buf, len, bootpartition, &nrootdev, 0);
877 if (dv != NULL) {
878 rootdv = dv;
879 break;
880 }
881 }
882
883 /*
884 * Set up the default dump device. If root is on
885 * a network device, there is no default dump
886 * device, since we don't support dumps to the
887 * network.
888 */
889 if (DEV_USES_PARTITIONS(rootdv) == 0)
890 defdumpdv = NULL;
891 else
892 defdumpdv = rootdv;
893
/* Prompt until a dump device, the default, or "none" is chosen. */
894 for (;;) {
895 printf("dump device");
896 if (defdumpdv != NULL) {
897 /*
898 * Note, we know it's a disk if we get here.
899 */
900 printf(" (default %sb)", defdumpdv->dv_xname);
901 }
902 printf(": ");
903 len = cngetsn(buf, sizeof(buf));
904 if (len == 0) {
/* Default: partition index 1 (`b') on the root disk's unit. */
905 if (defdumpdv != NULL) {
906 ndumpdev = MAKEDISKDEV(major(nrootdev),
907 DISKUNIT(nrootdev), 1);
908 }
909 dumpdv = defdumpdv;
910 break;
911 }
912 if (len == 4 && strcmp(buf, "none") == 0) {
913 dumpdv = NULL;
914 break;
915 }
916 dv = getdisk(buf, len, 1, &ndumpdev, 1);
917 if (dv != NULL) {
918 dumpdv = dv;
919 break;
920 }
921 }
922
/* Commit the interactively chosen device numbers. */
923 rootdev = nrootdev;
924 dumpdev = ndumpdev;
925
/* Find the vfsops matching the current mountroot, to name the default. */
926 for (vops = LIST_FIRST(&vfs_list); vops != NULL;
927 vops = LIST_NEXT(vops, vfs_list)) {
928 if (vops->vfs_mountroot != NULL &&
929 vops->vfs_mountroot == mountroot)
930 break;
931 }
932
933 if (vops == NULL) {
934 mountroot = NULL;
935 deffsname = "generic";
936 } else
937 deffsname = vops->vfs_name;
938
/*
 * Prompt for the root file system type.  An empty answer keeps the
 * default; "generic" clears mountroot; "halt", "reboot" and (with DDB)
 * "ddb" are accepted as commands.
 */
939 for (;;) {
940 printf("file system (default %s): ", deffsname);
941 len = cngetsn(buf, sizeof(buf));
942 if (len == 0)
943 break;
944 if (len == 4 && strcmp(buf, "halt") == 0)
945 cpu_reboot(RB_HALT, NULL);
946 else if (len == 6 && strcmp(buf, "reboot") == 0)
947 cpu_reboot(0, NULL);
948 #if defined(DDB)
949 else if (len == 3 && strcmp(buf, "ddb") == 0) {
950 console_debugger();
951 }
952 #endif
953 else if (len == 7 && strcmp(buf, "generic") == 0) {
954 mountroot = NULL;
955 break;
956 }
957 vops = vfs_getopsbyname(buf);
958 if (vops == NULL || vops->vfs_mountroot == NULL) {
959 printf("use one of: generic");
960 for (vops = LIST_FIRST(&vfs_list);
961 vops != NULL;
962 vops = LIST_NEXT(vops, vfs_list)) {
963 if (vops->vfs_mountroot != NULL)
964 printf(" %s", vops->vfs_name);
965 }
966 #if defined(DDB)
967 printf(" ddb");
968 #endif
969 printf(" halt reboot\n");
970 } else {
971 mountroot = vops->vfs_mountroot;
972 break;
973 }
974 }
975
976 } else if (rootspec == NULL) {
977 int majdev;
978
979 /*
980 * Wildcarded root; use the boot device.
981 */
982 rootdv = bootdv;
983
/* A negative result leaves rootdev untouched. */
984 majdev = devsw_name2blk(bootdv->dv_xname, NULL, 0);
985 if (majdev >= 0) {
986 /*
987 * Root is on a disk. `bootpartition' is root,
988 * unless the device does not use partitions.
989 */
990 if (DEV_USES_PARTITIONS(bootdv))
991 rootdev = MAKEDISKDEV(majdev, bootdv->dv_unit,
992 bootpartition);
993 else
994 rootdev = makedev(majdev, bootdv->dv_unit);
995 }
996 } else {
997
998 /*
999 * `root on <dev> ...'
1000 */
1001
1002 /*
1003 * If it's a network interface, we can bail out
1004 * early.
1005 */
1006 dv = finddevice(rootspec);
1007 if (dv != NULL && dv->dv_class == DV_IFNET) {
1008 rootdv = dv;
1009 goto haveroot;
1010 }
1011
/* Otherwise derive the device name from the configured rootdev. */
1012 rootdevname = devsw_blk2name(major(rootdev));
1013 if (rootdevname == NULL) {
1014 printf("unknown device major 0x%x\n", rootdev);
1015 boothowto |= RB_ASKNAME;
1016 goto top;
1017 }
1018 memset(buf, 0, sizeof(buf));
1019 snprintf(buf, sizeof(buf), "%s%d", rootdevname,
1020 DISKUNIT(rootdev));
1021
1022 rootdv = finddevice(buf);
1023 if (rootdv == NULL) {
1024 printf("device %s (0x%x) not configured\n",
1025 buf, rootdev);
1026 boothowto |= RB_ASKNAME;
1027 goto top;
1028 }
1029 }
1030
/* rootdv is known here; publish it and report the choice. */
1031 haveroot:
1032
1033 root_device = rootdv;
1034
1035 switch (rootdv->dv_class) {
1036 case DV_IFNET:
1037 aprint_normal("root on %s", rootdv->dv_xname);
1038 break;
1039
1040 case DV_DISK:
1041 aprint_normal("root on %s%c", rootdv->dv_xname,
1042 DISKPART(rootdev) + 'a');
1043 break;
1044
1045 default:
1046 printf("can't determine root device\n");
1047 boothowto |= RB_ASKNAME;
1048 goto top;
1049 }
1050
1051 /*
1052 * Now configure the dump device.
1053 *
1054 * If we haven't figured out the dump device, do so, with
1055 * the following rules:
1056 *
1057 * (a) We already know dumpdv in the RB_ASKNAME case.
1058 *
1059 * (b) If dumpspec is set, try to use it. If the device
1060 * is not available, punt.
1061 *
1062 * (c) If dumpspec is not set, the dump device is
1063 * wildcarded or unspecified. If the root device
1064 * is DV_IFNET, punt. Otherwise, use partition b
1065 * of the root device.
1066 */
1067
1068 if (boothowto & RB_ASKNAME) { /* (a) */
1069 if (dumpdv == NULL)
1070 goto nodumpdev;
1071 } else if (dumpspec != NULL) { /* (b) */
1072 if (strcmp(dumpspec, "none") == 0 || dumpdev == NODEV) {
1073 /*
1074 * Operator doesn't want a dump device.
1075 * Or looks like they tried to pick a network
1076 * device. Oops.
1077 */
1078 goto nodumpdev;
1079 }
1080
1081 dumpdevname = devsw_blk2name(major(dumpdev));
1082 if (dumpdevname == NULL)
1083 goto nodumpdev;
1084 memset(buf, 0, sizeof(buf));
1085 snprintf(buf, sizeof(buf), "%s%d", dumpdevname,
1086 DISKUNIT(dumpdev));
1087
1088 dumpdv = finddevice(buf);
1089 if (dumpdv == NULL) {
1090 /*
1091 * Device not configured.
1092 */
1093 goto nodumpdev;
1094 }
1095 } else { /* (c) */
1096 if (DEV_USES_PARTITIONS(rootdv) == 0)
1097 goto nodumpdev;
1098 else {
1099 dumpdv = rootdv;
1100 dumpdev = MAKEDISKDEV(major(rootdev),
1101 dumpdv->dv_unit, 1);
1102 }
1103 }
1104
1105 aprint_normal(" dumps on %s%c\n", dumpdv->dv_xname,
1106 DISKPART(dumpdev) + 'a');
1107 return;
1108
/* No usable dump device: record NODEV and just finish the "root on" line. */
1109 nodumpdev:
1110 dumpdev = NODEV;
1111 aprint_normal("\n");
1112 }
1113
1114 static struct device *
1115 finddevice(name)
1116 const char *name;
1117 {
1118 struct device *dv;
1119 #if defined(BOOT_FROM_RAID_HOOKS) || defined(BOOT_FROM_MEMORY_HOOKS)
1120 int j;
1121 #endif /* BOOT_FROM_RAID_HOOKS || BOOT_FROM_MEMORY_HOOKS */
1122
1123 #ifdef BOOT_FROM_RAID_HOOKS
1124 for (j = 0; j < numraid; j++) {
1125 if (strcmp(name, raidrootdev[j].dv_xname) == 0) {
1126 dv = &raidrootdev[j];
1127 return (dv);
1128 }
1129 }
1130 #endif /* BOOT_FROM_RAID_HOOKS */
1131
1132 #ifdef BOOT_FROM_MEMORY_HOOKS
1133 for (j = 0; j < NMD; j++) {
1134 if (strcmp(name, fakemdrootdev[j].dv_xname) == 0) {
1135 dv = &fakemdrootdev[j];
1136 return (dv);
1137 }
1138 }
1139 #endif /* BOOT_FROM_MEMORY_HOOKS */
1140
1141 for (dv = TAILQ_FIRST(&alldevs); dv != NULL;
1142 dv = TAILQ_NEXT(dv, dv_list))
1143 if (strcmp(dv->dv_xname, name) == 0)
1144 break;
1145 return (dv);
1146 }
1147
1148 static struct device *
1149 getdisk(str, len, defpart, devp, isdump)
1150 char *str;
1151 int len, defpart;
1152 dev_t *devp;
1153 int isdump;
1154 {
1155 struct device *dv;
1156 #ifdef MEMORY_DISK_HOOKS
1157 int i;
1158 #endif
1159 #ifdef BOOT_FROM_RAID_HOOKS
1160 int j;
1161 #endif
1162
1163 if ((dv = parsedisk(str, len, defpart, devp)) == NULL) {
1164 printf("use one of:");
1165 #ifdef MEMORY_DISK_HOOKS
1166 if (isdump == 0)
1167 for (i = 0; i < NMD; i++)
1168 printf(" %s[a-%c]", fakemdrootdev[i].dv_xname,
1169 'a' + MAXPARTITIONS - 1);
1170 #endif
1171 #ifdef BOOT_FROM_RAID_HOOKS
1172 if (isdump == 0)
1173 for (j = 0; j < numraid; j++)
1174 printf(" %s[a-%c]", raidrootdev[j].dv_xname,
1175 'a' + MAXPARTITIONS - 1);
1176 #endif
1177 TAILQ_FOREACH(dv, &alldevs, dv_list) {
1178 if (DEV_USES_PARTITIONS(dv))
1179 printf(" %s[a-%c]", dv->dv_xname,
1180 'a' + MAXPARTITIONS - 1);
1181 else if (dv->dv_class == DV_DISK)
1182 printf(" %s", dv->dv_xname);
1183 if (isdump == 0 && dv->dv_class == DV_IFNET)
1184 printf(" %s", dv->dv_xname);
1185 }
1186 if (isdump)
1187 printf(" none");
1188 #if defined(DDB)
1189 printf(" ddb");
1190 #endif
1191 printf(" halt reboot\n");
1192 }
1193 return (dv);
1194 }
1195
1196 static struct device *
1197 parsedisk(str, len, defpart, devp)
1198 char *str;
1199 int len, defpart;
1200 dev_t *devp;
1201 {
1202 struct device *dv;
1203 char *cp, c;
1204 int majdev, part;
1205 #ifdef MEMORY_DISK_HOOKS
1206 int i;
1207 #endif
1208 if (len == 0)
1209 return (NULL);
1210
1211 if (len == 4 && strcmp(str, "halt") == 0)
1212 cpu_reboot(RB_HALT, NULL);
1213 else if (len == 6 && strcmp(str, "reboot") == 0)
1214 cpu_reboot(0, NULL);
1215 #if defined(DDB)
1216 else if (len == 3 && strcmp(str, "ddb") == 0)
1217 console_debugger();
1218 #endif
1219
1220 cp = str + len - 1;
1221 c = *cp;
1222 if (c >= 'a' && c <= ('a' + MAXPARTITIONS - 1)) {
1223 part = c - 'a';
1224 *cp = '\0';
1225 } else
1226 part = defpart;
1227
1228 #ifdef MEMORY_DISK_HOOKS
1229 for (i = 0; i < NMD; i++)
1230 if (strcmp(str, fakemdrootdev[i].dv_xname) == 0) {
1231 dv = &fakemdrootdev[i];
1232 goto gotdisk;
1233 }
1234 #endif
1235
1236 dv = finddevice(str);
1237 if (dv != NULL) {
1238 if (dv->dv_class == DV_DISK) {
1239 #ifdef MEMORY_DISK_HOOKS
1240 gotdisk:
1241 #endif
1242 majdev = devsw_name2blk(dv->dv_xname, NULL, 0);
1243 if (majdev < 0)
1244 panic("parsedisk");
1245 if (DEV_USES_PARTITIONS(dv))
1246 *devp = MAKEDISKDEV(majdev, dv->dv_unit, part);
1247 else
1248 *devp = makedev(majdev, dv->dv_unit);
1249 }
1250
1251 if (dv->dv_class == DV_IFNET)
1252 *devp = NODEV;
1253 }
1254
1255 *cp = c;
1256 return (dv);
1257 }
1258
/*
 * snprintf() `bytes' into `buf', scaling it down by `divisor' until the
 * number (plus a possible ` x' prefix and the suffix) fits into `len'
 * bytes, including the terminating NUL.
 *
 * With a divisor of 1024 the prefixes are K, M, G, T, P, E; with any
 * other divisor the SI spelling k, M, G, T, P, E is used.
 *
 * Returns the value of snprintf() for the formatted string, or -1 if
 * `buf' or `suffix' is NULL, `divisor' is not positive, or `len' is too
 * small to hold `x y' + suffix + NUL.  On failure buf is set to the
 * empty string whenever len > 0.
 *
 * E.g, given a len of 9, a suffix of `B' and a divisor of 1024:
 *	bytes		result
 *	-----		------
 *	99999		`99999 B'
 *	100000		`97 KB'
 *	66715648	`65152 KB'
 *	252215296	`240 MB'
 */
int
humanize_number(char *buf, size_t len, u_int64_t bytes, const char *suffix,
    int divisor)
{
	/* prefixes are: (none), kilo, Mega, Giga, Tera, Peta, Exa */
	const char *prefixes;
	u_int64_t max;
	size_t i, digits, suffixlen;
	int r, scalable;

	if (buf == NULL || suffix == NULL)
		return (-1);
	if (len > 0)
		buf[0] = '\0';
	/* A zero divisor would trap below; a negative one is meaningless. */
	if (divisor <= 0)
		return (-1);
	suffixlen = strlen(suffix);
	/* check if enough room for `x y' + suffix + `\0' */
	if (len < 4 + suffixlen)
		return (-1);

	if (divisor == 1024) {
		/*
		 * binary multiplies
		 * XXX IEC 60027-2 recommends Ki, Mi, Gi...
		 */
		prefixes = " KMGTPE";
	} else
		prefixes = " kMGTPE"; /* SI for decimal multiplies */

	/*
	 * max = 10^digits is the first value that no longer fits.  If
	 * that power of ten is not representable in 64 bits, then every
	 * possible value fits and no scaling is needed; letting the
	 * multiplication wrap would produce a bogus (possibly zero) limit.
	 */
	digits = len - suffixlen - 3;
	max = 1;
	scalable = 1;
	for (i = 0; i < digits; i++) {
		if (max > ~(u_int64_t)0 / 10) {
			scalable = 0;
			break;
		}
		max *= 10;
	}

	i = 0;
	if (scalable) {
		while (bytes >= max && prefixes[i + 1] != '\0') {
			bytes /= divisor;
			i++;
		}
	}

	r = snprintf(buf, len, "%qu%s%c%s", (unsigned long long)bytes,
	    i == 0 ? "" : " ", prefixes[i], suffix);

	return (r);
}
1315
/*
 * Format a byte count with humanize_number() using the suffix `B' and
 * a divisor of 1024, then drop a trailing ` B' (the unscaled case) so
 * that plain byte counts are shown as a bare number.
 *
 * Returns whatever humanize_number() returned; -1 means failure.
 */
int
format_bytes(char *buf, size_t len, u_int64_t bytes)
{
	size_t tail;
	int rv;

	rv = humanize_number(buf, len, bytes, "B", 1024);
	if (rv == -1)
		return (rv);

	/* nuke the trailing ` B' if it exists */
	tail = strlen(buf) - 2;
	if (strcmp(buf + tail, " B") == 0)
		buf[tail] = '\0';

	return (rv);
}
1334
/*
 * Start trace of particular system call. If process is being traced,
 * this routine is called by MD syscall dispatch code just before
 * a system call is actually executed.
 * MD caller guarantees the passed 'code' is within the supported
 * system call number range for emulation the process runs under.
 *
 * Depending on kernel options this logs the call for SYSCALL_DEBUG,
 * records a ktrace syscall entry, and consults systrace.  The return
 * value is that of systrace_enter() when the process has P_SYSTRACE
 * set, and 0 otherwise.
 */
int
trace_enter(struct lwp *l, register_t code,
    register_t realcode, const struct sysent *callp, void *args)
{
#if defined(KTRACE) || defined(SYSTRACE)
	struct proc *const p = l->l_proc;
#endif
	int rv = 0;

#ifdef SYSCALL_DEBUG
	scdebug_call(l, code, args);
#endif /* SYSCALL_DEBUG */

#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSCALL))
		ktrsyscall(p, code, realcode, callp, args);
#endif /* KTRACE */

#ifdef SYSTRACE
	if (ISSET(p->p_flag, P_SYSTRACE))
		rv = systrace_enter(p, code, args);
#endif
	return rv;
}
1365
/*
 * End trace of particular system call. If process is being traced,
 * this routine is called by MD syscall dispatch code just after
 * a system call finishes.
 * MD caller guarantees the passed 'code' is within the supported
 * system call number range for emulation the process runs under.
 *
 * Depending on kernel options this logs the return for SYSCALL_DEBUG,
 * records a ktrace sysret entry (bracketed by KERNEL_PROC_LOCK /
 * KERNEL_PROC_UNLOCK), and notifies systrace when the process has
 * P_SYSTRACE set.
 */
void
trace_exit(struct lwp *l, register_t code, void *args, register_t rval[],
    int error)
{
#if defined(KTRACE) || defined(SYSTRACE)
	struct proc *const p = l->l_proc;
#endif

#ifdef SYSCALL_DEBUG
	scdebug_ret(l, code, error, rval);
#endif /* SYSCALL_DEBUG */

#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSRET)) {
		KERNEL_PROC_LOCK(l);
		ktrsysret(p, code, error, rval);
		KERNEL_PROC_UNLOCK(l);
	}
#endif /* KTRACE */

#ifdef SYSTRACE
	if (ISSET(p->p_flag, P_SYSTRACE)) {
		systrace_exit(p, code, args, rval, error);
	}
#endif
}
1398