kern_subr.c revision 1.112 1 /* $NetBSD: kern_subr.c,v 1.112 2004/09/23 10:45:08 yamt Exp $ */
2
3 /*-
4 * Copyright (c) 1997, 1998, 1999, 2002 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center, and by Luke Mewburn.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the NetBSD
22 * Foundation, Inc. and its contributors.
23 * 4. Neither the name of The NetBSD Foundation nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
40 /*
41 * Copyright (c) 1982, 1986, 1991, 1993
42 * The Regents of the University of California. All rights reserved.
43 * (c) UNIX System Laboratories, Inc.
44 * All or some portions of this file are derived from material licensed
45 * to the University of California by American Telephone and Telegraph
46 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
47 * the permission of UNIX System Laboratories, Inc.
48 *
49 * Copyright (c) 1992, 1993
50 * The Regents of the University of California. All rights reserved.
51 *
52 * This software was developed by the Computer Systems Engineering group
53 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
54 * contributed to Berkeley.
55 *
56 * All advertising materials mentioning features or use of this software
57 * must display the following acknowledgement:
58 * This product includes software developed by the University of
59 * California, Lawrence Berkeley Laboratory.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 * 1. Redistributions of source code must retain the above copyright
65 * notice, this list of conditions and the following disclaimer.
66 * 2. Redistributions in binary form must reproduce the above copyright
67 * notice, this list of conditions and the following disclaimer in the
68 * documentation and/or other materials provided with the distribution.
69 * 3. Neither the name of the University nor the names of its contributors
70 * may be used to endorse or promote products derived from this software
71 * without specific prior written permission.
72 *
73 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
74 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
75 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
76 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
77 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
78 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
79 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
80 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
81 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
82 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
83 * SUCH DAMAGE.
84 *
85 * @(#)kern_subr.c 8.4 (Berkeley) 2/14/95
86 */
87
88 #include <sys/cdefs.h>
89 __KERNEL_RCSID(0, "$NetBSD: kern_subr.c,v 1.112 2004/09/23 10:45:08 yamt Exp $");
90
91 #include "opt_ddb.h"
92 #include "opt_md.h"
93 #include "opt_syscall_debug.h"
94 #include "opt_ktrace.h"
95 #include "opt_systrace.h"
96
97 #include <sys/param.h>
98 #include <sys/systm.h>
99 #include <sys/proc.h>
100 #include <sys/malloc.h>
101 #include <sys/mount.h>
102 #include <sys/device.h>
103 #include <sys/reboot.h>
104 #include <sys/conf.h>
105 #include <sys/disklabel.h>
106 #include <sys/queue.h>
107 #include <sys/systrace.h>
108 #include <sys/ktrace.h>
109
110 #include <uvm/uvm_extern.h>
111
112 #include <dev/cons.h>
113
114 #include <net/if.h>
115
116 /* XXX these should eventually move to subr_autoconf.c */
117 static struct device *finddevice(const char *);
118 static struct device *getdisk(char *, int, int, dev_t *, int);
119 static struct device *parsedisk(char *, int, int, dev_t *);
120
121 /*
122 * A generic linear hook.
123 */
124 struct hook_desc {
125 LIST_ENTRY(hook_desc) hk_list;
126 void (*hk_fn)(void *);
127 void *hk_arg;
128 };
129 typedef LIST_HEAD(, hook_desc) hook_list_t;
130
131 static void *hook_establish(hook_list_t *, void (*)(void *), void *);
132 static void hook_disestablish(hook_list_t *, void *);
133 static void hook_destroy(hook_list_t *);
134 static void hook_proc_run(hook_list_t *, struct proc *);
135
136 MALLOC_DEFINE(M_IOV, "iov", "large iov's");
137
138 int
139 uiomove(buf, n, uio)
140 void *buf;
141 size_t n;
142 struct uio *uio;
143 {
144 struct iovec *iov;
145 u_int cnt;
146 int error = 0;
147 char *cp = buf;
148 struct proc *p = uio->uio_procp;
149 int hold_count;
150
151 hold_count = KERNEL_LOCK_RELEASE_ALL();
152
153 #if defined(LOCKDEBUG) || defined(DIAGNOSTIC)
154 spinlock_switchcheck();
155 #endif
156 #ifdef LOCKDEBUG
157 simple_lock_only_held(NULL, "uiomove");
158 #endif
159
160 #ifdef DIAGNOSTIC
161 if (uio->uio_rw != UIO_READ && uio->uio_rw != UIO_WRITE)
162 panic("uiomove: mode");
163 #endif
164 while (n > 0 && uio->uio_resid) {
165 iov = uio->uio_iov;
166 cnt = iov->iov_len;
167 if (cnt == 0) {
168 KASSERT(uio->uio_iovcnt > 0);
169 uio->uio_iov++;
170 uio->uio_iovcnt--;
171 continue;
172 }
173 if (cnt > n)
174 cnt = n;
175 switch (uio->uio_segflg) {
176
177 case UIO_USERSPACE:
178 if (curcpu()->ci_schedstate.spc_flags &
179 SPCF_SHOULDYIELD)
180 preempt(1);
181 if (__predict_true(p == curproc)) {
182 if (uio->uio_rw == UIO_READ)
183 error = copyout(cp, iov->iov_base, cnt);
184 else
185 error = copyin(iov->iov_base, cp, cnt);
186 } else {
187 if (uio->uio_rw == UIO_READ)
188 error = copyout_proc(p, cp,
189 iov->iov_base, cnt);
190 else
191 error = copyin_proc(p, iov->iov_base,
192 cp, cnt);
193 }
194 if (error)
195 goto out;
196 break;
197
198 case UIO_SYSSPACE:
199 if (uio->uio_rw == UIO_READ)
200 error = kcopy(cp, iov->iov_base, cnt);
201 else
202 error = kcopy(iov->iov_base, cp, cnt);
203 if (error)
204 goto out;
205 break;
206 }
207 iov->iov_base = (caddr_t)iov->iov_base + cnt;
208 iov->iov_len -= cnt;
209 uio->uio_resid -= cnt;
210 uio->uio_offset += cnt;
211 cp += cnt;
212 KDASSERT(cnt <= n);
213 n -= cnt;
214 }
215 out:
216 KERNEL_LOCK_ACQUIRE_COUNT(hold_count);
217 return (error);
218 }
219
220 /*
221 * Wrapper for uiomove() that validates the arguments against a known-good
222 * kernel buffer.
223 */
224 int
225 uiomove_frombuf(void *buf, size_t buflen, struct uio *uio)
226 {
227 size_t offset;
228
229 if (uio->uio_offset < 0 || uio->uio_resid < 0 ||
230 (offset = uio->uio_offset) != uio->uio_offset)
231 return (EINVAL);
232 if (offset >= buflen)
233 return (0);
234 return (uiomove((char *)buf + offset, buflen - offset, uio));
235 }
236
237 /*
238 * Give next character to user as result of read.
239 */
240 int
241 ureadc(c, uio)
242 int c;
243 struct uio *uio;
244 {
245 struct iovec *iov;
246
247 if (uio->uio_resid <= 0)
248 panic("ureadc: non-positive resid");
249 again:
250 if (uio->uio_iovcnt <= 0)
251 panic("ureadc: non-positive iovcnt");
252 iov = uio->uio_iov;
253 if (iov->iov_len <= 0) {
254 uio->uio_iovcnt--;
255 uio->uio_iov++;
256 goto again;
257 }
258 switch (uio->uio_segflg) {
259
260 case UIO_USERSPACE:
261 if (subyte(iov->iov_base, c) < 0)
262 return (EFAULT);
263 break;
264
265 case UIO_SYSSPACE:
266 *(char *)iov->iov_base = c;
267 break;
268 }
269 iov->iov_base = (caddr_t)iov->iov_base + 1;
270 iov->iov_len--;
271 uio->uio_resid--;
272 uio->uio_offset++;
273 return (0);
274 }
275
276 /*
277 * Like copyin(), but operates on an arbitrary process.
278 */
279 int
280 copyin_proc(struct proc *p, const void *uaddr, void *kaddr, size_t len)
281 {
282 struct iovec iov;
283 struct uio uio;
284 int error;
285
286 if (len == 0)
287 return (0);
288
289 iov.iov_base = kaddr;
290 iov.iov_len = len;
291 uio.uio_iov = &iov;
292 uio.uio_iovcnt = 1;
293 uio.uio_offset = (off_t)(intptr_t)uaddr;
294 uio.uio_resid = len;
295 uio.uio_segflg = UIO_SYSSPACE;
296 uio.uio_rw = UIO_READ;
297 uio.uio_procp = NULL;
298
299 /* XXXCDC: how should locking work here? */
300 if ((p->p_flag & P_WEXIT) || (p->p_vmspace->vm_refcnt < 1))
301 return (EFAULT);
302 p->p_vmspace->vm_refcnt++; /* XXX */
303 error = uvm_io(&p->p_vmspace->vm_map, &uio);
304 uvmspace_free(p->p_vmspace);
305
306 return (error);
307 }
308
309 /*
310 * Like copyout(), but operates on an arbitrary process.
311 */
312 int
313 copyout_proc(struct proc *p, const void *kaddr, void *uaddr, size_t len)
314 {
315 struct iovec iov;
316 struct uio uio;
317 int error;
318
319 if (len == 0)
320 return (0);
321
322 iov.iov_base = (void *) kaddr; /* XXX cast away const */
323 iov.iov_len = len;
324 uio.uio_iov = &iov;
325 uio.uio_iovcnt = 1;
326 uio.uio_offset = (off_t)(intptr_t)uaddr;
327 uio.uio_resid = len;
328 uio.uio_segflg = UIO_SYSSPACE;
329 uio.uio_rw = UIO_WRITE;
330 uio.uio_procp = NULL;
331
332 /* XXXCDC: how should locking work here? */
333 if ((p->p_flag & P_WEXIT) || (p->p_vmspace->vm_refcnt < 1))
334 return (EFAULT);
335 p->p_vmspace->vm_refcnt++; /* XXX */
336 error = uvm_io(&p->p_vmspace->vm_map, &uio);
337 uvmspace_free(p->p_vmspace);
338
339 return (error);
340 }
341
342 /*
343 * General routine to allocate a hash table.
344 * Allocate enough memory to hold at least `elements' list-head pointers.
345 * Return a pointer to the allocated space and set *hashmask to a pattern
346 * suitable for masking a value to use as an index into the returned array.
347 */
348 void *
349 hashinit(elements, htype, mtype, mflags, hashmask)
350 u_int elements;
351 enum hashtype htype;
352 struct malloc_type *mtype;
353 int mflags;
354 u_long *hashmask;
355 {
356 u_long hashsize, i;
357 LIST_HEAD(, generic) *hashtbl_list;
358 TAILQ_HEAD(, generic) *hashtbl_tailq;
359 size_t esize;
360 void *p;
361
362 if (elements == 0)
363 panic("hashinit: bad cnt");
364 for (hashsize = 1; hashsize < elements; hashsize <<= 1)
365 continue;
366
367 switch (htype) {
368 case HASH_LIST:
369 esize = sizeof(*hashtbl_list);
370 break;
371 case HASH_TAILQ:
372 esize = sizeof(*hashtbl_tailq);
373 break;
374 default:
375 #ifdef DIAGNOSTIC
376 panic("hashinit: invalid table type");
377 #else
378 return NULL;
379 #endif
380 }
381
382 if ((p = malloc(hashsize * esize, mtype, mflags)) == NULL)
383 return (NULL);
384
385 switch (htype) {
386 case HASH_LIST:
387 hashtbl_list = p;
388 for (i = 0; i < hashsize; i++)
389 LIST_INIT(&hashtbl_list[i]);
390 break;
391 case HASH_TAILQ:
392 hashtbl_tailq = p;
393 for (i = 0; i < hashsize; i++)
394 TAILQ_INIT(&hashtbl_tailq[i]);
395 break;
396 }
397 *hashmask = hashsize - 1;
398 return (p);
399 }
400
401 /*
402 * Free memory from hash table previosly allocated via hashinit().
403 */
404 void
405 hashdone(hashtbl, mtype)
406 void *hashtbl;
407 struct malloc_type *mtype;
408 {
409
410 free(hashtbl, mtype);
411 }
412
413
414 static void *
415 hook_establish(list, fn, arg)
416 hook_list_t *list;
417 void (*fn)(void *);
418 void *arg;
419 {
420 struct hook_desc *hd;
421
422 hd = malloc(sizeof(*hd), M_DEVBUF, M_NOWAIT);
423 if (hd == NULL)
424 return (NULL);
425
426 hd->hk_fn = fn;
427 hd->hk_arg = arg;
428 LIST_INSERT_HEAD(list, hd, hk_list);
429
430 return (hd);
431 }
432
433 static void
434 hook_disestablish(list, vhook)
435 hook_list_t *list;
436 void *vhook;
437 {
438 #ifdef DIAGNOSTIC
439 struct hook_desc *hd;
440
441 LIST_FOREACH(hd, list, hk_list) {
442 if (hd == vhook)
443 break;
444 }
445
446 if (hd == NULL)
447 panic("hook_disestablish: hook %p not established", vhook);
448 #endif
449 LIST_REMOVE((struct hook_desc *)vhook, hk_list);
450 free(vhook, M_DEVBUF);
451 }
452
453 static void
454 hook_destroy(list)
455 hook_list_t *list;
456 {
457 struct hook_desc *hd;
458
459 while ((hd = LIST_FIRST(list)) != NULL) {
460 LIST_REMOVE(hd, hk_list);
461 free(hd, M_DEVBUF);
462 }
463 }
464
465 static void
466 hook_proc_run(list, p)
467 hook_list_t *list;
468 struct proc *p;
469 {
470 struct hook_desc *hd;
471
472 for (hd = LIST_FIRST(list); hd != NULL; hd = LIST_NEXT(hd, hk_list)) {
473 ((void (*)(struct proc *, void *))*hd->hk_fn)(p,
474 hd->hk_arg);
475 }
476 }
477
478 /*
479 * "Shutdown hook" types, functions, and variables.
480 *
481 * Should be invoked immediately before the
482 * system is halted or rebooted, i.e. after file systems unmounted,
483 * after crash dump done, etc.
484 *
485 * Each shutdown hook is removed from the list before it's run, so that
486 * it won't be run again.
487 */
488
489 hook_list_t shutdownhook_list;
490
491 void *
492 shutdownhook_establish(fn, arg)
493 void (*fn)(void *);
494 void *arg;
495 {
496 return hook_establish(&shutdownhook_list, fn, arg);
497 }
498
499 void
500 shutdownhook_disestablish(vhook)
501 void *vhook;
502 {
503 hook_disestablish(&shutdownhook_list, vhook);
504 }
505
506 /*
507 * Run shutdown hooks. Should be invoked immediately before the
508 * system is halted or rebooted, i.e. after file systems unmounted,
509 * after crash dump done, etc.
510 *
511 * Each shutdown hook is removed from the list before it's run, so that
512 * it won't be run again.
513 */
514 void
515 doshutdownhooks()
516 {
517 struct hook_desc *dp;
518
519 while ((dp = LIST_FIRST(&shutdownhook_list)) != NULL) {
520 LIST_REMOVE(dp, hk_list);
521 (*dp->hk_fn)(dp->hk_arg);
522 #if 0
523 /*
524 * Don't bother freeing the hook structure,, since we may
525 * be rebooting because of a memory corruption problem,
526 * and this might only make things worse. It doesn't
527 * matter, anyway, since the system is just about to
528 * reboot.
529 */
530 free(dp, M_DEVBUF);
531 #endif
532 }
533 }
534
535 /*
536 * "Mountroot hook" types, functions, and variables.
537 */
538
539 hook_list_t mountroothook_list;
540
541 void *
542 mountroothook_establish(fn, dev)
543 void (*fn)(struct device *);
544 struct device *dev;
545 {
546 return hook_establish(&mountroothook_list, (void (*)(void *))fn, dev);
547 }
548
549 void
550 mountroothook_disestablish(vhook)
551 void *vhook;
552 {
553 hook_disestablish(&mountroothook_list, vhook);
554 }
555
556 void
557 mountroothook_destroy()
558 {
559 hook_destroy(&mountroothook_list);
560 }
561
562 void
563 domountroothook()
564 {
565 struct hook_desc *hd;
566
567 LIST_FOREACH(hd, &mountroothook_list, hk_list) {
568 if (hd->hk_arg == (void *)root_device) {
569 (*hd->hk_fn)(hd->hk_arg);
570 return;
571 }
572 }
573 }
574
575 hook_list_t exechook_list;
576
577 void *
578 exechook_establish(fn, arg)
579 void (*fn)(struct proc *, void *);
580 void *arg;
581 {
582 return hook_establish(&exechook_list, (void (*)(void *))fn, arg);
583 }
584
585 void
586 exechook_disestablish(vhook)
587 void *vhook;
588 {
589 hook_disestablish(&exechook_list, vhook);
590 }
591
592 /*
593 * Run exec hooks.
594 */
595 void
596 doexechooks(p)
597 struct proc *p;
598 {
599 hook_proc_run(&exechook_list, p);
600 }
601
602 hook_list_t exithook_list;
603
604 void *
605 exithook_establish(fn, arg)
606 void (*fn)(struct proc *, void *);
607 void *arg;
608 {
609 return hook_establish(&exithook_list, (void (*)(void *))fn, arg);
610 }
611
612 void
613 exithook_disestablish(vhook)
614 void *vhook;
615 {
616 hook_disestablish(&exithook_list, vhook);
617 }
618
619 /*
620 * Run exit hooks.
621 */
622 void
623 doexithooks(p)
624 struct proc *p;
625 {
626 hook_proc_run(&exithook_list, p);
627 }
628
629 hook_list_t forkhook_list;
630
631 void *
632 forkhook_establish(fn)
633 void (*fn)(struct proc *, struct proc *);
634 {
635 return hook_establish(&forkhook_list, (void (*)(void *))fn, NULL);
636 }
637
638 void
639 forkhook_disestablish(vhook)
640 void *vhook;
641 {
642 hook_disestablish(&forkhook_list, vhook);
643 }
644
645 /*
646 * Run fork hooks.
647 */
648 void
649 doforkhooks(p2, p1)
650 struct proc *p2, *p1;
651 {
652 struct hook_desc *hd;
653
654 LIST_FOREACH(hd, &forkhook_list, hk_list) {
655 ((void (*)(struct proc *, struct proc *))*hd->hk_fn)
656 (p2, p1);
657 }
658 }
659
660 /*
661 * "Power hook" types, functions, and variables.
662 * The list of power hooks is kept ordered with the last registered hook
663 * first.
664 * When running the hooks on power down the hooks are called in reverse
665 * registration order, when powering up in registration order.
666 */
667 struct powerhook_desc {
668 CIRCLEQ_ENTRY(powerhook_desc) sfd_list;
669 void (*sfd_fn)(int, void *);
670 void *sfd_arg;
671 };
672
673 CIRCLEQ_HEAD(, powerhook_desc) powerhook_list =
674 CIRCLEQ_HEAD_INITIALIZER(powerhook_list);
675
676 void *
677 powerhook_establish(fn, arg)
678 void (*fn)(int, void *);
679 void *arg;
680 {
681 struct powerhook_desc *ndp;
682
683 ndp = (struct powerhook_desc *)
684 malloc(sizeof(*ndp), M_DEVBUF, M_NOWAIT);
685 if (ndp == NULL)
686 return (NULL);
687
688 ndp->sfd_fn = fn;
689 ndp->sfd_arg = arg;
690 CIRCLEQ_INSERT_HEAD(&powerhook_list, ndp, sfd_list);
691
692 return (ndp);
693 }
694
695 void
696 powerhook_disestablish(vhook)
697 void *vhook;
698 {
699 #ifdef DIAGNOSTIC
700 struct powerhook_desc *dp;
701
702 CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list)
703 if (dp == vhook)
704 goto found;
705 panic("powerhook_disestablish: hook %p not established", vhook);
706 found:
707 #endif
708
709 CIRCLEQ_REMOVE(&powerhook_list, (struct powerhook_desc *)vhook,
710 sfd_list);
711 free(vhook, M_DEVBUF);
712 }
713
714 /*
715 * Run power hooks.
716 */
717 void
718 dopowerhooks(why)
719 int why;
720 {
721 struct powerhook_desc *dp;
722
723 if (why == PWR_RESUME || why == PWR_SOFTRESUME) {
724 CIRCLEQ_FOREACH_REVERSE(dp, &powerhook_list, sfd_list) {
725 (*dp->sfd_fn)(why, dp->sfd_arg);
726 }
727 } else {
728 CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list) {
729 (*dp->sfd_fn)(why, dp->sfd_arg);
730 }
731 }
732 }
733
734 /*
735 * Determine the root device and, if instructed to, the root file system.
736 */
737
738 #include "md.h"
739 #if NMD == 0
740 #undef MEMORY_DISK_HOOKS
741 #endif
742
743 #ifdef MEMORY_DISK_HOOKS
744 static struct device fakemdrootdev[NMD];
745 #endif
746
747 #ifdef MEMORY_DISK_IS_ROOT
748 #define BOOT_FROM_MEMORY_HOOKS 1
749 #endif
750
751 #include "raid.h"
752 #if NRAID == 1
753 #define BOOT_FROM_RAID_HOOKS 1
754 #endif
755
756 #ifdef BOOT_FROM_RAID_HOOKS
757 extern int numraid;
758 extern struct device *raidrootdev;
759 #endif
760
761 void
762 setroot(bootdv, bootpartition)
763 struct device *bootdv;
764 int bootpartition;
765 {
766 struct device *dv;
767 int len;
768 #ifdef MEMORY_DISK_HOOKS
769 int i;
770 #endif
771 dev_t nrootdev;
772 dev_t ndumpdev = NODEV;
773 char buf[128];
774 const char *rootdevname;
775 const char *dumpdevname;
776 struct device *rootdv = NULL; /* XXX gcc -Wuninitialized */
777 struct device *dumpdv = NULL;
778 struct ifnet *ifp;
779 const char *deffsname;
780 struct vfsops *vops;
781
782 #ifdef MEMORY_DISK_HOOKS
783 for (i = 0; i < NMD; i++) {
784 fakemdrootdev[i].dv_class = DV_DISK;
785 fakemdrootdev[i].dv_cfdata = NULL;
786 fakemdrootdev[i].dv_unit = i;
787 fakemdrootdev[i].dv_parent = NULL;
788 snprintf(fakemdrootdev[i].dv_xname,
789 sizeof(fakemdrootdev[i].dv_xname), "md%d", i);
790 }
791 #endif /* MEMORY_DISK_HOOKS */
792
793 #ifdef MEMORY_DISK_IS_ROOT
794 bootdv = &fakemdrootdev[0];
795 bootpartition = 0;
796 #endif
797
798 /*
799 * If NFS is specified as the file system, and we found
800 * a DV_DISK boot device (or no boot device at all), then
801 * find a reasonable network interface for "rootspec".
802 */
803 vops = vfs_getopsbyname("nfs");
804 if (vops != NULL && vops->vfs_mountroot == mountroot &&
805 rootspec == NULL &&
806 (bootdv == NULL || bootdv->dv_class != DV_IFNET)) {
807 TAILQ_FOREACH(ifp, &ifnet, if_list) {
808 if ((ifp->if_flags &
809 (IFF_LOOPBACK|IFF_POINTOPOINT)) == 0)
810 break;
811 }
812 if (ifp == NULL) {
813 /*
814 * Can't find a suitable interface; ask the
815 * user.
816 */
817 boothowto |= RB_ASKNAME;
818 } else {
819 /*
820 * Have a suitable interface; behave as if
821 * the user specified this interface.
822 */
823 rootspec = (const char *)ifp->if_xname;
824 }
825 }
826
827 /*
828 * If wildcarded root and we the boot device wasn't determined,
829 * ask the user.
830 */
831 if (rootspec == NULL && bootdv == NULL)
832 boothowto |= RB_ASKNAME;
833
834 top:
835 if (boothowto & RB_ASKNAME) {
836 struct device *defdumpdv;
837
838 for (;;) {
839 printf("root device");
840 if (bootdv != NULL) {
841 printf(" (default %s", bootdv->dv_xname);
842 if (bootdv->dv_class == DV_DISK)
843 printf("%c", bootpartition + 'a');
844 printf(")");
845 }
846 printf(": ");
847 len = cngetsn(buf, sizeof(buf));
848 if (len == 0 && bootdv != NULL) {
849 strlcpy(buf, bootdv->dv_xname, sizeof(buf));
850 len = strlen(buf);
851 }
852 if (len > 0 && buf[len - 1] == '*') {
853 buf[--len] = '\0';
854 dv = getdisk(buf, len, 1, &nrootdev, 0);
855 if (dv != NULL) {
856 rootdv = dv;
857 break;
858 }
859 }
860 dv = getdisk(buf, len, bootpartition, &nrootdev, 0);
861 if (dv != NULL) {
862 rootdv = dv;
863 break;
864 }
865 }
866
867 /*
868 * Set up the default dump device. If root is on
869 * a network device, there is no default dump
870 * device, since we don't support dumps to the
871 * network.
872 */
873 if (rootdv->dv_class == DV_IFNET)
874 defdumpdv = NULL;
875 else
876 defdumpdv = rootdv;
877
878 for (;;) {
879 printf("dump device");
880 if (defdumpdv != NULL) {
881 /*
882 * Note, we know it's a disk if we get here.
883 */
884 printf(" (default %sb)", defdumpdv->dv_xname);
885 }
886 printf(": ");
887 len = cngetsn(buf, sizeof(buf));
888 if (len == 0) {
889 if (defdumpdv != NULL) {
890 ndumpdev = MAKEDISKDEV(major(nrootdev),
891 DISKUNIT(nrootdev), 1);
892 }
893 dumpdv = defdumpdv;
894 break;
895 }
896 if (len == 4 && strcmp(buf, "none") == 0) {
897 dumpdv = NULL;
898 break;
899 }
900 dv = getdisk(buf, len, 1, &ndumpdev, 1);
901 if (dv != NULL) {
902 dumpdv = dv;
903 break;
904 }
905 }
906
907 rootdev = nrootdev;
908 dumpdev = ndumpdev;
909
910 for (vops = LIST_FIRST(&vfs_list); vops != NULL;
911 vops = LIST_NEXT(vops, vfs_list)) {
912 if (vops->vfs_mountroot != NULL &&
913 vops->vfs_mountroot == mountroot)
914 break;
915 }
916
917 if (vops == NULL) {
918 mountroot = NULL;
919 deffsname = "generic";
920 } else
921 deffsname = vops->vfs_name;
922
923 for (;;) {
924 printf("file system (default %s): ", deffsname);
925 len = cngetsn(buf, sizeof(buf));
926 if (len == 0)
927 break;
928 if (len == 4 && strcmp(buf, "halt") == 0)
929 cpu_reboot(RB_HALT, NULL);
930 else if (len == 6 && strcmp(buf, "reboot") == 0)
931 cpu_reboot(0, NULL);
932 #if defined(DDB)
933 else if (len == 3 && strcmp(buf, "ddb") == 0) {
934 console_debugger();
935 }
936 #endif
937 else if (len == 7 && strcmp(buf, "generic") == 0) {
938 mountroot = NULL;
939 break;
940 }
941 vops = vfs_getopsbyname(buf);
942 if (vops == NULL || vops->vfs_mountroot == NULL) {
943 printf("use one of: generic");
944 for (vops = LIST_FIRST(&vfs_list);
945 vops != NULL;
946 vops = LIST_NEXT(vops, vfs_list)) {
947 if (vops->vfs_mountroot != NULL)
948 printf(" %s", vops->vfs_name);
949 }
950 #if defined(DDB)
951 printf(" ddb");
952 #endif
953 printf(" halt reboot\n");
954 } else {
955 mountroot = vops->vfs_mountroot;
956 break;
957 }
958 }
959
960 } else if (rootspec == NULL) {
961 int majdev;
962
963 /*
964 * Wildcarded root; use the boot device.
965 */
966 rootdv = bootdv;
967
968 majdev = devsw_name2blk(bootdv->dv_xname, NULL, 0);
969 if (majdev >= 0) {
970 /*
971 * Root is on a disk. `bootpartition' is root.
972 */
973 rootdev = MAKEDISKDEV(majdev, bootdv->dv_unit,
974 bootpartition);
975 }
976 } else {
977
978 /*
979 * `root on <dev> ...'
980 */
981
982 /*
983 * If it's a network interface, we can bail out
984 * early.
985 */
986 dv = finddevice(rootspec);
987 if (dv != NULL && dv->dv_class == DV_IFNET) {
988 rootdv = dv;
989 goto haveroot;
990 }
991
992 rootdevname = devsw_blk2name(major(rootdev));
993 if (rootdevname == NULL) {
994 printf("unknown device major 0x%x\n", rootdev);
995 boothowto |= RB_ASKNAME;
996 goto top;
997 }
998 memset(buf, 0, sizeof(buf));
999 snprintf(buf, sizeof(buf), "%s%d", rootdevname,
1000 DISKUNIT(rootdev));
1001
1002 rootdv = finddevice(buf);
1003 if (rootdv == NULL) {
1004 printf("device %s (0x%x) not configured\n",
1005 buf, rootdev);
1006 boothowto |= RB_ASKNAME;
1007 goto top;
1008 }
1009 }
1010
1011 haveroot:
1012
1013 root_device = rootdv;
1014
1015 switch (rootdv->dv_class) {
1016 case DV_IFNET:
1017 aprint_normal("root on %s", rootdv->dv_xname);
1018 break;
1019
1020 case DV_DISK:
1021 aprint_normal("root on %s%c", rootdv->dv_xname,
1022 DISKPART(rootdev) + 'a');
1023 break;
1024
1025 default:
1026 printf("can't determine root device\n");
1027 boothowto |= RB_ASKNAME;
1028 goto top;
1029 }
1030
1031 /*
1032 * Now configure the dump device.
1033 *
1034 * If we haven't figured out the dump device, do so, with
1035 * the following rules:
1036 *
1037 * (a) We already know dumpdv in the RB_ASKNAME case.
1038 *
1039 * (b) If dumpspec is set, try to use it. If the device
1040 * is not available, punt.
1041 *
1042 * (c) If dumpspec is not set, the dump device is
1043 * wildcarded or unspecified. If the root device
1044 * is DV_IFNET, punt. Otherwise, use partition b
1045 * of the root device.
1046 */
1047
1048 if (boothowto & RB_ASKNAME) { /* (a) */
1049 if (dumpdv == NULL)
1050 goto nodumpdev;
1051 } else if (dumpspec != NULL) { /* (b) */
1052 if (strcmp(dumpspec, "none") == 0 || dumpdev == NODEV) {
1053 /*
1054 * Operator doesn't want a dump device.
1055 * Or looks like they tried to pick a network
1056 * device. Oops.
1057 */
1058 goto nodumpdev;
1059 }
1060
1061 dumpdevname = devsw_blk2name(major(dumpdev));
1062 if (dumpdevname == NULL)
1063 goto nodumpdev;
1064 memset(buf, 0, sizeof(buf));
1065 snprintf(buf, sizeof(buf), "%s%d", dumpdevname,
1066 DISKUNIT(dumpdev));
1067
1068 dumpdv = finddevice(buf);
1069 if (dumpdv == NULL) {
1070 /*
1071 * Device not configured.
1072 */
1073 goto nodumpdev;
1074 }
1075 } else { /* (c) */
1076 if (rootdv->dv_class == DV_IFNET)
1077 goto nodumpdev;
1078 else {
1079 dumpdv = rootdv;
1080 dumpdev = MAKEDISKDEV(major(rootdev),
1081 dumpdv->dv_unit, 1);
1082 }
1083 }
1084
1085 aprint_normal(" dumps on %s%c\n", dumpdv->dv_xname,
1086 DISKPART(dumpdev) + 'a');
1087 return;
1088
1089 nodumpdev:
1090 dumpdev = NODEV;
1091 aprint_normal("\n");
1092 }
1093
1094 static struct device *
1095 finddevice(name)
1096 const char *name;
1097 {
1098 struct device *dv;
1099 #if defined(BOOT_FROM_RAID_HOOKS) || defined(BOOT_FROM_MEMORY_HOOKS)
1100 int j;
1101 #endif /* BOOT_FROM_RAID_HOOKS || BOOT_FROM_MEMORY_HOOKS */
1102
1103 #ifdef BOOT_FROM_RAID_HOOKS
1104 for (j = 0; j < numraid; j++) {
1105 if (strcmp(name, raidrootdev[j].dv_xname) == 0) {
1106 dv = &raidrootdev[j];
1107 return (dv);
1108 }
1109 }
1110 #endif /* BOOT_FROM_RAID_HOOKS */
1111
1112 #ifdef BOOT_FROM_MEMORY_HOOKS
1113 for (j = 0; j < NMD; j++) {
1114 if (strcmp(name, fakemdrootdev[j].dv_xname) == 0) {
1115 dv = &fakemdrootdev[j];
1116 return (dv);
1117 }
1118 }
1119 #endif /* BOOT_FROM_MEMORY_HOOKS */
1120
1121 for (dv = TAILQ_FIRST(&alldevs); dv != NULL;
1122 dv = TAILQ_NEXT(dv, dv_list))
1123 if (strcmp(dv->dv_xname, name) == 0)
1124 break;
1125 return (dv);
1126 }
1127
1128 static struct device *
1129 getdisk(str, len, defpart, devp, isdump)
1130 char *str;
1131 int len, defpart;
1132 dev_t *devp;
1133 int isdump;
1134 {
1135 struct device *dv;
1136 #ifdef MEMORY_DISK_HOOKS
1137 int i;
1138 #endif
1139 #ifdef BOOT_FROM_RAID_HOOKS
1140 int j;
1141 #endif
1142
1143 if ((dv = parsedisk(str, len, defpart, devp)) == NULL) {
1144 printf("use one of:");
1145 #ifdef MEMORY_DISK_HOOKS
1146 if (isdump == 0)
1147 for (i = 0; i < NMD; i++)
1148 printf(" %s[a-%c]", fakemdrootdev[i].dv_xname,
1149 'a' + MAXPARTITIONS - 1);
1150 #endif
1151 #ifdef BOOT_FROM_RAID_HOOKS
1152 if (isdump == 0)
1153 for (j = 0; j < numraid; j++)
1154 printf(" %s[a-%c]", raidrootdev[j].dv_xname,
1155 'a' + MAXPARTITIONS - 1);
1156 #endif
1157 TAILQ_FOREACH(dv, &alldevs, dv_list) {
1158 if (dv->dv_class == DV_DISK)
1159 printf(" %s[a-%c]", dv->dv_xname,
1160 'a' + MAXPARTITIONS - 1);
1161 if (isdump == 0 && dv->dv_class == DV_IFNET)
1162 printf(" %s", dv->dv_xname);
1163 }
1164 if (isdump)
1165 printf(" none");
1166 #if defined(DDB)
1167 printf(" ddb");
1168 #endif
1169 printf(" halt reboot\n");
1170 }
1171 return (dv);
1172 }
1173
1174 static struct device *
1175 parsedisk(str, len, defpart, devp)
1176 char *str;
1177 int len, defpart;
1178 dev_t *devp;
1179 {
1180 struct device *dv;
1181 char *cp, c;
1182 int majdev, part;
1183 #ifdef MEMORY_DISK_HOOKS
1184 int i;
1185 #endif
1186 if (len == 0)
1187 return (NULL);
1188
1189 if (len == 4 && strcmp(str, "halt") == 0)
1190 cpu_reboot(RB_HALT, NULL);
1191 else if (len == 6 && strcmp(str, "reboot") == 0)
1192 cpu_reboot(0, NULL);
1193 #if defined(DDB)
1194 else if (len == 3 && strcmp(str, "ddb") == 0)
1195 console_debugger();
1196 #endif
1197
1198 cp = str + len - 1;
1199 c = *cp;
1200 if (c >= 'a' && c <= ('a' + MAXPARTITIONS - 1)) {
1201 part = c - 'a';
1202 *cp = '\0';
1203 } else
1204 part = defpart;
1205
1206 #ifdef MEMORY_DISK_HOOKS
1207 for (i = 0; i < NMD; i++)
1208 if (strcmp(str, fakemdrootdev[i].dv_xname) == 0) {
1209 dv = &fakemdrootdev[i];
1210 goto gotdisk;
1211 }
1212 #endif
1213
1214 dv = finddevice(str);
1215 if (dv != NULL) {
1216 if (dv->dv_class == DV_DISK) {
1217 #ifdef MEMORY_DISK_HOOKS
1218 gotdisk:
1219 #endif
1220 majdev = devsw_name2blk(dv->dv_xname, NULL, 0);
1221 if (majdev < 0)
1222 panic("parsedisk");
1223 *devp = MAKEDISKDEV(majdev, dv->dv_unit, part);
1224 }
1225
1226 if (dv->dv_class == DV_IFNET)
1227 *devp = NODEV;
1228 }
1229
1230 *cp = c;
1231 return (dv);
1232 }
1233
/*
 * snprintf() `bytes' into `buf', reformatting it so that the number
 * (plus a possible ` x' prefix and the suffix) fits into `len' bytes,
 * including the terminating NUL.  The value is divided by `divisor'
 * as often as needed; a divisor of 1024 selects the prefixes
 * K, M, G, T, P, E, any other divisor selects k, M, G, T, P, E.
 *
 * Returns the value returned by snprintf(), or -1 if there was a
 * problem: `buf' or `suffix' is NULL, `divisor' is not positive, or
 * `len' is too small to hold `x y' + suffix + NUL.
 *
 * E.g, given a len of 9, a suffix of `B' and a divisor of 1024:
 *	bytes		result
 *	-----		------
 *	99999		`99999 B'
 *	100000		`97 KB'
 *	66715648	`65152 KB'
 *	252215296	`240 MB'
 */
int
humanize_number(char *buf, size_t len, u_int64_t bytes, const char *suffix,
    int divisor)
{
	/* prefixes are: (none), kilo, Mega, Giga, Tera, Peta, Exa */
	const char *prefixes;
	int r;
	int fits_unscaled;
	u_int64_t max;
	size_t i, suffixlen;

	if (buf == NULL || suffix == NULL)
		return (-1);
	if (len > 0)
		buf[0] = '\0';
	/* A zero divisor would trap below; a negative one is meaningless. */
	if (divisor <= 0)
		return (-1);
	suffixlen = strlen(suffix);
	/* check if enough room for `x y' + suffix + `\0' */
	if (len < 4 + suffixlen)
		return (-1);

	if (divisor == 1024) {
		/*
		 * binary multiplies
		 * XXX IEC 60027-2 recommends Ki, Mi, Gi...
		 */
		prefixes = " KMGTPE";
	} else
		prefixes = " kMGTPE"; /* SI for decimal multiplies */

	/*
	 * max = 10^(number of digits there is room for).  If that power
	 * of ten does not fit in 64 bits, the buffer has room for every
	 * 64-bit value and no scaling is needed; multiplying on
	 * regardless would wrap `max' around and scale small numbers
	 * for no reason.
	 */
	fits_unscaled = 0;
	max = 1;
	for (i = 0; i < len - suffixlen - 3; i++) {
		if (max > ((u_int64_t)-1) / 10) {
			fits_unscaled = 1;
			break;
		}
		max *= 10;
	}

	/* Scale down until the number fits or we run out of prefixes. */
	for (i = 0; !fits_unscaled && bytes >= max && prefixes[i + 1]; i++)
		bytes /= divisor;

	r = snprintf(buf, len, "%qu%s%c%s", (unsigned long long)bytes,
	    i == 0 ? "" : " ", prefixes[i], suffix);

	return (r);
}
1290
/*
 * Format `bytes' into `buf' via humanize_number() with suffix `B' and
 * divisor 1024.  If the result ends in a bare ` B' (no prefix), that
 * tail is cut off so that only the number remains.
 *
 * Returns whatever humanize_number() returned; the value is not
 * adjusted when the ` B' tail is removed.
 */
int
format_bytes(char *buf, size_t len, u_int64_t bytes)
{
	char *tail;
	int rv;

	rv = humanize_number(buf, len, bytes, "B", 1024);
	if (rv == -1)
		return (rv);

	/* nuke the trailing ` B' if it exists */
	tail = &buf[strlen(buf) - 2];
	if (strcmp(tail, " B") == 0)
		*tail = '\0';

	return (rv);
}
1309
1310 /*
1311 * Start trace of particular system call. If process is being traced,
1312 * this routine is called by MD syscall dispatch code just before
1313 * a system call is actually executed.
1314 * MD caller guarantees the passed 'code' is within the supported
1315 * system call number range for emulation the process runs under.
1316 */
1317 int
1318 trace_enter(struct lwp *l, register_t code,
1319 register_t realcode, const struct sysent *callp, void *args)
1320 {
1321 #if defined(KTRACE) || defined(SYSTRACE)
1322 struct proc *p = l->l_proc;
1323 #endif
1324
1325 #ifdef SYSCALL_DEBUG
1326 scdebug_call(l, code, args);
1327 #endif /* SYSCALL_DEBUG */
1328
1329 #ifdef KTRACE
1330 if (KTRPOINT(p, KTR_SYSCALL))
1331 ktrsyscall(p, code, realcode, callp, args);
1332 #endif /* KTRACE */
1333
1334 #ifdef SYSTRACE
1335 if (ISSET(p->p_flag, P_SYSTRACE))
1336 return systrace_enter(p, code, args);
1337 #endif
1338 return 0;
1339 }
1340
1341 /*
1342 * End trace of particular system call. If process is being traced,
1343 * this routine is called by MD syscall dispatch code just after
1344 * a system call finishes.
1345 * MD caller guarantees the passed 'code' is within the supported
1346 * system call number range for emulation the process runs under.
1347 */
1348 void
1349 trace_exit(struct lwp *l, register_t code, void *args, register_t rval[],
1350 int error)
1351 {
1352 #if defined(KTRACE) || defined(SYSTRACE)
1353 struct proc *p = l->l_proc;
1354 #endif
1355
1356 #ifdef SYSCALL_DEBUG
1357 scdebug_ret(l, code, error, rval);
1358 #endif /* SYSCALL_DEBUG */
1359
1360 #ifdef KTRACE
1361 if (KTRPOINT(p, KTR_SYSRET)) {
1362 KERNEL_PROC_LOCK(l);
1363 ktrsysret(p, code, error, rval);
1364 KERNEL_PROC_UNLOCK(l);
1365 }
1366 #endif /* KTRACE */
1367
1368 #ifdef SYSTRACE
1369 if (ISSET(p->p_flag, P_SYSTRACE))
1370 systrace_exit(p, code, args, rval, error);
1371 #endif
1372 }
1373