1 /* $NetBSD: kern_subr.c,v 1.119 2005/08/28 20:58:14 reinoud Exp $ */
2
3 /*-
4 * Copyright (c) 1997, 1998, 1999, 2002 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center, and by Luke Mewburn.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the NetBSD
22 * Foundation, Inc. and its contributors.
23 * 4. Neither the name of The NetBSD Foundation nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
40 /*
41 * Copyright (c) 1982, 1986, 1991, 1993
42 * The Regents of the University of California. All rights reserved.
43 * (c) UNIX System Laboratories, Inc.
44 * All or some portions of this file are derived from material licensed
45 * to the University of California by American Telephone and Telegraph
46 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
47 * the permission of UNIX System Laboratories, Inc.
48 *
49 * Copyright (c) 1992, 1993
50 * The Regents of the University of California. All rights reserved.
51 *
52 * This software was developed by the Computer Systems Engineering group
53 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
54 * contributed to Berkeley.
55 *
56 * All advertising materials mentioning features or use of this software
57 * must display the following acknowledgement:
58 * This product includes software developed by the University of
59 * California, Lawrence Berkeley Laboratory.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 * 1. Redistributions of source code must retain the above copyright
65 * notice, this list of conditions and the following disclaimer.
66 * 2. Redistributions in binary form must reproduce the above copyright
67 * notice, this list of conditions and the following disclaimer in the
68 * documentation and/or other materials provided with the distribution.
69 * 3. Neither the name of the University nor the names of its contributors
70 * may be used to endorse or promote products derived from this software
71 * without specific prior written permission.
72 *
73 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
74 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
75 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
76 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
77 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
78 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
79 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
80 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
81 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
82 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
83 * SUCH DAMAGE.
84 *
85 * @(#)kern_subr.c 8.4 (Berkeley) 2/14/95
86 */
87
88 #include <sys/cdefs.h>
89 __KERNEL_RCSID(0, "$NetBSD: kern_subr.c,v 1.119 2005/08/28 20:58:14 reinoud Exp $");
90
91 #include "opt_ddb.h"
92 #include "opt_md.h"
93 #include "opt_syscall_debug.h"
94 #include "opt_ktrace.h"
95 #include "opt_systrace.h"
96
97 #include <sys/param.h>
98 #include <sys/systm.h>
99 #include <sys/proc.h>
100 #include <sys/malloc.h>
101 #include <sys/mount.h>
102 #include <sys/device.h>
103 #include <sys/reboot.h>
104 #include <sys/conf.h>
105 #include <sys/disklabel.h>
106 #include <sys/queue.h>
107 #include <sys/systrace.h>
108 #include <sys/ktrace.h>
109 #include <sys/fcntl.h>
110
111 #include <uvm/uvm_extern.h>
112
113 #include <dev/cons.h>
114
115 #include <net/if.h>
116
117 /* XXX these should eventually move to subr_autoconf.c */
118 static struct device *finddevice(const char *);
119 static struct device *getdisk(char *, int, int, dev_t *, int);
120 static struct device *parsedisk(char *, int, int, dev_t *);
121
122 /*
123 * A generic linear hook.
124 */
125 struct hook_desc {
126 LIST_ENTRY(hook_desc) hk_list;
127 void (*hk_fn)(void *);
128 void *hk_arg;
129 };
130 typedef LIST_HEAD(, hook_desc) hook_list_t;
131
132 MALLOC_DEFINE(M_IOV, "iov", "large iov's");
133
134 int
135 uiomove(void *buf, size_t n, struct uio *uio)
136 {
137 struct iovec *iov;
138 u_int cnt;
139 int error = 0;
140 char *cp = buf;
141 struct proc *p = uio->uio_procp;
142 int hold_count;
143
144 hold_count = KERNEL_LOCK_RELEASE_ALL();
145
146 #if defined(LOCKDEBUG) || defined(DIAGNOSTIC)
147 spinlock_switchcheck();
148 #endif
149 #ifdef LOCKDEBUG
150 simple_lock_only_held(NULL, "uiomove");
151 #endif
152
153 #ifdef DIAGNOSTIC
154 if (uio->uio_rw != UIO_READ && uio->uio_rw != UIO_WRITE)
155 panic("uiomove: mode");
156 #endif
157 while (n > 0 && uio->uio_resid) {
158 iov = uio->uio_iov;
159 cnt = iov->iov_len;
160 if (cnt == 0) {
161 KASSERT(uio->uio_iovcnt > 0);
162 uio->uio_iov++;
163 uio->uio_iovcnt--;
164 continue;
165 }
166 if (cnt > n)
167 cnt = n;
168 switch (uio->uio_segflg) {
169
170 case UIO_USERSPACE:
171 if (curcpu()->ci_schedstate.spc_flags &
172 SPCF_SHOULDYIELD)
173 preempt(1);
174 if (uio->uio_rw == UIO_READ)
175 error = copyout_proc(p, cp, iov->iov_base, cnt);
176 else
177 error = copyin_proc(p, iov->iov_base, cp, cnt);
178 if (error)
179 goto out;
180 break;
181
182 case UIO_SYSSPACE:
183 if (uio->uio_rw == UIO_READ)
184 error = kcopy(cp, iov->iov_base, cnt);
185 else
186 error = kcopy(iov->iov_base, cp, cnt);
187 if (error)
188 goto out;
189 break;
190 }
191 iov->iov_base = (caddr_t)iov->iov_base + cnt;
192 iov->iov_len -= cnt;
193 uio->uio_resid -= cnt;
194 uio->uio_offset += cnt;
195 cp += cnt;
196 KDASSERT(cnt <= n);
197 n -= cnt;
198 }
199 out:
200 KERNEL_LOCK_ACQUIRE_COUNT(hold_count);
201 return (error);
202 }
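/*
 * Example (an illustrative sketch, not code in this file): a typical
 * character-device read routine hands successive kernel buffers to
 * uiomove(), which advances uio_offset, uio_resid and the iovec cursor
 * as it copies.  `sc', `p', `len' and mydev_next_chunk() are hypothetical:
 *
 *	while (uio->uio_resid > 0 &&
 *	    (len = mydev_next_chunk(sc, &p)) > 0) {
 *		error = uiomove(p, len, uio);
 *		if (error)
 *			break;
 *	}
 */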
203
204 /*
205 * Wrapper for uiomove() that validates the arguments against a known-good
206 * kernel buffer.
207 */
208 int
209 uiomove_frombuf(void *buf, size_t buflen, struct uio *uio)
210 {
211 size_t offset;
212
213 if (uio->uio_offset < 0 || uio->uio_resid < 0 ||
214 (offset = uio->uio_offset) != uio->uio_offset)
215 return (EINVAL);
216 if (offset >= buflen)
217 return (0);
218 return (uiomove((char *)buf + offset, buflen - offset, uio));
219 }
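/*
 * Example (illustrative sketch): a read routine that exports a fixed
 * kernel buffer can delegate all offset and length checking to
 * uiomove_frombuf().  `mydev_msgbuf' and mydevread() are hypothetical:
 *
 *	static char mydev_msgbuf[256];
 *
 *	int
 *	mydevread(dev_t dev, struct uio *uio, int flags)
 *	{
 *
 *		return uiomove_frombuf(mydev_msgbuf, sizeof(mydev_msgbuf),
 *		    uio);
 *	}
 */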
220
221 /*
222 * Give next character to user as result of read.
223 */
224 int
225 ureadc(int c, struct uio *uio)
226 {
227 struct iovec *iov;
228
229 if (uio->uio_resid <= 0)
230 panic("ureadc: non-positive resid");
231 again:
232 if (uio->uio_iovcnt <= 0)
233 panic("ureadc: non-positive iovcnt");
234 iov = uio->uio_iov;
235 if (iov->iov_len <= 0) {
236 uio->uio_iovcnt--;
237 uio->uio_iov++;
238 goto again;
239 }
240 switch (uio->uio_segflg) {
241
242 case UIO_USERSPACE:
243 if (subyte(iov->iov_base, c) < 0)
244 return (EFAULT);
245 break;
246
247 case UIO_SYSSPACE:
248 *(char *)iov->iov_base = c;
249 break;
250 }
251 iov->iov_base = (caddr_t)iov->iov_base + 1;
252 iov->iov_len--;
253 uio->uio_resid--;
254 uio->uio_offset++;
255 return (0);
256 }
257
258 /*
259 * Like copyin(), but operates on an arbitrary process.
260 */
261 int
262 copyin_proc(struct proc *p, const void *uaddr, void *kaddr, size_t len)
263 {
264 struct iovec iov;
265 struct uio uio;
266 int error;
267
268 if (len == 0)
269 return (0);
270
271 if (__predict_true(p == curproc))
272 return copyin(uaddr, kaddr, len);
273
274 iov.iov_base = kaddr;
275 iov.iov_len = len;
276 uio.uio_iov = &iov;
277 uio.uio_iovcnt = 1;
278 uio.uio_offset = (off_t)(intptr_t)uaddr;
279 uio.uio_resid = len;
280 uio.uio_segflg = UIO_SYSSPACE;
281 uio.uio_rw = UIO_READ;
282 uio.uio_procp = NULL;
283
284 /* XXXCDC: how should locking work here? */
285 if ((p->p_flag & P_WEXIT) || (p->p_vmspace->vm_refcnt < 1))
286 return (EFAULT);
287 p->p_vmspace->vm_refcnt++; /* XXX */
288 error = uvm_io(&p->p_vmspace->vm_map, &uio);
289 uvmspace_free(p->p_vmspace);
290
291 return (error);
292 }
293
294 /*
295 * Like copyout(), but operates on an arbitrary process.
296 */
297 int
298 copyout_proc(struct proc *p, const void *kaddr, void *uaddr, size_t len)
299 {
300 struct iovec iov;
301 struct uio uio;
302 int error;
303
304 if (len == 0)
305 return (0);
306
307 if (__predict_true(p == curproc))
308 return copyout(kaddr, uaddr, len);
309
310 iov.iov_base = __UNCONST(kaddr); /* XXXUNCONST cast away const */
311 iov.iov_len = len;
312 uio.uio_iov = &iov;
313 uio.uio_iovcnt = 1;
314 uio.uio_offset = (off_t)(intptr_t)uaddr;
315 uio.uio_resid = len;
316 uio.uio_segflg = UIO_SYSSPACE;
317 uio.uio_rw = UIO_WRITE;
318 uio.uio_procp = NULL;
319
320 /* XXXCDC: how should locking work here? */
321 if ((p->p_flag & P_WEXIT) || (p->p_vmspace->vm_refcnt < 1))
322 return (EFAULT);
323 p->p_vmspace->vm_refcnt++; /* XXX */
324 error = uvm_io(&p->p_vmspace->vm_map, &uio);
325 uvmspace_free(p->p_vmspace);
326
327 return (error);
328 }
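/*
 * Example (illustrative sketch): debugger-style code can access another
 * process's address space without switching to it.  `t' (the target
 * process), `addr' and `word' are hypothetical:
 *
 *	error = copyin_proc(t, (const void *)addr, &word, sizeof(word));
 *	if (error == 0)
 *		error = copyout_proc(t, &word, (void *)addr, sizeof(word));
 */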
329
330 /*
331 * Like copyin(), except it operates on kernel addresses when the FKIOCTL
332 * flag is passed in `ioctlflags' from the ioctl call.
333 */
334 int
335 ioctl_copyin(int ioctlflags, const void *src, void *dst, size_t len)
336 {
337 if (ioctlflags & FKIOCTL)
338 return kcopy(src, dst, len);
339 return copyin(src, dst, len);
340 }
341
342 /*
343 * Like copyout(), except it operates on kernel addresses when the FKIOCTL
344 * flag is passed in `ioctlflags' from the ioctl call.
345 */
346 int
347 ioctl_copyout(int ioctlflags, const void *src, void *dst, size_t len)
348 {
349 if (ioctlflags & FKIOCTL)
350 return kcopy(src, dst, len);
351 return copyout(src, dst, len);
352 }
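/*
 * Example (illustrative sketch): an ioctl handler whose argument structure
 * carries a pointer to a separate buffer can use these wrappers so that
 * both user callers and in-kernel callers (which set FKIOCTL and pass
 * kernel pointers) work.  `struct my_args', `ma_buf', `ma_len' and `kbuf'
 * are hypothetical:
 *
 *	struct my_args *ma = (struct my_args *)data;
 *
 *	error = ioctl_copyin(flag, ma->ma_buf, kbuf, ma->ma_len);
 */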
353
354 /*
355 * General routine to allocate a hash table.
356 * Allocate enough memory to hold at least `elements' list-head pointers.
357 * Return a pointer to the allocated space and set *hashmask to a pattern
358 * suitable for masking a value to use as an index into the returned array.
359 */
360 void *
361 hashinit(u_int elements, enum hashtype htype, struct malloc_type *mtype,
362 int mflags, u_long *hashmask)
363 {
364 u_long hashsize, i;
365 LIST_HEAD(, generic) *hashtbl_list;
366 TAILQ_HEAD(, generic) *hashtbl_tailq;
367 size_t esize;
368 void *p;
369
370 if (elements == 0)
371 panic("hashinit: bad cnt");
372 for (hashsize = 1; hashsize < elements; hashsize <<= 1)
373 continue;
374
375 switch (htype) {
376 case HASH_LIST:
377 esize = sizeof(*hashtbl_list);
378 break;
379 case HASH_TAILQ:
380 esize = sizeof(*hashtbl_tailq);
381 break;
382 default:
383 #ifdef DIAGNOSTIC
384 panic("hashinit: invalid table type");
385 #else
386 return NULL;
387 #endif
388 }
389
390 if ((p = malloc(hashsize * esize, mtype, mflags)) == NULL)
391 return (NULL);
392
393 switch (htype) {
394 case HASH_LIST:
395 hashtbl_list = p;
396 for (i = 0; i < hashsize; i++)
397 LIST_INIT(&hashtbl_list[i]);
398 break;
399 case HASH_TAILQ:
400 hashtbl_tailq = p;
401 for (i = 0; i < hashsize; i++)
402 TAILQ_INIT(&hashtbl_tailq[i]);
403 break;
404 }
405 *hashmask = hashsize - 1;
406 return (p);
407 }
408
409 /*
410  * Free memory from a hash table previously allocated via hashinit().
411 */
412 void
413 hashdone(void *hashtbl, struct malloc_type *mtype)
414 {
415
416 free(hashtbl, mtype);
417 }
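/*
 * Example (illustrative sketch): a subsystem wanting a hash table of at
 * least 64 LIST heads would typically do something like the following;
 * `struct mything', `mt', `mt_hash' and `key' are hypothetical:
 *
 *	u_long mymask;
 *	LIST_HEAD(, mything) *mytbl;
 *
 *	mytbl = hashinit(64, HASH_LIST, M_TEMP, M_WAITOK, &mymask);
 *	...
 *	LIST_INSERT_HEAD(&mytbl[key & mymask], mt, mt_hash);
 *	...
 *	hashdone(mytbl, M_TEMP);
 */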
418
419
420 static void *
421 hook_establish(hook_list_t *list, void (*fn)(void *), void *arg)
422 {
423 struct hook_desc *hd;
424
425 hd = malloc(sizeof(*hd), M_DEVBUF, M_NOWAIT);
426 if (hd == NULL)
427 return (NULL);
428
429 hd->hk_fn = fn;
430 hd->hk_arg = arg;
431 LIST_INSERT_HEAD(list, hd, hk_list);
432
433 return (hd);
434 }
435
436 static void
437 hook_disestablish(hook_list_t *list, void *vhook)
438 {
439 #ifdef DIAGNOSTIC
440 struct hook_desc *hd;
441
442 LIST_FOREACH(hd, list, hk_list) {
443 if (hd == vhook)
444 break;
445 }
446
447 if (hd == NULL)
448 panic("hook_disestablish: hook %p not established", vhook);
449 #endif
450 LIST_REMOVE((struct hook_desc *)vhook, hk_list);
451 free(vhook, M_DEVBUF);
452 }
453
454 static void
455 hook_destroy(hook_list_t *list)
456 {
457 struct hook_desc *hd;
458
459 while ((hd = LIST_FIRST(list)) != NULL) {
460 LIST_REMOVE(hd, hk_list);
461 free(hd, M_DEVBUF);
462 }
463 }
464
465 static void
466 hook_proc_run(hook_list_t *list, struct proc *p)
467 {
468 struct hook_desc *hd;
469
470 for (hd = LIST_FIRST(list); hd != NULL; hd = LIST_NEXT(hd, hk_list)) {
471 ((void (*)(struct proc *, void *))*hd->hk_fn)(p,
472 hd->hk_arg);
473 }
474 }
475
476 /*
477 * "Shutdown hook" types, functions, and variables.
478 *
479  * The hooks should be invoked immediately before the
480  * system is halted or rebooted, i.e. after file systems are unmounted,
481  * after the crash dump is done, etc.
482 *
483 * Each shutdown hook is removed from the list before it's run, so that
484 * it won't be run again.
485 */
486
487 static hook_list_t shutdownhook_list;
488
489 void *
490 shutdownhook_establish(void (*fn)(void *), void *arg)
491 {
492 return hook_establish(&shutdownhook_list, fn, arg);
493 }
494
495 void
496 shutdownhook_disestablish(void *vhook)
497 {
498 hook_disestablish(&shutdownhook_list, vhook);
499 }
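/*
 * Example (illustrative sketch): a driver that must quiesce its hardware
 * before reboot registers a hook at attach time and removes it at detach
 * time.  `sc', `sc_sdhook' and mydriver_shutdown() are hypothetical:
 *
 *	sc->sc_sdhook = shutdownhook_establish(mydriver_shutdown, sc);
 *	if (sc->sc_sdhook == NULL)
 *		aprint_error("%s: couldn't establish shutdown hook\n",
 *		    sc->sc_dev.dv_xname);
 *	...
 *	shutdownhook_disestablish(sc->sc_sdhook);
 */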
500
501 /*
502  * Run shutdown hooks. Should be invoked immediately before the
503  * system is halted or rebooted, i.e. after file systems are unmounted,
504  * after the crash dump is done, etc.
505 *
506 * Each shutdown hook is removed from the list before it's run, so that
507 * it won't be run again.
508 */
509 void
510 doshutdownhooks(void)
511 {
512 struct hook_desc *dp;
513
514 while ((dp = LIST_FIRST(&shutdownhook_list)) != NULL) {
515 LIST_REMOVE(dp, hk_list);
516 (*dp->hk_fn)(dp->hk_arg);
517 #if 0
518 /*
519 		 * Don't bother freeing the hook structure, since we may
520 * be rebooting because of a memory corruption problem,
521 * and this might only make things worse. It doesn't
522 * matter, anyway, since the system is just about to
523 * reboot.
524 */
525 free(dp, M_DEVBUF);
526 #endif
527 }
528 }
529
530 /*
531 * "Mountroot hook" types, functions, and variables.
532 */
533
534 static hook_list_t mountroothook_list;
535
536 void *
537 mountroothook_establish(void (*fn)(struct device *), struct device *dev)
538 {
539 return hook_establish(&mountroothook_list, (void (*)(void *))fn, dev);
540 }
541
542 void
543 mountroothook_disestablish(void *vhook)
544 {
545 hook_disestablish(&mountroothook_list, vhook);
546 }
547
548 void
549 mountroothook_destroy(void)
550 {
551 hook_destroy(&mountroothook_list);
552 }
553
554 void
555 domountroothook(void)
556 {
557 struct hook_desc *hd;
558
559 LIST_FOREACH(hd, &mountroothook_list, hk_list) {
560 if (hd->hk_arg == (void *)root_device) {
561 (*hd->hk_fn)(hd->hk_arg);
562 return;
563 }
564 }
565 }
566
567 static hook_list_t exechook_list;
568
569 void *
570 exechook_establish(void (*fn)(struct proc *, void *), void *arg)
571 {
572 return hook_establish(&exechook_list, (void (*)(void *))fn, arg);
573 }
574
575 void
576 exechook_disestablish(void *vhook)
577 {
578 hook_disestablish(&exechook_list, vhook);
579 }
580
581 /*
582 * Run exec hooks.
583 */
584 void
585 doexechooks(struct proc *p)
586 {
587 hook_proc_run(&exechook_list, p);
588 }
589
590 static hook_list_t exithook_list;
591
592 void *
593 exithook_establish(void (*fn)(struct proc *, void *), void *arg)
594 {
595 return hook_establish(&exithook_list, (void (*)(void *))fn, arg);
596 }
597
598 void
599 exithook_disestablish(void *vhook)
600 {
601 hook_disestablish(&exithook_list, vhook);
602 }
603
604 /*
605 * Run exit hooks.
606 */
607 void
608 doexithooks(struct proc *p)
609 {
610 hook_proc_run(&exithook_list, p);
611 }
612
613 static hook_list_t forkhook_list;
614
615 void *
616 forkhook_establish(void (*fn)(struct proc *, struct proc *))
617 {
618 return hook_establish(&forkhook_list, (void (*)(void *))fn, NULL);
619 }
620
621 void
622 forkhook_disestablish(void *vhook)
623 {
624 hook_disestablish(&forkhook_list, vhook);
625 }
626
627 /*
628 * Run fork hooks.
629 */
630 void
631 doforkhooks(struct proc *p2, struct proc *p1)
632 {
633 struct hook_desc *hd;
634
635 LIST_FOREACH(hd, &forkhook_list, hk_list) {
636 ((void (*)(struct proc *, struct proc *))*hd->hk_fn)
637 (p2, p1);
638 }
639 }
640
641 /*
642 * "Power hook" types, functions, and variables.
643 * The list of power hooks is kept ordered with the last registered hook
644 * first.
645  * When running the hooks on power down, the hooks are called in reverse
646  * registration order; when powering up, in registration order.
647 */
648 struct powerhook_desc {
649 CIRCLEQ_ENTRY(powerhook_desc) sfd_list;
650 void (*sfd_fn)(int, void *);
651 void *sfd_arg;
652 };
653
654 static CIRCLEQ_HEAD(, powerhook_desc) powerhook_list =
655 CIRCLEQ_HEAD_INITIALIZER(powerhook_list);
656
657 void *
658 powerhook_establish(void (*fn)(int, void *), void *arg)
659 {
660 struct powerhook_desc *ndp;
661
662 ndp = (struct powerhook_desc *)
663 malloc(sizeof(*ndp), M_DEVBUF, M_NOWAIT);
664 if (ndp == NULL)
665 return (NULL);
666
667 ndp->sfd_fn = fn;
668 ndp->sfd_arg = arg;
669 CIRCLEQ_INSERT_HEAD(&powerhook_list, ndp, sfd_list);
670
671 return (ndp);
672 }
673
674 void
675 powerhook_disestablish(void *vhook)
676 {
677 #ifdef DIAGNOSTIC
678 struct powerhook_desc *dp;
679
680 CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list)
681 if (dp == vhook)
682 goto found;
683 panic("powerhook_disestablish: hook %p not established", vhook);
684 found:
685 #endif
686
687 CIRCLEQ_REMOVE(&powerhook_list, (struct powerhook_desc *)vhook,
688 sfd_list);
689 free(vhook, M_DEVBUF);
690 }
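/*
 * Example (illustrative sketch): a power hook normally dispatches on the
 * `why' argument.  mydriver_power(), mydriver_save_state(),
 * mydriver_restore_state() and `sc' are hypothetical:
 *
 *	static void
 *	mydriver_power(int why, void *arg)
 *	{
 *		struct mydriver_softc *sc = arg;
 *
 *		switch (why) {
 *		case PWR_SUSPEND:
 *		case PWR_STANDBY:
 *			mydriver_save_state(sc);
 *			break;
 *		case PWR_RESUME:
 *			mydriver_restore_state(sc);
 *			break;
 *		}
 *	}
 *
 *	sc->sc_powerhook = powerhook_establish(mydriver_power, sc);
 */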
691
692 /*
693 * Run power hooks.
694 */
695 void
696 dopowerhooks(int why)
697 {
698 struct powerhook_desc *dp;
699
700 if (why == PWR_RESUME || why == PWR_SOFTRESUME) {
701 CIRCLEQ_FOREACH_REVERSE(dp, &powerhook_list, sfd_list) {
702 (*dp->sfd_fn)(why, dp->sfd_arg);
703 }
704 } else {
705 CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list) {
706 (*dp->sfd_fn)(why, dp->sfd_arg);
707 }
708 }
709 }
710
711 /*
712 * Determine the root device and, if instructed to, the root file system.
713 */
714
715 #include "md.h"
716 #if NMD == 0
717 #undef MEMORY_DISK_HOOKS
718 #endif
719
720 #ifdef MEMORY_DISK_HOOKS
721 static struct device fakemdrootdev[NMD];
722 #endif
723
724 #ifdef MEMORY_DISK_IS_ROOT
725 #define BOOT_FROM_MEMORY_HOOKS 1
726 #endif
727
728 #include "raid.h"
729 #if NRAID == 1
730 #define BOOT_FROM_RAID_HOOKS 1
731 #endif
732
733 #ifdef BOOT_FROM_RAID_HOOKS
734 extern int numraid;
735 extern struct device *raidrootdev;
736 #endif
737
738 /*
739 * The device and wedge that we booted from. If booted_wedge is NULL,
740  * then we might consult booted_partition.
741 */
742 struct device *booted_device;
743 struct device *booted_wedge;
744 int booted_partition;
745
746 /*
747 * Use partition letters if it's a disk class but not a wedge.
748  * XXX The check for a wedge is kinda gross.
749 */
750 #define DEV_USES_PARTITIONS(dv) \
751 ((dv)->dv_class == DV_DISK && \
752 ((dv)->dv_cfdata == NULL || \
753 strcmp((dv)->dv_cfdata->cf_name, "dk") != 0))
754
755 void
756 setroot(struct device *bootdv, int bootpartition)
757 {
758 struct device *dv;
759 int len;
760 #ifdef MEMORY_DISK_HOOKS
761 int i;
762 #endif
763 dev_t nrootdev;
764 dev_t ndumpdev = NODEV;
765 char buf[128];
766 const char *rootdevname;
767 const char *dumpdevname;
768 struct device *rootdv = NULL; /* XXX gcc -Wuninitialized */
769 struct device *dumpdv = NULL;
770 struct ifnet *ifp;
771 const char *deffsname;
772 struct vfsops *vops;
773
774 #ifdef MEMORY_DISK_HOOKS
775 for (i = 0; i < NMD; i++) {
776 fakemdrootdev[i].dv_class = DV_DISK;
777 fakemdrootdev[i].dv_cfdata = NULL;
778 fakemdrootdev[i].dv_unit = i;
779 fakemdrootdev[i].dv_parent = NULL;
780 snprintf(fakemdrootdev[i].dv_xname,
781 sizeof(fakemdrootdev[i].dv_xname), "md%d", i);
782 }
783 #endif /* MEMORY_DISK_HOOKS */
784
785 #ifdef MEMORY_DISK_IS_ROOT
786 bootdv = &fakemdrootdev[0];
787 bootpartition = 0;
788 #endif
789
790 /*
791 * If NFS is specified as the file system, and we found
792 * a DV_DISK boot device (or no boot device at all), then
793 * find a reasonable network interface for "rootspec".
794 */
795 vops = vfs_getopsbyname("nfs");
796 if (vops != NULL && vops->vfs_mountroot == mountroot &&
797 rootspec == NULL &&
798 (bootdv == NULL || bootdv->dv_class != DV_IFNET)) {
799 IFNET_FOREACH(ifp) {
800 if ((ifp->if_flags &
801 (IFF_LOOPBACK|IFF_POINTOPOINT)) == 0)
802 break;
803 }
804 if (ifp == NULL) {
805 /*
806 * Can't find a suitable interface; ask the
807 * user.
808 */
809 boothowto |= RB_ASKNAME;
810 } else {
811 /*
812 * Have a suitable interface; behave as if
813 * the user specified this interface.
814 */
815 rootspec = (const char *)ifp->if_xname;
816 }
817 }
818
819 /*
820 	 * If the root is wildcarded and the boot device wasn't determined,
821 * ask the user.
822 */
823 if (rootspec == NULL && bootdv == NULL)
824 boothowto |= RB_ASKNAME;
825
826 top:
827 if (boothowto & RB_ASKNAME) {
828 struct device *defdumpdv;
829
830 for (;;) {
831 printf("root device");
832 if (bootdv != NULL) {
833 printf(" (default %s", bootdv->dv_xname);
834 if (DEV_USES_PARTITIONS(bootdv))
835 printf("%c", bootpartition + 'a');
836 printf(")");
837 }
838 printf(": ");
839 len = cngetsn(buf, sizeof(buf));
840 if (len == 0 && bootdv != NULL) {
841 strlcpy(buf, bootdv->dv_xname, sizeof(buf));
842 len = strlen(buf);
843 }
844 if (len > 0 && buf[len - 1] == '*') {
845 buf[--len] = '\0';
846 dv = getdisk(buf, len, 1, &nrootdev, 0);
847 if (dv != NULL) {
848 rootdv = dv;
849 break;
850 }
851 }
852 dv = getdisk(buf, len, bootpartition, &nrootdev, 0);
853 if (dv != NULL) {
854 rootdv = dv;
855 break;
856 }
857 }
858
859 /*
860 * Set up the default dump device. If root is on
861 * a network device, there is no default dump
862 * device, since we don't support dumps to the
863 * network.
864 */
865 if (DEV_USES_PARTITIONS(rootdv) == 0)
866 defdumpdv = NULL;
867 else
868 defdumpdv = rootdv;
869
870 for (;;) {
871 printf("dump device");
872 if (defdumpdv != NULL) {
873 /*
874 * Note, we know it's a disk if we get here.
875 */
876 printf(" (default %sb)", defdumpdv->dv_xname);
877 }
878 printf(": ");
879 len = cngetsn(buf, sizeof(buf));
880 if (len == 0) {
881 if (defdumpdv != NULL) {
882 ndumpdev = MAKEDISKDEV(major(nrootdev),
883 DISKUNIT(nrootdev), 1);
884 }
885 dumpdv = defdumpdv;
886 break;
887 }
888 if (len == 4 && strcmp(buf, "none") == 0) {
889 dumpdv = NULL;
890 break;
891 }
892 dv = getdisk(buf, len, 1, &ndumpdev, 1);
893 if (dv != NULL) {
894 dumpdv = dv;
895 break;
896 }
897 }
898
899 rootdev = nrootdev;
900 dumpdev = ndumpdev;
901
902 for (vops = LIST_FIRST(&vfs_list); vops != NULL;
903 vops = LIST_NEXT(vops, vfs_list)) {
904 if (vops->vfs_mountroot != NULL &&
905 vops->vfs_mountroot == mountroot)
906 break;
907 }
908
909 if (vops == NULL) {
910 mountroot = NULL;
911 deffsname = "generic";
912 } else
913 deffsname = vops->vfs_name;
914
915 for (;;) {
916 printf("file system (default %s): ", deffsname);
917 len = cngetsn(buf, sizeof(buf));
918 if (len == 0)
919 break;
920 if (len == 4 && strcmp(buf, "halt") == 0)
921 cpu_reboot(RB_HALT, NULL);
922 else if (len == 6 && strcmp(buf, "reboot") == 0)
923 cpu_reboot(0, NULL);
924 #if defined(DDB)
925 else if (len == 3 && strcmp(buf, "ddb") == 0) {
926 console_debugger();
927 }
928 #endif
929 else if (len == 7 && strcmp(buf, "generic") == 0) {
930 mountroot = NULL;
931 break;
932 }
933 vops = vfs_getopsbyname(buf);
934 if (vops == NULL || vops->vfs_mountroot == NULL) {
935 printf("use one of: generic");
936 for (vops = LIST_FIRST(&vfs_list);
937 vops != NULL;
938 vops = LIST_NEXT(vops, vfs_list)) {
939 if (vops->vfs_mountroot != NULL)
940 printf(" %s", vops->vfs_name);
941 }
942 #if defined(DDB)
943 printf(" ddb");
944 #endif
945 printf(" halt reboot\n");
946 } else {
947 mountroot = vops->vfs_mountroot;
948 break;
949 }
950 }
951
952 } else if (rootspec == NULL) {
953 int majdev;
954
955 /*
956 * Wildcarded root; use the boot device.
957 */
958 rootdv = bootdv;
959
960 majdev = devsw_name2blk(bootdv->dv_xname, NULL, 0);
961 if (majdev >= 0) {
962 /*
963 * Root is on a disk. `bootpartition' is root,
964 * unless the device does not use partitions.
965 */
966 if (DEV_USES_PARTITIONS(bootdv))
967 rootdev = MAKEDISKDEV(majdev, bootdv->dv_unit,
968 bootpartition);
969 else
970 rootdev = makedev(majdev, bootdv->dv_unit);
971 }
972 } else {
973
974 /*
975 * `root on <dev> ...'
976 */
977
978 /*
979 * If it's a network interface, we can bail out
980 * early.
981 */
982 dv = finddevice(rootspec);
983 if (dv != NULL && dv->dv_class == DV_IFNET) {
984 rootdv = dv;
985 goto haveroot;
986 }
987
988 rootdevname = devsw_blk2name(major(rootdev));
989 if (rootdevname == NULL) {
990 printf("unknown device major 0x%x\n", rootdev);
991 boothowto |= RB_ASKNAME;
992 goto top;
993 }
994 memset(buf, 0, sizeof(buf));
995 snprintf(buf, sizeof(buf), "%s%d", rootdevname,
996 DISKUNIT(rootdev));
997
998 rootdv = finddevice(buf);
999 if (rootdv == NULL) {
1000 printf("device %s (0x%x) not configured\n",
1001 buf, rootdev);
1002 boothowto |= RB_ASKNAME;
1003 goto top;
1004 }
1005 }
1006
1007 haveroot:
1008
1009 root_device = rootdv;
1010
1011 switch (rootdv->dv_class) {
1012 case DV_IFNET:
1013 aprint_normal("root on %s", rootdv->dv_xname);
1014 break;
1015
1016 case DV_DISK:
1017 aprint_normal("root on %s%c", rootdv->dv_xname,
1018 DISKPART(rootdev) + 'a');
1019 break;
1020
1021 default:
1022 printf("can't determine root device\n");
1023 boothowto |= RB_ASKNAME;
1024 goto top;
1025 }
1026
1027 /*
1028 * Now configure the dump device.
1029 *
1030 * If we haven't figured out the dump device, do so, with
1031 * the following rules:
1032 *
1033 * (a) We already know dumpdv in the RB_ASKNAME case.
1034 *
1035 * (b) If dumpspec is set, try to use it. If the device
1036 * is not available, punt.
1037 *
1038 * (c) If dumpspec is not set, the dump device is
1039 * wildcarded or unspecified. If the root device
1040 * is DV_IFNET, punt. Otherwise, use partition b
1041 * of the root device.
1042 */
1043
1044 if (boothowto & RB_ASKNAME) { /* (a) */
1045 if (dumpdv == NULL)
1046 goto nodumpdev;
1047 } else if (dumpspec != NULL) { /* (b) */
1048 if (strcmp(dumpspec, "none") == 0 || dumpdev == NODEV) {
1049 /*
1050 * Operator doesn't want a dump device.
1051 * Or looks like they tried to pick a network
1052 * device. Oops.
1053 */
1054 goto nodumpdev;
1055 }
1056
1057 dumpdevname = devsw_blk2name(major(dumpdev));
1058 if (dumpdevname == NULL)
1059 goto nodumpdev;
1060 memset(buf, 0, sizeof(buf));
1061 snprintf(buf, sizeof(buf), "%s%d", dumpdevname,
1062 DISKUNIT(dumpdev));
1063
1064 dumpdv = finddevice(buf);
1065 if (dumpdv == NULL) {
1066 /*
1067 * Device not configured.
1068 */
1069 goto nodumpdev;
1070 }
1071 } else { /* (c) */
1072 if (DEV_USES_PARTITIONS(rootdv) == 0)
1073 goto nodumpdev;
1074 else {
1075 dumpdv = rootdv;
1076 dumpdev = MAKEDISKDEV(major(rootdev),
1077 dumpdv->dv_unit, 1);
1078 }
1079 }
1080
1081 aprint_normal(" dumps on %s%c\n", dumpdv->dv_xname,
1082 DISKPART(dumpdev) + 'a');
1083 return;
1084
1085 nodumpdev:
1086 dumpdev = NODEV;
1087 aprint_normal("\n");
1088 }
1089
1090 static struct device *
1091 finddevice(const char *name)
1092 {
1093 struct device *dv;
1094 #if defined(BOOT_FROM_RAID_HOOKS) || defined(BOOT_FROM_MEMORY_HOOKS)
1095 int j;
1096 #endif /* BOOT_FROM_RAID_HOOKS || BOOT_FROM_MEMORY_HOOKS */
1097
1098 #ifdef BOOT_FROM_RAID_HOOKS
1099 for (j = 0; j < numraid; j++) {
1100 if (strcmp(name, raidrootdev[j].dv_xname) == 0) {
1101 dv = &raidrootdev[j];
1102 return (dv);
1103 }
1104 }
1105 #endif /* BOOT_FROM_RAID_HOOKS */
1106
1107 #ifdef BOOT_FROM_MEMORY_HOOKS
1108 for (j = 0; j < NMD; j++) {
1109 if (strcmp(name, fakemdrootdev[j].dv_xname) == 0) {
1110 dv = &fakemdrootdev[j];
1111 return (dv);
1112 }
1113 }
1114 #endif /* BOOT_FROM_MEMORY_HOOKS */
1115
1116 for (dv = TAILQ_FIRST(&alldevs); dv != NULL;
1117 dv = TAILQ_NEXT(dv, dv_list))
1118 if (strcmp(dv->dv_xname, name) == 0)
1119 break;
1120 return (dv);
1121 }
1122
1123 static struct device *
1124 getdisk(char *str, int len, int defpart, dev_t *devp, int isdump)
1125 {
1126 struct device *dv;
1127 #ifdef MEMORY_DISK_HOOKS
1128 int i;
1129 #endif
1130 #ifdef BOOT_FROM_RAID_HOOKS
1131 int j;
1132 #endif
1133
1134 if ((dv = parsedisk(str, len, defpart, devp)) == NULL) {
1135 printf("use one of:");
1136 #ifdef MEMORY_DISK_HOOKS
1137 if (isdump == 0)
1138 for (i = 0; i < NMD; i++)
1139 printf(" %s[a-%c]", fakemdrootdev[i].dv_xname,
1140 'a' + MAXPARTITIONS - 1);
1141 #endif
1142 #ifdef BOOT_FROM_RAID_HOOKS
1143 if (isdump == 0)
1144 for (j = 0; j < numraid; j++)
1145 printf(" %s[a-%c]", raidrootdev[j].dv_xname,
1146 'a' + MAXPARTITIONS - 1);
1147 #endif
1148 TAILQ_FOREACH(dv, &alldevs, dv_list) {
1149 if (DEV_USES_PARTITIONS(dv))
1150 printf(" %s[a-%c]", dv->dv_xname,
1151 'a' + MAXPARTITIONS - 1);
1152 else if (dv->dv_class == DV_DISK)
1153 printf(" %s", dv->dv_xname);
1154 if (isdump == 0 && dv->dv_class == DV_IFNET)
1155 printf(" %s", dv->dv_xname);
1156 }
1157 if (isdump)
1158 printf(" none");
1159 #if defined(DDB)
1160 printf(" ddb");
1161 #endif
1162 printf(" halt reboot\n");
1163 }
1164 return (dv);
1165 }
1166
1167 static struct device *
1168 parsedisk(char *str, int len, int defpart, dev_t *devp)
1169 {
1170 struct device *dv;
1171 char *cp, c;
1172 int majdev, part;
1173 #ifdef MEMORY_DISK_HOOKS
1174 int i;
1175 #endif
1176 if (len == 0)
1177 return (NULL);
1178
1179 if (len == 4 && strcmp(str, "halt") == 0)
1180 cpu_reboot(RB_HALT, NULL);
1181 else if (len == 6 && strcmp(str, "reboot") == 0)
1182 cpu_reboot(0, NULL);
1183 #if defined(DDB)
1184 else if (len == 3 && strcmp(str, "ddb") == 0)
1185 console_debugger();
1186 #endif
1187
1188 cp = str + len - 1;
1189 c = *cp;
1190 if (c >= 'a' && c <= ('a' + MAXPARTITIONS - 1)) {
1191 part = c - 'a';
1192 *cp = '\0';
1193 } else
1194 part = defpart;
1195
1196 #ifdef MEMORY_DISK_HOOKS
1197 for (i = 0; i < NMD; i++)
1198 if (strcmp(str, fakemdrootdev[i].dv_xname) == 0) {
1199 dv = &fakemdrootdev[i];
1200 goto gotdisk;
1201 }
1202 #endif
1203
1204 dv = finddevice(str);
1205 if (dv != NULL) {
1206 if (dv->dv_class == DV_DISK) {
1207 #ifdef MEMORY_DISK_HOOKS
1208 gotdisk:
1209 #endif
1210 majdev = devsw_name2blk(dv->dv_xname, NULL, 0);
1211 if (majdev < 0)
1212 panic("parsedisk");
1213 if (DEV_USES_PARTITIONS(dv))
1214 *devp = MAKEDISKDEV(majdev, dv->dv_unit, part);
1215 else
1216 *devp = makedev(majdev, dv->dv_unit);
1217 }
1218
1219 if (dv->dv_class == DV_IFNET)
1220 *devp = NODEV;
1221 }
1222
1223 *cp = c;
1224 return (dv);
1225 }
1226
1227 /*
1228  * snprintf() `bytes' into `buf', reformatting it so that the number
1229  * (plus a possible `x' + suffix extension) fits into len bytes (including
1230  * the terminating NUL).
1231  * Returns the number of bytes stored in buf, or -1 if there was a problem.
1232  * E.g., given a len of 9 and a suffix of `B':
1233 * bytes result
1234 * ----- ------
1235 * 99999 `99999 B'
1236 * 100000 `97 kB'
1237 * 66715648 `65152 kB'
1238 * 252215296 `240 MB'
1239 */
1240 int
1241 humanize_number(char *buf, size_t len, uint64_t bytes, const char *suffix,
1242 int divisor)
1243 {
1244 /* prefixes are: (none), kilo, Mega, Giga, Tera, Peta, Exa */
1245 const char *prefixes;
1246 int r;
1247 u_int64_t umax;
1248 size_t i, suffixlen;
1249
1250 if (buf == NULL || suffix == NULL)
1251 return (-1);
1252 if (len > 0)
1253 buf[0] = '\0';
1254 suffixlen = strlen(suffix);
1255 /* check if enough room for `x y' + suffix + `\0' */
1256 if (len < 4 + suffixlen)
1257 return (-1);
1258
1259 if (divisor == 1024) {
1260 /*
1261 		 * binary multiples
1262 * XXX IEC 60027-2 recommends Ki, Mi, Gi...
1263 */
1264 prefixes = " KMGTPE";
1265 } else
1266 		prefixes = " kMGTPE"; /* SI for decimal multiples */
1267
1268 umax = 1;
1269 for (i = 0; i < len - suffixlen - 3; i++)
1270 umax *= 10;
1271 for (i = 0; bytes >= umax && prefixes[i + 1]; i++)
1272 bytes /= divisor;
1273
1274 r = snprintf(buf, len, "%qu%s%c%s", (unsigned long long)bytes,
1275 i == 0 ? "" : " ", prefixes[i], suffix);
1276
1277 return (r);
1278 }
1279
1280 int
1281 format_bytes(char *buf, size_t len, uint64_t bytes)
1282 {
1283 int rv;
1284 size_t nlen;
1285
1286 rv = humanize_number(buf, len, bytes, "B", 1024);
1287 if (rv != -1) {
1288 /* nuke the trailing ` B' if it exists */
1289 nlen = strlen(buf) - 2;
1290 if (strcmp(&buf[nlen], " B") == 0)
1291 buf[nlen] = '\0';
1292 }
1293 return (rv);
1294 }
1295
1296 /*
1297  * Start a trace of a particular system call. If the process is being
1298  * traced, this routine is called by MD syscall dispatch code just before
1299  * a system call is actually executed.
1300  * The MD caller guarantees the passed 'code' is within the supported
1301  * system call number range for the emulation the process runs under.
1302 */
1303 int
1304 trace_enter(struct lwp *l, register_t code,
1305 register_t realcode, const struct sysent *callp, void *args)
1306 {
1307 #if defined(KTRACE) || defined(SYSTRACE)
1308 struct proc *p = l->l_proc;
1309 #endif
1310
1311 #ifdef SYSCALL_DEBUG
1312 scdebug_call(l, code, args);
1313 #endif /* SYSCALL_DEBUG */
1314
1315 #ifdef KTRACE
1316 if (KTRPOINT(p, KTR_SYSCALL))
1317 ktrsyscall(p, code, realcode, callp, args);
1318 #endif /* KTRACE */
1319
1320 #ifdef SYSTRACE
1321 if (ISSET(p->p_flag, P_SYSTRACE))
1322 return systrace_enter(p, code, args);
1323 #endif
1324 return 0;
1325 }
1326
1327 /*
1328  * End the trace of a particular system call. If the process is being
1329  * traced, this routine is called by MD syscall dispatch code just after
1330  * a system call finishes.
1331  * The MD caller guarantees the passed 'code' is within the supported
1332  * system call number range for the emulation the process runs under.
1333 */
1334 void
1335 trace_exit(struct lwp *l, register_t code, void *args, register_t rval[],
1336 int error)
1337 {
1338 #if defined(KTRACE) || defined(SYSTRACE)
1339 struct proc *p = l->l_proc;
1340 #endif
1341
1342 #ifdef SYSCALL_DEBUG
1343 scdebug_ret(l, code, error, rval);
1344 #endif /* SYSCALL_DEBUG */
1345
1346 #ifdef KTRACE
1347 if (KTRPOINT(p, KTR_SYSRET)) {
1348 KERNEL_PROC_LOCK(l);
1349 ktrsysret(p, code, error, rval);
1350 KERNEL_PROC_UNLOCK(l);
1351 }
1352 #endif /* KTRACE */
1353
1354 #ifdef SYSTRACE
1355 if (ISSET(p->p_flag, P_SYSTRACE))
1356 systrace_exit(p, code, args, rval, error);
1357 #endif
1358 }
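/*
 * Example (illustrative sketch): MD syscall dispatch code brackets the
 * actual system call roughly as follows; details vary per port:
 *
 *	if ((error = trace_enter(l, code, code, callp, args)) == 0)
 *		error = (*callp->sy_call)(l, args, rval);
 *	trace_exit(l, code, args, rval, error);
 */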
1359