init_main.c revision 1.260 1 /* $NetBSD: init_main.c,v 1.260 2005/12/11 12:24:29 christos Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)init_main.c 8.16 (Berkeley) 5/14/95
37 */
38
39 /*
40 * Copyright (c) 1995 Christopher G. Demetriou. All rights reserved.
41 *
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
44 * are met:
45 * 1. Redistributions of source code must retain the above copyright
46 * notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 * notice, this list of conditions and the following disclaimer in the
49 * documentation and/or other materials provided with the distribution.
50 * 3. All advertising materials mentioning features or use of this software
51 * must display the following acknowledgement:
52 * This product includes software developed by the University of
53 * California, Berkeley and its contributors.
54 * 4. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * @(#)init_main.c 8.16 (Berkeley) 5/14/95
71 */
72
73 #include <sys/cdefs.h>
74 __KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.260 2005/12/11 12:24:29 christos Exp $");
75
76 #include "opt_ipsec.h"
77 #include "opt_sysv.h"
78 #include "opt_maxuprc.h"
79 #include "opt_multiprocessor.h"
80 #include "opt_pipe.h"
81 #include "opt_syscall_debug.h"
82 #include "opt_systrace.h"
83 #include "opt_posix.h"
84 #include "opt_kcont.h"
85 #include "opt_rootfs_magiclinks.h"
86 #include "opt_verified_exec.h"
87
88 #include "rnd.h"
89
90 #include <sys/param.h>
91 #include <sys/acct.h>
92 #include <sys/filedesc.h>
93 #include <sys/file.h>
94 #include <sys/errno.h>
95 #include <sys/callout.h>
96 #include <sys/kernel.h>
97 #include <sys/kcont.h>
98 #include <sys/mount.h>
99 #include <sys/proc.h>
100 #include <sys/kthread.h>
101 #include <sys/resourcevar.h>
102 #include <sys/signalvar.h>
103 #include <sys/systm.h>
104 #include <sys/vnode.h>
105 #include <sys/tty.h>
106 #include <sys/conf.h>
107 #include <sys/disklabel.h>
108 #include <sys/buf.h>
109 #include <sys/device.h>
110 #include <sys/exec.h>
111 #include <sys/socketvar.h>
112 #include <sys/protosw.h>
113 #include <sys/reboot.h>
114 #include <sys/user.h>
115 #include <sys/sysctl.h>
116 #include <sys/event.h>
117 #include <sys/mbuf.h>
118 #ifdef FAST_IPSEC
119 #include <netipsec/ipsec.h>
120 #endif
121 #ifdef SYSVSHM
122 #include <sys/shm.h>
123 #endif
124 #ifdef SYSVSEM
125 #include <sys/sem.h>
126 #endif
127 #ifdef SYSVMSG
128 #include <sys/msg.h>
129 #endif
130 #ifdef P1003_1B_SEMAPHORE
131 #include <sys/ksem.h>
132 #endif
133 #ifdef SYSTRACE
134 #include <sys/systrace.h>
135 #endif
136 #include <sys/domain.h>
137 #include <sys/namei.h>
138 #if NRND > 0
139 #include <sys/rnd.h>
140 #endif
141 #ifndef PIPE_SOCKETPAIR
142 #include <sys/pipe.h>
143 #endif
144 #ifdef LKM
145 #include <sys/lkm.h>
146 #endif
147 #ifdef VERIFIED_EXEC
148 #include <sys/verified_exec.h>
149 #endif
150 #include <net80211/ieee80211_netbsd.h>
151
152 #include <sys/syscall.h>
153 #include <sys/sa.h>
154 #include <sys/syscallargs.h>
155
156 #include <ufs/ufs/quota.h>
157
158 #include <miscfs/genfs/genfs.h>
159 #include <miscfs/syncfs/syncfs.h>
160
161 #include <machine/cpu.h>
162
163 #include <uvm/uvm.h>
164
165 #include <dev/cons.h>
166
167 #include <net/if.h>
168 #include <net/raw_cb.h>
169
170 extern struct proc proc0;
171 extern struct lwp lwp0;
172 extern struct cwdinfo cwdi0;
173
174 #ifndef curlwp
175 struct lwp *curlwp = &lwp0;
176 #endif
177 struct proc *initproc;
178
179 struct vnode *rootvp, *swapdev_vp;
180 int boothowto;
181 int cold = 1; /* still working on startup */
182 struct timeval boottime;
183 time_t rootfstime; /* recorded root fs time, if known */
184
185 __volatile int start_init_exec; /* semaphore for start_init() */
186
187 static void check_console(struct lwp *l);
188 static void start_init(void *);
189 void main(void);
190
191 /*
192 * System startup; initialize the world, create process 0, mount root
193 * filesystem, and fork to create init and pagedaemon. Most of the
194 * hard work is done in the lower-level initialization routines including
195 * startup(), which does memory initialization and autoconfiguration.
196 */
197 void
198 main(void)
199 {
200 struct lwp *l;
201 struct proc *p;
202 struct pdevinit *pdev;
203 int s, error;
204 extern struct pdevinit pdevinit[];
205 extern void schedcpu(void *);
206 #ifdef NVNODE_IMPLICIT
207 int usevnodes;
208 #endif
209
210 /*
211 * Initialize the current LWP pointer (curlwp) before
212 * any possible traps/probes to simplify trap processing.
213 */
214 l = &lwp0;
215 curlwp = l;
216 l->l_cpu = curcpu();
217 l->l_proc = &proc0;
218 l->l_lid = 1;
219
220 /*
221 * Attempt to find console and initialize
222 * in case of early panic or other messages.
223 */
224 consinit();
225
226 KERNEL_LOCK_INIT();
227
228 uvm_init();
229
230 /* Do machine-dependent initialization. */
231 cpu_startup();
232
233 /* Initialize callouts. */
234 callout_startup();
235
236 /* Initialize the buffer cache */
237 bufinit();
238
239 /*
240 * Initialize mbuf's. Do this now because we might attempt to
241 * allocate mbufs or mbuf clusters during autoconfiguration.
242 */
243 mbinit();
244
245 /* Initialize sockets. */
246 soinit();
247
248 #ifdef KCONT
249 /* Initialize kcont. */
250 kcont_init();
251 #endif
252
253 /*
254 * The following things must be done before autoconfiguration.
255 */
256 evcnt_init(); /* initialize event counters */
257 #if NRND > 0
258 rnd_init(); /* initialize RNG */
259 #endif
260 /* Initialize the sysctl subsystem. */
261 sysctl_init();
262
263 /* Initialize process and pgrp structures. */
264 procinit();
265
266 /* Initialize signal-related data structures. */
267 signal_init();
268
269 /* Create process 0 (the swapper). */
270 proc0_init();
271
272 /*
273 * Charge root for one process.
274 */
275 (void)chgproccnt(0, 1);
276
277 rqinit();
278
279 /* Initialize the file systems. */
280 #ifdef NVNODE_IMPLICIT
281 /*
282 * If maximum number of vnodes in namei vnode cache is not explicitly
283 * defined in kernel config, adjust the number such as we use roughly
284 * 1.0% of memory for vnode cache (but not less than NVNODE vnodes).
285 */
286 usevnodes = (ptoa((unsigned)physmem) / 100) / sizeof(struct vnode);
287 if (usevnodes > desiredvnodes)
288 desiredvnodes = usevnodes;
289 #endif
290 vfsinit();
291
292 /* Configure the system hardware. This will enable interrupts. */
293 configure();
294
295 ubc_init(); /* must be after autoconfig */
296
297 /* Lock the kernel on behalf of proc0. */
298 KERNEL_PROC_LOCK(l);
299
300 #ifdef SYSVSHM
301 /* Initialize System V style shared memory. */
302 shminit();
303 #endif
304
305 #ifdef SYSVSEM
306 /* Initialize System V style semaphores. */
307 seminit();
308 #endif
309
310 #ifdef SYSVMSG
311 /* Initialize System V style message queues. */
312 msginit();
313 #endif
314
315 #ifdef P1003_1B_SEMAPHORE
316 /* Initialize posix semaphores */
317 ksem_init();
318 #endif
319
320 #ifdef VERIFIED_EXEC
321 /*
322 * Initialise the fingerprint operations vectors before
323 * fingerprints can be loaded.
324 */
325 veriexec_init_fp_ops();
326 #endif
327
328 /* Attach pseudo-devices. */
329 for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
330 (*pdev->pdev_attach)(pdev->pdev_count);
331
332 #ifdef FAST_IPSEC
333 /* Attach network crypto subsystem */
334 ipsec_attach();
335 #endif
336
337 /*
338 * Initialize protocols. Block reception of incoming packets
339 * until everything is ready.
340 */
341 s = splnet();
342 ifinit();
343 domaininit();
344 if_attachdomain();
345 splx(s);
346
347 #ifdef GPROF
348 /* Initialize kernel profiling. */
349 kmstartup();
350 #endif
351
352 /* Initialize system accouting. */
353 acct_init();
354
355 /* Kick off timeout driven events by calling first time. */
356 schedcpu(NULL);
357
358 /*
359 * Create process 1 (init(8)). We do this now, as Unix has
360 * historically had init be process 1, and changing this would
361 * probably upset a lot of people.
362 *
363 * Note that process 1 won't immediately exec init(8), but will
364 * wait for us to inform it that the root file system has been
365 * mounted.
366 */
367 if (fork1(l, 0, SIGCHLD, NULL, 0, start_init, NULL, NULL, &initproc))
368 panic("fork init");
369
370 /*
371 * Create any kernel threads who's creation was deferred because
372 * initproc had not yet been created.
373 */
374 kthread_run_deferred_queue();
375
376 /*
377 * Now that device driver threads have been created, wait for
378 * them to finish any deferred autoconfiguration. Note we don't
379 * need to lock this semaphore, since we haven't booted any
380 * secondary processors, yet.
381 */
382 while (config_pending)
383 (void) tsleep(&config_pending, PWAIT, "cfpend", 0);
384
385 /*
386 * Finalize configuration now that all real devices have been
387 * found. This needs to be done before the root device is
388 * selected, since finalization may create the root device.
389 */
390 config_finalize();
391
392 /*
393 * Now that autoconfiguration has completed, we can determine
394 * the root and dump devices.
395 */
396 cpu_rootconf();
397 cpu_dumpconf();
398
399 /* Mount the root file system. */
400 do {
401 domountroothook();
402 if ((error = vfs_mountroot())) {
403 printf("cannot mount root, error = %d\n", error);
404 boothowto |= RB_ASKNAME;
405 setroot(root_device,
406 (rootdev != NODEV) ? DISKPART(rootdev) : 0);
407 }
408 } while (error != 0);
409 mountroothook_destroy();
410
411 /*
412 * Initialise the time-of-day clock, passing the time recorded
413 * in the root filesystem (if any) for use by systems that
414 * don't have a non-volatile time-of-day device.
415 */
416 inittodr(rootfstime);
417
418 CIRCLEQ_FIRST(&mountlist)->mnt_flag |= MNT_ROOTFS;
419 #ifdef ROOTFS_MAGICLINKS
420 CIRCLEQ_FIRST(&mountlist)->mnt_flag |= MNT_MAGICLINKS;
421 #endif
422 CIRCLEQ_FIRST(&mountlist)->mnt_op->vfs_refcount++;
423
424 /*
425 * Get the vnode for '/'. Set filedesc0.fd_fd.fd_cdir to
426 * reference it.
427 */
428 error = VFS_ROOT(CIRCLEQ_FIRST(&mountlist), &rootvnode);
429 if (error)
430 panic("cannot find root vnode, error=%d", error);
431 cwdi0.cwdi_cdir = rootvnode;
432 VREF(cwdi0.cwdi_cdir);
433 VOP_UNLOCK(rootvnode, 0);
434 cwdi0.cwdi_rdir = NULL;
435
436 /*
437 * Now that root is mounted, we can fixup initproc's CWD
438 * info. All other processes are kthreads, which merely
439 * share proc0's CWD info.
440 */
441 initproc->p_cwdi->cwdi_cdir = rootvnode;
442 VREF(initproc->p_cwdi->cwdi_cdir);
443 initproc->p_cwdi->cwdi_rdir = NULL;
444
445 /*
446 * Now can look at time, having had a chance to verify the time
447 * from the file system. Reset p->p_rtime as it may have been
448 * munched in mi_switch() after the time got set.
449 */
450 proclist_lock_read();
451 s = splsched();
452 LIST_FOREACH(p, &allproc, p_list) {
453 KASSERT((p->p_flag & P_MARKER) == 0);
454 p->p_stats->p_start = mono_time = boottime = time;
455 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
456 if (l->l_cpu != NULL)
457 l->l_cpu->ci_schedstate.spc_runtime = time;
458 }
459 p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0;
460 }
461 splx(s);
462 proclist_unlock_read();
463
464 /* Create the pageout daemon kernel thread. */
465 uvm_swap_init();
466 if (kthread_create1(uvm_pageout, NULL, NULL, "pagedaemon"))
467 panic("fork pagedaemon");
468
469 /* Create the filesystem syncer kernel thread. */
470 if (kthread_create1(sched_sync, NULL, NULL, "ioflush"))
471 panic("fork syncer");
472
473 /* Create the aiodone daemon kernel thread. */
474 if (kthread_create1(uvm_aiodone_daemon, NULL, &uvm.aiodoned_proc,
475 "aiodoned"))
476 panic("fork aiodoned");
477
478 #if defined(MULTIPROCESSOR)
479 /* Boot the secondary processors. */
480 cpu_boot_secondary_processors();
481 #endif
482
483 /* Initialize exec structures */
484 exec_init(1);
485
486 /*
487 * Okay, now we can let init(8) exec! It's off to userland!
488 */
489 start_init_exec = 1;
490 wakeup(&start_init_exec);
491
492 /* The scheduler is an infinite loop. */
493 uvm_scheduler();
494 /* NOTREACHED */
495 }
496
497 void
498 setrootfstime(time_t t)
499 {
500
501 rootfstime = t;
502 }
503
504 static void
505 check_console(struct lwp *l)
506 {
507 struct nameidata nd;
508 int error;
509
510 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/console", l);
511 error = namei(&nd);
512 if (error == 0)
513 vrele(nd.ni_vp);
514 else if (error == ENOENT)
515 printf("warning: no /dev/console\n");
516 else
517 printf("warning: lookup /dev/console: error %d\n", error);
518 }
519
/*
 * Pathnames tried, in order, when searching for "init".  The list is
 * NULL-terminated.  Both the strings and the table itself are
 * read-only.
 */
static const char * const initpaths[] = {
	"/sbin/init",
	"/sbin/oinit",
	"/sbin/init.bak",
	NULL,
};
529
530 /*
531 * Start the initial user process; try exec'ing each pathname in "initpaths".
532 * The program is invoked with one argument containing the boot flags.
533 */
534 static void
535 start_init(void *arg)
536 {
537 struct lwp *l = arg;
538 struct proc *p = l->l_proc;
539 vaddr_t addr;
540 struct sys_execve_args /* {
541 syscallarg(const char *) path;
542 syscallarg(char * const *) argp;
543 syscallarg(char * const *) envp;
544 } */ args;
545 int options, i, error;
546 register_t retval[2];
547 char flags[4], *flagsp;
548 const char *path, *slash;
549 char *ucp, **uap, *arg0, *arg1 = NULL;
550 char ipath[129];
551 int ipx, len;
552
553 /*
554 * Now in process 1.
555 */
556 strncpy(p->p_comm, "init", MAXCOMLEN);
557
558 /*
559 * Wait for main() to tell us that it's safe to exec.
560 */
561 while (start_init_exec == 0)
562 (void) tsleep(&start_init_exec, PWAIT, "initexec", 0);
563
564 /*
565 * This is not the right way to do this. We really should
566 * hand-craft a descriptor onto /dev/console to hand to init,
567 * but that's a _lot_ more work, and the benefit from this easy
568 * hack makes up for the "good is the enemy of the best" effect.
569 */
570 check_console(l);
571
572 /*
573 * Need just enough stack to hold the faked-up "execve()" arguments.
574 */
575 addr = (vaddr_t)STACK_ALLOC(USRSTACK, PAGE_SIZE);
576 if (uvm_map(&p->p_vmspace->vm_map, &addr, PAGE_SIZE,
577 NULL, UVM_UNKNOWN_OFFSET, 0,
578 UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY,
579 UVM_ADV_NORMAL,
580 UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW)) != 0)
581 panic("init: couldn't allocate argument space");
582 p->p_vmspace->vm_maxsaddr = (caddr_t)STACK_MAX(addr, PAGE_SIZE);
583
584 ipx = 0;
585 while (1) {
586 if (boothowto & RB_ASKNAME) {
587 printf("init path");
588 if (initpaths[ipx])
589 printf(" (default %s)", initpaths[ipx]);
590 printf(": ");
591 len = cngetsn(ipath, sizeof(ipath)-1);
592 if (len == 0) {
593 if (initpaths[ipx])
594 path = initpaths[ipx++];
595 else
596 continue;
597 } else {
598 ipath[len] = '\0';
599 path = ipath;
600 }
601 } else {
602 if ((path = initpaths[ipx++]) == NULL)
603 break;
604 }
605
606 ucp = (char *)USRSTACK;
607
608 /*
609 * Construct the boot flag argument.
610 */
611 flagsp = flags;
612 *flagsp++ = '-';
613 options = 0;
614
615 if (boothowto & RB_SINGLE) {
616 *flagsp++ = 's';
617 options = 1;
618 }
619 #ifdef notyet
620 if (boothowto & RB_FASTBOOT) {
621 *flagsp++ = 'f';
622 options = 1;
623 }
624 #endif
625
626 /*
627 * Move out the flags (arg 1), if necessary.
628 */
629 if (options != 0) {
630 *flagsp++ = '\0';
631 i = flagsp - flags;
632 #ifdef DEBUG
633 printf("init: copying out flags `%s' %d\n", flags, i);
634 #endif
635 arg1 = STACK_ALLOC(ucp, i);
636 ucp = STACK_MAX(arg1, i);
637 (void)copyout((caddr_t)flags, arg1, i);
638 }
639
640 /*
641 * Move out the file name (also arg 0).
642 */
643 i = strlen(path) + 1;
644 #ifdef DEBUG
645 printf("init: copying out path `%s' %d\n", path, i);
646 #else
647 if (boothowto & RB_ASKNAME || path != initpaths[0])
648 printf("init: trying %s\n", path);
649 #endif
650 arg0 = STACK_ALLOC(ucp, i);
651 ucp = STACK_MAX(arg0, i);
652 (void)copyout(path, arg0, i);
653
654 /*
655 * Move out the arg pointers.
656 */
657 ucp = (caddr_t)STACK_ALIGN(ucp, ALIGNBYTES);
658 uap = (char **)STACK_ALLOC(ucp, sizeof(char *) * 3);
659 SCARG(&args, path) = arg0;
660 SCARG(&args, argp) = uap;
661 SCARG(&args, envp) = NULL;
662 slash = strrchr(path, '/');
663 if (slash)
664 (void)suword((caddr_t)uap++,
665 (long)arg0 + (slash + 1 - path));
666 else
667 (void)suword((caddr_t)uap++, (long)arg0);
668 if (options != 0)
669 (void)suword((caddr_t)uap++, (long)arg1);
670 (void)suword((caddr_t)uap++, 0); /* terminator */
671
672 /*
673 * Now try to exec the program. If can't for any reason
674 * other than it doesn't exist, complain.
675 */
676 error = sys_execve(l, &args, retval);
677 if (error == 0 || error == EJUSTRETURN) {
678 KERNEL_PROC_UNLOCK(l);
679 return;
680 }
681 printf("exec %s: error %d\n", path, error);
682 }
683 printf("init: not found\n");
684 panic("no init");
685 }
686