init_main.c revision 1.200 1 /* $NetBSD: init_main.c,v 1.200 2002/05/27 13:46:45 itojun Exp $ */
2
3 /*
4 * Copyright (c) 1995 Christopher G. Demetriou. All rights reserved.
5 * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
6 * The Regents of the University of California. All rights reserved.
7 * (c) UNIX System Laboratories, Inc.
8 * All or some portions of this file are derived from material licensed
9 * to the University of California by American Telephone and Telegraph
10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11 * the permission of UNIX System Laboratories, Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 * must display the following acknowledgement:
23 * This product includes software developed by the University of
24 * California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * @(#)init_main.c 8.16 (Berkeley) 5/14/95
42 */
43
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.200 2002/05/27 13:46:45 itojun Exp $");
46
47 #include "fs_nfs.h"
48 #include "opt_nfsserver.h"
49 #include "opt_sysv.h"
50 #include "opt_maxuprc.h"
51 #include "opt_multiprocessor.h"
52 #include "opt_pipe.h"
53 #include "opt_syscall_debug.h"
54
55 #include "rnd.h"
56
57 #include <sys/param.h>
58 #include <sys/acct.h>
59 #include <sys/filedesc.h>
60 #include <sys/file.h>
61 #include <sys/errno.h>
62 #include <sys/callout.h>
63 #include <sys/kernel.h>
64 #include <sys/mount.h>
65 #include <sys/map.h>
66 #include <sys/proc.h>
67 #include <sys/kthread.h>
68 #include <sys/resourcevar.h>
69 #include <sys/signalvar.h>
70 #include <sys/systm.h>
71 #include <sys/vnode.h>
72 #include <sys/tty.h>
73 #include <sys/conf.h>
74 #include <sys/disklabel.h>
75 #include <sys/buf.h>
76 #include <sys/device.h>
77 #include <sys/disk.h>
78 #include <sys/exec.h>
79 #include <sys/socketvar.h>
80 #include <sys/protosw.h>
81 #include <sys/reboot.h>
82 #include <sys/user.h>
83 #include <sys/sysctl.h>
84 #ifdef SYSVSHM
85 #include <sys/shm.h>
86 #endif
87 #ifdef SYSVSEM
88 #include <sys/sem.h>
89 #endif
90 #ifdef SYSVMSG
91 #include <sys/msg.h>
92 #endif
93 #include <sys/domain.h>
94 #include <sys/mbuf.h>
95 #include <sys/namei.h>
96 #if NRND > 0
97 #include <sys/rnd.h>
98 #endif
99 #ifndef PIPE_SOCKETPAIR
100 #include <sys/pipe.h>
101 #endif
102
103 #include <sys/syscall.h>
104 #include <sys/syscallargs.h>
105
106 #include <ufs/ufs/quota.h>
107
108 #include <miscfs/genfs/genfs.h>
109 #include <miscfs/syncfs/syncfs.h>
110
111 #include <machine/cpu.h>
112
113 #include <uvm/uvm.h>
114
115 #include <net/if.h>
116 #include <net/raw_cb.h>
117
/*
 * Copyright banner.  main() prints this on the console right after
 * consinit().
 */
const char copyright[] =
	/* The NetBSD Foundation notice. */
	"Copyright (c) 1996, 1997, 1998, 1999, 2000, 2001, 2002\n"
	" The NetBSD Foundation, Inc. All rights reserved.\n"
	/* The UC Regents notice. */
	"Copyright (c) 1982, 1986, 1989, 1991, 1993\n"
	" The Regents of the University of California. All rights reserved.\n"
	/* Trailing blank line. */
	"\n";
124
125 /* Components of the first process -- never freed. */
126 struct session session0;
127 struct pgrp pgrp0;
128 struct proc proc0;
129 struct pcred cred0;
130 struct filedesc0 filedesc0;
131 struct cwdinfo cwdi0;
132 struct plimit limit0;
133 struct vmspace vmspace0;
134 struct sigacts sigacts0;
135 #ifndef curproc
136 struct proc *curproc = &proc0;
137 #endif
138 struct proc *initproc;
139
140 int cmask = CMASK;
141 extern struct user *proc0paddr;
142
143 struct vnode *rootvp, *swapdev_vp;
144 int boothowto;
145 int cold = 1; /* still working on startup */
146 struct timeval boottime;
147
148 __volatile int start_init_exec; /* semaphore for start_init() */
149
150 static void check_console(struct proc *p);
151 static void start_init(void *);
152 void main(void);
153
154 extern const struct emul emul_netbsd; /* defined in kern_exec.c */
155
156 /*
157 * System startup; initialize the world, create process 0, mount root
158 * filesystem, and fork to create init and pagedaemon. Most of the
159 * hard work is done in the lower-level initialization routines including
160 * startup(), which does memory initialization and autoconfiguration.
 *
 * The order of the calls below matters; several steps carry comments
 * stating what must already have happened.  This function does not
 * return: its last statement is the call to uvm_scheduler(), which is
 * marked NOTREACHED.
161 */
162 void
163 main(void)
164 {
165 struct proc *p;
166 struct pdevinit *pdev;
167 int i, s, error;
168 rlim_t lim;
169 extern struct pdevinit pdevinit[];
170 extern void schedcpu(void *);
171 #if defined(NFSSERVER) || defined(NFS)
172 extern void nfs_init(void);
173 #endif
174 #ifdef NVNODE_IMPLICIT
175 int usevnodes;
176 #endif
177
178 /*
179 * Initialize the current process pointer (curproc) before
180 * any possible traps/probes to simplify trap processing.
181 */
182 p = &proc0;
183 curproc = p;
184 p->p_cpu = curcpu();
185 /*
186 * Attempt to find console and initialize
187 * in case of early panic or other messages.
188 */
189 consinit();
190 printf("%s", copyright);
191
192 KERNEL_LOCK_INIT();
193
194 uvm_init();
195
196 /* Do machine-dependent initialization. */
197 cpu_startup();
198
199 /* Initialize callouts. */
200 callout_startup();
201
202 /*
203 * Initialize mbuf's. Do this now because we might attempt to
204 * allocate mbufs or mbuf clusters during autoconfiguration.
205 */
206 mbinit();
207
208 /* Initialize sockets. */
209 soinit();
210
211 /*
212 * The following 3 things must be done before autoconfiguration.
213 */
214 disk_init(); /* initialize disk list */
215 tty_init(); /* initialize tty list */
216 #if NRND > 0
217 rnd_init(); /* initialize RNG */
218 #endif
219
220 /* Initialize the sysctl subsystem. */
221 sysctl_init();
222
223 /*
224 * Initialize process and pgrp structures.
225 */
226 procinit();
227
228 /*
229 * Create process 0 (the swapper).
230 */
231 s = proclist_lock_write();
232 LIST_INSERT_HEAD(&allproc, p, p_list);
233 LIST_INSERT_HEAD(PIDHASH(p->p_pid), p, p_hash);
234 proclist_unlock_write(s);
235
/* proc0 is the sole member of pgrp0, and pgrp0 belongs to session0. */
236 p->p_pgrp = &pgrp0;
237 LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
238 LIST_INIT(&pgrp0.pg_members);
239 LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
240
241 pgrp0.pg_session = &session0;
242 session0.s_count = 1;
243 session0.s_sid = p->p_pid;
244 session0.s_leader = p;
245
246 /*
247 * Set P_NOCLDWAIT so that kernel threads are reparented to
248 * init(8) when they exit. init(8) can easily wait them out
249 * for us.
250 */
251 p->p_flag = P_INMEM | P_SYSTEM | P_NOCLDWAIT;
252 p->p_stat = SONPROC;
253 p->p_nice = NZERO;
254 p->p_emul = &emul_netbsd;
255 #ifdef __HAVE_SYSCALL_INTERN
256 (*p->p_emul->e_syscall_intern)(p);
257 #endif
258 strncpy(p->p_comm, "swapper", MAXCOMLEN);
259
260 callout_init(&p->p_realit_ch);
261 callout_init(&p->p_tsleep_ch);
262
263 /* Create credentials. */
264 cred0.p_refcnt = 1;
265 p->p_cred = &cred0;
266 p->p_ucred = crget();
267 p->p_ucred->cr_ngroups = 1; /* group 0 */
268
269 /* Create the file descriptor table. */
270 finit();
271 p->p_fd = &filedesc0.fd_fd;
272 fdinit1(&filedesc0);
273
274 /* Create the CWD info. */
275 p->p_cwdi = &cwdi0;
276 cwdi0.cwdi_cmask = cmask;
277 cwdi0.cwdi_refcnt = 1;
278
279 /* Create the limits structures. */
280 p->p_limit = &limit0;
/*
 * Start with every soft and hard limit at RLIM_INFINITY; the
 * assignments that follow tighten specific ones.
 */
281 for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
282 limit0.pl_rlimit[i].rlim_cur =
283 limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
284
/* Open files: hard limit is maxfiles, soft limit is min(maxfiles, NOFILE). */
285 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
286 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
287 maxfiles < NOFILE ? maxfiles : NOFILE;
288
/* Processes: hard limit is maxproc, soft limit is min(maxproc, MAXUPRC). */
289 limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
290 limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
291 maxproc < MAXUPRC ? maxproc : MAXUPRC;
292
/*
 * The RSS and locked-memory hard limits are derived from
 * uvmexp.free via ptoa(); the locked-memory soft limit is one
 * third of that value.
 */
293 lim = ptoa(uvmexp.free);
294 limit0.pl_rlimit[RLIMIT_RSS].rlim_max = lim;
295 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = lim;
296 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = lim / 3;
297 limit0.pl_corename = defcorename;
298 limit0.p_refcnt = 1;
299
300 /*
301 * Initialize proc0's vmspace, which uses the kernel pmap.
302 * All kernel processes (which never have user space mappings)
303 * share proc0's vmspace, and thus, the kernel pmap.
304 */
305 uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
306 trunc_page(VM_MAX_ADDRESS));
307 p->p_vmspace = &vmspace0;
308
309 p->p_addr = proc0paddr; /* XXX */
310
311 /*
312 * We continue to place resource usage info in the
313 * user struct so they're pageable.
314 */
315 p->p_stats = &p->p_addr->u_stats;
316
317 /*
318 * Charge root for one process.
319 */
320 (void)chgproccnt(0, 1);
321
322 rqinit();
323
324 /* Configure virtual memory system, set vm rlimits. */
325 uvm_init_limits(p);
326
327 /* Initialize the file systems. */
328 #if defined(NFSSERVER) || defined(NFS)
329 nfs_init(); /* initialize server/shared data */
330 #endif
331 #ifdef NVNODE_IMPLICIT
332 /*
333 * If maximum number of vnodes in namei vnode cache is not explicitly
334 * defined in kernel config, adjust the number such as we use roughly
335 * 0.5% of memory for vnode cache (but not less than NVNODE vnodes).
336 */
337 usevnodes = (ptoa((unsigned)physmem) / 200) / sizeof(struct vnode);
338 if (usevnodes > desiredvnodes)
339 desiredvnodes = usevnodes;
340 #endif
341 vfsinit();
342
343 /* Configure the system hardware. This will enable interrupts. */
344 configure();
345
346 ubc_init(); /* must be after autoconfig */
347
348 /* Lock the kernel on behalf of proc0. */
349 KERNEL_PROC_LOCK(p);
350
351 #ifdef SYSVSHM
352 /* Initialize System V style shared memory. */
353 shminit();
354 #endif
355
356 #ifdef SYSVSEM
357 /* Initialize System V style semaphores. */
358 seminit();
359 #endif
360
361 #ifdef SYSVMSG
362 /* Initialize System V style message queues. */
363 msginit();
364 #endif
365
366 /* Attach pseudo-devices. */
/* pdevinit[] is terminated by an entry whose pdev_attach is NULL. */
367 for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
368 (*pdev->pdev_attach)(pdev->pdev_count);
369
370 /*
371 * Initialize protocols. Block reception of incoming packets
372 * until everything is ready.
373 */
374 s = splnet();
375 ifinit();
376 domaininit();
377 if_attachdomain();
378 splx(s);
379
380 #ifdef GPROF
381 /* Initialize kernel profiling. */
382 kmstartup();
383 #endif
384
385 /* Initialize system accounting. */
386 acct_init();
387
388 /*
389 * Initialize signal-related data structures, and signal state
390 * for proc0.
391 */
392 signal_init();
393 p->p_sigacts = &sigacts0;
394 siginit(p);
395
396 /* Kick off timeout driven events by calling first time. */
397 schedcpu(NULL);
398
399 /*
400 * Create process 1 (init(8)). We do this now, as Unix has
401 * historically had init be process 1, and changing this would
402 * probably upset a lot of people.
403 *
404 * Note that process 1 won't immediately exec init(8), but will
405 * wait for us to inform it that the root file system has been
406 * mounted.
407 */
408 if (fork1(p, 0, SIGCHLD, NULL, 0, start_init, NULL, NULL, &initproc))
409 panic("fork init");
410
411 /*
412 * Create any kernel threads whose creation was deferred because
413 * initproc had not yet been created.
414 */
415 kthread_run_deferred_queue();
416
417 /*
418 * Now that device driver threads have been created, wait for
419 * them to finish any deferred autoconfiguration. Note we don't
420 * need to lock this semaphore, since we haven't booted any
421 * secondary processors, yet.
422 */
423 while (config_pending)
424 (void) tsleep((void *)&config_pending, PWAIT, "cfpend", 0);
425
426 /*
427 * Now that autoconfiguration has completed, we can determine
428 * the root and dump devices.
429 */
430 cpu_rootconf();
431 cpu_dumpconf();
432
433 /* Mount the root file system. */
/*
 * Keep retrying until vfs_mountroot() succeeds.  After each failure
 * the error is printed, RB_ASKNAME is set in boothowto and setroot()
 * is called again before the next attempt.
 */
434 do {
435 domountroothook();
436 if ((error = vfs_mountroot())) {
437 printf("cannot mount root, error = %d\n", error);
438 boothowto |= RB_ASKNAME;
439 setroot(root_device,
440 (rootdev != NODEV) ? DISKPART(rootdev) : 0);
441 }
442 } while (error != 0);
443 mountroothook_destroy();
444
/* Flag the first entry on mountlist as the root file system. */
445 mountlist.cqh_first->mnt_flag |= MNT_ROOTFS;
446 mountlist.cqh_first->mnt_op->vfs_refcount++;
447
448 /*
449 * Get the vnode for '/'. Set cwdi0.cwdi_cdir to
450 * reference it.
451 */
452 if (VFS_ROOT(mountlist.cqh_first, &rootvnode))
453 panic("cannot find root vnode");
454 cwdi0.cwdi_cdir = rootvnode;
455 VREF(cwdi0.cwdi_cdir);
456 VOP_UNLOCK(rootvnode, 0);
457 cwdi0.cwdi_rdir = NULL;
458
459 /*
460 * Now that root is mounted, we can fixup initproc's CWD
461 * info. All other processes are kthreads, which merely
462 * share proc0's CWD info.
463 */
464 initproc->p_cwdi->cwdi_cdir = rootvnode;
465 VREF(initproc->p_cwdi->cwdi_cdir);
466 initproc->p_cwdi->cwdi_rdir = NULL;
467
468 /*
469 * Now can look at time, having had a chance to verify the time
470 * from the file system. Reset p->p_rtime as it may have been
471 * munched in mi_switch() after the time got set.
472 */
473 proclist_lock_read();
474 s = splsched();
/* Note: p is reused as the loop cursor here and no longer means proc0. */
475 for (p = LIST_FIRST(&allproc); p != NULL;
476 p = LIST_NEXT(p, p_list)) {
477 p->p_stats->p_start = mono_time = boottime = time;
478 if (p->p_cpu != NULL)
479 p->p_cpu->ci_schedstate.spc_runtime = time;
480 p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0;
481 }
482 splx(s);
483 proclist_unlock_read();
484
485 /* Create the pageout daemon kernel thread. */
486 uvm_swap_init();
487 if (kthread_create1(uvm_pageout, NULL, NULL, "pagedaemon"))
488 panic("fork pagedaemon");
489
490 /* Create the process reaper kernel thread. */
491 if (kthread_create1(reaper, NULL, NULL, "reaper"))
492 panic("fork reaper");
493
494 /* Create the filesystem syncer kernel thread. */
495 if (kthread_create1(sched_sync, NULL, NULL, "ioflush"))
496 panic("fork syncer");
497
498 /* Create the aiodone daemon kernel thread. */
499 if (kthread_create1(uvm_aiodone_daemon, NULL, NULL, "aiodoned"))
500 panic("fork aiodoned");
501
502 #if defined(MULTIPROCESSOR)
503 /* Boot the secondary processors. */
504 cpu_boot_secondary_processors();
505 #endif
506
507 /* Initialize exec structures */
508 exec_init(1);
509
510 #ifndef PIPE_SOCKETPAIR
511 /* Initialize pipe structures */
512 pipe_init();
513 #endif
514
515 /*
516 * Okay, now we can let init(8) exec! It's off to userland!
 * start_init() sleeps on &start_init_exec until the flag is nonzero.
517 */
518 start_init_exec = 1;
519 wakeup((void *)&start_init_exec);
520
521 /* The scheduler is an infinite loop. */
522 uvm_scheduler();
523 /* NOTREACHED */
524 }
525
526 static void
527 check_console(struct proc *p)
528 {
529 struct nameidata nd;
530 int error;
531
532 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/console", p);
533 error = namei(&nd);
534 if (error == 0)
535 vrele(nd.ni_vp);
536 else if (error == ENOENT)
537 printf("warning: no /dev/console\n");
538 else
539 printf("warning: lookup /dev/console: error %d\n", error);
540 }
541
/*
 * Candidate pathnames for "init", in the order start_init() tries them.
 * The list is terminated by a NULL entry.
 */
static const char *initpaths[] = {
	"/sbin/init",		/* first choice */
	"/sbin/oinit",
	"/sbin/init.bak",
	NULL,			/* end of list */
};
551
552 /*
553 * Start the initial user process; try exec'ing each pathname in "initpaths".
554 * The program is invoked with one argument containing the boot flags.
555 */
556 static void
557 start_init(void *arg)
558 {
559 struct proc *p = arg;
560 vaddr_t addr;
561 struct sys_execve_args /* {
562 syscallarg(const char *) path;
563 syscallarg(char * const *) argp;
564 syscallarg(char * const *) envp;
565 } */ args;
566 int options, i, error;
567 register_t retval[2];
568 char flags[4], *flagsp;
569 const char **pathp, *path, *slash;
570 char *ucp, **uap, *arg0, *arg1 = NULL;
571
572 /*
573 * Now in process 1.
574 */
575 strncpy(p->p_comm, "init", MAXCOMLEN);
576
577 /*
578 * Wait for main() to tell us that it's safe to exec.
579 */
580 while (start_init_exec == 0)
581 (void) tsleep((void *)&start_init_exec, PWAIT, "initexec", 0);
582
583 /*
584 * This is not the right way to do this. We really should
585 * hand-craft a descriptor onto /dev/console to hand to init,
586 * but that's a _lot_ more work, and the benefit from this easy
587 * hack makes up for the "good is the enemy of the best" effect.
588 */
589 check_console(p);
590
591 /*
592 * Need just enough stack to hold the faked-up "execve()" arguments.
593 */
594 addr = USRSTACK - PAGE_SIZE;
595 if (uvm_map(&p->p_vmspace->vm_map, &addr, PAGE_SIZE,
596 NULL, UVM_UNKNOWN_OFFSET, 0,
597 UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY,
598 UVM_ADV_NORMAL,
599 UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW)) != 0)
600 panic("init: couldn't allocate argument space");
601 p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
602
603 for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) {
604 ucp = (char *)(addr + PAGE_SIZE);
605
606 /*
607 * Construct the boot flag argument.
608 */
609 flagsp = flags;
610 *flagsp++ = '-';
611 options = 0;
612
613 if (boothowto & RB_SINGLE) {
614 *flagsp++ = 's';
615 options = 1;
616 }
617 #ifdef notyet
618 if (boothowto & RB_FASTBOOT) {
619 *flagsp++ = 'f';
620 options = 1;
621 }
622 #endif
623
624 /*
625 * Move out the flags (arg 1), if necessary.
626 */
627 if (options != 0) {
628 *flagsp++ = '\0';
629 i = flagsp - flags;
630 #ifdef DEBUG
631 printf("init: copying out flags `%s' %d\n", flags, i);
632 #endif
633 (void)copyout((caddr_t)flags, (caddr_t)(ucp -= i), i);
634 arg1 = ucp;
635 }
636
637 /*
638 * Move out the file name (also arg 0).
639 */
640 i = strlen(path) + 1;
641 #ifdef DEBUG
642 printf("init: copying out path `%s' %d\n", path, i);
643 #endif
644 (void)copyout((caddr_t)path, (caddr_t)(ucp -= i), i);
645 arg0 = ucp;
646
647 /*
648 * Move out the arg pointers.
649 */
650 uap = (char **)((long)ucp & ~ALIGNBYTES);
651 (void)suword((caddr_t)--uap, 0); /* terminator */
652 if (options != 0)
653 (void)suword((caddr_t)--uap, (long)arg1);
654 slash = strrchr(path, '/');
655 if (slash)
656 (void)suword((caddr_t)--uap,
657 (long)arg0 + (slash + 1 - path));
658 else
659 (void)suword((caddr_t)--uap, (long)arg0);
660
661 /*
662 * Point at the arguments.
663 */
664 SCARG(&args, path) = arg0;
665 SCARG(&args, argp) = uap;
666 SCARG(&args, envp) = NULL;
667
668 /*
669 * Now try to exec the program. If can't for any reason
670 * other than it doesn't exist, complain.
671 */
672 error = sys_execve(p, &args, retval);
673 if (error == 0 || error == EJUSTRETURN) {
674 KERNEL_PROC_UNLOCK(p);
675 return;
676 }
677 if (error != ENOENT)
678 printf("exec %s: error %d\n", path, error);
679 }
680 printf("init: not found\n");
681 panic("no init");
682 }
683