init_main.c revision 1.191 1 /* $NetBSD: init_main.c,v 1.191 2001/06/08 12:53:30 mrg Exp $ */
2
3 /*
4 * Copyright (c) 1995 Christopher G. Demetriou. All rights reserved.
5 * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
6 * The Regents of the University of California. All rights reserved.
7 * (c) UNIX System Laboratories, Inc.
8 * All or some portions of this file are derived from material licensed
9 * to the University of California by American Telephone and Telegraph
10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11 * the permission of UNIX System Laboratories, Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 * must display the following acknowledgement:
23 * This product includes software developed by the University of
24 * California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * @(#)init_main.c 8.16 (Berkeley) 5/14/95
42 */
43
44 #include "fs_nfs.h"
45 #include "opt_nfsserver.h"
46 #include "opt_sysv.h"
47 #include "opt_maxuprc.h"
48 #include "opt_multiprocessor.h"
49 #include "opt_syscall_debug.h"
50
51 #include "rnd.h"
52
53 #include <sys/param.h>
54 #include <sys/acct.h>
55 #include <sys/filedesc.h>
56 #include <sys/file.h>
57 #include <sys/errno.h>
58 #include <sys/callout.h>
59 #include <sys/kernel.h>
60 #include <sys/mount.h>
61 #include <sys/map.h>
62 #include <sys/proc.h>
63 #include <sys/kthread.h>
64 #include <sys/resourcevar.h>
65 #include <sys/signalvar.h>
66 #include <sys/systm.h>
67 #include <sys/vnode.h>
68 #include <sys/tty.h>
69 #include <sys/conf.h>
70 #include <sys/disklabel.h>
71 #include <sys/buf.h>
72 #include <sys/device.h>
73 #include <sys/exec.h>
74 #include <sys/socketvar.h>
75 #include <sys/protosw.h>
76 #include <sys/reboot.h>
77 #include <sys/user.h>
78 #include <sys/sysctl.h>
79 #ifdef SYSVSHM
80 #include <sys/shm.h>
81 #endif
82 #ifdef SYSVSEM
83 #include <sys/sem.h>
84 #endif
85 #ifdef SYSVMSG
86 #include <sys/msg.h>
87 #endif
88 #include <sys/domain.h>
89 #include <sys/mbuf.h>
90 #include <sys/namei.h>
91 #if NRND > 0
92 #include <sys/rnd.h>
93 #endif
94
95 #include <sys/syscall.h>
96 #include <sys/syscallargs.h>
97
98 #include <ufs/ufs/quota.h>
99
100 #include <miscfs/genfs/genfs.h>
101 #include <miscfs/syncfs/syncfs.h>
102
103 #include <machine/cpu.h>
104
105 #include <uvm/uvm.h>
106
107 #include <net/if.h>
108 #include <net/raw_cb.h>
109
/*
 * Copyright banner.  main() prints this with printf("%s", ...) right
 * after the console has been initialized.
 */
const char copyright[] =
	"\n"
	"Copyright (c) 1996, 1997, 1998, 1999, 2000, 2001\n"
	" The NetBSD Foundation, Inc. All rights reserved.\n"
	"Copyright (c) 1982, 1986, 1989, 1991, 1993\n"
	" The Regents of the University of California. All rights reserved.\n"
	"\n";
116
117 /* Components of the first process -- never freed. */
118 struct session session0;
119 struct pgrp pgrp0;
120 struct proc proc0;
121 struct pcred cred0;
122 struct filedesc0 filedesc0;
123 struct cwdinfo cwdi0;
124 struct plimit limit0;
125 struct vmspace vmspace0;
126 struct sigacts sigacts0;
127 #ifndef curproc
128 struct proc *curproc = &proc0;
129 #endif
130 struct proc *initproc;
131
132 int cmask = CMASK;
133 extern struct user *proc0paddr;
134
135 struct vnode *rootvp, *swapdev_vp;
136 int boothowto;
137 int cold = 1; /* still working on startup */
138 struct timeval boottime;
139
140 __volatile int start_init_exec; /* semaphore for start_init() */
141
142 static void check_console(struct proc *p);
143 static void start_init(void *);
144 void main(void);
145
146 extern const struct emul emul_netbsd; /* defined in kern_exec.c */
147
148 /*
149 * System startup; initialize the world, create process 0, mount root
150 * filesystem, and fork to create init and pagedaemon. Most of the
151 * hard work is done in the lower-level initialization routines including
152 * startup(), which does memory initialization and autoconfiguration.
153 */
154 void
155 main(void)
156 {
157 struct proc *p;
158 struct pdevinit *pdev;
159 int i, s, error;
160 rlim_t lim;
161 extern struct pdevinit pdevinit[];
162 extern void schedcpu(void *);
163 extern void disk_init(void);
164 #if defined(NFSSERVER) || defined(NFS)
165 extern void nfs_init(void);
166 #endif
167 #ifdef NVNODE_IMPLICIT
168 int usevnodes;
169 #endif
170
171 /*
172 * Initialize the current process pointer (curproc) before
173 * any possible traps/probes to simplify trap processing.
174 */
175 p = &proc0;
176 curproc = p;
177 p->p_cpu = curcpu();
178 /*
179 * Attempt to find console and initialize
180 * in case of early panic or other messages.
181 */
182 consinit();
183 printf("%s", copyright);
184
185 KERNEL_LOCK_INIT();
186
187 uvm_init();
188
189 /* Do machine-dependent initialization. */
190 cpu_startup();
191
192 /* Initialize callouts. */
193 callout_startup();
194
195 /*
196 * Initialize mbuf's. Do this now because we might attempt to
197 * allocate mbufs or mbuf clusters during autoconfiguration.
198 */
199 mbinit();
200
201 /* Initialize sockets. */
202 soinit();
203
204 /*
205 * The following 3 things must be done before autoconfiguration.
206 */
207 disk_init(); /* initialize disk list */
208 tty_init(); /* initialize tty list */
209 #if NRND > 0
210 rnd_init(); /* initialize RNG */
211 #endif
212
213 /* Initialize the sysctl subsystem. */
214 sysctl_init();
215
216 /*
217 * Initialize process and pgrp structures.
218 */
219 procinit();
220
221 /*
222 * Create process 0 (the swapper).
223 */
224 s = proclist_lock_write();
225 LIST_INSERT_HEAD(&allproc, p, p_list);
226 LIST_INSERT_HEAD(PIDHASH(p->p_pid), p, p_hash);
227 proclist_unlock_write(s);
228
229 p->p_pgrp = &pgrp0;
230 LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
231 LIST_INIT(&pgrp0.pg_members);
232 LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
233
234 pgrp0.pg_session = &session0;
235 session0.s_count = 1;
236 session0.s_sid = p->p_pid;
237 session0.s_leader = p;
238
239 /*
240 * Set P_NOCLDWAIT so that kernel threads are reparented to
241 * init(8) when they exit. init(8) can easily wait them out
242 * for us.
243 */
244 p->p_flag = P_INMEM | P_SYSTEM | P_NOCLDWAIT;
245 p->p_stat = SONPROC;
246 p->p_nice = NZERO;
247 p->p_emul = &emul_netbsd;
248 #ifdef __HAVE_SYSCALL_INTERN
249 (*p->p_emul->e_syscall_intern)(p);
250 #endif
251 strncpy(p->p_comm, "swapper", MAXCOMLEN);
252
253 callout_init(&p->p_realit_ch);
254 callout_init(&p->p_tsleep_ch);
255
256 /* Create credentials. */
257 cred0.p_refcnt = 1;
258 p->p_cred = &cred0;
259 p->p_ucred = crget();
260 p->p_ucred->cr_ngroups = 1; /* group 0 */
261
262 /* Create the file descriptor table. */
263 finit();
264 p->p_fd = &filedesc0.fd_fd;
265 fdinit1(&filedesc0);
266
267 /* Create the CWD info. */
268 p->p_cwdi = &cwdi0;
269 cwdi0.cwdi_cmask = cmask;
270 cwdi0.cwdi_refcnt = 1;
271
272 /* Create the limits structures. */
273 p->p_limit = &limit0;
274 for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
275 limit0.pl_rlimit[i].rlim_cur =
276 limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
277
278 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
279 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
280 maxfiles < NOFILE ? maxfiles : NOFILE;
281
282 limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
283 limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
284 maxproc < MAXUPRC ? maxproc : MAXUPRC;
285
286 lim = ptoa(uvmexp.free);
287 limit0.pl_rlimit[RLIMIT_RSS].rlim_max = lim;
288 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = lim;
289 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = lim / 3;
290 limit0.pl_corename = defcorename;
291 limit0.p_refcnt = 1;
292
293 /*
294 * Initialize proc0's vmspace, which uses the kernel pmap.
295 * All kernel processes (which never have user space mappings)
296 * share proc0's vmspace, and thus, the kernel pmap.
297 */
298 uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
299 trunc_page(VM_MAX_ADDRESS), TRUE);
300 p->p_vmspace = &vmspace0;
301
302 p->p_addr = proc0paddr; /* XXX */
303
304 /*
305 * We continue to place resource usage info in the
306 * user struct so they're pageable.
307 */
308 p->p_stats = &p->p_addr->u_stats;
309
310 /*
311 * Charge root for one process.
312 */
313 (void)chgproccnt(0, 1);
314
315 rqinit();
316
317 /* Configure virtual memory system, set vm rlimits. */
318 uvm_init_limits(p);
319
320 /* Initialize the file systems. */
321 #if defined(NFSSERVER) || defined(NFS)
322 nfs_init(); /* initialize server/shared data */
323 #endif
324 vfsinit();
325
326 /* Configure the system hardware. This will enable interrupts. */
327 configure();
328
329 ubc_init(); /* must be after autoconfig */
330
331 /* Lock the kernel on behalf of proc0. */
332 KERNEL_PROC_LOCK(p);
333
334 #ifdef SYSVSHM
335 /* Initialize System V style shared memory. */
336 shminit();
337 #endif
338
339 #ifdef SYSVSEM
340 /* Initialize System V style semaphores. */
341 seminit();
342 #endif
343
344 #ifdef SYSVMSG
345 /* Initialize System V style message queues. */
346 msginit();
347 #endif
348
349 /* Attach pseudo-devices. */
350 for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
351 (*pdev->pdev_attach)(pdev->pdev_count);
352
353 /*
354 * Initialize protocols. Block reception of incoming packets
355 * until everything is ready.
356 */
357 s = splnet();
358 ifinit();
359 domaininit();
360 splx(s);
361
362 #ifdef GPROF
363 /* Initialize kernel profiling. */
364 kmstartup();
365 #endif
366
367 /* Initialize system accouting. */
368 acct_init();
369
370 /*
371 * Initialize signal-related data structures, and signal state
372 * for proc0.
373 */
374 signal_init();
375 p->p_sigacts = &sigacts0;
376 siginit(p);
377
378 /* Kick off timeout driven events by calling first time. */
379 schedcpu(NULL);
380
381 /*
382 * Create process 1 (init(8)). We do this now, as Unix has
383 * historically had init be process 1, and changing this would
384 * probably upset a lot of people.
385 *
386 * Note that process 1 won't immediately exec init(8), but will
387 * wait for us to inform it that the root file system has been
388 * mounted.
389 */
390 if (fork1(p, 0, SIGCHLD, NULL, 0, start_init, NULL, NULL, &initproc))
391 panic("fork init");
392
393 /*
394 * Create any kernel threads who's creation was deferred because
395 * initproc had not yet been created.
396 */
397 kthread_run_deferred_queue();
398
399 /*
400 * Now that device driver threads have been created, wait for
401 * them to finish any deferred autoconfiguration. Note we don't
402 * need to lock this semaphore, since we haven't booted any
403 * secondary processors, yet.
404 */
405 while (config_pending)
406 (void) tsleep((void *)&config_pending, PWAIT, "cfpend", 0);
407
408 /*
409 * Now that autoconfiguration has completed, we can determine
410 * the root and dump devices.
411 */
412 cpu_rootconf();
413 cpu_dumpconf();
414
415 /* Mount the root file system. */
416 do {
417 domountroothook();
418 if ((error = vfs_mountroot())) {
419 printf("cannot mount root, error = %d\n", error);
420 boothowto |= RB_ASKNAME;
421 setroot(root_device,
422 (rootdev != NODEV) ? DISKPART(rootdev) : 0);
423 }
424 } while (error != 0);
425 mountroothook_destroy();
426
427 mountlist.cqh_first->mnt_flag |= MNT_ROOTFS;
428 mountlist.cqh_first->mnt_op->vfs_refcount++;
429
430 /*
431 * Get the vnode for '/'. Set filedesc0.fd_fd.fd_cdir to
432 * reference it.
433 */
434 if (VFS_ROOT(mountlist.cqh_first, &rootvnode))
435 panic("cannot find root vnode");
436 cwdi0.cwdi_cdir = rootvnode;
437 VREF(cwdi0.cwdi_cdir);
438 VOP_UNLOCK(rootvnode, 0);
439 cwdi0.cwdi_rdir = NULL;
440
441 /*
442 * Now that root is mounted, we can fixup initproc's CWD
443 * info. All other processes are kthreads, which merely
444 * share proc0's CWD info.
445 */
446 initproc->p_cwdi->cwdi_cdir = rootvnode;
447 VREF(initproc->p_cwdi->cwdi_cdir);
448 initproc->p_cwdi->cwdi_rdir = NULL;
449
450 /*
451 * Now can look at time, having had a chance to verify the time
452 * from the file system. Reset p->p_rtime as it may have been
453 * munched in mi_switch() after the time got set.
454 */
455 proclist_lock_read();
456 s = splsched();
457 for (p = LIST_FIRST(&allproc); p != NULL;
458 p = LIST_NEXT(p, p_list)) {
459 p->p_stats->p_start = mono_time = boottime = time;
460 if (p->p_cpu != NULL)
461 p->p_cpu->ci_schedstate.spc_runtime = time;
462 p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0;
463 }
464 splx(s);
465 proclist_unlock_read();
466
467 /* Create the pageout daemon kernel thread. */
468 uvm_swap_init();
469 if (kthread_create1(uvm_pageout, NULL, NULL, "pagedaemon"))
470 panic("fork pagedaemon");
471
472 /* Create the process reaper kernel thread. */
473 if (kthread_create1(reaper, NULL, NULL, "reaper"))
474 panic("fork reaper");
475
476 /* Create the filesystem syncer kernel thread. */
477 if (kthread_create1(sched_sync, NULL, NULL, "ioflush"))
478 panic("fork syncer");
479
480 /* Create the aiodone daemon kernel thread. */
481 if (kthread_create1(uvm_aiodone_daemon, NULL, NULL, "aiodoned"))
482 panic("fork aiodoned");
483
484 #if defined(MULTIPROCESSOR)
485 /* Boot the secondary processors. */
486 cpu_boot_secondary_processors();
487 #endif
488
489 /* Initialize exec structures */
490 exec_init(1);
491
492 /*
493 * Okay, now we can let init(8) exec! It's off to userland!
494 */
495 start_init_exec = 1;
496 wakeup((void *)&start_init_exec);
497
498 #ifdef NVNODE_IMPLICIT
499 /*
500 * If maximum number of vnodes in namei vnode cache is not explicitly
501 * defined in kernel config, adjust the number such as we use roughly
502 * 0.5% of memory for vnode cache (but not less than NVNODE vnodes).
503 */
504 usevnodes = (ptoa(physmem) / 200) / sizeof(struct vnode);
505 if (usevnodes > desiredvnodes)
506 desiredvnodes = usevnodes;
507 #endif
508
509 /* The scheduler is an infinite loop. */
510 uvm_scheduler();
511 /* NOTREACHED */
512 }
513
514 static void
515 check_console(struct proc *p)
516 {
517 struct nameidata nd;
518 int error;
519
520 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/console", p);
521 error = namei(&nd);
522 if (error == 0)
523 vrele(nd.ni_vp);
524 else if (error == ENOENT)
525 printf("warning: no /dev/console\n");
526 else
527 printf("warning: lookup /dev/console: error %d\n", error);
528 }
529
/*
 * Candidate pathnames for "init", in the order start_init() tries
 * them.  The list is terminated by a NULL entry.
 */
static const char *initpaths[] = {
	"/sbin/init",
	"/sbin/oinit",
	"/sbin/init.bak",
	NULL,
};
539
540 /*
541 * Start the initial user process; try exec'ing each pathname in "initpaths".
542 * The program is invoked with one argument containing the boot flags.
543 */
544 static void
545 start_init(void *arg)
546 {
547 struct proc *p = arg;
548 vaddr_t addr;
549 struct sys_execve_args /* {
550 syscallarg(const char *) path;
551 syscallarg(char * const *) argp;
552 syscallarg(char * const *) envp;
553 } */ args;
554 int options, i, error;
555 register_t retval[2];
556 char flags[4], *flagsp;
557 const char **pathp, *path, *slash;
558 char *ucp, **uap, *arg0, *arg1 = NULL;
559
560 /*
561 * Now in process 1.
562 */
563 strncpy(p->p_comm, "init", MAXCOMLEN);
564
565 /*
566 * Wait for main() to tell us that it's safe to exec.
567 */
568 while (start_init_exec == 0)
569 (void) tsleep((void *)&start_init_exec, PWAIT, "initexec", 0);
570
571 /*
572 * This is not the right way to do this. We really should
573 * hand-craft a descriptor onto /dev/console to hand to init,
574 * but that's a _lot_ more work, and the benefit from this easy
575 * hack makes up for the "good is the enemy of the best" effect.
576 */
577 check_console(p);
578
579 /*
580 * Need just enough stack to hold the faked-up "execve()" arguments.
581 */
582 addr = USRSTACK - PAGE_SIZE;
583 if (uvm_map(&p->p_vmspace->vm_map, &addr, PAGE_SIZE,
584 NULL, UVM_UNKNOWN_OFFSET, 0,
585 UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY,
586 UVM_ADV_NORMAL,
587 UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW)) != 0)
588 panic("init: couldn't allocate argument space");
589 p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
590
591 for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) {
592 ucp = (char *)(addr + PAGE_SIZE);
593
594 /*
595 * Construct the boot flag argument.
596 */
597 flagsp = flags;
598 *flagsp++ = '-';
599 options = 0;
600
601 if (boothowto & RB_SINGLE) {
602 *flagsp++ = 's';
603 options = 1;
604 }
605 #ifdef notyet
606 if (boothowto & RB_FASTBOOT) {
607 *flagsp++ = 'f';
608 options = 1;
609 }
610 #endif
611
612 /*
613 * Move out the flags (arg 1), if necessary.
614 */
615 if (options != 0) {
616 *flagsp++ = '\0';
617 i = flagsp - flags;
618 #ifdef DEBUG
619 printf("init: copying out flags `%s' %d\n", flags, i);
620 #endif
621 (void)copyout((caddr_t)flags, (caddr_t)(ucp -= i), i);
622 arg1 = ucp;
623 }
624
625 /*
626 * Move out the file name (also arg 0).
627 */
628 i = strlen(path) + 1;
629 #ifdef DEBUG
630 printf("init: copying out path `%s' %d\n", path, i);
631 #endif
632 (void)copyout((caddr_t)path, (caddr_t)(ucp -= i), i);
633 arg0 = ucp;
634
635 /*
636 * Move out the arg pointers.
637 */
638 uap = (char **)((long)ucp & ~ALIGNBYTES);
639 (void)suword((caddr_t)--uap, 0); /* terminator */
640 if (options != 0)
641 (void)suword((caddr_t)--uap, (long)arg1);
642 slash = strrchr(path, '/');
643 if (slash)
644 (void)suword((caddr_t)--uap,
645 (long)arg0 + (slash + 1 - path));
646 else
647 (void)suword((caddr_t)--uap, (long)arg0);
648
649 /*
650 * Point at the arguments.
651 */
652 SCARG(&args, path) = arg0;
653 SCARG(&args, argp) = uap;
654 SCARG(&args, envp) = NULL;
655
656 /*
657 * Now try to exec the program. If can't for any reason
658 * other than it doesn't exist, complain.
659 */
660 error = sys_execve(p, &args, retval);
661 if (error == 0 || error == EJUSTRETURN) {
662 KERNEL_PROC_UNLOCK(p);
663 return;
664 }
665 if (error != ENOENT)
666 printf("exec %s: error %d\n", path, error);
667 }
668 printf("init: not found\n");
669 panic("no init");
670 }
671