init_main.c revision 1.199 1 /* $NetBSD: init_main.c,v 1.199 2002/03/04 02:30:27 simonb Exp $ */
2
3 /*
4 * Copyright (c) 1995 Christopher G. Demetriou. All rights reserved.
5 * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
6 * The Regents of the University of California. All rights reserved.
7 * (c) UNIX System Laboratories, Inc.
8 * All or some portions of this file are derived from material licensed
9 * to the University of California by American Telephone and Telegraph
10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11 * the permission of UNIX System Laboratories, Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 * must display the following acknowledgement:
23 * This product includes software developed by the University of
24 * California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * @(#)init_main.c 8.16 (Berkeley) 5/14/95
42 */
43
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.199 2002/03/04 02:30:27 simonb Exp $");
46
47 #include "fs_nfs.h"
48 #include "opt_nfsserver.h"
49 #include "opt_sysv.h"
50 #include "opt_maxuprc.h"
51 #include "opt_multiprocessor.h"
52 #include "opt_pipe.h"
53 #include "opt_syscall_debug.h"
54
55 #include "rnd.h"
56
57 #include <sys/param.h>
58 #include <sys/acct.h>
59 #include <sys/filedesc.h>
60 #include <sys/file.h>
61 #include <sys/errno.h>
62 #include <sys/callout.h>
63 #include <sys/kernel.h>
64 #include <sys/mount.h>
65 #include <sys/map.h>
66 #include <sys/proc.h>
67 #include <sys/kthread.h>
68 #include <sys/resourcevar.h>
69 #include <sys/signalvar.h>
70 #include <sys/systm.h>
71 #include <sys/vnode.h>
72 #include <sys/tty.h>
73 #include <sys/conf.h>
74 #include <sys/disklabel.h>
75 #include <sys/buf.h>
76 #include <sys/device.h>
77 #include <sys/disk.h>
78 #include <sys/exec.h>
79 #include <sys/socketvar.h>
80 #include <sys/protosw.h>
81 #include <sys/reboot.h>
82 #include <sys/user.h>
83 #include <sys/sysctl.h>
84 #ifdef SYSVSHM
85 #include <sys/shm.h>
86 #endif
87 #ifdef SYSVSEM
88 #include <sys/sem.h>
89 #endif
90 #ifdef SYSVMSG
91 #include <sys/msg.h>
92 #endif
93 #include <sys/domain.h>
94 #include <sys/mbuf.h>
95 #include <sys/namei.h>
96 #if NRND > 0
97 #include <sys/rnd.h>
98 #endif
99 #ifndef PIPE_SOCKETPAIR
100 #include <sys/pipe.h>
101 #endif
102
103 #include <sys/syscall.h>
104 #include <sys/syscallargs.h>
105
106 #include <ufs/ufs/quota.h>
107
108 #include <miscfs/genfs/genfs.h>
109 #include <miscfs/syncfs/syncfs.h>
110
111 #include <machine/cpu.h>
112
113 #include <uvm/uvm.h>
114
115 #include <net/if.h>
116 #include <net/raw_cb.h>
117
/*
 * Copyright banner.  main() prints this string on the console with
 * printf("%s", ...) immediately after consinit().
 */
const char copyright[] =
"Copyright (c) 1996, 1997, 1998, 1999, 2000, 2001, 2002\n"
" The NetBSD Foundation, Inc. All rights reserved.\n"
"Copyright (c) 1982, 1986, 1989, 1991, 1993\n"
" The Regents of the University of California. All rights reserved.\n"
"\n";
124
/*
 * Components of the first process -- never freed.
 * main() wires each of these statically allocated objects into proc0.
 */
struct session session0;
struct pgrp pgrp0;
struct proc proc0;
struct pcred cred0;
struct filedesc0 filedesc0;
struct cwdinfo cwdi0;
struct plimit limit0;
struct vmspace vmspace0;
struct sigacts sigacts0;
/*
 * If curproc is not already provided as a macro, define it here as a
 * plain variable that initially points at proc0.
 */
#ifndef curproc
struct proc *curproc = &proc0;
#endif
/* Process 1; filled in by the fork1() call in main(). */
struct proc *initproc;

/* Initial file creation mask; main() copies it into cwdi0.cwdi_cmask. */
int cmask = CMASK;
/* Defined outside this file; main() assigns it to proc0's p_addr. */
extern struct user *proc0paddr;

struct vnode *rootvp, *swapdev_vp;
/* Boot flags (RB_*): main() may OR in RB_ASKNAME, start_init() tests RB_SINGLE. */
int boothowto;
int cold = 1; /* still working on startup */
/* Set from `time' by main() once the root file system has been mounted. */
struct timeval boottime;

/*
 * main() sets this to 1 and calls wakeup() on its address; start_init()
 * sleeps on it until it becomes non-zero.
 */
__volatile int start_init_exec; /* semaphore for start_init() */

static void check_console(struct proc *p);
static void start_init(void *);
void main(void);

extern const struct emul emul_netbsd; /* defined in kern_exec.c */
155
/*
 * System startup; initialize the world, create process 0, mount the root
 * file system, fork init (process 1) and create the pagedaemon, reaper,
 * ioflush and aiodoned kernel threads.  Most of the hard work is done in
 * the lower-level initialization routines called from here, such as
 * uvm_init(), cpu_startup() and configure().
 *
 * The calls below are order-dependent; several comments note which
 * steps must precede autoconfiguration.  This function takes no
 * arguments and is not expected to return: it finishes by entering
 * uvm_scheduler().
 */
void
main(void)
{
	struct proc *p;
	struct pdevinit *pdev;
	int i, s, error;
	rlim_t lim;
	extern struct pdevinit pdevinit[];
	extern void schedcpu(void *);
#if defined(NFSSERVER) || defined(NFS)
	extern void nfs_init(void);
#endif
#ifdef NVNODE_IMPLICIT
	int usevnodes;
#endif

	/*
	 * Initialize the current process pointer (curproc) before
	 * any possible traps/probes to simplify trap processing.
	 */
	p = &proc0;
	curproc = p;
	p->p_cpu = curcpu();
	/*
	 * Attempt to find console and initialize
	 * in case of early panic or other messages.
	 */
	consinit();
	printf("%s", copyright);

	KERNEL_LOCK_INIT();

	uvm_init();

	/* Do machine-dependent initialization. */
	cpu_startup();

	/* Initialize callouts. */
	callout_startup();

	/*
	 * Initialize mbuf's. Do this now because we might attempt to
	 * allocate mbufs or mbuf clusters during autoconfiguration.
	 */
	mbinit();

	/* Initialize sockets. */
	soinit();

	/*
	 * The following 3 things must be done before autoconfiguration.
	 */
	disk_init(); /* initialize disk list */
	tty_init(); /* initialize tty list */
#if NRND > 0
	rnd_init(); /* initialize RNG */
#endif

	/* Initialize the sysctl subsystem. */
	sysctl_init();

	/*
	 * Initialize process and pgrp structures.
	 */
	procinit();

	/*
	 * Create process 0 (the swapper).  It is entered on the global
	 * process list and the PID hash under the proclist write lock.
	 */
	s = proclist_lock_write();
	LIST_INSERT_HEAD(&allproc, p, p_list);
	LIST_INSERT_HEAD(PIDHASH(p->p_pid), p, p_hash);
	proclist_unlock_write(s);

	/* proc0 is the only member of pgrp0, hashed under pgrp id 0. */
	p->p_pgrp = &pgrp0;
	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
	LIST_INIT(&pgrp0.pg_members);
	LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);

	/* pgrp0 belongs to session0, whose leader is proc0. */
	pgrp0.pg_session = &session0;
	session0.s_count = 1;
	session0.s_sid = p->p_pid;
	session0.s_leader = p;

	/*
	 * Set P_NOCLDWAIT so that kernel threads are reparented to
	 * init(8) when they exit. init(8) can easily wait them out
	 * for us.
	 */
	p->p_flag = P_INMEM | P_SYSTEM | P_NOCLDWAIT;
	p->p_stat = SONPROC;
	p->p_nice = NZERO;
	p->p_emul = &emul_netbsd;
#ifdef __HAVE_SYSCALL_INTERN
	(*p->p_emul->e_syscall_intern)(p);
#endif
	/*
	 * NOTE(review): strncpy() does not NUL-terminate on truncation;
	 * this presumably relies on p_comm being larger than MAXCOMLEN
	 * and on proc0 being zero-filled static storage -- confirm
	 * against struct proc.
	 */
	strncpy(p->p_comm, "swapper", MAXCOMLEN);

	callout_init(&p->p_realit_ch);
	callout_init(&p->p_tsleep_ch);

	/* Create credentials. */
	cred0.p_refcnt = 1;
	p->p_cred = &cred0;
	p->p_ucred = crget();
	p->p_ucred->cr_ngroups = 1; /* group 0 */

	/* Create the file descriptor table. */
	finit();
	p->p_fd = &filedesc0.fd_fd;
	fdinit1(&filedesc0);

	/* Create the CWD info. */
	p->p_cwdi = &cwdi0;
	cwdi0.cwdi_cmask = cmask;
	cwdi0.cwdi_refcnt = 1;

	/*
	 * Create the limits structures.  Every resource starts out at
	 * RLIM_INFINITY; selected limits are then tightened below.
	 */
	p->p_limit = &limit0;
	for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
		limit0.pl_rlimit[i].rlim_cur =
		    limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;

	/* Open files: hard limit maxfiles, soft limit min(maxfiles, NOFILE). */
	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
	    maxfiles < NOFILE ? maxfiles : NOFILE;

	/* Processes: hard limit maxproc, soft limit min(maxproc, MAXUPRC). */
	limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
	limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
	    maxproc < MAXUPRC ? maxproc : MAXUPRC;

	/*
	 * RSS and locked-memory hard limits are derived from uvmexp.free
	 * (presumably the free page count, converted to bytes by ptoa());
	 * the locked-memory soft limit is a third of that.
	 */
	lim = ptoa(uvmexp.free);
	limit0.pl_rlimit[RLIMIT_RSS].rlim_max = lim;
	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = lim;
	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = lim / 3;
	limit0.pl_corename = defcorename;
	limit0.p_refcnt = 1;

	/*
	 * Initialize proc0's vmspace, which uses the kernel pmap.
	 * All kernel processes (which never have user space mappings)
	 * share proc0's vmspace, and thus, the kernel pmap.
	 */
	uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
	    trunc_page(VM_MAX_ADDRESS));
	p->p_vmspace = &vmspace0;

	p->p_addr = proc0paddr; /* XXX */

	/*
	 * We continue to place resource usage info in the
	 * user struct so they're pageable.
	 */
	p->p_stats = &p->p_addr->u_stats;

	/*
	 * Charge root for one process.
	 */
	(void)chgproccnt(0, 1);

	rqinit();

	/* Configure virtual memory system, set vm rlimits. */
	uvm_init_limits(p);

	/* Initialize the file systems. */
#if defined(NFSSERVER) || defined(NFS)
	nfs_init(); /* initialize server/shared data */
#endif
#ifdef NVNODE_IMPLICIT
	/*
	 * If the maximum number of vnodes in the namei vnode cache is not
	 * explicitly defined in the kernel config, adjust the number so
	 * that we use roughly 0.5% of memory (1/200th) for the vnode
	 * cache, but never lower desiredvnodes below its current value.
	 */
	usevnodes = (ptoa((unsigned)physmem) / 200) / sizeof(struct vnode);
	if (usevnodes > desiredvnodes)
		desiredvnodes = usevnodes;
#endif
	vfsinit();

	/* Configure the system hardware. This will enable interrupts. */
	configure();

	ubc_init(); /* must be after autoconfig */

	/* Lock the kernel on behalf of proc0. */
	KERNEL_PROC_LOCK(p);

#ifdef SYSVSHM
	/* Initialize System V style shared memory. */
	shminit();
#endif

#ifdef SYSVSEM
	/* Initialize System V style semaphores. */
	seminit();
#endif

#ifdef SYSVMSG
	/* Initialize System V style message queues. */
	msginit();
#endif

	/*
	 * Attach pseudo-devices.  The pdevinit[] table ends at the first
	 * entry whose pdev_attach is NULL.
	 */
	for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
		(*pdev->pdev_attach)(pdev->pdev_count);

	/*
	 * Initialize protocols. Block reception of incoming packets
	 * until everything is ready.
	 */
	s = splnet();
	ifinit();
	domaininit();
	splx(s);

#ifdef GPROF
	/* Initialize kernel profiling. */
	kmstartup();
#endif

	/* Initialize system accounting. */
	acct_init();

	/*
	 * Initialize signal-related data structures, and signal state
	 * for proc0.
	 */
	signal_init();
	p->p_sigacts = &sigacts0;
	siginit(p);

	/* Kick off timeout driven events by calling first time. */
	schedcpu(NULL);

	/*
	 * Create process 1 (init(8)). We do this now, as Unix has
	 * historically had init be process 1, and changing this would
	 * probably upset a lot of people.
	 *
	 * Note that process 1 won't immediately exec init(8), but will
	 * wait for us to inform it that the root file system has been
	 * mounted.
	 *
	 * NOTE(review): the argument passed for start_init() is NULL,
	 * yet start_init() uses its argument as the new proc pointer;
	 * presumably fork1() substitutes the child proc when the
	 * argument is NULL -- confirm against fork1().
	 */
	if (fork1(p, 0, SIGCHLD, NULL, 0, start_init, NULL, NULL, &initproc))
		panic("fork init");

	/*
	 * Create any kernel threads whose creation was deferred because
	 * initproc had not yet been created.
	 */
	kthread_run_deferred_queue();

	/*
	 * Now that device driver threads have been created, wait for
	 * them to finish any deferred autoconfiguration. Note we don't
	 * need to lock this semaphore, since we haven't booted any
	 * secondary processors, yet.
	 */
	while (config_pending)
		(void) tsleep((void *)&config_pending, PWAIT, "cfpend", 0);

	/*
	 * Now that autoconfiguration has completed, we can determine
	 * the root and dump devices.
	 */
	cpu_rootconf();
	cpu_dumpconf();

	/*
	 * Mount the root file system.  On failure, report the error,
	 * force RB_ASKNAME into boothowto, call setroot() again and
	 * retry; the loop only exits once vfs_mountroot() succeeds.
	 */
	do {
		domountroothook();
		if ((error = vfs_mountroot())) {
			printf("cannot mount root, error = %d\n", error);
			boothowto |= RB_ASKNAME;
			setroot(root_device,
			    (rootdev != NODEV) ? DISKPART(rootdev) : 0);
		}
	} while (error != 0);
	mountroothook_destroy();

	/*
	 * Flag the first entry on the mount list as the root file system
	 * and take an extra reference on its vfsops.
	 */
	mountlist.cqh_first->mnt_flag |= MNT_ROOTFS;
	mountlist.cqh_first->mnt_op->vfs_refcount++;

	/*
	 * Get the vnode for '/'. Set cwdi0.cwdi_cdir to reference it
	 * (taking an extra reference), then unlock the vnode.  proc0
	 * has no separate root directory.
	 */
	if (VFS_ROOT(mountlist.cqh_first, &rootvnode))
		panic("cannot find root vnode");
	cwdi0.cwdi_cdir = rootvnode;
	VREF(cwdi0.cwdi_cdir);
	VOP_UNLOCK(rootvnode, 0);
	cwdi0.cwdi_rdir = NULL;

	/*
	 * Now that root is mounted, we can fixup initproc's CWD
	 * info. All other processes are kthreads, which merely
	 * share proc0's CWD info.
	 */
	initproc->p_cwdi->cwdi_cdir = rootvnode;
	VREF(initproc->p_cwdi->cwdi_cdir);
	initproc->p_cwdi->cwdi_rdir = NULL;

	/*
	 * Now can look at time, having had a chance to verify the time
	 * from the file system. Reset p->p_rtime as it may have been
	 * munched in mi_switch() after the time got set.
	 *
	 * Note that `p' is reused as the iterator here, so it no longer
	 * points at proc0 once the loop has run.
	 */
	proclist_lock_read();
	s = splsched();
	for (p = LIST_FIRST(&allproc); p != NULL;
	    p = LIST_NEXT(p, p_list)) {
		p->p_stats->p_start = mono_time = boottime = time;
		if (p->p_cpu != NULL)
			p->p_cpu->ci_schedstate.spc_runtime = time;
		p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0;
	}
	splx(s);
	proclist_unlock_read();

	/* Create the pageout daemon kernel thread. */
	uvm_swap_init();
	if (kthread_create1(uvm_pageout, NULL, NULL, "pagedaemon"))
		panic("fork pagedaemon");

	/* Create the process reaper kernel thread. */
	if (kthread_create1(reaper, NULL, NULL, "reaper"))
		panic("fork reaper");

	/* Create the filesystem syncer kernel thread. */
	if (kthread_create1(sched_sync, NULL, NULL, "ioflush"))
		panic("fork syncer");

	/* Create the aiodone daemon kernel thread. */
	if (kthread_create1(uvm_aiodone_daemon, NULL, NULL, "aiodoned"))
		panic("fork aiodoned");

#if defined(MULTIPROCESSOR)
	/* Boot the secondary processors. */
	cpu_boot_secondary_processors();
#endif

	/* Initialize exec structures */
	exec_init(1);

#ifndef PIPE_SOCKETPAIR
	/* Initialize pipe structures */
	pipe_init();
#endif

	/*
	 * Okay, now we can let init(8) exec! It's off to userland!
	 * start_init() is sleeping on &start_init_exec until the flag
	 * becomes non-zero.
	 */
	start_init_exec = 1;
	wakeup((void *)&start_init_exec);

	/* The scheduler is an infinite loop. */
	uvm_scheduler();
	/* NOTREACHED */
}
524
525 static void
526 check_console(struct proc *p)
527 {
528 struct nameidata nd;
529 int error;
530
531 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/console", p);
532 error = namei(&nd);
533 if (error == 0)
534 vrele(nd.ni_vp);
535 else if (error == ENOENT)
536 printf("warning: no /dev/console\n");
537 else
538 printf("warning: lookup /dev/console: error %d\n", error);
539 }
540
/*
 * List of paths to try when searching for "init".  start_init() walks
 * the array from the first entry and stops at the NULL terminator, so
 * earlier entries take precedence.
 */
static const char *initpaths[] = {
	"/sbin/init",
	"/sbin/oinit",
	"/sbin/init.bak",
	NULL,
};
550
551 /*
552 * Start the initial user process; try exec'ing each pathname in "initpaths".
553 * The program is invoked with one argument containing the boot flags.
554 */
555 static void
556 start_init(void *arg)
557 {
558 struct proc *p = arg;
559 vaddr_t addr;
560 struct sys_execve_args /* {
561 syscallarg(const char *) path;
562 syscallarg(char * const *) argp;
563 syscallarg(char * const *) envp;
564 } */ args;
565 int options, i, error;
566 register_t retval[2];
567 char flags[4], *flagsp;
568 const char **pathp, *path, *slash;
569 char *ucp, **uap, *arg0, *arg1 = NULL;
570
571 /*
572 * Now in process 1.
573 */
574 strncpy(p->p_comm, "init", MAXCOMLEN);
575
576 /*
577 * Wait for main() to tell us that it's safe to exec.
578 */
579 while (start_init_exec == 0)
580 (void) tsleep((void *)&start_init_exec, PWAIT, "initexec", 0);
581
582 /*
583 * This is not the right way to do this. We really should
584 * hand-craft a descriptor onto /dev/console to hand to init,
585 * but that's a _lot_ more work, and the benefit from this easy
586 * hack makes up for the "good is the enemy of the best" effect.
587 */
588 check_console(p);
589
590 /*
591 * Need just enough stack to hold the faked-up "execve()" arguments.
592 */
593 addr = USRSTACK - PAGE_SIZE;
594 if (uvm_map(&p->p_vmspace->vm_map, &addr, PAGE_SIZE,
595 NULL, UVM_UNKNOWN_OFFSET, 0,
596 UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY,
597 UVM_ADV_NORMAL,
598 UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW)) != 0)
599 panic("init: couldn't allocate argument space");
600 p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
601
602 for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) {
603 ucp = (char *)(addr + PAGE_SIZE);
604
605 /*
606 * Construct the boot flag argument.
607 */
608 flagsp = flags;
609 *flagsp++ = '-';
610 options = 0;
611
612 if (boothowto & RB_SINGLE) {
613 *flagsp++ = 's';
614 options = 1;
615 }
616 #ifdef notyet
617 if (boothowto & RB_FASTBOOT) {
618 *flagsp++ = 'f';
619 options = 1;
620 }
621 #endif
622
623 /*
624 * Move out the flags (arg 1), if necessary.
625 */
626 if (options != 0) {
627 *flagsp++ = '\0';
628 i = flagsp - flags;
629 #ifdef DEBUG
630 printf("init: copying out flags `%s' %d\n", flags, i);
631 #endif
632 (void)copyout((caddr_t)flags, (caddr_t)(ucp -= i), i);
633 arg1 = ucp;
634 }
635
636 /*
637 * Move out the file name (also arg 0).
638 */
639 i = strlen(path) + 1;
640 #ifdef DEBUG
641 printf("init: copying out path `%s' %d\n", path, i);
642 #endif
643 (void)copyout((caddr_t)path, (caddr_t)(ucp -= i), i);
644 arg0 = ucp;
645
646 /*
647 * Move out the arg pointers.
648 */
649 uap = (char **)((long)ucp & ~ALIGNBYTES);
650 (void)suword((caddr_t)--uap, 0); /* terminator */
651 if (options != 0)
652 (void)suword((caddr_t)--uap, (long)arg1);
653 slash = strrchr(path, '/');
654 if (slash)
655 (void)suword((caddr_t)--uap,
656 (long)arg0 + (slash + 1 - path));
657 else
658 (void)suword((caddr_t)--uap, (long)arg0);
659
660 /*
661 * Point at the arguments.
662 */
663 SCARG(&args, path) = arg0;
664 SCARG(&args, argp) = uap;
665 SCARG(&args, envp) = NULL;
666
667 /*
668 * Now try to exec the program. If can't for any reason
669 * other than it doesn't exist, complain.
670 */
671 error = sys_execve(p, &args, retval);
672 if (error == 0 || error == EJUSTRETURN) {
673 KERNEL_PROC_UNLOCK(p);
674 return;
675 }
676 if (error != ENOENT)
677 printf("exec %s: error %d\n", path, error);
678 }
679 printf("init: not found\n");
680 panic("no init");
681 }
682