init_main.c revision 1.189 1 /* $NetBSD: init_main.c,v 1.189 2001/03/15 06:10:55 chs Exp $ */
2
3 /*
4 * Copyright (c) 1995 Christopher G. Demetriou. All rights reserved.
5 * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
6 * The Regents of the University of California. All rights reserved.
7 * (c) UNIX System Laboratories, Inc.
8 * All or some portions of this file are derived from material licensed
9 * to the University of California by American Telephone and Telegraph
10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11 * the permission of UNIX System Laboratories, Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 * must display the following acknowledgement:
23 * This product includes software developed by the University of
24 * California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * @(#)init_main.c 8.16 (Berkeley) 5/14/95
42 */
43
44 #include "fs_nfs.h"
45 #include "opt_nfsserver.h"
46 #include "opt_sysv.h"
47 #include "opt_maxuprc.h"
48 #include "opt_multiprocessor.h"
49 #include "opt_syscall_debug.h"
50
51 #include "rnd.h"
52
53 #include <sys/param.h>
54 #include <sys/acct.h>
55 #include <sys/filedesc.h>
56 #include <sys/file.h>
57 #include <sys/errno.h>
58 #include <sys/callout.h>
59 #include <sys/kernel.h>
60 #include <sys/mount.h>
61 #include <sys/map.h>
62 #include <sys/proc.h>
63 #include <sys/kthread.h>
64 #include <sys/resourcevar.h>
65 #include <sys/signalvar.h>
66 #include <sys/systm.h>
67 #include <sys/vnode.h>
68 #include <sys/tty.h>
69 #include <sys/conf.h>
70 #include <sys/disklabel.h>
71 #include <sys/buf.h>
72 #include <sys/device.h>
73 #include <sys/exec.h>
74 #include <sys/socketvar.h>
75 #include <sys/protosw.h>
76 #include <sys/reboot.h>
77 #include <sys/user.h>
78 #include <sys/sysctl.h>
79 #ifdef SYSVSHM
80 #include <sys/shm.h>
81 #endif
82 #ifdef SYSVSEM
83 #include <sys/sem.h>
84 #endif
85 #ifdef SYSVMSG
86 #include <sys/msg.h>
87 #endif
88 #include <sys/domain.h>
89 #include <sys/mbuf.h>
90 #include <sys/namei.h>
91 #if NRND > 0
92 #include <sys/rnd.h>
93 #endif
94
95 #include <sys/syscall.h>
96 #include <sys/syscallargs.h>
97
98 #include <ufs/ufs/quota.h>
99
100 #include <miscfs/genfs/genfs.h>
101 #include <miscfs/syncfs/syncfs.h>
102
103 #include <machine/cpu.h>
104
105 #include <uvm/uvm.h>
106
107 #include <net/if.h>
108 #include <net/raw_cb.h>
109
/*
 * Copyright banner; main() prints it on the console right after
 * consinit().
 *
 * Spelled as adjacent string literals with explicit "\n" escapes: a raw
 * newline inside a string literal is not valid ISO C and only ever
 * worked as a (since removed) GCC extension.  The text is four lines
 * followed by one empty line.
 *
 * NOTE(review): whitespace inside the banner lines is reproduced as it
 * appears in the reviewed listing -- confirm the indentation and
 * spacing against the pristine file.
 */
const char copyright[] =
"Copyright (c) 1996, 1997, 1998, 1999, 2000, 2001\n"
"The NetBSD Foundation, Inc. All rights reserved.\n"
"Copyright (c) 1982, 1986, 1989, 1991, 1993\n"
"The Regents of the University of California. All rights reserved.\n"
"\n";
117
118 /* Components of the first process -- never freed. */
119 struct session session0;
120 struct pgrp pgrp0;
121 struct proc proc0;
122 struct pcred cred0;
123 struct filedesc0 filedesc0;
124 struct cwdinfo cwdi0;
125 struct plimit limit0;
126 struct vmspace vmspace0;
127 struct sigacts sigacts0;
128 #ifndef curproc
129 struct proc *curproc = &proc0;
130 #endif
131 struct proc *initproc;
132
133 int cmask = CMASK;
134 extern struct user *proc0paddr;
135
136 struct vnode *rootvp, *swapdev_vp;
137 int boothowto;
138 int cold = 1; /* still working on startup */
139 struct timeval boottime;
140
141 __volatile int start_init_exec; /* semaphore for start_init() */
142
143 static void check_console(struct proc *p);
144 static void start_init(void *);
145 void main(void);
146
147 extern const struct emul emul_netbsd; /* defined in kern_exec.c */
148
149 /*
150 * System startup; initialize the world, create process 0, mount root
151 * filesystem, and fork to create init and pagedaemon. Most of the
152 * hard work is done in the lower-level initialization routines including
153 * startup(), which does memory initialization and autoconfiguration.
154 */
155 void
156 main(void)
157 {
158 struct proc *p;
159 struct pdevinit *pdev;
160 int i, s, error;
161 rlim_t lim;
162 extern struct pdevinit pdevinit[];
163 extern void schedcpu(void *);
164 extern void disk_init(void);
165 #if defined(NFSSERVER) || defined(NFS)
166 extern void nfs_init(void);
167 #endif
168 #ifdef NVNODE_IMPLICIT
169 int usevnodes;
170 #endif
171
172 /*
173 * Initialize the current process pointer (curproc) before
174 * any possible traps/probes to simplify trap processing.
175 */
176 p = &proc0;
177 curproc = p;
178 p->p_cpu = curcpu();
179 /*
180 * Attempt to find console and initialize
181 * in case of early panic or other messages.
182 */
183 consinit();
184 printf("%s", copyright);
185
186 KERNEL_LOCK_INIT();
187
188 uvm_init();
189
190 /* Do machine-dependent initialization. */
191 cpu_startup();
192
193 /* Initialize callouts. */
194 callout_startup();
195
196 /*
197 * Initialize mbuf's. Do this now because we might attempt to
198 * allocate mbufs or mbuf clusters during autoconfiguration.
199 */
200 mbinit();
201
202 /* Initialize sockets. */
203 soinit();
204
205 /*
206 * The following 3 things must be done before autoconfiguration.
207 */
208 disk_init(); /* initialize disk list */
209 tty_init(); /* initialize tty list */
210 #if NRND > 0
211 rnd_init(); /* initialize RNG */
212 #endif
213
214 /* Initialize the sysctl subsystem. */
215 sysctl_init();
216
217 /*
218 * Initialize process and pgrp structures.
219 */
220 procinit();
221
222 /*
223 * Create process 0 (the swapper).
224 */
225 s = proclist_lock_write();
226 LIST_INSERT_HEAD(&allproc, p, p_list);
227 LIST_INSERT_HEAD(PIDHASH(p->p_pid), p, p_hash);
228 proclist_unlock_write(s);
229
230 p->p_pgrp = &pgrp0;
231 LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
232 LIST_INIT(&pgrp0.pg_members);
233 LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
234
235 pgrp0.pg_session = &session0;
236 session0.s_count = 1;
237 session0.s_sid = p->p_pid;
238 session0.s_leader = p;
239
240 /*
241 * Set P_NOCLDWAIT so that kernel threads are reparented to
242 * init(8) when they exit. init(8) can easily wait them out
243 * for us.
244 */
245 p->p_flag = P_INMEM | P_SYSTEM | P_NOCLDWAIT;
246 p->p_stat = SONPROC;
247 p->p_nice = NZERO;
248 p->p_emul = &emul_netbsd;
249 #ifdef __HAVE_SYSCALL_INTERN
250 (*p->p_emul->e_syscall_intern)(p);
251 #endif
252 strncpy(p->p_comm, "swapper", MAXCOMLEN);
253
254 callout_init(&p->p_realit_ch);
255 callout_init(&p->p_tsleep_ch);
256
257 /* Create credentials. */
258 cred0.p_refcnt = 1;
259 p->p_cred = &cred0;
260 p->p_ucred = crget();
261 p->p_ucred->cr_ngroups = 1; /* group 0 */
262
263 /* Create the file descriptor table. */
264 finit();
265 p->p_fd = &filedesc0.fd_fd;
266 fdinit1(&filedesc0);
267
268 /* Create the CWD info. */
269 p->p_cwdi = &cwdi0;
270 cwdi0.cwdi_cmask = cmask;
271 cwdi0.cwdi_refcnt = 1;
272
273 /* Create the limits structures. */
274 p->p_limit = &limit0;
275 for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
276 limit0.pl_rlimit[i].rlim_cur =
277 limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
278
279 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
280 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
281 maxfiles < NOFILE ? maxfiles : NOFILE;
282
283 limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
284 limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
285 maxproc < MAXUPRC ? maxproc : MAXUPRC;
286
287 lim = ptoa(uvmexp.free);
288 limit0.pl_rlimit[RLIMIT_RSS].rlim_max = lim;
289 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = lim;
290 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = lim / 3;
291 limit0.pl_corename = defcorename;
292 limit0.p_refcnt = 1;
293
294 /*
295 * Initialize proc0's vmspace, which uses the kernel pmap.
296 * All kernel processes (which never have user space mappings)
297 * share proc0's vmspace, and thus, the kernel pmap.
298 */
299 uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
300 trunc_page(VM_MAX_ADDRESS), TRUE);
301 p->p_vmspace = &vmspace0;
302
303 p->p_addr = proc0paddr; /* XXX */
304
305 /*
306 * We continue to place resource usage info in the
307 * user struct so they're pageable.
308 */
309 p->p_stats = &p->p_addr->u_stats;
310
311 /*
312 * Charge root for one process.
313 */
314 (void)chgproccnt(0, 1);
315
316 rqinit();
317
318 /* Configure virtual memory system, set vm rlimits. */
319 uvm_init_limits(p);
320
321 /* Initialize the file systems. */
322 #if defined(NFSSERVER) || defined(NFS)
323 nfs_init(); /* initialize server/shared data */
324 #endif
325 vfsinit();
326
327 /* Configure the system hardware. This will enable interrupts. */
328 configure();
329
330 ubc_init(); /* must be after autoconfig */
331
332 /* Lock the kernel on behalf of proc0. */
333 KERNEL_PROC_LOCK(p);
334
335 #ifdef SYSVSHM
336 /* Initialize System V style shared memory. */
337 shminit();
338 #endif
339
340 #ifdef SYSVSEM
341 /* Initialize System V style semaphores. */
342 seminit();
343 #endif
344
345 #ifdef SYSVMSG
346 /* Initialize System V style message queues. */
347 msginit();
348 #endif
349
350 /* Attach pseudo-devices. */
351 for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
352 (*pdev->pdev_attach)(pdev->pdev_count);
353
354 /*
355 * Initialize protocols. Block reception of incoming packets
356 * until everything is ready.
357 */
358 s = splimp();
359 ifinit();
360 domaininit();
361 splx(s);
362
363 #ifdef GPROF
364 /* Initialize kernel profiling. */
365 kmstartup();
366 #endif
367
368 /* Initialize system accouting. */
369 acct_init();
370
371 /*
372 * Initialize signal-related data structures, and signal state
373 * for proc0.
374 */
375 signal_init();
376 p->p_sigacts = &sigacts0;
377 siginit(p);
378
379 /* Kick off timeout driven events by calling first time. */
380 schedcpu(NULL);
381
382 /*
383 * Create process 1 (init(8)). We do this now, as Unix has
384 * historically had init be process 1, and changing this would
385 * probably upset a lot of people.
386 *
387 * Note that process 1 won't immediately exec init(8), but will
388 * wait for us to inform it that the root file system has been
389 * mounted.
390 */
391 if (fork1(p, 0, SIGCHLD, NULL, 0, start_init, NULL, NULL, &initproc))
392 panic("fork init");
393
394 /*
395 * Create any kernel threads who's creation was deferred because
396 * initproc had not yet been created.
397 */
398 kthread_run_deferred_queue();
399
400 /*
401 * Now that device driver threads have been created, wait for
402 * them to finish any deferred autoconfiguration. Note we don't
403 * need to lock this semaphore, since we haven't booted any
404 * secondary processors, yet.
405 */
406 while (config_pending)
407 (void) tsleep((void *)&config_pending, PWAIT, "cfpend", 0);
408
409 /*
410 * Now that autoconfiguration has completed, we can determine
411 * the root and dump devices.
412 */
413 cpu_rootconf();
414 cpu_dumpconf();
415
416 /* Mount the root file system. */
417 do {
418 domountroothook();
419 if ((error = vfs_mountroot())) {
420 printf("cannot mount root, error = %d\n", error);
421 boothowto |= RB_ASKNAME;
422 setroot(root_device,
423 (rootdev != NODEV) ? DISKPART(rootdev) : 0);
424 }
425 } while (error != 0);
426 mountroothook_destroy();
427
428 mountlist.cqh_first->mnt_flag |= MNT_ROOTFS;
429 mountlist.cqh_first->mnt_op->vfs_refcount++;
430
431 /*
432 * Get the vnode for '/'. Set filedesc0.fd_fd.fd_cdir to
433 * reference it.
434 */
435 if (VFS_ROOT(mountlist.cqh_first, &rootvnode))
436 panic("cannot find root vnode");
437 cwdi0.cwdi_cdir = rootvnode;
438 VREF(cwdi0.cwdi_cdir);
439 VOP_UNLOCK(rootvnode, 0);
440 cwdi0.cwdi_rdir = NULL;
441
442 /*
443 * Now that root is mounted, we can fixup initproc's CWD
444 * info. All other processes are kthreads, which merely
445 * share proc0's CWD info.
446 */
447 initproc->p_cwdi->cwdi_cdir = rootvnode;
448 VREF(initproc->p_cwdi->cwdi_cdir);
449 initproc->p_cwdi->cwdi_rdir = NULL;
450
451 /*
452 * Now can look at time, having had a chance to verify the time
453 * from the file system. Reset p->p_rtime as it may have been
454 * munched in mi_switch() after the time got set.
455 */
456 proclist_lock_read();
457 s = splsched();
458 for (p = LIST_FIRST(&allproc); p != NULL;
459 p = LIST_NEXT(p, p_list)) {
460 p->p_stats->p_start = mono_time = boottime = time;
461 if (p->p_cpu != NULL)
462 p->p_cpu->ci_schedstate.spc_runtime = time;
463 p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0;
464 }
465 splx(s);
466 proclist_unlock_read();
467
468 /* Create the pageout daemon kernel thread. */
469 uvm_swap_init();
470 if (kthread_create1(uvm_pageout, NULL, NULL, "pagedaemon"))
471 panic("fork pagedaemon");
472
473 /* Create the process reaper kernel thread. */
474 if (kthread_create1(reaper, NULL, NULL, "reaper"))
475 panic("fork reaper");
476
477 /* Create the filesystem syncer kernel thread. */
478 if (kthread_create1(sched_sync, NULL, NULL, "ioflush"))
479 panic("fork syncer");
480
481 /* Create the aiodone daemon kernel thread. */
482 if (kthread_create1(uvm_aiodone_daemon, NULL, NULL, "aiodoned"))
483 panic("fork aiodoned");
484
485 #if defined(MULTIPROCESSOR)
486 /* Boot the secondary processors. */
487 cpu_boot_secondary_processors();
488 #endif
489
490 /* Initialize exec structures */
491 exec_init(1);
492
493 /*
494 * Okay, now we can let init(8) exec! It's off to userland!
495 */
496 start_init_exec = 1;
497 wakeup((void *)&start_init_exec);
498
499 #ifdef NVNODE_IMPLICIT
500 /*
501 * If maximum number of vnodes in namei vnode cache is not explicitly
502 * defined in kernel config, adjust the number such as we use roughly
503 * 0.5% of memory for vnode cache (but not less than NVNODE vnodes).
504 */
505 usevnodes = (ptoa(physmem) / 200) / sizeof(struct vnode);
506 if (usevnodes > desiredvnodes)
507 desiredvnodes = usevnodes;
508 #endif
509
510 /* The scheduler is an infinite loop. */
511 uvm_scheduler();
512 /* NOTREACHED */
513 }
514
515 static void
516 check_console(struct proc *p)
517 {
518 struct nameidata nd;
519 int error;
520
521 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/console", p);
522 error = namei(&nd);
523 if (error == 0)
524 vrele(nd.ni_vp);
525 else if (error == ENOENT)
526 printf("warning: no /dev/console\n");
527 else
528 printf("warning: lookup /dev/console: error %d\n", error);
529 }
530
/*
 * Candidate pathnames for the "init" program, in the order in which
 * start_init() tries them.  A NULL entry ends the list.
 */
static const char *initpaths[] = {
	"/sbin/init", "/sbin/oinit", "/sbin/init.bak", NULL
};
540
541 /*
542 * Start the initial user process; try exec'ing each pathname in "initpaths".
543 * The program is invoked with one argument containing the boot flags.
544 */
545 static void
546 start_init(void *arg)
547 {
548 struct proc *p = arg;
549 vaddr_t addr;
550 struct sys_execve_args /* {
551 syscallarg(const char *) path;
552 syscallarg(char * const *) argp;
553 syscallarg(char * const *) envp;
554 } */ args;
555 int options, i, error;
556 register_t retval[2];
557 char flags[4], *flagsp;
558 const char **pathp, *path, *slash;
559 char *ucp, **uap, *arg0, *arg1 = NULL;
560
561 /*
562 * Now in process 1.
563 */
564 strncpy(p->p_comm, "init", MAXCOMLEN);
565
566 /*
567 * Wait for main() to tell us that it's safe to exec.
568 */
569 while (start_init_exec == 0)
570 (void) tsleep((void *)&start_init_exec, PWAIT, "initexec", 0);
571
572 /*
573 * This is not the right way to do this. We really should
574 * hand-craft a descriptor onto /dev/console to hand to init,
575 * but that's a _lot_ more work, and the benefit from this easy
576 * hack makes up for the "good is the enemy of the best" effect.
577 */
578 check_console(p);
579
580 /*
581 * Need just enough stack to hold the faked-up "execve()" arguments.
582 */
583 addr = USRSTACK - PAGE_SIZE;
584 if (uvm_map(&p->p_vmspace->vm_map, &addr, PAGE_SIZE,
585 NULL, UVM_UNKNOWN_OFFSET, 0,
586 UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY,
587 UVM_ADV_NORMAL,
588 UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW)) != 0)
589 panic("init: couldn't allocate argument space");
590 p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
591
592 for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) {
593 ucp = (char *)(addr + PAGE_SIZE);
594
595 /*
596 * Construct the boot flag argument.
597 */
598 flagsp = flags;
599 *flagsp++ = '-';
600 options = 0;
601
602 if (boothowto & RB_SINGLE) {
603 *flagsp++ = 's';
604 options = 1;
605 }
606 #ifdef notyet
607 if (boothowto & RB_FASTBOOT) {
608 *flagsp++ = 'f';
609 options = 1;
610 }
611 #endif
612
613 /*
614 * Move out the flags (arg 1), if necessary.
615 */
616 if (options != 0) {
617 *flagsp++ = '\0';
618 i = flagsp - flags;
619 #ifdef DEBUG
620 printf("init: copying out flags `%s' %d\n", flags, i);
621 #endif
622 (void)copyout((caddr_t)flags, (caddr_t)(ucp -= i), i);
623 arg1 = ucp;
624 }
625
626 /*
627 * Move out the file name (also arg 0).
628 */
629 i = strlen(path) + 1;
630 #ifdef DEBUG
631 printf("init: copying out path `%s' %d\n", path, i);
632 #endif
633 (void)copyout((caddr_t)path, (caddr_t)(ucp -= i), i);
634 arg0 = ucp;
635
636 /*
637 * Move out the arg pointers.
638 */
639 uap = (char **)((long)ucp & ~ALIGNBYTES);
640 (void)suword((caddr_t)--uap, 0); /* terminator */
641 if (options != 0)
642 (void)suword((caddr_t)--uap, (long)arg1);
643 slash = strrchr(path, '/');
644 if (slash)
645 (void)suword((caddr_t)--uap,
646 (long)arg0 + (slash + 1 - path));
647 else
648 (void)suword((caddr_t)--uap, (long)arg0);
649
650 /*
651 * Point at the arguments.
652 */
653 SCARG(&args, path) = arg0;
654 SCARG(&args, argp) = uap;
655 SCARG(&args, envp) = NULL;
656
657 /*
658 * Now try to exec the program. If can't for any reason
659 * other than it doesn't exist, complain.
660 */
661 error = sys_execve(p, &args, retval);
662 if (error == 0 || error == EJUSTRETURN) {
663 KERNEL_PROC_UNLOCK(p);
664 return;
665 }
666 if (error != ENOENT)
667 printf("exec %s: error %d\n", path, error);
668 }
669 printf("init: not found\n");
670 panic("no init");
671 }
672