init_main.c revision 1.185 1 /* $NetBSD: init_main.c,v 1.185 2000/11/27 08:39:43 chs Exp $ */
2
3 /*
4 * Copyright (c) 1995 Christopher G. Demetriou. All rights reserved.
5 * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
6 * The Regents of the University of California. All rights reserved.
7 * (c) UNIX System Laboratories, Inc.
8 * All or some portions of this file are derived from material licensed
9 * to the University of California by American Telephone and Telegraph
10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11 * the permission of UNIX System Laboratories, Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 * must display the following acknowledgement:
23 * This product includes software developed by the University of
24 * California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * @(#)init_main.c 8.16 (Berkeley) 5/14/95
42 */
43
44 #include "fs_nfs.h"
45 #include "opt_nfsserver.h"
46 #include "opt_sysv.h"
47 #include "opt_maxuprc.h"
48 #include "opt_multiprocessor.h"
49 #include "opt_syscall_debug.h"
50
51 #include "rnd.h"
52
53 #include <sys/param.h>
54 #include <sys/acct.h>
55 #include <sys/filedesc.h>
56 #include <sys/file.h>
57 #include <sys/errno.h>
58 #include <sys/callout.h>
59 #include <sys/kernel.h>
60 #include <sys/mount.h>
61 #include <sys/map.h>
62 #include <sys/proc.h>
63 #include <sys/kthread.h>
64 #include <sys/resourcevar.h>
65 #include <sys/signalvar.h>
66 #include <sys/systm.h>
67 #include <sys/vnode.h>
68 #include <sys/tty.h>
69 #include <sys/conf.h>
70 #include <sys/disklabel.h>
71 #include <sys/buf.h>
72 #include <sys/device.h>
73 #include <sys/socketvar.h>
74 #include <sys/protosw.h>
75 #include <sys/reboot.h>
76 #include <sys/user.h>
77 #include <sys/sysctl.h>
78 #ifdef SYSVSHM
79 #include <sys/shm.h>
80 #endif
81 #ifdef SYSVSEM
82 #include <sys/sem.h>
83 #endif
84 #ifdef SYSVMSG
85 #include <sys/msg.h>
86 #endif
87 #include <sys/domain.h>
88 #include <sys/mbuf.h>
89 #include <sys/namei.h>
90 #if NRND > 0
91 #include <sys/rnd.h>
92 #endif
93
94 #include <sys/syscall.h>
95 #include <sys/syscallargs.h>
96
97 #include <ufs/ufs/quota.h>
98
99 #include <miscfs/genfs/genfs.h>
100 #include <miscfs/syncfs/syncfs.h>
101
102 #include <machine/cpu.h>
103
104 #include <uvm/uvm.h>
105
106 #include <net/if.h>
107 #include <net/raw_cb.h>
108
/*
 * Copyright banner.  main() prints it with printf("%s", copyright)
 * immediately after consinit().
 */
109 const char copyright[] = "\
110 Copyright (c) 1996, 1997, 1998, 1999, 2000
111 The NetBSD Foundation, Inc. All rights reserved.
112 Copyright (c) 1982, 1986, 1989, 1991, 1993
113 The Regents of the University of California. All rights reserved.
114
115 ";
116
117 /* Components of the first process -- never freed. */
118 struct session session0;
119 struct pgrp pgrp0;
120 struct proc proc0;
121 struct pcred cred0;
122 struct filedesc0 filedesc0;
123 struct cwdinfo cwdi0;
124 struct plimit limit0;
125 struct vmspace vmspace0;
126 struct sigacts sigacts0;
127 #ifndef curproc
128 struct proc *curproc = &proc0;
129 #endif
130 struct proc *initproc;
131
132 int cmask = CMASK;
133 extern struct user *proc0paddr;
134
135 struct vnode *rootvp, *swapdev_vp;
136 int boothowto;
137 int cold = 1; /* still working on startup */
138 struct timeval boottime;
139
140 __volatile int start_init_exec; /* semaphore for start_init() */
141
142 static void check_console(struct proc *p);
143 static void start_init(void *);
144 void main(void);
145
146 extern const struct emul emul_netbsd; /* defined in kern_exec.c */
147
148 /*
149 * System startup; initialize the world, create process 0, mount root
150 * filesystem, and fork to create init and pagedaemon. Most of the
151 * hard work is done in the lower-level initialization routines including
152 * startup(), which does memory initialization and autoconfiguration.
 *
 * Besides init and the pagedaemon, the reaper, ioflush and aiodoned
 * kernel threads are created here as well.  The function takes no
 * arguments and does not return: it finishes by calling
 * uvm_scheduler(), which is an infinite loop.
153 */
154 void
155 main(void)
156 {
157 struct proc *p;
158 struct pdevinit *pdev;
159 int i, s, error;
160 rlim_t lim;
161 extern struct pdevinit pdevinit[];
162 extern void schedcpu(void *);
163 extern void disk_init(void);
164 #if defined(NFSSERVER) || defined(NFS)
165 extern void nfs_init(void);
166 #endif
167 #ifdef NVNODE_IMPLICIT
168 int usevnodes;
169 #endif
170
171 /*
172 * Initialize the current process pointer (curproc) before
173 * any possible traps/probes to simplify trap processing.
174 */
175 p = &proc0;
176 curproc = p;
177 p->p_cpu = curcpu();
178 /*
179 * Attempt to find console and initialize
180 * in case of early panic or other messages.
181 */
182 consinit();
183 printf("%s", copyright);
184
185 KERNEL_LOCK_INIT();
186
187 uvm_init();
188
189 /* Do machine-dependent initialization. */
190 cpu_startup();
191
192 /* Initialize callouts. */
193 callout_startup();
194
195 /*
196 * Initialize mbuf's. Do this now because we might attempt to
197 * allocate mbufs or mbuf clusters during autoconfiguration.
198 */
199 mbinit();
200
201 /* Initialize sockets. */
202 soinit();
203
204 /*
205 * The following 3 things must be done before autoconfiguration.
206 */
207 disk_init(); /* initialize disk list */
208 tty_init(); /* initialize tty list */
209 #if NRND > 0
210 rnd_init(); /* initialize RNG */
211 #endif
212
213 /* Initialize the sysctl subsystem. */
214 sysctl_init();
215
216 /*
217 * Initialize process and pgrp structures.
218 */
219 procinit();
220
221 /*
222 * Create process 0 (the swapper).
223 */
/* Put proc0 on the allproc list and the PID hash under the write lock. */
224 s = proclist_lock_write();
225 LIST_INSERT_HEAD(&allproc, p, p_list);
226 LIST_INSERT_HEAD(PIDHASH(p->p_pid), p, p_hash);
227 proclist_unlock_write(s);
228
/* proc0 is the only member of pgrp0, which is hashed under pgrp id 0. */
229 p->p_pgrp = &pgrp0;
230 LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
231 LIST_INIT(&pgrp0.pg_members);
232 LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
233
/* pgrp0 lives in session0, whose leader is proc0. */
234 pgrp0.pg_session = &session0;
235 session0.s_count = 1;
236 session0.s_sid = p->p_pid;
237 session0.s_leader = p;
238
239 /*
240 * Set P_NOCLDWAIT so that kernel threads are reparented to
241 * init(8) when they exit. init(8) can easily wait them out
242 * for us.
243 */
244 p->p_flag = P_INMEM | P_SYSTEM | P_NOCLDWAIT;
245 p->p_stat = SONPROC;
246 p->p_nice = NZERO;
247 p->p_emul = &emul_netbsd;
248 strncpy(p->p_comm, "swapper", MAXCOMLEN);
249
250 callout_init(&p->p_realit_ch);
251 callout_init(&p->p_tsleep_ch);
252
253 /* Create credentials. */
254 cred0.p_refcnt = 1;
255 p->p_cred = &cred0;
256 p->p_ucred = crget();
257 p->p_ucred->cr_ngroups = 1; /* group 0 */
258
259 /* Create the file descriptor table. */
260 finit();
261 p->p_fd = &filedesc0.fd_fd;
262 fdinit1(&filedesc0);
263
264 /* Create the CWD info. */
265 p->p_cwdi = &cwdi0;
266 cwdi0.cwdi_cmask = cmask;
267 cwdi0.cwdi_refcnt = 1;
268
269 /* Create the limits structures. */
270 p->p_limit = &limit0;
/* Default every limit to RLIM_INFINITY; specific ones are tightened below. */
271 for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
272 limit0.pl_rlimit[i].rlim_cur =
273 limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
274
/* Open files: hard limit maxfiles, soft limit min(maxfiles, NOFILE). */
275 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
276 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
277 maxfiles < NOFILE ? maxfiles : NOFILE;
278
/* Processes: hard limit maxproc, soft limit min(maxproc, MAXUPRC). */
279 limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
280 limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
281 maxproc < MAXUPRC ? maxproc : MAXUPRC;
282
/*
 * The RSS and MEMLOCK hard limits are derived from ptoa(uvmexp.free);
 * the MEMLOCK soft limit is one third of that value.
 */
283 lim = ptoa(uvmexp.free);
284 limit0.pl_rlimit[RLIMIT_RSS].rlim_max = lim;
285 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = lim;
286 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = lim / 3;
287 limit0.pl_corename = defcorename;
288 limit0.p_refcnt = 1;
289
290 /*
291 * Initialize proc0's vmspace, which uses the kernel pmap.
292 * All kernel processes (which never have user space mappings)
293 * share proc0's vmspace, and thus, the kernel pmap.
294 */
295 uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
296 trunc_page(VM_MAX_ADDRESS), TRUE);
297 p->p_vmspace = &vmspace0;
298
299 p->p_addr = proc0paddr; /* XXX */
300
301 /*
302 * We continue to place resource usage info in the
303 * user struct so they're pageable.
304 */
305 p->p_stats = &p->p_addr->u_stats;
306
307 /*
308 * Charge root for one process.
309 */
310 (void)chgproccnt(0, 1);
311
312 rqinit();
313
314 /* Configure virtual memory system, set vm rlimits. */
315 uvm_init_limits(p);
316
317 /* Initialize the file systems. */
318 #if defined(NFSSERVER) || defined(NFS)
319 nfs_init(); /* initialize server/shared data */
320 #endif
321 vfsinit();
322
323 /* Configure the system hardware. This will enable interrupts. */
324 configure();
325
326 ubc_init(); /* must be after autoconfig */
327
328 /* Lock the kernel on behalf of proc0. */
329 KERNEL_PROC_LOCK(p);
330
331 #ifdef SYSVSHM
332 /* Initialize System V style shared memory. */
333 shminit();
334 #endif
335
336 #ifdef SYSVSEM
337 /* Initialize System V style semaphores. */
338 seminit();
339 #endif
340
341 #ifdef SYSVMSG
342 /* Initialize System V style message queues. */
343 msginit();
344 #endif
345
346 /* Attach pseudo-devices. */
/* pdevinit[] is terminated by an entry whose pdev_attach is NULL. */
347 for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
348 (*pdev->pdev_attach)(pdev->pdev_count);
349
350 /*
351 * Initialize protocols. Block reception of incoming packets
352 * until everything is ready.
353 */
354 s = splimp();
355 ifinit();
356 domaininit();
357 splx(s);
358
359 #ifdef GPROF
360 /* Initialize kernel profiling. */
361 kmstartup();
362 #endif
363
364 /* Initialize system accounting. */
365 acct_init();
366
367 /*
368 * Initialize signal-related data structures, and signal state
369 * for proc0.
370 */
371 signal_init();
372 p->p_sigacts = &sigacts0;
373 siginit(p);
374
375 /* Kick off timeout driven events by calling first time. */
376 schedcpu(NULL);
377
378 /*
379 * Create process 1 (init(8)). We do this now, as Unix has
380 * historically had init be process 1, and changing this would
381 * probably upset a lot of people.
382 *
383 * Note that process 1 won't immediately exec init(8), but will
384 * wait for us to inform it that the root file system has been
385 * mounted.
386 */
387 if (fork1(p, 0, SIGCHLD, NULL, 0, start_init, NULL, NULL, &initproc))
388 panic("fork init");
389
390 /*
391 * Create any kernel threads whose creation was deferred because
392 * initproc had not yet been created.
393 */
394 kthread_run_deferred_queue();
395
396 /*
397 * Now that device driver threads have been created, wait for
398 * them to finish any deferred autoconfiguration. Note we don't
399 * need to lock this semaphore, since we haven't booted any
400 * secondary processors, yet.
401 */
402 while (config_pending)
403 (void) tsleep((void *)&config_pending, PWAIT, "cfpend", 0);
404
405 /*
406 * Now that autoconfiguration has completed, we can determine
407 * the root and dump devices.
408 */
409 cpu_rootconf();
410 cpu_dumpconf();
411
412 /* Mount the root file system. */
/*
 * Retry until vfs_mountroot() succeeds.  After each failure the error
 * is reported, RB_ASKNAME is set in boothowto and setroot() is called
 * again before the next attempt.
 */
413 do {
414 domountroothook();
415 if ((error = vfs_mountroot())) {
416 printf("cannot mount root, error = %d\n", error);
417 boothowto |= RB_ASKNAME;
418 setroot(root_device,
419 (rootdev != NODEV) ? DISKPART(rootdev) : 0);
420 }
421 } while (error != 0);
422 mountroothook_destroy();
423
/*
 * Flag the first entry on mountlist (presumably the root just mounted
 * above) as the root file system and bump its vfsops reference count.
 */
424 mountlist.cqh_first->mnt_flag |= MNT_ROOTFS;
425 mountlist.cqh_first->mnt_op->vfs_refcount++;
426
427 /*
428 * Get the vnode for '/'. Set cwdi0.cwdi_cdir to
429 * reference it.
430 */
431 if (VFS_ROOT(mountlist.cqh_first, &rootvnode))
432 panic("cannot find root vnode");
433 cwdi0.cwdi_cdir = rootvnode;
434 VREF(cwdi0.cwdi_cdir);
435 VOP_UNLOCK(rootvnode, 0);
436 cwdi0.cwdi_rdir = NULL;
437
438 /*
439 * Now that root is mounted, we can fixup initproc's CWD
440 * info. All other processes are kthreads, which merely
441 * share proc0's CWD info.
442 */
443 initproc->p_cwdi->cwdi_cdir = rootvnode;
444 VREF(initproc->p_cwdi->cwdi_cdir);
445 initproc->p_cwdi->cwdi_rdir = NULL;
446
447 /*
448 * Now can look at time, having had a chance to verify the time
449 * from the file system. Reset p->p_rtime as it may have been
450 * munched in mi_switch() after the time got set.
451 */
/* Note: p is reused as the list cursor here and no longer names proc0. */
452 proclist_lock_read();
453 s = splsched();
454 for (p = LIST_FIRST(&allproc); p != NULL;
455 p = LIST_NEXT(p, p_list)) {
456 p->p_stats->p_start = mono_time = boottime = time;
457 if (p->p_cpu != NULL)
458 p->p_cpu->ci_schedstate.spc_runtime = time;
459 p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0;
460 }
461 splx(s);
462 proclist_unlock_read();
463
464 /* Create the pageout daemon kernel thread. */
465 uvm_swap_init();
466 if (kthread_create1(uvm_pageout, NULL, NULL, "pagedaemon"))
467 panic("fork pagedaemon");
468
469 /* Create the process reaper kernel thread. */
470 if (kthread_create1(reaper, NULL, NULL, "reaper"))
471 panic("fork reaper");
472
473 /* Create the filesystem syncer kernel thread. */
474 if (kthread_create1(sched_sync, NULL, NULL, "ioflush"))
475 panic("fork syncer");
476
477 /* Create the aiodone daemon kernel thread. */
478 if (kthread_create1(uvm_aiodone_daemon, NULL, NULL, "aiodoned"))
479 panic("fork aiodoned");
480
481 #if defined(MULTIPROCESSOR)
482 /* Boot the secondary processors. */
483 cpu_boot_secondary_processors();
484 #endif
485
486 /*
487 * Okay, now we can let init(8) exec! It's off to userland!
488 */
/* start_init() sleeps on &start_init_exec until this flag is non-zero. */
489 start_init_exec = 1;
490 wakeup((void *)&start_init_exec);
491
492 #ifdef NVNODE_IMPLICIT
493 /*
494 * If maximum number of vnodes in namei vnode cache is not explicitly
495 * defined in kernel config, adjust the number such as we use roughly
496 * 0.5% of memory for vnode cache (but not less than NVNODE vnodes).
497 */
498 usevnodes = (ptoa(physmem) / 200) / sizeof(struct vnode);
499 if (usevnodes > desiredvnodes)
500 desiredvnodes = usevnodes;
501 #endif
502
503 /* The scheduler is an infinite loop. */
504 uvm_scheduler();
505 /* NOTREACHED */
506 }
507
508 static void
509 check_console(struct proc *p)
510 {
511 struct nameidata nd;
512 int error;
513
514 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/console", p);
515 error = namei(&nd);
516 if (error == 0)
517 vrele(nd.ni_vp);
518 else if (error == ENOENT)
519 printf("warning: no /dev/console\n");
520 else
521 printf("warning: lookup /dev/console: error %d\n", error);
522 }
523
524 /*
525 * List of paths to try when searching for "init".
526 */
527 static const char *initpaths[] = {
528 "/sbin/init",
529 "/sbin/oinit",
530 "/sbin/init.bak",
531 NULL,
532 };
533
534 /*
535 * Start the initial user process; try exec'ing each pathname in "initpaths".
536 * The program is invoked with one argument containing the boot flags.
537 */
538 static void
539 start_init(void *arg)
540 {
541 struct proc *p = arg;
542 vaddr_t addr;
543 struct sys_execve_args /* {
544 syscallarg(const char *) path;
545 syscallarg(char * const *) argp;
546 syscallarg(char * const *) envp;
547 } */ args;
548 int options, i, error;
549 register_t retval[2];
550 char flags[4], *flagsp;
551 const char **pathp, *path, *slash;
552 char *ucp, **uap, *arg0, *arg1 = NULL;
553
554 /*
555 * Now in process 1.
556 */
557 strncpy(p->p_comm, "init", MAXCOMLEN);
558
559 /*
560 * Wait for main() to tell us that it's safe to exec.
561 */
562 while (start_init_exec == 0)
563 (void) tsleep((void *)&start_init_exec, PWAIT, "initexec", 0);
564
565 /*
566 * This is not the right way to do this. We really should
567 * hand-craft a descriptor onto /dev/console to hand to init,
568 * but that's a _lot_ more work, and the benefit from this easy
569 * hack makes up for the "good is the enemy of the best" effect.
570 */
571 check_console(p);
572
573 /*
574 * Need just enough stack to hold the faked-up "execve()" arguments.
575 */
576 addr = USRSTACK - PAGE_SIZE;
577 if (uvm_map(&p->p_vmspace->vm_map, &addr, PAGE_SIZE,
578 NULL, UVM_UNKNOWN_OFFSET, 0,
579 UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY,
580 UVM_ADV_NORMAL,
581 UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW))
582 != KERN_SUCCESS)
583 panic("init: couldn't allocate argument space");
584 p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
585
586 for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) {
587 ucp = (char *)(addr + PAGE_SIZE);
588
589 /*
590 * Construct the boot flag argument.
591 */
592 flagsp = flags;
593 *flagsp++ = '-';
594 options = 0;
595
596 if (boothowto & RB_SINGLE) {
597 *flagsp++ = 's';
598 options = 1;
599 }
600 #ifdef notyet
601 if (boothowto & RB_FASTBOOT) {
602 *flagsp++ = 'f';
603 options = 1;
604 }
605 #endif
606
607 /*
608 * Move out the flags (arg 1), if necessary.
609 */
610 if (options != 0) {
611 *flagsp++ = '\0';
612 i = flagsp - flags;
613 #ifdef DEBUG
614 printf("init: copying out flags `%s' %d\n", flags, i);
615 #endif
616 (void)copyout((caddr_t)flags, (caddr_t)(ucp -= i), i);
617 arg1 = ucp;
618 }
619
620 /*
621 * Move out the file name (also arg 0).
622 */
623 i = strlen(path) + 1;
624 #ifdef DEBUG
625 printf("init: copying out path `%s' %d\n", path, i);
626 #endif
627 (void)copyout((caddr_t)path, (caddr_t)(ucp -= i), i);
628 arg0 = ucp;
629
630 /*
631 * Move out the arg pointers.
632 */
633 uap = (char **)((long)ucp & ~ALIGNBYTES);
634 (void)suword((caddr_t)--uap, 0); /* terminator */
635 if (options != 0)
636 (void)suword((caddr_t)--uap, (long)arg1);
637 slash = strrchr(path, '/');
638 if (slash)
639 (void)suword((caddr_t)--uap,
640 (long)arg0 + (slash + 1 - path));
641 else
642 (void)suword((caddr_t)--uap, (long)arg0);
643
644 /*
645 * Point at the arguments.
646 */
647 SCARG(&args, path) = arg0;
648 SCARG(&args, argp) = uap;
649 SCARG(&args, envp) = NULL;
650
651 /*
652 * Now try to exec the program. If can't for any reason
653 * other than it doesn't exist, complain.
654 */
655 error = sys_execve(p, &args, retval);
656 if (error == 0 || error == EJUSTRETURN) {
657 KERNEL_PROC_UNLOCK(p);
658 return;
659 }
660 if (error != ENOENT)
661 printf("exec %s: error %d\n", path, error);
662 }
663 printf("init: not found\n");
664 panic("no init");
665 }
666