init_main.c revision 1.163 1 /* $NetBSD: init_main.c,v 1.163 2000/01/24 18:03:19 thorpej Exp $ */
2
3 /*
4 * Copyright (c) 1995 Christopher G. Demetriou. All rights reserved.
5 * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
6 * The Regents of the University of California. All rights reserved.
7 * (c) UNIX System Laboratories, Inc.
8 * All or some portions of this file are derived from material licensed
9 * to the University of California by American Telephone and Telegraph
10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11 * the permission of UNIX System Laboratories, Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 * must display the following acknowledgement:
23 * This product includes software developed by the University of
24 * California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * @(#)init_main.c 8.16 (Berkeley) 5/14/95
42 */
43
44 #include "fs_nfs.h"
45 #include "opt_nfsserver.h"
46 #include "opt_sysv.h"
47 #include "opt_maxuprc.h"
48 #include "opt_multiprocessor.h"
49
50 #include "rnd.h"
51
52 #include <sys/param.h>
53 #include <sys/filedesc.h>
54 #include <sys/file.h>
55 #include <sys/errno.h>
56 #include <sys/exec.h>
57 #include <sys/callout.h>
58 #include <sys/kernel.h>
59 #include <sys/mount.h>
60 #include <sys/map.h>
61 #include <sys/proc.h>
62 #include <sys/kthread.h>
63 #include <sys/resourcevar.h>
64 #include <sys/signalvar.h>
65 #include <sys/systm.h>
66 #include <sys/vnode.h>
67 #include <sys/tty.h>
68 #include <sys/conf.h>
69 #include <sys/disklabel.h>
70 #include <sys/buf.h>
71 #include <sys/device.h>
72 #include <sys/socketvar.h>
73 #include <sys/protosw.h>
74 #include <sys/reboot.h>
75 #include <sys/user.h>
76 #ifdef SYSVSHM
77 #include <sys/shm.h>
78 #endif
79 #ifdef SYSVSEM
80 #include <sys/sem.h>
81 #endif
82 #ifdef SYSVMSG
83 #include <sys/msg.h>
84 #endif
85 #include <sys/domain.h>
86 #include <sys/mbuf.h>
87 #include <sys/namei.h>
88 #if NRND > 0
89 #include <sys/rnd.h>
90 #endif
91
92 #include <sys/syscall.h>
93 #include <sys/syscallargs.h>
94
95 #include <ufs/ufs/quota.h>
96
97 #include <miscfs/genfs/genfs.h>
98 #include <miscfs/syncfs/syncfs.h>
99
100 #include <machine/cpu.h>
101
102 #include <vm/vm.h>
103 #include <vm/vm_pageout.h>
104
105 #include <uvm/uvm.h>
106
107 #include <net/if.h>
108 #include <net/raw_cb.h>
109
/*
 * Copyright banner; main() prints it on the console right after
 * consinit().
 *
 * Built from adjacent string literals with explicit newlines: a string
 * literal may not span physical source lines on its own.
 * NOTE(review): any indentation that originally appeared inside the
 * banner lines is not recoverable from this copy; confirm the exact
 * spacing against the canonical source.
 */
char copyright[] =
    "Copyright (c) 1996, 1997, 1998, 1999, 2000\n"
    "The NetBSD Foundation, Inc. All rights reserved.\n"
    "Copyright (c) 1982, 1986, 1989, 1991, 1993\n"
    "The Regents of the University of California. All rights reserved.\n"
    "\n";
117
118 /* Components of the first process -- never freed. */
119 struct session session0;
120 struct pgrp pgrp0;
121 struct proc proc0;
122 struct pcred cred0;
123 struct filedesc0 filedesc0;
124 struct cwdinfo cwdi0;
125 struct plimit limit0;
126 struct vmspace vmspace0;
127 struct sigacts sigacts0;
128 #ifndef curproc
129 struct proc *curproc = &proc0;
130 #endif
131 struct proc *initproc;
132
133 int cmask = CMASK;
134 extern struct user *proc0paddr;
135
136 struct vnode *rootvp, *swapdev_vp;
137 int boothowto;
138 int cold = 1; /* still working on startup */
139 struct timeval boottime;
140 struct timeval runtime;
141
142 __volatile int start_init_exec; /* semaphore for start_init() */
143
144 static void check_console __P((struct proc *p));
145 static void start_init __P((void *));
146 static void start_pagedaemon __P((void *));
147 static void start_reaper __P((void *));
148 void main __P((void));
149
150 extern char sigcode[], esigcode[];
151 #ifdef SYSCALL_DEBUG
152 extern char *syscallnames[];
153 #endif
154
155 struct emul emul_netbsd = {
156 "netbsd",
157 NULL,
158 sendsig,
159 SYS_syscall,
160 SYS_MAXSYSCALL,
161 sysent,
162 #ifdef SYSCALL_DEBUG
163 syscallnames,
164 #else
165 NULL,
166 #endif
167 0,
168 copyargs,
169 setregs,
170 sigcode,
171 esigcode,
172 };
173
174 /*
175 * System startup; initialize the world, create process 0, mount root
176 * filesystem, and fork to create init and pagedaemon. Most of the
177 * hard work is done in the lower-level initialization routines including
178 * startup(), which does memory initialization and autoconfiguration.
179 */
180 void
181 main()
182 {
183 struct proc *p;
184 struct pdevinit *pdev;
185 int i, s, error;
186 extern struct pdevinit pdevinit[];
187 extern void roundrobin __P((void *));
188 extern void schedcpu __P((void *));
189 extern void disk_init __P((void));
190 #if defined(NFSSERVER) || defined(NFS)
191 extern void nfs_init __P((void));
192 #endif
193
194 /*
195 * Initialize the current process pointer (curproc) before
196 * any possible traps/probes to simplify trap processing.
197 */
198 p = &proc0;
199 curproc = p;
200 /*
201 * Attempt to find console and initialize
202 * in case of early panic or other messages.
203 */
204 consinit();
205 printf("%s", copyright);
206
207 uvm_init();
208
209 /* Do machine-dependent initialization. */
210 cpu_startup();
211
212 /* Initialize callouts. */
213 callout_startup();
214
215 /*
216 * Initialize mbuf's. Do this now because we might attempt to
217 * allocate mbufs or mbuf clusters during autoconfiguration.
218 */
219 mbinit();
220
221 /* Initialize sockets. */
222 soinit();
223
224 /*
225 * The following 3 things must be done before autoconfiguration.
226 */
227 disk_init(); /* initialize disk list */
228 tty_init(); /* initialize tty list */
229 #if NRND > 0
230 rnd_init(); /* initialize RNG */
231 #endif
232
233 /*
234 * Initialize process and pgrp structures.
235 */
236 procinit();
237
238 /*
239 * Create process 0 (the swapper).
240 */
241 s = proclist_lock_write();
242 LIST_INSERT_HEAD(&allproc, p, p_list);
243 proclist_unlock_write(s);
244
245 p->p_pgrp = &pgrp0;
246 LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
247 LIST_INIT(&pgrp0.pg_members);
248 LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
249
250 pgrp0.pg_session = &session0;
251 session0.s_count = 1;
252 session0.s_sid = p->p_pid;
253 session0.s_leader = p;
254
255 /*
256 * Set P_NOCLDWAIT so that kernel threads are reparented to
257 * init(8) when they exit. init(8) can easily wait them out
258 * for us.
259 */
260 p->p_flag = P_INMEM | P_SYSTEM | P_NOCLDWAIT;
261 p->p_stat = SRUN;
262 p->p_nice = NZERO;
263 p->p_emul = &emul_netbsd;
264 strncpy(p->p_comm, "swapper", MAXCOMLEN);
265
266 /* Create credentials. */
267 cred0.p_refcnt = 1;
268 p->p_cred = &cred0;
269 p->p_ucred = crget();
270 p->p_ucred->cr_ngroups = 1; /* group 0 */
271
272 /* Create the file descriptor table. */
273 finit();
274 p->p_fd = &filedesc0.fd_fd;
275 fdinit1(&filedesc0);
276
277 /* Create the CWD info. */
278 p->p_cwdi = &cwdi0;
279 cwdi0.cwdi_cmask = cmask;
280 cwdi0.cwdi_refcnt = 1;
281
282 /* Create the limits structures. */
283 p->p_limit = &limit0;
284 for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
285 limit0.pl_rlimit[i].rlim_cur =
286 limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
287
288 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
289 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
290 maxfiles < NOFILE ? maxfiles : NOFILE;
291
292 limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
293 limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
294 maxproc < MAXUPRC ? maxproc : MAXUPRC;
295
296 i = ptoa(uvmexp.free);
297 limit0.pl_rlimit[RLIMIT_RSS].rlim_max = i;
298 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i;
299 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3;
300 limit0.pl_corename = defcorename;
301 limit0.p_refcnt = 1;
302
303 /*
304 * Initialize proc0's vmspace, which uses the kernel pmap.
305 * All kernel processes (which never have user space mappings)
306 * share proc0's vmspace, and thus, the kernel pmap.
307 */
308 uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
309 trunc_page(VM_MAX_ADDRESS), TRUE);
310 p->p_vmspace = &vmspace0;
311
312 p->p_addr = proc0paddr; /* XXX */
313
314 /*
315 * We continue to place resource usage info in the
316 * user struct so they're pageable.
317 */
318 p->p_stats = &p->p_addr->u_stats;
319
320 /*
321 * Charge root for one process.
322 */
323 (void)chgproccnt(0, 1);
324
325 rqinit();
326
327 /* Configure virtual memory system, set vm rlimits. */
328 uvm_init_limits(p);
329
330 /* Initialize the file systems. */
331 #if defined(NFSSERVER) || defined(NFS)
332 nfs_init(); /* initialize server/shared data */
333 #endif
334 vfsinit();
335
336 /* Configure the system hardware. This will enable interrupts. */
337 configure();
338
339 #ifdef SYSVSHM
340 /* Initialize System V style shared memory. */
341 shminit();
342 #endif
343
344 #ifdef SYSVSEM
345 /* Initialize System V style semaphores. */
346 seminit();
347 #endif
348
349 #ifdef SYSVMSG
350 /* Initialize System V style message queues. */
351 msginit();
352 #endif
353
354 /* Attach pseudo-devices. */
355 for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
356 (*pdev->pdev_attach)(pdev->pdev_count);
357
358 /*
359 * Initialize protocols. Block reception of incoming packets
360 * until everything is ready.
361 */
362 s = splimp();
363 ifinit();
364 domaininit();
365 splx(s);
366
367 #ifdef GPROF
368 /* Initialize kernel profiling. */
369 kmstartup();
370 #endif
371
372 /*
373 * Initialize signal-related data structures, and signal state
374 * for proc0.
375 */
376 signal_init();
377 p->p_sigacts = &sigacts0;
378 siginit(p);
379
380 /* Kick off timeout driven events by calling first time. */
381 roundrobin(NULL);
382 schedcpu(NULL);
383
384 /*
385 * Create process 1 (init(8)). We do this now, as Unix has
386 * historically had init be process 1, and changing this would
387 * probably upset a lot of people.
388 *
389 * Note that process 1 won't immediately exec init(8), but will
390 * wait for us to inform it that the root file system has been
391 * mounted.
392 */
393 if (fork1(p, 0, SIGCHLD, NULL, 0, NULL, &initproc))
394 panic("fork init");
395 cpu_set_kpc(initproc, start_init, initproc);
396
397 /*
398 * Create any kernel threads who's creation was deferred because
399 * initproc had not yet been created.
400 */
401 kthread_run_deferred_queue();
402
403 /*
404 * Now that device driver threads have been created, wait for
405 * them to finish any deferred autoconfiguration. Note we don't
406 * need to lock this semaphore, since we haven't booted any
407 * secondary processors, yet.
408 */
409 while (config_pending)
410 (void) tsleep((void *)&config_pending, PWAIT, "cfpend", 0);
411
412 /*
413 * Now that autoconfiguration has completed, we can determine
414 * the root and dump devices.
415 */
416 cpu_rootconf();
417 cpu_dumpconf();
418
419 /* Mount the root file system. */
420 do {
421 domountroothook();
422 if ((error = vfs_mountroot())) {
423 printf("cannot mount root, error = %d\n", error);
424 boothowto |= RB_ASKNAME;
425 setroot(root_device,
426 (rootdev != NODEV) ? DISKPART(rootdev) : 0);
427 }
428 } while (error != 0);
429 mountroothook_destroy();
430
431 mountlist.cqh_first->mnt_flag |= MNT_ROOTFS;
432 mountlist.cqh_first->mnt_op->vfs_refcount++;
433
434 /*
435 * Get the vnode for '/'. Set filedesc0.fd_fd.fd_cdir to
436 * reference it.
437 */
438 if (VFS_ROOT(mountlist.cqh_first, &rootvnode))
439 panic("cannot find root vnode");
440 cwdi0.cwdi_cdir = rootvnode;
441 VREF(cwdi0.cwdi_cdir);
442 VOP_UNLOCK(rootvnode, 0);
443 cwdi0.cwdi_rdir = NULL;
444
445 /*
446 * Now that root is mounted, we can fixup initproc's CWD
447 * info. All other processes are kthreads, which merely
448 * share proc0's CWD info.
449 */
450 initproc->p_cwdi->cwdi_cdir = rootvnode;
451 VREF(initproc->p_cwdi->cwdi_cdir);
452 initproc->p_cwdi->cwdi_rdir = NULL;
453
454 /*
455 * Now can look at time, having had a chance to verify the time
456 * from the file system. Reset p->p_rtime as it may have been
457 * munched in mi_switch() after the time got set.
458 */
459 proclist_lock_read();
460 s = splclock(); /* so we can read time */
461 for (p = LIST_FIRST(&allproc); p != NULL;
462 p = LIST_NEXT(p, p_list)) {
463 p->p_stats->p_start = runtime = mono_time = boottime = time;
464 p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0;
465 }
466 splx(s);
467 proclist_unlock_read();
468
469 /* Create the pageout daemon kernel thread. */
470 uvm_swap_init();
471 if (kthread_create1(start_pagedaemon, NULL, NULL, "pagedaemon"))
472 panic("fork pagedaemon");
473
474 /* Create the process reaper kernel thread. */
475 if (kthread_create1(start_reaper, NULL, NULL, "reaper"))
476 panic("fork reaper");
477
478 /* Create the filesystem syncer kernel thread. */
479 if (kthread_create1(sched_sync, NULL, NULL, "ioflush"))
480 panic("fork syncer");
481
482 #if defined(MULTIPROCESSOR)
483 /* Boot the secondary processors. */
484 cpu_boot_secondary_processors();
485 #endif
486
487 /*
488 * Okay, now we can let init(8) exec! It's off to userland!
489 */
490 start_init_exec = 1;
491 wakeup((void *)&start_init_exec);
492
493 /* The scheduler is an infinite loop. */
494 uvm_scheduler();
495 /* NOTREACHED */
496 }
497
498 static void
499 check_console(p)
500 struct proc *p;
501 {
502 struct nameidata nd;
503 int error;
504
505 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/console", p);
506 error = namei(&nd);
507 if (error == 0)
508 vrele(nd.ni_vp);
509 else if (error == ENOENT)
510 printf("warning: no /dev/console\n");
511 else
512 printf("warning: lookup /dev/console: error %d\n", error);
513 }
514
/*
 * Candidate pathnames for the "init" program, in the order start_init()
 * tries them.  The list is NULL-terminated.
 */
static char *initpaths[] = {
	"/sbin/init", "/sbin/oinit", "/sbin/init.bak", NULL,
};
524
525 /*
526 * Start the initial user process; try exec'ing each pathname in "initpaths".
527 * The program is invoked with one argument containing the boot flags.
528 */
529 static void
530 start_init(arg)
531 void *arg;
532 {
533 struct proc *p = arg;
534 vaddr_t addr;
535 struct sys_execve_args /* {
536 syscallarg(const char *) path;
537 syscallarg(char * const *) argp;
538 syscallarg(char * const *) envp;
539 } */ args;
540 int options, i, error;
541 register_t retval[2];
542 char flags[4], *flagsp;
543 char **pathp, *path, *slash, *ucp, **uap, *arg0, *arg1 = NULL;
544
545 /*
546 * Now in process 1.
547 */
548 strncpy(p->p_comm, "init", MAXCOMLEN);
549
550 /*
551 * Wait for main() to tell us that it's safe to exec.
552 */
553 while (start_init_exec == 0)
554 (void) tsleep((void *)&start_init_exec, PWAIT, "initexec", 0);
555
556 /*
557 * This is not the right way to do this. We really should
558 * hand-craft a descriptor onto /dev/console to hand to init,
559 * but that's a _lot_ more work, and the benefit from this easy
560 * hack makes up for the "good is the enemy of the best" effect.
561 */
562 check_console(p);
563
564 /*
565 * Need just enough stack to hold the faked-up "execve()" arguments.
566 */
567 addr = USRSTACK - PAGE_SIZE;
568 if (uvm_map(&p->p_vmspace->vm_map, &addr, PAGE_SIZE,
569 NULL, UVM_UNKNOWN_OFFSET,
570 UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY,
571 UVM_ADV_NORMAL,
572 UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW))
573 != KERN_SUCCESS)
574 panic("init: couldn't allocate argument space");
575 p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
576
577 for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) {
578 ucp = (char *)(addr + PAGE_SIZE);
579
580 /*
581 * Construct the boot flag argument.
582 */
583 flagsp = flags;
584 *flagsp++ = '-';
585 options = 0;
586
587 if (boothowto & RB_SINGLE) {
588 *flagsp++ = 's';
589 options = 1;
590 }
591 #ifdef notyet
592 if (boothowto & RB_FASTBOOT) {
593 *flagsp++ = 'f';
594 options = 1;
595 }
596 #endif
597
598 /*
599 * Move out the flags (arg 1), if necessary.
600 */
601 if (options != 0) {
602 *flagsp++ = '\0';
603 i = flagsp - flags;
604 #ifdef DEBUG
605 printf("init: copying out flags `%s' %d\n", flags, i);
606 #endif
607 (void)copyout((caddr_t)flags, (caddr_t)(ucp -= i), i);
608 arg1 = ucp;
609 }
610
611 /*
612 * Move out the file name (also arg 0).
613 */
614 i = strlen(path) + 1;
615 #ifdef DEBUG
616 printf("init: copying out path `%s' %d\n", path, i);
617 #endif
618 (void)copyout((caddr_t)path, (caddr_t)(ucp -= i), i);
619 arg0 = ucp;
620
621 /*
622 * Move out the arg pointers.
623 */
624 uap = (char **)((long)ucp & ~ALIGNBYTES);
625 (void)suword((caddr_t)--uap, 0); /* terminator */
626 if (options != 0)
627 (void)suword((caddr_t)--uap, (long)arg1);
628 slash = strrchr(path, '/');
629 if (slash)
630 (void)suword((caddr_t)--uap,
631 (long)arg0 + (slash + 1 - path));
632 else
633 (void)suword((caddr_t)--uap, (long)arg0);
634
635 /*
636 * Point at the arguments.
637 */
638 SCARG(&args, path) = arg0;
639 SCARG(&args, argp) = uap;
640 SCARG(&args, envp) = NULL;
641
642 /*
643 * Now try to exec the program. If can't for any reason
644 * other than it doesn't exist, complain.
645 */
646 error = sys_execve(p, &args, retval);
647 if (error == 0 || error == EJUSTRETURN)
648 return;
649 if (error != ENOENT)
650 printf("exec %s: error %d\n", path, error);
651 }
652 printf("init: not found\n");
653 panic("no init");
654 }
655
/*
 * Entry point of the "pagedaemon" kernel thread created by main();
 * simply hands control to uvm_pageout().  The argument is unused.
 */
/* ARGSUSED */
static void
start_pagedaemon(void *arg)
{
	uvm_pageout();
	/* NOTREACHED */
}
665
/*
 * Entry point of the "reaper" kernel thread created by main();
 * simply hands control to reaper().  The argument is unused.
 */
/* ARGSUSED */
static void
start_reaper(void *arg)
{
	reaper();
	/* NOTREACHED */
}
675