init_main.c revision 1.178 1 /* $NetBSD: init_main.c,v 1.178 2000/08/21 02:11:56 thorpej Exp $ */
2
3 /*
4 * Copyright (c) 1995 Christopher G. Demetriou. All rights reserved.
5 * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
6 * The Regents of the University of California. All rights reserved.
7 * (c) UNIX System Laboratories, Inc.
8 * All or some portions of this file are derived from material licensed
9 * to the University of California by American Telephone and Telegraph
10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11 * the permission of UNIX System Laboratories, Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 * must display the following acknowledgement:
23 * This product includes software developed by the University of
24 * California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * @(#)init_main.c 8.16 (Berkeley) 5/14/95
42 */
43
44 #include "fs_nfs.h"
45 #include "opt_nfsserver.h"
46 #include "opt_sysv.h"
47 #include "opt_maxuprc.h"
48 #include "opt_multiprocessor.h"
49 #include "opt_syscall_debug.h"
50
51 #include "rnd.h"
52
53 #include <sys/param.h>
54 #include <sys/acct.h>
55 #include <sys/filedesc.h>
56 #include <sys/file.h>
57 #include <sys/errno.h>
58 #include <sys/exec.h>
59 #include <sys/callout.h>
60 #include <sys/kernel.h>
61 #include <sys/mount.h>
62 #include <sys/map.h>
63 #include <sys/proc.h>
64 #include <sys/kthread.h>
65 #include <sys/resourcevar.h>
66 #include <sys/signalvar.h>
67 #include <sys/systm.h>
68 #include <sys/vnode.h>
69 #include <sys/tty.h>
70 #include <sys/conf.h>
71 #include <sys/disklabel.h>
72 #include <sys/buf.h>
73 #include <sys/device.h>
74 #include <sys/socketvar.h>
75 #include <sys/protosw.h>
76 #include <sys/reboot.h>
77 #include <sys/user.h>
78 #include <sys/sysctl.h>
79 #ifdef SYSVSHM
80 #include <sys/shm.h>
81 #endif
82 #ifdef SYSVSEM
83 #include <sys/sem.h>
84 #endif
85 #ifdef SYSVMSG
86 #include <sys/msg.h>
87 #endif
88 #include <sys/domain.h>
89 #include <sys/mbuf.h>
90 #include <sys/namei.h>
91 #if NRND > 0
92 #include <sys/rnd.h>
93 #endif
94
95 #include <sys/syscall.h>
96 #include <sys/syscallargs.h>
97
98 #include <ufs/ufs/quota.h>
99
100 #include <miscfs/genfs/genfs.h>
101 #include <miscfs/syncfs/syncfs.h>
102
103 #include <machine/cpu.h>
104
105 #include <uvm/uvm.h>
106
107 #include <net/if.h>
108 #include <net/raw_cb.h>
109
/*
 * Copyright banner; main() prints it once the console has been
 * initialized.
 *
 * Written as adjacent string literals with explicit "\n" escapes: a
 * string literal that spans raw source newlines is not valid ISO C.
 */
const char copyright[] =
"Copyright (c) 1996, 1997, 1998, 1999, 2000\n"
"The NetBSD Foundation, Inc. All rights reserved.\n"
"Copyright (c) 1982, 1986, 1989, 1991, 1993\n"
"The Regents of the University of California. All rights reserved.\n"
"\n";
117
118 /* Components of the first process -- never freed. */
119 struct session session0;
120 struct pgrp pgrp0;
121 struct proc proc0;
122 struct pcred cred0;
123 struct filedesc0 filedesc0;
124 struct cwdinfo cwdi0;
125 struct plimit limit0;
126 struct vmspace vmspace0;
127 struct sigacts sigacts0;
128 #ifndef curproc
129 struct proc *curproc = &proc0;
130 #endif
131 struct proc *initproc;
132
133 int cmask = CMASK;
134 extern struct user *proc0paddr;
135
136 struct vnode *rootvp, *swapdev_vp;
137 int boothowto;
138 int cold = 1; /* still working on startup */
139 struct timeval boottime;
140
141 __volatile int start_init_exec; /* semaphore for start_init() */
142
143 static void check_console(struct proc *p);
144 static void start_init(void *);
145 void main(void);
146
147 extern char sigcode[], esigcode[];
148 #ifdef SYSCALL_DEBUG
149 extern char *syscallnames[];
150 #endif
151
152 struct emul emul_netbsd = {
153 "netbsd",
154 NULL,
155 sendsig,
156 SYS_syscall,
157 SYS_MAXSYSCALL,
158 sysent,
159 #ifdef SYSCALL_DEBUG
160 syscallnames,
161 #else
162 NULL,
163 #endif
164 0,
165 copyargs,
166 setregs,
167 sigcode,
168 esigcode,
169 };
170
171 /*
172 * System startup; initialize the world, create process 0, mount root
173 * filesystem, and fork to create init and pagedaemon. Most of the
174 * hard work is done in the lower-level initialization routines including
175 * startup(), which does memory initialization and autoconfiguration.
176 */
177 void
178 main(void)
179 {
180 struct proc *p;
181 struct pdevinit *pdev;
182 int i, s, error;
183 extern struct pdevinit pdevinit[];
184 extern void roundrobin(void *);
185 extern void schedcpu(void *);
186 extern void disk_init(void);
187 #if defined(NFSSERVER) || defined(NFS)
188 extern void nfs_init(void);
189 #endif
190 #ifdef NVNODE_IMPLICIT
191 int usevnodes;
192 #endif
193
194 /*
195 * Initialize the current process pointer (curproc) before
196 * any possible traps/probes to simplify trap processing.
197 */
198 p = &proc0;
199 curproc = p;
200 p->p_cpu = curcpu();
201 /*
202 * Attempt to find console and initialize
203 * in case of early panic or other messages.
204 */
205 consinit();
206 printf("%s", copyright);
207
208 uvm_init();
209
210 /* Do machine-dependent initialization. */
211 cpu_startup();
212
213 /* Initialize callouts. */
214 callout_startup();
215
216 /*
217 * Initialize mbuf's. Do this now because we might attempt to
218 * allocate mbufs or mbuf clusters during autoconfiguration.
219 */
220 mbinit();
221
222 /* Initialize sockets. */
223 soinit();
224
225 /*
226 * The following 3 things must be done before autoconfiguration.
227 */
228 disk_init(); /* initialize disk list */
229 tty_init(); /* initialize tty list */
230 #if NRND > 0
231 rnd_init(); /* initialize RNG */
232 #endif
233
234 /* Initialize the sysctl subsystem. */
235 sysctl_init();
236
237 /*
238 * Initialize process and pgrp structures.
239 */
240 procinit();
241
242 /*
243 * Create process 0 (the swapper).
244 */
245 s = proclist_lock_write();
246 LIST_INSERT_HEAD(&allproc, p, p_list);
247 LIST_INSERT_HEAD(PIDHASH(p->p_pid), p, p_hash);
248 proclist_unlock_write(s);
249
250 p->p_pgrp = &pgrp0;
251 LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
252 LIST_INIT(&pgrp0.pg_members);
253 LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
254
255 pgrp0.pg_session = &session0;
256 session0.s_count = 1;
257 session0.s_sid = p->p_pid;
258 session0.s_leader = p;
259
260 /*
261 * Set P_NOCLDWAIT so that kernel threads are reparented to
262 * init(8) when they exit. init(8) can easily wait them out
263 * for us.
264 */
265 p->p_flag = P_INMEM | P_SYSTEM | P_NOCLDWAIT;
266 p->p_stat = SONPROC;
267 p->p_nice = NZERO;
268 p->p_emul = &emul_netbsd;
269 strncpy(p->p_comm, "swapper", MAXCOMLEN);
270
271 callout_init(&p->p_realit_ch);
272 callout_init(&p->p_tsleep_ch);
273
274 /* Create credentials. */
275 cred0.p_refcnt = 1;
276 p->p_cred = &cred0;
277 p->p_ucred = crget();
278 p->p_ucred->cr_ngroups = 1; /* group 0 */
279
280 /* Create the file descriptor table. */
281 finit();
282 p->p_fd = &filedesc0.fd_fd;
283 fdinit1(&filedesc0);
284
285 /* Create the CWD info. */
286 p->p_cwdi = &cwdi0;
287 cwdi0.cwdi_cmask = cmask;
288 cwdi0.cwdi_refcnt = 1;
289
290 /* Create the limits structures. */
291 p->p_limit = &limit0;
292 for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
293 limit0.pl_rlimit[i].rlim_cur =
294 limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
295
296 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
297 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
298 maxfiles < NOFILE ? maxfiles : NOFILE;
299
300 limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
301 limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
302 maxproc < MAXUPRC ? maxproc : MAXUPRC;
303
304 i = ptoa(uvmexp.free);
305 limit0.pl_rlimit[RLIMIT_RSS].rlim_max = i;
306 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i;
307 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3;
308 limit0.pl_corename = defcorename;
309 limit0.p_refcnt = 1;
310
311 /*
312 * Initialize proc0's vmspace, which uses the kernel pmap.
313 * All kernel processes (which never have user space mappings)
314 * share proc0's vmspace, and thus, the kernel pmap.
315 */
316 uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
317 trunc_page(VM_MAX_ADDRESS), TRUE);
318 p->p_vmspace = &vmspace0;
319
320 p->p_addr = proc0paddr; /* XXX */
321
322 /*
323 * We continue to place resource usage info in the
324 * user struct so they're pageable.
325 */
326 p->p_stats = &p->p_addr->u_stats;
327
328 /*
329 * Charge root for one process.
330 */
331 (void)chgproccnt(0, 1);
332
333 rqinit();
334
335 /* Configure virtual memory system, set vm rlimits. */
336 uvm_init_limits(p);
337
338 /* Initialize the file systems. */
339 #if defined(NFSSERVER) || defined(NFS)
340 nfs_init(); /* initialize server/shared data */
341 #endif
342 vfsinit();
343
344 /* Configure the system hardware. This will enable interrupts. */
345 configure();
346
347 #ifdef SYSVSHM
348 /* Initialize System V style shared memory. */
349 shminit();
350 #endif
351
352 #ifdef SYSVSEM
353 /* Initialize System V style semaphores. */
354 seminit();
355 #endif
356
357 #ifdef SYSVMSG
358 /* Initialize System V style message queues. */
359 msginit();
360 #endif
361
362 /* Attach pseudo-devices. */
363 for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
364 (*pdev->pdev_attach)(pdev->pdev_count);
365
366 /*
367 * Initialize protocols. Block reception of incoming packets
368 * until everything is ready.
369 */
370 s = splimp();
371 ifinit();
372 domaininit();
373 splx(s);
374
375 #ifdef GPROF
376 /* Initialize kernel profiling. */
377 kmstartup();
378 #endif
379
380 /* Initialize system accouting. */
381 acct_init();
382
383 /*
384 * Initialize signal-related data structures, and signal state
385 * for proc0.
386 */
387 signal_init();
388 p->p_sigacts = &sigacts0;
389 siginit(p);
390
391 /* Kick off timeout driven events by calling first time. */
392 roundrobin(NULL);
393 schedcpu(NULL);
394
395 /*
396 * Create process 1 (init(8)). We do this now, as Unix has
397 * historically had init be process 1, and changing this would
398 * probably upset a lot of people.
399 *
400 * Note that process 1 won't immediately exec init(8), but will
401 * wait for us to inform it that the root file system has been
402 * mounted.
403 */
404 if (fork1(p, 0, SIGCHLD, NULL, 0, start_init, NULL, NULL, &initproc))
405 panic("fork init");
406
407 /*
408 * Create any kernel threads who's creation was deferred because
409 * initproc had not yet been created.
410 */
411 kthread_run_deferred_queue();
412
413 /*
414 * Now that device driver threads have been created, wait for
415 * them to finish any deferred autoconfiguration. Note we don't
416 * need to lock this semaphore, since we haven't booted any
417 * secondary processors, yet.
418 */
419 while (config_pending)
420 (void) tsleep((void *)&config_pending, PWAIT, "cfpend", 0);
421
422 /*
423 * Now that autoconfiguration has completed, we can determine
424 * the root and dump devices.
425 */
426 cpu_rootconf();
427 cpu_dumpconf();
428
429 /* Mount the root file system. */
430 do {
431 domountroothook();
432 if ((error = vfs_mountroot())) {
433 printf("cannot mount root, error = %d\n", error);
434 boothowto |= RB_ASKNAME;
435 setroot(root_device,
436 (rootdev != NODEV) ? DISKPART(rootdev) : 0);
437 }
438 } while (error != 0);
439 mountroothook_destroy();
440
441 mountlist.cqh_first->mnt_flag |= MNT_ROOTFS;
442 mountlist.cqh_first->mnt_op->vfs_refcount++;
443
444 /*
445 * Get the vnode for '/'. Set filedesc0.fd_fd.fd_cdir to
446 * reference it.
447 */
448 if (VFS_ROOT(mountlist.cqh_first, &rootvnode))
449 panic("cannot find root vnode");
450 cwdi0.cwdi_cdir = rootvnode;
451 VREF(cwdi0.cwdi_cdir);
452 VOP_UNLOCK(rootvnode, 0);
453 cwdi0.cwdi_rdir = NULL;
454
455 /*
456 * Now that root is mounted, we can fixup initproc's CWD
457 * info. All other processes are kthreads, which merely
458 * share proc0's CWD info.
459 */
460 initproc->p_cwdi->cwdi_cdir = rootvnode;
461 VREF(initproc->p_cwdi->cwdi_cdir);
462 initproc->p_cwdi->cwdi_rdir = NULL;
463
464 /*
465 * Now can look at time, having had a chance to verify the time
466 * from the file system. Reset p->p_rtime as it may have been
467 * munched in mi_switch() after the time got set.
468 */
469 proclist_lock_read();
470 s = splsched();
471 for (p = LIST_FIRST(&allproc); p != NULL;
472 p = LIST_NEXT(p, p_list)) {
473 p->p_stats->p_start = mono_time = boottime = time;
474 if (p->p_cpu != NULL)
475 p->p_cpu->ci_schedstate.spc_runtime = time;
476 p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0;
477 }
478 splx(s);
479 proclist_unlock_read();
480
481 /* Create the pageout daemon kernel thread. */
482 uvm_swap_init();
483 if (kthread_create1(uvm_pageout, NULL, NULL, "pagedaemon"))
484 panic("fork pagedaemon");
485
486 /* Create the process reaper kernel thread. */
487 if (kthread_create1(reaper, NULL, NULL, "reaper"))
488 panic("fork reaper");
489
490 /* Create the filesystem syncer kernel thread. */
491 if (kthread_create1(sched_sync, NULL, NULL, "ioflush"))
492 panic("fork syncer");
493
494 #if defined(MULTIPROCESSOR)
495 /* Boot the secondary processors. */
496 cpu_boot_secondary_processors();
497 #endif
498
499 /*
500 * Okay, now we can let init(8) exec! It's off to userland!
501 */
502 start_init_exec = 1;
503 wakeup((void *)&start_init_exec);
504
505 #ifdef NVNODE_IMPLICIT
506 /*
507 * If maximum number of vnodes in namei vnode cache is not explicitly
508 * defined in kernel config, adjust the number such as we use roughly
509 * 0.5% of memory for vnode cache (but not less than NVNODE vnodes).
510 */
511 usevnodes = (ptoa(physmem) / 200) / sizeof(struct vnode);
512 if (usevnodes > desiredvnodes)
513 desiredvnodes = usevnodes;
514 #endif
515
516 /* The scheduler is an infinite loop. */
517 uvm_scheduler();
518 /* NOTREACHED */
519 }
520
521 static void
522 check_console(struct proc *p)
523 {
524 struct nameidata nd;
525 int error;
526
527 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/console", p);
528 error = namei(&nd);
529 if (error == 0)
530 vrele(nd.ni_vp);
531 else if (error == ENOENT)
532 printf("warning: no /dev/console\n");
533 else
534 printf("warning: lookup /dev/console: error %d\n", error);
535 }
536
/*
 * Candidate pathnames for "init", in the order start_init() tries them.
 * The table is terminated by a NULL entry.
 */
static const char *initpaths[] = {
	"/sbin/init", "/sbin/oinit", "/sbin/init.bak", NULL,
};
546
547 /*
548 * Start the initial user process; try exec'ing each pathname in "initpaths".
549 * The program is invoked with one argument containing the boot flags.
550 */
551 static void
552 start_init(void *arg)
553 {
554 struct proc *p = arg;
555 vaddr_t addr;
556 struct sys_execve_args /* {
557 syscallarg(const char *) path;
558 syscallarg(char * const *) argp;
559 syscallarg(char * const *) envp;
560 } */ args;
561 int options, i, error;
562 register_t retval[2];
563 char flags[4], *flagsp;
564 const char **pathp, *path, *slash;
565 char *ucp, **uap, *arg0, *arg1 = NULL;
566
567 /*
568 * Now in process 1.
569 */
570 strncpy(p->p_comm, "init", MAXCOMLEN);
571
572 /*
573 * Wait for main() to tell us that it's safe to exec.
574 */
575 while (start_init_exec == 0)
576 (void) tsleep((void *)&start_init_exec, PWAIT, "initexec", 0);
577
578 /*
579 * This is not the right way to do this. We really should
580 * hand-craft a descriptor onto /dev/console to hand to init,
581 * but that's a _lot_ more work, and the benefit from this easy
582 * hack makes up for the "good is the enemy of the best" effect.
583 */
584 check_console(p);
585
586 /*
587 * Need just enough stack to hold the faked-up "execve()" arguments.
588 */
589 addr = USRSTACK - PAGE_SIZE;
590 if (uvm_map(&p->p_vmspace->vm_map, &addr, PAGE_SIZE,
591 NULL, UVM_UNKNOWN_OFFSET,
592 UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY,
593 UVM_ADV_NORMAL,
594 UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW))
595 != KERN_SUCCESS)
596 panic("init: couldn't allocate argument space");
597 p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
598
599 for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) {
600 ucp = (char *)(addr + PAGE_SIZE);
601
602 /*
603 * Construct the boot flag argument.
604 */
605 flagsp = flags;
606 *flagsp++ = '-';
607 options = 0;
608
609 if (boothowto & RB_SINGLE) {
610 *flagsp++ = 's';
611 options = 1;
612 }
613 #ifdef notyet
614 if (boothowto & RB_FASTBOOT) {
615 *flagsp++ = 'f';
616 options = 1;
617 }
618 #endif
619
620 /*
621 * Move out the flags (arg 1), if necessary.
622 */
623 if (options != 0) {
624 *flagsp++ = '\0';
625 i = flagsp - flags;
626 #ifdef DEBUG
627 printf("init: copying out flags `%s' %d\n", flags, i);
628 #endif
629 (void)copyout((caddr_t)flags, (caddr_t)(ucp -= i), i);
630 arg1 = ucp;
631 }
632
633 /*
634 * Move out the file name (also arg 0).
635 */
636 i = strlen(path) + 1;
637 #ifdef DEBUG
638 printf("init: copying out path `%s' %d\n", path, i);
639 #endif
640 (void)copyout((caddr_t)path, (caddr_t)(ucp -= i), i);
641 arg0 = ucp;
642
643 /*
644 * Move out the arg pointers.
645 */
646 uap = (char **)((long)ucp & ~ALIGNBYTES);
647 (void)suword((caddr_t)--uap, 0); /* terminator */
648 if (options != 0)
649 (void)suword((caddr_t)--uap, (long)arg1);
650 slash = strrchr(path, '/');
651 if (slash)
652 (void)suword((caddr_t)--uap,
653 (long)arg0 + (slash + 1 - path));
654 else
655 (void)suword((caddr_t)--uap, (long)arg0);
656
657 /*
658 * Point at the arguments.
659 */
660 SCARG(&args, path) = arg0;
661 SCARG(&args, argp) = uap;
662 SCARG(&args, envp) = NULL;
663
664 /*
665 * Now try to exec the program. If can't for any reason
666 * other than it doesn't exist, complain.
667 */
668 error = sys_execve(p, &args, retval);
669 if (error == 0 || error == EJUSTRETURN)
670 return;
671 if (error != ENOENT)
672 printf("exec %s: error %d\n", path, error);
673 }
674 printf("init: not found\n");
675 panic("no init");
676 }
677