init_main.c revision 1.186 1 /* $NetBSD: init_main.c,v 1.186 2000/12/08 22:07:36 jdolecek Exp $ */
2
3 /*
4 * Copyright (c) 1995 Christopher G. Demetriou. All rights reserved.
5 * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
6 * The Regents of the University of California. All rights reserved.
7 * (c) UNIX System Laboratories, Inc.
8 * All or some portions of this file are derived from material licensed
9 * to the University of California by American Telephone and Telegraph
10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11 * the permission of UNIX System Laboratories, Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 * must display the following acknowledgement:
23 * This product includes software developed by the University of
24 * California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * @(#)init_main.c 8.16 (Berkeley) 5/14/95
42 */
43
44 #include "fs_nfs.h"
45 #include "opt_nfsserver.h"
46 #include "opt_sysv.h"
47 #include "opt_maxuprc.h"
48 #include "opt_multiprocessor.h"
49 #include "opt_syscall_debug.h"
50
51 #include "rnd.h"
52
53 #include <sys/param.h>
54 #include <sys/acct.h>
55 #include <sys/filedesc.h>
56 #include <sys/file.h>
57 #include <sys/errno.h>
58 #include <sys/callout.h>
59 #include <sys/kernel.h>
60 #include <sys/mount.h>
61 #include <sys/map.h>
62 #include <sys/proc.h>
63 #include <sys/kthread.h>
64 #include <sys/resourcevar.h>
65 #include <sys/signalvar.h>
66 #include <sys/systm.h>
67 #include <sys/vnode.h>
68 #include <sys/tty.h>
69 #include <sys/conf.h>
70 #include <sys/disklabel.h>
71 #include <sys/buf.h>
72 #include <sys/device.h>
73 #include <sys/exec.h>
74 #include <sys/socketvar.h>
75 #include <sys/protosw.h>
76 #include <sys/reboot.h>
77 #include <sys/user.h>
78 #include <sys/sysctl.h>
79 #ifdef SYSVSHM
80 #include <sys/shm.h>
81 #endif
82 #ifdef SYSVSEM
83 #include <sys/sem.h>
84 #endif
85 #ifdef SYSVMSG
86 #include <sys/msg.h>
87 #endif
88 #include <sys/domain.h>
89 #include <sys/mbuf.h>
90 #include <sys/namei.h>
91 #if NRND > 0
92 #include <sys/rnd.h>
93 #endif
94
95 #include <sys/syscall.h>
96 #include <sys/syscallargs.h>
97
98 #include <ufs/ufs/quota.h>
99
100 #include <miscfs/genfs/genfs.h>
101 #include <miscfs/syncfs/syncfs.h>
102
103 #include <machine/cpu.h>
104
105 #include <uvm/uvm.h>
106
107 #include <net/if.h>
108 #include <net/raw_cb.h>
109
/*
 * Copyright banner; main() prints it on the console right after
 * consinit().
 *
 * The text is spelled as adjacent string literals with explicit "\n"
 * escapes.  A raw newline inside a string literal is not valid ISO C
 * (it was only tolerated as a compiler extension), so the banner must
 * not be written as one literal spanning several source lines.
 *
 * NOTE(review): the four-space indent of the continuation lines and
 * the double space before "All rights reserved." are reconstructed;
 * confirm them against a pristine copy of this revision.
 */
const char copyright[] =
    "Copyright (c) 1996, 1997, 1998, 1999, 2000\n"
    "    The NetBSD Foundation, Inc.  All rights reserved.\n"
    "Copyright (c) 1982, 1986, 1989, 1991, 1993\n"
    "    The Regents of the University of California.  All rights reserved.\n"
    "\n";
117
118 /* Components of the first process -- never freed. */
119 struct session session0;
120 struct pgrp pgrp0;
121 struct proc proc0;
122 struct pcred cred0;
123 struct filedesc0 filedesc0;
124 struct cwdinfo cwdi0;
125 struct plimit limit0;
126 struct vmspace vmspace0;
127 struct sigacts sigacts0;
128 #ifndef curproc
129 struct proc *curproc = &proc0;
130 #endif
131 struct proc *initproc;
132
133 int cmask = CMASK;
134 extern struct user *proc0paddr;
135
136 struct vnode *rootvp, *swapdev_vp;
137 int boothowto;
138 int cold = 1; /* still working on startup */
139 struct timeval boottime;
140
141 __volatile int start_init_exec; /* semaphore for start_init() */
142
143 static void check_console(struct proc *p);
144 static void start_init(void *);
145 void main(void);
146
147 extern const struct emul emul_netbsd; /* defined in kern_exec.c */
148
149 /*
150 * System startup; initialize the world, create process 0, mount root
151 * filesystem, and fork to create init and pagedaemon. Most of the
152 * hard work is done in the lower-level initialization routines including
153 * startup(), which does memory initialization and autoconfiguration.
154 */
155 void
156 main(void)
157 {
158 struct proc *p;
159 struct pdevinit *pdev;
160 int i, s, error;
161 rlim_t lim;
162 extern struct pdevinit pdevinit[];
163 extern void schedcpu(void *);
164 extern void disk_init(void);
165 #if defined(NFSSERVER) || defined(NFS)
166 extern void nfs_init(void);
167 #endif
168 #ifdef NVNODE_IMPLICIT
169 int usevnodes;
170 #endif
171
172 /*
173 * Initialize the current process pointer (curproc) before
174 * any possible traps/probes to simplify trap processing.
175 */
176 p = &proc0;
177 curproc = p;
178 p->p_cpu = curcpu();
179 /*
180 * Attempt to find console and initialize
181 * in case of early panic or other messages.
182 */
183 consinit();
184 printf("%s", copyright);
185
186 KERNEL_LOCK_INIT();
187
188 uvm_init();
189
190 /* Do machine-dependent initialization. */
191 cpu_startup();
192
193 /* Initialize callouts. */
194 callout_startup();
195
196 /*
197 * Initialize mbuf's. Do this now because we might attempt to
198 * allocate mbufs or mbuf clusters during autoconfiguration.
199 */
200 mbinit();
201
202 /* Initialize sockets. */
203 soinit();
204
205 /*
206 * The following 3 things must be done before autoconfiguration.
207 */
208 disk_init(); /* initialize disk list */
209 tty_init(); /* initialize tty list */
210 #if NRND > 0
211 rnd_init(); /* initialize RNG */
212 #endif
213
214 /* Initialize the sysctl subsystem. */
215 sysctl_init();
216
217 /*
218 * Initialize process and pgrp structures.
219 */
220 procinit();
221
222 /*
223 * Create process 0 (the swapper).
224 */
225 s = proclist_lock_write();
226 LIST_INSERT_HEAD(&allproc, p, p_list);
227 LIST_INSERT_HEAD(PIDHASH(p->p_pid), p, p_hash);
228 proclist_unlock_write(s);
229
230 p->p_pgrp = &pgrp0;
231 LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
232 LIST_INIT(&pgrp0.pg_members);
233 LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
234
235 pgrp0.pg_session = &session0;
236 session0.s_count = 1;
237 session0.s_sid = p->p_pid;
238 session0.s_leader = p;
239
240 /*
241 * Set P_NOCLDWAIT so that kernel threads are reparented to
242 * init(8) when they exit. init(8) can easily wait them out
243 * for us.
244 */
245 p->p_flag = P_INMEM | P_SYSTEM | P_NOCLDWAIT;
246 p->p_stat = SONPROC;
247 p->p_nice = NZERO;
248 p->p_emul = &emul_netbsd;
249 strncpy(p->p_comm, "swapper", MAXCOMLEN);
250
251 callout_init(&p->p_realit_ch);
252 callout_init(&p->p_tsleep_ch);
253
254 /* Create credentials. */
255 cred0.p_refcnt = 1;
256 p->p_cred = &cred0;
257 p->p_ucred = crget();
258 p->p_ucred->cr_ngroups = 1; /* group 0 */
259
260 /* Create the file descriptor table. */
261 finit();
262 p->p_fd = &filedesc0.fd_fd;
263 fdinit1(&filedesc0);
264
265 /* Create the CWD info. */
266 p->p_cwdi = &cwdi0;
267 cwdi0.cwdi_cmask = cmask;
268 cwdi0.cwdi_refcnt = 1;
269
270 /* Create the limits structures. */
271 p->p_limit = &limit0;
272 for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
273 limit0.pl_rlimit[i].rlim_cur =
274 limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
275
276 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
277 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
278 maxfiles < NOFILE ? maxfiles : NOFILE;
279
280 limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
281 limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
282 maxproc < MAXUPRC ? maxproc : MAXUPRC;
283
284 lim = ptoa(uvmexp.free);
285 limit0.pl_rlimit[RLIMIT_RSS].rlim_max = lim;
286 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = lim;
287 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = lim / 3;
288 limit0.pl_corename = defcorename;
289 limit0.p_refcnt = 1;
290
291 /*
292 * Initialize proc0's vmspace, which uses the kernel pmap.
293 * All kernel processes (which never have user space mappings)
294 * share proc0's vmspace, and thus, the kernel pmap.
295 */
296 uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
297 trunc_page(VM_MAX_ADDRESS), TRUE);
298 p->p_vmspace = &vmspace0;
299
300 p->p_addr = proc0paddr; /* XXX */
301
302 /*
303 * We continue to place resource usage info in the
304 * user struct so they're pageable.
305 */
306 p->p_stats = &p->p_addr->u_stats;
307
308 /*
309 * Charge root for one process.
310 */
311 (void)chgproccnt(0, 1);
312
313 rqinit();
314
315 /* Configure virtual memory system, set vm rlimits. */
316 uvm_init_limits(p);
317
318 /* Initialize the file systems. */
319 #if defined(NFSSERVER) || defined(NFS)
320 nfs_init(); /* initialize server/shared data */
321 #endif
322 vfsinit();
323
324 /* Configure the system hardware. This will enable interrupts. */
325 configure();
326
327 ubc_init(); /* must be after autoconfig */
328
329 /* Lock the kernel on behalf of proc0. */
330 KERNEL_PROC_LOCK(p);
331
332 #ifdef SYSVSHM
333 /* Initialize System V style shared memory. */
334 shminit();
335 #endif
336
337 #ifdef SYSVSEM
338 /* Initialize System V style semaphores. */
339 seminit();
340 #endif
341
342 #ifdef SYSVMSG
343 /* Initialize System V style message queues. */
344 msginit();
345 #endif
346
347 /* Attach pseudo-devices. */
348 for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
349 (*pdev->pdev_attach)(pdev->pdev_count);
350
351 /*
352 * Initialize protocols. Block reception of incoming packets
353 * until everything is ready.
354 */
355 s = splimp();
356 ifinit();
357 domaininit();
358 splx(s);
359
360 #ifdef GPROF
361 /* Initialize kernel profiling. */
362 kmstartup();
363 #endif
364
365 /* Initialize system accouting. */
366 acct_init();
367
368 /*
369 * Initialize signal-related data structures, and signal state
370 * for proc0.
371 */
372 signal_init();
373 p->p_sigacts = &sigacts0;
374 siginit(p);
375
376 /* Kick off timeout driven events by calling first time. */
377 schedcpu(NULL);
378
379 /*
380 * Create process 1 (init(8)). We do this now, as Unix has
381 * historically had init be process 1, and changing this would
382 * probably upset a lot of people.
383 *
384 * Note that process 1 won't immediately exec init(8), but will
385 * wait for us to inform it that the root file system has been
386 * mounted.
387 */
388 if (fork1(p, 0, SIGCHLD, NULL, 0, start_init, NULL, NULL, &initproc))
389 panic("fork init");
390
391 /*
392 * Create any kernel threads who's creation was deferred because
393 * initproc had not yet been created.
394 */
395 kthread_run_deferred_queue();
396
397 /*
398 * Now that device driver threads have been created, wait for
399 * them to finish any deferred autoconfiguration. Note we don't
400 * need to lock this semaphore, since we haven't booted any
401 * secondary processors, yet.
402 */
403 while (config_pending)
404 (void) tsleep((void *)&config_pending, PWAIT, "cfpend", 0);
405
406 /*
407 * Now that autoconfiguration has completed, we can determine
408 * the root and dump devices.
409 */
410 cpu_rootconf();
411 cpu_dumpconf();
412
413 /* Mount the root file system. */
414 do {
415 domountroothook();
416 if ((error = vfs_mountroot())) {
417 printf("cannot mount root, error = %d\n", error);
418 boothowto |= RB_ASKNAME;
419 setroot(root_device,
420 (rootdev != NODEV) ? DISKPART(rootdev) : 0);
421 }
422 } while (error != 0);
423 mountroothook_destroy();
424
425 mountlist.cqh_first->mnt_flag |= MNT_ROOTFS;
426 mountlist.cqh_first->mnt_op->vfs_refcount++;
427
428 /*
429 * Get the vnode for '/'. Set filedesc0.fd_fd.fd_cdir to
430 * reference it.
431 */
432 if (VFS_ROOT(mountlist.cqh_first, &rootvnode))
433 panic("cannot find root vnode");
434 cwdi0.cwdi_cdir = rootvnode;
435 VREF(cwdi0.cwdi_cdir);
436 VOP_UNLOCK(rootvnode, 0);
437 cwdi0.cwdi_rdir = NULL;
438
439 /*
440 * Now that root is mounted, we can fixup initproc's CWD
441 * info. All other processes are kthreads, which merely
442 * share proc0's CWD info.
443 */
444 initproc->p_cwdi->cwdi_cdir = rootvnode;
445 VREF(initproc->p_cwdi->cwdi_cdir);
446 initproc->p_cwdi->cwdi_rdir = NULL;
447
448 /*
449 * Now can look at time, having had a chance to verify the time
450 * from the file system. Reset p->p_rtime as it may have been
451 * munched in mi_switch() after the time got set.
452 */
453 proclist_lock_read();
454 s = splsched();
455 for (p = LIST_FIRST(&allproc); p != NULL;
456 p = LIST_NEXT(p, p_list)) {
457 p->p_stats->p_start = mono_time = boottime = time;
458 if (p->p_cpu != NULL)
459 p->p_cpu->ci_schedstate.spc_runtime = time;
460 p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0;
461 }
462 splx(s);
463 proclist_unlock_read();
464
465 /* Create the pageout daemon kernel thread. */
466 uvm_swap_init();
467 if (kthread_create1(uvm_pageout, NULL, NULL, "pagedaemon"))
468 panic("fork pagedaemon");
469
470 /* Create the process reaper kernel thread. */
471 if (kthread_create1(reaper, NULL, NULL, "reaper"))
472 panic("fork reaper");
473
474 /* Create the filesystem syncer kernel thread. */
475 if (kthread_create1(sched_sync, NULL, NULL, "ioflush"))
476 panic("fork syncer");
477
478 /* Create the aiodone daemon kernel thread. */
479 if (kthread_create1(uvm_aiodone_daemon, NULL, NULL, "aiodoned"))
480 panic("fork aiodoned");
481
482 #if defined(MULTIPROCESSOR)
483 /* Boot the secondary processors. */
484 cpu_boot_secondary_processors();
485 #endif
486
487 /* Initialize exec structures */
488 exec_init(1);
489
490 /*
491 * Okay, now we can let init(8) exec! It's off to userland!
492 */
493 start_init_exec = 1;
494 wakeup((void *)&start_init_exec);
495
496 #ifdef NVNODE_IMPLICIT
497 /*
498 * If maximum number of vnodes in namei vnode cache is not explicitly
499 * defined in kernel config, adjust the number such as we use roughly
500 * 0.5% of memory for vnode cache (but not less than NVNODE vnodes).
501 */
502 usevnodes = (ptoa(physmem) / 200) / sizeof(struct vnode);
503 if (usevnodes > desiredvnodes)
504 desiredvnodes = usevnodes;
505 #endif
506
507 /* The scheduler is an infinite loop. */
508 uvm_scheduler();
509 /* NOTREACHED */
510 }
511
512 static void
513 check_console(struct proc *p)
514 {
515 struct nameidata nd;
516 int error;
517
518 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/console", p);
519 error = namei(&nd);
520 if (error == 0)
521 vrele(nd.ni_vp);
522 else if (error == ENOENT)
523 printf("warning: no /dev/console\n");
524 else
525 printf("warning: lookup /dev/console: error %d\n", error);
526 }
527
/*
 * Candidate pathnames for the "init" program, in the order in which
 * start_init() tries them.  A NULL entry ends the list.
 */
static const char *initpaths[] = {
	"/sbin/init", "/sbin/oinit", "/sbin/init.bak", NULL
};
537
538 /*
539 * Start the initial user process; try exec'ing each pathname in "initpaths".
540 * The program is invoked with one argument containing the boot flags.
541 */
542 static void
543 start_init(void *arg)
544 {
545 struct proc *p = arg;
546 vaddr_t addr;
547 struct sys_execve_args /* {
548 syscallarg(const char *) path;
549 syscallarg(char * const *) argp;
550 syscallarg(char * const *) envp;
551 } */ args;
552 int options, i, error;
553 register_t retval[2];
554 char flags[4], *flagsp;
555 const char **pathp, *path, *slash;
556 char *ucp, **uap, *arg0, *arg1 = NULL;
557
558 /*
559 * Now in process 1.
560 */
561 strncpy(p->p_comm, "init", MAXCOMLEN);
562
563 /*
564 * Wait for main() to tell us that it's safe to exec.
565 */
566 while (start_init_exec == 0)
567 (void) tsleep((void *)&start_init_exec, PWAIT, "initexec", 0);
568
569 /*
570 * This is not the right way to do this. We really should
571 * hand-craft a descriptor onto /dev/console to hand to init,
572 * but that's a _lot_ more work, and the benefit from this easy
573 * hack makes up for the "good is the enemy of the best" effect.
574 */
575 check_console(p);
576
577 /*
578 * Need just enough stack to hold the faked-up "execve()" arguments.
579 */
580 addr = USRSTACK - PAGE_SIZE;
581 if (uvm_map(&p->p_vmspace->vm_map, &addr, PAGE_SIZE,
582 NULL, UVM_UNKNOWN_OFFSET, 0,
583 UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY,
584 UVM_ADV_NORMAL,
585 UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW))
586 != KERN_SUCCESS)
587 panic("init: couldn't allocate argument space");
588 p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
589
590 for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) {
591 ucp = (char *)(addr + PAGE_SIZE);
592
593 /*
594 * Construct the boot flag argument.
595 */
596 flagsp = flags;
597 *flagsp++ = '-';
598 options = 0;
599
600 if (boothowto & RB_SINGLE) {
601 *flagsp++ = 's';
602 options = 1;
603 }
604 #ifdef notyet
605 if (boothowto & RB_FASTBOOT) {
606 *flagsp++ = 'f';
607 options = 1;
608 }
609 #endif
610
611 /*
612 * Move out the flags (arg 1), if necessary.
613 */
614 if (options != 0) {
615 *flagsp++ = '\0';
616 i = flagsp - flags;
617 #ifdef DEBUG
618 printf("init: copying out flags `%s' %d\n", flags, i);
619 #endif
620 (void)copyout((caddr_t)flags, (caddr_t)(ucp -= i), i);
621 arg1 = ucp;
622 }
623
624 /*
625 * Move out the file name (also arg 0).
626 */
627 i = strlen(path) + 1;
628 #ifdef DEBUG
629 printf("init: copying out path `%s' %d\n", path, i);
630 #endif
631 (void)copyout((caddr_t)path, (caddr_t)(ucp -= i), i);
632 arg0 = ucp;
633
634 /*
635 * Move out the arg pointers.
636 */
637 uap = (char **)((long)ucp & ~ALIGNBYTES);
638 (void)suword((caddr_t)--uap, 0); /* terminator */
639 if (options != 0)
640 (void)suword((caddr_t)--uap, (long)arg1);
641 slash = strrchr(path, '/');
642 if (slash)
643 (void)suword((caddr_t)--uap,
644 (long)arg0 + (slash + 1 - path));
645 else
646 (void)suword((caddr_t)--uap, (long)arg0);
647
648 /*
649 * Point at the arguments.
650 */
651 SCARG(&args, path) = arg0;
652 SCARG(&args, argp) = uap;
653 SCARG(&args, envp) = NULL;
654
655 /*
656 * Now try to exec the program. If can't for any reason
657 * other than it doesn't exist, complain.
658 */
659 error = sys_execve(p, &args, retval);
660 if (error == 0 || error == EJUSTRETURN) {
661 KERNEL_PROC_UNLOCK(p);
662 return;
663 }
664 if (error != ENOENT)
665 printf("exec %s: error %d\n", path, error);
666 }
667 printf("init: not found\n");
668 panic("no init");
669 }
670