/*	$NetBSD: init_main.c,v 1.184 2000/11/21 00:37:56 jdolecek Exp $	*/

/*
 * Copyright (c) 1995 Christopher G. Demetriou.  All rights reserved.
 * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)init_main.c	8.16 (Berkeley) 5/14/95
 */
43
44 #include "fs_nfs.h"
45 #include "opt_nfsserver.h"
46 #include "opt_sysv.h"
47 #include "opt_maxuprc.h"
48 #include "opt_multiprocessor.h"
49 #include "opt_syscall_debug.h"
50
51 #include "rnd.h"
52
53 #include <sys/param.h>
54 #include <sys/acct.h>
55 #include <sys/filedesc.h>
56 #include <sys/file.h>
57 #include <sys/errno.h>
58 #include <sys/callout.h>
59 #include <sys/kernel.h>
60 #include <sys/mount.h>
61 #include <sys/map.h>
62 #include <sys/proc.h>
63 #include <sys/kthread.h>
64 #include <sys/resourcevar.h>
65 #include <sys/signalvar.h>
66 #include <sys/systm.h>
67 #include <sys/vnode.h>
68 #include <sys/tty.h>
69 #include <sys/conf.h>
70 #include <sys/disklabel.h>
71 #include <sys/buf.h>
72 #include <sys/device.h>
73 #include <sys/socketvar.h>
74 #include <sys/protosw.h>
75 #include <sys/reboot.h>
76 #include <sys/user.h>
77 #include <sys/sysctl.h>
78 #ifdef SYSVSHM
79 #include <sys/shm.h>
80 #endif
81 #ifdef SYSVSEM
82 #include <sys/sem.h>
83 #endif
84 #ifdef SYSVMSG
85 #include <sys/msg.h>
86 #endif
87 #include <sys/domain.h>
88 #include <sys/mbuf.h>
89 #include <sys/namei.h>
90 #if NRND > 0
91 #include <sys/rnd.h>
92 #endif
93
94 #include <sys/syscall.h>
95 #include <sys/syscallargs.h>
96
97 #include <ufs/ufs/quota.h>
98
99 #include <miscfs/genfs/genfs.h>
100 #include <miscfs/syncfs/syncfs.h>
101
102 #include <machine/cpu.h>
103
104 #include <uvm/uvm.h>
105
106 #include <net/if.h>
107 #include <net/raw_cb.h>
108
/*
 * Copyright banner; main() prints it with printf("%s", copyright) right
 * after the console has been initialized.
 *
 * Written as adjacent string literals with explicit "\n" escapes: a
 * literal that spans raw source newlines is not valid ISO C.
 *
 * NOTE(review): the whitespace inside each line is reproduced as it was
 * visible when this block was edited; if the repository copy indents the
 * continuation lines or uses wider spacing, restore that spacing here.
 */
const char copyright[] =
    "Copyright (c) 1996, 1997, 1998, 1999, 2000\n"
    "The NetBSD Foundation, Inc. All rights reserved.\n"
    "Copyright (c) 1982, 1986, 1989, 1991, 1993\n"
    "The Regents of the University of California. All rights reserved.\n"
    "\n";
116
117 /* Components of the first process -- never freed. */
118 struct session session0;
119 struct pgrp pgrp0;
120 struct proc proc0;
121 struct pcred cred0;
122 struct filedesc0 filedesc0;
123 struct cwdinfo cwdi0;
124 struct plimit limit0;
125 struct vmspace vmspace0;
126 struct sigacts sigacts0;
127 #ifndef curproc
128 struct proc *curproc = &proc0;
129 #endif
130 struct proc *initproc;
131
132 int cmask = CMASK;
133 extern struct user *proc0paddr;
134
135 struct vnode *rootvp, *swapdev_vp;
136 int boothowto;
137 int cold = 1; /* still working on startup */
138 struct timeval boottime;
139
140 __volatile int start_init_exec; /* semaphore for start_init() */
141
142 static void check_console(struct proc *p);
143 static void start_init(void *);
144 void main(void);
145
146 extern const struct emul emul_netbsd; /* defined in kern_exec.c */
147
148 /*
149 * System startup; initialize the world, create process 0, mount root
150 * filesystem, and fork to create init and pagedaemon. Most of the
151 * hard work is done in the lower-level initialization routines including
152 * startup(), which does memory initialization and autoconfiguration.
153 */
154 void
155 main(void)
156 {
157 struct proc *p;
158 struct pdevinit *pdev;
159 int i, s, error;
160 rlim_t lim;
161 extern struct pdevinit pdevinit[];
162 extern void schedcpu(void *);
163 extern void disk_init(void);
164 #if defined(NFSSERVER) || defined(NFS)
165 extern void nfs_init(void);
166 #endif
167 #ifdef NVNODE_IMPLICIT
168 int usevnodes;
169 #endif
170
171 /*
172 * Initialize the current process pointer (curproc) before
173 * any possible traps/probes to simplify trap processing.
174 */
175 p = &proc0;
176 curproc = p;
177 p->p_cpu = curcpu();
178 /*
179 * Attempt to find console and initialize
180 * in case of early panic or other messages.
181 */
182 consinit();
183 printf("%s", copyright);
184
185 KERNEL_LOCK_INIT();
186
187 uvm_init();
188
189 /* Do machine-dependent initialization. */
190 cpu_startup();
191
192 /* Initialize callouts. */
193 callout_startup();
194
195 /*
196 * Initialize mbuf's. Do this now because we might attempt to
197 * allocate mbufs or mbuf clusters during autoconfiguration.
198 */
199 mbinit();
200
201 /* Initialize sockets. */
202 soinit();
203
204 /*
205 * The following 3 things must be done before autoconfiguration.
206 */
207 disk_init(); /* initialize disk list */
208 tty_init(); /* initialize tty list */
209 #if NRND > 0
210 rnd_init(); /* initialize RNG */
211 #endif
212
213 /* Initialize the sysctl subsystem. */
214 sysctl_init();
215
216 /*
217 * Initialize process and pgrp structures.
218 */
219 procinit();
220
221 /*
222 * Create process 0 (the swapper).
223 */
224 s = proclist_lock_write();
225 LIST_INSERT_HEAD(&allproc, p, p_list);
226 LIST_INSERT_HEAD(PIDHASH(p->p_pid), p, p_hash);
227 proclist_unlock_write(s);
228
229 p->p_pgrp = &pgrp0;
230 LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
231 LIST_INIT(&pgrp0.pg_members);
232 LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
233
234 pgrp0.pg_session = &session0;
235 session0.s_count = 1;
236 session0.s_sid = p->p_pid;
237 session0.s_leader = p;
238
239 /*
240 * Set P_NOCLDWAIT so that kernel threads are reparented to
241 * init(8) when they exit. init(8) can easily wait them out
242 * for us.
243 */
244 p->p_flag = P_INMEM | P_SYSTEM | P_NOCLDWAIT;
245 p->p_stat = SONPROC;
246 p->p_nice = NZERO;
247 p->p_emul = &emul_netbsd;
248 strncpy(p->p_comm, "swapper", MAXCOMLEN);
249
250 callout_init(&p->p_realit_ch);
251 callout_init(&p->p_tsleep_ch);
252
253 /* Create credentials. */
254 cred0.p_refcnt = 1;
255 p->p_cred = &cred0;
256 p->p_ucred = crget();
257 p->p_ucred->cr_ngroups = 1; /* group 0 */
258
259 /* Create the file descriptor table. */
260 finit();
261 p->p_fd = &filedesc0.fd_fd;
262 fdinit1(&filedesc0);
263
264 /* Create the CWD info. */
265 p->p_cwdi = &cwdi0;
266 cwdi0.cwdi_cmask = cmask;
267 cwdi0.cwdi_refcnt = 1;
268
269 /* Create the limits structures. */
270 p->p_limit = &limit0;
271 for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
272 limit0.pl_rlimit[i].rlim_cur =
273 limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
274
275 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
276 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
277 maxfiles < NOFILE ? maxfiles : NOFILE;
278
279 limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
280 limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
281 maxproc < MAXUPRC ? maxproc : MAXUPRC;
282
283 lim = ptoa(uvmexp.free);
284 limit0.pl_rlimit[RLIMIT_RSS].rlim_max = lim;
285 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = lim;
286 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = lim / 3;
287 limit0.pl_corename = defcorename;
288 limit0.p_refcnt = 1;
289
290 /*
291 * Initialize proc0's vmspace, which uses the kernel pmap.
292 * All kernel processes (which never have user space mappings)
293 * share proc0's vmspace, and thus, the kernel pmap.
294 */
295 uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
296 trunc_page(VM_MAX_ADDRESS), TRUE);
297 p->p_vmspace = &vmspace0;
298
299 p->p_addr = proc0paddr; /* XXX */
300
301 /*
302 * We continue to place resource usage info in the
303 * user struct so they're pageable.
304 */
305 p->p_stats = &p->p_addr->u_stats;
306
307 /*
308 * Charge root for one process.
309 */
310 (void)chgproccnt(0, 1);
311
312 rqinit();
313
314 /* Configure virtual memory system, set vm rlimits. */
315 uvm_init_limits(p);
316
317 /* Initialize the file systems. */
318 #if defined(NFSSERVER) || defined(NFS)
319 nfs_init(); /* initialize server/shared data */
320 #endif
321 vfsinit();
322
323 /* Configure the system hardware. This will enable interrupts. */
324 configure();
325
326 /* Lock the kernel on behalf of proc0. */
327 KERNEL_PROC_LOCK(p);
328
329 #ifdef SYSVSHM
330 /* Initialize System V style shared memory. */
331 shminit();
332 #endif
333
334 #ifdef SYSVSEM
335 /* Initialize System V style semaphores. */
336 seminit();
337 #endif
338
339 #ifdef SYSVMSG
340 /* Initialize System V style message queues. */
341 msginit();
342 #endif
343
344 /* Attach pseudo-devices. */
345 for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
346 (*pdev->pdev_attach)(pdev->pdev_count);
347
348 /*
349 * Initialize protocols. Block reception of incoming packets
350 * until everything is ready.
351 */
352 s = splimp();
353 ifinit();
354 domaininit();
355 splx(s);
356
357 #ifdef GPROF
358 /* Initialize kernel profiling. */
359 kmstartup();
360 #endif
361
362 /* Initialize system accouting. */
363 acct_init();
364
365 /*
366 * Initialize signal-related data structures, and signal state
367 * for proc0.
368 */
369 signal_init();
370 p->p_sigacts = &sigacts0;
371 siginit(p);
372
373 /* Kick off timeout driven events by calling first time. */
374 schedcpu(NULL);
375
376 /*
377 * Create process 1 (init(8)). We do this now, as Unix has
378 * historically had init be process 1, and changing this would
379 * probably upset a lot of people.
380 *
381 * Note that process 1 won't immediately exec init(8), but will
382 * wait for us to inform it that the root file system has been
383 * mounted.
384 */
385 if (fork1(p, 0, SIGCHLD, NULL, 0, start_init, NULL, NULL, &initproc))
386 panic("fork init");
387
388 /*
389 * Create any kernel threads who's creation was deferred because
390 * initproc had not yet been created.
391 */
392 kthread_run_deferred_queue();
393
394 /*
395 * Now that device driver threads have been created, wait for
396 * them to finish any deferred autoconfiguration. Note we don't
397 * need to lock this semaphore, since we haven't booted any
398 * secondary processors, yet.
399 */
400 while (config_pending)
401 (void) tsleep((void *)&config_pending, PWAIT, "cfpend", 0);
402
403 /*
404 * Now that autoconfiguration has completed, we can determine
405 * the root and dump devices.
406 */
407 cpu_rootconf();
408 cpu_dumpconf();
409
410 /* Mount the root file system. */
411 do {
412 domountroothook();
413 if ((error = vfs_mountroot())) {
414 printf("cannot mount root, error = %d\n", error);
415 boothowto |= RB_ASKNAME;
416 setroot(root_device,
417 (rootdev != NODEV) ? DISKPART(rootdev) : 0);
418 }
419 } while (error != 0);
420 mountroothook_destroy();
421
422 mountlist.cqh_first->mnt_flag |= MNT_ROOTFS;
423 mountlist.cqh_first->mnt_op->vfs_refcount++;
424
425 /*
426 * Get the vnode for '/'. Set filedesc0.fd_fd.fd_cdir to
427 * reference it.
428 */
429 if (VFS_ROOT(mountlist.cqh_first, &rootvnode))
430 panic("cannot find root vnode");
431 cwdi0.cwdi_cdir = rootvnode;
432 VREF(cwdi0.cwdi_cdir);
433 VOP_UNLOCK(rootvnode, 0);
434 cwdi0.cwdi_rdir = NULL;
435
436 /*
437 * Now that root is mounted, we can fixup initproc's CWD
438 * info. All other processes are kthreads, which merely
439 * share proc0's CWD info.
440 */
441 initproc->p_cwdi->cwdi_cdir = rootvnode;
442 VREF(initproc->p_cwdi->cwdi_cdir);
443 initproc->p_cwdi->cwdi_rdir = NULL;
444
445 /*
446 * Now can look at time, having had a chance to verify the time
447 * from the file system. Reset p->p_rtime as it may have been
448 * munched in mi_switch() after the time got set.
449 */
450 proclist_lock_read();
451 s = splsched();
452 for (p = LIST_FIRST(&allproc); p != NULL;
453 p = LIST_NEXT(p, p_list)) {
454 p->p_stats->p_start = mono_time = boottime = time;
455 if (p->p_cpu != NULL)
456 p->p_cpu->ci_schedstate.spc_runtime = time;
457 p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0;
458 }
459 splx(s);
460 proclist_unlock_read();
461
462 /* Create the pageout daemon kernel thread. */
463 uvm_swap_init();
464 if (kthread_create1(uvm_pageout, NULL, NULL, "pagedaemon"))
465 panic("fork pagedaemon");
466
467 /* Create the process reaper kernel thread. */
468 if (kthread_create1(reaper, NULL, NULL, "reaper"))
469 panic("fork reaper");
470
471 /* Create the filesystem syncer kernel thread. */
472 if (kthread_create1(sched_sync, NULL, NULL, "ioflush"))
473 panic("fork syncer");
474
475 #if defined(MULTIPROCESSOR)
476 /* Boot the secondary processors. */
477 cpu_boot_secondary_processors();
478 #endif
479
480 /*
481 * Okay, now we can let init(8) exec! It's off to userland!
482 */
483 start_init_exec = 1;
484 wakeup((void *)&start_init_exec);
485
486 #ifdef NVNODE_IMPLICIT
487 /*
488 * If maximum number of vnodes in namei vnode cache is not explicitly
489 * defined in kernel config, adjust the number such as we use roughly
490 * 0.5% of memory for vnode cache (but not less than NVNODE vnodes).
491 */
492 usevnodes = (ptoa(physmem) / 200) / sizeof(struct vnode);
493 if (usevnodes > desiredvnodes)
494 desiredvnodes = usevnodes;
495 #endif
496
497 /* The scheduler is an infinite loop. */
498 uvm_scheduler();
499 /* NOTREACHED */
500 }
501
502 static void
503 check_console(struct proc *p)
504 {
505 struct nameidata nd;
506 int error;
507
508 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/console", p);
509 error = namei(&nd);
510 if (error == 0)
511 vrele(nd.ni_vp);
512 else if (error == ENOENT)
513 printf("warning: no /dev/console\n");
514 else
515 printf("warning: lookup /dev/console: error %d\n", error);
516 }
517
/*
 * List of paths to try, in order, when searching for "init".
 * The list is terminated by a NULL entry; start_init() stops there.
 */
static const char *initpaths[] = {
	"/sbin/init",
	"/sbin/oinit",
	"/sbin/init.bak",
	NULL,
};
527
528 /*
529 * Start the initial user process; try exec'ing each pathname in "initpaths".
530 * The program is invoked with one argument containing the boot flags.
531 */
532 static void
533 start_init(void *arg)
534 {
535 struct proc *p = arg;
536 vaddr_t addr;
537 struct sys_execve_args /* {
538 syscallarg(const char *) path;
539 syscallarg(char * const *) argp;
540 syscallarg(char * const *) envp;
541 } */ args;
542 int options, i, error;
543 register_t retval[2];
544 char flags[4], *flagsp;
545 const char **pathp, *path, *slash;
546 char *ucp, **uap, *arg0, *arg1 = NULL;
547
548 /*
549 * Now in process 1.
550 */
551 strncpy(p->p_comm, "init", MAXCOMLEN);
552
553 /*
554 * Wait for main() to tell us that it's safe to exec.
555 */
556 while (start_init_exec == 0)
557 (void) tsleep((void *)&start_init_exec, PWAIT, "initexec", 0);
558
559 /*
560 * This is not the right way to do this. We really should
561 * hand-craft a descriptor onto /dev/console to hand to init,
562 * but that's a _lot_ more work, and the benefit from this easy
563 * hack makes up for the "good is the enemy of the best" effect.
564 */
565 check_console(p);
566
567 /*
568 * Need just enough stack to hold the faked-up "execve()" arguments.
569 */
570 addr = USRSTACK - PAGE_SIZE;
571 if (uvm_map(&p->p_vmspace->vm_map, &addr, PAGE_SIZE,
572 NULL, UVM_UNKNOWN_OFFSET, 0,
573 UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY,
574 UVM_ADV_NORMAL,
575 UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW))
576 != KERN_SUCCESS)
577 panic("init: couldn't allocate argument space");
578 p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
579
580 for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) {
581 ucp = (char *)(addr + PAGE_SIZE);
582
583 /*
584 * Construct the boot flag argument.
585 */
586 flagsp = flags;
587 *flagsp++ = '-';
588 options = 0;
589
590 if (boothowto & RB_SINGLE) {
591 *flagsp++ = 's';
592 options = 1;
593 }
594 #ifdef notyet
595 if (boothowto & RB_FASTBOOT) {
596 *flagsp++ = 'f';
597 options = 1;
598 }
599 #endif
600
601 /*
602 * Move out the flags (arg 1), if necessary.
603 */
604 if (options != 0) {
605 *flagsp++ = '\0';
606 i = flagsp - flags;
607 #ifdef DEBUG
608 printf("init: copying out flags `%s' %d\n", flags, i);
609 #endif
610 (void)copyout((caddr_t)flags, (caddr_t)(ucp -= i), i);
611 arg1 = ucp;
612 }
613
614 /*
615 * Move out the file name (also arg 0).
616 */
617 i = strlen(path) + 1;
618 #ifdef DEBUG
619 printf("init: copying out path `%s' %d\n", path, i);
620 #endif
621 (void)copyout((caddr_t)path, (caddr_t)(ucp -= i), i);
622 arg0 = ucp;
623
624 /*
625 * Move out the arg pointers.
626 */
627 uap = (char **)((long)ucp & ~ALIGNBYTES);
628 (void)suword((caddr_t)--uap, 0); /* terminator */
629 if (options != 0)
630 (void)suword((caddr_t)--uap, (long)arg1);
631 slash = strrchr(path, '/');
632 if (slash)
633 (void)suword((caddr_t)--uap,
634 (long)arg0 + (slash + 1 - path));
635 else
636 (void)suword((caddr_t)--uap, (long)arg0);
637
638 /*
639 * Point at the arguments.
640 */
641 SCARG(&args, path) = arg0;
642 SCARG(&args, argp) = uap;
643 SCARG(&args, envp) = NULL;
644
645 /*
646 * Now try to exec the program. If can't for any reason
647 * other than it doesn't exist, complain.
648 */
649 error = sys_execve(p, &args, retval);
650 if (error == 0 || error == EJUSTRETURN) {
651 KERNEL_PROC_UNLOCK(p);
652 return;
653 }
654 if (error != ENOENT)
655 printf("exec %s: error %d\n", path, error);
656 }
657 printf("init: not found\n");
658 panic("no init");
659 }
660