init_main.c revision 1.181 1 /* $NetBSD: init_main.c,v 1.181 2000/09/13 15:00:25 thorpej Exp $ */
2
3 /*
4 * Copyright (c) 1995 Christopher G. Demetriou. All rights reserved.
5 * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
6 * The Regents of the University of California. All rights reserved.
7 * (c) UNIX System Laboratories, Inc.
8 * All or some portions of this file are derived from material licensed
9 * to the University of California by American Telephone and Telegraph
10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11 * the permission of UNIX System Laboratories, Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 * must display the following acknowledgement:
23 * This product includes software developed by the University of
24 * California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * @(#)init_main.c 8.16 (Berkeley) 5/14/95
42 */
43
44 #include "fs_nfs.h"
45 #include "opt_nfsserver.h"
46 #include "opt_sysv.h"
47 #include "opt_maxuprc.h"
48 #include "opt_multiprocessor.h"
49 #include "opt_syscall_debug.h"
50
51 #include "rnd.h"
52
53 #include <sys/param.h>
54 #include <sys/acct.h>
55 #include <sys/filedesc.h>
56 #include <sys/file.h>
57 #include <sys/errno.h>
58 #include <sys/exec.h>
59 #include <sys/callout.h>
60 #include <sys/kernel.h>
61 #include <sys/mount.h>
62 #include <sys/map.h>
63 #include <sys/proc.h>
64 #include <sys/kthread.h>
65 #include <sys/resourcevar.h>
66 #include <sys/signalvar.h>
67 #include <sys/systm.h>
68 #include <sys/vnode.h>
69 #include <sys/tty.h>
70 #include <sys/conf.h>
71 #include <sys/disklabel.h>
72 #include <sys/buf.h>
73 #include <sys/device.h>
74 #include <sys/socketvar.h>
75 #include <sys/protosw.h>
76 #include <sys/reboot.h>
77 #include <sys/user.h>
78 #include <sys/sysctl.h>
79 #ifdef SYSVSHM
80 #include <sys/shm.h>
81 #endif
82 #ifdef SYSVSEM
83 #include <sys/sem.h>
84 #endif
85 #ifdef SYSVMSG
86 #include <sys/msg.h>
87 #endif
88 #include <sys/domain.h>
89 #include <sys/mbuf.h>
90 #include <sys/namei.h>
91 #if NRND > 0
92 #include <sys/rnd.h>
93 #endif
94
95 #include <sys/syscall.h>
96 #include <sys/syscallargs.h>
97
98 #include <ufs/ufs/quota.h>
99
100 #include <miscfs/genfs/genfs.h>
101 #include <miscfs/syncfs/syncfs.h>
102
103 #include <machine/cpu.h>
104
105 #include <uvm/uvm.h>
106
107 #include <net/if.h>
108 #include <net/raw_cb.h>
109
/*
 * Copyright banner; main() prints it right after the console is
 * initialized.  Built from adjacent string literals with explicit
 * newlines so that no literal spans a raw line break.
 */
const char copyright[] =
    "Copyright (c) 1996, 1997, 1998, 1999, 2000\n"
    "The NetBSD Foundation, Inc. All rights reserved.\n"
    "Copyright (c) 1982, 1986, 1989, 1991, 1993\n"
    "The Regents of the University of California. All rights reserved.\n"
    "\n";
117
118 /* Components of the first process -- never freed. */
119 struct session session0;
120 struct pgrp pgrp0;
121 struct proc proc0;
122 struct pcred cred0;
123 struct filedesc0 filedesc0;
124 struct cwdinfo cwdi0;
125 struct plimit limit0;
126 struct vmspace vmspace0;
127 struct sigacts sigacts0;
128 #ifndef curproc
129 struct proc *curproc = &proc0;
130 #endif
131 struct proc *initproc;
132
133 int cmask = CMASK;
134 extern struct user *proc0paddr;
135
136 struct vnode *rootvp, *swapdev_vp;
137 int boothowto;
138 int cold = 1; /* still working on startup */
139 struct timeval boottime;
140
141 __volatile int start_init_exec; /* semaphore for start_init() */
142
143 static void check_console(struct proc *p);
144 static void start_init(void *);
145 void main(void);
146
147 extern char sigcode[], esigcode[];
148 #ifdef SYSCALL_DEBUG
149 extern char *syscallnames[];
150 #endif
151
152 struct emul emul_netbsd = {
153 "netbsd",
154 NULL,
155 sendsig,
156 SYS_syscall,
157 SYS_MAXSYSCALL,
158 sysent,
159 #ifdef SYSCALL_DEBUG
160 syscallnames,
161 #else
162 NULL,
163 #endif
164 0,
165 copyargs,
166 setregs,
167 sigcode,
168 esigcode,
169 };
170
171 /*
172 * System startup; initialize the world, create process 0, mount root
173 * filesystem, and fork to create init and pagedaemon. Most of the
174 * hard work is done in the lower-level initialization routines including
175 * startup(), which does memory initialization and autoconfiguration.
176 */
177 void
178 main(void)
179 {
180 struct proc *p;
181 struct pdevinit *pdev;
182 int i, s, error;
183 extern struct pdevinit pdevinit[];
184 extern void schedcpu(void *);
185 extern void disk_init(void);
186 #if defined(NFSSERVER) || defined(NFS)
187 extern void nfs_init(void);
188 #endif
189 #ifdef NVNODE_IMPLICIT
190 int usevnodes;
191 #endif
192
193 /*
194 * Initialize the current process pointer (curproc) before
195 * any possible traps/probes to simplify trap processing.
196 */
197 p = &proc0;
198 curproc = p;
199 p->p_cpu = curcpu();
200 /*
201 * Attempt to find console and initialize
202 * in case of early panic or other messages.
203 */
204 consinit();
205 printf("%s", copyright);
206
207 KERNEL_LOCK_INIT();
208
209 uvm_init();
210
211 /* Do machine-dependent initialization. */
212 cpu_startup();
213
214 /* Initialize callouts. */
215 callout_startup();
216
217 /*
218 * Initialize mbuf's. Do this now because we might attempt to
219 * allocate mbufs or mbuf clusters during autoconfiguration.
220 */
221 mbinit();
222
223 /* Initialize sockets. */
224 soinit();
225
226 /*
227 * The following 3 things must be done before autoconfiguration.
228 */
229 disk_init(); /* initialize disk list */
230 tty_init(); /* initialize tty list */
231 #if NRND > 0
232 rnd_init(); /* initialize RNG */
233 #endif
234
235 /* Initialize the sysctl subsystem. */
236 sysctl_init();
237
238 /*
239 * Initialize process and pgrp structures.
240 */
241 procinit();
242
243 /*
244 * Create process 0 (the swapper).
245 */
246 s = proclist_lock_write();
247 LIST_INSERT_HEAD(&allproc, p, p_list);
248 LIST_INSERT_HEAD(PIDHASH(p->p_pid), p, p_hash);
249 proclist_unlock_write(s);
250
251 p->p_pgrp = &pgrp0;
252 LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
253 LIST_INIT(&pgrp0.pg_members);
254 LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
255
256 pgrp0.pg_session = &session0;
257 session0.s_count = 1;
258 session0.s_sid = p->p_pid;
259 session0.s_leader = p;
260
261 /*
262 * Set P_NOCLDWAIT so that kernel threads are reparented to
263 * init(8) when they exit. init(8) can easily wait them out
264 * for us.
265 */
266 p->p_flag = P_INMEM | P_SYSTEM | P_NOCLDWAIT;
267 p->p_stat = SONPROC;
268 p->p_nice = NZERO;
269 p->p_emul = &emul_netbsd;
270 strncpy(p->p_comm, "swapper", MAXCOMLEN);
271
272 callout_init(&p->p_realit_ch);
273 callout_init(&p->p_tsleep_ch);
274
275 /* Create credentials. */
276 cred0.p_refcnt = 1;
277 p->p_cred = &cred0;
278 p->p_ucred = crget();
279 p->p_ucred->cr_ngroups = 1; /* group 0 */
280
281 /* Create the file descriptor table. */
282 finit();
283 p->p_fd = &filedesc0.fd_fd;
284 fdinit1(&filedesc0);
285
286 /* Create the CWD info. */
287 p->p_cwdi = &cwdi0;
288 cwdi0.cwdi_cmask = cmask;
289 cwdi0.cwdi_refcnt = 1;
290
291 /* Create the limits structures. */
292 p->p_limit = &limit0;
293 for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
294 limit0.pl_rlimit[i].rlim_cur =
295 limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
296
297 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
298 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
299 maxfiles < NOFILE ? maxfiles : NOFILE;
300
301 limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
302 limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
303 maxproc < MAXUPRC ? maxproc : MAXUPRC;
304
305 i = ptoa(uvmexp.free);
306 limit0.pl_rlimit[RLIMIT_RSS].rlim_max = i;
307 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i;
308 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3;
309 limit0.pl_corename = defcorename;
310 limit0.p_refcnt = 1;
311
312 /*
313 * Initialize proc0's vmspace, which uses the kernel pmap.
314 * All kernel processes (which never have user space mappings)
315 * share proc0's vmspace, and thus, the kernel pmap.
316 */
317 uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
318 trunc_page(VM_MAX_ADDRESS), TRUE);
319 p->p_vmspace = &vmspace0;
320
321 p->p_addr = proc0paddr; /* XXX */
322
323 /*
324 * We continue to place resource usage info in the
325 * user struct so they're pageable.
326 */
327 p->p_stats = &p->p_addr->u_stats;
328
329 /*
330 * Charge root for one process.
331 */
332 (void)chgproccnt(0, 1);
333
334 rqinit();
335
336 /* Configure virtual memory system, set vm rlimits. */
337 uvm_init_limits(p);
338
339 /* Initialize the file systems. */
340 #if defined(NFSSERVER) || defined(NFS)
341 nfs_init(); /* initialize server/shared data */
342 #endif
343 vfsinit();
344
345 /* Configure the system hardware. This will enable interrupts. */
346 configure();
347
348 /* Lock the kernel on behalf of proc0. */
349 KERNEL_PROC_LOCK(p);
350
351 #ifdef SYSVSHM
352 /* Initialize System V style shared memory. */
353 shminit();
354 #endif
355
356 #ifdef SYSVSEM
357 /* Initialize System V style semaphores. */
358 seminit();
359 #endif
360
361 #ifdef SYSVMSG
362 /* Initialize System V style message queues. */
363 msginit();
364 #endif
365
366 /* Attach pseudo-devices. */
367 for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
368 (*pdev->pdev_attach)(pdev->pdev_count);
369
370 /*
371 * Initialize protocols. Block reception of incoming packets
372 * until everything is ready.
373 */
374 s = splimp();
375 ifinit();
376 domaininit();
377 splx(s);
378
379 #ifdef GPROF
380 /* Initialize kernel profiling. */
381 kmstartup();
382 #endif
383
384 /* Initialize system accouting. */
385 acct_init();
386
387 /*
388 * Initialize signal-related data structures, and signal state
389 * for proc0.
390 */
391 signal_init();
392 p->p_sigacts = &sigacts0;
393 siginit(p);
394
395 /* Kick off timeout driven events by calling first time. */
396 schedcpu(NULL);
397
398 /*
399 * Create process 1 (init(8)). We do this now, as Unix has
400 * historically had init be process 1, and changing this would
401 * probably upset a lot of people.
402 *
403 * Note that process 1 won't immediately exec init(8), but will
404 * wait for us to inform it that the root file system has been
405 * mounted.
406 */
407 if (fork1(p, 0, SIGCHLD, NULL, 0, start_init, NULL, NULL, &initproc))
408 panic("fork init");
409
410 /*
411 * Create any kernel threads who's creation was deferred because
412 * initproc had not yet been created.
413 */
414 kthread_run_deferred_queue();
415
416 /*
417 * Now that device driver threads have been created, wait for
418 * them to finish any deferred autoconfiguration. Note we don't
419 * need to lock this semaphore, since we haven't booted any
420 * secondary processors, yet.
421 */
422 while (config_pending)
423 (void) tsleep((void *)&config_pending, PWAIT, "cfpend", 0);
424
425 /*
426 * Now that autoconfiguration has completed, we can determine
427 * the root and dump devices.
428 */
429 cpu_rootconf();
430 cpu_dumpconf();
431
432 /* Mount the root file system. */
433 do {
434 domountroothook();
435 if ((error = vfs_mountroot())) {
436 printf("cannot mount root, error = %d\n", error);
437 boothowto |= RB_ASKNAME;
438 setroot(root_device,
439 (rootdev != NODEV) ? DISKPART(rootdev) : 0);
440 }
441 } while (error != 0);
442 mountroothook_destroy();
443
444 mountlist.cqh_first->mnt_flag |= MNT_ROOTFS;
445 mountlist.cqh_first->mnt_op->vfs_refcount++;
446
447 /*
448 * Get the vnode for '/'. Set filedesc0.fd_fd.fd_cdir to
449 * reference it.
450 */
451 if (VFS_ROOT(mountlist.cqh_first, &rootvnode))
452 panic("cannot find root vnode");
453 cwdi0.cwdi_cdir = rootvnode;
454 VREF(cwdi0.cwdi_cdir);
455 VOP_UNLOCK(rootvnode, 0);
456 cwdi0.cwdi_rdir = NULL;
457
458 /*
459 * Now that root is mounted, we can fixup initproc's CWD
460 * info. All other processes are kthreads, which merely
461 * share proc0's CWD info.
462 */
463 initproc->p_cwdi->cwdi_cdir = rootvnode;
464 VREF(initproc->p_cwdi->cwdi_cdir);
465 initproc->p_cwdi->cwdi_rdir = NULL;
466
467 /*
468 * Now can look at time, having had a chance to verify the time
469 * from the file system. Reset p->p_rtime as it may have been
470 * munched in mi_switch() after the time got set.
471 */
472 proclist_lock_read();
473 s = splsched();
474 for (p = LIST_FIRST(&allproc); p != NULL;
475 p = LIST_NEXT(p, p_list)) {
476 p->p_stats->p_start = mono_time = boottime = time;
477 if (p->p_cpu != NULL)
478 p->p_cpu->ci_schedstate.spc_runtime = time;
479 p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0;
480 }
481 splx(s);
482 proclist_unlock_read();
483
484 /* Create the pageout daemon kernel thread. */
485 uvm_swap_init();
486 if (kthread_create1(uvm_pageout, NULL, NULL, "pagedaemon"))
487 panic("fork pagedaemon");
488
489 /* Create the process reaper kernel thread. */
490 if (kthread_create1(reaper, NULL, NULL, "reaper"))
491 panic("fork reaper");
492
493 /* Create the filesystem syncer kernel thread. */
494 if (kthread_create1(sched_sync, NULL, NULL, "ioflush"))
495 panic("fork syncer");
496
497 #if defined(MULTIPROCESSOR)
498 /* Boot the secondary processors. */
499 cpu_boot_secondary_processors();
500 #endif
501
502 /*
503 * Okay, now we can let init(8) exec! It's off to userland!
504 */
505 start_init_exec = 1;
506 wakeup((void *)&start_init_exec);
507
508 #ifdef NVNODE_IMPLICIT
509 /*
510 * If maximum number of vnodes in namei vnode cache is not explicitly
511 * defined in kernel config, adjust the number such as we use roughly
512 * 0.5% of memory for vnode cache (but not less than NVNODE vnodes).
513 */
514 usevnodes = (ptoa(physmem) / 200) / sizeof(struct vnode);
515 if (usevnodes > desiredvnodes)
516 desiredvnodes = usevnodes;
517 #endif
518
519 /* The scheduler is an infinite loop. */
520 uvm_scheduler();
521 /* NOTREACHED */
522 }
523
524 static void
525 check_console(struct proc *p)
526 {
527 struct nameidata nd;
528 int error;
529
530 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/console", p);
531 error = namei(&nd);
532 if (error == 0)
533 vrele(nd.ni_vp);
534 else if (error == ENOENT)
535 printf("warning: no /dev/console\n");
536 else
537 printf("warning: lookup /dev/console: error %d\n", error);
538 }
539
/*
 * Candidate pathnames for the "init" program, in the order in which
 * start_init() attempts to exec them.  The list is NULL-terminated.
 */
static const char *initpaths[] = {
	"/sbin/init",
	"/sbin/oinit",
	"/sbin/init.bak",
	NULL,
};
549
550 /*
551 * Start the initial user process; try exec'ing each pathname in "initpaths".
552 * The program is invoked with one argument containing the boot flags.
553 */
554 static void
555 start_init(void *arg)
556 {
557 struct proc *p = arg;
558 vaddr_t addr;
559 struct sys_execve_args /* {
560 syscallarg(const char *) path;
561 syscallarg(char * const *) argp;
562 syscallarg(char * const *) envp;
563 } */ args;
564 int options, i, error;
565 register_t retval[2];
566 char flags[4], *flagsp;
567 const char **pathp, *path, *slash;
568 char *ucp, **uap, *arg0, *arg1 = NULL;
569
570 /*
571 * Now in process 1.
572 */
573 strncpy(p->p_comm, "init", MAXCOMLEN);
574
575 /*
576 * Wait for main() to tell us that it's safe to exec.
577 */
578 while (start_init_exec == 0)
579 (void) tsleep((void *)&start_init_exec, PWAIT, "initexec", 0);
580
581 /*
582 * This is not the right way to do this. We really should
583 * hand-craft a descriptor onto /dev/console to hand to init,
584 * but that's a _lot_ more work, and the benefit from this easy
585 * hack makes up for the "good is the enemy of the best" effect.
586 */
587 check_console(p);
588
589 /*
590 * Need just enough stack to hold the faked-up "execve()" arguments.
591 */
592 addr = USRSTACK - PAGE_SIZE;
593 if (uvm_map(&p->p_vmspace->vm_map, &addr, PAGE_SIZE,
594 NULL, UVM_UNKNOWN_OFFSET, 0,
595 UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY,
596 UVM_ADV_NORMAL,
597 UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW))
598 != KERN_SUCCESS)
599 panic("init: couldn't allocate argument space");
600 p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
601
602 for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) {
603 ucp = (char *)(addr + PAGE_SIZE);
604
605 /*
606 * Construct the boot flag argument.
607 */
608 flagsp = flags;
609 *flagsp++ = '-';
610 options = 0;
611
612 if (boothowto & RB_SINGLE) {
613 *flagsp++ = 's';
614 options = 1;
615 }
616 #ifdef notyet
617 if (boothowto & RB_FASTBOOT) {
618 *flagsp++ = 'f';
619 options = 1;
620 }
621 #endif
622
623 /*
624 * Move out the flags (arg 1), if necessary.
625 */
626 if (options != 0) {
627 *flagsp++ = '\0';
628 i = flagsp - flags;
629 #ifdef DEBUG
630 printf("init: copying out flags `%s' %d\n", flags, i);
631 #endif
632 (void)copyout((caddr_t)flags, (caddr_t)(ucp -= i), i);
633 arg1 = ucp;
634 }
635
636 /*
637 * Move out the file name (also arg 0).
638 */
639 i = strlen(path) + 1;
640 #ifdef DEBUG
641 printf("init: copying out path `%s' %d\n", path, i);
642 #endif
643 (void)copyout((caddr_t)path, (caddr_t)(ucp -= i), i);
644 arg0 = ucp;
645
646 /*
647 * Move out the arg pointers.
648 */
649 uap = (char **)((long)ucp & ~ALIGNBYTES);
650 (void)suword((caddr_t)--uap, 0); /* terminator */
651 if (options != 0)
652 (void)suword((caddr_t)--uap, (long)arg1);
653 slash = strrchr(path, '/');
654 if (slash)
655 (void)suword((caddr_t)--uap,
656 (long)arg0 + (slash + 1 - path));
657 else
658 (void)suword((caddr_t)--uap, (long)arg0);
659
660 /*
661 * Point at the arguments.
662 */
663 SCARG(&args, path) = arg0;
664 SCARG(&args, argp) = uap;
665 SCARG(&args, envp) = NULL;
666
667 /*
668 * Now try to exec the program. If can't for any reason
669 * other than it doesn't exist, complain.
670 */
671 error = sys_execve(p, &args, retval);
672 if (error == 0 || error == EJUSTRETURN) {
673 KERNEL_PROC_UNLOCK(p);
674 return;
675 }
676 if (error != ENOENT)
677 printf("exec %s: error %d\n", path, error);
678 }
679 printf("init: not found\n");
680 panic("no init");
681 }
682