init_main.c revision 1.176 1 /* $NetBSD: init_main.c,v 1.176 2000/07/14 07:21:21 thorpej Exp $ */
2
3 /*
4 * Copyright (c) 1995 Christopher G. Demetriou. All rights reserved.
5 * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
6 * The Regents of the University of California. All rights reserved.
7 * (c) UNIX System Laboratories, Inc.
8 * All or some portions of this file are derived from material licensed
9 * to the University of California by American Telephone and Telegraph
10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11 * the permission of UNIX System Laboratories, Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 * must display the following acknowledgement:
23 * This product includes software developed by the University of
24 * California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * @(#)init_main.c 8.16 (Berkeley) 5/14/95
42 */
43
44 #include "fs_nfs.h"
45 #include "opt_nfsserver.h"
46 #include "opt_sysv.h"
47 #include "opt_maxuprc.h"
48 #include "opt_multiprocessor.h"
49 #include "opt_syscall_debug.h"
50
51 #include "rnd.h"
52
53 #include <sys/param.h>
54 #include <sys/acct.h>
55 #include <sys/filedesc.h>
56 #include <sys/file.h>
57 #include <sys/errno.h>
58 #include <sys/exec.h>
59 #include <sys/callout.h>
60 #include <sys/kernel.h>
61 #include <sys/mount.h>
62 #include <sys/map.h>
63 #include <sys/proc.h>
64 #include <sys/kthread.h>
65 #include <sys/resourcevar.h>
66 #include <sys/signalvar.h>
67 #include <sys/systm.h>
68 #include <sys/vnode.h>
69 #include <sys/tty.h>
70 #include <sys/conf.h>
71 #include <sys/disklabel.h>
72 #include <sys/buf.h>
73 #include <sys/device.h>
74 #include <sys/socketvar.h>
75 #include <sys/protosw.h>
76 #include <sys/reboot.h>
77 #include <sys/user.h>
78 #include <sys/sysctl.h>
79 #ifdef SYSVSHM
80 #include <sys/shm.h>
81 #endif
82 #ifdef SYSVSEM
83 #include <sys/sem.h>
84 #endif
85 #ifdef SYSVMSG
86 #include <sys/msg.h>
87 #endif
88 #include <sys/domain.h>
89 #include <sys/mbuf.h>
90 #include <sys/namei.h>
91 #if NRND > 0
92 #include <sys/rnd.h>
93 #endif
94
95 #include <sys/syscall.h>
96 #include <sys/syscallargs.h>
97
98 #include <ufs/ufs/quota.h>
99
100 #include <miscfs/genfs/genfs.h>
101 #include <miscfs/syncfs/syncfs.h>
102
103 #include <machine/cpu.h>
104
105 #include <uvm/uvm.h>
106
107 #include <net/if.h>
108 #include <net/raw_cb.h>
109
/*
 * Copyright banner.  main() prints this on the console immediately after
 * consinit().
 *
 * The banner is spelled as adjacent string literals with explicit "\n"
 * escapes: a single literal that spans several source lines with raw
 * newlines is not valid ISO C and is rejected by current compilers.
 *
 * NOTE(review): this listing has lost leading whitespace, so the holder
 * lines ("The NetBSD Foundation...", "The Regents...") may have been
 * indented in the pristine source -- confirm before committing.
 */
const char copyright[] =
    "Copyright (c) 1996, 1997, 1998, 1999, 2000\n"
    "The NetBSD Foundation, Inc. All rights reserved.\n"
    "Copyright (c) 1982, 1986, 1989, 1991, 1993\n"
    "The Regents of the University of California. All rights reserved.\n"
    "\n";
117
118 /* Components of the first process -- never freed. */
119 struct session session0;
120 struct pgrp pgrp0;
121 struct proc proc0;
122 struct pcred cred0;
123 struct filedesc0 filedesc0;
124 struct cwdinfo cwdi0;
125 struct plimit limit0;
126 struct vmspace vmspace0;
127 struct sigacts sigacts0;
128 #ifndef curproc
129 struct proc *curproc = &proc0;
130 #endif
131 struct proc *initproc;
132
133 int cmask = CMASK;
134 extern struct user *proc0paddr;
135
136 struct vnode *rootvp, *swapdev_vp;
137 int boothowto;
138 int cold = 1; /* still working on startup */
139 struct timeval boottime;
140
141 __volatile int start_init_exec; /* semaphore for start_init() */
142
143 static void check_console(struct proc *p);
144 static void start_init(void *);
145 static void start_pagedaemon(void *);
146 static void start_reaper(void *);
147 void main(void);
148
149 extern char sigcode[], esigcode[];
150 #ifdef SYSCALL_DEBUG
151 extern char *syscallnames[];
152 #endif
153
154 struct emul emul_netbsd = {
155 "netbsd",
156 NULL,
157 sendsig,
158 SYS_syscall,
159 SYS_MAXSYSCALL,
160 sysent,
161 #ifdef SYSCALL_DEBUG
162 syscallnames,
163 #else
164 NULL,
165 #endif
166 0,
167 copyargs,
168 setregs,
169 sigcode,
170 esigcode,
171 };
172
173 /*
174 * System startup; initialize the world, create process 0, mount root
175 * filesystem, and fork to create init and pagedaemon. Most of the
176 * hard work is done in the lower-level initialization routines including
177 * startup(), which does memory initialization and autoconfiguration.
178 */
179 void
180 main(void)
181 {
182 struct proc *p;
183 struct pdevinit *pdev;
184 int i, s, error;
185 extern struct pdevinit pdevinit[];
186 extern void roundrobin(void *);
187 extern void schedcpu(void *);
188 extern void disk_init(void);
189 #if defined(NFSSERVER) || defined(NFS)
190 extern void nfs_init(void);
191 #endif
192 #ifdef NVNODE_IMPLICIT
193 int usevnodes;
194 #endif
195
196 /*
197 * Initialize the current process pointer (curproc) before
198 * any possible traps/probes to simplify trap processing.
199 */
200 p = &proc0;
201 curproc = p;
202 p->p_cpu = curcpu();
203 /*
204 * Attempt to find console and initialize
205 * in case of early panic or other messages.
206 */
207 consinit();
208 printf("%s", copyright);
209
210 uvm_init();
211
212 /* Do machine-dependent initialization. */
213 cpu_startup();
214
215 /* Initialize callouts. */
216 callout_startup();
217
218 /*
219 * Initialize mbuf's. Do this now because we might attempt to
220 * allocate mbufs or mbuf clusters during autoconfiguration.
221 */
222 mbinit();
223
224 /* Initialize sockets. */
225 soinit();
226
227 /*
228 * The following 3 things must be done before autoconfiguration.
229 */
230 disk_init(); /* initialize disk list */
231 tty_init(); /* initialize tty list */
232 #if NRND > 0
233 rnd_init(); /* initialize RNG */
234 #endif
235
236 /* Initialize the sysctl subsystem. */
237 sysctl_init();
238
239 /*
240 * Initialize process and pgrp structures.
241 */
242 procinit();
243
244 /*
245 * Create process 0 (the swapper).
246 */
247 s = proclist_lock_write();
248 LIST_INSERT_HEAD(&allproc, p, p_list);
249 LIST_INSERT_HEAD(PIDHASH(p->p_pid), p, p_hash);
250 proclist_unlock_write(s);
251
252 p->p_pgrp = &pgrp0;
253 LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
254 LIST_INIT(&pgrp0.pg_members);
255 LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
256
257 pgrp0.pg_session = &session0;
258 session0.s_count = 1;
259 session0.s_sid = p->p_pid;
260 session0.s_leader = p;
261
262 /*
263 * Set P_NOCLDWAIT so that kernel threads are reparented to
264 * init(8) when they exit. init(8) can easily wait them out
265 * for us.
266 */
267 p->p_flag = P_INMEM | P_SYSTEM | P_NOCLDWAIT;
268 p->p_stat = SONPROC;
269 p->p_nice = NZERO;
270 p->p_emul = &emul_netbsd;
271 strncpy(p->p_comm, "swapper", MAXCOMLEN);
272
273 callout_init(&p->p_realit_ch);
274 callout_init(&p->p_tsleep_ch);
275
276 /* Create credentials. */
277 cred0.p_refcnt = 1;
278 p->p_cred = &cred0;
279 p->p_ucred = crget();
280 p->p_ucred->cr_ngroups = 1; /* group 0 */
281
282 /* Create the file descriptor table. */
283 finit();
284 p->p_fd = &filedesc0.fd_fd;
285 fdinit1(&filedesc0);
286
287 /* Create the CWD info. */
288 p->p_cwdi = &cwdi0;
289 cwdi0.cwdi_cmask = cmask;
290 cwdi0.cwdi_refcnt = 1;
291
292 /* Create the limits structures. */
293 p->p_limit = &limit0;
294 for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
295 limit0.pl_rlimit[i].rlim_cur =
296 limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
297
298 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
299 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
300 maxfiles < NOFILE ? maxfiles : NOFILE;
301
302 limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
303 limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
304 maxproc < MAXUPRC ? maxproc : MAXUPRC;
305
306 i = ptoa(uvmexp.free);
307 limit0.pl_rlimit[RLIMIT_RSS].rlim_max = i;
308 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i;
309 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3;
310 limit0.pl_corename = defcorename;
311 limit0.p_refcnt = 1;
312
313 /*
314 * Initialize proc0's vmspace, which uses the kernel pmap.
315 * All kernel processes (which never have user space mappings)
316 * share proc0's vmspace, and thus, the kernel pmap.
317 */
318 uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
319 trunc_page(VM_MAX_ADDRESS), TRUE);
320 p->p_vmspace = &vmspace0;
321
322 p->p_addr = proc0paddr; /* XXX */
323
324 /*
325 * We continue to place resource usage info in the
326 * user struct so they're pageable.
327 */
328 p->p_stats = &p->p_addr->u_stats;
329
330 /*
331 * Charge root for one process.
332 */
333 (void)chgproccnt(0, 1);
334
335 rqinit();
336
337 /* Configure virtual memory system, set vm rlimits. */
338 uvm_init_limits(p);
339
340 /* Initialize the file systems. */
341 #if defined(NFSSERVER) || defined(NFS)
342 nfs_init(); /* initialize server/shared data */
343 #endif
344 vfsinit();
345
346 /* Configure the system hardware. This will enable interrupts. */
347 configure();
348
349 #ifdef SYSVSHM
350 /* Initialize System V style shared memory. */
351 shminit();
352 #endif
353
354 #ifdef SYSVSEM
355 /* Initialize System V style semaphores. */
356 seminit();
357 #endif
358
359 #ifdef SYSVMSG
360 /* Initialize System V style message queues. */
361 msginit();
362 #endif
363
364 /* Attach pseudo-devices. */
365 for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
366 (*pdev->pdev_attach)(pdev->pdev_count);
367
368 /*
369 * Initialize protocols. Block reception of incoming packets
370 * until everything is ready.
371 */
372 s = splimp();
373 ifinit();
374 domaininit();
375 splx(s);
376
377 #ifdef GPROF
378 /* Initialize kernel profiling. */
379 kmstartup();
380 #endif
381
382 /* Initialize system accouting. */
383 acct_init();
384
385 /*
386 * Initialize signal-related data structures, and signal state
387 * for proc0.
388 */
389 signal_init();
390 p->p_sigacts = &sigacts0;
391 siginit(p);
392
393 /* Kick off timeout driven events by calling first time. */
394 roundrobin(NULL);
395 schedcpu(NULL);
396
397 /*
398 * Create process 1 (init(8)). We do this now, as Unix has
399 * historically had init be process 1, and changing this would
400 * probably upset a lot of people.
401 *
402 * Note that process 1 won't immediately exec init(8), but will
403 * wait for us to inform it that the root file system has been
404 * mounted.
405 */
406 if (fork1(p, 0, SIGCHLD, NULL, 0, start_init, NULL, NULL, &initproc))
407 panic("fork init");
408
409 /*
410 * Create any kernel threads who's creation was deferred because
411 * initproc had not yet been created.
412 */
413 kthread_run_deferred_queue();
414
415 /*
416 * Now that device driver threads have been created, wait for
417 * them to finish any deferred autoconfiguration. Note we don't
418 * need to lock this semaphore, since we haven't booted any
419 * secondary processors, yet.
420 */
421 while (config_pending)
422 (void) tsleep((void *)&config_pending, PWAIT, "cfpend", 0);
423
424 /*
425 * Now that autoconfiguration has completed, we can determine
426 * the root and dump devices.
427 */
428 cpu_rootconf();
429 cpu_dumpconf();
430
431 /* Mount the root file system. */
432 do {
433 domountroothook();
434 if ((error = vfs_mountroot())) {
435 printf("cannot mount root, error = %d\n", error);
436 boothowto |= RB_ASKNAME;
437 setroot(root_device,
438 (rootdev != NODEV) ? DISKPART(rootdev) : 0);
439 }
440 } while (error != 0);
441 mountroothook_destroy();
442
443 mountlist.cqh_first->mnt_flag |= MNT_ROOTFS;
444 mountlist.cqh_first->mnt_op->vfs_refcount++;
445
446 /*
447 * Get the vnode for '/'. Set filedesc0.fd_fd.fd_cdir to
448 * reference it.
449 */
450 if (VFS_ROOT(mountlist.cqh_first, &rootvnode))
451 panic("cannot find root vnode");
452 cwdi0.cwdi_cdir = rootvnode;
453 VREF(cwdi0.cwdi_cdir);
454 VOP_UNLOCK(rootvnode, 0);
455 cwdi0.cwdi_rdir = NULL;
456
457 /*
458 * Now that root is mounted, we can fixup initproc's CWD
459 * info. All other processes are kthreads, which merely
460 * share proc0's CWD info.
461 */
462 initproc->p_cwdi->cwdi_cdir = rootvnode;
463 VREF(initproc->p_cwdi->cwdi_cdir);
464 initproc->p_cwdi->cwdi_rdir = NULL;
465
466 /*
467 * Now can look at time, having had a chance to verify the time
468 * from the file system. Reset p->p_rtime as it may have been
469 * munched in mi_switch() after the time got set.
470 */
471 proclist_lock_read();
472 s = splhigh(); /* block clock and statclock */
473 for (p = LIST_FIRST(&allproc); p != NULL;
474 p = LIST_NEXT(p, p_list)) {
475 p->p_stats->p_start = mono_time = boottime = time;
476 if (p->p_cpu != NULL)
477 p->p_cpu->ci_schedstate.spc_runtime = time;
478 p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0;
479 }
480 splx(s);
481 proclist_unlock_read();
482
483 /* Create the pageout daemon kernel thread. */
484 uvm_swap_init();
485 if (kthread_create1(start_pagedaemon, NULL, NULL, "pagedaemon"))
486 panic("fork pagedaemon");
487
488 /* Create the process reaper kernel thread. */
489 if (kthread_create1(start_reaper, NULL, NULL, "reaper"))
490 panic("fork reaper");
491
492 /* Create the filesystem syncer kernel thread. */
493 if (kthread_create1(sched_sync, NULL, NULL, "ioflush"))
494 panic("fork syncer");
495
496 #if defined(MULTIPROCESSOR)
497 /* Boot the secondary processors. */
498 cpu_boot_secondary_processors();
499 #endif
500
501 /*
502 * Okay, now we can let init(8) exec! It's off to userland!
503 */
504 start_init_exec = 1;
505 wakeup((void *)&start_init_exec);
506
507 #ifdef NVNODE_IMPLICIT
508 /*
509 * If maximum number of vnodes in namei vnode cache is not explicitly
510 * defined in kernel config, adjust the number such as we use roughly
511 * 0.5% of memory for vnode cache (but not less than NVNODE vnodes).
512 */
513 usevnodes = (ptoa(physmem) / 200) / sizeof(struct vnode);
514 if (usevnodes > desiredvnodes)
515 desiredvnodes = usevnodes;
516 #endif
517
518 /* The scheduler is an infinite loop. */
519 uvm_scheduler();
520 /* NOTREACHED */
521 }
522
523 static void
524 check_console(struct proc *p)
525 {
526 struct nameidata nd;
527 int error;
528
529 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/console", p);
530 error = namei(&nd);
531 if (error == 0)
532 vrele(nd.ni_vp);
533 else if (error == ENOENT)
534 printf("warning: no /dev/console\n");
535 else
536 printf("warning: lookup /dev/console: error %d\n", error);
537 }
538
/*
 * Candidate pathnames for the "init" program.  start_init() tries them
 * in the order listed and stops at the NULL terminator.
 */
static const char *initpaths[] = {
	"/sbin/init",		/* tried first */
	"/sbin/oinit",
	"/sbin/init.bak",
	NULL,			/* end of list */
};
548
549 /*
550 * Start the initial user process; try exec'ing each pathname in "initpaths".
551 * The program is invoked with one argument containing the boot flags.
552 */
553 static void
554 start_init(void *arg)
555 {
556 struct proc *p = arg;
557 vaddr_t addr;
558 struct sys_execve_args /* {
559 syscallarg(const char *) path;
560 syscallarg(char * const *) argp;
561 syscallarg(char * const *) envp;
562 } */ args;
563 int options, i, error;
564 register_t retval[2];
565 char flags[4], *flagsp;
566 const char **pathp, *path, *slash;
567 char *ucp, **uap, *arg0, *arg1 = NULL;
568
569 /*
570 * Now in process 1.
571 */
572 strncpy(p->p_comm, "init", MAXCOMLEN);
573
574 /*
575 * Wait for main() to tell us that it's safe to exec.
576 */
577 while (start_init_exec == 0)
578 (void) tsleep((void *)&start_init_exec, PWAIT, "initexec", 0);
579
580 /*
581 * This is not the right way to do this. We really should
582 * hand-craft a descriptor onto /dev/console to hand to init,
583 * but that's a _lot_ more work, and the benefit from this easy
584 * hack makes up for the "good is the enemy of the best" effect.
585 */
586 check_console(p);
587
588 /*
589 * Need just enough stack to hold the faked-up "execve()" arguments.
590 */
591 addr = USRSTACK - PAGE_SIZE;
592 if (uvm_map(&p->p_vmspace->vm_map, &addr, PAGE_SIZE,
593 NULL, UVM_UNKNOWN_OFFSET,
594 UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY,
595 UVM_ADV_NORMAL,
596 UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW))
597 != KERN_SUCCESS)
598 panic("init: couldn't allocate argument space");
599 p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
600
601 for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) {
602 ucp = (char *)(addr + PAGE_SIZE);
603
604 /*
605 * Construct the boot flag argument.
606 */
607 flagsp = flags;
608 *flagsp++ = '-';
609 options = 0;
610
611 if (boothowto & RB_SINGLE) {
612 *flagsp++ = 's';
613 options = 1;
614 }
615 #ifdef notyet
616 if (boothowto & RB_FASTBOOT) {
617 *flagsp++ = 'f';
618 options = 1;
619 }
620 #endif
621
622 /*
623 * Move out the flags (arg 1), if necessary.
624 */
625 if (options != 0) {
626 *flagsp++ = '\0';
627 i = flagsp - flags;
628 #ifdef DEBUG
629 printf("init: copying out flags `%s' %d\n", flags, i);
630 #endif
631 (void)copyout((caddr_t)flags, (caddr_t)(ucp -= i), i);
632 arg1 = ucp;
633 }
634
635 /*
636 * Move out the file name (also arg 0).
637 */
638 i = strlen(path) + 1;
639 #ifdef DEBUG
640 printf("init: copying out path `%s' %d\n", path, i);
641 #endif
642 (void)copyout((caddr_t)path, (caddr_t)(ucp -= i), i);
643 arg0 = ucp;
644
645 /*
646 * Move out the arg pointers.
647 */
648 uap = (char **)((long)ucp & ~ALIGNBYTES);
649 (void)suword((caddr_t)--uap, 0); /* terminator */
650 if (options != 0)
651 (void)suword((caddr_t)--uap, (long)arg1);
652 slash = strrchr(path, '/');
653 if (slash)
654 (void)suword((caddr_t)--uap,
655 (long)arg0 + (slash + 1 - path));
656 else
657 (void)suword((caddr_t)--uap, (long)arg0);
658
659 /*
660 * Point at the arguments.
661 */
662 SCARG(&args, path) = arg0;
663 SCARG(&args, argp) = uap;
664 SCARG(&args, envp) = NULL;
665
666 /*
667 * Now try to exec the program. If can't for any reason
668 * other than it doesn't exist, complain.
669 */
670 error = sys_execve(p, &args, retval);
671 if (error == 0 || error == EJUSTRETURN)
672 return;
673 if (error != ENOENT)
674 printf("exec %s: error %d\n", path, error);
675 }
676 printf("init: not found\n");
677 panic("no init");
678 }
679
/*
 * Entry point of the "pagedaemon" kernel thread that main() creates with
 * kthread_create1().  The argument is ignored; the thread simply enters
 * uvm_pageout().
 */
/* ARGSUSED */
static void
start_pagedaemon(void *arg)
{

	uvm_pageout();
	/* NOTREACHED */
}
688
/*
 * Entry point of the "reaper" kernel thread that main() creates with
 * kthread_create1().  The argument is ignored; the thread simply enters
 * reaper().
 */
/* ARGSUSED */
static void
start_reaper(void *arg)
{

	reaper();
	/* NOTREACHED */
}
697