/*	$NetBSD: init_main.c,v 1.152 1999/06/07 20:16:09 thorpej Exp $	*/
2
3 /*
4 * Copyright (c) 1995 Christopher G. Demetriou. All rights reserved.
5 * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
6 * The Regents of the University of California. All rights reserved.
7 * (c) UNIX System Laboratories, Inc.
8 * All or some portions of this file are derived from material licensed
9 * to the University of California by American Telephone and Telegraph
10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11 * the permission of UNIX System Laboratories, Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 * must display the following acknowledgement:
23 * This product includes software developed by the University of
24 * California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * @(#)init_main.c 8.16 (Berkeley) 5/14/95
42 */
43
44 #include "fs_nfs.h"
45 #include "opt_nfsserver.h"
46 #include "opt_sysv.h"
47
48 #include "rnd.h"
49
50 #include <sys/param.h>
51 #include <sys/filedesc.h>
52 #include <sys/file.h>
53 #include <sys/errno.h>
54 #include <sys/exec.h>
55 #include <sys/kernel.h>
56 #include <sys/mount.h>
57 #include <sys/map.h>
58 #include <sys/proc.h>
59 #include <sys/kthread.h>
60 #include <sys/resourcevar.h>
61 #include <sys/signalvar.h>
62 #include <sys/systm.h>
63 #include <sys/vnode.h>
64 #include <sys/tty.h>
65 #include <sys/conf.h>
66 #include <sys/disklabel.h>
67 #include <sys/buf.h>
68 #include <sys/device.h>
69 #include <sys/socketvar.h>
70 #include <sys/protosw.h>
71 #include <sys/reboot.h>
72 #include <sys/user.h>
73 #ifdef SYSVSHM
74 #include <sys/shm.h>
75 #endif
76 #ifdef SYSVSEM
77 #include <sys/sem.h>
78 #endif
79 #ifdef SYSVMSG
80 #include <sys/msg.h>
81 #endif
82 #include <sys/domain.h>
83 #include <sys/mbuf.h>
84 #include <sys/namei.h>
85 #if NRND > 0
86 #include <sys/rnd.h>
87 #endif
88
89 #include <sys/syscall.h>
90 #include <sys/syscallargs.h>
91
92 #include <ufs/ufs/quota.h>
93
94 #include <machine/cpu.h>
95
96 #include <vm/vm.h>
97 #include <vm/vm_pageout.h>
98
99 #include <uvm/uvm.h>
100
101 #include <net/if.h>
102 #include <net/raw_cb.h>
103
/*
 * Copyright banner; main() prints it on the console right after
 * consinit().
 *
 * Written as adjacent string literals with explicit "\n" escapes: a raw
 * newline inside a string literal is not valid C (it relied on a GCC
 * extension that has since been removed).
 */
char copyright[] =
	"Copyright (c) 1996, 1997, 1998, 1999\n"
	"The NetBSD Foundation, Inc. All rights reserved.\n"
	"Copyright (c) 1982, 1986, 1989, 1991, 1993\n"
	"The Regents of the University of California. All rights reserved.\n"
	"\n";
111
112 /* Components of the first process -- never freed. */
113 struct session session0;
114 struct pgrp pgrp0;
115 struct proc proc0;
116 struct pcred cred0;
117 struct filedesc0 filedesc0;
118 struct cwdinfo cwdi0;
119 struct plimit limit0;
120 struct vmspace vmspace0;
121 struct sigacts sigacts0;
122 #ifndef curproc
123 struct proc *curproc = &proc0;
124 #endif
125 struct proc *initproc;
126
127 int cmask = CMASK;
128 extern struct user *proc0paddr;
129
130 struct vnode *rootvp, *swapdev_vp;
131 int boothowto;
132 struct timeval boottime;
133 struct timeval runtime;
134
135 static void check_console __P((struct proc *p));
136 static void start_init __P((void *));
137 static void start_pagedaemon __P((void *));
138 static void start_reaper __P((void *));
139 void main __P((void));
140
141 extern char sigcode[], esigcode[];
142 #ifdef SYSCALL_DEBUG
143 extern char *syscallnames[];
144 #endif
145
146 struct emul emul_netbsd = {
147 "netbsd",
148 NULL,
149 sendsig,
150 SYS_syscall,
151 SYS_MAXSYSCALL,
152 sysent,
153 #ifdef SYSCALL_DEBUG
154 syscallnames,
155 #else
156 NULL,
157 #endif
158 0,
159 copyargs,
160 setregs,
161 sigcode,
162 esigcode,
163 };
164
165 /*
166 * System startup; initialize the world, create process 0, mount root
167 * filesystem, and fork to create init and pagedaemon. Most of the
168 * hard work is done in the lower-level initialization routines including
169 * startup(), which does memory initialization and autoconfiguration.
170 */
171 void
172 main()
173 {
174 struct proc *p;
175 struct pdevinit *pdev;
176 int i, s, error;
177 extern struct pdevinit pdevinit[];
178 extern void roundrobin __P((void *));
179 extern void schedcpu __P((void *));
180 extern void disk_init __P((void));
181 #if defined(NFSSERVER) || defined(NFS)
182 extern void nfs_init __P((void));
183 #endif
184
185 /*
186 * Initialize the current process pointer (curproc) before
187 * any possible traps/probes to simplify trap processing.
188 */
189 p = &proc0;
190 curproc = p;
191 /*
192 * Attempt to find console and initialize
193 * in case of early panic or other messages.
194 */
195 consinit();
196 printf("%s", copyright);
197
198 uvm_init();
199
200 /* Do machine-dependent initialization. */
201 cpu_startup();
202
203 /*
204 * Initialize mbuf's. Do this now because we might attempt to
205 * allocate mbufs or mbuf clusters during autoconfiguration.
206 */
207 mbinit();
208
209 /* Initialize sockets. */
210 soinit();
211
212 disk_init(); /* must come before autoconfiguration */
213 tty_init(); /* initialise tty list */
214 #if NRND > 0
215 rnd_init();
216 #endif
217 config_init(); /* init autoconfiguration data structures */
218 configure(); /* ...and configure the hardware */
219
220 /*
221 * Initialize process and pgrp structures.
222 */
223 procinit();
224
225 /*
226 * Create process 0 (the swapper).
227 */
228 LIST_INSERT_HEAD(&allproc, p, p_list);
229 p->p_pgrp = &pgrp0;
230 LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
231 LIST_INIT(&pgrp0.pg_members);
232 LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
233
234 pgrp0.pg_session = &session0;
235 session0.s_count = 1;
236 session0.s_sid = p->p_pid;
237 session0.s_leader = p;
238
239 /*
240 * Set P_NOCLDWAIT so that kernel threads are reparented to
241 * init(8) when they exit. init(8) can easily wait them out
242 * for us.
243 */
244 p->p_flag = P_INMEM | P_SYSTEM | P_NOCLDWAIT;
245 p->p_stat = SRUN;
246 p->p_nice = NZERO;
247 p->p_emul = &emul_netbsd;
248 strncpy(p->p_comm, "swapper", MAXCOMLEN);
249
250 /* Create credentials. */
251 cred0.p_refcnt = 1;
252 p->p_cred = &cred0;
253 p->p_ucred = crget();
254 p->p_ucred->cr_ngroups = 1; /* group 0 */
255
256 /* Create the file descriptor table. */
257 finit();
258 p->p_fd = &filedesc0.fd_fd;
259 fdinit1(&filedesc0);
260
261 /* Create the CWD info. */
262 p->p_cwdi = &cwdi0;
263 cwdi0.cwdi_cmask = cmask;
264 cwdi0.cwdi_refcnt = 1;
265
266 /* Create the limits structures. */
267 p->p_limit = &limit0;
268 for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
269 limit0.pl_rlimit[i].rlim_cur =
270 limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
271
272 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
273 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
274 maxfiles < NOFILE ? maxfiles : NOFILE;
275
276 limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
277 limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
278 maxproc < MAXUPRC ? maxproc : MAXUPRC;
279
280 i = ptoa(uvmexp.free);
281 limit0.pl_rlimit[RLIMIT_RSS].rlim_max = i;
282 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i;
283 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3;
284 limit0.p_refcnt = 1;
285
286 /*
287 * Initialize proc0's vmspace, which uses the kernel pmap.
288 * All kernel processes (which never have user space mappings)
289 * share proc0's vmspace, and thus, the kernel pmap.
290 */
291 uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
292 trunc_page(VM_MAX_ADDRESS), TRUE);
293 p->p_vmspace = &vmspace0;
294
295 p->p_addr = proc0paddr; /* XXX */
296
297 /*
298 * We continue to place resource usage info in the
299 * user struct so they're pageable.
300 */
301 p->p_stats = &p->p_addr->u_stats;
302
303 /*
304 * Charge root for one process.
305 */
306 (void)chgproccnt(0, 1);
307
308 rqinit();
309
310 /* Configure virtual memory system, set vm rlimits. */
311 uvm_init_limits(p);
312
313 /* Initialize the file systems. */
314 #if defined(NFSSERVER) || defined(NFS)
315 nfs_init(); /* initialize server/shared data */
316 #endif
317 vfsinit();
318
319 /* Start real time and statistics clocks. */
320 initclocks();
321
322 #ifdef SYSVSHM
323 /* Initialize System V style shared memory. */
324 shminit();
325 #endif
326
327 #ifdef SYSVSEM
328 /* Initialize System V style semaphores. */
329 seminit();
330 #endif
331
332 #ifdef SYSVMSG
333 /* Initialize System V style message queues. */
334 msginit();
335 #endif
336
337 /* Attach pseudo-devices. */
338 for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
339 (*pdev->pdev_attach)(pdev->pdev_count);
340
341 /*
342 * Initialize protocols. Block reception of incoming packets
343 * until everything is ready.
344 */
345 s = splimp();
346 ifinit();
347 domaininit();
348 splx(s);
349
350 #ifdef GPROF
351 /* Initialize kernel profiling. */
352 kmstartup();
353 #endif
354
355 /* Kick off timeout driven events by calling first time. */
356 roundrobin(NULL);
357 schedcpu(NULL);
358
359 /* Determine the root and dump devices. */
360 cpu_rootconf();
361 cpu_dumpconf();
362
363 /* Mount the root file system. */
364 do {
365 domountroothook();
366 if ((error = vfs_mountroot())) {
367 printf("cannot mount root, error = %d\n", error);
368 boothowto |= RB_ASKNAME;
369 setroot(root_device,
370 (rootdev != NODEV) ? DISKPART(rootdev) : 0);
371 }
372 } while (error != 0);
373 mountroothook_destroy();
374
375 mountlist.cqh_first->mnt_flag |= MNT_ROOTFS;
376 mountlist.cqh_first->mnt_op->vfs_refcount++;
377
378 /*
379 * Get the vnode for '/'. Set filedesc0.fd_fd.fd_cdir to
380 * reference it.
381 */
382 if (VFS_ROOT(mountlist.cqh_first, &rootvnode))
383 panic("cannot find root vnode");
384 cwdi0.cwdi_cdir = rootvnode;
385 VREF(cwdi0.cwdi_cdir);
386 VOP_UNLOCK(rootvnode, 0);
387 cwdi0.cwdi_rdir = NULL;
388 uvm_swap_init();
389
390 /*
391 * Now can look at time, having had a chance to verify the time
392 * from the file system. Reset p->p_rtime as it may have been
393 * munched in mi_switch() after the time got set.
394 */
395 p->p_stats->p_start = runtime = mono_time = boottime = time;
396 p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0;
397
398 /*
399 * Initialize signal-related data structures, and signal state
400 * for proc0.
401 */
402 signal_init();
403 p->p_sigacts = &sigacts0;
404 siginit(p);
405
406 /* Create process 1 (init(8)). */
407 if (fork1(p, 0, SIGCHLD, NULL, 0, NULL, &initproc))
408 panic("fork init");
409 cpu_set_kpc(initproc, start_init, initproc);
410
411 /* Create process 2, the pageout daemon kernel thread. */
412 if (kthread_create(start_pagedaemon, NULL, NULL, "pagedaemon"))
413 panic("fork pagedaemon");
414
415 /* Create process 3, the process reaper kernel thread. */
416 if (kthread_create(start_reaper, NULL, NULL, "reaper"))
417 panic("fork reaper");
418
419 /* Create any other deferred kernel threads. */
420 kthread_run_deferred_queue();
421
422 /* The scheduler is an infinite loop. */
423 uvm_scheduler();
424 /* NOTREACHED */
425 }
426
427 static void
428 check_console(p)
429 struct proc *p;
430 {
431 struct nameidata nd;
432 int error;
433
434 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/console", p);
435 error = namei(&nd);
436 if (error == 0)
437 vrele(nd.ni_vp);
438 else if (error == ENOENT)
439 printf("warning: no /dev/console\n");
440 else
441 printf("warning: lookup /dev/console: error %d\n", error);
442 }
443
/*
 * Candidate pathnames for the initial user program.  start_init()
 * tries them in the order listed and stops at the first one it manages
 * to exec; the NULL entry terminates the list.
 */
static char *initpaths[] = {
	"/sbin/init",
	"/sbin/oinit",
	"/sbin/init.bak",
	NULL,			/* end of list */
};
453
454 /*
455 * Start the initial user process; try exec'ing each pathname in "initpaths".
456 * The program is invoked with one argument containing the boot flags.
457 */
458 static void
459 start_init(arg)
460 void *arg;
461 {
462 struct proc *p = arg;
463 vaddr_t addr;
464 struct sys_execve_args /* {
465 syscallarg(const char *) path;
466 syscallarg(char * const *) argp;
467 syscallarg(char * const *) envp;
468 } */ args;
469 int options, i, error;
470 register_t retval[2];
471 char flags[4], *flagsp;
472 char **pathp, *path, *slash, *ucp, **uap, *arg0, *arg1 = NULL;
473
474 /*
475 * Now in process 1.
476 */
477 strncpy(p->p_comm, "init", MAXCOMLEN);
478
479 /*
480 * This is not the right way to do this. We really should
481 * hand-craft a descriptor onto /dev/console to hand to init,
482 * but that's a _lot_ more work, and the benefit from this easy
483 * hack makes up for the "good is the enemy of the best" effect.
484 */
485 check_console(p);
486
487 /*
488 * Need just enough stack to hold the faked-up "execve()" arguments.
489 */
490 addr = USRSTACK - PAGE_SIZE;
491 if (uvm_map(&p->p_vmspace->vm_map, &addr, PAGE_SIZE,
492 NULL, UVM_UNKNOWN_OFFSET,
493 UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY,
494 UVM_ADV_NORMAL,
495 UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW))
496 != KERN_SUCCESS)
497 panic("init: couldn't allocate argument space");
498 p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
499
500 for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) {
501 ucp = (char *)(addr + PAGE_SIZE);
502
503 /*
504 * Construct the boot flag argument.
505 */
506 flagsp = flags;
507 *flagsp++ = '-';
508 options = 0;
509
510 if (boothowto & RB_SINGLE) {
511 *flagsp++ = 's';
512 options = 1;
513 }
514 #ifdef notyet
515 if (boothowto & RB_FASTBOOT) {
516 *flagsp++ = 'f';
517 options = 1;
518 }
519 #endif
520
521 /*
522 * Move out the flags (arg 1), if necessary.
523 */
524 if (options != 0) {
525 *flagsp++ = '\0';
526 i = flagsp - flags;
527 #ifdef DEBUG
528 printf("init: copying out flags `%s' %d\n", flags, i);
529 #endif
530 (void)copyout((caddr_t)flags, (caddr_t)(ucp -= i), i);
531 arg1 = ucp;
532 }
533
534 /*
535 * Move out the file name (also arg 0).
536 */
537 i = strlen(path) + 1;
538 #ifdef DEBUG
539 printf("init: copying out path `%s' %d\n", path, i);
540 #endif
541 (void)copyout((caddr_t)path, (caddr_t)(ucp -= i), i);
542 arg0 = ucp;
543
544 /*
545 * Move out the arg pointers.
546 */
547 uap = (char **)((long)ucp & ~ALIGNBYTES);
548 (void)suword((caddr_t)--uap, 0); /* terminator */
549 if (options != 0)
550 (void)suword((caddr_t)--uap, (long)arg1);
551 slash = strrchr(path, '/');
552 if (slash)
553 (void)suword((caddr_t)--uap,
554 (long)arg0 + (slash + 1 - path));
555 else
556 (void)suword((caddr_t)--uap, (long)arg0);
557
558 /*
559 * Point at the arguments.
560 */
561 SCARG(&args, path) = arg0;
562 SCARG(&args, argp) = uap;
563 SCARG(&args, envp) = NULL;
564
565 /*
566 * Now try to exec the program. If can't for any reason
567 * other than it doesn't exist, complain.
568 */
569 error = sys_execve(p, &args, retval);
570 if (error == 0 || error == EJUSTRETURN)
571 return;
572 if (error != ENOENT)
573 printf("exec %s: error %d\n", path, error);
574 }
575 printf("init: not found\n");
576 panic("no init");
577 }
578
/*
 * Entry point handed to kthread_create() by main() for the
 * "pagedaemon" kernel thread; simply enters uvm_pageout().
 * The argument is not used.
 */
/* ARGSUSED */
static void
start_pagedaemon(arg)
	void *arg;
{

	uvm_pageout();
	/* NOTREACHED */
}
588
/*
 * Entry point handed to kthread_create() by main() for the "reaper"
 * kernel thread; simply enters reaper().  The argument is not used.
 */
/* ARGSUSED */
static void
start_reaper(arg)
	void *arg;
{

	reaper();
	/* NOTREACHED */
}
598