init_main.c revision 1.154 1 /* $NetBSD: init_main.c,v 1.154 1999/07/22 21:08:31 thorpej Exp $ */
2
3 /*
4 * Copyright (c) 1995 Christopher G. Demetriou. All rights reserved.
5 * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
6 * The Regents of the University of California. All rights reserved.
7 * (c) UNIX System Laboratories, Inc.
8 * All or some portions of this file are derived from material licensed
9 * to the University of California by American Telephone and Telegraph
10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11 * the permission of UNIX System Laboratories, Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 * must display the following acknowledgement:
23 * This product includes software developed by the University of
24 * California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * @(#)init_main.c 8.16 (Berkeley) 5/14/95
42 */
43
44 #include "fs_nfs.h"
45 #include "opt_nfsserver.h"
46 #include "opt_sysv.h"
47
48 #include "rnd.h"
49
50 #include <sys/param.h>
51 #include <sys/filedesc.h>
52 #include <sys/file.h>
53 #include <sys/errno.h>
54 #include <sys/exec.h>
55 #include <sys/kernel.h>
56 #include <sys/mount.h>
57 #include <sys/map.h>
58 #include <sys/proc.h>
59 #include <sys/kthread.h>
60 #include <sys/resourcevar.h>
61 #include <sys/signalvar.h>
62 #include <sys/systm.h>
63 #include <sys/vnode.h>
64 #include <sys/tty.h>
65 #include <sys/conf.h>
66 #include <sys/disklabel.h>
67 #include <sys/buf.h>
68 #include <sys/device.h>
69 #include <sys/socketvar.h>
70 #include <sys/protosw.h>
71 #include <sys/reboot.h>
72 #include <sys/user.h>
73 #ifdef SYSVSHM
74 #include <sys/shm.h>
75 #endif
76 #ifdef SYSVSEM
77 #include <sys/sem.h>
78 #endif
79 #ifdef SYSVMSG
80 #include <sys/msg.h>
81 #endif
82 #include <sys/domain.h>
83 #include <sys/mbuf.h>
84 #include <sys/namei.h>
85 #if NRND > 0
86 #include <sys/rnd.h>
87 #endif
88
89 #include <sys/syscall.h>
90 #include <sys/syscallargs.h>
91
92 #include <ufs/ufs/quota.h>
93
94 #include <machine/cpu.h>
95
96 #include <vm/vm.h>
97 #include <vm/vm_pageout.h>
98
99 #include <uvm/uvm.h>
100
101 #include <net/if.h>
102 #include <net/raw_cb.h>
103
/*
 * Copyright banner; main() prints it with printf("%s", copyright) right
 * after the console has been initialized.
 *
 * Each line is a separate string literal with an explicit "\n": a raw
 * newline inside a string literal is not valid ISO C.
 *
 * NOTE(review): the indentation of the continuation lines and the
 * spacing after the periods were reconstructed -- confirm against the
 * historical banner text.
 */
char copyright[] =
    "Copyright (c) 1996, 1997, 1998, 1999\n"
    "    The NetBSD Foundation, Inc.  All rights reserved.\n"
    "Copyright (c) 1982, 1986, 1989, 1991, 1993\n"
    "    The Regents of the University of California.  All rights reserved.\n"
    "\n";
111
112 /* Components of the first process -- never freed. */
113 struct session session0;
114 struct pgrp pgrp0;
115 struct proc proc0;
116 struct pcred cred0;
117 struct filedesc0 filedesc0;
118 struct cwdinfo cwdi0;
119 struct plimit limit0;
120 struct vmspace vmspace0;
121 struct sigacts sigacts0;
122 #ifndef curproc
123 struct proc *curproc = &proc0;
124 #endif
125 struct proc *initproc;
126
127 int cmask = CMASK;
128 extern struct user *proc0paddr;
129
130 struct vnode *rootvp, *swapdev_vp;
131 int boothowto;
132 struct timeval boottime;
133 struct timeval runtime;
134
135 static void check_console __P((struct proc *p));
136 static void start_init __P((void *));
137 static void start_pagedaemon __P((void *));
138 static void start_reaper __P((void *));
139 void main __P((void));
140
141 extern char sigcode[], esigcode[];
142 #ifdef SYSCALL_DEBUG
143 extern char *syscallnames[];
144 #endif
145
146 struct emul emul_netbsd = {
147 "netbsd",
148 NULL,
149 sendsig,
150 SYS_syscall,
151 SYS_MAXSYSCALL,
152 sysent,
153 #ifdef SYSCALL_DEBUG
154 syscallnames,
155 #else
156 NULL,
157 #endif
158 0,
159 copyargs,
160 setregs,
161 sigcode,
162 esigcode,
163 };
164
165 /*
166 * System startup; initialize the world, create process 0, mount root
167 * filesystem, and fork to create init and pagedaemon. Most of the
168 * hard work is done in the lower-level initialization routines including
169 * startup(), which does memory initialization and autoconfiguration.
170 */
171 void
172 main()
173 {
174 struct proc *p;
175 struct pdevinit *pdev;
176 int i, s, error;
177 extern struct pdevinit pdevinit[];
178 extern void roundrobin __P((void *));
179 extern void schedcpu __P((void *));
180 extern void disk_init __P((void));
181 #if defined(NFSSERVER) || defined(NFS)
182 extern void nfs_init __P((void));
183 #endif
184
185 /*
186 * Initialize the current process pointer (curproc) before
187 * any possible traps/probes to simplify trap processing.
188 */
189 p = &proc0;
190 curproc = p;
191 /*
192 * Attempt to find console and initialize
193 * in case of early panic or other messages.
194 */
195 consinit();
196 printf("%s", copyright);
197
198 uvm_init();
199
200 /* Do machine-dependent initialization. */
201 cpu_startup();
202
203 /*
204 * Initialize mbuf's. Do this now because we might attempt to
205 * allocate mbufs or mbuf clusters during autoconfiguration.
206 */
207 mbinit();
208
209 /* Initialize sockets. */
210 soinit();
211
212 disk_init(); /* must come before autoconfiguration */
213 tty_init(); /* initialise tty list */
214 #if NRND > 0
215 rnd_init();
216 #endif
217 config_init(); /* init autoconfiguration data structures */
218 configure(); /* ...and configure the hardware */
219
220 /*
221 * Initialize process and pgrp structures.
222 */
223 procinit();
224
225 /*
226 * Create process 0 (the swapper).
227 */
228 s = proclist_lock_write();
229 LIST_INSERT_HEAD(&allproc, p, p_list);
230 proclist_unlock_write(s);
231
232 p->p_pgrp = &pgrp0;
233 LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
234 LIST_INIT(&pgrp0.pg_members);
235 LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
236
237 pgrp0.pg_session = &session0;
238 session0.s_count = 1;
239 session0.s_sid = p->p_pid;
240 session0.s_leader = p;
241
242 /*
243 * Set P_NOCLDWAIT so that kernel threads are reparented to
244 * init(8) when they exit. init(8) can easily wait them out
245 * for us.
246 */
247 p->p_flag = P_INMEM | P_SYSTEM | P_NOCLDWAIT;
248 p->p_stat = SRUN;
249 p->p_nice = NZERO;
250 p->p_emul = &emul_netbsd;
251 strncpy(p->p_comm, "swapper", MAXCOMLEN);
252
253 /* Create credentials. */
254 cred0.p_refcnt = 1;
255 p->p_cred = &cred0;
256 p->p_ucred = crget();
257 p->p_ucred->cr_ngroups = 1; /* group 0 */
258
259 /* Create the file descriptor table. */
260 finit();
261 p->p_fd = &filedesc0.fd_fd;
262 fdinit1(&filedesc0);
263
264 /* Create the CWD info. */
265 p->p_cwdi = &cwdi0;
266 cwdi0.cwdi_cmask = cmask;
267 cwdi0.cwdi_refcnt = 1;
268
269 /* Create the limits structures. */
270 p->p_limit = &limit0;
271 for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
272 limit0.pl_rlimit[i].rlim_cur =
273 limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
274
275 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
276 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
277 maxfiles < NOFILE ? maxfiles : NOFILE;
278
279 limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
280 limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
281 maxproc < MAXUPRC ? maxproc : MAXUPRC;
282
283 i = ptoa(uvmexp.free);
284 limit0.pl_rlimit[RLIMIT_RSS].rlim_max = i;
285 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i;
286 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3;
287 limit0.p_refcnt = 1;
288
289 /*
290 * Initialize proc0's vmspace, which uses the kernel pmap.
291 * All kernel processes (which never have user space mappings)
292 * share proc0's vmspace, and thus, the kernel pmap.
293 */
294 uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
295 trunc_page(VM_MAX_ADDRESS), TRUE);
296 p->p_vmspace = &vmspace0;
297
298 p->p_addr = proc0paddr; /* XXX */
299
300 /*
301 * We continue to place resource usage info in the
302 * user struct so they're pageable.
303 */
304 p->p_stats = &p->p_addr->u_stats;
305
306 /*
307 * Charge root for one process.
308 */
309 (void)chgproccnt(0, 1);
310
311 rqinit();
312
313 /* Configure virtual memory system, set vm rlimits. */
314 uvm_init_limits(p);
315
316 /* Initialize the file systems. */
317 #if defined(NFSSERVER) || defined(NFS)
318 nfs_init(); /* initialize server/shared data */
319 #endif
320 vfsinit();
321
322 /* Start real time and statistics clocks. */
323 initclocks();
324
325 #ifdef SYSVSHM
326 /* Initialize System V style shared memory. */
327 shminit();
328 #endif
329
330 #ifdef SYSVSEM
331 /* Initialize System V style semaphores. */
332 seminit();
333 #endif
334
335 #ifdef SYSVMSG
336 /* Initialize System V style message queues. */
337 msginit();
338 #endif
339
340 /* Attach pseudo-devices. */
341 for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
342 (*pdev->pdev_attach)(pdev->pdev_count);
343
344 /*
345 * Initialize protocols. Block reception of incoming packets
346 * until everything is ready.
347 */
348 s = splimp();
349 ifinit();
350 domaininit();
351 splx(s);
352
353 #ifdef GPROF
354 /* Initialize kernel profiling. */
355 kmstartup();
356 #endif
357
358 /* Kick off timeout driven events by calling first time. */
359 roundrobin(NULL);
360 schedcpu(NULL);
361
362 /* Determine the root and dump devices. */
363 cpu_rootconf();
364 cpu_dumpconf();
365
366 /* Mount the root file system. */
367 do {
368 domountroothook();
369 if ((error = vfs_mountroot())) {
370 printf("cannot mount root, error = %d\n", error);
371 boothowto |= RB_ASKNAME;
372 setroot(root_device,
373 (rootdev != NODEV) ? DISKPART(rootdev) : 0);
374 }
375 } while (error != 0);
376 mountroothook_destroy();
377
378 mountlist.cqh_first->mnt_flag |= MNT_ROOTFS;
379 mountlist.cqh_first->mnt_op->vfs_refcount++;
380
381 /*
382 * Get the vnode for '/'. Set filedesc0.fd_fd.fd_cdir to
383 * reference it.
384 */
385 if (VFS_ROOT(mountlist.cqh_first, &rootvnode))
386 panic("cannot find root vnode");
387 cwdi0.cwdi_cdir = rootvnode;
388 VREF(cwdi0.cwdi_cdir);
389 VOP_UNLOCK(rootvnode, 0);
390 cwdi0.cwdi_rdir = NULL;
391 uvm_swap_init();
392
393 /*
394 * Now can look at time, having had a chance to verify the time
395 * from the file system. Reset p->p_rtime as it may have been
396 * munched in mi_switch() after the time got set.
397 */
398 p->p_stats->p_start = runtime = mono_time = boottime = time;
399 p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0;
400
401 /*
402 * Initialize signal-related data structures, and signal state
403 * for proc0.
404 */
405 signal_init();
406 p->p_sigacts = &sigacts0;
407 siginit(p);
408
409 /* Create process 1 (init(8)). */
410 if (fork1(p, 0, SIGCHLD, NULL, 0, NULL, &initproc))
411 panic("fork init");
412 cpu_set_kpc(initproc, start_init, initproc);
413
414 /* Create process 2, the pageout daemon kernel thread. */
415 if (kthread_create1(start_pagedaemon, NULL, NULL, "pagedaemon"))
416 panic("fork pagedaemon");
417
418 /* Create process 3, the process reaper kernel thread. */
419 if (kthread_create1(start_reaper, NULL, NULL, "reaper"))
420 panic("fork reaper");
421
422 /* Create any other deferred kernel threads. */
423 kthread_run_deferred_queue();
424
425 /* The scheduler is an infinite loop. */
426 uvm_scheduler();
427 /* NOTREACHED */
428 }
429
430 static void
431 check_console(p)
432 struct proc *p;
433 {
434 struct nameidata nd;
435 int error;
436
437 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/console", p);
438 error = namei(&nd);
439 if (error == 0)
440 vrele(nd.ni_vp);
441 else if (error == ENOENT)
442 printf("warning: no /dev/console\n");
443 else
444 printf("warning: lookup /dev/console: error %d\n", error);
445 }
446
/*
 * Candidate pathnames for the "init" program, in the order start_init()
 * tries them.  The list is terminated by a NULL entry.
 */
static char *initpaths[] = {
	"/sbin/init", "/sbin/oinit", "/sbin/init.bak",
	NULL,
};
456
457 /*
458 * Start the initial user process; try exec'ing each pathname in "initpaths".
459 * The program is invoked with one argument containing the boot flags.
460 */
461 static void
462 start_init(arg)
463 void *arg;
464 {
465 struct proc *p = arg;
466 vaddr_t addr;
467 struct sys_execve_args /* {
468 syscallarg(const char *) path;
469 syscallarg(char * const *) argp;
470 syscallarg(char * const *) envp;
471 } */ args;
472 int options, i, error;
473 register_t retval[2];
474 char flags[4], *flagsp;
475 char **pathp, *path, *slash, *ucp, **uap, *arg0, *arg1 = NULL;
476
477 /*
478 * Now in process 1.
479 */
480 strncpy(p->p_comm, "init", MAXCOMLEN);
481
482 /*
483 * This is not the right way to do this. We really should
484 * hand-craft a descriptor onto /dev/console to hand to init,
485 * but that's a _lot_ more work, and the benefit from this easy
486 * hack makes up for the "good is the enemy of the best" effect.
487 */
488 check_console(p);
489
490 /*
491 * Need just enough stack to hold the faked-up "execve()" arguments.
492 */
493 addr = USRSTACK - PAGE_SIZE;
494 if (uvm_map(&p->p_vmspace->vm_map, &addr, PAGE_SIZE,
495 NULL, UVM_UNKNOWN_OFFSET,
496 UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY,
497 UVM_ADV_NORMAL,
498 UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW))
499 != KERN_SUCCESS)
500 panic("init: couldn't allocate argument space");
501 p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
502
503 for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) {
504 ucp = (char *)(addr + PAGE_SIZE);
505
506 /*
507 * Construct the boot flag argument.
508 */
509 flagsp = flags;
510 *flagsp++ = '-';
511 options = 0;
512
513 if (boothowto & RB_SINGLE) {
514 *flagsp++ = 's';
515 options = 1;
516 }
517 #ifdef notyet
518 if (boothowto & RB_FASTBOOT) {
519 *flagsp++ = 'f';
520 options = 1;
521 }
522 #endif
523
524 /*
525 * Move out the flags (arg 1), if necessary.
526 */
527 if (options != 0) {
528 *flagsp++ = '\0';
529 i = flagsp - flags;
530 #ifdef DEBUG
531 printf("init: copying out flags `%s' %d\n", flags, i);
532 #endif
533 (void)copyout((caddr_t)flags, (caddr_t)(ucp -= i), i);
534 arg1 = ucp;
535 }
536
537 /*
538 * Move out the file name (also arg 0).
539 */
540 i = strlen(path) + 1;
541 #ifdef DEBUG
542 printf("init: copying out path `%s' %d\n", path, i);
543 #endif
544 (void)copyout((caddr_t)path, (caddr_t)(ucp -= i), i);
545 arg0 = ucp;
546
547 /*
548 * Move out the arg pointers.
549 */
550 uap = (char **)((long)ucp & ~ALIGNBYTES);
551 (void)suword((caddr_t)--uap, 0); /* terminator */
552 if (options != 0)
553 (void)suword((caddr_t)--uap, (long)arg1);
554 slash = strrchr(path, '/');
555 if (slash)
556 (void)suword((caddr_t)--uap,
557 (long)arg0 + (slash + 1 - path));
558 else
559 (void)suword((caddr_t)--uap, (long)arg0);
560
561 /*
562 * Point at the arguments.
563 */
564 SCARG(&args, path) = arg0;
565 SCARG(&args, argp) = uap;
566 SCARG(&args, envp) = NULL;
567
568 /*
569 * Now try to exec the program. If can't for any reason
570 * other than it doesn't exist, complain.
571 */
572 error = sys_execve(p, &args, retval);
573 if (error == 0 || error == EJUSTRETURN)
574 return;
575 if (error != ENOENT)
576 printf("exec %s: error %d\n", path, error);
577 }
578 printf("init: not found\n");
579 panic("no init");
580 }
581
/*
 * Entry point handed to kthread_create1() in main() for the
 * "pagedaemon" thread; simply calls uvm_pageout().  The argument is
 * unused.
 */
/* ARGSUSED */
static void
start_pagedaemon(arg)
	void *arg;
{

	uvm_pageout();
	/* NOTREACHED */
}
591
/*
 * Entry point handed to kthread_create1() in main() for the "reaper"
 * thread; simply calls reaper().  The argument is unused.
 */
/* ARGSUSED */
static void
start_reaper(arg)
	void *arg;
{

	reaper();
	/* NOTREACHED */
}
601