Home | History | Annotate | Line # | Download | only in kern
init_main.c revision 1.160
      1 /*	$NetBSD: init_main.c,v 1.160 1999/12/16 19:59:17 thorpej Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1995 Christopher G. Demetriou.  All rights reserved.
      5  * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
      6  *	The Regents of the University of California.  All rights reserved.
      7  * (c) UNIX System Laboratories, Inc.
      8  * All or some portions of this file are derived from material licensed
      9  * to the University of California by American Telephone and Telegraph
     10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
     11  * the permission of UNIX System Laboratories, Inc.
     12  *
     13  * Redistribution and use in source and binary forms, with or without
     14  * modification, are permitted provided that the following conditions
     15  * are met:
     16  * 1. Redistributions of source code must retain the above copyright
     17  *    notice, this list of conditions and the following disclaimer.
     18  * 2. Redistributions in binary form must reproduce the above copyright
     19  *    notice, this list of conditions and the following disclaimer in the
     20  *    documentation and/or other materials provided with the distribution.
     21  * 3. All advertising materials mentioning features or use of this software
     22  *    must display the following acknowledgement:
     23  *	This product includes software developed by the University of
     24  *	California, Berkeley and its contributors.
     25  * 4. Neither the name of the University nor the names of its contributors
     26  *    may be used to endorse or promote products derived from this software
     27  *    without specific prior written permission.
     28  *
     29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     39  * SUCH DAMAGE.
     40  *
     41  *	@(#)init_main.c	8.16 (Berkeley) 5/14/95
     42  */
     43 
     44 #include "fs_nfs.h"
     45 #include "opt_nfsserver.h"
     46 #include "opt_sysv.h"
     47 #include "opt_maxuprc.h"
     48 #include "opt_multiprocessor.h"
     49 
     50 #include "rnd.h"
     51 
     52 #include <sys/param.h>
     53 #include <sys/filedesc.h>
     54 #include <sys/file.h>
     55 #include <sys/errno.h>
     56 #include <sys/exec.h>
     57 #include <sys/kernel.h>
     58 #include <sys/mount.h>
     59 #include <sys/map.h>
     60 #include <sys/proc.h>
     61 #include <sys/kthread.h>
     62 #include <sys/resourcevar.h>
     63 #include <sys/signalvar.h>
     64 #include <sys/systm.h>
     65 #include <sys/vnode.h>
     66 #include <sys/tty.h>
     67 #include <sys/conf.h>
     68 #include <sys/disklabel.h>
     69 #include <sys/buf.h>
     70 #include <sys/device.h>
     71 #include <sys/socketvar.h>
     72 #include <sys/protosw.h>
     73 #include <sys/reboot.h>
     74 #include <sys/user.h>
     75 #ifdef SYSVSHM
     76 #include <sys/shm.h>
     77 #endif
     78 #ifdef SYSVSEM
     79 #include <sys/sem.h>
     80 #endif
     81 #ifdef SYSVMSG
     82 #include <sys/msg.h>
     83 #endif
     84 #include <sys/domain.h>
     85 #include <sys/mbuf.h>
     86 #include <sys/namei.h>
     87 #if NRND > 0
     88 #include <sys/rnd.h>
     89 #endif
     90 
     91 #include <sys/syscall.h>
     92 #include <sys/syscallargs.h>
     93 
     94 #include <ufs/ufs/quota.h>
     95 
     96 #include <miscfs/genfs/genfs.h>
     97 #include <miscfs/syncfs/syncfs.h>
     98 
     99 #include <machine/cpu.h>
    100 
    101 #include <vm/vm.h>
    102 #include <vm/vm_pageout.h>
    103 
    104 #include <uvm/uvm.h>
    105 
    106 #include <net/if.h>
    107 #include <net/raw_cb.h>
    108 
/*
 * Copyright banner.  main() prints this string on the console right
 * after consinit().
 *
 * The text is spelled with explicit "\n" escapes and adjacent string
 * literal concatenation; a string literal that spans source lines with
 * raw embedded newlines is not valid ISO C and is rejected by current
 * compilers.  The resulting bytes are unchanged: four banner lines
 * followed by one empty line.
 */
char	copyright[] =
    "Copyright (c) 1996, 1997, 1998, 1999\n"
    "    The NetBSD Foundation, Inc.  All rights reserved.\n"
    "Copyright (c) 1982, 1986, 1989, 1991, 1993\n"
    "    The Regents of the University of California.  All rights reserved.\n"
    "\n";
    116 
    117 /* Components of the first process -- never freed. */
    118 struct	session session0;
    119 struct	pgrp pgrp0;
    120 struct	proc proc0;
    121 struct	pcred cred0;
    122 struct	filedesc0 filedesc0;
    123 struct	cwdinfo cwdi0;
    124 struct	plimit limit0;
    125 struct	vmspace vmspace0;
    126 struct	sigacts sigacts0;
    127 #ifndef curproc
    128 struct	proc *curproc = &proc0;
    129 #endif
    130 struct	proc *initproc;
    131 
    132 int	cmask = CMASK;
    133 extern	struct user *proc0paddr;
    134 
    135 struct	vnode *rootvp, *swapdev_vp;
    136 int	boothowto;
    137 int	cold = 1;			/* still working on startup */
    138 struct	timeval boottime;
    139 struct	timeval runtime;
    140 
    141 static void check_console __P((struct proc *p));
    142 static void start_init __P((void *));
    143 static void start_pagedaemon __P((void *));
    144 static void start_reaper __P((void *));
    145 void main __P((void));
    146 
    147 extern char sigcode[], esigcode[];
    148 #ifdef SYSCALL_DEBUG
    149 extern char *syscallnames[];
    150 #endif
    151 
    152 struct emul emul_netbsd = {
    153 	"netbsd",
    154 	NULL,
    155 	sendsig,
    156 	SYS_syscall,
    157 	SYS_MAXSYSCALL,
    158 	sysent,
    159 #ifdef SYSCALL_DEBUG
    160 	syscallnames,
    161 #else
    162 	NULL,
    163 #endif
    164 	0,
    165 	copyargs,
    166 	setregs,
    167 	sigcode,
    168 	esigcode,
    169 };
    170 
    171 /*
    172  * System startup; initialize the world, create process 0, mount root
    173  * filesystem, and fork to create init and pagedaemon.  Most of the
    174  * hard work is done in the lower-level initialization routines including
    175  * startup(), which does memory initialization and autoconfiguration.
    176  */
    177 void
    178 main()
    179 {
    180 	struct proc *p;
    181 	struct pdevinit *pdev;
    182 	int i, s, error;
    183 	extern struct pdevinit pdevinit[];
    184 	extern void roundrobin __P((void *));
    185 	extern void schedcpu __P((void *));
    186 	extern void disk_init __P((void));
    187 #if defined(NFSSERVER) || defined(NFS)
    188 	extern void nfs_init __P((void));
    189 #endif
    190 
    191 	/*
    192 	 * Initialize the current process pointer (curproc) before
    193 	 * any possible traps/probes to simplify trap processing.
    194 	 */
    195 	p = &proc0;
    196 	curproc = p;
    197 	/*
    198 	 * Attempt to find console and initialize
    199 	 * in case of early panic or other messages.
    200 	 */
    201 	consinit();
    202 	printf("%s", copyright);
    203 
    204 	uvm_init();
    205 
    206 	/* Do machine-dependent initialization. */
    207 	cpu_startup();
    208 
    209 	/*
    210 	 * Initialize mbuf's.  Do this now because we might attempt to
    211 	 * allocate mbufs or mbuf clusters during autoconfiguration.
    212 	 */
    213 	mbinit();
    214 
    215 	/* Initialize sockets. */
    216 	soinit();
    217 
    218 	/*
    219 	 * The following 3 things must be done before autoconfiguration.
    220 	 */
    221 	disk_init();		/* initialize disk list */
    222 	tty_init();		/* initialize tty list */
    223 #if NRND > 0
    224 	rnd_init();		/* initialize RNG */
    225 #endif
    226 
    227 	/*
    228 	 * Initialize process and pgrp structures.
    229 	 */
    230 	procinit();
    231 
    232 	/*
    233 	 * Create process 0 (the swapper).
    234 	 */
    235 	s = proclist_lock_write();
    236 	LIST_INSERT_HEAD(&allproc, p, p_list);
    237 	proclist_unlock_write(s);
    238 
    239 	p->p_pgrp = &pgrp0;
    240 	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
    241 	LIST_INIT(&pgrp0.pg_members);
    242 	LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
    243 
    244 	pgrp0.pg_session = &session0;
    245 	session0.s_count = 1;
    246 	session0.s_sid = p->p_pid;
    247 	session0.s_leader = p;
    248 
    249 	/*
    250 	 * Set P_NOCLDWAIT so that kernel threads are reparented to
    251 	 * init(8) when they exit.  init(8) can easily wait them out
    252 	 * for us.
    253 	 */
    254 	p->p_flag = P_INMEM | P_SYSTEM | P_NOCLDWAIT;
    255 	p->p_stat = SRUN;
    256 	p->p_nice = NZERO;
    257 	p->p_emul = &emul_netbsd;
    258 	strncpy(p->p_comm, "swapper", MAXCOMLEN);
    259 
    260 	/* Create credentials. */
    261 	cred0.p_refcnt = 1;
    262 	p->p_cred = &cred0;
    263 	p->p_ucred = crget();
    264 	p->p_ucred->cr_ngroups = 1;	/* group 0 */
    265 
    266 	/* Create the file descriptor table. */
    267 	finit();
    268 	p->p_fd = &filedesc0.fd_fd;
    269 	fdinit1(&filedesc0);
    270 
    271 	/* Create the CWD info. */
    272 	p->p_cwdi = &cwdi0;
    273 	cwdi0.cwdi_cmask = cmask;
    274 	cwdi0.cwdi_refcnt = 1;
    275 
    276 	/* Create the limits structures. */
    277 	p->p_limit = &limit0;
    278 	for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
    279 		limit0.pl_rlimit[i].rlim_cur =
    280 		    limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
    281 
    282 	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
    283 	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
    284 	    maxfiles < NOFILE ? maxfiles : NOFILE;
    285 
    286 	limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
    287 	limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
    288 	    maxproc < MAXUPRC ? maxproc : MAXUPRC;
    289 
    290 	i = ptoa(uvmexp.free);
    291 	limit0.pl_rlimit[RLIMIT_RSS].rlim_max = i;
    292 	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i;
    293 	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3;
    294 	limit0.pl_corename = defcorename;
    295 	limit0.p_refcnt = 1;
    296 
    297 	/*
    298 	 * Initialize proc0's vmspace, which uses the kernel pmap.
    299 	 * All kernel processes (which never have user space mappings)
    300 	 * share proc0's vmspace, and thus, the kernel pmap.
    301 	 */
    302 	uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
    303 	    trunc_page(VM_MAX_ADDRESS), TRUE);
    304 	p->p_vmspace = &vmspace0;
    305 
    306 	p->p_addr = proc0paddr;				/* XXX */
    307 
    308 	/*
    309 	 * We continue to place resource usage info in the
    310 	 * user struct so they're pageable.
    311 	 */
    312 	p->p_stats = &p->p_addr->u_stats;
    313 
    314 	/*
    315 	 * Charge root for one process.
    316 	 */
    317 	(void)chgproccnt(0, 1);
    318 
    319 	rqinit();
    320 
    321 	/* Configure virtual memory system, set vm rlimits. */
    322 	uvm_init_limits(p);
    323 
    324 	/* Initialize the file systems. */
    325 #if defined(NFSSERVER) || defined(NFS)
    326 	nfs_init();			/* initialize server/shared data */
    327 #endif
    328 	vfsinit();
    329 
    330 	/* Configure the system hardware.  This will enable interrupts. */
    331 	configure();
    332 
    333 #ifdef SYSVSHM
    334 	/* Initialize System V style shared memory. */
    335 	shminit();
    336 #endif
    337 
    338 #ifdef SYSVSEM
    339 	/* Initialize System V style semaphores. */
    340 	seminit();
    341 #endif
    342 
    343 #ifdef SYSVMSG
    344 	/* Initialize System V style message queues. */
    345 	msginit();
    346 #endif
    347 
    348 	/* Attach pseudo-devices. */
    349 	for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
    350 		(*pdev->pdev_attach)(pdev->pdev_count);
    351 
    352 	/*
    353 	 * Initialize protocols.  Block reception of incoming packets
    354 	 * until everything is ready.
    355 	 */
    356 	s = splimp();
    357 	ifinit();
    358 	domaininit();
    359 	splx(s);
    360 
    361 #ifdef GPROF
    362 	/* Initialize kernel profiling. */
    363 	kmstartup();
    364 #endif
    365 
    366 	/* Kick off timeout driven events by calling first time. */
    367 	roundrobin(NULL);
    368 	schedcpu(NULL);
    369 
    370 	/* Determine the root and dump devices. */
    371 	cpu_rootconf();
    372 	cpu_dumpconf();
    373 
    374 	/* Mount the root file system. */
    375 	do {
    376 		domountroothook();
    377 		if ((error = vfs_mountroot())) {
    378 			printf("cannot mount root, error = %d\n", error);
    379 			boothowto |= RB_ASKNAME;
    380 			setroot(root_device,
    381 			    (rootdev != NODEV) ? DISKPART(rootdev) : 0);
    382 		}
    383 	} while (error != 0);
    384 	mountroothook_destroy();
    385 
    386 	mountlist.cqh_first->mnt_flag |= MNT_ROOTFS;
    387 	mountlist.cqh_first->mnt_op->vfs_refcount++;
    388 
    389 	/*
    390 	 * Get the vnode for '/'.  Set filedesc0.fd_fd.fd_cdir to
    391 	 * reference it.
    392 	 */
    393 	if (VFS_ROOT(mountlist.cqh_first, &rootvnode))
    394 		panic("cannot find root vnode");
    395 	cwdi0.cwdi_cdir = rootvnode;
    396 	VREF(cwdi0.cwdi_cdir);
    397 	VOP_UNLOCK(rootvnode, 0);
    398 	cwdi0.cwdi_rdir = NULL;
    399 	uvm_swap_init();
    400 
    401 	/*
    402 	 * Now can look at time, having had a chance to verify the time
    403 	 * from the file system.  Reset p->p_rtime as it may have been
    404 	 * munched in mi_switch() after the time got set.
    405 	 */
    406 	p->p_stats->p_start = runtime = mono_time = boottime = time;
    407 	p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0;
    408 
    409 	/*
    410 	 * Initialize signal-related data structures, and signal state
    411 	 * for proc0.
    412 	 */
    413 	signal_init();
    414 	p->p_sigacts = &sigacts0;
    415 	siginit(p);
    416 
    417 	/* Create process 1 (init(8)). */
    418 	if (fork1(p, 0, SIGCHLD, NULL, 0, NULL, &initproc))
    419 		panic("fork init");
    420 	cpu_set_kpc(initproc, start_init, initproc);
    421 
    422 	/* Create process 2, the pageout daemon kernel thread. */
    423 	if (kthread_create1(start_pagedaemon, NULL, NULL, "pagedaemon"))
    424 		panic("fork pagedaemon");
    425 
    426 	/* Create process 3, the process reaper kernel thread. */
    427 	if (kthread_create1(start_reaper, NULL, NULL, "reaper"))
    428 		panic("fork reaper");
    429 
    430 	/* Create process 4, the filesystem syncer */
    431 	if (kthread_create1(sched_sync, NULL, NULL, "ioflush"))
    432 		panic("fork syncer");
    433 
    434 	/* Create any other deferred kernel threads. */
    435 	kthread_run_deferred_queue();
    436 
    437 #if defined(MULTIPROCESSOR)
    438 	/* Boot the secondary processors. */
    439 	cpu_boot_secondary_processors();
    440 #endif
    441 
    442 	/* The scheduler is an infinite loop. */
    443 	uvm_scheduler();
    444 	/* NOTREACHED */
    445 }
    446 
    447 static void
    448 check_console(p)
    449 	struct proc *p;
    450 {
    451 	struct nameidata nd;
    452 	int error;
    453 
    454 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/console", p);
    455 	error = namei(&nd);
    456 	if (error == 0)
    457 		vrele(nd.ni_vp);
    458 	else if (error == ENOENT)
    459 		printf("warning: no /dev/console\n");
    460 	else
    461 		printf("warning: lookup /dev/console: error %d\n", error);
    462 }
    463 
/*
 * Candidate pathnames for the "init" program, in the order in which
 * start_init() tries them.  The list is terminated by a NULL entry.
 */
static char *initpaths[] = {
	"/sbin/init",		/* first choice */
	"/sbin/oinit",
	"/sbin/init.bak",
	NULL,			/* end of list */
};
    473 
    474 /*
    475  * Start the initial user process; try exec'ing each pathname in "initpaths".
    476  * The program is invoked with one argument containing the boot flags.
    477  */
    478 static void
    479 start_init(arg)
    480 	void *arg;
    481 {
    482 	struct proc *p = arg;
    483 	vaddr_t addr;
    484 	struct sys_execve_args /* {
    485 		syscallarg(const char *) path;
    486 		syscallarg(char * const *) argp;
    487 		syscallarg(char * const *) envp;
    488 	} */ args;
    489 	int options, i, error;
    490 	register_t retval[2];
    491 	char flags[4], *flagsp;
    492 	char **pathp, *path, *slash, *ucp, **uap, *arg0, *arg1 = NULL;
    493 
    494 	/*
    495 	 * Now in process 1.
    496 	 */
    497 	strncpy(p->p_comm, "init", MAXCOMLEN);
    498 
    499 	/*
    500 	 * This is not the right way to do this.  We really should
    501 	 * hand-craft a descriptor onto /dev/console to hand to init,
    502 	 * but that's a _lot_ more work, and the benefit from this easy
    503 	 * hack makes up for the "good is the enemy of the best" effect.
    504 	 */
    505 	check_console(p);
    506 
    507 	/*
    508 	 * Need just enough stack to hold the faked-up "execve()" arguments.
    509 	 */
    510 	addr = USRSTACK - PAGE_SIZE;
    511 	if (uvm_map(&p->p_vmspace->vm_map, &addr, PAGE_SIZE,
    512                     NULL, UVM_UNKNOWN_OFFSET,
    513                     UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY,
    514 		    UVM_ADV_NORMAL,
    515                     UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW))
    516 		!= KERN_SUCCESS)
    517 		panic("init: couldn't allocate argument space");
    518 	p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
    519 
    520 	for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) {
    521 		ucp = (char *)(addr + PAGE_SIZE);
    522 
    523 		/*
    524 		 * Construct the boot flag argument.
    525 		 */
    526 		flagsp = flags;
    527 		*flagsp++ = '-';
    528 		options = 0;
    529 
    530 		if (boothowto & RB_SINGLE) {
    531 			*flagsp++ = 's';
    532 			options = 1;
    533 		}
    534 #ifdef notyet
    535 		if (boothowto & RB_FASTBOOT) {
    536 			*flagsp++ = 'f';
    537 			options = 1;
    538 		}
    539 #endif
    540 
    541 		/*
    542 		 * Move out the flags (arg 1), if necessary.
    543 		 */
    544 		if (options != 0) {
    545 			*flagsp++ = '\0';
    546 			i = flagsp - flags;
    547 #ifdef DEBUG
    548 			printf("init: copying out flags `%s' %d\n", flags, i);
    549 #endif
    550 			(void)copyout((caddr_t)flags, (caddr_t)(ucp -= i), i);
    551 			arg1 = ucp;
    552 		}
    553 
    554 		/*
    555 		 * Move out the file name (also arg 0).
    556 		 */
    557 		i = strlen(path) + 1;
    558 #ifdef DEBUG
    559 		printf("init: copying out path `%s' %d\n", path, i);
    560 #endif
    561 		(void)copyout((caddr_t)path, (caddr_t)(ucp -= i), i);
    562 		arg0 = ucp;
    563 
    564 		/*
    565 		 * Move out the arg pointers.
    566 		 */
    567 		uap = (char **)((long)ucp & ~ALIGNBYTES);
    568 		(void)suword((caddr_t)--uap, 0);	/* terminator */
    569 		if (options != 0)
    570 			(void)suword((caddr_t)--uap, (long)arg1);
    571 		slash = strrchr(path, '/');
    572 		if (slash)
    573 			(void)suword((caddr_t)--uap,
    574 			    (long)arg0 + (slash + 1 - path));
    575 		else
    576 			(void)suword((caddr_t)--uap, (long)arg0);
    577 
    578 		/*
    579 		 * Point at the arguments.
    580 		 */
    581 		SCARG(&args, path) = arg0;
    582 		SCARG(&args, argp) = uap;
    583 		SCARG(&args, envp) = NULL;
    584 
    585 		/*
    586 		 * Now try to exec the program.  If can't for any reason
    587 		 * other than it doesn't exist, complain.
    588 		 */
    589 		error = sys_execve(p, &args, retval);
    590 		if (error == 0 || error == EJUSTRETURN)
    591 			return;
    592 		if (error != ENOENT)
    593 			printf("exec %s: error %d\n", path, error);
    594 	}
    595 	printf("init: not found\n");
    596 	panic("no init");
    597 }
    598 
/*
 * Entry point handed to kthread_create1() for the "pagedaemon" kernel
 * thread.  It transfers control to uvm_pageout(), which is not expected
 * to return.  The argument is unused.
 */
/* ARGSUSED */
static void
start_pagedaemon(arg)
	void *arg;
{

	uvm_pageout();
	/* NOTREACHED */
}
    608 
/*
 * Entry point handed to kthread_create1() for the "reaper" kernel
 * thread.  It transfers control to reaper(), which is not expected to
 * return.  The argument is unused.
 */
/* ARGSUSED */
static void
start_reaper(arg)
	void *arg;
{

	reaper();
	/* NOTREACHED */
}
    618