Home | History | Annotate | Line # | Download | only in kern
init_main.c revision 1.158
      1 /*	$NetBSD: init_main.c,v 1.158 1999/11/13 05:02:25 simonb Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1995 Christopher G. Demetriou.  All rights reserved.
      5  * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
      6  *	The Regents of the University of California.  All rights reserved.
      7  * (c) UNIX System Laboratories, Inc.
      8  * All or some portions of this file are derived from material licensed
      9  * to the University of California by American Telephone and Telegraph
     10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
     11  * the permission of UNIX System Laboratories, Inc.
     12  *
     13  * Redistribution and use in source and binary forms, with or without
     14  * modification, are permitted provided that the following conditions
     15  * are met:
     16  * 1. Redistributions of source code must retain the above copyright
     17  *    notice, this list of conditions and the following disclaimer.
     18  * 2. Redistributions in binary form must reproduce the above copyright
     19  *    notice, this list of conditions and the following disclaimer in the
     20  *    documentation and/or other materials provided with the distribution.
     21  * 3. All advertising materials mentioning features or use of this software
     22  *    must display the following acknowledgement:
     23  *	This product includes software developed by the University of
     24  *	California, Berkeley and its contributors.
     25  * 4. Neither the name of the University nor the names of its contributors
     26  *    may be used to endorse or promote products derived from this software
     27  *    without specific prior written permission.
     28  *
     29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     39  * SUCH DAMAGE.
     40  *
     41  *	@(#)init_main.c	8.16 (Berkeley) 5/14/95
     42  */
     43 
     44 #include "fs_nfs.h"
     45 #include "opt_nfsserver.h"
     46 #include "opt_sysv.h"
     47 #include "opt_maxuprc.h"
     48 
     49 #include "rnd.h"
     50 
     51 #include <sys/param.h>
     52 #include <sys/filedesc.h>
     53 #include <sys/file.h>
     54 #include <sys/errno.h>
     55 #include <sys/exec.h>
     56 #include <sys/kernel.h>
     57 #include <sys/mount.h>
     58 #include <sys/map.h>
     59 #include <sys/proc.h>
     60 #include <sys/kthread.h>
     61 #include <sys/resourcevar.h>
     62 #include <sys/signalvar.h>
     63 #include <sys/systm.h>
     64 #include <sys/vnode.h>
     65 #include <sys/tty.h>
     66 #include <sys/conf.h>
     67 #include <sys/disklabel.h>
     68 #include <sys/buf.h>
     69 #include <sys/device.h>
     70 #include <sys/socketvar.h>
     71 #include <sys/protosw.h>
     72 #include <sys/reboot.h>
     73 #include <sys/user.h>
     74 #ifdef SYSVSHM
     75 #include <sys/shm.h>
     76 #endif
     77 #ifdef SYSVSEM
     78 #include <sys/sem.h>
     79 #endif
     80 #ifdef SYSVMSG
     81 #include <sys/msg.h>
     82 #endif
     83 #include <sys/domain.h>
     84 #include <sys/mbuf.h>
     85 #include <sys/namei.h>
     86 #if NRND > 0
     87 #include <sys/rnd.h>
     88 #endif
     89 
     90 #include <sys/syscall.h>
     91 #include <sys/syscallargs.h>
     92 
     93 #include <ufs/ufs/quota.h>
     94 
     95 #include <machine/cpu.h>
     96 
     97 #include <vm/vm.h>
     98 #include <vm/vm_pageout.h>
     99 
    100 #include <uvm/uvm.h>
    101 
    102 #include <net/if.h>
    103 #include <net/raw_cb.h>
    104 
        /*
         * Banner text.  main() prints it with printf("%s", copyright)
         * immediately after consinit().
         */
    105 char	copyright[] = "\
    106 Copyright (c) 1996, 1997, 1998, 1999
    107     The NetBSD Foundation, Inc.  All rights reserved.
    108 Copyright (c) 1982, 1986, 1989, 1991, 1993
    109     The Regents of the University of California.  All rights reserved.
    110 
    111 ";
    112 
    113 /* Components of the first process -- never freed. */
        /*
         * These are statically allocated; main() links each of them into
         * proc0 (or into one another) while it builds process 0.
         */
    114 struct	session session0;
    115 struct	pgrp pgrp0;
    116 struct	proc proc0;
    117 struct	pcred cred0;
    118 struct	filedesc0 filedesc0;
    119 struct	cwdinfo cwdi0;
    120 struct	plimit limit0;
    121 struct	vmspace vmspace0;
    122 struct	sigacts sigacts0;
        /* curproc is only defined as a variable when no macro of that name exists. */
    123 #ifndef curproc
    124 struct	proc *curproc = &proc0;
    125 #endif
        /* Filled in by the fork1() call in main() that creates init. */
    126 struct	proc *initproc;
    127 
        /* Copied into cwdi0.cwdi_cmask by main(). */
    128 int	cmask = CMASK;
        /* Defined elsewhere; main() stores it in proc0's p_addr. */
    129 extern	struct user *proc0paddr;
    130 
    131 struct	vnode *rootvp, *swapdev_vp;
        /*
         * Boot flags.  main() ORs in RB_ASKNAME when mounting root fails;
         * start_init() tests RB_SINGLE when building init's flag argument.
         */
    132 int	boothowto;
    133 int	cold = 1;			/* still working on startup */
        /* Both are assigned from `time' in main() after the root mount succeeds. */
    134 struct	timeval boottime;
    135 struct	timeval runtime;
    136 
        /* File-local helpers and kernel-thread entry points defined below. */
    137 static void check_console __P((struct proc *p));
    138 static void start_init __P((void *));
    139 static void start_pagedaemon __P((void *));
    140 static void start_reaper __P((void *));
    141 void main __P((void));
    142 
        /* Referenced by the emul_netbsd initializer below. */
    143 extern char sigcode[], esigcode[];
    144 #ifdef SYSCALL_DEBUG
    145 extern char *syscallnames[];
    146 #endif
    147 
        /*
         * Emulation descriptor that main() installs as proc0's p_emul.
         *
         * The initializer is positional, so the order of the entries must
         * match the member order of struct emul (declared elsewhere).  The
         * syscall name table is only supplied when SYSCALL_DEBUG is defined;
         * otherwise that slot is NULL.
         */
    148 struct emul emul_netbsd = {
    149 	"netbsd",
    150 	NULL,
    151 	sendsig,
    152 	SYS_syscall,
    153 	SYS_MAXSYSCALL,
    154 	sysent,
    155 #ifdef SYSCALL_DEBUG
    156 	syscallnames,
    157 #else
    158 	NULL,
    159 #endif
    160 	0,
    161 	copyargs,
    162 	setregs,
    163 	sigcode,
    164 	esigcode,
    165 };
    166 
    167 /*
    168  * System startup; initialize the world, create process 0, mount root
    169  * filesystem, and fork to create init and pagedaemon.  Most of the
    170  * hard work is done in the lower-level initialization routines including
    171  * startup(), which does memory initialization and autoconfiguration.
    172  */
    173 void
    174 main()
    175 {
    176 	struct proc *p;
    177 	struct pdevinit *pdev;
    178 	int i, s, error;
    179 	extern struct pdevinit pdevinit[];
    180 	extern void roundrobin __P((void *));
    181 	extern void schedcpu __P((void *));
    182 	extern void disk_init __P((void));
    183 #if defined(NFSSERVER) || defined(NFS)
    184 	extern void nfs_init __P((void));
    185 #endif
    186 
    187 	/*
    188 	 * Initialize the current process pointer (curproc) before
    189 	 * any possible traps/probes to simplify trap processing.
    190 	 */
    191 	p = &proc0;
    192 	curproc = p;
    193 	/*
    194 	 * Attempt to find console and initialize
    195 	 * in case of early panic or other messages.
    196 	 */
    197 	consinit();
    198 	printf("%s", copyright);
    199 
    200 	uvm_init();
    201 
    202 	/* Do machine-dependent initialization. */
    203 	cpu_startup();
    204 
    205 	/*
    206 	 * Initialize mbuf's.  Do this now because we might attempt to
    207 	 * allocate mbufs or mbuf clusters during autoconfiguration.
    208 	 */
    209 	mbinit();
    210 
    211 	/* Initialize sockets. */
    212 	soinit();
    213 
    214 	/*
    215 	 * The following 3 things must be done before autoconfiguration.
    216 	 */
    217 	disk_init();		/* initialize disk list */
    218 	tty_init();		/* initialize tty list */
    219 #if NRND > 0
    220 	rnd_init();		/* initialize RNG */
    221 #endif
    222 
    223 	/*
    224 	 * Initialize process and pgrp structures.
    225 	 */
    226 	procinit();
    227 
    228 	/*
    229 	 * Create process 0 (the swapper).
    230 	 */
    231 	s = proclist_lock_write();
    232 	LIST_INSERT_HEAD(&allproc, p, p_list);
    233 	proclist_unlock_write(s);
    234 
    235 	p->p_pgrp = &pgrp0;
    236 	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
    237 	LIST_INIT(&pgrp0.pg_members);
    238 	LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
    239 
    240 	pgrp0.pg_session = &session0;
    241 	session0.s_count = 1;
    242 	session0.s_sid = p->p_pid;
    243 	session0.s_leader = p;
    244 
    245 	/*
    246 	 * Set P_NOCLDWAIT so that kernel threads are reparented to
    247 	 * init(8) when they exit.  init(8) can easily wait them out
    248 	 * for us.
    249 	 */
    250 	p->p_flag = P_INMEM | P_SYSTEM | P_NOCLDWAIT;
    251 	p->p_stat = SRUN;
    252 	p->p_nice = NZERO;
    253 	p->p_emul = &emul_netbsd;
    254 	strncpy(p->p_comm, "swapper", MAXCOMLEN);
    255 
    256 	/* Create credentials. */
    257 	cred0.p_refcnt = 1;
    258 	p->p_cred = &cred0;
    259 	p->p_ucred = crget();
    260 	p->p_ucred->cr_ngroups = 1;	/* group 0 */
    261 
    262 	/* Create the file descriptor table. */
    263 	finit();
    264 	p->p_fd = &filedesc0.fd_fd;
    265 	fdinit1(&filedesc0);
    266 
    267 	/* Create the CWD info. */
    268 	p->p_cwdi = &cwdi0;
    269 	cwdi0.cwdi_cmask = cmask;
    270 	cwdi0.cwdi_refcnt = 1;
    271 
    272 	/* Create the limits structures. */
    273 	p->p_limit = &limit0;
    274 	for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
    275 		limit0.pl_rlimit[i].rlim_cur =
    276 		    limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
    277 
    278 	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
    279 	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
    280 	    maxfiles < NOFILE ? maxfiles : NOFILE;
    281 
    282 	limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
    283 	limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
    284 	    maxproc < MAXUPRC ? maxproc : MAXUPRC;
    285 
    286 	i = ptoa(uvmexp.free);
    287 	limit0.pl_rlimit[RLIMIT_RSS].rlim_max = i;
    288 	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i;
    289 	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3;
    290 	limit0.pl_corename = defcorename;
    291 	limit0.p_refcnt = 1;
    292 
    293 	/*
    294 	 * Initialize proc0's vmspace, which uses the kernel pmap.
    295 	 * All kernel processes (which never have user space mappings)
    296 	 * share proc0's vmspace, and thus, the kernel pmap.
    297 	 */
    298 	uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
    299 	    trunc_page(VM_MAX_ADDRESS), TRUE);
    300 	p->p_vmspace = &vmspace0;
    301 
    302 	p->p_addr = proc0paddr;				/* XXX */
    303 
    304 	/*
    305 	 * We continue to place resource usage info in the
    306 	 * user struct so they're pageable.
    307 	 */
    308 	p->p_stats = &p->p_addr->u_stats;
    309 
    310 	/*
    311 	 * Charge root for one process.
    312 	 */
    313 	(void)chgproccnt(0, 1);
    314 
    315 	rqinit();
    316 
    317 	/* Configure virtual memory system, set vm rlimits. */
    318 	uvm_init_limits(p);
    319 
    320 	/* Initialize the file systems. */
    321 #if defined(NFSSERVER) || defined(NFS)
    322 	nfs_init();			/* initialize server/shared data */
    323 #endif
    324 	vfsinit();
    325 
    326 	/* Configure the system hardware.  This will enable interrupts. */
    327 	configure();
    328 
    329 #ifdef SYSVSHM
    330 	/* Initialize System V style shared memory. */
    331 	shminit();
    332 #endif
    333 
    334 #ifdef SYSVSEM
    335 	/* Initialize System V style semaphores. */
    336 	seminit();
    337 #endif
    338 
    339 #ifdef SYSVMSG
    340 	/* Initialize System V style message queues. */
    341 	msginit();
    342 #endif
    343 
    344 	/* Attach pseudo-devices. */
    345 	for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
    346 		(*pdev->pdev_attach)(pdev->pdev_count);
    347 
    348 	/*
    349 	 * Initialize protocols.  Block reception of incoming packets
    350 	 * until everything is ready.
    351 	 */
    352 	s = splimp();
    353 	ifinit();
    354 	domaininit();
    355 	splx(s);
    356 
    357 #ifdef GPROF
    358 	/* Initialize kernel profiling. */
    359 	kmstartup();
    360 #endif
    361 
    362 	/* Kick off timeout driven events by calling first time. */
    363 	roundrobin(NULL);
    364 	schedcpu(NULL);
    365 
    366 	/* Determine the root and dump devices. */
    367 	cpu_rootconf();
    368 	cpu_dumpconf();
    369 
    370 	/* Mount the root file system. */
    371 	do {
    372 		domountroothook();
    373 		if ((error = vfs_mountroot())) {
    374 			printf("cannot mount root, error = %d\n", error);
    375 			boothowto |= RB_ASKNAME;
    376 			setroot(root_device,
    377 			    (rootdev != NODEV) ? DISKPART(rootdev) : 0);
    378 		}
    379 	} while (error != 0);
    380 	mountroothook_destroy();
    381 
    382 	mountlist.cqh_first->mnt_flag |= MNT_ROOTFS;
    383 	mountlist.cqh_first->mnt_op->vfs_refcount++;
    384 
    385 	/*
    386 	 * Get the vnode for '/'.  Set filedesc0.fd_fd.fd_cdir to
    387 	 * reference it.
    388 	 */
    389 	if (VFS_ROOT(mountlist.cqh_first, &rootvnode))
    390 		panic("cannot find root vnode");
    391 	cwdi0.cwdi_cdir = rootvnode;
    392 	VREF(cwdi0.cwdi_cdir);
    393 	VOP_UNLOCK(rootvnode, 0);
    394 	cwdi0.cwdi_rdir = NULL;
    395 	uvm_swap_init();
    396 
    397 	/*
    398 	 * Now can look at time, having had a chance to verify the time
    399 	 * from the file system.  Reset p->p_rtime as it may have been
    400 	 * munched in mi_switch() after the time got set.
    401 	 */
    402 	p->p_stats->p_start = runtime = mono_time = boottime = time;
    403 	p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0;
    404 
    405 	/*
    406 	 * Initialize signal-related data structures, and signal state
    407 	 * for proc0.
    408 	 */
    409 	signal_init();
    410 	p->p_sigacts = &sigacts0;
    411 	siginit(p);
    412 
    413 	/* Create process 1 (init(8)). */
    414 	if (fork1(p, 0, SIGCHLD, NULL, 0, NULL, &initproc))
    415 		panic("fork init");
    416 	cpu_set_kpc(initproc, start_init, initproc);
    417 
    418 	/* Create process 2, the pageout daemon kernel thread. */
    419 	if (kthread_create1(start_pagedaemon, NULL, NULL, "pagedaemon"))
    420 		panic("fork pagedaemon");
    421 
    422 	/* Create process 3, the process reaper kernel thread. */
    423 	if (kthread_create1(start_reaper, NULL, NULL, "reaper"))
    424 		panic("fork reaper");
    425 
    426 	/* Create any other deferred kernel threads. */
    427 	kthread_run_deferred_queue();
    428 
    429 	/* The scheduler is an infinite loop. */
    430 	uvm_scheduler();
    431 	/* NOTREACHED */
    432 }
    433 
        /*
         * Look up "/dev/console" on behalf of process p and print a warning
         * if the lookup fails.  Purely diagnostic: nothing is returned and
         * the caller is not told about the failure.
         *
         *	p	process passed to NDINIT() for the lookup
         */
    434 static void
    435 check_console(p)
    436 	struct proc *p;
    437 {
    438 	struct nameidata nd;
    439 	int error;
    440 
    441 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/console", p);
    442 	error = namei(&nd);
        	/* On success the vnode is not needed; hand it straight to vrele(). */
    443 	if (error == 0)
    444 		vrele(nd.ni_vp);
        	/* A missing node gets its own message; any other error is reported by number. */
    445 	else if (error == ENOENT)
    446 		printf("warning: no /dev/console\n");
    447 	else
    448 		printf("warning: lookup /dev/console: error %d\n", error);
    449 }
    450 
    451 /*
    452  * List of paths to try when searching for "init".
         * start_init() walks the array in order and stops at the NULL
         * terminator, so the last entry must remain NULL.
    453  */
    454 static char *initpaths[] = {
    455 	"/sbin/init",
    456 	"/sbin/oinit",
    457 	"/sbin/init.bak",
    458 	NULL,
    459 };
    460 
    461 /*
    462  * Start the initial user process; try exec'ing each pathname in "initpaths".
    463  * The program is invoked with one argument containing the boot flags.
         *
         *	arg	the struct proc of the new process; main() passes
         *		initproc through cpu_set_kpc()
         *
         * The flag argument is only passed when at least one flag is set;
         * otherwise the program receives argv[0] alone.  Returns as soon as
         * sys_execve() reports 0 or EJUSTRETURN for one of the paths.  If
         * every path fails, or the argument page cannot be mapped, the
         * function panics.
    464  */
    465 static void
    466 start_init(arg)
    467 	void *arg;
    468 {
    469 	struct proc *p = arg;
    470 	vaddr_t addr;
    471 	struct sys_execve_args /* {
    472 		syscallarg(const char *) path;
    473 		syscallarg(char * const *) argp;
    474 		syscallarg(char * const *) envp;
    475 	} */ args;
    476 	int options, i, error;
    477 	register_t retval[2];
        	/* Room for '-', up to two flag letters, and the terminating NUL. */
    478 	char flags[4], *flagsp;
    479 	char **pathp, *path, *slash, *ucp, **uap, *arg0, *arg1 = NULL;
    480 
    481 	/*
    482 	 * Now in process 1.
    483 	 */
    484 	strncpy(p->p_comm, "init", MAXCOMLEN);
    485 
    486 	/*
    487 	 * This is not the right way to do this.  We really should
    488 	 * hand-craft a descriptor onto /dev/console to hand to init,
    489 	 * but that's a _lot_ more work, and the benefit from this easy
    490 	 * hack makes up for the "good is the enemy of the best" effect.
    491 	 */
    492 	check_console(p);
    493 
    494 	/*
    495 	 * Need just enough stack to hold the faked-up "execve()" arguments.
    496 	 */
        	/* Map one page ending at USRSTACK; addr is its lowest address. */
    497 	addr = USRSTACK - PAGE_SIZE;
    498 	if (uvm_map(&p->p_vmspace->vm_map, &addr, PAGE_SIZE,
    499                     NULL, UVM_UNKNOWN_OFFSET,
    500                     UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY,
    501 		    UVM_ADV_NORMAL,
    502                     UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW))
    503 		!= KERN_SUCCESS)
    504 		panic("init: couldn't allocate argument space");
    505 	p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
    506 
    507 	for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) {
        		/*
        		 * Start again from the top of the page on every attempt;
        		 * ucp moves downward as each string is copied out.
        		 */
    508 		ucp = (char *)(addr + PAGE_SIZE);
    509 
    510 		/*
    511 		 * Construct the boot flag argument.
    512 		 */
    513 		flagsp = flags;
    514 		*flagsp++ = '-';
    515 		options = 0;
    516 
    517 		if (boothowto & RB_SINGLE) {
    518 			*flagsp++ = 's';
    519 			options = 1;
    520 		}
    521 #ifdef notyet
    522 		if (boothowto & RB_FASTBOOT) {
    523 			*flagsp++ = 'f';
    524 			options = 1;
    525 		}
    526 #endif
    527 
    528 		/*
    529 		 * Move out the flags (arg 1), if necessary.
    530 		 */
    531 		if (options != 0) {
    532 			*flagsp++ = '\0';
        			/* i counts the bytes to copy, NUL included. */
    533 			i = flagsp - flags;
    534 #ifdef DEBUG
    535 			printf("init: copying out flags `%s' %d\n", flags, i);
    536 #endif
    537 			(void)copyout((caddr_t)flags, (caddr_t)(ucp -= i), i);
    538 			arg1 = ucp;
    539 		}
    540 
    541 		/*
    542 		 * Move out the file name (also arg 0).
    543 		 */
    544 		i = strlen(path) + 1;
    545 #ifdef DEBUG
    546 		printf("init: copying out path `%s' %d\n", path, i);
    547 #endif
    548 		(void)copyout((caddr_t)path, (caddr_t)(ucp -= i), i);
    549 		arg0 = ucp;
    550 
    551 		/*
    552 		 * Move out the arg pointers.
    553 		 */
        		/*
        		 * Round down to an aligned address below the strings, then
        		 * store the vector back to front: terminator, optional
        		 * flags pointer, and finally argv[0].
        		 */
    554 		uap = (char **)((long)ucp & ~ALIGNBYTES);
    555 		(void)suword((caddr_t)--uap, 0);	/* terminator */
    556 		if (options != 0)
    557 			(void)suword((caddr_t)--uap, (long)arg1);
        		/*
        		 * argv[0] points just past the last '/' of the copied-out
        		 * path, or at the whole path when it contains no '/'.
        		 */
    558 		slash = strrchr(path, '/');
    559 		if (slash)
    560 			(void)suword((caddr_t)--uap,
    561 			    (long)arg0 + (slash + 1 - path));
    562 		else
    563 			(void)suword((caddr_t)--uap, (long)arg0);
    564 
    565 		/*
    566 		 * Point at the arguments.
    567 		 */
    568 		SCARG(&args, path) = arg0;
    569 		SCARG(&args, argp) = uap;
    570 		SCARG(&args, envp) = NULL;
    571 
    572 		/*
    573 		 * Now try to exec the program.  If can't for any reason
    574 		 * other than it doesn't exist, complain.
    575 		 */
    576 		error = sys_execve(p, &args, retval);
    577 		if (error == 0 || error == EJUSTRETURN)
    578 			return;
    579 		if (error != ENOENT)
    580 			printf("exec %s: error %d\n", path, error);
    581 	}
        	/* Every candidate path failed. */
    582 	printf("init: not found\n");
    583 	panic("no init");
    584 }
    585 
    586 /* ARGSUSED */
        /*
         * Entry point handed to kthread_create1() in main() for the
         * "pagedaemon" thread.  arg is unused (main() passes NULL).  Simply
         * calls uvm_pageout(), which is not expected to return.
         */
    587 static void
    588 start_pagedaemon(arg)
    589 	void *arg;
    590 {
    591 
    592 	uvm_pageout();
    593 	/* NOTREACHED */
    594 }
    595 
    596 /* ARGSUSED */
        /*
         * Entry point handed to kthread_create1() in main() for the
         * "reaper" thread.  arg is unused (main() passes NULL).  Simply
         * calls reaper(), which is not expected to return.
         */
    597 static void
    598 start_reaper(arg)
    599 	void *arg;
    600 {
    601 
    602 	reaper();
    603 	/* NOTREACHED */
    604 }
    605