Home | History | Annotate | Line # | Download | only in kern
init_main.c revision 1.154
      1 /*	$NetBSD: init_main.c,v 1.154 1999/07/22 21:08:31 thorpej Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1995 Christopher G. Demetriou.  All rights reserved.
      5  * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
      6  *	The Regents of the University of California.  All rights reserved.
      7  * (c) UNIX System Laboratories, Inc.
      8  * All or some portions of this file are derived from material licensed
      9  * to the University of California by American Telephone and Telegraph
     10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
     11  * the permission of UNIX System Laboratories, Inc.
     12  *
     13  * Redistribution and use in source and binary forms, with or without
     14  * modification, are permitted provided that the following conditions
     15  * are met:
     16  * 1. Redistributions of source code must retain the above copyright
     17  *    notice, this list of conditions and the following disclaimer.
     18  * 2. Redistributions in binary form must reproduce the above copyright
     19  *    notice, this list of conditions and the following disclaimer in the
     20  *    documentation and/or other materials provided with the distribution.
     21  * 3. All advertising materials mentioning features or use of this software
     22  *    must display the following acknowledgement:
     23  *	This product includes software developed by the University of
     24  *	California, Berkeley and its contributors.
     25  * 4. Neither the name of the University nor the names of its contributors
     26  *    may be used to endorse or promote products derived from this software
     27  *    without specific prior written permission.
     28  *
     29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     39  * SUCH DAMAGE.
     40  *
     41  *	@(#)init_main.c	8.16 (Berkeley) 5/14/95
     42  */
     43 
     44 #include "fs_nfs.h"
     45 #include "opt_nfsserver.h"
     46 #include "opt_sysv.h"
     47 
     48 #include "rnd.h"
     49 
     50 #include <sys/param.h>
     51 #include <sys/filedesc.h>
     52 #include <sys/file.h>
     53 #include <sys/errno.h>
     54 #include <sys/exec.h>
     55 #include <sys/kernel.h>
     56 #include <sys/mount.h>
     57 #include <sys/map.h>
     58 #include <sys/proc.h>
     59 #include <sys/kthread.h>
     60 #include <sys/resourcevar.h>
     61 #include <sys/signalvar.h>
     62 #include <sys/systm.h>
     63 #include <sys/vnode.h>
     64 #include <sys/tty.h>
     65 #include <sys/conf.h>
     66 #include <sys/disklabel.h>
     67 #include <sys/buf.h>
     68 #include <sys/device.h>
     69 #include <sys/socketvar.h>
     70 #include <sys/protosw.h>
     71 #include <sys/reboot.h>
     72 #include <sys/user.h>
     73 #ifdef SYSVSHM
     74 #include <sys/shm.h>
     75 #endif
     76 #ifdef SYSVSEM
     77 #include <sys/sem.h>
     78 #endif
     79 #ifdef SYSVMSG
     80 #include <sys/msg.h>
     81 #endif
     82 #include <sys/domain.h>
     83 #include <sys/mbuf.h>
     84 #include <sys/namei.h>
     85 #if NRND > 0
     86 #include <sys/rnd.h>
     87 #endif
     88 
     89 #include <sys/syscall.h>
     90 #include <sys/syscallargs.h>
     91 
     92 #include <ufs/ufs/quota.h>
     93 
     94 #include <machine/cpu.h>
     95 
     96 #include <vm/vm.h>
     97 #include <vm/vm_pageout.h>
     98 
     99 #include <uvm/uvm.h>
    100 
    101 #include <net/if.h>
    102 #include <net/raw_cb.h>
    103 
/*
 * Copyright banner.  main() prints this string on the console right
 * after consinit().  Each banner line ends in an explicit "\n" escape
 * followed by a backslash-newline, so the literal stays a single valid
 * C string while still reading like the printed text.
 */
char	copyright[] = "\
Copyright (c) 1996, 1997, 1998, 1999\n\
    The NetBSD Foundation, Inc.  All rights reserved.\n\
Copyright (c) 1982, 1986, 1989, 1991, 1993\n\
    The Regents of the University of California.  All rights reserved.\n\
\n\
";
    111 
    112 /* Components of the first process -- never freed. */
        /*
         * main() wires each of these statically allocated objects into
         * proc0 (p_pgrp, p_cred, p_fd, p_cwdi, p_limit, p_vmspace and
         * p_sigacts); session0 is reached through pgrp0.pg_session.
         */
    113 struct	session session0;
    114 struct	pgrp pgrp0;
    115 struct	proc proc0;
    116 struct	pcred cred0;
    117 struct	filedesc0 filedesc0;
    118 struct	cwdinfo cwdi0;
    119 struct	plimit limit0;
    120 struct	vmspace vmspace0;
    121 struct	sigacts sigacts0;
        /* Only defined here when curproc is not already a preprocessor macro. */
    122 #ifndef curproc
    123 struct	proc *curproc = &proc0;
    124 #endif
        /* Process 1; filled in by the fork1() call in main(). */
    125 struct	proc *initproc;
    126 
        /* main() copies cmask into cwdi0.cwdi_cmask. */
    127 int	cmask = CMASK;
        /* Defined elsewhere; main() stores it in proc0.p_addr. */
    128 extern	struct user *proc0paddr;
    129 
    130 struct	vnode *rootvp, *swapdev_vp;
        /* Boot flags: main() may OR in RB_ASKNAME, start_init() tests RB_SINGLE. */
    131 int	boothowto;
        /* Both are set from `time' in main() once the root file system is mounted. */
    132 struct	timeval boottime;
    133 struct	timeval runtime;
    134 
        /* Forward declarations for the functions defined in this file. */
    135 static void check_console __P((struct proc *p));
    136 static void start_init __P((void *));
    137 static void start_pagedaemon __P((void *));
    138 static void start_reaper __P((void *));
    139 void main __P((void));
    140 
        /* Defined elsewhere; used below to initialize emul_netbsd. */
    141 extern char sigcode[], esigcode[];
    142 #ifdef SYSCALL_DEBUG
    143 extern char *syscallnames[];
    144 #endif
    145 
    146 struct emul emul_netbsd = {
        	/*
        	 * Emulation descriptor that main() installs as proc0's p_emul.
        	 * The initializer is positional, so the entries must stay in
        	 * the member order of struct emul (declared elsewhere -- check
        	 * that declaration before adding or reordering anything).
        	 * The syscall name table is only supplied when SYSCALL_DEBUG
        	 * is defined; otherwise that slot is NULL.
        	 */
    147 	"netbsd",
    148 	NULL,
    149 	sendsig,
    150 	SYS_syscall,
    151 	SYS_MAXSYSCALL,
    152 	sysent,
    153 #ifdef SYSCALL_DEBUG
    154 	syscallnames,
    155 #else
    156 	NULL,
    157 #endif
    158 	0,
    159 	copyargs,
    160 	setregs,
    161 	sigcode,
    162 	esigcode,
    163 };
    164 
    165 /*
    166  * System startup; initialize the world, create process 0, mount root
    167  * filesystem, and fork to create init and pagedaemon.  Most of the
    168  * hard work is done in the lower-level initialization routines including
    169  * startup(), which does memory initialization and autoconfiguration.
    170  */
    171 void
    172 main()
    173 {
    174 	struct proc *p;
    175 	struct pdevinit *pdev;
    176 	int i, s, error;
    177 	extern struct pdevinit pdevinit[];
    178 	extern void roundrobin __P((void *));
    179 	extern void schedcpu __P((void *));
    180 	extern void disk_init __P((void));
    181 #if defined(NFSSERVER) || defined(NFS)
    182 	extern void nfs_init __P((void));
    183 #endif
    184 
    185 	/*
    186 	 * Initialize the current process pointer (curproc) before
    187 	 * any possible traps/probes to simplify trap processing.
    188 	 */
    189 	p = &proc0;
    190 	curproc = p;
    191 	/*
    192 	 * Attempt to find console and initialize
    193 	 * in case of early panic or other messages.
    194 	 */
    195 	consinit();
    196 	printf("%s", copyright);
    197 
    198 	uvm_init();
    199 
    200 	/* Do machine-dependent initialization. */
    201 	cpu_startup();
    202 
    203 	/*
    204 	 * Initialize mbuf's.  Do this now because we might attempt to
    205 	 * allocate mbufs or mbuf clusters during autoconfiguration.
    206 	 */
    207 	mbinit();
    208 
    209 	/* Initialize sockets. */
    210 	soinit();
    211 
    212 	disk_init();		/* must come before autoconfiguration */
    213 	tty_init();		/* initialise tty list */
    214 #if NRND > 0
    215 	rnd_init();
    216 #endif
    217 	config_init();		/* init autoconfiguration data structures */
    218 	configure();		/* ...and configure the hardware */
    219 
    220 	/*
    221 	 * Initialize process and pgrp structures.
    222 	 */
    223 	procinit();
    224 
    225 	/*
    226 	 * Create process 0 (the swapper).
    227 	 */
    228 	s = proclist_lock_write();
    229 	LIST_INSERT_HEAD(&allproc, p, p_list);
    230 	proclist_unlock_write(s);
    231 
    232 	p->p_pgrp = &pgrp0;
    233 	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
    234 	LIST_INIT(&pgrp0.pg_members);
    235 	LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
    236 
    237 	pgrp0.pg_session = &session0;
    238 	session0.s_count = 1;
    239 	session0.s_sid = p->p_pid;
    240 	session0.s_leader = p;
    241 
    242 	/*
    243 	 * Set P_NOCLDWAIT so that kernel threads are reparented to
    244 	 * init(8) when they exit.  init(8) can easily wait them out
    245 	 * for us.
    246 	 */
    247 	p->p_flag = P_INMEM | P_SYSTEM | P_NOCLDWAIT;
    248 	p->p_stat = SRUN;
    249 	p->p_nice = NZERO;
    250 	p->p_emul = &emul_netbsd;
    251 	strncpy(p->p_comm, "swapper", MAXCOMLEN);
    252 
    253 	/* Create credentials. */
    254 	cred0.p_refcnt = 1;
    255 	p->p_cred = &cred0;
    256 	p->p_ucred = crget();
    257 	p->p_ucred->cr_ngroups = 1;	/* group 0 */
    258 
    259 	/* Create the file descriptor table. */
    260 	finit();
    261 	p->p_fd = &filedesc0.fd_fd;
    262 	fdinit1(&filedesc0);
    263 
    264 	/* Create the CWD info. */
    265 	p->p_cwdi = &cwdi0;
    266 	cwdi0.cwdi_cmask = cmask;
    267 	cwdi0.cwdi_refcnt = 1;
    268 
    269 	/* Create the limits structures. */
    270 	p->p_limit = &limit0;
    271 	for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
    272 		limit0.pl_rlimit[i].rlim_cur =
    273 		    limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
    274 
    275 	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
    276 	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
    277 	    maxfiles < NOFILE ? maxfiles : NOFILE;
    278 
    279 	limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
    280 	limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
    281 	    maxproc < MAXUPRC ? maxproc : MAXUPRC;
    282 
    283 	i = ptoa(uvmexp.free);
    284 	limit0.pl_rlimit[RLIMIT_RSS].rlim_max = i;
    285 	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i;
    286 	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3;
    287 	limit0.p_refcnt = 1;
    288 
    289 	/*
    290 	 * Initialize proc0's vmspace, which uses the kernel pmap.
    291 	 * All kernel processes (which never have user space mappings)
    292 	 * share proc0's vmspace, and thus, the kernel pmap.
    293 	 */
    294 	uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
    295 	    trunc_page(VM_MAX_ADDRESS), TRUE);
    296 	p->p_vmspace = &vmspace0;
    297 
    298 	p->p_addr = proc0paddr;				/* XXX */
    299 
    300 	/*
    301 	 * We continue to place resource usage info in the
    302 	 * user struct so they're pageable.
    303 	 */
    304 	p->p_stats = &p->p_addr->u_stats;
    305 
    306 	/*
    307 	 * Charge root for one process.
    308 	 */
    309 	(void)chgproccnt(0, 1);
    310 
    311 	rqinit();
    312 
    313 	/* Configure virtual memory system, set vm rlimits. */
    314 	uvm_init_limits(p);
    315 
    316 	/* Initialize the file systems. */
    317 #if defined(NFSSERVER) || defined(NFS)
    318 	nfs_init();			/* initialize server/shared data */
    319 #endif
    320 	vfsinit();
    321 
    322 	/* Start real time and statistics clocks. */
    323 	initclocks();
    324 
    325 #ifdef SYSVSHM
    326 	/* Initialize System V style shared memory. */
    327 	shminit();
    328 #endif
    329 
    330 #ifdef SYSVSEM
    331 	/* Initialize System V style semaphores. */
    332 	seminit();
    333 #endif
    334 
    335 #ifdef SYSVMSG
    336 	/* Initialize System V style message queues. */
    337 	msginit();
    338 #endif
    339 
    340 	/* Attach pseudo-devices. */
    341 	for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
    342 		(*pdev->pdev_attach)(pdev->pdev_count);
    343 
    344 	/*
    345 	 * Initialize protocols.  Block reception of incoming packets
    346 	 * until everything is ready.
    347 	 */
    348 	s = splimp();
    349 	ifinit();
    350 	domaininit();
    351 	splx(s);
    352 
    353 #ifdef GPROF
    354 	/* Initialize kernel profiling. */
    355 	kmstartup();
    356 #endif
    357 
    358 	/* Kick off timeout driven events by calling first time. */
    359 	roundrobin(NULL);
    360 	schedcpu(NULL);
    361 
    362 	/* Determine the root and dump devices. */
    363 	cpu_rootconf();
    364 	cpu_dumpconf();
    365 
    366 	/* Mount the root file system. */
    367 	do {
    368 		domountroothook();
    369 		if ((error = vfs_mountroot())) {
    370 			printf("cannot mount root, error = %d\n", error);
    371 			boothowto |= RB_ASKNAME;
    372 			setroot(root_device,
    373 			    (rootdev != NODEV) ? DISKPART(rootdev) : 0);
    374 		}
    375 	} while (error != 0);
    376 	mountroothook_destroy();
    377 
    378 	mountlist.cqh_first->mnt_flag |= MNT_ROOTFS;
    379 	mountlist.cqh_first->mnt_op->vfs_refcount++;
    380 
    381 	/*
    382 	 * Get the vnode for '/'.  Set filedesc0.fd_fd.fd_cdir to
    383 	 * reference it.
    384 	 */
    385 	if (VFS_ROOT(mountlist.cqh_first, &rootvnode))
    386 		panic("cannot find root vnode");
    387 	cwdi0.cwdi_cdir = rootvnode;
    388 	VREF(cwdi0.cwdi_cdir);
    389 	VOP_UNLOCK(rootvnode, 0);
    390 	cwdi0.cwdi_rdir = NULL;
    391 	uvm_swap_init();
    392 
    393 	/*
    394 	 * Now can look at time, having had a chance to verify the time
    395 	 * from the file system.  Reset p->p_rtime as it may have been
    396 	 * munched in mi_switch() after the time got set.
    397 	 */
    398 	p->p_stats->p_start = runtime = mono_time = boottime = time;
    399 	p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0;
    400 
    401 	/*
    402 	 * Initialize signal-related data structures, and signal state
    403 	 * for proc0.
    404 	 */
    405 	signal_init();
    406 	p->p_sigacts = &sigacts0;
    407 	siginit(p);
    408 
    409 	/* Create process 1 (init(8)). */
    410 	if (fork1(p, 0, SIGCHLD, NULL, 0, NULL, &initproc))
    411 		panic("fork init");
    412 	cpu_set_kpc(initproc, start_init, initproc);
    413 
    414 	/* Create process 2, the pageout daemon kernel thread. */
    415 	if (kthread_create1(start_pagedaemon, NULL, NULL, "pagedaemon"))
    416 		panic("fork pagedaemon");
    417 
    418 	/* Create process 3, the process reaper kernel thread. */
    419 	if (kthread_create1(start_reaper, NULL, NULL, "reaper"))
    420 		panic("fork reaper");
    421 
    422 	/* Create any other deferred kernel threads. */
    423 	kthread_run_deferred_queue();
    424 
    425 	/* The scheduler is an infinite loop. */
    426 	uvm_scheduler();
    427 	/* NOTREACHED */
    428 }
    429 
    430 static void
    431 check_console(p)
    432 	struct proc *p;
    433 {
    434 	struct nameidata nd;
    435 	int error;
    436 
    437 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/console", p);
    438 	error = namei(&nd);
    439 	if (error == 0)
    440 		vrele(nd.ni_vp);
    441 	else if (error == ENOENT)
    442 		printf("warning: no /dev/console\n");
    443 	else
    444 		printf("warning: lookup /dev/console: error %d\n", error);
    445 }
    446 
    447 /*
    448  * List of paths to try when searching for "init".
         * start_init() walks the array in order and stops at the first
         * program that sys_execve() accepts; the NULL entry ends the list.
    449  */
    450 static char *initpaths[] = {
    451 	"/sbin/init",
    452 	"/sbin/oinit",
    453 	"/sbin/init.bak",
    454 	NULL,
    455 };
    456 
    457 /*
    458  * Start the initial user process; try exec'ing each pathname in "initpaths".
    459  * The program is invoked with one argument containing the boot flags.
    460  */
    461 static void
    462 start_init(arg)
    463 	void *arg;
    464 {
    465 	struct proc *p = arg;
    466 	vaddr_t addr;
    467 	struct sys_execve_args /* {
    468 		syscallarg(const char *) path;
    469 		syscallarg(char * const *) argp;
    470 		syscallarg(char * const *) envp;
    471 	} */ args;
    472 	int options, i, error;
    473 	register_t retval[2];
    474 	char flags[4], *flagsp;
    475 	char **pathp, *path, *slash, *ucp, **uap, *arg0, *arg1 = NULL;
    476 
    477 	/*
    478 	 * Now in process 1.
    479 	 */
    480 	strncpy(p->p_comm, "init", MAXCOMLEN);
    481 
    482 	/*
    483 	 * This is not the right way to do this.  We really should
    484 	 * hand-craft a descriptor onto /dev/console to hand to init,
    485 	 * but that's a _lot_ more work, and the benefit from this easy
    486 	 * hack makes up for the "good is the enemy of the best" effect.
    487 	 */
    488 	check_console(p);
    489 
    490 	/*
    491 	 * Need just enough stack to hold the faked-up "execve()" arguments.
    492 	 */
    493 	addr = USRSTACK - PAGE_SIZE;
    494 	if (uvm_map(&p->p_vmspace->vm_map, &addr, PAGE_SIZE,
    495                     NULL, UVM_UNKNOWN_OFFSET,
    496                     UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY,
    497 		    UVM_ADV_NORMAL,
    498                     UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW))
    499 		!= KERN_SUCCESS)
    500 		panic("init: couldn't allocate argument space");
    501 	p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
    502 
    503 	for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) {
    504 		ucp = (char *)(addr + PAGE_SIZE);
    505 
    506 		/*
    507 		 * Construct the boot flag argument.
    508 		 */
    509 		flagsp = flags;
    510 		*flagsp++ = '-';
    511 		options = 0;
    512 
    513 		if (boothowto & RB_SINGLE) {
    514 			*flagsp++ = 's';
    515 			options = 1;
    516 		}
    517 #ifdef notyet
    518 		if (boothowto & RB_FASTBOOT) {
    519 			*flagsp++ = 'f';
    520 			options = 1;
    521 		}
    522 #endif
    523 
    524 		/*
    525 		 * Move out the flags (arg 1), if necessary.
    526 		 */
    527 		if (options != 0) {
    528 			*flagsp++ = '\0';
    529 			i = flagsp - flags;
    530 #ifdef DEBUG
    531 			printf("init: copying out flags `%s' %d\n", flags, i);
    532 #endif
    533 			(void)copyout((caddr_t)flags, (caddr_t)(ucp -= i), i);
    534 			arg1 = ucp;
    535 		}
    536 
    537 		/*
    538 		 * Move out the file name (also arg 0).
    539 		 */
    540 		i = strlen(path) + 1;
    541 #ifdef DEBUG
    542 		printf("init: copying out path `%s' %d\n", path, i);
    543 #endif
    544 		(void)copyout((caddr_t)path, (caddr_t)(ucp -= i), i);
    545 		arg0 = ucp;
    546 
    547 		/*
    548 		 * Move out the arg pointers.
    549 		 */
    550 		uap = (char **)((long)ucp & ~ALIGNBYTES);
    551 		(void)suword((caddr_t)--uap, 0);	/* terminator */
    552 		if (options != 0)
    553 			(void)suword((caddr_t)--uap, (long)arg1);
    554 		slash = strrchr(path, '/');
    555 		if (slash)
    556 			(void)suword((caddr_t)--uap,
    557 			    (long)arg0 + (slash + 1 - path));
    558 		else
    559 			(void)suword((caddr_t)--uap, (long)arg0);
    560 
    561 		/*
    562 		 * Point at the arguments.
    563 		 */
    564 		SCARG(&args, path) = arg0;
    565 		SCARG(&args, argp) = uap;
    566 		SCARG(&args, envp) = NULL;
    567 
    568 		/*
    569 		 * Now try to exec the program.  If can't for any reason
    570 		 * other than it doesn't exist, complain.
    571 		 */
    572 		error = sys_execve(p, &args, retval);
    573 		if (error == 0 || error == EJUSTRETURN)
    574 			return;
    575 		if (error != ENOENT)
    576 			printf("exec %s: error %d\n", path, error);
    577 	}
    578 	printf("init: not found\n");
    579 	panic("no init");
    580 }
    581 
    582 /* ARGSUSED */
        /*
         * Thread body that main() hands to kthread_create1() for the
         * "pagedaemon" kernel thread.  The argument is ignored; the function
         * simply calls uvm_pageout(), which is not expected to return.
         */
    583 static void
    584 start_pagedaemon(arg)
    585 	void *arg;
    586 {
    587 
    588 	uvm_pageout();
    589 	/* NOTREACHED */
    590 }
    591 
    592 /* ARGSUSED */
        /*
         * Thread body that main() hands to kthread_create1() for the
         * "reaper" kernel thread.  The argument is ignored; the function
         * simply calls reaper(), which is not expected to return.
         */
    593 static void
    594 start_reaper(arg)
    595 	void *arg;
    596 {
    597 
    598 	reaper();
    599 	/* NOTREACHED */
    600 }
    601