Home | History | Annotate | Line # | Download | only in kern
init_main.c revision 1.162
      1 /*	$NetBSD: init_main.c,v 1.162 2000/01/19 20:05:50 thorpej Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1995 Christopher G. Demetriou.  All rights reserved.
      5  * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
      6  *	The Regents of the University of California.  All rights reserved.
      7  * (c) UNIX System Laboratories, Inc.
      8  * All or some portions of this file are derived from material licensed
      9  * to the University of California by American Telephone and Telegraph
     10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
     11  * the permission of UNIX System Laboratories, Inc.
     12  *
     13  * Redistribution and use in source and binary forms, with or without
     14  * modification, are permitted provided that the following conditions
     15  * are met:
     16  * 1. Redistributions of source code must retain the above copyright
     17  *    notice, this list of conditions and the following disclaimer.
     18  * 2. Redistributions in binary form must reproduce the above copyright
     19  *    notice, this list of conditions and the following disclaimer in the
     20  *    documentation and/or other materials provided with the distribution.
     21  * 3. All advertising materials mentioning features or use of this software
     22  *    must display the following acknowledgement:
     23  *	This product includes software developed by the University of
     24  *	California, Berkeley and its contributors.
     25  * 4. Neither the name of the University nor the names of its contributors
     26  *    may be used to endorse or promote products derived from this software
     27  *    without specific prior written permission.
     28  *
     29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     39  * SUCH DAMAGE.
     40  *
     41  *	@(#)init_main.c	8.16 (Berkeley) 5/14/95
     42  */
     43 
     44 #include "fs_nfs.h"
     45 #include "opt_nfsserver.h"
     46 #include "opt_sysv.h"
     47 #include "opt_maxuprc.h"
     48 #include "opt_multiprocessor.h"
     49 
     50 #include "rnd.h"
     51 
     52 #include <sys/param.h>
     53 #include <sys/filedesc.h>
     54 #include <sys/file.h>
     55 #include <sys/errno.h>
     56 #include <sys/exec.h>
     57 #include <sys/callout.h>
     58 #include <sys/kernel.h>
     59 #include <sys/mount.h>
     60 #include <sys/map.h>
     61 #include <sys/proc.h>
     62 #include <sys/kthread.h>
     63 #include <sys/resourcevar.h>
     64 #include <sys/signalvar.h>
     65 #include <sys/systm.h>
     66 #include <sys/vnode.h>
     67 #include <sys/tty.h>
     68 #include <sys/conf.h>
     69 #include <sys/disklabel.h>
     70 #include <sys/buf.h>
     71 #include <sys/device.h>
     72 #include <sys/socketvar.h>
     73 #include <sys/protosw.h>
     74 #include <sys/reboot.h>
     75 #include <sys/user.h>
     76 #ifdef SYSVSHM
     77 #include <sys/shm.h>
     78 #endif
     79 #ifdef SYSVSEM
     80 #include <sys/sem.h>
     81 #endif
     82 #ifdef SYSVMSG
     83 #include <sys/msg.h>
     84 #endif
     85 #include <sys/domain.h>
     86 #include <sys/mbuf.h>
     87 #include <sys/namei.h>
     88 #if NRND > 0
     89 #include <sys/rnd.h>
     90 #endif
     91 
     92 #include <sys/syscall.h>
     93 #include <sys/syscallargs.h>
     94 
     95 #include <ufs/ufs/quota.h>
     96 
     97 #include <miscfs/genfs/genfs.h>
     98 #include <miscfs/syncfs/syncfs.h>
     99 
    100 #include <machine/cpu.h>
    101 
    102 #include <vm/vm.h>
    103 #include <vm/vm_pageout.h>
    104 
    105 #include <uvm/uvm.h>
    106 
    107 #include <net/if.h>
    108 #include <net/raw_cb.h>
    109 
/*
 * Copyright banner; main() prints it on the console right after
 * consinit().
 *
 * Spelled as adjacent string literals with explicit "\n" escapes: a
 * string literal containing raw newlines is not valid standard C and
 * is rejected by current compilers.  The resulting bytes are unchanged.
 */
char	copyright[] =
"Copyright (c) 1996, 1997, 1998, 1999, 2000\n"
"    The NetBSD Foundation, Inc.  All rights reserved.\n"
"Copyright (c) 1982, 1986, 1989, 1991, 1993\n"
"    The Regents of the University of California.  All rights reserved.\n"
"\n";
    117 
    118 /* Components of the first process -- never freed. */
    119 struct	session session0;
    120 struct	pgrp pgrp0;
    121 struct	proc proc0;
    122 struct	pcred cred0;
    123 struct	filedesc0 filedesc0;
    124 struct	cwdinfo cwdi0;
    125 struct	plimit limit0;
    126 struct	vmspace vmspace0;
    127 struct	sigacts sigacts0;
    128 #ifndef curproc
    129 struct	proc *curproc = &proc0;
    130 #endif
    131 struct	proc *initproc;
    132 
    133 int	cmask = CMASK;
    134 extern	struct user *proc0paddr;
    135 
    136 struct	vnode *rootvp, *swapdev_vp;
    137 int	boothowto;
    138 int	cold = 1;			/* still working on startup */
    139 struct	timeval boottime;
    140 struct	timeval runtime;
    141 
    142 static void check_console __P((struct proc *p));
    143 static void start_init __P((void *));
    144 static void start_pagedaemon __P((void *));
    145 static void start_reaper __P((void *));
    146 void main __P((void));
    147 
    148 extern char sigcode[], esigcode[];
    149 #ifdef SYSCALL_DEBUG
    150 extern char *syscallnames[];
    151 #endif
    152 
    153 struct emul emul_netbsd = {
    154 	"netbsd",
    155 	NULL,
    156 	sendsig,
    157 	SYS_syscall,
    158 	SYS_MAXSYSCALL,
    159 	sysent,
    160 #ifdef SYSCALL_DEBUG
    161 	syscallnames,
    162 #else
    163 	NULL,
    164 #endif
    165 	0,
    166 	copyargs,
    167 	setregs,
    168 	sigcode,
    169 	esigcode,
    170 };
    171 
    172 /*
    173  * System startup; initialize the world, create process 0, mount root
    174  * filesystem, and fork to create init and pagedaemon.  Most of the
    175  * hard work is done in the lower-level initialization routines including
    176  * startup(), which does memory initialization and autoconfiguration.
    177  */
    178 void
    179 main()
    180 {
    181 	struct proc *p;
    182 	struct pdevinit *pdev;
    183 	int i, s, error;
    184 	extern struct pdevinit pdevinit[];
    185 	extern void roundrobin __P((void *));
    186 	extern void schedcpu __P((void *));
    187 	extern void disk_init __P((void));
    188 #if defined(NFSSERVER) || defined(NFS)
    189 	extern void nfs_init __P((void));
    190 #endif
    191 
    192 	/*
    193 	 * Initialize the current process pointer (curproc) before
    194 	 * any possible traps/probes to simplify trap processing.
    195 	 */
    196 	p = &proc0;
    197 	curproc = p;
    198 	/*
    199 	 * Attempt to find console and initialize
    200 	 * in case of early panic or other messages.
    201 	 */
    202 	consinit();
    203 	printf("%s", copyright);
    204 
    205 	uvm_init();
    206 
    207 	/* Do machine-dependent initialization. */
    208 	cpu_startup();
    209 
    210 	/* Initialize callouts. */
    211 	callout_startup();
    212 
    213 	/*
    214 	 * Initialize mbuf's.  Do this now because we might attempt to
    215 	 * allocate mbufs or mbuf clusters during autoconfiguration.
    216 	 */
    217 	mbinit();
    218 
    219 	/* Initialize sockets. */
    220 	soinit();
    221 
    222 	/*
    223 	 * The following 3 things must be done before autoconfiguration.
    224 	 */
    225 	disk_init();		/* initialize disk list */
    226 	tty_init();		/* initialize tty list */
    227 #if NRND > 0
    228 	rnd_init();		/* initialize RNG */
    229 #endif
    230 
    231 	/*
    232 	 * Initialize process and pgrp structures.
    233 	 */
    234 	procinit();
    235 
    236 	/*
    237 	 * Create process 0 (the swapper).
    238 	 */
    239 	s = proclist_lock_write();
    240 	LIST_INSERT_HEAD(&allproc, p, p_list);
    241 	proclist_unlock_write(s);
    242 
    243 	p->p_pgrp = &pgrp0;
    244 	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
    245 	LIST_INIT(&pgrp0.pg_members);
    246 	LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
    247 
    248 	pgrp0.pg_session = &session0;
    249 	session0.s_count = 1;
    250 	session0.s_sid = p->p_pid;
    251 	session0.s_leader = p;
    252 
    253 	/*
    254 	 * Set P_NOCLDWAIT so that kernel threads are reparented to
    255 	 * init(8) when they exit.  init(8) can easily wait them out
    256 	 * for us.
    257 	 */
    258 	p->p_flag = P_INMEM | P_SYSTEM | P_NOCLDWAIT;
    259 	p->p_stat = SRUN;
    260 	p->p_nice = NZERO;
    261 	p->p_emul = &emul_netbsd;
    262 	strncpy(p->p_comm, "swapper", MAXCOMLEN);
    263 
    264 	/* Create credentials. */
    265 	cred0.p_refcnt = 1;
    266 	p->p_cred = &cred0;
    267 	p->p_ucred = crget();
    268 	p->p_ucred->cr_ngroups = 1;	/* group 0 */
    269 
    270 	/* Create the file descriptor table. */
    271 	finit();
    272 	p->p_fd = &filedesc0.fd_fd;
    273 	fdinit1(&filedesc0);
    274 
    275 	/* Create the CWD info. */
    276 	p->p_cwdi = &cwdi0;
    277 	cwdi0.cwdi_cmask = cmask;
    278 	cwdi0.cwdi_refcnt = 1;
    279 
    280 	/* Create the limits structures. */
    281 	p->p_limit = &limit0;
    282 	for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
    283 		limit0.pl_rlimit[i].rlim_cur =
    284 		    limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
    285 
    286 	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
    287 	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
    288 	    maxfiles < NOFILE ? maxfiles : NOFILE;
    289 
    290 	limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
    291 	limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
    292 	    maxproc < MAXUPRC ? maxproc : MAXUPRC;
    293 
    294 	i = ptoa(uvmexp.free);
    295 	limit0.pl_rlimit[RLIMIT_RSS].rlim_max = i;
    296 	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i;
    297 	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3;
    298 	limit0.pl_corename = defcorename;
    299 	limit0.p_refcnt = 1;
    300 
    301 	/*
    302 	 * Initialize proc0's vmspace, which uses the kernel pmap.
    303 	 * All kernel processes (which never have user space mappings)
    304 	 * share proc0's vmspace, and thus, the kernel pmap.
    305 	 */
    306 	uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
    307 	    trunc_page(VM_MAX_ADDRESS), TRUE);
    308 	p->p_vmspace = &vmspace0;
    309 
    310 	p->p_addr = proc0paddr;				/* XXX */
    311 
    312 	/*
    313 	 * We continue to place resource usage info in the
    314 	 * user struct so they're pageable.
    315 	 */
    316 	p->p_stats = &p->p_addr->u_stats;
    317 
    318 	/*
    319 	 * Charge root for one process.
    320 	 */
    321 	(void)chgproccnt(0, 1);
    322 
    323 	rqinit();
    324 
    325 	/* Configure virtual memory system, set vm rlimits. */
    326 	uvm_init_limits(p);
    327 
    328 	/* Initialize the file systems. */
    329 #if defined(NFSSERVER) || defined(NFS)
    330 	nfs_init();			/* initialize server/shared data */
    331 #endif
    332 	vfsinit();
    333 
    334 	/* Configure the system hardware.  This will enable interrupts. */
    335 	configure();
    336 
    337 #ifdef SYSVSHM
    338 	/* Initialize System V style shared memory. */
    339 	shminit();
    340 #endif
    341 
    342 #ifdef SYSVSEM
    343 	/* Initialize System V style semaphores. */
    344 	seminit();
    345 #endif
    346 
    347 #ifdef SYSVMSG
    348 	/* Initialize System V style message queues. */
    349 	msginit();
    350 #endif
    351 
    352 	/* Attach pseudo-devices. */
    353 	for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
    354 		(*pdev->pdev_attach)(pdev->pdev_count);
    355 
    356 	/*
    357 	 * Initialize protocols.  Block reception of incoming packets
    358 	 * until everything is ready.
    359 	 */
    360 	s = splimp();
    361 	ifinit();
    362 	domaininit();
    363 	splx(s);
    364 
    365 #ifdef GPROF
    366 	/* Initialize kernel profiling. */
    367 	kmstartup();
    368 #endif
    369 
    370 	/* Kick off timeout driven events by calling first time. */
    371 	roundrobin(NULL);
    372 	schedcpu(NULL);
    373 
    374 	/* Determine the root and dump devices. */
    375 	cpu_rootconf();
    376 	cpu_dumpconf();
    377 
    378 	/* Mount the root file system. */
    379 	do {
    380 		domountroothook();
    381 		if ((error = vfs_mountroot())) {
    382 			printf("cannot mount root, error = %d\n", error);
    383 			boothowto |= RB_ASKNAME;
    384 			setroot(root_device,
    385 			    (rootdev != NODEV) ? DISKPART(rootdev) : 0);
    386 		}
    387 	} while (error != 0);
    388 	mountroothook_destroy();
    389 
    390 	mountlist.cqh_first->mnt_flag |= MNT_ROOTFS;
    391 	mountlist.cqh_first->mnt_op->vfs_refcount++;
    392 
    393 	/*
    394 	 * Get the vnode for '/'.  Set filedesc0.fd_fd.fd_cdir to
    395 	 * reference it.
    396 	 */
    397 	if (VFS_ROOT(mountlist.cqh_first, &rootvnode))
    398 		panic("cannot find root vnode");
    399 	cwdi0.cwdi_cdir = rootvnode;
    400 	VREF(cwdi0.cwdi_cdir);
    401 	VOP_UNLOCK(rootvnode, 0);
    402 	cwdi0.cwdi_rdir = NULL;
    403 	uvm_swap_init();
    404 
    405 	/*
    406 	 * Now can look at time, having had a chance to verify the time
    407 	 * from the file system.  Reset p->p_rtime as it may have been
    408 	 * munched in mi_switch() after the time got set.
    409 	 */
    410 	p->p_stats->p_start = runtime = mono_time = boottime = time;
    411 	p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0;
    412 
    413 	/*
    414 	 * Initialize signal-related data structures, and signal state
    415 	 * for proc0.
    416 	 */
    417 	signal_init();
    418 	p->p_sigacts = &sigacts0;
    419 	siginit(p);
    420 
    421 	/* Create process 1 (init(8)). */
    422 	if (fork1(p, 0, SIGCHLD, NULL, 0, NULL, &initproc))
    423 		panic("fork init");
    424 	cpu_set_kpc(initproc, start_init, initproc);
    425 
    426 	/* Create process 2, the pageout daemon kernel thread. */
    427 	if (kthread_create1(start_pagedaemon, NULL, NULL, "pagedaemon"))
    428 		panic("fork pagedaemon");
    429 
    430 	/* Create process 3, the process reaper kernel thread. */
    431 	if (kthread_create1(start_reaper, NULL, NULL, "reaper"))
    432 		panic("fork reaper");
    433 
    434 	/* Create process 4, the filesystem syncer */
    435 	if (kthread_create1(sched_sync, NULL, NULL, "ioflush"))
    436 		panic("fork syncer");
    437 
    438 	/* Create any other deferred kernel threads. */
    439 	kthread_run_deferred_queue();
    440 
    441 #if defined(MULTIPROCESSOR)
    442 	/* Boot the secondary processors. */
    443 	cpu_boot_secondary_processors();
    444 #endif
    445 
    446 	/* The scheduler is an infinite loop. */
    447 	uvm_scheduler();
    448 	/* NOTREACHED */
    449 }
    450 
    451 static void
    452 check_console(p)
    453 	struct proc *p;
    454 {
    455 	struct nameidata nd;
    456 	int error;
    457 
    458 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/console", p);
    459 	error = namei(&nd);
    460 	if (error == 0)
    461 		vrele(nd.ni_vp);
    462 	else if (error == ENOENT)
    463 		printf("warning: no /dev/console\n");
    464 	else
    465 		printf("warning: lookup /dev/console: error %d\n", error);
    466 }
    467 
    468 /*
    469  * List of paths to try when searching for "init".
    470  */
    471 static char *initpaths[] = {
    472 	"/sbin/init",
    473 	"/sbin/oinit",
    474 	"/sbin/init.bak",
    475 	NULL,
    476 };
    477 
    478 /*
    479  * Start the initial user process; try exec'ing each pathname in "initpaths".
    480  * The program is invoked with one argument containing the boot flags.
    481  */
    482 static void
    483 start_init(arg)
    484 	void *arg;
    485 {
    486 	struct proc *p = arg;
    487 	vaddr_t addr;
    488 	struct sys_execve_args /* {
    489 		syscallarg(const char *) path;
    490 		syscallarg(char * const *) argp;
    491 		syscallarg(char * const *) envp;
    492 	} */ args;
    493 	int options, i, error;
    494 	register_t retval[2];
    495 	char flags[4], *flagsp;
    496 	char **pathp, *path, *slash, *ucp, **uap, *arg0, *arg1 = NULL;
    497 
    498 	/*
    499 	 * Now in process 1.
    500 	 */
    501 	strncpy(p->p_comm, "init", MAXCOMLEN);
    502 
    503 	/*
    504 	 * This is not the right way to do this.  We really should
    505 	 * hand-craft a descriptor onto /dev/console to hand to init,
    506 	 * but that's a _lot_ more work, and the benefit from this easy
    507 	 * hack makes up for the "good is the enemy of the best" effect.
    508 	 */
    509 	check_console(p);
    510 
    511 	/*
    512 	 * Need just enough stack to hold the faked-up "execve()" arguments.
    513 	 */
    514 	addr = USRSTACK - PAGE_SIZE;
    515 	if (uvm_map(&p->p_vmspace->vm_map, &addr, PAGE_SIZE,
    516                     NULL, UVM_UNKNOWN_OFFSET,
    517                     UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY,
    518 		    UVM_ADV_NORMAL,
    519                     UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW))
    520 		!= KERN_SUCCESS)
    521 		panic("init: couldn't allocate argument space");
    522 	p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
    523 
    524 	for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) {
    525 		ucp = (char *)(addr + PAGE_SIZE);
    526 
    527 		/*
    528 		 * Construct the boot flag argument.
    529 		 */
    530 		flagsp = flags;
    531 		*flagsp++ = '-';
    532 		options = 0;
    533 
    534 		if (boothowto & RB_SINGLE) {
    535 			*flagsp++ = 's';
    536 			options = 1;
    537 		}
    538 #ifdef notyet
    539 		if (boothowto & RB_FASTBOOT) {
    540 			*flagsp++ = 'f';
    541 			options = 1;
    542 		}
    543 #endif
    544 
    545 		/*
    546 		 * Move out the flags (arg 1), if necessary.
    547 		 */
    548 		if (options != 0) {
    549 			*flagsp++ = '\0';
    550 			i = flagsp - flags;
    551 #ifdef DEBUG
    552 			printf("init: copying out flags `%s' %d\n", flags, i);
    553 #endif
    554 			(void)copyout((caddr_t)flags, (caddr_t)(ucp -= i), i);
    555 			arg1 = ucp;
    556 		}
    557 
    558 		/*
    559 		 * Move out the file name (also arg 0).
    560 		 */
    561 		i = strlen(path) + 1;
    562 #ifdef DEBUG
    563 		printf("init: copying out path `%s' %d\n", path, i);
    564 #endif
    565 		(void)copyout((caddr_t)path, (caddr_t)(ucp -= i), i);
    566 		arg0 = ucp;
    567 
    568 		/*
    569 		 * Move out the arg pointers.
    570 		 */
    571 		uap = (char **)((long)ucp & ~ALIGNBYTES);
    572 		(void)suword((caddr_t)--uap, 0);	/* terminator */
    573 		if (options != 0)
    574 			(void)suword((caddr_t)--uap, (long)arg1);
    575 		slash = strrchr(path, '/');
    576 		if (slash)
    577 			(void)suword((caddr_t)--uap,
    578 			    (long)arg0 + (slash + 1 - path));
    579 		else
    580 			(void)suword((caddr_t)--uap, (long)arg0);
    581 
    582 		/*
    583 		 * Point at the arguments.
    584 		 */
    585 		SCARG(&args, path) = arg0;
    586 		SCARG(&args, argp) = uap;
    587 		SCARG(&args, envp) = NULL;
    588 
    589 		/*
    590 		 * Now try to exec the program.  If can't for any reason
    591 		 * other than it doesn't exist, complain.
    592 		 */
    593 		error = sys_execve(p, &args, retval);
    594 		if (error == 0 || error == EJUSTRETURN)
    595 			return;
    596 		if (error != ENOENT)
    597 			printf("exec %s: error %d\n", path, error);
    598 	}
    599 	printf("init: not found\n");
    600 	panic("no init");
    601 }
    602 
/*
 * Entry point of the "pagedaemon" kernel thread created by main().
 * It simply hands control to uvm_pageout(); the argument is unused.
 */
/* ARGSUSED */
static void
start_pagedaemon(arg)
	void *arg;
{

	uvm_pageout();
	/* NOTREACHED */
}
    612 
/*
 * Entry point of the "reaper" kernel thread created by main().
 * It simply hands control to reaper(); the argument is unused.
 */
/* ARGSUSED */
static void
start_reaper(arg)
	void *arg;
{

	reaper();
	/* NOTREACHED */
}
    622