Home | History | Annotate | Line # | Download | only in kern
kern_syscall.c revision 1.10
      1 /*	$NetBSD: kern_syscall.c,v 1.10 2015/03/07 16:38:07 christos Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2008 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software developed for The NetBSD Foundation
      8  * by Andrew Doran.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: kern_syscall.c,v 1.10 2015/03/07 16:38:07 christos Exp $");
     34 
     35 #ifdef _KERNEL_OPT
     36 #include "opt_modular.h"
     37 #include "opt_syscall_debug.h"
     38 #include "opt_ktrace.h"
     39 #include "opt_ptrace.h"
     40 #include "opt_dtrace.h"
     41 #endif
     42 
     43 /* XXX To get syscall prototypes. */
     44 #define SYSVSHM
     45 #define SYSVSEM
     46 #define SYSVMSG
     47 
     48 #include <sys/param.h>
     49 #include <sys/module.h>
     50 #include <sys/sched.h>
     51 #include <sys/syscall.h>
     52 #include <sys/syscallargs.h>
     53 #include <sys/syscallvar.h>
     54 #include <sys/systm.h>
     55 #include <sys/xcall.h>
     56 #include <sys/ktrace.h>
     57 #include <sys/ptrace.h>
     58 
/*
 * sys_nomodule:
 *
 *	Handler found in system call table slots whose real implementation
 *	is not currently patched in (see syscall_establish() and
 *	syscall_disestablish(), which swap slots to and from this function).
 *
 *	With MODULAR, the routine first serialises against module
 *	load/unload via kernconfig_lock.  If the slot has been patched
 *	while we waited, the call is restarted (ERESTART).  Otherwise, for
 *	the native emulation only, the system call number is looked up in
 *	a static table and the matching module is autoloaded; if that
 *	patched the slot, the call is restarted.  In every other case the
 *	request is handed to sys_nosys().
 *
 *	l:	the LWP issuing the system call; l->l_sysent identifies
 *		the slot that dispatched here.
 *	v:	system call arguments, passed through to sys_nosys().
 *	retval:	return value area, passed through to sys_nosys().
 *
 *	Returns ERESTART when the call should be retried, otherwise the
 *	result of sys_nosys().
 */
int
sys_nomodule(struct lwp *l, const void *v, register_t *retval)
{
#ifdef MODULAR
	static struct {
		u_int		al_code;
		const char	*al_module;
	} const autoload[] = {
	    { SYS_aio_cancel, "aio" },
	    { SYS_aio_error, "aio" },
	    { SYS_aio_fsync, "aio" },
	    { SYS_aio_read, "aio" },
	    { SYS_aio_return, "aio" },
	    { SYS___aio_suspend50, "aio" },
	    { SYS_aio_write, "aio" },
	    { SYS_lio_listio, "aio" },
	    { SYS_mq_open, "mqueue" },
	    { SYS_mq_close, "mqueue" },
	    { SYS_mq_unlink, "mqueue" },
	    { SYS_mq_getattr, "mqueue" },
	    { SYS_mq_setattr, "mqueue" },
	    { SYS_mq_notify, "mqueue" },
	    { SYS_mq_send, "mqueue" },
	    { SYS_mq_receive, "mqueue" },
	    { SYS___mq_timedsend50, "mqueue" },
	    { SYS___mq_timedreceive50, "mqueue" },
	    { SYS_compat_43_fstat43, "compat" },
	    { SYS_compat_43_lstat43, "compat" },
	    { SYS_compat_43_oaccept, "compat" },
	    { SYS_compat_43_ocreat, "compat" },
	    { SYS_compat_43_oftruncate, "compat" },
	    { SYS_compat_43_ogetdirentries, "compat" },
	    { SYS_compat_43_ogetdtablesize, "compat" },
	    { SYS_compat_43_ogethostid, "compat" },
	    { SYS_compat_43_ogethostname, "compat" },
	    { SYS_compat_43_ogetkerninfo, "compat" },
	    { SYS_compat_43_ogetpagesize, "compat" },
	    { SYS_compat_43_ogetpeername, "compat" },
	    { SYS_compat_43_ogetrlimit, "compat" },
	    { SYS_compat_43_ogetsockname, "compat" },
	    { SYS_compat_43_okillpg, "compat" },
	    { SYS_compat_43_olseek, "compat" },
	    { SYS_compat_43_ommap, "compat" },
	    { SYS_compat_43_oquota, "compat" },
	    { SYS_compat_43_orecv, "compat" },
	    { SYS_compat_43_orecvfrom, "compat" },
	    { SYS_compat_43_orecvmsg, "compat" },
	    { SYS_compat_43_osend, "compat" },
	    { SYS_compat_43_osendmsg, "compat" },
	    { SYS_compat_43_osethostid, "compat" },
	    { SYS_compat_43_osethostname, "compat" },
	    { SYS_compat_43_osetrlimit, "compat" },
	    { SYS_compat_43_osigblock, "compat" },
	    { SYS_compat_43_osigsetmask, "compat" },
	    { SYS_compat_43_osigstack, "compat" },
	    { SYS_compat_43_osigvec, "compat" },
	    { SYS_compat_43_otruncate, "compat" },
	    { SYS_compat_43_owait, "compat" },
	    { SYS_compat_43_stat43, "compat" },
	    { SYS_compat_09_ogetdomainname, "compat" },
	    { SYS_compat_09_osetdomainname, "compat" },
	    { SYS_compat_09_ouname, "compat" },
#ifndef _LP64
	    { SYS_compat_10_omsgsys, "compat" },
	    { SYS_compat_10_osemsys, "compat" },
	    { SYS_compat_10_oshmsys, "compat" },
#endif
	    { SYS_compat_12_fstat12, "compat" },
	    { SYS_compat_12_getdirentries, "compat" },
	    { SYS_compat_12_lstat12, "compat" },
	    { SYS_compat_12_msync, "compat" },
	    { SYS_compat_12_oreboot, "compat" },
	    { SYS_compat_12_oswapon, "compat" },
	    { SYS_compat_12_stat12, "compat" },
	    { SYS_compat_13_sigaction13, "compat" },
	    { SYS_compat_13_sigaltstack13, "compat" },
	    { SYS_compat_13_sigpending13, "compat" },
	    { SYS_compat_13_sigprocmask13, "compat" },
	    { SYS_compat_13_sigreturn13, "compat" },
	    { SYS_compat_13_sigsuspend13, "compat" },
	    { SYS_compat_14___semctl, "compat" },
	    { SYS_compat_14_msgctl, "compat" },
	    { SYS_compat_14_shmctl, "compat" },
	    { SYS_compat_16___sigaction14, "compat" },
	    { SYS_compat_16___sigreturn14, "compat" },
	    { SYS_compat_20_fhstatfs, "compat" },
	    { SYS_compat_20_fstatfs, "compat" },
	    { SYS_compat_20_getfsstat, "compat" },
	    { SYS_compat_20_statfs, "compat" },
	    { SYS_compat_30___fhstat30, "compat" },
	    { SYS_compat_30___fstat13, "compat" },
	    { SYS_compat_30___lstat13, "compat" },
	    { SYS_compat_30___stat13, "compat" },
	    { SYS_compat_30_fhopen, "compat" },
	    { SYS_compat_30_fhstat, "compat" },
	    { SYS_compat_30_fhstatvfs1, "compat" },
	    { SYS_compat_30_getdents, "compat" },
	    { SYS_compat_30_getfh, "compat" },
	    { SYS_compat_30_socket, "compat" },
	    { SYS_compat_40_mount, "compat" },
	    { SYS_compat_50_wait4, "compat" },
	    { SYS_compat_50_mknod, "compat" },
	    { SYS_compat_50_setitimer, "compat" },
	    { SYS_compat_50_getitimer, "compat" },
	    { SYS_compat_50_select, "compat" },
	    { SYS_compat_50_gettimeofday, "compat" },
	    { SYS_compat_50_getrusage, "compat" },
	    { SYS_compat_50_settimeofday, "compat" },
	    { SYS_compat_50_utimes, "compat" },
	    { SYS_compat_50_adjtime, "compat" },
	    { SYS_compat_50_lfs_segwait, "compat" },
	    { SYS_compat_50_futimes, "compat" },
	    { SYS_compat_50_clock_gettime, "compat" },
	    { SYS_compat_50_clock_settime, "compat" },
	    { SYS_compat_50_clock_getres, "compat" },
	    { SYS_compat_50_timer_settime, "compat" },
	    { SYS_compat_50_timer_gettime, "compat" },
	    { SYS_compat_50_nanosleep, "compat" },
	    { SYS_compat_50___sigtimedwait, "compat" },
	    { SYS_compat_50_mq_timedsend, "compat" },
	    { SYS_compat_50_mq_timedreceive, "compat" },
	    { SYS_compat_50_lutimes, "compat" },
	    { SYS_compat_50_____semctl13, "compat" },
	    { SYS_compat_50___msgctl13, "compat" },
	    { SYS_compat_50___shmctl13, "compat" },
	    { SYS_compat_50__lwp_park, "compat" },
	    { SYS_compat_50_kevent, "compat" },
	    { SYS_compat_50_pselect, "compat" },
	    { SYS_compat_50_pollts, "compat" },
	    { SYS_compat_50___stat30, "compat" },
	    { SYS_compat_50___fstat30, "compat" },
	    { SYS_compat_50___lstat30, "compat" },
	    { SYS_compat_50___ntp_gettime30, "compat" },
	    { SYS_compat_50___fhstat40, "compat" },
	    { SYS_compat_50_aio_suspend, "compat" },
	    { SYS_compat_60__lwp_park, "compat" },
	    { SYS__ksem_init, "ksem" },
	    { SYS__ksem_open, "ksem" },
	    { SYS__ksem_unlink, "ksem" },
	    { SYS__ksem_close, "ksem" },
	    { SYS__ksem_post, "ksem" },
	    { SYS__ksem_wait, "ksem" },
	    { SYS__ksem_trywait, "ksem" },
	    { SYS__ksem_getvalue, "ksem" },
	    { SYS__ksem_destroy, "ksem" },
	    { SYS__ksem_timedwait, "ksem" },
	    { SYS_nfssvc, "nfsserver" },
	    { SYS_afssys, "openafs" },
	};
	const struct sysent *sy;
	const struct emul *em;
	u_int code;	/* unsigned, to match al_code */
	size_t i;	/* size_t, to match __arraycount() */

	/*
	 * Restart the syscall if we interrupted a module unload that
	 * failed.  Acquiring kernconfig_lock delays us until any unload
	 * has been completed or rolled back.
	 */
	kernconfig_lock();
	sy = l->l_sysent;
	if (sy->sy_call != sys_nomodule) {
		kernconfig_unlock();
		return ERESTART;
	}
	/*
	 * Try to autoload a module to satisfy the request.  If it
	 * works, retry the request.
	 */
	em = l->l_proc->p_emul;
	if (em == &emul_netbsd) {
		/* The slot's index in the table is the syscall number. */
		code = (u_int)(sy - em->e_sysent);
		for (i = 0; i < __arraycount(autoload); i++) {
			if (autoload[i].al_code != code) {
				continue;
			}
			/*
			 * Give up if the load failed, or if it succeeded
			 * but did not patch this particular slot.
			 */
			if (module_autoload(autoload[i].al_module,
			    MODULE_CLASS_ANY) != 0 ||
			    sy->sy_call == sys_nomodule) {
				break;
			}
			kernconfig_unlock();
			return ERESTART;
		}
	}
	kernconfig_unlock();
#endif	/* MODULAR */

	return sys_nosys(l, v, retval);
}
    248 
    249 int
    250 syscall_establish(const struct emul *em, const struct syscall_package *sp)
    251 {
    252 	struct sysent *sy;
    253 	int i;
    254 
    255 	KASSERT(kernconfig_is_held());
    256 
    257 	if (em == NULL) {
    258 		em = &emul_netbsd;
    259 	}
    260 	sy = em->e_sysent;
    261 
    262 	/*
    263 	 * Ensure that all preconditions are valid, since this is
    264 	 * an all or nothing deal.  Once a system call is entered,
    265 	 * it can become busy and we could be unable to remove it
    266 	 * on error.
    267 	 */
    268 	for (i = 0; sp[i].sp_call != NULL; i++) {
    269 		if (sy[sp[i].sp_code].sy_call != sys_nomodule) {
    270 #ifdef DIAGNOSTIC
    271 			printf("syscall %d is busy\n", sp[i].sp_code);
    272 #endif
    273 			return EBUSY;
    274 		}
    275 	}
    276 	/* Everything looks good, patch them in. */
    277 	for (i = 0; sp[i].sp_call != NULL; i++) {
    278 		sy[sp[i].sp_code].sy_call = sp[i].sp_call;
    279 	}
    280 
    281 	return 0;
    282 }
    283 
    284 int
    285 syscall_disestablish(const struct emul *em, const struct syscall_package *sp)
    286 {
    287 	struct sysent *sy;
    288 	uint64_t where;
    289 	lwp_t *l;
    290 	int i;
    291 
    292 	KASSERT(kernconfig_is_held());
    293 
    294 	if (em == NULL) {
    295 		em = &emul_netbsd;
    296 	}
    297 	sy = em->e_sysent;
    298 
    299 	/*
    300 	 * First, patch the system calls to sys_nomodule to gate further
    301 	 * activity.
    302 	 */
    303 	for (i = 0; sp[i].sp_call != NULL; i++) {
    304 		KASSERT(sy[sp[i].sp_code].sy_call == sp[i].sp_call);
    305 		sy[sp[i].sp_code].sy_call = sys_nomodule;
    306 	}
    307 
    308 	/*
    309 	 * Run a cross call to cycle through all CPUs.  This does two
    310 	 * things: lock activity provides a barrier and makes our update
    311 	 * of sy_call visible to all CPUs, and upon return we can be sure
    312 	 * that we see pertinent values of l_sysent posted by remote CPUs.
    313 	 */
    314 	where = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL);
    315 	xc_wait(where);
    316 
    317 	/*
    318 	 * Now it's safe to check l_sysent.  Run through all LWPs and see
    319 	 * if anyone is still using the system call.
    320 	 */
    321 	for (i = 0; sp[i].sp_call != NULL; i++) {
    322 		mutex_enter(proc_lock);
    323 		LIST_FOREACH(l, &alllwp, l_list) {
    324 			if (l->l_sysent == &sy[sp[i].sp_code]) {
    325 				break;
    326 			}
    327 		}
    328 		mutex_exit(proc_lock);
    329 		if (l == NULL) {
    330 			continue;
    331 		}
    332 		/*
    333 		 * We lose: one or more calls are still in use.  Put back
    334 		 * the old entrypoints and act like nothing happened.
    335 		 * When we drop kernconfig_lock, any system calls held in
    336 		 * sys_nomodule() will be restarted.
    337 		 */
    338 		for (i = 0; sp[i].sp_call != NULL; i++) {
    339 			sy[sp[i].sp_code].sy_call = sp[i].sp_call;
    340 		}
    341 		return EBUSY;
    342 	}
    343 
    344 	return 0;
    345 }
    346 
/*
 * trace_is_enabled:
 *
 *	Report whether system call tracing applies to process `p'.
 *
 *	The answer is true when the kernel is built with SYSCALL_DEBUG,
 *	when (KTRACE) either the KTRFAC_SYSCALL or KTRFAC_SYSRET facility
 *	is set in p_traceflag, or when (PTRACE) PSL_SYSCALL is set in
 *	p_slflag.  Otherwise it is false.
 */
bool
trace_is_enabled(struct proc *p)
{
	bool enabled = false;

#ifdef SYSCALL_DEBUG
	enabled = true;
#endif
#ifdef KTRACE
	if (!enabled &&
	    ISSET(p->p_traceflag, (KTRFAC_SYSCALL | KTRFAC_SYSRET))) {
		enabled = true;
	}
#endif
#ifdef PTRACE
	if (!enabled && ISSET(p->p_slflag, PSL_SYSCALL)) {
		enabled = true;
	}
#endif

	return enabled;
}
    367 
    368 /*
    369  * Start trace of particular system call. If process is being traced,
    370  * this routine is called by MD syscall dispatch code just before
    371  * a system call is actually executed.
    372  */
    373 int
    374 trace_enter(register_t code, const struct sysent *sy, const void *args)
    375 {
    376 	int error = 0;
    377 
    378 #ifdef KDTRACE_HOOKS
    379 	if (sy->sy_entry) {
    380 		struct emul *e = curlwp->l_proc->p_emul;
    381 		(*e->e_dtrace_syscall)(sy->sy_entry, code, sy, args, NULL, 0);
    382 	}
    383 #endif
    384 
    385 #ifdef SYSCALL_DEBUG
    386 	scdebug_call(code, args);
    387 #endif /* SYSCALL_DEBUG */
    388 
    389 	ktrsyscall(code, args, sy->sy_narg);
    390 
    391 #ifdef PTRACE
    392 	if ((curlwp->l_proc->p_slflag & (PSL_SYSCALL|PSL_TRACED)) ==
    393 	    (PSL_SYSCALL|PSL_TRACED)) {
    394 		process_stoptrace();
    395 		if (curlwp->l_proc->p_slflag & PSL_SYSCALLEMU) {
    396 			/* tracer will emulate syscall for us */
    397 			error = EJUSTRETURN;
    398 		}
    399 	}
    400 #endif
    401 	return error;
    402 }
    403 
/*
 * trace_exit:
 *
 *	End trace of particular system call. If process is being traced,
 *	this routine is called by MD syscall dispatch code just after
 *	a system call finishes.
 *	MD caller guarantees the passed 'code' is within the supported
 *	system call number range for the emulation the process runs under.
 *
 *	code:	system call number.
 *	sy:	system call table entry for `code'.
 *	args:	system call arguments.
 *	rval:	return values produced by the system call.
 *	error:	error code produced by the system call.
 */
void
trace_exit(register_t code, const struct sysent *sy, const void *args,
    register_t rval[], int error)
{
#if defined(PTRACE) || defined(KDTRACE_HOOKS)
	struct proc *p = curlwp->l_proc;
#endif

#ifdef KDTRACE_HOOKS
	/* Skip the probe if the emulation supplies no hook. */
	if (sy->sy_return && p->p_emul->e_dtrace_syscall != NULL) {
		(*p->p_emul->e_dtrace_syscall)(sy->sy_return, code, sy, args,
		    rval, error);
	}
#endif

#ifdef SYSCALL_DEBUG
	scdebug_ret(code, error, rval);
#endif /* SYSCALL_DEBUG */

	ktrsysret(code, error, rval);

#ifdef PTRACE
	/* Stop for the tracer only if it is not emulating this call. */
	if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED|PSL_SYSCALLEMU)) ==
	    (PSL_SYSCALL|PSL_TRACED))
		process_stoptrace();
	/* The emulation request covers one system call only. */
	CLR(p->p_slflag, PSL_SYSCALLEMU);
#endif
}
    439