Home | History | Annotate | Line # | Download | only in kern
kern_syscall.c revision 1.7.2.2
      1  1.7.2.1       tls /*	$NetBSD: kern_syscall.c,v 1.7.2.2 2017/12/03 11:38:44 jdolecek Exp $	*/
      2      1.1     pooka 
      3      1.1     pooka /*-
      4      1.1     pooka  * Copyright (c) 2008 The NetBSD Foundation, Inc.
      5      1.1     pooka  * All rights reserved.
      6      1.1     pooka  *
      7      1.1     pooka  * This code is derived from software developed for The NetBSD Foundation
      8      1.1     pooka  * by Andrew Doran.
      9      1.1     pooka  *
     10      1.1     pooka  * Redistribution and use in source and binary forms, with or without
     11      1.1     pooka  * modification, are permitted provided that the following conditions
     12      1.1     pooka  * are met:
     13      1.1     pooka  * 1. Redistributions of source code must retain the above copyright
     14      1.1     pooka  *    notice, this list of conditions and the following disclaimer.
     15      1.1     pooka  * 2. Redistributions in binary form must reproduce the above copyright
     16      1.1     pooka  *    notice, this list of conditions and the following disclaimer in the
     17      1.1     pooka  *    documentation and/or other materials provided with the distribution.
     18      1.1     pooka  *
     19      1.1     pooka  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20      1.1     pooka  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21      1.1     pooka  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22      1.1     pooka  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23      1.1     pooka  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24      1.1     pooka  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25      1.1     pooka  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26      1.1     pooka  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27      1.1     pooka  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28      1.1     pooka  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29      1.1     pooka  * POSSIBILITY OF SUCH DAMAGE.
     30      1.1     pooka  */
     31      1.1     pooka 
     32      1.1     pooka #include <sys/cdefs.h>
     33  1.7.2.1       tls __KERNEL_RCSID(0, "$NetBSD: kern_syscall.c,v 1.7.2.2 2017/12/03 11:38:44 jdolecek Exp $");
     34      1.2     pooka 
     35  1.7.2.1       tls #ifdef _KERNEL_OPT
     36      1.2     pooka #include "opt_modular.h"
     37  1.7.2.1       tls #include "opt_syscall_debug.h"
     38  1.7.2.1       tls #include "opt_ktrace.h"
     39  1.7.2.1       tls #include "opt_ptrace.h"
     40  1.7.2.2  jdolecek #include "opt_dtrace.h"
     41  1.7.2.1       tls #endif
     42      1.1     pooka 
     43      1.3     pooka /* XXX To get syscall prototypes. */
     44      1.3     pooka #define SYSVSHM
     45      1.3     pooka #define SYSVSEM
     46      1.3     pooka #define SYSVMSG
     47      1.3     pooka 
     48      1.1     pooka #include <sys/param.h>
     49      1.1     pooka #include <sys/module.h>
     50      1.4     pooka #include <sys/sched.h>
     51      1.1     pooka #include <sys/syscall.h>
     52      1.1     pooka #include <sys/syscallargs.h>
     53      1.1     pooka #include <sys/syscallvar.h>
     54      1.5  pgoyette #include <sys/systm.h>
     55      1.1     pooka #include <sys/xcall.h>
     56  1.7.2.1       tls #include <sys/ktrace.h>
     57  1.7.2.1       tls #include <sys/ptrace.h>
     58      1.1     pooka 
     59      1.1     pooka int
     60      1.1     pooka sys_nomodule(struct lwp *l, const void *v, register_t *retval)
     61      1.1     pooka {
     62      1.1     pooka #ifdef MODULAR
     63  1.7.2.2  jdolecek 
     64      1.1     pooka 	const struct sysent *sy;
     65      1.1     pooka 	const struct emul *em;
     66  1.7.2.2  jdolecek 	const struct sc_autoload *auto_list;
     67  1.7.2.2  jdolecek 	u_int code;
     68      1.1     pooka 
     69      1.1     pooka 	/*
     70      1.1     pooka 	 * Restart the syscall if we interrupted a module unload that
     71      1.5  pgoyette 	 * failed.  Acquiring kernconfig_lock delays us until any unload
     72      1.1     pooka 	 * has been completed or rolled back.
     73      1.1     pooka 	 */
     74      1.5  pgoyette 	kernconfig_lock();
     75      1.1     pooka 	sy = l->l_sysent;
     76      1.1     pooka 	if (sy->sy_call != sys_nomodule) {
     77      1.5  pgoyette 		kernconfig_unlock();
     78      1.1     pooka 		return ERESTART;
     79      1.1     pooka 	}
     80      1.1     pooka 	/*
     81      1.1     pooka 	 * Try to autoload a module to satisfy the request.  If it
     82      1.1     pooka 	 * works, retry the request.
     83      1.1     pooka 	 */
     84      1.1     pooka 	em = l->l_proc->p_emul;
     85  1.7.2.2  jdolecek 	code = sy - em->e_sysent;
     86  1.7.2.2  jdolecek 
     87  1.7.2.2  jdolecek 	if ((auto_list = em->e_sc_autoload) != NULL)
     88  1.7.2.2  jdolecek 		for (; auto_list->al_code > 0; auto_list++) {
     89  1.7.2.2  jdolecek 			if (auto_list->al_code != code) {
     90      1.1     pooka 				continue;
     91      1.1     pooka 			}
     92  1.7.2.2  jdolecek 			if (module_autoload(auto_list->al_module,
     93      1.1     pooka 			    MODULE_CLASS_ANY) != 0 ||
     94      1.1     pooka 			    sy->sy_call == sys_nomodule) {
     95      1.1     pooka 			    	break;
     96      1.1     pooka 			}
     97      1.5  pgoyette 			kernconfig_unlock();
     98      1.1     pooka 			return ERESTART;
     99      1.1     pooka 		}
    100      1.5  pgoyette 	kernconfig_unlock();
    101      1.1     pooka #endif	/* MODULAR */
    102      1.1     pooka 
    103      1.1     pooka 	return sys_nosys(l, v, retval);
    104      1.1     pooka }
    105      1.1     pooka 
    106      1.1     pooka int
    107      1.1     pooka syscall_establish(const struct emul *em, const struct syscall_package *sp)
    108      1.1     pooka {
    109      1.1     pooka 	struct sysent *sy;
    110      1.1     pooka 	int i;
    111      1.1     pooka 
    112      1.5  pgoyette 	KASSERT(kernconfig_is_held());
    113      1.1     pooka 
    114      1.1     pooka 	if (em == NULL) {
    115      1.1     pooka 		em = &emul_netbsd;
    116      1.1     pooka 	}
    117      1.1     pooka 	sy = em->e_sysent;
    118      1.1     pooka 
    119      1.1     pooka 	/*
    120      1.1     pooka 	 * Ensure that all preconditions are valid, since this is
    121      1.1     pooka 	 * an all or nothing deal.  Once a system call is entered,
    122      1.1     pooka 	 * it can become busy and we could be unable to remove it
    123      1.1     pooka 	 * on error.
    124      1.1     pooka 	 */
    125      1.1     pooka 	for (i = 0; sp[i].sp_call != NULL; i++) {
    126      1.1     pooka 		if (sy[sp[i].sp_code].sy_call != sys_nomodule) {
    127      1.1     pooka #ifdef DIAGNOSTIC
    128      1.1     pooka 			printf("syscall %d is busy\n", sp[i].sp_code);
    129      1.1     pooka #endif
    130      1.1     pooka 			return EBUSY;
    131      1.1     pooka 		}
    132      1.1     pooka 	}
    133      1.1     pooka 	/* Everything looks good, patch them in. */
    134      1.1     pooka 	for (i = 0; sp[i].sp_call != NULL; i++) {
    135      1.1     pooka 		sy[sp[i].sp_code].sy_call = sp[i].sp_call;
    136      1.1     pooka 	}
    137      1.1     pooka 
    138      1.1     pooka 	return 0;
    139      1.1     pooka }
    140      1.1     pooka 
    141      1.1     pooka int
    142      1.1     pooka syscall_disestablish(const struct emul *em, const struct syscall_package *sp)
    143      1.1     pooka {
    144      1.1     pooka 	struct sysent *sy;
    145      1.1     pooka 	uint64_t where;
    146      1.1     pooka 	lwp_t *l;
    147      1.1     pooka 	int i;
    148      1.1     pooka 
    149      1.5  pgoyette 	KASSERT(kernconfig_is_held());
    150      1.1     pooka 
    151      1.1     pooka 	if (em == NULL) {
    152      1.1     pooka 		em = &emul_netbsd;
    153      1.1     pooka 	}
    154      1.1     pooka 	sy = em->e_sysent;
    155      1.1     pooka 
    156      1.1     pooka 	/*
    157      1.1     pooka 	 * First, patch the system calls to sys_nomodule to gate further
    158      1.1     pooka 	 * activity.
    159      1.1     pooka 	 */
    160      1.1     pooka 	for (i = 0; sp[i].sp_call != NULL; i++) {
    161      1.1     pooka 		KASSERT(sy[sp[i].sp_code].sy_call == sp[i].sp_call);
    162      1.1     pooka 		sy[sp[i].sp_code].sy_call = sys_nomodule;
    163      1.1     pooka 	}
    164      1.1     pooka 
    165      1.1     pooka 	/*
    166      1.1     pooka 	 * Run a cross call to cycle through all CPUs.  This does two
    167      1.1     pooka 	 * things: lock activity provides a barrier and makes our update
    168      1.1     pooka 	 * of sy_call visible to all CPUs, and upon return we can be sure
    169      1.1     pooka 	 * that we see pertinent values of l_sysent posted by remote CPUs.
    170      1.1     pooka 	 */
    171      1.1     pooka 	where = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL);
    172      1.1     pooka 	xc_wait(where);
    173      1.1     pooka 
    174      1.1     pooka 	/*
    175      1.1     pooka 	 * Now it's safe to check l_sysent.  Run through all LWPs and see
    176      1.1     pooka 	 * if anyone is still using the system call.
    177      1.1     pooka 	 */
    178      1.1     pooka 	for (i = 0; sp[i].sp_call != NULL; i++) {
    179      1.1     pooka 		mutex_enter(proc_lock);
    180      1.1     pooka 		LIST_FOREACH(l, &alllwp, l_list) {
    181      1.1     pooka 			if (l->l_sysent == &sy[sp[i].sp_code]) {
    182      1.1     pooka 				break;
    183      1.1     pooka 			}
    184      1.1     pooka 		}
    185      1.1     pooka 		mutex_exit(proc_lock);
    186      1.1     pooka 		if (l == NULL) {
    187      1.1     pooka 			continue;
    188      1.1     pooka 		}
    189      1.1     pooka 		/*
    190      1.1     pooka 		 * We lose: one or more calls are still in use.  Put back
    191      1.1     pooka 		 * the old entrypoints and act like nothing happened.
    192      1.5  pgoyette 		 * When we drop kernconfig_lock, any system calls held in
    193      1.1     pooka 		 * sys_nomodule() will be restarted.
    194      1.1     pooka 		 */
    195      1.1     pooka 		for (i = 0; sp[i].sp_call != NULL; i++) {
    196      1.1     pooka 			sy[sp[i].sp_code].sy_call = sp[i].sp_call;
    197      1.1     pooka 		}
    198      1.1     pooka 		return EBUSY;
    199      1.1     pooka 	}
    200      1.1     pooka 
    201      1.1     pooka 	return 0;
    202      1.1     pooka }
    203  1.7.2.1       tls 
/*
 * Report whether system call tracing applies to process 'p'.
 *
 * The answer is true when the kernel is built with SYSCALL_DEBUG, when
 * KTRACE is configured and p_traceflag has KTRFAC_SYSCALL or
 * KTRFAC_SYSRET set, or when PTRACE is configured and p_slflag has
 * PSL_SYSCALL set.  Otherwise it is false.
 */
bool
trace_is_enabled(struct proc *p)
{
	bool traced = false;

#ifdef SYSCALL_DEBUG
	/* Debug kernels trace every system call unconditionally. */
	traced = true;
#endif
#ifdef KTRACE
	traced = traced ||
	    ISSET(p->p_traceflag, (KTRFAC_SYSCALL | KTRFAC_SYSRET));
#endif
#ifdef PTRACE
	traced = traced || ISSET(p->p_slflag, PSL_SYSCALL);
#endif

	return traced;
}
    224  1.7.2.1       tls 
    225  1.7.2.1       tls /*
    226  1.7.2.1       tls  * Start trace of particular system call. If process is being traced,
    227  1.7.2.1       tls  * this routine is called by MD syscall dispatch code just before
    228  1.7.2.1       tls  * a system call is actually executed.
    229  1.7.2.1       tls  */
    230  1.7.2.1       tls int
    231  1.7.2.2  jdolecek trace_enter(register_t code, const struct sysent *sy, const void *args)
    232  1.7.2.1       tls {
    233  1.7.2.1       tls 	int error = 0;
    234  1.7.2.1       tls 
    235  1.7.2.2  jdolecek #ifdef KDTRACE_HOOKS
    236  1.7.2.2  jdolecek 	if (sy->sy_entry) {
    237  1.7.2.2  jdolecek 		struct emul *e = curlwp->l_proc->p_emul;
    238  1.7.2.2  jdolecek 		(*e->e_dtrace_syscall)(sy->sy_entry, code, sy, args, NULL, 0);
    239  1.7.2.2  jdolecek 	}
    240  1.7.2.2  jdolecek #endif
    241  1.7.2.2  jdolecek 
    242  1.7.2.1       tls #ifdef SYSCALL_DEBUG
    243  1.7.2.1       tls 	scdebug_call(code, args);
    244  1.7.2.1       tls #endif /* SYSCALL_DEBUG */
    245  1.7.2.1       tls 
    246  1.7.2.2  jdolecek 	ktrsyscall(code, args, sy->sy_narg);
    247  1.7.2.1       tls 
    248  1.7.2.1       tls #ifdef PTRACE
    249  1.7.2.1       tls 	if ((curlwp->l_proc->p_slflag & (PSL_SYSCALL|PSL_TRACED)) ==
    250  1.7.2.1       tls 	    (PSL_SYSCALL|PSL_TRACED)) {
    251  1.7.2.2  jdolecek 		proc_stoptrace(TRAP_SCE);
    252  1.7.2.1       tls 		if (curlwp->l_proc->p_slflag & PSL_SYSCALLEMU) {
    253  1.7.2.1       tls 			/* tracer will emulate syscall for us */
    254  1.7.2.1       tls 			error = EJUSTRETURN;
    255  1.7.2.1       tls 		}
    256  1.7.2.1       tls 	}
    257  1.7.2.1       tls #endif
    258  1.7.2.1       tls 	return error;
    259  1.7.2.1       tls }
    260  1.7.2.1       tls 
    261  1.7.2.1       tls /*
    262  1.7.2.1       tls  * End trace of particular system call. If process is being traced,
    263  1.7.2.1       tls  * this routine is called by MD syscall dispatch code just after
    264  1.7.2.1       tls  * a system call finishes.
    265  1.7.2.1       tls  * MD caller guarantees the passed 'code' is within the supported
    266  1.7.2.1       tls  * system call number range for emulation the process runs under.
    267  1.7.2.1       tls  */
    268  1.7.2.1       tls void
    269  1.7.2.2  jdolecek trace_exit(register_t code, const struct sysent *sy, const void *args,
    270  1.7.2.2  jdolecek     register_t rval[], int error)
    271  1.7.2.1       tls {
    272  1.7.2.2  jdolecek #if defined(PTRACE) || defined(KDTRACE_HOOKS)
    273  1.7.2.1       tls 	struct proc *p = curlwp->l_proc;
    274  1.7.2.1       tls #endif
    275  1.7.2.1       tls 
    276  1.7.2.2  jdolecek #ifdef KDTRACE_HOOKS
    277  1.7.2.2  jdolecek 	if (sy->sy_return) {
    278  1.7.2.2  jdolecek 		(*p->p_emul->e_dtrace_syscall)(sy->sy_return, code, sy, args,
    279  1.7.2.2  jdolecek 		    rval, error);
    280  1.7.2.2  jdolecek 	}
    281  1.7.2.2  jdolecek #endif
    282  1.7.2.2  jdolecek 
    283  1.7.2.1       tls #ifdef SYSCALL_DEBUG
    284  1.7.2.1       tls 	scdebug_ret(code, error, rval);
    285  1.7.2.1       tls #endif /* SYSCALL_DEBUG */
    286  1.7.2.1       tls 
    287  1.7.2.1       tls 	ktrsysret(code, error, rval);
    288  1.7.2.1       tls 
    289  1.7.2.1       tls #ifdef PTRACE
    290  1.7.2.1       tls 	if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED|PSL_SYSCALLEMU)) ==
    291  1.7.2.2  jdolecek 	    (PSL_SYSCALL|PSL_TRACED)) {
    292  1.7.2.2  jdolecek 		proc_stoptrace(TRAP_SCX);
    293  1.7.2.2  jdolecek 	}
    294  1.7.2.1       tls 	CLR(p->p_slflag, PSL_SYSCALLEMU);
    295  1.7.2.1       tls #endif
    296  1.7.2.1       tls }
    297