Home | History | Annotate | Line # | Download | only in kern
kern_syscall.c revision 1.16.14.2
      1  1.16.14.2    martin /*	$NetBSD: kern_syscall.c,v 1.16.14.2 2020/04/13 08:05:04 martin Exp $	*/
      2        1.1     pooka 
      3        1.1     pooka /*-
      4        1.1     pooka  * Copyright (c) 2008 The NetBSD Foundation, Inc.
      5        1.1     pooka  * All rights reserved.
      6        1.1     pooka  *
      7        1.1     pooka  * This code is derived from software developed for The NetBSD Foundation
      8        1.1     pooka  * by Andrew Doran.
      9        1.1     pooka  *
     10        1.1     pooka  * Redistribution and use in source and binary forms, with or without
     11        1.1     pooka  * modification, are permitted provided that the following conditions
     12        1.1     pooka  * are met:
     13        1.1     pooka  * 1. Redistributions of source code must retain the above copyright
     14        1.1     pooka  *    notice, this list of conditions and the following disclaimer.
     15        1.1     pooka  * 2. Redistributions in binary form must reproduce the above copyright
     16        1.1     pooka  *    notice, this list of conditions and the following disclaimer in the
     17        1.1     pooka  *    documentation and/or other materials provided with the distribution.
     18        1.1     pooka  *
     19        1.1     pooka  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20        1.1     pooka  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21        1.1     pooka  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22        1.1     pooka  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23        1.1     pooka  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24        1.1     pooka  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25        1.1     pooka  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26        1.1     pooka  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27        1.1     pooka  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28        1.1     pooka  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29        1.1     pooka  * POSSIBILITY OF SUCH DAMAGE.
     30        1.1     pooka  */
     31        1.1     pooka 
     32        1.1     pooka #include <sys/cdefs.h>
     33  1.16.14.2    martin __KERNEL_RCSID(0, "$NetBSD: kern_syscall.c,v 1.16.14.2 2020/04/13 08:05:04 martin Exp $");
     34        1.2     pooka 
     35        1.8     pooka #ifdef _KERNEL_OPT
     36        1.2     pooka #include "opt_modular.h"
     37        1.8     pooka #include "opt_syscall_debug.h"
     38        1.8     pooka #include "opt_ktrace.h"
     39        1.8     pooka #include "opt_ptrace.h"
     40       1.10  christos #include "opt_dtrace.h"
     41        1.8     pooka #endif
     42        1.1     pooka 
     43        1.3     pooka /* XXX To get syscall prototypes. */
     44        1.3     pooka #define SYSVSHM
     45        1.3     pooka #define SYSVSEM
     46        1.3     pooka #define SYSVMSG
     47        1.3     pooka 
     48        1.1     pooka #include <sys/param.h>
     49        1.1     pooka #include <sys/module.h>
     50        1.4     pooka #include <sys/sched.h>
     51        1.1     pooka #include <sys/syscall.h>
     52        1.1     pooka #include <sys/syscallargs.h>
     53        1.1     pooka #include <sys/syscallvar.h>
     54        1.5  pgoyette #include <sys/systm.h>
     55        1.1     pooka #include <sys/xcall.h>
     56        1.8     pooka #include <sys/ktrace.h>
     57        1.8     pooka #include <sys/ptrace.h>
     58        1.1     pooka 
     59        1.1     pooka int
     60        1.1     pooka sys_nomodule(struct lwp *l, const void *v, register_t *retval)
     61        1.1     pooka {
     62        1.1     pooka #ifdef MODULAR
     63       1.11  pgoyette 
     64        1.1     pooka 	const struct sysent *sy;
     65        1.1     pooka 	const struct emul *em;
     66       1.13  pgoyette 	const struct sc_autoload *auto_list;
     67       1.13  pgoyette 	u_int code;
     68        1.1     pooka 
     69        1.1     pooka 	/*
     70        1.1     pooka 	 * Restart the syscall if we interrupted a module unload that
     71        1.5  pgoyette 	 * failed.  Acquiring kernconfig_lock delays us until any unload
     72        1.1     pooka 	 * has been completed or rolled back.
     73        1.1     pooka 	 */
     74        1.5  pgoyette 	kernconfig_lock();
     75        1.1     pooka 	sy = l->l_sysent;
     76        1.1     pooka 	if (sy->sy_call != sys_nomodule) {
     77        1.5  pgoyette 		kernconfig_unlock();
     78        1.1     pooka 		return ERESTART;
     79        1.1     pooka 	}
     80        1.1     pooka 	/*
     81        1.1     pooka 	 * Try to autoload a module to satisfy the request.  If it
     82        1.1     pooka 	 * works, retry the request.
     83        1.1     pooka 	 */
     84        1.1     pooka 	em = l->l_proc->p_emul;
     85       1.12  pgoyette 	code = sy - em->e_sysent;
     86       1.12  pgoyette 
     87       1.12  pgoyette 	if ((auto_list = em->e_sc_autoload) != NULL)
     88       1.12  pgoyette 		for (; auto_list->al_code > 0; auto_list++) {
     89       1.12  pgoyette 			if (auto_list->al_code != code) {
     90        1.1     pooka 				continue;
     91        1.1     pooka 			}
     92       1.12  pgoyette 			if (module_autoload(auto_list->al_module,
     93       1.14  pgoyette 			    MODULE_CLASS_ANY) != 0 ||
     94        1.1     pooka 			    sy->sy_call == sys_nomodule) {
     95        1.1     pooka 			    	break;
     96        1.1     pooka 			}
     97        1.5  pgoyette 			kernconfig_unlock();
     98        1.1     pooka 			return ERESTART;
     99        1.1     pooka 		}
    100        1.5  pgoyette 	kernconfig_unlock();
    101        1.1     pooka #endif	/* MODULAR */
    102        1.1     pooka 
    103        1.1     pooka 	return sys_nosys(l, v, retval);
    104        1.1     pooka }
    105        1.1     pooka 
    106        1.1     pooka int
    107        1.1     pooka syscall_establish(const struct emul *em, const struct syscall_package *sp)
    108        1.1     pooka {
    109        1.1     pooka 	struct sysent *sy;
    110        1.1     pooka 	int i;
    111        1.1     pooka 
    112        1.5  pgoyette 	KASSERT(kernconfig_is_held());
    113        1.1     pooka 
    114        1.1     pooka 	if (em == NULL) {
    115        1.1     pooka 		em = &emul_netbsd;
    116        1.1     pooka 	}
    117        1.1     pooka 	sy = em->e_sysent;
    118        1.1     pooka 
    119        1.1     pooka 	/*
    120        1.1     pooka 	 * Ensure that all preconditions are valid, since this is
    121        1.1     pooka 	 * an all or nothing deal.  Once a system call is entered,
    122        1.1     pooka 	 * it can become busy and we could be unable to remove it
    123        1.1     pooka 	 * on error.
    124        1.1     pooka 	 */
    125        1.1     pooka 	for (i = 0; sp[i].sp_call != NULL; i++) {
    126  1.16.14.1  christos 		if (sp[i].sp_code >= SYS_NSYSENT)
    127  1.16.14.1  christos 			return EINVAL;
    128  1.16.14.1  christos 		if (sy[sp[i].sp_code].sy_call != sys_nomodule &&
    129  1.16.14.1  christos 		    sy[sp[i].sp_code].sy_call != sys_nosys) {
    130        1.1     pooka #ifdef DIAGNOSTIC
    131        1.1     pooka 			printf("syscall %d is busy\n", sp[i].sp_code);
    132        1.1     pooka #endif
    133        1.1     pooka 			return EBUSY;
    134        1.1     pooka 		}
    135        1.1     pooka 	}
    136        1.1     pooka 	/* Everything looks good, patch them in. */
    137        1.1     pooka 	for (i = 0; sp[i].sp_call != NULL; i++) {
    138        1.1     pooka 		sy[sp[i].sp_code].sy_call = sp[i].sp_call;
    139        1.1     pooka 	}
    140        1.1     pooka 
    141        1.1     pooka 	return 0;
    142        1.1     pooka }
    143        1.1     pooka 
    144        1.1     pooka int
    145        1.1     pooka syscall_disestablish(const struct emul *em, const struct syscall_package *sp)
    146        1.1     pooka {
    147        1.1     pooka 	struct sysent *sy;
    148  1.16.14.1  christos 	const uint32_t *sb;
    149        1.1     pooka 	lwp_t *l;
    150        1.1     pooka 	int i;
    151        1.1     pooka 
    152        1.5  pgoyette 	KASSERT(kernconfig_is_held());
    153        1.1     pooka 
    154        1.1     pooka 	if (em == NULL) {
    155        1.1     pooka 		em = &emul_netbsd;
    156        1.1     pooka 	}
    157        1.1     pooka 	sy = em->e_sysent;
    158  1.16.14.1  christos 	sb = em->e_nomodbits;
    159        1.1     pooka 
    160        1.1     pooka 	/*
    161  1.16.14.1  christos 	 * First, patch the system calls to sys_nomodule or sys_nosys
    162  1.16.14.1  christos 	 * to gate further activity.
    163        1.1     pooka 	 */
    164        1.1     pooka 	for (i = 0; sp[i].sp_call != NULL; i++) {
    165        1.1     pooka 		KASSERT(sy[sp[i].sp_code].sy_call == sp[i].sp_call);
    166  1.16.14.1  christos 		sy[sp[i].sp_code].sy_call =
    167  1.16.14.1  christos 		    sb[sp[i].sp_code / 32] & (1 << (sp[i].sp_code % 32)) ?
    168  1.16.14.1  christos 		      sys_nomodule : sys_nosys;
    169        1.1     pooka 	}
    170        1.1     pooka 
    171        1.1     pooka 	/*
    172        1.1     pooka 	 * Run a cross call to cycle through all CPUs.  This does two
    173        1.1     pooka 	 * things: lock activity provides a barrier and makes our update
    174        1.1     pooka 	 * of sy_call visible to all CPUs, and upon return we can be sure
    175        1.1     pooka 	 * that we see pertinent values of l_sysent posted by remote CPUs.
    176        1.1     pooka 	 */
    177  1.16.14.2    martin 	xc_barrier(0);
    178        1.1     pooka 
    179        1.1     pooka 	/*
    180        1.1     pooka 	 * Now it's safe to check l_sysent.  Run through all LWPs and see
    181        1.1     pooka 	 * if anyone is still using the system call.
    182        1.1     pooka 	 */
    183        1.1     pooka 	for (i = 0; sp[i].sp_call != NULL; i++) {
    184        1.1     pooka 		mutex_enter(proc_lock);
    185        1.1     pooka 		LIST_FOREACH(l, &alllwp, l_list) {
    186        1.1     pooka 			if (l->l_sysent == &sy[sp[i].sp_code]) {
    187        1.1     pooka 				break;
    188        1.1     pooka 			}
    189        1.1     pooka 		}
    190        1.1     pooka 		mutex_exit(proc_lock);
    191        1.1     pooka 		if (l == NULL) {
    192        1.1     pooka 			continue;
    193        1.1     pooka 		}
    194        1.1     pooka 		/*
    195        1.1     pooka 		 * We lose: one or more calls are still in use.  Put back
    196        1.1     pooka 		 * the old entrypoints and act like nothing happened.
    197        1.5  pgoyette 		 * When we drop kernconfig_lock, any system calls held in
    198        1.1     pooka 		 * sys_nomodule() will be restarted.
    199        1.1     pooka 		 */
    200        1.1     pooka 		for (i = 0; sp[i].sp_call != NULL; i++) {
    201        1.1     pooka 			sy[sp[i].sp_code].sy_call = sp[i].sp_call;
    202        1.1     pooka 		}
    203        1.1     pooka 		return EBUSY;
    204        1.1     pooka 	}
    205        1.1     pooka 
    206        1.1     pooka 	return 0;
    207        1.1     pooka }
    208        1.8     pooka 
/*
 * Report whether system call tracing applies to process 'p'.
 *
 * Always true when built with SYSCALL_DEBUG.  Otherwise true when
 * KTRACE is built in and either KTRFAC_SYSCALL or KTRFAC_SYSRET is set
 * in p_traceflag, or when PTRACE is built in and PSL_SYSCALL is set in
 * p_slflag.
 */
bool
trace_is_enabled(struct proc *p)
{
	bool enabled = false;

#ifdef SYSCALL_DEBUG
	enabled = true;
#endif
#ifdef KTRACE
	enabled = enabled ||
	    ISSET(p->p_traceflag, (KTRFAC_SYSCALL | KTRFAC_SYSRET));
#endif
#ifdef PTRACE
	enabled = enabled || ISSET(p->p_slflag, PSL_SYSCALL);
#endif

	return enabled;
}
    229        1.8     pooka 
    230        1.8     pooka /*
    231        1.8     pooka  * Start trace of particular system call. If process is being traced,
    232        1.8     pooka  * this routine is called by MD syscall dispatch code just before
    233        1.8     pooka  * a system call is actually executed.
    234        1.8     pooka  */
    235        1.8     pooka int
    236       1.10  christos trace_enter(register_t code, const struct sysent *sy, const void *args)
    237        1.8     pooka {
    238        1.8     pooka 	int error = 0;
    239        1.8     pooka 
    240       1.10  christos #ifdef KDTRACE_HOOKS
    241       1.10  christos 	if (sy->sy_entry) {
    242       1.10  christos 		struct emul *e = curlwp->l_proc->p_emul;
    243       1.10  christos 		(*e->e_dtrace_syscall)(sy->sy_entry, code, sy, args, NULL, 0);
    244       1.10  christos 	}
    245       1.10  christos #endif
    246       1.10  christos 
    247        1.8     pooka #ifdef SYSCALL_DEBUG
    248        1.8     pooka 	scdebug_call(code, args);
    249        1.8     pooka #endif /* SYSCALL_DEBUG */
    250        1.8     pooka 
    251       1.10  christos 	ktrsyscall(code, args, sy->sy_narg);
    252        1.8     pooka 
    253        1.8     pooka #ifdef PTRACE
    254        1.8     pooka 	if ((curlwp->l_proc->p_slflag & (PSL_SYSCALL|PSL_TRACED)) ==
    255        1.8     pooka 	    (PSL_SYSCALL|PSL_TRACED)) {
    256  1.16.14.1  christos 		proc_stoptrace(TRAP_SCE, code, args, NULL, 0);
    257        1.8     pooka 		if (curlwp->l_proc->p_slflag & PSL_SYSCALLEMU) {
    258        1.8     pooka 			/* tracer will emulate syscall for us */
    259        1.8     pooka 			error = EJUSTRETURN;
    260        1.8     pooka 		}
    261        1.8     pooka 	}
    262        1.8     pooka #endif
    263        1.8     pooka 	return error;
    264        1.8     pooka }
    265        1.8     pooka 
    266        1.8     pooka /*
    267        1.8     pooka  * End trace of particular system call. If process is being traced,
    268        1.8     pooka  * this routine is called by MD syscall dispatch code just after
    269        1.8     pooka  * a system call finishes.
    270        1.8     pooka  * MD caller guarantees the passed 'code' is within the supported
    271        1.8     pooka  * system call number range for emulation the process runs under.
    272        1.8     pooka  */
    273        1.8     pooka void
    274       1.10  christos trace_exit(register_t code, const struct sysent *sy, const void *args,
    275       1.10  christos     register_t rval[], int error)
    276        1.8     pooka {
    277       1.10  christos #if defined(PTRACE) || defined(KDTRACE_HOOKS)
    278        1.8     pooka 	struct proc *p = curlwp->l_proc;
    279        1.8     pooka #endif
    280        1.8     pooka 
    281       1.10  christos #ifdef KDTRACE_HOOKS
    282       1.10  christos 	if (sy->sy_return) {
    283       1.10  christos 		(*p->p_emul->e_dtrace_syscall)(sy->sy_return, code, sy, args,
    284       1.10  christos 		    rval, error);
    285       1.10  christos 	}
    286       1.10  christos #endif
    287       1.10  christos 
    288        1.8     pooka #ifdef SYSCALL_DEBUG
    289        1.8     pooka 	scdebug_ret(code, error, rval);
    290        1.8     pooka #endif /* SYSCALL_DEBUG */
    291        1.8     pooka 
    292        1.8     pooka 	ktrsysret(code, error, rval);
    293        1.8     pooka 
    294        1.8     pooka #ifdef PTRACE
    295        1.8     pooka 	if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED|PSL_SYSCALLEMU)) ==
    296       1.15  christos 	    (PSL_SYSCALL|PSL_TRACED)) {
    297  1.16.14.1  christos 		proc_stoptrace(TRAP_SCX, code, args, rval, error);
    298       1.15  christos 	}
    299        1.8     pooka 	CLR(p->p_slflag, PSL_SYSCALLEMU);
    300        1.8     pooka #endif
    301        1.8     pooka }
    302