1 1.21 christos /* $NetBSD: kern_syscall.c,v 1.21 2020/08/31 19:51:30 christos Exp $ */ 2 1.1 pooka 3 1.1 pooka /*- 4 1.1 pooka * Copyright (c) 2008 The NetBSD Foundation, Inc. 5 1.1 pooka * All rights reserved. 6 1.1 pooka * 7 1.1 pooka * This code is derived from software developed for The NetBSD Foundation 8 1.1 pooka * by Andrew Doran. 9 1.1 pooka * 10 1.1 pooka * Redistribution and use in source and binary forms, with or without 11 1.1 pooka * modification, are permitted provided that the following conditions 12 1.1 pooka * are met: 13 1.1 pooka * 1. Redistributions of source code must retain the above copyright 14 1.1 pooka * notice, this list of conditions and the following disclaimer. 15 1.1 pooka * 2. Redistributions in binary form must reproduce the above copyright 16 1.1 pooka * notice, this list of conditions and the following disclaimer in the 17 1.1 pooka * documentation and/or other materials provided with the distribution. 18 1.1 pooka * 19 1.1 pooka * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 1.1 pooka * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 1.1 pooka * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 1.1 pooka * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 1.1 pooka * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 1.1 pooka * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 1.1 pooka * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 1.1 pooka * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 1.1 pooka * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 1.1 pooka * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 1.1 pooka * POSSIBILITY OF SUCH DAMAGE. 30 1.1 pooka */ 31 1.1 pooka 32 1.1 pooka #include <sys/cdefs.h> 33 1.21 christos __KERNEL_RCSID(0, "$NetBSD: kern_syscall.c,v 1.21 2020/08/31 19:51:30 christos Exp $"); 34 1.2 pooka 35 1.8 pooka #ifdef _KERNEL_OPT 36 1.2 pooka #include "opt_modular.h" 37 1.8 pooka #include "opt_syscall_debug.h" 38 1.8 pooka #include "opt_ktrace.h" 39 1.8 pooka #include "opt_ptrace.h" 40 1.10 christos #include "opt_dtrace.h" 41 1.8 pooka #endif 42 1.1 pooka 43 1.3 pooka /* XXX To get syscall prototypes. */ 44 1.3 pooka #define SYSVSHM 45 1.3 pooka #define SYSVSEM 46 1.3 pooka #define SYSVMSG 47 1.3 pooka 48 1.1 pooka #include <sys/param.h> 49 1.1 pooka #include <sys/module.h> 50 1.4 pooka #include <sys/sched.h> 51 1.1 pooka #include <sys/syscall.h> 52 1.1 pooka #include <sys/syscallargs.h> 53 1.1 pooka #include <sys/syscallvar.h> 54 1.5 pgoyette #include <sys/systm.h> 55 1.1 pooka #include <sys/xcall.h> 56 1.8 pooka #include <sys/ktrace.h> 57 1.8 pooka #include <sys/ptrace.h> 58 1.1 pooka 59 1.1 pooka int 60 1.1 pooka sys_nomodule(struct lwp *l, const void *v, register_t *retval) 61 1.1 pooka { 62 1.1 pooka #ifdef MODULAR 63 1.11 pgoyette 64 1.1 pooka const struct sysent *sy; 65 1.1 pooka const struct emul *em; 66 1.13 pgoyette const struct sc_autoload *auto_list; 67 1.13 pgoyette u_int code; 68 1.1 pooka 69 1.1 pooka /* 70 1.1 pooka * Restart the syscall if we interrupted a module unload that 71 1.5 pgoyette * failed. Acquiring kernconfig_lock delays us until any unload 72 1.1 pooka * has been completed or rolled back. 73 1.1 pooka */ 74 1.5 pgoyette kernconfig_lock(); 75 1.1 pooka sy = l->l_sysent; 76 1.1 pooka if (sy->sy_call != sys_nomodule) { 77 1.5 pgoyette kernconfig_unlock(); 78 1.1 pooka return ERESTART; 79 1.1 pooka } 80 1.1 pooka /* 81 1.1 pooka * Try to autoload a module to satisfy the request. If it 82 1.1 pooka * works, retry the request. 83 1.1 pooka */ 84 1.1 pooka em = l->l_proc->p_emul; 85 1.12 pgoyette code = sy - em->e_sysent; 86 1.12 pgoyette 87 1.12 pgoyette if ((auto_list = em->e_sc_autoload) != NULL) 88 1.12 pgoyette for (; auto_list->al_code > 0; auto_list++) { 89 1.12 pgoyette if (auto_list->al_code != code) { 90 1.1 pooka continue; 91 1.1 pooka } 92 1.12 pgoyette if (module_autoload(auto_list->al_module, 93 1.14 pgoyette MODULE_CLASS_ANY) != 0 || 94 1.1 pooka sy->sy_call == sys_nomodule) { 95 1.1 pooka break; 96 1.1 pooka } 97 1.5 pgoyette kernconfig_unlock(); 98 1.1 pooka return ERESTART; 99 1.1 pooka } 100 1.5 pgoyette kernconfig_unlock(); 101 1.1 pooka #endif /* MODULAR */ 102 1.1 pooka 103 1.1 pooka return sys_nosys(l, v, retval); 104 1.1 pooka } 105 1.1 pooka 106 1.1 pooka int 107 1.1 pooka syscall_establish(const struct emul *em, const struct syscall_package *sp) 108 1.1 pooka { 109 1.1 pooka struct sysent *sy; 110 1.1 pooka int i; 111 1.1 pooka 112 1.5 pgoyette KASSERT(kernconfig_is_held()); 113 1.1 pooka 114 1.1 pooka if (em == NULL) { 115 1.1 pooka em = &emul_netbsd; 116 1.1 pooka } 117 1.1 pooka sy = em->e_sysent; 118 1.1 pooka 119 1.1 pooka /* 120 1.1 pooka * Ensure that all preconditions are valid, since this is 121 1.1 pooka * an all or nothing deal. Once a system call is entered, 122 1.1 pooka * it can become busy and we could be unable to remove it 123 1.1 pooka * on error. 124 1.1 pooka */ 125 1.1 pooka for (i = 0; sp[i].sp_call != NULL; i++) { 126 1.17 pgoyette if (sp[i].sp_code >= SYS_NSYSENT) 127 1.17 pgoyette return EINVAL; 128 1.17 pgoyette if (sy[sp[i].sp_code].sy_call != sys_nomodule && 129 1.17 pgoyette sy[sp[i].sp_code].sy_call != sys_nosys) { 130 1.1 pooka #ifdef DIAGNOSTIC 131 1.1 pooka printf("syscall %d is busy\n", sp[i].sp_code); 132 1.1 pooka #endif 133 1.1 pooka return EBUSY; 134 1.1 pooka } 135 1.1 pooka } 136 1.1 pooka /* Everything looks good, patch them in. */ 137 1.1 pooka for (i = 0; sp[i].sp_call != NULL; i++) { 138 1.1 pooka sy[sp[i].sp_code].sy_call = sp[i].sp_call; 139 1.1 pooka } 140 1.1 pooka 141 1.1 pooka return 0; 142 1.1 pooka } 143 1.1 pooka 144 1.1 pooka int 145 1.1 pooka syscall_disestablish(const struct emul *em, const struct syscall_package *sp) 146 1.1 pooka { 147 1.1 pooka struct sysent *sy; 148 1.17 pgoyette const uint32_t *sb; 149 1.1 pooka lwp_t *l; 150 1.1 pooka int i; 151 1.1 pooka 152 1.5 pgoyette KASSERT(kernconfig_is_held()); 153 1.1 pooka 154 1.1 pooka if (em == NULL) { 155 1.1 pooka em = &emul_netbsd; 156 1.1 pooka } 157 1.1 pooka sy = em->e_sysent; 158 1.17 pgoyette sb = em->e_nomodbits; 159 1.1 pooka 160 1.1 pooka /* 161 1.17 pgoyette * First, patch the system calls to sys_nomodule or sys_nosys 162 1.17 pgoyette * to gate further activity. 163 1.1 pooka */ 164 1.1 pooka for (i = 0; sp[i].sp_call != NULL; i++) { 165 1.1 pooka KASSERT(sy[sp[i].sp_code].sy_call == sp[i].sp_call); 166 1.17 pgoyette sy[sp[i].sp_code].sy_call = 167 1.17 pgoyette sb[sp[i].sp_code / 32] & (1 << (sp[i].sp_code % 32)) ? 168 1.17 pgoyette sys_nomodule : sys_nosys; 169 1.1 pooka } 170 1.1 pooka 171 1.1 pooka /* 172 1.1 pooka * Run a cross call to cycle through all CPUs. This does two 173 1.1 pooka * things: lock activity provides a barrier and makes our update 174 1.1 pooka * of sy_call visible to all CPUs, and upon return we can be sure 175 1.1 pooka * that we see pertinent values of l_sysent posted by remote CPUs. 176 1.1 pooka */ 177 1.19 uwe xc_barrier(0); 178 1.1 pooka 179 1.1 pooka /* 180 1.1 pooka * Now it's safe to check l_sysent. Run through all LWPs and see 181 1.1 pooka * if anyone is still using the system call. 182 1.1 pooka */ 183 1.1 pooka for (i = 0; sp[i].sp_call != NULL; i++) { 184 1.20 ad mutex_enter(&proc_lock); 185 1.1 pooka LIST_FOREACH(l, &alllwp, l_list) { 186 1.1 pooka if (l->l_sysent == &sy[sp[i].sp_code]) { 187 1.1 pooka break; 188 1.1 pooka } 189 1.1 pooka } 190 1.20 ad mutex_exit(&proc_lock); 191 1.1 pooka if (l == NULL) { 192 1.1 pooka continue; 193 1.1 pooka } 194 1.1 pooka /* 195 1.1 pooka * We lose: one or more calls are still in use. Put back 196 1.1 pooka * the old entrypoints and act like nothing happened. 197 1.5 pgoyette * When we drop kernconfig_lock, any system calls held in 198 1.1 pooka * sys_nomodule() will be restarted. 199 1.1 pooka */ 200 1.1 pooka for (i = 0; sp[i].sp_call != NULL; i++) { 201 1.1 pooka sy[sp[i].sp_code].sy_call = sp[i].sp_call; 202 1.1 pooka } 203 1.1 pooka return EBUSY; 204 1.1 pooka } 205 1.1 pooka 206 1.1 pooka return 0; 207 1.1 pooka } 208 1.8 pooka 209 1.8 pooka /* 210 1.8 pooka * Return true if system call tracing is enabled for the specified process. 211 1.8 pooka */ 212 1.8 pooka bool 213 1.8 pooka trace_is_enabled(struct proc *p) 214 1.8 pooka { 215 1.8 pooka #ifdef SYSCALL_DEBUG 216 1.8 pooka return (true); 217 1.8 pooka #endif 218 1.8 pooka #ifdef KTRACE 219 1.8 pooka if (ISSET(p->p_traceflag, (KTRFAC_SYSCALL | KTRFAC_SYSRET))) 220 1.8 pooka return (true); 221 1.8 pooka #endif 222 1.8 pooka #ifdef PTRACE 223 1.8 pooka if (ISSET(p->p_slflag, PSL_SYSCALL)) 224 1.8 pooka return (true); 225 1.8 pooka #endif 226 1.8 pooka 227 1.8 pooka return (false); 228 1.8 pooka } 229 1.8 pooka 230 1.8 pooka /* 231 1.8 pooka * Start trace of particular system call. If process is being traced, 232 1.8 pooka * this routine is called by MD syscall dispatch code just before 233 1.8 pooka * a system call is actually executed. 234 1.8 pooka */ 235 1.8 pooka int 236 1.10 christos trace_enter(register_t code, const struct sysent *sy, const void *args) 237 1.8 pooka { 238 1.8 pooka int error = 0; 239 1.21 christos #if defined(PTRACE) || defined(KDTRACE_HOOKS) 240 1.21 christos struct proc *p = curlwp->l_proc; 241 1.21 christos #endif 242 1.8 pooka 243 1.10 christos #ifdef KDTRACE_HOOKS 244 1.10 christos if (sy->sy_entry) { 245 1.21 christos struct emul *e = p->p_emul; 246 1.21 christos if (e->e_dtrace_syscall) 247 1.21 christos (*e->e_dtrace_syscall)(sy->sy_entry, code, sy, args, 248 1.21 christos NULL, 0); 249 1.10 christos } 250 1.10 christos #endif 251 1.10 christos 252 1.8 pooka #ifdef SYSCALL_DEBUG 253 1.8 pooka scdebug_call(code, args); 254 1.8 pooka #endif /* SYSCALL_DEBUG */ 255 1.8 pooka 256 1.10 christos ktrsyscall(code, args, sy->sy_narg); 257 1.8 pooka 258 1.8 pooka #ifdef PTRACE 259 1.21 christos if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED)) == 260 1.8 pooka (PSL_SYSCALL|PSL_TRACED)) { 261 1.18 kamil proc_stoptrace(TRAP_SCE, code, args, NULL, 0); 262 1.8 pooka if (curlwp->l_proc->p_slflag & PSL_SYSCALLEMU) { 263 1.8 pooka /* tracer will emulate syscall for us */ 264 1.8 pooka error = EJUSTRETURN; 265 1.8 pooka } 266 1.8 pooka } 267 1.8 pooka #endif 268 1.8 pooka return error; 269 1.8 pooka } 270 1.8 pooka 271 1.8 pooka /* 272 1.8 pooka * End trace of particular system call. If process is being traced, 273 1.8 pooka * this routine is called by MD syscall dispatch code just after 274 1.8 pooka * a system call finishes. 275 1.8 pooka * MD caller guarantees the passed 'code' is within the supported 276 1.8 pooka * system call number range for emulation the process runs under. 277 1.8 pooka */ 278 1.8 pooka void 279 1.10 christos trace_exit(register_t code, const struct sysent *sy, const void *args, 280 1.10 christos register_t rval[], int error) 281 1.8 pooka { 282 1.10 christos #if defined(PTRACE) || defined(KDTRACE_HOOKS) 283 1.8 pooka struct proc *p = curlwp->l_proc; 284 1.8 pooka #endif 285 1.8 pooka 286 1.10 christos #ifdef KDTRACE_HOOKS 287 1.10 christos if (sy->sy_return) { 288 1.21 christos struct emul *e = p->p_emul; 289 1.21 christos if (e->e_dtrace_syscall) 290 1.21 christos (*p->p_emul->e_dtrace_syscall)(sy->sy_return, code, sy, 291 1.21 christos args, rval, error); 292 1.10 christos } 293 1.10 christos #endif 294 1.10 christos 295 1.8 pooka #ifdef SYSCALL_DEBUG 296 1.8 pooka scdebug_ret(code, error, rval); 297 1.8 pooka #endif /* SYSCALL_DEBUG */ 298 1.8 pooka 299 1.8 pooka ktrsysret(code, error, rval); 300 1.8 pooka 301 1.8 pooka #ifdef PTRACE 302 1.8 pooka if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED|PSL_SYSCALLEMU)) == 303 1.15 christos (PSL_SYSCALL|PSL_TRACED)) { 304 1.18 kamil proc_stoptrace(TRAP_SCX, code, args, rval, error); 305 1.15 christos } 306 1.8 pooka CLR(p->p_slflag, PSL_SYSCALLEMU); 307 1.8 pooka #endif 308 1.8 pooka } 309