1 /* $NetBSD: kern_syscall.c,v 1.23 2026/02/01 03:32:44 riastradh Exp $ */ 2 3 /*- 4 * Copyright (c) 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software developed for The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: kern_syscall.c,v 1.23 2026/02/01 03:32:44 riastradh Exp $"); 34 35 #ifdef _KERNEL_OPT 36 #include "opt_dtrace.h" 37 #include "opt_ktrace.h" 38 #include "opt_modular.h" 39 #include "opt_ptrace.h" 40 #include "opt_syscall_debug.h" 41 #endif 42 43 /* XXX To get syscall prototypes. */ 44 #define SYSVSHM 45 #define SYSVSEM 46 #define SYSVMSG 47 48 #include <sys/param.h> 49 50 #include <sys/ktrace.h> 51 #include <sys/module.h> 52 #include <sys/ptrace.h> 53 #include <sys/sched.h> 54 #include <sys/sdt.h> 55 #include <sys/syscall.h> 56 #include <sys/syscallargs.h> 57 #include <sys/syscallvar.h> 58 #include <sys/systm.h> 59 #include <sys/xcall.h> 60 61 int 62 sys_nomodule(struct lwp *l, const void *v, register_t *retval) 63 { 64 #ifdef MODULAR 65 66 const struct sysent *sy; 67 const struct emul *em; 68 const struct sc_autoload *auto_list; 69 u_int code; 70 71 /* 72 * Restart the syscall if we interrupted a module unload that 73 * failed. Acquiring kernconfig_lock delays us until any unload 74 * has been completed or rolled back. 75 */ 76 kernconfig_lock(); 77 sy = l->l_sysent; 78 if (sy->sy_call != sys_nomodule) { 79 kernconfig_unlock(); 80 return SET_ERROR(ERESTART); 81 } 82 /* 83 * Try to autoload a module to satisfy the request. If it 84 * works, retry the request. 85 */ 86 em = l->l_proc->p_emul; 87 code = sy - em->e_sysent; 88 89 if ((auto_list = em->e_sc_autoload) != NULL) 90 for (; auto_list->al_code > 0; auto_list++) { 91 if (auto_list->al_code != code) { 92 continue; 93 } 94 if (module_autoload(auto_list->al_module, 95 MODULE_CLASS_ANY) != 0 || 96 sy->sy_call == sys_nomodule) { 97 break; 98 } 99 kernconfig_unlock(); 100 return SET_ERROR(ERESTART); 101 } 102 kernconfig_unlock(); 103 #endif /* MODULAR */ 104 105 return sys_nosys(l, v, retval); 106 } 107 108 int 109 syscall_establish(const struct emul *em, const struct syscall_package *sp) 110 { 111 struct sysent *sy; 112 int i; 113 114 KASSERT(kernconfig_is_held()); 115 116 if (em == NULL) { 117 em = &emul_netbsd; 118 } 119 sy = em->e_sysent; 120 121 /* 122 * Ensure that all preconditions are valid, since this is 123 * an all or nothing deal. Once a system call is entered, 124 * it can become busy and we could be unable to remove it 125 * on error. 126 */ 127 for (i = 0; sp[i].sp_call != NULL; i++) { 128 if (sp[i].sp_code >= SYS_NSYSENT) 129 return SET_ERROR(EINVAL); 130 if (sy[sp[i].sp_code].sy_call != sys_nomodule && 131 sy[sp[i].sp_code].sy_call != sys_nosys) { 132 #ifdef DIAGNOSTIC 133 printf("syscall %d is busy\n", sp[i].sp_code); 134 #endif 135 return SET_ERROR(EBUSY); 136 } 137 } 138 /* Everything looks good, patch them in. */ 139 for (i = 0; sp[i].sp_call != NULL; i++) { 140 sy[sp[i].sp_code].sy_call = sp[i].sp_call; 141 } 142 143 return 0; 144 } 145 146 int 147 syscall_disestablish(const struct emul *em, const struct syscall_package *sp) 148 { 149 struct sysent *sy; 150 const uint32_t *sb; 151 lwp_t *l; 152 int i; 153 154 KASSERT(kernconfig_is_held()); 155 156 if (em == NULL) { 157 em = &emul_netbsd; 158 } 159 sy = em->e_sysent; 160 sb = em->e_nomodbits; 161 162 /* 163 * First, patch the system calls to sys_nomodule or sys_nosys 164 * to gate further activity. 165 */ 166 for (i = 0; sp[i].sp_call != NULL; i++) { 167 KASSERT(sy[sp[i].sp_code].sy_call == sp[i].sp_call); 168 sy[sp[i].sp_code].sy_call = 169 sb[sp[i].sp_code / 32] & (1 << (sp[i].sp_code % 32)) ? 170 sys_nomodule : sys_nosys; 171 } 172 173 /* 174 * Run a cross call to cycle through all CPUs. This does two 175 * things: lock activity provides a barrier and makes our update 176 * of sy_call visible to all CPUs, and upon return we can be sure 177 * that we see pertinent values of l_sysent posted by remote CPUs. 178 */ 179 xc_barrier(0); 180 181 /* 182 * Now it's safe to check l_sysent. Run through all LWPs and see 183 * if anyone is still using the system call. 184 */ 185 for (i = 0; sp[i].sp_call != NULL; i++) { 186 mutex_enter(&proc_lock); 187 LIST_FOREACH(l, &alllwp, l_list) { 188 if (l->l_sysent == &sy[sp[i].sp_code]) { 189 break; 190 } 191 } 192 mutex_exit(&proc_lock); 193 if (l == NULL) { 194 continue; 195 } 196 /* 197 * We lose: one or more calls are still in use. Put back 198 * the old entrypoints and act like nothing happened. 199 * When we drop kernconfig_lock, any system calls held in 200 * sys_nomodule() will be restarted. 201 */ 202 for (i = 0; sp[i].sp_call != NULL; i++) { 203 sy[sp[i].sp_code].sy_call = sp[i].sp_call; 204 } 205 return SET_ERROR(EBUSY); 206 } 207 208 return 0; 209 } 210 211 /* 212 * Return true if system call tracing is enabled for the specified process. 213 */ 214 bool 215 trace_is_enabled(struct proc *p) 216 { 217 #ifdef SYSCALL_DEBUG 218 return (true); 219 #endif 220 #ifdef KTRACE 221 if (ISSET(p->p_traceflag, (KTRFAC_SYSCALL | KTRFAC_SYSRET))) 222 return (true); 223 #endif 224 #ifdef PTRACE 225 if (ISSET(p->p_slflag, PSL_SYSCALL)) 226 return (true); 227 #endif 228 229 return (false); 230 } 231 232 /* 233 * Start trace of particular system call. If process is being traced, 234 * this routine is called by MD syscall dispatch code just before 235 * a system call is actually executed. 236 */ 237 int 238 trace_enter(register_t code, const struct sysent *sy, const void *args) 239 { 240 int error = 0; 241 #if defined(PTRACE) || defined(KDTRACE_HOOKS) 242 struct proc *p = curlwp->l_proc; 243 #endif 244 245 #ifdef KDTRACE_HOOKS 246 if (sy->sy_entry) { 247 struct emul *e = p->p_emul; 248 if (e->e_dtrace_syscall) 249 (*e->e_dtrace_syscall)(sy->sy_entry, code, sy, args, 250 NULL, 0); 251 } 252 #endif 253 254 #ifdef SYSCALL_DEBUG 255 scdebug_call(code, args); 256 #endif /* SYSCALL_DEBUG */ 257 258 ktrsyscall(code, args, sy->sy_narg); 259 260 #ifdef PTRACE 261 if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED)) == 262 (PSL_SYSCALL|PSL_TRACED)) { 263 proc_stoptrace(TRAP_SCE, code, args, NULL, 0); 264 if (curlwp->l_proc->p_slflag & PSL_SYSCALLEMU) { 265 /* tracer will emulate syscall for us */ 266 error = SET_ERROR(EJUSTRETURN); 267 } 268 } 269 #endif 270 return error; 271 } 272 273 /* 274 * End trace of particular system call. If process is being traced, 275 * this routine is called by MD syscall dispatch code just after 276 * a system call finishes. 277 * MD caller guarantees the passed 'code' is within the supported 278 * system call number range for emulation the process runs under. 279 */ 280 void 281 trace_exit(register_t code, const struct sysent *sy, const void *args, 282 register_t rval[], int error) 283 { 284 #if defined(PTRACE) || defined(KDTRACE_HOOKS) 285 struct proc *p = curlwp->l_proc; 286 #endif 287 288 #ifdef KDTRACE_HOOKS 289 if (sy->sy_return) { 290 struct emul *e = p->p_emul; 291 if (e->e_dtrace_syscall) 292 (*p->p_emul->e_dtrace_syscall)(sy->sy_return, code, sy, 293 args, rval, error); 294 } 295 #endif 296 297 #ifdef SYSCALL_DEBUG 298 scdebug_ret(code, error, rval); 299 #endif /* SYSCALL_DEBUG */ 300 301 ktrsysret(code, error, rval); 302 303 #ifdef PTRACE 304 if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED|PSL_SYSCALLEMU)) == 305 (PSL_SYSCALL|PSL_TRACED)) { 306 proc_stoptrace(TRAP_SCX, code, args, rval, error); 307 } 308 CLR(p->p_slflag, PSL_SYSCALLEMU); 309 #endif 310 } 311