kern_syscall.c revision 1.16.14.2 1 /* $NetBSD: kern_syscall.c,v 1.16.14.2 2020/04/13 08:05:04 martin Exp $ */
2
3 /*-
4 * Copyright (c) 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software developed for The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: kern_syscall.c,v 1.16.14.2 2020/04/13 08:05:04 martin Exp $");
34
35 #ifdef _KERNEL_OPT
36 #include "opt_modular.h"
37 #include "opt_syscall_debug.h"
38 #include "opt_ktrace.h"
39 #include "opt_ptrace.h"
40 #include "opt_dtrace.h"
41 #endif
42
43 /* XXX To get syscall prototypes. */
44 #define SYSVSHM
45 #define SYSVSEM
46 #define SYSVMSG
47
48 #include <sys/param.h>
49 #include <sys/module.h>
50 #include <sys/sched.h>
51 #include <sys/syscall.h>
52 #include <sys/syscallargs.h>
53 #include <sys/syscallvar.h>
54 #include <sys/systm.h>
55 #include <sys/xcall.h>
56 #include <sys/ktrace.h>
57 #include <sys/ptrace.h>
58
59 int
60 sys_nomodule(struct lwp *l, const void *v, register_t *retval)
61 {
62 #ifdef MODULAR
63
64 const struct sysent *sy;
65 const struct emul *em;
66 const struct sc_autoload *auto_list;
67 u_int code;
68
69 /*
70 * Restart the syscall if we interrupted a module unload that
71 * failed. Acquiring kernconfig_lock delays us until any unload
72 * has been completed or rolled back.
73 */
74 kernconfig_lock();
75 sy = l->l_sysent;
76 if (sy->sy_call != sys_nomodule) {
77 kernconfig_unlock();
78 return ERESTART;
79 }
80 /*
81 * Try to autoload a module to satisfy the request. If it
82 * works, retry the request.
83 */
84 em = l->l_proc->p_emul;
85 code = sy - em->e_sysent;
86
87 if ((auto_list = em->e_sc_autoload) != NULL)
88 for (; auto_list->al_code > 0; auto_list++) {
89 if (auto_list->al_code != code) {
90 continue;
91 }
92 if (module_autoload(auto_list->al_module,
93 MODULE_CLASS_ANY) != 0 ||
94 sy->sy_call == sys_nomodule) {
95 break;
96 }
97 kernconfig_unlock();
98 return ERESTART;
99 }
100 kernconfig_unlock();
101 #endif /* MODULAR */
102
103 return sys_nosys(l, v, retval);
104 }
105
106 int
107 syscall_establish(const struct emul *em, const struct syscall_package *sp)
108 {
109 struct sysent *sy;
110 int i;
111
112 KASSERT(kernconfig_is_held());
113
114 if (em == NULL) {
115 em = &emul_netbsd;
116 }
117 sy = em->e_sysent;
118
119 /*
120 * Ensure that all preconditions are valid, since this is
121 * an all or nothing deal. Once a system call is entered,
122 * it can become busy and we could be unable to remove it
123 * on error.
124 */
125 for (i = 0; sp[i].sp_call != NULL; i++) {
126 if (sp[i].sp_code >= SYS_NSYSENT)
127 return EINVAL;
128 if (sy[sp[i].sp_code].sy_call != sys_nomodule &&
129 sy[sp[i].sp_code].sy_call != sys_nosys) {
130 #ifdef DIAGNOSTIC
131 printf("syscall %d is busy\n", sp[i].sp_code);
132 #endif
133 return EBUSY;
134 }
135 }
136 /* Everything looks good, patch them in. */
137 for (i = 0; sp[i].sp_call != NULL; i++) {
138 sy[sp[i].sp_code].sy_call = sp[i].sp_call;
139 }
140
141 return 0;
142 }
143
144 int
145 syscall_disestablish(const struct emul *em, const struct syscall_package *sp)
146 {
147 struct sysent *sy;
148 const uint32_t *sb;
149 lwp_t *l;
150 int i;
151
152 KASSERT(kernconfig_is_held());
153
154 if (em == NULL) {
155 em = &emul_netbsd;
156 }
157 sy = em->e_sysent;
158 sb = em->e_nomodbits;
159
160 /*
161 * First, patch the system calls to sys_nomodule or sys_nosys
162 * to gate further activity.
163 */
164 for (i = 0; sp[i].sp_call != NULL; i++) {
165 KASSERT(sy[sp[i].sp_code].sy_call == sp[i].sp_call);
166 sy[sp[i].sp_code].sy_call =
167 sb[sp[i].sp_code / 32] & (1 << (sp[i].sp_code % 32)) ?
168 sys_nomodule : sys_nosys;
169 }
170
171 /*
172 * Run a cross call to cycle through all CPUs. This does two
173 * things: lock activity provides a barrier and makes our update
174 * of sy_call visible to all CPUs, and upon return we can be sure
175 * that we see pertinent values of l_sysent posted by remote CPUs.
176 */
177 xc_barrier(0);
178
179 /*
180 * Now it's safe to check l_sysent. Run through all LWPs and see
181 * if anyone is still using the system call.
182 */
183 for (i = 0; sp[i].sp_call != NULL; i++) {
184 mutex_enter(proc_lock);
185 LIST_FOREACH(l, &alllwp, l_list) {
186 if (l->l_sysent == &sy[sp[i].sp_code]) {
187 break;
188 }
189 }
190 mutex_exit(proc_lock);
191 if (l == NULL) {
192 continue;
193 }
194 /*
195 * We lose: one or more calls are still in use. Put back
196 * the old entrypoints and act like nothing happened.
197 * When we drop kernconfig_lock, any system calls held in
198 * sys_nomodule() will be restarted.
199 */
200 for (i = 0; sp[i].sp_call != NULL; i++) {
201 sy[sp[i].sp_code].sy_call = sp[i].sp_call;
202 }
203 return EBUSY;
204 }
205
206 return 0;
207 }
208
209 /*
210 * Return true if system call tracing is enabled for the specified process.
211 */
212 bool
213 trace_is_enabled(struct proc *p)
214 {
215 #ifdef SYSCALL_DEBUG
216 return (true);
217 #endif
218 #ifdef KTRACE
219 if (ISSET(p->p_traceflag, (KTRFAC_SYSCALL | KTRFAC_SYSRET)))
220 return (true);
221 #endif
222 #ifdef PTRACE
223 if (ISSET(p->p_slflag, PSL_SYSCALL))
224 return (true);
225 #endif
226
227 return (false);
228 }
229
230 /*
231 * Start trace of particular system call. If process is being traced,
232 * this routine is called by MD syscall dispatch code just before
233 * a system call is actually executed.
234 */
235 int
236 trace_enter(register_t code, const struct sysent *sy, const void *args)
237 {
238 int error = 0;
239
240 #ifdef KDTRACE_HOOKS
241 if (sy->sy_entry) {
242 struct emul *e = curlwp->l_proc->p_emul;
243 (*e->e_dtrace_syscall)(sy->sy_entry, code, sy, args, NULL, 0);
244 }
245 #endif
246
247 #ifdef SYSCALL_DEBUG
248 scdebug_call(code, args);
249 #endif /* SYSCALL_DEBUG */
250
251 ktrsyscall(code, args, sy->sy_narg);
252
253 #ifdef PTRACE
254 if ((curlwp->l_proc->p_slflag & (PSL_SYSCALL|PSL_TRACED)) ==
255 (PSL_SYSCALL|PSL_TRACED)) {
256 proc_stoptrace(TRAP_SCE, code, args, NULL, 0);
257 if (curlwp->l_proc->p_slflag & PSL_SYSCALLEMU) {
258 /* tracer will emulate syscall for us */
259 error = EJUSTRETURN;
260 }
261 }
262 #endif
263 return error;
264 }
265
266 /*
267 * End trace of particular system call. If process is being traced,
268 * this routine is called by MD syscall dispatch code just after
269 * a system call finishes.
270 * MD caller guarantees the passed 'code' is within the supported
271 * system call number range for emulation the process runs under.
272 */
273 void
274 trace_exit(register_t code, const struct sysent *sy, const void *args,
275 register_t rval[], int error)
276 {
277 #if defined(PTRACE) || defined(KDTRACE_HOOKS)
278 struct proc *p = curlwp->l_proc;
279 #endif
280
281 #ifdef KDTRACE_HOOKS
282 if (sy->sy_return) {
283 (*p->p_emul->e_dtrace_syscall)(sy->sy_return, code, sy, args,
284 rval, error);
285 }
286 #endif
287
288 #ifdef SYSCALL_DEBUG
289 scdebug_ret(code, error, rval);
290 #endif /* SYSCALL_DEBUG */
291
292 ktrsysret(code, error, rval);
293
294 #ifdef PTRACE
295 if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED|PSL_SYSCALLEMU)) ==
296 (PSL_SYSCALL|PSL_TRACED)) {
297 proc_stoptrace(TRAP_SCX, code, args, rval, error);
298 }
299 CLR(p->p_slflag, PSL_SYSCALLEMU);
300 #endif
301 }
302