/*	$NetBSD: sys_lwp.c,v 1.1.2.4 2006/12/29 20:27:44 ad Exp $	*/

/*-
 * Copyright (c) 2001, 2006 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Nathan J. Williams, and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Lightweight process (LWP) system calls.  See kern_lwp.c for a description
 * of LWPs.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.1.2.4 2006/12/29 20:27:44 ad Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/sa.h>
#include <sys/savar.h>
#include <sys/types.h>
#include <sys/syscallargs.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/sleepq.h>

#include <uvm/uvm_extern.h>

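/* Maximum number of targets that one _lwp_unpark_all() call will process. */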
#define	LWP_UNPARK_MAX		1024

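/*
 * Sync object for LWPs blocked in _lwp_park(): sleep queue entries are
 * kept sorted, and the generic unsleep/changepri hooks apply.
 */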
syncobj_t lwp_park_sobj = {
	SOBJ_SLEEPQ_SORTED,
	sleepq_unsleep,
	sleepq_changepri
};

sleeptab_t lwp_park_tab;

#ifdef LWP_COUNTERS
struct evcnt lwp_ev_park_early = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "_lwp_park", "unparked early");
struct evcnt lwp_ev_park_raced = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "_lwp_park", "raced");
struct evcnt lwp_ev_park_miss = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "_lwp_park", "not parked");
struct evcnt lwp_ev_park_bcast = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "_lwp_park", "broadcast unpark");
struct evcnt lwp_ev_park_targ = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "_lwp_park", "targeted unpark");
struct evcnt lwp_ev_park = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "_lwp_park", "parked");

#define	LWP_COUNT(ev, val)	(ev).ev_count += (val)	/* XXXSMP */
#else
#define	LWP_COUNT(ev, val)	/* nothing */
#endif

void
lwp_sys_init(void)
{
	sleeptab_init(&lwp_park_tab);
#ifdef LWP_COUNTERS
	evcnt_attach_static(&lwp_ev_park_early);
	evcnt_attach_static(&lwp_ev_park_raced);
	evcnt_attach_static(&lwp_ev_park_miss);
	evcnt_attach_static(&lwp_ev_park_bcast);
	evcnt_attach_static(&lwp_ev_park_targ);
	evcnt_attach_static(&lwp_ev_park);
#endif
}

/* ARGSUSED */
int
sys__lwp_create(struct lwp *l, void *v, register_t *retval)
{
	struct sys__lwp_create_args /* {
		syscallarg(const ucontext_t *) ucp;
		syscallarg(u_long) flags;
		syscallarg(lwpid_t *) new_lwp;
	} */ *uap = v;
	struct proc *p = l->l_proc;
	struct lwp *l2;
	vaddr_t uaddr;
	boolean_t inmem;
	ucontext_t *newuc;
	int error, lid;

	mutex_enter(&p->p_smutex);
	if ((p->p_sflag & (PS_SA | PS_WEXIT)) != 0 || p->p_sa != NULL) {
		mutex_exit(&p->p_smutex);
		return EINVAL;
	}
	p->p_sflag |= PS_NOSA;
	mutex_exit(&p->p_smutex);

	newuc = pool_get(&lwp_uc_pool, PR_WAITOK);

	error = copyin(SCARG(uap, ucp), newuc,
	    l->l_proc->p_emul->e_sa->sae_ucsize);
	if (error) {
		pool_put(&lwp_uc_pool, newuc);
		return (error);
	}

	/* XXX check against resource limits */

	inmem = uvm_uarea_alloc(&uaddr);
	if (__predict_false(uaddr == 0)) {
		pool_put(&lwp_uc_pool, newuc);
		return (ENOMEM);
	}

	newlwp(l, p, uaddr, inmem,
	    SCARG(uap, flags) & LWP_DETACHED,
	    NULL, 0, startlwp, newuc, &l2);

	/*
	 * Set the new LWP running, unless the caller has requested that
	 * it be created in suspended state.  If the process is stopping,
	 * then the LWP is created stopped.
	 */
	mutex_enter(&p->p_smutex);
	lwp_lock(l2);
	lid = l2->l_lid;
	if ((SCARG(uap, flags) & LWP_SUSPENDED) == 0 &&
	    (l->l_flag & L_WREBOOT) == 0) {
		if (p->p_stat == SSTOP || (p->p_sflag & PS_STOPPING) != 0)
			l2->l_stat = LSSTOP;
		else {
			LOCK_ASSERT(lwp_locked(l2, &sched_mutex));
			p->p_nrlwps++;
			l2->l_stat = LSRUN;
			setrunqueue(l2);
		}
	} else
		l2->l_stat = LSSUSPENDED;
	lwp_unlock(l2);
	mutex_exit(&p->p_smutex);

	error = copyout(&lid, SCARG(uap, new_lwp), sizeof(lid));
	if (error)
		return (error);

	return (0);
}

int
sys__lwp_exit(struct lwp *l, void *v, register_t *retval)
{

	lwp_exit(l);
	return (0);
}

int
sys__lwp_self(struct lwp *l, void *v, register_t *retval)
{

	*retval = l->l_lid;

	return (0);
}

int
sys__lwp_getprivate(struct lwp *l, void *v, register_t *retval)
{

	mb_read();
	*retval = (uintptr_t) l->l_private;

	return (0);
}

int
sys__lwp_setprivate(struct lwp *l, void *v, register_t *retval)
{
	struct sys__lwp_setprivate_args /* {
		syscallarg(void *) ptr;
	} */ *uap = v;

	l->l_private = SCARG(uap, ptr);
	mb_write();

	return (0);
}

int
sys__lwp_suspend(struct lwp *l, void *v, register_t *retval)
{
	struct sys__lwp_suspend_args /* {
		syscallarg(lwpid_t) target;
	} */ *uap = v;
	struct proc *p = l->l_proc;
	struct lwp *t;
	int error;

	mutex_enter(&p->p_smutex);

	if ((p->p_sflag & PS_SA) != 0 || p->p_sa != NULL) {
		mutex_exit(&p->p_smutex);
		return EINVAL;
	}

	if ((t = lwp_find(p, SCARG(uap, target))) == NULL) {
		mutex_exit(&p->p_smutex);
		return (ESRCH);
	}

	/*
	 * Check for deadlock, which is only possible when we're suspending
	 * ourself.  XXX There is a short race here, as p_nrlwps is only
	 * incremented when an LWP suspends itself on the kernel/user
	 * boundary.  It's still possible to kill -9 the process so we
	 * don't bother checking further.
	 */
	lwp_lock(t);
	if ((t == l && p->p_nrlwps == 1) ||
	    (l->l_flag & (L_WCORE | L_WEXIT)) != 0) {
		lwp_unlock(t);
		mutex_exit(&p->p_smutex);
		return (EDEADLK);
	}

	/*
	 * Suspend the LWP.  If it's on a different CPU, we need to wait
	 * for it to be preempted, where it will put itself to sleep.
	 *
	 * Suspension of the current LWP will happen on return to userspace.
	 */
	error = lwp_suspend(l, t);
	mutex_exit(&p->p_smutex);

	return (error);
}

int
sys__lwp_continue(struct lwp *l, void *v, register_t *retval)
{
	struct sys__lwp_continue_args /* {
		syscallarg(lwpid_t) target;
	} */ *uap = v;
	int error;
	struct proc *p = l->l_proc;
	struct lwp *t;

	error = 0;

	mutex_enter(&p->p_smutex);

	if ((p->p_sflag & PS_SA) != 0 || p->p_sa != NULL) {
		mutex_exit(&p->p_smutex);
		return EINVAL;
	}

	if ((t = lwp_find(p, SCARG(uap, target))) == NULL) {
		mutex_exit(&p->p_smutex);
		return ESRCH;
	}

	lwp_lock(t);
	lwp_continue(t);
	mutex_exit(&p->p_smutex);

	return error;
}

int
sys__lwp_wakeup(struct lwp *l, void *v, register_t *retval)
{
	struct sys__lwp_wakeup_args /* {
		syscallarg(lwpid_t) target;
	} */ *uap = v;
	struct lwp *t;
	struct proc *p;
	int error;

	p = l->l_proc;
	mutex_enter(&p->p_smutex);

	if ((t = lwp_find(p, SCARG(uap, target))) == NULL) {
		mutex_exit(&p->p_smutex);
		return ESRCH;
	}

	/* Lock the target LWP, not the caller. */
	lwp_lock(t);

	if (t->l_stat != LSSLEEP) {
		error = ENODEV;
		goto bad;
	}

	if ((t->l_flag & L_SINTR) == 0) {
		error = EBUSY;
		goto bad;
	}

	/* Wake it up.  setrunnable() will release the LWP lock. */
	t->l_flag |= L_CANCELLED;
	setrunnable(t);
	mutex_exit(&p->p_smutex);
	return 0;

 bad:
	lwp_unlock(t);
	mutex_exit(&p->p_smutex);
	return error;
}

int
sys__lwp_wait(struct lwp *l, void *v, register_t *retval)
{
	struct sys__lwp_wait_args /* {
		syscallarg(lwpid_t) wait_for;
		syscallarg(lwpid_t *) departed;
	} */ *uap = v;
	struct proc *p = l->l_proc;
	int error;
	lwpid_t dep;

	mutex_enter(&p->p_smutex);
	error = lwp_wait1(l, SCARG(uap, wait_for), &dep, 0);
	mutex_exit(&p->p_smutex);

	if (error)
		return (error);

	if (SCARG(uap, departed)) {
		error = copyout(&dep, SCARG(uap, departed),
		    sizeof(dep));
		if (error)
			return (error);
	}

	return (0);
}

/* ARGSUSED */
int
sys__lwp_kill(struct lwp *l, void *v, register_t *retval)
{
	struct sys__lwp_kill_args /* {
		syscallarg(lwpid_t) target;
		syscallarg(int) signo;
	} */ *uap = v;
	struct proc *p = l->l_proc;
	struct lwp *t;
	ksiginfo_t ksi;
	int signo = SCARG(uap, signo);
	int error;

	if ((u_int)signo >= NSIG)
		return (EINVAL);

	KSI_INIT(&ksi);
	ksi.ksi_signo = signo;
	ksi.ksi_code = SI_USER;
	ksi.ksi_pid = p->p_pid;
	ksi.ksi_uid = kauth_cred_geteuid(l->l_cred);
	ksi.ksi_lid = SCARG(uap, target);

	mutex_enter(&proclist_mutex);
	mutex_enter(&p->p_smutex);
	if ((t = lwp_find(p, ksi.ksi_lid)) == NULL)
		error = ESRCH;
	else {
		kpsignal2(p, &ksi);
		error = 0;
	}
	mutex_exit(&p->p_smutex);
	mutex_exit(&proclist_mutex);

	return (error);
}

int
sys__lwp_detach(struct lwp *l, void *v, register_t *retval)
{
	struct sys__lwp_detach_args /* {
		syscallarg(lwpid_t) target;
	} */ *uap = v;
	struct proc *p;
	struct lwp *t;
	lwpid_t target;

	target = SCARG(uap, target);
	p = l->l_proc;

	mutex_enter(&p->p_smutex);
	if (l->l_lid == target)
		t = l;
	else
		t = lwp_find(p, target);
	if (t != NULL) {
		p->p_ndlwps++;
		t->l_prflag |= LPR_DETACHED;
	}
	mutex_exit(&p->p_smutex);
	cv_broadcast(&p->p_lwpcv);

	return (t == NULL ? ESRCH : 0);
}

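/*
 * Compute the sleep queue wait channel used when parking an LWP: a hash
 * of the owning process and the target LID, with the LID shifted so that
 * it perturbs the sleep table bucket selection.
 */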
static inline wchan_t
lwp_park_wchan(struct proc *p, lwpid_t target)
{
	return (wchan_t)((uintptr_t)p ^
	    ((uintptr_t)target << SLEEPTAB_HASH_SHIFT));
}

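/*
 * Illustrative sketch (not from the original source): a userland thread
 * library could build a simple wait/wake primitive on top of _lwp_park()
 * and _lwp_unpark().  The waiter-bookkeeping helpers named here are
 * hypothetical.
 *
 *	// Waiter: record ourself, re-check the condition, then park.
 *	waiters_add(obj, _lwp_self());
 *	while (!condition_is_set(obj)) {
 *		// EALREADY means an unpark arrived before we slept.
 *		if (_lwp_park(NULL, NULL) == -1 && errno != EALREADY)
 *			break;
 *	}
 *	waiters_remove(obj, _lwp_self());
 *
 *	// Waker: make the condition visible, then unpark the waiter.
 *	condition_set(obj);
 *	_lwp_unpark(waiter_lid);
 */
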
/*
 * 'park' an LWP waiting on a user-level synchronisation object.  The LWP
 * will remain parked until another LWP in the same process calls in and
 * requests that it be unparked.
 */
int
sys__lwp_park(struct lwp *l, void *v, register_t *retval)
{
	struct sys__lwp_park_args /* {
		syscallarg(const struct timespec *) ts;
		syscallarg(ucontext_t *) uc;
	} */ *uap = v;
	const struct timespec *tsp;
	struct timespec ts, tsx;
	struct timeval tv;
	sleepq_t *sq;
	wchan_t wchan;
	int timo, error;

	/*
	 * The caller supplies an absolute wakeup time; convert it to a
	 * relative timeout in ticks.
	 */
	if ((tsp = SCARG(uap, ts)) != NULL) {
		if ((error = copyin(tsp, &ts, sizeof(ts))) != 0)
			return error;
		getnanotime(&tsx);
		timespecsub(&ts, &tsx, &ts);
		tv.tv_sec = ts.tv_sec;
		tv.tv_usec = ts.tv_nsec / 1000;
		if (tv.tv_sec < 0 || (tv.tv_sec == 0 && tv.tv_usec < 0))
			return ETIMEDOUT;
		if ((error = itimerfix(&tv)) != 0)
			return error;
		timo = tvtohz(&tv);
	} else
		timo = 0;

	/* Find and lock the sleep queue. */
	wchan = lwp_park_wchan(l->l_proc, l->l_lid);
	sq = sleeptab_lookup(&lwp_park_tab, wchan);

	/*
	 * Before going the full route and blocking, check to see if an
	 * unpark op is pending.
	 */
	if ((l->l_flag & L_CANCELLED) != 0) {
		sleepq_lwp_lock(l);
		l->l_flag &= ~L_CANCELLED;
		sleepq_lwp_unlock(l);
		sleepq_unlock(sq);
		LWP_COUNT(lwp_ev_park_early, 1);
		return EALREADY;
	}

	/*
	 * For now we ignore the ucontext argument.  In the future, we may
	 * put our stack up to be recycled.  If it's binned, a trampoline
	 * function could call sleepq_unblock() on our behalf.
	 */
	LWP_COUNT(lwp_ev_park, 1);
	sleepq_enter(sq, l);
	sleepq_block(sq, sched_kpri(l), wchan, "parked", timo, 1,
	    &lwp_park_sobj);
	error = sleepq_unblock(timo, 1);
	return (error == EWOULDBLOCK ? ETIMEDOUT : error);
}

int
sys__lwp_unpark(struct lwp *l, void *v, register_t *retval)
{
	struct sys__lwp_unpark_args /* {
		syscallarg(lwpid_t) target;
	} */ *uap = v;
	struct proc *p;
	struct lwp *t;
	sleepq_t *sq;
	lwpid_t target;
	wchan_t wchan;
	int swapin;

	p = l->l_proc;
	target = SCARG(uap, target);

	/*
	 * Easy case: search for the LWP on the sleep queue.  If
	 * it's parked, remove it from the queue and set running.
	 */
	wchan = lwp_park_wchan(p, target);
	sq = sleeptab_lookup(&lwp_park_tab, wchan);

	TAILQ_FOREACH(t, &sq->sq_queue, l_sleepchain)
		if (t->l_proc == p && t->l_lid == target)
			break;

	if (t == NULL) {
		/*
		 * The LWP hasn't parked yet.  Take the hit
		 * and mark the operation as pending.
		 */
		sleepq_unlock(sq);
		mutex_enter(&p->p_smutex);
		if ((t = lwp_find(p, target)) != NULL)
			lwp_lock(t);
		mutex_exit(&p->p_smutex);

		if (t == NULL)
			return ESRCH;
		if (t->l_syncobj != &lwp_park_sobj) {
			t->l_flag |= L_CANCELLED;
			lwp_unlock(t);
			LWP_COUNT(lwp_ev_park_miss, 1);
			return EALREADY;
		}

		/*
		 * We have raced, and the LWP is now parked.
		 * Wake it in the usual way.
		 */
		sq = t->l_sleepq;
		LOCK_ASSERT(lwp_locked(t, sq->sq_mutex));
		LWP_COUNT(lwp_ev_park_raced, 1);
	}

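	/*
	 * sleepq_remove() reports whether the target's stack must be
	 * swapped back in; the swapper sleeps on &proc0, so kick it
	 * if needed.
	 */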
	swapin = sleepq_remove(sq, t);
	sleepq_unlock(sq);
	if (swapin)
		wakeup(&proc0);
	LWP_COUNT(lwp_ev_park_targ, 1);
	return 0;
}

int
sys__lwp_unpark_all(struct lwp *l, void *v, register_t *retval)
{
	struct sys__lwp_unpark_all_args /* {
		syscallarg(const lwpid_t *) targets;
		syscallarg(size_t) ntargets;
	} */ *uap = v;
	struct proc *p;
	struct lwp *t;
	sleepq_t *sq;
	wchan_t wchan;
	lwpid_t targets[32], *tp, *tpp, *tmax, target;
	int swapin, error;
	u_int ntargets, unparked;
	size_t sz;

	p = l->l_proc;
	ntargets = SCARG(uap, ntargets);

	if (SCARG(uap, targets) == NULL) {
		/*
		 * Let the caller know how much we are willing to do, and
		 * let it unpark the LWPs in blocks.
		 */
		*retval = LWP_UNPARK_MAX;
		return 0;
	}
	if (ntargets > LWP_UNPARK_MAX || ntargets == 0)
		return EINVAL;

	/*
	 * Copy in the target array.  If it's a small number of LWPs, then
	 * place the numbers on the stack.
	 */
	sz = sizeof(target) * ntargets;
	if (sz <= sizeof(targets))
		tp = targets;
	else if ((tp = kmem_alloc(sz, KM_SLEEP)) == NULL)
		return ENOMEM;
	error = copyin(SCARG(uap, targets), tp, sz);
	if (error != 0) {
		if (tp != targets)
			kmem_free(tp, sz);
		return error;
	}

	unparked = 0;
	swapin = 0;

	/* Iterate with tpp; tp must stay the base pointer for kmem_free(). */
	for (tmax = tp + ntargets, tpp = tp; tpp < tmax; tpp++) {
		target = *tpp;

		/*
		 * Easy case: search for the LWP on the sleep queue.  If
		 * it's parked, remove it from the queue and set running.
		 */
		wchan = lwp_park_wchan(l->l_proc, target);
		sq = sleeptab_lookup(&lwp_park_tab, wchan);

		TAILQ_FOREACH(t, &sq->sq_queue, l_sleepchain)
			if (t->l_proc == p && t->l_lid == target)
				break;

		if (t == NULL) {
			/*
			 * The LWP hasn't parked yet.  Take the hit and
			 * mark the operation as pending.
			 */
			sleepq_unlock(sq);
			mutex_enter(&p->p_smutex);
			if ((t = lwp_find(p, target)) != NULL)
				lwp_lock(t);
			mutex_exit(&p->p_smutex);

			if (t == NULL)
				continue;
			if (t->l_syncobj != &lwp_park_sobj) {
				t->l_flag |= L_CANCELLED;
				lwp_unlock(t);
				continue;
			}

			/*
			 * We have raced, and the LWP is now parked.
			 * Wake it in the usual way.
			 */
			sq = t->l_sleepq;
			LOCK_ASSERT(lwp_locked(t, sq->sq_mutex));
		}

		swapin |= sleepq_remove(sq, t);
		sleepq_unlock(sq);
		unparked++;
	}

	if (tp != targets)
		kmem_free(tp, sz);
	if (swapin)
		wakeup(&proc0);
	LWP_COUNT(lwp_ev_park_bcast, unparked);
	LWP_COUNT(lwp_ev_park_miss, (ntargets - unparked));
	return 0;
}