sys_lwp.c revision 1.12.2.8 1 /* $NetBSD: sys_lwp.c,v 1.12.2.8 2007/09/09 23:12:20 ad Exp $ */
2
3 /*-
4 * Copyright (c) 2001, 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Nathan J. Williams, and Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 /*
40 * Lightweight process (LWP) system calls. See kern_lwp.c for a description
41 * of LWPs.
42 */
43
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.12.2.8 2007/09/09 23:12:20 ad Exp $");
46
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/pool.h>
50 #include <sys/proc.h>
51 #include <sys/types.h>
52 #include <sys/syscallargs.h>
53 #include <sys/kauth.h>
54 #include <sys/kmem.h>
55 #include <sys/sleepq.h>
56
57 #include <uvm/uvm_extern.h>
58
59 #define LWP_UNPARK_MAX 1024
60
61 syncobj_t lwp_park_sobj = {
62 SOBJ_SLEEPQ_LIFO,
63 sleepq_unsleep,
64 sleepq_changepri,
65 sleepq_lendpri,
66 syncobj_noowner,
67 };
68
69 sleeptab_t lwp_park_tab;
70
71 void
72 lwp_sys_init(void)
73 {
74
75 sleeptab_init(&lwp_park_tab);
76 }
77
78 /* ARGSUSED */
79 int
80 sys__lwp_create(struct lwp *l, void *v, register_t *retval)
81 {
82 struct sys__lwp_create_args /* {
83 syscallarg(const ucontext_t *) ucp;
84 syscallarg(u_long) flags;
85 syscallarg(lwpid_t *) new_lwp;
86 } */ *uap = v;
87 struct proc *p = l->l_proc;
88 struct lwp *l2;
89 vaddr_t uaddr;
90 bool inmem;
91 ucontext_t *newuc;
92 int error, lid;
93
94 newuc = pool_get(&lwp_uc_pool, PR_WAITOK);
95
96 error = copyin(SCARG(uap, ucp), newuc, p->p_emul->e_ucsize);
97 if (error) {
98 pool_put(&lwp_uc_pool, newuc);
99 return error;
100 }
101
102 /* XXX check against resource limits */
103
104 inmem = uvm_uarea_alloc(&uaddr);
105 if (__predict_false(uaddr == 0)) {
106 pool_put(&lwp_uc_pool, newuc);
107 return ENOMEM;
108 }
109
110 error = newlwp(l, p, uaddr, inmem,
111 SCARG(uap, flags) & LWP_DETACHED,
112 NULL, 0, p->p_emul->e_startlwp, newuc, &l2);
113 if (error) {
114 uvm_uarea_free(uaddr);
115 pool_put(&lwp_uc_pool, newuc);
116 return error;
117 }
118
119 lid = l2->l_lid;
120 error = copyout(&lid, SCARG(uap, new_lwp), sizeof(lid));
121 if (error) {
122 lwp_exit(l2);
123 pool_put(&lwp_uc_pool, newuc);
124 return error;
125 }
126
127 /*
128 * Set the new LWP running, unless the caller has requested that
129 * it be created in suspended state. If the process is stopping,
130 * then the LWP is created stopped.
131 */
132 mutex_enter(&p->p_smutex);
133 lwp_lock(l2);
134 if ((SCARG(uap, flags) & LWP_SUSPENDED) == 0 &&
135 (l->l_flag & (LW_WREBOOT | LW_WSUSPEND | LW_WEXIT)) == 0) {
136 if (p->p_stat == SSTOP || (p->p_sflag & PS_STOPPING) != 0)
137 l2->l_stat = LSSTOP;
138 else {
139 KASSERT(lwp_locked(l2, l2->l_cpu->ci_schedstate.spc_mutex));
140 p->p_nrlwps++;
141 l2->l_stat = LSRUN;
142 sched_enqueue(l2, false);
143 }
144 } else
145 l2->l_stat = LSSUSPENDED;
146 lwp_unlock(l2);
147 mutex_exit(&p->p_smutex);
148
149 return 0;
150 }
151
152 int
153 sys__lwp_exit(struct lwp *l, void *v, register_t *retval)
154 {
155
156 lwp_exit(l);
157 return 0;
158 }
159
160 int
161 sys__lwp_self(struct lwp *l, void *v, register_t *retval)
162 {
163
164 *retval = l->l_lid;
165 return 0;
166 }
167
168 int
169 sys__lwp_getprivate(struct lwp *l, void *v, register_t *retval)
170 {
171
172 *retval = (uintptr_t)l->l_private;
173 return 0;
174 }
175
176 int
177 sys__lwp_setprivate(struct lwp *l, void *v, register_t *retval)
178 {
179 struct sys__lwp_setprivate_args /* {
180 syscallarg(void *) ptr;
181 } */ *uap = v;
182
183 l->l_private = SCARG(uap, ptr);
184 return 0;
185 }
186
187 int
188 sys__lwp_suspend(struct lwp *l, void *v, register_t *retval)
189 {
190 struct sys__lwp_suspend_args /* {
191 syscallarg(lwpid_t) target;
192 } */ *uap = v;
193 struct proc *p = l->l_proc;
194 struct lwp *t;
195 int error;
196
197 mutex_enter(&p->p_smutex);
198 if ((t = lwp_find(p, SCARG(uap, target))) == NULL) {
199 mutex_exit(&p->p_smutex);
200 return ESRCH;
201 }
202
203 /*
204 * Check for deadlock, which is only possible when we're suspending
205 * ourself. XXX There is a short race here, as p_nrlwps is only
206 * incremented when an LWP suspends itself on the kernel/user
207 * boundary. It's still possible to kill -9 the process so we
208 * don't bother checking further.
209 */
210 lwp_lock(t);
211 if ((t == l && p->p_nrlwps == 1) ||
212 (l->l_flag & (LW_WCORE | LW_WEXIT)) != 0) {
213 lwp_unlock(t);
214 mutex_exit(&p->p_smutex);
215 return EDEADLK;
216 }
217
218 /*
219 * Suspend the LWP. XXX If it's on a different CPU, we should wait
220 * for it to be preempted, where it will put itself to sleep.
221 *
222 * Suspension of the current LWP will happen on return to userspace.
223 */
224 error = lwp_suspend(l, t);
225 if (error) {
226 mutex_exit(&p->p_smutex);
227 return error;
228 }
229
230 /*
231 * Wait for:
232 * o process exiting
233 * o target LWP suspended
234 * o target LWP not suspended and L_WSUSPEND clear
235 * o target LWP exited
236 */
237 for (;;) {
238 error = cv_wait_sig(&p->p_lwpcv, &p->p_smutex);
239 if (error) {
240 error = ERESTART;
241 break;
242 }
243 if (lwp_find(p, SCARG(uap, target)) == NULL) {
244 error = ESRCH;
245 break;
246 }
247 if ((l->l_flag | t->l_flag) & (LW_WCORE | LW_WEXIT)) {
248 error = ERESTART;
249 break;
250 }
251 if (t->l_stat == LSSUSPENDED ||
252 (t->l_flag & LW_WSUSPEND) == 0)
253 break;
254 }
255 mutex_exit(&p->p_smutex);
256
257 return error;
258 }
259
260 int
261 sys__lwp_continue(struct lwp *l, void *v, register_t *retval)
262 {
263 struct sys__lwp_continue_args /* {
264 syscallarg(lwpid_t) target;
265 } */ *uap = v;
266 int error;
267 struct proc *p = l->l_proc;
268 struct lwp *t;
269
270 error = 0;
271
272 mutex_enter(&p->p_smutex);
273 if ((t = lwp_find(p, SCARG(uap, target))) == NULL) {
274 mutex_exit(&p->p_smutex);
275 return ESRCH;
276 }
277
278 lwp_lock(t);
279 lwp_continue(t);
280 mutex_exit(&p->p_smutex);
281
282 return error;
283 }
284
285 int
286 sys__lwp_wakeup(struct lwp *l, void *v, register_t *retval)
287 {
288 struct sys__lwp_wakeup_args /* {
289 syscallarg(lwpid_t) target;
290 } */ *uap = v;
291 struct lwp *t;
292 struct proc *p;
293 int error;
294
295 p = l->l_proc;
296 mutex_enter(&p->p_smutex);
297
298 if ((t = lwp_find(p, SCARG(uap, target))) == NULL) {
299 mutex_exit(&p->p_smutex);
300 return ESRCH;
301 }
302
303 lwp_lock(t);
304 t->l_flag |= (LW_CANCELLED | LW_UNPARKED);
305
306 if (t->l_stat != LSSLEEP) {
307 lwp_unlock(t);
308 error = ENODEV;
309 } else if ((t->l_flag & LW_SINTR) == 0) {
310 lwp_unlock(t);
311 error = EBUSY;
312 } else {
313 /* Wake it up. lwp_unsleep() will release the LWP lock. */
314 lwp_unsleep(t);
315 error = 0;
316 }
317
318 mutex_exit(&p->p_smutex);
319
320 return error;
321 }
322
323 int
324 sys__lwp_wait(struct lwp *l, void *v, register_t *retval)
325 {
326 struct sys__lwp_wait_args /* {
327 syscallarg(lwpid_t) wait_for;
328 syscallarg(lwpid_t *) departed;
329 } */ *uap = v;
330 struct proc *p = l->l_proc;
331 int error;
332 lwpid_t dep;
333
334 mutex_enter(&p->p_smutex);
335 error = lwp_wait1(l, SCARG(uap, wait_for), &dep, 0);
336 mutex_exit(&p->p_smutex);
337
338 if (error)
339 return error;
340
341 if (SCARG(uap, departed)) {
342 error = copyout(&dep, SCARG(uap, departed), sizeof(dep));
343 if (error)
344 return error;
345 }
346
347 return 0;
348 }
349
350 /* ARGSUSED */
351 int
352 sys__lwp_kill(struct lwp *l, void *v, register_t *retval)
353 {
354 struct sys__lwp_kill_args /* {
355 syscallarg(lwpid_t) target;
356 syscallarg(int) signo;
357 } */ *uap = v;
358 struct proc *p = l->l_proc;
359 struct lwp *t;
360 ksiginfo_t ksi;
361 int signo = SCARG(uap, signo);
362 int error = 0;
363
364 if ((u_int)signo >= NSIG)
365 return EINVAL;
366
367 KSI_INIT(&ksi);
368 ksi.ksi_signo = signo;
369 ksi.ksi_code = SI_USER;
370 ksi.ksi_pid = p->p_pid;
371 ksi.ksi_uid = kauth_cred_geteuid(l->l_cred);
372 ksi.ksi_lid = SCARG(uap, target);
373
374 mutex_enter(&proclist_mutex);
375 mutex_enter(&p->p_smutex);
376 if ((t = lwp_find(p, ksi.ksi_lid)) == NULL)
377 error = ESRCH;
378 else if (signo != 0)
379 kpsignal2(p, &ksi);
380 mutex_exit(&p->p_smutex);
381 mutex_exit(&proclist_mutex);
382
383 return error;
384 }
385
386 int
387 sys__lwp_detach(struct lwp *l, void *v, register_t *retval)
388 {
389 struct sys__lwp_detach_args /* {
390 syscallarg(lwpid_t) target;
391 } */ *uap = v;
392 struct proc *p;
393 struct lwp *t;
394 lwpid_t target;
395 int error;
396
397 target = SCARG(uap, target);
398 p = l->l_proc;
399
400 mutex_enter(&p->p_smutex);
401
402 if (l->l_lid == target)
403 t = l;
404 else {
405 /*
406 * We can't use lwp_find() here because the target might
407 * be a zombie.
408 */
409 LIST_FOREACH(t, &p->p_lwps, l_sibling)
410 if (t->l_lid == target)
411 break;
412 }
413
414 /*
415 * If the LWP is already detached, there's nothing to do.
416 * If it's a zombie, we need to clean up after it. LSZOMB
417 * is visible with the proc mutex held.
418 *
419 * After we have detached or released the LWP, kick any
420 * other LWPs that may be sitting in _lwp_wait(), waiting
421 * for the target LWP to exit.
422 */
423 if (t != NULL && t->l_stat != LSIDL) {
424 if ((t->l_prflag & LPR_DETACHED) == 0) {
425 p->p_ndlwps++;
426 t->l_prflag |= LPR_DETACHED;
427 if (t->l_stat == LSZOMB) {
428 /* Releases proc mutex. */
429 lwp_free(t, false, false);
430 return 0;
431 }
432 error = 0;
433
434 /*
435 * Have any LWPs sleeping in lwp_wait() recheck
436 * for deadlock.
437 */
438 cv_broadcast(&p->p_lwpcv);
439 } else
440 error = EINVAL;
441 } else
442 error = ESRCH;
443
444 mutex_exit(&p->p_smutex);
445
446 return error;
447 }
448
449 static inline wchan_t
450 lwp_park_wchan(struct proc *p, const void *hint)
451 {
452
453 return (wchan_t)((uintptr_t)p ^ (uintptr_t)hint);
454 }
455
456 int
457 lwp_unpark(lwpid_t target, const void *hint)
458 {
459 sleepq_t *sq;
460 wchan_t wchan;
461 int swapin;
462 proc_t *p;
463 lwp_t *t;
464
465 /*
466 * Easy case: search for the LWP on the sleep queue. If
467 * it's parked, remove it from the queue and set running.
468 */
469 p = curproc;
470 wchan = lwp_park_wchan(p, hint);
471 sq = sleeptab_lookup(&lwp_park_tab, wchan);
472
473 TAILQ_FOREACH(t, &sq->sq_queue, l_sleepchain)
474 if (t->l_proc == p && t->l_lid == target)
475 break;
476
477 if (__predict_true(t != NULL)) {
478 swapin = sleepq_remove(sq, t);
479 sleepq_unlock(sq);
480 if (swapin)
481 uvm_kick_scheduler();
482 return 0;
483 }
484
485 /*
486 * The LWP hasn't parked yet. Take the hit and mark the
487 * operation as pending.
488 */
489 sleepq_unlock(sq);
490
491 mutex_enter(&p->p_smutex);
492 if ((t = lwp_find(p, target)) == NULL) {
493 mutex_exit(&p->p_smutex);
494 return ESRCH;
495 }
496
497 /*
498 * It may not have parked yet, we may have raced, or it
499 * is parked on a different user sync object.
500 */
501 lwp_lock(t);
502 if (t->l_syncobj == &lwp_park_sobj) {
503 /* Releases the LWP lock. */
504 lwp_unsleep(t);
505 } else {
506 /*
507 * Set the operation pending. The next call to _lwp_park
508 * will return early.
509 */
510 t->l_flag |= LW_UNPARKED;
511 lwp_unlock(t);
512 }
513
514 mutex_exit(&p->p_smutex);
515 return 0;
516 }
517
518 int
519 lwp_park(struct timespec *ts, const void *hint)
520 {
521 struct timespec tsx;
522 sleepq_t *sq;
523 wchan_t wchan;
524 int timo, error;
525 lwp_t *l;
526
527 /* Fix up the given timeout value. */
528 if (ts != NULL) {
529 getnanotime(&tsx);
530 timespecsub(ts, &tsx, &tsx);
531 if (tsx.tv_sec < 0 || (tsx.tv_sec == 0 && tsx.tv_nsec <= 0))
532 return ETIMEDOUT;
533 if ((error = itimespecfix(&tsx)) != 0)
534 return error;
535 timo = tstohz(&tsx);
536 KASSERT(timo != 0);
537 } else
538 timo = 0;
539
540 /* Find and lock the sleep queue. */
541 l = curlwp;
542 wchan = lwp_park_wchan(l->l_proc, hint);
543 sq = sleeptab_lookup(&lwp_park_tab, wchan);
544
545 /*
546 * Before going the full route and blocking, check to see if an
547 * unpark op is pending.
548 */
549 lwp_lock(l);
550 if ((l->l_flag & (LW_CANCELLED | LW_UNPARKED)) != 0) {
551 l->l_flag &= ~(LW_CANCELLED | LW_UNPARKED);
552 lwp_unlock(l);
553 sleepq_unlock(sq);
554 return EALREADY;
555 }
556 lwp_unlock_to(l, sq->sq_mutex);
557 l->l_biglocks = 0;
558 sleepq_enqueue(sq, l->l_usrpri, wchan, "parked", &lwp_park_sobj);
559 error = sleepq_block(timo, true);
560 switch (error) {
561 case EWOULDBLOCK:
562 error = ETIMEDOUT;
563 break;
564 case ERESTART:
565 error = EINTR;
566 break;
567 default:
568 /* nothing */
569 break;
570 }
571 return error;
572 }
573
574 /*
575 * 'park' an LWP waiting on a user-level synchronisation object. The LWP
576 * will remain parked until another LWP in the same process calls in and
577 * requests that it be unparked.
578 */
579 int
580 sys__lwp_park(struct lwp *l, void *v, register_t *retval)
581 {
582 struct sys__lwp_park_args /* {
583 syscallarg(const struct timespec *) ts;
584 syscallarg(lwpid_t) unpark;
585 syscallarg(const void *) hint;
586 syscallarg(const void *) unparkhint;
587 } */ *uap = v;
588 struct timespec ts, *tsp;
589 int error;
590
591 if (SCARG(uap, ts) == NULL)
592 tsp = NULL;
593 else {
594 error = copyin(SCARG(uap, ts), &ts, sizeof(ts));
595 if (error != 0)
596 return error;
597 tsp = &ts;
598 }
599
600 if (SCARG(uap, unpark) != 0) {
601 error = lwp_unpark(SCARG(uap, unpark), SCARG(uap, unparkhint));
602 if (error != 0)
603 return error;
604 }
605
606 return lwp_park(tsp, SCARG(uap, hint));
607 }
608
609 int
610 sys__lwp_unpark(struct lwp *l, void *v, register_t *retval)
611 {
612 struct sys__lwp_unpark_args /* {
613 syscallarg(lwpid_t) target;
614 syscallarg(const void *) hint;
615 } */ *uap = v;
616
617 return lwp_unpark(SCARG(uap, target), SCARG(uap, hint));
618 }
619
620 int
621 sys__lwp_unpark_all(struct lwp *l, void *v, register_t *retval)
622 {
623 struct sys__lwp_unpark_all_args /* {
624 syscallarg(const lwpid_t *) targets;
625 syscallarg(size_t) ntargets;
626 syscallarg(const void *) hint;
627 } */ *uap = v;
628 struct proc *p;
629 struct lwp *t;
630 sleepq_t *sq;
631 wchan_t wchan;
632 lwpid_t targets[32], *tp, *tpp, *tmax, target;
633 int swapin, error;
634 u_int ntargets;
635 size_t sz;
636
637 p = l->l_proc;
638 ntargets = SCARG(uap, ntargets);
639
640 if (SCARG(uap, targets) == NULL) {
641 /*
642 * Let the caller know how much we are willing to do, and
643 * let it unpark the LWPs in blocks.
644 */
645 *retval = LWP_UNPARK_MAX;
646 return 0;
647 }
648 if (ntargets > LWP_UNPARK_MAX || ntargets == 0)
649 return EINVAL;
650
651 /*
652 * Copy in the target array. If it's a small number of LWPs, then
653 * place the numbers on the stack.
654 */
655 sz = sizeof(target) * ntargets;
656 if (sz <= sizeof(targets))
657 tp = targets;
658 else {
659 KERNEL_LOCK(1, l); /* XXXSMP */
660 tp = kmem_alloc(sz, KM_SLEEP);
661 KERNEL_UNLOCK_ONE(l); /* XXXSMP */
662 if (tp == NULL)
663 return ENOMEM;
664 }
665 error = copyin(SCARG(uap, targets), tp, sz);
666 if (error != 0) {
667 if (tp != targets) {
668 KERNEL_LOCK(1, l); /* XXXSMP */
669 kmem_free(tp, sz);
670 KERNEL_UNLOCK_ONE(l); /* XXXSMP */
671 }
672 return error;
673 }
674
675 swapin = 0;
676 wchan = lwp_park_wchan(p, SCARG(uap, hint));
677 sq = sleeptab_lookup(&lwp_park_tab, wchan);
678
679 for (tmax = tp + ntargets, tpp = tp; tpp < tmax; tpp++) {
680 target = *tpp;
681
682 /*
683 * Easy case: search for the LWP on the sleep queue. If
684 * it's parked, remove it from the queue and set running.
685 */
686 TAILQ_FOREACH(t, &sq->sq_queue, l_sleepchain)
687 if (t->l_proc == p && t->l_lid == target)
688 break;
689
690 if (t != NULL) {
691 swapin |= sleepq_remove(sq, t);
692 continue;
693 }
694
695 /*
696 * The LWP hasn't parked yet. Take the hit and
697 * mark the operation as pending.
698 */
699 sleepq_unlock(sq);
700 mutex_enter(&p->p_smutex);
701 if ((t = lwp_find(p, target)) == NULL) {
702 mutex_exit(&p->p_smutex);
703 sleepq_lock(sq);
704 continue;
705 }
706 lwp_lock(t);
707
708 /*
709 * It may not have parked yet, we may have raced, or
710 * it is parked on a different user sync object.
711 */
712 if (t->l_syncobj == &lwp_park_sobj) {
713 /* Releases the LWP lock. */
714 lwp_unsleep(t);
715 } else {
716 /*
717 * Set the operation pending. The next call to
718 * _lwp_park will return early.
719 */
720 t->l_flag |= LW_UNPARKED;
721 lwp_unlock(t);
722 }
723
724 mutex_exit(&p->p_smutex);
725 sleepq_lock(sq);
726 }
727
728 sleepq_unlock(sq);
729 if (tp != targets) {
730 KERNEL_LOCK(1, l); /* XXXSMP */
731 kmem_free(tp, sz);
732 KERNEL_UNLOCK_ONE(l); /* XXXSMP */
733 }
734 if (swapin)
735 uvm_kick_scheduler();
736
737 return 0;
738 }
739