/*	$NetBSD: sys_lwp.c,v 1.3 2007/02/15 20:21:13 ad Exp $	*/

/*-
 * Copyright (c) 2001, 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Nathan J. Williams, and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Lightweight process (LWP) system calls.  See kern_lwp.c for a description
 * of LWPs.
 */
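
/*
 * Summary of the calls implemented below: _lwp_create(), _lwp_exit() and
 * _lwp_self() manage LWP lifetime and identity; _lwp_suspend(),
 * _lwp_continue() and _lwp_wakeup() control scheduling state;
 * _lwp_wait(), _lwp_detach() and _lwp_kill() handle reaping and
 * signalling; and _lwp_park(), _lwp_unpark() and _lwp_unpark_all()
 * provide the sleep/wake primitive on which user-level synchronisation
 * objects (e.g. libpthread mutexes and condition variables) are built.
 */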

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.3 2007/02/15 20:21:13 ad Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/types.h>
#include <sys/syscallargs.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/sleepq.h>

#include <uvm/uvm_extern.h>

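/*
 * Cap on the number of LWPs that one _lwp_unpark_all() call will handle.
 * Passing a NULL target array queries this limit, so a caller with more
 * waiters can split the work into blocks of this size (see
 * sys__lwp_unpark_all() below).
 */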
#define	LWP_UNPARK_MAX		1024

syncobj_t lwp_park_sobj = {
	SOBJ_SLEEPQ_SORTED,
	sleepq_unsleep,
	sleepq_changepri
};

sleeptab_t lwp_park_tab;

#ifdef LWP_COUNTERS
struct evcnt lwp_ev_park_early = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "_lwp_park", "unparked early");
struct evcnt lwp_ev_park_raced = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "_lwp_park", "raced");
struct evcnt lwp_ev_park_miss = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "_lwp_park", "not parked");
struct evcnt lwp_ev_park_bcast = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "_lwp_park", "broadcast unpark");
struct evcnt lwp_ev_park_targ = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "_lwp_park", "targeted unpark");
struct evcnt lwp_ev_park = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "_lwp_park", "parked");

#define	LWP_COUNT(ev, val)	(ev).ev_count += (val)	/* XXXSMP */
#else
#define	LWP_COUNT(ev, val)	/* nothing */
#endif

void
lwp_sys_init(void)
{
	sleeptab_init(&lwp_park_tab);
#ifdef LWP_COUNTERS
	evcnt_attach_static(&lwp_ev_park_early);
	evcnt_attach_static(&lwp_ev_park_raced);
	evcnt_attach_static(&lwp_ev_park_miss);
	evcnt_attach_static(&lwp_ev_park_bcast);
	evcnt_attach_static(&lwp_ev_park_targ);
	evcnt_attach_static(&lwp_ev_park);
#endif
}

/* ARGSUSED */
int
sys__lwp_create(struct lwp *l, void *v, register_t *retval)
{
	struct sys__lwp_create_args /* {
		syscallarg(const ucontext_t *) ucp;
		syscallarg(u_long) flags;
		syscallarg(lwpid_t *) new_lwp;
	} */ *uap = v;
	struct proc *p = l->l_proc;
	struct lwp *l2;
	vaddr_t uaddr;
	boolean_t inmem;
	ucontext_t *newuc;
	int error, lid;

	newuc = pool_get(&lwp_uc_pool, PR_WAITOK);

	error = copyin(SCARG(uap, ucp), newuc, p->p_emul->e_ucsize);
	if (error) {
		pool_put(&lwp_uc_pool, newuc);
		return error;
	}

	/* XXX check against resource limits */

	inmem = uvm_uarea_alloc(&uaddr);
	if (__predict_false(uaddr == 0)) {
		pool_put(&lwp_uc_pool, newuc);
		return ENOMEM;
	}

	newlwp(l, p, uaddr, inmem,
	    SCARG(uap, flags) & LWP_DETACHED,
	    NULL, 0, startlwp, newuc, &l2);

	/*
	 * Set the new LWP running, unless the caller has requested that
	 * it be created in suspended state.  If the process is stopping,
	 * then the LWP is created stopped.
	 */
	mutex_enter(&p->p_smutex);
	lwp_lock(l2);
	lid = l2->l_lid;
	if ((SCARG(uap, flags) & LWP_SUSPENDED) == 0 &&
	    (l->l_flag & (L_WREBOOT | L_WSUSPEND | L_WEXIT)) == 0) {
		if (p->p_stat == SSTOP || (p->p_sflag & PS_STOPPING) != 0)
			l2->l_stat = LSSTOP;
		else {
			LOCK_ASSERT(lwp_locked(l2, &sched_mutex));
			p->p_nrlwps++;
			l2->l_stat = LSRUN;
			setrunqueue(l2);
		}
	} else
		l2->l_stat = LSSUSPENDED;
	lwp_unlock(l2);
	mutex_exit(&p->p_smutex);

	error = copyout(&lid, SCARG(uap, new_lwp), sizeof(lid));
	if (error)
		return error;

	return 0;
}

int
sys__lwp_exit(struct lwp *l, void *v, register_t *retval)
{

	lwp_exit(l);
	return 0;
}

int
sys__lwp_self(struct lwp *l, void *v, register_t *retval)
{

	*retval = l->l_lid;
	return 0;
}

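/*
 * l_private is a per-LWP pointer that the kernel stores but does not
 * interpret; user level (typically libpthread) uses it to locate
 * thread-specific state.  The next two calls simply read and write it.
 */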
int
sys__lwp_getprivate(struct lwp *l, void *v, register_t *retval)
{

	*retval = (uintptr_t)l->l_private;
	return 0;
}

int
sys__lwp_setprivate(struct lwp *l, void *v, register_t *retval)
{
	struct sys__lwp_setprivate_args /* {
		syscallarg(void *) ptr;
	} */ *uap = v;

	l->l_private = SCARG(uap, ptr);
	return 0;
}

int
sys__lwp_suspend(struct lwp *l, void *v, register_t *retval)
{
	struct sys__lwp_suspend_args /* {
		syscallarg(lwpid_t) target;
	} */ *uap = v;
	struct proc *p = l->l_proc;
	struct lwp *t;
	int error;

	mutex_enter(&p->p_smutex);
	if ((t = lwp_find(p, SCARG(uap, target))) == NULL) {
		mutex_exit(&p->p_smutex);
		return ESRCH;
	}

	/*
	 * Check for deadlock, which is only possible when we're suspending
	 * ourselves.  XXX There is a short race here, as p_nrlwps is only
	 * decremented when an LWP suspends itself on the kernel/user
	 * boundary.  It's still possible to kill -9 the process so we
	 * don't bother checking further.
	 */
	lwp_lock(t);
	if ((t == l && p->p_nrlwps == 1) ||
	    (l->l_flag & (L_WCORE | L_WEXIT)) != 0) {
		lwp_unlock(t);
		mutex_exit(&p->p_smutex);
		return EDEADLK;
	}

	/*
	 * Suspend the LWP.  XXX If it's on a different CPU, we should wait
	 * for it to be preempted, where it will put itself to sleep.
	 *
	 * Suspension of the current LWP will happen on return to userspace.
	 */
	error = lwp_suspend(l, t);
	mutex_exit(&p->p_smutex);

	return error;
}

int
sys__lwp_continue(struct lwp *l, void *v, register_t *retval)
{
	struct sys__lwp_continue_args /* {
		syscallarg(lwpid_t) target;
	} */ *uap = v;
	int error;
	struct proc *p = l->l_proc;
	struct lwp *t;

	error = 0;

	mutex_enter(&p->p_smutex);
	if ((t = lwp_find(p, SCARG(uap, target))) == NULL) {
		mutex_exit(&p->p_smutex);
		return ESRCH;
	}

	lwp_lock(t);
	lwp_continue(t);
	mutex_exit(&p->p_smutex);

	return error;
}

int
sys__lwp_wakeup(struct lwp *l, void *v, register_t *retval)
{
	struct sys__lwp_wakeup_args /* {
		syscallarg(lwpid_t) target;
	} */ *uap = v;
	struct lwp *t;
	struct proc *p;
	int error;

	p = l->l_proc;
	mutex_enter(&p->p_smutex);

	if ((t = lwp_find(p, SCARG(uap, target))) == NULL) {
		mutex_exit(&p->p_smutex);
		return ESRCH;
	}

	lwp_lock(t);

	if (t->l_stat != LSSLEEP) {
		error = ENODEV;
		goto bad;
	}

	if ((t->l_flag & L_SINTR) == 0) {
		error = EBUSY;
		goto bad;
	}

	/* Wake it up.  setrunnable() will release the LWP lock. */
	t->l_flag |= L_CANCELLED;
	setrunnable(t);
	mutex_exit(&p->p_smutex);
	return 0;

 bad:
	lwp_unlock(t);
	mutex_exit(&p->p_smutex);
	return error;
}

int
sys__lwp_wait(struct lwp *l, void *v, register_t *retval)
{
	struct sys__lwp_wait_args /* {
		syscallarg(lwpid_t) wait_for;
		syscallarg(lwpid_t *) departed;
	} */ *uap = v;
	struct proc *p = l->l_proc;
	int error;
	lwpid_t dep;

	mutex_enter(&p->p_smutex);
	error = lwp_wait1(l, SCARG(uap, wait_for), &dep, 0);
	mutex_exit(&p->p_smutex);

	if (error)
		return error;

	if (SCARG(uap, departed)) {
		error = copyout(&dep, SCARG(uap, departed), sizeof(dep));
		if (error)
			return error;
	}

	return 0;
}

/* ARGSUSED */
int
sys__lwp_kill(struct lwp *l, void *v, register_t *retval)
{
	struct sys__lwp_kill_args /* {
		syscallarg(lwpid_t) target;
		syscallarg(int) signo;
	} */ *uap = v;
	struct proc *p = l->l_proc;
	struct lwp *t;
	ksiginfo_t ksi;
	int signo = SCARG(uap, signo);
	int error = 0;

	if ((u_int)signo >= NSIG)
		return EINVAL;

	KSI_INIT(&ksi);
	ksi.ksi_signo = signo;
	ksi.ksi_code = SI_USER;
	ksi.ksi_pid = p->p_pid;
	ksi.ksi_uid = kauth_cred_geteuid(l->l_cred);
	ksi.ksi_lid = SCARG(uap, target);

	mutex_enter(&proclist_mutex);
	mutex_enter(&p->p_smutex);
	if ((t = lwp_find(p, ksi.ksi_lid)) == NULL)
		error = ESRCH;
	else if (signo != 0)
		kpsignal2(p, &ksi);
	mutex_exit(&p->p_smutex);
	mutex_exit(&proclist_mutex);

	return error;
}

int
sys__lwp_detach(struct lwp *l, void *v, register_t *retval)
{
	struct sys__lwp_detach_args /* {
		syscallarg(lwpid_t) target;
	} */ *uap = v;
	struct proc *p;
	struct lwp *t;
	lwpid_t target;
	int error;

	target = SCARG(uap, target);
	p = l->l_proc;

	mutex_enter(&p->p_smutex);

	if (l->l_lid == target)
		t = l;
	else {
		/*
		 * We can't use lwp_find() here because the target might
		 * be a zombie.
		 */
		LIST_FOREACH(t, &p->p_lwps, l_sibling)
			if (t->l_lid == target)
				break;
	}

	/*
	 * If the LWP is already detached, there's nothing to do.
	 * If it's a zombie, we need to clean up after it.  LSZOMB
	 * is visible with the proc mutex held.
	 *
	 * After we have detached or released the LWP, kick any
	 * other LWPs that may be sitting in _lwp_wait(), waiting
	 * for the target LWP to exit.
	 */
	if (t != NULL && t->l_stat != LSIDL) {
		if ((t->l_prflag & LPR_DETACHED) == 0) {
			p->p_ndlwps++;
			t->l_prflag |= LPR_DETACHED;
			if (t->l_stat == LSZOMB) {
				cv_broadcast(&p->p_lwpcv);
				lwp_free(t, 0, 0);	/* releases proc mutex */
				return 0;
			}
			error = 0;
		} else
			error = EINVAL;
	} else
		error = ESRCH;

	cv_broadcast(&p->p_lwpcv);
	mutex_exit(&p->p_smutex);

	return error;
}

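/*
 * Compute the wait channel for a park operation.  XORing the process
 * pointer into the user-supplied hint keeps identical hints from
 * different processes on (mostly) distinct sleep queues, and avoids
 * allocating any kernel state per user sync object.
 */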
static inline wchan_t
lwp_park_wchan(struct proc *p, const void *hint)
{
	return (wchan_t)((uintptr_t)p ^ (uintptr_t)hint);
}

/*
 * 'park' an LWP waiting on a user-level synchronisation object.  The LWP
 * will remain parked until another LWP in the same process calls in and
 * requests that it be unparked.
 */
int
sys__lwp_park(struct lwp *l, void *v, register_t *retval)
{
	struct sys__lwp_park_args /* {
		syscallarg(const struct timespec *) ts;
		syscallarg(ucontext_t *) uc;
		syscallarg(const void *) hint;
	} */ *uap = v;
	const struct timespec *tsp;
	struct timespec ts, tsx;
	struct timeval tv;
	sleepq_t *sq;
	wchan_t wchan;
	int timo, error;

	/* Fix up the given timeout value. */
	if ((tsp = SCARG(uap, ts)) != NULL) {
		if ((error = copyin(tsp, &ts, sizeof(ts))) != 0)
			return error;
		getnanotime(&tsx);
		timespecsub(&ts, &tsx, &ts);
		tv.tv_sec = ts.tv_sec;
		tv.tv_usec = ts.tv_nsec / 1000;
		if (tv.tv_sec < 0 || (tv.tv_sec == 0 && tv.tv_usec < 0))
			return ETIMEDOUT;
		if ((error = itimerfix(&tv)) != 0)
			return error;
		timo = tvtohz(&tv);
	} else
		timo = 0;

	/* Find and lock the sleep queue. */
	wchan = lwp_park_wchan(l->l_proc, SCARG(uap, hint));
	sq = sleeptab_lookup(&lwp_park_tab, wchan);

	/*
	 * Before going the full route and blocking, check to see if an
	 * unpark op is pending.
	 */
	if ((l->l_flag & L_CANCELLED) != 0) {
		sleepq_lwp_lock(l);
		l->l_flag &= ~L_CANCELLED;
		sleepq_lwp_unlock(l);
		sleepq_unlock(sq);
		LWP_COUNT(lwp_ev_park_early, 1);
		return EALREADY;
	}

	/*
	 * For now we ignore the ucontext argument.  In the future, we may
	 * put our stack up to be recycled.  If it's binned, a trampoline
	 * function could call sleepq_unblock() on our behalf.
	 */
	LWP_COUNT(lwp_ev_park, 1);
	sleepq_enter(sq, l);
	sleepq_block(sq, sched_kpri(l), wchan, "parked", timo, 1,
	    &lwp_park_sobj);
	error = sleepq_unblock(timo, 1);
	return error == EWOULDBLOCK ? ETIMEDOUT : error;
}
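
/*
 * Illustrative sketch (not part of this file) of how user level might
 * build a minimal wait/wake pair on these syscalls.  The argument lists
 * mirror the kernel argument structures above; the 'waiter' bookkeeping
 * is hypothetical, and a real consumer (e.g. libpthread) keeps a proper
 * waiter queue under a spinlock instead of a single slot:
 *
 *	#include <lwp.h>
 *	#include <errno.h>
 *
 *	volatile lwpid_t waiter;	// 0 when nobody is parked
 *
 *	void
 *	wait_on(void *obj)
 *	{
 *		waiter = _lwp_self();
 *		// If the unpark arrived first, park fails with EALREADY
 *		// and we return at once; retry only on signal delivery.
 *		while (_lwp_park(NULL, NULL, obj) == -1 && errno == EINTR)
 *			continue;
 *	}
 *
 *	void
 *	wake(void *obj)
 *	{
 *		lwpid_t t = waiter;
 *		// If 't' has not parked yet, the kernel marks the unpark
 *		// as pending rather than losing the wakeup.
 *		if (t != 0)
 *			(void)_lwp_unpark(t, obj);
 *	}
 */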

int
sys__lwp_unpark(struct lwp *l, void *v, register_t *retval)
{
	struct sys__lwp_unpark_args /* {
		syscallarg(lwpid_t) target;
		syscallarg(const void *) hint;
	} */ *uap = v;
	struct proc *p;
	struct lwp *t;
	sleepq_t *sq;
	lwpid_t target;
	wchan_t wchan;
	int swapin;

	p = l->l_proc;
	target = SCARG(uap, target);

	/*
	 * Easy case: search for the LWP on the sleep queue.  If
	 * it's parked, remove it from the queue and set running.
	 */
	wchan = lwp_park_wchan(p, SCARG(uap, hint));
	sq = sleeptab_lookup(&lwp_park_tab, wchan);

	TAILQ_FOREACH(t, &sq->sq_queue, l_sleepchain)
		if (t->l_proc == p && t->l_lid == target)
			break;

	if (t == NULL) {
		/*
		 * The LWP hasn't parked yet.  Take the hit
		 * and mark the operation as pending.
		 */
		sleepq_unlock(sq);
		mutex_enter(&p->p_smutex);
		if ((t = lwp_find(p, target)) == NULL) {
			mutex_exit(&p->p_smutex);
			return ESRCH;
		}
		lwp_lock(t);
		mutex_exit(&p->p_smutex);

		if (t->l_sleepq == sq) {
			/*
			 * We have raced, and the LWP is now parked.
			 * Wake it in the usual way.
			 */
			KASSERT(t->l_syncobj == &lwp_park_sobj);
			LOCK_ASSERT(lwp_locked(t, sq->sq_mutex));
			LWP_COUNT(lwp_ev_park_raced, 1);
		} else {
			/*
			 * It may not have parked yet, or it may be parked
			 * on a different user sync object.  The latter
			 * is an application error.
			 */
			t->l_flag |= L_CANCELLED;
			lwp_unlock(t);
			return 0;
		}
	}

	swapin = sleepq_remove(sq, t);
	sleepq_unlock(sq);
	if (swapin)
		uvm_kick_scheduler();
	LWP_COUNT(lwp_ev_park_targ, 1);
	return 0;
}

int
sys__lwp_unpark_all(struct lwp *l, void *v, register_t *retval)
{
	struct sys__lwp_unpark_all_args /* {
		syscallarg(const lwpid_t *) targets;
		syscallarg(size_t) ntargets;
		syscallarg(const void *) hint;
	} */ *uap = v;
	struct proc *p;
	struct lwp *t;
	sleepq_t *sq;
	wchan_t wchan;
	lwpid_t targets[32], *tp, *tpp, *tmax, target;
	int swapin, error;
	u_int ntargets, unparked;
	size_t sz;

	p = l->l_proc;
	ntargets = SCARG(uap, ntargets);

	if (SCARG(uap, targets) == NULL) {
		/*
		 * Let the caller know how much we are willing to do, and
		 * let it unpark the LWPs in blocks.
		 */
		*retval = LWP_UNPARK_MAX;
		return 0;
	}
	if (ntargets > LWP_UNPARK_MAX || ntargets == 0)
		return EINVAL;

	/*
	 * Copy in the target array.  If it's a small number of LWPs, then
	 * place the numbers on the stack.
	 */
	sz = sizeof(target) * ntargets;
	if (sz <= sizeof(targets))
		tp = targets;
	else {
		KERNEL_LOCK(1, l);		/* XXXSMP */
		tp = kmem_alloc(sz, KM_SLEEP);
		KERNEL_UNLOCK_ONE(l);		/* XXXSMP */
		if (tp == NULL)
			return ENOMEM;
	}
	error = copyin(SCARG(uap, targets), tp, sz);
	if (error != 0) {
		if (tp != targets) {
			KERNEL_LOCK(1, l);	/* XXXSMP */
			kmem_free(tp, sz);
			KERNEL_UNLOCK_ONE(l);	/* XXXSMP */
		}
		return error;
	}

	unparked = 0;
	swapin = 0;
	wchan = lwp_park_wchan(p, SCARG(uap, hint));
	sq = sleeptab_lookup(&lwp_park_tab, wchan);

	for (tmax = tp + ntargets, tpp = tp; tpp < tmax; tpp++) {
		target = *tpp;

		/*
		 * Easy case: search for the LWP on the sleep queue.  If
		 * it's parked, remove it from the queue and set running.
		 */
		TAILQ_FOREACH(t, &sq->sq_queue, l_sleepchain)
			if (t->l_proc == p && t->l_lid == target)
				break;

		if (t != NULL) {
			swapin |= sleepq_remove(sq, t);
			unparked++;
			continue;
		}

		/*
		 * The LWP hasn't parked yet.  Take the hit and
		 * mark the operation as pending.
		 */
		sleepq_unlock(sq);
		mutex_enter(&p->p_smutex);
		if ((t = lwp_find(p, target)) == NULL) {
			mutex_exit(&p->p_smutex);
			sleepq_lock(sq);
			continue;
		}
		lwp_lock(t);
		mutex_exit(&p->p_smutex);

		if (t->l_sleepq == sq) {
			/*
			 * We have raced, and the LWP is now parked.
			 * Wake it in the usual way.
			 */
			KASSERT(t->l_syncobj == &lwp_park_sobj);
			LOCK_ASSERT(lwp_locked(t, sq->sq_mutex));
			LWP_COUNT(lwp_ev_park_raced, 1);
			swapin |= sleepq_remove(sq, t);
			unparked++;
		} else {
			/*
			 * It may not have parked yet, or it may be parked
			 * on a different user sync object.  The latter
			 * is an application error.
			 */
			t->l_flag |= L_CANCELLED;
			lwp_unlock(t);
			sleepq_lock(sq);
		}
	}

	sleepq_unlock(sq);
	if (tp != targets) {
		KERNEL_LOCK(1, l);		/* XXXSMP */
		kmem_free(tp, sz);
		KERNEL_UNLOCK_ONE(l);		/* XXXSMP */
	}
	if (swapin)
		uvm_kick_scheduler();
	LWP_COUNT(lwp_ev_park_bcast, unparked);
	LWP_COUNT(lwp_ev_park_miss, (ntargets - unparked));
	/* XXXAD return unparked; */
	return 0;
}