/*	$NetBSD: kern_lwp.c,v 1.61.2.6 2007/04/10 00:22:11 ad Exp $	*/
2
3 /*-
4 * Copyright (c) 2001, 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Nathan J. Williams, and Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
/*
 * Overview
 *
 *	Lightweight processes (LWPs) are the basic unit (or thread) of
 *	execution within the kernel.  The core state of an LWP is described
 *	by "struct lwp".
 *
 *	Each LWP is contained within a process (described by "struct proc").
 *	Every process contains at least one LWP, but may contain more.  The
 *	process describes attributes shared among all of its LWPs such as a
 *	private address space, global execution state (stopped, active,
 *	zombie, ...), signal disposition and so on.  On a multiprocessor
 *	machine, multiple LWPs may be executing in the kernel simultaneously.
 *
 * Execution states
 *
 *	At any given time, an LWP has overall state that is described by
 *	lwp::l_stat.  The states are broken into two sets below.  The first
 *	set is guaranteed to represent the absolute, current state of the
 *	LWP:
 *
 *	LSONPROC:
 *
 *		On processor: the LWP is executing on a CPU, either in the
 *		kernel or in user space.
 *
 *	LSRUN:
 *
 *		Runnable: the LWP is parked on a run queue, and may soon be
 *		chosen to run by an idle processor, or by a processor that
 *		has been asked to preempt a currently running but lower
 *		priority LWP.  If the LWP is not swapped in (LW_INMEM == 0)
 *		then the LWP is not on a run queue, but may be soon.
 *
 *	LSIDL:
 *
 *		Idle: the LWP has been created but has not yet executed.
 *		Whoever created the new LWP can be expected to set it to
 *		another state shortly.
 *
 *	LSSUSPENDED:
 *
 *		Suspended: the LWP has had its execution suspended by
 *		another LWP in the same process using the _lwp_suspend()
 *		system call.  User-level LWPs also enter the suspended
 *		state when the system is shutting down.
 *
 *	The second set represents a "statement of intent" on behalf of the
 *	LWP.  The LWP may in fact be executing on a processor, or may be
 *	sleeping, idle, or on a run queue.  It is expected to take the
 *	necessary action to stop executing or become "running" again within
 *	a short timeframe.
 *
 *	LSZOMB:
 *
 *		Dead: the LWP has released most of its resources and is
 *		about to switch away into oblivion.  When it switches away,
 *		its few remaining resources will be collected.
 *
 *	LSSLEEP:
 *
 *		Sleeping: the LWP has entered itself onto a sleep queue, and
 *		will switch away shortly to allow other LWPs to run on the
 *		CPU.
 *
 *	LSSTOP:
 *
 *		Stopped: the LWP has been stopped as a result of a job
 *		control signal, or as a result of the ptrace() interface.
 *		Stopped LWPs may run briefly within the kernel to handle
 *		signals that they receive, but will not return to user space
 *		until their process' state is changed away from stopped.
 *		Individual LWPs within a process cannot be stopped
 *		selectively: all actions that can stop or continue LWPs
 *		occur at the process level.
 *
 * State transitions
 *
 *	Note that the LSSTOP and LSSUSPENDED states may only be set when
 *	returning to user space in userret(), or when sleeping
 *	interruptibly.  Before setting those states, we try to ensure that
 *	the LWPs will release all kernel locks that they hold, and at a
 *	minimum try to ensure that the LWP can be set runnable again by a
 *	signal.
 *
 *	LWPs may transition states in the following ways:
 *
 *	 RUN -------> ONPROC           ONPROC -----> RUN
 *	            > STOPPED                      > SLEEP
 *	            > SUSPENDED                    > STOPPED
 *	                                           > SUSPENDED
 *	                                           > ZOMB
 *
 *	 STOPPED ---> RUN              SUSPENDED --> RUN
 *	            > SLEEP                        > SLEEP
 *
 *	 SLEEP -----> ONPROC           IDL --------> RUN
 *	            > RUN                          > SUSPENDED
 *	            > STOPPED                      > STOPPED
 *	            > SUSPENDED
 *
 * Locking
 *
 *	The majority of fields in 'struct lwp' are covered by a single,
 *	general spin mutex pointed to by lwp::l_mutex.  The locks covering
 *	each field are documented in sys/lwp.h.
 *
 *	State transitions must be made with the LWP's general lock held.  In
 *	a multiprocessor kernel, state transitions may cause the LWP's lock
 *	pointer to change.  On uniprocessor kernels, most scheduler and
 *	synchronisation objects such as sleep queues and LWPs are protected
 *	by only one mutex (sched_mutex).  In this case, LWPs' lock pointers
 *	will never change and will always reference sched_mutex.
 *
 *	Manipulation of the general lock is not performed directly, but
 *	through calls to lwp_lock(), lwp_relock() and similar.
 *
 *	States and their associated locks:
 *
 *	LSIDL, LSZOMB:
 *
 *		Always covered by sched_mutex.
 *
 *	LSONPROC, LSRUN:
 *
 *		Always covered by sched_mutex, which protects the run queues
 *		and other miscellaneous items.  If the scheduler is changed
 *		to use per-CPU run queues, this may become a per-CPU mutex.
 *
 *	LSSLEEP:
 *
 *		Covered by a mutex associated with the sleep queue that the
 *		LWP resides on, indirectly referenced by l_sleepq->sq_mutex.
 *
 *	LSSTOP, LSSUSPENDED:
 *
 *		If the LWP was previously sleeping (l_wchan != NULL), then
 *		l_mutex references the sleep queue mutex.  If the LWP was
 *		runnable or on the CPU when halted, or has been removed from
 *		the sleep queue since halted, then the mutex is sched_mutex.
 *
 *	The lock order is as follows:
 *
 *		sleepq_t::sq_mutex  |---> sched_mutex
 *		tschain_t::tc_mutex |
 *
 *	Each process has a scheduler state mutex (proc::p_smutex), and a
 *	number of counters on LWPs and their states: p_nzlwps, p_nrlwps, and
 *	so on.  When an LWP is to be entered into or removed from one of the
 *	following states, p_smutex must be held and the process wide counters
 *	adjusted:
 *
 *		LSIDL, LSZOMB, LSSTOP, LSSUSPENDED
 *
 *	Note that an LWP is considered running or likely to run soon if in
 *	one of the following states.  This affects the value of p_nrlwps:
 *
 *		LSRUN, LSONPROC, LSSLEEP
 *
 *	p_smutex does not need to be held when transitioning among these
 *	three states.
 */
201
202 #include <sys/cdefs.h>
203 __KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.61.2.6 2007/04/10 00:22:11 ad Exp $");
204
205 #include "opt_multiprocessor.h"
206 #include "opt_lockdebug.h"
207
208 #define _LWP_API_PRIVATE
209
210 #include <sys/param.h>
211 #include <sys/systm.h>
212 #include <sys/pool.h>
213 #include <sys/proc.h>
214 #include <sys/syscallargs.h>
215 #include <sys/syscall_stats.h>
216 #include <sys/kauth.h>
217 #include <sys/sleepq.h>
218 #include <sys/lockdebug.h>
219 #include <sys/kmem.h>
220
221 #include <uvm/uvm_extern.h>
222
223 struct lwplist alllwp;
224
225 POOL_INIT(lwp_pool, sizeof(struct lwp), MIN_LWP_ALIGNMENT, 0, 0, "lwppl",
226 &pool_allocator_nointr, IPL_NONE);
227 POOL_INIT(lwp_uc_pool, sizeof(ucontext_t), 0, 0, 0, "lwpucpl",
228 &pool_allocator_nointr, IPL_NONE);
229
230 static specificdata_domain_t lwp_specificdata_domain;
231
232 #define LWP_DEBUG
233
234 #ifdef LWP_DEBUG
235 int lwp_debug = 0;
236 #define DPRINTF(x) if (lwp_debug) printf x
237 #else
238 #define DPRINTF(x)
239 #endif
240
241 void
242 lwpinit(void)
243 {
244
245 lwp_specificdata_domain = specificdata_domain_create();
246 KASSERT(lwp_specificdata_domain != NULL);
247 lwp_sys_init();
248 }
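
/*
 * Illustrative sketch, not part of the original file: per the "Locking"
 * notes in the comment at the top of this file, l_stat may only be
 * examined with the LWP's general lock held.  The helper name is
 * hypothetical, and it assumes the caller holds a reference (or the
 * process' p_smutex) so that 'l' cannot be freed underneath it.
 */
static bool
example_lwp_is_onproc(struct lwp *l)
{
        bool onproc;

        lwp_lock(l);            /* take l->l_mutex, whichever lock that currently is */
        onproc = (l->l_stat == LSONPROC);
        lwp_unlock(l);          /* release the (possibly changed) general lock */

        return onproc;
}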
249
/*
 * Set an LWP suspended.
 *
 * Must be called with p_smutex held, and the LWP locked.  Will unlock the
 * LWP before return.
 */
256 int
257 lwp_suspend(struct lwp *curl, struct lwp *t)
258 {
259 int error;
260
261 KASSERT(mutex_owned(&t->l_proc->p_smutex));
262 KASSERT(lwp_locked(t, NULL));
263
264 KASSERT(curl != t || curl->l_stat == LSONPROC);
265
        /*
         * If the current LWP has been told to exit, we must not suspend anyone
         * else or deadlock could occur.  We won't return to userspace.
         */
        if ((curl->l_flag & (LW_WEXIT | LW_WCORE)) != 0) {
                lwp_unlock(t);
                return (EDEADLK);
        }
274
275 error = 0;
276
277 switch (t->l_stat) {
278 case LSRUN:
279 case LSONPROC:
280 t->l_flag |= LW_WSUSPEND;
281 lwp_need_userret(t);
282 lwp_unlock(t);
283 break;
284
285 case LSSLEEP:
286 t->l_flag |= LW_WSUSPEND;
287
288 /*
289 * Kick the LWP and try to get it to the kernel boundary
290 * so that it will release any locks that it holds.
291 * setrunnable() will release the lock.
292 */
293 if ((t->l_flag & LW_SINTR) != 0)
294 setrunnable(t);
295 else
296 lwp_unlock(t);
297 break;
298
299 case LSSUSPENDED:
300 lwp_unlock(t);
301 break;
302
303 case LSSTOP:
304 t->l_flag |= LW_WSUSPEND;
305 setrunnable(t);
306 break;
307
308 case LSIDL:
309 case LSZOMB:
310 error = EINTR; /* It's what Solaris does..... */
311 lwp_unlock(t);
312 break;
313 }
314
315 /*
316 * XXXLWP Wait for:
317 *
318 * o process exiting
319 * o target LWP suspended
         *  o target LWP not suspended and LW_WSUSPEND clear
321 * o target LWP exited
322 */
323
324 return (error);
325 }
326
327 /*
328 * Restart a suspended LWP.
329 *
330 * Must be called with p_smutex held, and the LWP locked. Will unlock the
331 * LWP before return.
332 */
333 void
334 lwp_continue(struct lwp *l)
335 {
336
337 KASSERT(mutex_owned(&l->l_proc->p_smutex));
338 KASSERT(lwp_locked(l, NULL));
339
340 DPRINTF(("lwp_continue of %d.%d (%s), state %d, wchan %p\n",
341 l->l_proc->p_pid, l->l_lid, l->l_proc->p_comm, l->l_stat,
342 l->l_wchan));
343
344 /* If rebooting or not suspended, then just bail out. */
345 if ((l->l_flag & LW_WREBOOT) != 0) {
346 lwp_unlock(l);
347 return;
348 }
349
350 l->l_flag &= ~LW_WSUSPEND;
351
352 if (l->l_stat != LSSUSPENDED) {
353 lwp_unlock(l);
354 return;
355 }
356
357 /* setrunnable() will release the lock. */
358 setrunnable(l);
359 }
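
/*
 * Illustrative sketch, not part of the original file: the calling
 * convention documented for lwp_suspend() and lwp_continue().  Both
 * routines expect p_smutex held and the target LWP locked, and both
 * unlock the LWP (but not p_smutex) before returning.  The helper name
 * and its arguments are hypothetical.
 */
static int
example_suspend_then_resume(struct lwp *curl, struct lwp *t)
{
        struct proc *p = t->l_proc;
        int error;

        /* Ask 't' to suspend. */
        mutex_enter(&p->p_smutex);
        lwp_lock(t);
        error = lwp_suspend(curl, t);   /* drops t's lock */
        mutex_exit(&p->p_smutex);
        if (error != 0)
                return error;

        /* Later, allow it to run again. */
        mutex_enter(&p->p_smutex);
        lwp_lock(t);
        lwp_continue(t);                /* drops t's lock */
        mutex_exit(&p->p_smutex);

        return 0;
}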
360
361 /*
362 * Wait for an LWP within the current process to exit. If 'lid' is
363 * non-zero, we are waiting for a specific LWP.
364 *
365 * Must be called with p->p_smutex held.
366 */
367 int
368 lwp_wait1(struct lwp *l, lwpid_t lid, lwpid_t *departed, int flags)
369 {
370 struct proc *p = l->l_proc;
371 struct lwp *l2;
372 int nfound, error;
373
374 DPRINTF(("lwp_wait1: %d.%d waiting for %d.\n",
375 p->p_pid, l->l_lid, lid));
376
377 KASSERT(mutex_owned(&p->p_smutex));
378
379 /*
380 * We try to check for deadlock:
381 *
	 * 1) If all other LWPs are waiting for exits or are suspended.
	 * 2) If we are trying to wait on ourselves.
384 *
385 * XXX we'd like to check for a cycle of waiting LWPs (specific LID
386 * waits, not any-LWP waits) and detect that sort of deadlock, but
387 * we don't have a good place to store the lwp that is being waited
388 * for. wchan is already filled with &p->p_nlwps, and putting the
389 * lwp address in there for deadlock tracing would require exiting
390 * LWPs to call wakeup on both their own address and &p->p_nlwps, to
391 * get threads sleeping on any LWP exiting.
392 */
393 if (lid == l->l_lid)
394 return EDEADLK;
395
396 p->p_nlwpwait++;
397
398 for (;;) {
399 /*
400 * Avoid a race between exit1() and sigexit(): if the
401 * process is dumping core, then we need to bail out: call
402 * into lwp_userret() where we will be suspended until the
403 * deed is done.
404 */
405 if ((p->p_sflag & PS_WCORE) != 0) {
406 mutex_exit(&p->p_smutex);
407 lwp_userret(l);
408 #ifdef DIAGNOSTIC
409 panic("lwp_wait1");
410 #endif
411 /* NOTREACHED */
412 }
413
414 /*
415 * First off, drain any detached LWP that is waiting to be
416 * reaped.
417 */
418 while ((l2 = p->p_zomblwp) != NULL) {
419 p->p_zomblwp = NULL;
420 lwp_free(l2, 0, 0); /* releases proc mutex */
421 mutex_enter(&p->p_smutex);
422 }
423
424 /*
425 * Now look for an LWP to collect. If the whole process is
426 * exiting, count detached LWPs as eligible to be collected,
427 * but don't drain them here.
428 */
429 nfound = 0;
430 LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
431 if (l2 == l || (lid != 0 && l2->l_lid != lid))
432 continue;
433 if ((l2->l_prflag & LPR_DETACHED) != 0) {
434 nfound += ((flags & LWPWAIT_EXITCONTROL) != 0);
435 continue;
436 }
437 nfound++;
438
439 /* No need to lock the LWP in order to see LSZOMB. */
440 if (l2->l_stat != LSZOMB)
441 continue;
442
443 if (departed)
444 *departed = l2->l_lid;
445 lwp_free(l2, 0, 0);
446 mutex_enter(&p->p_smutex);
447 p->p_nlwpwait--;
448 return 0;
449 }
450
451 if (nfound == 0) {
452 error = ESRCH;
453 break;
454 }
455 if ((flags & LWPWAIT_EXITCONTROL) != 0) {
456 KASSERT(p->p_nlwps > 1);
457 cv_wait(&p->p_lwpcv, &p->p_smutex);
458 continue;
459 }
460 if ((p->p_sflag & PS_WEXIT) != 0 ||
461 p->p_nrlwps <= p->p_nlwpwait + p->p_ndlwps) {
462 error = EDEADLK;
463 break;
464 }
465 if ((error = cv_wait_sig(&p->p_lwpcv, &p->p_smutex)) != 0)
466 break;
467 }
468
469 p->p_nlwpwait--;
470 return error;
471 }
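
/*
 * Illustrative sketch, not part of the original file: waiting for one
 * specific LWP in the current process to exit, using lwp_wait1() under
 * p_smutex as documented above.  The helper name is hypothetical and
 * error handling is minimal.
 */
static int
example_wait_for_lwp(lwpid_t lid)
{
        struct proc *p = curlwp->l_proc;
        lwpid_t departed;
        int error;

        mutex_enter(&p->p_smutex);
        error = lwp_wait1(curlwp, lid, &departed, 0);   /* 0: no LWPWAIT_* flags */
        mutex_exit(&p->p_smutex);

        return error;
}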
472
473 /*
474 * Create a new LWP within process 'p2', using LWP 'l1' as a template.
475 * The new LWP is created in state LSIDL and must be set running,
476 * suspended, or stopped by the caller.
477 */
478 int
479 newlwp(struct lwp *l1, struct proc *p2, vaddr_t uaddr, bool inmem,
480 int flags, void *stack, size_t stacksize,
481 void (*func)(void *), void *arg, struct lwp **rnewlwpp)
482 {
483 struct lwp *l2, *isfree;
484 turnstile_t *ts;
485
486 /*
487 * First off, reap any detached LWP waiting to be collected.
488 * We can re-use its LWP structure and turnstile.
489 */
490 isfree = NULL;
491 if (p2->p_zomblwp != NULL) {
492 mutex_enter(&p2->p_smutex);
493 if ((isfree = p2->p_zomblwp) != NULL) {
494 p2->p_zomblwp = NULL;
495 lwp_free(isfree, 1, 0); /* releases proc mutex */
496 } else
497 mutex_exit(&p2->p_smutex);
498 }
499 if (isfree == NULL) {
500 l2 = pool_get(&lwp_pool, PR_WAITOK);
501 memset(l2, 0, sizeof(*l2));
502 l2->l_ts = pool_cache_get(&turnstile_cache, PR_WAITOK);
503 SLIST_INIT(&l2->l_pi_lenders);
504 } else {
505 l2 = isfree;
506 ts = l2->l_ts;
507 KASSERT(l2->l_inheritedprio == MAXPRI);
508 KASSERT(SLIST_EMPTY(&l2->l_pi_lenders));
509 memset(l2, 0, sizeof(*l2));
510 l2->l_ts = ts;
511 }
512
513 l2->l_stat = LSIDL;
514 l2->l_proc = p2;
515 l2->l_refcnt = 1;
516 l2->l_priority = l1->l_priority;
517 l2->l_usrpri = l1->l_usrpri;
518 l2->l_inheritedprio = MAXPRI;
519 l2->l_mutex = &sched_mutex;
520 l2->l_cpu = l1->l_cpu;
521 l2->l_flag = inmem ? LW_INMEM : 0;
522 lwp_initspecific(l2);
523 TAILQ_INIT(&l2->l_selwait);
524
525 if (p2->p_flag & PK_SYSTEM) {
526 /*
527 * Mark it as a system process and not a candidate for
528 * swapping.
529 */
530 l2->l_flag |= LW_SYSTEM;
531 }
532
533 lwp_update_creds(l2);
534 callout_init(&l2->l_tsleep_ch);
535 mutex_init(&l2->l_swaplock, MUTEX_DEFAULT, IPL_NONE);
536 cv_init(&l2->l_sigcv, "sigwait");
537 l2->l_syncobj = &sched_syncobj;
538
539 if (rnewlwpp != NULL)
540 *rnewlwpp = l2;
541
542 l2->l_addr = UAREA_TO_USER(uaddr);
543 KERNEL_LOCK(1, curlwp);
544 uvm_lwp_fork(l1, l2, stack, stacksize, func,
545 (arg != NULL) ? arg : l2);
546 KERNEL_UNLOCK_ONE(curlwp);
547
548 mutex_enter(&p2->p_smutex);
549
550 if ((flags & LWP_DETACHED) != 0) {
551 l2->l_prflag = LPR_DETACHED;
552 p2->p_ndlwps++;
553 } else
554 l2->l_prflag = 0;
555
556 l2->l_sigmask = l1->l_sigmask;
557 CIRCLEQ_INIT(&l2->l_sigpend.sp_info);
558 sigemptyset(&l2->l_sigpend.sp_set);
559
560 p2->p_nlwpid++;
561 if (p2->p_nlwpid == 0)
562 p2->p_nlwpid++;
563 l2->l_lid = p2->p_nlwpid;
564 LIST_INSERT_HEAD(&p2->p_lwps, l2, l_sibling);
565 p2->p_nlwps++;
566
567 mutex_exit(&p2->p_smutex);
568
569 mutex_enter(&proclist_lock);
570 mutex_enter(&proclist_mutex);
571 LIST_INSERT_HEAD(&alllwp, l2, l_list);
572 mutex_exit(&proclist_mutex);
573 mutex_exit(&proclist_lock);
574
575 SYSCALL_TIME_LWP_INIT(l2);
576
577 if (p2->p_emul->e_lwp_fork)
578 (*p2->p_emul->e_lwp_fork)(l1, l2);
579
580 return (0);
581 }
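
/*
 * Illustrative sketch, not part of the original file: creating a new
 * LWP with newlwp().  It assumes the caller has already allocated the
 * u-area described by 'uaddr'/'inmem'; the entry point and helper names
 * are hypothetical.  The new LWP is left in LSIDL, and it remains the
 * caller's job to set it running, suspended or stopped.
 */
static void
example_lwp_start(void *arg)
{

        /* Hypothetical LWP body; an LWP finishes by calling lwp_exit(). */
        lwp_exit(curlwp);
}

static int
example_create_lwp(struct lwp *l1, struct proc *p, vaddr_t uaddr, bool inmem,
    void *arg, struct lwp **newlp)
{
        int error;

        error = newlwp(l1, p, uaddr, inmem, LWP_DETACHED, NULL, 0,
            example_lwp_start, arg, newlp);
        if (error != 0)
                return error;

        /* *newlp is in LSIDL here; transition it as the caller requires. */
        return 0;
}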
582
/*
 * Exit an LWP.  This will call cpu_exit, which will call cpu_switch, so
 * this can only be used meaningfully if you're willing to switch away.
 * Calling with l != curlwp would be weird.
 */
588 void
589 lwp_exit(struct lwp *l)
590 {
591 struct proc *p = l->l_proc;
592 struct lwp *l2;
593
594 DPRINTF(("lwp_exit: %d.%d exiting.\n", p->p_pid, l->l_lid));
595 DPRINTF((" nlwps: %d nzlwps: %d\n", p->p_nlwps, p->p_nzlwps));
596
597 /*
598 * Verify that we hold no locks other than the kernel lock.
599 */
600 #ifdef MULTIPROCESSOR
601 LOCKDEBUG_BARRIER(&kernel_lock, 0);
602 #else
603 LOCKDEBUG_BARRIER(NULL, 0);
604 #endif
605
606 /*
607 * If we are the last live LWP in a process, we need to exit the
608 * entire process. We do so with an exit status of zero, because
609 * it's a "controlled" exit, and because that's what Solaris does.
610 *
611 * We are not quite a zombie yet, but for accounting purposes we
612 * must increment the count of zombies here.
613 *
614 * Note: the last LWP's specificdata will be deleted here.
615 */
616 mutex_enter(&p->p_smutex);
617 if (p->p_nlwps - p->p_nzlwps == 1) {
618 DPRINTF(("lwp_exit: %d.%d calling exit1()\n",
619 p->p_pid, l->l_lid));
620 exit1(l, 0);
621 /* NOTREACHED */
622 }
623 p->p_nzlwps++;
624 mutex_exit(&p->p_smutex);
625
626 if (p->p_emul->e_lwp_exit)
627 (*p->p_emul->e_lwp_exit)(l);
628
629 /* Delete the specificdata while it's still safe to sleep. */
630 specificdata_fini(lwp_specificdata_domain, &l->l_specdataref);
631
632 /*
633 * Release our cached credentials.
634 */
635 kauth_cred_free(l->l_cred);
636
        /*
         * While we can still block, mark the LWP as unswappable to
         * prevent conflicts with the swapper.
         */
641 uvm_lwp_hold(l);
642
643 /*
644 * Remove the LWP from the global list.
645 */
646 mutex_enter(&proclist_lock);
647 mutex_enter(&proclist_mutex);
648 LIST_REMOVE(l, l_list);
649 mutex_exit(&proclist_mutex);
650 mutex_exit(&proclist_lock);
651
        /*
         * Get rid of all references to the LWP that others (e.g. procfs)
         * may have, and mark the LWP as a zombie.  If the LWP is detached,
         * mark it waiting for collection in the proc structure.  Note that
         * before we can do that, we need to free any other dead, detached
         * LWP waiting to meet its maker.
         *
         * XXXSMP disable preemption.
         */
661 mutex_enter(&p->p_smutex);
662 lwp_drainrefs(l);
663
664 if ((l->l_prflag & LPR_DETACHED) != 0) {
665 while ((l2 = p->p_zomblwp) != NULL) {
666 p->p_zomblwp = NULL;
667 lwp_free(l2, 0, 0); /* releases proc mutex */
668 mutex_enter(&p->p_smutex);
669 }
670 p->p_zomblwp = l;
671 }
672
        /*
         * If we find a pending signal for the process and we have been
         * asked to check for signals, then we lose: arrange to have
         * all other LWPs in the process check for signals.
         */
678 if ((l->l_flag & LW_PENDSIG) != 0 &&
679 firstsig(&p->p_sigpend.sp_set) != 0) {
680 LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
681 lwp_lock(l2);
682 l2->l_flag |= LW_PENDSIG;
683 lwp_unlock(l2);
684 }
685 }
686
687 lwp_lock(l);
688 l->l_stat = LSZOMB;
689 lwp_unlock(l);
690 p->p_nrlwps--;
691 cv_broadcast(&p->p_lwpcv);
692 mutex_exit(&p->p_smutex);
693
694 /*
695 * We can no longer block. At this point, lwp_free() may already
696 * be gunning for us. On a multi-CPU system, we may be off p_lwps.
697 *
698 * Free MD LWP resources.
699 */
700 #ifndef __NO_CPU_LWP_FREE
701 cpu_lwp_free(l, 0);
702 #endif
703 pmap_deactivate(l);
704
705 /*
706 * Release the kernel lock, signal another LWP to collect us,
707 * and switch away into oblivion.
708 */
709 #ifdef notyet
710 /* XXXSMP hold in lwp_userret() */
711 KERNEL_UNLOCK_LAST(l);
712 #else
713 KERNEL_UNLOCK_ALL(l, NULL);
714 #endif
715
716 cpu_exit(l);
717 }
718
719 /*
720 * We are called from cpu_exit() once it is safe to schedule the dead LWP's
721 * resources to be freed (i.e., once we've switched to the idle PCB for the
722 * current CPU).
723 */
724 void
725 lwp_exit2(struct lwp *l)
726 {
727 /* XXXSMP re-enable preemption */
728 }
729
730 /*
731 * Free a dead LWP's remaining resources.
732 *
733 * XXXLWP limits.
734 */
735 void
736 lwp_free(struct lwp *l, int recycle, int last)
737 {
738 struct proc *p = l->l_proc;
739 ksiginfoq_t kq;
740
741 /*
742 * If this was not the last LWP in the process, then adjust
743 * counters and unlock.
744 */
745 if (!last) {
                /*
                 * Add the LWP's run time to the process' base value.
                 * This needs to coincide with coming off p_lwps.
                 */
750 timeradd(&l->l_rtime, &p->p_rtime, &p->p_rtime);
751 LIST_REMOVE(l, l_sibling);
752 p->p_nlwps--;
753 p->p_nzlwps--;
754 if ((l->l_prflag & LPR_DETACHED) != 0)
755 p->p_ndlwps--;
756 mutex_exit(&p->p_smutex);
757
758 #ifdef MULTIPROCESSOR
759 /*
760 * In the unlikely event that the LWP is still on the CPU,
761 * then spin until it has switched away. We need to release
762 * all locks to avoid deadlock against interrupt handlers on
763 * the target CPU.
764 */
765 if (l->l_cpu->ci_curlwp == l) {
766 int count;
767 KERNEL_UNLOCK_ALL(curlwp, &count);
768 while (l->l_cpu->ci_curlwp == l)
769 SPINLOCK_BACKOFF_HOOK;
770 KERNEL_LOCK(count, curlwp);
771 }
772 #endif
773 }
774
775 /*
776 * Destroy the LWP's remaining signal information.
777 */
778 ksiginfo_queue_init(&kq);
779 sigclear(&l->l_sigpend, NULL, &kq);
780 ksiginfo_queue_drain(&kq);
781 cv_destroy(&l->l_sigcv);
782 mutex_destroy(&l->l_swaplock);
783
784 /*
785 * Free the LWP's turnstile and the LWP structure itself unless the
786 * caller wants to recycle them.
787 *
788 * We can't return turnstile0 to the pool (it didn't come from it),
789 * so if it comes up just drop it quietly and move on.
790 *
791 * We don't recycle the VM resources at this time.
792 */
793 if (!recycle && l->l_ts != &turnstile0)
794 pool_cache_put(&turnstile_cache, l->l_ts);
795 #ifndef __NO_CPU_LWP_FREE
796 cpu_lwp_free2(l);
797 #endif
798 uvm_lwp_exit(l);
799 KASSERT(SLIST_EMPTY(&l->l_pi_lenders));
800 KASSERT(l->l_inheritedprio == MAXPRI);
801 if (!recycle)
802 pool_put(&lwp_pool, l);
803 }
804
/*
 * Pick an LWP to represent the process for those operations which
 * want information about a "process" that is actually associated
 * with an LWP.
 *
 * If 'locking' is false, no locking or lock checks are performed.
 * This is intended for use by DDB.
 *
 * We don't bother locking the LWP here, since code that uses this
 * interface is broken by design and an exact match is not required.
 */
816 struct lwp *
817 proc_representative_lwp(struct proc *p, int *nrlwps, int locking)
818 {
819 struct lwp *l, *onproc, *running, *sleeping, *stopped, *suspended;
820 struct lwp *signalled;
821 int cnt;
822
823 if (locking) {
824 KASSERT(mutex_owned(&p->p_smutex));
825 }
826
        /* Trivial case: only one LWP */
        if (p->p_nlwps == 1) {
                l = LIST_FIRST(&p->p_lwps);
                if (nrlwps)
                        *nrlwps = (l->l_stat == LSONPROC ||
                            l->l_stat == LSRUN);
                return l;
        }
834
835 cnt = 0;
836 switch (p->p_stat) {
837 case SSTOP:
838 case SACTIVE:
839 /* Pick the most live LWP */
840 onproc = running = sleeping = stopped = suspended = NULL;
841 signalled = NULL;
842 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
843 if (l->l_lid == p->p_sigctx.ps_lwp)
844 signalled = l;
845 switch (l->l_stat) {
846 case LSONPROC:
847 onproc = l;
848 cnt++;
849 break;
850 case LSRUN:
851 running = l;
852 cnt++;
853 break;
854 case LSSLEEP:
855 sleeping = l;
856 break;
857 case LSSTOP:
858 stopped = l;
859 break;
860 case LSSUSPENDED:
861 suspended = l;
862 break;
863 }
864 }
865 if (nrlwps)
866 *nrlwps = cnt;
867 if (signalled)
868 l = signalled;
869 else if (onproc)
870 l = onproc;
871 else if (running)
872 l = running;
873 else if (sleeping)
874 l = sleeping;
875 else if (stopped)
876 l = stopped;
877 else if (suspended)
878 l = suspended;
879 else
880 break;
881 return l;
882 if (nrlwps)
883 *nrlwps = 0;
884 l = LIST_FIRST(&p->p_lwps);
885 return l;
886 #ifdef DIAGNOSTIC
887 case SIDL:
888 case SZOMB:
889 case SDYING:
890 case SDEAD:
891 if (locking)
892 mutex_exit(&p->p_smutex);
893 /* We have more than one LWP and we're in SIDL?
894 * How'd that happen?
895 */
896 panic("Too many LWPs in idle/dying process %d (%s) stat = %d",
897 p->p_pid, p->p_comm, p->p_stat);
898 break;
899 default:
900 if (locking)
901 mutex_exit(&p->p_smutex);
902 panic("Process %d (%s) in unknown state %d",
903 p->p_pid, p->p_comm, p->p_stat);
904 #endif
905 }
906
907 if (locking)
908 mutex_exit(&p->p_smutex);
909 panic("proc_representative_lwp: couldn't find a lwp for process"
910 " %d (%s)", p->p_pid, p->p_comm);
911 /* NOTREACHED */
912 return NULL;
913 }
914
/*
 * Look up a live LWP within the specified process.  The LWP is not
 * locked on return; the caller's hold on p->p_smutex keeps it stable.
 *
 * Must be called with p->p_smutex held.
 */
920 struct lwp *
921 lwp_find(struct proc *p, int id)
922 {
923 struct lwp *l;
924
925 KASSERT(mutex_owned(&p->p_smutex));
926
927 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
928 if (l->l_lid == id)
929 break;
930 }
931
932 /*
933 * No need to lock - all of these conditions will
934 * be visible with the process level mutex held.
935 */
936 if (l != NULL && (l->l_stat == LSIDL || l->l_stat == LSZOMB))
937 l = NULL;
938
939 return l;
940 }
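
/*
 * Illustrative sketch, not part of the original file: a typical
 * lwp_find() caller locks the LWP itself before inspecting or changing
 * volatile per-LWP state.  The helper name is hypothetical.
 */
static struct lwp *
example_find_and_lock(struct proc *p, lwpid_t lid)
{
        struct lwp *t;

        KASSERT(mutex_owned(&p->p_smutex));

        t = lwp_find(p, lid);
        if (t != NULL)
                lwp_lock(t);            /* caller must lwp_unlock(t) when done */

        return t;
}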
941
/*
 * Update an LWP's cached credentials to mirror the process' master copy.
 *
 * This happens early in the syscall path, on user trap, and on LWP
 * creation.  A long-running LWP can also voluntarily choose to update
 * its credentials by calling this routine.  This may be called from
 * LWP_CACHE_CREDS(), which checks l->l_cred != p->p_cred beforehand.
 */
950 void
951 lwp_update_creds(struct lwp *l)
952 {
953 kauth_cred_t oc;
954 struct proc *p;
955
956 p = l->l_proc;
957 oc = l->l_cred;
958
959 mutex_enter(&p->p_mutex);
960 kauth_cred_hold(p->p_cred);
961 l->l_cred = p->p_cred;
962 mutex_exit(&p->p_mutex);
963 if (oc != NULL)
964 kauth_cred_free(oc);
965 }
966
967 /*
968 * Verify that an LWP is locked, and optionally verify that the lock matches
969 * one we specify.
970 */
971 int
972 lwp_locked(struct lwp *l, kmutex_t *mtx)
973 {
974 kmutex_t *cur = l->l_mutex;
975
976 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
977 return mutex_owned(cur) && (mtx == cur || mtx == NULL);
978 #else
979 return mutex_owned(cur);
980 #endif
981 }
982
983 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
984 /*
985 * Lock an LWP.
986 */
987 void
988 lwp_lock_retry(struct lwp *l, kmutex_t *old)
989 {
990
991 /*
992 * XXXgcc ignoring kmutex_t * volatile on i386
993 *
994 * gcc version 4.1.2 20061021 prerelease (NetBSD nb1 20061021)
995 */
996 #if 1
997 while (l->l_mutex != old) {
998 #else
999 for (;;) {
1000 #endif
1001 mutex_spin_exit(old);
1002 old = l->l_mutex;
1003 mutex_spin_enter(old);
1004
1005 /*
1006 * mutex_enter() will have posted a read barrier. Re-test
1007 * l->l_mutex. If it has changed, we need to try again.
1008 */
1009 #if 1
1010 }
1011 #else
1012 } while (__predict_false(l->l_mutex != old));
1013 #endif
1014 }
1015 #endif
1016
1017 /*
1018 * Lend a new mutex to an LWP. The old mutex must be held.
1019 */
1020 void
1021 lwp_setlock(struct lwp *l, kmutex_t *new)
1022 {
1023
1024 KASSERT(mutex_owned(l->l_mutex));
1025
1026 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
1027 mb_write();
1028 l->l_mutex = new;
1029 #else
1030 (void)new;
1031 #endif
1032 }
1033
1034 /*
1035 * Lend a new mutex to an LWP, and release the old mutex. The old mutex
1036 * must be held.
1037 */
1038 void
1039 lwp_unlock_to(struct lwp *l, kmutex_t *new)
1040 {
1041 kmutex_t *old;
1042
1043 KASSERT(mutex_owned(l->l_mutex));
1044
1045 old = l->l_mutex;
1046 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
1047 mb_write();
1048 l->l_mutex = new;
1049 #else
1050 (void)new;
1051 #endif
1052 mutex_spin_exit(old);
1053 }
1054
1055 /*
1056 * Acquire a new mutex, and donate it to an LWP. The LWP must already be
1057 * locked.
1058 */
1059 void
1060 lwp_relock(struct lwp *l, kmutex_t *new)
1061 {
1062 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
1063 kmutex_t *old;
1064 #endif
1065
1066 KASSERT(mutex_owned(l->l_mutex));
1067
1068 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
1069 old = l->l_mutex;
1070 if (old != new) {
1071 mutex_spin_enter(new);
1072 l->l_mutex = new;
1073 mutex_spin_exit(old);
1074 }
1075 #else
1076 (void)new;
1077 #endif
1078 }
1079
1080 int
1081 lwp_trylock(struct lwp *l)
1082 {
1083 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
1084 kmutex_t *old;
1085
1086 for (;;) {
1087 if (!mutex_tryenter(old = l->l_mutex))
1088 return 0;
1089 if (__predict_true(l->l_mutex == old))
1090 return 1;
1091 mutex_spin_exit(old);
1092 }
1093 #else
1094 return mutex_tryenter(l->l_mutex);
1095 #endif
1096 }
1097
/*
 * Handle "exceptional" events for mi_userret().  Called if any flag in
 * the LW_USERRET mask is set.
 */
1102 void
1103 lwp_userret(struct lwp *l)
1104 {
1105 struct proc *p;
1106 void (*hook)(void);
1107 int sig;
1108
1109 p = l->l_proc;
1110
        /*
         * It should be safe to do this read unlocked on a multiprocessor
         * system.
         */
1115 while ((l->l_flag & LW_USERRET) != 0) {
1116 /*
1117 * Process pending signals first, unless the process
1118 * is dumping core or exiting, where we will instead
1119 * enter the L_WSUSPEND case below.
1120 */
1121 if ((l->l_flag & (LW_PENDSIG | LW_WCORE | LW_WEXIT)) ==
1122 LW_PENDSIG) {
1123 KERNEL_LOCK(1, l); /* XXXSMP pool_put() below */
1124 mutex_enter(&p->p_smutex);
1125 while ((sig = issignal(l)) != 0)
1126 postsig(sig);
1127 mutex_exit(&p->p_smutex);
1128 KERNEL_UNLOCK_LAST(l); /* XXXSMP */
1129 }
1130
1131 /*
1132 * Core-dump or suspend pending.
1133 *
1134 * In case of core dump, suspend ourselves, so that the
1135 * kernel stack and therefore the userland registers saved
1136 * in the trapframe are around for coredump() to write them
1137 * out. We issue a wakeup on p->p_lwpcv so that sigexit()
1138 * will write the core file out once all other LWPs are
1139 * suspended.
1140 */
1141 if ((l->l_flag & LW_WSUSPEND) != 0) {
1142 mutex_enter(&p->p_smutex);
1143 p->p_nrlwps--;
1144 cv_broadcast(&p->p_lwpcv);
1145 lwp_lock(l);
1146 l->l_stat = LSSUSPENDED;
1147 mutex_exit(&p->p_smutex);
1148 mi_switch(l, NULL);
1149 }
1150
1151 /* Process is exiting. */
1152 if ((l->l_flag & LW_WEXIT) != 0) {
1153 KERNEL_LOCK(1, l);
1154 lwp_exit(l);
1155 KASSERT(0);
1156 /* NOTREACHED */
1157 }
1158
1159 /* Call userret hook; used by Linux emulation. */
1160 if ((l->l_flag & LW_WUSERRET) != 0) {
1161 lwp_lock(l);
1162 l->l_flag &= ~LW_WUSERRET;
1163 lwp_unlock(l);
1164 hook = p->p_userret;
1165 p->p_userret = NULL;
1166 (*hook)();
1167 }
1168 }
1169 }
1170
1171 /*
1172 * Force an LWP to enter the kernel, to take a trip through lwp_userret().
1173 */
1174 void
1175 lwp_need_userret(struct lwp *l)
1176 {
1177 KASSERT(lwp_locked(l, NULL));
1178
1179 /*
1180 * Since the tests in lwp_userret() are done unlocked, make sure
1181 * that the condition will be seen before forcing the LWP to enter
1182 * kernel mode.
1183 */
1184 mb_write();
1185 cpu_signotify(l);
1186 }
1187
/*
 * Add one reference to an LWP.  This will prevent the LWP from
 * exiting, thus keeping the lwp structure and PCB around to inspect.
 */
1192 void
1193 lwp_addref(struct lwp *l)
1194 {
1195
1196 KASSERT(mutex_owned(&l->l_proc->p_smutex));
1197 KASSERT(l->l_stat != LSZOMB);
1198 KASSERT(l->l_refcnt != 0);
1199
1200 l->l_refcnt++;
1201 }
1202
1203 /*
1204 * Remove one reference to an LWP. If this is the last reference,
1205 * then we must finalize the LWP's death.
1206 */
1207 void
1208 lwp_delref(struct lwp *l)
1209 {
1210 struct proc *p = l->l_proc;
1211
1212 mutex_enter(&p->p_smutex);
1213 if (--l->l_refcnt == 0)
1214 cv_broadcast(&p->p_refcv);
1215 mutex_exit(&p->p_smutex);
1216 }
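
/*
 * Illustrative sketch, not part of the original file: the hold/release
 * pattern provided by lwp_addref() and lwp_delref().  A reference is
 * taken under p_smutex; while it is held, lwp_exit() will wait in
 * lwp_drainrefs() before the LWP is finalized.  The helper name is
 * hypothetical.
 */
static void
example_inspect_lwp(struct lwp *l)
{
        struct proc *p = l->l_proc;

        mutex_enter(&p->p_smutex);
        lwp_addref(l);          /* hold the LWP */
        mutex_exit(&p->p_smutex);

        /* ... examine the LWP without p_smutex held ... */

        lwp_delref(l);          /* takes and drops p_smutex itself */
}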
1217
1218 /*
1219 * Drain all references to the current LWP.
1220 */
1221 void
1222 lwp_drainrefs(struct lwp *l)
1223 {
1224 struct proc *p = l->l_proc;
1225
1226 KASSERT(mutex_owned(&p->p_smutex));
1227 KASSERT(l->l_refcnt != 0);
1228
1229 l->l_refcnt--;
1230 while (l->l_refcnt != 0)
1231 cv_wait(&p->p_refcv, &p->p_smutex);
1232 }
1233
1234 /*
1235 * lwp_specific_key_create --
1236 * Create a key for subsystem lwp-specific data.
1237 */
1238 int
1239 lwp_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
1240 {
1241
1242 return (specificdata_key_create(lwp_specificdata_domain, keyp, dtor));
1243 }
1244
1245 /*
1246 * lwp_specific_key_delete --
1247 * Delete a key for subsystem lwp-specific data.
1248 */
1249 void
1250 lwp_specific_key_delete(specificdata_key_t key)
1251 {
1252
1253 specificdata_key_delete(lwp_specificdata_domain, key);
1254 }
1255
1256 /*
1257 * lwp_initspecific --
1258 * Initialize an LWP's specificdata container.
1259 */
1260 void
1261 lwp_initspecific(struct lwp *l)
1262 {
1263 int error;
1264
1265 error = specificdata_init(lwp_specificdata_domain, &l->l_specdataref);
1266 KASSERT(error == 0);
1267 }
1268
1269 /*
1270 * lwp_finispecific --
1271 * Finalize an LWP's specificdata container.
1272 */
1273 void
1274 lwp_finispecific(struct lwp *l)
1275 {
1276
1277 specificdata_fini(lwp_specificdata_domain, &l->l_specdataref);
1278 }
1279
/*
 * lwp_getspecific --
 *	Return lwp-specific data corresponding to the specified key.
 *
 *	Note: LWP specific data is NOT INTERLOCKED.  An LWP should access
 *	only its OWN SPECIFIC DATA.  If it is necessary to access another
 *	LWP's specific data, care must be taken to ensure that doing so
 *	would not cause internal data structure inconsistency (i.e. the
 *	caller can guarantee that the target LWP is not inside an
 *	lwp_getspecific() or lwp_setspecific() call).
 */
1291 void *
1292 lwp_getspecific(specificdata_key_t key)
1293 {
1294
1295 return (specificdata_getspecific_unlocked(lwp_specificdata_domain,
1296 &curlwp->l_specdataref, key));
1297 }
1298
1299 void *
1300 _lwp_getspecific_by_lwp(struct lwp *l, specificdata_key_t key)
1301 {
1302
1303 return (specificdata_getspecific_unlocked(lwp_specificdata_domain,
1304 &l->l_specdataref, key));
1305 }
1306
1307 /*
1308 * lwp_setspecific --
1309 * Set lwp-specific data corresponding to the specified key.
1310 */
1311 void
1312 lwp_setspecific(specificdata_key_t key, void *data)
1313 {
1314
1315 specificdata_setspecific(lwp_specificdata_domain,
1316 &curlwp->l_specdataref, key, data);
1317 }
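
/*
 * Illustrative sketch, not part of the original file: using the LWP
 * specificdata interface above.  The key, destructor and stored value
 * are hypothetical; error handling is minimal.
 */
static specificdata_key_t example_lwp_key;

static void
example_lwp_key_dtor(void *arg)
{

        kmem_free(arg, sizeof(int));
}

static void
example_lwp_specific_usage(void)
{
        int *datum;

        if (lwp_specific_key_create(&example_lwp_key, example_lwp_key_dtor) != 0)
                return;

        datum = kmem_alloc(sizeof(*datum), KM_SLEEP);
        *datum = 42;

        /* Specific data is attached to, and read from, the current LWP only. */
        lwp_setspecific(example_lwp_key, datum);
        KASSERT(lwp_getspecific(example_lwp_key) == datum);

        lwp_specific_key_delete(example_lwp_key);
}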
1318