/*	$NetBSD: kern_lwp.c,v 1.40.2.5 2006/11/18 21:39:22 ad Exp $	*/

/*-
 * Copyright (c) 2001, 2006 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Nathan J. Williams, and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Overview
 *
 *	Lightweight processes (LWPs) are the basic unit (or thread) of
 *	execution within the kernel.  The core state of an LWP is described
 *	by "struct lwp".
 *
 *	Each LWP is contained within a process (described by "struct proc").
 *	Every process contains at least one LWP, but may contain more.  The
 *	process describes attributes shared among all of its LWPs such as a
 *	private address space, global execution state (stopped, active,
 *	zombie, ...), signal disposition and so on.  On a multiprocessor
 *	machine, multiple LWPs may be executing in the kernel simultaneously.
 *
 *	Note that LWPs differ from kernel threads (kthreads): kernel threads
 *	are distinct processes (system processes) with no user space
 *	component, which may themselves contain one or more LWPs.
 *
 * Execution states
 *
 *	At any given time, an LWP has overall state that is described by
 *	lwp::l_stat.  The states are broken into two sets below.  The first
 *	set is guaranteed to represent the absolute, current state of the
 *	LWP:
 *
 *	LSONPROC
 *
 *		On processor: the LWP is executing on a CPU, either in the
 *		kernel or in user space.
 *
 *	LSRUN
 *
 *		Runnable: the LWP is parked on a run queue, and may soon be
 *		chosen to run by an idle processor, or by a processor that
 *		has been asked to preempt a currently running but lower
 *		priority LWP.  If the LWP is not swapped in (L_INMEM == 0)
 *		then the LWP is not on a run queue, but may be soon.
 *
 *	LSIDL
 *
 *		Idle: the LWP has been created but has not yet executed.
 *		Whoever created the new LWP can be expected to set it to
 *		another state shortly.
 *
 *	LSZOMB
 *
 *		Zombie: the LWP has exited, released all of its resources
 *		and can execute no further.  It will persist until 'reaped'
 *		by another LWP or process via the _lwp_wait() or wait()
 *		system calls.
 *
 *	LSSUSPENDED:
 *
 *		Suspended: the LWP has had its execution suspended by
 *		another LWP in the same process using the _lwp_suspend()
 *		system call.  User-level LWPs also enter the suspended
 *		state when the system is shutting down.
 *
 *	The second set represents a "statement of intent" on behalf of the
 *	LWP.  The LWP may in fact be executing on a processor, may be
 *	sleeping, idle, or on a run queue.  It is expected to take the
 *	necessary action to stop executing or become "running" again within
 *	a short timeframe.
 *
 *	LSDEAD:
 *
 *		Dead: the LWP has released most of its resources and is
 *		about to switch away into oblivion.  When it switches away,
 *		its few remaining resources will be collected and the LWP
 *		will enter the LSZOMB (zombie) state.
 *
 *	LSSLEEP:
 *
 *		Sleeping: the LWP has entered itself onto a sleep queue, and
 *		will switch away shortly to allow other LWPs to run on the
 *		CPU.
 *
 *	LSSTOP:
 *
 *		Stopped: the LWP has been stopped as a result of a job
 *		control signal, or as a result of the ptrace() interface.
 *		Stopped LWPs may run briefly within the kernel to handle
 *		signals that they receive, but will not return to user space
 *		until their process' state is changed away from stopped.
 *		Single LWPs within a process can not be set stopped
 *		selectively: all actions that can stop or continue LWPs
 *		occur at the process level.
 *
 * State transitions
 *
 *	Note that the LSSTOP and LSSUSPENDED states may only be set
 *	when returning to user space in userret(), or when sleeping
 *	interruptibly.  Before setting those states, we try to ensure
 *	that the LWPs will release all kernel locks that they hold,
 *	and at a minimum try to ensure that the LWP can be set runnable
 *	again by a signal.
 *
 *	LWPs may transition states in the following ways:
 *
 *	IDL -------> SUSPENDED          DEAD -------> ZOMBIE
 *	     > RUN
 *
 *	RUN -------> ONPROC             ONPROC -----> RUN
 *	     > STOPPED                         > SLEEP
 *	     > SUSPENDED                       > STOPPED
 *	                                       > SUSPENDED
 *	                                       > DEAD
 *
 *	STOPPED ---> RUN                SUSPENDED --> RUN
 *	     > SLEEP                           > SLEEP
 *
 *	SLEEP -----> ONPROC
 *	     > RUN
 *	     > STOPPED
 *	     > SUSPENDED
 *
 * Locking
 *
 *	The majority of fields in 'struct lwp' are covered by a single,
 *	general spin mutex pointed to by lwp::l_mutex.  The locks covering
 *	each field are documented in sys/lwp.h.
 *
 *	State transitions must be made with the LWP's general lock held.  In
 *	a multiprocessor kernel, state transitions may cause the LWP's lock
 *	pointer to change.  On uniprocessor kernels, most scheduler and
 *	synchronisation objects such as sleep queues and LWPs are protected
 *	by only one mutex (sched_mutex).  In this case, LWPs' lock pointers
 *	will never change and will always reference sched_mutex.
 *
 *	Manipulation of the general lock is not performed directly, but
 *	through calls to lwp_lock(), lwp_relock() and similar.
 *
 *	States and their associated locks:
 *
 *	LSIDL, LSDEAD, LSZOMB
 *
 *		Always covered by lwp_mutex (the idle mutex).
 *
 *	LSONPROC, LSRUN:
 *
 *		Always covered by sched_mutex, which protects the run queues
 *		and other miscellaneous items.  If the scheduler is changed
 *		to use per-CPU run queues, this may become a per-CPU mutex.
 *
 *	LSSLEEP:
 *
 *		Covered by a mutex associated with the sleep queue that the
 *		LWP resides on, indirectly referenced by l_sleepq->sq_mutex.
 *
 *	LSSTOP, LSSUSPENDED:
 *
 *		If the LWP was previously sleeping (l_wchan != NULL), then
 *		l_mutex references the sleep queue mutex.  If the LWP was
 *		runnable or on the CPU when halted, or has been removed from
 *		the sleep queue since halted, then the mutex is lwp_mutex.
 *
 *	The lock order for the various mutexes is as follows:
 *
 *		sleepq_t::sq_mutex -> lwp_mutex -> sched_mutex
 *
 *	Each process has a scheduler state mutex (proc::p_smutex), and a
 *	number of counters on LWPs and their states: p_nzlwps, p_nrlwps, and
 *	so on.  When an LWP is to be entered into or removed from one of the
 *	following states, p_smutex must be held and the process wide counters
 *	adjusted:
 *
 *		LSIDL, LSDEAD, LSZOMB, LSSTOP, LSSUSPENDED
 *
 *	Note that an LWP is considered running or likely to run soon if in
 *	one of the following states.  This affects the value of p_nrlwps:
 *
 *		LSRUN, LSONPROC, LSSLEEP
 *
 *	p_smutex does not need to be held when transitioning among these
 *	three states.
 */
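
/*
 * Illustrative sketch only, not compiled: one plausible shape of a state
 * transition under the locking rules described above, patterned on the
 * suspend path in lwp_userret() below.  The helper example_go_suspended()
 * is hypothetical and exists purely to demonstrate the lock ordering.
 */
#if 0
static void
example_go_suspended(struct lwp *l)
{
	struct proc *p = l->l_proc;

	mutex_enter(&p->p_smutex);	/* covers p_nrlwps and friends */
	lwp_lock(l);			/* the LWP's general lock */
	lwp_relock(l, &lwp_mutex);	/* LSSUSPENDED is covered by lwp_mutex */

	p->p_nrlwps--;			/* no longer running, or likely to */
	l->l_stat = LSSUSPENDED;

	lwp_unlock(l);
	mutex_exit(&p->p_smutex);
}
#endif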

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.40.2.5 2006/11/18 21:39:22 ad Exp $");

#include "opt_multiprocessor.h"
#include "opt_lockdebug.h"

#define _LWP_API_PRIVATE

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/sa.h>
#include <sys/syscallargs.h>
#include <sys/kauth.h>
#include <sys/sleepq.h>
#include <sys/lockdebug.h>

#include <uvm/uvm_extern.h>

struct lwplist alllwp;
kmutex_t alllwp_mutex;
kmutex_t lwp_mutex;

POOL_INIT(lwp_pool, sizeof(struct lwp), 16, 0, 0, "lwppl",
    &pool_allocator_nointr);
POOL_INIT(lwp_uc_pool, sizeof(ucontext_t), 0, 0, 0, "lwpucpl",
    &pool_allocator_nointr);

static specificdata_domain_t lwp_specificdata_domain;

#define LWP_DEBUG

#ifdef LWP_DEBUG
int lwp_debug = 0;
#define DPRINTF(x) if (lwp_debug) printf x
#else
#define DPRINTF(x)
#endif

void
lwpinit(void)
{

	lwp_specificdata_domain = specificdata_domain_create();
	KASSERT(lwp_specificdata_domain != NULL);
}

/*
 * Set an LWP halted or suspended.
 *
 * Must be called with p_smutex held, and the LWP locked.  Will unlock the
 * LWP before return.
 */
int
lwp_halt(struct lwp *curl, struct lwp *t, int state)
{
	int error, want;

	LOCK_ASSERT(mutex_owned(&t->l_proc->p_smutex));	/* XXXAD what now? */
	LOCK_ASSERT(lwp_locked(t, NULL));

	KASSERT(curl != t || curl->l_stat == LSONPROC);

	/*
	 * If the current LWP has been told to exit, we must not suspend
	 * anyone else or deadlock could occur.  We won't return to
	 * userspace.
	 */
	if ((curl->l_flag & (L_WEXIT | L_WCORE)) != 0)
		return (EDEADLK);

	error = 0;

	want = (state == LSSUSPENDED ? L_WSUSPEND : 0);

	switch (t->l_stat) {
	case LSRUN:
	case LSONPROC:
		t->l_flag |= want;
		signotify(t);
		break;

	case LSSLEEP:
		t->l_flag |= want;

		/*
		 * Kick the LWP and try to get it to the kernel boundary
		 * so that it will release any locks that it holds.
		 * setrunnable() will release the lock.
		 */
		signotify(t);
		setrunnable(t);
		return 0;

	case LSSUSPENDED:
	case LSSTOP:
		t->l_flag |= want;
		break;

	case LSIDL:
	case LSZOMB:
	case LSDEAD:
		error = EINTR;	/* It's what Solaris does..... */
		break;
	}

	lwp_unlock(t);

	return (error);
}

/*
 * Restart a suspended LWP.
 *
 * Must be called with p_smutex held, and the LWP locked.  Will unlock the
 * LWP before return.
 */
void
lwp_continue(struct lwp *l)
{

	LOCK_ASSERT(mutex_owned(&l->l_proc->p_smutex));
	LOCK_ASSERT(lwp_locked(l, NULL));

	DPRINTF(("lwp_continue of %d.%d (%s), state %d, wchan %p\n",
	    l->l_proc->p_pid, l->l_lid, l->l_proc->p_comm, l->l_stat,
	    l->l_wchan));

	/* If rebooting or not suspended, then just bail out. */
	if ((l->l_flag & L_WREBOOT) != 0) {
		lwp_unlock(l);
		return;
	}

	l->l_flag &= ~L_WSUSPEND;

	if (l->l_stat != LSSUSPENDED) {
		lwp_unlock(l);
		return;
	}

	/* setrunnable() will release the lock. */
	setrunnable(l);
}
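
/*
 * Illustrative sketch only, not compiled: how a caller, for example the
 * _lwp_suspend() system call, might be expected to drive lwp_halt().  The
 * wrapper name example_suspend_lwp() is hypothetical; lwp_byid() is
 * defined later in this file and returns the LWP locked.
 */
#if 0
static int
example_suspend_lwp(struct lwp *curl, lwpid_t lid)
{
	struct proc *p = curl->l_proc;
	struct lwp *t;
	int error;

	mutex_enter(&p->p_smutex);
	if ((t = lwp_byid(p, lid)) == NULL) {
		mutex_exit(&p->p_smutex);
		return ESRCH;
	}

	/* lwp_halt() unlocks the target LWP before returning. */
	error = lwp_halt(curl, t, LSSUSPENDED);
	mutex_exit(&p->p_smutex);

	return error;
}
#endif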

/*
 * Wait for an LWP within the current process to exit.  If 'lid' is
 * non-zero, we are waiting for a specific LWP.
 *
 * Must be called with p->p_smutex held.
 */
int
lwp_wait1(struct lwp *l, lwpid_t lid, lwpid_t *departed, int flags)
{
	struct proc *p = l->l_proc;
	struct lwp *l2;
	int nfound, error, wpri;
	static const char waitstr1[] = "lwpwait";
	static const char waitstr2[] = "lwpwait2";

	DPRINTF(("lwp_wait1: %d.%d waiting for %d.\n",
	    p->p_pid, l->l_lid, lid));

	LOCK_ASSERT(mutex_owned(&p->p_smutex));

	/*
	 * Check for deadlock:
	 *
	 * 1) If all other LWPs are waiting for exits or suspended.
	 * 2) If we are trying to wait on ourself.
	 *
	 * XXX we'd like to check for a cycle of waiting LWPs (specific LID
	 * waits, not any-LWP waits) and detect that sort of deadlock, but
	 * we don't have a good place to store the lwp that is being waited
	 * for.  wchan is already filled with &p->p_nlwps, and putting the
	 * lwp address in there for deadlock tracing would require exiting
	 * LWPs to call wakeup on both their own address and &p->p_nlwps, to
	 * get threads sleeping on any LWP exiting.
	 */
	if (lwp_lastlive(p->p_nlwpwait) || lid == l->l_lid)
		return (EDEADLK);

	p->p_nlwpwait++;
	wpri = PWAIT;
	if ((flags & LWPWAIT_EXITCONTROL) == 0)
		wpri |= PCATCH;
 loop:
	nfound = 0;
	LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
		if ((l2 == l) || (l2->l_flag & L_DETACHED) ||
		    ((lid != 0) && (lid != l2->l_lid)))
			continue;
		nfound++;
		if (l2->l_stat != LSZOMB)
			continue;

		if (departed)
			*departed = l2->l_lid;

		LIST_REMOVE(l2, l_sibling);
		p->p_nlwps--;
		p->p_nzlwps--;
		p->p_nlwpwait--;
		/* XXX decrement limits */
		pool_put(&lwp_pool, l2);
		return (0);
	}

	if (nfound == 0) {
		p->p_nlwpwait--;
		return (ESRCH);
	}

	if ((error = mtsleep(&p->p_nlwps, wpri,
	    (lid != 0) ? waitstr1 : waitstr2, 0, &p->p_smutex)) != 0)
		return (error);

	goto loop;
}
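
/*
 * Illustrative sketch only, not compiled: the calling convention expected
 * by lwp_wait1().  A wrapper such as the _lwp_wait() system call would be
 * expected to look roughly like this; example_wait_any() is hypothetical.
 */
#if 0
static int
example_wait_any(struct lwp *l, lwpid_t *departed)
{
	struct proc *p = l->l_proc;
	int error;

	mutex_enter(&p->p_smutex);
	/* lid == 0 waits for any undetached LWP in the process. */
	error = lwp_wait1(l, 0, departed, 0);
	mutex_exit(&p->p_smutex);

	return error;
}
#endif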

/*
 * Create a new LWP within process 'p2', using LWP 'l1' as a template.
 * The new LWP is created in state LSIDL and must be set running,
 * suspended, or stopped by the caller.
 */
int
newlwp(struct lwp *l1, struct proc *p2, vaddr_t uaddr, boolean_t inmem,
    int flags, void *stack, size_t stacksize,
    void (*func)(void *), void *arg, struct lwp **rnewlwpp)
{
	struct lwp *l2;

	l2 = pool_get(&lwp_pool, PR_WAITOK);

	l2->l_stat = LSIDL;
	l2->l_forw = l2->l_back = NULL;
	l2->l_proc = p2;
	l2->l_refcnt = 1;

	lwp_initspecific(l2);

	memset(&l2->l_startzero, 0,
	    (unsigned) ((caddr_t)&l2->l_endzero -
		(caddr_t)&l2->l_startzero));

	/* The copy here is unlocked, but is unlikely to pose a problem. */
	memcpy(&l2->l_startcopy, &l1->l_startcopy,
	    (unsigned) ((caddr_t)&l2->l_endcopy -
		(caddr_t)&l2->l_startcopy));

#if !defined(MULTIPROCESSOR)
	/*
	 * In the single-processor case, all processes will always run
	 * on the same CPU.  So, initialize the child's CPU to the parent's
	 * now.  In the multiprocessor case, the child's CPU will be
	 * initialized in the low-level context switch code when the
	 * process runs.
	 */
	KASSERT(l1->l_cpu != NULL);
	l2->l_cpu = l1->l_cpu;
#else
	/*
	 * Zero the child's CPU pointer so we don't get trash.
	 */
	l2->l_cpu = NULL;
#endif /* ! MULTIPROCESSOR */

#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
	l2->l_mutex = &lwp_mutex;
#else
	l2->l_mutex = &sched_mutex;
#endif

	l2->l_flag = inmem ? L_INMEM : 0;
	l2->l_flag |= (flags & LWP_DETACHED) ? L_DETACHED : 0;

	if (p2->p_flag & P_SYSTEM) {
		/*
		 * Mark it as a system process and not a candidate for
		 * swapping.
		 */
		l2->l_flag |= L_SYSTEM | L_INMEM;
	}

	lwp_update_creds(l2);
	callout_init(&l2->l_tsleep_ch);
	l2->l_ts = pool_cache_get(&turnstile_cache, PR_WAITOK);
	l2->l_syncobj = &sched_syncobj;

	if (rnewlwpp != NULL)
		*rnewlwpp = l2;

	l2->l_addr = UAREA_TO_USER(uaddr);
	uvm_lwp_fork(l1, l2, stack, stacksize, func,
	    (arg != NULL) ? arg : l2);

	mutex_enter(&p2->p_smutex);

	if ((p2->p_flag & P_SA) == 0) {
		l2->l_sigpend = &l2->l_sigstore.ss_pend;
		l2->l_sigmask = &l2->l_sigstore.ss_mask;
		l2->l_sigstk = &l2->l_sigstore.ss_stk;
		*l2->l_sigmask = *l1->l_sigmask;
		CIRCLEQ_INIT(&l2->l_sigpend->sp_info);
		sigemptyset(&l2->l_sigpend->sp_set);
	} else {
		l2->l_sigpend = &p2->p_sigstore.ss_pend;
		l2->l_sigmask = &p2->p_sigstore.ss_mask;
		l2->l_sigstk = &p2->p_sigstore.ss_stk;
	}

	l2->l_lid = ++p2->p_nlwpid;
	LIST_INSERT_HEAD(&p2->p_lwps, l2, l_sibling);
	p2->p_nlwps++;

	mutex_exit(&p2->p_smutex);

	mutex_enter(&alllwp_mutex);
	LIST_INSERT_HEAD(&alllwp, l2, l_list);
	mutex_exit(&alllwp_mutex);

	if (p2->p_emul->e_lwp_fork)
		(*p2->p_emul->e_lwp_fork)(l1, l2);

	return (0);
}
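
/*
 * Illustrative sketch only, not compiled: a minimal caller of newlwp().
 * The u-area allocation via uvm_uarea_alloc() and the start routine
 * example_start() are assumptions made for illustration; a real caller
 * must also move the new LWP out of LSIDL (running, suspended or stopped)
 * once it is ready.
 */
#if 0
static void example_start(void *);

static int
example_create_lwp(struct lwp *l1, struct proc *p, void *arg)
{
	struct lwp *l2;
	vaddr_t uaddr;
	boolean_t inmem;
	int error;

	inmem = uvm_uarea_alloc(&uaddr);
	if (uaddr == 0)
		return ENOMEM;

	error = newlwp(l1, p, uaddr, inmem, 0, NULL, 0,
	    example_start, arg, &l2);
	if (error != 0)
		return error;

	/* l2 is now in LSIDL; the caller decides when it becomes runnable. */
	return 0;
}
#endif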

/*
 * Exit the calling LWP.  This will call cpu_exit, which will call
 * cpu_switch, so this can only be used meaningfully if you're willing to
 * switch away.  Calling with l != curlwp would be weird.
 */
int
lwp_exit(struct lwp *l, int checksigs)
{
	struct proc *p = l->l_proc;

	DPRINTF(("lwp_exit: %d.%d exiting.\n", p->p_pid, l->l_lid));
	DPRINTF((" nlwps: %d nzlwps: %d\n", p->p_nlwps, p->p_nzlwps));

	mutex_enter(&p->p_smutex);

	/*
	 * If we've got pending signals that we haven't processed yet, make
	 * sure that we take them before exiting.
	 */
	if (checksigs && sigispending(l)) {
		mutex_exit(&p->p_smutex);
		return ERESTART;
	}

	if (p->p_emul->e_lwp_exit)
		(*p->p_emul->e_lwp_exit)(l);

	/*
	 * If we are the last live LWP in a process, we need to exit the
	 * entire process.  We do so with an exit status of zero, because
	 * it's a "controlled" exit, and because that's what Solaris does.
	 *
	 * We are not quite a zombie yet, but for accounting purposes we
	 * must increment the count of zombies here.
	 *
	 * Note: the last LWP's specificdata will be deleted here.
	 */
	p->p_nzlwps++;
	if (p->p_nlwps - p->p_nzlwps == 0) {
		DPRINTF(("lwp_exit: %d.%d calling exit1()\n",
		    p->p_pid, l->l_lid));
		exit1(l, 0);
		/* NOTREACHED */
	}

	/* Delete the specificdata while it's still safe to sleep. */
	specificdata_fini(lwp_specificdata_domain, &l->l_specdataref);

	/*
	 * Release our cached credentials and collate accounting flags.
	 */
	kauth_cred_free(l->l_cred);
	mutex_enter(&p->p_mutex);
	p->p_acflag |= l->l_acflag;
	mutex_exit(&p->p_mutex);

	lwp_lock(l);
	if ((l->l_flag & L_DETACHED) != 0) {
		LIST_REMOVE(l, l_sibling);
		p->p_nlwps--;
		curlwp = NULL;
		l->l_proc = NULL;
	}
	l->l_stat = LSDEAD;
	lwp_unlock_to(l, &lwp_mutex);

	if ((p->p_flag & P_SA) == 0) {
		/*
		 * Clear any private, pending signals.  XXX We may lose
		 * process-wide signals that we didn't want to take.
		 */
		sigclear(l->l_sigpend, NULL);
	}

	mutex_exit(&p->p_smutex);

	/*
	 * Remove the LWP from the global list and from the parent process.
	 * Once done, mark it as dead.  Nothing should be able to find or
	 * update it past this point.
	 */
	mutex_enter(&alllwp_mutex);
	LIST_REMOVE(l, l_list);
	mutex_exit(&alllwp_mutex);

	/*
	 * Verify that we hold no locks other than the kernel mutex, and
	 * release our turnstile.  We should no longer sleep past this
	 * point.
	 */
	LOCKDEBUG_BARRIER(&kernel_lock, 0);
	pool_cache_put(&turnstile_cache, l->l_ts);

	/*
	 * Free MD LWP resources.
	 */
#ifndef __NO_CPU_LWP_FREE
	cpu_lwp_free(l, 0);
#endif
	pmap_deactivate(l);

	/*
	 * Release the kernel lock, and switch away into oblivion.
	 */
	(void)KERNEL_UNLOCK(0, l);	/* XXXSMP assert count == 1 */
	cpu_exit(l);

	/* NOTREACHED */
	return 0;
}

/*
 * We are called from cpu_exit() once it is safe to schedule the dead LWP's
 * resources to be freed (i.e., once we've switched to the idle PCB for the
 * current CPU).
 *
 * NOTE: One must be careful with locking in this routine.  It's called from
 * a critical section in machine-dependent code.
 */
void
lwp_exit2(struct lwp *l)
{
	struct proc *p;
	u_int refcnt;

	/*
	 * If someone holds a reference on the LWP, let them clean us up.
	 */
	lwp_lock(l);
	refcnt = --l->l_refcnt;
	lwp_unlock(l);
	if (refcnt != 0)
		return;

	KASSERT(l->l_stat == LSDEAD);
	KERNEL_LOCK(1, NULL);

	/*
	 * Free the VM resources we're still holding on to.
	 */
	uvm_lwp_exit(l);

	p = l->l_proc;

	if ((l->l_flag & L_DETACHED) != 0) {
		/*
		 * Nobody waits for detached LWPs.
		 */
		pool_put(&lwp_pool, l);
		(void)KERNEL_UNLOCK(1, NULL);

		/*
		 * If this is the last LWP in the process, wake up the
		 * parent so that it can reap us.
		 */
		mb_read();
		if (p->p_nlwps == 0) {
			KASSERT(p->p_stat == SDEAD);
			p->p_stat = SZOMB;
			mb_write();

			/* XXXSMP too much locking */
			mutex_enter(&proclist_mutex);
			mutex_enter(&proc_stop_mutex);
			p = p->p_pptr;
			p->p_nstopchild++;
			cv_broadcast(&p->p_waitcv);
			mutex_exit(&proc_stop_mutex);
			mutex_exit(&proclist_mutex);
		}
	} else {
		(void)KERNEL_UNLOCK(1, NULL);
		l->l_stat = LSZOMB;
		mb_write();
		mutex_enter(&p->p_smutex);
		wakeup(&p->p_nlwps);
		mutex_exit(&p->p_smutex);
	}
}

/*
 * Pick an LWP to represent the process for those operations which
 * want information about a "process" that is actually associated
 * with an LWP.
 *
 * Must be called with p->p_smutex held, and will return the LWP locked.
 * If 'locking' is false, no locking or lock checks are performed.  This
 * is intended for use by DDB.
 */
struct lwp *
proc_representative_lwp(struct proc *p, int *nrlwps, int locking)
{
	struct lwp *l, *onproc, *running, *sleeping, *stopped, *suspended;
	struct lwp *signalled;
	int cnt;

	if (locking) {
		LOCK_ASSERT(mutex_owned(&p->p_smutex));
	}

	/* Trivial case: only one LWP */
	if (p->p_nlwps == 1) {
		l = LIST_FIRST(&p->p_lwps);
		if (nrlwps)
			*nrlwps = (l->l_stat == LSONPROC ||
			    l->l_stat == LSRUN);
		if (locking)
			lwp_lock(l);
		return l;
	}

	cnt = 0;
	switch (p->p_stat) {
	case SSTOP:
	case SACTIVE:
		/* Pick the most live LWP */
		onproc = running = sleeping = stopped = suspended = NULL;
		signalled = NULL;
		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			if (locking)
				lwp_lock(l);
			if (l->l_lid == p->p_sigctx.ps_lwp)
				signalled = l;
			switch (l->l_stat) {
			case LSONPROC:
				onproc = l;
				cnt++;
				break;
			case LSRUN:
				running = l;
				cnt++;
				break;
			case LSSLEEP:
				sleeping = l;
				break;
			case LSSTOP:
				stopped = l;
				break;
			case LSSUSPENDED:
				suspended = l;
				break;
			}
			if (locking)
				lwp_unlock(l);
		}
		if (nrlwps)
			*nrlwps = cnt;
		if (signalled)
			l = signalled;
		else if (onproc)
			l = onproc;
		else if (running)
			l = running;
		else if (sleeping)
			l = sleeping;
		else if (stopped)
			l = stopped;
		else if (suspended)
			l = suspended;
		else
			break;
		if (locking)
			lwp_lock(l);
		return l;
	case SZOMB:
		/* Doesn't really matter... */
		if (nrlwps)
			*nrlwps = 0;
		l = LIST_FIRST(&p->p_lwps);
		if (locking)
			lwp_lock(l);
		return l;
#ifdef DIAGNOSTIC
	case SIDL:
		if (locking)
			mutex_exit(&p->p_smutex);
		/*
		 * We have more than one LWP and we're in SIDL?
		 * How'd that happen?
		 */
		panic("Too many LWPs in SIDL process %d (%s)",
		    p->p_pid, p->p_comm);
	default:
		if (locking)
			mutex_exit(&p->p_smutex);
		panic("Process %d (%s) in unknown state %d",
		    p->p_pid, p->p_comm, p->p_stat);
#endif
	}

	if (locking)
		mutex_exit(&p->p_smutex);
	panic("proc_representative_lwp: couldn't find a lwp for process"
	    " %d (%s)", p->p_pid, p->p_comm);
	/* NOTREACHED */
	return NULL;
}

/*
 * Look up a live LWP within the specified process, and return it locked.
 *
 * Must be called with p->p_smutex held.
 */
struct lwp *
lwp_byid(struct proc *p, int id)
{
	struct lwp *l;

	LOCK_ASSERT(mutex_owned(&p->p_smutex));

	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
		if (l->l_lid == id)
			break;
	}

	if (l != NULL) {
		lwp_lock(l);
		if (l->l_stat == LSIDL || l->l_stat == LSZOMB ||
		    l->l_stat == LSDEAD) {
			lwp_unlock(l);
			l = NULL;
		}
	}

	return l;
}
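
/*
 * Illustrative sketch only, not compiled: pinning an LWP found with
 * lwp_byid() so that it can be inspected after the locks are dropped.
 * example_pin_lwp() is hypothetical; lwp_addref() and lwp_delref() are
 * defined later in this file.
 */
#if 0
static int
example_pin_lwp(struct proc *p, lwpid_t lid)
{
	struct lwp *l;

	mutex_enter(&p->p_smutex);
	if ((l = lwp_byid(p, lid)) == NULL) {	/* returned locked */
		mutex_exit(&p->p_smutex);
		return ESRCH;
	}
	lwp_addref(l);		/* keeps the LWP out of LSZOMB */
	lwp_unlock(l);
	mutex_exit(&p->p_smutex);

	/* ... inspect the LWP here ... */

	lwp_delref(l);		/* finalizes the LWP if it died meanwhile */
	return 0;
}
#endif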

/*
 * Update an LWP's cached credentials to mirror the process' master copy.
 *
 * This happens early in the syscall path, on user trap, and on LWP
 * creation.  A long-running LWP can also voluntarily choose to update
 * its credentials by calling this routine.  This may be called from
 * LWP_CACHE_CREDS(), which checks l->l_cred != p->p_cred beforehand.
 */
void
lwp_update_creds(struct lwp *l)
{
	kauth_cred_t oc;
	struct proc *p;

	p = l->l_proc;
	oc = l->l_cred;

	mutex_enter(&p->p_mutex);
	kauth_cred_hold(p->p_cred);
	l->l_cred = p->p_cred;
	mutex_exit(&p->p_mutex);
	if (oc != NULL)
		kauth_cred_free(oc);
}
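
/*
 * Illustrative sketch only, not compiled: the check that the comment
 * above attributes to LWP_CACHE_CREDS().  This fragment is an assumption
 * about its rough shape, not a definition of the real macro.
 */
#if 0
	if (__predict_false(l->l_cred != p->p_cred))
		lwp_update_creds(l);
#endif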

/*
 * Verify that an LWP is locked, and optionally verify that the lock matches
 * one we specify.
 */
int
lwp_locked(struct lwp *l, kmutex_t *mtx)
{
	kmutex_t *cur = l->l_mutex;

#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
	return mutex_owned(cur) && (mtx == cur || mtx == NULL);
#else
	return mutex_owned(cur);
#endif
}

#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
/*
 * Lock an LWP.
 */
void
lwp_lock_retry(struct lwp *l, kmutex_t *old)
{

	do {
		mutex_exit(old);
		old = l->l_mutex;
		mutex_enter(old);

		/*
		 * mutex_enter() will have posted a read barrier.  Re-test
		 * l->l_mutex.  If it has changed, we need to try again.
		 */
	} while (__predict_false(l->l_mutex != old));
}
#endif
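
/*
 * Illustrative sketch only, not compiled: the fast path that
 * lwp_lock_retry() backs up.  The real lwp_lock() lives elsewhere (see
 * sys/lwp.h); this is only an assumption about its rough shape.
 */
#if 0
static void
example_lwp_lock(struct lwp *l)
{
	kmutex_t *old = l->l_mutex;

	mutex_enter(old);

	/* If the lock changed while we waited for it, start the dance. */
	if (__predict_false(l->l_mutex != old))
		lwp_lock_retry(l, old);
}
#endif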

/*
 * Lend a new mutex to an LWP.  The old mutex must be held.
 */
void
lwp_setlock(struct lwp *l, kmutex_t *new)
{

	LOCK_ASSERT(mutex_owned(l->l_mutex));

#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
	mb_write();
	l->l_mutex = new;
#else
	(void)new;
#endif
}

/*
 * Lend a new mutex to an LWP, and release the old mutex.  The old mutex
 * must be held.
 */
void
lwp_unlock_to(struct lwp *l, kmutex_t *new)
{
	kmutex_t *old;

	LOCK_ASSERT(mutex_owned(l->l_mutex));

	old = l->l_mutex;
#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
	mb_write();
	l->l_mutex = new;
#else
	(void)new;
#endif
	mutex_exit(old);
}

/*
 * Acquire a new mutex, and donate it to an LWP.  The LWP must already be
 * locked.
 */
void
lwp_relock(struct lwp *l, kmutex_t *new)
{
#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
	kmutex_t *old;
#endif

	LOCK_ASSERT(mutex_owned(l->l_mutex));

#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
	old = l->l_mutex;
	if (old != new) {
		mutex_enter(new);
		l->l_mutex = new;
		mutex_exit(old);
	}
#else
	(void)new;
#endif
}
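
/*
 * Illustrative sketch only, not compiled: lending a mutex to an LWP with
 * lwp_unlock_to(), as a sleep queue would when an LWP enters LSSLEEP (see
 * the locking notes at the top of this file).  example_lend_lock() is
 * hypothetical.
 */
#if 0
static void
example_lend_lock(struct lwp *l, kmutex_t *new)
{

	lwp_lock(l);			/* lock via the current l_mutex */
	/* ... transition the LWP, e.g. onto a sleep queue ... */
	lwp_unlock_to(l, new);		/* l_mutex = new, old lock released */
}
#endif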

/*
 * Handle exceptions for mi_userret().  Called if L_USERRET is set.
 */
void
lwp_userret(struct lwp *l)
{
	struct proc *p;
	int sig;

	p = l->l_proc;

	do {
		/* Process pending signals first. */
		if ((l->l_flag & L_PENDSIG) != 0) {
			KERNEL_LOCK(1, l);	/* XXXSMP pool_put() below */
			mutex_enter(&p->p_smutex);
			while ((sig = issignal(l)) != 0)
				postsig(sig);
			mutex_exit(&p->p_smutex);
			(void)KERNEL_UNLOCK(0, l);	/* XXXSMP */
		}

		/* Core-dump or suspend pending. */
		if ((l->l_flag & L_WSUSPEND) != 0) {
			/*
			 * Suspend ourselves, so that the kernel stack and
			 * therefore the userland registers saved in the
			 * trapframe are around for coredump() to write them
			 * out.  We issue a wakeup() on p->p_nrlwps so that
			 * sigexit() will write the core file out once all
			 * other LWPs are suspended.
			 */
			mutex_enter(&p->p_smutex);
			lwp_lock(l);
			lwp_relock(l, &lwp_mutex);
			p->p_nrlwps--;
			wakeup(&p->p_nrlwps);
			l->l_stat = LSSUSPENDED;
			mutex_exit(&p->p_smutex);
			mi_switch(l, NULL);
			lwp_lock(l);
		}

		/* Process is exiting. */
		if ((l->l_flag & L_WEXIT) != 0) {
			KERNEL_LOCK(1, l);
			(void)lwp_exit(l, 0);
			KASSERT(0);
			/* NOTREACHED */
		}
	} while ((l->l_flag & L_USERRET) != 0);
}

/*
 * Return non-zero if this is the last live LWP in the process.  Called when
 * exiting, dumping core, waiting for other LWPs to exit, etc.  Accepts a
 * 'bias' value for deadlock detection.
 *
 * Must be called with p->p_smutex held.
 */
int
lwp_lastlive(int bias)
{
	struct lwp *l = curlwp;
	struct proc *p = l->l_proc;

	LOCK_ASSERT(mutex_owned(&p->p_smutex));
	KASSERT(l->l_stat == LSONPROC || l->l_stat == LSSTOP);

	return (p->p_nrlwps - bias - (l->l_stat == LSONPROC)) == 0;
}
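
/*
 * For example, lwp_wait1() above calls lwp_lastlive(p->p_nlwpwait): with
 * the caller itself discounted (it is LSONPROC), the expression is
 * non-zero exactly when every other non-exited LWP is parked in
 * lwp_wait1() or otherwise not running, which is the deadlock case.
 */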

/*
 * Add one reference to an LWP.  This will prevent the LWP from
 * transitioning from the LSDEAD state into LSZOMB, and thus keep
 * the lwp structure and PCB around to inspect.
 */
void
lwp_addref(struct lwp *l)
{

	LOCK_ASSERT(lwp_locked(l, NULL));
	KASSERT(l->l_stat != LSZOMB);
	KASSERT(l->l_refcnt != 0);

	l->l_refcnt++;
}

/*
 * Remove one reference to an LWP.  If this is the last reference,
 * then we must finalize the LWP's death.
 */
void
lwp_delref(struct lwp *l)
{

	lwp_exit2(l);
}

/*
 * lwp_specific_key_create --
 *	Create a key for subsystem lwp-specific data.
 */
int
lwp_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
{

	return (specificdata_key_create(lwp_specificdata_domain, keyp, dtor));
}

/*
 * lwp_specific_key_delete --
 *	Delete a key for subsystem lwp-specific data.
 */
void
lwp_specific_key_delete(specificdata_key_t key)
{

	specificdata_key_delete(lwp_specificdata_domain, key);
}

/*
 * lwp_initspecific --
 *	Initialize an LWP's specificdata container.
 */
void
lwp_initspecific(struct lwp *l)
{
	int error;

	error = specificdata_init(lwp_specificdata_domain, &l->l_specdataref);
	KASSERT(error == 0);
}

/*
 * lwp_finispecific --
 *	Finalize an LWP's specificdata container.
 */
void
lwp_finispecific(struct lwp *l)
{

	specificdata_fini(lwp_specificdata_domain, &l->l_specdataref);
}

/*
 * lwp_getspecific --
 *	Return lwp-specific data corresponding to the specified key.
 *
 *	Note: LWP specific data is NOT INTERLOCKED.  An LWP should access
 *	only its OWN SPECIFIC DATA.  If it is necessary to access another
 *	LWP's specific data, care must be taken to ensure that doing so
 *	would not cause internal data structure inconsistency (i.e. caller
 *	can guarantee that the target LWP is not inside an lwp_getspecific()
 *	or lwp_setspecific() call).
 */
void *
lwp_getspecific(specificdata_key_t key)
{

	return (specificdata_getspecific_unlocked(lwp_specificdata_domain,
	    &curlwp->l_specdataref, key));
}

void *
_lwp_getspecific_by_lwp(struct lwp *l, specificdata_key_t key)
{

	return (specificdata_getspecific_unlocked(lwp_specificdata_domain,
	    &l->l_specdataref, key));
}

/*
 * lwp_setspecific --
 *	Set lwp-specific data corresponding to the specified key.
 */
void
lwp_setspecific(specificdata_key_t key, void *data)
{

	specificdata_setspecific(lwp_specificdata_domain,
	    &curlwp->l_specdataref, key, data);
}
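
/*
 * Illustrative sketch only, not compiled: the expected life cycle of an
 * lwp-specific data key.  The key, the stored datum and the destructor
 * example_dtor() are all hypothetical.
 */
#if 0
static specificdata_key_t example_key;
static int example_datum;

static void
example_dtor(void *data)
{

	/* Release whatever was stored under example_key. */
}

static void
example_specificdata_usage(void)
{
	void *data;

	if (lwp_specific_key_create(&example_key, example_dtor) != 0)
		return;

	/* Attach a value to the calling LWP and read it back. */
	lwp_setspecific(example_key, &example_datum);
	data = lwp_getspecific(example_key);
	KASSERT(data == &example_datum);

	lwp_specific_key_delete(example_key);
}
#endif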