/*	$NetBSD: kern_softint.c,v 1.1.2.4 2007/07/07 12:12:40 ad Exp $	*/

/*-
 * Copyright (c) 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Generic software interrupt framework.
 *
 * Overview
 *
 *	The soft interrupt framework provides a mechanism to schedule a
 *	low priority callback that runs with thread context.  It allows
 *	for dynamic registration of software interrupts, and for fair
 *	queueing and prioritization of those interrupts.  The callbacks
 *	can be scheduled to run from nearly any point in the kernel: by
 *	code running with thread context, by code running from a
 *	hardware interrupt handler, and at any interrupt priority
 *	level.
 *
 * Priority levels
 *
 *	Since soft interrupt dispatch can be tied to the underlying
 *	architecture's interrupt dispatch code, it may be limited both
 *	by the capabilities of the hardware and by the capabilities of
 *	the interrupt dispatch code itself.  The number of levels is
 *	therefore restricted to four.  In order of priority (lowest to
 *	highest) the levels are: clock, bio, net, serial.
 *
 *	The symbolic names are provided only as a guide and in
 *	isolation do not have any direct connection with a particular
 *	kind of device activity.
 *
 *	The four priority levels map directly to scheduler priority
 *	levels, and where the architecture implements 'fast' software
 *	interrupts, they also map onto interrupt priorities.  The
 *	interrupt priorities are intended to be hidden from machine
 *	independent code, which should use multiprocessor and
 *	preemption aware mechanisms to synchronize with software
 *	interrupts (for example: mutexes).
 *
 * Capabilities
 *
 *	As with hardware interrupt handlers, software interrupts run
 *	with limited machine context.  In particular, they do not
 *	possess any VM (virtual memory) context, and should therefore
 *	not try to operate on user space addresses, or to use virtual
 *	memory facilities other than those noted as interrupt safe.
 *
 *	Unlike hardware interrupts, software interrupts do have thread
 *	context.  They may block on synchronization objects, sleep, and
 *	resume execution at a later time.  Since software interrupts
 *	are a limited resource and (typically) run with higher priority
 *	than all other threads in the system, all block-and-resume
 *	activity by a software interrupt must be kept short in order to
 *	allow further processing at that level to continue.  The kernel
 *	does not allow software interrupts to use facilities or perform
 *	actions that may block for a significant amount of time.  This
 *	means that it's not valid for a software interrupt to: sleep on
 *	condition variables, use the lockmgr() facility, or wait for
 *	resources to become available (for example, memory).
 *
 *	Software interrupts may block to await ownership of locks that
 *	are typically owned only for a short period of time: mutexes
 *	and reader/writer locks.  By extension, code running in the
 *	bottom half of the kernel must take care to ensure that any
 *	lock that may be taken from a software interrupt can not be
 *	held for more than a short period of time.
 *
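 *	As a rough illustration only (the foo_* names are made up and
 *	struct foo_softc is a stand-in for a driver's own state), a
 *	typical consumer registers a handler once at attach time and
 *	later triggers it, for instance from its hardware interrupt
 *	handler:
 *
 *		static void *foo_sih;
 *
 *		void
 *		foo_attach(struct foo_softc *sc)
 *		{
 *			foo_sih = softint_establish(SOFTINT_NET,
 *			    foo_softintr, sc);
 *		}
 *
 *		int
 *		foo_hardintr(void *arg)
 *		{
 *			...
 *			softint_schedule(foo_sih);
 *			return 1;
 *		}
 *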
 * Per-CPU operation
 *
 *	Soft interrupts are strictly per-CPU.  If a soft interrupt is
 *	triggered on a CPU, it will only be dispatched on that CPU.
 *	Each LWP dedicated to handling a soft interrupt is bound to
 *	its home CPU, so if the LWP blocks and needs to run again, it
 *	can only run there.  Nearly all data structures used to manage
 *	software interrupts are per-CPU.
 *
 *	Soft interrupts can occur many thousands of times per second.
 *	In light of this, the per-CPU requirement is intended to solve
 *	three problems:
 *
 *	1) When passing work down from a hardware interrupt handler to
 *	a software interrupt (for example, using a queue), spinlocks
 *	need not be used to guarantee data integrity: adjusting the CPU
 *	local interrupt priority level is sufficient (a sketch of this
 *	pattern follows this list).  Acquiring spinlocks is
 *	computationally expensive, as it increases traffic on the
 *	system bus and can stall processors with long execution
 *	pipelines.
 *
 *	2) Often hardware interrupt handlers manipulate data structures
 *	and then pass those to a software interrupt for further
 *	processing.  If those data structures are immediately passed to
 *	another CPU, the associated cache lines may be forced across
 *	the system bus, generating more bus traffic.
 *
 *	3) The data structures used to manage soft interrupts are also
 *	CPU local, again to reduce unnecessary bus traffic.
 *
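 *	As an illustrative sketch only (the foo_* names and splfoo()
 *	are hypothetical; splfoo() stands for whatever spl*() call
 *	blocks the driver's hardware interrupt), the handoff described
 *	in 1) needs no spinlock.  The hardware interrupt handler, which
 *	already runs above the soft interrupt's priority, simply
 *	appends to a strictly per-CPU queue and calls
 *	softint_schedule(); the soft interrupt handler raises the IPL
 *	while dequeueing:
 *
 *		struct foo_work {
 *			TAILQ_ENTRY(foo_work)	fw_entry;
 *			...
 *		};
 *		TAILQ_HEAD(, foo_work) foo_queue;
 *
 *		static void
 *		foo_softintr(void *arg)
 *		{
 *			struct foo_work *fw;
 *			int s;
 *
 *			s = splfoo();
 *			while ((fw = TAILQ_FIRST(&foo_queue)) != NULL) {
 *				TAILQ_REMOVE(&foo_queue, fw, fw_entry);
 *				splx(s);
 *				... process the work item ...
 *				s = splfoo();
 *			}
 *			splx(s);
 *		}
 *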
 * Generic implementation
 *
 *	A generic, low performance implementation is provided that
 *	works across all architectures, with no machine-dependent
 *	modifications needed.  This implementation uses the scheduler,
 *	and so has a number of restrictions:
 *
 *	1) Since software interrupts can be triggered from any priority
 *	level, on architectures where the generic implementation is
 *	used IPL_SCHED must be equal to IPL_HIGH.
 *
 *	2) The software interrupts are not preemptive, and so must wait
 *	for the currently executing thread to yield the CPU.  This
 *	can introduce latency.
 *
 *	3) A context switch is required for each soft interrupt to be
 *	handled, which can be quite expensive.
 *
 * 'Fast' software interrupts
 *
 *	XXX
 *
 *	The !__HAVE_FAST_SOFTINTS case assumes splhigh == splsched.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_softint.c,v 1.1.2.4 2007/07/07 12:12:40 ad Exp $");

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/intr.h>
#include <sys/mutex.h>
#include <sys/kthread.h>
#include <sys/evcnt.h>
#include <sys/cpu.h>

#include <net/netisr.h>

#include <uvm/uvm_extern.h>

#define	PRI_SOFTSERIAL	(PRI_COUNT - 1)
#define	PRI_SOFTNET	(PRI_SOFTSERIAL - schedppq * 1)
#define	PRI_SOFTBIO	(PRI_SOFTSERIAL - schedppq * 2)
#define	PRI_SOFTCLOCK	(PRI_SOFTSERIAL - schedppq * 3)

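/*
 * Data structures (see softint_init() and softint_establish() below):
 *
 *	softint_t	one instance per priority level, per CPU: the
 *			queue of pending handlers and the LWP that runs
 *			them.
 *
 *	softhand_t	one instance per established handler, per CPU.
 *			A handler occupies the same array slot on every
 *			CPU, so the opaque cookie returned by
 *			softint_establish() is simply the byte offset
 *			of the softhand_t within the per-CPU softcpu_t.
 *
 *	softcpu_t	the per-CPU container: the softint_t for each
 *			level, followed by the variable length array
 *			of softhand_t.
 */
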
/* This could overlap with signal info in struct lwp. */
typedef struct softint {
	TAILQ_HEAD(, softhand)	si_q;
	struct lwp		*si_lwp;
	struct cpu_info		*si_cpu;
	uintptr_t		si_machdep;
	struct evcnt		si_evcnt;
	int			si_active;
	char			si_name[8];
} softint_t;

typedef struct softhand {
	TAILQ_ENTRY(softhand)	sh_q;
	void			(*sh_func)(void *);
	void			*sh_arg;
	softint_t		*sh_isr;
	u_int			sh_pending;
	u_int			sh_flags;
} softhand_t;

typedef struct softcpu {
	struct cpu_info		*sc_cpu;
	softint_t		sc_int[SOFTINT_COUNT];
	softhand_t		sc_hand[1];
} softcpu_t;

static void	softint_thread(void *);
static void	softint_netisr(void *);

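/*
 * softint_bytes sizes the per-CPU allocation (the softcpu_t header plus
 * the array of softhand_t); softint_max is the number of handler slots
 * that fit within it.  softint_lock serializes handler registration and
 * removal, and softint_netisr_sih is the cookie for the legacy netisr
 * handler established in softint_init().  softint_block is the
 * "softint block" event counter; it is attached here but incremented
 * outside this file.
 */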
u_int		softint_bytes = 8192;
static u_int	softint_max;
static kmutex_t	softint_lock;
static void	*softint_netisr_sih;
struct evcnt	softint_block;

/*
 * softint_init_isr:
 *
 *	Initialize a single interrupt level for a single CPU.
 */
static void
softint_init_isr(softcpu_t *sc, const char *desc, pri_t pri, u_int level)
{
	struct cpu_info *ci;
	softint_t *si;
	int error;

	si = &sc->sc_int[level];
	ci = sc->sc_cpu;
	si->si_cpu = ci;

	TAILQ_INIT(&si->si_q);

	error = kthread_create(pri, KTHREAD_MPSAFE | KTHREAD_INTR |
	    KTHREAD_IDLE, ci, softint_thread, si, &si->si_lwp,
	    "soft%s/%d", desc, (int)ci->ci_cpuid);
	if (error != 0)
		panic("softint_init_isr: error %d", error);

	snprintf(si->si_name, sizeof(si->si_name), "%s/%d", desc,
	    (int)ci->ci_cpuid);
	evcnt_attach_dynamic(&si->si_evcnt, EVCNT_TYPE_INTR, NULL,
	    "softint", si->si_name);

	si->si_lwp->l_private = si;
	softint_init_md(si->si_lwp, level, &si->si_machdep);
#ifdef __HAVE_FAST_SOFTINTS
	si->si_lwp->l_mutex = &ci->ci_schedstate.spc_lwplock;
#endif
}

/*
 * softint_init:
 *
 *	Initialize per-CPU data structures.  Called from mi_cpu_attach().
 */
void
softint_init(struct cpu_info *ci)
{
	static struct cpu_info *first;
	softcpu_t *sc, *scfirst;
	softhand_t *sh, *shmax;

	if (first == NULL) {
		/* Boot CPU. */
		first = ci;
		mutex_init(&softint_lock, MUTEX_DEFAULT, IPL_NONE);
		softint_bytes = round_page(softint_bytes);
		softint_max = (softint_bytes - sizeof(softcpu_t)) /
		    sizeof(softhand_t);
		evcnt_attach_dynamic(&softint_block, EVCNT_TYPE_INTR,
		    NULL, "softint", "block");
	}

	sc = (softcpu_t *)uvm_km_alloc(kernel_map, softint_bytes, 0,
	    UVM_KMF_WIRED | UVM_KMF_ZERO);
	if (sc == NULL)
		panic("softint_init_cpu: cannot allocate memory");

	ci->ci_data.cpu_softcpu = sc;
	sc->sc_cpu = ci;

	softint_init_isr(sc, "net", PRI_SOFTNET, SOFTINT_NET);
	softint_init_isr(sc, "bio", PRI_SOFTBIO, SOFTINT_BIO);
	softint_init_isr(sc, "clk", PRI_SOFTCLOCK, SOFTINT_CLOCK);
	softint_init_isr(sc, "ser", PRI_SOFTSERIAL, SOFTINT_SERIAL);

	if (first != ci) {
		/* Don't lock -- autoconfiguration will prevent reentry. */
		scfirst = first->ci_data.cpu_softcpu;
		sh = sc->sc_hand;
		memcpy(sh, scfirst->sc_hand, sizeof(*sh) * softint_max);

		/* Update pointers for this CPU. */
		for (shmax = sh + softint_max; sh < shmax; sh++) {
			if (sh->sh_func == NULL)
				continue;
			sh->sh_isr =
			    &sc->sc_int[sh->sh_flags & SOFTINT_LVLMASK];
		}
	} else {
		/* Establish a handler for legacy net interrupts. */
		softint_netisr_sih = softint_establish(SOFTINT_NET,
		    softint_netisr, NULL);
		KASSERT(softint_netisr_sih != NULL);
	}
}

/*
 * softint_establish:
 *
 *	Register a software interrupt handler.
 */
void *
softint_establish(u_int flags, void (*func)(void *), void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	softcpu_t *sc;
	softhand_t *sh;
	u_int level, index;

	level = (flags & SOFTINT_LVLMASK);
	KASSERT(level < SOFTINT_COUNT);

	mutex_enter(&softint_lock);

	/* Find a free slot. */
	sc = curcpu()->ci_data.cpu_softcpu;
	for (index = 1; index < softint_max; index++)
		if (sc->sc_hand[index].sh_func == NULL)
			break;
	if (index == softint_max) {
		mutex_exit(&softint_lock);
		printf("WARNING: softint_establish: table full, "
		    "increase softint_bytes\n");
		return NULL;
	}

	/* Set up the handler on each CPU. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		sc = ci->ci_data.cpu_softcpu;
		sh = &sc->sc_hand[index];

		sh->sh_isr = &sc->sc_int[level];
		sh->sh_func = func;
		sh->sh_arg = arg;
		sh->sh_flags = flags;
		sh->sh_pending = 0;
	}

	mutex_exit(&softint_lock);

	return (void *)((uint8_t *)&sc->sc_hand[index] - (uint8_t *)sc);
}

/*
 * softint_disestablish:
 *
 *	Unregister a software interrupt handler.
 */
void
softint_disestablish(void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	softcpu_t *sc;
	softhand_t *sh;
	uintptr_t offset;

	offset = (uintptr_t)arg;
	KASSERT(offset != 0 && offset < softint_bytes);

	mutex_enter(&softint_lock);

	/* Clear the handler on each CPU. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		sc = ci->ci_data.cpu_softcpu;
		sh = (softhand_t *)((uint8_t *)sc + offset);
		KASSERT(sh->sh_func != NULL);
		KASSERT(sh->sh_pending == 0);
		sh->sh_func = NULL;
	}

	mutex_exit(&softint_lock);
}

/*
 * softint_schedule:
 *
 *	Trigger a software interrupt.  Must be called from a hardware
 *	interrupt handler, or with preemption disabled (since we are
 *	using the value of curcpu()).
 */
void
softint_schedule(void *arg)
{
	softhand_t *sh;
	softint_t *si;
	uintptr_t offset;
	int s;

	/* Find the handler record for this CPU. */
	offset = (uintptr_t)arg;
	KASSERT(offset != 0 && offset < softint_bytes);
	sh = (softhand_t *)((uint8_t *)curcpu()->ci_data.cpu_softcpu + offset);

	/* If it's already pending there's nothing to do. */
	if (sh->sh_pending)
		return;

	/*
	 * Enqueue the handler into the LWP's pending list.
	 * If the LWP is completely idle, then make it run.
	 */
	s = splhigh();
	if (!sh->sh_pending) {
		si = sh->sh_isr;
		sh->sh_pending = 1;
		TAILQ_INSERT_TAIL(&si->si_q, sh, sh_q);
		if (si->si_active == 0) {
			si->si_active = 1;
			softint_trigger(si->si_machdep);
		}
	}
	splx(s);
}

/*
 * softint_execute:
 *
 *	Invoke handlers for the specified soft interrupt.
 *	Must be entered at splhigh.  Will drop the priority
 *	to the level specified, but returns back at splhigh.
 */
static inline void
softint_execute(softint_t *si, lwp_t *l, int s)
{
	softhand_t *sh;
	lwp_t *l2;

	KASSERT(si->si_lwp == curlwp);
	KASSERT(si->si_cpu == curcpu());
	KASSERT(si->si_lwp->l_wchan == NULL);
	KASSERT(!TAILQ_EMPTY(&si->si_q));
	KASSERT(si->si_active);

	while (!TAILQ_EMPTY(&si->si_q)) {
		/*
		 * If any interrupted LWP has a higher priority than us,
		 * we must yield immediately.  Note that IPL_HIGH may be
		 * above IPL_SCHED, so we have to drop the interrupt
		 * priority level before yielding.
		 *
		 * XXXAD Optimise this away.
		 */
		for (l2 = l->l_switchto; l2 != NULL; l2 = l2->l_switchto) {
			if (lwp_eprio(l2) > l->l_priority)
				break;
		}
		if (l2 != NULL) {
			splx(s);
			yield();
			(void)splhigh();
			continue;
		}

		/*
		 * Pick the longest waiting handler to run.  We block
		 * interrupts but do not lock in order to do this, as
		 * we are protecting against the local CPU only.
		 */
		sh = TAILQ_FIRST(&si->si_q);
		TAILQ_REMOVE(&si->si_q, sh, sh_q);
		sh->sh_pending = 0;
		splx(s);

		/* Run the handler. */
		if ((sh->sh_flags & SOFTINT_MPSAFE) == 0) {
			KERNEL_LOCK(1, l);
		}
		(*sh->sh_func)(sh->sh_arg);
		if ((sh->sh_flags & SOFTINT_MPSAFE) == 0) {
			KERNEL_UNLOCK_ONE(l);
		}

		(void)splhigh();
	}

	/*
	 * Unlocked, but only for statistics.
	 * Should be per-CPU to prevent cache ping-pong.
	 */
	uvmexp.softs++;

	si->si_evcnt.ev_count++;
	si->si_active = 0;
}

/*
 * schednetisr:
 *
 *	Trigger a legacy network interrupt.  XXX Needs to go away.
 */
void
schednetisr(int isr)
{
	int s;

	s = splhigh();
	curcpu()->ci_data.cpu_netisrs |= (1 << isr);
	softint_schedule(softint_netisr_sih);
	splx(s);
}

/*
 * softint_netisr:
 *
 *	Dispatch legacy network interrupts.  XXX Needs to go away.
 */
static void
softint_netisr(void *cookie)
{
	struct cpu_info *ci;
	int s, bits;

	ci = curcpu();

	s = splhigh();
	bits = ci->ci_data.cpu_netisrs;
	ci->ci_data.cpu_netisrs = 0;
	splx(s);

#define	DONETISR(which, func)						\
	do {								\
		void func(void);					\
		if ((bits & (1 << which)) != 0)				\
			func();						\
	} while (0);
#include <net/netisr_dispatch.h>
#undef DONETISR
}

#ifndef __HAVE_FAST_SOFTINTS

/*
 * softint_init_md:
 *
 *	Perform machine-dependent initialization.  Arguments:
 *
 *	l
 *
 *		LWP to handle the interrupt
 *
 *	level
 *
 *		Symbolic level: SOFTINT_*
 *
 *	machdep
 *
 *		Private value for machine dependent code,
 *		passed by MI code to softint_trigger().
 */
void
softint_init_md(lwp_t *l, u_int level, uintptr_t *machdep)
{
	softint_t *si;

	*machdep = (uintptr_t)l;
	si = l->l_private;

	lwp_lock(l);
	/* Cheat and make the KASSERT in softint_thread() happy. */
	si->si_active = 1;
	l->l_stat = LSRUN;
	sched_enqueue(l, false);
	lwp_unlock(l);
}

/*
 * softint_trigger:
 *
 *	Cause a soft interrupt handler to begin executing.
 */
void
softint_trigger(uintptr_t machdep)
{
	struct cpu_info *ci;
	lwp_t *l;

	l = (lwp_t *)machdep;
	ci = l->l_cpu;

	spc_lock(ci);
	l->l_mutex = ci->ci_schedstate.spc_mutex;
	l->l_stat = LSRUN;
	sched_enqueue(l, false);
	cpu_need_resched(ci, 1);
	spc_unlock(ci);
}

/*
 * softint_thread:
 *
 *	MI software interrupt dispatch for the generic case.  Where the
 *	architecture provides fast software interrupts, this routine is
 *	not used: the LWP is switched to without restoring any state,
 *	and there is a direct handoff between the interrupt stub and
 *	softint_execute().
 */
void
softint_thread(void *cookie)
{
	softint_t *si;
	lwp_t *l;
	int s;

	l = curlwp;
	si = l->l_private;
	s = splhigh();

	for (;;) {
		softint_execute(si, l, s);

		lwp_lock(l);
		l->l_stat = LSIDL;
		mi_switch(l);
	}
}

#else	/* !__HAVE_FAST_SOFTINTS */

/*
 * softint_thread:
 *
 *	In the __HAVE_FAST_SOFTINTS case, the LWP is switched to without
 *	restoring any state, so we should not arrive here - there is a
 *	direct handoff between the interrupt stub and softint_dispatch().
 */
void
softint_thread(void *cookie)
{

	panic("softint_thread");
}

/*
 * softint_dispatch:
 *
 *	Entry point from machine-dependent code.
 */
void
softint_dispatch(lwp_t *pinned, int s)
{
	softint_t *si;
	lwp_t *l;

	l = curlwp;
	si = l->l_private;

	/*
	 * Note the interrupted LWP, and mark the current LWP as running
	 * before proceeding.  Although this must as a rule be done with
	 * the LWP locked, at this point no external agents will want to
	 * modify the interrupt LWP's state.
	 */
	l->l_switchto = pinned;
	l->l_stat = LSONPROC;
	l->l_flag |= LW_RUNNING;

	softint_execute(si, l, s);

	/*
	 * If we blocked while handling the interrupt, the LWP underneath
	 * will be gone, so switch to the idle LWP.  It will select a new
	 * LWP to run.
	 *
	 * We must drop the priority level to IPL_SCHED here as switching
	 * at IPL_HIGH could deadlock the system.  We have already set
	 * si->si_active = 0 (which means another interrupt at this level
	 * can be triggered).  That's not a problem: we are lowering to
	 * level 's', which will prevent softint_execute() from being
	 * reentered, at a minimum until the priority is finally dropped
	 * to IPL_NONE on entry to the idle loop.
	 */
	l->l_stat = LSIDL;
	if (l->l_switchto == NULL) {
		splx(s);
		lwp_exit_switchaway(l);
		/* NOTREACHED */
	}
	l->l_switchto = NULL;
	l->l_flag &= ~LW_RUNNING;
}

#endif	/* !__HAVE_FAST_SOFTINTS */