/*	$NetBSD: kern_softint.c,v 1.1.2.6 2007/07/14 22:09:44 ad Exp $	*/

/*-
 * Copyright (c) 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Generic software interrupt framework.
 *
 * Overview
 *
 *	The soft interrupt framework provides a mechanism to schedule a
 *	low priority callback that runs with thread context.  It allows
 *	for dynamic registration of software interrupts, and for fair
 *	queueing and prioritization of those interrupts.  The callbacks
 *	can be scheduled to run from nearly any point in the kernel: by
 *	code running with thread context, by code running from a
 *	hardware interrupt handler, and at any interrupt priority
 *	level.
 *
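 *	For illustration only (the "example" names below are hypothetical,
 *	not a real driver), a consumer typically establishes a handler at
 *	attach time and triggers it from its hardware interrupt handler:
 *
 *		static void *example_sih;
 *
 *		static void
 *		example_softintr(void *arg)
 *		{
 *			struct example_softc *sc = arg;
 *
 *			example_process_queue(sc);
 *		}
 *
 *		void
 *		example_attach(struct example_softc *sc)
 *		{
 *			example_sih = softint_establish(SOFTINT_NET,
 *			    example_softintr, sc);
 *		}
 *
 *		int
 *		example_hardintr(void *arg)
 *		{
 *			struct example_softc *sc = arg;
 *
 *			example_enqueue_work(sc);
 *			softint_schedule(example_sih);
 *			return 1;
 *		}
 *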
 * Priority levels
 *
 *	Since soft interrupt dispatch can be tied to the underlying
 *	architecture's interrupt dispatch code, it may be limited both
 *	by the capabilities of the hardware and the capabilities of the
 *	interrupt dispatch code itself.  Therefore the number of levels
 *	is restricted to four.  In order of priority (lowest to highest)
 *	the levels are: clock, bio, net, serial.
 *
 *	The symbolic names are provided only as a guide and in isolation
 *	do not have any direct connection with a particular kind of
 *	device activity.
 *
 *	The four priority levels map directly to scheduler priority
 *	levels, and where the architecture implements 'fast' software
 *	interrupts, they also map onto interrupt priorities.  The
 *	interrupt priorities are intended to be hidden from machine
 *	independent code, which should use multiprocessor and
 *	preemption aware mechanisms to synchronize with software
 *	interrupts (for example: mutexes).
 *
 * Capabilities
 *
 *	As with hardware interrupt handlers, software interrupts run
 *	with limited machine context.  In particular, they do not
 *	possess any VM (virtual memory) context, and should therefore
 *	not try to operate on user space addresses, or to use virtual
 *	memory facilities other than those noted as interrupt safe.
 *
 *	Unlike hardware interrupts, software interrupts do have thread
 *	context.  They may block on synchronization objects, sleep, and
 *	resume execution at a later time.  Since software interrupts
 *	are a limited resource and (typically) run with higher priority
 *	than all other threads in the system, all block-and-resume
 *	activity by a software interrupt must be kept short in order to
 *	allow further processing at that level to continue.  The kernel
 *	does not allow software interrupts to use facilities or perform
 *	actions that may block for a significant amount of time.  This
 *	means that it's not valid for a software interrupt to: sleep on
 *	condition variables, use the lockmgr() facility, or wait for
 *	resources to become available (for example, memory).
 *
 *	Software interrupts may block to await ownership of locks that
 *	are typically held only for a short period of time: mutexes and
 *	reader/writer locks.  By extension, code running in the bottom
 *	half of the kernel must take care to ensure that any lock that
 *	may be taken from a software interrupt can not be held for more
 *	than a short period of time.
 *
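 *	For example (a sketch only, using the standard mutex(9)
 *	interface; "frob" is not a real subsystem), a lock shared
 *	between thread context and a soft interrupt handler is
 *	initialized with the IPL at which the handler runs, and both
 *	sides hold it only briefly:
 *
 *		kmutex_t frob_lock;
 *
 *		void
 *		frob_init(void)
 *		{
 *			mutex_init(&frob_lock, MUTEX_DEFAULT, IPL_SOFTNET);
 *		}
 *
 *		static void
 *		frob_softintr(void *arg)
 *		{
 *			mutex_enter(&frob_lock);
 *			frob_take_work();
 *			mutex_exit(&frob_lock);
 *			frob_do_work();
 *		}
 *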
 * Per-CPU operation
 *
 *	Soft interrupts are strictly per-CPU.  If a soft interrupt is
 *	triggered on a CPU, it will only be dispatched on that CPU.
 *	Each LWP dedicated to handling a soft interrupt is bound to
 *	its home CPU, so if the LWP blocks and needs to run again, it
 *	can only run there.  Nearly all data structures used to manage
 *	software interrupts are per-CPU.
 *
 *	Soft interrupts can occur many thousands of times per second.
 *	In light of this, the per-CPU requirement is intended to solve
 *	three problems:
 *
 *	1) For passing work down from a hardware interrupt handler to a
 *	software interrupt (for example, using a queue) spinlocks need
 *	not be used to guarantee data integrity.  Adjusting the CPU
 *	local interrupt priority level is sufficient.  Acquiring
 *	spinlocks is computationally expensive, as it increases traffic
 *	on the system bus and can stall processors with long execution
 *	pipelines.  (A sketch of this pattern follows the list below.)
 *
 *	2) Often hardware interrupt handlers manipulate data structures
 *	and then pass those to a software interrupt for further
 *	processing.  If those data structures are immediately passed to
 *	another CPU, the associated cache lines may be forced across
 *	the system bus, generating more bus traffic.
 *
 *	3) The data structures used to manage soft interrupts are also
 *	CPU local, again to reduce unnecessary bus traffic.
 *
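 *	As a sketch of problem 1 (hypothetical names; "splfoo()" stands
 *	for whichever spl call blocks the producing hardware interrupt
 *	on the local CPU), the consuming soft interrupt handler needs
 *	only to raise the local IPL while manipulating the shared
 *	queue, with no spinlock:
 *
 *		static void
 *		exq_softintr(void *arg)
 *		{
 *			struct exq_work *ew;
 *			int s;
 *
 *			s = splfoo();
 *			while ((ew = TAILQ_FIRST(&exq_head)) != NULL) {
 *				TAILQ_REMOVE(&exq_head, ew, ew_q);
 *				splx(s);
 *				exq_process(ew);
 *				s = splfoo();
 *			}
 *			splx(s);
 *		}
 *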
 * Generic implementation
 *
 *	A generic, low performance implementation is provided that
 *	works across all architectures, with no machine-dependent
 *	modifications needed.  This implementation uses the scheduler,
 *	and so has a number of restrictions:
 *
 *	1) Since software interrupts can be triggered from any priority
 *	level, on architectures where the generic implementation is
 *	used IPL_SCHED must be equal to IPL_HIGH.
 *
 *	2) The software interrupts are not preemptive, and so must wait
 *	for the currently executing thread to yield the CPU.  This
 *	can introduce latency.
 *
 *	3) A context switch is required for each soft interrupt to be
 *	handled, which can be quite expensive.
 *
 * 'Fast' software interrupts
 *
 *	XXX
 *
 *	The !__HAVE_FAST_SOFTINTS case assumes splhigh == splsched.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_softint.c,v 1.1.2.6 2007/07/14 22:09:44 ad Exp $");

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/intr.h>
#include <sys/mutex.h>
#include <sys/kthread.h>
#include <sys/evcnt.h>
#include <sys/cpu.h>

#include <net/netisr.h>

#include <uvm/uvm_extern.h>

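/*
 * Scheduler priorities used for the soft interrupt LWPs, spaced
 * schedppq apart, with serial the highest and clock the lowest.
 */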
#define	PRI_SOFTSERIAL	(PRI_COUNT - 1)
#define	PRI_SOFTNET	(PRI_SOFTSERIAL - schedppq * 1)
#define	PRI_SOFTBIO	(PRI_SOFTSERIAL - schedppq * 2)
#define	PRI_SOFTCLOCK	(PRI_SOFTSERIAL - schedppq * 3)

/* This could overlap with signal info in struct lwp. */
typedef struct softint {
	TAILQ_HEAD(, softhand) si_q;
	struct lwp *si_lwp;
	struct cpu_info *si_cpu;
	uintptr_t si_machdep;
	struct evcnt si_evcnt;
	int si_active;
	char si_name[8];
} softint_t;

typedef struct softhand {
	TAILQ_ENTRY(softhand) sh_q;
	void (*sh_func)(void *);
	void *sh_arg;
	softint_t *sh_isr;
	u_int sh_pending;
	u_int sh_flags;
} softhand_t;

typedef struct softcpu {
	struct cpu_info *sc_cpu;
	softint_t sc_int[SOFTINT_COUNT];
	softhand_t sc_hand[1];
} softcpu_t;

static void softint_thread(void *);
static void softint_netisr(void *);

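/*
 * Global state.  softint_bytes is a tunable giving the size of each
 * per-CPU softcpu_t allocation, and so bounds the number of handlers
 * (softint_max).  softint_timing, when non-zero, enables timing of
 * soft interrupt dispatch in softint_dispatch().
 */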
u_int softint_bytes = 8192;
u_int softint_timing;
static u_int softint_max;
static kmutex_t softint_lock;
static void *softint_netisr_sih;
struct evcnt softint_block;

/*
 * softint_init_isr:
 *
 *	Initialize a single interrupt level for a single CPU.
 */
static void
softint_init_isr(softcpu_t *sc, const char *desc, pri_t pri, u_int level)
{
	struct cpu_info *ci;
	softint_t *si;
	int error;

	si = &sc->sc_int[level];
	ci = sc->sc_cpu;
	si->si_cpu = ci;

	TAILQ_INIT(&si->si_q);

	error = kthread_create(pri, KTHREAD_MPSAFE | KTHREAD_INTR |
	    KTHREAD_IDLE, ci, softint_thread, si, &si->si_lwp,
	    "soft%s/%d", desc, (int)ci->ci_cpuid);
	if (error != 0)
		panic("softint_init_isr: error %d", error);

	snprintf(si->si_name, sizeof(si->si_name), "%s/%d", desc,
	    (int)ci->ci_cpuid);
	evcnt_attach_dynamic(&si->si_evcnt, EVCNT_TYPE_INTR, NULL,
	    "softint", si->si_name);

	si->si_lwp->l_private = si;
	softint_init_md(si->si_lwp, level, &si->si_machdep);
#ifdef __HAVE_FAST_SOFTINTS
	si->si_lwp->l_mutex = &ci->ci_schedstate.spc_lwplock;
#endif
}

/*
 * softint_init:
 *
 *	Initialize per-CPU data structures.  Called from mi_cpu_attach().
 */
void
softint_init(struct cpu_info *ci)
{
	static struct cpu_info *first;
	softcpu_t *sc, *scfirst;
	softhand_t *sh, *shmax;

	if (first == NULL) {
		/* Boot CPU. */
		first = ci;
		mutex_init(&softint_lock, MUTEX_DEFAULT, IPL_NONE);
		softint_bytes = round_page(softint_bytes);
		softint_max = (softint_bytes - sizeof(softcpu_t)) /
		    sizeof(softhand_t);
		evcnt_attach_dynamic(&softint_block, EVCNT_TYPE_INTR,
		    NULL, "softint", "block");
	}

	sc = (softcpu_t *)uvm_km_alloc(kernel_map, softint_bytes, 0,
	    UVM_KMF_WIRED | UVM_KMF_ZERO);
	if (sc == NULL)
		panic("softint_init_cpu: cannot allocate memory");

	ci->ci_data.cpu_softcpu = sc;
	sc->sc_cpu = ci;

	softint_init_isr(sc, "net", PRI_SOFTNET, SOFTINT_NET);
	softint_init_isr(sc, "bio", PRI_SOFTBIO, SOFTINT_BIO);
	softint_init_isr(sc, "clk", PRI_SOFTCLOCK, SOFTINT_CLOCK);
	softint_init_isr(sc, "ser", PRI_SOFTSERIAL, SOFTINT_SERIAL);

	if (first != ci) {
		/* Don't lock -- autoconfiguration will prevent reentry. */
		scfirst = first->ci_data.cpu_softcpu;
		sh = sc->sc_hand;
		memcpy(sh, scfirst->sc_hand, sizeof(*sh) * softint_max);

		/* Update pointers for this CPU. */
		for (shmax = sh + softint_max; sh < shmax; sh++) {
			if (sh->sh_func == NULL)
				continue;
			sh->sh_isr =
			    &sc->sc_int[sh->sh_flags & SOFTINT_LVLMASK];
		}
	} else {
		/* Establish a handler for legacy net interrupts. */
		softint_netisr_sih = softint_establish(SOFTINT_NET,
		    softint_netisr, NULL);
		KASSERT(softint_netisr_sih != NULL);
	}
}

/*
 * softint_establish:
 *
 *	Register a software interrupt handler.
 */
void *
softint_establish(u_int flags, void (*func)(void *), void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	softcpu_t *sc;
	softhand_t *sh;
	u_int level, index;

	level = (flags & SOFTINT_LVLMASK);
	KASSERT(level < SOFTINT_COUNT);

	mutex_enter(&softint_lock);

	/* Find a free slot. */
	sc = curcpu()->ci_data.cpu_softcpu;
	for (index = 1; index < softint_max; index++)
		if (sc->sc_hand[index].sh_func == NULL)
			break;
	if (index == softint_max) {
		mutex_exit(&softint_lock);
		printf("WARNING: softint_establish: table full, "
		    "increase softint_bytes\n");
		return NULL;
	}

	/* Set up the handler on each CPU. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		sc = ci->ci_data.cpu_softcpu;
		sh = &sc->sc_hand[index];

		sh->sh_isr = &sc->sc_int[level];
		sh->sh_func = func;
		sh->sh_arg = arg;
		sh->sh_flags = flags;
		sh->sh_pending = 0;
	}

	mutex_exit(&softint_lock);

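	/*
	 * The cookie handed back to the caller is the handler's byte
	 * offset within the per-CPU softcpu_t block, so the same cookie
	 * locates the handler record on every CPU.
	 */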
	return (void *)((uint8_t *)&sc->sc_hand[index] - (uint8_t *)sc);
}

/*
 * softint_disestablish:
 *
 *	Unregister a software interrupt handler.
 */
void
softint_disestablish(void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	softcpu_t *sc;
	softhand_t *sh;
	uintptr_t offset;

	offset = (uintptr_t)arg;
	KASSERT(offset != 0 && offset < softint_bytes);

	mutex_enter(&softint_lock);

	/* Clear the handler on each CPU. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		sc = ci->ci_data.cpu_softcpu;
		sh = (softhand_t *)((uint8_t *)sc + offset);
		KASSERT(sh->sh_func != NULL);
		KASSERT(sh->sh_pending == 0);
		sh->sh_func = NULL;
	}

	mutex_exit(&softint_lock);
}

/*
 * softint_schedule:
 *
 *	Trigger a software interrupt.  Must be called from a hardware
 *	interrupt handler, or with preemption disabled (since we are
 *	using the value of curcpu()).
 */
void
softint_schedule(void *arg)
{
	softhand_t *sh;
	softint_t *si;
	uintptr_t offset;
	int s;

	/* Find the handler record for this CPU. */
	offset = (uintptr_t)arg;
	KASSERT(offset != 0 && offset < softint_bytes);
	sh = (softhand_t *)((uint8_t *)curcpu()->ci_data.cpu_softcpu + offset);

	/* If it's already pending there's nothing to do. */
	if (sh->sh_pending)
		return;
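
	/*
	 * Note that the unlocked test above can be preempted by an
	 * interrupt that schedules the same handler; the re-test at
	 * splhigh() below keeps the handler from being enqueued twice.
	 * Since sh_pending is only ever modified on the local CPU, no
	 * stronger synchronization is required.
	 */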

	/*
	 * Enqueue the handler into the LWP's pending list.
	 * If the LWP is completely idle, then make it run.
	 */
	s = splhigh();
	if (!sh->sh_pending) {
		si = sh->sh_isr;
		sh->sh_pending = 1;
		TAILQ_INSERT_TAIL(&si->si_q, sh, sh_q);
		if (si->si_active == 0) {
			si->si_active = 1;
			softint_trigger(si->si_machdep);
		}
	}
	splx(s);
}

/*
 * softint_execute:
 *
 *	Invoke handlers for the specified soft interrupt.
 *	Must be entered at splhigh.  Will drop the priority
 *	to the level specified, but returns back at splhigh.
 */
static inline void
softint_execute(softint_t *si, lwp_t *l, int s)
{
	softhand_t *sh;
	lwp_t *l2;

	KASSERT(si->si_lwp == curlwp);
	KASSERT(si->si_cpu == curcpu());
	KASSERT(si->si_lwp->l_wchan == NULL);
	KASSERT(!TAILQ_EMPTY(&si->si_q));
	KASSERT(si->si_active);

	while (!TAILQ_EMPTY(&si->si_q)) {
		/*
		 * If any interrupted LWP has higher priority, then we
		 * must yield immediately.  Note that IPL_HIGH may be
		 * above IPL_SCHED, so we have to drop the interrupt
		 * priority level before yielding.
		 *
		 * XXXAD Optimise this away.
		 */
		for (l2 = l->l_switchto; l2 != NULL; l2 = l2->l_switchto) {
			if (lwp_eprio(l2) > l->l_priority)
				break;
		}
		if (l2 != NULL) {
			splx(s);
			yield();
			(void)splhigh();
			continue;
		}

		/*
		 * Pick the longest waiting handler to run.  We block
		 * interrupts but do not lock in order to do this, as
		 * we are protecting against the local CPU only.
		 */
		sh = TAILQ_FIRST(&si->si_q);
		TAILQ_REMOVE(&si->si_q, sh, sh_q);
		sh->sh_pending = 0;
		splx(s);

		/* Run the handler. */
		if ((sh->sh_flags & SOFTINT_MPSAFE) == 0) {
			KERNEL_LOCK(1, l);
		}
		(*sh->sh_func)(sh->sh_arg);
		if ((sh->sh_flags & SOFTINT_MPSAFE) == 0) {
			KERNEL_UNLOCK_ONE(l);
		}

		(void)splhigh();
	}

	/*
	 * Unlocked, but only for statistics.
	 * Should be per-CPU to prevent cache ping-pong.
	 */
	uvmexp.softs++;

	si->si_evcnt.ev_count++;
	si->si_active = 0;
}

/*
 * schednetisr:
 *
 *	Trigger a legacy network interrupt.  XXX Needs to go away.
 */
void
schednetisr(int isr)
{
	int s;

	s = splhigh();
	curcpu()->ci_data.cpu_netisrs |= (1 << isr);
	softint_schedule(softint_netisr_sih);
	splx(s);
}

/*
 * softint_netisr:
 *
 *	Dispatch legacy network interrupts.  XXX Needs to go away.
 */
static void
softint_netisr(void *cookie)
{
	struct cpu_info *ci;
	int s, bits;

	ci = curcpu();

	s = splhigh();
	bits = ci->ci_data.cpu_netisrs;
	ci->ci_data.cpu_netisrs = 0;
	splx(s);

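	/*
	 * <net/netisr_dispatch.h> expands to one DONETISR() invocation
	 * per legacy protocol, so each protocol's handler is called
	 * here if its bit was set above.
	 */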
#define	DONETISR(which, func)						\
	do {								\
		void func(void);					\
		if ((bits & (1 << which)) != 0)				\
			func();						\
	} while (0);
#include <net/netisr_dispatch.h>
#undef DONETISR
}

#ifndef __HAVE_FAST_SOFTINTS

/*
 * softint_init_md:
 *
 *	Perform machine-dependent initialization.
 */
void
softint_init_md(lwp_t *l, u_int level, uintptr_t *machdep)
{
	softint_t *si;

	/* The MI implementation uses the LWP pointer as the MD cookie. */
	*machdep = (uintptr_t)l;
	si = l->l_private;

	lwp_lock(l);
	/* Cheat and make the KASSERT in softint_thread() happy. */
	si->si_active = 1;
	l->l_stat = LSRUN;
	sched_enqueue(l, false);
	lwp_unlock(l);
}

/*
 * softint_trigger:
 *
 *	Cause a soft interrupt handler to begin executing.
 */
void
softint_trigger(uintptr_t machdep)
{
	struct cpu_info *ci;
	lwp_t *l;

	l = (lwp_t *)machdep;
	ci = l->l_cpu;

	spc_lock(ci);
	l->l_mutex = ci->ci_schedstate.spc_mutex;
	l->l_stat = LSRUN;
	sched_enqueue(l, false);
	cpu_need_resched(ci, 1);
	spc_unlock(ci);
}

/*
 * softint_thread:
 *
 *	Slow path MI software interrupt dispatch.
 */
void
softint_thread(void *cookie)
{
	softint_t *si;
	lwp_t *l;
	int s;

	l = curlwp;
	si = l->l_private;
	s = splhigh();

	for (;;) {
		softint_execute(si, l, s);

		lwp_lock(l);
		l->l_stat = LSIDL;
		mi_switch(l);
	}
}

#else	/* !__HAVE_FAST_SOFTINTS */

/*
 * softint_thread:
 *
 *	In the __HAVE_FAST_SOFTINTS case, the LWP is switched to without
 *	restoring any state, so we should not arrive here - there is a
 *	direct handoff between the interrupt stub and softint_dispatch().
 */
void
softint_thread(void *cookie)
{

	panic("softint_thread");
}

/*
 * softint_dispatch:
 *
 *	Entry point from machine-dependent code.
 */
void
softint_dispatch(lwp_t *pinned, int s)
{
	struct timeval now;
	softint_t *si;
	u_int timing;
	lwp_t *l;

	l = curlwp;
	si = l->l_private;

	/*
	 * Note the interrupted LWP, and mark the current LWP as running
	 * before proceeding.  Although this must as a rule be done with
	 * the LWP locked, at this point no external agents will want to
	 * modify the interrupt LWP's state.
	 */
	timing = (softint_timing ? LW_TIMEINTR : 0);
	l->l_switchto = pinned;
	l->l_stat = LSONPROC;
	l->l_flag |= (LW_RUNNING | timing);

	/*
	 * Dispatch the interrupt.  If softints are being timed, charge
	 * for it.
	 */
	if (timing)
		microtime(&l->l_stime);
	softint_execute(si, l, s);
	if (timing) {
		microtime(&now);
		updatertime(l, &now);
		l->l_flag &= ~LW_TIMEINTR;
	}

	/*
	 * If we blocked while handling the interrupt, the pinned LWP is
	 * gone so switch to the idle LWP.  It will select a new LWP to
	 * run.
	 *
	 * We must drop the priority level as switching at IPL_HIGH could
	 * deadlock the system.  We have already set si->si_active = 0,
	 * which means another interrupt at this level can be triggered.
	 * That's not a problem: we are lowering to level 's' which will
	 * prevent softint_dispatch() from being reentered at level 's',
	 * until the priority is finally dropped to IPL_NONE on entry to
	 * the idle loop.
	 */
	l->l_stat = LSIDL;
	if (l->l_switchto == NULL) {
		splx(s);
		lwp_exit_switchaway(l);
		/* NOTREACHED */
	}
	l->l_switchto = NULL;
	l->l_flag &= ~LW_RUNNING;
}

#endif	/* !__HAVE_FAST_SOFTINTS */