/*	$NetBSD: kern_softint.c,v 1.1.2.7 2007/07/15 22:20:28 ad Exp $	*/

/*-
 * Copyright (c) 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Generic software interrupt framework.
 *
 * Overview
 *
 *	The soft interrupt framework provides a mechanism to schedule a
 *	low priority callback that runs with thread context. It allows
 *	for dynamic registration of software interrupts, and for fair
 *	queueing and prioritization of those interrupts. The callbacks
 *	can be scheduled to run from nearly any point in the kernel: by
 *	code running with thread context, by code running from a
 *	hardware interrupt handler, and at any interrupt priority
 *	level.
 *
 * Priority levels
 *
 *	Since soft interrupt dispatch can be tied to the underlying
 *	architecture's interrupt dispatch code, it may be limited both
 *	by the capabilities of the hardware and the capabilities of the
 *	interrupt dispatch code itself. Therefore the number of levels
 *	is restricted to four. In order of priority (lowest to highest)
 *	the levels are: clock, bio, net, serial.
 *
 *	The symbolic names are provided only as a guide and in isolation
 *	do not have any direct connection with a particular kind of
 *	device activity.
 *
 *	The four priority levels map directly to scheduler priority
 *	levels, and where the architecture implements 'fast' software
 *	interrupts, they also map onto interrupt priorities. The
 *	interrupt priorities are intended to be hidden from machine
 *	independent code, which should use multiprocessor and
 *	preemption aware mechanisms to synchronize with software
 *	interrupts (for example: mutexes).
 *
 * Capabilities
 *
 *	As with hardware interrupt handlers, software interrupts run
 *	with limited machine context. In particular, they do not
 *	possess any VM (virtual memory) context, and should therefore
 *	not try to operate on user space addresses, or to use virtual
 *	memory facilities other than those noted as interrupt safe.
 *
 *	Unlike hardware interrupts, software interrupts do have thread
 *	context. They may block on synchronization objects, sleep, and
 *	resume execution at a later time. Since software interrupts
 *	are a limited resource and (typically) run with higher priority
 *	than all other threads in the system, all block-and-resume
 *	activity by a software interrupt must be kept short in order to
 *	allow further processing at that level to continue. The kernel
 *	does not allow software interrupts to use facilities or perform
 *	actions that may block for a significant amount of time. This
 *	means that it's not valid for a software interrupt to: sleep on
 *	condition variables, use the lockmgr() facility, or wait for
 *	resources to become available (for example, memory).
 *
 *	Software interrupts may block to await ownership of locks, which
 *	are typically owned only for a short period of time: mutexes and
 *	reader/writer locks. By extension, code running in the bottom
 *	half of the kernel must take care to ensure that any lock that
 *	may be taken from a software interrupt can not be held for more
 *	than a short period of time.
 *
 * Per-CPU operation
 *
 *	Soft interrupts are strictly per-CPU. If a soft interrupt is
 *	triggered on a CPU, it will only be dispatched on that CPU.
 *	Each LWP dedicated to handling a soft interrupt is bound to
 *	its home CPU, so if the LWP blocks and needs to run again, it
 *	can only run there. Nearly all data structures used to manage
 *	software interrupts are per-CPU.
 *
 *	Soft interrupts can occur many thousands of times per second.
 *	In light of this, the per-CPU requirement is intended to solve
 *	three problems:
 *
 *	1) For passing work down from a hardware interrupt handler to a
 *	software interrupt (for example, using a queue), spinlocks need
 *	not be used to guarantee data integrity. Adjusting the CPU
 *	local interrupt priority level is sufficient. Acquiring
 *	spinlocks is computationally expensive, as it increases traffic
 *	on the system bus and can stall processors with long execution
 *	pipelines.
 *
 *	2) Often hardware interrupt handlers manipulate data structures
 *	and then pass those to a software interrupt for further
 *	processing. If those data structures are immediately passed to
 *	another CPU, the associated cache lines may be forced across
 *	the system bus, generating more bus traffic.
 *
 *	3) The data structures used to manage soft interrupts are also
 *	CPU local, again to reduce unnecessary bus traffic.
 *
 * Generic implementation
 *
 *	A generic, low performance implementation is provided that
 *	works across all architectures, with no machine-dependent
 *	modifications needed. This implementation uses the scheduler,
 *	and so has a number of restrictions:
 *
 *	1) Since software interrupts can be triggered from any priority
 *	level, on architectures where the generic implementation is
 *	used, IPL_SCHED must be equal to IPL_HIGH.
 *
 *	2) The software interrupts are not preemptive, and so must wait
 *	for the currently executing thread to yield the CPU. This
 *	can introduce latency.
 *
 *	3) A context switch is required for each soft interrupt to be
 *	handled, which can be quite expensive.
 *
 * 'Fast' software interrupts
 *
 *	XXX
 *
 *	The !__HAVE_FAST_SOFTINTS case assumes splhigh == splsched.
 */
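
/*
 * Example usage (an illustrative sketch only; the "exdrv" names below
 * are hypothetical and not part of this file). A driver registers a
 * handler once and then triggers it from its hardware interrupt
 * handler; SOFTINT_MPSAFE means the handler runs without the kernel
 * lock and so must do its own locking, for example with a mutex
 * initialized at the matching IPL:
 *
 *	during attach:
 *
 *		mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_SOFTNET);
 *		sc->sc_sih = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
 *		    exdrv_softintr, sc);
 *
 *	from the hardware interrupt handler, after queueing work:
 *
 *		softint_schedule(sc->sc_sih);
 *
 *	the handler then runs in thread context at the requested level:
 *
 *		static void
 *		exdrv_softintr(void *arg)
 *		{
 *			struct exdrv_softc *sc = arg;
 *
 *			mutex_enter(&sc->sc_lock);
 *			... process the work queued by the hard interrupt ...
 *			mutex_exit(&sc->sc_lock);
 *		}
 */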

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_softint.c,v 1.1.2.7 2007/07/15 22:20:28 ad Exp $");

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/intr.h>
#include <sys/mutex.h>
#include <sys/kthread.h>
#include <sys/evcnt.h>
#include <sys/cpu.h>

#include <net/netisr.h>

#include <uvm/uvm_extern.h>

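/*
 * The four soft interrupt levels are mapped onto the top of the thread
 * priority range, spaced one scheduler queue (schedppq) apart, with
 * serial the highest.
 */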
#define	PRI_SOFTSERIAL	(PRI_COUNT - 1)
#define	PRI_SOFTNET	(PRI_SOFTSERIAL - schedppq * 1)
#define	PRI_SOFTBIO	(PRI_SOFTSERIAL - schedppq * 2)
#define	PRI_SOFTCLOCK	(PRI_SOFTSERIAL - schedppq * 3)

/* This could overlap with signal info in struct lwp. */
typedef struct softint {
	TAILQ_HEAD(, softhand)	si_q;
	struct lwp		*si_lwp;
	struct cpu_info		*si_cpu;
	uintptr_t		si_machdep;
	struct evcnt		si_evcnt;
	int			si_active;
	char			si_name[8];
} softint_t;

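/*
 * Per-handler state: one record for each registered handler, replicated
 * in every CPU's softcpu_t.
 */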
typedef struct softhand {
	TAILQ_ENTRY(softhand)	sh_q;
	void			(*sh_func)(void *);
	void			*sh_arg;
	softint_t		*sh_isr;
	u_int			sh_pending;
	u_int			sh_flags;
} softhand_t;

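/*
 * Per-CPU state: one softint_t per level, followed by the array of
 * handler records (sized from softint_bytes).
 */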
typedef struct softcpu {
	struct cpu_info	*sc_cpu;
	softint_t	sc_int[SOFTINT_COUNT];
	softhand_t	sc_hand[1];
} softcpu_t;

static void	softint_thread(void *);
static void	softint_netisr(void *);

u_int		softint_bytes = 8192;	/* per-CPU allocation size */
u_int		softint_timing;		/* nonzero: time soft interrupts */
static u_int	softint_max;		/* number of handler slots */
static kmutex_t	softint_lock;		/* guards handler registration */
static void	*softint_netisr_sih;	/* handle for legacy netisr dispatch */
struct evcnt	softint_block;		/* blocked soft interrupt count */

/*
 * softint_init_isr:
 *
 *	Initialize a single interrupt level for a single CPU.
 */
static void
softint_init_isr(softcpu_t *sc, const char *desc, pri_t pri, u_int level)
{
	struct cpu_info *ci;
	softint_t *si;
	int error;

	si = &sc->sc_int[level];
	ci = sc->sc_cpu;
	si->si_cpu = ci;

	TAILQ_INIT(&si->si_q);

	error = kthread_create(pri, KTHREAD_MPSAFE | KTHREAD_INTR |
	    KTHREAD_IDLE, ci, softint_thread, si, &si->si_lwp,
	    "soft%s/%d", desc, (int)ci->ci_cpuid);
	if (error != 0)
		panic("softint_init_isr: error %d", error);

	snprintf(si->si_name, sizeof(si->si_name), "%s/%d", desc,
	    (int)ci->ci_cpuid);
	evcnt_attach_dynamic(&si->si_evcnt, EVCNT_TYPE_INTR, NULL,
	    "softint", si->si_name);

	si->si_lwp->l_private = si;
	softint_init_md(si->si_lwp, level, &si->si_machdep);
#ifdef __HAVE_FAST_SOFTINTS
	si->si_lwp->l_mutex = &ci->ci_schedstate.spc_lwplock;
#endif
}

/*
 * softint_init:
 *
 *	Initialize per-CPU data structures. Called from mi_cpu_attach().
 */
void
softint_init(struct cpu_info *ci)
{
	static struct cpu_info *first;
	softcpu_t *sc, *scfirst;
	softhand_t *sh, *shmax;

	if (first == NULL) {
		/* Boot CPU. */
		first = ci;
		mutex_init(&softint_lock, MUTEX_DEFAULT, IPL_NONE);
		softint_bytes = round_page(softint_bytes);
		softint_max = (softint_bytes - sizeof(softcpu_t)) /
		    sizeof(softhand_t);
		evcnt_attach_dynamic(&softint_block, EVCNT_TYPE_INTR,
		    NULL, "softint", "block");
	}

	sc = (softcpu_t *)uvm_km_alloc(kernel_map, softint_bytes, 0,
	    UVM_KMF_WIRED | UVM_KMF_ZERO);
	if (sc == NULL)
		panic("softint_init: cannot allocate memory");

	ci->ci_data.cpu_softcpu = sc;
	sc->sc_cpu = ci;

	softint_init_isr(sc, "net", PRI_SOFTNET, SOFTINT_NET);
	softint_init_isr(sc, "bio", PRI_SOFTBIO, SOFTINT_BIO);
	softint_init_isr(sc, "clk", PRI_SOFTCLOCK, SOFTINT_CLOCK);
	softint_init_isr(sc, "ser", PRI_SOFTSERIAL, SOFTINT_SERIAL);

	if (first != ci) {
		/* Don't lock -- autoconfiguration will prevent reentry. */
		scfirst = first->ci_data.cpu_softcpu;
		sh = sc->sc_hand;
		memcpy(sh, scfirst->sc_hand, sizeof(*sh) * softint_max);

		/* Update pointers for this CPU. */
		for (shmax = sh + softint_max; sh < shmax; sh++) {
			if (sh->sh_func == NULL)
				continue;
			sh->sh_isr =
			    &sc->sc_int[sh->sh_flags & SOFTINT_LVLMASK];
		}
	} else {
		/* Establish a handler for legacy net interrupts. */
		softint_netisr_sih = softint_establish(SOFTINT_NET,
		    softint_netisr, NULL);
		KASSERT(softint_netisr_sih != NULL);
	}
}

/*
 * softint_establish:
 *
 *	Register a software interrupt handler. Returns an opaque cookie
 *	(the handler's byte offset within the per-CPU softcpu structure)
 *	to be passed to softint_schedule() and softint_disestablish().
 */
void *
softint_establish(u_int flags, void (*func)(void *), void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	softcpu_t *sc;
	softhand_t *sh;
	u_int level, index;

	level = (flags & SOFTINT_LVLMASK);
	KASSERT(level < SOFTINT_COUNT);

	mutex_enter(&softint_lock);

	/* Find a free slot. */
	sc = curcpu()->ci_data.cpu_softcpu;
	for (index = 1; index < softint_max; index++)
		if (sc->sc_hand[index].sh_func == NULL)
			break;
	if (index == softint_max) {
		mutex_exit(&softint_lock);
		printf("WARNING: softint_establish: table full, "
		    "increase softint_bytes\n");
		return NULL;
	}

	/* Set up the handler on each CPU. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		sc = ci->ci_data.cpu_softcpu;
		sh = &sc->sc_hand[index];

		sh->sh_isr = &sc->sc_int[level];
		sh->sh_func = func;
		sh->sh_arg = arg;
		sh->sh_flags = flags;
		sh->sh_pending = 0;
	}

	mutex_exit(&softint_lock);

	return (void *)((uint8_t *)&sc->sc_hand[index] - (uint8_t *)sc);
}

/*
 * softint_disestablish:
 *
 *	Unregister a software interrupt handler.
 */
void
softint_disestablish(void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	softcpu_t *sc;
	softhand_t *sh;
	uintptr_t offset;

	offset = (uintptr_t)arg;
	KASSERT(offset != 0 && offset < softint_bytes);

	mutex_enter(&softint_lock);

	/* Clear the handler on each CPU. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		sc = ci->ci_data.cpu_softcpu;
		sh = (softhand_t *)((uint8_t *)sc + offset);
		KASSERT(sh->sh_func != NULL);
		KASSERT(sh->sh_pending == 0);
		sh->sh_func = NULL;
	}

	mutex_exit(&softint_lock);
}

/*
 * softint_schedule:
 *
 *	Trigger a software interrupt. Must be called from a hardware
 *	interrupt handler, or with preemption disabled (since we are
 *	using the value of curcpu()).
 */
void
softint_schedule(void *arg)
{
	softhand_t *sh;
	softint_t *si;
	uintptr_t offset;
	int s;

	/* Find the handler record for this CPU. */
	offset = (uintptr_t)arg;
	KASSERT(offset != 0 && offset < softint_bytes);
	sh = (softhand_t *)((uint8_t *)curcpu()->ci_data.cpu_softcpu + offset);

	/* If it's already pending there's nothing to do. */
	if (sh->sh_pending)
		return;

	/*
	 * Enqueue the handler into the LWP's pending list.
	 * If the LWP is completely idle, then make it run.
	 */
	s = splhigh();
	if (!sh->sh_pending) {
		si = sh->sh_isr;
		sh->sh_pending = 1;
		TAILQ_INSERT_TAIL(&si->si_q, sh, sh_q);
		if (si->si_active == 0) {
			si->si_active = 1;
			softint_trigger(si->si_machdep);
		}
	}
	splx(s);
}

/*
 * softint_execute:
 *
 *	Invoke handlers for the specified soft interrupt.
 *	Must be entered at splhigh. Will drop the priority
 *	to the level specified, but returns back at splhigh.
 */
static inline void
softint_execute(softint_t *si, lwp_t *l, int s)
{
	softhand_t *sh;
	lwp_t *l2;

	KASSERT(si->si_lwp == curlwp);
	KASSERT(si->si_cpu == curcpu());
	KASSERT(si->si_lwp->l_wchan == NULL);
	KASSERT(si->si_active);

	while (!TAILQ_EMPTY(&si->si_q)) {
		/*
		 * If any interrupted LWP has higher priority, then we
		 * must yield immediately. Note that IPL_HIGH may be
		 * above IPL_SCHED, so we have to drop the interrupt
		 * priority level before yielding.
		 *
		 * XXXAD Optimise this away.
		 */
		for (l2 = l->l_switchto; l2 != NULL; l2 = l2->l_switchto) {
			if (lwp_eprio(l2) > l->l_priority)
				break;
		}
		if (l2 != NULL) {
			splx(s);
			yield();
			(void)splhigh();
			continue;
		}

		/*
		 * Pick the longest waiting handler to run. We block
		 * interrupts but do not lock in order to do this, as
		 * we are protecting against the local CPU only.
		 */
		sh = TAILQ_FIRST(&si->si_q);
		TAILQ_REMOVE(&si->si_q, sh, sh_q);
		sh->sh_pending = 0;
		splx(s);

		/* Run the handler. */
		if ((sh->sh_flags & SOFTINT_MPSAFE) == 0) {
			KERNEL_LOCK(1, l);
		}
		(*sh->sh_func)(sh->sh_arg);
		if ((sh->sh_flags & SOFTINT_MPSAFE) == 0) {
			KERNEL_UNLOCK_ONE(l);
		}

		(void)splhigh();
	}

	/*
	 * Unlocked, but only for statistics.
	 * Should be per-CPU to prevent cache ping-pong.
	 */
	uvmexp.softs++;

	si->si_evcnt.ev_count++;
	si->si_active = 0;
}

/*
 * schednetisr:
 *
 *	Trigger a legacy network interrupt. XXX Needs to go away.
 */
void
schednetisr(int isr)
{
	int s;

	s = splhigh();
	curcpu()->ci_data.cpu_netisrs |= (1 << isr);
	softint_schedule(softint_netisr_sih);
	splx(s);
}

/*
 * softint_netisr:
 *
 *	Dispatch legacy network interrupts. XXX Needs to go away.
 */
static void
softint_netisr(void *cookie)
{
	struct cpu_info *ci;
	int s, bits;

	ci = curcpu();

	s = splhigh();
	bits = ci->ci_data.cpu_netisrs;
	ci->ci_data.cpu_netisrs = 0;
	splx(s);

#define	DONETISR(which, func)					\
	do {							\
		void func(void);				\
		if ((bits & (1 << which)) != 0)			\
			func();					\
	} while(0);
#include <net/netisr_dispatch.h>
#undef DONETISR
}

#ifndef __HAVE_FAST_SOFTINTS

/*
 * softint_init_md:
 *
 *	Perform machine-dependent initialization.
 */
void
softint_init_md(lwp_t *l, u_int level, uintptr_t *machdep)
{
	softint_t *si;

	*machdep = (uintptr_t)l;
	si = l->l_private;

	lwp_lock(l);
	/* Cheat and make the KASSERT in softint_thread() happy. */
	si->si_active = 1;
	l->l_stat = LSRUN;
	sched_enqueue(l, false);
	lwp_unlock(l);
}

/*
 * softint_trigger:
 *
 *	Cause a soft interrupt handler to begin executing.
 */
void
softint_trigger(uintptr_t machdep)
{
	struct cpu_info *ci;
	lwp_t *l;

	l = (lwp_t *)machdep;
	ci = l->l_cpu;

	spc_lock(ci);
	l->l_mutex = ci->ci_schedstate.spc_mutex;
	l->l_stat = LSRUN;
	sched_enqueue(l, false);
	cpu_need_resched(ci, 1);
	spc_unlock(ci);
}

/*
 * softint_thread:
 *
 *	Slow path MI software interrupt dispatch.
 */
void
softint_thread(void *cookie)
{
	softint_t *si;
	lwp_t *l;
	int s;

	l = curlwp;
	si = l->l_private;
	s = splhigh();

	for (;;) {
		softint_execute(si, l, s);

		lwp_lock(l);
		l->l_stat = LSIDL;
		mi_switch(l);
	}
}

#else	/* !__HAVE_FAST_SOFTINTS */

/*
 * softint_thread:
 *
 *	In the __HAVE_FAST_SOFTINTS case, the LWP is switched to without
 *	restoring any state, so we should not arrive here - there is a
 *	direct handoff between the interrupt stub and softint_dispatch().
 */
void
softint_thread(void *cookie)
{

	panic("softint_thread");
}

/*
 * softint_dispatch:
 *
 *	Entry point from machine-dependent code.
 */
void
softint_dispatch(lwp_t *pinned, int s)
{
	struct timeval now;
	softint_t *si;
	u_int timing;
	lwp_t *l;

	l = curlwp;
	si = l->l_private;

	/*
	 * Note the interrupted LWP, and mark the current LWP as running
	 * before proceeding. Although this must as a rule be done with
	 * the LWP locked, at this point no external agents will want to
	 * modify the interrupt LWP's state.
	 */
	timing = (softint_timing ? LW_TIMEINTR : 0);
	l->l_switchto = pinned;
	l->l_stat = LSONPROC;
	l->l_flag |= (LW_RUNNING | timing);

	/*
	 * Dispatch the interrupt. If softints are being timed, charge
	 * for it.
	 */
	if (timing)
		microtime(&l->l_stime);
	softint_execute(si, l, s);
	if (timing) {
		microtime(&now);
		updatertime(l, &now);
		l->l_flag &= ~LW_TIMEINTR;
	}

	/*
	 * If we blocked while handling the interrupt, the pinned LWP is
	 * gone, so switch to the idle LWP. It will select a new LWP to
	 * run.
	 *
	 * We must drop the priority level as switching at IPL_HIGH could
	 * deadlock the system. We have already set si->si_active = 0,
	 * which means another interrupt at this level can be triggered.
	 * That's not a problem: we are lowering to level 's', which will
	 * prevent softint_dispatch() from being reentered at level 's'
	 * until the priority is finally dropped to IPL_NONE on entry to
	 * the idle loop.
	 */
	l->l_stat = LSIDL;
	if (l->l_switchto == NULL) {
		splx(s);
		lwp_exit_switchaway(l);
		/* NOTREACHED */
	}
	l->l_switchto = NULL;
	l->l_flag &= ~LW_RUNNING;
}

#endif	/* !__HAVE_FAST_SOFTINTS */