/*	$NetBSD: pktqueue.c,v 1.1 2014/06/05 23:48:16 rmind Exp $	*/

/*-
 * Copyright (c) 2014 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: pktqueue.c,v 1.1 2014/06/05 23:48:16 rmind Exp $");

#include <sys/param.h>
#include <sys/types.h>

#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/pcq.h>
#include <sys/intr.h>
#include <sys/kmem.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/percpu.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_private.h>

#include <net/pktqueue.h>

/*
 * WARNING: update this if struct pktqueue changes.
 */
#define	PKTQ_CLPAD	\
    MAX(COHERENCY_UNIT, COHERENCY_UNIT - sizeof(kmutex_t) - sizeof(u_int))

struct pktqueue {
	/*
	 * The lock used for a barrier mechanism.  The barrier counter,
	 * as well as the drop counter, are managed atomically though.
	 * Ensure this group is in a separate cache line.
	 */
	kmutex_t	pq_lock;
	volatile u_int	pq_barrier;
	uint8_t		_pad[PKTQ_CLPAD];

	/* The maximum queue length, the counters and the interrupt handler. */
	u_int		pq_maxlen;
	percpu_t *	pq_counters;
	void *		pq_sih;

	/* Finally, per-CPU queues. */
	pcq_t *		pq_queue[];
};

/* The counters of the packet queue. */
#define	PQCNT_ENQUEUE	0
#define	PQCNT_DEQUEUE	1
#define	PQCNT_DROP	2
#define	PQCNT_NCOUNTERS	3

typedef struct {
	uint64_t	count[PQCNT_NCOUNTERS];
} pktq_counters_t;

/* Special marker value used by pktq_barrier() mechanism. */
#define	PKTQ_MARKER	((void *)(~0ULL))

/*
 * The total size of pktqueue_t, which depends on the number of CPUs.
 */
#define	PKTQUEUE_STRUCT_LEN(ncpu)	\
    roundup2(offsetof(pktqueue_t, pq_queue[ncpu]), coherency_unit)

pktqueue_t *
pktq_create(size_t maxlen, void (*intrh)(void *))
{
	const u_int sflags = SOFTINT_NET | SOFTINT_MPSAFE | SOFTINT_RCPU;
	const size_t len = PKTQUEUE_STRUCT_LEN(ncpu);
	pktqueue_t *pq;
	percpu_t *pc;
	void *sih;

	if ((pc = percpu_alloc(sizeof(pktq_counters_t))) == NULL) {
		return NULL;
	}
	if ((sih = softint_establish(sflags, intrh, NULL)) == NULL) {
		percpu_free(pc, sizeof(pktq_counters_t));
		return NULL;
	}

	pq = kmem_zalloc(len, KM_SLEEP);
	for (u_int i = 0; i < ncpu; i++) {
		pq->pq_queue[i] = pcq_create(maxlen, KM_SLEEP);
	}
	mutex_init(&pq->pq_lock, MUTEX_DEFAULT, IPL_NONE);
	pq->pq_maxlen = maxlen;
	pq->pq_counters = pc;
	pq->pq_sih = sih;

	return pq;
}
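
/*
 * Example usage (an illustrative sketch only; the "example_*" names and
 * the queue length are hypothetical, not part of this file): a subsystem
 * would typically create one queue at initialisation time, passing its
 * softint handler, and keep the pointer for its input path:
 *
 *	static pktqueue_t *example_pktq;
 *
 *	void
 *	example_init(void)
 *	{
 *		example_pktq = pktq_create(256, example_softint);
 *		if (example_pktq == NULL)
 *			panic("example_init: pktq_create failed");
 *	}
 */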

void
pktq_destroy(pktqueue_t *pq)
{
	const size_t len = PKTQUEUE_STRUCT_LEN(ncpu);

	for (u_int i = 0; i < ncpu; i++) {
		pcq_t *q = pq->pq_queue[i];
		KASSERT(pcq_peek(q) == NULL);
		pcq_destroy(q);
	}
	percpu_free(pq->pq_counters, sizeof(pktq_counters_t));
	softint_disestablish(pq->pq_sih);
	mutex_destroy(&pq->pq_lock);
	kmem_free(pq, len);
}

/*
 * - pktq_inc_count: increment the counter given an ID.
 * - pktq_collect_counts: handler to sum up the counts from each CPU.
 * - pktq_get_count: return the effective count given an ID.
 */

static inline void
pktq_inc_count(pktqueue_t *pq, u_int i)
{
	percpu_t *pc = pq->pq_counters;
	pktq_counters_t *c;

	c = percpu_getref(pc);
	c->count[i]++;
	percpu_putref(pc);
}

static void
pktq_collect_counts(void *mem, void *arg, struct cpu_info *ci)
{
	const pktq_counters_t *c = mem;
	pktq_counters_t *sum = arg;

	for (u_int i = 0; i < PQCNT_NCOUNTERS; i++) {
		sum->count[i] += c->count[i];
	}
}

uint64_t
pktq_get_count(pktqueue_t *pq, pktq_count_t c)
{
	pktq_counters_t sum;

	if (c != PKTQ_MAXLEN) {
		memset(&sum, 0, sizeof(sum));
		percpu_foreach(pq->pq_counters, pktq_collect_counts, &sum);
	}
	switch (c) {
	case PKTQ_NITEMS:
		return sum.count[PQCNT_ENQUEUE] - sum.count[PQCNT_DEQUEUE];
	case PKTQ_DROPS:
		return sum.count[PQCNT_DROP];
	case PKTQ_MAXLEN:
		return pq->pq_maxlen;
	}
	return 0;
}
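
/*
 * Example (illustrative sketch; "example_pktq" is a hypothetical queue
 * pointer, not part of this file): the counters are typically read when
 * exporting statistics, e.g. for a sysctl or a netstat-style report:
 *
 *	const uint64_t nitems = pktq_get_count(example_pktq, PKTQ_NITEMS);
 *	const uint64_t drops  = pktq_get_count(example_pktq, PKTQ_DROPS);
 *	const uint64_t maxlen = pktq_get_count(example_pktq, PKTQ_MAXLEN);
 */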

uint32_t
pktq_rps_hash(const struct mbuf *m __unused)
{
	/*
	 * XXX: No distribution yet; the softnet_lock contention
	 * XXX: must be eliminated first.
	 */
	return 0;
}

/*
 * pktq_enqueue: inject the packet into the end of the queue.
 *
 * => Must be called from interrupt context or with preemption disabled.
 * => Consumes the packet and returns true on success.
 * => Returns false on failure; the caller is responsible for freeing
 *    the packet.
 */
bool
pktq_enqueue(pktqueue_t *pq, struct mbuf *m, const u_int hash)
{
	const unsigned cpuid = hash % ncpu;

	KASSERT(kpreempt_disabled());

	if (__predict_false(!pcq_put(pq->pq_queue[cpuid], m))) {
		pktq_inc_count(pq, PQCNT_DROP);
		return false;
	}
	softint_schedule_cpu(pq->pq_sih, cpu_lookup(cpuid));
	pktq_inc_count(pq, PQCNT_ENQUEUE);
	return true;
}
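
/*
 * Example (illustrative sketch; the "example_*" names are hypothetical,
 * not part of this file): enqueueing from the protocol input path.  The
 * hash selects the per-CPU queue, preemption is disabled around the call,
 * and on failure the caller still owns the mbuf and must free it:
 *
 *	void
 *	example_input(struct mbuf *m)
 *	{
 *		const uint32_t hash = pktq_rps_hash(m);
 *
 *		kpreempt_disable();
 *		if (!pktq_enqueue(example_pktq, m, hash)) {
 *			m_freem(m);
 *		}
 *		kpreempt_enable();
 *	}
 */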

/*
 * pktq_dequeue: take a packet from the queue.
 *
 * => Must be called with preemption disabled.
 * => The caller must ensure there are no concurrent dequeue calls.
 */
struct mbuf *
pktq_dequeue(pktqueue_t *pq)
{
	const struct cpu_info *ci = curcpu();
	const unsigned cpuid = cpu_index(ci);
	struct mbuf *m;

	m = pcq_get(pq->pq_queue[cpuid]);
	if (__predict_false(m == PKTQ_MARKER)) {
		/* Acknowledge the marker entry for pktq_barrier(). */
		atomic_inc_uint(&pq->pq_barrier);
		return NULL;
	}
	if (__predict_true(m != NULL)) {
		pktq_inc_count(pq, PQCNT_DEQUEUE);
	}
	return m;
}
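
/*
 * Example (illustrative sketch; the "example_*" names are hypothetical,
 * not part of this file): the softint handler passed to pktq_create()
 * runs on the CPU whose queue was filled and simply drains that queue:
 *
 *	static void
 *	example_softint(void *arg __unused)
 *	{
 *		struct mbuf *m;
 *
 *		while ((m = pktq_dequeue(example_pktq)) != NULL) {
 *			example_process(m);
 *		}
 *	}
 */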

/*
 * pktq_barrier: wait for a grace period during which every packet that
 * was enqueued before this call gets processed.  This is used to ensure
 * that e.g. packets referencing some interface have been drained.
 */
void
pktq_barrier(pktqueue_t *pq)
{
	u_int pending = 0;

	mutex_enter(&pq->pq_lock);
	KASSERT(pq->pq_barrier == 0);

	for (u_int i = 0; i < ncpu; i++) {
		pcq_t *q = pq->pq_queue[i];

		/* If the queue is empty - nothing to do. */
		if (pcq_peek(q) == NULL) {
			continue;
		}
		/* Otherwise, insert the marker and kick the handler. */
		while (!pcq_put(q, PKTQ_MARKER)) {
			kpause("pktqsync", false, 1, NULL);
		}
		kpreempt_disable();
		softint_schedule_cpu(pq->pq_sih, cpu_lookup(i));
		kpreempt_enable();
		pending++;
	}

	/* Wait for each queue to process the markers. */
	while (pq->pq_barrier != pending) {
		kpause("pktqsync", false, 1, NULL);
	}
	pq->pq_barrier = 0;
	mutex_exit(&pq->pq_lock);
}
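
/*
 * Example (illustrative sketch; the "example_*" names are hypothetical,
 * not part of this file): a typical caller detaching an object that
 * enqueued packets may still reference would first unhook the object
 * from the enqueue path and only then wait for the in-flight packets:
 *
 *	void
 *	example_detach(struct example_softc *sc)
 *	{
 *		example_stop_input(sc);
 *		pktq_barrier(example_pktq);
 *		example_free_resources(sc);
 *	}
 */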

/*
 * pktq_flush: free mbufs in all queues.
 *
 * => The caller must ensure there are no concurrent writers or flush calls.
 */
void
pktq_flush(pktqueue_t *pq)
{
	struct mbuf *m;

	for (u_int i = 0; i < ncpu; i++) {
		while ((m = pcq_get(pq->pq_queue[i])) != NULL) {
			pktq_inc_count(pq, PQCNT_DEQUEUE);
			m_freem(m);
		}
	}
}
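
/*
 * Example (illustrative sketch; the "example_*" names are hypothetical,
 * not part of this file): tearing the queue down once no producers or
 * consumers remain, since pktq_destroy() asserts that the per-CPU
 * queues are already empty:
 *
 *	void
 *	example_fini(void)
 *	{
 *		pktq_flush(example_pktq);
 *		pktq_destroy(example_pktq);
 *		example_pktq = NULL;
 *	}
 */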