tcp_vtw.c revision 1.7 1 1.1 dyoung /*
2 1.1 dyoung * Copyright (c) 2011 The NetBSD Foundation, Inc.
3 1.1 dyoung * All rights reserved.
4 1.1 dyoung *
5 1.1 dyoung * This code is derived from software contributed to The NetBSD Foundation
6 1.1 dyoung * by Coyote Point Systems, Inc.
7 1.1 dyoung *
8 1.1 dyoung * Redistribution and use in source and binary forms, with or without
9 1.1 dyoung * modification, are permitted provided that the following conditions
10 1.1 dyoung * are met:
11 1.1 dyoung * 1. Redistributions of source code must retain the above copyright
12 1.1 dyoung * notice, this list of conditions and the following disclaimer.
13 1.1 dyoung * 2. Redistributions in binary form must reproduce the above copyright
14 1.1 dyoung * notice, this list of conditions and the following disclaimer in the
15 1.1 dyoung * documentation and/or other materials provided with the distribution.
16 1.1 dyoung *
17 1.1 dyoung * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
18 1.1 dyoung * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 1.1 dyoung * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 1.1 dyoung * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
21 1.1 dyoung * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 1.1 dyoung * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 1.1 dyoung * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 1.1 dyoung * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 1.1 dyoung * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 1.1 dyoung * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 1.1 dyoung * POSSIBILITY OF SUCH DAMAGE.
28 1.1 dyoung */
29 1.1 dyoung #include <sys/cdefs.h>
30 1.1 dyoung
31 1.1 dyoung #include "opt_ddb.h"
32 1.1 dyoung #include "opt_inet.h"
33 1.1 dyoung #include "opt_ipsec.h"
34 1.1 dyoung #include "opt_inet_csum.h"
35 1.1 dyoung #include "opt_tcp_debug.h"
36 1.1 dyoung
37 1.1 dyoung #include <sys/param.h>
38 1.1 dyoung #include <sys/systm.h>
39 1.1 dyoung #include <sys/malloc.h>
40 1.1 dyoung #include <sys/kmem.h>
41 1.1 dyoung #include <sys/mbuf.h>
42 1.1 dyoung #include <sys/protosw.h>
43 1.1 dyoung #include <sys/socket.h>
44 1.1 dyoung #include <sys/socketvar.h>
45 1.1 dyoung #include <sys/errno.h>
46 1.1 dyoung #include <sys/syslog.h>
47 1.1 dyoung #include <sys/pool.h>
48 1.1 dyoung #include <sys/domain.h>
49 1.1 dyoung #include <sys/kernel.h>
50 1.1 dyoung #include <net/if.h>
51 1.1 dyoung #include <net/route.h>
52 1.1 dyoung #include <net/if_types.h>
53 1.1 dyoung
54 1.1 dyoung #include <netinet/in.h>
55 1.1 dyoung #include <netinet/in_systm.h>
56 1.1 dyoung #include <netinet/ip.h>
57 1.1 dyoung #include <netinet/in_pcb.h>
58 1.1 dyoung #include <netinet/in_var.h>
59 1.1 dyoung #include <netinet/ip_var.h>
60 1.1 dyoung #include <netinet/in_offload.h>
61 1.1 dyoung #include <netinet/ip6.h>
62 1.1 dyoung #include <netinet6/ip6_var.h>
63 1.1 dyoung #include <netinet6/in6_pcb.h>
64 1.1 dyoung #include <netinet6/ip6_var.h>
65 1.1 dyoung #include <netinet6/in6_var.h>
66 1.1 dyoung #include <netinet/icmp6.h>
67 1.1 dyoung #include <netinet6/nd6.h>
68 1.1 dyoung
69 1.1 dyoung #include <netinet/tcp.h>
70 1.1 dyoung #include <netinet/tcp_fsm.h>
71 1.1 dyoung #include <netinet/tcp_seq.h>
72 1.1 dyoung #include <netinet/tcp_timer.h>
73 1.1 dyoung #include <netinet/tcp_var.h>
74 1.1 dyoung #include <netinet/tcp_private.h>
75 1.1 dyoung #include <netinet/tcpip.h>
76 1.1 dyoung
77 1.1 dyoung #include <machine/stdarg.h>
78 1.1 dyoung #include <netinet/tcp_vtw.h>
79 1.1 dyoung
80 1.7 dyoung __KERNEL_RCSID(0, "$NetBSD: tcp_vtw.c,v 1.7 2011/06/06 19:15:43 dyoung Exp $");
81 1.1 dyoung
/* Tracing hook.  It expands to an empty statement, so every db_trace()
 * call in this file (and the arguments passed to it) compiles away.
 */
82 1.1 dyoung #define db_trace(__a, __b) do { } while (/*CONSTCOND*/0)
83 1.1 dyoung
/* Forward declaration; the definition is not in this part of the file. */
84 1.1 dyoung static void vtw_debug_init(void);
85 1.1 dyoung
/* Fat pointer collections, one per address family. */
86 1.1 dyoung fatp_ctl_t fat_tcpv4;
87 1.1 dyoung fatp_ctl_t fat_tcpv6;
/* Vestigial time-wait control blocks, VTW_NCLASS per address family. */
88 1.1 dyoung vtw_ctl_t vtw_tcpv4[VTW_NCLASS];
89 1.1 dyoung vtw_ctl_t vtw_tcpv6[VTW_NCLASS];
/* Counters updated by the insert, delete and lookup paths below. */
90 1.1 dyoung vtw_stats_t vtw_stats;
91 1.1 dyoung
92 1.1 dyoung /* We provide state for the lookup_ports iterator.
93 1.1 dyoung * As currently we are netlock-protected, there is one.
94 1.1 dyoung * If we were finer-grain, we would have one per CPU.
95 1.1 dyoung * I do not want to be in the business of alloc/free.
96 1.1 dyoung * The best alternate would be allocate on the caller's
97 1.1 dyoung * stack, but that would require them to know the struct,
98 1.1 dyoung * or at least the size.
99 1.1 dyoung * See how she goes.
100 1.1 dyoung */
/* Iterator state for walking vtw entries by local port. */
101 1.1 dyoung struct tcp_ports_iterator {
/* Address storage sized for either family. */
102 1.1 dyoung union {
103 1.1 dyoung struct in_addr v4;
104 1.1 dyoung struct in6_addr v6;
105 1.1 dyoung } addr;
/* Local port being iterated; vtw_next_port_v4() reads it as lport. */
106 1.1 dyoung u_int port;
107 1.1 dyoung
/* NOTE(review): presumably set when the address is a wildcard - confirm
 * against the iterator setup code.
 */
108 1.1 dyoung uint32_t wild : 1;
109 1.1 dyoung
/* Control block the iterator walks; vtw_next_port_v4() reads it first. */
110 1.1 dyoung vtw_ctl_t *ctl;
/* NOTE(review): presumably the fatp the iteration is currently on,
 * with slot_idx/ctl_idx as the position within it - confirm.
 */
111 1.1 dyoung fatp_t *fp;
112 1.1 dyoung
113 1.1 dyoung uint16_t slot_idx;
114 1.1 dyoung uint16_t ctl_idx;
115 1.1 dyoung };
116 1.1 dyoung
/* One iterator per address family, statically allocated. */
117 1.1 dyoung static struct tcp_ports_iterator tcp_ports_iterator_v4;
118 1.1 dyoung static struct tcp_ports_iterator tcp_ports_iterator_v6;
119 1.1 dyoung
/* Forward declaration: fatp_vtw_inshash() calls this with a null second
 * argument when no free fat pointer is available.
 */
120 1.1 dyoung static int vtw_age(vtw_ctl_t *, struct timeval *);
121 1.1 dyoung
122 1.1 dyoung /*!\brief allocate a fat pointer from a collection.
123 1.1 dyoung */
124 1.1 dyoung static fatp_t *
125 1.1 dyoung fatp_alloc(fatp_ctl_t *fat)
126 1.1 dyoung {
127 1.1 dyoung fatp_t *fp = 0;
128 1.1 dyoung
129 1.1 dyoung if (fat->nfree) {
130 1.1 dyoung fp = fat->free;
131 1.1 dyoung if (fp) {
132 1.1 dyoung fat->free = fatp_next(fat, fp);
133 1.1 dyoung --fat->nfree;
134 1.1 dyoung ++fat->nalloc;
135 1.1 dyoung fp->nxt = 0;
136 1.1 dyoung
137 1.1 dyoung KASSERT(!fp->inuse);
138 1.1 dyoung }
139 1.1 dyoung }
140 1.1 dyoung
141 1.1 dyoung return fp;
142 1.1 dyoung }
143 1.1 dyoung
144 1.1 dyoung /*!\brief free a fat pointer.
145 1.1 dyoung */
146 1.1 dyoung static void
147 1.1 dyoung fatp_free(fatp_ctl_t *fat, fatp_t *fp)
148 1.1 dyoung {
149 1.1 dyoung if (fp) {
150 1.1 dyoung KASSERT(!fp->inuse);
151 1.1 dyoung KASSERT(!fp->nxt);
152 1.1 dyoung
153 1.1 dyoung fp->nxt = fatp_index(fat, fat->free);
154 1.1 dyoung fat->free = fp;
155 1.1 dyoung
156 1.1 dyoung ++fat->nfree;
157 1.1 dyoung --fat->nalloc;
158 1.1 dyoung }
159 1.1 dyoung }
160 1.1 dyoung
161 1.1 dyoung /*!\brief initialise a collection of fat pointers.
162 1.1 dyoung *
163 1.1 dyoung *\param n # hash buckets
164 1.1 dyoung *\param m total # fat pointers to allocate
165 1.1 dyoung *
166 1.1 dyoung * We allocate 2x as much, as we have two hashes: full and lport only.
167 1.1 dyoung */
168 1.1 dyoung static void
169 1.6 dyoung fatp_init(fatp_ctl_t *fat, uint32_t n, uint32_t m,
170 1.6 dyoung fatp_t *fat_base, fatp_t **fat_hash)
171 1.1 dyoung {
172 1.1 dyoung fatp_t *fp;
173 1.1 dyoung
174 1.1 dyoung KASSERT(n <= FATP_MAX / 2);
175 1.1 dyoung
176 1.6 dyoung fat->hash = fat_hash;
177 1.6 dyoung fat->base = fat_base;
178 1.1 dyoung
179 1.1 dyoung fat->port = &fat->hash[m];
180 1.1 dyoung
181 1.1 dyoung fat->mask = m - 1; // ASSERT is power of 2 (m)
182 1.1 dyoung fat->lim = fat->base + 2*n - 1;
183 1.1 dyoung fat->nfree = 0;
184 1.1 dyoung fat->nalloc = 2*n;
185 1.1 dyoung
186 1.1 dyoung /* Initialise the free list.
187 1.1 dyoung */
188 1.1 dyoung for (fp = fat->lim; fp >= fat->base; --fp) {
189 1.1 dyoung fatp_free(fat, fp);
190 1.1 dyoung }
191 1.1 dyoung }
192 1.1 dyoung
/*
 * Per-slot scramble values.  The entry for a slot is XORed into the
 * tag stored in that slot, together with the hash tag and the
 * encoded vtw index.
 */
static uint32_t fatp_xtra[] = {
	/* slots 0-7 */
	0x11111111, 0x22222222, 0x33333333, 0x44444444,
	0x55555555, 0x66666666, 0x77777777, 0x88888888,
	/* slots 8-15 */
	0x12121212, 0x21212121, 0x34343434, 0x43434343,
	0x56565656, 0x65656565, 0x78787878, 0x87878787,
	/* slots 16-23 */
	0x11221122, 0x22112211, 0x33443344, 0x44334433,
	0x55665566, 0x66556655, 0x77887788, 0x88778877,
	/* slots 24-31 */
	0x11112222, 0x22221111, 0x33334444, 0x44443333,
	0x55556666, 0x66665555, 0x77778888, 0x88887777,
};
206 1.1 dyoung
207 1.1 dyoung /*!\brief turn a {fatp_t*,slot} into an integral key.
208 1.1 dyoung *
209 1.1 dyoung * The key can be used to obtain the fatp_t, and the slot,
210 1.1 dyoung * as it directly encodes them.
211 1.1 dyoung */
212 1.1 dyoung static inline uint32_t
213 1.1 dyoung fatp_key(fatp_ctl_t *fat, fatp_t *fp, uint32_t slot)
214 1.1 dyoung {
215 1.1 dyoung CTASSERT(CACHE_LINE_SIZE == 32 ||
216 1.1 dyoung CACHE_LINE_SIZE == 64 ||
217 1.1 dyoung CACHE_LINE_SIZE == 128);
218 1.1 dyoung
219 1.1 dyoung switch (fatp_ntags()) {
220 1.1 dyoung case 7:
221 1.1 dyoung return (fatp_index(fat, fp) << 3) | slot;
222 1.1 dyoung case 15:
223 1.1 dyoung return (fatp_index(fat, fp) << 4) | slot;
224 1.1 dyoung case 31:
225 1.1 dyoung return (fatp_index(fat, fp) << 5) | slot;
226 1.1 dyoung default:
227 1.1 dyoung KASSERT(0 && "no support, for no good reason");
228 1.1 dyoung return ~0;
229 1.1 dyoung }
230 1.1 dyoung }
231 1.1 dyoung
232 1.1 dyoung static inline uint32_t
233 1.1 dyoung fatp_slot_from_key(fatp_ctl_t *fat, uint32_t key)
234 1.1 dyoung {
235 1.1 dyoung CTASSERT(CACHE_LINE_SIZE == 32 ||
236 1.1 dyoung CACHE_LINE_SIZE == 64 ||
237 1.1 dyoung CACHE_LINE_SIZE == 128);
238 1.1 dyoung
239 1.1 dyoung switch (fatp_ntags()) {
240 1.1 dyoung case 7:
241 1.1 dyoung return key & 7;
242 1.1 dyoung case 15:
243 1.1 dyoung return key & 15;
244 1.1 dyoung case 31:
245 1.1 dyoung return key & 31;
246 1.1 dyoung default:
247 1.1 dyoung KASSERT(0 && "no support, for no good reason");
248 1.1 dyoung return ~0;
249 1.1 dyoung }
250 1.1 dyoung }
251 1.1 dyoung
252 1.1 dyoung static inline fatp_t *
253 1.1 dyoung fatp_from_key(fatp_ctl_t *fat, uint32_t key)
254 1.1 dyoung {
255 1.1 dyoung CTASSERT(CACHE_LINE_SIZE == 32 ||
256 1.1 dyoung CACHE_LINE_SIZE == 64 ||
257 1.1 dyoung CACHE_LINE_SIZE == 128);
258 1.1 dyoung
259 1.1 dyoung switch (fatp_ntags()) {
260 1.1 dyoung case 7:
261 1.1 dyoung key >>= 3;
262 1.1 dyoung break;
263 1.1 dyoung case 15:
264 1.1 dyoung key >>= 4;
265 1.1 dyoung break;
266 1.1 dyoung case 31:
267 1.1 dyoung key >>= 5;
268 1.1 dyoung break;
269 1.1 dyoung default:
270 1.1 dyoung KASSERT(0 && "no support, for no good reason");
271 1.1 dyoung return 0;
272 1.1 dyoung }
273 1.1 dyoung
274 1.1 dyoung return key ? fat->base + key - 1 : 0;
275 1.1 dyoung }
276 1.1 dyoung
277 1.1 dyoung static inline uint32_t
278 1.1 dyoung idx_encode(vtw_ctl_t *ctl, uint32_t idx)
279 1.1 dyoung {
280 1.1 dyoung return (idx << ctl->idx_bits) | idx;
281 1.1 dyoung }
282 1.1 dyoung
283 1.1 dyoung static inline uint32_t
284 1.1 dyoung idx_decode(vtw_ctl_t *ctl, uint32_t bits)
285 1.1 dyoung {
286 1.1 dyoung uint32_t idx = bits & ctl->idx_mask;
287 1.1 dyoung
288 1.1 dyoung if (idx_encode(ctl, idx) == bits)
289 1.1 dyoung return idx;
290 1.1 dyoung else
291 1.1 dyoung return ~0;
292 1.1 dyoung }
293 1.1 dyoung
294 1.1 dyoung /*!\brief insert index into fatp hash
295 1.1 dyoung *
296 1.1 dyoung *\param idx - index of element being placed in hash chain
297 1.1 dyoung *\param tag - 32-bit tag identifier
298 1.1 dyoung *
299 1.1 dyoung *\returns
300 1.1 dyoung * value which can be used to locate entry.
301 1.1 dyoung *
302 1.1 dyoung *\note
303 1.1 dyoung * we rely on the fact that there are unused high bits in the index
304 1.1 dyoung * for verification purposes on lookup.
305 1.1 dyoung */
306 1.1 dyoung
307 1.1 dyoung static inline uint32_t
308 1.1 dyoung fatp_vtw_inshash(fatp_ctl_t *fat, uint32_t idx, uint32_t tag, int which,
309 1.1 dyoung void *dbg)
310 1.1 dyoung {
311 1.1 dyoung fatp_t *fp;
312 1.1 dyoung fatp_t **hash = (which ? fat->port : fat->hash);
313 1.1 dyoung int i;
314 1.1 dyoung
315 1.1 dyoung fp = hash[tag & fat->mask];
316 1.1 dyoung
317 1.1 dyoung while (!fp || fatp_full(fp)) {
318 1.1 dyoung fatp_t *fq;
319 1.1 dyoung
320 1.1 dyoung /* All entries are inuse at the top level.
321 1.1 dyoung * We allocate a spare, and push the top level
322 1.1 dyoung * down one. All entries in the fp we push down
323 1.1 dyoung * (think of a tape worm here) will be expelled sooner than
324 1.1 dyoung * any entries added subsequently to this hash bucket.
325 1.1 dyoung * This is a property of the time waits we are exploiting.
326 1.1 dyoung */
327 1.1 dyoung
328 1.1 dyoung fq = fatp_alloc(fat);
329 1.1 dyoung if (!fq) {
330 1.1 dyoung vtw_age(fat->vtw, 0);
331 1.1 dyoung fp = hash[tag & fat->mask];
332 1.1 dyoung continue;
333 1.1 dyoung }
334 1.1 dyoung
335 1.1 dyoung fq->inuse = 0;
336 1.1 dyoung fq->nxt = fatp_index(fat, fp);
337 1.1 dyoung
338 1.1 dyoung hash[tag & fat->mask] = fq;
339 1.1 dyoung
340 1.1 dyoung fp = fq;
341 1.1 dyoung }
342 1.1 dyoung
343 1.1 dyoung KASSERT(!fatp_full(fp));
344 1.1 dyoung
345 1.1 dyoung /* Fill highest index first. Lookup is lowest first.
346 1.1 dyoung */
347 1.1 dyoung for (i = fatp_ntags(); --i >= 0; ) {
348 1.1 dyoung if (!((1 << i) & fp->inuse)) {
349 1.1 dyoung break;
350 1.1 dyoung }
351 1.1 dyoung }
352 1.1 dyoung
353 1.1 dyoung fp->inuse |= 1 << i;
354 1.1 dyoung fp->tag[i] = tag ^ idx_encode(fat->vtw, idx) ^ fatp_xtra[i];
355 1.1 dyoung
356 1.1 dyoung db_trace(KTR_VTW
357 1.1 dyoung , (fp, "fat: inuse %5.5x tag[%x] %8.8x"
358 1.1 dyoung , fp->inuse
359 1.1 dyoung , i, fp->tag[i]));
360 1.1 dyoung
361 1.1 dyoung return fatp_key(fat, fp, i);
362 1.1 dyoung }
363 1.1 dyoung
364 1.1 dyoung static inline int
365 1.1 dyoung vtw_alive(const vtw_t *vtw)
366 1.1 dyoung {
367 1.1 dyoung return vtw->hashed && vtw->expire.tv_sec;
368 1.1 dyoung }
369 1.1 dyoung
370 1.1 dyoung static inline uint32_t
371 1.1 dyoung vtw_index_v4(vtw_ctl_t *ctl, vtw_v4_t *v4)
372 1.1 dyoung {
373 1.1 dyoung if (ctl->base.v4 <= v4 && v4 <= ctl->lim.v4)
374 1.1 dyoung return v4 - ctl->base.v4;
375 1.1 dyoung
376 1.1 dyoung KASSERT(0 && "vtw out of bounds");
377 1.1 dyoung
378 1.1 dyoung return ~0;
379 1.1 dyoung }
380 1.1 dyoung
381 1.1 dyoung static inline uint32_t
382 1.1 dyoung vtw_index_v6(vtw_ctl_t *ctl, vtw_v6_t *v6)
383 1.1 dyoung {
384 1.1 dyoung if (ctl->base.v6 <= v6 && v6 <= ctl->lim.v6)
385 1.1 dyoung return v6 - ctl->base.v6;
386 1.1 dyoung
387 1.1 dyoung KASSERT(0 && "vtw out of bounds");
388 1.1 dyoung
389 1.1 dyoung return ~0;
390 1.1 dyoung }
391 1.1 dyoung
392 1.1 dyoung static inline uint32_t
393 1.1 dyoung vtw_index(vtw_ctl_t *ctl, vtw_t *vtw)
394 1.1 dyoung {
395 1.1 dyoung if (ctl->clidx)
396 1.1 dyoung ctl = ctl->ctl;
397 1.1 dyoung
398 1.1 dyoung if (ctl->is_v4)
399 1.1 dyoung return vtw_index_v4(ctl, (vtw_v4_t *)vtw);
400 1.1 dyoung
401 1.1 dyoung if (ctl->is_v6)
402 1.1 dyoung return vtw_index_v6(ctl, (vtw_v6_t *)vtw);
403 1.1 dyoung
404 1.1 dyoung KASSERT(0 && "neither 4 nor 6. most curious.");
405 1.1 dyoung
406 1.1 dyoung return ~0;
407 1.1 dyoung }
408 1.1 dyoung
409 1.1 dyoung static inline vtw_t *
410 1.1 dyoung vtw_from_index(vtw_ctl_t *ctl, uint32_t idx)
411 1.1 dyoung {
412 1.1 dyoung if (ctl->clidx)
413 1.1 dyoung ctl = ctl->ctl;
414 1.1 dyoung
415 1.1 dyoung /* See if the index looks like it might be an index.
416 1.1 dyoung * Bits on outside of the valid index bits is a give away.
417 1.1 dyoung */
418 1.1 dyoung idx = idx_decode(ctl, idx);
419 1.1 dyoung
420 1.1 dyoung if (idx == ~0) {
421 1.1 dyoung return 0;
422 1.1 dyoung } else if (ctl->is_v4) {
423 1.1 dyoung vtw_v4_t *vtw = ctl->base.v4 + idx;
424 1.1 dyoung
425 1.1 dyoung return (ctl->base.v4 <= vtw && vtw <= ctl->lim.v4)
426 1.1 dyoung ? &vtw->common : 0;
427 1.1 dyoung } else if (ctl->is_v6) {
428 1.1 dyoung vtw_v6_t *vtw = ctl->base.v6 + idx;
429 1.1 dyoung
430 1.1 dyoung return (ctl->base.v6 <= vtw && vtw <= ctl->lim.v6)
431 1.1 dyoung ? &vtw->common : 0;
432 1.1 dyoung } else {
433 1.1 dyoung KASSERT(0 && "badness");
434 1.1 dyoung return 0;
435 1.1 dyoung }
436 1.1 dyoung }
437 1.1 dyoung
438 1.1 dyoung /*!\brief return the next vtw after this one.
439 1.1 dyoung *
440 1.1 dyoung * Due to the differing sizes of the entries in differing
441 1.1 dyoung * arenas, we have to ensure we ++ the correct pointer type.
442 1.1 dyoung *
443 1.1 dyoung * Also handles wrap.
444 1.1 dyoung */
445 1.1 dyoung static inline vtw_t *
446 1.1 dyoung vtw_next(vtw_ctl_t *ctl, vtw_t *vtw)
447 1.1 dyoung {
448 1.1 dyoung if (ctl->is_v4) {
449 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
450 1.1 dyoung
451 1.1 dyoung vtw = &(++v4)->common;
452 1.1 dyoung } else {
453 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
454 1.1 dyoung
455 1.1 dyoung vtw = &(++v6)->common;
456 1.1 dyoung }
457 1.1 dyoung
458 1.1 dyoung if (vtw > ctl->lim.v)
459 1.1 dyoung vtw = ctl->base.v;
460 1.1 dyoung
461 1.1 dyoung return vtw;
462 1.1 dyoung }
463 1.1 dyoung
464 1.1 dyoung /*!\brief remove entry from FATP hash chains
465 1.1 dyoung */
466 1.1 dyoung static inline void
467 1.1 dyoung vtw_unhash(vtw_ctl_t *ctl, vtw_t *vtw)
468 1.1 dyoung {
469 1.1 dyoung fatp_ctl_t *fat = ctl->fat;
470 1.1 dyoung fatp_t *fp;
471 1.1 dyoung uint32_t key = vtw->key;
472 1.1 dyoung uint32_t tag, slot, idx;
473 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
474 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
475 1.1 dyoung
476 1.1 dyoung if (!vtw->hashed) {
477 1.1 dyoung KASSERT(0 && "unhashed");
478 1.1 dyoung return;
479 1.1 dyoung }
480 1.1 dyoung
481 1.1 dyoung if (fat->vtw->is_v4) {
482 1.1 dyoung tag = v4_tag(v4->faddr, v4->fport, v4->laddr, v4->lport);
483 1.1 dyoung } else if (fat->vtw->is_v6) {
484 1.1 dyoung tag = v6_tag(&v6->faddr, v6->fport, &v6->laddr, v6->lport);
485 1.1 dyoung } else {
486 1.1 dyoung tag = 0;
487 1.1 dyoung KASSERT(0 && "not reached");
488 1.1 dyoung }
489 1.1 dyoung
490 1.1 dyoung /* Remove from fat->hash[]
491 1.1 dyoung */
492 1.1 dyoung slot = fatp_slot_from_key(fat, key);
493 1.1 dyoung fp = fatp_from_key(fat, key);
494 1.1 dyoung idx = vtw_index(ctl, vtw);
495 1.1 dyoung
496 1.1 dyoung db_trace(KTR_VTW
497 1.1 dyoung , (fp, "fat: del inuse %5.5x slot %x idx %x key %x tag %x"
498 1.1 dyoung , fp->inuse, slot, idx, key, tag));
499 1.1 dyoung
500 1.1 dyoung KASSERT(fp->inuse & (1 << slot));
501 1.1 dyoung KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
502 1.1 dyoung ^ fatp_xtra[slot]));
503 1.1 dyoung
504 1.1 dyoung if ((fp->inuse & (1 << slot))
505 1.1 dyoung && fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
506 1.1 dyoung ^ fatp_xtra[slot])) {
507 1.1 dyoung fp->inuse ^= 1 << slot;
508 1.1 dyoung fp->tag[slot] = 0;
509 1.1 dyoung
510 1.1 dyoung /* When we delete entries, we do not compact. This is
511 1.1 dyoung * due to temporality. We add entries, and they
512 1.1 dyoung * (eventually) expire. Older entries will be further
513 1.1 dyoung * down the chain.
514 1.1 dyoung */
515 1.1 dyoung if (!fp->inuse) {
516 1.1 dyoung uint32_t hi = tag & fat->mask;
517 1.1 dyoung fatp_t *fq = 0;
518 1.1 dyoung fatp_t *fr = fat->hash[hi];
519 1.1 dyoung
520 1.1 dyoung while (fr && fr != fp) {
521 1.1 dyoung fr = fatp_next(fat, fq = fr);
522 1.1 dyoung }
523 1.1 dyoung
524 1.1 dyoung if (fr == fp) {
525 1.1 dyoung if (fq) {
526 1.1 dyoung fq->nxt = fp->nxt;
527 1.1 dyoung fp->nxt = 0;
528 1.1 dyoung fatp_free(fat, fp);
529 1.1 dyoung } else {
530 1.1 dyoung KASSERT(fat->hash[hi] == fp);
531 1.1 dyoung
532 1.1 dyoung if (fp->nxt) {
533 1.1 dyoung fat->hash[hi]
534 1.1 dyoung = fatp_next(fat, fp);
535 1.1 dyoung fp->nxt = 0;
536 1.1 dyoung fatp_free(fat, fp);
537 1.1 dyoung } else {
538 1.1 dyoung /* retain for next use.
539 1.1 dyoung */
540 1.1 dyoung ;
541 1.1 dyoung }
542 1.1 dyoung }
543 1.1 dyoung } else {
544 1.1 dyoung fr = fat->hash[hi];
545 1.1 dyoung
546 1.1 dyoung do {
547 1.1 dyoung db_trace(KTR_VTW
548 1.1 dyoung , (fr
549 1.1 dyoung , "fat:*del inuse %5.5x"
550 1.1 dyoung " nxt %x"
551 1.1 dyoung , fr->inuse, fr->nxt));
552 1.1 dyoung
553 1.1 dyoung fr = fatp_next(fat, fq = fr);
554 1.1 dyoung } while (fr && fr != fp);
555 1.1 dyoung
556 1.1 dyoung KASSERT(0 && "oops");
557 1.1 dyoung }
558 1.1 dyoung }
559 1.1 dyoung vtw->key ^= ~0;
560 1.1 dyoung }
561 1.1 dyoung
562 1.1 dyoung if (fat->vtw->is_v4) {
563 1.1 dyoung tag = v4_port_tag(v4->lport);
564 1.1 dyoung } else if (fat->vtw->is_v6) {
565 1.1 dyoung tag = v6_port_tag(v6->lport);
566 1.1 dyoung }
567 1.1 dyoung
568 1.1 dyoung /* Remove from fat->port[]
569 1.1 dyoung */
570 1.1 dyoung key = vtw->port_key;
571 1.1 dyoung slot = fatp_slot_from_key(fat, key);
572 1.1 dyoung fp = fatp_from_key(fat, key);
573 1.1 dyoung idx = vtw_index(ctl, vtw);
574 1.1 dyoung
575 1.1 dyoung db_trace(KTR_VTW
576 1.1 dyoung , (fp, "fatport: del inuse %5.5x"
577 1.1 dyoung " slot %x idx %x key %x tag %x"
578 1.1 dyoung , fp->inuse, slot, idx, key, tag));
579 1.1 dyoung
580 1.1 dyoung KASSERT(fp->inuse & (1 << slot));
581 1.1 dyoung KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
582 1.1 dyoung ^ fatp_xtra[slot]));
583 1.1 dyoung
584 1.1 dyoung if ((fp->inuse & (1 << slot))
585 1.1 dyoung && fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
586 1.1 dyoung ^ fatp_xtra[slot])) {
587 1.1 dyoung fp->inuse ^= 1 << slot;
588 1.1 dyoung fp->tag[slot] = 0;
589 1.1 dyoung
590 1.1 dyoung if (!fp->inuse) {
591 1.1 dyoung uint32_t hi = tag & fat->mask;
592 1.1 dyoung fatp_t *fq = 0;
593 1.1 dyoung fatp_t *fr = fat->port[hi];
594 1.1 dyoung
595 1.1 dyoung while (fr && fr != fp) {
596 1.1 dyoung fr = fatp_next(fat, fq = fr);
597 1.1 dyoung }
598 1.1 dyoung
599 1.1 dyoung if (fr == fp) {
600 1.1 dyoung if (fq) {
601 1.1 dyoung fq->nxt = fp->nxt;
602 1.1 dyoung fp->nxt = 0;
603 1.1 dyoung fatp_free(fat, fp);
604 1.1 dyoung } else {
605 1.1 dyoung KASSERT(fat->port[hi] == fp);
606 1.1 dyoung
607 1.1 dyoung if (fp->nxt) {
608 1.1 dyoung fat->port[hi]
609 1.1 dyoung = fatp_next(fat, fp);
610 1.1 dyoung fp->nxt = 0;
611 1.1 dyoung fatp_free(fat, fp);
612 1.1 dyoung } else {
613 1.1 dyoung /* retain for next use.
614 1.1 dyoung */
615 1.1 dyoung ;
616 1.1 dyoung }
617 1.1 dyoung }
618 1.1 dyoung }
619 1.1 dyoung }
620 1.1 dyoung vtw->port_key ^= ~0;
621 1.1 dyoung }
622 1.1 dyoung
623 1.1 dyoung vtw->hashed = 0;
624 1.1 dyoung }
625 1.1 dyoung
626 1.1 dyoung /*!\brief remove entry from hash, possibly free.
627 1.1 dyoung */
628 1.1 dyoung void
629 1.1 dyoung vtw_del(vtw_ctl_t *ctl, vtw_t *vtw)
630 1.1 dyoung {
631 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
632 1.1 dyoung
633 1.1 dyoung if (vtw->hashed) {
634 1.1 dyoung ++vtw_stats.del;
635 1.1 dyoung vtw_unhash(ctl, vtw);
636 1.1 dyoung }
637 1.1 dyoung
638 1.1 dyoung /* We only delete the oldest entry.
639 1.1 dyoung */
640 1.1 dyoung if (vtw != ctl->oldest.v)
641 1.1 dyoung return;
642 1.1 dyoung
643 1.1 dyoung --ctl->nalloc;
644 1.1 dyoung ++ctl->nfree;
645 1.1 dyoung
646 1.1 dyoung vtw->expire.tv_sec = 0;
647 1.1 dyoung vtw->expire.tv_usec = ~0;
648 1.1 dyoung
649 1.1 dyoung if (!ctl->nalloc)
650 1.1 dyoung ctl->oldest.v = 0;
651 1.1 dyoung
652 1.1 dyoung ctl->oldest.v = vtw_next(ctl, vtw);
653 1.1 dyoung }
654 1.1 dyoung
655 1.4 dholland /*!\brief insert vestigial timewait in hash chain
656 1.1 dyoung */
657 1.1 dyoung static void
658 1.1 dyoung vtw_inshash_v4(vtw_ctl_t *ctl, vtw_t *vtw)
659 1.1 dyoung {
660 1.1 dyoung uint32_t idx = vtw_index(ctl, vtw);
661 1.1 dyoung uint32_t tag;
662 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
663 1.1 dyoung
664 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
665 1.1 dyoung KASSERT(!vtw->hashed);
666 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
667 1.1 dyoung
668 1.1 dyoung ++vtw_stats.ins;
669 1.1 dyoung
670 1.1 dyoung tag = v4_tag(v4->faddr, v4->fport,
671 1.1 dyoung v4->laddr, v4->lport);
672 1.1 dyoung
673 1.1 dyoung vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw);
674 1.1 dyoung
675 1.1 dyoung db_trace(KTR_VTW, (ctl
676 1.1 dyoung , "vtw: ins %8.8x:%4.4x %8.8x:%4.4x"
677 1.1 dyoung " tag %8.8x key %8.8x"
678 1.1 dyoung , v4->faddr, v4->fport
679 1.1 dyoung , v4->laddr, v4->lport
680 1.1 dyoung , tag
681 1.1 dyoung , vtw->key));
682 1.1 dyoung
683 1.1 dyoung tag = v4_port_tag(v4->lport);
684 1.1 dyoung vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw);
685 1.1 dyoung
686 1.1 dyoung db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x"
687 1.1 dyoung , v4->lport, v4->lport
688 1.1 dyoung , tag
689 1.1 dyoung , vtw->key));
690 1.1 dyoung
691 1.1 dyoung vtw->hashed = 1;
692 1.1 dyoung }
693 1.1 dyoung
694 1.4 dholland /*!\brief insert vestigial timewait in hash chain
695 1.1 dyoung */
696 1.1 dyoung static void
697 1.1 dyoung vtw_inshash_v6(vtw_ctl_t *ctl, vtw_t *vtw)
698 1.1 dyoung {
699 1.1 dyoung uint32_t idx = vtw_index(ctl, vtw);
700 1.1 dyoung uint32_t tag;
701 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
702 1.1 dyoung
703 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
704 1.1 dyoung KASSERT(!vtw->hashed);
705 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
706 1.1 dyoung
707 1.1 dyoung ++vtw_stats.ins;
708 1.1 dyoung
709 1.1 dyoung tag = v6_tag(&v6->faddr, v6->fport,
710 1.1 dyoung &v6->laddr, v6->lport);
711 1.1 dyoung
712 1.1 dyoung vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw);
713 1.1 dyoung
714 1.1 dyoung tag = v6_port_tag(v6->lport);
715 1.1 dyoung vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw);
716 1.1 dyoung
717 1.1 dyoung db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x"
718 1.1 dyoung , v6->lport, v6->lport
719 1.1 dyoung , tag
720 1.1 dyoung , vtw->key));
721 1.1 dyoung
722 1.1 dyoung vtw->hashed = 1;
723 1.1 dyoung }
724 1.1 dyoung
725 1.1 dyoung static vtw_t *
726 1.1 dyoung vtw_lookup_hash_v4(vtw_ctl_t *ctl, uint32_t faddr, uint16_t fport
727 1.1 dyoung , uint32_t laddr, uint16_t lport
728 1.1 dyoung , int which)
729 1.1 dyoung {
730 1.1 dyoung vtw_v4_t *v4;
731 1.1 dyoung vtw_t *vtw;
732 1.1 dyoung uint32_t tag;
733 1.1 dyoung fatp_t *fp;
734 1.1 dyoung int i;
735 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
736 1.1 dyoung
737 1.1 dyoung if (!ctl || !ctl->fat)
738 1.1 dyoung return 0;
739 1.1 dyoung
740 1.1 dyoung ++vtw_stats.look[which];
741 1.1 dyoung
742 1.1 dyoung if (which) {
743 1.1 dyoung tag = v4_port_tag(lport);
744 1.1 dyoung fp = ctl->fat->port[tag & ctl->fat->mask];
745 1.1 dyoung } else {
746 1.1 dyoung tag = v4_tag(faddr, fport, laddr, lport);
747 1.1 dyoung fp = ctl->fat->hash[tag & ctl->fat->mask];
748 1.1 dyoung }
749 1.1 dyoung
750 1.1 dyoung while (fp && fp->inuse) {
751 1.1 dyoung uint32_t inuse = fp->inuse;
752 1.1 dyoung
753 1.1 dyoung ++fatps;
754 1.1 dyoung
755 1.1 dyoung for (i = 0; inuse && i < fatp_ntags(); ++i) {
756 1.1 dyoung uint32_t idx;
757 1.1 dyoung
758 1.1 dyoung if (!(inuse & (1 << i)))
759 1.1 dyoung continue;
760 1.1 dyoung
761 1.1 dyoung inuse ^= 1 << i;
762 1.1 dyoung
763 1.1 dyoung ++probes;
764 1.1 dyoung ++vtw_stats.probe[which];
765 1.1 dyoung
766 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
767 1.1 dyoung vtw = vtw_from_index(ctl, idx);
768 1.1 dyoung
769 1.1 dyoung if (!vtw) {
770 1.1 dyoung /* Hopefully fast path.
771 1.1 dyoung */
772 1.1 dyoung db_trace(KTR_VTW
773 1.1 dyoung , (fp, "vtw: fast %A:%P %A:%P"
774 1.1 dyoung " idx %x tag %x"
775 1.1 dyoung , faddr, fport
776 1.1 dyoung , laddr, lport
777 1.1 dyoung , idx, tag));
778 1.1 dyoung continue;
779 1.1 dyoung }
780 1.1 dyoung
781 1.1 dyoung v4 = (void*)vtw;
782 1.1 dyoung
783 1.1 dyoung /* The de-referencing of vtw is what we want to avoid.
784 1.1 dyoung * Losing.
785 1.1 dyoung */
786 1.1 dyoung if (vtw_alive(vtw)
787 1.1 dyoung && ((which ? vtw->port_key : vtw->key)
788 1.1 dyoung == fatp_key(ctl->fat, fp, i))
789 1.1 dyoung && (which
790 1.1 dyoung || (v4->faddr == faddr && v4->laddr == laddr
791 1.1 dyoung && v4->fport == fport))
792 1.1 dyoung && v4->lport == lport) {
793 1.1 dyoung ++vtw_stats.hit[which];
794 1.1 dyoung
795 1.1 dyoung db_trace(KTR_VTW
796 1.1 dyoung , (fp, "vtw: hit %8.8x:%4.4x"
797 1.1 dyoung " %8.8x:%4.4x idx %x key %x"
798 1.1 dyoung , faddr, fport
799 1.1 dyoung , laddr, lport
800 1.1 dyoung , idx_decode(ctl, idx), vtw->key));
801 1.1 dyoung
802 1.1 dyoung KASSERT(vtw->hashed);
803 1.1 dyoung
804 1.1 dyoung goto out;
805 1.1 dyoung }
806 1.1 dyoung ++vtw_stats.losing[which];
807 1.1 dyoung ++losings;
808 1.1 dyoung
809 1.1 dyoung if (vtw_alive(vtw)) {
810 1.1 dyoung db_trace(KTR_VTW
811 1.1 dyoung , (fp, "vtw:!mis %8.8x:%4.4x"
812 1.1 dyoung " %8.8x:%4.4x key %x tag %x"
813 1.1 dyoung , faddr, fport
814 1.1 dyoung , laddr, lport
815 1.1 dyoung , fatp_key(ctl->fat, fp, i)
816 1.1 dyoung , v4_tag(faddr, fport
817 1.1 dyoung , laddr, lport)));
818 1.1 dyoung db_trace(KTR_VTW
819 1.1 dyoung , (vtw, "vtw:!mis %8.8x:%4.4x"
820 1.1 dyoung " %8.8x:%4.4x key %x tag %x"
821 1.1 dyoung , v4->faddr, v4->fport
822 1.1 dyoung , v4->laddr, v4->lport
823 1.1 dyoung , vtw->key
824 1.1 dyoung , v4_tag(v4->faddr, v4->fport
825 1.1 dyoung , v4->laddr, v4->lport)));
826 1.1 dyoung
827 1.1 dyoung if (vtw->key == fatp_key(ctl->fat, fp, i)) {
828 1.1 dyoung db_trace(KTR_VTW
829 1.1 dyoung , (vtw, "vtw:!mis %8.8x:%4.4x"
830 1.1 dyoung " %8.8x:%4.4x key %x"
831 1.1 dyoung " which %x"
832 1.1 dyoung , v4->faddr, v4->fport
833 1.1 dyoung , v4->laddr, v4->lport
834 1.1 dyoung , vtw->key
835 1.1 dyoung , which));
836 1.1 dyoung
837 1.1 dyoung } else {
838 1.1 dyoung db_trace(KTR_VTW
839 1.1 dyoung , (vtw
840 1.1 dyoung , "vtw:!mis"
841 1.1 dyoung " key %8.8x != %8.8x"
842 1.1 dyoung " idx %x i %x which %x"
843 1.1 dyoung , vtw->key
844 1.1 dyoung , fatp_key(ctl->fat, fp, i)
845 1.1 dyoung , idx_decode(ctl, idx)
846 1.1 dyoung , i
847 1.1 dyoung , which));
848 1.1 dyoung }
849 1.1 dyoung } else {
850 1.1 dyoung db_trace(KTR_VTW
851 1.1 dyoung , (fp
852 1.1 dyoung , "vtw:!mis free entry"
853 1.1 dyoung " idx %x vtw %p which %x"
854 1.1 dyoung , idx_decode(ctl, idx)
855 1.1 dyoung , vtw, which));
856 1.1 dyoung }
857 1.1 dyoung }
858 1.1 dyoung
859 1.1 dyoung if (fp->nxt) {
860 1.1 dyoung fp = fatp_next(ctl->fat, fp);
861 1.1 dyoung } else {
862 1.1 dyoung break;
863 1.1 dyoung }
864 1.1 dyoung }
865 1.1 dyoung ++vtw_stats.miss[which];
866 1.1 dyoung vtw = 0;
867 1.1 dyoung out:
868 1.1 dyoung if (fatps > vtw_stats.max_chain[which])
869 1.1 dyoung vtw_stats.max_chain[which] = fatps;
870 1.1 dyoung if (probes > vtw_stats.max_probe[which])
871 1.1 dyoung vtw_stats.max_probe[which] = probes;
872 1.1 dyoung if (losings > vtw_stats.max_loss[which])
873 1.1 dyoung vtw_stats.max_loss[which] = losings;
874 1.1 dyoung
875 1.1 dyoung return vtw;
876 1.1 dyoung }
877 1.1 dyoung
878 1.1 dyoung static vtw_t *
879 1.1 dyoung vtw_lookup_hash_v6(vtw_ctl_t *ctl, const struct in6_addr *faddr, uint16_t fport
880 1.1 dyoung , const struct in6_addr *laddr, uint16_t lport
881 1.1 dyoung , int which)
882 1.1 dyoung {
883 1.1 dyoung vtw_v6_t *v6;
884 1.1 dyoung vtw_t *vtw;
885 1.1 dyoung uint32_t tag;
886 1.1 dyoung fatp_t *fp;
887 1.1 dyoung int i;
888 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
889 1.1 dyoung
890 1.1 dyoung ++vtw_stats.look[which];
891 1.1 dyoung
892 1.1 dyoung if (!ctl || !ctl->fat)
893 1.1 dyoung return 0;
894 1.1 dyoung
895 1.1 dyoung if (which) {
896 1.1 dyoung tag = v6_port_tag(lport);
897 1.1 dyoung fp = ctl->fat->port[tag & ctl->fat->mask];
898 1.1 dyoung } else {
899 1.1 dyoung tag = v6_tag(faddr, fport, laddr, lport);
900 1.1 dyoung fp = ctl->fat->hash[tag & ctl->fat->mask];
901 1.1 dyoung }
902 1.1 dyoung
903 1.1 dyoung while (fp && fp->inuse) {
904 1.1 dyoung uint32_t inuse = fp->inuse;
905 1.1 dyoung
906 1.1 dyoung ++fatps;
907 1.1 dyoung
908 1.1 dyoung for (i = 0; inuse && i < fatp_ntags(); ++i) {
909 1.1 dyoung uint32_t idx;
910 1.1 dyoung
911 1.1 dyoung if (!(inuse & (1 << i)))
912 1.1 dyoung continue;
913 1.1 dyoung
914 1.1 dyoung inuse ^= 1 << i;
915 1.1 dyoung
916 1.1 dyoung ++probes;
917 1.1 dyoung ++vtw_stats.probe[which];
918 1.1 dyoung
919 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
920 1.1 dyoung vtw = vtw_from_index(ctl, idx);
921 1.1 dyoung
922 1.1 dyoung db_trace(KTR_VTW
923 1.1 dyoung , (fp, "probe: %2d %6A:%4.4x %6A:%4.4x idx %x"
924 1.1 dyoung , i
925 1.1 dyoung , db_store(faddr, sizeof (*faddr)), fport
926 1.1 dyoung , db_store(laddr, sizeof (*laddr)), lport
927 1.1 dyoung , idx_decode(ctl, idx)));
928 1.1 dyoung
929 1.1 dyoung if (!vtw) {
930 1.1 dyoung /* Hopefully fast path.
931 1.1 dyoung */
932 1.1 dyoung continue;
933 1.1 dyoung }
934 1.1 dyoung
935 1.1 dyoung v6 = (void*)vtw;
936 1.1 dyoung
937 1.1 dyoung if (vtw_alive(vtw)
938 1.1 dyoung && ((which ? vtw->port_key : vtw->key)
939 1.1 dyoung == fatp_key(ctl->fat, fp, i))
940 1.1 dyoung && v6->lport == lport
941 1.1 dyoung && (which
942 1.1 dyoung || (v6->fport == fport
943 1.1 dyoung && !bcmp(&v6->faddr, faddr, sizeof (*faddr))
944 1.1 dyoung && !bcmp(&v6->laddr, laddr
945 1.1 dyoung , sizeof (*laddr))))) {
946 1.1 dyoung ++vtw_stats.hit[which];
947 1.1 dyoung
948 1.1 dyoung KASSERT(vtw->hashed);
949 1.1 dyoung goto out;
950 1.1 dyoung } else {
951 1.1 dyoung ++vtw_stats.losing[which];
952 1.1 dyoung ++losings;
953 1.1 dyoung }
954 1.1 dyoung }
955 1.1 dyoung
956 1.1 dyoung if (fp->nxt) {
957 1.1 dyoung fp = fatp_next(ctl->fat, fp);
958 1.1 dyoung } else {
959 1.1 dyoung break;
960 1.1 dyoung }
961 1.1 dyoung }
962 1.1 dyoung ++vtw_stats.miss[which];
963 1.1 dyoung vtw = 0;
964 1.1 dyoung out:
965 1.1 dyoung if (fatps > vtw_stats.max_chain[which])
966 1.1 dyoung vtw_stats.max_chain[which] = fatps;
967 1.1 dyoung if (probes > vtw_stats.max_probe[which])
968 1.1 dyoung vtw_stats.max_probe[which] = probes;
969 1.1 dyoung if (losings > vtw_stats.max_loss[which])
970 1.1 dyoung vtw_stats.max_loss[which] = losings;
971 1.1 dyoung
972 1.1 dyoung return vtw;
973 1.1 dyoung }
974 1.1 dyoung
975 1.1 dyoung /*!\brief port iterator
976 1.1 dyoung */
977 1.1 dyoung static vtw_t *
978 1.1 dyoung vtw_next_port_v4(struct tcp_ports_iterator *it)
979 1.1 dyoung {
980 1.1 dyoung vtw_ctl_t *ctl = it->ctl;
981 1.1 dyoung vtw_v4_t *v4;
982 1.1 dyoung vtw_t *vtw;
983 1.1 dyoung uint32_t tag;
984 1.1 dyoung uint16_t lport = it->port;
985 1.1 dyoung fatp_t *fp;
986 1.1 dyoung int i;
987 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
988 1.1 dyoung
989 1.1 dyoung tag = v4_port_tag(lport);
990 1.1 dyoung if (!it->fp) {
991 1.1 dyoung it->fp = ctl->fat->port[tag & ctl->fat->mask];
992 1.1 dyoung it->slot_idx = 0;
993 1.1 dyoung }
994 1.1 dyoung fp = it->fp;
995 1.1 dyoung
996 1.1 dyoung while (fp) {
997 1.1 dyoung uint32_t inuse = fp->inuse;
998 1.1 dyoung
999 1.1 dyoung ++fatps;
1000 1.1 dyoung
1001 1.1 dyoung for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) {
1002 1.1 dyoung uint32_t idx;
1003 1.1 dyoung
1004 1.1 dyoung if (!(inuse & (1 << i)))
1005 1.1 dyoung continue;
1006 1.1 dyoung
1007 1.1 dyoung inuse &= ~0 << i;
1008 1.1 dyoung
1009 1.1 dyoung if (i < it->slot_idx)
1010 1.1 dyoung continue;
1011 1.1 dyoung
1012 1.1 dyoung ++vtw_stats.probe[1];
1013 1.1 dyoung ++probes;
1014 1.1 dyoung
1015 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
1016 1.1 dyoung vtw = vtw_from_index(ctl, idx);
1017 1.1 dyoung
1018 1.1 dyoung if (!vtw) {
1019 1.1 dyoung /* Hopefully fast path.
1020 1.1 dyoung */
1021 1.1 dyoung continue;
1022 1.1 dyoung }
1023 1.1 dyoung
1024 1.1 dyoung v4 = (void*)vtw;
1025 1.1 dyoung
1026 1.1 dyoung if (vtw_alive(vtw)
1027 1.1 dyoung && vtw->port_key == fatp_key(ctl->fat, fp, i)
1028 1.1 dyoung && v4->lport == lport) {
1029 1.1 dyoung ++vtw_stats.hit[1];
1030 1.1 dyoung
1031 1.1 dyoung it->slot_idx = i + 1;
1032 1.1 dyoung
1033 1.1 dyoung goto out;
1034 1.1 dyoung } else if (vtw_alive(vtw)) {
1035 1.1 dyoung ++vtw_stats.losing[1];
1036 1.1 dyoung ++losings;
1037 1.1 dyoung
1038 1.1 dyoung db_trace(KTR_VTW
1039 1.1 dyoung , (vtw, "vtw:!mis"
1040 1.1 dyoung " port %8.8x:%4.4x %8.8x:%4.4x"
1041 1.1 dyoung " key %x port %x"
1042 1.1 dyoung , v4->faddr, v4->fport
1043 1.1 dyoung , v4->laddr, v4->lport
1044 1.1 dyoung , vtw->key
1045 1.1 dyoung , lport));
1046 1.1 dyoung } else {
1047 1.1 dyoung /* Really losing here. We are coming
1048 1.1 dyoung * up with references to free entries.
1049 1.1 dyoung * Might find it better to use
1050 1.1 dyoung * traditional, or need another
1051 1.1 dyoung * add-hockery. The other add-hockery
1052 1.1 dyoung * would be to pul more into into the
1053 1.1 dyoung * cache line to reject the false
1054 1.1 dyoung * hits.
1055 1.1 dyoung */
1056 1.1 dyoung ++vtw_stats.losing[1];
1057 1.1 dyoung ++losings;
1058 1.1 dyoung db_trace(KTR_VTW
1059 1.1 dyoung , (fp, "vtw:!mis port %x"
1060 1.1 dyoung " - free entry idx %x vtw %p"
1061 1.1 dyoung , lport
1062 1.1 dyoung , idx_decode(ctl, idx)
1063 1.1 dyoung , vtw));
1064 1.1 dyoung }
1065 1.1 dyoung }
1066 1.1 dyoung
1067 1.1 dyoung if (fp->nxt) {
1068 1.1 dyoung it->fp = fp = fatp_next(ctl->fat, fp);
1069 1.1 dyoung it->slot_idx = 0;
1070 1.1 dyoung } else {
1071 1.1 dyoung it->fp = 0;
1072 1.1 dyoung break;
1073 1.1 dyoung }
1074 1.1 dyoung }
1075 1.1 dyoung ++vtw_stats.miss[1];
1076 1.1 dyoung
1077 1.1 dyoung vtw = 0;
1078 1.1 dyoung out:
1079 1.1 dyoung if (fatps > vtw_stats.max_chain[1])
1080 1.1 dyoung vtw_stats.max_chain[1] = fatps;
1081 1.1 dyoung if (probes > vtw_stats.max_probe[1])
1082 1.1 dyoung vtw_stats.max_probe[1] = probes;
1083 1.1 dyoung if (losings > vtw_stats.max_loss[1])
1084 1.1 dyoung vtw_stats.max_loss[1] = losings;
1085 1.1 dyoung
1086 1.1 dyoung return vtw;
1087 1.1 dyoung }
1088 1.1 dyoung
1089 1.1 dyoung /*!\brief port iterator
1090 1.1 dyoung */
1091 1.1 dyoung static vtw_t *
1092 1.1 dyoung vtw_next_port_v6(struct tcp_ports_iterator *it)
1093 1.1 dyoung {
1094 1.1 dyoung vtw_ctl_t *ctl = it->ctl;
1095 1.1 dyoung vtw_v6_t *v6;
1096 1.1 dyoung vtw_t *vtw;
1097 1.1 dyoung uint32_t tag;
1098 1.1 dyoung uint16_t lport = it->port;
1099 1.1 dyoung fatp_t *fp;
1100 1.1 dyoung int i;
1101 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
1102 1.1 dyoung
1103 1.1 dyoung tag = v6_port_tag(lport);
1104 1.1 dyoung if (!it->fp) {
1105 1.1 dyoung it->fp = ctl->fat->port[tag & ctl->fat->mask];
1106 1.1 dyoung it->slot_idx = 0;
1107 1.1 dyoung }
1108 1.1 dyoung fp = it->fp;
1109 1.1 dyoung
1110 1.1 dyoung while (fp) {
1111 1.1 dyoung uint32_t inuse = fp->inuse;
1112 1.1 dyoung
1113 1.1 dyoung ++fatps;
1114 1.1 dyoung
1115 1.1 dyoung for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) {
1116 1.1 dyoung uint32_t idx;
1117 1.1 dyoung
1118 1.1 dyoung if (!(inuse & (1 << i)))
1119 1.1 dyoung continue;
1120 1.1 dyoung
1121 1.1 dyoung inuse &= ~0 << i;
1122 1.1 dyoung
1123 1.1 dyoung if (i < it->slot_idx)
1124 1.1 dyoung continue;
1125 1.1 dyoung
1126 1.1 dyoung ++vtw_stats.probe[1];
1127 1.1 dyoung ++probes;
1128 1.1 dyoung
1129 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
1130 1.1 dyoung vtw = vtw_from_index(ctl, idx);
1131 1.1 dyoung
1132 1.1 dyoung if (!vtw) {
1133 1.1 dyoung /* Hopefully fast path.
1134 1.1 dyoung */
1135 1.1 dyoung continue;
1136 1.1 dyoung }
1137 1.1 dyoung
1138 1.1 dyoung v6 = (void*)vtw;
1139 1.1 dyoung
1140 1.1 dyoung db_trace(KTR_VTW
1141 1.1 dyoung , (vtw, "vtw: i %x idx %x fp->tag %x"
1142 1.1 dyoung " tag %x xtra %x"
1143 1.1 dyoung , i, idx_decode(ctl, idx)
1144 1.1 dyoung , fp->tag[i], tag, fatp_xtra[i]));
1145 1.1 dyoung
1146 1.1 dyoung if (vtw_alive(vtw)
1147 1.1 dyoung && vtw->port_key == fatp_key(ctl->fat, fp, i)
1148 1.1 dyoung && v6->lport == lport) {
1149 1.1 dyoung ++vtw_stats.hit[1];
1150 1.1 dyoung
1151 1.1 dyoung db_trace(KTR_VTW
1152 1.1 dyoung , (fp, "vtw: nxt port %P - %4.4x"
1153 1.1 dyoung " idx %x key %x"
1154 1.1 dyoung , lport, lport
1155 1.1 dyoung , idx_decode(ctl, idx), vtw->key));
1156 1.1 dyoung
1157 1.1 dyoung it->slot_idx = i + 1;
1158 1.1 dyoung goto out;
1159 1.1 dyoung } else if (vtw_alive(vtw)) {
1160 1.1 dyoung ++vtw_stats.losing[1];
1161 1.1 dyoung
1162 1.1 dyoung db_trace(KTR_VTW
1163 1.1 dyoung , (vtw, "vtw:!mis port %6A:%4.4x"
1164 1.1 dyoung " %6A:%4.4x key %x port %x"
1165 1.1 dyoung , db_store(&v6->faddr
1166 1.1 dyoung , sizeof (v6->faddr))
1167 1.1 dyoung , v6->fport
1168 1.1 dyoung , db_store(&v6->laddr
1169 1.1 dyoung , sizeof (v6->faddr))
1170 1.1 dyoung , v6->lport
1171 1.1 dyoung , vtw->key
1172 1.1 dyoung , lport));
1173 1.1 dyoung } else {
1174 1.1 dyoung /* Really losing here. We are coming
1175 1.1 dyoung * up with references to free entries.
1176 1.1 dyoung * Might find it better to use
1177 1.1 dyoung * traditional, or need another
1178 1.1 dyoung * add-hockery. The other add-hockery
1179 1.1 dyoung * would be to pul more into into the
1180 1.1 dyoung * cache line to reject the false
1181 1.1 dyoung * hits.
1182 1.1 dyoung */
1183 1.1 dyoung ++vtw_stats.losing[1];
1184 1.1 dyoung ++losings;
1185 1.1 dyoung
1186 1.1 dyoung db_trace(KTR_VTW
1187 1.1 dyoung , (fp
1188 1.1 dyoung , "vtw:!mis port %x"
1189 1.1 dyoung " - free entry idx %x vtw %p"
1190 1.1 dyoung , lport, idx_decode(ctl, idx)
1191 1.1 dyoung , vtw));
1192 1.1 dyoung }
1193 1.1 dyoung }
1194 1.1 dyoung
1195 1.1 dyoung if (fp->nxt) {
1196 1.1 dyoung it->fp = fp = fatp_next(ctl->fat, fp);
1197 1.1 dyoung it->slot_idx = 0;
1198 1.1 dyoung } else {
1199 1.1 dyoung it->fp = 0;
1200 1.1 dyoung break;
1201 1.1 dyoung }
1202 1.1 dyoung }
1203 1.1 dyoung ++vtw_stats.miss[1];
1204 1.1 dyoung
1205 1.1 dyoung vtw = 0;
1206 1.1 dyoung out:
1207 1.1 dyoung if (fatps > vtw_stats.max_chain[1])
1208 1.1 dyoung vtw_stats.max_chain[1] = fatps;
1209 1.1 dyoung if (probes > vtw_stats.max_probe[1])
1210 1.1 dyoung vtw_stats.max_probe[1] = probes;
1211 1.1 dyoung if (losings > vtw_stats.max_loss[1])
1212 1.1 dyoung vtw_stats.max_loss[1] = losings;
1213 1.1 dyoung
1214 1.1 dyoung return vtw;
1215 1.1 dyoung }
1216 1.1 dyoung
1217 1.1 dyoung /*!\brief initialise the VTW allocation arena
1218 1.1 dyoung *
1219 1.1 dyoung * There are 1+3 allocation classes:
1220 1.1 dyoung * 0 classless
1221 1.1 dyoung * {1,2,3} MSL-class based allocation
1222 1.1 dyoung *
1223 1.1 dyoung * The allocation arenas are all initialised. Classless gets all the
1224 1.1 dyoung * space. MSL-class based divides the arena, so that allocation
1225 1.1 dyoung * within a class can proceed without having to consider entries
1226 1.1 dyoung * (aka: cache lines) from different classes.
1227 1.1 dyoung *
1228 1.1 dyoung * Usually, we are completely classless or class-based, but there can be
1229 1.1 dyoung * transition periods, corresponding to dynamic adjustments in the config
1230 1.1 dyoung * by the operator.
1231 1.1 dyoung */
1232 1.1 dyoung static void
1233 1.6 dyoung vtw_init(fatp_ctl_t *fat, vtw_ctl_t *ctl, const uint32_t n, vtw_t *ctl_base_v)
1234 1.1 dyoung {
1235 1.6 dyoung int class_n, i;
1236 1.6 dyoung vtw_t *base;
1237 1.1 dyoung
1238 1.6 dyoung ctl->base.v = ctl_base_v;
1239 1.1 dyoung
1240 1.6 dyoung if (ctl->is_v4) {
1241 1.6 dyoung ctl->lim.v4 = ctl->base.v4 + n - 1;
1242 1.6 dyoung ctl->alloc.v4 = ctl->base.v4;
1243 1.6 dyoung } else {
1244 1.6 dyoung ctl->lim.v6 = ctl->base.v6 + n - 1;
1245 1.6 dyoung ctl->alloc.v6 = ctl->base.v6;
1246 1.6 dyoung }
1247 1.1 dyoung
1248 1.6 dyoung ctl->nfree = n;
1249 1.6 dyoung ctl->ctl = ctl;
1250 1.1 dyoung
1251 1.6 dyoung ctl->idx_bits = 32;
1252 1.6 dyoung for (ctl->idx_mask = ~0; (ctl->idx_mask & (n-1)) == n-1; ) {
1253 1.6 dyoung ctl->idx_mask >>= 1;
1254 1.6 dyoung ctl->idx_bits -= 1;
1255 1.6 dyoung }
1256 1.1 dyoung
1257 1.6 dyoung ctl->idx_mask <<= 1;
1258 1.6 dyoung ctl->idx_mask |= 1;
1259 1.6 dyoung ctl->idx_bits += 1;
1260 1.1 dyoung
1261 1.6 dyoung ctl->fat = fat;
1262 1.6 dyoung fat->vtw = ctl;
1263 1.1 dyoung
1264 1.6 dyoung /* Divide the resources equally amongst the classes.
1265 1.6 dyoung * This is not optimal, as the different classes
1266 1.6 dyoung * arrive and leave at different rates, but it is
1267 1.6 dyoung * the best I can do for now.
1268 1.6 dyoung */
1269 1.6 dyoung class_n = n / (VTW_NCLASS-1);
1270 1.6 dyoung base = ctl->base.v;
1271 1.1 dyoung
1272 1.6 dyoung for (i = 1; i < VTW_NCLASS; ++i) {
1273 1.6 dyoung int j;
1274 1.1 dyoung
1275 1.6 dyoung ctl[i] = ctl[0];
1276 1.6 dyoung ctl[i].clidx = i;
1277 1.1 dyoung
1278 1.6 dyoung ctl[i].base.v = base;
1279 1.6 dyoung ctl[i].alloc = ctl[i].base;
1280 1.1 dyoung
1281 1.6 dyoung for (j = 0; j < class_n - 1; ++j) {
1282 1.6 dyoung if (tcp_msl_enable)
1283 1.6 dyoung base->msl_class = i;
1284 1.1 dyoung base = vtw_next(ctl, base);
1285 1.1 dyoung }
1286 1.6 dyoung
1287 1.6 dyoung ctl[i].lim.v = base;
1288 1.6 dyoung base = vtw_next(ctl, base);
1289 1.6 dyoung ctl[i].nfree = class_n;
1290 1.1 dyoung }
1291 1.1 dyoung
1292 1.1 dyoung vtw_debug_init();
1293 1.1 dyoung }
1294 1.1 dyoung
1295 1.1 dyoung /*!\brief map class to TCP MSL
1296 1.1 dyoung */
1297 1.1 dyoung static inline uint32_t
1298 1.1 dyoung class_to_msl(int class)
1299 1.1 dyoung {
1300 1.1 dyoung switch (class) {
1301 1.1 dyoung case 0:
1302 1.1 dyoung case 1:
1303 1.1 dyoung return tcp_msl_remote ? tcp_msl_remote : (TCPTV_MSL >> 0);
1304 1.1 dyoung case 2:
1305 1.1 dyoung return tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1);
1306 1.1 dyoung default:
1307 1.1 dyoung return tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2);
1308 1.1 dyoung }
1309 1.1 dyoung }
1310 1.1 dyoung
1311 1.1 dyoung /*!\brief map TCP MSL to class
1312 1.1 dyoung */
1313 1.1 dyoung static inline uint32_t
1314 1.1 dyoung msl_to_class(int msl)
1315 1.1 dyoung {
1316 1.1 dyoung if (tcp_msl_enable) {
1317 1.1 dyoung if (msl <= (tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2)))
1318 1.1 dyoung return 1+2;
1319 1.1 dyoung if (msl <= (tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1)))
1320 1.1 dyoung return 1+1;
1321 1.1 dyoung return 1;
1322 1.1 dyoung }
1323 1.1 dyoung return 0;
1324 1.1 dyoung }
1325 1.1 dyoung
1326 1.1 dyoung /*!\brief allocate a vtw entry
1327 1.1 dyoung */
1328 1.1 dyoung static inline vtw_t *
1329 1.1 dyoung vtw_alloc(vtw_ctl_t *ctl)
1330 1.1 dyoung {
1331 1.1 dyoung vtw_t *vtw = 0;
1332 1.1 dyoung int stuck = 0;
1333 1.1 dyoung int avail = ctl ? (ctl->nalloc + ctl->nfree) : 0;
1334 1.1 dyoung int msl;
1335 1.1 dyoung
1336 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
1337 1.1 dyoung
1338 1.1 dyoung /* If no resources, we will not get far.
1339 1.1 dyoung */
1340 1.1 dyoung if (!ctl || !ctl->base.v4 || avail <= 0)
1341 1.1 dyoung return 0;
1342 1.1 dyoung
1343 1.1 dyoung /* Obtain a free one.
1344 1.1 dyoung */
1345 1.1 dyoung while (!ctl->nfree) {
1346 1.1 dyoung vtw_age(ctl, 0);
1347 1.1 dyoung
1348 1.1 dyoung if (++stuck > avail) {
1349 1.1 dyoung /* When in transition between
1350 1.1 dyoung * schemes (classless, classed) we
1351 1.1 dyoung * can be stuck having to await the
1352 1.1 dyoung * expiration of cross-allocated entries.
1353 1.1 dyoung *
1354 1.1 dyoung * Returning zero means we will fall back to the
1355 1.1 dyoung * traditional TIME_WAIT handling, except in the
1356 1.1 dyoung * case of a re-shed, in which case we cannot
1357 1.1 dyoung * perform the reshecd, but will retain the extant
1358 1.1 dyoung * entry.
1359 1.1 dyoung */
1360 1.1 dyoung db_trace(KTR_VTW
1361 1.1 dyoung , (ctl, "vtw:!none free in class %x %x/%x"
1362 1.1 dyoung , ctl->clidx
1363 1.1 dyoung , ctl->nalloc, ctl->nfree));
1364 1.1 dyoung
1365 1.1 dyoung return 0;
1366 1.1 dyoung }
1367 1.1 dyoung }
1368 1.1 dyoung
1369 1.1 dyoung vtw = ctl->alloc.v;
1370 1.1 dyoung
1371 1.1 dyoung if (vtw->msl_class != ctl->clidx) {
1372 1.1 dyoung /* Usurping rules:
1373 1.1 dyoung * 0 -> {1,2,3} or {1,2,3} -> 0
1374 1.1 dyoung */
1375 1.1 dyoung KASSERT(!vtw->msl_class || !ctl->clidx);
1376 1.1 dyoung
1377 1.1 dyoung if (vtw->hashed || vtw->expire.tv_sec) {
1378 1.1 dyoung /* As this is owned by some other class,
1379 1.1 dyoung * we must wait for it to expire it.
1380 1.1 dyoung * This will only happen on class/classless
1381 1.1 dyoung * transitions, which are guaranteed to progress
1382 1.1 dyoung * to completion in small finite time, barring bugs.
1383 1.1 dyoung */
1384 1.1 dyoung db_trace(KTR_VTW
1385 1.1 dyoung , (ctl, "vtw:!%p class %x!=%x %x:%x%s"
1386 1.1 dyoung , vtw, vtw->msl_class, ctl->clidx
1387 1.1 dyoung , vtw->expire.tv_sec
1388 1.1 dyoung , vtw->expire.tv_usec
1389 1.1 dyoung , vtw->hashed ? " hashed" : ""));
1390 1.1 dyoung
1391 1.1 dyoung return 0;
1392 1.1 dyoung }
1393 1.1 dyoung
1394 1.1 dyoung db_trace(KTR_VTW
1395 1.1 dyoung , (ctl, "vtw:!%p usurped from %x to %x"
1396 1.1 dyoung , vtw, vtw->msl_class, ctl->clidx));
1397 1.1 dyoung
1398 1.1 dyoung vtw->msl_class = ctl->clidx;
1399 1.1 dyoung }
1400 1.1 dyoung
1401 1.1 dyoung if (vtw_alive(vtw)) {
1402 1.1 dyoung KASSERT(0 && "next free not free");
1403 1.1 dyoung return 0;
1404 1.1 dyoung }
1405 1.1 dyoung
1406 1.1 dyoung /* Advance allocation poiter.
1407 1.1 dyoung */
1408 1.1 dyoung ctl->alloc.v = vtw_next(ctl, vtw);
1409 1.1 dyoung
1410 1.1 dyoung --ctl->nfree;
1411 1.1 dyoung ++ctl->nalloc;
1412 1.1 dyoung
1413 1.1 dyoung msl = (2 * class_to_msl(ctl->clidx) * 1000) / PR_SLOWHZ; // msec
1414 1.1 dyoung
1415 1.1 dyoung /* mark expiration
1416 1.1 dyoung */
1417 1.3 drochner getmicrouptime(&vtw->expire);
1418 1.1 dyoung
1419 1.1 dyoung /* Move expiration into the future.
1420 1.1 dyoung */
1421 1.1 dyoung vtw->expire.tv_sec += msl / 1000;
1422 1.1 dyoung vtw->expire.tv_usec += 1000 * (msl % 1000);
1423 1.1 dyoung
1424 1.1 dyoung while (vtw->expire.tv_usec >= 1000*1000) {
1425 1.1 dyoung vtw->expire.tv_usec -= 1000*1000;
1426 1.1 dyoung vtw->expire.tv_sec += 1;
1427 1.1 dyoung }
1428 1.1 dyoung
1429 1.1 dyoung if (!ctl->oldest.v)
1430 1.1 dyoung ctl->oldest.v = vtw;
1431 1.1 dyoung
1432 1.1 dyoung return vtw;
1433 1.1 dyoung }
1434 1.1 dyoung
1435 1.1 dyoung /*!\brief expiration
1436 1.1 dyoung */
1437 1.1 dyoung static int
1438 1.1 dyoung vtw_age(vtw_ctl_t *ctl, struct timeval *_when)
1439 1.1 dyoung {
1440 1.1 dyoung vtw_t *vtw;
1441 1.1 dyoung struct timeval then, *when = _when;
1442 1.1 dyoung int maxtries = 0;
1443 1.1 dyoung
1444 1.1 dyoung if (!ctl->oldest.v) {
1445 1.1 dyoung KASSERT(!ctl->nalloc);
1446 1.1 dyoung return 0;
1447 1.1 dyoung }
1448 1.1 dyoung
1449 1.1 dyoung for (vtw = ctl->oldest.v; vtw && ctl->nalloc; ) {
1450 1.1 dyoung if (++maxtries > ctl->nalloc)
1451 1.1 dyoung break;
1452 1.1 dyoung
1453 1.1 dyoung if (vtw->msl_class != ctl->clidx) {
1454 1.1 dyoung db_trace(KTR_VTW
1455 1.1 dyoung , (vtw, "vtw:!age class mismatch %x != %x"
1456 1.1 dyoung , vtw->msl_class, ctl->clidx));
1457 1.1 dyoung /* XXXX
1458 1.1 dyoung * See if the appropriate action is to skip to the next.
1459 1.1 dyoung * XXXX
1460 1.1 dyoung */
1461 1.1 dyoung ctl->oldest.v = vtw = vtw_next(ctl, vtw);
1462 1.1 dyoung continue;
1463 1.1 dyoung }
1464 1.1 dyoung if (!when) {
1465 1.1 dyoung /* Latch oldest timeval if none specified.
1466 1.1 dyoung */
1467 1.1 dyoung then = vtw->expire;
1468 1.1 dyoung when = &then;
1469 1.1 dyoung }
1470 1.1 dyoung
1471 1.1 dyoung if (!timercmp(&vtw->expire, when, <=))
1472 1.1 dyoung break;
1473 1.1 dyoung
1474 1.1 dyoung db_trace(KTR_VTW
1475 1.1 dyoung , (vtw, "vtw: expire %x %8.8x:%8.8x %x/%x"
1476 1.1 dyoung , ctl->clidx
1477 1.1 dyoung , vtw->expire.tv_sec
1478 1.1 dyoung , vtw->expire.tv_usec
1479 1.1 dyoung , ctl->nalloc
1480 1.1 dyoung , ctl->nfree));
1481 1.1 dyoung
1482 1.1 dyoung if (!_when)
1483 1.1 dyoung ++vtw_stats.kill;
1484 1.1 dyoung
1485 1.1 dyoung vtw_del(ctl, vtw);
1486 1.1 dyoung vtw = ctl->oldest.v;
1487 1.1 dyoung }
1488 1.1 dyoung
1489 1.1 dyoung return ctl->nalloc; // # remaining allocated
1490 1.1 dyoung }
1491 1.1 dyoung
1492 1.1 dyoung static callout_t vtw_cs;
1493 1.1 dyoung
1494 1.1 dyoung /*!\brief notice the passage of time.
1495 1.1 dyoung * It seems to be getting faster. What happened to the year?
1496 1.1 dyoung */
1497 1.1 dyoung static void
1498 1.1 dyoung vtw_tick(void *arg)
1499 1.1 dyoung {
1500 1.1 dyoung struct timeval now;
1501 1.1 dyoung int i, cnt = 0;
1502 1.1 dyoung
1503 1.3 drochner getmicrouptime(&now);
1504 1.1 dyoung
1505 1.1 dyoung db_trace(KTR_VTW, (arg, "vtk: tick - now %8.8x:%8.8x"
1506 1.1 dyoung , now.tv_sec, now.tv_usec));
1507 1.1 dyoung
1508 1.1 dyoung mutex_enter(softnet_lock);
1509 1.1 dyoung
1510 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
1511 1.1 dyoung cnt += vtw_age(&vtw_tcpv4[i], &now);
1512 1.1 dyoung cnt += vtw_age(&vtw_tcpv6[i], &now);
1513 1.1 dyoung }
1514 1.1 dyoung
1515 1.1 dyoung /* Keep ticks coming while we need them.
1516 1.1 dyoung */
1517 1.1 dyoung if (cnt)
1518 1.1 dyoung callout_schedule(&vtw_cs, hz / 5);
1519 1.1 dyoung else {
1520 1.1 dyoung tcp_vtw_was_enabled = 0;
1521 1.1 dyoung tcbtable.vestige = 0;
1522 1.1 dyoung }
1523 1.1 dyoung mutex_exit(softnet_lock);
1524 1.1 dyoung }
1525 1.1 dyoung
1526 1.1 dyoung /* in_pcblookup_ports assist for handling vestigial entries.
1527 1.1 dyoung */
1528 1.1 dyoung static void *
1529 1.1 dyoung tcp_init_ports_v4(struct in_addr addr, u_int port, int wild)
1530 1.1 dyoung {
1531 1.1 dyoung struct tcp_ports_iterator *it = &tcp_ports_iterator_v4;
1532 1.1 dyoung
1533 1.1 dyoung bzero(it, sizeof (*it));
1534 1.1 dyoung
1535 1.1 dyoung /* Note: the reference to vtw_tcpv4[0] is fine.
1536 1.1 dyoung * We do not need per-class iteration. We just
1537 1.1 dyoung * need to get to the fat, and there is one
1538 1.1 dyoung * shared fat.
1539 1.1 dyoung */
1540 1.1 dyoung if (vtw_tcpv4[0].fat) {
1541 1.1 dyoung it->addr.v4 = addr;
1542 1.1 dyoung it->port = port;
1543 1.1 dyoung it->wild = !!wild;
1544 1.1 dyoung it->ctl = &vtw_tcpv4[0];
1545 1.1 dyoung
1546 1.1 dyoung ++vtw_stats.look[1];
1547 1.1 dyoung }
1548 1.1 dyoung
1549 1.1 dyoung return it;
1550 1.1 dyoung }
1551 1.1 dyoung
1552 1.1 dyoung /*!\brief export an IPv4 vtw.
1553 1.1 dyoung */
1554 1.1 dyoung static int
1555 1.1 dyoung vtw_export_v4(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res)
1556 1.1 dyoung {
1557 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
1558 1.1 dyoung
1559 1.1 dyoung bzero(res, sizeof (*res));
1560 1.1 dyoung
1561 1.1 dyoung if (ctl && vtw) {
1562 1.1 dyoung if (!ctl->clidx && vtw->msl_class)
1563 1.1 dyoung ctl += vtw->msl_class;
1564 1.1 dyoung else
1565 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
1566 1.1 dyoung
1567 1.1 dyoung res->valid = 1;
1568 1.1 dyoung res->v4 = 1;
1569 1.1 dyoung
1570 1.1 dyoung res->faddr.v4.s_addr = v4->faddr;
1571 1.1 dyoung res->laddr.v4.s_addr = v4->laddr;
1572 1.1 dyoung res->fport = v4->fport;
1573 1.1 dyoung res->lport = v4->lport;
1574 1.1 dyoung res->vtw = vtw; // netlock held over call(s)
1575 1.1 dyoung res->ctl = ctl;
1576 1.1 dyoung res->reuse_addr = vtw->reuse_addr;
1577 1.1 dyoung res->reuse_port = vtw->reuse_port;
1578 1.1 dyoung res->snd_nxt = vtw->snd_nxt;
1579 1.1 dyoung res->rcv_nxt = vtw->rcv_nxt;
1580 1.1 dyoung res->rcv_wnd = vtw->rcv_wnd;
1581 1.1 dyoung res->uid = vtw->uid;
1582 1.1 dyoung }
1583 1.1 dyoung
1584 1.1 dyoung return res->valid;
1585 1.1 dyoung }
1586 1.1 dyoung
1587 1.1 dyoung /*!\brief return next port in the port iterator. yowza.
1588 1.1 dyoung */
1589 1.1 dyoung static int
1590 1.1 dyoung tcp_next_port_v4(void *arg, struct vestigial_inpcb *res)
1591 1.1 dyoung {
1592 1.1 dyoung struct tcp_ports_iterator *it = arg;
1593 1.1 dyoung vtw_t *vtw = 0;
1594 1.1 dyoung
1595 1.1 dyoung if (it->ctl)
1596 1.1 dyoung vtw = vtw_next_port_v4(it);
1597 1.1 dyoung
1598 1.1 dyoung if (!vtw)
1599 1.1 dyoung it->ctl = 0;
1600 1.1 dyoung
1601 1.1 dyoung return vtw_export_v4(it->ctl, vtw, res);
1602 1.1 dyoung }
1603 1.1 dyoung
1604 1.1 dyoung static int
1605 1.1 dyoung tcp_lookup_v4(struct in_addr faddr, uint16_t fport,
1606 1.1 dyoung struct in_addr laddr, uint16_t lport,
1607 1.1 dyoung struct vestigial_inpcb *res)
1608 1.1 dyoung {
1609 1.1 dyoung vtw_t *vtw;
1610 1.1 dyoung vtw_ctl_t *ctl;
1611 1.1 dyoung
1612 1.1 dyoung
1613 1.1 dyoung db_trace(KTR_VTW
1614 1.1 dyoung , (res, "vtw: lookup %A:%P %A:%P"
1615 1.1 dyoung , faddr, fport
1616 1.1 dyoung , laddr, lport));
1617 1.1 dyoung
1618 1.1 dyoung vtw = vtw_lookup_hash_v4((ctl = &vtw_tcpv4[0])
1619 1.1 dyoung , faddr.s_addr, fport
1620 1.1 dyoung , laddr.s_addr, lport, 0);
1621 1.1 dyoung
1622 1.1 dyoung return vtw_export_v4(ctl, vtw, res);
1623 1.1 dyoung }
1624 1.1 dyoung
1625 1.1 dyoung /* in_pcblookup_ports assist for handling vestigial entries.
1626 1.1 dyoung */
1627 1.1 dyoung static void *
1628 1.1 dyoung tcp_init_ports_v6(const struct in6_addr *addr, u_int port, int wild)
1629 1.1 dyoung {
1630 1.1 dyoung struct tcp_ports_iterator *it = &tcp_ports_iterator_v6;
1631 1.1 dyoung
1632 1.1 dyoung bzero(it, sizeof (*it));
1633 1.1 dyoung
1634 1.1 dyoung /* Note: the reference to vtw_tcpv6[0] is fine.
1635 1.1 dyoung * We do not need per-class iteration. We just
1636 1.1 dyoung * need to get to the fat, and there is one
1637 1.1 dyoung * shared fat.
1638 1.1 dyoung */
1639 1.1 dyoung if (vtw_tcpv6[0].fat) {
1640 1.1 dyoung it->addr.v6 = *addr;
1641 1.1 dyoung it->port = port;
1642 1.1 dyoung it->wild = !!wild;
1643 1.1 dyoung it->ctl = &vtw_tcpv6[0];
1644 1.1 dyoung
1645 1.1 dyoung ++vtw_stats.look[1];
1646 1.1 dyoung }
1647 1.1 dyoung
1648 1.1 dyoung return it;
1649 1.1 dyoung }
1650 1.1 dyoung
1651 1.1 dyoung /*!\brief export an IPv6 vtw.
1652 1.1 dyoung */
1653 1.1 dyoung static int
1654 1.1 dyoung vtw_export_v6(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res)
1655 1.1 dyoung {
1656 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
1657 1.1 dyoung
1658 1.1 dyoung bzero(res, sizeof (*res));
1659 1.1 dyoung
1660 1.1 dyoung if (ctl && vtw) {
1661 1.1 dyoung if (!ctl->clidx && vtw->msl_class)
1662 1.1 dyoung ctl += vtw->msl_class;
1663 1.1 dyoung else
1664 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
1665 1.1 dyoung
1666 1.1 dyoung res->valid = 1;
1667 1.1 dyoung res->v4 = 0;
1668 1.1 dyoung
1669 1.1 dyoung res->faddr.v6 = v6->faddr;
1670 1.1 dyoung res->laddr.v6 = v6->laddr;
1671 1.1 dyoung res->fport = v6->fport;
1672 1.1 dyoung res->lport = v6->lport;
1673 1.1 dyoung res->vtw = vtw; // netlock held over call(s)
1674 1.1 dyoung res->ctl = ctl;
1675 1.1 dyoung
1676 1.1 dyoung res->v6only = vtw->v6only;
1677 1.1 dyoung res->reuse_addr = vtw->reuse_addr;
1678 1.1 dyoung res->reuse_port = vtw->reuse_port;
1679 1.1 dyoung
1680 1.1 dyoung res->snd_nxt = vtw->snd_nxt;
1681 1.1 dyoung res->rcv_nxt = vtw->rcv_nxt;
1682 1.1 dyoung res->rcv_wnd = vtw->rcv_wnd;
1683 1.1 dyoung res->uid = vtw->uid;
1684 1.1 dyoung }
1685 1.1 dyoung
1686 1.1 dyoung return res->valid;
1687 1.1 dyoung }
1688 1.1 dyoung
1689 1.1 dyoung static int
1690 1.1 dyoung tcp_next_port_v6(void *arg, struct vestigial_inpcb *res)
1691 1.1 dyoung {
1692 1.1 dyoung struct tcp_ports_iterator *it = arg;
1693 1.1 dyoung vtw_t *vtw = 0;
1694 1.1 dyoung
1695 1.1 dyoung if (it->ctl)
1696 1.1 dyoung vtw = vtw_next_port_v6(it);
1697 1.1 dyoung
1698 1.1 dyoung if (!vtw)
1699 1.1 dyoung it->ctl = 0;
1700 1.1 dyoung
1701 1.1 dyoung return vtw_export_v6(it->ctl, vtw, res);
1702 1.1 dyoung }
1703 1.1 dyoung
1704 1.1 dyoung static int
1705 1.1 dyoung tcp_lookup_v6(const struct in6_addr *faddr, uint16_t fport,
1706 1.1 dyoung const struct in6_addr *laddr, uint16_t lport,
1707 1.1 dyoung struct vestigial_inpcb *res)
1708 1.1 dyoung {
1709 1.1 dyoung vtw_ctl_t *ctl;
1710 1.1 dyoung vtw_t *vtw;
1711 1.1 dyoung
1712 1.1 dyoung db_trace(KTR_VTW
1713 1.1 dyoung , (res, "vtw: lookup %6A:%P %6A:%P"
1714 1.1 dyoung , db_store(faddr, sizeof (*faddr)), fport
1715 1.1 dyoung , db_store(laddr, sizeof (*laddr)), lport));
1716 1.1 dyoung
1717 1.1 dyoung vtw = vtw_lookup_hash_v6((ctl = &vtw_tcpv6[0])
1718 1.1 dyoung , faddr, fport
1719 1.1 dyoung , laddr, lport, 0);
1720 1.1 dyoung
1721 1.1 dyoung return vtw_export_v6(ctl, vtw, res);
1722 1.1 dyoung }
1723 1.1 dyoung
1724 1.1 dyoung static vestigial_hooks_t tcp_hooks = {
1725 1.1 dyoung .init_ports4 = tcp_init_ports_v4,
1726 1.1 dyoung .next_port4 = tcp_next_port_v4,
1727 1.1 dyoung .lookup4 = tcp_lookup_v4,
1728 1.1 dyoung .init_ports6 = tcp_init_ports_v6,
1729 1.1 dyoung .next_port6 = tcp_next_port_v6,
1730 1.1 dyoung .lookup6 = tcp_lookup_v6,
1731 1.1 dyoung };
1732 1.1 dyoung
1733 1.1 dyoung static bool
1734 1.1 dyoung vtw_select(int af, fatp_ctl_t **fatp, vtw_ctl_t **ctlp)
1735 1.1 dyoung {
1736 1.1 dyoung fatp_ctl_t *fat;
1737 1.1 dyoung vtw_ctl_t *ctl;
1738 1.1 dyoung
1739 1.1 dyoung switch (af) {
1740 1.1 dyoung case AF_INET:
1741 1.1 dyoung fat = &fat_tcpv4;
1742 1.1 dyoung ctl = &vtw_tcpv4[0];
1743 1.1 dyoung break;
1744 1.1 dyoung case AF_INET6:
1745 1.1 dyoung fat = &fat_tcpv6;
1746 1.1 dyoung ctl = &vtw_tcpv6[0];
1747 1.1 dyoung break;
1748 1.1 dyoung default:
1749 1.1 dyoung return false;
1750 1.1 dyoung }
1751 1.1 dyoung if (fatp != NULL)
1752 1.1 dyoung *fatp = fat;
1753 1.1 dyoung if (ctlp != NULL)
1754 1.1 dyoung *ctlp = ctl;
1755 1.1 dyoung return true;
1756 1.1 dyoung }
1757 1.1 dyoung
1758 1.1 dyoung /*!\brief initialize controlling instance
1759 1.1 dyoung */
1760 1.1 dyoung static int
1761 1.1 dyoung vtw_control_init(int af)
1762 1.1 dyoung {
1763 1.1 dyoung fatp_ctl_t *fat;
1764 1.1 dyoung vtw_ctl_t *ctl;
1765 1.6 dyoung fatp_t *fat_base;
1766 1.6 dyoung fatp_t **fat_hash;
1767 1.6 dyoung vtw_t *ctl_base_v;
1768 1.6 dyoung uint32_t n, m;
1769 1.6 dyoung size_t sz;
1770 1.6 dyoung
1771 1.6 dyoung KASSERT(powerof2(tcp_vtw_entries));
1772 1.1 dyoung
1773 1.1 dyoung if (!vtw_select(af, &fat, &ctl))
1774 1.1 dyoung return EAFNOSUPPORT;
1775 1.1 dyoung
1776 1.6 dyoung if (fat->hash != NULL) {
1777 1.6 dyoung KASSERT(fat->base != NULL && ctl->base.v != NULL);
1778 1.6 dyoung return 0;
1779 1.6 dyoung }
1780 1.6 dyoung
1781 1.6 dyoung /* Allocate 10% more capacity in the fat pointers.
1782 1.6 dyoung * We should only need ~#hash additional based on
1783 1.6 dyoung * how they age, but TIME_WAIT assassination could cause
1784 1.6 dyoung * sparse fat pointer utilisation.
1785 1.6 dyoung */
1786 1.6 dyoung m = 512;
1787 1.6 dyoung n = 2*m + (11 * (tcp_vtw_entries / fatp_ntags())) / 10;
1788 1.6 dyoung sz = (ctl->is_v4 ? sizeof(vtw_v4_t) : sizeof(vtw_v6_t));
1789 1.6 dyoung
1790 1.6 dyoung fat_hash = kmem_zalloc(2*m * sizeof(fatp_t *), KM_NOSLEEP);
1791 1.6 dyoung
1792 1.6 dyoung if (fat_hash == NULL) {
1793 1.6 dyoung printf("%s: could not allocate %zu bytes for "
1794 1.6 dyoung "hash anchors", __func__, 2*m * sizeof(fatp_t *));
1795 1.6 dyoung return ENOMEM;
1796 1.6 dyoung }
1797 1.1 dyoung
1798 1.6 dyoung fat_base = kmem_zalloc(2*n * sizeof(fatp_t), KM_NOSLEEP);
1799 1.1 dyoung
1800 1.6 dyoung if (fat_base == NULL) {
1801 1.6 dyoung kmem_free(fat_hash, 2*m * sizeof (fatp_t *));
1802 1.6 dyoung printf("%s: could not allocate %zu bytes for "
1803 1.6 dyoung "fatp_t array", __func__, 2*n * sizeof(fatp_t));
1804 1.6 dyoung return ENOMEM;
1805 1.6 dyoung }
1806 1.1 dyoung
1807 1.6 dyoung ctl_base_v = kmem_zalloc(tcp_vtw_entries * sz, KM_NOSLEEP);
1808 1.1 dyoung
1809 1.6 dyoung if (ctl_base_v == NULL) {
1810 1.6 dyoung kmem_free(fat_hash, 2*m * sizeof (fatp_t *));
1811 1.6 dyoung kmem_free(fat_base, 2*n * sizeof(fatp_t));
1812 1.6 dyoung printf("%s: could not allocate %zu bytes for "
1813 1.6 dyoung "vtw_t array", __func__, tcp_vtw_entries * sz);
1814 1.6 dyoung return ENOMEM;
1815 1.1 dyoung }
1816 1.1 dyoung
1817 1.6 dyoung fatp_init(fat, n, m, fat_base, fat_hash);
1818 1.1 dyoung
1819 1.6 dyoung vtw_init(fat, ctl, tcp_vtw_entries, ctl_base_v);
1820 1.1 dyoung
1821 1.1 dyoung return 0;
1822 1.1 dyoung }
1823 1.1 dyoung
1824 1.1 dyoung /*!\brief select controlling instance
1825 1.1 dyoung */
1826 1.1 dyoung static vtw_ctl_t *
1827 1.1 dyoung vtw_control(int af, uint32_t msl)
1828 1.1 dyoung {
1829 1.1 dyoung fatp_ctl_t *fat;
1830 1.1 dyoung vtw_ctl_t *ctl;
1831 1.1 dyoung int class = msl_to_class(msl);
1832 1.1 dyoung
1833 1.1 dyoung if (!vtw_select(af, &fat, &ctl))
1834 1.1 dyoung return NULL;
1835 1.1 dyoung
1836 1.1 dyoung if (!fat->base || !ctl->base.v)
1837 1.1 dyoung return NULL;
1838 1.1 dyoung
1839 1.5 dyoung if (!tcp_vtw_was_enabled) {
1840 1.5 dyoung /* This guarantees is timer ticks until we no longer need them.
1841 1.5 dyoung */
1842 1.5 dyoung tcp_vtw_was_enabled = 1;
1843 1.5 dyoung
1844 1.5 dyoung callout_schedule(&vtw_cs, hz / 5);
1845 1.5 dyoung
1846 1.5 dyoung tcbtable.vestige = &tcp_hooks;
1847 1.5 dyoung }
1848 1.5 dyoung
1849 1.1 dyoung return ctl + class;
1850 1.1 dyoung }
1851 1.1 dyoung
1852 1.1 dyoung /*!\brief add TCP pcb to vestigial timewait
1853 1.1 dyoung */
1854 1.1 dyoung int
1855 1.1 dyoung vtw_add(int af, struct tcpcb *tp)
1856 1.1 dyoung {
1857 1.1 dyoung int enable;
1858 1.1 dyoung vtw_ctl_t *ctl;
1859 1.1 dyoung vtw_t *vtw;
1860 1.1 dyoung
1861 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
1862 1.1 dyoung
1863 1.1 dyoung ctl = vtw_control(af, tp->t_msl);
1864 1.1 dyoung if (!ctl)
1865 1.1 dyoung return 0;
1866 1.1 dyoung
1867 1.1 dyoung enable = (af == AF_INET) ? tcp4_vtw_enable : tcp6_vtw_enable;
1868 1.1 dyoung
1869 1.1 dyoung vtw = vtw_alloc(ctl);
1870 1.1 dyoung
1871 1.1 dyoung if (vtw) {
1872 1.1 dyoung vtw->snd_nxt = tp->snd_nxt;
1873 1.1 dyoung vtw->rcv_nxt = tp->rcv_nxt;
1874 1.1 dyoung
1875 1.1 dyoung switch (af) {
1876 1.1 dyoung case AF_INET: {
1877 1.1 dyoung struct inpcb *inp = tp->t_inpcb;
1878 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
1879 1.1 dyoung
1880 1.1 dyoung v4->faddr = inp->inp_faddr.s_addr;
1881 1.1 dyoung v4->laddr = inp->inp_laddr.s_addr;
1882 1.1 dyoung v4->fport = inp->inp_fport;
1883 1.1 dyoung v4->lport = inp->inp_lport;
1884 1.1 dyoung
1885 1.1 dyoung vtw->reuse_port = !!(inp->inp_socket->so_options
1886 1.1 dyoung & SO_REUSEPORT);
1887 1.1 dyoung vtw->reuse_addr = !!(inp->inp_socket->so_options
1888 1.1 dyoung & SO_REUSEADDR);
1889 1.1 dyoung vtw->v6only = 0;
1890 1.1 dyoung vtw->uid = inp->inp_socket->so_uidinfo->ui_uid;
1891 1.1 dyoung
1892 1.1 dyoung vtw_inshash_v4(ctl, vtw);
1893 1.1 dyoung
1894 1.1 dyoung
1895 1.1 dyoung #ifdef VTW_DEBUG
1896 1.1 dyoung /* Immediate lookup (connected and port) to
1897 1.1 dyoung * ensure at least that works!
1898 1.1 dyoung */
1899 1.1 dyoung if (enable & 4) {
1900 1.1 dyoung KASSERT(vtw_lookup_hash_v4
1901 1.1 dyoung (ctl
1902 1.1 dyoung , inp->inp_faddr.s_addr, inp->inp_fport
1903 1.1 dyoung , inp->inp_laddr.s_addr, inp->inp_lport
1904 1.1 dyoung , 0)
1905 1.1 dyoung == vtw);
1906 1.1 dyoung KASSERT(vtw_lookup_hash_v4
1907 1.1 dyoung (ctl
1908 1.1 dyoung , inp->inp_faddr.s_addr, inp->inp_fport
1909 1.1 dyoung , inp->inp_laddr.s_addr, inp->inp_lport
1910 1.1 dyoung , 1));
1911 1.1 dyoung }
1912 1.1 dyoung /* Immediate port iterator functionality check: not wild
1913 1.1 dyoung */
1914 1.1 dyoung if (enable & 8) {
1915 1.1 dyoung struct tcp_ports_iterator *it;
1916 1.1 dyoung struct vestigial_inpcb res;
1917 1.1 dyoung int cnt = 0;
1918 1.1 dyoung
1919 1.1 dyoung it = tcp_init_ports_v4(inp->inp_laddr
1920 1.1 dyoung , inp->inp_lport, 0);
1921 1.1 dyoung
1922 1.1 dyoung while (tcp_next_port_v4(it, &res)) {
1923 1.1 dyoung ++cnt;
1924 1.1 dyoung }
1925 1.1 dyoung KASSERT(cnt);
1926 1.1 dyoung }
1927 1.1 dyoung /* Immediate port iterator functionality check: wild
1928 1.1 dyoung */
1929 1.1 dyoung if (enable & 16) {
1930 1.1 dyoung struct tcp_ports_iterator *it;
1931 1.1 dyoung struct vestigial_inpcb res;
1932 1.1 dyoung struct in_addr any;
1933 1.1 dyoung int cnt = 0;
1934 1.1 dyoung
1935 1.1 dyoung any.s_addr = htonl(INADDR_ANY);
1936 1.1 dyoung
1937 1.1 dyoung it = tcp_init_ports_v4(any, inp->inp_lport, 1);
1938 1.1 dyoung
1939 1.1 dyoung while (tcp_next_port_v4(it, &res)) {
1940 1.1 dyoung ++cnt;
1941 1.1 dyoung }
1942 1.1 dyoung KASSERT(cnt);
1943 1.1 dyoung }
1944 1.1 dyoung #endif /* VTW_DEBUG */
1945 1.1 dyoung break;
1946 1.1 dyoung }
1947 1.1 dyoung
1948 1.1 dyoung case AF_INET6: {
1949 1.1 dyoung struct in6pcb *inp = tp->t_in6pcb;
1950 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
1951 1.1 dyoung
1952 1.1 dyoung v6->faddr = inp->in6p_faddr;
1953 1.1 dyoung v6->laddr = inp->in6p_laddr;
1954 1.1 dyoung v6->fport = inp->in6p_fport;
1955 1.1 dyoung v6->lport = inp->in6p_lport;
1956 1.1 dyoung
1957 1.1 dyoung vtw->reuse_port = !!(inp->in6p_socket->so_options
1958 1.1 dyoung & SO_REUSEPORT);
1959 1.1 dyoung vtw->reuse_addr = !!(inp->in6p_socket->so_options
1960 1.1 dyoung & SO_REUSEADDR);
1961 1.1 dyoung vtw->v6only = !!(inp->in6p_flags
1962 1.1 dyoung & IN6P_IPV6_V6ONLY);
1963 1.1 dyoung vtw->uid = inp->in6p_socket->so_uidinfo->ui_uid;
1964 1.1 dyoung
1965 1.1 dyoung vtw_inshash_v6(ctl, vtw);
1966 1.1 dyoung #ifdef VTW_DEBUG
1967 1.1 dyoung /* Immediate lookup (connected and port) to
1968 1.1 dyoung * ensure at least that works!
1969 1.1 dyoung */
1970 1.1 dyoung if (enable & 4) {
1971 1.1 dyoung KASSERT(vtw_lookup_hash_v6(ctl
1972 1.1 dyoung , &inp->in6p_faddr, inp->in6p_fport
1973 1.1 dyoung , &inp->in6p_laddr, inp->in6p_lport
1974 1.1 dyoung , 0)
1975 1.1 dyoung == vtw);
1976 1.1 dyoung KASSERT(vtw_lookup_hash_v6
1977 1.1 dyoung (ctl
1978 1.1 dyoung , &inp->in6p_faddr, inp->in6p_fport
1979 1.1 dyoung , &inp->in6p_laddr, inp->in6p_lport
1980 1.1 dyoung , 1));
1981 1.1 dyoung }
1982 1.1 dyoung /* Immediate port iterator functionality check: not wild
1983 1.1 dyoung */
1984 1.1 dyoung if (enable & 8) {
1985 1.1 dyoung struct tcp_ports_iterator *it;
1986 1.1 dyoung struct vestigial_inpcb res;
1987 1.1 dyoung int cnt = 0;
1988 1.1 dyoung
1989 1.1 dyoung it = tcp_init_ports_v6(&inp->in6p_laddr
1990 1.1 dyoung , inp->in6p_lport, 0);
1991 1.1 dyoung
1992 1.1 dyoung while (tcp_next_port_v6(it, &res)) {
1993 1.1 dyoung ++cnt;
1994 1.1 dyoung }
1995 1.1 dyoung KASSERT(cnt);
1996 1.1 dyoung }
1997 1.1 dyoung /* Immediate port iterator functionality check: wild
1998 1.1 dyoung */
1999 1.1 dyoung if (enable & 16) {
2000 1.1 dyoung struct tcp_ports_iterator *it;
2001 1.1 dyoung struct vestigial_inpcb res;
2002 1.1 dyoung static struct in6_addr any = IN6ADDR_ANY_INIT;
2003 1.1 dyoung int cnt = 0;
2004 1.1 dyoung
2005 1.1 dyoung it = tcp_init_ports_v6(&any
2006 1.1 dyoung , inp->in6p_lport, 1);
2007 1.1 dyoung
2008 1.1 dyoung while (tcp_next_port_v6(it, &res)) {
2009 1.1 dyoung ++cnt;
2010 1.1 dyoung }
2011 1.1 dyoung KASSERT(cnt);
2012 1.1 dyoung }
2013 1.1 dyoung #endif /* VTW_DEBUG */
2014 1.1 dyoung break;
2015 1.1 dyoung }
2016 1.1 dyoung }
2017 1.1 dyoung
2018 1.1 dyoung tcp_canceltimers(tp);
2019 1.1 dyoung tp = tcp_close(tp);
2020 1.1 dyoung KASSERT(!tp);
2021 1.1 dyoung
2022 1.1 dyoung return 1;
2023 1.1 dyoung }
2024 1.1 dyoung
2025 1.1 dyoung return 0;
2026 1.1 dyoung }
2027 1.1 dyoung
2028 1.1 dyoung /*!\brief restart timer for vestigial time-wait entry
2029 1.1 dyoung */
2030 1.1 dyoung static void
2031 1.1 dyoung vtw_restart_v4(vestigial_inpcb_t *vp)
2032 1.1 dyoung {
2033 1.1 dyoung vtw_v4_t copy = *(vtw_v4_t*)vp->vtw;
2034 1.1 dyoung vtw_t *vtw;
2035 1.1 dyoung vtw_t *cp = ©.common;
2036 1.1 dyoung vtw_ctl_t *ctl;
2037 1.1 dyoung
2038 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
2039 1.1 dyoung
2040 1.1 dyoung db_trace(KTR_VTW
2041 1.1 dyoung , (vp->vtw, "vtw: restart %A:%P %A:%P"
2042 1.1 dyoung , vp->faddr.v4.s_addr, vp->fport
2043 1.1 dyoung , vp->laddr.v4.s_addr, vp->lport));
2044 1.1 dyoung
2045 1.1 dyoung /* Class might have changed, so have a squiz.
2046 1.1 dyoung */
2047 1.1 dyoung ctl = vtw_control(AF_INET, class_to_msl(cp->msl_class));
2048 1.1 dyoung vtw = vtw_alloc(ctl);
2049 1.1 dyoung
2050 1.1 dyoung if (vtw) {
2051 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
2052 1.1 dyoung
2053 1.1 dyoung /* Safe now to unhash the old entry
2054 1.1 dyoung */
2055 1.1 dyoung vtw_del(vp->ctl, vp->vtw);
2056 1.1 dyoung
2057 1.1 dyoung vtw->snd_nxt = cp->snd_nxt;
2058 1.1 dyoung vtw->rcv_nxt = cp->rcv_nxt;
2059 1.1 dyoung
2060 1.1 dyoung v4->faddr = copy.faddr;
2061 1.1 dyoung v4->laddr = copy.laddr;
2062 1.1 dyoung v4->fport = copy.fport;
2063 1.1 dyoung v4->lport = copy.lport;
2064 1.1 dyoung
2065 1.1 dyoung vtw->reuse_port = cp->reuse_port;
2066 1.1 dyoung vtw->reuse_addr = cp->reuse_addr;
2067 1.1 dyoung vtw->v6only = 0;
2068 1.1 dyoung vtw->uid = cp->uid;
2069 1.1 dyoung
2070 1.1 dyoung vtw_inshash_v4(ctl, vtw);
2071 1.1 dyoung }
2072 1.1 dyoung
2073 1.1 dyoung vp->valid = 0;
2074 1.1 dyoung }
2075 1.1 dyoung
2076 1.1 dyoung /*!\brief restart timer for vestigial time-wait entry
2077 1.1 dyoung */
2078 1.1 dyoung static void
2079 1.1 dyoung vtw_restart_v6(vestigial_inpcb_t *vp)
2080 1.1 dyoung {
2081 1.1 dyoung vtw_v6_t copy = *(vtw_v6_t*)vp->vtw;
2082 1.1 dyoung vtw_t *vtw;
2083 1.1 dyoung vtw_t *cp = ©.common;
2084 1.1 dyoung vtw_ctl_t *ctl;
2085 1.1 dyoung
2086 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
2087 1.1 dyoung
2088 1.1 dyoung db_trace(KTR_VTW
2089 1.1 dyoung , (vp->vtw, "vtw: restart %6A:%P %6A:%P"
2090 1.1 dyoung , db_store(&vp->faddr.v6, sizeof (vp->faddr.v6))
2091 1.1 dyoung , vp->fport
2092 1.1 dyoung , db_store(&vp->laddr.v6, sizeof (vp->laddr.v6))
2093 1.1 dyoung , vp->lport));
2094 1.1 dyoung
2095 1.1 dyoung /* Class might have changed, so have a squiz.
2096 1.1 dyoung */
2097 1.1 dyoung ctl = vtw_control(AF_INET6, class_to_msl(cp->msl_class));
2098 1.1 dyoung vtw = vtw_alloc(ctl);
2099 1.1 dyoung
2100 1.1 dyoung if (vtw) {
2101 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
2102 1.1 dyoung
2103 1.1 dyoung /* Safe now to unhash the old entry
2104 1.1 dyoung */
2105 1.1 dyoung vtw_del(vp->ctl, vp->vtw);
2106 1.1 dyoung
2107 1.1 dyoung vtw->snd_nxt = cp->snd_nxt;
2108 1.1 dyoung vtw->rcv_nxt = cp->rcv_nxt;
2109 1.1 dyoung
2110 1.1 dyoung v6->faddr = copy.faddr;
2111 1.1 dyoung v6->laddr = copy.laddr;
2112 1.1 dyoung v6->fport = copy.fport;
2113 1.1 dyoung v6->lport = copy.lport;
2114 1.1 dyoung
2115 1.1 dyoung vtw->reuse_port = cp->reuse_port;
2116 1.1 dyoung vtw->reuse_addr = cp->reuse_addr;
2117 1.1 dyoung vtw->v6only = cp->v6only;
2118 1.1 dyoung vtw->uid = cp->uid;
2119 1.1 dyoung
2120 1.1 dyoung vtw_inshash_v6(ctl, vtw);
2121 1.1 dyoung }
2122 1.1 dyoung
2123 1.1 dyoung vp->valid = 0;
2124 1.1 dyoung }
2125 1.1 dyoung
2126 1.1 dyoung /*!\brief restart timer for vestigial time-wait entry
2127 1.1 dyoung */
2128 1.1 dyoung void
2129 1.1 dyoung vtw_restart(vestigial_inpcb_t *vp)
2130 1.1 dyoung {
2131 1.1 dyoung if (!vp || !vp->valid)
2132 1.1 dyoung return;
2133 1.1 dyoung
2134 1.1 dyoung if (vp->v4)
2135 1.1 dyoung vtw_restart_v4(vp);
2136 1.1 dyoung else
2137 1.1 dyoung vtw_restart_v6(vp);
2138 1.1 dyoung }
2139 1.1 dyoung
2140 1.1 dyoung int
2141 1.7 dyoung sysctl_tcp_vtw_enable(SYSCTLFN_ARGS)
2142 1.7 dyoung {
2143 1.7 dyoung int en, rc;
2144 1.7 dyoung struct sysctlnode node;
2145 1.7 dyoung
2146 1.7 dyoung node = *rnode;
2147 1.7 dyoung en = *(int *)rnode->sysctl_data;
2148 1.7 dyoung node.sysctl_data = &en;
2149 1.7 dyoung
2150 1.7 dyoung rc = sysctl_lookup(SYSCTLFN_CALL(&node));
2151 1.7 dyoung if (rc != 0 || newp == NULL)
2152 1.7 dyoung return rc;
2153 1.7 dyoung
2154 1.7 dyoung if (rnode->sysctl_data != &tcp4_vtw_enable &&
2155 1.7 dyoung rnode->sysctl_data != &tcp6_vtw_enable)
2156 1.7 dyoung rc = ENOENT;
2157 1.7 dyoung else if ((en & 1) == 0)
2158 1.7 dyoung rc = 0;
2159 1.7 dyoung else if (rnode->sysctl_data == &tcp4_vtw_enable)
2160 1.7 dyoung rc = vtw_control_init(AF_INET);
2161 1.7 dyoung else /* rnode->sysctl_data == &tcp6_vtw_enable */
2162 1.7 dyoung rc = vtw_control_init(AF_INET6);
2163 1.7 dyoung
2164 1.7 dyoung if (rc == 0)
2165 1.7 dyoung *(int *)rnode->sysctl_data = en;
2166 1.7 dyoung
2167 1.7 dyoung return rc;
2168 1.7 dyoung }
2169 1.7 dyoung
2170 1.7 dyoung int
2171 1.1 dyoung vtw_earlyinit(void)
2172 1.1 dyoung {
2173 1.5 dyoung int i, rc;
2174 1.1 dyoung
2175 1.5 dyoung callout_init(&vtw_cs, 0);
2176 1.5 dyoung callout_setfunc(&vtw_cs, vtw_tick, 0);
2177 1.1 dyoung
2178 1.5 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
2179 1.5 dyoung vtw_tcpv4[i].is_v4 = 1;
2180 1.5 dyoung vtw_tcpv6[i].is_v6 = 1;
2181 1.1 dyoung }
2182 1.1 dyoung
2183 1.7 dyoung if ((tcp4_vtw_enable & 1) != 0 &&
2184 1.7 dyoung (rc = vtw_control_init(AF_INET)) != 0)
2185 1.7 dyoung return rc;
2186 1.7 dyoung
2187 1.7 dyoung if ((tcp6_vtw_enable & 1) != 0 &&
2188 1.1 dyoung (rc = vtw_control_init(AF_INET6)) != 0)
2189 1.1 dyoung return rc;
2190 1.1 dyoung
2191 1.1 dyoung return 0;
2192 1.1 dyoung }
2193 1.1 dyoung
2194 1.1 dyoung #ifdef VTW_DEBUG
2195 1.1 dyoung #include <sys/syscallargs.h>
2196 1.1 dyoung #include <sys/sysctl.h>
2197 1.1 dyoung
2198 1.1 dyoung /*!\brief add lalp, fafp entries for debug
2199 1.1 dyoung */
2200 1.1 dyoung int
2201 1.1 dyoung vtw_debug_add(int af, sin_either_t *la, sin_either_t *fa, int msl, int class)
2202 1.1 dyoung {
2203 1.1 dyoung vtw_ctl_t *ctl;
2204 1.1 dyoung vtw_t *vtw;
2205 1.1 dyoung
2206 1.1 dyoung ctl = vtw_control(af, msl ? msl : class_to_msl(class));
2207 1.1 dyoung if (!ctl)
2208 1.1 dyoung return 0;
2209 1.1 dyoung
2210 1.1 dyoung vtw = vtw_alloc(ctl);
2211 1.1 dyoung
2212 1.1 dyoung if (vtw) {
2213 1.1 dyoung vtw->snd_nxt = 0;
2214 1.1 dyoung vtw->rcv_nxt = 0;
2215 1.1 dyoung
2216 1.1 dyoung switch (af) {
2217 1.1 dyoung case AF_INET: {
2218 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
2219 1.1 dyoung
2220 1.1 dyoung v4->faddr = fa->sin_addr.v4.s_addr;
2221 1.1 dyoung v4->laddr = la->sin_addr.v4.s_addr;
2222 1.1 dyoung v4->fport = fa->sin_port;
2223 1.1 dyoung v4->lport = la->sin_port;
2224 1.1 dyoung
2225 1.1 dyoung vtw->reuse_port = 1;
2226 1.1 dyoung vtw->reuse_addr = 1;
2227 1.1 dyoung vtw->v6only = 0;
2228 1.1 dyoung vtw->uid = 0;
2229 1.1 dyoung
2230 1.1 dyoung vtw_inshash_v4(ctl, vtw);
2231 1.1 dyoung break;
2232 1.1 dyoung }
2233 1.1 dyoung
2234 1.1 dyoung case AF_INET6: {
2235 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
2236 1.1 dyoung
2237 1.1 dyoung v6->faddr = fa->sin_addr.v6;
2238 1.1 dyoung v6->laddr = la->sin_addr.v6;
2239 1.1 dyoung
2240 1.1 dyoung v6->fport = fa->sin_port;
2241 1.1 dyoung v6->lport = la->sin_port;
2242 1.1 dyoung
2243 1.1 dyoung vtw->reuse_port = 1;
2244 1.1 dyoung vtw->reuse_addr = 1;
2245 1.1 dyoung vtw->v6only = 0;
2246 1.1 dyoung vtw->uid = 0;
2247 1.1 dyoung
2248 1.1 dyoung vtw_inshash_v6(ctl, vtw);
2249 1.1 dyoung break;
2250 1.1 dyoung }
2251 1.1 dyoung
2252 1.1 dyoung default:
2253 1.1 dyoung break;
2254 1.1 dyoung }
2255 1.1 dyoung
2256 1.1 dyoung return 1;
2257 1.1 dyoung }
2258 1.1 dyoung
2259 1.1 dyoung return 0;
2260 1.1 dyoung }
2261 1.1 dyoung
/* Index of the sysent slot taken over by vtw_debug_init() for the debug
 * system call; zero while no slot has been claimed.
 */
static int vtw_syscall;
2263 1.1 dyoung
2264 1.1 dyoung static int
2265 1.1 dyoung vtw_debug_process(vtw_sysargs_t *ap)
2266 1.1 dyoung {
2267 1.1 dyoung struct vestigial_inpcb vestige;
2268 1.1 dyoung int rc = 0;
2269 1.1 dyoung
2270 1.1 dyoung mutex_enter(softnet_lock);
2271 1.1 dyoung
2272 1.1 dyoung switch (ap->op) {
2273 1.1 dyoung case 0: // insert
2274 1.1 dyoung vtw_debug_add(ap->la.sin_family
2275 1.1 dyoung , &ap->la
2276 1.1 dyoung , &ap->fa
2277 1.1 dyoung , TCPTV_MSL
2278 1.1 dyoung , 0);
2279 1.1 dyoung break;
2280 1.1 dyoung
2281 1.1 dyoung case 1: // lookup
2282 1.1 dyoung case 2: // restart
2283 1.1 dyoung switch (ap->la.sin_family) {
2284 1.1 dyoung case AF_INET:
2285 1.1 dyoung if (tcp_lookup_v4(ap->fa.sin_addr.v4, ap->fa.sin_port,
2286 1.1 dyoung ap->la.sin_addr.v4, ap->la.sin_port,
2287 1.1 dyoung &vestige)) {
2288 1.1 dyoung if (ap->op == 2) {
2289 1.1 dyoung vtw_restart(&vestige);
2290 1.1 dyoung }
2291 1.1 dyoung rc = 0;
2292 1.1 dyoung } else
2293 1.1 dyoung rc = ESRCH;
2294 1.1 dyoung break;
2295 1.1 dyoung
2296 1.1 dyoung case AF_INET6:
2297 1.1 dyoung if (tcp_lookup_v6(&ap->fa.sin_addr.v6, ap->fa.sin_port,
2298 1.1 dyoung &ap->la.sin_addr.v6, ap->la.sin_port,
2299 1.1 dyoung &vestige)) {
2300 1.1 dyoung if (ap->op == 2) {
2301 1.1 dyoung vtw_restart(&vestige);
2302 1.1 dyoung }
2303 1.1 dyoung rc = 0;
2304 1.1 dyoung } else
2305 1.1 dyoung rc = ESRCH;
2306 1.1 dyoung break;
2307 1.1 dyoung default:
2308 1.1 dyoung rc = EINVAL;
2309 1.1 dyoung }
2310 1.1 dyoung break;
2311 1.1 dyoung
2312 1.1 dyoung default:
2313 1.1 dyoung rc = EINVAL;
2314 1.1 dyoung }
2315 1.1 dyoung
2316 1.1 dyoung mutex_exit(softnet_lock);
2317 1.1 dyoung return rc;
2318 1.1 dyoung }
2319 1.1 dyoung
2320 1.1 dyoung struct sys_vtw_args {
2321 1.1 dyoung syscallarg(const vtw_sysargs_t *) req;
2322 1.1 dyoung syscallarg(size_t) len;
2323 1.1 dyoung };
2324 1.1 dyoung
2325 1.1 dyoung static int
2326 1.1 dyoung vtw_sys(struct lwp *l, const void *_, register_t *retval)
2327 1.1 dyoung {
2328 1.1 dyoung const struct sys_vtw_args *uap = _;
2329 1.1 dyoung void *buf;
2330 1.1 dyoung int rc;
2331 1.1 dyoung size_t len = SCARG(uap, len);
2332 1.1 dyoung
2333 1.1 dyoung if (len != sizeof (vtw_sysargs_t))
2334 1.1 dyoung return EINVAL;
2335 1.1 dyoung
2336 1.1 dyoung buf = kmem_alloc(len, KM_SLEEP);
2337 1.1 dyoung if (!buf)
2338 1.1 dyoung return ENOMEM;
2339 1.1 dyoung
2340 1.1 dyoung rc = copyin(SCARG(uap, req), buf, len);
2341 1.1 dyoung if (!rc) {
2342 1.1 dyoung rc = vtw_debug_process(buf);
2343 1.1 dyoung }
2344 1.1 dyoung kmem_free(buf, len);
2345 1.1 dyoung
2346 1.1 dyoung return rc;
2347 1.1 dyoung }
2348 1.1 dyoung
2349 1.1 dyoung static void
2350 1.1 dyoung vtw_sanity_check(void)
2351 1.1 dyoung {
2352 1.1 dyoung vtw_ctl_t *ctl;
2353 1.1 dyoung vtw_t *vtw;
2354 1.1 dyoung int i;
2355 1.1 dyoung int n;
2356 1.1 dyoung
2357 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
2358 1.1 dyoung ctl = &vtw_tcpv4[i];
2359 1.1 dyoung
2360 1.1 dyoung if (!ctl->base.v || ctl->nalloc)
2361 1.1 dyoung continue;
2362 1.1 dyoung
2363 1.1 dyoung for (n = 0, vtw = ctl->base.v; ; ) {
2364 1.1 dyoung ++n;
2365 1.1 dyoung vtw = vtw_next(ctl, vtw);
2366 1.1 dyoung if (vtw == ctl->base.v)
2367 1.1 dyoung break;
2368 1.1 dyoung }
2369 1.1 dyoung db_trace(KTR_VTW
2370 1.1 dyoung , (ctl, "sanity: class %x n %x nfree %x"
2371 1.1 dyoung , i, n, ctl->nfree));
2372 1.1 dyoung
2373 1.1 dyoung KASSERT(n == ctl->nfree);
2374 1.1 dyoung }
2375 1.1 dyoung
2376 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
2377 1.1 dyoung ctl = &vtw_tcpv6[i];
2378 1.1 dyoung
2379 1.1 dyoung if (!ctl->base.v || ctl->nalloc)
2380 1.1 dyoung continue;
2381 1.1 dyoung
2382 1.1 dyoung for (n = 0, vtw = ctl->base.v; ; ) {
2383 1.1 dyoung ++n;
2384 1.1 dyoung vtw = vtw_next(ctl, vtw);
2385 1.1 dyoung if (vtw == ctl->base.v)
2386 1.1 dyoung break;
2387 1.1 dyoung }
2388 1.1 dyoung db_trace(KTR_VTW
2389 1.1 dyoung , (ctl, "sanity: class %x n %x nfree %x"
2390 1.1 dyoung , i, n, ctl->nfree));
2391 1.1 dyoung KASSERT(n == ctl->nfree);
2392 1.1 dyoung }
2393 1.1 dyoung }
2394 1.1 dyoung
2395 1.1 dyoung /*!\brief Initialise debug support.
2396 1.1 dyoung */
2397 1.1 dyoung static void
2398 1.1 dyoung vtw_debug_init(void)
2399 1.1 dyoung {
2400 1.1 dyoung int i;
2401 1.1 dyoung
2402 1.1 dyoung vtw_sanity_check();
2403 1.1 dyoung
2404 1.1 dyoung if (vtw_syscall)
2405 1.1 dyoung return;
2406 1.1 dyoung
2407 1.1 dyoung for (i = 511; i; --i) {
2408 1.1 dyoung if (sysent[i].sy_call == sys_nosys) {
2409 1.1 dyoung sysent[i].sy_call = vtw_sys;
2410 1.1 dyoung sysent[i].sy_narg = 2;
2411 1.1 dyoung sysent[i].sy_argsize = sizeof (struct sys_vtw_args);
2412 1.1 dyoung sysent[i].sy_flags = 0;
2413 1.1 dyoung
2414 1.1 dyoung vtw_syscall = i;
2415 1.1 dyoung break;
2416 1.1 dyoung }
2417 1.1 dyoung }
2418 1.1 dyoung if (i) {
2419 1.1 dyoung const struct sysctlnode *node;
2420 1.1 dyoung uint32_t flags;
2421 1.1 dyoung
2422 1.1 dyoung flags = sysctl_root.sysctl_flags;
2423 1.1 dyoung
2424 1.1 dyoung sysctl_root.sysctl_flags |= CTLFLAG_READWRITE;
2425 1.1 dyoung sysctl_root.sysctl_flags &= ~CTLFLAG_PERMANENT;
2426 1.1 dyoung
2427 1.1 dyoung sysctl_createv(0, 0, 0, &node,
2428 1.1 dyoung CTLFLAG_PERMANENT, CTLTYPE_NODE,
2429 1.1 dyoung "koff",
2430 1.1 dyoung SYSCTL_DESCR("Kernel Obscure Feature Finder"),
2431 1.1 dyoung 0, 0, 0, 0, CTL_CREATE, CTL_EOL);
2432 1.1 dyoung
2433 1.1 dyoung if (!node) {
2434 1.1 dyoung sysctl_createv(0, 0, 0, &node,
2435 1.1 dyoung CTLFLAG_PERMANENT, CTLTYPE_NODE,
2436 1.1 dyoung "koffka",
2437 1.1 dyoung SYSCTL_DESCR("The Real(tm) Kernel"
2438 1.1 dyoung " Obscure Feature Finder"),
2439 1.1 dyoung 0, 0, 0, 0, CTL_CREATE, CTL_EOL);
2440 1.1 dyoung }
2441 1.1 dyoung if (node) {
2442 1.1 dyoung sysctl_createv(0, 0, 0, 0,
2443 1.1 dyoung CTLFLAG_PERMANENT|CTLFLAG_READONLY,
2444 1.1 dyoung CTLTYPE_INT, "vtw_debug_syscall",
2445 1.1 dyoung SYSCTL_DESCR("vtw debug"
2446 1.1 dyoung " system call number"),
2447 1.1 dyoung 0, 0, &vtw_syscall, 0, node->sysctl_num,
2448 1.1 dyoung CTL_CREATE, CTL_EOL);
2449 1.1 dyoung }
2450 1.1 dyoung sysctl_root.sysctl_flags = flags;
2451 1.1 dyoung }
2452 1.1 dyoung }
2453 1.1 dyoung #else /* !VTW_DEBUG */
/* Without VTW_DEBUG there is no debug support to set up. */
static void
vtw_debug_init(void)
{
}
2459 1.1 dyoung #endif /* !VTW_DEBUG */
2460