tcp_vtw.c revision 1.8 1 1.1 dyoung /*
2 1.1 dyoung * Copyright (c) 2011 The NetBSD Foundation, Inc.
3 1.1 dyoung * All rights reserved.
4 1.1 dyoung *
5 1.1 dyoung * This code is derived from software contributed to The NetBSD Foundation
6 1.1 dyoung * by Coyote Point Systems, Inc.
7 1.1 dyoung *
8 1.1 dyoung * Redistribution and use in source and binary forms, with or without
9 1.1 dyoung * modification, are permitted provided that the following conditions
10 1.1 dyoung * are met:
11 1.1 dyoung * 1. Redistributions of source code must retain the above copyright
12 1.1 dyoung * notice, this list of conditions and the following disclaimer.
13 1.1 dyoung * 2. Redistributions in binary form must reproduce the above copyright
14 1.1 dyoung * notice, this list of conditions and the following disclaimer in the
15 1.1 dyoung * documentation and/or other materials provided with the distribution.
16 1.1 dyoung *
17 1.1 dyoung * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
18 1.1 dyoung * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 1.1 dyoung * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 1.1 dyoung * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
21 1.1 dyoung * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 1.1 dyoung * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 1.1 dyoung * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 1.1 dyoung * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 1.1 dyoung * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 1.1 dyoung * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 1.1 dyoung * POSSIBILITY OF SUCH DAMAGE.
28 1.1 dyoung */
29 1.1 dyoung #include <sys/cdefs.h>
30 1.1 dyoung
31 1.1 dyoung #include "opt_ddb.h"
32 1.1 dyoung #include "opt_inet.h"
33 1.1 dyoung #include "opt_ipsec.h"
34 1.1 dyoung #include "opt_inet_csum.h"
35 1.1 dyoung #include "opt_tcp_debug.h"
36 1.1 dyoung
37 1.1 dyoung #include <sys/param.h>
38 1.1 dyoung #include <sys/systm.h>
39 1.1 dyoung #include <sys/malloc.h>
40 1.1 dyoung #include <sys/kmem.h>
41 1.1 dyoung #include <sys/mbuf.h>
42 1.1 dyoung #include <sys/protosw.h>
43 1.1 dyoung #include <sys/socket.h>
44 1.1 dyoung #include <sys/socketvar.h>
45 1.1 dyoung #include <sys/errno.h>
46 1.1 dyoung #include <sys/syslog.h>
47 1.1 dyoung #include <sys/pool.h>
48 1.1 dyoung #include <sys/domain.h>
49 1.1 dyoung #include <sys/kernel.h>
50 1.1 dyoung #include <net/if.h>
51 1.1 dyoung #include <net/route.h>
52 1.1 dyoung #include <net/if_types.h>
53 1.1 dyoung
54 1.1 dyoung #include <netinet/in.h>
55 1.1 dyoung #include <netinet/in_systm.h>
56 1.1 dyoung #include <netinet/ip.h>
57 1.1 dyoung #include <netinet/in_pcb.h>
58 1.1 dyoung #include <netinet/in_var.h>
59 1.1 dyoung #include <netinet/ip_var.h>
60 1.1 dyoung #include <netinet/in_offload.h>
61 1.1 dyoung #include <netinet/ip6.h>
62 1.1 dyoung #include <netinet6/ip6_var.h>
63 1.1 dyoung #include <netinet6/in6_pcb.h>
64 1.1 dyoung #include <netinet6/ip6_var.h>
65 1.1 dyoung #include <netinet6/in6_var.h>
66 1.1 dyoung #include <netinet/icmp6.h>
67 1.1 dyoung #include <netinet6/nd6.h>
68 1.1 dyoung
69 1.1 dyoung #include <netinet/tcp.h>
70 1.1 dyoung #include <netinet/tcp_fsm.h>
71 1.1 dyoung #include <netinet/tcp_seq.h>
72 1.1 dyoung #include <netinet/tcp_timer.h>
73 1.1 dyoung #include <netinet/tcp_var.h>
74 1.1 dyoung #include <netinet/tcp_private.h>
75 1.1 dyoung #include <netinet/tcpip.h>
76 1.1 dyoung
77 1.1 dyoung #include <netinet/tcp_vtw.h>
78 1.1 dyoung
79 1.8 joerg __KERNEL_RCSID(0, "$NetBSD: tcp_vtw.c,v 1.8 2011/07/17 20:54:53 joerg Exp $");
80 1.1 dyoung
81 1.1 dyoung #define db_trace(__a, __b) do { } while (/*CONSTCOND*/0)
82 1.1 dyoung
83 1.1 dyoung static void vtw_debug_init(void);
84 1.1 dyoung
85 1.1 dyoung fatp_ctl_t fat_tcpv4;
86 1.1 dyoung fatp_ctl_t fat_tcpv6;
87 1.1 dyoung vtw_ctl_t vtw_tcpv4[VTW_NCLASS];
88 1.1 dyoung vtw_ctl_t vtw_tcpv6[VTW_NCLASS];
89 1.1 dyoung vtw_stats_t vtw_stats;
90 1.1 dyoung
91 1.1 dyoung /* We provide state for the lookup_ports iterator.
92 1.1 dyoung * As currently we are netlock-protected, there is one.
93 1.1 dyoung * If we were finer-grain, we would have one per CPU.
94 1.1 dyoung * I do not want to be in the business of alloc/free.
95 1.1 dyoung * The best alternate would be allocate on the caller's
96 1.1 dyoung * stack, but that would require them to know the struct,
97 1.1 dyoung * or at least the size.
98 1.1 dyoung * See how she goes.
99 1.1 dyoung */
100 1.1 dyoung struct tcp_ports_iterator {
101 1.1 dyoung union {
102 1.1 dyoung struct in_addr v4;
103 1.1 dyoung struct in6_addr v6;
104 1.1 dyoung } addr;
105 1.1 dyoung u_int port;
106 1.1 dyoung
107 1.1 dyoung uint32_t wild : 1;
108 1.1 dyoung
109 1.1 dyoung vtw_ctl_t *ctl;
110 1.1 dyoung fatp_t *fp;
111 1.1 dyoung
112 1.1 dyoung uint16_t slot_idx;
113 1.1 dyoung uint16_t ctl_idx;
114 1.1 dyoung };
115 1.1 dyoung
116 1.1 dyoung static struct tcp_ports_iterator tcp_ports_iterator_v4;
117 1.1 dyoung static struct tcp_ports_iterator tcp_ports_iterator_v6;
118 1.1 dyoung
119 1.1 dyoung static int vtw_age(vtw_ctl_t *, struct timeval *);
120 1.1 dyoung
121 1.1 dyoung /*!\brief allocate a fat pointer from a collection.
122 1.1 dyoung */
123 1.1 dyoung static fatp_t *
124 1.1 dyoung fatp_alloc(fatp_ctl_t *fat)
125 1.1 dyoung {
126 1.1 dyoung fatp_t *fp = 0;
127 1.1 dyoung
128 1.1 dyoung if (fat->nfree) {
129 1.1 dyoung fp = fat->free;
130 1.1 dyoung if (fp) {
131 1.1 dyoung fat->free = fatp_next(fat, fp);
132 1.1 dyoung --fat->nfree;
133 1.1 dyoung ++fat->nalloc;
134 1.1 dyoung fp->nxt = 0;
135 1.1 dyoung
136 1.1 dyoung KASSERT(!fp->inuse);
137 1.1 dyoung }
138 1.1 dyoung }
139 1.1 dyoung
140 1.1 dyoung return fp;
141 1.1 dyoung }
142 1.1 dyoung
143 1.1 dyoung /*!\brief free a fat pointer.
144 1.1 dyoung */
145 1.1 dyoung static void
146 1.1 dyoung fatp_free(fatp_ctl_t *fat, fatp_t *fp)
147 1.1 dyoung {
148 1.1 dyoung if (fp) {
149 1.1 dyoung KASSERT(!fp->inuse);
150 1.1 dyoung KASSERT(!fp->nxt);
151 1.1 dyoung
152 1.1 dyoung fp->nxt = fatp_index(fat, fat->free);
153 1.1 dyoung fat->free = fp;
154 1.1 dyoung
155 1.1 dyoung ++fat->nfree;
156 1.1 dyoung --fat->nalloc;
157 1.1 dyoung }
158 1.1 dyoung }
159 1.1 dyoung
160 1.1 dyoung /*!\brief initialise a collection of fat pointers.
161 1.1 dyoung *
162 1.1 dyoung *\param n # hash buckets
163 1.1 dyoung *\param m total # fat pointers to allocate
164 1.1 dyoung *
165 1.1 dyoung * We allocate 2x as much, as we have two hashes: full and lport only.
166 1.1 dyoung */
167 1.1 dyoung static void
168 1.6 dyoung fatp_init(fatp_ctl_t *fat, uint32_t n, uint32_t m,
169 1.6 dyoung fatp_t *fat_base, fatp_t **fat_hash)
170 1.1 dyoung {
171 1.1 dyoung fatp_t *fp;
172 1.1 dyoung
173 1.1 dyoung KASSERT(n <= FATP_MAX / 2);
174 1.1 dyoung
175 1.6 dyoung fat->hash = fat_hash;
176 1.6 dyoung fat->base = fat_base;
177 1.1 dyoung
178 1.1 dyoung fat->port = &fat->hash[m];
179 1.1 dyoung
180 1.1 dyoung fat->mask = m - 1; // ASSERT is power of 2 (m)
181 1.1 dyoung fat->lim = fat->base + 2*n - 1;
182 1.1 dyoung fat->nfree = 0;
183 1.1 dyoung fat->nalloc = 2*n;
184 1.1 dyoung
185 1.1 dyoung /* Initialise the free list.
186 1.1 dyoung */
187 1.1 dyoung for (fp = fat->lim; fp >= fat->base; --fp) {
188 1.1 dyoung fatp_free(fat, fp);
189 1.1 dyoung }
190 1.1 dyoung }
191 1.1 dyoung
/*
 * Per-slot constants: fatp_xtra[slot] is XORed into the tag stored in
 * that slot of a fat pointer.
 */
static uint32_t fatp_xtra[] = {
	0x11111111, 0x22222222, 0x33333333, 0x44444444,
	0x55555555, 0x66666666, 0x77777777, 0x88888888,

	0x12121212, 0x21212121, 0x34343434, 0x43434343,
	0x56565656, 0x65656565, 0x78787878, 0x87878787,

	0x11221122, 0x22112211, 0x33443344, 0x44334433,
	0x55665566, 0x66556655, 0x77887788, 0x88778877,

	0x11112222, 0x22221111, 0x33334444, 0x44443333,
	0x55556666, 0x66665555, 0x77778888, 0x88887777,
};
205 1.1 dyoung
206 1.1 dyoung /*!\brief turn a {fatp_t*,slot} into an integral key.
207 1.1 dyoung *
208 1.1 dyoung * The key can be used to obtain the fatp_t, and the slot,
209 1.1 dyoung * as it directly encodes them.
210 1.1 dyoung */
211 1.1 dyoung static inline uint32_t
212 1.1 dyoung fatp_key(fatp_ctl_t *fat, fatp_t *fp, uint32_t slot)
213 1.1 dyoung {
214 1.1 dyoung CTASSERT(CACHE_LINE_SIZE == 32 ||
215 1.1 dyoung CACHE_LINE_SIZE == 64 ||
216 1.1 dyoung CACHE_LINE_SIZE == 128);
217 1.1 dyoung
218 1.1 dyoung switch (fatp_ntags()) {
219 1.1 dyoung case 7:
220 1.1 dyoung return (fatp_index(fat, fp) << 3) | slot;
221 1.1 dyoung case 15:
222 1.1 dyoung return (fatp_index(fat, fp) << 4) | slot;
223 1.1 dyoung case 31:
224 1.1 dyoung return (fatp_index(fat, fp) << 5) | slot;
225 1.1 dyoung default:
226 1.1 dyoung KASSERT(0 && "no support, for no good reason");
227 1.1 dyoung return ~0;
228 1.1 dyoung }
229 1.1 dyoung }
230 1.1 dyoung
231 1.1 dyoung static inline uint32_t
232 1.1 dyoung fatp_slot_from_key(fatp_ctl_t *fat, uint32_t key)
233 1.1 dyoung {
234 1.1 dyoung CTASSERT(CACHE_LINE_SIZE == 32 ||
235 1.1 dyoung CACHE_LINE_SIZE == 64 ||
236 1.1 dyoung CACHE_LINE_SIZE == 128);
237 1.1 dyoung
238 1.1 dyoung switch (fatp_ntags()) {
239 1.1 dyoung case 7:
240 1.1 dyoung return key & 7;
241 1.1 dyoung case 15:
242 1.1 dyoung return key & 15;
243 1.1 dyoung case 31:
244 1.1 dyoung return key & 31;
245 1.1 dyoung default:
246 1.1 dyoung KASSERT(0 && "no support, for no good reason");
247 1.1 dyoung return ~0;
248 1.1 dyoung }
249 1.1 dyoung }
250 1.1 dyoung
251 1.1 dyoung static inline fatp_t *
252 1.1 dyoung fatp_from_key(fatp_ctl_t *fat, uint32_t key)
253 1.1 dyoung {
254 1.1 dyoung CTASSERT(CACHE_LINE_SIZE == 32 ||
255 1.1 dyoung CACHE_LINE_SIZE == 64 ||
256 1.1 dyoung CACHE_LINE_SIZE == 128);
257 1.1 dyoung
258 1.1 dyoung switch (fatp_ntags()) {
259 1.1 dyoung case 7:
260 1.1 dyoung key >>= 3;
261 1.1 dyoung break;
262 1.1 dyoung case 15:
263 1.1 dyoung key >>= 4;
264 1.1 dyoung break;
265 1.1 dyoung case 31:
266 1.1 dyoung key >>= 5;
267 1.1 dyoung break;
268 1.1 dyoung default:
269 1.1 dyoung KASSERT(0 && "no support, for no good reason");
270 1.1 dyoung return 0;
271 1.1 dyoung }
272 1.1 dyoung
273 1.1 dyoung return key ? fat->base + key - 1 : 0;
274 1.1 dyoung }
275 1.1 dyoung
276 1.1 dyoung static inline uint32_t
277 1.1 dyoung idx_encode(vtw_ctl_t *ctl, uint32_t idx)
278 1.1 dyoung {
279 1.1 dyoung return (idx << ctl->idx_bits) | idx;
280 1.1 dyoung }
281 1.1 dyoung
282 1.1 dyoung static inline uint32_t
283 1.1 dyoung idx_decode(vtw_ctl_t *ctl, uint32_t bits)
284 1.1 dyoung {
285 1.1 dyoung uint32_t idx = bits & ctl->idx_mask;
286 1.1 dyoung
287 1.1 dyoung if (idx_encode(ctl, idx) == bits)
288 1.1 dyoung return idx;
289 1.1 dyoung else
290 1.1 dyoung return ~0;
291 1.1 dyoung }
292 1.1 dyoung
293 1.1 dyoung /*!\brief insert index into fatp hash
294 1.1 dyoung *
295 1.1 dyoung *\param idx - index of element being placed in hash chain
296 1.1 dyoung *\param tag - 32-bit tag identifier
297 1.1 dyoung *
298 1.1 dyoung *\returns
299 1.1 dyoung * value which can be used to locate entry.
300 1.1 dyoung *
301 1.1 dyoung *\note
302 1.1 dyoung * we rely on the fact that there are unused high bits in the index
303 1.1 dyoung * for verification purposes on lookup.
304 1.1 dyoung */
305 1.1 dyoung
306 1.1 dyoung static inline uint32_t
307 1.1 dyoung fatp_vtw_inshash(fatp_ctl_t *fat, uint32_t idx, uint32_t tag, int which,
308 1.1 dyoung void *dbg)
309 1.1 dyoung {
310 1.1 dyoung fatp_t *fp;
311 1.1 dyoung fatp_t **hash = (which ? fat->port : fat->hash);
312 1.1 dyoung int i;
313 1.1 dyoung
314 1.1 dyoung fp = hash[tag & fat->mask];
315 1.1 dyoung
316 1.1 dyoung while (!fp || fatp_full(fp)) {
317 1.1 dyoung fatp_t *fq;
318 1.1 dyoung
319 1.1 dyoung /* All entries are inuse at the top level.
320 1.1 dyoung * We allocate a spare, and push the top level
321 1.1 dyoung * down one. All entries in the fp we push down
322 1.1 dyoung * (think of a tape worm here) will be expelled sooner than
323 1.1 dyoung * any entries added subsequently to this hash bucket.
324 1.1 dyoung * This is a property of the time waits we are exploiting.
325 1.1 dyoung */
326 1.1 dyoung
327 1.1 dyoung fq = fatp_alloc(fat);
328 1.1 dyoung if (!fq) {
329 1.1 dyoung vtw_age(fat->vtw, 0);
330 1.1 dyoung fp = hash[tag & fat->mask];
331 1.1 dyoung continue;
332 1.1 dyoung }
333 1.1 dyoung
334 1.1 dyoung fq->inuse = 0;
335 1.1 dyoung fq->nxt = fatp_index(fat, fp);
336 1.1 dyoung
337 1.1 dyoung hash[tag & fat->mask] = fq;
338 1.1 dyoung
339 1.1 dyoung fp = fq;
340 1.1 dyoung }
341 1.1 dyoung
342 1.1 dyoung KASSERT(!fatp_full(fp));
343 1.1 dyoung
344 1.1 dyoung /* Fill highest index first. Lookup is lowest first.
345 1.1 dyoung */
346 1.1 dyoung for (i = fatp_ntags(); --i >= 0; ) {
347 1.1 dyoung if (!((1 << i) & fp->inuse)) {
348 1.1 dyoung break;
349 1.1 dyoung }
350 1.1 dyoung }
351 1.1 dyoung
352 1.1 dyoung fp->inuse |= 1 << i;
353 1.1 dyoung fp->tag[i] = tag ^ idx_encode(fat->vtw, idx) ^ fatp_xtra[i];
354 1.1 dyoung
355 1.1 dyoung db_trace(KTR_VTW
356 1.1 dyoung , (fp, "fat: inuse %5.5x tag[%x] %8.8x"
357 1.1 dyoung , fp->inuse
358 1.1 dyoung , i, fp->tag[i]));
359 1.1 dyoung
360 1.1 dyoung return fatp_key(fat, fp, i);
361 1.1 dyoung }
362 1.1 dyoung
363 1.1 dyoung static inline int
364 1.1 dyoung vtw_alive(const vtw_t *vtw)
365 1.1 dyoung {
366 1.1 dyoung return vtw->hashed && vtw->expire.tv_sec;
367 1.1 dyoung }
368 1.1 dyoung
369 1.1 dyoung static inline uint32_t
370 1.1 dyoung vtw_index_v4(vtw_ctl_t *ctl, vtw_v4_t *v4)
371 1.1 dyoung {
372 1.1 dyoung if (ctl->base.v4 <= v4 && v4 <= ctl->lim.v4)
373 1.1 dyoung return v4 - ctl->base.v4;
374 1.1 dyoung
375 1.1 dyoung KASSERT(0 && "vtw out of bounds");
376 1.1 dyoung
377 1.1 dyoung return ~0;
378 1.1 dyoung }
379 1.1 dyoung
380 1.1 dyoung static inline uint32_t
381 1.1 dyoung vtw_index_v6(vtw_ctl_t *ctl, vtw_v6_t *v6)
382 1.1 dyoung {
383 1.1 dyoung if (ctl->base.v6 <= v6 && v6 <= ctl->lim.v6)
384 1.1 dyoung return v6 - ctl->base.v6;
385 1.1 dyoung
386 1.1 dyoung KASSERT(0 && "vtw out of bounds");
387 1.1 dyoung
388 1.1 dyoung return ~0;
389 1.1 dyoung }
390 1.1 dyoung
391 1.1 dyoung static inline uint32_t
392 1.1 dyoung vtw_index(vtw_ctl_t *ctl, vtw_t *vtw)
393 1.1 dyoung {
394 1.1 dyoung if (ctl->clidx)
395 1.1 dyoung ctl = ctl->ctl;
396 1.1 dyoung
397 1.1 dyoung if (ctl->is_v4)
398 1.1 dyoung return vtw_index_v4(ctl, (vtw_v4_t *)vtw);
399 1.1 dyoung
400 1.1 dyoung if (ctl->is_v6)
401 1.1 dyoung return vtw_index_v6(ctl, (vtw_v6_t *)vtw);
402 1.1 dyoung
403 1.1 dyoung KASSERT(0 && "neither 4 nor 6. most curious.");
404 1.1 dyoung
405 1.1 dyoung return ~0;
406 1.1 dyoung }
407 1.1 dyoung
408 1.1 dyoung static inline vtw_t *
409 1.1 dyoung vtw_from_index(vtw_ctl_t *ctl, uint32_t idx)
410 1.1 dyoung {
411 1.1 dyoung if (ctl->clidx)
412 1.1 dyoung ctl = ctl->ctl;
413 1.1 dyoung
414 1.1 dyoung /* See if the index looks like it might be an index.
415 1.1 dyoung * Bits on outside of the valid index bits is a give away.
416 1.1 dyoung */
417 1.1 dyoung idx = idx_decode(ctl, idx);
418 1.1 dyoung
419 1.1 dyoung if (idx == ~0) {
420 1.1 dyoung return 0;
421 1.1 dyoung } else if (ctl->is_v4) {
422 1.1 dyoung vtw_v4_t *vtw = ctl->base.v4 + idx;
423 1.1 dyoung
424 1.1 dyoung return (ctl->base.v4 <= vtw && vtw <= ctl->lim.v4)
425 1.1 dyoung ? &vtw->common : 0;
426 1.1 dyoung } else if (ctl->is_v6) {
427 1.1 dyoung vtw_v6_t *vtw = ctl->base.v6 + idx;
428 1.1 dyoung
429 1.1 dyoung return (ctl->base.v6 <= vtw && vtw <= ctl->lim.v6)
430 1.1 dyoung ? &vtw->common : 0;
431 1.1 dyoung } else {
432 1.1 dyoung KASSERT(0 && "badness");
433 1.1 dyoung return 0;
434 1.1 dyoung }
435 1.1 dyoung }
436 1.1 dyoung
437 1.1 dyoung /*!\brief return the next vtw after this one.
438 1.1 dyoung *
439 1.1 dyoung * Due to the differing sizes of the entries in differing
440 1.1 dyoung * arenas, we have to ensure we ++ the correct pointer type.
441 1.1 dyoung *
442 1.1 dyoung * Also handles wrap.
443 1.1 dyoung */
444 1.1 dyoung static inline vtw_t *
445 1.1 dyoung vtw_next(vtw_ctl_t *ctl, vtw_t *vtw)
446 1.1 dyoung {
447 1.1 dyoung if (ctl->is_v4) {
448 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
449 1.1 dyoung
450 1.1 dyoung vtw = &(++v4)->common;
451 1.1 dyoung } else {
452 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
453 1.1 dyoung
454 1.1 dyoung vtw = &(++v6)->common;
455 1.1 dyoung }
456 1.1 dyoung
457 1.1 dyoung if (vtw > ctl->lim.v)
458 1.1 dyoung vtw = ctl->base.v;
459 1.1 dyoung
460 1.1 dyoung return vtw;
461 1.1 dyoung }
462 1.1 dyoung
463 1.1 dyoung /*!\brief remove entry from FATP hash chains
464 1.1 dyoung */
465 1.1 dyoung static inline void
466 1.1 dyoung vtw_unhash(vtw_ctl_t *ctl, vtw_t *vtw)
467 1.1 dyoung {
468 1.1 dyoung fatp_ctl_t *fat = ctl->fat;
469 1.1 dyoung fatp_t *fp;
470 1.1 dyoung uint32_t key = vtw->key;
471 1.1 dyoung uint32_t tag, slot, idx;
472 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
473 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
474 1.1 dyoung
475 1.1 dyoung if (!vtw->hashed) {
476 1.1 dyoung KASSERT(0 && "unhashed");
477 1.1 dyoung return;
478 1.1 dyoung }
479 1.1 dyoung
480 1.1 dyoung if (fat->vtw->is_v4) {
481 1.1 dyoung tag = v4_tag(v4->faddr, v4->fport, v4->laddr, v4->lport);
482 1.1 dyoung } else if (fat->vtw->is_v6) {
483 1.1 dyoung tag = v6_tag(&v6->faddr, v6->fport, &v6->laddr, v6->lport);
484 1.1 dyoung } else {
485 1.1 dyoung tag = 0;
486 1.1 dyoung KASSERT(0 && "not reached");
487 1.1 dyoung }
488 1.1 dyoung
489 1.1 dyoung /* Remove from fat->hash[]
490 1.1 dyoung */
491 1.1 dyoung slot = fatp_slot_from_key(fat, key);
492 1.1 dyoung fp = fatp_from_key(fat, key);
493 1.1 dyoung idx = vtw_index(ctl, vtw);
494 1.1 dyoung
495 1.1 dyoung db_trace(KTR_VTW
496 1.1 dyoung , (fp, "fat: del inuse %5.5x slot %x idx %x key %x tag %x"
497 1.1 dyoung , fp->inuse, slot, idx, key, tag));
498 1.1 dyoung
499 1.1 dyoung KASSERT(fp->inuse & (1 << slot));
500 1.1 dyoung KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
501 1.1 dyoung ^ fatp_xtra[slot]));
502 1.1 dyoung
503 1.1 dyoung if ((fp->inuse & (1 << slot))
504 1.1 dyoung && fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
505 1.1 dyoung ^ fatp_xtra[slot])) {
506 1.1 dyoung fp->inuse ^= 1 << slot;
507 1.1 dyoung fp->tag[slot] = 0;
508 1.1 dyoung
509 1.1 dyoung /* When we delete entries, we do not compact. This is
510 1.1 dyoung * due to temporality. We add entries, and they
511 1.1 dyoung * (eventually) expire. Older entries will be further
512 1.1 dyoung * down the chain.
513 1.1 dyoung */
514 1.1 dyoung if (!fp->inuse) {
515 1.1 dyoung uint32_t hi = tag & fat->mask;
516 1.1 dyoung fatp_t *fq = 0;
517 1.1 dyoung fatp_t *fr = fat->hash[hi];
518 1.1 dyoung
519 1.1 dyoung while (fr && fr != fp) {
520 1.1 dyoung fr = fatp_next(fat, fq = fr);
521 1.1 dyoung }
522 1.1 dyoung
523 1.1 dyoung if (fr == fp) {
524 1.1 dyoung if (fq) {
525 1.1 dyoung fq->nxt = fp->nxt;
526 1.1 dyoung fp->nxt = 0;
527 1.1 dyoung fatp_free(fat, fp);
528 1.1 dyoung } else {
529 1.1 dyoung KASSERT(fat->hash[hi] == fp);
530 1.1 dyoung
531 1.1 dyoung if (fp->nxt) {
532 1.1 dyoung fat->hash[hi]
533 1.1 dyoung = fatp_next(fat, fp);
534 1.1 dyoung fp->nxt = 0;
535 1.1 dyoung fatp_free(fat, fp);
536 1.1 dyoung } else {
537 1.1 dyoung /* retain for next use.
538 1.1 dyoung */
539 1.1 dyoung ;
540 1.1 dyoung }
541 1.1 dyoung }
542 1.1 dyoung } else {
543 1.1 dyoung fr = fat->hash[hi];
544 1.1 dyoung
545 1.1 dyoung do {
546 1.1 dyoung db_trace(KTR_VTW
547 1.1 dyoung , (fr
548 1.1 dyoung , "fat:*del inuse %5.5x"
549 1.1 dyoung " nxt %x"
550 1.1 dyoung , fr->inuse, fr->nxt));
551 1.1 dyoung
552 1.1 dyoung fr = fatp_next(fat, fq = fr);
553 1.1 dyoung } while (fr && fr != fp);
554 1.1 dyoung
555 1.1 dyoung KASSERT(0 && "oops");
556 1.1 dyoung }
557 1.1 dyoung }
558 1.1 dyoung vtw->key ^= ~0;
559 1.1 dyoung }
560 1.1 dyoung
561 1.1 dyoung if (fat->vtw->is_v4) {
562 1.1 dyoung tag = v4_port_tag(v4->lport);
563 1.1 dyoung } else if (fat->vtw->is_v6) {
564 1.1 dyoung tag = v6_port_tag(v6->lport);
565 1.1 dyoung }
566 1.1 dyoung
567 1.1 dyoung /* Remove from fat->port[]
568 1.1 dyoung */
569 1.1 dyoung key = vtw->port_key;
570 1.1 dyoung slot = fatp_slot_from_key(fat, key);
571 1.1 dyoung fp = fatp_from_key(fat, key);
572 1.1 dyoung idx = vtw_index(ctl, vtw);
573 1.1 dyoung
574 1.1 dyoung db_trace(KTR_VTW
575 1.1 dyoung , (fp, "fatport: del inuse %5.5x"
576 1.1 dyoung " slot %x idx %x key %x tag %x"
577 1.1 dyoung , fp->inuse, slot, idx, key, tag));
578 1.1 dyoung
579 1.1 dyoung KASSERT(fp->inuse & (1 << slot));
580 1.1 dyoung KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
581 1.1 dyoung ^ fatp_xtra[slot]));
582 1.1 dyoung
583 1.1 dyoung if ((fp->inuse & (1 << slot))
584 1.1 dyoung && fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
585 1.1 dyoung ^ fatp_xtra[slot])) {
586 1.1 dyoung fp->inuse ^= 1 << slot;
587 1.1 dyoung fp->tag[slot] = 0;
588 1.1 dyoung
589 1.1 dyoung if (!fp->inuse) {
590 1.1 dyoung uint32_t hi = tag & fat->mask;
591 1.1 dyoung fatp_t *fq = 0;
592 1.1 dyoung fatp_t *fr = fat->port[hi];
593 1.1 dyoung
594 1.1 dyoung while (fr && fr != fp) {
595 1.1 dyoung fr = fatp_next(fat, fq = fr);
596 1.1 dyoung }
597 1.1 dyoung
598 1.1 dyoung if (fr == fp) {
599 1.1 dyoung if (fq) {
600 1.1 dyoung fq->nxt = fp->nxt;
601 1.1 dyoung fp->nxt = 0;
602 1.1 dyoung fatp_free(fat, fp);
603 1.1 dyoung } else {
604 1.1 dyoung KASSERT(fat->port[hi] == fp);
605 1.1 dyoung
606 1.1 dyoung if (fp->nxt) {
607 1.1 dyoung fat->port[hi]
608 1.1 dyoung = fatp_next(fat, fp);
609 1.1 dyoung fp->nxt = 0;
610 1.1 dyoung fatp_free(fat, fp);
611 1.1 dyoung } else {
612 1.1 dyoung /* retain for next use.
613 1.1 dyoung */
614 1.1 dyoung ;
615 1.1 dyoung }
616 1.1 dyoung }
617 1.1 dyoung }
618 1.1 dyoung }
619 1.1 dyoung vtw->port_key ^= ~0;
620 1.1 dyoung }
621 1.1 dyoung
622 1.1 dyoung vtw->hashed = 0;
623 1.1 dyoung }
624 1.1 dyoung
625 1.1 dyoung /*!\brief remove entry from hash, possibly free.
626 1.1 dyoung */
627 1.1 dyoung void
628 1.1 dyoung vtw_del(vtw_ctl_t *ctl, vtw_t *vtw)
629 1.1 dyoung {
630 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
631 1.1 dyoung
632 1.1 dyoung if (vtw->hashed) {
633 1.1 dyoung ++vtw_stats.del;
634 1.1 dyoung vtw_unhash(ctl, vtw);
635 1.1 dyoung }
636 1.1 dyoung
637 1.1 dyoung /* We only delete the oldest entry.
638 1.1 dyoung */
639 1.1 dyoung if (vtw != ctl->oldest.v)
640 1.1 dyoung return;
641 1.1 dyoung
642 1.1 dyoung --ctl->nalloc;
643 1.1 dyoung ++ctl->nfree;
644 1.1 dyoung
645 1.1 dyoung vtw->expire.tv_sec = 0;
646 1.1 dyoung vtw->expire.tv_usec = ~0;
647 1.1 dyoung
648 1.1 dyoung if (!ctl->nalloc)
649 1.1 dyoung ctl->oldest.v = 0;
650 1.1 dyoung
651 1.1 dyoung ctl->oldest.v = vtw_next(ctl, vtw);
652 1.1 dyoung }
653 1.1 dyoung
654 1.4 dholland /*!\brief insert vestigial timewait in hash chain
655 1.1 dyoung */
656 1.1 dyoung static void
657 1.1 dyoung vtw_inshash_v4(vtw_ctl_t *ctl, vtw_t *vtw)
658 1.1 dyoung {
659 1.1 dyoung uint32_t idx = vtw_index(ctl, vtw);
660 1.1 dyoung uint32_t tag;
661 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
662 1.1 dyoung
663 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
664 1.1 dyoung KASSERT(!vtw->hashed);
665 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
666 1.1 dyoung
667 1.1 dyoung ++vtw_stats.ins;
668 1.1 dyoung
669 1.1 dyoung tag = v4_tag(v4->faddr, v4->fport,
670 1.1 dyoung v4->laddr, v4->lport);
671 1.1 dyoung
672 1.1 dyoung vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw);
673 1.1 dyoung
674 1.1 dyoung db_trace(KTR_VTW, (ctl
675 1.1 dyoung , "vtw: ins %8.8x:%4.4x %8.8x:%4.4x"
676 1.1 dyoung " tag %8.8x key %8.8x"
677 1.1 dyoung , v4->faddr, v4->fport
678 1.1 dyoung , v4->laddr, v4->lport
679 1.1 dyoung , tag
680 1.1 dyoung , vtw->key));
681 1.1 dyoung
682 1.1 dyoung tag = v4_port_tag(v4->lport);
683 1.1 dyoung vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw);
684 1.1 dyoung
685 1.1 dyoung db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x"
686 1.1 dyoung , v4->lport, v4->lport
687 1.1 dyoung , tag
688 1.1 dyoung , vtw->key));
689 1.1 dyoung
690 1.1 dyoung vtw->hashed = 1;
691 1.1 dyoung }
692 1.1 dyoung
693 1.4 dholland /*!\brief insert vestigial timewait in hash chain
694 1.1 dyoung */
695 1.1 dyoung static void
696 1.1 dyoung vtw_inshash_v6(vtw_ctl_t *ctl, vtw_t *vtw)
697 1.1 dyoung {
698 1.1 dyoung uint32_t idx = vtw_index(ctl, vtw);
699 1.1 dyoung uint32_t tag;
700 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
701 1.1 dyoung
702 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
703 1.1 dyoung KASSERT(!vtw->hashed);
704 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
705 1.1 dyoung
706 1.1 dyoung ++vtw_stats.ins;
707 1.1 dyoung
708 1.1 dyoung tag = v6_tag(&v6->faddr, v6->fport,
709 1.1 dyoung &v6->laddr, v6->lport);
710 1.1 dyoung
711 1.1 dyoung vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw);
712 1.1 dyoung
713 1.1 dyoung tag = v6_port_tag(v6->lport);
714 1.1 dyoung vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw);
715 1.1 dyoung
716 1.1 dyoung db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x"
717 1.1 dyoung , v6->lport, v6->lport
718 1.1 dyoung , tag
719 1.1 dyoung , vtw->key));
720 1.1 dyoung
721 1.1 dyoung vtw->hashed = 1;
722 1.1 dyoung }
723 1.1 dyoung
724 1.1 dyoung static vtw_t *
725 1.1 dyoung vtw_lookup_hash_v4(vtw_ctl_t *ctl, uint32_t faddr, uint16_t fport
726 1.1 dyoung , uint32_t laddr, uint16_t lport
727 1.1 dyoung , int which)
728 1.1 dyoung {
729 1.1 dyoung vtw_v4_t *v4;
730 1.1 dyoung vtw_t *vtw;
731 1.1 dyoung uint32_t tag;
732 1.1 dyoung fatp_t *fp;
733 1.1 dyoung int i;
734 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
735 1.1 dyoung
736 1.1 dyoung if (!ctl || !ctl->fat)
737 1.1 dyoung return 0;
738 1.1 dyoung
739 1.1 dyoung ++vtw_stats.look[which];
740 1.1 dyoung
741 1.1 dyoung if (which) {
742 1.1 dyoung tag = v4_port_tag(lport);
743 1.1 dyoung fp = ctl->fat->port[tag & ctl->fat->mask];
744 1.1 dyoung } else {
745 1.1 dyoung tag = v4_tag(faddr, fport, laddr, lport);
746 1.1 dyoung fp = ctl->fat->hash[tag & ctl->fat->mask];
747 1.1 dyoung }
748 1.1 dyoung
749 1.1 dyoung while (fp && fp->inuse) {
750 1.1 dyoung uint32_t inuse = fp->inuse;
751 1.1 dyoung
752 1.1 dyoung ++fatps;
753 1.1 dyoung
754 1.1 dyoung for (i = 0; inuse && i < fatp_ntags(); ++i) {
755 1.1 dyoung uint32_t idx;
756 1.1 dyoung
757 1.1 dyoung if (!(inuse & (1 << i)))
758 1.1 dyoung continue;
759 1.1 dyoung
760 1.1 dyoung inuse ^= 1 << i;
761 1.1 dyoung
762 1.1 dyoung ++probes;
763 1.1 dyoung ++vtw_stats.probe[which];
764 1.1 dyoung
765 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
766 1.1 dyoung vtw = vtw_from_index(ctl, idx);
767 1.1 dyoung
768 1.1 dyoung if (!vtw) {
769 1.1 dyoung /* Hopefully fast path.
770 1.1 dyoung */
771 1.1 dyoung db_trace(KTR_VTW
772 1.1 dyoung , (fp, "vtw: fast %A:%P %A:%P"
773 1.1 dyoung " idx %x tag %x"
774 1.1 dyoung , faddr, fport
775 1.1 dyoung , laddr, lport
776 1.1 dyoung , idx, tag));
777 1.1 dyoung continue;
778 1.1 dyoung }
779 1.1 dyoung
780 1.1 dyoung v4 = (void*)vtw;
781 1.1 dyoung
782 1.1 dyoung /* The de-referencing of vtw is what we want to avoid.
783 1.1 dyoung * Losing.
784 1.1 dyoung */
785 1.1 dyoung if (vtw_alive(vtw)
786 1.1 dyoung && ((which ? vtw->port_key : vtw->key)
787 1.1 dyoung == fatp_key(ctl->fat, fp, i))
788 1.1 dyoung && (which
789 1.1 dyoung || (v4->faddr == faddr && v4->laddr == laddr
790 1.1 dyoung && v4->fport == fport))
791 1.1 dyoung && v4->lport == lport) {
792 1.1 dyoung ++vtw_stats.hit[which];
793 1.1 dyoung
794 1.1 dyoung db_trace(KTR_VTW
795 1.1 dyoung , (fp, "vtw: hit %8.8x:%4.4x"
796 1.1 dyoung " %8.8x:%4.4x idx %x key %x"
797 1.1 dyoung , faddr, fport
798 1.1 dyoung , laddr, lport
799 1.1 dyoung , idx_decode(ctl, idx), vtw->key));
800 1.1 dyoung
801 1.1 dyoung KASSERT(vtw->hashed);
802 1.1 dyoung
803 1.1 dyoung goto out;
804 1.1 dyoung }
805 1.1 dyoung ++vtw_stats.losing[which];
806 1.1 dyoung ++losings;
807 1.1 dyoung
808 1.1 dyoung if (vtw_alive(vtw)) {
809 1.1 dyoung db_trace(KTR_VTW
810 1.1 dyoung , (fp, "vtw:!mis %8.8x:%4.4x"
811 1.1 dyoung " %8.8x:%4.4x key %x tag %x"
812 1.1 dyoung , faddr, fport
813 1.1 dyoung , laddr, lport
814 1.1 dyoung , fatp_key(ctl->fat, fp, i)
815 1.1 dyoung , v4_tag(faddr, fport
816 1.1 dyoung , laddr, lport)));
817 1.1 dyoung db_trace(KTR_VTW
818 1.1 dyoung , (vtw, "vtw:!mis %8.8x:%4.4x"
819 1.1 dyoung " %8.8x:%4.4x key %x tag %x"
820 1.1 dyoung , v4->faddr, v4->fport
821 1.1 dyoung , v4->laddr, v4->lport
822 1.1 dyoung , vtw->key
823 1.1 dyoung , v4_tag(v4->faddr, v4->fport
824 1.1 dyoung , v4->laddr, v4->lport)));
825 1.1 dyoung
826 1.1 dyoung if (vtw->key == fatp_key(ctl->fat, fp, i)) {
827 1.1 dyoung db_trace(KTR_VTW
828 1.1 dyoung , (vtw, "vtw:!mis %8.8x:%4.4x"
829 1.1 dyoung " %8.8x:%4.4x key %x"
830 1.1 dyoung " which %x"
831 1.1 dyoung , v4->faddr, v4->fport
832 1.1 dyoung , v4->laddr, v4->lport
833 1.1 dyoung , vtw->key
834 1.1 dyoung , which));
835 1.1 dyoung
836 1.1 dyoung } else {
837 1.1 dyoung db_trace(KTR_VTW
838 1.1 dyoung , (vtw
839 1.1 dyoung , "vtw:!mis"
840 1.1 dyoung " key %8.8x != %8.8x"
841 1.1 dyoung " idx %x i %x which %x"
842 1.1 dyoung , vtw->key
843 1.1 dyoung , fatp_key(ctl->fat, fp, i)
844 1.1 dyoung , idx_decode(ctl, idx)
845 1.1 dyoung , i
846 1.1 dyoung , which));
847 1.1 dyoung }
848 1.1 dyoung } else {
849 1.1 dyoung db_trace(KTR_VTW
850 1.1 dyoung , (fp
851 1.1 dyoung , "vtw:!mis free entry"
852 1.1 dyoung " idx %x vtw %p which %x"
853 1.1 dyoung , idx_decode(ctl, idx)
854 1.1 dyoung , vtw, which));
855 1.1 dyoung }
856 1.1 dyoung }
857 1.1 dyoung
858 1.1 dyoung if (fp->nxt) {
859 1.1 dyoung fp = fatp_next(ctl->fat, fp);
860 1.1 dyoung } else {
861 1.1 dyoung break;
862 1.1 dyoung }
863 1.1 dyoung }
864 1.1 dyoung ++vtw_stats.miss[which];
865 1.1 dyoung vtw = 0;
866 1.1 dyoung out:
867 1.1 dyoung if (fatps > vtw_stats.max_chain[which])
868 1.1 dyoung vtw_stats.max_chain[which] = fatps;
869 1.1 dyoung if (probes > vtw_stats.max_probe[which])
870 1.1 dyoung vtw_stats.max_probe[which] = probes;
871 1.1 dyoung if (losings > vtw_stats.max_loss[which])
872 1.1 dyoung vtw_stats.max_loss[which] = losings;
873 1.1 dyoung
874 1.1 dyoung return vtw;
875 1.1 dyoung }
876 1.1 dyoung
877 1.1 dyoung static vtw_t *
878 1.1 dyoung vtw_lookup_hash_v6(vtw_ctl_t *ctl, const struct in6_addr *faddr, uint16_t fport
879 1.1 dyoung , const struct in6_addr *laddr, uint16_t lport
880 1.1 dyoung , int which)
881 1.1 dyoung {
882 1.1 dyoung vtw_v6_t *v6;
883 1.1 dyoung vtw_t *vtw;
884 1.1 dyoung uint32_t tag;
885 1.1 dyoung fatp_t *fp;
886 1.1 dyoung int i;
887 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
888 1.1 dyoung
889 1.1 dyoung ++vtw_stats.look[which];
890 1.1 dyoung
891 1.1 dyoung if (!ctl || !ctl->fat)
892 1.1 dyoung return 0;
893 1.1 dyoung
894 1.1 dyoung if (which) {
895 1.1 dyoung tag = v6_port_tag(lport);
896 1.1 dyoung fp = ctl->fat->port[tag & ctl->fat->mask];
897 1.1 dyoung } else {
898 1.1 dyoung tag = v6_tag(faddr, fport, laddr, lport);
899 1.1 dyoung fp = ctl->fat->hash[tag & ctl->fat->mask];
900 1.1 dyoung }
901 1.1 dyoung
902 1.1 dyoung while (fp && fp->inuse) {
903 1.1 dyoung uint32_t inuse = fp->inuse;
904 1.1 dyoung
905 1.1 dyoung ++fatps;
906 1.1 dyoung
907 1.1 dyoung for (i = 0; inuse && i < fatp_ntags(); ++i) {
908 1.1 dyoung uint32_t idx;
909 1.1 dyoung
910 1.1 dyoung if (!(inuse & (1 << i)))
911 1.1 dyoung continue;
912 1.1 dyoung
913 1.1 dyoung inuse ^= 1 << i;
914 1.1 dyoung
915 1.1 dyoung ++probes;
916 1.1 dyoung ++vtw_stats.probe[which];
917 1.1 dyoung
918 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
919 1.1 dyoung vtw = vtw_from_index(ctl, idx);
920 1.1 dyoung
921 1.1 dyoung db_trace(KTR_VTW
922 1.1 dyoung , (fp, "probe: %2d %6A:%4.4x %6A:%4.4x idx %x"
923 1.1 dyoung , i
924 1.1 dyoung , db_store(faddr, sizeof (*faddr)), fport
925 1.1 dyoung , db_store(laddr, sizeof (*laddr)), lport
926 1.1 dyoung , idx_decode(ctl, idx)));
927 1.1 dyoung
928 1.1 dyoung if (!vtw) {
929 1.1 dyoung /* Hopefully fast path.
930 1.1 dyoung */
931 1.1 dyoung continue;
932 1.1 dyoung }
933 1.1 dyoung
934 1.1 dyoung v6 = (void*)vtw;
935 1.1 dyoung
936 1.1 dyoung if (vtw_alive(vtw)
937 1.1 dyoung && ((which ? vtw->port_key : vtw->key)
938 1.1 dyoung == fatp_key(ctl->fat, fp, i))
939 1.1 dyoung && v6->lport == lport
940 1.1 dyoung && (which
941 1.1 dyoung || (v6->fport == fport
942 1.1 dyoung && !bcmp(&v6->faddr, faddr, sizeof (*faddr))
943 1.1 dyoung && !bcmp(&v6->laddr, laddr
944 1.1 dyoung , sizeof (*laddr))))) {
945 1.1 dyoung ++vtw_stats.hit[which];
946 1.1 dyoung
947 1.1 dyoung KASSERT(vtw->hashed);
948 1.1 dyoung goto out;
949 1.1 dyoung } else {
950 1.1 dyoung ++vtw_stats.losing[which];
951 1.1 dyoung ++losings;
952 1.1 dyoung }
953 1.1 dyoung }
954 1.1 dyoung
955 1.1 dyoung if (fp->nxt) {
956 1.1 dyoung fp = fatp_next(ctl->fat, fp);
957 1.1 dyoung } else {
958 1.1 dyoung break;
959 1.1 dyoung }
960 1.1 dyoung }
961 1.1 dyoung ++vtw_stats.miss[which];
962 1.1 dyoung vtw = 0;
963 1.1 dyoung out:
964 1.1 dyoung if (fatps > vtw_stats.max_chain[which])
965 1.1 dyoung vtw_stats.max_chain[which] = fatps;
966 1.1 dyoung if (probes > vtw_stats.max_probe[which])
967 1.1 dyoung vtw_stats.max_probe[which] = probes;
968 1.1 dyoung if (losings > vtw_stats.max_loss[which])
969 1.1 dyoung vtw_stats.max_loss[which] = losings;
970 1.1 dyoung
971 1.1 dyoung return vtw;
972 1.1 dyoung }
973 1.1 dyoung
974 1.1 dyoung /*!\brief port iterator
975 1.1 dyoung */
976 1.1 dyoung static vtw_t *
977 1.1 dyoung vtw_next_port_v4(struct tcp_ports_iterator *it)
978 1.1 dyoung {
979 1.1 dyoung vtw_ctl_t *ctl = it->ctl;
980 1.1 dyoung vtw_v4_t *v4;
981 1.1 dyoung vtw_t *vtw;
982 1.1 dyoung uint32_t tag;
983 1.1 dyoung uint16_t lport = it->port;
984 1.1 dyoung fatp_t *fp;
985 1.1 dyoung int i;
986 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
987 1.1 dyoung
988 1.1 dyoung tag = v4_port_tag(lport);
989 1.1 dyoung if (!it->fp) {
990 1.1 dyoung it->fp = ctl->fat->port[tag & ctl->fat->mask];
991 1.1 dyoung it->slot_idx = 0;
992 1.1 dyoung }
993 1.1 dyoung fp = it->fp;
994 1.1 dyoung
995 1.1 dyoung while (fp) {
996 1.1 dyoung uint32_t inuse = fp->inuse;
997 1.1 dyoung
998 1.1 dyoung ++fatps;
999 1.1 dyoung
1000 1.1 dyoung for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) {
1001 1.1 dyoung uint32_t idx;
1002 1.1 dyoung
1003 1.1 dyoung if (!(inuse & (1 << i)))
1004 1.1 dyoung continue;
1005 1.1 dyoung
1006 1.1 dyoung inuse &= ~0 << i;
1007 1.1 dyoung
1008 1.1 dyoung if (i < it->slot_idx)
1009 1.1 dyoung continue;
1010 1.1 dyoung
1011 1.1 dyoung ++vtw_stats.probe[1];
1012 1.1 dyoung ++probes;
1013 1.1 dyoung
1014 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
1015 1.1 dyoung vtw = vtw_from_index(ctl, idx);
1016 1.1 dyoung
1017 1.1 dyoung if (!vtw) {
1018 1.1 dyoung /* Hopefully fast path.
1019 1.1 dyoung */
1020 1.1 dyoung continue;
1021 1.1 dyoung }
1022 1.1 dyoung
1023 1.1 dyoung v4 = (void*)vtw;
1024 1.1 dyoung
1025 1.1 dyoung if (vtw_alive(vtw)
1026 1.1 dyoung && vtw->port_key == fatp_key(ctl->fat, fp, i)
1027 1.1 dyoung && v4->lport == lport) {
1028 1.1 dyoung ++vtw_stats.hit[1];
1029 1.1 dyoung
1030 1.1 dyoung it->slot_idx = i + 1;
1031 1.1 dyoung
1032 1.1 dyoung goto out;
1033 1.1 dyoung } else if (vtw_alive(vtw)) {
1034 1.1 dyoung ++vtw_stats.losing[1];
1035 1.1 dyoung ++losings;
1036 1.1 dyoung
1037 1.1 dyoung db_trace(KTR_VTW
1038 1.1 dyoung , (vtw, "vtw:!mis"
1039 1.1 dyoung " port %8.8x:%4.4x %8.8x:%4.4x"
1040 1.1 dyoung " key %x port %x"
1041 1.1 dyoung , v4->faddr, v4->fport
1042 1.1 dyoung , v4->laddr, v4->lport
1043 1.1 dyoung , vtw->key
1044 1.1 dyoung , lport));
1045 1.1 dyoung } else {
1046 1.1 dyoung /* Really losing here. We are coming
1047 1.1 dyoung * up with references to free entries.
1048 1.1 dyoung * Might find it better to use
1049 1.1 dyoung * traditional, or need another
1050 1.1 dyoung * add-hockery. The other add-hockery
1051 1.1 dyoung * would be to pul more into into the
1052 1.1 dyoung * cache line to reject the false
1053 1.1 dyoung * hits.
1054 1.1 dyoung */
1055 1.1 dyoung ++vtw_stats.losing[1];
1056 1.1 dyoung ++losings;
1057 1.1 dyoung db_trace(KTR_VTW
1058 1.1 dyoung , (fp, "vtw:!mis port %x"
1059 1.1 dyoung " - free entry idx %x vtw %p"
1060 1.1 dyoung , lport
1061 1.1 dyoung , idx_decode(ctl, idx)
1062 1.1 dyoung , vtw));
1063 1.1 dyoung }
1064 1.1 dyoung }
1065 1.1 dyoung
1066 1.1 dyoung if (fp->nxt) {
1067 1.1 dyoung it->fp = fp = fatp_next(ctl->fat, fp);
1068 1.1 dyoung it->slot_idx = 0;
1069 1.1 dyoung } else {
1070 1.1 dyoung it->fp = 0;
1071 1.1 dyoung break;
1072 1.1 dyoung }
1073 1.1 dyoung }
1074 1.1 dyoung ++vtw_stats.miss[1];
1075 1.1 dyoung
1076 1.1 dyoung vtw = 0;
1077 1.1 dyoung out:
1078 1.1 dyoung if (fatps > vtw_stats.max_chain[1])
1079 1.1 dyoung vtw_stats.max_chain[1] = fatps;
1080 1.1 dyoung if (probes > vtw_stats.max_probe[1])
1081 1.1 dyoung vtw_stats.max_probe[1] = probes;
1082 1.1 dyoung if (losings > vtw_stats.max_loss[1])
1083 1.1 dyoung vtw_stats.max_loss[1] = losings;
1084 1.1 dyoung
1085 1.1 dyoung return vtw;
1086 1.1 dyoung }
1087 1.1 dyoung
1088 1.1 dyoung /*!\brief port iterator
1089 1.1 dyoung */
1090 1.1 dyoung static vtw_t *
1091 1.1 dyoung vtw_next_port_v6(struct tcp_ports_iterator *it)
1092 1.1 dyoung {
1093 1.1 dyoung vtw_ctl_t *ctl = it->ctl;
1094 1.1 dyoung vtw_v6_t *v6;
1095 1.1 dyoung vtw_t *vtw;
1096 1.1 dyoung uint32_t tag;
1097 1.1 dyoung uint16_t lport = it->port;
1098 1.1 dyoung fatp_t *fp;
1099 1.1 dyoung int i;
1100 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
1101 1.1 dyoung
1102 1.1 dyoung tag = v6_port_tag(lport);
1103 1.1 dyoung if (!it->fp) {
1104 1.1 dyoung it->fp = ctl->fat->port[tag & ctl->fat->mask];
1105 1.1 dyoung it->slot_idx = 0;
1106 1.1 dyoung }
1107 1.1 dyoung fp = it->fp;
1108 1.1 dyoung
1109 1.1 dyoung while (fp) {
1110 1.1 dyoung uint32_t inuse = fp->inuse;
1111 1.1 dyoung
1112 1.1 dyoung ++fatps;
1113 1.1 dyoung
1114 1.1 dyoung for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) {
1115 1.1 dyoung uint32_t idx;
1116 1.1 dyoung
1117 1.1 dyoung if (!(inuse & (1 << i)))
1118 1.1 dyoung continue;
1119 1.1 dyoung
1120 1.1 dyoung inuse &= ~0 << i;
1121 1.1 dyoung
1122 1.1 dyoung if (i < it->slot_idx)
1123 1.1 dyoung continue;
1124 1.1 dyoung
1125 1.1 dyoung ++vtw_stats.probe[1];
1126 1.1 dyoung ++probes;
1127 1.1 dyoung
1128 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
1129 1.1 dyoung vtw = vtw_from_index(ctl, idx);
1130 1.1 dyoung
1131 1.1 dyoung if (!vtw) {
1132 1.1 dyoung /* Hopefully fast path.
1133 1.1 dyoung */
1134 1.1 dyoung continue;
1135 1.1 dyoung }
1136 1.1 dyoung
1137 1.1 dyoung v6 = (void*)vtw;
1138 1.1 dyoung
1139 1.1 dyoung db_trace(KTR_VTW
1140 1.1 dyoung , (vtw, "vtw: i %x idx %x fp->tag %x"
1141 1.1 dyoung " tag %x xtra %x"
1142 1.1 dyoung , i, idx_decode(ctl, idx)
1143 1.1 dyoung , fp->tag[i], tag, fatp_xtra[i]));
1144 1.1 dyoung
1145 1.1 dyoung if (vtw_alive(vtw)
1146 1.1 dyoung && vtw->port_key == fatp_key(ctl->fat, fp, i)
1147 1.1 dyoung && v6->lport == lport) {
1148 1.1 dyoung ++vtw_stats.hit[1];
1149 1.1 dyoung
1150 1.1 dyoung db_trace(KTR_VTW
1151 1.1 dyoung , (fp, "vtw: nxt port %P - %4.4x"
1152 1.1 dyoung " idx %x key %x"
1153 1.1 dyoung , lport, lport
1154 1.1 dyoung , idx_decode(ctl, idx), vtw->key));
1155 1.1 dyoung
1156 1.1 dyoung it->slot_idx = i + 1;
1157 1.1 dyoung goto out;
1158 1.1 dyoung } else if (vtw_alive(vtw)) {
1159 1.1 dyoung ++vtw_stats.losing[1];
1160 1.1 dyoung
1161 1.1 dyoung db_trace(KTR_VTW
1162 1.1 dyoung , (vtw, "vtw:!mis port %6A:%4.4x"
1163 1.1 dyoung " %6A:%4.4x key %x port %x"
1164 1.1 dyoung , db_store(&v6->faddr
1165 1.1 dyoung , sizeof (v6->faddr))
1166 1.1 dyoung , v6->fport
1167 1.1 dyoung , db_store(&v6->laddr
1168 1.1 dyoung , sizeof (v6->faddr))
1169 1.1 dyoung , v6->lport
1170 1.1 dyoung , vtw->key
1171 1.1 dyoung , lport));
1172 1.1 dyoung } else {
1173 1.1 dyoung /* Really losing here. We are coming
1174 1.1 dyoung * up with references to free entries.
1175 1.1 dyoung * Might find it better to use
1176 1.1 dyoung * traditional, or need another
1177 1.1 dyoung * add-hockery. The other add-hockery
1178 1.1 dyoung * would be to pul more into into the
1179 1.1 dyoung * cache line to reject the false
1180 1.1 dyoung * hits.
1181 1.1 dyoung */
1182 1.1 dyoung ++vtw_stats.losing[1];
1183 1.1 dyoung ++losings;
1184 1.1 dyoung
1185 1.1 dyoung db_trace(KTR_VTW
1186 1.1 dyoung , (fp
1187 1.1 dyoung , "vtw:!mis port %x"
1188 1.1 dyoung " - free entry idx %x vtw %p"
1189 1.1 dyoung , lport, idx_decode(ctl, idx)
1190 1.1 dyoung , vtw));
1191 1.1 dyoung }
1192 1.1 dyoung }
1193 1.1 dyoung
1194 1.1 dyoung if (fp->nxt) {
1195 1.1 dyoung it->fp = fp = fatp_next(ctl->fat, fp);
1196 1.1 dyoung it->slot_idx = 0;
1197 1.1 dyoung } else {
1198 1.1 dyoung it->fp = 0;
1199 1.1 dyoung break;
1200 1.1 dyoung }
1201 1.1 dyoung }
1202 1.1 dyoung ++vtw_stats.miss[1];
1203 1.1 dyoung
1204 1.1 dyoung vtw = 0;
1205 1.1 dyoung out:
1206 1.1 dyoung if (fatps > vtw_stats.max_chain[1])
1207 1.1 dyoung vtw_stats.max_chain[1] = fatps;
1208 1.1 dyoung if (probes > vtw_stats.max_probe[1])
1209 1.1 dyoung vtw_stats.max_probe[1] = probes;
1210 1.1 dyoung if (losings > vtw_stats.max_loss[1])
1211 1.1 dyoung vtw_stats.max_loss[1] = losings;
1212 1.1 dyoung
1213 1.1 dyoung return vtw;
1214 1.1 dyoung }
1215 1.1 dyoung
1216 1.1 dyoung /*!\brief initialise the VTW allocation arena
1217 1.1 dyoung *
1218 1.1 dyoung * There are 1+3 allocation classes:
1219 1.1 dyoung * 0 classless
1220 1.1 dyoung * {1,2,3} MSL-class based allocation
1221 1.1 dyoung *
1222 1.1 dyoung * The allocation arenas are all initialised. Classless gets all the
1223 1.1 dyoung * space. MSL-class based divides the arena, so that allocation
1224 1.1 dyoung * within a class can proceed without having to consider entries
1225 1.1 dyoung * (aka: cache lines) from different classes.
1226 1.1 dyoung *
1227 1.1 dyoung * Usually, we are completely classless or class-based, but there can be
1228 1.1 dyoung * transition periods, corresponding to dynamic adjustments in the config
1229 1.1 dyoung * by the operator.
1230 1.1 dyoung */
1231 1.1 dyoung static void
1232 1.6 dyoung vtw_init(fatp_ctl_t *fat, vtw_ctl_t *ctl, const uint32_t n, vtw_t *ctl_base_v)
1233 1.1 dyoung {
1234 1.6 dyoung int class_n, i;
1235 1.6 dyoung vtw_t *base;
1236 1.1 dyoung
1237 1.6 dyoung ctl->base.v = ctl_base_v;
1238 1.1 dyoung
1239 1.6 dyoung if (ctl->is_v4) {
1240 1.6 dyoung ctl->lim.v4 = ctl->base.v4 + n - 1;
1241 1.6 dyoung ctl->alloc.v4 = ctl->base.v4;
1242 1.6 dyoung } else {
1243 1.6 dyoung ctl->lim.v6 = ctl->base.v6 + n - 1;
1244 1.6 dyoung ctl->alloc.v6 = ctl->base.v6;
1245 1.6 dyoung }
1246 1.1 dyoung
1247 1.6 dyoung ctl->nfree = n;
1248 1.6 dyoung ctl->ctl = ctl;
1249 1.1 dyoung
1250 1.6 dyoung ctl->idx_bits = 32;
1251 1.6 dyoung for (ctl->idx_mask = ~0; (ctl->idx_mask & (n-1)) == n-1; ) {
1252 1.6 dyoung ctl->idx_mask >>= 1;
1253 1.6 dyoung ctl->idx_bits -= 1;
1254 1.6 dyoung }
1255 1.1 dyoung
1256 1.6 dyoung ctl->idx_mask <<= 1;
1257 1.6 dyoung ctl->idx_mask |= 1;
1258 1.6 dyoung ctl->idx_bits += 1;
1259 1.1 dyoung
1260 1.6 dyoung ctl->fat = fat;
1261 1.6 dyoung fat->vtw = ctl;
1262 1.1 dyoung
1263 1.6 dyoung /* Divide the resources equally amongst the classes.
1264 1.6 dyoung * This is not optimal, as the different classes
1265 1.6 dyoung * arrive and leave at different rates, but it is
1266 1.6 dyoung * the best I can do for now.
1267 1.6 dyoung */
1268 1.6 dyoung class_n = n / (VTW_NCLASS-1);
1269 1.6 dyoung base = ctl->base.v;
1270 1.1 dyoung
1271 1.6 dyoung for (i = 1; i < VTW_NCLASS; ++i) {
1272 1.6 dyoung int j;
1273 1.1 dyoung
1274 1.6 dyoung ctl[i] = ctl[0];
1275 1.6 dyoung ctl[i].clidx = i;
1276 1.1 dyoung
1277 1.6 dyoung ctl[i].base.v = base;
1278 1.6 dyoung ctl[i].alloc = ctl[i].base;
1279 1.1 dyoung
1280 1.6 dyoung for (j = 0; j < class_n - 1; ++j) {
1281 1.6 dyoung if (tcp_msl_enable)
1282 1.6 dyoung base->msl_class = i;
1283 1.1 dyoung base = vtw_next(ctl, base);
1284 1.1 dyoung }
1285 1.6 dyoung
1286 1.6 dyoung ctl[i].lim.v = base;
1287 1.6 dyoung base = vtw_next(ctl, base);
1288 1.6 dyoung ctl[i].nfree = class_n;
1289 1.1 dyoung }
1290 1.1 dyoung
1291 1.1 dyoung vtw_debug_init();
1292 1.1 dyoung }
1293 1.1 dyoung
1294 1.1 dyoung /*!\brief map class to TCP MSL
1295 1.1 dyoung */
1296 1.1 dyoung static inline uint32_t
1297 1.1 dyoung class_to_msl(int class)
1298 1.1 dyoung {
1299 1.1 dyoung switch (class) {
1300 1.1 dyoung case 0:
1301 1.1 dyoung case 1:
1302 1.1 dyoung return tcp_msl_remote ? tcp_msl_remote : (TCPTV_MSL >> 0);
1303 1.1 dyoung case 2:
1304 1.1 dyoung return tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1);
1305 1.1 dyoung default:
1306 1.1 dyoung return tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2);
1307 1.1 dyoung }
1308 1.1 dyoung }
1309 1.1 dyoung
1310 1.1 dyoung /*!\brief map TCP MSL to class
1311 1.1 dyoung */
1312 1.1 dyoung static inline uint32_t
1313 1.1 dyoung msl_to_class(int msl)
1314 1.1 dyoung {
1315 1.1 dyoung if (tcp_msl_enable) {
1316 1.1 dyoung if (msl <= (tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2)))
1317 1.1 dyoung return 1+2;
1318 1.1 dyoung if (msl <= (tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1)))
1319 1.1 dyoung return 1+1;
1320 1.1 dyoung return 1;
1321 1.1 dyoung }
1322 1.1 dyoung return 0;
1323 1.1 dyoung }
1324 1.1 dyoung
1325 1.1 dyoung /*!\brief allocate a vtw entry
1326 1.1 dyoung */
1327 1.1 dyoung static inline vtw_t *
1328 1.1 dyoung vtw_alloc(vtw_ctl_t *ctl)
1329 1.1 dyoung {
1330 1.1 dyoung vtw_t *vtw = 0;
1331 1.1 dyoung int stuck = 0;
1332 1.1 dyoung int avail = ctl ? (ctl->nalloc + ctl->nfree) : 0;
1333 1.1 dyoung int msl;
1334 1.1 dyoung
1335 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
1336 1.1 dyoung
1337 1.1 dyoung /* If no resources, we will not get far.
1338 1.1 dyoung */
1339 1.1 dyoung if (!ctl || !ctl->base.v4 || avail <= 0)
1340 1.1 dyoung return 0;
1341 1.1 dyoung
1342 1.1 dyoung /* Obtain a free one.
1343 1.1 dyoung */
1344 1.1 dyoung while (!ctl->nfree) {
1345 1.1 dyoung vtw_age(ctl, 0);
1346 1.1 dyoung
1347 1.1 dyoung if (++stuck > avail) {
1348 1.1 dyoung /* When in transition between
1349 1.1 dyoung * schemes (classless, classed) we
1350 1.1 dyoung * can be stuck having to await the
1351 1.1 dyoung * expiration of cross-allocated entries.
1352 1.1 dyoung *
1353 1.1 dyoung * Returning zero means we will fall back to the
1354 1.1 dyoung * traditional TIME_WAIT handling, except in the
1355 1.1 dyoung * case of a re-shed, in which case we cannot
1356 1.1 dyoung * perform the reshecd, but will retain the extant
1357 1.1 dyoung * entry.
1358 1.1 dyoung */
1359 1.1 dyoung db_trace(KTR_VTW
1360 1.1 dyoung , (ctl, "vtw:!none free in class %x %x/%x"
1361 1.1 dyoung , ctl->clidx
1362 1.1 dyoung , ctl->nalloc, ctl->nfree));
1363 1.1 dyoung
1364 1.1 dyoung return 0;
1365 1.1 dyoung }
1366 1.1 dyoung }
1367 1.1 dyoung
1368 1.1 dyoung vtw = ctl->alloc.v;
1369 1.1 dyoung
1370 1.1 dyoung if (vtw->msl_class != ctl->clidx) {
1371 1.1 dyoung /* Usurping rules:
1372 1.1 dyoung * 0 -> {1,2,3} or {1,2,3} -> 0
1373 1.1 dyoung */
1374 1.1 dyoung KASSERT(!vtw->msl_class || !ctl->clidx);
1375 1.1 dyoung
1376 1.1 dyoung if (vtw->hashed || vtw->expire.tv_sec) {
1377 1.1 dyoung /* As this is owned by some other class,
1378 1.1 dyoung * we must wait for it to expire it.
1379 1.1 dyoung * This will only happen on class/classless
1380 1.1 dyoung * transitions, which are guaranteed to progress
1381 1.1 dyoung * to completion in small finite time, barring bugs.
1382 1.1 dyoung */
1383 1.1 dyoung db_trace(KTR_VTW
1384 1.1 dyoung , (ctl, "vtw:!%p class %x!=%x %x:%x%s"
1385 1.1 dyoung , vtw, vtw->msl_class, ctl->clidx
1386 1.1 dyoung , vtw->expire.tv_sec
1387 1.1 dyoung , vtw->expire.tv_usec
1388 1.1 dyoung , vtw->hashed ? " hashed" : ""));
1389 1.1 dyoung
1390 1.1 dyoung return 0;
1391 1.1 dyoung }
1392 1.1 dyoung
1393 1.1 dyoung db_trace(KTR_VTW
1394 1.1 dyoung , (ctl, "vtw:!%p usurped from %x to %x"
1395 1.1 dyoung , vtw, vtw->msl_class, ctl->clidx));
1396 1.1 dyoung
1397 1.1 dyoung vtw->msl_class = ctl->clidx;
1398 1.1 dyoung }
1399 1.1 dyoung
1400 1.1 dyoung if (vtw_alive(vtw)) {
1401 1.1 dyoung KASSERT(0 && "next free not free");
1402 1.1 dyoung return 0;
1403 1.1 dyoung }
1404 1.1 dyoung
1405 1.1 dyoung /* Advance allocation poiter.
1406 1.1 dyoung */
1407 1.1 dyoung ctl->alloc.v = vtw_next(ctl, vtw);
1408 1.1 dyoung
1409 1.1 dyoung --ctl->nfree;
1410 1.1 dyoung ++ctl->nalloc;
1411 1.1 dyoung
1412 1.1 dyoung msl = (2 * class_to_msl(ctl->clidx) * 1000) / PR_SLOWHZ; // msec
1413 1.1 dyoung
1414 1.1 dyoung /* mark expiration
1415 1.1 dyoung */
1416 1.3 drochner getmicrouptime(&vtw->expire);
1417 1.1 dyoung
1418 1.1 dyoung /* Move expiration into the future.
1419 1.1 dyoung */
1420 1.1 dyoung vtw->expire.tv_sec += msl / 1000;
1421 1.1 dyoung vtw->expire.tv_usec += 1000 * (msl % 1000);
1422 1.1 dyoung
1423 1.1 dyoung while (vtw->expire.tv_usec >= 1000*1000) {
1424 1.1 dyoung vtw->expire.tv_usec -= 1000*1000;
1425 1.1 dyoung vtw->expire.tv_sec += 1;
1426 1.1 dyoung }
1427 1.1 dyoung
1428 1.1 dyoung if (!ctl->oldest.v)
1429 1.1 dyoung ctl->oldest.v = vtw;
1430 1.1 dyoung
1431 1.1 dyoung return vtw;
1432 1.1 dyoung }
1433 1.1 dyoung
1434 1.1 dyoung /*!\brief expiration
1435 1.1 dyoung */
1436 1.1 dyoung static int
1437 1.1 dyoung vtw_age(vtw_ctl_t *ctl, struct timeval *_when)
1438 1.1 dyoung {
1439 1.1 dyoung vtw_t *vtw;
1440 1.1 dyoung struct timeval then, *when = _when;
1441 1.1 dyoung int maxtries = 0;
1442 1.1 dyoung
1443 1.1 dyoung if (!ctl->oldest.v) {
1444 1.1 dyoung KASSERT(!ctl->nalloc);
1445 1.1 dyoung return 0;
1446 1.1 dyoung }
1447 1.1 dyoung
1448 1.1 dyoung for (vtw = ctl->oldest.v; vtw && ctl->nalloc; ) {
1449 1.1 dyoung if (++maxtries > ctl->nalloc)
1450 1.1 dyoung break;
1451 1.1 dyoung
1452 1.1 dyoung if (vtw->msl_class != ctl->clidx) {
1453 1.1 dyoung db_trace(KTR_VTW
1454 1.1 dyoung , (vtw, "vtw:!age class mismatch %x != %x"
1455 1.1 dyoung , vtw->msl_class, ctl->clidx));
1456 1.1 dyoung /* XXXX
1457 1.1 dyoung * See if the appropriate action is to skip to the next.
1458 1.1 dyoung * XXXX
1459 1.1 dyoung */
1460 1.1 dyoung ctl->oldest.v = vtw = vtw_next(ctl, vtw);
1461 1.1 dyoung continue;
1462 1.1 dyoung }
1463 1.1 dyoung if (!when) {
1464 1.1 dyoung /* Latch oldest timeval if none specified.
1465 1.1 dyoung */
1466 1.1 dyoung then = vtw->expire;
1467 1.1 dyoung when = &then;
1468 1.1 dyoung }
1469 1.1 dyoung
1470 1.1 dyoung if (!timercmp(&vtw->expire, when, <=))
1471 1.1 dyoung break;
1472 1.1 dyoung
1473 1.1 dyoung db_trace(KTR_VTW
1474 1.1 dyoung , (vtw, "vtw: expire %x %8.8x:%8.8x %x/%x"
1475 1.1 dyoung , ctl->clidx
1476 1.1 dyoung , vtw->expire.tv_sec
1477 1.1 dyoung , vtw->expire.tv_usec
1478 1.1 dyoung , ctl->nalloc
1479 1.1 dyoung , ctl->nfree));
1480 1.1 dyoung
1481 1.1 dyoung if (!_when)
1482 1.1 dyoung ++vtw_stats.kill;
1483 1.1 dyoung
1484 1.1 dyoung vtw_del(ctl, vtw);
1485 1.1 dyoung vtw = ctl->oldest.v;
1486 1.1 dyoung }
1487 1.1 dyoung
1488 1.1 dyoung return ctl->nalloc; // # remaining allocated
1489 1.1 dyoung }
1490 1.1 dyoung
1491 1.1 dyoung static callout_t vtw_cs;
1492 1.1 dyoung
1493 1.1 dyoung /*!\brief notice the passage of time.
1494 1.1 dyoung * It seems to be getting faster. What happened to the year?
1495 1.1 dyoung */
1496 1.1 dyoung static void
1497 1.1 dyoung vtw_tick(void *arg)
1498 1.1 dyoung {
1499 1.1 dyoung struct timeval now;
1500 1.1 dyoung int i, cnt = 0;
1501 1.1 dyoung
1502 1.3 drochner getmicrouptime(&now);
1503 1.1 dyoung
1504 1.1 dyoung db_trace(KTR_VTW, (arg, "vtk: tick - now %8.8x:%8.8x"
1505 1.1 dyoung , now.tv_sec, now.tv_usec));
1506 1.1 dyoung
1507 1.1 dyoung mutex_enter(softnet_lock);
1508 1.1 dyoung
1509 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
1510 1.1 dyoung cnt += vtw_age(&vtw_tcpv4[i], &now);
1511 1.1 dyoung cnt += vtw_age(&vtw_tcpv6[i], &now);
1512 1.1 dyoung }
1513 1.1 dyoung
1514 1.1 dyoung /* Keep ticks coming while we need them.
1515 1.1 dyoung */
1516 1.1 dyoung if (cnt)
1517 1.1 dyoung callout_schedule(&vtw_cs, hz / 5);
1518 1.1 dyoung else {
1519 1.1 dyoung tcp_vtw_was_enabled = 0;
1520 1.1 dyoung tcbtable.vestige = 0;
1521 1.1 dyoung }
1522 1.1 dyoung mutex_exit(softnet_lock);
1523 1.1 dyoung }
1524 1.1 dyoung
1525 1.1 dyoung /* in_pcblookup_ports assist for handling vestigial entries.
1526 1.1 dyoung */
1527 1.1 dyoung static void *
1528 1.1 dyoung tcp_init_ports_v4(struct in_addr addr, u_int port, int wild)
1529 1.1 dyoung {
1530 1.1 dyoung struct tcp_ports_iterator *it = &tcp_ports_iterator_v4;
1531 1.1 dyoung
1532 1.1 dyoung bzero(it, sizeof (*it));
1533 1.1 dyoung
1534 1.1 dyoung /* Note: the reference to vtw_tcpv4[0] is fine.
1535 1.1 dyoung * We do not need per-class iteration. We just
1536 1.1 dyoung * need to get to the fat, and there is one
1537 1.1 dyoung * shared fat.
1538 1.1 dyoung */
1539 1.1 dyoung if (vtw_tcpv4[0].fat) {
1540 1.1 dyoung it->addr.v4 = addr;
1541 1.1 dyoung it->port = port;
1542 1.1 dyoung it->wild = !!wild;
1543 1.1 dyoung it->ctl = &vtw_tcpv4[0];
1544 1.1 dyoung
1545 1.1 dyoung ++vtw_stats.look[1];
1546 1.1 dyoung }
1547 1.1 dyoung
1548 1.1 dyoung return it;
1549 1.1 dyoung }
1550 1.1 dyoung
1551 1.1 dyoung /*!\brief export an IPv4 vtw.
1552 1.1 dyoung */
1553 1.1 dyoung static int
1554 1.1 dyoung vtw_export_v4(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res)
1555 1.1 dyoung {
1556 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
1557 1.1 dyoung
1558 1.1 dyoung bzero(res, sizeof (*res));
1559 1.1 dyoung
1560 1.1 dyoung if (ctl && vtw) {
1561 1.1 dyoung if (!ctl->clidx && vtw->msl_class)
1562 1.1 dyoung ctl += vtw->msl_class;
1563 1.1 dyoung else
1564 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
1565 1.1 dyoung
1566 1.1 dyoung res->valid = 1;
1567 1.1 dyoung res->v4 = 1;
1568 1.1 dyoung
1569 1.1 dyoung res->faddr.v4.s_addr = v4->faddr;
1570 1.1 dyoung res->laddr.v4.s_addr = v4->laddr;
1571 1.1 dyoung res->fport = v4->fport;
1572 1.1 dyoung res->lport = v4->lport;
1573 1.1 dyoung res->vtw = vtw; // netlock held over call(s)
1574 1.1 dyoung res->ctl = ctl;
1575 1.1 dyoung res->reuse_addr = vtw->reuse_addr;
1576 1.1 dyoung res->reuse_port = vtw->reuse_port;
1577 1.1 dyoung res->snd_nxt = vtw->snd_nxt;
1578 1.1 dyoung res->rcv_nxt = vtw->rcv_nxt;
1579 1.1 dyoung res->rcv_wnd = vtw->rcv_wnd;
1580 1.1 dyoung res->uid = vtw->uid;
1581 1.1 dyoung }
1582 1.1 dyoung
1583 1.1 dyoung return res->valid;
1584 1.1 dyoung }
1585 1.1 dyoung
1586 1.1 dyoung /*!\brief return next port in the port iterator. yowza.
1587 1.1 dyoung */
1588 1.1 dyoung static int
1589 1.1 dyoung tcp_next_port_v4(void *arg, struct vestigial_inpcb *res)
1590 1.1 dyoung {
1591 1.1 dyoung struct tcp_ports_iterator *it = arg;
1592 1.1 dyoung vtw_t *vtw = 0;
1593 1.1 dyoung
1594 1.1 dyoung if (it->ctl)
1595 1.1 dyoung vtw = vtw_next_port_v4(it);
1596 1.1 dyoung
1597 1.1 dyoung if (!vtw)
1598 1.1 dyoung it->ctl = 0;
1599 1.1 dyoung
1600 1.1 dyoung return vtw_export_v4(it->ctl, vtw, res);
1601 1.1 dyoung }
1602 1.1 dyoung
1603 1.1 dyoung static int
1604 1.1 dyoung tcp_lookup_v4(struct in_addr faddr, uint16_t fport,
1605 1.1 dyoung struct in_addr laddr, uint16_t lport,
1606 1.1 dyoung struct vestigial_inpcb *res)
1607 1.1 dyoung {
1608 1.1 dyoung vtw_t *vtw;
1609 1.1 dyoung vtw_ctl_t *ctl;
1610 1.1 dyoung
1611 1.1 dyoung
1612 1.1 dyoung db_trace(KTR_VTW
1613 1.1 dyoung , (res, "vtw: lookup %A:%P %A:%P"
1614 1.1 dyoung , faddr, fport
1615 1.1 dyoung , laddr, lport));
1616 1.1 dyoung
1617 1.1 dyoung vtw = vtw_lookup_hash_v4((ctl = &vtw_tcpv4[0])
1618 1.1 dyoung , faddr.s_addr, fport
1619 1.1 dyoung , laddr.s_addr, lport, 0);
1620 1.1 dyoung
1621 1.1 dyoung return vtw_export_v4(ctl, vtw, res);
1622 1.1 dyoung }
1623 1.1 dyoung
1624 1.1 dyoung /* in_pcblookup_ports assist for handling vestigial entries.
1625 1.1 dyoung */
1626 1.1 dyoung static void *
1627 1.1 dyoung tcp_init_ports_v6(const struct in6_addr *addr, u_int port, int wild)
1628 1.1 dyoung {
1629 1.1 dyoung struct tcp_ports_iterator *it = &tcp_ports_iterator_v6;
1630 1.1 dyoung
1631 1.1 dyoung bzero(it, sizeof (*it));
1632 1.1 dyoung
1633 1.1 dyoung /* Note: the reference to vtw_tcpv6[0] is fine.
1634 1.1 dyoung * We do not need per-class iteration. We just
1635 1.1 dyoung * need to get to the fat, and there is one
1636 1.1 dyoung * shared fat.
1637 1.1 dyoung */
1638 1.1 dyoung if (vtw_tcpv6[0].fat) {
1639 1.1 dyoung it->addr.v6 = *addr;
1640 1.1 dyoung it->port = port;
1641 1.1 dyoung it->wild = !!wild;
1642 1.1 dyoung it->ctl = &vtw_tcpv6[0];
1643 1.1 dyoung
1644 1.1 dyoung ++vtw_stats.look[1];
1645 1.1 dyoung }
1646 1.1 dyoung
1647 1.1 dyoung return it;
1648 1.1 dyoung }
1649 1.1 dyoung
1650 1.1 dyoung /*!\brief export an IPv6 vtw.
1651 1.1 dyoung */
1652 1.1 dyoung static int
1653 1.1 dyoung vtw_export_v6(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res)
1654 1.1 dyoung {
1655 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
1656 1.1 dyoung
1657 1.1 dyoung bzero(res, sizeof (*res));
1658 1.1 dyoung
1659 1.1 dyoung if (ctl && vtw) {
1660 1.1 dyoung if (!ctl->clidx && vtw->msl_class)
1661 1.1 dyoung ctl += vtw->msl_class;
1662 1.1 dyoung else
1663 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
1664 1.1 dyoung
1665 1.1 dyoung res->valid = 1;
1666 1.1 dyoung res->v4 = 0;
1667 1.1 dyoung
1668 1.1 dyoung res->faddr.v6 = v6->faddr;
1669 1.1 dyoung res->laddr.v6 = v6->laddr;
1670 1.1 dyoung res->fport = v6->fport;
1671 1.1 dyoung res->lport = v6->lport;
1672 1.1 dyoung res->vtw = vtw; // netlock held over call(s)
1673 1.1 dyoung res->ctl = ctl;
1674 1.1 dyoung
1675 1.1 dyoung res->v6only = vtw->v6only;
1676 1.1 dyoung res->reuse_addr = vtw->reuse_addr;
1677 1.1 dyoung res->reuse_port = vtw->reuse_port;
1678 1.1 dyoung
1679 1.1 dyoung res->snd_nxt = vtw->snd_nxt;
1680 1.1 dyoung res->rcv_nxt = vtw->rcv_nxt;
1681 1.1 dyoung res->rcv_wnd = vtw->rcv_wnd;
1682 1.1 dyoung res->uid = vtw->uid;
1683 1.1 dyoung }
1684 1.1 dyoung
1685 1.1 dyoung return res->valid;
1686 1.1 dyoung }
1687 1.1 dyoung
1688 1.1 dyoung static int
1689 1.1 dyoung tcp_next_port_v6(void *arg, struct vestigial_inpcb *res)
1690 1.1 dyoung {
1691 1.1 dyoung struct tcp_ports_iterator *it = arg;
1692 1.1 dyoung vtw_t *vtw = 0;
1693 1.1 dyoung
1694 1.1 dyoung if (it->ctl)
1695 1.1 dyoung vtw = vtw_next_port_v6(it);
1696 1.1 dyoung
1697 1.1 dyoung if (!vtw)
1698 1.1 dyoung it->ctl = 0;
1699 1.1 dyoung
1700 1.1 dyoung return vtw_export_v6(it->ctl, vtw, res);
1701 1.1 dyoung }
1702 1.1 dyoung
1703 1.1 dyoung static int
1704 1.1 dyoung tcp_lookup_v6(const struct in6_addr *faddr, uint16_t fport,
1705 1.1 dyoung const struct in6_addr *laddr, uint16_t lport,
1706 1.1 dyoung struct vestigial_inpcb *res)
1707 1.1 dyoung {
1708 1.1 dyoung vtw_ctl_t *ctl;
1709 1.1 dyoung vtw_t *vtw;
1710 1.1 dyoung
1711 1.1 dyoung db_trace(KTR_VTW
1712 1.1 dyoung , (res, "vtw: lookup %6A:%P %6A:%P"
1713 1.1 dyoung , db_store(faddr, sizeof (*faddr)), fport
1714 1.1 dyoung , db_store(laddr, sizeof (*laddr)), lport));
1715 1.1 dyoung
1716 1.1 dyoung vtw = vtw_lookup_hash_v6((ctl = &vtw_tcpv6[0])
1717 1.1 dyoung , faddr, fport
1718 1.1 dyoung , laddr, lport, 0);
1719 1.1 dyoung
1720 1.1 dyoung return vtw_export_v6(ctl, vtw, res);
1721 1.1 dyoung }
1722 1.1 dyoung
1723 1.1 dyoung static vestigial_hooks_t tcp_hooks = {
1724 1.1 dyoung .init_ports4 = tcp_init_ports_v4,
1725 1.1 dyoung .next_port4 = tcp_next_port_v4,
1726 1.1 dyoung .lookup4 = tcp_lookup_v4,
1727 1.1 dyoung .init_ports6 = tcp_init_ports_v6,
1728 1.1 dyoung .next_port6 = tcp_next_port_v6,
1729 1.1 dyoung .lookup6 = tcp_lookup_v6,
1730 1.1 dyoung };
1731 1.1 dyoung
1732 1.1 dyoung static bool
1733 1.1 dyoung vtw_select(int af, fatp_ctl_t **fatp, vtw_ctl_t **ctlp)
1734 1.1 dyoung {
1735 1.1 dyoung fatp_ctl_t *fat;
1736 1.1 dyoung vtw_ctl_t *ctl;
1737 1.1 dyoung
1738 1.1 dyoung switch (af) {
1739 1.1 dyoung case AF_INET:
1740 1.1 dyoung fat = &fat_tcpv4;
1741 1.1 dyoung ctl = &vtw_tcpv4[0];
1742 1.1 dyoung break;
1743 1.1 dyoung case AF_INET6:
1744 1.1 dyoung fat = &fat_tcpv6;
1745 1.1 dyoung ctl = &vtw_tcpv6[0];
1746 1.1 dyoung break;
1747 1.1 dyoung default:
1748 1.1 dyoung return false;
1749 1.1 dyoung }
1750 1.1 dyoung if (fatp != NULL)
1751 1.1 dyoung *fatp = fat;
1752 1.1 dyoung if (ctlp != NULL)
1753 1.1 dyoung *ctlp = ctl;
1754 1.1 dyoung return true;
1755 1.1 dyoung }
1756 1.1 dyoung
1757 1.1 dyoung /*!\brief initialize controlling instance
1758 1.1 dyoung */
1759 1.1 dyoung static int
1760 1.1 dyoung vtw_control_init(int af)
1761 1.1 dyoung {
1762 1.1 dyoung fatp_ctl_t *fat;
1763 1.1 dyoung vtw_ctl_t *ctl;
1764 1.6 dyoung fatp_t *fat_base;
1765 1.6 dyoung fatp_t **fat_hash;
1766 1.6 dyoung vtw_t *ctl_base_v;
1767 1.6 dyoung uint32_t n, m;
1768 1.6 dyoung size_t sz;
1769 1.6 dyoung
1770 1.6 dyoung KASSERT(powerof2(tcp_vtw_entries));
1771 1.1 dyoung
1772 1.1 dyoung if (!vtw_select(af, &fat, &ctl))
1773 1.1 dyoung return EAFNOSUPPORT;
1774 1.1 dyoung
1775 1.6 dyoung if (fat->hash != NULL) {
1776 1.6 dyoung KASSERT(fat->base != NULL && ctl->base.v != NULL);
1777 1.6 dyoung return 0;
1778 1.6 dyoung }
1779 1.6 dyoung
1780 1.6 dyoung /* Allocate 10% more capacity in the fat pointers.
1781 1.6 dyoung * We should only need ~#hash additional based on
1782 1.6 dyoung * how they age, but TIME_WAIT assassination could cause
1783 1.6 dyoung * sparse fat pointer utilisation.
1784 1.6 dyoung */
1785 1.6 dyoung m = 512;
1786 1.6 dyoung n = 2*m + (11 * (tcp_vtw_entries / fatp_ntags())) / 10;
1787 1.6 dyoung sz = (ctl->is_v4 ? sizeof(vtw_v4_t) : sizeof(vtw_v6_t));
1788 1.6 dyoung
1789 1.6 dyoung fat_hash = kmem_zalloc(2*m * sizeof(fatp_t *), KM_NOSLEEP);
1790 1.6 dyoung
1791 1.6 dyoung if (fat_hash == NULL) {
1792 1.6 dyoung printf("%s: could not allocate %zu bytes for "
1793 1.6 dyoung "hash anchors", __func__, 2*m * sizeof(fatp_t *));
1794 1.6 dyoung return ENOMEM;
1795 1.6 dyoung }
1796 1.1 dyoung
1797 1.6 dyoung fat_base = kmem_zalloc(2*n * sizeof(fatp_t), KM_NOSLEEP);
1798 1.1 dyoung
1799 1.6 dyoung if (fat_base == NULL) {
1800 1.6 dyoung kmem_free(fat_hash, 2*m * sizeof (fatp_t *));
1801 1.6 dyoung printf("%s: could not allocate %zu bytes for "
1802 1.6 dyoung "fatp_t array", __func__, 2*n * sizeof(fatp_t));
1803 1.6 dyoung return ENOMEM;
1804 1.6 dyoung }
1805 1.1 dyoung
1806 1.6 dyoung ctl_base_v = kmem_zalloc(tcp_vtw_entries * sz, KM_NOSLEEP);
1807 1.1 dyoung
1808 1.6 dyoung if (ctl_base_v == NULL) {
1809 1.6 dyoung kmem_free(fat_hash, 2*m * sizeof (fatp_t *));
1810 1.6 dyoung kmem_free(fat_base, 2*n * sizeof(fatp_t));
1811 1.6 dyoung printf("%s: could not allocate %zu bytes for "
1812 1.6 dyoung "vtw_t array", __func__, tcp_vtw_entries * sz);
1813 1.6 dyoung return ENOMEM;
1814 1.1 dyoung }
1815 1.1 dyoung
1816 1.6 dyoung fatp_init(fat, n, m, fat_base, fat_hash);
1817 1.1 dyoung
1818 1.6 dyoung vtw_init(fat, ctl, tcp_vtw_entries, ctl_base_v);
1819 1.1 dyoung
1820 1.1 dyoung return 0;
1821 1.1 dyoung }
1822 1.1 dyoung
1823 1.1 dyoung /*!\brief select controlling instance
1824 1.1 dyoung */
1825 1.1 dyoung static vtw_ctl_t *
1826 1.1 dyoung vtw_control(int af, uint32_t msl)
1827 1.1 dyoung {
1828 1.1 dyoung fatp_ctl_t *fat;
1829 1.1 dyoung vtw_ctl_t *ctl;
1830 1.1 dyoung int class = msl_to_class(msl);
1831 1.1 dyoung
1832 1.1 dyoung if (!vtw_select(af, &fat, &ctl))
1833 1.1 dyoung return NULL;
1834 1.1 dyoung
1835 1.1 dyoung if (!fat->base || !ctl->base.v)
1836 1.1 dyoung return NULL;
1837 1.1 dyoung
1838 1.5 dyoung if (!tcp_vtw_was_enabled) {
1839 1.5 dyoung /* This guarantees is timer ticks until we no longer need them.
1840 1.5 dyoung */
1841 1.5 dyoung tcp_vtw_was_enabled = 1;
1842 1.5 dyoung
1843 1.5 dyoung callout_schedule(&vtw_cs, hz / 5);
1844 1.5 dyoung
1845 1.5 dyoung tcbtable.vestige = &tcp_hooks;
1846 1.5 dyoung }
1847 1.5 dyoung
1848 1.1 dyoung return ctl + class;
1849 1.1 dyoung }
1850 1.1 dyoung
1851 1.1 dyoung /*!\brief add TCP pcb to vestigial timewait
1852 1.1 dyoung */
1853 1.1 dyoung int
1854 1.1 dyoung vtw_add(int af, struct tcpcb *tp)
1855 1.1 dyoung {
1856 1.1 dyoung int enable;
1857 1.1 dyoung vtw_ctl_t *ctl;
1858 1.1 dyoung vtw_t *vtw;
1859 1.1 dyoung
1860 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
1861 1.1 dyoung
1862 1.1 dyoung ctl = vtw_control(af, tp->t_msl);
1863 1.1 dyoung if (!ctl)
1864 1.1 dyoung return 0;
1865 1.1 dyoung
1866 1.1 dyoung enable = (af == AF_INET) ? tcp4_vtw_enable : tcp6_vtw_enable;
1867 1.1 dyoung
1868 1.1 dyoung vtw = vtw_alloc(ctl);
1869 1.1 dyoung
1870 1.1 dyoung if (vtw) {
1871 1.1 dyoung vtw->snd_nxt = tp->snd_nxt;
1872 1.1 dyoung vtw->rcv_nxt = tp->rcv_nxt;
1873 1.1 dyoung
1874 1.1 dyoung switch (af) {
1875 1.1 dyoung case AF_INET: {
1876 1.1 dyoung struct inpcb *inp = tp->t_inpcb;
1877 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
1878 1.1 dyoung
1879 1.1 dyoung v4->faddr = inp->inp_faddr.s_addr;
1880 1.1 dyoung v4->laddr = inp->inp_laddr.s_addr;
1881 1.1 dyoung v4->fport = inp->inp_fport;
1882 1.1 dyoung v4->lport = inp->inp_lport;
1883 1.1 dyoung
1884 1.1 dyoung vtw->reuse_port = !!(inp->inp_socket->so_options
1885 1.1 dyoung & SO_REUSEPORT);
1886 1.1 dyoung vtw->reuse_addr = !!(inp->inp_socket->so_options
1887 1.1 dyoung & SO_REUSEADDR);
1888 1.1 dyoung vtw->v6only = 0;
1889 1.1 dyoung vtw->uid = inp->inp_socket->so_uidinfo->ui_uid;
1890 1.1 dyoung
1891 1.1 dyoung vtw_inshash_v4(ctl, vtw);
1892 1.1 dyoung
1893 1.1 dyoung
1894 1.1 dyoung #ifdef VTW_DEBUG
1895 1.1 dyoung /* Immediate lookup (connected and port) to
1896 1.1 dyoung * ensure at least that works!
1897 1.1 dyoung */
1898 1.1 dyoung if (enable & 4) {
1899 1.1 dyoung KASSERT(vtw_lookup_hash_v4
1900 1.1 dyoung (ctl
1901 1.1 dyoung , inp->inp_faddr.s_addr, inp->inp_fport
1902 1.1 dyoung , inp->inp_laddr.s_addr, inp->inp_lport
1903 1.1 dyoung , 0)
1904 1.1 dyoung == vtw);
1905 1.1 dyoung KASSERT(vtw_lookup_hash_v4
1906 1.1 dyoung (ctl
1907 1.1 dyoung , inp->inp_faddr.s_addr, inp->inp_fport
1908 1.1 dyoung , inp->inp_laddr.s_addr, inp->inp_lport
1909 1.1 dyoung , 1));
1910 1.1 dyoung }
1911 1.1 dyoung /* Immediate port iterator functionality check: not wild
1912 1.1 dyoung */
1913 1.1 dyoung if (enable & 8) {
1914 1.1 dyoung struct tcp_ports_iterator *it;
1915 1.1 dyoung struct vestigial_inpcb res;
1916 1.1 dyoung int cnt = 0;
1917 1.1 dyoung
1918 1.1 dyoung it = tcp_init_ports_v4(inp->inp_laddr
1919 1.1 dyoung , inp->inp_lport, 0);
1920 1.1 dyoung
1921 1.1 dyoung while (tcp_next_port_v4(it, &res)) {
1922 1.1 dyoung ++cnt;
1923 1.1 dyoung }
1924 1.1 dyoung KASSERT(cnt);
1925 1.1 dyoung }
1926 1.1 dyoung /* Immediate port iterator functionality check: wild
1927 1.1 dyoung */
1928 1.1 dyoung if (enable & 16) {
1929 1.1 dyoung struct tcp_ports_iterator *it;
1930 1.1 dyoung struct vestigial_inpcb res;
1931 1.1 dyoung struct in_addr any;
1932 1.1 dyoung int cnt = 0;
1933 1.1 dyoung
1934 1.1 dyoung any.s_addr = htonl(INADDR_ANY);
1935 1.1 dyoung
1936 1.1 dyoung it = tcp_init_ports_v4(any, inp->inp_lport, 1);
1937 1.1 dyoung
1938 1.1 dyoung while (tcp_next_port_v4(it, &res)) {
1939 1.1 dyoung ++cnt;
1940 1.1 dyoung }
1941 1.1 dyoung KASSERT(cnt);
1942 1.1 dyoung }
1943 1.1 dyoung #endif /* VTW_DEBUG */
1944 1.1 dyoung break;
1945 1.1 dyoung }
1946 1.1 dyoung
1947 1.1 dyoung case AF_INET6: {
1948 1.1 dyoung struct in6pcb *inp = tp->t_in6pcb;
1949 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
1950 1.1 dyoung
1951 1.1 dyoung v6->faddr = inp->in6p_faddr;
1952 1.1 dyoung v6->laddr = inp->in6p_laddr;
1953 1.1 dyoung v6->fport = inp->in6p_fport;
1954 1.1 dyoung v6->lport = inp->in6p_lport;
1955 1.1 dyoung
1956 1.1 dyoung vtw->reuse_port = !!(inp->in6p_socket->so_options
1957 1.1 dyoung & SO_REUSEPORT);
1958 1.1 dyoung vtw->reuse_addr = !!(inp->in6p_socket->so_options
1959 1.1 dyoung & SO_REUSEADDR);
1960 1.1 dyoung vtw->v6only = !!(inp->in6p_flags
1961 1.1 dyoung & IN6P_IPV6_V6ONLY);
1962 1.1 dyoung vtw->uid = inp->in6p_socket->so_uidinfo->ui_uid;
1963 1.1 dyoung
1964 1.1 dyoung vtw_inshash_v6(ctl, vtw);
1965 1.1 dyoung #ifdef VTW_DEBUG
1966 1.1 dyoung /* Immediate lookup (connected and port) to
1967 1.1 dyoung * ensure at least that works!
1968 1.1 dyoung */
1969 1.1 dyoung if (enable & 4) {
1970 1.1 dyoung KASSERT(vtw_lookup_hash_v6(ctl
1971 1.1 dyoung , &inp->in6p_faddr, inp->in6p_fport
1972 1.1 dyoung , &inp->in6p_laddr, inp->in6p_lport
1973 1.1 dyoung , 0)
1974 1.1 dyoung == vtw);
1975 1.1 dyoung KASSERT(vtw_lookup_hash_v6
1976 1.1 dyoung (ctl
1977 1.1 dyoung , &inp->in6p_faddr, inp->in6p_fport
1978 1.1 dyoung , &inp->in6p_laddr, inp->in6p_lport
1979 1.1 dyoung , 1));
1980 1.1 dyoung }
1981 1.1 dyoung /* Immediate port iterator functionality check: not wild
1982 1.1 dyoung */
1983 1.1 dyoung if (enable & 8) {
1984 1.1 dyoung struct tcp_ports_iterator *it;
1985 1.1 dyoung struct vestigial_inpcb res;
1986 1.1 dyoung int cnt = 0;
1987 1.1 dyoung
1988 1.1 dyoung it = tcp_init_ports_v6(&inp->in6p_laddr
1989 1.1 dyoung , inp->in6p_lport, 0);
1990 1.1 dyoung
1991 1.1 dyoung while (tcp_next_port_v6(it, &res)) {
1992 1.1 dyoung ++cnt;
1993 1.1 dyoung }
1994 1.1 dyoung KASSERT(cnt);
1995 1.1 dyoung }
1996 1.1 dyoung /* Immediate port iterator functionality check: wild
1997 1.1 dyoung */
1998 1.1 dyoung if (enable & 16) {
1999 1.1 dyoung struct tcp_ports_iterator *it;
2000 1.1 dyoung struct vestigial_inpcb res;
2001 1.1 dyoung static struct in6_addr any = IN6ADDR_ANY_INIT;
2002 1.1 dyoung int cnt = 0;
2003 1.1 dyoung
2004 1.1 dyoung it = tcp_init_ports_v6(&any
2005 1.1 dyoung , inp->in6p_lport, 1);
2006 1.1 dyoung
2007 1.1 dyoung while (tcp_next_port_v6(it, &res)) {
2008 1.1 dyoung ++cnt;
2009 1.1 dyoung }
2010 1.1 dyoung KASSERT(cnt);
2011 1.1 dyoung }
2012 1.1 dyoung #endif /* VTW_DEBUG */
2013 1.1 dyoung break;
2014 1.1 dyoung }
2015 1.1 dyoung }
2016 1.1 dyoung
2017 1.1 dyoung tcp_canceltimers(tp);
2018 1.1 dyoung tp = tcp_close(tp);
2019 1.1 dyoung KASSERT(!tp);
2020 1.1 dyoung
2021 1.1 dyoung return 1;
2022 1.1 dyoung }
2023 1.1 dyoung
2024 1.1 dyoung return 0;
2025 1.1 dyoung }
2026 1.1 dyoung
2027 1.1 dyoung /*!\brief restart timer for vestigial time-wait entry
2028 1.1 dyoung */
2029 1.1 dyoung static void
2030 1.1 dyoung vtw_restart_v4(vestigial_inpcb_t *vp)
2031 1.1 dyoung {
2032 1.1 dyoung vtw_v4_t copy = *(vtw_v4_t*)vp->vtw;
2033 1.1 dyoung vtw_t *vtw;
2034 1.1 dyoung vtw_t *cp = ©.common;
2035 1.1 dyoung vtw_ctl_t *ctl;
2036 1.1 dyoung
2037 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
2038 1.1 dyoung
2039 1.1 dyoung db_trace(KTR_VTW
2040 1.1 dyoung , (vp->vtw, "vtw: restart %A:%P %A:%P"
2041 1.1 dyoung , vp->faddr.v4.s_addr, vp->fport
2042 1.1 dyoung , vp->laddr.v4.s_addr, vp->lport));
2043 1.1 dyoung
2044 1.1 dyoung /* Class might have changed, so have a squiz.
2045 1.1 dyoung */
2046 1.1 dyoung ctl = vtw_control(AF_INET, class_to_msl(cp->msl_class));
2047 1.1 dyoung vtw = vtw_alloc(ctl);
2048 1.1 dyoung
2049 1.1 dyoung if (vtw) {
2050 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
2051 1.1 dyoung
2052 1.1 dyoung /* Safe now to unhash the old entry
2053 1.1 dyoung */
2054 1.1 dyoung vtw_del(vp->ctl, vp->vtw);
2055 1.1 dyoung
2056 1.1 dyoung vtw->snd_nxt = cp->snd_nxt;
2057 1.1 dyoung vtw->rcv_nxt = cp->rcv_nxt;
2058 1.1 dyoung
2059 1.1 dyoung v4->faddr = copy.faddr;
2060 1.1 dyoung v4->laddr = copy.laddr;
2061 1.1 dyoung v4->fport = copy.fport;
2062 1.1 dyoung v4->lport = copy.lport;
2063 1.1 dyoung
2064 1.1 dyoung vtw->reuse_port = cp->reuse_port;
2065 1.1 dyoung vtw->reuse_addr = cp->reuse_addr;
2066 1.1 dyoung vtw->v6only = 0;
2067 1.1 dyoung vtw->uid = cp->uid;
2068 1.1 dyoung
2069 1.1 dyoung vtw_inshash_v4(ctl, vtw);
2070 1.1 dyoung }
2071 1.1 dyoung
2072 1.1 dyoung vp->valid = 0;
2073 1.1 dyoung }
2074 1.1 dyoung
2075 1.1 dyoung /*!\brief restart timer for vestigial time-wait entry
2076 1.1 dyoung */
2077 1.1 dyoung static void
2078 1.1 dyoung vtw_restart_v6(vestigial_inpcb_t *vp)
2079 1.1 dyoung {
2080 1.1 dyoung vtw_v6_t copy = *(vtw_v6_t*)vp->vtw;
2081 1.1 dyoung vtw_t *vtw;
2082 1.1 dyoung vtw_t *cp = ©.common;
2083 1.1 dyoung vtw_ctl_t *ctl;
2084 1.1 dyoung
2085 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
2086 1.1 dyoung
2087 1.1 dyoung db_trace(KTR_VTW
2088 1.1 dyoung , (vp->vtw, "vtw: restart %6A:%P %6A:%P"
2089 1.1 dyoung , db_store(&vp->faddr.v6, sizeof (vp->faddr.v6))
2090 1.1 dyoung , vp->fport
2091 1.1 dyoung , db_store(&vp->laddr.v6, sizeof (vp->laddr.v6))
2092 1.1 dyoung , vp->lport));
2093 1.1 dyoung
2094 1.1 dyoung /* Class might have changed, so have a squiz.
2095 1.1 dyoung */
2096 1.1 dyoung ctl = vtw_control(AF_INET6, class_to_msl(cp->msl_class));
2097 1.1 dyoung vtw = vtw_alloc(ctl);
2098 1.1 dyoung
2099 1.1 dyoung if (vtw) {
2100 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
2101 1.1 dyoung
2102 1.1 dyoung /* Safe now to unhash the old entry
2103 1.1 dyoung */
2104 1.1 dyoung vtw_del(vp->ctl, vp->vtw);
2105 1.1 dyoung
2106 1.1 dyoung vtw->snd_nxt = cp->snd_nxt;
2107 1.1 dyoung vtw->rcv_nxt = cp->rcv_nxt;
2108 1.1 dyoung
2109 1.1 dyoung v6->faddr = copy.faddr;
2110 1.1 dyoung v6->laddr = copy.laddr;
2111 1.1 dyoung v6->fport = copy.fport;
2112 1.1 dyoung v6->lport = copy.lport;
2113 1.1 dyoung
2114 1.1 dyoung vtw->reuse_port = cp->reuse_port;
2115 1.1 dyoung vtw->reuse_addr = cp->reuse_addr;
2116 1.1 dyoung vtw->v6only = cp->v6only;
2117 1.1 dyoung vtw->uid = cp->uid;
2118 1.1 dyoung
2119 1.1 dyoung vtw_inshash_v6(ctl, vtw);
2120 1.1 dyoung }
2121 1.1 dyoung
2122 1.1 dyoung vp->valid = 0;
2123 1.1 dyoung }
2124 1.1 dyoung
2125 1.1 dyoung /*!\brief restart timer for vestigial time-wait entry
2126 1.1 dyoung */
2127 1.1 dyoung void
2128 1.1 dyoung vtw_restart(vestigial_inpcb_t *vp)
2129 1.1 dyoung {
2130 1.1 dyoung if (!vp || !vp->valid)
2131 1.1 dyoung return;
2132 1.1 dyoung
2133 1.1 dyoung if (vp->v4)
2134 1.1 dyoung vtw_restart_v4(vp);
2135 1.1 dyoung else
2136 1.1 dyoung vtw_restart_v6(vp);
2137 1.1 dyoung }
2138 1.1 dyoung
2139 1.1 dyoung int
2140 1.7 dyoung sysctl_tcp_vtw_enable(SYSCTLFN_ARGS)
2141 1.7 dyoung {
2142 1.7 dyoung int en, rc;
2143 1.7 dyoung struct sysctlnode node;
2144 1.7 dyoung
2145 1.7 dyoung node = *rnode;
2146 1.7 dyoung en = *(int *)rnode->sysctl_data;
2147 1.7 dyoung node.sysctl_data = &en;
2148 1.7 dyoung
2149 1.7 dyoung rc = sysctl_lookup(SYSCTLFN_CALL(&node));
2150 1.7 dyoung if (rc != 0 || newp == NULL)
2151 1.7 dyoung return rc;
2152 1.7 dyoung
2153 1.7 dyoung if (rnode->sysctl_data != &tcp4_vtw_enable &&
2154 1.7 dyoung rnode->sysctl_data != &tcp6_vtw_enable)
2155 1.7 dyoung rc = ENOENT;
2156 1.7 dyoung else if ((en & 1) == 0)
2157 1.7 dyoung rc = 0;
2158 1.7 dyoung else if (rnode->sysctl_data == &tcp4_vtw_enable)
2159 1.7 dyoung rc = vtw_control_init(AF_INET);
2160 1.7 dyoung else /* rnode->sysctl_data == &tcp6_vtw_enable */
2161 1.7 dyoung rc = vtw_control_init(AF_INET6);
2162 1.7 dyoung
2163 1.7 dyoung if (rc == 0)
2164 1.7 dyoung *(int *)rnode->sysctl_data = en;
2165 1.7 dyoung
2166 1.7 dyoung return rc;
2167 1.7 dyoung }
2168 1.7 dyoung
2169 1.7 dyoung int
2170 1.1 dyoung vtw_earlyinit(void)
2171 1.1 dyoung {
2172 1.5 dyoung int i, rc;
2173 1.1 dyoung
2174 1.5 dyoung callout_init(&vtw_cs, 0);
2175 1.5 dyoung callout_setfunc(&vtw_cs, vtw_tick, 0);
2176 1.1 dyoung
2177 1.5 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
2178 1.5 dyoung vtw_tcpv4[i].is_v4 = 1;
2179 1.5 dyoung vtw_tcpv6[i].is_v6 = 1;
2180 1.1 dyoung }
2181 1.1 dyoung
2182 1.7 dyoung if ((tcp4_vtw_enable & 1) != 0 &&
2183 1.7 dyoung (rc = vtw_control_init(AF_INET)) != 0)
2184 1.7 dyoung return rc;
2185 1.7 dyoung
2186 1.7 dyoung if ((tcp6_vtw_enable & 1) != 0 &&
2187 1.1 dyoung (rc = vtw_control_init(AF_INET6)) != 0)
2188 1.1 dyoung return rc;
2189 1.1 dyoung
2190 1.1 dyoung return 0;
2191 1.1 dyoung }
2192 1.1 dyoung
2193 1.1 dyoung #ifdef VTW_DEBUG
2194 1.1 dyoung #include <sys/syscallargs.h>
2195 1.1 dyoung #include <sys/sysctl.h>
2196 1.1 dyoung
2197 1.1 dyoung /*!\brief add lalp, fafp entries for debug
2198 1.1 dyoung */
2199 1.1 dyoung int
2200 1.1 dyoung vtw_debug_add(int af, sin_either_t *la, sin_either_t *fa, int msl, int class)
2201 1.1 dyoung {
2202 1.1 dyoung vtw_ctl_t *ctl;
2203 1.1 dyoung vtw_t *vtw;
2204 1.1 dyoung
2205 1.1 dyoung ctl = vtw_control(af, msl ? msl : class_to_msl(class));
2206 1.1 dyoung if (!ctl)
2207 1.1 dyoung return 0;
2208 1.1 dyoung
2209 1.1 dyoung vtw = vtw_alloc(ctl);
2210 1.1 dyoung
2211 1.1 dyoung if (vtw) {
2212 1.1 dyoung vtw->snd_nxt = 0;
2213 1.1 dyoung vtw->rcv_nxt = 0;
2214 1.1 dyoung
2215 1.1 dyoung switch (af) {
2216 1.1 dyoung case AF_INET: {
2217 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
2218 1.1 dyoung
2219 1.1 dyoung v4->faddr = fa->sin_addr.v4.s_addr;
2220 1.1 dyoung v4->laddr = la->sin_addr.v4.s_addr;
2221 1.1 dyoung v4->fport = fa->sin_port;
2222 1.1 dyoung v4->lport = la->sin_port;
2223 1.1 dyoung
2224 1.1 dyoung vtw->reuse_port = 1;
2225 1.1 dyoung vtw->reuse_addr = 1;
2226 1.1 dyoung vtw->v6only = 0;
2227 1.1 dyoung vtw->uid = 0;
2228 1.1 dyoung
2229 1.1 dyoung vtw_inshash_v4(ctl, vtw);
2230 1.1 dyoung break;
2231 1.1 dyoung }
2232 1.1 dyoung
2233 1.1 dyoung case AF_INET6: {
2234 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
2235 1.1 dyoung
2236 1.1 dyoung v6->faddr = fa->sin_addr.v6;
2237 1.1 dyoung v6->laddr = la->sin_addr.v6;
2238 1.1 dyoung
2239 1.1 dyoung v6->fport = fa->sin_port;
2240 1.1 dyoung v6->lport = la->sin_port;
2241 1.1 dyoung
2242 1.1 dyoung vtw->reuse_port = 1;
2243 1.1 dyoung vtw->reuse_addr = 1;
2244 1.1 dyoung vtw->v6only = 0;
2245 1.1 dyoung vtw->uid = 0;
2246 1.1 dyoung
2247 1.1 dyoung vtw_inshash_v6(ctl, vtw);
2248 1.1 dyoung break;
2249 1.1 dyoung }
2250 1.1 dyoung
2251 1.1 dyoung default:
2252 1.1 dyoung break;
2253 1.1 dyoung }
2254 1.1 dyoung
2255 1.1 dyoung return 1;
2256 1.1 dyoung }
2257 1.1 dyoung
2258 1.1 dyoung return 0;
2259 1.1 dyoung }
2260 1.1 dyoung
2261 1.1 dyoung static int vtw_syscall = 0;
2262 1.1 dyoung
2263 1.1 dyoung static int
2264 1.1 dyoung vtw_debug_process(vtw_sysargs_t *ap)
2265 1.1 dyoung {
2266 1.1 dyoung struct vestigial_inpcb vestige;
2267 1.1 dyoung int rc = 0;
2268 1.1 dyoung
2269 1.1 dyoung mutex_enter(softnet_lock);
2270 1.1 dyoung
2271 1.1 dyoung switch (ap->op) {
2272 1.1 dyoung case 0: // insert
2273 1.1 dyoung vtw_debug_add(ap->la.sin_family
2274 1.1 dyoung , &ap->la
2275 1.1 dyoung , &ap->fa
2276 1.1 dyoung , TCPTV_MSL
2277 1.1 dyoung , 0);
2278 1.1 dyoung break;
2279 1.1 dyoung
2280 1.1 dyoung case 1: // lookup
2281 1.1 dyoung case 2: // restart
2282 1.1 dyoung switch (ap->la.sin_family) {
2283 1.1 dyoung case AF_INET:
2284 1.1 dyoung if (tcp_lookup_v4(ap->fa.sin_addr.v4, ap->fa.sin_port,
2285 1.1 dyoung ap->la.sin_addr.v4, ap->la.sin_port,
2286 1.1 dyoung &vestige)) {
2287 1.1 dyoung if (ap->op == 2) {
2288 1.1 dyoung vtw_restart(&vestige);
2289 1.1 dyoung }
2290 1.1 dyoung rc = 0;
2291 1.1 dyoung } else
2292 1.1 dyoung rc = ESRCH;
2293 1.1 dyoung break;
2294 1.1 dyoung
2295 1.1 dyoung case AF_INET6:
2296 1.1 dyoung if (tcp_lookup_v6(&ap->fa.sin_addr.v6, ap->fa.sin_port,
2297 1.1 dyoung &ap->la.sin_addr.v6, ap->la.sin_port,
2298 1.1 dyoung &vestige)) {
2299 1.1 dyoung if (ap->op == 2) {
2300 1.1 dyoung vtw_restart(&vestige);
2301 1.1 dyoung }
2302 1.1 dyoung rc = 0;
2303 1.1 dyoung } else
2304 1.1 dyoung rc = ESRCH;
2305 1.1 dyoung break;
2306 1.1 dyoung default:
2307 1.1 dyoung rc = EINVAL;
2308 1.1 dyoung }
2309 1.1 dyoung break;
2310 1.1 dyoung
2311 1.1 dyoung default:
2312 1.1 dyoung rc = EINVAL;
2313 1.1 dyoung }
2314 1.1 dyoung
2315 1.1 dyoung mutex_exit(softnet_lock);
2316 1.1 dyoung return rc;
2317 1.1 dyoung }
2318 1.1 dyoung
2319 1.1 dyoung struct sys_vtw_args {
2320 1.1 dyoung syscallarg(const vtw_sysargs_t *) req;
2321 1.1 dyoung syscallarg(size_t) len;
2322 1.1 dyoung };
2323 1.1 dyoung
2324 1.1 dyoung static int
2325 1.1 dyoung vtw_sys(struct lwp *l, const void *_, register_t *retval)
2326 1.1 dyoung {
2327 1.1 dyoung const struct sys_vtw_args *uap = _;
2328 1.1 dyoung void *buf;
2329 1.1 dyoung int rc;
2330 1.1 dyoung size_t len = SCARG(uap, len);
2331 1.1 dyoung
2332 1.1 dyoung if (len != sizeof (vtw_sysargs_t))
2333 1.1 dyoung return EINVAL;
2334 1.1 dyoung
2335 1.1 dyoung buf = kmem_alloc(len, KM_SLEEP);
2336 1.1 dyoung if (!buf)
2337 1.1 dyoung return ENOMEM;
2338 1.1 dyoung
2339 1.1 dyoung rc = copyin(SCARG(uap, req), buf, len);
2340 1.1 dyoung if (!rc) {
2341 1.1 dyoung rc = vtw_debug_process(buf);
2342 1.1 dyoung }
2343 1.1 dyoung kmem_free(buf, len);
2344 1.1 dyoung
2345 1.1 dyoung return rc;
2346 1.1 dyoung }
2347 1.1 dyoung
2348 1.1 dyoung static void
2349 1.1 dyoung vtw_sanity_check(void)
2350 1.1 dyoung {
2351 1.1 dyoung vtw_ctl_t *ctl;
2352 1.1 dyoung vtw_t *vtw;
2353 1.1 dyoung int i;
2354 1.1 dyoung int n;
2355 1.1 dyoung
2356 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
2357 1.1 dyoung ctl = &vtw_tcpv4[i];
2358 1.1 dyoung
2359 1.1 dyoung if (!ctl->base.v || ctl->nalloc)
2360 1.1 dyoung continue;
2361 1.1 dyoung
2362 1.1 dyoung for (n = 0, vtw = ctl->base.v; ; ) {
2363 1.1 dyoung ++n;
2364 1.1 dyoung vtw = vtw_next(ctl, vtw);
2365 1.1 dyoung if (vtw == ctl->base.v)
2366 1.1 dyoung break;
2367 1.1 dyoung }
2368 1.1 dyoung db_trace(KTR_VTW
2369 1.1 dyoung , (ctl, "sanity: class %x n %x nfree %x"
2370 1.1 dyoung , i, n, ctl->nfree));
2371 1.1 dyoung
2372 1.1 dyoung KASSERT(n == ctl->nfree);
2373 1.1 dyoung }
2374 1.1 dyoung
2375 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
2376 1.1 dyoung ctl = &vtw_tcpv6[i];
2377 1.1 dyoung
2378 1.1 dyoung if (!ctl->base.v || ctl->nalloc)
2379 1.1 dyoung continue;
2380 1.1 dyoung
2381 1.1 dyoung for (n = 0, vtw = ctl->base.v; ; ) {
2382 1.1 dyoung ++n;
2383 1.1 dyoung vtw = vtw_next(ctl, vtw);
2384 1.1 dyoung if (vtw == ctl->base.v)
2385 1.1 dyoung break;
2386 1.1 dyoung }
2387 1.1 dyoung db_trace(KTR_VTW
2388 1.1 dyoung , (ctl, "sanity: class %x n %x nfree %x"
2389 1.1 dyoung , i, n, ctl->nfree));
2390 1.1 dyoung KASSERT(n == ctl->nfree);
2391 1.1 dyoung }
2392 1.1 dyoung }
2393 1.1 dyoung
2394 1.1 dyoung /*!\brief Initialise debug support.
2395 1.1 dyoung */
2396 1.1 dyoung static void
2397 1.1 dyoung vtw_debug_init(void)
2398 1.1 dyoung {
2399 1.1 dyoung int i;
2400 1.1 dyoung
2401 1.1 dyoung vtw_sanity_check();
2402 1.1 dyoung
2403 1.1 dyoung if (vtw_syscall)
2404 1.1 dyoung return;
2405 1.1 dyoung
2406 1.1 dyoung for (i = 511; i; --i) {
2407 1.1 dyoung if (sysent[i].sy_call == sys_nosys) {
2408 1.1 dyoung sysent[i].sy_call = vtw_sys;
2409 1.1 dyoung sysent[i].sy_narg = 2;
2410 1.1 dyoung sysent[i].sy_argsize = sizeof (struct sys_vtw_args);
2411 1.1 dyoung sysent[i].sy_flags = 0;
2412 1.1 dyoung
2413 1.1 dyoung vtw_syscall = i;
2414 1.1 dyoung break;
2415 1.1 dyoung }
2416 1.1 dyoung }
2417 1.1 dyoung if (i) {
2418 1.1 dyoung const struct sysctlnode *node;
2419 1.1 dyoung uint32_t flags;
2420 1.1 dyoung
2421 1.1 dyoung flags = sysctl_root.sysctl_flags;
2422 1.1 dyoung
2423 1.1 dyoung sysctl_root.sysctl_flags |= CTLFLAG_READWRITE;
2424 1.1 dyoung sysctl_root.sysctl_flags &= ~CTLFLAG_PERMANENT;
2425 1.1 dyoung
2426 1.1 dyoung sysctl_createv(0, 0, 0, &node,
2427 1.1 dyoung CTLFLAG_PERMANENT, CTLTYPE_NODE,
2428 1.1 dyoung "koff",
2429 1.1 dyoung SYSCTL_DESCR("Kernel Obscure Feature Finder"),
2430 1.1 dyoung 0, 0, 0, 0, CTL_CREATE, CTL_EOL);
2431 1.1 dyoung
2432 1.1 dyoung if (!node) {
2433 1.1 dyoung sysctl_createv(0, 0, 0, &node,
2434 1.1 dyoung CTLFLAG_PERMANENT, CTLTYPE_NODE,
2435 1.1 dyoung "koffka",
2436 1.1 dyoung SYSCTL_DESCR("The Real(tm) Kernel"
2437 1.1 dyoung " Obscure Feature Finder"),
2438 1.1 dyoung 0, 0, 0, 0, CTL_CREATE, CTL_EOL);
2439 1.1 dyoung }
2440 1.1 dyoung if (node) {
2441 1.1 dyoung sysctl_createv(0, 0, 0, 0,
2442 1.1 dyoung CTLFLAG_PERMANENT|CTLFLAG_READONLY,
2443 1.1 dyoung CTLTYPE_INT, "vtw_debug_syscall",
2444 1.1 dyoung SYSCTL_DESCR("vtw debug"
2445 1.1 dyoung " system call number"),
2446 1.1 dyoung 0, 0, &vtw_syscall, 0, node->sysctl_num,
2447 1.1 dyoung CTL_CREATE, CTL_EOL);
2448 1.1 dyoung }
2449 1.1 dyoung sysctl_root.sysctl_flags = flags;
2450 1.1 dyoung }
2451 1.1 dyoung }
2452 1.1 dyoung #else /* !VTW_DEBUG */
/* Built without VTW_DEBUG: there is no debug support to initialise. */
static void
vtw_debug_init(void)
{
}
2458 1.1 dyoung #endif /* !VTW_DEBUG */
2459