tcp_vtw.c revision 1.1 1 1.1 dyoung /*
2 1.1 dyoung * Copyright (c) 2011 The NetBSD Foundation, Inc.
3 1.1 dyoung * All rights reserved.
4 1.1 dyoung *
5 1.1 dyoung * This code is derived from software contributed to The NetBSD Foundation
6 1.1 dyoung * by Coyote Point Systems, Inc.
7 1.1 dyoung *
8 1.1 dyoung * Redistribution and use in source and binary forms, with or without
9 1.1 dyoung * modification, are permitted provided that the following conditions
10 1.1 dyoung * are met:
11 1.1 dyoung * 1. Redistributions of source code must retain the above copyright
12 1.1 dyoung * notice, this list of conditions and the following disclaimer.
13 1.1 dyoung * 2. Redistributions in binary form must reproduce the above copyright
14 1.1 dyoung * notice, this list of conditions and the following disclaimer in the
15 1.1 dyoung * documentation and/or other materials provided with the distribution.
16 1.1 dyoung *
17 1.1 dyoung * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
18 1.1 dyoung * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 1.1 dyoung * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 1.1 dyoung * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
21 1.1 dyoung * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 1.1 dyoung * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 1.1 dyoung * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 1.1 dyoung * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 1.1 dyoung * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 1.1 dyoung * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 1.1 dyoung * POSSIBILITY OF SUCH DAMAGE.
28 1.1 dyoung */
29 1.1 dyoung #include <sys/cdefs.h>
30 1.1 dyoung
31 1.1 dyoung #include "opt_ddb.h"
32 1.1 dyoung #include "opt_inet.h"
33 1.1 dyoung #include "opt_ipsec.h"
34 1.1 dyoung #include "opt_inet_csum.h"
35 1.1 dyoung #include "opt_tcp_debug.h"
36 1.1 dyoung
37 1.1 dyoung #include <sys/param.h>
38 1.1 dyoung #include <sys/systm.h>
39 1.1 dyoung #include <sys/malloc.h>
40 1.1 dyoung #include <sys/kmem.h>
41 1.1 dyoung #include <sys/mbuf.h>
42 1.1 dyoung #include <sys/protosw.h>
43 1.1 dyoung #include <sys/socket.h>
44 1.1 dyoung #include <sys/socketvar.h>
45 1.1 dyoung #include <sys/errno.h>
46 1.1 dyoung #include <sys/syslog.h>
47 1.1 dyoung #include <sys/pool.h>
48 1.1 dyoung #include <sys/domain.h>
49 1.1 dyoung #include <sys/kernel.h>
50 1.1 dyoung #include <net/if.h>
51 1.1 dyoung #include <net/route.h>
52 1.1 dyoung #include <net/if_types.h>
53 1.1 dyoung
54 1.1 dyoung #include <netinet/in.h>
55 1.1 dyoung #include <netinet/in_systm.h>
56 1.1 dyoung #include <netinet/ip.h>
57 1.1 dyoung #include <netinet/in_pcb.h>
58 1.1 dyoung #include <netinet/in_var.h>
59 1.1 dyoung #include <netinet/ip_var.h>
60 1.1 dyoung #include <netinet/in_offload.h>
61 1.1 dyoung #include <netinet/ip6.h>
62 1.1 dyoung #include <netinet6/ip6_var.h>
63 1.1 dyoung #include <netinet6/in6_pcb.h>
64 1.1 dyoung #include <netinet6/ip6_var.h>
65 1.1 dyoung #include <netinet6/in6_var.h>
66 1.1 dyoung #include <netinet/icmp6.h>
67 1.1 dyoung #include <netinet6/nd6.h>
68 1.1 dyoung
69 1.1 dyoung #include <netinet/tcp.h>
70 1.1 dyoung #include <netinet/tcp_fsm.h>
71 1.1 dyoung #include <netinet/tcp_seq.h>
72 1.1 dyoung #include <netinet/tcp_timer.h>
73 1.1 dyoung #include <netinet/tcp_var.h>
74 1.1 dyoung #include <netinet/tcp_private.h>
75 1.1 dyoung #include <netinet/tcpip.h>
76 1.1 dyoung
77 1.1 dyoung #include <machine/stdarg.h>
78 1.1 dyoung #include <netinet/tcp_vtw.h>
79 1.1 dyoung
80 1.1 dyoung __KERNEL_RCSID(0, "$NetBSD: tcp_vtw.c,v 1.1 2011/05/03 18:28:45 dyoung Exp $");
81 1.1 dyoung
/*
 * Trace hook.  In this file it expands to an empty statement, so
 * neither argument is evaluated and the calls cost nothing.
 */
#define db_trace(facility, args)	do { } while (/*CONSTCOND*/0)
83 1.1 dyoung
84 1.1 dyoung static void k_vtw(int c, char **v);
85 1.1 dyoung static void vtw_debug_init(void);
86 1.1 dyoung
87 1.1 dyoung fatp_ctl_t fat_tcpv4;
88 1.1 dyoung fatp_ctl_t fat_tcpv6;
89 1.1 dyoung vtw_ctl_t vtw_tcpv4[VTW_NCLASS];
90 1.1 dyoung vtw_ctl_t vtw_tcpv6[VTW_NCLASS];
91 1.1 dyoung vtw_stats_t vtw_stats;
92 1.1 dyoung
93 1.1 dyoung /* We provide state for the lookup_ports iterator.
94 1.1 dyoung * As currently we are netlock-protected, there is one.
95 1.1 dyoung * If we were finer-grain, we would have one per CPU.
96 1.1 dyoung * I do not want to be in the business of alloc/free.
97 1.1 dyoung * The best alternate would be allocate on the caller's
98 1.1 dyoung * stack, but that would require them to know the struct,
99 1.1 dyoung * or at least the size.
100 1.1 dyoung * See how she goes.
101 1.1 dyoung */
102 1.1 dyoung struct tcp_ports_iterator {
103 1.1 dyoung union {
104 1.1 dyoung struct in_addr v4;
105 1.1 dyoung struct in6_addr v6;
106 1.1 dyoung } addr;
107 1.1 dyoung u_int port;
108 1.1 dyoung
109 1.1 dyoung uint32_t wild : 1;
110 1.1 dyoung
111 1.1 dyoung vtw_ctl_t *ctl;
112 1.1 dyoung fatp_t *fp;
113 1.1 dyoung
114 1.1 dyoung uint16_t slot_idx;
115 1.1 dyoung uint16_t ctl_idx;
116 1.1 dyoung };
117 1.1 dyoung
118 1.1 dyoung static struct tcp_ports_iterator tcp_ports_iterator_v4;
119 1.1 dyoung static struct tcp_ports_iterator tcp_ports_iterator_v6;
120 1.1 dyoung
121 1.1 dyoung static int vtw_age(vtw_ctl_t *, struct timeval *);
122 1.1 dyoung
123 1.1 dyoung /*!\brief allocate a fat pointer from a collection.
124 1.1 dyoung */
125 1.1 dyoung static fatp_t *
126 1.1 dyoung fatp_alloc(fatp_ctl_t *fat)
127 1.1 dyoung {
128 1.1 dyoung fatp_t *fp = 0;
129 1.1 dyoung
130 1.1 dyoung if (fat->nfree) {
131 1.1 dyoung fp = fat->free;
132 1.1 dyoung if (fp) {
133 1.1 dyoung fat->free = fatp_next(fat, fp);
134 1.1 dyoung --fat->nfree;
135 1.1 dyoung ++fat->nalloc;
136 1.1 dyoung fp->nxt = 0;
137 1.1 dyoung
138 1.1 dyoung KASSERT(!fp->inuse);
139 1.1 dyoung }
140 1.1 dyoung }
141 1.1 dyoung
142 1.1 dyoung return fp;
143 1.1 dyoung }
144 1.1 dyoung
145 1.1 dyoung /*!\brief free a fat pointer.
146 1.1 dyoung */
147 1.1 dyoung static void
148 1.1 dyoung fatp_free(fatp_ctl_t *fat, fatp_t *fp)
149 1.1 dyoung {
150 1.1 dyoung if (fp) {
151 1.1 dyoung KASSERT(!fp->inuse);
152 1.1 dyoung KASSERT(!fp->nxt);
153 1.1 dyoung
154 1.1 dyoung fp->nxt = fatp_index(fat, fat->free);
155 1.1 dyoung fat->free = fp;
156 1.1 dyoung
157 1.1 dyoung ++fat->nfree;
158 1.1 dyoung --fat->nalloc;
159 1.1 dyoung }
160 1.1 dyoung }
161 1.1 dyoung
162 1.1 dyoung /*!\brief initialise a collection of fat pointers.
163 1.1 dyoung *
164 1.1 dyoung *\param n # hash buckets
165 1.1 dyoung *\param m total # fat pointers to allocate
166 1.1 dyoung *
167 1.1 dyoung * We allocate 2x as much, as we have two hashes: full and lport only.
168 1.1 dyoung */
169 1.1 dyoung static void
170 1.1 dyoung fatp_init(fatp_ctl_t *fat, uint32_t n, uint32_t m)
171 1.1 dyoung {
172 1.1 dyoung fatp_t *fp;
173 1.1 dyoung
174 1.1 dyoung k_vtw(0,0);
175 1.1 dyoung
176 1.1 dyoung KASSERT(n <= FATP_MAX / 2);
177 1.1 dyoung
178 1.1 dyoung fat->hash = kmem_alloc(2*m * sizeof (fatp_t *), KM_SLEEP);
179 1.1 dyoung fat->base = kmem_alloc(2*n * sizeof (fatp_t), KM_SLEEP);
180 1.1 dyoung
181 1.1 dyoung if (!fat->base) {
182 1.1 dyoung if (fat->hash)
183 1.1 dyoung kmem_free(fat->hash, 2*m * sizeof (fatp_t *));
184 1.1 dyoung
185 1.1 dyoung bzero(fat, sizeof (*fat));
186 1.1 dyoung return;
187 1.1 dyoung }
188 1.1 dyoung
189 1.1 dyoung fat->port = &fat->hash[m];
190 1.1 dyoung
191 1.1 dyoung fat->mask = m - 1; // ASSERT is power of 2 (m)
192 1.1 dyoung fat->lim = fat->base + 2*n - 1;
193 1.1 dyoung fat->nfree = 0;
194 1.1 dyoung fat->nalloc = 2*n;
195 1.1 dyoung
196 1.1 dyoung bzero(fat->hash, 2*m * sizeof (fatp_t *));
197 1.1 dyoung bzero(fat->base, 2*n * sizeof (fatp_t));
198 1.1 dyoung
199 1.1 dyoung /* Initialise the free list.
200 1.1 dyoung */
201 1.1 dyoung for (fp = fat->lim; fp >= fat->base; --fp) {
202 1.1 dyoung fatp_free(fat, fp);
203 1.1 dyoung }
204 1.1 dyoung }
205 1.1 dyoung
/*
 * The `xtra' is XORed into the tag stored.
 * It is indexed by slot number, so each slot of a fat pointer gets
 * its own pattern.
 */
static uint32_t fatp_xtra[] = {
	/* slots  0 ..  7 */
	0x11111111, 0x22222222, 0x33333333, 0x44444444,
	0x55555555, 0x66666666, 0x77777777, 0x88888888,
	/* slots  8 .. 15 */
	0x12121212, 0x21212121, 0x34343434, 0x43434343,
	0x56565656, 0x65656565, 0x78787878, 0x87878787,
	/* slots 16 .. 23 */
	0x11221122, 0x22112211, 0x33443344, 0x44334433,
	0x55665566, 0x66556655, 0x77887788, 0x88778877,
	/* slots 24 .. 31 */
	0x11112222, 0x22221111, 0x33334444, 0x44443333,
	0x55556666, 0x66665555, 0x77778888, 0x88887777,
};
219 1.1 dyoung
220 1.1 dyoung /*!\brief turn a {fatp_t*,slot} into an integral key.
221 1.1 dyoung *
222 1.1 dyoung * The key can be used to obtain the fatp_t, and the slot,
223 1.1 dyoung * as it directly encodes them.
224 1.1 dyoung */
225 1.1 dyoung static inline uint32_t
226 1.1 dyoung fatp_key(fatp_ctl_t *fat, fatp_t *fp, uint32_t slot)
227 1.1 dyoung {
228 1.1 dyoung CTASSERT(CACHE_LINE_SIZE == 32 ||
229 1.1 dyoung CACHE_LINE_SIZE == 64 ||
230 1.1 dyoung CACHE_LINE_SIZE == 128);
231 1.1 dyoung
232 1.1 dyoung switch (fatp_ntags()) {
233 1.1 dyoung case 7:
234 1.1 dyoung return (fatp_index(fat, fp) << 3) | slot;
235 1.1 dyoung case 15:
236 1.1 dyoung return (fatp_index(fat, fp) << 4) | slot;
237 1.1 dyoung case 31:
238 1.1 dyoung return (fatp_index(fat, fp) << 5) | slot;
239 1.1 dyoung default:
240 1.1 dyoung KASSERT(0 && "no support, for no good reason");
241 1.1 dyoung return ~0;
242 1.1 dyoung }
243 1.1 dyoung }
244 1.1 dyoung
245 1.1 dyoung static inline uint32_t
246 1.1 dyoung fatp_slot_from_key(fatp_ctl_t *fat, uint32_t key)
247 1.1 dyoung {
248 1.1 dyoung CTASSERT(CACHE_LINE_SIZE == 32 ||
249 1.1 dyoung CACHE_LINE_SIZE == 64 ||
250 1.1 dyoung CACHE_LINE_SIZE == 128);
251 1.1 dyoung
252 1.1 dyoung switch (fatp_ntags()) {
253 1.1 dyoung case 7:
254 1.1 dyoung return key & 7;
255 1.1 dyoung case 15:
256 1.1 dyoung return key & 15;
257 1.1 dyoung case 31:
258 1.1 dyoung return key & 31;
259 1.1 dyoung default:
260 1.1 dyoung KASSERT(0 && "no support, for no good reason");
261 1.1 dyoung return ~0;
262 1.1 dyoung }
263 1.1 dyoung }
264 1.1 dyoung
265 1.1 dyoung static inline fatp_t *
266 1.1 dyoung fatp_from_key(fatp_ctl_t *fat, uint32_t key)
267 1.1 dyoung {
268 1.1 dyoung CTASSERT(CACHE_LINE_SIZE == 32 ||
269 1.1 dyoung CACHE_LINE_SIZE == 64 ||
270 1.1 dyoung CACHE_LINE_SIZE == 128);
271 1.1 dyoung
272 1.1 dyoung switch (fatp_ntags()) {
273 1.1 dyoung case 7:
274 1.1 dyoung key >>= 3;
275 1.1 dyoung break;
276 1.1 dyoung case 15:
277 1.1 dyoung key >>= 4;
278 1.1 dyoung break;
279 1.1 dyoung case 31:
280 1.1 dyoung key >>= 5;
281 1.1 dyoung break;
282 1.1 dyoung default:
283 1.1 dyoung KASSERT(0 && "no support, for no good reason");
284 1.1 dyoung return 0;
285 1.1 dyoung }
286 1.1 dyoung
287 1.1 dyoung return key ? fat->base + key - 1 : 0;
288 1.1 dyoung }
289 1.1 dyoung
290 1.1 dyoung static inline uint32_t
291 1.1 dyoung idx_encode(vtw_ctl_t *ctl, uint32_t idx)
292 1.1 dyoung {
293 1.1 dyoung return (idx << ctl->idx_bits) | idx;
294 1.1 dyoung }
295 1.1 dyoung
296 1.1 dyoung static inline uint32_t
297 1.1 dyoung idx_decode(vtw_ctl_t *ctl, uint32_t bits)
298 1.1 dyoung {
299 1.1 dyoung uint32_t idx = bits & ctl->idx_mask;
300 1.1 dyoung
301 1.1 dyoung if (idx_encode(ctl, idx) == bits)
302 1.1 dyoung return idx;
303 1.1 dyoung else
304 1.1 dyoung return ~0;
305 1.1 dyoung }
306 1.1 dyoung
307 1.1 dyoung /*!\brief insert index into fatp hash
308 1.1 dyoung *
309 1.1 dyoung *\param idx - index of element being placed in hash chain
310 1.1 dyoung *\param tag - 32-bit tag identifier
311 1.1 dyoung *
312 1.1 dyoung *\returns
313 1.1 dyoung * value which can be used to locate entry.
314 1.1 dyoung *
315 1.1 dyoung *\note
316 1.1 dyoung * we rely on the fact that there are unused high bits in the index
317 1.1 dyoung * for verification purposes on lookup.
318 1.1 dyoung */
319 1.1 dyoung
320 1.1 dyoung static inline uint32_t
321 1.1 dyoung fatp_vtw_inshash(fatp_ctl_t *fat, uint32_t idx, uint32_t tag, int which,
322 1.1 dyoung void *dbg)
323 1.1 dyoung {
324 1.1 dyoung fatp_t *fp;
325 1.1 dyoung fatp_t **hash = (which ? fat->port : fat->hash);
326 1.1 dyoung int i;
327 1.1 dyoung
328 1.1 dyoung fp = hash[tag & fat->mask];
329 1.1 dyoung
330 1.1 dyoung while (!fp || fatp_full(fp)) {
331 1.1 dyoung fatp_t *fq;
332 1.1 dyoung
333 1.1 dyoung /* All entries are inuse at the top level.
334 1.1 dyoung * We allocate a spare, and push the top level
335 1.1 dyoung * down one. All entries in the fp we push down
336 1.1 dyoung * (think of a tape worm here) will be expelled sooner than
337 1.1 dyoung * any entries added subsequently to this hash bucket.
338 1.1 dyoung * This is a property of the time waits we are exploiting.
339 1.1 dyoung */
340 1.1 dyoung
341 1.1 dyoung fq = fatp_alloc(fat);
342 1.1 dyoung if (!fq) {
343 1.1 dyoung vtw_age(fat->vtw, 0);
344 1.1 dyoung fp = hash[tag & fat->mask];
345 1.1 dyoung continue;
346 1.1 dyoung }
347 1.1 dyoung
348 1.1 dyoung fq->inuse = 0;
349 1.1 dyoung fq->nxt = fatp_index(fat, fp);
350 1.1 dyoung
351 1.1 dyoung hash[tag & fat->mask] = fq;
352 1.1 dyoung
353 1.1 dyoung fp = fq;
354 1.1 dyoung }
355 1.1 dyoung
356 1.1 dyoung KASSERT(!fatp_full(fp));
357 1.1 dyoung
358 1.1 dyoung /* Fill highest index first. Lookup is lowest first.
359 1.1 dyoung */
360 1.1 dyoung for (i = fatp_ntags(); --i >= 0; ) {
361 1.1 dyoung if (!((1 << i) & fp->inuse)) {
362 1.1 dyoung break;
363 1.1 dyoung }
364 1.1 dyoung }
365 1.1 dyoung
366 1.1 dyoung fp->inuse |= 1 << i;
367 1.1 dyoung fp->tag[i] = tag ^ idx_encode(fat->vtw, idx) ^ fatp_xtra[i];
368 1.1 dyoung
369 1.1 dyoung db_trace(KTR_VTW
370 1.1 dyoung , (fp, "fat: inuse %5.5x tag[%x] %8.8x"
371 1.1 dyoung , fp->inuse
372 1.1 dyoung , i, fp->tag[i]));
373 1.1 dyoung
374 1.1 dyoung return fatp_key(fat, fp, i);
375 1.1 dyoung }
376 1.1 dyoung
377 1.1 dyoung static inline int
378 1.1 dyoung vtw_alive(const vtw_t *vtw)
379 1.1 dyoung {
380 1.1 dyoung return vtw->hashed && vtw->expire.tv_sec;
381 1.1 dyoung }
382 1.1 dyoung
383 1.1 dyoung static inline uint32_t
384 1.1 dyoung vtw_index_v4(vtw_ctl_t *ctl, vtw_v4_t *v4)
385 1.1 dyoung {
386 1.1 dyoung if (ctl->base.v4 <= v4 && v4 <= ctl->lim.v4)
387 1.1 dyoung return v4 - ctl->base.v4;
388 1.1 dyoung
389 1.1 dyoung KASSERT(0 && "vtw out of bounds");
390 1.1 dyoung
391 1.1 dyoung return ~0;
392 1.1 dyoung }
393 1.1 dyoung
394 1.1 dyoung static inline uint32_t
395 1.1 dyoung vtw_index_v6(vtw_ctl_t *ctl, vtw_v6_t *v6)
396 1.1 dyoung {
397 1.1 dyoung if (ctl->base.v6 <= v6 && v6 <= ctl->lim.v6)
398 1.1 dyoung return v6 - ctl->base.v6;
399 1.1 dyoung
400 1.1 dyoung KASSERT(0 && "vtw out of bounds");
401 1.1 dyoung
402 1.1 dyoung return ~0;
403 1.1 dyoung }
404 1.1 dyoung
405 1.1 dyoung static inline uint32_t
406 1.1 dyoung vtw_index(vtw_ctl_t *ctl, vtw_t *vtw)
407 1.1 dyoung {
408 1.1 dyoung if (ctl->clidx)
409 1.1 dyoung ctl = ctl->ctl;
410 1.1 dyoung
411 1.1 dyoung if (ctl->is_v4)
412 1.1 dyoung return vtw_index_v4(ctl, (vtw_v4_t *)vtw);
413 1.1 dyoung
414 1.1 dyoung if (ctl->is_v6)
415 1.1 dyoung return vtw_index_v6(ctl, (vtw_v6_t *)vtw);
416 1.1 dyoung
417 1.1 dyoung KASSERT(0 && "neither 4 nor 6. most curious.");
418 1.1 dyoung
419 1.1 dyoung return ~0;
420 1.1 dyoung }
421 1.1 dyoung
422 1.1 dyoung static inline vtw_t *
423 1.1 dyoung vtw_from_index(vtw_ctl_t *ctl, uint32_t idx)
424 1.1 dyoung {
425 1.1 dyoung if (ctl->clidx)
426 1.1 dyoung ctl = ctl->ctl;
427 1.1 dyoung
428 1.1 dyoung /* See if the index looks like it might be an index.
429 1.1 dyoung * Bits on outside of the valid index bits is a give away.
430 1.1 dyoung */
431 1.1 dyoung idx = idx_decode(ctl, idx);
432 1.1 dyoung
433 1.1 dyoung if (idx == ~0) {
434 1.1 dyoung return 0;
435 1.1 dyoung } else if (ctl->is_v4) {
436 1.1 dyoung vtw_v4_t *vtw = ctl->base.v4 + idx;
437 1.1 dyoung
438 1.1 dyoung return (ctl->base.v4 <= vtw && vtw <= ctl->lim.v4)
439 1.1 dyoung ? &vtw->common : 0;
440 1.1 dyoung } else if (ctl->is_v6) {
441 1.1 dyoung vtw_v6_t *vtw = ctl->base.v6 + idx;
442 1.1 dyoung
443 1.1 dyoung return (ctl->base.v6 <= vtw && vtw <= ctl->lim.v6)
444 1.1 dyoung ? &vtw->common : 0;
445 1.1 dyoung } else {
446 1.1 dyoung KASSERT(0 && "badness");
447 1.1 dyoung return 0;
448 1.1 dyoung }
449 1.1 dyoung }
450 1.1 dyoung
451 1.1 dyoung /*!\brief return the next vtw after this one.
452 1.1 dyoung *
453 1.1 dyoung * Due to the differing sizes of the entries in differing
454 1.1 dyoung * arenas, we have to ensure we ++ the correct pointer type.
455 1.1 dyoung *
456 1.1 dyoung * Also handles wrap.
457 1.1 dyoung */
458 1.1 dyoung static inline vtw_t *
459 1.1 dyoung vtw_next(vtw_ctl_t *ctl, vtw_t *vtw)
460 1.1 dyoung {
461 1.1 dyoung if (ctl->is_v4) {
462 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
463 1.1 dyoung
464 1.1 dyoung vtw = &(++v4)->common;
465 1.1 dyoung } else {
466 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
467 1.1 dyoung
468 1.1 dyoung vtw = &(++v6)->common;
469 1.1 dyoung }
470 1.1 dyoung
471 1.1 dyoung if (vtw > ctl->lim.v)
472 1.1 dyoung vtw = ctl->base.v;
473 1.1 dyoung
474 1.1 dyoung return vtw;
475 1.1 dyoung }
476 1.1 dyoung
477 1.1 dyoung /*!\brief remove entry from FATP hash chains
478 1.1 dyoung */
/*
 * The entry is removed from both hashes in turn: first the full
 * hash (fat->hash[], located through vtw->key), then the lport-only
 * hash (fat->port[], located through vtw->port_key).  Each half
 * follows the same steps:
 *   - decode the fat pointer and slot from the stored key;
 *   - check that the slot is in use and still holds this entry's tag;
 *   - clear the slot;
 *   - if the fat pointer is now empty, unlink it from its bucket
 *     chain and free it, unless it is the only one in the bucket,
 *     in which case it is kept for reuse;
 *   - complement the stored key.
 * An entry that is not hashed is asserted on and otherwise ignored.
 */
479 1.1 dyoung static inline void
480 1.1 dyoung vtw_unhash(vtw_ctl_t *ctl, vtw_t *vtw)
481 1.1 dyoung {
482 1.1 dyoung fatp_ctl_t *fat = ctl->fat;
483 1.1 dyoung fatp_t *fp;
484 1.1 dyoung uint32_t key = vtw->key;
485 1.1 dyoung uint32_t tag, slot, idx;
/* Both family views of the same entry; only the one matching the
 * arena's family is actually read below.
 */
486 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
487 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
488 1.1 dyoung
489 1.1 dyoung if (!vtw->hashed) {
490 1.1 dyoung KASSERT(0 && "unhashed");
491 1.1 dyoung return;
492 1.1 dyoung }
493 1.1 dyoung
/* Recompute the full-hash tag from the connection's addresses and ports. */
494 1.1 dyoung if (fat->vtw->is_v4) {
495 1.1 dyoung tag = v4_tag(v4->faddr, v4->fport, v4->laddr, v4->lport);
496 1.1 dyoung } else if (fat->vtw->is_v6) {
497 1.1 dyoung tag = v6_tag(&v6->faddr, v6->fport, &v6->laddr, v6->lport);
498 1.1 dyoung } else {
499 1.1 dyoung tag = 0;
500 1.1 dyoung KASSERT(0 && "not reached");
501 1.1 dyoung }
502 1.1 dyoung
503 1.1 dyoung /* Remove from fat->hash[]
504 1.1 dyoung */
505 1.1 dyoung slot = fatp_slot_from_key(fat, key);
506 1.1 dyoung fp = fatp_from_key(fat, key);
507 1.1 dyoung idx = vtw_index(ctl, vtw);
508 1.1 dyoung
509 1.1 dyoung db_trace(KTR_VTW
510 1.1 dyoung , (fp, "fat: del inuse %5.5x slot %x idx %x key %x tag %x"
511 1.1 dyoung , fp->inuse, slot, idx, key, tag));
512 1.1 dyoung
513 1.1 dyoung KASSERT(fp->inuse & (1 << slot));
514 1.1 dyoung KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
515 1.1 dyoung ^ fatp_xtra[slot]));
516 1.1 dyoung
/* The assertions above are repeated as a run-time test, so the slot
 * is only touched when it really holds this entry.
 */
517 1.1 dyoung if ((fp->inuse & (1 << slot))
518 1.1 dyoung && fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
519 1.1 dyoung ^ fatp_xtra[slot])) {
520 1.1 dyoung fp->inuse ^= 1 << slot;
521 1.1 dyoung fp->tag[slot] = 0;
522 1.1 dyoung
523 1.1 dyoung /* When we delete entries, we do not compact. This is
524 1.1 dyoung * due to temporality. We add entries, and they
525 1.1 dyoung * (eventually) expire. Older entries will be further
526 1.1 dyoung * down the chain.
527 1.1 dyoung */
528 1.1 dyoung if (!fp->inuse) {
529 1.1 dyoung uint32_t hi = tag & fat->mask;
530 1.1 dyoung fatp_t *fq = 0;
531 1.1 dyoung fatp_t *fr = fat->hash[hi];
532 1.1 dyoung
/* Walk the bucket chain until fp is found; fq trails one behind. */
533 1.1 dyoung while (fr && fr != fp) {
534 1.1 dyoung fr = fatp_next(fat, fq = fr);
535 1.1 dyoung }
536 1.1 dyoung
537 1.1 dyoung if (fr == fp) {
538 1.1 dyoung if (fq) {
/* fp is in the middle or at the end of the chain: bypass it. */
539 1.1 dyoung fq->nxt = fp->nxt;
540 1.1 dyoung fp->nxt = 0;
541 1.1 dyoung fatp_free(fat, fp);
542 1.1 dyoung } else {
543 1.1 dyoung KASSERT(fat->hash[hi] == fp);
544 1.1 dyoung
545 1.1 dyoung if (fp->nxt) {
/* fp heads the chain and has a successor: promote the successor. */
546 1.1 dyoung fat->hash[hi]
547 1.1 dyoung = fatp_next(fat, fp);
548 1.1 dyoung fp->nxt = 0;
549 1.1 dyoung fatp_free(fat, fp);
550 1.1 dyoung } else {
551 1.1 dyoung /* retain for next use.
552 1.1 dyoung */
553 1.1 dyoung ;
554 1.1 dyoung }
555 1.1 dyoung }
556 1.1 dyoung } else {
/* fp was not on the chain it should be on.  Trace the whole chain
 * for diagnosis, then assert.
 */
557 1.1 dyoung fr = fat->hash[hi];
558 1.1 dyoung
559 1.1 dyoung do {
560 1.1 dyoung db_trace(KTR_VTW
561 1.1 dyoung , (fr
562 1.1 dyoung , "fat:*del inuse %5.5x"
563 1.1 dyoung " nxt %x"
564 1.1 dyoung , fr->inuse, fr->nxt));
565 1.1 dyoung
566 1.1 dyoung fr = fatp_next(fat, fq = fr);
567 1.1 dyoung } while (fr && fr != fp);
568 1.1 dyoung
569 1.1 dyoung KASSERT(0 && "oops");
570 1.1 dyoung }
571 1.1 dyoung }
/* Complement the stored key now that the slot has been released. */
572 1.1 dyoung vtw->key ^= ~0;
573 1.1 dyoung }
574 1.1 dyoung
/* Switch to the lport-only tag.  NOTE(review): with neither family
 * flag set, tag keeps its previous value; the first family test
 * above already asserts in that case.
 */
575 1.1 dyoung if (fat->vtw->is_v4) {
576 1.1 dyoung tag = v4_port_tag(v4->lport);
577 1.1 dyoung } else if (fat->vtw->is_v6) {
578 1.1 dyoung tag = v6_port_tag(v6->lport);
579 1.1 dyoung }
580 1.1 dyoung
581 1.1 dyoung /* Remove from fat->port[]
582 1.1 dyoung */
583 1.1 dyoung key = vtw->port_key;
584 1.1 dyoung slot = fatp_slot_from_key(fat, key);
585 1.1 dyoung fp = fatp_from_key(fat, key);
586 1.1 dyoung idx = vtw_index(ctl, vtw);
587 1.1 dyoung
588 1.1 dyoung db_trace(KTR_VTW
589 1.1 dyoung , (fp, "fatport: del inuse %5.5x"
590 1.1 dyoung " slot %x idx %x key %x tag %x"
591 1.1 dyoung , fp->inuse, slot, idx, key, tag));
592 1.1 dyoung
593 1.1 dyoung KASSERT(fp->inuse & (1 << slot));
594 1.1 dyoung KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
595 1.1 dyoung ^ fatp_xtra[slot]));
596 1.1 dyoung
597 1.1 dyoung if ((fp->inuse & (1 << slot))
598 1.1 dyoung && fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
599 1.1 dyoung ^ fatp_xtra[slot])) {
600 1.1 dyoung fp->inuse ^= 1 << slot;
601 1.1 dyoung fp->tag[slot] = 0;
602 1.1 dyoung
603 1.1 dyoung if (!fp->inuse) {
604 1.1 dyoung uint32_t hi = tag & fat->mask;
605 1.1 dyoung fatp_t *fq = 0;
606 1.1 dyoung fatp_t *fr = fat->port[hi];
607 1.1 dyoung
608 1.1 dyoung while (fr && fr != fp) {
609 1.1 dyoung fr = fatp_next(fat, fq = fr);
610 1.1 dyoung }
611 1.1 dyoung
/* Same unlink cases as for the full hash.  Unlike there, a fat
 * pointer that is missing from its chain is silently ignored here.
 */
612 1.1 dyoung if (fr == fp) {
613 1.1 dyoung if (fq) {
614 1.1 dyoung fq->nxt = fp->nxt;
615 1.1 dyoung fp->nxt = 0;
616 1.1 dyoung fatp_free(fat, fp);
617 1.1 dyoung } else {
618 1.1 dyoung KASSERT(fat->port[hi] == fp);
619 1.1 dyoung
620 1.1 dyoung if (fp->nxt) {
621 1.1 dyoung fat->port[hi]
622 1.1 dyoung = fatp_next(fat, fp);
623 1.1 dyoung fp->nxt = 0;
624 1.1 dyoung fatp_free(fat, fp);
625 1.1 dyoung } else {
626 1.1 dyoung /* retain for next use.
627 1.1 dyoung */
628 1.1 dyoung ;
629 1.1 dyoung }
630 1.1 dyoung }
631 1.1 dyoung }
632 1.1 dyoung }
633 1.1 dyoung vtw->port_key ^= ~0;
634 1.1 dyoung }
635 1.1 dyoung
/* The entry is now out of both hashes (or was never found in them). */
636 1.1 dyoung vtw->hashed = 0;
637 1.1 dyoung }
638 1.1 dyoung
639 1.1 dyoung /*!\brief remove entry from hash, possibly free.
640 1.1 dyoung */
641 1.1 dyoung void
642 1.1 dyoung vtw_del(vtw_ctl_t *ctl, vtw_t *vtw)
643 1.1 dyoung {
644 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
645 1.1 dyoung
646 1.1 dyoung if (vtw->hashed) {
647 1.1 dyoung ++vtw_stats.del;
648 1.1 dyoung vtw_unhash(ctl, vtw);
649 1.1 dyoung }
650 1.1 dyoung
651 1.1 dyoung /* We only delete the oldest entry.
652 1.1 dyoung */
653 1.1 dyoung if (vtw != ctl->oldest.v)
654 1.1 dyoung return;
655 1.1 dyoung
656 1.1 dyoung --ctl->nalloc;
657 1.1 dyoung ++ctl->nfree;
658 1.1 dyoung
659 1.1 dyoung vtw->expire.tv_sec = 0;
660 1.1 dyoung vtw->expire.tv_usec = ~0;
661 1.1 dyoung
662 1.1 dyoung if (!ctl->nalloc)
663 1.1 dyoung ctl->oldest.v = 0;
664 1.1 dyoung
665 1.1 dyoung ctl->oldest.v = vtw_next(ctl, vtw);
666 1.1 dyoung }
667 1.1 dyoung
668 1.1 dyoung /*!\brief insert vestigeal timewait in hash chain
669 1.1 dyoung */
670 1.1 dyoung static void
671 1.1 dyoung vtw_inshash_v4(vtw_ctl_t *ctl, vtw_t *vtw)
672 1.1 dyoung {
673 1.1 dyoung uint32_t idx = vtw_index(ctl, vtw);
674 1.1 dyoung uint32_t tag;
675 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
676 1.1 dyoung
677 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
678 1.1 dyoung KASSERT(!vtw->hashed);
679 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
680 1.1 dyoung
681 1.1 dyoung ++vtw_stats.ins;
682 1.1 dyoung
683 1.1 dyoung tag = v4_tag(v4->faddr, v4->fport,
684 1.1 dyoung v4->laddr, v4->lport);
685 1.1 dyoung
686 1.1 dyoung vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw);
687 1.1 dyoung
688 1.1 dyoung db_trace(KTR_VTW, (ctl
689 1.1 dyoung , "vtw: ins %8.8x:%4.4x %8.8x:%4.4x"
690 1.1 dyoung " tag %8.8x key %8.8x"
691 1.1 dyoung , v4->faddr, v4->fport
692 1.1 dyoung , v4->laddr, v4->lport
693 1.1 dyoung , tag
694 1.1 dyoung , vtw->key));
695 1.1 dyoung
696 1.1 dyoung tag = v4_port_tag(v4->lport);
697 1.1 dyoung vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw);
698 1.1 dyoung
699 1.1 dyoung db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x"
700 1.1 dyoung , v4->lport, v4->lport
701 1.1 dyoung , tag
702 1.1 dyoung , vtw->key));
703 1.1 dyoung
704 1.1 dyoung vtw->hashed = 1;
705 1.1 dyoung }
706 1.1 dyoung
707 1.1 dyoung /*!\brief insert vestigeal timewait in hash chain
708 1.1 dyoung */
709 1.1 dyoung static void
710 1.1 dyoung vtw_inshash_v6(vtw_ctl_t *ctl, vtw_t *vtw)
711 1.1 dyoung {
712 1.1 dyoung uint32_t idx = vtw_index(ctl, vtw);
713 1.1 dyoung uint32_t tag;
714 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
715 1.1 dyoung
716 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
717 1.1 dyoung KASSERT(!vtw->hashed);
718 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
719 1.1 dyoung
720 1.1 dyoung ++vtw_stats.ins;
721 1.1 dyoung
722 1.1 dyoung tag = v6_tag(&v6->faddr, v6->fport,
723 1.1 dyoung &v6->laddr, v6->lport);
724 1.1 dyoung
725 1.1 dyoung vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw);
726 1.1 dyoung
727 1.1 dyoung tag = v6_port_tag(v6->lport);
728 1.1 dyoung vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw);
729 1.1 dyoung
730 1.1 dyoung db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x"
731 1.1 dyoung , v6->lport, v6->lport
732 1.1 dyoung , tag
733 1.1 dyoung , vtw->key));
734 1.1 dyoung
735 1.1 dyoung vtw->hashed = 1;
736 1.1 dyoung }
737 1.1 dyoung
/*!\brief look up an IPv4 entry in one of the two fatp hashes.
 *
 *\param ctl	control block to search; a null ctl or ctl->fat
 *		yields 0 without touching the statistics
 *\param which	0 searches the full hash and matches on both
 *		addresses and both ports; non-zero searches the
 *		lport-only hash and matches on lport alone.  It also
 *		selects which element of each vtw_stats array is updated.
 *
 *\returns
 *	the matching live entry, or 0 when there is none.
 */
738 1.1 dyoung static vtw_t *
739 1.1 dyoung vtw_lookup_hash_v4(vtw_ctl_t *ctl, uint32_t faddr, uint16_t fport
740 1.1 dyoung , uint32_t laddr, uint16_t lport
741 1.1 dyoung , int which)
742 1.1 dyoung {
743 1.1 dyoung vtw_v4_t *v4;
744 1.1 dyoung vtw_t *vtw;
745 1.1 dyoung uint32_t tag;
746 1.1 dyoung fatp_t *fp;
747 1.1 dyoung int i;
/* Per-call tallies, folded into the max_* statistics on the way out. */
748 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
749 1.1 dyoung
750 1.1 dyoung if (!ctl || !ctl->fat)
751 1.1 dyoung return 0;
752 1.1 dyoung
753 1.1 dyoung ++vtw_stats.look[which];
754 1.1 dyoung
/* Pick the bucket: port hash keyed on lport, or full hash keyed on
 * the whole 4-tuple.
 */
755 1.1 dyoung if (which) {
756 1.1 dyoung tag = v4_port_tag(lport);
757 1.1 dyoung fp = ctl->fat->port[tag & ctl->fat->mask];
758 1.1 dyoung } else {
759 1.1 dyoung tag = v4_tag(faddr, fport, laddr, lport);
760 1.1 dyoung fp = ctl->fat->hash[tag & ctl->fat->mask];
761 1.1 dyoung }
762 1.1 dyoung
/* Walk the chain of fat pointers; an empty one ends the search. */
763 1.1 dyoung while (fp && fp->inuse) {
764 1.1 dyoung uint32_t inuse = fp->inuse;
765 1.1 dyoung
766 1.1 dyoung ++fatps;
767 1.1 dyoung
/* Visit in-use slots lowest first; stop once none remain in `inuse'. */
768 1.1 dyoung for (i = 0; inuse && i < fatp_ntags(); ++i) {
769 1.1 dyoung uint32_t idx;
770 1.1 dyoung
771 1.1 dyoung if (!(inuse & (1 << i)))
772 1.1 dyoung continue;
773 1.1 dyoung
774 1.1 dyoung inuse ^= 1 << i;
775 1.1 dyoung
776 1.1 dyoung ++probes;
777 1.1 dyoung ++vtw_stats.probe[which];
778 1.1 dyoung
/* Undo the XORs applied at insert time.  If the slot was stored
 * under this tag, what remains is a validly encoded index; otherwise
 * vtw_from_index() will most likely reject it without dereferencing
 * any entry.
 */
779 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
780 1.1 dyoung vtw = vtw_from_index(ctl, idx);
781 1.1 dyoung
782 1.1 dyoung if (!vtw) {
783 1.1 dyoung /* Hopefully fast path.
784 1.1 dyoung */
785 1.1 dyoung db_trace(KTR_VTW
786 1.1 dyoung , (fp, "vtw: fast %A:%P %A:%P"
787 1.1 dyoung " idx %x tag %x"
788 1.1 dyoung , faddr, fport
789 1.1 dyoung , laddr, lport
790 1.1 dyoung , idx, tag));
791 1.1 dyoung continue;
792 1.1 dyoung }
793 1.1 dyoung
794 1.1 dyoung v4 = (void*)vtw;
795 1.1 dyoung
796 1.1 dyoung /* The de-referencing of vtw is what we want to avoid.
797 1.1 dyoung * Losing.
798 1.1 dyoung */
/* Confirm the candidate: it must be live, its stored key must name
 * exactly this fat pointer and slot, and the ports/addresses must
 * match (lport only when searching the port hash).
 */
799 1.1 dyoung if (vtw_alive(vtw)
800 1.1 dyoung && ((which ? vtw->port_key : vtw->key)
801 1.1 dyoung == fatp_key(ctl->fat, fp, i))
802 1.1 dyoung && (which
803 1.1 dyoung || (v4->faddr == faddr && v4->laddr == laddr
804 1.1 dyoung && v4->fport == fport))
805 1.1 dyoung && v4->lport == lport) {
806 1.1 dyoung ++vtw_stats.hit[which];
807 1.1 dyoung
808 1.1 dyoung db_trace(KTR_VTW
809 1.1 dyoung , (fp, "vtw: hit %8.8x:%4.4x"
810 1.1 dyoung " %8.8x:%4.4x idx %x key %x"
811 1.1 dyoung , faddr, fport
812 1.1 dyoung , laddr, lport
813 1.1 dyoung , idx_decode(ctl, idx), vtw->key));
814 1.1 dyoung
815 1.1 dyoung KASSERT(vtw->hashed);
816 1.1 dyoung
817 1.1 dyoung goto out;
818 1.1 dyoung }
/* The entry was dereferenced but did not match: a "losing" probe.
 * Everything from here to the end of the slot loop is tracing only.
 */
819 1.1 dyoung ++vtw_stats.losing[which];
820 1.1 dyoung ++losings;
821 1.1 dyoung
822 1.1 dyoung if (vtw_alive(vtw)) {
823 1.1 dyoung db_trace(KTR_VTW
824 1.1 dyoung , (fp, "vtw:!mis %8.8x:%4.4x"
825 1.1 dyoung " %8.8x:%4.4x key %x tag %x"
826 1.1 dyoung , faddr, fport
827 1.1 dyoung , laddr, lport
828 1.1 dyoung , fatp_key(ctl->fat, fp, i)
829 1.1 dyoung , v4_tag(faddr, fport
830 1.1 dyoung , laddr, lport)));
831 1.1 dyoung db_trace(KTR_VTW
832 1.1 dyoung , (vtw, "vtw:!mis %8.8x:%4.4x"
833 1.1 dyoung " %8.8x:%4.4x key %x tag %x"
834 1.1 dyoung , v4->faddr, v4->fport
835 1.1 dyoung , v4->laddr, v4->lport
836 1.1 dyoung , vtw->key
837 1.1 dyoung , v4_tag(v4->faddr, v4->fport
838 1.1 dyoung , v4->laddr, v4->lport)));
839 1.1 dyoung
840 1.1 dyoung if (vtw->key == fatp_key(ctl->fat, fp, i)) {
841 1.1 dyoung db_trace(KTR_VTW
842 1.1 dyoung , (vtw, "vtw:!mis %8.8x:%4.4x"
843 1.1 dyoung " %8.8x:%4.4x key %x"
844 1.1 dyoung " which %x"
845 1.1 dyoung , v4->faddr, v4->fport
846 1.1 dyoung , v4->laddr, v4->lport
847 1.1 dyoung , vtw->key
848 1.1 dyoung , which));
849 1.1 dyoung
850 1.1 dyoung } else {
851 1.1 dyoung db_trace(KTR_VTW
852 1.1 dyoung , (vtw
853 1.1 dyoung , "vtw:!mis"
854 1.1 dyoung " key %8.8x != %8.8x"
855 1.1 dyoung " idx %x i %x which %x"
856 1.1 dyoung , vtw->key
857 1.1 dyoung , fatp_key(ctl->fat, fp, i)
858 1.1 dyoung , idx_decode(ctl, idx)
859 1.1 dyoung , i
860 1.1 dyoung , which));
861 1.1 dyoung }
862 1.1 dyoung } else {
863 1.1 dyoung db_trace(KTR_VTW
864 1.1 dyoung , (fp
865 1.1 dyoung , "vtw:!mis free entry"
866 1.1 dyoung " idx %x vtw %p which %x"
867 1.1 dyoung , idx_decode(ctl, idx)
868 1.1 dyoung , vtw, which));
869 1.1 dyoung }
870 1.1 dyoung }
871 1.1 dyoung
/* Move on to the next fat pointer in the bucket, if any. */
872 1.1 dyoung if (fp->nxt) {
873 1.1 dyoung fp = fatp_next(ctl->fat, fp);
874 1.1 dyoung } else {
875 1.1 dyoung break;
876 1.1 dyoung }
877 1.1 dyoung }
/* Chain exhausted without a match. */
878 1.1 dyoung ++vtw_stats.miss[which];
879 1.1 dyoung vtw = 0;
880 1.1 dyoung out:
/* Record new high-water marks for this lookup's effort. */
881 1.1 dyoung if (fatps > vtw_stats.max_chain[which])
882 1.1 dyoung vtw_stats.max_chain[which] = fatps;
883 1.1 dyoung if (probes > vtw_stats.max_probe[which])
884 1.1 dyoung vtw_stats.max_probe[which] = probes;
885 1.1 dyoung if (losings > vtw_stats.max_loss[which])
886 1.1 dyoung vtw_stats.max_loss[which] = losings;
887 1.1 dyoung
888 1.1 dyoung return vtw;
889 1.1 dyoung }
890 1.1 dyoung
891 1.1 dyoung static vtw_t *
892 1.1 dyoung vtw_lookup_hash_v6(vtw_ctl_t *ctl, const struct in6_addr *faddr, uint16_t fport
893 1.1 dyoung , const struct in6_addr *laddr, uint16_t lport
894 1.1 dyoung , int which)
895 1.1 dyoung {
896 1.1 dyoung vtw_v6_t *v6;
897 1.1 dyoung vtw_t *vtw;
898 1.1 dyoung uint32_t tag;
899 1.1 dyoung fatp_t *fp;
900 1.1 dyoung int i;
901 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
902 1.1 dyoung
903 1.1 dyoung ++vtw_stats.look[which];
904 1.1 dyoung
905 1.1 dyoung if (!ctl || !ctl->fat)
906 1.1 dyoung return 0;
907 1.1 dyoung
908 1.1 dyoung if (which) {
909 1.1 dyoung tag = v6_port_tag(lport);
910 1.1 dyoung fp = ctl->fat->port[tag & ctl->fat->mask];
911 1.1 dyoung } else {
912 1.1 dyoung tag = v6_tag(faddr, fport, laddr, lport);
913 1.1 dyoung fp = ctl->fat->hash[tag & ctl->fat->mask];
914 1.1 dyoung }
915 1.1 dyoung
916 1.1 dyoung while (fp && fp->inuse) {
917 1.1 dyoung uint32_t inuse = fp->inuse;
918 1.1 dyoung
919 1.1 dyoung ++fatps;
920 1.1 dyoung
921 1.1 dyoung for (i = 0; inuse && i < fatp_ntags(); ++i) {
922 1.1 dyoung uint32_t idx;
923 1.1 dyoung
924 1.1 dyoung if (!(inuse & (1 << i)))
925 1.1 dyoung continue;
926 1.1 dyoung
927 1.1 dyoung inuse ^= 1 << i;
928 1.1 dyoung
929 1.1 dyoung ++probes;
930 1.1 dyoung ++vtw_stats.probe[which];
931 1.1 dyoung
932 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
933 1.1 dyoung vtw = vtw_from_index(ctl, idx);
934 1.1 dyoung
935 1.1 dyoung db_trace(KTR_VTW
936 1.1 dyoung , (fp, "probe: %2d %6A:%4.4x %6A:%4.4x idx %x"
937 1.1 dyoung , i
938 1.1 dyoung , db_store(faddr, sizeof (*faddr)), fport
939 1.1 dyoung , db_store(laddr, sizeof (*laddr)), lport
940 1.1 dyoung , idx_decode(ctl, idx)));
941 1.1 dyoung
942 1.1 dyoung if (!vtw) {
943 1.1 dyoung /* Hopefully fast path.
944 1.1 dyoung */
945 1.1 dyoung continue;
946 1.1 dyoung }
947 1.1 dyoung
948 1.1 dyoung v6 = (void*)vtw;
949 1.1 dyoung
950 1.1 dyoung if (vtw_alive(vtw)
951 1.1 dyoung && ((which ? vtw->port_key : vtw->key)
952 1.1 dyoung == fatp_key(ctl->fat, fp, i))
953 1.1 dyoung && v6->lport == lport
954 1.1 dyoung && (which
955 1.1 dyoung || (v6->fport == fport
956 1.1 dyoung && !bcmp(&v6->faddr, faddr, sizeof (*faddr))
957 1.1 dyoung && !bcmp(&v6->laddr, laddr
958 1.1 dyoung , sizeof (*laddr))))) {
959 1.1 dyoung ++vtw_stats.hit[which];
960 1.1 dyoung
961 1.1 dyoung KASSERT(vtw->hashed);
962 1.1 dyoung goto out;
963 1.1 dyoung } else {
964 1.1 dyoung ++vtw_stats.losing[which];
965 1.1 dyoung ++losings;
966 1.1 dyoung }
967 1.1 dyoung }
968 1.1 dyoung
969 1.1 dyoung if (fp->nxt) {
970 1.1 dyoung fp = fatp_next(ctl->fat, fp);
971 1.1 dyoung } else {
972 1.1 dyoung break;
973 1.1 dyoung }
974 1.1 dyoung }
975 1.1 dyoung ++vtw_stats.miss[which];
976 1.1 dyoung vtw = 0;
977 1.1 dyoung out:
978 1.1 dyoung if (fatps > vtw_stats.max_chain[which])
979 1.1 dyoung vtw_stats.max_chain[which] = fatps;
980 1.1 dyoung if (probes > vtw_stats.max_probe[which])
981 1.1 dyoung vtw_stats.max_probe[which] = probes;
982 1.1 dyoung if (losings > vtw_stats.max_loss[which])
983 1.1 dyoung vtw_stats.max_loss[which] = losings;
984 1.1 dyoung
985 1.1 dyoung return vtw;
986 1.1 dyoung }
987 1.1 dyoung
988 1.1 dyoung /*!\brief port iterator
989 1.1 dyoung */
990 1.1 dyoung static vtw_t *
991 1.1 dyoung vtw_next_port_v4(struct tcp_ports_iterator *it)
992 1.1 dyoung {
993 1.1 dyoung vtw_ctl_t *ctl = it->ctl;
994 1.1 dyoung vtw_v4_t *v4;
995 1.1 dyoung vtw_t *vtw;
996 1.1 dyoung uint32_t tag;
997 1.1 dyoung uint16_t lport = it->port;
998 1.1 dyoung fatp_t *fp;
999 1.1 dyoung int i;
1000 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
1001 1.1 dyoung
1002 1.1 dyoung tag = v4_port_tag(lport);
1003 1.1 dyoung if (!it->fp) {
1004 1.1 dyoung it->fp = ctl->fat->port[tag & ctl->fat->mask];
1005 1.1 dyoung it->slot_idx = 0;
1006 1.1 dyoung }
1007 1.1 dyoung fp = it->fp;
1008 1.1 dyoung
1009 1.1 dyoung while (fp) {
1010 1.1 dyoung uint32_t inuse = fp->inuse;
1011 1.1 dyoung
1012 1.1 dyoung ++fatps;
1013 1.1 dyoung
1014 1.1 dyoung for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) {
1015 1.1 dyoung uint32_t idx;
1016 1.1 dyoung
1017 1.1 dyoung if (!(inuse & (1 << i)))
1018 1.1 dyoung continue;
1019 1.1 dyoung
1020 1.1 dyoung inuse &= ~0 << i;
1021 1.1 dyoung
1022 1.1 dyoung if (i < it->slot_idx)
1023 1.1 dyoung continue;
1024 1.1 dyoung
1025 1.1 dyoung ++vtw_stats.probe[1];
1026 1.1 dyoung ++probes;
1027 1.1 dyoung
1028 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
1029 1.1 dyoung vtw = vtw_from_index(ctl, idx);
1030 1.1 dyoung
1031 1.1 dyoung if (!vtw) {
1032 1.1 dyoung /* Hopefully fast path.
1033 1.1 dyoung */
1034 1.1 dyoung continue;
1035 1.1 dyoung }
1036 1.1 dyoung
1037 1.1 dyoung v4 = (void*)vtw;
1038 1.1 dyoung
1039 1.1 dyoung if (vtw_alive(vtw)
1040 1.1 dyoung && vtw->port_key == fatp_key(ctl->fat, fp, i)
1041 1.1 dyoung && v4->lport == lport) {
1042 1.1 dyoung ++vtw_stats.hit[1];
1043 1.1 dyoung
1044 1.1 dyoung it->slot_idx = i + 1;
1045 1.1 dyoung
1046 1.1 dyoung goto out;
1047 1.1 dyoung } else if (vtw_alive(vtw)) {
1048 1.1 dyoung ++vtw_stats.losing[1];
1049 1.1 dyoung ++losings;
1050 1.1 dyoung
1051 1.1 dyoung db_trace(KTR_VTW
1052 1.1 dyoung , (vtw, "vtw:!mis"
1053 1.1 dyoung " port %8.8x:%4.4x %8.8x:%4.4x"
1054 1.1 dyoung " key %x port %x"
1055 1.1 dyoung , v4->faddr, v4->fport
1056 1.1 dyoung , v4->laddr, v4->lport
1057 1.1 dyoung , vtw->key
1058 1.1 dyoung , lport));
1059 1.1 dyoung } else {
1060 1.1 dyoung /* Really losing here. We are coming
1061 1.1 dyoung * up with references to free entries.
1062 1.1 dyoung * Might find it better to use
1063 1.1 dyoung * traditional, or need another
1064 1.1 dyoung * add-hockery. The other add-hockery
1065 1.1 dyoung * would be to pul more into into the
1066 1.1 dyoung * cache line to reject the false
1067 1.1 dyoung * hits.
1068 1.1 dyoung */
1069 1.1 dyoung ++vtw_stats.losing[1];
1070 1.1 dyoung ++losings;
1071 1.1 dyoung db_trace(KTR_VTW
1072 1.1 dyoung , (fp, "vtw:!mis port %x"
1073 1.1 dyoung " - free entry idx %x vtw %p"
1074 1.1 dyoung , lport
1075 1.1 dyoung , idx_decode(ctl, idx)
1076 1.1 dyoung , vtw));
1077 1.1 dyoung }
1078 1.1 dyoung }
1079 1.1 dyoung
1080 1.1 dyoung if (fp->nxt) {
1081 1.1 dyoung it->fp = fp = fatp_next(ctl->fat, fp);
1082 1.1 dyoung it->slot_idx = 0;
1083 1.1 dyoung } else {
1084 1.1 dyoung it->fp = 0;
1085 1.1 dyoung break;
1086 1.1 dyoung }
1087 1.1 dyoung }
1088 1.1 dyoung ++vtw_stats.miss[1];
1089 1.1 dyoung
1090 1.1 dyoung vtw = 0;
1091 1.1 dyoung out:
1092 1.1 dyoung if (fatps > vtw_stats.max_chain[1])
1093 1.1 dyoung vtw_stats.max_chain[1] = fatps;
1094 1.1 dyoung if (probes > vtw_stats.max_probe[1])
1095 1.1 dyoung vtw_stats.max_probe[1] = probes;
1096 1.1 dyoung if (losings > vtw_stats.max_loss[1])
1097 1.1 dyoung vtw_stats.max_loss[1] = losings;
1098 1.1 dyoung
1099 1.1 dyoung return vtw;
1100 1.1 dyoung }
1101 1.1 dyoung
1102 1.1 dyoung /*!\brief port iterator
1103 1.1 dyoung */
1104 1.1 dyoung static vtw_t *
1105 1.1 dyoung vtw_next_port_v6(struct tcp_ports_iterator *it)
1106 1.1 dyoung {
1107 1.1 dyoung vtw_ctl_t *ctl = it->ctl;
1108 1.1 dyoung vtw_v6_t *v6;
1109 1.1 dyoung vtw_t *vtw;
1110 1.1 dyoung uint32_t tag;
1111 1.1 dyoung uint16_t lport = it->port;
1112 1.1 dyoung fatp_t *fp;
1113 1.1 dyoung int i;
1114 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
1115 1.1 dyoung
1116 1.1 dyoung tag = v6_port_tag(lport);
1117 1.1 dyoung if (!it->fp) {
1118 1.1 dyoung it->fp = ctl->fat->port[tag & ctl->fat->mask];
1119 1.1 dyoung it->slot_idx = 0;
1120 1.1 dyoung }
1121 1.1 dyoung fp = it->fp;
1122 1.1 dyoung
1123 1.1 dyoung while (fp) {
1124 1.1 dyoung uint32_t inuse = fp->inuse;
1125 1.1 dyoung
1126 1.1 dyoung ++fatps;
1127 1.1 dyoung
1128 1.1 dyoung for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) {
1129 1.1 dyoung uint32_t idx;
1130 1.1 dyoung
1131 1.1 dyoung if (!(inuse & (1 << i)))
1132 1.1 dyoung continue;
1133 1.1 dyoung
1134 1.1 dyoung inuse &= ~0 << i;
1135 1.1 dyoung
1136 1.1 dyoung if (i < it->slot_idx)
1137 1.1 dyoung continue;
1138 1.1 dyoung
1139 1.1 dyoung ++vtw_stats.probe[1];
1140 1.1 dyoung ++probes;
1141 1.1 dyoung
1142 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
1143 1.1 dyoung vtw = vtw_from_index(ctl, idx);
1144 1.1 dyoung
1145 1.1 dyoung if (!vtw) {
1146 1.1 dyoung /* Hopefully fast path.
1147 1.1 dyoung */
1148 1.1 dyoung continue;
1149 1.1 dyoung }
1150 1.1 dyoung
1151 1.1 dyoung v6 = (void*)vtw;
1152 1.1 dyoung
1153 1.1 dyoung db_trace(KTR_VTW
1154 1.1 dyoung , (vtw, "vtw: i %x idx %x fp->tag %x"
1155 1.1 dyoung " tag %x xtra %x"
1156 1.1 dyoung , i, idx_decode(ctl, idx)
1157 1.1 dyoung , fp->tag[i], tag, fatp_xtra[i]));
1158 1.1 dyoung
1159 1.1 dyoung if (vtw_alive(vtw)
1160 1.1 dyoung && vtw->port_key == fatp_key(ctl->fat, fp, i)
1161 1.1 dyoung && v6->lport == lport) {
1162 1.1 dyoung ++vtw_stats.hit[1];
1163 1.1 dyoung
1164 1.1 dyoung db_trace(KTR_VTW
1165 1.1 dyoung , (fp, "vtw: nxt port %P - %4.4x"
1166 1.1 dyoung " idx %x key %x"
1167 1.1 dyoung , lport, lport
1168 1.1 dyoung , idx_decode(ctl, idx), vtw->key));
1169 1.1 dyoung
1170 1.1 dyoung it->slot_idx = i + 1;
1171 1.1 dyoung goto out;
1172 1.1 dyoung } else if (vtw_alive(vtw)) {
1173 1.1 dyoung ++vtw_stats.losing[1];
1174 1.1 dyoung
1175 1.1 dyoung db_trace(KTR_VTW
1176 1.1 dyoung , (vtw, "vtw:!mis port %6A:%4.4x"
1177 1.1 dyoung " %6A:%4.4x key %x port %x"
1178 1.1 dyoung , db_store(&v6->faddr
1179 1.1 dyoung , sizeof (v6->faddr))
1180 1.1 dyoung , v6->fport
1181 1.1 dyoung , db_store(&v6->laddr
1182 1.1 dyoung , sizeof (v6->faddr))
1183 1.1 dyoung , v6->lport
1184 1.1 dyoung , vtw->key
1185 1.1 dyoung , lport));
1186 1.1 dyoung } else {
1187 1.1 dyoung /* Really losing here. We are coming
1188 1.1 dyoung * up with references to free entries.
1189 1.1 dyoung * Might find it better to use
1190 1.1 dyoung * traditional, or need another
1191 1.1 dyoung * add-hockery. The other add-hockery
1192 1.1 dyoung * would be to pul more into into the
1193 1.1 dyoung * cache line to reject the false
1194 1.1 dyoung * hits.
1195 1.1 dyoung */
1196 1.1 dyoung ++vtw_stats.losing[1];
1197 1.1 dyoung ++losings;
1198 1.1 dyoung
1199 1.1 dyoung db_trace(KTR_VTW
1200 1.1 dyoung , (fp
1201 1.1 dyoung , "vtw:!mis port %x"
1202 1.1 dyoung " - free entry idx %x vtw %p"
1203 1.1 dyoung , lport, idx_decode(ctl, idx)
1204 1.1 dyoung , vtw));
1205 1.1 dyoung }
1206 1.1 dyoung }
1207 1.1 dyoung
1208 1.1 dyoung if (fp->nxt) {
1209 1.1 dyoung it->fp = fp = fatp_next(ctl->fat, fp);
1210 1.1 dyoung it->slot_idx = 0;
1211 1.1 dyoung } else {
1212 1.1 dyoung it->fp = 0;
1213 1.1 dyoung break;
1214 1.1 dyoung }
1215 1.1 dyoung }
1216 1.1 dyoung ++vtw_stats.miss[1];
1217 1.1 dyoung
1218 1.1 dyoung vtw = 0;
1219 1.1 dyoung out:
1220 1.1 dyoung if (fatps > vtw_stats.max_chain[1])
1221 1.1 dyoung vtw_stats.max_chain[1] = fatps;
1222 1.1 dyoung if (probes > vtw_stats.max_probe[1])
1223 1.1 dyoung vtw_stats.max_probe[1] = probes;
1224 1.1 dyoung if (losings > vtw_stats.max_loss[1])
1225 1.1 dyoung vtw_stats.max_loss[1] = losings;
1226 1.1 dyoung
1227 1.1 dyoung return vtw;
1228 1.1 dyoung }
1229 1.1 dyoung
1230 1.1 dyoung /*!\brief initialise the VTW allocation arena
1231 1.1 dyoung *
1232 1.1 dyoung * There are 1+3 allocation classes:
1233 1.1 dyoung * 0 classless
1234 1.1 dyoung * {1,2,3} MSL-class based allocation
1235 1.1 dyoung *
1236 1.1 dyoung * The allocation arenas are all initialised. Classless gets all the
1237 1.1 dyoung * space. MSL-class based divides the arena, so that allocation
1238 1.1 dyoung * within a class can proceed without having to consider entries
1239 1.1 dyoung * (aka: cache lines) from different classes.
1240 1.1 dyoung *
1241 1.1 dyoung * Usually, we are completely classless or class-based, but there can be
1242 1.1 dyoung * transition periods, corresponding to dynamic adjustments in the config
1243 1.1 dyoung * by the operator.
1244 1.1 dyoung */
1245 1.1 dyoung static void
1246 1.1 dyoung vtw_init(fatp_ctl_t *fat, vtw_ctl_t *ctl, uint32_t n)
1247 1.1 dyoung {
1248 1.1 dyoung int i;
1249 1.1 dyoung int sz = (ctl->is_v4 ? sizeof (vtw_v4_t) : sizeof (vtw_v6_t));
1250 1.1 dyoung
1251 1.1 dyoung ctl->base.v4 = kmem_alloc(n * sz, KM_SLEEP);
1252 1.1 dyoung if (ctl->base.v4) {
1253 1.1 dyoung vtw_t *base;
1254 1.1 dyoung int class_n;
1255 1.1 dyoung
1256 1.1 dyoung bzero(ctl->base.v4, n * sz);
1257 1.1 dyoung
1258 1.1 dyoung if (ctl->is_v4) {
1259 1.1 dyoung ctl->lim.v4 = ctl->base.v4 + n - 1;
1260 1.1 dyoung ctl->alloc.v4 = ctl->base.v4;
1261 1.1 dyoung } else {
1262 1.1 dyoung ctl->lim.v6 = ctl->base.v6 + n - 1;
1263 1.1 dyoung ctl->alloc.v6 = ctl->base.v6;
1264 1.1 dyoung }
1265 1.1 dyoung
1266 1.1 dyoung ctl->nfree = n;
1267 1.1 dyoung ctl->ctl = ctl;
1268 1.1 dyoung
1269 1.1 dyoung ctl->idx_bits = 32;
1270 1.1 dyoung for (ctl->idx_mask = ~0; (ctl->idx_mask & (n-1)) == n-1; ) {
1271 1.1 dyoung ctl->idx_mask >>= 1;
1272 1.1 dyoung ctl->idx_bits -= 1;
1273 1.1 dyoung }
1274 1.1 dyoung
1275 1.1 dyoung ctl->idx_mask <<= 1;
1276 1.1 dyoung ctl->idx_mask |= 1;
1277 1.1 dyoung ctl->idx_bits += 1;
1278 1.1 dyoung
1279 1.1 dyoung ctl->fat = fat;
1280 1.1 dyoung fat->vtw = ctl;
1281 1.1 dyoung
1282 1.1 dyoung /* Divide the resources equally amongst the classes.
1283 1.1 dyoung * This is not optimal, as the different classes
1284 1.1 dyoung * arrive and leave at different rates, but it is
1285 1.1 dyoung * the best I can do for now.
1286 1.1 dyoung */
1287 1.1 dyoung class_n = n / (VTW_NCLASS-1);
1288 1.1 dyoung base = ctl->base.v;
1289 1.1 dyoung
1290 1.1 dyoung for (i = 1; i < VTW_NCLASS; ++i) {
1291 1.1 dyoung int j;
1292 1.1 dyoung
1293 1.1 dyoung ctl[i] = ctl[0];
1294 1.1 dyoung ctl[i].clidx = i;
1295 1.1 dyoung
1296 1.1 dyoung ctl[i].base.v = base;
1297 1.1 dyoung ctl[i].alloc = ctl[i].base;
1298 1.1 dyoung
1299 1.1 dyoung for (j = 0; j < class_n - 1; ++j) {
1300 1.1 dyoung if (tcp_msl_enable)
1301 1.1 dyoung base->msl_class = i;
1302 1.1 dyoung base = vtw_next(ctl, base);
1303 1.1 dyoung }
1304 1.1 dyoung
1305 1.1 dyoung ctl[i].lim.v = base;
1306 1.1 dyoung base = vtw_next(ctl, base);
1307 1.1 dyoung ctl[i].nfree = class_n;
1308 1.1 dyoung }
1309 1.1 dyoung }
1310 1.1 dyoung
1311 1.1 dyoung vtw_debug_init();
1312 1.1 dyoung }
1313 1.1 dyoung
1314 1.1 dyoung /*!\brief map class to TCP MSL
1315 1.1 dyoung */
1316 1.1 dyoung static inline uint32_t
1317 1.1 dyoung class_to_msl(int class)
1318 1.1 dyoung {
1319 1.1 dyoung switch (class) {
1320 1.1 dyoung case 0:
1321 1.1 dyoung case 1:
1322 1.1 dyoung return tcp_msl_remote ? tcp_msl_remote : (TCPTV_MSL >> 0);
1323 1.1 dyoung case 2:
1324 1.1 dyoung return tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1);
1325 1.1 dyoung default:
1326 1.1 dyoung return tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2);
1327 1.1 dyoung }
1328 1.1 dyoung }
1329 1.1 dyoung
1330 1.1 dyoung /*!\brief map TCP MSL to class
1331 1.1 dyoung */
1332 1.1 dyoung static inline uint32_t
1333 1.1 dyoung msl_to_class(int msl)
1334 1.1 dyoung {
1335 1.1 dyoung if (tcp_msl_enable) {
1336 1.1 dyoung if (msl <= (tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2)))
1337 1.1 dyoung return 1+2;
1338 1.1 dyoung if (msl <= (tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1)))
1339 1.1 dyoung return 1+1;
1340 1.1 dyoung return 1;
1341 1.1 dyoung }
1342 1.1 dyoung return 0;
1343 1.1 dyoung }
1344 1.1 dyoung
1345 1.1 dyoung /*!\brief allocate a vtw entry
1346 1.1 dyoung */
1347 1.1 dyoung static inline vtw_t *
1348 1.1 dyoung vtw_alloc(vtw_ctl_t *ctl)
1349 1.1 dyoung {
1350 1.1 dyoung vtw_t *vtw = 0;
1351 1.1 dyoung int stuck = 0;
1352 1.1 dyoung int avail = ctl ? (ctl->nalloc + ctl->nfree) : 0;
1353 1.1 dyoung int msl;
1354 1.1 dyoung
1355 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
1356 1.1 dyoung
1357 1.1 dyoung /* If no resources, we will not get far.
1358 1.1 dyoung */
1359 1.1 dyoung if (!ctl || !ctl->base.v4 || avail <= 0)
1360 1.1 dyoung return 0;
1361 1.1 dyoung
1362 1.1 dyoung /* Obtain a free one.
1363 1.1 dyoung */
1364 1.1 dyoung while (!ctl->nfree) {
1365 1.1 dyoung vtw_age(ctl, 0);
1366 1.1 dyoung
1367 1.1 dyoung if (++stuck > avail) {
1368 1.1 dyoung /* When in transition between
1369 1.1 dyoung * schemes (classless, classed) we
1370 1.1 dyoung * can be stuck having to await the
1371 1.1 dyoung * expiration of cross-allocated entries.
1372 1.1 dyoung *
1373 1.1 dyoung * Returning zero means we will fall back to the
1374 1.1 dyoung * traditional TIME_WAIT handling, except in the
1375 1.1 dyoung * case of a re-shed, in which case we cannot
1376 1.1 dyoung * perform the reshecd, but will retain the extant
1377 1.1 dyoung * entry.
1378 1.1 dyoung */
1379 1.1 dyoung db_trace(KTR_VTW
1380 1.1 dyoung , (ctl, "vtw:!none free in class %x %x/%x"
1381 1.1 dyoung , ctl->clidx
1382 1.1 dyoung , ctl->nalloc, ctl->nfree));
1383 1.1 dyoung
1384 1.1 dyoung return 0;
1385 1.1 dyoung }
1386 1.1 dyoung }
1387 1.1 dyoung
1388 1.1 dyoung vtw = ctl->alloc.v;
1389 1.1 dyoung
1390 1.1 dyoung if (vtw->msl_class != ctl->clidx) {
1391 1.1 dyoung /* Usurping rules:
1392 1.1 dyoung * 0 -> {1,2,3} or {1,2,3} -> 0
1393 1.1 dyoung */
1394 1.1 dyoung KASSERT(!vtw->msl_class || !ctl->clidx);
1395 1.1 dyoung
1396 1.1 dyoung if (vtw->hashed || vtw->expire.tv_sec) {
1397 1.1 dyoung /* As this is owned by some other class,
1398 1.1 dyoung * we must wait for it to expire it.
1399 1.1 dyoung * This will only happen on class/classless
1400 1.1 dyoung * transitions, which are guaranteed to progress
1401 1.1 dyoung * to completion in small finite time, barring bugs.
1402 1.1 dyoung */
1403 1.1 dyoung db_trace(KTR_VTW
1404 1.1 dyoung , (ctl, "vtw:!%p class %x!=%x %x:%x%s"
1405 1.1 dyoung , vtw, vtw->msl_class, ctl->clidx
1406 1.1 dyoung , vtw->expire.tv_sec
1407 1.1 dyoung , vtw->expire.tv_usec
1408 1.1 dyoung , vtw->hashed ? " hashed" : ""));
1409 1.1 dyoung
1410 1.1 dyoung return 0;
1411 1.1 dyoung }
1412 1.1 dyoung
1413 1.1 dyoung db_trace(KTR_VTW
1414 1.1 dyoung , (ctl, "vtw:!%p usurped from %x to %x"
1415 1.1 dyoung , vtw, vtw->msl_class, ctl->clidx));
1416 1.1 dyoung
1417 1.1 dyoung vtw->msl_class = ctl->clidx;
1418 1.1 dyoung }
1419 1.1 dyoung
1420 1.1 dyoung if (vtw_alive(vtw)) {
1421 1.1 dyoung KASSERT(0 && "next free not free");
1422 1.1 dyoung return 0;
1423 1.1 dyoung }
1424 1.1 dyoung
1425 1.1 dyoung /* Advance allocation poiter.
1426 1.1 dyoung */
1427 1.1 dyoung ctl->alloc.v = vtw_next(ctl, vtw);
1428 1.1 dyoung
1429 1.1 dyoung --ctl->nfree;
1430 1.1 dyoung ++ctl->nalloc;
1431 1.1 dyoung
1432 1.1 dyoung msl = (2 * class_to_msl(ctl->clidx) * 1000) / PR_SLOWHZ; // msec
1433 1.1 dyoung
1434 1.1 dyoung /* mark expiration
1435 1.1 dyoung */
1436 1.1 dyoung microtime(&vtw->expire);
1437 1.1 dyoung
1438 1.1 dyoung /* Move expiration into the future.
1439 1.1 dyoung */
1440 1.1 dyoung vtw->expire.tv_sec += msl / 1000;
1441 1.1 dyoung vtw->expire.tv_usec += 1000 * (msl % 1000);
1442 1.1 dyoung
1443 1.1 dyoung while (vtw->expire.tv_usec >= 1000*1000) {
1444 1.1 dyoung vtw->expire.tv_usec -= 1000*1000;
1445 1.1 dyoung vtw->expire.tv_sec += 1;
1446 1.1 dyoung }
1447 1.1 dyoung
1448 1.1 dyoung if (!ctl->oldest.v)
1449 1.1 dyoung ctl->oldest.v = vtw;
1450 1.1 dyoung
1451 1.1 dyoung return vtw;
1452 1.1 dyoung }
1453 1.1 dyoung
1454 1.1 dyoung /*!\brief expiration
1455 1.1 dyoung */
1456 1.1 dyoung static int
1457 1.1 dyoung vtw_age(vtw_ctl_t *ctl, struct timeval *_when)
1458 1.1 dyoung {
1459 1.1 dyoung vtw_t *vtw;
1460 1.1 dyoung struct timeval then, *when = _when;
1461 1.1 dyoung int maxtries = 0;
1462 1.1 dyoung
1463 1.1 dyoung if (!ctl->oldest.v) {
1464 1.1 dyoung KASSERT(!ctl->nalloc);
1465 1.1 dyoung return 0;
1466 1.1 dyoung }
1467 1.1 dyoung
1468 1.1 dyoung for (vtw = ctl->oldest.v; vtw && ctl->nalloc; ) {
1469 1.1 dyoung if (++maxtries > ctl->nalloc)
1470 1.1 dyoung break;
1471 1.1 dyoung
1472 1.1 dyoung if (vtw->msl_class != ctl->clidx) {
1473 1.1 dyoung db_trace(KTR_VTW
1474 1.1 dyoung , (vtw, "vtw:!age class mismatch %x != %x"
1475 1.1 dyoung , vtw->msl_class, ctl->clidx));
1476 1.1 dyoung /* XXXX
1477 1.1 dyoung * See if the appropriate action is to skip to the next.
1478 1.1 dyoung * XXXX
1479 1.1 dyoung */
1480 1.1 dyoung ctl->oldest.v = vtw = vtw_next(ctl, vtw);
1481 1.1 dyoung continue;
1482 1.1 dyoung }
1483 1.1 dyoung if (!when) {
1484 1.1 dyoung /* Latch oldest timeval if none specified.
1485 1.1 dyoung */
1486 1.1 dyoung then = vtw->expire;
1487 1.1 dyoung when = &then;
1488 1.1 dyoung }
1489 1.1 dyoung
1490 1.1 dyoung if (!timercmp(&vtw->expire, when, <=))
1491 1.1 dyoung break;
1492 1.1 dyoung
1493 1.1 dyoung db_trace(KTR_VTW
1494 1.1 dyoung , (vtw, "vtw: expire %x %8.8x:%8.8x %x/%x"
1495 1.1 dyoung , ctl->clidx
1496 1.1 dyoung , vtw->expire.tv_sec
1497 1.1 dyoung , vtw->expire.tv_usec
1498 1.1 dyoung , ctl->nalloc
1499 1.1 dyoung , ctl->nfree));
1500 1.1 dyoung
1501 1.1 dyoung if (!_when)
1502 1.1 dyoung ++vtw_stats.kill;
1503 1.1 dyoung
1504 1.1 dyoung vtw_del(ctl, vtw);
1505 1.1 dyoung vtw = ctl->oldest.v;
1506 1.1 dyoung }
1507 1.1 dyoung
1508 1.1 dyoung return ctl->nalloc; // # remaining allocated
1509 1.1 dyoung }
1510 1.1 dyoung
1511 1.1 dyoung static callout_t vtw_cs;
1512 1.1 dyoung
1513 1.1 dyoung /*!\brief notice the passage of time.
1514 1.1 dyoung * It seems to be getting faster. What happened to the year?
1515 1.1 dyoung */
1516 1.1 dyoung static void
1517 1.1 dyoung vtw_tick(void *arg)
1518 1.1 dyoung {
1519 1.1 dyoung struct timeval now;
1520 1.1 dyoung int i, cnt = 0;
1521 1.1 dyoung
1522 1.1 dyoung microtime(&now);
1523 1.1 dyoung
1524 1.1 dyoung db_trace(KTR_VTW, (arg, "vtk: tick - now %8.8x:%8.8x"
1525 1.1 dyoung , now.tv_sec, now.tv_usec));
1526 1.1 dyoung
1527 1.1 dyoung mutex_enter(softnet_lock);
1528 1.1 dyoung
1529 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
1530 1.1 dyoung cnt += vtw_age(&vtw_tcpv4[i], &now);
1531 1.1 dyoung cnt += vtw_age(&vtw_tcpv6[i], &now);
1532 1.1 dyoung }
1533 1.1 dyoung
1534 1.1 dyoung /* Keep ticks coming while we need them.
1535 1.1 dyoung */
1536 1.1 dyoung if (cnt)
1537 1.1 dyoung callout_schedule(&vtw_cs, hz / 5);
1538 1.1 dyoung else {
1539 1.1 dyoung tcp_vtw_was_enabled = 0;
1540 1.1 dyoung tcbtable.vestige = 0;
1541 1.1 dyoung }
1542 1.1 dyoung mutex_exit(softnet_lock);
1543 1.1 dyoung }
1544 1.1 dyoung
1545 1.1 dyoung /* in_pcblookup_ports assist for handling vestigial entries.
1546 1.1 dyoung */
1547 1.1 dyoung static void *
1548 1.1 dyoung tcp_init_ports_v4(struct in_addr addr, u_int port, int wild)
1549 1.1 dyoung {
1550 1.1 dyoung struct tcp_ports_iterator *it = &tcp_ports_iterator_v4;
1551 1.1 dyoung
1552 1.1 dyoung bzero(it, sizeof (*it));
1553 1.1 dyoung
1554 1.1 dyoung /* Note: the reference to vtw_tcpv4[0] is fine.
1555 1.1 dyoung * We do not need per-class iteration. We just
1556 1.1 dyoung * need to get to the fat, and there is one
1557 1.1 dyoung * shared fat.
1558 1.1 dyoung */
1559 1.1 dyoung if (vtw_tcpv4[0].fat) {
1560 1.1 dyoung it->addr.v4 = addr;
1561 1.1 dyoung it->port = port;
1562 1.1 dyoung it->wild = !!wild;
1563 1.1 dyoung it->ctl = &vtw_tcpv4[0];
1564 1.1 dyoung
1565 1.1 dyoung ++vtw_stats.look[1];
1566 1.1 dyoung }
1567 1.1 dyoung
1568 1.1 dyoung return it;
1569 1.1 dyoung }
1570 1.1 dyoung
1571 1.1 dyoung /*!\brief export an IPv4 vtw.
1572 1.1 dyoung */
1573 1.1 dyoung static int
1574 1.1 dyoung vtw_export_v4(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res)
1575 1.1 dyoung {
1576 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
1577 1.1 dyoung
1578 1.1 dyoung bzero(res, sizeof (*res));
1579 1.1 dyoung
1580 1.1 dyoung if (ctl && vtw) {
1581 1.1 dyoung if (!ctl->clidx && vtw->msl_class)
1582 1.1 dyoung ctl += vtw->msl_class;
1583 1.1 dyoung else
1584 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
1585 1.1 dyoung
1586 1.1 dyoung res->valid = 1;
1587 1.1 dyoung res->v4 = 1;
1588 1.1 dyoung
1589 1.1 dyoung res->faddr.v4.s_addr = v4->faddr;
1590 1.1 dyoung res->laddr.v4.s_addr = v4->laddr;
1591 1.1 dyoung res->fport = v4->fport;
1592 1.1 dyoung res->lport = v4->lport;
1593 1.1 dyoung res->vtw = vtw; // netlock held over call(s)
1594 1.1 dyoung res->ctl = ctl;
1595 1.1 dyoung res->reuse_addr = vtw->reuse_addr;
1596 1.1 dyoung res->reuse_port = vtw->reuse_port;
1597 1.1 dyoung res->snd_nxt = vtw->snd_nxt;
1598 1.1 dyoung res->rcv_nxt = vtw->rcv_nxt;
1599 1.1 dyoung res->rcv_wnd = vtw->rcv_wnd;
1600 1.1 dyoung res->uid = vtw->uid;
1601 1.1 dyoung }
1602 1.1 dyoung
1603 1.1 dyoung return res->valid;
1604 1.1 dyoung }
1605 1.1 dyoung
1606 1.1 dyoung /*!\brief return next port in the port iterator. yowza.
1607 1.1 dyoung */
1608 1.1 dyoung static int
1609 1.1 dyoung tcp_next_port_v4(void *arg, struct vestigial_inpcb *res)
1610 1.1 dyoung {
1611 1.1 dyoung struct tcp_ports_iterator *it = arg;
1612 1.1 dyoung vtw_t *vtw = 0;
1613 1.1 dyoung
1614 1.1 dyoung if (it->ctl)
1615 1.1 dyoung vtw = vtw_next_port_v4(it);
1616 1.1 dyoung
1617 1.1 dyoung if (!vtw)
1618 1.1 dyoung it->ctl = 0;
1619 1.1 dyoung
1620 1.1 dyoung return vtw_export_v4(it->ctl, vtw, res);
1621 1.1 dyoung }
1622 1.1 dyoung
1623 1.1 dyoung static int
1624 1.1 dyoung tcp_lookup_v4(struct in_addr faddr, uint16_t fport,
1625 1.1 dyoung struct in_addr laddr, uint16_t lport,
1626 1.1 dyoung struct vestigial_inpcb *res)
1627 1.1 dyoung {
1628 1.1 dyoung vtw_t *vtw;
1629 1.1 dyoung vtw_ctl_t *ctl;
1630 1.1 dyoung
1631 1.1 dyoung
1632 1.1 dyoung db_trace(KTR_VTW
1633 1.1 dyoung , (res, "vtw: lookup %A:%P %A:%P"
1634 1.1 dyoung , faddr, fport
1635 1.1 dyoung , laddr, lport));
1636 1.1 dyoung
1637 1.1 dyoung vtw = vtw_lookup_hash_v4((ctl = &vtw_tcpv4[0])
1638 1.1 dyoung , faddr.s_addr, fport
1639 1.1 dyoung , laddr.s_addr, lport, 0);
1640 1.1 dyoung
1641 1.1 dyoung return vtw_export_v4(ctl, vtw, res);
1642 1.1 dyoung }
1643 1.1 dyoung
1644 1.1 dyoung /* in_pcblookup_ports assist for handling vestigial entries.
1645 1.1 dyoung */
1646 1.1 dyoung static void *
1647 1.1 dyoung tcp_init_ports_v6(const struct in6_addr *addr, u_int port, int wild)
1648 1.1 dyoung {
1649 1.1 dyoung struct tcp_ports_iterator *it = &tcp_ports_iterator_v6;
1650 1.1 dyoung
1651 1.1 dyoung bzero(it, sizeof (*it));
1652 1.1 dyoung
1653 1.1 dyoung /* Note: the reference to vtw_tcpv6[0] is fine.
1654 1.1 dyoung * We do not need per-class iteration. We just
1655 1.1 dyoung * need to get to the fat, and there is one
1656 1.1 dyoung * shared fat.
1657 1.1 dyoung */
1658 1.1 dyoung if (vtw_tcpv6[0].fat) {
1659 1.1 dyoung it->addr.v6 = *addr;
1660 1.1 dyoung it->port = port;
1661 1.1 dyoung it->wild = !!wild;
1662 1.1 dyoung it->ctl = &vtw_tcpv6[0];
1663 1.1 dyoung
1664 1.1 dyoung ++vtw_stats.look[1];
1665 1.1 dyoung }
1666 1.1 dyoung
1667 1.1 dyoung return it;
1668 1.1 dyoung }
1669 1.1 dyoung
1670 1.1 dyoung /*!\brief export an IPv6 vtw.
1671 1.1 dyoung */
1672 1.1 dyoung static int
1673 1.1 dyoung vtw_export_v6(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res)
1674 1.1 dyoung {
1675 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
1676 1.1 dyoung
1677 1.1 dyoung bzero(res, sizeof (*res));
1678 1.1 dyoung
1679 1.1 dyoung if (ctl && vtw) {
1680 1.1 dyoung if (!ctl->clidx && vtw->msl_class)
1681 1.1 dyoung ctl += vtw->msl_class;
1682 1.1 dyoung else
1683 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
1684 1.1 dyoung
1685 1.1 dyoung res->valid = 1;
1686 1.1 dyoung res->v4 = 0;
1687 1.1 dyoung
1688 1.1 dyoung res->faddr.v6 = v6->faddr;
1689 1.1 dyoung res->laddr.v6 = v6->laddr;
1690 1.1 dyoung res->fport = v6->fport;
1691 1.1 dyoung res->lport = v6->lport;
1692 1.1 dyoung res->vtw = vtw; // netlock held over call(s)
1693 1.1 dyoung res->ctl = ctl;
1694 1.1 dyoung
1695 1.1 dyoung res->v6only = vtw->v6only;
1696 1.1 dyoung res->reuse_addr = vtw->reuse_addr;
1697 1.1 dyoung res->reuse_port = vtw->reuse_port;
1698 1.1 dyoung
1699 1.1 dyoung res->snd_nxt = vtw->snd_nxt;
1700 1.1 dyoung res->rcv_nxt = vtw->rcv_nxt;
1701 1.1 dyoung res->rcv_wnd = vtw->rcv_wnd;
1702 1.1 dyoung res->uid = vtw->uid;
1703 1.1 dyoung }
1704 1.1 dyoung
1705 1.1 dyoung return res->valid;
1706 1.1 dyoung }
1707 1.1 dyoung
1708 1.1 dyoung static int
1709 1.1 dyoung tcp_next_port_v6(void *arg, struct vestigial_inpcb *res)
1710 1.1 dyoung {
1711 1.1 dyoung struct tcp_ports_iterator *it = arg;
1712 1.1 dyoung vtw_t *vtw = 0;
1713 1.1 dyoung
1714 1.1 dyoung if (it->ctl)
1715 1.1 dyoung vtw = vtw_next_port_v6(it);
1716 1.1 dyoung
1717 1.1 dyoung if (!vtw)
1718 1.1 dyoung it->ctl = 0;
1719 1.1 dyoung
1720 1.1 dyoung return vtw_export_v6(it->ctl, vtw, res);
1721 1.1 dyoung }
1722 1.1 dyoung
1723 1.1 dyoung static int
1724 1.1 dyoung tcp_lookup_v6(const struct in6_addr *faddr, uint16_t fport,
1725 1.1 dyoung const struct in6_addr *laddr, uint16_t lport,
1726 1.1 dyoung struct vestigial_inpcb *res)
1727 1.1 dyoung {
1728 1.1 dyoung vtw_ctl_t *ctl;
1729 1.1 dyoung vtw_t *vtw;
1730 1.1 dyoung
1731 1.1 dyoung db_trace(KTR_VTW
1732 1.1 dyoung , (res, "vtw: lookup %6A:%P %6A:%P"
1733 1.1 dyoung , db_store(faddr, sizeof (*faddr)), fport
1734 1.1 dyoung , db_store(laddr, sizeof (*laddr)), lport));
1735 1.1 dyoung
1736 1.1 dyoung vtw = vtw_lookup_hash_v6((ctl = &vtw_tcpv6[0])
1737 1.1 dyoung , faddr, fport
1738 1.1 dyoung , laddr, lport, 0);
1739 1.1 dyoung
1740 1.1 dyoung return vtw_export_v6(ctl, vtw, res);
1741 1.1 dyoung }
1742 1.1 dyoung
1743 1.1 dyoung static vestigial_hooks_t tcp_hooks = {
1744 1.1 dyoung .init_ports4 = tcp_init_ports_v4,
1745 1.1 dyoung .next_port4 = tcp_next_port_v4,
1746 1.1 dyoung .lookup4 = tcp_lookup_v4,
1747 1.1 dyoung .init_ports6 = tcp_init_ports_v6,
1748 1.1 dyoung .next_port6 = tcp_next_port_v6,
1749 1.1 dyoung .lookup6 = tcp_lookup_v6,
1750 1.1 dyoung };
1751 1.1 dyoung
1752 1.1 dyoung static bool
1753 1.1 dyoung vtw_select(int af, fatp_ctl_t **fatp, vtw_ctl_t **ctlp)
1754 1.1 dyoung {
1755 1.1 dyoung fatp_ctl_t *fat;
1756 1.1 dyoung vtw_ctl_t *ctl;
1757 1.1 dyoung
1758 1.1 dyoung switch (af) {
1759 1.1 dyoung case AF_INET:
1760 1.1 dyoung fat = &fat_tcpv4;
1761 1.1 dyoung ctl = &vtw_tcpv4[0];
1762 1.1 dyoung break;
1763 1.1 dyoung case AF_INET6:
1764 1.1 dyoung fat = &fat_tcpv6;
1765 1.1 dyoung ctl = &vtw_tcpv6[0];
1766 1.1 dyoung break;
1767 1.1 dyoung default:
1768 1.1 dyoung return false;
1769 1.1 dyoung }
1770 1.1 dyoung if (fatp != NULL)
1771 1.1 dyoung *fatp = fat;
1772 1.1 dyoung if (ctlp != NULL)
1773 1.1 dyoung *ctlp = ctl;
1774 1.1 dyoung return true;
1775 1.1 dyoung }
1776 1.1 dyoung
1777 1.1 dyoung /*!\brief initialize controlling instance
1778 1.1 dyoung */
1779 1.1 dyoung static int
1780 1.1 dyoung vtw_control_init(int af)
1781 1.1 dyoung {
1782 1.1 dyoung fatp_ctl_t *fat;
1783 1.1 dyoung vtw_ctl_t *ctl;
1784 1.1 dyoung
1785 1.1 dyoung if (!vtw_select(af, &fat, &ctl))
1786 1.1 dyoung return EAFNOSUPPORT;
1787 1.1 dyoung
1788 1.1 dyoung if (!fat->base) {
1789 1.1 dyoung uint32_t n, m;
1790 1.1 dyoung
1791 1.1 dyoung KASSERT(powerof2(tcp_vtw_entries));
1792 1.1 dyoung
1793 1.1 dyoung /* Allocate 10% more capacity in the fat pointers.
1794 1.1 dyoung * We should only need ~#hash additional based on
1795 1.1 dyoung * how they age, but TIME_WAIT assassination could cause
1796 1.1 dyoung * sparse fat pointer utilisation.
1797 1.1 dyoung */
1798 1.1 dyoung m = 512;
1799 1.1 dyoung n = 2*m + (11 * (tcp_vtw_entries / fatp_ntags())) / 10;
1800 1.1 dyoung
1801 1.1 dyoung fatp_init(fat, n, m);
1802 1.1 dyoung
1803 1.1 dyoung if (!fat->base)
1804 1.1 dyoung return ENOMEM;
1805 1.1 dyoung }
1806 1.1 dyoung
1807 1.1 dyoung if (!ctl->base.v) {
1808 1.1 dyoung
1809 1.1 dyoung vtw_init(fat, ctl, tcp_vtw_entries);
1810 1.1 dyoung if (!ctl->base.v)
1811 1.1 dyoung return ENOMEM;
1812 1.1 dyoung }
1813 1.1 dyoung
1814 1.1 dyoung return 0;
1815 1.1 dyoung }
1816 1.1 dyoung
1817 1.1 dyoung /*!\brief select controlling instance
1818 1.1 dyoung */
1819 1.1 dyoung static vtw_ctl_t *
1820 1.1 dyoung vtw_control(int af, uint32_t msl)
1821 1.1 dyoung {
1822 1.1 dyoung fatp_ctl_t *fat;
1823 1.1 dyoung vtw_ctl_t *ctl;
1824 1.1 dyoung int class = msl_to_class(msl);
1825 1.1 dyoung
1826 1.1 dyoung if (!vtw_select(af, &fat, &ctl))
1827 1.1 dyoung return NULL;
1828 1.1 dyoung
1829 1.1 dyoung if (!fat->base || !ctl->base.v)
1830 1.1 dyoung return NULL;
1831 1.1 dyoung
1832 1.1 dyoung return ctl + class;
1833 1.1 dyoung }
1834 1.1 dyoung
1835 1.1 dyoung /*!\brief add TCP pcb to vestigial timewait
1836 1.1 dyoung */
1837 1.1 dyoung int
1838 1.1 dyoung vtw_add(int af, struct tcpcb *tp)
1839 1.1 dyoung {
1840 1.1 dyoung int enable;
1841 1.1 dyoung vtw_ctl_t *ctl;
1842 1.1 dyoung vtw_t *vtw;
1843 1.1 dyoung
1844 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
1845 1.1 dyoung
1846 1.1 dyoung ctl = vtw_control(af, tp->t_msl);
1847 1.1 dyoung if (!ctl)
1848 1.1 dyoung return 0;
1849 1.1 dyoung
1850 1.1 dyoung enable = (af == AF_INET) ? tcp4_vtw_enable : tcp6_vtw_enable;
1851 1.1 dyoung
1852 1.1 dyoung vtw = vtw_alloc(ctl);
1853 1.1 dyoung
1854 1.1 dyoung if (vtw) {
1855 1.1 dyoung vtw->snd_nxt = tp->snd_nxt;
1856 1.1 dyoung vtw->rcv_nxt = tp->rcv_nxt;
1857 1.1 dyoung
1858 1.1 dyoung switch (af) {
1859 1.1 dyoung case AF_INET: {
1860 1.1 dyoung struct inpcb *inp = tp->t_inpcb;
1861 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
1862 1.1 dyoung
1863 1.1 dyoung v4->faddr = inp->inp_faddr.s_addr;
1864 1.1 dyoung v4->laddr = inp->inp_laddr.s_addr;
1865 1.1 dyoung v4->fport = inp->inp_fport;
1866 1.1 dyoung v4->lport = inp->inp_lport;
1867 1.1 dyoung
1868 1.1 dyoung vtw->reuse_port = !!(inp->inp_socket->so_options
1869 1.1 dyoung & SO_REUSEPORT);
1870 1.1 dyoung vtw->reuse_addr = !!(inp->inp_socket->so_options
1871 1.1 dyoung & SO_REUSEADDR);
1872 1.1 dyoung vtw->v6only = 0;
1873 1.1 dyoung vtw->uid = inp->inp_socket->so_uidinfo->ui_uid;
1874 1.1 dyoung
1875 1.1 dyoung vtw_inshash_v4(ctl, vtw);
1876 1.1 dyoung
1877 1.1 dyoung
1878 1.1 dyoung #ifdef VTW_DEBUG
1879 1.1 dyoung /* Immediate lookup (connected and port) to
1880 1.1 dyoung * ensure at least that works!
1881 1.1 dyoung */
1882 1.1 dyoung if (enable & 4) {
1883 1.1 dyoung KASSERT(vtw_lookup_hash_v4
1884 1.1 dyoung (ctl
1885 1.1 dyoung , inp->inp_faddr.s_addr, inp->inp_fport
1886 1.1 dyoung , inp->inp_laddr.s_addr, inp->inp_lport
1887 1.1 dyoung , 0)
1888 1.1 dyoung == vtw);
1889 1.1 dyoung KASSERT(vtw_lookup_hash_v4
1890 1.1 dyoung (ctl
1891 1.1 dyoung , inp->inp_faddr.s_addr, inp->inp_fport
1892 1.1 dyoung , inp->inp_laddr.s_addr, inp->inp_lport
1893 1.1 dyoung , 1));
1894 1.1 dyoung }
1895 1.1 dyoung /* Immediate port iterator functionality check: not wild
1896 1.1 dyoung */
1897 1.1 dyoung if (enable & 8) {
1898 1.1 dyoung struct tcp_ports_iterator *it;
1899 1.1 dyoung struct vestigial_inpcb res;
1900 1.1 dyoung int cnt = 0;
1901 1.1 dyoung
1902 1.1 dyoung it = tcp_init_ports_v4(inp->inp_laddr
1903 1.1 dyoung , inp->inp_lport, 0);
1904 1.1 dyoung
1905 1.1 dyoung while (tcp_next_port_v4(it, &res)) {
1906 1.1 dyoung ++cnt;
1907 1.1 dyoung }
1908 1.1 dyoung KASSERT(cnt);
1909 1.1 dyoung }
1910 1.1 dyoung /* Immediate port iterator functionality check: wild
1911 1.1 dyoung */
1912 1.1 dyoung if (enable & 16) {
1913 1.1 dyoung struct tcp_ports_iterator *it;
1914 1.1 dyoung struct vestigial_inpcb res;
1915 1.1 dyoung struct in_addr any;
1916 1.1 dyoung int cnt = 0;
1917 1.1 dyoung
1918 1.1 dyoung any.s_addr = htonl(INADDR_ANY);
1919 1.1 dyoung
1920 1.1 dyoung it = tcp_init_ports_v4(any, inp->inp_lport, 1);
1921 1.1 dyoung
1922 1.1 dyoung while (tcp_next_port_v4(it, &res)) {
1923 1.1 dyoung ++cnt;
1924 1.1 dyoung }
1925 1.1 dyoung KASSERT(cnt);
1926 1.1 dyoung }
1927 1.1 dyoung #endif /* VTW_DEBUG */
1928 1.1 dyoung break;
1929 1.1 dyoung }
1930 1.1 dyoung
1931 1.1 dyoung case AF_INET6: {
1932 1.1 dyoung struct in6pcb *inp = tp->t_in6pcb;
1933 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
1934 1.1 dyoung
1935 1.1 dyoung v6->faddr = inp->in6p_faddr;
1936 1.1 dyoung v6->laddr = inp->in6p_laddr;
1937 1.1 dyoung v6->fport = inp->in6p_fport;
1938 1.1 dyoung v6->lport = inp->in6p_lport;
1939 1.1 dyoung
1940 1.1 dyoung vtw->reuse_port = !!(inp->in6p_socket->so_options
1941 1.1 dyoung & SO_REUSEPORT);
1942 1.1 dyoung vtw->reuse_addr = !!(inp->in6p_socket->so_options
1943 1.1 dyoung & SO_REUSEADDR);
1944 1.1 dyoung vtw->v6only = !!(inp->in6p_flags
1945 1.1 dyoung & IN6P_IPV6_V6ONLY);
1946 1.1 dyoung vtw->uid = inp->in6p_socket->so_uidinfo->ui_uid;
1947 1.1 dyoung
1948 1.1 dyoung vtw_inshash_v6(ctl, vtw);
1949 1.1 dyoung #ifdef VTW_DEBUG
1950 1.1 dyoung /* Immediate lookup (connected and port) to
1951 1.1 dyoung * ensure at least that works!
1952 1.1 dyoung */
1953 1.1 dyoung if (enable & 4) {
1954 1.1 dyoung KASSERT(vtw_lookup_hash_v6(ctl
1955 1.1 dyoung , &inp->in6p_faddr, inp->in6p_fport
1956 1.1 dyoung , &inp->in6p_laddr, inp->in6p_lport
1957 1.1 dyoung , 0)
1958 1.1 dyoung == vtw);
1959 1.1 dyoung KASSERT(vtw_lookup_hash_v6
1960 1.1 dyoung (ctl
1961 1.1 dyoung , &inp->in6p_faddr, inp->in6p_fport
1962 1.1 dyoung , &inp->in6p_laddr, inp->in6p_lport
1963 1.1 dyoung , 1));
1964 1.1 dyoung }
1965 1.1 dyoung /* Immediate port iterator functionality check: not wild
1966 1.1 dyoung */
1967 1.1 dyoung if (enable & 8) {
1968 1.1 dyoung struct tcp_ports_iterator *it;
1969 1.1 dyoung struct vestigial_inpcb res;
1970 1.1 dyoung int cnt = 0;
1971 1.1 dyoung
1972 1.1 dyoung it = tcp_init_ports_v6(&inp->in6p_laddr
1973 1.1 dyoung , inp->in6p_lport, 0);
1974 1.1 dyoung
1975 1.1 dyoung while (tcp_next_port_v6(it, &res)) {
1976 1.1 dyoung ++cnt;
1977 1.1 dyoung }
1978 1.1 dyoung KASSERT(cnt);
1979 1.1 dyoung }
1980 1.1 dyoung /* Immediate port iterator functionality check: wild
1981 1.1 dyoung */
1982 1.1 dyoung if (enable & 16) {
1983 1.1 dyoung struct tcp_ports_iterator *it;
1984 1.1 dyoung struct vestigial_inpcb res;
1985 1.1 dyoung static struct in6_addr any = IN6ADDR_ANY_INIT;
1986 1.1 dyoung int cnt = 0;
1987 1.1 dyoung
1988 1.1 dyoung it = tcp_init_ports_v6(&any
1989 1.1 dyoung , inp->in6p_lport, 1);
1990 1.1 dyoung
1991 1.1 dyoung while (tcp_next_port_v6(it, &res)) {
1992 1.1 dyoung ++cnt;
1993 1.1 dyoung }
1994 1.1 dyoung KASSERT(cnt);
1995 1.1 dyoung }
1996 1.1 dyoung #endif /* VTW_DEBUG */
1997 1.1 dyoung break;
1998 1.1 dyoung }
1999 1.1 dyoung }
2000 1.1 dyoung
2001 1.1 dyoung tcp_canceltimers(tp);
2002 1.1 dyoung tp = tcp_close(tp);
2003 1.1 dyoung KASSERT(!tp);
2004 1.1 dyoung
2005 1.1 dyoung return 1;
2006 1.1 dyoung }
2007 1.1 dyoung
2008 1.1 dyoung return 0;
2009 1.1 dyoung }
2010 1.1 dyoung
2011 1.1 dyoung /*!\brief restart timer for vestigial time-wait entry
2012 1.1 dyoung */
2013 1.1 dyoung static void
2014 1.1 dyoung vtw_restart_v4(vestigial_inpcb_t *vp)
2015 1.1 dyoung {
2016 1.1 dyoung vtw_v4_t copy = *(vtw_v4_t*)vp->vtw;
2017 1.1 dyoung vtw_t *vtw;
2018 1.1 dyoung vtw_t *cp = ©.common;
2019 1.1 dyoung vtw_ctl_t *ctl;
2020 1.1 dyoung
2021 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
2022 1.1 dyoung
2023 1.1 dyoung db_trace(KTR_VTW
2024 1.1 dyoung , (vp->vtw, "vtw: restart %A:%P %A:%P"
2025 1.1 dyoung , vp->faddr.v4.s_addr, vp->fport
2026 1.1 dyoung , vp->laddr.v4.s_addr, vp->lport));
2027 1.1 dyoung
2028 1.1 dyoung /* Class might have changed, so have a squiz.
2029 1.1 dyoung */
2030 1.1 dyoung ctl = vtw_control(AF_INET, class_to_msl(cp->msl_class));
2031 1.1 dyoung vtw = vtw_alloc(ctl);
2032 1.1 dyoung
2033 1.1 dyoung if (vtw) {
2034 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
2035 1.1 dyoung
2036 1.1 dyoung /* Safe now to unhash the old entry
2037 1.1 dyoung */
2038 1.1 dyoung vtw_del(vp->ctl, vp->vtw);
2039 1.1 dyoung
2040 1.1 dyoung vtw->snd_nxt = cp->snd_nxt;
2041 1.1 dyoung vtw->rcv_nxt = cp->rcv_nxt;
2042 1.1 dyoung
2043 1.1 dyoung v4->faddr = copy.faddr;
2044 1.1 dyoung v4->laddr = copy.laddr;
2045 1.1 dyoung v4->fport = copy.fport;
2046 1.1 dyoung v4->lport = copy.lport;
2047 1.1 dyoung
2048 1.1 dyoung vtw->reuse_port = cp->reuse_port;
2049 1.1 dyoung vtw->reuse_addr = cp->reuse_addr;
2050 1.1 dyoung vtw->v6only = 0;
2051 1.1 dyoung vtw->uid = cp->uid;
2052 1.1 dyoung
2053 1.1 dyoung vtw_inshash_v4(ctl, vtw);
2054 1.1 dyoung }
2055 1.1 dyoung
2056 1.1 dyoung vp->valid = 0;
2057 1.1 dyoung }
2058 1.1 dyoung
2059 1.1 dyoung /*!\brief restart timer for vestigial time-wait entry
2060 1.1 dyoung */
2061 1.1 dyoung static void
2062 1.1 dyoung vtw_restart_v6(vestigial_inpcb_t *vp)
2063 1.1 dyoung {
2064 1.1 dyoung vtw_v6_t copy = *(vtw_v6_t*)vp->vtw;
2065 1.1 dyoung vtw_t *vtw;
2066 1.1 dyoung vtw_t *cp = ©.common;
2067 1.1 dyoung vtw_ctl_t *ctl;
2068 1.1 dyoung
2069 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
2070 1.1 dyoung
2071 1.1 dyoung db_trace(KTR_VTW
2072 1.1 dyoung , (vp->vtw, "vtw: restart %6A:%P %6A:%P"
2073 1.1 dyoung , db_store(&vp->faddr.v6, sizeof (vp->faddr.v6))
2074 1.1 dyoung , vp->fport
2075 1.1 dyoung , db_store(&vp->laddr.v6, sizeof (vp->laddr.v6))
2076 1.1 dyoung , vp->lport));
2077 1.1 dyoung
2078 1.1 dyoung /* Class might have changed, so have a squiz.
2079 1.1 dyoung */
2080 1.1 dyoung ctl = vtw_control(AF_INET6, class_to_msl(cp->msl_class));
2081 1.1 dyoung vtw = vtw_alloc(ctl);
2082 1.1 dyoung
2083 1.1 dyoung if (vtw) {
2084 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
2085 1.1 dyoung
2086 1.1 dyoung /* Safe now to unhash the old entry
2087 1.1 dyoung */
2088 1.1 dyoung vtw_del(vp->ctl, vp->vtw);
2089 1.1 dyoung
2090 1.1 dyoung vtw->snd_nxt = cp->snd_nxt;
2091 1.1 dyoung vtw->rcv_nxt = cp->rcv_nxt;
2092 1.1 dyoung
2093 1.1 dyoung v6->faddr = copy.faddr;
2094 1.1 dyoung v6->laddr = copy.laddr;
2095 1.1 dyoung v6->fport = copy.fport;
2096 1.1 dyoung v6->lport = copy.lport;
2097 1.1 dyoung
2098 1.1 dyoung vtw->reuse_port = cp->reuse_port;
2099 1.1 dyoung vtw->reuse_addr = cp->reuse_addr;
2100 1.1 dyoung vtw->v6only = cp->v6only;
2101 1.1 dyoung vtw->uid = cp->uid;
2102 1.1 dyoung
2103 1.1 dyoung vtw_inshash_v6(ctl, vtw);
2104 1.1 dyoung }
2105 1.1 dyoung
2106 1.1 dyoung vp->valid = 0;
2107 1.1 dyoung }
2108 1.1 dyoung
2109 1.1 dyoung /*!\brief restart timer for vestigial time-wait entry
2110 1.1 dyoung */
2111 1.1 dyoung void
2112 1.1 dyoung vtw_restart(vestigial_inpcb_t *vp)
2113 1.1 dyoung {
2114 1.1 dyoung if (!vp || !vp->valid)
2115 1.1 dyoung return;
2116 1.1 dyoung
2117 1.1 dyoung if (vp->v4)
2118 1.1 dyoung vtw_restart_v4(vp);
2119 1.1 dyoung else
2120 1.1 dyoung vtw_restart_v6(vp);
2121 1.1 dyoung }
2122 1.1 dyoung
2123 1.1 dyoung int
2124 1.1 dyoung vtw_earlyinit(void)
2125 1.1 dyoung {
2126 1.1 dyoung int rc;
2127 1.1 dyoung
2128 1.1 dyoung if (!tcp_vtw_was_enabled) {
2129 1.1 dyoung int i;
2130 1.1 dyoung
2131 1.1 dyoung /* This guarantees is timer ticks until we no longer need them.
2132 1.1 dyoung */
2133 1.1 dyoung tcp_vtw_was_enabled = 1;
2134 1.1 dyoung
2135 1.1 dyoung callout_init(&vtw_cs, 0);
2136 1.1 dyoung callout_setfunc(&vtw_cs, vtw_tick, 0);
2137 1.1 dyoung callout_schedule(&vtw_cs, hz / 5);
2138 1.1 dyoung
2139 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
2140 1.1 dyoung vtw_tcpv4[i].is_v4 = 1;
2141 1.1 dyoung vtw_tcpv6[i].is_v6 = 1;
2142 1.1 dyoung }
2143 1.1 dyoung
2144 1.1 dyoung tcbtable.vestige = &tcp_hooks;
2145 1.1 dyoung }
2146 1.1 dyoung
2147 1.1 dyoung if ((rc = vtw_control_init(AF_INET)) != 0 ||
2148 1.1 dyoung (rc = vtw_control_init(AF_INET6)) != 0)
2149 1.1 dyoung return rc;
2150 1.1 dyoung
2151 1.1 dyoung return 0;
2152 1.1 dyoung }
2153 1.1 dyoung
2154 1.1 dyoung #ifdef VTW_DEBUG
2155 1.1 dyoung #include <sys/syscallargs.h>
2156 1.1 dyoung #include <sys/sysctl.h>
2157 1.1 dyoung
2158 1.1 dyoung /*!\brief add lalp, fafp entries for debug
2159 1.1 dyoung */
2160 1.1 dyoung int
2161 1.1 dyoung vtw_debug_add(int af, sin_either_t *la, sin_either_t *fa, int msl, int class)
2162 1.1 dyoung {
2163 1.1 dyoung vtw_ctl_t *ctl;
2164 1.1 dyoung vtw_t *vtw;
2165 1.1 dyoung
2166 1.1 dyoung ctl = vtw_control(af, msl ? msl : class_to_msl(class));
2167 1.1 dyoung if (!ctl)
2168 1.1 dyoung return 0;
2169 1.1 dyoung
2170 1.1 dyoung vtw = vtw_alloc(ctl);
2171 1.1 dyoung
2172 1.1 dyoung if (vtw) {
2173 1.1 dyoung vtw->snd_nxt = 0;
2174 1.1 dyoung vtw->rcv_nxt = 0;
2175 1.1 dyoung
2176 1.1 dyoung switch (af) {
2177 1.1 dyoung case AF_INET: {
2178 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
2179 1.1 dyoung
2180 1.1 dyoung v4->faddr = fa->sin_addr.v4.s_addr;
2181 1.1 dyoung v4->laddr = la->sin_addr.v4.s_addr;
2182 1.1 dyoung v4->fport = fa->sin_port;
2183 1.1 dyoung v4->lport = la->sin_port;
2184 1.1 dyoung
2185 1.1 dyoung vtw->reuse_port = 1;
2186 1.1 dyoung vtw->reuse_addr = 1;
2187 1.1 dyoung vtw->v6only = 0;
2188 1.1 dyoung vtw->uid = 0;
2189 1.1 dyoung
2190 1.1 dyoung vtw_inshash_v4(ctl, vtw);
2191 1.1 dyoung break;
2192 1.1 dyoung }
2193 1.1 dyoung
2194 1.1 dyoung case AF_INET6: {
2195 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
2196 1.1 dyoung
2197 1.1 dyoung v6->faddr = fa->sin_addr.v6;
2198 1.1 dyoung v6->laddr = la->sin_addr.v6;
2199 1.1 dyoung
2200 1.1 dyoung v6->fport = fa->sin_port;
2201 1.1 dyoung v6->lport = la->sin_port;
2202 1.1 dyoung
2203 1.1 dyoung vtw->reuse_port = 1;
2204 1.1 dyoung vtw->reuse_addr = 1;
2205 1.1 dyoung vtw->v6only = 0;
2206 1.1 dyoung vtw->uid = 0;
2207 1.1 dyoung
2208 1.1 dyoung vtw_inshash_v6(ctl, vtw);
2209 1.1 dyoung break;
2210 1.1 dyoung }
2211 1.1 dyoung
2212 1.1 dyoung default:
2213 1.1 dyoung break;
2214 1.1 dyoung }
2215 1.1 dyoung
2216 1.1 dyoung return 1;
2217 1.1 dyoung }
2218 1.1 dyoung
2219 1.1 dyoung return 0;
2220 1.1 dyoung }
2221 1.1 dyoung
2222 1.1 dyoung static int vtw_syscall = 0;
2223 1.1 dyoung
2224 1.1 dyoung static int
2225 1.1 dyoung vtw_debug_process(vtw_sysargs_t *ap)
2226 1.1 dyoung {
2227 1.1 dyoung struct vestigial_inpcb vestige;
2228 1.1 dyoung int rc = 0;
2229 1.1 dyoung
2230 1.1 dyoung mutex_enter(softnet_lock);
2231 1.1 dyoung
2232 1.1 dyoung switch (ap->op) {
2233 1.1 dyoung case 0: // insert
2234 1.1 dyoung vtw_debug_add(ap->la.sin_family
2235 1.1 dyoung , &ap->la
2236 1.1 dyoung , &ap->fa
2237 1.1 dyoung , TCPTV_MSL
2238 1.1 dyoung , 0);
2239 1.1 dyoung break;
2240 1.1 dyoung
2241 1.1 dyoung case 1: // lookup
2242 1.1 dyoung case 2: // restart
2243 1.1 dyoung switch (ap->la.sin_family) {
2244 1.1 dyoung case AF_INET:
2245 1.1 dyoung if (tcp_lookup_v4(ap->fa.sin_addr.v4, ap->fa.sin_port,
2246 1.1 dyoung ap->la.sin_addr.v4, ap->la.sin_port,
2247 1.1 dyoung &vestige)) {
2248 1.1 dyoung if (ap->op == 2) {
2249 1.1 dyoung vtw_restart(&vestige);
2250 1.1 dyoung }
2251 1.1 dyoung rc = 0;
2252 1.1 dyoung } else
2253 1.1 dyoung rc = ESRCH;
2254 1.1 dyoung break;
2255 1.1 dyoung
2256 1.1 dyoung case AF_INET6:
2257 1.1 dyoung if (tcp_lookup_v6(&ap->fa.sin_addr.v6, ap->fa.sin_port,
2258 1.1 dyoung &ap->la.sin_addr.v6, ap->la.sin_port,
2259 1.1 dyoung &vestige)) {
2260 1.1 dyoung if (ap->op == 2) {
2261 1.1 dyoung vtw_restart(&vestige);
2262 1.1 dyoung }
2263 1.1 dyoung rc = 0;
2264 1.1 dyoung } else
2265 1.1 dyoung rc = ESRCH;
2266 1.1 dyoung break;
2267 1.1 dyoung default:
2268 1.1 dyoung rc = EINVAL;
2269 1.1 dyoung }
2270 1.1 dyoung break;
2271 1.1 dyoung
2272 1.1 dyoung default:
2273 1.1 dyoung rc = EINVAL;
2274 1.1 dyoung }
2275 1.1 dyoung
2276 1.1 dyoung mutex_exit(softnet_lock);
2277 1.1 dyoung return rc;
2278 1.1 dyoung }
2279 1.1 dyoung
2280 1.1 dyoung struct sys_vtw_args {
2281 1.1 dyoung syscallarg(const vtw_sysargs_t *) req;
2282 1.1 dyoung syscallarg(size_t) len;
2283 1.1 dyoung };
2284 1.1 dyoung
2285 1.1 dyoung static int
2286 1.1 dyoung vtw_sys(struct lwp *l, const void *_, register_t *retval)
2287 1.1 dyoung {
2288 1.1 dyoung const struct sys_vtw_args *uap = _;
2289 1.1 dyoung void *buf;
2290 1.1 dyoung int rc;
2291 1.1 dyoung size_t len = SCARG(uap, len);
2292 1.1 dyoung
2293 1.1 dyoung if (len != sizeof (vtw_sysargs_t))
2294 1.1 dyoung return EINVAL;
2295 1.1 dyoung
2296 1.1 dyoung buf = kmem_alloc(len, KM_SLEEP);
2297 1.1 dyoung if (!buf)
2298 1.1 dyoung return ENOMEM;
2299 1.1 dyoung
2300 1.1 dyoung rc = copyin(SCARG(uap, req), buf, len);
2301 1.1 dyoung if (!rc) {
2302 1.1 dyoung rc = vtw_debug_process(buf);
2303 1.1 dyoung }
2304 1.1 dyoung kmem_free(buf, len);
2305 1.1 dyoung
2306 1.1 dyoung return rc;
2307 1.1 dyoung }
2308 1.1 dyoung
2309 1.1 dyoung static void
2310 1.1 dyoung vtw_sanity_check(void)
2311 1.1 dyoung {
2312 1.1 dyoung vtw_ctl_t *ctl;
2313 1.1 dyoung vtw_t *vtw;
2314 1.1 dyoung int i;
2315 1.1 dyoung int n;
2316 1.1 dyoung
2317 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
2318 1.1 dyoung ctl = &vtw_tcpv4[i];
2319 1.1 dyoung
2320 1.1 dyoung if (!ctl->base.v || ctl->nalloc)
2321 1.1 dyoung continue;
2322 1.1 dyoung
2323 1.1 dyoung for (n = 0, vtw = ctl->base.v; ; ) {
2324 1.1 dyoung ++n;
2325 1.1 dyoung vtw = vtw_next(ctl, vtw);
2326 1.1 dyoung if (vtw == ctl->base.v)
2327 1.1 dyoung break;
2328 1.1 dyoung }
2329 1.1 dyoung db_trace(KTR_VTW
2330 1.1 dyoung , (ctl, "sanity: class %x n %x nfree %x"
2331 1.1 dyoung , i, n, ctl->nfree));
2332 1.1 dyoung
2333 1.1 dyoung KASSERT(n == ctl->nfree);
2334 1.1 dyoung }
2335 1.1 dyoung
2336 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
2337 1.1 dyoung ctl = &vtw_tcpv6[i];
2338 1.1 dyoung
2339 1.1 dyoung if (!ctl->base.v || ctl->nalloc)
2340 1.1 dyoung continue;
2341 1.1 dyoung
2342 1.1 dyoung for (n = 0, vtw = ctl->base.v; ; ) {
2343 1.1 dyoung ++n;
2344 1.1 dyoung vtw = vtw_next(ctl, vtw);
2345 1.1 dyoung if (vtw == ctl->base.v)
2346 1.1 dyoung break;
2347 1.1 dyoung }
2348 1.1 dyoung db_trace(KTR_VTW
2349 1.1 dyoung , (ctl, "sanity: class %x n %x nfree %x"
2350 1.1 dyoung , i, n, ctl->nfree));
2351 1.1 dyoung KASSERT(n == ctl->nfree);
2352 1.1 dyoung }
2353 1.1 dyoung }
2354 1.1 dyoung
2355 1.1 dyoung /*!\brief Initialise debug support.
2356 1.1 dyoung */
2357 1.1 dyoung static void
2358 1.1 dyoung vtw_debug_init(void)
2359 1.1 dyoung {
2360 1.1 dyoung int i;
2361 1.1 dyoung
2362 1.1 dyoung vtw_sanity_check();
2363 1.1 dyoung
2364 1.1 dyoung if (vtw_syscall)
2365 1.1 dyoung return;
2366 1.1 dyoung
2367 1.1 dyoung for (i = 511; i; --i) {
2368 1.1 dyoung if (sysent[i].sy_call == sys_nosys) {
2369 1.1 dyoung sysent[i].sy_call = vtw_sys;
2370 1.1 dyoung sysent[i].sy_narg = 2;
2371 1.1 dyoung sysent[i].sy_argsize = sizeof (struct sys_vtw_args);
2372 1.1 dyoung sysent[i].sy_flags = 0;
2373 1.1 dyoung
2374 1.1 dyoung vtw_syscall = i;
2375 1.1 dyoung break;
2376 1.1 dyoung }
2377 1.1 dyoung }
2378 1.1 dyoung if (i) {
2379 1.1 dyoung const struct sysctlnode *node;
2380 1.1 dyoung uint32_t flags;
2381 1.1 dyoung
2382 1.1 dyoung flags = sysctl_root.sysctl_flags;
2383 1.1 dyoung
2384 1.1 dyoung sysctl_root.sysctl_flags |= CTLFLAG_READWRITE;
2385 1.1 dyoung sysctl_root.sysctl_flags &= ~CTLFLAG_PERMANENT;
2386 1.1 dyoung
2387 1.1 dyoung sysctl_createv(0, 0, 0, &node,
2388 1.1 dyoung CTLFLAG_PERMANENT, CTLTYPE_NODE,
2389 1.1 dyoung "koff",
2390 1.1 dyoung SYSCTL_DESCR("Kernel Obscure Feature Finder"),
2391 1.1 dyoung 0, 0, 0, 0, CTL_CREATE, CTL_EOL);
2392 1.1 dyoung
2393 1.1 dyoung if (!node) {
2394 1.1 dyoung sysctl_createv(0, 0, 0, &node,
2395 1.1 dyoung CTLFLAG_PERMANENT, CTLTYPE_NODE,
2396 1.1 dyoung "koffka",
2397 1.1 dyoung SYSCTL_DESCR("The Real(tm) Kernel"
2398 1.1 dyoung " Obscure Feature Finder"),
2399 1.1 dyoung 0, 0, 0, 0, CTL_CREATE, CTL_EOL);
2400 1.1 dyoung }
2401 1.1 dyoung if (node) {
2402 1.1 dyoung sysctl_createv(0, 0, 0, 0,
2403 1.1 dyoung CTLFLAG_PERMANENT|CTLFLAG_READONLY,
2404 1.1 dyoung CTLTYPE_INT, "vtw_debug_syscall",
2405 1.1 dyoung SYSCTL_DESCR("vtw debug"
2406 1.1 dyoung " system call number"),
2407 1.1 dyoung 0, 0, &vtw_syscall, 0, node->sysctl_num,
2408 1.1 dyoung CTL_CREATE, CTL_EOL);
2409 1.1 dyoung }
2410 1.1 dyoung sysctl_root.sysctl_flags = flags;
2411 1.1 dyoung }
2412 1.1 dyoung }
2413 1.1 dyoung #else /* !VTW_DEBUG */
/* Debug support is compiled out: nothing to initialise. */
static void
vtw_debug_init(void)
{
}
2419 1.1 dyoung #endif /* !VTW_DEBUG */
2420 1.1 dyoung
/* Placeholder taking an argument count and vector; it ignores both
 * and does nothing.
 */
static void
k_vtw(int c, char **v)
{
	(void)c;
	(void)v;
}
2426