tcp_vtw.c revision 1.4 1 1.1 dyoung /*
2 1.1 dyoung * Copyright (c) 2011 The NetBSD Foundation, Inc.
3 1.1 dyoung * All rights reserved.
4 1.1 dyoung *
5 1.1 dyoung * This code is derived from software contributed to The NetBSD Foundation
6 1.1 dyoung * by Coyote Point Systems, Inc.
7 1.1 dyoung *
8 1.1 dyoung * Redistribution and use in source and binary forms, with or without
9 1.1 dyoung * modification, are permitted provided that the following conditions
10 1.1 dyoung * are met:
11 1.1 dyoung * 1. Redistributions of source code must retain the above copyright
12 1.1 dyoung * notice, this list of conditions and the following disclaimer.
13 1.1 dyoung * 2. Redistributions in binary form must reproduce the above copyright
14 1.1 dyoung * notice, this list of conditions and the following disclaimer in the
15 1.1 dyoung * documentation and/or other materials provided with the distribution.
16 1.1 dyoung *
17 1.1 dyoung * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
18 1.1 dyoung * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 1.1 dyoung * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 1.1 dyoung * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
21 1.1 dyoung * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 1.1 dyoung * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 1.1 dyoung * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 1.1 dyoung * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 1.1 dyoung * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 1.1 dyoung * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 1.1 dyoung * POSSIBILITY OF SUCH DAMAGE.
28 1.1 dyoung */
29 1.1 dyoung #include <sys/cdefs.h>
30 1.1 dyoung
31 1.1 dyoung #include "opt_ddb.h"
32 1.1 dyoung #include "opt_inet.h"
33 1.1 dyoung #include "opt_ipsec.h"
34 1.1 dyoung #include "opt_inet_csum.h"
35 1.1 dyoung #include "opt_tcp_debug.h"
36 1.1 dyoung
37 1.1 dyoung #include <sys/param.h>
38 1.1 dyoung #include <sys/systm.h>
39 1.1 dyoung #include <sys/malloc.h>
40 1.1 dyoung #include <sys/kmem.h>
41 1.1 dyoung #include <sys/mbuf.h>
42 1.1 dyoung #include <sys/protosw.h>
43 1.1 dyoung #include <sys/socket.h>
44 1.1 dyoung #include <sys/socketvar.h>
45 1.1 dyoung #include <sys/errno.h>
46 1.1 dyoung #include <sys/syslog.h>
47 1.1 dyoung #include <sys/pool.h>
48 1.1 dyoung #include <sys/domain.h>
49 1.1 dyoung #include <sys/kernel.h>
50 1.1 dyoung #include <net/if.h>
51 1.1 dyoung #include <net/route.h>
52 1.1 dyoung #include <net/if_types.h>
53 1.1 dyoung
54 1.1 dyoung #include <netinet/in.h>
55 1.1 dyoung #include <netinet/in_systm.h>
56 1.1 dyoung #include <netinet/ip.h>
57 1.1 dyoung #include <netinet/in_pcb.h>
58 1.1 dyoung #include <netinet/in_var.h>
59 1.1 dyoung #include <netinet/ip_var.h>
60 1.1 dyoung #include <netinet/in_offload.h>
61 1.1 dyoung #include <netinet/ip6.h>
62 1.1 dyoung #include <netinet6/ip6_var.h>
63 1.1 dyoung #include <netinet6/in6_pcb.h>
64 1.1 dyoung #include <netinet6/ip6_var.h>
65 1.1 dyoung #include <netinet6/in6_var.h>
66 1.1 dyoung #include <netinet/icmp6.h>
67 1.1 dyoung #include <netinet6/nd6.h>
68 1.1 dyoung
69 1.1 dyoung #include <netinet/tcp.h>
70 1.1 dyoung #include <netinet/tcp_fsm.h>
71 1.1 dyoung #include <netinet/tcp_seq.h>
72 1.1 dyoung #include <netinet/tcp_timer.h>
73 1.1 dyoung #include <netinet/tcp_var.h>
74 1.1 dyoung #include <netinet/tcp_private.h>
75 1.1 dyoung #include <netinet/tcpip.h>
76 1.1 dyoung
77 1.1 dyoung #include <machine/stdarg.h>
78 1.1 dyoung #include <netinet/tcp_vtw.h>
79 1.1 dyoung
80 1.4 dholland __KERNEL_RCSID(0, "$NetBSD: tcp_vtw.c,v 1.4 2011/05/17 05:42:40 dholland Exp $");
81 1.1 dyoung
82 1.1 dyoung #define db_trace(__a, __b) do { } while (/*CONSTCOND*/0)
83 1.1 dyoung
84 1.1 dyoung static void vtw_debug_init(void);
85 1.1 dyoung
86 1.1 dyoung fatp_ctl_t fat_tcpv4;
87 1.1 dyoung fatp_ctl_t fat_tcpv6;
88 1.1 dyoung vtw_ctl_t vtw_tcpv4[VTW_NCLASS];
89 1.1 dyoung vtw_ctl_t vtw_tcpv6[VTW_NCLASS];
90 1.1 dyoung vtw_stats_t vtw_stats;
91 1.1 dyoung
92 1.1 dyoung /* We provide state for the lookup_ports iterator.
93 1.1 dyoung * As currently we are netlock-protected, there is one.
94 1.1 dyoung * If we were finer-grain, we would have one per CPU.
95 1.1 dyoung * I do not want to be in the business of alloc/free.
96 1.1 dyoung * The best alternate would be allocate on the caller's
97 1.1 dyoung * stack, but that would require them to know the struct,
98 1.1 dyoung * or at least the size.
99 1.1 dyoung * See how she goes.
100 1.1 dyoung */
101 1.1 dyoung struct tcp_ports_iterator {
102 1.1 dyoung union {
103 1.1 dyoung struct in_addr v4;
104 1.1 dyoung struct in6_addr v6;
105 1.1 dyoung } addr;
106 1.1 dyoung u_int port;
107 1.1 dyoung
108 1.1 dyoung uint32_t wild : 1;
109 1.1 dyoung
110 1.1 dyoung vtw_ctl_t *ctl;
111 1.1 dyoung fatp_t *fp;
112 1.1 dyoung
113 1.1 dyoung uint16_t slot_idx;
114 1.1 dyoung uint16_t ctl_idx;
115 1.1 dyoung };
116 1.1 dyoung
117 1.1 dyoung static struct tcp_ports_iterator tcp_ports_iterator_v4;
118 1.1 dyoung static struct tcp_ports_iterator tcp_ports_iterator_v6;
119 1.1 dyoung
120 1.1 dyoung static int vtw_age(vtw_ctl_t *, struct timeval *);
121 1.1 dyoung
122 1.1 dyoung /*!\brief allocate a fat pointer from a collection.
123 1.1 dyoung */
124 1.1 dyoung static fatp_t *
125 1.1 dyoung fatp_alloc(fatp_ctl_t *fat)
126 1.1 dyoung {
127 1.1 dyoung fatp_t *fp = 0;
128 1.1 dyoung
129 1.1 dyoung if (fat->nfree) {
130 1.1 dyoung fp = fat->free;
131 1.1 dyoung if (fp) {
132 1.1 dyoung fat->free = fatp_next(fat, fp);
133 1.1 dyoung --fat->nfree;
134 1.1 dyoung ++fat->nalloc;
135 1.1 dyoung fp->nxt = 0;
136 1.1 dyoung
137 1.1 dyoung KASSERT(!fp->inuse);
138 1.1 dyoung }
139 1.1 dyoung }
140 1.1 dyoung
141 1.1 dyoung return fp;
142 1.1 dyoung }
143 1.1 dyoung
144 1.1 dyoung /*!\brief free a fat pointer.
145 1.1 dyoung */
146 1.1 dyoung static void
147 1.1 dyoung fatp_free(fatp_ctl_t *fat, fatp_t *fp)
148 1.1 dyoung {
149 1.1 dyoung if (fp) {
150 1.1 dyoung KASSERT(!fp->inuse);
151 1.1 dyoung KASSERT(!fp->nxt);
152 1.1 dyoung
153 1.1 dyoung fp->nxt = fatp_index(fat, fat->free);
154 1.1 dyoung fat->free = fp;
155 1.1 dyoung
156 1.1 dyoung ++fat->nfree;
157 1.1 dyoung --fat->nalloc;
158 1.1 dyoung }
159 1.1 dyoung }
160 1.1 dyoung
161 1.1 dyoung /*!\brief initialise a collection of fat pointers.
162 1.1 dyoung *
163 1.1 dyoung *\param n # hash buckets
164 1.1 dyoung *\param m total # fat pointers to allocate
165 1.1 dyoung *
166 1.1 dyoung * We allocate 2x as much, as we have two hashes: full and lport only.
167 1.1 dyoung */
168 1.1 dyoung static void
169 1.1 dyoung fatp_init(fatp_ctl_t *fat, uint32_t n, uint32_t m)
170 1.1 dyoung {
171 1.1 dyoung fatp_t *fp;
172 1.1 dyoung
173 1.1 dyoung KASSERT(n <= FATP_MAX / 2);
174 1.1 dyoung
175 1.1 dyoung fat->hash = kmem_alloc(2*m * sizeof (fatp_t *), KM_SLEEP);
176 1.1 dyoung fat->base = kmem_alloc(2*n * sizeof (fatp_t), KM_SLEEP);
177 1.1 dyoung
178 1.1 dyoung if (!fat->base) {
179 1.1 dyoung if (fat->hash)
180 1.1 dyoung kmem_free(fat->hash, 2*m * sizeof (fatp_t *));
181 1.1 dyoung
182 1.1 dyoung bzero(fat, sizeof (*fat));
183 1.1 dyoung return;
184 1.1 dyoung }
185 1.1 dyoung
186 1.1 dyoung fat->port = &fat->hash[m];
187 1.1 dyoung
188 1.1 dyoung fat->mask = m - 1; // ASSERT is power of 2 (m)
189 1.1 dyoung fat->lim = fat->base + 2*n - 1;
190 1.1 dyoung fat->nfree = 0;
191 1.1 dyoung fat->nalloc = 2*n;
192 1.1 dyoung
193 1.1 dyoung bzero(fat->hash, 2*m * sizeof (fatp_t *));
194 1.1 dyoung bzero(fat->base, 2*n * sizeof (fatp_t));
195 1.1 dyoung
196 1.1 dyoung /* Initialise the free list.
197 1.1 dyoung */
198 1.1 dyoung for (fp = fat->lim; fp >= fat->base; --fp) {
199 1.1 dyoung fatp_free(fat, fp);
200 1.1 dyoung }
201 1.1 dyoung }
202 1.1 dyoung
/*
 * The `xtra' is XORed into the tag stored.
 *
 * There is one entry per slot number; the table is indexed by the slot
 * a tag is stored in and is only ever read, hence const.
 */
static const uint32_t fatp_xtra[] = {
	0x11111111, 0x22222222, 0x33333333, 0x44444444,
	0x55555555, 0x66666666, 0x77777777, 0x88888888,
	0x12121212, 0x21212121, 0x34343434, 0x43434343,
	0x56565656, 0x65656565, 0x78787878, 0x87878787,
	0x11221122, 0x22112211, 0x33443344, 0x44334433,
	0x55665566, 0x66556655, 0x77887788, 0x88778877,
	0x11112222, 0x22221111, 0x33334444, 0x44443333,
	0x55556666, 0x66665555, 0x77778888, 0x88887777,
};
216 1.1 dyoung
217 1.1 dyoung /*!\brief turn a {fatp_t*,slot} into an integral key.
218 1.1 dyoung *
219 1.1 dyoung * The key can be used to obtain the fatp_t, and the slot,
220 1.1 dyoung * as it directly encodes them.
221 1.1 dyoung */
222 1.1 dyoung static inline uint32_t
223 1.1 dyoung fatp_key(fatp_ctl_t *fat, fatp_t *fp, uint32_t slot)
224 1.1 dyoung {
225 1.1 dyoung CTASSERT(CACHE_LINE_SIZE == 32 ||
226 1.1 dyoung CACHE_LINE_SIZE == 64 ||
227 1.1 dyoung CACHE_LINE_SIZE == 128);
228 1.1 dyoung
229 1.1 dyoung switch (fatp_ntags()) {
230 1.1 dyoung case 7:
231 1.1 dyoung return (fatp_index(fat, fp) << 3) | slot;
232 1.1 dyoung case 15:
233 1.1 dyoung return (fatp_index(fat, fp) << 4) | slot;
234 1.1 dyoung case 31:
235 1.1 dyoung return (fatp_index(fat, fp) << 5) | slot;
236 1.1 dyoung default:
237 1.1 dyoung KASSERT(0 && "no support, for no good reason");
238 1.1 dyoung return ~0;
239 1.1 dyoung }
240 1.1 dyoung }
241 1.1 dyoung
242 1.1 dyoung static inline uint32_t
243 1.1 dyoung fatp_slot_from_key(fatp_ctl_t *fat, uint32_t key)
244 1.1 dyoung {
245 1.1 dyoung CTASSERT(CACHE_LINE_SIZE == 32 ||
246 1.1 dyoung CACHE_LINE_SIZE == 64 ||
247 1.1 dyoung CACHE_LINE_SIZE == 128);
248 1.1 dyoung
249 1.1 dyoung switch (fatp_ntags()) {
250 1.1 dyoung case 7:
251 1.1 dyoung return key & 7;
252 1.1 dyoung case 15:
253 1.1 dyoung return key & 15;
254 1.1 dyoung case 31:
255 1.1 dyoung return key & 31;
256 1.1 dyoung default:
257 1.1 dyoung KASSERT(0 && "no support, for no good reason");
258 1.1 dyoung return ~0;
259 1.1 dyoung }
260 1.1 dyoung }
261 1.1 dyoung
262 1.1 dyoung static inline fatp_t *
263 1.1 dyoung fatp_from_key(fatp_ctl_t *fat, uint32_t key)
264 1.1 dyoung {
265 1.1 dyoung CTASSERT(CACHE_LINE_SIZE == 32 ||
266 1.1 dyoung CACHE_LINE_SIZE == 64 ||
267 1.1 dyoung CACHE_LINE_SIZE == 128);
268 1.1 dyoung
269 1.1 dyoung switch (fatp_ntags()) {
270 1.1 dyoung case 7:
271 1.1 dyoung key >>= 3;
272 1.1 dyoung break;
273 1.1 dyoung case 15:
274 1.1 dyoung key >>= 4;
275 1.1 dyoung break;
276 1.1 dyoung case 31:
277 1.1 dyoung key >>= 5;
278 1.1 dyoung break;
279 1.1 dyoung default:
280 1.1 dyoung KASSERT(0 && "no support, for no good reason");
281 1.1 dyoung return 0;
282 1.1 dyoung }
283 1.1 dyoung
284 1.1 dyoung return key ? fat->base + key - 1 : 0;
285 1.1 dyoung }
286 1.1 dyoung
287 1.1 dyoung static inline uint32_t
288 1.1 dyoung idx_encode(vtw_ctl_t *ctl, uint32_t idx)
289 1.1 dyoung {
290 1.1 dyoung return (idx << ctl->idx_bits) | idx;
291 1.1 dyoung }
292 1.1 dyoung
293 1.1 dyoung static inline uint32_t
294 1.1 dyoung idx_decode(vtw_ctl_t *ctl, uint32_t bits)
295 1.1 dyoung {
296 1.1 dyoung uint32_t idx = bits & ctl->idx_mask;
297 1.1 dyoung
298 1.1 dyoung if (idx_encode(ctl, idx) == bits)
299 1.1 dyoung return idx;
300 1.1 dyoung else
301 1.1 dyoung return ~0;
302 1.1 dyoung }
303 1.1 dyoung
304 1.1 dyoung /*!\brief insert index into fatp hash
305 1.1 dyoung *
306 1.1 dyoung *\param idx - index of element being placed in hash chain
307 1.1 dyoung *\param tag - 32-bit tag identifier
308 1.1 dyoung *
309 1.1 dyoung *\returns
310 1.1 dyoung * value which can be used to locate entry.
311 1.1 dyoung *
312 1.1 dyoung *\note
313 1.1 dyoung * we rely on the fact that there are unused high bits in the index
314 1.1 dyoung * for verification purposes on lookup.
315 1.1 dyoung */
316 1.1 dyoung
317 1.1 dyoung static inline uint32_t
318 1.1 dyoung fatp_vtw_inshash(fatp_ctl_t *fat, uint32_t idx, uint32_t tag, int which,
319 1.1 dyoung void *dbg)
320 1.1 dyoung {
321 1.1 dyoung fatp_t *fp;
322 1.1 dyoung fatp_t **hash = (which ? fat->port : fat->hash);
323 1.1 dyoung int i;
324 1.1 dyoung
325 1.1 dyoung fp = hash[tag & fat->mask];
326 1.1 dyoung
327 1.1 dyoung while (!fp || fatp_full(fp)) {
328 1.1 dyoung fatp_t *fq;
329 1.1 dyoung
330 1.1 dyoung /* All entries are inuse at the top level.
331 1.1 dyoung * We allocate a spare, and push the top level
332 1.1 dyoung * down one. All entries in the fp we push down
333 1.1 dyoung * (think of a tape worm here) will be expelled sooner than
334 1.1 dyoung * any entries added subsequently to this hash bucket.
335 1.1 dyoung * This is a property of the time waits we are exploiting.
336 1.1 dyoung */
337 1.1 dyoung
338 1.1 dyoung fq = fatp_alloc(fat);
339 1.1 dyoung if (!fq) {
340 1.1 dyoung vtw_age(fat->vtw, 0);
341 1.1 dyoung fp = hash[tag & fat->mask];
342 1.1 dyoung continue;
343 1.1 dyoung }
344 1.1 dyoung
345 1.1 dyoung fq->inuse = 0;
346 1.1 dyoung fq->nxt = fatp_index(fat, fp);
347 1.1 dyoung
348 1.1 dyoung hash[tag & fat->mask] = fq;
349 1.1 dyoung
350 1.1 dyoung fp = fq;
351 1.1 dyoung }
352 1.1 dyoung
353 1.1 dyoung KASSERT(!fatp_full(fp));
354 1.1 dyoung
355 1.1 dyoung /* Fill highest index first. Lookup is lowest first.
356 1.1 dyoung */
357 1.1 dyoung for (i = fatp_ntags(); --i >= 0; ) {
358 1.1 dyoung if (!((1 << i) & fp->inuse)) {
359 1.1 dyoung break;
360 1.1 dyoung }
361 1.1 dyoung }
362 1.1 dyoung
363 1.1 dyoung fp->inuse |= 1 << i;
364 1.1 dyoung fp->tag[i] = tag ^ idx_encode(fat->vtw, idx) ^ fatp_xtra[i];
365 1.1 dyoung
366 1.1 dyoung db_trace(KTR_VTW
367 1.1 dyoung , (fp, "fat: inuse %5.5x tag[%x] %8.8x"
368 1.1 dyoung , fp->inuse
369 1.1 dyoung , i, fp->tag[i]));
370 1.1 dyoung
371 1.1 dyoung return fatp_key(fat, fp, i);
372 1.1 dyoung }
373 1.1 dyoung
374 1.1 dyoung static inline int
375 1.1 dyoung vtw_alive(const vtw_t *vtw)
376 1.1 dyoung {
377 1.1 dyoung return vtw->hashed && vtw->expire.tv_sec;
378 1.1 dyoung }
379 1.1 dyoung
380 1.1 dyoung static inline uint32_t
381 1.1 dyoung vtw_index_v4(vtw_ctl_t *ctl, vtw_v4_t *v4)
382 1.1 dyoung {
383 1.1 dyoung if (ctl->base.v4 <= v4 && v4 <= ctl->lim.v4)
384 1.1 dyoung return v4 - ctl->base.v4;
385 1.1 dyoung
386 1.1 dyoung KASSERT(0 && "vtw out of bounds");
387 1.1 dyoung
388 1.1 dyoung return ~0;
389 1.1 dyoung }
390 1.1 dyoung
391 1.1 dyoung static inline uint32_t
392 1.1 dyoung vtw_index_v6(vtw_ctl_t *ctl, vtw_v6_t *v6)
393 1.1 dyoung {
394 1.1 dyoung if (ctl->base.v6 <= v6 && v6 <= ctl->lim.v6)
395 1.1 dyoung return v6 - ctl->base.v6;
396 1.1 dyoung
397 1.1 dyoung KASSERT(0 && "vtw out of bounds");
398 1.1 dyoung
399 1.1 dyoung return ~0;
400 1.1 dyoung }
401 1.1 dyoung
402 1.1 dyoung static inline uint32_t
403 1.1 dyoung vtw_index(vtw_ctl_t *ctl, vtw_t *vtw)
404 1.1 dyoung {
405 1.1 dyoung if (ctl->clidx)
406 1.1 dyoung ctl = ctl->ctl;
407 1.1 dyoung
408 1.1 dyoung if (ctl->is_v4)
409 1.1 dyoung return vtw_index_v4(ctl, (vtw_v4_t *)vtw);
410 1.1 dyoung
411 1.1 dyoung if (ctl->is_v6)
412 1.1 dyoung return vtw_index_v6(ctl, (vtw_v6_t *)vtw);
413 1.1 dyoung
414 1.1 dyoung KASSERT(0 && "neither 4 nor 6. most curious.");
415 1.1 dyoung
416 1.1 dyoung return ~0;
417 1.1 dyoung }
418 1.1 dyoung
419 1.1 dyoung static inline vtw_t *
420 1.1 dyoung vtw_from_index(vtw_ctl_t *ctl, uint32_t idx)
421 1.1 dyoung {
422 1.1 dyoung if (ctl->clidx)
423 1.1 dyoung ctl = ctl->ctl;
424 1.1 dyoung
425 1.1 dyoung /* See if the index looks like it might be an index.
426 1.1 dyoung * Bits on outside of the valid index bits is a give away.
427 1.1 dyoung */
428 1.1 dyoung idx = idx_decode(ctl, idx);
429 1.1 dyoung
430 1.1 dyoung if (idx == ~0) {
431 1.1 dyoung return 0;
432 1.1 dyoung } else if (ctl->is_v4) {
433 1.1 dyoung vtw_v4_t *vtw = ctl->base.v4 + idx;
434 1.1 dyoung
435 1.1 dyoung return (ctl->base.v4 <= vtw && vtw <= ctl->lim.v4)
436 1.1 dyoung ? &vtw->common : 0;
437 1.1 dyoung } else if (ctl->is_v6) {
438 1.1 dyoung vtw_v6_t *vtw = ctl->base.v6 + idx;
439 1.1 dyoung
440 1.1 dyoung return (ctl->base.v6 <= vtw && vtw <= ctl->lim.v6)
441 1.1 dyoung ? &vtw->common : 0;
442 1.1 dyoung } else {
443 1.1 dyoung KASSERT(0 && "badness");
444 1.1 dyoung return 0;
445 1.1 dyoung }
446 1.1 dyoung }
447 1.1 dyoung
448 1.1 dyoung /*!\brief return the next vtw after this one.
449 1.1 dyoung *
450 1.1 dyoung * Due to the differing sizes of the entries in differing
451 1.1 dyoung * arenas, we have to ensure we ++ the correct pointer type.
452 1.1 dyoung *
453 1.1 dyoung * Also handles wrap.
454 1.1 dyoung */
455 1.1 dyoung static inline vtw_t *
456 1.1 dyoung vtw_next(vtw_ctl_t *ctl, vtw_t *vtw)
457 1.1 dyoung {
458 1.1 dyoung if (ctl->is_v4) {
459 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
460 1.1 dyoung
461 1.1 dyoung vtw = &(++v4)->common;
462 1.1 dyoung } else {
463 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
464 1.1 dyoung
465 1.1 dyoung vtw = &(++v6)->common;
466 1.1 dyoung }
467 1.1 dyoung
468 1.1 dyoung if (vtw > ctl->lim.v)
469 1.1 dyoung vtw = ctl->base.v;
470 1.1 dyoung
471 1.1 dyoung return vtw;
472 1.1 dyoung }
473 1.1 dyoung
474 1.1 dyoung /*!\brief remove entry from FATP hash chains
475 1.1 dyoung */
476 1.1 dyoung static inline void
477 1.1 dyoung vtw_unhash(vtw_ctl_t *ctl, vtw_t *vtw)
478 1.1 dyoung {
479 1.1 dyoung fatp_ctl_t *fat = ctl->fat;
480 1.1 dyoung fatp_t *fp;
481 1.1 dyoung uint32_t key = vtw->key;
482 1.1 dyoung uint32_t tag, slot, idx;
483 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
484 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
485 1.1 dyoung
486 1.1 dyoung if (!vtw->hashed) {
487 1.1 dyoung KASSERT(0 && "unhashed");
488 1.1 dyoung return;
489 1.1 dyoung }
490 1.1 dyoung
491 1.1 dyoung if (fat->vtw->is_v4) {
492 1.1 dyoung tag = v4_tag(v4->faddr, v4->fport, v4->laddr, v4->lport);
493 1.1 dyoung } else if (fat->vtw->is_v6) {
494 1.1 dyoung tag = v6_tag(&v6->faddr, v6->fport, &v6->laddr, v6->lport);
495 1.1 dyoung } else {
496 1.1 dyoung tag = 0;
497 1.1 dyoung KASSERT(0 && "not reached");
498 1.1 dyoung }
499 1.1 dyoung
500 1.1 dyoung /* Remove from fat->hash[]
501 1.1 dyoung */
502 1.1 dyoung slot = fatp_slot_from_key(fat, key);
503 1.1 dyoung fp = fatp_from_key(fat, key);
504 1.1 dyoung idx = vtw_index(ctl, vtw);
505 1.1 dyoung
506 1.1 dyoung db_trace(KTR_VTW
507 1.1 dyoung , (fp, "fat: del inuse %5.5x slot %x idx %x key %x tag %x"
508 1.1 dyoung , fp->inuse, slot, idx, key, tag));
509 1.1 dyoung
510 1.1 dyoung KASSERT(fp->inuse & (1 << slot));
511 1.1 dyoung KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
512 1.1 dyoung ^ fatp_xtra[slot]));
513 1.1 dyoung
514 1.1 dyoung if ((fp->inuse & (1 << slot))
515 1.1 dyoung && fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
516 1.1 dyoung ^ fatp_xtra[slot])) {
517 1.1 dyoung fp->inuse ^= 1 << slot;
518 1.1 dyoung fp->tag[slot] = 0;
519 1.1 dyoung
520 1.1 dyoung /* When we delete entries, we do not compact. This is
521 1.1 dyoung * due to temporality. We add entries, and they
522 1.1 dyoung * (eventually) expire. Older entries will be further
523 1.1 dyoung * down the chain.
524 1.1 dyoung */
525 1.1 dyoung if (!fp->inuse) {
526 1.1 dyoung uint32_t hi = tag & fat->mask;
527 1.1 dyoung fatp_t *fq = 0;
528 1.1 dyoung fatp_t *fr = fat->hash[hi];
529 1.1 dyoung
530 1.1 dyoung while (fr && fr != fp) {
531 1.1 dyoung fr = fatp_next(fat, fq = fr);
532 1.1 dyoung }
533 1.1 dyoung
534 1.1 dyoung if (fr == fp) {
535 1.1 dyoung if (fq) {
536 1.1 dyoung fq->nxt = fp->nxt;
537 1.1 dyoung fp->nxt = 0;
538 1.1 dyoung fatp_free(fat, fp);
539 1.1 dyoung } else {
540 1.1 dyoung KASSERT(fat->hash[hi] == fp);
541 1.1 dyoung
542 1.1 dyoung if (fp->nxt) {
543 1.1 dyoung fat->hash[hi]
544 1.1 dyoung = fatp_next(fat, fp);
545 1.1 dyoung fp->nxt = 0;
546 1.1 dyoung fatp_free(fat, fp);
547 1.1 dyoung } else {
548 1.1 dyoung /* retain for next use.
549 1.1 dyoung */
550 1.1 dyoung ;
551 1.1 dyoung }
552 1.1 dyoung }
553 1.1 dyoung } else {
554 1.1 dyoung fr = fat->hash[hi];
555 1.1 dyoung
556 1.1 dyoung do {
557 1.1 dyoung db_trace(KTR_VTW
558 1.1 dyoung , (fr
559 1.1 dyoung , "fat:*del inuse %5.5x"
560 1.1 dyoung " nxt %x"
561 1.1 dyoung , fr->inuse, fr->nxt));
562 1.1 dyoung
563 1.1 dyoung fr = fatp_next(fat, fq = fr);
564 1.1 dyoung } while (fr && fr != fp);
565 1.1 dyoung
566 1.1 dyoung KASSERT(0 && "oops");
567 1.1 dyoung }
568 1.1 dyoung }
569 1.1 dyoung vtw->key ^= ~0;
570 1.1 dyoung }
571 1.1 dyoung
572 1.1 dyoung if (fat->vtw->is_v4) {
573 1.1 dyoung tag = v4_port_tag(v4->lport);
574 1.1 dyoung } else if (fat->vtw->is_v6) {
575 1.1 dyoung tag = v6_port_tag(v6->lport);
576 1.1 dyoung }
577 1.1 dyoung
578 1.1 dyoung /* Remove from fat->port[]
579 1.1 dyoung */
580 1.1 dyoung key = vtw->port_key;
581 1.1 dyoung slot = fatp_slot_from_key(fat, key);
582 1.1 dyoung fp = fatp_from_key(fat, key);
583 1.1 dyoung idx = vtw_index(ctl, vtw);
584 1.1 dyoung
585 1.1 dyoung db_trace(KTR_VTW
586 1.1 dyoung , (fp, "fatport: del inuse %5.5x"
587 1.1 dyoung " slot %x idx %x key %x tag %x"
588 1.1 dyoung , fp->inuse, slot, idx, key, tag));
589 1.1 dyoung
590 1.1 dyoung KASSERT(fp->inuse & (1 << slot));
591 1.1 dyoung KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
592 1.1 dyoung ^ fatp_xtra[slot]));
593 1.1 dyoung
594 1.1 dyoung if ((fp->inuse & (1 << slot))
595 1.1 dyoung && fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
596 1.1 dyoung ^ fatp_xtra[slot])) {
597 1.1 dyoung fp->inuse ^= 1 << slot;
598 1.1 dyoung fp->tag[slot] = 0;
599 1.1 dyoung
600 1.1 dyoung if (!fp->inuse) {
601 1.1 dyoung uint32_t hi = tag & fat->mask;
602 1.1 dyoung fatp_t *fq = 0;
603 1.1 dyoung fatp_t *fr = fat->port[hi];
604 1.1 dyoung
605 1.1 dyoung while (fr && fr != fp) {
606 1.1 dyoung fr = fatp_next(fat, fq = fr);
607 1.1 dyoung }
608 1.1 dyoung
609 1.1 dyoung if (fr == fp) {
610 1.1 dyoung if (fq) {
611 1.1 dyoung fq->nxt = fp->nxt;
612 1.1 dyoung fp->nxt = 0;
613 1.1 dyoung fatp_free(fat, fp);
614 1.1 dyoung } else {
615 1.1 dyoung KASSERT(fat->port[hi] == fp);
616 1.1 dyoung
617 1.1 dyoung if (fp->nxt) {
618 1.1 dyoung fat->port[hi]
619 1.1 dyoung = fatp_next(fat, fp);
620 1.1 dyoung fp->nxt = 0;
621 1.1 dyoung fatp_free(fat, fp);
622 1.1 dyoung } else {
623 1.1 dyoung /* retain for next use.
624 1.1 dyoung */
625 1.1 dyoung ;
626 1.1 dyoung }
627 1.1 dyoung }
628 1.1 dyoung }
629 1.1 dyoung }
630 1.1 dyoung vtw->port_key ^= ~0;
631 1.1 dyoung }
632 1.1 dyoung
633 1.1 dyoung vtw->hashed = 0;
634 1.1 dyoung }
635 1.1 dyoung
636 1.1 dyoung /*!\brief remove entry from hash, possibly free.
637 1.1 dyoung */
638 1.1 dyoung void
639 1.1 dyoung vtw_del(vtw_ctl_t *ctl, vtw_t *vtw)
640 1.1 dyoung {
641 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
642 1.1 dyoung
643 1.1 dyoung if (vtw->hashed) {
644 1.1 dyoung ++vtw_stats.del;
645 1.1 dyoung vtw_unhash(ctl, vtw);
646 1.1 dyoung }
647 1.1 dyoung
648 1.1 dyoung /* We only delete the oldest entry.
649 1.1 dyoung */
650 1.1 dyoung if (vtw != ctl->oldest.v)
651 1.1 dyoung return;
652 1.1 dyoung
653 1.1 dyoung --ctl->nalloc;
654 1.1 dyoung ++ctl->nfree;
655 1.1 dyoung
656 1.1 dyoung vtw->expire.tv_sec = 0;
657 1.1 dyoung vtw->expire.tv_usec = ~0;
658 1.1 dyoung
659 1.1 dyoung if (!ctl->nalloc)
660 1.1 dyoung ctl->oldest.v = 0;
661 1.1 dyoung
662 1.1 dyoung ctl->oldest.v = vtw_next(ctl, vtw);
663 1.1 dyoung }
664 1.1 dyoung
665 1.4 dholland /*!\brief insert vestigial timewait in hash chain
666 1.1 dyoung */
667 1.1 dyoung static void
668 1.1 dyoung vtw_inshash_v4(vtw_ctl_t *ctl, vtw_t *vtw)
669 1.1 dyoung {
670 1.1 dyoung uint32_t idx = vtw_index(ctl, vtw);
671 1.1 dyoung uint32_t tag;
672 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
673 1.1 dyoung
674 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
675 1.1 dyoung KASSERT(!vtw->hashed);
676 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
677 1.1 dyoung
678 1.1 dyoung ++vtw_stats.ins;
679 1.1 dyoung
680 1.1 dyoung tag = v4_tag(v4->faddr, v4->fport,
681 1.1 dyoung v4->laddr, v4->lport);
682 1.1 dyoung
683 1.1 dyoung vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw);
684 1.1 dyoung
685 1.1 dyoung db_trace(KTR_VTW, (ctl
686 1.1 dyoung , "vtw: ins %8.8x:%4.4x %8.8x:%4.4x"
687 1.1 dyoung " tag %8.8x key %8.8x"
688 1.1 dyoung , v4->faddr, v4->fport
689 1.1 dyoung , v4->laddr, v4->lport
690 1.1 dyoung , tag
691 1.1 dyoung , vtw->key));
692 1.1 dyoung
693 1.1 dyoung tag = v4_port_tag(v4->lport);
694 1.1 dyoung vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw);
695 1.1 dyoung
696 1.1 dyoung db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x"
697 1.1 dyoung , v4->lport, v4->lport
698 1.1 dyoung , tag
699 1.1 dyoung , vtw->key));
700 1.1 dyoung
701 1.1 dyoung vtw->hashed = 1;
702 1.1 dyoung }
703 1.1 dyoung
704 1.4 dholland /*!\brief insert vestigial timewait in hash chain
705 1.1 dyoung */
706 1.1 dyoung static void
707 1.1 dyoung vtw_inshash_v6(vtw_ctl_t *ctl, vtw_t *vtw)
708 1.1 dyoung {
709 1.1 dyoung uint32_t idx = vtw_index(ctl, vtw);
710 1.1 dyoung uint32_t tag;
711 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
712 1.1 dyoung
713 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
714 1.1 dyoung KASSERT(!vtw->hashed);
715 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
716 1.1 dyoung
717 1.1 dyoung ++vtw_stats.ins;
718 1.1 dyoung
719 1.1 dyoung tag = v6_tag(&v6->faddr, v6->fport,
720 1.1 dyoung &v6->laddr, v6->lport);
721 1.1 dyoung
722 1.1 dyoung vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw);
723 1.1 dyoung
724 1.1 dyoung tag = v6_port_tag(v6->lport);
725 1.1 dyoung vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw);
726 1.1 dyoung
727 1.1 dyoung db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x"
728 1.1 dyoung , v6->lport, v6->lport
729 1.1 dyoung , tag
730 1.1 dyoung , vtw->key));
731 1.1 dyoung
732 1.1 dyoung vtw->hashed = 1;
733 1.1 dyoung }
734 1.1 dyoung
735 1.1 dyoung static vtw_t *
736 1.1 dyoung vtw_lookup_hash_v4(vtw_ctl_t *ctl, uint32_t faddr, uint16_t fport
737 1.1 dyoung , uint32_t laddr, uint16_t lport
738 1.1 dyoung , int which)
739 1.1 dyoung {
740 1.1 dyoung vtw_v4_t *v4;
741 1.1 dyoung vtw_t *vtw;
742 1.1 dyoung uint32_t tag;
743 1.1 dyoung fatp_t *fp;
744 1.1 dyoung int i;
745 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
746 1.1 dyoung
747 1.1 dyoung if (!ctl || !ctl->fat)
748 1.1 dyoung return 0;
749 1.1 dyoung
750 1.1 dyoung ++vtw_stats.look[which];
751 1.1 dyoung
752 1.1 dyoung if (which) {
753 1.1 dyoung tag = v4_port_tag(lport);
754 1.1 dyoung fp = ctl->fat->port[tag & ctl->fat->mask];
755 1.1 dyoung } else {
756 1.1 dyoung tag = v4_tag(faddr, fport, laddr, lport);
757 1.1 dyoung fp = ctl->fat->hash[tag & ctl->fat->mask];
758 1.1 dyoung }
759 1.1 dyoung
760 1.1 dyoung while (fp && fp->inuse) {
761 1.1 dyoung uint32_t inuse = fp->inuse;
762 1.1 dyoung
763 1.1 dyoung ++fatps;
764 1.1 dyoung
765 1.1 dyoung for (i = 0; inuse && i < fatp_ntags(); ++i) {
766 1.1 dyoung uint32_t idx;
767 1.1 dyoung
768 1.1 dyoung if (!(inuse & (1 << i)))
769 1.1 dyoung continue;
770 1.1 dyoung
771 1.1 dyoung inuse ^= 1 << i;
772 1.1 dyoung
773 1.1 dyoung ++probes;
774 1.1 dyoung ++vtw_stats.probe[which];
775 1.1 dyoung
776 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
777 1.1 dyoung vtw = vtw_from_index(ctl, idx);
778 1.1 dyoung
779 1.1 dyoung if (!vtw) {
780 1.1 dyoung /* Hopefully fast path.
781 1.1 dyoung */
782 1.1 dyoung db_trace(KTR_VTW
783 1.1 dyoung , (fp, "vtw: fast %A:%P %A:%P"
784 1.1 dyoung " idx %x tag %x"
785 1.1 dyoung , faddr, fport
786 1.1 dyoung , laddr, lport
787 1.1 dyoung , idx, tag));
788 1.1 dyoung continue;
789 1.1 dyoung }
790 1.1 dyoung
791 1.1 dyoung v4 = (void*)vtw;
792 1.1 dyoung
793 1.1 dyoung /* The de-referencing of vtw is what we want to avoid.
794 1.1 dyoung * Losing.
795 1.1 dyoung */
796 1.1 dyoung if (vtw_alive(vtw)
797 1.1 dyoung && ((which ? vtw->port_key : vtw->key)
798 1.1 dyoung == fatp_key(ctl->fat, fp, i))
799 1.1 dyoung && (which
800 1.1 dyoung || (v4->faddr == faddr && v4->laddr == laddr
801 1.1 dyoung && v4->fport == fport))
802 1.1 dyoung && v4->lport == lport) {
803 1.1 dyoung ++vtw_stats.hit[which];
804 1.1 dyoung
805 1.1 dyoung db_trace(KTR_VTW
806 1.1 dyoung , (fp, "vtw: hit %8.8x:%4.4x"
807 1.1 dyoung " %8.8x:%4.4x idx %x key %x"
808 1.1 dyoung , faddr, fport
809 1.1 dyoung , laddr, lport
810 1.1 dyoung , idx_decode(ctl, idx), vtw->key));
811 1.1 dyoung
812 1.1 dyoung KASSERT(vtw->hashed);
813 1.1 dyoung
814 1.1 dyoung goto out;
815 1.1 dyoung }
816 1.1 dyoung ++vtw_stats.losing[which];
817 1.1 dyoung ++losings;
818 1.1 dyoung
819 1.1 dyoung if (vtw_alive(vtw)) {
820 1.1 dyoung db_trace(KTR_VTW
821 1.1 dyoung , (fp, "vtw:!mis %8.8x:%4.4x"
822 1.1 dyoung " %8.8x:%4.4x key %x tag %x"
823 1.1 dyoung , faddr, fport
824 1.1 dyoung , laddr, lport
825 1.1 dyoung , fatp_key(ctl->fat, fp, i)
826 1.1 dyoung , v4_tag(faddr, fport
827 1.1 dyoung , laddr, lport)));
828 1.1 dyoung db_trace(KTR_VTW
829 1.1 dyoung , (vtw, "vtw:!mis %8.8x:%4.4x"
830 1.1 dyoung " %8.8x:%4.4x key %x tag %x"
831 1.1 dyoung , v4->faddr, v4->fport
832 1.1 dyoung , v4->laddr, v4->lport
833 1.1 dyoung , vtw->key
834 1.1 dyoung , v4_tag(v4->faddr, v4->fport
835 1.1 dyoung , v4->laddr, v4->lport)));
836 1.1 dyoung
837 1.1 dyoung if (vtw->key == fatp_key(ctl->fat, fp, i)) {
838 1.1 dyoung db_trace(KTR_VTW
839 1.1 dyoung , (vtw, "vtw:!mis %8.8x:%4.4x"
840 1.1 dyoung " %8.8x:%4.4x key %x"
841 1.1 dyoung " which %x"
842 1.1 dyoung , v4->faddr, v4->fport
843 1.1 dyoung , v4->laddr, v4->lport
844 1.1 dyoung , vtw->key
845 1.1 dyoung , which));
846 1.1 dyoung
847 1.1 dyoung } else {
848 1.1 dyoung db_trace(KTR_VTW
849 1.1 dyoung , (vtw
850 1.1 dyoung , "vtw:!mis"
851 1.1 dyoung " key %8.8x != %8.8x"
852 1.1 dyoung " idx %x i %x which %x"
853 1.1 dyoung , vtw->key
854 1.1 dyoung , fatp_key(ctl->fat, fp, i)
855 1.1 dyoung , idx_decode(ctl, idx)
856 1.1 dyoung , i
857 1.1 dyoung , which));
858 1.1 dyoung }
859 1.1 dyoung } else {
860 1.1 dyoung db_trace(KTR_VTW
861 1.1 dyoung , (fp
862 1.1 dyoung , "vtw:!mis free entry"
863 1.1 dyoung " idx %x vtw %p which %x"
864 1.1 dyoung , idx_decode(ctl, idx)
865 1.1 dyoung , vtw, which));
866 1.1 dyoung }
867 1.1 dyoung }
868 1.1 dyoung
869 1.1 dyoung if (fp->nxt) {
870 1.1 dyoung fp = fatp_next(ctl->fat, fp);
871 1.1 dyoung } else {
872 1.1 dyoung break;
873 1.1 dyoung }
874 1.1 dyoung }
875 1.1 dyoung ++vtw_stats.miss[which];
876 1.1 dyoung vtw = 0;
877 1.1 dyoung out:
878 1.1 dyoung if (fatps > vtw_stats.max_chain[which])
879 1.1 dyoung vtw_stats.max_chain[which] = fatps;
880 1.1 dyoung if (probes > vtw_stats.max_probe[which])
881 1.1 dyoung vtw_stats.max_probe[which] = probes;
882 1.1 dyoung if (losings > vtw_stats.max_loss[which])
883 1.1 dyoung vtw_stats.max_loss[which] = losings;
884 1.1 dyoung
885 1.1 dyoung return vtw;
886 1.1 dyoung }
887 1.1 dyoung
888 1.1 dyoung static vtw_t *
889 1.1 dyoung vtw_lookup_hash_v6(vtw_ctl_t *ctl, const struct in6_addr *faddr, uint16_t fport
890 1.1 dyoung , const struct in6_addr *laddr, uint16_t lport
891 1.1 dyoung , int which)
892 1.1 dyoung {
893 1.1 dyoung vtw_v6_t *v6;
894 1.1 dyoung vtw_t *vtw;
895 1.1 dyoung uint32_t tag;
896 1.1 dyoung fatp_t *fp;
897 1.1 dyoung int i;
898 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
899 1.1 dyoung
900 1.1 dyoung ++vtw_stats.look[which];
901 1.1 dyoung
902 1.1 dyoung if (!ctl || !ctl->fat)
903 1.1 dyoung return 0;
904 1.1 dyoung
905 1.1 dyoung if (which) {
906 1.1 dyoung tag = v6_port_tag(lport);
907 1.1 dyoung fp = ctl->fat->port[tag & ctl->fat->mask];
908 1.1 dyoung } else {
909 1.1 dyoung tag = v6_tag(faddr, fport, laddr, lport);
910 1.1 dyoung fp = ctl->fat->hash[tag & ctl->fat->mask];
911 1.1 dyoung }
912 1.1 dyoung
913 1.1 dyoung while (fp && fp->inuse) {
914 1.1 dyoung uint32_t inuse = fp->inuse;
915 1.1 dyoung
916 1.1 dyoung ++fatps;
917 1.1 dyoung
918 1.1 dyoung for (i = 0; inuse && i < fatp_ntags(); ++i) {
919 1.1 dyoung uint32_t idx;
920 1.1 dyoung
921 1.1 dyoung if (!(inuse & (1 << i)))
922 1.1 dyoung continue;
923 1.1 dyoung
924 1.1 dyoung inuse ^= 1 << i;
925 1.1 dyoung
926 1.1 dyoung ++probes;
927 1.1 dyoung ++vtw_stats.probe[which];
928 1.1 dyoung
929 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
930 1.1 dyoung vtw = vtw_from_index(ctl, idx);
931 1.1 dyoung
932 1.1 dyoung db_trace(KTR_VTW
933 1.1 dyoung , (fp, "probe: %2d %6A:%4.4x %6A:%4.4x idx %x"
934 1.1 dyoung , i
935 1.1 dyoung , db_store(faddr, sizeof (*faddr)), fport
936 1.1 dyoung , db_store(laddr, sizeof (*laddr)), lport
937 1.1 dyoung , idx_decode(ctl, idx)));
938 1.1 dyoung
939 1.1 dyoung if (!vtw) {
940 1.1 dyoung /* Hopefully fast path.
941 1.1 dyoung */
942 1.1 dyoung continue;
943 1.1 dyoung }
944 1.1 dyoung
945 1.1 dyoung v6 = (void*)vtw;
946 1.1 dyoung
947 1.1 dyoung if (vtw_alive(vtw)
948 1.1 dyoung && ((which ? vtw->port_key : vtw->key)
949 1.1 dyoung == fatp_key(ctl->fat, fp, i))
950 1.1 dyoung && v6->lport == lport
951 1.1 dyoung && (which
952 1.1 dyoung || (v6->fport == fport
953 1.1 dyoung && !bcmp(&v6->faddr, faddr, sizeof (*faddr))
954 1.1 dyoung && !bcmp(&v6->laddr, laddr
955 1.1 dyoung , sizeof (*laddr))))) {
956 1.1 dyoung ++vtw_stats.hit[which];
957 1.1 dyoung
958 1.1 dyoung KASSERT(vtw->hashed);
959 1.1 dyoung goto out;
960 1.1 dyoung } else {
961 1.1 dyoung ++vtw_stats.losing[which];
962 1.1 dyoung ++losings;
963 1.1 dyoung }
964 1.1 dyoung }
965 1.1 dyoung
966 1.1 dyoung if (fp->nxt) {
967 1.1 dyoung fp = fatp_next(ctl->fat, fp);
968 1.1 dyoung } else {
969 1.1 dyoung break;
970 1.1 dyoung }
971 1.1 dyoung }
972 1.1 dyoung ++vtw_stats.miss[which];
973 1.1 dyoung vtw = 0;
974 1.1 dyoung out:
975 1.1 dyoung if (fatps > vtw_stats.max_chain[which])
976 1.1 dyoung vtw_stats.max_chain[which] = fatps;
977 1.1 dyoung if (probes > vtw_stats.max_probe[which])
978 1.1 dyoung vtw_stats.max_probe[which] = probes;
979 1.1 dyoung if (losings > vtw_stats.max_loss[which])
980 1.1 dyoung vtw_stats.max_loss[which] = losings;
981 1.1 dyoung
982 1.1 dyoung return vtw;
983 1.1 dyoung }
984 1.1 dyoung
985 1.1 dyoung /*!\brief port iterator
986 1.1 dyoung */
987 1.1 dyoung static vtw_t *
988 1.1 dyoung vtw_next_port_v4(struct tcp_ports_iterator *it)
989 1.1 dyoung {
990 1.1 dyoung vtw_ctl_t *ctl = it->ctl;
991 1.1 dyoung vtw_v4_t *v4;
992 1.1 dyoung vtw_t *vtw;
993 1.1 dyoung uint32_t tag;
994 1.1 dyoung uint16_t lport = it->port;
995 1.1 dyoung fatp_t *fp;
996 1.1 dyoung int i;
997 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
998 1.1 dyoung
999 1.1 dyoung tag = v4_port_tag(lport);
1000 1.1 dyoung if (!it->fp) {
1001 1.1 dyoung it->fp = ctl->fat->port[tag & ctl->fat->mask];
1002 1.1 dyoung it->slot_idx = 0;
1003 1.1 dyoung }
1004 1.1 dyoung fp = it->fp;
1005 1.1 dyoung
1006 1.1 dyoung while (fp) {
1007 1.1 dyoung uint32_t inuse = fp->inuse;
1008 1.1 dyoung
1009 1.1 dyoung ++fatps;
1010 1.1 dyoung
1011 1.1 dyoung for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) {
1012 1.1 dyoung uint32_t idx;
1013 1.1 dyoung
1014 1.1 dyoung if (!(inuse & (1 << i)))
1015 1.1 dyoung continue;
1016 1.1 dyoung
1017 1.1 dyoung inuse &= ~0 << i;
1018 1.1 dyoung
1019 1.1 dyoung if (i < it->slot_idx)
1020 1.1 dyoung continue;
1021 1.1 dyoung
1022 1.1 dyoung ++vtw_stats.probe[1];
1023 1.1 dyoung ++probes;
1024 1.1 dyoung
1025 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
1026 1.1 dyoung vtw = vtw_from_index(ctl, idx);
1027 1.1 dyoung
1028 1.1 dyoung if (!vtw) {
1029 1.1 dyoung /* Hopefully fast path.
1030 1.1 dyoung */
1031 1.1 dyoung continue;
1032 1.1 dyoung }
1033 1.1 dyoung
1034 1.1 dyoung v4 = (void*)vtw;
1035 1.1 dyoung
1036 1.1 dyoung if (vtw_alive(vtw)
1037 1.1 dyoung && vtw->port_key == fatp_key(ctl->fat, fp, i)
1038 1.1 dyoung && v4->lport == lport) {
1039 1.1 dyoung ++vtw_stats.hit[1];
1040 1.1 dyoung
1041 1.1 dyoung it->slot_idx = i + 1;
1042 1.1 dyoung
1043 1.1 dyoung goto out;
1044 1.1 dyoung } else if (vtw_alive(vtw)) {
1045 1.1 dyoung ++vtw_stats.losing[1];
1046 1.1 dyoung ++losings;
1047 1.1 dyoung
1048 1.1 dyoung db_trace(KTR_VTW
1049 1.1 dyoung , (vtw, "vtw:!mis"
1050 1.1 dyoung " port %8.8x:%4.4x %8.8x:%4.4x"
1051 1.1 dyoung " key %x port %x"
1052 1.1 dyoung , v4->faddr, v4->fport
1053 1.1 dyoung , v4->laddr, v4->lport
1054 1.1 dyoung , vtw->key
1055 1.1 dyoung , lport));
1056 1.1 dyoung } else {
1057 1.1 dyoung /* Really losing here. We are coming
1058 1.1 dyoung * up with references to free entries.
1059 1.1 dyoung * Might find it better to use
1060 1.1 dyoung * traditional, or need another
1061 1.1 dyoung * add-hockery. The other add-hockery
1062 1.1 dyoung * would be to pul more into into the
1063 1.1 dyoung * cache line to reject the false
1064 1.1 dyoung * hits.
1065 1.1 dyoung */
1066 1.1 dyoung ++vtw_stats.losing[1];
1067 1.1 dyoung ++losings;
1068 1.1 dyoung db_trace(KTR_VTW
1069 1.1 dyoung , (fp, "vtw:!mis port %x"
1070 1.1 dyoung " - free entry idx %x vtw %p"
1071 1.1 dyoung , lport
1072 1.1 dyoung , idx_decode(ctl, idx)
1073 1.1 dyoung , vtw));
1074 1.1 dyoung }
1075 1.1 dyoung }
1076 1.1 dyoung
1077 1.1 dyoung if (fp->nxt) {
1078 1.1 dyoung it->fp = fp = fatp_next(ctl->fat, fp);
1079 1.1 dyoung it->slot_idx = 0;
1080 1.1 dyoung } else {
1081 1.1 dyoung it->fp = 0;
1082 1.1 dyoung break;
1083 1.1 dyoung }
1084 1.1 dyoung }
1085 1.1 dyoung ++vtw_stats.miss[1];
1086 1.1 dyoung
1087 1.1 dyoung vtw = 0;
1088 1.1 dyoung out:
1089 1.1 dyoung if (fatps > vtw_stats.max_chain[1])
1090 1.1 dyoung vtw_stats.max_chain[1] = fatps;
1091 1.1 dyoung if (probes > vtw_stats.max_probe[1])
1092 1.1 dyoung vtw_stats.max_probe[1] = probes;
1093 1.1 dyoung if (losings > vtw_stats.max_loss[1])
1094 1.1 dyoung vtw_stats.max_loss[1] = losings;
1095 1.1 dyoung
1096 1.1 dyoung return vtw;
1097 1.1 dyoung }
1098 1.1 dyoung
1099 1.1 dyoung /*!\brief port iterator
1100 1.1 dyoung */
1101 1.1 dyoung static vtw_t *
1102 1.1 dyoung vtw_next_port_v6(struct tcp_ports_iterator *it)
1103 1.1 dyoung {
1104 1.1 dyoung vtw_ctl_t *ctl = it->ctl;
1105 1.1 dyoung vtw_v6_t *v6;
1106 1.1 dyoung vtw_t *vtw;
1107 1.1 dyoung uint32_t tag;
1108 1.1 dyoung uint16_t lport = it->port;
1109 1.1 dyoung fatp_t *fp;
1110 1.1 dyoung int i;
1111 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
1112 1.1 dyoung
1113 1.1 dyoung tag = v6_port_tag(lport);
1114 1.1 dyoung if (!it->fp) {
1115 1.1 dyoung it->fp = ctl->fat->port[tag & ctl->fat->mask];
1116 1.1 dyoung it->slot_idx = 0;
1117 1.1 dyoung }
1118 1.1 dyoung fp = it->fp;
1119 1.1 dyoung
1120 1.1 dyoung while (fp) {
1121 1.1 dyoung uint32_t inuse = fp->inuse;
1122 1.1 dyoung
1123 1.1 dyoung ++fatps;
1124 1.1 dyoung
1125 1.1 dyoung for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) {
1126 1.1 dyoung uint32_t idx;
1127 1.1 dyoung
1128 1.1 dyoung if (!(inuse & (1 << i)))
1129 1.1 dyoung continue;
1130 1.1 dyoung
1131 1.1 dyoung inuse &= ~0 << i;
1132 1.1 dyoung
1133 1.1 dyoung if (i < it->slot_idx)
1134 1.1 dyoung continue;
1135 1.1 dyoung
1136 1.1 dyoung ++vtw_stats.probe[1];
1137 1.1 dyoung ++probes;
1138 1.1 dyoung
1139 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
1140 1.1 dyoung vtw = vtw_from_index(ctl, idx);
1141 1.1 dyoung
1142 1.1 dyoung if (!vtw) {
1143 1.1 dyoung /* Hopefully fast path.
1144 1.1 dyoung */
1145 1.1 dyoung continue;
1146 1.1 dyoung }
1147 1.1 dyoung
1148 1.1 dyoung v6 = (void*)vtw;
1149 1.1 dyoung
1150 1.1 dyoung db_trace(KTR_VTW
1151 1.1 dyoung , (vtw, "vtw: i %x idx %x fp->tag %x"
1152 1.1 dyoung " tag %x xtra %x"
1153 1.1 dyoung , i, idx_decode(ctl, idx)
1154 1.1 dyoung , fp->tag[i], tag, fatp_xtra[i]));
1155 1.1 dyoung
1156 1.1 dyoung if (vtw_alive(vtw)
1157 1.1 dyoung && vtw->port_key == fatp_key(ctl->fat, fp, i)
1158 1.1 dyoung && v6->lport == lport) {
1159 1.1 dyoung ++vtw_stats.hit[1];
1160 1.1 dyoung
1161 1.1 dyoung db_trace(KTR_VTW
1162 1.1 dyoung , (fp, "vtw: nxt port %P - %4.4x"
1163 1.1 dyoung " idx %x key %x"
1164 1.1 dyoung , lport, lport
1165 1.1 dyoung , idx_decode(ctl, idx), vtw->key));
1166 1.1 dyoung
1167 1.1 dyoung it->slot_idx = i + 1;
1168 1.1 dyoung goto out;
1169 1.1 dyoung } else if (vtw_alive(vtw)) {
1170 1.1 dyoung ++vtw_stats.losing[1];
1171 1.1 dyoung
1172 1.1 dyoung db_trace(KTR_VTW
1173 1.1 dyoung , (vtw, "vtw:!mis port %6A:%4.4x"
1174 1.1 dyoung " %6A:%4.4x key %x port %x"
1175 1.1 dyoung , db_store(&v6->faddr
1176 1.1 dyoung , sizeof (v6->faddr))
1177 1.1 dyoung , v6->fport
1178 1.1 dyoung , db_store(&v6->laddr
1179 1.1 dyoung , sizeof (v6->faddr))
1180 1.1 dyoung , v6->lport
1181 1.1 dyoung , vtw->key
1182 1.1 dyoung , lport));
1183 1.1 dyoung } else {
1184 1.1 dyoung /* Really losing here. We are coming
1185 1.1 dyoung * up with references to free entries.
1186 1.1 dyoung * Might find it better to use
1187 1.1 dyoung * traditional, or need another
1188 1.1 dyoung * add-hockery. The other add-hockery
1189 1.1 dyoung * would be to pul more into into the
1190 1.1 dyoung * cache line to reject the false
1191 1.1 dyoung * hits.
1192 1.1 dyoung */
1193 1.1 dyoung ++vtw_stats.losing[1];
1194 1.1 dyoung ++losings;
1195 1.1 dyoung
1196 1.1 dyoung db_trace(KTR_VTW
1197 1.1 dyoung , (fp
1198 1.1 dyoung , "vtw:!mis port %x"
1199 1.1 dyoung " - free entry idx %x vtw %p"
1200 1.1 dyoung , lport, idx_decode(ctl, idx)
1201 1.1 dyoung , vtw));
1202 1.1 dyoung }
1203 1.1 dyoung }
1204 1.1 dyoung
1205 1.1 dyoung if (fp->nxt) {
1206 1.1 dyoung it->fp = fp = fatp_next(ctl->fat, fp);
1207 1.1 dyoung it->slot_idx = 0;
1208 1.1 dyoung } else {
1209 1.1 dyoung it->fp = 0;
1210 1.1 dyoung break;
1211 1.1 dyoung }
1212 1.1 dyoung }
1213 1.1 dyoung ++vtw_stats.miss[1];
1214 1.1 dyoung
1215 1.1 dyoung vtw = 0;
1216 1.1 dyoung out:
1217 1.1 dyoung if (fatps > vtw_stats.max_chain[1])
1218 1.1 dyoung vtw_stats.max_chain[1] = fatps;
1219 1.1 dyoung if (probes > vtw_stats.max_probe[1])
1220 1.1 dyoung vtw_stats.max_probe[1] = probes;
1221 1.1 dyoung if (losings > vtw_stats.max_loss[1])
1222 1.1 dyoung vtw_stats.max_loss[1] = losings;
1223 1.1 dyoung
1224 1.1 dyoung return vtw;
1225 1.1 dyoung }
1226 1.1 dyoung
1227 1.1 dyoung /*!\brief initialise the VTW allocation arena
1228 1.1 dyoung *
1229 1.1 dyoung * There are 1+3 allocation classes:
1230 1.1 dyoung * 0 classless
1231 1.1 dyoung * {1,2,3} MSL-class based allocation
1232 1.1 dyoung *
1233 1.1 dyoung * The allocation arenas are all initialised. Classless gets all the
1234 1.1 dyoung * space. MSL-class based divides the arena, so that allocation
1235 1.1 dyoung * within a class can proceed without having to consider entries
1236 1.1 dyoung * (aka: cache lines) from different classes.
1237 1.1 dyoung *
1238 1.1 dyoung * Usually, we are completely classless or class-based, but there can be
1239 1.1 dyoung * transition periods, corresponding to dynamic adjustments in the config
1240 1.1 dyoung * by the operator.
1241 1.1 dyoung */
1242 1.1 dyoung static void
1243 1.1 dyoung vtw_init(fatp_ctl_t *fat, vtw_ctl_t *ctl, uint32_t n)
1244 1.1 dyoung {
1245 1.1 dyoung int i;
1246 1.1 dyoung int sz = (ctl->is_v4 ? sizeof (vtw_v4_t) : sizeof (vtw_v6_t));
1247 1.1 dyoung
1248 1.1 dyoung ctl->base.v4 = kmem_alloc(n * sz, KM_SLEEP);
1249 1.1 dyoung if (ctl->base.v4) {
1250 1.1 dyoung vtw_t *base;
1251 1.1 dyoung int class_n;
1252 1.1 dyoung
1253 1.1 dyoung bzero(ctl->base.v4, n * sz);
1254 1.1 dyoung
1255 1.1 dyoung if (ctl->is_v4) {
1256 1.1 dyoung ctl->lim.v4 = ctl->base.v4 + n - 1;
1257 1.1 dyoung ctl->alloc.v4 = ctl->base.v4;
1258 1.1 dyoung } else {
1259 1.1 dyoung ctl->lim.v6 = ctl->base.v6 + n - 1;
1260 1.1 dyoung ctl->alloc.v6 = ctl->base.v6;
1261 1.1 dyoung }
1262 1.1 dyoung
1263 1.1 dyoung ctl->nfree = n;
1264 1.1 dyoung ctl->ctl = ctl;
1265 1.1 dyoung
1266 1.1 dyoung ctl->idx_bits = 32;
1267 1.1 dyoung for (ctl->idx_mask = ~0; (ctl->idx_mask & (n-1)) == n-1; ) {
1268 1.1 dyoung ctl->idx_mask >>= 1;
1269 1.1 dyoung ctl->idx_bits -= 1;
1270 1.1 dyoung }
1271 1.1 dyoung
1272 1.1 dyoung ctl->idx_mask <<= 1;
1273 1.1 dyoung ctl->idx_mask |= 1;
1274 1.1 dyoung ctl->idx_bits += 1;
1275 1.1 dyoung
1276 1.1 dyoung ctl->fat = fat;
1277 1.1 dyoung fat->vtw = ctl;
1278 1.1 dyoung
1279 1.1 dyoung /* Divide the resources equally amongst the classes.
1280 1.1 dyoung * This is not optimal, as the different classes
1281 1.1 dyoung * arrive and leave at different rates, but it is
1282 1.1 dyoung * the best I can do for now.
1283 1.1 dyoung */
1284 1.1 dyoung class_n = n / (VTW_NCLASS-1);
1285 1.1 dyoung base = ctl->base.v;
1286 1.1 dyoung
1287 1.1 dyoung for (i = 1; i < VTW_NCLASS; ++i) {
1288 1.1 dyoung int j;
1289 1.1 dyoung
1290 1.1 dyoung ctl[i] = ctl[0];
1291 1.1 dyoung ctl[i].clidx = i;
1292 1.1 dyoung
1293 1.1 dyoung ctl[i].base.v = base;
1294 1.1 dyoung ctl[i].alloc = ctl[i].base;
1295 1.1 dyoung
1296 1.1 dyoung for (j = 0; j < class_n - 1; ++j) {
1297 1.1 dyoung if (tcp_msl_enable)
1298 1.1 dyoung base->msl_class = i;
1299 1.1 dyoung base = vtw_next(ctl, base);
1300 1.1 dyoung }
1301 1.1 dyoung
1302 1.1 dyoung ctl[i].lim.v = base;
1303 1.1 dyoung base = vtw_next(ctl, base);
1304 1.1 dyoung ctl[i].nfree = class_n;
1305 1.1 dyoung }
1306 1.1 dyoung }
1307 1.1 dyoung
1308 1.1 dyoung vtw_debug_init();
1309 1.1 dyoung }
1310 1.1 dyoung
1311 1.1 dyoung /*!\brief map class to TCP MSL
1312 1.1 dyoung */
1313 1.1 dyoung static inline uint32_t
1314 1.1 dyoung class_to_msl(int class)
1315 1.1 dyoung {
1316 1.1 dyoung switch (class) {
1317 1.1 dyoung case 0:
1318 1.1 dyoung case 1:
1319 1.1 dyoung return tcp_msl_remote ? tcp_msl_remote : (TCPTV_MSL >> 0);
1320 1.1 dyoung case 2:
1321 1.1 dyoung return tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1);
1322 1.1 dyoung default:
1323 1.1 dyoung return tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2);
1324 1.1 dyoung }
1325 1.1 dyoung }
1326 1.1 dyoung
1327 1.1 dyoung /*!\brief map TCP MSL to class
1328 1.1 dyoung */
1329 1.1 dyoung static inline uint32_t
1330 1.1 dyoung msl_to_class(int msl)
1331 1.1 dyoung {
1332 1.1 dyoung if (tcp_msl_enable) {
1333 1.1 dyoung if (msl <= (tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2)))
1334 1.1 dyoung return 1+2;
1335 1.1 dyoung if (msl <= (tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1)))
1336 1.1 dyoung return 1+1;
1337 1.1 dyoung return 1;
1338 1.1 dyoung }
1339 1.1 dyoung return 0;
1340 1.1 dyoung }
1341 1.1 dyoung
1342 1.1 dyoung /*!\brief allocate a vtw entry
1343 1.1 dyoung */
1344 1.1 dyoung static inline vtw_t *
1345 1.1 dyoung vtw_alloc(vtw_ctl_t *ctl)
1346 1.1 dyoung {
1347 1.1 dyoung vtw_t *vtw = 0;
1348 1.1 dyoung int stuck = 0;
1349 1.1 dyoung int avail = ctl ? (ctl->nalloc + ctl->nfree) : 0;
1350 1.1 dyoung int msl;
1351 1.1 dyoung
1352 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
1353 1.1 dyoung
1354 1.1 dyoung /* If no resources, we will not get far.
1355 1.1 dyoung */
1356 1.1 dyoung if (!ctl || !ctl->base.v4 || avail <= 0)
1357 1.1 dyoung return 0;
1358 1.1 dyoung
1359 1.1 dyoung /* Obtain a free one.
1360 1.1 dyoung */
1361 1.1 dyoung while (!ctl->nfree) {
1362 1.1 dyoung vtw_age(ctl, 0);
1363 1.1 dyoung
1364 1.1 dyoung if (++stuck > avail) {
1365 1.1 dyoung /* When in transition between
1366 1.1 dyoung * schemes (classless, classed) we
1367 1.1 dyoung * can be stuck having to await the
1368 1.1 dyoung * expiration of cross-allocated entries.
1369 1.1 dyoung *
1370 1.1 dyoung * Returning zero means we will fall back to the
1371 1.1 dyoung * traditional TIME_WAIT handling, except in the
1372 1.1 dyoung * case of a re-shed, in which case we cannot
1373 1.1 dyoung * perform the reshecd, but will retain the extant
1374 1.1 dyoung * entry.
1375 1.1 dyoung */
1376 1.1 dyoung db_trace(KTR_VTW
1377 1.1 dyoung , (ctl, "vtw:!none free in class %x %x/%x"
1378 1.1 dyoung , ctl->clidx
1379 1.1 dyoung , ctl->nalloc, ctl->nfree));
1380 1.1 dyoung
1381 1.1 dyoung return 0;
1382 1.1 dyoung }
1383 1.1 dyoung }
1384 1.1 dyoung
1385 1.1 dyoung vtw = ctl->alloc.v;
1386 1.1 dyoung
1387 1.1 dyoung if (vtw->msl_class != ctl->clidx) {
1388 1.1 dyoung /* Usurping rules:
1389 1.1 dyoung * 0 -> {1,2,3} or {1,2,3} -> 0
1390 1.1 dyoung */
1391 1.1 dyoung KASSERT(!vtw->msl_class || !ctl->clidx);
1392 1.1 dyoung
1393 1.1 dyoung if (vtw->hashed || vtw->expire.tv_sec) {
1394 1.1 dyoung /* As this is owned by some other class,
1395 1.1 dyoung * we must wait for it to expire it.
1396 1.1 dyoung * This will only happen on class/classless
1397 1.1 dyoung * transitions, which are guaranteed to progress
1398 1.1 dyoung * to completion in small finite time, barring bugs.
1399 1.1 dyoung */
1400 1.1 dyoung db_trace(KTR_VTW
1401 1.1 dyoung , (ctl, "vtw:!%p class %x!=%x %x:%x%s"
1402 1.1 dyoung , vtw, vtw->msl_class, ctl->clidx
1403 1.1 dyoung , vtw->expire.tv_sec
1404 1.1 dyoung , vtw->expire.tv_usec
1405 1.1 dyoung , vtw->hashed ? " hashed" : ""));
1406 1.1 dyoung
1407 1.1 dyoung return 0;
1408 1.1 dyoung }
1409 1.1 dyoung
1410 1.1 dyoung db_trace(KTR_VTW
1411 1.1 dyoung , (ctl, "vtw:!%p usurped from %x to %x"
1412 1.1 dyoung , vtw, vtw->msl_class, ctl->clidx));
1413 1.1 dyoung
1414 1.1 dyoung vtw->msl_class = ctl->clidx;
1415 1.1 dyoung }
1416 1.1 dyoung
1417 1.1 dyoung if (vtw_alive(vtw)) {
1418 1.1 dyoung KASSERT(0 && "next free not free");
1419 1.1 dyoung return 0;
1420 1.1 dyoung }
1421 1.1 dyoung
1422 1.1 dyoung /* Advance allocation poiter.
1423 1.1 dyoung */
1424 1.1 dyoung ctl->alloc.v = vtw_next(ctl, vtw);
1425 1.1 dyoung
1426 1.1 dyoung --ctl->nfree;
1427 1.1 dyoung ++ctl->nalloc;
1428 1.1 dyoung
1429 1.1 dyoung msl = (2 * class_to_msl(ctl->clidx) * 1000) / PR_SLOWHZ; // msec
1430 1.1 dyoung
1431 1.1 dyoung /* mark expiration
1432 1.1 dyoung */
1433 1.3 drochner getmicrouptime(&vtw->expire);
1434 1.1 dyoung
1435 1.1 dyoung /* Move expiration into the future.
1436 1.1 dyoung */
1437 1.1 dyoung vtw->expire.tv_sec += msl / 1000;
1438 1.1 dyoung vtw->expire.tv_usec += 1000 * (msl % 1000);
1439 1.1 dyoung
1440 1.1 dyoung while (vtw->expire.tv_usec >= 1000*1000) {
1441 1.1 dyoung vtw->expire.tv_usec -= 1000*1000;
1442 1.1 dyoung vtw->expire.tv_sec += 1;
1443 1.1 dyoung }
1444 1.1 dyoung
1445 1.1 dyoung if (!ctl->oldest.v)
1446 1.1 dyoung ctl->oldest.v = vtw;
1447 1.1 dyoung
1448 1.1 dyoung return vtw;
1449 1.1 dyoung }
1450 1.1 dyoung
1451 1.1 dyoung /*!\brief expiration
1452 1.1 dyoung */
1453 1.1 dyoung static int
1454 1.1 dyoung vtw_age(vtw_ctl_t *ctl, struct timeval *_when)
1455 1.1 dyoung {
1456 1.1 dyoung vtw_t *vtw;
1457 1.1 dyoung struct timeval then, *when = _when;
1458 1.1 dyoung int maxtries = 0;
1459 1.1 dyoung
1460 1.1 dyoung if (!ctl->oldest.v) {
1461 1.1 dyoung KASSERT(!ctl->nalloc);
1462 1.1 dyoung return 0;
1463 1.1 dyoung }
1464 1.1 dyoung
1465 1.1 dyoung for (vtw = ctl->oldest.v; vtw && ctl->nalloc; ) {
1466 1.1 dyoung if (++maxtries > ctl->nalloc)
1467 1.1 dyoung break;
1468 1.1 dyoung
1469 1.1 dyoung if (vtw->msl_class != ctl->clidx) {
1470 1.1 dyoung db_trace(KTR_VTW
1471 1.1 dyoung , (vtw, "vtw:!age class mismatch %x != %x"
1472 1.1 dyoung , vtw->msl_class, ctl->clidx));
1473 1.1 dyoung /* XXXX
1474 1.1 dyoung * See if the appropriate action is to skip to the next.
1475 1.1 dyoung * XXXX
1476 1.1 dyoung */
1477 1.1 dyoung ctl->oldest.v = vtw = vtw_next(ctl, vtw);
1478 1.1 dyoung continue;
1479 1.1 dyoung }
1480 1.1 dyoung if (!when) {
1481 1.1 dyoung /* Latch oldest timeval if none specified.
1482 1.1 dyoung */
1483 1.1 dyoung then = vtw->expire;
1484 1.1 dyoung when = &then;
1485 1.1 dyoung }
1486 1.1 dyoung
1487 1.1 dyoung if (!timercmp(&vtw->expire, when, <=))
1488 1.1 dyoung break;
1489 1.1 dyoung
1490 1.1 dyoung db_trace(KTR_VTW
1491 1.1 dyoung , (vtw, "vtw: expire %x %8.8x:%8.8x %x/%x"
1492 1.1 dyoung , ctl->clidx
1493 1.1 dyoung , vtw->expire.tv_sec
1494 1.1 dyoung , vtw->expire.tv_usec
1495 1.1 dyoung , ctl->nalloc
1496 1.1 dyoung , ctl->nfree));
1497 1.1 dyoung
1498 1.1 dyoung if (!_when)
1499 1.1 dyoung ++vtw_stats.kill;
1500 1.1 dyoung
1501 1.1 dyoung vtw_del(ctl, vtw);
1502 1.1 dyoung vtw = ctl->oldest.v;
1503 1.1 dyoung }
1504 1.1 dyoung
1505 1.1 dyoung return ctl->nalloc; // # remaining allocated
1506 1.1 dyoung }
1507 1.1 dyoung
1508 1.1 dyoung static callout_t vtw_cs;
1509 1.1 dyoung
1510 1.1 dyoung /*!\brief notice the passage of time.
1511 1.1 dyoung * It seems to be getting faster. What happened to the year?
1512 1.1 dyoung */
1513 1.1 dyoung static void
1514 1.1 dyoung vtw_tick(void *arg)
1515 1.1 dyoung {
1516 1.1 dyoung struct timeval now;
1517 1.1 dyoung int i, cnt = 0;
1518 1.1 dyoung
1519 1.3 drochner getmicrouptime(&now);
1520 1.1 dyoung
1521 1.1 dyoung db_trace(KTR_VTW, (arg, "vtk: tick - now %8.8x:%8.8x"
1522 1.1 dyoung , now.tv_sec, now.tv_usec));
1523 1.1 dyoung
1524 1.1 dyoung mutex_enter(softnet_lock);
1525 1.1 dyoung
1526 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
1527 1.1 dyoung cnt += vtw_age(&vtw_tcpv4[i], &now);
1528 1.1 dyoung cnt += vtw_age(&vtw_tcpv6[i], &now);
1529 1.1 dyoung }
1530 1.1 dyoung
1531 1.1 dyoung /* Keep ticks coming while we need them.
1532 1.1 dyoung */
1533 1.1 dyoung if (cnt)
1534 1.1 dyoung callout_schedule(&vtw_cs, hz / 5);
1535 1.1 dyoung else {
1536 1.1 dyoung tcp_vtw_was_enabled = 0;
1537 1.1 dyoung tcbtable.vestige = 0;
1538 1.1 dyoung }
1539 1.1 dyoung mutex_exit(softnet_lock);
1540 1.1 dyoung }
1541 1.1 dyoung
1542 1.1 dyoung /* in_pcblookup_ports assist for handling vestigial entries.
1543 1.1 dyoung */
1544 1.1 dyoung static void *
1545 1.1 dyoung tcp_init_ports_v4(struct in_addr addr, u_int port, int wild)
1546 1.1 dyoung {
1547 1.1 dyoung struct tcp_ports_iterator *it = &tcp_ports_iterator_v4;
1548 1.1 dyoung
1549 1.1 dyoung bzero(it, sizeof (*it));
1550 1.1 dyoung
1551 1.1 dyoung /* Note: the reference to vtw_tcpv4[0] is fine.
1552 1.1 dyoung * We do not need per-class iteration. We just
1553 1.1 dyoung * need to get to the fat, and there is one
1554 1.1 dyoung * shared fat.
1555 1.1 dyoung */
1556 1.1 dyoung if (vtw_tcpv4[0].fat) {
1557 1.1 dyoung it->addr.v4 = addr;
1558 1.1 dyoung it->port = port;
1559 1.1 dyoung it->wild = !!wild;
1560 1.1 dyoung it->ctl = &vtw_tcpv4[0];
1561 1.1 dyoung
1562 1.1 dyoung ++vtw_stats.look[1];
1563 1.1 dyoung }
1564 1.1 dyoung
1565 1.1 dyoung return it;
1566 1.1 dyoung }
1567 1.1 dyoung
1568 1.1 dyoung /*!\brief export an IPv4 vtw.
1569 1.1 dyoung */
1570 1.1 dyoung static int
1571 1.1 dyoung vtw_export_v4(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res)
1572 1.1 dyoung {
1573 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
1574 1.1 dyoung
1575 1.1 dyoung bzero(res, sizeof (*res));
1576 1.1 dyoung
1577 1.1 dyoung if (ctl && vtw) {
1578 1.1 dyoung if (!ctl->clidx && vtw->msl_class)
1579 1.1 dyoung ctl += vtw->msl_class;
1580 1.1 dyoung else
1581 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
1582 1.1 dyoung
1583 1.1 dyoung res->valid = 1;
1584 1.1 dyoung res->v4 = 1;
1585 1.1 dyoung
1586 1.1 dyoung res->faddr.v4.s_addr = v4->faddr;
1587 1.1 dyoung res->laddr.v4.s_addr = v4->laddr;
1588 1.1 dyoung res->fport = v4->fport;
1589 1.1 dyoung res->lport = v4->lport;
1590 1.1 dyoung res->vtw = vtw; // netlock held over call(s)
1591 1.1 dyoung res->ctl = ctl;
1592 1.1 dyoung res->reuse_addr = vtw->reuse_addr;
1593 1.1 dyoung res->reuse_port = vtw->reuse_port;
1594 1.1 dyoung res->snd_nxt = vtw->snd_nxt;
1595 1.1 dyoung res->rcv_nxt = vtw->rcv_nxt;
1596 1.1 dyoung res->rcv_wnd = vtw->rcv_wnd;
1597 1.1 dyoung res->uid = vtw->uid;
1598 1.1 dyoung }
1599 1.1 dyoung
1600 1.1 dyoung return res->valid;
1601 1.1 dyoung }
1602 1.1 dyoung
1603 1.1 dyoung /*!\brief return next port in the port iterator. yowza.
1604 1.1 dyoung */
1605 1.1 dyoung static int
1606 1.1 dyoung tcp_next_port_v4(void *arg, struct vestigial_inpcb *res)
1607 1.1 dyoung {
1608 1.1 dyoung struct tcp_ports_iterator *it = arg;
1609 1.1 dyoung vtw_t *vtw = 0;
1610 1.1 dyoung
1611 1.1 dyoung if (it->ctl)
1612 1.1 dyoung vtw = vtw_next_port_v4(it);
1613 1.1 dyoung
1614 1.1 dyoung if (!vtw)
1615 1.1 dyoung it->ctl = 0;
1616 1.1 dyoung
1617 1.1 dyoung return vtw_export_v4(it->ctl, vtw, res);
1618 1.1 dyoung }
1619 1.1 dyoung
1620 1.1 dyoung static int
1621 1.1 dyoung tcp_lookup_v4(struct in_addr faddr, uint16_t fport,
1622 1.1 dyoung struct in_addr laddr, uint16_t lport,
1623 1.1 dyoung struct vestigial_inpcb *res)
1624 1.1 dyoung {
1625 1.1 dyoung vtw_t *vtw;
1626 1.1 dyoung vtw_ctl_t *ctl;
1627 1.1 dyoung
1628 1.1 dyoung
1629 1.1 dyoung db_trace(KTR_VTW
1630 1.1 dyoung , (res, "vtw: lookup %A:%P %A:%P"
1631 1.1 dyoung , faddr, fport
1632 1.1 dyoung , laddr, lport));
1633 1.1 dyoung
1634 1.1 dyoung vtw = vtw_lookup_hash_v4((ctl = &vtw_tcpv4[0])
1635 1.1 dyoung , faddr.s_addr, fport
1636 1.1 dyoung , laddr.s_addr, lport, 0);
1637 1.1 dyoung
1638 1.1 dyoung return vtw_export_v4(ctl, vtw, res);
1639 1.1 dyoung }
1640 1.1 dyoung
1641 1.1 dyoung /* in_pcblookup_ports assist for handling vestigial entries.
1642 1.1 dyoung */
1643 1.1 dyoung static void *
1644 1.1 dyoung tcp_init_ports_v6(const struct in6_addr *addr, u_int port, int wild)
1645 1.1 dyoung {
1646 1.1 dyoung struct tcp_ports_iterator *it = &tcp_ports_iterator_v6;
1647 1.1 dyoung
1648 1.1 dyoung bzero(it, sizeof (*it));
1649 1.1 dyoung
1650 1.1 dyoung /* Note: the reference to vtw_tcpv6[0] is fine.
1651 1.1 dyoung * We do not need per-class iteration. We just
1652 1.1 dyoung * need to get to the fat, and there is one
1653 1.1 dyoung * shared fat.
1654 1.1 dyoung */
1655 1.1 dyoung if (vtw_tcpv6[0].fat) {
1656 1.1 dyoung it->addr.v6 = *addr;
1657 1.1 dyoung it->port = port;
1658 1.1 dyoung it->wild = !!wild;
1659 1.1 dyoung it->ctl = &vtw_tcpv6[0];
1660 1.1 dyoung
1661 1.1 dyoung ++vtw_stats.look[1];
1662 1.1 dyoung }
1663 1.1 dyoung
1664 1.1 dyoung return it;
1665 1.1 dyoung }
1666 1.1 dyoung
1667 1.1 dyoung /*!\brief export an IPv6 vtw.
1668 1.1 dyoung */
1669 1.1 dyoung static int
1670 1.1 dyoung vtw_export_v6(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res)
1671 1.1 dyoung {
1672 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
1673 1.1 dyoung
1674 1.1 dyoung bzero(res, sizeof (*res));
1675 1.1 dyoung
1676 1.1 dyoung if (ctl && vtw) {
1677 1.1 dyoung if (!ctl->clidx && vtw->msl_class)
1678 1.1 dyoung ctl += vtw->msl_class;
1679 1.1 dyoung else
1680 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
1681 1.1 dyoung
1682 1.1 dyoung res->valid = 1;
1683 1.1 dyoung res->v4 = 0;
1684 1.1 dyoung
1685 1.1 dyoung res->faddr.v6 = v6->faddr;
1686 1.1 dyoung res->laddr.v6 = v6->laddr;
1687 1.1 dyoung res->fport = v6->fport;
1688 1.1 dyoung res->lport = v6->lport;
1689 1.1 dyoung res->vtw = vtw; // netlock held over call(s)
1690 1.1 dyoung res->ctl = ctl;
1691 1.1 dyoung
1692 1.1 dyoung res->v6only = vtw->v6only;
1693 1.1 dyoung res->reuse_addr = vtw->reuse_addr;
1694 1.1 dyoung res->reuse_port = vtw->reuse_port;
1695 1.1 dyoung
1696 1.1 dyoung res->snd_nxt = vtw->snd_nxt;
1697 1.1 dyoung res->rcv_nxt = vtw->rcv_nxt;
1698 1.1 dyoung res->rcv_wnd = vtw->rcv_wnd;
1699 1.1 dyoung res->uid = vtw->uid;
1700 1.1 dyoung }
1701 1.1 dyoung
1702 1.1 dyoung return res->valid;
1703 1.1 dyoung }
1704 1.1 dyoung
1705 1.1 dyoung static int
1706 1.1 dyoung tcp_next_port_v6(void *arg, struct vestigial_inpcb *res)
1707 1.1 dyoung {
1708 1.1 dyoung struct tcp_ports_iterator *it = arg;
1709 1.1 dyoung vtw_t *vtw = 0;
1710 1.1 dyoung
1711 1.1 dyoung if (it->ctl)
1712 1.1 dyoung vtw = vtw_next_port_v6(it);
1713 1.1 dyoung
1714 1.1 dyoung if (!vtw)
1715 1.1 dyoung it->ctl = 0;
1716 1.1 dyoung
1717 1.1 dyoung return vtw_export_v6(it->ctl, vtw, res);
1718 1.1 dyoung }
1719 1.1 dyoung
1720 1.1 dyoung static int
1721 1.1 dyoung tcp_lookup_v6(const struct in6_addr *faddr, uint16_t fport,
1722 1.1 dyoung const struct in6_addr *laddr, uint16_t lport,
1723 1.1 dyoung struct vestigial_inpcb *res)
1724 1.1 dyoung {
1725 1.1 dyoung vtw_ctl_t *ctl;
1726 1.1 dyoung vtw_t *vtw;
1727 1.1 dyoung
1728 1.1 dyoung db_trace(KTR_VTW
1729 1.1 dyoung , (res, "vtw: lookup %6A:%P %6A:%P"
1730 1.1 dyoung , db_store(faddr, sizeof (*faddr)), fport
1731 1.1 dyoung , db_store(laddr, sizeof (*laddr)), lport));
1732 1.1 dyoung
1733 1.1 dyoung vtw = vtw_lookup_hash_v6((ctl = &vtw_tcpv6[0])
1734 1.1 dyoung , faddr, fport
1735 1.1 dyoung , laddr, lport, 0);
1736 1.1 dyoung
1737 1.1 dyoung return vtw_export_v6(ctl, vtw, res);
1738 1.1 dyoung }
1739 1.1 dyoung
1740 1.1 dyoung static vestigial_hooks_t tcp_hooks = {
1741 1.1 dyoung .init_ports4 = tcp_init_ports_v4,
1742 1.1 dyoung .next_port4 = tcp_next_port_v4,
1743 1.1 dyoung .lookup4 = tcp_lookup_v4,
1744 1.1 dyoung .init_ports6 = tcp_init_ports_v6,
1745 1.1 dyoung .next_port6 = tcp_next_port_v6,
1746 1.1 dyoung .lookup6 = tcp_lookup_v6,
1747 1.1 dyoung };
1748 1.1 dyoung
1749 1.1 dyoung static bool
1750 1.1 dyoung vtw_select(int af, fatp_ctl_t **fatp, vtw_ctl_t **ctlp)
1751 1.1 dyoung {
1752 1.1 dyoung fatp_ctl_t *fat;
1753 1.1 dyoung vtw_ctl_t *ctl;
1754 1.1 dyoung
1755 1.1 dyoung switch (af) {
1756 1.1 dyoung case AF_INET:
1757 1.1 dyoung fat = &fat_tcpv4;
1758 1.1 dyoung ctl = &vtw_tcpv4[0];
1759 1.1 dyoung break;
1760 1.1 dyoung case AF_INET6:
1761 1.1 dyoung fat = &fat_tcpv6;
1762 1.1 dyoung ctl = &vtw_tcpv6[0];
1763 1.1 dyoung break;
1764 1.1 dyoung default:
1765 1.1 dyoung return false;
1766 1.1 dyoung }
1767 1.1 dyoung if (fatp != NULL)
1768 1.1 dyoung *fatp = fat;
1769 1.1 dyoung if (ctlp != NULL)
1770 1.1 dyoung *ctlp = ctl;
1771 1.1 dyoung return true;
1772 1.1 dyoung }
1773 1.1 dyoung
1774 1.1 dyoung /*!\brief initialize controlling instance
1775 1.1 dyoung */
1776 1.1 dyoung static int
1777 1.1 dyoung vtw_control_init(int af)
1778 1.1 dyoung {
1779 1.1 dyoung fatp_ctl_t *fat;
1780 1.1 dyoung vtw_ctl_t *ctl;
1781 1.1 dyoung
1782 1.1 dyoung if (!vtw_select(af, &fat, &ctl))
1783 1.1 dyoung return EAFNOSUPPORT;
1784 1.1 dyoung
1785 1.1 dyoung if (!fat->base) {
1786 1.1 dyoung uint32_t n, m;
1787 1.1 dyoung
1788 1.1 dyoung KASSERT(powerof2(tcp_vtw_entries));
1789 1.1 dyoung
1790 1.1 dyoung /* Allocate 10% more capacity in the fat pointers.
1791 1.1 dyoung * We should only need ~#hash additional based on
1792 1.1 dyoung * how they age, but TIME_WAIT assassination could cause
1793 1.1 dyoung * sparse fat pointer utilisation.
1794 1.1 dyoung */
1795 1.1 dyoung m = 512;
1796 1.1 dyoung n = 2*m + (11 * (tcp_vtw_entries / fatp_ntags())) / 10;
1797 1.1 dyoung
1798 1.1 dyoung fatp_init(fat, n, m);
1799 1.1 dyoung
1800 1.1 dyoung if (!fat->base)
1801 1.1 dyoung return ENOMEM;
1802 1.1 dyoung }
1803 1.1 dyoung
1804 1.1 dyoung if (!ctl->base.v) {
1805 1.1 dyoung
1806 1.1 dyoung vtw_init(fat, ctl, tcp_vtw_entries);
1807 1.1 dyoung if (!ctl->base.v)
1808 1.1 dyoung return ENOMEM;
1809 1.1 dyoung }
1810 1.1 dyoung
1811 1.1 dyoung return 0;
1812 1.1 dyoung }
1813 1.1 dyoung
1814 1.1 dyoung /*!\brief select controlling instance
1815 1.1 dyoung */
1816 1.1 dyoung static vtw_ctl_t *
1817 1.1 dyoung vtw_control(int af, uint32_t msl)
1818 1.1 dyoung {
1819 1.1 dyoung fatp_ctl_t *fat;
1820 1.1 dyoung vtw_ctl_t *ctl;
1821 1.1 dyoung int class = msl_to_class(msl);
1822 1.1 dyoung
1823 1.1 dyoung if (!vtw_select(af, &fat, &ctl))
1824 1.1 dyoung return NULL;
1825 1.1 dyoung
1826 1.1 dyoung if (!fat->base || !ctl->base.v)
1827 1.1 dyoung return NULL;
1828 1.1 dyoung
1829 1.1 dyoung return ctl + class;
1830 1.1 dyoung }
1831 1.1 dyoung
1832 1.1 dyoung /*!\brief add TCP pcb to vestigial timewait
1833 1.1 dyoung */
1834 1.1 dyoung int
1835 1.1 dyoung vtw_add(int af, struct tcpcb *tp)
1836 1.1 dyoung {
1837 1.1 dyoung int enable;
1838 1.1 dyoung vtw_ctl_t *ctl;
1839 1.1 dyoung vtw_t *vtw;
1840 1.1 dyoung
1841 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
1842 1.1 dyoung
1843 1.1 dyoung ctl = vtw_control(af, tp->t_msl);
1844 1.1 dyoung if (!ctl)
1845 1.1 dyoung return 0;
1846 1.1 dyoung
1847 1.1 dyoung enable = (af == AF_INET) ? tcp4_vtw_enable : tcp6_vtw_enable;
1848 1.1 dyoung
1849 1.1 dyoung vtw = vtw_alloc(ctl);
1850 1.1 dyoung
1851 1.1 dyoung if (vtw) {
1852 1.1 dyoung vtw->snd_nxt = tp->snd_nxt;
1853 1.1 dyoung vtw->rcv_nxt = tp->rcv_nxt;
1854 1.1 dyoung
1855 1.1 dyoung switch (af) {
1856 1.1 dyoung case AF_INET: {
1857 1.1 dyoung struct inpcb *inp = tp->t_inpcb;
1858 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
1859 1.1 dyoung
1860 1.1 dyoung v4->faddr = inp->inp_faddr.s_addr;
1861 1.1 dyoung v4->laddr = inp->inp_laddr.s_addr;
1862 1.1 dyoung v4->fport = inp->inp_fport;
1863 1.1 dyoung v4->lport = inp->inp_lport;
1864 1.1 dyoung
1865 1.1 dyoung vtw->reuse_port = !!(inp->inp_socket->so_options
1866 1.1 dyoung & SO_REUSEPORT);
1867 1.1 dyoung vtw->reuse_addr = !!(inp->inp_socket->so_options
1868 1.1 dyoung & SO_REUSEADDR);
1869 1.1 dyoung vtw->v6only = 0;
1870 1.1 dyoung vtw->uid = inp->inp_socket->so_uidinfo->ui_uid;
1871 1.1 dyoung
1872 1.1 dyoung vtw_inshash_v4(ctl, vtw);
1873 1.1 dyoung
1874 1.1 dyoung
1875 1.1 dyoung #ifdef VTW_DEBUG
1876 1.1 dyoung /* Immediate lookup (connected and port) to
1877 1.1 dyoung * ensure at least that works!
1878 1.1 dyoung */
1879 1.1 dyoung if (enable & 4) {
1880 1.1 dyoung KASSERT(vtw_lookup_hash_v4
1881 1.1 dyoung (ctl
1882 1.1 dyoung , inp->inp_faddr.s_addr, inp->inp_fport
1883 1.1 dyoung , inp->inp_laddr.s_addr, inp->inp_lport
1884 1.1 dyoung , 0)
1885 1.1 dyoung == vtw);
1886 1.1 dyoung KASSERT(vtw_lookup_hash_v4
1887 1.1 dyoung (ctl
1888 1.1 dyoung , inp->inp_faddr.s_addr, inp->inp_fport
1889 1.1 dyoung , inp->inp_laddr.s_addr, inp->inp_lport
1890 1.1 dyoung , 1));
1891 1.1 dyoung }
1892 1.1 dyoung /* Immediate port iterator functionality check: not wild
1893 1.1 dyoung */
1894 1.1 dyoung if (enable & 8) {
1895 1.1 dyoung struct tcp_ports_iterator *it;
1896 1.1 dyoung struct vestigial_inpcb res;
1897 1.1 dyoung int cnt = 0;
1898 1.1 dyoung
1899 1.1 dyoung it = tcp_init_ports_v4(inp->inp_laddr
1900 1.1 dyoung , inp->inp_lport, 0);
1901 1.1 dyoung
1902 1.1 dyoung while (tcp_next_port_v4(it, &res)) {
1903 1.1 dyoung ++cnt;
1904 1.1 dyoung }
1905 1.1 dyoung KASSERT(cnt);
1906 1.1 dyoung }
1907 1.1 dyoung /* Immediate port iterator functionality check: wild
1908 1.1 dyoung */
1909 1.1 dyoung if (enable & 16) {
1910 1.1 dyoung struct tcp_ports_iterator *it;
1911 1.1 dyoung struct vestigial_inpcb res;
1912 1.1 dyoung struct in_addr any;
1913 1.1 dyoung int cnt = 0;
1914 1.1 dyoung
1915 1.1 dyoung any.s_addr = htonl(INADDR_ANY);
1916 1.1 dyoung
1917 1.1 dyoung it = tcp_init_ports_v4(any, inp->inp_lport, 1);
1918 1.1 dyoung
1919 1.1 dyoung while (tcp_next_port_v4(it, &res)) {
1920 1.1 dyoung ++cnt;
1921 1.1 dyoung }
1922 1.1 dyoung KASSERT(cnt);
1923 1.1 dyoung }
1924 1.1 dyoung #endif /* VTW_DEBUG */
1925 1.1 dyoung break;
1926 1.1 dyoung }
1927 1.1 dyoung
1928 1.1 dyoung case AF_INET6: {
1929 1.1 dyoung struct in6pcb *inp = tp->t_in6pcb;
1930 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
1931 1.1 dyoung
1932 1.1 dyoung v6->faddr = inp->in6p_faddr;
1933 1.1 dyoung v6->laddr = inp->in6p_laddr;
1934 1.1 dyoung v6->fport = inp->in6p_fport;
1935 1.1 dyoung v6->lport = inp->in6p_lport;
1936 1.1 dyoung
1937 1.1 dyoung vtw->reuse_port = !!(inp->in6p_socket->so_options
1938 1.1 dyoung & SO_REUSEPORT);
1939 1.1 dyoung vtw->reuse_addr = !!(inp->in6p_socket->so_options
1940 1.1 dyoung & SO_REUSEADDR);
1941 1.1 dyoung vtw->v6only = !!(inp->in6p_flags
1942 1.1 dyoung & IN6P_IPV6_V6ONLY);
1943 1.1 dyoung vtw->uid = inp->in6p_socket->so_uidinfo->ui_uid;
1944 1.1 dyoung
1945 1.1 dyoung vtw_inshash_v6(ctl, vtw);
1946 1.1 dyoung #ifdef VTW_DEBUG
1947 1.1 dyoung /* Immediate lookup (connected and port) to
1948 1.1 dyoung * ensure at least that works!
1949 1.1 dyoung */
1950 1.1 dyoung if (enable & 4) {
1951 1.1 dyoung KASSERT(vtw_lookup_hash_v6(ctl
1952 1.1 dyoung , &inp->in6p_faddr, inp->in6p_fport
1953 1.1 dyoung , &inp->in6p_laddr, inp->in6p_lport
1954 1.1 dyoung , 0)
1955 1.1 dyoung == vtw);
1956 1.1 dyoung KASSERT(vtw_lookup_hash_v6
1957 1.1 dyoung (ctl
1958 1.1 dyoung , &inp->in6p_faddr, inp->in6p_fport
1959 1.1 dyoung , &inp->in6p_laddr, inp->in6p_lport
1960 1.1 dyoung , 1));
1961 1.1 dyoung }
1962 1.1 dyoung /* Immediate port iterator functionality check: not wild
1963 1.1 dyoung */
1964 1.1 dyoung if (enable & 8) {
1965 1.1 dyoung struct tcp_ports_iterator *it;
1966 1.1 dyoung struct vestigial_inpcb res;
1967 1.1 dyoung int cnt = 0;
1968 1.1 dyoung
1969 1.1 dyoung it = tcp_init_ports_v6(&inp->in6p_laddr
1970 1.1 dyoung , inp->in6p_lport, 0);
1971 1.1 dyoung
1972 1.1 dyoung while (tcp_next_port_v6(it, &res)) {
1973 1.1 dyoung ++cnt;
1974 1.1 dyoung }
1975 1.1 dyoung KASSERT(cnt);
1976 1.1 dyoung }
1977 1.1 dyoung /* Immediate port iterator functionality check: wild
1978 1.1 dyoung */
1979 1.1 dyoung if (enable & 16) {
1980 1.1 dyoung struct tcp_ports_iterator *it;
1981 1.1 dyoung struct vestigial_inpcb res;
1982 1.1 dyoung static struct in6_addr any = IN6ADDR_ANY_INIT;
1983 1.1 dyoung int cnt = 0;
1984 1.1 dyoung
1985 1.1 dyoung it = tcp_init_ports_v6(&any
1986 1.1 dyoung , inp->in6p_lport, 1);
1987 1.1 dyoung
1988 1.1 dyoung while (tcp_next_port_v6(it, &res)) {
1989 1.1 dyoung ++cnt;
1990 1.1 dyoung }
1991 1.1 dyoung KASSERT(cnt);
1992 1.1 dyoung }
1993 1.1 dyoung #endif /* VTW_DEBUG */
1994 1.1 dyoung break;
1995 1.1 dyoung }
1996 1.1 dyoung }
1997 1.1 dyoung
1998 1.1 dyoung tcp_canceltimers(tp);
1999 1.1 dyoung tp = tcp_close(tp);
2000 1.1 dyoung KASSERT(!tp);
2001 1.1 dyoung
2002 1.1 dyoung return 1;
2003 1.1 dyoung }
2004 1.1 dyoung
2005 1.1 dyoung return 0;
2006 1.1 dyoung }
2007 1.1 dyoung
2008 1.1 dyoung /*!\brief restart timer for vestigial time-wait entry
2009 1.1 dyoung */
2010 1.1 dyoung static void
2011 1.1 dyoung vtw_restart_v4(vestigial_inpcb_t *vp)
2012 1.1 dyoung {
2013 1.1 dyoung vtw_v4_t copy = *(vtw_v4_t*)vp->vtw;
2014 1.1 dyoung vtw_t *vtw;
2015 1.1 dyoung vtw_t *cp = ©.common;
2016 1.1 dyoung vtw_ctl_t *ctl;
2017 1.1 dyoung
2018 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
2019 1.1 dyoung
2020 1.1 dyoung db_trace(KTR_VTW
2021 1.1 dyoung , (vp->vtw, "vtw: restart %A:%P %A:%P"
2022 1.1 dyoung , vp->faddr.v4.s_addr, vp->fport
2023 1.1 dyoung , vp->laddr.v4.s_addr, vp->lport));
2024 1.1 dyoung
2025 1.1 dyoung /* Class might have changed, so have a squiz.
2026 1.1 dyoung */
2027 1.1 dyoung ctl = vtw_control(AF_INET, class_to_msl(cp->msl_class));
2028 1.1 dyoung vtw = vtw_alloc(ctl);
2029 1.1 dyoung
2030 1.1 dyoung if (vtw) {
2031 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
2032 1.1 dyoung
2033 1.1 dyoung /* Safe now to unhash the old entry
2034 1.1 dyoung */
2035 1.1 dyoung vtw_del(vp->ctl, vp->vtw);
2036 1.1 dyoung
2037 1.1 dyoung vtw->snd_nxt = cp->snd_nxt;
2038 1.1 dyoung vtw->rcv_nxt = cp->rcv_nxt;
2039 1.1 dyoung
2040 1.1 dyoung v4->faddr = copy.faddr;
2041 1.1 dyoung v4->laddr = copy.laddr;
2042 1.1 dyoung v4->fport = copy.fport;
2043 1.1 dyoung v4->lport = copy.lport;
2044 1.1 dyoung
2045 1.1 dyoung vtw->reuse_port = cp->reuse_port;
2046 1.1 dyoung vtw->reuse_addr = cp->reuse_addr;
2047 1.1 dyoung vtw->v6only = 0;
2048 1.1 dyoung vtw->uid = cp->uid;
2049 1.1 dyoung
2050 1.1 dyoung vtw_inshash_v4(ctl, vtw);
2051 1.1 dyoung }
2052 1.1 dyoung
2053 1.1 dyoung vp->valid = 0;
2054 1.1 dyoung }
2055 1.1 dyoung
2056 1.1 dyoung /*!\brief restart timer for vestigial time-wait entry
2057 1.1 dyoung */
2058 1.1 dyoung static void
2059 1.1 dyoung vtw_restart_v6(vestigial_inpcb_t *vp)
2060 1.1 dyoung {
2061 1.1 dyoung vtw_v6_t copy = *(vtw_v6_t*)vp->vtw;
2062 1.1 dyoung vtw_t *vtw;
2063 1.1 dyoung vtw_t *cp = ©.common;
2064 1.1 dyoung vtw_ctl_t *ctl;
2065 1.1 dyoung
2066 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
2067 1.1 dyoung
2068 1.1 dyoung db_trace(KTR_VTW
2069 1.1 dyoung , (vp->vtw, "vtw: restart %6A:%P %6A:%P"
2070 1.1 dyoung , db_store(&vp->faddr.v6, sizeof (vp->faddr.v6))
2071 1.1 dyoung , vp->fport
2072 1.1 dyoung , db_store(&vp->laddr.v6, sizeof (vp->laddr.v6))
2073 1.1 dyoung , vp->lport));
2074 1.1 dyoung
2075 1.1 dyoung /* Class might have changed, so have a squiz.
2076 1.1 dyoung */
2077 1.1 dyoung ctl = vtw_control(AF_INET6, class_to_msl(cp->msl_class));
2078 1.1 dyoung vtw = vtw_alloc(ctl);
2079 1.1 dyoung
2080 1.1 dyoung if (vtw) {
2081 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
2082 1.1 dyoung
2083 1.1 dyoung /* Safe now to unhash the old entry
2084 1.1 dyoung */
2085 1.1 dyoung vtw_del(vp->ctl, vp->vtw);
2086 1.1 dyoung
2087 1.1 dyoung vtw->snd_nxt = cp->snd_nxt;
2088 1.1 dyoung vtw->rcv_nxt = cp->rcv_nxt;
2089 1.1 dyoung
2090 1.1 dyoung v6->faddr = copy.faddr;
2091 1.1 dyoung v6->laddr = copy.laddr;
2092 1.1 dyoung v6->fport = copy.fport;
2093 1.1 dyoung v6->lport = copy.lport;
2094 1.1 dyoung
2095 1.1 dyoung vtw->reuse_port = cp->reuse_port;
2096 1.1 dyoung vtw->reuse_addr = cp->reuse_addr;
2097 1.1 dyoung vtw->v6only = cp->v6only;
2098 1.1 dyoung vtw->uid = cp->uid;
2099 1.1 dyoung
2100 1.1 dyoung vtw_inshash_v6(ctl, vtw);
2101 1.1 dyoung }
2102 1.1 dyoung
2103 1.1 dyoung vp->valid = 0;
2104 1.1 dyoung }
2105 1.1 dyoung
2106 1.1 dyoung /*!\brief restart timer for vestigial time-wait entry
2107 1.1 dyoung */
2108 1.1 dyoung void
2109 1.1 dyoung vtw_restart(vestigial_inpcb_t *vp)
2110 1.1 dyoung {
2111 1.1 dyoung if (!vp || !vp->valid)
2112 1.1 dyoung return;
2113 1.1 dyoung
2114 1.1 dyoung if (vp->v4)
2115 1.1 dyoung vtw_restart_v4(vp);
2116 1.1 dyoung else
2117 1.1 dyoung vtw_restart_v6(vp);
2118 1.1 dyoung }
2119 1.1 dyoung
2120 1.1 dyoung int
2121 1.1 dyoung vtw_earlyinit(void)
2122 1.1 dyoung {
2123 1.1 dyoung int rc;
2124 1.1 dyoung
2125 1.1 dyoung if (!tcp_vtw_was_enabled) {
2126 1.1 dyoung int i;
2127 1.1 dyoung
2128 1.1 dyoung /* This guarantees is timer ticks until we no longer need them.
2129 1.1 dyoung */
2130 1.1 dyoung tcp_vtw_was_enabled = 1;
2131 1.1 dyoung
2132 1.1 dyoung callout_init(&vtw_cs, 0);
2133 1.1 dyoung callout_setfunc(&vtw_cs, vtw_tick, 0);
2134 1.1 dyoung callout_schedule(&vtw_cs, hz / 5);
2135 1.1 dyoung
2136 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
2137 1.1 dyoung vtw_tcpv4[i].is_v4 = 1;
2138 1.1 dyoung vtw_tcpv6[i].is_v6 = 1;
2139 1.1 dyoung }
2140 1.1 dyoung
2141 1.1 dyoung tcbtable.vestige = &tcp_hooks;
2142 1.1 dyoung }
2143 1.1 dyoung
2144 1.1 dyoung if ((rc = vtw_control_init(AF_INET)) != 0 ||
2145 1.1 dyoung (rc = vtw_control_init(AF_INET6)) != 0)
2146 1.1 dyoung return rc;
2147 1.1 dyoung
2148 1.1 dyoung return 0;
2149 1.1 dyoung }
2150 1.1 dyoung
2151 1.1 dyoung #ifdef VTW_DEBUG
2152 1.1 dyoung #include <sys/syscallargs.h>
2153 1.1 dyoung #include <sys/sysctl.h>
2154 1.1 dyoung
2155 1.1 dyoung /*!\brief add lalp, fafp entries for debug
2156 1.1 dyoung */
2157 1.1 dyoung int
2158 1.1 dyoung vtw_debug_add(int af, sin_either_t *la, sin_either_t *fa, int msl, int class)
2159 1.1 dyoung {
2160 1.1 dyoung vtw_ctl_t *ctl;
2161 1.1 dyoung vtw_t *vtw;
2162 1.1 dyoung
2163 1.1 dyoung ctl = vtw_control(af, msl ? msl : class_to_msl(class));
2164 1.1 dyoung if (!ctl)
2165 1.1 dyoung return 0;
2166 1.1 dyoung
2167 1.1 dyoung vtw = vtw_alloc(ctl);
2168 1.1 dyoung
2169 1.1 dyoung if (vtw) {
2170 1.1 dyoung vtw->snd_nxt = 0;
2171 1.1 dyoung vtw->rcv_nxt = 0;
2172 1.1 dyoung
2173 1.1 dyoung switch (af) {
2174 1.1 dyoung case AF_INET: {
2175 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
2176 1.1 dyoung
2177 1.1 dyoung v4->faddr = fa->sin_addr.v4.s_addr;
2178 1.1 dyoung v4->laddr = la->sin_addr.v4.s_addr;
2179 1.1 dyoung v4->fport = fa->sin_port;
2180 1.1 dyoung v4->lport = la->sin_port;
2181 1.1 dyoung
2182 1.1 dyoung vtw->reuse_port = 1;
2183 1.1 dyoung vtw->reuse_addr = 1;
2184 1.1 dyoung vtw->v6only = 0;
2185 1.1 dyoung vtw->uid = 0;
2186 1.1 dyoung
2187 1.1 dyoung vtw_inshash_v4(ctl, vtw);
2188 1.1 dyoung break;
2189 1.1 dyoung }
2190 1.1 dyoung
2191 1.1 dyoung case AF_INET6: {
2192 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
2193 1.1 dyoung
2194 1.1 dyoung v6->faddr = fa->sin_addr.v6;
2195 1.1 dyoung v6->laddr = la->sin_addr.v6;
2196 1.1 dyoung
2197 1.1 dyoung v6->fport = fa->sin_port;
2198 1.1 dyoung v6->lport = la->sin_port;
2199 1.1 dyoung
2200 1.1 dyoung vtw->reuse_port = 1;
2201 1.1 dyoung vtw->reuse_addr = 1;
2202 1.1 dyoung vtw->v6only = 0;
2203 1.1 dyoung vtw->uid = 0;
2204 1.1 dyoung
2205 1.1 dyoung vtw_inshash_v6(ctl, vtw);
2206 1.1 dyoung break;
2207 1.1 dyoung }
2208 1.1 dyoung
2209 1.1 dyoung default:
2210 1.1 dyoung break;
2211 1.1 dyoung }
2212 1.1 dyoung
2213 1.1 dyoung return 1;
2214 1.1 dyoung }
2215 1.1 dyoung
2216 1.1 dyoung return 0;
2217 1.1 dyoung }
2218 1.1 dyoung
/* System call slot claimed by vtw_debug_init(); zero until one is taken. */
static int vtw_syscall;
2220 1.1 dyoung
2221 1.1 dyoung static int
2222 1.1 dyoung vtw_debug_process(vtw_sysargs_t *ap)
2223 1.1 dyoung {
2224 1.1 dyoung struct vestigial_inpcb vestige;
2225 1.1 dyoung int rc = 0;
2226 1.1 dyoung
2227 1.1 dyoung mutex_enter(softnet_lock);
2228 1.1 dyoung
2229 1.1 dyoung switch (ap->op) {
2230 1.1 dyoung case 0: // insert
2231 1.1 dyoung vtw_debug_add(ap->la.sin_family
2232 1.1 dyoung , &ap->la
2233 1.1 dyoung , &ap->fa
2234 1.1 dyoung , TCPTV_MSL
2235 1.1 dyoung , 0);
2236 1.1 dyoung break;
2237 1.1 dyoung
2238 1.1 dyoung case 1: // lookup
2239 1.1 dyoung case 2: // restart
2240 1.1 dyoung switch (ap->la.sin_family) {
2241 1.1 dyoung case AF_INET:
2242 1.1 dyoung if (tcp_lookup_v4(ap->fa.sin_addr.v4, ap->fa.sin_port,
2243 1.1 dyoung ap->la.sin_addr.v4, ap->la.sin_port,
2244 1.1 dyoung &vestige)) {
2245 1.1 dyoung if (ap->op == 2) {
2246 1.1 dyoung vtw_restart(&vestige);
2247 1.1 dyoung }
2248 1.1 dyoung rc = 0;
2249 1.1 dyoung } else
2250 1.1 dyoung rc = ESRCH;
2251 1.1 dyoung break;
2252 1.1 dyoung
2253 1.1 dyoung case AF_INET6:
2254 1.1 dyoung if (tcp_lookup_v6(&ap->fa.sin_addr.v6, ap->fa.sin_port,
2255 1.1 dyoung &ap->la.sin_addr.v6, ap->la.sin_port,
2256 1.1 dyoung &vestige)) {
2257 1.1 dyoung if (ap->op == 2) {
2258 1.1 dyoung vtw_restart(&vestige);
2259 1.1 dyoung }
2260 1.1 dyoung rc = 0;
2261 1.1 dyoung } else
2262 1.1 dyoung rc = ESRCH;
2263 1.1 dyoung break;
2264 1.1 dyoung default:
2265 1.1 dyoung rc = EINVAL;
2266 1.1 dyoung }
2267 1.1 dyoung break;
2268 1.1 dyoung
2269 1.1 dyoung default:
2270 1.1 dyoung rc = EINVAL;
2271 1.1 dyoung }
2272 1.1 dyoung
2273 1.1 dyoung mutex_exit(softnet_lock);
2274 1.1 dyoung return rc;
2275 1.1 dyoung }
2276 1.1 dyoung
2277 1.1 dyoung struct sys_vtw_args {
2278 1.1 dyoung syscallarg(const vtw_sysargs_t *) req;
2279 1.1 dyoung syscallarg(size_t) len;
2280 1.1 dyoung };
2281 1.1 dyoung
2282 1.1 dyoung static int
2283 1.1 dyoung vtw_sys(struct lwp *l, const void *_, register_t *retval)
2284 1.1 dyoung {
2285 1.1 dyoung const struct sys_vtw_args *uap = _;
2286 1.1 dyoung void *buf;
2287 1.1 dyoung int rc;
2288 1.1 dyoung size_t len = SCARG(uap, len);
2289 1.1 dyoung
2290 1.1 dyoung if (len != sizeof (vtw_sysargs_t))
2291 1.1 dyoung return EINVAL;
2292 1.1 dyoung
2293 1.1 dyoung buf = kmem_alloc(len, KM_SLEEP);
2294 1.1 dyoung if (!buf)
2295 1.1 dyoung return ENOMEM;
2296 1.1 dyoung
2297 1.1 dyoung rc = copyin(SCARG(uap, req), buf, len);
2298 1.1 dyoung if (!rc) {
2299 1.1 dyoung rc = vtw_debug_process(buf);
2300 1.1 dyoung }
2301 1.1 dyoung kmem_free(buf, len);
2302 1.1 dyoung
2303 1.1 dyoung return rc;
2304 1.1 dyoung }
2305 1.1 dyoung
2306 1.1 dyoung static void
2307 1.1 dyoung vtw_sanity_check(void)
2308 1.1 dyoung {
2309 1.1 dyoung vtw_ctl_t *ctl;
2310 1.1 dyoung vtw_t *vtw;
2311 1.1 dyoung int i;
2312 1.1 dyoung int n;
2313 1.1 dyoung
2314 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
2315 1.1 dyoung ctl = &vtw_tcpv4[i];
2316 1.1 dyoung
2317 1.1 dyoung if (!ctl->base.v || ctl->nalloc)
2318 1.1 dyoung continue;
2319 1.1 dyoung
2320 1.1 dyoung for (n = 0, vtw = ctl->base.v; ; ) {
2321 1.1 dyoung ++n;
2322 1.1 dyoung vtw = vtw_next(ctl, vtw);
2323 1.1 dyoung if (vtw == ctl->base.v)
2324 1.1 dyoung break;
2325 1.1 dyoung }
2326 1.1 dyoung db_trace(KTR_VTW
2327 1.1 dyoung , (ctl, "sanity: class %x n %x nfree %x"
2328 1.1 dyoung , i, n, ctl->nfree));
2329 1.1 dyoung
2330 1.1 dyoung KASSERT(n == ctl->nfree);
2331 1.1 dyoung }
2332 1.1 dyoung
2333 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
2334 1.1 dyoung ctl = &vtw_tcpv6[i];
2335 1.1 dyoung
2336 1.1 dyoung if (!ctl->base.v || ctl->nalloc)
2337 1.1 dyoung continue;
2338 1.1 dyoung
2339 1.1 dyoung for (n = 0, vtw = ctl->base.v; ; ) {
2340 1.1 dyoung ++n;
2341 1.1 dyoung vtw = vtw_next(ctl, vtw);
2342 1.1 dyoung if (vtw == ctl->base.v)
2343 1.1 dyoung break;
2344 1.1 dyoung }
2345 1.1 dyoung db_trace(KTR_VTW
2346 1.1 dyoung , (ctl, "sanity: class %x n %x nfree %x"
2347 1.1 dyoung , i, n, ctl->nfree));
2348 1.1 dyoung KASSERT(n == ctl->nfree);
2349 1.1 dyoung }
2350 1.1 dyoung }
2351 1.1 dyoung
2352 1.1 dyoung /*!\brief Initialise debug support.
2353 1.1 dyoung */
2354 1.1 dyoung static void
2355 1.1 dyoung vtw_debug_init(void)
2356 1.1 dyoung {
2357 1.1 dyoung int i;
2358 1.1 dyoung
2359 1.1 dyoung vtw_sanity_check();
2360 1.1 dyoung
2361 1.1 dyoung if (vtw_syscall)
2362 1.1 dyoung return;
2363 1.1 dyoung
2364 1.1 dyoung for (i = 511; i; --i) {
2365 1.1 dyoung if (sysent[i].sy_call == sys_nosys) {
2366 1.1 dyoung sysent[i].sy_call = vtw_sys;
2367 1.1 dyoung sysent[i].sy_narg = 2;
2368 1.1 dyoung sysent[i].sy_argsize = sizeof (struct sys_vtw_args);
2369 1.1 dyoung sysent[i].sy_flags = 0;
2370 1.1 dyoung
2371 1.1 dyoung vtw_syscall = i;
2372 1.1 dyoung break;
2373 1.1 dyoung }
2374 1.1 dyoung }
2375 1.1 dyoung if (i) {
2376 1.1 dyoung const struct sysctlnode *node;
2377 1.1 dyoung uint32_t flags;
2378 1.1 dyoung
2379 1.1 dyoung flags = sysctl_root.sysctl_flags;
2380 1.1 dyoung
2381 1.1 dyoung sysctl_root.sysctl_flags |= CTLFLAG_READWRITE;
2382 1.1 dyoung sysctl_root.sysctl_flags &= ~CTLFLAG_PERMANENT;
2383 1.1 dyoung
2384 1.1 dyoung sysctl_createv(0, 0, 0, &node,
2385 1.1 dyoung CTLFLAG_PERMANENT, CTLTYPE_NODE,
2386 1.1 dyoung "koff",
2387 1.1 dyoung SYSCTL_DESCR("Kernel Obscure Feature Finder"),
2388 1.1 dyoung 0, 0, 0, 0, CTL_CREATE, CTL_EOL);
2389 1.1 dyoung
2390 1.1 dyoung if (!node) {
2391 1.1 dyoung sysctl_createv(0, 0, 0, &node,
2392 1.1 dyoung CTLFLAG_PERMANENT, CTLTYPE_NODE,
2393 1.1 dyoung "koffka",
2394 1.1 dyoung SYSCTL_DESCR("The Real(tm) Kernel"
2395 1.1 dyoung " Obscure Feature Finder"),
2396 1.1 dyoung 0, 0, 0, 0, CTL_CREATE, CTL_EOL);
2397 1.1 dyoung }
2398 1.1 dyoung if (node) {
2399 1.1 dyoung sysctl_createv(0, 0, 0, 0,
2400 1.1 dyoung CTLFLAG_PERMANENT|CTLFLAG_READONLY,
2401 1.1 dyoung CTLTYPE_INT, "vtw_debug_syscall",
2402 1.1 dyoung SYSCTL_DESCR("vtw debug"
2403 1.1 dyoung " system call number"),
2404 1.1 dyoung 0, 0, &vtw_syscall, 0, node->sysctl_num,
2405 1.1 dyoung CTL_CREATE, CTL_EOL);
2406 1.1 dyoung }
2407 1.1 dyoung sysctl_root.sysctl_flags = flags;
2408 1.1 dyoung }
2409 1.1 dyoung }
2410 1.1 dyoung #else /* !VTW_DEBUG */
/* Debug support is compiled out: nothing to initialise. */
static void
vtw_debug_init(void)
{
}
2416 1.1 dyoung #endif /* !VTW_DEBUG */
2417