tcp_vtw.c revision 1.18.8.1 1 1.1 dyoung /*
2 1.1 dyoung * Copyright (c) 2011 The NetBSD Foundation, Inc.
3 1.1 dyoung * All rights reserved.
4 1.1 dyoung *
5 1.1 dyoung * This code is derived from software contributed to The NetBSD Foundation
6 1.1 dyoung * by Coyote Point Systems, Inc.
7 1.1 dyoung *
8 1.1 dyoung * Redistribution and use in source and binary forms, with or without
9 1.1 dyoung * modification, are permitted provided that the following conditions
10 1.1 dyoung * are met:
11 1.1 dyoung * 1. Redistributions of source code must retain the above copyright
12 1.1 dyoung * notice, this list of conditions and the following disclaimer.
13 1.1 dyoung * 2. Redistributions in binary form must reproduce the above copyright
14 1.1 dyoung * notice, this list of conditions and the following disclaimer in the
15 1.1 dyoung * documentation and/or other materials provided with the distribution.
16 1.1 dyoung *
17 1.1 dyoung * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
18 1.1 dyoung * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 1.1 dyoung * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 1.1 dyoung * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
21 1.1 dyoung * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 1.1 dyoung * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 1.1 dyoung * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 1.1 dyoung * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 1.1 dyoung * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 1.1 dyoung * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 1.1 dyoung * POSSIBILITY OF SUCH DAMAGE.
28 1.1 dyoung */
29 1.9 yamt
30 1.9 yamt /*
31 1.9 yamt * Reduces the resources demanded by TCP sessions in TIME_WAIT-state using
32 1.9 yamt * methods called Vestigial Time-Wait (VTW) and Maximum Segment Lifetime
33 1.9 yamt * Truncation (MSLT).
34 1.9 yamt *
35 1.9 yamt * MSLT and VTW were contributed by Coyote Point Systems, Inc.
36 1.9 yamt *
37 1.9 yamt * Even after a TCP session enters the TIME_WAIT state, its corresponding
38 1.9 yamt * socket and protocol control blocks (PCBs) stick around until the TCP
39 1.9 yamt * Maximum Segment Lifetime (MSL) expires. On a host whose workload
40 1.9 yamt * necessarily creates and closes down many TCP sockets, the sockets & PCBs
41 1.9 yamt * for TCP sessions in TIME_WAIT state amount to many megabytes of dead
42 1.9 yamt * weight in RAM.
43 1.9 yamt *
 * Maximum Segment Lifetime Truncation (MSLT) assigns each TCP session to
45 1.9 yamt * a class based on the nearness of the peer. Corresponding to each class
46 1.9 yamt * is an MSL, and a session uses the MSL of its class. The classes are
47 1.9 yamt * loopback (local host equals remote host), local (local host and remote
48 1.9 yamt * host are on the same link/subnet), and remote (local host and remote
49 1.9 yamt * host communicate via one or more gateways). Classes corresponding to
50 1.9 yamt * nearer peers have lower MSLs by default: 2 seconds for loopback, 10
51 1.9 yamt * seconds for local, 60 seconds for remote. Loopback and local sessions
52 1.9 yamt * expire more quickly when MSLT is used.
53 1.9 yamt *
54 1.9 yamt * Vestigial Time-Wait (VTW) replaces a TIME_WAIT session's PCB/socket
55 1.9 yamt * dead weight with a compact representation of the session, called a
56 1.9 yamt * "vestigial PCB". VTW data structures are designed to be very fast and
57 1.9 yamt * memory-efficient: for fast insertion and lookup of vestigial PCBs,
58 1.9 yamt * the PCBs are stored in a hash table that is designed to minimize the
 * number of cacheline visits per lookup/insertion.  The memory for both
 * vestigial PCBs and elements of the PCB hash table comes from
 * fixed-size pools, and linked data structures exploit this to conserve
 * memory by representing references with a narrow index/offset from the
 * start of a pool instead of a pointer.  When space for new vestigial PCBs
64 1.9 yamt * runs out, VTW makes room by discarding old vestigial PCBs, oldest first.
65 1.9 yamt * VTW cooperates with MSLT.
66 1.9 yamt *
67 1.9 yamt * It may help to think of VTW as a "FIN cache" by analogy to the SYN
68 1.9 yamt * cache.
69 1.9 yamt *
70 1.9 yamt * A 2.8-GHz Pentium 4 running a test workload that creates TIME_WAIT
71 1.9 yamt * sessions as fast as it can is approximately 17% idle when VTW is active
72 1.9 yamt * versus 0% idle when VTW is inactive. It has 103 megabytes more free RAM
73 1.9 yamt * when VTW is active (approximately 64k vestigial PCBs are created) than
74 1.9 yamt * when it is inactive.
75 1.9 yamt */
76 1.9 yamt
77 1.1 dyoung #include <sys/cdefs.h>
78 1.1 dyoung
79 1.14 pooka #ifdef _KERNEL_OPT
80 1.1 dyoung #include "opt_ddb.h"
81 1.1 dyoung #include "opt_inet.h"
82 1.1 dyoung #include "opt_inet_csum.h"
83 1.1 dyoung #include "opt_tcp_debug.h"
84 1.14 pooka #endif
85 1.1 dyoung
86 1.1 dyoung #include <sys/param.h>
87 1.1 dyoung #include <sys/systm.h>
88 1.1 dyoung #include <sys/kmem.h>
89 1.1 dyoung #include <sys/mbuf.h>
90 1.1 dyoung #include <sys/protosw.h>
91 1.1 dyoung #include <sys/socket.h>
92 1.1 dyoung #include <sys/socketvar.h>
93 1.1 dyoung #include <sys/errno.h>
94 1.1 dyoung #include <sys/syslog.h>
95 1.1 dyoung #include <sys/pool.h>
96 1.1 dyoung #include <sys/domain.h>
97 1.1 dyoung #include <sys/kernel.h>
98 1.1 dyoung #include <net/if.h>
99 1.1 dyoung #include <net/if_types.h>
100 1.1 dyoung
101 1.1 dyoung #include <netinet/in.h>
102 1.1 dyoung #include <netinet/in_systm.h>
103 1.1 dyoung #include <netinet/ip.h>
104 1.1 dyoung #include <netinet/in_pcb.h>
105 1.1 dyoung #include <netinet/in_var.h>
106 1.1 dyoung #include <netinet/ip_var.h>
107 1.1 dyoung #include <netinet/in_offload.h>
108 1.1 dyoung #include <netinet/ip6.h>
109 1.1 dyoung #include <netinet6/ip6_var.h>
110 1.1 dyoung #include <netinet6/in6_pcb.h>
111 1.1 dyoung #include <netinet6/ip6_var.h>
112 1.1 dyoung #include <netinet6/in6_var.h>
113 1.1 dyoung #include <netinet/icmp6.h>
114 1.1 dyoung
115 1.1 dyoung #include <netinet/tcp.h>
116 1.1 dyoung #include <netinet/tcp_fsm.h>
117 1.1 dyoung #include <netinet/tcp_seq.h>
118 1.1 dyoung #include <netinet/tcp_timer.h>
119 1.1 dyoung #include <netinet/tcp_var.h>
120 1.1 dyoung #include <netinet/tcp_private.h>
121 1.1 dyoung
122 1.1 dyoung #include <netinet/tcp_vtw.h>
123 1.1 dyoung
124 1.18.8.1 pgoyette __KERNEL_RCSID(0, "$NetBSD: tcp_vtw.c,v 1.18.8.1 2018/05/21 04:36:16 pgoyette Exp $");
125 1.1 dyoung
126 1.1 dyoung #define db_trace(__a, __b) do { } while (/*CONSTCOND*/0)
127 1.1 dyoung
128 1.1 dyoung static void vtw_debug_init(void);
129 1.1 dyoung
130 1.1 dyoung fatp_ctl_t fat_tcpv4;
131 1.1 dyoung fatp_ctl_t fat_tcpv6;
132 1.1 dyoung vtw_ctl_t vtw_tcpv4[VTW_NCLASS];
133 1.1 dyoung vtw_ctl_t vtw_tcpv6[VTW_NCLASS];
134 1.1 dyoung vtw_stats_t vtw_stats;
135 1.1 dyoung
136 1.1 dyoung /* We provide state for the lookup_ports iterator.
137 1.1 dyoung * As currently we are netlock-protected, there is one.
138 1.1 dyoung * If we were finer-grain, we would have one per CPU.
139 1.1 dyoung * I do not want to be in the business of alloc/free.
140 1.1 dyoung * The best alternate would be allocate on the caller's
141 1.1 dyoung * stack, but that would require them to know the struct,
142 1.1 dyoung * or at least the size.
143 1.1 dyoung * See how she goes.
144 1.1 dyoung */
145 1.1 dyoung struct tcp_ports_iterator {
146 1.1 dyoung union {
147 1.1 dyoung struct in_addr v4;
148 1.1 dyoung struct in6_addr v6;
149 1.1 dyoung } addr;
150 1.1 dyoung u_int port;
151 1.1 dyoung
152 1.1 dyoung uint32_t wild : 1;
153 1.1 dyoung
154 1.1 dyoung vtw_ctl_t *ctl;
155 1.1 dyoung fatp_t *fp;
156 1.1 dyoung
157 1.1 dyoung uint16_t slot_idx;
158 1.1 dyoung uint16_t ctl_idx;
159 1.1 dyoung };
160 1.1 dyoung
161 1.1 dyoung static struct tcp_ports_iterator tcp_ports_iterator_v4;
162 1.1 dyoung static struct tcp_ports_iterator tcp_ports_iterator_v6;
163 1.1 dyoung
164 1.1 dyoung static int vtw_age(vtw_ctl_t *, struct timeval *);
165 1.1 dyoung
166 1.1 dyoung /*!\brief allocate a fat pointer from a collection.
167 1.1 dyoung */
168 1.1 dyoung static fatp_t *
169 1.1 dyoung fatp_alloc(fatp_ctl_t *fat)
170 1.1 dyoung {
171 1.1 dyoung fatp_t *fp = 0;
172 1.1 dyoung
173 1.1 dyoung if (fat->nfree) {
174 1.1 dyoung fp = fat->free;
175 1.1 dyoung if (fp) {
176 1.1 dyoung fat->free = fatp_next(fat, fp);
177 1.1 dyoung --fat->nfree;
178 1.1 dyoung ++fat->nalloc;
179 1.1 dyoung fp->nxt = 0;
180 1.1 dyoung
181 1.1 dyoung KASSERT(!fp->inuse);
182 1.1 dyoung }
183 1.1 dyoung }
184 1.1 dyoung
185 1.1 dyoung return fp;
186 1.1 dyoung }
187 1.1 dyoung
188 1.1 dyoung /*!\brief free a fat pointer.
189 1.1 dyoung */
190 1.1 dyoung static void
191 1.1 dyoung fatp_free(fatp_ctl_t *fat, fatp_t *fp)
192 1.1 dyoung {
193 1.1 dyoung if (fp) {
194 1.1 dyoung KASSERT(!fp->inuse);
195 1.1 dyoung KASSERT(!fp->nxt);
196 1.1 dyoung
197 1.1 dyoung fp->nxt = fatp_index(fat, fat->free);
198 1.1 dyoung fat->free = fp;
199 1.1 dyoung
200 1.1 dyoung ++fat->nfree;
201 1.1 dyoung --fat->nalloc;
202 1.1 dyoung }
203 1.1 dyoung }
204 1.1 dyoung
205 1.1 dyoung /*!\brief initialise a collection of fat pointers.
206 1.1 dyoung *
207 1.1 dyoung *\param n # hash buckets
208 1.1 dyoung *\param m total # fat pointers to allocate
209 1.1 dyoung *
210 1.1 dyoung * We allocate 2x as much, as we have two hashes: full and lport only.
211 1.1 dyoung */
212 1.1 dyoung static void
213 1.6 dyoung fatp_init(fatp_ctl_t *fat, uint32_t n, uint32_t m,
214 1.6 dyoung fatp_t *fat_base, fatp_t **fat_hash)
215 1.1 dyoung {
216 1.1 dyoung fatp_t *fp;
217 1.1 dyoung
218 1.1 dyoung KASSERT(n <= FATP_MAX / 2);
219 1.1 dyoung
220 1.6 dyoung fat->hash = fat_hash;
221 1.6 dyoung fat->base = fat_base;
222 1.1 dyoung
223 1.1 dyoung fat->port = &fat->hash[m];
224 1.1 dyoung
225 1.1 dyoung fat->mask = m - 1; // ASSERT is power of 2 (m)
226 1.1 dyoung fat->lim = fat->base + 2*n - 1;
227 1.1 dyoung fat->nfree = 0;
228 1.1 dyoung fat->nalloc = 2*n;
229 1.1 dyoung
230 1.1 dyoung /* Initialise the free list.
231 1.1 dyoung */
232 1.1 dyoung for (fp = fat->lim; fp >= fat->base; --fp) {
233 1.1 dyoung fatp_free(fat, fp);
234 1.1 dyoung }
235 1.1 dyoung }
236 1.1 dyoung
/*
 * Per-slot constants.  The `xtra' value for a slot is XORed into the
 * tag stored in that slot, and XORed out again on lookup and removal.
 * There is one entry for each of the (at most 32) slot positions.
 */
static uint32_t fatp_xtra[] = {
	/* slots  0 ..  7 */
	0x11111111, 0x22222222, 0x33333333, 0x44444444,
	0x55555555, 0x66666666, 0x77777777, 0x88888888,
	/* slots  8 .. 15 */
	0x12121212, 0x21212121, 0x34343434, 0x43434343,
	0x56565656, 0x65656565, 0x78787878, 0x87878787,
	/* slots 16 .. 23 */
	0x11221122, 0x22112211, 0x33443344, 0x44334433,
	0x55665566, 0x66556655, 0x77887788, 0x88778877,
	/* slots 24 .. 31 */
	0x11112222, 0x22221111, 0x33334444, 0x44443333,
	0x55556666, 0x66665555, 0x77778888, 0x88887777,
};
250 1.1 dyoung
251 1.1 dyoung /*!\brief turn a {fatp_t*,slot} into an integral key.
252 1.1 dyoung *
253 1.1 dyoung * The key can be used to obtain the fatp_t, and the slot,
254 1.1 dyoung * as it directly encodes them.
255 1.1 dyoung */
256 1.1 dyoung static inline uint32_t
257 1.1 dyoung fatp_key(fatp_ctl_t *fat, fatp_t *fp, uint32_t slot)
258 1.1 dyoung {
259 1.1 dyoung CTASSERT(CACHE_LINE_SIZE == 32 ||
260 1.1 dyoung CACHE_LINE_SIZE == 64 ||
261 1.1 dyoung CACHE_LINE_SIZE == 128);
262 1.1 dyoung
263 1.1 dyoung switch (fatp_ntags()) {
264 1.1 dyoung case 7:
265 1.1 dyoung return (fatp_index(fat, fp) << 3) | slot;
266 1.1 dyoung case 15:
267 1.1 dyoung return (fatp_index(fat, fp) << 4) | slot;
268 1.1 dyoung case 31:
269 1.1 dyoung return (fatp_index(fat, fp) << 5) | slot;
270 1.1 dyoung default:
271 1.1 dyoung KASSERT(0 && "no support, for no good reason");
272 1.1 dyoung return ~0;
273 1.1 dyoung }
274 1.1 dyoung }
275 1.1 dyoung
276 1.1 dyoung static inline uint32_t
277 1.1 dyoung fatp_slot_from_key(fatp_ctl_t *fat, uint32_t key)
278 1.1 dyoung {
279 1.1 dyoung CTASSERT(CACHE_LINE_SIZE == 32 ||
280 1.1 dyoung CACHE_LINE_SIZE == 64 ||
281 1.1 dyoung CACHE_LINE_SIZE == 128);
282 1.1 dyoung
283 1.1 dyoung switch (fatp_ntags()) {
284 1.1 dyoung case 7:
285 1.1 dyoung return key & 7;
286 1.1 dyoung case 15:
287 1.1 dyoung return key & 15;
288 1.1 dyoung case 31:
289 1.1 dyoung return key & 31;
290 1.1 dyoung default:
291 1.1 dyoung KASSERT(0 && "no support, for no good reason");
292 1.1 dyoung return ~0;
293 1.1 dyoung }
294 1.1 dyoung }
295 1.1 dyoung
296 1.1 dyoung static inline fatp_t *
297 1.1 dyoung fatp_from_key(fatp_ctl_t *fat, uint32_t key)
298 1.1 dyoung {
299 1.1 dyoung CTASSERT(CACHE_LINE_SIZE == 32 ||
300 1.1 dyoung CACHE_LINE_SIZE == 64 ||
301 1.1 dyoung CACHE_LINE_SIZE == 128);
302 1.1 dyoung
303 1.1 dyoung switch (fatp_ntags()) {
304 1.1 dyoung case 7:
305 1.1 dyoung key >>= 3;
306 1.1 dyoung break;
307 1.1 dyoung case 15:
308 1.1 dyoung key >>= 4;
309 1.1 dyoung break;
310 1.1 dyoung case 31:
311 1.1 dyoung key >>= 5;
312 1.1 dyoung break;
313 1.1 dyoung default:
314 1.1 dyoung KASSERT(0 && "no support, for no good reason");
315 1.1 dyoung return 0;
316 1.1 dyoung }
317 1.1 dyoung
318 1.1 dyoung return key ? fat->base + key - 1 : 0;
319 1.1 dyoung }
320 1.1 dyoung
321 1.1 dyoung static inline uint32_t
322 1.1 dyoung idx_encode(vtw_ctl_t *ctl, uint32_t idx)
323 1.1 dyoung {
324 1.1 dyoung return (idx << ctl->idx_bits) | idx;
325 1.1 dyoung }
326 1.1 dyoung
327 1.1 dyoung static inline uint32_t
328 1.1 dyoung idx_decode(vtw_ctl_t *ctl, uint32_t bits)
329 1.1 dyoung {
330 1.1 dyoung uint32_t idx = bits & ctl->idx_mask;
331 1.1 dyoung
332 1.1 dyoung if (idx_encode(ctl, idx) == bits)
333 1.1 dyoung return idx;
334 1.1 dyoung else
335 1.1 dyoung return ~0;
336 1.1 dyoung }
337 1.1 dyoung
338 1.1 dyoung /*!\brief insert index into fatp hash
339 1.1 dyoung *
340 1.1 dyoung *\param idx - index of element being placed in hash chain
341 1.1 dyoung *\param tag - 32-bit tag identifier
342 1.1 dyoung *
343 1.1 dyoung *\returns
344 1.1 dyoung * value which can be used to locate entry.
345 1.1 dyoung *
346 1.1 dyoung *\note
347 1.1 dyoung * we rely on the fact that there are unused high bits in the index
348 1.1 dyoung * for verification purposes on lookup.
349 1.1 dyoung */
350 1.1 dyoung
351 1.1 dyoung static inline uint32_t
352 1.1 dyoung fatp_vtw_inshash(fatp_ctl_t *fat, uint32_t idx, uint32_t tag, int which,
353 1.1 dyoung void *dbg)
354 1.1 dyoung {
355 1.1 dyoung fatp_t *fp;
356 1.1 dyoung fatp_t **hash = (which ? fat->port : fat->hash);
357 1.1 dyoung int i;
358 1.1 dyoung
359 1.1 dyoung fp = hash[tag & fat->mask];
360 1.1 dyoung
361 1.1 dyoung while (!fp || fatp_full(fp)) {
362 1.1 dyoung fatp_t *fq;
363 1.1 dyoung
364 1.1 dyoung /* All entries are inuse at the top level.
365 1.1 dyoung * We allocate a spare, and push the top level
366 1.1 dyoung * down one. All entries in the fp we push down
367 1.1 dyoung * (think of a tape worm here) will be expelled sooner than
368 1.1 dyoung * any entries added subsequently to this hash bucket.
369 1.1 dyoung * This is a property of the time waits we are exploiting.
370 1.1 dyoung */
371 1.1 dyoung
372 1.1 dyoung fq = fatp_alloc(fat);
373 1.1 dyoung if (!fq) {
374 1.1 dyoung vtw_age(fat->vtw, 0);
375 1.1 dyoung fp = hash[tag & fat->mask];
376 1.1 dyoung continue;
377 1.1 dyoung }
378 1.1 dyoung
379 1.1 dyoung fq->inuse = 0;
380 1.1 dyoung fq->nxt = fatp_index(fat, fp);
381 1.1 dyoung
382 1.1 dyoung hash[tag & fat->mask] = fq;
383 1.1 dyoung
384 1.1 dyoung fp = fq;
385 1.1 dyoung }
386 1.1 dyoung
387 1.1 dyoung KASSERT(!fatp_full(fp));
388 1.1 dyoung
389 1.1 dyoung /* Fill highest index first. Lookup is lowest first.
390 1.1 dyoung */
391 1.1 dyoung for (i = fatp_ntags(); --i >= 0; ) {
392 1.1 dyoung if (!((1 << i) & fp->inuse)) {
393 1.1 dyoung break;
394 1.1 dyoung }
395 1.1 dyoung }
396 1.1 dyoung
397 1.1 dyoung fp->inuse |= 1 << i;
398 1.1 dyoung fp->tag[i] = tag ^ idx_encode(fat->vtw, idx) ^ fatp_xtra[i];
399 1.1 dyoung
400 1.1 dyoung db_trace(KTR_VTW
401 1.1 dyoung , (fp, "fat: inuse %5.5x tag[%x] %8.8x"
402 1.1 dyoung , fp->inuse
403 1.1 dyoung , i, fp->tag[i]));
404 1.1 dyoung
405 1.1 dyoung return fatp_key(fat, fp, i);
406 1.1 dyoung }
407 1.1 dyoung
408 1.1 dyoung static inline int
409 1.1 dyoung vtw_alive(const vtw_t *vtw)
410 1.1 dyoung {
411 1.1 dyoung return vtw->hashed && vtw->expire.tv_sec;
412 1.1 dyoung }
413 1.1 dyoung
414 1.1 dyoung static inline uint32_t
415 1.1 dyoung vtw_index_v4(vtw_ctl_t *ctl, vtw_v4_t *v4)
416 1.1 dyoung {
417 1.1 dyoung if (ctl->base.v4 <= v4 && v4 <= ctl->lim.v4)
418 1.1 dyoung return v4 - ctl->base.v4;
419 1.1 dyoung
420 1.1 dyoung KASSERT(0 && "vtw out of bounds");
421 1.1 dyoung
422 1.1 dyoung return ~0;
423 1.1 dyoung }
424 1.1 dyoung
425 1.1 dyoung static inline uint32_t
426 1.1 dyoung vtw_index_v6(vtw_ctl_t *ctl, vtw_v6_t *v6)
427 1.1 dyoung {
428 1.1 dyoung if (ctl->base.v6 <= v6 && v6 <= ctl->lim.v6)
429 1.1 dyoung return v6 - ctl->base.v6;
430 1.1 dyoung
431 1.1 dyoung KASSERT(0 && "vtw out of bounds");
432 1.1 dyoung
433 1.1 dyoung return ~0;
434 1.1 dyoung }
435 1.1 dyoung
436 1.1 dyoung static inline uint32_t
437 1.1 dyoung vtw_index(vtw_ctl_t *ctl, vtw_t *vtw)
438 1.1 dyoung {
439 1.1 dyoung if (ctl->clidx)
440 1.1 dyoung ctl = ctl->ctl;
441 1.1 dyoung
442 1.1 dyoung if (ctl->is_v4)
443 1.1 dyoung return vtw_index_v4(ctl, (vtw_v4_t *)vtw);
444 1.1 dyoung
445 1.1 dyoung if (ctl->is_v6)
446 1.1 dyoung return vtw_index_v6(ctl, (vtw_v6_t *)vtw);
447 1.1 dyoung
448 1.1 dyoung KASSERT(0 && "neither 4 nor 6. most curious.");
449 1.1 dyoung
450 1.1 dyoung return ~0;
451 1.1 dyoung }
452 1.1 dyoung
453 1.1 dyoung static inline vtw_t *
454 1.1 dyoung vtw_from_index(vtw_ctl_t *ctl, uint32_t idx)
455 1.1 dyoung {
456 1.1 dyoung if (ctl->clidx)
457 1.1 dyoung ctl = ctl->ctl;
458 1.1 dyoung
459 1.1 dyoung /* See if the index looks like it might be an index.
460 1.1 dyoung * Bits on outside of the valid index bits is a give away.
461 1.1 dyoung */
462 1.1 dyoung idx = idx_decode(ctl, idx);
463 1.1 dyoung
464 1.1 dyoung if (idx == ~0) {
465 1.1 dyoung return 0;
466 1.1 dyoung } else if (ctl->is_v4) {
467 1.1 dyoung vtw_v4_t *vtw = ctl->base.v4 + idx;
468 1.1 dyoung
469 1.1 dyoung return (ctl->base.v4 <= vtw && vtw <= ctl->lim.v4)
470 1.1 dyoung ? &vtw->common : 0;
471 1.1 dyoung } else if (ctl->is_v6) {
472 1.1 dyoung vtw_v6_t *vtw = ctl->base.v6 + idx;
473 1.1 dyoung
474 1.1 dyoung return (ctl->base.v6 <= vtw && vtw <= ctl->lim.v6)
475 1.1 dyoung ? &vtw->common : 0;
476 1.1 dyoung } else {
477 1.1 dyoung KASSERT(0 && "badness");
478 1.1 dyoung return 0;
479 1.1 dyoung }
480 1.1 dyoung }
481 1.1 dyoung
482 1.1 dyoung /*!\brief return the next vtw after this one.
483 1.1 dyoung *
484 1.1 dyoung * Due to the differing sizes of the entries in differing
485 1.1 dyoung * arenas, we have to ensure we ++ the correct pointer type.
486 1.1 dyoung *
487 1.1 dyoung * Also handles wrap.
488 1.1 dyoung */
489 1.1 dyoung static inline vtw_t *
490 1.1 dyoung vtw_next(vtw_ctl_t *ctl, vtw_t *vtw)
491 1.1 dyoung {
492 1.1 dyoung if (ctl->is_v4) {
493 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
494 1.1 dyoung
495 1.1 dyoung vtw = &(++v4)->common;
496 1.1 dyoung } else {
497 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
498 1.1 dyoung
499 1.1 dyoung vtw = &(++v6)->common;
500 1.1 dyoung }
501 1.1 dyoung
502 1.1 dyoung if (vtw > ctl->lim.v)
503 1.1 dyoung vtw = ctl->base.v;
504 1.1 dyoung
505 1.1 dyoung return vtw;
506 1.1 dyoung }
507 1.1 dyoung
508 1.1 dyoung /*!\brief remove entry from FATP hash chains
509 1.1 dyoung */
510 1.1 dyoung static inline void
511 1.1 dyoung vtw_unhash(vtw_ctl_t *ctl, vtw_t *vtw)
512 1.1 dyoung {
513 1.1 dyoung fatp_ctl_t *fat = ctl->fat;
514 1.1 dyoung fatp_t *fp;
515 1.1 dyoung uint32_t key = vtw->key;
516 1.1 dyoung uint32_t tag, slot, idx;
517 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
518 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
519 1.1 dyoung
520 1.1 dyoung if (!vtw->hashed) {
521 1.1 dyoung KASSERT(0 && "unhashed");
522 1.1 dyoung return;
523 1.1 dyoung }
524 1.1 dyoung
525 1.1 dyoung if (fat->vtw->is_v4) {
526 1.1 dyoung tag = v4_tag(v4->faddr, v4->fport, v4->laddr, v4->lport);
527 1.1 dyoung } else if (fat->vtw->is_v6) {
528 1.1 dyoung tag = v6_tag(&v6->faddr, v6->fport, &v6->laddr, v6->lport);
529 1.1 dyoung } else {
530 1.1 dyoung tag = 0;
531 1.1 dyoung KASSERT(0 && "not reached");
532 1.1 dyoung }
533 1.1 dyoung
534 1.1 dyoung /* Remove from fat->hash[]
535 1.1 dyoung */
536 1.1 dyoung slot = fatp_slot_from_key(fat, key);
537 1.1 dyoung fp = fatp_from_key(fat, key);
538 1.1 dyoung idx = vtw_index(ctl, vtw);
539 1.1 dyoung
540 1.1 dyoung db_trace(KTR_VTW
541 1.1 dyoung , (fp, "fat: del inuse %5.5x slot %x idx %x key %x tag %x"
542 1.1 dyoung , fp->inuse, slot, idx, key, tag));
543 1.1 dyoung
544 1.1 dyoung KASSERT(fp->inuse & (1 << slot));
545 1.1 dyoung KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
546 1.1 dyoung ^ fatp_xtra[slot]));
547 1.1 dyoung
548 1.1 dyoung if ((fp->inuse & (1 << slot))
549 1.1 dyoung && fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
550 1.1 dyoung ^ fatp_xtra[slot])) {
551 1.1 dyoung fp->inuse ^= 1 << slot;
552 1.1 dyoung fp->tag[slot] = 0;
553 1.1 dyoung
554 1.1 dyoung /* When we delete entries, we do not compact. This is
555 1.1 dyoung * due to temporality. We add entries, and they
556 1.1 dyoung * (eventually) expire. Older entries will be further
557 1.1 dyoung * down the chain.
558 1.1 dyoung */
559 1.1 dyoung if (!fp->inuse) {
560 1.1 dyoung uint32_t hi = tag & fat->mask;
561 1.1 dyoung fatp_t *fq = 0;
562 1.1 dyoung fatp_t *fr = fat->hash[hi];
563 1.1 dyoung
564 1.1 dyoung while (fr && fr != fp) {
565 1.1 dyoung fr = fatp_next(fat, fq = fr);
566 1.1 dyoung }
567 1.1 dyoung
568 1.1 dyoung if (fr == fp) {
569 1.1 dyoung if (fq) {
570 1.1 dyoung fq->nxt = fp->nxt;
571 1.1 dyoung fp->nxt = 0;
572 1.1 dyoung fatp_free(fat, fp);
573 1.1 dyoung } else {
574 1.1 dyoung KASSERT(fat->hash[hi] == fp);
575 1.1 dyoung
576 1.1 dyoung if (fp->nxt) {
577 1.1 dyoung fat->hash[hi]
578 1.1 dyoung = fatp_next(fat, fp);
579 1.1 dyoung fp->nxt = 0;
580 1.1 dyoung fatp_free(fat, fp);
581 1.1 dyoung } else {
582 1.1 dyoung /* retain for next use.
583 1.1 dyoung */
584 1.1 dyoung ;
585 1.1 dyoung }
586 1.1 dyoung }
587 1.1 dyoung } else {
588 1.1 dyoung fr = fat->hash[hi];
589 1.1 dyoung
590 1.1 dyoung do {
591 1.1 dyoung db_trace(KTR_VTW
592 1.1 dyoung , (fr
593 1.1 dyoung , "fat:*del inuse %5.5x"
594 1.1 dyoung " nxt %x"
595 1.1 dyoung , fr->inuse, fr->nxt));
596 1.1 dyoung
597 1.1 dyoung fr = fatp_next(fat, fq = fr);
598 1.1 dyoung } while (fr && fr != fp);
599 1.1 dyoung
600 1.1 dyoung KASSERT(0 && "oops");
601 1.1 dyoung }
602 1.1 dyoung }
603 1.1 dyoung vtw->key ^= ~0;
604 1.1 dyoung }
605 1.1 dyoung
606 1.1 dyoung if (fat->vtw->is_v4) {
607 1.1 dyoung tag = v4_port_tag(v4->lport);
608 1.1 dyoung } else if (fat->vtw->is_v6) {
609 1.1 dyoung tag = v6_port_tag(v6->lport);
610 1.1 dyoung }
611 1.1 dyoung
612 1.1 dyoung /* Remove from fat->port[]
613 1.1 dyoung */
614 1.1 dyoung key = vtw->port_key;
615 1.1 dyoung slot = fatp_slot_from_key(fat, key);
616 1.1 dyoung fp = fatp_from_key(fat, key);
617 1.1 dyoung idx = vtw_index(ctl, vtw);
618 1.1 dyoung
619 1.1 dyoung db_trace(KTR_VTW
620 1.1 dyoung , (fp, "fatport: del inuse %5.5x"
621 1.1 dyoung " slot %x idx %x key %x tag %x"
622 1.1 dyoung , fp->inuse, slot, idx, key, tag));
623 1.1 dyoung
624 1.1 dyoung KASSERT(fp->inuse & (1 << slot));
625 1.1 dyoung KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
626 1.1 dyoung ^ fatp_xtra[slot]));
627 1.1 dyoung
628 1.1 dyoung if ((fp->inuse & (1 << slot))
629 1.1 dyoung && fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
630 1.1 dyoung ^ fatp_xtra[slot])) {
631 1.1 dyoung fp->inuse ^= 1 << slot;
632 1.1 dyoung fp->tag[slot] = 0;
633 1.1 dyoung
634 1.1 dyoung if (!fp->inuse) {
635 1.1 dyoung uint32_t hi = tag & fat->mask;
636 1.1 dyoung fatp_t *fq = 0;
637 1.1 dyoung fatp_t *fr = fat->port[hi];
638 1.1 dyoung
639 1.1 dyoung while (fr && fr != fp) {
640 1.1 dyoung fr = fatp_next(fat, fq = fr);
641 1.1 dyoung }
642 1.1 dyoung
643 1.1 dyoung if (fr == fp) {
644 1.1 dyoung if (fq) {
645 1.1 dyoung fq->nxt = fp->nxt;
646 1.1 dyoung fp->nxt = 0;
647 1.1 dyoung fatp_free(fat, fp);
648 1.1 dyoung } else {
649 1.1 dyoung KASSERT(fat->port[hi] == fp);
650 1.1 dyoung
651 1.1 dyoung if (fp->nxt) {
652 1.1 dyoung fat->port[hi]
653 1.1 dyoung = fatp_next(fat, fp);
654 1.1 dyoung fp->nxt = 0;
655 1.1 dyoung fatp_free(fat, fp);
656 1.1 dyoung } else {
657 1.1 dyoung /* retain for next use.
658 1.1 dyoung */
659 1.1 dyoung ;
660 1.1 dyoung }
661 1.1 dyoung }
662 1.1 dyoung }
663 1.1 dyoung }
664 1.1 dyoung vtw->port_key ^= ~0;
665 1.1 dyoung }
666 1.1 dyoung
667 1.1 dyoung vtw->hashed = 0;
668 1.1 dyoung }
669 1.1 dyoung
670 1.1 dyoung /*!\brief remove entry from hash, possibly free.
671 1.1 dyoung */
672 1.1 dyoung void
673 1.1 dyoung vtw_del(vtw_ctl_t *ctl, vtw_t *vtw)
674 1.1 dyoung {
675 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
676 1.1 dyoung
677 1.1 dyoung if (vtw->hashed) {
678 1.1 dyoung ++vtw_stats.del;
679 1.1 dyoung vtw_unhash(ctl, vtw);
680 1.1 dyoung }
681 1.1 dyoung
682 1.1 dyoung /* We only delete the oldest entry.
683 1.1 dyoung */
684 1.1 dyoung if (vtw != ctl->oldest.v)
685 1.1 dyoung return;
686 1.1 dyoung
687 1.1 dyoung --ctl->nalloc;
688 1.1 dyoung ++ctl->nfree;
689 1.1 dyoung
690 1.1 dyoung vtw->expire.tv_sec = 0;
691 1.1 dyoung vtw->expire.tv_usec = ~0;
692 1.1 dyoung
693 1.1 dyoung if (!ctl->nalloc)
694 1.1 dyoung ctl->oldest.v = 0;
695 1.1 dyoung
696 1.1 dyoung ctl->oldest.v = vtw_next(ctl, vtw);
697 1.1 dyoung }
698 1.1 dyoung
699 1.4 dholland /*!\brief insert vestigial timewait in hash chain
700 1.1 dyoung */
701 1.1 dyoung static void
702 1.1 dyoung vtw_inshash_v4(vtw_ctl_t *ctl, vtw_t *vtw)
703 1.1 dyoung {
704 1.1 dyoung uint32_t idx = vtw_index(ctl, vtw);
705 1.1 dyoung uint32_t tag;
706 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
707 1.1 dyoung
708 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
709 1.1 dyoung KASSERT(!vtw->hashed);
710 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
711 1.1 dyoung
712 1.1 dyoung ++vtw_stats.ins;
713 1.1 dyoung
714 1.1 dyoung tag = v4_tag(v4->faddr, v4->fport,
715 1.1 dyoung v4->laddr, v4->lport);
716 1.1 dyoung
717 1.1 dyoung vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw);
718 1.1 dyoung
719 1.1 dyoung db_trace(KTR_VTW, (ctl
720 1.1 dyoung , "vtw: ins %8.8x:%4.4x %8.8x:%4.4x"
721 1.1 dyoung " tag %8.8x key %8.8x"
722 1.1 dyoung , v4->faddr, v4->fport
723 1.1 dyoung , v4->laddr, v4->lport
724 1.1 dyoung , tag
725 1.1 dyoung , vtw->key));
726 1.1 dyoung
727 1.1 dyoung tag = v4_port_tag(v4->lport);
728 1.1 dyoung vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw);
729 1.1 dyoung
730 1.1 dyoung db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x"
731 1.1 dyoung , v4->lport, v4->lport
732 1.1 dyoung , tag
733 1.1 dyoung , vtw->key));
734 1.1 dyoung
735 1.1 dyoung vtw->hashed = 1;
736 1.1 dyoung }
737 1.1 dyoung
738 1.4 dholland /*!\brief insert vestigial timewait in hash chain
739 1.1 dyoung */
740 1.1 dyoung static void
741 1.1 dyoung vtw_inshash_v6(vtw_ctl_t *ctl, vtw_t *vtw)
742 1.1 dyoung {
743 1.1 dyoung uint32_t idx = vtw_index(ctl, vtw);
744 1.1 dyoung uint32_t tag;
745 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
746 1.1 dyoung
747 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
748 1.1 dyoung KASSERT(!vtw->hashed);
749 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
750 1.1 dyoung
751 1.1 dyoung ++vtw_stats.ins;
752 1.1 dyoung
753 1.1 dyoung tag = v6_tag(&v6->faddr, v6->fport,
754 1.1 dyoung &v6->laddr, v6->lport);
755 1.1 dyoung
756 1.1 dyoung vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw);
757 1.1 dyoung
758 1.1 dyoung tag = v6_port_tag(v6->lport);
759 1.1 dyoung vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw);
760 1.1 dyoung
761 1.1 dyoung db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x"
762 1.1 dyoung , v6->lport, v6->lport
763 1.1 dyoung , tag
764 1.1 dyoung , vtw->key));
765 1.1 dyoung
766 1.1 dyoung vtw->hashed = 1;
767 1.1 dyoung }
768 1.1 dyoung
769 1.1 dyoung static vtw_t *
770 1.1 dyoung vtw_lookup_hash_v4(vtw_ctl_t *ctl, uint32_t faddr, uint16_t fport
771 1.1 dyoung , uint32_t laddr, uint16_t lport
772 1.1 dyoung , int which)
773 1.1 dyoung {
774 1.1 dyoung vtw_v4_t *v4;
775 1.1 dyoung vtw_t *vtw;
776 1.1 dyoung uint32_t tag;
777 1.1 dyoung fatp_t *fp;
778 1.1 dyoung int i;
779 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
780 1.1 dyoung
781 1.1 dyoung if (!ctl || !ctl->fat)
782 1.1 dyoung return 0;
783 1.1 dyoung
784 1.1 dyoung ++vtw_stats.look[which];
785 1.1 dyoung
786 1.1 dyoung if (which) {
787 1.1 dyoung tag = v4_port_tag(lport);
788 1.1 dyoung fp = ctl->fat->port[tag & ctl->fat->mask];
789 1.1 dyoung } else {
790 1.1 dyoung tag = v4_tag(faddr, fport, laddr, lport);
791 1.1 dyoung fp = ctl->fat->hash[tag & ctl->fat->mask];
792 1.1 dyoung }
793 1.1 dyoung
794 1.1 dyoung while (fp && fp->inuse) {
795 1.1 dyoung uint32_t inuse = fp->inuse;
796 1.1 dyoung
797 1.1 dyoung ++fatps;
798 1.1 dyoung
799 1.1 dyoung for (i = 0; inuse && i < fatp_ntags(); ++i) {
800 1.1 dyoung uint32_t idx;
801 1.1 dyoung
802 1.1 dyoung if (!(inuse & (1 << i)))
803 1.1 dyoung continue;
804 1.1 dyoung
805 1.1 dyoung inuse ^= 1 << i;
806 1.1 dyoung
807 1.1 dyoung ++probes;
808 1.1 dyoung ++vtw_stats.probe[which];
809 1.1 dyoung
810 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
811 1.1 dyoung vtw = vtw_from_index(ctl, idx);
812 1.1 dyoung
813 1.1 dyoung if (!vtw) {
814 1.1 dyoung /* Hopefully fast path.
815 1.1 dyoung */
816 1.1 dyoung db_trace(KTR_VTW
817 1.1 dyoung , (fp, "vtw: fast %A:%P %A:%P"
818 1.1 dyoung " idx %x tag %x"
819 1.1 dyoung , faddr, fport
820 1.1 dyoung , laddr, lport
821 1.1 dyoung , idx, tag));
822 1.1 dyoung continue;
823 1.1 dyoung }
824 1.1 dyoung
825 1.1 dyoung v4 = (void*)vtw;
826 1.1 dyoung
827 1.1 dyoung /* The de-referencing of vtw is what we want to avoid.
828 1.1 dyoung * Losing.
829 1.1 dyoung */
830 1.1 dyoung if (vtw_alive(vtw)
831 1.1 dyoung && ((which ? vtw->port_key : vtw->key)
832 1.1 dyoung == fatp_key(ctl->fat, fp, i))
833 1.1 dyoung && (which
834 1.1 dyoung || (v4->faddr == faddr && v4->laddr == laddr
835 1.1 dyoung && v4->fport == fport))
836 1.1 dyoung && v4->lport == lport) {
837 1.1 dyoung ++vtw_stats.hit[which];
838 1.1 dyoung
839 1.1 dyoung db_trace(KTR_VTW
840 1.1 dyoung , (fp, "vtw: hit %8.8x:%4.4x"
841 1.1 dyoung " %8.8x:%4.4x idx %x key %x"
842 1.1 dyoung , faddr, fport
843 1.1 dyoung , laddr, lport
844 1.1 dyoung , idx_decode(ctl, idx), vtw->key));
845 1.1 dyoung
846 1.1 dyoung KASSERT(vtw->hashed);
847 1.1 dyoung
848 1.1 dyoung goto out;
849 1.1 dyoung }
850 1.1 dyoung ++vtw_stats.losing[which];
851 1.1 dyoung ++losings;
852 1.1 dyoung
853 1.1 dyoung if (vtw_alive(vtw)) {
854 1.1 dyoung db_trace(KTR_VTW
855 1.1 dyoung , (fp, "vtw:!mis %8.8x:%4.4x"
856 1.1 dyoung " %8.8x:%4.4x key %x tag %x"
857 1.1 dyoung , faddr, fport
858 1.1 dyoung , laddr, lport
859 1.1 dyoung , fatp_key(ctl->fat, fp, i)
860 1.1 dyoung , v4_tag(faddr, fport
861 1.1 dyoung , laddr, lport)));
862 1.1 dyoung db_trace(KTR_VTW
863 1.1 dyoung , (vtw, "vtw:!mis %8.8x:%4.4x"
864 1.1 dyoung " %8.8x:%4.4x key %x tag %x"
865 1.1 dyoung , v4->faddr, v4->fport
866 1.1 dyoung , v4->laddr, v4->lport
867 1.1 dyoung , vtw->key
868 1.1 dyoung , v4_tag(v4->faddr, v4->fport
869 1.1 dyoung , v4->laddr, v4->lport)));
870 1.1 dyoung
871 1.1 dyoung if (vtw->key == fatp_key(ctl->fat, fp, i)) {
872 1.1 dyoung db_trace(KTR_VTW
873 1.1 dyoung , (vtw, "vtw:!mis %8.8x:%4.4x"
874 1.1 dyoung " %8.8x:%4.4x key %x"
875 1.1 dyoung " which %x"
876 1.1 dyoung , v4->faddr, v4->fport
877 1.1 dyoung , v4->laddr, v4->lport
878 1.1 dyoung , vtw->key
879 1.1 dyoung , which));
880 1.1 dyoung
881 1.1 dyoung } else {
882 1.1 dyoung db_trace(KTR_VTW
883 1.1 dyoung , (vtw
884 1.1 dyoung , "vtw:!mis"
885 1.1 dyoung " key %8.8x != %8.8x"
886 1.1 dyoung " idx %x i %x which %x"
887 1.1 dyoung , vtw->key
888 1.1 dyoung , fatp_key(ctl->fat, fp, i)
889 1.1 dyoung , idx_decode(ctl, idx)
890 1.1 dyoung , i
891 1.1 dyoung , which));
892 1.1 dyoung }
893 1.1 dyoung } else {
894 1.1 dyoung db_trace(KTR_VTW
895 1.1 dyoung , (fp
896 1.1 dyoung , "vtw:!mis free entry"
897 1.1 dyoung " idx %x vtw %p which %x"
898 1.1 dyoung , idx_decode(ctl, idx)
899 1.1 dyoung , vtw, which));
900 1.1 dyoung }
901 1.1 dyoung }
902 1.1 dyoung
903 1.1 dyoung if (fp->nxt) {
904 1.1 dyoung fp = fatp_next(ctl->fat, fp);
905 1.1 dyoung } else {
906 1.1 dyoung break;
907 1.1 dyoung }
908 1.1 dyoung }
909 1.1 dyoung ++vtw_stats.miss[which];
910 1.1 dyoung vtw = 0;
911 1.1 dyoung out:
912 1.1 dyoung if (fatps > vtw_stats.max_chain[which])
913 1.1 dyoung vtw_stats.max_chain[which] = fatps;
914 1.1 dyoung if (probes > vtw_stats.max_probe[which])
915 1.1 dyoung vtw_stats.max_probe[which] = probes;
916 1.1 dyoung if (losings > vtw_stats.max_loss[which])
917 1.1 dyoung vtw_stats.max_loss[which] = losings;
918 1.1 dyoung
919 1.1 dyoung return vtw;
920 1.1 dyoung }
921 1.1 dyoung
922 1.1 dyoung static vtw_t *
923 1.1 dyoung vtw_lookup_hash_v6(vtw_ctl_t *ctl, const struct in6_addr *faddr, uint16_t fport
924 1.1 dyoung , const struct in6_addr *laddr, uint16_t lport
925 1.1 dyoung , int which)
926 1.1 dyoung {
927 1.1 dyoung vtw_v6_t *v6;
928 1.1 dyoung vtw_t *vtw;
929 1.1 dyoung uint32_t tag;
930 1.1 dyoung fatp_t *fp;
931 1.1 dyoung int i;
932 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
933 1.1 dyoung
934 1.1 dyoung ++vtw_stats.look[which];
935 1.1 dyoung
936 1.1 dyoung if (!ctl || !ctl->fat)
937 1.1 dyoung return 0;
938 1.1 dyoung
939 1.1 dyoung if (which) {
940 1.1 dyoung tag = v6_port_tag(lport);
941 1.1 dyoung fp = ctl->fat->port[tag & ctl->fat->mask];
942 1.1 dyoung } else {
943 1.1 dyoung tag = v6_tag(faddr, fport, laddr, lport);
944 1.1 dyoung fp = ctl->fat->hash[tag & ctl->fat->mask];
945 1.1 dyoung }
946 1.1 dyoung
947 1.1 dyoung while (fp && fp->inuse) {
948 1.1 dyoung uint32_t inuse = fp->inuse;
949 1.1 dyoung
950 1.1 dyoung ++fatps;
951 1.1 dyoung
952 1.1 dyoung for (i = 0; inuse && i < fatp_ntags(); ++i) {
953 1.1 dyoung uint32_t idx;
954 1.1 dyoung
955 1.1 dyoung if (!(inuse & (1 << i)))
956 1.1 dyoung continue;
957 1.1 dyoung
958 1.1 dyoung inuse ^= 1 << i;
959 1.1 dyoung
960 1.1 dyoung ++probes;
961 1.1 dyoung ++vtw_stats.probe[which];
962 1.1 dyoung
963 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
964 1.1 dyoung vtw = vtw_from_index(ctl, idx);
965 1.1 dyoung
966 1.1 dyoung db_trace(KTR_VTW
967 1.1 dyoung , (fp, "probe: %2d %6A:%4.4x %6A:%4.4x idx %x"
968 1.1 dyoung , i
969 1.1 dyoung , db_store(faddr, sizeof (*faddr)), fport
970 1.1 dyoung , db_store(laddr, sizeof (*laddr)), lport
971 1.1 dyoung , idx_decode(ctl, idx)));
972 1.1 dyoung
973 1.1 dyoung if (!vtw) {
974 1.1 dyoung /* Hopefully fast path.
975 1.1 dyoung */
976 1.1 dyoung continue;
977 1.1 dyoung }
978 1.1 dyoung
979 1.1 dyoung v6 = (void*)vtw;
980 1.1 dyoung
981 1.1 dyoung if (vtw_alive(vtw)
982 1.1 dyoung && ((which ? vtw->port_key : vtw->key)
983 1.1 dyoung == fatp_key(ctl->fat, fp, i))
984 1.1 dyoung && v6->lport == lport
985 1.1 dyoung && (which
986 1.1 dyoung || (v6->fport == fport
987 1.1 dyoung && !bcmp(&v6->faddr, faddr, sizeof (*faddr))
988 1.1 dyoung && !bcmp(&v6->laddr, laddr
989 1.1 dyoung , sizeof (*laddr))))) {
990 1.1 dyoung ++vtw_stats.hit[which];
991 1.1 dyoung
992 1.1 dyoung KASSERT(vtw->hashed);
993 1.1 dyoung goto out;
994 1.1 dyoung } else {
995 1.1 dyoung ++vtw_stats.losing[which];
996 1.1 dyoung ++losings;
997 1.1 dyoung }
998 1.1 dyoung }
999 1.1 dyoung
1000 1.1 dyoung if (fp->nxt) {
1001 1.1 dyoung fp = fatp_next(ctl->fat, fp);
1002 1.1 dyoung } else {
1003 1.1 dyoung break;
1004 1.1 dyoung }
1005 1.1 dyoung }
1006 1.1 dyoung ++vtw_stats.miss[which];
1007 1.1 dyoung vtw = 0;
1008 1.1 dyoung out:
1009 1.1 dyoung if (fatps > vtw_stats.max_chain[which])
1010 1.1 dyoung vtw_stats.max_chain[which] = fatps;
1011 1.1 dyoung if (probes > vtw_stats.max_probe[which])
1012 1.1 dyoung vtw_stats.max_probe[which] = probes;
1013 1.1 dyoung if (losings > vtw_stats.max_loss[which])
1014 1.1 dyoung vtw_stats.max_loss[which] = losings;
1015 1.1 dyoung
1016 1.1 dyoung return vtw;
1017 1.1 dyoung }
1018 1.1 dyoung
1019 1.1 dyoung /*!\brief port iterator
1020 1.1 dyoung */
1021 1.1 dyoung static vtw_t *
1022 1.1 dyoung vtw_next_port_v4(struct tcp_ports_iterator *it)
1023 1.1 dyoung {
1024 1.1 dyoung vtw_ctl_t *ctl = it->ctl;
1025 1.1 dyoung vtw_v4_t *v4;
1026 1.1 dyoung vtw_t *vtw;
1027 1.1 dyoung uint32_t tag;
1028 1.1 dyoung uint16_t lport = it->port;
1029 1.1 dyoung fatp_t *fp;
1030 1.1 dyoung int i;
1031 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
1032 1.1 dyoung
1033 1.1 dyoung tag = v4_port_tag(lport);
1034 1.1 dyoung if (!it->fp) {
1035 1.1 dyoung it->fp = ctl->fat->port[tag & ctl->fat->mask];
1036 1.1 dyoung it->slot_idx = 0;
1037 1.1 dyoung }
1038 1.1 dyoung fp = it->fp;
1039 1.1 dyoung
1040 1.1 dyoung while (fp) {
1041 1.1 dyoung uint32_t inuse = fp->inuse;
1042 1.1 dyoung
1043 1.1 dyoung ++fatps;
1044 1.1 dyoung
1045 1.1 dyoung for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) {
1046 1.1 dyoung uint32_t idx;
1047 1.1 dyoung
1048 1.1 dyoung if (!(inuse & (1 << i)))
1049 1.1 dyoung continue;
1050 1.1 dyoung
1051 1.16 martin inuse &= ~0U << i;
1052 1.1 dyoung
1053 1.1 dyoung if (i < it->slot_idx)
1054 1.1 dyoung continue;
1055 1.1 dyoung
1056 1.1 dyoung ++vtw_stats.probe[1];
1057 1.1 dyoung ++probes;
1058 1.1 dyoung
1059 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
1060 1.1 dyoung vtw = vtw_from_index(ctl, idx);
1061 1.1 dyoung
1062 1.1 dyoung if (!vtw) {
1063 1.1 dyoung /* Hopefully fast path.
1064 1.1 dyoung */
1065 1.1 dyoung continue;
1066 1.1 dyoung }
1067 1.1 dyoung
1068 1.1 dyoung v4 = (void*)vtw;
1069 1.1 dyoung
1070 1.1 dyoung if (vtw_alive(vtw)
1071 1.1 dyoung && vtw->port_key == fatp_key(ctl->fat, fp, i)
1072 1.1 dyoung && v4->lport == lport) {
1073 1.1 dyoung ++vtw_stats.hit[1];
1074 1.1 dyoung
1075 1.1 dyoung it->slot_idx = i + 1;
1076 1.1 dyoung
1077 1.1 dyoung goto out;
1078 1.1 dyoung } else if (vtw_alive(vtw)) {
1079 1.1 dyoung ++vtw_stats.losing[1];
1080 1.1 dyoung ++losings;
1081 1.1 dyoung
1082 1.1 dyoung db_trace(KTR_VTW
1083 1.1 dyoung , (vtw, "vtw:!mis"
1084 1.1 dyoung " port %8.8x:%4.4x %8.8x:%4.4x"
1085 1.1 dyoung " key %x port %x"
1086 1.1 dyoung , v4->faddr, v4->fport
1087 1.1 dyoung , v4->laddr, v4->lport
1088 1.1 dyoung , vtw->key
1089 1.1 dyoung , lport));
1090 1.1 dyoung } else {
1091 1.1 dyoung /* Really losing here. We are coming
1092 1.1 dyoung * up with references to free entries.
1093 1.1 dyoung * Might find it better to use
1094 1.1 dyoung * traditional, or need another
1095 1.1 dyoung * add-hockery. The other add-hockery
1096 1.1 dyoung * would be to pul more into into the
1097 1.1 dyoung * cache line to reject the false
1098 1.1 dyoung * hits.
1099 1.1 dyoung */
1100 1.1 dyoung ++vtw_stats.losing[1];
1101 1.1 dyoung ++losings;
1102 1.1 dyoung db_trace(KTR_VTW
1103 1.1 dyoung , (fp, "vtw:!mis port %x"
1104 1.1 dyoung " - free entry idx %x vtw %p"
1105 1.1 dyoung , lport
1106 1.1 dyoung , idx_decode(ctl, idx)
1107 1.1 dyoung , vtw));
1108 1.1 dyoung }
1109 1.1 dyoung }
1110 1.1 dyoung
1111 1.1 dyoung if (fp->nxt) {
1112 1.1 dyoung it->fp = fp = fatp_next(ctl->fat, fp);
1113 1.1 dyoung it->slot_idx = 0;
1114 1.1 dyoung } else {
1115 1.1 dyoung it->fp = 0;
1116 1.1 dyoung break;
1117 1.1 dyoung }
1118 1.1 dyoung }
1119 1.1 dyoung ++vtw_stats.miss[1];
1120 1.1 dyoung
1121 1.1 dyoung vtw = 0;
1122 1.1 dyoung out:
1123 1.1 dyoung if (fatps > vtw_stats.max_chain[1])
1124 1.1 dyoung vtw_stats.max_chain[1] = fatps;
1125 1.1 dyoung if (probes > vtw_stats.max_probe[1])
1126 1.1 dyoung vtw_stats.max_probe[1] = probes;
1127 1.1 dyoung if (losings > vtw_stats.max_loss[1])
1128 1.1 dyoung vtw_stats.max_loss[1] = losings;
1129 1.1 dyoung
1130 1.1 dyoung return vtw;
1131 1.1 dyoung }
1132 1.1 dyoung
1133 1.1 dyoung /*!\brief port iterator
1134 1.1 dyoung */
1135 1.1 dyoung static vtw_t *
1136 1.1 dyoung vtw_next_port_v6(struct tcp_ports_iterator *it)
1137 1.1 dyoung {
1138 1.1 dyoung vtw_ctl_t *ctl = it->ctl;
1139 1.1 dyoung vtw_v6_t *v6;
1140 1.1 dyoung vtw_t *vtw;
1141 1.1 dyoung uint32_t tag;
1142 1.1 dyoung uint16_t lport = it->port;
1143 1.1 dyoung fatp_t *fp;
1144 1.1 dyoung int i;
1145 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
1146 1.1 dyoung
1147 1.1 dyoung tag = v6_port_tag(lport);
1148 1.1 dyoung if (!it->fp) {
1149 1.1 dyoung it->fp = ctl->fat->port[tag & ctl->fat->mask];
1150 1.1 dyoung it->slot_idx = 0;
1151 1.1 dyoung }
1152 1.1 dyoung fp = it->fp;
1153 1.1 dyoung
1154 1.1 dyoung while (fp) {
1155 1.1 dyoung uint32_t inuse = fp->inuse;
1156 1.1 dyoung
1157 1.1 dyoung ++fatps;
1158 1.1 dyoung
1159 1.1 dyoung for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) {
1160 1.1 dyoung uint32_t idx;
1161 1.1 dyoung
1162 1.1 dyoung if (!(inuse & (1 << i)))
1163 1.1 dyoung continue;
1164 1.1 dyoung
1165 1.16 martin inuse &= ~0U << i;
1166 1.1 dyoung
1167 1.1 dyoung if (i < it->slot_idx)
1168 1.1 dyoung continue;
1169 1.1 dyoung
1170 1.1 dyoung ++vtw_stats.probe[1];
1171 1.1 dyoung ++probes;
1172 1.1 dyoung
1173 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
1174 1.1 dyoung vtw = vtw_from_index(ctl, idx);
1175 1.1 dyoung
1176 1.1 dyoung if (!vtw) {
1177 1.1 dyoung /* Hopefully fast path.
1178 1.1 dyoung */
1179 1.1 dyoung continue;
1180 1.1 dyoung }
1181 1.1 dyoung
1182 1.1 dyoung v6 = (void*)vtw;
1183 1.1 dyoung
1184 1.1 dyoung db_trace(KTR_VTW
1185 1.1 dyoung , (vtw, "vtw: i %x idx %x fp->tag %x"
1186 1.1 dyoung " tag %x xtra %x"
1187 1.1 dyoung , i, idx_decode(ctl, idx)
1188 1.1 dyoung , fp->tag[i], tag, fatp_xtra[i]));
1189 1.1 dyoung
1190 1.1 dyoung if (vtw_alive(vtw)
1191 1.1 dyoung && vtw->port_key == fatp_key(ctl->fat, fp, i)
1192 1.1 dyoung && v6->lport == lport) {
1193 1.1 dyoung ++vtw_stats.hit[1];
1194 1.1 dyoung
1195 1.1 dyoung db_trace(KTR_VTW
1196 1.1 dyoung , (fp, "vtw: nxt port %P - %4.4x"
1197 1.1 dyoung " idx %x key %x"
1198 1.1 dyoung , lport, lport
1199 1.1 dyoung , idx_decode(ctl, idx), vtw->key));
1200 1.1 dyoung
1201 1.1 dyoung it->slot_idx = i + 1;
1202 1.1 dyoung goto out;
1203 1.1 dyoung } else if (vtw_alive(vtw)) {
1204 1.1 dyoung ++vtw_stats.losing[1];
1205 1.1 dyoung
1206 1.1 dyoung db_trace(KTR_VTW
1207 1.1 dyoung , (vtw, "vtw:!mis port %6A:%4.4x"
1208 1.1 dyoung " %6A:%4.4x key %x port %x"
1209 1.1 dyoung , db_store(&v6->faddr
1210 1.1 dyoung , sizeof (v6->faddr))
1211 1.1 dyoung , v6->fport
1212 1.1 dyoung , db_store(&v6->laddr
1213 1.1 dyoung , sizeof (v6->faddr))
1214 1.1 dyoung , v6->lport
1215 1.1 dyoung , vtw->key
1216 1.1 dyoung , lport));
1217 1.1 dyoung } else {
1218 1.1 dyoung /* Really losing here. We are coming
1219 1.1 dyoung * up with references to free entries.
1220 1.1 dyoung * Might find it better to use
1221 1.1 dyoung * traditional, or need another
1222 1.1 dyoung * add-hockery. The other add-hockery
1223 1.1 dyoung * would be to pul more into into the
1224 1.1 dyoung * cache line to reject the false
1225 1.1 dyoung * hits.
1226 1.1 dyoung */
1227 1.1 dyoung ++vtw_stats.losing[1];
1228 1.1 dyoung ++losings;
1229 1.1 dyoung
1230 1.1 dyoung db_trace(KTR_VTW
1231 1.1 dyoung , (fp
1232 1.1 dyoung , "vtw:!mis port %x"
1233 1.1 dyoung " - free entry idx %x vtw %p"
1234 1.1 dyoung , lport, idx_decode(ctl, idx)
1235 1.1 dyoung , vtw));
1236 1.1 dyoung }
1237 1.1 dyoung }
1238 1.1 dyoung
1239 1.1 dyoung if (fp->nxt) {
1240 1.1 dyoung it->fp = fp = fatp_next(ctl->fat, fp);
1241 1.1 dyoung it->slot_idx = 0;
1242 1.1 dyoung } else {
1243 1.1 dyoung it->fp = 0;
1244 1.1 dyoung break;
1245 1.1 dyoung }
1246 1.1 dyoung }
1247 1.1 dyoung ++vtw_stats.miss[1];
1248 1.1 dyoung
1249 1.1 dyoung vtw = 0;
1250 1.1 dyoung out:
1251 1.1 dyoung if (fatps > vtw_stats.max_chain[1])
1252 1.1 dyoung vtw_stats.max_chain[1] = fatps;
1253 1.1 dyoung if (probes > vtw_stats.max_probe[1])
1254 1.1 dyoung vtw_stats.max_probe[1] = probes;
1255 1.1 dyoung if (losings > vtw_stats.max_loss[1])
1256 1.1 dyoung vtw_stats.max_loss[1] = losings;
1257 1.1 dyoung
1258 1.1 dyoung return vtw;
1259 1.1 dyoung }
1260 1.1 dyoung
1261 1.1 dyoung /*!\brief initialise the VTW allocation arena
1262 1.1 dyoung *
1263 1.1 dyoung * There are 1+3 allocation classes:
1264 1.1 dyoung * 0 classless
1265 1.1 dyoung * {1,2,3} MSL-class based allocation
1266 1.1 dyoung *
1267 1.1 dyoung * The allocation arenas are all initialised. Classless gets all the
1268 1.1 dyoung * space. MSL-class based divides the arena, so that allocation
1269 1.1 dyoung * within a class can proceed without having to consider entries
1270 1.1 dyoung * (aka: cache lines) from different classes.
1271 1.1 dyoung *
1272 1.1 dyoung * Usually, we are completely classless or class-based, but there can be
1273 1.1 dyoung * transition periods, corresponding to dynamic adjustments in the config
1274 1.1 dyoung * by the operator.
1275 1.1 dyoung */
1276 1.1 dyoung static void
1277 1.6 dyoung vtw_init(fatp_ctl_t *fat, vtw_ctl_t *ctl, const uint32_t n, vtw_t *ctl_base_v)
1278 1.1 dyoung {
1279 1.6 dyoung int class_n, i;
1280 1.6 dyoung vtw_t *base;
1281 1.1 dyoung
1282 1.6 dyoung ctl->base.v = ctl_base_v;
1283 1.1 dyoung
1284 1.6 dyoung if (ctl->is_v4) {
1285 1.6 dyoung ctl->lim.v4 = ctl->base.v4 + n - 1;
1286 1.6 dyoung ctl->alloc.v4 = ctl->base.v4;
1287 1.6 dyoung } else {
1288 1.6 dyoung ctl->lim.v6 = ctl->base.v6 + n - 1;
1289 1.6 dyoung ctl->alloc.v6 = ctl->base.v6;
1290 1.6 dyoung }
1291 1.1 dyoung
1292 1.6 dyoung ctl->nfree = n;
1293 1.6 dyoung ctl->ctl = ctl;
1294 1.1 dyoung
1295 1.6 dyoung ctl->idx_bits = 32;
1296 1.6 dyoung for (ctl->idx_mask = ~0; (ctl->idx_mask & (n-1)) == n-1; ) {
1297 1.6 dyoung ctl->idx_mask >>= 1;
1298 1.6 dyoung ctl->idx_bits -= 1;
1299 1.6 dyoung }
1300 1.1 dyoung
1301 1.6 dyoung ctl->idx_mask <<= 1;
1302 1.6 dyoung ctl->idx_mask |= 1;
1303 1.6 dyoung ctl->idx_bits += 1;
1304 1.1 dyoung
1305 1.6 dyoung ctl->fat = fat;
1306 1.6 dyoung fat->vtw = ctl;
1307 1.1 dyoung
1308 1.6 dyoung /* Divide the resources equally amongst the classes.
1309 1.6 dyoung * This is not optimal, as the different classes
1310 1.6 dyoung * arrive and leave at different rates, but it is
1311 1.6 dyoung * the best I can do for now.
1312 1.6 dyoung */
1313 1.6 dyoung class_n = n / (VTW_NCLASS-1);
1314 1.6 dyoung base = ctl->base.v;
1315 1.1 dyoung
1316 1.6 dyoung for (i = 1; i < VTW_NCLASS; ++i) {
1317 1.6 dyoung int j;
1318 1.1 dyoung
1319 1.6 dyoung ctl[i] = ctl[0];
1320 1.6 dyoung ctl[i].clidx = i;
1321 1.1 dyoung
1322 1.6 dyoung ctl[i].base.v = base;
1323 1.6 dyoung ctl[i].alloc = ctl[i].base;
1324 1.1 dyoung
1325 1.6 dyoung for (j = 0; j < class_n - 1; ++j) {
1326 1.6 dyoung if (tcp_msl_enable)
1327 1.6 dyoung base->msl_class = i;
1328 1.1 dyoung base = vtw_next(ctl, base);
1329 1.1 dyoung }
1330 1.6 dyoung
1331 1.6 dyoung ctl[i].lim.v = base;
1332 1.6 dyoung base = vtw_next(ctl, base);
1333 1.6 dyoung ctl[i].nfree = class_n;
1334 1.1 dyoung }
1335 1.1 dyoung
1336 1.1 dyoung vtw_debug_init();
1337 1.1 dyoung }
1338 1.1 dyoung
1339 1.1 dyoung /*!\brief map class to TCP MSL
1340 1.1 dyoung */
1341 1.1 dyoung static inline uint32_t
1342 1.11 matt class_to_msl(int msl_class)
1343 1.1 dyoung {
1344 1.11 matt switch (msl_class) {
1345 1.1 dyoung case 0:
1346 1.1 dyoung case 1:
1347 1.1 dyoung return tcp_msl_remote ? tcp_msl_remote : (TCPTV_MSL >> 0);
1348 1.1 dyoung case 2:
1349 1.1 dyoung return tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1);
1350 1.1 dyoung default:
1351 1.1 dyoung return tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2);
1352 1.1 dyoung }
1353 1.1 dyoung }
1354 1.1 dyoung
1355 1.1 dyoung /*!\brief map TCP MSL to class
1356 1.1 dyoung */
1357 1.1 dyoung static inline uint32_t
1358 1.1 dyoung msl_to_class(int msl)
1359 1.1 dyoung {
1360 1.1 dyoung if (tcp_msl_enable) {
1361 1.1 dyoung if (msl <= (tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2)))
1362 1.1 dyoung return 1+2;
1363 1.1 dyoung if (msl <= (tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1)))
1364 1.1 dyoung return 1+1;
1365 1.1 dyoung return 1;
1366 1.1 dyoung }
1367 1.1 dyoung return 0;
1368 1.1 dyoung }
1369 1.1 dyoung
1370 1.1 dyoung /*!\brief allocate a vtw entry
1371 1.1 dyoung */
1372 1.1 dyoung static inline vtw_t *
1373 1.1 dyoung vtw_alloc(vtw_ctl_t *ctl)
1374 1.1 dyoung {
1375 1.1 dyoung vtw_t *vtw = 0;
1376 1.1 dyoung int stuck = 0;
1377 1.1 dyoung int avail = ctl ? (ctl->nalloc + ctl->nfree) : 0;
1378 1.1 dyoung int msl;
1379 1.1 dyoung
1380 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
1381 1.1 dyoung
1382 1.1 dyoung /* If no resources, we will not get far.
1383 1.1 dyoung */
1384 1.1 dyoung if (!ctl || !ctl->base.v4 || avail <= 0)
1385 1.1 dyoung return 0;
1386 1.1 dyoung
1387 1.1 dyoung /* Obtain a free one.
1388 1.1 dyoung */
1389 1.1 dyoung while (!ctl->nfree) {
1390 1.1 dyoung vtw_age(ctl, 0);
1391 1.1 dyoung
1392 1.1 dyoung if (++stuck > avail) {
1393 1.1 dyoung /* When in transition between
1394 1.1 dyoung * schemes (classless, classed) we
1395 1.1 dyoung * can be stuck having to await the
1396 1.1 dyoung * expiration of cross-allocated entries.
1397 1.1 dyoung *
1398 1.1 dyoung * Returning zero means we will fall back to the
1399 1.1 dyoung * traditional TIME_WAIT handling, except in the
1400 1.1 dyoung * case of a re-shed, in which case we cannot
1401 1.1 dyoung * perform the reshecd, but will retain the extant
1402 1.1 dyoung * entry.
1403 1.1 dyoung */
1404 1.1 dyoung db_trace(KTR_VTW
1405 1.1 dyoung , (ctl, "vtw:!none free in class %x %x/%x"
1406 1.1 dyoung , ctl->clidx
1407 1.1 dyoung , ctl->nalloc, ctl->nfree));
1408 1.1 dyoung
1409 1.1 dyoung return 0;
1410 1.1 dyoung }
1411 1.1 dyoung }
1412 1.1 dyoung
1413 1.1 dyoung vtw = ctl->alloc.v;
1414 1.1 dyoung
1415 1.1 dyoung if (vtw->msl_class != ctl->clidx) {
1416 1.1 dyoung /* Usurping rules:
1417 1.1 dyoung * 0 -> {1,2,3} or {1,2,3} -> 0
1418 1.1 dyoung */
1419 1.1 dyoung KASSERT(!vtw->msl_class || !ctl->clidx);
1420 1.1 dyoung
1421 1.1 dyoung if (vtw->hashed || vtw->expire.tv_sec) {
1422 1.1 dyoung /* As this is owned by some other class,
1423 1.1 dyoung * we must wait for it to expire it.
1424 1.1 dyoung * This will only happen on class/classless
1425 1.1 dyoung * transitions, which are guaranteed to progress
1426 1.1 dyoung * to completion in small finite time, barring bugs.
1427 1.1 dyoung */
1428 1.1 dyoung db_trace(KTR_VTW
1429 1.1 dyoung , (ctl, "vtw:!%p class %x!=%x %x:%x%s"
1430 1.1 dyoung , vtw, vtw->msl_class, ctl->clidx
1431 1.1 dyoung , vtw->expire.tv_sec
1432 1.1 dyoung , vtw->expire.tv_usec
1433 1.1 dyoung , vtw->hashed ? " hashed" : ""));
1434 1.1 dyoung
1435 1.1 dyoung return 0;
1436 1.1 dyoung }
1437 1.1 dyoung
1438 1.1 dyoung db_trace(KTR_VTW
1439 1.1 dyoung , (ctl, "vtw:!%p usurped from %x to %x"
1440 1.1 dyoung , vtw, vtw->msl_class, ctl->clidx));
1441 1.1 dyoung
1442 1.1 dyoung vtw->msl_class = ctl->clidx;
1443 1.1 dyoung }
1444 1.1 dyoung
1445 1.1 dyoung if (vtw_alive(vtw)) {
1446 1.1 dyoung KASSERT(0 && "next free not free");
1447 1.1 dyoung return 0;
1448 1.1 dyoung }
1449 1.1 dyoung
1450 1.1 dyoung /* Advance allocation poiter.
1451 1.1 dyoung */
1452 1.1 dyoung ctl->alloc.v = vtw_next(ctl, vtw);
1453 1.1 dyoung
1454 1.1 dyoung --ctl->nfree;
1455 1.1 dyoung ++ctl->nalloc;
1456 1.1 dyoung
1457 1.1 dyoung msl = (2 * class_to_msl(ctl->clidx) * 1000) / PR_SLOWHZ; // msec
1458 1.1 dyoung
1459 1.1 dyoung /* mark expiration
1460 1.1 dyoung */
1461 1.3 drochner getmicrouptime(&vtw->expire);
1462 1.1 dyoung
1463 1.1 dyoung /* Move expiration into the future.
1464 1.1 dyoung */
1465 1.1 dyoung vtw->expire.tv_sec += msl / 1000;
1466 1.1 dyoung vtw->expire.tv_usec += 1000 * (msl % 1000);
1467 1.1 dyoung
1468 1.1 dyoung while (vtw->expire.tv_usec >= 1000*1000) {
1469 1.1 dyoung vtw->expire.tv_usec -= 1000*1000;
1470 1.1 dyoung vtw->expire.tv_sec += 1;
1471 1.1 dyoung }
1472 1.1 dyoung
1473 1.1 dyoung if (!ctl->oldest.v)
1474 1.1 dyoung ctl->oldest.v = vtw;
1475 1.1 dyoung
1476 1.1 dyoung return vtw;
1477 1.1 dyoung }
1478 1.1 dyoung
1479 1.1 dyoung /*!\brief expiration
1480 1.1 dyoung */
1481 1.1 dyoung static int
1482 1.1 dyoung vtw_age(vtw_ctl_t *ctl, struct timeval *_when)
1483 1.1 dyoung {
1484 1.1 dyoung vtw_t *vtw;
1485 1.1 dyoung struct timeval then, *when = _when;
1486 1.1 dyoung int maxtries = 0;
1487 1.1 dyoung
1488 1.1 dyoung if (!ctl->oldest.v) {
1489 1.1 dyoung KASSERT(!ctl->nalloc);
1490 1.1 dyoung return 0;
1491 1.1 dyoung }
1492 1.1 dyoung
1493 1.1 dyoung for (vtw = ctl->oldest.v; vtw && ctl->nalloc; ) {
1494 1.1 dyoung if (++maxtries > ctl->nalloc)
1495 1.1 dyoung break;
1496 1.1 dyoung
1497 1.1 dyoung if (vtw->msl_class != ctl->clidx) {
1498 1.1 dyoung db_trace(KTR_VTW
1499 1.1 dyoung , (vtw, "vtw:!age class mismatch %x != %x"
1500 1.1 dyoung , vtw->msl_class, ctl->clidx));
1501 1.1 dyoung /* XXXX
1502 1.1 dyoung * See if the appropriate action is to skip to the next.
1503 1.1 dyoung * XXXX
1504 1.1 dyoung */
1505 1.1 dyoung ctl->oldest.v = vtw = vtw_next(ctl, vtw);
1506 1.1 dyoung continue;
1507 1.1 dyoung }
1508 1.1 dyoung if (!when) {
1509 1.1 dyoung /* Latch oldest timeval if none specified.
1510 1.1 dyoung */
1511 1.1 dyoung then = vtw->expire;
1512 1.1 dyoung when = &then;
1513 1.1 dyoung }
1514 1.1 dyoung
1515 1.1 dyoung if (!timercmp(&vtw->expire, when, <=))
1516 1.1 dyoung break;
1517 1.1 dyoung
1518 1.1 dyoung db_trace(KTR_VTW
1519 1.1 dyoung , (vtw, "vtw: expire %x %8.8x:%8.8x %x/%x"
1520 1.1 dyoung , ctl->clidx
1521 1.1 dyoung , vtw->expire.tv_sec
1522 1.1 dyoung , vtw->expire.tv_usec
1523 1.1 dyoung , ctl->nalloc
1524 1.1 dyoung , ctl->nfree));
1525 1.1 dyoung
1526 1.1 dyoung if (!_when)
1527 1.1 dyoung ++vtw_stats.kill;
1528 1.1 dyoung
1529 1.1 dyoung vtw_del(ctl, vtw);
1530 1.1 dyoung vtw = ctl->oldest.v;
1531 1.1 dyoung }
1532 1.1 dyoung
1533 1.1 dyoung return ctl->nalloc; // # remaining allocated
1534 1.1 dyoung }
1535 1.1 dyoung
1536 1.1 dyoung static callout_t vtw_cs;
1537 1.1 dyoung
1538 1.1 dyoung /*!\brief notice the passage of time.
1539 1.1 dyoung * It seems to be getting faster. What happened to the year?
1540 1.1 dyoung */
1541 1.1 dyoung static void
1542 1.1 dyoung vtw_tick(void *arg)
1543 1.1 dyoung {
1544 1.1 dyoung struct timeval now;
1545 1.1 dyoung int i, cnt = 0;
1546 1.1 dyoung
1547 1.3 drochner getmicrouptime(&now);
1548 1.1 dyoung
1549 1.1 dyoung db_trace(KTR_VTW, (arg, "vtk: tick - now %8.8x:%8.8x"
1550 1.1 dyoung , now.tv_sec, now.tv_usec));
1551 1.1 dyoung
1552 1.1 dyoung mutex_enter(softnet_lock);
1553 1.1 dyoung
1554 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
1555 1.1 dyoung cnt += vtw_age(&vtw_tcpv4[i], &now);
1556 1.1 dyoung cnt += vtw_age(&vtw_tcpv6[i], &now);
1557 1.1 dyoung }
1558 1.1 dyoung
1559 1.1 dyoung /* Keep ticks coming while we need them.
1560 1.1 dyoung */
1561 1.1 dyoung if (cnt)
1562 1.1 dyoung callout_schedule(&vtw_cs, hz / 5);
1563 1.1 dyoung else {
1564 1.1 dyoung tcp_vtw_was_enabled = 0;
1565 1.1 dyoung tcbtable.vestige = 0;
1566 1.1 dyoung }
1567 1.1 dyoung mutex_exit(softnet_lock);
1568 1.1 dyoung }
1569 1.1 dyoung
1570 1.1 dyoung /* in_pcblookup_ports assist for handling vestigial entries.
1571 1.1 dyoung */
1572 1.1 dyoung static void *
1573 1.1 dyoung tcp_init_ports_v4(struct in_addr addr, u_int port, int wild)
1574 1.1 dyoung {
1575 1.1 dyoung struct tcp_ports_iterator *it = &tcp_ports_iterator_v4;
1576 1.1 dyoung
1577 1.1 dyoung bzero(it, sizeof (*it));
1578 1.1 dyoung
1579 1.1 dyoung /* Note: the reference to vtw_tcpv4[0] is fine.
1580 1.1 dyoung * We do not need per-class iteration. We just
1581 1.1 dyoung * need to get to the fat, and there is one
1582 1.1 dyoung * shared fat.
1583 1.1 dyoung */
1584 1.1 dyoung if (vtw_tcpv4[0].fat) {
1585 1.1 dyoung it->addr.v4 = addr;
1586 1.1 dyoung it->port = port;
1587 1.1 dyoung it->wild = !!wild;
1588 1.1 dyoung it->ctl = &vtw_tcpv4[0];
1589 1.1 dyoung
1590 1.1 dyoung ++vtw_stats.look[1];
1591 1.1 dyoung }
1592 1.1 dyoung
1593 1.1 dyoung return it;
1594 1.1 dyoung }
1595 1.1 dyoung
1596 1.1 dyoung /*!\brief export an IPv4 vtw.
1597 1.1 dyoung */
1598 1.1 dyoung static int
1599 1.1 dyoung vtw_export_v4(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res)
1600 1.1 dyoung {
1601 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
1602 1.1 dyoung
1603 1.1 dyoung bzero(res, sizeof (*res));
1604 1.1 dyoung
1605 1.1 dyoung if (ctl && vtw) {
1606 1.1 dyoung if (!ctl->clidx && vtw->msl_class)
1607 1.1 dyoung ctl += vtw->msl_class;
1608 1.1 dyoung else
1609 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
1610 1.1 dyoung
1611 1.1 dyoung res->valid = 1;
1612 1.1 dyoung res->v4 = 1;
1613 1.1 dyoung
1614 1.1 dyoung res->faddr.v4.s_addr = v4->faddr;
1615 1.1 dyoung res->laddr.v4.s_addr = v4->laddr;
1616 1.1 dyoung res->fport = v4->fport;
1617 1.1 dyoung res->lport = v4->lport;
1618 1.1 dyoung res->vtw = vtw; // netlock held over call(s)
1619 1.1 dyoung res->ctl = ctl;
1620 1.1 dyoung res->reuse_addr = vtw->reuse_addr;
1621 1.1 dyoung res->reuse_port = vtw->reuse_port;
1622 1.1 dyoung res->snd_nxt = vtw->snd_nxt;
1623 1.1 dyoung res->rcv_nxt = vtw->rcv_nxt;
1624 1.1 dyoung res->rcv_wnd = vtw->rcv_wnd;
1625 1.1 dyoung res->uid = vtw->uid;
1626 1.1 dyoung }
1627 1.1 dyoung
1628 1.1 dyoung return res->valid;
1629 1.1 dyoung }
1630 1.1 dyoung
1631 1.1 dyoung /*!\brief return next port in the port iterator. yowza.
1632 1.1 dyoung */
1633 1.1 dyoung static int
1634 1.1 dyoung tcp_next_port_v4(void *arg, struct vestigial_inpcb *res)
1635 1.1 dyoung {
1636 1.1 dyoung struct tcp_ports_iterator *it = arg;
1637 1.1 dyoung vtw_t *vtw = 0;
1638 1.1 dyoung
1639 1.1 dyoung if (it->ctl)
1640 1.1 dyoung vtw = vtw_next_port_v4(it);
1641 1.1 dyoung
1642 1.1 dyoung if (!vtw)
1643 1.1 dyoung it->ctl = 0;
1644 1.1 dyoung
1645 1.1 dyoung return vtw_export_v4(it->ctl, vtw, res);
1646 1.1 dyoung }
1647 1.1 dyoung
1648 1.1 dyoung static int
1649 1.1 dyoung tcp_lookup_v4(struct in_addr faddr, uint16_t fport,
1650 1.1 dyoung struct in_addr laddr, uint16_t lport,
1651 1.1 dyoung struct vestigial_inpcb *res)
1652 1.1 dyoung {
1653 1.1 dyoung vtw_t *vtw;
1654 1.1 dyoung vtw_ctl_t *ctl;
1655 1.1 dyoung
1656 1.1 dyoung
1657 1.1 dyoung db_trace(KTR_VTW
1658 1.1 dyoung , (res, "vtw: lookup %A:%P %A:%P"
1659 1.1 dyoung , faddr, fport
1660 1.1 dyoung , laddr, lport));
1661 1.1 dyoung
1662 1.1 dyoung vtw = vtw_lookup_hash_v4((ctl = &vtw_tcpv4[0])
1663 1.1 dyoung , faddr.s_addr, fport
1664 1.1 dyoung , laddr.s_addr, lport, 0);
1665 1.1 dyoung
1666 1.1 dyoung return vtw_export_v4(ctl, vtw, res);
1667 1.1 dyoung }
1668 1.1 dyoung
1669 1.1 dyoung /* in_pcblookup_ports assist for handling vestigial entries.
1670 1.1 dyoung */
1671 1.1 dyoung static void *
1672 1.1 dyoung tcp_init_ports_v6(const struct in6_addr *addr, u_int port, int wild)
1673 1.1 dyoung {
1674 1.1 dyoung struct tcp_ports_iterator *it = &tcp_ports_iterator_v6;
1675 1.1 dyoung
1676 1.1 dyoung bzero(it, sizeof (*it));
1677 1.1 dyoung
1678 1.1 dyoung /* Note: the reference to vtw_tcpv6[0] is fine.
1679 1.1 dyoung * We do not need per-class iteration. We just
1680 1.1 dyoung * need to get to the fat, and there is one
1681 1.1 dyoung * shared fat.
1682 1.1 dyoung */
1683 1.1 dyoung if (vtw_tcpv6[0].fat) {
1684 1.1 dyoung it->addr.v6 = *addr;
1685 1.1 dyoung it->port = port;
1686 1.1 dyoung it->wild = !!wild;
1687 1.1 dyoung it->ctl = &vtw_tcpv6[0];
1688 1.1 dyoung
1689 1.1 dyoung ++vtw_stats.look[1];
1690 1.1 dyoung }
1691 1.1 dyoung
1692 1.1 dyoung return it;
1693 1.1 dyoung }
1694 1.1 dyoung
1695 1.1 dyoung /*!\brief export an IPv6 vtw.
1696 1.1 dyoung */
1697 1.1 dyoung static int
1698 1.1 dyoung vtw_export_v6(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res)
1699 1.1 dyoung {
1700 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
1701 1.1 dyoung
1702 1.1 dyoung bzero(res, sizeof (*res));
1703 1.1 dyoung
1704 1.1 dyoung if (ctl && vtw) {
1705 1.1 dyoung if (!ctl->clidx && vtw->msl_class)
1706 1.1 dyoung ctl += vtw->msl_class;
1707 1.1 dyoung else
1708 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
1709 1.1 dyoung
1710 1.1 dyoung res->valid = 1;
1711 1.1 dyoung res->v4 = 0;
1712 1.1 dyoung
1713 1.1 dyoung res->faddr.v6 = v6->faddr;
1714 1.1 dyoung res->laddr.v6 = v6->laddr;
1715 1.1 dyoung res->fport = v6->fport;
1716 1.1 dyoung res->lport = v6->lport;
1717 1.1 dyoung res->vtw = vtw; // netlock held over call(s)
1718 1.1 dyoung res->ctl = ctl;
1719 1.1 dyoung
1720 1.1 dyoung res->v6only = vtw->v6only;
1721 1.1 dyoung res->reuse_addr = vtw->reuse_addr;
1722 1.1 dyoung res->reuse_port = vtw->reuse_port;
1723 1.1 dyoung
1724 1.1 dyoung res->snd_nxt = vtw->snd_nxt;
1725 1.1 dyoung res->rcv_nxt = vtw->rcv_nxt;
1726 1.1 dyoung res->rcv_wnd = vtw->rcv_wnd;
1727 1.1 dyoung res->uid = vtw->uid;
1728 1.1 dyoung }
1729 1.1 dyoung
1730 1.1 dyoung return res->valid;
1731 1.1 dyoung }
1732 1.1 dyoung
1733 1.1 dyoung static int
1734 1.1 dyoung tcp_next_port_v6(void *arg, struct vestigial_inpcb *res)
1735 1.1 dyoung {
1736 1.1 dyoung struct tcp_ports_iterator *it = arg;
1737 1.1 dyoung vtw_t *vtw = 0;
1738 1.1 dyoung
1739 1.1 dyoung if (it->ctl)
1740 1.1 dyoung vtw = vtw_next_port_v6(it);
1741 1.1 dyoung
1742 1.1 dyoung if (!vtw)
1743 1.1 dyoung it->ctl = 0;
1744 1.1 dyoung
1745 1.1 dyoung return vtw_export_v6(it->ctl, vtw, res);
1746 1.1 dyoung }
1747 1.1 dyoung
1748 1.1 dyoung static int
1749 1.1 dyoung tcp_lookup_v6(const struct in6_addr *faddr, uint16_t fport,
1750 1.1 dyoung const struct in6_addr *laddr, uint16_t lport,
1751 1.1 dyoung struct vestigial_inpcb *res)
1752 1.1 dyoung {
1753 1.1 dyoung vtw_ctl_t *ctl;
1754 1.1 dyoung vtw_t *vtw;
1755 1.1 dyoung
1756 1.1 dyoung db_trace(KTR_VTW
1757 1.1 dyoung , (res, "vtw: lookup %6A:%P %6A:%P"
1758 1.1 dyoung , db_store(faddr, sizeof (*faddr)), fport
1759 1.1 dyoung , db_store(laddr, sizeof (*laddr)), lport));
1760 1.1 dyoung
1761 1.1 dyoung vtw = vtw_lookup_hash_v6((ctl = &vtw_tcpv6[0])
1762 1.1 dyoung , faddr, fport
1763 1.1 dyoung , laddr, lport, 0);
1764 1.1 dyoung
1765 1.1 dyoung return vtw_export_v6(ctl, vtw, res);
1766 1.1 dyoung }
1767 1.1 dyoung
1768 1.1 dyoung static vestigial_hooks_t tcp_hooks = {
1769 1.1 dyoung .init_ports4 = tcp_init_ports_v4,
1770 1.1 dyoung .next_port4 = tcp_next_port_v4,
1771 1.1 dyoung .lookup4 = tcp_lookup_v4,
1772 1.1 dyoung .init_ports6 = tcp_init_ports_v6,
1773 1.1 dyoung .next_port6 = tcp_next_port_v6,
1774 1.1 dyoung .lookup6 = tcp_lookup_v6,
1775 1.1 dyoung };
1776 1.1 dyoung
1777 1.1 dyoung static bool
1778 1.1 dyoung vtw_select(int af, fatp_ctl_t **fatp, vtw_ctl_t **ctlp)
1779 1.1 dyoung {
1780 1.1 dyoung fatp_ctl_t *fat;
1781 1.1 dyoung vtw_ctl_t *ctl;
1782 1.1 dyoung
1783 1.1 dyoung switch (af) {
1784 1.1 dyoung case AF_INET:
1785 1.1 dyoung fat = &fat_tcpv4;
1786 1.1 dyoung ctl = &vtw_tcpv4[0];
1787 1.1 dyoung break;
1788 1.1 dyoung case AF_INET6:
1789 1.1 dyoung fat = &fat_tcpv6;
1790 1.1 dyoung ctl = &vtw_tcpv6[0];
1791 1.1 dyoung break;
1792 1.1 dyoung default:
1793 1.1 dyoung return false;
1794 1.1 dyoung }
1795 1.1 dyoung if (fatp != NULL)
1796 1.1 dyoung *fatp = fat;
1797 1.1 dyoung if (ctlp != NULL)
1798 1.1 dyoung *ctlp = ctl;
1799 1.1 dyoung return true;
1800 1.1 dyoung }
1801 1.1 dyoung
1802 1.1 dyoung /*!\brief initialize controlling instance
1803 1.1 dyoung */
1804 1.1 dyoung static int
1805 1.1 dyoung vtw_control_init(int af)
1806 1.1 dyoung {
1807 1.1 dyoung fatp_ctl_t *fat;
1808 1.1 dyoung vtw_ctl_t *ctl;
1809 1.6 dyoung fatp_t *fat_base;
1810 1.6 dyoung fatp_t **fat_hash;
1811 1.6 dyoung vtw_t *ctl_base_v;
1812 1.6 dyoung uint32_t n, m;
1813 1.6 dyoung size_t sz;
1814 1.6 dyoung
1815 1.6 dyoung KASSERT(powerof2(tcp_vtw_entries));
1816 1.1 dyoung
1817 1.1 dyoung if (!vtw_select(af, &fat, &ctl))
1818 1.1 dyoung return EAFNOSUPPORT;
1819 1.1 dyoung
1820 1.6 dyoung if (fat->hash != NULL) {
1821 1.6 dyoung KASSERT(fat->base != NULL && ctl->base.v != NULL);
1822 1.6 dyoung return 0;
1823 1.6 dyoung }
1824 1.6 dyoung
1825 1.6 dyoung /* Allocate 10% more capacity in the fat pointers.
1826 1.6 dyoung * We should only need ~#hash additional based on
1827 1.6 dyoung * how they age, but TIME_WAIT assassination could cause
1828 1.6 dyoung * sparse fat pointer utilisation.
1829 1.6 dyoung */
1830 1.6 dyoung m = 512;
1831 1.6 dyoung n = 2*m + (11 * (tcp_vtw_entries / fatp_ntags())) / 10;
1832 1.6 dyoung sz = (ctl->is_v4 ? sizeof(vtw_v4_t) : sizeof(vtw_v6_t));
1833 1.6 dyoung
1834 1.6 dyoung fat_hash = kmem_zalloc(2*m * sizeof(fatp_t *), KM_NOSLEEP);
1835 1.6 dyoung
1836 1.6 dyoung if (fat_hash == NULL) {
1837 1.6 dyoung printf("%s: could not allocate %zu bytes for "
1838 1.6 dyoung "hash anchors", __func__, 2*m * sizeof(fatp_t *));
1839 1.6 dyoung return ENOMEM;
1840 1.6 dyoung }
1841 1.1 dyoung
1842 1.6 dyoung fat_base = kmem_zalloc(2*n * sizeof(fatp_t), KM_NOSLEEP);
1843 1.1 dyoung
1844 1.6 dyoung if (fat_base == NULL) {
1845 1.6 dyoung kmem_free(fat_hash, 2*m * sizeof (fatp_t *));
1846 1.6 dyoung printf("%s: could not allocate %zu bytes for "
1847 1.6 dyoung "fatp_t array", __func__, 2*n * sizeof(fatp_t));
1848 1.6 dyoung return ENOMEM;
1849 1.6 dyoung }
1850 1.1 dyoung
1851 1.6 dyoung ctl_base_v = kmem_zalloc(tcp_vtw_entries * sz, KM_NOSLEEP);
1852 1.1 dyoung
1853 1.6 dyoung if (ctl_base_v == NULL) {
1854 1.6 dyoung kmem_free(fat_hash, 2*m * sizeof (fatp_t *));
1855 1.6 dyoung kmem_free(fat_base, 2*n * sizeof(fatp_t));
1856 1.6 dyoung printf("%s: could not allocate %zu bytes for "
1857 1.6 dyoung "vtw_t array", __func__, tcp_vtw_entries * sz);
1858 1.6 dyoung return ENOMEM;
1859 1.1 dyoung }
1860 1.1 dyoung
1861 1.6 dyoung fatp_init(fat, n, m, fat_base, fat_hash);
1862 1.1 dyoung
1863 1.6 dyoung vtw_init(fat, ctl, tcp_vtw_entries, ctl_base_v);
1864 1.1 dyoung
1865 1.1 dyoung return 0;
1866 1.1 dyoung }
1867 1.1 dyoung
1868 1.1 dyoung /*!\brief select controlling instance
1869 1.1 dyoung */
1870 1.1 dyoung static vtw_ctl_t *
1871 1.1 dyoung vtw_control(int af, uint32_t msl)
1872 1.1 dyoung {
1873 1.1 dyoung fatp_ctl_t *fat;
1874 1.1 dyoung vtw_ctl_t *ctl;
1875 1.11 matt int msl_class = msl_to_class(msl);
1876 1.1 dyoung
1877 1.1 dyoung if (!vtw_select(af, &fat, &ctl))
1878 1.1 dyoung return NULL;
1879 1.1 dyoung
1880 1.1 dyoung if (!fat->base || !ctl->base.v)
1881 1.1 dyoung return NULL;
1882 1.1 dyoung
1883 1.5 dyoung if (!tcp_vtw_was_enabled) {
1884 1.5 dyoung /* This guarantees is timer ticks until we no longer need them.
1885 1.5 dyoung */
1886 1.5 dyoung tcp_vtw_was_enabled = 1;
1887 1.5 dyoung
1888 1.5 dyoung callout_schedule(&vtw_cs, hz / 5);
1889 1.5 dyoung
1890 1.5 dyoung tcbtable.vestige = &tcp_hooks;
1891 1.5 dyoung }
1892 1.5 dyoung
1893 1.11 matt return ctl + msl_class;
1894 1.1 dyoung }
1895 1.1 dyoung
1896 1.1 dyoung /*!\brief add TCP pcb to vestigial timewait
1897 1.1 dyoung */
1898 1.1 dyoung int
1899 1.1 dyoung vtw_add(int af, struct tcpcb *tp)
1900 1.1 dyoung {
1901 1.10 martin #ifdef VTW_DEBUG
1902 1.1 dyoung int enable;
1903 1.10 martin #endif
1904 1.1 dyoung vtw_ctl_t *ctl;
1905 1.1 dyoung vtw_t *vtw;
1906 1.1 dyoung
1907 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
1908 1.1 dyoung
1909 1.1 dyoung ctl = vtw_control(af, tp->t_msl);
1910 1.1 dyoung if (!ctl)
1911 1.1 dyoung return 0;
1912 1.1 dyoung
1913 1.10 martin #ifdef VTW_DEBUG
1914 1.1 dyoung enable = (af == AF_INET) ? tcp4_vtw_enable : tcp6_vtw_enable;
1915 1.10 martin #endif
1916 1.1 dyoung
1917 1.1 dyoung vtw = vtw_alloc(ctl);
1918 1.1 dyoung
1919 1.1 dyoung if (vtw) {
1920 1.1 dyoung vtw->snd_nxt = tp->snd_nxt;
1921 1.1 dyoung vtw->rcv_nxt = tp->rcv_nxt;
1922 1.1 dyoung
1923 1.1 dyoung switch (af) {
1924 1.1 dyoung case AF_INET: {
1925 1.1 dyoung struct inpcb *inp = tp->t_inpcb;
1926 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
1927 1.1 dyoung
1928 1.1 dyoung v4->faddr = inp->inp_faddr.s_addr;
1929 1.1 dyoung v4->laddr = inp->inp_laddr.s_addr;
1930 1.1 dyoung v4->fport = inp->inp_fport;
1931 1.1 dyoung v4->lport = inp->inp_lport;
1932 1.1 dyoung
1933 1.1 dyoung vtw->reuse_port = !!(inp->inp_socket->so_options
1934 1.1 dyoung & SO_REUSEPORT);
1935 1.1 dyoung vtw->reuse_addr = !!(inp->inp_socket->so_options
1936 1.1 dyoung & SO_REUSEADDR);
1937 1.1 dyoung vtw->v6only = 0;
1938 1.1 dyoung vtw->uid = inp->inp_socket->so_uidinfo->ui_uid;
1939 1.1 dyoung
1940 1.1 dyoung vtw_inshash_v4(ctl, vtw);
1941 1.1 dyoung
1942 1.1 dyoung
1943 1.1 dyoung #ifdef VTW_DEBUG
1944 1.1 dyoung /* Immediate lookup (connected and port) to
1945 1.1 dyoung * ensure at least that works!
1946 1.1 dyoung */
1947 1.1 dyoung if (enable & 4) {
1948 1.1 dyoung KASSERT(vtw_lookup_hash_v4
1949 1.1 dyoung (ctl
1950 1.1 dyoung , inp->inp_faddr.s_addr, inp->inp_fport
1951 1.1 dyoung , inp->inp_laddr.s_addr, inp->inp_lport
1952 1.1 dyoung , 0)
1953 1.1 dyoung == vtw);
1954 1.1 dyoung KASSERT(vtw_lookup_hash_v4
1955 1.1 dyoung (ctl
1956 1.1 dyoung , inp->inp_faddr.s_addr, inp->inp_fport
1957 1.1 dyoung , inp->inp_laddr.s_addr, inp->inp_lport
1958 1.1 dyoung , 1));
1959 1.1 dyoung }
1960 1.1 dyoung /* Immediate port iterator functionality check: not wild
1961 1.1 dyoung */
1962 1.1 dyoung if (enable & 8) {
1963 1.1 dyoung struct tcp_ports_iterator *it;
1964 1.1 dyoung struct vestigial_inpcb res;
1965 1.1 dyoung int cnt = 0;
1966 1.1 dyoung
1967 1.1 dyoung it = tcp_init_ports_v4(inp->inp_laddr
1968 1.1 dyoung , inp->inp_lport, 0);
1969 1.1 dyoung
1970 1.1 dyoung while (tcp_next_port_v4(it, &res)) {
1971 1.1 dyoung ++cnt;
1972 1.1 dyoung }
1973 1.1 dyoung KASSERT(cnt);
1974 1.1 dyoung }
1975 1.1 dyoung /* Immediate port iterator functionality check: wild
1976 1.1 dyoung */
1977 1.1 dyoung if (enable & 16) {
1978 1.1 dyoung struct tcp_ports_iterator *it;
1979 1.1 dyoung struct vestigial_inpcb res;
1980 1.1 dyoung struct in_addr any;
1981 1.1 dyoung int cnt = 0;
1982 1.1 dyoung
1983 1.1 dyoung any.s_addr = htonl(INADDR_ANY);
1984 1.1 dyoung
1985 1.1 dyoung it = tcp_init_ports_v4(any, inp->inp_lport, 1);
1986 1.1 dyoung
1987 1.1 dyoung while (tcp_next_port_v4(it, &res)) {
1988 1.1 dyoung ++cnt;
1989 1.1 dyoung }
1990 1.1 dyoung KASSERT(cnt);
1991 1.1 dyoung }
1992 1.1 dyoung #endif /* VTW_DEBUG */
1993 1.1 dyoung break;
1994 1.1 dyoung }
1995 1.1 dyoung
1996 1.1 dyoung case AF_INET6: {
1997 1.1 dyoung struct in6pcb *inp = tp->t_in6pcb;
1998 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
1999 1.1 dyoung
2000 1.1 dyoung v6->faddr = inp->in6p_faddr;
2001 1.1 dyoung v6->laddr = inp->in6p_laddr;
2002 1.1 dyoung v6->fport = inp->in6p_fport;
2003 1.1 dyoung v6->lport = inp->in6p_lport;
2004 1.1 dyoung
2005 1.1 dyoung vtw->reuse_port = !!(inp->in6p_socket->so_options
2006 1.1 dyoung & SO_REUSEPORT);
2007 1.1 dyoung vtw->reuse_addr = !!(inp->in6p_socket->so_options
2008 1.1 dyoung & SO_REUSEADDR);
2009 1.1 dyoung vtw->v6only = !!(inp->in6p_flags
2010 1.1 dyoung & IN6P_IPV6_V6ONLY);
2011 1.1 dyoung vtw->uid = inp->in6p_socket->so_uidinfo->ui_uid;
2012 1.1 dyoung
2013 1.1 dyoung vtw_inshash_v6(ctl, vtw);
2014 1.1 dyoung #ifdef VTW_DEBUG
2015 1.1 dyoung /* Immediate lookup (connected and port) to
2016 1.1 dyoung * ensure at least that works!
2017 1.1 dyoung */
2018 1.1 dyoung if (enable & 4) {
2019 1.1 dyoung KASSERT(vtw_lookup_hash_v6(ctl
2020 1.1 dyoung , &inp->in6p_faddr, inp->in6p_fport
2021 1.1 dyoung , &inp->in6p_laddr, inp->in6p_lport
2022 1.1 dyoung , 0)
2023 1.1 dyoung == vtw);
2024 1.1 dyoung KASSERT(vtw_lookup_hash_v6
2025 1.1 dyoung (ctl
2026 1.1 dyoung , &inp->in6p_faddr, inp->in6p_fport
2027 1.1 dyoung , &inp->in6p_laddr, inp->in6p_lport
2028 1.1 dyoung , 1));
2029 1.1 dyoung }
2030 1.1 dyoung /* Immediate port iterator functionality check: not wild
2031 1.1 dyoung */
2032 1.1 dyoung if (enable & 8) {
2033 1.1 dyoung struct tcp_ports_iterator *it;
2034 1.1 dyoung struct vestigial_inpcb res;
2035 1.1 dyoung int cnt = 0;
2036 1.1 dyoung
2037 1.1 dyoung it = tcp_init_ports_v6(&inp->in6p_laddr
2038 1.1 dyoung , inp->in6p_lport, 0);
2039 1.1 dyoung
2040 1.1 dyoung while (tcp_next_port_v6(it, &res)) {
2041 1.1 dyoung ++cnt;
2042 1.1 dyoung }
2043 1.1 dyoung KASSERT(cnt);
2044 1.1 dyoung }
2045 1.1 dyoung /* Immediate port iterator functionality check: wild
2046 1.1 dyoung */
2047 1.1 dyoung if (enable & 16) {
2048 1.1 dyoung struct tcp_ports_iterator *it;
2049 1.1 dyoung struct vestigial_inpcb res;
2050 1.1 dyoung static struct in6_addr any = IN6ADDR_ANY_INIT;
2051 1.1 dyoung int cnt = 0;
2052 1.1 dyoung
2053 1.1 dyoung it = tcp_init_ports_v6(&any
2054 1.1 dyoung , inp->in6p_lport, 1);
2055 1.1 dyoung
2056 1.1 dyoung while (tcp_next_port_v6(it, &res)) {
2057 1.1 dyoung ++cnt;
2058 1.1 dyoung }
2059 1.1 dyoung KASSERT(cnt);
2060 1.1 dyoung }
2061 1.1 dyoung #endif /* VTW_DEBUG */
2062 1.1 dyoung break;
2063 1.1 dyoung }
2064 1.1 dyoung }
2065 1.1 dyoung
2066 1.1 dyoung tcp_canceltimers(tp);
2067 1.1 dyoung tp = tcp_close(tp);
2068 1.1 dyoung KASSERT(!tp);
2069 1.1 dyoung
2070 1.1 dyoung return 1;
2071 1.1 dyoung }
2072 1.1 dyoung
2073 1.1 dyoung return 0;
2074 1.1 dyoung }
2075 1.1 dyoung
2076 1.1 dyoung /*!\brief restart timer for vestigial time-wait entry
2077 1.1 dyoung */
2078 1.1 dyoung static void
2079 1.1 dyoung vtw_restart_v4(vestigial_inpcb_t *vp)
2080 1.1 dyoung {
2081 1.1 dyoung vtw_v4_t copy = *(vtw_v4_t*)vp->vtw;
2082 1.1 dyoung vtw_t *vtw;
2083 1.1 dyoung vtw_t *cp = ©.common;
2084 1.1 dyoung vtw_ctl_t *ctl;
2085 1.1 dyoung
2086 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
2087 1.1 dyoung
2088 1.1 dyoung db_trace(KTR_VTW
2089 1.1 dyoung , (vp->vtw, "vtw: restart %A:%P %A:%P"
2090 1.1 dyoung , vp->faddr.v4.s_addr, vp->fport
2091 1.1 dyoung , vp->laddr.v4.s_addr, vp->lport));
2092 1.1 dyoung
2093 1.1 dyoung /* Class might have changed, so have a squiz.
2094 1.1 dyoung */
2095 1.1 dyoung ctl = vtw_control(AF_INET, class_to_msl(cp->msl_class));
2096 1.1 dyoung vtw = vtw_alloc(ctl);
2097 1.1 dyoung
2098 1.1 dyoung if (vtw) {
2099 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
2100 1.1 dyoung
2101 1.1 dyoung /* Safe now to unhash the old entry
2102 1.1 dyoung */
2103 1.1 dyoung vtw_del(vp->ctl, vp->vtw);
2104 1.1 dyoung
2105 1.1 dyoung vtw->snd_nxt = cp->snd_nxt;
2106 1.1 dyoung vtw->rcv_nxt = cp->rcv_nxt;
2107 1.1 dyoung
2108 1.1 dyoung v4->faddr = copy.faddr;
2109 1.1 dyoung v4->laddr = copy.laddr;
2110 1.1 dyoung v4->fport = copy.fport;
2111 1.1 dyoung v4->lport = copy.lport;
2112 1.1 dyoung
2113 1.1 dyoung vtw->reuse_port = cp->reuse_port;
2114 1.1 dyoung vtw->reuse_addr = cp->reuse_addr;
2115 1.1 dyoung vtw->v6only = 0;
2116 1.1 dyoung vtw->uid = cp->uid;
2117 1.1 dyoung
2118 1.1 dyoung vtw_inshash_v4(ctl, vtw);
2119 1.1 dyoung }
2120 1.1 dyoung
2121 1.1 dyoung vp->valid = 0;
2122 1.1 dyoung }
2123 1.1 dyoung
2124 1.1 dyoung /*!\brief restart timer for vestigial time-wait entry
2125 1.1 dyoung */
2126 1.1 dyoung static void
2127 1.1 dyoung vtw_restart_v6(vestigial_inpcb_t *vp)
2128 1.1 dyoung {
2129 1.1 dyoung vtw_v6_t copy = *(vtw_v6_t*)vp->vtw;
2130 1.1 dyoung vtw_t *vtw;
2131 1.1 dyoung vtw_t *cp = ©.common;
2132 1.1 dyoung vtw_ctl_t *ctl;
2133 1.1 dyoung
2134 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
2135 1.1 dyoung
2136 1.1 dyoung db_trace(KTR_VTW
2137 1.1 dyoung , (vp->vtw, "vtw: restart %6A:%P %6A:%P"
2138 1.1 dyoung , db_store(&vp->faddr.v6, sizeof (vp->faddr.v6))
2139 1.1 dyoung , vp->fport
2140 1.1 dyoung , db_store(&vp->laddr.v6, sizeof (vp->laddr.v6))
2141 1.1 dyoung , vp->lport));
2142 1.1 dyoung
2143 1.1 dyoung /* Class might have changed, so have a squiz.
2144 1.1 dyoung */
2145 1.1 dyoung ctl = vtw_control(AF_INET6, class_to_msl(cp->msl_class));
2146 1.1 dyoung vtw = vtw_alloc(ctl);
2147 1.1 dyoung
2148 1.1 dyoung if (vtw) {
2149 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
2150 1.1 dyoung
2151 1.1 dyoung /* Safe now to unhash the old entry
2152 1.1 dyoung */
2153 1.1 dyoung vtw_del(vp->ctl, vp->vtw);
2154 1.1 dyoung
2155 1.1 dyoung vtw->snd_nxt = cp->snd_nxt;
2156 1.1 dyoung vtw->rcv_nxt = cp->rcv_nxt;
2157 1.1 dyoung
2158 1.1 dyoung v6->faddr = copy.faddr;
2159 1.1 dyoung v6->laddr = copy.laddr;
2160 1.1 dyoung v6->fport = copy.fport;
2161 1.1 dyoung v6->lport = copy.lport;
2162 1.1 dyoung
2163 1.1 dyoung vtw->reuse_port = cp->reuse_port;
2164 1.1 dyoung vtw->reuse_addr = cp->reuse_addr;
2165 1.1 dyoung vtw->v6only = cp->v6only;
2166 1.1 dyoung vtw->uid = cp->uid;
2167 1.1 dyoung
2168 1.1 dyoung vtw_inshash_v6(ctl, vtw);
2169 1.1 dyoung }
2170 1.1 dyoung
2171 1.1 dyoung vp->valid = 0;
2172 1.1 dyoung }
2173 1.1 dyoung
2174 1.1 dyoung /*!\brief restart timer for vestigial time-wait entry
2175 1.1 dyoung */
2176 1.1 dyoung void
2177 1.1 dyoung vtw_restart(vestigial_inpcb_t *vp)
2178 1.1 dyoung {
2179 1.1 dyoung if (!vp || !vp->valid)
2180 1.1 dyoung return;
2181 1.1 dyoung
2182 1.1 dyoung if (vp->v4)
2183 1.1 dyoung vtw_restart_v4(vp);
2184 1.1 dyoung else
2185 1.1 dyoung vtw_restart_v6(vp);
2186 1.1 dyoung }
2187 1.1 dyoung
2188 1.1 dyoung int
2189 1.7 dyoung sysctl_tcp_vtw_enable(SYSCTLFN_ARGS)
2190 1.7 dyoung {
2191 1.7 dyoung int en, rc;
2192 1.7 dyoung struct sysctlnode node;
2193 1.7 dyoung
2194 1.7 dyoung node = *rnode;
2195 1.7 dyoung en = *(int *)rnode->sysctl_data;
2196 1.7 dyoung node.sysctl_data = &en;
2197 1.7 dyoung
2198 1.7 dyoung rc = sysctl_lookup(SYSCTLFN_CALL(&node));
2199 1.7 dyoung if (rc != 0 || newp == NULL)
2200 1.7 dyoung return rc;
2201 1.7 dyoung
2202 1.7 dyoung if (rnode->sysctl_data != &tcp4_vtw_enable &&
2203 1.7 dyoung rnode->sysctl_data != &tcp6_vtw_enable)
2204 1.7 dyoung rc = ENOENT;
2205 1.7 dyoung else if ((en & 1) == 0)
2206 1.7 dyoung rc = 0;
2207 1.7 dyoung else if (rnode->sysctl_data == &tcp4_vtw_enable)
2208 1.7 dyoung rc = vtw_control_init(AF_INET);
2209 1.7 dyoung else /* rnode->sysctl_data == &tcp6_vtw_enable */
2210 1.7 dyoung rc = vtw_control_init(AF_INET6);
2211 1.7 dyoung
2212 1.7 dyoung if (rc == 0)
2213 1.7 dyoung *(int *)rnode->sysctl_data = en;
2214 1.7 dyoung
2215 1.7 dyoung return rc;
2216 1.7 dyoung }
2217 1.7 dyoung
2218 1.7 dyoung int
2219 1.1 dyoung vtw_earlyinit(void)
2220 1.1 dyoung {
2221 1.5 dyoung int i, rc;
2222 1.1 dyoung
2223 1.5 dyoung callout_init(&vtw_cs, 0);
2224 1.5 dyoung callout_setfunc(&vtw_cs, vtw_tick, 0);
2225 1.1 dyoung
2226 1.5 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
2227 1.5 dyoung vtw_tcpv4[i].is_v4 = 1;
2228 1.5 dyoung vtw_tcpv6[i].is_v6 = 1;
2229 1.1 dyoung }
2230 1.1 dyoung
2231 1.7 dyoung if ((tcp4_vtw_enable & 1) != 0 &&
2232 1.7 dyoung (rc = vtw_control_init(AF_INET)) != 0)
2233 1.7 dyoung return rc;
2234 1.7 dyoung
2235 1.7 dyoung if ((tcp6_vtw_enable & 1) != 0 &&
2236 1.1 dyoung (rc = vtw_control_init(AF_INET6)) != 0)
2237 1.1 dyoung return rc;
2238 1.1 dyoung
2239 1.1 dyoung return 0;
2240 1.1 dyoung }
2241 1.1 dyoung
2242 1.1 dyoung #ifdef VTW_DEBUG
2243 1.1 dyoung #include <sys/syscallargs.h>
2244 1.1 dyoung #include <sys/sysctl.h>
2245 1.1 dyoung
2246 1.1 dyoung /*!\brief add lalp, fafp entries for debug
2247 1.1 dyoung */
2248 1.1 dyoung int
2249 1.11 matt vtw_debug_add(int af, sin_either_t *la, sin_either_t *fa, int msl, int msl_class)
2250 1.1 dyoung {
2251 1.1 dyoung vtw_ctl_t *ctl;
2252 1.1 dyoung vtw_t *vtw;
2253 1.1 dyoung
2254 1.11 matt ctl = vtw_control(af, msl ? msl : class_to_msl(msl_class));
2255 1.1 dyoung if (!ctl)
2256 1.1 dyoung return 0;
2257 1.1 dyoung
2258 1.1 dyoung vtw = vtw_alloc(ctl);
2259 1.1 dyoung
2260 1.1 dyoung if (vtw) {
2261 1.1 dyoung vtw->snd_nxt = 0;
2262 1.1 dyoung vtw->rcv_nxt = 0;
2263 1.1 dyoung
2264 1.1 dyoung switch (af) {
2265 1.1 dyoung case AF_INET: {
2266 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
2267 1.1 dyoung
2268 1.1 dyoung v4->faddr = fa->sin_addr.v4.s_addr;
2269 1.1 dyoung v4->laddr = la->sin_addr.v4.s_addr;
2270 1.1 dyoung v4->fport = fa->sin_port;
2271 1.1 dyoung v4->lport = la->sin_port;
2272 1.1 dyoung
2273 1.1 dyoung vtw->reuse_port = 1;
2274 1.1 dyoung vtw->reuse_addr = 1;
2275 1.1 dyoung vtw->v6only = 0;
2276 1.1 dyoung vtw->uid = 0;
2277 1.1 dyoung
2278 1.1 dyoung vtw_inshash_v4(ctl, vtw);
2279 1.1 dyoung break;
2280 1.1 dyoung }
2281 1.1 dyoung
2282 1.1 dyoung case AF_INET6: {
2283 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
2284 1.1 dyoung
2285 1.1 dyoung v6->faddr = fa->sin_addr.v6;
2286 1.1 dyoung v6->laddr = la->sin_addr.v6;
2287 1.1 dyoung
2288 1.1 dyoung v6->fport = fa->sin_port;
2289 1.1 dyoung v6->lport = la->sin_port;
2290 1.1 dyoung
2291 1.1 dyoung vtw->reuse_port = 1;
2292 1.1 dyoung vtw->reuse_addr = 1;
2293 1.1 dyoung vtw->v6only = 0;
2294 1.1 dyoung vtw->uid = 0;
2295 1.1 dyoung
2296 1.1 dyoung vtw_inshash_v6(ctl, vtw);
2297 1.1 dyoung break;
2298 1.1 dyoung }
2299 1.1 dyoung
2300 1.1 dyoung default:
2301 1.1 dyoung break;
2302 1.1 dyoung }
2303 1.1 dyoung
2304 1.1 dyoung return 1;
2305 1.1 dyoung }
2306 1.1 dyoung
2307 1.1 dyoung return 0;
2308 1.1 dyoung }
2309 1.1 dyoung
/* System call slot claimed for the vtw debug call; 0 until one is claimed. */
static int vtw_syscall;
2311 1.1 dyoung
2312 1.1 dyoung static int
2313 1.1 dyoung vtw_debug_process(vtw_sysargs_t *ap)
2314 1.1 dyoung {
2315 1.1 dyoung struct vestigial_inpcb vestige;
2316 1.1 dyoung int rc = 0;
2317 1.1 dyoung
2318 1.1 dyoung mutex_enter(softnet_lock);
2319 1.1 dyoung
2320 1.1 dyoung switch (ap->op) {
2321 1.1 dyoung case 0: // insert
2322 1.1 dyoung vtw_debug_add(ap->la.sin_family
2323 1.1 dyoung , &ap->la
2324 1.1 dyoung , &ap->fa
2325 1.1 dyoung , TCPTV_MSL
2326 1.1 dyoung , 0);
2327 1.1 dyoung break;
2328 1.1 dyoung
2329 1.1 dyoung case 1: // lookup
2330 1.1 dyoung case 2: // restart
2331 1.1 dyoung switch (ap->la.sin_family) {
2332 1.1 dyoung case AF_INET:
2333 1.1 dyoung if (tcp_lookup_v4(ap->fa.sin_addr.v4, ap->fa.sin_port,
2334 1.1 dyoung ap->la.sin_addr.v4, ap->la.sin_port,
2335 1.1 dyoung &vestige)) {
2336 1.1 dyoung if (ap->op == 2) {
2337 1.1 dyoung vtw_restart(&vestige);
2338 1.1 dyoung }
2339 1.1 dyoung rc = 0;
2340 1.1 dyoung } else
2341 1.1 dyoung rc = ESRCH;
2342 1.1 dyoung break;
2343 1.1 dyoung
2344 1.1 dyoung case AF_INET6:
2345 1.1 dyoung if (tcp_lookup_v6(&ap->fa.sin_addr.v6, ap->fa.sin_port,
2346 1.1 dyoung &ap->la.sin_addr.v6, ap->la.sin_port,
2347 1.1 dyoung &vestige)) {
2348 1.1 dyoung if (ap->op == 2) {
2349 1.1 dyoung vtw_restart(&vestige);
2350 1.1 dyoung }
2351 1.1 dyoung rc = 0;
2352 1.1 dyoung } else
2353 1.1 dyoung rc = ESRCH;
2354 1.1 dyoung break;
2355 1.1 dyoung default:
2356 1.1 dyoung rc = EINVAL;
2357 1.1 dyoung }
2358 1.1 dyoung break;
2359 1.1 dyoung
2360 1.1 dyoung default:
2361 1.1 dyoung rc = EINVAL;
2362 1.1 dyoung }
2363 1.1 dyoung
2364 1.1 dyoung mutex_exit(softnet_lock);
2365 1.1 dyoung return rc;
2366 1.1 dyoung }
2367 1.1 dyoung
2368 1.1 dyoung struct sys_vtw_args {
2369 1.1 dyoung syscallarg(const vtw_sysargs_t *) req;
2370 1.1 dyoung syscallarg(size_t) len;
2371 1.1 dyoung };
2372 1.1 dyoung
2373 1.1 dyoung static int
2374 1.1 dyoung vtw_sys(struct lwp *l, const void *_, register_t *retval)
2375 1.1 dyoung {
2376 1.1 dyoung const struct sys_vtw_args *uap = _;
2377 1.1 dyoung void *buf;
2378 1.1 dyoung int rc;
2379 1.1 dyoung size_t len = SCARG(uap, len);
2380 1.1 dyoung
2381 1.1 dyoung if (len != sizeof (vtw_sysargs_t))
2382 1.1 dyoung return EINVAL;
2383 1.1 dyoung
2384 1.1 dyoung buf = kmem_alloc(len, KM_SLEEP);
2385 1.1 dyoung rc = copyin(SCARG(uap, req), buf, len);
2386 1.1 dyoung if (!rc) {
2387 1.1 dyoung rc = vtw_debug_process(buf);
2388 1.1 dyoung }
2389 1.1 dyoung kmem_free(buf, len);
2390 1.1 dyoung
2391 1.1 dyoung return rc;
2392 1.1 dyoung }
2393 1.1 dyoung
2394 1.1 dyoung static void
2395 1.1 dyoung vtw_sanity_check(void)
2396 1.1 dyoung {
2397 1.1 dyoung vtw_ctl_t *ctl;
2398 1.1 dyoung vtw_t *vtw;
2399 1.1 dyoung int i;
2400 1.1 dyoung int n;
2401 1.1 dyoung
2402 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
2403 1.1 dyoung ctl = &vtw_tcpv4[i];
2404 1.1 dyoung
2405 1.1 dyoung if (!ctl->base.v || ctl->nalloc)
2406 1.1 dyoung continue;
2407 1.1 dyoung
2408 1.1 dyoung for (n = 0, vtw = ctl->base.v; ; ) {
2409 1.1 dyoung ++n;
2410 1.1 dyoung vtw = vtw_next(ctl, vtw);
2411 1.1 dyoung if (vtw == ctl->base.v)
2412 1.1 dyoung break;
2413 1.1 dyoung }
2414 1.1 dyoung db_trace(KTR_VTW
2415 1.1 dyoung , (ctl, "sanity: class %x n %x nfree %x"
2416 1.1 dyoung , i, n, ctl->nfree));
2417 1.1 dyoung
2418 1.1 dyoung KASSERT(n == ctl->nfree);
2419 1.1 dyoung }
2420 1.1 dyoung
2421 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
2422 1.1 dyoung ctl = &vtw_tcpv6[i];
2423 1.1 dyoung
2424 1.1 dyoung if (!ctl->base.v || ctl->nalloc)
2425 1.1 dyoung continue;
2426 1.1 dyoung
2427 1.1 dyoung for (n = 0, vtw = ctl->base.v; ; ) {
2428 1.1 dyoung ++n;
2429 1.1 dyoung vtw = vtw_next(ctl, vtw);
2430 1.1 dyoung if (vtw == ctl->base.v)
2431 1.1 dyoung break;
2432 1.1 dyoung }
2433 1.1 dyoung db_trace(KTR_VTW
2434 1.1 dyoung , (ctl, "sanity: class %x n %x nfree %x"
2435 1.1 dyoung , i, n, ctl->nfree));
2436 1.1 dyoung KASSERT(n == ctl->nfree);
2437 1.1 dyoung }
2438 1.1 dyoung }
2439 1.1 dyoung
2440 1.1 dyoung /*!\brief Initialise debug support.
2441 1.1 dyoung */
2442 1.1 dyoung static void
2443 1.1 dyoung vtw_debug_init(void)
2444 1.1 dyoung {
2445 1.1 dyoung int i;
2446 1.1 dyoung
2447 1.1 dyoung vtw_sanity_check();
2448 1.1 dyoung
2449 1.1 dyoung if (vtw_syscall)
2450 1.1 dyoung return;
2451 1.1 dyoung
2452 1.1 dyoung for (i = 511; i; --i) {
2453 1.1 dyoung if (sysent[i].sy_call == sys_nosys) {
2454 1.1 dyoung sysent[i].sy_call = vtw_sys;
2455 1.1 dyoung sysent[i].sy_narg = 2;
2456 1.1 dyoung sysent[i].sy_argsize = sizeof (struct sys_vtw_args);
2457 1.1 dyoung sysent[i].sy_flags = 0;
2458 1.1 dyoung
2459 1.1 dyoung vtw_syscall = i;
2460 1.1 dyoung break;
2461 1.1 dyoung }
2462 1.1 dyoung }
2463 1.1 dyoung if (i) {
2464 1.1 dyoung const struct sysctlnode *node;
2465 1.1 dyoung uint32_t flags;
2466 1.1 dyoung
2467 1.1 dyoung flags = sysctl_root.sysctl_flags;
2468 1.1 dyoung
2469 1.1 dyoung sysctl_root.sysctl_flags |= CTLFLAG_READWRITE;
2470 1.1 dyoung sysctl_root.sysctl_flags &= ~CTLFLAG_PERMANENT;
2471 1.1 dyoung
2472 1.1 dyoung sysctl_createv(0, 0, 0, &node,
2473 1.1 dyoung CTLFLAG_PERMANENT, CTLTYPE_NODE,
2474 1.1 dyoung "koff",
2475 1.1 dyoung SYSCTL_DESCR("Kernel Obscure Feature Finder"),
2476 1.1 dyoung 0, 0, 0, 0, CTL_CREATE, CTL_EOL);
2477 1.1 dyoung
2478 1.1 dyoung if (!node) {
2479 1.1 dyoung sysctl_createv(0, 0, 0, &node,
2480 1.1 dyoung CTLFLAG_PERMANENT, CTLTYPE_NODE,
2481 1.1 dyoung "koffka",
2482 1.1 dyoung SYSCTL_DESCR("The Real(tm) Kernel"
2483 1.1 dyoung " Obscure Feature Finder"),
2484 1.1 dyoung 0, 0, 0, 0, CTL_CREATE, CTL_EOL);
2485 1.1 dyoung }
2486 1.1 dyoung if (node) {
2487 1.1 dyoung sysctl_createv(0, 0, 0, 0,
2488 1.1 dyoung CTLFLAG_PERMANENT|CTLFLAG_READONLY,
2489 1.1 dyoung CTLTYPE_INT, "vtw_debug_syscall",
2490 1.1 dyoung SYSCTL_DESCR("vtw debug"
2491 1.1 dyoung " system call number"),
2492 1.1 dyoung 0, 0, &vtw_syscall, 0, node->sysctl_num,
2493 1.1 dyoung CTL_CREATE, CTL_EOL);
2494 1.1 dyoung }
2495 1.1 dyoung sysctl_root.sysctl_flags = flags;
2496 1.1 dyoung }
2497 1.1 dyoung }
2498 1.1 dyoung #else /* !VTW_DEBUG */
/* Debug support is compiled out: nothing to initialise. */
static void
vtw_debug_init(void)
{
}
2504 1.1 dyoung #endif /* !VTW_DEBUG */
2505