tcp_vtw.c revision 1.16 1 1.1 dyoung /*
2 1.1 dyoung * Copyright (c) 2011 The NetBSD Foundation, Inc.
3 1.1 dyoung * All rights reserved.
4 1.1 dyoung *
5 1.1 dyoung * This code is derived from software contributed to The NetBSD Foundation
6 1.1 dyoung * by Coyote Point Systems, Inc.
7 1.1 dyoung *
8 1.1 dyoung * Redistribution and use in source and binary forms, with or without
9 1.1 dyoung * modification, are permitted provided that the following conditions
10 1.1 dyoung * are met:
11 1.1 dyoung * 1. Redistributions of source code must retain the above copyright
12 1.1 dyoung * notice, this list of conditions and the following disclaimer.
13 1.1 dyoung * 2. Redistributions in binary form must reproduce the above copyright
14 1.1 dyoung * notice, this list of conditions and the following disclaimer in the
15 1.1 dyoung * documentation and/or other materials provided with the distribution.
16 1.1 dyoung *
17 1.1 dyoung * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
18 1.1 dyoung * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 1.1 dyoung * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 1.1 dyoung * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
21 1.1 dyoung * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 1.1 dyoung * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 1.1 dyoung * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 1.1 dyoung * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 1.1 dyoung * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 1.1 dyoung * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 1.1 dyoung * POSSIBILITY OF SUCH DAMAGE.
28 1.1 dyoung */
29 1.9 yamt
30 1.9 yamt /*
31 1.9 yamt * Reduces the resources demanded by TCP sessions in TIME_WAIT-state using
32 1.9 yamt * methods called Vestigial Time-Wait (VTW) and Maximum Segment Lifetime
33 1.9 yamt * Truncation (MSLT).
34 1.9 yamt *
35 1.9 yamt * MSLT and VTW were contributed by Coyote Point Systems, Inc.
36 1.9 yamt *
37 1.9 yamt * Even after a TCP session enters the TIME_WAIT state, its corresponding
38 1.9 yamt * socket and protocol control blocks (PCBs) stick around until the TCP
39 1.9 yamt * Maximum Segment Lifetime (MSL) expires. On a host whose workload
40 1.9 yamt * necessarily creates and closes down many TCP sockets, the sockets & PCBs
41 1.9 yamt * for TCP sessions in TIME_WAIT state amount to many megabytes of dead
42 1.9 yamt * weight in RAM.
43 1.9 yamt *
44 1.9 yamt * Maximum Segment Lifetime Truncation (MSLT) assigns each TCP session to
45 1.9 yamt * a class based on the nearness of the peer. Corresponding to each class
46 1.9 yamt * is an MSL, and a session uses the MSL of its class. The classes are
47 1.9 yamt * loopback (local host equals remote host), local (local host and remote
48 1.9 yamt * host are on the same link/subnet), and remote (local host and remote
49 1.9 yamt * host communicate via one or more gateways). Classes corresponding to
50 1.9 yamt * nearer peers have lower MSLs by default: 2 seconds for loopback, 10
51 1.9 yamt * seconds for local, 60 seconds for remote. Loopback and local sessions
52 1.9 yamt * expire more quickly when MSLT is used.
53 1.9 yamt *
54 1.9 yamt * Vestigial Time-Wait (VTW) replaces a TIME_WAIT session's PCB/socket
55 1.9 yamt * dead weight with a compact representation of the session, called a
56 1.9 yamt * "vestigial PCB". VTW data structures are designed to be very fast and
57 1.9 yamt * memory-efficient: for fast insertion and lookup of vestigial PCBs,
58 1.9 yamt * the PCBs are stored in a hash table that is designed to minimize the
59 1.9 yamt * number of cacheline visits per lookup/insertion. The memory both
60 1.9 yamt * for vestigial PCBs and for elements of the PCB hashtable comes from
61 1.9 yamt * fixed-size pools, and linked data structures exploit this to conserve
62 1.9 yamt * memory by representing references with a narrow index/offset from the
63 1.9 yamt * start of a pool instead of a pointer. When space for new vestigial PCBs
64 1.9 yamt * runs out, VTW makes room by discarding old vestigial PCBs, oldest first.
65 1.9 yamt * VTW cooperates with MSLT.
66 1.9 yamt *
67 1.9 yamt * It may help to think of VTW as a "FIN cache" by analogy to the SYN
68 1.9 yamt * cache.
69 1.9 yamt *
70 1.9 yamt * A 2.8-GHz Pentium 4 running a test workload that creates TIME_WAIT
71 1.9 yamt * sessions as fast as it can is approximately 17% idle when VTW is active
72 1.9 yamt * versus 0% idle when VTW is inactive. It has 103 megabytes more free RAM
73 1.9 yamt * when VTW is active (approximately 64k vestigial PCBs are created) than
74 1.9 yamt * when it is inactive.
75 1.9 yamt */
76 1.9 yamt
77 1.1 dyoung #include <sys/cdefs.h>
78 1.1 dyoung
79 1.14 pooka #ifdef _KERNEL_OPT
80 1.1 dyoung #include "opt_ddb.h"
81 1.1 dyoung #include "opt_inet.h"
82 1.1 dyoung #include "opt_inet_csum.h"
83 1.1 dyoung #include "opt_tcp_debug.h"
84 1.14 pooka #endif
85 1.1 dyoung
86 1.1 dyoung #include <sys/param.h>
87 1.1 dyoung #include <sys/systm.h>
88 1.1 dyoung #include <sys/kmem.h>
89 1.1 dyoung #include <sys/mbuf.h>
90 1.1 dyoung #include <sys/protosw.h>
91 1.1 dyoung #include <sys/socket.h>
92 1.1 dyoung #include <sys/socketvar.h>
93 1.1 dyoung #include <sys/errno.h>
94 1.1 dyoung #include <sys/syslog.h>
95 1.1 dyoung #include <sys/pool.h>
96 1.1 dyoung #include <sys/domain.h>
97 1.1 dyoung #include <sys/kernel.h>
98 1.1 dyoung #include <net/if.h>
99 1.1 dyoung #include <net/if_types.h>
100 1.1 dyoung
101 1.1 dyoung #include <netinet/in.h>
102 1.1 dyoung #include <netinet/in_systm.h>
103 1.1 dyoung #include <netinet/ip.h>
104 1.1 dyoung #include <netinet/in_pcb.h>
105 1.1 dyoung #include <netinet/in_var.h>
106 1.1 dyoung #include <netinet/ip_var.h>
107 1.1 dyoung #include <netinet/in_offload.h>
108 1.1 dyoung #include <netinet/ip6.h>
109 1.1 dyoung #include <netinet6/ip6_var.h>
110 1.1 dyoung #include <netinet6/in6_pcb.h>
111 1.1 dyoung #include <netinet6/ip6_var.h>
112 1.1 dyoung #include <netinet6/in6_var.h>
113 1.1 dyoung #include <netinet/icmp6.h>
114 1.1 dyoung #include <netinet6/nd6.h>
115 1.1 dyoung
116 1.1 dyoung #include <netinet/tcp.h>
117 1.1 dyoung #include <netinet/tcp_fsm.h>
118 1.1 dyoung #include <netinet/tcp_seq.h>
119 1.1 dyoung #include <netinet/tcp_timer.h>
120 1.1 dyoung #include <netinet/tcp_var.h>
121 1.1 dyoung #include <netinet/tcp_private.h>
122 1.1 dyoung #include <netinet/tcpip.h>
123 1.1 dyoung
124 1.1 dyoung #include <netinet/tcp_vtw.h>
125 1.1 dyoung
126 1.16 martin __KERNEL_RCSID(0, "$NetBSD: tcp_vtw.c,v 1.16 2016/07/28 07:54:31 martin Exp $");
127 1.1 dyoung
/* Tracing is compiled out here: db_trace() discards both of its arguments
 * and expands to an empty statement, so the trace calls throughout this
 * file cost nothing and their arguments are never evaluated.
 */
128 1.1 dyoung #define db_trace(__a, __b) do { } while (/*CONSTCOND*/0)
129 1.1 dyoung
130 1.1 dyoung static void vtw_debug_init(void);
131 1.1 dyoung
/* Fat pointer collections and vestigial time-wait control blocks.
 * Judging by the names there is one set per address family (IPv4 / IPv6);
 * each vtw_ctl_t array holds VTW_NCLASS control blocks.
 * vtw_stats holds the counters bumped by the insert, delete and lookup
 * paths below (ins, del, look[], probe[], hit[], miss[], ...).
 */
132 1.1 dyoung fatp_ctl_t fat_tcpv4;
133 1.1 dyoung fatp_ctl_t fat_tcpv6;
134 1.1 dyoung vtw_ctl_t vtw_tcpv4[VTW_NCLASS];
135 1.1 dyoung vtw_ctl_t vtw_tcpv6[VTW_NCLASS];
136 1.1 dyoung vtw_stats_t vtw_stats;
137 1.1 dyoung
138 1.1 dyoung /* We provide state for the lookup_ports iterator.
139 1.1 dyoung * As currently we are netlock-protected, there is one.
140 1.1 dyoung * If we were finer-grain, we would have one per CPU.
141 1.1 dyoung * I do not want to be in the business of alloc/free.
142 1.1 dyoung * The best alternate would be allocate on the caller's
143 1.1 dyoung * stack, but that would require them to know the struct,
144 1.1 dyoung * or at least the size.
145 1.1 dyoung * See how she goes.
146 1.1 dyoung */
/* NOTE(review): the code that fills in and walks this structure is not
 * visible from here, so the per-field notes below are inferred from the
 * names and types only -- confirm against the iterator implementation.
 */
147 1.1 dyoung struct tcp_ports_iterator {
148 1.1 dyoung union {
149 1.1 dyoung struct in_addr v4;
150 1.1 dyoung struct in6_addr v6;
151 1.1 dyoung } addr; /* address for either family; presumably the one being matched */
152 1.1 dyoung u_int port; /* presumably the port being iterated over */
153 1.1 dyoung
154 1.1 dyoung uint32_t wild : 1; /* single-bit flag; presumably "match wildcard address" */
155 1.1 dyoung
156 1.1 dyoung vtw_ctl_t *ctl; /* presumably the control block currently being walked */
157 1.1 dyoung fatp_t *fp; /* presumably the current fat pointer in the chain */
158 1.1 dyoung
159 1.1 dyoung uint16_t slot_idx; /* presumably the next slot within fp */
160 1.1 dyoung uint16_t ctl_idx; /* presumably the index of ctl within its class array */
161 1.1 dyoung };
162 1.1 dyoung
/* The single, statically allocated iterator state per address family
 * (see the rationale on struct tcp_ports_iterator: no alloc/free).
 */
163 1.1 dyoung static struct tcp_ports_iterator tcp_ports_iterator_v4;
164 1.1 dyoung static struct tcp_ports_iterator tcp_ports_iterator_v6;
165 1.1 dyoung
/* Forward declaration: fatp_vtw_inshash() calls vtw_age() to make room
 * when no fat pointer can be allocated.
 */
166 1.1 dyoung static int vtw_age(vtw_ctl_t *, struct timeval *);
167 1.1 dyoung
168 1.1 dyoung /*!\brief allocate a fat pointer from a collection.
169 1.1 dyoung */
170 1.1 dyoung static fatp_t *
171 1.1 dyoung fatp_alloc(fatp_ctl_t *fat)
172 1.1 dyoung {
173 1.1 dyoung fatp_t *fp = 0;
174 1.1 dyoung
175 1.1 dyoung if (fat->nfree) {
176 1.1 dyoung fp = fat->free;
177 1.1 dyoung if (fp) {
178 1.1 dyoung fat->free = fatp_next(fat, fp);
179 1.1 dyoung --fat->nfree;
180 1.1 dyoung ++fat->nalloc;
181 1.1 dyoung fp->nxt = 0;
182 1.1 dyoung
183 1.1 dyoung KASSERT(!fp->inuse);
184 1.1 dyoung }
185 1.1 dyoung }
186 1.1 dyoung
187 1.1 dyoung return fp;
188 1.1 dyoung }
189 1.1 dyoung
190 1.1 dyoung /*!\brief free a fat pointer.
191 1.1 dyoung */
192 1.1 dyoung static void
193 1.1 dyoung fatp_free(fatp_ctl_t *fat, fatp_t *fp)
194 1.1 dyoung {
195 1.1 dyoung if (fp) {
196 1.1 dyoung KASSERT(!fp->inuse);
197 1.1 dyoung KASSERT(!fp->nxt);
198 1.1 dyoung
199 1.1 dyoung fp->nxt = fatp_index(fat, fat->free);
200 1.1 dyoung fat->free = fp;
201 1.1 dyoung
202 1.1 dyoung ++fat->nfree;
203 1.1 dyoung --fat->nalloc;
204 1.1 dyoung }
205 1.1 dyoung }
206 1.1 dyoung
207 1.1 dyoung /*!\brief initialise a collection of fat pointers.
208 1.1 dyoung *
209 1.1 dyoung *\param n # hash buckets
210 1.1 dyoung *\param m total # fat pointers to allocate
211 1.1 dyoung *
212 1.1 dyoung * We allocate 2x as much, as we have two hashes: full and lport only.
213 1.1 dyoung */
214 1.1 dyoung static void
215 1.6 dyoung fatp_init(fatp_ctl_t *fat, uint32_t n, uint32_t m,
216 1.6 dyoung fatp_t *fat_base, fatp_t **fat_hash)
217 1.1 dyoung {
218 1.1 dyoung fatp_t *fp;
219 1.1 dyoung
220 1.1 dyoung KASSERT(n <= FATP_MAX / 2);
221 1.1 dyoung
222 1.6 dyoung fat->hash = fat_hash;
223 1.6 dyoung fat->base = fat_base;
224 1.1 dyoung
225 1.1 dyoung fat->port = &fat->hash[m];
226 1.1 dyoung
227 1.1 dyoung fat->mask = m - 1; // ASSERT is power of 2 (m)
228 1.1 dyoung fat->lim = fat->base + 2*n - 1;
229 1.1 dyoung fat->nfree = 0;
230 1.1 dyoung fat->nalloc = 2*n;
231 1.1 dyoung
232 1.1 dyoung /* Initialise the free list.
233 1.1 dyoung */
234 1.1 dyoung for (fp = fat->lim; fp >= fat->base; --fp) {
235 1.1 dyoung fatp_free(fat, fp);
236 1.1 dyoung }
237 1.1 dyoung }
238 1.1 dyoung
239 1.1 dyoung /*
240 1.1 dyoung * The `xtra' is XORed into the tag stored.
241 1.1 dyoung */
/* Indexed by slot number within a fat pointer: the insert, unhash and
 * lookup paths all use fatp_xtra[slot].  There are 32 entries, which
 * covers the largest slot count (31) handled by fatp_key().
 */
242 1.1 dyoung static uint32_t fatp_xtra[] = {
243 1.1 dyoung 0x11111111,0x22222222,0x33333333,0x44444444,
244 1.1 dyoung 0x55555555,0x66666666,0x77777777,0x88888888,
245 1.1 dyoung 0x12121212,0x21212121,0x34343434,0x43434343,
246 1.1 dyoung 0x56565656,0x65656565,0x78787878,0x87878787,
247 1.1 dyoung 0x11221122,0x22112211,0x33443344,0x44334433,
248 1.1 dyoung 0x55665566,0x66556655,0x77887788,0x88778877,
249 1.1 dyoung 0x11112222,0x22221111,0x33334444,0x44443333,
250 1.1 dyoung 0x55556666,0x66665555,0x77778888,0x88887777,
251 1.1 dyoung };
252 1.1 dyoung
253 1.1 dyoung /*!\brief turn a {fatp_t*,slot} into an integral key.
254 1.1 dyoung *
255 1.1 dyoung * The key can be used to obtain the fatp_t, and the slot,
256 1.1 dyoung * as it directly encodes them.
257 1.1 dyoung */
258 1.1 dyoung static inline uint32_t
259 1.1 dyoung fatp_key(fatp_ctl_t *fat, fatp_t *fp, uint32_t slot)
260 1.1 dyoung {
261 1.1 dyoung CTASSERT(CACHE_LINE_SIZE == 32 ||
262 1.1 dyoung CACHE_LINE_SIZE == 64 ||
263 1.1 dyoung CACHE_LINE_SIZE == 128);
264 1.1 dyoung
265 1.1 dyoung switch (fatp_ntags()) {
266 1.1 dyoung case 7:
267 1.1 dyoung return (fatp_index(fat, fp) << 3) | slot;
268 1.1 dyoung case 15:
269 1.1 dyoung return (fatp_index(fat, fp) << 4) | slot;
270 1.1 dyoung case 31:
271 1.1 dyoung return (fatp_index(fat, fp) << 5) | slot;
272 1.1 dyoung default:
273 1.1 dyoung KASSERT(0 && "no support, for no good reason");
274 1.1 dyoung return ~0;
275 1.1 dyoung }
276 1.1 dyoung }
277 1.1 dyoung
278 1.1 dyoung static inline uint32_t
279 1.1 dyoung fatp_slot_from_key(fatp_ctl_t *fat, uint32_t key)
280 1.1 dyoung {
281 1.1 dyoung CTASSERT(CACHE_LINE_SIZE == 32 ||
282 1.1 dyoung CACHE_LINE_SIZE == 64 ||
283 1.1 dyoung CACHE_LINE_SIZE == 128);
284 1.1 dyoung
285 1.1 dyoung switch (fatp_ntags()) {
286 1.1 dyoung case 7:
287 1.1 dyoung return key & 7;
288 1.1 dyoung case 15:
289 1.1 dyoung return key & 15;
290 1.1 dyoung case 31:
291 1.1 dyoung return key & 31;
292 1.1 dyoung default:
293 1.1 dyoung KASSERT(0 && "no support, for no good reason");
294 1.1 dyoung return ~0;
295 1.1 dyoung }
296 1.1 dyoung }
297 1.1 dyoung
298 1.1 dyoung static inline fatp_t *
299 1.1 dyoung fatp_from_key(fatp_ctl_t *fat, uint32_t key)
300 1.1 dyoung {
301 1.1 dyoung CTASSERT(CACHE_LINE_SIZE == 32 ||
302 1.1 dyoung CACHE_LINE_SIZE == 64 ||
303 1.1 dyoung CACHE_LINE_SIZE == 128);
304 1.1 dyoung
305 1.1 dyoung switch (fatp_ntags()) {
306 1.1 dyoung case 7:
307 1.1 dyoung key >>= 3;
308 1.1 dyoung break;
309 1.1 dyoung case 15:
310 1.1 dyoung key >>= 4;
311 1.1 dyoung break;
312 1.1 dyoung case 31:
313 1.1 dyoung key >>= 5;
314 1.1 dyoung break;
315 1.1 dyoung default:
316 1.1 dyoung KASSERT(0 && "no support, for no good reason");
317 1.1 dyoung return 0;
318 1.1 dyoung }
319 1.1 dyoung
320 1.1 dyoung return key ? fat->base + key - 1 : 0;
321 1.1 dyoung }
322 1.1 dyoung
323 1.1 dyoung static inline uint32_t
324 1.1 dyoung idx_encode(vtw_ctl_t *ctl, uint32_t idx)
325 1.1 dyoung {
326 1.1 dyoung return (idx << ctl->idx_bits) | idx;
327 1.1 dyoung }
328 1.1 dyoung
329 1.1 dyoung static inline uint32_t
330 1.1 dyoung idx_decode(vtw_ctl_t *ctl, uint32_t bits)
331 1.1 dyoung {
332 1.1 dyoung uint32_t idx = bits & ctl->idx_mask;
333 1.1 dyoung
334 1.1 dyoung if (idx_encode(ctl, idx) == bits)
335 1.1 dyoung return idx;
336 1.1 dyoung else
337 1.1 dyoung return ~0;
338 1.1 dyoung }
339 1.1 dyoung
340 1.1 dyoung /*!\brief insert index into fatp hash
341 1.1 dyoung *
342 1.1 dyoung *\param idx - index of element being placed in hash chain
343 1.1 dyoung *\param tag - 32-bit tag identifier
344 1.1 dyoung *
345 1.1 dyoung *\returns
346 1.1 dyoung * value which can be used to locate entry.
347 1.1 dyoung *
348 1.1 dyoung *\note
349 1.1 dyoung * we rely on the fact that there are unused high bits in the index
350 1.1 dyoung * for verification purposes on lookup.
351 1.1 dyoung */
352 1.1 dyoung
353 1.1 dyoung static inline uint32_t
354 1.1 dyoung fatp_vtw_inshash(fatp_ctl_t *fat, uint32_t idx, uint32_t tag, int which,
355 1.1 dyoung void *dbg)
356 1.1 dyoung {
357 1.1 dyoung fatp_t *fp;
358 1.1 dyoung fatp_t **hash = (which ? fat->port : fat->hash);
359 1.1 dyoung int i;
360 1.1 dyoung
361 1.1 dyoung fp = hash[tag & fat->mask];
362 1.1 dyoung
363 1.1 dyoung while (!fp || fatp_full(fp)) {
364 1.1 dyoung fatp_t *fq;
365 1.1 dyoung
366 1.1 dyoung /* All entries are inuse at the top level.
367 1.1 dyoung * We allocate a spare, and push the top level
368 1.1 dyoung * down one. All entries in the fp we push down
369 1.1 dyoung * (think of a tape worm here) will be expelled sooner than
370 1.1 dyoung * any entries added subsequently to this hash bucket.
371 1.1 dyoung * This is a property of the time waits we are exploiting.
372 1.1 dyoung */
373 1.1 dyoung
374 1.1 dyoung fq = fatp_alloc(fat);
375 1.1 dyoung if (!fq) {
376 1.1 dyoung vtw_age(fat->vtw, 0);
377 1.1 dyoung fp = hash[tag & fat->mask];
378 1.1 dyoung continue;
379 1.1 dyoung }
380 1.1 dyoung
381 1.1 dyoung fq->inuse = 0;
382 1.1 dyoung fq->nxt = fatp_index(fat, fp);
383 1.1 dyoung
384 1.1 dyoung hash[tag & fat->mask] = fq;
385 1.1 dyoung
386 1.1 dyoung fp = fq;
387 1.1 dyoung }
388 1.1 dyoung
389 1.1 dyoung KASSERT(!fatp_full(fp));
390 1.1 dyoung
391 1.1 dyoung /* Fill highest index first. Lookup is lowest first.
392 1.1 dyoung */
393 1.1 dyoung for (i = fatp_ntags(); --i >= 0; ) {
394 1.1 dyoung if (!((1 << i) & fp->inuse)) {
395 1.1 dyoung break;
396 1.1 dyoung }
397 1.1 dyoung }
398 1.1 dyoung
399 1.1 dyoung fp->inuse |= 1 << i;
400 1.1 dyoung fp->tag[i] = tag ^ idx_encode(fat->vtw, idx) ^ fatp_xtra[i];
401 1.1 dyoung
402 1.1 dyoung db_trace(KTR_VTW
403 1.1 dyoung , (fp, "fat: inuse %5.5x tag[%x] %8.8x"
404 1.1 dyoung , fp->inuse
405 1.1 dyoung , i, fp->tag[i]));
406 1.1 dyoung
407 1.1 dyoung return fatp_key(fat, fp, i);
408 1.1 dyoung }
409 1.1 dyoung
410 1.1 dyoung static inline int
411 1.1 dyoung vtw_alive(const vtw_t *vtw)
412 1.1 dyoung {
413 1.1 dyoung return vtw->hashed && vtw->expire.tv_sec;
414 1.1 dyoung }
415 1.1 dyoung
416 1.1 dyoung static inline uint32_t
417 1.1 dyoung vtw_index_v4(vtw_ctl_t *ctl, vtw_v4_t *v4)
418 1.1 dyoung {
419 1.1 dyoung if (ctl->base.v4 <= v4 && v4 <= ctl->lim.v4)
420 1.1 dyoung return v4 - ctl->base.v4;
421 1.1 dyoung
422 1.1 dyoung KASSERT(0 && "vtw out of bounds");
423 1.1 dyoung
424 1.1 dyoung return ~0;
425 1.1 dyoung }
426 1.1 dyoung
427 1.1 dyoung static inline uint32_t
428 1.1 dyoung vtw_index_v6(vtw_ctl_t *ctl, vtw_v6_t *v6)
429 1.1 dyoung {
430 1.1 dyoung if (ctl->base.v6 <= v6 && v6 <= ctl->lim.v6)
431 1.1 dyoung return v6 - ctl->base.v6;
432 1.1 dyoung
433 1.1 dyoung KASSERT(0 && "vtw out of bounds");
434 1.1 dyoung
435 1.1 dyoung return ~0;
436 1.1 dyoung }
437 1.1 dyoung
438 1.1 dyoung static inline uint32_t
439 1.1 dyoung vtw_index(vtw_ctl_t *ctl, vtw_t *vtw)
440 1.1 dyoung {
441 1.1 dyoung if (ctl->clidx)
442 1.1 dyoung ctl = ctl->ctl;
443 1.1 dyoung
444 1.1 dyoung if (ctl->is_v4)
445 1.1 dyoung return vtw_index_v4(ctl, (vtw_v4_t *)vtw);
446 1.1 dyoung
447 1.1 dyoung if (ctl->is_v6)
448 1.1 dyoung return vtw_index_v6(ctl, (vtw_v6_t *)vtw);
449 1.1 dyoung
450 1.1 dyoung KASSERT(0 && "neither 4 nor 6. most curious.");
451 1.1 dyoung
452 1.1 dyoung return ~0;
453 1.1 dyoung }
454 1.1 dyoung
455 1.1 dyoung static inline vtw_t *
456 1.1 dyoung vtw_from_index(vtw_ctl_t *ctl, uint32_t idx)
457 1.1 dyoung {
458 1.1 dyoung if (ctl->clidx)
459 1.1 dyoung ctl = ctl->ctl;
460 1.1 dyoung
461 1.1 dyoung /* See if the index looks like it might be an index.
462 1.1 dyoung * Bits on outside of the valid index bits is a give away.
463 1.1 dyoung */
464 1.1 dyoung idx = idx_decode(ctl, idx);
465 1.1 dyoung
466 1.1 dyoung if (idx == ~0) {
467 1.1 dyoung return 0;
468 1.1 dyoung } else if (ctl->is_v4) {
469 1.1 dyoung vtw_v4_t *vtw = ctl->base.v4 + idx;
470 1.1 dyoung
471 1.1 dyoung return (ctl->base.v4 <= vtw && vtw <= ctl->lim.v4)
472 1.1 dyoung ? &vtw->common : 0;
473 1.1 dyoung } else if (ctl->is_v6) {
474 1.1 dyoung vtw_v6_t *vtw = ctl->base.v6 + idx;
475 1.1 dyoung
476 1.1 dyoung return (ctl->base.v6 <= vtw && vtw <= ctl->lim.v6)
477 1.1 dyoung ? &vtw->common : 0;
478 1.1 dyoung } else {
479 1.1 dyoung KASSERT(0 && "badness");
480 1.1 dyoung return 0;
481 1.1 dyoung }
482 1.1 dyoung }
483 1.1 dyoung
484 1.1 dyoung /*!\brief return the next vtw after this one.
485 1.1 dyoung *
486 1.1 dyoung * Due to the differing sizes of the entries in differing
487 1.1 dyoung * arenas, we have to ensure we ++ the correct pointer type.
488 1.1 dyoung *
489 1.1 dyoung * Also handles wrap.
490 1.1 dyoung */
491 1.1 dyoung static inline vtw_t *
492 1.1 dyoung vtw_next(vtw_ctl_t *ctl, vtw_t *vtw)
493 1.1 dyoung {
494 1.1 dyoung if (ctl->is_v4) {
495 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
496 1.1 dyoung
497 1.1 dyoung vtw = &(++v4)->common;
498 1.1 dyoung } else {
499 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
500 1.1 dyoung
501 1.1 dyoung vtw = &(++v6)->common;
502 1.1 dyoung }
503 1.1 dyoung
504 1.1 dyoung if (vtw > ctl->lim.v)
505 1.1 dyoung vtw = ctl->base.v;
506 1.1 dyoung
507 1.1 dyoung return vtw;
508 1.1 dyoung }
509 1.1 dyoung
510 1.1 dyoung /*!\brief remove entry from FATP hash chains
511 1.1 dyoung */
512 1.1 dyoung static inline void
513 1.1 dyoung vtw_unhash(vtw_ctl_t *ctl, vtw_t *vtw)
514 1.1 dyoung {
515 1.1 dyoung fatp_ctl_t *fat = ctl->fat;
516 1.1 dyoung fatp_t *fp;
517 1.1 dyoung uint32_t key = vtw->key;
518 1.1 dyoung uint32_t tag, slot, idx;
519 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
520 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
521 1.1 dyoung
522 1.1 dyoung if (!vtw->hashed) {
523 1.1 dyoung KASSERT(0 && "unhashed");
524 1.1 dyoung return;
525 1.1 dyoung }
526 1.1 dyoung
527 1.1 dyoung if (fat->vtw->is_v4) {
528 1.1 dyoung tag = v4_tag(v4->faddr, v4->fport, v4->laddr, v4->lport);
529 1.1 dyoung } else if (fat->vtw->is_v6) {
530 1.1 dyoung tag = v6_tag(&v6->faddr, v6->fport, &v6->laddr, v6->lport);
531 1.1 dyoung } else {
532 1.1 dyoung tag = 0;
533 1.1 dyoung KASSERT(0 && "not reached");
534 1.1 dyoung }
535 1.1 dyoung
536 1.1 dyoung /* Remove from fat->hash[]
537 1.1 dyoung */
538 1.1 dyoung slot = fatp_slot_from_key(fat, key);
539 1.1 dyoung fp = fatp_from_key(fat, key);
540 1.1 dyoung idx = vtw_index(ctl, vtw);
541 1.1 dyoung
542 1.1 dyoung db_trace(KTR_VTW
543 1.1 dyoung , (fp, "fat: del inuse %5.5x slot %x idx %x key %x tag %x"
544 1.1 dyoung , fp->inuse, slot, idx, key, tag));
545 1.1 dyoung
546 1.1 dyoung KASSERT(fp->inuse & (1 << slot));
547 1.1 dyoung KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
548 1.1 dyoung ^ fatp_xtra[slot]));
549 1.1 dyoung
550 1.1 dyoung if ((fp->inuse & (1 << slot))
551 1.1 dyoung && fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
552 1.1 dyoung ^ fatp_xtra[slot])) {
553 1.1 dyoung fp->inuse ^= 1 << slot;
554 1.1 dyoung fp->tag[slot] = 0;
555 1.1 dyoung
556 1.1 dyoung /* When we delete entries, we do not compact. This is
557 1.1 dyoung * due to temporality. We add entries, and they
558 1.1 dyoung * (eventually) expire. Older entries will be further
559 1.1 dyoung * down the chain.
560 1.1 dyoung */
561 1.1 dyoung if (!fp->inuse) {
562 1.1 dyoung uint32_t hi = tag & fat->mask;
563 1.1 dyoung fatp_t *fq = 0;
564 1.1 dyoung fatp_t *fr = fat->hash[hi];
565 1.1 dyoung
566 1.1 dyoung while (fr && fr != fp) {
567 1.1 dyoung fr = fatp_next(fat, fq = fr);
568 1.1 dyoung }
569 1.1 dyoung
570 1.1 dyoung if (fr == fp) {
571 1.1 dyoung if (fq) {
572 1.1 dyoung fq->nxt = fp->nxt;
573 1.1 dyoung fp->nxt = 0;
574 1.1 dyoung fatp_free(fat, fp);
575 1.1 dyoung } else {
576 1.1 dyoung KASSERT(fat->hash[hi] == fp);
577 1.1 dyoung
578 1.1 dyoung if (fp->nxt) {
579 1.1 dyoung fat->hash[hi]
580 1.1 dyoung = fatp_next(fat, fp);
581 1.1 dyoung fp->nxt = 0;
582 1.1 dyoung fatp_free(fat, fp);
583 1.1 dyoung } else {
584 1.1 dyoung /* retain for next use.
585 1.1 dyoung */
586 1.1 dyoung ;
587 1.1 dyoung }
588 1.1 dyoung }
589 1.1 dyoung } else {
590 1.1 dyoung fr = fat->hash[hi];
591 1.1 dyoung
592 1.1 dyoung do {
593 1.1 dyoung db_trace(KTR_VTW
594 1.1 dyoung , (fr
595 1.1 dyoung , "fat:*del inuse %5.5x"
596 1.1 dyoung " nxt %x"
597 1.1 dyoung , fr->inuse, fr->nxt));
598 1.1 dyoung
599 1.1 dyoung fr = fatp_next(fat, fq = fr);
600 1.1 dyoung } while (fr && fr != fp);
601 1.1 dyoung
602 1.1 dyoung KASSERT(0 && "oops");
603 1.1 dyoung }
604 1.1 dyoung }
605 1.1 dyoung vtw->key ^= ~0;
606 1.1 dyoung }
607 1.1 dyoung
608 1.1 dyoung if (fat->vtw->is_v4) {
609 1.1 dyoung tag = v4_port_tag(v4->lport);
610 1.1 dyoung } else if (fat->vtw->is_v6) {
611 1.1 dyoung tag = v6_port_tag(v6->lport);
612 1.1 dyoung }
613 1.1 dyoung
614 1.1 dyoung /* Remove from fat->port[]
615 1.1 dyoung */
616 1.1 dyoung key = vtw->port_key;
617 1.1 dyoung slot = fatp_slot_from_key(fat, key);
618 1.1 dyoung fp = fatp_from_key(fat, key);
619 1.1 dyoung idx = vtw_index(ctl, vtw);
620 1.1 dyoung
621 1.1 dyoung db_trace(KTR_VTW
622 1.1 dyoung , (fp, "fatport: del inuse %5.5x"
623 1.1 dyoung " slot %x idx %x key %x tag %x"
624 1.1 dyoung , fp->inuse, slot, idx, key, tag));
625 1.1 dyoung
626 1.1 dyoung KASSERT(fp->inuse & (1 << slot));
627 1.1 dyoung KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
628 1.1 dyoung ^ fatp_xtra[slot]));
629 1.1 dyoung
630 1.1 dyoung if ((fp->inuse & (1 << slot))
631 1.1 dyoung && fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
632 1.1 dyoung ^ fatp_xtra[slot])) {
633 1.1 dyoung fp->inuse ^= 1 << slot;
634 1.1 dyoung fp->tag[slot] = 0;
635 1.1 dyoung
636 1.1 dyoung if (!fp->inuse) {
637 1.1 dyoung uint32_t hi = tag & fat->mask;
638 1.1 dyoung fatp_t *fq = 0;
639 1.1 dyoung fatp_t *fr = fat->port[hi];
640 1.1 dyoung
641 1.1 dyoung while (fr && fr != fp) {
642 1.1 dyoung fr = fatp_next(fat, fq = fr);
643 1.1 dyoung }
644 1.1 dyoung
645 1.1 dyoung if (fr == fp) {
646 1.1 dyoung if (fq) {
647 1.1 dyoung fq->nxt = fp->nxt;
648 1.1 dyoung fp->nxt = 0;
649 1.1 dyoung fatp_free(fat, fp);
650 1.1 dyoung } else {
651 1.1 dyoung KASSERT(fat->port[hi] == fp);
652 1.1 dyoung
653 1.1 dyoung if (fp->nxt) {
654 1.1 dyoung fat->port[hi]
655 1.1 dyoung = fatp_next(fat, fp);
656 1.1 dyoung fp->nxt = 0;
657 1.1 dyoung fatp_free(fat, fp);
658 1.1 dyoung } else {
659 1.1 dyoung /* retain for next use.
660 1.1 dyoung */
661 1.1 dyoung ;
662 1.1 dyoung }
663 1.1 dyoung }
664 1.1 dyoung }
665 1.1 dyoung }
666 1.1 dyoung vtw->port_key ^= ~0;
667 1.1 dyoung }
668 1.1 dyoung
669 1.1 dyoung vtw->hashed = 0;
670 1.1 dyoung }
671 1.1 dyoung
672 1.1 dyoung /*!\brief remove entry from hash, possibly free.
673 1.1 dyoung */
674 1.1 dyoung void
675 1.1 dyoung vtw_del(vtw_ctl_t *ctl, vtw_t *vtw)
676 1.1 dyoung {
677 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
678 1.1 dyoung
679 1.1 dyoung if (vtw->hashed) {
680 1.1 dyoung ++vtw_stats.del;
681 1.1 dyoung vtw_unhash(ctl, vtw);
682 1.1 dyoung }
683 1.1 dyoung
684 1.1 dyoung /* We only delete the oldest entry.
685 1.1 dyoung */
686 1.1 dyoung if (vtw != ctl->oldest.v)
687 1.1 dyoung return;
688 1.1 dyoung
689 1.1 dyoung --ctl->nalloc;
690 1.1 dyoung ++ctl->nfree;
691 1.1 dyoung
692 1.1 dyoung vtw->expire.tv_sec = 0;
693 1.1 dyoung vtw->expire.tv_usec = ~0;
694 1.1 dyoung
695 1.1 dyoung if (!ctl->nalloc)
696 1.1 dyoung ctl->oldest.v = 0;
697 1.1 dyoung
698 1.1 dyoung ctl->oldest.v = vtw_next(ctl, vtw);
699 1.1 dyoung }
700 1.1 dyoung
701 1.4 dholland /*!\brief insert vestigial timewait in hash chain
702 1.1 dyoung */
703 1.1 dyoung static void
704 1.1 dyoung vtw_inshash_v4(vtw_ctl_t *ctl, vtw_t *vtw)
705 1.1 dyoung {
706 1.1 dyoung uint32_t idx = vtw_index(ctl, vtw);
707 1.1 dyoung uint32_t tag;
708 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
709 1.1 dyoung
710 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
711 1.1 dyoung KASSERT(!vtw->hashed);
712 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
713 1.1 dyoung
714 1.1 dyoung ++vtw_stats.ins;
715 1.1 dyoung
716 1.1 dyoung tag = v4_tag(v4->faddr, v4->fport,
717 1.1 dyoung v4->laddr, v4->lport);
718 1.1 dyoung
719 1.1 dyoung vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw);
720 1.1 dyoung
721 1.1 dyoung db_trace(KTR_VTW, (ctl
722 1.1 dyoung , "vtw: ins %8.8x:%4.4x %8.8x:%4.4x"
723 1.1 dyoung " tag %8.8x key %8.8x"
724 1.1 dyoung , v4->faddr, v4->fport
725 1.1 dyoung , v4->laddr, v4->lport
726 1.1 dyoung , tag
727 1.1 dyoung , vtw->key));
728 1.1 dyoung
729 1.1 dyoung tag = v4_port_tag(v4->lport);
730 1.1 dyoung vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw);
731 1.1 dyoung
732 1.1 dyoung db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x"
733 1.1 dyoung , v4->lport, v4->lport
734 1.1 dyoung , tag
735 1.1 dyoung , vtw->key));
736 1.1 dyoung
737 1.1 dyoung vtw->hashed = 1;
738 1.1 dyoung }
739 1.1 dyoung
740 1.4 dholland /*!\brief insert vestigial timewait in hash chain
741 1.1 dyoung */
742 1.1 dyoung static void
743 1.1 dyoung vtw_inshash_v6(vtw_ctl_t *ctl, vtw_t *vtw)
744 1.1 dyoung {
745 1.1 dyoung uint32_t idx = vtw_index(ctl, vtw);
746 1.1 dyoung uint32_t tag;
747 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
748 1.1 dyoung
749 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
750 1.1 dyoung KASSERT(!vtw->hashed);
751 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
752 1.1 dyoung
753 1.1 dyoung ++vtw_stats.ins;
754 1.1 dyoung
755 1.1 dyoung tag = v6_tag(&v6->faddr, v6->fport,
756 1.1 dyoung &v6->laddr, v6->lport);
757 1.1 dyoung
758 1.1 dyoung vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw);
759 1.1 dyoung
760 1.1 dyoung tag = v6_port_tag(v6->lport);
761 1.1 dyoung vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw);
762 1.1 dyoung
763 1.1 dyoung db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x"
764 1.1 dyoung , v6->lport, v6->lport
765 1.1 dyoung , tag
766 1.1 dyoung , vtw->key));
767 1.1 dyoung
768 1.1 dyoung vtw->hashed = 1;
769 1.1 dyoung }
770 1.1 dyoung
771 1.1 dyoung static vtw_t *
772 1.1 dyoung vtw_lookup_hash_v4(vtw_ctl_t *ctl, uint32_t faddr, uint16_t fport
773 1.1 dyoung , uint32_t laddr, uint16_t lport
774 1.1 dyoung , int which)
775 1.1 dyoung {
776 1.1 dyoung vtw_v4_t *v4;
777 1.1 dyoung vtw_t *vtw;
778 1.1 dyoung uint32_t tag;
779 1.1 dyoung fatp_t *fp;
780 1.1 dyoung int i;
781 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
782 1.1 dyoung
783 1.1 dyoung if (!ctl || !ctl->fat)
784 1.1 dyoung return 0;
785 1.1 dyoung
786 1.1 dyoung ++vtw_stats.look[which];
787 1.1 dyoung
788 1.1 dyoung if (which) {
789 1.1 dyoung tag = v4_port_tag(lport);
790 1.1 dyoung fp = ctl->fat->port[tag & ctl->fat->mask];
791 1.1 dyoung } else {
792 1.1 dyoung tag = v4_tag(faddr, fport, laddr, lport);
793 1.1 dyoung fp = ctl->fat->hash[tag & ctl->fat->mask];
794 1.1 dyoung }
795 1.1 dyoung
796 1.1 dyoung while (fp && fp->inuse) {
797 1.1 dyoung uint32_t inuse = fp->inuse;
798 1.1 dyoung
799 1.1 dyoung ++fatps;
800 1.1 dyoung
801 1.1 dyoung for (i = 0; inuse && i < fatp_ntags(); ++i) {
802 1.1 dyoung uint32_t idx;
803 1.1 dyoung
804 1.1 dyoung if (!(inuse & (1 << i)))
805 1.1 dyoung continue;
806 1.1 dyoung
807 1.1 dyoung inuse ^= 1 << i;
808 1.1 dyoung
809 1.1 dyoung ++probes;
810 1.1 dyoung ++vtw_stats.probe[which];
811 1.1 dyoung
812 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
813 1.1 dyoung vtw = vtw_from_index(ctl, idx);
814 1.1 dyoung
815 1.1 dyoung if (!vtw) {
816 1.1 dyoung /* Hopefully fast path.
817 1.1 dyoung */
818 1.1 dyoung db_trace(KTR_VTW
819 1.1 dyoung , (fp, "vtw: fast %A:%P %A:%P"
820 1.1 dyoung " idx %x tag %x"
821 1.1 dyoung , faddr, fport
822 1.1 dyoung , laddr, lport
823 1.1 dyoung , idx, tag));
824 1.1 dyoung continue;
825 1.1 dyoung }
826 1.1 dyoung
827 1.1 dyoung v4 = (void*)vtw;
828 1.1 dyoung
829 1.1 dyoung /* The de-referencing of vtw is what we want to avoid.
830 1.1 dyoung * Losing.
831 1.1 dyoung */
832 1.1 dyoung if (vtw_alive(vtw)
833 1.1 dyoung && ((which ? vtw->port_key : vtw->key)
834 1.1 dyoung == fatp_key(ctl->fat, fp, i))
835 1.1 dyoung && (which
836 1.1 dyoung || (v4->faddr == faddr && v4->laddr == laddr
837 1.1 dyoung && v4->fport == fport))
838 1.1 dyoung && v4->lport == lport) {
839 1.1 dyoung ++vtw_stats.hit[which];
840 1.1 dyoung
841 1.1 dyoung db_trace(KTR_VTW
842 1.1 dyoung , (fp, "vtw: hit %8.8x:%4.4x"
843 1.1 dyoung " %8.8x:%4.4x idx %x key %x"
844 1.1 dyoung , faddr, fport
845 1.1 dyoung , laddr, lport
846 1.1 dyoung , idx_decode(ctl, idx), vtw->key));
847 1.1 dyoung
848 1.1 dyoung KASSERT(vtw->hashed);
849 1.1 dyoung
850 1.1 dyoung goto out;
851 1.1 dyoung }
852 1.1 dyoung ++vtw_stats.losing[which];
853 1.1 dyoung ++losings;
854 1.1 dyoung
855 1.1 dyoung if (vtw_alive(vtw)) {
856 1.1 dyoung db_trace(KTR_VTW
857 1.1 dyoung , (fp, "vtw:!mis %8.8x:%4.4x"
858 1.1 dyoung " %8.8x:%4.4x key %x tag %x"
859 1.1 dyoung , faddr, fport
860 1.1 dyoung , laddr, lport
861 1.1 dyoung , fatp_key(ctl->fat, fp, i)
862 1.1 dyoung , v4_tag(faddr, fport
863 1.1 dyoung , laddr, lport)));
864 1.1 dyoung db_trace(KTR_VTW
865 1.1 dyoung , (vtw, "vtw:!mis %8.8x:%4.4x"
866 1.1 dyoung " %8.8x:%4.4x key %x tag %x"
867 1.1 dyoung , v4->faddr, v4->fport
868 1.1 dyoung , v4->laddr, v4->lport
869 1.1 dyoung , vtw->key
870 1.1 dyoung , v4_tag(v4->faddr, v4->fport
871 1.1 dyoung , v4->laddr, v4->lport)));
872 1.1 dyoung
873 1.1 dyoung if (vtw->key == fatp_key(ctl->fat, fp, i)) {
874 1.1 dyoung db_trace(KTR_VTW
875 1.1 dyoung , (vtw, "vtw:!mis %8.8x:%4.4x"
876 1.1 dyoung " %8.8x:%4.4x key %x"
877 1.1 dyoung " which %x"
878 1.1 dyoung , v4->faddr, v4->fport
879 1.1 dyoung , v4->laddr, v4->lport
880 1.1 dyoung , vtw->key
881 1.1 dyoung , which));
882 1.1 dyoung
883 1.1 dyoung } else {
884 1.1 dyoung db_trace(KTR_VTW
885 1.1 dyoung , (vtw
886 1.1 dyoung , "vtw:!mis"
887 1.1 dyoung " key %8.8x != %8.8x"
888 1.1 dyoung " idx %x i %x which %x"
889 1.1 dyoung , vtw->key
890 1.1 dyoung , fatp_key(ctl->fat, fp, i)
891 1.1 dyoung , idx_decode(ctl, idx)
892 1.1 dyoung , i
893 1.1 dyoung , which));
894 1.1 dyoung }
895 1.1 dyoung } else {
896 1.1 dyoung db_trace(KTR_VTW
897 1.1 dyoung , (fp
898 1.1 dyoung , "vtw:!mis free entry"
899 1.1 dyoung " idx %x vtw %p which %x"
900 1.1 dyoung , idx_decode(ctl, idx)
901 1.1 dyoung , vtw, which));
902 1.1 dyoung }
903 1.1 dyoung }
904 1.1 dyoung
905 1.1 dyoung if (fp->nxt) {
906 1.1 dyoung fp = fatp_next(ctl->fat, fp);
907 1.1 dyoung } else {
908 1.1 dyoung break;
909 1.1 dyoung }
910 1.1 dyoung }
911 1.1 dyoung ++vtw_stats.miss[which];
912 1.1 dyoung vtw = 0;
913 1.1 dyoung out:
914 1.1 dyoung if (fatps > vtw_stats.max_chain[which])
915 1.1 dyoung vtw_stats.max_chain[which] = fatps;
916 1.1 dyoung if (probes > vtw_stats.max_probe[which])
917 1.1 dyoung vtw_stats.max_probe[which] = probes;
918 1.1 dyoung if (losings > vtw_stats.max_loss[which])
919 1.1 dyoung vtw_stats.max_loss[which] = losings;
920 1.1 dyoung
921 1.1 dyoung return vtw;
922 1.1 dyoung }
923 1.1 dyoung
924 1.1 dyoung static vtw_t *
925 1.1 dyoung vtw_lookup_hash_v6(vtw_ctl_t *ctl, const struct in6_addr *faddr, uint16_t fport
926 1.1 dyoung , const struct in6_addr *laddr, uint16_t lport
927 1.1 dyoung , int which)
928 1.1 dyoung {
929 1.1 dyoung vtw_v6_t *v6;
930 1.1 dyoung vtw_t *vtw;
931 1.1 dyoung uint32_t tag;
932 1.1 dyoung fatp_t *fp;
933 1.1 dyoung int i;
934 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
935 1.1 dyoung
936 1.1 dyoung ++vtw_stats.look[which];
937 1.1 dyoung
938 1.1 dyoung if (!ctl || !ctl->fat)
939 1.1 dyoung return 0;
940 1.1 dyoung
941 1.1 dyoung if (which) {
942 1.1 dyoung tag = v6_port_tag(lport);
943 1.1 dyoung fp = ctl->fat->port[tag & ctl->fat->mask];
944 1.1 dyoung } else {
945 1.1 dyoung tag = v6_tag(faddr, fport, laddr, lport);
946 1.1 dyoung fp = ctl->fat->hash[tag & ctl->fat->mask];
947 1.1 dyoung }
948 1.1 dyoung
949 1.1 dyoung while (fp && fp->inuse) {
950 1.1 dyoung uint32_t inuse = fp->inuse;
951 1.1 dyoung
952 1.1 dyoung ++fatps;
953 1.1 dyoung
954 1.1 dyoung for (i = 0; inuse && i < fatp_ntags(); ++i) {
955 1.1 dyoung uint32_t idx;
956 1.1 dyoung
957 1.1 dyoung if (!(inuse & (1 << i)))
958 1.1 dyoung continue;
959 1.1 dyoung
960 1.1 dyoung inuse ^= 1 << i;
961 1.1 dyoung
962 1.1 dyoung ++probes;
963 1.1 dyoung ++vtw_stats.probe[which];
964 1.1 dyoung
965 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
966 1.1 dyoung vtw = vtw_from_index(ctl, idx);
967 1.1 dyoung
968 1.1 dyoung db_trace(KTR_VTW
969 1.1 dyoung , (fp, "probe: %2d %6A:%4.4x %6A:%4.4x idx %x"
970 1.1 dyoung , i
971 1.1 dyoung , db_store(faddr, sizeof (*faddr)), fport
972 1.1 dyoung , db_store(laddr, sizeof (*laddr)), lport
973 1.1 dyoung , idx_decode(ctl, idx)));
974 1.1 dyoung
975 1.1 dyoung if (!vtw) {
976 1.1 dyoung /* Hopefully fast path.
977 1.1 dyoung */
978 1.1 dyoung continue;
979 1.1 dyoung }
980 1.1 dyoung
981 1.1 dyoung v6 = (void*)vtw;
982 1.1 dyoung
983 1.1 dyoung if (vtw_alive(vtw)
984 1.1 dyoung && ((which ? vtw->port_key : vtw->key)
985 1.1 dyoung == fatp_key(ctl->fat, fp, i))
986 1.1 dyoung && v6->lport == lport
987 1.1 dyoung && (which
988 1.1 dyoung || (v6->fport == fport
989 1.1 dyoung && !bcmp(&v6->faddr, faddr, sizeof (*faddr))
990 1.1 dyoung && !bcmp(&v6->laddr, laddr
991 1.1 dyoung , sizeof (*laddr))))) {
992 1.1 dyoung ++vtw_stats.hit[which];
993 1.1 dyoung
994 1.1 dyoung KASSERT(vtw->hashed);
995 1.1 dyoung goto out;
996 1.1 dyoung } else {
997 1.1 dyoung ++vtw_stats.losing[which];
998 1.1 dyoung ++losings;
999 1.1 dyoung }
1000 1.1 dyoung }
1001 1.1 dyoung
1002 1.1 dyoung if (fp->nxt) {
1003 1.1 dyoung fp = fatp_next(ctl->fat, fp);
1004 1.1 dyoung } else {
1005 1.1 dyoung break;
1006 1.1 dyoung }
1007 1.1 dyoung }
1008 1.1 dyoung ++vtw_stats.miss[which];
1009 1.1 dyoung vtw = 0;
1010 1.1 dyoung out:
1011 1.1 dyoung if (fatps > vtw_stats.max_chain[which])
1012 1.1 dyoung vtw_stats.max_chain[which] = fatps;
1013 1.1 dyoung if (probes > vtw_stats.max_probe[which])
1014 1.1 dyoung vtw_stats.max_probe[which] = probes;
1015 1.1 dyoung if (losings > vtw_stats.max_loss[which])
1016 1.1 dyoung vtw_stats.max_loss[which] = losings;
1017 1.1 dyoung
1018 1.1 dyoung return vtw;
1019 1.1 dyoung }
1020 1.1 dyoung
1021 1.1 dyoung /*!\brief port iterator
1022 1.1 dyoung */
1023 1.1 dyoung static vtw_t *
1024 1.1 dyoung vtw_next_port_v4(struct tcp_ports_iterator *it)
1025 1.1 dyoung {
1026 1.1 dyoung vtw_ctl_t *ctl = it->ctl;
1027 1.1 dyoung vtw_v4_t *v4;
1028 1.1 dyoung vtw_t *vtw;
1029 1.1 dyoung uint32_t tag;
1030 1.1 dyoung uint16_t lport = it->port;
1031 1.1 dyoung fatp_t *fp;
1032 1.1 dyoung int i;
1033 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
1034 1.1 dyoung
1035 1.1 dyoung tag = v4_port_tag(lport);
1036 1.1 dyoung if (!it->fp) {
1037 1.1 dyoung it->fp = ctl->fat->port[tag & ctl->fat->mask];
1038 1.1 dyoung it->slot_idx = 0;
1039 1.1 dyoung }
1040 1.1 dyoung fp = it->fp;
1041 1.1 dyoung
1042 1.1 dyoung while (fp) {
1043 1.1 dyoung uint32_t inuse = fp->inuse;
1044 1.1 dyoung
1045 1.1 dyoung ++fatps;
1046 1.1 dyoung
1047 1.1 dyoung for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) {
1048 1.1 dyoung uint32_t idx;
1049 1.1 dyoung
1050 1.1 dyoung if (!(inuse & (1 << i)))
1051 1.1 dyoung continue;
1052 1.1 dyoung
1053 1.16 martin inuse &= ~0U << i;
1054 1.1 dyoung
1055 1.1 dyoung if (i < it->slot_idx)
1056 1.1 dyoung continue;
1057 1.1 dyoung
1058 1.1 dyoung ++vtw_stats.probe[1];
1059 1.1 dyoung ++probes;
1060 1.1 dyoung
1061 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
1062 1.1 dyoung vtw = vtw_from_index(ctl, idx);
1063 1.1 dyoung
1064 1.1 dyoung if (!vtw) {
1065 1.1 dyoung /* Hopefully fast path.
1066 1.1 dyoung */
1067 1.1 dyoung continue;
1068 1.1 dyoung }
1069 1.1 dyoung
1070 1.1 dyoung v4 = (void*)vtw;
1071 1.1 dyoung
1072 1.1 dyoung if (vtw_alive(vtw)
1073 1.1 dyoung && vtw->port_key == fatp_key(ctl->fat, fp, i)
1074 1.1 dyoung && v4->lport == lport) {
1075 1.1 dyoung ++vtw_stats.hit[1];
1076 1.1 dyoung
1077 1.1 dyoung it->slot_idx = i + 1;
1078 1.1 dyoung
1079 1.1 dyoung goto out;
1080 1.1 dyoung } else if (vtw_alive(vtw)) {
1081 1.1 dyoung ++vtw_stats.losing[1];
1082 1.1 dyoung ++losings;
1083 1.1 dyoung
1084 1.1 dyoung db_trace(KTR_VTW
1085 1.1 dyoung , (vtw, "vtw:!mis"
1086 1.1 dyoung " port %8.8x:%4.4x %8.8x:%4.4x"
1087 1.1 dyoung " key %x port %x"
1088 1.1 dyoung , v4->faddr, v4->fport
1089 1.1 dyoung , v4->laddr, v4->lport
1090 1.1 dyoung , vtw->key
1091 1.1 dyoung , lport));
1092 1.1 dyoung } else {
1093 1.1 dyoung /* Really losing here. We are coming
1094 1.1 dyoung * up with references to free entries.
1095 1.1 dyoung * Might find it better to use
1096 1.1 dyoung * traditional, or need another
1097 1.1 dyoung * add-hockery. The other add-hockery
1098 1.1 dyoung * would be to pul more into into the
1099 1.1 dyoung * cache line to reject the false
1100 1.1 dyoung * hits.
1101 1.1 dyoung */
1102 1.1 dyoung ++vtw_stats.losing[1];
1103 1.1 dyoung ++losings;
1104 1.1 dyoung db_trace(KTR_VTW
1105 1.1 dyoung , (fp, "vtw:!mis port %x"
1106 1.1 dyoung " - free entry idx %x vtw %p"
1107 1.1 dyoung , lport
1108 1.1 dyoung , idx_decode(ctl, idx)
1109 1.1 dyoung , vtw));
1110 1.1 dyoung }
1111 1.1 dyoung }
1112 1.1 dyoung
1113 1.1 dyoung if (fp->nxt) {
1114 1.1 dyoung it->fp = fp = fatp_next(ctl->fat, fp);
1115 1.1 dyoung it->slot_idx = 0;
1116 1.1 dyoung } else {
1117 1.1 dyoung it->fp = 0;
1118 1.1 dyoung break;
1119 1.1 dyoung }
1120 1.1 dyoung }
1121 1.1 dyoung ++vtw_stats.miss[1];
1122 1.1 dyoung
1123 1.1 dyoung vtw = 0;
1124 1.1 dyoung out:
1125 1.1 dyoung if (fatps > vtw_stats.max_chain[1])
1126 1.1 dyoung vtw_stats.max_chain[1] = fatps;
1127 1.1 dyoung if (probes > vtw_stats.max_probe[1])
1128 1.1 dyoung vtw_stats.max_probe[1] = probes;
1129 1.1 dyoung if (losings > vtw_stats.max_loss[1])
1130 1.1 dyoung vtw_stats.max_loss[1] = losings;
1131 1.1 dyoung
1132 1.1 dyoung return vtw;
1133 1.1 dyoung }
1134 1.1 dyoung
1135 1.1 dyoung /*!\brief port iterator
1136 1.1 dyoung */
1137 1.1 dyoung static vtw_t *
1138 1.1 dyoung vtw_next_port_v6(struct tcp_ports_iterator *it)
1139 1.1 dyoung {
1140 1.1 dyoung vtw_ctl_t *ctl = it->ctl;
1141 1.1 dyoung vtw_v6_t *v6;
1142 1.1 dyoung vtw_t *vtw;
1143 1.1 dyoung uint32_t tag;
1144 1.1 dyoung uint16_t lport = it->port;
1145 1.1 dyoung fatp_t *fp;
1146 1.1 dyoung int i;
1147 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
1148 1.1 dyoung
1149 1.1 dyoung tag = v6_port_tag(lport);
1150 1.1 dyoung if (!it->fp) {
1151 1.1 dyoung it->fp = ctl->fat->port[tag & ctl->fat->mask];
1152 1.1 dyoung it->slot_idx = 0;
1153 1.1 dyoung }
1154 1.1 dyoung fp = it->fp;
1155 1.1 dyoung
1156 1.1 dyoung while (fp) {
1157 1.1 dyoung uint32_t inuse = fp->inuse;
1158 1.1 dyoung
1159 1.1 dyoung ++fatps;
1160 1.1 dyoung
1161 1.1 dyoung for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) {
1162 1.1 dyoung uint32_t idx;
1163 1.1 dyoung
1164 1.1 dyoung if (!(inuse & (1 << i)))
1165 1.1 dyoung continue;
1166 1.1 dyoung
1167 1.16 martin inuse &= ~0U << i;
1168 1.1 dyoung
1169 1.1 dyoung if (i < it->slot_idx)
1170 1.1 dyoung continue;
1171 1.1 dyoung
1172 1.1 dyoung ++vtw_stats.probe[1];
1173 1.1 dyoung ++probes;
1174 1.1 dyoung
1175 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
1176 1.1 dyoung vtw = vtw_from_index(ctl, idx);
1177 1.1 dyoung
1178 1.1 dyoung if (!vtw) {
1179 1.1 dyoung /* Hopefully fast path.
1180 1.1 dyoung */
1181 1.1 dyoung continue;
1182 1.1 dyoung }
1183 1.1 dyoung
1184 1.1 dyoung v6 = (void*)vtw;
1185 1.1 dyoung
1186 1.1 dyoung db_trace(KTR_VTW
1187 1.1 dyoung , (vtw, "vtw: i %x idx %x fp->tag %x"
1188 1.1 dyoung " tag %x xtra %x"
1189 1.1 dyoung , i, idx_decode(ctl, idx)
1190 1.1 dyoung , fp->tag[i], tag, fatp_xtra[i]));
1191 1.1 dyoung
1192 1.1 dyoung if (vtw_alive(vtw)
1193 1.1 dyoung && vtw->port_key == fatp_key(ctl->fat, fp, i)
1194 1.1 dyoung && v6->lport == lport) {
1195 1.1 dyoung ++vtw_stats.hit[1];
1196 1.1 dyoung
1197 1.1 dyoung db_trace(KTR_VTW
1198 1.1 dyoung , (fp, "vtw: nxt port %P - %4.4x"
1199 1.1 dyoung " idx %x key %x"
1200 1.1 dyoung , lport, lport
1201 1.1 dyoung , idx_decode(ctl, idx), vtw->key));
1202 1.1 dyoung
1203 1.1 dyoung it->slot_idx = i + 1;
1204 1.1 dyoung goto out;
1205 1.1 dyoung } else if (vtw_alive(vtw)) {
1206 1.1 dyoung ++vtw_stats.losing[1];
1207 1.1 dyoung
1208 1.1 dyoung db_trace(KTR_VTW
1209 1.1 dyoung , (vtw, "vtw:!mis port %6A:%4.4x"
1210 1.1 dyoung " %6A:%4.4x key %x port %x"
1211 1.1 dyoung , db_store(&v6->faddr
1212 1.1 dyoung , sizeof (v6->faddr))
1213 1.1 dyoung , v6->fport
1214 1.1 dyoung , db_store(&v6->laddr
1215 1.1 dyoung , sizeof (v6->faddr))
1216 1.1 dyoung , v6->lport
1217 1.1 dyoung , vtw->key
1218 1.1 dyoung , lport));
1219 1.1 dyoung } else {
1220 1.1 dyoung /* Really losing here. We are coming
1221 1.1 dyoung * up with references to free entries.
1222 1.1 dyoung * Might find it better to use
1223 1.1 dyoung * traditional, or need another
1224 1.1 dyoung * add-hockery. The other add-hockery
1225 1.1 dyoung * would be to pul more into into the
1226 1.1 dyoung * cache line to reject the false
1227 1.1 dyoung * hits.
1228 1.1 dyoung */
1229 1.1 dyoung ++vtw_stats.losing[1];
1230 1.1 dyoung ++losings;
1231 1.1 dyoung
1232 1.1 dyoung db_trace(KTR_VTW
1233 1.1 dyoung , (fp
1234 1.1 dyoung , "vtw:!mis port %x"
1235 1.1 dyoung " - free entry idx %x vtw %p"
1236 1.1 dyoung , lport, idx_decode(ctl, idx)
1237 1.1 dyoung , vtw));
1238 1.1 dyoung }
1239 1.1 dyoung }
1240 1.1 dyoung
1241 1.1 dyoung if (fp->nxt) {
1242 1.1 dyoung it->fp = fp = fatp_next(ctl->fat, fp);
1243 1.1 dyoung it->slot_idx = 0;
1244 1.1 dyoung } else {
1245 1.1 dyoung it->fp = 0;
1246 1.1 dyoung break;
1247 1.1 dyoung }
1248 1.1 dyoung }
1249 1.1 dyoung ++vtw_stats.miss[1];
1250 1.1 dyoung
1251 1.1 dyoung vtw = 0;
1252 1.1 dyoung out:
1253 1.1 dyoung if (fatps > vtw_stats.max_chain[1])
1254 1.1 dyoung vtw_stats.max_chain[1] = fatps;
1255 1.1 dyoung if (probes > vtw_stats.max_probe[1])
1256 1.1 dyoung vtw_stats.max_probe[1] = probes;
1257 1.1 dyoung if (losings > vtw_stats.max_loss[1])
1258 1.1 dyoung vtw_stats.max_loss[1] = losings;
1259 1.1 dyoung
1260 1.1 dyoung return vtw;
1261 1.1 dyoung }
1262 1.1 dyoung
1263 1.1 dyoung /*!\brief initialise the VTW allocation arena
1264 1.1 dyoung *
1265 1.1 dyoung * There are 1+3 allocation classes:
1266 1.1 dyoung * 0 classless
1267 1.1 dyoung * {1,2,3} MSL-class based allocation
1268 1.1 dyoung *
1269 1.1 dyoung * The allocation arenas are all initialised. Classless gets all the
1270 1.1 dyoung * space. MSL-class based divides the arena, so that allocation
1271 1.1 dyoung * within a class can proceed without having to consider entries
1272 1.1 dyoung * (aka: cache lines) from different classes.
1273 1.1 dyoung *
1274 1.1 dyoung * Usually, we are completely classless or class-based, but there can be
1275 1.1 dyoung * transition periods, corresponding to dynamic adjustments in the config
1276 1.1 dyoung * by the operator.
1277 1.1 dyoung */
1278 1.1 dyoung static void
1279 1.6 dyoung vtw_init(fatp_ctl_t *fat, vtw_ctl_t *ctl, const uint32_t n, vtw_t *ctl_base_v)
1280 1.1 dyoung {
1281 1.6 dyoung int class_n, i;
1282 1.6 dyoung vtw_t *base;
1283 1.1 dyoung
1284 1.6 dyoung ctl->base.v = ctl_base_v;
1285 1.1 dyoung
1286 1.6 dyoung if (ctl->is_v4) {
1287 1.6 dyoung ctl->lim.v4 = ctl->base.v4 + n - 1;
1288 1.6 dyoung ctl->alloc.v4 = ctl->base.v4;
1289 1.6 dyoung } else {
1290 1.6 dyoung ctl->lim.v6 = ctl->base.v6 + n - 1;
1291 1.6 dyoung ctl->alloc.v6 = ctl->base.v6;
1292 1.6 dyoung }
1293 1.1 dyoung
1294 1.6 dyoung ctl->nfree = n;
1295 1.6 dyoung ctl->ctl = ctl;
1296 1.1 dyoung
1297 1.6 dyoung ctl->idx_bits = 32;
1298 1.6 dyoung for (ctl->idx_mask = ~0; (ctl->idx_mask & (n-1)) == n-1; ) {
1299 1.6 dyoung ctl->idx_mask >>= 1;
1300 1.6 dyoung ctl->idx_bits -= 1;
1301 1.6 dyoung }
1302 1.1 dyoung
1303 1.6 dyoung ctl->idx_mask <<= 1;
1304 1.6 dyoung ctl->idx_mask |= 1;
1305 1.6 dyoung ctl->idx_bits += 1;
1306 1.1 dyoung
1307 1.6 dyoung ctl->fat = fat;
1308 1.6 dyoung fat->vtw = ctl;
1309 1.1 dyoung
1310 1.6 dyoung /* Divide the resources equally amongst the classes.
1311 1.6 dyoung * This is not optimal, as the different classes
1312 1.6 dyoung * arrive and leave at different rates, but it is
1313 1.6 dyoung * the best I can do for now.
1314 1.6 dyoung */
1315 1.6 dyoung class_n = n / (VTW_NCLASS-1);
1316 1.6 dyoung base = ctl->base.v;
1317 1.1 dyoung
1318 1.6 dyoung for (i = 1; i < VTW_NCLASS; ++i) {
1319 1.6 dyoung int j;
1320 1.1 dyoung
1321 1.6 dyoung ctl[i] = ctl[0];
1322 1.6 dyoung ctl[i].clidx = i;
1323 1.1 dyoung
1324 1.6 dyoung ctl[i].base.v = base;
1325 1.6 dyoung ctl[i].alloc = ctl[i].base;
1326 1.1 dyoung
1327 1.6 dyoung for (j = 0; j < class_n - 1; ++j) {
1328 1.6 dyoung if (tcp_msl_enable)
1329 1.6 dyoung base->msl_class = i;
1330 1.1 dyoung base = vtw_next(ctl, base);
1331 1.1 dyoung }
1332 1.6 dyoung
1333 1.6 dyoung ctl[i].lim.v = base;
1334 1.6 dyoung base = vtw_next(ctl, base);
1335 1.6 dyoung ctl[i].nfree = class_n;
1336 1.1 dyoung }
1337 1.1 dyoung
1338 1.1 dyoung vtw_debug_init();
1339 1.1 dyoung }
1340 1.1 dyoung
1341 1.1 dyoung /*!\brief map class to TCP MSL
1342 1.1 dyoung */
1343 1.1 dyoung static inline uint32_t
1344 1.11 matt class_to_msl(int msl_class)
1345 1.1 dyoung {
1346 1.11 matt switch (msl_class) {
1347 1.1 dyoung case 0:
1348 1.1 dyoung case 1:
1349 1.1 dyoung return tcp_msl_remote ? tcp_msl_remote : (TCPTV_MSL >> 0);
1350 1.1 dyoung case 2:
1351 1.1 dyoung return tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1);
1352 1.1 dyoung default:
1353 1.1 dyoung return tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2);
1354 1.1 dyoung }
1355 1.1 dyoung }
1356 1.1 dyoung
1357 1.1 dyoung /*!\brief map TCP MSL to class
1358 1.1 dyoung */
1359 1.1 dyoung static inline uint32_t
1360 1.1 dyoung msl_to_class(int msl)
1361 1.1 dyoung {
1362 1.1 dyoung if (tcp_msl_enable) {
1363 1.1 dyoung if (msl <= (tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2)))
1364 1.1 dyoung return 1+2;
1365 1.1 dyoung if (msl <= (tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1)))
1366 1.1 dyoung return 1+1;
1367 1.1 dyoung return 1;
1368 1.1 dyoung }
1369 1.1 dyoung return 0;
1370 1.1 dyoung }
1371 1.1 dyoung
1372 1.1 dyoung /*!\brief allocate a vtw entry
1373 1.1 dyoung */
1374 1.1 dyoung static inline vtw_t *
1375 1.1 dyoung vtw_alloc(vtw_ctl_t *ctl)
1376 1.1 dyoung {
1377 1.1 dyoung vtw_t *vtw = 0;
1378 1.1 dyoung int stuck = 0;
1379 1.1 dyoung int avail = ctl ? (ctl->nalloc + ctl->nfree) : 0;
1380 1.1 dyoung int msl;
1381 1.1 dyoung
1382 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
1383 1.1 dyoung
1384 1.1 dyoung /* If no resources, we will not get far.
1385 1.1 dyoung */
1386 1.1 dyoung if (!ctl || !ctl->base.v4 || avail <= 0)
1387 1.1 dyoung return 0;
1388 1.1 dyoung
1389 1.1 dyoung /* Obtain a free one.
1390 1.1 dyoung */
1391 1.1 dyoung while (!ctl->nfree) {
1392 1.1 dyoung vtw_age(ctl, 0);
1393 1.1 dyoung
1394 1.1 dyoung if (++stuck > avail) {
1395 1.1 dyoung /* When in transition between
1396 1.1 dyoung * schemes (classless, classed) we
1397 1.1 dyoung * can be stuck having to await the
1398 1.1 dyoung * expiration of cross-allocated entries.
1399 1.1 dyoung *
1400 1.1 dyoung * Returning zero means we will fall back to the
1401 1.1 dyoung * traditional TIME_WAIT handling, except in the
1402 1.1 dyoung * case of a re-shed, in which case we cannot
1403 1.1 dyoung * perform the reshecd, but will retain the extant
1404 1.1 dyoung * entry.
1405 1.1 dyoung */
1406 1.1 dyoung db_trace(KTR_VTW
1407 1.1 dyoung , (ctl, "vtw:!none free in class %x %x/%x"
1408 1.1 dyoung , ctl->clidx
1409 1.1 dyoung , ctl->nalloc, ctl->nfree));
1410 1.1 dyoung
1411 1.1 dyoung return 0;
1412 1.1 dyoung }
1413 1.1 dyoung }
1414 1.1 dyoung
1415 1.1 dyoung vtw = ctl->alloc.v;
1416 1.1 dyoung
1417 1.1 dyoung if (vtw->msl_class != ctl->clidx) {
1418 1.1 dyoung /* Usurping rules:
1419 1.1 dyoung * 0 -> {1,2,3} or {1,2,3} -> 0
1420 1.1 dyoung */
1421 1.1 dyoung KASSERT(!vtw->msl_class || !ctl->clidx);
1422 1.1 dyoung
1423 1.1 dyoung if (vtw->hashed || vtw->expire.tv_sec) {
1424 1.1 dyoung /* As this is owned by some other class,
1425 1.1 dyoung * we must wait for it to expire it.
1426 1.1 dyoung * This will only happen on class/classless
1427 1.1 dyoung * transitions, which are guaranteed to progress
1428 1.1 dyoung * to completion in small finite time, barring bugs.
1429 1.1 dyoung */
1430 1.1 dyoung db_trace(KTR_VTW
1431 1.1 dyoung , (ctl, "vtw:!%p class %x!=%x %x:%x%s"
1432 1.1 dyoung , vtw, vtw->msl_class, ctl->clidx
1433 1.1 dyoung , vtw->expire.tv_sec
1434 1.1 dyoung , vtw->expire.tv_usec
1435 1.1 dyoung , vtw->hashed ? " hashed" : ""));
1436 1.1 dyoung
1437 1.1 dyoung return 0;
1438 1.1 dyoung }
1439 1.1 dyoung
1440 1.1 dyoung db_trace(KTR_VTW
1441 1.1 dyoung , (ctl, "vtw:!%p usurped from %x to %x"
1442 1.1 dyoung , vtw, vtw->msl_class, ctl->clidx));
1443 1.1 dyoung
1444 1.1 dyoung vtw->msl_class = ctl->clidx;
1445 1.1 dyoung }
1446 1.1 dyoung
1447 1.1 dyoung if (vtw_alive(vtw)) {
1448 1.1 dyoung KASSERT(0 && "next free not free");
1449 1.1 dyoung return 0;
1450 1.1 dyoung }
1451 1.1 dyoung
1452 1.1 dyoung /* Advance allocation poiter.
1453 1.1 dyoung */
1454 1.1 dyoung ctl->alloc.v = vtw_next(ctl, vtw);
1455 1.1 dyoung
1456 1.1 dyoung --ctl->nfree;
1457 1.1 dyoung ++ctl->nalloc;
1458 1.1 dyoung
1459 1.1 dyoung msl = (2 * class_to_msl(ctl->clidx) * 1000) / PR_SLOWHZ; // msec
1460 1.1 dyoung
1461 1.1 dyoung /* mark expiration
1462 1.1 dyoung */
1463 1.3 drochner getmicrouptime(&vtw->expire);
1464 1.1 dyoung
1465 1.1 dyoung /* Move expiration into the future.
1466 1.1 dyoung */
1467 1.1 dyoung vtw->expire.tv_sec += msl / 1000;
1468 1.1 dyoung vtw->expire.tv_usec += 1000 * (msl % 1000);
1469 1.1 dyoung
1470 1.1 dyoung while (vtw->expire.tv_usec >= 1000*1000) {
1471 1.1 dyoung vtw->expire.tv_usec -= 1000*1000;
1472 1.1 dyoung vtw->expire.tv_sec += 1;
1473 1.1 dyoung }
1474 1.1 dyoung
1475 1.1 dyoung if (!ctl->oldest.v)
1476 1.1 dyoung ctl->oldest.v = vtw;
1477 1.1 dyoung
1478 1.1 dyoung return vtw;
1479 1.1 dyoung }
1480 1.1 dyoung
1481 1.1 dyoung /*!\brief expiration
1482 1.1 dyoung */
1483 1.1 dyoung static int
1484 1.1 dyoung vtw_age(vtw_ctl_t *ctl, struct timeval *_when)
1485 1.1 dyoung {
1486 1.1 dyoung vtw_t *vtw;
1487 1.1 dyoung struct timeval then, *when = _when;
1488 1.1 dyoung int maxtries = 0;
1489 1.1 dyoung
1490 1.1 dyoung if (!ctl->oldest.v) {
1491 1.1 dyoung KASSERT(!ctl->nalloc);
1492 1.1 dyoung return 0;
1493 1.1 dyoung }
1494 1.1 dyoung
1495 1.1 dyoung for (vtw = ctl->oldest.v; vtw && ctl->nalloc; ) {
1496 1.1 dyoung if (++maxtries > ctl->nalloc)
1497 1.1 dyoung break;
1498 1.1 dyoung
1499 1.1 dyoung if (vtw->msl_class != ctl->clidx) {
1500 1.1 dyoung db_trace(KTR_VTW
1501 1.1 dyoung , (vtw, "vtw:!age class mismatch %x != %x"
1502 1.1 dyoung , vtw->msl_class, ctl->clidx));
1503 1.1 dyoung /* XXXX
1504 1.1 dyoung * See if the appropriate action is to skip to the next.
1505 1.1 dyoung * XXXX
1506 1.1 dyoung */
1507 1.1 dyoung ctl->oldest.v = vtw = vtw_next(ctl, vtw);
1508 1.1 dyoung continue;
1509 1.1 dyoung }
1510 1.1 dyoung if (!when) {
1511 1.1 dyoung /* Latch oldest timeval if none specified.
1512 1.1 dyoung */
1513 1.1 dyoung then = vtw->expire;
1514 1.1 dyoung when = &then;
1515 1.1 dyoung }
1516 1.1 dyoung
1517 1.1 dyoung if (!timercmp(&vtw->expire, when, <=))
1518 1.1 dyoung break;
1519 1.1 dyoung
1520 1.1 dyoung db_trace(KTR_VTW
1521 1.1 dyoung , (vtw, "vtw: expire %x %8.8x:%8.8x %x/%x"
1522 1.1 dyoung , ctl->clidx
1523 1.1 dyoung , vtw->expire.tv_sec
1524 1.1 dyoung , vtw->expire.tv_usec
1525 1.1 dyoung , ctl->nalloc
1526 1.1 dyoung , ctl->nfree));
1527 1.1 dyoung
1528 1.1 dyoung if (!_when)
1529 1.1 dyoung ++vtw_stats.kill;
1530 1.1 dyoung
1531 1.1 dyoung vtw_del(ctl, vtw);
1532 1.1 dyoung vtw = ctl->oldest.v;
1533 1.1 dyoung }
1534 1.1 dyoung
1535 1.1 dyoung return ctl->nalloc; // # remaining allocated
1536 1.1 dyoung }
1537 1.1 dyoung
1538 1.1 dyoung static callout_t vtw_cs;
1539 1.1 dyoung
1540 1.1 dyoung /*!\brief notice the passage of time.
1541 1.1 dyoung * It seems to be getting faster. What happened to the year?
1542 1.1 dyoung */
1543 1.1 dyoung static void
1544 1.1 dyoung vtw_tick(void *arg)
1545 1.1 dyoung {
1546 1.1 dyoung struct timeval now;
1547 1.1 dyoung int i, cnt = 0;
1548 1.1 dyoung
1549 1.3 drochner getmicrouptime(&now);
1550 1.1 dyoung
1551 1.1 dyoung db_trace(KTR_VTW, (arg, "vtk: tick - now %8.8x:%8.8x"
1552 1.1 dyoung , now.tv_sec, now.tv_usec));
1553 1.1 dyoung
1554 1.1 dyoung mutex_enter(softnet_lock);
1555 1.1 dyoung
1556 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
1557 1.1 dyoung cnt += vtw_age(&vtw_tcpv4[i], &now);
1558 1.1 dyoung cnt += vtw_age(&vtw_tcpv6[i], &now);
1559 1.1 dyoung }
1560 1.1 dyoung
1561 1.1 dyoung /* Keep ticks coming while we need them.
1562 1.1 dyoung */
1563 1.1 dyoung if (cnt)
1564 1.1 dyoung callout_schedule(&vtw_cs, hz / 5);
1565 1.1 dyoung else {
1566 1.1 dyoung tcp_vtw_was_enabled = 0;
1567 1.1 dyoung tcbtable.vestige = 0;
1568 1.1 dyoung }
1569 1.1 dyoung mutex_exit(softnet_lock);
1570 1.1 dyoung }
1571 1.1 dyoung
1572 1.1 dyoung /* in_pcblookup_ports assist for handling vestigial entries.
1573 1.1 dyoung */
1574 1.1 dyoung static void *
1575 1.1 dyoung tcp_init_ports_v4(struct in_addr addr, u_int port, int wild)
1576 1.1 dyoung {
1577 1.1 dyoung struct tcp_ports_iterator *it = &tcp_ports_iterator_v4;
1578 1.1 dyoung
1579 1.1 dyoung bzero(it, sizeof (*it));
1580 1.1 dyoung
1581 1.1 dyoung /* Note: the reference to vtw_tcpv4[0] is fine.
1582 1.1 dyoung * We do not need per-class iteration. We just
1583 1.1 dyoung * need to get to the fat, and there is one
1584 1.1 dyoung * shared fat.
1585 1.1 dyoung */
1586 1.1 dyoung if (vtw_tcpv4[0].fat) {
1587 1.1 dyoung it->addr.v4 = addr;
1588 1.1 dyoung it->port = port;
1589 1.1 dyoung it->wild = !!wild;
1590 1.1 dyoung it->ctl = &vtw_tcpv4[0];
1591 1.1 dyoung
1592 1.1 dyoung ++vtw_stats.look[1];
1593 1.1 dyoung }
1594 1.1 dyoung
1595 1.1 dyoung return it;
1596 1.1 dyoung }
1597 1.1 dyoung
1598 1.1 dyoung /*!\brief export an IPv4 vtw.
1599 1.1 dyoung */
1600 1.1 dyoung static int
1601 1.1 dyoung vtw_export_v4(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res)
1602 1.1 dyoung {
1603 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
1604 1.1 dyoung
1605 1.1 dyoung bzero(res, sizeof (*res));
1606 1.1 dyoung
1607 1.1 dyoung if (ctl && vtw) {
1608 1.1 dyoung if (!ctl->clidx && vtw->msl_class)
1609 1.1 dyoung ctl += vtw->msl_class;
1610 1.1 dyoung else
1611 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
1612 1.1 dyoung
1613 1.1 dyoung res->valid = 1;
1614 1.1 dyoung res->v4 = 1;
1615 1.1 dyoung
1616 1.1 dyoung res->faddr.v4.s_addr = v4->faddr;
1617 1.1 dyoung res->laddr.v4.s_addr = v4->laddr;
1618 1.1 dyoung res->fport = v4->fport;
1619 1.1 dyoung res->lport = v4->lport;
1620 1.1 dyoung res->vtw = vtw; // netlock held over call(s)
1621 1.1 dyoung res->ctl = ctl;
1622 1.1 dyoung res->reuse_addr = vtw->reuse_addr;
1623 1.1 dyoung res->reuse_port = vtw->reuse_port;
1624 1.1 dyoung res->snd_nxt = vtw->snd_nxt;
1625 1.1 dyoung res->rcv_nxt = vtw->rcv_nxt;
1626 1.1 dyoung res->rcv_wnd = vtw->rcv_wnd;
1627 1.1 dyoung res->uid = vtw->uid;
1628 1.1 dyoung }
1629 1.1 dyoung
1630 1.1 dyoung return res->valid;
1631 1.1 dyoung }
1632 1.1 dyoung
1633 1.1 dyoung /*!\brief return next port in the port iterator. yowza.
1634 1.1 dyoung */
1635 1.1 dyoung static int
1636 1.1 dyoung tcp_next_port_v4(void *arg, struct vestigial_inpcb *res)
1637 1.1 dyoung {
1638 1.1 dyoung struct tcp_ports_iterator *it = arg;
1639 1.1 dyoung vtw_t *vtw = 0;
1640 1.1 dyoung
1641 1.1 dyoung if (it->ctl)
1642 1.1 dyoung vtw = vtw_next_port_v4(it);
1643 1.1 dyoung
1644 1.1 dyoung if (!vtw)
1645 1.1 dyoung it->ctl = 0;
1646 1.1 dyoung
1647 1.1 dyoung return vtw_export_v4(it->ctl, vtw, res);
1648 1.1 dyoung }
1649 1.1 dyoung
1650 1.1 dyoung static int
1651 1.1 dyoung tcp_lookup_v4(struct in_addr faddr, uint16_t fport,
1652 1.1 dyoung struct in_addr laddr, uint16_t lport,
1653 1.1 dyoung struct vestigial_inpcb *res)
1654 1.1 dyoung {
1655 1.1 dyoung vtw_t *vtw;
1656 1.1 dyoung vtw_ctl_t *ctl;
1657 1.1 dyoung
1658 1.1 dyoung
1659 1.1 dyoung db_trace(KTR_VTW
1660 1.1 dyoung , (res, "vtw: lookup %A:%P %A:%P"
1661 1.1 dyoung , faddr, fport
1662 1.1 dyoung , laddr, lport));
1663 1.1 dyoung
1664 1.1 dyoung vtw = vtw_lookup_hash_v4((ctl = &vtw_tcpv4[0])
1665 1.1 dyoung , faddr.s_addr, fport
1666 1.1 dyoung , laddr.s_addr, lport, 0);
1667 1.1 dyoung
1668 1.1 dyoung return vtw_export_v4(ctl, vtw, res);
1669 1.1 dyoung }
1670 1.1 dyoung
1671 1.1 dyoung /* in_pcblookup_ports assist for handling vestigial entries.
1672 1.1 dyoung */
1673 1.1 dyoung static void *
1674 1.1 dyoung tcp_init_ports_v6(const struct in6_addr *addr, u_int port, int wild)
1675 1.1 dyoung {
1676 1.1 dyoung struct tcp_ports_iterator *it = &tcp_ports_iterator_v6;
1677 1.1 dyoung
1678 1.1 dyoung bzero(it, sizeof (*it));
1679 1.1 dyoung
1680 1.1 dyoung /* Note: the reference to vtw_tcpv6[0] is fine.
1681 1.1 dyoung * We do not need per-class iteration. We just
1682 1.1 dyoung * need to get to the fat, and there is one
1683 1.1 dyoung * shared fat.
1684 1.1 dyoung */
1685 1.1 dyoung if (vtw_tcpv6[0].fat) {
1686 1.1 dyoung it->addr.v6 = *addr;
1687 1.1 dyoung it->port = port;
1688 1.1 dyoung it->wild = !!wild;
1689 1.1 dyoung it->ctl = &vtw_tcpv6[0];
1690 1.1 dyoung
1691 1.1 dyoung ++vtw_stats.look[1];
1692 1.1 dyoung }
1693 1.1 dyoung
1694 1.1 dyoung return it;
1695 1.1 dyoung }
1696 1.1 dyoung
1697 1.1 dyoung /*!\brief export an IPv6 vtw.
1698 1.1 dyoung */
1699 1.1 dyoung static int
1700 1.1 dyoung vtw_export_v6(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res)
1701 1.1 dyoung {
1702 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
1703 1.1 dyoung
1704 1.1 dyoung bzero(res, sizeof (*res));
1705 1.1 dyoung
1706 1.1 dyoung if (ctl && vtw) {
1707 1.1 dyoung if (!ctl->clidx && vtw->msl_class)
1708 1.1 dyoung ctl += vtw->msl_class;
1709 1.1 dyoung else
1710 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
1711 1.1 dyoung
1712 1.1 dyoung res->valid = 1;
1713 1.1 dyoung res->v4 = 0;
1714 1.1 dyoung
1715 1.1 dyoung res->faddr.v6 = v6->faddr;
1716 1.1 dyoung res->laddr.v6 = v6->laddr;
1717 1.1 dyoung res->fport = v6->fport;
1718 1.1 dyoung res->lport = v6->lport;
1719 1.1 dyoung res->vtw = vtw; // netlock held over call(s)
1720 1.1 dyoung res->ctl = ctl;
1721 1.1 dyoung
1722 1.1 dyoung res->v6only = vtw->v6only;
1723 1.1 dyoung res->reuse_addr = vtw->reuse_addr;
1724 1.1 dyoung res->reuse_port = vtw->reuse_port;
1725 1.1 dyoung
1726 1.1 dyoung res->snd_nxt = vtw->snd_nxt;
1727 1.1 dyoung res->rcv_nxt = vtw->rcv_nxt;
1728 1.1 dyoung res->rcv_wnd = vtw->rcv_wnd;
1729 1.1 dyoung res->uid = vtw->uid;
1730 1.1 dyoung }
1731 1.1 dyoung
1732 1.1 dyoung return res->valid;
1733 1.1 dyoung }
1734 1.1 dyoung
1735 1.1 dyoung static int
1736 1.1 dyoung tcp_next_port_v6(void *arg, struct vestigial_inpcb *res)
1737 1.1 dyoung {
1738 1.1 dyoung struct tcp_ports_iterator *it = arg;
1739 1.1 dyoung vtw_t *vtw = 0;
1740 1.1 dyoung
1741 1.1 dyoung if (it->ctl)
1742 1.1 dyoung vtw = vtw_next_port_v6(it);
1743 1.1 dyoung
1744 1.1 dyoung if (!vtw)
1745 1.1 dyoung it->ctl = 0;
1746 1.1 dyoung
1747 1.1 dyoung return vtw_export_v6(it->ctl, vtw, res);
1748 1.1 dyoung }
1749 1.1 dyoung
1750 1.1 dyoung static int
1751 1.1 dyoung tcp_lookup_v6(const struct in6_addr *faddr, uint16_t fport,
1752 1.1 dyoung const struct in6_addr *laddr, uint16_t lport,
1753 1.1 dyoung struct vestigial_inpcb *res)
1754 1.1 dyoung {
1755 1.1 dyoung vtw_ctl_t *ctl;
1756 1.1 dyoung vtw_t *vtw;
1757 1.1 dyoung
1758 1.1 dyoung db_trace(KTR_VTW
1759 1.1 dyoung , (res, "vtw: lookup %6A:%P %6A:%P"
1760 1.1 dyoung , db_store(faddr, sizeof (*faddr)), fport
1761 1.1 dyoung , db_store(laddr, sizeof (*laddr)), lport));
1762 1.1 dyoung
1763 1.1 dyoung vtw = vtw_lookup_hash_v6((ctl = &vtw_tcpv6[0])
1764 1.1 dyoung , faddr, fport
1765 1.1 dyoung , laddr, lport, 0);
1766 1.1 dyoung
1767 1.1 dyoung return vtw_export_v6(ctl, vtw, res);
1768 1.1 dyoung }
1769 1.1 dyoung
1770 1.1 dyoung static vestigial_hooks_t tcp_hooks = {
1771 1.1 dyoung .init_ports4 = tcp_init_ports_v4,
1772 1.1 dyoung .next_port4 = tcp_next_port_v4,
1773 1.1 dyoung .lookup4 = tcp_lookup_v4,
1774 1.1 dyoung .init_ports6 = tcp_init_ports_v6,
1775 1.1 dyoung .next_port6 = tcp_next_port_v6,
1776 1.1 dyoung .lookup6 = tcp_lookup_v6,
1777 1.1 dyoung };
1778 1.1 dyoung
1779 1.1 dyoung static bool
1780 1.1 dyoung vtw_select(int af, fatp_ctl_t **fatp, vtw_ctl_t **ctlp)
1781 1.1 dyoung {
1782 1.1 dyoung fatp_ctl_t *fat;
1783 1.1 dyoung vtw_ctl_t *ctl;
1784 1.1 dyoung
1785 1.1 dyoung switch (af) {
1786 1.1 dyoung case AF_INET:
1787 1.1 dyoung fat = &fat_tcpv4;
1788 1.1 dyoung ctl = &vtw_tcpv4[0];
1789 1.1 dyoung break;
1790 1.1 dyoung case AF_INET6:
1791 1.1 dyoung fat = &fat_tcpv6;
1792 1.1 dyoung ctl = &vtw_tcpv6[0];
1793 1.1 dyoung break;
1794 1.1 dyoung default:
1795 1.1 dyoung return false;
1796 1.1 dyoung }
1797 1.1 dyoung if (fatp != NULL)
1798 1.1 dyoung *fatp = fat;
1799 1.1 dyoung if (ctlp != NULL)
1800 1.1 dyoung *ctlp = ctl;
1801 1.1 dyoung return true;
1802 1.1 dyoung }
1803 1.1 dyoung
1804 1.1 dyoung /*!\brief initialize controlling instance
1805 1.1 dyoung */
1806 1.1 dyoung static int
1807 1.1 dyoung vtw_control_init(int af)
1808 1.1 dyoung {
1809 1.1 dyoung fatp_ctl_t *fat;
1810 1.1 dyoung vtw_ctl_t *ctl;
1811 1.6 dyoung fatp_t *fat_base;
1812 1.6 dyoung fatp_t **fat_hash;
1813 1.6 dyoung vtw_t *ctl_base_v;
1814 1.6 dyoung uint32_t n, m;
1815 1.6 dyoung size_t sz;
1816 1.6 dyoung
1817 1.6 dyoung KASSERT(powerof2(tcp_vtw_entries));
1818 1.1 dyoung
1819 1.1 dyoung if (!vtw_select(af, &fat, &ctl))
1820 1.1 dyoung return EAFNOSUPPORT;
1821 1.1 dyoung
1822 1.6 dyoung if (fat->hash != NULL) {
1823 1.6 dyoung KASSERT(fat->base != NULL && ctl->base.v != NULL);
1824 1.6 dyoung return 0;
1825 1.6 dyoung }
1826 1.6 dyoung
1827 1.6 dyoung /* Allocate 10% more capacity in the fat pointers.
1828 1.6 dyoung * We should only need ~#hash additional based on
1829 1.6 dyoung * how they age, but TIME_WAIT assassination could cause
1830 1.6 dyoung * sparse fat pointer utilisation.
1831 1.6 dyoung */
1832 1.6 dyoung m = 512;
1833 1.6 dyoung n = 2*m + (11 * (tcp_vtw_entries / fatp_ntags())) / 10;
1834 1.6 dyoung sz = (ctl->is_v4 ? sizeof(vtw_v4_t) : sizeof(vtw_v6_t));
1835 1.6 dyoung
1836 1.6 dyoung fat_hash = kmem_zalloc(2*m * sizeof(fatp_t *), KM_NOSLEEP);
1837 1.6 dyoung
1838 1.6 dyoung if (fat_hash == NULL) {
1839 1.6 dyoung printf("%s: could not allocate %zu bytes for "
1840 1.6 dyoung "hash anchors", __func__, 2*m * sizeof(fatp_t *));
1841 1.6 dyoung return ENOMEM;
1842 1.6 dyoung }
1843 1.1 dyoung
1844 1.6 dyoung fat_base = kmem_zalloc(2*n * sizeof(fatp_t), KM_NOSLEEP);
1845 1.1 dyoung
1846 1.6 dyoung if (fat_base == NULL) {
1847 1.6 dyoung kmem_free(fat_hash, 2*m * sizeof (fatp_t *));
1848 1.6 dyoung printf("%s: could not allocate %zu bytes for "
1849 1.6 dyoung "fatp_t array", __func__, 2*n * sizeof(fatp_t));
1850 1.6 dyoung return ENOMEM;
1851 1.6 dyoung }
1852 1.1 dyoung
1853 1.6 dyoung ctl_base_v = kmem_zalloc(tcp_vtw_entries * sz, KM_NOSLEEP);
1854 1.1 dyoung
1855 1.6 dyoung if (ctl_base_v == NULL) {
1856 1.6 dyoung kmem_free(fat_hash, 2*m * sizeof (fatp_t *));
1857 1.6 dyoung kmem_free(fat_base, 2*n * sizeof(fatp_t));
1858 1.6 dyoung printf("%s: could not allocate %zu bytes for "
1859 1.6 dyoung "vtw_t array", __func__, tcp_vtw_entries * sz);
1860 1.6 dyoung return ENOMEM;
1861 1.1 dyoung }
1862 1.1 dyoung
1863 1.6 dyoung fatp_init(fat, n, m, fat_base, fat_hash);
1864 1.1 dyoung
1865 1.6 dyoung vtw_init(fat, ctl, tcp_vtw_entries, ctl_base_v);
1866 1.1 dyoung
1867 1.1 dyoung return 0;
1868 1.1 dyoung }
1869 1.1 dyoung
1870 1.1 dyoung /*!\brief select controlling instance
1871 1.1 dyoung */
1872 1.1 dyoung static vtw_ctl_t *
1873 1.1 dyoung vtw_control(int af, uint32_t msl)
1874 1.1 dyoung {
1875 1.1 dyoung fatp_ctl_t *fat;
1876 1.1 dyoung vtw_ctl_t *ctl;
1877 1.11 matt int msl_class = msl_to_class(msl);
1878 1.1 dyoung
1879 1.1 dyoung if (!vtw_select(af, &fat, &ctl))
1880 1.1 dyoung return NULL;
1881 1.1 dyoung
1882 1.1 dyoung if (!fat->base || !ctl->base.v)
1883 1.1 dyoung return NULL;
1884 1.1 dyoung
1885 1.5 dyoung if (!tcp_vtw_was_enabled) {
1886 1.5 dyoung /* This guarantees is timer ticks until we no longer need them.
1887 1.5 dyoung */
1888 1.5 dyoung tcp_vtw_was_enabled = 1;
1889 1.5 dyoung
1890 1.5 dyoung callout_schedule(&vtw_cs, hz / 5);
1891 1.5 dyoung
1892 1.5 dyoung tcbtable.vestige = &tcp_hooks;
1893 1.5 dyoung }
1894 1.5 dyoung
1895 1.11 matt return ctl + msl_class;
1896 1.1 dyoung }
1897 1.1 dyoung
1898 1.1 dyoung /*!\brief add TCP pcb to vestigial timewait
1899 1.1 dyoung */
1900 1.1 dyoung int
1901 1.1 dyoung vtw_add(int af, struct tcpcb *tp)
1902 1.1 dyoung {
1903 1.10 martin #ifdef VTW_DEBUG
1904 1.1 dyoung int enable;
1905 1.10 martin #endif
1906 1.1 dyoung vtw_ctl_t *ctl;
1907 1.1 dyoung vtw_t *vtw;
1908 1.1 dyoung
1909 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
1910 1.1 dyoung
1911 1.1 dyoung ctl = vtw_control(af, tp->t_msl);
1912 1.1 dyoung if (!ctl)
1913 1.1 dyoung return 0;
1914 1.1 dyoung
1915 1.10 martin #ifdef VTW_DEBUG
1916 1.1 dyoung enable = (af == AF_INET) ? tcp4_vtw_enable : tcp6_vtw_enable;
1917 1.10 martin #endif
1918 1.1 dyoung
1919 1.1 dyoung vtw = vtw_alloc(ctl);
1920 1.1 dyoung
1921 1.1 dyoung if (vtw) {
1922 1.1 dyoung vtw->snd_nxt = tp->snd_nxt;
1923 1.1 dyoung vtw->rcv_nxt = tp->rcv_nxt;
1924 1.1 dyoung
1925 1.1 dyoung switch (af) {
1926 1.1 dyoung case AF_INET: {
1927 1.1 dyoung struct inpcb *inp = tp->t_inpcb;
1928 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
1929 1.1 dyoung
1930 1.1 dyoung v4->faddr = inp->inp_faddr.s_addr;
1931 1.1 dyoung v4->laddr = inp->inp_laddr.s_addr;
1932 1.1 dyoung v4->fport = inp->inp_fport;
1933 1.1 dyoung v4->lport = inp->inp_lport;
1934 1.1 dyoung
1935 1.1 dyoung vtw->reuse_port = !!(inp->inp_socket->so_options
1936 1.1 dyoung & SO_REUSEPORT);
1937 1.1 dyoung vtw->reuse_addr = !!(inp->inp_socket->so_options
1938 1.1 dyoung & SO_REUSEADDR);
1939 1.1 dyoung vtw->v6only = 0;
1940 1.1 dyoung vtw->uid = inp->inp_socket->so_uidinfo->ui_uid;
1941 1.1 dyoung
1942 1.1 dyoung vtw_inshash_v4(ctl, vtw);
1943 1.1 dyoung
1944 1.1 dyoung
1945 1.1 dyoung #ifdef VTW_DEBUG
1946 1.1 dyoung /* Immediate lookup (connected and port) to
1947 1.1 dyoung * ensure at least that works!
1948 1.1 dyoung */
1949 1.1 dyoung if (enable & 4) {
1950 1.1 dyoung KASSERT(vtw_lookup_hash_v4
1951 1.1 dyoung (ctl
1952 1.1 dyoung , inp->inp_faddr.s_addr, inp->inp_fport
1953 1.1 dyoung , inp->inp_laddr.s_addr, inp->inp_lport
1954 1.1 dyoung , 0)
1955 1.1 dyoung == vtw);
1956 1.1 dyoung KASSERT(vtw_lookup_hash_v4
1957 1.1 dyoung (ctl
1958 1.1 dyoung , inp->inp_faddr.s_addr, inp->inp_fport
1959 1.1 dyoung , inp->inp_laddr.s_addr, inp->inp_lport
1960 1.1 dyoung , 1));
1961 1.1 dyoung }
1962 1.1 dyoung /* Immediate port iterator functionality check: not wild
1963 1.1 dyoung */
1964 1.1 dyoung if (enable & 8) {
1965 1.1 dyoung struct tcp_ports_iterator *it;
1966 1.1 dyoung struct vestigial_inpcb res;
1967 1.1 dyoung int cnt = 0;
1968 1.1 dyoung
1969 1.1 dyoung it = tcp_init_ports_v4(inp->inp_laddr
1970 1.1 dyoung , inp->inp_lport, 0);
1971 1.1 dyoung
1972 1.1 dyoung while (tcp_next_port_v4(it, &res)) {
1973 1.1 dyoung ++cnt;
1974 1.1 dyoung }
1975 1.1 dyoung KASSERT(cnt);
1976 1.1 dyoung }
1977 1.1 dyoung /* Immediate port iterator functionality check: wild
1978 1.1 dyoung */
1979 1.1 dyoung if (enable & 16) {
1980 1.1 dyoung struct tcp_ports_iterator *it;
1981 1.1 dyoung struct vestigial_inpcb res;
1982 1.1 dyoung struct in_addr any;
1983 1.1 dyoung int cnt = 0;
1984 1.1 dyoung
1985 1.1 dyoung any.s_addr = htonl(INADDR_ANY);
1986 1.1 dyoung
1987 1.1 dyoung it = tcp_init_ports_v4(any, inp->inp_lport, 1);
1988 1.1 dyoung
1989 1.1 dyoung while (tcp_next_port_v4(it, &res)) {
1990 1.1 dyoung ++cnt;
1991 1.1 dyoung }
1992 1.1 dyoung KASSERT(cnt);
1993 1.1 dyoung }
1994 1.1 dyoung #endif /* VTW_DEBUG */
1995 1.1 dyoung break;
1996 1.1 dyoung }
1997 1.1 dyoung
1998 1.1 dyoung case AF_INET6: {
1999 1.1 dyoung struct in6pcb *inp = tp->t_in6pcb;
2000 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
2001 1.1 dyoung
2002 1.1 dyoung v6->faddr = inp->in6p_faddr;
2003 1.1 dyoung v6->laddr = inp->in6p_laddr;
2004 1.1 dyoung v6->fport = inp->in6p_fport;
2005 1.1 dyoung v6->lport = inp->in6p_lport;
2006 1.1 dyoung
2007 1.1 dyoung vtw->reuse_port = !!(inp->in6p_socket->so_options
2008 1.1 dyoung & SO_REUSEPORT);
2009 1.1 dyoung vtw->reuse_addr = !!(inp->in6p_socket->so_options
2010 1.1 dyoung & SO_REUSEADDR);
2011 1.1 dyoung vtw->v6only = !!(inp->in6p_flags
2012 1.1 dyoung & IN6P_IPV6_V6ONLY);
2013 1.1 dyoung vtw->uid = inp->in6p_socket->so_uidinfo->ui_uid;
2014 1.1 dyoung
2015 1.1 dyoung vtw_inshash_v6(ctl, vtw);
2016 1.1 dyoung #ifdef VTW_DEBUG
2017 1.1 dyoung /* Immediate lookup (connected and port) to
2018 1.1 dyoung * ensure at least that works!
2019 1.1 dyoung */
2020 1.1 dyoung if (enable & 4) {
2021 1.1 dyoung KASSERT(vtw_lookup_hash_v6(ctl
2022 1.1 dyoung , &inp->in6p_faddr, inp->in6p_fport
2023 1.1 dyoung , &inp->in6p_laddr, inp->in6p_lport
2024 1.1 dyoung , 0)
2025 1.1 dyoung == vtw);
2026 1.1 dyoung KASSERT(vtw_lookup_hash_v6
2027 1.1 dyoung (ctl
2028 1.1 dyoung , &inp->in6p_faddr, inp->in6p_fport
2029 1.1 dyoung , &inp->in6p_laddr, inp->in6p_lport
2030 1.1 dyoung , 1));
2031 1.1 dyoung }
2032 1.1 dyoung /* Immediate port iterator functionality check: not wild
2033 1.1 dyoung */
2034 1.1 dyoung if (enable & 8) {
2035 1.1 dyoung struct tcp_ports_iterator *it;
2036 1.1 dyoung struct vestigial_inpcb res;
2037 1.1 dyoung int cnt = 0;
2038 1.1 dyoung
2039 1.1 dyoung it = tcp_init_ports_v6(&inp->in6p_laddr
2040 1.1 dyoung , inp->in6p_lport, 0);
2041 1.1 dyoung
2042 1.1 dyoung while (tcp_next_port_v6(it, &res)) {
2043 1.1 dyoung ++cnt;
2044 1.1 dyoung }
2045 1.1 dyoung KASSERT(cnt);
2046 1.1 dyoung }
2047 1.1 dyoung /* Immediate port iterator functionality check: wild
2048 1.1 dyoung */
2049 1.1 dyoung if (enable & 16) {
2050 1.1 dyoung struct tcp_ports_iterator *it;
2051 1.1 dyoung struct vestigial_inpcb res;
2052 1.1 dyoung static struct in6_addr any = IN6ADDR_ANY_INIT;
2053 1.1 dyoung int cnt = 0;
2054 1.1 dyoung
2055 1.1 dyoung it = tcp_init_ports_v6(&any
2056 1.1 dyoung , inp->in6p_lport, 1);
2057 1.1 dyoung
2058 1.1 dyoung while (tcp_next_port_v6(it, &res)) {
2059 1.1 dyoung ++cnt;
2060 1.1 dyoung }
2061 1.1 dyoung KASSERT(cnt);
2062 1.1 dyoung }
2063 1.1 dyoung #endif /* VTW_DEBUG */
2064 1.1 dyoung break;
2065 1.1 dyoung }
2066 1.1 dyoung }
2067 1.1 dyoung
2068 1.1 dyoung tcp_canceltimers(tp);
2069 1.1 dyoung tp = tcp_close(tp);
2070 1.1 dyoung KASSERT(!tp);
2071 1.1 dyoung
2072 1.1 dyoung return 1;
2073 1.1 dyoung }
2074 1.1 dyoung
2075 1.1 dyoung return 0;
2076 1.1 dyoung }
2077 1.1 dyoung
2078 1.1 dyoung /*!\brief restart timer for vestigial time-wait entry
2079 1.1 dyoung */
2080 1.1 dyoung static void
2081 1.1 dyoung vtw_restart_v4(vestigial_inpcb_t *vp)
2082 1.1 dyoung {
2083 1.1 dyoung vtw_v4_t copy = *(vtw_v4_t*)vp->vtw;
2084 1.1 dyoung vtw_t *vtw;
2085 1.1 dyoung vtw_t *cp = ©.common;
2086 1.1 dyoung vtw_ctl_t *ctl;
2087 1.1 dyoung
2088 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
2089 1.1 dyoung
2090 1.1 dyoung db_trace(KTR_VTW
2091 1.1 dyoung , (vp->vtw, "vtw: restart %A:%P %A:%P"
2092 1.1 dyoung , vp->faddr.v4.s_addr, vp->fport
2093 1.1 dyoung , vp->laddr.v4.s_addr, vp->lport));
2094 1.1 dyoung
2095 1.1 dyoung /* Class might have changed, so have a squiz.
2096 1.1 dyoung */
2097 1.1 dyoung ctl = vtw_control(AF_INET, class_to_msl(cp->msl_class));
2098 1.1 dyoung vtw = vtw_alloc(ctl);
2099 1.1 dyoung
2100 1.1 dyoung if (vtw) {
2101 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
2102 1.1 dyoung
2103 1.1 dyoung /* Safe now to unhash the old entry
2104 1.1 dyoung */
2105 1.1 dyoung vtw_del(vp->ctl, vp->vtw);
2106 1.1 dyoung
2107 1.1 dyoung vtw->snd_nxt = cp->snd_nxt;
2108 1.1 dyoung vtw->rcv_nxt = cp->rcv_nxt;
2109 1.1 dyoung
2110 1.1 dyoung v4->faddr = copy.faddr;
2111 1.1 dyoung v4->laddr = copy.laddr;
2112 1.1 dyoung v4->fport = copy.fport;
2113 1.1 dyoung v4->lport = copy.lport;
2114 1.1 dyoung
2115 1.1 dyoung vtw->reuse_port = cp->reuse_port;
2116 1.1 dyoung vtw->reuse_addr = cp->reuse_addr;
2117 1.1 dyoung vtw->v6only = 0;
2118 1.1 dyoung vtw->uid = cp->uid;
2119 1.1 dyoung
2120 1.1 dyoung vtw_inshash_v4(ctl, vtw);
2121 1.1 dyoung }
2122 1.1 dyoung
2123 1.1 dyoung vp->valid = 0;
2124 1.1 dyoung }
2125 1.1 dyoung
2126 1.1 dyoung /*!\brief restart timer for vestigial time-wait entry
2127 1.1 dyoung */
2128 1.1 dyoung static void
2129 1.1 dyoung vtw_restart_v6(vestigial_inpcb_t *vp)
2130 1.1 dyoung {
2131 1.1 dyoung vtw_v6_t copy = *(vtw_v6_t*)vp->vtw;
2132 1.1 dyoung vtw_t *vtw;
2133 1.1 dyoung vtw_t *cp = ©.common;
2134 1.1 dyoung vtw_ctl_t *ctl;
2135 1.1 dyoung
2136 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
2137 1.1 dyoung
2138 1.1 dyoung db_trace(KTR_VTW
2139 1.1 dyoung , (vp->vtw, "vtw: restart %6A:%P %6A:%P"
2140 1.1 dyoung , db_store(&vp->faddr.v6, sizeof (vp->faddr.v6))
2141 1.1 dyoung , vp->fport
2142 1.1 dyoung , db_store(&vp->laddr.v6, sizeof (vp->laddr.v6))
2143 1.1 dyoung , vp->lport));
2144 1.1 dyoung
2145 1.1 dyoung /* Class might have changed, so have a squiz.
2146 1.1 dyoung */
2147 1.1 dyoung ctl = vtw_control(AF_INET6, class_to_msl(cp->msl_class));
2148 1.1 dyoung vtw = vtw_alloc(ctl);
2149 1.1 dyoung
2150 1.1 dyoung if (vtw) {
2151 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
2152 1.1 dyoung
2153 1.1 dyoung /* Safe now to unhash the old entry
2154 1.1 dyoung */
2155 1.1 dyoung vtw_del(vp->ctl, vp->vtw);
2156 1.1 dyoung
2157 1.1 dyoung vtw->snd_nxt = cp->snd_nxt;
2158 1.1 dyoung vtw->rcv_nxt = cp->rcv_nxt;
2159 1.1 dyoung
2160 1.1 dyoung v6->faddr = copy.faddr;
2161 1.1 dyoung v6->laddr = copy.laddr;
2162 1.1 dyoung v6->fport = copy.fport;
2163 1.1 dyoung v6->lport = copy.lport;
2164 1.1 dyoung
2165 1.1 dyoung vtw->reuse_port = cp->reuse_port;
2166 1.1 dyoung vtw->reuse_addr = cp->reuse_addr;
2167 1.1 dyoung vtw->v6only = cp->v6only;
2168 1.1 dyoung vtw->uid = cp->uid;
2169 1.1 dyoung
2170 1.1 dyoung vtw_inshash_v6(ctl, vtw);
2171 1.1 dyoung }
2172 1.1 dyoung
2173 1.1 dyoung vp->valid = 0;
2174 1.1 dyoung }
2175 1.1 dyoung
2176 1.1 dyoung /*!\brief restart timer for vestigial time-wait entry
2177 1.1 dyoung */
2178 1.1 dyoung void
2179 1.1 dyoung vtw_restart(vestigial_inpcb_t *vp)
2180 1.1 dyoung {
2181 1.1 dyoung if (!vp || !vp->valid)
2182 1.1 dyoung return;
2183 1.1 dyoung
2184 1.1 dyoung if (vp->v4)
2185 1.1 dyoung vtw_restart_v4(vp);
2186 1.1 dyoung else
2187 1.1 dyoung vtw_restart_v6(vp);
2188 1.1 dyoung }
2189 1.1 dyoung
2190 1.1 dyoung int
2191 1.7 dyoung sysctl_tcp_vtw_enable(SYSCTLFN_ARGS)
2192 1.7 dyoung {
2193 1.7 dyoung int en, rc;
2194 1.7 dyoung struct sysctlnode node;
2195 1.7 dyoung
2196 1.7 dyoung node = *rnode;
2197 1.7 dyoung en = *(int *)rnode->sysctl_data;
2198 1.7 dyoung node.sysctl_data = &en;
2199 1.7 dyoung
2200 1.7 dyoung rc = sysctl_lookup(SYSCTLFN_CALL(&node));
2201 1.7 dyoung if (rc != 0 || newp == NULL)
2202 1.7 dyoung return rc;
2203 1.7 dyoung
2204 1.7 dyoung if (rnode->sysctl_data != &tcp4_vtw_enable &&
2205 1.7 dyoung rnode->sysctl_data != &tcp6_vtw_enable)
2206 1.7 dyoung rc = ENOENT;
2207 1.7 dyoung else if ((en & 1) == 0)
2208 1.7 dyoung rc = 0;
2209 1.7 dyoung else if (rnode->sysctl_data == &tcp4_vtw_enable)
2210 1.7 dyoung rc = vtw_control_init(AF_INET);
2211 1.7 dyoung else /* rnode->sysctl_data == &tcp6_vtw_enable */
2212 1.7 dyoung rc = vtw_control_init(AF_INET6);
2213 1.7 dyoung
2214 1.7 dyoung if (rc == 0)
2215 1.7 dyoung *(int *)rnode->sysctl_data = en;
2216 1.7 dyoung
2217 1.7 dyoung return rc;
2218 1.7 dyoung }
2219 1.7 dyoung
2220 1.7 dyoung int
2221 1.1 dyoung vtw_earlyinit(void)
2222 1.1 dyoung {
2223 1.5 dyoung int i, rc;
2224 1.1 dyoung
2225 1.5 dyoung callout_init(&vtw_cs, 0);
2226 1.5 dyoung callout_setfunc(&vtw_cs, vtw_tick, 0);
2227 1.1 dyoung
2228 1.5 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
2229 1.5 dyoung vtw_tcpv4[i].is_v4 = 1;
2230 1.5 dyoung vtw_tcpv6[i].is_v6 = 1;
2231 1.1 dyoung }
2232 1.1 dyoung
2233 1.7 dyoung if ((tcp4_vtw_enable & 1) != 0 &&
2234 1.7 dyoung (rc = vtw_control_init(AF_INET)) != 0)
2235 1.7 dyoung return rc;
2236 1.7 dyoung
2237 1.7 dyoung if ((tcp6_vtw_enable & 1) != 0 &&
2238 1.1 dyoung (rc = vtw_control_init(AF_INET6)) != 0)
2239 1.1 dyoung return rc;
2240 1.1 dyoung
2241 1.1 dyoung return 0;
2242 1.1 dyoung }
2243 1.1 dyoung
2244 1.1 dyoung #ifdef VTW_DEBUG
2245 1.1 dyoung #include <sys/syscallargs.h>
2246 1.1 dyoung #include <sys/sysctl.h>
2247 1.1 dyoung
2248 1.1 dyoung /*!\brief add lalp, fafp entries for debug
2249 1.1 dyoung */
2250 1.1 dyoung int
2251 1.11 matt vtw_debug_add(int af, sin_either_t *la, sin_either_t *fa, int msl, int msl_class)
2252 1.1 dyoung {
2253 1.1 dyoung vtw_ctl_t *ctl;
2254 1.1 dyoung vtw_t *vtw;
2255 1.1 dyoung
2256 1.11 matt ctl = vtw_control(af, msl ? msl : class_to_msl(msl_class));
2257 1.1 dyoung if (!ctl)
2258 1.1 dyoung return 0;
2259 1.1 dyoung
2260 1.1 dyoung vtw = vtw_alloc(ctl);
2261 1.1 dyoung
2262 1.1 dyoung if (vtw) {
2263 1.1 dyoung vtw->snd_nxt = 0;
2264 1.1 dyoung vtw->rcv_nxt = 0;
2265 1.1 dyoung
2266 1.1 dyoung switch (af) {
2267 1.1 dyoung case AF_INET: {
2268 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
2269 1.1 dyoung
2270 1.1 dyoung v4->faddr = fa->sin_addr.v4.s_addr;
2271 1.1 dyoung v4->laddr = la->sin_addr.v4.s_addr;
2272 1.1 dyoung v4->fport = fa->sin_port;
2273 1.1 dyoung v4->lport = la->sin_port;
2274 1.1 dyoung
2275 1.1 dyoung vtw->reuse_port = 1;
2276 1.1 dyoung vtw->reuse_addr = 1;
2277 1.1 dyoung vtw->v6only = 0;
2278 1.1 dyoung vtw->uid = 0;
2279 1.1 dyoung
2280 1.1 dyoung vtw_inshash_v4(ctl, vtw);
2281 1.1 dyoung break;
2282 1.1 dyoung }
2283 1.1 dyoung
2284 1.1 dyoung case AF_INET6: {
2285 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
2286 1.1 dyoung
2287 1.1 dyoung v6->faddr = fa->sin_addr.v6;
2288 1.1 dyoung v6->laddr = la->sin_addr.v6;
2289 1.1 dyoung
2290 1.1 dyoung v6->fport = fa->sin_port;
2291 1.1 dyoung v6->lport = la->sin_port;
2292 1.1 dyoung
2293 1.1 dyoung vtw->reuse_port = 1;
2294 1.1 dyoung vtw->reuse_addr = 1;
2295 1.1 dyoung vtw->v6only = 0;
2296 1.1 dyoung vtw->uid = 0;
2297 1.1 dyoung
2298 1.1 dyoung vtw_inshash_v6(ctl, vtw);
2299 1.1 dyoung break;
2300 1.1 dyoung }
2301 1.1 dyoung
2302 1.1 dyoung default:
2303 1.1 dyoung break;
2304 1.1 dyoung }
2305 1.1 dyoung
2306 1.1 dyoung return 1;
2307 1.1 dyoung }
2308 1.1 dyoung
2309 1.1 dyoung return 0;
2310 1.1 dyoung }
2311 1.1 dyoung
/* System call slot claimed by vtw_debug_init() for vtw_sys(); 0 until
 * a slot has been claimed.
 */
static int vtw_syscall;
2313 1.1 dyoung
2314 1.1 dyoung static int
2315 1.1 dyoung vtw_debug_process(vtw_sysargs_t *ap)
2316 1.1 dyoung {
2317 1.1 dyoung struct vestigial_inpcb vestige;
2318 1.1 dyoung int rc = 0;
2319 1.1 dyoung
2320 1.1 dyoung mutex_enter(softnet_lock);
2321 1.1 dyoung
2322 1.1 dyoung switch (ap->op) {
2323 1.1 dyoung case 0: // insert
2324 1.1 dyoung vtw_debug_add(ap->la.sin_family
2325 1.1 dyoung , &ap->la
2326 1.1 dyoung , &ap->fa
2327 1.1 dyoung , TCPTV_MSL
2328 1.1 dyoung , 0);
2329 1.1 dyoung break;
2330 1.1 dyoung
2331 1.1 dyoung case 1: // lookup
2332 1.1 dyoung case 2: // restart
2333 1.1 dyoung switch (ap->la.sin_family) {
2334 1.1 dyoung case AF_INET:
2335 1.1 dyoung if (tcp_lookup_v4(ap->fa.sin_addr.v4, ap->fa.sin_port,
2336 1.1 dyoung ap->la.sin_addr.v4, ap->la.sin_port,
2337 1.1 dyoung &vestige)) {
2338 1.1 dyoung if (ap->op == 2) {
2339 1.1 dyoung vtw_restart(&vestige);
2340 1.1 dyoung }
2341 1.1 dyoung rc = 0;
2342 1.1 dyoung } else
2343 1.1 dyoung rc = ESRCH;
2344 1.1 dyoung break;
2345 1.1 dyoung
2346 1.1 dyoung case AF_INET6:
2347 1.1 dyoung if (tcp_lookup_v6(&ap->fa.sin_addr.v6, ap->fa.sin_port,
2348 1.1 dyoung &ap->la.sin_addr.v6, ap->la.sin_port,
2349 1.1 dyoung &vestige)) {
2350 1.1 dyoung if (ap->op == 2) {
2351 1.1 dyoung vtw_restart(&vestige);
2352 1.1 dyoung }
2353 1.1 dyoung rc = 0;
2354 1.1 dyoung } else
2355 1.1 dyoung rc = ESRCH;
2356 1.1 dyoung break;
2357 1.1 dyoung default:
2358 1.1 dyoung rc = EINVAL;
2359 1.1 dyoung }
2360 1.1 dyoung break;
2361 1.1 dyoung
2362 1.1 dyoung default:
2363 1.1 dyoung rc = EINVAL;
2364 1.1 dyoung }
2365 1.1 dyoung
2366 1.1 dyoung mutex_exit(softnet_lock);
2367 1.1 dyoung return rc;
2368 1.1 dyoung }
2369 1.1 dyoung
2370 1.1 dyoung struct sys_vtw_args {
2371 1.1 dyoung syscallarg(const vtw_sysargs_t *) req;
2372 1.1 dyoung syscallarg(size_t) len;
2373 1.1 dyoung };
2374 1.1 dyoung
2375 1.1 dyoung static int
2376 1.1 dyoung vtw_sys(struct lwp *l, const void *_, register_t *retval)
2377 1.1 dyoung {
2378 1.1 dyoung const struct sys_vtw_args *uap = _;
2379 1.1 dyoung void *buf;
2380 1.1 dyoung int rc;
2381 1.1 dyoung size_t len = SCARG(uap, len);
2382 1.1 dyoung
2383 1.1 dyoung if (len != sizeof (vtw_sysargs_t))
2384 1.1 dyoung return EINVAL;
2385 1.1 dyoung
2386 1.1 dyoung buf = kmem_alloc(len, KM_SLEEP);
2387 1.1 dyoung if (!buf)
2388 1.1 dyoung return ENOMEM;
2389 1.1 dyoung
2390 1.1 dyoung rc = copyin(SCARG(uap, req), buf, len);
2391 1.1 dyoung if (!rc) {
2392 1.1 dyoung rc = vtw_debug_process(buf);
2393 1.1 dyoung }
2394 1.1 dyoung kmem_free(buf, len);
2395 1.1 dyoung
2396 1.1 dyoung return rc;
2397 1.1 dyoung }
2398 1.1 dyoung
2399 1.1 dyoung static void
2400 1.1 dyoung vtw_sanity_check(void)
2401 1.1 dyoung {
2402 1.1 dyoung vtw_ctl_t *ctl;
2403 1.1 dyoung vtw_t *vtw;
2404 1.1 dyoung int i;
2405 1.1 dyoung int n;
2406 1.1 dyoung
2407 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
2408 1.1 dyoung ctl = &vtw_tcpv4[i];
2409 1.1 dyoung
2410 1.1 dyoung if (!ctl->base.v || ctl->nalloc)
2411 1.1 dyoung continue;
2412 1.1 dyoung
2413 1.1 dyoung for (n = 0, vtw = ctl->base.v; ; ) {
2414 1.1 dyoung ++n;
2415 1.1 dyoung vtw = vtw_next(ctl, vtw);
2416 1.1 dyoung if (vtw == ctl->base.v)
2417 1.1 dyoung break;
2418 1.1 dyoung }
2419 1.1 dyoung db_trace(KTR_VTW
2420 1.1 dyoung , (ctl, "sanity: class %x n %x nfree %x"
2421 1.1 dyoung , i, n, ctl->nfree));
2422 1.1 dyoung
2423 1.1 dyoung KASSERT(n == ctl->nfree);
2424 1.1 dyoung }
2425 1.1 dyoung
2426 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
2427 1.1 dyoung ctl = &vtw_tcpv6[i];
2428 1.1 dyoung
2429 1.1 dyoung if (!ctl->base.v || ctl->nalloc)
2430 1.1 dyoung continue;
2431 1.1 dyoung
2432 1.1 dyoung for (n = 0, vtw = ctl->base.v; ; ) {
2433 1.1 dyoung ++n;
2434 1.1 dyoung vtw = vtw_next(ctl, vtw);
2435 1.1 dyoung if (vtw == ctl->base.v)
2436 1.1 dyoung break;
2437 1.1 dyoung }
2438 1.1 dyoung db_trace(KTR_VTW
2439 1.1 dyoung , (ctl, "sanity: class %x n %x nfree %x"
2440 1.1 dyoung , i, n, ctl->nfree));
2441 1.1 dyoung KASSERT(n == ctl->nfree);
2442 1.1 dyoung }
2443 1.1 dyoung }
2444 1.1 dyoung
2445 1.1 dyoung /*!\brief Initialise debug support.
2446 1.1 dyoung */
2447 1.1 dyoung static void
2448 1.1 dyoung vtw_debug_init(void)
2449 1.1 dyoung {
2450 1.1 dyoung int i;
2451 1.1 dyoung
2452 1.1 dyoung vtw_sanity_check();
2453 1.1 dyoung
2454 1.1 dyoung if (vtw_syscall)
2455 1.1 dyoung return;
2456 1.1 dyoung
2457 1.1 dyoung for (i = 511; i; --i) {
2458 1.1 dyoung if (sysent[i].sy_call == sys_nosys) {
2459 1.1 dyoung sysent[i].sy_call = vtw_sys;
2460 1.1 dyoung sysent[i].sy_narg = 2;
2461 1.1 dyoung sysent[i].sy_argsize = sizeof (struct sys_vtw_args);
2462 1.1 dyoung sysent[i].sy_flags = 0;
2463 1.1 dyoung
2464 1.1 dyoung vtw_syscall = i;
2465 1.1 dyoung break;
2466 1.1 dyoung }
2467 1.1 dyoung }
2468 1.1 dyoung if (i) {
2469 1.1 dyoung const struct sysctlnode *node;
2470 1.1 dyoung uint32_t flags;
2471 1.1 dyoung
2472 1.1 dyoung flags = sysctl_root.sysctl_flags;
2473 1.1 dyoung
2474 1.1 dyoung sysctl_root.sysctl_flags |= CTLFLAG_READWRITE;
2475 1.1 dyoung sysctl_root.sysctl_flags &= ~CTLFLAG_PERMANENT;
2476 1.1 dyoung
2477 1.1 dyoung sysctl_createv(0, 0, 0, &node,
2478 1.1 dyoung CTLFLAG_PERMANENT, CTLTYPE_NODE,
2479 1.1 dyoung "koff",
2480 1.1 dyoung SYSCTL_DESCR("Kernel Obscure Feature Finder"),
2481 1.1 dyoung 0, 0, 0, 0, CTL_CREATE, CTL_EOL);
2482 1.1 dyoung
2483 1.1 dyoung if (!node) {
2484 1.1 dyoung sysctl_createv(0, 0, 0, &node,
2485 1.1 dyoung CTLFLAG_PERMANENT, CTLTYPE_NODE,
2486 1.1 dyoung "koffka",
2487 1.1 dyoung SYSCTL_DESCR("The Real(tm) Kernel"
2488 1.1 dyoung " Obscure Feature Finder"),
2489 1.1 dyoung 0, 0, 0, 0, CTL_CREATE, CTL_EOL);
2490 1.1 dyoung }
2491 1.1 dyoung if (node) {
2492 1.1 dyoung sysctl_createv(0, 0, 0, 0,
2493 1.1 dyoung CTLFLAG_PERMANENT|CTLFLAG_READONLY,
2494 1.1 dyoung CTLTYPE_INT, "vtw_debug_syscall",
2495 1.1 dyoung SYSCTL_DESCR("vtw debug"
2496 1.1 dyoung " system call number"),
2497 1.1 dyoung 0, 0, &vtw_syscall, 0, node->sysctl_num,
2498 1.1 dyoung CTL_CREATE, CTL_EOL);
2499 1.1 dyoung }
2500 1.1 dyoung sysctl_root.sysctl_flags = flags;
2501 1.1 dyoung }
2502 1.1 dyoung }
2503 1.1 dyoung #else /* !VTW_DEBUG */
/* Built without VTW_DEBUG: there is no debug support to initialise. */
static void
vtw_debug_init(void)
{
}
2509 1.1 dyoung #endif /* !VTW_DEBUG */
2510