tcp_vtw.c revision 1.11 1 1.1 dyoung /*
2 1.1 dyoung * Copyright (c) 2011 The NetBSD Foundation, Inc.
3 1.1 dyoung * All rights reserved.
4 1.1 dyoung *
5 1.1 dyoung * This code is derived from software contributed to The NetBSD Foundation
6 1.1 dyoung * by Coyote Point Systems, Inc.
7 1.1 dyoung *
8 1.1 dyoung * Redistribution and use in source and binary forms, with or without
9 1.1 dyoung * modification, are permitted provided that the following conditions
10 1.1 dyoung * are met:
11 1.1 dyoung * 1. Redistributions of source code must retain the above copyright
12 1.1 dyoung * notice, this list of conditions and the following disclaimer.
13 1.1 dyoung * 2. Redistributions in binary form must reproduce the above copyright
14 1.1 dyoung * notice, this list of conditions and the following disclaimer in the
15 1.1 dyoung * documentation and/or other materials provided with the distribution.
16 1.1 dyoung *
17 1.1 dyoung * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
18 1.1 dyoung * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 1.1 dyoung * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 1.1 dyoung * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
21 1.1 dyoung * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 1.1 dyoung * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 1.1 dyoung * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 1.1 dyoung * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 1.1 dyoung * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 1.1 dyoung * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 1.1 dyoung * POSSIBILITY OF SUCH DAMAGE.
28 1.1 dyoung */
29 1.9 yamt
30 1.9 yamt /*
31 1.9 yamt * Reduces the resources demanded by TCP sessions in TIME_WAIT-state using
32 1.9 yamt * methods called Vestigial Time-Wait (VTW) and Maximum Segment Lifetime
33 1.9 yamt * Truncation (MSLT).
34 1.9 yamt *
35 1.9 yamt * MSLT and VTW were contributed by Coyote Point Systems, Inc.
36 1.9 yamt *
37 1.9 yamt * Even after a TCP session enters the TIME_WAIT state, its corresponding
38 1.9 yamt * socket and protocol control blocks (PCBs) stick around until the TCP
39 1.9 yamt * Maximum Segment Lifetime (MSL) expires. On a host whose workload
40 1.9 yamt * necessarily creates and closes down many TCP sockets, the sockets & PCBs
41 1.9 yamt * for TCP sessions in TIME_WAIT state amount to many megabytes of dead
42 1.9 yamt * weight in RAM.
43 1.9 yamt *
44 1.9 yamt * Maximum Segment Lifetime Truncation (MSLT) assigns each TCP session to
45 1.9 yamt * a class based on the nearness of the peer. Corresponding to each class
46 1.9 yamt * is an MSL, and a session uses the MSL of its class. The classes are
47 1.9 yamt * loopback (local host equals remote host), local (local host and remote
48 1.9 yamt * host are on the same link/subnet), and remote (local host and remote
49 1.9 yamt * host communicate via one or more gateways). Classes corresponding to
50 1.9 yamt * nearer peers have lower MSLs by default: 2 seconds for loopback, 10
51 1.9 yamt * seconds for local, 60 seconds for remote. Loopback and local sessions
52 1.9 yamt * expire more quickly when MSLT is used.
53 1.9 yamt *
54 1.9 yamt * Vestigial Time-Wait (VTW) replaces a TIME_WAIT session's PCB/socket
55 1.9 yamt * dead weight with a compact representation of the session, called a
56 1.9 yamt * "vestigial PCB". VTW data structures are designed to be very fast and
57 1.9 yamt * memory-efficient: for fast insertion and lookup of vestigial PCBs,
58 1.9 yamt * the PCBs are stored in a hash table that is designed to minimize the
59 1.9 yamt * number of cacheline visits per lookup/insertion. The memory both
60 1.9 yamt * for vestigial PCBs and for elements of the PCB hashtable come from
61 1.9 yamt * fixed-size pools, and linked data structures exploit this to conserve
62 1.9 yamt * memory by representing references with a narrow index/offset from the
63 1.9 yamt * start of a pool instead of a pointer. When space for new vestigial PCBs
64 1.9 yamt * runs out, VTW makes room by discarding old vestigial PCBs, oldest first.
65 1.9 yamt * VTW cooperates with MSLT.
66 1.9 yamt *
67 1.9 yamt * It may help to think of VTW as a "FIN cache" by analogy to the SYN
68 1.9 yamt * cache.
69 1.9 yamt *
70 1.9 yamt * A 2.8-GHz Pentium 4 running a test workload that creates TIME_WAIT
71 1.9 yamt * sessions as fast as it can is approximately 17% idle when VTW is active
72 1.9 yamt * versus 0% idle when VTW is inactive. It has 103 megabytes more free RAM
73 1.9 yamt * when VTW is active (approximately 64k vestigial PCBs are created) than
74 1.9 yamt * when it is inactive.
75 1.9 yamt */
76 1.9 yamt
77 1.1 dyoung #include <sys/cdefs.h>
78 1.1 dyoung
79 1.1 dyoung #include "opt_ddb.h"
80 1.1 dyoung #include "opt_inet.h"
81 1.1 dyoung #include "opt_ipsec.h"
82 1.1 dyoung #include "opt_inet_csum.h"
83 1.1 dyoung #include "opt_tcp_debug.h"
84 1.1 dyoung
85 1.1 dyoung #include <sys/param.h>
86 1.1 dyoung #include <sys/systm.h>
87 1.1 dyoung #include <sys/malloc.h>
88 1.1 dyoung #include <sys/kmem.h>
89 1.1 dyoung #include <sys/mbuf.h>
90 1.1 dyoung #include <sys/protosw.h>
91 1.1 dyoung #include <sys/socket.h>
92 1.1 dyoung #include <sys/socketvar.h>
93 1.1 dyoung #include <sys/errno.h>
94 1.1 dyoung #include <sys/syslog.h>
95 1.1 dyoung #include <sys/pool.h>
96 1.1 dyoung #include <sys/domain.h>
97 1.1 dyoung #include <sys/kernel.h>
98 1.1 dyoung #include <net/if.h>
99 1.1 dyoung #include <net/route.h>
100 1.1 dyoung #include <net/if_types.h>
101 1.1 dyoung
102 1.1 dyoung #include <netinet/in.h>
103 1.1 dyoung #include <netinet/in_systm.h>
104 1.1 dyoung #include <netinet/ip.h>
105 1.1 dyoung #include <netinet/in_pcb.h>
106 1.1 dyoung #include <netinet/in_var.h>
107 1.1 dyoung #include <netinet/ip_var.h>
108 1.1 dyoung #include <netinet/in_offload.h>
109 1.1 dyoung #include <netinet/ip6.h>
110 1.1 dyoung #include <netinet6/ip6_var.h>
111 1.1 dyoung #include <netinet6/in6_pcb.h>
112 1.1 dyoung #include <netinet6/ip6_var.h>
113 1.1 dyoung #include <netinet6/in6_var.h>
114 1.1 dyoung #include <netinet/icmp6.h>
115 1.1 dyoung #include <netinet6/nd6.h>
116 1.1 dyoung
117 1.1 dyoung #include <netinet/tcp.h>
118 1.1 dyoung #include <netinet/tcp_fsm.h>
119 1.1 dyoung #include <netinet/tcp_seq.h>
120 1.1 dyoung #include <netinet/tcp_timer.h>
121 1.1 dyoung #include <netinet/tcp_var.h>
122 1.1 dyoung #include <netinet/tcp_private.h>
123 1.1 dyoung #include <netinet/tcpip.h>
124 1.1 dyoung
125 1.1 dyoung #include <netinet/tcp_vtw.h>
126 1.1 dyoung
127 1.11 matt __KERNEL_RCSID(0, "$NetBSD: tcp_vtw.c,v 1.11 2014/09/05 06:03:51 matt Exp $");
128 1.1 dyoung
129 1.1 dyoung #define db_trace(__a, __b) do { } while (/*CONSTCOND*/0)
130 1.1 dyoung
131 1.1 dyoung static void vtw_debug_init(void);
132 1.1 dyoung
133 1.1 dyoung fatp_ctl_t fat_tcpv4;
134 1.1 dyoung fatp_ctl_t fat_tcpv6;
135 1.1 dyoung vtw_ctl_t vtw_tcpv4[VTW_NCLASS];
136 1.1 dyoung vtw_ctl_t vtw_tcpv6[VTW_NCLASS];
137 1.1 dyoung vtw_stats_t vtw_stats;
138 1.1 dyoung
139 1.1 dyoung /* We provide state for the lookup_ports iterator.
140 1.1 dyoung * As currently we are netlock-protected, there is one.
141 1.1 dyoung * If we were finer-grain, we would have one per CPU.
142 1.1 dyoung * I do not want to be in the business of alloc/free.
143 1.1 dyoung * The best alternate would be allocate on the caller's
144 1.1 dyoung * stack, but that would require them to know the struct,
145 1.1 dyoung * or at least the size.
146 1.1 dyoung * See how she goes.
147 1.1 dyoung */
148 1.1 dyoung struct tcp_ports_iterator {
149 1.1 dyoung union {
150 1.1 dyoung struct in_addr v4;
151 1.1 dyoung struct in6_addr v6;
152 1.1 dyoung } addr;
153 1.1 dyoung u_int port;
154 1.1 dyoung
155 1.1 dyoung uint32_t wild : 1;
156 1.1 dyoung
157 1.1 dyoung vtw_ctl_t *ctl;
158 1.1 dyoung fatp_t *fp;
159 1.1 dyoung
160 1.1 dyoung uint16_t slot_idx;
161 1.1 dyoung uint16_t ctl_idx;
162 1.1 dyoung };
163 1.1 dyoung
164 1.1 dyoung static struct tcp_ports_iterator tcp_ports_iterator_v4;
165 1.1 dyoung static struct tcp_ports_iterator tcp_ports_iterator_v6;
166 1.1 dyoung
167 1.1 dyoung static int vtw_age(vtw_ctl_t *, struct timeval *);
168 1.1 dyoung
169 1.1 dyoung /*!\brief allocate a fat pointer from a collection.
170 1.1 dyoung */
171 1.1 dyoung static fatp_t *
172 1.1 dyoung fatp_alloc(fatp_ctl_t *fat)
173 1.1 dyoung {
174 1.1 dyoung fatp_t *fp = 0;
175 1.1 dyoung
176 1.1 dyoung if (fat->nfree) {
177 1.1 dyoung fp = fat->free;
178 1.1 dyoung if (fp) {
179 1.1 dyoung fat->free = fatp_next(fat, fp);
180 1.1 dyoung --fat->nfree;
181 1.1 dyoung ++fat->nalloc;
182 1.1 dyoung fp->nxt = 0;
183 1.1 dyoung
184 1.1 dyoung KASSERT(!fp->inuse);
185 1.1 dyoung }
186 1.1 dyoung }
187 1.1 dyoung
188 1.1 dyoung return fp;
189 1.1 dyoung }
190 1.1 dyoung
191 1.1 dyoung /*!\brief free a fat pointer.
192 1.1 dyoung */
193 1.1 dyoung static void
194 1.1 dyoung fatp_free(fatp_ctl_t *fat, fatp_t *fp)
195 1.1 dyoung {
196 1.1 dyoung if (fp) {
197 1.1 dyoung KASSERT(!fp->inuse);
198 1.1 dyoung KASSERT(!fp->nxt);
199 1.1 dyoung
200 1.1 dyoung fp->nxt = fatp_index(fat, fat->free);
201 1.1 dyoung fat->free = fp;
202 1.1 dyoung
203 1.1 dyoung ++fat->nfree;
204 1.1 dyoung --fat->nalloc;
205 1.1 dyoung }
206 1.1 dyoung }
207 1.1 dyoung
208 1.1 dyoung /*!\brief initialise a collection of fat pointers.
209 1.1 dyoung *
210 1.1 dyoung *\param n # hash buckets
211 1.1 dyoung *\param m total # fat pointers to allocate
212 1.1 dyoung *
213 1.1 dyoung * We allocate 2x as much, as we have two hashes: full and lport only.
214 1.1 dyoung */
215 1.1 dyoung static void
216 1.6 dyoung fatp_init(fatp_ctl_t *fat, uint32_t n, uint32_t m,
217 1.6 dyoung fatp_t *fat_base, fatp_t **fat_hash)
218 1.1 dyoung {
219 1.1 dyoung fatp_t *fp;
220 1.1 dyoung
221 1.1 dyoung KASSERT(n <= FATP_MAX / 2);
222 1.1 dyoung
223 1.6 dyoung fat->hash = fat_hash;
224 1.6 dyoung fat->base = fat_base;
225 1.1 dyoung
226 1.1 dyoung fat->port = &fat->hash[m];
227 1.1 dyoung
228 1.1 dyoung fat->mask = m - 1; // ASSERT is power of 2 (m)
229 1.1 dyoung fat->lim = fat->base + 2*n - 1;
230 1.1 dyoung fat->nfree = 0;
231 1.1 dyoung fat->nalloc = 2*n;
232 1.1 dyoung
233 1.1 dyoung /* Initialise the free list.
234 1.1 dyoung */
235 1.1 dyoung for (fp = fat->lim; fp >= fat->base; --fp) {
236 1.1 dyoung fatp_free(fat, fp);
237 1.1 dyoung }
238 1.1 dyoung }
239 1.1 dyoung
/*
 * The `xtra' is XORed into the tag stored.
 *
 * There is one value per slot, indexed by slot number.  The table is
 * only ever read, so it is const.
 */
static const uint32_t fatp_xtra[] = {
	0x11111111,0x22222222,0x33333333,0x44444444,
	0x55555555,0x66666666,0x77777777,0x88888888,
	0x12121212,0x21212121,0x34343434,0x43434343,
	0x56565656,0x65656565,0x78787878,0x87878787,
	0x11221122,0x22112211,0x33443344,0x44334433,
	0x55665566,0x66556655,0x77887788,0x88778877,
	0x11112222,0x22221111,0x33334444,0x44443333,
	0x55556666,0x66665555,0x77778888,0x88887777,
};
253 1.1 dyoung
254 1.1 dyoung /*!\brief turn a {fatp_t*,slot} into an integral key.
255 1.1 dyoung *
256 1.1 dyoung * The key can be used to obtain the fatp_t, and the slot,
257 1.1 dyoung * as it directly encodes them.
258 1.1 dyoung */
259 1.1 dyoung static inline uint32_t
260 1.1 dyoung fatp_key(fatp_ctl_t *fat, fatp_t *fp, uint32_t slot)
261 1.1 dyoung {
262 1.1 dyoung CTASSERT(CACHE_LINE_SIZE == 32 ||
263 1.1 dyoung CACHE_LINE_SIZE == 64 ||
264 1.1 dyoung CACHE_LINE_SIZE == 128);
265 1.1 dyoung
266 1.1 dyoung switch (fatp_ntags()) {
267 1.1 dyoung case 7:
268 1.1 dyoung return (fatp_index(fat, fp) << 3) | slot;
269 1.1 dyoung case 15:
270 1.1 dyoung return (fatp_index(fat, fp) << 4) | slot;
271 1.1 dyoung case 31:
272 1.1 dyoung return (fatp_index(fat, fp) << 5) | slot;
273 1.1 dyoung default:
274 1.1 dyoung KASSERT(0 && "no support, for no good reason");
275 1.1 dyoung return ~0;
276 1.1 dyoung }
277 1.1 dyoung }
278 1.1 dyoung
279 1.1 dyoung static inline uint32_t
280 1.1 dyoung fatp_slot_from_key(fatp_ctl_t *fat, uint32_t key)
281 1.1 dyoung {
282 1.1 dyoung CTASSERT(CACHE_LINE_SIZE == 32 ||
283 1.1 dyoung CACHE_LINE_SIZE == 64 ||
284 1.1 dyoung CACHE_LINE_SIZE == 128);
285 1.1 dyoung
286 1.1 dyoung switch (fatp_ntags()) {
287 1.1 dyoung case 7:
288 1.1 dyoung return key & 7;
289 1.1 dyoung case 15:
290 1.1 dyoung return key & 15;
291 1.1 dyoung case 31:
292 1.1 dyoung return key & 31;
293 1.1 dyoung default:
294 1.1 dyoung KASSERT(0 && "no support, for no good reason");
295 1.1 dyoung return ~0;
296 1.1 dyoung }
297 1.1 dyoung }
298 1.1 dyoung
299 1.1 dyoung static inline fatp_t *
300 1.1 dyoung fatp_from_key(fatp_ctl_t *fat, uint32_t key)
301 1.1 dyoung {
302 1.1 dyoung CTASSERT(CACHE_LINE_SIZE == 32 ||
303 1.1 dyoung CACHE_LINE_SIZE == 64 ||
304 1.1 dyoung CACHE_LINE_SIZE == 128);
305 1.1 dyoung
306 1.1 dyoung switch (fatp_ntags()) {
307 1.1 dyoung case 7:
308 1.1 dyoung key >>= 3;
309 1.1 dyoung break;
310 1.1 dyoung case 15:
311 1.1 dyoung key >>= 4;
312 1.1 dyoung break;
313 1.1 dyoung case 31:
314 1.1 dyoung key >>= 5;
315 1.1 dyoung break;
316 1.1 dyoung default:
317 1.1 dyoung KASSERT(0 && "no support, for no good reason");
318 1.1 dyoung return 0;
319 1.1 dyoung }
320 1.1 dyoung
321 1.1 dyoung return key ? fat->base + key - 1 : 0;
322 1.1 dyoung }
323 1.1 dyoung
324 1.1 dyoung static inline uint32_t
325 1.1 dyoung idx_encode(vtw_ctl_t *ctl, uint32_t idx)
326 1.1 dyoung {
327 1.1 dyoung return (idx << ctl->idx_bits) | idx;
328 1.1 dyoung }
329 1.1 dyoung
330 1.1 dyoung static inline uint32_t
331 1.1 dyoung idx_decode(vtw_ctl_t *ctl, uint32_t bits)
332 1.1 dyoung {
333 1.1 dyoung uint32_t idx = bits & ctl->idx_mask;
334 1.1 dyoung
335 1.1 dyoung if (idx_encode(ctl, idx) == bits)
336 1.1 dyoung return idx;
337 1.1 dyoung else
338 1.1 dyoung return ~0;
339 1.1 dyoung }
340 1.1 dyoung
341 1.1 dyoung /*!\brief insert index into fatp hash
342 1.1 dyoung *
343 1.1 dyoung *\param idx - index of element being placed in hash chain
344 1.1 dyoung *\param tag - 32-bit tag identifier
345 1.1 dyoung *
346 1.1 dyoung *\returns
347 1.1 dyoung * value which can be used to locate entry.
348 1.1 dyoung *
349 1.1 dyoung *\note
350 1.1 dyoung * we rely on the fact that there are unused high bits in the index
351 1.1 dyoung * for verification purposes on lookup.
352 1.1 dyoung */
353 1.1 dyoung
354 1.1 dyoung static inline uint32_t
355 1.1 dyoung fatp_vtw_inshash(fatp_ctl_t *fat, uint32_t idx, uint32_t tag, int which,
356 1.1 dyoung void *dbg)
357 1.1 dyoung {
358 1.1 dyoung fatp_t *fp;
359 1.1 dyoung fatp_t **hash = (which ? fat->port : fat->hash);
360 1.1 dyoung int i;
361 1.1 dyoung
362 1.1 dyoung fp = hash[tag & fat->mask];
363 1.1 dyoung
364 1.1 dyoung while (!fp || fatp_full(fp)) {
365 1.1 dyoung fatp_t *fq;
366 1.1 dyoung
367 1.1 dyoung /* All entries are inuse at the top level.
368 1.1 dyoung * We allocate a spare, and push the top level
369 1.1 dyoung * down one. All entries in the fp we push down
370 1.1 dyoung * (think of a tape worm here) will be expelled sooner than
371 1.1 dyoung * any entries added subsequently to this hash bucket.
372 1.1 dyoung * This is a property of the time waits we are exploiting.
373 1.1 dyoung */
374 1.1 dyoung
375 1.1 dyoung fq = fatp_alloc(fat);
376 1.1 dyoung if (!fq) {
377 1.1 dyoung vtw_age(fat->vtw, 0);
378 1.1 dyoung fp = hash[tag & fat->mask];
379 1.1 dyoung continue;
380 1.1 dyoung }
381 1.1 dyoung
382 1.1 dyoung fq->inuse = 0;
383 1.1 dyoung fq->nxt = fatp_index(fat, fp);
384 1.1 dyoung
385 1.1 dyoung hash[tag & fat->mask] = fq;
386 1.1 dyoung
387 1.1 dyoung fp = fq;
388 1.1 dyoung }
389 1.1 dyoung
390 1.1 dyoung KASSERT(!fatp_full(fp));
391 1.1 dyoung
392 1.1 dyoung /* Fill highest index first. Lookup is lowest first.
393 1.1 dyoung */
394 1.1 dyoung for (i = fatp_ntags(); --i >= 0; ) {
395 1.1 dyoung if (!((1 << i) & fp->inuse)) {
396 1.1 dyoung break;
397 1.1 dyoung }
398 1.1 dyoung }
399 1.1 dyoung
400 1.1 dyoung fp->inuse |= 1 << i;
401 1.1 dyoung fp->tag[i] = tag ^ idx_encode(fat->vtw, idx) ^ fatp_xtra[i];
402 1.1 dyoung
403 1.1 dyoung db_trace(KTR_VTW
404 1.1 dyoung , (fp, "fat: inuse %5.5x tag[%x] %8.8x"
405 1.1 dyoung , fp->inuse
406 1.1 dyoung , i, fp->tag[i]));
407 1.1 dyoung
408 1.1 dyoung return fatp_key(fat, fp, i);
409 1.1 dyoung }
410 1.1 dyoung
411 1.1 dyoung static inline int
412 1.1 dyoung vtw_alive(const vtw_t *vtw)
413 1.1 dyoung {
414 1.1 dyoung return vtw->hashed && vtw->expire.tv_sec;
415 1.1 dyoung }
416 1.1 dyoung
417 1.1 dyoung static inline uint32_t
418 1.1 dyoung vtw_index_v4(vtw_ctl_t *ctl, vtw_v4_t *v4)
419 1.1 dyoung {
420 1.1 dyoung if (ctl->base.v4 <= v4 && v4 <= ctl->lim.v4)
421 1.1 dyoung return v4 - ctl->base.v4;
422 1.1 dyoung
423 1.1 dyoung KASSERT(0 && "vtw out of bounds");
424 1.1 dyoung
425 1.1 dyoung return ~0;
426 1.1 dyoung }
427 1.1 dyoung
428 1.1 dyoung static inline uint32_t
429 1.1 dyoung vtw_index_v6(vtw_ctl_t *ctl, vtw_v6_t *v6)
430 1.1 dyoung {
431 1.1 dyoung if (ctl->base.v6 <= v6 && v6 <= ctl->lim.v6)
432 1.1 dyoung return v6 - ctl->base.v6;
433 1.1 dyoung
434 1.1 dyoung KASSERT(0 && "vtw out of bounds");
435 1.1 dyoung
436 1.1 dyoung return ~0;
437 1.1 dyoung }
438 1.1 dyoung
439 1.1 dyoung static inline uint32_t
440 1.1 dyoung vtw_index(vtw_ctl_t *ctl, vtw_t *vtw)
441 1.1 dyoung {
442 1.1 dyoung if (ctl->clidx)
443 1.1 dyoung ctl = ctl->ctl;
444 1.1 dyoung
445 1.1 dyoung if (ctl->is_v4)
446 1.1 dyoung return vtw_index_v4(ctl, (vtw_v4_t *)vtw);
447 1.1 dyoung
448 1.1 dyoung if (ctl->is_v6)
449 1.1 dyoung return vtw_index_v6(ctl, (vtw_v6_t *)vtw);
450 1.1 dyoung
451 1.1 dyoung KASSERT(0 && "neither 4 nor 6. most curious.");
452 1.1 dyoung
453 1.1 dyoung return ~0;
454 1.1 dyoung }
455 1.1 dyoung
456 1.1 dyoung static inline vtw_t *
457 1.1 dyoung vtw_from_index(vtw_ctl_t *ctl, uint32_t idx)
458 1.1 dyoung {
459 1.1 dyoung if (ctl->clidx)
460 1.1 dyoung ctl = ctl->ctl;
461 1.1 dyoung
462 1.1 dyoung /* See if the index looks like it might be an index.
463 1.1 dyoung * Bits on outside of the valid index bits is a give away.
464 1.1 dyoung */
465 1.1 dyoung idx = idx_decode(ctl, idx);
466 1.1 dyoung
467 1.1 dyoung if (idx == ~0) {
468 1.1 dyoung return 0;
469 1.1 dyoung } else if (ctl->is_v4) {
470 1.1 dyoung vtw_v4_t *vtw = ctl->base.v4 + idx;
471 1.1 dyoung
472 1.1 dyoung return (ctl->base.v4 <= vtw && vtw <= ctl->lim.v4)
473 1.1 dyoung ? &vtw->common : 0;
474 1.1 dyoung } else if (ctl->is_v6) {
475 1.1 dyoung vtw_v6_t *vtw = ctl->base.v6 + idx;
476 1.1 dyoung
477 1.1 dyoung return (ctl->base.v6 <= vtw && vtw <= ctl->lim.v6)
478 1.1 dyoung ? &vtw->common : 0;
479 1.1 dyoung } else {
480 1.1 dyoung KASSERT(0 && "badness");
481 1.1 dyoung return 0;
482 1.1 dyoung }
483 1.1 dyoung }
484 1.1 dyoung
485 1.1 dyoung /*!\brief return the next vtw after this one.
486 1.1 dyoung *
487 1.1 dyoung * Due to the differing sizes of the entries in differing
488 1.1 dyoung * arenas, we have to ensure we ++ the correct pointer type.
489 1.1 dyoung *
490 1.1 dyoung * Also handles wrap.
491 1.1 dyoung */
492 1.1 dyoung static inline vtw_t *
493 1.1 dyoung vtw_next(vtw_ctl_t *ctl, vtw_t *vtw)
494 1.1 dyoung {
495 1.1 dyoung if (ctl->is_v4) {
496 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
497 1.1 dyoung
498 1.1 dyoung vtw = &(++v4)->common;
499 1.1 dyoung } else {
500 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
501 1.1 dyoung
502 1.1 dyoung vtw = &(++v6)->common;
503 1.1 dyoung }
504 1.1 dyoung
505 1.1 dyoung if (vtw > ctl->lim.v)
506 1.1 dyoung vtw = ctl->base.v;
507 1.1 dyoung
508 1.1 dyoung return vtw;
509 1.1 dyoung }
510 1.1 dyoung
511 1.1 dyoung /*!\brief remove entry from FATP hash chains
512 1.1 dyoung */
513 1.1 dyoung static inline void
514 1.1 dyoung vtw_unhash(vtw_ctl_t *ctl, vtw_t *vtw)
515 1.1 dyoung {
516 1.1 dyoung fatp_ctl_t *fat = ctl->fat;
517 1.1 dyoung fatp_t *fp;
518 1.1 dyoung uint32_t key = vtw->key;
519 1.1 dyoung uint32_t tag, slot, idx;
520 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
521 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
522 1.1 dyoung
523 1.1 dyoung if (!vtw->hashed) {
524 1.1 dyoung KASSERT(0 && "unhashed");
525 1.1 dyoung return;
526 1.1 dyoung }
527 1.1 dyoung
528 1.1 dyoung if (fat->vtw->is_v4) {
529 1.1 dyoung tag = v4_tag(v4->faddr, v4->fport, v4->laddr, v4->lport);
530 1.1 dyoung } else if (fat->vtw->is_v6) {
531 1.1 dyoung tag = v6_tag(&v6->faddr, v6->fport, &v6->laddr, v6->lport);
532 1.1 dyoung } else {
533 1.1 dyoung tag = 0;
534 1.1 dyoung KASSERT(0 && "not reached");
535 1.1 dyoung }
536 1.1 dyoung
537 1.1 dyoung /* Remove from fat->hash[]
538 1.1 dyoung */
539 1.1 dyoung slot = fatp_slot_from_key(fat, key);
540 1.1 dyoung fp = fatp_from_key(fat, key);
541 1.1 dyoung idx = vtw_index(ctl, vtw);
542 1.1 dyoung
543 1.1 dyoung db_trace(KTR_VTW
544 1.1 dyoung , (fp, "fat: del inuse %5.5x slot %x idx %x key %x tag %x"
545 1.1 dyoung , fp->inuse, slot, idx, key, tag));
546 1.1 dyoung
547 1.1 dyoung KASSERT(fp->inuse & (1 << slot));
548 1.1 dyoung KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
549 1.1 dyoung ^ fatp_xtra[slot]));
550 1.1 dyoung
551 1.1 dyoung if ((fp->inuse & (1 << slot))
552 1.1 dyoung && fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
553 1.1 dyoung ^ fatp_xtra[slot])) {
554 1.1 dyoung fp->inuse ^= 1 << slot;
555 1.1 dyoung fp->tag[slot] = 0;
556 1.1 dyoung
557 1.1 dyoung /* When we delete entries, we do not compact. This is
558 1.1 dyoung * due to temporality. We add entries, and they
559 1.1 dyoung * (eventually) expire. Older entries will be further
560 1.1 dyoung * down the chain.
561 1.1 dyoung */
562 1.1 dyoung if (!fp->inuse) {
563 1.1 dyoung uint32_t hi = tag & fat->mask;
564 1.1 dyoung fatp_t *fq = 0;
565 1.1 dyoung fatp_t *fr = fat->hash[hi];
566 1.1 dyoung
567 1.1 dyoung while (fr && fr != fp) {
568 1.1 dyoung fr = fatp_next(fat, fq = fr);
569 1.1 dyoung }
570 1.1 dyoung
571 1.1 dyoung if (fr == fp) {
572 1.1 dyoung if (fq) {
573 1.1 dyoung fq->nxt = fp->nxt;
574 1.1 dyoung fp->nxt = 0;
575 1.1 dyoung fatp_free(fat, fp);
576 1.1 dyoung } else {
577 1.1 dyoung KASSERT(fat->hash[hi] == fp);
578 1.1 dyoung
579 1.1 dyoung if (fp->nxt) {
580 1.1 dyoung fat->hash[hi]
581 1.1 dyoung = fatp_next(fat, fp);
582 1.1 dyoung fp->nxt = 0;
583 1.1 dyoung fatp_free(fat, fp);
584 1.1 dyoung } else {
585 1.1 dyoung /* retain for next use.
586 1.1 dyoung */
587 1.1 dyoung ;
588 1.1 dyoung }
589 1.1 dyoung }
590 1.1 dyoung } else {
591 1.1 dyoung fr = fat->hash[hi];
592 1.1 dyoung
593 1.1 dyoung do {
594 1.1 dyoung db_trace(KTR_VTW
595 1.1 dyoung , (fr
596 1.1 dyoung , "fat:*del inuse %5.5x"
597 1.1 dyoung " nxt %x"
598 1.1 dyoung , fr->inuse, fr->nxt));
599 1.1 dyoung
600 1.1 dyoung fr = fatp_next(fat, fq = fr);
601 1.1 dyoung } while (fr && fr != fp);
602 1.1 dyoung
603 1.1 dyoung KASSERT(0 && "oops");
604 1.1 dyoung }
605 1.1 dyoung }
606 1.1 dyoung vtw->key ^= ~0;
607 1.1 dyoung }
608 1.1 dyoung
609 1.1 dyoung if (fat->vtw->is_v4) {
610 1.1 dyoung tag = v4_port_tag(v4->lport);
611 1.1 dyoung } else if (fat->vtw->is_v6) {
612 1.1 dyoung tag = v6_port_tag(v6->lport);
613 1.1 dyoung }
614 1.1 dyoung
615 1.1 dyoung /* Remove from fat->port[]
616 1.1 dyoung */
617 1.1 dyoung key = vtw->port_key;
618 1.1 dyoung slot = fatp_slot_from_key(fat, key);
619 1.1 dyoung fp = fatp_from_key(fat, key);
620 1.1 dyoung idx = vtw_index(ctl, vtw);
621 1.1 dyoung
622 1.1 dyoung db_trace(KTR_VTW
623 1.1 dyoung , (fp, "fatport: del inuse %5.5x"
624 1.1 dyoung " slot %x idx %x key %x tag %x"
625 1.1 dyoung , fp->inuse, slot, idx, key, tag));
626 1.1 dyoung
627 1.1 dyoung KASSERT(fp->inuse & (1 << slot));
628 1.1 dyoung KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
629 1.1 dyoung ^ fatp_xtra[slot]));
630 1.1 dyoung
631 1.1 dyoung if ((fp->inuse & (1 << slot))
632 1.1 dyoung && fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
633 1.1 dyoung ^ fatp_xtra[slot])) {
634 1.1 dyoung fp->inuse ^= 1 << slot;
635 1.1 dyoung fp->tag[slot] = 0;
636 1.1 dyoung
637 1.1 dyoung if (!fp->inuse) {
638 1.1 dyoung uint32_t hi = tag & fat->mask;
639 1.1 dyoung fatp_t *fq = 0;
640 1.1 dyoung fatp_t *fr = fat->port[hi];
641 1.1 dyoung
642 1.1 dyoung while (fr && fr != fp) {
643 1.1 dyoung fr = fatp_next(fat, fq = fr);
644 1.1 dyoung }
645 1.1 dyoung
646 1.1 dyoung if (fr == fp) {
647 1.1 dyoung if (fq) {
648 1.1 dyoung fq->nxt = fp->nxt;
649 1.1 dyoung fp->nxt = 0;
650 1.1 dyoung fatp_free(fat, fp);
651 1.1 dyoung } else {
652 1.1 dyoung KASSERT(fat->port[hi] == fp);
653 1.1 dyoung
654 1.1 dyoung if (fp->nxt) {
655 1.1 dyoung fat->port[hi]
656 1.1 dyoung = fatp_next(fat, fp);
657 1.1 dyoung fp->nxt = 0;
658 1.1 dyoung fatp_free(fat, fp);
659 1.1 dyoung } else {
660 1.1 dyoung /* retain for next use.
661 1.1 dyoung */
662 1.1 dyoung ;
663 1.1 dyoung }
664 1.1 dyoung }
665 1.1 dyoung }
666 1.1 dyoung }
667 1.1 dyoung vtw->port_key ^= ~0;
668 1.1 dyoung }
669 1.1 dyoung
670 1.1 dyoung vtw->hashed = 0;
671 1.1 dyoung }
672 1.1 dyoung
673 1.1 dyoung /*!\brief remove entry from hash, possibly free.
674 1.1 dyoung */
675 1.1 dyoung void
676 1.1 dyoung vtw_del(vtw_ctl_t *ctl, vtw_t *vtw)
677 1.1 dyoung {
678 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
679 1.1 dyoung
680 1.1 dyoung if (vtw->hashed) {
681 1.1 dyoung ++vtw_stats.del;
682 1.1 dyoung vtw_unhash(ctl, vtw);
683 1.1 dyoung }
684 1.1 dyoung
685 1.1 dyoung /* We only delete the oldest entry.
686 1.1 dyoung */
687 1.1 dyoung if (vtw != ctl->oldest.v)
688 1.1 dyoung return;
689 1.1 dyoung
690 1.1 dyoung --ctl->nalloc;
691 1.1 dyoung ++ctl->nfree;
692 1.1 dyoung
693 1.1 dyoung vtw->expire.tv_sec = 0;
694 1.1 dyoung vtw->expire.tv_usec = ~0;
695 1.1 dyoung
696 1.1 dyoung if (!ctl->nalloc)
697 1.1 dyoung ctl->oldest.v = 0;
698 1.1 dyoung
699 1.1 dyoung ctl->oldest.v = vtw_next(ctl, vtw);
700 1.1 dyoung }
701 1.1 dyoung
702 1.4 dholland /*!\brief insert vestigial timewait in hash chain
703 1.1 dyoung */
704 1.1 dyoung static void
705 1.1 dyoung vtw_inshash_v4(vtw_ctl_t *ctl, vtw_t *vtw)
706 1.1 dyoung {
707 1.1 dyoung uint32_t idx = vtw_index(ctl, vtw);
708 1.1 dyoung uint32_t tag;
709 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
710 1.1 dyoung
711 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
712 1.1 dyoung KASSERT(!vtw->hashed);
713 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
714 1.1 dyoung
715 1.1 dyoung ++vtw_stats.ins;
716 1.1 dyoung
717 1.1 dyoung tag = v4_tag(v4->faddr, v4->fport,
718 1.1 dyoung v4->laddr, v4->lport);
719 1.1 dyoung
720 1.1 dyoung vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw);
721 1.1 dyoung
722 1.1 dyoung db_trace(KTR_VTW, (ctl
723 1.1 dyoung , "vtw: ins %8.8x:%4.4x %8.8x:%4.4x"
724 1.1 dyoung " tag %8.8x key %8.8x"
725 1.1 dyoung , v4->faddr, v4->fport
726 1.1 dyoung , v4->laddr, v4->lport
727 1.1 dyoung , tag
728 1.1 dyoung , vtw->key));
729 1.1 dyoung
730 1.1 dyoung tag = v4_port_tag(v4->lport);
731 1.1 dyoung vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw);
732 1.1 dyoung
733 1.1 dyoung db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x"
734 1.1 dyoung , v4->lport, v4->lport
735 1.1 dyoung , tag
736 1.1 dyoung , vtw->key));
737 1.1 dyoung
738 1.1 dyoung vtw->hashed = 1;
739 1.1 dyoung }
740 1.1 dyoung
741 1.4 dholland /*!\brief insert vestigial timewait in hash chain
742 1.1 dyoung */
743 1.1 dyoung static void
744 1.1 dyoung vtw_inshash_v6(vtw_ctl_t *ctl, vtw_t *vtw)
745 1.1 dyoung {
746 1.1 dyoung uint32_t idx = vtw_index(ctl, vtw);
747 1.1 dyoung uint32_t tag;
748 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
749 1.1 dyoung
750 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
751 1.1 dyoung KASSERT(!vtw->hashed);
752 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
753 1.1 dyoung
754 1.1 dyoung ++vtw_stats.ins;
755 1.1 dyoung
756 1.1 dyoung tag = v6_tag(&v6->faddr, v6->fport,
757 1.1 dyoung &v6->laddr, v6->lport);
758 1.1 dyoung
759 1.1 dyoung vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw);
760 1.1 dyoung
761 1.1 dyoung tag = v6_port_tag(v6->lport);
762 1.1 dyoung vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw);
763 1.1 dyoung
764 1.1 dyoung db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x"
765 1.1 dyoung , v6->lport, v6->lport
766 1.1 dyoung , tag
767 1.1 dyoung , vtw->key));
768 1.1 dyoung
769 1.1 dyoung vtw->hashed = 1;
770 1.1 dyoung }
771 1.1 dyoung
772 1.1 dyoung static vtw_t *
773 1.1 dyoung vtw_lookup_hash_v4(vtw_ctl_t *ctl, uint32_t faddr, uint16_t fport
774 1.1 dyoung , uint32_t laddr, uint16_t lport
775 1.1 dyoung , int which)
776 1.1 dyoung {
777 1.1 dyoung vtw_v4_t *v4;
778 1.1 dyoung vtw_t *vtw;
779 1.1 dyoung uint32_t tag;
780 1.1 dyoung fatp_t *fp;
781 1.1 dyoung int i;
782 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
783 1.1 dyoung
784 1.1 dyoung if (!ctl || !ctl->fat)
785 1.1 dyoung return 0;
786 1.1 dyoung
787 1.1 dyoung ++vtw_stats.look[which];
788 1.1 dyoung
789 1.1 dyoung if (which) {
790 1.1 dyoung tag = v4_port_tag(lport);
791 1.1 dyoung fp = ctl->fat->port[tag & ctl->fat->mask];
792 1.1 dyoung } else {
793 1.1 dyoung tag = v4_tag(faddr, fport, laddr, lport);
794 1.1 dyoung fp = ctl->fat->hash[tag & ctl->fat->mask];
795 1.1 dyoung }
796 1.1 dyoung
797 1.1 dyoung while (fp && fp->inuse) {
798 1.1 dyoung uint32_t inuse = fp->inuse;
799 1.1 dyoung
800 1.1 dyoung ++fatps;
801 1.1 dyoung
802 1.1 dyoung for (i = 0; inuse && i < fatp_ntags(); ++i) {
803 1.1 dyoung uint32_t idx;
804 1.1 dyoung
805 1.1 dyoung if (!(inuse & (1 << i)))
806 1.1 dyoung continue;
807 1.1 dyoung
808 1.1 dyoung inuse ^= 1 << i;
809 1.1 dyoung
810 1.1 dyoung ++probes;
811 1.1 dyoung ++vtw_stats.probe[which];
812 1.1 dyoung
813 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
814 1.1 dyoung vtw = vtw_from_index(ctl, idx);
815 1.1 dyoung
816 1.1 dyoung if (!vtw) {
817 1.1 dyoung /* Hopefully fast path.
818 1.1 dyoung */
819 1.1 dyoung db_trace(KTR_VTW
820 1.1 dyoung , (fp, "vtw: fast %A:%P %A:%P"
821 1.1 dyoung " idx %x tag %x"
822 1.1 dyoung , faddr, fport
823 1.1 dyoung , laddr, lport
824 1.1 dyoung , idx, tag));
825 1.1 dyoung continue;
826 1.1 dyoung }
827 1.1 dyoung
828 1.1 dyoung v4 = (void*)vtw;
829 1.1 dyoung
830 1.1 dyoung /* The de-referencing of vtw is what we want to avoid.
831 1.1 dyoung * Losing.
832 1.1 dyoung */
833 1.1 dyoung if (vtw_alive(vtw)
834 1.1 dyoung && ((which ? vtw->port_key : vtw->key)
835 1.1 dyoung == fatp_key(ctl->fat, fp, i))
836 1.1 dyoung && (which
837 1.1 dyoung || (v4->faddr == faddr && v4->laddr == laddr
838 1.1 dyoung && v4->fport == fport))
839 1.1 dyoung && v4->lport == lport) {
840 1.1 dyoung ++vtw_stats.hit[which];
841 1.1 dyoung
842 1.1 dyoung db_trace(KTR_VTW
843 1.1 dyoung , (fp, "vtw: hit %8.8x:%4.4x"
844 1.1 dyoung " %8.8x:%4.4x idx %x key %x"
845 1.1 dyoung , faddr, fport
846 1.1 dyoung , laddr, lport
847 1.1 dyoung , idx_decode(ctl, idx), vtw->key));
848 1.1 dyoung
849 1.1 dyoung KASSERT(vtw->hashed);
850 1.1 dyoung
851 1.1 dyoung goto out;
852 1.1 dyoung }
853 1.1 dyoung ++vtw_stats.losing[which];
854 1.1 dyoung ++losings;
855 1.1 dyoung
856 1.1 dyoung if (vtw_alive(vtw)) {
857 1.1 dyoung db_trace(KTR_VTW
858 1.1 dyoung , (fp, "vtw:!mis %8.8x:%4.4x"
859 1.1 dyoung " %8.8x:%4.4x key %x tag %x"
860 1.1 dyoung , faddr, fport
861 1.1 dyoung , laddr, lport
862 1.1 dyoung , fatp_key(ctl->fat, fp, i)
863 1.1 dyoung , v4_tag(faddr, fport
864 1.1 dyoung , laddr, lport)));
865 1.1 dyoung db_trace(KTR_VTW
866 1.1 dyoung , (vtw, "vtw:!mis %8.8x:%4.4x"
867 1.1 dyoung " %8.8x:%4.4x key %x tag %x"
868 1.1 dyoung , v4->faddr, v4->fport
869 1.1 dyoung , v4->laddr, v4->lport
870 1.1 dyoung , vtw->key
871 1.1 dyoung , v4_tag(v4->faddr, v4->fport
872 1.1 dyoung , v4->laddr, v4->lport)));
873 1.1 dyoung
874 1.1 dyoung if (vtw->key == fatp_key(ctl->fat, fp, i)) {
875 1.1 dyoung db_trace(KTR_VTW
876 1.1 dyoung , (vtw, "vtw:!mis %8.8x:%4.4x"
877 1.1 dyoung " %8.8x:%4.4x key %x"
878 1.1 dyoung " which %x"
879 1.1 dyoung , v4->faddr, v4->fport
880 1.1 dyoung , v4->laddr, v4->lport
881 1.1 dyoung , vtw->key
882 1.1 dyoung , which));
883 1.1 dyoung
884 1.1 dyoung } else {
885 1.1 dyoung db_trace(KTR_VTW
886 1.1 dyoung , (vtw
887 1.1 dyoung , "vtw:!mis"
888 1.1 dyoung " key %8.8x != %8.8x"
889 1.1 dyoung " idx %x i %x which %x"
890 1.1 dyoung , vtw->key
891 1.1 dyoung , fatp_key(ctl->fat, fp, i)
892 1.1 dyoung , idx_decode(ctl, idx)
893 1.1 dyoung , i
894 1.1 dyoung , which));
895 1.1 dyoung }
896 1.1 dyoung } else {
897 1.1 dyoung db_trace(KTR_VTW
898 1.1 dyoung , (fp
899 1.1 dyoung , "vtw:!mis free entry"
900 1.1 dyoung " idx %x vtw %p which %x"
901 1.1 dyoung , idx_decode(ctl, idx)
902 1.1 dyoung , vtw, which));
903 1.1 dyoung }
904 1.1 dyoung }
905 1.1 dyoung
906 1.1 dyoung if (fp->nxt) {
907 1.1 dyoung fp = fatp_next(ctl->fat, fp);
908 1.1 dyoung } else {
909 1.1 dyoung break;
910 1.1 dyoung }
911 1.1 dyoung }
912 1.1 dyoung ++vtw_stats.miss[which];
913 1.1 dyoung vtw = 0;
914 1.1 dyoung out:
915 1.1 dyoung if (fatps > vtw_stats.max_chain[which])
916 1.1 dyoung vtw_stats.max_chain[which] = fatps;
917 1.1 dyoung if (probes > vtw_stats.max_probe[which])
918 1.1 dyoung vtw_stats.max_probe[which] = probes;
919 1.1 dyoung if (losings > vtw_stats.max_loss[which])
920 1.1 dyoung vtw_stats.max_loss[which] = losings;
921 1.1 dyoung
922 1.1 dyoung return vtw;
923 1.1 dyoung }
924 1.1 dyoung
925 1.1 dyoung static vtw_t *
926 1.1 dyoung vtw_lookup_hash_v6(vtw_ctl_t *ctl, const struct in6_addr *faddr, uint16_t fport
927 1.1 dyoung , const struct in6_addr *laddr, uint16_t lport
928 1.1 dyoung , int which)
929 1.1 dyoung {
930 1.1 dyoung vtw_v6_t *v6;
931 1.1 dyoung vtw_t *vtw;
932 1.1 dyoung uint32_t tag;
933 1.1 dyoung fatp_t *fp;
934 1.1 dyoung int i;
935 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
936 1.1 dyoung
937 1.1 dyoung ++vtw_stats.look[which];
938 1.1 dyoung
939 1.1 dyoung if (!ctl || !ctl->fat)
940 1.1 dyoung return 0;
941 1.1 dyoung
942 1.1 dyoung if (which) {
943 1.1 dyoung tag = v6_port_tag(lport);
944 1.1 dyoung fp = ctl->fat->port[tag & ctl->fat->mask];
945 1.1 dyoung } else {
946 1.1 dyoung tag = v6_tag(faddr, fport, laddr, lport);
947 1.1 dyoung fp = ctl->fat->hash[tag & ctl->fat->mask];
948 1.1 dyoung }
949 1.1 dyoung
950 1.1 dyoung while (fp && fp->inuse) {
951 1.1 dyoung uint32_t inuse = fp->inuse;
952 1.1 dyoung
953 1.1 dyoung ++fatps;
954 1.1 dyoung
955 1.1 dyoung for (i = 0; inuse && i < fatp_ntags(); ++i) {
956 1.1 dyoung uint32_t idx;
957 1.1 dyoung
958 1.1 dyoung if (!(inuse & (1 << i)))
959 1.1 dyoung continue;
960 1.1 dyoung
961 1.1 dyoung inuse ^= 1 << i;
962 1.1 dyoung
963 1.1 dyoung ++probes;
964 1.1 dyoung ++vtw_stats.probe[which];
965 1.1 dyoung
966 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
967 1.1 dyoung vtw = vtw_from_index(ctl, idx);
968 1.1 dyoung
969 1.1 dyoung db_trace(KTR_VTW
970 1.1 dyoung , (fp, "probe: %2d %6A:%4.4x %6A:%4.4x idx %x"
971 1.1 dyoung , i
972 1.1 dyoung , db_store(faddr, sizeof (*faddr)), fport
973 1.1 dyoung , db_store(laddr, sizeof (*laddr)), lport
974 1.1 dyoung , idx_decode(ctl, idx)));
975 1.1 dyoung
976 1.1 dyoung if (!vtw) {
977 1.1 dyoung /* Hopefully fast path.
978 1.1 dyoung */
979 1.1 dyoung continue;
980 1.1 dyoung }
981 1.1 dyoung
982 1.1 dyoung v6 = (void*)vtw;
983 1.1 dyoung
984 1.1 dyoung if (vtw_alive(vtw)
985 1.1 dyoung && ((which ? vtw->port_key : vtw->key)
986 1.1 dyoung == fatp_key(ctl->fat, fp, i))
987 1.1 dyoung && v6->lport == lport
988 1.1 dyoung && (which
989 1.1 dyoung || (v6->fport == fport
990 1.1 dyoung && !bcmp(&v6->faddr, faddr, sizeof (*faddr))
991 1.1 dyoung && !bcmp(&v6->laddr, laddr
992 1.1 dyoung , sizeof (*laddr))))) {
993 1.1 dyoung ++vtw_stats.hit[which];
994 1.1 dyoung
995 1.1 dyoung KASSERT(vtw->hashed);
996 1.1 dyoung goto out;
997 1.1 dyoung } else {
998 1.1 dyoung ++vtw_stats.losing[which];
999 1.1 dyoung ++losings;
1000 1.1 dyoung }
1001 1.1 dyoung }
1002 1.1 dyoung
1003 1.1 dyoung if (fp->nxt) {
1004 1.1 dyoung fp = fatp_next(ctl->fat, fp);
1005 1.1 dyoung } else {
1006 1.1 dyoung break;
1007 1.1 dyoung }
1008 1.1 dyoung }
1009 1.1 dyoung ++vtw_stats.miss[which];
1010 1.1 dyoung vtw = 0;
1011 1.1 dyoung out:
1012 1.1 dyoung if (fatps > vtw_stats.max_chain[which])
1013 1.1 dyoung vtw_stats.max_chain[which] = fatps;
1014 1.1 dyoung if (probes > vtw_stats.max_probe[which])
1015 1.1 dyoung vtw_stats.max_probe[which] = probes;
1016 1.1 dyoung if (losings > vtw_stats.max_loss[which])
1017 1.1 dyoung vtw_stats.max_loss[which] = losings;
1018 1.1 dyoung
1019 1.1 dyoung return vtw;
1020 1.1 dyoung }
1021 1.1 dyoung
1022 1.1 dyoung /*!\brief port iterator
1023 1.1 dyoung */
1024 1.1 dyoung static vtw_t *
1025 1.1 dyoung vtw_next_port_v4(struct tcp_ports_iterator *it)
1026 1.1 dyoung {
1027 1.1 dyoung vtw_ctl_t *ctl = it->ctl;
1028 1.1 dyoung vtw_v4_t *v4;
1029 1.1 dyoung vtw_t *vtw;
1030 1.1 dyoung uint32_t tag;
1031 1.1 dyoung uint16_t lport = it->port;
1032 1.1 dyoung fatp_t *fp;
1033 1.1 dyoung int i;
1034 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
1035 1.1 dyoung
1036 1.1 dyoung tag = v4_port_tag(lport);
1037 1.1 dyoung if (!it->fp) {
1038 1.1 dyoung it->fp = ctl->fat->port[tag & ctl->fat->mask];
1039 1.1 dyoung it->slot_idx = 0;
1040 1.1 dyoung }
1041 1.1 dyoung fp = it->fp;
1042 1.1 dyoung
1043 1.1 dyoung while (fp) {
1044 1.1 dyoung uint32_t inuse = fp->inuse;
1045 1.1 dyoung
1046 1.1 dyoung ++fatps;
1047 1.1 dyoung
1048 1.1 dyoung for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) {
1049 1.1 dyoung uint32_t idx;
1050 1.1 dyoung
1051 1.1 dyoung if (!(inuse & (1 << i)))
1052 1.1 dyoung continue;
1053 1.1 dyoung
1054 1.1 dyoung inuse &= ~0 << i;
1055 1.1 dyoung
1056 1.1 dyoung if (i < it->slot_idx)
1057 1.1 dyoung continue;
1058 1.1 dyoung
1059 1.1 dyoung ++vtw_stats.probe[1];
1060 1.1 dyoung ++probes;
1061 1.1 dyoung
1062 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
1063 1.1 dyoung vtw = vtw_from_index(ctl, idx);
1064 1.1 dyoung
1065 1.1 dyoung if (!vtw) {
1066 1.1 dyoung /* Hopefully fast path.
1067 1.1 dyoung */
1068 1.1 dyoung continue;
1069 1.1 dyoung }
1070 1.1 dyoung
1071 1.1 dyoung v4 = (void*)vtw;
1072 1.1 dyoung
1073 1.1 dyoung if (vtw_alive(vtw)
1074 1.1 dyoung && vtw->port_key == fatp_key(ctl->fat, fp, i)
1075 1.1 dyoung && v4->lport == lport) {
1076 1.1 dyoung ++vtw_stats.hit[1];
1077 1.1 dyoung
1078 1.1 dyoung it->slot_idx = i + 1;
1079 1.1 dyoung
1080 1.1 dyoung goto out;
1081 1.1 dyoung } else if (vtw_alive(vtw)) {
1082 1.1 dyoung ++vtw_stats.losing[1];
1083 1.1 dyoung ++losings;
1084 1.1 dyoung
1085 1.1 dyoung db_trace(KTR_VTW
1086 1.1 dyoung , (vtw, "vtw:!mis"
1087 1.1 dyoung " port %8.8x:%4.4x %8.8x:%4.4x"
1088 1.1 dyoung " key %x port %x"
1089 1.1 dyoung , v4->faddr, v4->fport
1090 1.1 dyoung , v4->laddr, v4->lport
1091 1.1 dyoung , vtw->key
1092 1.1 dyoung , lport));
1093 1.1 dyoung } else {
1094 1.1 dyoung /* Really losing here. We are coming
1095 1.1 dyoung * up with references to free entries.
1096 1.1 dyoung * Might find it better to use
1097 1.1 dyoung * traditional, or need another
1098 1.1 dyoung * add-hockery. The other add-hockery
1099 1.1 dyoung * would be to pul more into into the
1100 1.1 dyoung * cache line to reject the false
1101 1.1 dyoung * hits.
1102 1.1 dyoung */
1103 1.1 dyoung ++vtw_stats.losing[1];
1104 1.1 dyoung ++losings;
1105 1.1 dyoung db_trace(KTR_VTW
1106 1.1 dyoung , (fp, "vtw:!mis port %x"
1107 1.1 dyoung " - free entry idx %x vtw %p"
1108 1.1 dyoung , lport
1109 1.1 dyoung , idx_decode(ctl, idx)
1110 1.1 dyoung , vtw));
1111 1.1 dyoung }
1112 1.1 dyoung }
1113 1.1 dyoung
1114 1.1 dyoung if (fp->nxt) {
1115 1.1 dyoung it->fp = fp = fatp_next(ctl->fat, fp);
1116 1.1 dyoung it->slot_idx = 0;
1117 1.1 dyoung } else {
1118 1.1 dyoung it->fp = 0;
1119 1.1 dyoung break;
1120 1.1 dyoung }
1121 1.1 dyoung }
1122 1.1 dyoung ++vtw_stats.miss[1];
1123 1.1 dyoung
1124 1.1 dyoung vtw = 0;
1125 1.1 dyoung out:
1126 1.1 dyoung if (fatps > vtw_stats.max_chain[1])
1127 1.1 dyoung vtw_stats.max_chain[1] = fatps;
1128 1.1 dyoung if (probes > vtw_stats.max_probe[1])
1129 1.1 dyoung vtw_stats.max_probe[1] = probes;
1130 1.1 dyoung if (losings > vtw_stats.max_loss[1])
1131 1.1 dyoung vtw_stats.max_loss[1] = losings;
1132 1.1 dyoung
1133 1.1 dyoung return vtw;
1134 1.1 dyoung }
1135 1.1 dyoung
1136 1.1 dyoung /*!\brief port iterator
1137 1.1 dyoung */
1138 1.1 dyoung static vtw_t *
1139 1.1 dyoung vtw_next_port_v6(struct tcp_ports_iterator *it)
1140 1.1 dyoung {
1141 1.1 dyoung vtw_ctl_t *ctl = it->ctl;
1142 1.1 dyoung vtw_v6_t *v6;
1143 1.1 dyoung vtw_t *vtw;
1144 1.1 dyoung uint32_t tag;
1145 1.1 dyoung uint16_t lport = it->port;
1146 1.1 dyoung fatp_t *fp;
1147 1.1 dyoung int i;
1148 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
1149 1.1 dyoung
1150 1.1 dyoung tag = v6_port_tag(lport);
1151 1.1 dyoung if (!it->fp) {
1152 1.1 dyoung it->fp = ctl->fat->port[tag & ctl->fat->mask];
1153 1.1 dyoung it->slot_idx = 0;
1154 1.1 dyoung }
1155 1.1 dyoung fp = it->fp;
1156 1.1 dyoung
1157 1.1 dyoung while (fp) {
1158 1.1 dyoung uint32_t inuse = fp->inuse;
1159 1.1 dyoung
1160 1.1 dyoung ++fatps;
1161 1.1 dyoung
1162 1.1 dyoung for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) {
1163 1.1 dyoung uint32_t idx;
1164 1.1 dyoung
1165 1.1 dyoung if (!(inuse & (1 << i)))
1166 1.1 dyoung continue;
1167 1.1 dyoung
1168 1.1 dyoung inuse &= ~0 << i;
1169 1.1 dyoung
1170 1.1 dyoung if (i < it->slot_idx)
1171 1.1 dyoung continue;
1172 1.1 dyoung
1173 1.1 dyoung ++vtw_stats.probe[1];
1174 1.1 dyoung ++probes;
1175 1.1 dyoung
1176 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
1177 1.1 dyoung vtw = vtw_from_index(ctl, idx);
1178 1.1 dyoung
1179 1.1 dyoung if (!vtw) {
1180 1.1 dyoung /* Hopefully fast path.
1181 1.1 dyoung */
1182 1.1 dyoung continue;
1183 1.1 dyoung }
1184 1.1 dyoung
1185 1.1 dyoung v6 = (void*)vtw;
1186 1.1 dyoung
1187 1.1 dyoung db_trace(KTR_VTW
1188 1.1 dyoung , (vtw, "vtw: i %x idx %x fp->tag %x"
1189 1.1 dyoung " tag %x xtra %x"
1190 1.1 dyoung , i, idx_decode(ctl, idx)
1191 1.1 dyoung , fp->tag[i], tag, fatp_xtra[i]));
1192 1.1 dyoung
1193 1.1 dyoung if (vtw_alive(vtw)
1194 1.1 dyoung && vtw->port_key == fatp_key(ctl->fat, fp, i)
1195 1.1 dyoung && v6->lport == lport) {
1196 1.1 dyoung ++vtw_stats.hit[1];
1197 1.1 dyoung
1198 1.1 dyoung db_trace(KTR_VTW
1199 1.1 dyoung , (fp, "vtw: nxt port %P - %4.4x"
1200 1.1 dyoung " idx %x key %x"
1201 1.1 dyoung , lport, lport
1202 1.1 dyoung , idx_decode(ctl, idx), vtw->key));
1203 1.1 dyoung
1204 1.1 dyoung it->slot_idx = i + 1;
1205 1.1 dyoung goto out;
1206 1.1 dyoung } else if (vtw_alive(vtw)) {
1207 1.1 dyoung ++vtw_stats.losing[1];
1208 1.1 dyoung
1209 1.1 dyoung db_trace(KTR_VTW
1210 1.1 dyoung , (vtw, "vtw:!mis port %6A:%4.4x"
1211 1.1 dyoung " %6A:%4.4x key %x port %x"
1212 1.1 dyoung , db_store(&v6->faddr
1213 1.1 dyoung , sizeof (v6->faddr))
1214 1.1 dyoung , v6->fport
1215 1.1 dyoung , db_store(&v6->laddr
1216 1.1 dyoung , sizeof (v6->faddr))
1217 1.1 dyoung , v6->lport
1218 1.1 dyoung , vtw->key
1219 1.1 dyoung , lport));
1220 1.1 dyoung } else {
1221 1.1 dyoung /* Really losing here. We are coming
1222 1.1 dyoung * up with references to free entries.
1223 1.1 dyoung * Might find it better to use
1224 1.1 dyoung * traditional, or need another
1225 1.1 dyoung * add-hockery. The other add-hockery
1226 1.1 dyoung * would be to pul more into into the
1227 1.1 dyoung * cache line to reject the false
1228 1.1 dyoung * hits.
1229 1.1 dyoung */
1230 1.1 dyoung ++vtw_stats.losing[1];
1231 1.1 dyoung ++losings;
1232 1.1 dyoung
1233 1.1 dyoung db_trace(KTR_VTW
1234 1.1 dyoung , (fp
1235 1.1 dyoung , "vtw:!mis port %x"
1236 1.1 dyoung " - free entry idx %x vtw %p"
1237 1.1 dyoung , lport, idx_decode(ctl, idx)
1238 1.1 dyoung , vtw));
1239 1.1 dyoung }
1240 1.1 dyoung }
1241 1.1 dyoung
1242 1.1 dyoung if (fp->nxt) {
1243 1.1 dyoung it->fp = fp = fatp_next(ctl->fat, fp);
1244 1.1 dyoung it->slot_idx = 0;
1245 1.1 dyoung } else {
1246 1.1 dyoung it->fp = 0;
1247 1.1 dyoung break;
1248 1.1 dyoung }
1249 1.1 dyoung }
1250 1.1 dyoung ++vtw_stats.miss[1];
1251 1.1 dyoung
1252 1.1 dyoung vtw = 0;
1253 1.1 dyoung out:
1254 1.1 dyoung if (fatps > vtw_stats.max_chain[1])
1255 1.1 dyoung vtw_stats.max_chain[1] = fatps;
1256 1.1 dyoung if (probes > vtw_stats.max_probe[1])
1257 1.1 dyoung vtw_stats.max_probe[1] = probes;
1258 1.1 dyoung if (losings > vtw_stats.max_loss[1])
1259 1.1 dyoung vtw_stats.max_loss[1] = losings;
1260 1.1 dyoung
1261 1.1 dyoung return vtw;
1262 1.1 dyoung }
1263 1.1 dyoung
1264 1.1 dyoung /*!\brief initialise the VTW allocation arena
1265 1.1 dyoung *
1266 1.1 dyoung * There are 1+3 allocation classes:
1267 1.1 dyoung * 0 classless
1268 1.1 dyoung * {1,2,3} MSL-class based allocation
1269 1.1 dyoung *
1270 1.1 dyoung * The allocation arenas are all initialised. Classless gets all the
1271 1.1 dyoung * space. MSL-class based divides the arena, so that allocation
1272 1.1 dyoung * within a class can proceed without having to consider entries
1273 1.1 dyoung * (aka: cache lines) from different classes.
1274 1.1 dyoung *
1275 1.1 dyoung * Usually, we are completely classless or class-based, but there can be
1276 1.1 dyoung * transition periods, corresponding to dynamic adjustments in the config
1277 1.1 dyoung * by the operator.
1278 1.1 dyoung */
1279 1.1 dyoung static void
1280 1.6 dyoung vtw_init(fatp_ctl_t *fat, vtw_ctl_t *ctl, const uint32_t n, vtw_t *ctl_base_v)
1281 1.1 dyoung {
1282 1.6 dyoung int class_n, i;
1283 1.6 dyoung vtw_t *base;
1284 1.1 dyoung
1285 1.6 dyoung ctl->base.v = ctl_base_v;
1286 1.1 dyoung
1287 1.6 dyoung if (ctl->is_v4) {
1288 1.6 dyoung ctl->lim.v4 = ctl->base.v4 + n - 1;
1289 1.6 dyoung ctl->alloc.v4 = ctl->base.v4;
1290 1.6 dyoung } else {
1291 1.6 dyoung ctl->lim.v6 = ctl->base.v6 + n - 1;
1292 1.6 dyoung ctl->alloc.v6 = ctl->base.v6;
1293 1.6 dyoung }
1294 1.1 dyoung
1295 1.6 dyoung ctl->nfree = n;
1296 1.6 dyoung ctl->ctl = ctl;
1297 1.1 dyoung
1298 1.6 dyoung ctl->idx_bits = 32;
1299 1.6 dyoung for (ctl->idx_mask = ~0; (ctl->idx_mask & (n-1)) == n-1; ) {
1300 1.6 dyoung ctl->idx_mask >>= 1;
1301 1.6 dyoung ctl->idx_bits -= 1;
1302 1.6 dyoung }
1303 1.1 dyoung
1304 1.6 dyoung ctl->idx_mask <<= 1;
1305 1.6 dyoung ctl->idx_mask |= 1;
1306 1.6 dyoung ctl->idx_bits += 1;
1307 1.1 dyoung
1308 1.6 dyoung ctl->fat = fat;
1309 1.6 dyoung fat->vtw = ctl;
1310 1.1 dyoung
1311 1.6 dyoung /* Divide the resources equally amongst the classes.
1312 1.6 dyoung * This is not optimal, as the different classes
1313 1.6 dyoung * arrive and leave at different rates, but it is
1314 1.6 dyoung * the best I can do for now.
1315 1.6 dyoung */
1316 1.6 dyoung class_n = n / (VTW_NCLASS-1);
1317 1.6 dyoung base = ctl->base.v;
1318 1.1 dyoung
1319 1.6 dyoung for (i = 1; i < VTW_NCLASS; ++i) {
1320 1.6 dyoung int j;
1321 1.1 dyoung
1322 1.6 dyoung ctl[i] = ctl[0];
1323 1.6 dyoung ctl[i].clidx = i;
1324 1.1 dyoung
1325 1.6 dyoung ctl[i].base.v = base;
1326 1.6 dyoung ctl[i].alloc = ctl[i].base;
1327 1.1 dyoung
1328 1.6 dyoung for (j = 0; j < class_n - 1; ++j) {
1329 1.6 dyoung if (tcp_msl_enable)
1330 1.6 dyoung base->msl_class = i;
1331 1.1 dyoung base = vtw_next(ctl, base);
1332 1.1 dyoung }
1333 1.6 dyoung
1334 1.6 dyoung ctl[i].lim.v = base;
1335 1.6 dyoung base = vtw_next(ctl, base);
1336 1.6 dyoung ctl[i].nfree = class_n;
1337 1.1 dyoung }
1338 1.1 dyoung
1339 1.1 dyoung vtw_debug_init();
1340 1.1 dyoung }
1341 1.1 dyoung
1342 1.1 dyoung /*!\brief map class to TCP MSL
1343 1.1 dyoung */
1344 1.1 dyoung static inline uint32_t
1345 1.11 matt class_to_msl(int msl_class)
1346 1.1 dyoung {
1347 1.11 matt switch (msl_class) {
1348 1.1 dyoung case 0:
1349 1.1 dyoung case 1:
1350 1.1 dyoung return tcp_msl_remote ? tcp_msl_remote : (TCPTV_MSL >> 0);
1351 1.1 dyoung case 2:
1352 1.1 dyoung return tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1);
1353 1.1 dyoung default:
1354 1.1 dyoung return tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2);
1355 1.1 dyoung }
1356 1.1 dyoung }
1357 1.1 dyoung
1358 1.1 dyoung /*!\brief map TCP MSL to class
1359 1.1 dyoung */
1360 1.1 dyoung static inline uint32_t
1361 1.1 dyoung msl_to_class(int msl)
1362 1.1 dyoung {
1363 1.1 dyoung if (tcp_msl_enable) {
1364 1.1 dyoung if (msl <= (tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2)))
1365 1.1 dyoung return 1+2;
1366 1.1 dyoung if (msl <= (tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1)))
1367 1.1 dyoung return 1+1;
1368 1.1 dyoung return 1;
1369 1.1 dyoung }
1370 1.1 dyoung return 0;
1371 1.1 dyoung }
1372 1.1 dyoung
1373 1.1 dyoung /*!\brief allocate a vtw entry
1374 1.1 dyoung */
1375 1.1 dyoung static inline vtw_t *
1376 1.1 dyoung vtw_alloc(vtw_ctl_t *ctl)
1377 1.1 dyoung {
1378 1.1 dyoung vtw_t *vtw = 0;
1379 1.1 dyoung int stuck = 0;
1380 1.1 dyoung int avail = ctl ? (ctl->nalloc + ctl->nfree) : 0;
1381 1.1 dyoung int msl;
1382 1.1 dyoung
1383 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
1384 1.1 dyoung
1385 1.1 dyoung /* If no resources, we will not get far.
1386 1.1 dyoung */
1387 1.1 dyoung if (!ctl || !ctl->base.v4 || avail <= 0)
1388 1.1 dyoung return 0;
1389 1.1 dyoung
1390 1.1 dyoung /* Obtain a free one.
1391 1.1 dyoung */
1392 1.1 dyoung while (!ctl->nfree) {
1393 1.1 dyoung vtw_age(ctl, 0);
1394 1.1 dyoung
1395 1.1 dyoung if (++stuck > avail) {
1396 1.1 dyoung /* When in transition between
1397 1.1 dyoung * schemes (classless, classed) we
1398 1.1 dyoung * can be stuck having to await the
1399 1.1 dyoung * expiration of cross-allocated entries.
1400 1.1 dyoung *
1401 1.1 dyoung * Returning zero means we will fall back to the
1402 1.1 dyoung * traditional TIME_WAIT handling, except in the
1403 1.1 dyoung * case of a re-shed, in which case we cannot
1404 1.1 dyoung * perform the reshecd, but will retain the extant
1405 1.1 dyoung * entry.
1406 1.1 dyoung */
1407 1.1 dyoung db_trace(KTR_VTW
1408 1.1 dyoung , (ctl, "vtw:!none free in class %x %x/%x"
1409 1.1 dyoung , ctl->clidx
1410 1.1 dyoung , ctl->nalloc, ctl->nfree));
1411 1.1 dyoung
1412 1.1 dyoung return 0;
1413 1.1 dyoung }
1414 1.1 dyoung }
1415 1.1 dyoung
1416 1.1 dyoung vtw = ctl->alloc.v;
1417 1.1 dyoung
1418 1.1 dyoung if (vtw->msl_class != ctl->clidx) {
1419 1.1 dyoung /* Usurping rules:
1420 1.1 dyoung * 0 -> {1,2,3} or {1,2,3} -> 0
1421 1.1 dyoung */
1422 1.1 dyoung KASSERT(!vtw->msl_class || !ctl->clidx);
1423 1.1 dyoung
1424 1.1 dyoung if (vtw->hashed || vtw->expire.tv_sec) {
1425 1.1 dyoung /* As this is owned by some other class,
1426 1.1 dyoung * we must wait for it to expire it.
1427 1.1 dyoung * This will only happen on class/classless
1428 1.1 dyoung * transitions, which are guaranteed to progress
1429 1.1 dyoung * to completion in small finite time, barring bugs.
1430 1.1 dyoung */
1431 1.1 dyoung db_trace(KTR_VTW
1432 1.1 dyoung , (ctl, "vtw:!%p class %x!=%x %x:%x%s"
1433 1.1 dyoung , vtw, vtw->msl_class, ctl->clidx
1434 1.1 dyoung , vtw->expire.tv_sec
1435 1.1 dyoung , vtw->expire.tv_usec
1436 1.1 dyoung , vtw->hashed ? " hashed" : ""));
1437 1.1 dyoung
1438 1.1 dyoung return 0;
1439 1.1 dyoung }
1440 1.1 dyoung
1441 1.1 dyoung db_trace(KTR_VTW
1442 1.1 dyoung , (ctl, "vtw:!%p usurped from %x to %x"
1443 1.1 dyoung , vtw, vtw->msl_class, ctl->clidx));
1444 1.1 dyoung
1445 1.1 dyoung vtw->msl_class = ctl->clidx;
1446 1.1 dyoung }
1447 1.1 dyoung
1448 1.1 dyoung if (vtw_alive(vtw)) {
1449 1.1 dyoung KASSERT(0 && "next free not free");
1450 1.1 dyoung return 0;
1451 1.1 dyoung }
1452 1.1 dyoung
1453 1.1 dyoung /* Advance allocation poiter.
1454 1.1 dyoung */
1455 1.1 dyoung ctl->alloc.v = vtw_next(ctl, vtw);
1456 1.1 dyoung
1457 1.1 dyoung --ctl->nfree;
1458 1.1 dyoung ++ctl->nalloc;
1459 1.1 dyoung
1460 1.1 dyoung msl = (2 * class_to_msl(ctl->clidx) * 1000) / PR_SLOWHZ; // msec
1461 1.1 dyoung
1462 1.1 dyoung /* mark expiration
1463 1.1 dyoung */
1464 1.3 drochner getmicrouptime(&vtw->expire);
1465 1.1 dyoung
1466 1.1 dyoung /* Move expiration into the future.
1467 1.1 dyoung */
1468 1.1 dyoung vtw->expire.tv_sec += msl / 1000;
1469 1.1 dyoung vtw->expire.tv_usec += 1000 * (msl % 1000);
1470 1.1 dyoung
1471 1.1 dyoung while (vtw->expire.tv_usec >= 1000*1000) {
1472 1.1 dyoung vtw->expire.tv_usec -= 1000*1000;
1473 1.1 dyoung vtw->expire.tv_sec += 1;
1474 1.1 dyoung }
1475 1.1 dyoung
1476 1.1 dyoung if (!ctl->oldest.v)
1477 1.1 dyoung ctl->oldest.v = vtw;
1478 1.1 dyoung
1479 1.1 dyoung return vtw;
1480 1.1 dyoung }
1481 1.1 dyoung
1482 1.1 dyoung /*!\brief expiration
1483 1.1 dyoung */
1484 1.1 dyoung static int
1485 1.1 dyoung vtw_age(vtw_ctl_t *ctl, struct timeval *_when)
1486 1.1 dyoung {
1487 1.1 dyoung vtw_t *vtw;
1488 1.1 dyoung struct timeval then, *when = _when;
1489 1.1 dyoung int maxtries = 0;
1490 1.1 dyoung
1491 1.1 dyoung if (!ctl->oldest.v) {
1492 1.1 dyoung KASSERT(!ctl->nalloc);
1493 1.1 dyoung return 0;
1494 1.1 dyoung }
1495 1.1 dyoung
1496 1.1 dyoung for (vtw = ctl->oldest.v; vtw && ctl->nalloc; ) {
1497 1.1 dyoung if (++maxtries > ctl->nalloc)
1498 1.1 dyoung break;
1499 1.1 dyoung
1500 1.1 dyoung if (vtw->msl_class != ctl->clidx) {
1501 1.1 dyoung db_trace(KTR_VTW
1502 1.1 dyoung , (vtw, "vtw:!age class mismatch %x != %x"
1503 1.1 dyoung , vtw->msl_class, ctl->clidx));
1504 1.1 dyoung /* XXXX
1505 1.1 dyoung * See if the appropriate action is to skip to the next.
1506 1.1 dyoung * XXXX
1507 1.1 dyoung */
1508 1.1 dyoung ctl->oldest.v = vtw = vtw_next(ctl, vtw);
1509 1.1 dyoung continue;
1510 1.1 dyoung }
1511 1.1 dyoung if (!when) {
1512 1.1 dyoung /* Latch oldest timeval if none specified.
1513 1.1 dyoung */
1514 1.1 dyoung then = vtw->expire;
1515 1.1 dyoung when = &then;
1516 1.1 dyoung }
1517 1.1 dyoung
1518 1.1 dyoung if (!timercmp(&vtw->expire, when, <=))
1519 1.1 dyoung break;
1520 1.1 dyoung
1521 1.1 dyoung db_trace(KTR_VTW
1522 1.1 dyoung , (vtw, "vtw: expire %x %8.8x:%8.8x %x/%x"
1523 1.1 dyoung , ctl->clidx
1524 1.1 dyoung , vtw->expire.tv_sec
1525 1.1 dyoung , vtw->expire.tv_usec
1526 1.1 dyoung , ctl->nalloc
1527 1.1 dyoung , ctl->nfree));
1528 1.1 dyoung
1529 1.1 dyoung if (!_when)
1530 1.1 dyoung ++vtw_stats.kill;
1531 1.1 dyoung
1532 1.1 dyoung vtw_del(ctl, vtw);
1533 1.1 dyoung vtw = ctl->oldest.v;
1534 1.1 dyoung }
1535 1.1 dyoung
1536 1.1 dyoung return ctl->nalloc; // # remaining allocated
1537 1.1 dyoung }
1538 1.1 dyoung
1539 1.1 dyoung static callout_t vtw_cs;
1540 1.1 dyoung
1541 1.1 dyoung /*!\brief notice the passage of time.
1542 1.1 dyoung * It seems to be getting faster. What happened to the year?
1543 1.1 dyoung */
1544 1.1 dyoung static void
1545 1.1 dyoung vtw_tick(void *arg)
1546 1.1 dyoung {
1547 1.1 dyoung struct timeval now;
1548 1.1 dyoung int i, cnt = 0;
1549 1.1 dyoung
1550 1.3 drochner getmicrouptime(&now);
1551 1.1 dyoung
1552 1.1 dyoung db_trace(KTR_VTW, (arg, "vtk: tick - now %8.8x:%8.8x"
1553 1.1 dyoung , now.tv_sec, now.tv_usec));
1554 1.1 dyoung
1555 1.1 dyoung mutex_enter(softnet_lock);
1556 1.1 dyoung
1557 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
1558 1.1 dyoung cnt += vtw_age(&vtw_tcpv4[i], &now);
1559 1.1 dyoung cnt += vtw_age(&vtw_tcpv6[i], &now);
1560 1.1 dyoung }
1561 1.1 dyoung
1562 1.1 dyoung /* Keep ticks coming while we need them.
1563 1.1 dyoung */
1564 1.1 dyoung if (cnt)
1565 1.1 dyoung callout_schedule(&vtw_cs, hz / 5);
1566 1.1 dyoung else {
1567 1.1 dyoung tcp_vtw_was_enabled = 0;
1568 1.1 dyoung tcbtable.vestige = 0;
1569 1.1 dyoung }
1570 1.1 dyoung mutex_exit(softnet_lock);
1571 1.1 dyoung }
1572 1.1 dyoung
1573 1.1 dyoung /* in_pcblookup_ports assist for handling vestigial entries.
1574 1.1 dyoung */
1575 1.1 dyoung static void *
1576 1.1 dyoung tcp_init_ports_v4(struct in_addr addr, u_int port, int wild)
1577 1.1 dyoung {
1578 1.1 dyoung struct tcp_ports_iterator *it = &tcp_ports_iterator_v4;
1579 1.1 dyoung
1580 1.1 dyoung bzero(it, sizeof (*it));
1581 1.1 dyoung
1582 1.1 dyoung /* Note: the reference to vtw_tcpv4[0] is fine.
1583 1.1 dyoung * We do not need per-class iteration. We just
1584 1.1 dyoung * need to get to the fat, and there is one
1585 1.1 dyoung * shared fat.
1586 1.1 dyoung */
1587 1.1 dyoung if (vtw_tcpv4[0].fat) {
1588 1.1 dyoung it->addr.v4 = addr;
1589 1.1 dyoung it->port = port;
1590 1.1 dyoung it->wild = !!wild;
1591 1.1 dyoung it->ctl = &vtw_tcpv4[0];
1592 1.1 dyoung
1593 1.1 dyoung ++vtw_stats.look[1];
1594 1.1 dyoung }
1595 1.1 dyoung
1596 1.1 dyoung return it;
1597 1.1 dyoung }
1598 1.1 dyoung
1599 1.1 dyoung /*!\brief export an IPv4 vtw.
1600 1.1 dyoung */
1601 1.1 dyoung static int
1602 1.1 dyoung vtw_export_v4(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res)
1603 1.1 dyoung {
1604 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
1605 1.1 dyoung
1606 1.1 dyoung bzero(res, sizeof (*res));
1607 1.1 dyoung
1608 1.1 dyoung if (ctl && vtw) {
1609 1.1 dyoung if (!ctl->clidx && vtw->msl_class)
1610 1.1 dyoung ctl += vtw->msl_class;
1611 1.1 dyoung else
1612 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
1613 1.1 dyoung
1614 1.1 dyoung res->valid = 1;
1615 1.1 dyoung res->v4 = 1;
1616 1.1 dyoung
1617 1.1 dyoung res->faddr.v4.s_addr = v4->faddr;
1618 1.1 dyoung res->laddr.v4.s_addr = v4->laddr;
1619 1.1 dyoung res->fport = v4->fport;
1620 1.1 dyoung res->lport = v4->lport;
1621 1.1 dyoung res->vtw = vtw; // netlock held over call(s)
1622 1.1 dyoung res->ctl = ctl;
1623 1.1 dyoung res->reuse_addr = vtw->reuse_addr;
1624 1.1 dyoung res->reuse_port = vtw->reuse_port;
1625 1.1 dyoung res->snd_nxt = vtw->snd_nxt;
1626 1.1 dyoung res->rcv_nxt = vtw->rcv_nxt;
1627 1.1 dyoung res->rcv_wnd = vtw->rcv_wnd;
1628 1.1 dyoung res->uid = vtw->uid;
1629 1.1 dyoung }
1630 1.1 dyoung
1631 1.1 dyoung return res->valid;
1632 1.1 dyoung }
1633 1.1 dyoung
1634 1.1 dyoung /*!\brief return next port in the port iterator. yowza.
1635 1.1 dyoung */
1636 1.1 dyoung static int
1637 1.1 dyoung tcp_next_port_v4(void *arg, struct vestigial_inpcb *res)
1638 1.1 dyoung {
1639 1.1 dyoung struct tcp_ports_iterator *it = arg;
1640 1.1 dyoung vtw_t *vtw = 0;
1641 1.1 dyoung
1642 1.1 dyoung if (it->ctl)
1643 1.1 dyoung vtw = vtw_next_port_v4(it);
1644 1.1 dyoung
1645 1.1 dyoung if (!vtw)
1646 1.1 dyoung it->ctl = 0;
1647 1.1 dyoung
1648 1.1 dyoung return vtw_export_v4(it->ctl, vtw, res);
1649 1.1 dyoung }
1650 1.1 dyoung
1651 1.1 dyoung static int
1652 1.1 dyoung tcp_lookup_v4(struct in_addr faddr, uint16_t fport,
1653 1.1 dyoung struct in_addr laddr, uint16_t lport,
1654 1.1 dyoung struct vestigial_inpcb *res)
1655 1.1 dyoung {
1656 1.1 dyoung vtw_t *vtw;
1657 1.1 dyoung vtw_ctl_t *ctl;
1658 1.1 dyoung
1659 1.1 dyoung
1660 1.1 dyoung db_trace(KTR_VTW
1661 1.1 dyoung , (res, "vtw: lookup %A:%P %A:%P"
1662 1.1 dyoung , faddr, fport
1663 1.1 dyoung , laddr, lport));
1664 1.1 dyoung
1665 1.1 dyoung vtw = vtw_lookup_hash_v4((ctl = &vtw_tcpv4[0])
1666 1.1 dyoung , faddr.s_addr, fport
1667 1.1 dyoung , laddr.s_addr, lport, 0);
1668 1.1 dyoung
1669 1.1 dyoung return vtw_export_v4(ctl, vtw, res);
1670 1.1 dyoung }
1671 1.1 dyoung
1672 1.1 dyoung /* in_pcblookup_ports assist for handling vestigial entries.
1673 1.1 dyoung */
1674 1.1 dyoung static void *
1675 1.1 dyoung tcp_init_ports_v6(const struct in6_addr *addr, u_int port, int wild)
1676 1.1 dyoung {
1677 1.1 dyoung struct tcp_ports_iterator *it = &tcp_ports_iterator_v6;
1678 1.1 dyoung
1679 1.1 dyoung bzero(it, sizeof (*it));
1680 1.1 dyoung
1681 1.1 dyoung /* Note: the reference to vtw_tcpv6[0] is fine.
1682 1.1 dyoung * We do not need per-class iteration. We just
1683 1.1 dyoung * need to get to the fat, and there is one
1684 1.1 dyoung * shared fat.
1685 1.1 dyoung */
1686 1.1 dyoung if (vtw_tcpv6[0].fat) {
1687 1.1 dyoung it->addr.v6 = *addr;
1688 1.1 dyoung it->port = port;
1689 1.1 dyoung it->wild = !!wild;
1690 1.1 dyoung it->ctl = &vtw_tcpv6[0];
1691 1.1 dyoung
1692 1.1 dyoung ++vtw_stats.look[1];
1693 1.1 dyoung }
1694 1.1 dyoung
1695 1.1 dyoung return it;
1696 1.1 dyoung }
1697 1.1 dyoung
1698 1.1 dyoung /*!\brief export an IPv6 vtw.
1699 1.1 dyoung */
1700 1.1 dyoung static int
1701 1.1 dyoung vtw_export_v6(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res)
1702 1.1 dyoung {
1703 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
1704 1.1 dyoung
1705 1.1 dyoung bzero(res, sizeof (*res));
1706 1.1 dyoung
1707 1.1 dyoung if (ctl && vtw) {
1708 1.1 dyoung if (!ctl->clidx && vtw->msl_class)
1709 1.1 dyoung ctl += vtw->msl_class;
1710 1.1 dyoung else
1711 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
1712 1.1 dyoung
1713 1.1 dyoung res->valid = 1;
1714 1.1 dyoung res->v4 = 0;
1715 1.1 dyoung
1716 1.1 dyoung res->faddr.v6 = v6->faddr;
1717 1.1 dyoung res->laddr.v6 = v6->laddr;
1718 1.1 dyoung res->fport = v6->fport;
1719 1.1 dyoung res->lport = v6->lport;
1720 1.1 dyoung res->vtw = vtw; // netlock held over call(s)
1721 1.1 dyoung res->ctl = ctl;
1722 1.1 dyoung
1723 1.1 dyoung res->v6only = vtw->v6only;
1724 1.1 dyoung res->reuse_addr = vtw->reuse_addr;
1725 1.1 dyoung res->reuse_port = vtw->reuse_port;
1726 1.1 dyoung
1727 1.1 dyoung res->snd_nxt = vtw->snd_nxt;
1728 1.1 dyoung res->rcv_nxt = vtw->rcv_nxt;
1729 1.1 dyoung res->rcv_wnd = vtw->rcv_wnd;
1730 1.1 dyoung res->uid = vtw->uid;
1731 1.1 dyoung }
1732 1.1 dyoung
1733 1.1 dyoung return res->valid;
1734 1.1 dyoung }
1735 1.1 dyoung
1736 1.1 dyoung static int
1737 1.1 dyoung tcp_next_port_v6(void *arg, struct vestigial_inpcb *res)
1738 1.1 dyoung {
1739 1.1 dyoung struct tcp_ports_iterator *it = arg;
1740 1.1 dyoung vtw_t *vtw = 0;
1741 1.1 dyoung
1742 1.1 dyoung if (it->ctl)
1743 1.1 dyoung vtw = vtw_next_port_v6(it);
1744 1.1 dyoung
1745 1.1 dyoung if (!vtw)
1746 1.1 dyoung it->ctl = 0;
1747 1.1 dyoung
1748 1.1 dyoung return vtw_export_v6(it->ctl, vtw, res);
1749 1.1 dyoung }
1750 1.1 dyoung
1751 1.1 dyoung static int
1752 1.1 dyoung tcp_lookup_v6(const struct in6_addr *faddr, uint16_t fport,
1753 1.1 dyoung const struct in6_addr *laddr, uint16_t lport,
1754 1.1 dyoung struct vestigial_inpcb *res)
1755 1.1 dyoung {
1756 1.1 dyoung vtw_ctl_t *ctl;
1757 1.1 dyoung vtw_t *vtw;
1758 1.1 dyoung
1759 1.1 dyoung db_trace(KTR_VTW
1760 1.1 dyoung , (res, "vtw: lookup %6A:%P %6A:%P"
1761 1.1 dyoung , db_store(faddr, sizeof (*faddr)), fport
1762 1.1 dyoung , db_store(laddr, sizeof (*laddr)), lport));
1763 1.1 dyoung
1764 1.1 dyoung vtw = vtw_lookup_hash_v6((ctl = &vtw_tcpv6[0])
1765 1.1 dyoung , faddr, fport
1766 1.1 dyoung , laddr, lport, 0);
1767 1.1 dyoung
1768 1.1 dyoung return vtw_export_v6(ctl, vtw, res);
1769 1.1 dyoung }
1770 1.1 dyoung
1771 1.1 dyoung static vestigial_hooks_t tcp_hooks = {
1772 1.1 dyoung .init_ports4 = tcp_init_ports_v4,
1773 1.1 dyoung .next_port4 = tcp_next_port_v4,
1774 1.1 dyoung .lookup4 = tcp_lookup_v4,
1775 1.1 dyoung .init_ports6 = tcp_init_ports_v6,
1776 1.1 dyoung .next_port6 = tcp_next_port_v6,
1777 1.1 dyoung .lookup6 = tcp_lookup_v6,
1778 1.1 dyoung };
1779 1.1 dyoung
1780 1.1 dyoung static bool
1781 1.1 dyoung vtw_select(int af, fatp_ctl_t **fatp, vtw_ctl_t **ctlp)
1782 1.1 dyoung {
1783 1.1 dyoung fatp_ctl_t *fat;
1784 1.1 dyoung vtw_ctl_t *ctl;
1785 1.1 dyoung
1786 1.1 dyoung switch (af) {
1787 1.1 dyoung case AF_INET:
1788 1.1 dyoung fat = &fat_tcpv4;
1789 1.1 dyoung ctl = &vtw_tcpv4[0];
1790 1.1 dyoung break;
1791 1.1 dyoung case AF_INET6:
1792 1.1 dyoung fat = &fat_tcpv6;
1793 1.1 dyoung ctl = &vtw_tcpv6[0];
1794 1.1 dyoung break;
1795 1.1 dyoung default:
1796 1.1 dyoung return false;
1797 1.1 dyoung }
1798 1.1 dyoung if (fatp != NULL)
1799 1.1 dyoung *fatp = fat;
1800 1.1 dyoung if (ctlp != NULL)
1801 1.1 dyoung *ctlp = ctl;
1802 1.1 dyoung return true;
1803 1.1 dyoung }
1804 1.1 dyoung
1805 1.1 dyoung /*!\brief initialize controlling instance
1806 1.1 dyoung */
1807 1.1 dyoung static int
1808 1.1 dyoung vtw_control_init(int af)
1809 1.1 dyoung {
1810 1.1 dyoung fatp_ctl_t *fat;
1811 1.1 dyoung vtw_ctl_t *ctl;
1812 1.6 dyoung fatp_t *fat_base;
1813 1.6 dyoung fatp_t **fat_hash;
1814 1.6 dyoung vtw_t *ctl_base_v;
1815 1.6 dyoung uint32_t n, m;
1816 1.6 dyoung size_t sz;
1817 1.6 dyoung
1818 1.6 dyoung KASSERT(powerof2(tcp_vtw_entries));
1819 1.1 dyoung
1820 1.1 dyoung if (!vtw_select(af, &fat, &ctl))
1821 1.1 dyoung return EAFNOSUPPORT;
1822 1.1 dyoung
1823 1.6 dyoung if (fat->hash != NULL) {
1824 1.6 dyoung KASSERT(fat->base != NULL && ctl->base.v != NULL);
1825 1.6 dyoung return 0;
1826 1.6 dyoung }
1827 1.6 dyoung
1828 1.6 dyoung /* Allocate 10% more capacity in the fat pointers.
1829 1.6 dyoung * We should only need ~#hash additional based on
1830 1.6 dyoung * how they age, but TIME_WAIT assassination could cause
1831 1.6 dyoung * sparse fat pointer utilisation.
1832 1.6 dyoung */
1833 1.6 dyoung m = 512;
1834 1.6 dyoung n = 2*m + (11 * (tcp_vtw_entries / fatp_ntags())) / 10;
1835 1.6 dyoung sz = (ctl->is_v4 ? sizeof(vtw_v4_t) : sizeof(vtw_v6_t));
1836 1.6 dyoung
1837 1.6 dyoung fat_hash = kmem_zalloc(2*m * sizeof(fatp_t *), KM_NOSLEEP);
1838 1.6 dyoung
1839 1.6 dyoung if (fat_hash == NULL) {
1840 1.6 dyoung printf("%s: could not allocate %zu bytes for "
1841 1.6 dyoung "hash anchors", __func__, 2*m * sizeof(fatp_t *));
1842 1.6 dyoung return ENOMEM;
1843 1.6 dyoung }
1844 1.1 dyoung
1845 1.6 dyoung fat_base = kmem_zalloc(2*n * sizeof(fatp_t), KM_NOSLEEP);
1846 1.1 dyoung
1847 1.6 dyoung if (fat_base == NULL) {
1848 1.6 dyoung kmem_free(fat_hash, 2*m * sizeof (fatp_t *));
1849 1.6 dyoung printf("%s: could not allocate %zu bytes for "
1850 1.6 dyoung "fatp_t array", __func__, 2*n * sizeof(fatp_t));
1851 1.6 dyoung return ENOMEM;
1852 1.6 dyoung }
1853 1.1 dyoung
1854 1.6 dyoung ctl_base_v = kmem_zalloc(tcp_vtw_entries * sz, KM_NOSLEEP);
1855 1.1 dyoung
1856 1.6 dyoung if (ctl_base_v == NULL) {
1857 1.6 dyoung kmem_free(fat_hash, 2*m * sizeof (fatp_t *));
1858 1.6 dyoung kmem_free(fat_base, 2*n * sizeof(fatp_t));
1859 1.6 dyoung printf("%s: could not allocate %zu bytes for "
1860 1.6 dyoung "vtw_t array", __func__, tcp_vtw_entries * sz);
1861 1.6 dyoung return ENOMEM;
1862 1.1 dyoung }
1863 1.1 dyoung
1864 1.6 dyoung fatp_init(fat, n, m, fat_base, fat_hash);
1865 1.1 dyoung
1866 1.6 dyoung vtw_init(fat, ctl, tcp_vtw_entries, ctl_base_v);
1867 1.1 dyoung
1868 1.1 dyoung return 0;
1869 1.1 dyoung }
1870 1.1 dyoung
1871 1.1 dyoung /*!\brief select controlling instance
1872 1.1 dyoung */
1873 1.1 dyoung static vtw_ctl_t *
1874 1.1 dyoung vtw_control(int af, uint32_t msl)
1875 1.1 dyoung {
1876 1.1 dyoung fatp_ctl_t *fat;
1877 1.1 dyoung vtw_ctl_t *ctl;
1878 1.11 matt int msl_class = msl_to_class(msl);
1879 1.1 dyoung
1880 1.1 dyoung if (!vtw_select(af, &fat, &ctl))
1881 1.1 dyoung return NULL;
1882 1.1 dyoung
1883 1.1 dyoung if (!fat->base || !ctl->base.v)
1884 1.1 dyoung return NULL;
1885 1.1 dyoung
1886 1.5 dyoung if (!tcp_vtw_was_enabled) {
1887 1.5 dyoung /* This guarantees is timer ticks until we no longer need them.
1888 1.5 dyoung */
1889 1.5 dyoung tcp_vtw_was_enabled = 1;
1890 1.5 dyoung
1891 1.5 dyoung callout_schedule(&vtw_cs, hz / 5);
1892 1.5 dyoung
1893 1.5 dyoung tcbtable.vestige = &tcp_hooks;
1894 1.5 dyoung }
1895 1.5 dyoung
1896 1.11 matt return ctl + msl_class;
1897 1.1 dyoung }
1898 1.1 dyoung
1899 1.1 dyoung /*!\brief add TCP pcb to vestigial timewait
1900 1.1 dyoung */
1901 1.1 dyoung int
1902 1.1 dyoung vtw_add(int af, struct tcpcb *tp)
1903 1.1 dyoung {
1904 1.10 martin #ifdef VTW_DEBUG
1905 1.1 dyoung int enable;
1906 1.10 martin #endif
1907 1.1 dyoung vtw_ctl_t *ctl;
1908 1.1 dyoung vtw_t *vtw;
1909 1.1 dyoung
1910 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
1911 1.1 dyoung
1912 1.1 dyoung ctl = vtw_control(af, tp->t_msl);
1913 1.1 dyoung if (!ctl)
1914 1.1 dyoung return 0;
1915 1.1 dyoung
1916 1.10 martin #ifdef VTW_DEBUG
1917 1.1 dyoung enable = (af == AF_INET) ? tcp4_vtw_enable : tcp6_vtw_enable;
1918 1.10 martin #endif
1919 1.1 dyoung
1920 1.1 dyoung vtw = vtw_alloc(ctl);
1921 1.1 dyoung
1922 1.1 dyoung if (vtw) {
1923 1.1 dyoung vtw->snd_nxt = tp->snd_nxt;
1924 1.1 dyoung vtw->rcv_nxt = tp->rcv_nxt;
1925 1.1 dyoung
1926 1.1 dyoung switch (af) {
1927 1.1 dyoung case AF_INET: {
1928 1.1 dyoung struct inpcb *inp = tp->t_inpcb;
1929 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
1930 1.1 dyoung
1931 1.1 dyoung v4->faddr = inp->inp_faddr.s_addr;
1932 1.1 dyoung v4->laddr = inp->inp_laddr.s_addr;
1933 1.1 dyoung v4->fport = inp->inp_fport;
1934 1.1 dyoung v4->lport = inp->inp_lport;
1935 1.1 dyoung
1936 1.1 dyoung vtw->reuse_port = !!(inp->inp_socket->so_options
1937 1.1 dyoung & SO_REUSEPORT);
1938 1.1 dyoung vtw->reuse_addr = !!(inp->inp_socket->so_options
1939 1.1 dyoung & SO_REUSEADDR);
1940 1.1 dyoung vtw->v6only = 0;
1941 1.1 dyoung vtw->uid = inp->inp_socket->so_uidinfo->ui_uid;
1942 1.1 dyoung
1943 1.1 dyoung vtw_inshash_v4(ctl, vtw);
1944 1.1 dyoung
1945 1.1 dyoung
1946 1.1 dyoung #ifdef VTW_DEBUG
1947 1.1 dyoung /* Immediate lookup (connected and port) to
1948 1.1 dyoung * ensure at least that works!
1949 1.1 dyoung */
1950 1.1 dyoung if (enable & 4) {
1951 1.1 dyoung KASSERT(vtw_lookup_hash_v4
1952 1.1 dyoung (ctl
1953 1.1 dyoung , inp->inp_faddr.s_addr, inp->inp_fport
1954 1.1 dyoung , inp->inp_laddr.s_addr, inp->inp_lport
1955 1.1 dyoung , 0)
1956 1.1 dyoung == vtw);
1957 1.1 dyoung KASSERT(vtw_lookup_hash_v4
1958 1.1 dyoung (ctl
1959 1.1 dyoung , inp->inp_faddr.s_addr, inp->inp_fport
1960 1.1 dyoung , inp->inp_laddr.s_addr, inp->inp_lport
1961 1.1 dyoung , 1));
1962 1.1 dyoung }
1963 1.1 dyoung /* Immediate port iterator functionality check: not wild
1964 1.1 dyoung */
1965 1.1 dyoung if (enable & 8) {
1966 1.1 dyoung struct tcp_ports_iterator *it;
1967 1.1 dyoung struct vestigial_inpcb res;
1968 1.1 dyoung int cnt = 0;
1969 1.1 dyoung
1970 1.1 dyoung it = tcp_init_ports_v4(inp->inp_laddr
1971 1.1 dyoung , inp->inp_lport, 0);
1972 1.1 dyoung
1973 1.1 dyoung while (tcp_next_port_v4(it, &res)) {
1974 1.1 dyoung ++cnt;
1975 1.1 dyoung }
1976 1.1 dyoung KASSERT(cnt);
1977 1.1 dyoung }
1978 1.1 dyoung /* Immediate port iterator functionality check: wild
1979 1.1 dyoung */
1980 1.1 dyoung if (enable & 16) {
1981 1.1 dyoung struct tcp_ports_iterator *it;
1982 1.1 dyoung struct vestigial_inpcb res;
1983 1.1 dyoung struct in_addr any;
1984 1.1 dyoung int cnt = 0;
1985 1.1 dyoung
1986 1.1 dyoung any.s_addr = htonl(INADDR_ANY);
1987 1.1 dyoung
1988 1.1 dyoung it = tcp_init_ports_v4(any, inp->inp_lport, 1);
1989 1.1 dyoung
1990 1.1 dyoung while (tcp_next_port_v4(it, &res)) {
1991 1.1 dyoung ++cnt;
1992 1.1 dyoung }
1993 1.1 dyoung KASSERT(cnt);
1994 1.1 dyoung }
1995 1.1 dyoung #endif /* VTW_DEBUG */
1996 1.1 dyoung break;
1997 1.1 dyoung }
1998 1.1 dyoung
1999 1.1 dyoung case AF_INET6: {
2000 1.1 dyoung struct in6pcb *inp = tp->t_in6pcb;
2001 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
2002 1.1 dyoung
2003 1.1 dyoung v6->faddr = inp->in6p_faddr;
2004 1.1 dyoung v6->laddr = inp->in6p_laddr;
2005 1.1 dyoung v6->fport = inp->in6p_fport;
2006 1.1 dyoung v6->lport = inp->in6p_lport;
2007 1.1 dyoung
2008 1.1 dyoung vtw->reuse_port = !!(inp->in6p_socket->so_options
2009 1.1 dyoung & SO_REUSEPORT);
2010 1.1 dyoung vtw->reuse_addr = !!(inp->in6p_socket->so_options
2011 1.1 dyoung & SO_REUSEADDR);
2012 1.1 dyoung vtw->v6only = !!(inp->in6p_flags
2013 1.1 dyoung & IN6P_IPV6_V6ONLY);
2014 1.1 dyoung vtw->uid = inp->in6p_socket->so_uidinfo->ui_uid;
2015 1.1 dyoung
2016 1.1 dyoung vtw_inshash_v6(ctl, vtw);
2017 1.1 dyoung #ifdef VTW_DEBUG
2018 1.1 dyoung /* Immediate lookup (connected and port) to
2019 1.1 dyoung * ensure at least that works!
2020 1.1 dyoung */
2021 1.1 dyoung if (enable & 4) {
2022 1.1 dyoung KASSERT(vtw_lookup_hash_v6(ctl
2023 1.1 dyoung , &inp->in6p_faddr, inp->in6p_fport
2024 1.1 dyoung , &inp->in6p_laddr, inp->in6p_lport
2025 1.1 dyoung , 0)
2026 1.1 dyoung == vtw);
2027 1.1 dyoung KASSERT(vtw_lookup_hash_v6
2028 1.1 dyoung (ctl
2029 1.1 dyoung , &inp->in6p_faddr, inp->in6p_fport
2030 1.1 dyoung , &inp->in6p_laddr, inp->in6p_lport
2031 1.1 dyoung , 1));
2032 1.1 dyoung }
2033 1.1 dyoung /* Immediate port iterator functionality check: not wild
2034 1.1 dyoung */
2035 1.1 dyoung if (enable & 8) {
2036 1.1 dyoung struct tcp_ports_iterator *it;
2037 1.1 dyoung struct vestigial_inpcb res;
2038 1.1 dyoung int cnt = 0;
2039 1.1 dyoung
2040 1.1 dyoung it = tcp_init_ports_v6(&inp->in6p_laddr
2041 1.1 dyoung , inp->in6p_lport, 0);
2042 1.1 dyoung
2043 1.1 dyoung while (tcp_next_port_v6(it, &res)) {
2044 1.1 dyoung ++cnt;
2045 1.1 dyoung }
2046 1.1 dyoung KASSERT(cnt);
2047 1.1 dyoung }
2048 1.1 dyoung /* Immediate port iterator functionality check: wild
2049 1.1 dyoung */
2050 1.1 dyoung if (enable & 16) {
2051 1.1 dyoung struct tcp_ports_iterator *it;
2052 1.1 dyoung struct vestigial_inpcb res;
2053 1.1 dyoung static struct in6_addr any = IN6ADDR_ANY_INIT;
2054 1.1 dyoung int cnt = 0;
2055 1.1 dyoung
2056 1.1 dyoung it = tcp_init_ports_v6(&any
2057 1.1 dyoung , inp->in6p_lport, 1);
2058 1.1 dyoung
2059 1.1 dyoung while (tcp_next_port_v6(it, &res)) {
2060 1.1 dyoung ++cnt;
2061 1.1 dyoung }
2062 1.1 dyoung KASSERT(cnt);
2063 1.1 dyoung }
2064 1.1 dyoung #endif /* VTW_DEBUG */
2065 1.1 dyoung break;
2066 1.1 dyoung }
2067 1.1 dyoung }
2068 1.1 dyoung
2069 1.1 dyoung tcp_canceltimers(tp);
2070 1.1 dyoung tp = tcp_close(tp);
2071 1.1 dyoung KASSERT(!tp);
2072 1.1 dyoung
2073 1.1 dyoung return 1;
2074 1.1 dyoung }
2075 1.1 dyoung
2076 1.1 dyoung return 0;
2077 1.1 dyoung }
2078 1.1 dyoung
2079 1.1 dyoung /*!\brief restart timer for vestigial time-wait entry
2080 1.1 dyoung */
2081 1.1 dyoung static void
2082 1.1 dyoung vtw_restart_v4(vestigial_inpcb_t *vp)
2083 1.1 dyoung {
2084 1.1 dyoung vtw_v4_t copy = *(vtw_v4_t*)vp->vtw;
2085 1.1 dyoung vtw_t *vtw;
2086 1.1 dyoung vtw_t *cp = ©.common;
2087 1.1 dyoung vtw_ctl_t *ctl;
2088 1.1 dyoung
2089 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
2090 1.1 dyoung
2091 1.1 dyoung db_trace(KTR_VTW
2092 1.1 dyoung , (vp->vtw, "vtw: restart %A:%P %A:%P"
2093 1.1 dyoung , vp->faddr.v4.s_addr, vp->fport
2094 1.1 dyoung , vp->laddr.v4.s_addr, vp->lport));
2095 1.1 dyoung
2096 1.1 dyoung /* Class might have changed, so have a squiz.
2097 1.1 dyoung */
2098 1.1 dyoung ctl = vtw_control(AF_INET, class_to_msl(cp->msl_class));
2099 1.1 dyoung vtw = vtw_alloc(ctl);
2100 1.1 dyoung
2101 1.1 dyoung if (vtw) {
2102 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
2103 1.1 dyoung
2104 1.1 dyoung /* Safe now to unhash the old entry
2105 1.1 dyoung */
2106 1.1 dyoung vtw_del(vp->ctl, vp->vtw);
2107 1.1 dyoung
2108 1.1 dyoung vtw->snd_nxt = cp->snd_nxt;
2109 1.1 dyoung vtw->rcv_nxt = cp->rcv_nxt;
2110 1.1 dyoung
2111 1.1 dyoung v4->faddr = copy.faddr;
2112 1.1 dyoung v4->laddr = copy.laddr;
2113 1.1 dyoung v4->fport = copy.fport;
2114 1.1 dyoung v4->lport = copy.lport;
2115 1.1 dyoung
2116 1.1 dyoung vtw->reuse_port = cp->reuse_port;
2117 1.1 dyoung vtw->reuse_addr = cp->reuse_addr;
2118 1.1 dyoung vtw->v6only = 0;
2119 1.1 dyoung vtw->uid = cp->uid;
2120 1.1 dyoung
2121 1.1 dyoung vtw_inshash_v4(ctl, vtw);
2122 1.1 dyoung }
2123 1.1 dyoung
2124 1.1 dyoung vp->valid = 0;
2125 1.1 dyoung }
2126 1.1 dyoung
2127 1.1 dyoung /*!\brief restart timer for vestigial time-wait entry
2128 1.1 dyoung */
2129 1.1 dyoung static void
2130 1.1 dyoung vtw_restart_v6(vestigial_inpcb_t *vp)
2131 1.1 dyoung {
2132 1.1 dyoung vtw_v6_t copy = *(vtw_v6_t*)vp->vtw;
2133 1.1 dyoung vtw_t *vtw;
2134 1.1 dyoung vtw_t *cp = ©.common;
2135 1.1 dyoung vtw_ctl_t *ctl;
2136 1.1 dyoung
2137 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
2138 1.1 dyoung
2139 1.1 dyoung db_trace(KTR_VTW
2140 1.1 dyoung , (vp->vtw, "vtw: restart %6A:%P %6A:%P"
2141 1.1 dyoung , db_store(&vp->faddr.v6, sizeof (vp->faddr.v6))
2142 1.1 dyoung , vp->fport
2143 1.1 dyoung , db_store(&vp->laddr.v6, sizeof (vp->laddr.v6))
2144 1.1 dyoung , vp->lport));
2145 1.1 dyoung
2146 1.1 dyoung /* Class might have changed, so have a squiz.
2147 1.1 dyoung */
2148 1.1 dyoung ctl = vtw_control(AF_INET6, class_to_msl(cp->msl_class));
2149 1.1 dyoung vtw = vtw_alloc(ctl);
2150 1.1 dyoung
2151 1.1 dyoung if (vtw) {
2152 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
2153 1.1 dyoung
2154 1.1 dyoung /* Safe now to unhash the old entry
2155 1.1 dyoung */
2156 1.1 dyoung vtw_del(vp->ctl, vp->vtw);
2157 1.1 dyoung
2158 1.1 dyoung vtw->snd_nxt = cp->snd_nxt;
2159 1.1 dyoung vtw->rcv_nxt = cp->rcv_nxt;
2160 1.1 dyoung
2161 1.1 dyoung v6->faddr = copy.faddr;
2162 1.1 dyoung v6->laddr = copy.laddr;
2163 1.1 dyoung v6->fport = copy.fport;
2164 1.1 dyoung v6->lport = copy.lport;
2165 1.1 dyoung
2166 1.1 dyoung vtw->reuse_port = cp->reuse_port;
2167 1.1 dyoung vtw->reuse_addr = cp->reuse_addr;
2168 1.1 dyoung vtw->v6only = cp->v6only;
2169 1.1 dyoung vtw->uid = cp->uid;
2170 1.1 dyoung
2171 1.1 dyoung vtw_inshash_v6(ctl, vtw);
2172 1.1 dyoung }
2173 1.1 dyoung
2174 1.1 dyoung vp->valid = 0;
2175 1.1 dyoung }
2176 1.1 dyoung
2177 1.1 dyoung /*!\brief restart timer for vestigial time-wait entry
2178 1.1 dyoung */
2179 1.1 dyoung void
2180 1.1 dyoung vtw_restart(vestigial_inpcb_t *vp)
2181 1.1 dyoung {
2182 1.1 dyoung if (!vp || !vp->valid)
2183 1.1 dyoung return;
2184 1.1 dyoung
2185 1.1 dyoung if (vp->v4)
2186 1.1 dyoung vtw_restart_v4(vp);
2187 1.1 dyoung else
2188 1.1 dyoung vtw_restart_v6(vp);
2189 1.1 dyoung }
2190 1.1 dyoung
2191 1.1 dyoung int
2192 1.7 dyoung sysctl_tcp_vtw_enable(SYSCTLFN_ARGS)
2193 1.7 dyoung {
2194 1.7 dyoung int en, rc;
2195 1.7 dyoung struct sysctlnode node;
2196 1.7 dyoung
2197 1.7 dyoung node = *rnode;
2198 1.7 dyoung en = *(int *)rnode->sysctl_data;
2199 1.7 dyoung node.sysctl_data = &en;
2200 1.7 dyoung
2201 1.7 dyoung rc = sysctl_lookup(SYSCTLFN_CALL(&node));
2202 1.7 dyoung if (rc != 0 || newp == NULL)
2203 1.7 dyoung return rc;
2204 1.7 dyoung
2205 1.7 dyoung if (rnode->sysctl_data != &tcp4_vtw_enable &&
2206 1.7 dyoung rnode->sysctl_data != &tcp6_vtw_enable)
2207 1.7 dyoung rc = ENOENT;
2208 1.7 dyoung else if ((en & 1) == 0)
2209 1.7 dyoung rc = 0;
2210 1.7 dyoung else if (rnode->sysctl_data == &tcp4_vtw_enable)
2211 1.7 dyoung rc = vtw_control_init(AF_INET);
2212 1.7 dyoung else /* rnode->sysctl_data == &tcp6_vtw_enable */
2213 1.7 dyoung rc = vtw_control_init(AF_INET6);
2214 1.7 dyoung
2215 1.7 dyoung if (rc == 0)
2216 1.7 dyoung *(int *)rnode->sysctl_data = en;
2217 1.7 dyoung
2218 1.7 dyoung return rc;
2219 1.7 dyoung }
2220 1.7 dyoung
2221 1.7 dyoung int
2222 1.1 dyoung vtw_earlyinit(void)
2223 1.1 dyoung {
2224 1.5 dyoung int i, rc;
2225 1.1 dyoung
2226 1.5 dyoung callout_init(&vtw_cs, 0);
2227 1.5 dyoung callout_setfunc(&vtw_cs, vtw_tick, 0);
2228 1.1 dyoung
2229 1.5 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
2230 1.5 dyoung vtw_tcpv4[i].is_v4 = 1;
2231 1.5 dyoung vtw_tcpv6[i].is_v6 = 1;
2232 1.1 dyoung }
2233 1.1 dyoung
2234 1.7 dyoung if ((tcp4_vtw_enable & 1) != 0 &&
2235 1.7 dyoung (rc = vtw_control_init(AF_INET)) != 0)
2236 1.7 dyoung return rc;
2237 1.7 dyoung
2238 1.7 dyoung if ((tcp6_vtw_enable & 1) != 0 &&
2239 1.1 dyoung (rc = vtw_control_init(AF_INET6)) != 0)
2240 1.1 dyoung return rc;
2241 1.1 dyoung
2242 1.1 dyoung return 0;
2243 1.1 dyoung }
2244 1.1 dyoung
2245 1.1 dyoung #ifdef VTW_DEBUG
2246 1.1 dyoung #include <sys/syscallargs.h>
2247 1.1 dyoung #include <sys/sysctl.h>
2248 1.1 dyoung
2249 1.1 dyoung /*!\brief add lalp, fafp entries for debug
2250 1.1 dyoung */
2251 1.1 dyoung int
2252 1.11 matt vtw_debug_add(int af, sin_either_t *la, sin_either_t *fa, int msl, int msl_class)
2253 1.1 dyoung {
2254 1.1 dyoung vtw_ctl_t *ctl;
2255 1.1 dyoung vtw_t *vtw;
2256 1.1 dyoung
2257 1.11 matt ctl = vtw_control(af, msl ? msl : class_to_msl(msl_class));
2258 1.1 dyoung if (!ctl)
2259 1.1 dyoung return 0;
2260 1.1 dyoung
2261 1.1 dyoung vtw = vtw_alloc(ctl);
2262 1.1 dyoung
2263 1.1 dyoung if (vtw) {
2264 1.1 dyoung vtw->snd_nxt = 0;
2265 1.1 dyoung vtw->rcv_nxt = 0;
2266 1.1 dyoung
2267 1.1 dyoung switch (af) {
2268 1.1 dyoung case AF_INET: {
2269 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
2270 1.1 dyoung
2271 1.1 dyoung v4->faddr = fa->sin_addr.v4.s_addr;
2272 1.1 dyoung v4->laddr = la->sin_addr.v4.s_addr;
2273 1.1 dyoung v4->fport = fa->sin_port;
2274 1.1 dyoung v4->lport = la->sin_port;
2275 1.1 dyoung
2276 1.1 dyoung vtw->reuse_port = 1;
2277 1.1 dyoung vtw->reuse_addr = 1;
2278 1.1 dyoung vtw->v6only = 0;
2279 1.1 dyoung vtw->uid = 0;
2280 1.1 dyoung
2281 1.1 dyoung vtw_inshash_v4(ctl, vtw);
2282 1.1 dyoung break;
2283 1.1 dyoung }
2284 1.1 dyoung
2285 1.1 dyoung case AF_INET6: {
2286 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
2287 1.1 dyoung
2288 1.1 dyoung v6->faddr = fa->sin_addr.v6;
2289 1.1 dyoung v6->laddr = la->sin_addr.v6;
2290 1.1 dyoung
2291 1.1 dyoung v6->fport = fa->sin_port;
2292 1.1 dyoung v6->lport = la->sin_port;
2293 1.1 dyoung
2294 1.1 dyoung vtw->reuse_port = 1;
2295 1.1 dyoung vtw->reuse_addr = 1;
2296 1.1 dyoung vtw->v6only = 0;
2297 1.1 dyoung vtw->uid = 0;
2298 1.1 dyoung
2299 1.1 dyoung vtw_inshash_v6(ctl, vtw);
2300 1.1 dyoung break;
2301 1.1 dyoung }
2302 1.1 dyoung
2303 1.1 dyoung default:
2304 1.1 dyoung break;
2305 1.1 dyoung }
2306 1.1 dyoung
2307 1.1 dyoung return 1;
2308 1.1 dyoung }
2309 1.1 dyoung
2310 1.1 dyoung return 0;
2311 1.1 dyoung }
2312 1.1 dyoung
/* Index of the sysent[] slot taken over by vtw_debug_init() for the
 * debug system call; zero until a slot has been claimed.
 */
static int vtw_syscall;
2314 1.1 dyoung
2315 1.1 dyoung static int
2316 1.1 dyoung vtw_debug_process(vtw_sysargs_t *ap)
2317 1.1 dyoung {
2318 1.1 dyoung struct vestigial_inpcb vestige;
2319 1.1 dyoung int rc = 0;
2320 1.1 dyoung
2321 1.1 dyoung mutex_enter(softnet_lock);
2322 1.1 dyoung
2323 1.1 dyoung switch (ap->op) {
2324 1.1 dyoung case 0: // insert
2325 1.1 dyoung vtw_debug_add(ap->la.sin_family
2326 1.1 dyoung , &ap->la
2327 1.1 dyoung , &ap->fa
2328 1.1 dyoung , TCPTV_MSL
2329 1.1 dyoung , 0);
2330 1.1 dyoung break;
2331 1.1 dyoung
2332 1.1 dyoung case 1: // lookup
2333 1.1 dyoung case 2: // restart
2334 1.1 dyoung switch (ap->la.sin_family) {
2335 1.1 dyoung case AF_INET:
2336 1.1 dyoung if (tcp_lookup_v4(ap->fa.sin_addr.v4, ap->fa.sin_port,
2337 1.1 dyoung ap->la.sin_addr.v4, ap->la.sin_port,
2338 1.1 dyoung &vestige)) {
2339 1.1 dyoung if (ap->op == 2) {
2340 1.1 dyoung vtw_restart(&vestige);
2341 1.1 dyoung }
2342 1.1 dyoung rc = 0;
2343 1.1 dyoung } else
2344 1.1 dyoung rc = ESRCH;
2345 1.1 dyoung break;
2346 1.1 dyoung
2347 1.1 dyoung case AF_INET6:
2348 1.1 dyoung if (tcp_lookup_v6(&ap->fa.sin_addr.v6, ap->fa.sin_port,
2349 1.1 dyoung &ap->la.sin_addr.v6, ap->la.sin_port,
2350 1.1 dyoung &vestige)) {
2351 1.1 dyoung if (ap->op == 2) {
2352 1.1 dyoung vtw_restart(&vestige);
2353 1.1 dyoung }
2354 1.1 dyoung rc = 0;
2355 1.1 dyoung } else
2356 1.1 dyoung rc = ESRCH;
2357 1.1 dyoung break;
2358 1.1 dyoung default:
2359 1.1 dyoung rc = EINVAL;
2360 1.1 dyoung }
2361 1.1 dyoung break;
2362 1.1 dyoung
2363 1.1 dyoung default:
2364 1.1 dyoung rc = EINVAL;
2365 1.1 dyoung }
2366 1.1 dyoung
2367 1.1 dyoung mutex_exit(softnet_lock);
2368 1.1 dyoung return rc;
2369 1.1 dyoung }
2370 1.1 dyoung
2371 1.1 dyoung struct sys_vtw_args {
2372 1.1 dyoung syscallarg(const vtw_sysargs_t *) req;
2373 1.1 dyoung syscallarg(size_t) len;
2374 1.1 dyoung };
2375 1.1 dyoung
2376 1.1 dyoung static int
2377 1.1 dyoung vtw_sys(struct lwp *l, const void *_, register_t *retval)
2378 1.1 dyoung {
2379 1.1 dyoung const struct sys_vtw_args *uap = _;
2380 1.1 dyoung void *buf;
2381 1.1 dyoung int rc;
2382 1.1 dyoung size_t len = SCARG(uap, len);
2383 1.1 dyoung
2384 1.1 dyoung if (len != sizeof (vtw_sysargs_t))
2385 1.1 dyoung return EINVAL;
2386 1.1 dyoung
2387 1.1 dyoung buf = kmem_alloc(len, KM_SLEEP);
2388 1.1 dyoung if (!buf)
2389 1.1 dyoung return ENOMEM;
2390 1.1 dyoung
2391 1.1 dyoung rc = copyin(SCARG(uap, req), buf, len);
2392 1.1 dyoung if (!rc) {
2393 1.1 dyoung rc = vtw_debug_process(buf);
2394 1.1 dyoung }
2395 1.1 dyoung kmem_free(buf, len);
2396 1.1 dyoung
2397 1.1 dyoung return rc;
2398 1.1 dyoung }
2399 1.1 dyoung
2400 1.1 dyoung static void
2401 1.1 dyoung vtw_sanity_check(void)
2402 1.1 dyoung {
2403 1.1 dyoung vtw_ctl_t *ctl;
2404 1.1 dyoung vtw_t *vtw;
2405 1.1 dyoung int i;
2406 1.1 dyoung int n;
2407 1.1 dyoung
2408 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
2409 1.1 dyoung ctl = &vtw_tcpv4[i];
2410 1.1 dyoung
2411 1.1 dyoung if (!ctl->base.v || ctl->nalloc)
2412 1.1 dyoung continue;
2413 1.1 dyoung
2414 1.1 dyoung for (n = 0, vtw = ctl->base.v; ; ) {
2415 1.1 dyoung ++n;
2416 1.1 dyoung vtw = vtw_next(ctl, vtw);
2417 1.1 dyoung if (vtw == ctl->base.v)
2418 1.1 dyoung break;
2419 1.1 dyoung }
2420 1.1 dyoung db_trace(KTR_VTW
2421 1.1 dyoung , (ctl, "sanity: class %x n %x nfree %x"
2422 1.1 dyoung , i, n, ctl->nfree));
2423 1.1 dyoung
2424 1.1 dyoung KASSERT(n == ctl->nfree);
2425 1.1 dyoung }
2426 1.1 dyoung
2427 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
2428 1.1 dyoung ctl = &vtw_tcpv6[i];
2429 1.1 dyoung
2430 1.1 dyoung if (!ctl->base.v || ctl->nalloc)
2431 1.1 dyoung continue;
2432 1.1 dyoung
2433 1.1 dyoung for (n = 0, vtw = ctl->base.v; ; ) {
2434 1.1 dyoung ++n;
2435 1.1 dyoung vtw = vtw_next(ctl, vtw);
2436 1.1 dyoung if (vtw == ctl->base.v)
2437 1.1 dyoung break;
2438 1.1 dyoung }
2439 1.1 dyoung db_trace(KTR_VTW
2440 1.1 dyoung , (ctl, "sanity: class %x n %x nfree %x"
2441 1.1 dyoung , i, n, ctl->nfree));
2442 1.1 dyoung KASSERT(n == ctl->nfree);
2443 1.1 dyoung }
2444 1.1 dyoung }
2445 1.1 dyoung
2446 1.1 dyoung /*!\brief Initialise debug support.
2447 1.1 dyoung */
2448 1.1 dyoung static void
2449 1.1 dyoung vtw_debug_init(void)
2450 1.1 dyoung {
2451 1.1 dyoung int i;
2452 1.1 dyoung
2453 1.1 dyoung vtw_sanity_check();
2454 1.1 dyoung
2455 1.1 dyoung if (vtw_syscall)
2456 1.1 dyoung return;
2457 1.1 dyoung
2458 1.1 dyoung for (i = 511; i; --i) {
2459 1.1 dyoung if (sysent[i].sy_call == sys_nosys) {
2460 1.1 dyoung sysent[i].sy_call = vtw_sys;
2461 1.1 dyoung sysent[i].sy_narg = 2;
2462 1.1 dyoung sysent[i].sy_argsize = sizeof (struct sys_vtw_args);
2463 1.1 dyoung sysent[i].sy_flags = 0;
2464 1.1 dyoung
2465 1.1 dyoung vtw_syscall = i;
2466 1.1 dyoung break;
2467 1.1 dyoung }
2468 1.1 dyoung }
2469 1.1 dyoung if (i) {
2470 1.1 dyoung const struct sysctlnode *node;
2471 1.1 dyoung uint32_t flags;
2472 1.1 dyoung
2473 1.1 dyoung flags = sysctl_root.sysctl_flags;
2474 1.1 dyoung
2475 1.1 dyoung sysctl_root.sysctl_flags |= CTLFLAG_READWRITE;
2476 1.1 dyoung sysctl_root.sysctl_flags &= ~CTLFLAG_PERMANENT;
2477 1.1 dyoung
2478 1.1 dyoung sysctl_createv(0, 0, 0, &node,
2479 1.1 dyoung CTLFLAG_PERMANENT, CTLTYPE_NODE,
2480 1.1 dyoung "koff",
2481 1.1 dyoung SYSCTL_DESCR("Kernel Obscure Feature Finder"),
2482 1.1 dyoung 0, 0, 0, 0, CTL_CREATE, CTL_EOL);
2483 1.1 dyoung
2484 1.1 dyoung if (!node) {
2485 1.1 dyoung sysctl_createv(0, 0, 0, &node,
2486 1.1 dyoung CTLFLAG_PERMANENT, CTLTYPE_NODE,
2487 1.1 dyoung "koffka",
2488 1.1 dyoung SYSCTL_DESCR("The Real(tm) Kernel"
2489 1.1 dyoung " Obscure Feature Finder"),
2490 1.1 dyoung 0, 0, 0, 0, CTL_CREATE, CTL_EOL);
2491 1.1 dyoung }
2492 1.1 dyoung if (node) {
2493 1.1 dyoung sysctl_createv(0, 0, 0, 0,
2494 1.1 dyoung CTLFLAG_PERMANENT|CTLFLAG_READONLY,
2495 1.1 dyoung CTLTYPE_INT, "vtw_debug_syscall",
2496 1.1 dyoung SYSCTL_DESCR("vtw debug"
2497 1.1 dyoung " system call number"),
2498 1.1 dyoung 0, 0, &vtw_syscall, 0, node->sysctl_num,
2499 1.1 dyoung CTL_CREATE, CTL_EOL);
2500 1.1 dyoung }
2501 1.1 dyoung sysctl_root.sysctl_flags = flags;
2502 1.1 dyoung }
2503 1.1 dyoung }
2504 1.1 dyoung #else /* !VTW_DEBUG */
/* Built without VTW_DEBUG: there is no debug support to set up. */
static void
vtw_debug_init(void)
{
}
2510 1.1 dyoung #endif /* !VTW_DEBUG */
2511