tcp_vtw.c revision 1.22 1 1.1 dyoung /*
2 1.1 dyoung * Copyright (c) 2011 The NetBSD Foundation, Inc.
3 1.1 dyoung * All rights reserved.
4 1.1 dyoung *
5 1.1 dyoung * This code is derived from software contributed to The NetBSD Foundation
6 1.1 dyoung * by Coyote Point Systems, Inc.
7 1.1 dyoung *
8 1.1 dyoung * Redistribution and use in source and binary forms, with or without
9 1.1 dyoung * modification, are permitted provided that the following conditions
10 1.1 dyoung * are met:
11 1.1 dyoung * 1. Redistributions of source code must retain the above copyright
12 1.1 dyoung * notice, this list of conditions and the following disclaimer.
13 1.1 dyoung * 2. Redistributions in binary form must reproduce the above copyright
14 1.1 dyoung * notice, this list of conditions and the following disclaimer in the
15 1.1 dyoung * documentation and/or other materials provided with the distribution.
16 1.1 dyoung *
17 1.1 dyoung * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
18 1.1 dyoung * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 1.1 dyoung * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 1.1 dyoung * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
21 1.1 dyoung * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 1.1 dyoung * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 1.1 dyoung * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 1.1 dyoung * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 1.1 dyoung * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 1.1 dyoung * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 1.1 dyoung * POSSIBILITY OF SUCH DAMAGE.
28 1.1 dyoung */
29 1.9 yamt
30 1.9 yamt /*
31 1.9 yamt * Reduces the resources demanded by TCP sessions in TIME_WAIT-state using
32 1.9 yamt * methods called Vestigial Time-Wait (VTW) and Maximum Segment Lifetime
33 1.9 yamt * Truncation (MSLT).
34 1.9 yamt *
35 1.9 yamt * MSLT and VTW were contributed by Coyote Point Systems, Inc.
36 1.9 yamt *
37 1.9 yamt * Even after a TCP session enters the TIME_WAIT state, its corresponding
38 1.9 yamt * socket and protocol control blocks (PCBs) stick around until the TCP
39 1.9 yamt * Maximum Segment Lifetime (MSL) expires. On a host whose workload
40 1.9 yamt * necessarily creates and closes down many TCP sockets, the sockets & PCBs
41 1.9 yamt * for TCP sessions in TIME_WAIT state amount to many megabytes of dead
42 1.9 yamt * weight in RAM.
43 1.9 yamt *
44 1.9 yamt * Maximum Segment Lifetime Truncation (MSLT) assigns each TCP session to
45 1.9 yamt * a class based on the nearness of the peer. Corresponding to each class
46 1.9 yamt * is an MSL, and a session uses the MSL of its class. The classes are
47 1.9 yamt * loopback (local host equals remote host), local (local host and remote
48 1.9 yamt * host are on the same link/subnet), and remote (local host and remote
49 1.9 yamt * host communicate via one or more gateways). Classes corresponding to
50 1.9 yamt * nearer peers have lower MSLs by default: 2 seconds for loopback, 10
51 1.9 yamt * seconds for local, 60 seconds for remote. Loopback and local sessions
52 1.9 yamt * expire more quickly when MSLT is used.
53 1.9 yamt *
54 1.9 yamt * Vestigial Time-Wait (VTW) replaces a TIME_WAIT session's PCB/socket
55 1.9 yamt * dead weight with a compact representation of the session, called a
56 1.9 yamt * "vestigial PCB". VTW data structures are designed to be very fast and
57 1.9 yamt * memory-efficient: for fast insertion and lookup of vestigial PCBs,
58 1.9 yamt * the PCBs are stored in a hash table that is designed to minimize the
59 1.9 yamt * number of cacheline visits per lookup/insertion. The memory both
60 1.9 yamt * for vestigial PCBs and for elements of the PCB hashtable come from
61 1.9 yamt * fixed-size pools, and linked data structures exploit this to conserve
62 1.9 yamt * memory by representing references with a narrow index/offset from the
63 1.9 yamt * start of a pool instead of a pointer. When space for new vestigial PCBs
64 1.9 yamt * runs out, VTW makes room by discarding old vestigial PCBs, oldest first.
65 1.9 yamt * VTW cooperates with MSLT.
66 1.9 yamt *
67 1.9 yamt * It may help to think of VTW as a "FIN cache" by analogy to the SYN
68 1.9 yamt * cache.
69 1.9 yamt *
70 1.9 yamt * A 2.8-GHz Pentium 4 running a test workload that creates TIME_WAIT
71 1.9 yamt * sessions as fast as it can is approximately 17% idle when VTW is active
72 1.9 yamt * versus 0% idle when VTW is inactive. It has 103 megabytes more free RAM
73 1.9 yamt * when VTW is active (approximately 64k vestigial PCBs are created) than
74 1.9 yamt * when it is inactive.
75 1.9 yamt */
76 1.9 yamt
77 1.1 dyoung #include <sys/cdefs.h>
78 1.1 dyoung
79 1.14 pooka #ifdef _KERNEL_OPT
80 1.1 dyoung #include "opt_ddb.h"
81 1.1 dyoung #include "opt_inet.h"
82 1.1 dyoung #include "opt_inet_csum.h"
83 1.1 dyoung #include "opt_tcp_debug.h"
84 1.14 pooka #endif
85 1.1 dyoung
86 1.1 dyoung #include <sys/param.h>
87 1.1 dyoung #include <sys/systm.h>
88 1.1 dyoung #include <sys/kmem.h>
89 1.1 dyoung #include <sys/mbuf.h>
90 1.1 dyoung #include <sys/protosw.h>
91 1.1 dyoung #include <sys/socket.h>
92 1.1 dyoung #include <sys/socketvar.h>
93 1.1 dyoung #include <sys/errno.h>
94 1.1 dyoung #include <sys/syslog.h>
95 1.1 dyoung #include <sys/pool.h>
96 1.1 dyoung #include <sys/domain.h>
97 1.1 dyoung #include <sys/kernel.h>
98 1.1 dyoung #include <net/if.h>
99 1.1 dyoung #include <net/if_types.h>
100 1.1 dyoung
101 1.1 dyoung #include <netinet/in.h>
102 1.1 dyoung #include <netinet/in_systm.h>
103 1.1 dyoung #include <netinet/ip.h>
104 1.1 dyoung #include <netinet/in_pcb.h>
105 1.1 dyoung #include <netinet/in_var.h>
106 1.1 dyoung #include <netinet/ip_var.h>
107 1.1 dyoung #include <netinet/in_offload.h>
108 1.1 dyoung #include <netinet/ip6.h>
109 1.1 dyoung #include <netinet6/ip6_var.h>
110 1.1 dyoung #include <netinet6/in6_pcb.h>
111 1.1 dyoung #include <netinet6/ip6_var.h>
112 1.1 dyoung #include <netinet6/in6_var.h>
113 1.1 dyoung #include <netinet/icmp6.h>
114 1.1 dyoung
115 1.1 dyoung #include <netinet/tcp.h>
116 1.1 dyoung #include <netinet/tcp_fsm.h>
117 1.1 dyoung #include <netinet/tcp_seq.h>
118 1.1 dyoung #include <netinet/tcp_timer.h>
119 1.1 dyoung #include <netinet/tcp_var.h>
120 1.1 dyoung #include <netinet/tcp_private.h>
121 1.1 dyoung
122 1.1 dyoung #include <netinet/tcp_vtw.h>
123 1.1 dyoung
124 1.22 ozaki __KERNEL_RCSID(0, "$NetBSD: tcp_vtw.c,v 1.22 2022/10/28 05:18:39 ozaki-r Exp $");
125 1.1 dyoung
126 1.1 dyoung #define db_trace(__a, __b) do { } while (/*CONSTCOND*/0)
127 1.1 dyoung
128 1.1 dyoung static void vtw_debug_init(void);
129 1.1 dyoung
130 1.1 dyoung fatp_ctl_t fat_tcpv4;
131 1.1 dyoung fatp_ctl_t fat_tcpv6;
132 1.1 dyoung vtw_ctl_t vtw_tcpv4[VTW_NCLASS];
133 1.1 dyoung vtw_ctl_t vtw_tcpv6[VTW_NCLASS];
134 1.1 dyoung vtw_stats_t vtw_stats;
135 1.1 dyoung
136 1.1 dyoung /* We provide state for the lookup_ports iterator.
137 1.1 dyoung * As currently we are netlock-protected, there is one.
138 1.1 dyoung * If we were finer-grain, we would have one per CPU.
139 1.1 dyoung * I do not want to be in the business of alloc/free.
140 1.1 dyoung * The best alternate would be allocate on the caller's
141 1.1 dyoung * stack, but that would require them to know the struct,
142 1.1 dyoung * or at least the size.
143 1.1 dyoung * See how she goes.
144 1.1 dyoung */
145 1.1 dyoung struct tcp_ports_iterator {
146 1.1 dyoung union {
147 1.1 dyoung struct in_addr v4;
148 1.1 dyoung struct in6_addr v6;
149 1.1 dyoung } addr;
150 1.1 dyoung u_int port;
151 1.1 dyoung
152 1.1 dyoung uint32_t wild : 1;
153 1.1 dyoung
154 1.1 dyoung vtw_ctl_t *ctl;
155 1.1 dyoung fatp_t *fp;
156 1.1 dyoung
157 1.1 dyoung uint16_t slot_idx;
158 1.1 dyoung uint16_t ctl_idx;
159 1.1 dyoung };
160 1.1 dyoung
161 1.1 dyoung static struct tcp_ports_iterator tcp_ports_iterator_v4;
162 1.1 dyoung static struct tcp_ports_iterator tcp_ports_iterator_v6;
163 1.1 dyoung
164 1.1 dyoung static int vtw_age(vtw_ctl_t *, struct timeval *);
165 1.1 dyoung
166 1.1 dyoung /*!\brief allocate a fat pointer from a collection.
167 1.1 dyoung */
168 1.1 dyoung static fatp_t *
169 1.1 dyoung fatp_alloc(fatp_ctl_t *fat)
170 1.1 dyoung {
171 1.1 dyoung fatp_t *fp = 0;
172 1.1 dyoung
173 1.1 dyoung if (fat->nfree) {
174 1.1 dyoung fp = fat->free;
175 1.1 dyoung if (fp) {
176 1.1 dyoung fat->free = fatp_next(fat, fp);
177 1.1 dyoung --fat->nfree;
178 1.1 dyoung ++fat->nalloc;
179 1.1 dyoung fp->nxt = 0;
180 1.1 dyoung
181 1.1 dyoung KASSERT(!fp->inuse);
182 1.1 dyoung }
183 1.1 dyoung }
184 1.1 dyoung
185 1.1 dyoung return fp;
186 1.1 dyoung }
187 1.1 dyoung
188 1.1 dyoung /*!\brief free a fat pointer.
189 1.1 dyoung */
190 1.1 dyoung static void
191 1.1 dyoung fatp_free(fatp_ctl_t *fat, fatp_t *fp)
192 1.1 dyoung {
193 1.1 dyoung if (fp) {
194 1.1 dyoung KASSERT(!fp->inuse);
195 1.1 dyoung KASSERT(!fp->nxt);
196 1.1 dyoung
197 1.1 dyoung fp->nxt = fatp_index(fat, fat->free);
198 1.1 dyoung fat->free = fp;
199 1.1 dyoung
200 1.1 dyoung ++fat->nfree;
201 1.1 dyoung --fat->nalloc;
202 1.1 dyoung }
203 1.1 dyoung }
204 1.1 dyoung
205 1.1 dyoung /*!\brief initialise a collection of fat pointers.
206 1.1 dyoung *
207 1.1 dyoung *\param n # hash buckets
208 1.1 dyoung *\param m total # fat pointers to allocate
209 1.1 dyoung *
210 1.1 dyoung * We allocate 2x as much, as we have two hashes: full and lport only.
211 1.1 dyoung */
212 1.1 dyoung static void
213 1.6 dyoung fatp_init(fatp_ctl_t *fat, uint32_t n, uint32_t m,
214 1.6 dyoung fatp_t *fat_base, fatp_t **fat_hash)
215 1.1 dyoung {
216 1.1 dyoung fatp_t *fp;
217 1.1 dyoung
218 1.1 dyoung KASSERT(n <= FATP_MAX / 2);
219 1.1 dyoung
220 1.6 dyoung fat->hash = fat_hash;
221 1.6 dyoung fat->base = fat_base;
222 1.1 dyoung
223 1.1 dyoung fat->port = &fat->hash[m];
224 1.1 dyoung
225 1.1 dyoung fat->mask = m - 1; // ASSERT is power of 2 (m)
226 1.1 dyoung fat->lim = fat->base + 2*n - 1;
227 1.1 dyoung fat->nfree = 0;
228 1.1 dyoung fat->nalloc = 2*n;
229 1.1 dyoung
230 1.1 dyoung /* Initialise the free list.
231 1.1 dyoung */
232 1.1 dyoung for (fp = fat->lim; fp >= fat->base; --fp) {
233 1.1 dyoung fatp_free(fat, fp);
234 1.1 dyoung }
235 1.1 dyoung }
236 1.1 dyoung
/*
 * Per-slot scramble words.  The `xtra' for a slot is XORed into the tag
 * stored in that slot (together with the hash tag and the encoded index),
 * and XORed out again on lookup.
 */
static uint32_t fatp_xtra[] = {
	/* slots 0-7 */
	0x11111111, 0x22222222, 0x33333333, 0x44444444,
	0x55555555, 0x66666666, 0x77777777, 0x88888888,
	/* slots 8-15 */
	0x12121212, 0x21212121, 0x34343434, 0x43434343,
	0x56565656, 0x65656565, 0x78787878, 0x87878787,
	/* slots 16-23 */
	0x11221122, 0x22112211, 0x33443344, 0x44334433,
	0x55665566, 0x66556655, 0x77887788, 0x88778877,
	/* slots 24-31 */
	0x11112222, 0x22221111, 0x33334444, 0x44443333,
	0x55556666, 0x66665555, 0x77778888, 0x88887777,
};
250 1.1 dyoung
251 1.1 dyoung /*!\brief turn a {fatp_t*,slot} into an integral key.
252 1.1 dyoung *
253 1.1 dyoung * The key can be used to obtain the fatp_t, and the slot,
254 1.1 dyoung * as it directly encodes them.
255 1.1 dyoung */
256 1.1 dyoung static inline uint32_t
257 1.1 dyoung fatp_key(fatp_ctl_t *fat, fatp_t *fp, uint32_t slot)
258 1.1 dyoung {
259 1.1 dyoung CTASSERT(CACHE_LINE_SIZE == 32 ||
260 1.1 dyoung CACHE_LINE_SIZE == 64 ||
261 1.1 dyoung CACHE_LINE_SIZE == 128);
262 1.1 dyoung
263 1.1 dyoung switch (fatp_ntags()) {
264 1.1 dyoung case 7:
265 1.1 dyoung return (fatp_index(fat, fp) << 3) | slot;
266 1.1 dyoung case 15:
267 1.1 dyoung return (fatp_index(fat, fp) << 4) | slot;
268 1.1 dyoung case 31:
269 1.1 dyoung return (fatp_index(fat, fp) << 5) | slot;
270 1.1 dyoung default:
271 1.1 dyoung KASSERT(0 && "no support, for no good reason");
272 1.1 dyoung return ~0;
273 1.1 dyoung }
274 1.1 dyoung }
275 1.1 dyoung
276 1.1 dyoung static inline uint32_t
277 1.1 dyoung fatp_slot_from_key(fatp_ctl_t *fat, uint32_t key)
278 1.1 dyoung {
279 1.1 dyoung CTASSERT(CACHE_LINE_SIZE == 32 ||
280 1.1 dyoung CACHE_LINE_SIZE == 64 ||
281 1.1 dyoung CACHE_LINE_SIZE == 128);
282 1.1 dyoung
283 1.1 dyoung switch (fatp_ntags()) {
284 1.1 dyoung case 7:
285 1.1 dyoung return key & 7;
286 1.1 dyoung case 15:
287 1.1 dyoung return key & 15;
288 1.1 dyoung case 31:
289 1.1 dyoung return key & 31;
290 1.1 dyoung default:
291 1.1 dyoung KASSERT(0 && "no support, for no good reason");
292 1.1 dyoung return ~0;
293 1.1 dyoung }
294 1.1 dyoung }
295 1.1 dyoung
296 1.1 dyoung static inline fatp_t *
297 1.1 dyoung fatp_from_key(fatp_ctl_t *fat, uint32_t key)
298 1.1 dyoung {
299 1.1 dyoung CTASSERT(CACHE_LINE_SIZE == 32 ||
300 1.1 dyoung CACHE_LINE_SIZE == 64 ||
301 1.1 dyoung CACHE_LINE_SIZE == 128);
302 1.1 dyoung
303 1.1 dyoung switch (fatp_ntags()) {
304 1.1 dyoung case 7:
305 1.1 dyoung key >>= 3;
306 1.1 dyoung break;
307 1.1 dyoung case 15:
308 1.1 dyoung key >>= 4;
309 1.1 dyoung break;
310 1.1 dyoung case 31:
311 1.1 dyoung key >>= 5;
312 1.1 dyoung break;
313 1.1 dyoung default:
314 1.1 dyoung KASSERT(0 && "no support, for no good reason");
315 1.1 dyoung return 0;
316 1.1 dyoung }
317 1.1 dyoung
318 1.1 dyoung return key ? fat->base + key - 1 : 0;
319 1.1 dyoung }
320 1.1 dyoung
321 1.1 dyoung static inline uint32_t
322 1.1 dyoung idx_encode(vtw_ctl_t *ctl, uint32_t idx)
323 1.1 dyoung {
324 1.1 dyoung return (idx << ctl->idx_bits) | idx;
325 1.1 dyoung }
326 1.1 dyoung
327 1.1 dyoung static inline uint32_t
328 1.1 dyoung idx_decode(vtw_ctl_t *ctl, uint32_t bits)
329 1.1 dyoung {
330 1.1 dyoung uint32_t idx = bits & ctl->idx_mask;
331 1.1 dyoung
332 1.1 dyoung if (idx_encode(ctl, idx) == bits)
333 1.1 dyoung return idx;
334 1.1 dyoung else
335 1.1 dyoung return ~0;
336 1.1 dyoung }
337 1.1 dyoung
338 1.1 dyoung /*!\brief insert index into fatp hash
339 1.1 dyoung *
340 1.1 dyoung *\param idx - index of element being placed in hash chain
341 1.1 dyoung *\param tag - 32-bit tag identifier
342 1.1 dyoung *
343 1.1 dyoung *\returns
344 1.1 dyoung * value which can be used to locate entry.
345 1.1 dyoung *
346 1.1 dyoung *\note
347 1.1 dyoung * we rely on the fact that there are unused high bits in the index
348 1.1 dyoung * for verification purposes on lookup.
349 1.1 dyoung */
350 1.1 dyoung
351 1.1 dyoung static inline uint32_t
352 1.1 dyoung fatp_vtw_inshash(fatp_ctl_t *fat, uint32_t idx, uint32_t tag, int which,
353 1.1 dyoung void *dbg)
354 1.1 dyoung {
355 1.1 dyoung fatp_t *fp;
356 1.1 dyoung fatp_t **hash = (which ? fat->port : fat->hash);
357 1.1 dyoung int i;
358 1.1 dyoung
359 1.1 dyoung fp = hash[tag & fat->mask];
360 1.1 dyoung
361 1.1 dyoung while (!fp || fatp_full(fp)) {
362 1.1 dyoung fatp_t *fq;
363 1.1 dyoung
364 1.1 dyoung /* All entries are inuse at the top level.
365 1.1 dyoung * We allocate a spare, and push the top level
366 1.1 dyoung * down one. All entries in the fp we push down
367 1.1 dyoung * (think of a tape worm here) will be expelled sooner than
368 1.1 dyoung * any entries added subsequently to this hash bucket.
369 1.1 dyoung * This is a property of the time waits we are exploiting.
370 1.1 dyoung */
371 1.1 dyoung
372 1.1 dyoung fq = fatp_alloc(fat);
373 1.1 dyoung if (!fq) {
374 1.1 dyoung vtw_age(fat->vtw, 0);
375 1.1 dyoung fp = hash[tag & fat->mask];
376 1.1 dyoung continue;
377 1.1 dyoung }
378 1.1 dyoung
379 1.1 dyoung fq->inuse = 0;
380 1.1 dyoung fq->nxt = fatp_index(fat, fp);
381 1.1 dyoung
382 1.1 dyoung hash[tag & fat->mask] = fq;
383 1.1 dyoung
384 1.1 dyoung fp = fq;
385 1.1 dyoung }
386 1.1 dyoung
387 1.1 dyoung KASSERT(!fatp_full(fp));
388 1.1 dyoung
389 1.1 dyoung /* Fill highest index first. Lookup is lowest first.
390 1.1 dyoung */
391 1.1 dyoung for (i = fatp_ntags(); --i >= 0; ) {
392 1.1 dyoung if (!((1 << i) & fp->inuse)) {
393 1.1 dyoung break;
394 1.1 dyoung }
395 1.1 dyoung }
396 1.1 dyoung
397 1.1 dyoung fp->inuse |= 1 << i;
398 1.1 dyoung fp->tag[i] = tag ^ idx_encode(fat->vtw, idx) ^ fatp_xtra[i];
399 1.1 dyoung
400 1.1 dyoung db_trace(KTR_VTW
401 1.1 dyoung , (fp, "fat: inuse %5.5x tag[%x] %8.8x"
402 1.1 dyoung , fp->inuse
403 1.1 dyoung , i, fp->tag[i]));
404 1.1 dyoung
405 1.1 dyoung return fatp_key(fat, fp, i);
406 1.1 dyoung }
407 1.1 dyoung
408 1.1 dyoung static inline int
409 1.1 dyoung vtw_alive(const vtw_t *vtw)
410 1.1 dyoung {
411 1.1 dyoung return vtw->hashed && vtw->expire.tv_sec;
412 1.1 dyoung }
413 1.1 dyoung
414 1.1 dyoung static inline uint32_t
415 1.1 dyoung vtw_index_v4(vtw_ctl_t *ctl, vtw_v4_t *v4)
416 1.1 dyoung {
417 1.1 dyoung if (ctl->base.v4 <= v4 && v4 <= ctl->lim.v4)
418 1.1 dyoung return v4 - ctl->base.v4;
419 1.1 dyoung
420 1.1 dyoung KASSERT(0 && "vtw out of bounds");
421 1.1 dyoung
422 1.1 dyoung return ~0;
423 1.1 dyoung }
424 1.1 dyoung
425 1.1 dyoung static inline uint32_t
426 1.1 dyoung vtw_index_v6(vtw_ctl_t *ctl, vtw_v6_t *v6)
427 1.1 dyoung {
428 1.1 dyoung if (ctl->base.v6 <= v6 && v6 <= ctl->lim.v6)
429 1.1 dyoung return v6 - ctl->base.v6;
430 1.1 dyoung
431 1.1 dyoung KASSERT(0 && "vtw out of bounds");
432 1.1 dyoung
433 1.1 dyoung return ~0;
434 1.1 dyoung }
435 1.1 dyoung
436 1.1 dyoung static inline uint32_t
437 1.1 dyoung vtw_index(vtw_ctl_t *ctl, vtw_t *vtw)
438 1.1 dyoung {
439 1.1 dyoung if (ctl->clidx)
440 1.1 dyoung ctl = ctl->ctl;
441 1.1 dyoung
442 1.1 dyoung if (ctl->is_v4)
443 1.1 dyoung return vtw_index_v4(ctl, (vtw_v4_t *)vtw);
444 1.1 dyoung
445 1.1 dyoung if (ctl->is_v6)
446 1.1 dyoung return vtw_index_v6(ctl, (vtw_v6_t *)vtw);
447 1.1 dyoung
448 1.1 dyoung KASSERT(0 && "neither 4 nor 6. most curious.");
449 1.1 dyoung
450 1.1 dyoung return ~0;
451 1.1 dyoung }
452 1.1 dyoung
453 1.1 dyoung static inline vtw_t *
454 1.1 dyoung vtw_from_index(vtw_ctl_t *ctl, uint32_t idx)
455 1.1 dyoung {
456 1.1 dyoung if (ctl->clidx)
457 1.1 dyoung ctl = ctl->ctl;
458 1.1 dyoung
459 1.1 dyoung /* See if the index looks like it might be an index.
460 1.1 dyoung * Bits on outside of the valid index bits is a give away.
461 1.1 dyoung */
462 1.1 dyoung idx = idx_decode(ctl, idx);
463 1.1 dyoung
464 1.1 dyoung if (idx == ~0) {
465 1.1 dyoung return 0;
466 1.1 dyoung } else if (ctl->is_v4) {
467 1.1 dyoung vtw_v4_t *vtw = ctl->base.v4 + idx;
468 1.1 dyoung
469 1.1 dyoung return (ctl->base.v4 <= vtw && vtw <= ctl->lim.v4)
470 1.1 dyoung ? &vtw->common : 0;
471 1.1 dyoung } else if (ctl->is_v6) {
472 1.1 dyoung vtw_v6_t *vtw = ctl->base.v6 + idx;
473 1.1 dyoung
474 1.1 dyoung return (ctl->base.v6 <= vtw && vtw <= ctl->lim.v6)
475 1.1 dyoung ? &vtw->common : 0;
476 1.1 dyoung } else {
477 1.1 dyoung KASSERT(0 && "badness");
478 1.1 dyoung return 0;
479 1.1 dyoung }
480 1.1 dyoung }
481 1.1 dyoung
482 1.1 dyoung /*!\brief return the next vtw after this one.
483 1.1 dyoung *
484 1.1 dyoung * Due to the differing sizes of the entries in differing
485 1.1 dyoung * arenas, we have to ensure we ++ the correct pointer type.
486 1.1 dyoung *
487 1.1 dyoung * Also handles wrap.
488 1.1 dyoung */
489 1.1 dyoung static inline vtw_t *
490 1.1 dyoung vtw_next(vtw_ctl_t *ctl, vtw_t *vtw)
491 1.1 dyoung {
492 1.1 dyoung if (ctl->is_v4) {
493 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
494 1.1 dyoung
495 1.1 dyoung vtw = &(++v4)->common;
496 1.1 dyoung } else {
497 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
498 1.1 dyoung
499 1.1 dyoung vtw = &(++v6)->common;
500 1.1 dyoung }
501 1.1 dyoung
502 1.1 dyoung if (vtw > ctl->lim.v)
503 1.1 dyoung vtw = ctl->base.v;
504 1.1 dyoung
505 1.1 dyoung return vtw;
506 1.1 dyoung }
507 1.1 dyoung
508 1.1 dyoung /*!\brief remove entry from FATP hash chains
509 1.1 dyoung */
510 1.1 dyoung static inline void
511 1.1 dyoung vtw_unhash(vtw_ctl_t *ctl, vtw_t *vtw)
512 1.1 dyoung {
513 1.1 dyoung fatp_ctl_t *fat = ctl->fat;
514 1.1 dyoung fatp_t *fp;
515 1.1 dyoung uint32_t key = vtw->key;
516 1.1 dyoung uint32_t tag, slot, idx;
517 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
518 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
519 1.1 dyoung
520 1.1 dyoung if (!vtw->hashed) {
521 1.1 dyoung KASSERT(0 && "unhashed");
522 1.1 dyoung return;
523 1.1 dyoung }
524 1.1 dyoung
525 1.1 dyoung if (fat->vtw->is_v4) {
526 1.1 dyoung tag = v4_tag(v4->faddr, v4->fport, v4->laddr, v4->lport);
527 1.1 dyoung } else if (fat->vtw->is_v6) {
528 1.1 dyoung tag = v6_tag(&v6->faddr, v6->fport, &v6->laddr, v6->lport);
529 1.1 dyoung } else {
530 1.1 dyoung tag = 0;
531 1.1 dyoung KASSERT(0 && "not reached");
532 1.1 dyoung }
533 1.1 dyoung
534 1.1 dyoung /* Remove from fat->hash[]
535 1.1 dyoung */
536 1.1 dyoung slot = fatp_slot_from_key(fat, key);
537 1.1 dyoung fp = fatp_from_key(fat, key);
538 1.1 dyoung idx = vtw_index(ctl, vtw);
539 1.1 dyoung
540 1.1 dyoung db_trace(KTR_VTW
541 1.1 dyoung , (fp, "fat: del inuse %5.5x slot %x idx %x key %x tag %x"
542 1.1 dyoung , fp->inuse, slot, idx, key, tag));
543 1.1 dyoung
544 1.1 dyoung KASSERT(fp->inuse & (1 << slot));
545 1.1 dyoung KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
546 1.1 dyoung ^ fatp_xtra[slot]));
547 1.1 dyoung
548 1.1 dyoung if ((fp->inuse & (1 << slot))
549 1.1 dyoung && fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
550 1.1 dyoung ^ fatp_xtra[slot])) {
551 1.1 dyoung fp->inuse ^= 1 << slot;
552 1.1 dyoung fp->tag[slot] = 0;
553 1.1 dyoung
554 1.1 dyoung /* When we delete entries, we do not compact. This is
555 1.1 dyoung * due to temporality. We add entries, and they
556 1.1 dyoung * (eventually) expire. Older entries will be further
557 1.1 dyoung * down the chain.
558 1.1 dyoung */
559 1.1 dyoung if (!fp->inuse) {
560 1.1 dyoung uint32_t hi = tag & fat->mask;
561 1.1 dyoung fatp_t *fq = 0;
562 1.1 dyoung fatp_t *fr = fat->hash[hi];
563 1.1 dyoung
564 1.1 dyoung while (fr && fr != fp) {
565 1.1 dyoung fr = fatp_next(fat, fq = fr);
566 1.1 dyoung }
567 1.1 dyoung
568 1.1 dyoung if (fr == fp) {
569 1.1 dyoung if (fq) {
570 1.1 dyoung fq->nxt = fp->nxt;
571 1.1 dyoung fp->nxt = 0;
572 1.1 dyoung fatp_free(fat, fp);
573 1.1 dyoung } else {
574 1.1 dyoung KASSERT(fat->hash[hi] == fp);
575 1.1 dyoung
576 1.1 dyoung if (fp->nxt) {
577 1.1 dyoung fat->hash[hi]
578 1.1 dyoung = fatp_next(fat, fp);
579 1.1 dyoung fp->nxt = 0;
580 1.1 dyoung fatp_free(fat, fp);
581 1.1 dyoung } else {
582 1.1 dyoung /* retain for next use.
583 1.1 dyoung */
584 1.1 dyoung ;
585 1.1 dyoung }
586 1.1 dyoung }
587 1.1 dyoung } else {
588 1.1 dyoung fr = fat->hash[hi];
589 1.1 dyoung
590 1.1 dyoung do {
591 1.1 dyoung db_trace(KTR_VTW
592 1.1 dyoung , (fr
593 1.1 dyoung , "fat:*del inuse %5.5x"
594 1.1 dyoung " nxt %x"
595 1.1 dyoung , fr->inuse, fr->nxt));
596 1.1 dyoung
597 1.1 dyoung fr = fatp_next(fat, fq = fr);
598 1.1 dyoung } while (fr && fr != fp);
599 1.1 dyoung
600 1.1 dyoung KASSERT(0 && "oops");
601 1.1 dyoung }
602 1.1 dyoung }
603 1.1 dyoung vtw->key ^= ~0;
604 1.1 dyoung }
605 1.1 dyoung
606 1.1 dyoung if (fat->vtw->is_v4) {
607 1.1 dyoung tag = v4_port_tag(v4->lport);
608 1.1 dyoung } else if (fat->vtw->is_v6) {
609 1.1 dyoung tag = v6_port_tag(v6->lport);
610 1.1 dyoung }
611 1.1 dyoung
612 1.1 dyoung /* Remove from fat->port[]
613 1.1 dyoung */
614 1.1 dyoung key = vtw->port_key;
615 1.1 dyoung slot = fatp_slot_from_key(fat, key);
616 1.1 dyoung fp = fatp_from_key(fat, key);
617 1.1 dyoung idx = vtw_index(ctl, vtw);
618 1.1 dyoung
619 1.1 dyoung db_trace(KTR_VTW
620 1.1 dyoung , (fp, "fatport: del inuse %5.5x"
621 1.1 dyoung " slot %x idx %x key %x tag %x"
622 1.1 dyoung , fp->inuse, slot, idx, key, tag));
623 1.1 dyoung
624 1.1 dyoung KASSERT(fp->inuse & (1 << slot));
625 1.1 dyoung KASSERT(fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
626 1.1 dyoung ^ fatp_xtra[slot]));
627 1.1 dyoung
628 1.1 dyoung if ((fp->inuse & (1 << slot))
629 1.1 dyoung && fp->tag[slot] == (tag ^ idx_encode(ctl, idx)
630 1.1 dyoung ^ fatp_xtra[slot])) {
631 1.1 dyoung fp->inuse ^= 1 << slot;
632 1.1 dyoung fp->tag[slot] = 0;
633 1.1 dyoung
634 1.1 dyoung if (!fp->inuse) {
635 1.1 dyoung uint32_t hi = tag & fat->mask;
636 1.1 dyoung fatp_t *fq = 0;
637 1.1 dyoung fatp_t *fr = fat->port[hi];
638 1.1 dyoung
639 1.1 dyoung while (fr && fr != fp) {
640 1.1 dyoung fr = fatp_next(fat, fq = fr);
641 1.1 dyoung }
642 1.1 dyoung
643 1.1 dyoung if (fr == fp) {
644 1.1 dyoung if (fq) {
645 1.1 dyoung fq->nxt = fp->nxt;
646 1.1 dyoung fp->nxt = 0;
647 1.1 dyoung fatp_free(fat, fp);
648 1.1 dyoung } else {
649 1.1 dyoung KASSERT(fat->port[hi] == fp);
650 1.1 dyoung
651 1.1 dyoung if (fp->nxt) {
652 1.1 dyoung fat->port[hi]
653 1.1 dyoung = fatp_next(fat, fp);
654 1.1 dyoung fp->nxt = 0;
655 1.1 dyoung fatp_free(fat, fp);
656 1.1 dyoung } else {
657 1.1 dyoung /* retain for next use.
658 1.1 dyoung */
659 1.1 dyoung ;
660 1.1 dyoung }
661 1.1 dyoung }
662 1.1 dyoung }
663 1.1 dyoung }
664 1.1 dyoung vtw->port_key ^= ~0;
665 1.1 dyoung }
666 1.1 dyoung
667 1.1 dyoung vtw->hashed = 0;
668 1.1 dyoung }
669 1.1 dyoung
670 1.1 dyoung /*!\brief remove entry from hash, possibly free.
671 1.1 dyoung */
672 1.1 dyoung void
673 1.1 dyoung vtw_del(vtw_ctl_t *ctl, vtw_t *vtw)
674 1.1 dyoung {
675 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
676 1.1 dyoung
677 1.1 dyoung if (vtw->hashed) {
678 1.1 dyoung ++vtw_stats.del;
679 1.1 dyoung vtw_unhash(ctl, vtw);
680 1.1 dyoung }
681 1.1 dyoung
682 1.1 dyoung /* We only delete the oldest entry.
683 1.1 dyoung */
684 1.1 dyoung if (vtw != ctl->oldest.v)
685 1.1 dyoung return;
686 1.1 dyoung
687 1.1 dyoung --ctl->nalloc;
688 1.1 dyoung ++ctl->nfree;
689 1.1 dyoung
690 1.1 dyoung vtw->expire.tv_sec = 0;
691 1.1 dyoung vtw->expire.tv_usec = ~0;
692 1.1 dyoung
693 1.1 dyoung if (!ctl->nalloc)
694 1.1 dyoung ctl->oldest.v = 0;
695 1.1 dyoung
696 1.1 dyoung ctl->oldest.v = vtw_next(ctl, vtw);
697 1.1 dyoung }
698 1.1 dyoung
699 1.4 dholland /*!\brief insert vestigial timewait in hash chain
700 1.1 dyoung */
701 1.1 dyoung static void
702 1.1 dyoung vtw_inshash_v4(vtw_ctl_t *ctl, vtw_t *vtw)
703 1.1 dyoung {
704 1.1 dyoung uint32_t idx = vtw_index(ctl, vtw);
705 1.1 dyoung uint32_t tag;
706 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
707 1.1 dyoung
708 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
709 1.1 dyoung KASSERT(!vtw->hashed);
710 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
711 1.1 dyoung
712 1.1 dyoung ++vtw_stats.ins;
713 1.1 dyoung
714 1.1 dyoung tag = v4_tag(v4->faddr, v4->fport,
715 1.1 dyoung v4->laddr, v4->lport);
716 1.1 dyoung
717 1.1 dyoung vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw);
718 1.1 dyoung
719 1.1 dyoung db_trace(KTR_VTW, (ctl
720 1.1 dyoung , "vtw: ins %8.8x:%4.4x %8.8x:%4.4x"
721 1.1 dyoung " tag %8.8x key %8.8x"
722 1.1 dyoung , v4->faddr, v4->fport
723 1.1 dyoung , v4->laddr, v4->lport
724 1.1 dyoung , tag
725 1.1 dyoung , vtw->key));
726 1.1 dyoung
727 1.1 dyoung tag = v4_port_tag(v4->lport);
728 1.1 dyoung vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw);
729 1.1 dyoung
730 1.1 dyoung db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x"
731 1.1 dyoung , v4->lport, v4->lport
732 1.1 dyoung , tag
733 1.1 dyoung , vtw->key));
734 1.1 dyoung
735 1.1 dyoung vtw->hashed = 1;
736 1.1 dyoung }
737 1.1 dyoung
738 1.4 dholland /*!\brief insert vestigial timewait in hash chain
739 1.1 dyoung */
740 1.1 dyoung static void
741 1.1 dyoung vtw_inshash_v6(vtw_ctl_t *ctl, vtw_t *vtw)
742 1.1 dyoung {
743 1.1 dyoung uint32_t idx = vtw_index(ctl, vtw);
744 1.1 dyoung uint32_t tag;
745 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
746 1.1 dyoung
747 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
748 1.1 dyoung KASSERT(!vtw->hashed);
749 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
750 1.1 dyoung
751 1.1 dyoung ++vtw_stats.ins;
752 1.1 dyoung
753 1.1 dyoung tag = v6_tag(&v6->faddr, v6->fport,
754 1.1 dyoung &v6->laddr, v6->lport);
755 1.1 dyoung
756 1.1 dyoung vtw->key = fatp_vtw_inshash(ctl->fat, idx, tag, 0, vtw);
757 1.1 dyoung
758 1.1 dyoung tag = v6_port_tag(v6->lport);
759 1.1 dyoung vtw->port_key = fatp_vtw_inshash(ctl->fat, idx, tag, 1, vtw);
760 1.1 dyoung
761 1.1 dyoung db_trace(KTR_VTW, (ctl, "vtw: ins %P - %4.4x tag %8.8x key %8.8x"
762 1.1 dyoung , v6->lport, v6->lport
763 1.1 dyoung , tag
764 1.1 dyoung , vtw->key));
765 1.1 dyoung
766 1.1 dyoung vtw->hashed = 1;
767 1.1 dyoung }
768 1.1 dyoung
769 1.1 dyoung static vtw_t *
770 1.1 dyoung vtw_lookup_hash_v4(vtw_ctl_t *ctl, uint32_t faddr, uint16_t fport
771 1.1 dyoung , uint32_t laddr, uint16_t lport
772 1.1 dyoung , int which)
773 1.1 dyoung {
774 1.1 dyoung vtw_v4_t *v4;
775 1.1 dyoung vtw_t *vtw;
776 1.1 dyoung uint32_t tag;
777 1.1 dyoung fatp_t *fp;
778 1.1 dyoung int i;
779 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
780 1.1 dyoung
781 1.1 dyoung if (!ctl || !ctl->fat)
782 1.1 dyoung return 0;
783 1.1 dyoung
784 1.1 dyoung ++vtw_stats.look[which];
785 1.1 dyoung
786 1.1 dyoung if (which) {
787 1.1 dyoung tag = v4_port_tag(lport);
788 1.1 dyoung fp = ctl->fat->port[tag & ctl->fat->mask];
789 1.1 dyoung } else {
790 1.1 dyoung tag = v4_tag(faddr, fport, laddr, lport);
791 1.1 dyoung fp = ctl->fat->hash[tag & ctl->fat->mask];
792 1.1 dyoung }
793 1.1 dyoung
794 1.1 dyoung while (fp && fp->inuse) {
795 1.1 dyoung uint32_t inuse = fp->inuse;
796 1.1 dyoung
797 1.1 dyoung ++fatps;
798 1.1 dyoung
799 1.1 dyoung for (i = 0; inuse && i < fatp_ntags(); ++i) {
800 1.1 dyoung uint32_t idx;
801 1.1 dyoung
802 1.1 dyoung if (!(inuse & (1 << i)))
803 1.1 dyoung continue;
804 1.1 dyoung
805 1.1 dyoung inuse ^= 1 << i;
806 1.1 dyoung
807 1.1 dyoung ++probes;
808 1.1 dyoung ++vtw_stats.probe[which];
809 1.1 dyoung
810 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
811 1.1 dyoung vtw = vtw_from_index(ctl, idx);
812 1.1 dyoung
813 1.1 dyoung if (!vtw) {
814 1.1 dyoung /* Hopefully fast path.
815 1.1 dyoung */
816 1.1 dyoung db_trace(KTR_VTW
817 1.1 dyoung , (fp, "vtw: fast %A:%P %A:%P"
818 1.1 dyoung " idx %x tag %x"
819 1.1 dyoung , faddr, fport
820 1.1 dyoung , laddr, lport
821 1.1 dyoung , idx, tag));
822 1.1 dyoung continue;
823 1.1 dyoung }
824 1.1 dyoung
825 1.1 dyoung v4 = (void*)vtw;
826 1.1 dyoung
827 1.1 dyoung /* The de-referencing of vtw is what we want to avoid.
828 1.1 dyoung * Losing.
829 1.1 dyoung */
830 1.1 dyoung if (vtw_alive(vtw)
831 1.1 dyoung && ((which ? vtw->port_key : vtw->key)
832 1.1 dyoung == fatp_key(ctl->fat, fp, i))
833 1.1 dyoung && (which
834 1.1 dyoung || (v4->faddr == faddr && v4->laddr == laddr
835 1.1 dyoung && v4->fport == fport))
836 1.1 dyoung && v4->lport == lport) {
837 1.1 dyoung ++vtw_stats.hit[which];
838 1.1 dyoung
839 1.1 dyoung db_trace(KTR_VTW
840 1.1 dyoung , (fp, "vtw: hit %8.8x:%4.4x"
841 1.1 dyoung " %8.8x:%4.4x idx %x key %x"
842 1.1 dyoung , faddr, fport
843 1.1 dyoung , laddr, lport
844 1.1 dyoung , idx_decode(ctl, idx), vtw->key));
845 1.1 dyoung
846 1.1 dyoung KASSERT(vtw->hashed);
847 1.1 dyoung
848 1.1 dyoung goto out;
849 1.1 dyoung }
850 1.1 dyoung ++vtw_stats.losing[which];
851 1.1 dyoung ++losings;
852 1.1 dyoung
853 1.1 dyoung if (vtw_alive(vtw)) {
854 1.1 dyoung db_trace(KTR_VTW
855 1.1 dyoung , (fp, "vtw:!mis %8.8x:%4.4x"
856 1.1 dyoung " %8.8x:%4.4x key %x tag %x"
857 1.1 dyoung , faddr, fport
858 1.1 dyoung , laddr, lport
859 1.1 dyoung , fatp_key(ctl->fat, fp, i)
860 1.1 dyoung , v4_tag(faddr, fport
861 1.1 dyoung , laddr, lport)));
862 1.1 dyoung db_trace(KTR_VTW
863 1.1 dyoung , (vtw, "vtw:!mis %8.8x:%4.4x"
864 1.1 dyoung " %8.8x:%4.4x key %x tag %x"
865 1.1 dyoung , v4->faddr, v4->fport
866 1.1 dyoung , v4->laddr, v4->lport
867 1.1 dyoung , vtw->key
868 1.1 dyoung , v4_tag(v4->faddr, v4->fport
869 1.1 dyoung , v4->laddr, v4->lport)));
870 1.1 dyoung
871 1.1 dyoung if (vtw->key == fatp_key(ctl->fat, fp, i)) {
872 1.1 dyoung db_trace(KTR_VTW
873 1.1 dyoung , (vtw, "vtw:!mis %8.8x:%4.4x"
874 1.1 dyoung " %8.8x:%4.4x key %x"
875 1.1 dyoung " which %x"
876 1.1 dyoung , v4->faddr, v4->fport
877 1.1 dyoung , v4->laddr, v4->lport
878 1.1 dyoung , vtw->key
879 1.1 dyoung , which));
880 1.1 dyoung
881 1.1 dyoung } else {
882 1.1 dyoung db_trace(KTR_VTW
883 1.1 dyoung , (vtw
884 1.1 dyoung , "vtw:!mis"
885 1.1 dyoung " key %8.8x != %8.8x"
886 1.1 dyoung " idx %x i %x which %x"
887 1.1 dyoung , vtw->key
888 1.1 dyoung , fatp_key(ctl->fat, fp, i)
889 1.1 dyoung , idx_decode(ctl, idx)
890 1.1 dyoung , i
891 1.1 dyoung , which));
892 1.1 dyoung }
893 1.1 dyoung } else {
894 1.1 dyoung db_trace(KTR_VTW
895 1.1 dyoung , (fp
896 1.1 dyoung , "vtw:!mis free entry"
897 1.1 dyoung " idx %x vtw %p which %x"
898 1.1 dyoung , idx_decode(ctl, idx)
899 1.1 dyoung , vtw, which));
900 1.1 dyoung }
901 1.1 dyoung }
902 1.1 dyoung
903 1.1 dyoung if (fp->nxt) {
904 1.1 dyoung fp = fatp_next(ctl->fat, fp);
905 1.1 dyoung } else {
906 1.1 dyoung break;
907 1.1 dyoung }
908 1.1 dyoung }
909 1.1 dyoung ++vtw_stats.miss[which];
910 1.1 dyoung vtw = 0;
911 1.1 dyoung out:
912 1.1 dyoung if (fatps > vtw_stats.max_chain[which])
913 1.1 dyoung vtw_stats.max_chain[which] = fatps;
914 1.1 dyoung if (probes > vtw_stats.max_probe[which])
915 1.1 dyoung vtw_stats.max_probe[which] = probes;
916 1.1 dyoung if (losings > vtw_stats.max_loss[which])
917 1.1 dyoung vtw_stats.max_loss[which] = losings;
918 1.1 dyoung
919 1.1 dyoung return vtw;
920 1.1 dyoung }
921 1.1 dyoung
922 1.1 dyoung static vtw_t *
923 1.1 dyoung vtw_lookup_hash_v6(vtw_ctl_t *ctl, const struct in6_addr *faddr, uint16_t fport
924 1.1 dyoung , const struct in6_addr *laddr, uint16_t lport
925 1.1 dyoung , int which)
926 1.1 dyoung {
927 1.1 dyoung vtw_v6_t *v6;
928 1.1 dyoung vtw_t *vtw;
929 1.1 dyoung uint32_t tag;
930 1.1 dyoung fatp_t *fp;
931 1.1 dyoung int i;
932 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
933 1.1 dyoung
934 1.1 dyoung ++vtw_stats.look[which];
935 1.1 dyoung
936 1.1 dyoung if (!ctl || !ctl->fat)
937 1.1 dyoung return 0;
938 1.1 dyoung
939 1.1 dyoung if (which) {
940 1.1 dyoung tag = v6_port_tag(lport);
941 1.1 dyoung fp = ctl->fat->port[tag & ctl->fat->mask];
942 1.1 dyoung } else {
943 1.1 dyoung tag = v6_tag(faddr, fport, laddr, lport);
944 1.1 dyoung fp = ctl->fat->hash[tag & ctl->fat->mask];
945 1.1 dyoung }
946 1.1 dyoung
947 1.1 dyoung while (fp && fp->inuse) {
948 1.1 dyoung uint32_t inuse = fp->inuse;
949 1.1 dyoung
950 1.1 dyoung ++fatps;
951 1.1 dyoung
952 1.1 dyoung for (i = 0; inuse && i < fatp_ntags(); ++i) {
953 1.1 dyoung uint32_t idx;
954 1.1 dyoung
955 1.1 dyoung if (!(inuse & (1 << i)))
956 1.1 dyoung continue;
957 1.1 dyoung
958 1.1 dyoung inuse ^= 1 << i;
959 1.1 dyoung
960 1.1 dyoung ++probes;
961 1.1 dyoung ++vtw_stats.probe[which];
962 1.1 dyoung
963 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
964 1.1 dyoung vtw = vtw_from_index(ctl, idx);
965 1.1 dyoung
966 1.1 dyoung db_trace(KTR_VTW
967 1.1 dyoung , (fp, "probe: %2d %6A:%4.4x %6A:%4.4x idx %x"
968 1.1 dyoung , i
969 1.1 dyoung , db_store(faddr, sizeof (*faddr)), fport
970 1.1 dyoung , db_store(laddr, sizeof (*laddr)), lport
971 1.1 dyoung , idx_decode(ctl, idx)));
972 1.1 dyoung
973 1.1 dyoung if (!vtw) {
974 1.1 dyoung /* Hopefully fast path.
975 1.1 dyoung */
976 1.1 dyoung continue;
977 1.1 dyoung }
978 1.1 dyoung
979 1.1 dyoung v6 = (void*)vtw;
980 1.1 dyoung
981 1.1 dyoung if (vtw_alive(vtw)
982 1.1 dyoung && ((which ? vtw->port_key : vtw->key)
983 1.1 dyoung == fatp_key(ctl->fat, fp, i))
984 1.1 dyoung && v6->lport == lport
985 1.1 dyoung && (which
986 1.1 dyoung || (v6->fport == fport
987 1.1 dyoung && !bcmp(&v6->faddr, faddr, sizeof (*faddr))
988 1.1 dyoung && !bcmp(&v6->laddr, laddr
989 1.1 dyoung , sizeof (*laddr))))) {
990 1.1 dyoung ++vtw_stats.hit[which];
991 1.1 dyoung
992 1.1 dyoung KASSERT(vtw->hashed);
993 1.1 dyoung goto out;
994 1.1 dyoung } else {
995 1.1 dyoung ++vtw_stats.losing[which];
996 1.1 dyoung ++losings;
997 1.1 dyoung }
998 1.1 dyoung }
999 1.1 dyoung
1000 1.1 dyoung if (fp->nxt) {
1001 1.1 dyoung fp = fatp_next(ctl->fat, fp);
1002 1.1 dyoung } else {
1003 1.1 dyoung break;
1004 1.1 dyoung }
1005 1.1 dyoung }
1006 1.1 dyoung ++vtw_stats.miss[which];
1007 1.1 dyoung vtw = 0;
1008 1.1 dyoung out:
1009 1.1 dyoung if (fatps > vtw_stats.max_chain[which])
1010 1.1 dyoung vtw_stats.max_chain[which] = fatps;
1011 1.1 dyoung if (probes > vtw_stats.max_probe[which])
1012 1.1 dyoung vtw_stats.max_probe[which] = probes;
1013 1.1 dyoung if (losings > vtw_stats.max_loss[which])
1014 1.1 dyoung vtw_stats.max_loss[which] = losings;
1015 1.1 dyoung
1016 1.1 dyoung return vtw;
1017 1.1 dyoung }
1018 1.1 dyoung
1019 1.1 dyoung /*!\brief port iterator
1020 1.1 dyoung */
1021 1.1 dyoung static vtw_t *
1022 1.1 dyoung vtw_next_port_v4(struct tcp_ports_iterator *it)
1023 1.1 dyoung {
1024 1.1 dyoung vtw_ctl_t *ctl = it->ctl;
1025 1.1 dyoung vtw_v4_t *v4;
1026 1.1 dyoung vtw_t *vtw;
1027 1.1 dyoung uint32_t tag;
1028 1.1 dyoung uint16_t lport = it->port;
1029 1.1 dyoung fatp_t *fp;
1030 1.1 dyoung int i;
1031 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
1032 1.1 dyoung
1033 1.1 dyoung tag = v4_port_tag(lport);
1034 1.1 dyoung if (!it->fp) {
1035 1.1 dyoung it->fp = ctl->fat->port[tag & ctl->fat->mask];
1036 1.1 dyoung it->slot_idx = 0;
1037 1.1 dyoung }
1038 1.1 dyoung fp = it->fp;
1039 1.1 dyoung
1040 1.1 dyoung while (fp) {
1041 1.1 dyoung uint32_t inuse = fp->inuse;
1042 1.1 dyoung
1043 1.1 dyoung ++fatps;
1044 1.1 dyoung
1045 1.1 dyoung for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) {
1046 1.1 dyoung uint32_t idx;
1047 1.1 dyoung
1048 1.1 dyoung if (!(inuse & (1 << i)))
1049 1.1 dyoung continue;
1050 1.1 dyoung
1051 1.16 martin inuse &= ~0U << i;
1052 1.1 dyoung
1053 1.1 dyoung if (i < it->slot_idx)
1054 1.1 dyoung continue;
1055 1.1 dyoung
1056 1.1 dyoung ++vtw_stats.probe[1];
1057 1.1 dyoung ++probes;
1058 1.1 dyoung
1059 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
1060 1.1 dyoung vtw = vtw_from_index(ctl, idx);
1061 1.1 dyoung
1062 1.1 dyoung if (!vtw) {
1063 1.1 dyoung /* Hopefully fast path.
1064 1.1 dyoung */
1065 1.1 dyoung continue;
1066 1.1 dyoung }
1067 1.1 dyoung
1068 1.1 dyoung v4 = (void*)vtw;
1069 1.1 dyoung
1070 1.1 dyoung if (vtw_alive(vtw)
1071 1.1 dyoung && vtw->port_key == fatp_key(ctl->fat, fp, i)
1072 1.1 dyoung && v4->lport == lport) {
1073 1.1 dyoung ++vtw_stats.hit[1];
1074 1.1 dyoung
1075 1.1 dyoung it->slot_idx = i + 1;
1076 1.1 dyoung
1077 1.1 dyoung goto out;
1078 1.1 dyoung } else if (vtw_alive(vtw)) {
1079 1.1 dyoung ++vtw_stats.losing[1];
1080 1.1 dyoung ++losings;
1081 1.1 dyoung
1082 1.1 dyoung db_trace(KTR_VTW
1083 1.1 dyoung , (vtw, "vtw:!mis"
1084 1.1 dyoung " port %8.8x:%4.4x %8.8x:%4.4x"
1085 1.1 dyoung " key %x port %x"
1086 1.1 dyoung , v4->faddr, v4->fport
1087 1.1 dyoung , v4->laddr, v4->lport
1088 1.1 dyoung , vtw->key
1089 1.1 dyoung , lport));
1090 1.1 dyoung } else {
1091 1.1 dyoung /* Really losing here. We are coming
1092 1.1 dyoung * up with references to free entries.
1093 1.1 dyoung * Might find it better to use
1094 1.1 dyoung * traditional, or need another
1095 1.1 dyoung * add-hockery. The other add-hockery
1096 1.1 dyoung * would be to pul more into into the
1097 1.1 dyoung * cache line to reject the false
1098 1.1 dyoung * hits.
1099 1.1 dyoung */
1100 1.1 dyoung ++vtw_stats.losing[1];
1101 1.1 dyoung ++losings;
1102 1.1 dyoung db_trace(KTR_VTW
1103 1.1 dyoung , (fp, "vtw:!mis port %x"
1104 1.1 dyoung " - free entry idx %x vtw %p"
1105 1.1 dyoung , lport
1106 1.1 dyoung , idx_decode(ctl, idx)
1107 1.1 dyoung , vtw));
1108 1.1 dyoung }
1109 1.1 dyoung }
1110 1.1 dyoung
1111 1.1 dyoung if (fp->nxt) {
1112 1.1 dyoung it->fp = fp = fatp_next(ctl->fat, fp);
1113 1.1 dyoung it->slot_idx = 0;
1114 1.1 dyoung } else {
1115 1.1 dyoung it->fp = 0;
1116 1.1 dyoung break;
1117 1.1 dyoung }
1118 1.1 dyoung }
1119 1.1 dyoung ++vtw_stats.miss[1];
1120 1.1 dyoung
1121 1.1 dyoung vtw = 0;
1122 1.1 dyoung out:
1123 1.1 dyoung if (fatps > vtw_stats.max_chain[1])
1124 1.1 dyoung vtw_stats.max_chain[1] = fatps;
1125 1.1 dyoung if (probes > vtw_stats.max_probe[1])
1126 1.1 dyoung vtw_stats.max_probe[1] = probes;
1127 1.1 dyoung if (losings > vtw_stats.max_loss[1])
1128 1.1 dyoung vtw_stats.max_loss[1] = losings;
1129 1.1 dyoung
1130 1.1 dyoung return vtw;
1131 1.1 dyoung }
1132 1.1 dyoung
1133 1.1 dyoung /*!\brief port iterator
1134 1.1 dyoung */
1135 1.1 dyoung static vtw_t *
1136 1.1 dyoung vtw_next_port_v6(struct tcp_ports_iterator *it)
1137 1.1 dyoung {
1138 1.1 dyoung vtw_ctl_t *ctl = it->ctl;
1139 1.1 dyoung vtw_v6_t *v6;
1140 1.1 dyoung vtw_t *vtw;
1141 1.1 dyoung uint32_t tag;
1142 1.1 dyoung uint16_t lport = it->port;
1143 1.1 dyoung fatp_t *fp;
1144 1.1 dyoung int i;
1145 1.1 dyoung uint32_t fatps = 0, probes = 0, losings = 0;
1146 1.1 dyoung
1147 1.1 dyoung tag = v6_port_tag(lport);
1148 1.1 dyoung if (!it->fp) {
1149 1.1 dyoung it->fp = ctl->fat->port[tag & ctl->fat->mask];
1150 1.1 dyoung it->slot_idx = 0;
1151 1.1 dyoung }
1152 1.1 dyoung fp = it->fp;
1153 1.1 dyoung
1154 1.1 dyoung while (fp) {
1155 1.1 dyoung uint32_t inuse = fp->inuse;
1156 1.1 dyoung
1157 1.1 dyoung ++fatps;
1158 1.1 dyoung
1159 1.1 dyoung for (i = it->slot_idx; inuse && i < fatp_ntags(); ++i) {
1160 1.1 dyoung uint32_t idx;
1161 1.1 dyoung
1162 1.1 dyoung if (!(inuse & (1 << i)))
1163 1.1 dyoung continue;
1164 1.1 dyoung
1165 1.16 martin inuse &= ~0U << i;
1166 1.1 dyoung
1167 1.1 dyoung if (i < it->slot_idx)
1168 1.1 dyoung continue;
1169 1.1 dyoung
1170 1.1 dyoung ++vtw_stats.probe[1];
1171 1.1 dyoung ++probes;
1172 1.1 dyoung
1173 1.1 dyoung idx = fp->tag[i] ^ tag ^ fatp_xtra[i];
1174 1.1 dyoung vtw = vtw_from_index(ctl, idx);
1175 1.1 dyoung
1176 1.1 dyoung if (!vtw) {
1177 1.1 dyoung /* Hopefully fast path.
1178 1.1 dyoung */
1179 1.1 dyoung continue;
1180 1.1 dyoung }
1181 1.1 dyoung
1182 1.1 dyoung v6 = (void*)vtw;
1183 1.1 dyoung
1184 1.1 dyoung db_trace(KTR_VTW
1185 1.1 dyoung , (vtw, "vtw: i %x idx %x fp->tag %x"
1186 1.1 dyoung " tag %x xtra %x"
1187 1.1 dyoung , i, idx_decode(ctl, idx)
1188 1.1 dyoung , fp->tag[i], tag, fatp_xtra[i]));
1189 1.1 dyoung
1190 1.1 dyoung if (vtw_alive(vtw)
1191 1.1 dyoung && vtw->port_key == fatp_key(ctl->fat, fp, i)
1192 1.1 dyoung && v6->lport == lport) {
1193 1.1 dyoung ++vtw_stats.hit[1];
1194 1.1 dyoung
1195 1.1 dyoung db_trace(KTR_VTW
1196 1.1 dyoung , (fp, "vtw: nxt port %P - %4.4x"
1197 1.1 dyoung " idx %x key %x"
1198 1.1 dyoung , lport, lport
1199 1.1 dyoung , idx_decode(ctl, idx), vtw->key));
1200 1.1 dyoung
1201 1.1 dyoung it->slot_idx = i + 1;
1202 1.1 dyoung goto out;
1203 1.1 dyoung } else if (vtw_alive(vtw)) {
1204 1.1 dyoung ++vtw_stats.losing[1];
1205 1.1 dyoung
1206 1.1 dyoung db_trace(KTR_VTW
1207 1.1 dyoung , (vtw, "vtw:!mis port %6A:%4.4x"
1208 1.1 dyoung " %6A:%4.4x key %x port %x"
1209 1.1 dyoung , db_store(&v6->faddr
1210 1.1 dyoung , sizeof (v6->faddr))
1211 1.1 dyoung , v6->fport
1212 1.1 dyoung , db_store(&v6->laddr
1213 1.1 dyoung , sizeof (v6->faddr))
1214 1.1 dyoung , v6->lport
1215 1.1 dyoung , vtw->key
1216 1.1 dyoung , lport));
1217 1.1 dyoung } else {
1218 1.1 dyoung /* Really losing here. We are coming
1219 1.1 dyoung * up with references to free entries.
1220 1.1 dyoung * Might find it better to use
1221 1.1 dyoung * traditional, or need another
1222 1.1 dyoung * add-hockery. The other add-hockery
1223 1.1 dyoung * would be to pul more into into the
1224 1.1 dyoung * cache line to reject the false
1225 1.1 dyoung * hits.
1226 1.1 dyoung */
1227 1.1 dyoung ++vtw_stats.losing[1];
1228 1.1 dyoung ++losings;
1229 1.1 dyoung
1230 1.1 dyoung db_trace(KTR_VTW
1231 1.1 dyoung , (fp
1232 1.1 dyoung , "vtw:!mis port %x"
1233 1.1 dyoung " - free entry idx %x vtw %p"
1234 1.1 dyoung , lport, idx_decode(ctl, idx)
1235 1.1 dyoung , vtw));
1236 1.1 dyoung }
1237 1.1 dyoung }
1238 1.1 dyoung
1239 1.1 dyoung if (fp->nxt) {
1240 1.1 dyoung it->fp = fp = fatp_next(ctl->fat, fp);
1241 1.1 dyoung it->slot_idx = 0;
1242 1.1 dyoung } else {
1243 1.1 dyoung it->fp = 0;
1244 1.1 dyoung break;
1245 1.1 dyoung }
1246 1.1 dyoung }
1247 1.1 dyoung ++vtw_stats.miss[1];
1248 1.1 dyoung
1249 1.1 dyoung vtw = 0;
1250 1.1 dyoung out:
1251 1.1 dyoung if (fatps > vtw_stats.max_chain[1])
1252 1.1 dyoung vtw_stats.max_chain[1] = fatps;
1253 1.1 dyoung if (probes > vtw_stats.max_probe[1])
1254 1.1 dyoung vtw_stats.max_probe[1] = probes;
1255 1.1 dyoung if (losings > vtw_stats.max_loss[1])
1256 1.1 dyoung vtw_stats.max_loss[1] = losings;
1257 1.1 dyoung
1258 1.1 dyoung return vtw;
1259 1.1 dyoung }
1260 1.1 dyoung
1261 1.1 dyoung /*!\brief initialise the VTW allocation arena
1262 1.1 dyoung *
1263 1.1 dyoung * There are 1+3 allocation classes:
1264 1.1 dyoung * 0 classless
1265 1.1 dyoung * {1,2,3} MSL-class based allocation
1266 1.1 dyoung *
1267 1.1 dyoung * The allocation arenas are all initialised. Classless gets all the
1268 1.1 dyoung * space. MSL-class based divides the arena, so that allocation
1269 1.1 dyoung * within a class can proceed without having to consider entries
1270 1.1 dyoung * (aka: cache lines) from different classes.
1271 1.1 dyoung *
1272 1.1 dyoung * Usually, we are completely classless or class-based, but there can be
1273 1.1 dyoung * transition periods, corresponding to dynamic adjustments in the config
1274 1.1 dyoung * by the operator.
1275 1.1 dyoung */
1276 1.1 dyoung static void
1277 1.6 dyoung vtw_init(fatp_ctl_t *fat, vtw_ctl_t *ctl, const uint32_t n, vtw_t *ctl_base_v)
1278 1.1 dyoung {
1279 1.6 dyoung int class_n, i;
1280 1.6 dyoung vtw_t *base;
1281 1.1 dyoung
1282 1.6 dyoung ctl->base.v = ctl_base_v;
1283 1.1 dyoung
1284 1.6 dyoung if (ctl->is_v4) {
1285 1.6 dyoung ctl->lim.v4 = ctl->base.v4 + n - 1;
1286 1.6 dyoung ctl->alloc.v4 = ctl->base.v4;
1287 1.6 dyoung } else {
1288 1.6 dyoung ctl->lim.v6 = ctl->base.v6 + n - 1;
1289 1.6 dyoung ctl->alloc.v6 = ctl->base.v6;
1290 1.6 dyoung }
1291 1.1 dyoung
1292 1.6 dyoung ctl->nfree = n;
1293 1.6 dyoung ctl->ctl = ctl;
1294 1.1 dyoung
1295 1.6 dyoung ctl->idx_bits = 32;
1296 1.6 dyoung for (ctl->idx_mask = ~0; (ctl->idx_mask & (n-1)) == n-1; ) {
1297 1.6 dyoung ctl->idx_mask >>= 1;
1298 1.6 dyoung ctl->idx_bits -= 1;
1299 1.6 dyoung }
1300 1.1 dyoung
1301 1.6 dyoung ctl->idx_mask <<= 1;
1302 1.6 dyoung ctl->idx_mask |= 1;
1303 1.6 dyoung ctl->idx_bits += 1;
1304 1.1 dyoung
1305 1.6 dyoung ctl->fat = fat;
1306 1.6 dyoung fat->vtw = ctl;
1307 1.1 dyoung
1308 1.6 dyoung /* Divide the resources equally amongst the classes.
1309 1.6 dyoung * This is not optimal, as the different classes
1310 1.6 dyoung * arrive and leave at different rates, but it is
1311 1.6 dyoung * the best I can do for now.
1312 1.6 dyoung */
1313 1.6 dyoung class_n = n / (VTW_NCLASS-1);
1314 1.6 dyoung base = ctl->base.v;
1315 1.1 dyoung
1316 1.6 dyoung for (i = 1; i < VTW_NCLASS; ++i) {
1317 1.6 dyoung int j;
1318 1.1 dyoung
1319 1.6 dyoung ctl[i] = ctl[0];
1320 1.6 dyoung ctl[i].clidx = i;
1321 1.1 dyoung
1322 1.6 dyoung ctl[i].base.v = base;
1323 1.6 dyoung ctl[i].alloc = ctl[i].base;
1324 1.1 dyoung
1325 1.6 dyoung for (j = 0; j < class_n - 1; ++j) {
1326 1.6 dyoung if (tcp_msl_enable)
1327 1.6 dyoung base->msl_class = i;
1328 1.1 dyoung base = vtw_next(ctl, base);
1329 1.1 dyoung }
1330 1.6 dyoung
1331 1.6 dyoung ctl[i].lim.v = base;
1332 1.6 dyoung base = vtw_next(ctl, base);
1333 1.6 dyoung ctl[i].nfree = class_n;
1334 1.1 dyoung }
1335 1.1 dyoung
1336 1.1 dyoung vtw_debug_init();
1337 1.1 dyoung }
1338 1.1 dyoung
1339 1.1 dyoung /*!\brief map class to TCP MSL
1340 1.1 dyoung */
1341 1.1 dyoung static inline uint32_t
1342 1.11 matt class_to_msl(int msl_class)
1343 1.1 dyoung {
1344 1.11 matt switch (msl_class) {
1345 1.1 dyoung case 0:
1346 1.1 dyoung case 1:
1347 1.1 dyoung return tcp_msl_remote ? tcp_msl_remote : (TCPTV_MSL >> 0);
1348 1.1 dyoung case 2:
1349 1.1 dyoung return tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1);
1350 1.1 dyoung default:
1351 1.1 dyoung return tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2);
1352 1.1 dyoung }
1353 1.1 dyoung }
1354 1.1 dyoung
1355 1.1 dyoung /*!\brief map TCP MSL to class
1356 1.1 dyoung */
1357 1.1 dyoung static inline uint32_t
1358 1.1 dyoung msl_to_class(int msl)
1359 1.1 dyoung {
1360 1.1 dyoung if (tcp_msl_enable) {
1361 1.1 dyoung if (msl <= (tcp_msl_loop ? tcp_msl_loop : (TCPTV_MSL >> 2)))
1362 1.1 dyoung return 1+2;
1363 1.1 dyoung if (msl <= (tcp_msl_local ? tcp_msl_local : (TCPTV_MSL >> 1)))
1364 1.1 dyoung return 1+1;
1365 1.1 dyoung return 1;
1366 1.1 dyoung }
1367 1.1 dyoung return 0;
1368 1.1 dyoung }
1369 1.1 dyoung
1370 1.1 dyoung /*!\brief allocate a vtw entry
1371 1.1 dyoung */
1372 1.1 dyoung static inline vtw_t *
1373 1.1 dyoung vtw_alloc(vtw_ctl_t *ctl)
1374 1.1 dyoung {
1375 1.1 dyoung vtw_t *vtw = 0;
1376 1.1 dyoung int stuck = 0;
1377 1.1 dyoung int avail = ctl ? (ctl->nalloc + ctl->nfree) : 0;
1378 1.1 dyoung int msl;
1379 1.1 dyoung
1380 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
1381 1.1 dyoung
1382 1.1 dyoung /* If no resources, we will not get far.
1383 1.1 dyoung */
1384 1.1 dyoung if (!ctl || !ctl->base.v4 || avail <= 0)
1385 1.1 dyoung return 0;
1386 1.1 dyoung
1387 1.1 dyoung /* Obtain a free one.
1388 1.1 dyoung */
1389 1.1 dyoung while (!ctl->nfree) {
1390 1.1 dyoung vtw_age(ctl, 0);
1391 1.1 dyoung
1392 1.1 dyoung if (++stuck > avail) {
1393 1.1 dyoung /* When in transition between
1394 1.1 dyoung * schemes (classless, classed) we
1395 1.1 dyoung * can be stuck having to await the
1396 1.1 dyoung * expiration of cross-allocated entries.
1397 1.1 dyoung *
1398 1.1 dyoung * Returning zero means we will fall back to the
1399 1.1 dyoung * traditional TIME_WAIT handling, except in the
1400 1.1 dyoung * case of a re-shed, in which case we cannot
1401 1.1 dyoung * perform the reshecd, but will retain the extant
1402 1.1 dyoung * entry.
1403 1.1 dyoung */
1404 1.1 dyoung db_trace(KTR_VTW
1405 1.1 dyoung , (ctl, "vtw:!none free in class %x %x/%x"
1406 1.1 dyoung , ctl->clidx
1407 1.1 dyoung , ctl->nalloc, ctl->nfree));
1408 1.1 dyoung
1409 1.1 dyoung return 0;
1410 1.1 dyoung }
1411 1.1 dyoung }
1412 1.1 dyoung
1413 1.1 dyoung vtw = ctl->alloc.v;
1414 1.1 dyoung
1415 1.1 dyoung if (vtw->msl_class != ctl->clidx) {
1416 1.1 dyoung /* Usurping rules:
1417 1.1 dyoung * 0 -> {1,2,3} or {1,2,3} -> 0
1418 1.1 dyoung */
1419 1.1 dyoung KASSERT(!vtw->msl_class || !ctl->clidx);
1420 1.1 dyoung
1421 1.1 dyoung if (vtw->hashed || vtw->expire.tv_sec) {
1422 1.1 dyoung /* As this is owned by some other class,
1423 1.1 dyoung * we must wait for it to expire it.
1424 1.1 dyoung * This will only happen on class/classless
1425 1.1 dyoung * transitions, which are guaranteed to progress
1426 1.1 dyoung * to completion in small finite time, barring bugs.
1427 1.1 dyoung */
1428 1.1 dyoung db_trace(KTR_VTW
1429 1.1 dyoung , (ctl, "vtw:!%p class %x!=%x %x:%x%s"
1430 1.1 dyoung , vtw, vtw->msl_class, ctl->clidx
1431 1.1 dyoung , vtw->expire.tv_sec
1432 1.1 dyoung , vtw->expire.tv_usec
1433 1.1 dyoung , vtw->hashed ? " hashed" : ""));
1434 1.1 dyoung
1435 1.1 dyoung return 0;
1436 1.1 dyoung }
1437 1.1 dyoung
1438 1.1 dyoung db_trace(KTR_VTW
1439 1.1 dyoung , (ctl, "vtw:!%p usurped from %x to %x"
1440 1.1 dyoung , vtw, vtw->msl_class, ctl->clidx));
1441 1.1 dyoung
1442 1.1 dyoung vtw->msl_class = ctl->clidx;
1443 1.1 dyoung }
1444 1.1 dyoung
1445 1.1 dyoung if (vtw_alive(vtw)) {
1446 1.1 dyoung KASSERT(0 && "next free not free");
1447 1.1 dyoung return 0;
1448 1.1 dyoung }
1449 1.1 dyoung
1450 1.21 andvar /* Advance allocation pointer.
1451 1.1 dyoung */
1452 1.1 dyoung ctl->alloc.v = vtw_next(ctl, vtw);
1453 1.1 dyoung
1454 1.1 dyoung --ctl->nfree;
1455 1.1 dyoung ++ctl->nalloc;
1456 1.1 dyoung
1457 1.1 dyoung msl = (2 * class_to_msl(ctl->clidx) * 1000) / PR_SLOWHZ; // msec
1458 1.1 dyoung
1459 1.1 dyoung /* mark expiration
1460 1.1 dyoung */
1461 1.3 drochner getmicrouptime(&vtw->expire);
1462 1.1 dyoung
1463 1.1 dyoung /* Move expiration into the future.
1464 1.1 dyoung */
1465 1.1 dyoung vtw->expire.tv_sec += msl / 1000;
1466 1.1 dyoung vtw->expire.tv_usec += 1000 * (msl % 1000);
1467 1.1 dyoung
1468 1.1 dyoung while (vtw->expire.tv_usec >= 1000*1000) {
1469 1.1 dyoung vtw->expire.tv_usec -= 1000*1000;
1470 1.1 dyoung vtw->expire.tv_sec += 1;
1471 1.1 dyoung }
1472 1.1 dyoung
1473 1.1 dyoung if (!ctl->oldest.v)
1474 1.1 dyoung ctl->oldest.v = vtw;
1475 1.1 dyoung
1476 1.1 dyoung return vtw;
1477 1.1 dyoung }
1478 1.1 dyoung
1479 1.1 dyoung /*!\brief expiration
1480 1.1 dyoung */
1481 1.1 dyoung static int
1482 1.1 dyoung vtw_age(vtw_ctl_t *ctl, struct timeval *_when)
1483 1.1 dyoung {
1484 1.1 dyoung vtw_t *vtw;
1485 1.1 dyoung struct timeval then, *when = _when;
1486 1.1 dyoung int maxtries = 0;
1487 1.1 dyoung
1488 1.1 dyoung if (!ctl->oldest.v) {
1489 1.1 dyoung KASSERT(!ctl->nalloc);
1490 1.1 dyoung return 0;
1491 1.1 dyoung }
1492 1.1 dyoung
1493 1.1 dyoung for (vtw = ctl->oldest.v; vtw && ctl->nalloc; ) {
1494 1.1 dyoung if (++maxtries > ctl->nalloc)
1495 1.1 dyoung break;
1496 1.1 dyoung
1497 1.1 dyoung if (vtw->msl_class != ctl->clidx) {
1498 1.1 dyoung db_trace(KTR_VTW
1499 1.1 dyoung , (vtw, "vtw:!age class mismatch %x != %x"
1500 1.1 dyoung , vtw->msl_class, ctl->clidx));
1501 1.1 dyoung /* XXXX
1502 1.1 dyoung * See if the appropriate action is to skip to the next.
1503 1.1 dyoung * XXXX
1504 1.1 dyoung */
1505 1.1 dyoung ctl->oldest.v = vtw = vtw_next(ctl, vtw);
1506 1.1 dyoung continue;
1507 1.1 dyoung }
1508 1.1 dyoung if (!when) {
1509 1.1 dyoung /* Latch oldest timeval if none specified.
1510 1.1 dyoung */
1511 1.1 dyoung then = vtw->expire;
1512 1.1 dyoung when = &then;
1513 1.1 dyoung }
1514 1.1 dyoung
1515 1.1 dyoung if (!timercmp(&vtw->expire, when, <=))
1516 1.1 dyoung break;
1517 1.1 dyoung
1518 1.1 dyoung db_trace(KTR_VTW
1519 1.1 dyoung , (vtw, "vtw: expire %x %8.8x:%8.8x %x/%x"
1520 1.1 dyoung , ctl->clidx
1521 1.1 dyoung , vtw->expire.tv_sec
1522 1.1 dyoung , vtw->expire.tv_usec
1523 1.1 dyoung , ctl->nalloc
1524 1.1 dyoung , ctl->nfree));
1525 1.1 dyoung
1526 1.1 dyoung if (!_when)
1527 1.1 dyoung ++vtw_stats.kill;
1528 1.1 dyoung
1529 1.1 dyoung vtw_del(ctl, vtw);
1530 1.1 dyoung vtw = ctl->oldest.v;
1531 1.1 dyoung }
1532 1.1 dyoung
1533 1.1 dyoung return ctl->nalloc; // # remaining allocated
1534 1.1 dyoung }
1535 1.1 dyoung
1536 1.1 dyoung static callout_t vtw_cs;
1537 1.1 dyoung
1538 1.1 dyoung /*!\brief notice the passage of time.
1539 1.1 dyoung * It seems to be getting faster. What happened to the year?
1540 1.1 dyoung */
1541 1.1 dyoung static void
1542 1.1 dyoung vtw_tick(void *arg)
1543 1.1 dyoung {
1544 1.1 dyoung struct timeval now;
1545 1.1 dyoung int i, cnt = 0;
1546 1.1 dyoung
1547 1.3 drochner getmicrouptime(&now);
1548 1.1 dyoung
1549 1.1 dyoung db_trace(KTR_VTW, (arg, "vtk: tick - now %8.8x:%8.8x"
1550 1.1 dyoung , now.tv_sec, now.tv_usec));
1551 1.1 dyoung
1552 1.1 dyoung mutex_enter(softnet_lock);
1553 1.1 dyoung
1554 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
1555 1.1 dyoung cnt += vtw_age(&vtw_tcpv4[i], &now);
1556 1.1 dyoung cnt += vtw_age(&vtw_tcpv6[i], &now);
1557 1.1 dyoung }
1558 1.1 dyoung
1559 1.1 dyoung /* Keep ticks coming while we need them.
1560 1.1 dyoung */
1561 1.1 dyoung if (cnt)
1562 1.1 dyoung callout_schedule(&vtw_cs, hz / 5);
1563 1.1 dyoung else {
1564 1.1 dyoung tcp_vtw_was_enabled = 0;
1565 1.1 dyoung tcbtable.vestige = 0;
1566 1.1 dyoung }
1567 1.1 dyoung mutex_exit(softnet_lock);
1568 1.1 dyoung }
1569 1.1 dyoung
1570 1.1 dyoung /* in_pcblookup_ports assist for handling vestigial entries.
1571 1.1 dyoung */
1572 1.1 dyoung static void *
1573 1.1 dyoung tcp_init_ports_v4(struct in_addr addr, u_int port, int wild)
1574 1.1 dyoung {
1575 1.1 dyoung struct tcp_ports_iterator *it = &tcp_ports_iterator_v4;
1576 1.1 dyoung
1577 1.1 dyoung bzero(it, sizeof (*it));
1578 1.1 dyoung
1579 1.1 dyoung /* Note: the reference to vtw_tcpv4[0] is fine.
1580 1.1 dyoung * We do not need per-class iteration. We just
1581 1.1 dyoung * need to get to the fat, and there is one
1582 1.1 dyoung * shared fat.
1583 1.1 dyoung */
1584 1.1 dyoung if (vtw_tcpv4[0].fat) {
1585 1.1 dyoung it->addr.v4 = addr;
1586 1.1 dyoung it->port = port;
1587 1.1 dyoung it->wild = !!wild;
1588 1.1 dyoung it->ctl = &vtw_tcpv4[0];
1589 1.1 dyoung
1590 1.1 dyoung ++vtw_stats.look[1];
1591 1.1 dyoung }
1592 1.1 dyoung
1593 1.1 dyoung return it;
1594 1.1 dyoung }
1595 1.1 dyoung
1596 1.1 dyoung /*!\brief export an IPv4 vtw.
1597 1.1 dyoung */
1598 1.1 dyoung static int
1599 1.1 dyoung vtw_export_v4(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res)
1600 1.1 dyoung {
1601 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
1602 1.1 dyoung
1603 1.1 dyoung bzero(res, sizeof (*res));
1604 1.1 dyoung
1605 1.1 dyoung if (ctl && vtw) {
1606 1.1 dyoung if (!ctl->clidx && vtw->msl_class)
1607 1.1 dyoung ctl += vtw->msl_class;
1608 1.1 dyoung else
1609 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
1610 1.1 dyoung
1611 1.1 dyoung res->valid = 1;
1612 1.1 dyoung res->v4 = 1;
1613 1.1 dyoung
1614 1.1 dyoung res->faddr.v4.s_addr = v4->faddr;
1615 1.1 dyoung res->laddr.v4.s_addr = v4->laddr;
1616 1.1 dyoung res->fport = v4->fport;
1617 1.1 dyoung res->lport = v4->lport;
1618 1.1 dyoung res->vtw = vtw; // netlock held over call(s)
1619 1.1 dyoung res->ctl = ctl;
1620 1.1 dyoung res->reuse_addr = vtw->reuse_addr;
1621 1.1 dyoung res->reuse_port = vtw->reuse_port;
1622 1.1 dyoung res->snd_nxt = vtw->snd_nxt;
1623 1.1 dyoung res->rcv_nxt = vtw->rcv_nxt;
1624 1.1 dyoung res->rcv_wnd = vtw->rcv_wnd;
1625 1.1 dyoung res->uid = vtw->uid;
1626 1.1 dyoung }
1627 1.1 dyoung
1628 1.1 dyoung return res->valid;
1629 1.1 dyoung }
1630 1.1 dyoung
1631 1.1 dyoung /*!\brief return next port in the port iterator. yowza.
1632 1.1 dyoung */
1633 1.1 dyoung static int
1634 1.1 dyoung tcp_next_port_v4(void *arg, struct vestigial_inpcb *res)
1635 1.1 dyoung {
1636 1.1 dyoung struct tcp_ports_iterator *it = arg;
1637 1.1 dyoung vtw_t *vtw = 0;
1638 1.1 dyoung
1639 1.1 dyoung if (it->ctl)
1640 1.1 dyoung vtw = vtw_next_port_v4(it);
1641 1.1 dyoung
1642 1.1 dyoung if (!vtw)
1643 1.1 dyoung it->ctl = 0;
1644 1.1 dyoung
1645 1.1 dyoung return vtw_export_v4(it->ctl, vtw, res);
1646 1.1 dyoung }
1647 1.1 dyoung
1648 1.1 dyoung static int
1649 1.1 dyoung tcp_lookup_v4(struct in_addr faddr, uint16_t fport,
1650 1.1 dyoung struct in_addr laddr, uint16_t lport,
1651 1.1 dyoung struct vestigial_inpcb *res)
1652 1.1 dyoung {
1653 1.1 dyoung vtw_t *vtw;
1654 1.1 dyoung vtw_ctl_t *ctl;
1655 1.1 dyoung
1656 1.1 dyoung
1657 1.1 dyoung db_trace(KTR_VTW
1658 1.1 dyoung , (res, "vtw: lookup %A:%P %A:%P"
1659 1.1 dyoung , faddr, fport
1660 1.1 dyoung , laddr, lport));
1661 1.1 dyoung
1662 1.1 dyoung vtw = vtw_lookup_hash_v4((ctl = &vtw_tcpv4[0])
1663 1.1 dyoung , faddr.s_addr, fport
1664 1.1 dyoung , laddr.s_addr, lport, 0);
1665 1.1 dyoung
1666 1.1 dyoung return vtw_export_v4(ctl, vtw, res);
1667 1.1 dyoung }
1668 1.1 dyoung
1669 1.1 dyoung /* in_pcblookup_ports assist for handling vestigial entries.
1670 1.1 dyoung */
1671 1.1 dyoung static void *
1672 1.1 dyoung tcp_init_ports_v6(const struct in6_addr *addr, u_int port, int wild)
1673 1.1 dyoung {
1674 1.1 dyoung struct tcp_ports_iterator *it = &tcp_ports_iterator_v6;
1675 1.1 dyoung
1676 1.1 dyoung bzero(it, sizeof (*it));
1677 1.1 dyoung
1678 1.1 dyoung /* Note: the reference to vtw_tcpv6[0] is fine.
1679 1.1 dyoung * We do not need per-class iteration. We just
1680 1.1 dyoung * need to get to the fat, and there is one
1681 1.1 dyoung * shared fat.
1682 1.1 dyoung */
1683 1.1 dyoung if (vtw_tcpv6[0].fat) {
1684 1.1 dyoung it->addr.v6 = *addr;
1685 1.1 dyoung it->port = port;
1686 1.1 dyoung it->wild = !!wild;
1687 1.1 dyoung it->ctl = &vtw_tcpv6[0];
1688 1.1 dyoung
1689 1.1 dyoung ++vtw_stats.look[1];
1690 1.1 dyoung }
1691 1.1 dyoung
1692 1.1 dyoung return it;
1693 1.1 dyoung }
1694 1.1 dyoung
1695 1.1 dyoung /*!\brief export an IPv6 vtw.
1696 1.1 dyoung */
1697 1.1 dyoung static int
1698 1.1 dyoung vtw_export_v6(vtw_ctl_t *ctl, vtw_t *vtw, vestigial_inpcb_t *res)
1699 1.1 dyoung {
1700 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
1701 1.1 dyoung
1702 1.1 dyoung bzero(res, sizeof (*res));
1703 1.1 dyoung
1704 1.1 dyoung if (ctl && vtw) {
1705 1.1 dyoung if (!ctl->clidx && vtw->msl_class)
1706 1.1 dyoung ctl += vtw->msl_class;
1707 1.1 dyoung else
1708 1.1 dyoung KASSERT(ctl->clidx == vtw->msl_class);
1709 1.1 dyoung
1710 1.1 dyoung res->valid = 1;
1711 1.1 dyoung res->v4 = 0;
1712 1.1 dyoung
1713 1.1 dyoung res->faddr.v6 = v6->faddr;
1714 1.1 dyoung res->laddr.v6 = v6->laddr;
1715 1.1 dyoung res->fport = v6->fport;
1716 1.1 dyoung res->lport = v6->lport;
1717 1.1 dyoung res->vtw = vtw; // netlock held over call(s)
1718 1.1 dyoung res->ctl = ctl;
1719 1.1 dyoung
1720 1.1 dyoung res->v6only = vtw->v6only;
1721 1.1 dyoung res->reuse_addr = vtw->reuse_addr;
1722 1.1 dyoung res->reuse_port = vtw->reuse_port;
1723 1.1 dyoung
1724 1.1 dyoung res->snd_nxt = vtw->snd_nxt;
1725 1.1 dyoung res->rcv_nxt = vtw->rcv_nxt;
1726 1.1 dyoung res->rcv_wnd = vtw->rcv_wnd;
1727 1.1 dyoung res->uid = vtw->uid;
1728 1.1 dyoung }
1729 1.1 dyoung
1730 1.1 dyoung return res->valid;
1731 1.1 dyoung }
1732 1.1 dyoung
1733 1.1 dyoung static int
1734 1.1 dyoung tcp_next_port_v6(void *arg, struct vestigial_inpcb *res)
1735 1.1 dyoung {
1736 1.1 dyoung struct tcp_ports_iterator *it = arg;
1737 1.1 dyoung vtw_t *vtw = 0;
1738 1.1 dyoung
1739 1.1 dyoung if (it->ctl)
1740 1.1 dyoung vtw = vtw_next_port_v6(it);
1741 1.1 dyoung
1742 1.1 dyoung if (!vtw)
1743 1.1 dyoung it->ctl = 0;
1744 1.1 dyoung
1745 1.1 dyoung return vtw_export_v6(it->ctl, vtw, res);
1746 1.1 dyoung }
1747 1.1 dyoung
1748 1.1 dyoung static int
1749 1.1 dyoung tcp_lookup_v6(const struct in6_addr *faddr, uint16_t fport,
1750 1.1 dyoung const struct in6_addr *laddr, uint16_t lport,
1751 1.1 dyoung struct vestigial_inpcb *res)
1752 1.1 dyoung {
1753 1.1 dyoung vtw_ctl_t *ctl;
1754 1.1 dyoung vtw_t *vtw;
1755 1.1 dyoung
1756 1.1 dyoung db_trace(KTR_VTW
1757 1.1 dyoung , (res, "vtw: lookup %6A:%P %6A:%P"
1758 1.1 dyoung , db_store(faddr, sizeof (*faddr)), fport
1759 1.1 dyoung , db_store(laddr, sizeof (*laddr)), lport));
1760 1.1 dyoung
1761 1.1 dyoung vtw = vtw_lookup_hash_v6((ctl = &vtw_tcpv6[0])
1762 1.1 dyoung , faddr, fport
1763 1.1 dyoung , laddr, lport, 0);
1764 1.1 dyoung
1765 1.1 dyoung return vtw_export_v6(ctl, vtw, res);
1766 1.1 dyoung }
1767 1.1 dyoung
1768 1.1 dyoung static vestigial_hooks_t tcp_hooks = {
1769 1.1 dyoung .init_ports4 = tcp_init_ports_v4,
1770 1.1 dyoung .next_port4 = tcp_next_port_v4,
1771 1.1 dyoung .lookup4 = tcp_lookup_v4,
1772 1.1 dyoung .init_ports6 = tcp_init_ports_v6,
1773 1.1 dyoung .next_port6 = tcp_next_port_v6,
1774 1.1 dyoung .lookup6 = tcp_lookup_v6,
1775 1.1 dyoung };
1776 1.1 dyoung
1777 1.1 dyoung static bool
1778 1.1 dyoung vtw_select(int af, fatp_ctl_t **fatp, vtw_ctl_t **ctlp)
1779 1.1 dyoung {
1780 1.1 dyoung fatp_ctl_t *fat;
1781 1.1 dyoung vtw_ctl_t *ctl;
1782 1.1 dyoung
1783 1.1 dyoung switch (af) {
1784 1.1 dyoung case AF_INET:
1785 1.1 dyoung fat = &fat_tcpv4;
1786 1.1 dyoung ctl = &vtw_tcpv4[0];
1787 1.1 dyoung break;
1788 1.1 dyoung case AF_INET6:
1789 1.1 dyoung fat = &fat_tcpv6;
1790 1.1 dyoung ctl = &vtw_tcpv6[0];
1791 1.1 dyoung break;
1792 1.1 dyoung default:
1793 1.1 dyoung return false;
1794 1.1 dyoung }
1795 1.1 dyoung if (fatp != NULL)
1796 1.1 dyoung *fatp = fat;
1797 1.1 dyoung if (ctlp != NULL)
1798 1.1 dyoung *ctlp = ctl;
1799 1.1 dyoung return true;
1800 1.1 dyoung }
1801 1.1 dyoung
1802 1.1 dyoung /*!\brief initialize controlling instance
1803 1.1 dyoung */
1804 1.1 dyoung static int
1805 1.1 dyoung vtw_control_init(int af)
1806 1.1 dyoung {
1807 1.1 dyoung fatp_ctl_t *fat;
1808 1.1 dyoung vtw_ctl_t *ctl;
1809 1.6 dyoung fatp_t *fat_base;
1810 1.6 dyoung fatp_t **fat_hash;
1811 1.6 dyoung vtw_t *ctl_base_v;
1812 1.6 dyoung uint32_t n, m;
1813 1.6 dyoung size_t sz;
1814 1.6 dyoung
1815 1.6 dyoung KASSERT(powerof2(tcp_vtw_entries));
1816 1.1 dyoung
1817 1.1 dyoung if (!vtw_select(af, &fat, &ctl))
1818 1.1 dyoung return EAFNOSUPPORT;
1819 1.1 dyoung
1820 1.6 dyoung if (fat->hash != NULL) {
1821 1.6 dyoung KASSERT(fat->base != NULL && ctl->base.v != NULL);
1822 1.6 dyoung return 0;
1823 1.6 dyoung }
1824 1.6 dyoung
1825 1.6 dyoung /* Allocate 10% more capacity in the fat pointers.
1826 1.6 dyoung * We should only need ~#hash additional based on
1827 1.6 dyoung * how they age, but TIME_WAIT assassination could cause
1828 1.6 dyoung * sparse fat pointer utilisation.
1829 1.6 dyoung */
1830 1.6 dyoung m = 512;
1831 1.6 dyoung n = 2*m + (11 * (tcp_vtw_entries / fatp_ntags())) / 10;
1832 1.6 dyoung sz = (ctl->is_v4 ? sizeof(vtw_v4_t) : sizeof(vtw_v6_t));
1833 1.6 dyoung
1834 1.20 chs fat_hash = kmem_zalloc(2*m * sizeof(fatp_t *), KM_SLEEP);
1835 1.20 chs fat_base = kmem_zalloc(2*n * sizeof(fatp_t), KM_SLEEP);
1836 1.20 chs ctl_base_v = kmem_zalloc(tcp_vtw_entries * sz, KM_SLEEP);
1837 1.6 dyoung fatp_init(fat, n, m, fat_base, fat_hash);
1838 1.6 dyoung vtw_init(fat, ctl, tcp_vtw_entries, ctl_base_v);
1839 1.1 dyoung
1840 1.1 dyoung return 0;
1841 1.1 dyoung }
1842 1.1 dyoung
1843 1.1 dyoung /*!\brief select controlling instance
1844 1.1 dyoung */
1845 1.1 dyoung static vtw_ctl_t *
1846 1.1 dyoung vtw_control(int af, uint32_t msl)
1847 1.1 dyoung {
1848 1.1 dyoung fatp_ctl_t *fat;
1849 1.1 dyoung vtw_ctl_t *ctl;
1850 1.11 matt int msl_class = msl_to_class(msl);
1851 1.1 dyoung
1852 1.1 dyoung if (!vtw_select(af, &fat, &ctl))
1853 1.1 dyoung return NULL;
1854 1.1 dyoung
1855 1.1 dyoung if (!fat->base || !ctl->base.v)
1856 1.1 dyoung return NULL;
1857 1.1 dyoung
1858 1.5 dyoung if (!tcp_vtw_was_enabled) {
1859 1.5 dyoung /* This guarantees is timer ticks until we no longer need them.
1860 1.5 dyoung */
1861 1.5 dyoung tcp_vtw_was_enabled = 1;
1862 1.5 dyoung
1863 1.5 dyoung callout_schedule(&vtw_cs, hz / 5);
1864 1.5 dyoung
1865 1.5 dyoung tcbtable.vestige = &tcp_hooks;
1866 1.5 dyoung }
1867 1.5 dyoung
1868 1.11 matt return ctl + msl_class;
1869 1.1 dyoung }
1870 1.1 dyoung
1871 1.1 dyoung /*!\brief add TCP pcb to vestigial timewait
1872 1.1 dyoung */
1873 1.1 dyoung int
1874 1.1 dyoung vtw_add(int af, struct tcpcb *tp)
1875 1.1 dyoung {
1876 1.10 martin #ifdef VTW_DEBUG
1877 1.1 dyoung int enable;
1878 1.10 martin #endif
1879 1.1 dyoung vtw_ctl_t *ctl;
1880 1.1 dyoung vtw_t *vtw;
1881 1.1 dyoung
1882 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
1883 1.1 dyoung
1884 1.1 dyoung ctl = vtw_control(af, tp->t_msl);
1885 1.1 dyoung if (!ctl)
1886 1.1 dyoung return 0;
1887 1.1 dyoung
1888 1.10 martin #ifdef VTW_DEBUG
1889 1.1 dyoung enable = (af == AF_INET) ? tcp4_vtw_enable : tcp6_vtw_enable;
1890 1.10 martin #endif
1891 1.1 dyoung
1892 1.1 dyoung vtw = vtw_alloc(ctl);
1893 1.1 dyoung
1894 1.1 dyoung if (vtw) {
1895 1.1 dyoung vtw->snd_nxt = tp->snd_nxt;
1896 1.1 dyoung vtw->rcv_nxt = tp->rcv_nxt;
1897 1.1 dyoung
1898 1.1 dyoung switch (af) {
1899 1.1 dyoung case AF_INET: {
1900 1.1 dyoung struct inpcb *inp = tp->t_inpcb;
1901 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
1902 1.1 dyoung
1903 1.1 dyoung v4->faddr = inp->inp_faddr.s_addr;
1904 1.1 dyoung v4->laddr = inp->inp_laddr.s_addr;
1905 1.1 dyoung v4->fport = inp->inp_fport;
1906 1.1 dyoung v4->lport = inp->inp_lport;
1907 1.1 dyoung
1908 1.1 dyoung vtw->reuse_port = !!(inp->inp_socket->so_options
1909 1.1 dyoung & SO_REUSEPORT);
1910 1.1 dyoung vtw->reuse_addr = !!(inp->inp_socket->so_options
1911 1.1 dyoung & SO_REUSEADDR);
1912 1.1 dyoung vtw->v6only = 0;
1913 1.1 dyoung vtw->uid = inp->inp_socket->so_uidinfo->ui_uid;
1914 1.1 dyoung
1915 1.1 dyoung vtw_inshash_v4(ctl, vtw);
1916 1.1 dyoung
1917 1.1 dyoung
1918 1.1 dyoung #ifdef VTW_DEBUG
1919 1.1 dyoung /* Immediate lookup (connected and port) to
1920 1.1 dyoung * ensure at least that works!
1921 1.1 dyoung */
1922 1.1 dyoung if (enable & 4) {
1923 1.1 dyoung KASSERT(vtw_lookup_hash_v4
1924 1.1 dyoung (ctl
1925 1.1 dyoung , inp->inp_faddr.s_addr, inp->inp_fport
1926 1.1 dyoung , inp->inp_laddr.s_addr, inp->inp_lport
1927 1.1 dyoung , 0)
1928 1.1 dyoung == vtw);
1929 1.1 dyoung KASSERT(vtw_lookup_hash_v4
1930 1.1 dyoung (ctl
1931 1.1 dyoung , inp->inp_faddr.s_addr, inp->inp_fport
1932 1.1 dyoung , inp->inp_laddr.s_addr, inp->inp_lport
1933 1.1 dyoung , 1));
1934 1.1 dyoung }
1935 1.1 dyoung /* Immediate port iterator functionality check: not wild
1936 1.1 dyoung */
1937 1.1 dyoung if (enable & 8) {
1938 1.1 dyoung struct tcp_ports_iterator *it;
1939 1.1 dyoung struct vestigial_inpcb res;
1940 1.1 dyoung int cnt = 0;
1941 1.1 dyoung
1942 1.1 dyoung it = tcp_init_ports_v4(inp->inp_laddr
1943 1.1 dyoung , inp->inp_lport, 0);
1944 1.1 dyoung
1945 1.1 dyoung while (tcp_next_port_v4(it, &res)) {
1946 1.1 dyoung ++cnt;
1947 1.1 dyoung }
1948 1.1 dyoung KASSERT(cnt);
1949 1.1 dyoung }
1950 1.1 dyoung /* Immediate port iterator functionality check: wild
1951 1.1 dyoung */
1952 1.1 dyoung if (enable & 16) {
1953 1.1 dyoung struct tcp_ports_iterator *it;
1954 1.1 dyoung struct vestigial_inpcb res;
1955 1.1 dyoung struct in_addr any;
1956 1.1 dyoung int cnt = 0;
1957 1.1 dyoung
1958 1.1 dyoung any.s_addr = htonl(INADDR_ANY);
1959 1.1 dyoung
1960 1.1 dyoung it = tcp_init_ports_v4(any, inp->inp_lport, 1);
1961 1.1 dyoung
1962 1.1 dyoung while (tcp_next_port_v4(it, &res)) {
1963 1.1 dyoung ++cnt;
1964 1.1 dyoung }
1965 1.1 dyoung KASSERT(cnt);
1966 1.1 dyoung }
1967 1.1 dyoung #endif /* VTW_DEBUG */
1968 1.1 dyoung break;
1969 1.1 dyoung }
1970 1.1 dyoung
1971 1.1 dyoung case AF_INET6: {
1972 1.22 ozaki struct inpcb *inp = tp->t_inpcb;
1973 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
1974 1.1 dyoung
1975 1.22 ozaki v6->faddr = inp->inp_faddr6;
1976 1.22 ozaki v6->laddr = inp->inp_laddr6;
1977 1.22 ozaki v6->fport = inp->inp_fport;
1978 1.22 ozaki v6->lport = inp->inp_lport;
1979 1.1 dyoung
1980 1.22 ozaki vtw->reuse_port = !!(inp->inp_socket->so_options
1981 1.1 dyoung & SO_REUSEPORT);
1982 1.22 ozaki vtw->reuse_addr = !!(inp->inp_socket->so_options
1983 1.1 dyoung & SO_REUSEADDR);
1984 1.22 ozaki vtw->v6only = !!(inp->inp_flags
1985 1.1 dyoung & IN6P_IPV6_V6ONLY);
1986 1.22 ozaki vtw->uid = inp->inp_socket->so_uidinfo->ui_uid;
1987 1.1 dyoung
1988 1.1 dyoung vtw_inshash_v6(ctl, vtw);
1989 1.1 dyoung #ifdef VTW_DEBUG
1990 1.1 dyoung /* Immediate lookup (connected and port) to
1991 1.1 dyoung * ensure at least that works!
1992 1.1 dyoung */
1993 1.1 dyoung if (enable & 4) {
1994 1.1 dyoung KASSERT(vtw_lookup_hash_v6(ctl
1995 1.22 ozaki , &inp->inp_faddr6, inp->inp_fport
1996 1.22 ozaki , &inp->inp_laddr6, inp->inp_lport
1997 1.1 dyoung , 0)
1998 1.1 dyoung == vtw);
1999 1.1 dyoung KASSERT(vtw_lookup_hash_v6
2000 1.1 dyoung (ctl
2001 1.22 ozaki , &inp->inp_faddr6, inp->inp_fport
2002 1.22 ozaki , &inp->inp_laddr6, inp->inp_lport
2003 1.1 dyoung , 1));
2004 1.1 dyoung }
2005 1.1 dyoung /* Immediate port iterator functionality check: not wild
2006 1.1 dyoung */
2007 1.1 dyoung if (enable & 8) {
2008 1.1 dyoung struct tcp_ports_iterator *it;
2009 1.1 dyoung struct vestigial_inpcb res;
2010 1.1 dyoung int cnt = 0;
2011 1.1 dyoung
2012 1.22 ozaki it = tcp_init_ports_v6(&inp->inp_laddr6
2013 1.22 ozaki , inp->inp_lport, 0);
2014 1.1 dyoung
2015 1.1 dyoung while (tcp_next_port_v6(it, &res)) {
2016 1.1 dyoung ++cnt;
2017 1.1 dyoung }
2018 1.1 dyoung KASSERT(cnt);
2019 1.1 dyoung }
2020 1.1 dyoung /* Immediate port iterator functionality check: wild
2021 1.1 dyoung */
2022 1.1 dyoung if (enable & 16) {
2023 1.1 dyoung struct tcp_ports_iterator *it;
2024 1.1 dyoung struct vestigial_inpcb res;
2025 1.1 dyoung static struct in6_addr any = IN6ADDR_ANY_INIT;
2026 1.1 dyoung int cnt = 0;
2027 1.1 dyoung
2028 1.1 dyoung it = tcp_init_ports_v6(&any
2029 1.22 ozaki , inp->inp_lport, 1);
2030 1.1 dyoung
2031 1.1 dyoung while (tcp_next_port_v6(it, &res)) {
2032 1.1 dyoung ++cnt;
2033 1.1 dyoung }
2034 1.1 dyoung KASSERT(cnt);
2035 1.1 dyoung }
2036 1.1 dyoung #endif /* VTW_DEBUG */
2037 1.1 dyoung break;
2038 1.1 dyoung }
2039 1.1 dyoung }
2040 1.1 dyoung
2041 1.1 dyoung tcp_canceltimers(tp);
2042 1.1 dyoung tp = tcp_close(tp);
2043 1.1 dyoung KASSERT(!tp);
2044 1.1 dyoung
2045 1.1 dyoung return 1;
2046 1.1 dyoung }
2047 1.1 dyoung
2048 1.1 dyoung return 0;
2049 1.1 dyoung }
2050 1.1 dyoung
2051 1.1 dyoung /*!\brief restart timer for vestigial time-wait entry
2052 1.1 dyoung */
2053 1.1 dyoung static void
2054 1.1 dyoung vtw_restart_v4(vestigial_inpcb_t *vp)
2055 1.1 dyoung {
2056 1.1 dyoung vtw_v4_t copy = *(vtw_v4_t*)vp->vtw;
2057 1.1 dyoung vtw_t *vtw;
2058 1.1 dyoung vtw_t *cp = ©.common;
2059 1.1 dyoung vtw_ctl_t *ctl;
2060 1.1 dyoung
2061 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
2062 1.1 dyoung
2063 1.1 dyoung db_trace(KTR_VTW
2064 1.1 dyoung , (vp->vtw, "vtw: restart %A:%P %A:%P"
2065 1.1 dyoung , vp->faddr.v4.s_addr, vp->fport
2066 1.1 dyoung , vp->laddr.v4.s_addr, vp->lport));
2067 1.1 dyoung
2068 1.1 dyoung /* Class might have changed, so have a squiz.
2069 1.1 dyoung */
2070 1.1 dyoung ctl = vtw_control(AF_INET, class_to_msl(cp->msl_class));
2071 1.1 dyoung vtw = vtw_alloc(ctl);
2072 1.1 dyoung
2073 1.1 dyoung if (vtw) {
2074 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
2075 1.1 dyoung
2076 1.1 dyoung /* Safe now to unhash the old entry
2077 1.1 dyoung */
2078 1.1 dyoung vtw_del(vp->ctl, vp->vtw);
2079 1.1 dyoung
2080 1.1 dyoung vtw->snd_nxt = cp->snd_nxt;
2081 1.1 dyoung vtw->rcv_nxt = cp->rcv_nxt;
2082 1.1 dyoung
2083 1.1 dyoung v4->faddr = copy.faddr;
2084 1.1 dyoung v4->laddr = copy.laddr;
2085 1.1 dyoung v4->fport = copy.fport;
2086 1.1 dyoung v4->lport = copy.lport;
2087 1.1 dyoung
2088 1.1 dyoung vtw->reuse_port = cp->reuse_port;
2089 1.1 dyoung vtw->reuse_addr = cp->reuse_addr;
2090 1.1 dyoung vtw->v6only = 0;
2091 1.1 dyoung vtw->uid = cp->uid;
2092 1.1 dyoung
2093 1.1 dyoung vtw_inshash_v4(ctl, vtw);
2094 1.1 dyoung }
2095 1.1 dyoung
2096 1.1 dyoung vp->valid = 0;
2097 1.1 dyoung }
2098 1.1 dyoung
2099 1.1 dyoung /*!\brief restart timer for vestigial time-wait entry
2100 1.1 dyoung */
2101 1.1 dyoung static void
2102 1.1 dyoung vtw_restart_v6(vestigial_inpcb_t *vp)
2103 1.1 dyoung {
2104 1.1 dyoung vtw_v6_t copy = *(vtw_v6_t*)vp->vtw;
2105 1.1 dyoung vtw_t *vtw;
2106 1.1 dyoung vtw_t *cp = ©.common;
2107 1.1 dyoung vtw_ctl_t *ctl;
2108 1.1 dyoung
2109 1.1 dyoung KASSERT(mutex_owned(softnet_lock));
2110 1.1 dyoung
2111 1.1 dyoung db_trace(KTR_VTW
2112 1.1 dyoung , (vp->vtw, "vtw: restart %6A:%P %6A:%P"
2113 1.1 dyoung , db_store(&vp->faddr.v6, sizeof (vp->faddr.v6))
2114 1.1 dyoung , vp->fport
2115 1.1 dyoung , db_store(&vp->laddr.v6, sizeof (vp->laddr.v6))
2116 1.1 dyoung , vp->lport));
2117 1.1 dyoung
2118 1.1 dyoung /* Class might have changed, so have a squiz.
2119 1.1 dyoung */
2120 1.1 dyoung ctl = vtw_control(AF_INET6, class_to_msl(cp->msl_class));
2121 1.1 dyoung vtw = vtw_alloc(ctl);
2122 1.1 dyoung
2123 1.1 dyoung if (vtw) {
2124 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
2125 1.1 dyoung
2126 1.1 dyoung /* Safe now to unhash the old entry
2127 1.1 dyoung */
2128 1.1 dyoung vtw_del(vp->ctl, vp->vtw);
2129 1.1 dyoung
2130 1.1 dyoung vtw->snd_nxt = cp->snd_nxt;
2131 1.1 dyoung vtw->rcv_nxt = cp->rcv_nxt;
2132 1.1 dyoung
2133 1.1 dyoung v6->faddr = copy.faddr;
2134 1.1 dyoung v6->laddr = copy.laddr;
2135 1.1 dyoung v6->fport = copy.fport;
2136 1.1 dyoung v6->lport = copy.lport;
2137 1.1 dyoung
2138 1.1 dyoung vtw->reuse_port = cp->reuse_port;
2139 1.1 dyoung vtw->reuse_addr = cp->reuse_addr;
2140 1.1 dyoung vtw->v6only = cp->v6only;
2141 1.1 dyoung vtw->uid = cp->uid;
2142 1.1 dyoung
2143 1.1 dyoung vtw_inshash_v6(ctl, vtw);
2144 1.1 dyoung }
2145 1.1 dyoung
2146 1.1 dyoung vp->valid = 0;
2147 1.1 dyoung }
2148 1.1 dyoung
2149 1.1 dyoung /*!\brief restart timer for vestigial time-wait entry
2150 1.1 dyoung */
2151 1.1 dyoung void
2152 1.1 dyoung vtw_restart(vestigial_inpcb_t *vp)
2153 1.1 dyoung {
2154 1.1 dyoung if (!vp || !vp->valid)
2155 1.1 dyoung return;
2156 1.1 dyoung
2157 1.1 dyoung if (vp->v4)
2158 1.1 dyoung vtw_restart_v4(vp);
2159 1.1 dyoung else
2160 1.1 dyoung vtw_restart_v6(vp);
2161 1.1 dyoung }
2162 1.1 dyoung
2163 1.1 dyoung int
2164 1.7 dyoung sysctl_tcp_vtw_enable(SYSCTLFN_ARGS)
2165 1.7 dyoung {
2166 1.7 dyoung int en, rc;
2167 1.7 dyoung struct sysctlnode node;
2168 1.7 dyoung
2169 1.7 dyoung node = *rnode;
2170 1.7 dyoung en = *(int *)rnode->sysctl_data;
2171 1.7 dyoung node.sysctl_data = &en;
2172 1.7 dyoung
2173 1.7 dyoung rc = sysctl_lookup(SYSCTLFN_CALL(&node));
2174 1.7 dyoung if (rc != 0 || newp == NULL)
2175 1.7 dyoung return rc;
2176 1.7 dyoung
2177 1.7 dyoung if (rnode->sysctl_data != &tcp4_vtw_enable &&
2178 1.7 dyoung rnode->sysctl_data != &tcp6_vtw_enable)
2179 1.7 dyoung rc = ENOENT;
2180 1.7 dyoung else if ((en & 1) == 0)
2181 1.7 dyoung rc = 0;
2182 1.7 dyoung else if (rnode->sysctl_data == &tcp4_vtw_enable)
2183 1.7 dyoung rc = vtw_control_init(AF_INET);
2184 1.7 dyoung else /* rnode->sysctl_data == &tcp6_vtw_enable */
2185 1.7 dyoung rc = vtw_control_init(AF_INET6);
2186 1.7 dyoung
2187 1.7 dyoung if (rc == 0)
2188 1.7 dyoung *(int *)rnode->sysctl_data = en;
2189 1.7 dyoung
2190 1.7 dyoung return rc;
2191 1.7 dyoung }
2192 1.7 dyoung
2193 1.7 dyoung int
2194 1.1 dyoung vtw_earlyinit(void)
2195 1.1 dyoung {
2196 1.5 dyoung int i, rc;
2197 1.1 dyoung
2198 1.5 dyoung callout_init(&vtw_cs, 0);
2199 1.5 dyoung callout_setfunc(&vtw_cs, vtw_tick, 0);
2200 1.1 dyoung
2201 1.5 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
2202 1.5 dyoung vtw_tcpv4[i].is_v4 = 1;
2203 1.5 dyoung vtw_tcpv6[i].is_v6 = 1;
2204 1.1 dyoung }
2205 1.1 dyoung
2206 1.7 dyoung if ((tcp4_vtw_enable & 1) != 0 &&
2207 1.7 dyoung (rc = vtw_control_init(AF_INET)) != 0)
2208 1.7 dyoung return rc;
2209 1.7 dyoung
2210 1.7 dyoung if ((tcp6_vtw_enable & 1) != 0 &&
2211 1.1 dyoung (rc = vtw_control_init(AF_INET6)) != 0)
2212 1.1 dyoung return rc;
2213 1.1 dyoung
2214 1.1 dyoung return 0;
2215 1.1 dyoung }
2216 1.1 dyoung
2217 1.1 dyoung #ifdef VTW_DEBUG
2218 1.1 dyoung #include <sys/syscallargs.h>
2219 1.1 dyoung #include <sys/sysctl.h>
2220 1.1 dyoung
2221 1.1 dyoung /*!\brief add lalp, fafp entries for debug
2222 1.1 dyoung */
2223 1.1 dyoung int
2224 1.11 matt vtw_debug_add(int af, sin_either_t *la, sin_either_t *fa, int msl, int msl_class)
2225 1.1 dyoung {
2226 1.1 dyoung vtw_ctl_t *ctl;
2227 1.1 dyoung vtw_t *vtw;
2228 1.1 dyoung
2229 1.11 matt ctl = vtw_control(af, msl ? msl : class_to_msl(msl_class));
2230 1.1 dyoung if (!ctl)
2231 1.1 dyoung return 0;
2232 1.1 dyoung
2233 1.1 dyoung vtw = vtw_alloc(ctl);
2234 1.1 dyoung
2235 1.1 dyoung if (vtw) {
2236 1.1 dyoung vtw->snd_nxt = 0;
2237 1.1 dyoung vtw->rcv_nxt = 0;
2238 1.1 dyoung
2239 1.1 dyoung switch (af) {
2240 1.1 dyoung case AF_INET: {
2241 1.1 dyoung vtw_v4_t *v4 = (void*)vtw;
2242 1.1 dyoung
2243 1.1 dyoung v4->faddr = fa->sin_addr.v4.s_addr;
2244 1.1 dyoung v4->laddr = la->sin_addr.v4.s_addr;
2245 1.1 dyoung v4->fport = fa->sin_port;
2246 1.1 dyoung v4->lport = la->sin_port;
2247 1.1 dyoung
2248 1.1 dyoung vtw->reuse_port = 1;
2249 1.1 dyoung vtw->reuse_addr = 1;
2250 1.1 dyoung vtw->v6only = 0;
2251 1.1 dyoung vtw->uid = 0;
2252 1.1 dyoung
2253 1.1 dyoung vtw_inshash_v4(ctl, vtw);
2254 1.1 dyoung break;
2255 1.1 dyoung }
2256 1.1 dyoung
2257 1.1 dyoung case AF_INET6: {
2258 1.1 dyoung vtw_v6_t *v6 = (void*)vtw;
2259 1.1 dyoung
2260 1.1 dyoung v6->faddr = fa->sin_addr.v6;
2261 1.1 dyoung v6->laddr = la->sin_addr.v6;
2262 1.1 dyoung
2263 1.1 dyoung v6->fport = fa->sin_port;
2264 1.1 dyoung v6->lport = la->sin_port;
2265 1.1 dyoung
2266 1.1 dyoung vtw->reuse_port = 1;
2267 1.1 dyoung vtw->reuse_addr = 1;
2268 1.1 dyoung vtw->v6only = 0;
2269 1.1 dyoung vtw->uid = 0;
2270 1.1 dyoung
2271 1.1 dyoung vtw_inshash_v6(ctl, vtw);
2272 1.1 dyoung break;
2273 1.1 dyoung }
2274 1.1 dyoung
2275 1.1 dyoung default:
2276 1.1 dyoung break;
2277 1.1 dyoung }
2278 1.1 dyoung
2279 1.1 dyoung return 1;
2280 1.1 dyoung }
2281 1.1 dyoung
2282 1.1 dyoung return 0;
2283 1.1 dyoung }
2284 1.1 dyoung
/* sysent slot that vtw_debug_init() gave to vtw_sys(); zero until then. */
static int vtw_syscall;
2286 1.1 dyoung
2287 1.1 dyoung static int
2288 1.1 dyoung vtw_debug_process(vtw_sysargs_t *ap)
2289 1.1 dyoung {
2290 1.1 dyoung struct vestigial_inpcb vestige;
2291 1.1 dyoung int rc = 0;
2292 1.1 dyoung
2293 1.1 dyoung mutex_enter(softnet_lock);
2294 1.1 dyoung
2295 1.1 dyoung switch (ap->op) {
2296 1.1 dyoung case 0: // insert
2297 1.1 dyoung vtw_debug_add(ap->la.sin_family
2298 1.1 dyoung , &ap->la
2299 1.1 dyoung , &ap->fa
2300 1.1 dyoung , TCPTV_MSL
2301 1.1 dyoung , 0);
2302 1.1 dyoung break;
2303 1.1 dyoung
2304 1.1 dyoung case 1: // lookup
2305 1.1 dyoung case 2: // restart
2306 1.1 dyoung switch (ap->la.sin_family) {
2307 1.1 dyoung case AF_INET:
2308 1.1 dyoung if (tcp_lookup_v4(ap->fa.sin_addr.v4, ap->fa.sin_port,
2309 1.1 dyoung ap->la.sin_addr.v4, ap->la.sin_port,
2310 1.1 dyoung &vestige)) {
2311 1.1 dyoung if (ap->op == 2) {
2312 1.1 dyoung vtw_restart(&vestige);
2313 1.1 dyoung }
2314 1.1 dyoung rc = 0;
2315 1.1 dyoung } else
2316 1.1 dyoung rc = ESRCH;
2317 1.1 dyoung break;
2318 1.1 dyoung
2319 1.1 dyoung case AF_INET6:
2320 1.1 dyoung if (tcp_lookup_v6(&ap->fa.sin_addr.v6, ap->fa.sin_port,
2321 1.1 dyoung &ap->la.sin_addr.v6, ap->la.sin_port,
2322 1.1 dyoung &vestige)) {
2323 1.1 dyoung if (ap->op == 2) {
2324 1.1 dyoung vtw_restart(&vestige);
2325 1.1 dyoung }
2326 1.1 dyoung rc = 0;
2327 1.1 dyoung } else
2328 1.1 dyoung rc = ESRCH;
2329 1.1 dyoung break;
2330 1.1 dyoung default:
2331 1.1 dyoung rc = EINVAL;
2332 1.1 dyoung }
2333 1.1 dyoung break;
2334 1.1 dyoung
2335 1.1 dyoung default:
2336 1.1 dyoung rc = EINVAL;
2337 1.1 dyoung }
2338 1.1 dyoung
2339 1.1 dyoung mutex_exit(softnet_lock);
2340 1.1 dyoung return rc;
2341 1.1 dyoung }
2342 1.1 dyoung
2343 1.1 dyoung struct sys_vtw_args {
2344 1.1 dyoung syscallarg(const vtw_sysargs_t *) req;
2345 1.1 dyoung syscallarg(size_t) len;
2346 1.1 dyoung };
2347 1.1 dyoung
2348 1.1 dyoung static int
2349 1.1 dyoung vtw_sys(struct lwp *l, const void *_, register_t *retval)
2350 1.1 dyoung {
2351 1.1 dyoung const struct sys_vtw_args *uap = _;
2352 1.1 dyoung void *buf;
2353 1.1 dyoung int rc;
2354 1.1 dyoung size_t len = SCARG(uap, len);
2355 1.1 dyoung
2356 1.1 dyoung if (len != sizeof (vtw_sysargs_t))
2357 1.1 dyoung return EINVAL;
2358 1.1 dyoung
2359 1.1 dyoung buf = kmem_alloc(len, KM_SLEEP);
2360 1.1 dyoung rc = copyin(SCARG(uap, req), buf, len);
2361 1.1 dyoung if (!rc) {
2362 1.1 dyoung rc = vtw_debug_process(buf);
2363 1.1 dyoung }
2364 1.1 dyoung kmem_free(buf, len);
2365 1.1 dyoung
2366 1.1 dyoung return rc;
2367 1.1 dyoung }
2368 1.1 dyoung
2369 1.1 dyoung static void
2370 1.1 dyoung vtw_sanity_check(void)
2371 1.1 dyoung {
2372 1.1 dyoung vtw_ctl_t *ctl;
2373 1.1 dyoung vtw_t *vtw;
2374 1.1 dyoung int i;
2375 1.1 dyoung int n;
2376 1.1 dyoung
2377 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
2378 1.1 dyoung ctl = &vtw_tcpv4[i];
2379 1.1 dyoung
2380 1.1 dyoung if (!ctl->base.v || ctl->nalloc)
2381 1.1 dyoung continue;
2382 1.1 dyoung
2383 1.1 dyoung for (n = 0, vtw = ctl->base.v; ; ) {
2384 1.1 dyoung ++n;
2385 1.1 dyoung vtw = vtw_next(ctl, vtw);
2386 1.1 dyoung if (vtw == ctl->base.v)
2387 1.1 dyoung break;
2388 1.1 dyoung }
2389 1.1 dyoung db_trace(KTR_VTW
2390 1.1 dyoung , (ctl, "sanity: class %x n %x nfree %x"
2391 1.1 dyoung , i, n, ctl->nfree));
2392 1.1 dyoung
2393 1.1 dyoung KASSERT(n == ctl->nfree);
2394 1.1 dyoung }
2395 1.1 dyoung
2396 1.1 dyoung for (i = 0; i < VTW_NCLASS; ++i) {
2397 1.1 dyoung ctl = &vtw_tcpv6[i];
2398 1.1 dyoung
2399 1.1 dyoung if (!ctl->base.v || ctl->nalloc)
2400 1.1 dyoung continue;
2401 1.1 dyoung
2402 1.1 dyoung for (n = 0, vtw = ctl->base.v; ; ) {
2403 1.1 dyoung ++n;
2404 1.1 dyoung vtw = vtw_next(ctl, vtw);
2405 1.1 dyoung if (vtw == ctl->base.v)
2406 1.1 dyoung break;
2407 1.1 dyoung }
2408 1.1 dyoung db_trace(KTR_VTW
2409 1.1 dyoung , (ctl, "sanity: class %x n %x nfree %x"
2410 1.1 dyoung , i, n, ctl->nfree));
2411 1.1 dyoung KASSERT(n == ctl->nfree);
2412 1.1 dyoung }
2413 1.1 dyoung }
2414 1.1 dyoung
2415 1.1 dyoung /*!\brief Initialise debug support.
2416 1.1 dyoung */
2417 1.1 dyoung static void
2418 1.1 dyoung vtw_debug_init(void)
2419 1.1 dyoung {
2420 1.1 dyoung int i;
2421 1.1 dyoung
2422 1.1 dyoung vtw_sanity_check();
2423 1.1 dyoung
2424 1.1 dyoung if (vtw_syscall)
2425 1.1 dyoung return;
2426 1.1 dyoung
2427 1.1 dyoung for (i = 511; i; --i) {
2428 1.1 dyoung if (sysent[i].sy_call == sys_nosys) {
2429 1.1 dyoung sysent[i].sy_call = vtw_sys;
2430 1.1 dyoung sysent[i].sy_narg = 2;
2431 1.1 dyoung sysent[i].sy_argsize = sizeof (struct sys_vtw_args);
2432 1.1 dyoung sysent[i].sy_flags = 0;
2433 1.1 dyoung
2434 1.1 dyoung vtw_syscall = i;
2435 1.1 dyoung break;
2436 1.1 dyoung }
2437 1.1 dyoung }
2438 1.1 dyoung if (i) {
2439 1.1 dyoung const struct sysctlnode *node;
2440 1.1 dyoung uint32_t flags;
2441 1.1 dyoung
2442 1.1 dyoung flags = sysctl_root.sysctl_flags;
2443 1.1 dyoung
2444 1.1 dyoung sysctl_root.sysctl_flags |= CTLFLAG_READWRITE;
2445 1.1 dyoung sysctl_root.sysctl_flags &= ~CTLFLAG_PERMANENT;
2446 1.1 dyoung
2447 1.1 dyoung sysctl_createv(0, 0, 0, &node,
2448 1.1 dyoung CTLFLAG_PERMANENT, CTLTYPE_NODE,
2449 1.1 dyoung "koff",
2450 1.1 dyoung SYSCTL_DESCR("Kernel Obscure Feature Finder"),
2451 1.1 dyoung 0, 0, 0, 0, CTL_CREATE, CTL_EOL);
2452 1.1 dyoung
2453 1.1 dyoung if (!node) {
2454 1.1 dyoung sysctl_createv(0, 0, 0, &node,
2455 1.1 dyoung CTLFLAG_PERMANENT, CTLTYPE_NODE,
2456 1.1 dyoung "koffka",
2457 1.1 dyoung SYSCTL_DESCR("The Real(tm) Kernel"
2458 1.1 dyoung " Obscure Feature Finder"),
2459 1.1 dyoung 0, 0, 0, 0, CTL_CREATE, CTL_EOL);
2460 1.1 dyoung }
2461 1.1 dyoung if (node) {
2462 1.1 dyoung sysctl_createv(0, 0, 0, 0,
2463 1.1 dyoung CTLFLAG_PERMANENT|CTLFLAG_READONLY,
2464 1.1 dyoung CTLTYPE_INT, "vtw_debug_syscall",
2465 1.1 dyoung SYSCTL_DESCR("vtw debug"
2466 1.1 dyoung " system call number"),
2467 1.1 dyoung 0, 0, &vtw_syscall, 0, node->sysctl_num,
2468 1.1 dyoung CTL_CREATE, CTL_EOL);
2469 1.1 dyoung }
2470 1.1 dyoung sysctl_root.sysctl_flags = flags;
2471 1.1 dyoung }
2472 1.1 dyoung }
2473 1.1 dyoung #else /* !VTW_DEBUG */
/* Debug support is compiled out: nothing to initialise. */
static void
vtw_debug_init(void)
{
}
2479 1.1 dyoung #endif /* !VTW_DEBUG */
2480