ip_encap.c revision 1.76 1 1.76 knakahar /* $NetBSD: ip_encap.c,v 1.76 2022/12/07 08:28:46 knakahara Exp $ */
2 1.7 itojun /* $KAME: ip_encap.c,v 1.73 2001/10/02 08:30:58 itojun Exp $ */
3 1.1 itojun
4 1.1 itojun /*
5 1.1 itojun * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 1.1 itojun * All rights reserved.
7 1.1 itojun *
8 1.1 itojun * Redistribution and use in source and binary forms, with or without
9 1.1 itojun * modification, are permitted provided that the following conditions
10 1.1 itojun * are met:
11 1.1 itojun * 1. Redistributions of source code must retain the above copyright
12 1.1 itojun * notice, this list of conditions and the following disclaimer.
13 1.1 itojun * 2. Redistributions in binary form must reproduce the above copyright
14 1.1 itojun * notice, this list of conditions and the following disclaimer in the
15 1.1 itojun * documentation and/or other materials provided with the distribution.
16 1.1 itojun * 3. Neither the name of the project nor the names of its contributors
17 1.1 itojun * may be used to endorse or promote products derived from this software
18 1.1 itojun * without specific prior written permission.
19 1.1 itojun *
20 1.1 itojun * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 1.1 itojun * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 1.1 itojun * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 1.1 itojun * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 1.1 itojun * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 1.1 itojun * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 1.1 itojun * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 1.1 itojun * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 1.1 itojun * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 1.1 itojun * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 1.1 itojun * SUCH DAMAGE.
31 1.1 itojun */
32 1.1 itojun /*
33 1.1 itojun * My grandfather said that there's a devil inside tunnelling technology...
34 1.1 itojun *
35 1.1 itojun * We have surprisingly many protocols that want packets with IP protocol
36 1.1 itojun * #4 or #41. Here's a list of protocols that want protocol #41:
37 1.1 itojun * RFC1933 configured tunnel
38 1.1 itojun * RFC1933 automatic tunnel
39 1.1 itojun * RFC2401 IPsec tunnel
40 1.1 itojun * RFC2473 IPv6 generic packet tunnelling
41 1.1 itojun * RFC2529 6over4 tunnel
42 1.7 itojun * RFC3056 6to4 tunnel
43 1.7 itojun * isatap tunnel
44 1.1 itojun * mobile-ip6 (uses RFC2473)
45 1.1 itojun * Here's a list of protocols that want protocol #4:
46 1.1 itojun * RFC1853 IPv4-in-IPv4 tunnelling
47 1.1 itojun * RFC2003 IPv4 encapsulation within IPv4
48 1.1 itojun * RFC2344 reverse tunnelling for mobile-ip4
49 1.1 itojun * RFC2401 IPsec tunnel
50 1.1 itojun * Well, what can I say. They impose different en/decapsulation mechanism
51 1.1 itojun * from each other, so they need separate protocol handler. The only one
52 1.1 itojun * we can easily determine by protocol # is IPsec, which always has
53 1.1 itojun * AH/ESP/IPComp header right after outer IP header.
54 1.1 itojun *
55 1.1 itojun * So, clearly good old protosw does not work for protocol #4 and #41.
56 1.1 itojun * The code will let you match protocol via src/dst address pair.
57 1.1 itojun */
58 1.1 itojun /* XXX is M_NETADDR correct? */
59 1.6 lukem
/*
 * With USE_RADIX the code will use a radix table for tunnel lookup, for
 * tunnels registered with encap_attach() with an addr/mask pair.
 * Faster on machines with thousands of tunnel registrations (= interfaces).
 *
 * The code assumes that the radix table code can handle non-contiguous
 * netmasks, as it will pass the radix table a memory region holding a
 * (src + dst) sockaddr pair.
 */
68 1.55 knakahar #define USE_RADIX
69 1.7 itojun
70 1.6 lukem #include <sys/cdefs.h>
71 1.76 knakahar __KERNEL_RCSID(0, "$NetBSD: ip_encap.c,v 1.76 2022/12/07 08:28:46 knakahara Exp $");
72 1.1 itojun
73 1.46 pooka #ifdef _KERNEL_OPT
74 1.4 itojun #include "opt_mrouting.h"
75 1.4 itojun #include "opt_inet.h"
76 1.61 knakahar #include "opt_net_mpsafe.h"
77 1.46 pooka #endif
78 1.1 itojun
79 1.1 itojun #include <sys/param.h>
80 1.1 itojun #include <sys/systm.h>
81 1.1 itojun #include <sys/socket.h>
82 1.71 knakahar #include <sys/socketvar.h> /* for softnet_lock */
83 1.1 itojun #include <sys/sockio.h>
84 1.1 itojun #include <sys/mbuf.h>
85 1.1 itojun #include <sys/errno.h>
86 1.4 itojun #include <sys/queue.h>
87 1.47 knakahar #include <sys/kmem.h>
88 1.56 knakahar #include <sys/mutex.h>
89 1.59 knakahar #include <sys/condvar.h>
90 1.56 knakahar #include <sys/psref.h>
91 1.56 knakahar #include <sys/pslist.h>
92 1.76 knakahar #include <sys/thmap.h>
93 1.1 itojun
94 1.1 itojun #include <net/if.h>
95 1.1 itojun
96 1.1 itojun #include <netinet/in.h>
97 1.1 itojun #include <netinet/in_systm.h>
98 1.1 itojun #include <netinet/ip.h>
99 1.1 itojun #include <netinet/ip_var.h>
100 1.1 itojun #include <netinet/ip_encap.h>
101 1.1 itojun #ifdef MROUTING
102 1.1 itojun #include <netinet/ip_mroute.h>
103 1.1 itojun #endif /* MROUTING */
104 1.1 itojun
105 1.1 itojun #ifdef INET6
106 1.1 itojun #include <netinet/ip6.h>
107 1.1 itojun #include <netinet6/ip6_var.h>
108 1.51 knakahar #include <netinet6/ip6protosw.h> /* for struct ip6ctlparam */
109 1.7 itojun #include <netinet6/in6_var.h>
110 1.7 itojun #include <netinet6/in6_pcb.h>
111 1.7 itojun #include <netinet/icmp6.h>
112 1.1 itojun #endif
113 1.1 itojun
114 1.61 knakahar #ifdef NET_MPSAFE
115 1.61 knakahar #define ENCAP_MPSAFE 1
116 1.61 knakahar #endif
117 1.61 knakahar
/*
 * Direction of the packet being looked up.  For INBOUND packets the
 * lookup treats the IP destination as "mine" and the IP source as
 * "yours"; for OUTBOUND packets the roles are swapped.
 */
enum direction {
	INBOUND,
	OUTBOUND
};

/* Per-family lookup of the best matching encaptab entry. */
#ifdef INET
static struct encaptab *encap4_lookup(struct mbuf *m, int off, int proto,
    enum direction dir, struct psref *match_psref);
#endif
#ifdef INET6
static struct encaptab *encap6_lookup(struct mbuf *m, int off, int proto,
    enum direction dir, struct psref *match_psref);
#endif

/* Table maintenance and argument checking. */
static int encap_add(struct encaptab *ep);
static int encap_remove(struct encaptab *ep);
static void encap_afcheck(int af, const struct sockaddr *sp,
    const struct sockaddr *dp);

/* Address/mask matching helpers; which set exists depends on USE_RADIX. */
#ifdef USE_RADIX
static struct radix_node_head *encap_rnh(int af);
static int mask_matchlen(const struct sockaddr *sa);
#else
static int mask_match(const struct encaptab *ep, const struct sockaddr *sp,
    const struct sockaddr *dp);
#endif

/* Helpers for the keys used with the encap_map[] thmap tables. */
static void encap_key_init(struct encap_key *key, const struct sockaddr *local,
    const struct sockaddr *remote);
static void encap_key_inc(struct encap_key *key);
141 1.1 itojun
142 1.56 knakahar /*
143 1.56 knakahar * In encap[46]_lookup(), ep->func can sleep(e.g. rtalloc1) while walking
144 1.56 knakahar * encap_table. So, it cannot use pserialize_read_enter()
145 1.56 knakahar */
146 1.56 knakahar static struct {
147 1.56 knakahar struct pslist_head list;
148 1.56 knakahar pserialize_t psz;
149 1.56 knakahar struct psref_class *elem_class; /* for the element of et_list */
150 1.56 knakahar } encaptab __cacheline_aligned = {
151 1.56 knakahar .list = PSLIST_INITIALIZER,
152 1.56 knakahar };
153 1.56 knakahar #define encap_table encaptab.list
154 1.1 itojun
155 1.59 knakahar static struct {
156 1.59 knakahar kmutex_t lock;
157 1.59 knakahar kcondvar_t cv;
158 1.59 knakahar struct lwp *busy;
159 1.59 knakahar } encap_whole __cacheline_aligned;
160 1.59 knakahar
161 1.55 knakahar #ifdef USE_RADIX
162 1.7 itojun struct radix_node_head *encap_head[2]; /* 0 for AF_INET, 1 for AF_INET6 */
163 1.57 knakahar static bool encap_head_updating = false;
164 1.55 knakahar #endif
165 1.7 itojun
166 1.76 knakahar static thmap_t *encap_map[2]; /* 0 for AF_INET, 1 for AF_INET6 */
167 1.76 knakahar
168 1.63 ozaki static bool encap_initialized = false;
169 1.59 knakahar /*
170 1.59 knakahar * must be done before other encap interfaces initialization.
171 1.59 knakahar */
172 1.59 knakahar void
173 1.59 knakahar encapinit(void)
174 1.59 knakahar {
175 1.59 knakahar
176 1.63 ozaki if (encap_initialized)
177 1.63 ozaki return;
178 1.63 ozaki
179 1.60 knakahar encaptab.psz = pserialize_create();
180 1.60 knakahar encaptab.elem_class = psref_class_create("encapelem", IPL_SOFTNET);
181 1.60 knakahar
182 1.59 knakahar mutex_init(&encap_whole.lock, MUTEX_DEFAULT, IPL_NONE);
183 1.59 knakahar cv_init(&encap_whole.cv, "ip_encap cv");
184 1.59 knakahar encap_whole.busy = NULL;
185 1.63 ozaki
186 1.63 ozaki encap_initialized = true;
187 1.59 knakahar }
188 1.59 knakahar
189 1.1 itojun void
190 1.23 perry encap_init(void)
191 1.1 itojun {
192 1.7 itojun static int initialized = 0;
193 1.7 itojun
194 1.7 itojun if (initialized)
195 1.7 itojun return;
196 1.7 itojun initialized++;
197 1.1 itojun #if 0
198 1.1 itojun /*
199 1.1 itojun * we cannot use LIST_INIT() here, since drivers may want to call
200 1.4 itojun * encap_attach(), on driver attach. encap_init() will be called
201 1.1 itojun * on AF_INET{,6} initialization, which happens after driver
202 1.1 itojun * initialization - using LIST_INIT() here can nuke encap_attach()
203 1.1 itojun * from drivers.
204 1.1 itojun */
205 1.56 knakahar PSLIST_INIT(&encap_table);
206 1.1 itojun #endif
207 1.7 itojun
208 1.55 knakahar #ifdef USE_RADIX
209 1.7 itojun /*
210 1.38 pooka * initialize radix lookup table when the radix subsystem is inited.
211 1.7 itojun */
212 1.38 pooka rn_delayedinit((void *)&encap_head[0],
213 1.38 pooka sizeof(struct sockaddr_pack) << 3);
214 1.7 itojun #ifdef INET6
215 1.38 pooka rn_delayedinit((void *)&encap_head[1],
216 1.38 pooka sizeof(struct sockaddr_pack) << 3);
217 1.7 itojun #endif
218 1.55 knakahar #endif
219 1.76 knakahar
220 1.76 knakahar encap_map[0] = thmap_create(0, NULL, THMAP_NOCOPY);
221 1.76 knakahar #ifdef INET6
222 1.76 knakahar encap_map[1] = thmap_create(0, NULL, THMAP_NOCOPY);
223 1.76 knakahar #endif
224 1.1 itojun }
225 1.1 itojun
226 1.4 itojun #ifdef INET
227 1.7 itojun static struct encaptab *
228 1.56 knakahar encap4_lookup(struct mbuf *m, int off, int proto, enum direction dir,
229 1.56 knakahar struct psref *match_psref)
230 1.1 itojun {
231 1.1 itojun struct ip *ip;
232 1.33 pooka struct ip_pack4 pack;
233 1.1 itojun struct encaptab *ep, *match;
234 1.1 itojun int prio, matchprio;
235 1.56 knakahar int s;
236 1.55 knakahar #ifdef USE_RADIX
237 1.7 itojun struct radix_node_head *rnh = encap_rnh(AF_INET);
238 1.7 itojun struct radix_node *rn;
239 1.55 knakahar #endif
240 1.76 knakahar thmap_t *emap = encap_map[0];
241 1.76 knakahar struct encap_key key;
242 1.1 itojun
243 1.41 ozaki KASSERT(m->m_len >= sizeof(*ip));
244 1.41 ozaki
245 1.1 itojun ip = mtod(m, struct ip *);
246 1.1 itojun
247 1.35 cegger memset(&pack, 0, sizeof(pack));
248 1.7 itojun pack.p.sp_len = sizeof(pack);
249 1.7 itojun pack.mine.sin_family = pack.yours.sin_family = AF_INET;
250 1.7 itojun pack.mine.sin_len = pack.yours.sin_len = sizeof(struct sockaddr_in);
251 1.7 itojun if (dir == INBOUND) {
252 1.7 itojun pack.mine.sin_addr = ip->ip_dst;
253 1.7 itojun pack.yours.sin_addr = ip->ip_src;
254 1.7 itojun } else {
255 1.7 itojun pack.mine.sin_addr = ip->ip_src;
256 1.7 itojun pack.yours.sin_addr = ip->ip_dst;
257 1.7 itojun }
258 1.1 itojun
259 1.1 itojun match = NULL;
260 1.1 itojun matchprio = 0;
261 1.7 itojun
262 1.56 knakahar s = pserialize_read_enter();
263 1.55 knakahar #ifdef USE_RADIX
264 1.57 knakahar if (encap_head_updating) {
265 1.57 knakahar /*
266 1.57 knakahar * Update in progress. Do nothing.
267 1.57 knakahar */
268 1.57 knakahar pserialize_read_exit(s);
269 1.57 knakahar return NULL;
270 1.57 knakahar }
271 1.57 knakahar
272 1.30 christos rn = rnh->rnh_matchaddr((void *)&pack, rnh);
273 1.7 itojun if (rn && (rn->rn_flags & RNF_ROOT) == 0) {
274 1.56 knakahar struct encaptab *encapp = (struct encaptab *)rn;
275 1.56 knakahar
276 1.56 knakahar psref_acquire(match_psref, &encapp->psref,
277 1.56 knakahar encaptab.elem_class);
278 1.56 knakahar match = encapp;
279 1.7 itojun matchprio = mask_matchlen(match->srcmask) +
280 1.7 itojun mask_matchlen(match->dstmask);
281 1.7 itojun }
282 1.55 knakahar #endif
283 1.76 knakahar
284 1.76 knakahar encap_key_init(&key, sintosa(&pack.mine), sintosa(&pack.yours));
285 1.76 knakahar while ((ep = thmap_get(emap, &key, sizeof(key))) != NULL) {
286 1.76 knakahar struct psref elem_psref;
287 1.76 knakahar
288 1.76 knakahar KASSERT(ep->af == AF_INET);
289 1.76 knakahar
290 1.76 knakahar if (ep->proto >= 0 && ep->proto != proto) {
291 1.76 knakahar encap_key_inc(&key);
292 1.76 knakahar continue;
293 1.76 knakahar }
294 1.76 knakahar
295 1.76 knakahar psref_acquire(&elem_psref, &ep->psref,
296 1.76 knakahar encaptab.elem_class);
297 1.76 knakahar if (ep->func) {
298 1.76 knakahar pserialize_read_exit(s);
299 1.76 knakahar prio = (*ep->func)(m, off, proto, ep->arg);
300 1.76 knakahar s = pserialize_read_enter();
301 1.76 knakahar } else {
302 1.76 knakahar prio = pack.mine.sin_len + pack.yours.sin_len;
303 1.76 knakahar }
304 1.76 knakahar
305 1.76 knakahar if (prio <= 0) {
306 1.76 knakahar psref_release(&elem_psref, &ep->psref,
307 1.76 knakahar encaptab.elem_class);
308 1.76 knakahar encap_key_inc(&key);
309 1.76 knakahar continue;
310 1.76 knakahar }
311 1.76 knakahar if (prio > matchprio) {
312 1.76 knakahar /* release last matched ep */
313 1.76 knakahar if (match != NULL)
314 1.76 knakahar psref_release(match_psref, &match->psref,
315 1.76 knakahar encaptab.elem_class);
316 1.76 knakahar
317 1.76 knakahar psref_copy(match_psref, &elem_psref,
318 1.76 knakahar encaptab.elem_class);
319 1.76 knakahar matchprio = prio;
320 1.76 knakahar match = ep;
321 1.76 knakahar }
322 1.76 knakahar
323 1.76 knakahar psref_release(&elem_psref, &ep->psref,
324 1.76 knakahar encaptab.elem_class);
325 1.76 knakahar encap_key_inc(&key);
326 1.76 knakahar }
327 1.76 knakahar
328 1.56 knakahar PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) {
329 1.56 knakahar struct psref elem_psref;
330 1.56 knakahar
331 1.1 itojun if (ep->af != AF_INET)
332 1.1 itojun continue;
333 1.1 itojun if (ep->proto >= 0 && ep->proto != proto)
334 1.1 itojun continue;
335 1.56 knakahar
336 1.56 knakahar psref_acquire(&elem_psref, &ep->psref,
337 1.56 knakahar encaptab.elem_class);
338 1.56 knakahar if (ep->func) {
339 1.56 knakahar pserialize_read_exit(s);
340 1.56 knakahar /* ep->func is sleepable. e.g. rtalloc1 */
341 1.1 itojun prio = (*ep->func)(m, off, proto, ep->arg);
342 1.56 knakahar s = pserialize_read_enter();
343 1.56 knakahar } else {
344 1.55 knakahar #ifdef USE_RADIX
345 1.56 knakahar psref_release(&elem_psref, &ep->psref,
346 1.56 knakahar encaptab.elem_class);
347 1.7 itojun continue;
348 1.55 knakahar #else
349 1.55 knakahar prio = mask_match(ep, (struct sockaddr *)&pack.mine,
350 1.55 knakahar (struct sockaddr *)&pack.yours);
351 1.55 knakahar #endif
352 1.55 knakahar }
353 1.1 itojun
354 1.1 itojun /*
355 1.1 itojun * We prioritize the matches by using bit length of the
356 1.1 itojun * matches. mask_match() and user-supplied matching function
357 1.1 itojun * should return the bit length of the matches (for example,
358 1.1 itojun * if both src/dst are matched for IPv4, 64 should be returned).
359 1.1 itojun * 0 or negative return value means "it did not match".
360 1.1 itojun *
361 1.1 itojun * The question is, since we have two "mask" portion, we
362 1.1 itojun * cannot really define total order between entries.
363 1.1 itojun * For example, which of these should be preferred?
364 1.1 itojun * mask_match() returns 48 (32 + 16) for both of them.
365 1.1 itojun * src=3ffe::/16, dst=3ffe:501::/32
366 1.1 itojun * src=3ffe:501::/32, dst=3ffe::/16
367 1.1 itojun *
368 1.1 itojun * We need to loop through all the possible candidates
369 1.1 itojun * to get the best match - the search takes O(n) for
370 1.1 itojun * n attachments (i.e. interfaces).
371 1.7 itojun *
372 1.7 itojun * For radix-based lookup, I guess source takes precedence.
373 1.7 itojun * See rn_{refines,lexobetter} for the correct answer.
374 1.1 itojun */
375 1.56 knakahar if (prio <= 0) {
376 1.56 knakahar psref_release(&elem_psref, &ep->psref,
377 1.56 knakahar encaptab.elem_class);
378 1.1 itojun continue;
379 1.56 knakahar }
380 1.1 itojun if (prio > matchprio) {
381 1.56 knakahar /* release last matched ep */
382 1.56 knakahar if (match != NULL)
383 1.56 knakahar psref_release(match_psref, &match->psref,
384 1.56 knakahar encaptab.elem_class);
385 1.56 knakahar
386 1.56 knakahar psref_copy(match_psref, &elem_psref,
387 1.56 knakahar encaptab.elem_class);
388 1.1 itojun matchprio = prio;
389 1.1 itojun match = ep;
390 1.1 itojun }
391 1.56 knakahar KASSERTMSG((match == NULL) || psref_held(&match->psref,
392 1.56 knakahar encaptab.elem_class),
393 1.56 knakahar "current match = %p, but not hold its psref", match);
394 1.56 knakahar
395 1.56 knakahar psref_release(&elem_psref, &ep->psref,
396 1.56 knakahar encaptab.elem_class);
397 1.1 itojun }
398 1.56 knakahar pserialize_read_exit(s);
399 1.1 itojun
400 1.7 itojun return match;
401 1.7 itojun }
402 1.7 itojun
403 1.7 itojun void
404 1.70 maxv encap4_input(struct mbuf *m, int off, int proto)
405 1.7 itojun {
406 1.51 knakahar const struct encapsw *esw;
407 1.7 itojun struct encaptab *match;
408 1.56 knakahar struct psref match_psref;
409 1.7 itojun
410 1.56 knakahar match = encap4_lookup(m, off, proto, INBOUND, &match_psref);
411 1.1 itojun if (match) {
412 1.1 itojun /* found a match, "match" has the best one */
413 1.51 knakahar esw = match->esw;
414 1.51 knakahar if (esw && esw->encapsw4.pr_input) {
415 1.66 knakahar (*esw->encapsw4.pr_input)(m, off, proto, match->arg);
416 1.56 knakahar psref_release(&match_psref, &match->psref,
417 1.56 knakahar encaptab.elem_class);
418 1.54 knakahar } else {
419 1.56 knakahar psref_release(&match_psref, &match->psref,
420 1.56 knakahar encaptab.elem_class);
421 1.1 itojun m_freem(m);
422 1.54 knakahar }
423 1.1 itojun return;
424 1.1 itojun }
425 1.1 itojun
426 1.1 itojun /* last resort: inject to raw socket */
427 1.69 knakahar SOFTNET_LOCK_IF_NET_MPSAFE();
428 1.1 itojun rip_input(m, off, proto);
429 1.69 knakahar SOFTNET_UNLOCK_IF_NET_MPSAFE();
430 1.1 itojun }
431 1.1 itojun #endif
432 1.1 itojun
433 1.1 itojun #ifdef INET6
434 1.7 itojun static struct encaptab *
435 1.56 knakahar encap6_lookup(struct mbuf *m, int off, int proto, enum direction dir,
436 1.56 knakahar struct psref *match_psref)
437 1.1 itojun {
438 1.1 itojun struct ip6_hdr *ip6;
439 1.33 pooka struct ip_pack6 pack;
440 1.7 itojun int prio, matchprio;
441 1.56 knakahar int s;
442 1.1 itojun struct encaptab *ep, *match;
443 1.55 knakahar #ifdef USE_RADIX
444 1.7 itojun struct radix_node_head *rnh = encap_rnh(AF_INET6);
445 1.7 itojun struct radix_node *rn;
446 1.55 knakahar #endif
447 1.76 knakahar thmap_t *emap = encap_map[1];
448 1.76 knakahar struct encap_key key;
449 1.1 itojun
450 1.41 ozaki KASSERT(m->m_len >= sizeof(*ip6));
451 1.41 ozaki
452 1.1 itojun ip6 = mtod(m, struct ip6_hdr *);
453 1.1 itojun
454 1.35 cegger memset(&pack, 0, sizeof(pack));
455 1.7 itojun pack.p.sp_len = sizeof(pack);
456 1.7 itojun pack.mine.sin6_family = pack.yours.sin6_family = AF_INET6;
457 1.7 itojun pack.mine.sin6_len = pack.yours.sin6_len = sizeof(struct sockaddr_in6);
458 1.7 itojun if (dir == INBOUND) {
459 1.7 itojun pack.mine.sin6_addr = ip6->ip6_dst;
460 1.7 itojun pack.yours.sin6_addr = ip6->ip6_src;
461 1.7 itojun } else {
462 1.7 itojun pack.mine.sin6_addr = ip6->ip6_src;
463 1.7 itojun pack.yours.sin6_addr = ip6->ip6_dst;
464 1.7 itojun }
465 1.1 itojun
466 1.1 itojun match = NULL;
467 1.1 itojun matchprio = 0;
468 1.7 itojun
469 1.56 knakahar s = pserialize_read_enter();
470 1.55 knakahar #ifdef USE_RADIX
471 1.57 knakahar if (encap_head_updating) {
472 1.57 knakahar /*
473 1.57 knakahar * Update in progress. Do nothing.
474 1.57 knakahar */
475 1.57 knakahar pserialize_read_exit(s);
476 1.57 knakahar return NULL;
477 1.57 knakahar }
478 1.57 knakahar
479 1.30 christos rn = rnh->rnh_matchaddr((void *)&pack, rnh);
480 1.7 itojun if (rn && (rn->rn_flags & RNF_ROOT) == 0) {
481 1.56 knakahar struct encaptab *encapp = (struct encaptab *)rn;
482 1.56 knakahar
483 1.56 knakahar psref_acquire(match_psref, &encapp->psref,
484 1.56 knakahar encaptab.elem_class);
485 1.56 knakahar match = encapp;
486 1.7 itojun matchprio = mask_matchlen(match->srcmask) +
487 1.7 itojun mask_matchlen(match->dstmask);
488 1.7 itojun }
489 1.55 knakahar #endif
490 1.76 knakahar
491 1.76 knakahar encap_key_init(&key, sin6tosa(&pack.mine), sin6tosa(&pack.yours));
492 1.76 knakahar while ((ep = thmap_get(emap, &key, sizeof(key))) != NULL) {
493 1.76 knakahar struct psref elem_psref;
494 1.76 knakahar
495 1.76 knakahar KASSERT(ep->af == AF_INET6);
496 1.76 knakahar
497 1.76 knakahar if (ep->proto >= 0 && ep->proto != proto) {
498 1.76 knakahar encap_key_inc(&key);
499 1.76 knakahar continue;
500 1.76 knakahar }
501 1.76 knakahar
502 1.76 knakahar psref_acquire(&elem_psref, &ep->psref,
503 1.76 knakahar encaptab.elem_class);
504 1.76 knakahar if (ep->func) {
505 1.76 knakahar pserialize_read_exit(s);
506 1.76 knakahar prio = (*ep->func)(m, off, proto, ep->arg);
507 1.76 knakahar s = pserialize_read_enter();
508 1.76 knakahar } else {
509 1.76 knakahar prio = pack.mine.sin6_len + pack.yours.sin6_len;
510 1.76 knakahar }
511 1.76 knakahar
512 1.76 knakahar if (prio <= 0) {
513 1.76 knakahar psref_release(&elem_psref, &ep->psref,
514 1.76 knakahar encaptab.elem_class);
515 1.76 knakahar encap_key_inc(&key);
516 1.76 knakahar continue;
517 1.76 knakahar }
518 1.76 knakahar if (prio > matchprio) {
519 1.76 knakahar /* release last matched ep */
520 1.76 knakahar if (match != NULL)
521 1.76 knakahar psref_release(match_psref, &match->psref,
522 1.76 knakahar encaptab.elem_class);
523 1.76 knakahar
524 1.76 knakahar psref_copy(match_psref, &elem_psref,
525 1.76 knakahar encaptab.elem_class);
526 1.76 knakahar matchprio = prio;
527 1.76 knakahar match = ep;
528 1.76 knakahar }
529 1.76 knakahar psref_release(&elem_psref, &ep->psref,
530 1.76 knakahar encaptab.elem_class);
531 1.76 knakahar encap_key_inc(&key);
532 1.76 knakahar }
533 1.76 knakahar
534 1.56 knakahar PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) {
535 1.56 knakahar struct psref elem_psref;
536 1.56 knakahar
537 1.1 itojun if (ep->af != AF_INET6)
538 1.1 itojun continue;
539 1.1 itojun if (ep->proto >= 0 && ep->proto != proto)
540 1.1 itojun continue;
541 1.56 knakahar
542 1.56 knakahar psref_acquire(&elem_psref, &ep->psref,
543 1.56 knakahar encaptab.elem_class);
544 1.56 knakahar
545 1.56 knakahar if (ep->func) {
546 1.56 knakahar pserialize_read_exit(s);
547 1.56 knakahar /* ep->func is sleepable. e.g. rtalloc1 */
548 1.7 itojun prio = (*ep->func)(m, off, proto, ep->arg);
549 1.56 knakahar s = pserialize_read_enter();
550 1.56 knakahar } else {
551 1.55 knakahar #ifdef USE_RADIX
552 1.56 knakahar psref_release(&elem_psref, &ep->psref,
553 1.56 knakahar encaptab.elem_class);
554 1.7 itojun continue;
555 1.55 knakahar #else
556 1.55 knakahar prio = mask_match(ep, (struct sockaddr *)&pack.mine,
557 1.55 knakahar (struct sockaddr *)&pack.yours);
558 1.55 knakahar #endif
559 1.55 knakahar }
560 1.1 itojun
561 1.7 itojun /* see encap4_lookup() for issues here */
562 1.56 knakahar if (prio <= 0) {
563 1.56 knakahar psref_release(&elem_psref, &ep->psref,
564 1.56 knakahar encaptab.elem_class);
565 1.1 itojun continue;
566 1.56 knakahar }
567 1.1 itojun if (prio > matchprio) {
568 1.56 knakahar /* release last matched ep */
569 1.56 knakahar if (match != NULL)
570 1.56 knakahar psref_release(match_psref, &match->psref,
571 1.56 knakahar encaptab.elem_class);
572 1.56 knakahar
573 1.56 knakahar psref_copy(match_psref, &elem_psref,
574 1.56 knakahar encaptab.elem_class);
575 1.1 itojun matchprio = prio;
576 1.1 itojun match = ep;
577 1.1 itojun }
578 1.56 knakahar KASSERTMSG((match == NULL) || psref_held(&match->psref,
579 1.56 knakahar encaptab.elem_class),
580 1.56 knakahar "current match = %p, but not hold its psref", match);
581 1.56 knakahar
582 1.56 knakahar psref_release(&elem_psref, &ep->psref,
583 1.56 knakahar encaptab.elem_class);
584 1.1 itojun }
585 1.56 knakahar pserialize_read_exit(s);
586 1.1 itojun
587 1.7 itojun return match;
588 1.7 itojun }
589 1.7 itojun
590 1.7 itojun int
591 1.23 perry encap6_input(struct mbuf **mp, int *offp, int proto)
592 1.7 itojun {
593 1.7 itojun struct mbuf *m = *mp;
594 1.51 knakahar const struct encapsw *esw;
595 1.7 itojun struct encaptab *match;
596 1.56 knakahar struct psref match_psref;
597 1.69 knakahar int rv;
598 1.7 itojun
599 1.56 knakahar match = encap6_lookup(m, *offp, proto, INBOUND, &match_psref);
600 1.7 itojun
601 1.1 itojun if (match) {
602 1.1 itojun /* found a match */
603 1.51 knakahar esw = match->esw;
604 1.51 knakahar if (esw && esw->encapsw6.pr_input) {
605 1.56 knakahar int ret;
606 1.66 knakahar ret = (*esw->encapsw6.pr_input)(mp, offp, proto,
607 1.66 knakahar match->arg);
608 1.56 knakahar psref_release(&match_psref, &match->psref,
609 1.56 knakahar encaptab.elem_class);
610 1.56 knakahar return ret;
611 1.1 itojun } else {
612 1.56 knakahar psref_release(&match_psref, &match->psref,
613 1.56 knakahar encaptab.elem_class);
614 1.1 itojun m_freem(m);
615 1.1 itojun return IPPROTO_DONE;
616 1.1 itojun }
617 1.1 itojun }
618 1.1 itojun
619 1.1 itojun /* last resort: inject to raw socket */
620 1.69 knakahar SOFTNET_LOCK_IF_NET_MPSAFE();
621 1.69 knakahar rv = rip6_input(mp, offp, proto);
622 1.69 knakahar SOFTNET_UNLOCK_IF_NET_MPSAFE();
623 1.69 knakahar return rv;
624 1.1 itojun }
625 1.1 itojun #endif
626 1.1 itojun
627 1.54 knakahar /*
628 1.54 knakahar * XXX
629 1.54 knakahar * The encaptab list and the rnh radix tree must be manipulated atomically.
630 1.54 knakahar */
631 1.7 itojun static int
632 1.23 perry encap_add(struct encaptab *ep)
633 1.1 itojun {
634 1.55 knakahar #ifdef USE_RADIX
635 1.7 itojun struct radix_node_head *rnh = encap_rnh(ep->af);
636 1.55 knakahar #endif
637 1.1 itojun
638 1.56 knakahar KASSERT(encap_lock_held());
639 1.54 knakahar
640 1.55 knakahar #ifdef USE_RADIX
641 1.7 itojun if (!ep->func && rnh) {
642 1.57 knakahar /* Disable access to the radix tree for reader. */
643 1.57 knakahar encap_head_updating = true;
644 1.56 knakahar /* Wait for all readers to drain. */
645 1.56 knakahar pserialize_perform(encaptab.psz);
646 1.56 knakahar
647 1.30 christos if (!rnh->rnh_addaddr((void *)ep->addrpack,
648 1.30 christos (void *)ep->maskpack, rnh, ep->nodes)) {
649 1.57 knakahar encap_head_updating = false;
650 1.56 knakahar return EEXIST;
651 1.7 itojun }
652 1.57 knakahar
653 1.57 knakahar /*
654 1.57 knakahar * The ep added to the radix tree must be skipped while
655 1.57 knakahar * encap[46]_lookup walks encaptab list. In other words,
656 1.57 knakahar * encap_add() does not need to care whether the ep has
657 1.57 knakahar * been added encaptab list or not yet.
658 1.57 knakahar * So, we can re-enable access to the radix tree for now.
659 1.57 knakahar */
660 1.57 knakahar encap_head_updating = false;
661 1.7 itojun }
662 1.55 knakahar #endif
663 1.56 knakahar PSLIST_WRITER_INSERT_HEAD(&encap_table, ep, chain);
664 1.7 itojun
665 1.56 knakahar return 0;
666 1.7 itojun }
667 1.7 itojun
668 1.54 knakahar /*
669 1.54 knakahar * XXX
670 1.54 knakahar * The encaptab list and the rnh radix tree must be manipulated atomically.
671 1.54 knakahar */
672 1.7 itojun static int
673 1.23 perry encap_remove(struct encaptab *ep)
674 1.7 itojun {
675 1.55 knakahar #ifdef USE_RADIX
676 1.7 itojun struct radix_node_head *rnh = encap_rnh(ep->af);
677 1.55 knakahar #endif
678 1.7 itojun int error = 0;
679 1.7 itojun
680 1.56 knakahar KASSERT(encap_lock_held());
681 1.54 knakahar
682 1.55 knakahar #ifdef USE_RADIX
683 1.7 itojun if (!ep->func && rnh) {
684 1.57 knakahar /* Disable access to the radix tree for reader. */
685 1.57 knakahar encap_head_updating = true;
686 1.56 knakahar /* Wait for all readers to drain. */
687 1.56 knakahar pserialize_perform(encaptab.psz);
688 1.56 knakahar
689 1.30 christos if (!rnh->rnh_deladdr((void *)ep->addrpack,
690 1.30 christos (void *)ep->maskpack, rnh))
691 1.7 itojun error = ESRCH;
692 1.57 knakahar
693 1.57 knakahar /*
694 1.57 knakahar * The ep added to the radix tree must be skipped while
695 1.57 knakahar * encap[46]_lookup walks encaptab list. In other words,
696 1.57 knakahar * encap_add() does not need to care whether the ep has
697 1.57 knakahar * been added encaptab list or not yet.
698 1.57 knakahar * So, we can re-enable access to the radix tree for now.
699 1.57 knakahar */
700 1.57 knakahar encap_head_updating = false;
701 1.7 itojun }
702 1.55 knakahar #endif
703 1.56 knakahar PSLIST_WRITER_REMOVE(ep, chain);
704 1.56 knakahar
705 1.7 itojun return error;
706 1.7 itojun }
707 1.7 itojun
708 1.73 riastrad static void
709 1.23 perry encap_afcheck(int af, const struct sockaddr *sp, const struct sockaddr *dp)
710 1.7 itojun {
711 1.7 itojun
712 1.73 riastrad KASSERT(sp != NULL && dp != NULL);
713 1.73 riastrad KASSERT(sp->sa_len == dp->sa_len);
714 1.73 riastrad KASSERT(af == sp->sa_family && af == dp->sa_family);
715 1.7 itojun
716 1.74 riastrad socklen_t len __diagused = sockaddr_getsize_by_family(af);
717 1.73 riastrad KASSERT(len != 0 && len == sp->sa_len && len == dp->sa_len);
718 1.1 itojun }
719 1.1 itojun
720 1.1 itojun /*
721 1.1 itojun * sp (src ptr) is always my side, and dp (dst ptr) is always remote side.
722 1.1 itojun * length of mask (sm and dm) is assumed to be same as sp/dp.
723 1.1 itojun * Return value will be necessary as input (cookie) for encap_detach().
724 1.1 itojun */
725 1.1 itojun const struct encaptab *
726 1.23 perry encap_attach(int af, int proto,
727 1.23 perry const struct sockaddr *sp, const struct sockaddr *sm,
728 1.23 perry const struct sockaddr *dp, const struct sockaddr *dm,
729 1.51 knakahar const struct encapsw *esw, void *arg)
730 1.1 itojun {
731 1.1 itojun struct encaptab *ep;
732 1.1 itojun int error;
733 1.61 knakahar int pss;
734 1.7 itojun size_t l;
735 1.33 pooka struct ip_pack4 *pack4;
736 1.7 itojun #ifdef INET6
737 1.33 pooka struct ip_pack6 *pack6;
738 1.7 itojun #endif
739 1.61 knakahar #ifndef ENCAP_MPSAFE
740 1.61 knakahar int s;
741 1.1 itojun
742 1.1 itojun s = splsoftnet();
743 1.61 knakahar #endif
744 1.73 riastrad
745 1.73 riastrad ASSERT_SLEEPABLE();
746 1.73 riastrad
747 1.1 itojun /* sanity check on args */
748 1.73 riastrad encap_afcheck(af, sp, dp);
749 1.1 itojun
750 1.1 itojun /* check if anyone have already attached with exactly same config */
751 1.56 knakahar pss = pserialize_read_enter();
752 1.56 knakahar PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) {
753 1.1 itojun if (ep->af != af)
754 1.1 itojun continue;
755 1.1 itojun if (ep->proto != proto)
756 1.1 itojun continue;
757 1.7 itojun if (ep->func)
758 1.7 itojun continue;
759 1.41 ozaki
760 1.43 riastrad KASSERT(ep->src != NULL);
761 1.43 riastrad KASSERT(ep->dst != NULL);
762 1.43 riastrad KASSERT(ep->srcmask != NULL);
763 1.43 riastrad KASSERT(ep->dstmask != NULL);
764 1.41 ozaki
765 1.7 itojun if (ep->src->sa_len != sp->sa_len ||
766 1.34 cegger memcmp(ep->src, sp, sp->sa_len) != 0 ||
767 1.34 cegger memcmp(ep->srcmask, sm, sp->sa_len) != 0)
768 1.7 itojun continue;
769 1.7 itojun if (ep->dst->sa_len != dp->sa_len ||
770 1.34 cegger memcmp(ep->dst, dp, dp->sa_len) != 0 ||
771 1.34 cegger memcmp(ep->dstmask, dm, dp->sa_len) != 0)
772 1.1 itojun continue;
773 1.1 itojun
774 1.1 itojun error = EEXIST;
775 1.56 knakahar pserialize_read_exit(pss);
776 1.1 itojun goto fail;
777 1.1 itojun }
778 1.56 knakahar pserialize_read_exit(pss);
779 1.3 thorpej
780 1.7 itojun switch (af) {
781 1.7 itojun case AF_INET:
782 1.7 itojun l = sizeof(*pack4);
783 1.7 itojun break;
784 1.7 itojun #ifdef INET6
785 1.7 itojun case AF_INET6:
786 1.7 itojun l = sizeof(*pack6);
787 1.7 itojun break;
788 1.7 itojun #endif
789 1.7 itojun default:
790 1.7 itojun goto fail;
791 1.7 itojun }
792 1.7 itojun
793 1.20 itojun /* M_NETADDR ok? */
794 1.73 riastrad ep = kmem_zalloc(sizeof(*ep), KM_SLEEP);
795 1.73 riastrad ep->addrpack = kmem_zalloc(l, KM_SLEEP);
796 1.73 riastrad ep->maskpack = kmem_zalloc(l, KM_SLEEP);
797 1.1 itojun
798 1.1 itojun ep->af = af;
799 1.1 itojun ep->proto = proto;
800 1.7 itojun ep->addrpack->sa_len = l & 0xff;
801 1.7 itojun ep->maskpack->sa_len = l & 0xff;
802 1.7 itojun switch (af) {
803 1.7 itojun case AF_INET:
804 1.33 pooka pack4 = (struct ip_pack4 *)ep->addrpack;
805 1.7 itojun ep->src = (struct sockaddr *)&pack4->mine;
806 1.7 itojun ep->dst = (struct sockaddr *)&pack4->yours;
807 1.33 pooka pack4 = (struct ip_pack4 *)ep->maskpack;
808 1.7 itojun ep->srcmask = (struct sockaddr *)&pack4->mine;
809 1.7 itojun ep->dstmask = (struct sockaddr *)&pack4->yours;
810 1.7 itojun break;
811 1.7 itojun #ifdef INET6
812 1.7 itojun case AF_INET6:
813 1.33 pooka pack6 = (struct ip_pack6 *)ep->addrpack;
814 1.7 itojun ep->src = (struct sockaddr *)&pack6->mine;
815 1.7 itojun ep->dst = (struct sockaddr *)&pack6->yours;
816 1.33 pooka pack6 = (struct ip_pack6 *)ep->maskpack;
817 1.7 itojun ep->srcmask = (struct sockaddr *)&pack6->mine;
818 1.7 itojun ep->dstmask = (struct sockaddr *)&pack6->yours;
819 1.7 itojun break;
820 1.7 itojun #endif
821 1.7 itojun }
822 1.7 itojun
823 1.37 tsutsui memcpy(ep->src, sp, sp->sa_len);
824 1.37 tsutsui memcpy(ep->srcmask, sm, sp->sa_len);
825 1.37 tsutsui memcpy(ep->dst, dp, dp->sa_len);
826 1.37 tsutsui memcpy(ep->dstmask, dm, dp->sa_len);
827 1.51 knakahar ep->esw = esw;
828 1.1 itojun ep->arg = arg;
829 1.56 knakahar psref_target_init(&ep->psref, encaptab.elem_class);
830 1.1 itojun
831 1.7 itojun error = encap_add(ep);
832 1.7 itojun if (error)
833 1.7 itojun goto gc;
834 1.1 itojun
835 1.1 itojun error = 0;
836 1.61 knakahar #ifndef ENCAP_MPSAFE
837 1.1 itojun splx(s);
838 1.61 knakahar #endif
839 1.1 itojun return ep;
840 1.1 itojun
841 1.7 itojun gc:
842 1.7 itojun if (ep->addrpack)
843 1.47 knakahar kmem_free(ep->addrpack, l);
844 1.7 itojun if (ep->maskpack)
845 1.47 knakahar kmem_free(ep->maskpack, l);
846 1.7 itojun if (ep)
847 1.47 knakahar kmem_free(ep, sizeof(*ep));
848 1.1 itojun fail:
849 1.61 knakahar #ifndef ENCAP_MPSAFE
850 1.1 itojun splx(s);
851 1.61 knakahar #endif
852 1.1 itojun return NULL;
853 1.1 itojun }
854 1.1 itojun
855 1.1 itojun const struct encaptab *
856 1.23 perry encap_attach_func(int af, int proto,
857 1.75 knakahar encap_priofunc_t *func,
858 1.51 knakahar const struct encapsw *esw, void *arg)
859 1.1 itojun {
860 1.1 itojun struct encaptab *ep;
861 1.1 itojun int error;
862 1.61 knakahar #ifndef ENCAP_MPSAFE
863 1.1 itojun int s;
864 1.1 itojun
865 1.1 itojun s = splsoftnet();
866 1.61 knakahar #endif
867 1.73 riastrad
868 1.73 riastrad ASSERT_SLEEPABLE();
869 1.73 riastrad
870 1.1 itojun /* sanity check on args */
871 1.73 riastrad KASSERT(func != NULL);
872 1.73 riastrad KASSERT(af == AF_INET
873 1.73 riastrad #ifdef INET6
874 1.73 riastrad || af == AF_INET6
875 1.73 riastrad #endif
876 1.73 riastrad );
877 1.7 itojun
878 1.73 riastrad ep = kmem_alloc(sizeof(*ep), KM_SLEEP);
879 1.35 cegger memset(ep, 0, sizeof(*ep));
880 1.1 itojun
881 1.1 itojun ep->af = af;
882 1.1 itojun ep->proto = proto;
883 1.1 itojun ep->func = func;
884 1.51 knakahar ep->esw = esw;
885 1.1 itojun ep->arg = arg;
886 1.56 knakahar psref_target_init(&ep->psref, encaptab.elem_class);
887 1.1 itojun
888 1.7 itojun error = encap_add(ep);
889 1.7 itojun if (error)
890 1.67 maxv goto gc;
891 1.1 itojun
892 1.1 itojun error = 0;
893 1.61 knakahar #ifndef ENCAP_MPSAFE
894 1.1 itojun splx(s);
895 1.61 knakahar #endif
896 1.1 itojun return ep;
897 1.1 itojun
898 1.67 maxv gc:
899 1.67 maxv kmem_free(ep, sizeof(*ep));
900 1.61 knakahar #ifndef ENCAP_MPSAFE
901 1.1 itojun splx(s);
902 1.61 knakahar #endif
903 1.1 itojun return NULL;
904 1.1 itojun }
905 1.1 itojun
906 1.76 knakahar static void
907 1.76 knakahar encap_key_init(struct encap_key *key,
908 1.76 knakahar const struct sockaddr *local, const struct sockaddr *remote)
909 1.76 knakahar {
910 1.76 knakahar
911 1.76 knakahar memset(key, 0, sizeof(*key));
912 1.76 knakahar
913 1.76 knakahar sockaddr_copy(&key->local_sa, sizeof(key->local_u), local);
914 1.76 knakahar sockaddr_copy(&key->remote_sa, sizeof(key->remote_u), remote);
915 1.76 knakahar }
916 1.76 knakahar
917 1.76 knakahar static void
918 1.76 knakahar encap_key_inc(struct encap_key *key)
919 1.76 knakahar {
920 1.76 knakahar
921 1.76 knakahar (key->seq)++;
922 1.76 knakahar }
923 1.76 knakahar
924 1.76 knakahar static void
925 1.76 knakahar encap_key_dec(struct encap_key *key)
926 1.76 knakahar {
927 1.76 knakahar
928 1.76 knakahar (key->seq)--;
929 1.76 knakahar }
930 1.76 knakahar
931 1.76 knakahar static void
932 1.76 knakahar encap_key_copy(struct encap_key *dst, const struct encap_key *src)
933 1.76 knakahar {
934 1.76 knakahar
935 1.76 knakahar memset(dst, 0, sizeof(*dst));
936 1.76 knakahar *dst = *src;
937 1.76 knakahar }
938 1.76 knakahar
939 1.76 knakahar /*
940 1.76 knakahar * src is always my side, and dst is always remote side.
941 1.76 knakahar * Return value will be necessary as input (cookie) for encap_detach().
942 1.76 knakahar */
943 1.76 knakahar const struct encaptab *
944 1.76 knakahar encap_attach_addr(int af, int proto,
945 1.76 knakahar const struct sockaddr *src, const struct sockaddr *dst,
946 1.76 knakahar encap_priofunc_t *func,
947 1.76 knakahar const struct encapsw *esw, void *arg)
948 1.76 knakahar {
949 1.76 knakahar struct encaptab *ep;
950 1.76 knakahar size_t l;
951 1.76 knakahar thmap_t *emap;
952 1.76 knakahar void *retep;
953 1.76 knakahar struct ip_pack4 *pack4;
954 1.76 knakahar #ifdef INET6
955 1.76 knakahar struct ip_pack6 *pack6;
956 1.76 knakahar #endif
957 1.76 knakahar
958 1.76 knakahar ASSERT_SLEEPABLE();
959 1.76 knakahar
960 1.76 knakahar encap_afcheck(af, src, dst);
961 1.76 knakahar
962 1.76 knakahar switch (af) {
963 1.76 knakahar case AF_INET:
964 1.76 knakahar l = sizeof(*pack4);
965 1.76 knakahar emap = encap_map[0];
966 1.76 knakahar break;
967 1.76 knakahar #ifdef INET6
968 1.76 knakahar case AF_INET6:
969 1.76 knakahar l = sizeof(*pack6);
970 1.76 knakahar emap = encap_map[1];
971 1.76 knakahar break;
972 1.76 knakahar #endif
973 1.76 knakahar default:
974 1.76 knakahar return NULL;
975 1.76 knakahar }
976 1.76 knakahar
977 1.76 knakahar ep = kmem_zalloc(sizeof(*ep), KM_SLEEP);
978 1.76 knakahar ep->addrpack = kmem_zalloc(l, KM_SLEEP);
979 1.76 knakahar ep->addrpack->sa_len = l & 0xff;
980 1.76 knakahar ep->af = af;
981 1.76 knakahar ep->proto = proto;
982 1.76 knakahar ep->flag = IP_ENCAP_ADDR_ENABLE;
983 1.76 knakahar switch (af) {
984 1.76 knakahar case AF_INET:
985 1.76 knakahar pack4 = (struct ip_pack4 *)ep->addrpack;
986 1.76 knakahar ep->src = (struct sockaddr *)&pack4->mine;
987 1.76 knakahar ep->dst = (struct sockaddr *)&pack4->yours;
988 1.76 knakahar break;
989 1.76 knakahar #ifdef INET6
990 1.76 knakahar case AF_INET6:
991 1.76 knakahar pack6 = (struct ip_pack6 *)ep->addrpack;
992 1.76 knakahar ep->src = (struct sockaddr *)&pack6->mine;
993 1.76 knakahar ep->dst = (struct sockaddr *)&pack6->yours;
994 1.76 knakahar break;
995 1.76 knakahar #endif
996 1.76 knakahar }
997 1.76 knakahar memcpy(ep->src, src, src->sa_len);
998 1.76 knakahar memcpy(ep->dst, dst, dst->sa_len);
999 1.76 knakahar ep->esw = esw;
1000 1.76 knakahar ep->arg = arg;
1001 1.76 knakahar ep->func = func;
1002 1.76 knakahar psref_target_init(&ep->psref, encaptab.elem_class);
1003 1.76 knakahar
1004 1.76 knakahar encap_key_init(&ep->key, src, dst);
1005 1.76 knakahar while ((retep = thmap_put(emap, &ep->key, sizeof(ep->key), ep)) != ep)
1006 1.76 knakahar encap_key_inc(&ep->key);
1007 1.76 knakahar return ep;
1008 1.76 knakahar }
1009 1.76 knakahar
1010 1.76 knakahar
1011 1.7 itojun /* XXX encap4_ctlinput() is necessary if we set DF=1 on outer IPv4 header */
1012 1.7 itojun
1013 1.7 itojun #ifdef INET6
1014 1.32 ad void *
1015 1.29 dyoung encap6_ctlinput(int cmd, const struct sockaddr *sa, void *d0)
1016 1.7 itojun {
1017 1.7 itojun void *d = d0;
1018 1.7 itojun struct ip6_hdr *ip6;
1019 1.7 itojun struct mbuf *m;
1020 1.7 itojun int off;
1021 1.7 itojun struct ip6ctlparam *ip6cp = NULL;
1022 1.7 itojun int nxt;
1023 1.56 knakahar int s;
1024 1.7 itojun struct encaptab *ep;
1025 1.51 knakahar const struct encapsw *esw;
1026 1.7 itojun
1027 1.7 itojun if (sa->sa_family != AF_INET6 ||
1028 1.7 itojun sa->sa_len != sizeof(struct sockaddr_in6))
1029 1.32 ad return NULL;
1030 1.7 itojun
1031 1.7 itojun if ((unsigned)cmd >= PRC_NCMDS)
1032 1.32 ad return NULL;
1033 1.7 itojun if (cmd == PRC_HOSTDEAD)
1034 1.7 itojun d = NULL;
1035 1.7 itojun else if (cmd == PRC_MSGSIZE)
1036 1.7 itojun ; /* special code is present, see below */
1037 1.7 itojun else if (inet6ctlerrmap[cmd] == 0)
1038 1.32 ad return NULL;
1039 1.7 itojun
1040 1.7 itojun /* if the parameter is from icmp6, decode it. */
1041 1.7 itojun if (d != NULL) {
1042 1.7 itojun ip6cp = (struct ip6ctlparam *)d;
1043 1.7 itojun m = ip6cp->ip6c_m;
1044 1.7 itojun ip6 = ip6cp->ip6c_ip6;
1045 1.7 itojun off = ip6cp->ip6c_off;
1046 1.7 itojun nxt = ip6cp->ip6c_nxt;
1047 1.15 mycroft
1048 1.15 mycroft if (ip6 && cmd == PRC_MSGSIZE) {
1049 1.15 mycroft int valid = 0;
1050 1.15 mycroft struct encaptab *match;
1051 1.56 knakahar struct psref elem_psref;
1052 1.15 mycroft
1053 1.15 mycroft /*
1054 1.15 mycroft * Check to see if we have a valid encap configuration.
1055 1.15 mycroft */
1056 1.56 knakahar match = encap6_lookup(m, off, nxt, OUTBOUND,
1057 1.56 knakahar &elem_psref);
1058 1.72 knakahar if (match) {
1059 1.15 mycroft valid++;
1060 1.72 knakahar psref_release(&elem_psref, &match->psref,
1061 1.72 knakahar encaptab.elem_class);
1062 1.72 knakahar }
1063 1.15 mycroft
1064 1.15 mycroft /*
1065 1.15 mycroft * Depending on the value of "valid" and routing table
1066 1.15 mycroft * size (mtudisc_{hi,lo}wat), we will:
1067 1.15 mycroft * - recalcurate the new MTU and create the
1068 1.15 mycroft * corresponding routing entry, or
1069 1.15 mycroft * - ignore the MTU change notification.
1070 1.15 mycroft */
1071 1.15 mycroft icmp6_mtudisc_update((struct ip6ctlparam *)d, valid);
1072 1.15 mycroft }
1073 1.7 itojun } else {
1074 1.7 itojun m = NULL;
1075 1.7 itojun ip6 = NULL;
1076 1.7 itojun nxt = -1;
1077 1.7 itojun }
1078 1.7 itojun
1079 1.7 itojun /* inform all listeners */
1080 1.56 knakahar
1081 1.56 knakahar s = pserialize_read_enter();
1082 1.56 knakahar PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) {
1083 1.56 knakahar struct psref elem_psref;
1084 1.56 knakahar
1085 1.7 itojun if (ep->af != AF_INET6)
1086 1.7 itojun continue;
1087 1.7 itojun if (ep->proto >= 0 && ep->proto != nxt)
1088 1.7 itojun continue;
1089 1.7 itojun
1090 1.7 itojun /* should optimize by looking at address pairs */
1091 1.7 itojun
1092 1.7 itojun /* XXX need to pass ep->arg or ep itself to listeners */
1093 1.56 knakahar psref_acquire(&elem_psref, &ep->psref,
1094 1.56 knakahar encaptab.elem_class);
1095 1.51 knakahar esw = ep->esw;
1096 1.51 knakahar if (esw && esw->encapsw6.pr_ctlinput) {
1097 1.56 knakahar pserialize_read_exit(s);
1098 1.56 knakahar /* pr_ctlinput is sleepable. e.g. rtcache_free */
1099 1.52 knakahar (*esw->encapsw6.pr_ctlinput)(cmd, sa, d, ep->arg);
1100 1.56 knakahar s = pserialize_read_enter();
1101 1.51 knakahar }
1102 1.56 knakahar psref_release(&elem_psref, &ep->psref,
1103 1.56 knakahar encaptab.elem_class);
1104 1.7 itojun }
1105 1.56 knakahar pserialize_read_exit(s);
1106 1.7 itojun
1107 1.7 itojun rip6_ctlinput(cmd, sa, d0);
1108 1.32 ad return NULL;
1109 1.7 itojun }
1110 1.7 itojun #endif
1111 1.7 itojun
1112 1.76 knakahar static int
1113 1.76 knakahar encap_detach_addr(const struct encaptab *ep)
1114 1.76 knakahar {
1115 1.76 knakahar thmap_t *emap;
1116 1.76 knakahar struct encaptab *retep;
1117 1.76 knakahar struct encaptab *target;
1118 1.76 knakahar void *thgc;
1119 1.76 knakahar struct encap_key key;
1120 1.76 knakahar
1121 1.76 knakahar KASSERT(encap_lock_held());
1122 1.76 knakahar KASSERT(ep->flag & IP_ENCAP_ADDR_ENABLE);
1123 1.76 knakahar
1124 1.76 knakahar switch (ep->af) {
1125 1.76 knakahar case AF_INET:
1126 1.76 knakahar emap = encap_map[0];
1127 1.76 knakahar break;
1128 1.76 knakahar #ifdef INET6
1129 1.76 knakahar case AF_INET6:
1130 1.76 knakahar emap = encap_map[1];
1131 1.76 knakahar break;
1132 1.76 knakahar #endif
1133 1.76 knakahar default:
1134 1.76 knakahar return EINVAL;
1135 1.76 knakahar }
1136 1.76 knakahar
1137 1.76 knakahar retep = thmap_del(emap, &ep->key, sizeof(ep->key));
1138 1.76 knakahar if (retep != ep) {
1139 1.76 knakahar return ENOENT;
1140 1.76 knakahar }
1141 1.76 knakahar target = retep;
1142 1.76 knakahar
1143 1.76 knakahar /*
1144 1.76 knakahar * To keep continuity, decrement seq after detached encaptab.
1145 1.76 knakahar */
1146 1.76 knakahar encap_key_copy(&key, &ep->key);
1147 1.76 knakahar encap_key_inc(&key);
1148 1.76 knakahar while ((retep = thmap_del(emap, &key, sizeof(key))) != NULL) {
1149 1.76 knakahar void *pp;
1150 1.76 knakahar
1151 1.76 knakahar encap_key_dec(&retep->key);
1152 1.76 knakahar pp = thmap_put(emap, &retep->key, sizeof(retep->key), retep);
1153 1.76 knakahar KASSERT(retep == pp);
1154 1.76 knakahar
1155 1.76 knakahar encap_key_inc(&key);
1156 1.76 knakahar }
1157 1.76 knakahar
1158 1.76 knakahar thgc = thmap_stage_gc(emap);
1159 1.76 knakahar pserialize_perform(encaptab.psz);
1160 1.76 knakahar thmap_gc(emap, thgc);
1161 1.76 knakahar psref_target_destroy(&target->psref, encaptab.elem_class);
1162 1.76 knakahar kmem_free(target->addrpack, target->addrpack->sa_len);
1163 1.76 knakahar kmem_free(target, sizeof(*target));
1164 1.76 knakahar
1165 1.76 knakahar return 0;
1166 1.76 knakahar }
1167 1.76 knakahar
1168 1.1 itojun int
1169 1.23 perry encap_detach(const struct encaptab *cookie)
1170 1.1 itojun {
1171 1.1 itojun const struct encaptab *ep = cookie;
1172 1.56 knakahar struct encaptab *p;
1173 1.7 itojun int error;
1174 1.1 itojun
1175 1.56 knakahar KASSERT(encap_lock_held());
1176 1.56 knakahar
1177 1.76 knakahar if (ep->flag & IP_ENCAP_ADDR_ENABLE)
1178 1.76 knakahar return encap_detach_addr(ep);
1179 1.76 knakahar
1180 1.56 knakahar PSLIST_WRITER_FOREACH(p, &encap_table, struct encaptab, chain) {
1181 1.1 itojun if (p == ep) {
1182 1.7 itojun error = encap_remove(p);
1183 1.7 itojun if (error)
1184 1.7 itojun return error;
1185 1.56 knakahar else
1186 1.56 knakahar break;
1187 1.56 knakahar }
1188 1.56 knakahar }
1189 1.56 knakahar if (p == NULL)
1190 1.56 knakahar return ENOENT;
1191 1.56 knakahar
1192 1.56 knakahar pserialize_perform(encaptab.psz);
1193 1.56 knakahar psref_target_destroy(&p->psref,
1194 1.56 knakahar encaptab.elem_class);
1195 1.56 knakahar if (!ep->func) {
1196 1.56 knakahar kmem_free(p->addrpack, ep->addrpack->sa_len);
1197 1.56 knakahar kmem_free(p->maskpack, ep->maskpack->sa_len);
1198 1.1 itojun }
1199 1.56 knakahar kmem_free(p, sizeof(*p));
1200 1.1 itojun
1201 1.56 knakahar return 0;
1202 1.7 itojun }
1203 1.7 itojun
1204 1.55 knakahar #ifdef USE_RADIX
1205 1.7 itojun static struct radix_node_head *
1206 1.23 perry encap_rnh(int af)
1207 1.7 itojun {
1208 1.7 itojun
1209 1.7 itojun switch (af) {
1210 1.7 itojun case AF_INET:
1211 1.7 itojun return encap_head[0];
1212 1.7 itojun #ifdef INET6
1213 1.7 itojun case AF_INET6:
1214 1.7 itojun return encap_head[1];
1215 1.7 itojun #endif
1216 1.7 itojun default:
1217 1.7 itojun return NULL;
1218 1.7 itojun }
1219 1.7 itojun }
1220 1.7 itojun
1221 1.7 itojun static int
1222 1.23 perry mask_matchlen(const struct sockaddr *sa)
1223 1.7 itojun {
1224 1.7 itojun const char *p, *ep;
1225 1.7 itojun int l;
1226 1.7 itojun
1227 1.7 itojun p = (const char *)sa;
1228 1.7 itojun ep = p + sa->sa_len;
1229 1.7 itojun p += 2; /* sa_len + sa_family */
1230 1.7 itojun
1231 1.7 itojun l = 0;
1232 1.7 itojun while (p < ep) {
1233 1.7 itojun l += (*p ? 8 : 0); /* estimate */
1234 1.7 itojun p++;
1235 1.7 itojun }
1236 1.7 itojun return l;
1237 1.1 itojun }
1238 1.55 knakahar #endif
1239 1.55 knakahar
1240 1.55 knakahar #ifndef USE_RADIX
1241 1.55 knakahar static int
1242 1.55 knakahar mask_match(const struct encaptab *ep,
1243 1.55 knakahar const struct sockaddr *sp,
1244 1.55 knakahar const struct sockaddr *dp)
1245 1.55 knakahar {
1246 1.55 knakahar struct sockaddr_storage s;
1247 1.55 knakahar struct sockaddr_storage d;
1248 1.55 knakahar int i;
1249 1.55 knakahar const u_int8_t *p, *q;
1250 1.55 knakahar u_int8_t *r;
1251 1.55 knakahar int matchlen;
1252 1.55 knakahar
1253 1.55 knakahar KASSERTMSG(ep->func == NULL, "wrong encaptab passed to mask_match");
1254 1.55 knakahar
1255 1.55 knakahar if (sp->sa_len > sizeof(s) || dp->sa_len > sizeof(d))
1256 1.55 knakahar return 0;
1257 1.55 knakahar if (sp->sa_family != ep->af || dp->sa_family != ep->af)
1258 1.55 knakahar return 0;
1259 1.55 knakahar if (sp->sa_len != ep->src->sa_len || dp->sa_len != ep->dst->sa_len)
1260 1.55 knakahar return 0;
1261 1.55 knakahar
1262 1.55 knakahar matchlen = 0;
1263 1.55 knakahar
1264 1.55 knakahar p = (const u_int8_t *)sp;
1265 1.55 knakahar q = (const u_int8_t *)ep->srcmask;
1266 1.55 knakahar r = (u_int8_t *)&s;
1267 1.55 knakahar for (i = 0 ; i < sp->sa_len; i++) {
1268 1.55 knakahar r[i] = p[i] & q[i];
1269 1.55 knakahar /* XXX estimate */
1270 1.55 knakahar matchlen += (q[i] ? 8 : 0);
1271 1.55 knakahar }
1272 1.55 knakahar
1273 1.55 knakahar p = (const u_int8_t *)dp;
1274 1.55 knakahar q = (const u_int8_t *)ep->dstmask;
1275 1.55 knakahar r = (u_int8_t *)&d;
1276 1.55 knakahar for (i = 0 ; i < dp->sa_len; i++) {
1277 1.55 knakahar r[i] = p[i] & q[i];
1278 1.55 knakahar /* XXX rough estimate */
1279 1.55 knakahar matchlen += (q[i] ? 8 : 0);
1280 1.55 knakahar }
1281 1.55 knakahar
1282 1.55 knakahar /* need to overwrite len/family portion as we don't compare them */
1283 1.55 knakahar s.ss_len = sp->sa_len;
1284 1.55 knakahar s.ss_family = sp->sa_family;
1285 1.55 knakahar d.ss_len = dp->sa_len;
1286 1.55 knakahar d.ss_family = dp->sa_family;
1287 1.55 knakahar
1288 1.55 knakahar if (memcmp(&s, ep->src, ep->src->sa_len) == 0 &&
1289 1.55 knakahar memcmp(&d, ep->dst, ep->dst->sa_len) == 0) {
1290 1.55 knakahar return matchlen;
1291 1.55 knakahar } else
1292 1.55 knakahar return 0;
1293 1.55 knakahar }
1294 1.55 knakahar #endif
1295 1.1 itojun
1296 1.59 knakahar int
1297 1.54 knakahar encap_lock_enter(void)
1298 1.54 knakahar {
1299 1.59 knakahar int error;
1300 1.59 knakahar
1301 1.59 knakahar mutex_enter(&encap_whole.lock);
1302 1.59 knakahar while (encap_whole.busy != NULL) {
1303 1.59 knakahar error = cv_wait_sig(&encap_whole.cv, &encap_whole.lock);
1304 1.59 knakahar if (error) {
1305 1.59 knakahar mutex_exit(&encap_whole.lock);
1306 1.59 knakahar return error;
1307 1.59 knakahar }
1308 1.59 knakahar }
1309 1.59 knakahar KASSERT(encap_whole.busy == NULL);
1310 1.59 knakahar encap_whole.busy = curlwp;
1311 1.59 knakahar mutex_exit(&encap_whole.lock);
1312 1.54 knakahar
1313 1.59 knakahar return 0;
1314 1.54 knakahar }
1315 1.54 knakahar
1316 1.54 knakahar void
1317 1.54 knakahar encap_lock_exit(void)
1318 1.54 knakahar {
1319 1.54 knakahar
1320 1.59 knakahar mutex_enter(&encap_whole.lock);
1321 1.59 knakahar KASSERT(encap_whole.busy == curlwp);
1322 1.59 knakahar encap_whole.busy = NULL;
1323 1.59 knakahar cv_broadcast(&encap_whole.cv);
1324 1.59 knakahar mutex_exit(&encap_whole.lock);
1325 1.54 knakahar }
1326 1.56 knakahar
1327 1.56 knakahar bool
1328 1.56 knakahar encap_lock_held(void)
1329 1.56 knakahar {
1330 1.56 knakahar
1331 1.59 knakahar return (encap_whole.busy == curlwp);
1332 1.56 knakahar }
1333