ip_encap.c revision 1.77 1 1.77 knakahar /* $NetBSD: ip_encap.c,v 1.77 2022/12/07 08:33:02 knakahara Exp $ */
2 1.7 itojun /* $KAME: ip_encap.c,v 1.73 2001/10/02 08:30:58 itojun Exp $ */
3 1.1 itojun
4 1.1 itojun /*
5 1.1 itojun * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 1.1 itojun * All rights reserved.
7 1.1 itojun *
8 1.1 itojun * Redistribution and use in source and binary forms, with or without
9 1.1 itojun * modification, are permitted provided that the following conditions
10 1.1 itojun * are met:
11 1.1 itojun * 1. Redistributions of source code must retain the above copyright
12 1.1 itojun * notice, this list of conditions and the following disclaimer.
13 1.1 itojun * 2. Redistributions in binary form must reproduce the above copyright
14 1.1 itojun * notice, this list of conditions and the following disclaimer in the
15 1.1 itojun * documentation and/or other materials provided with the distribution.
16 1.1 itojun * 3. Neither the name of the project nor the names of its contributors
17 1.1 itojun * may be used to endorse or promote products derived from this software
18 1.1 itojun * without specific prior written permission.
19 1.1 itojun *
20 1.1 itojun * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 1.1 itojun * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 1.1 itojun * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 1.1 itojun * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 1.1 itojun * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 1.1 itojun * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 1.1 itojun * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 1.1 itojun * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 1.1 itojun * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 1.1 itojun * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 1.1 itojun * SUCH DAMAGE.
31 1.1 itojun */
32 1.1 itojun /*
33 1.1 itojun * My grandfather said that there's a devil inside tunnelling technology...
34 1.1 itojun *
35 1.1 itojun * We have surprisingly many protocols that want packets with IP protocol
36 1.1 itojun * #4 or #41. Here's a list of protocols that want protocol #41:
37 1.1 itojun * RFC1933 configured tunnel
38 1.1 itojun * RFC1933 automatic tunnel
39 1.1 itojun * RFC2401 IPsec tunnel
40 1.1 itojun * RFC2473 IPv6 generic packet tunnelling
41 1.1 itojun * RFC2529 6over4 tunnel
42 1.7 itojun * RFC3056 6to4 tunnel
43 1.7 itojun * isatap tunnel
44 1.1 itojun * mobile-ip6 (uses RFC2473)
45 1.1 itojun * Here's a list of protocols that want protocol #4:
46 1.1 itojun * RFC1853 IPv4-in-IPv4 tunnelling
47 1.1 itojun * RFC2003 IPv4 encapsulation within IPv4
48 1.1 itojun * RFC2344 reverse tunnelling for mobile-ip4
49 1.1 itojun * RFC2401 IPsec tunnel
50 1.1 itojun * Well, what can I say. They impose different en/decapsulation mechanism
51 1.1 itojun * from each other, so they need separate protocol handler. The only one
52 1.1 itojun * we can easily determine by protocol # is IPsec, which always has
53 1.1 itojun * AH/ESP/IPComp header right after outer IP header.
54 1.1 itojun *
55 1.1 itojun * So, clearly good old protosw does not work for protocol #4 and #41.
56 1.1 itojun * The code will let you match protocol via src/dst address pair.
57 1.1 itojun */
58 1.1 itojun /* XXX is M_NETADDR correct? */
59 1.6 lukem
60 1.6 lukem #include <sys/cdefs.h>
61 1.77 knakahar __KERNEL_RCSID(0, "$NetBSD: ip_encap.c,v 1.77 2022/12/07 08:33:02 knakahara Exp $");
62 1.1 itojun
63 1.46 pooka #ifdef _KERNEL_OPT
64 1.4 itojun #include "opt_mrouting.h"
65 1.4 itojun #include "opt_inet.h"
66 1.61 knakahar #include "opt_net_mpsafe.h"
67 1.46 pooka #endif
68 1.1 itojun
69 1.1 itojun #include <sys/param.h>
70 1.1 itojun #include <sys/systm.h>
71 1.1 itojun #include <sys/socket.h>
72 1.71 knakahar #include <sys/socketvar.h> /* for softnet_lock */
73 1.1 itojun #include <sys/sockio.h>
74 1.1 itojun #include <sys/mbuf.h>
75 1.1 itojun #include <sys/errno.h>
76 1.4 itojun #include <sys/queue.h>
77 1.47 knakahar #include <sys/kmem.h>
78 1.56 knakahar #include <sys/mutex.h>
79 1.59 knakahar #include <sys/condvar.h>
80 1.56 knakahar #include <sys/psref.h>
81 1.56 knakahar #include <sys/pslist.h>
82 1.76 knakahar #include <sys/thmap.h>
83 1.1 itojun
84 1.1 itojun #include <net/if.h>
85 1.1 itojun
86 1.1 itojun #include <netinet/in.h>
87 1.1 itojun #include <netinet/in_systm.h>
88 1.1 itojun #include <netinet/ip.h>
89 1.1 itojun #include <netinet/ip_var.h>
90 1.1 itojun #include <netinet/ip_encap.h>
91 1.1 itojun #ifdef MROUTING
92 1.1 itojun #include <netinet/ip_mroute.h>
93 1.1 itojun #endif /* MROUTING */
94 1.1 itojun
95 1.1 itojun #ifdef INET6
96 1.1 itojun #include <netinet/ip6.h>
97 1.1 itojun #include <netinet6/ip6_var.h>
98 1.51 knakahar #include <netinet6/ip6protosw.h> /* for struct ip6ctlparam */
99 1.7 itojun #include <netinet6/in6_var.h>
100 1.7 itojun #include <netinet6/in6_pcb.h>
101 1.7 itojun #include <netinet/icmp6.h>
102 1.1 itojun #endif
103 1.1 itojun
104 1.61 knakahar #ifdef NET_MPSAFE
105 1.61 knakahar #define ENCAP_MPSAFE 1
106 1.61 knakahar #endif
107 1.61 knakahar
108 1.7 itojun enum direction { INBOUND, OUTBOUND }; /* INBOUND: "mine" is the packet's destination; OUTBOUND: "mine" is its source */
109 1.7 itojun
110 1.7 itojun #ifdef INET
111 1.56 knakahar static struct encaptab *encap4_lookup(struct mbuf *, int, int, enum direction,
112 1.56 knakahar struct psref *);
113 1.7 itojun #endif
114 1.7 itojun #ifdef INET6
115 1.56 knakahar static struct encaptab *encap6_lookup(struct mbuf *, int, int, enum direction,
116 1.56 knakahar struct psref *);
117 1.7 itojun #endif
118 1.22 perry static int encap_add(struct encaptab *);
119 1.22 perry static int encap_remove(struct encaptab *);
120 1.73 riastrad static void encap_afcheck(int, const struct sockaddr *, const struct sockaddr *);
121 1.76 knakahar static void encap_key_init(struct encap_key *, const struct sockaddr *,
122 1.76 knakahar const struct sockaddr *);
123 1.76 knakahar static void encap_key_inc(struct encap_key *);
124 1.1 itojun
125 1.56 knakahar /*
126 1.56 knakahar * In encap[46]_lookup(), ep->func can sleep (e.g. rtalloc1) while walking
127 1.56 knakahar * encap_table, so the pserialize read section is exited around that call.
128 1.56 knakahar */
129 1.56 knakahar static struct {
130 1.56 knakahar struct pslist_head list; /* entries added by encap_add(); walked by the lookups and encap6_ctlinput() */
131 1.56 knakahar pserialize_t psz; /* created in encapinit(); used for pserialize_perform() on detach */
132 1.56 knakahar struct psref_class *elem_class; /* psref class shared by every struct encaptab element */
133 1.56 knakahar } encaptab __cacheline_aligned = {
134 1.56 knakahar .list = PSLIST_INITIALIZER,
135 1.56 knakahar };
136 1.56 knakahar #define encap_table encaptab.list /* shorthand for the pslist head above */
137 1.1 itojun
138 1.59 knakahar static struct {
139 1.59 knakahar kmutex_t lock; /* initialised in encapinit() as MUTEX_DEFAULT at IPL_NONE */
140 1.59 knakahar kcondvar_t cv; /* initialised in encapinit() with the name "ip_encap cv" */
141 1.59 knakahar struct lwp *busy; /* NULL after encapinit(); NOTE(review): users are outside this view - presumably the LWP holding the encap lock, confirm */
142 1.59 knakahar } encap_whole __cacheline_aligned;
143 1.59 knakahar
144 1.76 knakahar static thmap_t *encap_map[2]; /* address-keyed entries; 0 for AF_INET, 1 for AF_INET6; created in encap_init() */
145 1.76 knakahar
146 1.63 ozaki static bool encap_initialized = false;
147 1.59 knakahar /*
148 1.59 knakahar * must be done before other encap interfaces initialization.
149 1.59 knakahar */
150 1.59 knakahar void
151 1.59 knakahar encapinit(void)
152 1.59 knakahar {
153 1.59 knakahar
154 1.63 ozaki if (encap_initialized)
155 1.63 ozaki return;
156 1.63 ozaki
157 1.60 knakahar encaptab.psz = pserialize_create();
158 1.60 knakahar encaptab.elem_class = psref_class_create("encapelem", IPL_SOFTNET);
159 1.60 knakahar
160 1.59 knakahar mutex_init(&encap_whole.lock, MUTEX_DEFAULT, IPL_NONE);
161 1.59 knakahar cv_init(&encap_whole.cv, "ip_encap cv");
162 1.59 knakahar encap_whole.busy = NULL;
163 1.63 ozaki
164 1.63 ozaki encap_initialized = true;
165 1.59 knakahar }
166 1.59 knakahar
167 1.1 itojun void
168 1.23 perry encap_init(void)
169 1.1 itojun {
170 1.7 itojun static int initialized = 0;
171 1.7 itojun
172 1.7 itojun if (initialized)
173 1.7 itojun return;
174 1.7 itojun initialized++;
175 1.1 itojun #if 0
176 1.1 itojun /*
177 1.1 itojun * we cannot use LIST_INIT() here, since drivers may want to call
178 1.4 itojun * encap_attach(), on driver attach. encap_init() will be called
179 1.1 itojun * on AF_INET{,6} initialization, which happens after driver
180 1.1 itojun * initialization - using LIST_INIT() here can nuke encap_attach()
181 1.1 itojun * from drivers.
182 1.1 itojun */
183 1.56 knakahar PSLIST_INIT(&encap_table);
184 1.1 itojun #endif
185 1.7 itojun
186 1.76 knakahar encap_map[0] = thmap_create(0, NULL, THMAP_NOCOPY);
187 1.76 knakahar #ifdef INET6
188 1.76 knakahar encap_map[1] = thmap_create(0, NULL, THMAP_NOCOPY);
189 1.76 knakahar #endif
190 1.1 itojun }
191 1.1 itojun
192 1.4 itojun #ifdef INET
193 1.7 itojun static struct encaptab *
194 1.56 knakahar encap4_lookup(struct mbuf *m, int off, int proto, enum direction dir,
195 1.56 knakahar struct psref *match_psref)
196 1.1 itojun {
197 1.1 itojun struct ip *ip;
198 1.33 pooka struct ip_pack4 pack;
199 1.1 itojun struct encaptab *ep, *match;
200 1.1 itojun int prio, matchprio;
201 1.56 knakahar int s;
202 1.76 knakahar thmap_t *emap = encap_map[0];
203 1.76 knakahar struct encap_key key;
204 1.1 itojun
205 1.41 ozaki KASSERT(m->m_len >= sizeof(*ip));
206 1.41 ozaki
207 1.1 itojun ip = mtod(m, struct ip *);
208 1.1 itojun
209 1.35 cegger memset(&pack, 0, sizeof(pack));
210 1.7 itojun pack.p.sp_len = sizeof(pack);
211 1.7 itojun pack.mine.sin_family = pack.yours.sin_family = AF_INET;
212 1.7 itojun pack.mine.sin_len = pack.yours.sin_len = sizeof(struct sockaddr_in);
213 1.7 itojun if (dir == INBOUND) {
214 1.7 itojun pack.mine.sin_addr = ip->ip_dst;
215 1.7 itojun pack.yours.sin_addr = ip->ip_src;
216 1.7 itojun } else {
217 1.7 itojun pack.mine.sin_addr = ip->ip_src;
218 1.7 itojun pack.yours.sin_addr = ip->ip_dst;
219 1.7 itojun }
220 1.1 itojun
221 1.1 itojun match = NULL;
222 1.1 itojun matchprio = 0;
223 1.7 itojun
224 1.56 knakahar s = pserialize_read_enter();
225 1.76 knakahar
226 1.76 knakahar encap_key_init(&key, sintosa(&pack.mine), sintosa(&pack.yours));
227 1.76 knakahar while ((ep = thmap_get(emap, &key, sizeof(key))) != NULL) {
228 1.76 knakahar struct psref elem_psref;
229 1.76 knakahar
230 1.76 knakahar KASSERT(ep->af == AF_INET);
231 1.76 knakahar
232 1.76 knakahar if (ep->proto >= 0 && ep->proto != proto) {
233 1.76 knakahar encap_key_inc(&key);
234 1.76 knakahar continue;
235 1.76 knakahar }
236 1.76 knakahar
237 1.76 knakahar psref_acquire(&elem_psref, &ep->psref,
238 1.76 knakahar encaptab.elem_class);
239 1.76 knakahar if (ep->func) {
240 1.76 knakahar pserialize_read_exit(s);
241 1.76 knakahar prio = (*ep->func)(m, off, proto, ep->arg);
242 1.76 knakahar s = pserialize_read_enter();
243 1.76 knakahar } else {
244 1.76 knakahar prio = pack.mine.sin_len + pack.yours.sin_len;
245 1.76 knakahar }
246 1.76 knakahar
247 1.76 knakahar if (prio <= 0) {
248 1.76 knakahar psref_release(&elem_psref, &ep->psref,
249 1.76 knakahar encaptab.elem_class);
250 1.76 knakahar encap_key_inc(&key);
251 1.76 knakahar continue;
252 1.76 knakahar }
253 1.76 knakahar if (prio > matchprio) {
254 1.76 knakahar /* release last matched ep */
255 1.76 knakahar if (match != NULL)
256 1.76 knakahar psref_release(match_psref, &match->psref,
257 1.76 knakahar encaptab.elem_class);
258 1.76 knakahar
259 1.76 knakahar psref_copy(match_psref, &elem_psref,
260 1.76 knakahar encaptab.elem_class);
261 1.76 knakahar matchprio = prio;
262 1.76 knakahar match = ep;
263 1.76 knakahar }
264 1.76 knakahar
265 1.76 knakahar psref_release(&elem_psref, &ep->psref,
266 1.76 knakahar encaptab.elem_class);
267 1.76 knakahar encap_key_inc(&key);
268 1.76 knakahar }
269 1.76 knakahar
270 1.56 knakahar PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) {
271 1.56 knakahar struct psref elem_psref;
272 1.56 knakahar
273 1.1 itojun if (ep->af != AF_INET)
274 1.1 itojun continue;
275 1.1 itojun if (ep->proto >= 0 && ep->proto != proto)
276 1.1 itojun continue;
277 1.56 knakahar
278 1.56 knakahar psref_acquire(&elem_psref, &ep->psref,
279 1.56 knakahar encaptab.elem_class);
280 1.77 knakahar pserialize_read_exit(s);
281 1.77 knakahar /* ep->func is sleepable. e.g. rtalloc1 */
282 1.77 knakahar prio = (*ep->func)(m, off, proto, ep->arg);
283 1.77 knakahar s = pserialize_read_enter();
284 1.1 itojun
285 1.1 itojun /*
286 1.1 itojun * We prioritize the matches by using bit length of the
287 1.77 knakahar * matches. user-supplied matching function
288 1.1 itojun * should return the bit length of the matches (for example,
289 1.1 itojun * if both src/dst are matched for IPv4, 64 should be returned).
290 1.1 itojun * 0 or negative return value means "it did not match".
291 1.1 itojun *
292 1.1 itojun * We need to loop through all the possible candidates
293 1.1 itojun * to get the best match - the search takes O(n) for
294 1.1 itojun * n attachments (i.e. interfaces).
295 1.1 itojun */
296 1.56 knakahar if (prio <= 0) {
297 1.56 knakahar psref_release(&elem_psref, &ep->psref,
298 1.56 knakahar encaptab.elem_class);
299 1.1 itojun continue;
300 1.56 knakahar }
301 1.1 itojun if (prio > matchprio) {
302 1.56 knakahar /* release last matched ep */
303 1.56 knakahar if (match != NULL)
304 1.56 knakahar psref_release(match_psref, &match->psref,
305 1.56 knakahar encaptab.elem_class);
306 1.56 knakahar
307 1.56 knakahar psref_copy(match_psref, &elem_psref,
308 1.56 knakahar encaptab.elem_class);
309 1.1 itojun matchprio = prio;
310 1.1 itojun match = ep;
311 1.1 itojun }
312 1.56 knakahar KASSERTMSG((match == NULL) || psref_held(&match->psref,
313 1.56 knakahar encaptab.elem_class),
314 1.56 knakahar "current match = %p, but not hold its psref", match);
315 1.56 knakahar
316 1.56 knakahar psref_release(&elem_psref, &ep->psref,
317 1.56 knakahar encaptab.elem_class);
318 1.1 itojun }
319 1.56 knakahar pserialize_read_exit(s);
320 1.1 itojun
321 1.7 itojun return match;
322 1.7 itojun }
323 1.7 itojun
324 1.7 itojun void
325 1.70 maxv encap4_input(struct mbuf *m, int off, int proto)
326 1.7 itojun {
327 1.51 knakahar const struct encapsw *esw;
328 1.7 itojun struct encaptab *match;
329 1.56 knakahar struct psref match_psref;
330 1.7 itojun
331 1.56 knakahar match = encap4_lookup(m, off, proto, INBOUND, &match_psref);
332 1.1 itojun if (match) {
333 1.1 itojun /* found a match, "match" has the best one */
334 1.51 knakahar esw = match->esw;
335 1.51 knakahar if (esw && esw->encapsw4.pr_input) {
336 1.66 knakahar (*esw->encapsw4.pr_input)(m, off, proto, match->arg);
337 1.56 knakahar psref_release(&match_psref, &match->psref,
338 1.56 knakahar encaptab.elem_class);
339 1.54 knakahar } else {
340 1.56 knakahar psref_release(&match_psref, &match->psref,
341 1.56 knakahar encaptab.elem_class);
342 1.1 itojun m_freem(m);
343 1.54 knakahar }
344 1.1 itojun return;
345 1.1 itojun }
346 1.1 itojun
347 1.1 itojun /* last resort: inject to raw socket */
348 1.69 knakahar SOFTNET_LOCK_IF_NET_MPSAFE();
349 1.1 itojun rip_input(m, off, proto);
350 1.69 knakahar SOFTNET_UNLOCK_IF_NET_MPSAFE();
351 1.1 itojun }
352 1.1 itojun #endif
353 1.1 itojun
354 1.1 itojun #ifdef INET6
355 1.7 itojun static struct encaptab *
356 1.56 knakahar encap6_lookup(struct mbuf *m, int off, int proto, enum direction dir,
357 1.56 knakahar struct psref *match_psref)
358 1.1 itojun {
359 1.1 itojun struct ip6_hdr *ip6;
360 1.33 pooka struct ip_pack6 pack;
361 1.7 itojun int prio, matchprio;
362 1.56 knakahar int s;
363 1.1 itojun struct encaptab *ep, *match;
364 1.76 knakahar thmap_t *emap = encap_map[1];
365 1.76 knakahar struct encap_key key;
366 1.1 itojun
367 1.41 ozaki KASSERT(m->m_len >= sizeof(*ip6));
368 1.41 ozaki
369 1.1 itojun ip6 = mtod(m, struct ip6_hdr *);
370 1.1 itojun
371 1.35 cegger memset(&pack, 0, sizeof(pack));
372 1.7 itojun pack.p.sp_len = sizeof(pack);
373 1.7 itojun pack.mine.sin6_family = pack.yours.sin6_family = AF_INET6;
374 1.7 itojun pack.mine.sin6_len = pack.yours.sin6_len = sizeof(struct sockaddr_in6);
375 1.7 itojun if (dir == INBOUND) {
376 1.7 itojun pack.mine.sin6_addr = ip6->ip6_dst;
377 1.7 itojun pack.yours.sin6_addr = ip6->ip6_src;
378 1.7 itojun } else {
379 1.7 itojun pack.mine.sin6_addr = ip6->ip6_src;
380 1.7 itojun pack.yours.sin6_addr = ip6->ip6_dst;
381 1.7 itojun }
382 1.1 itojun
383 1.1 itojun match = NULL;
384 1.1 itojun matchprio = 0;
385 1.7 itojun
386 1.56 knakahar s = pserialize_read_enter();
387 1.76 knakahar
388 1.76 knakahar encap_key_init(&key, sin6tosa(&pack.mine), sin6tosa(&pack.yours));
389 1.76 knakahar while ((ep = thmap_get(emap, &key, sizeof(key))) != NULL) {
390 1.76 knakahar struct psref elem_psref;
391 1.76 knakahar
392 1.76 knakahar KASSERT(ep->af == AF_INET6);
393 1.76 knakahar
394 1.76 knakahar if (ep->proto >= 0 && ep->proto != proto) {
395 1.76 knakahar encap_key_inc(&key);
396 1.76 knakahar continue;
397 1.76 knakahar }
398 1.76 knakahar
399 1.76 knakahar psref_acquire(&elem_psref, &ep->psref,
400 1.76 knakahar encaptab.elem_class);
401 1.76 knakahar if (ep->func) {
402 1.76 knakahar pserialize_read_exit(s);
403 1.76 knakahar prio = (*ep->func)(m, off, proto, ep->arg);
404 1.76 knakahar s = pserialize_read_enter();
405 1.76 knakahar } else {
406 1.76 knakahar prio = pack.mine.sin6_len + pack.yours.sin6_len;
407 1.76 knakahar }
408 1.76 knakahar
409 1.76 knakahar if (prio <= 0) {
410 1.76 knakahar psref_release(&elem_psref, &ep->psref,
411 1.76 knakahar encaptab.elem_class);
412 1.76 knakahar encap_key_inc(&key);
413 1.76 knakahar continue;
414 1.76 knakahar }
415 1.76 knakahar if (prio > matchprio) {
416 1.76 knakahar /* release last matched ep */
417 1.76 knakahar if (match != NULL)
418 1.76 knakahar psref_release(match_psref, &match->psref,
419 1.76 knakahar encaptab.elem_class);
420 1.76 knakahar
421 1.76 knakahar psref_copy(match_psref, &elem_psref,
422 1.76 knakahar encaptab.elem_class);
423 1.76 knakahar matchprio = prio;
424 1.76 knakahar match = ep;
425 1.76 knakahar }
426 1.76 knakahar psref_release(&elem_psref, &ep->psref,
427 1.76 knakahar encaptab.elem_class);
428 1.76 knakahar encap_key_inc(&key);
429 1.76 knakahar }
430 1.76 knakahar
431 1.56 knakahar PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) {
432 1.56 knakahar struct psref elem_psref;
433 1.56 knakahar
434 1.1 itojun if (ep->af != AF_INET6)
435 1.1 itojun continue;
436 1.1 itojun if (ep->proto >= 0 && ep->proto != proto)
437 1.1 itojun continue;
438 1.56 knakahar
439 1.56 knakahar psref_acquire(&elem_psref, &ep->psref,
440 1.56 knakahar encaptab.elem_class);
441 1.56 knakahar
442 1.77 knakahar pserialize_read_exit(s);
443 1.77 knakahar /* ep->func is sleepable. e.g. rtalloc1 */
444 1.77 knakahar prio = (*ep->func)(m, off, proto, ep->arg);
445 1.77 knakahar s = pserialize_read_enter();
446 1.1 itojun
447 1.7 itojun /* see encap4_lookup() for issues here */
448 1.56 knakahar if (prio <= 0) {
449 1.56 knakahar psref_release(&elem_psref, &ep->psref,
450 1.56 knakahar encaptab.elem_class);
451 1.1 itojun continue;
452 1.56 knakahar }
453 1.1 itojun if (prio > matchprio) {
454 1.56 knakahar /* release last matched ep */
455 1.56 knakahar if (match != NULL)
456 1.56 knakahar psref_release(match_psref, &match->psref,
457 1.56 knakahar encaptab.elem_class);
458 1.56 knakahar
459 1.56 knakahar psref_copy(match_psref, &elem_psref,
460 1.56 knakahar encaptab.elem_class);
461 1.1 itojun matchprio = prio;
462 1.1 itojun match = ep;
463 1.1 itojun }
464 1.56 knakahar KASSERTMSG((match == NULL) || psref_held(&match->psref,
465 1.56 knakahar encaptab.elem_class),
466 1.56 knakahar "current match = %p, but not hold its psref", match);
467 1.56 knakahar
468 1.56 knakahar psref_release(&elem_psref, &ep->psref,
469 1.56 knakahar encaptab.elem_class);
470 1.1 itojun }
471 1.56 knakahar pserialize_read_exit(s);
472 1.1 itojun
473 1.7 itojun return match;
474 1.7 itojun }
475 1.7 itojun
476 1.7 itojun int
477 1.23 perry encap6_input(struct mbuf **mp, int *offp, int proto)
478 1.7 itojun {
479 1.7 itojun struct mbuf *m = *mp;
480 1.51 knakahar const struct encapsw *esw;
481 1.7 itojun struct encaptab *match;
482 1.56 knakahar struct psref match_psref;
483 1.69 knakahar int rv;
484 1.7 itojun
485 1.56 knakahar match = encap6_lookup(m, *offp, proto, INBOUND, &match_psref);
486 1.7 itojun
487 1.1 itojun if (match) {
488 1.1 itojun /* found a match */
489 1.51 knakahar esw = match->esw;
490 1.51 knakahar if (esw && esw->encapsw6.pr_input) {
491 1.56 knakahar int ret;
492 1.66 knakahar ret = (*esw->encapsw6.pr_input)(mp, offp, proto,
493 1.66 knakahar match->arg);
494 1.56 knakahar psref_release(&match_psref, &match->psref,
495 1.56 knakahar encaptab.elem_class);
496 1.56 knakahar return ret;
497 1.1 itojun } else {
498 1.56 knakahar psref_release(&match_psref, &match->psref,
499 1.56 knakahar encaptab.elem_class);
500 1.1 itojun m_freem(m);
501 1.1 itojun return IPPROTO_DONE;
502 1.1 itojun }
503 1.1 itojun }
504 1.1 itojun
505 1.1 itojun /* last resort: inject to raw socket */
506 1.69 knakahar SOFTNET_LOCK_IF_NET_MPSAFE();
507 1.69 knakahar rv = rip6_input(mp, offp, proto);
508 1.69 knakahar SOFTNET_UNLOCK_IF_NET_MPSAFE();
509 1.69 knakahar return rv;
510 1.1 itojun }
511 1.1 itojun #endif
512 1.1 itojun
513 1.7 itojun static int
514 1.23 perry encap_add(struct encaptab *ep)
515 1.1 itojun {
516 1.1 itojun
517 1.56 knakahar KASSERT(encap_lock_held());
518 1.54 knakahar
519 1.56 knakahar PSLIST_WRITER_INSERT_HEAD(&encap_table, ep, chain);
520 1.7 itojun
521 1.56 knakahar return 0;
522 1.7 itojun }
523 1.7 itojun
524 1.7 itojun static int
525 1.23 perry encap_remove(struct encaptab *ep)
526 1.7 itojun {
527 1.7 itojun int error = 0;
528 1.7 itojun
529 1.56 knakahar KASSERT(encap_lock_held());
530 1.54 knakahar
531 1.56 knakahar PSLIST_WRITER_REMOVE(ep, chain);
532 1.56 knakahar
533 1.7 itojun return error;
534 1.7 itojun }
535 1.7 itojun
536 1.73 riastrad static void
537 1.23 perry encap_afcheck(int af, const struct sockaddr *sp, const struct sockaddr *dp)
538 1.7 itojun {
539 1.7 itojun
540 1.73 riastrad KASSERT(sp != NULL && dp != NULL);
541 1.73 riastrad KASSERT(sp->sa_len == dp->sa_len);
542 1.73 riastrad KASSERT(af == sp->sa_family && af == dp->sa_family);
543 1.7 itojun
544 1.74 riastrad socklen_t len __diagused = sockaddr_getsize_by_family(af);
545 1.73 riastrad KASSERT(len != 0 && len == sp->sa_len && len == dp->sa_len);
546 1.1 itojun }
547 1.1 itojun
548 1.1 itojun const struct encaptab *
549 1.23 perry encap_attach_func(int af, int proto,
550 1.75 knakahar encap_priofunc_t *func,
551 1.51 knakahar const struct encapsw *esw, void *arg)
552 1.1 itojun {
553 1.1 itojun struct encaptab *ep;
554 1.1 itojun int error;
555 1.61 knakahar #ifndef ENCAP_MPSAFE
556 1.1 itojun int s;
557 1.1 itojun
558 1.1 itojun s = splsoftnet();
559 1.61 knakahar #endif
560 1.73 riastrad
561 1.73 riastrad ASSERT_SLEEPABLE();
562 1.73 riastrad
563 1.1 itojun /* sanity check on args */
564 1.73 riastrad KASSERT(func != NULL);
565 1.73 riastrad KASSERT(af == AF_INET
566 1.73 riastrad #ifdef INET6
567 1.73 riastrad || af == AF_INET6
568 1.73 riastrad #endif
569 1.73 riastrad );
570 1.7 itojun
571 1.73 riastrad ep = kmem_alloc(sizeof(*ep), KM_SLEEP);
572 1.35 cegger memset(ep, 0, sizeof(*ep));
573 1.1 itojun
574 1.1 itojun ep->af = af;
575 1.1 itojun ep->proto = proto;
576 1.1 itojun ep->func = func;
577 1.51 knakahar ep->esw = esw;
578 1.1 itojun ep->arg = arg;
579 1.56 knakahar psref_target_init(&ep->psref, encaptab.elem_class);
580 1.1 itojun
581 1.7 itojun error = encap_add(ep);
582 1.7 itojun if (error)
583 1.67 maxv goto gc;
584 1.1 itojun
585 1.1 itojun error = 0;
586 1.61 knakahar #ifndef ENCAP_MPSAFE
587 1.1 itojun splx(s);
588 1.61 knakahar #endif
589 1.1 itojun return ep;
590 1.1 itojun
591 1.67 maxv gc:
592 1.67 maxv kmem_free(ep, sizeof(*ep));
593 1.61 knakahar #ifndef ENCAP_MPSAFE
594 1.1 itojun splx(s);
595 1.61 knakahar #endif
596 1.1 itojun return NULL;
597 1.1 itojun }
598 1.1 itojun
599 1.76 knakahar static void
600 1.76 knakahar encap_key_init(struct encap_key *key,
601 1.76 knakahar const struct sockaddr *local, const struct sockaddr *remote)
602 1.76 knakahar {
603 1.76 knakahar
604 1.76 knakahar memset(key, 0, sizeof(*key));
605 1.76 knakahar
606 1.76 knakahar sockaddr_copy(&key->local_sa, sizeof(key->local_u), local);
607 1.76 knakahar sockaddr_copy(&key->remote_sa, sizeof(key->remote_u), remote);
608 1.76 knakahar }
609 1.76 knakahar
610 1.76 knakahar static void
611 1.76 knakahar encap_key_inc(struct encap_key *key)
612 1.76 knakahar {
613 1.76 knakahar
614 1.76 knakahar (key->seq)++;
615 1.76 knakahar }
616 1.76 knakahar
617 1.76 knakahar static void
618 1.76 knakahar encap_key_dec(struct encap_key *key)
619 1.76 knakahar {
620 1.76 knakahar
621 1.76 knakahar (key->seq)--;
622 1.76 knakahar }
623 1.76 knakahar
624 1.76 knakahar static void
625 1.76 knakahar encap_key_copy(struct encap_key *dst, const struct encap_key *src)
626 1.76 knakahar {
627 1.76 knakahar
628 1.76 knakahar memset(dst, 0, sizeof(*dst));
629 1.76 knakahar *dst = *src;
630 1.76 knakahar }
631 1.76 knakahar
632 1.76 knakahar /*
633 1.76 knakahar * src is always my side, and dst is always remote side.
634 1.76 knakahar * Return value will be necessary as input (cookie) for encap_detach().
635 1.76 knakahar */
636 1.76 knakahar const struct encaptab *
637 1.76 knakahar encap_attach_addr(int af, int proto,
638 1.76 knakahar const struct sockaddr *src, const struct sockaddr *dst,
639 1.76 knakahar encap_priofunc_t *func,
640 1.76 knakahar const struct encapsw *esw, void *arg)
641 1.76 knakahar {
642 1.76 knakahar struct encaptab *ep;
643 1.76 knakahar size_t l;
644 1.76 knakahar thmap_t *emap;
645 1.76 knakahar void *retep;
646 1.76 knakahar struct ip_pack4 *pack4;
647 1.76 knakahar #ifdef INET6
648 1.76 knakahar struct ip_pack6 *pack6;
649 1.76 knakahar #endif
650 1.76 knakahar
651 1.76 knakahar ASSERT_SLEEPABLE();
652 1.76 knakahar
653 1.76 knakahar encap_afcheck(af, src, dst);
654 1.76 knakahar
655 1.76 knakahar switch (af) {
656 1.76 knakahar case AF_INET:
657 1.76 knakahar l = sizeof(*pack4);
658 1.76 knakahar emap = encap_map[0];
659 1.76 knakahar break;
660 1.76 knakahar #ifdef INET6
661 1.76 knakahar case AF_INET6:
662 1.76 knakahar l = sizeof(*pack6);
663 1.76 knakahar emap = encap_map[1];
664 1.76 knakahar break;
665 1.76 knakahar #endif
666 1.76 knakahar default:
667 1.76 knakahar return NULL;
668 1.76 knakahar }
669 1.76 knakahar
670 1.76 knakahar ep = kmem_zalloc(sizeof(*ep), KM_SLEEP);
671 1.76 knakahar ep->addrpack = kmem_zalloc(l, KM_SLEEP);
672 1.76 knakahar ep->addrpack->sa_len = l & 0xff;
673 1.76 knakahar ep->af = af;
674 1.76 knakahar ep->proto = proto;
675 1.76 knakahar ep->flag = IP_ENCAP_ADDR_ENABLE;
676 1.76 knakahar switch (af) {
677 1.76 knakahar case AF_INET:
678 1.76 knakahar pack4 = (struct ip_pack4 *)ep->addrpack;
679 1.76 knakahar ep->src = (struct sockaddr *)&pack4->mine;
680 1.76 knakahar ep->dst = (struct sockaddr *)&pack4->yours;
681 1.76 knakahar break;
682 1.76 knakahar #ifdef INET6
683 1.76 knakahar case AF_INET6:
684 1.76 knakahar pack6 = (struct ip_pack6 *)ep->addrpack;
685 1.76 knakahar ep->src = (struct sockaddr *)&pack6->mine;
686 1.76 knakahar ep->dst = (struct sockaddr *)&pack6->yours;
687 1.76 knakahar break;
688 1.76 knakahar #endif
689 1.76 knakahar }
690 1.76 knakahar memcpy(ep->src, src, src->sa_len);
691 1.76 knakahar memcpy(ep->dst, dst, dst->sa_len);
692 1.76 knakahar ep->esw = esw;
693 1.76 knakahar ep->arg = arg;
694 1.76 knakahar ep->func = func;
695 1.76 knakahar psref_target_init(&ep->psref, encaptab.elem_class);
696 1.76 knakahar
697 1.76 knakahar encap_key_init(&ep->key, src, dst);
698 1.76 knakahar while ((retep = thmap_put(emap, &ep->key, sizeof(ep->key), ep)) != ep)
699 1.76 knakahar encap_key_inc(&ep->key);
700 1.76 knakahar return ep;
701 1.76 knakahar }
702 1.76 knakahar
703 1.76 knakahar
704 1.7 itojun /* XXX encap4_ctlinput() is necessary if we set DF=1 on outer IPv4 header */
705 1.7 itojun
706 1.7 itojun #ifdef INET6
707 1.32 ad void *
708 1.29 dyoung encap6_ctlinput(int cmd, const struct sockaddr *sa, void *d0)
709 1.7 itojun {
710 1.7 itojun void *d = d0;
711 1.7 itojun struct ip6_hdr *ip6;
712 1.7 itojun struct mbuf *m;
713 1.7 itojun int off;
714 1.7 itojun struct ip6ctlparam *ip6cp = NULL;
715 1.7 itojun int nxt;
716 1.56 knakahar int s;
717 1.7 itojun struct encaptab *ep;
718 1.51 knakahar const struct encapsw *esw;
719 1.7 itojun
720 1.7 itojun if (sa->sa_family != AF_INET6 ||
721 1.7 itojun sa->sa_len != sizeof(struct sockaddr_in6))
722 1.32 ad return NULL;
723 1.7 itojun
724 1.7 itojun if ((unsigned)cmd >= PRC_NCMDS)
725 1.32 ad return NULL;
726 1.7 itojun if (cmd == PRC_HOSTDEAD)
727 1.7 itojun d = NULL;
728 1.7 itojun else if (cmd == PRC_MSGSIZE)
729 1.7 itojun ; /* special code is present, see below */
730 1.7 itojun else if (inet6ctlerrmap[cmd] == 0)
731 1.32 ad return NULL;
732 1.7 itojun
733 1.7 itojun /* if the parameter is from icmp6, decode it. */
734 1.7 itojun if (d != NULL) {
735 1.7 itojun ip6cp = (struct ip6ctlparam *)d;
736 1.7 itojun m = ip6cp->ip6c_m;
737 1.7 itojun ip6 = ip6cp->ip6c_ip6;
738 1.7 itojun off = ip6cp->ip6c_off;
739 1.7 itojun nxt = ip6cp->ip6c_nxt;
740 1.15 mycroft
741 1.15 mycroft if (ip6 && cmd == PRC_MSGSIZE) {
742 1.15 mycroft int valid = 0;
743 1.15 mycroft struct encaptab *match;
744 1.56 knakahar struct psref elem_psref;
745 1.15 mycroft
746 1.15 mycroft /*
747 1.15 mycroft * Check to see if we have a valid encap configuration.
748 1.15 mycroft */
749 1.56 knakahar match = encap6_lookup(m, off, nxt, OUTBOUND,
750 1.56 knakahar &elem_psref);
751 1.72 knakahar if (match) {
752 1.15 mycroft valid++;
753 1.72 knakahar psref_release(&elem_psref, &match->psref,
754 1.72 knakahar encaptab.elem_class);
755 1.72 knakahar }
756 1.15 mycroft
757 1.15 mycroft /*
758 1.15 mycroft * Depending on the value of "valid" and routing table
759 1.15 mycroft * size (mtudisc_{hi,lo}wat), we will:
760 1.15 mycroft * - recalcurate the new MTU and create the
761 1.15 mycroft * corresponding routing entry, or
762 1.15 mycroft * - ignore the MTU change notification.
763 1.15 mycroft */
764 1.15 mycroft icmp6_mtudisc_update((struct ip6ctlparam *)d, valid);
765 1.15 mycroft }
766 1.7 itojun } else {
767 1.7 itojun m = NULL;
768 1.7 itojun ip6 = NULL;
769 1.7 itojun nxt = -1;
770 1.7 itojun }
771 1.7 itojun
772 1.7 itojun /* inform all listeners */
773 1.56 knakahar
774 1.56 knakahar s = pserialize_read_enter();
775 1.56 knakahar PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) {
776 1.56 knakahar struct psref elem_psref;
777 1.56 knakahar
778 1.7 itojun if (ep->af != AF_INET6)
779 1.7 itojun continue;
780 1.7 itojun if (ep->proto >= 0 && ep->proto != nxt)
781 1.7 itojun continue;
782 1.7 itojun
783 1.7 itojun /* should optimize by looking at address pairs */
784 1.7 itojun
785 1.7 itojun /* XXX need to pass ep->arg or ep itself to listeners */
786 1.56 knakahar psref_acquire(&elem_psref, &ep->psref,
787 1.56 knakahar encaptab.elem_class);
788 1.51 knakahar esw = ep->esw;
789 1.51 knakahar if (esw && esw->encapsw6.pr_ctlinput) {
790 1.56 knakahar pserialize_read_exit(s);
791 1.56 knakahar /* pr_ctlinput is sleepable. e.g. rtcache_free */
792 1.52 knakahar (*esw->encapsw6.pr_ctlinput)(cmd, sa, d, ep->arg);
793 1.56 knakahar s = pserialize_read_enter();
794 1.51 knakahar }
795 1.56 knakahar psref_release(&elem_psref, &ep->psref,
796 1.56 knakahar encaptab.elem_class);
797 1.7 itojun }
798 1.56 knakahar pserialize_read_exit(s);
799 1.7 itojun
800 1.7 itojun rip6_ctlinput(cmd, sa, d0);
801 1.32 ad return NULL;
802 1.7 itojun }
803 1.7 itojun #endif
804 1.7 itojun
805 1.76 knakahar static int
806 1.76 knakahar encap_detach_addr(const struct encaptab *ep)
807 1.76 knakahar {
808 1.76 knakahar thmap_t *emap;
809 1.76 knakahar struct encaptab *retep;
810 1.76 knakahar struct encaptab *target;
811 1.76 knakahar void *thgc;
812 1.76 knakahar struct encap_key key;
813 1.76 knakahar
814 1.76 knakahar KASSERT(encap_lock_held());
815 1.76 knakahar KASSERT(ep->flag & IP_ENCAP_ADDR_ENABLE);
816 1.76 knakahar
817 1.76 knakahar switch (ep->af) {
818 1.76 knakahar case AF_INET:
819 1.76 knakahar emap = encap_map[0];
820 1.76 knakahar break;
821 1.76 knakahar #ifdef INET6
822 1.76 knakahar case AF_INET6:
823 1.76 knakahar emap = encap_map[1];
824 1.76 knakahar break;
825 1.76 knakahar #endif
826 1.76 knakahar default:
827 1.76 knakahar return EINVAL;
828 1.76 knakahar }
829 1.76 knakahar
830 1.76 knakahar retep = thmap_del(emap, &ep->key, sizeof(ep->key));
831 1.76 knakahar if (retep != ep) {
832 1.76 knakahar return ENOENT;
833 1.76 knakahar }
834 1.76 knakahar target = retep;
835 1.76 knakahar
836 1.76 knakahar /*
837 1.76 knakahar * To keep continuity, decrement seq after detached encaptab.
838 1.76 knakahar */
839 1.76 knakahar encap_key_copy(&key, &ep->key);
840 1.76 knakahar encap_key_inc(&key);
841 1.76 knakahar while ((retep = thmap_del(emap, &key, sizeof(key))) != NULL) {
842 1.76 knakahar void *pp;
843 1.76 knakahar
844 1.76 knakahar encap_key_dec(&retep->key);
845 1.76 knakahar pp = thmap_put(emap, &retep->key, sizeof(retep->key), retep);
846 1.76 knakahar KASSERT(retep == pp);
847 1.76 knakahar
848 1.76 knakahar encap_key_inc(&key);
849 1.76 knakahar }
850 1.76 knakahar
851 1.76 knakahar thgc = thmap_stage_gc(emap);
852 1.76 knakahar pserialize_perform(encaptab.psz);
853 1.76 knakahar thmap_gc(emap, thgc);
854 1.76 knakahar psref_target_destroy(&target->psref, encaptab.elem_class);
855 1.76 knakahar kmem_free(target->addrpack, target->addrpack->sa_len);
856 1.76 knakahar kmem_free(target, sizeof(*target));
857 1.76 knakahar
858 1.76 knakahar return 0;
859 1.76 knakahar }
860 1.76 knakahar
861 1.1 itojun int
862 1.23 perry encap_detach(const struct encaptab *cookie)
863 1.1 itojun {
864 1.1 itojun const struct encaptab *ep = cookie;
865 1.56 knakahar struct encaptab *p;
866 1.7 itojun int error;
867 1.1 itojun
868 1.56 knakahar KASSERT(encap_lock_held());
869 1.56 knakahar
870 1.76 knakahar if (ep->flag & IP_ENCAP_ADDR_ENABLE)
871 1.76 knakahar return encap_detach_addr(ep);
872 1.76 knakahar
873 1.56 knakahar PSLIST_WRITER_FOREACH(p, &encap_table, struct encaptab, chain) {
874 1.1 itojun if (p == ep) {
875 1.7 itojun error = encap_remove(p);
876 1.7 itojun if (error)
877 1.7 itojun return error;
878 1.56 knakahar else
879 1.56 knakahar break;
880 1.56 knakahar }
881 1.56 knakahar }
882 1.56 knakahar if (p == NULL)
883 1.56 knakahar return ENOENT;
884 1.56 knakahar
885 1.56 knakahar pserialize_perform(encaptab.psz);
886 1.56 knakahar psref_target_destroy(&p->psref,
887 1.56 knakahar encaptab.elem_class);
888 1.56 knakahar kmem_free(p, sizeof(*p));
889 1.1 itojun
890 1.56 knakahar return 0;
891 1.7 itojun }
892 1.7 itojun
893 1.59 knakahar int
894 1.54 knakahar encap_lock_enter(void)
895 1.54 knakahar {
896 1.59 knakahar int error;
897 1.59 knakahar
898 1.59 knakahar mutex_enter(&encap_whole.lock);
899 1.59 knakahar while (encap_whole.busy != NULL) {
900 1.59 knakahar error = cv_wait_sig(&encap_whole.cv, &encap_whole.lock);
901 1.59 knakahar if (error) {
902 1.59 knakahar mutex_exit(&encap_whole.lock);
903 1.59 knakahar return error;
904 1.59 knakahar }
905 1.59 knakahar }
906 1.59 knakahar KASSERT(encap_whole.busy == NULL);
907 1.59 knakahar encap_whole.busy = curlwp;
908 1.59 knakahar mutex_exit(&encap_whole.lock);
909 1.54 knakahar
910 1.59 knakahar return 0;
911 1.54 knakahar }
912 1.54 knakahar
913 1.54 knakahar void
914 1.54 knakahar encap_lock_exit(void)
915 1.54 knakahar {
916 1.54 knakahar
917 1.59 knakahar mutex_enter(&encap_whole.lock);
918 1.59 knakahar KASSERT(encap_whole.busy == curlwp);
919 1.59 knakahar encap_whole.busy = NULL;
920 1.59 knakahar cv_broadcast(&encap_whole.cv);
921 1.59 knakahar mutex_exit(&encap_whole.lock);
922 1.54 knakahar }
923 1.56 knakahar
924 1.56 knakahar bool
925 1.56 knakahar encap_lock_held(void)
926 1.56 knakahar {
927 1.56 knakahar
928 1.59 knakahar return (encap_whole.busy == curlwp);
929 1.56 knakahar }
930