1 1.78 andvar /* $NetBSD: ip_encap.c,v 1.78 2025/02/26 04:49:45 andvar Exp $ */ 2 1.7 itojun /* $KAME: ip_encap.c,v 1.73 2001/10/02 08:30:58 itojun Exp $ */ 3 1.1 itojun 4 1.1 itojun /* 5 1.1 itojun * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 1.1 itojun * All rights reserved. 7 1.1 itojun * 8 1.1 itojun * Redistribution and use in source and binary forms, with or without 9 1.1 itojun * modification, are permitted provided that the following conditions 10 1.1 itojun * are met: 11 1.1 itojun * 1. Redistributions of source code must retain the above copyright 12 1.1 itojun * notice, this list of conditions and the following disclaimer. 13 1.1 itojun * 2. Redistributions in binary form must reproduce the above copyright 14 1.1 itojun * notice, this list of conditions and the following disclaimer in the 15 1.1 itojun * documentation and/or other materials provided with the distribution. 16 1.1 itojun * 3. Neither the name of the project nor the names of its contributors 17 1.1 itojun * may be used to endorse or promote products derived from this software 18 1.1 itojun * without specific prior written permission. 19 1.1 itojun * 20 1.1 itojun * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 1.1 itojun * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 1.1 itojun * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 1.1 itojun * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 1.1 itojun * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 1.1 itojun * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 1.1 itojun * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 1.1 itojun * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 1.1 itojun * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 1.1 itojun * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 1.1 itojun * SUCH DAMAGE. 31 1.1 itojun */ 32 1.1 itojun /* 33 1.1 itojun * My grandfather said that there's a devil inside tunnelling technology... 34 1.1 itojun * 35 1.1 itojun * We have surprisingly many protocols that want packets with IP protocol 36 1.1 itojun * #4 or #41. Here's a list of protocols that want protocol #41: 37 1.1 itojun * RFC1933 configured tunnel 38 1.1 itojun * RFC1933 automatic tunnel 39 1.1 itojun * RFC2401 IPsec tunnel 40 1.1 itojun * RFC2473 IPv6 generic packet tunnelling 41 1.1 itojun * RFC2529 6over4 tunnel 42 1.7 itojun * RFC3056 6to4 tunnel 43 1.7 itojun * isatap tunnel 44 1.1 itojun * mobile-ip6 (uses RFC2473) 45 1.1 itojun * Here's a list of protocol that want protocol #4: 46 1.1 itojun * RFC1853 IPv4-in-IPv4 tunnelling 47 1.1 itojun * RFC2003 IPv4 encapsulation within IPv4 48 1.1 itojun * RFC2344 reverse tunnelling for mobile-ip4 49 1.1 itojun * RFC2401 IPsec tunnel 50 1.1 itojun * Well, what can I say. They impose different en/decapsulation mechanism 51 1.1 itojun * from each other, so they need separate protocol handler. The only one 52 1.1 itojun * we can easily determine by protocol # is IPsec, which always has 53 1.1 itojun * AH/ESP/IPComp header right after outer IP header. 54 1.1 itojun * 55 1.1 itojun * So, clearly good old protosw does not work for protocol #4 and #41. 56 1.1 itojun * The code will let you match protocol via src/dst address pair. 57 1.1 itojun */ 58 1.1 itojun /* XXX is M_NETADDR correct? */ 59 1.6 lukem 60 1.6 lukem #include <sys/cdefs.h> 61 1.78 andvar __KERNEL_RCSID(0, "$NetBSD: ip_encap.c,v 1.78 2025/02/26 04:49:45 andvar Exp $"); 62 1.1 itojun 63 1.46 pooka #ifdef _KERNEL_OPT 64 1.4 itojun #include "opt_mrouting.h" 65 1.4 itojun #include "opt_inet.h" 66 1.61 knakahar #include "opt_net_mpsafe.h" 67 1.46 pooka #endif 68 1.1 itojun 69 1.1 itojun #include <sys/param.h> 70 1.1 itojun #include <sys/systm.h> 71 1.1 itojun #include <sys/socket.h> 72 1.71 knakahar #include <sys/socketvar.h> /* for softnet_lock */ 73 1.1 itojun #include <sys/sockio.h> 74 1.1 itojun #include <sys/mbuf.h> 75 1.1 itojun #include <sys/errno.h> 76 1.4 itojun #include <sys/queue.h> 77 1.47 knakahar #include <sys/kmem.h> 78 1.56 knakahar #include <sys/mutex.h> 79 1.59 knakahar #include <sys/condvar.h> 80 1.56 knakahar #include <sys/psref.h> 81 1.56 knakahar #include <sys/pslist.h> 82 1.76 knakahar #include <sys/thmap.h> 83 1.1 itojun 84 1.1 itojun #include <net/if.h> 85 1.1 itojun 86 1.1 itojun #include <netinet/in.h> 87 1.1 itojun #include <netinet/in_systm.h> 88 1.1 itojun #include <netinet/ip.h> 89 1.1 itojun #include <netinet/ip_var.h> 90 1.1 itojun #include <netinet/ip_encap.h> 91 1.1 itojun #ifdef MROUTING 92 1.1 itojun #include <netinet/ip_mroute.h> 93 1.1 itojun #endif /* MROUTING */ 94 1.1 itojun 95 1.1 itojun #ifdef INET6 96 1.1 itojun #include <netinet/ip6.h> 97 1.1 itojun #include <netinet6/ip6_var.h> 98 1.51 knakahar #include <netinet6/ip6protosw.h> /* for struct ip6ctlparam */ 99 1.7 itojun #include <netinet6/in6_var.h> 100 1.7 itojun #include <netinet6/in6_pcb.h> 101 1.7 itojun #include <netinet/icmp6.h> 102 1.1 itojun #endif 103 1.1 itojun 104 1.61 knakahar #ifdef NET_MPSAFE 105 1.61 knakahar #define ENCAP_MPSAFE 1 106 1.61 knakahar #endif 107 1.61 knakahar 108 1.7 itojun enum direction { INBOUND, OUTBOUND }; 109 1.7 itojun 110 1.7 itojun #ifdef INET 111 1.56 knakahar static struct encaptab *encap4_lookup(struct mbuf *, int, int, enum direction, 112 1.56 knakahar struct psref *); 113 1.7 itojun #endif 114 1.7 itojun #ifdef INET6 115 1.56 knakahar static struct encaptab *encap6_lookup(struct mbuf *, int, int, enum direction, 116 1.56 knakahar struct psref *); 117 1.7 itojun #endif 118 1.22 perry static int encap_add(struct encaptab *); 119 1.22 perry static int encap_remove(struct encaptab *); 120 1.73 riastrad static void encap_afcheck(int, const struct sockaddr *, const struct sockaddr *); 121 1.76 knakahar static void encap_key_init(struct encap_key *, const struct sockaddr *, 122 1.76 knakahar const struct sockaddr *); 123 1.76 knakahar static void encap_key_inc(struct encap_key *); 124 1.1 itojun 125 1.56 knakahar /* 126 1.56 knakahar * In encap[46]_lookup(), ep->func can sleep(e.g. rtalloc1) while walking 127 1.56 knakahar * encap_table. So, it cannot use pserialize_read_enter() 128 1.56 knakahar */ 129 1.56 knakahar static struct { 130 1.56 knakahar struct pslist_head list; 131 1.56 knakahar pserialize_t psz; 132 1.56 knakahar struct psref_class *elem_class; /* for the element of et_list */ 133 1.56 knakahar } encaptab __cacheline_aligned = { 134 1.56 knakahar .list = PSLIST_INITIALIZER, 135 1.56 knakahar }; 136 1.56 knakahar #define encap_table encaptab.list 137 1.1 itojun 138 1.59 knakahar static struct { 139 1.59 knakahar kmutex_t lock; 140 1.59 knakahar kcondvar_t cv; 141 1.59 knakahar struct lwp *busy; 142 1.59 knakahar } encap_whole __cacheline_aligned; 143 1.59 knakahar 144 1.76 knakahar static thmap_t *encap_map[2]; /* 0 for AF_INET, 1 for AF_INET6 */ 145 1.76 knakahar 146 1.63 ozaki static bool encap_initialized = false; 147 1.59 knakahar /* 148 1.59 knakahar * must be done before other encap interfaces initialization. 149 1.59 knakahar */ 150 1.59 knakahar void 151 1.59 knakahar encapinit(void) 152 1.59 knakahar { 153 1.59 knakahar 154 1.63 ozaki if (encap_initialized) 155 1.63 ozaki return; 156 1.63 ozaki 157 1.60 knakahar encaptab.psz = pserialize_create(); 158 1.60 knakahar encaptab.elem_class = psref_class_create("encapelem", IPL_SOFTNET); 159 1.60 knakahar 160 1.59 knakahar mutex_init(&encap_whole.lock, MUTEX_DEFAULT, IPL_NONE); 161 1.59 knakahar cv_init(&encap_whole.cv, "ip_encap cv"); 162 1.59 knakahar encap_whole.busy = NULL; 163 1.63 ozaki 164 1.63 ozaki encap_initialized = true; 165 1.59 knakahar } 166 1.59 knakahar 167 1.1 itojun void 168 1.23 perry encap_init(void) 169 1.1 itojun { 170 1.7 itojun static int initialized = 0; 171 1.7 itojun 172 1.7 itojun if (initialized) 173 1.7 itojun return; 174 1.7 itojun initialized++; 175 1.1 itojun #if 0 176 1.1 itojun /* 177 1.1 itojun * we cannot use LIST_INIT() here, since drivers may want to call 178 1.4 itojun * encap_attach(), on driver attach. encap_init() will be called 179 1.1 itojun * on AF_INET{,6} initialization, which happens after driver 180 1.1 itojun * initialization - using LIST_INIT() here can nuke encap_attach() 181 1.1 itojun * from drivers. 182 1.1 itojun */ 183 1.56 knakahar PSLIST_INIT(&encap_table); 184 1.1 itojun #endif 185 1.7 itojun 186 1.76 knakahar encap_map[0] = thmap_create(0, NULL, THMAP_NOCOPY); 187 1.76 knakahar #ifdef INET6 188 1.76 knakahar encap_map[1] = thmap_create(0, NULL, THMAP_NOCOPY); 189 1.76 knakahar #endif 190 1.1 itojun } 191 1.1 itojun 192 1.4 itojun #ifdef INET 193 1.7 itojun static struct encaptab * 194 1.56 knakahar encap4_lookup(struct mbuf *m, int off, int proto, enum direction dir, 195 1.56 knakahar struct psref *match_psref) 196 1.1 itojun { 197 1.1 itojun struct ip *ip; 198 1.33 pooka struct ip_pack4 pack; 199 1.1 itojun struct encaptab *ep, *match; 200 1.1 itojun int prio, matchprio; 201 1.56 knakahar int s; 202 1.76 knakahar thmap_t *emap = encap_map[0]; 203 1.76 knakahar struct encap_key key; 204 1.1 itojun 205 1.41 ozaki KASSERT(m->m_len >= sizeof(*ip)); 206 1.41 ozaki 207 1.1 itojun ip = mtod(m, struct ip *); 208 1.1 itojun 209 1.35 cegger memset(&pack, 0, sizeof(pack)); 210 1.7 itojun pack.p.sp_len = sizeof(pack); 211 1.7 itojun pack.mine.sin_family = pack.yours.sin_family = AF_INET; 212 1.7 itojun pack.mine.sin_len = pack.yours.sin_len = sizeof(struct sockaddr_in); 213 1.7 itojun if (dir == INBOUND) { 214 1.7 itojun pack.mine.sin_addr = ip->ip_dst; 215 1.7 itojun pack.yours.sin_addr = ip->ip_src; 216 1.7 itojun } else { 217 1.7 itojun pack.mine.sin_addr = ip->ip_src; 218 1.7 itojun pack.yours.sin_addr = ip->ip_dst; 219 1.7 itojun } 220 1.1 itojun 221 1.1 itojun match = NULL; 222 1.1 itojun matchprio = 0; 223 1.7 itojun 224 1.56 knakahar s = pserialize_read_enter(); 225 1.76 knakahar 226 1.76 knakahar encap_key_init(&key, sintosa(&pack.mine), sintosa(&pack.yours)); 227 1.76 knakahar while ((ep = thmap_get(emap, &key, sizeof(key))) != NULL) { 228 1.76 knakahar struct psref elem_psref; 229 1.76 knakahar 230 1.76 knakahar KASSERT(ep->af == AF_INET); 231 1.76 knakahar 232 1.76 knakahar if (ep->proto >= 0 && ep->proto != proto) { 233 1.76 knakahar encap_key_inc(&key); 234 1.76 knakahar continue; 235 1.76 knakahar } 236 1.76 knakahar 237 1.76 knakahar psref_acquire(&elem_psref, &ep->psref, 238 1.76 knakahar encaptab.elem_class); 239 1.76 knakahar if (ep->func) { 240 1.76 knakahar pserialize_read_exit(s); 241 1.76 knakahar prio = (*ep->func)(m, off, proto, ep->arg); 242 1.76 knakahar s = pserialize_read_enter(); 243 1.76 knakahar } else { 244 1.76 knakahar prio = pack.mine.sin_len + pack.yours.sin_len; 245 1.76 knakahar } 246 1.76 knakahar 247 1.76 knakahar if (prio <= 0) { 248 1.76 knakahar psref_release(&elem_psref, &ep->psref, 249 1.76 knakahar encaptab.elem_class); 250 1.76 knakahar encap_key_inc(&key); 251 1.76 knakahar continue; 252 1.76 knakahar } 253 1.76 knakahar if (prio > matchprio) { 254 1.76 knakahar /* release last matched ep */ 255 1.76 knakahar if (match != NULL) 256 1.76 knakahar psref_release(match_psref, &match->psref, 257 1.76 knakahar encaptab.elem_class); 258 1.76 knakahar 259 1.76 knakahar psref_copy(match_psref, &elem_psref, 260 1.76 knakahar encaptab.elem_class); 261 1.76 knakahar matchprio = prio; 262 1.76 knakahar match = ep; 263 1.76 knakahar } 264 1.76 knakahar 265 1.76 knakahar psref_release(&elem_psref, &ep->psref, 266 1.76 knakahar encaptab.elem_class); 267 1.76 knakahar encap_key_inc(&key); 268 1.76 knakahar } 269 1.76 knakahar 270 1.56 knakahar PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) { 271 1.56 knakahar struct psref elem_psref; 272 1.56 knakahar 273 1.1 itojun if (ep->af != AF_INET) 274 1.1 itojun continue; 275 1.1 itojun if (ep->proto >= 0 && ep->proto != proto) 276 1.1 itojun continue; 277 1.56 knakahar 278 1.56 knakahar psref_acquire(&elem_psref, &ep->psref, 279 1.56 knakahar encaptab.elem_class); 280 1.77 knakahar pserialize_read_exit(s); 281 1.77 knakahar /* ep->func is sleepable. e.g. rtalloc1 */ 282 1.77 knakahar prio = (*ep->func)(m, off, proto, ep->arg); 283 1.77 knakahar s = pserialize_read_enter(); 284 1.1 itojun 285 1.1 itojun /* 286 1.1 itojun * We prioritize the matches by using bit length of the 287 1.77 knakahar * matches. user-supplied matching function 288 1.1 itojun * should return the bit length of the matches (for example, 289 1.1 itojun * if both src/dst are matched for IPv4, 64 should be returned). 290 1.1 itojun * 0 or negative return value means "it did not match". 291 1.1 itojun * 292 1.1 itojun * We need to loop through all the possible candidates 293 1.1 itojun * to get the best match - the search takes O(n) for 294 1.1 itojun * n attachments (i.e. interfaces). 295 1.1 itojun */ 296 1.56 knakahar if (prio <= 0) { 297 1.56 knakahar psref_release(&elem_psref, &ep->psref, 298 1.56 knakahar encaptab.elem_class); 299 1.1 itojun continue; 300 1.56 knakahar } 301 1.1 itojun if (prio > matchprio) { 302 1.56 knakahar /* release last matched ep */ 303 1.56 knakahar if (match != NULL) 304 1.56 knakahar psref_release(match_psref, &match->psref, 305 1.56 knakahar encaptab.elem_class); 306 1.56 knakahar 307 1.56 knakahar psref_copy(match_psref, &elem_psref, 308 1.56 knakahar encaptab.elem_class); 309 1.1 itojun matchprio = prio; 310 1.1 itojun match = ep; 311 1.1 itojun } 312 1.56 knakahar KASSERTMSG((match == NULL) || psref_held(&match->psref, 313 1.56 knakahar encaptab.elem_class), 314 1.56 knakahar "current match = %p, but not hold its psref", match); 315 1.56 knakahar 316 1.56 knakahar psref_release(&elem_psref, &ep->psref, 317 1.56 knakahar encaptab.elem_class); 318 1.1 itojun } 319 1.56 knakahar pserialize_read_exit(s); 320 1.1 itojun 321 1.7 itojun return match; 322 1.7 itojun } 323 1.7 itojun 324 1.7 itojun void 325 1.70 maxv encap4_input(struct mbuf *m, int off, int proto) 326 1.7 itojun { 327 1.51 knakahar const struct encapsw *esw; 328 1.7 itojun struct encaptab *match; 329 1.56 knakahar struct psref match_psref; 330 1.7 itojun 331 1.56 knakahar match = encap4_lookup(m, off, proto, INBOUND, &match_psref); 332 1.1 itojun if (match) { 333 1.1 itojun /* found a match, "match" has the best one */ 334 1.51 knakahar esw = match->esw; 335 1.51 knakahar if (esw && esw->encapsw4.pr_input) { 336 1.66 knakahar (*esw->encapsw4.pr_input)(m, off, proto, match->arg); 337 1.56 knakahar psref_release(&match_psref, &match->psref, 338 1.56 knakahar encaptab.elem_class); 339 1.54 knakahar } else { 340 1.56 knakahar psref_release(&match_psref, &match->psref, 341 1.56 knakahar encaptab.elem_class); 342 1.1 itojun m_freem(m); 343 1.54 knakahar } 344 1.1 itojun return; 345 1.1 itojun } 346 1.1 itojun 347 1.1 itojun /* last resort: inject to raw socket */ 348 1.69 knakahar SOFTNET_LOCK_IF_NET_MPSAFE(); 349 1.1 itojun rip_input(m, off, proto); 350 1.69 knakahar SOFTNET_UNLOCK_IF_NET_MPSAFE(); 351 1.1 itojun } 352 1.1 itojun #endif 353 1.1 itojun 354 1.1 itojun #ifdef INET6 355 1.7 itojun static struct encaptab * 356 1.56 knakahar encap6_lookup(struct mbuf *m, int off, int proto, enum direction dir, 357 1.56 knakahar struct psref *match_psref) 358 1.1 itojun { 359 1.1 itojun struct ip6_hdr *ip6; 360 1.33 pooka struct ip_pack6 pack; 361 1.7 itojun int prio, matchprio; 362 1.56 knakahar int s; 363 1.1 itojun struct encaptab *ep, *match; 364 1.76 knakahar thmap_t *emap = encap_map[1]; 365 1.76 knakahar struct encap_key key; 366 1.1 itojun 367 1.41 ozaki KASSERT(m->m_len >= sizeof(*ip6)); 368 1.41 ozaki 369 1.1 itojun ip6 = mtod(m, struct ip6_hdr *); 370 1.1 itojun 371 1.35 cegger memset(&pack, 0, sizeof(pack)); 372 1.7 itojun pack.p.sp_len = sizeof(pack); 373 1.7 itojun pack.mine.sin6_family = pack.yours.sin6_family = AF_INET6; 374 1.7 itojun pack.mine.sin6_len = pack.yours.sin6_len = sizeof(struct sockaddr_in6); 375 1.7 itojun if (dir == INBOUND) { 376 1.7 itojun pack.mine.sin6_addr = ip6->ip6_dst; 377 1.7 itojun pack.yours.sin6_addr = ip6->ip6_src; 378 1.7 itojun } else { 379 1.7 itojun pack.mine.sin6_addr = ip6->ip6_src; 380 1.7 itojun pack.yours.sin6_addr = ip6->ip6_dst; 381 1.7 itojun } 382 1.1 itojun 383 1.1 itojun match = NULL; 384 1.1 itojun matchprio = 0; 385 1.7 itojun 386 1.56 knakahar s = pserialize_read_enter(); 387 1.76 knakahar 388 1.76 knakahar encap_key_init(&key, sin6tosa(&pack.mine), sin6tosa(&pack.yours)); 389 1.76 knakahar while ((ep = thmap_get(emap, &key, sizeof(key))) != NULL) { 390 1.76 knakahar struct psref elem_psref; 391 1.76 knakahar 392 1.76 knakahar KASSERT(ep->af == AF_INET6); 393 1.76 knakahar 394 1.76 knakahar if (ep->proto >= 0 && ep->proto != proto) { 395 1.76 knakahar encap_key_inc(&key); 396 1.76 knakahar continue; 397 1.76 knakahar } 398 1.76 knakahar 399 1.76 knakahar psref_acquire(&elem_psref, &ep->psref, 400 1.76 knakahar encaptab.elem_class); 401 1.76 knakahar if (ep->func) { 402 1.76 knakahar pserialize_read_exit(s); 403 1.76 knakahar prio = (*ep->func)(m, off, proto, ep->arg); 404 1.76 knakahar s = pserialize_read_enter(); 405 1.76 knakahar } else { 406 1.76 knakahar prio = pack.mine.sin6_len + pack.yours.sin6_len; 407 1.76 knakahar } 408 1.76 knakahar 409 1.76 knakahar if (prio <= 0) { 410 1.76 knakahar psref_release(&elem_psref, &ep->psref, 411 1.76 knakahar encaptab.elem_class); 412 1.76 knakahar encap_key_inc(&key); 413 1.76 knakahar continue; 414 1.76 knakahar } 415 1.76 knakahar if (prio > matchprio) { 416 1.76 knakahar /* release last matched ep */ 417 1.76 knakahar if (match != NULL) 418 1.76 knakahar psref_release(match_psref, &match->psref, 419 1.76 knakahar encaptab.elem_class); 420 1.76 knakahar 421 1.76 knakahar psref_copy(match_psref, &elem_psref, 422 1.76 knakahar encaptab.elem_class); 423 1.76 knakahar matchprio = prio; 424 1.76 knakahar match = ep; 425 1.76 knakahar } 426 1.76 knakahar psref_release(&elem_psref, &ep->psref, 427 1.76 knakahar encaptab.elem_class); 428 1.76 knakahar encap_key_inc(&key); 429 1.76 knakahar } 430 1.76 knakahar 431 1.56 knakahar PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) { 432 1.56 knakahar struct psref elem_psref; 433 1.56 knakahar 434 1.1 itojun if (ep->af != AF_INET6) 435 1.1 itojun continue; 436 1.1 itojun if (ep->proto >= 0 && ep->proto != proto) 437 1.1 itojun continue; 438 1.56 knakahar 439 1.56 knakahar psref_acquire(&elem_psref, &ep->psref, 440 1.56 knakahar encaptab.elem_class); 441 1.56 knakahar 442 1.77 knakahar pserialize_read_exit(s); 443 1.77 knakahar /* ep->func is sleepable. e.g. rtalloc1 */ 444 1.77 knakahar prio = (*ep->func)(m, off, proto, ep->arg); 445 1.77 knakahar s = pserialize_read_enter(); 446 1.1 itojun 447 1.7 itojun /* see encap4_lookup() for issues here */ 448 1.56 knakahar if (prio <= 0) { 449 1.56 knakahar psref_release(&elem_psref, &ep->psref, 450 1.56 knakahar encaptab.elem_class); 451 1.1 itojun continue; 452 1.56 knakahar } 453 1.1 itojun if (prio > matchprio) { 454 1.56 knakahar /* release last matched ep */ 455 1.56 knakahar if (match != NULL) 456 1.56 knakahar psref_release(match_psref, &match->psref, 457 1.56 knakahar encaptab.elem_class); 458 1.56 knakahar 459 1.56 knakahar psref_copy(match_psref, &elem_psref, 460 1.56 knakahar encaptab.elem_class); 461 1.1 itojun matchprio = prio; 462 1.1 itojun match = ep; 463 1.1 itojun } 464 1.56 knakahar KASSERTMSG((match == NULL) || psref_held(&match->psref, 465 1.56 knakahar encaptab.elem_class), 466 1.56 knakahar "current match = %p, but not hold its psref", match); 467 1.56 knakahar 468 1.56 knakahar psref_release(&elem_psref, &ep->psref, 469 1.56 knakahar encaptab.elem_class); 470 1.1 itojun } 471 1.56 knakahar pserialize_read_exit(s); 472 1.1 itojun 473 1.7 itojun return match; 474 1.7 itojun } 475 1.7 itojun 476 1.7 itojun int 477 1.23 perry encap6_input(struct mbuf **mp, int *offp, int proto) 478 1.7 itojun { 479 1.7 itojun struct mbuf *m = *mp; 480 1.51 knakahar const struct encapsw *esw; 481 1.7 itojun struct encaptab *match; 482 1.56 knakahar struct psref match_psref; 483 1.69 knakahar int rv; 484 1.7 itojun 485 1.56 knakahar match = encap6_lookup(m, *offp, proto, INBOUND, &match_psref); 486 1.7 itojun 487 1.1 itojun if (match) { 488 1.1 itojun /* found a match */ 489 1.51 knakahar esw = match->esw; 490 1.51 knakahar if (esw && esw->encapsw6.pr_input) { 491 1.56 knakahar int ret; 492 1.66 knakahar ret = (*esw->encapsw6.pr_input)(mp, offp, proto, 493 1.66 knakahar match->arg); 494 1.56 knakahar psref_release(&match_psref, &match->psref, 495 1.56 knakahar encaptab.elem_class); 496 1.56 knakahar return ret; 497 1.1 itojun } else { 498 1.56 knakahar psref_release(&match_psref, &match->psref, 499 1.56 knakahar encaptab.elem_class); 500 1.1 itojun m_freem(m); 501 1.1 itojun return IPPROTO_DONE; 502 1.1 itojun } 503 1.1 itojun } 504 1.1 itojun 505 1.1 itojun /* last resort: inject to raw socket */ 506 1.69 knakahar SOFTNET_LOCK_IF_NET_MPSAFE(); 507 1.69 knakahar rv = rip6_input(mp, offp, proto); 508 1.69 knakahar SOFTNET_UNLOCK_IF_NET_MPSAFE(); 509 1.69 knakahar return rv; 510 1.1 itojun } 511 1.1 itojun #endif 512 1.1 itojun 513 1.7 itojun static int 514 1.23 perry encap_add(struct encaptab *ep) 515 1.1 itojun { 516 1.1 itojun 517 1.56 knakahar KASSERT(encap_lock_held()); 518 1.54 knakahar 519 1.56 knakahar PSLIST_WRITER_INSERT_HEAD(&encap_table, ep, chain); 520 1.7 itojun 521 1.56 knakahar return 0; 522 1.7 itojun } 523 1.7 itojun 524 1.7 itojun static int 525 1.23 perry encap_remove(struct encaptab *ep) 526 1.7 itojun { 527 1.7 itojun int error = 0; 528 1.7 itojun 529 1.56 knakahar KASSERT(encap_lock_held()); 530 1.54 knakahar 531 1.56 knakahar PSLIST_WRITER_REMOVE(ep, chain); 532 1.56 knakahar 533 1.7 itojun return error; 534 1.7 itojun } 535 1.7 itojun 536 1.73 riastrad static void 537 1.23 perry encap_afcheck(int af, const struct sockaddr *sp, const struct sockaddr *dp) 538 1.7 itojun { 539 1.7 itojun 540 1.73 riastrad KASSERT(sp != NULL && dp != NULL); 541 1.73 riastrad KASSERT(sp->sa_len == dp->sa_len); 542 1.73 riastrad KASSERT(af == sp->sa_family && af == dp->sa_family); 543 1.7 itojun 544 1.74 riastrad socklen_t len __diagused = sockaddr_getsize_by_family(af); 545 1.73 riastrad KASSERT(len != 0 && len == sp->sa_len && len == dp->sa_len); 546 1.1 itojun } 547 1.1 itojun 548 1.1 itojun const struct encaptab * 549 1.23 perry encap_attach_func(int af, int proto, 550 1.75 knakahar encap_priofunc_t *func, 551 1.51 knakahar const struct encapsw *esw, void *arg) 552 1.1 itojun { 553 1.1 itojun struct encaptab *ep; 554 1.1 itojun int error; 555 1.61 knakahar #ifndef ENCAP_MPSAFE 556 1.1 itojun int s; 557 1.1 itojun 558 1.1 itojun s = splsoftnet(); 559 1.61 knakahar #endif 560 1.73 riastrad 561 1.73 riastrad ASSERT_SLEEPABLE(); 562 1.73 riastrad 563 1.1 itojun /* sanity check on args */ 564 1.73 riastrad KASSERT(func != NULL); 565 1.73 riastrad KASSERT(af == AF_INET 566 1.73 riastrad #ifdef INET6 567 1.73 riastrad || af == AF_INET6 568 1.73 riastrad #endif 569 1.73 riastrad ); 570 1.7 itojun 571 1.73 riastrad ep = kmem_alloc(sizeof(*ep), KM_SLEEP); 572 1.35 cegger memset(ep, 0, sizeof(*ep)); 573 1.1 itojun 574 1.1 itojun ep->af = af; 575 1.1 itojun ep->proto = proto; 576 1.1 itojun ep->func = func; 577 1.51 knakahar ep->esw = esw; 578 1.1 itojun ep->arg = arg; 579 1.56 knakahar psref_target_init(&ep->psref, encaptab.elem_class); 580 1.1 itojun 581 1.7 itojun error = encap_add(ep); 582 1.7 itojun if (error) 583 1.67 maxv goto gc; 584 1.1 itojun 585 1.1 itojun error = 0; 586 1.61 knakahar #ifndef ENCAP_MPSAFE 587 1.1 itojun splx(s); 588 1.61 knakahar #endif 589 1.1 itojun return ep; 590 1.1 itojun 591 1.67 maxv gc: 592 1.67 maxv kmem_free(ep, sizeof(*ep)); 593 1.61 knakahar #ifndef ENCAP_MPSAFE 594 1.1 itojun splx(s); 595 1.61 knakahar #endif 596 1.1 itojun return NULL; 597 1.1 itojun } 598 1.1 itojun 599 1.76 knakahar static void 600 1.76 knakahar encap_key_init(struct encap_key *key, 601 1.76 knakahar const struct sockaddr *local, const struct sockaddr *remote) 602 1.76 knakahar { 603 1.76 knakahar 604 1.76 knakahar memset(key, 0, sizeof(*key)); 605 1.76 knakahar 606 1.76 knakahar sockaddr_copy(&key->local_sa, sizeof(key->local_u), local); 607 1.76 knakahar sockaddr_copy(&key->remote_sa, sizeof(key->remote_u), remote); 608 1.76 knakahar } 609 1.76 knakahar 610 1.76 knakahar static void 611 1.76 knakahar encap_key_inc(struct encap_key *key) 612 1.76 knakahar { 613 1.76 knakahar 614 1.76 knakahar (key->seq)++; 615 1.76 knakahar } 616 1.76 knakahar 617 1.76 knakahar static void 618 1.76 knakahar encap_key_dec(struct encap_key *key) 619 1.76 knakahar { 620 1.76 knakahar 621 1.76 knakahar (key->seq)--; 622 1.76 knakahar } 623 1.76 knakahar 624 1.76 knakahar static void 625 1.76 knakahar encap_key_copy(struct encap_key *dst, const struct encap_key *src) 626 1.76 knakahar { 627 1.76 knakahar 628 1.76 knakahar memset(dst, 0, sizeof(*dst)); 629 1.76 knakahar *dst = *src; 630 1.76 knakahar } 631 1.76 knakahar 632 1.76 knakahar /* 633 1.76 knakahar * src is always my side, and dst is always remote side. 634 1.76 knakahar * Return value will be necessary as input (cookie) for encap_detach(). 635 1.76 knakahar */ 636 1.76 knakahar const struct encaptab * 637 1.76 knakahar encap_attach_addr(int af, int proto, 638 1.76 knakahar const struct sockaddr *src, const struct sockaddr *dst, 639 1.76 knakahar encap_priofunc_t *func, 640 1.76 knakahar const struct encapsw *esw, void *arg) 641 1.76 knakahar { 642 1.76 knakahar struct encaptab *ep; 643 1.76 knakahar size_t l; 644 1.76 knakahar thmap_t *emap; 645 1.76 knakahar void *retep; 646 1.76 knakahar struct ip_pack4 *pack4; 647 1.76 knakahar #ifdef INET6 648 1.76 knakahar struct ip_pack6 *pack6; 649 1.76 knakahar #endif 650 1.76 knakahar 651 1.76 knakahar ASSERT_SLEEPABLE(); 652 1.76 knakahar 653 1.76 knakahar encap_afcheck(af, src, dst); 654 1.76 knakahar 655 1.76 knakahar switch (af) { 656 1.76 knakahar case AF_INET: 657 1.76 knakahar l = sizeof(*pack4); 658 1.76 knakahar emap = encap_map[0]; 659 1.76 knakahar break; 660 1.76 knakahar #ifdef INET6 661 1.76 knakahar case AF_INET6: 662 1.76 knakahar l = sizeof(*pack6); 663 1.76 knakahar emap = encap_map[1]; 664 1.76 knakahar break; 665 1.76 knakahar #endif 666 1.76 knakahar default: 667 1.76 knakahar return NULL; 668 1.76 knakahar } 669 1.76 knakahar 670 1.76 knakahar ep = kmem_zalloc(sizeof(*ep), KM_SLEEP); 671 1.76 knakahar ep->addrpack = kmem_zalloc(l, KM_SLEEP); 672 1.76 knakahar ep->addrpack->sa_len = l & 0xff; 673 1.76 knakahar ep->af = af; 674 1.76 knakahar ep->proto = proto; 675 1.76 knakahar ep->flag = IP_ENCAP_ADDR_ENABLE; 676 1.76 knakahar switch (af) { 677 1.76 knakahar case AF_INET: 678 1.76 knakahar pack4 = (struct ip_pack4 *)ep->addrpack; 679 1.76 knakahar ep->src = (struct sockaddr *)&pack4->mine; 680 1.76 knakahar ep->dst = (struct sockaddr *)&pack4->yours; 681 1.76 knakahar break; 682 1.76 knakahar #ifdef INET6 683 1.76 knakahar case AF_INET6: 684 1.76 knakahar pack6 = (struct ip_pack6 *)ep->addrpack; 685 1.76 knakahar ep->src = (struct sockaddr *)&pack6->mine; 686 1.76 knakahar ep->dst = (struct sockaddr *)&pack6->yours; 687 1.76 knakahar break; 688 1.76 knakahar #endif 689 1.76 knakahar } 690 1.76 knakahar memcpy(ep->src, src, src->sa_len); 691 1.76 knakahar memcpy(ep->dst, dst, dst->sa_len); 692 1.76 knakahar ep->esw = esw; 693 1.76 knakahar ep->arg = arg; 694 1.76 knakahar ep->func = func; 695 1.76 knakahar psref_target_init(&ep->psref, encaptab.elem_class); 696 1.76 knakahar 697 1.76 knakahar encap_key_init(&ep->key, src, dst); 698 1.76 knakahar while ((retep = thmap_put(emap, &ep->key, sizeof(ep->key), ep)) != ep) 699 1.76 knakahar encap_key_inc(&ep->key); 700 1.76 knakahar return ep; 701 1.76 knakahar } 702 1.76 knakahar 703 1.76 knakahar 704 1.7 itojun /* XXX encap4_ctlinput() is necessary if we set DF=1 on outer IPv4 header */ 705 1.7 itojun 706 1.7 itojun #ifdef INET6 707 1.32 ad void * 708 1.29 dyoung encap6_ctlinput(int cmd, const struct sockaddr *sa, void *d0) 709 1.7 itojun { 710 1.7 itojun void *d = d0; 711 1.7 itojun struct ip6_hdr *ip6; 712 1.7 itojun struct mbuf *m; 713 1.7 itojun int off; 714 1.7 itojun struct ip6ctlparam *ip6cp = NULL; 715 1.7 itojun int nxt; 716 1.56 knakahar int s; 717 1.7 itojun struct encaptab *ep; 718 1.51 knakahar const struct encapsw *esw; 719 1.7 itojun 720 1.7 itojun if (sa->sa_family != AF_INET6 || 721 1.7 itojun sa->sa_len != sizeof(struct sockaddr_in6)) 722 1.32 ad return NULL; 723 1.7 itojun 724 1.7 itojun if ((unsigned)cmd >= PRC_NCMDS) 725 1.32 ad return NULL; 726 1.7 itojun if (cmd == PRC_HOSTDEAD) 727 1.7 itojun d = NULL; 728 1.7 itojun else if (cmd == PRC_MSGSIZE) 729 1.7 itojun ; /* special code is present, see below */ 730 1.7 itojun else if (inet6ctlerrmap[cmd] == 0) 731 1.32 ad return NULL; 732 1.7 itojun 733 1.7 itojun /* if the parameter is from icmp6, decode it. */ 734 1.7 itojun if (d != NULL) { 735 1.7 itojun ip6cp = (struct ip6ctlparam *)d; 736 1.7 itojun m = ip6cp->ip6c_m; 737 1.7 itojun ip6 = ip6cp->ip6c_ip6; 738 1.7 itojun off = ip6cp->ip6c_off; 739 1.7 itojun nxt = ip6cp->ip6c_nxt; 740 1.15 mycroft 741 1.15 mycroft if (ip6 && cmd == PRC_MSGSIZE) { 742 1.15 mycroft int valid = 0; 743 1.15 mycroft struct encaptab *match; 744 1.56 knakahar struct psref elem_psref; 745 1.15 mycroft 746 1.15 mycroft /* 747 1.15 mycroft * Check to see if we have a valid encap configuration. 748 1.15 mycroft */ 749 1.56 knakahar match = encap6_lookup(m, off, nxt, OUTBOUND, 750 1.56 knakahar &elem_psref); 751 1.72 knakahar if (match) { 752 1.15 mycroft valid++; 753 1.72 knakahar psref_release(&elem_psref, &match->psref, 754 1.72 knakahar encaptab.elem_class); 755 1.72 knakahar } 756 1.15 mycroft 757 1.15 mycroft /* 758 1.15 mycroft * Depending on the value of "valid" and routing table 759 1.15 mycroft * size (mtudisc_{hi,lo}wat), we will: 760 1.78 andvar * - recalculate the new MTU and create the 761 1.15 mycroft * corresponding routing entry, or 762 1.15 mycroft * - ignore the MTU change notification. 763 1.15 mycroft */ 764 1.15 mycroft icmp6_mtudisc_update((struct ip6ctlparam *)d, valid); 765 1.15 mycroft } 766 1.7 itojun } else { 767 1.7 itojun m = NULL; 768 1.7 itojun ip6 = NULL; 769 1.7 itojun nxt = -1; 770 1.7 itojun } 771 1.7 itojun 772 1.7 itojun /* inform all listeners */ 773 1.56 knakahar 774 1.56 knakahar s = pserialize_read_enter(); 775 1.56 knakahar PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) { 776 1.56 knakahar struct psref elem_psref; 777 1.56 knakahar 778 1.7 itojun if (ep->af != AF_INET6) 779 1.7 itojun continue; 780 1.7 itojun if (ep->proto >= 0 && ep->proto != nxt) 781 1.7 itojun continue; 782 1.7 itojun 783 1.7 itojun /* should optimize by looking at address pairs */ 784 1.7 itojun 785 1.7 itojun /* XXX need to pass ep->arg or ep itself to listeners */ 786 1.56 knakahar psref_acquire(&elem_psref, &ep->psref, 787 1.56 knakahar encaptab.elem_class); 788 1.51 knakahar esw = ep->esw; 789 1.51 knakahar if (esw && esw->encapsw6.pr_ctlinput) { 790 1.56 knakahar pserialize_read_exit(s); 791 1.56 knakahar /* pr_ctlinput is sleepable. e.g. rtcache_free */ 792 1.52 knakahar (*esw->encapsw6.pr_ctlinput)(cmd, sa, d, ep->arg); 793 1.56 knakahar s = pserialize_read_enter(); 794 1.51 knakahar } 795 1.56 knakahar psref_release(&elem_psref, &ep->psref, 796 1.56 knakahar encaptab.elem_class); 797 1.7 itojun } 798 1.56 knakahar pserialize_read_exit(s); 799 1.7 itojun 800 1.7 itojun rip6_ctlinput(cmd, sa, d0); 801 1.32 ad return NULL; 802 1.7 itojun } 803 1.7 itojun #endif 804 1.7 itojun 805 1.76 knakahar static int 806 1.76 knakahar encap_detach_addr(const struct encaptab *ep) 807 1.76 knakahar { 808 1.76 knakahar thmap_t *emap; 809 1.76 knakahar struct encaptab *retep; 810 1.76 knakahar struct encaptab *target; 811 1.76 knakahar void *thgc; 812 1.76 knakahar struct encap_key key; 813 1.76 knakahar 814 1.76 knakahar KASSERT(encap_lock_held()); 815 1.76 knakahar KASSERT(ep->flag & IP_ENCAP_ADDR_ENABLE); 816 1.76 knakahar 817 1.76 knakahar switch (ep->af) { 818 1.76 knakahar case AF_INET: 819 1.76 knakahar emap = encap_map[0]; 820 1.76 knakahar break; 821 1.76 knakahar #ifdef INET6 822 1.76 knakahar case AF_INET6: 823 1.76 knakahar emap = encap_map[1]; 824 1.76 knakahar break; 825 1.76 knakahar #endif 826 1.76 knakahar default: 827 1.76 knakahar return EINVAL; 828 1.76 knakahar } 829 1.76 knakahar 830 1.76 knakahar retep = thmap_del(emap, &ep->key, sizeof(ep->key)); 831 1.76 knakahar if (retep != ep) { 832 1.76 knakahar return ENOENT; 833 1.76 knakahar } 834 1.76 knakahar target = retep; 835 1.76 knakahar 836 1.76 knakahar /* 837 1.76 knakahar * To keep continuity, decrement seq after detached encaptab. 838 1.76 knakahar */ 839 1.76 knakahar encap_key_copy(&key, &ep->key); 840 1.76 knakahar encap_key_inc(&key); 841 1.76 knakahar while ((retep = thmap_del(emap, &key, sizeof(key))) != NULL) { 842 1.76 knakahar void *pp; 843 1.76 knakahar 844 1.76 knakahar encap_key_dec(&retep->key); 845 1.76 knakahar pp = thmap_put(emap, &retep->key, sizeof(retep->key), retep); 846 1.76 knakahar KASSERT(retep == pp); 847 1.76 knakahar 848 1.76 knakahar encap_key_inc(&key); 849 1.76 knakahar } 850 1.76 knakahar 851 1.76 knakahar thgc = thmap_stage_gc(emap); 852 1.76 knakahar pserialize_perform(encaptab.psz); 853 1.76 knakahar thmap_gc(emap, thgc); 854 1.76 knakahar psref_target_destroy(&target->psref, encaptab.elem_class); 855 1.76 knakahar kmem_free(target->addrpack, target->addrpack->sa_len); 856 1.76 knakahar kmem_free(target, sizeof(*target)); 857 1.76 knakahar 858 1.76 knakahar return 0; 859 1.76 knakahar } 860 1.76 knakahar 861 1.1 itojun int 862 1.23 perry encap_detach(const struct encaptab *cookie) 863 1.1 itojun { 864 1.1 itojun const struct encaptab *ep = cookie; 865 1.56 knakahar struct encaptab *p; 866 1.7 itojun int error; 867 1.1 itojun 868 1.56 knakahar KASSERT(encap_lock_held()); 869 1.56 knakahar 870 1.76 knakahar if (ep->flag & IP_ENCAP_ADDR_ENABLE) 871 1.76 knakahar return encap_detach_addr(ep); 872 1.76 knakahar 873 1.56 knakahar PSLIST_WRITER_FOREACH(p, &encap_table, struct encaptab, chain) { 874 1.1 itojun if (p == ep) { 875 1.7 itojun error = encap_remove(p); 876 1.7 itojun if (error) 877 1.7 itojun return error; 878 1.56 knakahar else 879 1.56 knakahar break; 880 1.56 knakahar } 881 1.56 knakahar } 882 1.56 knakahar if (p == NULL) 883 1.56 knakahar return ENOENT; 884 1.56 knakahar 885 1.56 knakahar pserialize_perform(encaptab.psz); 886 1.56 knakahar psref_target_destroy(&p->psref, 887 1.56 knakahar encaptab.elem_class); 888 1.56 knakahar kmem_free(p, sizeof(*p)); 889 1.1 itojun 890 1.56 knakahar return 0; 891 1.7 itojun } 892 1.7 itojun 893 1.59 knakahar int 894 1.54 knakahar encap_lock_enter(void) 895 1.54 knakahar { 896 1.59 knakahar int error; 897 1.59 knakahar 898 1.59 knakahar mutex_enter(&encap_whole.lock); 899 1.59 knakahar while (encap_whole.busy != NULL) { 900 1.59 knakahar error = cv_wait_sig(&encap_whole.cv, &encap_whole.lock); 901 1.59 knakahar if (error) { 902 1.59 knakahar mutex_exit(&encap_whole.lock); 903 1.59 knakahar return error; 904 1.59 knakahar } 905 1.59 knakahar } 906 1.59 knakahar KASSERT(encap_whole.busy == NULL); 907 1.59 knakahar encap_whole.busy = curlwp; 908 1.59 knakahar mutex_exit(&encap_whole.lock); 909 1.54 knakahar 910 1.59 knakahar return 0; 911 1.54 knakahar } 912 1.54 knakahar 913 1.54 knakahar void 914 1.54 knakahar encap_lock_exit(void) 915 1.54 knakahar { 916 1.54 knakahar 917 1.59 knakahar mutex_enter(&encap_whole.lock); 918 1.59 knakahar KASSERT(encap_whole.busy == curlwp); 919 1.59 knakahar encap_whole.busy = NULL; 920 1.59 knakahar cv_broadcast(&encap_whole.cv); 921 1.59 knakahar mutex_exit(&encap_whole.lock); 922 1.54 knakahar } 923 1.56 knakahar 924 1.56 knakahar bool 925 1.56 knakahar encap_lock_held(void) 926 1.56 knakahar { 927 1.56 knakahar 928 1.59 knakahar return (encap_whole.busy == curlwp); 929 1.56 knakahar } 930