if_ipsec.c revision 1.9 1 /* $NetBSD: if_ipsec.c,v 1.9 2018/04/06 09:28:26 knakahara Exp $ */
2
3 /*
4 * Copyright (c) 2017 Internet Initiative Japan Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: if_ipsec.c,v 1.9 2018/04/06 09:28:26 knakahara Exp $");
31
32 #ifdef _KERNEL_OPT
33 #include "opt_inet.h"
34 #endif
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/mbuf.h>
40 #include <sys/socket.h>
41 #include <sys/sockio.h>
42 #include <sys/errno.h>
43 #include <sys/ioctl.h>
44 #include <sys/time.h>
45 #include <sys/syslog.h>
46 #include <sys/cpu.h>
47 #include <sys/kmem.h>
48 #include <sys/mutex.h>
49 #include <sys/pserialize.h>
50 #include <sys/psref.h>
51
52 #include <net/if.h>
53 #include <net/if_types.h>
54 #include <net/route.h>
55 #include <net/bpf.h>
56 #include <net/pfkeyv2.h>
57
58 #include <netinet/in.h>
59 #include <netinet/in_systm.h>
60 #include <netinet/ip.h>
61 #ifdef INET
62 #include <netinet/in_var.h>
63 #endif /* INET */
64
65 #ifdef INET6
66 #include <netinet6/in6_var.h>
67 #include <netinet/ip6.h>
68 #include <netinet6/ip6_var.h>
69 #endif /* INET6 */
70
71 #include <netinet/ip_encap.h>
72
73 #include <net/if_ipsec.h>
74
75 #include <net/raw_cb.h>
76 #include <net/pfkeyv2.h>
77
78 #include <netipsec/key.h>
79 #include <netipsec/ipsec.h>
80 #include <netipsec/ipsecif.h>
81
82 static void if_ipsec_ro_init_pc(void *, void *, struct cpu_info *);
83 static void if_ipsec_ro_fini_pc(void *, void *, struct cpu_info *);
84
85 static int if_ipsec_clone_create(struct if_clone *, int);
86 static int if_ipsec_clone_destroy(struct ifnet *);
87
88 static inline int if_ipsec_out_direct(struct ipsec_variant *, struct mbuf *, int);
89 static inline void if_ipsec_in_enqueue(struct mbuf *, int, struct ifnet *);
90
91 static int if_ipsec_encap_attach(struct ipsec_variant *);
92 static int if_ipsec_encap_detach(struct ipsec_variant *);
93 static int if_ipsec_set_tunnel(struct ifnet *,
94 struct sockaddr *, struct sockaddr *);
95 static void if_ipsec_delete_tunnel(struct ifnet *);
96 static int if_ipsec_ensure_flags(struct ifnet *, short);
97 static void if_ipsec_attach0(struct ipsec_softc *);
98
99 static int if_ipsec_update_variant(struct ipsec_softc *,
100 struct ipsec_variant *, struct ipsec_variant *);
101
102 /* sadb_msg */
103 static inline void if_ipsec_add_mbuf(struct mbuf *, void *, size_t);
104 static inline void if_ipsec_add_pad(struct mbuf *, size_t);
105 static inline size_t if_ipsec_set_sadb_addr(struct sadb_address *,
106 struct sockaddr *, int, uint16_t);
107 static inline size_t if_ipsec_set_sadb_src(struct sadb_address *,
108 struct sockaddr *, int);
109 static inline size_t if_ipsec_set_sadb_dst(struct sadb_address *,
110 struct sockaddr *, int);
111 static inline size_t if_ipsec_set_sadb_x_policy(struct sadb_x_policy *,
112 struct sadb_x_ipsecrequest *, uint16_t, uint8_t, uint32_t, uint8_t,
113 struct sockaddr *, struct sockaddr *);
114 static inline void if_ipsec_set_sadb_msg(struct sadb_msg *, uint16_t, uint8_t);
115 static inline void if_ipsec_set_sadb_msg_add(struct sadb_msg *, uint16_t);
116 static inline void if_ipsec_set_sadb_msg_del(struct sadb_msg *, uint16_t);
117 /* SPD */
118 static int if_ipsec_share_sp(struct ipsec_variant *);
119 static int if_ipsec_unshare_sp(struct ipsec_variant *);
120 static inline struct secpolicy *if_ipsec_add_sp0(struct sockaddr *,
121 in_port_t, struct sockaddr *, in_port_t, int, int, int, u_int);
122 static inline int if_ipsec_del_sp0(struct secpolicy *);
123 static int if_ipsec_add_sp(struct ipsec_variant *,
124 struct sockaddr *, in_port_t, struct sockaddr *, in_port_t);
125 static void if_ipsec_del_sp(struct ipsec_variant *);
126 static int if_ipsec_replace_sp(struct ipsec_softc *, struct ipsec_variant *,
127 struct ipsec_variant *);
128
129 static int if_ipsec_set_addr_port(struct sockaddr *, struct sockaddr *,
130 in_port_t);
131 #define IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, target) \
132 if_ipsec_set_addr_port(target, (var)->iv_psrc, (var)->iv_sport)
133 #define IF_IPSEC_GATHER_PDST_ADDR_PORT(var, target) \
134 if_ipsec_set_addr_port(target, (var)->iv_pdst, (var)->iv_dport)
135
136 /*
137 * ipsec global variable definitions
138 */
139
140 /* This list is used in ioctl context only. */
141 LIST_HEAD(ipsec_sclist, ipsec_softc);
142 static struct {
143 struct ipsec_sclist list;
144 kmutex_t lock;
145 } ipsec_softcs __cacheline_aligned;
146
147 pserialize_t ipsec_psz __read_mostly;
148 struct psref_class *iv_psref_class __read_mostly;
149
150 struct if_clone ipsec_cloner =
151 IF_CLONE_INITIALIZER("ipsec", if_ipsec_clone_create, if_ipsec_clone_destroy);
152 static int max_ipsec_nesting = MAX_IPSEC_NEST;
153
154 /* ARGSUSED */
155 void
156 ipsecifattach(int count)
157 {
158
159 mutex_init(&ipsec_softcs.lock, MUTEX_DEFAULT, IPL_NONE);
160 LIST_INIT(&ipsec_softcs.list);
161
162 ipsec_psz = pserialize_create();
163 iv_psref_class = psref_class_create("ipsecvar", IPL_SOFTNET);
164
165 if_clone_attach(&ipsec_cloner);
166 }
167
168 static int
169 if_ipsec_clone_create(struct if_clone *ifc, int unit)
170 {
171 struct ipsec_softc *sc;
172 struct ipsec_variant *var;
173
174 sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
175
176 if_initname(&sc->ipsec_if, ifc->ifc_name, unit);
177
178 if_ipsec_attach0(sc);
179
180 var = kmem_zalloc(sizeof(*var), KM_SLEEP);
181 var->iv_softc = sc;
182 psref_target_init(&var->iv_psref, iv_psref_class);
183
184 sc->ipsec_var = var;
185 mutex_init(&sc->ipsec_lock, MUTEX_DEFAULT, IPL_NONE);
186 sc->ipsec_ro_percpu = percpu_alloc(sizeof(struct ipsec_ro));
187 percpu_foreach(sc->ipsec_ro_percpu, if_ipsec_ro_init_pc, NULL);
188
189 mutex_enter(&ipsec_softcs.lock);
190 LIST_INSERT_HEAD(&ipsec_softcs.list, sc, ipsec_list);
191 mutex_exit(&ipsec_softcs.lock);
192 return 0;
193 }
194
195 static void
196 if_ipsec_attach0(struct ipsec_softc *sc)
197 {
198
199 sc->ipsec_if.if_addrlen = 0;
200 sc->ipsec_if.if_mtu = IPSEC_MTU;
201 sc->ipsec_if.if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
202 /* set ipsec(4) specific default flags. */
203 sc->ipsec_if.if_flags |= IFF_FWD_IPV6;
204 sc->ipsec_if.if_extflags = IFEF_NO_LINK_STATE_CHANGE | IFEF_MPSAFE;
205 sc->ipsec_if.if_ioctl = if_ipsec_ioctl;
206 sc->ipsec_if.if_output = if_ipsec_output;
207 sc->ipsec_if.if_type = IFT_IPSEC;
208 sc->ipsec_if.if_dlt = DLT_NULL;
209 sc->ipsec_if.if_softc = sc;
210 IFQ_SET_READY(&sc->ipsec_if.if_snd);
211 if_initialize(&sc->ipsec_if);
212 if_alloc_sadl(&sc->ipsec_if);
213 bpf_attach(&sc->ipsec_if, DLT_NULL, sizeof(u_int));
214 if_register(&sc->ipsec_if);
215 }
216
217 static void
218 if_ipsec_ro_init_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
219 {
220 struct ipsec_ro *iro = p;
221
222 mutex_init(&iro->ir_lock, MUTEX_DEFAULT, IPL_NONE);
223 }
224
225 static void
226 if_ipsec_ro_fini_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
227 {
228 struct ipsec_ro *iro = p;
229
230 rtcache_free(&iro->ir_ro);
231
232 mutex_destroy(&iro->ir_lock);
233 }
234
235 static int
236 if_ipsec_clone_destroy(struct ifnet *ifp)
237 {
238 struct ipsec_softc *sc = ifp->if_softc;
239 struct ipsec_variant *var;
240 int bound;
241
242 mutex_enter(&ipsec_softcs.lock);
243 LIST_REMOVE(sc, ipsec_list);
244 mutex_exit(&ipsec_softcs.lock);
245
246 bound = curlwp_bind();
247 if_ipsec_delete_tunnel(&sc->ipsec_if);
248 curlwp_bindx(bound);
249
250 bpf_detach(ifp);
251 if_detach(ifp);
252
253 percpu_foreach(sc->ipsec_ro_percpu, if_ipsec_ro_fini_pc, NULL);
254 percpu_free(sc->ipsec_ro_percpu, sizeof(struct ipsec_ro));
255
256 mutex_destroy(&sc->ipsec_lock);
257
258 var = sc->ipsec_var;
259 kmem_free(var, sizeof(*var));
260 kmem_free(sc, sizeof(*sc));
261
262 return 0;
263 }
264
265 static inline bool
266 if_ipsec_nat_t(struct ipsec_softc *sc)
267 {
268
269 return (sc->ipsec_if.if_flags & IFF_NAT_T) != 0;
270 }
271
272 static inline bool
273 if_ipsec_fwd_ipv6(struct ipsec_softc *sc)
274 {
275
276 return (sc->ipsec_if.if_flags & IFF_FWD_IPV6) != 0;
277 }
278
279 int
280 if_ipsec_encap_func(struct mbuf *m, int off, int proto, void *arg)
281 {
282 struct ip ip;
283 struct ipsec_softc *sc;
284 struct ipsec_variant *var = NULL;
285 struct psref psref;
286 int ret = 0;
287
288 sc = arg;
289 KASSERT(sc != NULL);
290
291 if ((sc->ipsec_if.if_flags & IFF_UP) == 0)
292 goto out;
293
294 var = if_ipsec_getref_variant(sc, &psref);
295 if (if_ipsec_variant_is_unconfigured(var))
296 goto out;
297
298 switch (proto) {
299 case IPPROTO_IPV4:
300 case IPPROTO_IPV6:
301 break;
302 default:
303 goto out;
304 }
305
306 if (m->m_pkthdr.len < sizeof(ip))
307 goto out;
308
309 m_copydata(m, 0, sizeof(ip), &ip);
310 switch (ip.ip_v) {
311 #ifdef INET
312 case IPVERSION:
313 if (var->iv_psrc->sa_family != AF_INET ||
314 var->iv_pdst->sa_family != AF_INET)
315 goto out;
316 ret = ipsecif4_encap_func(m, &ip, var);
317 break;
318 #endif
319 default:
320 goto out;
321 }
322
323 out:
324 if (var != NULL)
325 if_ipsec_putref_variant(var, &psref);
326 return ret;
327 }
328
329 /*
330 * ipsec(4) I/F may cause infinite recursion calls when misconfigured.
331 * We'll prevent this by introducing upper limit.
332 */
333 static int
334 if_ipsec_check_nesting(struct ifnet *ifp, struct mbuf *m)
335 {
336
337 return if_tunnel_check_nesting(ifp, m, max_ipsec_nesting);
338 }
339
340 int
341 if_ipsec_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
342 const struct rtentry *rt)
343 {
344 struct ipsec_softc *sc = ifp->if_softc;
345 struct ipsec_variant *var;
346 struct psref psref;
347 int error;
348 int bound;
349
350 IFQ_CLASSIFY(&ifp->if_snd, m, dst->sa_family);
351
352 error = if_ipsec_check_nesting(ifp, m);
353 if (error) {
354 m_freem(m);
355 goto noref_end;
356 }
357
358 if ((ifp->if_flags & IFF_UP) == 0) {
359 m_freem(m);
360 error = ENETDOWN;
361 goto noref_end;
362 }
363
364
365 bound = curlwp_bind();
366 var = if_ipsec_getref_variant(sc, &psref);
367 if (if_ipsec_variant_is_unconfigured(var)) {
368 m_freem(m);
369 error = ENETDOWN;
370 goto end;
371 }
372
373 m->m_flags &= ~(M_BCAST|M_MCAST);
374
375 /* use DLT_NULL encapsulation here to pass inner af type */
376 M_PREPEND(m, sizeof(int), M_DONTWAIT);
377 if (!m) {
378 error = ENOBUFS;
379 goto end;
380 }
381 *mtod(m, int *) = dst->sa_family;
382
383 #if INET6
384 /* drop IPv6 packet if IFF_FWD_IPV6 is not set */
385 if (dst->sa_family == AF_INET6 &&
386 !if_ipsec_fwd_ipv6(sc)) {
387 /*
388 * IPv6 packet is not allowed to forward,that is not error.
389 */
390 error = 0;
391 IF_DROP(&ifp->if_snd);
392 m_freem(m);
393 goto end;
394 }
395 #endif
396
397 error = if_ipsec_out_direct(var, m, dst->sa_family);
398
399 end:
400 if_ipsec_putref_variant(var, &psref);
401 curlwp_bindx(bound);
402 noref_end:
403 if (error)
404 ifp->if_oerrors++;
405
406 return error;
407 }
408
409 static inline int
410 if_ipsec_out_direct(struct ipsec_variant *var, struct mbuf *m, int family)
411 {
412 struct ifnet *ifp = &var->iv_softc->ipsec_if;
413 int error;
414 int len;
415
416 KASSERT(if_ipsec_heldref_variant(var));
417 KASSERT(var->iv_output != NULL);
418
419 len = m->m_pkthdr.len;
420
421 /* input DLT_NULL frame to BPF */
422 bpf_mtap(ifp, m);
423
424 /* grab and chop off inner af type */
425 /* XXX need pullup? */
426 m_adj(m, sizeof(int));
427
428 error = var->iv_output(var, family, m);
429 if (error)
430 return error;
431
432 ifp->if_opackets++;
433 ifp->if_obytes += len;
434
435 return 0;
436 }
437
438 void
439 if_ipsec_input(struct mbuf *m, int af, struct ifnet *ifp)
440 {
441
442 KASSERT(ifp != NULL);
443
444 m_set_rcvif(m, ifp);
445
446 bpf_mtap_af(ifp, af, m);
447
448 if_ipsec_in_enqueue(m, af, ifp);
449
450 return;
451 }
452
453 static inline void
454 if_ipsec_in_enqueue(struct mbuf *m, int af, struct ifnet *ifp)
455 {
456 pktqueue_t *pktq;
457 int pktlen;
458
459 /*
460 * Put the packet to the network layer input queue according to the
461 * specified address family.
462 */
463 switch (af) {
464 #ifdef INET
465 case AF_INET:
466 pktq = ip_pktq;
467 break;
468 #endif
469 #ifdef INET6
470 case AF_INET6:
471 pktq = ip6_pktq;
472 break;
473 #endif
474 default:
475 ifp->if_ierrors++;
476 m_freem(m);
477 return;
478 }
479
480 #if 1
481 const u_int h = curcpu()->ci_index;
482 #else
483 const uint32_t h = pktq_rps_hash(m);
484 #endif
485 pktlen = m->m_pkthdr.len;
486 if (__predict_true(pktq_enqueue(pktq, m, h))) {
487 ifp->if_ibytes += pktlen;
488 ifp->if_ipackets++;
489 } else {
490 m_freem(m);
491 }
492
493 return;
494 }
495
/*
 * Validate sa_len of "addr" against its address family.
 * Returns 0 when the length matches sockaddr_in / sockaddr_in6, EINVAL
 * when it does not, and EAFNOSUPPORT for any family that is not compiled
 * in (INET / INET6).
 */
static inline int
if_ipsec_check_salen(struct sockaddr *addr)
{
	int error = EAFNOSUPPORT;

#ifdef INET
	if (addr->sa_family == AF_INET)
		error = (addr->sa_len == sizeof(struct sockaddr_in)) ?
		    0 : EINVAL;
#endif /* INET */
#ifdef INET6
	if (addr->sa_family == AF_INET6)
		error = (addr->sa_len == sizeof(struct sockaddr_in6)) ?
		    0 : EINVAL;
#endif /* INET6 */

	return error;
}
519
520 /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
521 int
522 if_ipsec_ioctl(struct ifnet *ifp, u_long cmd, void *data)
523 {
524 struct ipsec_softc *sc = ifp->if_softc;
525 struct ipsec_variant *var = NULL;
526 struct ifreq *ifr = (struct ifreq*)data;
527 struct ifaddr *ifa = (struct ifaddr*)data;
528 int error = 0, size;
529 struct sockaddr *dst, *src;
530 u_long mtu;
531 short oflags = ifp->if_flags;
532 int bound;
533 struct psref psref;
534
535 switch (cmd) {
536 case SIOCINITIFADDR:
537 ifp->if_flags |= IFF_UP;
538 ifa->ifa_rtrequest = p2p_rtrequest;
539 break;
540
541 case SIOCSIFDSTADDR:
542 break;
543
544 case SIOCADDMULTI:
545 case SIOCDELMULTI:
546 switch (ifr->ifr_addr.sa_family) {
547 #ifdef INET
548 case AF_INET: /* IP supports Multicast */
549 break;
550 #endif /* INET */
551 #ifdef INET6
552 case AF_INET6: /* IP6 supports Multicast */
553 break;
554 #endif /* INET6 */
555 default: /* Other protocols doesn't support Multicast */
556 error = EAFNOSUPPORT;
557 break;
558 }
559 break;
560
561 case SIOCSIFMTU:
562 mtu = ifr->ifr_mtu;
563 if (mtu < IPSEC_MTU_MIN || mtu > IPSEC_MTU_MAX)
564 return EINVAL;
565 else if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
566 error = 0;
567 break;
568
569 #ifdef INET
570 case SIOCSIFPHYADDR:
571 #endif
572 #ifdef INET6
573 case SIOCSIFPHYADDR_IN6:
574 #endif /* INET6 */
575 case SIOCSLIFPHYADDR:
576 switch (cmd) {
577 #ifdef INET
578 case SIOCSIFPHYADDR:
579 src = (struct sockaddr *)
580 &(((struct in_aliasreq *)data)->ifra_addr);
581 dst = (struct sockaddr *)
582 &(((struct in_aliasreq *)data)->ifra_dstaddr);
583 break;
584 #endif /* INET */
585 #ifdef INET6
586 case SIOCSIFPHYADDR_IN6:
587 src = (struct sockaddr *)
588 &(((struct in6_aliasreq *)data)->ifra_addr);
589 dst = (struct sockaddr *)
590 &(((struct in6_aliasreq *)data)->ifra_dstaddr);
591 break;
592 #endif /* INET6 */
593 case SIOCSLIFPHYADDR:
594 src = (struct sockaddr *)
595 &(((struct if_laddrreq *)data)->addr);
596 dst = (struct sockaddr *)
597 &(((struct if_laddrreq *)data)->dstaddr);
598 break;
599 default:
600 return EINVAL;
601 }
602
603 /* sa_family must be equal */
604 if (src->sa_family != dst->sa_family)
605 return EINVAL;
606
607 error = if_ipsec_check_salen(src);
608 if (error)
609 return error;
610 error = if_ipsec_check_salen(dst);
611 if (error)
612 return error;
613
614 /* check sa_family looks sane for the cmd */
615 switch (cmd) {
616 #ifdef INET
617 case SIOCSIFPHYADDR:
618 if (src->sa_family == AF_INET)
619 break;
620 return EAFNOSUPPORT;
621 #endif /* INET */
622 #ifdef INET6
623 case SIOCSIFPHYADDR_IN6:
624 if (src->sa_family == AF_INET6)
625 break;
626 return EAFNOSUPPORT;
627 #endif /* INET6 */
628 case SIOCSLIFPHYADDR:
629 /* checks done in the above */
630 break;
631 }
632 /*
633 * calls if_ipsec_getref_variant() for other softcs to check
634 * address pair duplicattion
635 */
636 bound = curlwp_bind();
637 error = if_ipsec_set_tunnel(&sc->ipsec_if, src, dst);
638 if (error)
639 goto bad;
640 curlwp_bindx(bound);
641 break;
642
643 case SIOCDIFPHYADDR:
644 bound = curlwp_bind();
645 if_ipsec_delete_tunnel(&sc->ipsec_if);
646 curlwp_bindx(bound);
647 break;
648
649 case SIOCGIFPSRCADDR:
650 #ifdef INET6
651 case SIOCGIFPSRCADDR_IN6:
652 #endif /* INET6 */
653 bound = curlwp_bind();
654 var = if_ipsec_getref_variant(sc, &psref);
655 if (var->iv_psrc == NULL) {
656 error = EADDRNOTAVAIL;
657 goto bad;
658 }
659 src = var->iv_psrc;
660 switch (cmd) {
661 #ifdef INET
662 case SIOCGIFPSRCADDR:
663 dst = &ifr->ifr_addr;
664 size = sizeof(ifr->ifr_addr);
665 break;
666 #endif /* INET */
667 #ifdef INET6
668 case SIOCGIFPSRCADDR_IN6:
669 dst = (struct sockaddr *)
670 &(((struct in6_ifreq *)data)->ifr_addr);
671 size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
672 break;
673 #endif /* INET6 */
674 default:
675 error = EADDRNOTAVAIL;
676 goto bad;
677 }
678 if (src->sa_len > size) {
679 error = EINVAL;
680 goto bad;
681 }
682 error = IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, dst);
683 if (error)
684 goto bad;
685 if_ipsec_putref_variant(var, &psref);
686 curlwp_bindx(bound);
687 break;
688
689 case SIOCGIFPDSTADDR:
690 #ifdef INET6
691 case SIOCGIFPDSTADDR_IN6:
692 #endif /* INET6 */
693 bound = curlwp_bind();
694 var = if_ipsec_getref_variant(sc, &psref);
695 if (var->iv_pdst == NULL) {
696 error = EADDRNOTAVAIL;
697 goto bad;
698 }
699 src = var->iv_pdst;
700 switch (cmd) {
701 #ifdef INET
702 case SIOCGIFPDSTADDR:
703 dst = &ifr->ifr_addr;
704 size = sizeof(ifr->ifr_addr);
705 break;
706 #endif /* INET */
707 #ifdef INET6
708 case SIOCGIFPDSTADDR_IN6:
709 dst = (struct sockaddr *)
710 &(((struct in6_ifreq *)data)->ifr_addr);
711 size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
712 break;
713 #endif /* INET6 */
714 default:
715 error = EADDRNOTAVAIL;
716 goto bad;
717 }
718 if (src->sa_len > size) {
719 error = EINVAL;
720 goto bad;
721 }
722 error = IF_IPSEC_GATHER_PDST_ADDR_PORT(var, dst);
723 if (error)
724 goto bad;
725 if_ipsec_putref_variant(var, &psref);
726 curlwp_bindx(bound);
727 break;
728
729 case SIOCGLIFPHYADDR:
730 bound = curlwp_bind();
731 var = if_ipsec_getref_variant(sc, &psref);
732 if (if_ipsec_variant_is_unconfigured(var)) {
733 error = EADDRNOTAVAIL;
734 goto bad;
735 }
736
737 /* copy src */
738 src = var->iv_psrc;
739 dst = (struct sockaddr *)
740 &(((struct if_laddrreq *)data)->addr);
741 size = sizeof(((struct if_laddrreq *)data)->addr);
742 if (src->sa_len > size) {
743 error = EINVAL;
744 goto bad;
745 }
746 error = IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, dst);
747 if (error)
748 goto bad;
749
750 /* copy dst */
751 src = var->iv_pdst;
752 dst = (struct sockaddr *)
753 &(((struct if_laddrreq *)data)->dstaddr);
754 size = sizeof(((struct if_laddrreq *)data)->dstaddr);
755 if (src->sa_len > size) {
756 error = EINVAL;
757 goto bad;
758 }
759 error = IF_IPSEC_GATHER_PDST_ADDR_PORT(var, dst);
760 if (error)
761 goto bad;
762 if_ipsec_putref_variant(var, &psref);
763 curlwp_bindx(bound);
764 break;
765
766 default:
767 error = ifioctl_common(ifp, cmd, data);
768 if (!error) {
769 bound = curlwp_bind();
770 error = if_ipsec_ensure_flags(&sc->ipsec_if, oflags);
771 if (error)
772 goto bad;
773 curlwp_bindx(bound);
774 }
775 break;
776 }
777 return error;
778
779 bad:
780 if (var != NULL)
781 if_ipsec_putref_variant(var, &psref);
782 curlwp_bindx(bound);
783
784 return error;
785 }
786
/*
 * Per address family operation table; one entry for each family that is
 * compiled in.  Each function takes the variant to operate on and
 * returns an int that if_ipsec_encap_common() passes back as its error.
 */
struct encap_funcs {
#ifdef INET
	int (*ef_inet)(struct ipsec_variant *);		/* AF_INET */
#endif
#ifdef INET6
	int (*ef_inet6)(struct ipsec_variant *);	/* AF_INET6 */
#endif
};
795
796 static struct encap_funcs ipsec_encap_attach = {
797 #ifdef INET
798 .ef_inet = ipsecif4_attach,
799 #endif
800 #ifdef INET6
801 .ef_inet6 = &ipsecif6_attach,
802 #endif
803 };
804
805 static struct encap_funcs ipsec_encap_detach = {
806 #ifdef INET
807 .ef_inet = ipsecif4_detach,
808 #endif
809 #ifdef INET6
810 .ef_inet6 = &ipsecif6_detach,
811 #endif
812 };
813
814 static int
815 if_ipsec_encap_common(struct ipsec_variant *var, struct encap_funcs *funcs)
816 {
817 int error;
818
819 KASSERT(var != NULL);
820 KASSERT(if_ipsec_variant_is_configured(var));
821
822 switch (var->iv_psrc->sa_family) {
823 #ifdef INET
824 case AF_INET:
825 error = (funcs->ef_inet)(var);
826 break;
827 #endif /* INET */
828 #ifdef INET6
829 case AF_INET6:
830 error = (funcs->ef_inet6)(var);
831 break;
832 #endif /* INET6 */
833 default:
834 error = EINVAL;
835 break;
836 }
837
838 return error;
839 }
840
841 static int
842 if_ipsec_encap_attach(struct ipsec_variant *var)
843 {
844
845 return if_ipsec_encap_common(var, &ipsec_encap_attach);
846 }
847
848 static int
849 if_ipsec_encap_detach(struct ipsec_variant *var)
850 {
851
852 return if_ipsec_encap_common(var, &ipsec_encap_detach);
853 }
854
855 /*
856 * Validate and set ipsec(4) I/F configurations.
857 * (1) validate
858 * (1-1) Check the argument src and dst address pair will change
859 * configuration from current src and dst address pair.
860 * (1-2) Check any ipsec(4) I/F uses duplicated src and dst address pair
861 * with argument src and dst address pair, except for NAT-T shared
862 * tunnels.
863 * (2) set
864 * (2-1) Create variant for new configuration.
865 * (2-2) Create temporary "null" variant used to avoid to access
866 * dangling variant while SPs are deleted and added.
867 * (2-3) Swap variant include its SPs.
868 * (2-4) Cleanup last configurations.
869 */
870 static int
871 if_ipsec_set_tunnel(struct ifnet *ifp,
872 struct sockaddr *src, struct sockaddr *dst)
873 {
874 struct ipsec_softc *sc = ifp->if_softc;
875 struct ipsec_softc *sc2;
876 struct ipsec_variant *ovar, *nvar, *nullvar;
877 struct sockaddr *osrc, *odst;
878 struct sockaddr *nsrc, *ndst;
879 in_port_t nsport = 0, ndport = 0;
880 int error;
881
882 error = encap_lock_enter();
883 if (error)
884 return error;
885
886 nsrc = sockaddr_dup(src, M_WAITOK);
887 ndst = sockaddr_dup(dst, M_WAITOK);
888 nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
889 nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
890
891 mutex_enter(&sc->ipsec_lock);
892
893 ovar = sc->ipsec_var;
894
895 switch(nsrc->sa_family) {
896 #ifdef INET
897 case AF_INET:
898 nsport = satosin(src)->sin_port;
899 /*
900 * avoid confuse SP when NAT-T disabled,
901 * e.g.
902 * expected: 10.0.1.2[any] 10.0.1.1[any] 4(ipv4)
903 * confuse : 10.0.1.2[600] 10.0.1.1[600] 4(ipv4)
904 */
905 satosin(nsrc)->sin_port = 0;
906 ndport = satosin(dst)->sin_port;
907 satosin(ndst)->sin_port = 0;
908 break;
909 #endif /* INET */
910 #ifdef INET6
911 case AF_INET6:
912 nsport = satosin6(src)->sin6_port;
913 satosin6(nsrc)->sin6_port = 0;
914 ndport = satosin6(dst)->sin6_port;
915 satosin6(ndst)->sin6_port = 0;
916 break;
917 #endif /* INET6 */
918 default:
919 log(LOG_DEBUG,
920 "%s: Invalid address family: %d.\n",
921 __func__, src->sa_family);
922 error = EINVAL;
923 goto out;
924 }
925
926 /*
927 * (1-1) Check the argument src and dst address pair will change
928 * configuration from current src and dst address pair.
929 */
930 if ((ovar->iv_pdst && sockaddr_cmp(ovar->iv_pdst, dst) == 0) &&
931 (ovar->iv_psrc && sockaddr_cmp(ovar->iv_psrc, src) == 0) &&
932 (ovar->iv_sport == nsport && ovar->iv_dport == ndport)) {
933 /* address and port pair not changed. */
934 error = 0;
935 goto out;
936 }
937
938 /*
939 * (1-2) Check any ipsec(4) I/F uses duplicated src and dst address pair
940 * with argument src and dst address pair, except for NAT-T shared
941 * tunnels.
942 */
943 mutex_enter(&ipsec_softcs.lock);
944 LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
945 struct ipsec_variant *var2;
946 struct psref psref;
947
948 if (sc2 == sc)
949 continue;
950 var2 = if_ipsec_getref_variant(sc2, &psref);
951 if (if_ipsec_variant_is_unconfigured(var2)) {
952 if_ipsec_putref_variant(var2, &psref);
953 continue;
954 }
955 if (if_ipsec_nat_t(sc) || if_ipsec_nat_t(sc2)) {
956 if_ipsec_putref_variant(var2, &psref);
957 continue; /* NAT-T shared tunnel */
958 }
959 if (sockaddr_cmp(var2->iv_pdst, dst) == 0 &&
960 sockaddr_cmp(var2->iv_psrc, src) == 0) {
961 if_ipsec_putref_variant(var2, &psref);
962 mutex_exit(&ipsec_softcs.lock);
963 error = EADDRNOTAVAIL;
964 goto out;
965 }
966
967 if_ipsec_putref_variant(var2, &psref);
968 /* XXX both end must be valid? (I mean, not 0.0.0.0) */
969 }
970 mutex_exit(&ipsec_softcs.lock);
971
972
973 osrc = ovar->iv_psrc;
974 odst = ovar->iv_pdst;
975
976 /*
977 * (2-1) Create ipsec_variant for new configuration.
978 */
979 if_ipsec_copy_variant(nvar, ovar);
980 nvar->iv_psrc = nsrc;
981 nvar->iv_pdst = ndst;
982 nvar->iv_sport = nsport;
983 nvar->iv_dport = ndport;
984 nvar->iv_encap_cookie4 = NULL;
985 nvar->iv_encap_cookie6 = NULL;
986 psref_target_init(&nvar->iv_psref, iv_psref_class);
987 error = if_ipsec_encap_attach(nvar);
988 if (error)
989 goto out;
990
991 /*
992 * (2-2) Create temporary "null" variant.
993 */
994 if_ipsec_copy_variant(nullvar, ovar);
995 if_ipsec_clear_config(nullvar);
996 psref_target_init(&nullvar->iv_psref, iv_psref_class);
997 membar_producer();
998 /*
999 * (2-3) Swap variant include its SPs.
1000 */
1001 error = if_ipsec_update_variant(sc, nvar, nullvar);
1002 if (error) {
1003 if_ipsec_encap_detach(nvar);
1004 goto out;
1005 }
1006
1007 mutex_exit(&sc->ipsec_lock);
1008
1009 /*
1010 * (2-4) Cleanup last configurations.
1011 */
1012 if (if_ipsec_variant_is_configured(ovar))
1013 if_ipsec_encap_detach(ovar);
1014 encap_lock_exit();
1015
1016 if (osrc != NULL)
1017 sockaddr_free(osrc);
1018 if (odst != NULL)
1019 sockaddr_free(odst);
1020 kmem_free(ovar, sizeof(*ovar));
1021 kmem_free(nullvar, sizeof(*nullvar));
1022
1023 return 0;
1024
1025 out:
1026 mutex_exit(&sc->ipsec_lock);
1027 encap_lock_exit();
1028
1029 sockaddr_free(nsrc);
1030 sockaddr_free(ndst);
1031 kmem_free(nvar, sizeof(*nvar));
1032 kmem_free(nullvar, sizeof(*nullvar));
1033
1034 return error;
1035 }
1036
1037 /*
1038 * Validate and delete ipsec(4) I/F configurations.
1039 * (1) validate
1040 * (1-1) Check current src and dst address pair are null,
1041 * which means the ipsec(4) I/F is already done deletetunnel.
1042 * (2) delete
1043 * (2-1) Create variant for deleted status.
1044 * (2-2) Create temporary "null" variant used to avoid to access
1045 * dangling variant while SPs are deleted and added.
1046 * NOTE:
1047 * The contents of temporary "null" variant equal to the variant
1048 * of (2-1), however two psref_target_destroy() synchronization
1049 * points are necessary to avoid to access dangling variant
1050 * while SPs are deleted and added. To implement that simply,
1051 * we use the same manner as if_ipsec_set_tunnel(), that is,
1052 * create extra "null" variant and use it temporarily.
1053 * (2-3) Swap variant include its SPs.
1054 * (2-4) Cleanup last configurations.
1055 */
1056 static void
1057 if_ipsec_delete_tunnel(struct ifnet *ifp)
1058 {
1059 struct ipsec_softc *sc = ifp->if_softc;
1060 struct ipsec_variant *ovar, *nvar, *nullvar;
1061 struct sockaddr *osrc, *odst;
1062 int error;
1063
1064 error = encap_lock_enter();
1065 if (error)
1066 return;
1067
1068 nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
1069 nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
1070
1071 mutex_enter(&sc->ipsec_lock);
1072
1073 ovar = sc->ipsec_var;
1074 osrc = ovar->iv_psrc;
1075 odst = ovar->iv_pdst;
1076 /*
1077 * (1-1) Check current src and dst address pair are null,
1078 * which means the ipsec(4) I/F is already done deletetunnel.
1079 */
1080 if (osrc == NULL || odst == NULL) {
1081 /* address pair not changed. */
1082 mutex_exit(&sc->ipsec_lock);
1083 encap_lock_exit();
1084 kmem_free(nvar, sizeof(*nvar));
1085 return;
1086 }
1087
1088 /*
1089 * (2-1) Create variant for deleted status.
1090 */
1091 if_ipsec_copy_variant(nvar, ovar);
1092 if_ipsec_clear_config(nvar);
1093 psref_target_init(&nvar->iv_psref, iv_psref_class);
1094
1095 /*
1096 * (2-2) Create temporary "null" variant used to avoid to access
1097 * dangling variant while SPs are deleted and added.
1098 */
1099 if_ipsec_copy_variant(nullvar, ovar);
1100 if_ipsec_clear_config(nullvar);
1101 psref_target_init(&nullvar->iv_psref, iv_psref_class);
1102 membar_producer();
1103 /*
1104 * (2-3) Swap variant include its SPs.
1105 */
1106 /* if_ipsec_update_variant() does not fail when delete SP only. */
1107 (void)if_ipsec_update_variant(sc, nvar, nullvar);
1108
1109 mutex_exit(&sc->ipsec_lock);
1110
1111 /*
1112 * (2-4) Cleanup last configurations.
1113 */
1114 if (if_ipsec_variant_is_configured(ovar))
1115 if_ipsec_encap_detach(ovar);
1116 encap_lock_exit();
1117
1118 sockaddr_free(osrc);
1119 sockaddr_free(odst);
1120 kmem_free(ovar, sizeof(*ovar));
1121 kmem_free(nullvar, sizeof(*nullvar));
1122 }
1123
1124 /*
1125 * Check IFF_NAT_T and IFF_FWD_IPV6 flags, therefore update SPs if needed.
1126 * (1) check
1127 * (1-1) Check flags are changed.
1128 * (1-2) Check current src and dst address pair. If they are null,
1129 * that means the ipsec(4) I/F is deletetunnel'ed, so it is
1130 * not needed to update.
1131 * (2) update
1132 * (2-1) Create variant for new SPs.
1133 * (2-2) Create temporary "null" variant used to avoid to access
1134 * dangling variant while SPs are deleted and added.
1135 * NOTE:
1136 * There is the same problem as if_ipsec_delete_tunnel().
1137 * (2-3) Swap variant include its SPs.
1138 * (2-4) Cleanup unused configurations.
1139 * NOTE: use the same encap_cookies.
1140 */
1141 static int
1142 if_ipsec_ensure_flags(struct ifnet *ifp, short oflags)
1143 {
1144 struct ipsec_softc *sc = ifp->if_softc;
1145 struct ipsec_variant *ovar, *nvar, *nullvar;
1146 int error;
1147
1148 /*
1149 * (1) Check flags are changed.
1150 */
1151 if ((oflags & (IFF_NAT_T|IFF_FWD_IPV6)) ==
1152 (ifp->if_flags & (IFF_NAT_T|IFF_FWD_IPV6)))
1153 return 0; /* flags not changed. */
1154
1155 error = encap_lock_enter();
1156 if (error)
1157 return error;
1158
1159 nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
1160 nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
1161
1162 mutex_enter(&sc->ipsec_lock);
1163
1164 ovar = sc->ipsec_var;
1165 /*
1166 * (1-2) Check current src and dst address pair.
1167 */
1168 if (if_ipsec_variant_is_unconfigured(ovar)) {
1169 /* nothing to do */
1170 mutex_exit(&sc->ipsec_lock);
1171 encap_lock_exit();
1172 return 0;
1173 }
1174
1175 /*
1176 * (2-1) Create variant for new SPs.
1177 */
1178 if_ipsec_copy_variant(nvar, ovar);
1179 psref_target_init(&nvar->iv_psref, iv_psref_class);
1180 /*
1181 * (2-2) Create temporary "null" variant used to avoid to access
1182 * dangling variant while SPs are deleted and added.
1183 */
1184 if_ipsec_copy_variant(nullvar, ovar);
1185 if_ipsec_clear_config(nullvar);
1186 psref_target_init(&nullvar->iv_psref, iv_psref_class);
1187 membar_producer();
1188 /*
1189 * (2-3) Swap variant include its SPs.
1190 */
1191 error = if_ipsec_update_variant(sc, nvar, nullvar);
1192
1193 mutex_exit(&sc->ipsec_lock);
1194 encap_lock_exit();
1195
1196 /*
1197 * (2-4) Cleanup unused configurations.
1198 */
1199 if (!error)
1200 kmem_free(ovar, sizeof(*ovar));
1201 else
1202 kmem_free(nvar, sizeof(*ovar));
1203 kmem_free(nullvar, sizeof(*nullvar));
1204
1205 return error;
1206 }
1207
1208 /*
1209 * SPD management
1210 */
1211
1212 /*
1213 * Share SP set with other NAT-T ipsec(4) I/F(s).
1214 * Return 1, when "var" shares SP set.
1215 * Return 0, when "var" cannot share SP set.
1216 *
1217 * NOTE:
1218 * if_ipsec_share_sp() and if_ipsec_unshare_sp() would require global lock
1219 * to exclude other ipsec(4) I/Fs set_tunnel/delete_tunnel. E.g. when ipsec0
1220 * and ipsec1 can share SP set, running ipsec0's set_tunnel and ipsec1's
1221 * set_tunnel causes race.
1222 * Currently, (fortunately) encap_lock works as this global lock.
1223 */
1224 static int
1225 if_ipsec_share_sp(struct ipsec_variant *var)
1226 {
1227 struct ipsec_softc *sc = var->iv_softc;
1228 struct ipsec_softc *sc2;
1229 struct ipsec_variant *var2;
1230 struct psref psref;
1231
1232 KASSERT(encap_lock_held());
1233 KASSERT(var->iv_psrc != NULL && var->iv_pdst != NULL);
1234
1235 mutex_enter(&ipsec_softcs.lock);
1236 LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
1237 if (sc2 == sc)
1238 continue;
1239 var2 = if_ipsec_getref_variant(sc2, &psref);
1240 if (if_ipsec_variant_is_unconfigured(var2)) {
1241 if_ipsec_putref_variant(var2, &psref);
1242 continue;
1243 }
1244 if (sockaddr_cmp(var2->iv_pdst, var->iv_pdst) != 0 ||
1245 sockaddr_cmp(var2->iv_psrc, var->iv_psrc) != 0) {
1246 if_ipsec_putref_variant(var2, &psref);
1247 continue;
1248 }
1249
1250 break;
1251 }
1252 mutex_exit(&ipsec_softcs.lock);
1253 if (sc2 == NULL)
1254 return 0; /* not shared */
1255
1256 IV_SP_IN(var) = IV_SP_IN(var2);
1257 IV_SP_IN6(var) = IV_SP_IN6(var2);
1258 IV_SP_OUT(var) = IV_SP_OUT(var2);
1259 IV_SP_OUT6(var) = IV_SP_OUT6(var2);
1260
1261 if_ipsec_putref_variant(var2, &psref);
1262 return 1; /* shared */
1263 }
1264
1265 /*
1266 * Unshare SP set with other NAT-T ipsec(4) I/F(s).
1267 * Return 1, when "var" shared SP set, and then unshare them.
1268 * Return 0, when "var" did not share SP set.
1269 *
1270 * NOTE:
1271 * See if_ipsec_share_sp()'s note.
1272 */
1273 static int
1274 if_ipsec_unshare_sp(struct ipsec_variant *var)
1275 {
1276 struct ipsec_softc *sc = var->iv_softc;
1277 struct ipsec_softc *sc2;
1278 struct ipsec_variant *var2;
1279 struct psref psref;
1280
1281 KASSERT(encap_lock_held());
1282
1283 if (!var->iv_pdst || !var->iv_psrc)
1284 return 0;
1285
1286 mutex_enter(&ipsec_softcs.lock);
1287 LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
1288 if (sc2 == sc)
1289 continue;
1290 var2 = if_ipsec_getref_variant(sc2, &psref);
1291 if (!var2->iv_pdst || !var2->iv_psrc) {
1292 if_ipsec_putref_variant(var2, &psref);
1293 continue;
1294 }
1295 if (sockaddr_cmp(var2->iv_pdst, var->iv_pdst) != 0 ||
1296 sockaddr_cmp(var2->iv_psrc, var->iv_psrc) != 0) {
1297 if_ipsec_putref_variant(var2, &psref);
1298 continue;
1299 }
1300
1301 break;
1302 }
1303 mutex_exit(&ipsec_softcs.lock);
1304 if (sc2 == NULL)
1305 return 0; /* not shared */
1306
1307 IV_SP_IN(var) = NULL;
1308 IV_SP_IN6(var) = NULL;
1309 IV_SP_OUT(var) = NULL;
1310 IV_SP_OUT6(var) = NULL;
1311 if_ipsec_putref_variant(var2, &psref);
1312 return 1; /* shared */
1313 }
1314
1315 static inline void
1316 if_ipsec_add_mbuf_optalign(struct mbuf *m0, void *data, size_t len, bool align)
1317 {
1318 struct mbuf *m;
1319
1320 MGET(m, M_WAITOK | M_ZERO, MT_DATA);
1321 if (align)
1322 m->m_len = PFKEY_ALIGN8(len);
1323 else
1324 m->m_len = len;
1325 m_copyback(m, 0, len, data);
1326 m_cat(m0, m);
1327 }
1328
1329 static inline void
1330 if_ipsec_add_mbuf(struct mbuf *m0, void *data, size_t len)
1331 {
1332
1333 if_ipsec_add_mbuf_optalign(m0, data, len, true);
1334 }
1335
1336 static inline void
1337 if_ipsec_add_mbuf_addr_port(struct mbuf *m0, struct sockaddr *addr, in_port_t port, bool align)
1338 {
1339
1340 if (port == 0) {
1341 if_ipsec_add_mbuf_optalign(m0, addr, addr->sa_len, align);
1342 } else {
1343 struct sockaddr addrport;
1344
1345 if_ipsec_set_addr_port(&addrport, addr, port);
1346 if_ipsec_add_mbuf_optalign(m0, &addrport, addrport.sa_len, align);
1347 }
1348 }
1349
1350 static inline void
1351 if_ipsec_add_pad(struct mbuf *m0, size_t len)
1352 {
1353 struct mbuf *m;
1354
1355 if (len == 0)
1356 return;
1357
1358 MGET(m, M_WAITOK | M_ZERO, MT_DATA);
1359 m->m_len = len;
1360 m_cat(m0, m);
1361 }
1362
1363 static inline size_t
1364 if_ipsec_set_sadb_addr(struct sadb_address *saaddr, struct sockaddr *addr,
1365 int proto, uint16_t exttype)
1366 {
1367 size_t size;
1368
1369 KASSERT(saaddr != NULL);
1370 KASSERT(addr != NULL);
1371
1372 size = sizeof(*saaddr) + PFKEY_ALIGN8(addr->sa_len);
1373 saaddr->sadb_address_len = PFKEY_UNIT64(size);
1374 saaddr->sadb_address_exttype = exttype;
1375 saaddr->sadb_address_proto = proto;
1376 switch (addr->sa_family) {
1377 #ifdef INET
1378 case AF_INET:
1379 saaddr->sadb_address_prefixlen = sizeof(struct in_addr) << 3;
1380 break;
1381 #endif /* INET */
1382 #ifdef INET6
1383 case AF_INET6:
1384 saaddr->sadb_address_prefixlen = sizeof(struct in6_addr) << 3;
1385 break;
1386 #endif /* INET6 */
1387 default:
1388 log(LOG_DEBUG,
1389 "%s: Invalid address family: %d.\n",
1390 __func__, addr->sa_family);
1391 break;
1392 }
1393 saaddr->sadb_address_reserved = 0;
1394
1395 return size;
1396 }
1397
1398 static inline size_t
1399 if_ipsec_set_sadb_src(struct sadb_address *sasrc, struct sockaddr *src,
1400 int proto)
1401 {
1402
1403 return if_ipsec_set_sadb_addr(sasrc, src, proto,
1404 SADB_EXT_ADDRESS_SRC);
1405 }
1406
1407 static inline size_t
1408 if_ipsec_set_sadb_dst(struct sadb_address *sadst, struct sockaddr *dst,
1409 int proto)
1410 {
1411
1412 return if_ipsec_set_sadb_addr(sadst, dst, proto,
1413 SADB_EXT_ADDRESS_DST);
1414 }
1415
1416 static inline size_t
1417 if_ipsec_set_sadb_x_policy(struct sadb_x_policy *xpl,
1418 struct sadb_x_ipsecrequest *xisr, uint16_t policy, uint8_t dir, uint32_t id,
1419 uint8_t level, struct sockaddr *src, struct sockaddr *dst)
1420 {
1421 size_t size;
1422
1423 KASSERT(policy != IPSEC_POLICY_IPSEC || xisr != NULL);
1424
1425 size = sizeof(*xpl);
1426 if (policy == IPSEC_POLICY_IPSEC) {
1427 size += PFKEY_ALIGN8(sizeof(*xisr));
1428 if (src != NULL && dst != NULL)
1429 size += PFKEY_ALIGN8(src->sa_len + dst->sa_len);
1430 }
1431 xpl->sadb_x_policy_len = PFKEY_UNIT64(size);
1432 xpl->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
1433 xpl->sadb_x_policy_type = policy;
1434 xpl->sadb_x_policy_dir = dir;
1435 xpl->sadb_x_policy_reserved = 0;
1436 xpl->sadb_x_policy_id = id;
1437 xpl->sadb_x_policy_reserved2 = 0;
1438
1439 if (policy == IPSEC_POLICY_IPSEC) {
1440 xisr->sadb_x_ipsecrequest_len = PFKEY_ALIGN8(sizeof(*xisr));
1441 if (src != NULL && dst != NULL)
1442 xisr->sadb_x_ipsecrequest_len +=
1443 PFKEY_ALIGN8(src->sa_len + dst->sa_len);
1444 xisr->sadb_x_ipsecrequest_proto = IPPROTO_ESP;
1445 xisr->sadb_x_ipsecrequest_mode = IPSEC_MODE_TRANSPORT;
1446 xisr->sadb_x_ipsecrequest_level = level;
1447 xisr->sadb_x_ipsecrequest_reqid = key_newreqid();
1448 }
1449
1450 return size;
1451 }
1452
1453 static inline void
1454 if_ipsec_set_sadb_msg(struct sadb_msg *msg, uint16_t extlen, uint8_t msgtype)
1455 {
1456
1457 KASSERT(msg != NULL);
1458
1459 msg->sadb_msg_version = PF_KEY_V2;
1460 msg->sadb_msg_type = msgtype;
1461 msg->sadb_msg_errno = 0;
1462 msg->sadb_msg_satype = SADB_SATYPE_UNSPEC;
1463 msg->sadb_msg_len = PFKEY_UNIT64(sizeof(*msg)) + extlen;
1464 msg->sadb_msg_reserved = 0;
1465 msg->sadb_msg_seq = 0; /* XXXX */
1466 msg->sadb_msg_pid = 0; /* XXXX */
1467 }
1468
1469 static inline void
1470 if_ipsec_set_sadb_msg_add(struct sadb_msg *msg, uint16_t extlen)
1471 {
1472
1473 if_ipsec_set_sadb_msg(msg, extlen, SADB_X_SPDADD);
1474 }
1475
1476 static inline void
1477 if_ipsec_set_sadb_msg_del(struct sadb_msg *msg, uint16_t extlen)
1478 {
1479
1480 if_ipsec_set_sadb_msg(msg, extlen, SADB_X_SPDDELETE2);
1481 }
1482
1483 static int
1484 if_ipsec_set_addr_port(struct sockaddr *addrport, struct sockaddr *addr,
1485 in_port_t port)
1486 {
1487 int error = 0;
1488
1489 sockaddr_copy(addrport, addr->sa_len, addr);
1490
1491 switch (addr->sa_family) {
1492 #ifdef INET
1493 case AF_INET: {
1494 struct sockaddr_in *sin = satosin(addrport);
1495 sin->sin_port = port;
1496 break;
1497 }
1498 #endif /* INET */
1499 #ifdef INET6
1500 case AF_INET6: {
1501 struct sockaddr_in6 *sin6 = satosin6(addrport);
1502 sin6->sin6_port = port;
1503 break;
1504 }
1505 #endif /* INET6 */
1506 default:
1507 log(LOG_DEBUG,
1508 "%s: Invalid address family: %d.\n",
1509 __func__, addr->sa_family);
1510 error = EINVAL;
1511 }
1512
1513 return error;
1514 }
1515
1516 static struct secpolicy *
1517 if_ipsec_add_sp0(struct sockaddr *src, in_port_t sport,
1518 struct sockaddr *dst, in_port_t dport,
1519 int dir, int proto, int level, u_int policy)
1520 {
1521 struct sadb_msg msg;
1522 struct sadb_address xsrc, xdst;
1523 struct sadb_x_policy xpl;
1524 struct sadb_x_ipsecrequest xisr;
1525 size_t size;
1526 size_t padlen;
1527 uint16_t ext_msg_len = 0;
1528 struct mbuf *m;
1529
1530 memset(&msg, 0, sizeof(msg));
1531 memset(&xsrc, 0, sizeof(xsrc));
1532 memset(&xdst, 0, sizeof(xdst));
1533 memset(&xpl, 0, sizeof(xpl));
1534 memset(&xisr, 0, sizeof(xisr));
1535
1536 MGETHDR(m, M_WAITOK, MT_DATA);
1537
1538 size = if_ipsec_set_sadb_src(&xsrc, src, proto);
1539 ext_msg_len += PFKEY_UNIT64(size);
1540 size = if_ipsec_set_sadb_dst(&xdst, dst, proto);
1541 ext_msg_len += PFKEY_UNIT64(size);
1542 size = if_ipsec_set_sadb_x_policy(&xpl, &xisr, policy, dir, 0, level, src, dst);
1543 ext_msg_len += PFKEY_UNIT64(size);
1544 if_ipsec_set_sadb_msg_add(&msg, ext_msg_len);
1545
1546 /* build PF_KEY message */
1547
1548 m->m_len = sizeof(msg);
1549 m_copyback(m, 0, sizeof(msg), &msg);
1550
1551 if_ipsec_add_mbuf(m, &xsrc, sizeof(xsrc));
1552 if_ipsec_add_mbuf_addr_port(m, src, sport, true);
1553 padlen = PFKEY_UNUNIT64(xsrc.sadb_address_len)
1554 - (sizeof(xsrc) + PFKEY_ALIGN8(src->sa_len));
1555 if_ipsec_add_pad(m, padlen);
1556
1557 if_ipsec_add_mbuf(m, &xdst, sizeof(xdst));
1558 if_ipsec_add_mbuf_addr_port(m, dst, dport, true);
1559 padlen = PFKEY_UNUNIT64(xdst.sadb_address_len)
1560 - (sizeof(xdst) + PFKEY_ALIGN8(dst->sa_len));
1561 if_ipsec_add_pad(m, padlen);
1562
1563 if_ipsec_add_mbuf(m, &xpl, sizeof(xpl));
1564 if (policy == IPSEC_POLICY_IPSEC) {
1565 if_ipsec_add_mbuf(m, &xisr, sizeof(xisr));
1566 if_ipsec_add_mbuf_addr_port(m, src, sport, false);
1567 if_ipsec_add_mbuf_addr_port(m, dst, dport, false);
1568 }
1569 padlen = PFKEY_UNUNIT64(xpl.sadb_x_policy_len) - sizeof(xpl);
1570 if (src != NULL && dst != NULL)
1571 padlen -= PFKEY_ALIGN8(src->sa_len + dst->sa_len);
1572 if_ipsec_add_pad(m, padlen);
1573
1574 /* key_kpi_spdadd() has already done KEY_SP_REF(). */
1575 return key_kpi_spdadd(m);
1576 }
1577
1578 static int
1579 if_ipsec_add_sp(struct ipsec_variant *var,
1580 struct sockaddr *src, in_port_t sport,
1581 struct sockaddr *dst, in_port_t dport)
1582 {
1583 struct ipsec_softc *sc = var->iv_softc;
1584 int level;
1585 u_int v6policy;
1586
1587 /*
1588 * must delete sp before add it.
1589 */
1590 KASSERT(IV_SP_IN(var) == NULL);
1591 KASSERT(IV_SP_OUT(var) == NULL);
1592 KASSERT(IV_SP_IN6(var) == NULL);
1593 KASSERT(IV_SP_OUT6(var) == NULL);
1594
1595 /*
1596 * can be shared?
1597 */
1598 if (if_ipsec_share_sp(var))
1599 return 0;
1600
1601 if (if_ipsec_nat_t(sc))
1602 level = IPSEC_LEVEL_REQUIRE;
1603 else
1604 level = IPSEC_LEVEL_UNIQUE;
1605
1606 if (if_ipsec_fwd_ipv6(sc))
1607 v6policy = IPSEC_POLICY_IPSEC;
1608 else
1609 v6policy = IPSEC_POLICY_DISCARD;
1610
1611 IV_SP_IN(var) = if_ipsec_add_sp0(dst, dport, src, sport,
1612 IPSEC_DIR_INBOUND, IPPROTO_IPIP, level, IPSEC_POLICY_IPSEC);
1613 if (IV_SP_IN(var) == NULL)
1614 goto fail;
1615 IV_SP_OUT(var) = if_ipsec_add_sp0(src, sport, dst, dport,
1616 IPSEC_DIR_OUTBOUND, IPPROTO_IPIP, level, IPSEC_POLICY_IPSEC);
1617 if (IV_SP_OUT(var) == NULL)
1618 goto fail;
1619 IV_SP_IN6(var) = if_ipsec_add_sp0(dst, dport, src, sport,
1620 IPSEC_DIR_INBOUND, IPPROTO_IPV6, level, v6policy);
1621 if (IV_SP_IN6(var) == NULL)
1622 goto fail;
1623 IV_SP_OUT6(var) = if_ipsec_add_sp0(src, sport, dst, dport,
1624 IPSEC_DIR_OUTBOUND, IPPROTO_IPV6, level, v6policy);
1625 if (IV_SP_OUT6(var) == NULL)
1626 goto fail;
1627
1628 return 0;
1629
1630 fail:
1631 if (IV_SP_IN6(var) != NULL) {
1632 if_ipsec_del_sp0(IV_SP_IN6(var));
1633 IV_SP_IN6(var) = NULL;
1634 }
1635 if (IV_SP_OUT(var) != NULL) {
1636 if_ipsec_del_sp0(IV_SP_OUT(var));
1637 IV_SP_OUT(var) = NULL;
1638 }
1639 if (IV_SP_IN(var) != NULL) {
1640 if_ipsec_del_sp0(IV_SP_IN(var));
1641 IV_SP_IN(var) = NULL;
1642 }
1643
1644 return EEXIST;
1645 }
1646
1647 static int
1648 if_ipsec_del_sp0(struct secpolicy *sp)
1649 {
1650 struct sadb_msg msg;
1651 struct sadb_x_policy xpl;
1652 size_t size;
1653 uint16_t ext_msg_len = 0;
1654 int error;
1655 struct mbuf *m;
1656
1657 if (sp == NULL)
1658 return 0;
1659
1660 memset(&msg, 0, sizeof(msg));
1661 memset(&xpl, 0, sizeof(xpl));
1662
1663 MGETHDR(m, M_WAITOK, MT_DATA);
1664
1665 size = if_ipsec_set_sadb_x_policy(&xpl, NULL, 0, 0, sp->id, 0, NULL, NULL);
1666 ext_msg_len += PFKEY_UNIT64(size);
1667
1668 if_ipsec_set_sadb_msg_del(&msg, ext_msg_len);
1669
1670 m->m_len = sizeof(msg);
1671 m_copyback(m, 0, sizeof(msg), &msg);
1672
1673 if_ipsec_add_mbuf(m, &xpl, sizeof(xpl));
1674
1675 /* unreference correspond to key_kpi_spdadd(). */
1676 KEY_SP_UNREF(&sp);
1677 error = key_kpi_spddelete2(m);
1678 if (error != 0) {
1679 log(LOG_ERR, "%s: cannot delete SP(ID=%u) (error=%d).\n",
1680 __func__, sp->id, error);
1681 }
1682 return error;
1683 }
1684
1685 static void
1686 if_ipsec_del_sp(struct ipsec_variant *var)
1687 {
1688
1689 /* are the SPs shared? */
1690 if (if_ipsec_unshare_sp(var))
1691 return;
1692
1693 (void)if_ipsec_del_sp0(IV_SP_OUT(var));
1694 (void)if_ipsec_del_sp0(IV_SP_IN(var));
1695 (void)if_ipsec_del_sp0(IV_SP_OUT6(var));
1696 (void)if_ipsec_del_sp0(IV_SP_IN6(var));
1697 IV_SP_IN(var) = NULL;
1698 IV_SP_IN6(var) = NULL;
1699 IV_SP_OUT(var) = NULL;
1700 IV_SP_OUT6(var) = NULL;
1701 }
1702
1703 static int
1704 if_ipsec_replace_sp(struct ipsec_softc *sc, struct ipsec_variant *ovar,
1705 struct ipsec_variant *nvar)
1706 {
1707 in_port_t src_port = 0;
1708 in_port_t dst_port = 0;
1709 struct sockaddr *src;
1710 struct sockaddr *dst;
1711 int error = 0;
1712
1713 KASSERT(mutex_owned(&sc->ipsec_lock));
1714
1715 if_ipsec_del_sp(ovar);
1716
1717 src = nvar->iv_psrc;
1718 dst = nvar->iv_pdst;
1719 if (if_ipsec_nat_t(sc)) {
1720 /* NAT-T enabled */
1721 src_port = nvar->iv_sport;
1722 dst_port = nvar->iv_dport;
1723 }
1724 if (src && dst)
1725 error = if_ipsec_add_sp(nvar, src, src_port, dst, dst_port);
1726
1727 return error;
1728 }
1729
/*
 * ipsec_variant and its SPs update API.
 *
 * Switches sc->ipsec_var from its current variant to "nvar", replacing
 * the SPs on the way.  "nullvar" is installed temporarily while the SPs
 * are being replaced.
 *
 * Must be called with sc->ipsec_lock held.
 *
 * On success sc->ipsec_var is "nvar"; on failure it is rolled back to the
 * previous variant.  In both cases the psref target of "nullvar" has been
 * destroyed on return.  This function frees none of the variants; that is
 * left to the caller (see e.g. if_ipsec_ensure_flags()).
 *
 * IFF_RUNNING is set or cleared according to whether the resulting
 * sc->ipsec_var is configured.
 *
 * Returns 0, or the error from if_ipsec_replace_sp().
 *
 * Assumption:
 * reader side dereferences sc->ipsec_var in reader critical section only,
 * that is, all of reader sides do not read the sc->ipsec_var after
 * pserialize_perform().
 */
static int
if_ipsec_update_variant(struct ipsec_softc *sc, struct ipsec_variant *nvar,
    struct ipsec_variant *nullvar)
{
	struct ifnet *ifp = &sc->ipsec_if;
	struct ipsec_variant *ovar = sc->ipsec_var;
	int error;

	KASSERT(mutex_owned(&sc->ipsec_lock));

	/*
	 * To keep consistency between ipsec(4) I/F settings and SPs,
	 * we stop packet processing while replacing SPs, that is, we set
	 * "null" config variant to sc->ipsec_var.
	 */
	sc->ipsec_var = nullvar;
	pserialize_perform(ipsec_psz);
	/* The old variant is unpublished now; retire its psref target. */
	psref_target_destroy(&ovar->iv_psref, iv_psref_class);

	error = if_ipsec_replace_sp(sc, ovar, nvar);
	if (!error)
		sc->ipsec_var = nvar;
	else {
		sc->ipsec_var = ovar; /* rollback */
		/*
		 * ovar's psref target was destroyed above, so it has to be
		 * initialized again before ovar is used by readers.
		 */
		psref_target_init(&ovar->iv_psref, iv_psref_class);
	}

	/* Same unpublish-then-destroy sequence, now for the null variant. */
	pserialize_perform(ipsec_psz);
	psref_target_destroy(&nullvar->iv_psref, iv_psref_class);

	if (if_ipsec_variant_is_configured(sc->ipsec_var))
		ifp->if_flags |= IFF_RUNNING;
	else
		ifp->if_flags &= ~IFF_RUNNING;

	return error;
}