if_mpls.c revision 1.16.6.1 1 /* $NetBSD: if_mpls.c,v 1.16.6.1 2018/02/24 18:29:36 snj Exp $ */
2
3 /*
4 * Copyright (c) 2010 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Mihai Chelaru <kefren (at) NetBSD.org>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: if_mpls.c,v 1.16.6.1 2018/02/24 18:29:36 snj Exp $");
34
35 #include "opt_inet.h"
36 #include "opt_mpls.h"
37
38 #include <sys/param.h>
39
40 #include <sys/errno.h>
41 #include <sys/malloc.h>
42 #include <sys/mbuf.h>
43 #include <sys/sysctl.h>
44
45 #include <net/bpf.h>
46 #include <net/if.h>
47 #include <net/if_types.h>
48 #include <net/netisr.h>
49 #include <net/route.h>
50
51 #ifdef INET
52 #include <netinet/in.h>
53 #include <netinet/in_systm.h>
54 #include <netinet/in_var.h>
55 #include <netinet/ip.h>
56 #endif
57
58 #ifdef INET6
59 #include <netinet/ip6.h>
60 #include <netinet6/in6_var.h>
61 #include <netinet6/ip6_var.h>
62 #endif
63
64 #include <netmpls/mpls.h>
65 #include <netmpls/mpls_var.h>
66
67 #include "if_mpls.h"
68
/*
 * TRIM_LABEL: strip the outermost shim from the mbuf and reload the
 * lookup key with the shim that is now on top.
 *
 * This is a statement macro that works on names in the caller's scope:
 * the mbuf pointer `m', the struct sockaddr_mpls `dst' and the label
 * `done'.  If the next shim cannot be made contiguous, m_pullup()
 * leaves m == NULL and control jumps to `done'.
 */
#define TRIM_LABEL do { \
	m_adj(m, sizeof(union mpls_shim)); \
	if (m->m_len < sizeof(union mpls_shim)) { \
		m = m_pullup(m, sizeof(union mpls_shim)); \
		if (m == NULL) \
			goto done; \
	} \
	dst.smpls_addr.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr); \
} while (/* CONSTCOND */ 0)
76
77
78 void ifmplsattach(int);
79
80 static int mpls_clone_create(struct if_clone *, int);
81 static int mpls_clone_destroy(struct ifnet *);
82
83 static struct if_clone mpls_if_cloner =
84 IF_CLONE_INITIALIZER("mpls", mpls_clone_create, mpls_clone_destroy);
85
86
87 static void mpls_input(struct ifnet *, struct mbuf *);
88 static int mpls_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
89 struct rtentry *);
90 static int mpls_ioctl(struct ifnet *, u_long, void *);
91 static int mpls_send_frame(struct mbuf *, struct ifnet *, struct rtentry *);
92 static int mpls_lse(struct mbuf *);
93
94 #ifdef INET
95 static struct mbuf *mpls_unlabel_inet(struct mbuf *, int *error);
96 static struct mbuf *mpls_label_inet(struct mbuf *, union mpls_shim *, uint);
97 #endif
98
99 #ifdef INET6
100 static struct mbuf *mpls_unlabel_inet6(struct mbuf *, int *error);
101 static struct mbuf *mpls_label_inet6(struct mbuf *, union mpls_shim *, uint);
102 #endif
103
104 static struct mbuf *mpls_prepend_shim(struct mbuf *, union mpls_shim *);
105
106 extern int mpls_defttl, mpls_mapttl_inet, mpls_mapttl_inet6, mpls_icmp_respond,
107 mpls_forwarding, mpls_frame_accept, mpls_mapprec_inet, mpls_mapclass_inet6,
108 mpls_rfc4182;
109
110 /* ARGSUSED */
111 void
112 ifmplsattach(int count)
113 {
114 if_clone_attach(&mpls_if_cloner);
115 }
116
117 static int
118 mpls_clone_create(struct if_clone *ifc, int unit)
119 {
120 struct mpls_softc *sc;
121
122 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
123
124 if_initname(&sc->sc_if, ifc->ifc_name, unit);
125 sc->sc_if.if_softc = sc;
126 sc->sc_if.if_type = IFT_MPLS;
127 sc->sc_if.if_addrlen = 0;
128 sc->sc_if.if_hdrlen = sizeof(union mpls_shim);
129 sc->sc_if.if_dlt = DLT_NULL;
130 sc->sc_if.if_mtu = 1500;
131 sc->sc_if.if_flags = 0;
132 sc->sc_if.if_input = mpls_input;
133 sc->sc_if.if_output = mpls_output;
134 sc->sc_if.if_ioctl = mpls_ioctl;
135
136 if_attach(&sc->sc_if);
137 if_alloc_sadl(&sc->sc_if);
138 bpf_attach(&sc->sc_if, DLT_NULL, sizeof(uint32_t));
139 return 0;
140 }
141
142 static int
143 mpls_clone_destroy(struct ifnet *ifp)
144 {
145 int s;
146
147 bpf_detach(ifp);
148
149 s = splnet();
150 if_detach(ifp);
151 splx(s);
152
153 free(ifp->if_softc, M_DEVBUF);
154 return 0;
155 }
156
/*
 * mpls_input: if_input handler; hands the frame to the label switch
 * engine.  The result of mpls_lse() is not reported to the caller.
 *
 * TODO - kefren
 * I'd love to unshim the packet, guess family and pass it to bpf,
 * i.e. something like bpf_mtap_af(ifp, AF_MPLS, m).
 */
static void
mpls_input(struct ifnet *ifp, struct mbuf *m)
{
	(void)mpls_lse(m);
}
171
172 void
173 mplsintr(void)
174 {
175 struct mbuf *m;
176 int s;
177
178 while (!IF_IS_EMPTY(&mplsintrq)) {
179 s = splnet();
180 IF_DEQUEUE(&mplsintrq, m);
181 splx(s);
182
183 if (!m)
184 return;
185
186 if (((m->m_flags & M_PKTHDR) == 0) ||
187 (m->m_pkthdr.rcvif == 0))
188 panic("mplsintr(): no pkthdr or rcvif");
189
190 #ifdef MBUFTRACE
191 m_claimm(m, &mpls_owner);
192 #endif
193 mpls_input(m->m_pkthdr.rcvif, m);
194 }
195 }
196
197 /*
198 * prepend shim and deliver
199 */
200 static int
201 mpls_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct rtentry *rt)
202 {
203 union mpls_shim mh, *pms;
204 struct rtentry *rt1;
205 int err;
206 uint psize = sizeof(struct sockaddr_mpls);
207
208 KASSERT(KERNEL_LOCKED_P());
209
210 if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) {
211 m_freem(m);
212 return ENETDOWN;
213 }
214
215 if (rt_gettag(rt) == NULL || rt_gettag(rt)->sa_family != AF_MPLS) {
216 m_freem(m);
217 return EINVAL;
218 }
219
220 bpf_mtap_af(ifp, dst->sa_family, m);
221
222 memset(&mh, 0, sizeof(mh));
223 mh.s_addr = MPLS_GETSADDR(rt);
224 mh.shim.bos = 1;
225 mh.shim.exp = 0;
226 mh.shim.ttl = mpls_defttl;
227
228 pms = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr;
229
230 while (psize <= rt_gettag(rt)->sa_len - sizeof(mh)) {
231 pms++;
232 if (mh.shim.label != MPLS_LABEL_IMPLNULL &&
233 ((m = mpls_prepend_shim(m, &mh)) == NULL))
234 return ENOBUFS;
235 memset(&mh, 0, sizeof(mh));
236 mh.s_addr = ntohl(pms->s_addr);
237 mh.shim.bos = mh.shim.exp = 0;
238 mh.shim.ttl = mpls_defttl;
239 psize += sizeof(mh);
240 }
241
242 switch(dst->sa_family) {
243 #ifdef INET
244 case AF_INET:
245 m = mpls_label_inet(m, &mh, psize - sizeof(struct sockaddr_mpls));
246 break;
247 #endif
248 #ifdef INET6
249 case AF_INET6:
250 m = mpls_label_inet6(m, &mh, psize - sizeof(struct sockaddr_mpls));
251 break;
252 #endif
253 default:
254 m = mpls_prepend_shim(m, &mh);
255 break;
256 }
257
258 if (m == NULL) {
259 IF_DROP(&ifp->if_snd);
260 ifp->if_oerrors++;
261 return ENOBUFS;
262 }
263
264 ifp->if_opackets++;
265 ifp->if_obytes += m->m_pkthdr.len;
266
267 if ((rt1=rtalloc1(rt->rt_gateway, 1)) == NULL) {
268 m_freem(m);
269 return EHOSTUNREACH;
270 }
271
272 err = mpls_send_frame(m, rt1->rt_ifp, rt);
273 rtfree(rt1);
274 return err;
275 }
276
277 static int
278 mpls_ioctl(struct ifnet *ifp, u_long cmd, void *data)
279 {
280 int error = 0, s = splnet();
281 struct ifreq *ifr = data;
282
283 switch(cmd) {
284 case SIOCINITIFADDR:
285 ifp->if_flags |= IFF_UP | IFF_RUNNING;
286 break;
287 case SIOCSIFMTU:
288 if (ifr != NULL && ifr->ifr_mtu < 576) {
289 error = EINVAL;
290 break;
291 }
292 /* FALLTHROUGH */
293 case SIOCGIFMTU:
294 if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
295 error = 0;
296 break;
297 case SIOCSIFFLAGS:
298 if ((error = ifioctl_common(ifp, cmd, data)) != 0)
299 break;
300 if (ifp->if_flags & IFF_UP)
301 ifp->if_flags |= IFF_RUNNING;
302 break;
303 default:
304 error = ifioctl_common(ifp, cmd, data);
305 break;
306 }
307 splx(s);
308 return error;
309 }
310
311 /*
312 * MPLS Label Switch Engine
313 */
314 static int
315 mpls_lse(struct mbuf *m)
316 {
317 struct sockaddr_mpls dst;
318 union mpls_shim tshim, *htag;
319 struct rtentry *rt = NULL;
320 int error = ENOBUFS;
321 uint psize = sizeof(struct sockaddr_mpls);
322 bool push_back_alert = false;
323
324 /* If we're not accepting MPLS frames, leave now. */
325 if (!mpls_frame_accept) {
326 error = EINVAL;
327 goto done;
328 }
329
330 if (m->m_len < sizeof(union mpls_shim) &&
331 (m = m_pullup(m, sizeof(union mpls_shim))) == NULL)
332 goto done;
333
334 dst.smpls_len = sizeof(struct sockaddr_mpls);
335 dst.smpls_family = AF_MPLS;
336 dst.smpls_addr.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr);
337
338 error = EINVAL;
339
340 /* TTL decrement */
341 if ((m = mpls_ttl_dec(m)) == NULL)
342 goto done;
343
344 /* RFC 4182 */
345 if (mpls_rfc4182 != 0) {
346 while ((dst.smpls_addr.shim.label == MPLS_LABEL_IPV4NULL ||
347 dst.smpls_addr.shim.label == MPLS_LABEL_IPV6NULL) &&
348 __predict_false(dst.smpls_addr.shim.bos == 0))
349 TRIM_LABEL;
350 }
351
352 /* RFC 3032 Section 2.1 Page 4 */
353 if (__predict_false(dst.smpls_addr.shim.label == MPLS_LABEL_RTALERT) &&
354 dst.smpls_addr.shim.bos == 0) {
355 TRIM_LABEL;
356 push_back_alert = true;
357 }
358
359 if (dst.smpls_addr.shim.label <= MPLS_LABEL_RESMAX) {
360 /* Don't swap reserved labels */
361 switch (dst.smpls_addr.shim.label) {
362 #ifdef INET
363 case MPLS_LABEL_IPV4NULL:
364 /* Pop shim and push mbuf to IP stack */
365 if (dst.smpls_addr.shim.bos) {
366 m = mpls_unlabel_inet(m, &error);
367 }
368 break;
369 #endif
370 #ifdef INET6
371 case MPLS_LABEL_IPV6NULL:
372 /* Pop shim and push mbuf to IPv6 stack */
373 if (dst.smpls_addr.shim.bos) {
374 m = mpls_unlabel_inet6(m, &error);
375 }
376 break;
377 #endif
378 case MPLS_LABEL_RTALERT: /* Yeah, I'm all alerted */
379 case MPLS_LABEL_IMPLNULL: /* This is logical only */
380 default: /* Rest are not allowed */
381 break;
382 }
383 goto done;
384 }
385
386 /* Check if we should do MPLS forwarding */
387 error = EHOSTUNREACH;
388 if (!mpls_forwarding)
389 goto done;
390
391 /* Get a route to dst */
392 dst.smpls_addr.shim.ttl =
393 dst.smpls_addr.shim.bos =
394 dst.smpls_addr.shim.exp = 0;
395 dst.smpls_addr.s_addr = htonl(dst.smpls_addr.s_addr);
396 if ((rt = rtalloc1((const struct sockaddr*)&dst, 1)) == NULL)
397 goto done;
398
399 /* MPLS packet with no MPLS tagged route ? */
400 if ((rt->rt_flags & RTF_GATEWAY) == 0 ||
401 rt_gettag(rt) == NULL ||
402 rt_gettag(rt)->sa_family != AF_MPLS)
403 goto done;
404
405 tshim.s_addr = MPLS_GETSADDR(rt);
406
407 /* Swap labels */
408 if ((m->m_len < sizeof(union mpls_shim)) &&
409 (m = m_pullup(m, sizeof(union mpls_shim))) == 0) {
410 error = ENOBUFS;
411 goto done;
412 }
413
414 /* Replace only the label */
415 htag = mtod(m, union mpls_shim *);
416 htag->s_addr = ntohl(htag->s_addr);
417 htag->shim.label = tshim.shim.label;
418 htag->s_addr = htonl(htag->s_addr);
419
420 /* check if there is anything more to prepend */
421 htag = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr;
422 while (psize <= rt_gettag(rt)->sa_len - sizeof(tshim)) {
423 htag++;
424 memset(&tshim, 0, sizeof(tshim));
425 tshim.s_addr = ntohl(htag->s_addr);
426 tshim.shim.bos = tshim.shim.exp = 0;
427 tshim.shim.ttl = mpls_defttl;
428 if (tshim.shim.label != MPLS_LABEL_IMPLNULL &&
429 ((m = mpls_prepend_shim(m, &tshim)) == NULL)) {
430 error = ENOBUFS;
431 goto done;
432 }
433 psize += sizeof(tshim);
434 }
435
436 if (__predict_false(push_back_alert == true)) {
437 /* re-add the router alert label */
438 memset(&tshim, 0, sizeof(tshim));
439 tshim.s_addr = MPLS_LABEL_RTALERT;
440 tshim.shim.bos = tshim.shim.exp = 0;
441 tshim.shim.ttl = mpls_defttl;
442 if ((m = mpls_prepend_shim(m, &tshim)) == NULL) {
443 error = ENOBUFS;
444 goto done;
445 }
446 }
447
448 error = mpls_send_frame(m, rt->rt_ifp, rt);
449
450 done:
451 if (error != 0 && m != NULL)
452 m_freem(m);
453 if (rt != NULL)
454 rtfree(rt);
455
456 return error;
457 }
458
459 static int
460 mpls_send_frame(struct mbuf *m, struct ifnet *ifp, struct rtentry *rt)
461 {
462 union mpls_shim msh;
463 int ret;
464
465 if ((rt->rt_flags & RTF_GATEWAY) == 0)
466 return EHOSTUNREACH;
467
468 rt->rt_use++;
469
470 msh.s_addr = MPLS_GETSADDR(rt);
471 if (msh.shim.label == MPLS_LABEL_IMPLNULL ||
472 (m->m_flags & (M_MCAST | M_BCAST))) {
473 m_adj(m, sizeof(union mpls_shim));
474 m->m_pkthdr.csum_flags = 0;
475 }
476
477 switch(ifp->if_type) {
478 /* only these are supported for now */
479 case IFT_ETHER:
480 case IFT_TUNNEL:
481 case IFT_LOOP:
482 KERNEL_LOCK(1, NULL);
483 ret = (*ifp->if_output)(ifp, m, rt->rt_gateway, rt);
484 KERNEL_UNLOCK_ONE(NULL);
485 return ret;
486 break;
487 default:
488 return ENETUNREACH;
489 }
490 return 0;
491 }
492
493 #ifdef INET
494 static struct mbuf *
495 mpls_unlabel_inet(struct mbuf *m, int *error)
496 {
497 struct ip *iph;
498 union mpls_shim *ms;
499 int iphlen;
500
501 if (mpls_mapttl_inet || mpls_mapprec_inet) {
502 /* get shim info */
503 ms = mtod(m, union mpls_shim *);
504 ms->s_addr = ntohl(ms->s_addr);
505
506 /* and get rid of it */
507 m_adj(m, sizeof(union mpls_shim));
508
509 /* get ip header */
510 if (m->m_len < sizeof(struct ip) &&
511 (m = m_pullup(m, sizeof(struct ip))) == NULL) {
512 *error = ENOBUFS;
513 return NULL;
514 }
515
516 iph = mtod(m, struct ip *);
517 iphlen = iph->ip_hl << 2;
518
519 /* get it all */
520 if (m->m_len < iphlen) {
521 if ((m = m_pullup(m, iphlen)) == NULL) {
522 *error = ENOBUFS;
523 return NULL;
524 }
525 iph = mtod(m, struct ip *);
526 }
527
528 /* check ipsum */
529 if (in_cksum(m, iphlen) != 0) {
530 m_freem(m);
531 *error = EINVAL;
532 return NULL;
533 }
534
535 /* set IP ttl from MPLS ttl */
536 if (mpls_mapttl_inet)
537 iph->ip_ttl = ms->shim.ttl;
538
539 /* set IP Precedence from MPLS Exp */
540 if (mpls_mapprec_inet) {
541 iph->ip_tos = (iph->ip_tos << 3) >> 3;
542 iph->ip_tos |= ms->shim.exp << 5;
543 }
544
545 /* reset ipsum because we modified TTL and TOS */
546 iph->ip_sum = 0;
547 iph->ip_sum = in_cksum(m, iphlen);
548 } else {
549 m_adj(m, sizeof(union mpls_shim));
550 }
551
552 /* Put it on IP queue */
553 if (__predict_false(!pktq_enqueue(ip_pktq, m, 0))) {
554 m_freem(m);
555 *error = ENOBUFS;
556 return NULL;
557 }
558
559 *error = 0;
560 return m;
561 }
562
563 /*
564 * Prepend MPLS label
565 */
566 static struct mbuf *
567 mpls_label_inet(struct mbuf *m, union mpls_shim *ms, uint offset)
568 {
569 struct ip iphdr;
570
571 if (mpls_mapttl_inet || mpls_mapprec_inet) {
572 /* XXX Maybe just check m->m_pkthdr.len instead? */
573 if ((m->m_len < offset + sizeof(struct ip)) &&
574 (m = m_pullup(m, offset + sizeof(struct ip))) == 0)
575 return NULL;
576
577 m_copydata(m, offset, sizeof(struct ip), &iphdr);
578
579 /* Map TTL */
580 if (mpls_mapttl_inet)
581 ms->shim.ttl = iphdr.ip_ttl;
582
583 /* Copy IP precedence to EXP */
584 if (mpls_mapprec_inet)
585 ms->shim.exp = ((u_int8_t)iphdr.ip_tos) >> 5;
586 }
587
588 if ((m = mpls_prepend_shim(m, ms)) == NULL)
589 return NULL;
590
591 return m;
592 }
593 #endif /* INET */
594
595 #ifdef INET6
596 static struct mbuf *
597 mpls_unlabel_inet6(struct mbuf *m, int *error)
598 {
599 struct ip6_hdr *ip6hdr;
600 union mpls_shim ms;
601
602 /* TODO: mapclass */
603 if (mpls_mapttl_inet6) {
604 ms.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr);
605 m_adj(m, sizeof(union mpls_shim));
606
607 if (m->m_len < sizeof (struct ip6_hdr) &&
608 (m = m_pullup(m, sizeof(struct ip6_hdr))) == 0) {
609 *error = ENOBUFS;
610 return NULL;
611 }
612 ip6hdr = mtod(m, struct ip6_hdr *);
613
614 /* Because we just decremented this in mpls_lse */
615 ip6hdr->ip6_hlim = ms.shim.ttl + 1;
616 } else {
617 m_adj(m, sizeof(union mpls_shim));
618 }
619
620 /* Put it back on IPv6 queue. */
621 if (__predict_false(!pktq_enqueue(ip6_pktq, m, 0))) {
622 m_freem(m);
623 *error = ENOBUFS;
624 return NULL;
625 }
626
627 *error = 0;
628 return m;
629 }
630
631 static struct mbuf *
632 mpls_label_inet6(struct mbuf *m, union mpls_shim *ms, uint offset)
633 {
634 struct ip6_hdr ip6h;
635
636 if (mpls_mapttl_inet6 || mpls_mapclass_inet6) {
637 /* XXX Maybe just check m->m_pkthdr.len instead? */
638 if ((m->m_len < offset + sizeof(struct ip6_hdr)) &&
639 (m = m_pullup(m, offset + sizeof(struct ip6_hdr))) == 0)
640 return NULL;
641
642 m_copydata(m, offset, sizeof(struct ip6_hdr), &ip6h);
643
644 if (mpls_mapttl_inet6)
645 ms->shim.ttl = ip6h.ip6_hlim;
646
647 if (mpls_mapclass_inet6)
648 ms->shim.exp = ip6h.ip6_vfc << 1 >> 5;
649 }
650
651 if ((m = mpls_prepend_shim(m, ms)) == NULL)
652 return NULL;
653
654 return m;
655 }
656 #endif /* INET6 */
657
658 static struct mbuf *
659 mpls_prepend_shim(struct mbuf *m, union mpls_shim *ms)
660 {
661 union mpls_shim *shim;
662
663 M_PREPEND(m, sizeof(*ms), M_DONTWAIT);
664 if (m == NULL)
665 return NULL;
666
667 if (m->m_len < sizeof(union mpls_shim) &&
668 (m = m_pullup(m, sizeof(union mpls_shim))) == 0)
669 return NULL;
670
671 shim = mtod(m, union mpls_shim *);
672
673 memcpy(shim, ms, sizeof(*shim));
674 shim->s_addr = htonl(shim->s_addr);
675
676 return m;
677 }
678