if_mpls.c revision 1.16.4.2 1 /* $NetBSD: if_mpls.c,v 1.16.4.2 2015/09/22 12:06:10 skrll Exp $ */
2
3 /*
4 * Copyright (c) 2010 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Mihai Chelaru <kefren (at) NetBSD.org>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: if_mpls.c,v 1.16.4.2 2015/09/22 12:06:10 skrll Exp $");
34
35 #ifdef _KERNEL_OPT
36 #include "opt_inet.h"
37 #include "opt_mpls.h"
38 #endif
39
40 #include <sys/param.h>
41
42 #include <sys/errno.h>
43 #include <sys/malloc.h>
44 #include <sys/mbuf.h>
45 #include <sys/sysctl.h>
46
47 #include <net/bpf.h>
48 #include <net/if.h>
49 #include <net/if_types.h>
50 #include <net/netisr.h>
51 #include <net/route.h>
52
53 #ifdef INET
54 #include <netinet/in.h>
55 #include <netinet/in_systm.h>
56 #include <netinet/in_var.h>
57 #include <netinet/ip.h>
58 #include <netinet/ip_var.h>
59 #endif
60
61 #ifdef INET6
62 #include <netinet/ip6.h>
63 #include <netinet6/in6_var.h>
64 #include <netinet6/ip6_var.h>
65 #endif
66
67 #include <netmpls/mpls.h>
68 #include <netmpls/mpls_var.h>
69
70 #include "if_mpls.h"
71
72 #include "ioconf.h"
73
/*
 * Pop the outermost label entry.
 *
 * Strips one shim from the front of `m', makes sure the following shim is
 * contiguous in the first mbuf and reloads `dst.smpls_addr' with it in host
 * byte order.
 *
 * The macro relies on the caller providing the locals `m' and `dst' and a
 * `done' label; it jumps there with m == NULL when m_pullup() fails.
 */
#define TRIM_LABEL do { \
	m_adj(m, sizeof(union mpls_shim)); \
	if (m->m_len < sizeof(union mpls_shim)) { \
		m = m_pullup(m, sizeof(union mpls_shim)); \
		if (m == NULL) \
			goto done; \
	} \
	dst.smpls_addr.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr); \
} while (/* CONSTCOND */ 0)
81
82
83 static int mpls_clone_create(struct if_clone *, int);
84 static int mpls_clone_destroy(struct ifnet *);
85
86 static struct if_clone mpls_if_cloner =
87 IF_CLONE_INITIALIZER("mpls", mpls_clone_create, mpls_clone_destroy);
88
89
90 static void mpls_input(struct ifnet *, struct mbuf *);
91 static int mpls_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
92 struct rtentry *);
93 static int mpls_ioctl(struct ifnet *, u_long, void *);
94 static int mpls_send_frame(struct mbuf *, struct ifnet *, struct rtentry *);
95 static int mpls_lse(struct mbuf *);
96
97 #ifdef INET
98 static int mpls_unlabel_inet(struct mbuf *);
99 static struct mbuf *mpls_label_inet(struct mbuf *, union mpls_shim *, uint);
100 #endif
101
102 #ifdef INET6
103 static int mpls_unlabel_inet6(struct mbuf *);
104 static struct mbuf *mpls_label_inet6(struct mbuf *, union mpls_shim *, uint);
105 #endif
106
107 static struct mbuf *mpls_prepend_shim(struct mbuf *, union mpls_shim *);
108
109 extern int mpls_defttl, mpls_mapttl_inet, mpls_mapttl_inet6, mpls_icmp_respond,
110 mpls_forwarding, mpls_frame_accept, mpls_mapprec_inet, mpls_mapclass_inet6,
111 mpls_rfc4182;
112
113 /* ARGSUSED */
114 void
115 ifmplsattach(int count)
116 {
117 if_clone_attach(&mpls_if_cloner);
118 }
119
120 static int
121 mpls_clone_create(struct if_clone *ifc, int unit)
122 {
123 struct mpls_softc *sc;
124
125 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
126
127 if_initname(&sc->sc_if, ifc->ifc_name, unit);
128 sc->sc_if.if_softc = sc;
129 sc->sc_if.if_type = IFT_MPLS;
130 sc->sc_if.if_addrlen = 0;
131 sc->sc_if.if_hdrlen = sizeof(union mpls_shim);
132 sc->sc_if.if_dlt = DLT_NULL;
133 sc->sc_if.if_mtu = 1500;
134 sc->sc_if.if_flags = 0;
135 sc->sc_if.if_input = mpls_input;
136 sc->sc_if.if_output = mpls_output;
137 sc->sc_if.if_ioctl = mpls_ioctl;
138
139 if_attach(&sc->sc_if);
140 if_alloc_sadl(&sc->sc_if);
141 bpf_attach(&sc->sc_if, DLT_NULL, sizeof(uint32_t));
142 return 0;
143 }
144
145 static int
146 mpls_clone_destroy(struct ifnet *ifp)
147 {
148 int s;
149
150 bpf_detach(ifp);
151
152 s = splnet();
153 if_detach(ifp);
154 splx(s);
155
156 free(ifp->if_softc, M_DEVBUF);
157 return 0;
158 }
159
/*
 * Interface input routine: hand the frame to the label switch engine.
 * The result of mpls_lse() is discarded and `ifp' is only referenced by the
 * disabled bpf tap below.
 */
static void
mpls_input(struct ifnet *ifp, struct mbuf *m)
{
#if 0
	/*
	 * TODO - kefren
	 * Ideally the packet would be unshimmed and its address family
	 * guessed before it is passed to bpf.
	 */
	bpf_mtap_af(ifp, AF_MPLS, m);
#endif

	(void)mpls_lse(m);
}
174
175 void
176 mplsintr(void)
177 {
178 struct mbuf *m;
179 int s;
180
181 while (!IF_IS_EMPTY(&mplsintrq)) {
182 s = splnet();
183 IF_DEQUEUE(&mplsintrq, m);
184 splx(s);
185
186 if (!m)
187 return;
188
189 if (((m->m_flags & M_PKTHDR) == 0) ||
190 (m->m_pkthdr.rcvif == 0))
191 panic("mplsintr(): no pkthdr or rcvif");
192
193 #ifdef MBUFTRACE
194 m_claimm(m, &mpls_owner);
195 #endif
196 mpls_input(m->m_pkthdr.rcvif, m);
197 }
198 }
199
200 /*
201 * prepend shim and deliver
202 */
203 static int
204 mpls_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct rtentry *rt)
205 {
206 union mpls_shim mh, *pms;
207 struct rtentry *rt1;
208 int err;
209 uint psize = sizeof(struct sockaddr_mpls);
210
211 KASSERT(KERNEL_LOCKED_P());
212
213 if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) {
214 m_freem(m);
215 return ENETDOWN;
216 }
217
218 if (rt_gettag(rt) == NULL || rt_gettag(rt)->sa_family != AF_MPLS) {
219 m_freem(m);
220 return EINVAL;
221 }
222
223 bpf_mtap_af(ifp, dst->sa_family, m);
224
225 memset(&mh, 0, sizeof(mh));
226 mh.s_addr = MPLS_GETSADDR(rt);
227 mh.shim.bos = 1;
228 mh.shim.exp = 0;
229 mh.shim.ttl = mpls_defttl;
230
231 pms = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr;
232
233 while (psize <= rt_gettag(rt)->sa_len - sizeof(mh)) {
234 pms++;
235 if (mh.shim.label != MPLS_LABEL_IMPLNULL &&
236 ((m = mpls_prepend_shim(m, &mh)) == NULL))
237 return ENOBUFS;
238 memset(&mh, 0, sizeof(mh));
239 mh.s_addr = ntohl(pms->s_addr);
240 mh.shim.bos = mh.shim.exp = 0;
241 mh.shim.ttl = mpls_defttl;
242 psize += sizeof(mh);
243 }
244
245 switch(dst->sa_family) {
246 #ifdef INET
247 case AF_INET:
248 m = mpls_label_inet(m, &mh, psize - sizeof(struct sockaddr_mpls));
249 break;
250 #endif
251 #ifdef INET6
252 case AF_INET6:
253 m = mpls_label_inet6(m, &mh, psize - sizeof(struct sockaddr_mpls));
254 break;
255 #endif
256 default:
257 m = mpls_prepend_shim(m, &mh);
258 break;
259 }
260
261 if (m == NULL) {
262 IF_DROP(&ifp->if_snd);
263 ifp->if_oerrors++;
264 return ENOBUFS;
265 }
266
267 ifp->if_opackets++;
268 ifp->if_obytes += m->m_pkthdr.len;
269
270 if ((rt1=rtalloc1(rt->rt_gateway, 1)) == NULL) {
271 m_freem(m);
272 return EHOSTUNREACH;
273 }
274
275 err = mpls_send_frame(m, rt1->rt_ifp, rt);
276 rtfree(rt1);
277 return err;
278 }
279
280 static int
281 mpls_ioctl(struct ifnet *ifp, u_long cmd, void *data)
282 {
283 int error = 0, s = splnet();
284 struct ifreq *ifr = data;
285
286 switch(cmd) {
287 case SIOCINITIFADDR:
288 ifp->if_flags |= IFF_UP | IFF_RUNNING;
289 break;
290 case SIOCSIFMTU:
291 if (ifr != NULL && ifr->ifr_mtu < 576) {
292 error = EINVAL;
293 break;
294 }
295 /* FALLTHROUGH */
296 case SIOCGIFMTU:
297 if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
298 error = 0;
299 break;
300 case SIOCSIFFLAGS:
301 if ((error = ifioctl_common(ifp, cmd, data)) != 0)
302 break;
303 if (ifp->if_flags & IFF_UP)
304 ifp->if_flags |= IFF_RUNNING;
305 break;
306 default:
307 error = ifioctl_common(ifp, cmd, data);
308 break;
309 }
310 splx(s);
311 return error;
312 }
313
314 /*
315 * MPLS Label Switch Engine
316 */
317 static int
318 mpls_lse(struct mbuf *m)
319 {
320 struct sockaddr_mpls dst;
321 union mpls_shim tshim, *htag;
322 struct rtentry *rt = NULL;
323 int error = ENOBUFS;
324 uint psize = sizeof(struct sockaddr_mpls);
325 bool push_back_alert = false;
326
327 if (m->m_len < sizeof(union mpls_shim) &&
328 (m = m_pullup(m, sizeof(union mpls_shim))) == NULL)
329 goto done;
330
331 dst.smpls_len = sizeof(struct sockaddr_mpls);
332 dst.smpls_family = AF_MPLS;
333 dst.smpls_addr.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr);
334
335 /* Check if we're accepting MPLS Frames */
336 error = EINVAL;
337 if (!mpls_frame_accept)
338 goto done;
339
340 /* TTL decrement */
341 if ((m = mpls_ttl_dec(m)) == NULL)
342 goto done;
343
344 /* RFC 4182 */
345 if (mpls_rfc4182 != 0)
346 while((dst.smpls_addr.shim.label == MPLS_LABEL_IPV4NULL ||
347 dst.smpls_addr.shim.label == MPLS_LABEL_IPV6NULL) &&
348 __predict_false(dst.smpls_addr.shim.bos == 0))
349 TRIM_LABEL;
350
351 /* RFC 3032 Section 2.1 Page 4 */
352 if (__predict_false(dst.smpls_addr.shim.label == MPLS_LABEL_RTALERT) &&
353 dst.smpls_addr.shim.bos == 0) {
354 TRIM_LABEL;
355 push_back_alert = true;
356 }
357
358 if (dst.smpls_addr.shim.label <= MPLS_LABEL_RESMAX) {
359 /* Don't swap reserved labels */
360 switch (dst.smpls_addr.shim.label) {
361 #ifdef INET
362 case MPLS_LABEL_IPV4NULL:
363 /* Pop shim and push mbuf to IP stack */
364 if (dst.smpls_addr.shim.bos)
365 error = mpls_unlabel_inet(m);
366 break;
367 #endif
368 #ifdef INET6
369 case MPLS_LABEL_IPV6NULL:
370 /* Pop shim and push mbuf to IPv6 stack */
371 if (dst.smpls_addr.shim.bos)
372 error = mpls_unlabel_inet6(m);
373 break;
374 #endif
375 case MPLS_LABEL_RTALERT: /* Yeah, I'm all alerted */
376 case MPLS_LABEL_IMPLNULL: /* This is logical only */
377 default: /* Rest are not allowed */
378 break;
379 }
380 goto done;
381 }
382
383 /* Check if we should do MPLS forwarding */
384 error = EHOSTUNREACH;
385 if (!mpls_forwarding)
386 goto done;
387
388 /* Get a route to dst */
389 dst.smpls_addr.shim.ttl =
390 dst.smpls_addr.shim.bos =
391 dst.smpls_addr.shim.exp = 0;
392 dst.smpls_addr.s_addr = htonl(dst.smpls_addr.s_addr);
393 if ((rt = rtalloc1((const struct sockaddr*)&dst, 1)) == NULL)
394 goto done;
395
396 /* MPLS packet with no MPLS tagged route ? */
397 if ((rt->rt_flags & RTF_GATEWAY) == 0 ||
398 rt_gettag(rt) == NULL ||
399 rt_gettag(rt)->sa_family != AF_MPLS)
400 goto done;
401
402 tshim.s_addr = MPLS_GETSADDR(rt);
403
404 /* Swap labels */
405 if ((m->m_len < sizeof(union mpls_shim)) &&
406 (m = m_pullup(m, sizeof(union mpls_shim))) == 0) {
407 error = ENOBUFS;
408 goto done;
409 }
410
411 /* Replace only the label */
412 htag = mtod(m, union mpls_shim *);
413 htag->s_addr = ntohl(htag->s_addr);
414 htag->shim.label = tshim.shim.label;
415 htag->s_addr = htonl(htag->s_addr);
416
417 /* check if there is anything more to prepend */
418 htag = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr;
419 while (psize <= rt_gettag(rt)->sa_len - sizeof(tshim)) {
420 htag++;
421 memset(&tshim, 0, sizeof(tshim));
422 tshim.s_addr = ntohl(htag->s_addr);
423 tshim.shim.bos = tshim.shim.exp = 0;
424 tshim.shim.ttl = mpls_defttl;
425 if (tshim.shim.label != MPLS_LABEL_IMPLNULL &&
426 ((m = mpls_prepend_shim(m, &tshim)) == NULL))
427 return ENOBUFS;
428 psize += sizeof(tshim);
429 }
430
431 if (__predict_false(push_back_alert == true)) {
432 /* re-add the router alert label */
433 memset(&tshim, 0, sizeof(tshim));
434 tshim.s_addr = MPLS_LABEL_RTALERT;
435 tshim.shim.bos = tshim.shim.exp = 0;
436 tshim.shim.ttl = mpls_defttl;
437 if ((m = mpls_prepend_shim(m, &tshim)) == NULL)
438 return ENOBUFS;
439 }
440
441 error = mpls_send_frame(m, rt->rt_ifp, rt);
442
443 done:
444 if (error != 0 && m != NULL)
445 m_freem(m);
446 if (rt != NULL)
447 rtfree(rt);
448
449 return error;
450 }
451
452 static int
453 mpls_send_frame(struct mbuf *m, struct ifnet *ifp, struct rtentry *rt)
454 {
455 union mpls_shim msh;
456 int ret;
457
458 if ((rt->rt_flags & RTF_GATEWAY) == 0)
459 return EHOSTUNREACH;
460
461 rt->rt_use++;
462
463 msh.s_addr = MPLS_GETSADDR(rt);
464 if (msh.shim.label == MPLS_LABEL_IMPLNULL ||
465 (m->m_flags & (M_MCAST | M_BCAST))) {
466 m_adj(m, sizeof(union mpls_shim));
467 m->m_pkthdr.csum_flags = 0;
468 }
469
470 switch(ifp->if_type) {
471 /* only these are supported for now */
472 case IFT_ETHER:
473 case IFT_TUNNEL:
474 case IFT_LOOP:
475 #ifdef INET
476 ret = ip_hresolv_output(ifp, m, rt->rt_gateway, rt);
477 #else
478 KERNEL_LOCK(1, NULL);
479 ret = (*ifp->if_output)(ifp, m, rt->rt_gateway, rt);
480 KERNEL_UNLOCK_ONE(NULL);
481 #endif
482 return ret;
483 break;
484 default:
485 return ENETUNREACH;
486 }
487 return 0;
488 }
489
490
491
492 #ifdef INET
493 static int
494 mpls_unlabel_inet(struct mbuf *m)
495 {
496 struct ip *iph;
497 union mpls_shim *ms;
498 int iphlen;
499
500 if (mpls_mapttl_inet || mpls_mapprec_inet) {
501
502 /* get shim info */
503 ms = mtod(m, union mpls_shim *);
504 ms->s_addr = ntohl(ms->s_addr);
505
506 /* and get rid of it */
507 m_adj(m, sizeof(union mpls_shim));
508
509 /* get ip header */
510 if (m->m_len < sizeof (struct ip) &&
511 (m = m_pullup(m, sizeof(struct ip))) == NULL)
512 return ENOBUFS;
513 iph = mtod(m, struct ip *);
514 iphlen = iph->ip_hl << 2;
515
516 /* get it all */
517 if (m->m_len < iphlen) {
518 if ((m = m_pullup(m, iphlen)) == NULL)
519 return ENOBUFS;
520 iph = mtod(m, struct ip *);
521 }
522
523 /* check ipsum */
524 if (in_cksum(m, iphlen) != 0) {
525 m_freem(m);
526 return EINVAL;
527 }
528
529 /* set IP ttl from MPLS ttl */
530 if (mpls_mapttl_inet)
531 iph->ip_ttl = ms->shim.ttl;
532
533 /* set IP Precedence from MPLS Exp */
534 if (mpls_mapprec_inet) {
535 iph->ip_tos = (iph->ip_tos << 3) >> 3;
536 iph->ip_tos |= ms->shim.exp << 5;
537 }
538
539 /* reset ipsum because we modified TTL and TOS */
540 iph->ip_sum = 0;
541 iph->ip_sum = in_cksum(m, iphlen);
542 } else
543 m_adj(m, sizeof(union mpls_shim));
544
545 /* Put it on IP queue */
546 if (__predict_false(!pktq_enqueue(ip_pktq, m, 0))) {
547 m_freem(m);
548 return ENOBUFS;
549 }
550 return 0;
551 }
552
553 /*
554 * Prepend MPLS label
555 */
556 static struct mbuf *
557 mpls_label_inet(struct mbuf *m, union mpls_shim *ms, uint offset)
558 {
559 struct ip iphdr;
560
561 if (mpls_mapttl_inet || mpls_mapprec_inet) {
562 if ((m->m_len < sizeof(struct ip)) &&
563 (m = m_pullup(m, offset + sizeof(struct ip))) == 0)
564 return NULL; /* XXX */
565 m_copydata(m, offset, sizeof(struct ip), &iphdr);
566
567 /* Map TTL */
568 if (mpls_mapttl_inet)
569 ms->shim.ttl = iphdr.ip_ttl;
570
571 /* Copy IP precedence to EXP */
572 if (mpls_mapprec_inet)
573 ms->shim.exp = ((u_int8_t)iphdr.ip_tos) >> 5;
574 }
575
576 if ((m = mpls_prepend_shim(m, ms)) == NULL)
577 return NULL;
578
579 return m;
580 }
581
582 #endif /* INET */
583
584 #ifdef INET6
585
586 static int
587 mpls_unlabel_inet6(struct mbuf *m)
588 {
589 struct ip6_hdr *ip6hdr;
590 union mpls_shim ms;
591
592 /* TODO: mapclass */
593 if (mpls_mapttl_inet6) {
594 ms.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr);
595 m_adj(m, sizeof(union mpls_shim));
596
597 if (m->m_len < sizeof (struct ip6_hdr) &&
598 (m = m_pullup(m, sizeof(struct ip6_hdr))) == 0)
599 return ENOBUFS;
600 ip6hdr = mtod(m, struct ip6_hdr *);
601
602 /* Because we just decremented this in mpls_lse */
603 ip6hdr->ip6_hlim = ms.shim.ttl + 1;
604 } else
605 m_adj(m, sizeof(union mpls_shim));
606
607 /* Put it back on IPv6 queue. */
608 if (__predict_false(!pktq_enqueue(ip6_pktq, m, 0))) {
609 m_freem(m);
610 return ENOBUFS;
611 }
612 return 0;
613 }
614
615 static struct mbuf *
616 mpls_label_inet6(struct mbuf *m, union mpls_shim *ms, uint offset)
617 {
618 struct ip6_hdr ip6h;
619
620 if (mpls_mapttl_inet6 || mpls_mapclass_inet6) {
621 if (m->m_len < sizeof(struct ip6_hdr) &&
622 (m = m_pullup(m, offset + sizeof(struct ip6_hdr))) == 0)
623 return NULL;
624 m_copydata(m, offset, sizeof(struct ip6_hdr), &ip6h);
625
626 if (mpls_mapttl_inet6)
627 ms->shim.ttl = ip6h.ip6_hlim;
628
629 if (mpls_mapclass_inet6)
630 ms->shim.exp = ip6h.ip6_vfc << 1 >> 5;
631 }
632
633 if ((m = mpls_prepend_shim(m, ms)) == NULL)
634 return NULL;
635
636 return m;
637 }
638
639 #endif /* INET6 */
640
641 static struct mbuf *
642 mpls_prepend_shim(struct mbuf *m, union mpls_shim *ms)
643 {
644 union mpls_shim *shim;
645
646 M_PREPEND(m, sizeof(*ms), M_DONTWAIT);
647 if (m == NULL)
648 return NULL;
649
650 if (m->m_len < sizeof(union mpls_shim) &&
651 (m = m_pullup(m, sizeof(union mpls_shim))) == 0)
652 return NULL;
653
654 shim = mtod(m, union mpls_shim *);
655
656 memcpy(shim, ms, sizeof(*shim));
657 shim->s_addr = htonl(shim->s_addr);
658
659 return m;
660 }
661