if_mpls.c revision 1.8.16.2 1 /* $NetBSD: if_mpls.c,v 1.8.16.2 2014/05/18 17:46:12 rmind Exp $ */
2
3 /*
4 * Copyright (c) 2010 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Mihai Chelaru <kefren (at) NetBSD.org>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: if_mpls.c,v 1.8.16.2 2014/05/18 17:46:12 rmind Exp $");
34
35 #include "opt_inet.h"
36 #include "opt_mpls.h"
37
38 #include <sys/param.h>
39
40 #include <sys/errno.h>
41 #include <sys/malloc.h>
42 #include <sys/mbuf.h>
43 #include <sys/sysctl.h>
44
45 #include <net/bpf.h>
46 #include <net/if.h>
47 #include <net/if_types.h>
48 #include <net/netisr.h>
49 #include <net/route.h>
50
51 #ifdef INET
52 #include <netinet/in.h>
53 #include <netinet/in_systm.h>
54 #include <netinet/in_var.h>
55 #include <netinet/ip.h>
56 #endif
57
58 #ifdef INET6
59 #include <netinet/ip6.h>
60 #include <netinet6/in6_var.h>
61 #include <netinet6/ip6_var.h>
62 #endif
63
64 #include <netmpls/mpls.h>
65 #include <netmpls/mpls_var.h>
66
67 #include "if_mpls.h"
68
/*
 * Strip the outermost shim from the packet and reload `dst' with the
 * next one (converted to host byte order).
 *
 * Relies on the enclosing function providing: a `struct mbuf *m', a
 * `struct sockaddr_mpls dst' and a `done' label.  If the next shim
 * cannot be made contiguous, m becomes NULL and control jumps to `done'.
 */
#define TRIM_LABEL do { \
	m_adj(m, sizeof(union mpls_shim)); \
	if (m->m_len < sizeof(union mpls_shim)) { \
		m = m_pullup(m, sizeof(union mpls_shim)); \
		if (m == NULL) \
			goto done; \
	} \
	dst.smpls_addr.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr); \
} while (/* CONSTCOND */ 0)
76
77
78 void ifmplsattach(int);
79
80 static int mpls_clone_create(struct if_clone *, int);
81 static int mpls_clone_destroy(struct ifnet *);
82
83 static struct if_clone mpls_if_cloner =
84 IF_CLONE_INITIALIZER("mpls", mpls_clone_create, mpls_clone_destroy);
85
86
87 static void mpls_input(struct ifnet *, struct mbuf *);
88 static int mpls_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
89 struct rtentry *);
90 static int mpls_ioctl(struct ifnet *, u_long, void *);
91 static int mpls_send_frame(struct mbuf *, struct ifnet *, struct rtentry *);
92 static int mpls_lse(struct mbuf *);
93
94 #ifdef INET
95 static int mpls_unlabel_inet(struct mbuf *);
96 static struct mbuf *mpls_label_inet(struct mbuf *, union mpls_shim *, uint);
97 #endif
98
99 #ifdef INET6
100 static int mpls_unlabel_inet6(struct mbuf *);
101 static struct mbuf *mpls_label_inet6(struct mbuf *, union mpls_shim *, uint);
102 #endif
103
104 static struct mbuf *mpls_prepend_shim(struct mbuf *, union mpls_shim *);
105
106 extern int mpls_defttl, mpls_mapttl_inet, mpls_mapttl_inet6, mpls_icmp_respond,
107 mpls_forwarding, mpls_accept, mpls_mapprec_inet, mpls_mapclass_inet6,
108 mpls_rfc4182;
109
110 /* ARGSUSED */
111 void
112 ifmplsattach(int count)
113 {
114 if_clone_attach(&mpls_if_cloner);
115 }
116
117 static int
118 mpls_clone_create(struct if_clone *ifc, int unit)
119 {
120 struct mpls_softc *sc;
121
122 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
123
124 if_initname(&sc->sc_if, ifc->ifc_name, unit);
125 sc->sc_if.if_softc = sc;
126 sc->sc_if.if_type = IFT_MPLS;
127 sc->sc_if.if_addrlen = 0;
128 sc->sc_if.if_hdrlen = sizeof(union mpls_shim);
129 sc->sc_if.if_dlt = DLT_NULL;
130 sc->sc_if.if_mtu = 1500;
131 sc->sc_if.if_flags = 0;
132 sc->sc_if.if_input = mpls_input;
133 sc->sc_if.if_output = mpls_output;
134 sc->sc_if.if_ioctl = mpls_ioctl;
135
136 if_attach(&sc->sc_if);
137 if_alloc_sadl(&sc->sc_if);
138 bpf_attach(&sc->sc_if, DLT_NULL, sizeof(uint32_t));
139 return 0;
140 }
141
142 static int
143 mpls_clone_destroy(struct ifnet *ifp)
144 {
145 int s;
146
147 bpf_detach(ifp);
148
149 s = splnet();
150 if_detach(ifp);
151 splx(s);
152
153 free(ifp->if_softc, M_DEVBUF);
154 return 0;
155 }
156
/*
 * Interface input routine: hand the frame to the label switch engine.
 * The result of mpls_lse() is intentionally discarded.
 *
 * TODO (kefren): unshim the packet, guess the address family and tap it
 * to bpf, e.g. with bpf_mtap_af(ifp, AF_MPLS, m).
 */
static void
mpls_input(struct ifnet *ifp, struct mbuf *m)
{

	(void)mpls_lse(m);
}
171
172 void
173 mplsintr(void)
174 {
175 struct mbuf *m;
176 int s;
177
178 while (!IF_IS_EMPTY(&mplsintrq)) {
179 s = splnet();
180 IF_DEQUEUE(&mplsintrq, m);
181 splx(s);
182
183 if (!m)
184 return;
185
186 if (((m->m_flags & M_PKTHDR) == 0) ||
187 (m->m_pkthdr.rcvif == 0))
188 panic("mplsintr(): no pkthdr or rcvif");
189
190 #ifdef MBUFTRACE
191 m_claimm(m, &mpls_owner);
192 #endif
193 mpls_input(m->m_pkthdr.rcvif, m);
194 }
195 }
196
197 /*
198 * prepend shim and deliver
199 */
200 static int
201 mpls_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct rtentry *rt)
202 {
203 union mpls_shim mh, *pms;
204 struct rtentry *rt1;
205 int err;
206 uint psize = sizeof(struct sockaddr_mpls);
207
208 if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) {
209 m_freem(m);
210 return ENETDOWN;
211 }
212
213 if (rt_gettag(rt) == NULL || rt_gettag(rt)->sa_family != AF_MPLS) {
214 m_freem(m);
215 return EINVAL;
216 }
217
218 bpf_mtap_af(ifp, dst->sa_family, m);
219
220 memset(&mh, 0, sizeof(mh));
221 mh.s_addr = MPLS_GETSADDR(rt);
222 mh.shim.bos = 1;
223 mh.shim.exp = 0;
224 mh.shim.ttl = mpls_defttl;
225
226 pms = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr;
227
228 while (psize <= rt_gettag(rt)->sa_len - sizeof(mh)) {
229 pms++;
230 if (mh.shim.label != MPLS_LABEL_IMPLNULL &&
231 ((m = mpls_prepend_shim(m, &mh)) == NULL))
232 return ENOBUFS;
233 memset(&mh, 0, sizeof(mh));
234 mh.s_addr = ntohl(pms->s_addr);
235 mh.shim.bos = mh.shim.exp = 0;
236 mh.shim.ttl = mpls_defttl;
237 psize += sizeof(mh);
238 }
239
240 switch(dst->sa_family) {
241 #ifdef INET
242 case AF_INET:
243 m = mpls_label_inet(m, &mh, psize - sizeof(struct sockaddr_mpls));
244 break;
245 #endif
246 #ifdef INET6
247 case AF_INET6:
248 m = mpls_label_inet6(m, &mh, psize - sizeof(struct sockaddr_mpls));
249 break;
250 #endif
251 default:
252 m = mpls_prepend_shim(m, &mh);
253 break;
254 }
255
256 if (m == NULL) {
257 IF_DROP(&ifp->if_snd);
258 ifp->if_oerrors++;
259 return ENOBUFS;
260 }
261
262 ifp->if_opackets++;
263 ifp->if_obytes += m->m_pkthdr.len;
264
265 if ((rt1=rtalloc1(rt->rt_gateway, 1)) == NULL) {
266 m_freem(m);
267 return EHOSTUNREACH;
268 }
269
270 err = mpls_send_frame(m, rt1->rt_ifp, rt);
271 RTFREE(rt1);
272 return err;
273 }
274
275 static int
276 mpls_ioctl(struct ifnet *ifp, u_long cmd, void *data)
277 {
278 int error = 0, s = splnet();
279 struct ifreq *ifr = data;
280
281 switch(cmd) {
282 case SIOCINITIFADDR:
283 ifp->if_flags |= IFF_UP | IFF_RUNNING;
284 break;
285 case SIOCSIFMTU:
286 if (ifr != NULL && ifr->ifr_mtu < 576) {
287 error = EINVAL;
288 break;
289 }
290 /* FALLTHROUGH */
291 case SIOCGIFMTU:
292 if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
293 error = 0;
294 break;
295 case SIOCSIFFLAGS:
296 if ((error = ifioctl_common(ifp, cmd, data)) != 0)
297 break;
298 if (ifp->if_flags & IFF_UP)
299 ifp->if_flags |= IFF_RUNNING;
300 break;
301 default:
302 error = ifioctl_common(ifp, cmd, data);
303 break;
304 }
305 splx(s);
306 return error;
307 }
308
309 /*
310 * MPLS Label Switch Engine
311 */
312 static int
313 mpls_lse(struct mbuf *m)
314 {
315 struct sockaddr_mpls dst;
316 union mpls_shim tshim, *htag;
317 struct rtentry *rt = NULL;
318 int error = ENOBUFS;
319 uint psize = sizeof(struct sockaddr_mpls);
320 bool push_back_alert = false;
321
322 if (m->m_len < sizeof(union mpls_shim) &&
323 (m = m_pullup(m, sizeof(union mpls_shim))) == NULL)
324 goto done;
325
326 dst.smpls_len = sizeof(struct sockaddr_mpls);
327 dst.smpls_family = AF_MPLS;
328 dst.smpls_addr.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr);
329
330 /* Check if we're accepting MPLS Frames */
331 error = EINVAL;
332 if (!mpls_accept)
333 goto done;
334
335 /* TTL decrement */
336 if ((m = mpls_ttl_dec(m)) == NULL)
337 goto done;
338
339 /* RFC 4182 */
340 if (mpls_rfc4182 != 0)
341 while((dst.smpls_addr.shim.label == MPLS_LABEL_IPV4NULL ||
342 dst.smpls_addr.shim.label == MPLS_LABEL_IPV6NULL) &&
343 __predict_false(dst.smpls_addr.shim.bos == 0))
344 TRIM_LABEL;
345
346 /* RFC 3032 Section 2.1 Page 4 */
347 if (__predict_false(dst.smpls_addr.shim.label == MPLS_LABEL_RTALERT) &&
348 dst.smpls_addr.shim.bos == 0) {
349 TRIM_LABEL;
350 push_back_alert = true;
351 }
352
353 if (dst.smpls_addr.shim.label <= MPLS_LABEL_RESMAX) {
354 /* Don't swap reserved labels */
355 switch (dst.smpls_addr.shim.label) {
356 #ifdef INET
357 case MPLS_LABEL_IPV4NULL:
358 /* Pop shim and push mbuf to IP stack */
359 if (dst.smpls_addr.shim.bos)
360 error = mpls_unlabel_inet(m);
361 break;
362 #endif
363 #ifdef INET6
364 case MPLS_LABEL_IPV6NULL:
365 /* Pop shim and push mbuf to IPv6 stack */
366 if (dst.smpls_addr.shim.bos)
367 error = mpls_unlabel_inet6(m);
368 break;
369 #endif
370 case MPLS_LABEL_RTALERT: /* Yeah, I'm all alerted */
371 case MPLS_LABEL_IMPLNULL: /* This is logical only */
372 default: /* Rest are not allowed */
373 break;
374 }
375 goto done;
376 }
377
378 /* Check if we should do MPLS forwarding */
379 error = EHOSTUNREACH;
380 if (!mpls_forwarding)
381 goto done;
382
383 /* Get a route to dst */
384 dst.smpls_addr.shim.ttl =
385 dst.smpls_addr.shim.bos =
386 dst.smpls_addr.shim.exp = 0;
387 dst.smpls_addr.s_addr = htonl(dst.smpls_addr.s_addr);
388 if ((rt = rtalloc1((const struct sockaddr*)&dst, 1)) == NULL)
389 goto done;
390
391 /* MPLS packet with no MPLS tagged route ? */
392 if ((rt->rt_flags & RTF_GATEWAY) == 0 ||
393 rt_gettag(rt) == NULL ||
394 rt_gettag(rt)->sa_family != AF_MPLS)
395 goto done;
396
397 tshim.s_addr = MPLS_GETSADDR(rt);
398
399 /* Swap labels */
400 if ((m->m_len < sizeof(union mpls_shim)) &&
401 (m = m_pullup(m, sizeof(union mpls_shim))) == 0) {
402 error = ENOBUFS;
403 goto done;
404 }
405
406 /* Replace only the label */
407 htag = mtod(m, union mpls_shim *);
408 htag->s_addr = ntohl(htag->s_addr);
409 htag->shim.label = tshim.shim.label;
410 htag->s_addr = htonl(htag->s_addr);
411
412 /* check if there is anything more to prepend */
413 htag = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr;
414 while (psize <= rt_gettag(rt)->sa_len - sizeof(tshim)) {
415 htag++;
416 memset(&tshim, 0, sizeof(tshim));
417 tshim.s_addr = ntohl(htag->s_addr);
418 tshim.shim.bos = tshim.shim.exp = 0;
419 tshim.shim.ttl = mpls_defttl;
420 if (tshim.shim.label != MPLS_LABEL_IMPLNULL &&
421 ((m = mpls_prepend_shim(m, &tshim)) == NULL))
422 return ENOBUFS;
423 psize += sizeof(tshim);
424 }
425
426 if (__predict_false(push_back_alert == true)) {
427 /* re-add the router alert label */
428 memset(&tshim, 0, sizeof(tshim));
429 tshim.s_addr = MPLS_LABEL_RTALERT;
430 tshim.shim.bos = tshim.shim.exp = 0;
431 tshim.shim.ttl = mpls_defttl;
432 if ((m = mpls_prepend_shim(m, &tshim)) == NULL)
433 return ENOBUFS;
434 }
435
436 error = mpls_send_frame(m, rt->rt_ifp, rt);
437
438 done:
439 if (error != 0 && m != NULL)
440 m_freem(m);
441 if (rt != NULL)
442 RTFREE(rt);
443
444 return error;
445 }
446
447 static int
448 mpls_send_frame(struct mbuf *m, struct ifnet *ifp, struct rtentry *rt)
449 {
450 union mpls_shim msh;
451
452 if ((rt->rt_flags & RTF_GATEWAY) == 0)
453 return EHOSTUNREACH;
454
455 rt->rt_use++;
456
457 msh.s_addr = MPLS_GETSADDR(rt);
458 if (msh.shim.label == MPLS_LABEL_IMPLNULL ||
459 (m->m_flags & (M_MCAST | M_BCAST))) {
460 m_adj(m, sizeof(union mpls_shim));
461 m->m_pkthdr.csum_flags = 0;
462 }
463
464 switch(ifp->if_type) {
465 /* only these are supported for now */
466 case IFT_ETHER:
467 case IFT_TUNNEL:
468 case IFT_LOOP:
469 return (*ifp->if_output)(ifp, m, rt->rt_gateway, rt);
470 break;
471 default:
472 return ENETUNREACH;
473 }
474 return 0;
475 }
476
477
478
479 #ifdef INET
480 static int
481 mpls_unlabel_inet(struct mbuf *m)
482 {
483 int s, iphlen;
484 struct ip *iph;
485 union mpls_shim *ms;
486 struct ifqueue *inq;
487
488 if (mpls_mapttl_inet || mpls_mapprec_inet) {
489
490 /* get shim info */
491 ms = mtod(m, union mpls_shim *);
492 ms->s_addr = ntohl(ms->s_addr);
493
494 /* and get rid of it */
495 m_adj(m, sizeof(union mpls_shim));
496
497 /* get ip header */
498 if (m->m_len < sizeof (struct ip) &&
499 (m = m_pullup(m, sizeof(struct ip))) == NULL)
500 return ENOBUFS;
501 iph = mtod(m, struct ip *);
502 iphlen = iph->ip_hl << 2;
503
504 /* get it all */
505 if (m->m_len < iphlen) {
506 if ((m = m_pullup(m, iphlen)) == NULL)
507 return ENOBUFS;
508 iph = mtod(m, struct ip *);
509 }
510
511 /* check ipsum */
512 if (in_cksum(m, iphlen) != 0) {
513 m_freem(m);
514 return EINVAL;
515 }
516
517 /* set IP ttl from MPLS ttl */
518 if (mpls_mapttl_inet)
519 iph->ip_ttl = ms->shim.ttl;
520
521 /* set IP Precedence from MPLS Exp */
522 if (mpls_mapprec_inet) {
523 iph->ip_tos = (iph->ip_tos << 3) >> 3;
524 iph->ip_tos |= ms->shim.exp << 5;
525 }
526
527 /* reset ipsum because we modified TTL and TOS */
528 iph->ip_sum = 0;
529 iph->ip_sum = in_cksum(m, iphlen);
530 } else
531 m_adj(m, sizeof(union mpls_shim));
532
533 /* Put it on IP queue */
534 inq = &ipintrq;
535 s = splnet();
536 if (IF_QFULL(inq)) {
537 IF_DROP(inq);
538 splx(s);
539 m_freem(m);
540 return ENOBUFS;
541 }
542 IF_ENQUEUE(inq, m);
543 schednetisr(NETISR_IP);
544 splx(s);
545
546 return 0;
547 }
548
549 /*
550 * Prepend MPLS label
551 */
552 static struct mbuf *
553 mpls_label_inet(struct mbuf *m, union mpls_shim *ms, uint offset)
554 {
555 struct ip iphdr;
556
557 if (mpls_mapttl_inet || mpls_mapprec_inet) {
558 if ((m->m_len < sizeof(struct ip)) &&
559 (m = m_pullup(m, offset + sizeof(struct ip))) == 0)
560 return NULL; /* XXX */
561 m_copydata(m, offset, sizeof(struct ip), &iphdr);
562
563 /* Map TTL */
564 if (mpls_mapttl_inet)
565 ms->shim.ttl = iphdr.ip_ttl;
566
567 /* Copy IP precedence to EXP */
568 if (mpls_mapprec_inet)
569 ms->shim.exp = ((u_int8_t)iphdr.ip_tos) >> 5;
570 }
571
572 if ((m = mpls_prepend_shim(m, ms)) == NULL)
573 return NULL;
574
575 return m;
576 }
577
578 #endif /* INET */
579
580 #ifdef INET6
581
582 static int
583 mpls_unlabel_inet6(struct mbuf *m)
584 {
585 struct ip6_hdr *ip6hdr;
586 union mpls_shim ms;
587 struct ifqueue *inq;
588 int s;
589
590 /* TODO: mapclass */
591 if (mpls_mapttl_inet6) {
592 ms.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr);
593 m_adj(m, sizeof(union mpls_shim));
594
595 if (m->m_len < sizeof (struct ip6_hdr) &&
596 (m = m_pullup(m, sizeof(struct ip6_hdr))) == 0)
597 return ENOBUFS;
598 ip6hdr = mtod(m, struct ip6_hdr *);
599
600 /* Because we just decremented this in mpls_lse */
601 ip6hdr->ip6_hlim = ms.shim.ttl + 1;
602 } else
603 m_adj(m, sizeof(union mpls_shim));
604
605 /* Put it back on IPv6 stack */
606 schednetisr(NETISR_IPV6);
607 inq = &ip6intrq;
608 s = splnet();
609 if (IF_QFULL(inq)) {
610 IF_DROP(inq);
611 splx(s);
612 m_freem(m);
613 return ENOBUFS;
614 }
615
616 IF_ENQUEUE(inq, m);
617 splx(s);
618
619 return 0;
620 }
621
622 static struct mbuf *
623 mpls_label_inet6(struct mbuf *m, union mpls_shim *ms, uint offset)
624 {
625 struct ip6_hdr ip6h;
626
627 if (mpls_mapttl_inet6 || mpls_mapclass_inet6) {
628 if (m->m_len < sizeof(struct ip6_hdr) &&
629 (m = m_pullup(m, offset + sizeof(struct ip6_hdr))) == 0)
630 return NULL;
631 m_copydata(m, offset, sizeof(struct ip6_hdr), &ip6h);
632
633 if (mpls_mapttl_inet6)
634 ms->shim.ttl = ip6h.ip6_hlim;
635
636 if (mpls_mapclass_inet6)
637 ms->shim.exp = ip6h.ip6_vfc << 1 >> 5;
638 }
639
640 if ((m = mpls_prepend_shim(m, ms)) == NULL)
641 return NULL;
642
643 return m;
644 }
645
646 #endif /* INET6 */
647
648 static struct mbuf *
649 mpls_prepend_shim(struct mbuf *m, union mpls_shim *ms)
650 {
651 union mpls_shim *shim;
652
653 M_PREPEND(m, sizeof(*ms), M_DONTWAIT);
654 if (m == NULL)
655 return NULL;
656
657 if (m->m_len < sizeof(union mpls_shim) &&
658 (m = m_pullup(m, sizeof(union mpls_shim))) == 0)
659 return NULL;
660
661 shim = mtod(m, union mpls_shim *);
662
663 memcpy(shim, ms, sizeof(*shim));
664 shim->s_addr = htonl(shim->s_addr);
665
666 return m;
667 }
668