if_mpls.c revision 1.26 1 /* $NetBSD: if_mpls.c,v 1.26 2016/07/07 06:55:43 msaitoh Exp $ */
2
3 /*
4 * Copyright (c) 2010 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Mihai Chelaru <kefren (at) NetBSD.org>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: if_mpls.c,v 1.26 2016/07/07 06:55:43 msaitoh Exp $");
34
35 #ifdef _KERNEL_OPT
36 #include "opt_inet.h"
37 #include "opt_mpls.h"
38 #endif
39
40 #include <sys/param.h>
41
42 #include <sys/errno.h>
43 #include <sys/malloc.h>
44 #include <sys/mbuf.h>
45 #include <sys/sysctl.h>
46
47 #include <net/bpf.h>
48 #include <net/if.h>
49 #include <net/if_types.h>
50 #include <net/netisr.h>
51 #include <net/route.h>
52
53 #ifdef INET
54 #include <netinet/in.h>
55 #include <netinet/in_systm.h>
56 #include <netinet/in_var.h>
57 #include <netinet/ip.h>
58 #include <netinet/ip_var.h>
59 #endif
60
61 #ifdef INET6
62 #include <netinet/ip6.h>
63 #include <netinet6/in6_var.h>
64 #include <netinet6/ip6_var.h>
65 #endif
66
67 #include <netmpls/mpls.h>
68 #include <netmpls/mpls_var.h>
69
70 #include "if_mpls.h"
71
72 #include "ioconf.h"
73
/*
 * Drop the outermost label stack entry from the mbuf `m' and reload
 * `dst.smpls_addr' with the entry that is now on top, converted with
 * ntohl().  The macro relies on the caller's locals `m' and `dst' and on
 * a `done' label, which is jumped to (with m == NULL) when m_pullup()
 * fails.
 */
#define TRIM_LABEL do {							\
	m_adj(m, sizeof(union mpls_shim));				\
	if (m->m_len < sizeof(union mpls_shim)) {			\
		m = m_pullup(m, sizeof(union mpls_shim));		\
		if (m == NULL)						\
			goto done;					\
	}								\
	dst.smpls_addr.s_addr =						\
	    ntohl(mtod(m, union mpls_shim *)->s_addr);			\
} while (/* CONSTCOND */ 0)
81
82
83 static int mpls_clone_create(struct if_clone *, int);
84 static int mpls_clone_destroy(struct ifnet *);
85
86 static struct if_clone mpls_if_cloner =
87 IF_CLONE_INITIALIZER("mpls", mpls_clone_create, mpls_clone_destroy);
88
89
90 static void mpls_input(struct ifnet *, struct mbuf *);
91 static int mpls_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
92 const struct rtentry *);
93 static int mpls_ioctl(struct ifnet *, u_long, void *);
94 static int mpls_send_frame(struct mbuf *, struct ifnet *,
95 const struct rtentry *);
96 static int mpls_lse(struct mbuf *);
97
98 #ifdef INET
99 static int mpls_unlabel_inet(struct mbuf *);
100 static struct mbuf *mpls_label_inet(struct mbuf *, union mpls_shim *, uint);
101 #endif
102
103 #ifdef INET6
104 static int mpls_unlabel_inet6(struct mbuf *);
105 static struct mbuf *mpls_label_inet6(struct mbuf *, union mpls_shim *, uint);
106 #endif
107
108 static struct mbuf *mpls_prepend_shim(struct mbuf *, union mpls_shim *);
109
110 extern int mpls_defttl, mpls_mapttl_inet, mpls_mapttl_inet6, mpls_icmp_respond,
111 mpls_forwarding, mpls_frame_accept, mpls_mapprec_inet, mpls_mapclass_inet6,
112 mpls_rfc4182;
113
114 /* ARGSUSED */
115 void
116 ifmplsattach(int count)
117 {
118 if_clone_attach(&mpls_if_cloner);
119 }
120
121 static int
122 mpls_clone_create(struct if_clone *ifc, int unit)
123 {
124 struct mpls_softc *sc;
125
126 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
127
128 if_initname(&sc->sc_if, ifc->ifc_name, unit);
129 sc->sc_if.if_softc = sc;
130 sc->sc_if.if_type = IFT_MPLS;
131 sc->sc_if.if_addrlen = 0;
132 sc->sc_if.if_hdrlen = sizeof(union mpls_shim);
133 sc->sc_if.if_dlt = DLT_NULL;
134 sc->sc_if.if_mtu = 1500;
135 sc->sc_if.if_flags = 0;
136 sc->sc_if._if_input = mpls_input;
137 sc->sc_if.if_output = mpls_output;
138 sc->sc_if.if_ioctl = mpls_ioctl;
139
140 if_attach(&sc->sc_if);
141 if_alloc_sadl(&sc->sc_if);
142 bpf_attach(&sc->sc_if, DLT_NULL, sizeof(uint32_t));
143 return 0;
144 }
145
146 static int
147 mpls_clone_destroy(struct ifnet *ifp)
148 {
149 int s;
150
151 bpf_detach(ifp);
152
153 s = splnet();
154 if_detach(ifp);
155 splx(s);
156
157 free(ifp->if_softc, M_DEVBUF);
158 return 0;
159 }
160
/*
 * Input method of the mpls interface: pass the frame straight to the
 * label switch engine.  The engine's return value is not used.
 */
static void
mpls_input(struct ifnet *ifp, struct mbuf *m)
{
#if 0
	/*
	 * TODO - kefren
	 * I'd love to unshim the packet, guess family
	 * and pass it to bpf
	 */
	bpf_mtap_af(ifp, AF_MPLS, m);
#endif

	(void)mpls_lse(m);
}
175
176 void
177 mplsintr(void)
178 {
179 struct mbuf *m;
180 int s;
181
182 while (!IF_IS_EMPTY(&mplsintrq)) {
183 s = splnet();
184 IF_DEQUEUE(&mplsintrq, m);
185 splx(s);
186
187 if (!m)
188 return;
189
190 if (((m->m_flags & M_PKTHDR) == 0) ||
191 (m->m_pkthdr.rcvif_index == 0))
192 panic("mplsintr(): no pkthdr or rcvif");
193
194 #ifdef MBUFTRACE
195 m_claimm(m, &mpls_owner);
196 #endif
197 mpls_input(m_get_rcvif_NOMPSAFE(m), m);
198 }
199 }
200
201 /*
202 * prepend shim and deliver
203 */
204 static int
205 mpls_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
206 const struct rtentry *rt)
207 {
208 union mpls_shim mh, *pms;
209 struct rtentry *rt1;
210 int err;
211 uint psize = sizeof(struct sockaddr_mpls);
212
213 KASSERT(KERNEL_LOCKED_P());
214
215 if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) {
216 m_freem(m);
217 return ENETDOWN;
218 }
219
220 if (rt_gettag(rt) == NULL || rt_gettag(rt)->sa_family != AF_MPLS) {
221 m_freem(m);
222 return EINVAL;
223 }
224
225 bpf_mtap_af(ifp, dst->sa_family, m);
226
227 memset(&mh, 0, sizeof(mh));
228 mh.s_addr = MPLS_GETSADDR(rt);
229 mh.shim.bos = 1;
230 mh.shim.exp = 0;
231 mh.shim.ttl = mpls_defttl;
232
233 pms = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr;
234
235 while (psize <= rt_gettag(rt)->sa_len - sizeof(mh)) {
236 pms++;
237 if (mh.shim.label != MPLS_LABEL_IMPLNULL &&
238 ((m = mpls_prepend_shim(m, &mh)) == NULL))
239 return ENOBUFS;
240 memset(&mh, 0, sizeof(mh));
241 mh.s_addr = ntohl(pms->s_addr);
242 mh.shim.bos = mh.shim.exp = 0;
243 mh.shim.ttl = mpls_defttl;
244 psize += sizeof(mh);
245 }
246
247 switch(dst->sa_family) {
248 #ifdef INET
249 case AF_INET:
250 m = mpls_label_inet(m, &mh, psize - sizeof(struct sockaddr_mpls));
251 break;
252 #endif
253 #ifdef INET6
254 case AF_INET6:
255 m = mpls_label_inet6(m, &mh, psize - sizeof(struct sockaddr_mpls));
256 break;
257 #endif
258 default:
259 m = mpls_prepend_shim(m, &mh);
260 break;
261 }
262
263 if (m == NULL) {
264 IF_DROP(&ifp->if_snd);
265 ifp->if_oerrors++;
266 return ENOBUFS;
267 }
268
269 ifp->if_opackets++;
270 ifp->if_obytes += m->m_pkthdr.len;
271
272 if ((rt1=rtalloc1(rt->rt_gateway, 1)) == NULL) {
273 m_freem(m);
274 return EHOSTUNREACH;
275 }
276
277 err = mpls_send_frame(m, rt1->rt_ifp, rt);
278 rtfree(rt1);
279 return err;
280 }
281
282 static int
283 mpls_ioctl(struct ifnet *ifp, u_long cmd, void *data)
284 {
285 int error = 0, s = splnet();
286 struct ifreq *ifr = data;
287
288 switch(cmd) {
289 case SIOCINITIFADDR:
290 ifp->if_flags |= IFF_UP | IFF_RUNNING;
291 break;
292 case SIOCSIFMTU:
293 if (ifr != NULL && ifr->ifr_mtu < 576) {
294 error = EINVAL;
295 break;
296 }
297 /* FALLTHROUGH */
298 case SIOCGIFMTU:
299 if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
300 error = 0;
301 break;
302 case SIOCSIFFLAGS:
303 if ((error = ifioctl_common(ifp, cmd, data)) != 0)
304 break;
305 if (ifp->if_flags & IFF_UP)
306 ifp->if_flags |= IFF_RUNNING;
307 break;
308 default:
309 error = ifioctl_common(ifp, cmd, data);
310 break;
311 }
312 splx(s);
313 return error;
314 }
315
316 /*
317 * MPLS Label Switch Engine
318 */
319 static int
320 mpls_lse(struct mbuf *m)
321 {
322 struct sockaddr_mpls dst;
323 union mpls_shim tshim, *htag;
324 struct rtentry *rt = NULL;
325 int error = ENOBUFS;
326 uint psize = sizeof(struct sockaddr_mpls);
327 bool push_back_alert = false;
328
329 if (m->m_len < sizeof(union mpls_shim) &&
330 (m = m_pullup(m, sizeof(union mpls_shim))) == NULL)
331 goto done;
332
333 dst.smpls_len = sizeof(struct sockaddr_mpls);
334 dst.smpls_family = AF_MPLS;
335 dst.smpls_addr.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr);
336
337 /* Check if we're accepting MPLS Frames */
338 error = EINVAL;
339 if (!mpls_frame_accept)
340 goto done;
341
342 /* TTL decrement */
343 if ((m = mpls_ttl_dec(m)) == NULL)
344 goto done;
345
346 /* RFC 4182 */
347 if (mpls_rfc4182 != 0)
348 while((dst.smpls_addr.shim.label == MPLS_LABEL_IPV4NULL ||
349 dst.smpls_addr.shim.label == MPLS_LABEL_IPV6NULL) &&
350 __predict_false(dst.smpls_addr.shim.bos == 0))
351 TRIM_LABEL;
352
353 /* RFC 3032 Section 2.1 Page 4 */
354 if (__predict_false(dst.smpls_addr.shim.label == MPLS_LABEL_RTALERT) &&
355 dst.smpls_addr.shim.bos == 0) {
356 TRIM_LABEL;
357 push_back_alert = true;
358 }
359
360 if (dst.smpls_addr.shim.label <= MPLS_LABEL_RESMAX) {
361 /* Don't swap reserved labels */
362 switch (dst.smpls_addr.shim.label) {
363 #ifdef INET
364 case MPLS_LABEL_IPV4NULL:
365 /* Pop shim and push mbuf to IP stack */
366 if (dst.smpls_addr.shim.bos)
367 error = mpls_unlabel_inet(m);
368 break;
369 #endif
370 #ifdef INET6
371 case MPLS_LABEL_IPV6NULL:
372 /* Pop shim and push mbuf to IPv6 stack */
373 if (dst.smpls_addr.shim.bos)
374 error = mpls_unlabel_inet6(m);
375 break;
376 #endif
377 case MPLS_LABEL_RTALERT: /* Yeah, I'm all alerted */
378 case MPLS_LABEL_IMPLNULL: /* This is logical only */
379 default: /* Rest are not allowed */
380 break;
381 }
382 goto done;
383 }
384
385 /* Check if we should do MPLS forwarding */
386 error = EHOSTUNREACH;
387 if (!mpls_forwarding)
388 goto done;
389
390 /* Get a route to dst */
391 dst.smpls_addr.shim.ttl =
392 dst.smpls_addr.shim.bos =
393 dst.smpls_addr.shim.exp = 0;
394 dst.smpls_addr.s_addr = htonl(dst.smpls_addr.s_addr);
395 if ((rt = rtalloc1((const struct sockaddr*)&dst, 1)) == NULL)
396 goto done;
397
398 /* MPLS packet with no MPLS tagged route ? */
399 if ((rt->rt_flags & RTF_GATEWAY) == 0 ||
400 rt_gettag(rt) == NULL ||
401 rt_gettag(rt)->sa_family != AF_MPLS)
402 goto done;
403
404 tshim.s_addr = MPLS_GETSADDR(rt);
405
406 /* Swap labels */
407 if ((m->m_len < sizeof(union mpls_shim)) &&
408 (m = m_pullup(m, sizeof(union mpls_shim))) == 0) {
409 error = ENOBUFS;
410 goto done;
411 }
412
413 /* Replace only the label */
414 htag = mtod(m, union mpls_shim *);
415 htag->s_addr = ntohl(htag->s_addr);
416 htag->shim.label = tshim.shim.label;
417 htag->s_addr = htonl(htag->s_addr);
418
419 /* check if there is anything more to prepend */
420 htag = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr;
421 while (psize <= rt_gettag(rt)->sa_len - sizeof(tshim)) {
422 htag++;
423 memset(&tshim, 0, sizeof(tshim));
424 tshim.s_addr = ntohl(htag->s_addr);
425 tshim.shim.bos = tshim.shim.exp = 0;
426 tshim.shim.ttl = mpls_defttl;
427 if (tshim.shim.label != MPLS_LABEL_IMPLNULL &&
428 ((m = mpls_prepend_shim(m, &tshim)) == NULL))
429 return ENOBUFS;
430 psize += sizeof(tshim);
431 }
432
433 if (__predict_false(push_back_alert == true)) {
434 /* re-add the router alert label */
435 memset(&tshim, 0, sizeof(tshim));
436 tshim.s_addr = MPLS_LABEL_RTALERT;
437 tshim.shim.bos = tshim.shim.exp = 0;
438 tshim.shim.ttl = mpls_defttl;
439 if ((m = mpls_prepend_shim(m, &tshim)) == NULL)
440 return ENOBUFS;
441 }
442
443 if ((rt->rt_flags & RTF_GATEWAY) == 0) {
444 error = EHOSTUNREACH;
445 goto done;
446 }
447
448 rt->rt_use++;
449 error = mpls_send_frame(m, rt->rt_ifp, rt);
450
451 done:
452 if (error != 0 && m != NULL)
453 m_freem(m);
454 if (rt != NULL)
455 rtfree(rt);
456
457 return error;
458 }
459
460 static int
461 mpls_send_frame(struct mbuf *m, struct ifnet *ifp, const struct rtentry *rt)
462 {
463 union mpls_shim msh;
464 int ret;
465
466 msh.s_addr = MPLS_GETSADDR(rt);
467 if (msh.shim.label == MPLS_LABEL_IMPLNULL ||
468 (m->m_flags & (M_MCAST | M_BCAST))) {
469 m_adj(m, sizeof(union mpls_shim));
470 m->m_pkthdr.csum_flags = 0;
471 }
472
473 switch(ifp->if_type) {
474 /* only these are supported for now */
475 case IFT_ETHER:
476 case IFT_TUNNEL:
477 case IFT_LOOP:
478 #ifdef INET
479 ret = ip_if_output(ifp, m, rt->rt_gateway, rt);
480 #else
481 ret = if_output_lock(ifp, ifp, m, rt->rt_gateway, rt);
482 #endif
483 return ret;
484 break;
485 default:
486 return ENETUNREACH;
487 }
488 return 0;
489 }
490
491
492
493 #ifdef INET
494 static int
495 mpls_unlabel_inet(struct mbuf *m)
496 {
497 struct ip *iph;
498 union mpls_shim *ms;
499 int iphlen;
500
501 if (mpls_mapttl_inet || mpls_mapprec_inet) {
502
503 /* get shim info */
504 ms = mtod(m, union mpls_shim *);
505 ms->s_addr = ntohl(ms->s_addr);
506
507 /* and get rid of it */
508 m_adj(m, sizeof(union mpls_shim));
509
510 /* get ip header */
511 if (m->m_len < sizeof (struct ip) &&
512 (m = m_pullup(m, sizeof(struct ip))) == NULL)
513 return ENOBUFS;
514 iph = mtod(m, struct ip *);
515 iphlen = iph->ip_hl << 2;
516
517 /* get it all */
518 if (m->m_len < iphlen) {
519 if ((m = m_pullup(m, iphlen)) == NULL)
520 return ENOBUFS;
521 iph = mtod(m, struct ip *);
522 }
523
524 /* check ipsum */
525 if (in_cksum(m, iphlen) != 0) {
526 m_freem(m);
527 return EINVAL;
528 }
529
530 /* set IP ttl from MPLS ttl */
531 if (mpls_mapttl_inet)
532 iph->ip_ttl = ms->shim.ttl;
533
534 /* set IP Precedence from MPLS Exp */
535 if (mpls_mapprec_inet) {
536 iph->ip_tos = (iph->ip_tos << 3) >> 3;
537 iph->ip_tos |= ms->shim.exp << 5;
538 }
539
540 /* reset ipsum because we modified TTL and TOS */
541 iph->ip_sum = 0;
542 iph->ip_sum = in_cksum(m, iphlen);
543 } else
544 m_adj(m, sizeof(union mpls_shim));
545
546 /* Put it on IP queue */
547 if (__predict_false(!pktq_enqueue(ip_pktq, m, 0))) {
548 m_freem(m);
549 return ENOBUFS;
550 }
551 return 0;
552 }
553
554 /*
555 * Prepend MPLS label
556 */
557 static struct mbuf *
558 mpls_label_inet(struct mbuf *m, union mpls_shim *ms, uint offset)
559 {
560 struct ip iphdr;
561
562 if (mpls_mapttl_inet || mpls_mapprec_inet) {
563 if ((m->m_len < sizeof(struct ip)) &&
564 (m = m_pullup(m, offset + sizeof(struct ip))) == 0)
565 return NULL; /* XXX */
566 m_copydata(m, offset, sizeof(struct ip), &iphdr);
567
568 /* Map TTL */
569 if (mpls_mapttl_inet)
570 ms->shim.ttl = iphdr.ip_ttl;
571
572 /* Copy IP precedence to EXP */
573 if (mpls_mapprec_inet)
574 ms->shim.exp = ((u_int8_t)iphdr.ip_tos) >> 5;
575 }
576
577 if ((m = mpls_prepend_shim(m, ms)) == NULL)
578 return NULL;
579
580 return m;
581 }
582
583 #endif /* INET */
584
585 #ifdef INET6
586
587 static int
588 mpls_unlabel_inet6(struct mbuf *m)
589 {
590 struct ip6_hdr *ip6hdr;
591 union mpls_shim ms;
592
593 /* TODO: mapclass */
594 if (mpls_mapttl_inet6) {
595 ms.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr);
596 m_adj(m, sizeof(union mpls_shim));
597
598 if (m->m_len < sizeof (struct ip6_hdr) &&
599 (m = m_pullup(m, sizeof(struct ip6_hdr))) == 0)
600 return ENOBUFS;
601 ip6hdr = mtod(m, struct ip6_hdr *);
602
603 /* Because we just decremented this in mpls_lse */
604 ip6hdr->ip6_hlim = ms.shim.ttl + 1;
605 } else
606 m_adj(m, sizeof(union mpls_shim));
607
608 /* Put it back on IPv6 queue. */
609 if (__predict_false(!pktq_enqueue(ip6_pktq, m, 0))) {
610 m_freem(m);
611 return ENOBUFS;
612 }
613 return 0;
614 }
615
616 static struct mbuf *
617 mpls_label_inet6(struct mbuf *m, union mpls_shim *ms, uint offset)
618 {
619 struct ip6_hdr ip6h;
620
621 if (mpls_mapttl_inet6 || mpls_mapclass_inet6) {
622 if (m->m_len < sizeof(struct ip6_hdr) &&
623 (m = m_pullup(m, offset + sizeof(struct ip6_hdr))) == 0)
624 return NULL;
625 m_copydata(m, offset, sizeof(struct ip6_hdr), &ip6h);
626
627 if (mpls_mapttl_inet6)
628 ms->shim.ttl = ip6h.ip6_hlim;
629
630 if (mpls_mapclass_inet6)
631 ms->shim.exp = ip6h.ip6_vfc << 1 >> 5;
632 }
633
634 if ((m = mpls_prepend_shim(m, ms)) == NULL)
635 return NULL;
636
637 return m;
638 }
639
640 #endif /* INET6 */
641
642 static struct mbuf *
643 mpls_prepend_shim(struct mbuf *m, union mpls_shim *ms)
644 {
645 union mpls_shim *shim;
646
647 M_PREPEND(m, sizeof(*ms), M_DONTWAIT);
648 if (m == NULL)
649 return NULL;
650
651 if (m->m_len < sizeof(union mpls_shim) &&
652 (m = m_pullup(m, sizeof(union mpls_shim))) == 0)
653 return NULL;
654
655 shim = mtod(m, union mpls_shim *);
656
657 memcpy(shim, ms, sizeof(*shim));
658 shim->s_addr = htonl(shim->s_addr);
659
660 return m;
661 }
662