if_mpls.c revision 1.17 1 /* $NetBSD: if_mpls.c,v 1.17 2015/06/04 09:19:59 ozaki-r Exp $ */
2
3 /*
4 * Copyright (c) 2010 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Mihai Chelaru <kefren (at) NetBSD.org>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: if_mpls.c,v 1.17 2015/06/04 09:19:59 ozaki-r Exp $");
34
35 #include "opt_inet.h"
36 #include "opt_mpls.h"
37
38 #include <sys/param.h>
39
40 #include <sys/errno.h>
41 #include <sys/malloc.h>
42 #include <sys/mbuf.h>
43 #include <sys/sysctl.h>
44
45 #include <net/bpf.h>
46 #include <net/if.h>
47 #include <net/if_types.h>
48 #include <net/netisr.h>
49 #include <net/route.h>
50
51 #ifdef INET
52 #include <netinet/in.h>
53 #include <netinet/in_systm.h>
54 #include <netinet/in_var.h>
55 #include <netinet/ip.h>
56 #include <netinet/ip_var.h>
57 #endif
58
59 #ifdef INET6
60 #include <netinet/ip6.h>
61 #include <netinet6/in6_var.h>
62 #include <netinet6/ip6_var.h>
63 #endif
64
65 #include <netmpls/mpls.h>
66 #include <netmpls/mpls_var.h>
67
68 #include "if_mpls.h"
69
/*
 * Pop the outermost label off the packet.
 *
 * Strips one shim from the front of `m', makes sure the following shim is
 * contiguous in the first mbuf, and loads it (converted to host byte order)
 * into `dst.smpls_addr'.
 *
 * This macro is not self-contained: it expects `m' (struct mbuf *), `dst'
 * (struct sockaddr_mpls) and a `done' label to exist in the calling
 * function.  If m_pullup() fails, `m' is left NULL and control jumps to
 * `done'.
 */
#define TRIM_LABEL do { \
	m_adj(m, sizeof(union mpls_shim)); \
	if (m->m_len < sizeof(union mpls_shim)) { \
		m = m_pullup(m, sizeof(union mpls_shim)); \
		if (m == NULL) \
			goto done; \
	} \
	dst.smpls_addr.s_addr = \
	    ntohl(mtod(m, union mpls_shim *)->s_addr); \
} while (/* CONSTCOND */ 0)
77
78
79 void ifmplsattach(int);
80
81 static int mpls_clone_create(struct if_clone *, int);
82 static int mpls_clone_destroy(struct ifnet *);
83
84 static struct if_clone mpls_if_cloner =
85 IF_CLONE_INITIALIZER("mpls", mpls_clone_create, mpls_clone_destroy);
86
87
88 static void mpls_input(struct ifnet *, struct mbuf *);
89 static int mpls_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
90 struct rtentry *);
91 static int mpls_ioctl(struct ifnet *, u_long, void *);
92 static int mpls_send_frame(struct mbuf *, struct ifnet *, struct rtentry *);
93 static int mpls_lse(struct mbuf *);
94
95 #ifdef INET
96 static int mpls_unlabel_inet(struct mbuf *);
97 static struct mbuf *mpls_label_inet(struct mbuf *, union mpls_shim *, uint);
98 #endif
99
100 #ifdef INET6
101 static int mpls_unlabel_inet6(struct mbuf *);
102 static struct mbuf *mpls_label_inet6(struct mbuf *, union mpls_shim *, uint);
103 #endif
104
105 static struct mbuf *mpls_prepend_shim(struct mbuf *, union mpls_shim *);
106
107 extern int mpls_defttl, mpls_mapttl_inet, mpls_mapttl_inet6, mpls_icmp_respond,
108 mpls_forwarding, mpls_frame_accept, mpls_mapprec_inet, mpls_mapclass_inet6,
109 mpls_rfc4182;
110
111 /* ARGSUSED */
112 void
113 ifmplsattach(int count)
114 {
115 if_clone_attach(&mpls_if_cloner);
116 }
117
118 static int
119 mpls_clone_create(struct if_clone *ifc, int unit)
120 {
121 struct mpls_softc *sc;
122
123 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
124
125 if_initname(&sc->sc_if, ifc->ifc_name, unit);
126 sc->sc_if.if_softc = sc;
127 sc->sc_if.if_type = IFT_MPLS;
128 sc->sc_if.if_addrlen = 0;
129 sc->sc_if.if_hdrlen = sizeof(union mpls_shim);
130 sc->sc_if.if_dlt = DLT_NULL;
131 sc->sc_if.if_mtu = 1500;
132 sc->sc_if.if_flags = 0;
133 sc->sc_if.if_input = mpls_input;
134 sc->sc_if.if_output = mpls_output;
135 sc->sc_if.if_ioctl = mpls_ioctl;
136
137 if_attach(&sc->sc_if);
138 if_alloc_sadl(&sc->sc_if);
139 bpf_attach(&sc->sc_if, DLT_NULL, sizeof(uint32_t));
140 return 0;
141 }
142
143 static int
144 mpls_clone_destroy(struct ifnet *ifp)
145 {
146 int s;
147
148 bpf_detach(ifp);
149
150 s = splnet();
151 if_detach(ifp);
152 splx(s);
153
154 free(ifp->if_softc, M_DEVBUF);
155 return 0;
156 }
157
/*
 * Interface input method: hand the frame to the label switch engine.
 * The result of mpls_lse() is not propagated; `ifp' is only referenced by
 * the disabled bpf tap below.
 */
static void
mpls_input(struct ifnet *ifp, struct mbuf *m)
{
#if 0
	/*
	 * TODO - kefren
	 * I'd love to unshim the packet, guess family
	 * and pass it to bpf
	 */
	bpf_mtap_af(ifp, AF_MPLS, m);
#endif

	(void)mpls_lse(m);
}
172
173 void
174 mplsintr(void)
175 {
176 struct mbuf *m;
177 int s;
178
179 while (!IF_IS_EMPTY(&mplsintrq)) {
180 s = splnet();
181 IF_DEQUEUE(&mplsintrq, m);
182 splx(s);
183
184 if (!m)
185 return;
186
187 if (((m->m_flags & M_PKTHDR) == 0) ||
188 (m->m_pkthdr.rcvif == 0))
189 panic("mplsintr(): no pkthdr or rcvif");
190
191 #ifdef MBUFTRACE
192 m_claimm(m, &mpls_owner);
193 #endif
194 mpls_input(m->m_pkthdr.rcvif, m);
195 }
196 }
197
198 /*
199 * prepend shim and deliver
200 */
201 static int
202 mpls_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct rtentry *rt)
203 {
204 union mpls_shim mh, *pms;
205 struct rtentry *rt1;
206 int err;
207 uint psize = sizeof(struct sockaddr_mpls);
208
209 KASSERT(KERNEL_LOCKED_P());
210
211 if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) {
212 m_freem(m);
213 return ENETDOWN;
214 }
215
216 if (rt_gettag(rt) == NULL || rt_gettag(rt)->sa_family != AF_MPLS) {
217 m_freem(m);
218 return EINVAL;
219 }
220
221 bpf_mtap_af(ifp, dst->sa_family, m);
222
223 memset(&mh, 0, sizeof(mh));
224 mh.s_addr = MPLS_GETSADDR(rt);
225 mh.shim.bos = 1;
226 mh.shim.exp = 0;
227 mh.shim.ttl = mpls_defttl;
228
229 pms = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr;
230
231 while (psize <= rt_gettag(rt)->sa_len - sizeof(mh)) {
232 pms++;
233 if (mh.shim.label != MPLS_LABEL_IMPLNULL &&
234 ((m = mpls_prepend_shim(m, &mh)) == NULL))
235 return ENOBUFS;
236 memset(&mh, 0, sizeof(mh));
237 mh.s_addr = ntohl(pms->s_addr);
238 mh.shim.bos = mh.shim.exp = 0;
239 mh.shim.ttl = mpls_defttl;
240 psize += sizeof(mh);
241 }
242
243 switch(dst->sa_family) {
244 #ifdef INET
245 case AF_INET:
246 m = mpls_label_inet(m, &mh, psize - sizeof(struct sockaddr_mpls));
247 break;
248 #endif
249 #ifdef INET6
250 case AF_INET6:
251 m = mpls_label_inet6(m, &mh, psize - sizeof(struct sockaddr_mpls));
252 break;
253 #endif
254 default:
255 m = mpls_prepend_shim(m, &mh);
256 break;
257 }
258
259 if (m == NULL) {
260 IF_DROP(&ifp->if_snd);
261 ifp->if_oerrors++;
262 return ENOBUFS;
263 }
264
265 ifp->if_opackets++;
266 ifp->if_obytes += m->m_pkthdr.len;
267
268 if ((rt1=rtalloc1(rt->rt_gateway, 1)) == NULL) {
269 m_freem(m);
270 return EHOSTUNREACH;
271 }
272
273 err = mpls_send_frame(m, rt1->rt_ifp, rt);
274 rtfree(rt1);
275 return err;
276 }
277
278 static int
279 mpls_ioctl(struct ifnet *ifp, u_long cmd, void *data)
280 {
281 int error = 0, s = splnet();
282 struct ifreq *ifr = data;
283
284 switch(cmd) {
285 case SIOCINITIFADDR:
286 ifp->if_flags |= IFF_UP | IFF_RUNNING;
287 break;
288 case SIOCSIFMTU:
289 if (ifr != NULL && ifr->ifr_mtu < 576) {
290 error = EINVAL;
291 break;
292 }
293 /* FALLTHROUGH */
294 case SIOCGIFMTU:
295 if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
296 error = 0;
297 break;
298 case SIOCSIFFLAGS:
299 if ((error = ifioctl_common(ifp, cmd, data)) != 0)
300 break;
301 if (ifp->if_flags & IFF_UP)
302 ifp->if_flags |= IFF_RUNNING;
303 break;
304 default:
305 error = ifioctl_common(ifp, cmd, data);
306 break;
307 }
308 splx(s);
309 return error;
310 }
311
312 /*
313 * MPLS Label Switch Engine
314 */
315 static int
316 mpls_lse(struct mbuf *m)
317 {
318 struct sockaddr_mpls dst;
319 union mpls_shim tshim, *htag;
320 struct rtentry *rt = NULL;
321 int error = ENOBUFS;
322 uint psize = sizeof(struct sockaddr_mpls);
323 bool push_back_alert = false;
324
325 if (m->m_len < sizeof(union mpls_shim) &&
326 (m = m_pullup(m, sizeof(union mpls_shim))) == NULL)
327 goto done;
328
329 dst.smpls_len = sizeof(struct sockaddr_mpls);
330 dst.smpls_family = AF_MPLS;
331 dst.smpls_addr.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr);
332
333 /* Check if we're accepting MPLS Frames */
334 error = EINVAL;
335 if (!mpls_frame_accept)
336 goto done;
337
338 /* TTL decrement */
339 if ((m = mpls_ttl_dec(m)) == NULL)
340 goto done;
341
342 /* RFC 4182 */
343 if (mpls_rfc4182 != 0)
344 while((dst.smpls_addr.shim.label == MPLS_LABEL_IPV4NULL ||
345 dst.smpls_addr.shim.label == MPLS_LABEL_IPV6NULL) &&
346 __predict_false(dst.smpls_addr.shim.bos == 0))
347 TRIM_LABEL;
348
349 /* RFC 3032 Section 2.1 Page 4 */
350 if (__predict_false(dst.smpls_addr.shim.label == MPLS_LABEL_RTALERT) &&
351 dst.smpls_addr.shim.bos == 0) {
352 TRIM_LABEL;
353 push_back_alert = true;
354 }
355
356 if (dst.smpls_addr.shim.label <= MPLS_LABEL_RESMAX) {
357 /* Don't swap reserved labels */
358 switch (dst.smpls_addr.shim.label) {
359 #ifdef INET
360 case MPLS_LABEL_IPV4NULL:
361 /* Pop shim and push mbuf to IP stack */
362 if (dst.smpls_addr.shim.bos)
363 error = mpls_unlabel_inet(m);
364 break;
365 #endif
366 #ifdef INET6
367 case MPLS_LABEL_IPV6NULL:
368 /* Pop shim and push mbuf to IPv6 stack */
369 if (dst.smpls_addr.shim.bos)
370 error = mpls_unlabel_inet6(m);
371 break;
372 #endif
373 case MPLS_LABEL_RTALERT: /* Yeah, I'm all alerted */
374 case MPLS_LABEL_IMPLNULL: /* This is logical only */
375 default: /* Rest are not allowed */
376 break;
377 }
378 goto done;
379 }
380
381 /* Check if we should do MPLS forwarding */
382 error = EHOSTUNREACH;
383 if (!mpls_forwarding)
384 goto done;
385
386 /* Get a route to dst */
387 dst.smpls_addr.shim.ttl =
388 dst.smpls_addr.shim.bos =
389 dst.smpls_addr.shim.exp = 0;
390 dst.smpls_addr.s_addr = htonl(dst.smpls_addr.s_addr);
391 if ((rt = rtalloc1((const struct sockaddr*)&dst, 1)) == NULL)
392 goto done;
393
394 /* MPLS packet with no MPLS tagged route ? */
395 if ((rt->rt_flags & RTF_GATEWAY) == 0 ||
396 rt_gettag(rt) == NULL ||
397 rt_gettag(rt)->sa_family != AF_MPLS)
398 goto done;
399
400 tshim.s_addr = MPLS_GETSADDR(rt);
401
402 /* Swap labels */
403 if ((m->m_len < sizeof(union mpls_shim)) &&
404 (m = m_pullup(m, sizeof(union mpls_shim))) == 0) {
405 error = ENOBUFS;
406 goto done;
407 }
408
409 /* Replace only the label */
410 htag = mtod(m, union mpls_shim *);
411 htag->s_addr = ntohl(htag->s_addr);
412 htag->shim.label = tshim.shim.label;
413 htag->s_addr = htonl(htag->s_addr);
414
415 /* check if there is anything more to prepend */
416 htag = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr;
417 while (psize <= rt_gettag(rt)->sa_len - sizeof(tshim)) {
418 htag++;
419 memset(&tshim, 0, sizeof(tshim));
420 tshim.s_addr = ntohl(htag->s_addr);
421 tshim.shim.bos = tshim.shim.exp = 0;
422 tshim.shim.ttl = mpls_defttl;
423 if (tshim.shim.label != MPLS_LABEL_IMPLNULL &&
424 ((m = mpls_prepend_shim(m, &tshim)) == NULL))
425 return ENOBUFS;
426 psize += sizeof(tshim);
427 }
428
429 if (__predict_false(push_back_alert == true)) {
430 /* re-add the router alert label */
431 memset(&tshim, 0, sizeof(tshim));
432 tshim.s_addr = MPLS_LABEL_RTALERT;
433 tshim.shim.bos = tshim.shim.exp = 0;
434 tshim.shim.ttl = mpls_defttl;
435 if ((m = mpls_prepend_shim(m, &tshim)) == NULL)
436 return ENOBUFS;
437 }
438
439 error = mpls_send_frame(m, rt->rt_ifp, rt);
440
441 done:
442 if (error != 0 && m != NULL)
443 m_freem(m);
444 if (rt != NULL)
445 rtfree(rt);
446
447 return error;
448 }
449
450 static int
451 mpls_send_frame(struct mbuf *m, struct ifnet *ifp, struct rtentry *rt)
452 {
453 union mpls_shim msh;
454 int ret;
455
456 if ((rt->rt_flags & RTF_GATEWAY) == 0)
457 return EHOSTUNREACH;
458
459 rt->rt_use++;
460
461 msh.s_addr = MPLS_GETSADDR(rt);
462 if (msh.shim.label == MPLS_LABEL_IMPLNULL ||
463 (m->m_flags & (M_MCAST | M_BCAST))) {
464 m_adj(m, sizeof(union mpls_shim));
465 m->m_pkthdr.csum_flags = 0;
466 }
467
468 switch(ifp->if_type) {
469 /* only these are supported for now */
470 case IFT_ETHER:
471 case IFT_TUNNEL:
472 case IFT_LOOP:
473 #ifdef INET
474 ret = ip_hresolv_output(ifp, m, rt->rt_gateway, rt);
475 #else
476 KERNEL_LOCK(1, NULL);
477 ret = (*ifp->if_output)(ifp, m, rt->rt_gateway, rt);
478 KERNEL_UNLOCK_ONE(NULL);
479 #endif
480 return ret;
481 break;
482 default:
483 return ENETUNREACH;
484 }
485 return 0;
486 }
487
488
489
490 #ifdef INET
491 static int
492 mpls_unlabel_inet(struct mbuf *m)
493 {
494 struct ip *iph;
495 union mpls_shim *ms;
496 int iphlen;
497
498 if (mpls_mapttl_inet || mpls_mapprec_inet) {
499
500 /* get shim info */
501 ms = mtod(m, union mpls_shim *);
502 ms->s_addr = ntohl(ms->s_addr);
503
504 /* and get rid of it */
505 m_adj(m, sizeof(union mpls_shim));
506
507 /* get ip header */
508 if (m->m_len < sizeof (struct ip) &&
509 (m = m_pullup(m, sizeof(struct ip))) == NULL)
510 return ENOBUFS;
511 iph = mtod(m, struct ip *);
512 iphlen = iph->ip_hl << 2;
513
514 /* get it all */
515 if (m->m_len < iphlen) {
516 if ((m = m_pullup(m, iphlen)) == NULL)
517 return ENOBUFS;
518 iph = mtod(m, struct ip *);
519 }
520
521 /* check ipsum */
522 if (in_cksum(m, iphlen) != 0) {
523 m_freem(m);
524 return EINVAL;
525 }
526
527 /* set IP ttl from MPLS ttl */
528 if (mpls_mapttl_inet)
529 iph->ip_ttl = ms->shim.ttl;
530
531 /* set IP Precedence from MPLS Exp */
532 if (mpls_mapprec_inet) {
533 iph->ip_tos = (iph->ip_tos << 3) >> 3;
534 iph->ip_tos |= ms->shim.exp << 5;
535 }
536
537 /* reset ipsum because we modified TTL and TOS */
538 iph->ip_sum = 0;
539 iph->ip_sum = in_cksum(m, iphlen);
540 } else
541 m_adj(m, sizeof(union mpls_shim));
542
543 /* Put it on IP queue */
544 if (__predict_false(!pktq_enqueue(ip_pktq, m, 0))) {
545 m_freem(m);
546 return ENOBUFS;
547 }
548 return 0;
549 }
550
551 /*
552 * Prepend MPLS label
553 */
554 static struct mbuf *
555 mpls_label_inet(struct mbuf *m, union mpls_shim *ms, uint offset)
556 {
557 struct ip iphdr;
558
559 if (mpls_mapttl_inet || mpls_mapprec_inet) {
560 if ((m->m_len < sizeof(struct ip)) &&
561 (m = m_pullup(m, offset + sizeof(struct ip))) == 0)
562 return NULL; /* XXX */
563 m_copydata(m, offset, sizeof(struct ip), &iphdr);
564
565 /* Map TTL */
566 if (mpls_mapttl_inet)
567 ms->shim.ttl = iphdr.ip_ttl;
568
569 /* Copy IP precedence to EXP */
570 if (mpls_mapprec_inet)
571 ms->shim.exp = ((u_int8_t)iphdr.ip_tos) >> 5;
572 }
573
574 if ((m = mpls_prepend_shim(m, ms)) == NULL)
575 return NULL;
576
577 return m;
578 }
579
580 #endif /* INET */
581
582 #ifdef INET6
583
584 static int
585 mpls_unlabel_inet6(struct mbuf *m)
586 {
587 struct ip6_hdr *ip6hdr;
588 union mpls_shim ms;
589
590 /* TODO: mapclass */
591 if (mpls_mapttl_inet6) {
592 ms.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr);
593 m_adj(m, sizeof(union mpls_shim));
594
595 if (m->m_len < sizeof (struct ip6_hdr) &&
596 (m = m_pullup(m, sizeof(struct ip6_hdr))) == 0)
597 return ENOBUFS;
598 ip6hdr = mtod(m, struct ip6_hdr *);
599
600 /* Because we just decremented this in mpls_lse */
601 ip6hdr->ip6_hlim = ms.shim.ttl + 1;
602 } else
603 m_adj(m, sizeof(union mpls_shim));
604
605 /* Put it back on IPv6 queue. */
606 if (__predict_false(!pktq_enqueue(ip6_pktq, m, 0))) {
607 m_freem(m);
608 return ENOBUFS;
609 }
610 return 0;
611 }
612
613 static struct mbuf *
614 mpls_label_inet6(struct mbuf *m, union mpls_shim *ms, uint offset)
615 {
616 struct ip6_hdr ip6h;
617
618 if (mpls_mapttl_inet6 || mpls_mapclass_inet6) {
619 if (m->m_len < sizeof(struct ip6_hdr) &&
620 (m = m_pullup(m, offset + sizeof(struct ip6_hdr))) == 0)
621 return NULL;
622 m_copydata(m, offset, sizeof(struct ip6_hdr), &ip6h);
623
624 if (mpls_mapttl_inet6)
625 ms->shim.ttl = ip6h.ip6_hlim;
626
627 if (mpls_mapclass_inet6)
628 ms->shim.exp = ip6h.ip6_vfc << 1 >> 5;
629 }
630
631 if ((m = mpls_prepend_shim(m, ms)) == NULL)
632 return NULL;
633
634 return m;
635 }
636
637 #endif /* INET6 */
638
639 static struct mbuf *
640 mpls_prepend_shim(struct mbuf *m, union mpls_shim *ms)
641 {
642 union mpls_shim *shim;
643
644 M_PREPEND(m, sizeof(*ms), M_DONTWAIT);
645 if (m == NULL)
646 return NULL;
647
648 if (m->m_len < sizeof(union mpls_shim) &&
649 (m = m_pullup(m, sizeof(union mpls_shim))) == 0)
650 return NULL;
651
652 shim = mtod(m, union mpls_shim *);
653
654 memcpy(shim, ms, sizeof(*shim));
655 shim->s_addr = htonl(shim->s_addr);
656
657 return m;
658 }
659