if_mpls.c revision 1.8.14.2 1 /* $NetBSD: if_mpls.c,v 1.8.14.2 2018/03/13 17:42:37 snj Exp $ */
2
3 /*
4 * Copyright (c) 2010 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Mihai Chelaru <kefren (at) NetBSD.org>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: if_mpls.c,v 1.8.14.2 2018/03/13 17:42:37 snj Exp $");
34
35 #include "opt_inet.h"
36 #include "opt_mpls.h"
37
38 #include <sys/param.h>
39
40 #include <sys/errno.h>
41 #include <sys/malloc.h>
42 #include <sys/mbuf.h>
43 #include <sys/sysctl.h>
44
45 #include <net/bpf.h>
46 #include <net/if.h>
47 #include <net/if_types.h>
48 #include <net/netisr.h>
49 #include <net/route.h>
50
51 #ifdef INET
52 #include <netinet/in.h>
53 #include <netinet/in_systm.h>
54 #include <netinet/in_var.h>
55 #include <netinet/ip.h>
56 #endif
57
58 #ifdef INET6
59 #include <netinet/ip6.h>
60 #include <netinet6/in6_var.h>
61 #include <netinet6/ip6_var.h>
62 #endif
63
64 #include <netmpls/mpls.h>
65 #include <netmpls/mpls_var.h>
66
67 #include "if_mpls.h"
68
69 void ifmplsattach(int);
70
71 static int mpls_clone_create(struct if_clone *, int);
72 static int mpls_clone_destroy(struct ifnet *);
73
74 static struct if_clone mpls_if_cloner =
75 IF_CLONE_INITIALIZER("mpls", mpls_clone_create, mpls_clone_destroy);
76
77
78 static void mpls_input(struct ifnet *, struct mbuf *);
79 static int mpls_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
80 struct rtentry *);
81 static int mpls_ioctl(struct ifnet *, u_long, void *);
82 static int mpls_send_frame(struct mbuf *, struct ifnet *, struct rtentry *);
83 static int mpls_lse(struct mbuf *);
84
85 #ifdef INET
86 static struct mbuf *mpls_unlabel_inet(struct mbuf *, int *error);
87 static struct mbuf *mpls_label_inet(struct mbuf *, union mpls_shim *, uint);
88 #endif
89
90 #ifdef INET6
91 static struct mbuf *mpls_unlabel_inet6(struct mbuf *, int *error);
92 static struct mbuf *mpls_label_inet6(struct mbuf *, union mpls_shim *, uint);
93 #endif
94
95 static struct mbuf *mpls_prepend_shim(struct mbuf *, union mpls_shim *);
96
97 extern int mpls_defttl, mpls_mapttl_inet, mpls_mapttl_inet6, mpls_icmp_respond,
98 mpls_forwarding, mpls_accept, mpls_mapprec_inet, mpls_mapclass_inet6;
99
100 /* ARGSUSED */
101 void
102 ifmplsattach(int count)
103 {
104 if_clone_attach(&mpls_if_cloner);
105 }
106
107 static int
108 mpls_clone_create(struct if_clone *ifc, int unit)
109 {
110 struct mpls_softc *sc;
111
112 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
113
114 if_initname(&sc->sc_if, ifc->ifc_name, unit);
115 sc->sc_if.if_softc = sc;
116 sc->sc_if.if_type = IFT_MPLS;
117 sc->sc_if.if_addrlen = 0;
118 sc->sc_if.if_hdrlen = sizeof(union mpls_shim);
119 sc->sc_if.if_dlt = DLT_NULL;
120 sc->sc_if.if_mtu = 1500;
121 sc->sc_if.if_flags = 0;
122 sc->sc_if.if_input = mpls_input;
123 sc->sc_if.if_output = mpls_output;
124 sc->sc_if.if_ioctl = mpls_ioctl;
125
126 if_attach(&sc->sc_if);
127 if_alloc_sadl(&sc->sc_if);
128 bpf_attach(&sc->sc_if, DLT_NULL, sizeof(uint32_t));
129 return 0;
130 }
131
132 static int
133 mpls_clone_destroy(struct ifnet *ifp)
134 {
135 int s;
136
137 bpf_detach(ifp);
138
139 s = splnet();
140 if_detach(ifp);
141 splx(s);
142
143 free(ifp->if_softc, M_DEVBUF);
144 return 0;
145 }
146
/*
 * Interface input method: pass the frame to the label switch engine.
 * The result of mpls_lse() is not reported to the caller.
 */
static void
mpls_input(struct ifnet *ifp, struct mbuf *m)
{

	/*
	 * TODO - kefren
	 * I'd love to unshim the packet, guess family
	 * and pass it to bpf
	 */
#if 0
	bpf_mtap_af(ifp, AF_MPLS, m);
#endif

	(void)mpls_lse(m);
}
161
162 void
163 mplsintr(void)
164 {
165 struct mbuf *m;
166 int s;
167
168 while (!IF_IS_EMPTY(&mplsintrq)) {
169 s = splnet();
170 IF_DEQUEUE(&mplsintrq, m);
171 splx(s);
172
173 if (!m)
174 return;
175
176 if (((m->m_flags & M_PKTHDR) == 0) ||
177 (m->m_pkthdr.rcvif == 0))
178 panic("mplsintr(): no pkthdr or rcvif");
179
180 #ifdef MBUFTRACE
181 m_claimm(m, &mpls_owner);
182 #endif
183 mpls_input(m->m_pkthdr.rcvif, m);
184 }
185 }
186
187 /*
188 * prepend shim and deliver
189 */
190 static int
191 mpls_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct rtentry *rt)
192 {
193 union mpls_shim mh, *pms;
194 struct rtentry *rt1;
195 int err;
196 uint psize = sizeof(struct sockaddr_mpls);
197
198 if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) {
199 m_freem(m);
200 return ENETDOWN;
201 }
202
203 if (rt_gettag(rt) == NULL || rt_gettag(rt)->sa_family != AF_MPLS) {
204 m_freem(m);
205 return EINVAL;
206 }
207
208 bpf_mtap_af(ifp, dst->sa_family, m);
209
210 memset(&mh, 0, sizeof(mh));
211 mh.s_addr = MPLS_GETSADDR(rt);
212 mh.shim.bos = 1;
213 mh.shim.exp = 0;
214 mh.shim.ttl = mpls_defttl;
215
216 pms = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr;
217
218 while (psize <= rt_gettag(rt)->sa_len - sizeof(mh)) {
219 pms++;
220 if (mh.shim.label != MPLS_LABEL_IMPLNULL &&
221 ((m = mpls_prepend_shim(m, &mh)) == NULL))
222 return ENOBUFS;
223 memset(&mh, 0, sizeof(mh));
224 mh.s_addr = ntohl(pms->s_addr);
225 mh.shim.bos = mh.shim.exp = 0;
226 mh.shim.ttl = mpls_defttl;
227 psize += sizeof(mh);
228 }
229
230 switch(dst->sa_family) {
231 #ifdef INET
232 case AF_INET:
233 m = mpls_label_inet(m, &mh, psize - sizeof(struct sockaddr_mpls));
234 break;
235 #endif
236 #ifdef INET6
237 case AF_INET6:
238 m = mpls_label_inet6(m, &mh, psize - sizeof(struct sockaddr_mpls));
239 break;
240 #endif
241 default:
242 m = mpls_prepend_shim(m, &mh);
243 break;
244 }
245
246 if (m == NULL) {
247 IF_DROP(&ifp->if_snd);
248 ifp->if_oerrors++;
249 return ENOBUFS;
250 }
251
252 ifp->if_opackets++;
253 ifp->if_obytes += m->m_pkthdr.len;
254
255 if ((rt1=rtalloc1(rt->rt_gateway, 1)) == NULL) {
256 m_freem(m);
257 return EHOSTUNREACH;
258 }
259
260 err = mpls_send_frame(m, rt1->rt_ifp, rt);
261 RTFREE(rt1);
262 return err;
263 }
264
265 static int
266 mpls_ioctl(struct ifnet *ifp, u_long cmd, void *data)
267 {
268 int error = 0, s = splnet();
269 struct ifreq *ifr = data;
270
271 switch(cmd) {
272 case SIOCINITIFADDR:
273 ifp->if_flags |= IFF_UP | IFF_RUNNING;
274 break;
275 case SIOCSIFMTU:
276 if (ifr != NULL && ifr->ifr_mtu < 576) {
277 error = EINVAL;
278 break;
279 }
280 /* FALLTHROUGH */
281 case SIOCGIFMTU:
282 if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
283 error = 0;
284 break;
285 case SIOCSIFFLAGS:
286 if ((error = ifioctl_common(ifp, cmd, data)) != 0)
287 break;
288 if (ifp->if_flags & IFF_UP)
289 ifp->if_flags |= IFF_RUNNING;
290 break;
291 default:
292 error = ifioctl_common(ifp, cmd, data);
293 break;
294 }
295 splx(s);
296 return error;
297 }
298
299 /*
300 * MPLS Label Switch Engine
301 */
302 static int
303 mpls_lse(struct mbuf *m)
304 {
305 struct sockaddr_mpls dst;
306 union mpls_shim tshim, *htag;
307 struct rtentry *rt = NULL;
308 int error = ENOBUFS;
309 uint psize = sizeof(struct sockaddr_mpls);
310
311 /* If we're not accepting MPLS frames, leave now. */
312 if (!mpls_accept) {
313 error = EINVAL;
314 goto done;
315 }
316
317 if (m->m_len < sizeof(union mpls_shim) &&
318 (m = m_pullup(m, sizeof(union mpls_shim))) == NULL)
319 goto done;
320
321 dst.smpls_len = sizeof(struct sockaddr_mpls);
322 dst.smpls_family = AF_MPLS;
323 dst.smpls_addr.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr);
324
325 error = EINVAL;
326
327 /* TTL decrement */
328 if ((m = mpls_ttl_dec(m)) == NULL)
329 goto done;
330
331 if (dst.smpls_addr.shim.label <= MPLS_LABEL_RESMAX) {
332 /* Don't swap reserved labels */
333 switch (dst.smpls_addr.shim.label) {
334 #ifdef INET
335 case MPLS_LABEL_IPV4NULL:
336 /* Pop shim and push mbuf to IP stack */
337 if (dst.smpls_addr.shim.bos) {
338 m = mpls_unlabel_inet(m, &error);
339 }
340 break;
341 #endif
342 #ifdef INET6
343 case MPLS_LABEL_IPV6NULL:
344 /* Pop shim and push mbuf to IPv6 stack */
345 if (dst.smpls_addr.shim.bos) {
346 m = mpls_unlabel_inet6(m, &error);
347 }
348 break;
349 #endif
350 case MPLS_LABEL_RTALERT: /* Yeah, I'm all alerted */
351 case MPLS_LABEL_IMPLNULL: /* This is logical only */
352 default: /* Rest are not allowed */
353 break;
354 }
355 goto done;
356 }
357
358 /* Check if we should do MPLS forwarding */
359 error = EHOSTUNREACH;
360 if (!mpls_forwarding)
361 goto done;
362
363 /* Get a route to dst */
364 dst.smpls_addr.shim.ttl =
365 dst.smpls_addr.shim.bos =
366 dst.smpls_addr.shim.exp = 0;
367 dst.smpls_addr.s_addr = htonl(dst.smpls_addr.s_addr);
368 if ((rt = rtalloc1((const struct sockaddr*)&dst, 1)) == NULL)
369 goto done;
370
371 /* MPLS packet with no MPLS tagged route ? */
372 if ((rt->rt_flags & RTF_GATEWAY) == 0 ||
373 rt_gettag(rt) == NULL ||
374 rt_gettag(rt)->sa_family != AF_MPLS)
375 goto done;
376
377 tshim.s_addr = MPLS_GETSADDR(rt);
378
379 /* Swap labels */
380 if ((m->m_len < sizeof(union mpls_shim)) &&
381 (m = m_pullup(m, sizeof(union mpls_shim))) == 0) {
382 error = ENOBUFS;
383 goto done;
384 }
385
386 /* Replace only the label */
387 htag = mtod(m, union mpls_shim *);
388 htag->s_addr = ntohl(htag->s_addr);
389 htag->shim.label = tshim.shim.label;
390 htag->s_addr = htonl(htag->s_addr);
391
392 /* check if there is anything more to prepend */
393 htag = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr;
394 while (psize <= rt_gettag(rt)->sa_len - sizeof(tshim)) {
395 htag++;
396 memset(&tshim, 0, sizeof(tshim));
397 tshim.s_addr = ntohl(htag->s_addr);
398 tshim.shim.bos = tshim.shim.exp = 0;
399 tshim.shim.ttl = mpls_defttl;
400 if (tshim.shim.label != MPLS_LABEL_IMPLNULL &&
401 ((m = mpls_prepend_shim(m, &tshim)) == NULL)) {
402 error = ENOBUFS;
403 goto done;
404 }
405 psize += sizeof(tshim);
406 }
407
408 error = mpls_send_frame(m, rt->rt_ifp, rt);
409
410 done:
411 if (error != 0 && m != NULL)
412 m_freem(m);
413 if (rt != NULL)
414 RTFREE(rt);
415
416 return error;
417 }
418
419 static int
420 mpls_send_frame(struct mbuf *m, struct ifnet *ifp, struct rtentry *rt)
421 {
422 union mpls_shim msh;
423
424 if ((rt->rt_flags & RTF_GATEWAY) == 0)
425 return EHOSTUNREACH;
426
427 rt->rt_use++;
428
429 msh.s_addr = MPLS_GETSADDR(rt);
430 if (msh.shim.label == MPLS_LABEL_IMPLNULL ||
431 (m->m_flags & (M_MCAST | M_BCAST))) {
432 m_adj(m, sizeof(union mpls_shim));
433 m->m_pkthdr.csum_flags = 0;
434 }
435
436 switch(ifp->if_type) {
437 /* only these are supported for now */
438 case IFT_ETHER:
439 case IFT_TUNNEL:
440 case IFT_LOOP:
441 return (*ifp->if_output)(ifp, m, rt->rt_gateway, rt);
442 break;
443 default:
444 return ENETUNREACH;
445 }
446 return 0;
447 }
448
449 #ifdef INET
450 static struct mbuf *
451 mpls_unlabel_inet(struct mbuf *m, int *error)
452 {
453 int s, iphlen;
454 struct ip *iph;
455 union mpls_shim *ms;
456 struct ifqueue *inq;
457
458 if (mpls_mapttl_inet || mpls_mapprec_inet) {
459 /* get shim info */
460 ms = mtod(m, union mpls_shim *);
461 ms->s_addr = ntohl(ms->s_addr);
462
463 /* and get rid of it */
464 m_adj(m, sizeof(union mpls_shim));
465
466 /* get ip header */
467 if (m->m_len < sizeof(struct ip) &&
468 (m = m_pullup(m, sizeof(struct ip))) == NULL) {
469 *error = ENOBUFS;
470 return NULL;
471 }
472
473 iph = mtod(m, struct ip *);
474 iphlen = iph->ip_hl << 2;
475
476 /* get it all */
477 if (m->m_len < iphlen) {
478 if ((m = m_pullup(m, iphlen)) == NULL) {
479 *error = ENOBUFS;
480 return NULL;
481 }
482 iph = mtod(m, struct ip *);
483 }
484
485 /* check ipsum */
486 if (in_cksum(m, iphlen) != 0) {
487 m_freem(m);
488 *error = EINVAL;
489 return NULL;
490 }
491
492 /* set IP ttl from MPLS ttl */
493 if (mpls_mapttl_inet)
494 iph->ip_ttl = ms->shim.ttl;
495
496 /* set IP Precedence from MPLS Exp */
497 if (mpls_mapprec_inet) {
498 iph->ip_tos = (iph->ip_tos << 3) >> 3;
499 iph->ip_tos |= ms->shim.exp << 5;
500 }
501
502 /* reset ipsum because we modified TTL and TOS */
503 iph->ip_sum = 0;
504 iph->ip_sum = in_cksum(m, iphlen);
505 } else {
506 m_adj(m, sizeof(union mpls_shim));
507 }
508
509 /* Put it on IP queue */
510 inq = &ipintrq;
511 s = splnet();
512 if (IF_QFULL(inq)) {
513 IF_DROP(inq);
514 splx(s);
515 m_freem(m);
516 *error = ENOBUFS;
517 return NULL;
518 }
519 IF_ENQUEUE(inq, m);
520 splx(s);
521 schednetisr(NETISR_IP);
522
523 *error = 0;
524 return m;
525 }
526
527 /*
528 * Prepend MPLS label
529 */
530 static struct mbuf *
531 mpls_label_inet(struct mbuf *m, union mpls_shim *ms, uint offset)
532 {
533 struct ip iphdr;
534
535 if (mpls_mapttl_inet || mpls_mapprec_inet) {
536 /* XXX Maybe just check m->m_pkthdr.len instead? */
537 if ((m->m_len < offset + sizeof(struct ip)) &&
538 (m = m_pullup(m, offset + sizeof(struct ip))) == 0)
539 return NULL;
540
541 m_copydata(m, offset, sizeof(struct ip), &iphdr);
542
543 /* Map TTL */
544 if (mpls_mapttl_inet)
545 ms->shim.ttl = iphdr.ip_ttl;
546
547 /* Copy IP precedence to EXP */
548 if (mpls_mapprec_inet)
549 ms->shim.exp = ((u_int8_t)iphdr.ip_tos) >> 5;
550 }
551
552 if ((m = mpls_prepend_shim(m, ms)) == NULL)
553 return NULL;
554
555 return m;
556 }
557 #endif /* INET */
558
559 #ifdef INET6
560 static struct mbuf *
561 mpls_unlabel_inet6(struct mbuf *m, int *error)
562 {
563 struct ip6_hdr *ip6hdr;
564 union mpls_shim ms;
565 struct ifqueue *inq;
566 int s;
567
568 /* TODO: mapclass */
569 if (mpls_mapttl_inet6) {
570 ms.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr);
571 m_adj(m, sizeof(union mpls_shim));
572
573 if (m->m_len < sizeof (struct ip6_hdr) &&
574 (m = m_pullup(m, sizeof(struct ip6_hdr))) == 0) {
575 *error = ENOBUFS;
576 return NULL;
577 }
578 ip6hdr = mtod(m, struct ip6_hdr *);
579
580 /* Because we just decremented this in mpls_lse */
581 ip6hdr->ip6_hlim = ms.shim.ttl + 1;
582 } else {
583 m_adj(m, sizeof(union mpls_shim));
584 }
585
586 /* Put it back on IPv6 stack */
587 schednetisr(NETISR_IPV6);
588 inq = &ip6intrq;
589 s = splnet();
590 if (IF_QFULL(inq)) {
591 IF_DROP(inq);
592 splx(s);
593 m_freem(m);
594 *error = ENOBUFS;
595 return NULL;
596 }
597
598 IF_ENQUEUE(inq, m);
599 splx(s);
600
601 *error = 0;
602 return m;
603 }
604
605 static struct mbuf *
606 mpls_label_inet6(struct mbuf *m, union mpls_shim *ms, uint offset)
607 {
608 struct ip6_hdr ip6h;
609
610 if (mpls_mapttl_inet6 || mpls_mapclass_inet6) {
611 if ((m->m_len < offset + sizeof(struct ip6_hdr)) &&
612 (m = m_pullup(m, offset + sizeof(struct ip6_hdr))) == 0)
613 return NULL;
614
615 m_copydata(m, offset, sizeof(struct ip6_hdr), &ip6h);
616
617 if (mpls_mapttl_inet6)
618 ms->shim.ttl = ip6h.ip6_hlim;
619
620 if (mpls_mapclass_inet6)
621 ms->shim.exp = ip6h.ip6_vfc << 1 >> 5;
622 }
623
624 if ((m = mpls_prepend_shim(m, ms)) == NULL)
625 return NULL;
626
627 return m;
628 }
629 #endif /* INET6 */
630
631 static struct mbuf *
632 mpls_prepend_shim(struct mbuf *m, union mpls_shim *ms)
633 {
634 union mpls_shim *shim;
635
636 M_PREPEND(m, sizeof(*ms), M_DONTWAIT);
637 if (m == NULL)
638 return NULL;
639
640 if (m->m_len < sizeof(union mpls_shim) &&
641 (m = m_pullup(m, sizeof(union mpls_shim))) == 0)
642 return NULL;
643
644 shim = mtod(m, union mpls_shim *);
645
646 memcpy(shim, ms, sizeof(*shim));
647 shim->s_addr = htonl(shim->s_addr);
648
649 return m;
650 }
651