if_mpls.c revision 1.8 1 /* $NetBSD: if_mpls.c,v 1.8 2011/07/03 18:46:12 kefren Exp $ */
2
3 /*
4 * Copyright (c) 2010 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Mihai Chelaru <kefren (at) NetBSD.org>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: if_mpls.c,v 1.8 2011/07/03 18:46:12 kefren Exp $");
34
35 #include "opt_inet.h"
36 #include "opt_mpls.h"
37
38 #include <sys/param.h>
39
40 #include <sys/errno.h>
41 #include <sys/kmem.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/sysctl.h>
45
46 #include <net/bpf.h>
47 #include <net/if.h>
48 #include <net/if_types.h>
49 #include <net/netisr.h>
50 #include <net/route.h>
51
52 #ifdef INET
53 #include <netinet/in.h>
54 #include <netinet/in_systm.h>
55 #include <netinet/in_var.h>
56 #include <netinet/ip.h>
57 #endif
58
59 #ifdef INET6
60 #include <netinet/ip6.h>
61 #include <netinet6/in6_var.h>
62 #include <netinet6/ip6_var.h>
63 #endif
64
65 #include <netmpls/mpls.h>
66 #include <netmpls/mpls_var.h>
67
68 #include "if_mpls.h"
69
70 void ifmplsattach(int);
71
72 static int mpls_clone_create(struct if_clone *, int);
73 static int mpls_clone_destroy(struct ifnet *);
74
75 static struct if_clone mpls_if_cloner =
76 IF_CLONE_INITIALIZER("mpls", mpls_clone_create, mpls_clone_destroy);
77
78
79 static void mpls_input(struct ifnet *, struct mbuf *);
80 static int mpls_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
81 struct rtentry *);
82 static int mpls_ioctl(struct ifnet *, u_long, void *);
83 static int mpls_send_frame(struct mbuf *, struct ifnet *, struct rtentry *);
84 static int mpls_lse(struct mbuf *);
85
86 #ifdef INET
87 static int mpls_unlabel_inet(struct mbuf *);
88 static struct mbuf *mpls_label_inet(struct mbuf *, union mpls_shim *, uint);
89 #endif
90
91 #ifdef INET6
92 static int mpls_unlabel_inet6(struct mbuf *);
93 static struct mbuf *mpls_label_inet6(struct mbuf *, union mpls_shim *, uint);
94 #endif
95
96 static struct mbuf *mpls_prepend_shim(struct mbuf *, union mpls_shim *);
97
98 extern int mpls_defttl, mpls_mapttl_inet, mpls_mapttl_inet6, mpls_icmp_respond,
99 mpls_forwarding, mpls_accept, mpls_mapprec_inet, mpls_mapclass_inet6;
100
101 /* ARGSUSED */
102 void
103 ifmplsattach(int count)
104 {
105 if_clone_attach(&mpls_if_cloner);
106 }
107
108 static int
109 mpls_clone_create(struct if_clone *ifc, int unit)
110 {
111 struct mpls_softc *sc;
112
113 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
114
115 if_initname(&sc->sc_if, ifc->ifc_name, unit);
116 sc->sc_if.if_softc = sc;
117 sc->sc_if.if_type = IFT_MPLS;
118 sc->sc_if.if_addrlen = 0;
119 sc->sc_if.if_hdrlen = sizeof(union mpls_shim);
120 sc->sc_if.if_dlt = DLT_NULL;
121 sc->sc_if.if_mtu = 1500;
122 sc->sc_if.if_flags = 0;
123 sc->sc_if.if_input = mpls_input;
124 sc->sc_if.if_output = mpls_output;
125 sc->sc_if.if_ioctl = mpls_ioctl;
126
127 if_attach(&sc->sc_if);
128 if_alloc_sadl(&sc->sc_if);
129 bpf_attach(&sc->sc_if, DLT_NULL, sizeof(uint32_t));
130 return 0;
131 }
132
133 static int
134 mpls_clone_destroy(struct ifnet *ifp)
135 {
136 int s;
137
138 bpf_detach(ifp);
139
140 s = splnet();
141 if_detach(ifp);
142 splx(s);
143
144 free(ifp->if_softc, M_DEVBUF);
145 return 0;
146 }
147
/*
 * Interface input routine: hands the frame to the label switch engine.
 * The result of mpls_lse() is ignored; ifp is currently unused because the
 * bpf tap below is compiled out.
 */
static void
mpls_input(struct ifnet *ifp, struct mbuf *m)
{
#if 0
	/*
	 * TODO - kefren
	 * I'd love to unshim the packet, guess family
	 * and pass it to bpf
	 */
	bpf_mtap_af(ifp, AF_MPLS, m);
#endif

	(void)mpls_lse(m);
}
162
163 void
164 mplsintr(void)
165 {
166 struct mbuf *m;
167 int s;
168
169 while (!IF_IS_EMPTY(&mplsintrq)) {
170 s = splnet();
171 IF_DEQUEUE(&mplsintrq, m);
172 splx(s);
173
174 if (!m)
175 return;
176
177 if (((m->m_flags & M_PKTHDR) == 0) ||
178 (m->m_pkthdr.rcvif == 0))
179 panic("mplsintr(): no pkthdr or rcvif");
180
181 #ifdef MBUFTRACE
182 m_claimm(m, &mpls_owner);
183 #endif
184 mpls_input(m->m_pkthdr.rcvif, m);
185 }
186 }
187
188 /*
189 * prepend shim and deliver
190 */
191 static int
192 mpls_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct rtentry *rt)
193 {
194 union mpls_shim mh, *pms;
195 struct rtentry *rt1;
196 int err;
197 uint psize = sizeof(struct sockaddr_mpls);
198
199 if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) {
200 m_freem(m);
201 return ENETDOWN;
202 }
203
204 if (rt_gettag(rt) == NULL || rt_gettag(rt)->sa_family != AF_MPLS) {
205 m_freem(m);
206 return EINVAL;
207 }
208
209 bpf_mtap_af(ifp, dst->sa_family, m);
210
211 memset(&mh, 0, sizeof(mh));
212 mh.s_addr = MPLS_GETSADDR(rt);
213 mh.shim.bos = 1;
214 mh.shim.exp = 0;
215 mh.shim.ttl = mpls_defttl;
216
217 pms = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr;
218
219 while (psize <= rt_gettag(rt)->sa_len - sizeof(mh)) {
220 pms++;
221 if (mh.shim.label != MPLS_LABEL_IMPLNULL &&
222 ((m = mpls_prepend_shim(m, &mh)) == NULL))
223 return ENOBUFS;
224 memset(&mh, 0, sizeof(mh));
225 mh.s_addr = ntohl(pms->s_addr);
226 mh.shim.bos = mh.shim.exp = 0;
227 mh.shim.ttl = mpls_defttl;
228 psize += sizeof(mh);
229 }
230
231 switch(dst->sa_family) {
232 #ifdef INET
233 case AF_INET:
234 m = mpls_label_inet(m, &mh, psize - sizeof(struct sockaddr_mpls));
235 break;
236 #endif
237 #ifdef INET6
238 case AF_INET6:
239 m = mpls_label_inet6(m, &mh, psize - sizeof(struct sockaddr_mpls));
240 break;
241 #endif
242 default:
243 m = mpls_prepend_shim(m, &mh);
244 break;
245 }
246
247 if (m == NULL) {
248 IF_DROP(&ifp->if_snd);
249 ifp->if_oerrors++;
250 return ENOBUFS;
251 }
252
253 ifp->if_opackets++;
254 ifp->if_obytes += m->m_pkthdr.len;
255
256 if ((rt1=rtalloc1(rt->rt_gateway, 1)) == NULL) {
257 m_freem(m);
258 return EHOSTUNREACH;
259 }
260
261 err = mpls_send_frame(m, rt1->rt_ifp, rt);
262 RTFREE(rt1);
263 return err;
264 }
265
266 static int
267 mpls_ioctl(struct ifnet *ifp, u_long cmd, void *data)
268 {
269 int error = 0, s = splnet();
270 struct ifreq *ifr = data;
271
272 switch(cmd) {
273 case SIOCINITIFADDR:
274 ifp->if_flags |= IFF_UP | IFF_RUNNING;
275 break;
276 case SIOCSIFMTU:
277 if (ifr != NULL && ifr->ifr_mtu < 576) {
278 error = EINVAL;
279 break;
280 }
281 /* FALLTHROUGH */
282 case SIOCGIFMTU:
283 if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
284 error = 0;
285 break;
286 case SIOCSIFFLAGS:
287 if ((error = ifioctl_common(ifp, cmd, data)) != 0)
288 break;
289 if (ifp->if_flags & IFF_UP)
290 ifp->if_flags |= IFF_RUNNING;
291 break;
292 default:
293 error = ifioctl_common(ifp, cmd, data);
294 break;
295 }
296 splx(s);
297 return error;
298 }
299
300 /*
301 * MPLS Label Switch Engine
302 */
303 static int
304 mpls_lse(struct mbuf *m)
305 {
306 struct sockaddr_mpls dst;
307 union mpls_shim tshim, *htag;
308 struct rtentry *rt = NULL;
309 int error = ENOBUFS;
310 uint psize = sizeof(struct sockaddr_mpls);
311
312 if (m->m_len < sizeof(union mpls_shim) &&
313 (m = m_pullup(m, sizeof(union mpls_shim))) == NULL)
314 goto done;
315
316 dst.smpls_len = sizeof(struct sockaddr_mpls);
317 dst.smpls_family = AF_MPLS;
318 dst.smpls_addr.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr);
319
320 /* Check if we're accepting MPLS Frames */
321 error = EINVAL;
322 if (!mpls_accept)
323 goto done;
324
325 /* TTL decrement */
326 if ((m = mpls_ttl_dec(m)) == NULL)
327 goto done;
328
329 if (dst.smpls_addr.shim.label <= MPLS_LABEL_RESMAX) {
330 /* Don't swap reserved labels */
331 switch (dst.smpls_addr.shim.label) {
332 #ifdef INET
333 case MPLS_LABEL_IPV4NULL:
334 /* Pop shim and push mbuf to IP stack */
335 if (dst.smpls_addr.shim.bos)
336 error = mpls_unlabel_inet(m);
337 break;
338 #endif
339 #ifdef INET6
340 case MPLS_LABEL_IPV6NULL:
341 /* Pop shim and push mbuf to IPv6 stack */
342 if (dst.smpls_addr.shim.bos)
343 error = mpls_unlabel_inet6(m);
344 break;
345 #endif
346 case MPLS_LABEL_RTALERT: /* Yeah, I'm all alerted */
347 case MPLS_LABEL_IMPLNULL: /* This is logical only */
348 default: /* Rest are not allowed */
349 break;
350 }
351 goto done;
352 }
353
354 /* Check if we should do MPLS forwarding */
355 error = EHOSTUNREACH;
356 if (!mpls_forwarding)
357 goto done;
358
359 /* Get a route to dst */
360 dst.smpls_addr.shim.ttl =
361 dst.smpls_addr.shim.bos =
362 dst.smpls_addr.shim.exp = 0;
363 dst.smpls_addr.s_addr = htonl(dst.smpls_addr.s_addr);
364 if ((rt = rtalloc1((const struct sockaddr*)&dst, 1)) == NULL)
365 goto done;
366
367 /* MPLS packet with no MPLS tagged route ? */
368 if ((rt->rt_flags & RTF_GATEWAY) == 0 ||
369 rt_gettag(rt) == NULL ||
370 rt_gettag(rt)->sa_family != AF_MPLS)
371 goto done;
372
373 tshim.s_addr = MPLS_GETSADDR(rt);
374
375 /* Swap labels */
376 if ((m->m_len < sizeof(union mpls_shim)) &&
377 (m = m_pullup(m, sizeof(union mpls_shim))) == 0) {
378 error = ENOBUFS;
379 goto done;
380 }
381
382 /* Replace only the label */
383 htag = mtod(m, union mpls_shim *);
384 htag->s_addr = ntohl(htag->s_addr);
385 htag->shim.label = tshim.shim.label;
386 htag->s_addr = htonl(htag->s_addr);
387
388 /* check if there is anything more to prepend */
389 htag = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr;
390 while (psize <= rt_gettag(rt)->sa_len - sizeof(tshim)) {
391 htag++;
392 memset(&tshim, 0, sizeof(tshim));
393 tshim.s_addr = ntohl(htag->s_addr);
394 tshim.shim.bos = tshim.shim.exp = 0;
395 tshim.shim.ttl = mpls_defttl;
396 if (tshim.shim.label != MPLS_LABEL_IMPLNULL &&
397 ((m = mpls_prepend_shim(m, &tshim)) == NULL))
398 return ENOBUFS;
399 psize += sizeof(tshim);
400 }
401
402 error = mpls_send_frame(m, rt->rt_ifp, rt);
403
404 done:
405 if (error != 0 && m != NULL)
406 m_freem(m);
407 if (rt != NULL)
408 RTFREE(rt);
409
410 return error;
411 }
412
413 static int
414 mpls_send_frame(struct mbuf *m, struct ifnet *ifp, struct rtentry *rt)
415 {
416 union mpls_shim msh;
417
418 if ((rt->rt_flags & RTF_GATEWAY) == 0)
419 return EHOSTUNREACH;
420
421 rt->rt_use++;
422
423 msh.s_addr = MPLS_GETSADDR(rt);
424 if (msh.shim.label == MPLS_LABEL_IMPLNULL ||
425 (m->m_flags & (M_MCAST | M_BCAST))) {
426 m_adj(m, sizeof(union mpls_shim));
427 m->m_pkthdr.csum_flags = 0;
428 }
429
430 switch(ifp->if_type) {
431 /* only these are supported for now */
432 case IFT_ETHER:
433 case IFT_TUNNEL:
434 case IFT_LOOP:
435 return (*ifp->if_output)(ifp, m, rt->rt_gateway, rt);
436 break;
437 default:
438 return ENETUNREACH;
439 }
440 return 0;
441 }
442
443
444
445 #ifdef INET
446 static int
447 mpls_unlabel_inet(struct mbuf *m)
448 {
449 int s, iphlen;
450 struct ip *iph;
451 union mpls_shim *ms;
452 struct ifqueue *inq;
453
454 if (mpls_mapttl_inet || mpls_mapprec_inet) {
455
456 /* get shim info */
457 ms = mtod(m, union mpls_shim *);
458 ms->s_addr = ntohl(ms->s_addr);
459
460 /* and get rid of it */
461 m_adj(m, sizeof(union mpls_shim));
462
463 /* get ip header */
464 if (m->m_len < sizeof (struct ip) &&
465 (m = m_pullup(m, sizeof(struct ip))) == NULL)
466 return ENOBUFS;
467 iph = mtod(m, struct ip *);
468 iphlen = iph->ip_hl << 2;
469
470 /* get it all */
471 if (m->m_len < iphlen) {
472 if ((m = m_pullup(m, iphlen)) == NULL)
473 return ENOBUFS;
474 iph = mtod(m, struct ip *);
475 }
476
477 /* check ipsum */
478 if (in_cksum(m, iphlen) != 0) {
479 m_freem(m);
480 return EINVAL;
481 }
482
483 /* set IP ttl from MPLS ttl */
484 if (mpls_mapttl_inet)
485 iph->ip_ttl = ms->shim.ttl;
486
487 /* set IP Precedence from MPLS Exp */
488 if (mpls_mapprec_inet) {
489 iph->ip_tos = (iph->ip_tos << 3) >> 3;
490 iph->ip_tos |= ms->shim.exp << 5;
491 }
492
493 /* reset ipsum because we modified TTL and TOS */
494 iph->ip_sum = 0;
495 iph->ip_sum = in_cksum(m, iphlen);
496 } else
497 m_adj(m, sizeof(union mpls_shim));
498
499 /* Put it on IP queue */
500 inq = &ipintrq;
501 s = splnet();
502 if (IF_QFULL(inq)) {
503 IF_DROP(inq);
504 splx(s);
505 m_freem(m);
506 return ENOBUFS;
507 }
508 IF_ENQUEUE(inq, m);
509 splx(s);
510 schednetisr(NETISR_IP);
511
512 return 0;
513 }
514
515 /*
516 * Prepend MPLS label
517 */
518 static struct mbuf *
519 mpls_label_inet(struct mbuf *m, union mpls_shim *ms, uint offset)
520 {
521 struct ip *iphdr;
522
523 if (mpls_mapttl_inet || mpls_mapprec_inet) {
524 if ((m->m_len < sizeof(struct ip)) &&
525 (m = m_pullup(m, offset + sizeof(struct ip))) == 0)
526 return NULL; /* XXX */
527 iphdr = kmem_alloc(sizeof(struct ip), KM_NOSLEEP);
528 if (iphdr == NULL)
529 return NULL;
530 m_copydata(m, offset, sizeof(struct ip), iphdr);
531
532 /* Map TTL */
533 if (mpls_mapttl_inet)
534 ms->shim.ttl = iphdr->ip_ttl;
535
536 /* Copy IP precedence to EXP */
537 if (mpls_mapprec_inet)
538 ms->shim.exp = ((u_int8_t)iphdr->ip_tos) >> 5;
539 kmem_free (iphdr, sizeof(struct ip));
540 }
541
542 if ((m = mpls_prepend_shim(m, ms)) == NULL)
543 return NULL;
544
545 return m;
546 }
547
548 #endif /* INET */
549
550 #ifdef INET6
551
552 static int
553 mpls_unlabel_inet6(struct mbuf *m)
554 {
555 struct ip6_hdr *ip6hdr;
556 union mpls_shim ms;
557 struct ifqueue *inq;
558 int s;
559
560 /* TODO: mapclass */
561 if (mpls_mapttl_inet6) {
562 ms.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr);
563 m_adj(m, sizeof(union mpls_shim));
564
565 if (m->m_len < sizeof (struct ip6_hdr) &&
566 (m = m_pullup(m, sizeof(struct ip6_hdr))) == 0)
567 return ENOBUFS;
568 ip6hdr = mtod(m, struct ip6_hdr *);
569
570 /* Because we just decremented this in mpls_lse */
571 ip6hdr->ip6_hlim = ms.shim.ttl + 1;
572 } else
573 m_adj(m, sizeof(union mpls_shim));
574
575 /* Put it back on IPv6 stack */
576 schednetisr(NETISR_IPV6);
577 inq = &ip6intrq;
578 s = splnet();
579 if (IF_QFULL(inq)) {
580 IF_DROP(inq);
581 splx(s);
582 m_freem(m);
583 return ENOBUFS;
584 }
585
586 IF_ENQUEUE(inq, m);
587 splx(s);
588
589 return 0;
590 }
591
592 static struct mbuf *
593 mpls_label_inet6(struct mbuf *m, union mpls_shim *ms, uint offset)
594 {
595 struct ip6_hdr *ip6h;
596
597 if (mpls_mapttl_inet6 || mpls_mapclass_inet6) {
598 if (m->m_len < sizeof(struct ip6_hdr) &&
599 (m = m_pullup(m, offset + sizeof(struct ip6_hdr))) == 0)
600 return NULL;
601 ip6h = kmem_alloc(sizeof(struct ip6_hdr), KM_NOSLEEP);
602 if (ip6h == NULL)
603 return NULL;
604 m_copydata(m, offset, sizeof(struct ip6_hdr), ip6h);
605
606 if (mpls_mapttl_inet6)
607 ms->shim.ttl = ip6h->ip6_hlim;
608
609 if (mpls_mapclass_inet6)
610 ms->shim.exp = ip6h->ip6_vfc << 1 >> 5;
611 kmem_free(ip6h, sizeof(struct ip6_hdr));
612 }
613
614 if ((m = mpls_prepend_shim(m, ms)) == NULL)
615 return NULL;
616
617 return m;
618 }
619
620 #endif /* INET6 */
621
622 static struct mbuf *
623 mpls_prepend_shim(struct mbuf *m, union mpls_shim *ms)
624 {
625 union mpls_shim *shim;
626
627 M_PREPEND(m, sizeof(*ms), M_DONTWAIT);
628 if (m == NULL)
629 return NULL;
630
631 if (m->m_len < sizeof(union mpls_shim) &&
632 (m = m_pullup(m, sizeof(union mpls_shim))) == 0)
633 return NULL;
634
635 shim = mtod(m, union mpls_shim *);
636
637 memcpy(shim, ms, sizeof(*shim));
638 shim->s_addr = htonl(shim->s_addr);
639
640 return m;
641 }
642