Home | History | Annotate | Line # | Download | only in net
if_gre.c revision 1.10
      1 /*	$NetBSD: if_gre.c,v 1.10 2000/07/05 18:14:13 thorpej Exp $ */
      2 
      3 /*
      4  * Copyright (c) 1998 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Heiko W.Rupp <hwr (at) pilhuhn.de>
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *        This product includes software developed by the NetBSD
     21  *        Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 /*
     40  * Encapsulate L3 protocols into IP.
     41  * See RFC 1701 and RFC 1702 for more details.
     42  * if_gre is compatible with Cisco GRE tunnels, so you can
     43  * have a NetBSD box as the other end of a tunnel interface of a Cisco
     44  * router. See gre(4) for more details.
     45  * Also supported: minimal encapsulation within IP (proto 55), see RFC 2004
     46  */
     47 
     48 #include "gre.h"
     49 #if NGRE > 0
     50 
     51 #include "opt_inet.h"
     52 #include "opt_ns.h"
     53 #include "bpfilter.h"
     54 
     55 #include <sys/param.h>
     56 #include <sys/malloc.h>
     57 #include <sys/mbuf.h>
     58 #include <sys/protosw.h>
     59 #include <sys/socket.h>
     60 #include <sys/ioctl.h>
     61 #include <sys/queue.h>
     62 #if __NetBSD__
     63 #include <sys/systm.h>
     64 #endif
     65 
     66 #include <machine/cpu.h>
     67 
     68 #include <net/ethertypes.h>
     69 #include <net/if.h>
     70 #include <net/if_types.h>
     71 #include <net/netisr.h>
     72 #include <net/route.h>
     73 
     74 #ifdef INET
     75 #include <netinet/in.h>
     76 #include <netinet/in_systm.h>
     77 #include <netinet/in_var.h>
     78 #include <netinet/ip.h>
     79 #include <netinet/ip_var.h>
     80 #else
     81 #error "Huh? if_gre without inet?"
     82 #endif
     83 
     84 #ifdef NS
     85 #include <netns/ns.h>
     86 #include <netns/ns_if.h>
     87 #endif
     88 
     89 #ifdef NETATALK
     90 #include <netatalk/at.h>
     91 #include <netatalk/at_var.h>
     92 #include <netatalk/at_extern.h>
     93 #endif
     94 
     95 #if NBPFILTER > 0
     96 #include <sys/time.h>
     97 #include <net/bpf.h>
     98 #endif
     99 
    100 #include <net/if_gre.h>
    101 
    102 #define GREMTU 1450	/* XXX this is below the standard MTU of
    103                          1500 Bytes, allowing for headers,
    104                          but we should possibly do path mtu discovery
    105                          before changing if state to up to find the
    106                          correct value */
    107 #define LINK_MASK (IFF_LINK0|IFF_LINK1|IFF_LINK2)
    108 
    109 LIST_HEAD(, gre_softc) gre_softc_list;
    110 
    111 int	gre_clone_create __P((struct if_clone *, int));
    112 void	gre_clone_destroy __P((struct ifnet *));
    113 
    114 struct if_clone gre_cloner =
    115     IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
    116 
    117 void gre_compute_route(struct gre_softc *sc);
    118 #ifdef DIAGNOSTIC
    119 void gre_inet_ntoa(struct in_addr in);
    120 #endif
    121 
    122 void	greattach __P((int));
    123 
    124 /* ARGSUSED */
    125 void
    126 greattach(count)
    127 	int count;
    128 {
    129 
    130 	LIST_INIT(&gre_softc_list);
    131 	if_clone_attach(&gre_cloner);
    132 }
    133 
    134 int
    135 gre_clone_create(ifc, unit)
    136 	struct if_clone *ifc;
    137 	int unit;
    138 {
    139 	struct gre_softc *sc;
    140 
    141 	sc = malloc(sizeof(struct gre_softc), M_DEVBUF, M_WAITOK);
    142 	memset(sc, 0, sizeof(struct gre_softc));
    143 
    144 	sprintf(sc->sc_if.if_xname, "%s%d", ifc->ifc_name, unit);
    145 	sc->sc_if.if_softc = sc;
    146 	sc->sc_if.if_type =  IFT_OTHER;
    147 	sc->sc_if.if_addrlen = 4;
    148 	sc->sc_if.if_hdrlen = 24; /* IP + GRE */
    149 	sc->sc_if.if_mtu = GREMTU;
    150 	sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
    151 	sc->sc_if.if_output = gre_output;
    152 	sc->sc_if.if_ioctl = gre_ioctl;
    153 	sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
    154 	sc->g_proto = IPPROTO_GRE;
    155 	if_attach(&sc->sc_if);
    156 #if 0
    157 #if NBPFILTER > 0
    158 	bpfattach(&sc->gre_bpf, &sc->sc_if, DLT_RAW, sizeof(u_int32_t));
    159 #endif
    160 #endif
    161 	LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
    162 	return (0);
    163 }
    164 
    165 void
    166 gre_clone_destroy(ifp)
    167 	struct ifnet *ifp;
    168 {
    169 	struct gre_softc *sc = ifp->if_softc;
    170 
    171 	LIST_REMOVE(sc, sc_list);
    172 #if 0
    173 #if NBPFILTER > 0
    174 	bpfdetach(ifp);
    175 #endif
    176 #endif
    177 	if_detach(ifp);
    178 	free(sc, M_DEVBUF);
    179 }
    180 
    181 /*
    182  * The output routine. Takes a packet and encapsulates it in the protocol
    183  * given by sc->g_proto. See also RFC 1701 and RFC 2004
    184  */
    185 
    186 #if 0
    187 struct ip ip_h;
    188 #endif
    189 struct mobile_h mob_h;
    190 
    191 int
    192 gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
    193 	   struct rtentry *rt)
    194 {
    195 	int error = 0;
    196 	struct gre_softc *sc = ifp->if_softc;
    197 	struct greip *gh;
    198 	struct ip *inp;
    199 	u_char ttl, osrc;
    200 	u_short etype = 0;
    201 
    202 
    203 	gh = NULL;
    204 	inp = NULL;
    205 	osrc = 0;
    206 
    207 #if 0
    208 #if NBPFILTER >0
    209 
    210 	if (sc->gre_bpf) {
    211 		/* see comment of other if_foo.c files */
    212 		struct mbuf m0;
    213 		u_int af = dst->sa_family;
    214 
    215 		m0.m_next = m;
    216 		m0.m_len = 4;
    217 		m0.m_data = (char *)&af;
    218 
    219 		bpf_mtap(ifp->if_bpf, &m0);
    220 	}
    221 #endif
    222 #endif
    223 
    224 	ttl = 255;
    225 
    226 	if (sc->g_proto == IPPROTO_MOBILE) {
    227 		if (dst->sa_family == AF_INET) {
    228 			struct mbuf *m0;
    229 			int msiz;
    230 
    231 			inp = mtod(m, struct ip *);
    232 
    233 			memset(&mob_h, 0, MOB_H_SIZ_L);
    234 			mob_h.proto = (inp->ip_p) << 8;
    235 			mob_h.odst = inp->ip_dst.s_addr;
    236 			inp->ip_dst.s_addr = sc->g_dst.s_addr;
    237 
    238 			/*
    239 			 * If the packet comes from our host, we only change
    240 			 * the destination address in the IP header.
    241 			 * Else we also need to save and change the source
    242 			 */
    243 			if (in_hosteq(inp->ip_src, sc->g_src)) {
    244 				msiz = MOB_H_SIZ_S;
    245 			} else {
    246 				mob_h.proto |= MOB_H_SBIT;
    247 				mob_h.osrc = inp->ip_src.s_addr;
    248 				inp->ip_src.s_addr = sc->g_src.s_addr;
    249 				msiz = MOB_H_SIZ_L;
    250 			}
    251 			HTONS(mob_h.proto);
    252 			mob_h.hcrc = gre_in_cksum((u_short *)&mob_h, msiz);
    253 
    254 			if ((m->m_data - msiz) < m->m_pktdat) {
    255 				/* need new mbuf */
    256 				MGETHDR(m0, M_DONTWAIT, MT_HEADER);
    257 				if (m0 == NULL) {
    258 					IF_DROP(&ifp->if_snd);
    259 					m_freem(m);
    260 					return (ENOBUFS);
    261 				}
    262 				m0->m_next = m;
    263 				m->m_data += sizeof(struct ip);
    264 				m->m_len -= sizeof(struct ip);
    265 				m0->m_pkthdr.len = m->m_pkthdr.len + msiz;
    266 				m0->m_len = msiz + sizeof(struct ip);
    267 				m0->m_data += max_linkhdr;
    268 				memcpy(mtod(m0, caddr_t), (caddr_t)inp,
    269 				       sizeof(struct ip));
    270 				m = m0;
    271 			} else {  /* we have some spave left in the old one */
    272 				m->m_data -= msiz;
    273 				m->m_len += msiz;
    274 				m->m_pkthdr.len += msiz;
    275 				memmove(mtod(m, caddr_t), inp,
    276 					sizeof(struct ip));
    277 			}
    278 			inp=mtod(m, struct ip *);
    279 			memcpy((caddr_t)(inp + 1), &mob_h, (unsigned)msiz);
    280 			NTOHS(inp->ip_len);
    281 			inp->ip_len += msiz;
    282 		} else {  /* AF_INET */
    283 			IF_DROP(&ifp->if_snd);
    284 			m_freem(m);
    285 			return (EINVAL);
    286 		}
    287 	} else if (sc->g_proto == IPPROTO_GRE) {
    288 		switch(dst->sa_family) {
    289 		case AF_INET:
    290 			inp = mtod(m, struct ip *);
    291 			ttl = inp->ip_ttl;
    292 			etype = ETHERTYPE_IP;
    293 			break;
    294 #ifdef NETATALK
    295 		case AF_APPLETALK:
    296 			etype = ETHERTYPE_ATALK;
    297 			break;
    298 #endif
    299 #ifdef NS
    300 		case AF_NS:
    301 			etype = ETHERTYPE_NS;
    302 			break;
    303 #endif
    304 		default:
    305 			IF_DROP(&ifp->if_snd);
    306 			m_freem(m);
    307 			return (EAFNOSUPPORT);
    308 		}
    309 		M_PREPEND(m, sizeof(struct greip), M_DONTWAIT);
    310 	} else {
    311 		error = EINVAL;
    312 		IF_DROP(&ifp->if_snd);
    313 		m_freem(m);
    314 		return (error);
    315 	}
    316 
    317 
    318 	if (m == NULL) {
    319 		IF_DROP(&ifp->if_snd);
    320 		return (ENOBUFS);
    321 	}
    322 
    323 	gh = mtod(m, struct greip *);
    324 	if (sc->g_proto == IPPROTO_GRE) {
    325 		/* we don't have any GRE flags for now */
    326 
    327 		memset((void *)&gh->gi_g, 0, sizeof(struct gre_h));
    328 		gh->gi_ptype = htons(etype);
    329 	}
    330 
    331 	gh->gi_pr = sc->g_proto;
    332 	if (sc->g_proto != IPPROTO_MOBILE) {
    333 		gh->gi_src = sc->g_src;
    334 		gh->gi_dst = sc->g_dst;
    335 		((struct ip*)gh)->ip_hl = (sizeof(struct ip)) >> 2;
    336 		((struct ip*)gh)->ip_ttl = ttl;
    337 		((struct ip*)gh)->ip_tos = inp->ip_tos;
    338 	    gh->gi_len = m->m_pkthdr.len;
    339 	}
    340 
    341 	ifp->if_opackets++;
    342 	ifp->if_obytes += m->m_pkthdr.len;
    343 	/* send it off */
    344 	error = ip_output(m, NULL, &sc->route, 0, NULL);
    345 	if (error)
    346 		ifp->if_oerrors++;
    347 	return (error);
    348 
    349 }
    350 
    351 int
    352 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
    353 {
    354 
    355 	struct ifaddr *ifa = (struct ifaddr *)data;
    356 	struct ifreq *ifr = (struct ifreq *)data;
    357 	struct in_ifaddr *ia = (struct in_ifaddr *)data;
    358 	struct gre_softc *sc = ifp->if_softc;
    359 	int s;
    360 	struct sockaddr_in si;
    361 	struct sockaddr *sa = NULL;
    362 	int error;
    363 
    364 	error = 0;
    365 
    366 	s = splimp();
    367 	switch(cmd) {
    368 	case SIOCSIFADDR:
    369 	case SIOCSIFDSTADDR:
    370 		/*
    371                  * set tunnel endpoints in case that we "only"
    372                  * have ip over ip encapsulation. This allows to
    373                  * set tunnel endpoints with ifconfig.
    374                  */
    375 		if (ifa->ifa_addr->sa_family == AF_INET) {
    376 			sa = ifa->ifa_addr;
    377 			sc->g_src = (satosin(sa))->sin_addr;
    378 			sc->g_dst = ia->ia_dstaddr.sin_addr;
    379 			if ((sc->g_src.s_addr != INADDR_ANY) &&
    380 			    (sc->g_dst.s_addr != INADDR_ANY)) {
    381 				if (sc->route.ro_rt != 0) /* free old route */
    382 					RTFREE(sc->route.ro_rt);
    383 				gre_compute_route(sc);
    384 				ifp->if_flags |= IFF_UP;
    385 			}
    386 		}
    387 		break;
    388 	case SIOCSIFFLAGS:
    389 		if ((sc->g_dst.s_addr == INADDR_ANY) ||
    390 		    (sc->g_src.s_addr == INADDR_ANY))
    391 			ifp->if_flags &= ~IFF_UP;
    392 
    393 		switch(ifr->ifr_flags & LINK_MASK) {
    394 			case IFF_LINK0:
    395 				sc->g_proto = IPPROTO_GRE;
    396 				ifp->if_flags |= IFF_LINK0;
    397 				ifp->if_flags &= ~(IFF_LINK1|IFF_LINK2);
    398 				break;
    399 			case IFF_LINK2:
    400 				sc->g_proto = IPPROTO_MOBILE;
    401 				ifp->if_flags |= IFF_LINK2;
    402 				ifp->if_flags &= ~(IFF_LINK0|IFF_LINK1);
    403 				break;
    404 		}
    405 		break;
    406 	case SIOCSIFMTU:
    407 		if (ifr->ifr_mtu > GREMTU || ifr->ifr_mtu < 576) {
    408 			error = EINVAL;
    409 			break;
    410 		}
    411 		ifp->if_mtu = ifr->ifr_mtu;
    412 		break;
    413 	case SIOCGIFMTU:
    414 		ifr->ifr_mtu = sc->sc_if.if_mtu;
    415 		break;
    416 	case SIOCADDMULTI:
    417 	case SIOCDELMULTI:
    418 		if (ifr == 0) {
    419 			error = EAFNOSUPPORT;
    420 			break;
    421 		}
    422 		switch (ifr->ifr_addr.sa_family) {
    423 #ifdef INET
    424 		case AF_INET:
    425 			break;
    426 #endif
    427 		default:
    428 			error = EAFNOSUPPORT;
    429 			break;
    430 		}
    431 		break;
    432 	case GRESPROTO:
    433 		sc->g_proto = ifr->ifr_flags;
    434 		switch (sc->g_proto) {
    435 		case IPPROTO_GRE :
    436 			ifp->if_flags |= IFF_LINK0;
    437 			ifp->if_flags &= ~(IFF_LINK1|IFF_LINK2);
    438 			break;
    439 		case IPPROTO_MOBILE :
    440 			ifp->if_flags |= IFF_LINK2;
    441 			ifp->if_flags &= ~(IFF_LINK1|IFF_LINK2);
    442 			break;
    443 		default:
    444 			ifp->if_flags &= ~(IFF_LINK0|IFF_LINK1|IFF_LINK2);
    445 		}
    446 		break;
    447 	case GREGPROTO:
    448 		ifr->ifr_flags = sc->g_proto;
    449 		break;
    450 	case GRESADDRS:
    451 	case GRESADDRD:
    452 		/*
    453 	         * set tunnel endpoints, compute a less specific route
    454 	         * to the remote end and mark if as up
    455                  */
    456 		sa = &ifr->ifr_addr;
    457 		if (cmd == GRESADDRS )
    458 			sc->g_src = (satosin(sa))->sin_addr;
    459 		if (cmd == GRESADDRD )
    460 			sc->g_dst = (satosin(sa))->sin_addr;
    461 		if ((sc->g_src.s_addr != INADDR_ANY) &&
    462 		    (sc->g_dst.s_addr != INADDR_ANY)) {
    463 			if (sc->route.ro_rt != 0) /* free old route */
    464 				RTFREE(sc->route.ro_rt);
    465 			gre_compute_route(sc);
    466 			ifp->if_flags |= IFF_UP;
    467 		}
    468 		break;
    469 	case GREGADDRS:
    470 		si.sin_addr.s_addr = sc->g_src.s_addr;
    471 		sa = sintosa(&si);
    472 		ifr->ifr_addr = *sa;
    473 		break;
    474 	case GREGADDRD:
    475 		si.sin_addr.s_addr = sc->g_dst.s_addr;
    476 		sa = sintosa(&si);
    477 		ifr->ifr_addr = *sa;
    478 		break;
    479 	default:
    480 		error = EINVAL;
    481 	}
    482 
    483 	splx(s);
    484 	return (error);
    485 }
    486 
    487 /*
    488  * computes a route to our destination that is not the one
    489  * which would be taken by ip_output(), as this one will loop back to
    490  * us. If the interface is p2p as  a--->b, then a routing entry exists
    491  * If we now send a packet to b (e.g. ping b), this will come down here
    492  * gets src=a, dst=b tacked on and would from ip_ouput() sent back to
    493  * if_gre.
    494  * Goal here is to compute a route to b that is less specific than
    495  * a-->b. We know that this one exists as in normal operation we have
    496  * at least a default route which matches.
    497  */
    498 
    499 void
    500 gre_compute_route(struct gre_softc *sc)
    501 {
    502 	struct route *ro;
    503 	u_int32_t a, b, c;
    504 
    505 	ro = &sc->route;
    506 
    507 	memset(ro, 0, sizeof(struct route));
    508 	((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
    509 	ro->ro_dst.sa_family = AF_INET;
    510 	ro->ro_dst.sa_len = sizeof(ro->ro_dst);
    511 
    512 	/*
    513 	 * toggle last bit, so our interface is not found, but a less
    514          * specific route. I'd rather like to specify a shorter mask,
    515  	 * but this is not possible. Should work though. XXX
    516 	 * there is a simpler way ...
    517          */
    518 	if ((sc->sc_if.if_flags & IFF_LINK1) == 0) {
    519 		a = ntohl(sc->g_dst.s_addr);
    520 		b = a & 0x01;
    521 		c = a & 0xfffffffe;
    522 		b = b ^ 0x01;
    523 		a = b | c;
    524 		((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr
    525 			= htonl(a);
    526 	}
    527 
    528 #ifdef DIAGNOSTIC
    529 	printf("%s: searching a route to ", sc->sc_if.if_xname);
    530 	gre_inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr);
    531 #endif
    532 
    533 	rtalloc(ro);
    534 
    535 	/*
    536 	 * now change it back - else ip_output will just drop
    537          * the route and search one to this interface ...
    538          */
    539 	if ((sc->sc_if.if_flags & IFF_LINK1) == 0)
    540 		((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
    541 
    542 #ifdef DIAGNOSTIC
    543 	printf(", choosing %s with gateway ",ro->ro_rt->rt_ifp->if_xname);
    544 	gre_inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr);
    545 	printf("\n");
    546 #endif
    547 }
    548 
/*
 * Checksum a plain buffer the way in_cksum does it for mbuf chains.
 *
 *	p	start of the buffer, read as 16 bit words
 *	len	length of the buffer in bytes
 *
 * All complete 16 bit words are added up; if len is odd, the last byte
 * is added as a word whose second byte is zero.  The carries are then
 * folded back into the low 16 bits and the complement of the result is
 * returned.
 */

u_short
gre_in_cksum(u_short *p, u_int len)
{
	u_int acc = 0;
	u_int left;

	for (left = len >> 1; left > 0; left--)
		acc += *p++;

	if ((len & 1) != 0) {
		/* pad the trailing byte to a full word */
		union {
			u_short w;
			u_char c[2];
		} tail;

		tail.c[0] = *(u_char *)p;
		tail.c[1] = 0;
		acc += tail.w;
	}

	/* end-around-carry */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += (acc >> 16);
	return (~acc);
}
    578 
    579 
/* while testing ... */
#ifdef DIAGNOSTIC
/*
 * Print the four bytes of an IPv4 address, in the order they are
 * stored in memory, as decimal numbers separated by dots.  Nothing is
 * returned; the text goes out through printf.
 */
void
gre_inet_ntoa(struct in_addr in)
{
	const unsigned char *octet = (const unsigned char *)&in;

	printf("%d.%d.%d.%d", octet[0], octet[1], octet[2], octet[3]);
}

#endif
    593 #endif
    594 
    595