Home | History | Annotate | Line # | Download | only in net
if_gre.c revision 1.19
      1 /*	$NetBSD: if_gre.c,v 1.19 2001/05/10 01:04:08 itojun Exp $ */
      2 
      3 /*
      4  * Copyright (c) 1998 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Heiko W.Rupp <hwr (at) pilhuhn.de>
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *        This product includes software developed by the NetBSD
     21  *        Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 /*
     40  * Encapsulate L3 protocols into IP
     41  * See RFC 1701 and 1702 for more details.
     42  * If_gre is compatible with Cisco GRE tunnels, so you can
     43  * have a NetBSD box as the other end of a tunnel interface of a Cisco
     44  * router. See gre(4) for more details.
     45  * Also supported:  IP in IP encaps (proto 55) as of RFC 2004
     46  */
     47 
     48 #include "gre.h"
     49 #if NGRE > 0
     50 
     51 #include "opt_inet.h"
     52 #include "opt_ns.h"
     53 #include "bpfilter.h"
     54 
     55 #include <sys/param.h>
     56 #include <sys/malloc.h>
     57 #include <sys/mbuf.h>
     58 #include <sys/proc.h>
     59 #include <sys/protosw.h>
     60 #include <sys/socket.h>
     61 #include <sys/ioctl.h>
     62 #include <sys/queue.h>
     63 #if __NetBSD__
     64 #include <sys/systm.h>
     65 #endif
     66 
     67 #include <machine/cpu.h>
     68 
     69 #include <net/ethertypes.h>
     70 #include <net/if.h>
     71 #include <net/if_types.h>
     72 #include <net/netisr.h>
     73 #include <net/route.h>
     74 
     75 #ifdef INET
     76 #include <netinet/in.h>
     77 #include <netinet/in_systm.h>
     78 #include <netinet/in_var.h>
     79 #include <netinet/ip.h>
     80 #include <netinet/ip_var.h>
     81 #else
     82 #error "Huh? if_gre without inet?"
     83 #endif
     84 
     85 #ifdef NS
     86 #include <netns/ns.h>
     87 #include <netns/ns_if.h>
     88 #endif
     89 
     90 #ifdef NETATALK
     91 #include <netatalk/at.h>
     92 #include <netatalk/at_var.h>
     93 #include <netatalk/at_extern.h>
     94 #endif
     95 
     96 #if NBPFILTER > 0
     97 #include <sys/time.h>
     98 #include <net/bpf.h>
     99 #endif
    100 
    101 #include <net/if_gre.h>
    102 
/*
 * Default MTU given to a freshly created gre interface.  It is also the
 * upper bound accepted by the SIOCSIFMTU handler in gre_ioctl().
 */
#define GREMTU 1450	/* XXX this is below the standard MTU of
                         1500 Bytes, allowing for headers,
                         but we should possibly do path mtu discovery
                         before changing if state to up to find the
                         correct value */
/* All link-level flag bits; SIOCSIFFLAGS inspects these to pick g_proto. */
#define LINK_MASK (IFF_LINK0|IFF_LINK1|IFF_LINK2)

/* Every gre softc: inserted by gre_clone_create(), removed on destroy. */
struct gre_softc_head gre_softc_list;

/* Cloner callbacks registered through gre_cloner below. */
int	gre_clone_create __P((struct if_clone *, int));
void	gre_clone_destroy __P((struct ifnet *));

/* Cloner descriptor for "gre" interfaces; attached in greattach(). */
struct if_clone gre_cloner =
    IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);

/* (Re)computes sc->route, the cached route used by gre_output(). */
void gre_compute_route(struct gre_softc *sc);

/* Pseudo-device attach entry point. */
void	greattach __P((int));
    121 
    122 /* ARGSUSED */
    123 void
    124 greattach(count)
    125 	int count;
    126 {
    127 
    128 	LIST_INIT(&gre_softc_list);
    129 	if_clone_attach(&gre_cloner);
    130 }
    131 
    132 int
    133 gre_clone_create(ifc, unit)
    134 	struct if_clone *ifc;
    135 	int unit;
    136 {
    137 	struct gre_softc *sc;
    138 
    139 	sc = malloc(sizeof(struct gre_softc), M_DEVBUF, M_WAITOK);
    140 	memset(sc, 0, sizeof(struct gre_softc));
    141 
    142 	sprintf(sc->sc_if.if_xname, "%s%d", ifc->ifc_name, unit);
    143 	sc->sc_if.if_softc = sc;
    144 	sc->sc_if.if_type =  IFT_OTHER;
    145 	sc->sc_if.if_addrlen = 4;
    146 	sc->sc_if.if_hdrlen = 24; /* IP + GRE */
    147 	sc->sc_if.if_dlt = DLT_NULL;
    148 	sc->sc_if.if_mtu = GREMTU;
    149 	sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
    150 	sc->sc_if.if_output = gre_output;
    151 	sc->sc_if.if_ioctl = gre_ioctl;
    152 	sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
    153 	sc->g_proto = IPPROTO_GRE;
    154 	if_attach(&sc->sc_if);
    155 	if_alloc_sadl(&sc->sc_if);
    156 #if NBPFILTER > 0
    157 	bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t));
    158 #endif
    159 	LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
    160 	return (0);
    161 }
    162 
    163 void
    164 gre_clone_destroy(ifp)
    165 	struct ifnet *ifp;
    166 {
    167 	struct gre_softc *sc = ifp->if_softc;
    168 
    169 	LIST_REMOVE(sc, sc_list);
    170 #if NBPFILTER > 0
    171 	bpfdetach(ifp);
    172 #endif
    173 	if_detach(ifp);
    174 	free(sc, M_DEVBUF);
    175 }
    176 
    177 /*
    178  * The output routine. Takes a packet and encapsulates it in the protocol
    179  * given by sc->g_proto. See also RFC 1701 and RFC 2004
    180  */
    181 
#if 0
struct ip ip_h;
#endif
/*
 * Scratch mobile-IP header: gre_output() zeroes it, fills it in and then
 * copies it into the outgoing packet.  It is a single file-scope object
 * shared by every gre interface.
 * NOTE(review): presumably relies on callers serialising gre_output() --
 * confirm before calling it from more than one context.
 */
struct mobile_h mob_h;
    186 
    187 int
    188 gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
    189 	   struct rtentry *rt)
    190 {
    191 	int error = 0;
    192 	struct gre_softc *sc = ifp->if_softc;
    193 	struct greip *gh;
    194 	struct ip *inp;
    195 	u_char ttl, osrc;
    196 	u_short etype = 0;
    197 
    198 
    199 	gh = NULL;
    200 	inp = NULL;
    201 	osrc = 0;
    202 
    203 #if NBPFILTER >0
    204 	if (ifp->if_bpf) {
    205 		/* see comment of other if_foo.c files */
    206 		struct mbuf m0;
    207 		u_int32_t af = dst->sa_family;
    208 
    209 		m0.m_next = m;
    210 		m0.m_len = 4;
    211 		m0.m_data = (char *)&af;
    212 
    213 		bpf_mtap(ifp->if_bpf, &m0);
    214 	}
    215 #endif
    216 
    217 	ttl = 255;
    218 
    219 	if (sc->g_proto == IPPROTO_MOBILE) {
    220 		if (dst->sa_family == AF_INET) {
    221 			struct mbuf *m0;
    222 			int msiz;
    223 
    224 			inp = mtod(m, struct ip *);
    225 
    226 			memset(&mob_h, 0, MOB_H_SIZ_L);
    227 			mob_h.proto = (inp->ip_p) << 8;
    228 			mob_h.odst = inp->ip_dst.s_addr;
    229 			inp->ip_dst.s_addr = sc->g_dst.s_addr;
    230 
    231 			/*
    232 			 * If the packet comes from our host, we only change
    233 			 * the destination address in the IP header.
    234 			 * Else we also need to save and change the source
    235 			 */
    236 			if (in_hosteq(inp->ip_src, sc->g_src)) {
    237 				msiz = MOB_H_SIZ_S;
    238 			} else {
    239 				mob_h.proto |= MOB_H_SBIT;
    240 				mob_h.osrc = inp->ip_src.s_addr;
    241 				inp->ip_src.s_addr = sc->g_src.s_addr;
    242 				msiz = MOB_H_SIZ_L;
    243 			}
    244 			HTONS(mob_h.proto);
    245 			mob_h.hcrc = gre_in_cksum((u_short *)&mob_h, msiz);
    246 
    247 			if ((m->m_data - msiz) < m->m_pktdat) {
    248 				/* need new mbuf */
    249 				MGETHDR(m0, M_DONTWAIT, MT_HEADER);
    250 				if (m0 == NULL) {
    251 					IF_DROP(&ifp->if_snd);
    252 					m_freem(m);
    253 					return (ENOBUFS);
    254 				}
    255 				m0->m_next = m;
    256 				m->m_data += sizeof(struct ip);
    257 				m->m_len -= sizeof(struct ip);
    258 				m0->m_pkthdr.len = m->m_pkthdr.len + msiz;
    259 				m0->m_len = msiz + sizeof(struct ip);
    260 				m0->m_data += max_linkhdr;
    261 				memcpy(mtod(m0, caddr_t), (caddr_t)inp,
    262 				       sizeof(struct ip));
    263 				m = m0;
    264 			} else {  /* we have some spave left in the old one */
    265 				m->m_data -= msiz;
    266 				m->m_len += msiz;
    267 				m->m_pkthdr.len += msiz;
    268 				memmove(mtod(m, caddr_t), inp,
    269 					sizeof(struct ip));
    270 			}
    271 			inp=mtod(m, struct ip *);
    272 			memcpy((caddr_t)(inp + 1), &mob_h, (unsigned)msiz);
    273 			NTOHS(inp->ip_len);
    274 			inp->ip_len += msiz;
    275 		} else {  /* AF_INET */
    276 			IF_DROP(&ifp->if_snd);
    277 			m_freem(m);
    278 			return (EINVAL);
    279 		}
    280 	} else if (sc->g_proto == IPPROTO_GRE) {
    281 		switch(dst->sa_family) {
    282 		case AF_INET:
    283 			inp = mtod(m, struct ip *);
    284 			ttl = inp->ip_ttl;
    285 			etype = ETHERTYPE_IP;
    286 			break;
    287 #ifdef NETATALK
    288 		case AF_APPLETALK:
    289 			etype = ETHERTYPE_ATALK;
    290 			break;
    291 #endif
    292 #ifdef NS
    293 		case AF_NS:
    294 			etype = ETHERTYPE_NS;
    295 			break;
    296 #endif
    297 		default:
    298 			IF_DROP(&ifp->if_snd);
    299 			m_freem(m);
    300 			return (EAFNOSUPPORT);
    301 		}
    302 		M_PREPEND(m, sizeof(struct greip), M_DONTWAIT);
    303 	} else {
    304 		error = EINVAL;
    305 		IF_DROP(&ifp->if_snd);
    306 		m_freem(m);
    307 		return (error);
    308 	}
    309 
    310 
    311 	if (m == NULL) {
    312 		IF_DROP(&ifp->if_snd);
    313 		return (ENOBUFS);
    314 	}
    315 
    316 	gh = mtod(m, struct greip *);
    317 	if (sc->g_proto == IPPROTO_GRE) {
    318 		/* we don't have any GRE flags for now */
    319 
    320 		memset((void *)&gh->gi_g, 0, sizeof(struct gre_h));
    321 		gh->gi_ptype = htons(etype);
    322 	}
    323 
    324 	gh->gi_pr = sc->g_proto;
    325 	if (sc->g_proto != IPPROTO_MOBILE) {
    326 		gh->gi_src = sc->g_src;
    327 		gh->gi_dst = sc->g_dst;
    328 		((struct ip*)gh)->ip_hl = (sizeof(struct ip)) >> 2;
    329 		((struct ip*)gh)->ip_ttl = ttl;
    330 		((struct ip*)gh)->ip_tos = inp->ip_tos;
    331 	    gh->gi_len = m->m_pkthdr.len;
    332 	}
    333 
    334 	ifp->if_opackets++;
    335 	ifp->if_obytes += m->m_pkthdr.len;
    336 	/* send it off */
    337 	error = ip_output(m, NULL, &sc->route, 0, NULL);
    338 	if (error)
    339 		ifp->if_oerrors++;
    340 	return (error);
    341 
    342 }
    343 
    344 int
    345 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
    346 {
    347 	struct proc *p = curproc;	/* XXX */
    348 	struct ifaddr *ifa = (struct ifaddr *)data;
    349 	struct ifreq *ifr = (struct ifreq *)data;
    350 	struct in_ifaddr *ia = (struct in_ifaddr *)data;
    351 	struct gre_softc *sc = ifp->if_softc;
    352 	int s;
    353 	struct sockaddr_in si;
    354 	struct sockaddr *sa = NULL;
    355 	int error;
    356 
    357 	error = 0;
    358 
    359 	s = splnet();
    360 	switch(cmd) {
    361 	case SIOCSIFADDR:
    362 	case SIOCSIFDSTADDR:
    363 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
    364 			break;
    365 		/*
    366                  * set tunnel endpoints in case that we "only"
    367                  * have ip over ip encapsulation. This allows to
    368                  * set tunnel endpoints with ifconfig.
    369                  */
    370 		if (ifa->ifa_addr->sa_family == AF_INET) {
    371 			sa = ifa->ifa_addr;
    372 			sc->g_src = (satosin(sa))->sin_addr;
    373 			sc->g_dst = ia->ia_dstaddr.sin_addr;
    374 			if ((sc->g_src.s_addr != INADDR_ANY) &&
    375 			    (sc->g_dst.s_addr != INADDR_ANY)) {
    376 				if (sc->route.ro_rt != 0) /* free old route */
    377 					RTFREE(sc->route.ro_rt);
    378 				gre_compute_route(sc);
    379 				ifp->if_flags |= IFF_UP;
    380 			}
    381 		}
    382 		break;
    383 	case SIOCSIFFLAGS:
    384 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
    385 			break;
    386 		if ((sc->g_dst.s_addr == INADDR_ANY) ||
    387 		    (sc->g_src.s_addr == INADDR_ANY))
    388 			ifp->if_flags &= ~IFF_UP;
    389 
    390 		switch(ifr->ifr_flags & LINK_MASK) {
    391 			case IFF_LINK0:
    392 				sc->g_proto = IPPROTO_GRE;
    393 				ifp->if_flags |= IFF_LINK0;
    394 				ifp->if_flags &= ~(IFF_LINK1|IFF_LINK2);
    395 				break;
    396 			case IFF_LINK2:
    397 				sc->g_proto = IPPROTO_MOBILE;
    398 				ifp->if_flags |= IFF_LINK2;
    399 				ifp->if_flags &= ~(IFF_LINK0|IFF_LINK1);
    400 				break;
    401 		}
    402 		break;
    403 	case SIOCSIFMTU:
    404 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
    405 			break;
    406 		if (ifr->ifr_mtu > GREMTU || ifr->ifr_mtu < 576) {
    407 			error = EINVAL;
    408 			break;
    409 		}
    410 		ifp->if_mtu = ifr->ifr_mtu;
    411 		break;
    412 	case SIOCGIFMTU:
    413 		ifr->ifr_mtu = sc->sc_if.if_mtu;
    414 		break;
    415 	case SIOCADDMULTI:
    416 	case SIOCDELMULTI:
    417 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
    418 			break;
    419 		if (ifr == 0) {
    420 			error = EAFNOSUPPORT;
    421 			break;
    422 		}
    423 		switch (ifr->ifr_addr.sa_family) {
    424 #ifdef INET
    425 		case AF_INET:
    426 			break;
    427 #endif
    428 		default:
    429 			error = EAFNOSUPPORT;
    430 			break;
    431 		}
    432 		break;
    433 	case GRESPROTO:
    434 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
    435 			break;
    436 		sc->g_proto = ifr->ifr_flags;
    437 		switch (sc->g_proto) {
    438 		case IPPROTO_GRE :
    439 			ifp->if_flags |= IFF_LINK0;
    440 			ifp->if_flags &= ~(IFF_LINK1|IFF_LINK2);
    441 			break;
    442 		case IPPROTO_MOBILE :
    443 			ifp->if_flags |= IFF_LINK2;
    444 			ifp->if_flags &= ~(IFF_LINK1|IFF_LINK2);
    445 			break;
    446 		default:
    447 			ifp->if_flags &= ~(IFF_LINK0|IFF_LINK1|IFF_LINK2);
    448 		}
    449 		break;
    450 	case GREGPROTO:
    451 		ifr->ifr_flags = sc->g_proto;
    452 		break;
    453 	case GRESADDRS:
    454 	case GRESADDRD:
    455 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
    456 			break;
    457 		/*
    458 	         * set tunnel endpoints, compute a less specific route
    459 	         * to the remote end and mark if as up
    460                  */
    461 		sa = &ifr->ifr_addr;
    462 		if (cmd == GRESADDRS )
    463 			sc->g_src = (satosin(sa))->sin_addr;
    464 		if (cmd == GRESADDRD )
    465 			sc->g_dst = (satosin(sa))->sin_addr;
    466 		if ((sc->g_src.s_addr != INADDR_ANY) &&
    467 		    (sc->g_dst.s_addr != INADDR_ANY)) {
    468 			if (sc->route.ro_rt != 0) /* free old route */
    469 				RTFREE(sc->route.ro_rt);
    470 			gre_compute_route(sc);
    471 			ifp->if_flags |= IFF_UP;
    472 		}
    473 		break;
    474 	case GREGADDRS:
    475 		si.sin_addr.s_addr = sc->g_src.s_addr;
    476 		sa = sintosa(&si);
    477 		ifr->ifr_addr = *sa;
    478 		break;
    479 	case GREGADDRD:
    480 		si.sin_addr.s_addr = sc->g_dst.s_addr;
    481 		sa = sintosa(&si);
    482 		ifr->ifr_addr = *sa;
    483 		break;
    484 	default:
    485 		error = EINVAL;
    486 	}
    487 
    488 	splx(s);
    489 	return (error);
    490 }
    491 
    492 /*
    493  * computes a route to our destination that is not the one
    494  * which would be taken by ip_output(), as this one will loop back to
    495  * us. If the interface is p2p as  a--->b, then a routing entry exists
    496  * If we now send a packet to b (e.g. ping b), this will come down here
    497  * gets src=a, dst=b tacked on and would from ip_ouput() sent back to
    498  * if_gre.
    499  * Goal here is to compute a route to b that is less specific than
    500  * a-->b. We know that this one exists as in normal operation we have
    501  * at least a default route which matches.
    502  */
    503 
    504 void
    505 gre_compute_route(struct gre_softc *sc)
    506 {
    507 	struct route *ro;
    508 	u_int32_t a, b, c;
    509 
    510 	ro = &sc->route;
    511 
    512 	memset(ro, 0, sizeof(struct route));
    513 	((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
    514 	ro->ro_dst.sa_family = AF_INET;
    515 	ro->ro_dst.sa_len = sizeof(ro->ro_dst);
    516 
    517 	/*
    518 	 * toggle last bit, so our interface is not found, but a less
    519          * specific route. I'd rather like to specify a shorter mask,
    520  	 * but this is not possible. Should work though. XXX
    521 	 * there is a simpler way ...
    522          */
    523 	if ((sc->sc_if.if_flags & IFF_LINK1) == 0) {
    524 		a = ntohl(sc->g_dst.s_addr);
    525 		b = a & 0x01;
    526 		c = a & 0xfffffffe;
    527 		b = b ^ 0x01;
    528 		a = b | c;
    529 		((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr
    530 			= htonl(a);
    531 	}
    532 
    533 #ifdef DIAGNOSTIC
    534 	printf("%s: searching a route to %s", sc->sc_if.if_xname,
    535 	    inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr));
    536 #endif
    537 
    538 	rtalloc(ro);
    539 
    540 	/*
    541 	 * now change it back - else ip_output will just drop
    542          * the route and search one to this interface ...
    543          */
    544 	if ((sc->sc_if.if_flags & IFF_LINK1) == 0)
    545 		((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
    546 
    547 #ifdef DIAGNOSTIC
    548 	printf(", choosing %s with gateway %s", ro->ro_rt->rt_ifp->if_xname,
    549 	    inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr));
    550 	printf("\n");
    551 #endif
    552 }
    553 
/*
 * Checksum a flat buffer - the counterpart of in_cksum(), which works on
 * mbuf chains.
 *
 * p    start of the buffer, read as 16 bit words in host memory order
 * len  buffer length in bytes; an odd trailing byte is summed as if it
 *      were followed by a zero byte
 *
 * Returns the complement of the 16 bit sum with the carries folded back
 * in.
 */

u_short
gre_in_cksum(u_short *p, u_int len)
{
	u_int acc = 0;
	u_int words = len / 2;
	u_int i;

	for (i = 0; i < words; i++)
		acc += p[i];

	if ((len % 2) != 0) {
		/* pad the odd byte to a full word, keeping its position */
		union {
			u_short w;
			u_char c[2];
		} tail;

		tail.c[0] = *(u_char *)(p + words);
		tail.c[1] = 0;
		acc += tail.w;
	}

	/* end-around-carry */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += acc >> 16;
	return (~acc);
}
    583 #endif
    584