Home | History | Annotate | Line # | Download | only in net
if_gre.c revision 1.83.2.2
      1 /*	$NetBSD: if_gre.c,v 1.83.2.2 2007/03/12 05:59:11 rmind Exp $ */
      2 
      3 /*
      4  * Copyright (c) 1998 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Heiko W.Rupp <hwr (at) pilhuhn.de>
      9  *
     10  * IPv6-over-GRE contributed by Gert Doering <gert (at) greenie.muc.de>
     11  *
     12  * Redistribution and use in source and binary forms, with or without
     13  * modification, are permitted provided that the following conditions
     14  * are met:
     15  * 1. Redistributions of source code must retain the above copyright
     16  *    notice, this list of conditions and the following disclaimer.
     17  * 2. Redistributions in binary form must reproduce the above copyright
     18  *    notice, this list of conditions and the following disclaimer in the
     19  *    documentation and/or other materials provided with the distribution.
     20  * 3. All advertising materials mentioning features or use of this software
     21  *    must display the following acknowledgement:
     22  *        This product includes software developed by the NetBSD
     23  *        Foundation, Inc. and its contributors.
     24  * 4. Neither the name of The NetBSD Foundation nor the names of its
     25  *    contributors may be used to endorse or promote products derived
     26  *    from this software without specific prior written permission.
     27  *
     28  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     29  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     30  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     31  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     32  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     33  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     34  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     35  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     36  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     37  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     38  * POSSIBILITY OF SUCH DAMAGE.
     39  */
     40 
     41 /*
     42  * Encapsulate L3 protocols into IP
     43  * See RFC 1701 and 1702 for more details.
     44  * If_gre is compatible with Cisco GRE tunnels, so you can
     45  * have a NetBSD box as the other end of a tunnel interface of a Cisco
     46  * router. See gre(4) for more details.
     47  * Also supported: minimal encapsulation within IP (proto 55) as of RFC 2004
     48  */
     49 
     50 #include <sys/cdefs.h>
     51 __KERNEL_RCSID(0, "$NetBSD: if_gre.c,v 1.83.2.2 2007/03/12 05:59:11 rmind Exp $");
     52 
     53 #include "opt_gre.h"
     54 #include "opt_inet.h"
     55 #include "bpfilter.h"
     56 
     57 #ifdef INET
     58 #include <sys/param.h>
     59 #include <sys/file.h>
     60 #include <sys/filedesc.h>
     61 #include <sys/malloc.h>
     62 #include <sys/mbuf.h>
     63 #include <sys/proc.h>
     64 #include <sys/protosw.h>
     65 #include <sys/socket.h>
     66 #include <sys/socketvar.h>
     67 #include <sys/ioctl.h>
     68 #include <sys/queue.h>
     69 #if __NetBSD__
     70 #include <sys/systm.h>
     71 #include <sys/sysctl.h>
     72 #include <sys/kauth.h>
     73 #endif
     74 
     75 #include <sys/kthread.h>
     76 
     77 #include <machine/cpu.h>
     78 
     79 #include <net/ethertypes.h>
     80 #include <net/if.h>
     81 #include <net/if_types.h>
     82 #include <net/netisr.h>
     83 #include <net/route.h>
     84 
     85 #ifdef INET
     86 #include <netinet/in.h>
     87 #include <netinet/in_systm.h>
     88 #include <netinet/in_var.h>
     89 #include <netinet/ip.h>
     90 #include <netinet/ip_var.h>
     91 #else
     92 #error "Huh? if_gre without inet?"
     93 #endif
     94 
     95 
     96 #ifdef NETATALK
     97 #include <netatalk/at.h>
     98 #include <netatalk/at_var.h>
     99 #include <netatalk/at_extern.h>
    100 #endif
    101 
    102 #if NBPFILTER > 0
    103 #include <sys/time.h>
    104 #include <net/bpf.h>
    105 #endif
    106 
    107 #include <net/if_gre.h>
    108 
    109 /*
    110  * It is not easy to calculate the right value for a GRE MTU.
    111  * We leave this task to the admin and use the same default that
    112  * other vendors use.
    113  */
    114 #define GREMTU 1476
    115 
/*
 * GRE_DPRINTF(sc, fmt, ...): debug printf.
 *
 * With GRE_DEBUG defined, prints only while IFF_DEBUG is set on the
 * softc's interface; without GRE_DEBUG it expands to an empty statement.
 */
#if defined(GRE_DEBUG)
#define	GRE_DPRINTF(sc_, fmt_, ...)				\
	do {							\
		if (((sc_)->sc_if.if_flags & IFF_DEBUG) != 0)	\
			printf(fmt_, __VA_ARGS__);		\
	} while (/*CONSTCOND*/0)
#else /* !GRE_DEBUG */
#define	GRE_DPRINTF(sc_, fmt_, ...)	do { } while (/*CONSTCOND*/0)
#endif /* GRE_DEBUG */
    125 
    126 struct gre_softc_head gre_softc_list;
    127 int ip_gre_ttl = GRE_TTL;
    128 
    129 static int	gre_clone_create(struct if_clone *, int);
    130 static int	gre_clone_destroy(struct ifnet *);
    131 
    132 static struct if_clone gre_cloner =
    133     IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
    134 
    135 static int	gre_output(struct ifnet *, struct mbuf *,
    136 			   const struct sockaddr *, struct rtentry *);
    137 static int	gre_ioctl(struct ifnet *, u_long, void *);
    138 
    139 static int	gre_compute_route(struct gre_softc *sc);
    140 
    141 static int gre_getsockname(struct socket *, struct mbuf *, struct lwp *);
    142 static int gre_getpeername(struct socket *, struct mbuf *, struct lwp *);
    143 static int gre_getnames(struct socket *, struct lwp *, struct sockaddr_in *,
    144     struct sockaddr_in *);
    145 
/*
 * Clear the flag at *running and wake every thread sleeping on its
 * address (gre_join() sleeps on the same address).
 */
static void
gre_stop(volatile int *running)
{
	*running = 0;
	wakeup(running);
}
    152 
    153 static void
    154 gre_join(volatile int *running)
    155 {
    156 	int s;
    157 
    158 	s = splnet();
    159 	while (*running != 0) {
    160 		splx(s);
    161 		tsleep(running, PSOCK, "grejoin", 0);
    162 		s = splnet();
    163 	}
    164 	splx(s);
    165 }
    166 
    167 static void
    168 gre_wakeup(struct gre_softc *sc)
    169 {
    170 	GRE_DPRINTF(sc, "%s: enter\n", __func__);
    171 	sc->sc_waitchan = 1;
    172 	wakeup(&sc->sc_waitchan);
    173 }
    174 
    175 static int
    176 gre_clone_create(struct if_clone *ifc, int unit)
    177 {
    178 	struct gre_softc *sc;
    179 
    180 	sc = malloc(sizeof(struct gre_softc), M_DEVBUF, M_WAITOK);
    181 	memset(sc, 0, sizeof(struct gre_softc));
    182 
    183 	snprintf(sc->sc_if.if_xname, sizeof(sc->sc_if.if_xname), "%s%d",
    184 	    ifc->ifc_name, unit);
    185 	sc->sc_if.if_softc = sc;
    186 	sc->sc_if.if_type = IFT_TUNNEL;
    187 	sc->sc_if.if_addrlen = 0;
    188 	sc->sc_if.if_hdrlen = 24; /* IP + GRE */
    189 	sc->sc_if.if_dlt = DLT_NULL;
    190 	sc->sc_if.if_mtu = GREMTU;
    191 	sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
    192 	sc->sc_if.if_output = gre_output;
    193 	sc->sc_if.if_ioctl = gre_ioctl;
    194 	sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
    195 	sc->g_dstport = sc->g_srcport = 0;
    196 	sc->sc_proto = IPPROTO_GRE;
    197 	sc->sc_snd.ifq_maxlen = 256;
    198 	sc->sc_if.if_flags |= IFF_LINK0;
    199 	if_attach(&sc->sc_if);
    200 	if_alloc_sadl(&sc->sc_if);
    201 #if NBPFILTER > 0
    202 	bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t));
    203 #endif
    204 	LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
    205 	return 0;
    206 }
    207 
    208 static int
    209 gre_clone_destroy(struct ifnet *ifp)
    210 {
    211 	int s;
    212 	struct gre_softc *sc = ifp->if_softc;
    213 
    214 	LIST_REMOVE(sc, sc_list);
    215 #if NBPFILTER > 0
    216 	bpfdetach(ifp);
    217 #endif
    218 	s = splnet();
    219 	ifp->if_flags &= ~IFF_UP;
    220 	gre_wakeup(sc);
    221 	splx(s);
    222 	gre_join(&sc->sc_thread);
    223 	s = splnet();
    224 	rtcache_free(&sc->route);
    225 	if_detach(ifp);
    226 	splx(s);
    227 	if (sc->sc_fp != NULL) {
    228 		closef(sc->sc_fp, curlwp);
    229 		sc->sc_fp = NULL;
    230 	}
    231 	free(sc, M_DEVBUF);
    232 
    233 	return 0;
    234 }
    235 
/*
 * Socket upcall installed by gre_upcall_add().  It only wakes the
 * worker thread; `so' and `waitflag' are not used here.
 */
static void
gre_receive(struct socket *so, void *arg, int waitflag)
{
	struct gre_softc *sc = arg;

	GRE_DPRINTF(sc, "%s: enter\n", __func__);

	gre_wakeup(sc);
}
    245 
    246 static void
    247 gre_upcall_add(struct socket *so, void *arg)
    248 {
    249 	/* XXX What if the kernel already set an upcall? */
    250 	so->so_upcallarg = arg;
    251 	so->so_upcall = gre_receive;
    252 	so->so_rcv.sb_flags |= SB_UPCALL;
    253 }
    254 
    255 static void
    256 gre_upcall_remove(struct socket *so)
    257 {
    258 	/* XXX What if the kernel already set an upcall? */
    259 	so->so_rcv.sb_flags &= ~SB_UPCALL;
    260 	so->so_upcallarg = NULL;
    261 	so->so_upcall = NULL;
    262 }
    263 
    264 static void
    265 gre_sodestroy(struct socket **sop)
    266 {
    267 	gre_upcall_remove(*sop);
    268 	soshutdown(*sop, SHUT_RDWR);
    269 	soclose(*sop);
    270 	*sop = NULL;
    271 }
    272 
    273 static struct mbuf *
    274 gre_getsockmbuf(struct socket *so)
    275 {
    276 	struct mbuf *m;
    277 
    278 	m = m_get(M_WAIT, MT_SONAME);
    279 	if (m != NULL)
    280 		MCLAIM(m, so->so_mowner);
    281 	return m;
    282 }
    283 
    284 static int
    285 gre_socreate1(struct gre_softc *sc, struct lwp *l, struct gre_soparm *sp,
    286     struct socket **sop)
    287 {
    288 	int rc;
    289 	struct mbuf *m;
    290 	struct sockaddr_in *sin;
    291 	struct socket *so;
    292 
    293 	GRE_DPRINTF(sc, "%s: enter\n", __func__);
    294 	rc = socreate(AF_INET, sop, SOCK_DGRAM, IPPROTO_UDP, l);
    295 	if (rc != 0) {
    296 		GRE_DPRINTF(sc, "%s: socreate failed\n", __func__);
    297 		return rc;
    298 	}
    299 
    300 	so = *sop;
    301 
    302 	gre_upcall_add(so, (void *)sc);
    303 	if ((m = gre_getsockmbuf(so)) == NULL) {
    304 		rc = ENOBUFS;
    305 		goto out;
    306 	}
    307 	sin = mtod(m, struct sockaddr_in *);
    308 	sin->sin_len = m->m_len = sizeof(struct sockaddr_in);
    309 	sin->sin_family = AF_INET;
    310 	sin->sin_addr = sc->g_src;
    311 	sin->sin_port = sc->g_srcport;
    312 
    313 	GRE_DPRINTF(sc, "%s: bind 0x%08" PRIx32 " port %d\n", __func__,
    314 	    sin->sin_addr.s_addr, ntohs(sin->sin_port));
    315 	if ((rc = sobind(so, m, l)) != 0) {
    316 		GRE_DPRINTF(sc, "%s: sobind failed\n", __func__);
    317 		goto out;
    318 	}
    319 
    320 	if (sc->g_srcport == 0) {
    321 		if ((rc = gre_getsockname(so, m, l)) != 0) {
    322 			GRE_DPRINTF(sc, "%s: gre_getsockname failed\n",
    323 			    __func__);
    324 			goto out;
    325 		}
    326 		sc->g_srcport = sin->sin_port;
    327 	}
    328 
    329 	sin->sin_addr = sc->g_dst;
    330 	sin->sin_port = sc->g_dstport;
    331 
    332 	if ((rc = soconnect(so, m, l)) != 0) {
    333 		GRE_DPRINTF(sc, "%s: soconnect failed\n", __func__);
    334 		goto out;
    335 	}
    336 
    337 	*mtod(m, int *) = ip_gre_ttl;
    338 	m->m_len = sizeof(int);
    339 	rc = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so, IPPROTO_IP, IP_TTL,
    340 	    &m);
    341 	m = NULL;
    342 	if (rc != 0) {
    343 		printf("%s: setopt ttl failed\n", __func__);
    344 		rc = 0;
    345 	}
    346 out:
    347 	m_freem(m);
    348 
    349 	if (rc != 0)
    350 		gre_sodestroy(sop);
    351 	else
    352 		*sp = sc->sc_soparm;
    353 
    354 	return rc;
    355 }
    356 
    357 static void
    358 gre_thread1(struct gre_softc *sc, struct lwp *l)
    359 {
    360 	int flags, rc, s;
    361 	const struct gre_h *gh;
    362 	struct ifnet *ifp = &sc->sc_if;
    363 	struct mbuf *m;
    364 	struct socket *so = NULL;
    365 	struct uio uio;
    366 	struct gre_soparm sp;
    367 
    368 	GRE_DPRINTF(sc, "%s: enter\n", __func__);
    369 	s = splnet();
    370 
    371 	sc->sc_waitchan = 1;
    372 
    373 	memset(&sp, 0, sizeof(sp));
    374 	memset(&uio, 0, sizeof(uio));
    375 
    376 	ifp->if_flags |= IFF_RUNNING;
    377 
    378 	for (;;) {
    379 		while (sc->sc_waitchan == 0) {
    380 			splx(s);
    381 			GRE_DPRINTF(sc, "%s: sleeping\n", __func__);
    382 			tsleep(&sc->sc_waitchan, PSOCK, "grewait", 0);
    383 			s = splnet();
    384 		}
    385 		sc->sc_waitchan = 0;
    386 		GRE_DPRINTF(sc, "%s: awake\n", __func__);
    387 		if ((ifp->if_flags & IFF_UP) != IFF_UP) {
    388 			GRE_DPRINTF(sc, "%s: not up & running; exiting\n",
    389 			    __func__);
    390 			break;
    391 		}
    392 		if (sc->sc_proto != IPPROTO_UDP) {
    393 			GRE_DPRINTF(sc, "%s: not udp; exiting\n", __func__);
    394 			break;
    395 		}
    396 		/* XXX optimize */
    397 		if (so == NULL || memcmp(&sp, &sc->sc_soparm, sizeof(sp)) != 0){
    398 			GRE_DPRINTF(sc, "%s: parameters changed\n", __func__);
    399 
    400 			if (sp.sp_fp != NULL) {
    401 				FILE_UNUSE(sp.sp_fp, NULL);
    402 				sp.sp_fp = NULL;
    403 				so = NULL;
    404 			} else if (so != NULL)
    405 				gre_sodestroy(&so);
    406 
    407 			if (sc->sc_fp != NULL) {
    408 				so = (struct socket *)sc->sc_fp->f_data;
    409 				gre_upcall_add(so, (void *)sc);
    410 				sp = sc->sc_soparm;
    411 				FILE_USE(sp.sp_fp);
    412 			} else if (gre_socreate1(sc, l, &sp, &so) != 0)
    413 				goto out;
    414 		}
    415 		for (;;) {
    416 			flags = MSG_DONTWAIT;
    417 			uio.uio_resid = 1000000;
    418 			rc = (*so->so_receive)(so, NULL, &uio, &m, NULL,
    419 			    &flags);
    420 			/* TBD Back off if ECONNREFUSED (indicates
    421 			 * ICMP Port Unreachable)?
    422 			 */
    423 			if (rc == EWOULDBLOCK) {
    424 				GRE_DPRINTF(sc, "%s: so_receive EWOULDBLOCK\n",
    425 				    __func__);
    426 				break;
    427 			} else if (rc != 0 || m == NULL) {
    428 				GRE_DPRINTF(sc, "%s: rc %d m %p\n",
    429 				    ifp->if_xname, rc, (void *)m);
    430 				continue;
    431 			} else
    432 				GRE_DPRINTF(sc, "%s: so_receive ok\n",
    433 				    __func__);
    434 			if (m->m_len < sizeof(*gh) &&
    435 			    (m = m_pullup(m, sizeof(*gh))) == NULL) {
    436 				GRE_DPRINTF(sc, "%s: m_pullup failed\n",
    437 				    __func__);
    438 				continue;
    439 			}
    440 			gh = mtod(m, const struct gre_h *);
    441 
    442 			if (gre_input3(sc, m, 0, IPPROTO_GRE, gh) == 0) {
    443 				GRE_DPRINTF(sc, "%s: dropping unsupported\n",
    444 				    __func__);
    445 				ifp->if_ierrors++;
    446 				m_freem(m);
    447 			}
    448 		}
    449 		for (;;) {
    450 			IF_DEQUEUE(&sc->sc_snd, m);
    451 			if (m == NULL)
    452 				break;
    453 			GRE_DPRINTF(sc, "%s: dequeue\n", __func__);
    454 			if ((so->so_state & SS_ISCONNECTED) == 0) {
    455 				GRE_DPRINTF(sc, "%s: not connected\n",
    456 				    __func__);
    457 				m_freem(m);
    458 				continue;
    459 			}
    460 			rc = (*so->so_send)(so, NULL, NULL, m, NULL, 0, l);
    461 			/* XXX handle ENOBUFS? */
    462 			if (rc != 0)
    463 				GRE_DPRINTF(sc, "%s: so_send failed\n",
    464 				    __func__);
    465 		}
    466 		/* Give the software interrupt queues a chance to
    467 		 * run, or else when I send a ping from gre0 to gre1 on
    468 		 * the same host, gre0 will not wake for the reply.
    469 		 */
    470 		splx(s);
    471 		s = splnet();
    472 	}
    473 	if (sp.sp_fp != NULL) {
    474 		GRE_DPRINTF(sc, "%s: removing upcall\n", __func__);
    475 		gre_upcall_remove(so);
    476 		FILE_UNUSE(sp.sp_fp, NULL);
    477 		sp.sp_fp = NULL;
    478 	} else if (so != NULL)
    479 		gre_sodestroy(&so);
    480 out:
    481 	GRE_DPRINTF(sc, "%s: stopping\n", __func__);
    482 	if (sc->sc_proto == IPPROTO_UDP)
    483 		ifp->if_flags &= ~IFF_RUNNING;
    484 	while (!IF_IS_EMPTY(&sc->sc_snd)) {
    485 		IF_DEQUEUE(&sc->sc_snd, m);
    486 		m_freem(m);
    487 	}
    488 	gre_stop(&sc->sc_thread);
    489 	/* must not touch sc after this! */
    490 	GRE_DPRINTF(sc, "%s: restore ipl\n", __func__);
    491 	splx(s);
    492 }
    493 
    494 static void
    495 gre_thread(void *arg)
    496 {
    497 	struct gre_softc *sc = (struct gre_softc *)arg;
    498 
    499 	gre_thread1(sc, curlwp);
    500 	/* must not touch sc after this! */
    501 	kthread_exit(0);
    502 }
    503 
    504 int
    505 gre_input3(struct gre_softc *sc, struct mbuf *m, int hlen, u_char proto,
    506     const struct gre_h *gh)
    507 {
    508 	u_int16_t flags;
    509 #if NBPFILTER > 0
    510 	u_int32_t af = AF_INET;		/* af passed to BPF tap */
    511 #endif
    512 	int s, isr;
    513 	struct ifqueue *ifq;
    514 
    515 	sc->sc_if.if_ipackets++;
    516 	sc->sc_if.if_ibytes += m->m_pkthdr.len;
    517 
    518 	switch (proto) {
    519 	case IPPROTO_GRE:
    520 		hlen += sizeof(struct gre_h);
    521 
    522 		/* process GRE flags as packet can be of variable len */
    523 		flags = ntohs(gh->flags);
    524 
    525 		/* Checksum & Offset are present */
    526 		if ((flags & GRE_CP) | (flags & GRE_RP))
    527 			hlen += 4;
    528 		/* We don't support routing fields (variable length) */
    529 		if (flags & GRE_RP)
    530 			return 0;
    531 		if (flags & GRE_KP)
    532 			hlen += 4;
    533 		if (flags & GRE_SP)
    534 			hlen += 4;
    535 
    536 		switch (ntohs(gh->ptype)) { /* ethertypes */
    537 		case ETHERTYPE_IP: /* shouldn't need a schednetisr(), as */
    538 			ifq = &ipintrq;          /* we are in ip_input */
    539 			isr = NETISR_IP;
    540 			break;
    541 #ifdef NETATALK
    542 		case ETHERTYPE_ATALK:
    543 			ifq = &atintrq1;
    544 			isr = NETISR_ATALK;
    545 #if NBPFILTER > 0
    546 			af = AF_APPLETALK;
    547 #endif
    548 			break;
    549 #endif
    550 #ifdef INET6
    551 		case ETHERTYPE_IPV6:
    552 			GRE_DPRINTF(sc, "%s: IPv6 packet\n", __func__);
    553 			ifq = &ip6intrq;
    554 			isr = NETISR_IPV6;
    555 #if NBPFILTER > 0
    556 			af = AF_INET6;
    557 #endif
    558 			break;
    559 #endif
    560 		default:	   /* others not yet supported */
    561 			printf("%s: unhandled ethertype 0x%04x\n", __func__,
    562 			    ntohs(gh->ptype));
    563 			return 0;
    564 		}
    565 		break;
    566 	default:
    567 		/* others not yet supported */
    568 		return 0;
    569 	}
    570 
    571 	if (hlen > m->m_pkthdr.len) {
    572 		m_freem(m);
    573 		sc->sc_if.if_ierrors++;
    574 		return EINVAL;
    575 	}
    576 	m_adj(m, hlen);
    577 
    578 #if NBPFILTER > 0
    579 	if (sc->sc_if.if_bpf != NULL)
    580 		bpf_mtap_af(sc->sc_if.if_bpf, af, m);
    581 #endif /*NBPFILTER > 0*/
    582 
    583 	m->m_pkthdr.rcvif = &sc->sc_if;
    584 
    585 	s = splnet();		/* possible */
    586 	if (IF_QFULL(ifq)) {
    587 		IF_DROP(ifq);
    588 		m_freem(m);
    589 	} else {
    590 		IF_ENQUEUE(ifq, m);
    591 	}
    592 	/* we need schednetisr since the address family may change */
    593 	schednetisr(isr);
    594 	splx(s);
    595 
    596 	return 1;	/* packet is done, no further processing needed */
    597 }
    598 
    599 /*
    600  * The output routine. Takes a packet and encapsulates it in the protocol
    601  * given by sc->sc_proto. See also RFC 1701 and RFC 2004
    602  */
    603 static int
    604 gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
    605 	   struct rtentry *rt)
    606 {
    607 	int error = 0, hlen;
    608 	struct gre_softc *sc = ifp->if_softc;
    609 	struct greip *gi;
    610 	struct gre_h *gh;
    611 	struct ip *eip, *ip;
    612 	u_int8_t ip_tos = 0;
    613 	u_int16_t etype = 0;
    614 	struct mobile_h mob_h;
    615 
    616 	if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) == 0 ||
    617 	    sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
    618 		m_freem(m);
    619 		error = ENETDOWN;
    620 		goto end;
    621 	}
    622 
    623 	gi = NULL;
    624 	ip = NULL;
    625 
    626 #if NBPFILTER >0
    627 	if (ifp->if_bpf)
    628 		bpf_mtap_af(ifp->if_bpf, dst->sa_family, m);
    629 #endif
    630 
    631 	m->m_flags &= ~(M_BCAST|M_MCAST);
    632 
    633 	switch (sc->sc_proto) {
    634 	case IPPROTO_MOBILE:
    635 		if (dst->sa_family == AF_INET) {
    636 			int msiz;
    637 
    638 			if (M_UNWRITABLE(m, sizeof(*ip)) &&
    639 			    (m = m_pullup(m, sizeof(*ip))) == NULL) {
    640 				error = ENOBUFS;
    641 				goto end;
    642 			}
    643 			ip = mtod(m, struct ip *);
    644 
    645 			memset(&mob_h, 0, MOB_H_SIZ_L);
    646 			mob_h.proto = (ip->ip_p) << 8;
    647 			mob_h.odst = ip->ip_dst.s_addr;
    648 			ip->ip_dst.s_addr = sc->g_dst.s_addr;
    649 
    650 			/*
    651 			 * If the packet comes from our host, we only change
    652 			 * the destination address in the IP header.
    653 			 * Else we also need to save and change the source
    654 			 */
    655 			if (in_hosteq(ip->ip_src, sc->g_src)) {
    656 				msiz = MOB_H_SIZ_S;
    657 			} else {
    658 				mob_h.proto |= MOB_H_SBIT;
    659 				mob_h.osrc = ip->ip_src.s_addr;
    660 				ip->ip_src.s_addr = sc->g_src.s_addr;
    661 				msiz = MOB_H_SIZ_L;
    662 			}
    663 			HTONS(mob_h.proto);
    664 			mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
    665 
    666 			M_PREPEND(m, msiz, M_DONTWAIT);
    667 			if (m == NULL) {
    668 				error = ENOBUFS;
    669 				goto end;
    670 			}
    671 			/* XXX Assuming that ip does not dangle after
    672 			 * M_PREPEND.  In practice, that's true, but
    673 			 * that's in M_PREPEND's contract.
    674 			 */
    675 			memmove(mtod(m, void *), ip, sizeof(*ip));
    676 			ip = mtod(m, struct ip *);
    677 			memcpy((void *)(ip + 1), &mob_h, (unsigned)msiz);
    678 			ip->ip_len = htons(ntohs(ip->ip_len) + msiz);
    679 		} else {  /* AF_INET */
    680 			IF_DROP(&ifp->if_snd);
    681 			m_freem(m);
    682 			error = EINVAL;
    683 			goto end;
    684 		}
    685 		break;
    686 	case IPPROTO_UDP:
    687 	case IPPROTO_GRE:
    688 		GRE_DPRINTF(sc, "%s: dst->sa_family=%d\n", __func__,
    689 		    dst->sa_family);
    690 		switch (dst->sa_family) {
    691 		case AF_INET:
    692 			ip = mtod(m, struct ip *);
    693 			ip_tos = ip->ip_tos;
    694 			etype = ETHERTYPE_IP;
    695 			break;
    696 #ifdef NETATALK
    697 		case AF_APPLETALK:
    698 			etype = ETHERTYPE_ATALK;
    699 			break;
    700 #endif
    701 #ifdef INET6
    702 		case AF_INET6:
    703 			etype = ETHERTYPE_IPV6;
    704 			break;
    705 #endif
    706 		default:
    707 			IF_DROP(&ifp->if_snd);
    708 			m_freem(m);
    709 			error = EAFNOSUPPORT;
    710 			goto end;
    711 		}
    712 		break;
    713 	default:
    714 		IF_DROP(&ifp->if_snd);
    715 		m_freem(m);
    716 		error = EINVAL;
    717 		goto end;
    718 	}
    719 
    720 	switch (sc->sc_proto) {
    721 	case IPPROTO_GRE:
    722 		hlen = sizeof(struct greip);
    723 		break;
    724 	case IPPROTO_UDP:
    725 		hlen = sizeof(struct gre_h);
    726 		break;
    727 	default:
    728 		hlen = 0;
    729 		break;
    730 	}
    731 
    732 	M_PREPEND(m, hlen, M_DONTWAIT);
    733 
    734 	if (m == NULL) {
    735 		IF_DROP(&ifp->if_snd);
    736 		error = ENOBUFS;
    737 		goto end;
    738 	}
    739 
    740 	switch (sc->sc_proto) {
    741 	case IPPROTO_UDP:
    742 		gh = mtod(m, struct gre_h *);
    743 		memset(gh, 0, sizeof(*gh));
    744 		gh->ptype = htons(etype);
    745 		/* XXX Need to handle IP ToS.  Look at how I handle IP TTL. */
    746 		break;
    747 	case IPPROTO_GRE:
    748 		gi = mtod(m, struct greip *);
    749 		gh = &gi->gi_g;
    750 		eip = &gi->gi_i;
    751 		/* we don't have any GRE flags for now */
    752 		memset(gh, 0, sizeof(*gh));
    753 		gh->ptype = htons(etype);
    754 		eip->ip_src = sc->g_src;
    755 		eip->ip_dst = sc->g_dst;
    756 		eip->ip_hl = (sizeof(struct ip)) >> 2;
    757 		eip->ip_ttl = ip_gre_ttl;
    758 		eip->ip_tos = ip_tos;
    759 		eip->ip_len = htons(m->m_pkthdr.len);
    760 		eip->ip_p = sc->sc_proto;
    761 		break;
    762 	case IPPROTO_MOBILE:
    763 		eip = mtod(m, struct ip *);
    764 		eip->ip_p = sc->sc_proto;
    765 		break;
    766 	default:
    767 		error = EPROTONOSUPPORT;
    768 		m_freem(m);
    769 		goto end;
    770 	}
    771 
    772 	ifp->if_opackets++;
    773 	ifp->if_obytes += m->m_pkthdr.len;
    774 
    775 	/* send it off */
    776 	if (sc->sc_proto == IPPROTO_UDP) {
    777 		if (IF_QFULL(&sc->sc_snd)) {
    778 			IF_DROP(&sc->sc_snd);
    779 			error = ENOBUFS;
    780 			m_freem(m);
    781 		} else {
    782 			IF_ENQUEUE(&sc->sc_snd, m);
    783 			gre_wakeup(sc);
    784 			error = 0;
    785 		}
    786 		goto end;
    787 	}
    788 	if (sc->route.ro_rt == NULL)
    789 		rtcache_init(&sc->route);
    790 	else
    791 		rtcache_check(&sc->route);
    792 	if (sc->route.ro_rt == NULL)
    793 		goto end;
    794 	if (sc->route.ro_rt->rt_ifp->if_softc == sc)
    795 		rtcache_free(&sc->route);
    796 	else
    797 		error = ip_output(m, NULL, &sc->route, 0,
    798 		    (struct ip_moptions *)NULL, (struct socket *)NULL);
    799   end:
    800 	if (error)
    801 		ifp->if_oerrors++;
    802 	return error;
    803 }
    804 
    805 /* gre_kick must be synchronized with network interrupts in order
    806  * to synchronize access to gre_softc members, so call it with
    807  * interrupt priority level set to IPL_NET or greater.
    808  */
    809 static int
    810 gre_kick(struct gre_softc *sc)
    811 {
    812 	int rc;
    813 	struct ifnet *ifp = &sc->sc_if;
    814 
    815 	if (sc->sc_proto == IPPROTO_UDP && (ifp->if_flags & IFF_UP) == IFF_UP &&
    816 	    !sc->sc_thread) {
    817 		sc->sc_thread = 1;
    818 		rc = kthread_create1(gre_thread, (void *)sc, NULL,
    819 		    ifp->if_xname);
    820 		if (rc != 0)
    821 			gre_stop(&sc->sc_thread);
    822 		return rc;
    823 	} else {
    824 		gre_wakeup(sc);
    825 		return 0;
    826 	}
    827 }
    828 
    829 static int
    830 gre_getname(struct socket *so, int req, struct mbuf *nam, struct lwp *l)
    831 {
    832 	int s, error;
    833 
    834 	s = splsoftnet();
    835 	error = (*so->so_proto->pr_usrreq)(so, req, (struct mbuf *)0,
    836 	    nam, (struct mbuf *)0, l);
    837 	splx(s);
    838 	return error;
    839 }
    840 
    841 static int
    842 gre_getsockname(struct socket *so, struct mbuf *nam, struct lwp *l)
    843 {
    844 	return gre_getname(so, PRU_SOCKADDR, nam, l);
    845 }
    846 
    847 static int
    848 gre_getpeername(struct socket *so, struct mbuf *nam, struct lwp *l)
    849 {
    850 	return gre_getname(so, PRU_PEERADDR, nam, l);
    851 }
    852 
    853 static int
    854 gre_getnames(struct socket *so, struct lwp *l, struct sockaddr_in *src,
    855     struct sockaddr_in *dst)
    856 {
    857 	struct mbuf *m;
    858 	struct sockaddr_in *sin;
    859 	int rc;
    860 
    861 	if ((m = gre_getsockmbuf(so)) == NULL)
    862 		return ENOBUFS;
    863 
    864 	sin = mtod(m, struct sockaddr_in *);
    865 
    866 	if ((rc = gre_getsockname(so, m, l)) != 0)
    867 		goto out;
    868 	if (sin->sin_family != AF_INET) {
    869 		rc = EAFNOSUPPORT;
    870 		goto out;
    871 	}
    872 	*src = *sin;
    873 
    874 	if ((rc = gre_getpeername(so, m, l)) != 0)
    875 		goto out;
    876 	if (sin->sin_family != AF_INET) {
    877 		rc = EAFNOSUPPORT;
    878 		goto out;
    879 	}
    880 	*dst = *sin;
    881 
    882 out:
    883 	m_freem(m);
    884 	return rc;
    885 }
    886 
    887 static int
    888 gre_ioctl(struct ifnet *ifp, u_long cmd, void *data)
    889 {
    890 	u_char oproto;
    891 	struct file *fp, *ofp;
    892 	struct socket *so;
    893 	struct sockaddr_in dst, src;
    894 	struct proc *p = curproc;	/* XXX */
    895 	struct lwp *l = curlwp;	/* XXX */
    896 	struct ifreq *ifr = (struct ifreq *)data;
    897 	struct if_laddrreq *lifr = (struct if_laddrreq *)data;
    898 	struct gre_softc *sc = ifp->if_softc;
    899 	int s;
    900 	struct sockaddr_in si;
    901 	struct sockaddr *sa = NULL;
    902 	int error = 0;
    903 
    904 	switch (cmd) {
    905 	case SIOCSIFFLAGS:
    906 	case SIOCSIFMTU:
    907 	case GRESPROTO:
    908 	case GRESADDRD:
    909 	case GRESADDRS:
    910 	case GRESSOCK:
    911 	case GREDSOCK:
    912 	case SIOCSLIFPHYADDR:
    913 	case SIOCDIFPHYADDR:
    914 		if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE,
    915 		    KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
    916 		    NULL) != 0)
    917 			return EPERM;
    918 		break;
    919 	default:
    920 		break;
    921 	}
    922 
    923 	s = splnet();
    924 	switch (cmd) {
    925 	case SIOCSIFADDR:
    926 		ifp->if_flags |= IFF_UP;
    927 		if ((error = gre_kick(sc)) != 0)
    928 			ifp->if_flags &= ~IFF_UP;
    929 		break;
    930 	case SIOCSIFDSTADDR:
    931 		break;
    932 	case SIOCSIFFLAGS:
    933 		oproto = sc->sc_proto;
    934 		switch (ifr->ifr_flags & (IFF_LINK0|IFF_LINK2)) {
    935 		case IFF_LINK0|IFF_LINK2:
    936 			sc->sc_proto = IPPROTO_UDP;
    937 			if (oproto != IPPROTO_UDP)
    938 				ifp->if_flags &= ~IFF_RUNNING;
    939 			error = gre_kick(sc);
    940 			break;
    941 		case IFF_LINK0:
    942 			sc->sc_proto = IPPROTO_GRE;
    943 			gre_wakeup(sc);
    944 			goto recompute;
    945 		case 0:
    946 			sc->sc_proto = IPPROTO_MOBILE;
    947 			gre_wakeup(sc);
    948 			goto recompute;
    949 		}
    950 		break;
    951 	case SIOCSIFMTU:
    952 		if (ifr->ifr_mtu < 576) {
    953 			error = EINVAL;
    954 			break;
    955 		}
    956 		ifp->if_mtu = ifr->ifr_mtu;
    957 		break;
    958 	case SIOCGIFMTU:
    959 		ifr->ifr_mtu = sc->sc_if.if_mtu;
    960 		break;
    961 	case SIOCADDMULTI:
    962 	case SIOCDELMULTI:
    963 		if (ifr == 0) {
    964 			error = EAFNOSUPPORT;
    965 			break;
    966 		}
    967 		switch (ifr->ifr_addr.sa_family) {
    968 #ifdef INET
    969 		case AF_INET:
    970 			break;
    971 #endif
    972 #ifdef INET6
    973 		case AF_INET6:
    974 			break;
    975 #endif
    976 		default:
    977 			error = EAFNOSUPPORT;
    978 			break;
    979 		}
    980 		break;
    981 	case GRESPROTO:
    982 		oproto = sc->sc_proto;
    983 		sc->sc_proto = ifr->ifr_flags;
    984 		switch (sc->sc_proto) {
    985 		case IPPROTO_UDP:
    986 			ifp->if_flags |= IFF_LINK0|IFF_LINK2;
    987 			if (oproto != IPPROTO_UDP)
    988 				ifp->if_flags &= ~IFF_RUNNING;
    989 			error = gre_kick(sc);
    990 			break;
    991 		case IPPROTO_GRE:
    992 			ifp->if_flags |= IFF_LINK0;
    993 			ifp->if_flags &= ~IFF_LINK2;
    994 			goto recompute;
    995 		case IPPROTO_MOBILE:
    996 			ifp->if_flags &= ~(IFF_LINK0|IFF_LINK2);
    997 			goto recompute;
    998 		default:
    999 			error = EPROTONOSUPPORT;
   1000 			break;
   1001 		}
   1002 		break;
   1003 	case GREGPROTO:
   1004 		ifr->ifr_flags = sc->sc_proto;
   1005 		break;
   1006 	case GRESADDRS:
   1007 	case GRESADDRD:
   1008 		/*
   1009 		 * set tunnel endpoints, compute a less specific route
   1010 		 * to the remote end and mark if as up
   1011 		 */
   1012 		sa = &ifr->ifr_addr;
   1013 		if (cmd == GRESADDRS) {
   1014 			sc->g_src = (satosin(sa))->sin_addr;
   1015 			sc->g_srcport = satosin(sa)->sin_port;
   1016 		}
   1017 		if (cmd == GRESADDRD) {
   1018 			if (sc->sc_proto == IPPROTO_UDP &&
   1019 			    satosin(sa)->sin_port == 0) {
   1020 				error = EINVAL;
   1021 				break;
   1022 			}
   1023 			sc->g_dst = (satosin(sa))->sin_addr;
   1024 			sc->g_dstport = satosin(sa)->sin_port;
   1025 		}
   1026 	recompute:
   1027 		if (sc->sc_proto == IPPROTO_UDP ||
   1028 		    (sc->g_src.s_addr != INADDR_ANY &&
   1029 		     sc->g_dst.s_addr != INADDR_ANY)) {
   1030 			if (sc->sc_fp != NULL) {
   1031 				closef(sc->sc_fp, l);
   1032 				sc->sc_fp = NULL;
   1033 			}
   1034 			rtcache_free(&sc->route);
   1035 			if (sc->sc_proto == IPPROTO_UDP)
   1036 				error = gre_kick(sc);
   1037 			else if (gre_compute_route(sc) == 0)
   1038 				ifp->if_flags |= IFF_RUNNING;
   1039 			else
   1040 				ifp->if_flags &= ~IFF_RUNNING;
   1041 		}
   1042 		break;
   1043 	case GREGADDRS:
   1044 		memset(&si, 0, sizeof(si));
   1045 		si.sin_family = AF_INET;
   1046 		si.sin_len = sizeof(struct sockaddr_in);
   1047 		si.sin_addr.s_addr = sc->g_src.s_addr;
   1048 		sa = sintosa(&si);
   1049 		ifr->ifr_addr = *sa;
   1050 		break;
   1051 	case GREGADDRD:
   1052 		memset(&si, 0, sizeof(si));
   1053 		si.sin_family = AF_INET;
   1054 		si.sin_len = sizeof(struct sockaddr_in);
   1055 		si.sin_addr.s_addr = sc->g_dst.s_addr;
   1056 		sa = sintosa(&si);
   1057 		ifr->ifr_addr = *sa;
   1058 		break;
   1059 	case GREDSOCK:
   1060 		if (sc->sc_proto != IPPROTO_UDP)
   1061 			return EINVAL;
   1062 		if (sc->sc_fp != NULL) {
   1063 			closef(sc->sc_fp, l);
   1064 			sc->sc_fp = NULL;
   1065 			error = gre_kick(sc);
   1066 		}
   1067 		break;
   1068 	case GRESSOCK:
   1069 		if (sc->sc_proto != IPPROTO_UDP)
   1070 			return EINVAL;
   1071 		/* getsock() will FILE_USE() the descriptor for us */
   1072 		if ((error = getsock(p->p_fd, (int)ifr->ifr_value, &fp)) != 0)
   1073 			break;
   1074 		so = (struct socket *)fp->f_data;
   1075 		if (so->so_type != SOCK_DGRAM) {
   1076 			FILE_UNUSE(fp, NULL);
   1077 			error = EINVAL;
   1078 			break;
   1079 		}
   1080 		/* check address */
   1081 		if ((error = gre_getnames(so, curlwp, &src, &dst)) != 0) {
   1082 			FILE_UNUSE(fp, NULL);
   1083 			break;
   1084 		}
   1085 
   1086 		fp->f_count++;
   1087 
   1088 		ofp = sc->sc_fp;
   1089 		sc->sc_fp = fp;
   1090 		if ((error = gre_kick(sc)) != 0) {
   1091 			closef(fp, l);
   1092 			sc->sc_fp = ofp;
   1093 			break;
   1094 		}
   1095 		sc->g_src = src.sin_addr;
   1096 		sc->g_srcport = src.sin_port;
   1097 		sc->g_dst = dst.sin_addr;
   1098 		sc->g_dstport = dst.sin_port;
   1099 		if (ofp != NULL)
   1100 			closef(ofp, l);
   1101 		break;
   1102 	case SIOCSLIFPHYADDR:
   1103 		if (lifr->addr.ss_family != AF_INET ||
   1104 		    lifr->dstaddr.ss_family != AF_INET) {
   1105 			error = EAFNOSUPPORT;
   1106 			break;
   1107 		}
   1108 		if (lifr->addr.ss_len != sizeof(si) ||
   1109 		    lifr->dstaddr.ss_len != sizeof(si)) {
   1110 			error = EINVAL;
   1111 			break;
   1112 		}
   1113 		sc->g_src = satosin(&lifr->addr)->sin_addr;
   1114 		sc->g_dst = satosin(&lifr->dstaddr)->sin_addr;
   1115 		sc->g_srcport = satosin(&lifr->addr)->sin_port;
   1116 		sc->g_dstport = satosin(&lifr->dstaddr)->sin_port;
   1117 		goto recompute;
   1118 	case SIOCDIFPHYADDR:
   1119 		sc->g_src.s_addr = INADDR_ANY;
   1120 		sc->g_dst.s_addr = INADDR_ANY;
   1121 		sc->g_srcport = 0;
   1122 		sc->g_dstport = 0;
   1123 		goto recompute;
   1124 	case SIOCGLIFPHYADDR:
   1125 		if (sc->g_src.s_addr == INADDR_ANY ||
   1126 		    sc->g_dst.s_addr == INADDR_ANY) {
   1127 			error = EADDRNOTAVAIL;
   1128 			break;
   1129 		}
   1130 		memset(&si, 0, sizeof(si));
   1131 		si.sin_family = AF_INET;
   1132 		si.sin_len = sizeof(struct sockaddr_in);
   1133 		si.sin_addr = sc->g_src;
   1134 		if (sc->sc_proto == IPPROTO_UDP)
   1135 			si.sin_port = sc->g_srcport;
   1136 		memcpy(&lifr->addr, &si, sizeof(si));
   1137 		si.sin_addr = sc->g_dst;
   1138 		if (sc->sc_proto == IPPROTO_UDP)
   1139 			si.sin_port = sc->g_dstport;
   1140 		memcpy(&lifr->dstaddr, &si, sizeof(si));
   1141 		break;
   1142 	default:
   1143 		error = EINVAL;
   1144 		break;
   1145 	}
   1146 	splx(s);
   1147 	return error;
   1148 }
   1149 
   1150 /*
   1151  * Compute a route to our destination.
   1152  */
   1153 static int
   1154 gre_compute_route(struct gre_softc *sc)
   1155 {
   1156 	struct route *ro;
   1157 
   1158 	ro = &sc->route;
   1159 
   1160 	memset(ro, 0, sizeof(struct route));
   1161 	satosin(&ro->ro_dst)->sin_addr = sc->g_dst;
   1162 	ro->ro_dst.sa_family = AF_INET;
   1163 	ro->ro_dst.sa_len = sizeof(ro->ro_dst);
   1164 
   1165 #ifdef DIAGNOSTIC
   1166 	printf("%s: searching for a route to %s", sc->sc_if.if_xname,
   1167 	    inet_ntoa(satocsin(rtcache_getdst(ro))->sin_addr));
   1168 #endif
   1169 
   1170 	rtcache_init(ro);
   1171 
   1172 	if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
   1173 #ifdef DIAGNOSTIC
   1174 		if (ro->ro_rt == NULL)
   1175 			printf(" - no route found!\n");
   1176 		else
   1177 			printf(" - route loops back to ourself!\n");
   1178 #endif
   1179 		rtcache_free(ro);
   1180 		return EADDRNOTAVAIL;
   1181 	}
   1182 
   1183 	return 0;
   1184 }
   1185 
/*
 * Internet-style checksum over a flat buffer (the counterpart of
 * in_cksum, which works on mbuf chains).
 *
 * p points at the data, read as native 16-bit words; len is the length
 * in bytes.  All complete words are summed into a 32-bit accumulator; if
 * len is odd, the final byte is added as the first byte of a 16-bit word
 * whose second byte is zero.  The carries are then folded back into the
 * low 16 bits and the one's complement of the result is returned.
 */
u_int16_t
gre_in_cksum(u_int16_t *p, u_int len)
{
	const u_int16_t *word = p;
	const u_int16_t *const end = p + (len >> 1);
	u_int32_t acc = 0;

	/* add up every complete 16-bit word */
	for (; word != end; word++)
		acc += *word;

	/* odd length: pad the trailing byte with a zero byte */
	if ((len & 1) != 0) {
		union {
			u_short w;
			u_char c[2];
		} tail;

		tail.c[0] = *(const u_char *)word;
		tail.c[1] = 0;
		acc += tail.w;
	}

	/* end-around-carry: fold the upper half into the lower half, twice */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += acc >> 16;

	return ~acc;
}
   1214 #endif
   1215 
void	greattach(int);

/*
 * greattach: when the kernel is built with INET, initialize the list head
 * gre_softc_list and then hand gre_cloner to if_clone_attach().  Without
 * INET the function body is empty.  The count argument is never read.
 */
/* ARGSUSED */
void
greattach(int count)
{
#if defined(INET)
	/* the list head is set up before the cloner is registered */
	LIST_INIT(&gre_softc_list);
	if_clone_attach(&gre_cloner);
#endif	/* INET */
}
   1227