if_gre.c revision 1.55 1 /* $NetBSD: if_gre.c,v 1.55 2005/02/26 22:45:09 perry Exp $ */
2
3 /*
4 * Copyright (c) 1998 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Heiko W.Rupp <hwr (at) pilhuhn.de>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
/*
 * Encapsulate L3 protocols into IP.
 * See RFC 1701 and 1702 for more details.
 * if_gre is compatible with Cisco GRE tunnels, so you can
 * have a NetBSD box as the other end of a tunnel interface of a Cisco
 * router. See gre(4) for more details.
 * Also supported: minimal encapsulation within IP (protocol 55),
 * as described in RFC 2004.
 */
47
48 #include <sys/cdefs.h>
49 __KERNEL_RCSID(0, "$NetBSD: if_gre.c,v 1.55 2005/02/26 22:45:09 perry Exp $");
50
51 #include "opt_inet.h"
52 #include "opt_ns.h"
53 #include "bpfilter.h"
54
55 #ifdef INET
56 #include <sys/param.h>
57 #include <sys/malloc.h>
58 #include <sys/mbuf.h>
59 #include <sys/proc.h>
60 #include <sys/protosw.h>
61 #include <sys/socket.h>
62 #include <sys/ioctl.h>
63 #include <sys/queue.h>
64 #if __NetBSD__
65 #include <sys/systm.h>
66 #endif
67
68 #include <machine/cpu.h>
69
70 #include <net/ethertypes.h>
71 #include <net/if.h>
72 #include <net/if_types.h>
73 #include <net/netisr.h>
74 #include <net/route.h>
75
76 #ifdef INET
77 #include <netinet/in.h>
78 #include <netinet/in_systm.h>
79 #include <netinet/in_var.h>
80 #include <netinet/ip.h>
81 #include <netinet/ip_var.h>
82 #else
83 #error "Huh? if_gre without inet?"
84 #endif
85
86 #ifdef NS
87 #include <netns/ns.h>
88 #include <netns/ns_if.h>
89 #endif
90
91 #ifdef NETATALK
92 #include <netatalk/at.h>
93 #include <netatalk/at_var.h>
94 #include <netatalk/at_extern.h>
95 #endif
96
97 #if NBPFILTER > 0
98 #include <sys/time.h>
99 #include <net/bpf.h>
100 #endif
101
102 #include <net/if_gre.h>
103
104 /*
105 * It is not easy to calculate the right value for a GRE MTU.
106 * We leave this task to the admin and use the same default that
107 * other vendors use.
108 */
109 #define GREMTU 1476
110
111 struct gre_softc_head gre_softc_list;
112 int ip_gre_ttl = GRE_TTL;
113
114 int gre_clone_create __P((struct if_clone *, int));
115 int gre_clone_destroy __P((struct ifnet *));
116
117 struct if_clone gre_cloner =
118 IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
119
120 int gre_compute_route(struct gre_softc *sc);
121
122 int
123 gre_clone_create(ifc, unit)
124 struct if_clone *ifc;
125 int unit;
126 {
127 struct gre_softc *sc;
128
129 sc = malloc(sizeof(struct gre_softc), M_DEVBUF, M_WAITOK);
130 memset(sc, 0, sizeof(struct gre_softc));
131
132 snprintf(sc->sc_if.if_xname, sizeof(sc->sc_if.if_xname), "%s%d",
133 ifc->ifc_name, unit);
134 sc->sc_if.if_softc = sc;
135 sc->sc_if.if_type = IFT_TUNNEL;
136 sc->sc_if.if_addrlen = 0;
137 sc->sc_if.if_hdrlen = 24; /* IP + GRE */
138 sc->sc_if.if_dlt = DLT_NULL;
139 sc->sc_if.if_mtu = GREMTU;
140 sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
141 sc->sc_if.if_output = gre_output;
142 sc->sc_if.if_ioctl = gre_ioctl;
143 sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
144 sc->g_proto = IPPROTO_GRE;
145 sc->sc_if.if_flags |= IFF_LINK0;
146 if_attach(&sc->sc_if);
147 if_alloc_sadl(&sc->sc_if);
148 #if NBPFILTER > 0
149 bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t));
150 #endif
151 LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
152 return (0);
153 }
154
155 int
156 gre_clone_destroy(ifp)
157 struct ifnet *ifp;
158 {
159 struct gre_softc *sc = ifp->if_softc;
160
161 LIST_REMOVE(sc, sc_list);
162 #if NBPFILTER > 0
163 bpfdetach(ifp);
164 #endif
165 if_detach(ifp);
166 free(sc, M_DEVBUF);
167
168 return (0);
169 }
170
171 /*
172 * The output routine. Takes a packet and encapsulates it in the protocol
173 * given by sc->g_proto. See also RFC 1701 and RFC 2004
174 */
175 int
176 gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
177 struct rtentry *rt)
178 {
179 int error = 0;
180 struct gre_softc *sc = ifp->if_softc;
181 struct greip *gh;
182 struct ip *ip;
183 u_int16_t etype = 0;
184 struct mobile_h mob_h;
185
186 if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) == 0 ||
187 sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
188 m_freem(m);
189 error = ENETDOWN;
190 goto end;
191 }
192
193 gh = NULL;
194 ip = NULL;
195
196 #if NBPFILTER >0
197 if (ifp->if_bpf)
198 bpf_mtap_af(ifp->if_bpf, dst->sa_family, m);
199 #endif
200
201 m->m_flags &= ~(M_BCAST|M_MCAST);
202
203 if (sc->g_proto == IPPROTO_MOBILE) {
204 if (dst->sa_family == AF_INET) {
205 struct mbuf *m0;
206 int msiz;
207
208 ip = mtod(m, struct ip *);
209
210 memset(&mob_h, 0, MOB_H_SIZ_L);
211 mob_h.proto = (ip->ip_p) << 8;
212 mob_h.odst = ip->ip_dst.s_addr;
213 ip->ip_dst.s_addr = sc->g_dst.s_addr;
214
215 /*
216 * If the packet comes from our host, we only change
217 * the destination address in the IP header.
218 * Else we also need to save and change the source
219 */
220 if (in_hosteq(ip->ip_src, sc->g_src)) {
221 msiz = MOB_H_SIZ_S;
222 } else {
223 mob_h.proto |= MOB_H_SBIT;
224 mob_h.osrc = ip->ip_src.s_addr;
225 ip->ip_src.s_addr = sc->g_src.s_addr;
226 msiz = MOB_H_SIZ_L;
227 }
228 HTONS(mob_h.proto);
229 mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
230
231 if ((m->m_data - msiz) < m->m_pktdat) {
232 /* need new mbuf */
233 MGETHDR(m0, M_DONTWAIT, MT_HEADER);
234 if (m0 == NULL) {
235 IF_DROP(&ifp->if_snd);
236 m_freem(m);
237 error = ENOBUFS;
238 goto end;
239 }
240 m0->m_next = m;
241 m->m_data += sizeof(struct ip);
242 m->m_len -= sizeof(struct ip);
243 m0->m_pkthdr.len = m->m_pkthdr.len + msiz;
244 m0->m_len = msiz + sizeof(struct ip);
245 m0->m_data += max_linkhdr;
246 memcpy(mtod(m0, caddr_t), (caddr_t)ip,
247 sizeof(struct ip));
248 m = m0;
249 } else { /* we have some space left in the old one */
250 m->m_data -= msiz;
251 m->m_len += msiz;
252 m->m_pkthdr.len += msiz;
253 memmove(mtod(m, caddr_t), ip,
254 sizeof(struct ip));
255 }
256 ip = mtod(m, struct ip *);
257 memcpy((caddr_t)(ip + 1), &mob_h, (unsigned)msiz);
258 ip->ip_len = htons(ntohs(ip->ip_len) + msiz);
259 } else { /* AF_INET */
260 IF_DROP(&ifp->if_snd);
261 m_freem(m);
262 error = EINVAL;
263 goto end;
264 }
265 } else if (sc->g_proto == IPPROTO_GRE) {
266 switch (dst->sa_family) {
267 case AF_INET:
268 ip = mtod(m, struct ip *);
269 etype = ETHERTYPE_IP;
270 break;
271 #ifdef NETATALK
272 case AF_APPLETALK:
273 etype = ETHERTYPE_ATALK;
274 break;
275 #endif
276 #ifdef NS
277 case AF_NS:
278 etype = ETHERTYPE_NS;
279 break;
280 #endif
281 default:
282 IF_DROP(&ifp->if_snd);
283 m_freem(m);
284 error = EAFNOSUPPORT;
285 goto end;
286 }
287 M_PREPEND(m, sizeof(struct greip), M_DONTWAIT);
288 } else {
289 IF_DROP(&ifp->if_snd);
290 m_freem(m);
291 error = EINVAL;
292 goto end;
293 }
294
295 if (m == NULL) { /* impossible */
296 IF_DROP(&ifp->if_snd);
297 error = ENOBUFS;
298 goto end;
299 }
300
301 gh = mtod(m, struct greip *);
302 if (sc->g_proto == IPPROTO_GRE) {
303 /* we don't have any GRE flags for now */
304
305 memset((void *)&gh->gi_g, 0, sizeof(struct gre_h));
306 gh->gi_ptype = htons(etype);
307 }
308
309 gh->gi_pr = sc->g_proto;
310 if (sc->g_proto != IPPROTO_MOBILE) {
311 gh->gi_src = sc->g_src;
312 gh->gi_dst = sc->g_dst;
313 ((struct ip*)gh)->ip_hl = (sizeof(struct ip)) >> 2;
314 ((struct ip*)gh)->ip_ttl = ip_gre_ttl;
315 ((struct ip*)gh)->ip_tos = ip->ip_tos;
316 gh->gi_len = htons(m->m_pkthdr.len);
317 }
318
319 ifp->if_opackets++;
320 ifp->if_obytes += m->m_pkthdr.len;
321 /* send it off */
322 error = ip_output(m, NULL, &sc->route, 0,
323 (struct ip_moptions *)NULL, (struct socket *)NULL);
324 end:
325 if (error)
326 ifp->if_oerrors++;
327 return (error);
328 }
329
330 int
331 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
332 {
333 struct proc *p = curproc; /* XXX */
334 struct ifreq *ifr = (struct ifreq *)data;
335 struct if_laddrreq *lifr = (struct if_laddrreq *)data;
336 struct gre_softc *sc = ifp->if_softc;
337 int s;
338 struct sockaddr_in si;
339 struct sockaddr *sa = NULL;
340 int error;
341
342 error = 0;
343
344 s = splnet();
345 switch (cmd) {
346 case SIOCSIFADDR:
347 ifp->if_flags |= IFF_UP;
348 break;
349 case SIOCSIFDSTADDR:
350 break;
351 case SIOCSIFFLAGS:
352 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
353 break;
354 if ((ifr->ifr_flags & IFF_LINK0) != 0)
355 sc->g_proto = IPPROTO_GRE;
356 else
357 sc->g_proto = IPPROTO_MOBILE;
358 break;
359 case SIOCSIFMTU:
360 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
361 break;
362 if (ifr->ifr_mtu < 576) {
363 error = EINVAL;
364 break;
365 }
366 ifp->if_mtu = ifr->ifr_mtu;
367 break;
368 case SIOCGIFMTU:
369 ifr->ifr_mtu = sc->sc_if.if_mtu;
370 break;
371 case SIOCADDMULTI:
372 case SIOCDELMULTI:
373 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
374 break;
375 if (ifr == 0) {
376 error = EAFNOSUPPORT;
377 break;
378 }
379 switch (ifr->ifr_addr.sa_family) {
380 #ifdef INET
381 case AF_INET:
382 break;
383 #endif
384 default:
385 error = EAFNOSUPPORT;
386 break;
387 }
388 break;
389 case GRESPROTO:
390 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
391 break;
392 sc->g_proto = ifr->ifr_flags;
393 switch (sc->g_proto) {
394 case IPPROTO_GRE:
395 ifp->if_flags |= IFF_LINK0;
396 break;
397 case IPPROTO_MOBILE:
398 ifp->if_flags &= ~IFF_LINK0;
399 break;
400 default:
401 error = EPROTONOSUPPORT;
402 break;
403 }
404 break;
405 case GREGPROTO:
406 ifr->ifr_flags = sc->g_proto;
407 break;
408 case GRESADDRS:
409 case GRESADDRD:
410 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
411 break;
412 /*
413 * set tunnel endpoints, compute a less specific route
414 * to the remote end and mark if as up
415 */
416 sa = &ifr->ifr_addr;
417 if (cmd == GRESADDRS)
418 sc->g_src = (satosin(sa))->sin_addr;
419 if (cmd == GRESADDRD)
420 sc->g_dst = (satosin(sa))->sin_addr;
421 recompute:
422 if ((sc->g_src.s_addr != INADDR_ANY) &&
423 (sc->g_dst.s_addr != INADDR_ANY)) {
424 if (sc->route.ro_rt != 0) /* free old route */
425 RTFREE(sc->route.ro_rt);
426 if (gre_compute_route(sc) == 0)
427 ifp->if_flags |= IFF_RUNNING;
428 else
429 ifp->if_flags &= ~IFF_RUNNING;
430 }
431 break;
432 case GREGADDRS:
433 memset(&si, 0, sizeof(si));
434 si.sin_family = AF_INET;
435 si.sin_len = sizeof(struct sockaddr_in);
436 si.sin_addr.s_addr = sc->g_src.s_addr;
437 sa = sintosa(&si);
438 ifr->ifr_addr = *sa;
439 break;
440 case GREGADDRD:
441 memset(&si, 0, sizeof(si));
442 si.sin_family = AF_INET;
443 si.sin_len = sizeof(struct sockaddr_in);
444 si.sin_addr.s_addr = sc->g_dst.s_addr;
445 sa = sintosa(&si);
446 ifr->ifr_addr = *sa;
447 break;
448 case SIOCSLIFPHYADDR:
449 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
450 break;
451 if (lifr->addr.ss_family != AF_INET ||
452 lifr->dstaddr.ss_family != AF_INET) {
453 error = EAFNOSUPPORT;
454 break;
455 }
456 if (lifr->addr.ss_len != sizeof(si) ||
457 lifr->dstaddr.ss_len != sizeof(si)) {
458 error = EINVAL;
459 break;
460 }
461 sc->g_src = (satosin((struct sockadrr *)&lifr->addr))->sin_addr;
462 sc->g_dst =
463 (satosin((struct sockadrr *)&lifr->dstaddr))->sin_addr;
464 goto recompute;
465 case SIOCDIFPHYADDR:
466 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
467 break;
468 sc->g_src.s_addr = INADDR_ANY;
469 sc->g_dst.s_addr = INADDR_ANY;
470 break;
471 case SIOCGLIFPHYADDR:
472 if (sc->g_src.s_addr == INADDR_ANY ||
473 sc->g_dst.s_addr == INADDR_ANY) {
474 error = EADDRNOTAVAIL;
475 break;
476 }
477 memset(&si, 0, sizeof(si));
478 si.sin_family = AF_INET;
479 si.sin_len = sizeof(struct sockaddr_in);
480 si.sin_addr.s_addr = sc->g_src.s_addr;
481 memcpy(&lifr->addr, &si, sizeof(si));
482 si.sin_addr.s_addr = sc->g_dst.s_addr;
483 memcpy(&lifr->dstaddr, &si, sizeof(si));
484 break;
485 default:
486 error = EINVAL;
487 break;
488 }
489
490 splx(s);
491 return (error);
492 }
493
494 /*
495 * computes a route to our destination that is not the one
496 * which would be taken by ip_output(), as this one will loop back to
497 * us. If the interface is p2p as a--->b, then a routing entry exists
498 * If we now send a packet to b (e.g. ping b), this will come down here
499 * gets src=a, dst=b tacked on and would from ip_output() sent back to
500 * if_gre.
501 * Goal here is to compute a route to b that is less specific than
502 * a-->b. We know that this one exists as in normal operation we have
503 * at least a default route which matches.
504 */
505 int
506 gre_compute_route(struct gre_softc *sc)
507 {
508 struct route *ro;
509 u_int32_t a, b, c;
510
511 ro = &sc->route;
512
513 memset(ro, 0, sizeof(struct route));
514 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
515 ro->ro_dst.sa_family = AF_INET;
516 ro->ro_dst.sa_len = sizeof(ro->ro_dst);
517
518 /*
519 * toggle last bit, so our interface is not found, but a less
520 * specific route. I'd rather like to specify a shorter mask,
521 * but this is not possible. Should work though. XXX
522 * there is a simpler way ...
523 */
524 if ((sc->sc_if.if_flags & IFF_LINK1) == 0) {
525 a = ntohl(sc->g_dst.s_addr);
526 b = a & 0x01;
527 c = a & 0xfffffffe;
528 b = b ^ 0x01;
529 a = b | c;
530 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr
531 = htonl(a);
532 }
533
534 #ifdef DIAGNOSTIC
535 printf("%s: searching for a route to %s", sc->sc_if.if_xname,
536 inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr));
537 #endif
538
539 rtalloc(ro);
540
541 /*
542 * check if this returned a route at all and this route is no
543 * recursion to ourself
544 */
545 if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
546 #ifdef DIAGNOSTIC
547 if (ro->ro_rt == NULL)
548 printf(" - no route found!\n");
549 else
550 printf(" - route loops back to ourself!\n");
551 #endif
552 return EADDRNOTAVAIL;
553 }
554
555 /*
556 * now change it back - else ip_output will just drop
557 * the route and search one to this interface ...
558 */
559 if ((sc->sc_if.if_flags & IFF_LINK1) == 0)
560 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
561
562 #ifdef DIAGNOSTIC
563 printf(", choosing %s with gateway %s", ro->ro_rt->rt_ifp->if_xname,
564 inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr));
565 printf("\n");
566 #endif
567
568 return 0;
569 }
570
/*
 * Checksum a flat buffer - the same kind of sum in_cksum computes,
 * except that in_cksum operates on mbufs.
 *
 *	p	start of the buffer, read as 16-bit words
 *	len	length of the buffer in bytes; a trailing odd byte is
 *		summed as a word whose second byte is zero
 *
 * Returns the complement of the 16-bit end-around-carry sum.
 */
u_int16_t
gre_in_cksum(u_int16_t *p, u_int len)
{
	u_int32_t acc = 0;
	const u_int full_words = len >> 1;
	u_int i;

	for (i = 0; i < full_words; i++)
		acc += p[i];

	if ((len & 1) != 0) {
		/* pad the last byte with a zero byte to form a word */
		union {
			u_short word;
			u_char byte[2];
		} tail;

		tail.byte[0] = *(u_char *)(p + full_words);
		tail.byte[1] = 0;
		acc += tail.word;
	}

	/* end-around-carry: fold the upper half into the lower, twice */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += acc >> 16;

	return (u_int16_t)~acc;
}
599 #endif
600
void	greattach(int);

/*
 * Attach hook for the gre driver.  When INET is configured it
 * initialises the global softc list and registers the "gre" cloner;
 * otherwise it does nothing.  The count argument is ignored.
 */
/* ARGSUSED */
void
greattach(int count)
{
#ifdef INET
	LIST_INIT(&gre_softc_list);
	if_clone_attach(&gre_cloner);
#endif
}
613