/*	$NetBSD: if_gre.c,v 1.17.2.7 2002/06/24 22:11:30 nathanw Exp $	*/
2
3 /*
4 * Copyright (c) 1998 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Heiko W.Rupp <hwr (at) pilhuhn.de>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
/*
 * Encapsulate L3 protocols into IP.
 * See RFC 1701 and 1702 for more details.
 * if_gre is compatible with Cisco GRE tunnels, so you can
 * have a NetBSD box as the other end of a tunnel interface of a Cisco
 * router. See gre(4) for more details.
 * Also supported: minimal IP-in-IP encapsulation (IP protocol 55)
 * as described in RFC 2004.
 */
47
48 #include <sys/cdefs.h>
49 __KERNEL_RCSID(0, "$NetBSD: if_gre.c,v 1.17.2.7 2002/06/24 22:11:30 nathanw Exp $");
50
51 #include "opt_inet.h"
52 #include "opt_ns.h"
53 #include "bpfilter.h"
54
55 #include <sys/param.h>
56 #include <sys/malloc.h>
57 #include <sys/mbuf.h>
58 #include <sys/lwp.h>
59 #include <sys/proc.h>
60 #include <sys/protosw.h>
61 #include <sys/socket.h>
62 #include <sys/ioctl.h>
63 #include <sys/queue.h>
64 #if __NetBSD__
65 #include <sys/systm.h>
66 #endif
67
68 #include <machine/cpu.h>
69
70 #include <net/ethertypes.h>
71 #include <net/if.h>
72 #include <net/if_types.h>
73 #include <net/netisr.h>
74 #include <net/route.h>
75
76 #ifdef INET
77 #include <netinet/in.h>
78 #include <netinet/in_systm.h>
79 #include <netinet/in_var.h>
80 #include <netinet/ip.h>
81 #include <netinet/ip_var.h>
82 #else
83 #error "Huh? if_gre without inet?"
84 #endif
85
86 #ifdef NS
87 #include <netns/ns.h>
88 #include <netns/ns_if.h>
89 #endif
90
91 #ifdef NETATALK
92 #include <netatalk/at.h>
93 #include <netatalk/at_var.h>
94 #include <netatalk/at_extern.h>
95 #endif
96
97 #if NBPFILTER > 0
98 #include <sys/time.h>
99 #include <net/bpf.h>
100 #endif
101
102 #include <net/if_gre.h>
103
104 /*
105 * It is not easy to calculate the right value for a GRE MTU.
106 * We leave this task to the admin and use the same default that
107 * other vendors use.
108 */
109 #define GREMTU 1476
110
111 struct gre_softc_head gre_softc_list;
112 int ip_gre_ttl = GRE_TTL;
113
114 int gre_clone_create __P((struct if_clone *, int));
115 void gre_clone_destroy __P((struct ifnet *));
116
117 struct if_clone gre_cloner =
118 IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
119
120 int gre_compute_route(struct gre_softc *sc);
121
122 void greattach __P((int));
123
124 /* ARGSUSED */
125 void
126 greattach(count)
127 int count;
128 {
129
130 LIST_INIT(&gre_softc_list);
131 if_clone_attach(&gre_cloner);
132 }
133
134 int
135 gre_clone_create(ifc, unit)
136 struct if_clone *ifc;
137 int unit;
138 {
139 struct gre_softc *sc;
140
141 sc = malloc(sizeof(struct gre_softc), M_DEVBUF, M_WAITOK);
142 memset(sc, 0, sizeof(struct gre_softc));
143
144 sprintf(sc->sc_if.if_xname, "%s%d", ifc->ifc_name, unit);
145 sc->sc_if.if_softc = sc;
146 sc->sc_if.if_type = IFT_OTHER;
147 sc->sc_if.if_addrlen = 0;
148 sc->sc_if.if_hdrlen = 24; /* IP + GRE */
149 sc->sc_if.if_dlt = DLT_NULL;
150 sc->sc_if.if_mtu = GREMTU;
151 sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
152 sc->sc_if.if_output = gre_output;
153 sc->sc_if.if_ioctl = gre_ioctl;
154 sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
155 sc->g_proto = IPPROTO_GRE;
156 sc->sc_if.if_flags |= IFF_LINK0;
157 if_attach(&sc->sc_if);
158 if_alloc_sadl(&sc->sc_if);
159 #if NBPFILTER > 0
160 bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t));
161 #endif
162 LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
163 return (0);
164 }
165
166 void
167 gre_clone_destroy(ifp)
168 struct ifnet *ifp;
169 {
170 struct gre_softc *sc = ifp->if_softc;
171
172 LIST_REMOVE(sc, sc_list);
173 #if NBPFILTER > 0
174 bpfdetach(ifp);
175 #endif
176 if_detach(ifp);
177 free(sc, M_DEVBUF);
178 }
179
180 /*
181 * The output routine. Takes a packet and encapsulates it in the protocol
182 * given by sc->g_proto. See also RFC 1701 and RFC 2004
183 */
184 int
185 gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
186 struct rtentry *rt)
187 {
188 int error = 0;
189 struct gre_softc *sc = ifp->if_softc;
190 struct greip *gh;
191 struct ip *inp;
192 u_char osrc;
193 u_short etype = 0;
194 struct mobile_h mob_h;
195
196 if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) == 0 ||
197 sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
198 m_freem(m);
199 error = ENETDOWN;
200 goto end;
201 }
202
203 gh = NULL;
204 inp = NULL;
205 osrc = 0;
206
207 #if NBPFILTER >0
208 if (ifp->if_bpf) {
209 /* see comment of other if_foo.c files */
210 struct mbuf m0;
211 u_int32_t af = dst->sa_family;
212
213 m0.m_next = m;
214 m0.m_len = 4;
215 m0.m_data = (char *)⁡
216
217 bpf_mtap(ifp->if_bpf, &m0);
218 }
219 #endif
220
221 m->m_flags &= ~(M_BCAST|M_MCAST);
222
223 if (sc->g_proto == IPPROTO_MOBILE) {
224 if (dst->sa_family == AF_INET) {
225 struct mbuf *m0;
226 int msiz;
227
228 inp = mtod(m, struct ip *);
229
230 memset(&mob_h, 0, MOB_H_SIZ_L);
231 mob_h.proto = (inp->ip_p) << 8;
232 mob_h.odst = inp->ip_dst.s_addr;
233 inp->ip_dst.s_addr = sc->g_dst.s_addr;
234
235 /*
236 * If the packet comes from our host, we only change
237 * the destination address in the IP header.
238 * Else we also need to save and change the source
239 */
240 if (in_hosteq(inp->ip_src, sc->g_src)) {
241 msiz = MOB_H_SIZ_S;
242 } else {
243 mob_h.proto |= MOB_H_SBIT;
244 mob_h.osrc = inp->ip_src.s_addr;
245 inp->ip_src.s_addr = sc->g_src.s_addr;
246 msiz = MOB_H_SIZ_L;
247 }
248 HTONS(mob_h.proto);
249 mob_h.hcrc = gre_in_cksum((u_short *)&mob_h, msiz);
250
251 if ((m->m_data - msiz) < m->m_pktdat) {
252 /* need new mbuf */
253 MGETHDR(m0, M_DONTWAIT, MT_HEADER);
254 if (m0 == NULL) {
255 IF_DROP(&ifp->if_snd);
256 m_freem(m);
257 error = ENOBUFS;
258 goto end;
259 }
260 m0->m_next = m;
261 m->m_data += sizeof(struct ip);
262 m->m_len -= sizeof(struct ip);
263 m0->m_pkthdr.len = m->m_pkthdr.len + msiz;
264 m0->m_len = msiz + sizeof(struct ip);
265 m0->m_data += max_linkhdr;
266 memcpy(mtod(m0, caddr_t), (caddr_t)inp,
267 sizeof(struct ip));
268 m = m0;
269 } else { /* we have some space left in the old one */
270 m->m_data -= msiz;
271 m->m_len += msiz;
272 m->m_pkthdr.len += msiz;
273 memmove(mtod(m, caddr_t), inp,
274 sizeof(struct ip));
275 }
276 inp = mtod(m, struct ip *);
277 memcpy((caddr_t)(inp + 1), &mob_h, (unsigned)msiz);
278 NTOHS(inp->ip_len);
279 inp->ip_len += msiz;
280 } else { /* AF_INET */
281 IF_DROP(&ifp->if_snd);
282 m_freem(m);
283 error = EINVAL;
284 goto end;
285 }
286 } else if (sc->g_proto == IPPROTO_GRE) {
287 switch (dst->sa_family) {
288 case AF_INET:
289 inp = mtod(m, struct ip *);
290 etype = ETHERTYPE_IP;
291 break;
292 #ifdef NETATALK
293 case AF_APPLETALK:
294 etype = ETHERTYPE_ATALK;
295 break;
296 #endif
297 #ifdef NS
298 case AF_NS:
299 etype = ETHERTYPE_NS;
300 break;
301 #endif
302 default:
303 IF_DROP(&ifp->if_snd);
304 m_freem(m);
305 error = EAFNOSUPPORT;
306 goto end;
307 }
308 M_PREPEND(m, sizeof(struct greip), M_DONTWAIT);
309 } else {
310 IF_DROP(&ifp->if_snd);
311 m_freem(m);
312 error = EINVAL;
313 goto end;
314 }
315
316 if (m == NULL) { /* impossible */
317 IF_DROP(&ifp->if_snd);
318 error = ENOBUFS;
319 goto end;
320 }
321
322 gh = mtod(m, struct greip *);
323 if (sc->g_proto == IPPROTO_GRE) {
324 /* we don't have any GRE flags for now */
325
326 memset((void *)&gh->gi_g, 0, sizeof(struct gre_h));
327 gh->gi_ptype = htons(etype);
328 }
329
330 gh->gi_pr = sc->g_proto;
331 if (sc->g_proto != IPPROTO_MOBILE) {
332 gh->gi_src = sc->g_src;
333 gh->gi_dst = sc->g_dst;
334 ((struct ip*)gh)->ip_hl = (sizeof(struct ip)) >> 2;
335 ((struct ip*)gh)->ip_ttl = ip_gre_ttl;
336 ((struct ip*)gh)->ip_tos = inp->ip_tos;
337 gh->gi_len = m->m_pkthdr.len;
338 }
339
340 ifp->if_opackets++;
341 ifp->if_obytes += m->m_pkthdr.len;
342 /* send it off */
343 error = ip_output(m, NULL, &sc->route, 0, NULL);
344 end:
345 if (error)
346 ifp->if_oerrors++;
347 return (error);
348 }
349
350 int
351 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
352 {
353 struct proc *p = curproc; /* XXX */
354 struct ifreq *ifr = (struct ifreq *)data;
355 struct if_laddrreq *lifr = (struct if_laddrreq *)data;
356 struct gre_softc *sc = ifp->if_softc;
357 int s;
358 struct sockaddr_in si;
359 struct sockaddr *sa = NULL;
360 int error;
361
362 error = 0;
363
364 s = splnet();
365 switch (cmd) {
366 case SIOCSIFADDR:
367 ifp->if_flags |= IFF_UP;
368 break;
369 case SIOCSIFDSTADDR:
370 break;
371 case SIOCSIFFLAGS:
372 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
373 break;
374 if ((ifr->ifr_flags & IFF_LINK0) != 0)
375 sc->g_proto = IPPROTO_GRE;
376 else
377 sc->g_proto = IPPROTO_MOBILE;
378 break;
379 case SIOCSIFMTU:
380 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
381 break;
382 if (ifr->ifr_mtu < 576) {
383 error = EINVAL;
384 break;
385 }
386 ifp->if_mtu = ifr->ifr_mtu;
387 break;
388 case SIOCGIFMTU:
389 ifr->ifr_mtu = sc->sc_if.if_mtu;
390 break;
391 case SIOCADDMULTI:
392 case SIOCDELMULTI:
393 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
394 break;
395 if (ifr == 0) {
396 error = EAFNOSUPPORT;
397 break;
398 }
399 switch (ifr->ifr_addr.sa_family) {
400 #ifdef INET
401 case AF_INET:
402 break;
403 #endif
404 default:
405 error = EAFNOSUPPORT;
406 break;
407 }
408 break;
409 case GRESPROTO:
410 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
411 break;
412 sc->g_proto = ifr->ifr_flags;
413 switch (sc->g_proto) {
414 case IPPROTO_GRE:
415 ifp->if_flags |= IFF_LINK0;
416 break;
417 case IPPROTO_MOBILE:
418 ifp->if_flags &= ~IFF_LINK0;
419 break;
420 default:
421 error = EPROTONOSUPPORT;
422 break;
423 }
424 break;
425 case GREGPROTO:
426 ifr->ifr_flags = sc->g_proto;
427 break;
428 case GRESADDRS:
429 case GRESADDRD:
430 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
431 break;
432 /*
433 * set tunnel endpoints, compute a less specific route
434 * to the remote end and mark if as up
435 */
436 sa = &ifr->ifr_addr;
437 if (cmd == GRESADDRS)
438 sc->g_src = (satosin(sa))->sin_addr;
439 if (cmd == GRESADDRD)
440 sc->g_dst = (satosin(sa))->sin_addr;
441 recompute:
442 if ((sc->g_src.s_addr != INADDR_ANY) &&
443 (sc->g_dst.s_addr != INADDR_ANY)) {
444 if (sc->route.ro_rt != 0) /* free old route */
445 RTFREE(sc->route.ro_rt);
446 if (gre_compute_route(sc) == 0)
447 ifp->if_flags |= IFF_RUNNING;
448 else
449 ifp->if_flags &= ~IFF_RUNNING;
450 }
451 break;
452 case GREGADDRS:
453 memset(&si, 0, sizeof(si));
454 si.sin_family = AF_INET;
455 si.sin_len = sizeof(struct sockaddr_in);
456 si.sin_addr.s_addr = sc->g_src.s_addr;
457 sa = sintosa(&si);
458 ifr->ifr_addr = *sa;
459 break;
460 case GREGADDRD:
461 memset(&si, 0, sizeof(si));
462 si.sin_family = AF_INET;
463 si.sin_len = sizeof(struct sockaddr_in);
464 si.sin_addr.s_addr = sc->g_dst.s_addr;
465 sa = sintosa(&si);
466 ifr->ifr_addr = *sa;
467 break;
468 case SIOCSLIFPHYADDR:
469 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
470 break;
471 if (lifr->addr.ss_family != AF_INET ||
472 lifr->dstaddr.ss_family != AF_INET) {
473 error = EAFNOSUPPORT;
474 break;
475 }
476 if (lifr->addr.ss_len != sizeof(si) ||
477 lifr->dstaddr.ss_len != sizeof(si)) {
478 error = EINVAL;
479 break;
480 }
481 sc->g_src = (satosin((struct sockadrr *)&lifr->addr))->sin_addr;
482 sc->g_dst =
483 (satosin((struct sockadrr *)&lifr->dstaddr))->sin_addr;
484 goto recompute;
485 case SIOCDIFPHYADDR:
486 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
487 break;
488 sc->g_src.s_addr = INADDR_ANY;
489 sc->g_dst.s_addr = INADDR_ANY;
490 break;
491 case SIOCGLIFPHYADDR:
492 if (sc->g_src.s_addr == INADDR_ANY ||
493 sc->g_dst.s_addr == INADDR_ANY) {
494 error = EADDRNOTAVAIL;
495 break;
496 }
497 memset(&si, 0, sizeof(si));
498 si.sin_family = AF_INET;
499 si.sin_len = sizeof(struct sockaddr_in);
500 si.sin_addr.s_addr = sc->g_src.s_addr;
501 memcpy(&lifr->addr, &si, sizeof(si));
502 si.sin_addr.s_addr = sc->g_dst.s_addr;
503 memcpy(&lifr->dstaddr, &si, sizeof(si));
504 break;
505 default:
506 error = EINVAL;
507 break;
508 }
509
510 splx(s);
511 return (error);
512 }
513
514 /*
515 * computes a route to our destination that is not the one
516 * which would be taken by ip_output(), as this one will loop back to
517 * us. If the interface is p2p as a--->b, then a routing entry exists
518 * If we now send a packet to b (e.g. ping b), this will come down here
519 * gets src=a, dst=b tacked on and would from ip_ouput() sent back to
520 * if_gre.
521 * Goal here is to compute a route to b that is less specific than
522 * a-->b. We know that this one exists as in normal operation we have
523 * at least a default route which matches.
524 */
525 int
526 gre_compute_route(struct gre_softc *sc)
527 {
528 struct route *ro;
529 u_int32_t a, b, c;
530
531 ro = &sc->route;
532
533 memset(ro, 0, sizeof(struct route));
534 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
535 ro->ro_dst.sa_family = AF_INET;
536 ro->ro_dst.sa_len = sizeof(ro->ro_dst);
537
538 /*
539 * toggle last bit, so our interface is not found, but a less
540 * specific route. I'd rather like to specify a shorter mask,
541 * but this is not possible. Should work though. XXX
542 * there is a simpler way ...
543 */
544 if ((sc->sc_if.if_flags & IFF_LINK1) == 0) {
545 a = ntohl(sc->g_dst.s_addr);
546 b = a & 0x01;
547 c = a & 0xfffffffe;
548 b = b ^ 0x01;
549 a = b | c;
550 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr
551 = htonl(a);
552 }
553
554 #ifdef DIAGNOSTIC
555 printf("%s: searching a route to %s", sc->sc_if.if_xname,
556 inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr));
557 #endif
558
559 rtalloc(ro);
560
561 /*
562 * check if this returned a route at all and this route is no
563 * recursion to ourself
564 */
565 if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
566 #ifdef DIAGNOSTIC
567 if (ro->ro_rt == NULL)
568 printf(" - no route found!\n");
569 else
570 printf(" - route loops back to ourself!\n");
571 #endif
572 return EADDRNOTAVAIL;
573 }
574
575 /*
576 * now change it back - else ip_output will just drop
577 * the route and search one to this interface ...
578 */
579 if ((sc->sc_if.if_flags & IFF_LINK1) == 0)
580 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
581
582 #ifdef DIAGNOSTIC
583 printf(", choosing %s with gateway %s", ro->ro_rt->rt_ifp->if_xname,
584 inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr));
585 printf("\n");
586 #endif
587
588 return 0;
589 }
590
/*
 * One's-complement checksum over a flat buffer - the counterpart of
 * in_cksum(), which works on mbuf chains.
 *
 * p	start of the buffer, read as native 16-bit words
 * len	length of the buffer in bytes; a trailing odd byte is summed
 *	as a 16-bit word whose second byte is zero
 *
 * Returns the complement of the folded 16-bit sum.
 */
u_short
gre_in_cksum(u_short *p, u_int len)
{
	u_int acc = 0;
	u_int left;

	/* Add up all complete 16-bit words. */
	for (left = len; left > 1; left -= 2)
		acc += *p++;

	/* Pad a leftover byte with a zero byte and add it as well. */
	if (left == 1) {
		u_short tail = 0;

		*(u_char *)&tail = *(u_char *)p;
		acc += tail;
	}

	/* Fold the carries back into the low 16 bits (end-around carry). */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += (acc >> 16);
	return (~acc);
}
619