if_gre.c revision 1.17.2.2 1 /* $NetBSD: if_gre.c,v 1.17.2.2 2001/06/21 20:08:04 nathanw Exp $ */
2
3 /*
4 * Copyright (c) 1998 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Heiko W.Rupp <hwr (at) pilhuhn.de>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 /*
40 * Encapsulate L3 protocols into IP
41 * See RFC 1701 and 1702 for more details.
42 * If_gre is compatible with Cisco GRE tunnels, so you can
43 * have a NetBSD box as the other end of a tunnel interface of a Cisco
44 * router. See gre(4) for more details.
45 * Also supported: IP in IP encaps (proto 55) as of RFC 2004
46 */
47
48 #include "gre.h"
49 #if NGRE > 0
50
51 #include "opt_inet.h"
52 #include "opt_ns.h"
53 #include "bpfilter.h"
54
55 #include <sys/param.h>
56 #include <sys/malloc.h>
57 #include <sys/mbuf.h>
58 #include <sys/lwp.h>
59 #include <sys/proc.h>
60 #include <sys/protosw.h>
61 #include <sys/socket.h>
62 #include <sys/ioctl.h>
63 #include <sys/queue.h>
64 #if __NetBSD__
65 #include <sys/systm.h>
66 #endif
67
68 #include <machine/cpu.h>
69
70 #include <net/ethertypes.h>
71 #include <net/if.h>
72 #include <net/if_types.h>
73 #include <net/netisr.h>
74 #include <net/route.h>
75
76 #ifdef INET
77 #include <netinet/in.h>
78 #include <netinet/in_systm.h>
79 #include <netinet/in_var.h>
80 #include <netinet/ip.h>
81 #include <netinet/ip_var.h>
82 #else
83 #error "Huh? if_gre without inet?"
84 #endif
85
86 #ifdef NS
87 #include <netns/ns.h>
88 #include <netns/ns_if.h>
89 #endif
90
91 #ifdef NETATALK
92 #include <netatalk/at.h>
93 #include <netatalk/at_var.h>
94 #include <netatalk/at_extern.h>
95 #endif
96
97 #if NBPFILTER > 0
98 #include <sys/time.h>
99 #include <net/bpf.h>
100 #endif
101
102 #include <net/if_gre.h>
103
104 /*
105 * XXX this is below the standard MTU of
106 * 1500 Bytes, allowing for headers,
107 * but we should possibly do path mtu discovery
108 * before changing if state to up to find the
109 * correct value
110 */
111 #define GREMTU 1450
112 #define LINK_MASK (IFF_LINK0|IFF_LINK1|IFF_LINK2)
113
114 struct gre_softc_head gre_softc_list;
115
116 int gre_clone_create __P((struct if_clone *, int));
117 void gre_clone_destroy __P((struct ifnet *));
118
119 struct if_clone gre_cloner =
120 IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
121
122 void gre_compute_route(struct gre_softc *sc);
123
124 void greattach __P((int));
125
126 /* ARGSUSED */
127 void
128 greattach(count)
129 int count;
130 {
131
132 LIST_INIT(&gre_softc_list);
133 if_clone_attach(&gre_cloner);
134 }
135
136 int
137 gre_clone_create(ifc, unit)
138 struct if_clone *ifc;
139 int unit;
140 {
141 struct gre_softc *sc;
142
143 sc = malloc(sizeof(struct gre_softc), M_DEVBUF, M_WAITOK);
144 memset(sc, 0, sizeof(struct gre_softc));
145
146 sprintf(sc->sc_if.if_xname, "%s%d", ifc->ifc_name, unit);
147 sc->sc_if.if_softc = sc;
148 sc->sc_if.if_type = IFT_OTHER;
149 sc->sc_if.if_addrlen = 4;
150 sc->sc_if.if_hdrlen = 24; /* IP + GRE */
151 sc->sc_if.if_dlt = DLT_NULL;
152 sc->sc_if.if_mtu = GREMTU;
153 sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
154 sc->sc_if.if_output = gre_output;
155 sc->sc_if.if_ioctl = gre_ioctl;
156 sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
157 sc->g_proto = IPPROTO_GRE;
158 if_attach(&sc->sc_if);
159 if_alloc_sadl(&sc->sc_if);
160 #if NBPFILTER > 0
161 bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t));
162 #endif
163 LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
164 return (0);
165 }
166
167 void
168 gre_clone_destroy(ifp)
169 struct ifnet *ifp;
170 {
171 struct gre_softc *sc = ifp->if_softc;
172
173 LIST_REMOVE(sc, sc_list);
174 #if NBPFILTER > 0
175 bpfdetach(ifp);
176 #endif
177 if_detach(ifp);
178 free(sc, M_DEVBUF);
179 }
180
181 /*
182 * The output routine. Takes a packet and encapsulates it in the protocol
183 * given by sc->g_proto. See also RFC 1701 and RFC 2004
184 */
185 int
186 gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
187 struct rtentry *rt)
188 {
189 int error = 0;
190 struct gre_softc *sc = ifp->if_softc;
191 struct greip *gh;
192 struct ip *inp;
193 u_char ttl, osrc;
194 u_short etype = 0;
195 struct mobile_h mob_h;
196
197 gh = NULL;
198 inp = NULL;
199 osrc = 0;
200
201 #if NBPFILTER >0
202 if (ifp->if_bpf) {
203 /* see comment of other if_foo.c files */
204 struct mbuf m0;
205 u_int32_t af = dst->sa_family;
206
207 m0.m_next = m;
208 m0.m_len = 4;
209 m0.m_data = (char *)⁡
210
211 bpf_mtap(ifp->if_bpf, &m0);
212 }
213 #endif
214
215 ttl = 255;
216
217 if (sc->g_proto == IPPROTO_MOBILE) {
218 if (dst->sa_family == AF_INET) {
219 struct mbuf *m0;
220 int msiz;
221
222 inp = mtod(m, struct ip *);
223
224 memset(&mob_h, 0, MOB_H_SIZ_L);
225 mob_h.proto = (inp->ip_p) << 8;
226 mob_h.odst = inp->ip_dst.s_addr;
227 inp->ip_dst.s_addr = sc->g_dst.s_addr;
228
229 /*
230 * If the packet comes from our host, we only change
231 * the destination address in the IP header.
232 * Else we also need to save and change the source
233 */
234 if (in_hosteq(inp->ip_src, sc->g_src)) {
235 msiz = MOB_H_SIZ_S;
236 } else {
237 mob_h.proto |= MOB_H_SBIT;
238 mob_h.osrc = inp->ip_src.s_addr;
239 inp->ip_src.s_addr = sc->g_src.s_addr;
240 msiz = MOB_H_SIZ_L;
241 }
242 HTONS(mob_h.proto);
243 mob_h.hcrc = gre_in_cksum((u_short *)&mob_h, msiz);
244
245 if ((m->m_data - msiz) < m->m_pktdat) {
246 /* need new mbuf */
247 MGETHDR(m0, M_DONTWAIT, MT_HEADER);
248 if (m0 == NULL) {
249 IF_DROP(&ifp->if_snd);
250 m_freem(m);
251 return (ENOBUFS);
252 }
253 m0->m_next = m;
254 m->m_data += sizeof(struct ip);
255 m->m_len -= sizeof(struct ip);
256 m0->m_pkthdr.len = m->m_pkthdr.len + msiz;
257 m0->m_len = msiz + sizeof(struct ip);
258 m0->m_data += max_linkhdr;
259 memcpy(mtod(m0, caddr_t), (caddr_t)inp,
260 sizeof(struct ip));
261 m = m0;
262 } else { /* we have some space left in the old one */
263 m->m_data -= msiz;
264 m->m_len += msiz;
265 m->m_pkthdr.len += msiz;
266 memmove(mtod(m, caddr_t), inp,
267 sizeof(struct ip));
268 }
269 inp=mtod(m, struct ip *);
270 memcpy((caddr_t)(inp + 1), &mob_h, (unsigned)msiz);
271 NTOHS(inp->ip_len);
272 inp->ip_len += msiz;
273 } else { /* AF_INET */
274 IF_DROP(&ifp->if_snd);
275 m_freem(m);
276 return (EINVAL);
277 }
278 } else if (sc->g_proto == IPPROTO_GRE) {
279 switch (dst->sa_family) {
280 case AF_INET:
281 inp = mtod(m, struct ip *);
282 ttl = inp->ip_ttl;
283 etype = ETHERTYPE_IP;
284 break;
285 #ifdef NETATALK
286 case AF_APPLETALK:
287 etype = ETHERTYPE_ATALK;
288 break;
289 #endif
290 #ifdef NS
291 case AF_NS:
292 etype = ETHERTYPE_NS;
293 break;
294 #endif
295 default:
296 IF_DROP(&ifp->if_snd);
297 m_freem(m);
298 return (EAFNOSUPPORT);
299 }
300 M_PREPEND(m, sizeof(struct greip), M_DONTWAIT);
301 } else {
302 error = EINVAL;
303 IF_DROP(&ifp->if_snd);
304 m_freem(m);
305 return (error);
306 }
307
308 if (m == NULL) {
309 IF_DROP(&ifp->if_snd);
310 return (ENOBUFS);
311 }
312
313 gh = mtod(m, struct greip *);
314 if (sc->g_proto == IPPROTO_GRE) {
315 /* we don't have any GRE flags for now */
316
317 memset((void *)&gh->gi_g, 0, sizeof(struct gre_h));
318 gh->gi_ptype = htons(etype);
319 }
320
321 gh->gi_pr = sc->g_proto;
322 if (sc->g_proto != IPPROTO_MOBILE) {
323 gh->gi_src = sc->g_src;
324 gh->gi_dst = sc->g_dst;
325 ((struct ip*)gh)->ip_hl = (sizeof(struct ip)) >> 2;
326 ((struct ip*)gh)->ip_ttl = ttl;
327 ((struct ip*)gh)->ip_tos = inp->ip_tos;
328 gh->gi_len = m->m_pkthdr.len;
329 }
330
331 ifp->if_opackets++;
332 ifp->if_obytes += m->m_pkthdr.len;
333 /* send it off */
334 error = ip_output(m, NULL, &sc->route, 0, NULL);
335 if (error)
336 ifp->if_oerrors++;
337 return (error);
338 }
339
340 int
341 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
342 {
343 struct proc *p = curproc->l_proc; /* XXX */
344 struct ifaddr *ifa = (struct ifaddr *)data;
345 struct ifreq *ifr = (struct ifreq *)data;
346 struct in_ifaddr *ia = (struct in_ifaddr *)data;
347 struct gre_softc *sc = ifp->if_softc;
348 int s;
349 struct sockaddr_in si;
350 struct sockaddr *sa = NULL;
351 int error;
352
353 error = 0;
354
355 s = splnet();
356 switch (cmd) {
357 case SIOCSIFADDR:
358 case SIOCSIFDSTADDR:
359 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
360 break;
361 /*
362 * set tunnel endpoints in case that we "only"
363 * have ip over ip encapsulation. This allows to
364 * set tunnel endpoints with ifconfig.
365 */
366 if (ifa->ifa_addr->sa_family == AF_INET) {
367 sa = ifa->ifa_addr;
368 sc->g_src = (satosin(sa))->sin_addr;
369 sc->g_dst = ia->ia_dstaddr.sin_addr;
370 if ((sc->g_src.s_addr != INADDR_ANY) &&
371 (sc->g_dst.s_addr != INADDR_ANY)) {
372 if (sc->route.ro_rt != 0) /* free old route */
373 RTFREE(sc->route.ro_rt);
374 gre_compute_route(sc);
375 ifp->if_flags |= IFF_UP;
376 }
377 }
378 break;
379 case SIOCSIFFLAGS:
380 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
381 break;
382 if ((sc->g_dst.s_addr == INADDR_ANY) ||
383 (sc->g_src.s_addr == INADDR_ANY))
384 ifp->if_flags &= ~IFF_UP;
385
386 switch (ifr->ifr_flags & LINK_MASK) {
387 case IFF_LINK0:
388 sc->g_proto = IPPROTO_GRE;
389 ifp->if_flags |= IFF_LINK0;
390 ifp->if_flags &= ~(IFF_LINK1|IFF_LINK2);
391 break;
392 case IFF_LINK2:
393 sc->g_proto = IPPROTO_MOBILE;
394 ifp->if_flags |= IFF_LINK2;
395 ifp->if_flags &= ~(IFF_LINK0|IFF_LINK1);
396 break;
397 }
398 break;
399 case SIOCSIFMTU:
400 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
401 break;
402 if (ifr->ifr_mtu > GREMTU || ifr->ifr_mtu < 576) {
403 error = EINVAL;
404 break;
405 }
406 ifp->if_mtu = ifr->ifr_mtu;
407 break;
408 case SIOCGIFMTU:
409 ifr->ifr_mtu = sc->sc_if.if_mtu;
410 break;
411 case SIOCADDMULTI:
412 case SIOCDELMULTI:
413 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
414 break;
415 if (ifr == 0) {
416 error = EAFNOSUPPORT;
417 break;
418 }
419 switch (ifr->ifr_addr.sa_family) {
420 #ifdef INET
421 case AF_INET:
422 break;
423 #endif
424 default:
425 error = EAFNOSUPPORT;
426 break;
427 }
428 break;
429 case GRESPROTO:
430 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
431 break;
432 sc->g_proto = ifr->ifr_flags;
433 switch (sc->g_proto) {
434 case IPPROTO_GRE :
435 ifp->if_flags |= IFF_LINK0;
436 ifp->if_flags &= ~(IFF_LINK1|IFF_LINK2);
437 break;
438 case IPPROTO_MOBILE :
439 ifp->if_flags |= IFF_LINK2;
440 ifp->if_flags &= ~(IFF_LINK1|IFF_LINK2);
441 break;
442 default:
443 ifp->if_flags &= ~(IFF_LINK0|IFF_LINK1|IFF_LINK2);
444 }
445 break;
446 case GREGPROTO:
447 ifr->ifr_flags = sc->g_proto;
448 break;
449 case GRESADDRS:
450 case GRESADDRD:
451 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
452 break;
453 /*
454 * set tunnel endpoints, compute a less specific route
455 * to the remote end and mark if as up
456 */
457 sa = &ifr->ifr_addr;
458 if (cmd == GRESADDRS )
459 sc->g_src = (satosin(sa))->sin_addr;
460 if (cmd == GRESADDRD )
461 sc->g_dst = (satosin(sa))->sin_addr;
462 if ((sc->g_src.s_addr != INADDR_ANY) &&
463 (sc->g_dst.s_addr != INADDR_ANY)) {
464 if (sc->route.ro_rt != 0) /* free old route */
465 RTFREE(sc->route.ro_rt);
466 gre_compute_route(sc);
467 ifp->if_flags |= IFF_UP;
468 }
469 break;
470 case GREGADDRS:
471 si.sin_addr.s_addr = sc->g_src.s_addr;
472 sa = sintosa(&si);
473 ifr->ifr_addr = *sa;
474 break;
475 case GREGADDRD:
476 si.sin_addr.s_addr = sc->g_dst.s_addr;
477 sa = sintosa(&si);
478 ifr->ifr_addr = *sa;
479 break;
480 default:
481 error = EINVAL;
482 }
483
484 splx(s);
485 return (error);
486 }
487
488 /*
489 * computes a route to our destination that is not the one
490 * which would be taken by ip_output(), as this one will loop back to
491 * us. If the interface is p2p as a--->b, then a routing entry exists
492 * If we now send a packet to b (e.g. ping b), this will come down here
493 * gets src=a, dst=b tacked on and would from ip_ouput() sent back to
494 * if_gre.
495 * Goal here is to compute a route to b that is less specific than
496 * a-->b. We know that this one exists as in normal operation we have
497 * at least a default route which matches.
498 */
499 void
500 gre_compute_route(struct gre_softc *sc)
501 {
502 struct route *ro;
503 u_int32_t a, b, c;
504
505 ro = &sc->route;
506
507 memset(ro, 0, sizeof(struct route));
508 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
509 ro->ro_dst.sa_family = AF_INET;
510 ro->ro_dst.sa_len = sizeof(ro->ro_dst);
511
512 /*
513 * toggle last bit, so our interface is not found, but a less
514 * specific route. I'd rather like to specify a shorter mask,
515 * but this is not possible. Should work though. XXX
516 * there is a simpler way ...
517 */
518 if ((sc->sc_if.if_flags & IFF_LINK1) == 0) {
519 a = ntohl(sc->g_dst.s_addr);
520 b = a & 0x01;
521 c = a & 0xfffffffe;
522 b = b ^ 0x01;
523 a = b | c;
524 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr
525 = htonl(a);
526 }
527
528 #ifdef DIAGNOSTIC
529 printf("%s: searching a route to %s", sc->sc_if.if_xname,
530 inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr));
531 #endif
532
533 rtalloc(ro);
534
535 /*
536 * now change it back - else ip_output will just drop
537 * the route and search one to this interface ...
538 */
539 if ((sc->sc_if.if_flags & IFF_LINK1) == 0)
540 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
541
542 #ifdef DIAGNOSTIC
543 printf(", choosing %s with gateway %s", ro->ro_rt->rt_ifp->if_xname,
544 inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr));
545 printf("\n");
546 #endif
547 }
548
/*
 * Internet-style checksum over a flat buffer - the counterpart of
 * in_cksum(), which works on mbuf chains.
 *
 *	p	start of the buffer, read as 16 bit words
 *	len	number of bytes to sum up
 *
 * Returns the one's complement of the folded 16 bit sum; a trailing
 * odd byte is treated as a word padded with a zero byte.
 */
u_short
gre_in_cksum(u_short *p, u_int len)
{
	u_int acc = 0;
	int left;

	/* add up all complete 16 bit words */
	for (left = len >> 1; left != 0; left--)
		acc += *p++;

	/* pick up the odd byte at the end, if there is one */
	if ((len & 1) != 0) {
		union {
			u_short w;
			u_char c[2];
		} tail;

		tail.c[0] = *(u_char *)p;
		tail.c[1] = 0;
		acc += tail.w;
	}

	/* end-around-carry */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += (acc >> 16);

	return (~acc);
}
577 #endif
578