if_gre.c revision 1.19 1 /* $NetBSD: if_gre.c,v 1.19 2001/05/10 01:04:08 itojun Exp $ */
2
3 /*
4 * Copyright (c) 1998 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Heiko W.Rupp <hwr (at) pilhuhn.de>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 /*
40 * Encapsulate L3 protocols into IP
41 * See RFC 1701 and 1702 for more details.
42 * If_gre is compatible with Cisco GRE tunnels, so you can
43 * have a NetBSD box as the other end of a tunnel interface of a Cisco
44 * router. See gre(4) for more details.
45 * Also supported: IP in IP encaps (proto 55) as of RFC 2004
46 */
47
48 #include "gre.h"
49 #if NGRE > 0
50
51 #include "opt_inet.h"
52 #include "opt_ns.h"
53 #include "bpfilter.h"
54
55 #include <sys/param.h>
56 #include <sys/malloc.h>
57 #include <sys/mbuf.h>
58 #include <sys/proc.h>
59 #include <sys/protosw.h>
60 #include <sys/socket.h>
61 #include <sys/ioctl.h>
62 #include <sys/queue.h>
63 #if __NetBSD__
64 #include <sys/systm.h>
65 #endif
66
67 #include <machine/cpu.h>
68
69 #include <net/ethertypes.h>
70 #include <net/if.h>
71 #include <net/if_types.h>
72 #include <net/netisr.h>
73 #include <net/route.h>
74
75 #ifdef INET
76 #include <netinet/in.h>
77 #include <netinet/in_systm.h>
78 #include <netinet/in_var.h>
79 #include <netinet/ip.h>
80 #include <netinet/ip_var.h>
81 #else
82 #error "Huh? if_gre without inet?"
83 #endif
84
85 #ifdef NS
86 #include <netns/ns.h>
87 #include <netns/ns_if.h>
88 #endif
89
90 #ifdef NETATALK
91 #include <netatalk/at.h>
92 #include <netatalk/at_var.h>
93 #include <netatalk/at_extern.h>
94 #endif
95
96 #if NBPFILTER > 0
97 #include <sys/time.h>
98 #include <net/bpf.h>
99 #endif
100
101 #include <net/if_gre.h>
102
/*
 * XXX GREMTU is below the standard MTU of 1500 bytes, allowing for
 * headers, but we should possibly do path MTU discovery before changing
 * the interface state to up to find the correct value.
 */
#define GREMTU		1450

/* All link-level flag bits that gre_ioctl() inspects together. */
#define LINK_MASK	(IFF_LINK0 | IFF_LINK1 | IFF_LINK2)
109
110 struct gre_softc_head gre_softc_list;
111
112 int gre_clone_create __P((struct if_clone *, int));
113 void gre_clone_destroy __P((struct ifnet *));
114
115 struct if_clone gre_cloner =
116 IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
117
118 void gre_compute_route(struct gre_softc *sc);
119
120 void greattach __P((int));
121
122 /* ARGSUSED */
123 void
124 greattach(count)
125 int count;
126 {
127
128 LIST_INIT(&gre_softc_list);
129 if_clone_attach(&gre_cloner);
130 }
131
132 int
133 gre_clone_create(ifc, unit)
134 struct if_clone *ifc;
135 int unit;
136 {
137 struct gre_softc *sc;
138
139 sc = malloc(sizeof(struct gre_softc), M_DEVBUF, M_WAITOK);
140 memset(sc, 0, sizeof(struct gre_softc));
141
142 sprintf(sc->sc_if.if_xname, "%s%d", ifc->ifc_name, unit);
143 sc->sc_if.if_softc = sc;
144 sc->sc_if.if_type = IFT_OTHER;
145 sc->sc_if.if_addrlen = 4;
146 sc->sc_if.if_hdrlen = 24; /* IP + GRE */
147 sc->sc_if.if_dlt = DLT_NULL;
148 sc->sc_if.if_mtu = GREMTU;
149 sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
150 sc->sc_if.if_output = gre_output;
151 sc->sc_if.if_ioctl = gre_ioctl;
152 sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
153 sc->g_proto = IPPROTO_GRE;
154 if_attach(&sc->sc_if);
155 if_alloc_sadl(&sc->sc_if);
156 #if NBPFILTER > 0
157 bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t));
158 #endif
159 LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
160 return (0);
161 }
162
163 void
164 gre_clone_destroy(ifp)
165 struct ifnet *ifp;
166 {
167 struct gre_softc *sc = ifp->if_softc;
168
169 LIST_REMOVE(sc, sc_list);
170 #if NBPFILTER > 0
171 bpfdetach(ifp);
172 #endif
173 if_detach(ifp);
174 free(sc, M_DEVBUF);
175 }
176
177 /*
178 * The output routine. Takes a packet and encapsulates it in the protocol
179 * given by sc->g_proto. See also RFC 1701 and RFC 2004
180 */
181
182 #if 0
183 struct ip ip_h;
184 #endif
185 struct mobile_h mob_h;
186
187 int
188 gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
189 struct rtentry *rt)
190 {
191 int error = 0;
192 struct gre_softc *sc = ifp->if_softc;
193 struct greip *gh;
194 struct ip *inp;
195 u_char ttl, osrc;
196 u_short etype = 0;
197
198
199 gh = NULL;
200 inp = NULL;
201 osrc = 0;
202
203 #if NBPFILTER >0
204 if (ifp->if_bpf) {
205 /* see comment of other if_foo.c files */
206 struct mbuf m0;
207 u_int32_t af = dst->sa_family;
208
209 m0.m_next = m;
210 m0.m_len = 4;
211 m0.m_data = (char *)⁡
212
213 bpf_mtap(ifp->if_bpf, &m0);
214 }
215 #endif
216
217 ttl = 255;
218
219 if (sc->g_proto == IPPROTO_MOBILE) {
220 if (dst->sa_family == AF_INET) {
221 struct mbuf *m0;
222 int msiz;
223
224 inp = mtod(m, struct ip *);
225
226 memset(&mob_h, 0, MOB_H_SIZ_L);
227 mob_h.proto = (inp->ip_p) << 8;
228 mob_h.odst = inp->ip_dst.s_addr;
229 inp->ip_dst.s_addr = sc->g_dst.s_addr;
230
231 /*
232 * If the packet comes from our host, we only change
233 * the destination address in the IP header.
234 * Else we also need to save and change the source
235 */
236 if (in_hosteq(inp->ip_src, sc->g_src)) {
237 msiz = MOB_H_SIZ_S;
238 } else {
239 mob_h.proto |= MOB_H_SBIT;
240 mob_h.osrc = inp->ip_src.s_addr;
241 inp->ip_src.s_addr = sc->g_src.s_addr;
242 msiz = MOB_H_SIZ_L;
243 }
244 HTONS(mob_h.proto);
245 mob_h.hcrc = gre_in_cksum((u_short *)&mob_h, msiz);
246
247 if ((m->m_data - msiz) < m->m_pktdat) {
248 /* need new mbuf */
249 MGETHDR(m0, M_DONTWAIT, MT_HEADER);
250 if (m0 == NULL) {
251 IF_DROP(&ifp->if_snd);
252 m_freem(m);
253 return (ENOBUFS);
254 }
255 m0->m_next = m;
256 m->m_data += sizeof(struct ip);
257 m->m_len -= sizeof(struct ip);
258 m0->m_pkthdr.len = m->m_pkthdr.len + msiz;
259 m0->m_len = msiz + sizeof(struct ip);
260 m0->m_data += max_linkhdr;
261 memcpy(mtod(m0, caddr_t), (caddr_t)inp,
262 sizeof(struct ip));
263 m = m0;
264 } else { /* we have some spave left in the old one */
265 m->m_data -= msiz;
266 m->m_len += msiz;
267 m->m_pkthdr.len += msiz;
268 memmove(mtod(m, caddr_t), inp,
269 sizeof(struct ip));
270 }
271 inp=mtod(m, struct ip *);
272 memcpy((caddr_t)(inp + 1), &mob_h, (unsigned)msiz);
273 NTOHS(inp->ip_len);
274 inp->ip_len += msiz;
275 } else { /* AF_INET */
276 IF_DROP(&ifp->if_snd);
277 m_freem(m);
278 return (EINVAL);
279 }
280 } else if (sc->g_proto == IPPROTO_GRE) {
281 switch(dst->sa_family) {
282 case AF_INET:
283 inp = mtod(m, struct ip *);
284 ttl = inp->ip_ttl;
285 etype = ETHERTYPE_IP;
286 break;
287 #ifdef NETATALK
288 case AF_APPLETALK:
289 etype = ETHERTYPE_ATALK;
290 break;
291 #endif
292 #ifdef NS
293 case AF_NS:
294 etype = ETHERTYPE_NS;
295 break;
296 #endif
297 default:
298 IF_DROP(&ifp->if_snd);
299 m_freem(m);
300 return (EAFNOSUPPORT);
301 }
302 M_PREPEND(m, sizeof(struct greip), M_DONTWAIT);
303 } else {
304 error = EINVAL;
305 IF_DROP(&ifp->if_snd);
306 m_freem(m);
307 return (error);
308 }
309
310
311 if (m == NULL) {
312 IF_DROP(&ifp->if_snd);
313 return (ENOBUFS);
314 }
315
316 gh = mtod(m, struct greip *);
317 if (sc->g_proto == IPPROTO_GRE) {
318 /* we don't have any GRE flags for now */
319
320 memset((void *)&gh->gi_g, 0, sizeof(struct gre_h));
321 gh->gi_ptype = htons(etype);
322 }
323
324 gh->gi_pr = sc->g_proto;
325 if (sc->g_proto != IPPROTO_MOBILE) {
326 gh->gi_src = sc->g_src;
327 gh->gi_dst = sc->g_dst;
328 ((struct ip*)gh)->ip_hl = (sizeof(struct ip)) >> 2;
329 ((struct ip*)gh)->ip_ttl = ttl;
330 ((struct ip*)gh)->ip_tos = inp->ip_tos;
331 gh->gi_len = m->m_pkthdr.len;
332 }
333
334 ifp->if_opackets++;
335 ifp->if_obytes += m->m_pkthdr.len;
336 /* send it off */
337 error = ip_output(m, NULL, &sc->route, 0, NULL);
338 if (error)
339 ifp->if_oerrors++;
340 return (error);
341
342 }
343
344 int
345 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
346 {
347 struct proc *p = curproc; /* XXX */
348 struct ifaddr *ifa = (struct ifaddr *)data;
349 struct ifreq *ifr = (struct ifreq *)data;
350 struct in_ifaddr *ia = (struct in_ifaddr *)data;
351 struct gre_softc *sc = ifp->if_softc;
352 int s;
353 struct sockaddr_in si;
354 struct sockaddr *sa = NULL;
355 int error;
356
357 error = 0;
358
359 s = splnet();
360 switch(cmd) {
361 case SIOCSIFADDR:
362 case SIOCSIFDSTADDR:
363 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
364 break;
365 /*
366 * set tunnel endpoints in case that we "only"
367 * have ip over ip encapsulation. This allows to
368 * set tunnel endpoints with ifconfig.
369 */
370 if (ifa->ifa_addr->sa_family == AF_INET) {
371 sa = ifa->ifa_addr;
372 sc->g_src = (satosin(sa))->sin_addr;
373 sc->g_dst = ia->ia_dstaddr.sin_addr;
374 if ((sc->g_src.s_addr != INADDR_ANY) &&
375 (sc->g_dst.s_addr != INADDR_ANY)) {
376 if (sc->route.ro_rt != 0) /* free old route */
377 RTFREE(sc->route.ro_rt);
378 gre_compute_route(sc);
379 ifp->if_flags |= IFF_UP;
380 }
381 }
382 break;
383 case SIOCSIFFLAGS:
384 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
385 break;
386 if ((sc->g_dst.s_addr == INADDR_ANY) ||
387 (sc->g_src.s_addr == INADDR_ANY))
388 ifp->if_flags &= ~IFF_UP;
389
390 switch(ifr->ifr_flags & LINK_MASK) {
391 case IFF_LINK0:
392 sc->g_proto = IPPROTO_GRE;
393 ifp->if_flags |= IFF_LINK0;
394 ifp->if_flags &= ~(IFF_LINK1|IFF_LINK2);
395 break;
396 case IFF_LINK2:
397 sc->g_proto = IPPROTO_MOBILE;
398 ifp->if_flags |= IFF_LINK2;
399 ifp->if_flags &= ~(IFF_LINK0|IFF_LINK1);
400 break;
401 }
402 break;
403 case SIOCSIFMTU:
404 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
405 break;
406 if (ifr->ifr_mtu > GREMTU || ifr->ifr_mtu < 576) {
407 error = EINVAL;
408 break;
409 }
410 ifp->if_mtu = ifr->ifr_mtu;
411 break;
412 case SIOCGIFMTU:
413 ifr->ifr_mtu = sc->sc_if.if_mtu;
414 break;
415 case SIOCADDMULTI:
416 case SIOCDELMULTI:
417 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
418 break;
419 if (ifr == 0) {
420 error = EAFNOSUPPORT;
421 break;
422 }
423 switch (ifr->ifr_addr.sa_family) {
424 #ifdef INET
425 case AF_INET:
426 break;
427 #endif
428 default:
429 error = EAFNOSUPPORT;
430 break;
431 }
432 break;
433 case GRESPROTO:
434 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
435 break;
436 sc->g_proto = ifr->ifr_flags;
437 switch (sc->g_proto) {
438 case IPPROTO_GRE :
439 ifp->if_flags |= IFF_LINK0;
440 ifp->if_flags &= ~(IFF_LINK1|IFF_LINK2);
441 break;
442 case IPPROTO_MOBILE :
443 ifp->if_flags |= IFF_LINK2;
444 ifp->if_flags &= ~(IFF_LINK1|IFF_LINK2);
445 break;
446 default:
447 ifp->if_flags &= ~(IFF_LINK0|IFF_LINK1|IFF_LINK2);
448 }
449 break;
450 case GREGPROTO:
451 ifr->ifr_flags = sc->g_proto;
452 break;
453 case GRESADDRS:
454 case GRESADDRD:
455 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
456 break;
457 /*
458 * set tunnel endpoints, compute a less specific route
459 * to the remote end and mark if as up
460 */
461 sa = &ifr->ifr_addr;
462 if (cmd == GRESADDRS )
463 sc->g_src = (satosin(sa))->sin_addr;
464 if (cmd == GRESADDRD )
465 sc->g_dst = (satosin(sa))->sin_addr;
466 if ((sc->g_src.s_addr != INADDR_ANY) &&
467 (sc->g_dst.s_addr != INADDR_ANY)) {
468 if (sc->route.ro_rt != 0) /* free old route */
469 RTFREE(sc->route.ro_rt);
470 gre_compute_route(sc);
471 ifp->if_flags |= IFF_UP;
472 }
473 break;
474 case GREGADDRS:
475 si.sin_addr.s_addr = sc->g_src.s_addr;
476 sa = sintosa(&si);
477 ifr->ifr_addr = *sa;
478 break;
479 case GREGADDRD:
480 si.sin_addr.s_addr = sc->g_dst.s_addr;
481 sa = sintosa(&si);
482 ifr->ifr_addr = *sa;
483 break;
484 default:
485 error = EINVAL;
486 }
487
488 splx(s);
489 return (error);
490 }
491
492 /*
493 * computes a route to our destination that is not the one
494 * which would be taken by ip_output(), as this one will loop back to
495 * us. If the interface is p2p as a--->b, then a routing entry exists
496 * If we now send a packet to b (e.g. ping b), this will come down here
497 * gets src=a, dst=b tacked on and would from ip_ouput() sent back to
498 * if_gre.
499 * Goal here is to compute a route to b that is less specific than
500 * a-->b. We know that this one exists as in normal operation we have
501 * at least a default route which matches.
502 */
503
504 void
505 gre_compute_route(struct gre_softc *sc)
506 {
507 struct route *ro;
508 u_int32_t a, b, c;
509
510 ro = &sc->route;
511
512 memset(ro, 0, sizeof(struct route));
513 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
514 ro->ro_dst.sa_family = AF_INET;
515 ro->ro_dst.sa_len = sizeof(ro->ro_dst);
516
517 /*
518 * toggle last bit, so our interface is not found, but a less
519 * specific route. I'd rather like to specify a shorter mask,
520 * but this is not possible. Should work though. XXX
521 * there is a simpler way ...
522 */
523 if ((sc->sc_if.if_flags & IFF_LINK1) == 0) {
524 a = ntohl(sc->g_dst.s_addr);
525 b = a & 0x01;
526 c = a & 0xfffffffe;
527 b = b ^ 0x01;
528 a = b | c;
529 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr
530 = htonl(a);
531 }
532
533 #ifdef DIAGNOSTIC
534 printf("%s: searching a route to %s", sc->sc_if.if_xname,
535 inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr));
536 #endif
537
538 rtalloc(ro);
539
540 /*
541 * now change it back - else ip_output will just drop
542 * the route and search one to this interface ...
543 */
544 if ((sc->sc_if.if_flags & IFF_LINK1) == 0)
545 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
546
547 #ifdef DIAGNOSTIC
548 printf(", choosing %s with gateway %s", ro->ro_rt->rt_ifp->if_xname,
549 inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr));
550 printf("\n");
551 #endif
552 }
553
/*
 * Checksum a flat buffer - the counterpart of in_cksum, which works on
 * mbufs.
 *
 * p   - start of the buffer, read as 16-bit words in memory order
 * len - buffer length in bytes; a trailing odd byte is treated as the
 *       first byte of a word whose second byte is zero
 *
 * Returns the complement of the 16-bit end-around-carry sum.
 */

u_short
gre_in_cksum(u_short *p, u_int len)
{
	u_int acc = 0;
	u_int full_words = len / 2;
	u_int i;

	for (i = 0; i < full_words; i++)
		acc += p[i];

	if ((len % 2) != 0) {
		union {
			u_short w;
			u_char c[2];
		} tail;

		tail.c[0] = *(u_char *)(p + full_words);
		tail.c[1] = 0;
		acc += tail.w;
	}

	/* end-around-carry */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += acc >> 16;
	return ((u_short)~acc);
}
583 #endif
584