if_gre.c revision 1.17.2.1 1 /* $NetBSD: if_gre.c,v 1.17.2.1 2001/03/05 22:49:54 nathanw Exp $ */
2
3 /*
4 * Copyright (c) 1998 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Heiko W.Rupp <hwr (at) pilhuhn.de>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 /*
40 * Encapsulate L3 protocols into IP
41 * See RFC 1701 and 1702 for more details.
42 * If_gre is compatible with Cisco GRE tunnels, so you can
43 * have a NetBSD box as the other end of a tunnel interface of a Cisco
44 * router. See gre(4) for more details.
45 * Also supported: IP in IP encaps (proto 55) as of RFC 2004
46 */
47
48 #include "gre.h"
49 #if NGRE > 0
50
51 #include "opt_inet.h"
52 #include "opt_ns.h"
53 #include "bpfilter.h"
54
55 #include <sys/param.h>
56 #include <sys/malloc.h>
57 #include <sys/mbuf.h>
58 #include <sys/lwp.h>
59 #include <sys/proc.h>
60 #include <sys/protosw.h>
61 #include <sys/socket.h>
62 #include <sys/ioctl.h>
63 #include <sys/queue.h>
64 #if __NetBSD__
65 #include <sys/systm.h>
66 #endif
67
68 #include <machine/cpu.h>
69
70 #include <net/ethertypes.h>
71 #include <net/if.h>
72 #include <net/if_types.h>
73 #include <net/netisr.h>
74 #include <net/route.h>
75
76 #ifdef INET
77 #include <netinet/in.h>
78 #include <netinet/in_systm.h>
79 #include <netinet/in_var.h>
80 #include <netinet/ip.h>
81 #include <netinet/ip_var.h>
82 #else
83 #error "Huh? if_gre without inet?"
84 #endif
85
86 #ifdef NS
87 #include <netns/ns.h>
88 #include <netns/ns_if.h>
89 #endif
90
91 #ifdef NETATALK
92 #include <netatalk/at.h>
93 #include <netatalk/at_var.h>
94 #include <netatalk/at_extern.h>
95 #endif
96
97 #if NBPFILTER > 0
98 #include <sys/time.h>
99 #include <net/bpf.h>
100 #endif
101
102 #include <net/if_gre.h>
103
104 #define GREMTU 1450 /* XXX this is below the standard MTU of
105 1500 Bytes, allowing for headers,
106 but we should possibly do path mtu discovery
107 before changing if state to up to find the
108 correct value */
109 #define LINK_MASK (IFF_LINK0|IFF_LINK1|IFF_LINK2)
110
111 struct gre_softc_head gre_softc_list;
112
113 int gre_clone_create __P((struct if_clone *, int));
114 void gre_clone_destroy __P((struct ifnet *));
115
116 struct if_clone gre_cloner =
117 IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
118
119 void gre_compute_route(struct gre_softc *sc);
120 #ifdef DIAGNOSTIC
121 void gre_inet_ntoa(struct in_addr in);
122 #endif
123
124 void greattach __P((int));
125
126 /* ARGSUSED */
127 void
128 greattach(count)
129 int count;
130 {
131
132 LIST_INIT(&gre_softc_list);
133 if_clone_attach(&gre_cloner);
134 }
135
136 int
137 gre_clone_create(ifc, unit)
138 struct if_clone *ifc;
139 int unit;
140 {
141 struct gre_softc *sc;
142
143 sc = malloc(sizeof(struct gre_softc), M_DEVBUF, M_WAITOK);
144 memset(sc, 0, sizeof(struct gre_softc));
145
146 sprintf(sc->sc_if.if_xname, "%s%d", ifc->ifc_name, unit);
147 sc->sc_if.if_softc = sc;
148 sc->sc_if.if_type = IFT_OTHER;
149 sc->sc_if.if_addrlen = 4;
150 sc->sc_if.if_hdrlen = 24; /* IP + GRE */
151 sc->sc_if.if_dlt = DLT_NULL;
152 sc->sc_if.if_mtu = GREMTU;
153 sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
154 sc->sc_if.if_output = gre_output;
155 sc->sc_if.if_ioctl = gre_ioctl;
156 sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
157 sc->g_proto = IPPROTO_GRE;
158 if_attach(&sc->sc_if);
159 if_alloc_sadl(&sc->sc_if);
160 #if NBPFILTER > 0
161 bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t));
162 #endif
163 LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
164 return (0);
165 }
166
167 void
168 gre_clone_destroy(ifp)
169 struct ifnet *ifp;
170 {
171 struct gre_softc *sc = ifp->if_softc;
172
173 LIST_REMOVE(sc, sc_list);
174 #if NBPFILTER > 0
175 bpfdetach(ifp);
176 #endif
177 if_detach(ifp);
178 free(sc, M_DEVBUF);
179 }
180
181 /*
182 * The output routine. Takes a packet and encapsulates it in the protocol
183 * given by sc->g_proto. See also RFC 1701 and RFC 2004
184 */
185
186 #if 0
187 struct ip ip_h;
188 #endif
189 struct mobile_h mob_h;
190
191 int
192 gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
193 struct rtentry *rt)
194 {
195 int error = 0;
196 struct gre_softc *sc = ifp->if_softc;
197 struct greip *gh;
198 struct ip *inp;
199 u_char ttl, osrc;
200 u_short etype = 0;
201
202
203 gh = NULL;
204 inp = NULL;
205 osrc = 0;
206
207 #if NBPFILTER >0
208 if (ifp->if_bpf) {
209 /* see comment of other if_foo.c files */
210 struct mbuf m0;
211 u_int32_t af = dst->sa_family;
212
213 m0.m_next = m;
214 m0.m_len = 4;
215 m0.m_data = (char *)⁡
216
217 bpf_mtap(ifp->if_bpf, &m0);
218 }
219 #endif
220
221 ttl = 255;
222
223 if (sc->g_proto == IPPROTO_MOBILE) {
224 if (dst->sa_family == AF_INET) {
225 struct mbuf *m0;
226 int msiz;
227
228 inp = mtod(m, struct ip *);
229
230 memset(&mob_h, 0, MOB_H_SIZ_L);
231 mob_h.proto = (inp->ip_p) << 8;
232 mob_h.odst = inp->ip_dst.s_addr;
233 inp->ip_dst.s_addr = sc->g_dst.s_addr;
234
235 /*
236 * If the packet comes from our host, we only change
237 * the destination address in the IP header.
238 * Else we also need to save and change the source
239 */
240 if (in_hosteq(inp->ip_src, sc->g_src)) {
241 msiz = MOB_H_SIZ_S;
242 } else {
243 mob_h.proto |= MOB_H_SBIT;
244 mob_h.osrc = inp->ip_src.s_addr;
245 inp->ip_src.s_addr = sc->g_src.s_addr;
246 msiz = MOB_H_SIZ_L;
247 }
248 HTONS(mob_h.proto);
249 mob_h.hcrc = gre_in_cksum((u_short *)&mob_h, msiz);
250
251 if ((m->m_data - msiz) < m->m_pktdat) {
252 /* need new mbuf */
253 MGETHDR(m0, M_DONTWAIT, MT_HEADER);
254 if (m0 == NULL) {
255 IF_DROP(&ifp->if_snd);
256 m_freem(m);
257 return (ENOBUFS);
258 }
259 m0->m_next = m;
260 m->m_data += sizeof(struct ip);
261 m->m_len -= sizeof(struct ip);
262 m0->m_pkthdr.len = m->m_pkthdr.len + msiz;
263 m0->m_len = msiz + sizeof(struct ip);
264 m0->m_data += max_linkhdr;
265 memcpy(mtod(m0, caddr_t), (caddr_t)inp,
266 sizeof(struct ip));
267 m = m0;
268 } else { /* we have some spave left in the old one */
269 m->m_data -= msiz;
270 m->m_len += msiz;
271 m->m_pkthdr.len += msiz;
272 memmove(mtod(m, caddr_t), inp,
273 sizeof(struct ip));
274 }
275 inp=mtod(m, struct ip *);
276 memcpy((caddr_t)(inp + 1), &mob_h, (unsigned)msiz);
277 NTOHS(inp->ip_len);
278 inp->ip_len += msiz;
279 } else { /* AF_INET */
280 IF_DROP(&ifp->if_snd);
281 m_freem(m);
282 return (EINVAL);
283 }
284 } else if (sc->g_proto == IPPROTO_GRE) {
285 switch(dst->sa_family) {
286 case AF_INET:
287 inp = mtod(m, struct ip *);
288 ttl = inp->ip_ttl;
289 etype = ETHERTYPE_IP;
290 break;
291 #ifdef NETATALK
292 case AF_APPLETALK:
293 etype = ETHERTYPE_ATALK;
294 break;
295 #endif
296 #ifdef NS
297 case AF_NS:
298 etype = ETHERTYPE_NS;
299 break;
300 #endif
301 default:
302 IF_DROP(&ifp->if_snd);
303 m_freem(m);
304 return (EAFNOSUPPORT);
305 }
306 M_PREPEND(m, sizeof(struct greip), M_DONTWAIT);
307 } else {
308 error = EINVAL;
309 IF_DROP(&ifp->if_snd);
310 m_freem(m);
311 return (error);
312 }
313
314
315 if (m == NULL) {
316 IF_DROP(&ifp->if_snd);
317 return (ENOBUFS);
318 }
319
320 gh = mtod(m, struct greip *);
321 if (sc->g_proto == IPPROTO_GRE) {
322 /* we don't have any GRE flags for now */
323
324 memset((void *)&gh->gi_g, 0, sizeof(struct gre_h));
325 gh->gi_ptype = htons(etype);
326 }
327
328 gh->gi_pr = sc->g_proto;
329 if (sc->g_proto != IPPROTO_MOBILE) {
330 gh->gi_src = sc->g_src;
331 gh->gi_dst = sc->g_dst;
332 ((struct ip*)gh)->ip_hl = (sizeof(struct ip)) >> 2;
333 ((struct ip*)gh)->ip_ttl = ttl;
334 ((struct ip*)gh)->ip_tos = inp->ip_tos;
335 gh->gi_len = m->m_pkthdr.len;
336 }
337
338 ifp->if_opackets++;
339 ifp->if_obytes += m->m_pkthdr.len;
340 /* send it off */
341 error = ip_output(m, NULL, &sc->route, 0, NULL);
342 if (error)
343 ifp->if_oerrors++;
344 return (error);
345
346 }
347
348 int
349 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
350 {
351 struct proc *p = curproc->l_proc; /* XXX */
352 struct ifaddr *ifa = (struct ifaddr *)data;
353 struct ifreq *ifr = (struct ifreq *)data;
354 struct in_ifaddr *ia = (struct in_ifaddr *)data;
355 struct gre_softc *sc = ifp->if_softc;
356 int s;
357 struct sockaddr_in si;
358 struct sockaddr *sa = NULL;
359 int error;
360
361 error = 0;
362
363 s = splimp();
364 switch(cmd) {
365 case SIOCSIFADDR:
366 case SIOCSIFDSTADDR:
367 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
368 break;
369 /*
370 * set tunnel endpoints in case that we "only"
371 * have ip over ip encapsulation. This allows to
372 * set tunnel endpoints with ifconfig.
373 */
374 if (ifa->ifa_addr->sa_family == AF_INET) {
375 sa = ifa->ifa_addr;
376 sc->g_src = (satosin(sa))->sin_addr;
377 sc->g_dst = ia->ia_dstaddr.sin_addr;
378 if ((sc->g_src.s_addr != INADDR_ANY) &&
379 (sc->g_dst.s_addr != INADDR_ANY)) {
380 if (sc->route.ro_rt != 0) /* free old route */
381 RTFREE(sc->route.ro_rt);
382 gre_compute_route(sc);
383 ifp->if_flags |= IFF_UP;
384 }
385 }
386 break;
387 case SIOCSIFFLAGS:
388 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
389 break;
390 if ((sc->g_dst.s_addr == INADDR_ANY) ||
391 (sc->g_src.s_addr == INADDR_ANY))
392 ifp->if_flags &= ~IFF_UP;
393
394 switch(ifr->ifr_flags & LINK_MASK) {
395 case IFF_LINK0:
396 sc->g_proto = IPPROTO_GRE;
397 ifp->if_flags |= IFF_LINK0;
398 ifp->if_flags &= ~(IFF_LINK1|IFF_LINK2);
399 break;
400 case IFF_LINK2:
401 sc->g_proto = IPPROTO_MOBILE;
402 ifp->if_flags |= IFF_LINK2;
403 ifp->if_flags &= ~(IFF_LINK0|IFF_LINK1);
404 break;
405 }
406 break;
407 case SIOCSIFMTU:
408 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
409 break;
410 if (ifr->ifr_mtu > GREMTU || ifr->ifr_mtu < 576) {
411 error = EINVAL;
412 break;
413 }
414 ifp->if_mtu = ifr->ifr_mtu;
415 break;
416 case SIOCGIFMTU:
417 ifr->ifr_mtu = sc->sc_if.if_mtu;
418 break;
419 case SIOCADDMULTI:
420 case SIOCDELMULTI:
421 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
422 break;
423 if (ifr == 0) {
424 error = EAFNOSUPPORT;
425 break;
426 }
427 switch (ifr->ifr_addr.sa_family) {
428 #ifdef INET
429 case AF_INET:
430 break;
431 #endif
432 default:
433 error = EAFNOSUPPORT;
434 break;
435 }
436 break;
437 case GRESPROTO:
438 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
439 break;
440 sc->g_proto = ifr->ifr_flags;
441 switch (sc->g_proto) {
442 case IPPROTO_GRE :
443 ifp->if_flags |= IFF_LINK0;
444 ifp->if_flags &= ~(IFF_LINK1|IFF_LINK2);
445 break;
446 case IPPROTO_MOBILE :
447 ifp->if_flags |= IFF_LINK2;
448 ifp->if_flags &= ~(IFF_LINK1|IFF_LINK2);
449 break;
450 default:
451 ifp->if_flags &= ~(IFF_LINK0|IFF_LINK1|IFF_LINK2);
452 }
453 break;
454 case GREGPROTO:
455 ifr->ifr_flags = sc->g_proto;
456 break;
457 case GRESADDRS:
458 case GRESADDRD:
459 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
460 break;
461 /*
462 * set tunnel endpoints, compute a less specific route
463 * to the remote end and mark if as up
464 */
465 sa = &ifr->ifr_addr;
466 if (cmd == GRESADDRS )
467 sc->g_src = (satosin(sa))->sin_addr;
468 if (cmd == GRESADDRD )
469 sc->g_dst = (satosin(sa))->sin_addr;
470 if ((sc->g_src.s_addr != INADDR_ANY) &&
471 (sc->g_dst.s_addr != INADDR_ANY)) {
472 if (sc->route.ro_rt != 0) /* free old route */
473 RTFREE(sc->route.ro_rt);
474 gre_compute_route(sc);
475 ifp->if_flags |= IFF_UP;
476 }
477 break;
478 case GREGADDRS:
479 si.sin_addr.s_addr = sc->g_src.s_addr;
480 sa = sintosa(&si);
481 ifr->ifr_addr = *sa;
482 break;
483 case GREGADDRD:
484 si.sin_addr.s_addr = sc->g_dst.s_addr;
485 sa = sintosa(&si);
486 ifr->ifr_addr = *sa;
487 break;
488 default:
489 error = EINVAL;
490 }
491
492 splx(s);
493 return (error);
494 }
495
496 /*
497 * computes a route to our destination that is not the one
498 * which would be taken by ip_output(), as this one will loop back to
499 * us. If the interface is p2p as a--->b, then a routing entry exists
500 * If we now send a packet to b (e.g. ping b), this will come down here
501 * gets src=a, dst=b tacked on and would from ip_ouput() sent back to
502 * if_gre.
503 * Goal here is to compute a route to b that is less specific than
504 * a-->b. We know that this one exists as in normal operation we have
505 * at least a default route which matches.
506 */
507
508 void
509 gre_compute_route(struct gre_softc *sc)
510 {
511 struct route *ro;
512 u_int32_t a, b, c;
513
514 ro = &sc->route;
515
516 memset(ro, 0, sizeof(struct route));
517 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
518 ro->ro_dst.sa_family = AF_INET;
519 ro->ro_dst.sa_len = sizeof(ro->ro_dst);
520
521 /*
522 * toggle last bit, so our interface is not found, but a less
523 * specific route. I'd rather like to specify a shorter mask,
524 * but this is not possible. Should work though. XXX
525 * there is a simpler way ...
526 */
527 if ((sc->sc_if.if_flags & IFF_LINK1) == 0) {
528 a = ntohl(sc->g_dst.s_addr);
529 b = a & 0x01;
530 c = a & 0xfffffffe;
531 b = b ^ 0x01;
532 a = b | c;
533 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr
534 = htonl(a);
535 }
536
537 #ifdef DIAGNOSTIC
538 printf("%s: searching a route to ", sc->sc_if.if_xname);
539 gre_inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr);
540 #endif
541
542 rtalloc(ro);
543
544 /*
545 * now change it back - else ip_output will just drop
546 * the route and search one to this interface ...
547 */
548 if ((sc->sc_if.if_flags & IFF_LINK1) == 0)
549 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
550
551 #ifdef DIAGNOSTIC
552 printf(", choosing %s with gateway ",ro->ro_rt->rt_ifp->if_xname);
553 gre_inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr);
554 printf("\n");
555 #endif
556 }
557
/*
 * Checksum a flat buffer - the counterpart of in_cksum, which operates
 * on mbufs.
 *
 * Adds up the len bytes at p as native 16-bit words (an odd trailing
 * byte is paired with a zero byte), folds the carries back into the low
 * 16 bits and returns the one's complement of the result.
 */

u_short
gre_in_cksum(u_short *p, u_int len)
{
	u_int acc = 0;
	u_int left;

	/* all complete 16-bit words */
	for (left = len; left > 1; left -= 2)
		acc += *p++;

	/* odd byte at the end: pad it with a zero byte */
	if (left == 1) {
		union {
			u_short w;
			u_char c[2];
		} pad;

		pad.c[0] = *(u_char *)p;
		pad.c[1] = 0;
		acc += pad.w;
	}

	/* end-around-carry */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += (acc >> 16);
	return (~acc);
}
587
588
/* while testing ... */
#ifdef DIAGNOSTIC
/*
 * Print the IPv4 address in as a dotted quad, taking the four bytes in
 * the order they are stored.  No newline is added.
 */
void
gre_inet_ntoa(struct in_addr in)
{
	const u_char *p;

	/* Walk the address as unsigned bytes so each prints as 0..255. */
	p = (const u_char *)&in;
	printf("%d.%d.%d.%d", p[0], p[1], p[2], p[3]);
}

#endif
602 #endif
603
604