if_gre.c revision 1.64 1 /* $NetBSD: if_gre.c,v 1.64 2006/09/03 06:10:06 dyoung Exp $ */
2
3 /*
4 * Copyright (c) 1998 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Heiko W.Rupp <hwr (at) pilhuhn.de>
9 *
10 * IPv6-over-GRE contributed by Gert Doering <gert (at) greenie.muc.de>
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the NetBSD
23 * Foundation, Inc. and its contributors.
24 * 4. Neither the name of The NetBSD Foundation nor the names of its
25 * contributors may be used to endorse or promote products derived
26 * from this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
29 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
32 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 */
40
41 /*
42 * Encapsulate L3 protocols into IP
43 * See RFC 1701 and 1702 for more details.
44 * If_gre is compatible with Cisco GRE tunnels, so you can
45 * have a NetBSD box as the other end of a tunnel interface of a Cisco
46 * router. See gre(4) for more details.
47 * Also supported: IP in IP encaps (proto 55) as of RFC 2004
48 */
49
50 #include <sys/cdefs.h>
51 __KERNEL_RCSID(0, "$NetBSD: if_gre.c,v 1.64 2006/09/03 06:10:06 dyoung Exp $");
52
53 #include "opt_gre.h"
54 #include "opt_inet.h"
55 #include "opt_ns.h"
56 #include "bpfilter.h"
57
58 #ifdef INET
59 #include <sys/param.h>
60 #include <sys/file.h>
61 #include <sys/filedesc.h>
62 #include <sys/malloc.h>
63 #include <sys/mbuf.h>
64 #include <sys/proc.h>
65 #include <sys/protosw.h>
66 #include <sys/socket.h>
67 #include <sys/socketvar.h>
68 #include <sys/ioctl.h>
69 #include <sys/queue.h>
70 #if __NetBSD__
71 #include <sys/systm.h>
72 #include <sys/sysctl.h>
73 #include <sys/kauth.h>
74 #endif
75
76 #include <sys/kthread.h>
77
78 #include <machine/cpu.h>
79
80 #include <net/ethertypes.h>
81 #include <net/if.h>
82 #include <net/if_types.h>
83 #include <net/netisr.h>
84 #include <net/route.h>
85
86 #ifdef INET
87 #include <netinet/in.h>
88 #include <netinet/in_systm.h>
89 #include <netinet/in_var.h>
90 #include <netinet/ip.h>
91 #include <netinet/ip_var.h>
92 #else
93 #error "Huh? if_gre without inet?"
94 #endif
95
96 #ifdef NS
97 #include <netns/ns.h>
98 #include <netns/ns_if.h>
99 #endif
100
101 #ifdef NETATALK
102 #include <netatalk/at.h>
103 #include <netatalk/at_var.h>
104 #include <netatalk/at_extern.h>
105 #endif
106
107 #if NBPFILTER > 0
108 #include <sys/time.h>
109 #include <net/bpf.h>
110 #endif
111
112 #include <net/if_gre.h>
113
114 /*
115 * It is not easy to calculate the right value for a GRE MTU.
116 * We leave this task to the admin and use the same default that
117 * other vendors use.
118 */
#define	GREMTU	1476

/*
 * Debug tracing.  In a GRE_DEBUG kernel, GRE_DPRINTF() prints only for
 * interfaces that have IFF_DEBUG set.  Otherwise it expands to an empty
 * statement and none of its arguments are evaluated.
 */
#ifndef GRE_DEBUG
#define	GRE_DPRINTF(__sc, __fmt, ...)	do { } while (/*CONSTCOND*/0)
#else
#define	GRE_DPRINTF(__sc, __fmt, ...)					\
	do {								\
		if (((__sc)->sc_if.if_flags & IFF_DEBUG) != 0)		\
			printf(__fmt, __VA_ARGS__);			\
	} while (/*CONSTCOND*/0)
#endif /* !GRE_DEBUG */
130
131 struct gre_softc_head gre_softc_list;
132 int ip_gre_ttl = GRE_TTL;
133
134 static int gre_clone_create(struct if_clone *, int);
135 static int gre_clone_destroy(struct ifnet *);
136
137 static struct if_clone gre_cloner =
138 IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
139
140 static int gre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
141 struct rtentry *);
142 static int gre_ioctl(struct ifnet *, u_long, caddr_t);
143
144 static int gre_compute_route(struct gre_softc *sc);
145
146 static int gre_getsockname(struct socket *, struct mbuf *, struct lwp *);
147 static int gre_getpeername(struct socket *, struct mbuf *, struct lwp *);
148 static int gre_getnames(struct socket *, struct lwp *, struct sockaddr_in *,
149 struct sockaddr_in *);
150
/*
 * Clear the "running" flag and wake whoever sleeps on it (gre_join()).
 */
static void
gre_stop(int *running)
{
	*running = 0;
	wakeup(running);
}
157
158 static void
159 gre_join(int *running)
160 {
161 int s;
162
163 s = splnet();
164 while (*running != 0) {
165 splx(s);
166 tsleep(running, PSOCK, "grejoin", 0);
167 s = splnet();
168 }
169 splx(s);
170 }
171
172 static void
173 gre_wakeup(struct gre_softc *sc)
174 {
175 GRE_DPRINTF(sc, "%s: enter\n", __func__);
176 sc->sc_waitchan = 1;
177 wakeup(&sc->sc_waitchan);
178 }
179
180 static int
181 gre_clone_create(struct if_clone *ifc, int unit)
182 {
183 struct gre_softc *sc;
184
185 sc = malloc(sizeof(struct gre_softc), M_DEVBUF, M_WAITOK);
186 memset(sc, 0, sizeof(struct gre_softc));
187
188 snprintf(sc->sc_if.if_xname, sizeof(sc->sc_if.if_xname), "%s%d",
189 ifc->ifc_name, unit);
190 sc->sc_if.if_softc = sc;
191 sc->sc_if.if_type = IFT_TUNNEL;
192 sc->sc_if.if_addrlen = 0;
193 sc->sc_if.if_hdrlen = 24; /* IP + GRE */
194 sc->sc_if.if_dlt = DLT_NULL;
195 sc->sc_if.if_mtu = GREMTU;
196 sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
197 sc->sc_if.if_output = gre_output;
198 sc->sc_if.if_ioctl = gre_ioctl;
199 sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
200 sc->g_dstport = sc->g_srcport = 0;
201 sc->g_proto = IPPROTO_GRE;
202 sc->sc_snd.ifq_maxlen = 256;
203 sc->sc_if.if_flags |= IFF_LINK0;
204 if_attach(&sc->sc_if);
205 if_alloc_sadl(&sc->sc_if);
206 #if NBPFILTER > 0
207 bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t));
208 #endif
209 LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
210 return (0);
211 }
212
213 static int
214 gre_clone_destroy(struct ifnet *ifp)
215 {
216 struct gre_softc *sc = ifp->if_softc;
217
218 LIST_REMOVE(sc, sc_list);
219 #if NBPFILTER > 0
220 bpfdetach(ifp);
221 #endif
222 if_detach(ifp);
223 gre_wakeup(sc);
224 gre_join(&sc->sc_thread);
225 if (sc->sc_fp != NULL) {
226 closef(sc->sc_fp, curlwp);
227 sc->sc_fp = NULL;
228 }
229 free(sc, M_DEVBUF);
230
231 return (0);
232 }
233
234 static void
235 gre_receive(struct socket *so, caddr_t arg, int waitflag)
236 {
237 struct gre_softc *sc = (struct gre_softc *)arg;
238
239 GRE_DPRINTF(sc, "%s: enter\n", __func__);
240
241 gre_wakeup(sc);
242 }
243
244 static void
245 gre_upcall_add(struct socket *so, caddr_t arg)
246 {
247 /* XXX What if the kernel already set an upcall? */
248 so->so_upcallarg = arg;
249 so->so_upcall = gre_receive;
250 so->so_rcv.sb_flags |= SB_UPCALL;
251 }
252
253 static void
254 gre_upcall_remove(struct socket *so)
255 {
256 /* XXX What if the kernel already set an upcall? */
257 so->so_rcv.sb_flags &= ~SB_UPCALL;
258 so->so_upcallarg = NULL;
259 so->so_upcall = NULL;
260 }
261
262 static void
263 gre_sodestroy(struct socket **sop)
264 {
265 gre_upcall_remove(*sop);
266 soshutdown(*sop, SHUT_RDWR);
267 soclose(*sop);
268 *sop = NULL;
269 }
270
271 static struct mbuf *
272 gre_getsockmbuf(struct socket *so)
273 {
274 struct mbuf *m;
275
276 m = m_get(M_WAIT, MT_SONAME);
277 if (m != NULL)
278 MCLAIM(m, so->so_mowner);
279 return m;
280 }
281
282 static int
283 gre_socreate1(struct gre_softc *sc, struct lwp *l, struct gre_soparm *sp,
284 struct socket **sop)
285 {
286 int rc;
287 struct mbuf *m;
288 struct sockaddr_in *sin;
289 struct socket *so;
290
291 GRE_DPRINTF(sc, "%s: enter\n", __func__);
292 rc = socreate(AF_INET, sop, SOCK_DGRAM, IPPROTO_UDP, l);
293 if (rc != 0) {
294 GRE_DPRINTF(sc, "%s: socreate failed\n", __func__);
295 return rc;
296 }
297
298 so = *sop;
299
300 gre_upcall_add(so, (caddr_t)sc);
301 if ((m = gre_getsockmbuf(so)) == NULL) {
302 rc = ENOBUFS;
303 goto out;
304 }
305 sin = mtod(m, struct sockaddr_in *);
306 sin->sin_len = m->m_len = sizeof(struct sockaddr_in);
307 sin->sin_family = AF_INET;
308 sin->sin_addr = sc->g_src;
309 sin->sin_port = sc->g_srcport;
310
311 GRE_DPRINTF(sc, "%s: bind 0x%08" PRIx32 " port %d\n", __func__,
312 sin->sin_addr.s_addr, ntohs(sin->sin_port));
313 if ((rc = sobind(so, m, l)) != 0) {
314 GRE_DPRINTF(sc, "%s: sobind failed\n", __func__);
315 goto out;
316 }
317
318 if (sc->g_srcport == 0) {
319 if (gre_getsockname(so, m, l) != 0) {
320 GRE_DPRINTF(sc, "%s: gre_getsockname failed\n",
321 __func__);
322 goto out;
323 }
324 sc->g_srcport = sin->sin_port;
325 }
326
327 sin->sin_addr = sc->g_dst;
328 sin->sin_port = sc->g_dstport;
329
330 rc = soconnect(so, m, l);
331
332 if (rc != 0) {
333 GRE_DPRINTF(sc, "%s: soconnect failed\n", __func__);
334 goto out;
335 }
336
337 *mtod(m, int *) = ip_gre_ttl;
338 rc = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so, IPPROTO_IP, IP_TTL,
339 &m);
340 m = NULL;
341 if (rc != 0) {
342 printf("%s: setopt ttl failed\n", __func__);
343 rc = 0;
344 }
345 out:
346 m_freem(m);
347
348 if (rc != 0)
349 gre_sodestroy(sop);
350 else
351 *sp = sc->sc_soparm;
352
353 return rc;
354 }
355
356 static void
357 gre_thread1(struct gre_softc *sc, struct lwp *l)
358 {
359 int flags, rc, s;
360 const struct gre_h *gh;
361 struct ifnet *ifp = &sc->sc_if;
362 struct mbuf *m;
363 struct socket *so = NULL;
364 struct uio uio;
365 struct gre_soparm sp;
366
367 GRE_DPRINTF(sc, "%s: enter\n", __func__);
368 s = splnet();
369
370 sc->sc_waitchan = 1;
371
372 memset(&sp, 0, sizeof(sp));
373 memset(&uio, 0, sizeof(uio));
374
375 ifp->if_flags |= IFF_RUNNING;
376
377 for (;;) {
378 while (sc->sc_waitchan == 0) {
379 splx(s);
380 GRE_DPRINTF(sc, "%s: sleeping\n", __func__);
381 tsleep(&sc->sc_waitchan, PSOCK, "grewait", 0);
382 s = splnet();
383 }
384 sc->sc_waitchan = 0;
385 GRE_DPRINTF(sc, "%s: awake\n", __func__);
386 if ((ifp->if_flags & IFF_UP) != IFF_UP) {
387 GRE_DPRINTF(sc, "%s: not up & running; exiting\n",
388 __func__);
389 break;
390 }
391 if (sc->g_proto != IPPROTO_UDP) {
392 GRE_DPRINTF(sc, "%s: not udp; exiting\n", __func__);
393 break;
394 }
395 /* XXX optimize */
396 if (memcmp(&sp, &sc->sc_soparm, sizeof(sp)) != 0) {
397 GRE_DPRINTF(sc, "%s: parameters changed\n", __func__);
398
399 if (sp.sp_fp != NULL) {
400 FILE_UNUSE(sp.sp_fp, NULL);
401 sp.sp_fp = NULL;
402 so = NULL;
403 } else if (so != NULL)
404 gre_sodestroy(&so);
405
406 if (sc->sc_fp != NULL) {
407 so = (struct socket *)sc->sc_fp->f_data;
408 gre_upcall_add(so, (caddr_t)sc);
409 sp = sc->sc_soparm;
410 FILE_USE(sp.sp_fp);
411 } else if (gre_socreate1(sc, l, &sp, &so) != 0)
412 goto out;
413 }
414 for (;;) {
415 flags = MSG_DONTWAIT;
416 uio.uio_resid = 1000000;
417 rc = (*so->so_receive)(so, NULL, &uio, &m, NULL,
418 &flags);
419 /* TBD Back off if ECONNREFUSED (indicates
420 * ICMP Port Unreachable)?
421 */
422 if (rc == EWOULDBLOCK) {
423 GRE_DPRINTF(sc, "%s: so_receive EWOULDBLOCK\n",
424 __func__);
425 break;
426 } else if (rc != 0 || m == NULL) {
427 GRE_DPRINTF(sc, "%s: rc %d m %p\n",
428 ifp->if_xname, rc, (void *)m);
429 continue;
430 } else
431 GRE_DPRINTF(sc, "%s: so_receive ok\n",
432 __func__);
433 if (m->m_len < sizeof(*gh) &&
434 (m = m_pullup(m, sizeof(*gh))) == NULL) {
435 GRE_DPRINTF(sc, "%s: m_pullup failed\n",
436 __func__);
437 continue;
438 }
439 gh = mtod(m, const struct gre_h *);
440
441 if (gre_input3(sc, m, 0, IPPROTO_GRE, gh) == 0) {
442 GRE_DPRINTF(sc, "%s: dropping unsupported\n",
443 __func__);
444 ifp->if_ierrors++;
445 m_freem(m);
446 }
447 }
448 for (;;) {
449 IF_DEQUEUE(&sc->sc_snd, m);
450 if (m == NULL)
451 break;
452 GRE_DPRINTF(sc, "%s: dequeue\n", __func__);
453 if ((so->so_state & SS_ISCONNECTED) == 0) {
454 GRE_DPRINTF(sc, "%s: not connected\n",
455 __func__);
456 m_freem(m);
457 continue;
458 }
459 rc = (*so->so_send)(so, NULL, NULL, m, NULL, 0, l);
460 /* XXX handle ENOBUFS? */
461 if (rc != 0)
462 GRE_DPRINTF(sc, "%s: so_send failed\n",
463 __func__);
464 }
465 /* Give the software interrupt queues a chance to
466 * run, or else when I send a ping from gre0 to gre1 on
467 * the same host, gre0 will not wake for the reply.
468 */
469 splx(s);
470 s = splnet();
471 }
472 if (sp.sp_fp != NULL) {
473 GRE_DPRINTF(sc, "%s: removing upcall\n", __func__);
474 gre_upcall_remove(so);
475 FILE_UNUSE(sp.sp_fp, NULL);
476 sp.sp_fp = NULL;
477 } else
478 gre_sodestroy(&so);
479 out:
480 GRE_DPRINTF(sc, "%s: stopping\n", __func__);
481 if (sc->g_proto == IPPROTO_UDP)
482 ifp->if_flags &= ~IFF_RUNNING;
483 while (!IF_IS_EMPTY(&sc->sc_snd)) {
484 IF_DEQUEUE(&sc->sc_snd, m);
485 m_freem(m);
486 }
487 gre_stop(&sc->sc_thread);
488 /* must not touch sc after this! */
489 GRE_DPRINTF(sc, "%s: restore ipl\n", __func__);
490 splx(s);
491 }
492
493 static void
494 gre_thread(void *arg)
495 {
496 struct gre_softc *sc = (struct gre_softc *)arg;
497
498 gre_thread1(sc, curlwp);
499 /* must not touch sc after this! */
500 kthread_exit(0);
501 }
502
503 int
504 gre_input3(struct gre_softc *sc, struct mbuf *m, int hlen, u_char proto,
505 const struct gre_h *gh)
506 {
507 u_int16_t flags;
508 #if NBPFILTER > 0
509 u_int32_t af = AF_INET; /* af passed to BPF tap */
510 #endif
511 int s, isr;
512 struct ifqueue *ifq;
513
514 sc->sc_if.if_ipackets++;
515 sc->sc_if.if_ibytes += m->m_pkthdr.len;
516
517 switch (proto) {
518 case IPPROTO_GRE:
519 hlen += sizeof(struct gre_h);
520
521 /* process GRE flags as packet can be of variable len */
522 flags = ntohs(gh->flags);
523
524 /* Checksum & Offset are present */
525 if ((flags & GRE_CP) | (flags & GRE_RP))
526 hlen += 4;
527 /* We don't support routing fields (variable length) */
528 if (flags & GRE_RP)
529 return (0);
530 if (flags & GRE_KP)
531 hlen += 4;
532 if (flags & GRE_SP)
533 hlen += 4;
534
535 switch (ntohs(gh->ptype)) { /* ethertypes */
536 case ETHERTYPE_IP: /* shouldn't need a schednetisr(), as */
537 ifq = &ipintrq; /* we are in ip_input */
538 isr = NETISR_IP;
539 break;
540 #ifdef NS
541 case ETHERTYPE_NS:
542 ifq = &nsintrq;
543 isr = NETISR_NS;
544 #if NBPFILTER > 0
545 af = AF_NS;
546 #endif
547 break;
548 #endif
549 #ifdef NETATALK
550 case ETHERTYPE_ATALK:
551 ifq = &atintrq1;
552 isr = NETISR_ATALK;
553 #if NBPFILTER > 0
554 af = AF_APPLETALK;
555 #endif
556 break;
557 #endif
558 #ifdef INET6
559 case ETHERTYPE_IPV6:
560 GRE_DPRINTF(sc, "%s: IPv6 packet\n", __func__);
561 ifq = &ip6intrq;
562 isr = NETISR_IPV6;
563 #if NBPFILTER > 0
564 af = AF_INET6;
565 #endif
566 break;
567 #endif
568 default: /* others not yet supported */
569 printf("%s: unhandled ethertype 0x%04x\n", __func__,
570 ntohs(gh->ptype));
571 return (0);
572 }
573 break;
574 default:
575 /* others not yet supported */
576 return (0);
577 }
578
579 if (hlen > m->m_pkthdr.len) {
580 m_freem(m);
581 sc->sc_if.if_ierrors++;
582 return (EINVAL);
583 }
584 m_adj(m, hlen);
585
586 #if NBPFILTER > 0
587 if (sc->sc_if.if_bpf != NULL)
588 bpf_mtap_af(sc->sc_if.if_bpf, af, m);
589 #endif /*NBPFILTER > 0*/
590
591 m->m_pkthdr.rcvif = &sc->sc_if;
592
593 s = splnet(); /* possible */
594 if (IF_QFULL(ifq)) {
595 IF_DROP(ifq);
596 m_freem(m);
597 } else {
598 IF_ENQUEUE(ifq, m);
599 }
600 /* we need schednetisr since the address family may change */
601 schednetisr(isr);
602 splx(s);
603
604 return (1); /* packet is done, no further processing needed */
605 }
606
607 /*
608 * The output routine. Takes a packet and encapsulates it in the protocol
609 * given by sc->g_proto. See also RFC 1701 and RFC 2004
610 */
611 static int
612 gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
613 struct rtentry *rt)
614 {
615 int error = 0, hlen;
616 struct gre_softc *sc = ifp->if_softc;
617 struct greip *gi;
618 struct gre_h *gh;
619 struct ip *eip, *ip;
620 u_int8_t ip_tos = 0;
621 u_int16_t etype = 0;
622 struct mobile_h mob_h;
623
624 if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) == 0 ||
625 sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
626 m_freem(m);
627 error = ENETDOWN;
628 goto end;
629 }
630
631 gi = NULL;
632 ip = NULL;
633
634 #if NBPFILTER >0
635 if (ifp->if_bpf)
636 bpf_mtap_af(ifp->if_bpf, dst->sa_family, m);
637 #endif
638
639 m->m_flags &= ~(M_BCAST|M_MCAST);
640
641 switch (sc->g_proto) {
642 case IPPROTO_MOBILE:
643 if (dst->sa_family == AF_INET) {
644 int msiz;
645
646 if (M_UNWRITABLE(m, sizeof(*ip)) &&
647 (m = m_pullup(m, sizeof(*ip))) == NULL) {
648 error = ENOBUFS;
649 goto end;
650 }
651 ip = mtod(m, struct ip *);
652
653 memset(&mob_h, 0, MOB_H_SIZ_L);
654 mob_h.proto = (ip->ip_p) << 8;
655 mob_h.odst = ip->ip_dst.s_addr;
656 ip->ip_dst.s_addr = sc->g_dst.s_addr;
657
658 /*
659 * If the packet comes from our host, we only change
660 * the destination address in the IP header.
661 * Else we also need to save and change the source
662 */
663 if (in_hosteq(ip->ip_src, sc->g_src)) {
664 msiz = MOB_H_SIZ_S;
665 } else {
666 mob_h.proto |= MOB_H_SBIT;
667 mob_h.osrc = ip->ip_src.s_addr;
668 ip->ip_src.s_addr = sc->g_src.s_addr;
669 msiz = MOB_H_SIZ_L;
670 }
671 HTONS(mob_h.proto);
672 mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
673
674 M_PREPEND(m, msiz, M_DONTWAIT);
675 if (m == NULL) {
676 error = ENOBUFS;
677 goto end;
678 }
679 /* XXX Assuming that ip does not dangle after
680 * M_PREPEND. In practice, that's true, but
681 * that's in M_PREPEND's contract.
682 */
683 memmove(mtod(m, caddr_t), ip, sizeof(*ip));
684 ip = mtod(m, struct ip *);
685 memcpy((caddr_t)(ip + 1), &mob_h, (unsigned)msiz);
686 ip->ip_len = htons(ntohs(ip->ip_len) + msiz);
687 } else { /* AF_INET */
688 IF_DROP(&ifp->if_snd);
689 m_freem(m);
690 error = EINVAL;
691 goto end;
692 }
693 break;
694 case IPPROTO_UDP:
695 case IPPROTO_GRE:
696 GRE_DPRINTF(sc, "%s: dst->sa_family=%d\n", __func__,
697 dst->sa_family);
698 switch (dst->sa_family) {
699 case AF_INET:
700 ip = mtod(m, struct ip *);
701 ip_tos = ip->ip_tos;
702 etype = ETHERTYPE_IP;
703 break;
704 #ifdef NETATALK
705 case AF_APPLETALK:
706 etype = ETHERTYPE_ATALK;
707 break;
708 #endif
709 #ifdef NS
710 case AF_NS:
711 etype = ETHERTYPE_NS;
712 break;
713 #endif
714 #ifdef INET6
715 case AF_INET6:
716 etype = ETHERTYPE_IPV6;
717 break;
718 #endif
719 default:
720 IF_DROP(&ifp->if_snd);
721 m_freem(m);
722 error = EAFNOSUPPORT;
723 goto end;
724 }
725 break;
726 default:
727 IF_DROP(&ifp->if_snd);
728 m_freem(m);
729 error = EINVAL;
730 goto end;
731 }
732
733 switch (sc->g_proto) {
734 case IPPROTO_GRE:
735 hlen = sizeof(struct greip);
736 break;
737 case IPPROTO_UDP:
738 hlen = sizeof(struct gre_h);
739 break;
740 default:
741 hlen = 0;
742 break;
743 }
744
745 M_PREPEND(m, hlen, M_DONTWAIT);
746
747 if (m == NULL) {
748 IF_DROP(&ifp->if_snd);
749 error = ENOBUFS;
750 goto end;
751 }
752
753 switch (sc->g_proto) {
754 case IPPROTO_UDP:
755 gh = mtod(m, struct gre_h *);
756 memset(gh, 0, sizeof(*gh));
757 gh->ptype = htons(etype);
758 /* XXX Need to handle IP ToS. Look at how I handle IP TTL. */
759 break;
760 case IPPROTO_GRE:
761 gi = mtod(m, struct greip *);
762 gh = &gi->gi_g;
763 eip = &gi->gi_i;
764 /* we don't have any GRE flags for now */
765 memset(gh, 0, sizeof(*gh));
766 gh->ptype = htons(etype);
767 eip->ip_src = sc->g_src;
768 eip->ip_dst = sc->g_dst;
769 eip->ip_hl = (sizeof(struct ip)) >> 2;
770 eip->ip_ttl = ip_gre_ttl;
771 eip->ip_tos = ip_tos;
772 eip->ip_len = htons(m->m_pkthdr.len);
773 eip->ip_p = sc->g_proto;
774 break;
775 case IPPROTO_MOBILE:
776 eip = mtod(m, struct ip *);
777 eip->ip_p = sc->g_proto;
778 break;
779 default:
780 error = EPROTONOSUPPORT;
781 m_freem(m);
782 goto end;
783 }
784
785 ifp->if_opackets++;
786 ifp->if_obytes += m->m_pkthdr.len;
787
788 /* send it off */
789 if (sc->g_proto == IPPROTO_UDP) {
790 if (IF_QFULL(&sc->sc_snd)) {
791 IF_DROP(&sc->sc_snd);
792 error = ENOBUFS;
793 m_freem(m);
794 } else {
795 IF_ENQUEUE(&sc->sc_snd, m);
796 gre_wakeup(sc);
797 error = 0;
798 }
799 } else {
800 error = ip_output(m, NULL, &sc->route, 0,
801 (struct ip_moptions *)NULL, (struct socket *)NULL);
802 }
803 end:
804 if (error)
805 ifp->if_oerrors++;
806 return (error);
807 }
808
809 /* Must be called at IPL_NET. */
810 static int
811 gre_kick(struct gre_softc *sc)
812 {
813 int rc;
814 struct ifnet *ifp = &sc->sc_if;
815
816 if (sc->g_proto == IPPROTO_UDP && (ifp->if_flags & IFF_UP) == IFF_UP &&
817 !sc->sc_thread) {
818 sc->sc_thread = 1;
819 rc = kthread_create1(gre_thread, (void *)sc, NULL,
820 ifp->if_xname);
821 if (rc != 0)
822 gre_stop(&sc->sc_thread);
823 return rc;
824 } else {
825 gre_wakeup(sc);
826 return 0;
827 }
828 }
829
830 static int
831 gre_getname(struct socket *so, int req, struct mbuf *nam, struct lwp *l)
832 {
833 int s, error;
834
835 s = splsoftnet();
836 error = (*so->so_proto->pr_usrreq)(so, req, (struct mbuf *)0,
837 nam, (struct mbuf *)0, l);
838 splx(s);
839 return error;
840 }
841
842 static int
843 gre_getsockname(struct socket *so, struct mbuf *nam, struct lwp *l)
844 {
845 return gre_getname(so, PRU_SOCKADDR, nam, l);
846 }
847
848 static int
849 gre_getpeername(struct socket *so, struct mbuf *nam, struct lwp *l)
850 {
851 return gre_getname(so, PRU_PEERADDR, nam, l);
852 }
853
854 static int
855 gre_getnames(struct socket *so, struct lwp *l, struct sockaddr_in *src,
856 struct sockaddr_in *dst)
857 {
858 struct mbuf *m;
859 struct sockaddr_in *sin;
860 int rc;
861
862 if ((m = gre_getsockmbuf(so)) == NULL)
863 return ENOBUFS;
864
865 sin = mtod(m, struct sockaddr_in *);
866
867 if ((rc = gre_getsockname(so, m, l)) != 0)
868 goto out;
869 if (sin->sin_family != AF_INET) {
870 rc = EAFNOSUPPORT;
871 goto out;
872 }
873 *src = *sin;
874
875 if ((rc = gre_getpeername(so, m, l)) != 0)
876 goto out;
877 if (sin->sin_family != AF_INET) {
878 rc = EAFNOSUPPORT;
879 goto out;
880 }
881 *dst = *sin;
882
883 out:
884 m_freem(m);
885 return rc;
886 }
887
888 static int
889 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
890 {
891 u_char oproto;
892 struct file *fp, *ofp;
893 struct socket *so;
894 struct sockaddr_in dst, src;
895 struct proc *p = curproc; /* XXX */
896 struct lwp *l = curlwp; /* XXX */
897 struct ifreq *ifr = (struct ifreq *)data;
898 struct if_laddrreq *lifr = (struct if_laddrreq *)data;
899 struct gre_softc *sc = ifp->if_softc;
900 int s;
901 struct sockaddr_in si;
902 struct sockaddr *sa = NULL;
903 int error;
904
905 switch (cmd) {
906 case SIOCSIFFLAGS:
907 case SIOCSIFMTU:
908 case GRESPROTO:
909 case GRESADDRD:
910 case GRESADDRS:
911 case GRESSOCK:
912 case GREDSOCK:
913 case SIOCSLIFPHYADDR:
914 case SIOCDIFPHYADDR:
915 if ((error = kauth_authorize_generic(l->l_cred,
916 KAUTH_GENERIC_ISSUSER, &l->l_acflag)) != 0)
917 return (error);
918 break;
919 default:
920 error = 0;
921 break;
922 }
923
924 s = splnet();
925 switch (cmd) {
926 case SIOCSIFADDR:
927 ifp->if_flags |= IFF_UP;
928 error = gre_kick(sc);
929 break;
930 case SIOCSIFDSTADDR:
931 break;
932 case SIOCSIFFLAGS:
933 oproto = sc->g_proto;
934 switch (ifr->ifr_flags & (IFF_LINK0|IFF_LINK2)) {
935 case IFF_LINK0|IFF_LINK2:
936 sc->g_proto = IPPROTO_UDP;
937 if (oproto != IPPROTO_UDP)
938 ifp->if_flags &= ~IFF_RUNNING;
939 error = gre_kick(sc);
940 break;
941 case IFF_LINK0:
942 sc->g_proto = IPPROTO_GRE;
943 gre_wakeup(sc);
944 goto recompute;
945 case 0:
946 sc->g_proto = IPPROTO_MOBILE;
947 gre_wakeup(sc);
948 goto recompute;
949 }
950 break;
951 case SIOCSIFMTU:
952 if (ifr->ifr_mtu < 576) {
953 error = EINVAL;
954 break;
955 }
956 ifp->if_mtu = ifr->ifr_mtu;
957 break;
958 case SIOCGIFMTU:
959 ifr->ifr_mtu = sc->sc_if.if_mtu;
960 break;
961 case SIOCADDMULTI:
962 case SIOCDELMULTI:
963 if (ifr == 0) {
964 error = EAFNOSUPPORT;
965 break;
966 }
967 switch (ifr->ifr_addr.sa_family) {
968 #ifdef INET
969 case AF_INET:
970 break;
971 #endif
972 #ifdef INET6
973 case AF_INET6:
974 break;
975 #endif
976 default:
977 error = EAFNOSUPPORT;
978 break;
979 }
980 break;
981 case GRESPROTO:
982 oproto = sc->g_proto;
983 sc->g_proto = ifr->ifr_flags;
984 switch (sc->g_proto) {
985 case IPPROTO_UDP:
986 ifp->if_flags |= IFF_LINK0|IFF_LINK2;
987 if (oproto != IPPROTO_UDP)
988 ifp->if_flags &= ~IFF_RUNNING;
989 error = gre_kick(sc);
990 break;
991 case IPPROTO_GRE:
992 ifp->if_flags |= IFF_LINK0;
993 ifp->if_flags &= ~IFF_LINK2;
994 goto recompute;
995 case IPPROTO_MOBILE:
996 ifp->if_flags &= ~(IFF_LINK0|IFF_LINK2);
997 goto recompute;
998 default:
999 error = EPROTONOSUPPORT;
1000 break;
1001 }
1002 break;
1003 case GREGPROTO:
1004 ifr->ifr_flags = sc->g_proto;
1005 break;
1006 case GRESADDRS:
1007 case GRESADDRD:
1008 /*
1009 * set tunnel endpoints, compute a less specific route
1010 * to the remote end and mark if as up
1011 */
1012 sa = &ifr->ifr_addr;
1013 if (cmd == GRESADDRS) {
1014 sc->g_src = (satosin(sa))->sin_addr;
1015 sc->g_srcport = satosin(sa)->sin_port;
1016 }
1017 if (cmd == GRESADDRD) {
1018 if (sc->g_proto == IPPROTO_UDP &&
1019 satosin(sa)->sin_port == 0) {
1020 error = EINVAL;
1021 break;
1022 }
1023 sc->g_dst = (satosin(sa))->sin_addr;
1024 sc->g_dstport = satosin(sa)->sin_port;
1025 }
1026 recompute:
1027 if (sc->g_proto == IPPROTO_UDP ||
1028 (sc->g_src.s_addr != INADDR_ANY &&
1029 sc->g_dst.s_addr != INADDR_ANY)) {
1030 if (sc->sc_fp != NULL) {
1031 closef(sc->sc_fp, l);
1032 sc->sc_fp = NULL;
1033 }
1034 if (sc->route.ro_rt != NULL) {
1035 RTFREE(sc->route.ro_rt);
1036 sc->route.ro_rt = NULL;
1037 }
1038 if (sc->g_proto == IPPROTO_UDP)
1039 error = gre_kick(sc);
1040 else if (gre_compute_route(sc) == 0)
1041 ifp->if_flags |= IFF_RUNNING;
1042 else
1043 ifp->if_flags &= ~IFF_RUNNING;
1044 }
1045 break;
1046 case GREGADDRS:
1047 memset(&si, 0, sizeof(si));
1048 si.sin_family = AF_INET;
1049 si.sin_len = sizeof(struct sockaddr_in);
1050 si.sin_addr.s_addr = sc->g_src.s_addr;
1051 sa = sintosa(&si);
1052 ifr->ifr_addr = *sa;
1053 break;
1054 case GREGADDRD:
1055 memset(&si, 0, sizeof(si));
1056 si.sin_family = AF_INET;
1057 si.sin_len = sizeof(struct sockaddr_in);
1058 si.sin_addr.s_addr = sc->g_dst.s_addr;
1059 sa = sintosa(&si);
1060 ifr->ifr_addr = *sa;
1061 break;
1062 case GREDSOCK:
1063 if (sc->g_proto != IPPROTO_UDP)
1064 return EINVAL;
1065 if (sc->sc_fp != NULL) {
1066 closef(sc->sc_fp, l);
1067 sc->sc_fp = NULL;
1068 error = gre_kick(sc);
1069 }
1070 break;
1071 case GRESSOCK:
1072 if (sc->g_proto != IPPROTO_UDP)
1073 return EINVAL;
1074 /* getsock() will FILE_USE() the descriptor for us */
1075 if ((error = getsock(p->p_fd, (int)ifr->ifr_value, &fp)) != 0)
1076 break;
1077 so = (struct socket *)fp->f_data;
1078 if (so->so_type != SOCK_DGRAM) {
1079 FILE_UNUSE(fp, NULL);
1080 error = EINVAL;
1081 break;
1082 }
1083 /* check address */
1084 if ((error = gre_getnames(so, curlwp, &src, &dst)) != 0) {
1085 FILE_UNUSE(fp, NULL);
1086 break;
1087 }
1088
1089 fp->f_count++;
1090
1091 ofp = sc->sc_fp;
1092 sc->sc_fp = fp;
1093 if ((error = gre_kick(sc)) != 0) {
1094 closef(fp, l);
1095 sc->sc_fp = ofp;
1096 break;
1097 }
1098 sc->g_src = src.sin_addr;
1099 sc->g_srcport = src.sin_port;
1100 sc->g_dst = dst.sin_addr;
1101 sc->g_dstport = dst.sin_port;
1102 if (ofp != NULL)
1103 closef(ofp, l);
1104 break;
1105 case SIOCSLIFPHYADDR:
1106 if (lifr->addr.ss_family != AF_INET ||
1107 lifr->dstaddr.ss_family != AF_INET) {
1108 error = EAFNOSUPPORT;
1109 break;
1110 }
1111 if (lifr->addr.ss_len != sizeof(si) ||
1112 lifr->dstaddr.ss_len != sizeof(si)) {
1113 error = EINVAL;
1114 break;
1115 }
1116 sc->g_src = satosin(&lifr->addr)->sin_addr;
1117 sc->g_dst = satosin(&lifr->dstaddr)->sin_addr;
1118 sc->g_srcport = satosin(&lifr->addr)->sin_port;
1119 sc->g_dstport = satosin(&lifr->dstaddr)->sin_port;
1120 goto recompute;
1121 case SIOCDIFPHYADDR:
1122 sc->g_src.s_addr = INADDR_ANY;
1123 sc->g_dst.s_addr = INADDR_ANY;
1124 sc->g_srcport = 0;
1125 sc->g_dstport = 0;
1126 goto recompute;
1127 case SIOCGLIFPHYADDR:
1128 if (sc->g_src.s_addr == INADDR_ANY ||
1129 sc->g_dst.s_addr == INADDR_ANY) {
1130 error = EADDRNOTAVAIL;
1131 break;
1132 }
1133 memset(&si, 0, sizeof(si));
1134 si.sin_family = AF_INET;
1135 si.sin_len = sizeof(struct sockaddr_in);
1136 si.sin_addr = sc->g_src;
1137 if (sc->g_proto == IPPROTO_UDP)
1138 si.sin_port = sc->g_srcport;
1139 memcpy(&lifr->addr, &si, sizeof(si));
1140 si.sin_addr = sc->g_dst;
1141 if (sc->g_proto == IPPROTO_UDP)
1142 si.sin_port = sc->g_dstport;
1143 memcpy(&lifr->dstaddr, &si, sizeof(si));
1144 break;
1145 default:
1146 error = EINVAL;
1147 break;
1148 }
1149 splx(s);
1150 return (error);
1151 }
1152
1153 /*
1154 * computes a route to our destination that is not the one
1155 * which would be taken by ip_output(), as this one will loop back to
1156 * us. If the interface is p2p as a--->b, then a routing entry exists
1157 * If we now send a packet to b (e.g. ping b), this will come down here
1158 * gets src=a, dst=b tacked on and would from ip_output() sent back to
1159 * if_gre.
1160 * Goal here is to compute a route to b that is less specific than
1161 * a-->b. We know that this one exists as in normal operation we have
1162 * at least a default route which matches.
1163 */
1164 static int
1165 gre_compute_route(struct gre_softc *sc)
1166 {
1167 struct route *ro;
1168 u_int32_t a, b, c;
1169
1170 ro = &sc->route;
1171
1172 memset(ro, 0, sizeof(struct route));
1173 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
1174 ro->ro_dst.sa_family = AF_INET;
1175 ro->ro_dst.sa_len = sizeof(ro->ro_dst);
1176
1177 /*
1178 * toggle last bit, so our interface is not found, but a less
1179 * specific route. I'd rather like to specify a shorter mask,
1180 * but this is not possible. Should work though. XXX
1181 * there is a simpler way ...
1182 */
1183 if ((sc->sc_if.if_flags & IFF_LINK1) == 0) {
1184 a = ntohl(sc->g_dst.s_addr);
1185 b = a & 0x01;
1186 c = a & 0xfffffffe;
1187 b = b ^ 0x01;
1188 a = b | c;
1189 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr
1190 = htonl(a);
1191 }
1192
1193 #ifdef DIAGNOSTIC
1194 printf("%s: searching for a route to %s", sc->sc_if.if_xname,
1195 inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr));
1196 #endif
1197
1198 rtalloc(ro);
1199
1200 /*
1201 * check if this returned a route at all and this route is no
1202 * recursion to ourself
1203 */
1204 if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
1205 #ifdef DIAGNOSTIC
1206 if (ro->ro_rt == NULL)
1207 printf(" - no route found!\n");
1208 else
1209 printf(" - route loops back to ourself!\n");
1210 #endif
1211 return EADDRNOTAVAIL;
1212 }
1213
1214 /*
1215 * now change it back - else ip_output will just drop
1216 * the route and search one to this interface ...
1217 */
1218 if ((sc->sc_if.if_flags & IFF_LINK1) == 0)
1219 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
1220
1221 #ifdef DIAGNOSTIC
1222 printf(", choosing %s with gateway %s\n", ro->ro_rt->rt_ifp->if_xname,
1223 inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr));
1224 #endif
1225
1226 return 0;
1227 }
1228
/*
 * Internet-style checksum over a flat buffer (in_cksum() does the same
 * for mbuf chains): sum the buffer as 16-bit words in memory order, treat
 * a trailing odd byte as the first byte of a word padded with zero, fold
 * the carries back in and return the one's complement of the result.
 */
u_int16_t
gre_in_cksum(u_int16_t *p, u_int len)
{
	u_int32_t acc = 0;
	int left;

	/* Whole 16-bit words. */
	for (left = len >> 1; left != 0; left--) {
		acc += *p;
		p++;
	}

	/* Odd trailing byte, if any. */
	if ((len & 1) != 0) {
		union {
			u_short	w;
			u_char	c[2];
		} tail;

		tail.c[0] = *(u_char *)p;
		tail.c[1] = 0;
		acc += tail.w;
	}

	/* end-around-carry */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += acc >> 16;

	return (~acc);
}
1257 #endif
1258
void	greattach(int);

/*
 * Attach routine: when INET is configured, initialise the softc list
 * and register the "gre" interface cloner.  `count' is not used.
 */
/* ARGSUSED */
void
greattach(int count)
{
#ifdef INET
	LIST_INIT(&gre_softc_list);
	if_clone_attach(&gre_cloner);
#endif /* INET */
}
1270