if_gre.c revision 1.70 1 /* $NetBSD: if_gre.c,v 1.70 2006/10/25 20:28:45 elad Exp $ */
2
3 /*
4 * Copyright (c) 1998 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Heiko W.Rupp <hwr (at) pilhuhn.de>
9 *
10 * IPv6-over-GRE contributed by Gert Doering <gert (at) greenie.muc.de>
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the NetBSD
23 * Foundation, Inc. and its contributors.
24 * 4. Neither the name of The NetBSD Foundation nor the names of its
25 * contributors may be used to endorse or promote products derived
26 * from this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
29 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
32 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 */
40
41 /*
42 * Encapsulate L3 protocols into IP
43 * See RFC 1701 and 1702 for more details.
44 * If_gre is compatible with Cisco GRE tunnels, so you can
45 * have a NetBSD box as the other end of a tunnel interface of a Cisco
46 * router. See gre(4) for more details.
 * Also supported: minimal encapsulation within IP (protocol 55), see RFC 2004
48 */
49
50 #include <sys/cdefs.h>
51 __KERNEL_RCSID(0, "$NetBSD: if_gre.c,v 1.70 2006/10/25 20:28:45 elad Exp $");
52
53 #include "opt_gre.h"
54 #include "opt_inet.h"
55 #include "bpfilter.h"
56
57 #ifdef INET
58 #include <sys/param.h>
59 #include <sys/file.h>
60 #include <sys/filedesc.h>
61 #include <sys/malloc.h>
62 #include <sys/mbuf.h>
63 #include <sys/proc.h>
64 #include <sys/protosw.h>
65 #include <sys/socket.h>
66 #include <sys/socketvar.h>
67 #include <sys/ioctl.h>
68 #include <sys/queue.h>
69 #if __NetBSD__
70 #include <sys/systm.h>
71 #include <sys/sysctl.h>
72 #include <sys/kauth.h>
73 #endif
74
75 #include <sys/kthread.h>
76
77 #include <machine/cpu.h>
78
79 #include <net/ethertypes.h>
80 #include <net/if.h>
81 #include <net/if_types.h>
82 #include <net/netisr.h>
83 #include <net/route.h>
84
85 #ifdef INET
86 #include <netinet/in.h>
87 #include <netinet/in_systm.h>
88 #include <netinet/in_var.h>
89 #include <netinet/ip.h>
90 #include <netinet/ip_var.h>
91 #else
92 #error "Huh? if_gre without inet?"
93 #endif
94
95
96 #ifdef NETATALK
97 #include <netatalk/at.h>
98 #include <netatalk/at_var.h>
99 #include <netatalk/at_extern.h>
100 #endif
101
102 #if NBPFILTER > 0
103 #include <sys/time.h>
104 #include <net/bpf.h>
105 #endif
106
107 #include <net/if_gre.h>
108
109 /*
110 * It is not easy to calculate the right value for a GRE MTU.
111 * We leave this task to the admin and use the same default that
112 * other vendors use.
113 */
114 #define GREMTU 1476
115
/*
 * GRE_DPRINTF(sc, fmt, ...): debug trace helper.
 *
 * With GRE_DEBUG defined, the message is printf()ed only when IFF_DEBUG
 * is set on the softc's interface (sc->sc_if); without GRE_DEBUG the
 * macro expands to an empty statement and its arguments are not
 * evaluated.  The format is followed by __VA_ARGS__ after a comma, so
 * callers pass at least one argument after the format string.
 */
#ifdef GRE_DEBUG
#define GRE_DPRINTF(__sc, __fmt, ...) \
	do { \
		if (((__sc)->sc_if.if_flags & IFF_DEBUG) != 0) \
			printf(__fmt, __VA_ARGS__); \
	} while (/*CONSTCOND*/0)
#else
#define GRE_DPRINTF(__sc, __fmt, ...) do { } while (/*CONSTCOND*/0)
#endif /* GRE_DEBUG */
125
/* List of all gre softcs; initialised in greattach(). */
struct gre_softc_head gre_softc_list;
/* TTL written into the outer IP header / set on the UDP socket. */
int ip_gre_ttl = GRE_TTL;

/* Interface cloner entry points ("ifconfig greN create/destroy"). */
static int gre_clone_create(struct if_clone *, int);
static int gre_clone_destroy(struct ifnet *);

static struct if_clone gre_cloner =
    IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);

/* ifnet methods installed by gre_clone_create(). */
static int gre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
    struct rtentry *);
static int gre_ioctl(struct ifnet *, u_long, caddr_t);

/* Finds a route to the tunnel destination that avoids this interface. */
static int gre_compute_route(struct gre_softc *sc);

/* Socket address helpers used for the UDP encapsulation mode. */
static int gre_getsockname(struct socket *, struct mbuf *, struct lwp *);
static int gre_getpeername(struct socket *, struct mbuf *, struct lwp *);
static int gre_getnames(struct socket *, struct lwp *, struct sockaddr_in *,
    struct sockaddr_in *);
145
/*
 * Clear the "running" flag pointed to by `running' and wake up everybody
 * sleeping on that flag's address (gre_join() sleeps there).
 */
static void
gre_stop(int *running)
{
	*running = 0;		/* publish the new state first ... */
	wakeup(running);	/* ... then notify the sleepers */
}
152
153 static void
154 gre_join(int *running)
155 {
156 int s;
157
158 s = splnet();
159 while (*running != 0) {
160 splx(s);
161 tsleep(running, PSOCK, "grejoin", 0);
162 s = splnet();
163 }
164 splx(s);
165 }
166
167 static void
168 gre_wakeup(struct gre_softc *sc)
169 {
170 GRE_DPRINTF(sc, "%s: enter\n", __func__);
171 sc->sc_waitchan = 1;
172 wakeup(&sc->sc_waitchan);
173 }
174
175 static int
176 gre_clone_create(struct if_clone *ifc, int unit)
177 {
178 struct gre_softc *sc;
179
180 sc = malloc(sizeof(struct gre_softc), M_DEVBUF, M_WAITOK);
181 memset(sc, 0, sizeof(struct gre_softc));
182
183 snprintf(sc->sc_if.if_xname, sizeof(sc->sc_if.if_xname), "%s%d",
184 ifc->ifc_name, unit);
185 sc->sc_if.if_softc = sc;
186 sc->sc_if.if_type = IFT_TUNNEL;
187 sc->sc_if.if_addrlen = 0;
188 sc->sc_if.if_hdrlen = 24; /* IP + GRE */
189 sc->sc_if.if_dlt = DLT_NULL;
190 sc->sc_if.if_mtu = GREMTU;
191 sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
192 sc->sc_if.if_output = gre_output;
193 sc->sc_if.if_ioctl = gre_ioctl;
194 sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
195 sc->g_dstport = sc->g_srcport = 0;
196 sc->g_proto = IPPROTO_GRE;
197 sc->sc_snd.ifq_maxlen = 256;
198 sc->sc_if.if_flags |= IFF_LINK0;
199 if_attach(&sc->sc_if);
200 if_alloc_sadl(&sc->sc_if);
201 #if NBPFILTER > 0
202 bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t));
203 #endif
204 LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
205 return (0);
206 }
207
208 static int
209 gre_clone_destroy(struct ifnet *ifp)
210 {
211 struct gre_softc *sc = ifp->if_softc;
212
213 LIST_REMOVE(sc, sc_list);
214 #if NBPFILTER > 0
215 bpfdetach(ifp);
216 #endif
217 if_detach(ifp);
218 gre_wakeup(sc);
219 gre_join(&sc->sc_thread);
220 if (sc->sc_fp != NULL) {
221 closef(sc->sc_fp, curlwp);
222 sc->sc_fp = NULL;
223 }
224 free(sc, M_DEVBUF);
225
226 return (0);
227 }
228
229 static void
230 gre_receive(struct socket *so __unused, caddr_t arg, int waitflag __unused)
231 {
232 struct gre_softc *sc = (struct gre_softc *)arg;
233
234 GRE_DPRINTF(sc, "%s: enter\n", __func__);
235
236 gre_wakeup(sc);
237 }
238
239 static void
240 gre_upcall_add(struct socket *so, caddr_t arg)
241 {
242 /* XXX What if the kernel already set an upcall? */
243 so->so_upcallarg = arg;
244 so->so_upcall = gre_receive;
245 so->so_rcv.sb_flags |= SB_UPCALL;
246 }
247
248 static void
249 gre_upcall_remove(struct socket *so)
250 {
251 /* XXX What if the kernel already set an upcall? */
252 so->so_rcv.sb_flags &= ~SB_UPCALL;
253 so->so_upcallarg = NULL;
254 so->so_upcall = NULL;
255 }
256
257 static void
258 gre_sodestroy(struct socket **sop)
259 {
260 gre_upcall_remove(*sop);
261 soshutdown(*sop, SHUT_RDWR);
262 soclose(*sop);
263 *sop = NULL;
264 }
265
266 static struct mbuf *
267 gre_getsockmbuf(struct socket *so __unused)
268 {
269 struct mbuf *m;
270
271 m = m_get(M_WAIT, MT_SONAME);
272 if (m != NULL)
273 MCLAIM(m, so->so_mowner);
274 return m;
275 }
276
277 static int
278 gre_socreate1(struct gre_softc *sc, struct lwp *l, struct gre_soparm *sp,
279 struct socket **sop)
280 {
281 int rc;
282 struct mbuf *m;
283 struct sockaddr_in *sin;
284 struct socket *so;
285
286 GRE_DPRINTF(sc, "%s: enter\n", __func__);
287 rc = socreate(AF_INET, sop, SOCK_DGRAM, IPPROTO_UDP, l);
288 if (rc != 0) {
289 GRE_DPRINTF(sc, "%s: socreate failed\n", __func__);
290 return rc;
291 }
292
293 so = *sop;
294
295 gre_upcall_add(so, (caddr_t)sc);
296 if ((m = gre_getsockmbuf(so)) == NULL) {
297 rc = ENOBUFS;
298 goto out;
299 }
300 sin = mtod(m, struct sockaddr_in *);
301 sin->sin_len = m->m_len = sizeof(struct sockaddr_in);
302 sin->sin_family = AF_INET;
303 sin->sin_addr = sc->g_src;
304 sin->sin_port = sc->g_srcport;
305
306 GRE_DPRINTF(sc, "%s: bind 0x%08" PRIx32 " port %d\n", __func__,
307 sin->sin_addr.s_addr, ntohs(sin->sin_port));
308 if ((rc = sobind(so, m, l)) != 0) {
309 GRE_DPRINTF(sc, "%s: sobind failed\n", __func__);
310 goto out;
311 }
312
313 if (sc->g_srcport == 0) {
314 if ((rc = gre_getsockname(so, m, l)) != 0) {
315 GRE_DPRINTF(sc, "%s: gre_getsockname failed\n",
316 __func__);
317 goto out;
318 }
319 sc->g_srcport = sin->sin_port;
320 }
321
322 sin->sin_addr = sc->g_dst;
323 sin->sin_port = sc->g_dstport;
324
325 if ((rc = soconnect(so, m, l)) != 0) {
326 GRE_DPRINTF(sc, "%s: soconnect failed\n", __func__);
327 goto out;
328 }
329
330 *mtod(m, int *) = ip_gre_ttl;
331 rc = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so, IPPROTO_IP, IP_TTL,
332 &m);
333 m = NULL;
334 if (rc != 0) {
335 printf("%s: setopt ttl failed\n", __func__);
336 rc = 0;
337 }
338 out:
339 m_freem(m);
340
341 if (rc != 0)
342 gre_sodestroy(sop);
343 else
344 *sp = sc->sc_soparm;
345
346 return rc;
347 }
348
349 static void
350 gre_thread1(struct gre_softc *sc, struct lwp *l)
351 {
352 int flags, rc, s;
353 const struct gre_h *gh;
354 struct ifnet *ifp = &sc->sc_if;
355 struct mbuf *m;
356 struct socket *so = NULL;
357 struct uio uio;
358 struct gre_soparm sp;
359
360 GRE_DPRINTF(sc, "%s: enter\n", __func__);
361 s = splnet();
362
363 sc->sc_waitchan = 1;
364
365 memset(&sp, 0, sizeof(sp));
366 memset(&uio, 0, sizeof(uio));
367
368 ifp->if_flags |= IFF_RUNNING;
369
370 for (;;) {
371 while (sc->sc_waitchan == 0) {
372 splx(s);
373 GRE_DPRINTF(sc, "%s: sleeping\n", __func__);
374 tsleep(&sc->sc_waitchan, PSOCK, "grewait", 0);
375 s = splnet();
376 }
377 sc->sc_waitchan = 0;
378 GRE_DPRINTF(sc, "%s: awake\n", __func__);
379 if ((ifp->if_flags & IFF_UP) != IFF_UP) {
380 GRE_DPRINTF(sc, "%s: not up & running; exiting\n",
381 __func__);
382 break;
383 }
384 if (sc->g_proto != IPPROTO_UDP) {
385 GRE_DPRINTF(sc, "%s: not udp; exiting\n", __func__);
386 break;
387 }
388 /* XXX optimize */
389 if (so == NULL || memcmp(&sp, &sc->sc_soparm, sizeof(sp)) != 0){
390 GRE_DPRINTF(sc, "%s: parameters changed\n", __func__);
391
392 if (sp.sp_fp != NULL) {
393 FILE_UNUSE(sp.sp_fp, NULL);
394 sp.sp_fp = NULL;
395 so = NULL;
396 } else if (so != NULL)
397 gre_sodestroy(&so);
398
399 if (sc->sc_fp != NULL) {
400 so = (struct socket *)sc->sc_fp->f_data;
401 gre_upcall_add(so, (caddr_t)sc);
402 sp = sc->sc_soparm;
403 FILE_USE(sp.sp_fp);
404 } else if (gre_socreate1(sc, l, &sp, &so) != 0)
405 goto out;
406 }
407 for (;;) {
408 flags = MSG_DONTWAIT;
409 uio.uio_resid = 1000000;
410 rc = (*so->so_receive)(so, NULL, &uio, &m, NULL,
411 &flags);
412 /* TBD Back off if ECONNREFUSED (indicates
413 * ICMP Port Unreachable)?
414 */
415 if (rc == EWOULDBLOCK) {
416 GRE_DPRINTF(sc, "%s: so_receive EWOULDBLOCK\n",
417 __func__);
418 break;
419 } else if (rc != 0 || m == NULL) {
420 GRE_DPRINTF(sc, "%s: rc %d m %p\n",
421 ifp->if_xname, rc, (void *)m);
422 continue;
423 } else
424 GRE_DPRINTF(sc, "%s: so_receive ok\n",
425 __func__);
426 if (m->m_len < sizeof(*gh) &&
427 (m = m_pullup(m, sizeof(*gh))) == NULL) {
428 GRE_DPRINTF(sc, "%s: m_pullup failed\n",
429 __func__);
430 continue;
431 }
432 gh = mtod(m, const struct gre_h *);
433
434 if (gre_input3(sc, m, 0, IPPROTO_GRE, gh) == 0) {
435 GRE_DPRINTF(sc, "%s: dropping unsupported\n",
436 __func__);
437 ifp->if_ierrors++;
438 m_freem(m);
439 }
440 }
441 for (;;) {
442 IF_DEQUEUE(&sc->sc_snd, m);
443 if (m == NULL)
444 break;
445 GRE_DPRINTF(sc, "%s: dequeue\n", __func__);
446 if ((so->so_state & SS_ISCONNECTED) == 0) {
447 GRE_DPRINTF(sc, "%s: not connected\n",
448 __func__);
449 m_freem(m);
450 continue;
451 }
452 rc = (*so->so_send)(so, NULL, NULL, m, NULL, 0, l);
453 /* XXX handle ENOBUFS? */
454 if (rc != 0)
455 GRE_DPRINTF(sc, "%s: so_send failed\n",
456 __func__);
457 }
458 /* Give the software interrupt queues a chance to
459 * run, or else when I send a ping from gre0 to gre1 on
460 * the same host, gre0 will not wake for the reply.
461 */
462 splx(s);
463 s = splnet();
464 }
465 if (sp.sp_fp != NULL) {
466 GRE_DPRINTF(sc, "%s: removing upcall\n", __func__);
467 gre_upcall_remove(so);
468 FILE_UNUSE(sp.sp_fp, NULL);
469 sp.sp_fp = NULL;
470 } else if (so != NULL)
471 gre_sodestroy(&so);
472 out:
473 GRE_DPRINTF(sc, "%s: stopping\n", __func__);
474 if (sc->g_proto == IPPROTO_UDP)
475 ifp->if_flags &= ~IFF_RUNNING;
476 while (!IF_IS_EMPTY(&sc->sc_snd)) {
477 IF_DEQUEUE(&sc->sc_snd, m);
478 m_freem(m);
479 }
480 gre_stop(&sc->sc_thread);
481 /* must not touch sc after this! */
482 GRE_DPRINTF(sc, "%s: restore ipl\n", __func__);
483 splx(s);
484 }
485
486 static void
487 gre_thread(void *arg)
488 {
489 struct gre_softc *sc = (struct gre_softc *)arg;
490
491 gre_thread1(sc, curlwp);
492 /* must not touch sc after this! */
493 kthread_exit(0);
494 }
495
496 int
497 gre_input3(struct gre_softc *sc, struct mbuf *m, int hlen, u_char proto,
498 const struct gre_h *gh)
499 {
500 u_int16_t flags;
501 #if NBPFILTER > 0
502 u_int32_t af = AF_INET; /* af passed to BPF tap */
503 #endif
504 int s, isr;
505 struct ifqueue *ifq;
506
507 sc->sc_if.if_ipackets++;
508 sc->sc_if.if_ibytes += m->m_pkthdr.len;
509
510 switch (proto) {
511 case IPPROTO_GRE:
512 hlen += sizeof(struct gre_h);
513
514 /* process GRE flags as packet can be of variable len */
515 flags = ntohs(gh->flags);
516
517 /* Checksum & Offset are present */
518 if ((flags & GRE_CP) | (flags & GRE_RP))
519 hlen += 4;
520 /* We don't support routing fields (variable length) */
521 if (flags & GRE_RP)
522 return (0);
523 if (flags & GRE_KP)
524 hlen += 4;
525 if (flags & GRE_SP)
526 hlen += 4;
527
528 switch (ntohs(gh->ptype)) { /* ethertypes */
529 case ETHERTYPE_IP: /* shouldn't need a schednetisr(), as */
530 ifq = &ipintrq; /* we are in ip_input */
531 isr = NETISR_IP;
532 break;
533 #ifdef NETATALK
534 case ETHERTYPE_ATALK:
535 ifq = &atintrq1;
536 isr = NETISR_ATALK;
537 #if NBPFILTER > 0
538 af = AF_APPLETALK;
539 #endif
540 break;
541 #endif
542 #ifdef INET6
543 case ETHERTYPE_IPV6:
544 GRE_DPRINTF(sc, "%s: IPv6 packet\n", __func__);
545 ifq = &ip6intrq;
546 isr = NETISR_IPV6;
547 #if NBPFILTER > 0
548 af = AF_INET6;
549 #endif
550 break;
551 #endif
552 default: /* others not yet supported */
553 printf("%s: unhandled ethertype 0x%04x\n", __func__,
554 ntohs(gh->ptype));
555 return (0);
556 }
557 break;
558 default:
559 /* others not yet supported */
560 return (0);
561 }
562
563 if (hlen > m->m_pkthdr.len) {
564 m_freem(m);
565 sc->sc_if.if_ierrors++;
566 return (EINVAL);
567 }
568 m_adj(m, hlen);
569
570 #if NBPFILTER > 0
571 if (sc->sc_if.if_bpf != NULL)
572 bpf_mtap_af(sc->sc_if.if_bpf, af, m);
573 #endif /*NBPFILTER > 0*/
574
575 m->m_pkthdr.rcvif = &sc->sc_if;
576
577 s = splnet(); /* possible */
578 if (IF_QFULL(ifq)) {
579 IF_DROP(ifq);
580 m_freem(m);
581 } else {
582 IF_ENQUEUE(ifq, m);
583 }
584 /* we need schednetisr since the address family may change */
585 schednetisr(isr);
586 splx(s);
587
588 return (1); /* packet is done, no further processing needed */
589 }
590
591 /*
592 * The output routine. Takes a packet and encapsulates it in the protocol
593 * given by sc->g_proto. See also RFC 1701 and RFC 2004
594 */
595 static int
596 gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
597 struct rtentry *rt __unused)
598 {
599 int error = 0, hlen;
600 struct gre_softc *sc = ifp->if_softc;
601 struct greip *gi;
602 struct gre_h *gh;
603 struct ip *eip, *ip;
604 u_int8_t ip_tos = 0;
605 u_int16_t etype = 0;
606 struct mobile_h mob_h;
607
608 if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) == 0 ||
609 sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
610 m_freem(m);
611 error = ENETDOWN;
612 goto end;
613 }
614
615 gi = NULL;
616 ip = NULL;
617
618 #if NBPFILTER >0
619 if (ifp->if_bpf)
620 bpf_mtap_af(ifp->if_bpf, dst->sa_family, m);
621 #endif
622
623 m->m_flags &= ~(M_BCAST|M_MCAST);
624
625 switch (sc->g_proto) {
626 case IPPROTO_MOBILE:
627 if (dst->sa_family == AF_INET) {
628 int msiz;
629
630 if (M_UNWRITABLE(m, sizeof(*ip)) &&
631 (m = m_pullup(m, sizeof(*ip))) == NULL) {
632 error = ENOBUFS;
633 goto end;
634 }
635 ip = mtod(m, struct ip *);
636
637 memset(&mob_h, 0, MOB_H_SIZ_L);
638 mob_h.proto = (ip->ip_p) << 8;
639 mob_h.odst = ip->ip_dst.s_addr;
640 ip->ip_dst.s_addr = sc->g_dst.s_addr;
641
642 /*
643 * If the packet comes from our host, we only change
644 * the destination address in the IP header.
645 * Else we also need to save and change the source
646 */
647 if (in_hosteq(ip->ip_src, sc->g_src)) {
648 msiz = MOB_H_SIZ_S;
649 } else {
650 mob_h.proto |= MOB_H_SBIT;
651 mob_h.osrc = ip->ip_src.s_addr;
652 ip->ip_src.s_addr = sc->g_src.s_addr;
653 msiz = MOB_H_SIZ_L;
654 }
655 HTONS(mob_h.proto);
656 mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
657
658 M_PREPEND(m, msiz, M_DONTWAIT);
659 if (m == NULL) {
660 error = ENOBUFS;
661 goto end;
662 }
663 /* XXX Assuming that ip does not dangle after
664 * M_PREPEND. In practice, that's true, but
665 * that's in M_PREPEND's contract.
666 */
667 memmove(mtod(m, caddr_t), ip, sizeof(*ip));
668 ip = mtod(m, struct ip *);
669 memcpy((caddr_t)(ip + 1), &mob_h, (unsigned)msiz);
670 ip->ip_len = htons(ntohs(ip->ip_len) + msiz);
671 } else { /* AF_INET */
672 IF_DROP(&ifp->if_snd);
673 m_freem(m);
674 error = EINVAL;
675 goto end;
676 }
677 break;
678 case IPPROTO_UDP:
679 case IPPROTO_GRE:
680 GRE_DPRINTF(sc, "%s: dst->sa_family=%d\n", __func__,
681 dst->sa_family);
682 switch (dst->sa_family) {
683 case AF_INET:
684 ip = mtod(m, struct ip *);
685 ip_tos = ip->ip_tos;
686 etype = ETHERTYPE_IP;
687 break;
688 #ifdef NETATALK
689 case AF_APPLETALK:
690 etype = ETHERTYPE_ATALK;
691 break;
692 #endif
693 #ifdef INET6
694 case AF_INET6:
695 etype = ETHERTYPE_IPV6;
696 break;
697 #endif
698 default:
699 IF_DROP(&ifp->if_snd);
700 m_freem(m);
701 error = EAFNOSUPPORT;
702 goto end;
703 }
704 break;
705 default:
706 IF_DROP(&ifp->if_snd);
707 m_freem(m);
708 error = EINVAL;
709 goto end;
710 }
711
712 switch (sc->g_proto) {
713 case IPPROTO_GRE:
714 hlen = sizeof(struct greip);
715 break;
716 case IPPROTO_UDP:
717 hlen = sizeof(struct gre_h);
718 break;
719 default:
720 hlen = 0;
721 break;
722 }
723
724 M_PREPEND(m, hlen, M_DONTWAIT);
725
726 if (m == NULL) {
727 IF_DROP(&ifp->if_snd);
728 error = ENOBUFS;
729 goto end;
730 }
731
732 switch (sc->g_proto) {
733 case IPPROTO_UDP:
734 gh = mtod(m, struct gre_h *);
735 memset(gh, 0, sizeof(*gh));
736 gh->ptype = htons(etype);
737 /* XXX Need to handle IP ToS. Look at how I handle IP TTL. */
738 break;
739 case IPPROTO_GRE:
740 gi = mtod(m, struct greip *);
741 gh = &gi->gi_g;
742 eip = &gi->gi_i;
743 /* we don't have any GRE flags for now */
744 memset(gh, 0, sizeof(*gh));
745 gh->ptype = htons(etype);
746 eip->ip_src = sc->g_src;
747 eip->ip_dst = sc->g_dst;
748 eip->ip_hl = (sizeof(struct ip)) >> 2;
749 eip->ip_ttl = ip_gre_ttl;
750 eip->ip_tos = ip_tos;
751 eip->ip_len = htons(m->m_pkthdr.len);
752 eip->ip_p = sc->g_proto;
753 break;
754 case IPPROTO_MOBILE:
755 eip = mtod(m, struct ip *);
756 eip->ip_p = sc->g_proto;
757 break;
758 default:
759 error = EPROTONOSUPPORT;
760 m_freem(m);
761 goto end;
762 }
763
764 ifp->if_opackets++;
765 ifp->if_obytes += m->m_pkthdr.len;
766
767 /* send it off */
768 if (sc->g_proto == IPPROTO_UDP) {
769 if (IF_QFULL(&sc->sc_snd)) {
770 IF_DROP(&sc->sc_snd);
771 error = ENOBUFS;
772 m_freem(m);
773 } else {
774 IF_ENQUEUE(&sc->sc_snd, m);
775 gre_wakeup(sc);
776 error = 0;
777 }
778 } else {
779 error = ip_output(m, NULL, &sc->route, 0,
780 (struct ip_moptions *)NULL, (struct socket *)NULL);
781 }
782 end:
783 if (error)
784 ifp->if_oerrors++;
785 return (error);
786 }
787
788 /* Must be called at IPL_NET. */
789 static int
790 gre_kick(struct gre_softc *sc)
791 {
792 int rc;
793 struct ifnet *ifp = &sc->sc_if;
794
795 if (sc->g_proto == IPPROTO_UDP && (ifp->if_flags & IFF_UP) == IFF_UP &&
796 !sc->sc_thread) {
797 sc->sc_thread = 1;
798 rc = kthread_create1(gre_thread, (void *)sc, NULL,
799 ifp->if_xname);
800 if (rc != 0)
801 gre_stop(&sc->sc_thread);
802 return rc;
803 } else {
804 gre_wakeup(sc);
805 return 0;
806 }
807 }
808
809 static int
810 gre_getname(struct socket *so, int req, struct mbuf *nam, struct lwp *l)
811 {
812 int s, error;
813
814 s = splsoftnet();
815 error = (*so->so_proto->pr_usrreq)(so, req, (struct mbuf *)0,
816 nam, (struct mbuf *)0, l);
817 splx(s);
818 return error;
819 }
820
821 static int
822 gre_getsockname(struct socket *so, struct mbuf *nam, struct lwp *l)
823 {
824 return gre_getname(so, PRU_SOCKADDR, nam, l);
825 }
826
827 static int
828 gre_getpeername(struct socket *so, struct mbuf *nam, struct lwp *l)
829 {
830 return gre_getname(so, PRU_PEERADDR, nam, l);
831 }
832
833 static int
834 gre_getnames(struct socket *so, struct lwp *l, struct sockaddr_in *src,
835 struct sockaddr_in *dst)
836 {
837 struct mbuf *m;
838 struct sockaddr_in *sin;
839 int rc;
840
841 if ((m = gre_getsockmbuf(so)) == NULL)
842 return ENOBUFS;
843
844 sin = mtod(m, struct sockaddr_in *);
845
846 if ((rc = gre_getsockname(so, m, l)) != 0)
847 goto out;
848 if (sin->sin_family != AF_INET) {
849 rc = EAFNOSUPPORT;
850 goto out;
851 }
852 *src = *sin;
853
854 if ((rc = gre_getpeername(so, m, l)) != 0)
855 goto out;
856 if (sin->sin_family != AF_INET) {
857 rc = EAFNOSUPPORT;
858 goto out;
859 }
860 *dst = *sin;
861
862 out:
863 m_freem(m);
864 return rc;
865 }
866
867 static int
868 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
869 {
870 u_char oproto;
871 struct file *fp, *ofp;
872 struct socket *so;
873 struct sockaddr_in dst, src;
874 struct proc *p = curproc; /* XXX */
875 struct lwp *l = curlwp; /* XXX */
876 struct ifreq *ifr = (struct ifreq *)data;
877 struct if_laddrreq *lifr = (struct if_laddrreq *)data;
878 struct gre_softc *sc = ifp->if_softc;
879 int s;
880 struct sockaddr_in si;
881 struct sockaddr *sa = NULL;
882 int error;
883
884 switch (cmd) {
885 case SIOCSIFFLAGS:
886 case SIOCSIFMTU:
887 case GRESPROTO:
888 case GRESADDRD:
889 case GRESADDRS:
890 case GRESSOCK:
891 case GREDSOCK:
892 case SIOCSLIFPHYADDR:
893 case SIOCDIFPHYADDR:
894 if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE,
895 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
896 NULL) != 0)
897 return (EPERM);
898 break;
899 default:
900 error = 0;
901 break;
902 }
903
904 s = splnet();
905 switch (cmd) {
906 case SIOCSIFADDR:
907 ifp->if_flags |= IFF_UP;
908 error = gre_kick(sc);
909 break;
910 case SIOCSIFDSTADDR:
911 break;
912 case SIOCSIFFLAGS:
913 oproto = sc->g_proto;
914 switch (ifr->ifr_flags & (IFF_LINK0|IFF_LINK2)) {
915 case IFF_LINK0|IFF_LINK2:
916 sc->g_proto = IPPROTO_UDP;
917 if (oproto != IPPROTO_UDP)
918 ifp->if_flags &= ~IFF_RUNNING;
919 error = gre_kick(sc);
920 break;
921 case IFF_LINK0:
922 sc->g_proto = IPPROTO_GRE;
923 gre_wakeup(sc);
924 goto recompute;
925 case 0:
926 sc->g_proto = IPPROTO_MOBILE;
927 gre_wakeup(sc);
928 goto recompute;
929 }
930 break;
931 case SIOCSIFMTU:
932 if (ifr->ifr_mtu < 576) {
933 error = EINVAL;
934 break;
935 }
936 ifp->if_mtu = ifr->ifr_mtu;
937 break;
938 case SIOCGIFMTU:
939 ifr->ifr_mtu = sc->sc_if.if_mtu;
940 break;
941 case SIOCADDMULTI:
942 case SIOCDELMULTI:
943 if (ifr == 0) {
944 error = EAFNOSUPPORT;
945 break;
946 }
947 switch (ifr->ifr_addr.sa_family) {
948 #ifdef INET
949 case AF_INET:
950 break;
951 #endif
952 #ifdef INET6
953 case AF_INET6:
954 break;
955 #endif
956 default:
957 error = EAFNOSUPPORT;
958 break;
959 }
960 break;
961 case GRESPROTO:
962 oproto = sc->g_proto;
963 sc->g_proto = ifr->ifr_flags;
964 switch (sc->g_proto) {
965 case IPPROTO_UDP:
966 ifp->if_flags |= IFF_LINK0|IFF_LINK2;
967 if (oproto != IPPROTO_UDP)
968 ifp->if_flags &= ~IFF_RUNNING;
969 error = gre_kick(sc);
970 break;
971 case IPPROTO_GRE:
972 ifp->if_flags |= IFF_LINK0;
973 ifp->if_flags &= ~IFF_LINK2;
974 goto recompute;
975 case IPPROTO_MOBILE:
976 ifp->if_flags &= ~(IFF_LINK0|IFF_LINK2);
977 goto recompute;
978 default:
979 error = EPROTONOSUPPORT;
980 break;
981 }
982 break;
983 case GREGPROTO:
984 ifr->ifr_flags = sc->g_proto;
985 break;
986 case GRESADDRS:
987 case GRESADDRD:
988 /*
989 * set tunnel endpoints, compute a less specific route
990 * to the remote end and mark if as up
991 */
992 sa = &ifr->ifr_addr;
993 if (cmd == GRESADDRS) {
994 sc->g_src = (satosin(sa))->sin_addr;
995 sc->g_srcport = satosin(sa)->sin_port;
996 }
997 if (cmd == GRESADDRD) {
998 if (sc->g_proto == IPPROTO_UDP &&
999 satosin(sa)->sin_port == 0) {
1000 error = EINVAL;
1001 break;
1002 }
1003 sc->g_dst = (satosin(sa))->sin_addr;
1004 sc->g_dstport = satosin(sa)->sin_port;
1005 }
1006 recompute:
1007 if (sc->g_proto == IPPROTO_UDP ||
1008 (sc->g_src.s_addr != INADDR_ANY &&
1009 sc->g_dst.s_addr != INADDR_ANY)) {
1010 if (sc->sc_fp != NULL) {
1011 closef(sc->sc_fp, l);
1012 sc->sc_fp = NULL;
1013 }
1014 if (sc->route.ro_rt != NULL) {
1015 RTFREE(sc->route.ro_rt);
1016 sc->route.ro_rt = NULL;
1017 }
1018 if (sc->g_proto == IPPROTO_UDP)
1019 error = gre_kick(sc);
1020 else if (gre_compute_route(sc) == 0)
1021 ifp->if_flags |= IFF_RUNNING;
1022 else
1023 ifp->if_flags &= ~IFF_RUNNING;
1024 }
1025 break;
1026 case GREGADDRS:
1027 memset(&si, 0, sizeof(si));
1028 si.sin_family = AF_INET;
1029 si.sin_len = sizeof(struct sockaddr_in);
1030 si.sin_addr.s_addr = sc->g_src.s_addr;
1031 sa = sintosa(&si);
1032 ifr->ifr_addr = *sa;
1033 break;
1034 case GREGADDRD:
1035 memset(&si, 0, sizeof(si));
1036 si.sin_family = AF_INET;
1037 si.sin_len = sizeof(struct sockaddr_in);
1038 si.sin_addr.s_addr = sc->g_dst.s_addr;
1039 sa = sintosa(&si);
1040 ifr->ifr_addr = *sa;
1041 break;
1042 case GREDSOCK:
1043 if (sc->g_proto != IPPROTO_UDP)
1044 return EINVAL;
1045 if (sc->sc_fp != NULL) {
1046 closef(sc->sc_fp, l);
1047 sc->sc_fp = NULL;
1048 error = gre_kick(sc);
1049 }
1050 break;
1051 case GRESSOCK:
1052 if (sc->g_proto != IPPROTO_UDP)
1053 return EINVAL;
1054 /* getsock() will FILE_USE() the descriptor for us */
1055 if ((error = getsock(p->p_fd, (int)ifr->ifr_value, &fp)) != 0)
1056 break;
1057 so = (struct socket *)fp->f_data;
1058 if (so->so_type != SOCK_DGRAM) {
1059 FILE_UNUSE(fp, NULL);
1060 error = EINVAL;
1061 break;
1062 }
1063 /* check address */
1064 if ((error = gre_getnames(so, curlwp, &src, &dst)) != 0) {
1065 FILE_UNUSE(fp, NULL);
1066 break;
1067 }
1068
1069 fp->f_count++;
1070
1071 ofp = sc->sc_fp;
1072 sc->sc_fp = fp;
1073 if ((error = gre_kick(sc)) != 0) {
1074 closef(fp, l);
1075 sc->sc_fp = ofp;
1076 break;
1077 }
1078 sc->g_src = src.sin_addr;
1079 sc->g_srcport = src.sin_port;
1080 sc->g_dst = dst.sin_addr;
1081 sc->g_dstport = dst.sin_port;
1082 if (ofp != NULL)
1083 closef(ofp, l);
1084 break;
1085 case SIOCSLIFPHYADDR:
1086 if (lifr->addr.ss_family != AF_INET ||
1087 lifr->dstaddr.ss_family != AF_INET) {
1088 error = EAFNOSUPPORT;
1089 break;
1090 }
1091 if (lifr->addr.ss_len != sizeof(si) ||
1092 lifr->dstaddr.ss_len != sizeof(si)) {
1093 error = EINVAL;
1094 break;
1095 }
1096 sc->g_src = satosin(&lifr->addr)->sin_addr;
1097 sc->g_dst = satosin(&lifr->dstaddr)->sin_addr;
1098 sc->g_srcport = satosin(&lifr->addr)->sin_port;
1099 sc->g_dstport = satosin(&lifr->dstaddr)->sin_port;
1100 goto recompute;
1101 case SIOCDIFPHYADDR:
1102 sc->g_src.s_addr = INADDR_ANY;
1103 sc->g_dst.s_addr = INADDR_ANY;
1104 sc->g_srcport = 0;
1105 sc->g_dstport = 0;
1106 goto recompute;
1107 case SIOCGLIFPHYADDR:
1108 if (sc->g_src.s_addr == INADDR_ANY ||
1109 sc->g_dst.s_addr == INADDR_ANY) {
1110 error = EADDRNOTAVAIL;
1111 break;
1112 }
1113 memset(&si, 0, sizeof(si));
1114 si.sin_family = AF_INET;
1115 si.sin_len = sizeof(struct sockaddr_in);
1116 si.sin_addr = sc->g_src;
1117 if (sc->g_proto == IPPROTO_UDP)
1118 si.sin_port = sc->g_srcport;
1119 memcpy(&lifr->addr, &si, sizeof(si));
1120 si.sin_addr = sc->g_dst;
1121 if (sc->g_proto == IPPROTO_UDP)
1122 si.sin_port = sc->g_dstport;
1123 memcpy(&lifr->dstaddr, &si, sizeof(si));
1124 break;
1125 default:
1126 error = EINVAL;
1127 break;
1128 }
1129 splx(s);
1130 return (error);
1131 }
1132
1133 /*
1134 * computes a route to our destination that is not the one
1135 * which would be taken by ip_output(), as this one will loop back to
1136 * us. If the interface is p2p as a--->b, then a routing entry exists
1137 * If we now send a packet to b (e.g. ping b), this will come down here
1138 * gets src=a, dst=b tacked on and would from ip_output() sent back to
1139 * if_gre.
1140 * Goal here is to compute a route to b that is less specific than
1141 * a-->b. We know that this one exists as in normal operation we have
1142 * at least a default route which matches.
1143 */
1144 static int
1145 gre_compute_route(struct gre_softc *sc)
1146 {
1147 struct route *ro;
1148 u_int32_t a, b, c;
1149
1150 ro = &sc->route;
1151
1152 memset(ro, 0, sizeof(struct route));
1153 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
1154 ro->ro_dst.sa_family = AF_INET;
1155 ro->ro_dst.sa_len = sizeof(ro->ro_dst);
1156
1157 /*
1158 * toggle last bit, so our interface is not found, but a less
1159 * specific route. I'd rather like to specify a shorter mask,
1160 * but this is not possible. Should work though. XXX
1161 * there is a simpler way ...
1162 */
1163 if ((sc->sc_if.if_flags & IFF_LINK1) == 0) {
1164 a = ntohl(sc->g_dst.s_addr);
1165 b = a & 0x01;
1166 c = a & 0xfffffffe;
1167 b = b ^ 0x01;
1168 a = b | c;
1169 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr
1170 = htonl(a);
1171 }
1172
1173 #ifdef DIAGNOSTIC
1174 printf("%s: searching for a route to %s", sc->sc_if.if_xname,
1175 inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr));
1176 #endif
1177
1178 rtalloc(ro);
1179
1180 /*
1181 * check if this returned a route at all and this route is no
1182 * recursion to ourself
1183 */
1184 if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
1185 #ifdef DIAGNOSTIC
1186 if (ro->ro_rt == NULL)
1187 printf(" - no route found!\n");
1188 else
1189 printf(" - route loops back to ourself!\n");
1190 #endif
1191 return EADDRNOTAVAIL;
1192 }
1193
1194 /*
1195 * now change it back - else ip_output will just drop
1196 * the route and search one to this interface ...
1197 */
1198 if ((sc->sc_if.if_flags & IFF_LINK1) == 0)
1199 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
1200
1201 #ifdef DIAGNOSTIC
1202 printf(", choosing %s with gateway %s\n", ro->ro_rt->rt_ifp->if_xname,
1203 inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr));
1204 #endif
1205
1206 return 0;
1207 }
1208
/*
 * Compute the 16-bit one's-complement checksum of a flat buffer - much
 * like in_cksum, which operates on mbufs.
 *
 * `p' points to the buffer and `len' is its length in bytes.  The
 * buffer is summed as native 16-bit words; for an odd length the last
 * byte is used as the first byte of a final word whose second byte is
 * zero.  Returns the complement of the folded sum.
 *
 * The sum is accumulated in 64 bits and folded until no carry is left,
 * so the result stays correct for buffers of any length representable
 * in `len' (a 32-bit accumulator wraps after 65536 words of 0xffff).
 */
u_int16_t
gre_in_cksum(u_int16_t *p, u_int len)
{
	u_int64_t sum = 0;
	u_int nwords = len >> 1;

	while (nwords-- != 0)
		sum += *p++;

	if (len & 1) {
		/* Pad the trailing byte to a full word in memory order. */
		union {
			u_int16_t w;
			u_int8_t c[2];
		} u;
		u.c[0] = *(const u_int8_t *)p;
		u.c[1] = 0;
		sum += u.w;
	}

	/* end-around-carry */
	while ((sum >> 16) != 0)
		sum = (sum >> 16) + (sum & 0xffff);

	return (u_int16_t)(~sum);
}
1237 #endif
1238
1239 void greattach(int);
1240
1241 /* ARGSUSED */
1242 void
1243 greattach(int count __unused)
1244 {
1245 #ifdef INET
1246 LIST_INIT(&gre_softc_list);
1247 if_clone_attach(&gre_cloner);
1248 #endif
1249 }
1250