if_gre.c revision 1.83 1 /* $NetBSD: if_gre.c,v 1.83 2007/01/26 19:32:32 dyoung Exp $ */
2
3 /*
4 * Copyright (c) 1998 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Heiko W.Rupp <hwr (at) pilhuhn.de>
9 *
10 * IPv6-over-GRE contributed by Gert Doering <gert (at) greenie.muc.de>
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the NetBSD
23 * Foundation, Inc. and its contributors.
24 * 4. Neither the name of The NetBSD Foundation nor the names of its
25 * contributors may be used to endorse or promote products derived
26 * from this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
29 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
32 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 */
40
/*
 * Encapsulate L3 protocols into IP.
 * See RFC 1701 and RFC 1702 for more details.
 * if_gre is compatible with Cisco GRE tunnels, so a NetBSD box can be
 * the other end of a tunnel interface on a Cisco router.  See gre(4)
 * for more details.
 * Also supported: minimal encapsulation within IP (IPPROTO_MOBILE,
 * protocol 55) as described in RFC 2004.
 */
49
50 #include <sys/cdefs.h>
51 __KERNEL_RCSID(0, "$NetBSD: if_gre.c,v 1.83 2007/01/26 19:32:32 dyoung Exp $");
52
53 #include "opt_gre.h"
54 #include "opt_inet.h"
55 #include "bpfilter.h"
56
57 #ifdef INET
58 #include <sys/param.h>
59 #include <sys/file.h>
60 #include <sys/filedesc.h>
61 #include <sys/malloc.h>
62 #include <sys/mbuf.h>
63 #include <sys/proc.h>
64 #include <sys/protosw.h>
65 #include <sys/socket.h>
66 #include <sys/socketvar.h>
67 #include <sys/ioctl.h>
68 #include <sys/queue.h>
69 #if __NetBSD__
70 #include <sys/systm.h>
71 #include <sys/sysctl.h>
72 #include <sys/kauth.h>
73 #endif
74
75 #include <sys/kthread.h>
76
77 #include <machine/cpu.h>
78
79 #include <net/ethertypes.h>
80 #include <net/if.h>
81 #include <net/if_types.h>
82 #include <net/netisr.h>
83 #include <net/route.h>
84
85 #ifdef INET
86 #include <netinet/in.h>
87 #include <netinet/in_systm.h>
88 #include <netinet/in_var.h>
89 #include <netinet/ip.h>
90 #include <netinet/ip_var.h>
91 #else
92 #error "Huh? if_gre without inet?"
93 #endif
94
95
96 #ifdef NETATALK
97 #include <netatalk/at.h>
98 #include <netatalk/at_var.h>
99 #include <netatalk/at_extern.h>
100 #endif
101
102 #if NBPFILTER > 0
103 #include <sys/time.h>
104 #include <net/bpf.h>
105 #endif
106
107 #include <net/if_gre.h>
108
/*
 * Default MTU of a freshly created tunnel.  Computing the right value
 * for a GRE MTU is not easy, so that job is left to the administrator;
 * the default is the same one other vendors use.
 */
#define	GREMTU	1476

/*
 * GRE_DPRINTF(sc, fmt, ...): debugging printf.
 *
 * Only compiled in when GRE_DEBUG is defined, and even then it prints
 * only while the interface has IFF_DEBUG set.  The format string must be
 * followed by at least one argument.  Without GRE_DEBUG the macro expands
 * to an empty statement and its arguments are not evaluated.
 */
#if defined(GRE_DEBUG)
#define	GRE_DPRINTF(softc_, fmt_, ...)					\
	do {								\
		if (((softc_)->sc_if.if_flags & IFF_DEBUG) != 0) {	\
			printf(fmt_, __VA_ARGS__);			\
		}							\
	} while (/*CONSTCOND*/0)
#else
#define	GRE_DPRINTF(softc_, fmt_, ...)	do { } while (/*CONSTCOND*/0)
#endif /* GRE_DEBUG */
125
126 struct gre_softc_head gre_softc_list;
127 int ip_gre_ttl = GRE_TTL;
128
129 static int gre_clone_create(struct if_clone *, int);
130 static int gre_clone_destroy(struct ifnet *);
131
132 static struct if_clone gre_cloner =
133 IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
134
135 static int gre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
136 struct rtentry *);
137 static int gre_ioctl(struct ifnet *, u_long, caddr_t);
138
139 static int gre_compute_route(struct gre_softc *sc);
140
141 static int gre_getsockname(struct socket *, struct mbuf *, struct lwp *);
142 static int gre_getpeername(struct socket *, struct mbuf *, struct lwp *);
143 static int gre_getnames(struct socket *, struct lwp *, struct sockaddr_in *,
144 struct sockaddr_in *);
145
/*
 * Clear the flag that `running' points to and issue a wakeup() on its
 * address, which is the channel gre_join() sleeps on.
 */
static void
gre_stop(volatile int *running)
{
	volatile int *const chan = running;

	*chan = 0;
	wakeup(chan);
}
152
153 static void
154 gre_join(volatile int *running)
155 {
156 int s;
157
158 s = splnet();
159 while (*running != 0) {
160 splx(s);
161 tsleep(running, PSOCK, "grejoin", 0);
162 s = splnet();
163 }
164 splx(s);
165 }
166
167 static void
168 gre_wakeup(struct gre_softc *sc)
169 {
170 GRE_DPRINTF(sc, "%s: enter\n", __func__);
171 sc->sc_waitchan = 1;
172 wakeup(&sc->sc_waitchan);
173 }
174
175 static int
176 gre_clone_create(struct if_clone *ifc, int unit)
177 {
178 struct gre_softc *sc;
179
180 sc = malloc(sizeof(struct gre_softc), M_DEVBUF, M_WAITOK);
181 memset(sc, 0, sizeof(struct gre_softc));
182
183 snprintf(sc->sc_if.if_xname, sizeof(sc->sc_if.if_xname), "%s%d",
184 ifc->ifc_name, unit);
185 sc->sc_if.if_softc = sc;
186 sc->sc_if.if_type = IFT_TUNNEL;
187 sc->sc_if.if_addrlen = 0;
188 sc->sc_if.if_hdrlen = 24; /* IP + GRE */
189 sc->sc_if.if_dlt = DLT_NULL;
190 sc->sc_if.if_mtu = GREMTU;
191 sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
192 sc->sc_if.if_output = gre_output;
193 sc->sc_if.if_ioctl = gre_ioctl;
194 sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
195 sc->g_dstport = sc->g_srcport = 0;
196 sc->sc_proto = IPPROTO_GRE;
197 sc->sc_snd.ifq_maxlen = 256;
198 sc->sc_if.if_flags |= IFF_LINK0;
199 if_attach(&sc->sc_if);
200 if_alloc_sadl(&sc->sc_if);
201 #if NBPFILTER > 0
202 bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t));
203 #endif
204 LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
205 return (0);
206 }
207
208 static int
209 gre_clone_destroy(struct ifnet *ifp)
210 {
211 int s;
212 struct gre_softc *sc = ifp->if_softc;
213
214 LIST_REMOVE(sc, sc_list);
215 #if NBPFILTER > 0
216 bpfdetach(ifp);
217 #endif
218 s = splnet();
219 ifp->if_flags &= ~IFF_UP;
220 gre_wakeup(sc);
221 splx(s);
222 gre_join(&sc->sc_thread);
223 s = splnet();
224 if_detach(ifp);
225 splx(s);
226 if (sc->sc_fp != NULL) {
227 closef(sc->sc_fp, curlwp);
228 sc->sc_fp = NULL;
229 }
230 free(sc, M_DEVBUF);
231
232 return (0);
233 }
234
235 static void
236 gre_receive(struct socket *so, caddr_t arg, int waitflag)
237 {
238 struct gre_softc *sc = (struct gre_softc *)arg;
239
240 GRE_DPRINTF(sc, "%s: enter\n", __func__);
241
242 gre_wakeup(sc);
243 }
244
245 static void
246 gre_upcall_add(struct socket *so, caddr_t arg)
247 {
248 /* XXX What if the kernel already set an upcall? */
249 so->so_upcallarg = arg;
250 so->so_upcall = gre_receive;
251 so->so_rcv.sb_flags |= SB_UPCALL;
252 }
253
254 static void
255 gre_upcall_remove(struct socket *so)
256 {
257 /* XXX What if the kernel already set an upcall? */
258 so->so_rcv.sb_flags &= ~SB_UPCALL;
259 so->so_upcallarg = NULL;
260 so->so_upcall = NULL;
261 }
262
263 static void
264 gre_sodestroy(struct socket **sop)
265 {
266 gre_upcall_remove(*sop);
267 soshutdown(*sop, SHUT_RDWR);
268 soclose(*sop);
269 *sop = NULL;
270 }
271
272 static struct mbuf *
273 gre_getsockmbuf(struct socket *so)
274 {
275 struct mbuf *m;
276
277 m = m_get(M_WAIT, MT_SONAME);
278 if (m != NULL)
279 MCLAIM(m, so->so_mowner);
280 return m;
281 }
282
283 static int
284 gre_socreate1(struct gre_softc *sc, struct lwp *l, struct gre_soparm *sp,
285 struct socket **sop)
286 {
287 int rc;
288 struct mbuf *m;
289 struct sockaddr_in *sin;
290 struct socket *so;
291
292 GRE_DPRINTF(sc, "%s: enter\n", __func__);
293 rc = socreate(AF_INET, sop, SOCK_DGRAM, IPPROTO_UDP, l);
294 if (rc != 0) {
295 GRE_DPRINTF(sc, "%s: socreate failed\n", __func__);
296 return rc;
297 }
298
299 so = *sop;
300
301 gre_upcall_add(so, (caddr_t)sc);
302 if ((m = gre_getsockmbuf(so)) == NULL) {
303 rc = ENOBUFS;
304 goto out;
305 }
306 sin = mtod(m, struct sockaddr_in *);
307 sin->sin_len = m->m_len = sizeof(struct sockaddr_in);
308 sin->sin_family = AF_INET;
309 sin->sin_addr = sc->g_src;
310 sin->sin_port = sc->g_srcport;
311
312 GRE_DPRINTF(sc, "%s: bind 0x%08" PRIx32 " port %d\n", __func__,
313 sin->sin_addr.s_addr, ntohs(sin->sin_port));
314 if ((rc = sobind(so, m, l)) != 0) {
315 GRE_DPRINTF(sc, "%s: sobind failed\n", __func__);
316 goto out;
317 }
318
319 if (sc->g_srcport == 0) {
320 if ((rc = gre_getsockname(so, m, l)) != 0) {
321 GRE_DPRINTF(sc, "%s: gre_getsockname failed\n",
322 __func__);
323 goto out;
324 }
325 sc->g_srcport = sin->sin_port;
326 }
327
328 sin->sin_addr = sc->g_dst;
329 sin->sin_port = sc->g_dstport;
330
331 if ((rc = soconnect(so, m, l)) != 0) {
332 GRE_DPRINTF(sc, "%s: soconnect failed\n", __func__);
333 goto out;
334 }
335
336 *mtod(m, int *) = ip_gre_ttl;
337 m->m_len = sizeof(int);
338 rc = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so, IPPROTO_IP, IP_TTL,
339 &m);
340 m = NULL;
341 if (rc != 0) {
342 printf("%s: setopt ttl failed\n", __func__);
343 rc = 0;
344 }
345 out:
346 m_freem(m);
347
348 if (rc != 0)
349 gre_sodestroy(sop);
350 else
351 *sp = sc->sc_soparm;
352
353 return rc;
354 }
355
356 static void
357 gre_thread1(struct gre_softc *sc, struct lwp *l)
358 {
359 int flags, rc, s;
360 const struct gre_h *gh;
361 struct ifnet *ifp = &sc->sc_if;
362 struct mbuf *m;
363 struct socket *so = NULL;
364 struct uio uio;
365 struct gre_soparm sp;
366
367 GRE_DPRINTF(sc, "%s: enter\n", __func__);
368 s = splnet();
369
370 sc->sc_waitchan = 1;
371
372 memset(&sp, 0, sizeof(sp));
373 memset(&uio, 0, sizeof(uio));
374
375 ifp->if_flags |= IFF_RUNNING;
376
377 for (;;) {
378 while (sc->sc_waitchan == 0) {
379 splx(s);
380 GRE_DPRINTF(sc, "%s: sleeping\n", __func__);
381 tsleep(&sc->sc_waitchan, PSOCK, "grewait", 0);
382 s = splnet();
383 }
384 sc->sc_waitchan = 0;
385 GRE_DPRINTF(sc, "%s: awake\n", __func__);
386 if ((ifp->if_flags & IFF_UP) != IFF_UP) {
387 GRE_DPRINTF(sc, "%s: not up & running; exiting\n",
388 __func__);
389 break;
390 }
391 if (sc->sc_proto != IPPROTO_UDP) {
392 GRE_DPRINTF(sc, "%s: not udp; exiting\n", __func__);
393 break;
394 }
395 /* XXX optimize */
396 if (so == NULL || memcmp(&sp, &sc->sc_soparm, sizeof(sp)) != 0){
397 GRE_DPRINTF(sc, "%s: parameters changed\n", __func__);
398
399 if (sp.sp_fp != NULL) {
400 FILE_UNUSE(sp.sp_fp, NULL);
401 sp.sp_fp = NULL;
402 so = NULL;
403 } else if (so != NULL)
404 gre_sodestroy(&so);
405
406 if (sc->sc_fp != NULL) {
407 so = (struct socket *)sc->sc_fp->f_data;
408 gre_upcall_add(so, (caddr_t)sc);
409 sp = sc->sc_soparm;
410 FILE_USE(sp.sp_fp);
411 } else if (gre_socreate1(sc, l, &sp, &so) != 0)
412 goto out;
413 }
414 for (;;) {
415 flags = MSG_DONTWAIT;
416 uio.uio_resid = 1000000;
417 rc = (*so->so_receive)(so, NULL, &uio, &m, NULL,
418 &flags);
419 /* TBD Back off if ECONNREFUSED (indicates
420 * ICMP Port Unreachable)?
421 */
422 if (rc == EWOULDBLOCK) {
423 GRE_DPRINTF(sc, "%s: so_receive EWOULDBLOCK\n",
424 __func__);
425 break;
426 } else if (rc != 0 || m == NULL) {
427 GRE_DPRINTF(sc, "%s: rc %d m %p\n",
428 ifp->if_xname, rc, (void *)m);
429 continue;
430 } else
431 GRE_DPRINTF(sc, "%s: so_receive ok\n",
432 __func__);
433 if (m->m_len < sizeof(*gh) &&
434 (m = m_pullup(m, sizeof(*gh))) == NULL) {
435 GRE_DPRINTF(sc, "%s: m_pullup failed\n",
436 __func__);
437 continue;
438 }
439 gh = mtod(m, const struct gre_h *);
440
441 if (gre_input3(sc, m, 0, IPPROTO_GRE, gh) == 0) {
442 GRE_DPRINTF(sc, "%s: dropping unsupported\n",
443 __func__);
444 ifp->if_ierrors++;
445 m_freem(m);
446 }
447 }
448 for (;;) {
449 IF_DEQUEUE(&sc->sc_snd, m);
450 if (m == NULL)
451 break;
452 GRE_DPRINTF(sc, "%s: dequeue\n", __func__);
453 if ((so->so_state & SS_ISCONNECTED) == 0) {
454 GRE_DPRINTF(sc, "%s: not connected\n",
455 __func__);
456 m_freem(m);
457 continue;
458 }
459 rc = (*so->so_send)(so, NULL, NULL, m, NULL, 0, l);
460 /* XXX handle ENOBUFS? */
461 if (rc != 0)
462 GRE_DPRINTF(sc, "%s: so_send failed\n",
463 __func__);
464 }
465 /* Give the software interrupt queues a chance to
466 * run, or else when I send a ping from gre0 to gre1 on
467 * the same host, gre0 will not wake for the reply.
468 */
469 splx(s);
470 s = splnet();
471 }
472 if (sp.sp_fp != NULL) {
473 GRE_DPRINTF(sc, "%s: removing upcall\n", __func__);
474 gre_upcall_remove(so);
475 FILE_UNUSE(sp.sp_fp, NULL);
476 sp.sp_fp = NULL;
477 } else if (so != NULL)
478 gre_sodestroy(&so);
479 out:
480 GRE_DPRINTF(sc, "%s: stopping\n", __func__);
481 if (sc->sc_proto == IPPROTO_UDP)
482 ifp->if_flags &= ~IFF_RUNNING;
483 while (!IF_IS_EMPTY(&sc->sc_snd)) {
484 IF_DEQUEUE(&sc->sc_snd, m);
485 m_freem(m);
486 }
487 gre_stop(&sc->sc_thread);
488 /* must not touch sc after this! */
489 GRE_DPRINTF(sc, "%s: restore ipl\n", __func__);
490 splx(s);
491 }
492
493 static void
494 gre_thread(void *arg)
495 {
496 struct gre_softc *sc = (struct gre_softc *)arg;
497
498 gre_thread1(sc, curlwp);
499 /* must not touch sc after this! */
500 kthread_exit(0);
501 }
502
503 int
504 gre_input3(struct gre_softc *sc, struct mbuf *m, int hlen, u_char proto,
505 const struct gre_h *gh)
506 {
507 u_int16_t flags;
508 #if NBPFILTER > 0
509 u_int32_t af = AF_INET; /* af passed to BPF tap */
510 #endif
511 int s, isr;
512 struct ifqueue *ifq;
513
514 sc->sc_if.if_ipackets++;
515 sc->sc_if.if_ibytes += m->m_pkthdr.len;
516
517 switch (proto) {
518 case IPPROTO_GRE:
519 hlen += sizeof(struct gre_h);
520
521 /* process GRE flags as packet can be of variable len */
522 flags = ntohs(gh->flags);
523
524 /* Checksum & Offset are present */
525 if ((flags & GRE_CP) | (flags & GRE_RP))
526 hlen += 4;
527 /* We don't support routing fields (variable length) */
528 if (flags & GRE_RP)
529 return (0);
530 if (flags & GRE_KP)
531 hlen += 4;
532 if (flags & GRE_SP)
533 hlen += 4;
534
535 switch (ntohs(gh->ptype)) { /* ethertypes */
536 case ETHERTYPE_IP: /* shouldn't need a schednetisr(), as */
537 ifq = &ipintrq; /* we are in ip_input */
538 isr = NETISR_IP;
539 break;
540 #ifdef NETATALK
541 case ETHERTYPE_ATALK:
542 ifq = &atintrq1;
543 isr = NETISR_ATALK;
544 #if NBPFILTER > 0
545 af = AF_APPLETALK;
546 #endif
547 break;
548 #endif
549 #ifdef INET6
550 case ETHERTYPE_IPV6:
551 GRE_DPRINTF(sc, "%s: IPv6 packet\n", __func__);
552 ifq = &ip6intrq;
553 isr = NETISR_IPV6;
554 #if NBPFILTER > 0
555 af = AF_INET6;
556 #endif
557 break;
558 #endif
559 default: /* others not yet supported */
560 printf("%s: unhandled ethertype 0x%04x\n", __func__,
561 ntohs(gh->ptype));
562 return (0);
563 }
564 break;
565 default:
566 /* others not yet supported */
567 return (0);
568 }
569
570 if (hlen > m->m_pkthdr.len) {
571 m_freem(m);
572 sc->sc_if.if_ierrors++;
573 return (EINVAL);
574 }
575 m_adj(m, hlen);
576
577 #if NBPFILTER > 0
578 if (sc->sc_if.if_bpf != NULL)
579 bpf_mtap_af(sc->sc_if.if_bpf, af, m);
580 #endif /*NBPFILTER > 0*/
581
582 m->m_pkthdr.rcvif = &sc->sc_if;
583
584 s = splnet(); /* possible */
585 if (IF_QFULL(ifq)) {
586 IF_DROP(ifq);
587 m_freem(m);
588 } else {
589 IF_ENQUEUE(ifq, m);
590 }
591 /* we need schednetisr since the address family may change */
592 schednetisr(isr);
593 splx(s);
594
595 return (1); /* packet is done, no further processing needed */
596 }
597
598 /*
599 * The output routine. Takes a packet and encapsulates it in the protocol
600 * given by sc->sc_proto. See also RFC 1701 and RFC 2004
601 */
602 static int
603 gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
604 struct rtentry *rt)
605 {
606 int error = 0, hlen;
607 struct gre_softc *sc = ifp->if_softc;
608 struct greip *gi;
609 struct gre_h *gh;
610 struct ip *eip, *ip;
611 u_int8_t ip_tos = 0;
612 u_int16_t etype = 0;
613 struct mobile_h mob_h;
614
615 if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) == 0 ||
616 sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
617 m_freem(m);
618 error = ENETDOWN;
619 goto end;
620 }
621
622 gi = NULL;
623 ip = NULL;
624
625 #if NBPFILTER >0
626 if (ifp->if_bpf)
627 bpf_mtap_af(ifp->if_bpf, dst->sa_family, m);
628 #endif
629
630 m->m_flags &= ~(M_BCAST|M_MCAST);
631
632 switch (sc->sc_proto) {
633 case IPPROTO_MOBILE:
634 if (dst->sa_family == AF_INET) {
635 int msiz;
636
637 if (M_UNWRITABLE(m, sizeof(*ip)) &&
638 (m = m_pullup(m, sizeof(*ip))) == NULL) {
639 error = ENOBUFS;
640 goto end;
641 }
642 ip = mtod(m, struct ip *);
643
644 memset(&mob_h, 0, MOB_H_SIZ_L);
645 mob_h.proto = (ip->ip_p) << 8;
646 mob_h.odst = ip->ip_dst.s_addr;
647 ip->ip_dst.s_addr = sc->g_dst.s_addr;
648
649 /*
650 * If the packet comes from our host, we only change
651 * the destination address in the IP header.
652 * Else we also need to save and change the source
653 */
654 if (in_hosteq(ip->ip_src, sc->g_src)) {
655 msiz = MOB_H_SIZ_S;
656 } else {
657 mob_h.proto |= MOB_H_SBIT;
658 mob_h.osrc = ip->ip_src.s_addr;
659 ip->ip_src.s_addr = sc->g_src.s_addr;
660 msiz = MOB_H_SIZ_L;
661 }
662 HTONS(mob_h.proto);
663 mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
664
665 M_PREPEND(m, msiz, M_DONTWAIT);
666 if (m == NULL) {
667 error = ENOBUFS;
668 goto end;
669 }
670 /* XXX Assuming that ip does not dangle after
671 * M_PREPEND. In practice, that's true, but
672 * that's in M_PREPEND's contract.
673 */
674 memmove(mtod(m, caddr_t), ip, sizeof(*ip));
675 ip = mtod(m, struct ip *);
676 memcpy((caddr_t)(ip + 1), &mob_h, (unsigned)msiz);
677 ip->ip_len = htons(ntohs(ip->ip_len) + msiz);
678 } else { /* AF_INET */
679 IF_DROP(&ifp->if_snd);
680 m_freem(m);
681 error = EINVAL;
682 goto end;
683 }
684 break;
685 case IPPROTO_UDP:
686 case IPPROTO_GRE:
687 GRE_DPRINTF(sc, "%s: dst->sa_family=%d\n", __func__,
688 dst->sa_family);
689 switch (dst->sa_family) {
690 case AF_INET:
691 ip = mtod(m, struct ip *);
692 ip_tos = ip->ip_tos;
693 etype = ETHERTYPE_IP;
694 break;
695 #ifdef NETATALK
696 case AF_APPLETALK:
697 etype = ETHERTYPE_ATALK;
698 break;
699 #endif
700 #ifdef INET6
701 case AF_INET6:
702 etype = ETHERTYPE_IPV6;
703 break;
704 #endif
705 default:
706 IF_DROP(&ifp->if_snd);
707 m_freem(m);
708 error = EAFNOSUPPORT;
709 goto end;
710 }
711 break;
712 default:
713 IF_DROP(&ifp->if_snd);
714 m_freem(m);
715 error = EINVAL;
716 goto end;
717 }
718
719 switch (sc->sc_proto) {
720 case IPPROTO_GRE:
721 hlen = sizeof(struct greip);
722 break;
723 case IPPROTO_UDP:
724 hlen = sizeof(struct gre_h);
725 break;
726 default:
727 hlen = 0;
728 break;
729 }
730
731 M_PREPEND(m, hlen, M_DONTWAIT);
732
733 if (m == NULL) {
734 IF_DROP(&ifp->if_snd);
735 error = ENOBUFS;
736 goto end;
737 }
738
739 switch (sc->sc_proto) {
740 case IPPROTO_UDP:
741 gh = mtod(m, struct gre_h *);
742 memset(gh, 0, sizeof(*gh));
743 gh->ptype = htons(etype);
744 /* XXX Need to handle IP ToS. Look at how I handle IP TTL. */
745 break;
746 case IPPROTO_GRE:
747 gi = mtod(m, struct greip *);
748 gh = &gi->gi_g;
749 eip = &gi->gi_i;
750 /* we don't have any GRE flags for now */
751 memset(gh, 0, sizeof(*gh));
752 gh->ptype = htons(etype);
753 eip->ip_src = sc->g_src;
754 eip->ip_dst = sc->g_dst;
755 eip->ip_hl = (sizeof(struct ip)) >> 2;
756 eip->ip_ttl = ip_gre_ttl;
757 eip->ip_tos = ip_tos;
758 eip->ip_len = htons(m->m_pkthdr.len);
759 eip->ip_p = sc->sc_proto;
760 break;
761 case IPPROTO_MOBILE:
762 eip = mtod(m, struct ip *);
763 eip->ip_p = sc->sc_proto;
764 break;
765 default:
766 error = EPROTONOSUPPORT;
767 m_freem(m);
768 goto end;
769 }
770
771 ifp->if_opackets++;
772 ifp->if_obytes += m->m_pkthdr.len;
773
774 /* send it off */
775 if (sc->sc_proto == IPPROTO_UDP) {
776 if (IF_QFULL(&sc->sc_snd)) {
777 IF_DROP(&sc->sc_snd);
778 error = ENOBUFS;
779 m_freem(m);
780 } else {
781 IF_ENQUEUE(&sc->sc_snd, m);
782 gre_wakeup(sc);
783 error = 0;
784 }
785 goto end;
786 }
787 rtcache_check(&sc->route);
788 if (sc->route.ro_rt == NULL)
789 goto end;
790 if (sc->route.ro_rt->rt_ifp->if_softc == sc)
791 rtcache_free(&sc->route);
792 else
793 error = ip_output(m, NULL, &sc->route, 0,
794 (struct ip_moptions *)NULL, (struct socket *)NULL);
795 end:
796 if (error)
797 ifp->if_oerrors++;
798 return (error);
799 }
800
801 /* gre_kick must be synchronized with network interrupts in order
802 * to synchronize access to gre_softc members, so call it with
803 * interrupt priority level set to IPL_NET or greater.
804 */
805 static int
806 gre_kick(struct gre_softc *sc)
807 {
808 int rc;
809 struct ifnet *ifp = &sc->sc_if;
810
811 if (sc->sc_proto == IPPROTO_UDP && (ifp->if_flags & IFF_UP) == IFF_UP &&
812 !sc->sc_thread) {
813 sc->sc_thread = 1;
814 rc = kthread_create1(gre_thread, (void *)sc, NULL,
815 ifp->if_xname);
816 if (rc != 0)
817 gre_stop(&sc->sc_thread);
818 return rc;
819 } else {
820 gre_wakeup(sc);
821 return 0;
822 }
823 }
824
825 static int
826 gre_getname(struct socket *so, int req, struct mbuf *nam, struct lwp *l)
827 {
828 int s, error;
829
830 s = splsoftnet();
831 error = (*so->so_proto->pr_usrreq)(so, req, (struct mbuf *)0,
832 nam, (struct mbuf *)0, l);
833 splx(s);
834 return error;
835 }
836
837 static int
838 gre_getsockname(struct socket *so, struct mbuf *nam, struct lwp *l)
839 {
840 return gre_getname(so, PRU_SOCKADDR, nam, l);
841 }
842
843 static int
844 gre_getpeername(struct socket *so, struct mbuf *nam, struct lwp *l)
845 {
846 return gre_getname(so, PRU_PEERADDR, nam, l);
847 }
848
849 static int
850 gre_getnames(struct socket *so, struct lwp *l, struct sockaddr_in *src,
851 struct sockaddr_in *dst)
852 {
853 struct mbuf *m;
854 struct sockaddr_in *sin;
855 int rc;
856
857 if ((m = gre_getsockmbuf(so)) == NULL)
858 return ENOBUFS;
859
860 sin = mtod(m, struct sockaddr_in *);
861
862 if ((rc = gre_getsockname(so, m, l)) != 0)
863 goto out;
864 if (sin->sin_family != AF_INET) {
865 rc = EAFNOSUPPORT;
866 goto out;
867 }
868 *src = *sin;
869
870 if ((rc = gre_getpeername(so, m, l)) != 0)
871 goto out;
872 if (sin->sin_family != AF_INET) {
873 rc = EAFNOSUPPORT;
874 goto out;
875 }
876 *dst = *sin;
877
878 out:
879 m_freem(m);
880 return rc;
881 }
882
883 static int
884 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
885 {
886 u_char oproto;
887 struct file *fp, *ofp;
888 struct socket *so;
889 struct sockaddr_in dst, src;
890 struct proc *p = curproc; /* XXX */
891 struct lwp *l = curlwp; /* XXX */
892 struct ifreq *ifr = (struct ifreq *)data;
893 struct if_laddrreq *lifr = (struct if_laddrreq *)data;
894 struct gre_softc *sc = ifp->if_softc;
895 int s;
896 struct sockaddr_in si;
897 struct sockaddr *sa = NULL;
898 int error = 0;
899
900 switch (cmd) {
901 case SIOCSIFFLAGS:
902 case SIOCSIFMTU:
903 case GRESPROTO:
904 case GRESADDRD:
905 case GRESADDRS:
906 case GRESSOCK:
907 case GREDSOCK:
908 case SIOCSLIFPHYADDR:
909 case SIOCDIFPHYADDR:
910 if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE,
911 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
912 NULL) != 0)
913 return (EPERM);
914 break;
915 default:
916 break;
917 }
918
919 s = splnet();
920 switch (cmd) {
921 case SIOCSIFADDR:
922 ifp->if_flags |= IFF_UP;
923 if ((error = gre_kick(sc)) != 0)
924 ifp->if_flags &= ~IFF_UP;
925 break;
926 case SIOCSIFDSTADDR:
927 break;
928 case SIOCSIFFLAGS:
929 oproto = sc->sc_proto;
930 switch (ifr->ifr_flags & (IFF_LINK0|IFF_LINK2)) {
931 case IFF_LINK0|IFF_LINK2:
932 sc->sc_proto = IPPROTO_UDP;
933 if (oproto != IPPROTO_UDP)
934 ifp->if_flags &= ~IFF_RUNNING;
935 error = gre_kick(sc);
936 break;
937 case IFF_LINK0:
938 sc->sc_proto = IPPROTO_GRE;
939 gre_wakeup(sc);
940 goto recompute;
941 case 0:
942 sc->sc_proto = IPPROTO_MOBILE;
943 gre_wakeup(sc);
944 goto recompute;
945 }
946 break;
947 case SIOCSIFMTU:
948 if (ifr->ifr_mtu < 576) {
949 error = EINVAL;
950 break;
951 }
952 ifp->if_mtu = ifr->ifr_mtu;
953 break;
954 case SIOCGIFMTU:
955 ifr->ifr_mtu = sc->sc_if.if_mtu;
956 break;
957 case SIOCADDMULTI:
958 case SIOCDELMULTI:
959 if (ifr == 0) {
960 error = EAFNOSUPPORT;
961 break;
962 }
963 switch (ifr->ifr_addr.sa_family) {
964 #ifdef INET
965 case AF_INET:
966 break;
967 #endif
968 #ifdef INET6
969 case AF_INET6:
970 break;
971 #endif
972 default:
973 error = EAFNOSUPPORT;
974 break;
975 }
976 break;
977 case GRESPROTO:
978 oproto = sc->sc_proto;
979 sc->sc_proto = ifr->ifr_flags;
980 switch (sc->sc_proto) {
981 case IPPROTO_UDP:
982 ifp->if_flags |= IFF_LINK0|IFF_LINK2;
983 if (oproto != IPPROTO_UDP)
984 ifp->if_flags &= ~IFF_RUNNING;
985 error = gre_kick(sc);
986 break;
987 case IPPROTO_GRE:
988 ifp->if_flags |= IFF_LINK0;
989 ifp->if_flags &= ~IFF_LINK2;
990 goto recompute;
991 case IPPROTO_MOBILE:
992 ifp->if_flags &= ~(IFF_LINK0|IFF_LINK2);
993 goto recompute;
994 default:
995 error = EPROTONOSUPPORT;
996 break;
997 }
998 break;
999 case GREGPROTO:
1000 ifr->ifr_flags = sc->sc_proto;
1001 break;
1002 case GRESADDRS:
1003 case GRESADDRD:
1004 /*
1005 * set tunnel endpoints, compute a less specific route
1006 * to the remote end and mark if as up
1007 */
1008 sa = &ifr->ifr_addr;
1009 if (cmd == GRESADDRS) {
1010 sc->g_src = (satosin(sa))->sin_addr;
1011 sc->g_srcport = satosin(sa)->sin_port;
1012 }
1013 if (cmd == GRESADDRD) {
1014 if (sc->sc_proto == IPPROTO_UDP &&
1015 satosin(sa)->sin_port == 0) {
1016 error = EINVAL;
1017 break;
1018 }
1019 sc->g_dst = (satosin(sa))->sin_addr;
1020 sc->g_dstport = satosin(sa)->sin_port;
1021 }
1022 recompute:
1023 if (sc->sc_proto == IPPROTO_UDP ||
1024 (sc->g_src.s_addr != INADDR_ANY &&
1025 sc->g_dst.s_addr != INADDR_ANY)) {
1026 if (sc->sc_fp != NULL) {
1027 closef(sc->sc_fp, l);
1028 sc->sc_fp = NULL;
1029 }
1030 rtcache_free(&sc->route);
1031 if (sc->sc_proto == IPPROTO_UDP)
1032 error = gre_kick(sc);
1033 else if (gre_compute_route(sc) == 0)
1034 ifp->if_flags |= IFF_RUNNING;
1035 else
1036 ifp->if_flags &= ~IFF_RUNNING;
1037 }
1038 break;
1039 case GREGADDRS:
1040 memset(&si, 0, sizeof(si));
1041 si.sin_family = AF_INET;
1042 si.sin_len = sizeof(struct sockaddr_in);
1043 si.sin_addr.s_addr = sc->g_src.s_addr;
1044 sa = sintosa(&si);
1045 ifr->ifr_addr = *sa;
1046 break;
1047 case GREGADDRD:
1048 memset(&si, 0, sizeof(si));
1049 si.sin_family = AF_INET;
1050 si.sin_len = sizeof(struct sockaddr_in);
1051 si.sin_addr.s_addr = sc->g_dst.s_addr;
1052 sa = sintosa(&si);
1053 ifr->ifr_addr = *sa;
1054 break;
1055 case GREDSOCK:
1056 if (sc->sc_proto != IPPROTO_UDP)
1057 return EINVAL;
1058 if (sc->sc_fp != NULL) {
1059 closef(sc->sc_fp, l);
1060 sc->sc_fp = NULL;
1061 error = gre_kick(sc);
1062 }
1063 break;
1064 case GRESSOCK:
1065 if (sc->sc_proto != IPPROTO_UDP)
1066 return EINVAL;
1067 /* getsock() will FILE_USE() the descriptor for us */
1068 if ((error = getsock(p->p_fd, (int)ifr->ifr_value, &fp)) != 0)
1069 break;
1070 so = (struct socket *)fp->f_data;
1071 if (so->so_type != SOCK_DGRAM) {
1072 FILE_UNUSE(fp, NULL);
1073 error = EINVAL;
1074 break;
1075 }
1076 /* check address */
1077 if ((error = gre_getnames(so, curlwp, &src, &dst)) != 0) {
1078 FILE_UNUSE(fp, NULL);
1079 break;
1080 }
1081
1082 fp->f_count++;
1083
1084 ofp = sc->sc_fp;
1085 sc->sc_fp = fp;
1086 if ((error = gre_kick(sc)) != 0) {
1087 closef(fp, l);
1088 sc->sc_fp = ofp;
1089 break;
1090 }
1091 sc->g_src = src.sin_addr;
1092 sc->g_srcport = src.sin_port;
1093 sc->g_dst = dst.sin_addr;
1094 sc->g_dstport = dst.sin_port;
1095 if (ofp != NULL)
1096 closef(ofp, l);
1097 break;
1098 case SIOCSLIFPHYADDR:
1099 if (lifr->addr.ss_family != AF_INET ||
1100 lifr->dstaddr.ss_family != AF_INET) {
1101 error = EAFNOSUPPORT;
1102 break;
1103 }
1104 if (lifr->addr.ss_len != sizeof(si) ||
1105 lifr->dstaddr.ss_len != sizeof(si)) {
1106 error = EINVAL;
1107 break;
1108 }
1109 sc->g_src = satosin(&lifr->addr)->sin_addr;
1110 sc->g_dst = satosin(&lifr->dstaddr)->sin_addr;
1111 sc->g_srcport = satosin(&lifr->addr)->sin_port;
1112 sc->g_dstport = satosin(&lifr->dstaddr)->sin_port;
1113 goto recompute;
1114 case SIOCDIFPHYADDR:
1115 sc->g_src.s_addr = INADDR_ANY;
1116 sc->g_dst.s_addr = INADDR_ANY;
1117 sc->g_srcport = 0;
1118 sc->g_dstport = 0;
1119 goto recompute;
1120 case SIOCGLIFPHYADDR:
1121 if (sc->g_src.s_addr == INADDR_ANY ||
1122 sc->g_dst.s_addr == INADDR_ANY) {
1123 error = EADDRNOTAVAIL;
1124 break;
1125 }
1126 memset(&si, 0, sizeof(si));
1127 si.sin_family = AF_INET;
1128 si.sin_len = sizeof(struct sockaddr_in);
1129 si.sin_addr = sc->g_src;
1130 if (sc->sc_proto == IPPROTO_UDP)
1131 si.sin_port = sc->g_srcport;
1132 memcpy(&lifr->addr, &si, sizeof(si));
1133 si.sin_addr = sc->g_dst;
1134 if (sc->sc_proto == IPPROTO_UDP)
1135 si.sin_port = sc->g_dstport;
1136 memcpy(&lifr->dstaddr, &si, sizeof(si));
1137 break;
1138 default:
1139 error = EINVAL;
1140 break;
1141 }
1142 splx(s);
1143 return (error);
1144 }
1145
1146 /*
1147 * Compute a route to our destination.
1148 */
1149 static int
1150 gre_compute_route(struct gre_softc *sc)
1151 {
1152 struct route *ro;
1153
1154 ro = &sc->route;
1155
1156 memset(ro, 0, sizeof(struct route));
1157 satosin(&ro->ro_dst)->sin_addr = sc->g_dst;
1158 ro->ro_dst.sa_family = AF_INET;
1159 ro->ro_dst.sa_len = sizeof(ro->ro_dst);
1160
1161 #ifdef DIAGNOSTIC
1162 printf("%s: searching for a route to %s", sc->sc_if.if_xname,
1163 inet_ntoa(satosin(&ro->ro_dst)->sin_addr));
1164 #endif
1165
1166 rtcache_init(ro);
1167
1168 if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
1169 #ifdef DIAGNOSTIC
1170 if (ro->ro_rt == NULL)
1171 printf(" - no route found!\n");
1172 else
1173 printf(" - route loops back to ourself!\n");
1174 #endif
1175 rtcache_free(ro);
1176 return EADDRNOTAVAIL;
1177 }
1178
1179 return 0;
1180 }
1181
/*
 * Do a checksum of a buffer - much like in_cksum, which operates on
 * mbufs.
 *
 * `p' points at `len' bytes, read as host-order 16-bit words; an odd
 * trailing byte is treated as the first byte of a word whose second
 * byte is zero.  Returns the one's complement of the one's-complement
 * sum of those words.
 *
 * The sum is accumulated in 64 bits and folded until no carry is left,
 * so no carries are lost however long the buffer is.
 */
u_int16_t
gre_in_cksum(u_int16_t *p, unsigned int len)
{
	u_int64_t sum = 0;
	unsigned int nwords = len >> 1;

	while (nwords-- != 0)
		sum += *p++;

	if (len & 1) {
		union {
			u_int16_t w;
			unsigned char c[2];
		} u;

		u.c[0] = *(unsigned char *)p;
		u.c[1] = 0;
		sum += u.w;
	}

	/* end-around-carry */
	while ((sum >> 16) != 0)
		sum = (sum >> 16) + (sum & 0xffff);

	return (u_int16_t)~sum;
}
1210 #endif
1211
void	greattach(int);

/*
 * Pseudo-device attach routine.  `count' is ignored.  With INET
 * configured this initializes the softc list and registers the "gre"
 * cloner; without INET it does nothing.
 */
/* ARGSUSED */
void
greattach(int count)
{
#if defined(INET)
	LIST_INIT(&gre_softc_list);
	if_clone_attach(&gre_cloner);
#endif /* INET */
}
1223