1 /* $NetBSD: if_gre.c,v 1.78 2006/12/04 02:40:15 dyoung Exp $ */
2
3 /*
4 * Copyright (c) 1998 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Heiko W.Rupp <hwr (at) pilhuhn.de>
9 *
10 * IPv6-over-GRE contributed by Gert Doering <gert (at) greenie.muc.de>
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the NetBSD
23 * Foundation, Inc. and its contributors.
24 * 4. Neither the name of The NetBSD Foundation nor the names of its
25 * contributors may be used to endorse or promote products derived
26 * from this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
29 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
32 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 */
40
41 /*
42 * Encapsulate L3 protocols into IP
43 * See RFC 1701 and 1702 for more details.
44 * If_gre is compatible with Cisco GRE tunnels, so you can
45 * have a NetBSD box as the other end of a tunnel interface of a Cisco
46 * router. See gre(4) for more details.
47 * Also supported: minimal encapsulation within IP (protocol 55), see RFC 2004.
48 */
49
50 #include <sys/cdefs.h>
51 __KERNEL_RCSID(0, "$NetBSD: if_gre.c,v 1.78 2006/12/04 02:40:15 dyoung Exp $");
52
53 #include "opt_gre.h"
54 #include "opt_inet.h"
55 #include "bpfilter.h"
56
57 #ifdef INET
58 #include <sys/param.h>
59 #include <sys/file.h>
60 #include <sys/filedesc.h>
61 #include <sys/malloc.h>
62 #include <sys/mbuf.h>
63 #include <sys/proc.h>
64 #include <sys/protosw.h>
65 #include <sys/socket.h>
66 #include <sys/socketvar.h>
67 #include <sys/ioctl.h>
68 #include <sys/queue.h>
69 #if __NetBSD__
70 #include <sys/systm.h>
71 #include <sys/sysctl.h>
72 #include <sys/kauth.h>
73 #endif
74
75 #include <sys/kthread.h>
76
77 #include <machine/cpu.h>
78
79 #include <net/ethertypes.h>
80 #include <net/if.h>
81 #include <net/if_types.h>
82 #include <net/netisr.h>
83 #include <net/route.h>
84
85 #ifdef INET
86 #include <netinet/in.h>
87 #include <netinet/in_systm.h>
88 #include <netinet/in_var.h>
89 #include <netinet/ip.h>
90 #include <netinet/ip_var.h>
91 #else
92 #error "Huh? if_gre without inet?"
93 #endif
94
95
96 #ifdef NETATALK
97 #include <netatalk/at.h>
98 #include <netatalk/at_var.h>
99 #include <netatalk/at_extern.h>
100 #endif
101
102 #if NBPFILTER > 0
103 #include <sys/time.h>
104 #include <net/bpf.h>
105 #endif
106
107 #include <net/if_gre.h>
108
109 /*
110 * It is not easy to calculate the right value for a GRE MTU.
111 * We leave this task to the admin and use the same default that
112 * other vendors use.
113 */
114 #define GREMTU 1476
115
/*
 * GRE_DPRINTF(sc, fmt, ...): debugging printf.  It is only compiled in
 * when GRE_DEBUG is defined, and then only prints for interfaces that
 * have IFF_DEBUG set.  At least one argument must follow the format.
 */
#ifdef GRE_DEBUG
#define	GRE_DPRINTF(sc_, fmt_, ...)					\
	do {								\
		if (((sc_)->sc_if.if_flags & IFF_DEBUG) != 0)		\
			printf(fmt_, __VA_ARGS__);			\
	} while (/*CONSTCOND*/0)
#else
#define	GRE_DPRINTF(sc_, fmt_, ...)	do { } while (/*CONSTCOND*/0)
#endif /* GRE_DEBUG */
125
126 struct gre_softc_head gre_softc_list;
127 int ip_gre_ttl = GRE_TTL;
128
129 static int gre_clone_create(struct if_clone *, int);
130 static int gre_clone_destroy(struct ifnet *);
131
132 static struct if_clone gre_cloner =
133 IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
134
135 static int gre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
136 struct rtentry *);
137 static int gre_ioctl(struct ifnet *, u_long, caddr_t);
138
139 static int gre_compute_route(struct gre_softc *sc);
140 static int gre_update_route(struct gre_softc *sc);
141
142 static int gre_getsockname(struct socket *, struct mbuf *, struct lwp *);
143 static int gre_getpeername(struct socket *, struct mbuf *, struct lwp *);
144 static int gre_getnames(struct socket *, struct lwp *, struct sockaddr_in *,
145 struct sockaddr_in *);
146
/*
 * Clear the "running" flag and wake up anyone sleeping on it
 * (gre_join() sleeps on the same address).
 */
static void
gre_stop(int *running)
{

	*running = 0;
	wakeup(running);
}
153
154 static void
155 gre_join(int *running)
156 {
157 int s;
158
159 s = splnet();
160 while (*running != 0) {
161 splx(s);
162 tsleep(running, PSOCK, "grejoin", 0);
163 s = splnet();
164 }
165 splx(s);
166 }
167
168 static void
169 gre_wakeup(struct gre_softc *sc)
170 {
171 GRE_DPRINTF(sc, "%s: enter\n", __func__);
172 sc->sc_waitchan = 1;
173 wakeup(&sc->sc_waitchan);
174 }
175
176 static int
177 gre_clone_create(struct if_clone *ifc, int unit)
178 {
179 struct gre_softc *sc;
180
181 sc = malloc(sizeof(struct gre_softc), M_DEVBUF, M_WAITOK);
182 memset(sc, 0, sizeof(struct gre_softc));
183
184 snprintf(sc->sc_if.if_xname, sizeof(sc->sc_if.if_xname), "%s%d",
185 ifc->ifc_name, unit);
186 sc->sc_if.if_softc = sc;
187 sc->sc_if.if_type = IFT_TUNNEL;
188 sc->sc_if.if_addrlen = 0;
189 sc->sc_if.if_hdrlen = 24; /* IP + GRE */
190 sc->sc_if.if_dlt = DLT_NULL;
191 sc->sc_if.if_mtu = GREMTU;
192 sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
193 sc->sc_if.if_output = gre_output;
194 sc->sc_if.if_ioctl = gre_ioctl;
195 sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
196 sc->g_dstport = sc->g_srcport = 0;
197 sc->sc_proto = IPPROTO_GRE;
198 sc->sc_snd.ifq_maxlen = 256;
199 sc->sc_if.if_flags |= IFF_LINK0;
200 if_attach(&sc->sc_if);
201 if_alloc_sadl(&sc->sc_if);
202 #if NBPFILTER > 0
203 bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t));
204 #endif
205 LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
206 return (0);
207 }
208
209 static int
210 gre_clone_destroy(struct ifnet *ifp)
211 {
212 int s;
213 struct gre_softc *sc = ifp->if_softc;
214
215 LIST_REMOVE(sc, sc_list);
216 #if NBPFILTER > 0
217 bpfdetach(ifp);
218 #endif
219 s = splnet();
220 ifp->if_flags &= ~IFF_UP;
221 gre_wakeup(sc);
222 splx(s);
223 gre_join(&sc->sc_thread);
224 s = splnet();
225 if_detach(ifp);
226 splx(s);
227 if (sc->sc_fp != NULL) {
228 closef(sc->sc_fp, curlwp);
229 sc->sc_fp = NULL;
230 }
231 free(sc, M_DEVBUF);
232
233 return (0);
234 }
235
236 static void
237 gre_receive(struct socket *so, caddr_t arg, int waitflag)
238 {
239 struct gre_softc *sc = (struct gre_softc *)arg;
240
241 GRE_DPRINTF(sc, "%s: enter\n", __func__);
242
243 gre_wakeup(sc);
244 }
245
246 static void
247 gre_upcall_add(struct socket *so, caddr_t arg)
248 {
249 /* XXX What if the kernel already set an upcall? */
250 so->so_upcallarg = arg;
251 so->so_upcall = gre_receive;
252 so->so_rcv.sb_flags |= SB_UPCALL;
253 }
254
255 static void
256 gre_upcall_remove(struct socket *so)
257 {
258 /* XXX What if the kernel already set an upcall? */
259 so->so_rcv.sb_flags &= ~SB_UPCALL;
260 so->so_upcallarg = NULL;
261 so->so_upcall = NULL;
262 }
263
264 static void
265 gre_sodestroy(struct socket **sop)
266 {
267 gre_upcall_remove(*sop);
268 soshutdown(*sop, SHUT_RDWR);
269 soclose(*sop);
270 *sop = NULL;
271 }
272
273 static struct mbuf *
274 gre_getsockmbuf(struct socket *so)
275 {
276 struct mbuf *m;
277
278 m = m_get(M_WAIT, MT_SONAME);
279 if (m != NULL)
280 MCLAIM(m, so->so_mowner);
281 return m;
282 }
283
284 static int
285 gre_socreate1(struct gre_softc *sc, struct lwp *l, struct gre_soparm *sp,
286 struct socket **sop)
287 {
288 int rc;
289 struct mbuf *m;
290 struct sockaddr_in *sin;
291 struct socket *so;
292
293 GRE_DPRINTF(sc, "%s: enter\n", __func__);
294 rc = socreate(AF_INET, sop, SOCK_DGRAM, IPPROTO_UDP, l);
295 if (rc != 0) {
296 GRE_DPRINTF(sc, "%s: socreate failed\n", __func__);
297 return rc;
298 }
299
300 so = *sop;
301
302 gre_upcall_add(so, (caddr_t)sc);
303 if ((m = gre_getsockmbuf(so)) == NULL) {
304 rc = ENOBUFS;
305 goto out;
306 }
307 sin = mtod(m, struct sockaddr_in *);
308 sin->sin_len = m->m_len = sizeof(struct sockaddr_in);
309 sin->sin_family = AF_INET;
310 sin->sin_addr = sc->g_src;
311 sin->sin_port = sc->g_srcport;
312
313 GRE_DPRINTF(sc, "%s: bind 0x%08" PRIx32 " port %d\n", __func__,
314 sin->sin_addr.s_addr, ntohs(sin->sin_port));
315 if ((rc = sobind(so, m, l)) != 0) {
316 GRE_DPRINTF(sc, "%s: sobind failed\n", __func__);
317 goto out;
318 }
319
320 if (sc->g_srcport == 0) {
321 if ((rc = gre_getsockname(so, m, l)) != 0) {
322 GRE_DPRINTF(sc, "%s: gre_getsockname failed\n",
323 __func__);
324 goto out;
325 }
326 sc->g_srcport = sin->sin_port;
327 }
328
329 sin->sin_addr = sc->g_dst;
330 sin->sin_port = sc->g_dstport;
331
332 if ((rc = soconnect(so, m, l)) != 0) {
333 GRE_DPRINTF(sc, "%s: soconnect failed\n", __func__);
334 goto out;
335 }
336
337 *mtod(m, int *) = ip_gre_ttl;
338 m->m_len = sizeof(int);
339 rc = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so, IPPROTO_IP, IP_TTL,
340 &m);
341 m = NULL;
342 if (rc != 0) {
343 printf("%s: setopt ttl failed\n", __func__);
344 rc = 0;
345 }
346 out:
347 m_freem(m);
348
349 if (rc != 0)
350 gre_sodestroy(sop);
351 else
352 *sp = sc->sc_soparm;
353
354 return rc;
355 }
356
357 static void
358 gre_thread1(struct gre_softc *sc, struct lwp *l)
359 {
360 int flags, rc, s;
361 const struct gre_h *gh;
362 struct ifnet *ifp = &sc->sc_if;
363 struct mbuf *m;
364 struct socket *so = NULL;
365 struct uio uio;
366 struct gre_soparm sp;
367
368 GRE_DPRINTF(sc, "%s: enter\n", __func__);
369 s = splnet();
370
371 sc->sc_waitchan = 1;
372
373 memset(&sp, 0, sizeof(sp));
374 memset(&uio, 0, sizeof(uio));
375
376 ifp->if_flags |= IFF_RUNNING;
377
378 for (;;) {
379 while (sc->sc_waitchan == 0) {
380 splx(s);
381 GRE_DPRINTF(sc, "%s: sleeping\n", __func__);
382 tsleep(&sc->sc_waitchan, PSOCK, "grewait", 0);
383 s = splnet();
384 }
385 sc->sc_waitchan = 0;
386 GRE_DPRINTF(sc, "%s: awake\n", __func__);
387 if ((ifp->if_flags & IFF_UP) != IFF_UP) {
388 GRE_DPRINTF(sc, "%s: not up & running; exiting\n",
389 __func__);
390 break;
391 }
392 if (sc->sc_proto != IPPROTO_UDP) {
393 GRE_DPRINTF(sc, "%s: not udp; exiting\n", __func__);
394 break;
395 }
396 /* XXX optimize */
397 if (so == NULL || memcmp(&sp, &sc->sc_soparm, sizeof(sp)) != 0){
398 GRE_DPRINTF(sc, "%s: parameters changed\n", __func__);
399
400 if (sp.sp_fp != NULL) {
401 FILE_UNUSE(sp.sp_fp, NULL);
402 sp.sp_fp = NULL;
403 so = NULL;
404 } else if (so != NULL)
405 gre_sodestroy(&so);
406
407 if (sc->sc_fp != NULL) {
408 so = (struct socket *)sc->sc_fp->f_data;
409 gre_upcall_add(so, (caddr_t)sc);
410 sp = sc->sc_soparm;
411 FILE_USE(sp.sp_fp);
412 } else if (gre_socreate1(sc, l, &sp, &so) != 0)
413 goto out;
414 }
415 for (;;) {
416 flags = MSG_DONTWAIT;
417 uio.uio_resid = 1000000;
418 rc = (*so->so_receive)(so, NULL, &uio, &m, NULL,
419 &flags);
420 /* TBD Back off if ECONNREFUSED (indicates
421 * ICMP Port Unreachable)?
422 */
423 if (rc == EWOULDBLOCK) {
424 GRE_DPRINTF(sc, "%s: so_receive EWOULDBLOCK\n",
425 __func__);
426 break;
427 } else if (rc != 0 || m == NULL) {
428 GRE_DPRINTF(sc, "%s: rc %d m %p\n",
429 ifp->if_xname, rc, (void *)m);
430 continue;
431 } else
432 GRE_DPRINTF(sc, "%s: so_receive ok\n",
433 __func__);
434 if (m->m_len < sizeof(*gh) &&
435 (m = m_pullup(m, sizeof(*gh))) == NULL) {
436 GRE_DPRINTF(sc, "%s: m_pullup failed\n",
437 __func__);
438 continue;
439 }
440 gh = mtod(m, const struct gre_h *);
441
442 if (gre_input3(sc, m, 0, IPPROTO_GRE, gh) == 0) {
443 GRE_DPRINTF(sc, "%s: dropping unsupported\n",
444 __func__);
445 ifp->if_ierrors++;
446 m_freem(m);
447 }
448 }
449 for (;;) {
450 IF_DEQUEUE(&sc->sc_snd, m);
451 if (m == NULL)
452 break;
453 GRE_DPRINTF(sc, "%s: dequeue\n", __func__);
454 if ((so->so_state & SS_ISCONNECTED) == 0) {
455 GRE_DPRINTF(sc, "%s: not connected\n",
456 __func__);
457 m_freem(m);
458 continue;
459 }
460 rc = (*so->so_send)(so, NULL, NULL, m, NULL, 0, l);
461 /* XXX handle ENOBUFS? */
462 if (rc != 0)
463 GRE_DPRINTF(sc, "%s: so_send failed\n",
464 __func__);
465 }
466 /* Give the software interrupt queues a chance to
467 * run, or else when I send a ping from gre0 to gre1 on
468 * the same host, gre0 will not wake for the reply.
469 */
470 splx(s);
471 s = splnet();
472 }
473 if (sp.sp_fp != NULL) {
474 GRE_DPRINTF(sc, "%s: removing upcall\n", __func__);
475 gre_upcall_remove(so);
476 FILE_UNUSE(sp.sp_fp, NULL);
477 sp.sp_fp = NULL;
478 } else if (so != NULL)
479 gre_sodestroy(&so);
480 out:
481 GRE_DPRINTF(sc, "%s: stopping\n", __func__);
482 if (sc->sc_proto == IPPROTO_UDP)
483 ifp->if_flags &= ~IFF_RUNNING;
484 while (!IF_IS_EMPTY(&sc->sc_snd)) {
485 IF_DEQUEUE(&sc->sc_snd, m);
486 m_freem(m);
487 }
488 gre_stop(&sc->sc_thread);
489 /* must not touch sc after this! */
490 GRE_DPRINTF(sc, "%s: restore ipl\n", __func__);
491 splx(s);
492 }
493
494 static void
495 gre_thread(void *arg)
496 {
497 struct gre_softc *sc = (struct gre_softc *)arg;
498
499 gre_thread1(sc, curlwp);
500 /* must not touch sc after this! */
501 kthread_exit(0);
502 }
503
504 int
505 gre_input3(struct gre_softc *sc, struct mbuf *m, int hlen, u_char proto,
506 const struct gre_h *gh)
507 {
508 u_int16_t flags;
509 #if NBPFILTER > 0
510 u_int32_t af = AF_INET; /* af passed to BPF tap */
511 #endif
512 int s, isr;
513 struct ifqueue *ifq;
514
515 sc->sc_if.if_ipackets++;
516 sc->sc_if.if_ibytes += m->m_pkthdr.len;
517
518 switch (proto) {
519 case IPPROTO_GRE:
520 hlen += sizeof(struct gre_h);
521
522 /* process GRE flags as packet can be of variable len */
523 flags = ntohs(gh->flags);
524
525 /* Checksum & Offset are present */
526 if ((flags & GRE_CP) | (flags & GRE_RP))
527 hlen += 4;
528 /* We don't support routing fields (variable length) */
529 if (flags & GRE_RP)
530 return (0);
531 if (flags & GRE_KP)
532 hlen += 4;
533 if (flags & GRE_SP)
534 hlen += 4;
535
536 switch (ntohs(gh->ptype)) { /* ethertypes */
537 case ETHERTYPE_IP: /* shouldn't need a schednetisr(), as */
538 ifq = &ipintrq; /* we are in ip_input */
539 isr = NETISR_IP;
540 break;
541 #ifdef NETATALK
542 case ETHERTYPE_ATALK:
543 ifq = &atintrq1;
544 isr = NETISR_ATALK;
545 #if NBPFILTER > 0
546 af = AF_APPLETALK;
547 #endif
548 break;
549 #endif
550 #ifdef INET6
551 case ETHERTYPE_IPV6:
552 GRE_DPRINTF(sc, "%s: IPv6 packet\n", __func__);
553 ifq = &ip6intrq;
554 isr = NETISR_IPV6;
555 #if NBPFILTER > 0
556 af = AF_INET6;
557 #endif
558 break;
559 #endif
560 default: /* others not yet supported */
561 printf("%s: unhandled ethertype 0x%04x\n", __func__,
562 ntohs(gh->ptype));
563 return (0);
564 }
565 break;
566 default:
567 /* others not yet supported */
568 return (0);
569 }
570
571 if (hlen > m->m_pkthdr.len) {
572 m_freem(m);
573 sc->sc_if.if_ierrors++;
574 return (EINVAL);
575 }
576 m_adj(m, hlen);
577
578 #if NBPFILTER > 0
579 if (sc->sc_if.if_bpf != NULL)
580 bpf_mtap_af(sc->sc_if.if_bpf, af, m);
581 #endif /*NBPFILTER > 0*/
582
583 m->m_pkthdr.rcvif = &sc->sc_if;
584
585 s = splnet(); /* possible */
586 if (IF_QFULL(ifq)) {
587 IF_DROP(ifq);
588 m_freem(m);
589 } else {
590 IF_ENQUEUE(ifq, m);
591 }
592 /* we need schednetisr since the address family may change */
593 schednetisr(isr);
594 splx(s);
595
596 return (1); /* packet is done, no further processing needed */
597 }
598
599 /*
600 * The output routine. Takes a packet and encapsulates it in the protocol
601 * given by sc->sc_proto. See also RFC 1701 and RFC 2004
602 */
603 static int
604 gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
605 struct rtentry *rt)
606 {
607 int error = 0, hlen;
608 struct gre_softc *sc = ifp->if_softc;
609 struct greip *gi;
610 struct gre_h *gh;
611 struct ip *eip, *ip;
612 u_int8_t ip_tos = 0;
613 u_int16_t etype = 0;
614 struct mobile_h mob_h;
615
616 if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) == 0 ||
617 sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
618 m_freem(m);
619 error = ENETDOWN;
620 goto end;
621 }
622
623 gi = NULL;
624 ip = NULL;
625
626 #if NBPFILTER >0
627 if (ifp->if_bpf)
628 bpf_mtap_af(ifp->if_bpf, dst->sa_family, m);
629 #endif
630
631 m->m_flags &= ~(M_BCAST|M_MCAST);
632
633 switch (sc->sc_proto) {
634 case IPPROTO_MOBILE:
635 if (dst->sa_family == AF_INET) {
636 int msiz;
637
638 if (M_UNWRITABLE(m, sizeof(*ip)) &&
639 (m = m_pullup(m, sizeof(*ip))) == NULL) {
640 error = ENOBUFS;
641 goto end;
642 }
643 ip = mtod(m, struct ip *);
644
645 memset(&mob_h, 0, MOB_H_SIZ_L);
646 mob_h.proto = (ip->ip_p) << 8;
647 mob_h.odst = ip->ip_dst.s_addr;
648 ip->ip_dst.s_addr = sc->g_dst.s_addr;
649
650 /*
651 * If the packet comes from our host, we only change
652 * the destination address in the IP header.
653 * Else we also need to save and change the source
654 */
655 if (in_hosteq(ip->ip_src, sc->g_src)) {
656 msiz = MOB_H_SIZ_S;
657 } else {
658 mob_h.proto |= MOB_H_SBIT;
659 mob_h.osrc = ip->ip_src.s_addr;
660 ip->ip_src.s_addr = sc->g_src.s_addr;
661 msiz = MOB_H_SIZ_L;
662 }
663 HTONS(mob_h.proto);
664 mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
665
666 M_PREPEND(m, msiz, M_DONTWAIT);
667 if (m == NULL) {
668 error = ENOBUFS;
669 goto end;
670 }
671 /* XXX Assuming that ip does not dangle after
672 * M_PREPEND. In practice, that's true, but
673 * that's in M_PREPEND's contract.
674 */
675 memmove(mtod(m, caddr_t), ip, sizeof(*ip));
676 ip = mtod(m, struct ip *);
677 memcpy((caddr_t)(ip + 1), &mob_h, (unsigned)msiz);
678 ip->ip_len = htons(ntohs(ip->ip_len) + msiz);
679 } else { /* AF_INET */
680 IF_DROP(&ifp->if_snd);
681 m_freem(m);
682 error = EINVAL;
683 goto end;
684 }
685 break;
686 case IPPROTO_UDP:
687 case IPPROTO_GRE:
688 GRE_DPRINTF(sc, "%s: dst->sa_family=%d\n", __func__,
689 dst->sa_family);
690 switch (dst->sa_family) {
691 case AF_INET:
692 ip = mtod(m, struct ip *);
693 ip_tos = ip->ip_tos;
694 etype = ETHERTYPE_IP;
695 break;
696 #ifdef NETATALK
697 case AF_APPLETALK:
698 etype = ETHERTYPE_ATALK;
699 break;
700 #endif
701 #ifdef INET6
702 case AF_INET6:
703 etype = ETHERTYPE_IPV6;
704 break;
705 #endif
706 default:
707 IF_DROP(&ifp->if_snd);
708 m_freem(m);
709 error = EAFNOSUPPORT;
710 goto end;
711 }
712 break;
713 default:
714 IF_DROP(&ifp->if_snd);
715 m_freem(m);
716 error = EINVAL;
717 goto end;
718 }
719
720 switch (sc->sc_proto) {
721 case IPPROTO_GRE:
722 hlen = sizeof(struct greip);
723 break;
724 case IPPROTO_UDP:
725 hlen = sizeof(struct gre_h);
726 break;
727 default:
728 hlen = 0;
729 break;
730 }
731
732 M_PREPEND(m, hlen, M_DONTWAIT);
733
734 if (m == NULL) {
735 IF_DROP(&ifp->if_snd);
736 error = ENOBUFS;
737 goto end;
738 }
739
740 switch (sc->sc_proto) {
741 case IPPROTO_UDP:
742 gh = mtod(m, struct gre_h *);
743 memset(gh, 0, sizeof(*gh));
744 gh->ptype = htons(etype);
745 /* XXX Need to handle IP ToS. Look at how I handle IP TTL. */
746 break;
747 case IPPROTO_GRE:
748 gi = mtod(m, struct greip *);
749 gh = &gi->gi_g;
750 eip = &gi->gi_i;
751 /* we don't have any GRE flags for now */
752 memset(gh, 0, sizeof(*gh));
753 gh->ptype = htons(etype);
754 eip->ip_src = sc->g_src;
755 eip->ip_dst = sc->g_dst;
756 eip->ip_hl = (sizeof(struct ip)) >> 2;
757 eip->ip_ttl = ip_gre_ttl;
758 eip->ip_tos = ip_tos;
759 eip->ip_len = htons(m->m_pkthdr.len);
760 eip->ip_p = sc->sc_proto;
761 break;
762 case IPPROTO_MOBILE:
763 eip = mtod(m, struct ip *);
764 eip->ip_p = sc->sc_proto;
765 break;
766 default:
767 error = EPROTONOSUPPORT;
768 m_freem(m);
769 goto end;
770 }
771
772 ifp->if_opackets++;
773 ifp->if_obytes += m->m_pkthdr.len;
774
775 /* send it off */
776 if (sc->sc_proto == IPPROTO_UDP) {
777 if (IF_QFULL(&sc->sc_snd)) {
778 IF_DROP(&sc->sc_snd);
779 error = ENOBUFS;
780 m_freem(m);
781 } else {
782 IF_ENQUEUE(&sc->sc_snd, m);
783 gre_wakeup(sc);
784 error = 0;
785 }
786 } else if ((error = gre_update_route(sc)) == 0) {
787 error = ip_output(m, NULL, &sc->route, 0,
788 (struct ip_moptions *)NULL, (struct socket *)NULL);
789 }
790 end:
791 if (error)
792 ifp->if_oerrors++;
793 return (error);
794 }
795
796 /* gre_kick must be synchronized with network interrupts in order
797 * to synchronize access to gre_softc members, so call it with
798 * interrupt priority level set to IPL_NET or greater.
799 */
800 static int
801 gre_kick(struct gre_softc *sc)
802 {
803 int rc;
804 struct ifnet *ifp = &sc->sc_if;
805
806 if (sc->sc_proto == IPPROTO_UDP && (ifp->if_flags & IFF_UP) == IFF_UP &&
807 !sc->sc_thread) {
808 sc->sc_thread = 1;
809 rc = kthread_create1(gre_thread, (void *)sc, NULL,
810 ifp->if_xname);
811 if (rc != 0)
812 gre_stop(&sc->sc_thread);
813 return rc;
814 } else {
815 gre_wakeup(sc);
816 return 0;
817 }
818 }
819
820 static int
821 gre_getname(struct socket *so, int req, struct mbuf *nam, struct lwp *l)
822 {
823 int s, error;
824
825 s = splsoftnet();
826 error = (*so->so_proto->pr_usrreq)(so, req, (struct mbuf *)0,
827 nam, (struct mbuf *)0, l);
828 splx(s);
829 return error;
830 }
831
832 static int
833 gre_getsockname(struct socket *so, struct mbuf *nam, struct lwp *l)
834 {
835 return gre_getname(so, PRU_SOCKADDR, nam, l);
836 }
837
838 static int
839 gre_getpeername(struct socket *so, struct mbuf *nam, struct lwp *l)
840 {
841 return gre_getname(so, PRU_PEERADDR, nam, l);
842 }
843
844 static int
845 gre_getnames(struct socket *so, struct lwp *l, struct sockaddr_in *src,
846 struct sockaddr_in *dst)
847 {
848 struct mbuf *m;
849 struct sockaddr_in *sin;
850 int rc;
851
852 if ((m = gre_getsockmbuf(so)) == NULL)
853 return ENOBUFS;
854
855 sin = mtod(m, struct sockaddr_in *);
856
857 if ((rc = gre_getsockname(so, m, l)) != 0)
858 goto out;
859 if (sin->sin_family != AF_INET) {
860 rc = EAFNOSUPPORT;
861 goto out;
862 }
863 *src = *sin;
864
865 if ((rc = gre_getpeername(so, m, l)) != 0)
866 goto out;
867 if (sin->sin_family != AF_INET) {
868 rc = EAFNOSUPPORT;
869 goto out;
870 }
871 *dst = *sin;
872
873 out:
874 m_freem(m);
875 return rc;
876 }
877
878 static int
879 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
880 {
881 u_char oproto;
882 struct file *fp, *ofp;
883 struct socket *so;
884 struct sockaddr_in dst, src;
885 struct proc *p = curproc; /* XXX */
886 struct lwp *l = curlwp; /* XXX */
887 struct ifreq *ifr = (struct ifreq *)data;
888 struct if_laddrreq *lifr = (struct if_laddrreq *)data;
889 struct gre_softc *sc = ifp->if_softc;
890 int s;
891 struct sockaddr_in si;
892 struct sockaddr *sa = NULL;
893 int error;
894
895 switch (cmd) {
896 case SIOCSIFFLAGS:
897 case SIOCSIFMTU:
898 case GRESPROTO:
899 case GRESADDRD:
900 case GRESADDRS:
901 case GRESSOCK:
902 case GREDSOCK:
903 case SIOCSLIFPHYADDR:
904 case SIOCDIFPHYADDR:
905 if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE,
906 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
907 NULL) != 0)
908 return (EPERM);
909 break;
910 default:
911 error = 0;
912 break;
913 }
914
915 s = splnet();
916 switch (cmd) {
917 case SIOCSIFADDR:
918 ifp->if_flags |= IFF_UP;
919 if ((error = gre_kick(sc)) != 0)
920 ifp->if_flags &= ~IFF_UP;
921 break;
922 case SIOCSIFDSTADDR:
923 break;
924 case SIOCSIFFLAGS:
925 oproto = sc->sc_proto;
926 switch (ifr->ifr_flags & (IFF_LINK0|IFF_LINK2)) {
927 case IFF_LINK0|IFF_LINK2:
928 sc->sc_proto = IPPROTO_UDP;
929 if (oproto != IPPROTO_UDP)
930 ifp->if_flags &= ~IFF_RUNNING;
931 error = gre_kick(sc);
932 break;
933 case IFF_LINK0:
934 sc->sc_proto = IPPROTO_GRE;
935 gre_wakeup(sc);
936 goto recompute;
937 case 0:
938 sc->sc_proto = IPPROTO_MOBILE;
939 gre_wakeup(sc);
940 goto recompute;
941 }
942 break;
943 case SIOCSIFMTU:
944 if (ifr->ifr_mtu < 576) {
945 error = EINVAL;
946 break;
947 }
948 ifp->if_mtu = ifr->ifr_mtu;
949 break;
950 case SIOCGIFMTU:
951 ifr->ifr_mtu = sc->sc_if.if_mtu;
952 break;
953 case SIOCADDMULTI:
954 case SIOCDELMULTI:
955 if (ifr == 0) {
956 error = EAFNOSUPPORT;
957 break;
958 }
959 switch (ifr->ifr_addr.sa_family) {
960 #ifdef INET
961 case AF_INET:
962 break;
963 #endif
964 #ifdef INET6
965 case AF_INET6:
966 break;
967 #endif
968 default:
969 error = EAFNOSUPPORT;
970 break;
971 }
972 break;
973 case GRESPROTO:
974 oproto = sc->sc_proto;
975 sc->sc_proto = ifr->ifr_flags;
976 switch (sc->sc_proto) {
977 case IPPROTO_UDP:
978 ifp->if_flags |= IFF_LINK0|IFF_LINK2;
979 if (oproto != IPPROTO_UDP)
980 ifp->if_flags &= ~IFF_RUNNING;
981 error = gre_kick(sc);
982 break;
983 case IPPROTO_GRE:
984 ifp->if_flags |= IFF_LINK0;
985 ifp->if_flags &= ~IFF_LINK2;
986 goto recompute;
987 case IPPROTO_MOBILE:
988 ifp->if_flags &= ~(IFF_LINK0|IFF_LINK2);
989 goto recompute;
990 default:
991 error = EPROTONOSUPPORT;
992 break;
993 }
994 break;
995 case GREGPROTO:
996 ifr->ifr_flags = sc->sc_proto;
997 break;
998 case GRESADDRS:
999 case GRESADDRD:
1000 /*
1001 * set tunnel endpoints, compute a less specific route
1002 * to the remote end and mark if as up
1003 */
1004 sa = &ifr->ifr_addr;
1005 if (cmd == GRESADDRS) {
1006 sc->g_src = (satosin(sa))->sin_addr;
1007 sc->g_srcport = satosin(sa)->sin_port;
1008 }
1009 if (cmd == GRESADDRD) {
1010 if (sc->sc_proto == IPPROTO_UDP &&
1011 satosin(sa)->sin_port == 0) {
1012 error = EINVAL;
1013 break;
1014 }
1015 sc->g_dst = (satosin(sa))->sin_addr;
1016 sc->g_dstport = satosin(sa)->sin_port;
1017 }
1018 recompute:
1019 if (sc->sc_proto == IPPROTO_UDP ||
1020 (sc->g_src.s_addr != INADDR_ANY &&
1021 sc->g_dst.s_addr != INADDR_ANY)) {
1022 if (sc->sc_fp != NULL) {
1023 closef(sc->sc_fp, l);
1024 sc->sc_fp = NULL;
1025 }
1026 if (sc->route.ro_rt != NULL) {
1027 RTFREE(sc->route.ro_rt);
1028 sc->route.ro_rt = NULL;
1029 }
1030 if (sc->sc_proto == IPPROTO_UDP)
1031 error = gre_kick(sc);
1032 else if (gre_compute_route(sc) == 0)
1033 ifp->if_flags |= IFF_RUNNING;
1034 else
1035 ifp->if_flags &= ~IFF_RUNNING;
1036 }
1037 break;
1038 case GREGADDRS:
1039 memset(&si, 0, sizeof(si));
1040 si.sin_family = AF_INET;
1041 si.sin_len = sizeof(struct sockaddr_in);
1042 si.sin_addr.s_addr = sc->g_src.s_addr;
1043 sa = sintosa(&si);
1044 ifr->ifr_addr = *sa;
1045 break;
1046 case GREGADDRD:
1047 memset(&si, 0, sizeof(si));
1048 si.sin_family = AF_INET;
1049 si.sin_len = sizeof(struct sockaddr_in);
1050 si.sin_addr.s_addr = sc->g_dst.s_addr;
1051 sa = sintosa(&si);
1052 ifr->ifr_addr = *sa;
1053 break;
1054 case GREDSOCK:
1055 if (sc->sc_proto != IPPROTO_UDP)
1056 return EINVAL;
1057 if (sc->sc_fp != NULL) {
1058 closef(sc->sc_fp, l);
1059 sc->sc_fp = NULL;
1060 error = gre_kick(sc);
1061 }
1062 break;
1063 case GRESSOCK:
1064 if (sc->sc_proto != IPPROTO_UDP)
1065 return EINVAL;
1066 /* getsock() will FILE_USE() the descriptor for us */
1067 if ((error = getsock(p->p_fd, (int)ifr->ifr_value, &fp)) != 0)
1068 break;
1069 so = (struct socket *)fp->f_data;
1070 if (so->so_type != SOCK_DGRAM) {
1071 FILE_UNUSE(fp, NULL);
1072 error = EINVAL;
1073 break;
1074 }
1075 /* check address */
1076 if ((error = gre_getnames(so, curlwp, &src, &dst)) != 0) {
1077 FILE_UNUSE(fp, NULL);
1078 break;
1079 }
1080
1081 fp->f_count++;
1082
1083 ofp = sc->sc_fp;
1084 sc->sc_fp = fp;
1085 if ((error = gre_kick(sc)) != 0) {
1086 closef(fp, l);
1087 sc->sc_fp = ofp;
1088 break;
1089 }
1090 sc->g_src = src.sin_addr;
1091 sc->g_srcport = src.sin_port;
1092 sc->g_dst = dst.sin_addr;
1093 sc->g_dstport = dst.sin_port;
1094 if (ofp != NULL)
1095 closef(ofp, l);
1096 break;
1097 case SIOCSLIFPHYADDR:
1098 if (lifr->addr.ss_family != AF_INET ||
1099 lifr->dstaddr.ss_family != AF_INET) {
1100 error = EAFNOSUPPORT;
1101 break;
1102 }
1103 if (lifr->addr.ss_len != sizeof(si) ||
1104 lifr->dstaddr.ss_len != sizeof(si)) {
1105 error = EINVAL;
1106 break;
1107 }
1108 sc->g_src = satosin(&lifr->addr)->sin_addr;
1109 sc->g_dst = satosin(&lifr->dstaddr)->sin_addr;
1110 sc->g_srcport = satosin(&lifr->addr)->sin_port;
1111 sc->g_dstport = satosin(&lifr->dstaddr)->sin_port;
1112 goto recompute;
1113 case SIOCDIFPHYADDR:
1114 sc->g_src.s_addr = INADDR_ANY;
1115 sc->g_dst.s_addr = INADDR_ANY;
1116 sc->g_srcport = 0;
1117 sc->g_dstport = 0;
1118 goto recompute;
1119 case SIOCGLIFPHYADDR:
1120 if (sc->g_src.s_addr == INADDR_ANY ||
1121 sc->g_dst.s_addr == INADDR_ANY) {
1122 error = EADDRNOTAVAIL;
1123 break;
1124 }
1125 memset(&si, 0, sizeof(si));
1126 si.sin_family = AF_INET;
1127 si.sin_len = sizeof(struct sockaddr_in);
1128 si.sin_addr = sc->g_src;
1129 if (sc->sc_proto == IPPROTO_UDP)
1130 si.sin_port = sc->g_srcport;
1131 memcpy(&lifr->addr, &si, sizeof(si));
1132 si.sin_addr = sc->g_dst;
1133 if (sc->sc_proto == IPPROTO_UDP)
1134 si.sin_port = sc->g_dstport;
1135 memcpy(&lifr->dstaddr, &si, sizeof(si));
1136 break;
1137 default:
1138 error = EINVAL;
1139 break;
1140 }
1141 splx(s);
1142 return (error);
1143 }
1144
1145 /*
1146 * Compute a route to our destination.
1147 */
1148 static int
1149 gre_compute_route(struct gre_softc *sc)
1150 {
1151 int rc;
1152 struct route *ro;
1153
1154 ro = &sc->route;
1155
1156 memset(ro, 0, sizeof(struct route));
1157 satosin(&ro->ro_dst)->sin_addr = sc->g_dst;
1158 ro->ro_dst.sa_family = AF_INET;
1159 ro->ro_dst.sa_len = sizeof(ro->ro_dst);
1160
1161 #ifdef DIAGNOSTIC
1162 printf("%s: searching for a route to %s", sc->sc_if.if_xname,
1163 inet_ntoa(satosin(&ro->ro_dst)->sin_addr));
1164 #endif
1165
1166 if ((rc = gre_update_route(sc)) != 0) {
1167 #ifdef DIAGNOSTIC
1168 if (ro->ro_rt == NULL)
1169 printf(" - no route found!\n");
1170 else
1171 printf(" - route loops back to ourself!\n");
1172 #endif
1173 }
1174
1175 #ifdef DIAGNOSTIC
1176 printf(", choosing %s with gateway %s\n", ro->ro_rt->rt_ifp->if_xname,
1177 inet_ntoa(satosin(ro->ro_rt->rt_gateway)->sin_addr));
1178 #endif
1179 return rc;
1180 }
1181
1182 static int
1183 gre_update_route(struct gre_softc *sc)
1184 {
1185 struct route *ro;
1186
1187 ro = &sc->route;
1188
1189 rtalloc(ro);
1190
1191 /*
1192 * check if this returned a route at all and this route is no
1193 * recursion to ourself
1194 */
1195 if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc)
1196 return EADDRNOTAVAIL;
1197
1198 return 0;
1199 }
1200
/*
 * do a checksum of a buffer - much like in_cksum, which operates on
 * mbufs.
 *
 * p points to the data and len is its length in bytes.  The data is
 * summed as 16-bit words in memory order; a trailing odd byte is
 * padded with a zero byte.  Returns the one's complement of the
 * folded 16-bit sum.
 */
u_int16_t
gre_in_cksum(u_int16_t *p, u_int len)
{
	u_int32_t acc = 0;
	int nfull = len >> 1;
	int i;

	for (i = 0; i < nfull; i++)
		acc += p[i];

	if ((len & 1) != 0) {
		union {
			u_short w;
			u_char c[2];
		} tail;

		tail.c[0] = *(u_char *)(p + nfull);
		tail.c[1] = 0;
		acc += tail.w;
	}

	/* end-around-carry */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += (acc >> 16);
	return (~acc);
}
1229 #endif
1230
void	greattach(int);

/*
 * Pseudo-device attach routine.  With INET configured it initializes
 * gre_softc_list and registers the "gre" cloner; without INET it does
 * nothing.  The count argument is not used.
 */
/* ARGSUSED */
void
greattach(int count)
{
#ifdef INET
	LIST_INIT(&gre_softc_list);
	if_clone_attach(&gre_cloner);
#endif
}
1242