/*	$NetBSD: if_gre.c,v 1.98.2.1 2007/08/15 13:49:40 skrll Exp $ */
2
3 /*
4 * Copyright (c) 1998 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Heiko W.Rupp <hwr (at) pilhuhn.de>
9 *
10 * IPv6-over-GRE contributed by Gert Doering <gert (at) greenie.muc.de>
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the NetBSD
23 * Foundation, Inc. and its contributors.
24 * 4. Neither the name of The NetBSD Foundation nor the names of its
25 * contributors may be used to endorse or promote products derived
26 * from this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
29 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
32 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 */
40
41 /*
42 * Encapsulate L3 protocols into IP
43 * See RFC 1701 and 1702 for more details.
44 * If_gre is compatible with Cisco GRE tunnels, so you can
45 * have a NetBSD box as the other end of a tunnel interface of a Cisco
46 * router. See gre(4) for more details.
47 * Also supported: IP in IP encaps (proto 55) as of RFC 2004
48 */
49
50 #include <sys/cdefs.h>
51 __KERNEL_RCSID(0, "$NetBSD: if_gre.c,v 1.98.2.1 2007/08/15 13:49:40 skrll Exp $");
52
53 #include "opt_gre.h"
54 #include "opt_inet.h"
55 #include "bpfilter.h"
56
57 #ifdef INET
58 #include <sys/param.h>
59 #include <sys/file.h>
60 #include <sys/filedesc.h>
61 #include <sys/malloc.h>
62 #include <sys/mbuf.h>
63 #include <sys/proc.h>
64 #include <sys/protosw.h>
65 #include <sys/socket.h>
66 #include <sys/socketvar.h>
67 #include <sys/ioctl.h>
68 #include <sys/queue.h>
69 #if __NetBSD__
70 #include <sys/systm.h>
71 #include <sys/sysctl.h>
72 #include <sys/kauth.h>
73 #endif
74
75 #include <sys/kernel.h>
76 #include <sys/mutex.h>
77 #include <sys/condvar.h>
78 #include <sys/kthread.h>
79
80 #include <machine/cpu.h>
81
82 #include <net/ethertypes.h>
83 #include <net/if.h>
84 #include <net/if_types.h>
85 #include <net/netisr.h>
86 #include <net/route.h>
87
88 #ifdef INET
89 #include <netinet/in.h>
90 #include <netinet/in_systm.h>
91 #include <netinet/in_var.h>
92 #include <netinet/ip.h>
93 #include <netinet/ip_var.h>
94 #else
95 #error "Huh? if_gre without inet?"
96 #endif
97
98
99 #ifdef NETATALK
100 #include <netatalk/at.h>
101 #include <netatalk/at_var.h>
102 #include <netatalk/at_extern.h>
103 #endif
104
105 #if NBPFILTER > 0
106 #include <sys/time.h>
107 #include <net/bpf.h>
108 #endif
109
110 #include <net/if_gre.h>
111
112 #include <compat/sys/sockio.h>
/*
 * Working out the correct MTU for a GRE tunnel is not easy, so that
 * job is left to the administrator.  The default below is the same
 * value other vendors use.
 */
#define GREMTU 1476

/*
 * GRE_DPRINTF(sc, fmt, ...): debugging printf.
 *
 * Without GRE_DEBUG it expands to an empty statement.  With GRE_DEBUG
 * it prints only when IFF_DEBUG is set on the interface embedded in
 * the softc `sc'.  At least one argument must follow the format.
 */
#ifndef GRE_DEBUG
#define GRE_DPRINTF(__sc, __fmt, ...)	do { } while (/*CONSTCOND*/0)
#else
#define GRE_DPRINTF(__sc, __fmt, ...)					\
	do {								\
		if (((__sc)->sc_if.if_flags & IFF_DEBUG) != 0)		\
			printf(__fmt, __VA_ARGS__);			\
	} while (/*CONSTCOND*/0)
#endif /* GRE_DEBUG */
129
130 struct gre_softc_head gre_softc_list;
131 int ip_gre_ttl = GRE_TTL;
132
133 static int gre_clone_create(struct if_clone *, int);
134 static int gre_clone_destroy(struct ifnet *);
135
136 static struct if_clone gre_cloner =
137 IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
138
139 static int gre_output(struct ifnet *, struct mbuf *,
140 const struct sockaddr *, struct rtentry *);
141 static int gre_ioctl(struct ifnet *, u_long, void *);
142
143 static int gre_compute_route(struct gre_softc *sc);
144
145 static void gre_closef(struct file **, struct lwp *);
146 static int gre_getsockname(struct socket *, struct mbuf *, struct lwp *);
147 static int gre_getpeername(struct socket *, struct mbuf *, struct lwp *);
148 static int gre_getnames(struct socket *, struct lwp *, struct sockaddr_in *,
149 struct sockaddr_in *);
150
151 /* Calling thread must hold sc->sc_mtx. */
152 static void
153 gre_stop(struct gre_softc *sc)
154 {
155 sc->sc_running = 0;
156 cv_signal(&sc->sc_join_cv);
157 }
158
159 /* Calling thread must hold sc->sc_mtx. */
160 static void
161 gre_join(struct gre_softc *sc)
162 {
163 while (sc->sc_running != 0)
164 cv_wait(&sc->sc_join_cv, &sc->sc_mtx);
165 }
166
167 /* Calling thread must hold sc->sc_mtx. */
168 static void
169 gre_wakeup(struct gre_softc *sc)
170 {
171 GRE_DPRINTF(sc, "%s: enter\n", __func__);
172 sc->sc_haswork = 1;
173 cv_signal(&sc->sc_work_cv);
174 }
175
176 static int
177 gre_clone_create(struct if_clone *ifc, int unit)
178 {
179 struct gre_softc *sc;
180
181 sc = malloc(sizeof(struct gre_softc), M_DEVBUF, M_WAITOK);
182 memset(sc, 0, sizeof(struct gre_softc));
183 mutex_init(&sc->sc_mtx, MUTEX_DRIVER, IPL_NET);
184 cv_init(&sc->sc_work_cv, "gre work");
185 cv_init(&sc->sc_join_cv, "gre join");
186 cv_init(&sc->sc_soparm_cv, "gre soparm");
187
188 snprintf(sc->sc_if.if_xname, sizeof(sc->sc_if.if_xname), "%s%d",
189 ifc->ifc_name, unit);
190 sc->sc_if.if_softc = sc;
191 sc->sc_if.if_type = IFT_TUNNEL;
192 sc->sc_if.if_addrlen = 0;
193 sc->sc_if.if_hdrlen = 24; /* IP + GRE */
194 sc->sc_if.if_dlt = DLT_NULL;
195 sc->sc_if.if_mtu = GREMTU;
196 sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
197 sc->sc_if.if_output = gre_output;
198 sc->sc_if.if_ioctl = gre_ioctl;
199 sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
200 sc->g_dstport = sc->g_srcport = 0;
201 sc->sc_proto = IPPROTO_GRE;
202 sc->sc_snd.ifq_maxlen = 256;
203 sc->sc_if.if_flags |= IFF_LINK0;
204 if_attach(&sc->sc_if);
205 if_alloc_sadl(&sc->sc_if);
206 #if NBPFILTER > 0
207 bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t));
208 #endif
209 LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
210 return 0;
211 }
212
213 static int
214 gre_clone_destroy(struct ifnet *ifp)
215 {
216 struct gre_softc *sc = ifp->if_softc;
217
218 LIST_REMOVE(sc, sc_list);
219 #if NBPFILTER > 0
220 bpfdetach(ifp);
221 #endif
222 if_detach(ifp);
223 mutex_enter(&sc->sc_mtx);
224 gre_wakeup(sc);
225 gre_join(sc);
226 mutex_exit(&sc->sc_mtx);
227 rtcache_free(&sc->route);
228
229 cv_destroy(&sc->sc_soparm_cv);
230 cv_destroy(&sc->sc_join_cv);
231 cv_destroy(&sc->sc_work_cv);
232 mutex_destroy(&sc->sc_mtx);
233 free(sc, M_DEVBUF);
234
235 return 0;
236 }
237
/*
 * Socket upcall installed by gre_upcall_add().  `arg' is the softc;
 * `so' and `waitflag' are not used.  All it does is wake the worker
 * thread, which performs the actual receive.
 */
static void
gre_receive(struct socket *so, void *arg, int waitflag)
{
	struct gre_softc * const sc = arg;

	GRE_DPRINTF(sc, "%s: enter\n", __func__);

	gre_wakeup(sc);
}
247
248 static void
249 gre_upcall_add(struct socket *so, void *arg)
250 {
251 /* XXX What if the kernel already set an upcall? */
252 so->so_upcallarg = arg;
253 so->so_upcall = gre_receive;
254 so->so_rcv.sb_flags |= SB_UPCALL;
255 }
256
257 static void
258 gre_upcall_remove(struct socket *so)
259 {
260 /* XXX What if the kernel already set an upcall? */
261 so->so_rcv.sb_flags &= ~SB_UPCALL;
262 so->so_upcallarg = NULL;
263 so->so_upcall = NULL;
264 }
265
266 static void
267 gre_sodestroy(struct socket **sop)
268 {
269 gre_upcall_remove(*sop);
270 soshutdown(*sop, SHUT_RDWR);
271 soclose(*sop);
272 *sop = NULL;
273 }
274
275 static struct mbuf *
276 gre_getsockmbuf(struct socket *so)
277 {
278 struct mbuf *m;
279
280 m = m_get(M_WAIT, MT_SONAME);
281 if (m != NULL)
282 MCLAIM(m, so->so_mowner);
283 return m;
284 }
285
286 static int
287 gre_socreate1(struct gre_softc *sc, struct lwp *l, struct gre_soparm *sp,
288 struct socket **sop)
289 {
290 int rc;
291 struct mbuf *m;
292 struct sockaddr_in *sin;
293 struct socket *so;
294
295 GRE_DPRINTF(sc, "%s: enter\n", __func__);
296 rc = socreate(AF_INET, sop, SOCK_DGRAM, IPPROTO_UDP, l);
297 if (rc != 0) {
298 GRE_DPRINTF(sc, "%s: socreate failed\n", __func__);
299 return rc;
300 }
301
302 so = *sop;
303
304 gre_upcall_add(so, sc);
305 if ((m = gre_getsockmbuf(so)) == NULL) {
306 rc = ENOBUFS;
307 goto out;
308 }
309 sin = mtod(m, struct sockaddr_in *);
310 sin->sin_len = m->m_len = sizeof(struct sockaddr_in);
311 sin->sin_family = AF_INET;
312 sin->sin_addr = sc->g_src;
313 sin->sin_port = sc->g_srcport;
314
315 GRE_DPRINTF(sc, "%s: bind 0x%08" PRIx32 " port %d\n", __func__,
316 sin->sin_addr.s_addr, ntohs(sin->sin_port));
317 if ((rc = sobind(so, m, l)) != 0) {
318 GRE_DPRINTF(sc, "%s: sobind failed\n", __func__);
319 goto out;
320 }
321
322 if (sc->g_srcport == 0) {
323 if ((rc = gre_getsockname(so, m, l)) != 0) {
324 GRE_DPRINTF(sc, "%s: gre_getsockname failed\n",
325 __func__);
326 goto out;
327 }
328 sc->g_srcport = sin->sin_port;
329 }
330
331 sin->sin_addr = sc->g_dst;
332 sin->sin_port = sc->g_dstport;
333
334 if ((rc = soconnect(so, m, l)) != 0) {
335 GRE_DPRINTF(sc, "%s: soconnect failed\n", __func__);
336 goto out;
337 }
338
339 *mtod(m, int *) = ip_gre_ttl;
340 m->m_len = sizeof(int);
341 KASSERT(so->so_proto && so->so_proto->pr_ctloutput);
342 rc = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so, IPPROTO_IP, IP_TTL,
343 &m);
344 m = NULL;
345 if (rc != 0) {
346 printf("%s: setopt ttl failed\n", __func__);
347 rc = 0;
348 }
349 out:
350 m_freem(m);
351
352 if (rc != 0)
353 gre_sodestroy(sop);
354 else
355 *sp = sc->sc_soparm;
356
357 return rc;
358 }
359
360 static void
361 gre_thread1(struct gre_softc *sc, struct lwp *l)
362 {
363 int flags, rc;
364 const struct gre_h *gh;
365 struct ifnet *ifp = &sc->sc_if;
366 struct mbuf *m;
367 struct socket *so = NULL;
368 struct uio uio;
369 struct gre_soparm sp;
370 struct file *fp = NULL;
371
372 GRE_DPRINTF(sc, "%s: enter\n", __func__);
373 mutex_enter(&sc->sc_mtx);
374
375 sc->sc_haswork = 1;
376
377 memset(&sp, 0, sizeof(sp));
378 memset(&uio, 0, sizeof(uio));
379
380 ifp->if_flags |= IFF_RUNNING;
381
382 for (;;) {
383 while (sc->sc_haswork == 0) {
384 GRE_DPRINTF(sc, "%s: sleeping\n", __func__);
385 cv_wait(&sc->sc_work_cv, &sc->sc_mtx);
386 }
387 sc->sc_haswork = 0;
388 GRE_DPRINTF(sc, "%s: awake\n", __func__);
389 if ((ifp->if_flags & IFF_UP) != IFF_UP) {
390 GRE_DPRINTF(sc, "%s: not up & running; exiting\n",
391 __func__);
392 break;
393 }
394 if (sc->sc_proto != IPPROTO_UDP) {
395 GRE_DPRINTF(sc, "%s: not udp; exiting\n", __func__);
396 break;
397 }
398 /* XXX optimize */
399 if (so == NULL || sc->sc_fp != NULL ||
400 memcmp(&sp, &sc->sc_soparm, sizeof(sp)) != 0) {
401 GRE_DPRINTF(sc, "%s: parameters changed\n", __func__);
402
403 if (fp != NULL) {
404 gre_closef(&fp, curlwp);
405 so = NULL;
406 } else if (so != NULL)
407 gre_sodestroy(&so);
408
409 if (sc->sc_fp != NULL) {
410 fp = sc->sc_fp;
411 sc->sc_fp = NULL;
412 so = (struct socket *)fp->f_data;
413 gre_upcall_add(so, sc);
414 sp = sc->sc_soparm;
415 } else if (gre_socreate1(sc, l, &sp, &so) != 0)
416 goto out;
417 }
418 cv_signal(&sc->sc_soparm_cv);
419 for (;;) {
420 flags = MSG_DONTWAIT;
421 uio.uio_resid = 1000000;
422 rc = (*so->so_receive)(so, NULL, &uio, &m, NULL,
423 &flags);
424 /* TBD Back off if ECONNREFUSED (indicates
425 * ICMP Port Unreachable)?
426 */
427 if (rc == EWOULDBLOCK) {
428 GRE_DPRINTF(sc, "%s: so_receive EWOULDBLOCK\n",
429 __func__);
430 break;
431 } else if (rc != 0 || m == NULL) {
432 GRE_DPRINTF(sc, "%s: rc %d m %p\n",
433 ifp->if_xname, rc, (void *)m);
434 continue;
435 } else
436 GRE_DPRINTF(sc, "%s: so_receive ok\n",
437 __func__);
438 if (m->m_len < sizeof(*gh) &&
439 (m = m_pullup(m, sizeof(*gh))) == NULL) {
440 GRE_DPRINTF(sc, "%s: m_pullup failed\n",
441 __func__);
442 continue;
443 }
444 gh = mtod(m, const struct gre_h *);
445
446 if (gre_input3(sc, m, 0, gh, 1) == 0) {
447 GRE_DPRINTF(sc, "%s: dropping unsupported\n",
448 __func__);
449 m_freem(m);
450 }
451 }
452 for (;;) {
453 IF_DEQUEUE(&sc->sc_snd, m);
454 if (m == NULL)
455 break;
456 GRE_DPRINTF(sc, "%s: dequeue\n", __func__);
457 if ((so->so_state & SS_ISCONNECTED) == 0) {
458 GRE_DPRINTF(sc, "%s: not connected\n",
459 __func__);
460 m_freem(m);
461 continue;
462 }
463 rc = (*so->so_send)(so, NULL, NULL, m, NULL, 0, l);
464 /* XXX handle ENOBUFS? */
465 if (rc != 0)
466 GRE_DPRINTF(sc, "%s: so_send failed\n",
467 __func__);
468 }
469 }
470 if (fp != NULL) {
471 GRE_DPRINTF(sc, "%s: removing upcall\n", __func__);
472 gre_upcall_remove(so);
473 } else if (so != NULL)
474 gre_sodestroy(&so);
475 out:
476 GRE_DPRINTF(sc, "%s: stopping\n", __func__);
477 if (fp != NULL)
478 gre_closef(&fp, curlwp);
479 if (sc->sc_proto == IPPROTO_UDP)
480 ifp->if_flags &= ~IFF_RUNNING;
481 while (!IF_IS_EMPTY(&sc->sc_snd)) {
482 IF_DEQUEUE(&sc->sc_snd, m);
483 m_freem(m);
484 }
485 gre_stop(sc);
486 /* must not touch sc after this! */
487 GRE_DPRINTF(sc, "%s: restore ipl\n", __func__);
488 mutex_exit(&sc->sc_mtx);
489 }
490
491 static void
492 gre_thread(void *arg)
493 {
494 struct gre_softc *sc = (struct gre_softc *)arg;
495
496 gre_thread1(sc, curlwp);
497 /* must not touch sc after this! */
498 kthread_exit(0);
499 }
500
501 /* Calling thread must hold sc->sc_mtx. */
502 int
503 gre_input3(struct gre_softc *sc, struct mbuf *m, int hlen,
504 const struct gre_h *gh, int mtx_held)
505 {
506 u_int16_t flags;
507 #if NBPFILTER > 0
508 u_int32_t af = AF_INET; /* af passed to BPF tap */
509 #endif
510 int isr;
511 struct ifqueue *ifq;
512
513 sc->sc_if.if_ipackets++;
514 sc->sc_if.if_ibytes += m->m_pkthdr.len;
515
516 hlen += sizeof(struct gre_h);
517
518 /* process GRE flags as packet can be of variable len */
519 flags = ntohs(gh->flags);
520
521 /* Checksum & Offset are present */
522 if ((flags & GRE_CP) | (flags & GRE_RP))
523 hlen += 4;
524 /* We don't support routing fields (variable length) */
525 if (flags & GRE_RP) {
526 sc->sc_if.if_ierrors++;
527 return 0;
528 }
529 if (flags & GRE_KP)
530 hlen += 4;
531 if (flags & GRE_SP)
532 hlen += 4;
533
534 switch (ntohs(gh->ptype)) { /* ethertypes */
535 case ETHERTYPE_IP: /* shouldn't need a schednetisr(), as */
536 ifq = &ipintrq; /* we are in ip_input */
537 isr = NETISR_IP;
538 break;
539 #ifdef NETATALK
540 case ETHERTYPE_ATALK:
541 ifq = &atintrq1;
542 isr = NETISR_ATALK;
543 #if NBPFILTER > 0
544 af = AF_APPLETALK;
545 #endif
546 break;
547 #endif
548 #ifdef INET6
549 case ETHERTYPE_IPV6:
550 GRE_DPRINTF(sc, "%s: IPv6 packet\n", __func__);
551 ifq = &ip6intrq;
552 isr = NETISR_IPV6;
553 #if NBPFILTER > 0
554 af = AF_INET6;
555 #endif
556 break;
557 #endif
558 default: /* others not yet supported */
559 GRE_DPRINTF(sc, "%s: unhandled ethertype 0x%04x\n", __func__,
560 ntohs(gh->ptype));
561 sc->sc_if.if_noproto++;
562 return 0;
563 }
564
565 if (hlen > m->m_pkthdr.len) {
566 m_freem(m);
567 sc->sc_if.if_ierrors++;
568 return EINVAL;
569 }
570 m_adj(m, hlen);
571
572 #if NBPFILTER > 0
573 if (sc->sc_if.if_bpf != NULL)
574 bpf_mtap_af(sc->sc_if.if_bpf, af, m);
575 #endif /*NBPFILTER > 0*/
576
577 m->m_pkthdr.rcvif = &sc->sc_if;
578
579 if (!mtx_held)
580 mutex_enter(&sc->sc_mtx);
581 if (IF_QFULL(ifq)) {
582 IF_DROP(ifq);
583 m_freem(m);
584 } else {
585 IF_ENQUEUE(ifq, m);
586 }
587 /* we need schednetisr since the address family may change */
588 schednetisr(isr);
589 if (!mtx_held)
590 mutex_exit(&sc->sc_mtx);
591
592 return 1; /* packet is done, no further processing needed */
593 }
594
595 /*
596 * The output routine. Takes a packet and encapsulates it in the protocol
597 * given by sc->sc_proto. See also RFC 1701 and RFC 2004
598 */
599 static int
600 gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
601 struct rtentry *rt)
602 {
603 int error = 0, hlen, msiz;
604 struct gre_softc *sc = ifp->if_softc;
605 struct greip *gi;
606 struct gre_h *gh;
607 struct ip *eip, *ip;
608 u_int8_t ip_tos = 0;
609 u_int16_t etype = 0;
610 struct mobile_h mob_h;
611
612 if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
613 (IFF_UP | IFF_RUNNING) ||
614 sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
615 m_freem(m);
616 error = ENETDOWN;
617 goto end;
618 }
619
620 gi = NULL;
621 ip = NULL;
622
623 #if NBPFILTER >0
624 if (ifp->if_bpf)
625 bpf_mtap_af(ifp->if_bpf, dst->sa_family, m);
626 #endif
627
628 m->m_flags &= ~(M_BCAST|M_MCAST);
629
630 switch (sc->sc_proto) {
631 case IPPROTO_MOBILE:
632 if (dst->sa_family != AF_INET) {
633 IF_DROP(&ifp->if_snd);
634 m_freem(m);
635 error = EINVAL;
636 goto end;
637 }
638
639 if (M_UNWRITABLE(m, sizeof(*ip)) &&
640 (m = m_pullup(m, sizeof(*ip))) == NULL) {
641 error = ENOBUFS;
642 goto end;
643 }
644 ip = mtod(m, struct ip *);
645
646 memset(&mob_h, 0, MOB_H_SIZ_L);
647 mob_h.proto = (ip->ip_p) << 8;
648 mob_h.odst = ip->ip_dst.s_addr;
649 ip->ip_dst.s_addr = sc->g_dst.s_addr;
650
651 /*
652 * If the packet comes from our host, we only change
653 * the destination address in the IP header.
654 * Else we also need to save and change the source
655 */
656 if (in_hosteq(ip->ip_src, sc->g_src)) {
657 msiz = MOB_H_SIZ_S;
658 } else {
659 mob_h.proto |= MOB_H_SBIT;
660 mob_h.osrc = ip->ip_src.s_addr;
661 ip->ip_src.s_addr = sc->g_src.s_addr;
662 msiz = MOB_H_SIZ_L;
663 }
664 HTONS(mob_h.proto);
665 mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
666
667 M_PREPEND(m, msiz, M_DONTWAIT);
668 if (m == NULL) {
669 error = ENOBUFS;
670 goto end;
671 }
672 /* XXX Assuming that ip does not dangle after
673 * M_PREPEND. In practice, that's true, but
674 * that's not in M_PREPEND's contract.
675 */
676 memmove(mtod(m, void *), ip, sizeof(*ip));
677 ip = mtod(m, struct ip *);
678 memcpy(ip + 1, &mob_h, (size_t)msiz);
679 ip->ip_len = htons(ntohs(ip->ip_len) + msiz);
680 break;
681 case IPPROTO_UDP:
682 case IPPROTO_GRE:
683 GRE_DPRINTF(sc, "%s: dst->sa_family=%d\n", __func__,
684 dst->sa_family);
685 switch (dst->sa_family) {
686 case AF_INET:
687 ip = mtod(m, struct ip *);
688 ip_tos = ip->ip_tos;
689 etype = ETHERTYPE_IP;
690 break;
691 #ifdef NETATALK
692 case AF_APPLETALK:
693 etype = ETHERTYPE_ATALK;
694 break;
695 #endif
696 #ifdef INET6
697 case AF_INET6:
698 etype = ETHERTYPE_IPV6;
699 break;
700 #endif
701 default:
702 IF_DROP(&ifp->if_snd);
703 m_freem(m);
704 error = EAFNOSUPPORT;
705 goto end;
706 }
707 break;
708 default:
709 IF_DROP(&ifp->if_snd);
710 m_freem(m);
711 error = EINVAL;
712 goto end;
713 }
714
715 switch (sc->sc_proto) {
716 case IPPROTO_GRE:
717 hlen = sizeof(struct greip);
718 break;
719 case IPPROTO_UDP:
720 hlen = sizeof(struct gre_h);
721 break;
722 default:
723 hlen = 0;
724 break;
725 }
726
727 M_PREPEND(m, hlen, M_DONTWAIT);
728
729 if (m == NULL) {
730 IF_DROP(&ifp->if_snd);
731 error = ENOBUFS;
732 goto end;
733 }
734
735 switch (sc->sc_proto) {
736 case IPPROTO_UDP:
737 gh = mtod(m, struct gre_h *);
738 memset(gh, 0, sizeof(*gh));
739 gh->ptype = htons(etype);
740 /* XXX Need to handle IP ToS. Look at how I handle IP TTL. */
741 break;
742 case IPPROTO_GRE:
743 gi = mtod(m, struct greip *);
744 gh = &gi->gi_g;
745 eip = &gi->gi_i;
746 /* we don't have any GRE flags for now */
747 memset(gh, 0, sizeof(*gh));
748 gh->ptype = htons(etype);
749 eip->ip_src = sc->g_src;
750 eip->ip_dst = sc->g_dst;
751 eip->ip_hl = (sizeof(struct ip)) >> 2;
752 eip->ip_ttl = ip_gre_ttl;
753 eip->ip_tos = ip_tos;
754 eip->ip_len = htons(m->m_pkthdr.len);
755 eip->ip_p = sc->sc_proto;
756 break;
757 case IPPROTO_MOBILE:
758 eip = mtod(m, struct ip *);
759 eip->ip_p = sc->sc_proto;
760 break;
761 default:
762 error = EPROTONOSUPPORT;
763 m_freem(m);
764 goto end;
765 }
766
767 ifp->if_opackets++;
768 ifp->if_obytes += m->m_pkthdr.len;
769
770 /* send it off */
771 if (sc->sc_proto == IPPROTO_UDP) {
772 if (IF_QFULL(&sc->sc_snd)) {
773 IF_DROP(&sc->sc_snd);
774 error = ENOBUFS;
775 m_freem(m);
776 } else {
777 IF_ENQUEUE(&sc->sc_snd, m);
778 gre_wakeup(sc);
779 error = 0;
780 }
781 goto end;
782 }
783 if (sc->route.ro_rt == NULL)
784 rtcache_init(&sc->route);
785 else
786 rtcache_check(&sc->route);
787 if (sc->route.ro_rt == NULL) {
788 m_freem(m);
789 goto end;
790 }
791 if (sc->route.ro_rt->rt_ifp->if_softc == sc) {
792 rtcache_clear(&sc->route);
793 m_freem(m);
794 } else
795 error = ip_output(m, NULL, &sc->route, 0, NULL, NULL);
796 end:
797 if (error)
798 ifp->if_oerrors++;
799 return error;
800 }
801
802 /* Calling thread must hold sc->sc_mtx. */
803 static int
804 gre_kick(struct gre_softc *sc)
805 {
806 int rc;
807 struct ifnet *ifp = &sc->sc_if;
808
809 if (sc->sc_proto == IPPROTO_UDP && (ifp->if_flags & IFF_UP) == IFF_UP &&
810 !sc->sc_running) {
811 sc->sc_running = 1;
812 rc = kthread_create(PRI_NONE, 0, NULL, gre_thread, sc,
813 NULL, ifp->if_xname);
814 if (rc != 0)
815 gre_stop(sc);
816 return rc;
817 } else {
818 gre_wakeup(sc);
819 return 0;
820 }
821 }
822
823 /* Calling thread must hold sc->sc_mtx. */
824 static int
825 gre_getname(struct socket *so, int req, struct mbuf *nam, struct lwp *l)
826 {
827 return (*so->so_proto->pr_usrreq)(so, req, NULL, nam, NULL, l);
828 }
829
830 /* Calling thread must hold sc->sc_mtx. */
831 static int
832 gre_getsockname(struct socket *so, struct mbuf *nam, struct lwp *l)
833 {
834 return gre_getname(so, PRU_SOCKADDR, nam, l);
835 }
836
837 /* Calling thread must hold sc->sc_mtx. */
838 static int
839 gre_getpeername(struct socket *so, struct mbuf *nam, struct lwp *l)
840 {
841 return gre_getname(so, PRU_PEERADDR, nam, l);
842 }
843
844 /* Calling thread must hold sc->sc_mtx. */
845 static int
846 gre_getnames(struct socket *so, struct lwp *l, struct sockaddr_in *src,
847 struct sockaddr_in *dst)
848 {
849 struct mbuf *m;
850 struct sockaddr_in *sin;
851 int rc;
852
853 if ((m = gre_getsockmbuf(so)) == NULL)
854 return ENOBUFS;
855
856 sin = mtod(m, struct sockaddr_in *);
857
858 if ((rc = gre_getsockname(so, m, l)) != 0)
859 goto out;
860 if (sin->sin_family != AF_INET) {
861 rc = EAFNOSUPPORT;
862 goto out;
863 }
864 *src = *sin;
865
866 if ((rc = gre_getpeername(so, m, l)) != 0)
867 goto out;
868 if (sin->sin_family != AF_INET) {
869 rc = EAFNOSUPPORT;
870 goto out;
871 }
872 *dst = *sin;
873
874 out:
875 m_freem(m);
876 return rc;
877 }
878
879 static void
880 gre_closef(struct file **fpp, struct lwp *l)
881 {
882 struct file *fp = *fpp;
883
884 simple_lock(&fp->f_slock);
885 FILE_USE(fp);
886 closef(fp, l);
887 *fpp = NULL;
888 }
889
890 static int
891 gre_ioctl(struct ifnet *ifp, u_long cmd, void *data)
892 {
893 u_char oproto;
894 struct file *fp;
895 struct socket *so;
896 struct sockaddr_in dst, src;
897 struct proc *p = curproc; /* XXX */
898 struct lwp *l = curlwp; /* XXX */
899 struct ifreq *ifr;
900 struct if_laddrreq *lifr = (struct if_laddrreq *)data;
901 struct gre_softc *sc = ifp->if_softc;
902 struct sockaddr_in si;
903 struct sockaddr *sa = NULL;
904 int error = 0;
905 #ifdef COMPAT_OIFREQ
906 u_long ocmd = cmd;
907 struct oifreq *oifr = NULL;
908 struct ifreq ifrb;
909
910 cmd = cvtcmd(cmd);
911 if (cmd != ocmd) {
912 oifr = data;
913 data = ifr = &ifrb;
914 ifreqo2n(oifr, ifr);
915 } else
916 #endif
917 ifr = data;
918
919 switch (cmd) {
920 case SIOCSIFFLAGS:
921 case SIOCSIFMTU:
922 case GRESPROTO:
923 case GRESADDRD:
924 case GRESADDRS:
925 case GRESSOCK:
926 case GREDSOCK:
927 case SIOCSLIFPHYADDR:
928 case SIOCDIFPHYADDR:
929 if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE,
930 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
931 NULL) != 0)
932 return EPERM;
933 break;
934 default:
935 break;
936 }
937
938 mutex_enter(&sc->sc_mtx);
939 switch (cmd) {
940 case SIOCSIFADDR:
941 ifp->if_flags |= IFF_UP;
942 if ((error = gre_kick(sc)) != 0)
943 ifp->if_flags &= ~IFF_UP;
944 break;
945 case SIOCSIFDSTADDR:
946 break;
947 case SIOCSIFFLAGS:
948 oproto = sc->sc_proto;
949 switch (ifr->ifr_flags & (IFF_LINK0|IFF_LINK2)) {
950 case IFF_LINK0|IFF_LINK2:
951 sc->sc_proto = IPPROTO_UDP;
952 if (oproto != IPPROTO_UDP)
953 ifp->if_flags &= ~IFF_RUNNING;
954 error = gre_kick(sc);
955 break;
956 case IFF_LINK0:
957 sc->sc_proto = IPPROTO_GRE;
958 gre_wakeup(sc);
959 goto recompute;
960 case 0:
961 sc->sc_proto = IPPROTO_MOBILE;
962 gre_wakeup(sc);
963 goto recompute;
964 }
965 break;
966 case SIOCSIFMTU:
967 if (ifr->ifr_mtu < 576) {
968 error = EINVAL;
969 break;
970 }
971 ifp->if_mtu = ifr->ifr_mtu;
972 break;
973 case SIOCGIFMTU:
974 ifr->ifr_mtu = sc->sc_if.if_mtu;
975 break;
976 case SIOCADDMULTI:
977 case SIOCDELMULTI:
978 if (ifr == 0) {
979 error = EAFNOSUPPORT;
980 break;
981 }
982 switch (ifr->ifr_addr.sa_family) {
983 #ifdef INET
984 case AF_INET:
985 break;
986 #endif
987 #ifdef INET6
988 case AF_INET6:
989 break;
990 #endif
991 default:
992 error = EAFNOSUPPORT;
993 break;
994 }
995 break;
996 case GRESPROTO:
997 oproto = sc->sc_proto;
998 sc->sc_proto = ifr->ifr_flags;
999 switch (sc->sc_proto) {
1000 case IPPROTO_UDP:
1001 ifp->if_flags |= IFF_LINK0|IFF_LINK2;
1002 if (oproto != IPPROTO_UDP)
1003 ifp->if_flags &= ~IFF_RUNNING;
1004 error = gre_kick(sc);
1005 break;
1006 case IPPROTO_GRE:
1007 ifp->if_flags |= IFF_LINK0;
1008 ifp->if_flags &= ~IFF_LINK2;
1009 goto recompute;
1010 case IPPROTO_MOBILE:
1011 ifp->if_flags &= ~(IFF_LINK0|IFF_LINK2);
1012 goto recompute;
1013 default:
1014 error = EPROTONOSUPPORT;
1015 break;
1016 }
1017 break;
1018 case GREGPROTO:
1019 ifr->ifr_flags = sc->sc_proto;
1020 break;
1021 case GRESADDRS:
1022 case GRESADDRD:
1023 /*
1024 * set tunnel endpoints, compute a less specific route
1025 * to the remote end and mark if as up
1026 */
1027 sa = &ifr->ifr_addr;
1028 if (cmd == GRESADDRS) {
1029 sc->g_src = (satosin(sa))->sin_addr;
1030 sc->g_srcport = satosin(sa)->sin_port;
1031 }
1032 if (cmd == GRESADDRD) {
1033 if (sc->sc_proto == IPPROTO_UDP &&
1034 satosin(sa)->sin_port == 0) {
1035 error = EINVAL;
1036 break;
1037 }
1038 sc->g_dst = (satosin(sa))->sin_addr;
1039 sc->g_dstport = satosin(sa)->sin_port;
1040 }
1041 recompute:
1042 if (sc->sc_proto == IPPROTO_UDP ||
1043 (sc->g_src.s_addr != INADDR_ANY &&
1044 sc->g_dst.s_addr != INADDR_ANY)) {
1045 rtcache_free(&sc->route);
1046 if (sc->sc_proto == IPPROTO_UDP)
1047 error = gre_kick(sc);
1048 else if (gre_compute_route(sc) == 0)
1049 ifp->if_flags |= IFF_RUNNING;
1050 else
1051 ifp->if_flags &= ~IFF_RUNNING;
1052 }
1053 break;
1054 case GREGADDRS:
1055 memset(&si, 0, sizeof(si));
1056 si.sin_family = AF_INET;
1057 si.sin_len = sizeof(struct sockaddr_in);
1058 si.sin_addr.s_addr = sc->g_src.s_addr;
1059 sa = sintosa(&si);
1060 ifr->ifr_addr = *sa;
1061 break;
1062 case GREGADDRD:
1063 memset(&si, 0, sizeof(si));
1064 si.sin_family = AF_INET;
1065 si.sin_len = sizeof(struct sockaddr_in);
1066 si.sin_addr.s_addr = sc->g_dst.s_addr;
1067 sa = sintosa(&si);
1068 ifr->ifr_addr = *sa;
1069 break;
1070 case GREDSOCK:
1071 if (sc->sc_proto != IPPROTO_UDP) {
1072 error = EINVAL;
1073 break;
1074 }
1075 ifp->if_flags &= ~IFF_UP;
1076 gre_wakeup(sc);
1077 break;
1078 case GRESSOCK:
1079 if (sc->sc_proto != IPPROTO_UDP) {
1080 error = EINVAL;
1081 break;
1082 }
1083 /* getsock() will FILE_USE() and unlock the descriptor for us */
1084 if ((error = getsock(p->p_fd, (int)ifr->ifr_value, &fp)) != 0)
1085 break;
1086 so = (struct socket *)fp->f_data;
1087 if (so->so_type != SOCK_DGRAM) {
1088 FILE_UNUSE(fp, NULL);
1089 error = EINVAL;
1090 break;
1091 }
1092 /* check address */
1093 if ((error = gre_getnames(so, curlwp, &src, &dst)) != 0) {
1094 FILE_UNUSE(fp, NULL);
1095 break;
1096 }
1097
1098 /* Increase reference count. Now that our reference
1099 * to the file descriptor is counted, this thread
1100 * can release our "use" of the descriptor, but it
1101 * will not be destroyed by some other thread's
1102 * action. This thread needs to release its use,
1103 * too, because one and only one thread can have
1104 * use of the descriptor at once. The kernel thread
1105 * will pick up the use if it needs it.
1106 */
1107
1108 fp->f_count++;
1109 FILE_UNUSE(fp, NULL);
1110
1111 while (sc->sc_fp != NULL && error == 0) {
1112 error = cv_timedwait_sig(&sc->sc_soparm_cv, &sc->sc_mtx,
1113 MAX(1, hz / 2));
1114 }
1115 if (error == 0) {
1116 sc->sc_fp = fp;
1117 ifp->if_flags |= IFF_UP;
1118 }
1119
1120 if (error != 0 || (error = gre_kick(sc)) != 0) {
1121 gre_closef(&fp, l);
1122 break;
1123 }
1124 /* fp does not any longer belong to this thread. */
1125 sc->g_src = src.sin_addr;
1126 sc->g_srcport = src.sin_port;
1127 sc->g_dst = dst.sin_addr;
1128 sc->g_dstport = dst.sin_port;
1129 break;
1130 case SIOCSLIFPHYADDR:
1131 if (lifr->addr.ss_family != AF_INET ||
1132 lifr->dstaddr.ss_family != AF_INET) {
1133 error = EAFNOSUPPORT;
1134 break;
1135 }
1136 if (lifr->addr.ss_len != sizeof(si) ||
1137 lifr->dstaddr.ss_len != sizeof(si)) {
1138 error = EINVAL;
1139 break;
1140 }
1141 sc->g_src = satosin(&lifr->addr)->sin_addr;
1142 sc->g_dst = satosin(&lifr->dstaddr)->sin_addr;
1143 sc->g_srcport = satosin(&lifr->addr)->sin_port;
1144 sc->g_dstport = satosin(&lifr->dstaddr)->sin_port;
1145 goto recompute;
1146 case SIOCDIFPHYADDR:
1147 sc->g_src.s_addr = INADDR_ANY;
1148 sc->g_dst.s_addr = INADDR_ANY;
1149 sc->g_srcport = 0;
1150 sc->g_dstport = 0;
1151 goto recompute;
1152 case SIOCGLIFPHYADDR:
1153 if (sc->g_src.s_addr == INADDR_ANY ||
1154 sc->g_dst.s_addr == INADDR_ANY) {
1155 error = EADDRNOTAVAIL;
1156 break;
1157 }
1158 memset(&si, 0, sizeof(si));
1159 si.sin_family = AF_INET;
1160 si.sin_len = sizeof(struct sockaddr_in);
1161 si.sin_addr = sc->g_src;
1162 if (sc->sc_proto == IPPROTO_UDP)
1163 si.sin_port = sc->g_srcport;
1164 memcpy(&lifr->addr, &si, sizeof(si));
1165 si.sin_addr = sc->g_dst;
1166 if (sc->sc_proto == IPPROTO_UDP)
1167 si.sin_port = sc->g_dstport;
1168 memcpy(&lifr->dstaddr, &si, sizeof(si));
1169 break;
1170 default:
1171 error = EINVAL;
1172 break;
1173 }
1174 #ifdef COMPAT_OIFREQ
1175 if (cmd != ocmd)
1176 ifreqn2o(oifr, ifr);
1177 #endif
1178 mutex_exit(&sc->sc_mtx);
1179 return error;
1180 }
1181
1182 /*
1183 * Compute a route to our destination.
1184 */
1185 static int
1186 gre_compute_route(struct gre_softc *sc)
1187 {
1188 struct route *ro;
1189 union {
1190 struct sockaddr dst;
1191 struct sockaddr_in dst4;
1192 } u;
1193
1194 ro = &sc->route;
1195
1196 memset(ro, 0, sizeof(*ro));
1197 sockaddr_in_init(&u.dst4, &sc->g_dst, 0);
1198 rtcache_setdst(ro, &u.dst);
1199
1200 rtcache_init(ro);
1201
1202 if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
1203 GRE_DPRINTF(sc, "%s: route to %s %s\n", sc->sc_if.if_xname,
1204 inet_ntoa(u.dst4.sin_addr),
1205 (ro->ro_rt == NULL)
1206 ? "does not exist"
1207 : "loops back to ourself");
1208 rtcache_free(ro);
1209 return EADDRNOTAVAIL;
1210 }
1211
1212 return 0;
1213 }
1214
/*
 * Internet-style checksum over a flat buffer - the counterpart of
 * in_cksum, which works on mbufs.
 *
 * p:   start of the buffer, read as 16-bit words in host memory order.
 * len: length of the buffer in bytes; a trailing odd byte is summed as
 *      if it were followed by a zero byte.
 *
 * Returns the one's complement of the folded 16-bit sum.
 */
u_int16_t
gre_in_cksum(u_int16_t *p, u_int len)
{
	u_int32_t acc = 0;
	int remaining;

	/* Sum all complete 16-bit words. */
	for (remaining = len >> 1; remaining != 0; remaining--)
		acc += *p++;

	/* Odd length: pad the last byte with a zero byte. */
	if ((len & 1) != 0) {
		union {
			u_short	w;
			u_char	c[2];
		} tail;

		tail.c[0] = *(u_char *)p;
		tail.c[1] = 0;
		acc += tail.w;
	}

	/* end-around-carry */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += (acc >> 16);
	return ~acc;
}
1243 #endif
1244
void	greattach(int);

/*
 * Attach routine of the gre driver.  With INET configured it
 * initialises the list of softcs and registers the "gre" interface
 * cloner; without INET it does nothing.  `count' is not used.
 */
/* ARGSUSED */
void
greattach(int count)
{

#ifdef INET
	LIST_INIT(&gre_softc_list);
	if_clone_attach(&gre_cloner);
#endif
}
1256