if_gre.c revision 1.96 1 /* $NetBSD: if_gre.c,v 1.96 2007/05/29 23:32:41 christos Exp $ */
2
3 /*
4 * Copyright (c) 1998 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Heiko W.Rupp <hwr (at) pilhuhn.de>
9 *
10 * IPv6-over-GRE contributed by Gert Doering <gert (at) greenie.muc.de>
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the NetBSD
23 * Foundation, Inc. and its contributors.
24 * 4. Neither the name of The NetBSD Foundation nor the names of its
25 * contributors may be used to endorse or promote products derived
26 * from this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
29 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
32 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 */
40
/*
 * Encapsulate L3 protocols into IP.
 * See RFC 1701 and 1702 for more details.
 * If_gre is compatible with Cisco GRE tunnels, so you can
 * have a NetBSD box as the other end of a tunnel interface of a Cisco
 * router. See gre(4) for more details.
 * Also supported: minimal encapsulation within IP (IP protocol 55,
 * IPPROTO_MOBILE) as specified by RFC 2004.
 */
49
50 #include <sys/cdefs.h>
51 __KERNEL_RCSID(0, "$NetBSD: if_gre.c,v 1.96 2007/05/29 23:32:41 christos Exp $");
52
53 #include "opt_gre.h"
54 #include "opt_inet.h"
55 #include "opt_compat_netbsd.h"
56 #include "bpfilter.h"
57
58 #ifdef INET
59 #include <sys/param.h>
60 #include <sys/file.h>
61 #include <sys/filedesc.h>
62 #include <sys/malloc.h>
63 #include <sys/mbuf.h>
64 #include <sys/proc.h>
65 #include <sys/protosw.h>
66 #include <sys/socket.h>
67 #include <sys/socketvar.h>
68 #include <sys/ioctl.h>
69 #include <sys/queue.h>
70 #if __NetBSD__
71 #include <sys/systm.h>
72 #include <sys/sysctl.h>
73 #include <sys/kauth.h>
74 #endif
75
76 #include <sys/kernel.h>
77 #include <sys/mutex.h>
78 #include <sys/condvar.h>
79 #include <sys/kthread.h>
80
81 #include <machine/cpu.h>
82
83 #include <net/ethertypes.h>
84 #include <net/if.h>
85 #include <net/if_types.h>
86 #include <net/netisr.h>
87 #include <net/route.h>
88
89 #ifdef INET
90 #include <netinet/in.h>
91 #include <netinet/in_systm.h>
92 #include <netinet/in_var.h>
93 #include <netinet/ip.h>
94 #include <netinet/ip_var.h>
95 #else
96 #error "Huh? if_gre without inet?"
97 #endif
98
99
100 #ifdef NETATALK
101 #include <netatalk/at.h>
102 #include <netatalk/at_var.h>
103 #include <netatalk/at_extern.h>
104 #endif
105
106 #if NBPFILTER > 0
107 #include <sys/time.h>
108 #include <net/bpf.h>
109 #endif
110
111 #include <net/if_gre.h>
112
113 #if defined(COMPAT_09) || defined(COMPAT_10) || defined(COMPAT_11) || \
114 defined(COMPAT_12) || defined(COMPAT_13) || defined(COMPAT_14) || \
115 defined(COMPAT_15) || defined(COMPAT_16) || defined(COMPAT_20) || \
116 defined(COMPAT_30) || defined(COMPAT_40)
117 #define COMPAT_OIFREQ
118 #include <compat/sys/sockio.h>
119 #endif
/*
 * It is not easy to calculate the right value for a GRE MTU.
 * We leave this task to the admin and use the same default that
 * other vendors use.
 */
#define GREMTU 1476

/*
 * GRE_DPRINTF(sc, fmt, ...): debugging printf.
 *
 * Without GRE_DEBUG the macro expands to an empty statement.  With
 * GRE_DEBUG it prints only if IFF_DEBUG is set on the softc's interface.
 * At least one argument must follow the format string.
 */
#ifndef GRE_DEBUG
#define GRE_DPRINTF(__sc, __fmt, ...)	do { } while (/*CONSTCOND*/0)
#else
#define GRE_DPRINTF(__sc, __fmt, ...)					\
	do {								\
		if (((__sc)->sc_if.if_flags & IFF_DEBUG) != 0)		\
			printf(__fmt, __VA_ARGS__);			\
	} while (/*CONSTCOND*/0)
#endif /* GRE_DEBUG */
136
137 struct gre_softc_head gre_softc_list;
138 int ip_gre_ttl = GRE_TTL;
139
140 static int gre_clone_create(struct if_clone *, int);
141 static int gre_clone_destroy(struct ifnet *);
142
143 static struct if_clone gre_cloner =
144 IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
145
146 static int gre_output(struct ifnet *, struct mbuf *,
147 const struct sockaddr *, struct rtentry *);
148 static int gre_ioctl(struct ifnet *, u_long, void *);
149
150 static int gre_compute_route(struct gre_softc *sc);
151
152 static void gre_closef(struct file **, struct lwp *);
153 static int gre_getsockname(struct socket *, struct mbuf *, struct lwp *);
154 static int gre_getpeername(struct socket *, struct mbuf *, struct lwp *);
155 static int gre_getnames(struct socket *, struct lwp *, struct sockaddr_in *,
156 struct sockaddr_in *);
157
158 /* Calling thread must hold sc->sc_mtx. */
159 static void
160 gre_stop(struct gre_softc *sc)
161 {
162 sc->sc_running = 0;
163 cv_signal(&sc->sc_join_cv);
164 }
165
166 /* Calling thread must hold sc->sc_mtx. */
167 static void
168 gre_join(struct gre_softc *sc)
169 {
170 while (sc->sc_running != 0)
171 cv_wait(&sc->sc_join_cv, &sc->sc_mtx);
172 }
173
174 /* Calling thread must hold sc->sc_mtx. */
175 static void
176 gre_wakeup(struct gre_softc *sc)
177 {
178 GRE_DPRINTF(sc, "%s: enter\n", __func__);
179 sc->sc_haswork = 1;
180 cv_signal(&sc->sc_work_cv);
181 }
182
183 static int
184 gre_clone_create(struct if_clone *ifc, int unit)
185 {
186 struct gre_softc *sc;
187
188 sc = malloc(sizeof(struct gre_softc), M_DEVBUF, M_WAITOK);
189 memset(sc, 0, sizeof(struct gre_softc));
190 mutex_init(&sc->sc_mtx, MUTEX_DRIVER, IPL_NET);
191 cv_init(&sc->sc_work_cv, "gre work");
192 cv_init(&sc->sc_join_cv, "gre join");
193 cv_init(&sc->sc_soparm_cv, "gre soparm");
194
195 snprintf(sc->sc_if.if_xname, sizeof(sc->sc_if.if_xname), "%s%d",
196 ifc->ifc_name, unit);
197 sc->sc_if.if_softc = sc;
198 sc->sc_if.if_type = IFT_TUNNEL;
199 sc->sc_if.if_addrlen = 0;
200 sc->sc_if.if_hdrlen = 24; /* IP + GRE */
201 sc->sc_if.if_dlt = DLT_NULL;
202 sc->sc_if.if_mtu = GREMTU;
203 sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
204 sc->sc_if.if_output = gre_output;
205 sc->sc_if.if_ioctl = gre_ioctl;
206 sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
207 sc->g_dstport = sc->g_srcport = 0;
208 sc->sc_proto = IPPROTO_GRE;
209 sc->sc_snd.ifq_maxlen = 256;
210 sc->sc_if.if_flags |= IFF_LINK0;
211 if_attach(&sc->sc_if);
212 if_alloc_sadl(&sc->sc_if);
213 #if NBPFILTER > 0
214 bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t));
215 #endif
216 LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
217 return 0;
218 }
219
220 static int
221 gre_clone_destroy(struct ifnet *ifp)
222 {
223 struct gre_softc *sc = ifp->if_softc;
224
225 LIST_REMOVE(sc, sc_list);
226 #if NBPFILTER > 0
227 bpfdetach(ifp);
228 #endif
229 if_detach(ifp);
230 mutex_enter(&sc->sc_mtx);
231 gre_wakeup(sc);
232 gre_join(sc);
233 mutex_exit(&sc->sc_mtx);
234 rtcache_free(&sc->route);
235
236 cv_destroy(&sc->sc_soparm_cv);
237 cv_destroy(&sc->sc_join_cv);
238 cv_destroy(&sc->sc_work_cv);
239 mutex_destroy(&sc->sc_mtx);
240 free(sc, M_DEVBUF);
241
242 return 0;
243 }
244
/*
 * Socket upcall installed by gre_upcall_add(): wake the worker thread
 * of the softc passed in arg.  The so and waitflag arguments are unused.
 */
static void
gre_receive(struct socket *so, void *arg, int waitflag)
{
	struct gre_softc *sc = arg;

	GRE_DPRINTF(sc, "%s: enter\n", __func__);

	gre_wakeup(sc);
}
254
255 static void
256 gre_upcall_add(struct socket *so, void *arg)
257 {
258 /* XXX What if the kernel already set an upcall? */
259 so->so_upcallarg = arg;
260 so->so_upcall = gre_receive;
261 so->so_rcv.sb_flags |= SB_UPCALL;
262 }
263
264 static void
265 gre_upcall_remove(struct socket *so)
266 {
267 /* XXX What if the kernel already set an upcall? */
268 so->so_rcv.sb_flags &= ~SB_UPCALL;
269 so->so_upcallarg = NULL;
270 so->so_upcall = NULL;
271 }
272
273 static void
274 gre_sodestroy(struct socket **sop)
275 {
276 gre_upcall_remove(*sop);
277 soshutdown(*sop, SHUT_RDWR);
278 soclose(*sop);
279 *sop = NULL;
280 }
281
282 static struct mbuf *
283 gre_getsockmbuf(struct socket *so)
284 {
285 struct mbuf *m;
286
287 m = m_get(M_WAIT, MT_SONAME);
288 if (m != NULL)
289 MCLAIM(m, so->so_mowner);
290 return m;
291 }
292
293 static int
294 gre_socreate1(struct gre_softc *sc, struct lwp *l, struct gre_soparm *sp,
295 struct socket **sop)
296 {
297 int rc;
298 struct mbuf *m;
299 struct sockaddr_in *sin;
300 struct socket *so;
301
302 GRE_DPRINTF(sc, "%s: enter\n", __func__);
303 rc = socreate(AF_INET, sop, SOCK_DGRAM, IPPROTO_UDP, l);
304 if (rc != 0) {
305 GRE_DPRINTF(sc, "%s: socreate failed\n", __func__);
306 return rc;
307 }
308
309 so = *sop;
310
311 gre_upcall_add(so, sc);
312 if ((m = gre_getsockmbuf(so)) == NULL) {
313 rc = ENOBUFS;
314 goto out;
315 }
316 sin = mtod(m, struct sockaddr_in *);
317 sin->sin_len = m->m_len = sizeof(struct sockaddr_in);
318 sin->sin_family = AF_INET;
319 sin->sin_addr = sc->g_src;
320 sin->sin_port = sc->g_srcport;
321
322 GRE_DPRINTF(sc, "%s: bind 0x%08" PRIx32 " port %d\n", __func__,
323 sin->sin_addr.s_addr, ntohs(sin->sin_port));
324 if ((rc = sobind(so, m, l)) != 0) {
325 GRE_DPRINTF(sc, "%s: sobind failed\n", __func__);
326 goto out;
327 }
328
329 if (sc->g_srcport == 0) {
330 if ((rc = gre_getsockname(so, m, l)) != 0) {
331 GRE_DPRINTF(sc, "%s: gre_getsockname failed\n",
332 __func__);
333 goto out;
334 }
335 sc->g_srcport = sin->sin_port;
336 }
337
338 sin->sin_addr = sc->g_dst;
339 sin->sin_port = sc->g_dstport;
340
341 if ((rc = soconnect(so, m, l)) != 0) {
342 GRE_DPRINTF(sc, "%s: soconnect failed\n", __func__);
343 goto out;
344 }
345
346 *mtod(m, int *) = ip_gre_ttl;
347 m->m_len = sizeof(int);
348 rc = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so, IPPROTO_IP, IP_TTL,
349 &m);
350 m = NULL;
351 if (rc != 0) {
352 printf("%s: setopt ttl failed\n", __func__);
353 rc = 0;
354 }
355 out:
356 m_freem(m);
357
358 if (rc != 0)
359 gre_sodestroy(sop);
360 else
361 *sp = sc->sc_soparm;
362
363 return rc;
364 }
365
366 static void
367 gre_thread1(struct gre_softc *sc, struct lwp *l)
368 {
369 int flags, rc;
370 const struct gre_h *gh;
371 struct ifnet *ifp = &sc->sc_if;
372 struct mbuf *m;
373 struct socket *so = NULL;
374 struct uio uio;
375 struct gre_soparm sp;
376 struct file *fp = NULL;
377
378 GRE_DPRINTF(sc, "%s: enter\n", __func__);
379 mutex_enter(&sc->sc_mtx);
380
381 sc->sc_haswork = 1;
382
383 memset(&sp, 0, sizeof(sp));
384 memset(&uio, 0, sizeof(uio));
385
386 ifp->if_flags |= IFF_RUNNING;
387
388 for (;;) {
389 while (sc->sc_haswork == 0) {
390 GRE_DPRINTF(sc, "%s: sleeping\n", __func__);
391 cv_wait(&sc->sc_work_cv, &sc->sc_mtx);
392 }
393 sc->sc_haswork = 0;
394 GRE_DPRINTF(sc, "%s: awake\n", __func__);
395 if ((ifp->if_flags & IFF_UP) != IFF_UP) {
396 GRE_DPRINTF(sc, "%s: not up & running; exiting\n",
397 __func__);
398 break;
399 }
400 if (sc->sc_proto != IPPROTO_UDP) {
401 GRE_DPRINTF(sc, "%s: not udp; exiting\n", __func__);
402 break;
403 }
404 /* XXX optimize */
405 if (so == NULL || sc->sc_fp != NULL ||
406 memcmp(&sp, &sc->sc_soparm, sizeof(sp)) != 0) {
407 GRE_DPRINTF(sc, "%s: parameters changed\n", __func__);
408
409 if (fp != NULL) {
410 gre_closef(&fp, curlwp);
411 so = NULL;
412 } else if (so != NULL)
413 gre_sodestroy(&so);
414
415 if (sc->sc_fp != NULL) {
416 fp = sc->sc_fp;
417 sc->sc_fp = NULL;
418 so = (struct socket *)fp->f_data;
419 gre_upcall_add(so, sc);
420 sp = sc->sc_soparm;
421 } else if (gre_socreate1(sc, l, &sp, &so) != 0)
422 goto out;
423 }
424 cv_signal(&sc->sc_soparm_cv);
425 for (;;) {
426 flags = MSG_DONTWAIT;
427 uio.uio_resid = 1000000;
428 rc = (*so->so_receive)(so, NULL, &uio, &m, NULL,
429 &flags);
430 /* TBD Back off if ECONNREFUSED (indicates
431 * ICMP Port Unreachable)?
432 */
433 if (rc == EWOULDBLOCK) {
434 GRE_DPRINTF(sc, "%s: so_receive EWOULDBLOCK\n",
435 __func__);
436 break;
437 } else if (rc != 0 || m == NULL) {
438 GRE_DPRINTF(sc, "%s: rc %d m %p\n",
439 ifp->if_xname, rc, (void *)m);
440 continue;
441 } else
442 GRE_DPRINTF(sc, "%s: so_receive ok\n",
443 __func__);
444 if (m->m_len < sizeof(*gh) &&
445 (m = m_pullup(m, sizeof(*gh))) == NULL) {
446 GRE_DPRINTF(sc, "%s: m_pullup failed\n",
447 __func__);
448 continue;
449 }
450 gh = mtod(m, const struct gre_h *);
451
452 if (gre_input3(sc, m, 0, gh, 1) == 0) {
453 GRE_DPRINTF(sc, "%s: dropping unsupported\n",
454 __func__);
455 m_freem(m);
456 }
457 }
458 for (;;) {
459 IF_DEQUEUE(&sc->sc_snd, m);
460 if (m == NULL)
461 break;
462 GRE_DPRINTF(sc, "%s: dequeue\n", __func__);
463 if ((so->so_state & SS_ISCONNECTED) == 0) {
464 GRE_DPRINTF(sc, "%s: not connected\n",
465 __func__);
466 m_freem(m);
467 continue;
468 }
469 rc = (*so->so_send)(so, NULL, NULL, m, NULL, 0, l);
470 /* XXX handle ENOBUFS? */
471 if (rc != 0)
472 GRE_DPRINTF(sc, "%s: so_send failed\n",
473 __func__);
474 }
475 }
476 if (fp != NULL) {
477 GRE_DPRINTF(sc, "%s: removing upcall\n", __func__);
478 gre_upcall_remove(so);
479 } else if (so != NULL)
480 gre_sodestroy(&so);
481 out:
482 GRE_DPRINTF(sc, "%s: stopping\n", __func__);
483 if (fp != NULL)
484 gre_closef(&fp, curlwp);
485 if (sc->sc_proto == IPPROTO_UDP)
486 ifp->if_flags &= ~IFF_RUNNING;
487 while (!IF_IS_EMPTY(&sc->sc_snd)) {
488 IF_DEQUEUE(&sc->sc_snd, m);
489 m_freem(m);
490 }
491 gre_stop(sc);
492 /* must not touch sc after this! */
493 GRE_DPRINTF(sc, "%s: restore ipl\n", __func__);
494 mutex_exit(&sc->sc_mtx);
495 }
496
497 static void
498 gre_thread(void *arg)
499 {
500 struct gre_softc *sc = (struct gre_softc *)arg;
501
502 gre_thread1(sc, curlwp);
503 /* must not touch sc after this! */
504 kthread_exit(0);
505 }
506
507 /* Calling thread must hold sc->sc_mtx. */
508 int
509 gre_input3(struct gre_softc *sc, struct mbuf *m, int hlen,
510 const struct gre_h *gh, int mtx_held)
511 {
512 u_int16_t flags;
513 #if NBPFILTER > 0
514 u_int32_t af = AF_INET; /* af passed to BPF tap */
515 #endif
516 int isr;
517 struct ifqueue *ifq;
518
519 sc->sc_if.if_ipackets++;
520 sc->sc_if.if_ibytes += m->m_pkthdr.len;
521
522 hlen += sizeof(struct gre_h);
523
524 /* process GRE flags as packet can be of variable len */
525 flags = ntohs(gh->flags);
526
527 /* Checksum & Offset are present */
528 if ((flags & GRE_CP) | (flags & GRE_RP))
529 hlen += 4;
530 /* We don't support routing fields (variable length) */
531 if (flags & GRE_RP) {
532 sc->sc_if.if_ierrors++;
533 return 0;
534 }
535 if (flags & GRE_KP)
536 hlen += 4;
537 if (flags & GRE_SP)
538 hlen += 4;
539
540 switch (ntohs(gh->ptype)) { /* ethertypes */
541 case ETHERTYPE_IP: /* shouldn't need a schednetisr(), as */
542 ifq = &ipintrq; /* we are in ip_input */
543 isr = NETISR_IP;
544 break;
545 #ifdef NETATALK
546 case ETHERTYPE_ATALK:
547 ifq = &atintrq1;
548 isr = NETISR_ATALK;
549 #if NBPFILTER > 0
550 af = AF_APPLETALK;
551 #endif
552 break;
553 #endif
554 #ifdef INET6
555 case ETHERTYPE_IPV6:
556 GRE_DPRINTF(sc, "%s: IPv6 packet\n", __func__);
557 ifq = &ip6intrq;
558 isr = NETISR_IPV6;
559 #if NBPFILTER > 0
560 af = AF_INET6;
561 #endif
562 break;
563 #endif
564 default: /* others not yet supported */
565 GRE_DPRINTF(sc, "%s: unhandled ethertype 0x%04x\n", __func__,
566 ntohs(gh->ptype));
567 sc->sc_if.if_noproto++;
568 return 0;
569 }
570
571 if (hlen > m->m_pkthdr.len) {
572 m_freem(m);
573 sc->sc_if.if_ierrors++;
574 return EINVAL;
575 }
576 m_adj(m, hlen);
577
578 #if NBPFILTER > 0
579 if (sc->sc_if.if_bpf != NULL)
580 bpf_mtap_af(sc->sc_if.if_bpf, af, m);
581 #endif /*NBPFILTER > 0*/
582
583 m->m_pkthdr.rcvif = &sc->sc_if;
584
585 if (!mtx_held)
586 mutex_enter(&sc->sc_mtx);
587 if (IF_QFULL(ifq)) {
588 IF_DROP(ifq);
589 m_freem(m);
590 } else {
591 IF_ENQUEUE(ifq, m);
592 }
593 /* we need schednetisr since the address family may change */
594 schednetisr(isr);
595 if (!mtx_held)
596 mutex_exit(&sc->sc_mtx);
597
598 return 1; /* packet is done, no further processing needed */
599 }
600
601 /*
602 * The output routine. Takes a packet and encapsulates it in the protocol
603 * given by sc->sc_proto. See also RFC 1701 and RFC 2004
604 */
605 static int
606 gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
607 struct rtentry *rt)
608 {
609 int error = 0, hlen, msiz;
610 struct gre_softc *sc = ifp->if_softc;
611 struct greip *gi;
612 struct gre_h *gh;
613 struct ip *eip, *ip;
614 u_int8_t ip_tos = 0;
615 u_int16_t etype = 0;
616 struct mobile_h mob_h;
617
618 if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) == 0 ||
619 sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
620 m_freem(m);
621 error = ENETDOWN;
622 goto end;
623 }
624
625 gi = NULL;
626 ip = NULL;
627
628 #if NBPFILTER >0
629 if (ifp->if_bpf)
630 bpf_mtap_af(ifp->if_bpf, dst->sa_family, m);
631 #endif
632
633 m->m_flags &= ~(M_BCAST|M_MCAST);
634
635 switch (sc->sc_proto) {
636 case IPPROTO_MOBILE:
637 if (dst->sa_family != AF_INET) {
638 IF_DROP(&ifp->if_snd);
639 m_freem(m);
640 error = EINVAL;
641 goto end;
642 }
643
644 if (M_UNWRITABLE(m, sizeof(*ip)) &&
645 (m = m_pullup(m, sizeof(*ip))) == NULL) {
646 error = ENOBUFS;
647 goto end;
648 }
649 ip = mtod(m, struct ip *);
650
651 memset(&mob_h, 0, MOB_H_SIZ_L);
652 mob_h.proto = (ip->ip_p) << 8;
653 mob_h.odst = ip->ip_dst.s_addr;
654 ip->ip_dst.s_addr = sc->g_dst.s_addr;
655
656 /*
657 * If the packet comes from our host, we only change
658 * the destination address in the IP header.
659 * Else we also need to save and change the source
660 */
661 if (in_hosteq(ip->ip_src, sc->g_src)) {
662 msiz = MOB_H_SIZ_S;
663 } else {
664 mob_h.proto |= MOB_H_SBIT;
665 mob_h.osrc = ip->ip_src.s_addr;
666 ip->ip_src.s_addr = sc->g_src.s_addr;
667 msiz = MOB_H_SIZ_L;
668 }
669 HTONS(mob_h.proto);
670 mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
671
672 M_PREPEND(m, msiz, M_DONTWAIT);
673 if (m == NULL) {
674 error = ENOBUFS;
675 goto end;
676 }
677 /* XXX Assuming that ip does not dangle after
678 * M_PREPEND. In practice, that's true, but
679 * that's not in M_PREPEND's contract.
680 */
681 memmove(mtod(m, void *), ip, sizeof(*ip));
682 ip = mtod(m, struct ip *);
683 memcpy(ip + 1, &mob_h, (size_t)msiz);
684 ip->ip_len = htons(ntohs(ip->ip_len) + msiz);
685 break;
686 case IPPROTO_UDP:
687 case IPPROTO_GRE:
688 GRE_DPRINTF(sc, "%s: dst->sa_family=%d\n", __func__,
689 dst->sa_family);
690 switch (dst->sa_family) {
691 case AF_INET:
692 ip = mtod(m, struct ip *);
693 ip_tos = ip->ip_tos;
694 etype = ETHERTYPE_IP;
695 break;
696 #ifdef NETATALK
697 case AF_APPLETALK:
698 etype = ETHERTYPE_ATALK;
699 break;
700 #endif
701 #ifdef INET6
702 case AF_INET6:
703 etype = ETHERTYPE_IPV6;
704 break;
705 #endif
706 default:
707 IF_DROP(&ifp->if_snd);
708 m_freem(m);
709 error = EAFNOSUPPORT;
710 goto end;
711 }
712 break;
713 default:
714 IF_DROP(&ifp->if_snd);
715 m_freem(m);
716 error = EINVAL;
717 goto end;
718 }
719
720 switch (sc->sc_proto) {
721 case IPPROTO_GRE:
722 hlen = sizeof(struct greip);
723 break;
724 case IPPROTO_UDP:
725 hlen = sizeof(struct gre_h);
726 break;
727 default:
728 hlen = 0;
729 break;
730 }
731
732 M_PREPEND(m, hlen, M_DONTWAIT);
733
734 if (m == NULL) {
735 IF_DROP(&ifp->if_snd);
736 error = ENOBUFS;
737 goto end;
738 }
739
740 switch (sc->sc_proto) {
741 case IPPROTO_UDP:
742 gh = mtod(m, struct gre_h *);
743 memset(gh, 0, sizeof(*gh));
744 gh->ptype = htons(etype);
745 /* XXX Need to handle IP ToS. Look at how I handle IP TTL. */
746 break;
747 case IPPROTO_GRE:
748 gi = mtod(m, struct greip *);
749 gh = &gi->gi_g;
750 eip = &gi->gi_i;
751 /* we don't have any GRE flags for now */
752 memset(gh, 0, sizeof(*gh));
753 gh->ptype = htons(etype);
754 eip->ip_src = sc->g_src;
755 eip->ip_dst = sc->g_dst;
756 eip->ip_hl = (sizeof(struct ip)) >> 2;
757 eip->ip_ttl = ip_gre_ttl;
758 eip->ip_tos = ip_tos;
759 eip->ip_len = htons(m->m_pkthdr.len);
760 eip->ip_p = sc->sc_proto;
761 break;
762 case IPPROTO_MOBILE:
763 eip = mtod(m, struct ip *);
764 eip->ip_p = sc->sc_proto;
765 break;
766 default:
767 error = EPROTONOSUPPORT;
768 m_freem(m);
769 goto end;
770 }
771
772 ifp->if_opackets++;
773 ifp->if_obytes += m->m_pkthdr.len;
774
775 /* send it off */
776 if (sc->sc_proto == IPPROTO_UDP) {
777 if (IF_QFULL(&sc->sc_snd)) {
778 IF_DROP(&sc->sc_snd);
779 error = ENOBUFS;
780 m_freem(m);
781 } else {
782 IF_ENQUEUE(&sc->sc_snd, m);
783 gre_wakeup(sc);
784 error = 0;
785 }
786 goto end;
787 }
788 if (sc->route.ro_rt == NULL)
789 rtcache_init(&sc->route);
790 else
791 rtcache_check(&sc->route);
792 if (sc->route.ro_rt == NULL) {
793 m_freem(m);
794 goto end;
795 }
796 if (sc->route.ro_rt->rt_ifp->if_softc == sc) {
797 rtcache_clear(&sc->route);
798 m_freem(m);
799 } else
800 error = ip_output(m, NULL, &sc->route, 0, NULL, NULL);
801 end:
802 if (error)
803 ifp->if_oerrors++;
804 return error;
805 }
806
807 /* Calling thread must hold sc->sc_mtx. */
808 static int
809 gre_kick(struct gre_softc *sc)
810 {
811 int rc;
812 struct ifnet *ifp = &sc->sc_if;
813
814 if (sc->sc_proto == IPPROTO_UDP && (ifp->if_flags & IFF_UP) == IFF_UP &&
815 !sc->sc_running) {
816 sc->sc_running = 1;
817 mutex_exit(&sc->sc_mtx);
818 rc = kthread_create1(gre_thread, sc, NULL, ifp->if_xname);
819 mutex_enter(&sc->sc_mtx);
820 if (rc != 0)
821 gre_stop(sc);
822 return rc;
823 } else {
824 gre_wakeup(sc);
825 return 0;
826 }
827 }
828
829 /* Calling thread must hold sc->sc_mtx. */
830 static int
831 gre_getname(struct socket *so, int req, struct mbuf *nam, struct lwp *l)
832 {
833 return (*so->so_proto->pr_usrreq)(so, req, NULL, nam, NULL, l);
834 }
835
836 /* Calling thread must hold sc->sc_mtx. */
837 static int
838 gre_getsockname(struct socket *so, struct mbuf *nam, struct lwp *l)
839 {
840 return gre_getname(so, PRU_SOCKADDR, nam, l);
841 }
842
843 /* Calling thread must hold sc->sc_mtx. */
844 static int
845 gre_getpeername(struct socket *so, struct mbuf *nam, struct lwp *l)
846 {
847 return gre_getname(so, PRU_PEERADDR, nam, l);
848 }
849
850 /* Calling thread must hold sc->sc_mtx. */
851 static int
852 gre_getnames(struct socket *so, struct lwp *l, struct sockaddr_in *src,
853 struct sockaddr_in *dst)
854 {
855 struct mbuf *m;
856 struct sockaddr_in *sin;
857 int rc;
858
859 if ((m = gre_getsockmbuf(so)) == NULL)
860 return ENOBUFS;
861
862 sin = mtod(m, struct sockaddr_in *);
863
864 if ((rc = gre_getsockname(so, m, l)) != 0)
865 goto out;
866 if (sin->sin_family != AF_INET) {
867 rc = EAFNOSUPPORT;
868 goto out;
869 }
870 *src = *sin;
871
872 if ((rc = gre_getpeername(so, m, l)) != 0)
873 goto out;
874 if (sin->sin_family != AF_INET) {
875 rc = EAFNOSUPPORT;
876 goto out;
877 }
878 *dst = *sin;
879
880 out:
881 m_freem(m);
882 return rc;
883 }
884
885 static void
886 gre_closef(struct file **fpp, struct lwp *l)
887 {
888 struct file *fp = *fpp;
889
890 simple_lock(&fp->f_slock);
891 FILE_USE(fp);
892 closef(fp, l);
893 *fpp = NULL;
894 }
895
896 static int
897 gre_ioctl(struct ifnet *ifp, u_long cmd, void *data)
898 {
899 u_char oproto;
900 struct file *fp;
901 struct socket *so;
902 struct sockaddr_in dst, src;
903 struct proc *p = curproc; /* XXX */
904 struct lwp *l = curlwp; /* XXX */
905 struct ifreq *ifr;
906 struct if_laddrreq *lifr = (struct if_laddrreq *)data;
907 struct gre_softc *sc = ifp->if_softc;
908 struct sockaddr_in si;
909 struct sockaddr *sa = NULL;
910 int error = 0;
911 #ifdef COMPAT_OIFREQ
912 u_long ocmd = cmd;
913 struct oifreq *oifr = NULL;
914 struct ifreq ifrb;
915
916 cmd = cvtcmd(cmd);
917 if (cmd != ocmd) {
918 oifr = data;
919 data = ifr = &ifrb;
920 ifreqo2n(oifr, ifr);
921 } else
922 #endif
923 ifr = data;
924
925 switch (cmd) {
926 case SIOCSIFFLAGS:
927 case SIOCSIFMTU:
928 case GRESPROTO:
929 case GRESADDRD:
930 case GRESADDRS:
931 case GRESSOCK:
932 case GREDSOCK:
933 case SIOCSLIFPHYADDR:
934 case SIOCDIFPHYADDR:
935 if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE,
936 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
937 NULL) != 0)
938 return EPERM;
939 break;
940 default:
941 break;
942 }
943
944 mutex_enter(&sc->sc_mtx);
945 switch (cmd) {
946 case SIOCSIFADDR:
947 ifp->if_flags |= IFF_UP;
948 if ((error = gre_kick(sc)) != 0)
949 ifp->if_flags &= ~IFF_UP;
950 break;
951 case SIOCSIFDSTADDR:
952 break;
953 case SIOCSIFFLAGS:
954 oproto = sc->sc_proto;
955 switch (ifr->ifr_flags & (IFF_LINK0|IFF_LINK2)) {
956 case IFF_LINK0|IFF_LINK2:
957 sc->sc_proto = IPPROTO_UDP;
958 if (oproto != IPPROTO_UDP)
959 ifp->if_flags &= ~IFF_RUNNING;
960 error = gre_kick(sc);
961 break;
962 case IFF_LINK0:
963 sc->sc_proto = IPPROTO_GRE;
964 gre_wakeup(sc);
965 goto recompute;
966 case 0:
967 sc->sc_proto = IPPROTO_MOBILE;
968 gre_wakeup(sc);
969 goto recompute;
970 }
971 break;
972 case SIOCSIFMTU:
973 if (ifr->ifr_mtu < 576) {
974 error = EINVAL;
975 break;
976 }
977 ifp->if_mtu = ifr->ifr_mtu;
978 break;
979 case SIOCGIFMTU:
980 ifr->ifr_mtu = sc->sc_if.if_mtu;
981 break;
982 case SIOCADDMULTI:
983 case SIOCDELMULTI:
984 if (ifr == 0) {
985 error = EAFNOSUPPORT;
986 break;
987 }
988 switch (ifr->ifr_addr.sa_family) {
989 #ifdef INET
990 case AF_INET:
991 break;
992 #endif
993 #ifdef INET6
994 case AF_INET6:
995 break;
996 #endif
997 default:
998 error = EAFNOSUPPORT;
999 break;
1000 }
1001 break;
1002 case GRESPROTO:
1003 oproto = sc->sc_proto;
1004 sc->sc_proto = ifr->ifr_flags;
1005 switch (sc->sc_proto) {
1006 case IPPROTO_UDP:
1007 ifp->if_flags |= IFF_LINK0|IFF_LINK2;
1008 if (oproto != IPPROTO_UDP)
1009 ifp->if_flags &= ~IFF_RUNNING;
1010 error = gre_kick(sc);
1011 break;
1012 case IPPROTO_GRE:
1013 ifp->if_flags |= IFF_LINK0;
1014 ifp->if_flags &= ~IFF_LINK2;
1015 goto recompute;
1016 case IPPROTO_MOBILE:
1017 ifp->if_flags &= ~(IFF_LINK0|IFF_LINK2);
1018 goto recompute;
1019 default:
1020 error = EPROTONOSUPPORT;
1021 break;
1022 }
1023 break;
1024 case GREGPROTO:
1025 ifr->ifr_flags = sc->sc_proto;
1026 break;
1027 case GRESADDRS:
1028 case GRESADDRD:
1029 /*
1030 * set tunnel endpoints, compute a less specific route
1031 * to the remote end and mark if as up
1032 */
1033 sa = &ifr->ifr_addr;
1034 if (cmd == GRESADDRS) {
1035 sc->g_src = (satosin(sa))->sin_addr;
1036 sc->g_srcport = satosin(sa)->sin_port;
1037 }
1038 if (cmd == GRESADDRD) {
1039 if (sc->sc_proto == IPPROTO_UDP &&
1040 satosin(sa)->sin_port == 0) {
1041 error = EINVAL;
1042 break;
1043 }
1044 sc->g_dst = (satosin(sa))->sin_addr;
1045 sc->g_dstport = satosin(sa)->sin_port;
1046 }
1047 recompute:
1048 if (sc->sc_proto == IPPROTO_UDP ||
1049 (sc->g_src.s_addr != INADDR_ANY &&
1050 sc->g_dst.s_addr != INADDR_ANY)) {
1051 rtcache_free(&sc->route);
1052 if (sc->sc_proto == IPPROTO_UDP)
1053 error = gre_kick(sc);
1054 else if (gre_compute_route(sc) == 0)
1055 ifp->if_flags |= IFF_RUNNING;
1056 else
1057 ifp->if_flags &= ~IFF_RUNNING;
1058 }
1059 break;
1060 case GREGADDRS:
1061 memset(&si, 0, sizeof(si));
1062 si.sin_family = AF_INET;
1063 si.sin_len = sizeof(struct sockaddr_in);
1064 si.sin_addr.s_addr = sc->g_src.s_addr;
1065 sa = sintosa(&si);
1066 ifr->ifr_addr = *sa;
1067 break;
1068 case GREGADDRD:
1069 memset(&si, 0, sizeof(si));
1070 si.sin_family = AF_INET;
1071 si.sin_len = sizeof(struct sockaddr_in);
1072 si.sin_addr.s_addr = sc->g_dst.s_addr;
1073 sa = sintosa(&si);
1074 ifr->ifr_addr = *sa;
1075 break;
1076 case GREDSOCK:
1077 if (sc->sc_proto != IPPROTO_UDP) {
1078 error = EINVAL;
1079 break;
1080 }
1081 ifp->if_flags &= ~IFF_UP;
1082 gre_wakeup(sc);
1083 break;
1084 case GRESSOCK:
1085 if (sc->sc_proto != IPPROTO_UDP) {
1086 error = EINVAL;
1087 break;
1088 }
1089 /* getsock() will FILE_USE() and unlock the descriptor for us */
1090 if ((error = getsock(p->p_fd, (int)ifr->ifr_value, &fp)) != 0)
1091 break;
1092 so = (struct socket *)fp->f_data;
1093 if (so->so_type != SOCK_DGRAM) {
1094 FILE_UNUSE(fp, NULL);
1095 error = EINVAL;
1096 break;
1097 }
1098 /* check address */
1099 if ((error = gre_getnames(so, curlwp, &src, &dst)) != 0) {
1100 FILE_UNUSE(fp, NULL);
1101 break;
1102 }
1103
1104 /* Increase reference count. Now that our reference
1105 * to the file descriptor is counted, this thread
1106 * can release our "use" of the descriptor, but it
1107 * will not be destroyed by some other thread's
1108 * action. This thread needs to release its use,
1109 * too, because one and only one thread can have
1110 * use of the descriptor at once. The kernel thread
1111 * will pick up the use if it needs it.
1112 */
1113
1114 fp->f_count++;
1115 FILE_UNUSE(fp, NULL);
1116
1117 while (sc->sc_fp != NULL && error == 0) {
1118 error = cv_timedwait_sig(&sc->sc_soparm_cv, &sc->sc_mtx,
1119 MAX(1, hz / 2));
1120 }
1121 if (error == 0) {
1122 sc->sc_fp = fp;
1123 ifp->if_flags |= IFF_UP;
1124 }
1125
1126 if (error != 0 || (error = gre_kick(sc)) != 0) {
1127 gre_closef(&fp, l);
1128 break;
1129 }
1130 /* fp does not any longer belong to this thread. */
1131 sc->g_src = src.sin_addr;
1132 sc->g_srcport = src.sin_port;
1133 sc->g_dst = dst.sin_addr;
1134 sc->g_dstport = dst.sin_port;
1135 break;
1136 case SIOCSLIFPHYADDR:
1137 if (lifr->addr.ss_family != AF_INET ||
1138 lifr->dstaddr.ss_family != AF_INET) {
1139 error = EAFNOSUPPORT;
1140 break;
1141 }
1142 if (lifr->addr.ss_len != sizeof(si) ||
1143 lifr->dstaddr.ss_len != sizeof(si)) {
1144 error = EINVAL;
1145 break;
1146 }
1147 sc->g_src = satosin(&lifr->addr)->sin_addr;
1148 sc->g_dst = satosin(&lifr->dstaddr)->sin_addr;
1149 sc->g_srcport = satosin(&lifr->addr)->sin_port;
1150 sc->g_dstport = satosin(&lifr->dstaddr)->sin_port;
1151 goto recompute;
1152 case SIOCDIFPHYADDR:
1153 sc->g_src.s_addr = INADDR_ANY;
1154 sc->g_dst.s_addr = INADDR_ANY;
1155 sc->g_srcport = 0;
1156 sc->g_dstport = 0;
1157 goto recompute;
1158 case SIOCGLIFPHYADDR:
1159 if (sc->g_src.s_addr == INADDR_ANY ||
1160 sc->g_dst.s_addr == INADDR_ANY) {
1161 error = EADDRNOTAVAIL;
1162 break;
1163 }
1164 memset(&si, 0, sizeof(si));
1165 si.sin_family = AF_INET;
1166 si.sin_len = sizeof(struct sockaddr_in);
1167 si.sin_addr = sc->g_src;
1168 if (sc->sc_proto == IPPROTO_UDP)
1169 si.sin_port = sc->g_srcport;
1170 memcpy(&lifr->addr, &si, sizeof(si));
1171 si.sin_addr = sc->g_dst;
1172 if (sc->sc_proto == IPPROTO_UDP)
1173 si.sin_port = sc->g_dstport;
1174 memcpy(&lifr->dstaddr, &si, sizeof(si));
1175 break;
1176 default:
1177 error = EINVAL;
1178 break;
1179 }
1180 #ifdef COMPAT_OIFREQ
1181 if (cmd != ocmd)
1182 ifreqn2o(oifr, ifr);
1183 #endif
1184 mutex_exit(&sc->sc_mtx);
1185 return error;
1186 }
1187
1188 /*
1189 * Compute a route to our destination.
1190 */
1191 static int
1192 gre_compute_route(struct gre_softc *sc)
1193 {
1194 struct route *ro;
1195 union {
1196 struct sockaddr dst;
1197 struct sockaddr_in dst4;
1198 } u;
1199
1200 ro = &sc->route;
1201
1202 memset(ro, 0, sizeof(*ro));
1203 sockaddr_in_init(&u.dst4, &sc->g_dst, 0);
1204 rtcache_setdst(ro, &u.dst);
1205
1206 rtcache_init(ro);
1207
1208 if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
1209 GRE_DPRINTF(sc, "%s: route to %s %s\n", sc->sc_if.if_xname,
1210 inet_ntoa(u.dst4.sin_addr),
1211 (ro->ro_rt == NULL)
1212 ? "does not exist"
1213 : "loops back to ourself");
1214 rtcache_free(ro);
1215 return EADDRNOTAVAIL;
1216 }
1217
1218 return 0;
1219 }
1220
/*
 * Checksum a flat buffer, much like in_cksum() does for mbufs.
 *
 * p	start of the buffer, read as 16-bit words in host byte order
 * len	length of the buffer in bytes; a trailing odd byte is summed as
 *	the first byte of a 16-bit word whose second byte is zero
 *
 * Returns the complement of the 16-bit sum with end-around carry.
 */
u_int16_t
gre_in_cksum(u_int16_t *p, u_int len)
{
	u_int32_t acc = 0;
	int nwords = len >> 1;
	int i;

	for (i = 0; i < nwords; i++)
		acc += p[i];

	if ((len & 1) != 0) {
		union {
			u_short w;
			u_char c[2];
		} tail;

		tail.c[0] = *(u_char *)(p + nwords);
		tail.c[1] = 0;
		acc += tail.w;
	}

	/* end-around-carry */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += acc >> 16;

	return ~acc;
}
1249 #endif
1250
void greattach(int);

/*
 * Pseudo-device attach routine.  With INET configured it initializes
 * gre_softc_list and registers the "gre" interface cloner; without INET
 * it does nothing.  The count argument is unused.
 */
/* ARGSUSED */
void
greattach(int count)
{
#ifdef INET
	LIST_INIT(&gre_softc_list);
	if_clone_attach(&gre_cloner);
#endif /* INET */
}
1262