if_gre.c revision 1.93 1 /* $NetBSD: if_gre.c,v 1.93 2007/05/06 02:47:52 dyoung Exp $ */
2
3 /*
4 * Copyright (c) 1998 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Heiko W.Rupp <hwr (at) pilhuhn.de>
9 *
10 * IPv6-over-GRE contributed by Gert Doering <gert (at) greenie.muc.de>
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the NetBSD
23 * Foundation, Inc. and its contributors.
24 * 4. Neither the name of The NetBSD Foundation nor the names of its
25 * contributors may be used to endorse or promote products derived
26 * from this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
29 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
32 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 */
40
41 /*
42 * Encapsulate L3 protocols into IP
43 * See RFC 1701 and 1702 for more details.
44 * If_gre is compatible with Cisco GRE tunnels, so you can
45 * have a NetBSD box as the other end of a tunnel interface of a Cisco
46 * router. See gre(4) for more details.
47 * Also supported: IP in IP encaps (proto 55) as of RFC 2004
48 */
49
50 #include <sys/cdefs.h>
51 __KERNEL_RCSID(0, "$NetBSD: if_gre.c,v 1.93 2007/05/06 02:47:52 dyoung Exp $");
52
53 #include "opt_gre.h"
54 #include "opt_inet.h"
55 #include "bpfilter.h"
56
57 #ifdef INET
58 #include <sys/param.h>
59 #include <sys/file.h>
60 #include <sys/filedesc.h>
61 #include <sys/malloc.h>
62 #include <sys/mbuf.h>
63 #include <sys/proc.h>
64 #include <sys/protosw.h>
65 #include <sys/socket.h>
66 #include <sys/socketvar.h>
67 #include <sys/ioctl.h>
68 #include <sys/queue.h>
69 #if __NetBSD__
70 #include <sys/systm.h>
71 #include <sys/sysctl.h>
72 #include <sys/kauth.h>
73 #endif
74
75 #include <sys/kernel.h>
76 #include <sys/mutex.h>
77 #include <sys/condvar.h>
78 #include <sys/kthread.h>
79
80 #include <machine/cpu.h>
81
82 #include <net/ethertypes.h>
83 #include <net/if.h>
84 #include <net/if_types.h>
85 #include <net/netisr.h>
86 #include <net/route.h>
87
88 #ifdef INET
89 #include <netinet/in.h>
90 #include <netinet/in_systm.h>
91 #include <netinet/in_var.h>
92 #include <netinet/ip.h>
93 #include <netinet/ip_var.h>
94 #else
95 #error "Huh? if_gre without inet?"
96 #endif
97
98
99 #ifdef NETATALK
100 #include <netatalk/at.h>
101 #include <netatalk/at_var.h>
102 #include <netatalk/at_extern.h>
103 #endif
104
105 #if NBPFILTER > 0
106 #include <sys/time.h>
107 #include <net/bpf.h>
108 #endif
109
110 #include <net/if_gre.h>
111
/*
 * Default MTU for a freshly created tunnel interface.  Working out the
 * exact value is left to the administrator; 1476 is the default that
 * other vendors ship.
 */
#define	GREMTU	1476

/*
 * GRE_DPRINTF(sc, fmt, ...): debugging printf.
 *
 * Without GRE_DEBUG the macro expands to an empty statement and none of
 * its arguments are evaluated.  With GRE_DEBUG it prints only while
 * IFF_DEBUG is set on the softc's interface.  At least one argument must
 * follow the format string.
 */
#ifndef GRE_DEBUG
#define	GRE_DPRINTF(__sc, __fmt, ...)	do { } while (/*CONSTCOND*/0)
#else /* GRE_DEBUG */
#define	GRE_DPRINTF(__sc, __fmt, ...)					\
	do {								\
		if (((__sc)->sc_if.if_flags & IFF_DEBUG) != 0)		\
			printf(__fmt, __VA_ARGS__);			\
	} while (/*CONSTCOND*/0)
#endif /* GRE_DEBUG */
128
129 struct gre_softc_head gre_softc_list;
130 int ip_gre_ttl = GRE_TTL;
131
132 static int gre_clone_create(struct if_clone *, int);
133 static int gre_clone_destroy(struct ifnet *);
134
135 static struct if_clone gre_cloner =
136 IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
137
138 static int gre_output(struct ifnet *, struct mbuf *,
139 const struct sockaddr *, struct rtentry *);
140 static int gre_ioctl(struct ifnet *, u_long, void *);
141
142 static int gre_compute_route(struct gre_softc *sc);
143
144 static void gre_closef(struct file **, struct lwp *);
145 static int gre_getsockname(struct socket *, struct mbuf *, struct lwp *);
146 static int gre_getpeername(struct socket *, struct mbuf *, struct lwp *);
147 static int gre_getnames(struct socket *, struct lwp *, struct sockaddr_in *,
148 struct sockaddr_in *);
149
150 /* Calling thread must hold sc->sc_mtx. */
151 static void
152 gre_stop(struct gre_softc *sc)
153 {
154 sc->sc_running = 0;
155 cv_signal(&sc->sc_join_cv);
156 }
157
158 /* Calling thread must hold sc->sc_mtx. */
159 static void
160 gre_join(struct gre_softc *sc)
161 {
162 while (sc->sc_running != 0)
163 cv_wait(&sc->sc_join_cv, &sc->sc_mtx);
164 }
165
166 /* Calling thread must hold sc->sc_mtx. */
167 static void
168 gre_wakeup(struct gre_softc *sc)
169 {
170 GRE_DPRINTF(sc, "%s: enter\n", __func__);
171 sc->sc_haswork = 1;
172 cv_signal(&sc->sc_work_cv);
173 }
174
175 static int
176 gre_clone_create(struct if_clone *ifc, int unit)
177 {
178 struct gre_softc *sc;
179
180 sc = malloc(sizeof(struct gre_softc), M_DEVBUF, M_WAITOK);
181 memset(sc, 0, sizeof(struct gre_softc));
182 mutex_init(&sc->sc_mtx, MUTEX_DRIVER, IPL_NET);
183 cv_init(&sc->sc_work_cv, "gre work");
184 cv_init(&sc->sc_join_cv, "gre join");
185 cv_init(&sc->sc_soparm_cv, "gre soparm");
186
187 snprintf(sc->sc_if.if_xname, sizeof(sc->sc_if.if_xname), "%s%d",
188 ifc->ifc_name, unit);
189 sc->sc_if.if_softc = sc;
190 sc->sc_if.if_type = IFT_TUNNEL;
191 sc->sc_if.if_addrlen = 0;
192 sc->sc_if.if_hdrlen = 24; /* IP + GRE */
193 sc->sc_if.if_dlt = DLT_NULL;
194 sc->sc_if.if_mtu = GREMTU;
195 sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
196 sc->sc_if.if_output = gre_output;
197 sc->sc_if.if_ioctl = gre_ioctl;
198 sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
199 sc->g_dstport = sc->g_srcport = 0;
200 sc->sc_proto = IPPROTO_GRE;
201 sc->sc_snd.ifq_maxlen = 256;
202 sc->sc_if.if_flags |= IFF_LINK0;
203 if_attach(&sc->sc_if);
204 if_alloc_sadl(&sc->sc_if);
205 #if NBPFILTER > 0
206 bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t));
207 #endif
208 LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
209 return 0;
210 }
211
212 static int
213 gre_clone_destroy(struct ifnet *ifp)
214 {
215 struct gre_softc *sc = ifp->if_softc;
216
217 LIST_REMOVE(sc, sc_list);
218 #if NBPFILTER > 0
219 bpfdetach(ifp);
220 #endif
221 if_detach(ifp);
222 mutex_enter(&sc->sc_mtx);
223 gre_wakeup(sc);
224 gre_join(sc);
225 mutex_exit(&sc->sc_mtx);
226 rtcache_free(&sc->route);
227
228 cv_destroy(&sc->sc_soparm_cv);
229 cv_destroy(&sc->sc_join_cv);
230 cv_destroy(&sc->sc_work_cv);
231 mutex_destroy(&sc->sc_mtx);
232 free(sc, M_DEVBUF);
233
234 return 0;
235 }
236
/*
 * Socket upcall installed by gre_upcall_add().  `arg' is the gre softc;
 * `so' and `waitflag' are unused.  All it does is wake the worker thread.
 *
 * NOTE(review): gre_wakeup() documents that sc_mtx must be held, but the
 * mutex is not taken here -- confirm the locking at the upcall site.
 */
static void
gre_receive(struct socket *so, void *arg, int waitflag)
{
	struct gre_softc *sc = arg;

	GRE_DPRINTF(sc, "%s: enter\n", __func__);
	gre_wakeup(sc);
}
246
247 static void
248 gre_upcall_add(struct socket *so, void *arg)
249 {
250 /* XXX What if the kernel already set an upcall? */
251 so->so_upcallarg = arg;
252 so->so_upcall = gre_receive;
253 so->so_rcv.sb_flags |= SB_UPCALL;
254 }
255
256 static void
257 gre_upcall_remove(struct socket *so)
258 {
259 /* XXX What if the kernel already set an upcall? */
260 so->so_rcv.sb_flags &= ~SB_UPCALL;
261 so->so_upcallarg = NULL;
262 so->so_upcall = NULL;
263 }
264
265 static void
266 gre_sodestroy(struct socket **sop)
267 {
268 gre_upcall_remove(*sop);
269 soshutdown(*sop, SHUT_RDWR);
270 soclose(*sop);
271 *sop = NULL;
272 }
273
274 static struct mbuf *
275 gre_getsockmbuf(struct socket *so)
276 {
277 struct mbuf *m;
278
279 m = m_get(M_WAIT, MT_SONAME);
280 if (m != NULL)
281 MCLAIM(m, so->so_mowner);
282 return m;
283 }
284
285 static int
286 gre_socreate1(struct gre_softc *sc, struct lwp *l, struct gre_soparm *sp,
287 struct socket **sop)
288 {
289 int rc;
290 struct mbuf *m;
291 struct sockaddr_in *sin;
292 struct socket *so;
293
294 GRE_DPRINTF(sc, "%s: enter\n", __func__);
295 rc = socreate(AF_INET, sop, SOCK_DGRAM, IPPROTO_UDP, l);
296 if (rc != 0) {
297 GRE_DPRINTF(sc, "%s: socreate failed\n", __func__);
298 return rc;
299 }
300
301 so = *sop;
302
303 gre_upcall_add(so, sc);
304 if ((m = gre_getsockmbuf(so)) == NULL) {
305 rc = ENOBUFS;
306 goto out;
307 }
308 sin = mtod(m, struct sockaddr_in *);
309 sin->sin_len = m->m_len = sizeof(struct sockaddr_in);
310 sin->sin_family = AF_INET;
311 sin->sin_addr = sc->g_src;
312 sin->sin_port = sc->g_srcport;
313
314 GRE_DPRINTF(sc, "%s: bind 0x%08" PRIx32 " port %d\n", __func__,
315 sin->sin_addr.s_addr, ntohs(sin->sin_port));
316 if ((rc = sobind(so, m, l)) != 0) {
317 GRE_DPRINTF(sc, "%s: sobind failed\n", __func__);
318 goto out;
319 }
320
321 if (sc->g_srcport == 0) {
322 if ((rc = gre_getsockname(so, m, l)) != 0) {
323 GRE_DPRINTF(sc, "%s: gre_getsockname failed\n",
324 __func__);
325 goto out;
326 }
327 sc->g_srcport = sin->sin_port;
328 }
329
330 sin->sin_addr = sc->g_dst;
331 sin->sin_port = sc->g_dstport;
332
333 if ((rc = soconnect(so, m, l)) != 0) {
334 GRE_DPRINTF(sc, "%s: soconnect failed\n", __func__);
335 goto out;
336 }
337
338 *mtod(m, int *) = ip_gre_ttl;
339 m->m_len = sizeof(int);
340 rc = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so, IPPROTO_IP, IP_TTL,
341 &m);
342 m = NULL;
343 if (rc != 0) {
344 printf("%s: setopt ttl failed\n", __func__);
345 rc = 0;
346 }
347 out:
348 m_freem(m);
349
350 if (rc != 0)
351 gre_sodestroy(sop);
352 else
353 *sp = sc->sc_soparm;
354
355 return rc;
356 }
357
358 static void
359 gre_thread1(struct gre_softc *sc, struct lwp *l)
360 {
361 int flags, rc;
362 const struct gre_h *gh;
363 struct ifnet *ifp = &sc->sc_if;
364 struct mbuf *m;
365 struct socket *so = NULL;
366 struct uio uio;
367 struct gre_soparm sp;
368 struct file *fp = NULL;
369
370 GRE_DPRINTF(sc, "%s: enter\n", __func__);
371 mutex_enter(&sc->sc_mtx);
372
373 sc->sc_haswork = 1;
374
375 memset(&sp, 0, sizeof(sp));
376 memset(&uio, 0, sizeof(uio));
377
378 ifp->if_flags |= IFF_RUNNING;
379
380 for (;;) {
381 while (sc->sc_haswork == 0) {
382 GRE_DPRINTF(sc, "%s: sleeping\n", __func__);
383 cv_wait(&sc->sc_work_cv, &sc->sc_mtx);
384 }
385 sc->sc_haswork = 0;
386 GRE_DPRINTF(sc, "%s: awake\n", __func__);
387 if ((ifp->if_flags & IFF_UP) != IFF_UP) {
388 GRE_DPRINTF(sc, "%s: not up & running; exiting\n",
389 __func__);
390 break;
391 }
392 if (sc->sc_proto != IPPROTO_UDP) {
393 GRE_DPRINTF(sc, "%s: not udp; exiting\n", __func__);
394 break;
395 }
396 /* XXX optimize */
397 if (so == NULL || sc->sc_fp != NULL ||
398 memcmp(&sp, &sc->sc_soparm, sizeof(sp)) != 0) {
399 GRE_DPRINTF(sc, "%s: parameters changed\n", __func__);
400
401 if (fp != NULL) {
402 gre_closef(&fp, curlwp);
403 so = NULL;
404 } else if (so != NULL)
405 gre_sodestroy(&so);
406
407 if (sc->sc_fp != NULL) {
408 fp = sc->sc_fp;
409 sc->sc_fp = NULL;
410 so = (struct socket *)fp->f_data;
411 gre_upcall_add(so, sc);
412 sp = sc->sc_soparm;
413 } else if (gre_socreate1(sc, l, &sp, &so) != 0)
414 goto out;
415 }
416 cv_signal(&sc->sc_soparm_cv);
417 for (;;) {
418 flags = MSG_DONTWAIT;
419 uio.uio_resid = 1000000;
420 rc = (*so->so_receive)(so, NULL, &uio, &m, NULL,
421 &flags);
422 /* TBD Back off if ECONNREFUSED (indicates
423 * ICMP Port Unreachable)?
424 */
425 if (rc == EWOULDBLOCK) {
426 GRE_DPRINTF(sc, "%s: so_receive EWOULDBLOCK\n",
427 __func__);
428 break;
429 } else if (rc != 0 || m == NULL) {
430 GRE_DPRINTF(sc, "%s: rc %d m %p\n",
431 ifp->if_xname, rc, (void *)m);
432 continue;
433 } else
434 GRE_DPRINTF(sc, "%s: so_receive ok\n",
435 __func__);
436 if (m->m_len < sizeof(*gh) &&
437 (m = m_pullup(m, sizeof(*gh))) == NULL) {
438 GRE_DPRINTF(sc, "%s: m_pullup failed\n",
439 __func__);
440 continue;
441 }
442 gh = mtod(m, const struct gre_h *);
443
444 if (gre_input3(sc, m, 0, gh, 1) == 0) {
445 GRE_DPRINTF(sc, "%s: dropping unsupported\n",
446 __func__);
447 m_freem(m);
448 }
449 }
450 for (;;) {
451 IF_DEQUEUE(&sc->sc_snd, m);
452 if (m == NULL)
453 break;
454 GRE_DPRINTF(sc, "%s: dequeue\n", __func__);
455 if ((so->so_state & SS_ISCONNECTED) == 0) {
456 GRE_DPRINTF(sc, "%s: not connected\n",
457 __func__);
458 m_freem(m);
459 continue;
460 }
461 rc = (*so->so_send)(so, NULL, NULL, m, NULL, 0, l);
462 /* XXX handle ENOBUFS? */
463 if (rc != 0)
464 GRE_DPRINTF(sc, "%s: so_send failed\n",
465 __func__);
466 }
467 }
468 if (fp != NULL) {
469 GRE_DPRINTF(sc, "%s: removing upcall\n", __func__);
470 gre_upcall_remove(so);
471 } else if (so != NULL)
472 gre_sodestroy(&so);
473 out:
474 GRE_DPRINTF(sc, "%s: stopping\n", __func__);
475 if (fp != NULL)
476 gre_closef(&fp, curlwp);
477 if (sc->sc_proto == IPPROTO_UDP)
478 ifp->if_flags &= ~IFF_RUNNING;
479 while (!IF_IS_EMPTY(&sc->sc_snd)) {
480 IF_DEQUEUE(&sc->sc_snd, m);
481 m_freem(m);
482 }
483 gre_stop(sc);
484 /* must not touch sc after this! */
485 GRE_DPRINTF(sc, "%s: restore ipl\n", __func__);
486 mutex_exit(&sc->sc_mtx);
487 }
488
489 static void
490 gre_thread(void *arg)
491 {
492 struct gre_softc *sc = (struct gre_softc *)arg;
493
494 gre_thread1(sc, curlwp);
495 /* must not touch sc after this! */
496 kthread_exit(0);
497 }
498
499 /* Calling thread must hold sc->sc_mtx. */
500 int
501 gre_input3(struct gre_softc *sc, struct mbuf *m, int hlen,
502 const struct gre_h *gh, int mtx_held)
503 {
504 u_int16_t flags;
505 #if NBPFILTER > 0
506 u_int32_t af = AF_INET; /* af passed to BPF tap */
507 #endif
508 int isr;
509 struct ifqueue *ifq;
510
511 sc->sc_if.if_ipackets++;
512 sc->sc_if.if_ibytes += m->m_pkthdr.len;
513
514 hlen += sizeof(struct gre_h);
515
516 /* process GRE flags as packet can be of variable len */
517 flags = ntohs(gh->flags);
518
519 /* Checksum & Offset are present */
520 if ((flags & GRE_CP) | (flags & GRE_RP))
521 hlen += 4;
522 /* We don't support routing fields (variable length) */
523 if (flags & GRE_RP) {
524 sc->sc_if.if_ierrors++;
525 return 0;
526 }
527 if (flags & GRE_KP)
528 hlen += 4;
529 if (flags & GRE_SP)
530 hlen += 4;
531
532 switch (ntohs(gh->ptype)) { /* ethertypes */
533 case ETHERTYPE_IP: /* shouldn't need a schednetisr(), as */
534 ifq = &ipintrq; /* we are in ip_input */
535 isr = NETISR_IP;
536 break;
537 #ifdef NETATALK
538 case ETHERTYPE_ATALK:
539 ifq = &atintrq1;
540 isr = NETISR_ATALK;
541 #if NBPFILTER > 0
542 af = AF_APPLETALK;
543 #endif
544 break;
545 #endif
546 #ifdef INET6
547 case ETHERTYPE_IPV6:
548 GRE_DPRINTF(sc, "%s: IPv6 packet\n", __func__);
549 ifq = &ip6intrq;
550 isr = NETISR_IPV6;
551 #if NBPFILTER > 0
552 af = AF_INET6;
553 #endif
554 break;
555 #endif
556 default: /* others not yet supported */
557 GRE_DPRINTF(sc, "%s: unhandled ethertype 0x%04x\n", __func__,
558 ntohs(gh->ptype));
559 sc->sc_if.if_noproto++;
560 return 0;
561 }
562
563 if (hlen > m->m_pkthdr.len) {
564 m_freem(m);
565 sc->sc_if.if_ierrors++;
566 return EINVAL;
567 }
568 m_adj(m, hlen);
569
570 #if NBPFILTER > 0
571 if (sc->sc_if.if_bpf != NULL)
572 bpf_mtap_af(sc->sc_if.if_bpf, af, m);
573 #endif /*NBPFILTER > 0*/
574
575 m->m_pkthdr.rcvif = &sc->sc_if;
576
577 if (!mtx_held)
578 mutex_enter(&sc->sc_mtx);
579 if (IF_QFULL(ifq)) {
580 IF_DROP(ifq);
581 m_freem(m);
582 } else {
583 IF_ENQUEUE(ifq, m);
584 }
585 /* we need schednetisr since the address family may change */
586 schednetisr(isr);
587 if (!mtx_held)
588 mutex_exit(&sc->sc_mtx);
589
590 return 1; /* packet is done, no further processing needed */
591 }
592
593 /*
594 * The output routine. Takes a packet and encapsulates it in the protocol
595 * given by sc->sc_proto. See also RFC 1701 and RFC 2004
596 */
597 static int
598 gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
599 struct rtentry *rt)
600 {
601 int error = 0, hlen, msiz;
602 struct gre_softc *sc = ifp->if_softc;
603 struct greip *gi;
604 struct gre_h *gh;
605 struct ip *eip, *ip;
606 u_int8_t ip_tos = 0;
607 u_int16_t etype = 0;
608 struct mobile_h mob_h;
609
610 if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) == 0 ||
611 sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
612 m_freem(m);
613 error = ENETDOWN;
614 goto end;
615 }
616
617 gi = NULL;
618 ip = NULL;
619
620 #if NBPFILTER >0
621 if (ifp->if_bpf)
622 bpf_mtap_af(ifp->if_bpf, dst->sa_family, m);
623 #endif
624
625 m->m_flags &= ~(M_BCAST|M_MCAST);
626
627 switch (sc->sc_proto) {
628 case IPPROTO_MOBILE:
629 if (dst->sa_family != AF_INET) {
630 IF_DROP(&ifp->if_snd);
631 m_freem(m);
632 error = EINVAL;
633 goto end;
634 }
635
636 if (M_UNWRITABLE(m, sizeof(*ip)) &&
637 (m = m_pullup(m, sizeof(*ip))) == NULL) {
638 error = ENOBUFS;
639 goto end;
640 }
641 ip = mtod(m, struct ip *);
642
643 memset(&mob_h, 0, MOB_H_SIZ_L);
644 mob_h.proto = (ip->ip_p) << 8;
645 mob_h.odst = ip->ip_dst.s_addr;
646 ip->ip_dst.s_addr = sc->g_dst.s_addr;
647
648 /*
649 * If the packet comes from our host, we only change
650 * the destination address in the IP header.
651 * Else we also need to save and change the source
652 */
653 if (in_hosteq(ip->ip_src, sc->g_src)) {
654 msiz = MOB_H_SIZ_S;
655 } else {
656 mob_h.proto |= MOB_H_SBIT;
657 mob_h.osrc = ip->ip_src.s_addr;
658 ip->ip_src.s_addr = sc->g_src.s_addr;
659 msiz = MOB_H_SIZ_L;
660 }
661 HTONS(mob_h.proto);
662 mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
663
664 M_PREPEND(m, msiz, M_DONTWAIT);
665 if (m == NULL) {
666 error = ENOBUFS;
667 goto end;
668 }
669 /* XXX Assuming that ip does not dangle after
670 * M_PREPEND. In practice, that's true, but
671 * that's not in M_PREPEND's contract.
672 */
673 memmove(mtod(m, void *), ip, sizeof(*ip));
674 ip = mtod(m, struct ip *);
675 memcpy(ip + 1, &mob_h, (size_t)msiz);
676 ip->ip_len = htons(ntohs(ip->ip_len) + msiz);
677 break;
678 case IPPROTO_UDP:
679 case IPPROTO_GRE:
680 GRE_DPRINTF(sc, "%s: dst->sa_family=%d\n", __func__,
681 dst->sa_family);
682 switch (dst->sa_family) {
683 case AF_INET:
684 ip = mtod(m, struct ip *);
685 ip_tos = ip->ip_tos;
686 etype = ETHERTYPE_IP;
687 break;
688 #ifdef NETATALK
689 case AF_APPLETALK:
690 etype = ETHERTYPE_ATALK;
691 break;
692 #endif
693 #ifdef INET6
694 case AF_INET6:
695 etype = ETHERTYPE_IPV6;
696 break;
697 #endif
698 default:
699 IF_DROP(&ifp->if_snd);
700 m_freem(m);
701 error = EAFNOSUPPORT;
702 goto end;
703 }
704 break;
705 default:
706 IF_DROP(&ifp->if_snd);
707 m_freem(m);
708 error = EINVAL;
709 goto end;
710 }
711
712 switch (sc->sc_proto) {
713 case IPPROTO_GRE:
714 hlen = sizeof(struct greip);
715 break;
716 case IPPROTO_UDP:
717 hlen = sizeof(struct gre_h);
718 break;
719 default:
720 hlen = 0;
721 break;
722 }
723
724 M_PREPEND(m, hlen, M_DONTWAIT);
725
726 if (m == NULL) {
727 IF_DROP(&ifp->if_snd);
728 error = ENOBUFS;
729 goto end;
730 }
731
732 switch (sc->sc_proto) {
733 case IPPROTO_UDP:
734 gh = mtod(m, struct gre_h *);
735 memset(gh, 0, sizeof(*gh));
736 gh->ptype = htons(etype);
737 /* XXX Need to handle IP ToS. Look at how I handle IP TTL. */
738 break;
739 case IPPROTO_GRE:
740 gi = mtod(m, struct greip *);
741 gh = &gi->gi_g;
742 eip = &gi->gi_i;
743 /* we don't have any GRE flags for now */
744 memset(gh, 0, sizeof(*gh));
745 gh->ptype = htons(etype);
746 eip->ip_src = sc->g_src;
747 eip->ip_dst = sc->g_dst;
748 eip->ip_hl = (sizeof(struct ip)) >> 2;
749 eip->ip_ttl = ip_gre_ttl;
750 eip->ip_tos = ip_tos;
751 eip->ip_len = htons(m->m_pkthdr.len);
752 eip->ip_p = sc->sc_proto;
753 break;
754 case IPPROTO_MOBILE:
755 eip = mtod(m, struct ip *);
756 eip->ip_p = sc->sc_proto;
757 break;
758 default:
759 error = EPROTONOSUPPORT;
760 m_freem(m);
761 goto end;
762 }
763
764 ifp->if_opackets++;
765 ifp->if_obytes += m->m_pkthdr.len;
766
767 /* send it off */
768 if (sc->sc_proto == IPPROTO_UDP) {
769 if (IF_QFULL(&sc->sc_snd)) {
770 IF_DROP(&sc->sc_snd);
771 error = ENOBUFS;
772 m_freem(m);
773 } else {
774 IF_ENQUEUE(&sc->sc_snd, m);
775 gre_wakeup(sc);
776 error = 0;
777 }
778 goto end;
779 }
780 if (sc->route.ro_rt == NULL)
781 rtcache_init(&sc->route);
782 else
783 rtcache_check(&sc->route);
784 if (sc->route.ro_rt == NULL) {
785 m_freem(m);
786 goto end;
787 }
788 if (sc->route.ro_rt->rt_ifp->if_softc == sc) {
789 rtcache_clear(&sc->route);
790 m_freem(m);
791 } else
792 error = ip_output(m, NULL, &sc->route, 0, NULL, NULL);
793 end:
794 if (error)
795 ifp->if_oerrors++;
796 return error;
797 }
798
799 /* Calling thread must hold sc->sc_mtx. */
800 static int
801 gre_kick(struct gre_softc *sc)
802 {
803 int rc;
804 struct ifnet *ifp = &sc->sc_if;
805
806 if (sc->sc_proto == IPPROTO_UDP && (ifp->if_flags & IFF_UP) == IFF_UP &&
807 !sc->sc_running) {
808 sc->sc_running = 1;
809 mutex_exit(&sc->sc_mtx);
810 rc = kthread_create1(gre_thread, sc, NULL, ifp->if_xname);
811 mutex_enter(&sc->sc_mtx);
812 if (rc != 0)
813 gre_stop(sc);
814 return rc;
815 } else {
816 gre_wakeup(sc);
817 return 0;
818 }
819 }
820
821 /* Calling thread must hold sc->sc_mtx. */
822 static int
823 gre_getname(struct socket *so, int req, struct mbuf *nam, struct lwp *l)
824 {
825 return (*so->so_proto->pr_usrreq)(so, req, NULL, nam, NULL, l);
826 }
827
828 /* Calling thread must hold sc->sc_mtx. */
829 static int
830 gre_getsockname(struct socket *so, struct mbuf *nam, struct lwp *l)
831 {
832 return gre_getname(so, PRU_SOCKADDR, nam, l);
833 }
834
835 /* Calling thread must hold sc->sc_mtx. */
836 static int
837 gre_getpeername(struct socket *so, struct mbuf *nam, struct lwp *l)
838 {
839 return gre_getname(so, PRU_PEERADDR, nam, l);
840 }
841
842 /* Calling thread must hold sc->sc_mtx. */
843 static int
844 gre_getnames(struct socket *so, struct lwp *l, struct sockaddr_in *src,
845 struct sockaddr_in *dst)
846 {
847 struct mbuf *m;
848 struct sockaddr_in *sin;
849 int rc;
850
851 if ((m = gre_getsockmbuf(so)) == NULL)
852 return ENOBUFS;
853
854 sin = mtod(m, struct sockaddr_in *);
855
856 if ((rc = gre_getsockname(so, m, l)) != 0)
857 goto out;
858 if (sin->sin_family != AF_INET) {
859 rc = EAFNOSUPPORT;
860 goto out;
861 }
862 *src = *sin;
863
864 if ((rc = gre_getpeername(so, m, l)) != 0)
865 goto out;
866 if (sin->sin_family != AF_INET) {
867 rc = EAFNOSUPPORT;
868 goto out;
869 }
870 *dst = *sin;
871
872 out:
873 m_freem(m);
874 return rc;
875 }
876
877 static void
878 gre_closef(struct file **fpp, struct lwp *l)
879 {
880 struct file *fp = *fpp;
881
882 simple_lock(&fp->f_slock);
883 FILE_USE(fp);
884 closef(fp, l);
885 *fpp = NULL;
886 }
887
888 static int
889 gre_ioctl(struct ifnet *ifp, u_long cmd, void *data)
890 {
891 u_char oproto;
892 struct file *fp;
893 struct socket *so;
894 struct sockaddr_in dst, src;
895 struct proc *p = curproc; /* XXX */
896 struct lwp *l = curlwp; /* XXX */
897 struct ifreq *ifr = (struct ifreq *)data;
898 struct if_laddrreq *lifr = (struct if_laddrreq *)data;
899 struct gre_softc *sc = ifp->if_softc;
900 struct sockaddr_in si;
901 struct sockaddr *sa = NULL;
902 int error = 0;
903
904 switch (cmd) {
905 case SIOCSIFFLAGS:
906 case SIOCSIFMTU:
907 case GRESPROTO:
908 case GRESADDRD:
909 case GRESADDRS:
910 case GRESSOCK:
911 case GREDSOCK:
912 case SIOCSLIFPHYADDR:
913 case SIOCDIFPHYADDR:
914 if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE,
915 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
916 NULL) != 0)
917 return EPERM;
918 break;
919 default:
920 break;
921 }
922
923 mutex_enter(&sc->sc_mtx);
924 switch (cmd) {
925 case SIOCSIFADDR:
926 ifp->if_flags |= IFF_UP;
927 if ((error = gre_kick(sc)) != 0)
928 ifp->if_flags &= ~IFF_UP;
929 break;
930 case SIOCSIFDSTADDR:
931 break;
932 case SIOCSIFFLAGS:
933 oproto = sc->sc_proto;
934 switch (ifr->ifr_flags & (IFF_LINK0|IFF_LINK2)) {
935 case IFF_LINK0|IFF_LINK2:
936 sc->sc_proto = IPPROTO_UDP;
937 if (oproto != IPPROTO_UDP)
938 ifp->if_flags &= ~IFF_RUNNING;
939 error = gre_kick(sc);
940 break;
941 case IFF_LINK0:
942 sc->sc_proto = IPPROTO_GRE;
943 gre_wakeup(sc);
944 goto recompute;
945 case 0:
946 sc->sc_proto = IPPROTO_MOBILE;
947 gre_wakeup(sc);
948 goto recompute;
949 }
950 break;
951 case SIOCSIFMTU:
952 if (ifr->ifr_mtu < 576) {
953 error = EINVAL;
954 break;
955 }
956 ifp->if_mtu = ifr->ifr_mtu;
957 break;
958 case SIOCGIFMTU:
959 ifr->ifr_mtu = sc->sc_if.if_mtu;
960 break;
961 case SIOCADDMULTI:
962 case SIOCDELMULTI:
963 if (ifr == 0) {
964 error = EAFNOSUPPORT;
965 break;
966 }
967 switch (ifr->ifr_addr.sa_family) {
968 #ifdef INET
969 case AF_INET:
970 break;
971 #endif
972 #ifdef INET6
973 case AF_INET6:
974 break;
975 #endif
976 default:
977 error = EAFNOSUPPORT;
978 break;
979 }
980 break;
981 case GRESPROTO:
982 oproto = sc->sc_proto;
983 sc->sc_proto = ifr->ifr_flags;
984 switch (sc->sc_proto) {
985 case IPPROTO_UDP:
986 ifp->if_flags |= IFF_LINK0|IFF_LINK2;
987 if (oproto != IPPROTO_UDP)
988 ifp->if_flags &= ~IFF_RUNNING;
989 error = gre_kick(sc);
990 break;
991 case IPPROTO_GRE:
992 ifp->if_flags |= IFF_LINK0;
993 ifp->if_flags &= ~IFF_LINK2;
994 goto recompute;
995 case IPPROTO_MOBILE:
996 ifp->if_flags &= ~(IFF_LINK0|IFF_LINK2);
997 goto recompute;
998 default:
999 error = EPROTONOSUPPORT;
1000 break;
1001 }
1002 break;
1003 case GREGPROTO:
1004 ifr->ifr_flags = sc->sc_proto;
1005 break;
1006 case GRESADDRS:
1007 case GRESADDRD:
1008 /*
1009 * set tunnel endpoints, compute a less specific route
1010 * to the remote end and mark if as up
1011 */
1012 sa = &ifr->ifr_addr;
1013 if (cmd == GRESADDRS) {
1014 sc->g_src = (satosin(sa))->sin_addr;
1015 sc->g_srcport = satosin(sa)->sin_port;
1016 }
1017 if (cmd == GRESADDRD) {
1018 if (sc->sc_proto == IPPROTO_UDP &&
1019 satosin(sa)->sin_port == 0) {
1020 error = EINVAL;
1021 break;
1022 }
1023 sc->g_dst = (satosin(sa))->sin_addr;
1024 sc->g_dstport = satosin(sa)->sin_port;
1025 }
1026 recompute:
1027 if (sc->sc_proto == IPPROTO_UDP ||
1028 (sc->g_src.s_addr != INADDR_ANY &&
1029 sc->g_dst.s_addr != INADDR_ANY)) {
1030 rtcache_free(&sc->route);
1031 if (sc->sc_proto == IPPROTO_UDP)
1032 error = gre_kick(sc);
1033 else if (gre_compute_route(sc) == 0)
1034 ifp->if_flags |= IFF_RUNNING;
1035 else
1036 ifp->if_flags &= ~IFF_RUNNING;
1037 }
1038 break;
1039 case GREGADDRS:
1040 memset(&si, 0, sizeof(si));
1041 si.sin_family = AF_INET;
1042 si.sin_len = sizeof(struct sockaddr_in);
1043 si.sin_addr.s_addr = sc->g_src.s_addr;
1044 sa = sintosa(&si);
1045 ifr->ifr_addr = *sa;
1046 break;
1047 case GREGADDRD:
1048 memset(&si, 0, sizeof(si));
1049 si.sin_family = AF_INET;
1050 si.sin_len = sizeof(struct sockaddr_in);
1051 si.sin_addr.s_addr = sc->g_dst.s_addr;
1052 sa = sintosa(&si);
1053 ifr->ifr_addr = *sa;
1054 break;
1055 case GREDSOCK:
1056 if (sc->sc_proto != IPPROTO_UDP) {
1057 error = EINVAL;
1058 break;
1059 }
1060 ifp->if_flags &= ~IFF_UP;
1061 gre_wakeup(sc);
1062 break;
1063 case GRESSOCK:
1064 if (sc->sc_proto != IPPROTO_UDP) {
1065 error = EINVAL;
1066 break;
1067 }
1068 /* getsock() will FILE_USE() and unlock the descriptor for us */
1069 if ((error = getsock(p->p_fd, (int)ifr->ifr_value, &fp)) != 0)
1070 break;
1071 so = (struct socket *)fp->f_data;
1072 if (so->so_type != SOCK_DGRAM) {
1073 FILE_UNUSE(fp, NULL);
1074 error = EINVAL;
1075 break;
1076 }
1077 /* check address */
1078 if ((error = gre_getnames(so, curlwp, &src, &dst)) != 0) {
1079 FILE_UNUSE(fp, NULL);
1080 break;
1081 }
1082
1083 /* Increase reference count. Now that our reference
1084 * to the file descriptor is counted, this thread
1085 * can release our "use" of the descriptor, but it
1086 * will not be destroyed by some other thread's
1087 * action. This thread needs to release its use,
1088 * too, because one and only one thread can have
1089 * use of the descriptor at once. The kernel thread
1090 * will pick up the use if it needs it.
1091 */
1092
1093 fp->f_count++;
1094 FILE_UNUSE(fp, NULL);
1095
1096 while (sc->sc_fp != NULL && error == 0) {
1097 error = cv_timedwait_sig(&sc->sc_soparm_cv, &sc->sc_mtx,
1098 MAX(1, hz / 2));
1099 }
1100 if (error == 0) {
1101 sc->sc_fp = fp;
1102 ifp->if_flags |= IFF_UP;
1103 }
1104
1105 if (error != 0 || (error = gre_kick(sc)) != 0) {
1106 gre_closef(&fp, l);
1107 break;
1108 }
1109 /* fp does not any longer belong to this thread. */
1110 sc->g_src = src.sin_addr;
1111 sc->g_srcport = src.sin_port;
1112 sc->g_dst = dst.sin_addr;
1113 sc->g_dstport = dst.sin_port;
1114 break;
1115 case SIOCSLIFPHYADDR:
1116 if (lifr->addr.ss_family != AF_INET ||
1117 lifr->dstaddr.ss_family != AF_INET) {
1118 error = EAFNOSUPPORT;
1119 break;
1120 }
1121 if (lifr->addr.ss_len != sizeof(si) ||
1122 lifr->dstaddr.ss_len != sizeof(si)) {
1123 error = EINVAL;
1124 break;
1125 }
1126 sc->g_src = satosin(&lifr->addr)->sin_addr;
1127 sc->g_dst = satosin(&lifr->dstaddr)->sin_addr;
1128 sc->g_srcport = satosin(&lifr->addr)->sin_port;
1129 sc->g_dstport = satosin(&lifr->dstaddr)->sin_port;
1130 goto recompute;
1131 case SIOCDIFPHYADDR:
1132 sc->g_src.s_addr = INADDR_ANY;
1133 sc->g_dst.s_addr = INADDR_ANY;
1134 sc->g_srcport = 0;
1135 sc->g_dstport = 0;
1136 goto recompute;
1137 case SIOCGLIFPHYADDR:
1138 if (sc->g_src.s_addr == INADDR_ANY ||
1139 sc->g_dst.s_addr == INADDR_ANY) {
1140 error = EADDRNOTAVAIL;
1141 break;
1142 }
1143 memset(&si, 0, sizeof(si));
1144 si.sin_family = AF_INET;
1145 si.sin_len = sizeof(struct sockaddr_in);
1146 si.sin_addr = sc->g_src;
1147 if (sc->sc_proto == IPPROTO_UDP)
1148 si.sin_port = sc->g_srcport;
1149 memcpy(&lifr->addr, &si, sizeof(si));
1150 si.sin_addr = sc->g_dst;
1151 if (sc->sc_proto == IPPROTO_UDP)
1152 si.sin_port = sc->g_dstport;
1153 memcpy(&lifr->dstaddr, &si, sizeof(si));
1154 break;
1155 default:
1156 error = EINVAL;
1157 break;
1158 }
1159 mutex_exit(&sc->sc_mtx);
1160 return error;
1161 }
1162
1163 /*
1164 * Compute a route to our destination.
1165 */
1166 static int
1167 gre_compute_route(struct gre_softc *sc)
1168 {
1169 struct route *ro;
1170 union {
1171 struct sockaddr dst;
1172 struct sockaddr_in dst4;
1173 } u;
1174
1175 ro = &sc->route;
1176
1177 memset(ro, 0, sizeof(*ro));
1178 sockaddr_in_init(&u.dst4, &sc->g_dst, 0);
1179 rtcache_setdst(ro, &u.dst);
1180
1181 rtcache_init(ro);
1182
1183 if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
1184 GRE_DPRINTF(sc, "%s: route to %s %s\n", sc->sc_if.if_xname,
1185 inet_ntoa(u.dst4.sin_addr),
1186 (ro->ro_rt == NULL)
1187 ? "does not exist"
1188 : "loops back to ourself");
1189 rtcache_free(ro);
1190 return EADDRNOTAVAIL;
1191 }
1192
1193 return 0;
1194 }
1195
/*
 * One's-complement checksum over a flat buffer; the counterpart of
 * in_cksum(), which works on mbufs.
 *
 *   p    start of the buffer, read as host-order 16-bit words
 *   len  buffer length in bytes; a trailing odd byte is summed as a
 *        16-bit word whose second byte is zero
 *
 * Returns the complement of the folded 16-bit sum.
 */
u_int16_t
gre_in_cksum(u_int16_t *p, u_int len)
{
	const u_int nfull = len >> 1;	/* number of complete words */
	u_int32_t acc = 0;
	u_int i;

	for (i = 0; i < nfull; i++)
		acc += p[i];

	if ((len & 1) != 0) {
		union {
			u_short w;
			u_char c[2];
		} tail;

		tail.c[0] = *(u_char *)&p[nfull];
		tail.c[1] = 0;
		acc += tail.w;
	}

	/* end-around-carry */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += acc >> 16;

	return (u_int16_t)~acc;
}
1224 #endif
1225
void	greattach(int);

/*
 * Pseudo-device attach routine.  With INET configured it initializes
 * the global softc list and registers the "gre" interface cloner;
 * without INET it does nothing.  `count' is not used.
 */
/* ARGSUSED */
void
greattach(int count)
{

#ifdef INET
	LIST_INIT(&gre_softc_list);
	if_clone_attach(&gre_cloner);
#endif /* INET */
}
1237