if_gre.c revision 1.88.2.5 1 /* $NetBSD: if_gre.c,v 1.88.2.5 2007/06/08 14:17:35 ad Exp $ */
2
3 /*
4 * Copyright (c) 1998 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Heiko W.Rupp <hwr (at) pilhuhn.de>
9 *
10 * IPv6-over-GRE contributed by Gert Doering <gert (at) greenie.muc.de>
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the NetBSD
23 * Foundation, Inc. and its contributors.
24 * 4. Neither the name of The NetBSD Foundation nor the names of its
25 * contributors may be used to endorse or promote products derived
26 * from this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
29 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
32 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 */
40
41 /*
42 * Encapsulate L3 protocols into IP
43 * See RFC 1701 and 1702 for more details.
44 * If_gre is compatible with Cisco GRE tunnels, so you can
45 * have a NetBSD box as the other end of a tunnel interface of a Cisco
46 * router. See gre(4) for more details.
47 * Also supported: IP in IP encaps (proto 55) as of RFC 2004
48 */
49
50 #include <sys/cdefs.h>
51 __KERNEL_RCSID(0, "$NetBSD: if_gre.c,v 1.88.2.5 2007/06/08 14:17:35 ad Exp $");
52
53 #include "opt_gre.h"
54 #include "opt_inet.h"
55 #include "bpfilter.h"
56
57 #ifdef INET
58 #include <sys/param.h>
59 #include <sys/file.h>
60 #include <sys/filedesc.h>
61 #include <sys/malloc.h>
62 #include <sys/mbuf.h>
63 #include <sys/proc.h>
64 #include <sys/protosw.h>
65 #include <sys/socket.h>
66 #include <sys/socketvar.h>
67 #include <sys/ioctl.h>
68 #include <sys/queue.h>
69 #if __NetBSD__
70 #include <sys/systm.h>
71 #include <sys/sysctl.h>
72 #include <sys/kauth.h>
73 #endif
74
75 #include <sys/kernel.h>
76 #include <sys/mutex.h>
77 #include <sys/condvar.h>
78 #include <sys/kthread.h>
79
80 #include <machine/cpu.h>
81
82 #include <net/ethertypes.h>
83 #include <net/if.h>
84 #include <net/if_types.h>
85 #include <net/netisr.h>
86 #include <net/route.h>
87
88 #ifdef INET
89 #include <netinet/in.h>
90 #include <netinet/in_systm.h>
91 #include <netinet/in_var.h>
92 #include <netinet/ip.h>
93 #include <netinet/ip_var.h>
94 #else
95 #error "Huh? if_gre without inet?"
96 #endif
97
98
99 #ifdef NETATALK
100 #include <netatalk/at.h>
101 #include <netatalk/at_var.h>
102 #include <netatalk/at_extern.h>
103 #endif
104
105 #if NBPFILTER > 0
106 #include <sys/time.h>
107 #include <net/bpf.h>
108 #endif
109
110 #include <net/if_gre.h>
111
112 /*
113 * It is not easy to calculate the right value for a GRE MTU.
114 * We leave this task to the admin and use the same default that
115 * other vendors use.
116 */
117 #define GREMTU 1476
118
119 #ifdef GRE_DEBUG
120 #define GRE_DPRINTF(__sc, __fmt, ...) \
121 do { \
122 if (((__sc)->sc_if.if_flags & IFF_DEBUG) != 0) \
123 printf(__fmt, __VA_ARGS__); \
124 } while (/*CONSTCOND*/0)
125 #else
126 #define GRE_DPRINTF(__sc, __fmt, ...) do { } while (/*CONSTCOND*/0)
127 #endif /* GRE_DEBUG */
128
129 struct gre_softc_head gre_softc_list;
130 int ip_gre_ttl = GRE_TTL;
131
132 static int gre_clone_create(struct if_clone *, int);
133 static int gre_clone_destroy(struct ifnet *);
134
135 static struct if_clone gre_cloner =
136 IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
137
138 static int gre_output(struct ifnet *, struct mbuf *,
139 const struct sockaddr *, struct rtentry *);
140 static int gre_ioctl(struct ifnet *, u_long, void *);
141
142 static int gre_compute_route(struct gre_softc *sc);
143
144 static void gre_closef(struct file **, struct lwp *);
145 static int gre_getsockname(struct socket *, struct mbuf *, struct lwp *);
146 static int gre_getpeername(struct socket *, struct mbuf *, struct lwp *);
147 static int gre_getnames(struct socket *, struct lwp *, struct sockaddr_in *,
148 struct sockaddr_in *);
149
150 /* Calling thread must hold sc->sc_mtx. */
151 static void
152 gre_stop(struct gre_softc *sc)
153 {
154 sc->sc_running = 0;
155 cv_signal(&sc->sc_join_cv);
156 }
157
158 /* Calling thread must hold sc->sc_mtx. */
159 static void
160 gre_join(struct gre_softc *sc)
161 {
162 while (sc->sc_running != 0)
163 cv_wait(&sc->sc_join_cv, &sc->sc_mtx);
164 }
165
166 /* Calling thread must hold sc->sc_mtx. */
167 static void
168 gre_wakeup(struct gre_softc *sc)
169 {
170 GRE_DPRINTF(sc, "%s: enter\n", __func__);
171 sc->sc_haswork = 1;
172 cv_signal(&sc->sc_work_cv);
173 }
174
175 static int
176 gre_clone_create(struct if_clone *ifc, int unit)
177 {
178 struct gre_softc *sc;
179
180 sc = malloc(sizeof(struct gre_softc), M_DEVBUF, M_WAITOK);
181 memset(sc, 0, sizeof(struct gre_softc));
182 mutex_init(&sc->sc_mtx, MUTEX_DRIVER, IPL_NET);
183 cv_init(&sc->sc_work_cv, "gre work");
184 cv_init(&sc->sc_join_cv, "gre join");
185 cv_init(&sc->sc_soparm_cv, "gre soparm");
186
187 snprintf(sc->sc_if.if_xname, sizeof(sc->sc_if.if_xname), "%s%d",
188 ifc->ifc_name, unit);
189 sc->sc_if.if_softc = sc;
190 sc->sc_if.if_type = IFT_TUNNEL;
191 sc->sc_if.if_addrlen = 0;
192 sc->sc_if.if_hdrlen = 24; /* IP + GRE */
193 sc->sc_if.if_dlt = DLT_NULL;
194 sc->sc_if.if_mtu = GREMTU;
195 sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
196 sc->sc_if.if_output = gre_output;
197 sc->sc_if.if_ioctl = gre_ioctl;
198 sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
199 sc->g_dstport = sc->g_srcport = 0;
200 sc->sc_proto = IPPROTO_GRE;
201 sc->sc_snd.ifq_maxlen = 256;
202 sc->sc_if.if_flags |= IFF_LINK0;
203 if_attach(&sc->sc_if);
204 if_alloc_sadl(&sc->sc_if);
205 #if NBPFILTER > 0
206 bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t));
207 #endif
208 LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
209 return 0;
210 }
211
212 static int
213 gre_clone_destroy(struct ifnet *ifp)
214 {
215 struct gre_softc *sc = ifp->if_softc;
216
217 LIST_REMOVE(sc, sc_list);
218 #if NBPFILTER > 0
219 bpfdetach(ifp);
220 #endif
221 if_detach(ifp);
222 mutex_enter(&sc->sc_mtx);
223 gre_wakeup(sc);
224 gre_join(sc);
225 mutex_exit(&sc->sc_mtx);
226 rtcache_free(&sc->route);
227
228 cv_destroy(&sc->sc_soparm_cv);
229 cv_destroy(&sc->sc_join_cv);
230 cv_destroy(&sc->sc_work_cv);
231 mutex_destroy(&sc->sc_mtx);
232 free(sc, M_DEVBUF);
233
234 return 0;
235 }
236
/*
 * Socket upcall installed by gre_upcall_add().  `arg' is the softc that
 * was registered with the socket; the handler only flags pending work
 * and wakes the worker via gre_wakeup().  `so' and `waitflag' are unused.
 */
static void
gre_receive(struct socket *so, void *arg, int waitflag)
{
	struct gre_softc *sc;

	sc = (struct gre_softc *)arg;
	GRE_DPRINTF(sc, "%s: enter\n", __func__);

	gre_wakeup(sc);
}
246
247 static void
248 gre_upcall_add(struct socket *so, void *arg)
249 {
250 /* XXX What if the kernel already set an upcall? */
251 so->so_upcallarg = arg;
252 so->so_upcall = gre_receive;
253 so->so_rcv.sb_flags |= SB_UPCALL;
254 }
255
256 static void
257 gre_upcall_remove(struct socket *so)
258 {
259 /* XXX What if the kernel already set an upcall? */
260 so->so_rcv.sb_flags &= ~SB_UPCALL;
261 so->so_upcallarg = NULL;
262 so->so_upcall = NULL;
263 }
264
265 static void
266 gre_sodestroy(struct socket **sop)
267 {
268 gre_upcall_remove(*sop);
269 soshutdown(*sop, SHUT_RDWR);
270 soclose(*sop);
271 *sop = NULL;
272 }
273
274 static struct mbuf *
275 gre_getsockmbuf(struct socket *so)
276 {
277 struct mbuf *m;
278
279 m = m_get(M_WAIT, MT_SONAME);
280 if (m != NULL)
281 MCLAIM(m, so->so_mowner);
282 return m;
283 }
284
285 static int
286 gre_socreate1(struct gre_softc *sc, struct lwp *l, struct gre_soparm *sp,
287 struct socket **sop)
288 {
289 int rc;
290 struct mbuf *m;
291 struct sockaddr_in *sin;
292 struct socket *so;
293
294 GRE_DPRINTF(sc, "%s: enter\n", __func__);
295 rc = socreate(AF_INET, sop, SOCK_DGRAM, IPPROTO_UDP, l);
296 if (rc != 0) {
297 GRE_DPRINTF(sc, "%s: socreate failed\n", __func__);
298 return rc;
299 }
300
301 so = *sop;
302
303 gre_upcall_add(so, sc);
304 if ((m = gre_getsockmbuf(so)) == NULL) {
305 rc = ENOBUFS;
306 goto out;
307 }
308 sin = mtod(m, struct sockaddr_in *);
309 sin->sin_len = m->m_len = sizeof(struct sockaddr_in);
310 sin->sin_family = AF_INET;
311 sin->sin_addr = sc->g_src;
312 sin->sin_port = sc->g_srcport;
313
314 GRE_DPRINTF(sc, "%s: bind 0x%08" PRIx32 " port %d\n", __func__,
315 sin->sin_addr.s_addr, ntohs(sin->sin_port));
316 if ((rc = sobind(so, m, l)) != 0) {
317 GRE_DPRINTF(sc, "%s: sobind failed\n", __func__);
318 goto out;
319 }
320
321 if (sc->g_srcport == 0) {
322 if ((rc = gre_getsockname(so, m, l)) != 0) {
323 GRE_DPRINTF(sc, "%s: gre_getsockname failed\n",
324 __func__);
325 goto out;
326 }
327 sc->g_srcport = sin->sin_port;
328 }
329
330 sin->sin_addr = sc->g_dst;
331 sin->sin_port = sc->g_dstport;
332
333 if ((rc = soconnect(so, m, l)) != 0) {
334 GRE_DPRINTF(sc, "%s: soconnect failed\n", __func__);
335 goto out;
336 }
337
338 *mtod(m, int *) = ip_gre_ttl;
339 m->m_len = sizeof(int);
340 rc = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so, IPPROTO_IP, IP_TTL,
341 &m);
342 m = NULL;
343 if (rc != 0) {
344 printf("%s: setopt ttl failed\n", __func__);
345 rc = 0;
346 }
347 out:
348 m_freem(m);
349
350 if (rc != 0)
351 gre_sodestroy(sop);
352 else
353 *sp = sc->sc_soparm;
354
355 return rc;
356 }
357
358 static void
359 gre_thread1(struct gre_softc *sc, struct lwp *l)
360 {
361 int flags, rc;
362 const struct gre_h *gh;
363 struct ifnet *ifp = &sc->sc_if;
364 struct mbuf *m;
365 struct socket *so = NULL;
366 struct uio uio;
367 struct gre_soparm sp;
368 struct file *fp = NULL;
369
370 GRE_DPRINTF(sc, "%s: enter\n", __func__);
371 mutex_enter(&sc->sc_mtx);
372
373 sc->sc_haswork = 1;
374
375 memset(&sp, 0, sizeof(sp));
376 memset(&uio, 0, sizeof(uio));
377
378 ifp->if_flags |= IFF_RUNNING;
379
380 for (;;) {
381 while (sc->sc_haswork == 0) {
382 GRE_DPRINTF(sc, "%s: sleeping\n", __func__);
383 cv_wait(&sc->sc_work_cv, &sc->sc_mtx);
384 }
385 sc->sc_haswork = 0;
386 GRE_DPRINTF(sc, "%s: awake\n", __func__);
387 if ((ifp->if_flags & IFF_UP) != IFF_UP) {
388 GRE_DPRINTF(sc, "%s: not up & running; exiting\n",
389 __func__);
390 break;
391 }
392 if (sc->sc_proto != IPPROTO_UDP) {
393 GRE_DPRINTF(sc, "%s: not udp; exiting\n", __func__);
394 break;
395 }
396 /* XXX optimize */
397 if (so == NULL || sc->sc_fp != NULL ||
398 memcmp(&sp, &sc->sc_soparm, sizeof(sp)) != 0) {
399 GRE_DPRINTF(sc, "%s: parameters changed\n", __func__);
400
401 if (fp != NULL) {
402 gre_closef(&fp, curlwp);
403 so = NULL;
404 } else if (so != NULL)
405 gre_sodestroy(&so);
406
407 if (sc->sc_fp != NULL) {
408 fp = sc->sc_fp;
409 sc->sc_fp = NULL;
410 so = (struct socket *)fp->f_data;
411 gre_upcall_add(so, sc);
412 sp = sc->sc_soparm;
413 } else if (gre_socreate1(sc, l, &sp, &so) != 0)
414 goto out;
415 }
416 cv_signal(&sc->sc_soparm_cv);
417 for (;;) {
418 flags = MSG_DONTWAIT;
419 uio.uio_resid = 1000000;
420 rc = (*so->so_receive)(so, NULL, &uio, &m, NULL,
421 &flags);
422 /* TBD Back off if ECONNREFUSED (indicates
423 * ICMP Port Unreachable)?
424 */
425 if (rc == EWOULDBLOCK) {
426 GRE_DPRINTF(sc, "%s: so_receive EWOULDBLOCK\n",
427 __func__);
428 break;
429 } else if (rc != 0 || m == NULL) {
430 GRE_DPRINTF(sc, "%s: rc %d m %p\n",
431 ifp->if_xname, rc, (void *)m);
432 continue;
433 } else
434 GRE_DPRINTF(sc, "%s: so_receive ok\n",
435 __func__);
436 if (m->m_len < sizeof(*gh) &&
437 (m = m_pullup(m, sizeof(*gh))) == NULL) {
438 GRE_DPRINTF(sc, "%s: m_pullup failed\n",
439 __func__);
440 continue;
441 }
442 gh = mtod(m, const struct gre_h *);
443
444 if (gre_input3(sc, m, 0, gh, 1) == 0) {
445 GRE_DPRINTF(sc, "%s: dropping unsupported\n",
446 __func__);
447 m_freem(m);
448 }
449 }
450 for (;;) {
451 IF_DEQUEUE(&sc->sc_snd, m);
452 if (m == NULL)
453 break;
454 GRE_DPRINTF(sc, "%s: dequeue\n", __func__);
455 if ((so->so_state & SS_ISCONNECTED) == 0) {
456 GRE_DPRINTF(sc, "%s: not connected\n",
457 __func__);
458 m_freem(m);
459 continue;
460 }
461 rc = (*so->so_send)(so, NULL, NULL, m, NULL, 0, l);
462 /* XXX handle ENOBUFS? */
463 if (rc != 0)
464 GRE_DPRINTF(sc, "%s: so_send failed\n",
465 __func__);
466 }
467 }
468 if (fp != NULL) {
469 GRE_DPRINTF(sc, "%s: removing upcall\n", __func__);
470 gre_upcall_remove(so);
471 } else if (so != NULL)
472 gre_sodestroy(&so);
473 out:
474 GRE_DPRINTF(sc, "%s: stopping\n", __func__);
475 if (fp != NULL)
476 gre_closef(&fp, curlwp);
477 if (sc->sc_proto == IPPROTO_UDP)
478 ifp->if_flags &= ~IFF_RUNNING;
479 while (!IF_IS_EMPTY(&sc->sc_snd)) {
480 IF_DEQUEUE(&sc->sc_snd, m);
481 m_freem(m);
482 }
483 gre_stop(sc);
484 /* must not touch sc after this! */
485 GRE_DPRINTF(sc, "%s: restore ipl\n", __func__);
486 mutex_exit(&sc->sc_mtx);
487 }
488
489 static void
490 gre_thread(void *arg)
491 {
492 struct gre_softc *sc = (struct gre_softc *)arg;
493
494 gre_thread1(sc, curlwp);
495 /* must not touch sc after this! */
496 kthread_exit(0);
497 }
498
499 /* Calling thread must hold sc->sc_mtx. */
500 int
501 gre_input3(struct gre_softc *sc, struct mbuf *m, int hlen,
502 const struct gre_h *gh, int mtx_held)
503 {
504 u_int16_t flags;
505 #if NBPFILTER > 0
506 u_int32_t af = AF_INET; /* af passed to BPF tap */
507 #endif
508 int isr;
509 struct ifqueue *ifq;
510
511 sc->sc_if.if_ipackets++;
512 sc->sc_if.if_ibytes += m->m_pkthdr.len;
513
514 hlen += sizeof(struct gre_h);
515
516 /* process GRE flags as packet can be of variable len */
517 flags = ntohs(gh->flags);
518
519 /* Checksum & Offset are present */
520 if ((flags & GRE_CP) | (flags & GRE_RP))
521 hlen += 4;
522 /* We don't support routing fields (variable length) */
523 if (flags & GRE_RP) {
524 sc->sc_if.if_ierrors++;
525 return 0;
526 }
527 if (flags & GRE_KP)
528 hlen += 4;
529 if (flags & GRE_SP)
530 hlen += 4;
531
532 switch (ntohs(gh->ptype)) { /* ethertypes */
533 case ETHERTYPE_IP: /* shouldn't need a schednetisr(), as */
534 ifq = &ipintrq; /* we are in ip_input */
535 isr = NETISR_IP;
536 break;
537 #ifdef NETATALK
538 case ETHERTYPE_ATALK:
539 ifq = &atintrq1;
540 isr = NETISR_ATALK;
541 #if NBPFILTER > 0
542 af = AF_APPLETALK;
543 #endif
544 break;
545 #endif
546 #ifdef INET6
547 case ETHERTYPE_IPV6:
548 GRE_DPRINTF(sc, "%s: IPv6 packet\n", __func__);
549 ifq = &ip6intrq;
550 isr = NETISR_IPV6;
551 #if NBPFILTER > 0
552 af = AF_INET6;
553 #endif
554 break;
555 #endif
556 default: /* others not yet supported */
557 GRE_DPRINTF(sc, "%s: unhandled ethertype 0x%04x\n", __func__,
558 ntohs(gh->ptype));
559 sc->sc_if.if_noproto++;
560 return 0;
561 }
562
563 if (hlen > m->m_pkthdr.len) {
564 m_freem(m);
565 sc->sc_if.if_ierrors++;
566 return EINVAL;
567 }
568 m_adj(m, hlen);
569
570 #if NBPFILTER > 0
571 if (sc->sc_if.if_bpf != NULL)
572 bpf_mtap_af(sc->sc_if.if_bpf, af, m);
573 #endif /*NBPFILTER > 0*/
574
575 m->m_pkthdr.rcvif = &sc->sc_if;
576
577 if (!mtx_held)
578 mutex_enter(&sc->sc_mtx);
579 if (IF_QFULL(ifq)) {
580 IF_DROP(ifq);
581 m_freem(m);
582 } else {
583 IF_ENQUEUE(ifq, m);
584 }
585 /* we need schednetisr since the address family may change */
586 schednetisr(isr);
587 if (!mtx_held)
588 mutex_exit(&sc->sc_mtx);
589
590 return 1; /* packet is done, no further processing needed */
591 }
592
593 /*
594 * The output routine. Takes a packet and encapsulates it in the protocol
595 * given by sc->sc_proto. See also RFC 1701 and RFC 2004
596 */
597 static int
598 gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
599 struct rtentry *rt)
600 {
601 int error = 0, hlen, msiz;
602 struct gre_softc *sc = ifp->if_softc;
603 struct greip *gi;
604 struct gre_h *gh;
605 struct ip *eip, *ip;
606 u_int8_t ip_tos = 0;
607 u_int16_t etype = 0;
608 struct mobile_h mob_h;
609
610 if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) == 0 ||
611 sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
612 m_freem(m);
613 error = ENETDOWN;
614 goto end;
615 }
616
617 gi = NULL;
618 ip = NULL;
619
620 #if NBPFILTER >0
621 if (ifp->if_bpf)
622 bpf_mtap_af(ifp->if_bpf, dst->sa_family, m);
623 #endif
624
625 m->m_flags &= ~(M_BCAST|M_MCAST);
626
627 switch (sc->sc_proto) {
628 case IPPROTO_MOBILE:
629 if (dst->sa_family != AF_INET) {
630 IF_DROP(&ifp->if_snd);
631 m_freem(m);
632 error = EINVAL;
633 goto end;
634 }
635
636 if (M_UNWRITABLE(m, sizeof(*ip)) &&
637 (m = m_pullup(m, sizeof(*ip))) == NULL) {
638 error = ENOBUFS;
639 goto end;
640 }
641 ip = mtod(m, struct ip *);
642
643 memset(&mob_h, 0, MOB_H_SIZ_L);
644 mob_h.proto = (ip->ip_p) << 8;
645 mob_h.odst = ip->ip_dst.s_addr;
646 ip->ip_dst.s_addr = sc->g_dst.s_addr;
647
648 /*
649 * If the packet comes from our host, we only change
650 * the destination address in the IP header.
651 * Else we also need to save and change the source
652 */
653 if (in_hosteq(ip->ip_src, sc->g_src)) {
654 msiz = MOB_H_SIZ_S;
655 } else {
656 mob_h.proto |= MOB_H_SBIT;
657 mob_h.osrc = ip->ip_src.s_addr;
658 ip->ip_src.s_addr = sc->g_src.s_addr;
659 msiz = MOB_H_SIZ_L;
660 }
661 HTONS(mob_h.proto);
662 mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
663
664 M_PREPEND(m, msiz, M_DONTWAIT);
665 if (m == NULL) {
666 error = ENOBUFS;
667 goto end;
668 }
669 /* XXX Assuming that ip does not dangle after
670 * M_PREPEND. In practice, that's true, but
671 * that's not in M_PREPEND's contract.
672 */
673 memmove(mtod(m, void *), ip, sizeof(*ip));
674 ip = mtod(m, struct ip *);
675 memcpy(ip + 1, &mob_h, (size_t)msiz);
676 ip->ip_len = htons(ntohs(ip->ip_len) + msiz);
677 break;
678 case IPPROTO_UDP:
679 case IPPROTO_GRE:
680 GRE_DPRINTF(sc, "%s: dst->sa_family=%d\n", __func__,
681 dst->sa_family);
682 switch (dst->sa_family) {
683 case AF_INET:
684 ip = mtod(m, struct ip *);
685 ip_tos = ip->ip_tos;
686 etype = ETHERTYPE_IP;
687 break;
688 #ifdef NETATALK
689 case AF_APPLETALK:
690 etype = ETHERTYPE_ATALK;
691 break;
692 #endif
693 #ifdef INET6
694 case AF_INET6:
695 etype = ETHERTYPE_IPV6;
696 break;
697 #endif
698 default:
699 IF_DROP(&ifp->if_snd);
700 m_freem(m);
701 error = EAFNOSUPPORT;
702 goto end;
703 }
704 break;
705 default:
706 IF_DROP(&ifp->if_snd);
707 m_freem(m);
708 error = EINVAL;
709 goto end;
710 }
711
712 switch (sc->sc_proto) {
713 case IPPROTO_GRE:
714 hlen = sizeof(struct greip);
715 break;
716 case IPPROTO_UDP:
717 hlen = sizeof(struct gre_h);
718 break;
719 default:
720 hlen = 0;
721 break;
722 }
723
724 M_PREPEND(m, hlen, M_DONTWAIT);
725
726 if (m == NULL) {
727 IF_DROP(&ifp->if_snd);
728 error = ENOBUFS;
729 goto end;
730 }
731
732 switch (sc->sc_proto) {
733 case IPPROTO_UDP:
734 gh = mtod(m, struct gre_h *);
735 memset(gh, 0, sizeof(*gh));
736 gh->ptype = htons(etype);
737 /* XXX Need to handle IP ToS. Look at how I handle IP TTL. */
738 break;
739 case IPPROTO_GRE:
740 gi = mtod(m, struct greip *);
741 gh = &gi->gi_g;
742 eip = &gi->gi_i;
743 /* we don't have any GRE flags for now */
744 memset(gh, 0, sizeof(*gh));
745 gh->ptype = htons(etype);
746 eip->ip_src = sc->g_src;
747 eip->ip_dst = sc->g_dst;
748 eip->ip_hl = (sizeof(struct ip)) >> 2;
749 eip->ip_ttl = ip_gre_ttl;
750 eip->ip_tos = ip_tos;
751 eip->ip_len = htons(m->m_pkthdr.len);
752 eip->ip_p = sc->sc_proto;
753 break;
754 case IPPROTO_MOBILE:
755 eip = mtod(m, struct ip *);
756 eip->ip_p = sc->sc_proto;
757 break;
758 default:
759 error = EPROTONOSUPPORT;
760 m_freem(m);
761 goto end;
762 }
763
764 ifp->if_opackets++;
765 ifp->if_obytes += m->m_pkthdr.len;
766
767 /* send it off */
768 if (sc->sc_proto == IPPROTO_UDP) {
769 if (IF_QFULL(&sc->sc_snd)) {
770 IF_DROP(&sc->sc_snd);
771 error = ENOBUFS;
772 m_freem(m);
773 } else {
774 IF_ENQUEUE(&sc->sc_snd, m);
775 gre_wakeup(sc);
776 error = 0;
777 }
778 goto end;
779 }
780 if (sc->route.ro_rt == NULL)
781 rtcache_init(&sc->route);
782 else
783 rtcache_check(&sc->route);
784 if (sc->route.ro_rt == NULL) {
785 m_freem(m);
786 goto end;
787 }
788 if (sc->route.ro_rt->rt_ifp->if_softc == sc) {
789 rtcache_clear(&sc->route);
790 m_freem(m);
791 } else
792 error = ip_output(m, NULL, &sc->route, 0, NULL, NULL);
793 end:
794 if (error)
795 ifp->if_oerrors++;
796 return error;
797 }
798
799 /* Calling thread must hold sc->sc_mtx. */
800 static int
801 gre_kick(struct gre_softc *sc)
802 {
803 int rc;
804 struct ifnet *ifp = &sc->sc_if;
805
806 if (sc->sc_proto == IPPROTO_UDP && (ifp->if_flags & IFF_UP) == IFF_UP &&
807 !sc->sc_running) {
808 sc->sc_running = 1;
809 rc = kthread_create(PRI_NONE, 0, NULL, gre_thread, sc,
810 NULL, ifp->if_xname);
811 if (rc != 0)
812 gre_stop(sc);
813 return rc;
814 } else {
815 gre_wakeup(sc);
816 return 0;
817 }
818 }
819
820 /* Calling thread must hold sc->sc_mtx. */
821 static int
822 gre_getname(struct socket *so, int req, struct mbuf *nam, struct lwp *l)
823 {
824 return (*so->so_proto->pr_usrreq)(so, req, NULL, nam, NULL, l);
825 }
826
827 /* Calling thread must hold sc->sc_mtx. */
828 static int
829 gre_getsockname(struct socket *so, struct mbuf *nam, struct lwp *l)
830 {
831 return gre_getname(so, PRU_SOCKADDR, nam, l);
832 }
833
834 /* Calling thread must hold sc->sc_mtx. */
835 static int
836 gre_getpeername(struct socket *so, struct mbuf *nam, struct lwp *l)
837 {
838 return gre_getname(so, PRU_PEERADDR, nam, l);
839 }
840
841 /* Calling thread must hold sc->sc_mtx. */
842 static int
843 gre_getnames(struct socket *so, struct lwp *l, struct sockaddr_in *src,
844 struct sockaddr_in *dst)
845 {
846 struct mbuf *m;
847 struct sockaddr_in *sin;
848 int rc;
849
850 if ((m = gre_getsockmbuf(so)) == NULL)
851 return ENOBUFS;
852
853 sin = mtod(m, struct sockaddr_in *);
854
855 if ((rc = gre_getsockname(so, m, l)) != 0)
856 goto out;
857 if (sin->sin_family != AF_INET) {
858 rc = EAFNOSUPPORT;
859 goto out;
860 }
861 *src = *sin;
862
863 if ((rc = gre_getpeername(so, m, l)) != 0)
864 goto out;
865 if (sin->sin_family != AF_INET) {
866 rc = EAFNOSUPPORT;
867 goto out;
868 }
869 *dst = *sin;
870
871 out:
872 m_freem(m);
873 return rc;
874 }
875
876 static void
877 gre_closef(struct file **fpp, struct lwp *l)
878 {
879 struct file *fp = *fpp;
880
881 simple_lock(&fp->f_slock);
882 FILE_USE(fp);
883 closef(fp, l);
884 *fpp = NULL;
885 }
886
887 static int
888 gre_ioctl(struct ifnet *ifp, u_long cmd, void *data)
889 {
890 u_char oproto;
891 struct file *fp;
892 struct socket *so;
893 struct sockaddr_in dst, src;
894 struct proc *p = curproc; /* XXX */
895 struct lwp *l = curlwp; /* XXX */
896 struct ifreq *ifr = (struct ifreq *)data;
897 struct if_laddrreq *lifr = (struct if_laddrreq *)data;
898 struct gre_softc *sc = ifp->if_softc;
899 struct sockaddr_in si;
900 struct sockaddr *sa = NULL;
901 int error = 0;
902
903 switch (cmd) {
904 case SIOCSIFFLAGS:
905 case SIOCSIFMTU:
906 case GRESPROTO:
907 case GRESADDRD:
908 case GRESADDRS:
909 case GRESSOCK:
910 case GREDSOCK:
911 case SIOCSLIFPHYADDR:
912 case SIOCDIFPHYADDR:
913 if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE,
914 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
915 NULL) != 0)
916 return EPERM;
917 break;
918 default:
919 break;
920 }
921
922 mutex_enter(&sc->sc_mtx);
923 switch (cmd) {
924 case SIOCSIFADDR:
925 ifp->if_flags |= IFF_UP;
926 if ((error = gre_kick(sc)) != 0)
927 ifp->if_flags &= ~IFF_UP;
928 break;
929 case SIOCSIFDSTADDR:
930 break;
931 case SIOCSIFFLAGS:
932 oproto = sc->sc_proto;
933 switch (ifr->ifr_flags & (IFF_LINK0|IFF_LINK2)) {
934 case IFF_LINK0|IFF_LINK2:
935 sc->sc_proto = IPPROTO_UDP;
936 if (oproto != IPPROTO_UDP)
937 ifp->if_flags &= ~IFF_RUNNING;
938 error = gre_kick(sc);
939 break;
940 case IFF_LINK0:
941 sc->sc_proto = IPPROTO_GRE;
942 gre_wakeup(sc);
943 goto recompute;
944 case 0:
945 sc->sc_proto = IPPROTO_MOBILE;
946 gre_wakeup(sc);
947 goto recompute;
948 }
949 break;
950 case SIOCSIFMTU:
951 if (ifr->ifr_mtu < 576) {
952 error = EINVAL;
953 break;
954 }
955 ifp->if_mtu = ifr->ifr_mtu;
956 break;
957 case SIOCGIFMTU:
958 ifr->ifr_mtu = sc->sc_if.if_mtu;
959 break;
960 case SIOCADDMULTI:
961 case SIOCDELMULTI:
962 if (ifr == 0) {
963 error = EAFNOSUPPORT;
964 break;
965 }
966 switch (ifr->ifr_addr.sa_family) {
967 #ifdef INET
968 case AF_INET:
969 break;
970 #endif
971 #ifdef INET6
972 case AF_INET6:
973 break;
974 #endif
975 default:
976 error = EAFNOSUPPORT;
977 break;
978 }
979 break;
980 case GRESPROTO:
981 oproto = sc->sc_proto;
982 sc->sc_proto = ifr->ifr_flags;
983 switch (sc->sc_proto) {
984 case IPPROTO_UDP:
985 ifp->if_flags |= IFF_LINK0|IFF_LINK2;
986 if (oproto != IPPROTO_UDP)
987 ifp->if_flags &= ~IFF_RUNNING;
988 error = gre_kick(sc);
989 break;
990 case IPPROTO_GRE:
991 ifp->if_flags |= IFF_LINK0;
992 ifp->if_flags &= ~IFF_LINK2;
993 goto recompute;
994 case IPPROTO_MOBILE:
995 ifp->if_flags &= ~(IFF_LINK0|IFF_LINK2);
996 goto recompute;
997 default:
998 error = EPROTONOSUPPORT;
999 break;
1000 }
1001 break;
1002 case GREGPROTO:
1003 ifr->ifr_flags = sc->sc_proto;
1004 break;
1005 case GRESADDRS:
1006 case GRESADDRD:
1007 /*
1008 * set tunnel endpoints, compute a less specific route
1009 * to the remote end and mark if as up
1010 */
1011 sa = &ifr->ifr_addr;
1012 if (cmd == GRESADDRS) {
1013 sc->g_src = (satosin(sa))->sin_addr;
1014 sc->g_srcport = satosin(sa)->sin_port;
1015 }
1016 if (cmd == GRESADDRD) {
1017 if (sc->sc_proto == IPPROTO_UDP &&
1018 satosin(sa)->sin_port == 0) {
1019 error = EINVAL;
1020 break;
1021 }
1022 sc->g_dst = (satosin(sa))->sin_addr;
1023 sc->g_dstport = satosin(sa)->sin_port;
1024 }
1025 recompute:
1026 if (sc->sc_proto == IPPROTO_UDP ||
1027 (sc->g_src.s_addr != INADDR_ANY &&
1028 sc->g_dst.s_addr != INADDR_ANY)) {
1029 rtcache_free(&sc->route);
1030 if (sc->sc_proto == IPPROTO_UDP)
1031 error = gre_kick(sc);
1032 else if (gre_compute_route(sc) == 0)
1033 ifp->if_flags |= IFF_RUNNING;
1034 else
1035 ifp->if_flags &= ~IFF_RUNNING;
1036 }
1037 break;
1038 case GREGADDRS:
1039 memset(&si, 0, sizeof(si));
1040 si.sin_family = AF_INET;
1041 si.sin_len = sizeof(struct sockaddr_in);
1042 si.sin_addr.s_addr = sc->g_src.s_addr;
1043 sa = sintosa(&si);
1044 ifr->ifr_addr = *sa;
1045 break;
1046 case GREGADDRD:
1047 memset(&si, 0, sizeof(si));
1048 si.sin_family = AF_INET;
1049 si.sin_len = sizeof(struct sockaddr_in);
1050 si.sin_addr.s_addr = sc->g_dst.s_addr;
1051 sa = sintosa(&si);
1052 ifr->ifr_addr = *sa;
1053 break;
1054 case GREDSOCK:
1055 if (sc->sc_proto != IPPROTO_UDP) {
1056 error = EINVAL;
1057 break;
1058 }
1059 ifp->if_flags &= ~IFF_UP;
1060 gre_wakeup(sc);
1061 break;
1062 case GRESSOCK:
1063 if (sc->sc_proto != IPPROTO_UDP) {
1064 error = EINVAL;
1065 break;
1066 }
1067 /* getsock() will FILE_USE() and unlock the descriptor for us */
1068 if ((error = getsock(p->p_fd, (int)ifr->ifr_value, &fp)) != 0)
1069 break;
1070 so = (struct socket *)fp->f_data;
1071 if (so->so_type != SOCK_DGRAM) {
1072 FILE_UNUSE(fp, NULL);
1073 error = EINVAL;
1074 break;
1075 }
1076 /* check address */
1077 if ((error = gre_getnames(so, curlwp, &src, &dst)) != 0) {
1078 FILE_UNUSE(fp, NULL);
1079 break;
1080 }
1081
1082 /* Increase reference count. Now that our reference
1083 * to the file descriptor is counted, this thread
1084 * can release our "use" of the descriptor, but it
1085 * will not be destroyed by some other thread's
1086 * action. This thread needs to release its use,
1087 * too, because one and only one thread can have
1088 * use of the descriptor at once. The kernel thread
1089 * will pick up the use if it needs it.
1090 */
1091
1092 fp->f_count++;
1093 FILE_UNUSE(fp, NULL);
1094
1095 while (sc->sc_fp != NULL && error == 0) {
1096 error = cv_timedwait_sig(&sc->sc_soparm_cv, &sc->sc_mtx,
1097 MAX(1, hz / 2));
1098 }
1099 if (error == 0) {
1100 sc->sc_fp = fp;
1101 ifp->if_flags |= IFF_UP;
1102 }
1103
1104 if (error != 0 || (error = gre_kick(sc)) != 0) {
1105 gre_closef(&fp, l);
1106 break;
1107 }
1108 /* fp does not any longer belong to this thread. */
1109 sc->g_src = src.sin_addr;
1110 sc->g_srcport = src.sin_port;
1111 sc->g_dst = dst.sin_addr;
1112 sc->g_dstport = dst.sin_port;
1113 break;
1114 case SIOCSLIFPHYADDR:
1115 if (lifr->addr.ss_family != AF_INET ||
1116 lifr->dstaddr.ss_family != AF_INET) {
1117 error = EAFNOSUPPORT;
1118 break;
1119 }
1120 if (lifr->addr.ss_len != sizeof(si) ||
1121 lifr->dstaddr.ss_len != sizeof(si)) {
1122 error = EINVAL;
1123 break;
1124 }
1125 sc->g_src = satosin(&lifr->addr)->sin_addr;
1126 sc->g_dst = satosin(&lifr->dstaddr)->sin_addr;
1127 sc->g_srcport = satosin(&lifr->addr)->sin_port;
1128 sc->g_dstport = satosin(&lifr->dstaddr)->sin_port;
1129 goto recompute;
1130 case SIOCDIFPHYADDR:
1131 sc->g_src.s_addr = INADDR_ANY;
1132 sc->g_dst.s_addr = INADDR_ANY;
1133 sc->g_srcport = 0;
1134 sc->g_dstport = 0;
1135 goto recompute;
1136 case SIOCGLIFPHYADDR:
1137 if (sc->g_src.s_addr == INADDR_ANY ||
1138 sc->g_dst.s_addr == INADDR_ANY) {
1139 error = EADDRNOTAVAIL;
1140 break;
1141 }
1142 memset(&si, 0, sizeof(si));
1143 si.sin_family = AF_INET;
1144 si.sin_len = sizeof(struct sockaddr_in);
1145 si.sin_addr = sc->g_src;
1146 if (sc->sc_proto == IPPROTO_UDP)
1147 si.sin_port = sc->g_srcport;
1148 memcpy(&lifr->addr, &si, sizeof(si));
1149 si.sin_addr = sc->g_dst;
1150 if (sc->sc_proto == IPPROTO_UDP)
1151 si.sin_port = sc->g_dstport;
1152 memcpy(&lifr->dstaddr, &si, sizeof(si));
1153 break;
1154 default:
1155 error = EINVAL;
1156 break;
1157 }
1158 mutex_exit(&sc->sc_mtx);
1159 return error;
1160 }
1161
1162 /*
1163 * Compute a route to our destination.
1164 */
1165 static int
1166 gre_compute_route(struct gre_softc *sc)
1167 {
1168 struct route *ro;
1169 union {
1170 struct sockaddr dst;
1171 struct sockaddr_in dst4;
1172 } u;
1173
1174 ro = &sc->route;
1175
1176 memset(ro, 0, sizeof(*ro));
1177 sockaddr_in_init(&u.dst4, &sc->g_dst, 0);
1178 rtcache_setdst(ro, &u.dst);
1179
1180 rtcache_init(ro);
1181
1182 if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
1183 GRE_DPRINTF(sc, "%s: route to %s %s\n", sc->sc_if.if_xname,
1184 inet_ntoa(u.dst4.sin_addr),
1185 (ro->ro_rt == NULL)
1186 ? "does not exist"
1187 : "loops back to ourself");
1188 rtcache_free(ro);
1189 return EADDRNOTAVAIL;
1190 }
1191
1192 return 0;
1193 }
1194
/*
 * Compute the 16-bit one's-complement checksum of a flat buffer, much
 * like in_cksum does for mbuf chains.
 *
 * `p' points at the buffer and `len' is its length in bytes.  Whole
 * 16-bit words are summed as stored in memory; a trailing odd byte is
 * treated as the first byte of a word whose second byte is zero.  The
 * result is the complement of the folded sum.
 */
u_int16_t
gre_in_cksum(u_int16_t *p, u_int len)
{
	u_int32_t acc = 0;
	int remaining;

	/* Sum the complete 16-bit words. */
	for (remaining = len >> 1; remaining != 0; remaining--)
		acc += *p++;

	/* Pad a trailing odd byte with a zero byte. */
	if ((len & 1) != 0) {
		union {
			u_short w;
			u_char c[2];
		} tail;

		tail.c[0] = *(u_char *)p;
		tail.c[1] = 0;
		acc += tail.w;
	}

	/* end-around-carry: fold the upper half into the lower, twice */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += acc >> 16;

	return ~acc;
}
1223 #endif
1224
void greattach(int);

/*
 * Pseudo-device attach routine.  When INET is configured it initialises
 * the global softc list and registers the "gre" interface cloner;
 * otherwise it does nothing.  `count' is not used.
 */
/* ARGSUSED */
void
greattach(int count)
{
#ifdef INET
	/* The list must exist before the cloner can create interfaces. */
	LIST_INIT(&gre_softc_list);
	if_clone_attach(&gre_cloner);
#endif
}
1236