/*	$NetBSD: if_gre.c,v 1.88.2.6 2007/06/09 23:58:11 ad Exp $ */
2
3 /*
4 * Copyright (c) 1998 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Heiko W.Rupp <hwr (at) pilhuhn.de>
9 *
10 * IPv6-over-GRE contributed by Gert Doering <gert (at) greenie.muc.de>
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the NetBSD
23 * Foundation, Inc. and its contributors.
24 * 4. Neither the name of The NetBSD Foundation nor the names of its
25 * contributors may be used to endorse or promote products derived
26 * from this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
29 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
32 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 */
40
/*
 * Encapsulate L3 protocols into IP.
 * See RFC 1701 and RFC 1702 for more details.
 * if_gre is compatible with Cisco GRE tunnels, so a NetBSD box can
 * serve as the other end of a tunnel interface on a Cisco router.
 * See gre(4) for more details.
 * Also supported: minimal encapsulation within IP (protocol 55),
 * as specified by RFC 2004.
 */
49
50 #include <sys/cdefs.h>
51 __KERNEL_RCSID(0, "$NetBSD: if_gre.c,v 1.88.2.6 2007/06/09 23:58:11 ad Exp $");
52
53 #include "opt_gre.h"
54 #include "opt_inet.h"
55 #include "bpfilter.h"
56
57 #ifdef INET
58 #include <sys/param.h>
59 #include <sys/file.h>
60 #include <sys/filedesc.h>
61 #include <sys/malloc.h>
62 #include <sys/mbuf.h>
63 #include <sys/proc.h>
64 #include <sys/protosw.h>
65 #include <sys/socket.h>
66 #include <sys/socketvar.h>
67 #include <sys/ioctl.h>
68 #include <sys/queue.h>
69 #if __NetBSD__
70 #include <sys/systm.h>
71 #include <sys/sysctl.h>
72 #include <sys/kauth.h>
73 #endif
74
75 #include <sys/kernel.h>
76 #include <sys/mutex.h>
77 #include <sys/condvar.h>
78 #include <sys/kthread.h>
79
80 #include <machine/cpu.h>
81
82 #include <net/ethertypes.h>
83 #include <net/if.h>
84 #include <net/if_types.h>
85 #include <net/netisr.h>
86 #include <net/route.h>
87
88 #ifdef INET
89 #include <netinet/in.h>
90 #include <netinet/in_systm.h>
91 #include <netinet/in_var.h>
92 #include <netinet/ip.h>
93 #include <netinet/ip_var.h>
94 #else
95 #error "Huh? if_gre without inet?"
96 #endif
97
98
99 #ifdef NETATALK
100 #include <netatalk/at.h>
101 #include <netatalk/at_var.h>
102 #include <netatalk/at_extern.h>
103 #endif
104
105 #if NBPFILTER > 0
106 #include <sys/time.h>
107 #include <net/bpf.h>
108 #endif
109
110 #include <net/if_gre.h>
111
112 #include <compat/sys/sockio.h>
113 /*
114 * It is not easy to calculate the right value for a GRE MTU.
115 * We leave this task to the admin and use the same default that
116 * other vendors use.
117 */
118 #define GREMTU 1476
119
120 #ifdef GRE_DEBUG
121 #define GRE_DPRINTF(__sc, __fmt, ...) \
122 do { \
123 if (((__sc)->sc_if.if_flags & IFF_DEBUG) != 0) \
124 printf(__fmt, __VA_ARGS__); \
125 } while (/*CONSTCOND*/0)
126 #else
127 #define GRE_DPRINTF(__sc, __fmt, ...) do { } while (/*CONSTCOND*/0)
128 #endif /* GRE_DEBUG */
129
130 struct gre_softc_head gre_softc_list;
131 int ip_gre_ttl = GRE_TTL;
132
133 static int gre_clone_create(struct if_clone *, int);
134 static int gre_clone_destroy(struct ifnet *);
135
136 static struct if_clone gre_cloner =
137 IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
138
139 static int gre_output(struct ifnet *, struct mbuf *,
140 const struct sockaddr *, struct rtentry *);
141 static int gre_ioctl(struct ifnet *, u_long, void *);
142
143 static int gre_compute_route(struct gre_softc *sc);
144
145 static void gre_closef(struct file **, struct lwp *);
146 static int gre_getsockname(struct socket *, struct mbuf *, struct lwp *);
147 static int gre_getpeername(struct socket *, struct mbuf *, struct lwp *);
148 static int gre_getnames(struct socket *, struct lwp *, struct sockaddr_in *,
149 struct sockaddr_in *);
150
151 /* Calling thread must hold sc->sc_mtx. */
152 static void
153 gre_stop(struct gre_softc *sc)
154 {
155 sc->sc_running = 0;
156 cv_signal(&sc->sc_join_cv);
157 }
158
159 /* Calling thread must hold sc->sc_mtx. */
160 static void
161 gre_join(struct gre_softc *sc)
162 {
163 while (sc->sc_running != 0)
164 cv_wait(&sc->sc_join_cv, &sc->sc_mtx);
165 }
166
167 /* Calling thread must hold sc->sc_mtx. */
168 static void
169 gre_wakeup(struct gre_softc *sc)
170 {
171 GRE_DPRINTF(sc, "%s: enter\n", __func__);
172 sc->sc_haswork = 1;
173 cv_signal(&sc->sc_work_cv);
174 }
175
176 static int
177 gre_clone_create(struct if_clone *ifc, int unit)
178 {
179 struct gre_softc *sc;
180
181 sc = malloc(sizeof(struct gre_softc), M_DEVBUF, M_WAITOK);
182 memset(sc, 0, sizeof(struct gre_softc));
183 mutex_init(&sc->sc_mtx, MUTEX_DRIVER, IPL_NET);
184 cv_init(&sc->sc_work_cv, "gre work");
185 cv_init(&sc->sc_join_cv, "gre join");
186 cv_init(&sc->sc_soparm_cv, "gre soparm");
187
188 snprintf(sc->sc_if.if_xname, sizeof(sc->sc_if.if_xname), "%s%d",
189 ifc->ifc_name, unit);
190 sc->sc_if.if_softc = sc;
191 sc->sc_if.if_type = IFT_TUNNEL;
192 sc->sc_if.if_addrlen = 0;
193 sc->sc_if.if_hdrlen = 24; /* IP + GRE */
194 sc->sc_if.if_dlt = DLT_NULL;
195 sc->sc_if.if_mtu = GREMTU;
196 sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
197 sc->sc_if.if_output = gre_output;
198 sc->sc_if.if_ioctl = gre_ioctl;
199 sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
200 sc->g_dstport = sc->g_srcport = 0;
201 sc->sc_proto = IPPROTO_GRE;
202 sc->sc_snd.ifq_maxlen = 256;
203 sc->sc_if.if_flags |= IFF_LINK0;
204 if_attach(&sc->sc_if);
205 if_alloc_sadl(&sc->sc_if);
206 #if NBPFILTER > 0
207 bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t));
208 #endif
209 LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
210 return 0;
211 }
212
213 static int
214 gre_clone_destroy(struct ifnet *ifp)
215 {
216 struct gre_softc *sc = ifp->if_softc;
217
218 LIST_REMOVE(sc, sc_list);
219 #if NBPFILTER > 0
220 bpfdetach(ifp);
221 #endif
222 if_detach(ifp);
223 mutex_enter(&sc->sc_mtx);
224 gre_wakeup(sc);
225 gre_join(sc);
226 mutex_exit(&sc->sc_mtx);
227 rtcache_free(&sc->route);
228
229 cv_destroy(&sc->sc_soparm_cv);
230 cv_destroy(&sc->sc_join_cv);
231 cv_destroy(&sc->sc_work_cv);
232 mutex_destroy(&sc->sc_mtx);
233 free(sc, M_DEVBUF);
234
235 return 0;
236 }
237
/*
 * Socket upcall installed by gre_upcall_add(): arg is the softc, and the
 * only action is to wake the worker thread.  The so and waitflag
 * arguments are not used.
 */
static void
gre_receive(struct socket *so, void *arg, int waitflag)
{
	struct gre_softc *sc = arg;

	GRE_DPRINTF(sc, "%s: enter\n", __func__);

	gre_wakeup(sc);
}
247
248 static void
249 gre_upcall_add(struct socket *so, void *arg)
250 {
251 /* XXX What if the kernel already set an upcall? */
252 so->so_upcallarg = arg;
253 so->so_upcall = gre_receive;
254 so->so_rcv.sb_flags |= SB_UPCALL;
255 }
256
257 static void
258 gre_upcall_remove(struct socket *so)
259 {
260 /* XXX What if the kernel already set an upcall? */
261 so->so_rcv.sb_flags &= ~SB_UPCALL;
262 so->so_upcallarg = NULL;
263 so->so_upcall = NULL;
264 }
265
266 static void
267 gre_sodestroy(struct socket **sop)
268 {
269 gre_upcall_remove(*sop);
270 soshutdown(*sop, SHUT_RDWR);
271 soclose(*sop);
272 *sop = NULL;
273 }
274
275 static struct mbuf *
276 gre_getsockmbuf(struct socket *so)
277 {
278 struct mbuf *m;
279
280 m = m_get(M_WAIT, MT_SONAME);
281 if (m != NULL)
282 MCLAIM(m, so->so_mowner);
283 return m;
284 }
285
286 static int
287 gre_socreate1(struct gre_softc *sc, struct lwp *l, struct gre_soparm *sp,
288 struct socket **sop)
289 {
290 int rc;
291 struct mbuf *m;
292 struct sockaddr_in *sin;
293 struct socket *so;
294
295 GRE_DPRINTF(sc, "%s: enter\n", __func__);
296 rc = socreate(AF_INET, sop, SOCK_DGRAM, IPPROTO_UDP, l);
297 if (rc != 0) {
298 GRE_DPRINTF(sc, "%s: socreate failed\n", __func__);
299 return rc;
300 }
301
302 so = *sop;
303
304 gre_upcall_add(so, sc);
305 if ((m = gre_getsockmbuf(so)) == NULL) {
306 rc = ENOBUFS;
307 goto out;
308 }
309 sin = mtod(m, struct sockaddr_in *);
310 sin->sin_len = m->m_len = sizeof(struct sockaddr_in);
311 sin->sin_family = AF_INET;
312 sin->sin_addr = sc->g_src;
313 sin->sin_port = sc->g_srcport;
314
315 GRE_DPRINTF(sc, "%s: bind 0x%08" PRIx32 " port %d\n", __func__,
316 sin->sin_addr.s_addr, ntohs(sin->sin_port));
317 if ((rc = sobind(so, m, l)) != 0) {
318 GRE_DPRINTF(sc, "%s: sobind failed\n", __func__);
319 goto out;
320 }
321
322 if (sc->g_srcport == 0) {
323 if ((rc = gre_getsockname(so, m, l)) != 0) {
324 GRE_DPRINTF(sc, "%s: gre_getsockname failed\n",
325 __func__);
326 goto out;
327 }
328 sc->g_srcport = sin->sin_port;
329 }
330
331 sin->sin_addr = sc->g_dst;
332 sin->sin_port = sc->g_dstport;
333
334 if ((rc = soconnect(so, m, l)) != 0) {
335 GRE_DPRINTF(sc, "%s: soconnect failed\n", __func__);
336 goto out;
337 }
338
339 *mtod(m, int *) = ip_gre_ttl;
340 m->m_len = sizeof(int);
341 rc = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so, IPPROTO_IP, IP_TTL,
342 &m);
343 m = NULL;
344 if (rc != 0) {
345 printf("%s: setopt ttl failed\n", __func__);
346 rc = 0;
347 }
348 out:
349 m_freem(m);
350
351 if (rc != 0)
352 gre_sodestroy(sop);
353 else
354 *sp = sc->sc_soparm;
355
356 return rc;
357 }
358
359 static void
360 gre_thread1(struct gre_softc *sc, struct lwp *l)
361 {
362 int flags, rc;
363 const struct gre_h *gh;
364 struct ifnet *ifp = &sc->sc_if;
365 struct mbuf *m;
366 struct socket *so = NULL;
367 struct uio uio;
368 struct gre_soparm sp;
369 struct file *fp = NULL;
370
371 GRE_DPRINTF(sc, "%s: enter\n", __func__);
372 mutex_enter(&sc->sc_mtx);
373
374 sc->sc_haswork = 1;
375
376 memset(&sp, 0, sizeof(sp));
377 memset(&uio, 0, sizeof(uio));
378
379 ifp->if_flags |= IFF_RUNNING;
380
381 for (;;) {
382 while (sc->sc_haswork == 0) {
383 GRE_DPRINTF(sc, "%s: sleeping\n", __func__);
384 cv_wait(&sc->sc_work_cv, &sc->sc_mtx);
385 }
386 sc->sc_haswork = 0;
387 GRE_DPRINTF(sc, "%s: awake\n", __func__);
388 if ((ifp->if_flags & IFF_UP) != IFF_UP) {
389 GRE_DPRINTF(sc, "%s: not up & running; exiting\n",
390 __func__);
391 break;
392 }
393 if (sc->sc_proto != IPPROTO_UDP) {
394 GRE_DPRINTF(sc, "%s: not udp; exiting\n", __func__);
395 break;
396 }
397 /* XXX optimize */
398 if (so == NULL || sc->sc_fp != NULL ||
399 memcmp(&sp, &sc->sc_soparm, sizeof(sp)) != 0) {
400 GRE_DPRINTF(sc, "%s: parameters changed\n", __func__);
401
402 if (fp != NULL) {
403 gre_closef(&fp, curlwp);
404 so = NULL;
405 } else if (so != NULL)
406 gre_sodestroy(&so);
407
408 if (sc->sc_fp != NULL) {
409 fp = sc->sc_fp;
410 sc->sc_fp = NULL;
411 so = (struct socket *)fp->f_data;
412 gre_upcall_add(so, sc);
413 sp = sc->sc_soparm;
414 } else if (gre_socreate1(sc, l, &sp, &so) != 0)
415 goto out;
416 }
417 cv_signal(&sc->sc_soparm_cv);
418 for (;;) {
419 flags = MSG_DONTWAIT;
420 uio.uio_resid = 1000000;
421 rc = (*so->so_receive)(so, NULL, &uio, &m, NULL,
422 &flags);
423 /* TBD Back off if ECONNREFUSED (indicates
424 * ICMP Port Unreachable)?
425 */
426 if (rc == EWOULDBLOCK) {
427 GRE_DPRINTF(sc, "%s: so_receive EWOULDBLOCK\n",
428 __func__);
429 break;
430 } else if (rc != 0 || m == NULL) {
431 GRE_DPRINTF(sc, "%s: rc %d m %p\n",
432 ifp->if_xname, rc, (void *)m);
433 continue;
434 } else
435 GRE_DPRINTF(sc, "%s: so_receive ok\n",
436 __func__);
437 if (m->m_len < sizeof(*gh) &&
438 (m = m_pullup(m, sizeof(*gh))) == NULL) {
439 GRE_DPRINTF(sc, "%s: m_pullup failed\n",
440 __func__);
441 continue;
442 }
443 gh = mtod(m, const struct gre_h *);
444
445 if (gre_input3(sc, m, 0, gh, 1) == 0) {
446 GRE_DPRINTF(sc, "%s: dropping unsupported\n",
447 __func__);
448 m_freem(m);
449 }
450 }
451 for (;;) {
452 IF_DEQUEUE(&sc->sc_snd, m);
453 if (m == NULL)
454 break;
455 GRE_DPRINTF(sc, "%s: dequeue\n", __func__);
456 if ((so->so_state & SS_ISCONNECTED) == 0) {
457 GRE_DPRINTF(sc, "%s: not connected\n",
458 __func__);
459 m_freem(m);
460 continue;
461 }
462 rc = (*so->so_send)(so, NULL, NULL, m, NULL, 0, l);
463 /* XXX handle ENOBUFS? */
464 if (rc != 0)
465 GRE_DPRINTF(sc, "%s: so_send failed\n",
466 __func__);
467 }
468 }
469 if (fp != NULL) {
470 GRE_DPRINTF(sc, "%s: removing upcall\n", __func__);
471 gre_upcall_remove(so);
472 } else if (so != NULL)
473 gre_sodestroy(&so);
474 out:
475 GRE_DPRINTF(sc, "%s: stopping\n", __func__);
476 if (fp != NULL)
477 gre_closef(&fp, curlwp);
478 if (sc->sc_proto == IPPROTO_UDP)
479 ifp->if_flags &= ~IFF_RUNNING;
480 while (!IF_IS_EMPTY(&sc->sc_snd)) {
481 IF_DEQUEUE(&sc->sc_snd, m);
482 m_freem(m);
483 }
484 gre_stop(sc);
485 /* must not touch sc after this! */
486 GRE_DPRINTF(sc, "%s: restore ipl\n", __func__);
487 mutex_exit(&sc->sc_mtx);
488 }
489
490 static void
491 gre_thread(void *arg)
492 {
493 struct gre_softc *sc = (struct gre_softc *)arg;
494
495 gre_thread1(sc, curlwp);
496 /* must not touch sc after this! */
497 kthread_exit(0);
498 }
499
500 /* Calling thread must hold sc->sc_mtx. */
501 int
502 gre_input3(struct gre_softc *sc, struct mbuf *m, int hlen,
503 const struct gre_h *gh, int mtx_held)
504 {
505 u_int16_t flags;
506 #if NBPFILTER > 0
507 u_int32_t af = AF_INET; /* af passed to BPF tap */
508 #endif
509 int isr;
510 struct ifqueue *ifq;
511
512 sc->sc_if.if_ipackets++;
513 sc->sc_if.if_ibytes += m->m_pkthdr.len;
514
515 hlen += sizeof(struct gre_h);
516
517 /* process GRE flags as packet can be of variable len */
518 flags = ntohs(gh->flags);
519
520 /* Checksum & Offset are present */
521 if ((flags & GRE_CP) | (flags & GRE_RP))
522 hlen += 4;
523 /* We don't support routing fields (variable length) */
524 if (flags & GRE_RP) {
525 sc->sc_if.if_ierrors++;
526 return 0;
527 }
528 if (flags & GRE_KP)
529 hlen += 4;
530 if (flags & GRE_SP)
531 hlen += 4;
532
533 switch (ntohs(gh->ptype)) { /* ethertypes */
534 case ETHERTYPE_IP: /* shouldn't need a schednetisr(), as */
535 ifq = &ipintrq; /* we are in ip_input */
536 isr = NETISR_IP;
537 break;
538 #ifdef NETATALK
539 case ETHERTYPE_ATALK:
540 ifq = &atintrq1;
541 isr = NETISR_ATALK;
542 #if NBPFILTER > 0
543 af = AF_APPLETALK;
544 #endif
545 break;
546 #endif
547 #ifdef INET6
548 case ETHERTYPE_IPV6:
549 GRE_DPRINTF(sc, "%s: IPv6 packet\n", __func__);
550 ifq = &ip6intrq;
551 isr = NETISR_IPV6;
552 #if NBPFILTER > 0
553 af = AF_INET6;
554 #endif
555 break;
556 #endif
557 default: /* others not yet supported */
558 GRE_DPRINTF(sc, "%s: unhandled ethertype 0x%04x\n", __func__,
559 ntohs(gh->ptype));
560 sc->sc_if.if_noproto++;
561 return 0;
562 }
563
564 if (hlen > m->m_pkthdr.len) {
565 m_freem(m);
566 sc->sc_if.if_ierrors++;
567 return EINVAL;
568 }
569 m_adj(m, hlen);
570
571 #if NBPFILTER > 0
572 if (sc->sc_if.if_bpf != NULL)
573 bpf_mtap_af(sc->sc_if.if_bpf, af, m);
574 #endif /*NBPFILTER > 0*/
575
576 m->m_pkthdr.rcvif = &sc->sc_if;
577
578 if (!mtx_held)
579 mutex_enter(&sc->sc_mtx);
580 if (IF_QFULL(ifq)) {
581 IF_DROP(ifq);
582 m_freem(m);
583 } else {
584 IF_ENQUEUE(ifq, m);
585 }
586 /* we need schednetisr since the address family may change */
587 schednetisr(isr);
588 if (!mtx_held)
589 mutex_exit(&sc->sc_mtx);
590
591 return 1; /* packet is done, no further processing needed */
592 }
593
594 /*
595 * The output routine. Takes a packet and encapsulates it in the protocol
596 * given by sc->sc_proto. See also RFC 1701 and RFC 2004
597 */
598 static int
599 gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
600 struct rtentry *rt)
601 {
602 int error = 0, hlen, msiz;
603 struct gre_softc *sc = ifp->if_softc;
604 struct greip *gi;
605 struct gre_h *gh;
606 struct ip *eip, *ip;
607 u_int8_t ip_tos = 0;
608 u_int16_t etype = 0;
609 struct mobile_h mob_h;
610
611 if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) == 0 ||
612 sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
613 m_freem(m);
614 error = ENETDOWN;
615 goto end;
616 }
617
618 gi = NULL;
619 ip = NULL;
620
621 #if NBPFILTER >0
622 if (ifp->if_bpf)
623 bpf_mtap_af(ifp->if_bpf, dst->sa_family, m);
624 #endif
625
626 m->m_flags &= ~(M_BCAST|M_MCAST);
627
628 switch (sc->sc_proto) {
629 case IPPROTO_MOBILE:
630 if (dst->sa_family != AF_INET) {
631 IF_DROP(&ifp->if_snd);
632 m_freem(m);
633 error = EINVAL;
634 goto end;
635 }
636
637 if (M_UNWRITABLE(m, sizeof(*ip)) &&
638 (m = m_pullup(m, sizeof(*ip))) == NULL) {
639 error = ENOBUFS;
640 goto end;
641 }
642 ip = mtod(m, struct ip *);
643
644 memset(&mob_h, 0, MOB_H_SIZ_L);
645 mob_h.proto = (ip->ip_p) << 8;
646 mob_h.odst = ip->ip_dst.s_addr;
647 ip->ip_dst.s_addr = sc->g_dst.s_addr;
648
649 /*
650 * If the packet comes from our host, we only change
651 * the destination address in the IP header.
652 * Else we also need to save and change the source
653 */
654 if (in_hosteq(ip->ip_src, sc->g_src)) {
655 msiz = MOB_H_SIZ_S;
656 } else {
657 mob_h.proto |= MOB_H_SBIT;
658 mob_h.osrc = ip->ip_src.s_addr;
659 ip->ip_src.s_addr = sc->g_src.s_addr;
660 msiz = MOB_H_SIZ_L;
661 }
662 HTONS(mob_h.proto);
663 mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
664
665 M_PREPEND(m, msiz, M_DONTWAIT);
666 if (m == NULL) {
667 error = ENOBUFS;
668 goto end;
669 }
670 /* XXX Assuming that ip does not dangle after
671 * M_PREPEND. In practice, that's true, but
672 * that's not in M_PREPEND's contract.
673 */
674 memmove(mtod(m, void *), ip, sizeof(*ip));
675 ip = mtod(m, struct ip *);
676 memcpy(ip + 1, &mob_h, (size_t)msiz);
677 ip->ip_len = htons(ntohs(ip->ip_len) + msiz);
678 break;
679 case IPPROTO_UDP:
680 case IPPROTO_GRE:
681 GRE_DPRINTF(sc, "%s: dst->sa_family=%d\n", __func__,
682 dst->sa_family);
683 switch (dst->sa_family) {
684 case AF_INET:
685 ip = mtod(m, struct ip *);
686 ip_tos = ip->ip_tos;
687 etype = ETHERTYPE_IP;
688 break;
689 #ifdef NETATALK
690 case AF_APPLETALK:
691 etype = ETHERTYPE_ATALK;
692 break;
693 #endif
694 #ifdef INET6
695 case AF_INET6:
696 etype = ETHERTYPE_IPV6;
697 break;
698 #endif
699 default:
700 IF_DROP(&ifp->if_snd);
701 m_freem(m);
702 error = EAFNOSUPPORT;
703 goto end;
704 }
705 break;
706 default:
707 IF_DROP(&ifp->if_snd);
708 m_freem(m);
709 error = EINVAL;
710 goto end;
711 }
712
713 switch (sc->sc_proto) {
714 case IPPROTO_GRE:
715 hlen = sizeof(struct greip);
716 break;
717 case IPPROTO_UDP:
718 hlen = sizeof(struct gre_h);
719 break;
720 default:
721 hlen = 0;
722 break;
723 }
724
725 M_PREPEND(m, hlen, M_DONTWAIT);
726
727 if (m == NULL) {
728 IF_DROP(&ifp->if_snd);
729 error = ENOBUFS;
730 goto end;
731 }
732
733 switch (sc->sc_proto) {
734 case IPPROTO_UDP:
735 gh = mtod(m, struct gre_h *);
736 memset(gh, 0, sizeof(*gh));
737 gh->ptype = htons(etype);
738 /* XXX Need to handle IP ToS. Look at how I handle IP TTL. */
739 break;
740 case IPPROTO_GRE:
741 gi = mtod(m, struct greip *);
742 gh = &gi->gi_g;
743 eip = &gi->gi_i;
744 /* we don't have any GRE flags for now */
745 memset(gh, 0, sizeof(*gh));
746 gh->ptype = htons(etype);
747 eip->ip_src = sc->g_src;
748 eip->ip_dst = sc->g_dst;
749 eip->ip_hl = (sizeof(struct ip)) >> 2;
750 eip->ip_ttl = ip_gre_ttl;
751 eip->ip_tos = ip_tos;
752 eip->ip_len = htons(m->m_pkthdr.len);
753 eip->ip_p = sc->sc_proto;
754 break;
755 case IPPROTO_MOBILE:
756 eip = mtod(m, struct ip *);
757 eip->ip_p = sc->sc_proto;
758 break;
759 default:
760 error = EPROTONOSUPPORT;
761 m_freem(m);
762 goto end;
763 }
764
765 ifp->if_opackets++;
766 ifp->if_obytes += m->m_pkthdr.len;
767
768 /* send it off */
769 if (sc->sc_proto == IPPROTO_UDP) {
770 if (IF_QFULL(&sc->sc_snd)) {
771 IF_DROP(&sc->sc_snd);
772 error = ENOBUFS;
773 m_freem(m);
774 } else {
775 IF_ENQUEUE(&sc->sc_snd, m);
776 gre_wakeup(sc);
777 error = 0;
778 }
779 goto end;
780 }
781 if (sc->route.ro_rt == NULL)
782 rtcache_init(&sc->route);
783 else
784 rtcache_check(&sc->route);
785 if (sc->route.ro_rt == NULL) {
786 m_freem(m);
787 goto end;
788 }
789 if (sc->route.ro_rt->rt_ifp->if_softc == sc) {
790 rtcache_clear(&sc->route);
791 m_freem(m);
792 } else
793 error = ip_output(m, NULL, &sc->route, 0, NULL, NULL);
794 end:
795 if (error)
796 ifp->if_oerrors++;
797 return error;
798 }
799
800 /* Calling thread must hold sc->sc_mtx. */
801 static int
802 gre_kick(struct gre_softc *sc)
803 {
804 int rc;
805 struct ifnet *ifp = &sc->sc_if;
806
807 if (sc->sc_proto == IPPROTO_UDP && (ifp->if_flags & IFF_UP) == IFF_UP &&
808 !sc->sc_running) {
809 sc->sc_running = 1;
810 rc = kthread_create(PRI_NONE, 0, NULL, gre_thread, sc,
811 NULL, ifp->if_xname);
812 if (rc != 0)
813 gre_stop(sc);
814 return rc;
815 } else {
816 gre_wakeup(sc);
817 return 0;
818 }
819 }
820
821 /* Calling thread must hold sc->sc_mtx. */
822 static int
823 gre_getname(struct socket *so, int req, struct mbuf *nam, struct lwp *l)
824 {
825 return (*so->so_proto->pr_usrreq)(so, req, NULL, nam, NULL, l);
826 }
827
828 /* Calling thread must hold sc->sc_mtx. */
829 static int
830 gre_getsockname(struct socket *so, struct mbuf *nam, struct lwp *l)
831 {
832 return gre_getname(so, PRU_SOCKADDR, nam, l);
833 }
834
835 /* Calling thread must hold sc->sc_mtx. */
836 static int
837 gre_getpeername(struct socket *so, struct mbuf *nam, struct lwp *l)
838 {
839 return gre_getname(so, PRU_PEERADDR, nam, l);
840 }
841
842 /* Calling thread must hold sc->sc_mtx. */
843 static int
844 gre_getnames(struct socket *so, struct lwp *l, struct sockaddr_in *src,
845 struct sockaddr_in *dst)
846 {
847 struct mbuf *m;
848 struct sockaddr_in *sin;
849 int rc;
850
851 if ((m = gre_getsockmbuf(so)) == NULL)
852 return ENOBUFS;
853
854 sin = mtod(m, struct sockaddr_in *);
855
856 if ((rc = gre_getsockname(so, m, l)) != 0)
857 goto out;
858 if (sin->sin_family != AF_INET) {
859 rc = EAFNOSUPPORT;
860 goto out;
861 }
862 *src = *sin;
863
864 if ((rc = gre_getpeername(so, m, l)) != 0)
865 goto out;
866 if (sin->sin_family != AF_INET) {
867 rc = EAFNOSUPPORT;
868 goto out;
869 }
870 *dst = *sin;
871
872 out:
873 m_freem(m);
874 return rc;
875 }
876
877 static void
878 gre_closef(struct file **fpp, struct lwp *l)
879 {
880 struct file *fp = *fpp;
881
882 mutex_enter(&fp->f_lock);
883 FILE_USE(fp);
884 closef(fp, l);
885 *fpp = NULL;
886 }
887
888 static int
889 gre_ioctl(struct ifnet *ifp, u_long cmd, void *data)
890 {
891 u_char oproto;
892 struct file *fp;
893 struct socket *so;
894 struct sockaddr_in dst, src;
895 struct proc *p = curproc; /* XXX */
896 struct lwp *l = curlwp; /* XXX */
897 struct ifreq *ifr;
898 struct if_laddrreq *lifr = (struct if_laddrreq *)data;
899 struct gre_softc *sc = ifp->if_softc;
900 struct sockaddr_in si;
901 struct sockaddr *sa = NULL;
902 int error = 0;
903 #ifdef COMPAT_OIFREQ
904 u_long ocmd = cmd;
905 struct oifreq *oifr = NULL;
906 struct ifreq ifrb;
907
908 cmd = cvtcmd(cmd);
909 if (cmd != ocmd) {
910 oifr = data;
911 data = ifr = &ifrb;
912 ifreqo2n(oifr, ifr);
913 } else
914 #endif
915 ifr = data;
916
917 switch (cmd) {
918 case SIOCSIFFLAGS:
919 case SIOCSIFMTU:
920 case GRESPROTO:
921 case GRESADDRD:
922 case GRESADDRS:
923 case GRESSOCK:
924 case GREDSOCK:
925 case SIOCSLIFPHYADDR:
926 case SIOCDIFPHYADDR:
927 if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE,
928 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
929 NULL) != 0)
930 return EPERM;
931 break;
932 default:
933 break;
934 }
935
936 mutex_enter(&sc->sc_mtx);
937 switch (cmd) {
938 case SIOCSIFADDR:
939 ifp->if_flags |= IFF_UP;
940 if ((error = gre_kick(sc)) != 0)
941 ifp->if_flags &= ~IFF_UP;
942 break;
943 case SIOCSIFDSTADDR:
944 break;
945 case SIOCSIFFLAGS:
946 oproto = sc->sc_proto;
947 switch (ifr->ifr_flags & (IFF_LINK0|IFF_LINK2)) {
948 case IFF_LINK0|IFF_LINK2:
949 sc->sc_proto = IPPROTO_UDP;
950 if (oproto != IPPROTO_UDP)
951 ifp->if_flags &= ~IFF_RUNNING;
952 error = gre_kick(sc);
953 break;
954 case IFF_LINK0:
955 sc->sc_proto = IPPROTO_GRE;
956 gre_wakeup(sc);
957 goto recompute;
958 case 0:
959 sc->sc_proto = IPPROTO_MOBILE;
960 gre_wakeup(sc);
961 goto recompute;
962 }
963 break;
964 case SIOCSIFMTU:
965 if (ifr->ifr_mtu < 576) {
966 error = EINVAL;
967 break;
968 }
969 ifp->if_mtu = ifr->ifr_mtu;
970 break;
971 case SIOCGIFMTU:
972 ifr->ifr_mtu = sc->sc_if.if_mtu;
973 break;
974 case SIOCADDMULTI:
975 case SIOCDELMULTI:
976 if (ifr == 0) {
977 error = EAFNOSUPPORT;
978 break;
979 }
980 switch (ifr->ifr_addr.sa_family) {
981 #ifdef INET
982 case AF_INET:
983 break;
984 #endif
985 #ifdef INET6
986 case AF_INET6:
987 break;
988 #endif
989 default:
990 error = EAFNOSUPPORT;
991 break;
992 }
993 break;
994 case GRESPROTO:
995 oproto = sc->sc_proto;
996 sc->sc_proto = ifr->ifr_flags;
997 switch (sc->sc_proto) {
998 case IPPROTO_UDP:
999 ifp->if_flags |= IFF_LINK0|IFF_LINK2;
1000 if (oproto != IPPROTO_UDP)
1001 ifp->if_flags &= ~IFF_RUNNING;
1002 error = gre_kick(sc);
1003 break;
1004 case IPPROTO_GRE:
1005 ifp->if_flags |= IFF_LINK0;
1006 ifp->if_flags &= ~IFF_LINK2;
1007 goto recompute;
1008 case IPPROTO_MOBILE:
1009 ifp->if_flags &= ~(IFF_LINK0|IFF_LINK2);
1010 goto recompute;
1011 default:
1012 error = EPROTONOSUPPORT;
1013 break;
1014 }
1015 break;
1016 case GREGPROTO:
1017 ifr->ifr_flags = sc->sc_proto;
1018 break;
1019 case GRESADDRS:
1020 case GRESADDRD:
1021 /*
1022 * set tunnel endpoints, compute a less specific route
1023 * to the remote end and mark if as up
1024 */
1025 sa = &ifr->ifr_addr;
1026 if (cmd == GRESADDRS) {
1027 sc->g_src = (satosin(sa))->sin_addr;
1028 sc->g_srcport = satosin(sa)->sin_port;
1029 }
1030 if (cmd == GRESADDRD) {
1031 if (sc->sc_proto == IPPROTO_UDP &&
1032 satosin(sa)->sin_port == 0) {
1033 error = EINVAL;
1034 break;
1035 }
1036 sc->g_dst = (satosin(sa))->sin_addr;
1037 sc->g_dstport = satosin(sa)->sin_port;
1038 }
1039 recompute:
1040 if (sc->sc_proto == IPPROTO_UDP ||
1041 (sc->g_src.s_addr != INADDR_ANY &&
1042 sc->g_dst.s_addr != INADDR_ANY)) {
1043 rtcache_free(&sc->route);
1044 if (sc->sc_proto == IPPROTO_UDP)
1045 error = gre_kick(sc);
1046 else if (gre_compute_route(sc) == 0)
1047 ifp->if_flags |= IFF_RUNNING;
1048 else
1049 ifp->if_flags &= ~IFF_RUNNING;
1050 }
1051 break;
1052 case GREGADDRS:
1053 memset(&si, 0, sizeof(si));
1054 si.sin_family = AF_INET;
1055 si.sin_len = sizeof(struct sockaddr_in);
1056 si.sin_addr.s_addr = sc->g_src.s_addr;
1057 sa = sintosa(&si);
1058 ifr->ifr_addr = *sa;
1059 break;
1060 case GREGADDRD:
1061 memset(&si, 0, sizeof(si));
1062 si.sin_family = AF_INET;
1063 si.sin_len = sizeof(struct sockaddr_in);
1064 si.sin_addr.s_addr = sc->g_dst.s_addr;
1065 sa = sintosa(&si);
1066 ifr->ifr_addr = *sa;
1067 break;
1068 case GREDSOCK:
1069 if (sc->sc_proto != IPPROTO_UDP) {
1070 error = EINVAL;
1071 break;
1072 }
1073 ifp->if_flags &= ~IFF_UP;
1074 gre_wakeup(sc);
1075 break;
1076 case GRESSOCK:
1077 if (sc->sc_proto != IPPROTO_UDP) {
1078 error = EINVAL;
1079 break;
1080 }
1081 /* getsock() will FILE_USE() and unlock the descriptor for us */
1082 if ((error = getsock(p->p_fd, (int)ifr->ifr_value, &fp)) != 0)
1083 break;
1084 so = (struct socket *)fp->f_data;
1085 if (so->so_type != SOCK_DGRAM) {
1086 FILE_UNUSE(fp, NULL);
1087 error = EINVAL;
1088 break;
1089 }
1090 /* check address */
1091 if ((error = gre_getnames(so, curlwp, &src, &dst)) != 0) {
1092 FILE_UNUSE(fp, NULL);
1093 break;
1094 }
1095
1096 /* Increase reference count. Now that our reference
1097 * to the file descriptor is counted, this thread
1098 * can release our "use" of the descriptor, but it
1099 * will not be destroyed by some other thread's
1100 * action. This thread needs to release its use,
1101 * too, because one and only one thread can have
1102 * use of the descriptor at once. The kernel thread
1103 * will pick up the use if it needs it.
1104 */
1105
1106 fp->f_count++;
1107 FILE_UNUSE(fp, NULL);
1108
1109 while (sc->sc_fp != NULL && error == 0) {
1110 error = cv_timedwait_sig(&sc->sc_soparm_cv, &sc->sc_mtx,
1111 MAX(1, hz / 2));
1112 }
1113 if (error == 0) {
1114 sc->sc_fp = fp;
1115 ifp->if_flags |= IFF_UP;
1116 }
1117
1118 if (error != 0 || (error = gre_kick(sc)) != 0) {
1119 gre_closef(&fp, l);
1120 break;
1121 }
1122 /* fp does not any longer belong to this thread. */
1123 sc->g_src = src.sin_addr;
1124 sc->g_srcport = src.sin_port;
1125 sc->g_dst = dst.sin_addr;
1126 sc->g_dstport = dst.sin_port;
1127 break;
1128 case SIOCSLIFPHYADDR:
1129 if (lifr->addr.ss_family != AF_INET ||
1130 lifr->dstaddr.ss_family != AF_INET) {
1131 error = EAFNOSUPPORT;
1132 break;
1133 }
1134 if (lifr->addr.ss_len != sizeof(si) ||
1135 lifr->dstaddr.ss_len != sizeof(si)) {
1136 error = EINVAL;
1137 break;
1138 }
1139 sc->g_src = satosin(&lifr->addr)->sin_addr;
1140 sc->g_dst = satosin(&lifr->dstaddr)->sin_addr;
1141 sc->g_srcport = satosin(&lifr->addr)->sin_port;
1142 sc->g_dstport = satosin(&lifr->dstaddr)->sin_port;
1143 goto recompute;
1144 case SIOCDIFPHYADDR:
1145 sc->g_src.s_addr = INADDR_ANY;
1146 sc->g_dst.s_addr = INADDR_ANY;
1147 sc->g_srcport = 0;
1148 sc->g_dstport = 0;
1149 goto recompute;
1150 case SIOCGLIFPHYADDR:
1151 if (sc->g_src.s_addr == INADDR_ANY ||
1152 sc->g_dst.s_addr == INADDR_ANY) {
1153 error = EADDRNOTAVAIL;
1154 break;
1155 }
1156 memset(&si, 0, sizeof(si));
1157 si.sin_family = AF_INET;
1158 si.sin_len = sizeof(struct sockaddr_in);
1159 si.sin_addr = sc->g_src;
1160 if (sc->sc_proto == IPPROTO_UDP)
1161 si.sin_port = sc->g_srcport;
1162 memcpy(&lifr->addr, &si, sizeof(si));
1163 si.sin_addr = sc->g_dst;
1164 if (sc->sc_proto == IPPROTO_UDP)
1165 si.sin_port = sc->g_dstport;
1166 memcpy(&lifr->dstaddr, &si, sizeof(si));
1167 break;
1168 default:
1169 error = EINVAL;
1170 break;
1171 }
1172 #ifdef COMPAT_OIFREQ
1173 if (cmd != ocmd)
1174 ifreqn2o(oifr, ifr);
1175 #endif
1176 mutex_exit(&sc->sc_mtx);
1177 return error;
1178 }
1179
1180 /*
1181 * Compute a route to our destination.
1182 */
1183 static int
1184 gre_compute_route(struct gre_softc *sc)
1185 {
1186 struct route *ro;
1187 union {
1188 struct sockaddr dst;
1189 struct sockaddr_in dst4;
1190 } u;
1191
1192 ro = &sc->route;
1193
1194 memset(ro, 0, sizeof(*ro));
1195 sockaddr_in_init(&u.dst4, &sc->g_dst, 0);
1196 rtcache_setdst(ro, &u.dst);
1197
1198 rtcache_init(ro);
1199
1200 if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
1201 GRE_DPRINTF(sc, "%s: route to %s %s\n", sc->sc_if.if_xname,
1202 inet_ntoa(u.dst4.sin_addr),
1203 (ro->ro_rt == NULL)
1204 ? "does not exist"
1205 : "loops back to ourself");
1206 rtcache_free(ro);
1207 return EADDRNOTAVAIL;
1208 }
1209
1210 return 0;
1211 }
1212
/*
 * Compute the one's complement checksum of a flat buffer, much like
 * in_cksum() does for mbufs.
 *
 * p	start of the buffer, read as 16-bit words in host order
 * len	length of the buffer in bytes; a trailing odd byte is summed as
 *	the first byte of a 16-bit word whose second byte is zero
 *
 * Returns the complement of the folded 16-bit sum.
 */
u_int16_t
gre_in_cksum(u_int16_t *p, u_int len)
{
	u_int32_t acc = 0;
	int remaining;

	for (remaining = len >> 1; remaining != 0; remaining--)
		acc += *p++;

	if ((len & 1) != 0) {
		union {
			u_short	w;
			u_char	c[2];
		} last;

		last.c[0] = *(u_char *)p;
		last.c[1] = 0;
		acc += last.w;
	}

	/* end-around-carry */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += acc >> 16;
	return ~acc;
}
1241 #endif
1242
void	greattach(int);

/*
 * Pseudo-device attach routine.  With INET configured, the softc list
 * is initialised and the "gre" cloner is registered; without INET this
 * does nothing.  The count argument is not used.
 */
/* ARGSUSED */
void
greattach(int count)
{
#ifdef INET
	LIST_INIT(&gre_softc_list);
	if_clone_attach(&gre_cloner);
#endif /* INET */
}
1254