if_gre.c revision 1.102 1 /* $NetBSD: if_gre.c,v 1.102 2007/08/24 23:38:31 dyoung Exp $ */
2
3 /*
4 * Copyright (c) 1998 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Heiko W.Rupp <hwr (at) pilhuhn.de>
9 *
10 * IPv6-over-GRE contributed by Gert Doering <gert (at) greenie.muc.de>
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the NetBSD
23 * Foundation, Inc. and its contributors.
24 * 4. Neither the name of The NetBSD Foundation nor the names of its
25 * contributors may be used to endorse or promote products derived
26 * from this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
29 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
32 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 */
40
41 /*
42 * Encapsulate L3 protocols into IP
43 * See RFC 1701 and 1702 for more details.
44 * If_gre is compatible with Cisco GRE tunnels, so you can
45 * have a NetBSD box as the other end of a tunnel interface of a Cisco
46 * router. See gre(4) for more details.
47 * Also supported: IP in IP encaps (proto 55) as of RFC 2004
48 */
49
50 #include <sys/cdefs.h>
51 __KERNEL_RCSID(0, "$NetBSD: if_gre.c,v 1.102 2007/08/24 23:38:31 dyoung Exp $");
52
53 #include "opt_gre.h"
54 #include "opt_inet.h"
55 #include "bpfilter.h"
56
57 #ifdef INET
58 #include <sys/param.h>
59 #include <sys/file.h>
60 #include <sys/filedesc.h>
61 #include <sys/malloc.h>
62 #include <sys/mbuf.h>
63 #include <sys/proc.h>
64 #include <sys/protosw.h>
65 #include <sys/socket.h>
66 #include <sys/socketvar.h>
67 #include <sys/ioctl.h>
68 #include <sys/queue.h>
69 #if __NetBSD__
70 #include <sys/systm.h>
71 #include <sys/sysctl.h>
72 #include <sys/kauth.h>
73 #endif
74
75 #include <sys/kernel.h>
76 #include <sys/mutex.h>
77 #include <sys/condvar.h>
78 #include <sys/kthread.h>
79
80 #include <machine/cpu.h>
81
82 #include <net/ethertypes.h>
83 #include <net/if.h>
84 #include <net/if_types.h>
85 #include <net/netisr.h>
86 #include <net/route.h>
87
88 #ifdef INET
89 #include <netinet/in.h>
90 #include <netinet/in_systm.h>
91 #include <netinet/in_var.h>
92 #include <netinet/ip.h>
93 #include <netinet/ip_var.h>
94 #else
95 #error "Huh? if_gre without inet?"
96 #endif
97
98
99 #ifdef NETATALK
100 #include <netatalk/at.h>
101 #include <netatalk/at_var.h>
102 #include <netatalk/at_extern.h>
103 #endif
104
105 #if NBPFILTER > 0
106 #include <sys/time.h>
107 #include <net/bpf.h>
108 #endif
109
110 #include <net/if_gre.h>
111
112 #include <compat/sys/socket.h>
113 #include <compat/sys/sockio.h>
114 /*
115 * It is not easy to calculate the right value for a GRE MTU.
116 * We leave this task to the admin and use the same default that
117 * other vendors use.
118 */
119 #define GREMTU 1476
120
121 #ifdef GRE_DEBUG
122 int gre_debug = 0;
123 #define GRE_DPRINTF(__sc, __fmt, ...) \
124 do { \
125 if (gre_debug || ((__sc)->sc_if.if_flags & IFF_DEBUG) != 0)\
126 printf(__fmt, __VA_ARGS__); \
127 } while (/*CONSTCOND*/0)
128 #else
129 #define GRE_DPRINTF(__sc, __fmt, ...) do { } while (/*CONSTCOND*/0)
130 #endif /* GRE_DEBUG */
131
132 struct gre_softc_head gre_softc_list;
133 int ip_gre_ttl = GRE_TTL;
134
135 static int gre_clone_create(struct if_clone *, int);
136 static int gre_clone_destroy(struct ifnet *);
137
138 static struct if_clone gre_cloner =
139 IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
140
141 static int gre_output(struct ifnet *, struct mbuf *,
142 const struct sockaddr *, struct rtentry *);
143 static int gre_ioctl(struct ifnet *, u_long, void *);
144
145 static void gre_thread(void *);
146 static int gre_compute_route(struct gre_softc *sc);
147
148 static void gre_closef(struct file **, struct lwp *);
149 static int gre_getsockname(struct socket *, struct mbuf *, struct lwp *);
150 static int gre_getpeername(struct socket *, struct mbuf *, struct lwp *);
151 static int gre_getnames(struct socket *, struct lwp *, struct sockaddr_in *,
152 struct sockaddr_in *);
153
154 /* Calling thread must hold sc->sc_mtx. */
155 static void
156 gre_join(struct gre_softc *sc)
157 {
158 while (sc->sc_running != 0)
159 cv_wait(&sc->sc_join_cv, &sc->sc_mtx);
160 }
161
162 /* Calling thread must hold sc->sc_mtx. */
163 static void
164 gre_wakeup(struct gre_softc *sc)
165 {
166 GRE_DPRINTF(sc, "%s: enter\n", __func__);
167 sc->sc_haswork = 1;
168 cv_signal(&sc->sc_work_cv);
169 }
170
171 static int
172 gre_clone_create(struct if_clone *ifc, int unit)
173 {
174 struct gre_softc *sc;
175
176 sc = malloc(sizeof(struct gre_softc), M_DEVBUF, M_WAITOK);
177 memset(sc, 0, sizeof(struct gre_softc));
178 mutex_init(&sc->sc_mtx, MUTEX_DRIVER, IPL_NET);
179 cv_init(&sc->sc_work_cv, "gre work");
180 cv_init(&sc->sc_join_cv, "gre join");
181 cv_init(&sc->sc_soparm_cv, "gre soparm");
182
183 snprintf(sc->sc_if.if_xname, sizeof(sc->sc_if.if_xname), "%s%d",
184 ifc->ifc_name, unit);
185 sc->sc_if.if_softc = sc;
186 sc->sc_if.if_type = IFT_TUNNEL;
187 sc->sc_if.if_addrlen = 0;
188 sc->sc_if.if_hdrlen = 24; /* IP + GRE */
189 sc->sc_if.if_dlt = DLT_NULL;
190 sc->sc_if.if_mtu = GREMTU;
191 sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
192 sc->sc_if.if_output = gre_output;
193 sc->sc_if.if_ioctl = gre_ioctl;
194 sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
195 sc->g_dstport = sc->g_srcport = 0;
196 sc->sc_proto = IPPROTO_GRE;
197 sc->sc_snd.ifq_maxlen = 256;
198 sc->sc_if.if_flags |= IFF_LINK0;
199 if_attach(&sc->sc_if);
200 if_alloc_sadl(&sc->sc_if);
201 #if NBPFILTER > 0
202 bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t));
203 #endif
204 sc->sc_running = 1;
205 if (kthread_create(PRI_NONE, 0, NULL, gre_thread, sc,
206 NULL, sc->sc_if.if_xname) != 0)
207 sc->sc_running = 0;
208 LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
209 return 0;
210 }
211
212 static int
213 gre_clone_destroy(struct ifnet *ifp)
214 {
215 struct gre_softc *sc = ifp->if_softc;
216
217 LIST_REMOVE(sc, sc_list);
218 #if NBPFILTER > 0
219 bpfdetach(ifp);
220 #endif
221 if_detach(ifp);
222 mutex_enter(&sc->sc_mtx);
223 sc->sc_dying = 1;
224 gre_wakeup(sc);
225 gre_join(sc);
226 mutex_exit(&sc->sc_mtx);
227 rtcache_free(&sc->route);
228
229 cv_destroy(&sc->sc_soparm_cv);
230 cv_destroy(&sc->sc_join_cv);
231 cv_destroy(&sc->sc_work_cv);
232 mutex_destroy(&sc->sc_mtx);
233 free(sc, M_DEVBUF);
234
235 return 0;
236 }
237
/*
 * Socket receive upcall (installed by gre_upcall_add()).  `arg' is the
 * gre softc; all this does is wake the worker thread.  `so' and
 * `waitflag' are not used.
 */
static void
gre_receive(struct socket *so, void *arg, int waitflag)
{
	struct gre_softc *sc = arg;

	GRE_DPRINTF(sc, "%s: enter\n", __func__);

	gre_wakeup(sc);
}
247
248 static void
249 gre_upcall_add(struct socket *so, void *arg)
250 {
251 /* XXX What if the kernel already set an upcall? */
252 KASSERT((so->so_rcv.sb_flags & SB_UPCALL) == 0);
253 so->so_upcallarg = arg;
254 so->so_upcall = gre_receive;
255 so->so_rcv.sb_flags |= SB_UPCALL;
256 }
257
258 static void
259 gre_upcall_remove(struct socket *so)
260 {
261 /* XXX What if the kernel already set an upcall? */
262 so->so_rcv.sb_flags &= ~SB_UPCALL;
263 so->so_upcallarg = NULL;
264 so->so_upcall = NULL;
265 }
266
267 static void
268 gre_sodestroy(struct socket **sop)
269 {
270 gre_upcall_remove(*sop);
271 soshutdown(*sop, SHUT_RDWR);
272 soclose(*sop);
273 *sop = NULL;
274 }
275
276 static struct mbuf *
277 gre_getsockmbuf(struct socket *so)
278 {
279 struct mbuf *m;
280
281 m = m_get(M_WAIT, MT_SONAME);
282 if (m != NULL)
283 MCLAIM(m, so->so_mowner);
284 return m;
285 }
286
287 static int
288 gre_socreate1(struct gre_softc *sc, struct lwp *l, struct socket **sop)
289 {
290 int rc;
291 struct gre_soparm *sp = &sc->sc_soparm;
292 struct mbuf *m;
293 struct sockaddr_in *sin;
294 struct socket *so;
295
296 GRE_DPRINTF(sc, "%s: enter\n", __func__);
297 rc = socreate(AF_INET, sop, SOCK_DGRAM, IPPROTO_UDP, l);
298 if (rc != 0) {
299 GRE_DPRINTF(sc, "%s: socreate failed\n", __func__);
300 return rc;
301 }
302
303 so = *sop;
304
305 gre_upcall_add(so, sc);
306 if ((m = gre_getsockmbuf(so)) == NULL) {
307 rc = ENOBUFS;
308 goto out;
309 }
310 sin = mtod(m, struct sockaddr_in *);
311 sockaddr_in_init(sin, &sc->g_src, sc->g_srcport);
312 m->m_len = sin->sin_len;
313
314 GRE_DPRINTF(sc, "%s: bind 0x%08" PRIx32 " port %d\n", __func__,
315 sin->sin_addr.s_addr, ntohs(sin->sin_port));
316 if ((rc = sobind(so, m, l)) != 0) {
317 GRE_DPRINTF(sc, "%s: sobind failed\n", __func__);
318 goto out;
319 }
320
321 if (sc->g_srcport == 0) {
322 if ((rc = gre_getsockname(so, m, l)) != 0) {
323 GRE_DPRINTF(sc, "%s: gre_getsockname failed\n",
324 __func__);
325 goto out;
326 }
327 sc->g_srcport = sin->sin_port;
328 }
329
330 sockaddr_in_init(sin, &sc->g_dst, sc->g_dstport);
331 m->m_len = sin->sin_len;
332
333 if ((rc = soconnect(so, m, l)) != 0) {
334 GRE_DPRINTF(sc, "%s: soconnect failed\n", __func__);
335 goto out;
336 }
337
338 *mtod(m, int *) = ip_gre_ttl;
339 m->m_len = sizeof(int);
340 KASSERT(so->so_proto && so->so_proto->pr_ctloutput);
341 rc = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so, IPPROTO_IP, IP_TTL,
342 &m);
343 m = NULL;
344 if (rc != 0) {
345 printf("%s: setopt ttl failed\n", __func__);
346 rc = 0;
347 }
348 out:
349 m_freem(m);
350
351 if (rc != 0)
352 gre_sodestroy(sop);
353 else {
354 sc->sc_if.if_flags |= IFF_RUNNING;
355 *sp = sc->sc_newsoparm;
356 }
357
358 return rc;
359 }
360
361 static void
362 gre_do_recv(struct gre_softc *sc, struct socket *so, lwp_t *l)
363 {
364 for (;;) {
365 int flags, rc;
366 const struct gre_h *gh;
367 struct mbuf *m;
368
369 flags = MSG_DONTWAIT;
370 sc->sc_uio.uio_resid = 1000000;
371 rc = (*so->so_receive)(so, NULL, &sc->sc_uio, &m, NULL,
372 &flags);
373 /* TBD Back off if ECONNREFUSED (indicates
374 * ICMP Port Unreachable)?
375 */
376 if (rc == EWOULDBLOCK) {
377 GRE_DPRINTF(sc, "%s: so_receive EWOULDBLOCK\n",
378 __func__);
379 break;
380 } else if (rc != 0 || m == NULL) {
381 GRE_DPRINTF(sc, "%s: rc %d m %p\n",
382 sc->sc_if.if_xname, rc, (void *)m);
383 continue;
384 } else
385 GRE_DPRINTF(sc, "%s: so_receive ok\n",
386 __func__);
387 if (m->m_len < sizeof(*gh) &&
388 (m = m_pullup(m, sizeof(*gh))) == NULL) {
389 GRE_DPRINTF(sc, "%s: m_pullup failed\n",
390 __func__);
391 continue;
392 }
393 gh = mtod(m, const struct gre_h *);
394
395 if (gre_input3(sc, m, 0, gh, 1) == 0) {
396 GRE_DPRINTF(sc, "%s: dropping unsupported\n",
397 __func__);
398 m_freem(m);
399 }
400 }
401 }
402
403 static void
404 gre_do_send(struct gre_softc *sc, struct socket *so, lwp_t *l)
405 {
406 for (;;) {
407 int rc;
408 struct mbuf *m;
409
410 IF_DEQUEUE(&sc->sc_snd, m);
411 if (m == NULL)
412 break;
413 GRE_DPRINTF(sc, "%s: dequeue\n", __func__);
414 if ((so->so_state & SS_ISCONNECTED) == 0) {
415 GRE_DPRINTF(sc, "%s: not connected\n",
416 __func__);
417 m_freem(m);
418 continue;
419 }
420 rc = (*so->so_send)(so, NULL, NULL, m, NULL, 0, l);
421 /* XXX handle ENOBUFS? */
422 if (rc != 0)
423 GRE_DPRINTF(sc, "%s: so_send failed\n",
424 __func__);
425 }
426 }
427
428 static struct socket *
429 gre_reconf(struct gre_softc *sc, struct socket *so, lwp_t *l)
430 {
431 struct ifnet *ifp = &sc->sc_if;
432
433 GRE_DPRINTF(sc, "%s: enter\n", __func__);
434
435 shutdown:
436 if (sc->sc_fp != NULL) {
437 gre_upcall_remove(so);
438 gre_closef(&sc->sc_fp, curlwp);
439 so = NULL;
440 } else if (so != NULL)
441 gre_sodestroy(&so);
442
443 if (sc->sc_dying)
444 GRE_DPRINTF(sc, "%s: dying\n", __func__);
445 else if ((ifp->if_flags & IFF_UP) != IFF_UP)
446 GRE_DPRINTF(sc, "%s: down\n", __func__);
447 else if (sc->sc_proto != IPPROTO_UDP) {
448 GRE_DPRINTF(sc, "%s: not UDP\n", __func__);
449 } else if (sc->sc_newfp != NULL) {
450 sc->sc_fp = sc->sc_newfp;
451 sc->sc_newfp = NULL;
452 so = (struct socket *)sc->sc_fp->f_data;
453 gre_upcall_add(so, sc);
454 sc->sc_soparm = sc->sc_newsoparm;
455 } else if (gre_socreate1(sc, l, &so) != 0) {
456 sc->sc_dying = 1;
457 goto shutdown;
458 }
459 cv_signal(&sc->sc_soparm_cv);
460 if (so != NULL)
461 sc->sc_if.if_flags |= IFF_RUNNING;
462 else if (sc->sc_proto == IPPROTO_UDP) {
463 sc->sc_if.if_flags &= ~IFF_RUNNING;
464 }
465 return so;
466 }
467
468 static void
469 gre_thread1(struct gre_softc *sc, struct lwp *l)
470 {
471 struct ifnet *ifp = &sc->sc_if;
472 struct socket *so = NULL;
473
474 GRE_DPRINTF(sc, "%s: enter\n", __func__);
475 mutex_enter(&sc->sc_mtx);
476
477 while (!sc->sc_dying) {
478 while (sc->sc_haswork == 0) {
479 GRE_DPRINTF(sc, "%s: sleeping\n", __func__);
480 cv_wait(&sc->sc_work_cv, &sc->sc_mtx);
481 }
482 sc->sc_haswork = 0;
483 GRE_DPRINTF(sc, "%s: awake\n", __func__);
484 /* XXX optimize */
485 if ((ifp->if_flags & IFF_UP) != IFF_UP ||
486 sc->sc_proto != IPPROTO_UDP || so == NULL ||
487 sc->sc_newfp != NULL ||
488 memcmp(&sc->sc_soparm, &sc->sc_newsoparm, sizeof(sc->sc_soparm)) != 0)
489 so = gre_reconf(sc, so, l);
490 if (so == NULL)
491 continue;
492 gre_do_recv(sc, so, l);
493 gre_do_send(sc, so, l);
494 }
495 sc->sc_running = 0;
496 cv_signal(&sc->sc_join_cv);
497 /* must not touch sc after this! */
498 GRE_DPRINTF(sc, "%s: restore ipl\n", __func__);
499 mutex_exit(&sc->sc_mtx);
500 }
501
502 static void
503 gre_thread(void *arg)
504 {
505 struct gre_softc *sc = (struct gre_softc *)arg;
506
507 gre_thread1(sc, curlwp);
508 /* must not touch sc after this! */
509 kthread_exit(0);
510 }
511
512 /* Calling thread must hold sc->sc_mtx. */
513 int
514 gre_input3(struct gre_softc *sc, struct mbuf *m, int hlen,
515 const struct gre_h *gh, int mtx_held)
516 {
517 u_int16_t flags;
518 #if NBPFILTER > 0
519 u_int32_t af = AF_INET; /* af passed to BPF tap */
520 #endif
521 int isr;
522 struct ifqueue *ifq;
523
524 sc->sc_if.if_ipackets++;
525 sc->sc_if.if_ibytes += m->m_pkthdr.len;
526
527 hlen += sizeof(struct gre_h);
528
529 /* process GRE flags as packet can be of variable len */
530 flags = ntohs(gh->flags);
531
532 /* Checksum & Offset are present */
533 if ((flags & GRE_CP) | (flags & GRE_RP))
534 hlen += 4;
535 /* We don't support routing fields (variable length) */
536 if (flags & GRE_RP) {
537 sc->sc_if.if_ierrors++;
538 return 0;
539 }
540 if (flags & GRE_KP)
541 hlen += 4;
542 if (flags & GRE_SP)
543 hlen += 4;
544
545 switch (ntohs(gh->ptype)) { /* ethertypes */
546 case ETHERTYPE_IP: /* shouldn't need a schednetisr(), as */
547 ifq = &ipintrq; /* we are in ip_input */
548 isr = NETISR_IP;
549 break;
550 #ifdef NETATALK
551 case ETHERTYPE_ATALK:
552 ifq = &atintrq1;
553 isr = NETISR_ATALK;
554 #if NBPFILTER > 0
555 af = AF_APPLETALK;
556 #endif
557 break;
558 #endif
559 #ifdef INET6
560 case ETHERTYPE_IPV6:
561 GRE_DPRINTF(sc, "%s: IPv6 packet\n", __func__);
562 ifq = &ip6intrq;
563 isr = NETISR_IPV6;
564 #if NBPFILTER > 0
565 af = AF_INET6;
566 #endif
567 break;
568 #endif
569 default: /* others not yet supported */
570 GRE_DPRINTF(sc, "%s: unhandled ethertype 0x%04x\n", __func__,
571 ntohs(gh->ptype));
572 sc->sc_if.if_noproto++;
573 return 0;
574 }
575
576 if (hlen > m->m_pkthdr.len) {
577 m_freem(m);
578 sc->sc_if.if_ierrors++;
579 return EINVAL;
580 }
581 m_adj(m, hlen);
582
583 #if NBPFILTER > 0
584 if (sc->sc_if.if_bpf != NULL)
585 bpf_mtap_af(sc->sc_if.if_bpf, af, m);
586 #endif /*NBPFILTER > 0*/
587
588 m->m_pkthdr.rcvif = &sc->sc_if;
589
590 if (!mtx_held)
591 mutex_enter(&sc->sc_mtx);
592 if (IF_QFULL(ifq)) {
593 IF_DROP(ifq);
594 m_freem(m);
595 } else {
596 IF_ENQUEUE(ifq, m);
597 }
598 /* we need schednetisr since the address family may change */
599 schednetisr(isr);
600 if (!mtx_held)
601 mutex_exit(&sc->sc_mtx);
602
603 return 1; /* packet is done, no further processing needed */
604 }
605
606 /*
607 * The output routine. Takes a packet and encapsulates it in the protocol
608 * given by sc->sc_proto. See also RFC 1701 and RFC 2004
609 */
610 static int
611 gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
612 struct rtentry *rt)
613 {
614 int error = 0, hlen, msiz;
615 struct gre_softc *sc = ifp->if_softc;
616 struct greip *gi;
617 struct gre_h *gh;
618 struct ip *eip, *ip;
619 u_int8_t ip_tos = 0;
620 u_int16_t etype = 0;
621 struct mobile_h mob_h;
622
623 if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
624 (IFF_UP | IFF_RUNNING) ||
625 sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
626 m_freem(m);
627 error = ENETDOWN;
628 goto end;
629 }
630
631 gi = NULL;
632 ip = NULL;
633
634 #if NBPFILTER >0
635 if (ifp->if_bpf)
636 bpf_mtap_af(ifp->if_bpf, dst->sa_family, m);
637 #endif
638
639 m->m_flags &= ~(M_BCAST|M_MCAST);
640
641 switch (sc->sc_proto) {
642 case IPPROTO_MOBILE:
643 if (dst->sa_family != AF_INET) {
644 IF_DROP(&ifp->if_snd);
645 m_freem(m);
646 error = EINVAL;
647 goto end;
648 }
649
650 if (M_UNWRITABLE(m, sizeof(*ip)) &&
651 (m = m_pullup(m, sizeof(*ip))) == NULL) {
652 error = ENOBUFS;
653 goto end;
654 }
655 ip = mtod(m, struct ip *);
656
657 memset(&mob_h, 0, MOB_H_SIZ_L);
658 mob_h.proto = (ip->ip_p) << 8;
659 mob_h.odst = ip->ip_dst.s_addr;
660 ip->ip_dst.s_addr = sc->g_dst.s_addr;
661
662 /*
663 * If the packet comes from our host, we only change
664 * the destination address in the IP header.
665 * Else we also need to save and change the source
666 */
667 if (in_hosteq(ip->ip_src, sc->g_src)) {
668 msiz = MOB_H_SIZ_S;
669 } else {
670 mob_h.proto |= MOB_H_SBIT;
671 mob_h.osrc = ip->ip_src.s_addr;
672 ip->ip_src.s_addr = sc->g_src.s_addr;
673 msiz = MOB_H_SIZ_L;
674 }
675 HTONS(mob_h.proto);
676 mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
677
678 M_PREPEND(m, msiz, M_DONTWAIT);
679 if (m == NULL) {
680 error = ENOBUFS;
681 goto end;
682 }
683 /* XXX Assuming that ip does not dangle after
684 * M_PREPEND. In practice, that's true, but
685 * that's not in M_PREPEND's contract.
686 */
687 memmove(mtod(m, void *), ip, sizeof(*ip));
688 ip = mtod(m, struct ip *);
689 memcpy(ip + 1, &mob_h, (size_t)msiz);
690 ip->ip_len = htons(ntohs(ip->ip_len) + msiz);
691 break;
692 case IPPROTO_UDP:
693 case IPPROTO_GRE:
694 GRE_DPRINTF(sc, "%s: dst->sa_family=%d\n", __func__,
695 dst->sa_family);
696 switch (dst->sa_family) {
697 case AF_INET:
698 ip = mtod(m, struct ip *);
699 ip_tos = ip->ip_tos;
700 etype = ETHERTYPE_IP;
701 break;
702 #ifdef NETATALK
703 case AF_APPLETALK:
704 etype = ETHERTYPE_ATALK;
705 break;
706 #endif
707 #ifdef INET6
708 case AF_INET6:
709 etype = ETHERTYPE_IPV6;
710 break;
711 #endif
712 default:
713 IF_DROP(&ifp->if_snd);
714 m_freem(m);
715 error = EAFNOSUPPORT;
716 goto end;
717 }
718 break;
719 default:
720 IF_DROP(&ifp->if_snd);
721 m_freem(m);
722 error = EINVAL;
723 goto end;
724 }
725
726 switch (sc->sc_proto) {
727 case IPPROTO_GRE:
728 hlen = sizeof(struct greip);
729 break;
730 case IPPROTO_UDP:
731 hlen = sizeof(struct gre_h);
732 break;
733 default:
734 hlen = 0;
735 break;
736 }
737
738 M_PREPEND(m, hlen, M_DONTWAIT);
739
740 if (m == NULL) {
741 IF_DROP(&ifp->if_snd);
742 error = ENOBUFS;
743 goto end;
744 }
745
746 switch (sc->sc_proto) {
747 case IPPROTO_UDP:
748 gh = mtod(m, struct gre_h *);
749 memset(gh, 0, sizeof(*gh));
750 gh->ptype = htons(etype);
751 /* XXX Need to handle IP ToS. Look at how I handle IP TTL. */
752 break;
753 case IPPROTO_GRE:
754 gi = mtod(m, struct greip *);
755 gh = &gi->gi_g;
756 eip = &gi->gi_i;
757 /* we don't have any GRE flags for now */
758 memset(gh, 0, sizeof(*gh));
759 gh->ptype = htons(etype);
760 eip->ip_src = sc->g_src;
761 eip->ip_dst = sc->g_dst;
762 eip->ip_hl = (sizeof(struct ip)) >> 2;
763 eip->ip_ttl = ip_gre_ttl;
764 eip->ip_tos = ip_tos;
765 eip->ip_len = htons(m->m_pkthdr.len);
766 eip->ip_p = sc->sc_proto;
767 break;
768 case IPPROTO_MOBILE:
769 eip = mtod(m, struct ip *);
770 eip->ip_p = sc->sc_proto;
771 break;
772 default:
773 error = EPROTONOSUPPORT;
774 m_freem(m);
775 goto end;
776 }
777
778 ifp->if_opackets++;
779 ifp->if_obytes += m->m_pkthdr.len;
780
781 /* send it off */
782 if (sc->sc_proto == IPPROTO_UDP) {
783 if (IF_QFULL(&sc->sc_snd)) {
784 IF_DROP(&sc->sc_snd);
785 error = ENOBUFS;
786 m_freem(m);
787 } else {
788 IF_ENQUEUE(&sc->sc_snd, m);
789 gre_wakeup(sc);
790 error = 0;
791 }
792 goto end;
793 }
794 if (sc->route.ro_rt == NULL)
795 rtcache_init(&sc->route);
796 else
797 rtcache_check(&sc->route);
798 if (sc->route.ro_rt == NULL) {
799 m_freem(m);
800 goto end;
801 }
802 if (sc->route.ro_rt->rt_ifp->if_softc == sc) {
803 rtcache_clear(&sc->route);
804 m_freem(m);
805 } else
806 error = ip_output(m, NULL, &sc->route, 0, NULL, NULL);
807 end:
808 if (error)
809 ifp->if_oerrors++;
810 return error;
811 }
812
813 /* Calling thread must hold sc->sc_mtx. */
814 static int
815 gre_kick(struct gre_softc *sc)
816 {
817 struct ifnet *ifp = &sc->sc_if;
818
819 if (sc->sc_proto == IPPROTO_UDP && (ifp->if_flags & IFF_UP) == IFF_UP &&
820 !sc->sc_running)
821 return EBUSY;
822 gre_wakeup(sc);
823 return 0;
824 }
825
826 /* Calling thread must hold sc->sc_mtx. */
827 static int
828 gre_getname(struct socket *so, int req, struct mbuf *nam, struct lwp *l)
829 {
830 return (*so->so_proto->pr_usrreq)(so, req, NULL, nam, NULL, l);
831 }
832
833 /* Calling thread must hold sc->sc_mtx. */
834 static int
835 gre_getsockname(struct socket *so, struct mbuf *nam, struct lwp *l)
836 {
837 return gre_getname(so, PRU_SOCKADDR, nam, l);
838 }
839
840 /* Calling thread must hold sc->sc_mtx. */
841 static int
842 gre_getpeername(struct socket *so, struct mbuf *nam, struct lwp *l)
843 {
844 return gre_getname(so, PRU_PEERADDR, nam, l);
845 }
846
847 /* Calling thread must hold sc->sc_mtx. */
848 static int
849 gre_getnames(struct socket *so, struct lwp *l, struct sockaddr_in *src,
850 struct sockaddr_in *dst)
851 {
852 struct mbuf *m;
853 struct sockaddr_in *sin;
854 int rc;
855
856 if ((m = gre_getsockmbuf(so)) == NULL)
857 return ENOBUFS;
858
859 sin = mtod(m, struct sockaddr_in *);
860
861 if ((rc = gre_getsockname(so, m, l)) != 0)
862 goto out;
863 if (sin->sin_family != AF_INET) {
864 rc = EAFNOSUPPORT;
865 goto out;
866 }
867 *src = *sin;
868
869 if ((rc = gre_getpeername(so, m, l)) != 0)
870 goto out;
871 if (sin->sin_family != AF_INET) {
872 rc = EAFNOSUPPORT;
873 goto out;
874 }
875 *dst = *sin;
876
877 out:
878 m_freem(m);
879 return rc;
880 }
881
882 static void
883 gre_closef(struct file **fpp, struct lwp *l)
884 {
885 struct file *fp = *fpp;
886
887 simple_lock(&fp->f_slock);
888 FILE_USE(fp);
889 closef(fp, l);
890 *fpp = NULL;
891 }
892
893 static int
894 gre_ioctl(struct ifnet *ifp, u_long cmd, void *data)
895 {
896 u_char oproto;
897 struct file *fp;
898 struct socket *so;
899 struct sockaddr_in dst, src;
900 struct proc *p = curproc; /* XXX */
901 struct lwp *l = curlwp; /* XXX */
902 struct ifreq *ifr;
903 struct if_laddrreq *lifr = (struct if_laddrreq *)data;
904 struct gre_softc *sc = ifp->if_softc;
905 struct sockaddr_in si;
906 struct sockaddr *sa = NULL;
907 int error = 0;
908 #ifdef COMPAT_OIFREQ
909 u_long ocmd = cmd;
910 struct oifreq *oifr = NULL;
911 struct ifreq ifrb;
912
913 cmd = compat_cvtcmd(cmd);
914 if (cmd != ocmd) {
915 oifr = data;
916 data = ifr = &ifrb;
917 ifreqo2n(oifr, ifr);
918 } else
919 #endif
920 ifr = data;
921
922 switch (cmd) {
923 case SIOCSIFFLAGS:
924 case SIOCSIFMTU:
925 case GRESPROTO:
926 case GRESADDRD:
927 case GRESADDRS:
928 case GRESSOCK:
929 case GREDSOCK:
930 case SIOCSLIFPHYADDR:
931 case SIOCDIFPHYADDR:
932 if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE,
933 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
934 NULL) != 0)
935 return EPERM;
936 break;
937 default:
938 break;
939 }
940
941 mutex_enter(&sc->sc_mtx);
942 switch (cmd) {
943 case SIOCSIFADDR:
944 ifp->if_flags |= IFF_UP;
945 if ((error = gre_kick(sc)) != 0)
946 ifp->if_flags &= ~IFF_UP;
947 break;
948 case SIOCSIFDSTADDR:
949 break;
950 case SIOCSIFFLAGS:
951 oproto = sc->sc_proto;
952 switch (ifr->ifr_flags & (IFF_LINK0|IFF_LINK2)) {
953 case IFF_LINK0|IFF_LINK2:
954 sc->sc_proto = IPPROTO_UDP;
955 if (oproto != IPPROTO_UDP)
956 ifp->if_flags &= ~IFF_RUNNING;
957 error = gre_kick(sc);
958 break;
959 case IFF_LINK0:
960 sc->sc_proto = IPPROTO_GRE;
961 gre_wakeup(sc);
962 goto recompute;
963 case 0:
964 sc->sc_proto = IPPROTO_MOBILE;
965 gre_wakeup(sc);
966 goto recompute;
967 }
968 break;
969 case SIOCSIFMTU:
970 if (ifr->ifr_mtu < 576) {
971 error = EINVAL;
972 break;
973 }
974 ifp->if_mtu = ifr->ifr_mtu;
975 break;
976 case SIOCGIFMTU:
977 ifr->ifr_mtu = sc->sc_if.if_mtu;
978 break;
979 case SIOCADDMULTI:
980 case SIOCDELMULTI:
981 if (ifr == 0) {
982 error = EAFNOSUPPORT;
983 break;
984 }
985 switch (ifr->ifr_addr.sa_family) {
986 #ifdef INET
987 case AF_INET:
988 break;
989 #endif
990 #ifdef INET6
991 case AF_INET6:
992 break;
993 #endif
994 default:
995 error = EAFNOSUPPORT;
996 break;
997 }
998 break;
999 case GRESPROTO:
1000 oproto = sc->sc_proto;
1001 sc->sc_proto = ifr->ifr_flags;
1002 switch (sc->sc_proto) {
1003 case IPPROTO_UDP:
1004 ifp->if_flags |= IFF_LINK0|IFF_LINK2;
1005 if (oproto != IPPROTO_UDP)
1006 ifp->if_flags &= ~IFF_RUNNING;
1007 error = gre_kick(sc);
1008 break;
1009 case IPPROTO_GRE:
1010 ifp->if_flags |= IFF_LINK0;
1011 ifp->if_flags &= ~IFF_LINK2;
1012 goto recompute;
1013 case IPPROTO_MOBILE:
1014 ifp->if_flags &= ~(IFF_LINK0|IFF_LINK2);
1015 goto recompute;
1016 default:
1017 error = EPROTONOSUPPORT;
1018 break;
1019 }
1020 break;
1021 case GREGPROTO:
1022 ifr->ifr_flags = sc->sc_proto;
1023 break;
1024 case GRESADDRS:
1025 case GRESADDRD:
1026 /*
1027 * set tunnel endpoints, compute a less specific route
1028 * to the remote end and mark if as up
1029 */
1030 sa = &ifr->ifr_addr;
1031 if (cmd == GRESADDRS) {
1032 sc->g_src = (satosin(sa))->sin_addr;
1033 sc->g_srcport = satosin(sa)->sin_port;
1034 }
1035 if (cmd == GRESADDRD) {
1036 if (sc->sc_proto == IPPROTO_UDP &&
1037 satosin(sa)->sin_port == 0) {
1038 error = EINVAL;
1039 break;
1040 }
1041 sc->g_dst = (satosin(sa))->sin_addr;
1042 sc->g_dstport = satosin(sa)->sin_port;
1043 }
1044 recompute:
1045 if (sc->sc_proto == IPPROTO_UDP ||
1046 (sc->g_src.s_addr != INADDR_ANY &&
1047 sc->g_dst.s_addr != INADDR_ANY)) {
1048 rtcache_free(&sc->route);
1049 if (sc->sc_proto == IPPROTO_UDP) {
1050 if ((error = gre_kick(sc)) == 0)
1051 ifp->if_flags |= IFF_RUNNING;
1052 else
1053 ifp->if_flags &= ~IFF_RUNNING;
1054 }
1055 else if (gre_compute_route(sc) == 0)
1056 ifp->if_flags |= IFF_RUNNING;
1057 else
1058 ifp->if_flags &= ~IFF_RUNNING;
1059 }
1060 break;
1061 case GREGADDRS:
1062 sockaddr_in_init(&si, &sc->g_src,
1063 (sc->sc_proto == IPPROTO_UDP) ? sc->g_srcport : 0);
1064 ifr->ifr_addr = *sintosa(&si);
1065 break;
1066 case GREGADDRD:
1067 sockaddr_in_init(&si, &sc->g_dst,
1068 (sc->sc_proto == IPPROTO_UDP) ? sc->g_dstport : 0);
1069 ifr->ifr_addr = *sintosa(&si);
1070 break;
1071 case GREDSOCK:
1072 if (sc->sc_proto != IPPROTO_UDP) {
1073 error = EINVAL;
1074 break;
1075 }
1076 ifp->if_flags &= ~IFF_UP;
1077 gre_wakeup(sc);
1078 break;
1079 case GRESSOCK:
1080 if (sc->sc_proto != IPPROTO_UDP) {
1081 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1082 error = EINVAL;
1083 break;
1084 }
1085 /* getsock() will FILE_USE() and unlock the descriptor for us */
1086 if ((error = getsock(p->p_fd, (int)ifr->ifr_value, &fp)) != 0) {
1087 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1088 error = EINVAL;
1089 break;
1090 }
1091 so = (struct socket *)fp->f_data;
1092 if (so->so_type != SOCK_DGRAM) {
1093 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1094 FILE_UNUSE(fp, NULL);
1095 error = EINVAL;
1096 break;
1097 }
1098 /* check address */
1099 if ((error = gre_getnames(so, curlwp, &src, &dst)) != 0) {
1100 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1101 FILE_UNUSE(fp, NULL);
1102 break;
1103 }
1104
1105 /* Increase reference count. Now that our reference
1106 * to the file descriptor is counted, this thread
1107 * can release our "use" of the descriptor, but it
1108 * will not be destroyed by some other thread's
1109 * action. This thread needs to release its use,
1110 * too, because one and only one thread can have
1111 * use of the descriptor at once. The kernel thread
1112 * will pick up the use if it needs it.
1113 */
1114
1115 fp->f_count++;
1116 FILE_UNUSE(fp, NULL);
1117
1118 while (sc->sc_newfp != NULL && error == 0) {
1119 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1120 error = cv_timedwait_sig(&sc->sc_soparm_cv, &sc->sc_mtx,
1121 MAX(1, hz / 2));
1122 }
1123 if (error == 0) {
1124 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1125 sc->sc_newfp = fp;
1126 ifp->if_flags |= IFF_UP;
1127 }
1128
1129 if (error != 0 || (error = gre_kick(sc)) != 0) {
1130 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1131 gre_closef(&fp, l);
1132 break;
1133 }
1134 /* fp does not any longer belong to this thread. */
1135 sc->g_src = src.sin_addr;
1136 sc->g_srcport = src.sin_port;
1137 sc->g_dst = dst.sin_addr;
1138 sc->g_dstport = dst.sin_port;
1139 GRE_DPRINTF(sc, "%s: sock 0x%08" PRIx32 " port %d -> "
1140 "0x%08" PRIx32 " port %d\n", __func__,
1141 src.sin_addr.s_addr, ntohs(src.sin_port),
1142 dst.sin_addr.s_addr, ntohs(dst.sin_port));
1143 break;
1144 case SIOCSLIFPHYADDR:
1145 if (lifr->addr.ss_family != AF_INET ||
1146 lifr->dstaddr.ss_family != AF_INET) {
1147 error = EAFNOSUPPORT;
1148 break;
1149 }
1150 if (lifr->addr.ss_len != sizeof(si) ||
1151 lifr->dstaddr.ss_len != sizeof(si)) {
1152 error = EINVAL;
1153 break;
1154 }
1155 sc->g_src = satosin(&lifr->addr)->sin_addr;
1156 sc->g_dst = satosin(&lifr->dstaddr)->sin_addr;
1157 sc->g_srcport = satosin(&lifr->addr)->sin_port;
1158 sc->g_dstport = satosin(&lifr->dstaddr)->sin_port;
1159 goto recompute;
1160 case SIOCDIFPHYADDR:
1161 sc->g_src.s_addr = INADDR_ANY;
1162 sc->g_dst.s_addr = INADDR_ANY;
1163 sc->g_srcport = 0;
1164 sc->g_dstport = 0;
1165 goto recompute;
1166 case SIOCGLIFPHYADDR:
1167 if (sc->g_src.s_addr == INADDR_ANY ||
1168 sc->g_dst.s_addr == INADDR_ANY) {
1169 error = EADDRNOTAVAIL;
1170 break;
1171 }
1172 sockaddr_in_init(satosin(&lifr->addr), &sc->g_src,
1173 (sc->sc_proto == IPPROTO_UDP) ? sc->g_srcport : 0);
1174 sockaddr_in_init(satosin(&lifr->dstaddr), &sc->g_dst,
1175 (sc->sc_proto == IPPROTO_UDP) ? sc->g_dstport : 0);
1176 break;
1177 default:
1178 error = EINVAL;
1179 break;
1180 }
1181 #ifdef COMPAT_OIFREQ
1182 if (cmd != ocmd)
1183 ifreqn2o(oifr, ifr);
1184 #endif
1185 mutex_exit(&sc->sc_mtx);
1186 return error;
1187 }
1188
1189 /*
1190 * Compute a route to our destination.
1191 */
1192 static int
1193 gre_compute_route(struct gre_softc *sc)
1194 {
1195 struct route *ro;
1196 union {
1197 struct sockaddr dst;
1198 struct sockaddr_in dst4;
1199 } u;
1200
1201 ro = &sc->route;
1202
1203 memset(ro, 0, sizeof(*ro));
1204 sockaddr_in_init(&u.dst4, &sc->g_dst, 0);
1205 rtcache_setdst(ro, &u.dst);
1206
1207 rtcache_init(ro);
1208
1209 if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
1210 GRE_DPRINTF(sc, "%s: route to %s %s\n", sc->sc_if.if_xname,
1211 inet_ntoa(u.dst4.sin_addr),
1212 (ro->ro_rt == NULL)
1213 ? "does not exist"
1214 : "loops back to ourself");
1215 rtcache_free(ro);
1216 return EADDRNOTAVAIL;
1217 }
1218
1219 return 0;
1220 }
1221
/*
 * Ones-complement checksum over a flat buffer - much like in_cksum,
 * which operates on mbufs.
 *
 * `p' points at the data and `len' is its length in bytes.  The data
 * is summed as 16-bit words in memory order; an odd trailing byte is
 * treated as the first byte of a word whose second byte is zero.
 * Returns the complement of the folded sum.
 */
u_int16_t
gre_in_cksum(u_int16_t *p, u_int len)
{
	u_int32_t acc = 0;
	u_int left;

	/* Whole 16-bit words first. */
	for (left = len; left > 1; left -= 2)
		acc += *p++;

	/* Then the odd byte, if any. */
	if (left == 1) {
		union {
			u_short w;
			u_char c[2];
		} tail;

		tail.c[1] = 0;
		tail.c[0] = *(u_char *)p;
		acc += tail.w;
	}

	/* end-around-carry */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += (acc >> 16);
	return ~acc;
}
1250 #endif
1251
void greattach(int);

/*
 * Pseudo-device attach routine.  When INET is configured it
 * initialises the list of gre softcs and registers the "gre" interface
 * cloner; without INET it does nothing.  `count' is not used.
 */
/* ARGSUSED */
void
greattach(int count)
{
#ifdef INET
	LIST_INIT(&gre_softc_list);
	if_clone_attach(&gre_cloner);
#endif /* INET */
}
1263