if_gre.c revision 1.104 1 /* $NetBSD: if_gre.c,v 1.104 2007/08/30 05:14:32 dyoung Exp $ */
2
3 /*
4 * Copyright (c) 1998 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Heiko W.Rupp <hwr (at) pilhuhn.de>
9 *
10 * IPv6-over-GRE contributed by Gert Doering <gert (at) greenie.muc.de>
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the NetBSD
23 * Foundation, Inc. and its contributors.
24 * 4. Neither the name of The NetBSD Foundation nor the names of its
25 * contributors may be used to endorse or promote products derived
26 * from this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
29 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
32 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 */
40
41 /*
42 * Encapsulate L3 protocols into IP
43 * See RFC 1701 and 1702 for more details.
44 * If_gre is compatible with Cisco GRE tunnels, so you can
45 * have a NetBSD box as the other end of a tunnel interface of a Cisco
46 * router. See gre(4) for more details.
47 * Also supported: minimal encapsulation within IP (proto 55, IPPROTO_MOBILE) as of RFC 2004
48 */
49
50 #include <sys/cdefs.h>
51 __KERNEL_RCSID(0, "$NetBSD: if_gre.c,v 1.104 2007/08/30 05:14:32 dyoung Exp $");
52
53 #include "opt_gre.h"
54 #include "opt_inet.h"
55 #include "bpfilter.h"
56
57 #ifdef INET
58 #include <sys/param.h>
59 #include <sys/file.h>
60 #include <sys/filedesc.h>
61 #include <sys/malloc.h>
62 #include <sys/mbuf.h>
63 #include <sys/proc.h>
64 #include <sys/protosw.h>
65 #include <sys/socket.h>
66 #include <sys/socketvar.h>
67 #include <sys/ioctl.h>
68 #include <sys/queue.h>
69 #if __NetBSD__
70 #include <sys/systm.h>
71 #include <sys/sysctl.h>
72 #include <sys/kauth.h>
73 #endif
74
75 #include <sys/kernel.h>
76 #include <sys/mutex.h>
77 #include <sys/condvar.h>
78 #include <sys/kthread.h>
79
80 #include <machine/cpu.h>
81
82 #include <net/ethertypes.h>
83 #include <net/if.h>
84 #include <net/if_types.h>
85 #include <net/netisr.h>
86 #include <net/route.h>
87
88 #ifdef INET
89 #include <netinet/in.h>
90 #include <netinet/in_systm.h>
91 #include <netinet/in_var.h>
92 #include <netinet/ip.h>
93 #include <netinet/ip_var.h>
94 #else
95 #error "Huh? if_gre without inet?"
96 #endif
97
98
99 #ifdef NETATALK
100 #include <netatalk/at.h>
101 #include <netatalk/at_var.h>
102 #include <netatalk/at_extern.h>
103 #endif
104
105 #if NBPFILTER > 0
106 #include <sys/time.h>
107 #include <net/bpf.h>
108 #endif
109
110 #include <net/if_gre.h>
111
112 #include <compat/sys/socket.h>
113 #include <compat/sys/sockio.h>
114 /*
115 * It is not easy to calculate the right value for a GRE MTU.
116 * We leave this task to the admin and use the same default that
117 * other vendors use.
118 */
119 #define GREMTU 1476
120
121 #ifdef GRE_DEBUG
122 int gre_debug = 0;
123 #define GRE_DPRINTF(__sc, __fmt, ...) \
124 do { \
125 if (gre_debug || ((__sc)->sc_if.if_flags & IFF_DEBUG) != 0)\
126 printf(__fmt, __VA_ARGS__); \
127 } while (/*CONSTCOND*/0)
128 #else
129 #define GRE_DPRINTF(__sc, __fmt, ...) do { } while (/*CONSTCOND*/0)
130 #endif /* GRE_DEBUG */
131
132 struct gre_softc_head gre_softc_list;
133 int ip_gre_ttl = GRE_TTL;
134
135 static int gre_clone_create(struct if_clone *, int);
136 static int gre_clone_destroy(struct ifnet *);
137
138 static struct if_clone gre_cloner =
139 IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
140
141 static int gre_output(struct ifnet *, struct mbuf *,
142 const struct sockaddr *, struct rtentry *);
143 static int gre_ioctl(struct ifnet *, u_long, void *);
144
145 static void gre_thread(void *);
146 static int gre_compute_route(struct gre_softc *sc);
147
148 static void gre_closef(struct file **, struct lwp *);
149 static int gre_getsockname(struct socket *, struct mbuf *, struct lwp *);
150 static int gre_getpeername(struct socket *, struct mbuf *, struct lwp *);
151 static int gre_getnames(struct socket *, struct lwp *, struct sockaddr_in *,
152 struct sockaddr_in *);
153
154 /* Calling thread must hold sc->sc_mtx. */
155 static void
156 gre_join(struct gre_softc *sc)
157 {
158 while (sc->sc_running != 0)
159 cv_wait(&sc->sc_join_cv, &sc->sc_mtx);
160 }
161
162 /* Calling thread must hold sc->sc_mtx. */
163 static void
164 gre_wakeup(struct gre_softc *sc)
165 {
166 GRE_DPRINTF(sc, "%s: enter\n", __func__);
167 sc->sc_haswork = 1;
168 cv_signal(&sc->sc_work_cv);
169 }
170
171 static int
172 gre_clone_create(struct if_clone *ifc, int unit)
173 {
174 struct gre_softc *sc;
175
176 sc = malloc(sizeof(struct gre_softc), M_DEVBUF, M_WAITOK);
177 memset(sc, 0, sizeof(struct gre_softc));
178 mutex_init(&sc->sc_mtx, MUTEX_DRIVER, IPL_NET);
179 cv_init(&sc->sc_work_cv, "gre work");
180 cv_init(&sc->sc_join_cv, "gre join");
181 cv_init(&sc->sc_soparm_cv, "gre soparm");
182
183 snprintf(sc->sc_if.if_xname, sizeof(sc->sc_if.if_xname), "%s%d",
184 ifc->ifc_name, unit);
185 sc->sc_if.if_softc = sc;
186 sc->sc_if.if_type = IFT_TUNNEL;
187 sc->sc_if.if_addrlen = 0;
188 sc->sc_if.if_hdrlen = 24; /* IP + GRE */
189 sc->sc_if.if_dlt = DLT_NULL;
190 sc->sc_if.if_mtu = GREMTU;
191 sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
192 sc->sc_if.if_output = gre_output;
193 sc->sc_if.if_ioctl = gre_ioctl;
194 sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
195 sc->g_dstport = sc->g_srcport = 0;
196 sc->sc_proto = IPPROTO_GRE;
197 sc->sc_snd.ifq_maxlen = 256;
198 sc->sc_if.if_flags |= IFF_LINK0;
199 if_attach(&sc->sc_if);
200 if_alloc_sadl(&sc->sc_if);
201 #if NBPFILTER > 0
202 bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t));
203 #endif
204 sc->sc_running = 1;
205 if (kthread_create(PRI_NONE, 0, NULL, gre_thread, sc,
206 NULL, sc->sc_if.if_xname) != 0)
207 sc->sc_running = 0;
208 LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
209 return 0;
210 }
211
212 static int
213 gre_clone_destroy(struct ifnet *ifp)
214 {
215 struct gre_softc *sc = ifp->if_softc;
216
217 LIST_REMOVE(sc, sc_list);
218 #if NBPFILTER > 0
219 bpfdetach(ifp);
220 #endif
221 if_detach(ifp);
222 mutex_enter(&sc->sc_mtx);
223 sc->sc_dying = 1;
224 gre_wakeup(sc);
225 gre_join(sc);
226 mutex_exit(&sc->sc_mtx);
227 rtcache_free(&sc->route);
228
229 cv_destroy(&sc->sc_soparm_cv);
230 cv_destroy(&sc->sc_join_cv);
231 cv_destroy(&sc->sc_work_cv);
232 mutex_destroy(&sc->sc_mtx);
233 free(sc, M_DEVBUF);
234
235 return 0;
236 }
237
/*
 * Socket receive upcall installed by gre_upcall_add().  `arg' is the
 * softc; all this does is poke the worker thread.  `so' and
 * `waitflag' are not used.
 *
 * NOTE(review): gre_wakeup() documents that sc_mtx must be held, but
 * this function does not take it -- confirm against the upcall's
 * calling contexts.
 */
static void
gre_receive(struct socket *so, void *arg, int waitflag)
{
	struct gre_softc *sc;

	sc = (struct gre_softc *)arg;
	GRE_DPRINTF(sc, "%s: enter\n", __func__);
	gre_wakeup(sc);
}
247
248 static void
249 gre_upcall_add(struct socket *so, void *arg)
250 {
251 /* XXX What if the kernel already set an upcall? */
252 KASSERT((so->so_rcv.sb_flags & SB_UPCALL) == 0);
253 so->so_upcallarg = arg;
254 so->so_upcall = gre_receive;
255 so->so_rcv.sb_flags |= SB_UPCALL;
256 }
257
258 static void
259 gre_upcall_remove(struct socket *so)
260 {
261 /* XXX What if the kernel already set an upcall? */
262 so->so_rcv.sb_flags &= ~SB_UPCALL;
263 so->so_upcallarg = NULL;
264 so->so_upcall = NULL;
265 }
266
267 static void
268 gre_sodestroy(struct socket **sop)
269 {
270 gre_upcall_remove(*sop);
271 soshutdown(*sop, SHUT_RDWR);
272 soclose(*sop);
273 *sop = NULL;
274 }
275
276 static struct mbuf *
277 gre_getsockmbuf(struct socket *so)
278 {
279 struct mbuf *m;
280
281 m = m_get(M_WAIT, MT_SONAME);
282 if (m != NULL)
283 MCLAIM(m, so->so_mowner);
284 return m;
285 }
286
287 static int
288 gre_socreate1(struct gre_softc *sc, struct lwp *l, struct socket **sop)
289 {
290 int rc;
291 struct gre_soparm *sp = &sc->sc_soparm;
292 struct mbuf *m;
293 struct sockaddr_in *sin;
294 struct socket *so;
295
296 GRE_DPRINTF(sc, "%s: enter\n", __func__);
297 rc = socreate(AF_INET, sop, SOCK_DGRAM, IPPROTO_UDP, l);
298 if (rc != 0) {
299 GRE_DPRINTF(sc, "%s: socreate failed\n", __func__);
300 return rc;
301 }
302
303 so = *sop;
304
305 gre_upcall_add(so, sc);
306 if ((m = gre_getsockmbuf(so)) == NULL) {
307 rc = ENOBUFS;
308 goto out;
309 }
310 sin = mtod(m, struct sockaddr_in *);
311 sockaddr_in_init(sin, &sc->g_src, sc->g_srcport);
312 m->m_len = sin->sin_len;
313
314 GRE_DPRINTF(sc, "%s: bind 0x%08" PRIx32 " port %d\n", __func__,
315 sin->sin_addr.s_addr, ntohs(sin->sin_port));
316 if ((rc = sobind(so, m, l)) != 0) {
317 GRE_DPRINTF(sc, "%s: sobind failed\n", __func__);
318 goto out;
319 }
320
321 if (sc->g_srcport == 0) {
322 if ((rc = gre_getsockname(so, m, l)) != 0) {
323 GRE_DPRINTF(sc, "%s: gre_getsockname\n", __func__);
324 goto out;
325 }
326 sc->g_srcport = sin->sin_port;
327 }
328
329 sockaddr_in_init(sin, &sc->g_dst, sc->g_dstport);
330 m->m_len = sin->sin_len;
331
332 if ((rc = soconnect(so, m, l)) != 0) {
333 GRE_DPRINTF(sc, "%s: soconnect failed\n", __func__);
334 goto out;
335 }
336
337 *mtod(m, int *) = ip_gre_ttl;
338 m->m_len = sizeof(int);
339 KASSERT(so->so_proto && so->so_proto->pr_ctloutput);
340 rc = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so, IPPROTO_IP, IP_TTL,
341 &m);
342 m = NULL;
343 if (rc != 0) {
344 GRE_DPRINTF(sc, "%s: setopt ttl failed\n", __func__);
345 rc = 0;
346 }
347 out:
348 m_freem(m);
349
350 if (rc != 0)
351 gre_sodestroy(sop);
352 else {
353 sc->sc_if.if_flags |= IFF_RUNNING;
354 *sp = sc->sc_newsoparm;
355 }
356
357 return rc;
358 }
359
360 static void
361 gre_do_recv(struct gre_softc *sc, struct socket *so, lwp_t *l)
362 {
363 for (;;) {
364 int flags, rc;
365 const struct gre_h *gh;
366 struct mbuf *m;
367
368 flags = MSG_DONTWAIT;
369 sc->sc_uio.uio_resid = 1000000;
370 rc = (*so->so_receive)(so, NULL, &sc->sc_uio, &m, NULL, &flags);
371 /* TBD Back off if ECONNREFUSED (indicates
372 * ICMP Port Unreachable)?
373 */
374 if (rc == EWOULDBLOCK) {
375 GRE_DPRINTF(sc, "%s: so_receive EWOULDBLOCK\n",
376 __func__);
377 break;
378 } else if (rc != 0 || m == NULL) {
379 GRE_DPRINTF(sc, "%s: rc %d m %p\n",
380 sc->sc_if.if_xname, rc, (void *)m);
381 continue;
382 } else
383 GRE_DPRINTF(sc, "%s: so_receive ok\n", __func__);
384 if (m->m_len < sizeof(*gh) &&
385 (m = m_pullup(m, sizeof(*gh))) == NULL) {
386 GRE_DPRINTF(sc, "%s: m_pullup failed\n", __func__);
387 continue;
388 }
389 gh = mtod(m, const struct gre_h *);
390
391 if (gre_input3(sc, m, 0, gh, 0) == 0) {
392 GRE_DPRINTF(sc, "%s: dropping unsupported\n", __func__);
393 m_freem(m);
394 }
395 }
396 }
397
398 static void
399 gre_do_send(struct gre_softc *sc, struct socket *so, lwp_t *l)
400 {
401 for (;;) {
402 int rc;
403 struct mbuf *m;
404
405 mutex_enter(&sc->sc_mtx);
406 IF_DEQUEUE(&sc->sc_snd, m);
407 mutex_exit(&sc->sc_mtx);
408 if (m == NULL)
409 break;
410 GRE_DPRINTF(sc, "%s: dequeue\n", __func__);
411 if ((so->so_state & SS_ISCONNECTED) == 0) {
412 GRE_DPRINTF(sc, "%s: not connected\n", __func__);
413 m_freem(m);
414 continue;
415 }
416 rc = (*so->so_send)(so, NULL, NULL, m, NULL, 0, l);
417 /* XXX handle ENOBUFS? */
418 if (rc != 0)
419 GRE_DPRINTF(sc, "%s: so_send failed\n",
420 __func__);
421 }
422 }
423
424 static struct socket *
425 gre_reconf(struct gre_softc *sc, struct socket *so, lwp_t *l)
426 {
427 struct ifnet *ifp = &sc->sc_if;
428
429 GRE_DPRINTF(sc, "%s: enter\n", __func__);
430
431 shutdown:
432 if (sc->sc_fp != NULL) {
433 gre_upcall_remove(so);
434 gre_closef(&sc->sc_fp, curlwp);
435 so = NULL;
436 } else if (so != NULL)
437 gre_sodestroy(&so);
438
439 if (sc->sc_dying)
440 GRE_DPRINTF(sc, "%s: dying\n", __func__);
441 else if ((ifp->if_flags & IFF_UP) != IFF_UP)
442 GRE_DPRINTF(sc, "%s: down\n", __func__);
443 else if (sc->sc_proto != IPPROTO_UDP)
444 GRE_DPRINTF(sc, "%s: not UDP\n", __func__);
445 else if (sc->sc_newfp != NULL) {
446 sc->sc_fp = sc->sc_newfp;
447 sc->sc_newfp = NULL;
448 so = (struct socket *)sc->sc_fp->f_data;
449 gre_upcall_add(so, sc);
450 sc->sc_soparm = sc->sc_newsoparm;
451 } else if (gre_socreate1(sc, l, &so) != 0) {
452 sc->sc_dying = 1;
453 goto shutdown;
454 }
455 cv_signal(&sc->sc_soparm_cv);
456 if (so != NULL)
457 sc->sc_if.if_flags |= IFF_RUNNING;
458 else if (sc->sc_proto == IPPROTO_UDP)
459 sc->sc_if.if_flags &= ~IFF_RUNNING;
460 return so;
461 }
462
463 static void
464 gre_thread1(struct gre_softc *sc, struct lwp *l)
465 {
466 struct ifnet *ifp = &sc->sc_if;
467 struct socket *so = NULL;
468
469 GRE_DPRINTF(sc, "%s: enter\n", __func__);
470
471 while (!sc->sc_dying) {
472 while (sc->sc_haswork == 0) {
473 GRE_DPRINTF(sc, "%s: sleeping\n", __func__);
474 cv_wait(&sc->sc_work_cv, &sc->sc_mtx);
475 }
476 sc->sc_haswork = 0;
477
478 GRE_DPRINTF(sc, "%s: awake\n", __func__);
479
480 /* XXX optimize */
481 if ((ifp->if_flags & IFF_UP) != IFF_UP ||
482 sc->sc_proto != IPPROTO_UDP || so == NULL ||
483 sc->sc_newfp != NULL ||
484 memcmp(&sc->sc_soparm, &sc->sc_newsoparm,
485 sizeof(sc->sc_soparm)) != 0)
486 so = gre_reconf(sc, so, l);
487 mutex_exit(&sc->sc_mtx);
488 if (so != NULL) {
489 gre_do_recv(sc, so, l);
490 gre_do_send(sc, so, l);
491 }
492 mutex_enter(&sc->sc_mtx);
493 }
494 sc->sc_running = 0;
495 cv_signal(&sc->sc_join_cv);
496 }
497
498 static void
499 gre_thread(void *arg)
500 {
501 struct gre_softc *sc = (struct gre_softc *)arg;
502
503 mutex_enter(&sc->sc_mtx);
504 gre_thread1(sc, curlwp);
505 mutex_exit(&sc->sc_mtx);
506
507 /* must not touch sc after this! */
508 kthread_exit(0);
509 }
510
511 /* Calling thread must hold sc->sc_mtx. */
512 int
513 gre_input3(struct gre_softc *sc, struct mbuf *m, int hlen,
514 const struct gre_h *gh, int mtx_held)
515 {
516 u_int16_t flags;
517 #if NBPFILTER > 0
518 u_int32_t af = AF_INET; /* af passed to BPF tap */
519 #endif
520 int isr;
521 struct ifqueue *ifq;
522
523 sc->sc_if.if_ipackets++;
524 sc->sc_if.if_ibytes += m->m_pkthdr.len;
525
526 hlen += sizeof(struct gre_h);
527
528 /* process GRE flags as packet can be of variable len */
529 flags = ntohs(gh->flags);
530
531 /* Checksum & Offset are present */
532 if ((flags & GRE_CP) | (flags & GRE_RP))
533 hlen += 4;
534 /* We don't support routing fields (variable length) */
535 if (flags & GRE_RP) {
536 sc->sc_if.if_ierrors++;
537 return 0;
538 }
539 if (flags & GRE_KP)
540 hlen += 4;
541 if (flags & GRE_SP)
542 hlen += 4;
543
544 switch (ntohs(gh->ptype)) { /* ethertypes */
545 case ETHERTYPE_IP: /* shouldn't need a schednetisr(), as */
546 ifq = &ipintrq; /* we are in ip_input */
547 isr = NETISR_IP;
548 break;
549 #ifdef NETATALK
550 case ETHERTYPE_ATALK:
551 ifq = &atintrq1;
552 isr = NETISR_ATALK;
553 #if NBPFILTER > 0
554 af = AF_APPLETALK;
555 #endif
556 break;
557 #endif
558 #ifdef INET6
559 case ETHERTYPE_IPV6:
560 GRE_DPRINTF(sc, "%s: IPv6 packet\n", __func__);
561 ifq = &ip6intrq;
562 isr = NETISR_IPV6;
563 #if NBPFILTER > 0
564 af = AF_INET6;
565 #endif
566 break;
567 #endif
568 default: /* others not yet supported */
569 GRE_DPRINTF(sc, "%s: unhandled ethertype 0x%04x\n", __func__,
570 ntohs(gh->ptype));
571 sc->sc_if.if_noproto++;
572 return 0;
573 }
574
575 if (hlen > m->m_pkthdr.len) {
576 m_freem(m);
577 sc->sc_if.if_ierrors++;
578 return EINVAL;
579 }
580 m_adj(m, hlen);
581
582 #if NBPFILTER > 0
583 if (sc->sc_if.if_bpf != NULL)
584 bpf_mtap_af(sc->sc_if.if_bpf, af, m);
585 #endif /*NBPFILTER > 0*/
586
587 m->m_pkthdr.rcvif = &sc->sc_if;
588
589 if (!mtx_held)
590 mutex_enter(&sc->sc_mtx);
591 if (IF_QFULL(ifq)) {
592 IF_DROP(ifq);
593 m_freem(m);
594 } else {
595 IF_ENQUEUE(ifq, m);
596 }
597 /* we need schednetisr since the address family may change */
598 schednetisr(isr);
599 if (!mtx_held)
600 mutex_exit(&sc->sc_mtx);
601
602 return 1; /* packet is done, no further processing needed */
603 }
604
605 /*
606 * The output routine. Takes a packet and encapsulates it in the protocol
607 * given by sc->sc_proto. See also RFC 1701 and RFC 2004
608 */
609 static int
610 gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
611 struct rtentry *rt)
612 {
613 int error = 0, hlen, msiz;
614 struct gre_softc *sc = ifp->if_softc;
615 struct greip *gi;
616 struct gre_h *gh;
617 struct ip *eip, *ip;
618 u_int8_t ip_tos = 0;
619 u_int16_t etype = 0;
620 struct mobile_h mob_h;
621
622 if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
623 (IFF_UP | IFF_RUNNING) ||
624 sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
625 m_freem(m);
626 error = ENETDOWN;
627 goto end;
628 }
629
630 gi = NULL;
631 ip = NULL;
632
633 #if NBPFILTER >0
634 if (ifp->if_bpf)
635 bpf_mtap_af(ifp->if_bpf, dst->sa_family, m);
636 #endif
637
638 m->m_flags &= ~(M_BCAST|M_MCAST);
639
640 switch (sc->sc_proto) {
641 case IPPROTO_MOBILE:
642 if (dst->sa_family != AF_INET) {
643 IF_DROP(&ifp->if_snd);
644 m_freem(m);
645 error = EINVAL;
646 goto end;
647 }
648
649 if (M_UNWRITABLE(m, sizeof(*ip)) &&
650 (m = m_pullup(m, sizeof(*ip))) == NULL) {
651 error = ENOBUFS;
652 goto end;
653 }
654 ip = mtod(m, struct ip *);
655
656 memset(&mob_h, 0, MOB_H_SIZ_L);
657 mob_h.proto = (ip->ip_p) << 8;
658 mob_h.odst = ip->ip_dst.s_addr;
659 ip->ip_dst.s_addr = sc->g_dst.s_addr;
660
661 /*
662 * If the packet comes from our host, we only change
663 * the destination address in the IP header.
664 * Else we also need to save and change the source
665 */
666 if (in_hosteq(ip->ip_src, sc->g_src))
667 msiz = MOB_H_SIZ_S;
668 else {
669 mob_h.proto |= MOB_H_SBIT;
670 mob_h.osrc = ip->ip_src.s_addr;
671 ip->ip_src.s_addr = sc->g_src.s_addr;
672 msiz = MOB_H_SIZ_L;
673 }
674 HTONS(mob_h.proto);
675 mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
676
677 M_PREPEND(m, msiz, M_DONTWAIT);
678 if (m == NULL) {
679 error = ENOBUFS;
680 goto end;
681 }
682 /* XXX Assuming that ip does not dangle after
683 * M_PREPEND. In practice, that's true, but
684 * that's not in M_PREPEND's contract.
685 */
686 memmove(mtod(m, void *), ip, sizeof(*ip));
687 ip = mtod(m, struct ip *);
688 memcpy(ip + 1, &mob_h, (size_t)msiz);
689 ip->ip_len = htons(ntohs(ip->ip_len) + msiz);
690 break;
691 case IPPROTO_UDP:
692 case IPPROTO_GRE:
693 GRE_DPRINTF(sc, "%s: dst->sa_family=%d\n", __func__,
694 dst->sa_family);
695 switch (dst->sa_family) {
696 case AF_INET:
697 ip = mtod(m, struct ip *);
698 ip_tos = ip->ip_tos;
699 etype = ETHERTYPE_IP;
700 break;
701 #ifdef NETATALK
702 case AF_APPLETALK:
703 etype = ETHERTYPE_ATALK;
704 break;
705 #endif
706 #ifdef INET6
707 case AF_INET6:
708 etype = ETHERTYPE_IPV6;
709 break;
710 #endif
711 default:
712 IF_DROP(&ifp->if_snd);
713 m_freem(m);
714 error = EAFNOSUPPORT;
715 goto end;
716 }
717 break;
718 default:
719 IF_DROP(&ifp->if_snd);
720 m_freem(m);
721 error = EINVAL;
722 goto end;
723 }
724
725 switch (sc->sc_proto) {
726 case IPPROTO_GRE:
727 hlen = sizeof(struct greip);
728 break;
729 case IPPROTO_UDP:
730 hlen = sizeof(struct gre_h);
731 break;
732 default:
733 hlen = 0;
734 break;
735 }
736
737 M_PREPEND(m, hlen, M_DONTWAIT);
738
739 if (m == NULL) {
740 IF_DROP(&ifp->if_snd);
741 error = ENOBUFS;
742 goto end;
743 }
744
745 switch (sc->sc_proto) {
746 case IPPROTO_UDP:
747 gh = mtod(m, struct gre_h *);
748 memset(gh, 0, sizeof(*gh));
749 gh->ptype = htons(etype);
750 /* XXX Need to handle IP ToS. Look at how I handle IP TTL. */
751 break;
752 case IPPROTO_GRE:
753 gi = mtod(m, struct greip *);
754 gh = &gi->gi_g;
755 eip = &gi->gi_i;
756 /* we don't have any GRE flags for now */
757 memset(gh, 0, sizeof(*gh));
758 gh->ptype = htons(etype);
759 eip->ip_src = sc->g_src;
760 eip->ip_dst = sc->g_dst;
761 eip->ip_hl = (sizeof(struct ip)) >> 2;
762 eip->ip_ttl = ip_gre_ttl;
763 eip->ip_tos = ip_tos;
764 eip->ip_len = htons(m->m_pkthdr.len);
765 eip->ip_p = sc->sc_proto;
766 break;
767 case IPPROTO_MOBILE:
768 eip = mtod(m, struct ip *);
769 eip->ip_p = sc->sc_proto;
770 break;
771 default:
772 error = EPROTONOSUPPORT;
773 m_freem(m);
774 goto end;
775 }
776
777 ifp->if_opackets++;
778 ifp->if_obytes += m->m_pkthdr.len;
779
780 /* send it off */
781 if (sc->sc_proto == IPPROTO_UDP) {
782 if (IF_QFULL(&sc->sc_snd)) {
783 IF_DROP(&sc->sc_snd);
784 error = ENOBUFS;
785 m_freem(m);
786 } else {
787 IF_ENQUEUE(&sc->sc_snd, m);
788 gre_wakeup(sc);
789 error = 0;
790 }
791 goto end;
792 }
793 if (sc->route.ro_rt == NULL)
794 rtcache_init(&sc->route);
795 else
796 rtcache_check(&sc->route);
797 if (sc->route.ro_rt == NULL) {
798 m_freem(m);
799 goto end;
800 }
801 if (sc->route.ro_rt->rt_ifp->if_softc == sc) {
802 rtcache_clear(&sc->route);
803 m_freem(m);
804 } else
805 error = ip_output(m, NULL, &sc->route, 0, NULL, NULL);
806 end:
807 if (error)
808 ifp->if_oerrors++;
809 return error;
810 }
811
812 /* Calling thread must hold sc->sc_mtx. */
813 static int
814 gre_kick(struct gre_softc *sc)
815 {
816 struct ifnet *ifp = &sc->sc_if;
817
818 if (sc->sc_proto == IPPROTO_UDP && (ifp->if_flags & IFF_UP) == IFF_UP &&
819 !sc->sc_running)
820 return EBUSY;
821 gre_wakeup(sc);
822 return 0;
823 }
824
825 /* Calling thread must hold sc->sc_mtx. */
826 static int
827 gre_getname(struct socket *so, int req, struct mbuf *nam, struct lwp *l)
828 {
829 return (*so->so_proto->pr_usrreq)(so, req, NULL, nam, NULL, l);
830 }
831
832 /* Calling thread must hold sc->sc_mtx. */
833 static int
834 gre_getsockname(struct socket *so, struct mbuf *nam, struct lwp *l)
835 {
836 return gre_getname(so, PRU_SOCKADDR, nam, l);
837 }
838
839 /* Calling thread must hold sc->sc_mtx. */
840 static int
841 gre_getpeername(struct socket *so, struct mbuf *nam, struct lwp *l)
842 {
843 return gre_getname(so, PRU_PEERADDR, nam, l);
844 }
845
846 /* Calling thread must hold sc->sc_mtx. */
847 static int
848 gre_getnames(struct socket *so, struct lwp *l, struct sockaddr_in *src,
849 struct sockaddr_in *dst)
850 {
851 struct mbuf *m;
852 struct sockaddr_in *sin;
853 int rc;
854
855 if ((m = gre_getsockmbuf(so)) == NULL)
856 return ENOBUFS;
857
858 sin = mtod(m, struct sockaddr_in *);
859
860 if ((rc = gre_getsockname(so, m, l)) != 0)
861 goto out;
862 if (sin->sin_family != AF_INET) {
863 rc = EAFNOSUPPORT;
864 goto out;
865 }
866 *src = *sin;
867
868 if ((rc = gre_getpeername(so, m, l)) != 0)
869 goto out;
870 if (sin->sin_family != AF_INET) {
871 rc = EAFNOSUPPORT;
872 goto out;
873 }
874 *dst = *sin;
875
876 out:
877 m_freem(m);
878 return rc;
879 }
880
881 static void
882 gre_closef(struct file **fpp, struct lwp *l)
883 {
884 struct file *fp = *fpp;
885
886 simple_lock(&fp->f_slock);
887 FILE_USE(fp);
888 closef(fp, l);
889 *fpp = NULL;
890 }
891
892 static int
893 gre_ioctl(struct ifnet *ifp, u_long cmd, void *data)
894 {
895 u_char oproto;
896 struct file *fp;
897 struct socket *so;
898 struct sockaddr_in dst, src;
899 struct proc *p = curproc; /* XXX */
900 struct lwp *l = curlwp; /* XXX */
901 struct ifreq *ifr;
902 struct if_laddrreq *lifr = (struct if_laddrreq *)data;
903 struct gre_softc *sc = ifp->if_softc;
904 struct sockaddr_in si;
905 struct sockaddr *sa = NULL;
906 int error = 0;
907 #ifdef COMPAT_OIFREQ
908 u_long ocmd = cmd;
909 struct oifreq *oifr = NULL;
910 struct ifreq ifrb;
911
912 cmd = compat_cvtcmd(cmd);
913 if (cmd != ocmd) {
914 oifr = data;
915 data = ifr = &ifrb;
916 ifreqo2n(oifr, ifr);
917 } else
918 #endif
919 ifr = data;
920
921 switch (cmd) {
922 case SIOCSIFFLAGS:
923 case SIOCSIFMTU:
924 case GRESPROTO:
925 case GRESADDRD:
926 case GRESADDRS:
927 case GRESSOCK:
928 case GREDSOCK:
929 case SIOCSLIFPHYADDR:
930 case SIOCDIFPHYADDR:
931 if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE,
932 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
933 NULL) != 0)
934 return EPERM;
935 break;
936 default:
937 break;
938 }
939
940 mutex_enter(&sc->sc_mtx);
941 switch (cmd) {
942 case SIOCSIFADDR:
943 ifp->if_flags |= IFF_UP;
944 if ((error = gre_kick(sc)) != 0)
945 ifp->if_flags &= ~IFF_UP;
946 break;
947 case SIOCSIFDSTADDR:
948 break;
949 case SIOCSIFFLAGS:
950 oproto = sc->sc_proto;
951 switch (ifr->ifr_flags & (IFF_LINK0|IFF_LINK2)) {
952 case IFF_LINK0|IFF_LINK2:
953 sc->sc_proto = IPPROTO_UDP;
954 if (oproto != IPPROTO_UDP)
955 ifp->if_flags &= ~IFF_RUNNING;
956 error = gre_kick(sc);
957 break;
958 case IFF_LINK0:
959 sc->sc_proto = IPPROTO_GRE;
960 gre_wakeup(sc);
961 goto recompute;
962 case 0:
963 sc->sc_proto = IPPROTO_MOBILE;
964 gre_wakeup(sc);
965 goto recompute;
966 }
967 break;
968 case SIOCSIFMTU:
969 if (ifr->ifr_mtu < 576) {
970 error = EINVAL;
971 break;
972 }
973 ifp->if_mtu = ifr->ifr_mtu;
974 break;
975 case SIOCGIFMTU:
976 ifr->ifr_mtu = sc->sc_if.if_mtu;
977 break;
978 case SIOCADDMULTI:
979 case SIOCDELMULTI:
980 if (ifr == 0) {
981 error = EAFNOSUPPORT;
982 break;
983 }
984 switch (ifr->ifr_addr.sa_family) {
985 #ifdef INET
986 case AF_INET:
987 break;
988 #endif
989 #ifdef INET6
990 case AF_INET6:
991 break;
992 #endif
993 default:
994 error = EAFNOSUPPORT;
995 break;
996 }
997 break;
998 case GRESPROTO:
999 oproto = sc->sc_proto;
1000 sc->sc_proto = ifr->ifr_flags;
1001 switch (sc->sc_proto) {
1002 case IPPROTO_UDP:
1003 ifp->if_flags |= IFF_LINK0|IFF_LINK2;
1004 if (oproto != IPPROTO_UDP)
1005 ifp->if_flags &= ~IFF_RUNNING;
1006 error = gre_kick(sc);
1007 break;
1008 case IPPROTO_GRE:
1009 ifp->if_flags |= IFF_LINK0;
1010 ifp->if_flags &= ~IFF_LINK2;
1011 goto recompute;
1012 case IPPROTO_MOBILE:
1013 ifp->if_flags &= ~(IFF_LINK0|IFF_LINK2);
1014 goto recompute;
1015 default:
1016 error = EPROTONOSUPPORT;
1017 break;
1018 }
1019 break;
1020 case GREGPROTO:
1021 ifr->ifr_flags = sc->sc_proto;
1022 break;
1023 case GRESADDRS:
1024 case GRESADDRD:
1025 /*
1026 * set tunnel endpoints, compute a less specific route
1027 * to the remote end and mark if as up
1028 */
1029 sa = &ifr->ifr_addr;
1030 if (cmd == GRESADDRS) {
1031 sc->g_src = (satosin(sa))->sin_addr;
1032 sc->g_srcport = satosin(sa)->sin_port;
1033 }
1034 if (cmd == GRESADDRD) {
1035 if (sc->sc_proto == IPPROTO_UDP &&
1036 satosin(sa)->sin_port == 0) {
1037 error = EINVAL;
1038 break;
1039 }
1040 sc->g_dst = (satosin(sa))->sin_addr;
1041 sc->g_dstport = satosin(sa)->sin_port;
1042 }
1043 recompute:
1044 if (sc->sc_proto == IPPROTO_UDP ||
1045 (sc->g_src.s_addr != INADDR_ANY &&
1046 sc->g_dst.s_addr != INADDR_ANY)) {
1047 rtcache_free(&sc->route);
1048 if (sc->sc_proto == IPPROTO_UDP) {
1049 if ((error = gre_kick(sc)) == 0)
1050 ifp->if_flags |= IFF_RUNNING;
1051 else
1052 ifp->if_flags &= ~IFF_RUNNING;
1053 }
1054 else if (gre_compute_route(sc) == 0)
1055 ifp->if_flags |= IFF_RUNNING;
1056 else
1057 ifp->if_flags &= ~IFF_RUNNING;
1058 }
1059 break;
1060 case GREGADDRS:
1061 sockaddr_in_init(&si, &sc->g_src,
1062 (sc->sc_proto == IPPROTO_UDP) ? sc->g_srcport : 0);
1063 ifr->ifr_addr = *sintosa(&si);
1064 break;
1065 case GREGADDRD:
1066 sockaddr_in_init(&si, &sc->g_dst,
1067 (sc->sc_proto == IPPROTO_UDP) ? sc->g_dstport : 0);
1068 ifr->ifr_addr = *sintosa(&si);
1069 break;
1070 case GREDSOCK:
1071 if (sc->sc_proto != IPPROTO_UDP) {
1072 error = EINVAL;
1073 break;
1074 }
1075 ifp->if_flags &= ~IFF_UP;
1076 gre_wakeup(sc);
1077 break;
1078 case GRESSOCK:
1079 if (sc->sc_proto != IPPROTO_UDP) {
1080 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1081 error = EINVAL;
1082 break;
1083 }
1084 /* getsock() will FILE_USE() and unlock the descriptor for us */
1085 if ((error = getsock(p->p_fd, (int)ifr->ifr_value, &fp)) != 0) {
1086 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1087 error = EINVAL;
1088 break;
1089 }
1090 so = (struct socket *)fp->f_data;
1091 if (so->so_type != SOCK_DGRAM) {
1092 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1093 FILE_UNUSE(fp, NULL);
1094 error = EINVAL;
1095 break;
1096 }
1097 /* check address */
1098 if ((error = gre_getnames(so, curlwp, &src, &dst)) != 0) {
1099 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1100 FILE_UNUSE(fp, NULL);
1101 break;
1102 }
1103
1104 /* Increase reference count. Now that our reference
1105 * to the file descriptor is counted, this thread
1106 * can release our "use" of the descriptor, but it
1107 * will not be destroyed by some other thread's
1108 * action. This thread needs to release its use,
1109 * too, because one and only one thread can have
1110 * use of the descriptor at once. The kernel thread
1111 * will pick up the use if it needs it.
1112 */
1113
1114 fp->f_count++;
1115 FILE_UNUSE(fp, NULL);
1116
1117 while (sc->sc_newfp != NULL && error == 0) {
1118 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1119 error = cv_timedwait_sig(&sc->sc_soparm_cv, &sc->sc_mtx,
1120 MAX(1, hz / 2));
1121 }
1122 if (error == 0) {
1123 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1124 sc->sc_newfp = fp;
1125 ifp->if_flags |= IFF_UP;
1126 }
1127
1128 if (error != 0 || (error = gre_kick(sc)) != 0) {
1129 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1130 gre_closef(&fp, l);
1131 break;
1132 }
1133 /* fp does not any longer belong to this thread. */
1134 sc->g_src = src.sin_addr;
1135 sc->g_srcport = src.sin_port;
1136 sc->g_dst = dst.sin_addr;
1137 sc->g_dstport = dst.sin_port;
1138 GRE_DPRINTF(sc, "%s: sock 0x%08" PRIx32 " port %d -> "
1139 "0x%08" PRIx32 " port %d\n", __func__,
1140 src.sin_addr.s_addr, ntohs(src.sin_port),
1141 dst.sin_addr.s_addr, ntohs(dst.sin_port));
1142 break;
1143 case SIOCSLIFPHYADDR:
1144 if (lifr->addr.ss_family != AF_INET ||
1145 lifr->dstaddr.ss_family != AF_INET) {
1146 error = EAFNOSUPPORT;
1147 break;
1148 }
1149 if (lifr->addr.ss_len != sizeof(si) ||
1150 lifr->dstaddr.ss_len != sizeof(si)) {
1151 error = EINVAL;
1152 break;
1153 }
1154 sc->g_src = satosin(&lifr->addr)->sin_addr;
1155 sc->g_dst = satosin(&lifr->dstaddr)->sin_addr;
1156 sc->g_srcport = satosin(&lifr->addr)->sin_port;
1157 sc->g_dstport = satosin(&lifr->dstaddr)->sin_port;
1158 goto recompute;
1159 case SIOCDIFPHYADDR:
1160 sc->g_src.s_addr = INADDR_ANY;
1161 sc->g_dst.s_addr = INADDR_ANY;
1162 sc->g_srcport = 0;
1163 sc->g_dstport = 0;
1164 goto recompute;
1165 case SIOCGLIFPHYADDR:
1166 if (sc->g_src.s_addr == INADDR_ANY ||
1167 sc->g_dst.s_addr == INADDR_ANY) {
1168 error = EADDRNOTAVAIL;
1169 break;
1170 }
1171 sockaddr_in_init(satosin(&lifr->addr), &sc->g_src,
1172 (sc->sc_proto == IPPROTO_UDP) ? sc->g_srcport : 0);
1173 sockaddr_in_init(satosin(&lifr->dstaddr), &sc->g_dst,
1174 (sc->sc_proto == IPPROTO_UDP) ? sc->g_dstport : 0);
1175 break;
1176 default:
1177 error = EINVAL;
1178 break;
1179 }
1180 #ifdef COMPAT_OIFREQ
1181 if (cmd != ocmd)
1182 ifreqn2o(oifr, ifr);
1183 #endif
1184 mutex_exit(&sc->sc_mtx);
1185 return error;
1186 }
1187
1188 /*
1189 * Compute a route to our destination.
1190 */
1191 static int
1192 gre_compute_route(struct gre_softc *sc)
1193 {
1194 struct route *ro;
1195 union {
1196 struct sockaddr dst;
1197 struct sockaddr_in dst4;
1198 } u;
1199
1200 ro = &sc->route;
1201
1202 memset(ro, 0, sizeof(*ro));
1203 sockaddr_in_init(&u.dst4, &sc->g_dst, 0);
1204 rtcache_setdst(ro, &u.dst);
1205
1206 rtcache_init(ro);
1207
1208 if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
1209 GRE_DPRINTF(sc, "%s: route to %s %s\n", sc->sc_if.if_xname,
1210 inet_ntoa(u.dst4.sin_addr),
1211 (ro->ro_rt == NULL)
1212 ? "does not exist"
1213 : "loops back to ourself");
1214 rtcache_free(ro);
1215 return EADDRNOTAVAIL;
1216 }
1217
1218 return 0;
1219 }
1220
/*
 * Compute the 16-bit one's complement checksum over the `len' bytes
 * at `p' -- much like in_cksum, except that it works on a flat buffer
 * rather than on mbufs.
 *
 * The buffer is summed as host-order 16-bit words.  A trailing odd
 * byte is treated as the first byte of a word whose second byte is
 * zero.  Returns the complement of the folded sum.
 */
u_int16_t
gre_in_cksum(u_int16_t *p, u_int len)
{
	u_int32_t acc = 0;
	int left;

	/* Sum all complete 16-bit words. */
	for (left = len >> 1; left != 0; left--)
		acc += *p++;

	/* Pad a trailing odd byte to a full word. */
	if ((len & 1) != 0) {
		union {
			u_short	w;
			u_char	c[2];
		} tail;

		tail.c[0] = *(u_char *)p;
		tail.c[1] = 0;
		acc += tail.w;
	}

	/* end-around-carry */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += acc >> 16;
	return ~acc;
}
1249 #endif
1250
void	greattach(int);

/*
 * Pseudo-device attach routine.  With INET configured it initialises
 * the softc list and registers the "gre" interface cloner; without
 * INET it does nothing.  `count' is not used.
 */
/* ARGSUSED */
void
greattach(int count)
{
#ifdef INET
	LIST_INIT(&gre_softc_list);
	if_clone_attach(&gre_cloner);
#endif /* INET */
}
1262