if_gre.c revision 1.101 1 /* $NetBSD: if_gre.c,v 1.101 2007/08/20 04:49:41 skd Exp $ */
2
3 /*
4 * Copyright (c) 1998 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Heiko W.Rupp <hwr (at) pilhuhn.de>
9 *
10 * IPv6-over-GRE contributed by Gert Doering <gert (at) greenie.muc.de>
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the NetBSD
23 * Foundation, Inc. and its contributors.
24 * 4. Neither the name of The NetBSD Foundation nor the names of its
25 * contributors may be used to endorse or promote products derived
26 * from this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
29 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
32 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 */
40
41 /*
42 * Encapsulate L3 protocols into IP
43 * See RFC 1701 and 1702 for more details.
44 * If_gre is compatible with Cisco GRE tunnels, so you can
45 * have a NetBSD box as the other end of a tunnel interface of a Cisco
46 * router. See gre(4) for more details.
47 * Also supported: IP in IP encaps (proto 55) as of RFC 2004
48 */
49
50 #include <sys/cdefs.h>
51 __KERNEL_RCSID(0, "$NetBSD: if_gre.c,v 1.101 2007/08/20 04:49:41 skd Exp $");
52
53 #include "opt_gre.h"
54 #include "opt_inet.h"
55 #include "bpfilter.h"
56
57 #ifdef INET
58 #include <sys/param.h>
59 #include <sys/file.h>
60 #include <sys/filedesc.h>
61 #include <sys/malloc.h>
62 #include <sys/mbuf.h>
63 #include <sys/proc.h>
64 #include <sys/protosw.h>
65 #include <sys/socket.h>
66 #include <sys/socketvar.h>
67 #include <sys/ioctl.h>
68 #include <sys/queue.h>
69 #if __NetBSD__
70 #include <sys/systm.h>
71 #include <sys/sysctl.h>
72 #include <sys/kauth.h>
73 #endif
74
75 #include <sys/kernel.h>
76 #include <sys/mutex.h>
77 #include <sys/condvar.h>
78 #include <sys/kthread.h>
79
80 #include <machine/cpu.h>
81
82 #include <net/ethertypes.h>
83 #include <net/if.h>
84 #include <net/if_types.h>
85 #include <net/netisr.h>
86 #include <net/route.h>
87
88 #ifdef INET
89 #include <netinet/in.h>
90 #include <netinet/in_systm.h>
91 #include <netinet/in_var.h>
92 #include <netinet/ip.h>
93 #include <netinet/ip_var.h>
94 #else
95 #error "Huh? if_gre without inet?"
96 #endif
97
98
99 #ifdef NETATALK
100 #include <netatalk/at.h>
101 #include <netatalk/at_var.h>
102 #include <netatalk/at_extern.h>
103 #endif
104
105 #if NBPFILTER > 0
106 #include <sys/time.h>
107 #include <net/bpf.h>
108 #endif
109
110 #include <net/if_gre.h>
111
112 #include <compat/sys/socket.h>
113 #include <compat/sys/sockio.h>
114 /*
115 * It is not easy to calculate the right value for a GRE MTU.
116 * We leave this task to the admin and use the same default that
117 * other vendors use.
118 */
119 #define GREMTU 1476
120
121 #ifdef GRE_DEBUG
122 #define GRE_DPRINTF(__sc, __fmt, ...) \
123 do { \
124 if (((__sc)->sc_if.if_flags & IFF_DEBUG) != 0) \
125 printf(__fmt, __VA_ARGS__); \
126 } while (/*CONSTCOND*/0)
127 #else
128 #define GRE_DPRINTF(__sc, __fmt, ...) do { } while (/*CONSTCOND*/0)
129 #endif /* GRE_DEBUG */
130
131 struct gre_softc_head gre_softc_list;
132 int ip_gre_ttl = GRE_TTL;
133
134 static int gre_clone_create(struct if_clone *, int);
135 static int gre_clone_destroy(struct ifnet *);
136
137 static struct if_clone gre_cloner =
138 IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
139
140 static int gre_output(struct ifnet *, struct mbuf *,
141 const struct sockaddr *, struct rtentry *);
142 static int gre_ioctl(struct ifnet *, u_long, void *);
143
144 static int gre_compute_route(struct gre_softc *sc);
145
146 static void gre_closef(struct file **, struct lwp *);
147 static int gre_getsockname(struct socket *, struct mbuf *, struct lwp *);
148 static int gre_getpeername(struct socket *, struct mbuf *, struct lwp *);
149 static int gre_getnames(struct socket *, struct lwp *, struct sockaddr_in *,
150 struct sockaddr_in *);
151
152 /* Calling thread must hold sc->sc_mtx. */
153 static void
154 gre_stop(struct gre_softc *sc)
155 {
156 sc->sc_running = 0;
157 cv_signal(&sc->sc_join_cv);
158 }
159
160 /* Calling thread must hold sc->sc_mtx. */
161 static void
162 gre_join(struct gre_softc *sc)
163 {
164 while (sc->sc_running != 0)
165 cv_wait(&sc->sc_join_cv, &sc->sc_mtx);
166 }
167
168 /* Calling thread must hold sc->sc_mtx. */
169 static void
170 gre_wakeup(struct gre_softc *sc)
171 {
172 GRE_DPRINTF(sc, "%s: enter\n", __func__);
173 sc->sc_haswork = 1;
174 cv_signal(&sc->sc_work_cv);
175 }
176
177 static int
178 gre_clone_create(struct if_clone *ifc, int unit)
179 {
180 struct gre_softc *sc;
181
182 sc = malloc(sizeof(struct gre_softc), M_DEVBUF, M_WAITOK);
183 memset(sc, 0, sizeof(struct gre_softc));
184 mutex_init(&sc->sc_mtx, MUTEX_DRIVER, IPL_NET);
185 cv_init(&sc->sc_work_cv, "gre work");
186 cv_init(&sc->sc_join_cv, "gre join");
187 cv_init(&sc->sc_soparm_cv, "gre soparm");
188
189 snprintf(sc->sc_if.if_xname, sizeof(sc->sc_if.if_xname), "%s%d",
190 ifc->ifc_name, unit);
191 sc->sc_if.if_softc = sc;
192 sc->sc_if.if_type = IFT_TUNNEL;
193 sc->sc_if.if_addrlen = 0;
194 sc->sc_if.if_hdrlen = 24; /* IP + GRE */
195 sc->sc_if.if_dlt = DLT_NULL;
196 sc->sc_if.if_mtu = GREMTU;
197 sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
198 sc->sc_if.if_output = gre_output;
199 sc->sc_if.if_ioctl = gre_ioctl;
200 sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
201 sc->g_dstport = sc->g_srcport = 0;
202 sc->sc_proto = IPPROTO_GRE;
203 sc->sc_snd.ifq_maxlen = 256;
204 sc->sc_if.if_flags |= IFF_LINK0;
205 if_attach(&sc->sc_if);
206 if_alloc_sadl(&sc->sc_if);
207 #if NBPFILTER > 0
208 bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t));
209 #endif
210 LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
211 return 0;
212 }
213
214 static int
215 gre_clone_destroy(struct ifnet *ifp)
216 {
217 struct gre_softc *sc = ifp->if_softc;
218
219 LIST_REMOVE(sc, sc_list);
220 #if NBPFILTER > 0
221 bpfdetach(ifp);
222 #endif
223 if_detach(ifp);
224 mutex_enter(&sc->sc_mtx);
225 gre_wakeup(sc);
226 gre_join(sc);
227 mutex_exit(&sc->sc_mtx);
228 rtcache_free(&sc->route);
229
230 cv_destroy(&sc->sc_soparm_cv);
231 cv_destroy(&sc->sc_join_cv);
232 cv_destroy(&sc->sc_work_cv);
233 mutex_destroy(&sc->sc_mtx);
234 free(sc, M_DEVBUF);
235
236 return 0;
237 }
238
/*
 * Socket upcall installed by gre_upcall_add().  `arg' is the softc that
 * was registered with the upcall; all this does is wake the worker
 * thread.  `so' and `waitflag' are not used.
 */
static void
gre_receive(struct socket *so, void *arg, int waitflag)
{
	struct gre_softc *sc = arg;

	GRE_DPRINTF(sc, "%s: enter\n", __func__);

	gre_wakeup(sc);
}
248
249 static void
250 gre_upcall_add(struct socket *so, void *arg)
251 {
252 /* XXX What if the kernel already set an upcall? */
253 so->so_upcallarg = arg;
254 so->so_upcall = gre_receive;
255 so->so_rcv.sb_flags |= SB_UPCALL;
256 }
257
258 static void
259 gre_upcall_remove(struct socket *so)
260 {
261 /* XXX What if the kernel already set an upcall? */
262 so->so_rcv.sb_flags &= ~SB_UPCALL;
263 so->so_upcallarg = NULL;
264 so->so_upcall = NULL;
265 }
266
267 static void
268 gre_sodestroy(struct socket **sop)
269 {
270 gre_upcall_remove(*sop);
271 soshutdown(*sop, SHUT_RDWR);
272 soclose(*sop);
273 *sop = NULL;
274 }
275
276 static struct mbuf *
277 gre_getsockmbuf(struct socket *so)
278 {
279 struct mbuf *m;
280
281 m = m_get(M_WAIT, MT_SONAME);
282 if (m != NULL)
283 MCLAIM(m, so->so_mowner);
284 return m;
285 }
286
287 static int
288 gre_socreate1(struct gre_softc *sc, struct lwp *l, struct gre_soparm *sp,
289 struct socket **sop)
290 {
291 int rc;
292 struct mbuf *m;
293 struct sockaddr_in *sin;
294 struct socket *so;
295
296 GRE_DPRINTF(sc, "%s: enter\n", __func__);
297 rc = socreate(AF_INET, sop, SOCK_DGRAM, IPPROTO_UDP, l);
298 if (rc != 0) {
299 GRE_DPRINTF(sc, "%s: socreate failed\n", __func__);
300 return rc;
301 }
302
303 so = *sop;
304
305 gre_upcall_add(so, sc);
306 if ((m = gre_getsockmbuf(so)) == NULL) {
307 rc = ENOBUFS;
308 goto out;
309 }
310 sin = mtod(m, struct sockaddr_in *);
311 sin->sin_len = m->m_len = sizeof(struct sockaddr_in);
312 sin->sin_family = AF_INET;
313 sin->sin_addr = sc->g_src;
314 sin->sin_port = sc->g_srcport;
315
316 GRE_DPRINTF(sc, "%s: bind 0x%08" PRIx32 " port %d\n", __func__,
317 sin->sin_addr.s_addr, ntohs(sin->sin_port));
318 if ((rc = sobind(so, m, l)) != 0) {
319 GRE_DPRINTF(sc, "%s: sobind failed\n", __func__);
320 goto out;
321 }
322
323 if (sc->g_srcport == 0) {
324 if ((rc = gre_getsockname(so, m, l)) != 0) {
325 GRE_DPRINTF(sc, "%s: gre_getsockname failed\n",
326 __func__);
327 goto out;
328 }
329 sc->g_srcport = sin->sin_port;
330 }
331
332 sin->sin_addr = sc->g_dst;
333 sin->sin_port = sc->g_dstport;
334
335 if ((rc = soconnect(so, m, l)) != 0) {
336 GRE_DPRINTF(sc, "%s: soconnect failed\n", __func__);
337 goto out;
338 }
339
340 *mtod(m, int *) = ip_gre_ttl;
341 m->m_len = sizeof(int);
342 KASSERT(so->so_proto && so->so_proto->pr_ctloutput);
343 rc = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so, IPPROTO_IP, IP_TTL,
344 &m);
345 m = NULL;
346 if (rc != 0) {
347 printf("%s: setopt ttl failed\n", __func__);
348 rc = 0;
349 }
350 out:
351 m_freem(m);
352
353 if (rc != 0)
354 gre_sodestroy(sop);
355 else
356 *sp = sc->sc_soparm;
357
358 return rc;
359 }
360
361 static void
362 gre_thread1(struct gre_softc *sc, struct lwp *l)
363 {
364 int flags, rc;
365 const struct gre_h *gh;
366 struct ifnet *ifp = &sc->sc_if;
367 struct mbuf *m;
368 struct socket *so = NULL;
369 struct uio uio;
370 struct gre_soparm sp;
371 struct file *fp = NULL;
372
373 GRE_DPRINTF(sc, "%s: enter\n", __func__);
374 mutex_enter(&sc->sc_mtx);
375
376 sc->sc_haswork = 1;
377
378 memset(&sp, 0, sizeof(sp));
379 memset(&uio, 0, sizeof(uio));
380
381 ifp->if_flags |= IFF_RUNNING;
382
383 for (;;) {
384 while (sc->sc_haswork == 0) {
385 GRE_DPRINTF(sc, "%s: sleeping\n", __func__);
386 cv_wait(&sc->sc_work_cv, &sc->sc_mtx);
387 }
388 sc->sc_haswork = 0;
389 GRE_DPRINTF(sc, "%s: awake\n", __func__);
390 if ((ifp->if_flags & IFF_UP) != IFF_UP) {
391 GRE_DPRINTF(sc, "%s: not up & running; exiting\n",
392 __func__);
393 break;
394 }
395 if (sc->sc_proto != IPPROTO_UDP) {
396 GRE_DPRINTF(sc, "%s: not udp; exiting\n", __func__);
397 break;
398 }
399 /* XXX optimize */
400 if (so == NULL || sc->sc_fp != NULL ||
401 memcmp(&sp, &sc->sc_soparm, sizeof(sp)) != 0) {
402 GRE_DPRINTF(sc, "%s: parameters changed\n", __func__);
403
404 if (fp != NULL) {
405 gre_closef(&fp, curlwp);
406 so = NULL;
407 } else if (so != NULL)
408 gre_sodestroy(&so);
409
410 if (sc->sc_fp != NULL) {
411 fp = sc->sc_fp;
412 sc->sc_fp = NULL;
413 so = (struct socket *)fp->f_data;
414 gre_upcall_add(so, sc);
415 sp = sc->sc_soparm;
416 } else if (gre_socreate1(sc, l, &sp, &so) != 0)
417 goto out;
418 }
419 cv_signal(&sc->sc_soparm_cv);
420 for (;;) {
421 flags = MSG_DONTWAIT;
422 uio.uio_resid = 1000000;
423 rc = (*so->so_receive)(so, NULL, &uio, &m, NULL,
424 &flags);
425 /* TBD Back off if ECONNREFUSED (indicates
426 * ICMP Port Unreachable)?
427 */
428 if (rc == EWOULDBLOCK) {
429 GRE_DPRINTF(sc, "%s: so_receive EWOULDBLOCK\n",
430 __func__);
431 break;
432 } else if (rc != 0 || m == NULL) {
433 GRE_DPRINTF(sc, "%s: rc %d m %p\n",
434 ifp->if_xname, rc, (void *)m);
435 continue;
436 } else
437 GRE_DPRINTF(sc, "%s: so_receive ok\n",
438 __func__);
439 if (m->m_len < sizeof(*gh) &&
440 (m = m_pullup(m, sizeof(*gh))) == NULL) {
441 GRE_DPRINTF(sc, "%s: m_pullup failed\n",
442 __func__);
443 continue;
444 }
445 gh = mtod(m, const struct gre_h *);
446
447 if (gre_input3(sc, m, 0, gh, 1) == 0) {
448 GRE_DPRINTF(sc, "%s: dropping unsupported\n",
449 __func__);
450 m_freem(m);
451 }
452 }
453 for (;;) {
454 IF_DEQUEUE(&sc->sc_snd, m);
455 if (m == NULL)
456 break;
457 GRE_DPRINTF(sc, "%s: dequeue\n", __func__);
458 if ((so->so_state & SS_ISCONNECTED) == 0) {
459 GRE_DPRINTF(sc, "%s: not connected\n",
460 __func__);
461 m_freem(m);
462 continue;
463 }
464 rc = (*so->so_send)(so, NULL, NULL, m, NULL, 0, l);
465 /* XXX handle ENOBUFS? */
466 if (rc != 0)
467 GRE_DPRINTF(sc, "%s: so_send failed\n",
468 __func__);
469 }
470 }
471 if (fp != NULL) {
472 GRE_DPRINTF(sc, "%s: removing upcall\n", __func__);
473 gre_upcall_remove(so);
474 } else if (so != NULL)
475 gre_sodestroy(&so);
476 out:
477 GRE_DPRINTF(sc, "%s: stopping\n", __func__);
478 if (fp != NULL)
479 gre_closef(&fp, curlwp);
480 if (sc->sc_proto == IPPROTO_UDP)
481 ifp->if_flags &= ~IFF_RUNNING;
482 while (!IF_IS_EMPTY(&sc->sc_snd)) {
483 IF_DEQUEUE(&sc->sc_snd, m);
484 m_freem(m);
485 }
486 gre_stop(sc);
487 /* must not touch sc after this! */
488 GRE_DPRINTF(sc, "%s: restore ipl\n", __func__);
489 mutex_exit(&sc->sc_mtx);
490 }
491
492 static void
493 gre_thread(void *arg)
494 {
495 struct gre_softc *sc = (struct gre_softc *)arg;
496
497 gre_thread1(sc, curlwp);
498 /* must not touch sc after this! */
499 kthread_exit(0);
500 }
501
502 /* Calling thread must hold sc->sc_mtx. */
503 int
504 gre_input3(struct gre_softc *sc, struct mbuf *m, int hlen,
505 const struct gre_h *gh, int mtx_held)
506 {
507 u_int16_t flags;
508 #if NBPFILTER > 0
509 u_int32_t af = AF_INET; /* af passed to BPF tap */
510 #endif
511 int isr;
512 struct ifqueue *ifq;
513
514 sc->sc_if.if_ipackets++;
515 sc->sc_if.if_ibytes += m->m_pkthdr.len;
516
517 hlen += sizeof(struct gre_h);
518
519 /* process GRE flags as packet can be of variable len */
520 flags = ntohs(gh->flags);
521
522 /* Checksum & Offset are present */
523 if ((flags & GRE_CP) | (flags & GRE_RP))
524 hlen += 4;
525 /* We don't support routing fields (variable length) */
526 if (flags & GRE_RP) {
527 sc->sc_if.if_ierrors++;
528 return 0;
529 }
530 if (flags & GRE_KP)
531 hlen += 4;
532 if (flags & GRE_SP)
533 hlen += 4;
534
535 switch (ntohs(gh->ptype)) { /* ethertypes */
536 case ETHERTYPE_IP: /* shouldn't need a schednetisr(), as */
537 ifq = &ipintrq; /* we are in ip_input */
538 isr = NETISR_IP;
539 break;
540 #ifdef NETATALK
541 case ETHERTYPE_ATALK:
542 ifq = &atintrq1;
543 isr = NETISR_ATALK;
544 #if NBPFILTER > 0
545 af = AF_APPLETALK;
546 #endif
547 break;
548 #endif
549 #ifdef INET6
550 case ETHERTYPE_IPV6:
551 GRE_DPRINTF(sc, "%s: IPv6 packet\n", __func__);
552 ifq = &ip6intrq;
553 isr = NETISR_IPV6;
554 #if NBPFILTER > 0
555 af = AF_INET6;
556 #endif
557 break;
558 #endif
559 default: /* others not yet supported */
560 GRE_DPRINTF(sc, "%s: unhandled ethertype 0x%04x\n", __func__,
561 ntohs(gh->ptype));
562 sc->sc_if.if_noproto++;
563 return 0;
564 }
565
566 if (hlen > m->m_pkthdr.len) {
567 m_freem(m);
568 sc->sc_if.if_ierrors++;
569 return EINVAL;
570 }
571 m_adj(m, hlen);
572
573 #if NBPFILTER > 0
574 if (sc->sc_if.if_bpf != NULL)
575 bpf_mtap_af(sc->sc_if.if_bpf, af, m);
576 #endif /*NBPFILTER > 0*/
577
578 m->m_pkthdr.rcvif = &sc->sc_if;
579
580 if (!mtx_held)
581 mutex_enter(&sc->sc_mtx);
582 if (IF_QFULL(ifq)) {
583 IF_DROP(ifq);
584 m_freem(m);
585 } else {
586 IF_ENQUEUE(ifq, m);
587 }
588 /* we need schednetisr since the address family may change */
589 schednetisr(isr);
590 if (!mtx_held)
591 mutex_exit(&sc->sc_mtx);
592
593 return 1; /* packet is done, no further processing needed */
594 }
595
596 /*
597 * The output routine. Takes a packet and encapsulates it in the protocol
598 * given by sc->sc_proto. See also RFC 1701 and RFC 2004
599 */
600 static int
601 gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
602 struct rtentry *rt)
603 {
604 int error = 0, hlen, msiz;
605 struct gre_softc *sc = ifp->if_softc;
606 struct greip *gi;
607 struct gre_h *gh;
608 struct ip *eip, *ip;
609 u_int8_t ip_tos = 0;
610 u_int16_t etype = 0;
611 struct mobile_h mob_h;
612
613 if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
614 (IFF_UP | IFF_RUNNING) ||
615 sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
616 m_freem(m);
617 error = ENETDOWN;
618 goto end;
619 }
620
621 gi = NULL;
622 ip = NULL;
623
624 #if NBPFILTER >0
625 if (ifp->if_bpf)
626 bpf_mtap_af(ifp->if_bpf, dst->sa_family, m);
627 #endif
628
629 m->m_flags &= ~(M_BCAST|M_MCAST);
630
631 switch (sc->sc_proto) {
632 case IPPROTO_MOBILE:
633 if (dst->sa_family != AF_INET) {
634 IF_DROP(&ifp->if_snd);
635 m_freem(m);
636 error = EINVAL;
637 goto end;
638 }
639
640 if (M_UNWRITABLE(m, sizeof(*ip)) &&
641 (m = m_pullup(m, sizeof(*ip))) == NULL) {
642 error = ENOBUFS;
643 goto end;
644 }
645 ip = mtod(m, struct ip *);
646
647 memset(&mob_h, 0, MOB_H_SIZ_L);
648 mob_h.proto = (ip->ip_p) << 8;
649 mob_h.odst = ip->ip_dst.s_addr;
650 ip->ip_dst.s_addr = sc->g_dst.s_addr;
651
652 /*
653 * If the packet comes from our host, we only change
654 * the destination address in the IP header.
655 * Else we also need to save and change the source
656 */
657 if (in_hosteq(ip->ip_src, sc->g_src)) {
658 msiz = MOB_H_SIZ_S;
659 } else {
660 mob_h.proto |= MOB_H_SBIT;
661 mob_h.osrc = ip->ip_src.s_addr;
662 ip->ip_src.s_addr = sc->g_src.s_addr;
663 msiz = MOB_H_SIZ_L;
664 }
665 HTONS(mob_h.proto);
666 mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
667
668 M_PREPEND(m, msiz, M_DONTWAIT);
669 if (m == NULL) {
670 error = ENOBUFS;
671 goto end;
672 }
673 /* XXX Assuming that ip does not dangle after
674 * M_PREPEND. In practice, that's true, but
675 * that's not in M_PREPEND's contract.
676 */
677 memmove(mtod(m, void *), ip, sizeof(*ip));
678 ip = mtod(m, struct ip *);
679 memcpy(ip + 1, &mob_h, (size_t)msiz);
680 ip->ip_len = htons(ntohs(ip->ip_len) + msiz);
681 break;
682 case IPPROTO_UDP:
683 case IPPROTO_GRE:
684 GRE_DPRINTF(sc, "%s: dst->sa_family=%d\n", __func__,
685 dst->sa_family);
686 switch (dst->sa_family) {
687 case AF_INET:
688 ip = mtod(m, struct ip *);
689 ip_tos = ip->ip_tos;
690 etype = ETHERTYPE_IP;
691 break;
692 #ifdef NETATALK
693 case AF_APPLETALK:
694 etype = ETHERTYPE_ATALK;
695 break;
696 #endif
697 #ifdef INET6
698 case AF_INET6:
699 etype = ETHERTYPE_IPV6;
700 break;
701 #endif
702 default:
703 IF_DROP(&ifp->if_snd);
704 m_freem(m);
705 error = EAFNOSUPPORT;
706 goto end;
707 }
708 break;
709 default:
710 IF_DROP(&ifp->if_snd);
711 m_freem(m);
712 error = EINVAL;
713 goto end;
714 }
715
716 switch (sc->sc_proto) {
717 case IPPROTO_GRE:
718 hlen = sizeof(struct greip);
719 break;
720 case IPPROTO_UDP:
721 hlen = sizeof(struct gre_h);
722 break;
723 default:
724 hlen = 0;
725 break;
726 }
727
728 M_PREPEND(m, hlen, M_DONTWAIT);
729
730 if (m == NULL) {
731 IF_DROP(&ifp->if_snd);
732 error = ENOBUFS;
733 goto end;
734 }
735
736 switch (sc->sc_proto) {
737 case IPPROTO_UDP:
738 gh = mtod(m, struct gre_h *);
739 memset(gh, 0, sizeof(*gh));
740 gh->ptype = htons(etype);
741 /* XXX Need to handle IP ToS. Look at how I handle IP TTL. */
742 break;
743 case IPPROTO_GRE:
744 gi = mtod(m, struct greip *);
745 gh = &gi->gi_g;
746 eip = &gi->gi_i;
747 /* we don't have any GRE flags for now */
748 memset(gh, 0, sizeof(*gh));
749 gh->ptype = htons(etype);
750 eip->ip_src = sc->g_src;
751 eip->ip_dst = sc->g_dst;
752 eip->ip_hl = (sizeof(struct ip)) >> 2;
753 eip->ip_ttl = ip_gre_ttl;
754 eip->ip_tos = ip_tos;
755 eip->ip_len = htons(m->m_pkthdr.len);
756 eip->ip_p = sc->sc_proto;
757 break;
758 case IPPROTO_MOBILE:
759 eip = mtod(m, struct ip *);
760 eip->ip_p = sc->sc_proto;
761 break;
762 default:
763 error = EPROTONOSUPPORT;
764 m_freem(m);
765 goto end;
766 }
767
768 ifp->if_opackets++;
769 ifp->if_obytes += m->m_pkthdr.len;
770
771 /* send it off */
772 if (sc->sc_proto == IPPROTO_UDP) {
773 if (IF_QFULL(&sc->sc_snd)) {
774 IF_DROP(&sc->sc_snd);
775 error = ENOBUFS;
776 m_freem(m);
777 } else {
778 IF_ENQUEUE(&sc->sc_snd, m);
779 gre_wakeup(sc);
780 error = 0;
781 }
782 goto end;
783 }
784 if (sc->route.ro_rt == NULL)
785 rtcache_init(&sc->route);
786 else
787 rtcache_check(&sc->route);
788 if (sc->route.ro_rt == NULL) {
789 m_freem(m);
790 goto end;
791 }
792 if (sc->route.ro_rt->rt_ifp->if_softc == sc) {
793 rtcache_clear(&sc->route);
794 m_freem(m);
795 } else
796 error = ip_output(m, NULL, &sc->route, 0, NULL, NULL);
797 end:
798 if (error)
799 ifp->if_oerrors++;
800 return error;
801 }
802
803 /* Calling thread must hold sc->sc_mtx. */
804 static int
805 gre_kick(struct gre_softc *sc)
806 {
807 int rc;
808 struct ifnet *ifp = &sc->sc_if;
809
810 if (sc->sc_proto == IPPROTO_UDP && (ifp->if_flags & IFF_UP) == IFF_UP &&
811 !sc->sc_running) {
812 sc->sc_running = 1;
813 rc = kthread_create(PRI_NONE, 0, NULL, gre_thread, sc,
814 NULL, ifp->if_xname);
815 if (rc != 0)
816 gre_stop(sc);
817 return rc;
818 } else {
819 gre_wakeup(sc);
820 return 0;
821 }
822 }
823
824 /* Calling thread must hold sc->sc_mtx. */
825 static int
826 gre_getname(struct socket *so, int req, struct mbuf *nam, struct lwp *l)
827 {
828 return (*so->so_proto->pr_usrreq)(so, req, NULL, nam, NULL, l);
829 }
830
831 /* Calling thread must hold sc->sc_mtx. */
832 static int
833 gre_getsockname(struct socket *so, struct mbuf *nam, struct lwp *l)
834 {
835 return gre_getname(so, PRU_SOCKADDR, nam, l);
836 }
837
838 /* Calling thread must hold sc->sc_mtx. */
839 static int
840 gre_getpeername(struct socket *so, struct mbuf *nam, struct lwp *l)
841 {
842 return gre_getname(so, PRU_PEERADDR, nam, l);
843 }
844
845 /* Calling thread must hold sc->sc_mtx. */
846 static int
847 gre_getnames(struct socket *so, struct lwp *l, struct sockaddr_in *src,
848 struct sockaddr_in *dst)
849 {
850 struct mbuf *m;
851 struct sockaddr_in *sin;
852 int rc;
853
854 if ((m = gre_getsockmbuf(so)) == NULL)
855 return ENOBUFS;
856
857 sin = mtod(m, struct sockaddr_in *);
858
859 if ((rc = gre_getsockname(so, m, l)) != 0)
860 goto out;
861 if (sin->sin_family != AF_INET) {
862 rc = EAFNOSUPPORT;
863 goto out;
864 }
865 *src = *sin;
866
867 if ((rc = gre_getpeername(so, m, l)) != 0)
868 goto out;
869 if (sin->sin_family != AF_INET) {
870 rc = EAFNOSUPPORT;
871 goto out;
872 }
873 *dst = *sin;
874
875 out:
876 m_freem(m);
877 return rc;
878 }
879
880 static void
881 gre_closef(struct file **fpp, struct lwp *l)
882 {
883 struct file *fp = *fpp;
884
885 simple_lock(&fp->f_slock);
886 FILE_USE(fp);
887 closef(fp, l);
888 *fpp = NULL;
889 }
890
891 static int
892 gre_ioctl(struct ifnet *ifp, u_long cmd, void *data)
893 {
894 u_char oproto;
895 struct file *fp;
896 struct socket *so;
897 struct sockaddr_in dst, src;
898 struct proc *p = curproc; /* XXX */
899 struct lwp *l = curlwp; /* XXX */
900 struct ifreq *ifr;
901 struct if_laddrreq *lifr = (struct if_laddrreq *)data;
902 struct gre_softc *sc = ifp->if_softc;
903 struct sockaddr_in si;
904 struct sockaddr *sa = NULL;
905 int error = 0;
906 #ifdef COMPAT_OIFREQ
907 u_long ocmd = cmd;
908 struct oifreq *oifr = NULL;
909 struct ifreq ifrb;
910
911 cmd = compat_cvtcmd(cmd);
912 if (cmd != ocmd) {
913 oifr = data;
914 data = ifr = &ifrb;
915 ifreqo2n(oifr, ifr);
916 } else
917 #endif
918 ifr = data;
919
920 switch (cmd) {
921 case SIOCSIFFLAGS:
922 case SIOCSIFMTU:
923 case GRESPROTO:
924 case GRESADDRD:
925 case GRESADDRS:
926 case GRESSOCK:
927 case GREDSOCK:
928 case SIOCSLIFPHYADDR:
929 case SIOCDIFPHYADDR:
930 if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE,
931 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
932 NULL) != 0)
933 return EPERM;
934 break;
935 default:
936 break;
937 }
938
939 mutex_enter(&sc->sc_mtx);
940 switch (cmd) {
941 case SIOCSIFADDR:
942 ifp->if_flags |= IFF_UP;
943 if ((error = gre_kick(sc)) != 0)
944 ifp->if_flags &= ~IFF_UP;
945 break;
946 case SIOCSIFDSTADDR:
947 break;
948 case SIOCSIFFLAGS:
949 oproto = sc->sc_proto;
950 switch (ifr->ifr_flags & (IFF_LINK0|IFF_LINK2)) {
951 case IFF_LINK0|IFF_LINK2:
952 sc->sc_proto = IPPROTO_UDP;
953 if (oproto != IPPROTO_UDP)
954 ifp->if_flags &= ~IFF_RUNNING;
955 error = gre_kick(sc);
956 break;
957 case IFF_LINK0:
958 sc->sc_proto = IPPROTO_GRE;
959 gre_wakeup(sc);
960 goto recompute;
961 case 0:
962 sc->sc_proto = IPPROTO_MOBILE;
963 gre_wakeup(sc);
964 goto recompute;
965 }
966 break;
967 case SIOCSIFMTU:
968 if (ifr->ifr_mtu < 576) {
969 error = EINVAL;
970 break;
971 }
972 ifp->if_mtu = ifr->ifr_mtu;
973 break;
974 case SIOCGIFMTU:
975 ifr->ifr_mtu = sc->sc_if.if_mtu;
976 break;
977 case SIOCADDMULTI:
978 case SIOCDELMULTI:
979 if (ifr == 0) {
980 error = EAFNOSUPPORT;
981 break;
982 }
983 switch (ifr->ifr_addr.sa_family) {
984 #ifdef INET
985 case AF_INET:
986 break;
987 #endif
988 #ifdef INET6
989 case AF_INET6:
990 break;
991 #endif
992 default:
993 error = EAFNOSUPPORT;
994 break;
995 }
996 break;
997 case GRESPROTO:
998 oproto = sc->sc_proto;
999 sc->sc_proto = ifr->ifr_flags;
1000 switch (sc->sc_proto) {
1001 case IPPROTO_UDP:
1002 ifp->if_flags |= IFF_LINK0|IFF_LINK2;
1003 if (oproto != IPPROTO_UDP)
1004 ifp->if_flags &= ~IFF_RUNNING;
1005 error = gre_kick(sc);
1006 break;
1007 case IPPROTO_GRE:
1008 ifp->if_flags |= IFF_LINK0;
1009 ifp->if_flags &= ~IFF_LINK2;
1010 goto recompute;
1011 case IPPROTO_MOBILE:
1012 ifp->if_flags &= ~(IFF_LINK0|IFF_LINK2);
1013 goto recompute;
1014 default:
1015 error = EPROTONOSUPPORT;
1016 break;
1017 }
1018 break;
1019 case GREGPROTO:
1020 ifr->ifr_flags = sc->sc_proto;
1021 break;
1022 case GRESADDRS:
1023 case GRESADDRD:
1024 /*
1025 * set tunnel endpoints, compute a less specific route
1026 * to the remote end and mark if as up
1027 */
1028 sa = &ifr->ifr_addr;
1029 if (cmd == GRESADDRS) {
1030 sc->g_src = (satosin(sa))->sin_addr;
1031 sc->g_srcport = satosin(sa)->sin_port;
1032 }
1033 if (cmd == GRESADDRD) {
1034 if (sc->sc_proto == IPPROTO_UDP &&
1035 satosin(sa)->sin_port == 0) {
1036 error = EINVAL;
1037 break;
1038 }
1039 sc->g_dst = (satosin(sa))->sin_addr;
1040 sc->g_dstport = satosin(sa)->sin_port;
1041 }
1042 recompute:
1043 if (sc->sc_proto == IPPROTO_UDP ||
1044 (sc->g_src.s_addr != INADDR_ANY &&
1045 sc->g_dst.s_addr != INADDR_ANY)) {
1046 rtcache_free(&sc->route);
1047 if (sc->sc_proto == IPPROTO_UDP)
1048 error = gre_kick(sc);
1049 else if (gre_compute_route(sc) == 0)
1050 ifp->if_flags |= IFF_RUNNING;
1051 else
1052 ifp->if_flags &= ~IFF_RUNNING;
1053 }
1054 break;
1055 case GREGADDRS:
1056 memset(&si, 0, sizeof(si));
1057 si.sin_family = AF_INET;
1058 si.sin_len = sizeof(struct sockaddr_in);
1059 si.sin_addr.s_addr = sc->g_src.s_addr;
1060 sa = sintosa(&si);
1061 ifr->ifr_addr = *sa;
1062 break;
1063 case GREGADDRD:
1064 memset(&si, 0, sizeof(si));
1065 si.sin_family = AF_INET;
1066 si.sin_len = sizeof(struct sockaddr_in);
1067 si.sin_addr.s_addr = sc->g_dst.s_addr;
1068 sa = sintosa(&si);
1069 ifr->ifr_addr = *sa;
1070 break;
1071 case GREDSOCK:
1072 if (sc->sc_proto != IPPROTO_UDP) {
1073 error = EINVAL;
1074 break;
1075 }
1076 ifp->if_flags &= ~IFF_UP;
1077 gre_wakeup(sc);
1078 break;
1079 case GRESSOCK:
1080 if (sc->sc_proto != IPPROTO_UDP) {
1081 error = EINVAL;
1082 break;
1083 }
1084 /* getsock() will FILE_USE() and unlock the descriptor for us */
1085 if ((error = getsock(p->p_fd, (int)ifr->ifr_value, &fp)) != 0)
1086 break;
1087 so = (struct socket *)fp->f_data;
1088 if (so->so_type != SOCK_DGRAM) {
1089 FILE_UNUSE(fp, NULL);
1090 error = EINVAL;
1091 break;
1092 }
1093 /* check address */
1094 if ((error = gre_getnames(so, curlwp, &src, &dst)) != 0) {
1095 FILE_UNUSE(fp, NULL);
1096 break;
1097 }
1098
1099 /* Increase reference count. Now that our reference
1100 * to the file descriptor is counted, this thread
1101 * can release our "use" of the descriptor, but it
1102 * will not be destroyed by some other thread's
1103 * action. This thread needs to release its use,
1104 * too, because one and only one thread can have
1105 * use of the descriptor at once. The kernel thread
1106 * will pick up the use if it needs it.
1107 */
1108
1109 fp->f_count++;
1110 FILE_UNUSE(fp, NULL);
1111
1112 while (sc->sc_fp != NULL && error == 0) {
1113 error = cv_timedwait_sig(&sc->sc_soparm_cv, &sc->sc_mtx,
1114 MAX(1, hz / 2));
1115 }
1116 if (error == 0) {
1117 sc->sc_fp = fp;
1118 ifp->if_flags |= IFF_UP;
1119 }
1120
1121 if (error != 0 || (error = gre_kick(sc)) != 0) {
1122 gre_closef(&fp, l);
1123 break;
1124 }
1125 /* fp does not any longer belong to this thread. */
1126 sc->g_src = src.sin_addr;
1127 sc->g_srcport = src.sin_port;
1128 sc->g_dst = dst.sin_addr;
1129 sc->g_dstport = dst.sin_port;
1130 break;
1131 case SIOCSLIFPHYADDR:
1132 if (lifr->addr.ss_family != AF_INET ||
1133 lifr->dstaddr.ss_family != AF_INET) {
1134 error = EAFNOSUPPORT;
1135 break;
1136 }
1137 if (lifr->addr.ss_len != sizeof(si) ||
1138 lifr->dstaddr.ss_len != sizeof(si)) {
1139 error = EINVAL;
1140 break;
1141 }
1142 sc->g_src = satosin(&lifr->addr)->sin_addr;
1143 sc->g_dst = satosin(&lifr->dstaddr)->sin_addr;
1144 sc->g_srcport = satosin(&lifr->addr)->sin_port;
1145 sc->g_dstport = satosin(&lifr->dstaddr)->sin_port;
1146 goto recompute;
1147 case SIOCDIFPHYADDR:
1148 sc->g_src.s_addr = INADDR_ANY;
1149 sc->g_dst.s_addr = INADDR_ANY;
1150 sc->g_srcport = 0;
1151 sc->g_dstport = 0;
1152 goto recompute;
1153 case SIOCGLIFPHYADDR:
1154 if (sc->g_src.s_addr == INADDR_ANY ||
1155 sc->g_dst.s_addr == INADDR_ANY) {
1156 error = EADDRNOTAVAIL;
1157 break;
1158 }
1159 memset(&si, 0, sizeof(si));
1160 si.sin_family = AF_INET;
1161 si.sin_len = sizeof(struct sockaddr_in);
1162 si.sin_addr = sc->g_src;
1163 if (sc->sc_proto == IPPROTO_UDP)
1164 si.sin_port = sc->g_srcport;
1165 memcpy(&lifr->addr, &si, sizeof(si));
1166 si.sin_addr = sc->g_dst;
1167 if (sc->sc_proto == IPPROTO_UDP)
1168 si.sin_port = sc->g_dstport;
1169 memcpy(&lifr->dstaddr, &si, sizeof(si));
1170 break;
1171 default:
1172 error = EINVAL;
1173 break;
1174 }
1175 #ifdef COMPAT_OIFREQ
1176 if (cmd != ocmd)
1177 ifreqn2o(oifr, ifr);
1178 #endif
1179 mutex_exit(&sc->sc_mtx);
1180 return error;
1181 }
1182
1183 /*
1184 * Compute a route to our destination.
1185 */
1186 static int
1187 gre_compute_route(struct gre_softc *sc)
1188 {
1189 struct route *ro;
1190 union {
1191 struct sockaddr dst;
1192 struct sockaddr_in dst4;
1193 } u;
1194
1195 ro = &sc->route;
1196
1197 memset(ro, 0, sizeof(*ro));
1198 sockaddr_in_init(&u.dst4, &sc->g_dst, 0);
1199 rtcache_setdst(ro, &u.dst);
1200
1201 rtcache_init(ro);
1202
1203 if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
1204 GRE_DPRINTF(sc, "%s: route to %s %s\n", sc->sc_if.if_xname,
1205 inet_ntoa(u.dst4.sin_addr),
1206 (ro->ro_rt == NULL)
1207 ? "does not exist"
1208 : "loops back to ourself");
1209 rtcache_free(ro);
1210 return EADDRNOTAVAIL;
1211 }
1212
1213 return 0;
1214 }
1215
/*
 * Internet-style checksum over a flat buffer - much like in_cksum,
 * which operates on mbufs.
 *
 * p    start of the buffer, read as 16-bit words in host memory order.
 * len  length of the buffer in bytes; an odd trailing byte is summed
 *      as if it were followed by a zero byte.
 *
 * Returns the one's complement of the folded 16-bit sum.
 */
u_int16_t
gre_in_cksum(u_int16_t *p, u_int len)
{
	u_int32_t acc = 0;
	u_int left;

	/* Whole 16-bit words first. */
	for (left = len; left > 1; left -= 2)
		acc += *p++;

	/* Then the odd byte, if there is one. */
	if (left == 1) {
		union {
			u_short	w;
			u_char	c[2];
		} tail;

		tail.c[0] = *(u_char *)p;
		tail.c[1] = 0;
		acc += tail.w;
	}

	/* end-around-carry */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += acc >> 16;
	return ~acc;
}
1244 #endif
1245
1246 void greattach(int);
1247
/*
 * Attach routine of the gre pseudo-device.  When the kernel is built
 * with INET it initialises the list of gre softcs and registers the
 * "gre" interface cloner; otherwise it does nothing.  `count' is unused.
 */
/* ARGSUSED */
void
greattach(int count)
{
#ifdef INET

	LIST_INIT(&gre_softc_list);
	if_clone_attach(&gre_cloner);
#endif
}
1257