if_gre.c revision 1.108 1 /* $NetBSD: if_gre.c,v 1.108 2007/09/02 07:01:41 dyoung Exp $ */
2
3 /*
4 * Copyright (c) 1998 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Heiko W.Rupp <hwr (at) pilhuhn.de>
9 *
10 * IPv6-over-GRE contributed by Gert Doering <gert (at) greenie.muc.de>
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the NetBSD
23 * Foundation, Inc. and its contributors.
24 * 4. Neither the name of The NetBSD Foundation nor the names of its
25 * contributors may be used to endorse or promote products derived
26 * from this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
29 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
32 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 */
40
41 /*
42 * Encapsulate L3 protocols into IP
43 * See RFC 1701 and 1702 for more details.
44 * If_gre is compatible with Cisco GRE tunnels, so you can
45 * have a NetBSD box as the other end of a tunnel interface of a Cisco
46 * router. See gre(4) for more details.
 * Also supported: minimal encapsulation within IP (IPPROTO_MOBILE,
 * proto 55) as of RFC 2004
48 */
49
50 #include <sys/cdefs.h>
51 __KERNEL_RCSID(0, "$NetBSD: if_gre.c,v 1.108 2007/09/02 07:01:41 dyoung Exp $");
52
53 #include "opt_gre.h"
54 #include "opt_inet.h"
55 #include "bpfilter.h"
56
57 #ifdef INET
58 #include <sys/param.h>
59 #include <sys/file.h>
60 #include <sys/filedesc.h>
61 #include <sys/malloc.h>
62 #include <sys/mbuf.h>
63 #include <sys/proc.h>
64 #include <sys/protosw.h>
65 #include <sys/socket.h>
66 #include <sys/socketvar.h>
67 #include <sys/ioctl.h>
68 #include <sys/queue.h>
69 #if __NetBSD__
70 #include <sys/systm.h>
71 #include <sys/sysctl.h>
72 #include <sys/kauth.h>
73 #endif
74
75 #include <sys/kernel.h>
76 #include <sys/mutex.h>
77 #include <sys/condvar.h>
78 #include <sys/kthread.h>
79
80 #include <machine/cpu.h>
81
82 #include <net/ethertypes.h>
83 #include <net/if.h>
84 #include <net/if_types.h>
85 #include <net/netisr.h>
86 #include <net/route.h>
87
88 #ifdef INET
89 #include <netinet/in.h>
90 #include <netinet/in_systm.h>
91 #include <netinet/in_var.h>
92 #include <netinet/ip.h>
93 #include <netinet/ip_var.h>
94 #else
95 #error "Huh? if_gre without inet?"
96 #endif
97
98
99 #ifdef NETATALK
100 #include <netatalk/at.h>
101 #include <netatalk/at_var.h>
102 #include <netatalk/at_extern.h>
103 #endif
104
105 #if NBPFILTER > 0
106 #include <sys/time.h>
107 #include <net/bpf.h>
108 #endif
109
110 #include <net/if_gre.h>
111
112 #include <compat/sys/socket.h>
113 #include <compat/sys/sockio.h>
114 /*
115 * It is not easy to calculate the right value for a GRE MTU.
116 * We leave this task to the admin and use the same default that
117 * other vendors use.
118 */
119 #define GREMTU 1476
120
121 #ifdef GRE_DEBUG
122 int gre_debug = 0;
123 #define GRE_DPRINTF(__sc, __fmt, ...) \
124 do { \
125 if (gre_debug || ((__sc)->sc_if.if_flags & IFF_DEBUG) != 0)\
126 printf(__fmt, __VA_ARGS__); \
127 } while (/*CONSTCOND*/0)
128 #else
129 #define GRE_DPRINTF(__sc, __fmt, ...) do { } while (/*CONSTCOND*/0)
130 #endif /* GRE_DEBUG */
131
132 struct gre_softc_head gre_softc_list;
133 int ip_gre_ttl = GRE_TTL;
134
135 static int gre_clone_create(struct if_clone *, int);
136 static int gre_clone_destroy(struct ifnet *);
137
138 static struct if_clone gre_cloner =
139 IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
140
141 static int gre_output(struct ifnet *, struct mbuf *,
142 const struct sockaddr *, struct rtentry *);
143 static int gre_ioctl(struct ifnet *, u_long, void *);
144
145 static void gre_thread(void *);
146 static int gre_compute_route(struct gre_softc *sc);
147
148 static void gre_closef(struct file **, struct lwp *);
149 static int gre_getsockname(struct socket *, struct mbuf *, struct lwp *);
150 static int gre_getpeername(struct socket *, struct mbuf *, struct lwp *);
151 static int gre_getnames(struct socket *, struct lwp *, struct sockaddr_in *,
152 struct sockaddr_in *);
153
154 /* Calling thread must hold sc->sc_mtx. */
155 static void
156 gre_join(struct gre_softc *sc)
157 {
158 while (sc->sc_running != 0)
159 cv_wait(&sc->sc_join_cv, &sc->sc_mtx);
160 }
161
162 /* Calling thread must hold sc->sc_mtx. */
163 static void
164 gre_wakeup(struct gre_softc *sc)
165 {
166 GRE_DPRINTF(sc, "%s: enter\n", __func__);
167 sc->sc_haswork = 1;
168 cv_signal(&sc->sc_work_cv);
169 }
170
171 static int
172 gre_clone_create(struct if_clone *ifc, int unit)
173 {
174 struct gre_softc *sc;
175
176 sc = malloc(sizeof(struct gre_softc), M_DEVBUF, M_WAITOK);
177 memset(sc, 0, sizeof(struct gre_softc));
178 mutex_init(&sc->sc_mtx, MUTEX_DRIVER, IPL_NET);
179 cv_init(&sc->sc_work_cv, "gre work");
180 cv_init(&sc->sc_join_cv, "gre join");
181 cv_init(&sc->sc_soparm_cv, "gre soparm");
182
183 snprintf(sc->sc_if.if_xname, sizeof(sc->sc_if.if_xname), "%s%d",
184 ifc->ifc_name, unit);
185 sc->sc_if.if_softc = sc;
186 sc->sc_if.if_type = IFT_TUNNEL;
187 sc->sc_if.if_addrlen = 0;
188 sc->sc_if.if_hdrlen = 24; /* IP + GRE */
189 sc->sc_if.if_dlt = DLT_NULL;
190 sc->sc_if.if_mtu = GREMTU;
191 sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
192 sc->sc_if.if_output = gre_output;
193 sc->sc_if.if_ioctl = gre_ioctl;
194 sc->sc_dst.s_addr = sc->sc_src.s_addr = INADDR_ANY;
195 sc->sc_dstport = sc->sc_srcport = 0;
196 sc->sc_proto = IPPROTO_GRE;
197 sc->sc_snd.ifq_maxlen = 256;
198 sc->sc_if.if_flags |= IFF_LINK0;
199 if_attach(&sc->sc_if);
200 if_alloc_sadl(&sc->sc_if);
201 #if NBPFILTER > 0
202 bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t));
203 #endif
204 sc->sc_running = 1;
205 if (kthread_create(PRI_NONE, 0, NULL, gre_thread, sc,
206 NULL, sc->sc_if.if_xname) != 0)
207 sc->sc_running = 0;
208 LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
209 return 0;
210 }
211
212 static int
213 gre_clone_destroy(struct ifnet *ifp)
214 {
215 struct gre_softc *sc = ifp->if_softc;
216
217 LIST_REMOVE(sc, sc_list);
218 #if NBPFILTER > 0
219 bpfdetach(ifp);
220 #endif
221 if_detach(ifp);
222 mutex_enter(&sc->sc_mtx);
223 sc->sc_dying = 1;
224 gre_wakeup(sc);
225 gre_join(sc);
226 mutex_exit(&sc->sc_mtx);
227 rtcache_free(&sc->route);
228
229 cv_destroy(&sc->sc_soparm_cv);
230 cv_destroy(&sc->sc_join_cv);
231 cv_destroy(&sc->sc_work_cv);
232 mutex_destroy(&sc->sc_mtx);
233 free(sc, M_DEVBUF);
234
235 return 0;
236 }
237
/*
 * Socket receive upcall (installed by gre_upcall_add).  `arg' is the
 * gre softc; `so' and `waitflag' are not used.  Simply wakes the
 * worker.
 */
static void
gre_receive(struct socket *so, void *arg, int waitflag)
{
	struct gre_softc *sc = arg;

	GRE_DPRINTF(sc, "%s: enter\n", __func__);

	gre_wakeup(sc);
}
247
248 static void
249 gre_upcall_add(struct socket *so, void *arg)
250 {
251 /* XXX What if the kernel already set an upcall? */
252 KASSERT((so->so_rcv.sb_flags & SB_UPCALL) == 0);
253 so->so_upcallarg = arg;
254 so->so_upcall = gre_receive;
255 so->so_rcv.sb_flags |= SB_UPCALL;
256 }
257
258 static void
259 gre_upcall_remove(struct socket *so)
260 {
261 /* XXX What if the kernel already set an upcall? */
262 so->so_rcv.sb_flags &= ~SB_UPCALL;
263 so->so_upcallarg = NULL;
264 so->so_upcall = NULL;
265 }
266
267 static void
268 gre_sodestroy(struct socket **sop)
269 {
270 gre_upcall_remove(*sop);
271 soshutdown(*sop, SHUT_RDWR);
272 soclose(*sop);
273 *sop = NULL;
274 }
275
276 static struct mbuf *
277 gre_getsockmbuf(struct socket *so)
278 {
279 struct mbuf *m;
280
281 m = m_get(M_WAIT, MT_SONAME);
282 if (m != NULL)
283 MCLAIM(m, so->so_mowner);
284 return m;
285 }
286
287 static int
288 gre_socreate1(struct gre_softc *sc, struct lwp *l, struct socket **sop)
289 {
290 int rc;
291 struct mbuf *m;
292 struct sockaddr_in *sin;
293 struct socket *so;
294
295 GRE_DPRINTF(sc, "%s: enter\n", __func__);
296 rc = socreate(AF_INET, sop, SOCK_DGRAM, IPPROTO_UDP, l);
297 if (rc != 0) {
298 GRE_DPRINTF(sc, "%s: socreate failed\n", __func__);
299 return rc;
300 }
301
302 so = *sop;
303
304 gre_upcall_add(so, sc);
305 if ((m = gre_getsockmbuf(so)) == NULL) {
306 rc = ENOBUFS;
307 goto out;
308 }
309 sin = mtod(m, struct sockaddr_in *);
310 sockaddr_in_init(sin, &sc->sc_src, sc->sc_srcport);
311 m->m_len = sin->sin_len;
312
313 GRE_DPRINTF(sc, "%s: bind 0x%08" PRIx32 " port %d\n", __func__,
314 sin->sin_addr.s_addr, ntohs(sin->sin_port));
315 if ((rc = sobind(so, m, l)) != 0) {
316 GRE_DPRINTF(sc, "%s: sobind failed\n", __func__);
317 goto out;
318 }
319
320 if (sc->sc_srcport == 0) {
321 if ((rc = gre_getsockname(so, m, l)) != 0) {
322 GRE_DPRINTF(sc, "%s: gre_getsockname\n", __func__);
323 goto out;
324 }
325 sc->sc_srcport = sin->sin_port;
326 }
327
328 sockaddr_in_init(sin, &sc->sc_dst, sc->sc_dstport);
329 m->m_len = sin->sin_len;
330
331 if ((rc = soconnect(so, m, l)) != 0) {
332 GRE_DPRINTF(sc, "%s: soconnect failed\n", __func__);
333 goto out;
334 }
335
336 *mtod(m, int *) = ip_gre_ttl;
337 m->m_len = sizeof(int);
338 KASSERT(so->so_proto && so->so_proto->pr_ctloutput);
339 rc = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so, IPPROTO_IP, IP_TTL,
340 &m);
341 m = NULL;
342 if (rc != 0) {
343 GRE_DPRINTF(sc, "%s: setopt ttl failed\n", __func__);
344 rc = 0;
345 }
346 out:
347 m_freem(m);
348
349 if (rc != 0)
350 gre_sodestroy(sop);
351 else {
352 sc->sc_if.if_flags |= IFF_RUNNING;
353 sc->sc_soparm = sc->sc_newsoparm;
354 }
355
356 return rc;
357 }
358
359 static void
360 gre_do_recv(struct gre_softc *sc, struct socket *so, lwp_t *l)
361 {
362 for (;;) {
363 int flags, rc;
364 const struct gre_h *gh;
365 struct mbuf *m;
366
367 flags = MSG_DONTWAIT;
368 sc->sc_uio.uio_resid = 1000000;
369 rc = (*so->so_receive)(so, NULL, &sc->sc_uio, &m, NULL, &flags);
370 /* TBD Back off if ECONNREFUSED (indicates
371 * ICMP Port Unreachable)?
372 */
373 if (rc == EWOULDBLOCK) {
374 GRE_DPRINTF(sc, "%s: so_receive EWOULDBLOCK\n",
375 __func__);
376 break;
377 } else if (rc != 0 || m == NULL) {
378 GRE_DPRINTF(sc, "%s: rc %d m %p\n",
379 sc->sc_if.if_xname, rc, (void *)m);
380 continue;
381 } else
382 GRE_DPRINTF(sc, "%s: so_receive ok\n", __func__);
383 if (m->m_len < sizeof(*gh) &&
384 (m = m_pullup(m, sizeof(*gh))) == NULL) {
385 GRE_DPRINTF(sc, "%s: m_pullup failed\n", __func__);
386 continue;
387 }
388 gh = mtod(m, const struct gre_h *);
389
390 if (gre_input3(sc, m, 0, gh, 0) == 0) {
391 GRE_DPRINTF(sc, "%s: dropping unsupported\n", __func__);
392 m_freem(m);
393 }
394 }
395 }
396
397 static void
398 gre_do_send(struct gre_softc *sc, struct socket *so, lwp_t *l)
399 {
400 for (;;) {
401 int rc;
402 struct mbuf *m;
403
404 mutex_enter(&sc->sc_mtx);
405 IF_DEQUEUE(&sc->sc_snd, m);
406 mutex_exit(&sc->sc_mtx);
407 if (m == NULL)
408 break;
409 GRE_DPRINTF(sc, "%s: dequeue\n", __func__);
410 if ((so->so_state & SS_ISCONNECTED) == 0) {
411 GRE_DPRINTF(sc, "%s: not connected\n", __func__);
412 m_freem(m);
413 continue;
414 }
415 rc = (*so->so_send)(so, NULL, NULL, m, NULL, 0, l);
416 /* XXX handle ENOBUFS? */
417 if (rc != 0)
418 GRE_DPRINTF(sc, "%s: so_send failed\n",
419 __func__);
420 }
421 }
422
423 static struct socket *
424 gre_reconf(struct gre_softc *sc, struct socket *so, lwp_t *l)
425 {
426 struct ifnet *ifp = &sc->sc_if;
427
428 GRE_DPRINTF(sc, "%s: enter\n", __func__);
429
430 shutdown:
431 if (sc->sc_soparm.sp_fp != NULL) {
432 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
433 gre_upcall_remove(so);
434 gre_closef(&sc->sc_soparm.sp_fp, curlwp);
435 so = NULL;
436 } else if (so != NULL)
437 gre_sodestroy(&so);
438
439 if (sc->sc_dying)
440 GRE_DPRINTF(sc, "%s: dying\n", __func__);
441 else if ((ifp->if_flags & IFF_UP) != IFF_UP)
442 GRE_DPRINTF(sc, "%s: down\n", __func__);
443 else if (sc->sc_proto != IPPROTO_UDP)
444 GRE_DPRINTF(sc, "%s: not UDP\n", __func__);
445 else if (sc->sc_newsoparm.sp_fp != NULL) {
446 sc->sc_soparm = sc->sc_newsoparm;
447 sc->sc_newsoparm.sp_fp = NULL;
448 so = (struct socket *)sc->sc_soparm.sp_fp->f_data;
449 gre_upcall_add(so, sc);
450 } else if (gre_socreate1(sc, l, &so) != 0) {
451 sc->sc_dying = 1;
452 goto shutdown;
453 }
454 cv_signal(&sc->sc_soparm_cv);
455 if (so != NULL)
456 sc->sc_if.if_flags |= IFF_RUNNING;
457 else if (sc->sc_proto == IPPROTO_UDP)
458 sc->sc_if.if_flags &= ~IFF_RUNNING;
459 return so;
460 }
461
462 static void
463 gre_thread1(struct gre_softc *sc, struct lwp *l)
464 {
465 struct ifnet *ifp = &sc->sc_if;
466 struct socket *so = NULL;
467
468 GRE_DPRINTF(sc, "%s: enter\n", __func__);
469
470 while (!sc->sc_dying) {
471 while (sc->sc_haswork == 0) {
472 GRE_DPRINTF(sc, "%s: sleeping\n", __func__);
473 cv_wait(&sc->sc_work_cv, &sc->sc_mtx);
474 }
475 sc->sc_haswork = 0;
476
477 GRE_DPRINTF(sc, "%s: awake\n", __func__);
478
479 /* XXX optimize */
480 if ((ifp->if_flags & IFF_UP) != IFF_UP ||
481 sc->sc_proto != IPPROTO_UDP || so == NULL ||
482 sc->sc_newsoparm.sp_fp != NULL ||
483 memcmp(&sc->sc_soparm, &sc->sc_newsoparm,
484 offsetof(struct gre_soparm, sp_fp)) != 0)
485 so = gre_reconf(sc, so, l);
486 mutex_exit(&sc->sc_mtx);
487 if (so != NULL) {
488 gre_do_recv(sc, so, l);
489 gre_do_send(sc, so, l);
490 }
491 mutex_enter(&sc->sc_mtx);
492 }
493 sc->sc_running = 0;
494 cv_signal(&sc->sc_join_cv);
495 }
496
497 static void
498 gre_thread(void *arg)
499 {
500 struct gre_softc *sc = (struct gre_softc *)arg;
501
502 mutex_enter(&sc->sc_mtx);
503 gre_thread1(sc, curlwp);
504 mutex_exit(&sc->sc_mtx);
505
506 /* must not touch sc after this! */
507 kthread_exit(0);
508 }
509
510 /* Calling thread must hold sc->sc_mtx. */
511 int
512 gre_input3(struct gre_softc *sc, struct mbuf *m, int hlen,
513 const struct gre_h *gh, int mtx_held)
514 {
515 u_int16_t flags;
516 #if NBPFILTER > 0
517 u_int32_t af = AF_INET; /* af passed to BPF tap */
518 #endif
519 int isr;
520 struct ifqueue *ifq;
521
522 sc->sc_if.if_ipackets++;
523 sc->sc_if.if_ibytes += m->m_pkthdr.len;
524
525 hlen += sizeof(struct gre_h);
526
527 /* process GRE flags as packet can be of variable len */
528 flags = ntohs(gh->flags);
529
530 /* Checksum & Offset are present */
531 if ((flags & GRE_CP) | (flags & GRE_RP))
532 hlen += 4;
533 /* We don't support routing fields (variable length) */
534 if (flags & GRE_RP) {
535 sc->sc_if.if_ierrors++;
536 return 0;
537 }
538 if (flags & GRE_KP)
539 hlen += 4;
540 if (flags & GRE_SP)
541 hlen += 4;
542
543 switch (ntohs(gh->ptype)) { /* ethertypes */
544 case ETHERTYPE_IP: /* shouldn't need a schednetisr(), as */
545 ifq = &ipintrq; /* we are in ip_input */
546 isr = NETISR_IP;
547 break;
548 #ifdef NETATALK
549 case ETHERTYPE_ATALK:
550 ifq = &atintrq1;
551 isr = NETISR_ATALK;
552 #if NBPFILTER > 0
553 af = AF_APPLETALK;
554 #endif
555 break;
556 #endif
557 #ifdef INET6
558 case ETHERTYPE_IPV6:
559 GRE_DPRINTF(sc, "%s: IPv6 packet\n", __func__);
560 ifq = &ip6intrq;
561 isr = NETISR_IPV6;
562 #if NBPFILTER > 0
563 af = AF_INET6;
564 #endif
565 break;
566 #endif
567 default: /* others not yet supported */
568 GRE_DPRINTF(sc, "%s: unhandled ethertype 0x%04x\n", __func__,
569 ntohs(gh->ptype));
570 sc->sc_if.if_noproto++;
571 return 0;
572 }
573
574 if (hlen > m->m_pkthdr.len) {
575 m_freem(m);
576 sc->sc_if.if_ierrors++;
577 return EINVAL;
578 }
579 m_adj(m, hlen);
580
581 #if NBPFILTER > 0
582 if (sc->sc_if.if_bpf != NULL)
583 bpf_mtap_af(sc->sc_if.if_bpf, af, m);
584 #endif /*NBPFILTER > 0*/
585
586 m->m_pkthdr.rcvif = &sc->sc_if;
587
588 if (!mtx_held)
589 mutex_enter(&sc->sc_mtx);
590 if (IF_QFULL(ifq)) {
591 IF_DROP(ifq);
592 m_freem(m);
593 } else {
594 IF_ENQUEUE(ifq, m);
595 }
596 /* we need schednetisr since the address family may change */
597 schednetisr(isr);
598 if (!mtx_held)
599 mutex_exit(&sc->sc_mtx);
600
601 return 1; /* packet is done, no further processing needed */
602 }
603
604 /*
605 * The output routine. Takes a packet and encapsulates it in the protocol
606 * given by sc->sc_proto. See also RFC 1701 and RFC 2004
607 */
608 static int
609 gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
610 struct rtentry *rt)
611 {
612 int error = 0, hlen, msiz;
613 struct gre_softc *sc = ifp->if_softc;
614 struct greip *gi;
615 struct gre_h *gh;
616 struct ip *eip, *ip;
617 u_int8_t ip_tos = 0;
618 u_int16_t etype = 0;
619 struct mobile_h mob_h;
620
621 if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
622 (IFF_UP | IFF_RUNNING) ||
623 sc->sc_src.s_addr == INADDR_ANY ||
624 sc->sc_dst.s_addr == INADDR_ANY) {
625 m_freem(m);
626 error = ENETDOWN;
627 goto end;
628 }
629
630 gi = NULL;
631 ip = NULL;
632
633 #if NBPFILTER >0
634 if (ifp->if_bpf)
635 bpf_mtap_af(ifp->if_bpf, dst->sa_family, m);
636 #endif
637
638 m->m_flags &= ~(M_BCAST|M_MCAST);
639
640 switch (sc->sc_proto) {
641 case IPPROTO_MOBILE:
642 if (dst->sa_family != AF_INET) {
643 IF_DROP(&ifp->if_snd);
644 m_freem(m);
645 error = EINVAL;
646 goto end;
647 }
648
649 if (M_UNWRITABLE(m, sizeof(*ip)) &&
650 (m = m_pullup(m, sizeof(*ip))) == NULL) {
651 error = ENOBUFS;
652 goto end;
653 }
654 ip = mtod(m, struct ip *);
655
656 memset(&mob_h, 0, MOB_H_SIZ_L);
657 mob_h.proto = (ip->ip_p) << 8;
658 mob_h.odst = ip->ip_dst.s_addr;
659 ip->ip_dst.s_addr = sc->sc_dst.s_addr;
660
661 /*
662 * If the packet comes from our host, we only change
663 * the destination address in the IP header.
664 * Else we also need to save and change the source
665 */
666 if (in_hosteq(ip->ip_src, sc->sc_src))
667 msiz = MOB_H_SIZ_S;
668 else {
669 mob_h.proto |= MOB_H_SBIT;
670 mob_h.osrc = ip->ip_src.s_addr;
671 ip->ip_src.s_addr = sc->sc_src.s_addr;
672 msiz = MOB_H_SIZ_L;
673 }
674 HTONS(mob_h.proto);
675 mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
676
677 M_PREPEND(m, msiz, M_DONTWAIT);
678 if (m == NULL) {
679 error = ENOBUFS;
680 goto end;
681 }
682 /* XXX Assuming that ip does not dangle after
683 * M_PREPEND. In practice, that's true, but
684 * that's not in M_PREPEND's contract.
685 */
686 memmove(mtod(m, void *), ip, sizeof(*ip));
687 ip = mtod(m, struct ip *);
688 memcpy(ip + 1, &mob_h, (size_t)msiz);
689 ip->ip_len = htons(ntohs(ip->ip_len) + msiz);
690 break;
691 case IPPROTO_UDP:
692 case IPPROTO_GRE:
693 GRE_DPRINTF(sc, "%s: dst->sa_family=%d\n", __func__,
694 dst->sa_family);
695 switch (dst->sa_family) {
696 case AF_INET:
697 ip = mtod(m, struct ip *);
698 ip_tos = ip->ip_tos;
699 etype = ETHERTYPE_IP;
700 break;
701 #ifdef NETATALK
702 case AF_APPLETALK:
703 etype = ETHERTYPE_ATALK;
704 break;
705 #endif
706 #ifdef INET6
707 case AF_INET6:
708 etype = ETHERTYPE_IPV6;
709 break;
710 #endif
711 default:
712 IF_DROP(&ifp->if_snd);
713 m_freem(m);
714 error = EAFNOSUPPORT;
715 goto end;
716 }
717 break;
718 default:
719 IF_DROP(&ifp->if_snd);
720 m_freem(m);
721 error = EINVAL;
722 goto end;
723 }
724
725 switch (sc->sc_proto) {
726 case IPPROTO_GRE:
727 hlen = sizeof(struct greip);
728 break;
729 case IPPROTO_UDP:
730 hlen = sizeof(struct gre_h);
731 break;
732 default:
733 hlen = 0;
734 break;
735 }
736
737 M_PREPEND(m, hlen, M_DONTWAIT);
738
739 if (m == NULL) {
740 IF_DROP(&ifp->if_snd);
741 error = ENOBUFS;
742 goto end;
743 }
744
745 switch (sc->sc_proto) {
746 case IPPROTO_UDP:
747 gh = mtod(m, struct gre_h *);
748 memset(gh, 0, sizeof(*gh));
749 gh->ptype = htons(etype);
750 /* XXX Need to handle IP ToS. Look at how I handle IP TTL. */
751 break;
752 case IPPROTO_GRE:
753 gi = mtod(m, struct greip *);
754 gh = &gi->gi_g;
755 eip = &gi->gi_i;
756 /* we don't have any GRE flags for now */
757 memset(gh, 0, sizeof(*gh));
758 gh->ptype = htons(etype);
759 eip->ip_src = sc->sc_src;
760 eip->ip_dst = sc->sc_dst;
761 eip->ip_hl = (sizeof(struct ip)) >> 2;
762 eip->ip_ttl = ip_gre_ttl;
763 eip->ip_tos = ip_tos;
764 eip->ip_len = htons(m->m_pkthdr.len);
765 eip->ip_p = sc->sc_proto;
766 break;
767 case IPPROTO_MOBILE:
768 eip = mtod(m, struct ip *);
769 eip->ip_p = sc->sc_proto;
770 break;
771 default:
772 error = EPROTONOSUPPORT;
773 m_freem(m);
774 goto end;
775 }
776
777 ifp->if_opackets++;
778 ifp->if_obytes += m->m_pkthdr.len;
779
780 /* send it off */
781 if (sc->sc_proto == IPPROTO_UDP) {
782 if (IF_QFULL(&sc->sc_snd)) {
783 IF_DROP(&sc->sc_snd);
784 error = ENOBUFS;
785 m_freem(m);
786 } else {
787 IF_ENQUEUE(&sc->sc_snd, m);
788 gre_wakeup(sc);
789 error = 0;
790 }
791 goto end;
792 }
793 if (sc->route.ro_rt == NULL)
794 rtcache_init(&sc->route);
795 else
796 rtcache_check(&sc->route);
797 if (sc->route.ro_rt == NULL) {
798 m_freem(m);
799 goto end;
800 }
801 if (sc->route.ro_rt->rt_ifp->if_softc == sc) {
802 rtcache_clear(&sc->route);
803 m_freem(m);
804 } else
805 error = ip_output(m, NULL, &sc->route, 0, NULL, NULL);
806 end:
807 if (error)
808 ifp->if_oerrors++;
809 return error;
810 }
811
812 /* Calling thread must hold sc->sc_mtx. */
813 static int
814 gre_kick(struct gre_softc *sc)
815 {
816 struct ifnet *ifp = &sc->sc_if;
817
818 if (!sc->sc_running)
819 return EBUSY;
820 gre_wakeup(sc);
821 return 0;
822 }
823
824 /* Calling thread must hold sc->sc_mtx. */
825 static int
826 gre_getname(struct socket *so, int req, struct mbuf *nam, struct lwp *l)
827 {
828 return (*so->so_proto->pr_usrreq)(so, req, NULL, nam, NULL, l);
829 }
830
831 /* Calling thread must hold sc->sc_mtx. */
832 static int
833 gre_getsockname(struct socket *so, struct mbuf *nam, struct lwp *l)
834 {
835 return gre_getname(so, PRU_SOCKADDR, nam, l);
836 }
837
838 /* Calling thread must hold sc->sc_mtx. */
839 static int
840 gre_getpeername(struct socket *so, struct mbuf *nam, struct lwp *l)
841 {
842 return gre_getname(so, PRU_PEERADDR, nam, l);
843 }
844
845 /* Calling thread must hold sc->sc_mtx. */
846 static int
847 gre_getnames(struct socket *so, struct lwp *l, struct sockaddr_in *src,
848 struct sockaddr_in *dst)
849 {
850 struct mbuf *m;
851 struct sockaddr_in *sin;
852 int rc;
853
854 if ((m = gre_getsockmbuf(so)) == NULL)
855 return ENOBUFS;
856
857 sin = mtod(m, struct sockaddr_in *);
858
859 if ((rc = gre_getsockname(so, m, l)) != 0)
860 goto out;
861 if (sin->sin_family != AF_INET) {
862 rc = EAFNOSUPPORT;
863 goto out;
864 }
865 *src = *sin;
866
867 if ((rc = gre_getpeername(so, m, l)) != 0)
868 goto out;
869 if (sin->sin_family != AF_INET) {
870 rc = EAFNOSUPPORT;
871 goto out;
872 }
873 *dst = *sin;
874
875 out:
876 m_freem(m);
877 return rc;
878 }
879
880 static void
881 gre_closef(struct file **fpp, struct lwp *l)
882 {
883 struct file *fp = *fpp;
884
885 simple_lock(&fp->f_slock);
886 FILE_USE(fp);
887 closef(fp, l);
888 *fpp = NULL;
889 }
890
891 static int
892 gre_ioctl(struct ifnet *ifp, const u_long cmd, void *data)
893 {
894 u_char oproto;
895 struct file *fp;
896 struct socket *so;
897 struct sockaddr_in dst, src;
898 struct proc *p = curproc; /* XXX */
899 struct lwp *l = curlwp; /* XXX */
900 struct ifreq *ifr;
901 struct if_laddrreq *lifr = (struct if_laddrreq *)data;
902 struct gre_softc *sc = ifp->if_softc;
903 struct sockaddr_in si;
904 const struct sockaddr *sa;
905 int error = 0;
906
907 ifr = data;
908
909 switch (cmd) {
910 case SIOCSIFFLAGS:
911 case SIOCSIFMTU:
912 case GRESPROTO:
913 case GRESADDRD:
914 case GRESADDRS:
915 case GRESSOCK:
916 case GREDSOCK:
917 case SIOCSLIFPHYADDR:
918 case SIOCDIFPHYADDR:
919 if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE,
920 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
921 NULL) != 0)
922 return EPERM;
923 break;
924 default:
925 break;
926 }
927
928 mutex_enter(&sc->sc_mtx);
929 switch (cmd) {
930 case SIOCSIFADDR:
931 ifp->if_flags |= IFF_UP;
932 if ((error = gre_kick(sc)) != 0)
933 ifp->if_flags &= ~IFF_UP;
934 break;
935 case SIOCSIFDSTADDR:
936 break;
937 case SIOCSIFFLAGS:
938 oproto = sc->sc_proto;
939 switch (ifr->ifr_flags & (IFF_LINK0|IFF_LINK2)) {
940 case IFF_LINK0|IFF_LINK2:
941 sc->sc_proto = IPPROTO_UDP;
942 if (oproto != IPPROTO_UDP)
943 ifp->if_flags &= ~IFF_RUNNING;
944 error = gre_kick(sc);
945 break;
946 case IFF_LINK0:
947 sc->sc_proto = IPPROTO_GRE;
948 gre_wakeup(sc);
949 goto recompute;
950 case 0:
951 sc->sc_proto = IPPROTO_MOBILE;
952 gre_wakeup(sc);
953 goto recompute;
954 }
955 break;
956 case SIOCSIFMTU:
957 if (ifr->ifr_mtu < 576) {
958 error = EINVAL;
959 break;
960 }
961 ifp->if_mtu = ifr->ifr_mtu;
962 break;
963 case SIOCGIFMTU:
964 ifr->ifr_mtu = sc->sc_if.if_mtu;
965 break;
966 case SIOCADDMULTI:
967 case SIOCDELMULTI:
968 if (ifr == NULL) {
969 error = EAFNOSUPPORT;
970 break;
971 }
972 switch (ifreq_getaddr(cmd, ifr)->sa_family) {
973 #ifdef INET
974 case AF_INET:
975 break;
976 #endif
977 #ifdef INET6
978 case AF_INET6:
979 break;
980 #endif
981 default:
982 error = EAFNOSUPPORT;
983 break;
984 }
985 break;
986 case GRESPROTO:
987 oproto = sc->sc_proto;
988 sc->sc_proto = ifr->ifr_flags;
989 switch (sc->sc_proto) {
990 case IPPROTO_UDP:
991 ifp->if_flags |= IFF_LINK0|IFF_LINK2;
992 if (oproto != IPPROTO_UDP)
993 ifp->if_flags &= ~IFF_RUNNING;
994 error = gre_kick(sc);
995 break;
996 case IPPROTO_GRE:
997 ifp->if_flags |= IFF_LINK0;
998 ifp->if_flags &= ~IFF_LINK2;
999 goto recompute;
1000 case IPPROTO_MOBILE:
1001 ifp->if_flags &= ~(IFF_LINK0|IFF_LINK2);
1002 goto recompute;
1003 default:
1004 error = EPROTONOSUPPORT;
1005 break;
1006 }
1007 break;
1008 case GREGPROTO:
1009 ifr->ifr_flags = sc->sc_proto;
1010 break;
1011 case GRESADDRS:
1012 case GRESADDRD:
1013 /*
1014 * set tunnel endpoints, compute a less specific route
1015 * to the remote end and mark if as up
1016 */
1017 sa = &ifr->ifr_addr;
1018 if (cmd == GRESADDRS) {
1019 sc->sc_src = satocsin(sa)->sin_addr;
1020 sc->sc_srcport = satocsin(sa)->sin_port;
1021 }
1022 if (cmd == GRESADDRD) {
1023 if (sc->sc_proto == IPPROTO_UDP &&
1024 satocsin(sa)->sin_port == 0) {
1025 error = EINVAL;
1026 break;
1027 }
1028 sc->sc_dst = satocsin(sa)->sin_addr;
1029 sc->sc_dstport = satocsin(sa)->sin_port;
1030 }
1031 recompute:
1032 if (sc->sc_proto == IPPROTO_UDP ||
1033 (sc->sc_src.s_addr != INADDR_ANY &&
1034 sc->sc_dst.s_addr != INADDR_ANY)) {
1035 rtcache_free(&sc->route);
1036 if (sc->sc_proto == IPPROTO_UDP) {
1037 if ((error = gre_kick(sc)) == 0)
1038 ifp->if_flags |= IFF_RUNNING;
1039 else
1040 ifp->if_flags &= ~IFF_RUNNING;
1041 }
1042 else if (gre_compute_route(sc) == 0)
1043 ifp->if_flags |= IFF_RUNNING;
1044 else
1045 ifp->if_flags &= ~IFF_RUNNING;
1046 }
1047 break;
1048 case GREGADDRS:
1049 sockaddr_in_init(&si, &sc->sc_src,
1050 (sc->sc_proto == IPPROTO_UDP) ? sc->sc_srcport : 0);
1051 ifr->ifr_addr = *sintosa(&si);
1052 break;
1053 case GREGADDRD:
1054 sockaddr_in_init(&si, &sc->sc_dst,
1055 (sc->sc_proto == IPPROTO_UDP) ? sc->sc_dstport : 0);
1056 ifr->ifr_addr = *sintosa(&si);
1057 break;
1058 case GREDSOCK:
1059 if (sc->sc_proto != IPPROTO_UDP) {
1060 error = EINVAL;
1061 break;
1062 }
1063 ifp->if_flags &= ~IFF_UP;
1064 gre_wakeup(sc);
1065 break;
1066 case GRESSOCK:
1067 if (sc->sc_proto != IPPROTO_UDP) {
1068 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1069 error = EINVAL;
1070 break;
1071 }
1072 /* getsock() will FILE_USE() and unlock the descriptor for us */
1073 if ((error = getsock(p->p_fd, (int)ifr->ifr_value, &fp)) != 0) {
1074 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1075 error = EINVAL;
1076 break;
1077 }
1078 so = (struct socket *)fp->f_data;
1079 if (so->so_type != SOCK_DGRAM) {
1080 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1081 FILE_UNUSE(fp, NULL);
1082 error = EINVAL;
1083 break;
1084 }
1085 /* check address */
1086 if ((error = gre_getnames(so, curlwp, &src, &dst)) != 0) {
1087 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1088 FILE_UNUSE(fp, NULL);
1089 break;
1090 }
1091
1092 /* Increase reference count. Now that our reference
1093 * to the file descriptor is counted, this thread
1094 * can release our "use" of the descriptor, but it
1095 * will not be destroyed by some other thread's
1096 * action. This thread needs to release its use,
1097 * too, because one and only one thread can have
1098 * use of the descriptor at once. The kernel thread
1099 * will pick up the use if it needs it.
1100 */
1101
1102 fp->f_count++;
1103 GRE_DPRINTF(sc, "%s: l.%d f_count %d\n", __func__, __LINE__,
1104 fp->f_count);
1105 FILE_UNUSE(fp, NULL);
1106
1107 while (sc->sc_newsoparm.sp_fp != NULL && error == 0) {
1108 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1109 error = cv_timedwait_sig(&sc->sc_soparm_cv, &sc->sc_mtx,
1110 MAX(1, hz / 2));
1111 }
1112 if (error == 0) {
1113 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1114 sc->sc_newsoparm.sp_fp = fp;
1115 ifp->if_flags |= IFF_UP;
1116 }
1117
1118 if (error != 0 || (error = gre_kick(sc)) != 0) {
1119 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1120 gre_closef(&fp, l);
1121 break;
1122 }
1123 /* fp does not any longer belong to this thread. */
1124 sc->sc_src = src.sin_addr;
1125 sc->sc_srcport = src.sin_port;
1126 sc->sc_dst = dst.sin_addr;
1127 sc->sc_dstport = dst.sin_port;
1128 GRE_DPRINTF(sc, "%s: sock 0x%08" PRIx32 " port %d -> "
1129 "0x%08" PRIx32 " port %d\n", __func__,
1130 src.sin_addr.s_addr, ntohs(src.sin_port),
1131 dst.sin_addr.s_addr, ntohs(dst.sin_port));
1132 break;
1133 case SIOCSLIFPHYADDR:
1134 if (lifr->addr.ss_family != AF_INET ||
1135 lifr->dstaddr.ss_family != AF_INET) {
1136 error = EAFNOSUPPORT;
1137 break;
1138 }
1139 if (lifr->addr.ss_len != sizeof(si) ||
1140 lifr->dstaddr.ss_len != sizeof(si)) {
1141 error = EINVAL;
1142 break;
1143 }
1144 sc->sc_src = satocsin(&lifr->addr)->sin_addr;
1145 sc->sc_dst = satocsin(&lifr->dstaddr)->sin_addr;
1146 sc->sc_srcport = satocsin(&lifr->addr)->sin_port;
1147 sc->sc_dstport = satocsin(&lifr->dstaddr)->sin_port;
1148 goto recompute;
1149 case SIOCDIFPHYADDR:
1150 sc->sc_src.s_addr = INADDR_ANY;
1151 sc->sc_dst.s_addr = INADDR_ANY;
1152 sc->sc_srcport = 0;
1153 sc->sc_dstport = 0;
1154 goto recompute;
1155 case SIOCGLIFPHYADDR:
1156 if (sc->sc_src.s_addr == INADDR_ANY ||
1157 sc->sc_dst.s_addr == INADDR_ANY) {
1158 error = EADDRNOTAVAIL;
1159 break;
1160 }
1161 sockaddr_in_init(satosin(&lifr->addr), &sc->sc_src,
1162 (sc->sc_proto == IPPROTO_UDP) ? sc->sc_srcport : 0);
1163 sockaddr_in_init(satosin(&lifr->dstaddr), &sc->sc_dst,
1164 (sc->sc_proto == IPPROTO_UDP) ? sc->sc_dstport : 0);
1165 break;
1166 default:
1167 error = EINVAL;
1168 break;
1169 }
1170 mutex_exit(&sc->sc_mtx);
1171 return error;
1172 }
1173
1174 /*
1175 * Compute a route to our destination.
1176 */
1177 static int
1178 gre_compute_route(struct gre_softc *sc)
1179 {
1180 struct route *ro;
1181 union {
1182 struct sockaddr dst;
1183 struct sockaddr_in dst4;
1184 } u;
1185
1186 ro = &sc->route;
1187
1188 memset(ro, 0, sizeof(*ro));
1189 sockaddr_in_init(&u.dst4, &sc->sc_dst, 0);
1190 rtcache_setdst(ro, &u.dst);
1191
1192 rtcache_init(ro);
1193
1194 if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
1195 GRE_DPRINTF(sc, "%s: route to %s %s\n", sc->sc_if.if_xname,
1196 inet_ntoa(u.dst4.sin_addr),
1197 (ro->ro_rt == NULL)
1198 ? "does not exist"
1199 : "loops back to ourself");
1200 rtcache_free(ro);
1201 return EADDRNOTAVAIL;
1202 }
1203
1204 return 0;
1205 }
1206
/*
 * One's-complement checksum over a flat buffer (in_cksum works on
 * mbufs instead).
 *
 * p    start of the buffer, read as 16-bit words in host order
 * len  length of the buffer in bytes; a trailing odd byte is summed
 *      as a word whose second byte is zero
 *
 * Returns the complement of the folded 16-bit sum.
 */
u_int16_t
gre_in_cksum(u_int16_t *p, u_int len)
{
	u_int32_t acc = 0;
	u_int left;

	/* Whole 16-bit words. */
	for (left = len; left > 1; left -= 2)
		acc += *p++;

	/* Trailing odd byte, padded with a zero byte. */
	if (left == 1) {
		union {
			u_short	w;
			u_char	c[2];
		} tail;

		tail.c[0] = *(u_char *)p;
		tail.c[1] = 0;
		acc += tail.w;
	}

	/* end-around-carry */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += acc >> 16;
	return ~acc;
}
1235 #endif
1236
void greattach(int);

/*
 * Pseudo-device attach routine.  With INET configured it initialises
 * the softc list and registers the "gre" interface cloner; without
 * INET it does nothing.  `count' is not used.
 */
/* ARGSUSED */
void
greattach(int count)
{
#if defined(INET)
	LIST_INIT(&gre_softc_list);
	if_clone_attach(&gre_cloner);
#endif /* INET */
}
1248