if_gre.c revision 1.107 1 /* $NetBSD: if_gre.c,v 1.107 2007/09/02 01:50:58 dyoung Exp $ */
2
3 /*
4 * Copyright (c) 1998 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Heiko W.Rupp <hwr (at) pilhuhn.de>
9 *
10 * IPv6-over-GRE contributed by Gert Doering <gert (at) greenie.muc.de>
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the NetBSD
23 * Foundation, Inc. and its contributors.
24 * 4. Neither the name of The NetBSD Foundation nor the names of its
25 * contributors may be used to endorse or promote products derived
26 * from this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
29 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
32 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 */
40
41 /*
42 * Encapsulate L3 protocols into IP
43 * See RFC 1701 and 1702 for more details.
44 * If_gre is compatible with Cisco GRE tunnels, so you can
45 * have a NetBSD box as the other end of a tunnel interface of a Cisco
46 * router. See gre(4) for more details.
47 * Also supported: IP in IP encaps (proto 55) as of RFC 2004
48 */
49
50 #include <sys/cdefs.h>
51 __KERNEL_RCSID(0, "$NetBSD: if_gre.c,v 1.107 2007/09/02 01:50:58 dyoung Exp $");
52
53 #include "opt_gre.h"
54 #include "opt_inet.h"
55 #include "bpfilter.h"
56
57 #ifdef INET
58 #include <sys/param.h>
59 #include <sys/file.h>
60 #include <sys/filedesc.h>
61 #include <sys/malloc.h>
62 #include <sys/mbuf.h>
63 #include <sys/proc.h>
64 #include <sys/protosw.h>
65 #include <sys/socket.h>
66 #include <sys/socketvar.h>
67 #include <sys/ioctl.h>
68 #include <sys/queue.h>
69 #if __NetBSD__
70 #include <sys/systm.h>
71 #include <sys/sysctl.h>
72 #include <sys/kauth.h>
73 #endif
74
75 #include <sys/kernel.h>
76 #include <sys/mutex.h>
77 #include <sys/condvar.h>
78 #include <sys/kthread.h>
79
80 #include <machine/cpu.h>
81
82 #include <net/ethertypes.h>
83 #include <net/if.h>
84 #include <net/if_types.h>
85 #include <net/netisr.h>
86 #include <net/route.h>
87
88 #ifdef INET
89 #include <netinet/in.h>
90 #include <netinet/in_systm.h>
91 #include <netinet/in_var.h>
92 #include <netinet/ip.h>
93 #include <netinet/ip_var.h>
94 #else
95 #error "Huh? if_gre without inet?"
96 #endif
97
98
99 #ifdef NETATALK
100 #include <netatalk/at.h>
101 #include <netatalk/at_var.h>
102 #include <netatalk/at_extern.h>
103 #endif
104
105 #if NBPFILTER > 0
106 #include <sys/time.h>
107 #include <net/bpf.h>
108 #endif
109
110 #include <net/if_gre.h>
111
112 #include <compat/sys/socket.h>
113 #include <compat/sys/sockio.h>
114 /*
115 * It is not easy to calculate the right value for a GRE MTU.
116 * We leave this task to the admin and use the same default that
117 * other vendors use.
118 */
119 #define GREMTU 1476
120
/*
 * Debug tracing.  With GRE_DEBUG compiled in, a message is printed when
 * either the global gre_debug knob is non-zero or the interface has
 * IFF_DEBUG set; without GRE_DEBUG the macro expands to an empty statement.
 */
#ifndef GRE_DEBUG
#define	GRE_DPRINTF(softc, fmt, ...)	do { } while (/*CONSTCOND*/0)
#else
int gre_debug = 0;
#define	GRE_DPRINTF(softc, fmt, ...)					\
	do {								\
		if (gre_debug ||					\
		    ((softc)->sc_if.if_flags & IFF_DEBUG) != 0)		\
			printf(fmt, __VA_ARGS__);			\
	} while (/*CONSTCOND*/0)
#endif /* GRE_DEBUG */
131
132 struct gre_softc_head gre_softc_list;
133 int ip_gre_ttl = GRE_TTL;
134
135 static int gre_clone_create(struct if_clone *, int);
136 static int gre_clone_destroy(struct ifnet *);
137
138 static struct if_clone gre_cloner =
139 IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
140
141 static int gre_output(struct ifnet *, struct mbuf *,
142 const struct sockaddr *, struct rtentry *);
143 static int gre_ioctl(struct ifnet *, u_long, void *);
144
145 static void gre_thread(void *);
146 static int gre_compute_route(struct gre_softc *sc);
147
148 static void gre_closef(struct file **, struct lwp *);
149 static int gre_getsockname(struct socket *, struct mbuf *, struct lwp *);
150 static int gre_getpeername(struct socket *, struct mbuf *, struct lwp *);
151 static int gre_getnames(struct socket *, struct lwp *, struct sockaddr_in *,
152 struct sockaddr_in *);
153
154 /* Calling thread must hold sc->sc_mtx. */
155 static void
156 gre_join(struct gre_softc *sc)
157 {
158 while (sc->sc_running != 0)
159 cv_wait(&sc->sc_join_cv, &sc->sc_mtx);
160 }
161
162 /* Calling thread must hold sc->sc_mtx. */
163 static void
164 gre_wakeup(struct gre_softc *sc)
165 {
166 GRE_DPRINTF(sc, "%s: enter\n", __func__);
167 sc->sc_haswork = 1;
168 cv_signal(&sc->sc_work_cv);
169 }
170
171 static int
172 gre_clone_create(struct if_clone *ifc, int unit)
173 {
174 struct gre_softc *sc;
175
176 sc = malloc(sizeof(struct gre_softc), M_DEVBUF, M_WAITOK);
177 memset(sc, 0, sizeof(struct gre_softc));
178 mutex_init(&sc->sc_mtx, MUTEX_DRIVER, IPL_NET);
179 cv_init(&sc->sc_work_cv, "gre work");
180 cv_init(&sc->sc_join_cv, "gre join");
181 cv_init(&sc->sc_soparm_cv, "gre soparm");
182
183 snprintf(sc->sc_if.if_xname, sizeof(sc->sc_if.if_xname), "%s%d",
184 ifc->ifc_name, unit);
185 sc->sc_if.if_softc = sc;
186 sc->sc_if.if_type = IFT_TUNNEL;
187 sc->sc_if.if_addrlen = 0;
188 sc->sc_if.if_hdrlen = 24; /* IP + GRE */
189 sc->sc_if.if_dlt = DLT_NULL;
190 sc->sc_if.if_mtu = GREMTU;
191 sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
192 sc->sc_if.if_output = gre_output;
193 sc->sc_if.if_ioctl = gre_ioctl;
194 sc->sc_dst.s_addr = sc->sc_src.s_addr = INADDR_ANY;
195 sc->sc_dstport = sc->sc_srcport = 0;
196 sc->sc_proto = IPPROTO_GRE;
197 sc->sc_snd.ifq_maxlen = 256;
198 sc->sc_if.if_flags |= IFF_LINK0;
199 if_attach(&sc->sc_if);
200 if_alloc_sadl(&sc->sc_if);
201 #if NBPFILTER > 0
202 bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t));
203 #endif
204 sc->sc_running = 1;
205 if (kthread_create(PRI_NONE, 0, NULL, gre_thread, sc,
206 NULL, sc->sc_if.if_xname) != 0)
207 sc->sc_running = 0;
208 LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
209 return 0;
210 }
211
212 static int
213 gre_clone_destroy(struct ifnet *ifp)
214 {
215 struct gre_softc *sc = ifp->if_softc;
216
217 LIST_REMOVE(sc, sc_list);
218 #if NBPFILTER > 0
219 bpfdetach(ifp);
220 #endif
221 if_detach(ifp);
222 mutex_enter(&sc->sc_mtx);
223 sc->sc_dying = 1;
224 gre_wakeup(sc);
225 gre_join(sc);
226 mutex_exit(&sc->sc_mtx);
227 rtcache_free(&sc->route);
228
229 cv_destroy(&sc->sc_soparm_cv);
230 cv_destroy(&sc->sc_join_cv);
231 cv_destroy(&sc->sc_work_cv);
232 mutex_destroy(&sc->sc_mtx);
233 free(sc, M_DEVBUF);
234
235 return 0;
236 }
237
/*
 * Socket receive upcall (installed by gre_upcall_add()): just wake up
 * the softc passed as the upcall argument.  'so' and 'waitflag' are
 * not used.
 */
static void
gre_receive(struct socket *so, void *arg, int waitflag)
{
	struct gre_softc *sc = arg;

	GRE_DPRINTF(sc, "%s: enter\n", __func__);

	gre_wakeup(sc);
}
247
248 static void
249 gre_upcall_add(struct socket *so, void *arg)
250 {
251 /* XXX What if the kernel already set an upcall? */
252 KASSERT((so->so_rcv.sb_flags & SB_UPCALL) == 0);
253 so->so_upcallarg = arg;
254 so->so_upcall = gre_receive;
255 so->so_rcv.sb_flags |= SB_UPCALL;
256 }
257
258 static void
259 gre_upcall_remove(struct socket *so)
260 {
261 /* XXX What if the kernel already set an upcall? */
262 so->so_rcv.sb_flags &= ~SB_UPCALL;
263 so->so_upcallarg = NULL;
264 so->so_upcall = NULL;
265 }
266
267 static void
268 gre_sodestroy(struct socket **sop)
269 {
270 gre_upcall_remove(*sop);
271 soshutdown(*sop, SHUT_RDWR);
272 soclose(*sop);
273 *sop = NULL;
274 }
275
276 static struct mbuf *
277 gre_getsockmbuf(struct socket *so)
278 {
279 struct mbuf *m;
280
281 m = m_get(M_WAIT, MT_SONAME);
282 if (m != NULL)
283 MCLAIM(m, so->so_mowner);
284 return m;
285 }
286
287 static int
288 gre_socreate1(struct gre_softc *sc, struct lwp *l, struct socket **sop)
289 {
290 int rc;
291 struct gre_soparm *sp = &sc->sc_soparm;
292 struct mbuf *m;
293 struct sockaddr_in *sin;
294 struct socket *so;
295
296 GRE_DPRINTF(sc, "%s: enter\n", __func__);
297 rc = socreate(AF_INET, sop, SOCK_DGRAM, IPPROTO_UDP, l);
298 if (rc != 0) {
299 GRE_DPRINTF(sc, "%s: socreate failed\n", __func__);
300 return rc;
301 }
302
303 so = *sop;
304
305 gre_upcall_add(so, sc);
306 if ((m = gre_getsockmbuf(so)) == NULL) {
307 rc = ENOBUFS;
308 goto out;
309 }
310 sin = mtod(m, struct sockaddr_in *);
311 sockaddr_in_init(sin, &sc->sc_src, sc->sc_srcport);
312 m->m_len = sin->sin_len;
313
314 GRE_DPRINTF(sc, "%s: bind 0x%08" PRIx32 " port %d\n", __func__,
315 sin->sin_addr.s_addr, ntohs(sin->sin_port));
316 if ((rc = sobind(so, m, l)) != 0) {
317 GRE_DPRINTF(sc, "%s: sobind failed\n", __func__);
318 goto out;
319 }
320
321 if (sc->sc_srcport == 0) {
322 if ((rc = gre_getsockname(so, m, l)) != 0) {
323 GRE_DPRINTF(sc, "%s: gre_getsockname\n", __func__);
324 goto out;
325 }
326 sc->sc_srcport = sin->sin_port;
327 }
328
329 sockaddr_in_init(sin, &sc->sc_dst, sc->sc_dstport);
330 m->m_len = sin->sin_len;
331
332 if ((rc = soconnect(so, m, l)) != 0) {
333 GRE_DPRINTF(sc, "%s: soconnect failed\n", __func__);
334 goto out;
335 }
336
337 *mtod(m, int *) = ip_gre_ttl;
338 m->m_len = sizeof(int);
339 KASSERT(so->so_proto && so->so_proto->pr_ctloutput);
340 rc = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so, IPPROTO_IP, IP_TTL,
341 &m);
342 m = NULL;
343 if (rc != 0) {
344 GRE_DPRINTF(sc, "%s: setopt ttl failed\n", __func__);
345 rc = 0;
346 }
347 out:
348 m_freem(m);
349
350 if (rc != 0)
351 gre_sodestroy(sop);
352 else {
353 sc->sc_if.if_flags |= IFF_RUNNING;
354 *sp = sc->sc_newsoparm;
355 }
356
357 return rc;
358 }
359
360 static void
361 gre_do_recv(struct gre_softc *sc, struct socket *so, lwp_t *l)
362 {
363 for (;;) {
364 int flags, rc;
365 const struct gre_h *gh;
366 struct mbuf *m;
367
368 flags = MSG_DONTWAIT;
369 sc->sc_uio.uio_resid = 1000000;
370 rc = (*so->so_receive)(so, NULL, &sc->sc_uio, &m, NULL, &flags);
371 /* TBD Back off if ECONNREFUSED (indicates
372 * ICMP Port Unreachable)?
373 */
374 if (rc == EWOULDBLOCK) {
375 GRE_DPRINTF(sc, "%s: so_receive EWOULDBLOCK\n",
376 __func__);
377 break;
378 } else if (rc != 0 || m == NULL) {
379 GRE_DPRINTF(sc, "%s: rc %d m %p\n",
380 sc->sc_if.if_xname, rc, (void *)m);
381 continue;
382 } else
383 GRE_DPRINTF(sc, "%s: so_receive ok\n", __func__);
384 if (m->m_len < sizeof(*gh) &&
385 (m = m_pullup(m, sizeof(*gh))) == NULL) {
386 GRE_DPRINTF(sc, "%s: m_pullup failed\n", __func__);
387 continue;
388 }
389 gh = mtod(m, const struct gre_h *);
390
391 if (gre_input3(sc, m, 0, gh, 0) == 0) {
392 GRE_DPRINTF(sc, "%s: dropping unsupported\n", __func__);
393 m_freem(m);
394 }
395 }
396 }
397
398 static void
399 gre_do_send(struct gre_softc *sc, struct socket *so, lwp_t *l)
400 {
401 for (;;) {
402 int rc;
403 struct mbuf *m;
404
405 mutex_enter(&sc->sc_mtx);
406 IF_DEQUEUE(&sc->sc_snd, m);
407 mutex_exit(&sc->sc_mtx);
408 if (m == NULL)
409 break;
410 GRE_DPRINTF(sc, "%s: dequeue\n", __func__);
411 if ((so->so_state & SS_ISCONNECTED) == 0) {
412 GRE_DPRINTF(sc, "%s: not connected\n", __func__);
413 m_freem(m);
414 continue;
415 }
416 rc = (*so->so_send)(so, NULL, NULL, m, NULL, 0, l);
417 /* XXX handle ENOBUFS? */
418 if (rc != 0)
419 GRE_DPRINTF(sc, "%s: so_send failed\n",
420 __func__);
421 }
422 }
423
424 static struct socket *
425 gre_reconf(struct gre_softc *sc, struct socket *so, lwp_t *l)
426 {
427 struct ifnet *ifp = &sc->sc_if;
428
429 GRE_DPRINTF(sc, "%s: enter\n", __func__);
430
431 shutdown:
432 if (sc->sc_soparm.sp_fp != NULL) {
433 gre_upcall_remove(so);
434 gre_closef(&sc->sc_soparm.sp_fp, curlwp);
435 so = NULL;
436 } else if (so != NULL)
437 gre_sodestroy(&so);
438
439 if (sc->sc_dying)
440 GRE_DPRINTF(sc, "%s: dying\n", __func__);
441 else if ((ifp->if_flags & IFF_UP) != IFF_UP)
442 GRE_DPRINTF(sc, "%s: down\n", __func__);
443 else if (sc->sc_proto != IPPROTO_UDP)
444 GRE_DPRINTF(sc, "%s: not UDP\n", __func__);
445 else if (sc->sc_newsoparm.sp_fp != NULL) {
446 sc->sc_soparm.sp_fp = sc->sc_newsoparm.sp_fp;
447 sc->sc_newsoparm.sp_fp = NULL;
448 so = (struct socket *)sc->sc_soparm.sp_fp->f_data;
449 gre_upcall_add(so, sc);
450 sc->sc_soparm = sc->sc_newsoparm;
451 } else if (gre_socreate1(sc, l, &so) != 0) {
452 sc->sc_dying = 1;
453 goto shutdown;
454 }
455 cv_signal(&sc->sc_soparm_cv);
456 if (so != NULL)
457 sc->sc_if.if_flags |= IFF_RUNNING;
458 else if (sc->sc_proto == IPPROTO_UDP)
459 sc->sc_if.if_flags &= ~IFF_RUNNING;
460 return so;
461 }
462
463 static void
464 gre_thread1(struct gre_softc *sc, struct lwp *l)
465 {
466 struct ifnet *ifp = &sc->sc_if;
467 struct socket *so = NULL;
468
469 GRE_DPRINTF(sc, "%s: enter\n", __func__);
470
471 while (!sc->sc_dying) {
472 while (sc->sc_haswork == 0) {
473 GRE_DPRINTF(sc, "%s: sleeping\n", __func__);
474 cv_wait(&sc->sc_work_cv, &sc->sc_mtx);
475 }
476 sc->sc_haswork = 0;
477
478 GRE_DPRINTF(sc, "%s: awake\n", __func__);
479
480 /* XXX optimize */
481 if ((ifp->if_flags & IFF_UP) != IFF_UP ||
482 sc->sc_proto != IPPROTO_UDP || so == NULL ||
483 sc->sc_newsoparm.sp_fp != NULL ||
484 memcmp(&sc->sc_soparm, &sc->sc_newsoparm,
485 sizeof(sc->sc_soparm)) != 0)
486 so = gre_reconf(sc, so, l);
487 mutex_exit(&sc->sc_mtx);
488 if (so != NULL) {
489 gre_do_recv(sc, so, l);
490 gre_do_send(sc, so, l);
491 }
492 mutex_enter(&sc->sc_mtx);
493 }
494 sc->sc_running = 0;
495 cv_signal(&sc->sc_join_cv);
496 }
497
498 static void
499 gre_thread(void *arg)
500 {
501 struct gre_softc *sc = (struct gre_softc *)arg;
502
503 mutex_enter(&sc->sc_mtx);
504 gre_thread1(sc, curlwp);
505 mutex_exit(&sc->sc_mtx);
506
507 /* must not touch sc after this! */
508 kthread_exit(0);
509 }
510
511 /* Calling thread must hold sc->sc_mtx. */
512 int
513 gre_input3(struct gre_softc *sc, struct mbuf *m, int hlen,
514 const struct gre_h *gh, int mtx_held)
515 {
516 u_int16_t flags;
517 #if NBPFILTER > 0
518 u_int32_t af = AF_INET; /* af passed to BPF tap */
519 #endif
520 int isr;
521 struct ifqueue *ifq;
522
523 sc->sc_if.if_ipackets++;
524 sc->sc_if.if_ibytes += m->m_pkthdr.len;
525
526 hlen += sizeof(struct gre_h);
527
528 /* process GRE flags as packet can be of variable len */
529 flags = ntohs(gh->flags);
530
531 /* Checksum & Offset are present */
532 if ((flags & GRE_CP) | (flags & GRE_RP))
533 hlen += 4;
534 /* We don't support routing fields (variable length) */
535 if (flags & GRE_RP) {
536 sc->sc_if.if_ierrors++;
537 return 0;
538 }
539 if (flags & GRE_KP)
540 hlen += 4;
541 if (flags & GRE_SP)
542 hlen += 4;
543
544 switch (ntohs(gh->ptype)) { /* ethertypes */
545 case ETHERTYPE_IP: /* shouldn't need a schednetisr(), as */
546 ifq = &ipintrq; /* we are in ip_input */
547 isr = NETISR_IP;
548 break;
549 #ifdef NETATALK
550 case ETHERTYPE_ATALK:
551 ifq = &atintrq1;
552 isr = NETISR_ATALK;
553 #if NBPFILTER > 0
554 af = AF_APPLETALK;
555 #endif
556 break;
557 #endif
558 #ifdef INET6
559 case ETHERTYPE_IPV6:
560 GRE_DPRINTF(sc, "%s: IPv6 packet\n", __func__);
561 ifq = &ip6intrq;
562 isr = NETISR_IPV6;
563 #if NBPFILTER > 0
564 af = AF_INET6;
565 #endif
566 break;
567 #endif
568 default: /* others not yet supported */
569 GRE_DPRINTF(sc, "%s: unhandled ethertype 0x%04x\n", __func__,
570 ntohs(gh->ptype));
571 sc->sc_if.if_noproto++;
572 return 0;
573 }
574
575 if (hlen > m->m_pkthdr.len) {
576 m_freem(m);
577 sc->sc_if.if_ierrors++;
578 return EINVAL;
579 }
580 m_adj(m, hlen);
581
582 #if NBPFILTER > 0
583 if (sc->sc_if.if_bpf != NULL)
584 bpf_mtap_af(sc->sc_if.if_bpf, af, m);
585 #endif /*NBPFILTER > 0*/
586
587 m->m_pkthdr.rcvif = &sc->sc_if;
588
589 if (!mtx_held)
590 mutex_enter(&sc->sc_mtx);
591 if (IF_QFULL(ifq)) {
592 IF_DROP(ifq);
593 m_freem(m);
594 } else {
595 IF_ENQUEUE(ifq, m);
596 }
597 /* we need schednetisr since the address family may change */
598 schednetisr(isr);
599 if (!mtx_held)
600 mutex_exit(&sc->sc_mtx);
601
602 return 1; /* packet is done, no further processing needed */
603 }
604
605 /*
606 * The output routine. Takes a packet and encapsulates it in the protocol
607 * given by sc->sc_proto. See also RFC 1701 and RFC 2004
608 */
609 static int
610 gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
611 struct rtentry *rt)
612 {
613 int error = 0, hlen, msiz;
614 struct gre_softc *sc = ifp->if_softc;
615 struct greip *gi;
616 struct gre_h *gh;
617 struct ip *eip, *ip;
618 u_int8_t ip_tos = 0;
619 u_int16_t etype = 0;
620 struct mobile_h mob_h;
621
622 if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
623 (IFF_UP | IFF_RUNNING) ||
624 sc->sc_src.s_addr == INADDR_ANY ||
625 sc->sc_dst.s_addr == INADDR_ANY) {
626 m_freem(m);
627 error = ENETDOWN;
628 goto end;
629 }
630
631 gi = NULL;
632 ip = NULL;
633
634 #if NBPFILTER >0
635 if (ifp->if_bpf)
636 bpf_mtap_af(ifp->if_bpf, dst->sa_family, m);
637 #endif
638
639 m->m_flags &= ~(M_BCAST|M_MCAST);
640
641 switch (sc->sc_proto) {
642 case IPPROTO_MOBILE:
643 if (dst->sa_family != AF_INET) {
644 IF_DROP(&ifp->if_snd);
645 m_freem(m);
646 error = EINVAL;
647 goto end;
648 }
649
650 if (M_UNWRITABLE(m, sizeof(*ip)) &&
651 (m = m_pullup(m, sizeof(*ip))) == NULL) {
652 error = ENOBUFS;
653 goto end;
654 }
655 ip = mtod(m, struct ip *);
656
657 memset(&mob_h, 0, MOB_H_SIZ_L);
658 mob_h.proto = (ip->ip_p) << 8;
659 mob_h.odst = ip->ip_dst.s_addr;
660 ip->ip_dst.s_addr = sc->sc_dst.s_addr;
661
662 /*
663 * If the packet comes from our host, we only change
664 * the destination address in the IP header.
665 * Else we also need to save and change the source
666 */
667 if (in_hosteq(ip->ip_src, sc->sc_src))
668 msiz = MOB_H_SIZ_S;
669 else {
670 mob_h.proto |= MOB_H_SBIT;
671 mob_h.osrc = ip->ip_src.s_addr;
672 ip->ip_src.s_addr = sc->sc_src.s_addr;
673 msiz = MOB_H_SIZ_L;
674 }
675 HTONS(mob_h.proto);
676 mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
677
678 M_PREPEND(m, msiz, M_DONTWAIT);
679 if (m == NULL) {
680 error = ENOBUFS;
681 goto end;
682 }
683 /* XXX Assuming that ip does not dangle after
684 * M_PREPEND. In practice, that's true, but
685 * that's not in M_PREPEND's contract.
686 */
687 memmove(mtod(m, void *), ip, sizeof(*ip));
688 ip = mtod(m, struct ip *);
689 memcpy(ip + 1, &mob_h, (size_t)msiz);
690 ip->ip_len = htons(ntohs(ip->ip_len) + msiz);
691 break;
692 case IPPROTO_UDP:
693 case IPPROTO_GRE:
694 GRE_DPRINTF(sc, "%s: dst->sa_family=%d\n", __func__,
695 dst->sa_family);
696 switch (dst->sa_family) {
697 case AF_INET:
698 ip = mtod(m, struct ip *);
699 ip_tos = ip->ip_tos;
700 etype = ETHERTYPE_IP;
701 break;
702 #ifdef NETATALK
703 case AF_APPLETALK:
704 etype = ETHERTYPE_ATALK;
705 break;
706 #endif
707 #ifdef INET6
708 case AF_INET6:
709 etype = ETHERTYPE_IPV6;
710 break;
711 #endif
712 default:
713 IF_DROP(&ifp->if_snd);
714 m_freem(m);
715 error = EAFNOSUPPORT;
716 goto end;
717 }
718 break;
719 default:
720 IF_DROP(&ifp->if_snd);
721 m_freem(m);
722 error = EINVAL;
723 goto end;
724 }
725
726 switch (sc->sc_proto) {
727 case IPPROTO_GRE:
728 hlen = sizeof(struct greip);
729 break;
730 case IPPROTO_UDP:
731 hlen = sizeof(struct gre_h);
732 break;
733 default:
734 hlen = 0;
735 break;
736 }
737
738 M_PREPEND(m, hlen, M_DONTWAIT);
739
740 if (m == NULL) {
741 IF_DROP(&ifp->if_snd);
742 error = ENOBUFS;
743 goto end;
744 }
745
746 switch (sc->sc_proto) {
747 case IPPROTO_UDP:
748 gh = mtod(m, struct gre_h *);
749 memset(gh, 0, sizeof(*gh));
750 gh->ptype = htons(etype);
751 /* XXX Need to handle IP ToS. Look at how I handle IP TTL. */
752 break;
753 case IPPROTO_GRE:
754 gi = mtod(m, struct greip *);
755 gh = &gi->gi_g;
756 eip = &gi->gi_i;
757 /* we don't have any GRE flags for now */
758 memset(gh, 0, sizeof(*gh));
759 gh->ptype = htons(etype);
760 eip->ip_src = sc->sc_src;
761 eip->ip_dst = sc->sc_dst;
762 eip->ip_hl = (sizeof(struct ip)) >> 2;
763 eip->ip_ttl = ip_gre_ttl;
764 eip->ip_tos = ip_tos;
765 eip->ip_len = htons(m->m_pkthdr.len);
766 eip->ip_p = sc->sc_proto;
767 break;
768 case IPPROTO_MOBILE:
769 eip = mtod(m, struct ip *);
770 eip->ip_p = sc->sc_proto;
771 break;
772 default:
773 error = EPROTONOSUPPORT;
774 m_freem(m);
775 goto end;
776 }
777
778 ifp->if_opackets++;
779 ifp->if_obytes += m->m_pkthdr.len;
780
781 /* send it off */
782 if (sc->sc_proto == IPPROTO_UDP) {
783 if (IF_QFULL(&sc->sc_snd)) {
784 IF_DROP(&sc->sc_snd);
785 error = ENOBUFS;
786 m_freem(m);
787 } else {
788 IF_ENQUEUE(&sc->sc_snd, m);
789 gre_wakeup(sc);
790 error = 0;
791 }
792 goto end;
793 }
794 if (sc->route.ro_rt == NULL)
795 rtcache_init(&sc->route);
796 else
797 rtcache_check(&sc->route);
798 if (sc->route.ro_rt == NULL) {
799 m_freem(m);
800 goto end;
801 }
802 if (sc->route.ro_rt->rt_ifp->if_softc == sc) {
803 rtcache_clear(&sc->route);
804 m_freem(m);
805 } else
806 error = ip_output(m, NULL, &sc->route, 0, NULL, NULL);
807 end:
808 if (error)
809 ifp->if_oerrors++;
810 return error;
811 }
812
813 /* Calling thread must hold sc->sc_mtx. */
814 static int
815 gre_kick(struct gre_softc *sc)
816 {
817 struct ifnet *ifp = &sc->sc_if;
818
819 if (sc->sc_proto == IPPROTO_UDP && (ifp->if_flags & IFF_UP) == IFF_UP &&
820 !sc->sc_running)
821 return EBUSY;
822 gre_wakeup(sc);
823 return 0;
824 }
825
826 /* Calling thread must hold sc->sc_mtx. */
827 static int
828 gre_getname(struct socket *so, int req, struct mbuf *nam, struct lwp *l)
829 {
830 return (*so->so_proto->pr_usrreq)(so, req, NULL, nam, NULL, l);
831 }
832
833 /* Calling thread must hold sc->sc_mtx. */
834 static int
835 gre_getsockname(struct socket *so, struct mbuf *nam, struct lwp *l)
836 {
837 return gre_getname(so, PRU_SOCKADDR, nam, l);
838 }
839
840 /* Calling thread must hold sc->sc_mtx. */
841 static int
842 gre_getpeername(struct socket *so, struct mbuf *nam, struct lwp *l)
843 {
844 return gre_getname(so, PRU_PEERADDR, nam, l);
845 }
846
847 /* Calling thread must hold sc->sc_mtx. */
848 static int
849 gre_getnames(struct socket *so, struct lwp *l, struct sockaddr_in *src,
850 struct sockaddr_in *dst)
851 {
852 struct mbuf *m;
853 struct sockaddr_in *sin;
854 int rc;
855
856 if ((m = gre_getsockmbuf(so)) == NULL)
857 return ENOBUFS;
858
859 sin = mtod(m, struct sockaddr_in *);
860
861 if ((rc = gre_getsockname(so, m, l)) != 0)
862 goto out;
863 if (sin->sin_family != AF_INET) {
864 rc = EAFNOSUPPORT;
865 goto out;
866 }
867 *src = *sin;
868
869 if ((rc = gre_getpeername(so, m, l)) != 0)
870 goto out;
871 if (sin->sin_family != AF_INET) {
872 rc = EAFNOSUPPORT;
873 goto out;
874 }
875 *dst = *sin;
876
877 out:
878 m_freem(m);
879 return rc;
880 }
881
882 static void
883 gre_closef(struct file **fpp, struct lwp *l)
884 {
885 struct file *fp = *fpp;
886
887 simple_lock(&fp->f_slock);
888 FILE_USE(fp);
889 closef(fp, l);
890 *fpp = NULL;
891 }
892
893 static int
894 gre_ioctl(struct ifnet *ifp, const u_long cmd, void *data)
895 {
896 u_char oproto;
897 struct file *fp;
898 struct socket *so;
899 struct sockaddr_in dst, src;
900 struct proc *p = curproc; /* XXX */
901 struct lwp *l = curlwp; /* XXX */
902 struct ifreq *ifr;
903 struct if_laddrreq *lifr = (struct if_laddrreq *)data;
904 struct gre_softc *sc = ifp->if_softc;
905 struct sockaddr_in si;
906 const struct sockaddr *sa;
907 int error = 0;
908
909 ifr = data;
910
911 switch (cmd) {
912 case SIOCSIFFLAGS:
913 case SIOCSIFMTU:
914 case GRESPROTO:
915 case GRESADDRD:
916 case GRESADDRS:
917 case GRESSOCK:
918 case GREDSOCK:
919 case SIOCSLIFPHYADDR:
920 case SIOCDIFPHYADDR:
921 if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE,
922 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
923 NULL) != 0)
924 return EPERM;
925 break;
926 default:
927 break;
928 }
929
930 mutex_enter(&sc->sc_mtx);
931 switch (cmd) {
932 case SIOCSIFADDR:
933 ifp->if_flags |= IFF_UP;
934 if ((error = gre_kick(sc)) != 0)
935 ifp->if_flags &= ~IFF_UP;
936 break;
937 case SIOCSIFDSTADDR:
938 break;
939 case SIOCSIFFLAGS:
940 oproto = sc->sc_proto;
941 switch (ifr->ifr_flags & (IFF_LINK0|IFF_LINK2)) {
942 case IFF_LINK0|IFF_LINK2:
943 sc->sc_proto = IPPROTO_UDP;
944 if (oproto != IPPROTO_UDP)
945 ifp->if_flags &= ~IFF_RUNNING;
946 error = gre_kick(sc);
947 break;
948 case IFF_LINK0:
949 sc->sc_proto = IPPROTO_GRE;
950 gre_wakeup(sc);
951 goto recompute;
952 case 0:
953 sc->sc_proto = IPPROTO_MOBILE;
954 gre_wakeup(sc);
955 goto recompute;
956 }
957 break;
958 case SIOCSIFMTU:
959 if (ifr->ifr_mtu < 576) {
960 error = EINVAL;
961 break;
962 }
963 ifp->if_mtu = ifr->ifr_mtu;
964 break;
965 case SIOCGIFMTU:
966 ifr->ifr_mtu = sc->sc_if.if_mtu;
967 break;
968 case SIOCADDMULTI:
969 case SIOCDELMULTI:
970 if (ifr == NULL) {
971 error = EAFNOSUPPORT;
972 break;
973 }
974 switch (ifreq_getaddr(cmd, ifr)->sa_family) {
975 #ifdef INET
976 case AF_INET:
977 break;
978 #endif
979 #ifdef INET6
980 case AF_INET6:
981 break;
982 #endif
983 default:
984 error = EAFNOSUPPORT;
985 break;
986 }
987 break;
988 case GRESPROTO:
989 oproto = sc->sc_proto;
990 sc->sc_proto = ifr->ifr_flags;
991 switch (sc->sc_proto) {
992 case IPPROTO_UDP:
993 ifp->if_flags |= IFF_LINK0|IFF_LINK2;
994 if (oproto != IPPROTO_UDP)
995 ifp->if_flags &= ~IFF_RUNNING;
996 error = gre_kick(sc);
997 break;
998 case IPPROTO_GRE:
999 ifp->if_flags |= IFF_LINK0;
1000 ifp->if_flags &= ~IFF_LINK2;
1001 goto recompute;
1002 case IPPROTO_MOBILE:
1003 ifp->if_flags &= ~(IFF_LINK0|IFF_LINK2);
1004 goto recompute;
1005 default:
1006 error = EPROTONOSUPPORT;
1007 break;
1008 }
1009 break;
1010 case GREGPROTO:
1011 ifr->ifr_flags = sc->sc_proto;
1012 break;
1013 case GRESADDRS:
1014 case GRESADDRD:
1015 /*
1016 * set tunnel endpoints, compute a less specific route
1017 * to the remote end and mark if as up
1018 */
1019 sa = &ifr->ifr_addr;
1020 if (cmd == GRESADDRS) {
1021 sc->sc_src = satocsin(sa)->sin_addr;
1022 sc->sc_srcport = satocsin(sa)->sin_port;
1023 }
1024 if (cmd == GRESADDRD) {
1025 if (sc->sc_proto == IPPROTO_UDP &&
1026 satocsin(sa)->sin_port == 0) {
1027 error = EINVAL;
1028 break;
1029 }
1030 sc->sc_dst = satocsin(sa)->sin_addr;
1031 sc->sc_dstport = satocsin(sa)->sin_port;
1032 }
1033 recompute:
1034 if (sc->sc_proto == IPPROTO_UDP ||
1035 (sc->sc_src.s_addr != INADDR_ANY &&
1036 sc->sc_dst.s_addr != INADDR_ANY)) {
1037 rtcache_free(&sc->route);
1038 if (sc->sc_proto == IPPROTO_UDP) {
1039 if ((error = gre_kick(sc)) == 0)
1040 ifp->if_flags |= IFF_RUNNING;
1041 else
1042 ifp->if_flags &= ~IFF_RUNNING;
1043 }
1044 else if (gre_compute_route(sc) == 0)
1045 ifp->if_flags |= IFF_RUNNING;
1046 else
1047 ifp->if_flags &= ~IFF_RUNNING;
1048 }
1049 break;
1050 case GREGADDRS:
1051 sockaddr_in_init(&si, &sc->sc_src,
1052 (sc->sc_proto == IPPROTO_UDP) ? sc->sc_srcport : 0);
1053 ifr->ifr_addr = *sintosa(&si);
1054 break;
1055 case GREGADDRD:
1056 sockaddr_in_init(&si, &sc->sc_dst,
1057 (sc->sc_proto == IPPROTO_UDP) ? sc->sc_dstport : 0);
1058 ifr->ifr_addr = *sintosa(&si);
1059 break;
1060 case GREDSOCK:
1061 if (sc->sc_proto != IPPROTO_UDP) {
1062 error = EINVAL;
1063 break;
1064 }
1065 ifp->if_flags &= ~IFF_UP;
1066 gre_wakeup(sc);
1067 break;
1068 case GRESSOCK:
1069 if (sc->sc_proto != IPPROTO_UDP) {
1070 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1071 error = EINVAL;
1072 break;
1073 }
1074 /* getsock() will FILE_USE() and unlock the descriptor for us */
1075 if ((error = getsock(p->p_fd, (int)ifr->ifr_value, &fp)) != 0) {
1076 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1077 error = EINVAL;
1078 break;
1079 }
1080 so = (struct socket *)fp->f_data;
1081 if (so->so_type != SOCK_DGRAM) {
1082 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1083 FILE_UNUSE(fp, NULL);
1084 error = EINVAL;
1085 break;
1086 }
1087 /* check address */
1088 if ((error = gre_getnames(so, curlwp, &src, &dst)) != 0) {
1089 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1090 FILE_UNUSE(fp, NULL);
1091 break;
1092 }
1093
1094 /* Increase reference count. Now that our reference
1095 * to the file descriptor is counted, this thread
1096 * can release our "use" of the descriptor, but it
1097 * will not be destroyed by some other thread's
1098 * action. This thread needs to release its use,
1099 * too, because one and only one thread can have
1100 * use of the descriptor at once. The kernel thread
1101 * will pick up the use if it needs it.
1102 */
1103
1104 fp->f_count++;
1105 FILE_UNUSE(fp, NULL);
1106
1107 while (sc->sc_newsoparm.sp_fp != NULL && error == 0) {
1108 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1109 error = cv_timedwait_sig(&sc->sc_soparm_cv, &sc->sc_mtx,
1110 MAX(1, hz / 2));
1111 }
1112 if (error == 0) {
1113 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1114 sc->sc_newsoparm.sp_fp = fp;
1115 ifp->if_flags |= IFF_UP;
1116 }
1117
1118 if (error != 0 || (error = gre_kick(sc)) != 0) {
1119 GRE_DPRINTF(sc, "%s: l.%d\n", __func__, __LINE__);
1120 gre_closef(&fp, l);
1121 break;
1122 }
1123 /* fp does not any longer belong to this thread. */
1124 sc->sc_src = src.sin_addr;
1125 sc->sc_srcport = src.sin_port;
1126 sc->sc_dst = dst.sin_addr;
1127 sc->sc_dstport = dst.sin_port;
1128 GRE_DPRINTF(sc, "%s: sock 0x%08" PRIx32 " port %d -> "
1129 "0x%08" PRIx32 " port %d\n", __func__,
1130 src.sin_addr.s_addr, ntohs(src.sin_port),
1131 dst.sin_addr.s_addr, ntohs(dst.sin_port));
1132 break;
1133 case SIOCSLIFPHYADDR:
1134 if (lifr->addr.ss_family != AF_INET ||
1135 lifr->dstaddr.ss_family != AF_INET) {
1136 error = EAFNOSUPPORT;
1137 break;
1138 }
1139 if (lifr->addr.ss_len != sizeof(si) ||
1140 lifr->dstaddr.ss_len != sizeof(si)) {
1141 error = EINVAL;
1142 break;
1143 }
1144 sc->sc_src = satocsin(&lifr->addr)->sin_addr;
1145 sc->sc_dst = satocsin(&lifr->dstaddr)->sin_addr;
1146 sc->sc_srcport = satocsin(&lifr->addr)->sin_port;
1147 sc->sc_dstport = satocsin(&lifr->dstaddr)->sin_port;
1148 goto recompute;
1149 case SIOCDIFPHYADDR:
1150 sc->sc_src.s_addr = INADDR_ANY;
1151 sc->sc_dst.s_addr = INADDR_ANY;
1152 sc->sc_srcport = 0;
1153 sc->sc_dstport = 0;
1154 goto recompute;
1155 case SIOCGLIFPHYADDR:
1156 if (sc->sc_src.s_addr == INADDR_ANY ||
1157 sc->sc_dst.s_addr == INADDR_ANY) {
1158 error = EADDRNOTAVAIL;
1159 break;
1160 }
1161 sockaddr_in_init(satosin(&lifr->addr), &sc->sc_src,
1162 (sc->sc_proto == IPPROTO_UDP) ? sc->sc_srcport : 0);
1163 sockaddr_in_init(satosin(&lifr->dstaddr), &sc->sc_dst,
1164 (sc->sc_proto == IPPROTO_UDP) ? sc->sc_dstport : 0);
1165 break;
1166 default:
1167 error = EINVAL;
1168 break;
1169 }
1170 mutex_exit(&sc->sc_mtx);
1171 return error;
1172 }
1173
1174 /*
1175 * Compute a route to our destination.
1176 */
1177 static int
1178 gre_compute_route(struct gre_softc *sc)
1179 {
1180 struct route *ro;
1181 union {
1182 struct sockaddr dst;
1183 struct sockaddr_in dst4;
1184 } u;
1185
1186 ro = &sc->route;
1187
1188 memset(ro, 0, sizeof(*ro));
1189 sockaddr_in_init(&u.dst4, &sc->sc_dst, 0);
1190 rtcache_setdst(ro, &u.dst);
1191
1192 rtcache_init(ro);
1193
1194 if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
1195 GRE_DPRINTF(sc, "%s: route to %s %s\n", sc->sc_if.if_xname,
1196 inet_ntoa(u.dst4.sin_addr),
1197 (ro->ro_rt == NULL)
1198 ? "does not exist"
1199 : "loops back to ourself");
1200 rtcache_free(ro);
1201 return EADDRNOTAVAIL;
1202 }
1203
1204 return 0;
1205 }
1206
/*
 * Checksum a flat buffer - the counterpart of in_cksum, which works on
 * mbufs.
 *
 * 'p' points at the buffer and 'len' is its length in bytes.  The buffer
 * is summed as 16-bit words in host memory order; a trailing odd byte is
 * treated as the first byte of a word whose second byte is zero.  The
 * carries are folded back in and the one's complement of the sum is
 * returned.
 */
u_int16_t
gre_in_cksum(u_int16_t *p, u_int len)
{
	u_int32_t acc = 0;
	int remaining;

	/* Whole 16-bit words. */
	for (remaining = len >> 1; remaining != 0; remaining--)
		acc += *p++;

	/* Odd trailing byte, padded with a zero byte. */
	if ((len & 1) != 0) {
		union {
			u_short	w;
			u_char	c[2];
		} tail;

		tail.c[1] = 0;
		tail.c[0] = *(u_char *)p;
		acc += tail.w;
	}

	/* end-around-carry */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += acc >> 16;
	return ~acc;
}
1235 #endif
1236
void	greattach(int);

/*
 * Pseudo-device attach routine.  With INET configured it initializes the
 * list of gre instances and registers the "gre" interface cloner;
 * without INET it does nothing.  'count' is not used.
 */
/* ARGSUSED */
void
greattach(int count)
{
#ifdef INET
	/* The list must exist before instances can be cloned. */
	LIST_INIT(&gre_softc_list);
	if_clone_attach(&gre_cloner);
#endif
}
1248