/*	$NetBSD: if_tun.c,v 1.45.2.1 2001/09/07 04:45:42 thorpej Exp $	*/

/*
 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
 * Nottingham University 1987.
 *
 * This source may be freely distributed, however I would be interested
 * in any changes that are made.
 *
 * This driver takes packets off the IP i/f and hands them up to a
 * user process to have its wicked way with. This driver has its
 * roots in a similar driver written by Phil Cockcroft (formerly) at
 * UCL. This driver is based much more on read/write/poll mode of
 * operation though.
 */
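
/*
 * Overview: each tun unit pairs a character device with the network
 * interface of the same unit number.  tun_output() queues packets
 * routed to the interface on if_snd, where a userland process picks
 * them up with read(); packets written to the device are reassembled
 * into an mbuf chain by tunwrite() and fed to the IP input queue.
 * With TUNSLMODE set, each packet crossing the device is prefixed by
 * a struct sockaddr describing its destination address family.
 */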

#include "tun.h"
#if NTUN > 0

#include "opt_inet.h"
#include "opt_ns.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/buf.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/errno.h>
#include <sys/syslog.h>
#include <sys/select.h>
#include <sys/poll.h>
#include <sys/file.h>
#include <sys/signalvar.h>
#include <sys/conf.h>
#include <sys/vnode.h>

#include <miscfs/specfs/specdev.h>

#include <machine/cpu.h>

#include <net/if.h>
#include <net/if_ether.h>
#include <net/netisr.h>
#include <net/route.h>


#ifdef INET
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/if_inarp.h>
#endif

#ifdef NS
#include <netns/ns.h>
#include <netns/ns_if.h>
#endif

#include "bpfilter.h"
#if NBPFILTER > 0
#include <sys/time.h>
#include <net/bpf.h>
#endif

#include <net/if_tun.h>

#define TUNDEBUG	if (tundebug) printf
int	tundebug = 0;

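/*
 * One softc per unit; the character device's minor number indexes
 * this array, and tunattach() initializes every entry and attaches
 * its tunN interface.
 */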
struct tun_softc tunctl[NTUN];
extern int ifqmaxlen;
void	tunattach __P((int));

int	tun_ioctl __P((struct ifnet *, u_long, caddr_t));
int	tun_output __P((struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *rt));

static void tuninit __P((struct tun_softc *));

void
tunattach(unused)
	int unused;
{
	int i;
	struct ifnet *ifp;

	for (i = 0; i < NTUN; i++) {
		tunctl[i].tun_flags = TUN_INITED;

		ifp = &tunctl[i].tun_if;
		sprintf(ifp->if_xname, "tun%d", i);
		ifp->if_softc = &tunctl[i];
		ifp->if_mtu = TUNMTU;
		ifp->if_ioctl = tun_ioctl;
		ifp->if_output = tun_output;
		ifp->if_flags = IFF_POINTOPOINT;
		ifp->if_snd.ifq_maxlen = ifqmaxlen;
		ifp->if_collisions = 0;
		ifp->if_ierrors = 0;
		ifp->if_oerrors = 0;
		ifp->if_ipackets = 0;
		ifp->if_opackets = 0;
		ifp->if_dlt = DLT_NULL;
		if_attach(ifp);
		if_alloc_sadl(ifp);
#if NBPFILTER > 0
		bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
#endif
	}
}

/*
 * tunnel open - must be superuser & the device must be
 * configured in
 */
int
tunopen(devvp, flag, mode, p)
	struct vnode *devvp;
	int flag, mode;
	struct proc *p;
{
	struct ifnet *ifp;
	struct tun_softc *tp;
	int unit, error;

	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);

	if ((unit = minor(devvp->v_rdev)) >= NTUN)
		return (ENXIO);
	tp = &tunctl[unit];

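	/*
	 * Stash the softc in the vnode; the other cdevsw entry points
	 * (close, ioctl, read, write, poll) recover it from
	 * devvp->v_devcookie.
	 */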
	devvp->v_devcookie = tp;

	if (tp->tun_flags & TUN_OPEN)
		return ENXIO;
	ifp = &tp->tun_if;
	tp->tun_flags |= TUN_OPEN;
	TUNDEBUG("%s: open\n", ifp->if_xname);
	return (0);
}

/*
 * tunclose - close the device - mark i/f down & delete
 * routing info
 */
int
tunclose(devvp, flag, mode, p)
	struct vnode *devvp;
	int flag;
	int mode;
	struct proc *p;
{
	struct tun_softc *tp = devvp->v_devcookie;
	struct ifnet *ifp = &tp->tun_if;
	struct mbuf *m;
	int s;

	tp->tun_flags &= ~TUN_OPEN;

	/*
	 * junk all pending output
	 */
	do {
		s = splnet();
		IF_DEQUEUE(&ifp->if_snd, m);
		splx(s);
		if (m)
			m_freem(m);
	} while (m);

	if (ifp->if_flags & IFF_UP) {
		s = splnet();
		if_down(ifp);
		if (ifp->if_flags & IFF_RUNNING) {
			/* find internet addresses and delete routes */
			struct ifaddr *ifa;
			for (ifa = ifp->if_addrlist.tqh_first; ifa != 0;
			    ifa = ifa->ifa_list.tqe_next) {
#ifdef INET
				if (ifa->ifa_addr->sa_family == AF_INET) {
					rtinit(ifa, (int)RTM_DELETE,
					    tp->tun_flags & TUN_DSTADDR
						? RTF_HOST
						: 0);
				}
#endif
			}
		}
		splx(s);
	}
	tp->tun_pgrp = 0;
	selwakeup(&tp->tun_rsel);

	TUNDEBUG ("%s: closed\n", ifp->if_xname);
	return (0);
}

static void
tuninit(tp)
	struct tun_softc *tp;
{
	struct ifnet *ifp = &tp->tun_if;
	struct ifaddr *ifa;

	TUNDEBUG("%s: tuninit\n", ifp->if_xname);

	ifp->if_flags |= IFF_UP | IFF_RUNNING;

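	/*
	 * Rescan the interface's address list: TUN_IASET records that a
	 * local internet address is configured, TUN_DSTADDR that a
	 * point-to-point destination address is.
	 */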
	tp->tun_flags &= ~(TUN_IASET|TUN_DSTADDR);
	for (ifa = ifp->if_addrlist.tqh_first; ifa != 0;
	    ifa = ifa->ifa_list.tqe_next) {
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct sockaddr_in *sin;

			sin = satosin(ifa->ifa_addr);
			if (sin && sin->sin_addr.s_addr)
				tp->tun_flags |= TUN_IASET;

			if (ifp->if_flags & IFF_POINTOPOINT) {
				sin = satosin(ifa->ifa_dstaddr);
				if (sin && sin->sin_addr.s_addr)
					tp->tun_flags |= TUN_DSTADDR;
			}
		}
#endif
	}

	return;
}

/*
 * Process an ioctl request.
 */
int
tun_ioctl(ifp, cmd, data)
	struct ifnet *ifp;
	u_long cmd;
	caddr_t data;
{
	int error = 0, s;

	s = splnet();
	switch(cmd) {
	case SIOCSIFADDR:
		tuninit((struct tun_softc *)(ifp->if_softc));
		TUNDEBUG("%s: address set\n", ifp->if_xname);
		break;
	case SIOCSIFDSTADDR:
		tuninit((struct tun_softc *)(ifp->if_softc));
		TUNDEBUG("%s: destination address set\n", ifp->if_xname);
		break;
	case SIOCSIFBRDADDR:
		TUNDEBUG("%s: broadcast address set\n", ifp->if_xname);
		break;
	case SIOCSIFMTU: {
		struct ifreq *ifr = (struct ifreq *) data;
		if (ifr->ifr_mtu > TUNMTU || ifr->ifr_mtu < 576) {
			error = EINVAL;
			break;
		}
		TUNDEBUG("%s: interface mtu set\n", ifp->if_xname);
		ifp->if_mtu = ifr->ifr_mtu;
		break;
	}
	case SIOCADDMULTI:
	case SIOCDELMULTI: {
		struct ifreq *ifr = (struct ifreq *) data;
		if (ifr == 0) {
			error = EAFNOSUPPORT;	/* XXX */
			break;
		}
		switch (ifr->ifr_addr.sa_family) {

#ifdef INET
		case AF_INET:
			break;
#endif

		default:
			error = EAFNOSUPPORT;
			break;
		}
		break;
	}
	case SIOCSIFFLAGS:
		break;
	default:
		error = EINVAL;
	}
	splx(s);
	return (error);
}

/*
 * tun_output - queue packets from higher level ready to put out.
 */
int
tun_output(ifp, m0, dst, rt)
	struct ifnet *ifp;
	struct mbuf *m0;
	struct sockaddr *dst;
	struct rtentry *rt;
{
	struct tun_softc *tp = ifp->if_softc;
	struct proc *p;
#ifdef INET
	int s;
#endif

	TUNDEBUG ("%s: tun_output\n", ifp->if_xname);

	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
		TUNDEBUG ("%s: not ready 0%o\n", ifp->if_xname,
		    tp->tun_flags);
		m_freem (m0);
		return (EHOSTDOWN);
	}

#if NBPFILTER > 0
	if (ifp->if_bpf) {
		/*
		 * We need to prepend the address family as
		 * a four byte field. Cons up a dummy header
		 * to pacify bpf. This is safe because bpf
		 * will only read from the mbuf (i.e., it won't
		 * try to free it or keep a pointer to it).
		 */
		struct mbuf m;
		u_int32_t af = dst->sa_family;

		m.m_next = m0;
		m.m_len = sizeof(af);
		m.m_data = (char *)&af;

		bpf_mtap(ifp->if_bpf, &m);
	}
#endif

	switch(dst->sa_family) {
#ifdef INET
	case AF_INET:
		if (tp->tun_flags & TUN_PREPADDR) {
			/* Simple link-layer header */
			M_PREPEND(m0, dst->sa_len, M_DONTWAIT);
			if (m0 == NULL) {
				IF_DROP(&ifp->if_snd);
				return (ENOBUFS);
			}
			bcopy(dst, mtod(m0, char *), dst->sa_len);
		}
		/* FALLTHROUGH */
	case AF_UNSPEC:
		s = splnet();
		if (IF_QFULL(&ifp->if_snd)) {
			IF_DROP(&ifp->if_snd);
			m_freem(m0);
			splx(s);
			ifp->if_collisions++;
			return (ENOBUFS);
		}
		IF_ENQUEUE(&ifp->if_snd, m0);
		splx(s);
		ifp->if_opackets++;
		break;
#endif
	default:
		m_freem(m0);
		return (EAFNOSUPPORT);
	}

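	/*
	 * Tell the reader side a packet is waiting: wake a sleeping
	 * tunread(), post SIGIO if FIOASYNC was requested (a positive
	 * tun_pgrp names a process group, a negative one a single
	 * process), and notify select/poll waiters.
	 */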
	if (tp->tun_flags & TUN_RWAIT) {
		tp->tun_flags &= ~TUN_RWAIT;
		wakeup((caddr_t)tp);
	}
	if (tp->tun_flags & TUN_ASYNC && tp->tun_pgrp) {
		if (tp->tun_pgrp > 0)
			gsignal(tp->tun_pgrp, SIGIO);
		else if ((p = pfind(-tp->tun_pgrp)) != NULL)
			psignal(p, SIGIO);
	}
	selwakeup(&tp->tun_rsel);
	return (0);
}

/*
 * the cdevsw interface is now pretty minimal.
 */
int
tunioctl(devvp, cmd, data, flag, p)
	struct vnode *devvp;
	u_long cmd;
	caddr_t data;
	int flag;
	struct proc *p;
{
	struct tun_softc *tp = devvp->v_devcookie;
	int s;

	switch (cmd) {
	case TUNSDEBUG:
		tundebug = *(int *)data;
		break;

	case TUNGDEBUG:
		*(int *)data = tundebug;
		break;

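	/*
	 * TUNSIFMODE switches the interface between point-to-point and
	 * broadcast mode; refuse to change it while the interface is up.
	 */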
	case TUNSIFMODE:
		switch (*(int *)data & (IFF_POINTOPOINT|IFF_BROADCAST)) {
		case IFF_POINTOPOINT:
		case IFF_BROADCAST:
			s = splnet();
			if (tp->tun_if.if_flags & IFF_UP) {
				splx(s);
				return (EBUSY);
			}
			tp->tun_if.if_flags &=
			    ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
			tp->tun_if.if_flags |= *(int *)data;
			splx(s);
			break;
		default:
			return (EINVAL);
			break;
		}
		break;

	case TUNSLMODE:
		if (*(int *)data)
			tp->tun_flags |= TUN_PREPADDR;
		else
			tp->tun_flags &= ~TUN_PREPADDR;
		break;

	case FIONBIO:
		if (*(int *)data)
			tp->tun_flags |= TUN_NBIO;
		else
			tp->tun_flags &= ~TUN_NBIO;
		break;

	case FIOASYNC:
		if (*(int *)data)
			tp->tun_flags |= TUN_ASYNC;
		else
			tp->tun_flags &= ~TUN_ASYNC;
		break;

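	/*
	 * FIONREAD reports the length of the next queued packet (what
	 * the next read would return), not the total bytes queued.
	 */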
	case FIONREAD:
		s = splnet();
		if (tp->tun_if.if_snd.ifq_head)
			*(int *)data = tp->tun_if.if_snd.ifq_head->m_pkthdr.len;
		else
			*(int *)data = 0;
		splx(s);
		break;

	case TIOCSPGRP:
		tp->tun_pgrp = *(int *)data;
		break;

	case TIOCGPGRP:
		*(int *)data = tp->tun_pgrp;
		break;

	default:
		return (ENOTTY);
	}
	return (0);
}

/*
 * The cdevsw read interface - reads a packet at a time, or at
 * least as much of a packet as can be read.
 */
int
tunread(devvp, uio, ioflag)
	struct vnode *devvp;
	struct uio *uio;
	int ioflag;
{
	struct tun_softc *tp = devvp->v_devcookie;
	struct ifnet *ifp = &tp->tun_if;
	struct mbuf *m, *m0;
	int error=0, len, s;

	TUNDEBUG ("%s: read\n", ifp->if_xname);
	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
		TUNDEBUG ("%s: not ready 0%o\n", ifp->if_xname, tp->tun_flags);
		return EHOSTDOWN;
	}

	tp->tun_flags &= ~TUN_RWAIT;

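	/*
	 * Dequeue the next packet; if the queue is empty either fail
	 * with EWOULDBLOCK (non-blocking mode) or sleep until
	 * tun_output() queues something and wakes us.
	 */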
	s = splnet();
	do {
		IF_DEQUEUE(&ifp->if_snd, m0);
		if (m0 == 0) {
			if (tp->tun_flags & TUN_NBIO) {
				splx(s);
				return (EWOULDBLOCK);
			}
			tp->tun_flags |= TUN_RWAIT;
			if (tsleep((caddr_t)tp, PZERO|PCATCH, "tunread", 0)) {
				splx(s);
				return (EINTR);
			}
		}
	} while (m0 == 0);
	splx(s);

	while (m0 && uio->uio_resid > 0 && error == 0) {
		len = min(uio->uio_resid, m0->m_len);
		if (len != 0)
			error = uiomove(mtod(m0, caddr_t), len, uio);
		MFREE(m0, m);
		m0 = m;
	}

	if (m0) {
		TUNDEBUG("Dropping mbuf\n");
		m_freem(m0);
	}
	if (error)
		ifp->if_ierrors++;
	return (error);
}

/*
 * the cdevsw write interface - an atomic write is a packet - or else!
 */
int
tunwrite(devvp, uio, ioflag)
	struct vnode *devvp;
	struct uio *uio;
	int ioflag;
{
	struct tun_softc *tp = devvp->v_devcookie;
	struct ifnet *ifp = &tp->tun_if;
	struct mbuf *top, **mp, *m;
	struct ifqueue *ifq;
	struct sockaddr dst;
	int isr, error=0, s, tlen, mlen;

	TUNDEBUG("%s: tunwrite\n", ifp->if_xname);

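	/*
	 * In TUNSLMODE each write is preceded by a struct sockaddr whose
	 * sa_family selects the input queue below; any bytes of it
	 * beyond sizeof(dst) are consumed and discarded.
	 */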
	if (tp->tun_flags & TUN_PREPADDR) {
		if (uio->uio_resid < sizeof(dst))
			return (EIO);
		error = uiomove((caddr_t)&dst, sizeof(dst), uio);
		if (dst.sa_len > sizeof(dst)) {
			/* Duh.. */
			char discard;
			int n = dst.sa_len - sizeof(dst);
			while (n--)
				if ((error = uiomove(&discard, 1, uio)) != 0)
					return (error);
		}
	} else {
#ifdef INET
		dst.sa_family = AF_INET;
#endif
	}

	if (uio->uio_resid < 0 || uio->uio_resid > TUNMTU) {
		TUNDEBUG("%s: len=%lu!\n", ifp->if_xname,
		    (unsigned long)uio->uio_resid);
		return (EIO);
	}

	switch (dst.sa_family) {
#ifdef INET
	case AF_INET:
		ifq = &ipintrq;
		isr = NETISR_IP;
		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	tlen = uio->uio_resid;

	/* get a header mbuf */
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (ENOBUFS);
	mlen = MHLEN;

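	/* Copy the payload from userland into a chain of mbufs. */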
	top = 0;
	mp = &top;
	while (error == 0 && uio->uio_resid > 0) {
		m->m_len = min(mlen, uio->uio_resid);
		error = uiomove(mtod (m, caddr_t), m->m_len, uio);
		*mp = m;
		mp = &m->m_next;
		if (uio->uio_resid > 0) {
			MGET (m, M_DONTWAIT, MT_DATA);
			if (m == 0) {
				error = ENOBUFS;
				break;
			}
			mlen = MLEN;
		}
	}
	if (error) {
		if (top)
			m_freem (top);
		ifp->if_ierrors++;
		return (error);
	}

	top->m_pkthdr.len = tlen;
	top->m_pkthdr.rcvif = ifp;

#if NBPFILTER > 0
	if (ifp->if_bpf) {
		/*
		 * We need to prepend the address family as
		 * a four byte field. Cons up a dummy header
		 * to pacify bpf. This is safe because bpf
		 * will only read from the mbuf (i.e., it won't
		 * try to free it or keep a pointer to it).
		 */
		struct mbuf m;
		u_int32_t af = AF_INET;

		m.m_next = top;
		m.m_len = sizeof(af);
		m.m_data = (char *)&af;

		bpf_mtap(ifp->if_bpf, &m);
	}
#endif

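	/*
	 * Hand the packet to the protocol input queue and schedule the
	 * corresponding soft interrupt; drop it if the queue is full.
	 */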
	s = splnet();
	if (IF_QFULL(ifq)) {
		IF_DROP(ifq);
		splx(s);
		ifp->if_collisions++;
		m_freem(top);
		return (ENOBUFS);
	}
	IF_ENQUEUE(ifq, top);
	splx(s);
	ifp->if_ipackets++;
	schednetisr(isr);
	return (error);
}

/*
 * tunpoll - the poll interface, this is only useful on reads
 * really. The write detect always returns true, write never blocks
 * anyway, it either accepts the packet or drops it.
 */
int
tunpoll(devvp, events, p)
	struct vnode *devvp;
	int events;
	struct proc *p;
{
	struct tun_softc *tp = devvp->v_devcookie;
	struct ifnet *ifp = &tp->tun_if;
	int s, revents = 0;

	s = splnet();
	TUNDEBUG("%s: tunpoll\n", ifp->if_xname);

	if (events & (POLLIN | POLLRDNORM)) {
		if (ifp->if_snd.ifq_len > 0) {
			TUNDEBUG("%s: tunpoll q=%d\n", ifp->if_xname,
			    ifp->if_snd.ifq_len);
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			TUNDEBUG("%s: tunpoll waiting\n", ifp->if_xname);
			selrecord(p, &tp->tun_rsel);
		}
	}

	if (events & (POLLOUT | POLLWRNORM))
		revents |= events & (POLLOUT | POLLWRNORM);

	splx(s);
	return (revents);
}

#endif /* NTUN */