/*	$NetBSD: if_tun.c,v 1.45 2001/08/03 21:11:57 itojun Exp $	*/
2
3 /*
4 * Copyright (c) 1988, Julian Onions <jpo (at) cs.nott.ac.uk>
5 * Nottingham University 1987.
6 *
7 * This source may be freely distributed, however I would be interested
8 * in any changes that are made.
9 *
10 * This driver takes packets off the IP i/f and hands them up to a
11 * user process to have its wicked way with. This driver has its
12 * roots in a similar driver written by Phil Cockcroft (formerly) at
13 * UCL. This driver is based much more on read/write/poll mode of
14 * operation though.
15 */
16
17 #include "tun.h"
18 #if NTUN > 0
19
20 #include "opt_inet.h"
21 #include "opt_ns.h"
22
23 #include <sys/param.h>
24 #include <sys/proc.h>
25 #include <sys/systm.h>
26 #include <sys/mbuf.h>
27 #include <sys/buf.h>
28 #include <sys/protosw.h>
29 #include <sys/socket.h>
30 #include <sys/ioctl.h>
31 #include <sys/errno.h>
32 #include <sys/syslog.h>
33 #include <sys/select.h>
34 #include <sys/poll.h>
35 #include <sys/file.h>
36 #include <sys/signalvar.h>
37 #include <sys/conf.h>
38
39 #include <machine/cpu.h>
40
41 #include <net/if.h>
42 #include <net/if_ether.h>
43 #include <net/netisr.h>
44 #include <net/route.h>
45
46
47 #ifdef INET
48 #include <netinet/in.h>
49 #include <netinet/in_systm.h>
50 #include <netinet/in_var.h>
51 #include <netinet/ip.h>
52 #include <netinet/if_inarp.h>
53 #endif
54
55 #ifdef NS
56 #include <netns/ns.h>
57 #include <netns/ns_if.h>
58 #endif
59
60 #include "bpfilter.h"
61 #if NBPFILTER > 0
62 #include <sys/time.h>
63 #include <net/bpf.h>
64 #endif
65
66 #include <net/if_tun.h>
67
68 #define TUNDEBUG if (tundebug) printf
69 int tundebug = 0;
70
71 struct tun_softc tunctl[NTUN];
72 extern int ifqmaxlen;
73 void tunattach __P((int));
74
75 int tun_ioctl __P((struct ifnet *, u_long, caddr_t));
76 int tun_output __P((struct ifnet *, struct mbuf *, struct sockaddr *,
77 struct rtentry *rt));
78
79 static void tuninit __P((struct tun_softc *));
80
81 void
82 tunattach(unused)
83 int unused;
84 {
85 int i;
86 struct ifnet *ifp;
87
88 for (i = 0; i < NTUN; i++) {
89 tunctl[i].tun_flags = TUN_INITED;
90
91 ifp = &tunctl[i].tun_if;
92 sprintf(ifp->if_xname, "tun%d", i);
93 ifp->if_softc = &tunctl[i];
94 ifp->if_mtu = TUNMTU;
95 ifp->if_ioctl = tun_ioctl;
96 ifp->if_output = tun_output;
97 ifp->if_flags = IFF_POINTOPOINT;
98 ifp->if_snd.ifq_maxlen = ifqmaxlen;
99 ifp->if_collisions = 0;
100 ifp->if_ierrors = 0;
101 ifp->if_oerrors = 0;
102 ifp->if_ipackets = 0;
103 ifp->if_opackets = 0;
104 ifp->if_dlt = DLT_NULL;
105 if_attach(ifp);
106 if_alloc_sadl(ifp);
107 #if NBPFILTER > 0
108 bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
109 #endif
110 }
111 }
112
113 /*
114 * tunnel open - must be superuser & the device must be
115 * configured in
116 */
117 int
118 tunopen(dev, flag, mode, p)
119 dev_t dev;
120 int flag, mode;
121 struct proc *p;
122 {
123 struct ifnet *ifp;
124 struct tun_softc *tp;
125 int unit, error;
126
127 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
128 return (error);
129
130 if ((unit = minor(dev)) >= NTUN)
131 return (ENXIO);
132 tp = &tunctl[unit];
133 if (tp->tun_flags & TUN_OPEN)
134 return ENXIO;
135 ifp = &tp->tun_if;
136 tp->tun_flags |= TUN_OPEN;
137 TUNDEBUG("%s: open\n", ifp->if_xname);
138 return (0);
139 }
140
141 /*
142 * tunclose - close the device - mark i/f down & delete
143 * routing info
144 */
145 int
146 tunclose(dev, flag, mode, p)
147 dev_t dev;
148 int flag;
149 int mode;
150 struct proc *p;
151 {
152 int unit = minor(dev), s;
153 struct tun_softc *tp = &tunctl[unit];
154 struct ifnet *ifp = &tp->tun_if;
155 struct mbuf *m;
156
157 tp->tun_flags &= ~TUN_OPEN;
158
159 /*
160 * junk all pending output
161 */
162 do {
163 s = splnet();
164 IF_DEQUEUE(&ifp->if_snd, m);
165 splx(s);
166 if (m)
167 m_freem(m);
168 } while (m);
169
170 if (ifp->if_flags & IFF_UP) {
171 s = splnet();
172 if_down(ifp);
173 if (ifp->if_flags & IFF_RUNNING) {
174 /* find internet addresses and delete routes */
175 struct ifaddr *ifa;
176 for (ifa = ifp->if_addrlist.tqh_first; ifa != 0;
177 ifa = ifa->ifa_list.tqe_next) {
178 #ifdef INET
179 if (ifa->ifa_addr->sa_family == AF_INET) {
180 rtinit(ifa, (int)RTM_DELETE,
181 tp->tun_flags & TUN_DSTADDR
182 ? RTF_HOST
183 : 0);
184 }
185 #endif
186 }
187 }
188 splx(s);
189 }
190 tp->tun_pgrp = 0;
191 selwakeup(&tp->tun_rsel);
192
193 TUNDEBUG ("%s: closed\n", ifp->if_xname);
194 return (0);
195 }
196
197 static void
198 tuninit(tp)
199 struct tun_softc *tp;
200 {
201 struct ifnet *ifp = &tp->tun_if;
202 struct ifaddr *ifa;
203
204 TUNDEBUG("%s: tuninit\n", ifp->if_xname);
205
206 ifp->if_flags |= IFF_UP | IFF_RUNNING;
207
208 tp->tun_flags &= ~(TUN_IASET|TUN_DSTADDR);
209 for (ifa = ifp->if_addrlist.tqh_first; ifa != 0;
210 ifa = ifa->ifa_list.tqe_next) {
211 #ifdef INET
212 if (ifa->ifa_addr->sa_family == AF_INET) {
213 struct sockaddr_in *sin;
214
215 sin = satosin(ifa->ifa_addr);
216 if (sin && sin->sin_addr.s_addr)
217 tp->tun_flags |= TUN_IASET;
218
219 if (ifp->if_flags & IFF_POINTOPOINT) {
220 sin = satosin(ifa->ifa_dstaddr);
221 if (sin && sin->sin_addr.s_addr)
222 tp->tun_flags |= TUN_DSTADDR;
223 }
224 }
225 #endif
226 }
227
228 return;
229 }
230
231 /*
232 * Process an ioctl request.
233 */
234 int
235 tun_ioctl(ifp, cmd, data)
236 struct ifnet *ifp;
237 u_long cmd;
238 caddr_t data;
239 {
240 int error = 0, s;
241
242 s = splnet();
243 switch(cmd) {
244 case SIOCSIFADDR:
245 tuninit((struct tun_softc *)(ifp->if_softc));
246 TUNDEBUG("%s: address set\n", ifp->if_xname);
247 break;
248 case SIOCSIFDSTADDR:
249 tuninit((struct tun_softc *)(ifp->if_softc));
250 TUNDEBUG("%s: destination address set\n", ifp->if_xname);
251 break;
252 case SIOCSIFBRDADDR:
253 TUNDEBUG("%s: broadcast address set\n", ifp->if_xname);
254 break;
255 case SIOCSIFMTU: {
256 struct ifreq *ifr = (struct ifreq *) data;
257 if (ifr->ifr_mtu > TUNMTU || ifr->ifr_mtu < 576) {
258 error = EINVAL;
259 break;
260 }
261 TUNDEBUG("%s: interface mtu set\n", ifp->if_xname);
262 ifp->if_mtu = ifr->ifr_mtu;
263 break;
264 }
265 case SIOCADDMULTI:
266 case SIOCDELMULTI: {
267 struct ifreq *ifr = (struct ifreq *) data;
268 if (ifr == 0) {
269 error = EAFNOSUPPORT; /* XXX */
270 break;
271 }
272 switch (ifr->ifr_addr.sa_family) {
273
274 #ifdef INET
275 case AF_INET:
276 break;
277 #endif
278
279 default:
280 error = EAFNOSUPPORT;
281 break;
282 }
283 break;
284 }
285 case SIOCSIFFLAGS:
286 break;
287 default:
288 error = EINVAL;
289 }
290 splx(s);
291 return (error);
292 }
293
294 /*
295 * tun_output - queue packets from higher level ready to put out.
296 */
297 int
298 tun_output(ifp, m0, dst, rt)
299 struct ifnet *ifp;
300 struct mbuf *m0;
301 struct sockaddr *dst;
302 struct rtentry *rt;
303 {
304 struct tun_softc *tp = ifp->if_softc;
305 struct proc *p;
306 #ifdef INET
307 int s;
308 #endif
309
310 TUNDEBUG ("%s: tun_output\n", ifp->if_xname);
311
312 if ((tp->tun_flags & TUN_READY) != TUN_READY) {
313 TUNDEBUG ("%s: not ready 0%o\n", ifp->if_xname,
314 tp->tun_flags);
315 m_freem (m0);
316 return (EHOSTDOWN);
317 }
318
319 #if NBPFILTER > 0
320 if (ifp->if_bpf) {
321 /*
322 * We need to prepend the address family as
323 * a four byte field. Cons up a dummy header
324 * to pacify bpf. This is safe because bpf
325 * will only read from the mbuf (i.e., it won't
326 * try to free it or keep a pointer to it).
327 */
328 struct mbuf m;
329 u_int32_t af = dst->sa_family;
330
331 m.m_next = m0;
332 m.m_len = sizeof(af);
333 m.m_data = (char *)⁡
334
335 bpf_mtap(ifp->if_bpf, &m);
336 }
337 #endif
338
339 switch(dst->sa_family) {
340 #ifdef INET
341 case AF_INET:
342 if (tp->tun_flags & TUN_PREPADDR) {
343 /* Simple link-layer header */
344 M_PREPEND(m0, dst->sa_len, M_DONTWAIT);
345 if (m0 == NULL) {
346 IF_DROP(&ifp->if_snd);
347 return (ENOBUFS);
348 }
349 bcopy(dst, mtod(m0, char *), dst->sa_len);
350 }
351 /* FALLTHROUGH */
352 case AF_UNSPEC:
353 s = splnet();
354 if (IF_QFULL(&ifp->if_snd)) {
355 IF_DROP(&ifp->if_snd);
356 m_freem(m0);
357 splx(s);
358 ifp->if_collisions++;
359 return (ENOBUFS);
360 }
361 IF_ENQUEUE(&ifp->if_snd, m0);
362 splx(s);
363 ifp->if_opackets++;
364 break;
365 #endif
366 default:
367 m_freem(m0);
368 return (EAFNOSUPPORT);
369 }
370
371 if (tp->tun_flags & TUN_RWAIT) {
372 tp->tun_flags &= ~TUN_RWAIT;
373 wakeup((caddr_t)tp);
374 }
375 if (tp->tun_flags & TUN_ASYNC && tp->tun_pgrp) {
376 if (tp->tun_pgrp > 0)
377 gsignal(tp->tun_pgrp, SIGIO);
378 else if ((p = pfind(-tp->tun_pgrp)) != NULL)
379 psignal(p, SIGIO);
380 }
381 selwakeup(&tp->tun_rsel);
382 return (0);
383 }
384
385 /*
386 * the cdevsw interface is now pretty minimal.
387 */
388 int
389 tunioctl(dev, cmd, data, flag, p)
390 dev_t dev;
391 u_long cmd;
392 caddr_t data;
393 int flag;
394 struct proc *p;
395 {
396 int unit = minor(dev), s;
397 struct tun_softc *tp = &tunctl[unit];
398
399 switch (cmd) {
400 case TUNSDEBUG:
401 tundebug = *(int *)data;
402 break;
403
404 case TUNGDEBUG:
405 *(int *)data = tundebug;
406 break;
407
408 case TUNSIFMODE:
409 switch (*(int *)data & (IFF_POINTOPOINT|IFF_BROADCAST)) {
410 case IFF_POINTOPOINT:
411 case IFF_BROADCAST:
412 s = splnet();
413 if (tp->tun_if.if_flags & IFF_UP) {
414 splx(s);
415 return (EBUSY);
416 }
417 tp->tun_if.if_flags &=
418 ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
419 tp->tun_if.if_flags |= *(int *)data;
420 splx(s);
421 break;
422 default:
423 return (EINVAL);
424 break;
425 }
426 break;
427
428 case TUNSLMODE:
429 if (*(int *)data)
430 tp->tun_flags |= TUN_PREPADDR;
431 else
432 tp->tun_flags &= ~TUN_PREPADDR;
433 break;
434
435 case FIONBIO:
436 if (*(int *)data)
437 tp->tun_flags |= TUN_NBIO;
438 else
439 tp->tun_flags &= ~TUN_NBIO;
440 break;
441
442 case FIOASYNC:
443 if (*(int *)data)
444 tp->tun_flags |= TUN_ASYNC;
445 else
446 tp->tun_flags &= ~TUN_ASYNC;
447 break;
448
449 case FIONREAD:
450 s = splnet();
451 if (tp->tun_if.if_snd.ifq_head)
452 *(int *)data = tp->tun_if.if_snd.ifq_head->m_pkthdr.len;
453 else
454 *(int *)data = 0;
455 splx(s);
456 break;
457
458 case TIOCSPGRP:
459 tp->tun_pgrp = *(int *)data;
460 break;
461
462 case TIOCGPGRP:
463 *(int *)data = tp->tun_pgrp;
464 break;
465
466 default:
467 return (ENOTTY);
468 }
469 return (0);
470 }
471
472 /*
473 * The cdevsw read interface - reads a packet at a time, or at
474 * least as much of a packet as can be read.
475 */
476 int
477 tunread(dev, uio, ioflag)
478 dev_t dev;
479 struct uio *uio;
480 int ioflag;
481 {
482 int unit = minor(dev);
483 struct tun_softc *tp = &tunctl[unit];
484 struct ifnet *ifp = &tp->tun_if;
485 struct mbuf *m, *m0;
486 int error=0, len, s;
487
488 TUNDEBUG ("%s: read\n", ifp->if_xname);
489 if ((tp->tun_flags & TUN_READY) != TUN_READY) {
490 TUNDEBUG ("%s: not ready 0%o\n", ifp->if_xname, tp->tun_flags);
491 return EHOSTDOWN;
492 }
493
494 tp->tun_flags &= ~TUN_RWAIT;
495
496 s = splnet();
497 do {
498 IF_DEQUEUE(&ifp->if_snd, m0);
499 if (m0 == 0) {
500 if (tp->tun_flags & TUN_NBIO) {
501 splx(s);
502 return (EWOULDBLOCK);
503 }
504 tp->tun_flags |= TUN_RWAIT;
505 if (tsleep((caddr_t)tp, PZERO|PCATCH, "tunread", 0)) {
506 splx(s);
507 return (EINTR);
508 }
509 }
510 } while (m0 == 0);
511 splx(s);
512
513 while (m0 && uio->uio_resid > 0 && error == 0) {
514 len = min(uio->uio_resid, m0->m_len);
515 if (len != 0)
516 error = uiomove(mtod(m0, caddr_t), len, uio);
517 MFREE(m0, m);
518 m0 = m;
519 }
520
521 if (m0) {
522 TUNDEBUG("Dropping mbuf\n");
523 m_freem(m0);
524 }
525 if (error)
526 ifp->if_ierrors++;
527 return (error);
528 }
529
530 /*
531 * the cdevsw write interface - an atomic write is a packet - or else!
532 */
533 int
534 tunwrite(dev, uio, ioflag)
535 dev_t dev;
536 struct uio *uio;
537 int ioflag;
538 {
539 int unit = minor (dev);
540 struct tun_softc *tp = &tunctl[unit];
541 struct ifnet *ifp = &tp->tun_if;
542 struct mbuf *top, **mp, *m;
543 struct ifqueue *ifq;
544 struct sockaddr dst;
545 int isr, error=0, s, tlen, mlen;
546
547 TUNDEBUG("%s: tunwrite\n", ifp->if_xname);
548
549 if (tp->tun_flags & TUN_PREPADDR) {
550 if (uio->uio_resid < sizeof(dst))
551 return (EIO);
552 error = uiomove((caddr_t)&dst, sizeof(dst), uio);
553 if (dst.sa_len > sizeof(dst)) {
554 /* Duh.. */
555 char discard;
556 int n = dst.sa_len - sizeof(dst);
557 while (n--)
558 if ((error = uiomove(&discard, 1, uio)) != 0)
559 return (error);
560 }
561 } else {
562 #ifdef INET
563 dst.sa_family = AF_INET;
564 #endif
565 }
566
567 if (uio->uio_resid < 0 || uio->uio_resid > TUNMTU) {
568 TUNDEBUG("%s: len=%lu!\n", ifp->if_xname,
569 (unsigned long)uio->uio_resid);
570 return (EIO);
571 }
572
573 switch (dst.sa_family) {
574 #ifdef INET
575 case AF_INET:
576 ifq = &ipintrq;
577 isr = NETISR_IP;
578 break;
579 #endif
580 default:
581 return (EAFNOSUPPORT);
582 }
583
584 tlen = uio->uio_resid;
585
586 /* get a header mbuf */
587 MGETHDR(m, M_DONTWAIT, MT_DATA);
588 if (m == NULL)
589 return (ENOBUFS);
590 mlen = MHLEN;
591
592 top = 0;
593 mp = ⊤
594 while (error == 0 && uio->uio_resid > 0) {
595 m->m_len = min(mlen, uio->uio_resid);
596 error = uiomove(mtod (m, caddr_t), m->m_len, uio);
597 *mp = m;
598 mp = &m->m_next;
599 if (uio->uio_resid > 0) {
600 MGET (m, M_DONTWAIT, MT_DATA);
601 if (m == 0) {
602 error = ENOBUFS;
603 break;
604 }
605 mlen = MLEN;
606 }
607 }
608 if (error) {
609 if (top)
610 m_freem (top);
611 ifp->if_ierrors++;
612 return (error);
613 }
614
615 top->m_pkthdr.len = tlen;
616 top->m_pkthdr.rcvif = ifp;
617
618 #if NBPFILTER > 0
619 if (ifp->if_bpf) {
620 /*
621 * We need to prepend the address family as
622 * a four byte field. Cons up a dummy header
623 * to pacify bpf. This is safe because bpf
624 * will only read from the mbuf (i.e., it won't
625 * try to free it or keep a pointer to it).
626 */
627 struct mbuf m;
628 u_int32_t af = AF_INET;
629
630 m.m_next = top;
631 m.m_len = sizeof(af);
632 m.m_data = (char *)⁡
633
634 bpf_mtap(ifp->if_bpf, &m);
635 }
636 #endif
637
638 s = splnet();
639 if (IF_QFULL(ifq)) {
640 IF_DROP(ifq);
641 splx(s);
642 ifp->if_collisions++;
643 m_freem(top);
644 return (ENOBUFS);
645 }
646 IF_ENQUEUE(ifq, top);
647 splx(s);
648 ifp->if_ipackets++;
649 schednetisr(isr);
650 return (error);
651 }
652
653 /*
654 * tunpoll - the poll interface, this is only useful on reads
655 * really. The write detect always returns true, write never blocks
656 * anyway, it either accepts the packet or drops it.
657 */
658 int
659 tunpoll(dev, events, p)
660 dev_t dev;
661 int events;
662 struct proc *p;
663 {
664 int unit = minor(dev), s;
665 struct tun_softc *tp = &tunctl[unit];
666 struct ifnet *ifp = &tp->tun_if;
667 int revents = 0;
668
669 s = splnet();
670 TUNDEBUG("%s: tunpoll\n", ifp->if_xname);
671
672 if (events & (POLLIN | POLLRDNORM)) {
673 if (ifp->if_snd.ifq_len > 0) {
674 TUNDEBUG("%s: tunpoll q=%d\n", ifp->if_xname,
675 ifp->if_snd.ifq_len);
676 revents |= events & (POLLIN | POLLRDNORM);
677 } else {
678 TUNDEBUG("%s: tunpoll waiting\n", ifp->if_xname);
679 selrecord(p, &tp->tun_rsel);
680 }
681 }
682
683 if (events & (POLLOUT | POLLWRNORM))
684 revents |= events & (POLLOUT | POLLWRNORM);
685
686 splx(s);
687 return (revents);
688 }
689
690 #endif /* NTUN */
691