if_tun.c revision 1.41 1 /* $NetBSD: if_tun.c,v 1.41 2000/12/18 19:50:45 thorpej Exp $ */
2
3 /*
4 * Copyright (c) 1988, Julian Onions <jpo (at) cs.nott.ac.uk>
5 * Nottingham University 1987.
6 *
7 * This source may be freely distributed, however I would be interested
8 * in any changes that are made.
9 *
10 * This driver takes packets off the IP i/f and hands them up to a
11 * user process to have its wicked way with. This driver has its
12 * roots in a similar driver written by Phil Cockcroft (formerly) at
13 * UCL. This driver is based much more on read/write/poll mode of
14 * operation though.
15 */
16
17 #include "tun.h"
18 #if NTUN > 0
19
20 #include "opt_inet.h"
21 #include "opt_ns.h"
22
23 #include <sys/param.h>
24 #include <sys/proc.h>
25 #include <sys/systm.h>
26 #include <sys/mbuf.h>
27 #include <sys/buf.h>
28 #include <sys/protosw.h>
29 #include <sys/socket.h>
30 #include <sys/ioctl.h>
31 #include <sys/errno.h>
32 #include <sys/syslog.h>
33 #include <sys/select.h>
34 #include <sys/poll.h>
35 #include <sys/file.h>
36 #include <sys/signalvar.h>
37 #include <sys/conf.h>
38
39 #include <machine/cpu.h>
40
41 #include <net/if.h>
42 #include <net/if_ether.h>
43 #include <net/netisr.h>
44 #include <net/route.h>
45
46
47 #ifdef INET
48 #include <netinet/in.h>
49 #include <netinet/in_systm.h>
50 #include <netinet/in_var.h>
51 #include <netinet/ip.h>
52 #include <netinet/if_inarp.h>
53 #endif
54
55 #ifdef NS
56 #include <netns/ns.h>
57 #include <netns/ns_if.h>
58 #endif
59
60 #include "bpfilter.h"
61 #if NBPFILTER > 0
62 #include <sys/time.h>
63 #include <net/bpf.h>
64 #endif
65
66 #include <net/if_tun.h>
67
68 #define TUNDEBUG if (tundebug) printf
69 int tundebug = 0;
70
71 struct tun_softc tunctl[NTUN];
72 extern int ifqmaxlen;
73 void tunattach __P((int));
74
75 int tun_ioctl __P((struct ifnet *, u_long, caddr_t));
76 int tun_output __P((struct ifnet *, struct mbuf *, struct sockaddr *,
77 struct rtentry *rt));
78
79 static void tuninit __P((struct tun_softc *));
80
81 void
82 tunattach(unused)
83 int unused;
84 {
85 int i;
86 struct ifnet *ifp;
87
88 for (i = 0; i < NTUN; i++) {
89 tunctl[i].tun_flags = TUN_INITED;
90
91 ifp = &tunctl[i].tun_if;
92 sprintf(ifp->if_xname, "tun%d", i);
93 ifp->if_softc = &tunctl[i];
94 ifp->if_mtu = TUNMTU;
95 ifp->if_ioctl = tun_ioctl;
96 ifp->if_output = tun_output;
97 ifp->if_flags = IFF_POINTOPOINT;
98 ifp->if_snd.ifq_maxlen = ifqmaxlen;
99 ifp->if_collisions = 0;
100 ifp->if_ierrors = 0;
101 ifp->if_oerrors = 0;
102 ifp->if_ipackets = 0;
103 ifp->if_opackets = 0;
104 ifp->if_dlt = DLT_NULL;
105 if_attach(ifp);
106 #if NBPFILTER > 0
107 bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
108 #endif
109 }
110 }
111
112 /*
113 * tunnel open - must be superuser & the device must be
114 * configured in
115 */
116 int
117 tunopen(dev, flag, mode, p)
118 dev_t dev;
119 int flag, mode;
120 struct proc *p;
121 {
122 struct ifnet *ifp;
123 struct tun_softc *tp;
124 int unit, error;
125
126 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
127 return (error);
128
129 if ((unit = minor(dev)) >= NTUN)
130 return (ENXIO);
131 tp = &tunctl[unit];
132 if (tp->tun_flags & TUN_OPEN)
133 return ENXIO;
134 ifp = &tp->tun_if;
135 tp->tun_flags |= TUN_OPEN;
136 TUNDEBUG("%s: open\n", ifp->if_xname);
137 return (0);
138 }
139
140 /*
141 * tunclose - close the device - mark i/f down & delete
142 * routing info
143 */
144 int
145 tunclose(dev, flag, mode, p)
146 dev_t dev;
147 int flag;
148 int mode;
149 struct proc *p;
150 {
151 int unit = minor(dev), s;
152 struct tun_softc *tp = &tunctl[unit];
153 struct ifnet *ifp = &tp->tun_if;
154 struct mbuf *m;
155
156 tp->tun_flags &= ~TUN_OPEN;
157
158 /*
159 * junk all pending output
160 */
161 do {
162 s = splimp();
163 IF_DEQUEUE(&ifp->if_snd, m);
164 splx(s);
165 if (m)
166 m_freem(m);
167 } while (m);
168
169 if (ifp->if_flags & IFF_UP) {
170 s = splimp();
171 if_down(ifp);
172 if (ifp->if_flags & IFF_RUNNING) {
173 /* find internet addresses and delete routes */
174 struct ifaddr *ifa;
175 for (ifa = ifp->if_addrlist.tqh_first; ifa != 0;
176 ifa = ifa->ifa_list.tqe_next) {
177 #ifdef INET
178 if (ifa->ifa_addr->sa_family == AF_INET) {
179 rtinit(ifa, (int)RTM_DELETE,
180 tp->tun_flags & TUN_DSTADDR
181 ? RTF_HOST
182 : 0);
183 }
184 #endif
185 }
186 }
187 splx(s);
188 }
189 tp->tun_pgrp = 0;
190 selwakeup(&tp->tun_rsel);
191
192 TUNDEBUG ("%s: closed\n", ifp->if_xname);
193 return (0);
194 }
195
196 static void
197 tuninit(tp)
198 struct tun_softc *tp;
199 {
200 struct ifnet *ifp = &tp->tun_if;
201 struct ifaddr *ifa;
202
203 TUNDEBUG("%s: tuninit\n", ifp->if_xname);
204
205 ifp->if_flags |= IFF_UP | IFF_RUNNING;
206
207 tp->tun_flags &= ~(TUN_IASET|TUN_DSTADDR);
208 for (ifa = ifp->if_addrlist.tqh_first; ifa != 0;
209 ifa = ifa->ifa_list.tqe_next) {
210 #ifdef INET
211 if (ifa->ifa_addr->sa_family == AF_INET) {
212 struct sockaddr_in *sin;
213
214 sin = satosin(ifa->ifa_addr);
215 if (sin && sin->sin_addr.s_addr)
216 tp->tun_flags |= TUN_IASET;
217
218 if (ifp->if_flags & IFF_POINTOPOINT) {
219 sin = satosin(ifa->ifa_dstaddr);
220 if (sin && sin->sin_addr.s_addr)
221 tp->tun_flags |= TUN_DSTADDR;
222 }
223 }
224 #endif
225 }
226
227 return;
228 }
229
230 /*
231 * Process an ioctl request.
232 */
233 int
234 tun_ioctl(ifp, cmd, data)
235 struct ifnet *ifp;
236 u_long cmd;
237 caddr_t data;
238 {
239 int error = 0, s;
240
241 s = splimp();
242 switch(cmd) {
243 case SIOCSIFADDR:
244 tuninit((struct tun_softc *)(ifp->if_softc));
245 TUNDEBUG("%s: address set\n", ifp->if_xname);
246 break;
247 case SIOCSIFDSTADDR:
248 tuninit((struct tun_softc *)(ifp->if_softc));
249 TUNDEBUG("%s: destination address set\n", ifp->if_xname);
250 break;
251 case SIOCSIFBRDADDR:
252 TUNDEBUG("%s: broadcast address set\n", ifp->if_xname);
253 break;
254 case SIOCSIFMTU: {
255 struct ifreq *ifr = (struct ifreq *) data;
256 if (ifr->ifr_mtu > TUNMTU || ifr->ifr_mtu < 576) {
257 error = EINVAL;
258 break;
259 }
260 TUNDEBUG("%s: interface mtu set\n", ifp->if_xname);
261 ifp->if_mtu = ifr->ifr_mtu;
262 break;
263 }
264 case SIOCADDMULTI:
265 case SIOCDELMULTI: {
266 struct ifreq *ifr = (struct ifreq *) data;
267 if (ifr == 0) {
268 error = EAFNOSUPPORT; /* XXX */
269 break;
270 }
271 switch (ifr->ifr_addr.sa_family) {
272
273 #ifdef INET
274 case AF_INET:
275 break;
276 #endif
277
278 default:
279 error = EAFNOSUPPORT;
280 break;
281 }
282 break;
283 }
284 case SIOCSIFFLAGS:
285 break;
286 default:
287 error = EINVAL;
288 }
289 splx(s);
290 return (error);
291 }
292
293 /*
294 * tun_output - queue packets from higher level ready to put out.
295 */
296 int
297 tun_output(ifp, m0, dst, rt)
298 struct ifnet *ifp;
299 struct mbuf *m0;
300 struct sockaddr *dst;
301 struct rtentry *rt;
302 {
303 struct tun_softc *tp = ifp->if_softc;
304 struct proc *p;
305 #ifdef INET
306 int s;
307 #endif
308
309 TUNDEBUG ("%s: tun_output\n", ifp->if_xname);
310
311 if ((tp->tun_flags & TUN_READY) != TUN_READY) {
312 TUNDEBUG ("%s: not ready 0%o\n", ifp->if_xname,
313 tp->tun_flags);
314 m_freem (m0);
315 return (EHOSTDOWN);
316 }
317
318 #if NBPFILTER > 0
319 if (ifp->if_bpf) {
320 /*
321 * We need to prepend the address family as
322 * a four byte field. Cons up a dummy header
323 * to pacify bpf. This is safe because bpf
324 * will only read from the mbuf (i.e., it won't
325 * try to free it or keep a pointer to it).
326 */
327 struct mbuf m;
328 u_int32_t af = dst->sa_family;
329
330 m.m_next = m0;
331 m.m_len = sizeof(af);
332 m.m_data = (char *)⁡
333
334 bpf_mtap(ifp->if_bpf, &m);
335 }
336 #endif
337
338 switch(dst->sa_family) {
339 #ifdef INET
340 case AF_INET:
341 if (tp->tun_flags & TUN_PREPADDR) {
342 /* Simple link-layer header */
343 M_PREPEND(m0, dst->sa_len, M_DONTWAIT);
344 if (m0 == NULL) {
345 IF_DROP(&ifp->if_snd);
346 return (ENOBUFS);
347 }
348 bcopy(dst, mtod(m0, char *), dst->sa_len);
349 }
350 /* FALLTHROUGH */
351 case AF_UNSPEC:
352 s = splimp();
353 if (IF_QFULL(&ifp->if_snd)) {
354 IF_DROP(&ifp->if_snd);
355 m_freem(m0);
356 splx(s);
357 ifp->if_collisions++;
358 return (ENOBUFS);
359 }
360 IF_ENQUEUE(&ifp->if_snd, m0);
361 splx(s);
362 ifp->if_opackets++;
363 break;
364 #endif
365 default:
366 m_freem(m0);
367 return (EAFNOSUPPORT);
368 }
369
370 if (tp->tun_flags & TUN_RWAIT) {
371 tp->tun_flags &= ~TUN_RWAIT;
372 wakeup((caddr_t)tp);
373 }
374 if (tp->tun_flags & TUN_ASYNC && tp->tun_pgrp) {
375 if (tp->tun_pgrp > 0)
376 gsignal(tp->tun_pgrp, SIGIO);
377 else if ((p = pfind(-tp->tun_pgrp)) != NULL)
378 psignal(p, SIGIO);
379 }
380 selwakeup(&tp->tun_rsel);
381 return (0);
382 }
383
384 /*
385 * the cdevsw interface is now pretty minimal.
386 */
387 int
388 tunioctl(dev, cmd, data, flag, p)
389 dev_t dev;
390 u_long cmd;
391 caddr_t data;
392 int flag;
393 struct proc *p;
394 {
395 int unit = minor(dev), s;
396 struct tun_softc *tp = &tunctl[unit];
397
398 switch (cmd) {
399 case TUNSDEBUG:
400 tundebug = *(int *)data;
401 break;
402
403 case TUNGDEBUG:
404 *(int *)data = tundebug;
405 break;
406
407 case TUNSIFMODE:
408 switch (*(int *)data & (IFF_POINTOPOINT|IFF_BROADCAST)) {
409 case IFF_POINTOPOINT:
410 case IFF_BROADCAST:
411 s = splimp();
412 if (tp->tun_if.if_flags & IFF_UP) {
413 splx(s);
414 return (EBUSY);
415 }
416 tp->tun_if.if_flags &=
417 ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
418 tp->tun_if.if_flags |= *(int *)data;
419 splx(s);
420 break;
421 default:
422 return (EINVAL);
423 break;
424 }
425 break;
426
427 case TUNSLMODE:
428 if (*(int *)data)
429 tp->tun_flags |= TUN_PREPADDR;
430 else
431 tp->tun_flags &= ~TUN_PREPADDR;
432 break;
433
434 case FIONBIO:
435 if (*(int *)data)
436 tp->tun_flags |= TUN_NBIO;
437 else
438 tp->tun_flags &= ~TUN_NBIO;
439 break;
440
441 case FIOASYNC:
442 if (*(int *)data)
443 tp->tun_flags |= TUN_ASYNC;
444 else
445 tp->tun_flags &= ~TUN_ASYNC;
446 break;
447
448 case FIONREAD:
449 s = splimp();
450 if (tp->tun_if.if_snd.ifq_head)
451 *(int *)data = tp->tun_if.if_snd.ifq_head->m_pkthdr.len;
452 else
453 *(int *)data = 0;
454 splx(s);
455 break;
456
457 case TIOCSPGRP:
458 tp->tun_pgrp = *(int *)data;
459 break;
460
461 case TIOCGPGRP:
462 *(int *)data = tp->tun_pgrp;
463 break;
464
465 default:
466 return (ENOTTY);
467 }
468 return (0);
469 }
470
471 /*
472 * The cdevsw read interface - reads a packet at a time, or at
473 * least as much of a packet as can be read.
474 */
475 int
476 tunread(dev, uio, ioflag)
477 dev_t dev;
478 struct uio *uio;
479 int ioflag;
480 {
481 int unit = minor(dev);
482 struct tun_softc *tp = &tunctl[unit];
483 struct ifnet *ifp = &tp->tun_if;
484 struct mbuf *m, *m0;
485 int error=0, len, s;
486
487 TUNDEBUG ("%s: read\n", ifp->if_xname);
488 if ((tp->tun_flags & TUN_READY) != TUN_READY) {
489 TUNDEBUG ("%s: not ready 0%o\n", ifp->if_xname, tp->tun_flags);
490 return EHOSTDOWN;
491 }
492
493 tp->tun_flags &= ~TUN_RWAIT;
494
495 s = splimp();
496 do {
497 IF_DEQUEUE(&ifp->if_snd, m0);
498 if (m0 == 0) {
499 if (tp->tun_flags & TUN_NBIO) {
500 splx(s);
501 return (EWOULDBLOCK);
502 }
503 tp->tun_flags |= TUN_RWAIT;
504 if (tsleep((caddr_t)tp, PZERO|PCATCH, "tunread", 0)) {
505 splx(s);
506 return (EINTR);
507 }
508 }
509 } while (m0 == 0);
510 splx(s);
511
512 while (m0 && uio->uio_resid > 0 && error == 0) {
513 len = min(uio->uio_resid, m0->m_len);
514 if (len == 0)
515 break;
516 error = uiomove(mtod(m0, caddr_t), len, uio);
517 MFREE(m0, m);
518 m0 = m;
519 }
520
521 if (m0) {
522 TUNDEBUG("Dropping mbuf\n");
523 m_freem(m0);
524 }
525 if (error)
526 ifp->if_ierrors++;
527 return (error);
528 }
529
530 /*
531 * the cdevsw write interface - an atomic write is a packet - or else!
532 */
533 int
534 tunwrite(dev, uio, ioflag)
535 dev_t dev;
536 struct uio *uio;
537 int ioflag;
538 {
539 int unit = minor (dev);
540 struct tun_softc *tp = &tunctl[unit];
541 struct ifnet *ifp = &tp->tun_if;
542 struct mbuf *top, **mp, *m;
543 struct ifqueue *ifq;
544 struct sockaddr dst;
545 int isr, error=0, s, tlen, mlen;
546
547 TUNDEBUG("%s: tunwrite\n", ifp->if_xname);
548
549 if (tp->tun_flags & TUN_PREPADDR) {
550 if (uio->uio_resid < sizeof(dst))
551 return (EIO);
552 error = uiomove((caddr_t)&dst, sizeof(dst), uio);
553 if (dst.sa_len > sizeof(dst)) {
554 /* Duh.. */
555 char discard;
556 int n = dst.sa_len - sizeof(dst);
557 while (n--)
558 if ((error = uiomove(&discard, 1, uio)) != 0)
559 return (error);
560 }
561 } else {
562 #ifdef INET
563 dst.sa_family = AF_INET;
564 #endif
565 }
566
567 if (uio->uio_resid < 0 || uio->uio_resid > TUNMTU) {
568 TUNDEBUG("%s: len=%lu!\n", ifp->if_xname,
569 (unsigned long)uio->uio_resid);
570 return (EIO);
571 }
572
573 switch (dst.sa_family) {
574 #ifdef INET
575 case AF_INET:
576 ifq = &ipintrq;
577 isr = NETISR_IP;
578 break;
579 #endif
580 default:
581 return (EAFNOSUPPORT);
582 }
583
584 tlen = uio->uio_resid;
585
586 /* get a header mbuf */
587 MGETHDR(m, M_DONTWAIT, MT_DATA);
588 if (m == NULL)
589 return (ENOBUFS);
590 mlen = MHLEN;
591
592 top = 0;
593 mp = ⊤
594 while (error == 0 && uio->uio_resid > 0) {
595 m->m_len = min(mlen, uio->uio_resid);
596 error = uiomove(mtod (m, caddr_t), m->m_len, uio);
597 *mp = m;
598 mp = &m->m_next;
599 if (uio->uio_resid > 0) {
600 MGET (m, M_DONTWAIT, MT_DATA);
601 if (m == 0) {
602 error = ENOBUFS;
603 break;
604 }
605 mlen = MLEN;
606 }
607 }
608 if (error) {
609 if (top)
610 m_freem (top);
611 ifp->if_ierrors++;
612 return (error);
613 }
614
615 top->m_pkthdr.len = tlen;
616 top->m_pkthdr.rcvif = ifp;
617
618 #if NBPFILTER > 0
619 if (ifp->if_bpf) {
620 /*
621 * We need to prepend the address family as
622 * a four byte field. Cons up a dummy header
623 * to pacify bpf. This is safe because bpf
624 * will only read from the mbuf (i.e., it won't
625 * try to free it or keep a pointer to it).
626 */
627 struct mbuf m;
628 u_int32_t af = AF_INET;
629
630 m.m_next = top;
631 m.m_len = sizeof(af);
632 m.m_data = (char *)⁡
633
634 bpf_mtap(ifp->if_bpf, &m);
635 }
636 #endif
637
638 s = splimp();
639 if (IF_QFULL(ifq)) {
640 IF_DROP(ifq);
641 splx(s);
642 ifp->if_collisions++;
643 m_freem(top);
644 return (ENOBUFS);
645 }
646 IF_ENQUEUE(ifq, top);
647 splx(s);
648 ifp->if_ipackets++;
649 schednetisr(isr);
650 return (error);
651 }
652
653 /*
654 * tunpoll - the poll interface, this is only useful on reads
655 * really. The write detect always returns true, write never blocks
656 * anyway, it either accepts the packet or drops it.
657 */
658 int
659 tunpoll(dev, events, p)
660 dev_t dev;
661 int events;
662 struct proc *p;
663 {
664 int unit = minor(dev), s;
665 struct tun_softc *tp = &tunctl[unit];
666 struct ifnet *ifp = &tp->tun_if;
667 int revents = 0;
668
669 s = splimp();
670 TUNDEBUG("%s: tunpoll\n", ifp->if_xname);
671
672 if (events & (POLLIN | POLLRDNORM)) {
673 if (ifp->if_snd.ifq_len > 0) {
674 TUNDEBUG("%s: tunpoll q=%d\n", ifp->if_xname,
675 ifp->if_snd.ifq_len);
676 revents |= events & (POLLIN | POLLRDNORM);
677 } else {
678 TUNDEBUG("%s: tunpoll waiting\n", ifp->if_xname);
679 selrecord(p, &tp->tun_rsel);
680 }
681 }
682
683 if (events & (POLLOUT | POLLWRNORM))
684 revents |= events & (POLLOUT | POLLWRNORM);
685
686 splx(s);
687 return (revents);
688 }
689
690 #endif /* NTUN */
691