if_tun.c revision 1.33 1 /* $NetBSD: if_tun.c,v 1.33 1998/07/05 00:51:27 jonathan Exp $ */
2
3 /*
4 * Copyright (c) 1988, Julian Onions <jpo (at) cs.nott.ac.uk>
5 * Nottingham University 1987.
6 *
7 * This source may be freely distributed, however I would be interested
8 * in any changes that are made.
9 *
10 * This driver takes packets off the IP i/f and hands them up to a
11 * user process to have its wicked way with. This driver has its
12 * roots in a similar driver written by Phil Cockcroft (formerly) at
13 * UCL. This driver is based much more on read/write/poll mode of
14 * operation though.
15 */
16
17 #include "tun.h"
18 #if NTUN > 0
19
20 #include "opt_inet.h"
21
22 #include <sys/param.h>
23 #include <sys/proc.h>
24 #include <sys/systm.h>
25 #include <sys/mbuf.h>
26 #include <sys/buf.h>
27 #include <sys/protosw.h>
28 #include <sys/socket.h>
29 #include <sys/ioctl.h>
30 #include <sys/errno.h>
31 #include <sys/syslog.h>
32 #include <sys/select.h>
33 #include <sys/poll.h>
34 #include <sys/file.h>
35 #include <sys/signalvar.h>
36 #include <sys/conf.h>
37
38 #include <machine/cpu.h>
39
40 #include <net/if.h>
41 #include <net/if_ether.h>
42 #include <net/netisr.h>
43 #include <net/route.h>
44
45
46 #ifdef INET
47 #include <netinet/in.h>
48 #include <netinet/in_systm.h>
49 #include <netinet/in_var.h>
50 #include <netinet/ip.h>
51 #include <netinet/if_inarp.h>
52 #endif
53
54 #ifdef NS
55 #include <netns/ns.h>
56 #include <netns/ns_if.h>
57 #endif
58
59 #include "bpfilter.h"
60 #if NBPFILTER > 0
61 #include <sys/time.h>
62 #include <net/bpf.h>
63 #endif
64
65 #include <net/if_tun.h>
66
67 #define TUNDEBUG if (tundebug) printf
68 int tundebug = 0;
69
70 struct tun_softc tunctl[NTUN];
71 extern int ifqmaxlen;
72 void tunattach __P((int));
73
74 int tun_ioctl __P((struct ifnet *, u_long, caddr_t));
75 int tun_output __P((struct ifnet *, struct mbuf *, struct sockaddr *,
76 struct rtentry *rt));
77
78 static void tuninit __P((struct tun_softc *));
79
80 void
81 tunattach(unused)
82 int unused;
83 {
84 register int i;
85 struct ifnet *ifp;
86
87 for (i = 0; i < NTUN; i++) {
88 tunctl[i].tun_flags = TUN_INITED;
89
90 ifp = &tunctl[i].tun_if;
91 sprintf(ifp->if_xname, "tun%d", i);
92 ifp->if_softc = &tunctl[i];
93 ifp->if_mtu = TUNMTU;
94 ifp->if_ioctl = tun_ioctl;
95 ifp->if_output = tun_output;
96 ifp->if_flags = IFF_POINTOPOINT;
97 ifp->if_snd.ifq_maxlen = ifqmaxlen;
98 ifp->if_collisions = 0;
99 ifp->if_ierrors = 0;
100 ifp->if_oerrors = 0;
101 ifp->if_ipackets = 0;
102 ifp->if_opackets = 0;
103 if_attach(ifp);
104 #if NBPFILTER > 0
105 bpfattach(&tunctl[i].tun_bpf, ifp, DLT_NULL, sizeof(u_int32_t));
106 #endif
107 }
108 }
109
110 /*
111 * tunnel open - must be superuser & the device must be
112 * configured in
113 */
114 int
115 tunopen(dev, flag, mode, p)
116 dev_t dev;
117 int flag, mode;
118 struct proc *p;
119 {
120 struct ifnet *ifp;
121 struct tun_softc *tp;
122 register int unit, error;
123
124 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
125 return (error);
126
127 if ((unit = minor(dev)) >= NTUN)
128 return (ENXIO);
129 tp = &tunctl[unit];
130 if (tp->tun_flags & TUN_OPEN)
131 return ENXIO;
132 ifp = &tp->tun_if;
133 tp->tun_flags |= TUN_OPEN;
134 TUNDEBUG("%s: open\n", ifp->if_xname);
135 return (0);
136 }
137
138 /*
139 * tunclose - close the device - mark i/f down & delete
140 * routing info
141 */
142 int
143 tunclose(dev, flag, mode, p)
144 dev_t dev;
145 int flag;
146 int mode;
147 struct proc *p;
148 {
149 register int unit = minor(dev), s;
150 struct tun_softc *tp = &tunctl[unit];
151 struct ifnet *ifp = &tp->tun_if;
152 struct mbuf *m;
153
154 tp->tun_flags &= ~TUN_OPEN;
155
156 /*
157 * junk all pending output
158 */
159 do {
160 s = splimp();
161 IF_DEQUEUE(&ifp->if_snd, m);
162 splx(s);
163 if (m)
164 m_freem(m);
165 } while (m);
166
167 if (ifp->if_flags & IFF_UP) {
168 s = splimp();
169 if_down(ifp);
170 if (ifp->if_flags & IFF_RUNNING) {
171 /* find internet addresses and delete routes */
172 register struct ifaddr *ifa;
173 for (ifa = ifp->if_addrlist.tqh_first; ifa != 0;
174 ifa = ifa->ifa_list.tqe_next) {
175 if (ifa->ifa_addr->sa_family == AF_INET) {
176 rtinit(ifa, (int)RTM_DELETE,
177 tp->tun_flags & TUN_DSTADDR
178 ? RTF_HOST
179 : 0);
180 }
181 }
182 }
183 splx(s);
184 }
185 tp->tun_pgrp = 0;
186 selwakeup(&tp->tun_rsel);
187
188 TUNDEBUG ("%s: closed\n", ifp->if_xname);
189 return (0);
190 }
191
192 static void
193 tuninit(tp)
194 struct tun_softc *tp;
195 {
196 struct ifnet *ifp = &tp->tun_if;
197 register struct ifaddr *ifa;
198
199 TUNDEBUG("%s: tuninit\n", ifp->if_xname);
200
201 ifp->if_flags |= IFF_UP | IFF_RUNNING;
202
203 tp->tun_flags &= ~(TUN_IASET|TUN_DSTADDR);
204 for (ifa = ifp->if_addrlist.tqh_first; ifa != 0;
205 ifa = ifa->ifa_list.tqe_next) {
206 if (ifa->ifa_addr->sa_family == AF_INET) {
207 struct sockaddr_in *sin;
208
209 sin = satosin(ifa->ifa_addr);
210 if (sin && sin->sin_addr.s_addr)
211 tp->tun_flags |= TUN_IASET;
212
213 if (ifp->if_flags & IFF_POINTOPOINT) {
214 sin = satosin(ifa->ifa_dstaddr);
215 if (sin && sin->sin_addr.s_addr)
216 tp->tun_flags |= TUN_DSTADDR;
217 }
218 }
219 }
220
221 return;
222 }
223
224 /*
225 * Process an ioctl request.
226 */
227 int
228 tun_ioctl(ifp, cmd, data)
229 struct ifnet *ifp;
230 u_long cmd;
231 caddr_t data;
232 {
233 int error = 0, s;
234
235 s = splimp();
236 switch(cmd) {
237 case SIOCSIFADDR:
238 tuninit((struct tun_softc *)(ifp->if_softc));
239 TUNDEBUG("%s: address set\n", ifp->if_xname);
240 break;
241 case SIOCSIFDSTADDR:
242 tuninit((struct tun_softc *)(ifp->if_softc));
243 TUNDEBUG("%s: destination address set\n", ifp->if_xname);
244 break;
245 case SIOCSIFBRDADDR:
246 TUNDEBUG("%s: broadcast address set\n", ifp->if_xname);
247 break;
248 case SIOCSIFMTU: {
249 struct ifreq *ifr = (struct ifreq *) data;
250 if (ifr->ifr_mtu > TUNMTU || ifr->ifr_mtu < 576) {
251 error = EINVAL;
252 break;
253 }
254 TUNDEBUG("%s: interface mtu set\n", ifp->if_xname);
255 ifp->if_mtu = ifr->ifr_mtu;
256 break;
257 }
258 case SIOCADDMULTI:
259 case SIOCDELMULTI: {
260 struct ifreq *ifr = (struct ifreq *) data;
261 if (ifr == 0) {
262 error = EAFNOSUPPORT; /* XXX */
263 break;
264 }
265 switch (ifr->ifr_addr.sa_family) {
266
267 #ifdef INET
268 case AF_INET:
269 break;
270 #endif
271
272 default:
273 error = EAFNOSUPPORT;
274 break;
275 }
276 break;
277 }
278 default:
279 error = EINVAL;
280 }
281 splx(s);
282 return (error);
283 }
284
285 /*
286 * tun_output - queue packets from higher level ready to put out.
287 */
288 int
289 tun_output(ifp, m0, dst, rt)
290 struct ifnet *ifp;
291 struct mbuf *m0;
292 struct sockaddr *dst;
293 struct rtentry *rt;
294 {
295 struct tun_softc *tp = ifp->if_softc;
296 struct proc *p;
297 int s;
298
299 TUNDEBUG ("%s: tun_output\n", ifp->if_xname);
300
301 if ((tp->tun_flags & TUN_READY) != TUN_READY) {
302 TUNDEBUG ("%s: not ready 0%o\n", ifp->if_xname,
303 tp->tun_flags);
304 m_freem (m0);
305 return (EHOSTDOWN);
306 }
307
308 #if NBPFILTER > 0
309 if (tp->tun_bpf) {
310 /*
311 * We need to prepend the address family as
312 * a four byte field. Cons up a dummy header
313 * to pacify bpf. This is safe because bpf
314 * will only read from the mbuf (i.e., it won't
315 * try to free it or keep a pointer to it).
316 */
317 struct mbuf m;
318 u_int32_t af = dst->sa_family;
319
320 m.m_next = m0;
321 m.m_len = sizeof(af);
322 m.m_data = (char *)⁡
323
324 bpf_mtap(tp->tun_bpf, &m);
325 }
326 #endif
327
328 switch(dst->sa_family) {
329 #ifdef INET
330 case AF_INET:
331 if (tp->tun_flags & TUN_PREPADDR) {
332 /* Simple link-layer header */
333 M_PREPEND(m0, dst->sa_len, M_DONTWAIT);
334 if (m0 == NULL) {
335 IF_DROP(&ifp->if_snd);
336 return (ENOBUFS);
337 }
338 bcopy(dst, mtod(m0, char *), dst->sa_len);
339 }
340
341 s = splimp();
342 if (IF_QFULL(&ifp->if_snd)) {
343 IF_DROP(&ifp->if_snd);
344 m_freem(m0);
345 splx(s);
346 ifp->if_collisions++;
347 return (ENOBUFS);
348 }
349 IF_ENQUEUE(&ifp->if_snd, m0);
350 splx(s);
351 ifp->if_opackets++;
352 break;
353 #endif
354 default:
355 m_freem(m0);
356 return (EAFNOSUPPORT);
357 }
358
359 if (tp->tun_flags & TUN_RWAIT) {
360 tp->tun_flags &= ~TUN_RWAIT;
361 wakeup((caddr_t)tp);
362 }
363 if (tp->tun_flags & TUN_ASYNC && tp->tun_pgrp) {
364 if (tp->tun_pgrp > 0)
365 gsignal(tp->tun_pgrp, SIGIO);
366 else if ((p = pfind(-tp->tun_pgrp)) != NULL)
367 psignal(p, SIGIO);
368 }
369 selwakeup(&tp->tun_rsel);
370 return (0);
371 }
372
373 /*
374 * the cdevsw interface is now pretty minimal.
375 */
376 int
377 tunioctl(dev, cmd, data, flag, p)
378 dev_t dev;
379 u_long cmd;
380 caddr_t data;
381 int flag;
382 struct proc *p;
383 {
384 int unit = minor(dev), s;
385 struct tun_softc *tp = &tunctl[unit];
386
387 switch (cmd) {
388 case TUNSDEBUG:
389 tundebug = *(int *)data;
390 break;
391
392 case TUNGDEBUG:
393 *(int *)data = tundebug;
394 break;
395
396 case TUNSIFMODE:
397 switch (*(int *)data & (IFF_POINTOPOINT|IFF_BROADCAST)) {
398 case IFF_POINTOPOINT:
399 case IFF_BROADCAST:
400 s = splimp();
401 if (tp->tun_if.if_flags & IFF_UP) {
402 splx(s);
403 return (EBUSY);
404 }
405 tp->tun_if.if_flags &=
406 ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
407 tp->tun_if.if_flags |= *(int *)data;
408 splx(s);
409 break;
410 default:
411 return (EINVAL);
412 break;
413 }
414 break;
415
416 case TUNSLMODE:
417 if (*(int *)data)
418 tp->tun_flags |= TUN_PREPADDR;
419 else
420 tp->tun_flags &= ~TUN_PREPADDR;
421 break;
422
423 case FIONBIO:
424 if (*(int *)data)
425 tp->tun_flags |= TUN_NBIO;
426 else
427 tp->tun_flags &= ~TUN_NBIO;
428 break;
429
430 case FIOASYNC:
431 if (*(int *)data)
432 tp->tun_flags |= TUN_ASYNC;
433 else
434 tp->tun_flags &= ~TUN_ASYNC;
435 break;
436
437 case FIONREAD:
438 s = splimp();
439 if (tp->tun_if.if_snd.ifq_head)
440 *(int *)data = tp->tun_if.if_snd.ifq_head->m_pkthdr.len;
441 else
442 *(int *)data = 0;
443 splx(s);
444 break;
445
446 case TIOCSPGRP:
447 tp->tun_pgrp = *(int *)data;
448 break;
449
450 case TIOCGPGRP:
451 *(int *)data = tp->tun_pgrp;
452 break;
453
454 default:
455 return (ENOTTY);
456 }
457 return (0);
458 }
459
460 /*
461 * The cdevsw read interface - reads a packet at a time, or at
462 * least as much of a packet as can be read.
463 */
464 int
465 tunread(dev, uio, ioflag)
466 dev_t dev;
467 struct uio *uio;
468 int ioflag;
469 {
470 int unit = minor(dev);
471 struct tun_softc *tp = &tunctl[unit];
472 struct ifnet *ifp = &tp->tun_if;
473 struct mbuf *m, *m0;
474 int error=0, len, s;
475
476 TUNDEBUG ("%s: read\n", ifp->if_xname);
477 if ((tp->tun_flags & TUN_READY) != TUN_READY) {
478 TUNDEBUG ("%s: not ready 0%o\n", ifp->if_xname, tp->tun_flags);
479 return EHOSTDOWN;
480 }
481
482 tp->tun_flags &= ~TUN_RWAIT;
483
484 s = splimp();
485 do {
486 IF_DEQUEUE(&ifp->if_snd, m0);
487 if (m0 == 0) {
488 if (tp->tun_flags & TUN_NBIO) {
489 splx(s);
490 return (EWOULDBLOCK);
491 }
492 tp->tun_flags |= TUN_RWAIT;
493 if (tsleep((caddr_t)tp, PZERO|PCATCH, "tunread", 0)) {
494 splx(s);
495 return (EINTR);
496 }
497 }
498 } while (m0 == 0);
499 splx(s);
500
501 while (m0 && uio->uio_resid > 0 && error == 0) {
502 len = min(uio->uio_resid, m0->m_len);
503 if (len == 0)
504 break;
505 error = uiomove(mtod(m0, caddr_t), len, uio);
506 MFREE(m0, m);
507 m0 = m;
508 }
509
510 if (m0) {
511 TUNDEBUG("Dropping mbuf\n");
512 m_freem(m0);
513 }
514 if (error)
515 ifp->if_ierrors++;
516 return (error);
517 }
518
519 /*
520 * the cdevsw write interface - an atomic write is a packet - or else!
521 */
522 int
523 tunwrite(dev, uio, ioflag)
524 dev_t dev;
525 struct uio *uio;
526 int ioflag;
527 {
528 int unit = minor (dev);
529 struct tun_softc *tp = &tunctl[unit];
530 struct ifnet *ifp = &tp->tun_if;
531 struct mbuf *top, **mp, *m;
532 struct ifqueue *ifq;
533 struct sockaddr dst;
534 int isr, error=0, s, tlen, mlen;
535
536 TUNDEBUG("%s: tunwrite\n", ifp->if_xname);
537
538 if (tp->tun_flags & TUN_PREPADDR) {
539 if (uio->uio_resid < sizeof(dst))
540 return (EIO);
541 error = uiomove((caddr_t)&dst, sizeof(dst), uio);
542 if (dst.sa_len > sizeof(dst)) {
543 /* Duh.. */
544 char discard;
545 int n = dst.sa_len - sizeof(dst);
546 while (n--)
547 if ((error = uiomove(&discard, 1, uio)) != 0)
548 return (error);
549 }
550 } else {
551 #ifdef INET
552 dst.sa_family = AF_INET;
553 #endif
554 }
555
556 if (uio->uio_resid < 0 || uio->uio_resid > TUNMTU) {
557 TUNDEBUG("%s: len=%d!\n", ifp->if_xname, uio->uio_resid);
558 return (EIO);
559 }
560
561 switch (dst.sa_family) {
562 #ifdef INET
563 case AF_INET:
564 ifq = &ipintrq;
565 isr = NETISR_IP;
566 break;
567 #endif
568 default:
569 return (EAFNOSUPPORT);
570 }
571
572 tlen = uio->uio_resid;
573
574 /* get a header mbuf */
575 MGETHDR(m, M_DONTWAIT, MT_DATA);
576 if (m == NULL)
577 return (ENOBUFS);
578 mlen = MHLEN;
579
580 top = 0;
581 mp = ⊤
582 while (error == 0 && uio->uio_resid > 0) {
583 m->m_len = min(mlen, uio->uio_resid);
584 error = uiomove(mtod (m, caddr_t), m->m_len, uio);
585 *mp = m;
586 mp = &m->m_next;
587 if (uio->uio_resid > 0) {
588 MGET (m, M_DONTWAIT, MT_DATA);
589 if (m == 0) {
590 error = ENOBUFS;
591 break;
592 }
593 mlen = MLEN;
594 }
595 }
596 if (error) {
597 if (top)
598 m_freem (top);
599 ifp->if_ierrors++;
600 return (error);
601 }
602
603 top->m_pkthdr.len = tlen;
604 top->m_pkthdr.rcvif = ifp;
605
606 #if NBPFILTER > 0
607 if (tp->tun_bpf) {
608 /*
609 * We need to prepend the address family as
610 * a four byte field. Cons up a dummy header
611 * to pacify bpf. This is safe because bpf
612 * will only read from the mbuf (i.e., it won't
613 * try to free it or keep a pointer to it).
614 */
615 struct mbuf m;
616 u_int32_t af = AF_INET;
617
618 m.m_next = top;
619 m.m_len = sizeof(af);
620 m.m_data = (char *)⁡
621
622 bpf_mtap(tp->tun_bpf, &m);
623 }
624 #endif
625
626 s = splimp();
627 if (IF_QFULL(ifq)) {
628 IF_DROP(ifq);
629 splx(s);
630 ifp->if_collisions++;
631 m_freem(top);
632 return (ENOBUFS);
633 }
634 IF_ENQUEUE(ifq, top);
635 splx(s);
636 ifp->if_ipackets++;
637 schednetisr(isr);
638 return (error);
639 }
640
641 /*
642 * tunpoll - the poll interface, this is only useful on reads
643 * really. The write detect always returns true, write never blocks
644 * anyway, it either accepts the packet or drops it.
645 */
646 int
647 tunpoll(dev, events, p)
648 dev_t dev;
649 int events;
650 struct proc *p;
651 {
652 int unit = minor(dev), s;
653 struct tun_softc *tp = &tunctl[unit];
654 struct ifnet *ifp = &tp->tun_if;
655 int revents = 0;
656
657 s = splimp();
658 TUNDEBUG("%s: tunpoll\n", ifp->if_xname);
659
660 if (events & (POLLIN | POLLRDNORM))
661 if (ifp->if_snd.ifq_len > 0) {
662 TUNDEBUG("%s: tunpoll q=%d\n", ifp->if_xname,
663 ifp->if_snd.ifq_len);
664 revents |= events & (POLLIN | POLLRDNORM);
665 } else {
666 TUNDEBUG("%s: tunpoll waiting\n", ifp->if_xname);
667 selrecord(p, &tp->tun_rsel);
668 }
669
670 if (events & (POLLOUT | POLLWRNORM))
671 revents |= events & (POLLOUT | POLLWRNORM);
672
673 splx(s);
674 return (revents);
675 }
676
677 #endif /* NTUN */
678