if_tun.c revision 1.45.2.2 1 /* $NetBSD: if_tun.c,v 1.45.2.2 2001/09/26 15:28:25 fvdl Exp $ */
2
3 /*
4 * Copyright (c) 1988, Julian Onions <jpo (at) cs.nott.ac.uk>
5 * Nottingham University 1987.
6 *
7 * This source may be freely distributed, however I would be interested
8 * in any changes that are made.
9 *
10 * This driver takes packets off the IP i/f and hands them up to a
11 * user process to have its wicked way with. This driver has its
12 * roots in a similar driver written by Phil Cockcroft (formerly) at
13 * UCL. This driver is based much more on read/write/poll mode of
14 * operation though.
15 */
16
17 #include "tun.h"
18 #if NTUN > 0
19
20 #include "opt_inet.h"
21 #include "opt_ns.h"
22
23 #include <sys/param.h>
24 #include <sys/proc.h>
25 #include <sys/systm.h>
26 #include <sys/mbuf.h>
27 #include <sys/buf.h>
28 #include <sys/protosw.h>
29 #include <sys/socket.h>
30 #include <sys/ioctl.h>
31 #include <sys/errno.h>
32 #include <sys/syslog.h>
33 #include <sys/select.h>
34 #include <sys/poll.h>
35 #include <sys/file.h>
36 #include <sys/signalvar.h>
37 #include <sys/conf.h>
38 #include <sys/vnode.h>
39
40 #include <miscfs/specfs/specdev.h>
41
42 #include <machine/cpu.h>
43
44 #include <net/if.h>
45 #include <net/if_ether.h>
46 #include <net/netisr.h>
47 #include <net/route.h>
48
49
50 #ifdef INET
51 #include <netinet/in.h>
52 #include <netinet/in_systm.h>
53 #include <netinet/in_var.h>
54 #include <netinet/ip.h>
55 #include <netinet/if_inarp.h>
56 #endif
57
58 #ifdef NS
59 #include <netns/ns.h>
60 #include <netns/ns_if.h>
61 #endif
62
63 #include "bpfilter.h"
64 #if NBPFILTER > 0
65 #include <sys/time.h>
66 #include <net/bpf.h>
67 #endif
68
69 #include <net/if_tun.h>
70
71 #define TUNDEBUG if (tundebug) printf
72 int tundebug = 0;
73
74 struct tun_softc tunctl[NTUN];
75 extern int ifqmaxlen;
76 void tunattach __P((int));
77
78 int tun_ioctl __P((struct ifnet *, u_long, caddr_t));
79 int tun_output __P((struct ifnet *, struct mbuf *, struct sockaddr *,
80 struct rtentry *rt));
81
82 static void tuninit __P((struct tun_softc *));
83
84 void
85 tunattach(unused)
86 int unused;
87 {
88 int i;
89 struct ifnet *ifp;
90
91 for (i = 0; i < NTUN; i++) {
92 tunctl[i].tun_flags = TUN_INITED;
93
94 ifp = &tunctl[i].tun_if;
95 sprintf(ifp->if_xname, "tun%d", i);
96 ifp->if_softc = &tunctl[i];
97 ifp->if_mtu = TUNMTU;
98 ifp->if_ioctl = tun_ioctl;
99 ifp->if_output = tun_output;
100 ifp->if_flags = IFF_POINTOPOINT;
101 ifp->if_snd.ifq_maxlen = ifqmaxlen;
102 ifp->if_collisions = 0;
103 ifp->if_ierrors = 0;
104 ifp->if_oerrors = 0;
105 ifp->if_ipackets = 0;
106 ifp->if_opackets = 0;
107 ifp->if_dlt = DLT_NULL;
108 if_attach(ifp);
109 if_alloc_sadl(ifp);
110 #if NBPFILTER > 0
111 bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
112 #endif
113 }
114 }
115
116 /*
117 * tunnel open - must be superuser & the device must be
118 * configured in
119 */
120 int
121 tunopen(devvp, flag, mode, p)
122 struct vnode *devvp;
123 int flag, mode;
124 struct proc *p;
125 {
126 struct ifnet *ifp;
127 struct tun_softc *tp;
128 int unit, error;
129
130 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
131 return (error);
132
133 if ((unit = minor(vdev_rdev(devvp))) >= NTUN)
134 return (ENXIO);
135 tp = &tunctl[unit];
136
137 vdev_setprivdata(devvp, tp);
138
139 if (tp->tun_flags & TUN_OPEN)
140 return ENXIO;
141 ifp = &tp->tun_if;
142 tp->tun_flags |= TUN_OPEN;
143 TUNDEBUG("%s: open\n", ifp->if_xname);
144 return (0);
145 }
146
147 /*
148 * tunclose - close the device - mark i/f down & delete
149 * routing info
150 */
151 int
152 tunclose(devvp, flag, mode, p)
153 struct vnode *devvp;
154 int flag;
155 int mode;
156 struct proc *p;
157 {
158 struct tun_softc *tp;
159 struct ifnet *ifp;
160 struct mbuf *m;
161 int s;
162
163 tp = vdev_privdata(devvp);
164 ifp = &tp->tun_if;
165
166 tp->tun_flags &= ~TUN_OPEN;
167
168 /*
169 * junk all pending output
170 */
171 do {
172 s = splnet();
173 IF_DEQUEUE(&ifp->if_snd, m);
174 splx(s);
175 if (m)
176 m_freem(m);
177 } while (m);
178
179 if (ifp->if_flags & IFF_UP) {
180 s = splnet();
181 if_down(ifp);
182 if (ifp->if_flags & IFF_RUNNING) {
183 /* find internet addresses and delete routes */
184 struct ifaddr *ifa;
185 for (ifa = ifp->if_addrlist.tqh_first; ifa != 0;
186 ifa = ifa->ifa_list.tqe_next) {
187 #ifdef INET
188 if (ifa->ifa_addr->sa_family == AF_INET) {
189 rtinit(ifa, (int)RTM_DELETE,
190 tp->tun_flags & TUN_DSTADDR
191 ? RTF_HOST
192 : 0);
193 }
194 #endif
195 }
196 }
197 splx(s);
198 }
199 tp->tun_pgrp = 0;
200 selwakeup(&tp->tun_rsel);
201
202 TUNDEBUG ("%s: closed\n", ifp->if_xname);
203 return (0);
204 }
205
206 static void
207 tuninit(tp)
208 struct tun_softc *tp;
209 {
210 struct ifnet *ifp = &tp->tun_if;
211 struct ifaddr *ifa;
212
213 TUNDEBUG("%s: tuninit\n", ifp->if_xname);
214
215 ifp->if_flags |= IFF_UP | IFF_RUNNING;
216
217 tp->tun_flags &= ~(TUN_IASET|TUN_DSTADDR);
218 for (ifa = ifp->if_addrlist.tqh_first; ifa != 0;
219 ifa = ifa->ifa_list.tqe_next) {
220 #ifdef INET
221 if (ifa->ifa_addr->sa_family == AF_INET) {
222 struct sockaddr_in *sin;
223
224 sin = satosin(ifa->ifa_addr);
225 if (sin && sin->sin_addr.s_addr)
226 tp->tun_flags |= TUN_IASET;
227
228 if (ifp->if_flags & IFF_POINTOPOINT) {
229 sin = satosin(ifa->ifa_dstaddr);
230 if (sin && sin->sin_addr.s_addr)
231 tp->tun_flags |= TUN_DSTADDR;
232 }
233 }
234 #endif
235 }
236
237 return;
238 }
239
240 /*
241 * Process an ioctl request.
242 */
243 int
244 tun_ioctl(ifp, cmd, data)
245 struct ifnet *ifp;
246 u_long cmd;
247 caddr_t data;
248 {
249 int error = 0, s;
250
251 s = splnet();
252 switch(cmd) {
253 case SIOCSIFADDR:
254 tuninit((struct tun_softc *)(ifp->if_softc));
255 TUNDEBUG("%s: address set\n", ifp->if_xname);
256 break;
257 case SIOCSIFDSTADDR:
258 tuninit((struct tun_softc *)(ifp->if_softc));
259 TUNDEBUG("%s: destination address set\n", ifp->if_xname);
260 break;
261 case SIOCSIFBRDADDR:
262 TUNDEBUG("%s: broadcast address set\n", ifp->if_xname);
263 break;
264 case SIOCSIFMTU: {
265 struct ifreq *ifr = (struct ifreq *) data;
266 if (ifr->ifr_mtu > TUNMTU || ifr->ifr_mtu < 576) {
267 error = EINVAL;
268 break;
269 }
270 TUNDEBUG("%s: interface mtu set\n", ifp->if_xname);
271 ifp->if_mtu = ifr->ifr_mtu;
272 break;
273 }
274 case SIOCADDMULTI:
275 case SIOCDELMULTI: {
276 struct ifreq *ifr = (struct ifreq *) data;
277 if (ifr == 0) {
278 error = EAFNOSUPPORT; /* XXX */
279 break;
280 }
281 switch (ifr->ifr_addr.sa_family) {
282
283 #ifdef INET
284 case AF_INET:
285 break;
286 #endif
287
288 default:
289 error = EAFNOSUPPORT;
290 break;
291 }
292 break;
293 }
294 case SIOCSIFFLAGS:
295 break;
296 default:
297 error = EINVAL;
298 }
299 splx(s);
300 return (error);
301 }
302
303 /*
304 * tun_output - queue packets from higher level ready to put out.
305 */
306 int
307 tun_output(ifp, m0, dst, rt)
308 struct ifnet *ifp;
309 struct mbuf *m0;
310 struct sockaddr *dst;
311 struct rtentry *rt;
312 {
313 struct tun_softc *tp = ifp->if_softc;
314 struct proc *p;
315 #ifdef INET
316 int s;
317 #endif
318
319 TUNDEBUG ("%s: tun_output\n", ifp->if_xname);
320
321 if ((tp->tun_flags & TUN_READY) != TUN_READY) {
322 TUNDEBUG ("%s: not ready 0%o\n", ifp->if_xname,
323 tp->tun_flags);
324 m_freem (m0);
325 return (EHOSTDOWN);
326 }
327
328 #if NBPFILTER > 0
329 if (ifp->if_bpf) {
330 /*
331 * We need to prepend the address family as
332 * a four byte field. Cons up a dummy header
333 * to pacify bpf. This is safe because bpf
334 * will only read from the mbuf (i.e., it won't
335 * try to free it or keep a pointer to it).
336 */
337 struct mbuf m;
338 u_int32_t af = dst->sa_family;
339
340 m.m_next = m0;
341 m.m_len = sizeof(af);
342 m.m_data = (char *)⁡
343
344 bpf_mtap(ifp->if_bpf, &m);
345 }
346 #endif
347
348 switch(dst->sa_family) {
349 #ifdef INET
350 case AF_INET:
351 if (tp->tun_flags & TUN_PREPADDR) {
352 /* Simple link-layer header */
353 M_PREPEND(m0, dst->sa_len, M_DONTWAIT);
354 if (m0 == NULL) {
355 IF_DROP(&ifp->if_snd);
356 return (ENOBUFS);
357 }
358 bcopy(dst, mtod(m0, char *), dst->sa_len);
359 }
360 /* FALLTHROUGH */
361 case AF_UNSPEC:
362 s = splnet();
363 if (IF_QFULL(&ifp->if_snd)) {
364 IF_DROP(&ifp->if_snd);
365 m_freem(m0);
366 splx(s);
367 ifp->if_collisions++;
368 return (ENOBUFS);
369 }
370 IF_ENQUEUE(&ifp->if_snd, m0);
371 splx(s);
372 ifp->if_opackets++;
373 break;
374 #endif
375 default:
376 m_freem(m0);
377 return (EAFNOSUPPORT);
378 }
379
380 if (tp->tun_flags & TUN_RWAIT) {
381 tp->tun_flags &= ~TUN_RWAIT;
382 wakeup((caddr_t)tp);
383 }
384 if (tp->tun_flags & TUN_ASYNC && tp->tun_pgrp) {
385 if (tp->tun_pgrp > 0)
386 gsignal(tp->tun_pgrp, SIGIO);
387 else if ((p = pfind(-tp->tun_pgrp)) != NULL)
388 psignal(p, SIGIO);
389 }
390 selwakeup(&tp->tun_rsel);
391 return (0);
392 }
393
394 /*
395 * the cdevsw interface is now pretty minimal.
396 */
397 int
398 tunioctl(devvp, cmd, data, flag, p)
399 struct vnode *devvp;
400 u_long cmd;
401 caddr_t data;
402 int flag;
403 struct proc *p;
404 {
405 struct tun_softc *tp;
406 int s;
407
408 tp = vdev_privdata(devvp);
409
410 switch (cmd) {
411 case TUNSDEBUG:
412 tundebug = *(int *)data;
413 break;
414
415 case TUNGDEBUG:
416 *(int *)data = tundebug;
417 break;
418
419 case TUNSIFMODE:
420 switch (*(int *)data & (IFF_POINTOPOINT|IFF_BROADCAST)) {
421 case IFF_POINTOPOINT:
422 case IFF_BROADCAST:
423 s = splnet();
424 if (tp->tun_if.if_flags & IFF_UP) {
425 splx(s);
426 return (EBUSY);
427 }
428 tp->tun_if.if_flags &=
429 ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
430 tp->tun_if.if_flags |= *(int *)data;
431 splx(s);
432 break;
433 default:
434 return (EINVAL);
435 break;
436 }
437 break;
438
439 case TUNSLMODE:
440 if (*(int *)data)
441 tp->tun_flags |= TUN_PREPADDR;
442 else
443 tp->tun_flags &= ~TUN_PREPADDR;
444 break;
445
446 case FIONBIO:
447 if (*(int *)data)
448 tp->tun_flags |= TUN_NBIO;
449 else
450 tp->tun_flags &= ~TUN_NBIO;
451 break;
452
453 case FIOASYNC:
454 if (*(int *)data)
455 tp->tun_flags |= TUN_ASYNC;
456 else
457 tp->tun_flags &= ~TUN_ASYNC;
458 break;
459
460 case FIONREAD:
461 s = splnet();
462 if (tp->tun_if.if_snd.ifq_head)
463 *(int *)data = tp->tun_if.if_snd.ifq_head->m_pkthdr.len;
464 else
465 *(int *)data = 0;
466 splx(s);
467 break;
468
469 case TIOCSPGRP:
470 tp->tun_pgrp = *(int *)data;
471 break;
472
473 case TIOCGPGRP:
474 *(int *)data = tp->tun_pgrp;
475 break;
476
477 default:
478 return (ENOTTY);
479 }
480 return (0);
481 }
482
483 /*
484 * The cdevsw read interface - reads a packet at a time, or at
485 * least as much of a packet as can be read.
486 */
487 int
488 tunread(devvp, uio, ioflag)
489 struct vnode *devvp;
490 struct uio *uio;
491 int ioflag;
492 {
493 struct tun_softc *tp;
494 struct ifnet *ifp;
495 struct mbuf *m, *m0;
496 int error, len, s;
497
498 tp = vdev_privdata(devvp);
499 ifp = &tp->tun_if;
500 error = 0;
501
502 TUNDEBUG ("%s: read\n", ifp->if_xname);
503 if ((tp->tun_flags & TUN_READY) != TUN_READY) {
504 TUNDEBUG ("%s: not ready 0%o\n", ifp->if_xname, tp->tun_flags);
505 return EHOSTDOWN;
506 }
507
508 tp->tun_flags &= ~TUN_RWAIT;
509
510 s = splnet();
511 do {
512 IF_DEQUEUE(&ifp->if_snd, m0);
513 if (m0 == 0) {
514 if (tp->tun_flags & TUN_NBIO) {
515 splx(s);
516 return (EWOULDBLOCK);
517 }
518 tp->tun_flags |= TUN_RWAIT;
519 if (tsleep((caddr_t)tp, PZERO|PCATCH, "tunread", 0)) {
520 splx(s);
521 return (EINTR);
522 }
523 }
524 } while (m0 == 0);
525 splx(s);
526
527 while (m0 && uio->uio_resid > 0 && error == 0) {
528 len = min(uio->uio_resid, m0->m_len);
529 if (len != 0)
530 error = uiomove(mtod(m0, caddr_t), len, uio);
531 MFREE(m0, m);
532 m0 = m;
533 }
534
535 if (m0) {
536 TUNDEBUG("Dropping mbuf\n");
537 m_freem(m0);
538 }
539 if (error)
540 ifp->if_ierrors++;
541 return (error);
542 }
543
544 /*
545 * the cdevsw write interface - an atomic write is a packet - or else!
546 */
547 int
548 tunwrite(devvp, uio, ioflag)
549 struct vnode *devvp;
550 struct uio *uio;
551 int ioflag;
552 {
553 struct tun_softc *tp;
554 struct ifnet *ifp;
555 struct mbuf *top, **mp, *m;
556 struct ifqueue *ifq;
557 struct sockaddr dst;
558 int isr, error, s, tlen, mlen;
559
560 tp = vdev_privdata(devvp);
561 ifp = &tp->tun_if;
562 error = 0;
563
564 TUNDEBUG("%s: tunwrite\n", ifp->if_xname);
565
566 if (tp->tun_flags & TUN_PREPADDR) {
567 if (uio->uio_resid < sizeof(dst))
568 return (EIO);
569 error = uiomove((caddr_t)&dst, sizeof(dst), uio);
570 if (dst.sa_len > sizeof(dst)) {
571 /* Duh.. */
572 char discard;
573 int n = dst.sa_len - sizeof(dst);
574 while (n--)
575 if ((error = uiomove(&discard, 1, uio)) != 0)
576 return (error);
577 }
578 } else {
579 #ifdef INET
580 dst.sa_family = AF_INET;
581 #endif
582 }
583
584 if (uio->uio_resid < 0 || uio->uio_resid > TUNMTU) {
585 TUNDEBUG("%s: len=%lu!\n", ifp->if_xname,
586 (unsigned long)uio->uio_resid);
587 return (EIO);
588 }
589
590 switch (dst.sa_family) {
591 #ifdef INET
592 case AF_INET:
593 ifq = &ipintrq;
594 isr = NETISR_IP;
595 break;
596 #endif
597 default:
598 return (EAFNOSUPPORT);
599 }
600
601 tlen = uio->uio_resid;
602
603 /* get a header mbuf */
604 MGETHDR(m, M_DONTWAIT, MT_DATA);
605 if (m == NULL)
606 return (ENOBUFS);
607 mlen = MHLEN;
608
609 top = 0;
610 mp = ⊤
611 while (error == 0 && uio->uio_resid > 0) {
612 m->m_len = min(mlen, uio->uio_resid);
613 error = uiomove(mtod (m, caddr_t), m->m_len, uio);
614 *mp = m;
615 mp = &m->m_next;
616 if (uio->uio_resid > 0) {
617 MGET (m, M_DONTWAIT, MT_DATA);
618 if (m == 0) {
619 error = ENOBUFS;
620 break;
621 }
622 mlen = MLEN;
623 }
624 }
625 if (error) {
626 if (top)
627 m_freem (top);
628 ifp->if_ierrors++;
629 return (error);
630 }
631
632 top->m_pkthdr.len = tlen;
633 top->m_pkthdr.rcvif = ifp;
634
635 #if NBPFILTER > 0
636 if (ifp->if_bpf) {
637 /*
638 * We need to prepend the address family as
639 * a four byte field. Cons up a dummy header
640 * to pacify bpf. This is safe because bpf
641 * will only read from the mbuf (i.e., it won't
642 * try to free it or keep a pointer to it).
643 */
644 struct mbuf m;
645 u_int32_t af = AF_INET;
646
647 m.m_next = top;
648 m.m_len = sizeof(af);
649 m.m_data = (char *)⁡
650
651 bpf_mtap(ifp->if_bpf, &m);
652 }
653 #endif
654
655 s = splnet();
656 if (IF_QFULL(ifq)) {
657 IF_DROP(ifq);
658 splx(s);
659 ifp->if_collisions++;
660 m_freem(top);
661 return (ENOBUFS);
662 }
663 IF_ENQUEUE(ifq, top);
664 splx(s);
665 ifp->if_ipackets++;
666 schednetisr(isr);
667 return (error);
668 }
669
670 /*
671 * tunpoll - the poll interface, this is only useful on reads
672 * really. The write detect always returns true, write never blocks
673 * anyway, it either accepts the packet or drops it.
674 */
675 int
676 tunpoll(devvp, events, p)
677 struct vnode *devvp;
678 int events;
679 struct proc *p;
680 {
681 struct tun_softc *tp;
682 struct ifnet *ifp;
683 int s, revents;
684
685 tp = vdev_privdata(devvp);
686 ifp = &tp->tun_if;
687 revents = 0;
688
689 s = splnet();
690 TUNDEBUG("%s: tunpoll\n", ifp->if_xname);
691
692 if (events & (POLLIN | POLLRDNORM)) {
693 if (ifp->if_snd.ifq_len > 0) {
694 TUNDEBUG("%s: tunpoll q=%d\n", ifp->if_xname,
695 ifp->if_snd.ifq_len);
696 revents |= events & (POLLIN | POLLRDNORM);
697 } else {
698 TUNDEBUG("%s: tunpoll waiting\n", ifp->if_xname);
699 selrecord(p, &tp->tun_rsel);
700 }
701 }
702
703 if (events & (POLLOUT | POLLWRNORM))
704 revents |= events & (POLLOUT | POLLWRNORM);
705
706 splx(s);
707 return (revents);
708 }
709
710 #endif /* NTUN */
711