Home | History | Annotate | Line # | Download | only in dist
pcap-netmap.c revision 1.1.1.4
      1 /*
      2  * Copyright (C) 2014 Luigi Rizzo. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  *
      8  *   1. Redistributions of source code must retain the above copyright
      9  *      notice, this list of conditions and the following disclaimer.
     10  *   2. Redistributions in binary form must reproduce the above copyright
     11  *      notice, this list of conditions and the following disclaimer in the
     12  *      documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''AND
     15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     17  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     24  * SUCH DAMAGE.
     25  */
     26 
     27 #include <config.h>
     28 
     29 #include <poll.h>
     30 #include <errno.h>
     31 #include <netdb.h>
     32 #include <stdio.h>
     33 #include <stdlib.h>
     34 #include <string.h>
     35 #include <unistd.h>
     36 
     37 #define NETMAP_WITH_LIBS
     38 #include <net/netmap_user.h>
     39 
     40 #include "pcap-int.h"
     41 #include "pcap-netmap.h"
     42 
#ifndef __FreeBSD__
  /*
   * On FreeBSD we use IFF_PPROMISC which is in ifr_flagshigh.
   * Remap to IFF_PROMISC on other platforms.
   *
   * The activate and close routines in this file test, set and clear
   * IFF_PPROMISC unconditionally; this alias is what lets them do so
   * when not building for FreeBSD.
   *
   * XXX - DragonFly BSD?
   */
  #define IFF_PPROMISC	IFF_PROMISC
#endif /* __FreeBSD__ */
     52 
/*
 * Private, per-handle state for a netmap capture.  The routines in
 * this file obtain it from p->priv.
 */
struct pcap_netmap {
	struct nm_desc *d;	/* pointer returned by nm_open() */
	pcap_handler cb;	/* callback and argument; both are stored */
	u_char *cb_arg;		/* by pcap_netmap_dispatch() on each call */
	int must_clear_promisc;	/* flag: set at activation, tested at close */
	uint64_t rx_pkts;	/* # of pkts received before the filter */
};
     60 
     61 
     62 static int
     63 pcap_netmap_stats(pcap_t *p, struct pcap_stat *ps)
     64 {
     65 	struct pcap_netmap *pn = p->priv;
     66 
     67 	ps->ps_recv = (u_int)pn->rx_pkts;
     68 	ps->ps_drop = 0;
     69 	ps->ps_ifdrop = 0;
     70 	return 0;
     71 }
     72 
     73 
     74 static void
     75 pcap_netmap_filter(u_char *arg, struct pcap_pkthdr *h, const u_char *buf)
     76 {
     77 	pcap_t *p = (pcap_t *)arg;
     78 	struct pcap_netmap *pn = p->priv;
     79 	const struct bpf_insn *pc = p->fcode.bf_insns;
     80 
     81 	++pn->rx_pkts;
     82 	if (pc == NULL || pcapint_filter(pc, buf, h->len, h->caplen))
     83 		pn->cb(pn->cb_arg, h, buf);
     84 }
     85 
     86 
     87 static int
     88 pcap_netmap_dispatch(pcap_t *p, int cnt, pcap_handler cb, u_char *user)
     89 {
     90 	int ret;
     91 	struct pcap_netmap *pn = p->priv;
     92 	struct nm_desc *d = pn->d;
     93 	struct pollfd pfd = { .fd = p->fd, .events = POLLIN, .revents = 0 };
     94 
     95 	pn->cb = cb;
     96 	pn->cb_arg = user;
     97 
     98 	for (;;) {
     99 		if (p->break_loop) {
    100 			p->break_loop = 0;
    101 			return PCAP_ERROR_BREAK;
    102 		}
    103 		/* nm_dispatch won't run forever */
    104 
    105 		ret = nm_dispatch((void *)d, cnt, (void *)pcap_netmap_filter, (void *)p);
    106 		if (ret != 0)
    107 			break;
    108 		errno = 0;
    109 		ret = poll(&pfd, 1, p->opt.timeout);
    110 	}
    111 	return ret;
    112 }
    113 
    114 
    115 /* XXX need to check the NIOCTXSYNC/poll */
    116 static int
    117 pcap_netmap_inject(pcap_t *p, const void *buf, int size)
    118 {
    119 	struct pcap_netmap *pn = p->priv;
    120 	struct nm_desc *d = pn->d;
    121 
    122 	return nm_inject(d, buf, size);
    123 }
    124 
    125 
    126 static int
    127 pcap_netmap_ioctl(pcap_t *p, u_long what, uint32_t *if_flags)
    128 {
    129 	struct pcap_netmap *pn = p->priv;
    130 	struct nm_desc *d = pn->d;
    131 	struct ifreq ifr;
    132 	int error, fd = d->fd;
    133 
    134 #ifdef __linux__
    135 	fd = socket(AF_INET, SOCK_DGRAM, 0);
    136 	if (fd < 0) {
    137 		fprintf(stderr, "Error: cannot get device control socket.\n");
    138 		return -1;
    139 	}
    140 #endif /* __linux__ */
    141 	bzero(&ifr, sizeof(ifr));
    142 	strncpy(ifr.ifr_name, d->req.nr_name, sizeof(ifr.ifr_name));
    143 	switch (what) {
    144 	case SIOCSIFFLAGS:
    145 		/*
    146 		 * The flags we pass in are 32-bit and unsigned.
    147 		 *
    148 		 * On most if not all UN*Xes, ifr_flags is 16-bit and
    149 		 * signed, and the result of assigning a longer
    150 		 * unsigned value to a shorter signed value is
    151 		 * implementation-defined (even if, in practice, it'll
    152 		 * do what's intended on all platforms we support
    153 		 * result of assigning a 32-bit unsigned value).
    154 		 * So we mask out the upper 16 bits.
    155 		 */
    156 		ifr.ifr_flags = *if_flags & 0xffff;
    157 #ifdef __FreeBSD__
    158 		/*
    159 		 * In FreeBSD, we need to set the high-order flags,
    160 		 * as we're using IFF_PPROMISC, which is in those bits.
    161 		 *
    162 		 * XXX - DragonFly BSD?
    163 		 */
    164 		ifr.ifr_flagshigh = *if_flags >> 16;
    165 #endif /* __FreeBSD__ */
    166 		break;
    167 	}
    168 	error = ioctl(fd, what, &ifr);
    169 	if (!error) {
    170 		switch (what) {
    171 		case SIOCGIFFLAGS:
    172 			/*
    173 			 * The flags we return are 32-bit.
    174 			 *
    175 			 * On most if not all UN*Xes, ifr_flags is
    176 			 * 16-bit and signed, and will get sign-
    177 			 * extended, so that the upper 16 bits of
    178 			 * those flags will be forced on.  So we
    179 			 * mask out the upper 16 bits of the
    180 			 * sign-extended value.
    181 			 */
    182 			*if_flags = ifr.ifr_flags & 0xffff;
    183 #ifdef __FreeBSD__
    184 			/*
    185 			 * In FreeBSD, we need to return the
    186 			 * high-order flags, as we're using
    187 			 * IFF_PPROMISC, which is in those bits.
    188 			 *
    189 			 * XXX - DragonFly BSD?
    190 			 */
    191 			*if_flags |= (ifr.ifr_flagshigh << 16);
    192 #endif /* __FreeBSD__ */
    193 		}
    194 	}
    195 #ifdef __linux__
    196 	close(fd);
    197 #endif /* __linux__ */
    198 	return error ? -1 : 0;
    199 }
    200 
    201 
    202 static void
    203 pcap_netmap_close(pcap_t *p)
    204 {
    205 	struct pcap_netmap *pn = p->priv;
    206 	struct nm_desc *d = pn->d;
    207 	uint32_t if_flags = 0;
    208 
    209 	if (pn->must_clear_promisc) {
    210 		pcap_netmap_ioctl(p, SIOCGIFFLAGS, &if_flags); /* fetch flags */
    211 		if (if_flags & IFF_PPROMISC) {
    212 			if_flags &= ~IFF_PPROMISC;
    213 			pcap_netmap_ioctl(p, SIOCSIFFLAGS, &if_flags);
    214 		}
    215 	}
    216 	nm_close(d);
    217 	pcapint_cleanup_live_common(p);
    218 }
    219 
    220 
    221 static int
    222 pcap_netmap_activate(pcap_t *p)
    223 {
    224 	struct pcap_netmap *pn = p->priv;
    225 	struct nm_desc *d;
    226 	uint32_t if_flags = 0;
    227 
    228 	d = nm_open(p->opt.device, NULL, 0, NULL);
    229 	if (d == NULL) {
    230 		pcapint_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
    231 		    errno, "netmap open: cannot access %s",
    232 		    p->opt.device);
    233 		pcapint_cleanup_live_common(p);
    234 		return (PCAP_ERROR);
    235 	}
    236 #if 0
    237 	fprintf(stderr, "%s device %s priv %p fd %d ports %d..%d\n",
    238 	    __FUNCTION__, p->opt.device, d, d->fd,
    239 	    d->first_rx_ring, d->last_rx_ring);
    240 #endif
    241 	pn->d = d;
    242 	p->fd = d->fd;
    243 
    244 	/*
    245 	 * Turn a negative snapshot value (invalid), a snapshot value of
    246 	 * 0 (unspecified), or a value bigger than the normal maximum
    247 	 * value, into the maximum allowed value.
    248 	 *
    249 	 * If some application really *needs* a bigger snapshot
    250 	 * length, we should just increase MAXIMUM_SNAPLEN.
    251 	 */
    252 	if (p->snapshot <= 0 || p->snapshot > MAXIMUM_SNAPLEN)
    253 		p->snapshot = MAXIMUM_SNAPLEN;
    254 
    255 	if (p->opt.promisc && !(d->req.nr_ringid & NETMAP_SW_RING)) {
    256 		pcap_netmap_ioctl(p, SIOCGIFFLAGS, &if_flags); /* fetch flags */
    257 		if (!(if_flags & IFF_PPROMISC)) {
    258 			pn->must_clear_promisc = 1;
    259 			if_flags |= IFF_PPROMISC;
    260 			pcap_netmap_ioctl(p, SIOCSIFFLAGS, &if_flags);
    261 		}
    262 	}
    263 	p->linktype = DLT_EN10MB;
    264 	p->selectable_fd = p->fd;
    265 	p->read_op = pcap_netmap_dispatch;
    266 	p->inject_op = pcap_netmap_inject;
    267 	p->setfilter_op = pcapint_install_bpf_program;
    268 	p->setdirection_op = NULL;
    269 	p->set_datalink_op = NULL;
    270 	p->getnonblock_op = pcapint_getnonblock_fd;
    271 	p->setnonblock_op = pcapint_setnonblock_fd;
    272 	p->stats_op = pcap_netmap_stats;
    273 	p->cleanup_op = pcap_netmap_close;
    274 
    275 	return (0);
    276 }
    277 
    278 
    279 pcap_t *
    280 pcap_netmap_create(const char *device, char *ebuf, int *is_ours)
    281 {
    282 	pcap_t *p;
    283 
    284 	*is_ours = (!strncmp(device, "netmap:", 7) || !strncmp(device, "vale", 4));
    285 	if (! *is_ours)
    286 		return NULL;
    287 	p = PCAP_CREATE_COMMON(ebuf, struct pcap_netmap);
    288 	if (p == NULL)
    289 		return (NULL);
    290 	p->activate_op = pcap_netmap_activate;
    291 	return (p);
    292 }
    293 
    294 /*
    295  * The "device name" for netmap devices isn't a name for a device, it's
    296  * an expression that indicates how the device should be set up, so
    297  * there's no way to enumerate them.
    298  */
    299 int
    300 pcap_netmap_findalldevs(pcap_if_list_t *devlistp _U_, char *err_str _U_)
    301 {
    302 	return 0;
    303 }
    304