1 /* $NetBSD: pcap-linux.c,v 1.9 2026/03/18 23:43:20 christos Exp $ */ 2 3 /* 4 * pcap-linux.c: Packet capture interface to the Linux kernel 5 * 6 * Copyright (c) 2000 Torsten Landschoff <torsten (at) debian.org> 7 * Sebastian Krahmer <krahmer (at) cs.uni-potsdam.de> 8 * 9 * License: BSD 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in 19 * the documentation and/or other materials provided with the 20 * distribution. 21 * 3. The names of the authors may not be used to endorse or promote 22 * products derived from this software without specific prior 23 * written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 26 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 27 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 28 * 29 * Modifications: Added PACKET_MMAP support 30 * Paolo Abeni <paolo.abeni (at) email.it> 31 * Added TPACKET_V3 support 32 * Gabor Tatarka <gabor.tatarka (at) ericsson.com> 33 * 34 * based on previous works of: 35 * Simon Patarin <patarin (at) cs.unibo.it> 36 * Phil Wood <cpw (at) lanl.gov> 37 * 38 * Monitor-mode support for mac80211 includes code taken from the iw 39 * command; the copyright notice for that code is 40 * 41 * Copyright (c) 2007, 2008 Johannes Berg 42 * Copyright (c) 2007 Andy Lutomirski 43 * Copyright (c) 2007 Mike Kershaw 44 * Copyright (c) 2008 Gábor Stefanik 45 * 46 * All rights reserved. 47 * 48 * Redistribution and use in source and binary forms, with or without 49 * modification, are permitted provided that the following conditions 50 * are met: 51 * 1. 
Redistributions of source code must retain the above copyright 52 * notice, this list of conditions and the following disclaimer. 53 * 2. Redistributions in binary form must reproduce the above copyright 54 * notice, this list of conditions and the following disclaimer in the 55 * documentation and/or other materials provided with the distribution. 56 * 3. The name of the author may not be used to endorse or promote products 57 * derived from this software without specific prior written permission. 58 * 59 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 60 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 61 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 62 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 63 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 64 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 65 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 66 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 67 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 69 * SUCH DAMAGE. 
70 */ 71 72 73 #define _GNU_SOURCE 74 75 #include <sys/cdefs.h> 76 __RCSID("$NetBSD: pcap-linux.c,v 1.9 2026/03/18 23:43:20 christos Exp $"); 77 78 #include <config.h> 79 80 #include <errno.h> 81 #include <stdio.h> 82 #include <stdlib.h> 83 #include <unistd.h> 84 #include <fcntl.h> 85 #include <string.h> 86 #include <limits.h> 87 #include <endian.h> 88 #include <sys/stat.h> 89 #include <sys/socket.h> 90 #include <sys/ioctl.h> 91 #include <sys/utsname.h> 92 #include <sys/mman.h> 93 #include <linux/if.h> 94 #include <linux/if_packet.h> 95 #include <linux/sockios.h> 96 #include <linux/ethtool.h> 97 #include <netinet/in.h> 98 #include <linux/if_ether.h> 99 #include <linux/netlink.h> 100 #include <linux/if_arp.h> 101 #include <poll.h> 102 #include <dirent.h> 103 #include <sys/eventfd.h> 104 105 #include "pcap-int.h" 106 #include "pcap-util.h" 107 #include "pcap/sll.h" 108 #include "pcap/vlan.h" 109 #include "pcap/can_socketcan.h" 110 111 #include "diag-control.h" 112 113 /* 114 * We require TPACKET_V2 support. 115 */ 116 #ifndef TPACKET2_HDRLEN 117 #error "Libpcap will only work if TPACKET_V2 is supported; you must build for a 2.6.27 or later kernel" 118 #endif 119 120 /* check for memory mapped access availability. We assume every needed 121 * struct is defined if the macro TPACKET_HDRLEN is defined, because it 122 * uses many ring related structs and macros */ 123 #ifdef TPACKET3_HDRLEN 124 # define HAVE_TPACKET3 125 #endif /* TPACKET3_HDRLEN */ 126 127 /* 128 * Not all compilers that are used to compile code to run on Linux have 129 * these builtins. For example, older versions of GCC don't, and at 130 * least some people are doing cross-builds for MIPS with older versions 131 * of GCC. 
132 */ 133 #ifndef HAVE___ATOMIC_LOAD_N 134 #define __atomic_load_n(ptr, memory_model) (*(ptr)) 135 #endif 136 #ifndef HAVE___ATOMIC_STORE_N 137 #define __atomic_store_n(ptr, val, memory_model) *(ptr) = (val) 138 #endif 139 140 #define packet_mmap_acquire(pkt) \ 141 (__atomic_load_n(&pkt->tp_status, __ATOMIC_ACQUIRE) != TP_STATUS_KERNEL) 142 #define packet_mmap_release(pkt) \ 143 (__atomic_store_n(&pkt->tp_status, TP_STATUS_KERNEL, __ATOMIC_RELEASE)) 144 #define packet_mmap_v3_acquire(pkt) \ 145 (__atomic_load_n(&pkt->hdr.bh1.block_status, __ATOMIC_ACQUIRE) != TP_STATUS_KERNEL) 146 #define packet_mmap_v3_release(pkt) \ 147 (__atomic_store_n(&pkt->hdr.bh1.block_status, TP_STATUS_KERNEL, __ATOMIC_RELEASE)) 148 149 #include <linux/types.h> 150 #include <linux/filter.h> 151 152 #ifdef HAVE_LINUX_NET_TSTAMP_H 153 #include <linux/net_tstamp.h> 154 #endif 155 156 /* 157 * For checking whether a device is a bonding device. 158 */ 159 #include <linux/if_bonding.h> 160 161 /* 162 * Got libnl? 163 */ 164 #ifdef HAVE_LIBNL 165 #include <linux/nl80211.h> 166 167 #include <netlink/genl/genl.h> 168 #include <netlink/genl/family.h> 169 #include <netlink/genl/ctrl.h> 170 #include <netlink/msg.h> 171 #include <netlink/attr.h> 172 #endif /* HAVE_LIBNL */ 173 174 #ifndef HAVE_SOCKLEN_T 175 typedef int socklen_t; 176 #endif 177 178 #define MAX_LINKHEADER_SIZE 256 179 180 /* 181 * When capturing on all interfaces we use this as the buffer size. 182 * Should be bigger then all MTUs that occur in real life. 183 * 64kB should be enough for now. 184 */ 185 #define BIGGER_THAN_ALL_MTUS (64*1024) 186 187 /* 188 * Private data for capturing on Linux PF_PACKET sockets. 
 */
struct pcap_linux {
	long long sysfs_dropped; /* packets reported dropped by /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors */
	struct pcap_stat stat;	/* NOTE(review): presumably the running capture statistics for this handle - confirm against pcap_stats_linux() */

	char	*device;	/* device name; heap-allocated, freed in pcap_cleanup_linux() */
	int	filter_in_userland; /* must filter in userland */
	int	blocks_to_filter_in_userland; /* NOTE(review): not set or read in the visible code; looks like a count of ring blocks still needing userland filtering - confirm */
	int	must_do_on_close; /* stuff we must do when we close (MUST_* bits below) */
	int	timeout;	/* timeout for buffering; 0, > 0 and < 0 select different poll() behavior in set_poll_timeout() */
	int	cooked;		/* using SOCK_DGRAM rather than SOCK_RAW */
	int	ifindex;	/* interface index of device we're bound to */
	int	lo_ifindex;	/* interface index of the loopback device */
	int	netdown;	/* we got an ENETDOWN and haven't resolved it */
	bpf_u_int32 oldmode;	/* mode to restore when turning monitor mode off */
	char	*mondevice;	/* mac80211 monitor device we created; heap-allocated, freed in pcap_cleanup_linux() */
	u_char	*mmapbuf;	/* memory-mapped region pointer */
	size_t	mmapbuflen;	/* size of region */
	int	vlan_offset;	/* offset at which to insert vlan tags; if -1, don't insert */
	u_int	tp_version;	/* version of tpacket_hdr for mmaped ring */
	u_int	tp_hdrlen;	/* hdrlen of tpacket_hdr for mmaped ring */
	u_char	*oneshot_buffer; /* buffer for copy of packet; freed in pcap_cleanup_linux() */
	int	poll_timeout;	/* timeout to use in poll(); computed by set_poll_timeout() (-1 = block forever, 0 = don't wait) */
#ifdef HAVE_TPACKET3
	unsigned char *current_packet; /* Current packet within the TPACKET_V3 block. Move to next block if NULL. */
	int packets_left; /* Unhandled packets left within the block from previous call to pcap_read_linux_mmap_v3 in case of TPACKET_V3. */
#endif
	int poll_breakloop_fd; /* fd to an eventfd to break from blocking operations; -1 when not open */
};

/*
 * Stuff to do when we close.
 *
 * These are bit flags stored in the must_do_on_close member above and
 * tested in pcap_cleanup_linux().
 */
#define MUST_DELETE_MONIF 0x00000001	/* delete monitor-mode interface */

/*
 * Prototypes for internal functions and methods.
226 */ 227 static int get_if_flags(const char *, bpf_u_int32 *, char *); 228 static int is_wifi(const char *); 229 static int map_arphrd_to_dlt(pcap_t *, int, const char *, int); 230 static int pcap_activate_linux(pcap_t *); 231 static int setup_socket(pcap_t *, int); 232 static int setup_mmapped(pcap_t *); 233 static int pcap_can_set_rfmon_linux(pcap_t *); 234 static int pcap_inject_linux(pcap_t *, const void *, int); 235 static int pcap_stats_linux(pcap_t *, struct pcap_stat *); 236 static int pcap_setfilter_linux(pcap_t *, struct bpf_program *); 237 static int pcap_setdirection_linux(pcap_t *, pcap_direction_t); 238 static int pcap_set_datalink_linux(pcap_t *, int); 239 static void pcap_cleanup_linux(pcap_t *); 240 241 union thdr { 242 struct tpacket2_hdr *h2; 243 #ifdef HAVE_TPACKET3 244 struct tpacket_block_desc *h3; 245 #endif 246 u_char *raw; 247 }; 248 249 #define RING_GET_FRAME_AT(h, offset) (((u_char **)h->buffer)[(offset)]) 250 #define RING_GET_CURRENT_FRAME(h) RING_GET_FRAME_AT(h, h->offset) 251 252 static void destroy_ring(pcap_t *handle); 253 static int create_ring(pcap_t *handle); 254 static int prepare_tpacket_socket(pcap_t *handle); 255 static int pcap_read_linux_mmap_v2(pcap_t *, int, pcap_handler , u_char *); 256 #ifdef HAVE_TPACKET3 257 static int pcap_read_linux_mmap_v3(pcap_t *, int, pcap_handler , u_char *); 258 #endif 259 static int pcap_setnonblock_linux(pcap_t *p, int nonblock); 260 static int pcap_getnonblock_linux(pcap_t *p); 261 static void pcapint_oneshot_linux(u_char *user, const struct pcap_pkthdr *h, 262 const u_char *bytes); 263 264 /* 265 * In pre-3.0 kernels, the tp_vlan_tci field is set to whatever the 266 * vlan_tci field in the skbuff is. 0 can either mean "not on a VLAN" 267 * or "on VLAN 0". There is no flag set in the tp_status field to 268 * distinguish between them. 
269 * 270 * In 3.0 and later kernels, if there's a VLAN tag present, the tp_vlan_tci 271 * field is set to the VLAN tag, and the TP_STATUS_VLAN_VALID flag is set 272 * in the tp_status field, otherwise the tp_vlan_tci field is set to 0 and 273 * the TP_STATUS_VLAN_VALID flag isn't set in the tp_status field. 274 * 275 * With a pre-3.0 kernel, we cannot distinguish between packets with no 276 * VLAN tag and packets on VLAN 0, so we will mishandle some packets, and 277 * there's nothing we can do about that. 278 * 279 * So, on those systems, which never set the TP_STATUS_VLAN_VALID flag, we 280 * continue the behavior of earlier libpcaps, wherein we treated packets 281 * with a VLAN tag of 0 as being packets without a VLAN tag rather than packets 282 * on VLAN 0. We do this by treating packets with a tp_vlan_tci of 0 and 283 * with the TP_STATUS_VLAN_VALID flag not set in tp_status as not having 284 * VLAN tags. This does the right thing on 3.0 and later kernels, and 285 * continues the old unfixably-imperfect behavior on pre-3.0 kernels. 286 * 287 * If TP_STATUS_VLAN_VALID isn't defined, we test it as the 0x10 bit; it 288 * has that value in 3.0 and later kernels. 289 */ 290 #ifdef TP_STATUS_VLAN_VALID 291 #define VLAN_VALID(hdr, hv) ((hv)->tp_vlan_tci != 0 || ((hdr)->tp_status & TP_STATUS_VLAN_VALID)) 292 #else 293 /* 294 * This is being compiled on a system that lacks TP_STATUS_VLAN_VALID, 295 * so we test with the value it has in the 3.0 and later kernels, so 296 * we can test it if we're running on a system that has it. (If we're 297 * running on a system that doesn't have it, it won't be set in the 298 * tp_status field, so the tests of it will always fail; that means 299 * we behave the way we did before we introduced this macro.) 
300 */ 301 #define VLAN_VALID(hdr, hv) ((hv)->tp_vlan_tci != 0 || ((hdr)->tp_status & 0x10)) 302 #endif 303 304 #ifdef TP_STATUS_VLAN_TPID_VALID 305 # define VLAN_TPID(hdr, hv) (((hv)->tp_vlan_tpid || ((hdr)->tp_status & TP_STATUS_VLAN_TPID_VALID)) ? (hv)->tp_vlan_tpid : ETH_P_8021Q) 306 #else 307 # define VLAN_TPID(hdr, hv) ETH_P_8021Q 308 #endif 309 310 /* 311 * Required select timeout if we're polling for an "interface disappeared" 312 * indication - 1 millisecond. 313 */ 314 static const struct timeval netdown_timeout = { 315 0, 1000 /* 1000 microseconds = 1 millisecond */ 316 }; 317 318 /* 319 * Wrap some ioctl calls 320 */ 321 static int iface_get_id(int fd, const char *device, char *ebuf); 322 static int iface_get_mtu(int fd, const char *device, char *ebuf); 323 static int iface_get_arptype(int fd, const char *device, char *ebuf); 324 static int iface_bind(int fd, int ifindex, char *ebuf, int protocol); 325 static int enter_rfmon_mode(pcap_t *handle, int sock_fd, 326 const char *device); 327 static int iface_get_ts_types(const char *device, pcap_t *handle, 328 char *ebuf); 329 static int iface_get_offload(pcap_t *handle); 330 331 static int fix_program(pcap_t *handle, struct sock_fprog *fcode); 332 static int fix_offset(pcap_t *handle, struct bpf_insn *p); 333 static int set_kernel_filter(pcap_t *handle, struct sock_fprog *fcode); 334 static int reset_kernel_filter(pcap_t *handle); 335 336 static struct sock_filter total_insn 337 = BPF_STMT(BPF_RET | BPF_K, 0); 338 static struct sock_fprog total_fcode 339 = { 1, &total_insn }; 340 341 static int iface_dsa_get_proto_info(const char *device, pcap_t *handle); 342 343 pcap_t * 344 pcapint_create_interface(const char *device, char *ebuf) 345 { 346 pcap_t *handle; 347 348 handle = PCAP_CREATE_COMMON(ebuf, struct pcap_linux); 349 if (handle == NULL) 350 return NULL; 351 352 handle->activate_op = pcap_activate_linux; 353 handle->can_set_rfmon_op = pcap_can_set_rfmon_linux; 354 355 /* 356 * See what time stamp types 
we support. 357 */ 358 if (iface_get_ts_types(device, handle, ebuf) == -1) { 359 pcap_close(handle); 360 return NULL; 361 } 362 363 /* 364 * We claim that we support microsecond and nanosecond time 365 * stamps. 366 * 367 * XXX - with adapter-supplied time stamps, can we choose 368 * microsecond or nanosecond time stamps on arbitrary 369 * adapters? 370 */ 371 handle->tstamp_precision_list = malloc(2 * sizeof(u_int)); 372 if (handle->tstamp_precision_list == NULL) { 373 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 374 errno, "malloc"); 375 pcap_close(handle); 376 return NULL; 377 } 378 handle->tstamp_precision_list[0] = PCAP_TSTAMP_PRECISION_MICRO; 379 handle->tstamp_precision_list[1] = PCAP_TSTAMP_PRECISION_NANO; 380 handle->tstamp_precision_count = 2; 381 382 /* 383 * Start out with the breakloop handle not open; we don't 384 * need it until we're activated and ready to capture. 385 */ 386 struct pcap_linux *handlep = handle->priv; 387 handlep->poll_breakloop_fd = -1; 388 389 return handle; 390 } 391 392 #ifdef HAVE_LIBNL 393 /* 394 * If interface {if_name} is a mac80211 driver, the file 395 * /sys/class/net/{if_name}/phy80211 is a symlink to 396 * /sys/class/ieee80211/{phydev_name}, for some {phydev_name}. 397 * 398 * On Fedora 9, with a 2.6.26.3-29 kernel, my Zydas stick, at 399 * least, has a "wmaster0" device and a "wlan0" device; the 400 * latter is the one with the IP address. Both show up in 401 * "tcpdump -D" output. Capturing on the wmaster0 device 402 * captures with 802.11 headers. 403 * 404 * airmon-ng searches through /sys/class/net for devices named 405 * monN, starting with mon0; as soon as one *doesn't* exist, 406 * it chooses that as the monitor device name. If the "iw" 407 * command exists, it does 408 * 409 * iw dev {if_name} interface add {monif_name} type monitor 410 * 411 * where {monif_name} is the monitor device. It then (sigh) sleeps 412 * .1 second, and then configures the device up. 
Otherwise, if 413 * /sys/class/ieee80211/{phydev_name}/add_iface is a file, it writes 414 * {mondev_name}, without a newline, to that file, and again (sigh) 415 * sleeps .1 second, and then iwconfig's that device into monitor 416 * mode and configures it up. Otherwise, you can't do monitor mode. 417 * 418 * All these devices are "glued" together by having the 419 * /sys/class/net/{if_name}/phy80211 links pointing to the same 420 * place, so, given a wmaster, wlan, or mon device, you can 421 * find the other devices by looking for devices with 422 * the same phy80211 link. 423 * 424 * To turn monitor mode off, delete the monitor interface, 425 * either with 426 * 427 * iw dev {monif_name} interface del 428 * 429 * or by sending {monif_name}, with no NL, down 430 * /sys/class/ieee80211/{phydev_name}/remove_iface 431 * 432 * Note: if you try to create a monitor device named "monN", and 433 * there's already a "monN" device, it fails, as least with 434 * the netlink interface (which is what iw uses), with a return 435 * value of -ENFILE. (Return values are negative errnos.) We 436 * could probably use that to find an unused device. 437 * 438 * Yes, you can have multiple monitor devices for a given 439 * physical device. 440 */ 441 442 /* 443 * Is this a mac80211 device? If so, fill in the physical device path and 444 * return 1; if not, return 0. On an error, fill in handle->errbuf and 445 * return PCAP_ERROR. 446 */ 447 static int 448 get_mac80211_phydev(pcap_t *handle, const char *device, char *phydev_path, 449 size_t phydev_max_pathlen) 450 { 451 char *pathstr; 452 ssize_t bytes_read; 453 454 /* 455 * Generate the path string for the symlink to the physical device. 
456 */ 457 if (asprintf(&pathstr, "/sys/class/net/%s/phy80211", device) == -1) { 458 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 459 "%s: Can't generate path name string for /sys/class/net device", 460 device); 461 return PCAP_ERROR; 462 } 463 bytes_read = readlink(pathstr, phydev_path, phydev_max_pathlen); 464 if (bytes_read == -1) { 465 if (errno == ENOENT) { 466 /* 467 * This either means that the directory 468 * /sys/class/net/{device} exists but doesn't 469 * have anything named "phy80211" in it, 470 * in which case it's not a mac80211 device, 471 * or that the directory doesn't exist, 472 * in which case the device doesn't exist. 473 * 474 * Directly check whether the directory 475 * exists. 476 */ 477 struct stat statb; 478 479 free(pathstr); 480 if (asprintf(&pathstr, "/sys/class/net/%s", device) == -1) { 481 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 482 "%s: Can't generate path name string for /sys/class/net device", 483 device); 484 return PCAP_ERROR; 485 } 486 if (stat(pathstr, &statb) == -1) { 487 if (errno == ENOENT) { 488 /* 489 * No such device. 490 */ 491 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 492 "%s: %s doesn't exist", 493 device, pathstr); 494 free(pathstr); 495 return PCAP_ERROR_NO_SUCH_DEVICE; 496 } 497 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 498 "%s: Can't stat %s: %s", 499 device, pathstr, strerror(errno)); 500 free(pathstr); 501 return PCAP_ERROR; 502 } 503 504 /* 505 * Path to the directory that would contain 506 * "phy80211" exists, but "phy80211" doesn't 507 * exist; that means it's not a mac80211 508 * device. 509 */ 510 free(pathstr); 511 return 0; 512 } 513 if (errno == EINVAL) { 514 /* 515 * Exists, but it's not a symlink; assume that 516 * means it's not a mac80211 device. 
517 */ 518 free(pathstr); 519 return 0; 520 } 521 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 522 errno, "%s: Can't readlink %s", device, pathstr); 523 free(pathstr); 524 return PCAP_ERROR; 525 } 526 free(pathstr); 527 phydev_path[bytes_read] = '\0'; 528 return 1; 529 } 530 531 struct nl80211_state { 532 struct nl_sock *nl_sock; 533 struct nl_cache *nl_cache; 534 struct genl_family *nl80211; 535 }; 536 537 static int 538 nl80211_init(pcap_t *handle, struct nl80211_state *state, const char *device) 539 { 540 int err; 541 542 state->nl_sock = nl_socket_alloc(); 543 if (!state->nl_sock) { 544 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 545 "%s: failed to allocate netlink handle", device); 546 return PCAP_ERROR; 547 } 548 549 if (genl_connect(state->nl_sock)) { 550 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 551 "%s: failed to connect to generic netlink", device); 552 goto out_handle_destroy; 553 } 554 555 err = genl_ctrl_alloc_cache(state->nl_sock, &state->nl_cache); 556 if (err < 0) { 557 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 558 "%s: failed to allocate generic netlink cache: %s", 559 device, nl_geterror(-err)); 560 goto out_handle_destroy; 561 } 562 563 state->nl80211 = genl_ctrl_search_by_name(state->nl_cache, "nl80211"); 564 if (!state->nl80211) { 565 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 566 "%s: nl80211 not found", device); 567 goto out_cache_free; 568 } 569 570 return 0; 571 572 out_cache_free: 573 nl_cache_free(state->nl_cache); 574 out_handle_destroy: 575 nl_socket_free(state->nl_sock); 576 return PCAP_ERROR; 577 } 578 579 static void 580 nl80211_cleanup(struct nl80211_state *state) 581 { 582 genl_family_put(state->nl80211); 583 nl_cache_free(state->nl_cache); 584 nl_socket_free(state->nl_sock); 585 } 586 587 static int 588 del_mon_if(pcap_t *handle, int sock_fd, struct nl80211_state *state, 589 const char *device, const char *mondevice); 590 591 static int 592 if_type_cb(struct nl_msg *msg, void* arg) 593 { 594 struct nlmsghdr* 
ret_hdr = nlmsg_hdr(msg); 595 struct nlattr *tb_msg[NL80211_ATTR_MAX + 1]; 596 int *type = (int*)arg; 597 598 struct genlmsghdr *gnlh = (struct genlmsghdr*) nlmsg_data(ret_hdr); 599 600 nla_parse(tb_msg, NL80211_ATTR_MAX, genlmsg_attrdata(gnlh, 0), 601 genlmsg_attrlen(gnlh, 0), NULL); 602 603 /* 604 * We sent a message asking for info about a single index. 605 * To be really paranoid, we could check if the index matched 606 * by examining nla_get_u32(tb_msg[NL80211_ATTR_IFINDEX]). 607 */ 608 609 if (tb_msg[NL80211_ATTR_IFTYPE]) { 610 *type = nla_get_u32(tb_msg[NL80211_ATTR_IFTYPE]); 611 } 612 613 return NL_SKIP; 614 } 615 616 static int 617 get_if_type(pcap_t *handle, int sock_fd, struct nl80211_state *state, 618 const char *device, int *type) 619 { 620 int ifindex; 621 struct nl_msg *msg; 622 int err; 623 624 ifindex = iface_get_id(sock_fd, device, handle->errbuf); 625 if (ifindex == -1) 626 return PCAP_ERROR; 627 628 msg = nlmsg_alloc(); 629 if (!msg) { 630 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 631 "%s: failed to allocate netlink msg", device); 632 return PCAP_ERROR; 633 } 634 635 genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, 636 genl_family_get_id(state->nl80211), 0, 637 0, NL80211_CMD_GET_INTERFACE, 0); 638 NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, ifindex); 639 640 err = nl_send_auto(state->nl_sock, msg); 641 nlmsg_free(msg); 642 if (err < 0) { 643 if (err == -NLE_FAILURE) { 644 /* 645 * Device not available; our caller should just 646 * keep trying. (libnl 2.x maps ENFILE to 647 * NLE_FAILURE; it can also map other errors 648 * to that, but there's not much we can do 649 * about that.) 650 */ 651 return 0; 652 } else { 653 /* 654 * Real failure, not just "that device is not 655 * available. 
656 */ 657 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 658 "%s: nl_send_auto failed getting interface type: %s", 659 device, nl_geterror(-err)); 660 return PCAP_ERROR; 661 } 662 } 663 664 struct nl_cb *cb = nl_cb_alloc(NL_CB_DEFAULT); 665 nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, if_type_cb, (void*)type); 666 err = nl_recvmsgs(state->nl_sock, cb); 667 nl_cb_put(cb); 668 669 if (err < 0) { 670 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 671 "%s: nl_recvmsgs failed getting interface type: %s", 672 device, nl_geterror(-err)); 673 return PCAP_ERROR; 674 } 675 676 /* 677 * If this is a mac80211 device not in monitor mode, nl_sock will be 678 * reused for add_mon_if. So we must wait for the ACK here so that 679 * add_mon_if does not receive it instead and incorrectly interpret 680 * the ACK as its NEW_INTERFACE command succeeding, even when it fails. 681 */ 682 err = nl_wait_for_ack(state->nl_sock); 683 if (err < 0) { 684 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 685 "%s: nl_wait_for_ack failed getting interface type: %s", 686 device, nl_geterror(-err)); 687 return PCAP_ERROR; 688 } 689 690 /* 691 * Success. 692 */ 693 return 1; 694 695 nla_put_failure: 696 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 697 "%s: nl_put failed getting interface type", 698 device); 699 nlmsg_free(msg); 700 // Do not call nl_cb_put(): nl_cb_alloc() has not been called. 
701 return PCAP_ERROR; 702 } 703 704 static int 705 add_mon_if(pcap_t *handle, int sock_fd, struct nl80211_state *state, 706 const char *device, const char *mondevice) 707 { 708 struct pcap_linux *handlep = handle->priv; 709 int ifindex; 710 struct nl_msg *msg; 711 int err; 712 713 ifindex = iface_get_id(sock_fd, device, handle->errbuf); 714 if (ifindex == -1) 715 return PCAP_ERROR; 716 717 msg = nlmsg_alloc(); 718 if (!msg) { 719 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 720 "%s: failed to allocate netlink msg", device); 721 return PCAP_ERROR; 722 } 723 724 genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, 725 genl_family_get_id(state->nl80211), 0, 726 0, NL80211_CMD_NEW_INTERFACE, 0); 727 NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, ifindex); 728 DIAG_OFF_NARROWING 729 NLA_PUT_STRING(msg, NL80211_ATTR_IFNAME, mondevice); 730 DIAG_ON_NARROWING 731 NLA_PUT_U32(msg, NL80211_ATTR_IFTYPE, NL80211_IFTYPE_MONITOR); 732 733 err = nl_send_sync(state->nl_sock, msg); // calls nlmsg_free() 734 if (err < 0) { 735 switch (err) { 736 737 case -NLE_FAILURE: 738 case -NLE_AGAIN: 739 /* 740 * Device not available; our caller should just 741 * keep trying. (libnl 2.x maps ENFILE to 742 * NLE_FAILURE; it can also map other errors 743 * to that, but there's not much we can do 744 * about that.) 745 */ 746 return 0; 747 748 case -NLE_OPNOTSUPP: 749 /* 750 * Device is a mac80211 device but adding it as a 751 * monitor mode device isn't supported. Report our 752 * error. 753 */ 754 return PCAP_ERROR_RFMON_NOTSUP; 755 756 default: 757 /* 758 * Real failure, not just "that device is not 759 * available." Report a generic error, using the 760 * error message from libnl. 761 */ 762 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 763 "%s: nl_send_sync failed adding %s interface: %s", 764 device, mondevice, nl_geterror(-err)); 765 return PCAP_ERROR; 766 } 767 } 768 769 /* 770 * Success. 771 */ 772 773 /* 774 * Try to remember the monitor device. 
775 */ 776 handlep->mondevice = strdup(mondevice); 777 if (handlep->mondevice == NULL) { 778 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 779 errno, "strdup"); 780 /* 781 * Get rid of the monitor device. 782 */ 783 del_mon_if(handle, sock_fd, state, device, mondevice); 784 return PCAP_ERROR; 785 } 786 return 1; 787 788 nla_put_failure: 789 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 790 "%s: nl_put failed adding %s interface", 791 device, mondevice); 792 nlmsg_free(msg); 793 return PCAP_ERROR; 794 } 795 796 static int 797 del_mon_if(pcap_t *handle, int sock_fd, struct nl80211_state *state, 798 const char *device, const char *mondevice) 799 { 800 int ifindex; 801 struct nl_msg *msg; 802 int err; 803 804 ifindex = iface_get_id(sock_fd, mondevice, handle->errbuf); 805 if (ifindex == -1) 806 return PCAP_ERROR; 807 808 msg = nlmsg_alloc(); 809 if (!msg) { 810 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 811 "%s: failed to allocate netlink msg", device); 812 return PCAP_ERROR; 813 } 814 815 genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, 816 genl_family_get_id(state->nl80211), 0, 817 0, NL80211_CMD_DEL_INTERFACE, 0); 818 NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, ifindex); 819 820 err = nl_send_sync(state->nl_sock, msg); // calls nlmsg_free() 821 if (err < 0) { 822 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 823 "%s: nl_send_sync failed deleting %s interface: %s", 824 device, mondevice, nl_geterror(-err)); 825 return PCAP_ERROR; 826 } 827 828 /* 829 * Success. 
830 */ 831 return 1; 832 833 nla_put_failure: 834 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 835 "%s: nl_put failed deleting %s interface", 836 device, mondevice); 837 nlmsg_free(msg); 838 return PCAP_ERROR; 839 } 840 #endif /* HAVE_LIBNL */ 841 842 static int pcap_protocol(pcap_t *handle) 843 { 844 int protocol; 845 846 protocol = handle->opt.protocol; 847 if (protocol == 0) 848 protocol = ETH_P_ALL; 849 850 return htons(protocol); 851 } 852 853 static int 854 pcap_can_set_rfmon_linux(pcap_t *handle) 855 { 856 #ifdef HAVE_LIBNL 857 char phydev_path[PATH_MAX+1]; 858 int ret; 859 #endif 860 861 if (strcmp(handle->opt.device, "any") == 0) { 862 /* 863 * Monitor mode makes no sense on the "any" device. 864 */ 865 return 0; 866 } 867 868 #ifdef HAVE_LIBNL 869 /* 870 * Bleah. There doesn't seem to be a way to ask a mac80211 871 * device, through libnl, whether it supports monitor mode; 872 * we'll just check whether the device appears to be a 873 * mac80211 device and, if so, assume the device supports 874 * monitor mode. 875 */ 876 ret = get_mac80211_phydev(handle, handle->opt.device, phydev_path, 877 PATH_MAX); 878 if (ret < 0) 879 return ret; /* error */ 880 if (ret == 1) 881 return 1; /* mac80211 device */ 882 #endif 883 884 return 0; 885 } 886 887 /* 888 * Grabs the number of missed packets by the interface from 889 * /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors. 890 * 891 * Compared to /proc/net/dev this avoids counting software drops, 892 * but may be unimplemented and just return 0. 893 * The author has found no straightforward way to check for support. 
894 */ 895 static long long int 896 linux_get_stat(const char * if_name, const char * stat) { 897 ssize_t bytes_read; 898 int fd; 899 char buffer[PATH_MAX]; 900 901 snprintf(buffer, sizeof(buffer), "/sys/class/net/%s/statistics/%s", if_name, stat); 902 fd = open(buffer, O_RDONLY); 903 if (fd == -1) 904 return 0; 905 906 bytes_read = read(fd, buffer, sizeof(buffer) - 1); 907 close(fd); 908 if (bytes_read == -1) 909 return 0; 910 buffer[bytes_read] = '\0'; 911 912 return strtoll(buffer, NULL, 10); 913 } 914 915 static long long int 916 linux_if_drops(const char * if_name) 917 { 918 long long int missed = linux_get_stat(if_name, "rx_missed_errors"); 919 long long int fifo = linux_get_stat(if_name, "rx_fifo_errors"); 920 return missed + fifo; 921 } 922 923 924 /* 925 * Monitor mode is kind of interesting because we have to reset the 926 * interface before exiting. The problem can't really be solved without 927 * some daemon taking care of managing usage counts. If we put the 928 * interface into monitor mode, we set a flag indicating that we must 929 * take it out of that mode when the interface is closed, and, when 930 * closing the interface, if that flag is set we take it out of monitor 931 * mode. 932 */ 933 934 static void pcap_cleanup_linux( pcap_t *handle ) 935 { 936 struct pcap_linux *handlep = handle->priv; 937 #ifdef HAVE_LIBNL 938 struct nl80211_state nlstate; 939 int ret; 940 #endif /* HAVE_LIBNL */ 941 942 if (handlep->must_do_on_close != 0) { 943 /* 944 * There's something we have to do when closing this 945 * pcap_t. 
946 */ 947 #ifdef HAVE_LIBNL 948 if (handlep->must_do_on_close & MUST_DELETE_MONIF) { 949 ret = nl80211_init(handle, &nlstate, handlep->device); 950 if (ret >= 0) { 951 ret = del_mon_if(handle, handle->fd, &nlstate, 952 handlep->device, handlep->mondevice); 953 nl80211_cleanup(&nlstate); 954 } 955 if (ret < 0) { 956 fprintf(stderr, 957 "Can't delete monitor interface %s (%s).\n" 958 "Please delete manually.\n", 959 handlep->mondevice, handle->errbuf); 960 } 961 } 962 #endif /* HAVE_LIBNL */ 963 964 /* 965 * Take this pcap out of the list of pcaps for which we 966 * have to take the interface out of some mode. 967 */ 968 pcapint_remove_from_pcaps_to_close(handle); 969 } 970 971 if (handle->fd != -1) { 972 /* 973 * Destroy the ring buffer (assuming we've set it up), 974 * and unmap it if it's mapped. 975 */ 976 destroy_ring(handle); 977 } 978 979 if (handlep->oneshot_buffer != NULL) { 980 free(handlep->oneshot_buffer); 981 handlep->oneshot_buffer = NULL; 982 } 983 984 if (handlep->mondevice != NULL) { 985 free(handlep->mondevice); 986 handlep->mondevice = NULL; 987 } 988 if (handlep->device != NULL) { 989 free(handlep->device); 990 handlep->device = NULL; 991 } 992 993 if (handlep->poll_breakloop_fd != -1) { 994 close(handlep->poll_breakloop_fd); 995 handlep->poll_breakloop_fd = -1; 996 } 997 pcapint_cleanup_live_common(handle); 998 } 999 1000 #ifdef HAVE_TPACKET3 1001 /* 1002 * Some versions of TPACKET_V3 have annoying bugs/misfeatures 1003 * around which we have to work. Determine if we have those 1004 * problems or not. 1005 * 3.19 is the first release with a fixed version of 1006 * TPACKET_V3. We treat anything before that as 1007 * not having a fixed version; that may really mean 1008 * it has *no* version. 1009 */ 1010 static int has_broken_tpacket_v3(void) 1011 { 1012 struct utsname utsname; 1013 const char *release; 1014 long major, minor; 1015 int matches, verlen; 1016 1017 /* No version information, assume broken. 
*/ 1018 if (uname(&utsname) == -1) 1019 return 1; 1020 release = utsname.release; 1021 1022 /* A malformed version, ditto. */ 1023 matches = sscanf(release, "%ld.%ld%n", &major, &minor, &verlen); 1024 if (matches != 2) 1025 return 1; 1026 if (release[verlen] != '.' && release[verlen] != '\0') 1027 return 1; 1028 1029 /* OK, a fixed version. */ 1030 if (major > 3 || (major == 3 && minor >= 19)) 1031 return 0; 1032 1033 /* Too old :( */ 1034 return 1; 1035 } 1036 #endif 1037 1038 /* 1039 * Set the timeout to be used in poll() with memory-mapped packet capture. 1040 */ 1041 static void 1042 set_poll_timeout(struct pcap_linux *handlep) 1043 { 1044 #ifdef HAVE_TPACKET3 1045 int broken_tpacket_v3 = has_broken_tpacket_v3(); 1046 #endif 1047 if (handlep->timeout == 0) { 1048 #ifdef HAVE_TPACKET3 1049 /* 1050 * XXX - due to a set of (mis)features in the TPACKET_V3 1051 * kernel code prior to the 3.19 kernel, blocking forever 1052 * with a TPACKET_V3 socket can, if few packets are 1053 * arriving and passing the socket filter, cause most 1054 * packets to be dropped. See libpcap issue #335 for the 1055 * full painful story. 1056 * 1057 * The workaround is to have poll() time out very quickly, 1058 * so we grab the frames handed to us, and return them to 1059 * the kernel, ASAP. 1060 */ 1061 if (handlep->tp_version == TPACKET_V3 && broken_tpacket_v3) 1062 handlep->poll_timeout = 1; /* don't block for very long */ 1063 else 1064 #endif 1065 handlep->poll_timeout = -1; /* block forever */ 1066 } else if (handlep->timeout > 0) { 1067 #ifdef HAVE_TPACKET3 1068 /* 1069 * For TPACKET_V3, the timeout is handled by the kernel, 1070 * so block forever; that way, we don't get extra timeouts. 1071 * Don't do that if we have a broken TPACKET_V3, though. 
1072 */ 1073 if (handlep->tp_version == TPACKET_V3 && !broken_tpacket_v3) 1074 handlep->poll_timeout = -1; /* block forever, let TPACKET_V3 wake us up */ 1075 else 1076 #endif 1077 handlep->poll_timeout = handlep->timeout; /* block for that amount of time */ 1078 } else { 1079 /* 1080 * Non-blocking mode; we call poll() to pick up error 1081 * indications, but we don't want it to wait for 1082 * anything. 1083 */ 1084 handlep->poll_timeout = 0; 1085 } 1086 } 1087 1088 static void pcap_breakloop_linux(pcap_t *handle) 1089 { 1090 pcapint_breakloop_common(handle); 1091 struct pcap_linux *handlep = handle->priv; 1092 1093 uint64_t value = 1; 1094 1095 if (handlep->poll_breakloop_fd != -1) { 1096 /* 1097 * XXX - pcap_breakloop() doesn't have a return value, 1098 * so we can't indicate an error. 1099 */ 1100 DIAG_OFF_WARN_UNUSED_RESULT 1101 (void)write(handlep->poll_breakloop_fd, &value, sizeof(value)); 1102 DIAG_ON_WARN_UNUSED_RESULT 1103 } 1104 } 1105 1106 /* 1107 * Set the offset at which to insert VLAN tags. 1108 * That should be the offset of the type field. 1109 */ 1110 static void 1111 set_vlan_offset(pcap_t *handle) 1112 { 1113 struct pcap_linux *handlep = handle->priv; 1114 1115 switch (handle->linktype) { 1116 1117 case DLT_EN10MB: 1118 /* 1119 * The type field is after the destination and source 1120 * MAC address. 1121 */ 1122 handlep->vlan_offset = 2 * ETH_ALEN; 1123 break; 1124 1125 case DLT_LINUX_SLL: 1126 /* 1127 * The type field is in the last 2 bytes of the 1128 * DLT_LINUX_SLL header. 1129 */ 1130 handlep->vlan_offset = SLL_HDR_LEN - 2; 1131 break; 1132 1133 default: 1134 handlep->vlan_offset = -1; /* unknown */ 1135 break; 1136 } 1137 } 1138 1139 /* 1140 * Get a handle for a live capture from the given device. You can 1141 * pass NULL as device to get all packages (without link level 1142 * information of course). 
If you pass 1 as promisc the interface 1143 * will be set to promiscuous mode (XXX: I think this usage should 1144 * be deprecated and functions be added to select that later allow 1145 * modification of that values -- Torsten). 1146 */ 1147 static int 1148 pcap_activate_linux(pcap_t *handle) 1149 { 1150 struct pcap_linux *handlep = handle->priv; 1151 const char *device; 1152 int is_any_device; 1153 struct ifreq ifr; 1154 int status; 1155 int ret; 1156 1157 device = handle->opt.device; 1158 1159 /* 1160 * Start out assuming no warnings. 1161 */ 1162 status = 0; 1163 1164 /* 1165 * Make sure the name we were handed will fit into the ioctls we 1166 * might perform on the device; if not, return a "No such device" 1167 * indication, as the Linux kernel shouldn't support creating 1168 * a device whose name won't fit into those ioctls. 1169 * 1170 * "Will fit" means "will fit, complete with a null terminator", 1171 * so if the length, which does *not* include the null terminator, 1172 * is greater than *or equal to* the size of the field into which 1173 * we'll be copying it, that won't fit. 1174 */ 1175 if (strlen(device) >= sizeof(ifr.ifr_name)) { 1176 /* 1177 * There's nothing more to say, so clear the error 1178 * message. 1179 */ 1180 handle->errbuf[0] = '\0'; 1181 status = PCAP_ERROR_NO_SUCH_DEVICE; 1182 goto fail; 1183 } 1184 1185 /* 1186 * Turn a negative snapshot value (invalid), a snapshot value of 1187 * 0 (unspecified), or a value bigger than the normal maximum 1188 * value, into the maximum allowed value. 1189 * 1190 * If some application really *needs* a bigger snapshot 1191 * length, we should just increase MAXIMUM_SNAPLEN. 
1192 */ 1193 if (handle->snapshot <= 0 || handle->snapshot > MAXIMUM_SNAPLEN) 1194 handle->snapshot = MAXIMUM_SNAPLEN; 1195 1196 handlep->device = strdup(device); 1197 if (handlep->device == NULL) { 1198 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 1199 errno, "strdup"); 1200 status = PCAP_ERROR; 1201 goto fail; 1202 } 1203 1204 /* 1205 * The "any" device is a special device which causes us not 1206 * to bind to a particular device and thus to look at all 1207 * devices. 1208 */ 1209 is_any_device = (strcmp(device, "any") == 0); 1210 if (is_any_device) { 1211 if (handle->opt.promisc) { 1212 handle->opt.promisc = 0; 1213 /* Just a warning. */ 1214 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 1215 "Promiscuous mode not supported on the \"any\" device"); 1216 status = PCAP_WARNING_PROMISC_NOTSUP; 1217 } 1218 } 1219 1220 /* copy timeout value */ 1221 handlep->timeout = handle->opt.timeout; 1222 1223 /* 1224 * If we're in promiscuous mode, then we probably want 1225 * to see when the interface drops packets too, so get an 1226 * initial count from 1227 * /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors 1228 */ 1229 if (handle->opt.promisc) 1230 handlep->sysfs_dropped = linux_if_drops(handlep->device); 1231 1232 /* 1233 * If the "any" device is specified, try to open a SOCK_DGRAM. 1234 * Otherwise, open a SOCK_RAW. 1235 */ 1236 ret = setup_socket(handle, is_any_device); 1237 if (ret < 0) { 1238 /* 1239 * Fatal error; the return value is the error code, 1240 * and handle->errbuf has been set to an appropriate 1241 * error message. 1242 */ 1243 status = ret; 1244 goto fail; 1245 } 1246 if (ret > 0) { 1247 /* 1248 * We got a warning; return that, as handle->errbuf 1249 * might have been overwritten by this warning. 1250 */ 1251 status = ret; 1252 } 1253 1254 /* 1255 * Success (possibly with a warning). 1256 * 1257 * First, try to allocate an event FD for breakloop, if 1258 * we're not going to start in non-blocking mode. 
1259 */ 1260 if (!handle->opt.nonblock) { 1261 handlep->poll_breakloop_fd = eventfd(0, EFD_NONBLOCK); 1262 if (handlep->poll_breakloop_fd == -1) { 1263 /* 1264 * Failed. 1265 */ 1266 pcapint_fmt_errmsg_for_errno(handle->errbuf, 1267 PCAP_ERRBUF_SIZE, errno, "could not open eventfd"); 1268 status = PCAP_ERROR; 1269 goto fail; 1270 } 1271 } 1272 1273 /* 1274 * Succeeded. 1275 * Try to set up memory-mapped access. 1276 */ 1277 ret = setup_mmapped(handle); 1278 if (ret < 0) { 1279 /* 1280 * We failed to set up to use it, or the 1281 * kernel supports it, but we failed to 1282 * enable it. The return value is the 1283 * error status to return and, if it's 1284 * PCAP_ERROR, handle->errbuf contains 1285 * the error message. 1286 */ 1287 status = ret; 1288 goto fail; 1289 } 1290 if (ret > 0) { 1291 /* 1292 * We got a warning; return that, as handle->errbuf 1293 * might have been overwritten by this warning. 1294 */ 1295 status = ret; 1296 } 1297 1298 /* 1299 * We succeeded. status has been set to the status to return, 1300 * which might be 0, or might be a PCAP_WARNING_ value. 1301 */ 1302 /* 1303 * Now that we have activated the mmap ring, we can 1304 * set the correct protocol. 
1305 */ 1306 if ((ret = iface_bind(handle->fd, handlep->ifindex, 1307 handle->errbuf, pcap_protocol(handle))) != 0) { 1308 status = ret; 1309 goto fail; 1310 } 1311 1312 handle->inject_op = pcap_inject_linux; 1313 handle->setfilter_op = pcap_setfilter_linux; 1314 handle->setdirection_op = pcap_setdirection_linux; 1315 handle->set_datalink_op = pcap_set_datalink_linux; 1316 handle->setnonblock_op = pcap_setnonblock_linux; 1317 handle->getnonblock_op = pcap_getnonblock_linux; 1318 handle->cleanup_op = pcap_cleanup_linux; 1319 handle->stats_op = pcap_stats_linux; 1320 handle->breakloop_op = pcap_breakloop_linux; 1321 1322 switch (handlep->tp_version) { 1323 1324 case TPACKET_V2: 1325 handle->read_op = pcap_read_linux_mmap_v2; 1326 break; 1327 #ifdef HAVE_TPACKET3 1328 case TPACKET_V3: 1329 handle->read_op = pcap_read_linux_mmap_v3; 1330 break; 1331 #endif 1332 } 1333 handle->oneshot_callback = pcapint_oneshot_linux; 1334 handle->selectable_fd = handle->fd; 1335 1336 return status; 1337 1338 fail: 1339 pcap_cleanup_linux(handle); 1340 return status; 1341 } 1342 1343 static int 1344 pcap_set_datalink_linux(pcap_t *handle, int dlt) 1345 { 1346 handle->linktype = dlt; 1347 1348 /* 1349 * Update the offset at which to insert VLAN tags for the 1350 * new link-layer type. 1351 */ 1352 set_vlan_offset(handle); 1353 1354 return 0; 1355 } 1356 1357 /* 1358 * linux_check_direction() 1359 * 1360 * Do checks based on packet direction. 1361 */ 1362 static inline int 1363 linux_check_direction(const pcap_t *handle, const struct sockaddr_ll *sll) 1364 { 1365 struct pcap_linux *handlep = handle->priv; 1366 1367 if (sll->sll_pkttype == PACKET_OUTGOING) { 1368 /* 1369 * Outgoing packet. 1370 * If this is from the loopback device, reject it; 1371 * we'll see the packet as an incoming packet as well, 1372 * and we don't want to see it twice. 
1373 */ 1374 if (sll->sll_ifindex == handlep->lo_ifindex) 1375 return 0; 1376 1377 /* 1378 * If this is an outgoing CAN frame, and the user doesn't 1379 * want only outgoing packets, reject it; CAN devices 1380 * and drivers, and the CAN stack, always arrange to 1381 * loop back transmitted packets, so they also appear 1382 * as incoming packets. We don't want duplicate packets, 1383 * and we can't easily distinguish packets looped back 1384 * by the CAN layer than those received by the CAN layer, 1385 * so we eliminate this packet instead. 1386 * 1387 * We check whether this is a CAN frame by checking whether 1388 * the device's hardware type is ARPHRD_CAN. 1389 */ 1390 if (sll->sll_hatype == ARPHRD_CAN && 1391 handle->direction != PCAP_D_OUT) 1392 return 0; 1393 1394 /* 1395 * If the user only wants incoming packets, reject it. 1396 */ 1397 if (handle->direction == PCAP_D_IN) 1398 return 0; 1399 } else { 1400 /* 1401 * Incoming packet. 1402 * If the user only wants outgoing packets, reject it. 1403 */ 1404 if (handle->direction == PCAP_D_OUT) 1405 return 0; 1406 } 1407 return 1; 1408 } 1409 1410 /* 1411 * Check whether the device to which the pcap_t is bound still exists. 1412 * We do so by asking what address the socket is bound to, and checking 1413 * whether the ifindex in the address is -1, meaning "that device is gone", 1414 * or some other value, meaning "that device still exists". 1415 */ 1416 static int 1417 device_still_exists(pcap_t *handle) 1418 { 1419 struct pcap_linux *handlep = handle->priv; 1420 struct sockaddr_ll addr; 1421 socklen_t addr_len; 1422 1423 /* 1424 * If handlep->ifindex is -1, the socket isn't bound, meaning 1425 * we're capturing on the "any" device; that device never 1426 * disappears. (It should also never be configured down, so 1427 * we shouldn't even get here, but let's make sure.) 1428 */ 1429 if (handlep->ifindex == -1) 1430 return (1); /* it's still here */ 1431 1432 /* 1433 * OK, now try to get the address for the socket. 
1434 */ 1435 addr_len = sizeof (addr); 1436 if (getsockname(handle->fd, (struct sockaddr *) &addr, &addr_len) == -1) { 1437 /* 1438 * Error - report an error and return -1. 1439 */ 1440 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 1441 errno, "getsockname failed"); 1442 return (-1); 1443 } 1444 if (addr.sll_ifindex == -1) { 1445 /* 1446 * This means the device went away. 1447 */ 1448 return (0); 1449 } 1450 1451 /* 1452 * The device presumably just went down. 1453 */ 1454 return (1); 1455 } 1456 1457 static int 1458 pcap_inject_linux(pcap_t *handle, const void *buf, int size) 1459 { 1460 struct pcap_linux *handlep = handle->priv; 1461 int ret; 1462 1463 if (handlep->ifindex == -1) { 1464 /* 1465 * We don't support sending on the "any" device. 1466 */ 1467 pcapint_strlcpy(handle->errbuf, 1468 "Sending packets isn't supported on the \"any\" device", 1469 PCAP_ERRBUF_SIZE); 1470 return (-1); 1471 } 1472 1473 if (handlep->cooked) { 1474 /* 1475 * We don't support sending on cooked-mode sockets. 1476 * 1477 * XXX - how do you send on a bound cooked-mode 1478 * socket? 1479 * Is a "sendto()" required there? 1480 */ 1481 pcapint_strlcpy(handle->errbuf, 1482 "Sending packets isn't supported in cooked mode", 1483 PCAP_ERRBUF_SIZE); 1484 return (-1); 1485 } 1486 1487 ret = (int)send(handle->fd, buf, size, 0); 1488 if (ret == -1) { 1489 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 1490 errno, "send"); 1491 return (-1); 1492 } 1493 return (ret); 1494 } 1495 1496 /* 1497 * Get the statistics for the given packet capture handle. 1498 */ 1499 static int 1500 pcap_stats_linux(pcap_t *handle, struct pcap_stat *stats) 1501 { 1502 struct pcap_linux *handlep = handle->priv; 1503 #ifdef HAVE_TPACKET3 1504 /* 1505 * For sockets using TPACKET_V2, the extra stuff at the end 1506 * of a struct tpacket_stats_v3 will not be filled in, and 1507 * we don't look at it so this is OK even for those sockets. 
1508 * In addition, the PF_PACKET socket code in the kernel only 1509 * uses the length parameter to compute how much data to 1510 * copy out and to indicate how much data was copied out, so 1511 * it's OK to base it on the size of a struct tpacket_stats. 1512 * 1513 * XXX - it's probably OK, in fact, to just use a 1514 * struct tpacket_stats for V3 sockets, as we don't 1515 * care about the tp_freeze_q_cnt stat. 1516 */ 1517 struct tpacket_stats_v3 kstats; 1518 #else /* HAVE_TPACKET3 */ 1519 struct tpacket_stats kstats; 1520 #endif /* HAVE_TPACKET3 */ 1521 socklen_t len = sizeof (struct tpacket_stats); 1522 1523 long long if_dropped = 0; 1524 1525 /* 1526 * To fill in ps_ifdrop, we parse 1527 * /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors 1528 * for the numbers 1529 */ 1530 if (handle->opt.promisc) 1531 { 1532 /* 1533 * XXX - is there any reason to do this by remembering 1534 * the last counts value, subtracting it from the 1535 * current counts value, and adding that to stat.ps_ifdrop, 1536 * maintaining stat.ps_ifdrop as a count, rather than just 1537 * saving the *initial* counts value and setting 1538 * stat.ps_ifdrop to the difference between the current 1539 * value and the initial value? 1540 * 1541 * One reason might be to handle the count wrapping 1542 * around, on platforms where the count is 32 bits 1543 * and where you might get more than 2^32 dropped 1544 * packets; is there any other reason? 1545 * 1546 * (We maintain the count as a long long int so that, 1547 * if the kernel maintains the counts as 64-bit even 1548 * on 32-bit platforms, we can handle the real count. 
1549 * 1550 * Unfortunately, we can't report 64-bit counts; we 1551 * need a better API for reporting statistics, such as 1552 * one that reports them in a style similar to the 1553 * pcapng Interface Statistics Block, so that 1) the 1554 * counts are 64-bit, 2) it's easier to add new statistics 1555 * without breaking the ABI, and 3) it's easier to 1556 * indicate to a caller that wants one particular 1557 * statistic that it's not available by just not supplying 1558 * it.) 1559 */ 1560 if_dropped = handlep->sysfs_dropped; 1561 handlep->sysfs_dropped = linux_if_drops(handlep->device); 1562 handlep->stat.ps_ifdrop += (u_int)(handlep->sysfs_dropped - if_dropped); 1563 } 1564 1565 /* 1566 * Try to get the packet counts from the kernel. 1567 */ 1568 if (getsockopt(handle->fd, SOL_PACKET, PACKET_STATISTICS, 1569 &kstats, &len) > -1) { 1570 /* 1571 * "ps_recv" counts only packets that *passed* the 1572 * filter, not packets that didn't pass the filter. 1573 * This includes packets later dropped because we 1574 * ran out of buffer space. 1575 * 1576 * "ps_drop" counts packets dropped because we ran 1577 * out of buffer space. It doesn't count packets 1578 * dropped by the interface driver. It counts only 1579 * packets that passed the filter. 1580 * 1581 * See above for ps_ifdrop. 1582 * 1583 * Both statistics include packets not yet read from 1584 * the kernel by libpcap, and thus not yet seen by 1585 * the application. 1586 * 1587 * In "linux/net/packet/af_packet.c", at least in 2.6.27 1588 * through 5.6 kernels, "tp_packets" is incremented for 1589 * every packet that passes the packet filter *and* is 1590 * successfully copied to the ring buffer; "tp_drops" is 1591 * incremented for every packet dropped because there's 1592 * not enough free space in the ring buffer. 
1593 * 1594 * When the statistics are returned for a PACKET_STATISTICS 1595 * "getsockopt()" call, "tp_drops" is added to "tp_packets", 1596 * so that "tp_packets" counts all packets handed to 1597 * the PF_PACKET socket, including packets dropped because 1598 * there wasn't room on the socket buffer - but not 1599 * including packets that didn't pass the filter. 1600 * 1601 * In the BSD BPF, the count of received packets is 1602 * incremented for every packet handed to BPF, regardless 1603 * of whether it passed the filter. 1604 * 1605 * We can't make "pcap_stats()" work the same on both 1606 * platforms, but the best approximation is to return 1607 * "tp_packets" as the count of packets and "tp_drops" 1608 * as the count of drops. 1609 * 1610 * Keep a running total because each call to 1611 * getsockopt(handle->fd, SOL_PACKET, PACKET_STATISTICS, .... 1612 * resets the counters to zero. 1613 */ 1614 handlep->stat.ps_recv += kstats.tp_packets; 1615 handlep->stat.ps_drop += kstats.tp_drops; 1616 *stats = handlep->stat; 1617 return 0; 1618 } 1619 1620 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, errno, 1621 "failed to get statistics from socket"); 1622 return -1; 1623 } 1624 1625 /* 1626 * A PF_PACKET socket can be bound to any network interface. 1627 */ 1628 static int 1629 can_be_bound(const char *name _U_) 1630 { 1631 return (1); 1632 } 1633 1634 /* 1635 * Get a socket to use with various interface ioctls. 1636 */ 1637 static int 1638 get_if_ioctl_socket(void) 1639 { 1640 int fd; 1641 1642 /* 1643 * This is a bit ugly. 1644 * 1645 * There isn't a socket type that's guaranteed to work. 1646 * 1647 * AF_NETLINK will work *if* you have Netlink configured into the 1648 * kernel (can it be configured out if you have any networking 1649 * support at all?) *and* if you're running a sufficiently recent 1650 * kernel, but not all the kernels we support are sufficiently 1651 * recent - that feature was introduced in Linux 4.6. 
1652 * 1653 * AF_UNIX will work *if* you have UNIX-domain sockets configured 1654 * into the kernel and *if* you're not on a system that doesn't 1655 * allow them - some SELinux systems don't allow you create them. 1656 * Most systems probably have them configured in, but not all systems 1657 * have them configured in and allow them to be created. 1658 * 1659 * AF_INET will work *if* you have IPv4 configured into the kernel, 1660 * but, apparently, some systems have network adapters but have 1661 * kernels without IPv4 support. 1662 * 1663 * AF_INET6 will work *if* you have IPv6 configured into the 1664 * kernel, but if you don't have AF_INET, you might not have 1665 * AF_INET6, either (that is, independently on its own grounds). 1666 * 1667 * AF_PACKET would work, except that some of these calls should 1668 * work even if you *don't* have capture permission (you should be 1669 * able to enumerate interfaces and get information about them 1670 * without capture permission; you shouldn't get a failure until 1671 * you try pcap_activate()). (If you don't allow programs to 1672 * get as much information as possible about interfaces if you 1673 * don't have permission to capture, you run the risk of users 1674 * asking "why isn't it showing XXX" - or, worse, if you don't 1675 * show interfaces *at all* if you don't have permission to 1676 * capture on them, "why do no interfaces show up?" - when the 1677 * real problem is a permissions problem. Error reports of that 1678 * type require a lot more back-and-forth to debug, as evidenced 1679 * by many Wireshark bugs/mailing list questions/Q&A questions.) 
1680 * 1681 * So: 1682 * 1683 * we first try an AF_NETLINK socket, where "try" includes 1684 * "try to do a device ioctl on it", as, in the future, once 1685 * pre-4.6 kernels are sufficiently rare, that will probably 1686 * be the mechanism most likely to work; 1687 * 1688 * if that fails, we try an AF_UNIX socket, as that's less 1689 * likely to be configured out on a networking-capable system 1690 * than is IP; 1691 * 1692 * if that fails, we try an AF_INET6 socket; 1693 * 1694 * if that fails, we try an AF_INET socket. 1695 */ 1696 fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); 1697 if (fd != -1) { 1698 /* 1699 * OK, let's make sure we can do an SIOCGIFNAME 1700 * ioctl. 1701 */ 1702 struct ifreq ifr; 1703 1704 memset(&ifr, 0, sizeof(ifr)); 1705 if (ioctl(fd, SIOCGIFNAME, &ifr) == 0 || 1706 errno != EOPNOTSUPP) { 1707 /* 1708 * It succeeded, or failed for some reason 1709 * other than "netlink sockets don't support 1710 * device ioctls". Go with the AF_NETLINK 1711 * socket. 1712 */ 1713 return (fd); 1714 } 1715 1716 /* 1717 * OK, that didn't work, so it's as bad as "netlink 1718 * sockets aren't available". Close the socket and 1719 * drive on. 1720 */ 1721 close(fd); 1722 } 1723 1724 /* 1725 * Now try an AF_UNIX socket. 1726 */ 1727 fd = socket(AF_UNIX, SOCK_RAW, 0); 1728 if (fd != -1) { 1729 /* 1730 * OK, we got it! 1731 */ 1732 return (fd); 1733 } 1734 1735 /* 1736 * Now try an AF_INET6 socket. 1737 */ 1738 fd = socket(AF_INET6, SOCK_DGRAM, 0); 1739 if (fd != -1) { 1740 return (fd); 1741 } 1742 1743 /* 1744 * Now try an AF_INET socket. 1745 * 1746 * XXX - if that fails, is there anything else we should try? 1747 * AF_CAN, for embedded systems in vehicles, in case they're 1748 * built without Internet protocol support? Any other socket 1749 * types popular in non-Internet embedded systems? 1750 */ 1751 return (socket(AF_INET, SOCK_DGRAM, 0)); 1752 } 1753 1754 /* 1755 * Get additional flags for a device, using SIOCGIFMEDIA. 
1756 */ 1757 static int 1758 get_if_flags(const char *name, bpf_u_int32 *flags, char *errbuf) 1759 { 1760 int sock; 1761 FILE *fh; 1762 unsigned int arptype; 1763 struct ifreq ifr; 1764 struct ethtool_value info; 1765 1766 if (*flags & PCAP_IF_LOOPBACK) { 1767 /* 1768 * Loopback devices aren't wireless, and "connected"/ 1769 * "disconnected" doesn't apply to them. 1770 */ 1771 *flags |= PCAP_IF_CONNECTION_STATUS_NOT_APPLICABLE; 1772 return 0; 1773 } 1774 1775 sock = get_if_ioctl_socket(); 1776 if (sock == -1) { 1777 pcapint_fmt_errmsg_for_errno(errbuf, PCAP_ERRBUF_SIZE, errno, 1778 "Can't create socket to get ethtool information for %s", 1779 name); 1780 return -1; 1781 } 1782 1783 /* 1784 * OK, what type of network is this? 1785 * In particular, is it wired or wireless? 1786 */ 1787 if (is_wifi(name)) { 1788 /* 1789 * Wi-Fi, hence wireless. 1790 */ 1791 *flags |= PCAP_IF_WIRELESS; 1792 } else { 1793 /* 1794 * OK, what does /sys/class/net/{if_name}/type contain? 1795 * (We don't use that for Wi-Fi, as it'll report 1796 * "Ethernet", i.e. ARPHRD_ETHER, for non-monitor- 1797 * mode devices.) 1798 */ 1799 char *pathstr; 1800 1801 if (asprintf(&pathstr, "/sys/class/net/%s/type", name) == -1) { 1802 snprintf(errbuf, PCAP_ERRBUF_SIZE, 1803 "%s: Can't generate path name string for /sys/class/net device", 1804 name); 1805 close(sock); 1806 return -1; 1807 } 1808 fh = fopen(pathstr, "r"); 1809 if (fh != NULL) { 1810 if (fscanf(fh, "%u", &arptype) == 1) { 1811 /* 1812 * OK, we got an ARPHRD_ type; what is it? 1813 */ 1814 switch (arptype) { 1815 1816 case ARPHRD_LOOPBACK: 1817 /* 1818 * These are types to which 1819 * "connected" and "disconnected" 1820 * don't apply, so don't bother 1821 * asking about it. 1822 * 1823 * XXX - add other types? 
1824 */ 1825 close(sock); 1826 fclose(fh); 1827 free(pathstr); 1828 return 0; 1829 1830 case ARPHRD_IRDA: 1831 case ARPHRD_IEEE80211: 1832 case ARPHRD_IEEE80211_PRISM: 1833 case ARPHRD_IEEE80211_RADIOTAP: 1834 #ifdef ARPHRD_IEEE802154 1835 case ARPHRD_IEEE802154: 1836 #endif 1837 #ifdef ARPHRD_IEEE802154_MONITOR 1838 case ARPHRD_IEEE802154_MONITOR: 1839 #endif 1840 #ifdef ARPHRD_6LOWPAN 1841 case ARPHRD_6LOWPAN: 1842 #endif 1843 /* 1844 * Various wireless types. 1845 */ 1846 *flags |= PCAP_IF_WIRELESS; 1847 break; 1848 } 1849 } 1850 fclose(fh); 1851 } 1852 free(pathstr); 1853 } 1854 1855 #ifdef ETHTOOL_GLINK 1856 memset(&ifr, 0, sizeof(ifr)); 1857 pcapint_strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); 1858 info.cmd = ETHTOOL_GLINK; 1859 /* 1860 * XXX - while Valgrind handles SIOCETHTOOL and knows that 1861 * the ETHTOOL_GLINK command sets the .data member of the 1862 * structure, Memory Sanitizer doesn't yet do so: 1863 * 1864 * https://bugs.llvm.org/show_bug.cgi?id=45814 1865 * 1866 * For now, we zero it out to squelch warnings; if the bug 1867 * in question is fixed, we can remove this. 1868 */ 1869 info.data = 0; 1870 ifr.ifr_data = (caddr_t)&info; 1871 if (ioctl(sock, SIOCETHTOOL, &ifr) == -1) { 1872 int save_errno = errno; 1873 1874 switch (save_errno) { 1875 1876 case EOPNOTSUPP: 1877 case EINVAL: 1878 /* 1879 * OK, this OS version or driver doesn't support 1880 * asking for this information. 1881 * XXX - distinguish between "this doesn't 1882 * support ethtool at all because it's not 1883 * that type of device" vs. "this doesn't 1884 * support ethtool even though it's that 1885 * type of device", and return "unknown". 1886 */ 1887 *flags |= PCAP_IF_CONNECTION_STATUS_NOT_APPLICABLE; 1888 close(sock); 1889 return 0; 1890 1891 case ENODEV: 1892 /* 1893 * OK, no such device. 1894 * The user will find that out when they try to 1895 * activate the device; just say "OK" and 1896 * don't set anything. 
1897 */ 1898 close(sock); 1899 return 0; 1900 1901 default: 1902 /* 1903 * Other error. 1904 */ 1905 pcapint_fmt_errmsg_for_errno(errbuf, PCAP_ERRBUF_SIZE, 1906 save_errno, 1907 "%s: SIOCETHTOOL(ETHTOOL_GLINK) ioctl failed", 1908 name); 1909 close(sock); 1910 return -1; 1911 } 1912 } 1913 1914 /* 1915 * Is it connected? 1916 */ 1917 if (info.data) { 1918 /* 1919 * It's connected. 1920 */ 1921 *flags |= PCAP_IF_CONNECTION_STATUS_CONNECTED; 1922 } else { 1923 /* 1924 * It's disconnected. 1925 */ 1926 *flags |= PCAP_IF_CONNECTION_STATUS_DISCONNECTED; 1927 } 1928 #endif 1929 1930 close(sock); 1931 return 0; 1932 } 1933 1934 int 1935 pcapint_platform_finddevs(pcap_if_list_t *devlistp, char *errbuf) 1936 { 1937 /* 1938 * Get the list of regular interfaces first. 1939 */ 1940 if (pcapint_findalldevs_interfaces(devlistp, errbuf, can_be_bound, 1941 get_if_flags) == -1) 1942 return (-1); /* failure */ 1943 1944 /* 1945 * Add the "any" device. 1946 */ 1947 if (pcap_add_any_dev(devlistp, errbuf) == NULL) 1948 return (-1); 1949 1950 return (0); 1951 } 1952 1953 /* 1954 * Set direction flag: Which packets do we accept on a forwarding 1955 * single device? IN, OUT or both? 1956 */ 1957 static int 1958 pcap_setdirection_linux(pcap_t *handle, pcap_direction_t d) 1959 { 1960 /* 1961 * It's guaranteed, at this point, that d is a valid 1962 * direction value. 1963 */ 1964 handle->direction = d; 1965 return 0; 1966 } 1967 1968 static int 1969 is_wifi(const char *device) 1970 { 1971 char *pathstr; 1972 struct stat statb; 1973 1974 /* 1975 * See if there's a sysfs wireless directory for it. 1976 * If so, it's a wireless interface. 1977 */ 1978 if (asprintf(&pathstr, "/sys/class/net/%s/wireless", device) == -1) { 1979 /* 1980 * Just give up here. 
1981 */ 1982 return 0; 1983 } 1984 if (stat(pathstr, &statb) == 0) { 1985 free(pathstr); 1986 return 1; 1987 } 1988 free(pathstr); 1989 1990 return 0; 1991 } 1992 1993 /* 1994 * Linux uses the ARP hardware type to identify the type of an 1995 * interface. pcap uses the DLT_xxx constants for this. This 1996 * function takes a pointer to a "pcap_t", and an ARPHRD_xxx 1997 * constant, as arguments, and sets "handle->linktype" to the 1998 * appropriate DLT_XXX constant and sets "handle->offset" to 1999 * the appropriate value (to make "handle->offset" plus link-layer 2000 * header length be a multiple of 4, so that the link-layer payload 2001 * will be aligned on a 4-byte boundary when capturing packets). 2002 * (If the offset isn't set here, it'll be 0; add code as appropriate 2003 * for cases where it shouldn't be 0.) 2004 * 2005 * If "cooked_ok" is non-zero, we can use DLT_LINUX_SLL and capture 2006 * in cooked mode; otherwise, we can't use cooked mode, so we have 2007 * to pick some type that works in raw mode, or fail. 2008 * 2009 * Sets the link type to -1 if unable to map the type. 2010 * 2011 * Returns 0 on success or a PCAP_ERROR_ value on error. 2012 */ 2013 static int map_arphrd_to_dlt(pcap_t *handle, int arptype, 2014 const char *device, int cooked_ok) 2015 { 2016 static const char cdma_rmnet[] = "cdma_rmnet"; 2017 2018 switch (arptype) { 2019 2020 case ARPHRD_ETHER: 2021 /* 2022 * For various annoying reasons having to do with DHCP 2023 * software, some versions of Android give the mobile- 2024 * phone-network interface an ARPHRD_ value of 2025 * ARPHRD_ETHER, even though the packets supplied by 2026 * that interface have no link-layer header, and begin 2027 * with an IP header, so that the ARPHRD_ value should 2028 * be ARPHRD_NONE. 2029 * 2030 * Detect those devices by checking the device name, and 2031 * use DLT_RAW for them. 
2032 */ 2033 if (strncmp(device, cdma_rmnet, sizeof cdma_rmnet - 1) == 0) { 2034 handle->linktype = DLT_RAW; 2035 return 0; 2036 } 2037 2038 /* 2039 * Is this a real Ethernet device? If so, give it a 2040 * link-layer-type list with DLT_EN10MB and DLT_DOCSIS, so 2041 * that an application can let you choose it, in case you're 2042 * capturing DOCSIS traffic that a Cisco Cable Modem 2043 * Termination System is putting out onto an Ethernet (it 2044 * doesn't put an Ethernet header onto the wire, it puts raw 2045 * DOCSIS frames out on the wire inside the low-level 2046 * Ethernet framing). 2047 * 2048 * XXX - are there any other sorts of "fake Ethernet" that 2049 * have ARPHRD_ETHER but that shouldn't offer DLT_DOCSIS as 2050 * a Cisco CMTS won't put traffic onto it or get traffic 2051 * bridged onto it? ISDN is handled in "setup_socket()", 2052 * as we fall back on cooked mode there, and we use 2053 * is_wifi() to check for 802.11 devices; are there any 2054 * others? 2055 */ 2056 if (!is_wifi(device)) { 2057 int ret; 2058 2059 /* 2060 * This is not a Wi-Fi device but it could be 2061 * a DSA master/management network device. 2062 */ 2063 ret = iface_dsa_get_proto_info(device, handle); 2064 if (ret < 0) 2065 return ret; 2066 2067 if (ret == 1) { 2068 /* 2069 * This is a DSA master/management network 2070 * device, linktype is already set by 2071 * iface_dsa_get_proto_info(), set an 2072 * appropriate offset here. 2073 */ 2074 handle->offset = 2; 2075 break; 2076 } 2077 2078 /* 2079 * It's not a Wi-Fi device; offer DOCSIS. 
2080 */ 2081 handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 2); 2082 if (handle->dlt_list == NULL) { 2083 pcapint_fmt_errmsg_for_errno(handle->errbuf, 2084 PCAP_ERRBUF_SIZE, errno, "malloc"); 2085 return (PCAP_ERROR); 2086 } 2087 handle->dlt_list[0] = DLT_EN10MB; 2088 handle->dlt_list[1] = DLT_DOCSIS; 2089 handle->dlt_count = 2; 2090 } 2091 /* FALLTHROUGH */ 2092 2093 case ARPHRD_METRICOM: 2094 case ARPHRD_LOOPBACK: 2095 handle->linktype = DLT_EN10MB; 2096 handle->offset = 2; 2097 break; 2098 2099 case ARPHRD_EETHER: 2100 handle->linktype = DLT_EN3MB; 2101 break; 2102 2103 case ARPHRD_AX25: 2104 handle->linktype = DLT_AX25_KISS; 2105 break; 2106 2107 case ARPHRD_PRONET: 2108 handle->linktype = DLT_PRONET; 2109 break; 2110 2111 case ARPHRD_CHAOS: 2112 handle->linktype = DLT_CHAOS; 2113 break; 2114 #ifndef ARPHRD_CAN 2115 #define ARPHRD_CAN 280 2116 #endif 2117 case ARPHRD_CAN: 2118 handle->linktype = DLT_CAN_SOCKETCAN; 2119 break; 2120 2121 #ifndef ARPHRD_IEEE802_TR 2122 #define ARPHRD_IEEE802_TR 800 /* From Linux 2.4 */ 2123 #endif 2124 case ARPHRD_IEEE802_TR: 2125 case ARPHRD_IEEE802: 2126 handle->linktype = DLT_IEEE802; 2127 handle->offset = 2; 2128 break; 2129 2130 case ARPHRD_ARCNET: 2131 handle->linktype = DLT_ARCNET_LINUX; 2132 break; 2133 2134 #ifndef ARPHRD_FDDI /* From Linux 2.2.13 */ 2135 #define ARPHRD_FDDI 774 2136 #endif 2137 case ARPHRD_FDDI: 2138 handle->linktype = DLT_FDDI; 2139 handle->offset = 3; 2140 break; 2141 2142 #ifndef ARPHRD_ATM /* FIXME: How to #include this? */ 2143 #define ARPHRD_ATM 19 2144 #endif 2145 case ARPHRD_ATM: 2146 /* 2147 * The Classical IP implementation in ATM for Linux 2148 * supports both what RFC 1483 calls "LLC Encapsulation", 2149 * in which each packet has an LLC header, possibly 2150 * with a SNAP header as well, prepended to it, and 2151 * what RFC 1483 calls "VC Based Multiplexing", in which 2152 * different virtual circuits carry different network 2153 * layer protocols, and no header is prepended to packets. 
2154 * 2155 * They both have an ARPHRD_ type of ARPHRD_ATM, so 2156 * you can't use the ARPHRD_ type to find out whether 2157 * captured packets will have an LLC header, and, 2158 * while there's a socket ioctl to *set* the encapsulation 2159 * type, there's no ioctl to *get* the encapsulation type. 2160 * 2161 * This means that 2162 * 2163 * programs that dissect Linux Classical IP frames 2164 * would have to check for an LLC header and, 2165 * depending on whether they see one or not, dissect 2166 * the frame as LLC-encapsulated or as raw IP (I 2167 * don't know whether there's any traffic other than 2168 * IP that would show up on the socket, or whether 2169 * there's any support for IPv6 in the Linux 2170 * Classical IP code); 2171 * 2172 * filter expressions would have to compile into 2173 * code that checks for an LLC header and does 2174 * the right thing. 2175 * 2176 * Both of those are a nuisance - and, at least on systems 2177 * that support PF_PACKET sockets, we don't have to put 2178 * up with those nuisances; instead, we can just capture 2179 * in cooked mode. That's what we'll do, if we can. 2180 * Otherwise, we'll just fail. 
2181 */ 2182 if (cooked_ok) 2183 handle->linktype = DLT_LINUX_SLL; 2184 else 2185 handle->linktype = -1; 2186 break; 2187 2188 #ifndef ARPHRD_IEEE80211 /* From Linux 2.4.6 */ 2189 #define ARPHRD_IEEE80211 801 2190 #endif 2191 case ARPHRD_IEEE80211: 2192 handle->linktype = DLT_IEEE802_11; 2193 break; 2194 2195 #ifndef ARPHRD_IEEE80211_PRISM /* From Linux 2.4.18 */ 2196 #define ARPHRD_IEEE80211_PRISM 802 2197 #endif 2198 case ARPHRD_IEEE80211_PRISM: 2199 handle->linktype = DLT_PRISM_HEADER; 2200 break; 2201 2202 #ifndef ARPHRD_IEEE80211_RADIOTAP /* new */ 2203 #define ARPHRD_IEEE80211_RADIOTAP 803 2204 #endif 2205 case ARPHRD_IEEE80211_RADIOTAP: 2206 handle->linktype = DLT_IEEE802_11_RADIO; 2207 break; 2208 2209 case ARPHRD_PPP: 2210 /* 2211 * Some PPP code in the kernel supplies no link-layer 2212 * header whatsoever to PF_PACKET sockets; other PPP 2213 * code supplies PPP link-layer headers ("syncppp.c"); 2214 * some PPP code might supply random link-layer 2215 * headers (PPP over ISDN - there's code in Ethereal, 2216 * for example, to cope with PPP-over-ISDN captures 2217 * with which the Ethereal developers have had to cope, 2218 * heuristically trying to determine which of the 2219 * oddball link-layer headers particular packets have). 2220 * 2221 * As such, we just punt, and run all PPP interfaces 2222 * in cooked mode, if we can; otherwise, we just treat 2223 * it as DLT_RAW, for now - if somebody needs to capture, 2224 * on a 2.0[.x] kernel, on PPP devices that supply a 2225 * link-layer header, they'll have to add code here to 2226 * map to the appropriate DLT_ type (possibly adding a 2227 * new DLT_ type, if necessary). 2228 */ 2229 if (cooked_ok) 2230 handle->linktype = DLT_LINUX_SLL; 2231 else { 2232 /* 2233 * XXX - handle ISDN types here? 
We can't fall 2234 * back on cooked sockets, so we'd have to 2235 * figure out from the device name what type of 2236 * link-layer encapsulation it's using, and map 2237 * that to an appropriate DLT_ value, meaning 2238 * we'd map "isdnN" devices to DLT_RAW (they 2239 * supply raw IP packets with no link-layer 2240 * header) and "isdY" devices to a new DLT_I4L_IP 2241 * type that has only an Ethernet packet type as 2242 * a link-layer header. 2243 * 2244 * But sometimes we seem to get random crap 2245 * in the link-layer header when capturing on 2246 * ISDN devices.... 2247 */ 2248 handle->linktype = DLT_RAW; 2249 } 2250 break; 2251 2252 #ifndef ARPHRD_CISCO 2253 #define ARPHRD_CISCO 513 /* previously ARPHRD_HDLC */ 2254 #endif 2255 case ARPHRD_CISCO: 2256 handle->linktype = DLT_C_HDLC; 2257 break; 2258 2259 /* Not sure if this is correct for all tunnels, but it 2260 * works for CIPE */ 2261 case ARPHRD_TUNNEL: 2262 #ifndef ARPHRD_SIT 2263 #define ARPHRD_SIT 776 /* From Linux 2.2.13 */ 2264 #endif 2265 case ARPHRD_SIT: 2266 case ARPHRD_CSLIP: 2267 case ARPHRD_SLIP6: 2268 case ARPHRD_CSLIP6: 2269 case ARPHRD_ADAPT: 2270 case ARPHRD_SLIP: 2271 #ifndef ARPHRD_RAWHDLC 2272 #define ARPHRD_RAWHDLC 518 2273 #endif 2274 case ARPHRD_RAWHDLC: 2275 #ifndef ARPHRD_DLCI 2276 #define ARPHRD_DLCI 15 2277 #endif 2278 case ARPHRD_DLCI: 2279 /* 2280 * XXX - should some of those be mapped to DLT_LINUX_SLL 2281 * instead? Should we just map all of them to DLT_LINUX_SLL? 2282 */ 2283 handle->linktype = DLT_RAW; 2284 break; 2285 2286 #ifndef ARPHRD_FRAD 2287 #define ARPHRD_FRAD 770 2288 #endif 2289 case ARPHRD_FRAD: 2290 handle->linktype = DLT_FRELAY; 2291 break; 2292 2293 case ARPHRD_LOCALTLK: 2294 handle->linktype = DLT_LTALK; 2295 break; 2296 2297 case 18: 2298 /* 2299 * RFC 4338 defines an encapsulation for IP and ARP 2300 * packets that's compatible with the RFC 2625 2301 * encapsulation, but that uses a different ARP 2302 * hardware type and hardware addresses. 
That 2303 * ARP hardware type is 18; Linux doesn't define 2304 * any ARPHRD_ value as 18, but if it ever officially 2305 * supports RFC 4338-style IP-over-FC, it should define 2306 * one. 2307 * 2308 * For now, we map it to DLT_IP_OVER_FC, in the hopes 2309 * that this will encourage its use in the future, 2310 * should Linux ever officially support RFC 4338-style 2311 * IP-over-FC. 2312 */ 2313 handle->linktype = DLT_IP_OVER_FC; 2314 break; 2315 2316 #ifndef ARPHRD_FCPP 2317 #define ARPHRD_FCPP 784 2318 #endif 2319 case ARPHRD_FCPP: 2320 #ifndef ARPHRD_FCAL 2321 #define ARPHRD_FCAL 785 2322 #endif 2323 case ARPHRD_FCAL: 2324 #ifndef ARPHRD_FCPL 2325 #define ARPHRD_FCPL 786 2326 #endif 2327 case ARPHRD_FCPL: 2328 #ifndef ARPHRD_FCFABRIC 2329 #define ARPHRD_FCFABRIC 787 2330 #endif 2331 case ARPHRD_FCFABRIC: 2332 /* 2333 * Back in 2002, Donald Lee at Cray wanted a DLT_ for 2334 * IP-over-FC: 2335 * 2336 * https://www.mail-archive.com/tcpdump-workers@sandelman.ottawa.on.ca/msg01043.html 2337 * 2338 * and one was assigned. 2339 * 2340 * In a later private discussion (spun off from a message 2341 * on the ethereal-users list) on how to get that DLT_ 2342 * value in libpcap on Linux, I ended up deciding that 2343 * the best thing to do would be to have him tweak the 2344 * driver to set the ARPHRD_ value to some ARPHRD_FCxx 2345 * type, and map all those types to DLT_IP_OVER_FC: 2346 * 2347 * I've checked into the libpcap and tcpdump CVS tree 2348 * support for DLT_IP_OVER_FC. In order to use that, 2349 * you'd have to modify your modified driver to return 2350 * one of the ARPHRD_FCxxx types, in "fcLINUXfcp.c" - 2351 * change it to set "dev->type" to ARPHRD_FCFABRIC, for 2352 * example (the exact value doesn't matter, it can be 2353 * any of ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, or 2354 * ARPHRD_FCFABRIC). 
2355 * 2356 * 11 years later, Christian Svensson wanted to map 2357 * various ARPHRD_ values to DLT_FC_2 and 2358 * DLT_FC_2_WITH_FRAME_DELIMS for raw Fibre Channel 2359 * frames: 2360 * 2361 * https://github.com/mcr/libpcap/pull/29 2362 * 2363 * There don't seem to be any network drivers that uses 2364 * any of the ARPHRD_FC* values for IP-over-FC, and 2365 * it's not exactly clear what the "Dummy types for non 2366 * ARP hardware" are supposed to mean (link-layer 2367 * header type? Physical network type?), so it's 2368 * not exactly clear why the ARPHRD_FC* types exist 2369 * in the first place. 2370 * 2371 * For now, we map them to DLT_FC_2, and provide an 2372 * option of DLT_FC_2_WITH_FRAME_DELIMS, as well as 2373 * DLT_IP_OVER_FC just in case there's some old 2374 * driver out there that uses one of those types for 2375 * IP-over-FC on which somebody wants to capture 2376 * packets. 2377 */ 2378 handle->linktype = DLT_FC_2; 2379 handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 3); 2380 if (handle->dlt_list == NULL) { 2381 pcapint_fmt_errmsg_for_errno(handle->errbuf, 2382 PCAP_ERRBUF_SIZE, errno, "malloc"); 2383 return (PCAP_ERROR); 2384 } 2385 handle->dlt_list[0] = DLT_FC_2; 2386 handle->dlt_list[1] = DLT_FC_2_WITH_FRAME_DELIMS; 2387 handle->dlt_list[2] = DLT_IP_OVER_FC; 2388 handle->dlt_count = 3; 2389 break; 2390 2391 #ifndef ARPHRD_IRDA 2392 #define ARPHRD_IRDA 783 2393 #endif 2394 case ARPHRD_IRDA: 2395 /* Don't expect IP packet out of this interfaces... */ 2396 handle->linktype = DLT_LINUX_IRDA; 2397 /* We need to save packet direction for IrDA decoding, 2398 * so let's use "Linux-cooked" mode. Jean II 2399 * 2400 * XXX - this is handled in setup_socket(). 
*/ 2401 /* handlep->cooked = 1; */ 2402 break; 2403 2404 /* ARPHRD_LAPD is unofficial and randomly allocated, if reallocation 2405 * is needed, please report it to <daniele (at) orlandi.com> */ 2406 #ifndef ARPHRD_LAPD 2407 #define ARPHRD_LAPD 8445 2408 #endif 2409 case ARPHRD_LAPD: 2410 /* Don't expect IP packet out of this interfaces... */ 2411 handle->linktype = DLT_LINUX_LAPD; 2412 break; 2413 2414 #ifndef ARPHRD_NONE 2415 #define ARPHRD_NONE 0xFFFE 2416 #endif 2417 case ARPHRD_NONE: 2418 /* 2419 * No link-layer header; packets are just IP 2420 * packets, so use DLT_RAW. 2421 */ 2422 handle->linktype = DLT_RAW; 2423 break; 2424 2425 #ifndef ARPHRD_IEEE802154 2426 #define ARPHRD_IEEE802154 804 2427 #endif 2428 case ARPHRD_IEEE802154: 2429 handle->linktype = DLT_IEEE802_15_4_NOFCS; 2430 break; 2431 2432 #ifndef ARPHRD_NETLINK 2433 #define ARPHRD_NETLINK 824 2434 #endif 2435 case ARPHRD_NETLINK: 2436 handle->linktype = DLT_NETLINK; 2437 /* 2438 * We need to use cooked mode, so that in sll_protocol we 2439 * pick up the netlink protocol type such as NETLINK_ROUTE, 2440 * NETLINK_GENERIC, NETLINK_FIB_LOOKUP, etc. 2441 * 2442 * XXX - this is handled in setup_socket(). 2443 */ 2444 /* handlep->cooked = 1; */ 2445 break; 2446 2447 #ifndef ARPHRD_VSOCKMON 2448 #define ARPHRD_VSOCKMON 826 2449 #endif 2450 case ARPHRD_VSOCKMON: 2451 handle->linktype = DLT_VSOCK; 2452 break; 2453 2454 default: 2455 handle->linktype = -1; 2456 break; 2457 } 2458 return (0); 2459 } 2460 2461 /* 2462 * Try to set up a PF_PACKET socket. 2463 * Returns 0 or a PCAP_WARNING_ value on success and a PCAP_ERROR_ value 2464 * on failure. 
2465 */ 2466 static int 2467 setup_socket(pcap_t *handle, int is_any_device) 2468 { 2469 struct pcap_linux *handlep = handle->priv; 2470 const char *device = handle->opt.device; 2471 int status = 0; 2472 int sock_fd, arptype; 2473 int val; 2474 int err = 0; 2475 struct packet_mreq mr; 2476 #if defined(SO_BPF_EXTENSIONS) && defined(SKF_AD_VLAN_TAG_PRESENT) 2477 int bpf_extensions; 2478 socklen_t len = sizeof(bpf_extensions); 2479 #endif 2480 2481 /* 2482 * Open a socket with protocol family packet. If cooked is true, 2483 * we open a SOCK_DGRAM socket for the cooked interface, otherwise 2484 * we open a SOCK_RAW socket for the raw interface. 2485 * 2486 * The protocol is set to 0. This means we will receive no 2487 * packets until we "bind" the socket with a non-zero 2488 * protocol. This allows us to setup the ring buffers without 2489 * dropping any packets. 2490 */ 2491 sock_fd = is_any_device ? 2492 socket(PF_PACKET, SOCK_DGRAM, 0) : 2493 socket(PF_PACKET, SOCK_RAW, 0); 2494 2495 if (sock_fd == -1) { 2496 if (errno == EPERM || errno == EACCES) { 2497 /* 2498 * You don't have permission to open the 2499 * socket. 2500 */ 2501 status = PCAP_ERROR_PERM_DENIED; 2502 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 2503 "Attempt to create packet socket failed - CAP_NET_RAW may be required"); 2504 } else if (errno == EAFNOSUPPORT) { 2505 /* 2506 * PF_PACKET sockets not supported. 2507 * Perhaps we're running on the WSL1 module 2508 * in the Windows NT kernel rather than on 2509 * a real Linux kernel. 2510 */ 2511 status = PCAP_ERROR_CAPTURE_NOTSUP; 2512 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 2513 "PF_PACKET sockets not supported - is this WSL1?"); 2514 } else { 2515 /* 2516 * Other error. 2517 */ 2518 status = PCAP_ERROR; 2519 pcapint_fmt_errmsg_for_errno(handle->errbuf, 2520 PCAP_ERRBUF_SIZE, errno, "socket"); 2521 } 2522 return status; 2523 } 2524 2525 /* 2526 * Get the interface index of the loopback device. 
2527 * If the attempt fails, don't fail, just set the 2528 * "handlep->lo_ifindex" to -1. 2529 * 2530 * XXX - can there be more than one device that loops 2531 * packets back, i.e. devices other than "lo"? If so, 2532 * we'd need to find them all, and have an array of 2533 * indices for them, and check all of them in 2534 * "pcap_read_packet()". 2535 */ 2536 handlep->lo_ifindex = iface_get_id(sock_fd, "lo", handle->errbuf); 2537 2538 /* 2539 * Default value for offset to align link-layer payload 2540 * on a 4-byte boundary. 2541 */ 2542 handle->offset = 0; 2543 2544 /* 2545 * What kind of frames do we have to deal with? Fall back 2546 * to cooked mode if we have an unknown interface type 2547 * or a type we know doesn't work well in raw mode. 2548 */ 2549 if (!is_any_device) { 2550 /* Assume for now we don't need cooked mode. */ 2551 handlep->cooked = 0; 2552 2553 if (handle->opt.rfmon) { 2554 /* 2555 * We were asked to turn on monitor mode. 2556 * Do so before we get the link-layer type, 2557 * because entering monitor mode could change 2558 * the link-layer type. 2559 */ 2560 err = enter_rfmon_mode(handle, sock_fd, device); 2561 if (err < 0) { 2562 /* Hard failure */ 2563 close(sock_fd); 2564 return err; 2565 } 2566 if (err == 0) { 2567 /* 2568 * Nothing worked for turning monitor mode 2569 * on. 2570 */ 2571 close(sock_fd); 2572 2573 return PCAP_ERROR_RFMON_NOTSUP; 2574 } 2575 2576 /* 2577 * Either monitor mode has been turned on for 2578 * the device, or we've been given a different 2579 * device to open for monitor mode. If we've 2580 * been given a different device, use it. 
2581 */ 2582 if (handlep->mondevice != NULL) 2583 device = handlep->mondevice; 2584 } 2585 arptype = iface_get_arptype(sock_fd, device, handle->errbuf); 2586 if (arptype < 0) { 2587 close(sock_fd); 2588 return arptype; 2589 } 2590 status = map_arphrd_to_dlt(handle, arptype, device, 1); 2591 if (status < 0) { 2592 close(sock_fd); 2593 return status; 2594 } 2595 if (handle->linktype == -1 || 2596 handle->linktype == DLT_LINUX_SLL || 2597 handle->linktype == DLT_LINUX_IRDA || 2598 handle->linktype == DLT_LINUX_LAPD || 2599 handle->linktype == DLT_NETLINK || 2600 (handle->linktype == DLT_EN10MB && 2601 (strncmp("isdn", device, 4) == 0 || 2602 strncmp("isdY", device, 4) == 0))) { 2603 /* 2604 * Unknown interface type (-1), or a 2605 * device we explicitly chose to run 2606 * in cooked mode (e.g., PPP devices), 2607 * or an ISDN device (whose link-layer 2608 * type we can only determine by using 2609 * APIs that may be different on different 2610 * kernels) - reopen in cooked mode. 2611 * 2612 * If the type is unknown, return a warning; 2613 * map_arphrd_to_dlt() has already set the 2614 * warning message. 2615 */ 2616 if (close(sock_fd) == -1) { 2617 pcapint_fmt_errmsg_for_errno(handle->errbuf, 2618 PCAP_ERRBUF_SIZE, errno, "close"); 2619 return PCAP_ERROR; 2620 } 2621 sock_fd = socket(PF_PACKET, SOCK_DGRAM, 0); 2622 if (sock_fd < 0) { 2623 /* 2624 * Fatal error. We treat this as 2625 * a generic error; we already know 2626 * that we were able to open a 2627 * PF_PACKET/SOCK_RAW socket, so 2628 * any failure is a "this shouldn't 2629 * happen" case. 2630 */ 2631 pcapint_fmt_errmsg_for_errno(handle->errbuf, 2632 PCAP_ERRBUF_SIZE, errno, "socket"); 2633 return PCAP_ERROR; 2634 } 2635 handlep->cooked = 1; 2636 2637 /* 2638 * Get rid of any link-layer type list 2639 * we allocated - this only supports cooked 2640 * capture. 
2641 */ 2642 if (handle->dlt_list != NULL) { 2643 free(handle->dlt_list); 2644 handle->dlt_list = NULL; 2645 handle->dlt_count = 0; 2646 } 2647 2648 if (handle->linktype == -1) { 2649 /* 2650 * Warn that we're falling back on 2651 * cooked mode; we may want to 2652 * update "map_arphrd_to_dlt()" 2653 * to handle the new type. 2654 */ 2655 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 2656 "arptype %d not " 2657 "supported by libpcap - " 2658 "falling back to cooked " 2659 "socket", 2660 arptype); 2661 status = PCAP_WARNING; 2662 } 2663 2664 /* 2665 * IrDA capture is not a real "cooked" capture, 2666 * it's IrLAP frames, not IP packets. The 2667 * same applies to LAPD capture. 2668 */ 2669 if (handle->linktype != DLT_LINUX_IRDA && 2670 handle->linktype != DLT_LINUX_LAPD && 2671 handle->linktype != DLT_NETLINK) 2672 handle->linktype = DLT_LINUX_SLL; 2673 } 2674 2675 handlep->ifindex = iface_get_id(sock_fd, device, 2676 handle->errbuf); 2677 if (handlep->ifindex == -1) { 2678 close(sock_fd); 2679 return PCAP_ERROR; 2680 } 2681 2682 if ((err = iface_bind(sock_fd, handlep->ifindex, 2683 handle->errbuf, 0)) != 0) { 2684 close(sock_fd); 2685 return err; 2686 } 2687 } else { 2688 /* 2689 * The "any" device. 2690 */ 2691 if (handle->opt.rfmon) { 2692 /* 2693 * It doesn't support monitor mode. 2694 */ 2695 close(sock_fd); 2696 return PCAP_ERROR_RFMON_NOTSUP; 2697 } 2698 2699 /* 2700 * It uses cooked mode. 2701 * Support both DLT_LINUX_SLL and DLT_LINUX_SLL2. 2702 */ 2703 handlep->cooked = 1; 2704 handle->linktype = DLT_LINUX_SLL; 2705 handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 2); 2706 if (handle->dlt_list == NULL) { 2707 pcapint_fmt_errmsg_for_errno(handle->errbuf, 2708 PCAP_ERRBUF_SIZE, errno, "malloc"); 2709 close(sock_fd); 2710 return (PCAP_ERROR); 2711 } 2712 handle->dlt_list[0] = DLT_LINUX_SLL; 2713 handle->dlt_list[1] = DLT_LINUX_SLL2; 2714 handle->dlt_count = 2; 2715 2716 /* 2717 * We're not bound to a device. 
2718 * For now, we're using this as an indication 2719 * that we can't transmit; stop doing that only 2720 * if we figure out how to transmit in cooked 2721 * mode. 2722 */ 2723 handlep->ifindex = -1; 2724 } 2725 2726 /* 2727 * Select promiscuous mode on if "promisc" is set. 2728 * 2729 * Do not turn allmulti mode on if we don't select 2730 * promiscuous mode - on some devices (e.g., Orinoco 2731 * wireless interfaces), allmulti mode isn't supported 2732 * and the driver implements it by turning promiscuous 2733 * mode on, and that screws up the operation of the 2734 * card as a normal networking interface, and on no 2735 * other platform I know of does starting a non- 2736 * promiscuous capture affect which multicast packets 2737 * are received by the interface. 2738 */ 2739 2740 /* 2741 * Hmm, how can we set promiscuous mode on all interfaces? 2742 * I am not sure if that is possible at all. For now, we 2743 * silently ignore attempts to turn promiscuous mode on 2744 * for the "any" device (so you don't have to explicitly 2745 * disable it in programs such as tcpdump). 2746 */ 2747 2748 if (!is_any_device && handle->opt.promisc) { 2749 memset(&mr, 0, sizeof(mr)); 2750 mr.mr_ifindex = handlep->ifindex; 2751 mr.mr_type = PACKET_MR_PROMISC; 2752 if (setsockopt(sock_fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, 2753 &mr, sizeof(mr)) == -1) { 2754 pcapint_fmt_errmsg_for_errno(handle->errbuf, 2755 PCAP_ERRBUF_SIZE, errno, "setsockopt (PACKET_ADD_MEMBERSHIP)"); 2756 close(sock_fd); 2757 return PCAP_ERROR; 2758 } 2759 } 2760 2761 /* 2762 * Enable auxiliary data and reserve room for reconstructing 2763 * VLAN headers. 2764 * 2765 * XXX - is enabling auxiliary data necessary, now that we 2766 * only support memory-mapped capture? The kernel's memory-mapped 2767 * capture code doesn't seem to check whether auxiliary data 2768 * is enabled, it seems to provide it whether it is or not. 
2769 */ 2770 val = 1; 2771 if (setsockopt(sock_fd, SOL_PACKET, PACKET_AUXDATA, &val, 2772 sizeof(val)) == -1 && errno != ENOPROTOOPT) { 2773 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 2774 errno, "setsockopt (PACKET_AUXDATA)"); 2775 close(sock_fd); 2776 return PCAP_ERROR; 2777 } 2778 handle->offset += VLAN_TAG_LEN; 2779 2780 /* 2781 * If we're in cooked mode, make the snapshot length 2782 * large enough to hold a "cooked mode" header plus 2783 * 1 byte of packet data (so we don't pass a byte 2784 * count of 0 to "recvfrom()"). 2785 * XXX - we don't know whether this will be DLT_LINUX_SLL 2786 * or DLT_LINUX_SLL2, so make sure it's big enough for 2787 * a DLT_LINUX_SLL2 "cooked mode" header; a snapshot length 2788 * that small is silly anyway. 2789 */ 2790 if (handlep->cooked) { 2791 if (handle->snapshot < SLL2_HDR_LEN + 1) 2792 handle->snapshot = SLL2_HDR_LEN + 1; 2793 } 2794 handle->bufsize = handle->snapshot; 2795 2796 /* 2797 * Set the offset at which to insert VLAN tags. 2798 */ 2799 set_vlan_offset(handle); 2800 2801 if (handle->opt.tstamp_precision == PCAP_TSTAMP_PRECISION_NANO) { 2802 int nsec_tstamps = 1; 2803 2804 if (setsockopt(sock_fd, SOL_SOCKET, SO_TIMESTAMPNS, &nsec_tstamps, sizeof(nsec_tstamps)) < 0) { 2805 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, "setsockopt: unable to set SO_TIMESTAMPNS"); 2806 close(sock_fd); 2807 return PCAP_ERROR; 2808 } 2809 } 2810 2811 /* 2812 * We've succeeded. Save the socket FD in the pcap structure. 2813 */ 2814 handle->fd = sock_fd; 2815 2816 #if defined(SO_BPF_EXTENSIONS) && defined(SKF_AD_VLAN_TAG_PRESENT) 2817 /* 2818 * Can we generate special code for VLAN checks? 2819 * (XXX - what if we need the special code but it's not supported 2820 * by the OS? Is that possible?) 2821 */ 2822 if (getsockopt(sock_fd, SOL_SOCKET, SO_BPF_EXTENSIONS, 2823 &bpf_extensions, &len) == 0) { 2824 if (bpf_extensions >= SKF_AD_VLAN_TAG_PRESENT) { 2825 /* 2826 * Yes, we can. Request that we do so. 
2827 */ 2828 handle->bpf_codegen_flags |= BPF_SPECIAL_VLAN_HANDLING; 2829 } 2830 } 2831 #endif /* defined(SO_BPF_EXTENSIONS) && defined(SKF_AD_VLAN_TAG_PRESENT) */ 2832 2833 return status; 2834 } 2835 2836 /* 2837 * Attempt to setup memory-mapped access. 2838 * 2839 * On success, returns 0 if there are no warnings or a PCAP_WARNING_ code 2840 * if there is a warning. 2841 * 2842 * On error, returns the appropriate error code; if that is PCAP_ERROR, 2843 * sets handle->errbuf to the appropriate message. 2844 */ 2845 static int 2846 setup_mmapped(pcap_t *handle) 2847 { 2848 struct pcap_linux *handlep = handle->priv; 2849 int status; 2850 2851 /* 2852 * Attempt to allocate a buffer to hold the contents of one 2853 * packet, for use by the oneshot callback. 2854 */ 2855 handlep->oneshot_buffer = malloc(handle->snapshot); 2856 if (handlep->oneshot_buffer == NULL) { 2857 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 2858 errno, "can't allocate oneshot buffer"); 2859 return PCAP_ERROR; 2860 } 2861 2862 if (handle->opt.buffer_size == 0) { 2863 /* by default request 2M for the ring buffer */ 2864 handle->opt.buffer_size = 2*1024*1024; 2865 } 2866 status = prepare_tpacket_socket(handle); 2867 if (status == -1) { 2868 free(handlep->oneshot_buffer); 2869 handlep->oneshot_buffer = NULL; 2870 return PCAP_ERROR; 2871 } 2872 status = create_ring(handle); 2873 if (status < 0) { 2874 /* 2875 * Error attempting to enable memory-mapped capture; 2876 * fail. The return value is the status to return. 2877 */ 2878 free(handlep->oneshot_buffer); 2879 handlep->oneshot_buffer = NULL; 2880 return status; 2881 } 2882 2883 /* 2884 * Success. status has been set either to 0 if there are no 2885 * warnings or to a PCAP_WARNING_ value if there is a warning. 2886 * 2887 * handle->offset is used to get the current position into the rx ring. 2888 * handle->cc is used to store the ring size. 2889 */ 2890 2891 /* 2892 * Set the timeout to use in poll() before returning. 
2893 */ 2894 set_poll_timeout(handlep); 2895 2896 return status; 2897 } 2898 2899 /* 2900 * Attempt to set the socket to the specified version of the memory-mapped 2901 * header. 2902 * 2903 * Return 0 if we succeed; return 1 if we fail because that version isn't 2904 * supported; return -1 on any other error, and set handle->errbuf. 2905 */ 2906 static int 2907 init_tpacket(pcap_t *handle, int version, const char *version_str) 2908 { 2909 struct pcap_linux *handlep = handle->priv; 2910 int val = version; 2911 socklen_t len = sizeof(val); 2912 2913 /* 2914 * Probe whether kernel supports the specified TPACKET version; 2915 * this also gets the length of the header for that version. 2916 * 2917 * This socket option was introduced in 2.6.27, which was 2918 * also the first release with TPACKET_V2 support. 2919 */ 2920 if (getsockopt(handle->fd, SOL_PACKET, PACKET_HDRLEN, &val, &len) < 0) { 2921 if (errno == EINVAL) { 2922 /* 2923 * EINVAL means this specific version of TPACKET 2924 * is not supported. Tell the caller they can try 2925 * with a different one; if they've run out of 2926 * others to try, let them set the error message 2927 * appropriately. 2928 */ 2929 return 1; 2930 } 2931 2932 /* 2933 * All other errors are fatal. 2934 */ 2935 if (errno == ENOPROTOOPT) { 2936 /* 2937 * PACKET_HDRLEN isn't supported, which means 2938 * that memory-mapped capture isn't supported. 2939 * Indicate that in the message. 2940 */ 2941 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 2942 "Kernel doesn't support memory-mapped capture; a 2.6.27 or later 2.x kernel is required, with CONFIG_PACKET_MMAP specified for 2.x kernels"); 2943 } else { 2944 /* 2945 * Some unexpected error. 
2946 */ 2947 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 2948 errno, "can't get %s header len on packet socket", 2949 version_str); 2950 } 2951 return -1; 2952 } 2953 handlep->tp_hdrlen = val; 2954 2955 val = version; 2956 if (setsockopt(handle->fd, SOL_PACKET, PACKET_VERSION, &val, 2957 sizeof(val)) < 0) { 2958 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 2959 errno, "can't activate %s on packet socket", version_str); 2960 return -1; 2961 } 2962 handlep->tp_version = version; 2963 2964 return 0; 2965 } 2966 2967 /* 2968 * Attempt to set the socket to version 3 of the memory-mapped header and, 2969 * if that fails because version 3 isn't supported, attempt to fall 2970 * back to version 2. If version 2 isn't supported, just fail. 2971 * 2972 * Return 0 if we succeed and -1 on any other error, and set handle->errbuf. 2973 */ 2974 static int 2975 prepare_tpacket_socket(pcap_t *handle) 2976 { 2977 int ret; 2978 2979 #ifdef HAVE_TPACKET3 2980 /* 2981 * Try setting the version to TPACKET_V3. 2982 * 2983 * The only mode in which buffering is done on PF_PACKET 2984 * sockets, so that packets might not be delivered 2985 * immediately, is TPACKET_V3 mode. 2986 * 2987 * The buffering cannot be disabled in that mode, so 2988 * if the user has requested immediate mode, we don't 2989 * use TPACKET_V3. 2990 */ 2991 if (!handle->opt.immediate) { 2992 ret = init_tpacket(handle, TPACKET_V3, "TPACKET_V3"); 2993 if (ret == 0) { 2994 /* 2995 * Success. 2996 */ 2997 return 0; 2998 } 2999 if (ret == -1) { 3000 /* 3001 * We failed for some reason other than "the 3002 * kernel doesn't support TPACKET_V3". 3003 */ 3004 return -1; 3005 } 3006 3007 /* 3008 * This means it returned 1, which means "the kernel 3009 * doesn't support TPACKET_V3"; try TPACKET_V2. 3010 */ 3011 } 3012 #endif /* HAVE_TPACKET3 */ 3013 3014 /* 3015 * Try setting the version to TPACKET_V2. 
3016 */ 3017 ret = init_tpacket(handle, TPACKET_V2, "TPACKET_V2"); 3018 if (ret == 0) { 3019 /* 3020 * Success. 3021 */ 3022 return 0; 3023 } 3024 3025 if (ret == 1) { 3026 /* 3027 * OK, the kernel supports memory-mapped capture, but 3028 * not TPACKET_V2. Set the error message appropriately. 3029 */ 3030 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3031 "Kernel doesn't support TPACKET_V2; a 2.6.27 or later kernel is required"); 3032 } 3033 3034 /* 3035 * We failed. 3036 */ 3037 return -1; 3038 } 3039 3040 #define MAX(a,b) ((a)>(b)?(a):(b)) 3041 3042 /* 3043 * Attempt to set up memory-mapped access. 3044 * 3045 * On success, returns 0 if there are no warnings or to a PCAP_WARNING_ code 3046 * if there is a warning. 3047 * 3048 * On error, returns the appropriate error code; if that is PCAP_ERROR, 3049 * sets handle->errbuf to the appropriate message. 3050 */ 3051 static int 3052 create_ring(pcap_t *handle) 3053 { 3054 struct pcap_linux *handlep = handle->priv; 3055 unsigned i, j, frames_per_block; 3056 #ifdef HAVE_TPACKET3 3057 /* 3058 * For sockets using TPACKET_V2, the extra stuff at the end of a 3059 * struct tpacket_req3 will be ignored, so this is OK even for 3060 * those sockets. 3061 */ 3062 struct tpacket_req3 req; 3063 #else 3064 struct tpacket_req req; 3065 #endif 3066 socklen_t len; 3067 unsigned int sk_type, tp_reserve, maclen, tp_hdrlen, netoff, macoff; 3068 unsigned int frame_size; 3069 int status; 3070 3071 /* 3072 * Start out assuming no warnings. 3073 */ 3074 status = 0; 3075 3076 /* 3077 * Reserve space for VLAN tag reconstruction. 3078 */ 3079 tp_reserve = VLAN_TAG_LEN; 3080 3081 /* 3082 * If we're capturing in cooked mode, reserve space for 3083 * a DLT_LINUX_SLL2 header; we don't know yet whether 3084 * we'll be using DLT_LINUX_SLL or DLT_LINUX_SLL2, as 3085 * that can be changed on an open device, so we reserve 3086 * space for the larger of the two. 
3087 * 3088 * XXX - we assume that the kernel is still adding 3089 * 16 bytes of extra space, so we subtract 16 from 3090 * SLL2_HDR_LEN to get the additional space needed. 3091 * (Are they doing that for DLT_LINUX_SLL, the link- 3092 * layer header for which is 16 bytes?) 3093 * 3094 * XXX - should we use TPACKET_ALIGN(SLL2_HDR_LEN - 16)? 3095 */ 3096 if (handlep->cooked) 3097 tp_reserve += SLL2_HDR_LEN - 16; 3098 3099 /* 3100 * Try to request that amount of reserve space. 3101 * This must be done before creating the ring buffer. 3102 */ 3103 len = sizeof(tp_reserve); 3104 if (setsockopt(handle->fd, SOL_PACKET, PACKET_RESERVE, 3105 &tp_reserve, len) < 0) { 3106 pcapint_fmt_errmsg_for_errno(handle->errbuf, 3107 PCAP_ERRBUF_SIZE, errno, 3108 "setsockopt (PACKET_RESERVE)"); 3109 return PCAP_ERROR; 3110 } 3111 3112 switch (handlep->tp_version) { 3113 3114 case TPACKET_V2: 3115 /* Note that with large snapshot length (say 256K, which is 3116 * the default for recent versions of tcpdump, Wireshark, 3117 * TShark, dumpcap or 64K, the value that "-s 0" has given for 3118 * a long time with tcpdump), if we use the snapshot 3119 * length to calculate the frame length, only a few frames 3120 * will be available in the ring even with pretty 3121 * large ring size (and a lot of memory will be unused). 3122 * 3123 * Ideally, we should choose a frame length based on the 3124 * minimum of the specified snapshot length and the maximum 3125 * packet size. That's not as easy as it sounds; consider, 3126 * for example, an 802.11 interface in monitor mode, where 3127 * the frame would include a radiotap header, where the 3128 * maximum radiotap header length is device-dependent. 3129 * 3130 * So, for now, we just do this for Ethernet devices, where 3131 * there's no metadata header, and the link-layer header is 3132 * fixed length. 
We can get the maximum packet size by 3133 * adding 18, the Ethernet header length plus the CRC length 3134 * (just in case we happen to get the CRC in the packet), to 3135 * the MTU of the interface; we fetch the MTU in the hopes 3136 * that it reflects support for jumbo frames. (Even if the 3137 * interface is just being used for passive snooping, the 3138 * driver might set the size of buffers in the receive ring 3139 * based on the MTU, so that the MTU limits the maximum size 3140 * of packets that we can receive.) 3141 * 3142 * If segmentation/fragmentation or receive offload are 3143 * enabled, we can get reassembled/aggregated packets larger 3144 * than MTU, but bounded to 65535 plus the Ethernet overhead, 3145 * due to kernel and protocol constraints */ 3146 frame_size = handle->snapshot; 3147 if (handle->linktype == DLT_EN10MB) { 3148 unsigned int max_frame_len; 3149 int mtu; 3150 int offload; 3151 3152 mtu = iface_get_mtu(handle->fd, handle->opt.device, 3153 handle->errbuf); 3154 if (mtu == -1) 3155 return PCAP_ERROR; 3156 offload = iface_get_offload(handle); 3157 if (offload == -1) 3158 return PCAP_ERROR; 3159 if (offload) 3160 max_frame_len = MAX(mtu, 65535); 3161 else 3162 max_frame_len = mtu; 3163 max_frame_len += 18; 3164 3165 if (frame_size > max_frame_len) 3166 frame_size = max_frame_len; 3167 } 3168 3169 /* NOTE: calculus matching those in tpacket_rcv() 3170 * in linux-2.6/net/packet/af_packet.c 3171 */ 3172 len = sizeof(sk_type); 3173 if (getsockopt(handle->fd, SOL_SOCKET, SO_TYPE, &sk_type, 3174 &len) < 0) { 3175 pcapint_fmt_errmsg_for_errno(handle->errbuf, 3176 PCAP_ERRBUF_SIZE, errno, "getsockopt (SO_TYPE)"); 3177 return PCAP_ERROR; 3178 } 3179 maclen = (sk_type == SOCK_DGRAM) ? 
0 : MAX_LINKHEADER_SIZE; 3180 /* XXX: in the kernel maclen is calculated from 3181 * LL_ALLOCATED_SPACE(dev) and vnet_hdr.hdr_len 3182 * in: packet_snd() in linux-2.6/net/packet/af_packet.c 3183 * then packet_alloc_skb() in linux-2.6/net/packet/af_packet.c 3184 * then sock_alloc_send_pskb() in linux-2.6/net/core/sock.c 3185 * but I see no way to get those sizes in userspace, 3186 * like for instance with an ifreq ioctl(); 3187 * the best thing I've found so far is MAX_HEADER in 3188 * the kernel part of linux-2.6/include/linux/netdevice.h 3189 * which goes up to 128+48=176; since pcap-linux.c 3190 * defines a MAX_LINKHEADER_SIZE of 256 which is 3191 * greater than that, let's use it.. maybe is it even 3192 * large enough to directly replace macoff.. 3193 */ 3194 tp_hdrlen = TPACKET_ALIGN(handlep->tp_hdrlen) + sizeof(struct sockaddr_ll) ; 3195 netoff = TPACKET_ALIGN(tp_hdrlen + (maclen < 16 ? 16 : maclen)) + tp_reserve; 3196 /* NOTE: AFAICS tp_reserve may break the TPACKET_ALIGN 3197 * of netoff, which contradicts 3198 * linux-2.6/Documentation/networking/packet_mmap.txt 3199 * documenting that: 3200 * "- Gap, chosen so that packet data (Start+tp_net) 3201 * aligns to TPACKET_ALIGNMENT=16" 3202 */ 3203 /* NOTE: in linux-2.6/include/linux/skbuff.h: 3204 * "CPUs often take a performance hit 3205 * when accessing unaligned memory locations" 3206 */ 3207 macoff = netoff - maclen; 3208 req.tp_frame_size = TPACKET_ALIGN(macoff + frame_size); 3209 /* 3210 * Round the buffer size up to a multiple of the 3211 * frame size (rather than rounding down, which 3212 * would give a buffer smaller than our caller asked 3213 * for, and possibly give zero frames if the requested 3214 * buffer size is too small for one frame). 
3215 */ 3216 req.tp_frame_nr = (handle->opt.buffer_size + req.tp_frame_size - 1)/req.tp_frame_size; 3217 break; 3218 3219 #ifdef HAVE_TPACKET3 3220 case TPACKET_V3: 3221 /* The "frames" for this are actually buffers that 3222 * contain multiple variable-sized frames. 3223 * 3224 * We pick a "frame" size of MAXIMUM_SNAPLEN to leave 3225 * enough room for at least one reasonably-sized packet 3226 * in the "frame". */ 3227 req.tp_frame_size = MAXIMUM_SNAPLEN; 3228 /* 3229 * Round the buffer size up to a multiple of the 3230 * "frame" size (rather than rounding down, which 3231 * would give a buffer smaller than our caller asked 3232 * for, and possibly give zero "frames" if the requested 3233 * buffer size is too small for one "frame"). 3234 */ 3235 req.tp_frame_nr = (handle->opt.buffer_size + req.tp_frame_size - 1)/req.tp_frame_size; 3236 break; 3237 #endif 3238 default: 3239 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3240 "Internal error: unknown TPACKET_ value %u", 3241 handlep->tp_version); 3242 return PCAP_ERROR; 3243 } 3244 3245 /* compute the minimum block size that will handle this frame. 3246 * The block has to be page size aligned. 3247 * The max block size allowed by the kernel is arch-dependent and 3248 * it's not explicitly checked here. */ 3249 req.tp_block_size = getpagesize(); 3250 while (req.tp_block_size < req.tp_frame_size) 3251 req.tp_block_size <<= 1; 3252 3253 frames_per_block = req.tp_block_size/req.tp_frame_size; 3254 3255 /* 3256 * PACKET_TIMESTAMP was added after linux/net_tstamp.h was, 3257 * so we check for PACKET_TIMESTAMP. We check for 3258 * linux/net_tstamp.h just in case a system somehow has 3259 * PACKET_TIMESTAMP but not linux/net_tstamp.h; that might 3260 * be unnecessary. 
3261 * 3262 * SIOCSHWTSTAMP was introduced in the patch that introduced 3263 * linux/net_tstamp.h, so we don't bother checking whether 3264 * SIOCSHWTSTAMP is defined (if your Linux system has 3265 * linux/net_tstamp.h but doesn't define SIOCSHWTSTAMP, your 3266 * Linux system is badly broken). 3267 */ 3268 #if defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) 3269 /* 3270 * If we were told to do so, ask the kernel and the driver 3271 * to use hardware timestamps. 3272 * 3273 * Hardware timestamps are only supported with mmapped 3274 * captures. 3275 */ 3276 if (handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER || 3277 handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER_UNSYNCED) { 3278 struct hwtstamp_config hwconfig; 3279 struct ifreq ifr; 3280 int timesource; 3281 3282 /* 3283 * Ask for hardware time stamps on all packets, 3284 * including transmitted packets. 3285 */ 3286 memset(&hwconfig, 0, sizeof(hwconfig)); 3287 hwconfig.tx_type = HWTSTAMP_TX_ON; 3288 hwconfig.rx_filter = HWTSTAMP_FILTER_ALL; 3289 3290 memset(&ifr, 0, sizeof(ifr)); 3291 pcapint_strlcpy(ifr.ifr_name, handle->opt.device, sizeof(ifr.ifr_name)); 3292 ifr.ifr_data = (void *)&hwconfig; 3293 3294 /* 3295 * This may require CAP_NET_ADMIN. 3296 */ 3297 if (ioctl(handle->fd, SIOCSHWTSTAMP, &ifr) < 0) { 3298 switch (errno) { 3299 3300 case EPERM: 3301 /* 3302 * Treat this as an error, as the 3303 * user should try to run this 3304 * with the appropriate privileges - 3305 * and, if they can't, shouldn't 3306 * try requesting hardware time stamps. 3307 */ 3308 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3309 "Attempt to set hardware timestamp failed - CAP_NET_ADMIN may be required"); 3310 return PCAP_ERROR_PERM_DENIED; 3311 3312 case EOPNOTSUPP: 3313 case ERANGE: 3314 /* 3315 * Treat this as a warning, as the 3316 * only way to fix the warning is to 3317 * get an adapter that supports hardware 3318 * time stamps for *all* packets. 
3319 * (ERANGE means "we support hardware 3320 * time stamps, but for packets matching 3321 * that particular filter", so it means 3322 * "we don't support hardware time stamps 3323 * for all incoming packets" here.) 3324 * 3325 * We'll just fall back on the standard 3326 * host time stamps. 3327 */ 3328 status = PCAP_WARNING_TSTAMP_TYPE_NOTSUP; 3329 break; 3330 3331 default: 3332 pcapint_fmt_errmsg_for_errno(handle->errbuf, 3333 PCAP_ERRBUF_SIZE, errno, 3334 "SIOCSHWTSTAMP failed"); 3335 return PCAP_ERROR; 3336 } 3337 } else { 3338 /* 3339 * Well, that worked. Now specify the type of 3340 * hardware time stamp we want for this 3341 * socket. 3342 */ 3343 if (handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER) { 3344 /* 3345 * Hardware timestamp, synchronized 3346 * with the system clock. 3347 */ 3348 timesource = SOF_TIMESTAMPING_SYS_HARDWARE; 3349 } else { 3350 /* 3351 * PCAP_TSTAMP_ADAPTER_UNSYNCED - hardware 3352 * timestamp, not synchronized with the 3353 * system clock. 3354 */ 3355 timesource = SOF_TIMESTAMPING_RAW_HARDWARE; 3356 } 3357 if (setsockopt(handle->fd, SOL_PACKET, PACKET_TIMESTAMP, 3358 (void *)×ource, sizeof(timesource))) { 3359 pcapint_fmt_errmsg_for_errno(handle->errbuf, 3360 PCAP_ERRBUF_SIZE, errno, 3361 "can't set PACKET_TIMESTAMP"); 3362 return PCAP_ERROR; 3363 } 3364 } 3365 } 3366 #endif /* HAVE_LINUX_NET_TSTAMP_H && PACKET_TIMESTAMP */ 3367 3368 /* ask the kernel to create the ring */ 3369 retry: 3370 req.tp_block_nr = req.tp_frame_nr / frames_per_block; 3371 3372 /* req.tp_frame_nr is requested to match frames_per_block*req.tp_block_nr */ 3373 req.tp_frame_nr = req.tp_block_nr * frames_per_block; 3374 3375 #ifdef HAVE_TPACKET3 3376 /* timeout value to retire block - use the configured buffering timeout, or default if <0. 
*/ 3377 if (handlep->timeout > 0) { 3378 /* Use the user specified timeout as the block timeout */ 3379 req.tp_retire_blk_tov = handlep->timeout; 3380 } else if (handlep->timeout == 0) { 3381 /* 3382 * In pcap, this means "infinite timeout"; TPACKET_V3 3383 * doesn't support that, so just set it to UINT_MAX 3384 * milliseconds. In the TPACKET_V3 loop, if the 3385 * timeout is 0, and we haven't yet seen any packets, 3386 * and we block and still don't have any packets, we 3387 * keep blocking until we do. 3388 */ 3389 req.tp_retire_blk_tov = UINT_MAX; 3390 } else { 3391 /* 3392 * XXX - this is not valid; use 0, meaning "have the 3393 * kernel pick a default", for now. 3394 */ 3395 req.tp_retire_blk_tov = 0; 3396 } 3397 /* private data not used */ 3398 req.tp_sizeof_priv = 0; 3399 /* Rx ring - feature request bits - none (rxhash will not be filled) */ 3400 req.tp_feature_req_word = 0; 3401 #endif 3402 3403 if (setsockopt(handle->fd, SOL_PACKET, PACKET_RX_RING, 3404 (void *) &req, sizeof(req))) { 3405 if ((errno == ENOMEM) && (req.tp_block_nr > 1)) { 3406 /* 3407 * Memory failure; try to reduce the requested ring 3408 * size. 3409 * 3410 * We used to reduce this by half -- do 5% instead. 3411 * That may result in more iterations and a longer 3412 * startup, but the user will be much happier with 3413 * the resulting buffer size. 
3414 */ 3415 if (req.tp_frame_nr < 20) 3416 req.tp_frame_nr -= 1; 3417 else 3418 req.tp_frame_nr -= req.tp_frame_nr/20; 3419 goto retry; 3420 } 3421 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 3422 errno, "can't create rx ring on packet socket"); 3423 return PCAP_ERROR; 3424 } 3425 3426 /* memory map the rx ring */ 3427 handlep->mmapbuflen = req.tp_block_nr * req.tp_block_size; 3428 handlep->mmapbuf = mmap(0, handlep->mmapbuflen, 3429 PROT_READ|PROT_WRITE, MAP_SHARED, handle->fd, 0); 3430 if (handlep->mmapbuf == MAP_FAILED) { 3431 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 3432 errno, "can't mmap rx ring"); 3433 3434 /* clear the allocated ring on error*/ 3435 destroy_ring(handle); 3436 return PCAP_ERROR; 3437 } 3438 3439 /* allocate a ring for each frame header pointer*/ 3440 handle->cc = req.tp_frame_nr; 3441 handle->buffer = malloc(handle->cc * sizeof(union thdr *)); 3442 if (!handle->buffer) { 3443 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 3444 errno, "can't allocate ring of frame headers"); 3445 3446 destroy_ring(handle); 3447 return PCAP_ERROR; 3448 } 3449 3450 /* fill the header ring with proper frame ptr*/ 3451 handle->offset = 0; 3452 for (i=0; i<req.tp_block_nr; ++i) { 3453 u_char *base = &handlep->mmapbuf[i*req.tp_block_size]; 3454 for (j=0; j<frames_per_block; ++j, ++handle->offset) { 3455 RING_GET_CURRENT_FRAME(handle) = base; 3456 base += req.tp_frame_size; 3457 } 3458 } 3459 3460 handle->bufsize = req.tp_frame_size; 3461 handle->offset = 0; 3462 return status; 3463 } 3464 3465 /* free all ring related resources*/ 3466 static void 3467 destroy_ring(pcap_t *handle) 3468 { 3469 struct pcap_linux *handlep = handle->priv; 3470 3471 /* 3472 * Tell the kernel to destroy the ring. 3473 * We don't check for setsockopt failure, as 1) we can't recover 3474 * from an error and 2) we might not yet have set it up in the 3475 * first place. 
3476 */ 3477 struct tpacket_req req; 3478 memset(&req, 0, sizeof(req)); 3479 (void)setsockopt(handle->fd, SOL_PACKET, PACKET_RX_RING, 3480 (void *) &req, sizeof(req)); 3481 3482 /* if ring is mapped, unmap it*/ 3483 if (handlep->mmapbuf) { 3484 /* do not test for mmap failure, as we can't recover from any error */ 3485 (void)munmap(handlep->mmapbuf, handlep->mmapbuflen); 3486 handlep->mmapbuf = NULL; 3487 } 3488 } 3489 3490 /* 3491 * Special one-shot callback, used for pcap_next() and pcap_next_ex(), 3492 * for Linux mmapped capture. 3493 * 3494 * The problem is that pcap_next() and pcap_next_ex() expect the packet 3495 * data handed to the callback to be valid after the callback returns, 3496 * but pcap_read_linux_mmap() has to release that packet as soon as 3497 * the callback returns (otherwise, the kernel thinks there's still 3498 * at least one unprocessed packet available in the ring, so a select() 3499 * will immediately return indicating that there's data to process), so, 3500 * in the callback, we have to make a copy of the packet. 3501 * 3502 * Yes, this means that, if the capture is using the ring buffer, using 3503 * pcap_next() or pcap_next_ex() requires more copies than using 3504 * pcap_loop() or pcap_dispatch(). If that bothers you, don't use 3505 * pcap_next() or pcap_next_ex(). 
3506 */ 3507 static void 3508 pcapint_oneshot_linux(u_char *user, const struct pcap_pkthdr *h, 3509 const u_char *bytes) 3510 { 3511 struct oneshot_userdata *sp = (struct oneshot_userdata *)user; 3512 pcap_t *handle = sp->pd; 3513 struct pcap_linux *handlep = handle->priv; 3514 3515 *sp->hdr = *h; 3516 memcpy(handlep->oneshot_buffer, bytes, h->caplen); 3517 *sp->pkt = handlep->oneshot_buffer; 3518 } 3519 3520 static int 3521 pcap_getnonblock_linux(pcap_t *handle) 3522 { 3523 struct pcap_linux *handlep = handle->priv; 3524 3525 /* use negative value of timeout to indicate non blocking ops */ 3526 return (handlep->timeout<0); 3527 } 3528 3529 static int 3530 pcap_setnonblock_linux(pcap_t *handle, int nonblock) 3531 { 3532 struct pcap_linux *handlep = handle->priv; 3533 3534 /* 3535 * Set the file descriptor to the requested mode, as we use 3536 * it for sending packets. 3537 */ 3538 if (pcapint_setnonblock_fd(handle, nonblock) == -1) 3539 return -1; 3540 3541 /* 3542 * Map each value to their corresponding negation to 3543 * preserve the timeout value provided with pcap_set_timeout. 3544 */ 3545 if (nonblock) { 3546 /* 3547 * We're setting the mode to non-blocking mode. 3548 */ 3549 if (handlep->timeout >= 0) { 3550 /* 3551 * Indicate that we're switching to 3552 * non-blocking mode. 3553 */ 3554 handlep->timeout = ~handlep->timeout; 3555 } 3556 if (handlep->poll_breakloop_fd != -1) { 3557 /* Close the eventfd; we do not need it in nonblock mode. */ 3558 close(handlep->poll_breakloop_fd); 3559 handlep->poll_breakloop_fd = -1; 3560 } 3561 } else { 3562 /* 3563 * We're setting the mode to blocking mode. 3564 */ 3565 if (handlep->poll_breakloop_fd == -1) { 3566 /* If we did not have an eventfd, open one now that we are blocking. 
*/ 3567 if ( ( handlep->poll_breakloop_fd = eventfd(0, EFD_NONBLOCK) ) == -1 ) { 3568 pcapint_fmt_errmsg_for_errno(handle->errbuf, 3569 PCAP_ERRBUF_SIZE, errno, 3570 "could not open eventfd"); 3571 return -1; 3572 } 3573 } 3574 if (handlep->timeout < 0) { 3575 handlep->timeout = ~handlep->timeout; 3576 } 3577 } 3578 /* Update the timeout to use in poll(). */ 3579 set_poll_timeout(handlep); 3580 return 0; 3581 } 3582 3583 /* 3584 * Get the status field of the ring buffer frame at a specified offset. 3585 */ 3586 static inline u_int 3587 pcap_get_ring_frame_status(pcap_t *handle, int offset) 3588 { 3589 struct pcap_linux *handlep = handle->priv; 3590 union thdr h; 3591 3592 h.raw = RING_GET_FRAME_AT(handle, offset); 3593 switch (handlep->tp_version) { 3594 case TPACKET_V2: 3595 return __atomic_load_n(&h.h2->tp_status, __ATOMIC_ACQUIRE); 3596 break; 3597 #ifdef HAVE_TPACKET3 3598 case TPACKET_V3: 3599 return __atomic_load_n(&h.h3->hdr.bh1.block_status, __ATOMIC_ACQUIRE); 3600 break; 3601 #endif 3602 } 3603 /* This should not happen. */ 3604 return 0; 3605 } 3606 3607 /* 3608 * Block waiting for frames to be available. 3609 */ 3610 static int pcap_wait_for_frames_mmap(pcap_t *handle) 3611 { 3612 struct pcap_linux *handlep = handle->priv; 3613 int timeout; 3614 struct ifreq ifr; 3615 int ret; 3616 struct pollfd pollinfo[2]; 3617 int numpollinfo; 3618 pollinfo[0].fd = handle->fd; 3619 pollinfo[0].events = POLLIN; 3620 if ( handlep->poll_breakloop_fd == -1 ) { 3621 numpollinfo = 1; 3622 pollinfo[1].revents = 0; 3623 /* 3624 * We set pollinfo[1].revents to zero, even though 3625 * numpollinfo = 1 meaning that poll() doesn't see 3626 * pollinfo[1], so that we do not have to add a 3627 * conditional of numpollinfo > 1 below when we 3628 * test pollinfo[1].revents. 
3629 */ 3630 } else { 3631 pollinfo[1].fd = handlep->poll_breakloop_fd; 3632 pollinfo[1].events = POLLIN; 3633 numpollinfo = 2; 3634 } 3635 3636 /* 3637 * Keep polling until we either get some packets to read, see 3638 * that we got told to break out of the loop, get a fatal error, 3639 * or discover that the device went away. 3640 * 3641 * In non-blocking mode, we must still do one poll() to catch 3642 * any pending error indications, but the poll() has a timeout 3643 * of 0, so that it doesn't block, and we quit after that one 3644 * poll(). 3645 * 3646 * If we've seen an ENETDOWN, it might be the first indication 3647 * that the device went away, or it might just be that it was 3648 * configured down. Unfortunately, there's no guarantee that 3649 * the device has actually been removed as an interface, because: 3650 * 3651 * 1) if, as appears to be the case at least some of the time, 3652 * the PF_PACKET socket code first gets a NETDEV_DOWN indication 3653 * for the device and then gets a NETDEV_UNREGISTER indication 3654 * for it, the first indication will cause a wakeup with ENETDOWN 3655 * but won't set the packet socket's field for the interface index 3656 * to -1, and the second indication won't cause a wakeup (because 3657 * the first indication also caused the protocol hook to be 3658 * unregistered) but will set the packet socket's field for the 3659 * interface index to -1; 3660 * 3661 * 2) even if just a NETDEV_UNREGISTER indication is registered, 3662 * the packet socket's field for the interface index only gets 3663 * set to -1 after the wakeup, so there's a small but non-zero 3664 * risk that a thread blocked waiting for the wakeup will get 3665 * to the "fetch the socket name" code before the interface index 3666 * gets set to -1, so it'll get the old interface index. 
3667 * 3668 * Therefore, if we got an ENETDOWN and haven't seen a packet 3669 * since then, we assume that we might be waiting for the interface 3670 * to disappear, and poll with a timeout to try again in a short 3671 * period of time. If we *do* see a packet, the interface has 3672 * come back up again, and is *definitely* still there, so we 3673 * don't need to poll. 3674 */ 3675 for (;;) { 3676 /* 3677 * Yes, we do this even in non-blocking mode, as it's 3678 * the only way to get error indications from a 3679 * tpacket socket. 3680 * 3681 * The timeout is 0 in non-blocking mode, so poll() 3682 * returns immediately. 3683 */ 3684 timeout = handlep->poll_timeout; 3685 3686 /* 3687 * If we got an ENETDOWN and haven't gotten an indication 3688 * that the device has gone away or that the device is up, 3689 * we don't yet know for certain whether the device has 3690 * gone away or not, do a poll() with a 1-millisecond timeout, 3691 * as we have to poll indefinitely for "device went away" 3692 * indications until we either get one or see that the 3693 * device is up. 3694 */ 3695 if (handlep->netdown) { 3696 if (timeout != 0) 3697 timeout = 1; 3698 } 3699 ret = poll(pollinfo, numpollinfo, timeout); 3700 if (ret < 0) { 3701 /* 3702 * Error. If it's not EINTR, report it. 3703 */ 3704 if (errno != EINTR) { 3705 pcapint_fmt_errmsg_for_errno(handle->errbuf, 3706 PCAP_ERRBUF_SIZE, errno, 3707 "can't poll on packet socket"); 3708 return PCAP_ERROR; 3709 } 3710 3711 /* 3712 * It's EINTR; if we were told to break out of 3713 * the loop, do so. 3714 */ 3715 if (handle->break_loop) { 3716 handle->break_loop = 0; 3717 return PCAP_ERROR_BREAK; 3718 } 3719 } else if (ret > 0) { 3720 /* 3721 * OK, some descriptor is ready. 3722 * Check the socket descriptor first. 3723 * 3724 * As I read the Linux man page, pollinfo[0].revents 3725 * will either be POLLIN, POLLERR, POLLHUP, or POLLNVAL. 
3726 */ 3727 if (pollinfo[0].revents == POLLIN) { 3728 /* 3729 * OK, we may have packets to 3730 * read. 3731 */ 3732 break; 3733 } 3734 if (pollinfo[0].revents != 0) { 3735 /* 3736 * There's some indication other than 3737 * "you can read on this descriptor" on 3738 * the descriptor. 3739 */ 3740 if (pollinfo[0].revents & POLLNVAL) { 3741 snprintf(handle->errbuf, 3742 PCAP_ERRBUF_SIZE, 3743 "Invalid polling request on packet socket"); 3744 return PCAP_ERROR; 3745 } 3746 if (pollinfo[0].revents & (POLLHUP | POLLRDHUP)) { 3747 snprintf(handle->errbuf, 3748 PCAP_ERRBUF_SIZE, 3749 "Hangup on packet socket"); 3750 return PCAP_ERROR; 3751 } 3752 if (pollinfo[0].revents & POLLERR) { 3753 /* 3754 * Get the error. 3755 */ 3756 int err; 3757 socklen_t errlen; 3758 3759 errlen = sizeof(err); 3760 if (getsockopt(handle->fd, SOL_SOCKET, 3761 SO_ERROR, &err, &errlen) == -1) { 3762 /* 3763 * The call *itself* returned 3764 * an error; make *that* 3765 * the error. 3766 */ 3767 err = errno; 3768 } 3769 3770 /* 3771 * OK, we have the error. 3772 */ 3773 if (err == ENETDOWN) { 3774 /* 3775 * The device on which we're 3776 * capturing went away or the 3777 * interface was taken down. 3778 * 3779 * We don't know for certain 3780 * which happened, and the 3781 * next poll() may indicate 3782 * that there are packets 3783 * to be read, so just set 3784 * a flag to get us to do 3785 * checks later, and set 3786 * the required select 3787 * timeout to 1 millisecond 3788 * so that event loops that 3789 * check our socket descriptor 3790 * also time out so that 3791 * they can call us and we 3792 * can do the checks. 3793 */ 3794 handlep->netdown = 1; 3795 handle->required_select_timeout = &netdown_timeout; 3796 } else if (err == 0) { 3797 /* 3798 * This shouldn't happen, so 3799 * report a special indication 3800 * that it did. 
3801 */ 3802 snprintf(handle->errbuf, 3803 PCAP_ERRBUF_SIZE, 3804 "Error condition on packet socket: Reported error was 0"); 3805 return PCAP_ERROR; 3806 } else { 3807 pcapint_fmt_errmsg_for_errno(handle->errbuf, 3808 PCAP_ERRBUF_SIZE, 3809 err, 3810 "Error condition on packet socket"); 3811 return PCAP_ERROR; 3812 } 3813 } 3814 } 3815 /* 3816 * Now check the event device. 3817 */ 3818 if (pollinfo[1].revents & POLLIN) { 3819 ssize_t nread; 3820 uint64_t value; 3821 3822 /* 3823 * This should never fail, but, just 3824 * in case.... 3825 */ 3826 nread = read(handlep->poll_breakloop_fd, &value, 3827 sizeof(value)); 3828 if (nread == -1) { 3829 pcapint_fmt_errmsg_for_errno(handle->errbuf, 3830 PCAP_ERRBUF_SIZE, 3831 errno, 3832 "Error reading from event FD"); 3833 return PCAP_ERROR; 3834 } 3835 3836 /* 3837 * According to the Linux read(2) man 3838 * page, read() will transfer at most 3839 * 2^31-1 bytes, so the return value is 3840 * either -1 or a value between 0 3841 * and 2^31-1, so it's non-negative. 3842 * 3843 * Cast it to size_t to squelch 3844 * warnings from the compiler; add this 3845 * comment to squelch warnings from 3846 * humans reading the code. :-) 3847 * 3848 * Don't treat an EOF as an error, but 3849 * *do* treat a short read as an error; 3850 * that "shouldn't happen", but.... 3851 */ 3852 if (nread != 0 && 3853 (size_t)nread < sizeof(value)) { 3854 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3855 "Short read from event FD: expected %zu, got %zd", 3856 sizeof(value), nread); 3857 return PCAP_ERROR; 3858 } 3859 3860 /* 3861 * This event gets signaled by a 3862 * pcap_breakloop() call; if we were told 3863 * to break out of the loop, do so. 
3864 */ 3865 if (handle->break_loop) { 3866 handle->break_loop = 0; 3867 return PCAP_ERROR_BREAK; 3868 } 3869 } 3870 } 3871 3872 /* 3873 * Either: 3874 * 3875 * 1) we got neither an error from poll() nor any 3876 * readable descriptors, in which case there 3877 * are no packets waiting to read 3878 * 3879 * or 3880 * 3881 * 2) We got readable descriptors but the PF_PACKET 3882 * socket wasn't one of them, in which case there 3883 * are no packets waiting to read 3884 * 3885 * so, if we got an ENETDOWN, we've drained whatever 3886 * packets were available to read at the point of the 3887 * ENETDOWN. 3888 * 3889 * So, if we got an ENETDOWN and haven't gotten an indication 3890 * that the device has gone away or that the device is up, 3891 * we don't yet know for certain whether the device has 3892 * gone away or not, check whether the device exists and is 3893 * up. 3894 */ 3895 if (handlep->netdown) { 3896 if (!device_still_exists(handle)) { 3897 /* 3898 * The device doesn't exist any more; 3899 * report that. 3900 * 3901 * XXX - we should really return an 3902 * appropriate error for that, but 3903 * pcap_dispatch() etc. aren't documented 3904 * as having error returns other than 3905 * PCAP_ERROR or PCAP_ERROR_BREAK. 3906 */ 3907 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3908 "The interface disappeared"); 3909 return PCAP_ERROR; 3910 } 3911 3912 /* 3913 * The device still exists; try to see if it's up. 3914 */ 3915 memset(&ifr, 0, sizeof(ifr)); 3916 pcapint_strlcpy(ifr.ifr_name, handlep->device, 3917 sizeof(ifr.ifr_name)); 3918 if (ioctl(handle->fd, SIOCGIFFLAGS, &ifr) == -1) { 3919 if (errno == ENXIO || errno == ENODEV) { 3920 /* 3921 * OK, *now* it's gone. 3922 * 3923 * XXX - see above comment. 
3924 */ 3925 snprintf(handle->errbuf, 3926 PCAP_ERRBUF_SIZE, 3927 "The interface disappeared"); 3928 return PCAP_ERROR; 3929 } else { 3930 pcapint_fmt_errmsg_for_errno(handle->errbuf, 3931 PCAP_ERRBUF_SIZE, errno, 3932 "%s: Can't get flags", 3933 handlep->device); 3934 return PCAP_ERROR; 3935 } 3936 } 3937 if (ifr.ifr_flags & IFF_UP) { 3938 /* 3939 * It's up, so it definitely still exists. 3940 * Cancel the ENETDOWN indication - we 3941 * presumably got it due to the interface 3942 * going down rather than the device going 3943 * away - and revert to "no required select 3944 * timeout. 3945 */ 3946 handlep->netdown = 0; 3947 handle->required_select_timeout = NULL; 3948 } 3949 } 3950 3951 /* 3952 * If we're in non-blocking mode, just quit now, rather 3953 * than spinning in a loop doing poll()s that immediately 3954 * time out if there's no indication on any descriptor. 3955 */ 3956 if (handlep->poll_timeout == 0) 3957 break; 3958 } 3959 return 0; 3960 } 3961 3962 /* handle a single memory mapped packet */ 3963 static int pcap_handle_packet_mmap( 3964 pcap_t *handle, 3965 pcap_handler callback, 3966 u_char *user, 3967 unsigned char *frame, 3968 unsigned int tp_len, 3969 unsigned int tp_mac, 3970 unsigned int tp_snaplen, 3971 unsigned int tp_sec, 3972 unsigned int tp_usec, 3973 int tp_vlan_tci_valid, 3974 __u16 tp_vlan_tci, 3975 __u16 tp_vlan_tpid) 3976 { 3977 struct pcap_linux *handlep = handle->priv; 3978 unsigned char *bp; 3979 struct sockaddr_ll *sll; 3980 struct pcap_pkthdr pcaphdr; 3981 unsigned int snaplen = tp_snaplen; 3982 struct utsname utsname; 3983 3984 /* perform sanity check on internal offset. */ 3985 if (tp_mac + tp_snaplen > handle->bufsize) { 3986 /* 3987 * Report some system information as a debugging aid. 
3988 */ 3989 if (uname(&utsname) != -1) { 3990 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3991 "corrupted frame on kernel ring mac " 3992 "offset %u + caplen %u > frame len %d " 3993 "(kernel %.32s version %s, machine %.16s)", 3994 tp_mac, tp_snaplen, handle->bufsize, 3995 utsname.release, utsname.version, 3996 utsname.machine); 3997 } else { 3998 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3999 "corrupted frame on kernel ring mac " 4000 "offset %u + caplen %u > frame len %d", 4001 tp_mac, tp_snaplen, handle->bufsize); 4002 } 4003 return -1; 4004 } 4005 4006 /* run filter on received packet 4007 * If the kernel filtering is enabled we need to run the 4008 * filter until all the frames present into the ring 4009 * at filter creation time are processed. 4010 * In this case, blocks_to_filter_in_userland is used 4011 * as a counter for the packet we need to filter. 4012 * Note: alternatively it could be possible to stop applying 4013 * the filter when the ring became empty, but it can possibly 4014 * happen a lot later... */ 4015 bp = frame + tp_mac; 4016 4017 /* if required build in place the sll header*/ 4018 sll = (void *)(frame + TPACKET_ALIGN(handlep->tp_hdrlen)); 4019 if (handlep->cooked) { 4020 if (handle->linktype == DLT_LINUX_SLL2) { 4021 struct sll2_header *hdrp; 4022 4023 /* 4024 * The kernel should have left us with enough 4025 * space for an sll header; back up the packet 4026 * data pointer into that space, as that'll be 4027 * the beginning of the packet we pass to the 4028 * callback. 4029 */ 4030 bp -= SLL2_HDR_LEN; 4031 4032 /* 4033 * Let's make sure that's past the end of 4034 * the tpacket header, i.e. >= 4035 * ((u_char *)thdr + TPACKET_HDRLEN), so we 4036 * don't step on the header when we construct 4037 * the sll header. 
4038 */ 4039 if (bp < (u_char *)frame + 4040 TPACKET_ALIGN(handlep->tp_hdrlen) + 4041 sizeof(struct sockaddr_ll)) { 4042 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 4043 "cooked-mode frame doesn't have room for sll header"); 4044 return -1; 4045 } 4046 4047 /* 4048 * OK, that worked; construct the sll header. 4049 */ 4050 hdrp = (struct sll2_header *)bp; 4051 hdrp->sll2_protocol = sll->sll_protocol; 4052 hdrp->sll2_reserved_mbz = 0; 4053 hdrp->sll2_if_index = htonl(sll->sll_ifindex); 4054 hdrp->sll2_hatype = htons(sll->sll_hatype); 4055 hdrp->sll2_pkttype = sll->sll_pkttype; 4056 hdrp->sll2_halen = sll->sll_halen; 4057 memcpy(hdrp->sll2_addr, sll->sll_addr, SLL_ADDRLEN); 4058 4059 snaplen += sizeof(struct sll2_header); 4060 } else { 4061 struct sll_header *hdrp; 4062 4063 /* 4064 * The kernel should have left us with enough 4065 * space for an sll header; back up the packet 4066 * data pointer into that space, as that'll be 4067 * the beginning of the packet we pass to the 4068 * callback. 4069 */ 4070 bp -= SLL_HDR_LEN; 4071 4072 /* 4073 * Let's make sure that's past the end of 4074 * the tpacket header, i.e. >= 4075 * ((u_char *)thdr + TPACKET_HDRLEN), so we 4076 * don't step on the header when we construct 4077 * the sll header. 4078 */ 4079 if (bp < (u_char *)frame + 4080 TPACKET_ALIGN(handlep->tp_hdrlen) + 4081 sizeof(struct sockaddr_ll)) { 4082 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 4083 "cooked-mode frame doesn't have room for sll header"); 4084 return -1; 4085 } 4086 4087 /* 4088 * OK, that worked; construct the sll header. 
4089 */ 4090 hdrp = (struct sll_header *)bp; 4091 hdrp->sll_pkttype = htons(sll->sll_pkttype); 4092 hdrp->sll_hatype = htons(sll->sll_hatype); 4093 hdrp->sll_halen = htons(sll->sll_halen); 4094 memcpy(hdrp->sll_addr, sll->sll_addr, SLL_ADDRLEN); 4095 hdrp->sll_protocol = sll->sll_protocol; 4096 4097 snaplen += sizeof(struct sll_header); 4098 } 4099 } else { 4100 /* 4101 * If this is a packet from a CAN device, so that 4102 * sll->sll_hatype is ARPHRD_CAN, then, as we're 4103 * not capturing in cooked mode, its link-layer 4104 * type is DLT_CAN_SOCKETCAN. Fix up the header 4105 * provided by the code below us to match what 4106 * DLT_CAN_SOCKETCAN is expected to provide. 4107 */ 4108 if (sll->sll_hatype == ARPHRD_CAN) { 4109 pcap_can_socketcan_hdr *canhdr = (pcap_can_socketcan_hdr *)bp; 4110 uint16_t protocol = ntohs(sll->sll_protocol); 4111 4112 /* 4113 * Check the protocol field from the sll header. 4114 * If it's one of the known CAN protocol types, 4115 * make sure the appropriate flags are set, so 4116 * that a program can tell what type of frame 4117 * it is. 4118 * 4119 * The two flags are: 4120 * 4121 * CANFD_FDF, which is in the fd_flags field 4122 * of the CAN classic/CAN FD header; 4123 * 4124 * CANXL_XLF, which is in the flags field 4125 * of the CAN XL header, which overlaps 4126 * the payload_length field of the CAN 4127 * classic/CAN FD header. 4128 */ 4129 switch (protocol) { 4130 4131 case LINUX_SLL_P_CAN: 4132 /* 4133 * CAN classic. 4134 * 4135 * Zero out the fd_flags and reserved 4136 * fields, in case they're uninitialized 4137 * crap, and clear the CANXL_XLF bit in 4138 * the payload_length field. 4139 * 4140 * This means that the CANFD_FDF flag isn't 4141 * set in the fd_flags field, and that 4142 * the CANXL_XLF bit isn't set in the 4143 * payload_length field, so this frame 4144 * will appear to be a CAN classic frame. 
4145 */ 4146 canhdr->payload_length &= ~CANXL_XLF; 4147 canhdr->fd_flags = 0; 4148 canhdr->reserved1 = 0; 4149 canhdr->reserved2 = 0; 4150 break; 4151 4152 case LINUX_SLL_P_CANFD: 4153 /* 4154 * Set CANFD_FDF in the fd_flags field, 4155 * and clear the CANXL_XLF bit in the 4156 * payload_length field, so this frame 4157 * will appear to be a CAN FD frame. 4158 */ 4159 canhdr->payload_length &= ~CANXL_XLF; 4160 canhdr->fd_flags |= CANFD_FDF; 4161 4162 /* 4163 * Zero out all the unknown bits in fd_flags 4164 * and clear the reserved fields, so that 4165 * a program reading this can assume that 4166 * CANFD_FDF is set because we set it, not 4167 * because some uninitialized crap was 4168 * provided in the fd_flags field. 4169 * 4170 * (At least some LINKTYPE_CAN_SOCKETCAN 4171 * files attached to Wireshark bugs had 4172 * uninitialized junk there, so it does 4173 * happen.) 4174 * 4175 * Update this if Linux adds more flag bits 4176 * to the fd_flags field or uses either of 4177 * the reserved fields for FD frames. 4178 */ 4179 canhdr->fd_flags &= (CANFD_FDF|CANFD_ESI|CANFD_BRS); 4180 canhdr->reserved1 = 0; 4181 canhdr->reserved2 = 0; 4182 break; 4183 4184 case LINUX_SLL_P_CANXL: 4185 /* 4186 * CAN XL frame. 4187 * 4188 * Make sure the CANXL_XLF bit is set in 4189 * the payload_length field, so that 4190 * this frame will appear to be a 4191 * CAN XL frame. 4192 */ 4193 canhdr->payload_length |= CANXL_XLF; 4194 break; 4195 } 4196 4197 /* 4198 * Put multi-byte header fields in a byte-order 4199 *-independent format. 4200 */ 4201 if (canhdr->payload_length & CANXL_XLF) { 4202 /* 4203 * This is a CAN XL frame. 4204 * 4205 * DLT_CAN_SOCKETCAN is specified as having 4206 * the Priority ID/VCID field in big-- 4207 * endian byte order, and the payload length 4208 * and Acceptance Field in little-endian byte 4209 * order. but capturing on a CAN device 4210 * provides them in host byte order. 4211 * Convert them to the appropriate byte 4212 * orders. 
4213 * 4214 * The reason we put the first field 4215 * into big-endian byte order is that 4216 * older libpcap code, ignorant of 4217 * CAN XL, treated it as the CAN ID 4218 * field and put it into big-endian 4219 * byte order, and we don't want to 4220 * break code that understands CAN XL 4221 * headers, and treats that field as 4222 * being big-endian. 4223 * 4224 * The other fields are put in little- 4225 * endian byte order is that older 4226 * libpcap code, ignorant of CAN XL, 4227 * left those fields alone, and the 4228 * processors on which the CAN XL 4229 * frames were captured are likely 4230 * to be little-endian processors. 4231 */ 4232 pcap_can_socketcan_xl_hdr *canxl_hdr = (pcap_can_socketcan_xl_hdr *)bp; 4233 4234 #if __BYTE_ORDER == __LITTLE_ENDIAN 4235 /* 4236 * We're capturing on a little-endian 4237 * machine, so we put the priority/VCID 4238 * field into big-endian byte order, and 4239 * leave the payload length and acceptance 4240 * field in little-endian byte order. 4241 */ 4242 /* Byte-swap priority/VCID. */ 4243 canxl_hdr->priority_vcid = SWAPLONG(canxl_hdr->priority_vcid); 4244 #elif __BYTE_ORDER == __BIG_ENDIAN 4245 /* 4246 * We're capturing on a big-endian 4247 * machine, so we want to leave the 4248 * priority/VCID field alone, and byte-swap 4249 * the payload length and acceptance 4250 * fields to little-endian. 4251 */ 4252 /* Byte-swap the payload length */ 4253 canxl_hdr->payload_length = SWAPSHORT(canxl_hdr->payload_length); 4254 4255 /* 4256 * Byte-swap the acceptance field. 4257 * 4258 * XXX - is it just a 4-octet string, 4259 * not in any byte order? 4260 */ 4261 canxl_hdr->acceptance_field = SWAPLONG(canxl_hdr->acceptance_field); 4262 #else 4263 #error "Unknown byte order" 4264 #endif 4265 } else { 4266 /* 4267 * CAN or CAN FD frame. 4268 * 4269 * DLT_CAN_SOCKETCAN is specified as having 4270 * the CAN ID and flags in network byte 4271 * order, but capturing on a CAN device 4272 * provides it in host byte order. 
Convert 4273 * it to network byte order. 4274 */ 4275 canhdr->can_id = htonl(canhdr->can_id); 4276 } 4277 } 4278 } 4279 4280 if (handlep->filter_in_userland && handle->fcode.bf_insns) { 4281 struct pcap_bpf_aux_data aux_data; 4282 4283 aux_data.vlan_tag_present = tp_vlan_tci_valid; 4284 aux_data.vlan_tag = tp_vlan_tci & 0x0fff; 4285 4286 if (pcapint_filter_with_aux_data(handle->fcode.bf_insns, 4287 bp, 4288 tp_len, 4289 snaplen, 4290 &aux_data) == 0) 4291 return 0; 4292 } 4293 4294 if (!linux_check_direction(handle, sll)) 4295 return 0; 4296 4297 /* get required packet info from ring header */ 4298 pcaphdr.ts.tv_sec = tp_sec; 4299 pcaphdr.ts.tv_usec = tp_usec; 4300 pcaphdr.caplen = tp_snaplen; 4301 pcaphdr.len = tp_len; 4302 4303 /* if required build in place the sll header*/ 4304 if (handlep->cooked) { 4305 /* update packet len */ 4306 if (handle->linktype == DLT_LINUX_SLL2) { 4307 pcaphdr.caplen += SLL2_HDR_LEN; 4308 pcaphdr.len += SLL2_HDR_LEN; 4309 } else { 4310 pcaphdr.caplen += SLL_HDR_LEN; 4311 pcaphdr.len += SLL_HDR_LEN; 4312 } 4313 } 4314 4315 if (tp_vlan_tci_valid && 4316 handlep->vlan_offset != -1 && 4317 tp_snaplen >= (unsigned int) handlep->vlan_offset) 4318 { 4319 struct vlan_tag *tag; 4320 4321 /* 4322 * Move everything in the header, except the type field, 4323 * down VLAN_TAG_LEN bytes, to allow us to insert the 4324 * VLAN tag between that stuff and the type field. 4325 */ 4326 bp -= VLAN_TAG_LEN; 4327 memmove(bp, bp + VLAN_TAG_LEN, handlep->vlan_offset); 4328 4329 /* 4330 * Now insert the tag. 4331 */ 4332 tag = (struct vlan_tag *)(bp + handlep->vlan_offset); 4333 tag->vlan_tpid = htons(tp_vlan_tpid); 4334 tag->vlan_tci = htons(tp_vlan_tci); 4335 4336 /* 4337 * Add the tag to the packet lengths. 
4338 */ 4339 pcaphdr.caplen += VLAN_TAG_LEN; 4340 pcaphdr.len += VLAN_TAG_LEN; 4341 } 4342 4343 /* 4344 * The only way to tell the kernel to cut off the 4345 * packet at a snapshot length is with a filter program; 4346 * if there's no filter program, the kernel won't cut 4347 * the packet off. 4348 * 4349 * Trim the snapshot length to be no longer than the 4350 * specified snapshot length. 4351 * 4352 * XXX - an alternative is to put a filter, consisting 4353 * of a "ret <snaplen>" instruction, on the socket 4354 * in the activate routine, so that the truncation is 4355 * done in the kernel even if nobody specified a filter; 4356 * that means that less buffer space is consumed in 4357 * the memory-mapped buffer. 4358 */ 4359 if (pcaphdr.caplen > (bpf_u_int32)handle->snapshot) 4360 pcaphdr.caplen = handle->snapshot; 4361 4362 /* pass the packet to the user */ 4363 callback(user, &pcaphdr, bp); 4364 4365 return 1; 4366 } 4367 4368 static int 4369 pcap_read_linux_mmap_v2(pcap_t *handle, int max_packets, pcap_handler callback, 4370 u_char *user) 4371 { 4372 struct pcap_linux *handlep = handle->priv; 4373 union thdr h; 4374 int pkts = 0; 4375 int ret; 4376 4377 /* wait for frames availability.*/ 4378 h.raw = RING_GET_CURRENT_FRAME(handle); 4379 if (!packet_mmap_acquire(h.h2)) { 4380 /* 4381 * The current frame is owned by the kernel; wait for 4382 * a frame to be handed to us. 4383 */ 4384 ret = pcap_wait_for_frames_mmap(handle); 4385 if (ret) { 4386 return ret; 4387 } 4388 } 4389 4390 /* 4391 * This can conceivably process more than INT_MAX packets, 4392 * which would overflow the packet count, causing it either 4393 * to look like a negative number, and thus cause us to 4394 * return a value that looks like an error, or overflow 4395 * back into positive territory, and thus cause us to 4396 * return a too-low count. 
4397 * 4398 * Therefore, if the packet count is unlimited, we clip 4399 * it at INT_MAX; this routine is not expected to 4400 * process packets indefinitely, so that's not an issue. 4401 */ 4402 if (PACKET_COUNT_IS_UNLIMITED(max_packets)) 4403 max_packets = INT_MAX; 4404 4405 while (pkts < max_packets) { 4406 /* 4407 * Get the current ring buffer frame, and break if 4408 * it's still owned by the kernel. 4409 */ 4410 h.raw = RING_GET_CURRENT_FRAME(handle); 4411 if (!packet_mmap_acquire(h.h2)) 4412 break; 4413 4414 ret = pcap_handle_packet_mmap( 4415 handle, 4416 callback, 4417 user, 4418 h.raw, 4419 h.h2->tp_len, 4420 h.h2->tp_mac, 4421 h.h2->tp_snaplen, 4422 h.h2->tp_sec, 4423 handle->opt.tstamp_precision == PCAP_TSTAMP_PRECISION_NANO ? h.h2->tp_nsec : h.h2->tp_nsec / 1000, 4424 VLAN_VALID(h.h2, h.h2), 4425 h.h2->tp_vlan_tci, 4426 VLAN_TPID(h.h2, h.h2)); 4427 if (ret == 1) { 4428 pkts++; 4429 } else if (ret < 0) { 4430 return ret; 4431 } 4432 4433 /* 4434 * Hand this block back to the kernel, and, if we're 4435 * counting blocks that need to be filtered in userland 4436 * after having been filtered by the kernel, count 4437 * the one we've just processed. 4438 */ 4439 packet_mmap_release(h.h2); 4440 if (handlep->blocks_to_filter_in_userland > 0) { 4441 handlep->blocks_to_filter_in_userland--; 4442 if (handlep->blocks_to_filter_in_userland == 0) { 4443 /* 4444 * No more blocks need to be filtered 4445 * in userland. 
4446 */ 4447 handlep->filter_in_userland = 0; 4448 } 4449 } 4450 4451 /* next block */ 4452 if (++handle->offset >= handle->cc) 4453 handle->offset = 0; 4454 4455 /* check for break loop condition*/ 4456 if (handle->break_loop) { 4457 handle->break_loop = 0; 4458 return PCAP_ERROR_BREAK; 4459 } 4460 } 4461 return pkts; 4462 } 4463 4464 #ifdef HAVE_TPACKET3 4465 static int 4466 pcap_read_linux_mmap_v3(pcap_t *handle, int max_packets, pcap_handler callback, 4467 u_char *user) 4468 { 4469 struct pcap_linux *handlep = handle->priv; 4470 union thdr h; 4471 int pkts = 0; 4472 int ret; 4473 4474 again: 4475 if (handlep->current_packet == NULL) { 4476 /* wait for frames availability.*/ 4477 h.raw = RING_GET_CURRENT_FRAME(handle); 4478 if (!packet_mmap_v3_acquire(h.h3)) { 4479 /* 4480 * The current frame is owned by the kernel; wait 4481 * for a frame to be handed to us. 4482 */ 4483 ret = pcap_wait_for_frames_mmap(handle); 4484 if (ret) { 4485 return ret; 4486 } 4487 } 4488 } 4489 h.raw = RING_GET_CURRENT_FRAME(handle); 4490 if (!packet_mmap_v3_acquire(h.h3)) { 4491 if (pkts == 0 && handlep->timeout == 0) { 4492 /* Block until we see a packet. */ 4493 goto again; 4494 } 4495 return pkts; 4496 } 4497 4498 /* 4499 * This can conceivably process more than INT_MAX packets, 4500 * which would overflow the packet count, causing it either 4501 * to look like a negative number, and thus cause us to 4502 * return a value that looks like an error, or overflow 4503 * back into positive territory, and thus cause us to 4504 * return a too-low count. 4505 * 4506 * Therefore, if the packet count is unlimited, we clip 4507 * it at INT_MAX; this routine is not expected to 4508 * process packets indefinitely, so that's not an issue. 
4509 */ 4510 if (PACKET_COUNT_IS_UNLIMITED(max_packets)) 4511 max_packets = INT_MAX; 4512 4513 while (pkts < max_packets) { 4514 int packets_to_read; 4515 4516 if (handlep->current_packet == NULL) { 4517 h.raw = RING_GET_CURRENT_FRAME(handle); 4518 if (!packet_mmap_v3_acquire(h.h3)) 4519 break; 4520 4521 handlep->current_packet = h.raw + h.h3->hdr.bh1.offset_to_first_pkt; 4522 handlep->packets_left = h.h3->hdr.bh1.num_pkts; 4523 } 4524 packets_to_read = handlep->packets_left; 4525 4526 if (packets_to_read > (max_packets - pkts)) { 4527 /* 4528 * There are more packets in the buffer than 4529 * the number of packets we have left to 4530 * process to get up to the maximum number 4531 * of packets to process. Only process enough 4532 * of them to get us up to that maximum. 4533 */ 4534 packets_to_read = max_packets - pkts; 4535 } 4536 4537 while (packets_to_read-- && !handle->break_loop) { 4538 struct tpacket3_hdr* tp3_hdr = (struct tpacket3_hdr*) handlep->current_packet; 4539 ret = pcap_handle_packet_mmap( 4540 handle, 4541 callback, 4542 user, 4543 handlep->current_packet, 4544 tp3_hdr->tp_len, 4545 tp3_hdr->tp_mac, 4546 tp3_hdr->tp_snaplen, 4547 tp3_hdr->tp_sec, 4548 handle->opt.tstamp_precision == PCAP_TSTAMP_PRECISION_NANO ? tp3_hdr->tp_nsec : tp3_hdr->tp_nsec / 1000, 4549 VLAN_VALID(tp3_hdr, &tp3_hdr->hv1), 4550 tp3_hdr->hv1.tp_vlan_tci, 4551 VLAN_TPID(tp3_hdr, &tp3_hdr->hv1)); 4552 if (ret == 1) { 4553 pkts++; 4554 } else if (ret < 0) { 4555 handlep->current_packet = NULL; 4556 return ret; 4557 } 4558 handlep->current_packet += tp3_hdr->tp_next_offset; 4559 handlep->packets_left--; 4560 } 4561 4562 if (handlep->packets_left <= 0) { 4563 /* 4564 * Hand this block back to the kernel, and, if 4565 * we're counting blocks that need to be 4566 * filtered in userland after having been 4567 * filtered by the kernel, count the one we've 4568 * just processed. 
4569 */ 4570 packet_mmap_v3_release(h.h3); 4571 if (handlep->blocks_to_filter_in_userland > 0) { 4572 handlep->blocks_to_filter_in_userland--; 4573 if (handlep->blocks_to_filter_in_userland == 0) { 4574 /* 4575 * No more blocks need to be filtered 4576 * in userland. 4577 */ 4578 handlep->filter_in_userland = 0; 4579 } 4580 } 4581 4582 /* next block */ 4583 if (++handle->offset >= handle->cc) 4584 handle->offset = 0; 4585 4586 handlep->current_packet = NULL; 4587 } 4588 4589 /* check for break loop condition*/ 4590 if (handle->break_loop) { 4591 handle->break_loop = 0; 4592 return PCAP_ERROR_BREAK; 4593 } 4594 } 4595 if (pkts == 0 && handlep->timeout == 0) { 4596 /* Block until we see a packet. */ 4597 goto again; 4598 } 4599 return pkts; 4600 } 4601 #endif /* HAVE_TPACKET3 */ 4602 4603 /* 4604 * Attach the given BPF code to the packet capture device. 4605 */ 4606 static int 4607 pcap_setfilter_linux(pcap_t *handle, struct bpf_program *filter) 4608 { 4609 struct pcap_linux *handlep; 4610 struct sock_fprog fcode; 4611 int can_filter_in_kernel; 4612 int err = 0; 4613 int n, offset; 4614 4615 if (!handle) 4616 return -1; 4617 if (!filter) { 4618 pcapint_strlcpy(handle->errbuf, "setfilter: No filter specified", 4619 PCAP_ERRBUF_SIZE); 4620 return -1; 4621 } 4622 4623 handlep = handle->priv; 4624 4625 /* Make our private copy of the filter */ 4626 4627 if (pcapint_install_bpf_program(handle, filter) < 0) 4628 /* pcapint_install_bpf_program() filled in errbuf */ 4629 return -1; 4630 4631 /* 4632 * Run user level packet filter by default. Will be overridden if 4633 * installing a kernel filter succeeds. 4634 */ 4635 handlep->filter_in_userland = 1; 4636 4637 /* Install kernel level filter if possible */ 4638 4639 #ifdef USHRT_MAX 4640 if (handle->fcode.bf_len > USHRT_MAX) { 4641 /* 4642 * fcode.len is an unsigned short for current kernel. 4643 * I have yet to see BPF-Code with that much 4644 * instructions but still it is possible. 
So for the 4645 * sake of correctness I added this check. 4646 */ 4647 fprintf(stderr, "Warning: Filter too complex for kernel\n"); 4648 fcode.len = 0; 4649 fcode.filter = NULL; 4650 can_filter_in_kernel = 0; 4651 } else 4652 #endif /* USHRT_MAX */ 4653 { 4654 /* 4655 * Oh joy, the Linux kernel uses struct sock_fprog instead 4656 * of struct bpf_program and of course the length field is 4657 * of different size. Pointed out by Sebastian 4658 * 4659 * Oh, and we also need to fix it up so that all "ret" 4660 * instructions with non-zero operands have MAXIMUM_SNAPLEN 4661 * as the operand if we're not capturing in memory-mapped 4662 * mode, and so that, if we're in cooked mode, all memory- 4663 * reference instructions use special magic offsets in 4664 * references to the link-layer header and assume that the 4665 * link-layer payload begins at 0; "fix_program()" will do 4666 * that. 4667 */ 4668 switch (fix_program(handle, &fcode)) { 4669 4670 case -1: 4671 default: 4672 /* 4673 * Fatal error; just quit. 4674 * (The "default" case shouldn't happen; we 4675 * return -1 for that reason.) 4676 */ 4677 return -1; 4678 4679 case 0: 4680 /* 4681 * The program performed checks that we can't make 4682 * work in the kernel. 4683 */ 4684 can_filter_in_kernel = 0; 4685 break; 4686 4687 case 1: 4688 /* 4689 * We have a filter that'll work in the kernel. 4690 */ 4691 can_filter_in_kernel = 1; 4692 break; 4693 } 4694 } 4695 4696 /* 4697 * NOTE: at this point, we've set both the "len" and "filter" 4698 * fields of "fcode". As of the 2.6.32.4 kernel, at least, 4699 * those are the only members of the "sock_fprog" structure, 4700 * so we initialize every member of that structure. 4701 * 4702 * If there is anything in "fcode" that is not initialized, 4703 * it is either a field added in a later kernel, or it's 4704 * padding. 4705 * 4706 * If a new field is added, this code needs to be updated 4707 * to set it correctly. 
4708 * 4709 * If there are no other fields, then: 4710 * 4711 * if the Linux kernel looks at the padding, it's 4712 * buggy; 4713 * 4714 * if the Linux kernel doesn't look at the padding, 4715 * then if some tool complains that we're passing 4716 * uninitialized data to the kernel, then the tool 4717 * is buggy and needs to understand that it's just 4718 * padding. 4719 */ 4720 if (can_filter_in_kernel) { 4721 if ((err = set_kernel_filter(handle, &fcode)) == 0) 4722 { 4723 /* 4724 * Installation succeeded - using kernel filter, 4725 * so userland filtering not needed. 4726 */ 4727 handlep->filter_in_userland = 0; 4728 } 4729 else if (err == -1) /* Non-fatal error */ 4730 { 4731 /* 4732 * Print a warning if we weren't able to install 4733 * the filter for a reason other than "this kernel 4734 * isn't configured to support socket filters. 4735 */ 4736 if (errno == ENOMEM) { 4737 /* 4738 * Either a kernel memory allocation 4739 * failure occurred, or there's too 4740 * much "other/option memory" allocated 4741 * for this socket. Suggest that they 4742 * increase the "other/option memory" 4743 * limit. 4744 */ 4745 fprintf(stderr, 4746 "Warning: Couldn't allocate kernel memory for filter: try increasing net.core.optmem_max with sysctl\n"); 4747 } else if (errno != ENOPROTOOPT && errno != EOPNOTSUPP) { 4748 fprintf(stderr, 4749 "Warning: Kernel filter failed: %s\n", 4750 pcap_strerror(errno)); 4751 } 4752 } 4753 } 4754 4755 /* 4756 * If we're not using the kernel filter, get rid of any kernel 4757 * filter that might've been there before, e.g. because the 4758 * previous filter could work in the kernel, or because some other 4759 * code attached a filter to the socket by some means other than 4760 * calling "pcap_setfilter()". Otherwise, the kernel filter may 4761 * filter out packets that would pass the new userland filter. 
4762 */ 4763 if (handlep->filter_in_userland) { 4764 if (reset_kernel_filter(handle) == -1) { 4765 pcapint_fmt_errmsg_for_errno(handle->errbuf, 4766 PCAP_ERRBUF_SIZE, errno, 4767 "can't remove kernel filter"); 4768 err = -2; /* fatal error */ 4769 } 4770 } 4771 4772 /* 4773 * Free up the copy of the filter that was made by "fix_program()". 4774 */ 4775 if (fcode.filter != NULL) 4776 free(fcode.filter); 4777 4778 if (err == -2) 4779 /* Fatal error */ 4780 return -1; 4781 4782 /* 4783 * If we're filtering in userland, there's nothing to do; 4784 * the new filter will be used for the next packet. 4785 */ 4786 if (handlep->filter_in_userland) 4787 return 0; 4788 4789 /* 4790 * We're filtering in the kernel; the packets present in 4791 * all blocks currently in the ring were already filtered 4792 * by the old filter, and so will need to be filtered in 4793 * userland by the new filter. 4794 * 4795 * Get an upper bound for the number of such blocks; first, 4796 * walk the ring backward and count the free blocks. 4797 */ 4798 offset = handle->offset; 4799 if (--offset < 0) 4800 offset = handle->cc - 1; 4801 for (n=0; n < handle->cc; ++n) { 4802 if (--offset < 0) 4803 offset = handle->cc - 1; 4804 if (pcap_get_ring_frame_status(handle, offset) != TP_STATUS_KERNEL) 4805 break; 4806 } 4807 4808 /* 4809 * If we found free blocks, decrement the count of free 4810 * blocks by 1, just in case we lost a race with another 4811 * thread of control that was adding a packet while 4812 * we were counting and that had run the filter before 4813 * we changed it. 4814 * 4815 * XXX - could there be more than one block added in 4816 * this fashion? 4817 * 4818 * XXX - is there a way to avoid that race, e.g. somehow 4819 * wait for all packets that passed the old filter to 4820 * be added to the ring? 
4821 */ 4822 if (n != 0) 4823 n--; 4824 4825 /* 4826 * Set the count of blocks worth of packets to filter 4827 * in userland to the total number of blocks in the 4828 * ring minus the number of free blocks we found, and 4829 * turn on userland filtering. (The count of blocks 4830 * worth of packets to filter in userland is guaranteed 4831 * not to be zero - n, above, couldn't be set to a 4832 * value > handle->cc, and if it were equal to 4833 * handle->cc, it wouldn't be zero, and thus would 4834 * be decremented to handle->cc - 1.) 4835 */ 4836 handlep->blocks_to_filter_in_userland = handle->cc - n; 4837 handlep->filter_in_userland = 1; 4838 4839 return 0; 4840 } 4841 4842 /* 4843 * Return the index of the given device name. Fill ebuf and return 4844 * -1 on failure. 4845 */ 4846 static int 4847 iface_get_id(int fd, const char *device, char *ebuf) 4848 { 4849 struct ifreq ifr; 4850 4851 memset(&ifr, 0, sizeof(ifr)); 4852 pcapint_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name)); 4853 4854 if (ioctl(fd, SIOCGIFINDEX, &ifr) == -1) { 4855 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4856 errno, "SIOCGIFINDEX"); 4857 return -1; 4858 } 4859 4860 return ifr.ifr_ifindex; 4861 } 4862 4863 /* 4864 * Bind the socket associated with FD to the given device. 4865 * Return 0 on success or a PCAP_ERROR_ value on a hard error. 4866 */ 4867 static int 4868 iface_bind(int fd, int ifindex, char *ebuf, int protocol) 4869 { 4870 struct sockaddr_ll sll; 4871 int ret, err; 4872 socklen_t errlen = sizeof(err); 4873 4874 memset(&sll, 0, sizeof(sll)); 4875 sll.sll_family = AF_PACKET; 4876 sll.sll_ifindex = ifindex < 0 ? 
0 : ifindex; 4877 sll.sll_protocol = protocol; 4878 4879 if (bind(fd, (struct sockaddr *) &sll, sizeof(sll)) == -1) { 4880 if (errno == ENETDOWN) { 4881 /* 4882 * Return a "network down" indication, so that 4883 * the application can report that rather than 4884 * saying we had a mysterious failure and 4885 * suggest that they report a problem to the 4886 * libpcap developers. 4887 */ 4888 return PCAP_ERROR_IFACE_NOT_UP; 4889 } 4890 if (errno == ENODEV) { 4891 /* 4892 * There's nothing more to say, so clear the 4893 * error message. 4894 */ 4895 ebuf[0] = '\0'; 4896 ret = PCAP_ERROR_NO_SUCH_DEVICE; 4897 } else { 4898 ret = PCAP_ERROR; 4899 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4900 errno, "bind"); 4901 } 4902 return ret; 4903 } 4904 4905 /* Any pending errors, e.g., network is down? */ 4906 4907 if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errlen) == -1) { 4908 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4909 errno, "getsockopt (SO_ERROR)"); 4910 return PCAP_ERROR; 4911 } 4912 4913 if (err == ENETDOWN) { 4914 /* 4915 * Return a "network down" indication, so that 4916 * the application can report that rather than 4917 * saying we had a mysterious failure and 4918 * suggest that they report a problem to the 4919 * libpcap developers. 4920 */ 4921 return PCAP_ERROR_IFACE_NOT_UP; 4922 } else if (err > 0) { 4923 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4924 err, "bind"); 4925 return PCAP_ERROR; 4926 } 4927 4928 return 0; 4929 } 4930 4931 /* 4932 * Try to enter monitor mode. 4933 * If we have libnl, try to create a new monitor-mode device and 4934 * capture on that; otherwise, just say "not supported". 
4935 */ 4936 #ifdef HAVE_LIBNL 4937 static int 4938 enter_rfmon_mode(pcap_t *handle, int sock_fd, const char *device) 4939 { 4940 struct pcap_linux *handlep = handle->priv; 4941 int ret; 4942 char phydev_path[PATH_MAX+1]; 4943 struct nl80211_state nlstate; 4944 struct ifreq ifr; 4945 u_int n; 4946 4947 /* 4948 * Is this a mac80211 device? 4949 */ 4950 ret = get_mac80211_phydev(handle, device, phydev_path, PATH_MAX); 4951 if (ret < 0) 4952 return ret; /* error */ 4953 if (ret == 0) 4954 return 0; /* no error, but not mac80211 device */ 4955 4956 ret = nl80211_init(handle, &nlstate, device); 4957 if (ret != 0) 4958 return ret; 4959 4960 /* 4961 * Is this already a monN device? 4962 * If so, we're done. 4963 */ 4964 int type; 4965 ret = get_if_type(handle, sock_fd, &nlstate, device, &type); 4966 if (ret <= 0) { 4967 /* 4968 * < 0 is a Hard failure. Just return ret; handle->errbuf 4969 * has already been set. 4970 * 4971 * 0 is "device not available"; the caller should retry later. 4972 */ 4973 nl80211_cleanup(&nlstate); 4974 return ret; 4975 } 4976 if (type == NL80211_IFTYPE_MONITOR) { 4977 /* 4978 * OK, it's already a monitor mode device; just use it. 4979 * There's no point in creating another monitor device 4980 * that will have to be cleaned up. 4981 */ 4982 nl80211_cleanup(&nlstate); 4983 return ret; 4984 } 4985 4986 /* 4987 * OK, it's apparently a mac80211 device but not a monitor device. 4988 * Try to find an unused monN device for it. 4989 */ 4990 for (n = 0; n < UINT_MAX; n++) { 4991 /* 4992 * Try mon{n}. 4993 */ 4994 char mondevice[3+10+1]; /* mon{UINT_MAX}\0 */ 4995 4996 snprintf(mondevice, sizeof mondevice, "mon%u", n); 4997 ret = add_mon_if(handle, sock_fd, &nlstate, device, mondevice); 4998 if (ret == 1) { 4999 /* 5000 * Success. We don't clean up the libnl state 5001 * yet, as we'll be using it later. 5002 */ 5003 goto added; 5004 } 5005 if (ret < 0) { 5006 /* 5007 * Hard failure. Just return ret; handle->errbuf 5008 * has already been set. 
5009 */ 5010 nl80211_cleanup(&nlstate); 5011 return ret; 5012 } 5013 } 5014 5015 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 5016 "%s: No free monN interfaces", device); 5017 nl80211_cleanup(&nlstate); 5018 return PCAP_ERROR; 5019 5020 added: 5021 5022 #if 0 5023 /* 5024 * Sleep for .1 seconds. 5025 */ 5026 delay.tv_sec = 0; 5027 delay.tv_nsec = 500000000; 5028 nanosleep(&delay, NULL); 5029 #endif 5030 5031 /* 5032 * If we haven't already done so, arrange to have 5033 * "pcap_close_all()" called when we exit. 5034 */ 5035 if (!pcapint_do_addexit(handle)) { 5036 /* 5037 * "atexit()" failed; don't put the interface 5038 * in rfmon mode, just give up. 5039 */ 5040 del_mon_if(handle, sock_fd, &nlstate, device, 5041 handlep->mondevice); 5042 nl80211_cleanup(&nlstate); 5043 return PCAP_ERROR; 5044 } 5045 5046 /* 5047 * Now configure the monitor interface up. 5048 */ 5049 memset(&ifr, 0, sizeof(ifr)); 5050 pcapint_strlcpy(ifr.ifr_name, handlep->mondevice, sizeof(ifr.ifr_name)); 5051 if (ioctl(sock_fd, SIOCGIFFLAGS, &ifr) == -1) { 5052 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 5053 errno, "%s: Can't get flags for %s", device, 5054 handlep->mondevice); 5055 del_mon_if(handle, sock_fd, &nlstate, device, 5056 handlep->mondevice); 5057 nl80211_cleanup(&nlstate); 5058 return PCAP_ERROR; 5059 } 5060 ifr.ifr_flags |= IFF_UP|IFF_RUNNING; 5061 if (ioctl(sock_fd, SIOCSIFFLAGS, &ifr) == -1) { 5062 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 5063 errno, "%s: Can't set flags for %s", device, 5064 handlep->mondevice); 5065 del_mon_if(handle, sock_fd, &nlstate, device, 5066 handlep->mondevice); 5067 nl80211_cleanup(&nlstate); 5068 return PCAP_ERROR; 5069 } 5070 5071 /* 5072 * Success. Clean up the libnl state. 5073 */ 5074 nl80211_cleanup(&nlstate); 5075 5076 /* 5077 * Note that we have to delete the monitor device when we close 5078 * the handle. 
5079 */ 5080 handlep->must_do_on_close |= MUST_DELETE_MONIF; 5081 5082 /* 5083 * Add this to the list of pcaps to close when we exit. 5084 */ 5085 pcapint_add_to_pcaps_to_close(handle); 5086 5087 return 1; 5088 } 5089 #else /* HAVE_LIBNL */ 5090 static int 5091 enter_rfmon_mode(pcap_t *handle _U_, int sock_fd _U_, const char *device _U_) 5092 { 5093 /* 5094 * We don't have libnl, so we can't do monitor mode. 5095 */ 5096 return 0; 5097 } 5098 #endif /* HAVE_LIBNL */ 5099 5100 #if defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) 5101 /* 5102 * Map SOF_TIMESTAMPING_ values to PCAP_TSTAMP_ values. 5103 */ 5104 static const struct { 5105 int soft_timestamping_val; 5106 int pcap_tstamp_val; 5107 } sof_ts_type_map[3] = { 5108 { SOF_TIMESTAMPING_SOFTWARE, PCAP_TSTAMP_HOST }, 5109 { SOF_TIMESTAMPING_SYS_HARDWARE, PCAP_TSTAMP_ADAPTER }, 5110 { SOF_TIMESTAMPING_RAW_HARDWARE, PCAP_TSTAMP_ADAPTER_UNSYNCED } 5111 }; 5112 #define NUM_SOF_TIMESTAMPING_TYPES (sizeof sof_ts_type_map / sizeof sof_ts_type_map[0]) 5113 5114 /* 5115 * Set the list of time stamping types to include all types. 5116 */ 5117 static int 5118 iface_set_all_ts_types(pcap_t *handle, char *ebuf) 5119 { 5120 u_int i; 5121 5122 handle->tstamp_type_list = malloc(NUM_SOF_TIMESTAMPING_TYPES * sizeof(u_int)); 5123 if (handle->tstamp_type_list == NULL) { 5124 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 5125 errno, "malloc"); 5126 return -1; 5127 } 5128 for (i = 0; i < NUM_SOF_TIMESTAMPING_TYPES; i++) 5129 handle->tstamp_type_list[i] = sof_ts_type_map[i].pcap_tstamp_val; 5130 handle->tstamp_type_count = NUM_SOF_TIMESTAMPING_TYPES; 5131 return 0; 5132 } 5133 5134 /* 5135 * Get a list of time stamp types. 
5136 */ 5137 #ifdef ETHTOOL_GET_TS_INFO 5138 static int 5139 iface_get_ts_types(const char *device, pcap_t *handle, char *ebuf) 5140 { 5141 int fd; 5142 struct ifreq ifr; 5143 struct ethtool_ts_info info; 5144 int num_ts_types; 5145 u_int i, j; 5146 5147 /* 5148 * This doesn't apply to the "any" device; you can't say "turn on 5149 * hardware time stamping for all devices that exist now and arrange 5150 * that it be turned on for any device that appears in the future", 5151 * and not all devices even necessarily *support* hardware time 5152 * stamping, so don't report any time stamp types. 5153 */ 5154 if (strcmp(device, "any") == 0) { 5155 handle->tstamp_type_list = NULL; 5156 return 0; 5157 } 5158 5159 /* 5160 * Create a socket from which to fetch time stamping capabilities. 5161 */ 5162 fd = get_if_ioctl_socket(); 5163 if (fd < 0) { 5164 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 5165 errno, "socket for SIOCETHTOOL(ETHTOOL_GET_TS_INFO)"); 5166 return -1; 5167 } 5168 5169 memset(&ifr, 0, sizeof(ifr)); 5170 pcapint_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name)); 5171 memset(&info, 0, sizeof(info)); 5172 info.cmd = ETHTOOL_GET_TS_INFO; 5173 ifr.ifr_data = (caddr_t)&info; 5174 if (ioctl(fd, SIOCETHTOOL, &ifr) == -1) { 5175 int save_errno = errno; 5176 5177 close(fd); 5178 switch (save_errno) { 5179 5180 case EOPNOTSUPP: 5181 case EINVAL: 5182 /* 5183 * OK, this OS version or driver doesn't support 5184 * asking for the time stamping types, so let's 5185 * just return all the possible types. 5186 */ 5187 if (iface_set_all_ts_types(handle, ebuf) == -1) 5188 return -1; 5189 return 0; 5190 5191 case ENODEV: 5192 /* 5193 * OK, no such device. 5194 * The user will find that out when they try to 5195 * activate the device; just return an empty 5196 * list of time stamp types. 5197 */ 5198 handle->tstamp_type_list = NULL; 5199 return 0; 5200 5201 default: 5202 /* 5203 * Other error. 
5204 */ 5205 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 5206 save_errno, 5207 "%s: SIOCETHTOOL(ETHTOOL_GET_TS_INFO) ioctl failed", 5208 device); 5209 return -1; 5210 } 5211 } 5212 close(fd); 5213 5214 /* 5215 * Do we support hardware time stamping of *all* packets? 5216 */ 5217 if (!(info.rx_filters & (1 << HWTSTAMP_FILTER_ALL))) { 5218 /* 5219 * No, so don't report any time stamp types. 5220 * 5221 * XXX - some devices either don't report 5222 * HWTSTAMP_FILTER_ALL when they do support it, or 5223 * report HWTSTAMP_FILTER_ALL but map it to only 5224 * time stamping a few PTP packets. See 5225 * http://marc.info/?l=linux-netdev&m=146318183529571&w=2 5226 * 5227 * Maybe that got fixed later. 5228 */ 5229 handle->tstamp_type_list = NULL; 5230 return 0; 5231 } 5232 5233 num_ts_types = 0; 5234 for (i = 0; i < NUM_SOF_TIMESTAMPING_TYPES; i++) { 5235 if (info.so_timestamping & sof_ts_type_map[i].soft_timestamping_val) 5236 num_ts_types++; 5237 } 5238 if (num_ts_types != 0) { 5239 handle->tstamp_type_list = malloc(num_ts_types * sizeof(u_int)); 5240 if (handle->tstamp_type_list == NULL) { 5241 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 5242 errno, "malloc"); 5243 return -1; 5244 } 5245 for (i = 0, j = 0; i < NUM_SOF_TIMESTAMPING_TYPES; i++) { 5246 if (info.so_timestamping & sof_ts_type_map[i].soft_timestamping_val) { 5247 handle->tstamp_type_list[j] = sof_ts_type_map[i].pcap_tstamp_val; 5248 j++; 5249 } 5250 } 5251 handle->tstamp_type_count = num_ts_types; 5252 } else 5253 handle->tstamp_type_list = NULL; 5254 5255 return 0; 5256 } 5257 #else /* ETHTOOL_GET_TS_INFO */ 5258 static int 5259 iface_get_ts_types(const char *device, pcap_t *handle, char *ebuf) 5260 { 5261 /* 5262 * This doesn't apply to the "any" device; you can't say "turn on 5263 * hardware time stamping for all devices that exist now and arrange 5264 * that it be turned on for any device that appears in the future", 5265 * and not all devices even necessarily *support* hardware time 5266 
* stamping, so don't report any time stamp types. 5267 */ 5268 if (strcmp(device, "any") == 0) { 5269 handle->tstamp_type_list = NULL; 5270 return 0; 5271 } 5272 5273 /* 5274 * We don't have an ioctl to use to ask what's supported, 5275 * so say we support everything. 5276 */ 5277 if (iface_set_all_ts_types(handle, ebuf) == -1) 5278 return -1; 5279 return 0; 5280 } 5281 #endif /* ETHTOOL_GET_TS_INFO */ 5282 #else /* defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) */ 5283 static int 5284 iface_get_ts_types(const char *device _U_, pcap_t *p _U_, char *ebuf _U_) 5285 { 5286 /* 5287 * Nothing to fetch, so it always "succeeds". 5288 */ 5289 return 0; 5290 } 5291 #endif /* defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) */ 5292 5293 /* 5294 * Find out if we have any form of fragmentation/reassembly offloading. 5295 * 5296 * We do so using SIOCETHTOOL checking for various types of offloading; 5297 * if SIOCETHTOOL isn't defined, or we don't have any #defines for any 5298 * of the types of offloading, there's nothing we can do to check, so 5299 * we just say "no, we don't". 5300 * 5301 * We treat EOPNOTSUPP, EINVAL and, if eperm_ok is true, EPERM as 5302 * indications that the operation isn't supported. We do EPERM 5303 * weirdly because the SIOCETHTOOL code in later kernels 1) doesn't 5304 * support ETHTOOL_GUFO, 2) also doesn't include it in the list 5305 * of ethtool operations that don't require CAP_NET_ADMIN privileges, 5306 * and 3) does the "is this permitted" check before doing the "is 5307 * this even supported" check, so it fails with "this is not permitted" 5308 * rather than "this is not even supported". To work around this 5309 * annoyance, we only treat EPERM as an error for the first feature, 5310 * and assume that they all do the same permission checks, so if the 5311 * first one is allowed all the others are allowed if supported. 
5312 */ 5313 #if defined(SIOCETHTOOL) && (defined(ETHTOOL_GTSO) || defined(ETHTOOL_GUFO) || defined(ETHTOOL_GGSO) || defined(ETHTOOL_GFLAGS) || defined(ETHTOOL_GGRO)) 5314 static int 5315 iface_ethtool_flag_ioctl(pcap_t *handle, int cmd, const char *cmdname, 5316 int eperm_ok) 5317 { 5318 struct ifreq ifr; 5319 struct ethtool_value eval; 5320 5321 memset(&ifr, 0, sizeof(ifr)); 5322 pcapint_strlcpy(ifr.ifr_name, handle->opt.device, sizeof(ifr.ifr_name)); 5323 eval.cmd = cmd; 5324 eval.data = 0; 5325 ifr.ifr_data = (caddr_t)&eval; 5326 if (ioctl(handle->fd, SIOCETHTOOL, &ifr) == -1) { 5327 if (errno == EOPNOTSUPP || errno == EINVAL || 5328 (errno == EPERM && eperm_ok)) { 5329 /* 5330 * OK, let's just return 0, which, in our 5331 * case, either means "no, what we're asking 5332 * about is not enabled" or "all the flags 5333 * are clear (i.e., nothing is enabled)". 5334 */ 5335 return 0; 5336 } 5337 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 5338 errno, "%s: SIOCETHTOOL(%s) ioctl failed", 5339 handle->opt.device, cmdname); 5340 return -1; 5341 } 5342 return eval.data; 5343 } 5344 5345 /* 5346 * XXX - it's annoying that we have to check for offloading at all, but, 5347 * given that we have to, it's still annoying that we have to check for 5348 * particular types of offloading, especially that shiny new types of 5349 * offloading may be added - and, worse, may not be checkable with 5350 * a particular ETHTOOL_ operation; ETHTOOL_GFEATURES would, in 5351 * theory, give those to you, but the actual flags being used are 5352 * opaque (defined in a non-uapi header), and there doesn't seem to 5353 * be any obvious way to ask the kernel what all the offloading flags 5354 * are - at best, you can ask for a set of strings(!) to get *names* 5355 * for various flags. 
(That whole mechanism appears to have been 5356 * designed for the sole purpose of letting ethtool report flags 5357 * by name and set flags by name, with the names having no semantics 5358 * ethtool understands.) 5359 */ 5360 static int 5361 iface_get_offload(pcap_t *handle) 5362 { 5363 int ret; 5364 5365 #ifdef ETHTOOL_GTSO 5366 ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GTSO, "ETHTOOL_GTSO", 0); 5367 if (ret == -1) 5368 return -1; 5369 if (ret) 5370 return 1; /* TCP segmentation offloading on */ 5371 #endif 5372 5373 #ifdef ETHTOOL_GGSO 5374 /* 5375 * XXX - will this cause large unsegmented packets to be 5376 * handed to PF_PACKET sockets on transmission? If not, 5377 * this need not be checked. 5378 */ 5379 ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GGSO, "ETHTOOL_GGSO", 0); 5380 if (ret == -1) 5381 return -1; 5382 if (ret) 5383 return 1; /* generic segmentation offloading on */ 5384 #endif 5385 5386 #ifdef ETHTOOL_GFLAGS 5387 ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GFLAGS, "ETHTOOL_GFLAGS", 0); 5388 if (ret == -1) 5389 return -1; 5390 if (ret & ETH_FLAG_LRO) 5391 return 1; /* large receive offloading on */ 5392 #endif 5393 5394 #ifdef ETHTOOL_GGRO 5395 /* 5396 * XXX - will this cause large reassembled packets to be 5397 * handed to PF_PACKET sockets on receipt? If not, 5398 * this need not be checked. 5399 */ 5400 ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GGRO, "ETHTOOL_GGRO", 0); 5401 if (ret == -1) 5402 return -1; 5403 if (ret) 5404 return 1; /* generic (large) receive offloading on */ 5405 #endif 5406 5407 #ifdef ETHTOOL_GUFO 5408 /* 5409 * Do this one last, as support for it was removed in later 5410 * kernels, and it fails with EPERM on those kernels rather 5411 * than with EOPNOTSUPP (see explanation in comment for 5412 * iface_ethtool_flag_ioctl()). 
5413 */ 5414 ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GUFO, "ETHTOOL_GUFO", 1); 5415 if (ret == -1) 5416 return -1; 5417 if (ret) 5418 return 1; /* UDP fragmentation offloading on */ 5419 #endif 5420 5421 return 0; 5422 } 5423 #else /* SIOCETHTOOL */ 5424 static int 5425 iface_get_offload(pcap_t *handle _U_) 5426 { 5427 /* 5428 * XXX - do we need to get this information if we don't 5429 * have the ethtool ioctls? If so, how do we do that? 5430 */ 5431 return 0; 5432 } 5433 #endif /* SIOCETHTOOL */ 5434 5435 /* 5436 * As per 5437 * 5438 * https://www.kernel.org/doc/html/latest/networking/dsa/dsa.html#switch-tagging-protocols 5439 * 5440 * Type 1 means that the tag is prepended to the Ethernet packet. 5441 * 5442 * Type 2 means that the tag is inserted into the Ethernet header 5443 * after the source address and before the type/length field. 5444 * 5445 * Type 3 means that tag is a packet trailer. 5446 * 5447 * Every element in the array below uses a DLT. Because a DSA-tagged frame is 5448 * not a standard IEEE 802.3 Ethernet frame, the array elements must not use 5449 * DLT_EN10MB. It is safe, albeit only barely useful, to use DLT_DEBUG_ONLY, 5450 * which is also the implicit default for any DSA tag that is not present in 5451 * the array. To implement proper support for a particular DSA tag of 5452 * interest, please do as much of the following as is reasonably practicable: 5453 * 5454 * 1. Using recent versions of tcpdump and libpcap on a Linux host with a 5455 * network interface that implements the required DSA tag, capture packets 5456 * on the interface and study the hex dumps. 5457 * 2. Using the hex dumps and any other available supporting materials, produce 5458 * a sufficiently detailed description of the DSA tag structure, complete 5459 * with a full comment indicating whether it's type 1, 2, or 3, and, for 5460 * type 2, indicating whether it has an Ethertype and, if so, what that type 5461 * is, and whether it's registered with the IEEE or not. 
Refer to the 5462 * specification(s), existing implementation(s), or any other relevant 5463 * resources. 5464 * 3. Using the description, request and obtain a new DLT for the DSA tag. 5465 * 4. Associate the new DLT with the DSA tag in the array below. 5466 * 5. Using the updated libpcap, capture packets again, produce a .pcap file 5467 * and confirm it uses the new DLT. 5468 * 6. Using the .pcap file as a test, prepare additional changes to tcpdump to 5469 * enable decoding of packets for the new DLT. 5470 * 7. Using the .pcap file as a test, prepare additional changes to libpcap to 5471 * enable filtering of packets for the new DLT. 5472 * 5473 * For working examples of such support, see the existing DLTs other than 5474 * DLT_DEBUG_ONLY in the array below. 5475 */ 5476 static struct dsa_proto { 5477 const char *name; 5478 bpf_u_int32 linktype; 5479 } dsa_protos[] = { 5480 /* 5481 * Type 1. See 5482 * 5483 * https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_ar9331.c 5484 */ 5485 { "ar9331", DLT_DEBUG_ONLY }, 5486 5487 /* 5488 * Type 2, without an EtherType at the beginning. 5489 */ 5490 { "brcm", DLT_DSA_TAG_BRCM }, 5491 5492 /* 5493 * Type 2, with EtherType 0x8874, assigned to Broadcom. 5494 */ 5495 { "brcm-legacy", DLT_DEBUG_ONLY }, 5496 5497 /* 5498 * Type 1. 5499 */ 5500 { "brcm-prepend", DLT_DSA_TAG_BRCM_PREPEND }, 5501 5502 /* 5503 * Type 2, without an EtherType at the beginning. 5504 */ 5505 { "dsa", DLT_DSA_TAG_DSA }, 5506 5507 /* 5508 * Type 2, with an Ethertype field, but without 5509 * an assigned EtherType value that can be relied 5510 * on. 5511 */ 5512 { "edsa", DLT_DSA_TAG_EDSA }, 5513 5514 /* 5515 * Type 1, with different transmit and receive headers, 5516 * so can't really be handled well with the current 5517 * libpcap API and with pcap files. 5518 * 5519 * See 5520 * 5521 * https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_gswip.c 5522 */ 5523 { "gswip", DLT_DEBUG_ONLY }, 5524 5525 /* 5526 * Type 3. 
See 5527 * 5528 * https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_hellcreek.c 5529 */ 5530 { "hellcreek", DLT_DEBUG_ONLY }, 5531 5532 /* 5533 * Type 3, with different transmit and receive headers, 5534 * so can't really be handled well with the current 5535 * libpcap API and with pcap files. 5536 * 5537 * See 5538 * 5539 * https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_ksz.c#L102 5540 */ 5541 { "ksz8795", DLT_DEBUG_ONLY }, 5542 5543 /* 5544 * Type 3, with different transmit and receive headers, 5545 * so can't really be handled well with the current 5546 * libpcap API and with pcap files. 5547 * 5548 * See 5549 * 5550 * https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_ksz.c#L160 5551 */ 5552 { "ksz9477", DLT_DEBUG_ONLY }, 5553 5554 /* 5555 * Type 3, with different transmit and receive headers, 5556 * so can't really be handled well with the current 5557 * libpcap API and with pcap files. 5558 * 5559 * See 5560 * 5561 * https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_ksz.c#L341 5562 */ 5563 { "ksz9893", DLT_DEBUG_ONLY }, 5564 5565 /* 5566 * Type 3, with different transmit and receive headers, 5567 * so can't really be handled well with the current 5568 * libpcap API and with pcap files. 5569 * 5570 * See 5571 * 5572 * https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_ksz.c#L386 5573 */ 5574 { "lan937x", DLT_DEBUG_ONLY }, 5575 5576 /* 5577 * Type 2, with EtherType 0x8100; the VID can be interpreted 5578 * as per 5579 * 5580 * https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_lan9303.c#L24 5581 */ 5582 { "lan9303", DLT_DEBUG_ONLY }, 5583 5584 /* 5585 * Type 2, without an EtherType at the beginning. 5586 * 5587 * See 5588 * 5589 * https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_mtk.c#L15 5590 * 5591 * Linux kernel implements this tag so that it does not indicate the frame 5592 * encoding reliably. 
The matter is, some drivers use METADATA_HW_PORT_MUX, 5593 * which (for the switch->CPU direction only, at the time of this writing) 5594 * means that the frame does not have a DSA tag, the frame metadata is stored 5595 * elsewhere and libpcap receives the frame only. Specifically, this is the 5596 * case for drivers/net/ethernet/mediatek/mtk_eth_soc.c, but the tag visible 5597 * in sysfs is still "mtk" even though the wire encoding is different. 5598 */ 5599 { "mtk", DLT_DEBUG_ONLY }, 5600 5601 /* 5602 * Type 1. 5603 * 5604 * See 5605 * 5606 * https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_ocelot.c 5607 */ 5608 { "ocelot", DLT_DEBUG_ONLY }, 5609 5610 /* 5611 * Type 1. 5612 * 5613 * See 5614 * 5615 * https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_ocelot.c 5616 */ 5617 { "seville", DLT_DEBUG_ONLY }, 5618 5619 /* 5620 * Type 2, with EtherType 0x8100; the VID can be interpreted 5621 * as per 5622 * 5623 * https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_8021q.c#L15 5624 */ 5625 { "ocelot-8021q", DLT_DEBUG_ONLY }, 5626 5627 /* 5628 * Type 2, without an EtherType at the beginning. 5629 * 5630 * See 5631 * 5632 * https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_qca.c 5633 */ 5634 { "qca", DLT_DEBUG_ONLY }, 5635 5636 /* 5637 * Type 2, with EtherType 0x8899, assigned to Realtek; 5638 * they use it for several on-the-Ethernet protocols 5639 * as well, but there are fields that allow the two 5640 * tag formats, and all the protocols in question, 5641 * to be distinguiished from one another. 5642 * 5643 * See 5644 * 5645 * https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_rtl4_a.c 5646 * 5647 * http://realtek.info/pdf/rtl8306sd%28m%29_datasheet_1.1.pdf 5648 * 5649 * and various pages in tcpdump's print-realtek.c and Wireshark's 5650 * epan/dissectors/packet-realtek.c for the other protocols. 
5651 */ 5652 { "rtl4a", DLT_DEBUG_ONLY }, 5653 5654 /* 5655 * Type 2, with EtherType 0x8899, assigned to Realtek; 5656 * see above. 5657 */ 5658 { "rtl8_4", DLT_DEBUG_ONLY }, 5659 5660 /* 5661 * Type 3, with the same tag format as rtl8_4. 5662 */ 5663 { "rtl8_4t", DLT_DEBUG_ONLY }, 5664 5665 /* 5666 * Type 2, with EtherType 0xe001; that's probably 5667 * self-assigned. 5668 * 5669 * See 5670 * 5671 * https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_rzn1_a5psw.c 5672 */ 5673 { "a5psw", DLT_DEBUG_ONLY }, 5674 5675 /* 5676 * Type 2, with EtherType 0x8100 or the self-assigned 5677 * 0xdadb, so this really should have its own 5678 * LINKTYPE_/DLT_ value; that would also allow the 5679 * VID of the tag to be dissected as per 5680 * 5681 * https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_8021q.c#L15 5682 */ 5683 { "sja1105", DLT_DEBUG_ONLY }, 5684 5685 /* 5686 * Type "none of the above", with both a header and trailer, 5687 * with different transmit and receive tags. Has 5688 * EtherType 0xdadc, which is probably self-assigned. 5689 */ 5690 { "sja1110", DLT_DEBUG_ONLY }, 5691 5692 /* 5693 * Type 3, as the name suggests. 5694 * 5695 * See 5696 * 5697 * https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_trailer.c 5698 */ 5699 { "trailer", DLT_DEBUG_ONLY }, 5700 5701 /* 5702 * Type 2, with EtherType 0x8100; the VID can be interpreted 5703 * as per 5704 * 5705 * https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_8021q.c#L15 5706 */ 5707 { "vsc73xx-8021q", DLT_DEBUG_ONLY }, 5708 5709 /* 5710 * Type 3. 5711 * 5712 * See 5713 * 5714 * https://elixir.bootlin.com/linux/v6.13.2/source/net/dsa/tag_xrs700x.c 5715 */ 5716 { "xrs700x", DLT_DEBUG_ONLY }, 5717 }; 5718 5719 /* 5720 * Return 1 if the interface uses DSA tagging, 0 if the interface does not use 5721 * DSA tagging, or PCAP_ERROR on error. 
5722 */ 5723 static int 5724 iface_dsa_get_proto_info(const char *device, pcap_t *handle) 5725 { 5726 char *pathstr; 5727 unsigned int i; 5728 /* 5729 * Make this significantly smaller than PCAP_ERRBUF_SIZE; 5730 * the tag *shouldn't* have some huge long name, and making 5731 * it smaller keeps newer versions of GCC from whining that 5732 * the error message if we don't support the tag could 5733 * overflow the error message buffer. 5734 */ 5735 char buf[128]; 5736 ssize_t r; 5737 int fd; 5738 5739 fd = asprintf(&pathstr, "/sys/class/net/%s/dsa/tagging", device); 5740 if (fd < 0) { 5741 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 5742 fd, "asprintf"); 5743 return PCAP_ERROR; 5744 } 5745 5746 fd = open(pathstr, O_RDONLY); 5747 free(pathstr); 5748 /* 5749 * This could be not fatal: kernel >= 4.20 *might* expose this 5750 * attribute. However, if it exposes the attribute, but the read has 5751 * failed due to another reason (ENFILE, EMFILE, ENOMEM...), propagate 5752 * the failure. 5753 */ 5754 if (fd < 0) { 5755 if (errno == ENOENT) 5756 return 0; 5757 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 5758 errno, "open"); 5759 return PCAP_ERROR; 5760 } 5761 5762 r = read(fd, buf, sizeof(buf) - 1); 5763 if (r <= 0) { 5764 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 5765 errno, "read"); 5766 close(fd); 5767 return PCAP_ERROR; 5768 } 5769 close(fd); 5770 5771 /* 5772 * Buffer should be LF terminated. 5773 */ 5774 if (buf[r - 1] == '\n') 5775 r--; 5776 buf[r] = '\0'; 5777 5778 /* 5779 * The string "none" indicates that the interface does not have 5780 * any tagging protocol configured, and is therefore a standard 5781 * Ethernet interface. 5782 */ 5783 if (strcmp(buf, "none") == 0) 5784 return 0; 5785 5786 /* 5787 * Every element in the array stands for a DSA-tagged interface. 
Using 5788 * DLT_EN10MB (the standard IEEE 802.3 Ethernet) for such an interface 5789 * may seem a good idea at first, but doing so would certainly cause 5790 * major problems in areas that are already complicated and depend on 5791 * DLT_EN10MB meaning the standard IEEE 802.3 Ethernet only, namely: 5792 * 5793 * - live capturing of packets on Linux, and 5794 * - live kernel filtering of packets on Linux, and 5795 * - live userspace filtering of packets on Linux, and 5796 * - offline filtering of packets on all supported OSes, and 5797 * - identification of savefiles on all OSes. 5798 * 5799 * Therefore use a default DLT value that does not block capturing and 5800 * hexdumping of unsupported DSA encodings (in case the tag is not in 5801 * the array) and enforce the non-use of DLT_EN10MB (in case the tag is 5802 * in the array, but is incorrectly declared). 5803 */ 5804 handle->linktype = DLT_DEBUG_ONLY; 5805 for (i = 0; i < sizeof(dsa_protos) / sizeof(dsa_protos[0]); i++) { 5806 if (strcmp(buf, dsa_protos[i].name) == 0) { 5807 if (dsa_protos[i].linktype != DLT_EN10MB) 5808 handle->linktype = dsa_protos[i].linktype; 5809 break; 5810 } 5811 } 5812 return 1; 5813 } 5814 5815 /* 5816 * Query the kernel for the MTU of the given interface. 5817 */ 5818 static int 5819 iface_get_mtu(int fd, const char *device, char *ebuf) 5820 { 5821 struct ifreq ifr; 5822 5823 if (!device) 5824 return BIGGER_THAN_ALL_MTUS; 5825 5826 memset(&ifr, 0, sizeof(ifr)); 5827 pcapint_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name)); 5828 5829 if (ioctl(fd, SIOCGIFMTU, &ifr) == -1) { 5830 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 5831 errno, "SIOCGIFMTU"); 5832 return -1; 5833 } 5834 5835 return ifr.ifr_mtu; 5836 } 5837 5838 /* 5839 * Get the hardware type of the given interface as ARPHRD_xxx constant. 
5840 */ 5841 static int 5842 iface_get_arptype(int fd, const char *device, char *ebuf) 5843 { 5844 struct ifreq ifr; 5845 int ret; 5846 5847 memset(&ifr, 0, sizeof(ifr)); 5848 pcapint_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name)); 5849 5850 if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) { 5851 if (errno == ENODEV) { 5852 /* 5853 * No such device. 5854 * 5855 * There's nothing more to say, so clear 5856 * the error message. 5857 */ 5858 ret = PCAP_ERROR_NO_SUCH_DEVICE; 5859 ebuf[0] = '\0'; 5860 } else { 5861 ret = PCAP_ERROR; 5862 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 5863 errno, "SIOCGIFHWADDR"); 5864 } 5865 return ret; 5866 } 5867 5868 return ifr.ifr_hwaddr.sa_family; 5869 } 5870 5871 static int 5872 fix_program(pcap_t *handle, struct sock_fprog *fcode) 5873 { 5874 struct pcap_linux *handlep = handle->priv; 5875 size_t prog_size; 5876 register int i; 5877 register struct bpf_insn *p; 5878 struct bpf_insn *f; 5879 int len; 5880 5881 /* 5882 * Make a copy of the filter, and modify that copy if 5883 * necessary. 5884 */ 5885 prog_size = sizeof(*handle->fcode.bf_insns) * handle->fcode.bf_len; 5886 len = handle->fcode.bf_len; 5887 f = (struct bpf_insn *)malloc(prog_size); 5888 if (f == NULL) { 5889 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 5890 errno, "malloc"); 5891 return -1; 5892 } 5893 memcpy(f, handle->fcode.bf_insns, prog_size); 5894 fcode->len = len; 5895 fcode->filter = (struct sock_filter *) f; 5896 5897 for (i = 0; i < len; ++i) { 5898 p = &f[i]; 5899 /* 5900 * What type of instruction is this? 5901 */ 5902 switch (BPF_CLASS(p->code)) { 5903 5904 case BPF_LD: 5905 case BPF_LDX: 5906 /* 5907 * It's a load instruction; is it loading 5908 * from the packet? 5909 */ 5910 switch (BPF_MODE(p->code)) { 5911 5912 case BPF_ABS: 5913 case BPF_IND: 5914 case BPF_MSH: 5915 /* 5916 * Yes; are we in cooked mode? 5917 */ 5918 if (handlep->cooked) { 5919 /* 5920 * Yes, so we need to fix this 5921 * instruction. 
5922 */ 5923 if (fix_offset(handle, p) < 0) { 5924 /* 5925 * We failed to do so. 5926 * Return 0, so our caller 5927 * knows to punt to userland. 5928 */ 5929 return 0; 5930 } 5931 } 5932 break; 5933 } 5934 break; 5935 } 5936 } 5937 return 1; /* we succeeded */ 5938 } 5939 5940 static int 5941 fix_offset(pcap_t *handle, struct bpf_insn *p) 5942 { 5943 /* 5944 * Existing references to auxiliary data shouldn't be adjusted. 5945 * 5946 * Note that SKF_AD_OFF is negative, but p->k is unsigned, so 5947 * we use >= and cast SKF_AD_OFF to unsigned. 5948 */ 5949 if (p->k >= (bpf_u_int32)SKF_AD_OFF) 5950 return 0; 5951 if (handle->linktype == DLT_LINUX_SLL2) { 5952 /* 5953 * What's the offset? 5954 */ 5955 if (p->k >= SLL2_HDR_LEN) { 5956 /* 5957 * It's within the link-layer payload; that starts 5958 * at an offset of 0, as far as the kernel packet 5959 * filter is concerned, so subtract the length of 5960 * the link-layer header. 5961 */ 5962 p->k -= SLL2_HDR_LEN; 5963 } else if (p->k == 0) { 5964 /* 5965 * It's the protocol field; map it to the 5966 * special magic kernel offset for that field. 5967 */ 5968 p->k = SKF_AD_OFF + SKF_AD_PROTOCOL; 5969 } else if (p->k == 4) { 5970 /* 5971 * It's the ifindex field; map it to the 5972 * special magic kernel offset for that field. 5973 */ 5974 p->k = SKF_AD_OFF + SKF_AD_IFINDEX; 5975 } else if (p->k == 10) { 5976 /* 5977 * It's the packet type field; map it to the 5978 * special magic kernel offset for that field. 5979 */ 5980 p->k = SKF_AD_OFF + SKF_AD_PKTTYPE; 5981 } else if ((bpf_int32)(p->k) > 0) { 5982 /* 5983 * It's within the header, but it's not one of 5984 * those fields; we can't do that in the kernel, 5985 * so punt to userland. 5986 */ 5987 return -1; 5988 } 5989 } else { 5990 /* 5991 * What's the offset? 
5992 */ 5993 if (p->k >= SLL_HDR_LEN) { 5994 /* 5995 * It's within the link-layer payload; that starts 5996 * at an offset of 0, as far as the kernel packet 5997 * filter is concerned, so subtract the length of 5998 * the link-layer header. 5999 */ 6000 p->k -= SLL_HDR_LEN; 6001 } else if (p->k == 0) { 6002 /* 6003 * It's the packet type field; map it to the 6004 * special magic kernel offset for that field. 6005 */ 6006 p->k = SKF_AD_OFF + SKF_AD_PKTTYPE; 6007 } else if (p->k == 14) { 6008 /* 6009 * It's the protocol field; map it to the 6010 * special magic kernel offset for that field. 6011 */ 6012 p->k = SKF_AD_OFF + SKF_AD_PROTOCOL; 6013 } else if ((bpf_int32)(p->k) > 0) { 6014 /* 6015 * It's within the header, but it's not one of 6016 * those fields; we can't do that in the kernel, 6017 * so punt to userland. 6018 */ 6019 return -1; 6020 } 6021 } 6022 return 0; 6023 } 6024 6025 static int 6026 set_kernel_filter(pcap_t *handle, struct sock_fprog *fcode) 6027 { 6028 int total_filter_on = 0; 6029 int save_mode; 6030 int ret; 6031 int save_errno; 6032 6033 /* 6034 * The socket filter code doesn't discard all packets queued 6035 * up on the socket when the filter is changed; this means 6036 * that packets that don't match the new filter may show up 6037 * after the new filter is put onto the socket, if those 6038 * packets haven't yet been read. 6039 * 6040 * This means, for example, that if you do a tcpdump capture 6041 * with a filter, the first few packets in the capture might 6042 * be packets that wouldn't have passed the filter. 6043 * 6044 * We therefore discard all packets queued up on the socket 6045 * when setting a kernel filter. (This isn't an issue for 6046 * userland filters, as the userland filtering is done after 6047 * packets are queued up.) 6048 * 6049 * To flush those packets, we put the socket in read-only mode, 6050 * and read packets from the socket until there are no more to 6051 * read. 
6052 * 6053 * In order to keep that from being an infinite loop - i.e., 6054 * to keep more packets from arriving while we're draining 6055 * the queue - we put the "total filter", which is a filter 6056 * that rejects all packets, onto the socket before draining 6057 * the queue. 6058 * 6059 * This code deliberately ignores any errors, so that you may 6060 * get bogus packets if an error occurs, rather than having 6061 * the filtering done in userland even if it could have been 6062 * done in the kernel. 6063 */ 6064 if (setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER, 6065 &total_fcode, sizeof(total_fcode)) == 0) { 6066 char drain[1]; 6067 6068 /* 6069 * Note that we've put the total filter onto the socket. 6070 */ 6071 total_filter_on = 1; 6072 6073 /* 6074 * Save the socket's current mode, and put it in 6075 * non-blocking mode; we drain it by reading packets 6076 * until we get an error (which is normally a 6077 * "nothing more to be read" error). 6078 */ 6079 save_mode = fcntl(handle->fd, F_GETFL, 0); 6080 if (save_mode == -1) { 6081 pcapint_fmt_errmsg_for_errno(handle->errbuf, 6082 PCAP_ERRBUF_SIZE, errno, 6083 "can't get FD flags when changing filter"); 6084 return -2; 6085 } 6086 if (fcntl(handle->fd, F_SETFL, save_mode | O_NONBLOCK) < 0) { 6087 pcapint_fmt_errmsg_for_errno(handle->errbuf, 6088 PCAP_ERRBUF_SIZE, errno, 6089 "can't set nonblocking mode when changing filter"); 6090 return -2; 6091 } 6092 while (recv(handle->fd, &drain, sizeof drain, MSG_TRUNC) >= 0) 6093 ; 6094 save_errno = errno; 6095 if (save_errno != EAGAIN) { 6096 /* 6097 * Fatal error. 6098 * 6099 * If we can't restore the mode or reset the 6100 * kernel filter, there's nothing we can do. 
6101 */ 6102 (void)fcntl(handle->fd, F_SETFL, save_mode); 6103 (void)reset_kernel_filter(handle); 6104 pcapint_fmt_errmsg_for_errno(handle->errbuf, 6105 PCAP_ERRBUF_SIZE, save_errno, 6106 "recv failed when changing filter"); 6107 return -2; 6108 } 6109 if (fcntl(handle->fd, F_SETFL, save_mode) == -1) { 6110 pcapint_fmt_errmsg_for_errno(handle->errbuf, 6111 PCAP_ERRBUF_SIZE, errno, 6112 "can't restore FD flags when changing filter"); 6113 return -2; 6114 } 6115 } 6116 6117 /* 6118 * Now attach the new filter. 6119 */ 6120 ret = setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER, 6121 fcode, sizeof(*fcode)); 6122 if (ret == -1 && total_filter_on) { 6123 /* 6124 * Well, we couldn't set that filter on the socket, 6125 * but we could set the total filter on the socket. 6126 * 6127 * This could, for example, mean that the filter was 6128 * too big to put into the kernel, so we'll have to 6129 * filter in userland; in any case, we'll be doing 6130 * filtering in userland, so we need to remove the 6131 * total filter so we see packets. 6132 */ 6133 save_errno = errno; 6134 6135 /* 6136 * If this fails, we're really screwed; we have the 6137 * total filter on the socket, and it won't come off. 6138 * Report it as a fatal error. 6139 */ 6140 if (reset_kernel_filter(handle) == -1) { 6141 pcapint_fmt_errmsg_for_errno(handle->errbuf, 6142 PCAP_ERRBUF_SIZE, errno, 6143 "can't remove kernel total filter"); 6144 return -2; /* fatal error */ 6145 } 6146 6147 errno = save_errno; 6148 } 6149 return ret; 6150 } 6151 6152 static int 6153 reset_kernel_filter(pcap_t *handle) 6154 { 6155 int ret; 6156 /* 6157 * setsockopt() barfs unless it get a dummy parameter. 6158 * valgrind whines unless the value is initialized, 6159 * as it has no idea that setsockopt() ignores its 6160 * parameter. 
6161 */ 6162 int dummy = 0; 6163 6164 ret = setsockopt(handle->fd, SOL_SOCKET, SO_DETACH_FILTER, 6165 &dummy, sizeof(dummy)); 6166 /* 6167 * Ignore ENOENT - it means "we don't have a filter", so there 6168 * was no filter to remove, and there's still no filter. 6169 * 6170 * Also ignore ENONET, as a lot of kernel versions had a 6171 * typo where ENONET, rather than ENOENT, was returned. 6172 */ 6173 if (ret == -1 && errno != ENOENT && errno != ENONET) 6174 return -1; 6175 return 0; 6176 } 6177 6178 int 6179 pcap_set_protocol_linux(pcap_t *p, int protocol) 6180 { 6181 if (pcapint_check_activated(p)) 6182 return (PCAP_ERROR_ACTIVATED); 6183 p->opt.protocol = protocol; 6184 return (0); 6185 } 6186 6187 /* 6188 * Libpcap version string. 6189 */ 6190 #if defined(HAVE_TPACKET3) && defined(PCAP_SUPPORT_NETMAP) 6191 #define ADDITIONAL_INFO_STRING "with TPACKET_V3 and netmap" 6192 #elif defined(HAVE_TPACKET3) 6193 #define ADDITIONAL_INFO_STRING "with TPACKET_V3" 6194 #elif defined(PCAP_SUPPORT_NETMAP) 6195 #define ADDITIONAL_INFO_STRING "with TPACKET_V2 and netmap" 6196 #else 6197 #define ADDITIONAL_INFO_STRING "with TPACKET_V2" 6198 #endif 6199 6200 const char * 6201 pcap_lib_version(void) 6202 { 6203 return (PCAP_VERSION_STRING_WITH_ADDITIONAL_INFO(ADDITIONAL_INFO_STRING)); 6204 } 6205