1 /* 2 * pcap-linux.c: Packet capture interface to the Linux kernel 3 * 4 * Copyright (c) 2000 Torsten Landschoff <torsten@debian.org> 5 * Sebastian Krahmer <krahmer@cs.uni-potsdam.de> 6 * 7 * License: BSD 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in 17 * the documentation and/or other materials provided with the 18 * distribution. 19 * 3. The names of the authors may not be used to endorse or promote 20 * products derived from this software without specific prior 21 * written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 24 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 25 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 26 * 27 * Modifications: Added PACKET_MMAP support 28 * Paolo Abeni <paolo.abeni@email.it> 29 * Added TPACKET_V3 support 30 * Gabor Tatarka <gabor.tatarka@ericsson.com> 31 * 32 * based on previous works of: 33 * Simon Patarin <patarin@cs.unibo.it> 34 * Phil Wood <cpw@lanl.gov> 35 * 36 * Monitor-mode support for mac80211 includes code taken from the iw 37 * command; the copyright notice for that code is 38 * 39 * Copyright (c) 2007, 2008 Johannes Berg 40 * Copyright (c) 2007 Andy Lutomirski 41 * Copyright (c) 2007 Mike Kershaw 42 * Copyright (c) 2008 Gábor Stefanik 43 * 44 * All rights reserved. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted provided that the following conditions 48 * are met: 49 * 1. 
Redistributions of source code must retain the above copyright 50 * notice, this list of conditions and the following disclaimer. 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 3. The name of the author may not be used to endorse or promote products 55 * derived from this software without specific prior written permission. 56 * 57 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 58 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 59 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 60 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 61 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 62 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 63 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 64 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 65 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 66 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 67 * SUCH DAMAGE. 
68 */ 69 70 71 #define _GNU_SOURCE 72 73 #include <config.h> 74 75 #include <errno.h> 76 #include <stdio.h> 77 #include <stdlib.h> 78 #include <unistd.h> 79 #include <fcntl.h> 80 #include <string.h> 81 #include <limits.h> 82 #include <endian.h> 83 #include <sys/stat.h> 84 #include <sys/socket.h> 85 #include <sys/ioctl.h> 86 #include <sys/utsname.h> 87 #include <sys/mman.h> 88 #include <linux/if.h> 89 #include <linux/if_packet.h> 90 #include <linux/sockios.h> 91 #include <linux/ethtool.h> 92 #include <netinet/in.h> 93 #include <linux/if_ether.h> 94 #include <linux/if_arp.h> 95 #include <poll.h> 96 #include <dirent.h> 97 #include <sys/eventfd.h> 98 99 #include "pcap-int.h" 100 #include "pcap-util.h" 101 #include "pcap/sll.h" 102 #include "pcap/vlan.h" 103 #include "pcap/can_socketcan.h" 104 105 #include "diag-control.h" 106 107 /* 108 * We require TPACKET_V2 support. 109 */ 110 #ifndef TPACKET2_HDRLEN 111 #error "Libpcap will only work if TPACKET_V2 is supported; you must build for a 2.6.27 or later kernel" 112 #endif 113 114 /* check for memory mapped access availability. We assume every needed 115 * struct is defined if the macro TPACKET_HDRLEN is defined, because it 116 * uses many ring related structs and macros */ 117 #ifdef TPACKET3_HDRLEN 118 # define HAVE_TPACKET3 119 #endif /* TPACKET3_HDRLEN */ 120 121 /* 122 * Not all compilers that are used to compile code to run on Linux have 123 * these builtins. For example, older versions of GCC don't, and at 124 * least some people are doing cross-builds for MIPS with older versions 125 * of GCC. 
126 */ 127 #ifndef HAVE___ATOMIC_LOAD_N 128 #define __atomic_load_n(ptr, memory_model) (*(ptr)) 129 #endif 130 #ifndef HAVE___ATOMIC_STORE_N 131 #define __atomic_store_n(ptr, val, memory_model) *(ptr) = (val) 132 #endif 133 134 #define packet_mmap_acquire(pkt) \ 135 (__atomic_load_n(&pkt->tp_status, __ATOMIC_ACQUIRE) != TP_STATUS_KERNEL) 136 #define packet_mmap_release(pkt) \ 137 (__atomic_store_n(&pkt->tp_status, TP_STATUS_KERNEL, __ATOMIC_RELEASE)) 138 #define packet_mmap_v3_acquire(pkt) \ 139 (__atomic_load_n(&pkt->hdr.bh1.block_status, __ATOMIC_ACQUIRE) != TP_STATUS_KERNEL) 140 #define packet_mmap_v3_release(pkt) \ 141 (__atomic_store_n(&pkt->hdr.bh1.block_status, TP_STATUS_KERNEL, __ATOMIC_RELEASE)) 142 143 #include <linux/types.h> 144 #include <linux/filter.h> 145 146 #ifdef HAVE_LINUX_NET_TSTAMP_H 147 #include <linux/net_tstamp.h> 148 #endif 149 150 /* 151 * For checking whether a device is a bonding device. 152 */ 153 #include <linux/if_bonding.h> 154 155 /* 156 * Got libnl? 157 */ 158 #ifdef HAVE_LIBNL 159 #include <linux/nl80211.h> 160 161 #include <netlink/genl/genl.h> 162 #include <netlink/genl/family.h> 163 #include <netlink/genl/ctrl.h> 164 #include <netlink/msg.h> 165 #include <netlink/attr.h> 166 #endif /* HAVE_LIBNL */ 167 168 #ifndef HAVE_SOCKLEN_T 169 typedef int socklen_t; 170 #endif 171 172 #define MAX_LINKHEADER_SIZE 256 173 174 /* 175 * When capturing on all interfaces we use this as the buffer size. 176 * Should be bigger then all MTUs that occur in real life. 177 * 64kB should be enough for now. 178 */ 179 #define BIGGER_THAN_ALL_MTUS (64*1024) 180 181 /* 182 * Private data for capturing on Linux PF_PACKET sockets. 
*/
struct pcap_linux {
	long long sysfs_dropped; /* packets reported dropped by /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors */
	struct pcap_stat stat;	/* NOTE(review): presumably the counters handed back by pcap_stats_linux() - confirm */

	char	*device;	/* device name; strdup()ed copy, freed on cleanup */
	int	filter_in_userland; /* must filter in userland */
	int	blocks_to_filter_in_userland; /* NOTE(review): looks like a count of ring blocks still needing userland filtering - confirm */
	int	must_do_on_close; /* stuff we must do when we close; bitmask of the MUST_* flags below */
	int	timeout;	/* timeout for buffering; copied from opt.timeout, 0 means block forever, negative means non-blocking */
	int	cooked;		/* using SOCK_DGRAM rather than SOCK_RAW */
	int	ifindex;	/* interface index of device we're bound to */
	int	lo_ifindex;	/* interface index of the loopback device */
	int	netdown;	/* we got an ENETDOWN and haven't resolved it */
	bpf_u_int32 oldmode;	/* mode to restore when turning monitor mode off */
	char	*mondevice;	/* mac80211 monitor device we created; strdup()ed, freed on cleanup */
	u_char	*mmapbuf;	/* memory-mapped region pointer */
	size_t	mmapbuflen;	/* size of region */
	int	vlan_offset;	/* offset at which to insert vlan tags; if -1, don't insert */
	u_int	tp_version;	/* version of tpacket_hdr for mmaped ring */
	u_int	tp_hdrlen;	/* hdrlen of tpacket_hdr for mmaped ring */
	u_char	*oneshot_buffer; /* buffer for copy of packet */
	int	poll_timeout;	/* timeout to use in poll(); derived from timeout and tp_version */
#ifdef HAVE_TPACKET3
	unsigned char *current_packet; /* Current packet within the TPACKET_V3 block. Move to next block if NULL. */
	int packets_left; /* Unhandled packets left within the block from previous call to pcap_read_linux_mmap_v3 in case of TPACKET_V3. */
#endif
	int poll_breakloop_fd; /* fd to an eventfd to break from blocking operations; -1 when not open */
};

/*
 * Stuff to do when we close.
 *
 * These are bit flags, tested against must_do_on_close.
 */
#define MUST_CLEAR_RFMON	0x00000001	/* clear rfmon (monitor) mode */
#define MUST_DELETE_MONIF	0x00000002	/* delete monitor-mode interface */

/*
 * Prototypes for internal functions and methods.
221 */ 222 static int get_if_flags(const char *, bpf_u_int32 *, char *); 223 static int is_wifi(const char *); 224 static int map_arphrd_to_dlt(pcap_t *, int, const char *, int); 225 static int pcap_activate_linux(pcap_t *); 226 static int setup_socket(pcap_t *, int); 227 static int setup_mmapped(pcap_t *); 228 static int pcap_can_set_rfmon_linux(pcap_t *); 229 static int pcap_inject_linux(pcap_t *, const void *, int); 230 static int pcap_stats_linux(pcap_t *, struct pcap_stat *); 231 static int pcap_setfilter_linux(pcap_t *, struct bpf_program *); 232 static int pcap_setdirection_linux(pcap_t *, pcap_direction_t); 233 static int pcap_set_datalink_linux(pcap_t *, int); 234 static void pcap_cleanup_linux(pcap_t *); 235 236 union thdr { 237 struct tpacket2_hdr *h2; 238 #ifdef HAVE_TPACKET3 239 struct tpacket_block_desc *h3; 240 #endif 241 u_char *raw; 242 }; 243 244 #define RING_GET_FRAME_AT(h, offset) (((u_char **)h->buffer)[(offset)]) 245 #define RING_GET_CURRENT_FRAME(h) RING_GET_FRAME_AT(h, h->offset) 246 247 static void destroy_ring(pcap_t *handle); 248 static int create_ring(pcap_t *handle); 249 static int prepare_tpacket_socket(pcap_t *handle); 250 static int pcap_read_linux_mmap_v2(pcap_t *, int, pcap_handler , u_char *); 251 #ifdef HAVE_TPACKET3 252 static int pcap_read_linux_mmap_v3(pcap_t *, int, pcap_handler , u_char *); 253 #endif 254 static int pcap_setnonblock_linux(pcap_t *p, int nonblock); 255 static int pcap_getnonblock_linux(pcap_t *p); 256 static void pcapint_oneshot_linux(u_char *user, const struct pcap_pkthdr *h, 257 const u_char *bytes); 258 259 /* 260 * In pre-3.0 kernels, the tp_vlan_tci field is set to whatever the 261 * vlan_tci field in the skbuff is. 0 can either mean "not on a VLAN" 262 * or "on VLAN 0". There is no flag set in the tp_status field to 263 * distinguish between them. 
264 * 265 * In 3.0 and later kernels, if there's a VLAN tag present, the tp_vlan_tci 266 * field is set to the VLAN tag, and the TP_STATUS_VLAN_VALID flag is set 267 * in the tp_status field, otherwise the tp_vlan_tci field is set to 0 and 268 * the TP_STATUS_VLAN_VALID flag isn't set in the tp_status field. 269 * 270 * With a pre-3.0 kernel, we cannot distinguish between packets with no 271 * VLAN tag and packets on VLAN 0, so we will mishandle some packets, and 272 * there's nothing we can do about that. 273 * 274 * So, on those systems, which never set the TP_STATUS_VLAN_VALID flag, we 275 * continue the behavior of earlier libpcaps, wherein we treated packets 276 * with a VLAN tag of 0 as being packets without a VLAN tag rather than packets 277 * on VLAN 0. We do this by treating packets with a tp_vlan_tci of 0 and 278 * with the TP_STATUS_VLAN_VALID flag not set in tp_status as not having 279 * VLAN tags. This does the right thing on 3.0 and later kernels, and 280 * continues the old unfixably-imperfect behavior on pre-3.0 kernels. 281 * 282 * If TP_STATUS_VLAN_VALID isn't defined, we test it as the 0x10 bit; it 283 * has that value in 3.0 and later kernels. 284 */ 285 #ifdef TP_STATUS_VLAN_VALID 286 #define VLAN_VALID(hdr, hv) ((hv)->tp_vlan_tci != 0 || ((hdr)->tp_status & TP_STATUS_VLAN_VALID)) 287 #else 288 /* 289 * This is being compiled on a system that lacks TP_STATUS_VLAN_VALID, 290 * so we test with the value it has in the 3.0 and later kernels, so 291 * we can test it if we're running on a system that has it. (If we're 292 * running on a system that doesn't have it, it won't be set in the 293 * tp_status field, so the tests of it will always fail; that means 294 * we behave the way we did before we introduced this macro.) 
295 */ 296 #define VLAN_VALID(hdr, hv) ((hv)->tp_vlan_tci != 0 || ((hdr)->tp_status & 0x10)) 297 #endif 298 299 #ifdef TP_STATUS_VLAN_TPID_VALID 300 # define VLAN_TPID(hdr, hv) (((hv)->tp_vlan_tpid || ((hdr)->tp_status & TP_STATUS_VLAN_TPID_VALID)) ? (hv)->tp_vlan_tpid : ETH_P_8021Q) 301 #else 302 # define VLAN_TPID(hdr, hv) ETH_P_8021Q 303 #endif 304 305 /* 306 * Required select timeout if we're polling for an "interface disappeared" 307 * indication - 1 millisecond. 308 */ 309 static const struct timeval netdown_timeout = { 310 0, 1000 /* 1000 microseconds = 1 millisecond */ 311 }; 312 313 /* 314 * Wrap some ioctl calls 315 */ 316 static int iface_get_id(int fd, const char *device, char *ebuf); 317 static int iface_get_mtu(int fd, const char *device, char *ebuf); 318 static int iface_get_arptype(int fd, const char *device, char *ebuf); 319 static int iface_bind(int fd, int ifindex, char *ebuf, int protocol); 320 static int enter_rfmon_mode(pcap_t *handle, int sock_fd, 321 const char *device); 322 static int iface_get_ts_types(const char *device, pcap_t *handle, 323 char *ebuf); 324 static int iface_get_offload(pcap_t *handle); 325 326 static int fix_program(pcap_t *handle, struct sock_fprog *fcode); 327 static int fix_offset(pcap_t *handle, struct bpf_insn *p); 328 static int set_kernel_filter(pcap_t *handle, struct sock_fprog *fcode); 329 static int reset_kernel_filter(pcap_t *handle); 330 331 static struct sock_filter total_insn 332 = BPF_STMT(BPF_RET | BPF_K, 0); 333 static struct sock_fprog total_fcode 334 = { 1, &total_insn }; 335 336 static int iface_dsa_get_proto_info(const char *device, pcap_t *handle); 337 338 pcap_t * 339 pcapint_create_interface(const char *device, char *ebuf) 340 { 341 pcap_t *handle; 342 343 handle = PCAP_CREATE_COMMON(ebuf, struct pcap_linux); 344 if (handle == NULL) 345 return NULL; 346 347 handle->activate_op = pcap_activate_linux; 348 handle->can_set_rfmon_op = pcap_can_set_rfmon_linux; 349 350 /* 351 * See what time stamp types 
we support. 352 */ 353 if (iface_get_ts_types(device, handle, ebuf) == -1) { 354 pcap_close(handle); 355 return NULL; 356 } 357 358 /* 359 * We claim that we support microsecond and nanosecond time 360 * stamps. 361 * 362 * XXX - with adapter-supplied time stamps, can we choose 363 * microsecond or nanosecond time stamps on arbitrary 364 * adapters? 365 */ 366 handle->tstamp_precision_list = malloc(2 * sizeof(u_int)); 367 if (handle->tstamp_precision_list == NULL) { 368 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 369 errno, "malloc"); 370 pcap_close(handle); 371 return NULL; 372 } 373 handle->tstamp_precision_list[0] = PCAP_TSTAMP_PRECISION_MICRO; 374 handle->tstamp_precision_list[1] = PCAP_TSTAMP_PRECISION_NANO; 375 handle->tstamp_precision_count = 2; 376 377 /* 378 * Start out with the breakloop handle not open; we don't 379 * need it until we're activated and ready to capture. 380 */ 381 struct pcap_linux *handlep = handle->priv; 382 handlep->poll_breakloop_fd = -1; 383 384 return handle; 385 } 386 387 #ifdef HAVE_LIBNL 388 /* 389 * If interface {if_name} is a mac80211 driver, the file 390 * /sys/class/net/{if_name}/phy80211 is a symlink to 391 * /sys/class/ieee80211/{phydev_name}, for some {phydev_name}. 392 * 393 * On Fedora 9, with a 2.6.26.3-29 kernel, my Zydas stick, at 394 * least, has a "wmaster0" device and a "wlan0" device; the 395 * latter is the one with the IP address. Both show up in 396 * "tcpdump -D" output. Capturing on the wmaster0 device 397 * captures with 802.11 headers. 398 * 399 * airmon-ng searches through /sys/class/net for devices named 400 * monN, starting with mon0; as soon as one *doesn't* exist, 401 * it chooses that as the monitor device name. If the "iw" 402 * command exists, it does 403 * 404 * iw dev {if_name} interface add {monif_name} type monitor 405 * 406 * where {monif_name} is the monitor device. It then (sigh) sleeps 407 * .1 second, and then configures the device up. 
Otherwise, if 408 * /sys/class/ieee80211/{phydev_name}/add_iface is a file, it writes 409 * {mondev_name}, without a newline, to that file, and again (sigh) 410 * sleeps .1 second, and then iwconfig's that device into monitor 411 * mode and configures it up. Otherwise, you can't do monitor mode. 412 * 413 * All these devices are "glued" together by having the 414 * /sys/class/net/{if_name}/phy80211 links pointing to the same 415 * place, so, given a wmaster, wlan, or mon device, you can 416 * find the other devices by looking for devices with 417 * the same phy80211 link. 418 * 419 * To turn monitor mode off, delete the monitor interface, 420 * either with 421 * 422 * iw dev {monif_name} interface del 423 * 424 * or by sending {monif_name}, with no NL, down 425 * /sys/class/ieee80211/{phydev_name}/remove_iface 426 * 427 * Note: if you try to create a monitor device named "monN", and 428 * there's already a "monN" device, it fails, as least with 429 * the netlink interface (which is what iw uses), with a return 430 * value of -ENFILE. (Return values are negative errnos.) We 431 * could probably use that to find an unused device. 432 * 433 * Yes, you can have multiple monitor devices for a given 434 * physical device. 435 */ 436 437 /* 438 * Is this a mac80211 device? If so, fill in the physical device path and 439 * return 1; if not, return 0. On an error, fill in handle->errbuf and 440 * return PCAP_ERROR. 441 */ 442 static int 443 get_mac80211_phydev(pcap_t *handle, const char *device, char *phydev_path, 444 size_t phydev_max_pathlen) 445 { 446 char *pathstr; 447 ssize_t bytes_read; 448 449 /* 450 * Generate the path string for the symlink to the physical device. 
451 */ 452 if (asprintf(&pathstr, "/sys/class/net/%s/phy80211", device) == -1) { 453 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 454 "%s: Can't generate path name string for /sys/class/net device", 455 device); 456 return PCAP_ERROR; 457 } 458 bytes_read = readlink(pathstr, phydev_path, phydev_max_pathlen); 459 if (bytes_read == -1) { 460 if (errno == ENOENT || errno == EINVAL) { 461 /* 462 * Doesn't exist, or not a symlink; assume that 463 * means it's not a mac80211 device. 464 */ 465 free(pathstr); 466 return 0; 467 } 468 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 469 errno, "%s: Can't readlink %s", device, pathstr); 470 free(pathstr); 471 return PCAP_ERROR; 472 } 473 free(pathstr); 474 phydev_path[bytes_read] = '\0'; 475 return 1; 476 } 477 478 struct nl80211_state { 479 struct nl_sock *nl_sock; 480 struct nl_cache *nl_cache; 481 struct genl_family *nl80211; 482 }; 483 484 static int 485 nl80211_init(pcap_t *handle, struct nl80211_state *state, const char *device) 486 { 487 int err; 488 489 state->nl_sock = nl_socket_alloc(); 490 if (!state->nl_sock) { 491 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 492 "%s: failed to allocate netlink handle", device); 493 return PCAP_ERROR; 494 } 495 496 if (genl_connect(state->nl_sock)) { 497 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 498 "%s: failed to connect to generic netlink", device); 499 goto out_handle_destroy; 500 } 501 502 err = genl_ctrl_alloc_cache(state->nl_sock, &state->nl_cache); 503 if (err < 0) { 504 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 505 "%s: failed to allocate generic netlink cache: %s", 506 device, nl_geterror(-err)); 507 goto out_handle_destroy; 508 } 509 510 state->nl80211 = genl_ctrl_search_by_name(state->nl_cache, "nl80211"); 511 if (!state->nl80211) { 512 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 513 "%s: nl80211 not found", device); 514 goto out_cache_free; 515 } 516 517 return 0; 518 519 out_cache_free: 520 nl_cache_free(state->nl_cache); 521 out_handle_destroy: 522 
nl_socket_free(state->nl_sock); 523 return PCAP_ERROR; 524 } 525 526 static void 527 nl80211_cleanup(struct nl80211_state *state) 528 { 529 genl_family_put(state->nl80211); 530 nl_cache_free(state->nl_cache); 531 nl_socket_free(state->nl_sock); 532 } 533 534 static int 535 del_mon_if(pcap_t *handle, int sock_fd, struct nl80211_state *state, 536 const char *device, const char *mondevice); 537 538 static int 539 add_mon_if(pcap_t *handle, int sock_fd, struct nl80211_state *state, 540 const char *device, const char *mondevice) 541 { 542 struct pcap_linux *handlep = handle->priv; 543 int ifindex; 544 struct nl_msg *msg; 545 int err; 546 547 ifindex = iface_get_id(sock_fd, device, handle->errbuf); 548 if (ifindex == -1) 549 return PCAP_ERROR; 550 551 msg = nlmsg_alloc(); 552 if (!msg) { 553 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 554 "%s: failed to allocate netlink msg", device); 555 return PCAP_ERROR; 556 } 557 558 genlmsg_put(msg, 0, 0, genl_family_get_id(state->nl80211), 0, 559 0, NL80211_CMD_NEW_INTERFACE, 0); 560 NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, ifindex); 561 DIAG_OFF_NARROWING 562 NLA_PUT_STRING(msg, NL80211_ATTR_IFNAME, mondevice); 563 DIAG_ON_NARROWING 564 NLA_PUT_U32(msg, NL80211_ATTR_IFTYPE, NL80211_IFTYPE_MONITOR); 565 566 err = nl_send_auto_complete(state->nl_sock, msg); 567 if (err < 0) { 568 if (err == -NLE_FAILURE) { 569 /* 570 * Device not available; our caller should just 571 * keep trying. (libnl 2.x maps ENFILE to 572 * NLE_FAILURE; it can also map other errors 573 * to that, but there's not much we can do 574 * about that.) 575 */ 576 nlmsg_free(msg); 577 return 0; 578 } else { 579 /* 580 * Real failure, not just "that device is not 581 * available. 
582 */ 583 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 584 "%s: nl_send_auto_complete failed adding %s interface: %s", 585 device, mondevice, nl_geterror(-err)); 586 nlmsg_free(msg); 587 return PCAP_ERROR; 588 } 589 } 590 err = nl_wait_for_ack(state->nl_sock); 591 if (err < 0) { 592 if (err == -NLE_FAILURE) { 593 /* 594 * Device not available; our caller should just 595 * keep trying. (libnl 2.x maps ENFILE to 596 * NLE_FAILURE; it can also map other errors 597 * to that, but there's not much we can do 598 * about that.) 599 */ 600 nlmsg_free(msg); 601 return 0; 602 } else { 603 /* 604 * Real failure, not just "that device is not 605 * available. 606 */ 607 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 608 "%s: nl_wait_for_ack failed adding %s interface: %s", 609 device, mondevice, nl_geterror(-err)); 610 nlmsg_free(msg); 611 return PCAP_ERROR; 612 } 613 } 614 615 /* 616 * Success. 617 */ 618 nlmsg_free(msg); 619 620 /* 621 * Try to remember the monitor device. 622 */ 623 handlep->mondevice = strdup(mondevice); 624 if (handlep->mondevice == NULL) { 625 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 626 errno, "strdup"); 627 /* 628 * Get rid of the monitor device. 
629 */ 630 del_mon_if(handle, sock_fd, state, device, mondevice); 631 return PCAP_ERROR; 632 } 633 return 1; 634 635 nla_put_failure: 636 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 637 "%s: nl_put failed adding %s interface", 638 device, mondevice); 639 nlmsg_free(msg); 640 return PCAP_ERROR; 641 } 642 643 static int 644 del_mon_if(pcap_t *handle, int sock_fd, struct nl80211_state *state, 645 const char *device, const char *mondevice) 646 { 647 int ifindex; 648 struct nl_msg *msg; 649 int err; 650 651 ifindex = iface_get_id(sock_fd, mondevice, handle->errbuf); 652 if (ifindex == -1) 653 return PCAP_ERROR; 654 655 msg = nlmsg_alloc(); 656 if (!msg) { 657 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 658 "%s: failed to allocate netlink msg", device); 659 return PCAP_ERROR; 660 } 661 662 genlmsg_put(msg, 0, 0, genl_family_get_id(state->nl80211), 0, 663 0, NL80211_CMD_DEL_INTERFACE, 0); 664 NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, ifindex); 665 666 err = nl_send_auto_complete(state->nl_sock, msg); 667 if (err < 0) { 668 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 669 "%s: nl_send_auto_complete failed deleting %s interface: %s", 670 device, mondevice, nl_geterror(-err)); 671 nlmsg_free(msg); 672 return PCAP_ERROR; 673 } 674 err = nl_wait_for_ack(state->nl_sock); 675 if (err < 0) { 676 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 677 "%s: nl_wait_for_ack failed adding %s interface: %s", 678 device, mondevice, nl_geterror(-err)); 679 nlmsg_free(msg); 680 return PCAP_ERROR; 681 } 682 683 /* 684 * Success. 
685 */ 686 nlmsg_free(msg); 687 return 1; 688 689 nla_put_failure: 690 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 691 "%s: nl_put failed deleting %s interface", 692 device, mondevice); 693 nlmsg_free(msg); 694 return PCAP_ERROR; 695 } 696 #endif /* HAVE_LIBNL */ 697 698 static int pcap_protocol(pcap_t *handle) 699 { 700 int protocol; 701 702 protocol = handle->opt.protocol; 703 if (protocol == 0) 704 protocol = ETH_P_ALL; 705 706 return htons(protocol); 707 } 708 709 static int 710 pcap_can_set_rfmon_linux(pcap_t *handle) 711 { 712 #ifdef HAVE_LIBNL 713 char phydev_path[PATH_MAX+1]; 714 int ret; 715 #endif 716 717 if (strcmp(handle->opt.device, "any") == 0) { 718 /* 719 * Monitor mode makes no sense on the "any" device. 720 */ 721 return 0; 722 } 723 724 #ifdef HAVE_LIBNL 725 /* 726 * Bleah. There doesn't seem to be a way to ask a mac80211 727 * device, through libnl, whether it supports monitor mode; 728 * we'll just check whether the device appears to be a 729 * mac80211 device and, if so, assume the device supports 730 * monitor mode. 731 */ 732 ret = get_mac80211_phydev(handle, handle->opt.device, phydev_path, 733 PATH_MAX); 734 if (ret < 0) 735 return ret; /* error */ 736 if (ret == 1) 737 return 1; /* mac80211 device */ 738 #endif 739 740 return 0; 741 } 742 743 /* 744 * Grabs the number of missed packets by the interface from 745 * /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors. 746 * 747 * Compared to /proc/net/dev this avoids counting software drops, 748 * but may be unimplemented and just return 0. 749 * The author has found no straightforward way to check for support. 
750 */ 751 static long long int 752 linux_get_stat(const char * if_name, const char * stat) { 753 ssize_t bytes_read; 754 int fd; 755 char buffer[PATH_MAX]; 756 757 snprintf(buffer, sizeof(buffer), "/sys/class/net/%s/statistics/%s", if_name, stat); 758 fd = open(buffer, O_RDONLY); 759 if (fd == -1) 760 return 0; 761 762 bytes_read = read(fd, buffer, sizeof(buffer) - 1); 763 close(fd); 764 if (bytes_read == -1) 765 return 0; 766 buffer[bytes_read] = '\0'; 767 768 return strtoll(buffer, NULL, 10); 769 } 770 771 static long long int 772 linux_if_drops(const char * if_name) 773 { 774 long long int missed = linux_get_stat(if_name, "rx_missed_errors"); 775 long long int fifo = linux_get_stat(if_name, "rx_fifo_errors"); 776 return missed + fifo; 777 } 778 779 780 /* 781 * Monitor mode is kind of interesting because we have to reset the 782 * interface before exiting. The problem can't really be solved without 783 * some daemon taking care of managing usage counts. If we put the 784 * interface into monitor mode, we set a flag indicating that we must 785 * take it out of that mode when the interface is closed, and, when 786 * closing the interface, if that flag is set we take it out of monitor 787 * mode. 788 */ 789 790 static void pcap_cleanup_linux( pcap_t *handle ) 791 { 792 struct pcap_linux *handlep = handle->priv; 793 #ifdef HAVE_LIBNL 794 struct nl80211_state nlstate; 795 int ret; 796 #endif /* HAVE_LIBNL */ 797 798 if (handlep->must_do_on_close != 0) { 799 /* 800 * There's something we have to do when closing this 801 * pcap_t. 
802 */ 803 #ifdef HAVE_LIBNL 804 if (handlep->must_do_on_close & MUST_DELETE_MONIF) { 805 ret = nl80211_init(handle, &nlstate, handlep->device); 806 if (ret >= 0) { 807 ret = del_mon_if(handle, handle->fd, &nlstate, 808 handlep->device, handlep->mondevice); 809 nl80211_cleanup(&nlstate); 810 } 811 if (ret < 0) { 812 fprintf(stderr, 813 "Can't delete monitor interface %s (%s).\n" 814 "Please delete manually.\n", 815 handlep->mondevice, handle->errbuf); 816 } 817 } 818 #endif /* HAVE_LIBNL */ 819 820 /* 821 * Take this pcap out of the list of pcaps for which we 822 * have to take the interface out of some mode. 823 */ 824 pcapint_remove_from_pcaps_to_close(handle); 825 } 826 827 if (handle->fd != -1) { 828 /* 829 * Destroy the ring buffer (assuming we've set it up), 830 * and unmap it if it's mapped. 831 */ 832 destroy_ring(handle); 833 } 834 835 if (handlep->oneshot_buffer != NULL) { 836 free(handlep->oneshot_buffer); 837 handlep->oneshot_buffer = NULL; 838 } 839 840 if (handlep->mondevice != NULL) { 841 free(handlep->mondevice); 842 handlep->mondevice = NULL; 843 } 844 if (handlep->device != NULL) { 845 free(handlep->device); 846 handlep->device = NULL; 847 } 848 849 if (handlep->poll_breakloop_fd != -1) { 850 close(handlep->poll_breakloop_fd); 851 handlep->poll_breakloop_fd = -1; 852 } 853 pcapint_cleanup_live_common(handle); 854 } 855 856 #ifdef HAVE_TPACKET3 857 /* 858 * Some versions of TPACKET_V3 have annoying bugs/misfeatures 859 * around which we have to work. Determine if we have those 860 * problems or not. 861 * 3.19 is the first release with a fixed version of 862 * TPACKET_V3. We treat anything before that as 863 * not having a fixed version; that may really mean 864 * it has *no* version. 865 */ 866 static int has_broken_tpacket_v3(void) 867 { 868 struct utsname utsname; 869 const char *release; 870 long major, minor; 871 int matches, verlen; 872 873 /* No version information, assume broken. 
*/ 874 if (uname(&utsname) == -1) 875 return 1; 876 release = utsname.release; 877 878 /* A malformed version, ditto. */ 879 matches = sscanf(release, "%ld.%ld%n", &major, &minor, &verlen); 880 if (matches != 2) 881 return 1; 882 if (release[verlen] != '.' && release[verlen] != '\0') 883 return 1; 884 885 /* OK, a fixed version. */ 886 if (major > 3 || (major == 3 && minor >= 19)) 887 return 0; 888 889 /* Too old :( */ 890 return 1; 891 } 892 #endif 893 894 /* 895 * Set the timeout to be used in poll() with memory-mapped packet capture. 896 */ 897 static void 898 set_poll_timeout(struct pcap_linux *handlep) 899 { 900 #ifdef HAVE_TPACKET3 901 int broken_tpacket_v3 = has_broken_tpacket_v3(); 902 #endif 903 if (handlep->timeout == 0) { 904 #ifdef HAVE_TPACKET3 905 /* 906 * XXX - due to a set of (mis)features in the TPACKET_V3 907 * kernel code prior to the 3.19 kernel, blocking forever 908 * with a TPACKET_V3 socket can, if few packets are 909 * arriving and passing the socket filter, cause most 910 * packets to be dropped. See libpcap issue #335 for the 911 * full painful story. 912 * 913 * The workaround is to have poll() time out very quickly, 914 * so we grab the frames handed to us, and return them to 915 * the kernel, ASAP. 916 */ 917 if (handlep->tp_version == TPACKET_V3 && broken_tpacket_v3) 918 handlep->poll_timeout = 1; /* don't block for very long */ 919 else 920 #endif 921 handlep->poll_timeout = -1; /* block forever */ 922 } else if (handlep->timeout > 0) { 923 #ifdef HAVE_TPACKET3 924 /* 925 * For TPACKET_V3, the timeout is handled by the kernel, 926 * so block forever; that way, we don't get extra timeouts. 927 * Don't do that if we have a broken TPACKET_V3, though. 
928 */ 929 if (handlep->tp_version == TPACKET_V3 && !broken_tpacket_v3) 930 handlep->poll_timeout = -1; /* block forever, let TPACKET_V3 wake us up */ 931 else 932 #endif 933 handlep->poll_timeout = handlep->timeout; /* block for that amount of time */ 934 } else { 935 /* 936 * Non-blocking mode; we call poll() to pick up error 937 * indications, but we don't want it to wait for 938 * anything. 939 */ 940 handlep->poll_timeout = 0; 941 } 942 } 943 944 static void pcap_breakloop_linux(pcap_t *handle) 945 { 946 pcapint_breakloop_common(handle); 947 struct pcap_linux *handlep = handle->priv; 948 949 uint64_t value = 1; 950 951 if (handlep->poll_breakloop_fd != -1) { 952 /* 953 * XXX - pcap_breakloop() doesn't have a return value, 954 * so we can't indicate an error. 955 */ 956 DIAG_OFF_WARN_UNUSED_RESULT 957 (void)write(handlep->poll_breakloop_fd, &value, sizeof(value)); 958 DIAG_ON_WARN_UNUSED_RESULT 959 } 960 } 961 962 /* 963 * Set the offset at which to insert VLAN tags. 964 * That should be the offset of the type field. 965 */ 966 static void 967 set_vlan_offset(pcap_t *handle) 968 { 969 struct pcap_linux *handlep = handle->priv; 970 971 switch (handle->linktype) { 972 973 case DLT_EN10MB: 974 /* 975 * The type field is after the destination and source 976 * MAC address. 977 */ 978 handlep->vlan_offset = 2 * ETH_ALEN; 979 break; 980 981 case DLT_LINUX_SLL: 982 /* 983 * The type field is in the last 2 bytes of the 984 * DLT_LINUX_SLL header. 985 */ 986 handlep->vlan_offset = SLL_HDR_LEN - 2; 987 break; 988 989 default: 990 handlep->vlan_offset = -1; /* unknown */ 991 break; 992 } 993 } 994 995 /* 996 * Get a handle for a live capture from the given device. You can 997 * pass NULL as device to get all packages (without link level 998 * information of course). 
If you pass 1 as promisc the interface 999 * will be set to promiscuous mode (XXX: I think this usage should 1000 * be deprecated and functions be added to select that later allow 1001 * modification of that values -- Torsten). 1002 */ 1003 static int 1004 pcap_activate_linux(pcap_t *handle) 1005 { 1006 struct pcap_linux *handlep = handle->priv; 1007 const char *device; 1008 int is_any_device; 1009 struct ifreq ifr; 1010 int status; 1011 int ret; 1012 1013 device = handle->opt.device; 1014 1015 /* 1016 * Start out assuming no warnings. 1017 */ 1018 status = 0; 1019 1020 /* 1021 * Make sure the name we were handed will fit into the ioctls we 1022 * might perform on the device; if not, return a "No such device" 1023 * indication, as the Linux kernel shouldn't support creating 1024 * a device whose name won't fit into those ioctls. 1025 * 1026 * "Will fit" means "will fit, complete with a null terminator", 1027 * so if the length, which does *not* include the null terminator, 1028 * is greater than *or equal to* the size of the field into which 1029 * we'll be copying it, that won't fit. 1030 */ 1031 if (strlen(device) >= sizeof(ifr.ifr_name)) { 1032 /* 1033 * There's nothing more to say, so clear the error 1034 * message. 1035 */ 1036 handle->errbuf[0] = '\0'; 1037 status = PCAP_ERROR_NO_SUCH_DEVICE; 1038 goto fail; 1039 } 1040 1041 /* 1042 * Turn a negative snapshot value (invalid), a snapshot value of 1043 * 0 (unspecified), or a value bigger than the normal maximum 1044 * value, into the maximum allowed value. 1045 * 1046 * If some application really *needs* a bigger snapshot 1047 * length, we should just increase MAXIMUM_SNAPLEN. 
1048 */ 1049 if (handle->snapshot <= 0 || handle->snapshot > MAXIMUM_SNAPLEN) 1050 handle->snapshot = MAXIMUM_SNAPLEN; 1051 1052 handlep->device = strdup(device); 1053 if (handlep->device == NULL) { 1054 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 1055 errno, "strdup"); 1056 status = PCAP_ERROR; 1057 goto fail; 1058 } 1059 1060 /* 1061 * The "any" device is a special device which causes us not 1062 * to bind to a particular device and thus to look at all 1063 * devices. 1064 */ 1065 is_any_device = (strcmp(device, "any") == 0); 1066 if (is_any_device) { 1067 if (handle->opt.promisc) { 1068 handle->opt.promisc = 0; 1069 /* Just a warning. */ 1070 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 1071 "Promiscuous mode not supported on the \"any\" device"); 1072 status = PCAP_WARNING_PROMISC_NOTSUP; 1073 } 1074 } 1075 1076 /* copy timeout value */ 1077 handlep->timeout = handle->opt.timeout; 1078 1079 /* 1080 * If we're in promiscuous mode, then we probably want 1081 * to see when the interface drops packets too, so get an 1082 * initial count from 1083 * /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors 1084 */ 1085 if (handle->opt.promisc) 1086 handlep->sysfs_dropped = linux_if_drops(handlep->device); 1087 1088 /* 1089 * If the "any" device is specified, try to open a SOCK_DGRAM. 1090 * Otherwise, open a SOCK_RAW. 1091 */ 1092 ret = setup_socket(handle, is_any_device); 1093 if (ret < 0) { 1094 /* 1095 * Fatal error; the return value is the error code, 1096 * and handle->errbuf has been set to an appropriate 1097 * error message. 1098 */ 1099 status = ret; 1100 goto fail; 1101 } 1102 if (ret > 0) { 1103 /* 1104 * We got a warning; return that, as handle->errbuf 1105 * might have been overwritten by this warning. 1106 */ 1107 status = ret; 1108 } 1109 1110 /* 1111 * Success (possibly with a warning). 1112 * 1113 * First, try to allocate an event FD for breakloop, if 1114 * we're not going to start in non-blocking mode. 
1115 */ 1116 if (!handle->opt.nonblock) { 1117 handlep->poll_breakloop_fd = eventfd(0, EFD_NONBLOCK); 1118 if (handlep->poll_breakloop_fd == -1) { 1119 /* 1120 * Failed. 1121 */ 1122 pcapint_fmt_errmsg_for_errno(handle->errbuf, 1123 PCAP_ERRBUF_SIZE, errno, "could not open eventfd"); 1124 status = PCAP_ERROR; 1125 goto fail; 1126 } 1127 } 1128 1129 /* 1130 * Succeeded. 1131 * Try to set up memory-mapped access. 1132 */ 1133 ret = setup_mmapped(handle); 1134 if (ret < 0) { 1135 /* 1136 * We failed to set up to use it, or the 1137 * kernel supports it, but we failed to 1138 * enable it. The return value is the 1139 * error status to return and, if it's 1140 * PCAP_ERROR, handle->errbuf contains 1141 * the error message. 1142 */ 1143 status = ret; 1144 goto fail; 1145 } 1146 if (ret > 0) { 1147 /* 1148 * We got a warning; return that, as handle->errbuf 1149 * might have been overwritten by this warning. 1150 */ 1151 status = ret; 1152 } 1153 1154 /* 1155 * We succeeded. status has been set to the status to return, 1156 * which might be 0, or might be a PCAP_WARNING_ value. 1157 */ 1158 /* 1159 * Now that we have activated the mmap ring, we can 1160 * set the correct protocol. 
1161 */ 1162 if ((ret = iface_bind(handle->fd, handlep->ifindex, 1163 handle->errbuf, pcap_protocol(handle))) != 0) { 1164 status = ret; 1165 goto fail; 1166 } 1167 1168 handle->inject_op = pcap_inject_linux; 1169 handle->setfilter_op = pcap_setfilter_linux; 1170 handle->setdirection_op = pcap_setdirection_linux; 1171 handle->set_datalink_op = pcap_set_datalink_linux; 1172 handle->setnonblock_op = pcap_setnonblock_linux; 1173 handle->getnonblock_op = pcap_getnonblock_linux; 1174 handle->cleanup_op = pcap_cleanup_linux; 1175 handle->stats_op = pcap_stats_linux; 1176 handle->breakloop_op = pcap_breakloop_linux; 1177 1178 switch (handlep->tp_version) { 1179 1180 case TPACKET_V2: 1181 handle->read_op = pcap_read_linux_mmap_v2; 1182 break; 1183 #ifdef HAVE_TPACKET3 1184 case TPACKET_V3: 1185 handle->read_op = pcap_read_linux_mmap_v3; 1186 break; 1187 #endif 1188 } 1189 handle->oneshot_callback = pcapint_oneshot_linux; 1190 handle->selectable_fd = handle->fd; 1191 1192 return status; 1193 1194 fail: 1195 pcap_cleanup_linux(handle); 1196 return status; 1197 } 1198 1199 static int 1200 pcap_set_datalink_linux(pcap_t *handle, int dlt) 1201 { 1202 handle->linktype = dlt; 1203 1204 /* 1205 * Update the offset at which to insert VLAN tags for the 1206 * new link-layer type. 1207 */ 1208 set_vlan_offset(handle); 1209 1210 return 0; 1211 } 1212 1213 /* 1214 * linux_check_direction() 1215 * 1216 * Do checks based on packet direction. 1217 */ 1218 static inline int 1219 linux_check_direction(const pcap_t *handle, const struct sockaddr_ll *sll) 1220 { 1221 struct pcap_linux *handlep = handle->priv; 1222 1223 if (sll->sll_pkttype == PACKET_OUTGOING) { 1224 /* 1225 * Outgoing packet. 1226 * If this is from the loopback device, reject it; 1227 * we'll see the packet as an incoming packet as well, 1228 * and we don't want to see it twice. 
1229 */ 1230 if (sll->sll_ifindex == handlep->lo_ifindex) 1231 return 0; 1232 1233 /* 1234 * If this is an outgoing CAN or CAN FD frame, and 1235 * the user doesn't only want outgoing packets, 1236 * reject it; CAN devices and drivers, and the CAN 1237 * stack, always arrange to loop back transmitted 1238 * packets, so they also appear as incoming packets. 1239 * We don't want duplicate packets, and we can't 1240 * easily distinguish packets looped back by the CAN 1241 * layer than those received by the CAN layer, so we 1242 * eliminate this packet instead. 1243 * 1244 * We check whether this is a CAN or CAN FD frame 1245 * by checking whether the device's hardware type 1246 * is ARPHRD_CAN. 1247 */ 1248 if (sll->sll_hatype == ARPHRD_CAN && 1249 handle->direction != PCAP_D_OUT) 1250 return 0; 1251 1252 /* 1253 * If the user only wants incoming packets, reject it. 1254 */ 1255 if (handle->direction == PCAP_D_IN) 1256 return 0; 1257 } else { 1258 /* 1259 * Incoming packet. 1260 * If the user only wants outgoing packets, reject it. 1261 */ 1262 if (handle->direction == PCAP_D_OUT) 1263 return 0; 1264 } 1265 return 1; 1266 } 1267 1268 /* 1269 * Check whether the device to which the pcap_t is bound still exists. 1270 * We do so by asking what address the socket is bound to, and checking 1271 * whether the ifindex in the address is -1, meaning "that device is gone", 1272 * or some other value, meaning "that device still exists". 1273 */ 1274 static int 1275 device_still_exists(pcap_t *handle) 1276 { 1277 struct pcap_linux *handlep = handle->priv; 1278 struct sockaddr_ll addr; 1279 socklen_t addr_len; 1280 1281 /* 1282 * If handlep->ifindex is -1, the socket isn't bound, meaning 1283 * we're capturing on the "any" device; that device never 1284 * disappears. (It should also never be configured down, so 1285 * we shouldn't even get here, but let's make sure.) 
1286 */ 1287 if (handlep->ifindex == -1) 1288 return (1); /* it's still here */ 1289 1290 /* 1291 * OK, now try to get the address for the socket. 1292 */ 1293 addr_len = sizeof (addr); 1294 if (getsockname(handle->fd, (struct sockaddr *) &addr, &addr_len) == -1) { 1295 /* 1296 * Error - report an error and return -1. 1297 */ 1298 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 1299 errno, "getsockname failed"); 1300 return (-1); 1301 } 1302 if (addr.sll_ifindex == -1) { 1303 /* 1304 * This means the device went away. 1305 */ 1306 return (0); 1307 } 1308 1309 /* 1310 * The device presumably just went down. 1311 */ 1312 return (1); 1313 } 1314 1315 static int 1316 pcap_inject_linux(pcap_t *handle, const void *buf, int size) 1317 { 1318 struct pcap_linux *handlep = handle->priv; 1319 int ret; 1320 1321 if (handlep->ifindex == -1) { 1322 /* 1323 * We don't support sending on the "any" device. 1324 */ 1325 pcapint_strlcpy(handle->errbuf, 1326 "Sending packets isn't supported on the \"any\" device", 1327 PCAP_ERRBUF_SIZE); 1328 return (-1); 1329 } 1330 1331 if (handlep->cooked) { 1332 /* 1333 * We don't support sending on cooked-mode sockets. 1334 * 1335 * XXX - how do you send on a bound cooked-mode 1336 * socket? 1337 * Is a "sendto()" required there? 1338 */ 1339 pcapint_strlcpy(handle->errbuf, 1340 "Sending packets isn't supported in cooked mode", 1341 PCAP_ERRBUF_SIZE); 1342 return (-1); 1343 } 1344 1345 ret = (int)send(handle->fd, buf, size, 0); 1346 if (ret == -1) { 1347 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 1348 errno, "send"); 1349 return (-1); 1350 } 1351 return (ret); 1352 } 1353 1354 /* 1355 * Get the statistics for the given packet capture handle. 
1356 */ 1357 static int 1358 pcap_stats_linux(pcap_t *handle, struct pcap_stat *stats) 1359 { 1360 struct pcap_linux *handlep = handle->priv; 1361 #ifdef HAVE_TPACKET3 1362 /* 1363 * For sockets using TPACKET_V2, the extra stuff at the end 1364 * of a struct tpacket_stats_v3 will not be filled in, and 1365 * we don't look at it so this is OK even for those sockets. 1366 * In addition, the PF_PACKET socket code in the kernel only 1367 * uses the length parameter to compute how much data to 1368 * copy out and to indicate how much data was copied out, so 1369 * it's OK to base it on the size of a struct tpacket_stats. 1370 * 1371 * XXX - it's probably OK, in fact, to just use a 1372 * struct tpacket_stats for V3 sockets, as we don't 1373 * care about the tp_freeze_q_cnt stat. 1374 */ 1375 struct tpacket_stats_v3 kstats; 1376 #else /* HAVE_TPACKET3 */ 1377 struct tpacket_stats kstats; 1378 #endif /* HAVE_TPACKET3 */ 1379 socklen_t len = sizeof (struct tpacket_stats); 1380 1381 long long if_dropped = 0; 1382 1383 /* 1384 * To fill in ps_ifdrop, we parse 1385 * /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors 1386 * for the numbers 1387 */ 1388 if (handle->opt.promisc) 1389 { 1390 /* 1391 * XXX - is there any reason to do this by remembering 1392 * the last counts value, subtracting it from the 1393 * current counts value, and adding that to stat.ps_ifdrop, 1394 * maintaining stat.ps_ifdrop as a count, rather than just 1395 * saving the *initial* counts value and setting 1396 * stat.ps_ifdrop to the difference between the current 1397 * value and the initial value? 1398 * 1399 * One reason might be to handle the count wrapping 1400 * around, on platforms where the count is 32 bits 1401 * and where you might get more than 2^32 dropped 1402 * packets; is there any other reason? 1403 * 1404 * (We maintain the count as a long long int so that, 1405 * if the kernel maintains the counts as 64-bit even 1406 * on 32-bit platforms, we can handle the real count. 
1407 * 1408 * Unfortunately, we can't report 64-bit counts; we 1409 * need a better API for reporting statistics, such as 1410 * one that reports them in a style similar to the 1411 * pcapng Interface Statistics Block, so that 1) the 1412 * counts are 64-bit, 2) it's easier to add new statistics 1413 * without breaking the ABI, and 3) it's easier to 1414 * indicate to a caller that wants one particular 1415 * statistic that it's not available by just not supplying 1416 * it.) 1417 */ 1418 if_dropped = handlep->sysfs_dropped; 1419 handlep->sysfs_dropped = linux_if_drops(handlep->device); 1420 handlep->stat.ps_ifdrop += (u_int)(handlep->sysfs_dropped - if_dropped); 1421 } 1422 1423 /* 1424 * Try to get the packet counts from the kernel. 1425 */ 1426 if (getsockopt(handle->fd, SOL_PACKET, PACKET_STATISTICS, 1427 &kstats, &len) > -1) { 1428 /* 1429 * "ps_recv" counts only packets that *passed* the 1430 * filter, not packets that didn't pass the filter. 1431 * This includes packets later dropped because we 1432 * ran out of buffer space. 1433 * 1434 * "ps_drop" counts packets dropped because we ran 1435 * out of buffer space. It doesn't count packets 1436 * dropped by the interface driver. It counts only 1437 * packets that passed the filter. 1438 * 1439 * See above for ps_ifdrop. 1440 * 1441 * Both statistics include packets not yet read from 1442 * the kernel by libpcap, and thus not yet seen by 1443 * the application. 1444 * 1445 * In "linux/net/packet/af_packet.c", at least in 2.6.27 1446 * through 5.6 kernels, "tp_packets" is incremented for 1447 * every packet that passes the packet filter *and* is 1448 * successfully copied to the ring buffer; "tp_drops" is 1449 * incremented for every packet dropped because there's 1450 * not enough free space in the ring buffer. 
1451 * 1452 * When the statistics are returned for a PACKET_STATISTICS 1453 * "getsockopt()" call, "tp_drops" is added to "tp_packets", 1454 * so that "tp_packets" counts all packets handed to 1455 * the PF_PACKET socket, including packets dropped because 1456 * there wasn't room on the socket buffer - but not 1457 * including packets that didn't pass the filter. 1458 * 1459 * In the BSD BPF, the count of received packets is 1460 * incremented for every packet handed to BPF, regardless 1461 * of whether it passed the filter. 1462 * 1463 * We can't make "pcap_stats()" work the same on both 1464 * platforms, but the best approximation is to return 1465 * "tp_packets" as the count of packets and "tp_drops" 1466 * as the count of drops. 1467 * 1468 * Keep a running total because each call to 1469 * getsockopt(handle->fd, SOL_PACKET, PACKET_STATISTICS, .... 1470 * resets the counters to zero. 1471 */ 1472 handlep->stat.ps_recv += kstats.tp_packets; 1473 handlep->stat.ps_drop += kstats.tp_drops; 1474 *stats = handlep->stat; 1475 return 0; 1476 } 1477 1478 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, errno, 1479 "failed to get statistics from socket"); 1480 return -1; 1481 } 1482 1483 /* 1484 * A PF_PACKET socket can be bound to any network interface. 1485 */ 1486 static int 1487 can_be_bound(const char *name _U_) 1488 { 1489 return (1); 1490 } 1491 1492 /* 1493 * Get a socket to use with various interface ioctls. 1494 */ 1495 static int 1496 get_if_ioctl_socket(void) 1497 { 1498 int fd; 1499 1500 /* 1501 * This is a bit ugly. 1502 * 1503 * There isn't a socket type that's guaranteed to work. 1504 * 1505 * AF_NETLINK will work *if* you have Netlink configured into the 1506 * kernel (can it be configured out if you have any networking 1507 * support at all?) *and* if you're running a sufficiently recent 1508 * kernel, but not all the kernels we support are sufficiently 1509 * recent - that feature was introduced in Linux 4.6. 
1510 * 1511 * AF_UNIX will work *if* you have UNIX-domain sockets configured 1512 * into the kernel and *if* you're not on a system that doesn't 1513 * allow them - some SELinux systems don't allow you create them. 1514 * Most systems probably have them configured in, but not all systems 1515 * have them configured in and allow them to be created. 1516 * 1517 * AF_INET will work *if* you have IPv4 configured into the kernel, 1518 * but, apparently, some systems have network adapters but have 1519 * kernels without IPv4 support. 1520 * 1521 * AF_INET6 will work *if* you have IPv6 configured into the 1522 * kernel, but if you don't have AF_INET, you might not have 1523 * AF_INET6, either (that is, independently on its own grounds). 1524 * 1525 * AF_PACKET would work, except that some of these calls should 1526 * work even if you *don't* have capture permission (you should be 1527 * able to enumerate interfaces and get information about them 1528 * without capture permission; you shouldn't get a failure until 1529 * you try pcap_activate()). (If you don't allow programs to 1530 * get as much information as possible about interfaces if you 1531 * don't have permission to capture, you run the risk of users 1532 * asking "why isn't it showing XXX" - or, worse, if you don't 1533 * show interfaces *at all* if you don't have permission to 1534 * capture on them, "why do no interfaces show up?" - when the 1535 * real problem is a permissions problem. Error reports of that 1536 * type require a lot more back-and-forth to debug, as evidenced 1537 * by many Wireshark bugs/mailing list questions/Q&A questions.) 
1538 * 1539 * So: 1540 * 1541 * we first try an AF_NETLINK socket, where "try" includes 1542 * "try to do a device ioctl on it", as, in the future, once 1543 * pre-4.6 kernels are sufficiently rare, that will probably 1544 * be the mechanism most likely to work; 1545 * 1546 * if that fails, we try an AF_UNIX socket, as that's less 1547 * likely to be configured out on a networking-capable system 1548 * than is IP; 1549 * 1550 * if that fails, we try an AF_INET6 socket; 1551 * 1552 * if that fails, we try an AF_INET socket. 1553 */ 1554 fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); 1555 if (fd != -1) { 1556 /* 1557 * OK, let's make sure we can do an SIOCGIFNAME 1558 * ioctl. 1559 */ 1560 struct ifreq ifr; 1561 1562 memset(&ifr, 0, sizeof(ifr)); 1563 if (ioctl(fd, SIOCGIFNAME, &ifr) == 0 || 1564 errno != EOPNOTSUPP) { 1565 /* 1566 * It succeeded, or failed for some reason 1567 * other than "netlink sockets don't support 1568 * device ioctls". Go with the AF_NETLINK 1569 * socket. 1570 */ 1571 return (fd); 1572 } 1573 1574 /* 1575 * OK, that didn't work, so it's as bad as "netlink 1576 * sockets aren't available". Close the socket and 1577 * drive on. 1578 */ 1579 close(fd); 1580 } 1581 1582 /* 1583 * Now try an AF_UNIX socket. 1584 */ 1585 fd = socket(AF_UNIX, SOCK_RAW, 0); 1586 if (fd != -1) { 1587 /* 1588 * OK, we got it! 1589 */ 1590 return (fd); 1591 } 1592 1593 /* 1594 * Now try an AF_INET6 socket. 1595 */ 1596 fd = socket(AF_INET6, SOCK_DGRAM, 0); 1597 if (fd != -1) { 1598 return (fd); 1599 } 1600 1601 /* 1602 * Now try an AF_INET socket. 1603 * 1604 * XXX - if that fails, is there anything else we should try? 1605 * AF_CAN, for embedded systems in vehicles, in case they're 1606 * built without Internet protocol support? Any other socket 1607 * types popular in non-Internet embedded systems? 1608 */ 1609 return (socket(AF_INET, SOCK_DGRAM, 0)); 1610 } 1611 1612 /* 1613 * Get additional flags for a device, using SIOCGIFMEDIA. 
1614 */ 1615 static int 1616 get_if_flags(const char *name, bpf_u_int32 *flags, char *errbuf) 1617 { 1618 int sock; 1619 FILE *fh; 1620 unsigned int arptype; 1621 struct ifreq ifr; 1622 struct ethtool_value info; 1623 1624 if (*flags & PCAP_IF_LOOPBACK) { 1625 /* 1626 * Loopback devices aren't wireless, and "connected"/ 1627 * "disconnected" doesn't apply to them. 1628 */ 1629 *flags |= PCAP_IF_CONNECTION_STATUS_NOT_APPLICABLE; 1630 return 0; 1631 } 1632 1633 sock = get_if_ioctl_socket(); 1634 if (sock == -1) { 1635 pcapint_fmt_errmsg_for_errno(errbuf, PCAP_ERRBUF_SIZE, errno, 1636 "Can't create socket to get ethtool information for %s", 1637 name); 1638 return -1; 1639 } 1640 1641 /* 1642 * OK, what type of network is this? 1643 * In particular, is it wired or wireless? 1644 */ 1645 if (is_wifi(name)) { 1646 /* 1647 * Wi-Fi, hence wireless. 1648 */ 1649 *flags |= PCAP_IF_WIRELESS; 1650 } else { 1651 /* 1652 * OK, what does /sys/class/net/{if_name}/type contain? 1653 * (We don't use that for Wi-Fi, as it'll report 1654 * "Ethernet", i.e. ARPHRD_ETHER, for non-monitor- 1655 * mode devices.) 1656 */ 1657 char *pathstr; 1658 1659 if (asprintf(&pathstr, "/sys/class/net/%s/type", name) == -1) { 1660 snprintf(errbuf, PCAP_ERRBUF_SIZE, 1661 "%s: Can't generate path name string for /sys/class/net device", 1662 name); 1663 close(sock); 1664 return -1; 1665 } 1666 fh = fopen(pathstr, "r"); 1667 if (fh != NULL) { 1668 if (fscanf(fh, "%u", &arptype) == 1) { 1669 /* 1670 * OK, we got an ARPHRD_ type; what is it? 1671 */ 1672 switch (arptype) { 1673 1674 case ARPHRD_LOOPBACK: 1675 /* 1676 * These are types to which 1677 * "connected" and "disconnected" 1678 * don't apply, so don't bother 1679 * asking about it. 1680 * 1681 * XXX - add other types? 
1682 */ 1683 close(sock); 1684 fclose(fh); 1685 free(pathstr); 1686 return 0; 1687 1688 case ARPHRD_IRDA: 1689 case ARPHRD_IEEE80211: 1690 case ARPHRD_IEEE80211_PRISM: 1691 case ARPHRD_IEEE80211_RADIOTAP: 1692 #ifdef ARPHRD_IEEE802154 1693 case ARPHRD_IEEE802154: 1694 #endif 1695 #ifdef ARPHRD_IEEE802154_MONITOR 1696 case ARPHRD_IEEE802154_MONITOR: 1697 #endif 1698 #ifdef ARPHRD_6LOWPAN 1699 case ARPHRD_6LOWPAN: 1700 #endif 1701 /* 1702 * Various wireless types. 1703 */ 1704 *flags |= PCAP_IF_WIRELESS; 1705 break; 1706 } 1707 } 1708 fclose(fh); 1709 } 1710 free(pathstr); 1711 } 1712 1713 #ifdef ETHTOOL_GLINK 1714 memset(&ifr, 0, sizeof(ifr)); 1715 pcapint_strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); 1716 info.cmd = ETHTOOL_GLINK; 1717 /* 1718 * XXX - while Valgrind handles SIOCETHTOOL and knows that 1719 * the ETHTOOL_GLINK command sets the .data member of the 1720 * structure, Memory Sanitizer doesn't yet do so: 1721 * 1722 * https://bugs.llvm.org/show_bug.cgi?id=45814 1723 * 1724 * For now, we zero it out to squelch warnings; if the bug 1725 * in question is fixed, we can remove this. 1726 */ 1727 info.data = 0; 1728 ifr.ifr_data = (caddr_t)&info; 1729 if (ioctl(sock, SIOCETHTOOL, &ifr) == -1) { 1730 int save_errno = errno; 1731 1732 switch (save_errno) { 1733 1734 case EOPNOTSUPP: 1735 case EINVAL: 1736 /* 1737 * OK, this OS version or driver doesn't support 1738 * asking for this information. 1739 * XXX - distinguish between "this doesn't 1740 * support ethtool at all because it's not 1741 * that type of device" vs. "this doesn't 1742 * support ethtool even though it's that 1743 * type of device", and return "unknown". 1744 */ 1745 *flags |= PCAP_IF_CONNECTION_STATUS_NOT_APPLICABLE; 1746 close(sock); 1747 return 0; 1748 1749 case ENODEV: 1750 /* 1751 * OK, no such device. 1752 * The user will find that out when they try to 1753 * activate the device; just say "OK" and 1754 * don't set anything. 
1755 */ 1756 close(sock); 1757 return 0; 1758 1759 default: 1760 /* 1761 * Other error. 1762 */ 1763 pcapint_fmt_errmsg_for_errno(errbuf, PCAP_ERRBUF_SIZE, 1764 save_errno, 1765 "%s: SIOCETHTOOL(ETHTOOL_GLINK) ioctl failed", 1766 name); 1767 close(sock); 1768 return -1; 1769 } 1770 } 1771 1772 /* 1773 * Is it connected? 1774 */ 1775 if (info.data) { 1776 /* 1777 * It's connected. 1778 */ 1779 *flags |= PCAP_IF_CONNECTION_STATUS_CONNECTED; 1780 } else { 1781 /* 1782 * It's disconnected. 1783 */ 1784 *flags |= PCAP_IF_CONNECTION_STATUS_DISCONNECTED; 1785 } 1786 #endif 1787 1788 close(sock); 1789 return 0; 1790 } 1791 1792 int 1793 pcapint_platform_finddevs(pcap_if_list_t *devlistp, char *errbuf) 1794 { 1795 /* 1796 * Get the list of regular interfaces first. 1797 */ 1798 if (pcapint_findalldevs_interfaces(devlistp, errbuf, can_be_bound, 1799 get_if_flags) == -1) 1800 return (-1); /* failure */ 1801 1802 /* 1803 * Add the "any" device. 1804 */ 1805 if (pcap_add_any_dev(devlistp, errbuf) == NULL) 1806 return (-1); 1807 1808 return (0); 1809 } 1810 1811 /* 1812 * Set direction flag: Which packets do we accept on a forwarding 1813 * single device? IN, OUT or both? 1814 */ 1815 static int 1816 pcap_setdirection_linux(pcap_t *handle, pcap_direction_t d) 1817 { 1818 /* 1819 * It's guaranteed, at this point, that d is a valid 1820 * direction value. 1821 */ 1822 handle->direction = d; 1823 return 0; 1824 } 1825 1826 static int 1827 is_wifi(const char *device) 1828 { 1829 char *pathstr; 1830 struct stat statb; 1831 1832 /* 1833 * See if there's a sysfs wireless directory for it. 1834 * If so, it's a wireless interface. 1835 */ 1836 if (asprintf(&pathstr, "/sys/class/net/%s/wireless", device) == -1) { 1837 /* 1838 * Just give up here. 
1839 */ 1840 return 0; 1841 } 1842 if (stat(pathstr, &statb) == 0) { 1843 free(pathstr); 1844 return 1; 1845 } 1846 free(pathstr); 1847 1848 return 0; 1849 } 1850 1851 /* 1852 * Linux uses the ARP hardware type to identify the type of an 1853 * interface. pcap uses the DLT_xxx constants for this. This 1854 * function takes a pointer to a "pcap_t", and an ARPHRD_xxx 1855 * constant, as arguments, and sets "handle->linktype" to the 1856 * appropriate DLT_XXX constant and sets "handle->offset" to 1857 * the appropriate value (to make "handle->offset" plus link-layer 1858 * header length be a multiple of 4, so that the link-layer payload 1859 * will be aligned on a 4-byte boundary when capturing packets). 1860 * (If the offset isn't set here, it'll be 0; add code as appropriate 1861 * for cases where it shouldn't be 0.) 1862 * 1863 * If "cooked_ok" is non-zero, we can use DLT_LINUX_SLL and capture 1864 * in cooked mode; otherwise, we can't use cooked mode, so we have 1865 * to pick some type that works in raw mode, or fail. 1866 * 1867 * Sets the link type to -1 if unable to map the type. 1868 * 1869 * Returns 0 on success or a PCAP_ERROR_ value on error. 1870 */ 1871 static int map_arphrd_to_dlt(pcap_t *handle, int arptype, 1872 const char *device, int cooked_ok) 1873 { 1874 static const char cdma_rmnet[] = "cdma_rmnet"; 1875 1876 switch (arptype) { 1877 1878 case ARPHRD_ETHER: 1879 /* 1880 * For various annoying reasons having to do with DHCP 1881 * software, some versions of Android give the mobile- 1882 * phone-network interface an ARPHRD_ value of 1883 * ARPHRD_ETHER, even though the packets supplied by 1884 * that interface have no link-layer header, and begin 1885 * with an IP header, so that the ARPHRD_ value should 1886 * be ARPHRD_NONE. 1887 * 1888 * Detect those devices by checking the device name, and 1889 * use DLT_RAW for them. 
1890 */ 1891 if (strncmp(device, cdma_rmnet, sizeof cdma_rmnet - 1) == 0) { 1892 handle->linktype = DLT_RAW; 1893 return 0; 1894 } 1895 1896 /* 1897 * Is this a real Ethernet device? If so, give it a 1898 * link-layer-type list with DLT_EN10MB and DLT_DOCSIS, so 1899 * that an application can let you choose it, in case you're 1900 * capturing DOCSIS traffic that a Cisco Cable Modem 1901 * Termination System is putting out onto an Ethernet (it 1902 * doesn't put an Ethernet header onto the wire, it puts raw 1903 * DOCSIS frames out on the wire inside the low-level 1904 * Ethernet framing). 1905 * 1906 * XXX - are there any other sorts of "fake Ethernet" that 1907 * have ARPHRD_ETHER but that shouldn't offer DLT_DOCSIS as 1908 * a Cisco CMTS won't put traffic onto it or get traffic 1909 * bridged onto it? ISDN is handled in "setup_socket()", 1910 * as we fall back on cooked mode there, and we use 1911 * is_wifi() to check for 802.11 devices; are there any 1912 * others? 1913 */ 1914 if (!is_wifi(device)) { 1915 int ret; 1916 1917 /* 1918 * This is not a Wi-Fi device but it could be 1919 * a DSA master/management network device. 1920 */ 1921 ret = iface_dsa_get_proto_info(device, handle); 1922 if (ret < 0) 1923 return ret; 1924 1925 if (ret == 1) { 1926 /* 1927 * This is a DSA master/management network 1928 * device linktype is already set by 1929 * iface_dsa_get_proto_info() set an 1930 * appropriate offset here. 1931 */ 1932 handle->offset = 2; 1933 break; 1934 } 1935 1936 /* 1937 * It's not a Wi-Fi device; offer DOCSIS. 
1938 */ 1939 handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 2); 1940 if (handle->dlt_list == NULL) { 1941 pcapint_fmt_errmsg_for_errno(handle->errbuf, 1942 PCAP_ERRBUF_SIZE, errno, "malloc"); 1943 return (PCAP_ERROR); 1944 } 1945 handle->dlt_list[0] = DLT_EN10MB; 1946 handle->dlt_list[1] = DLT_DOCSIS; 1947 handle->dlt_count = 2; 1948 } 1949 /* FALLTHROUGH */ 1950 1951 case ARPHRD_METRICOM: 1952 case ARPHRD_LOOPBACK: 1953 handle->linktype = DLT_EN10MB; 1954 handle->offset = 2; 1955 break; 1956 1957 case ARPHRD_EETHER: 1958 handle->linktype = DLT_EN3MB; 1959 break; 1960 1961 case ARPHRD_AX25: 1962 handle->linktype = DLT_AX25_KISS; 1963 break; 1964 1965 case ARPHRD_PRONET: 1966 handle->linktype = DLT_PRONET; 1967 break; 1968 1969 case ARPHRD_CHAOS: 1970 handle->linktype = DLT_CHAOS; 1971 break; 1972 #ifndef ARPHRD_CAN 1973 #define ARPHRD_CAN 280 1974 #endif 1975 case ARPHRD_CAN: 1976 handle->linktype = DLT_CAN_SOCKETCAN; 1977 break; 1978 1979 #ifndef ARPHRD_IEEE802_TR 1980 #define ARPHRD_IEEE802_TR 800 /* From Linux 2.4 */ 1981 #endif 1982 case ARPHRD_IEEE802_TR: 1983 case ARPHRD_IEEE802: 1984 handle->linktype = DLT_IEEE802; 1985 handle->offset = 2; 1986 break; 1987 1988 case ARPHRD_ARCNET: 1989 handle->linktype = DLT_ARCNET_LINUX; 1990 break; 1991 1992 #ifndef ARPHRD_FDDI /* From Linux 2.2.13 */ 1993 #define ARPHRD_FDDI 774 1994 #endif 1995 case ARPHRD_FDDI: 1996 handle->linktype = DLT_FDDI; 1997 handle->offset = 3; 1998 break; 1999 2000 #ifndef ARPHRD_ATM /* FIXME: How to #include this? */ 2001 #define ARPHRD_ATM 19 2002 #endif 2003 case ARPHRD_ATM: 2004 /* 2005 * The Classical IP implementation in ATM for Linux 2006 * supports both what RFC 1483 calls "LLC Encapsulation", 2007 * in which each packet has an LLC header, possibly 2008 * with a SNAP header as well, prepended to it, and 2009 * what RFC 1483 calls "VC Based Multiplexing", in which 2010 * different virtual circuits carry different network 2011 * layer protocols, and no header is prepended to packets. 
2012 * 2013 * They both have an ARPHRD_ type of ARPHRD_ATM, so 2014 * you can't use the ARPHRD_ type to find out whether 2015 * captured packets will have an LLC header, and, 2016 * while there's a socket ioctl to *set* the encapsulation 2017 * type, there's no ioctl to *get* the encapsulation type. 2018 * 2019 * This means that 2020 * 2021 * programs that dissect Linux Classical IP frames 2022 * would have to check for an LLC header and, 2023 * depending on whether they see one or not, dissect 2024 * the frame as LLC-encapsulated or as raw IP (I 2025 * don't know whether there's any traffic other than 2026 * IP that would show up on the socket, or whether 2027 * there's any support for IPv6 in the Linux 2028 * Classical IP code); 2029 * 2030 * filter expressions would have to compile into 2031 * code that checks for an LLC header and does 2032 * the right thing. 2033 * 2034 * Both of those are a nuisance - and, at least on systems 2035 * that support PF_PACKET sockets, we don't have to put 2036 * up with those nuisances; instead, we can just capture 2037 * in cooked mode. That's what we'll do, if we can. 2038 * Otherwise, we'll just fail. 
2039 */ 2040 if (cooked_ok) 2041 handle->linktype = DLT_LINUX_SLL; 2042 else 2043 handle->linktype = -1; 2044 break; 2045 2046 #ifndef ARPHRD_IEEE80211 /* From Linux 2.4.6 */ 2047 #define ARPHRD_IEEE80211 801 2048 #endif 2049 case ARPHRD_IEEE80211: 2050 handle->linktype = DLT_IEEE802_11; 2051 break; 2052 2053 #ifndef ARPHRD_IEEE80211_PRISM /* From Linux 2.4.18 */ 2054 #define ARPHRD_IEEE80211_PRISM 802 2055 #endif 2056 case ARPHRD_IEEE80211_PRISM: 2057 handle->linktype = DLT_PRISM_HEADER; 2058 break; 2059 2060 #ifndef ARPHRD_IEEE80211_RADIOTAP /* new */ 2061 #define ARPHRD_IEEE80211_RADIOTAP 803 2062 #endif 2063 case ARPHRD_IEEE80211_RADIOTAP: 2064 handle->linktype = DLT_IEEE802_11_RADIO; 2065 break; 2066 2067 case ARPHRD_PPP: 2068 /* 2069 * Some PPP code in the kernel supplies no link-layer 2070 * header whatsoever to PF_PACKET sockets; other PPP 2071 * code supplies PPP link-layer headers ("syncppp.c"); 2072 * some PPP code might supply random link-layer 2073 * headers (PPP over ISDN - there's code in Ethereal, 2074 * for example, to cope with PPP-over-ISDN captures 2075 * with which the Ethereal developers have had to cope, 2076 * heuristically trying to determine which of the 2077 * oddball link-layer headers particular packets have). 2078 * 2079 * As such, we just punt, and run all PPP interfaces 2080 * in cooked mode, if we can; otherwise, we just treat 2081 * it as DLT_RAW, for now - if somebody needs to capture, 2082 * on a 2.0[.x] kernel, on PPP devices that supply a 2083 * link-layer header, they'll have to add code here to 2084 * map to the appropriate DLT_ type (possibly adding a 2085 * new DLT_ type, if necessary). 2086 */ 2087 if (cooked_ok) 2088 handle->linktype = DLT_LINUX_SLL; 2089 else { 2090 /* 2091 * XXX - handle ISDN types here? 
We can't fall 2092 * back on cooked sockets, so we'd have to 2093 * figure out from the device name what type of 2094 * link-layer encapsulation it's using, and map 2095 * that to an appropriate DLT_ value, meaning 2096 * we'd map "isdnN" devices to DLT_RAW (they 2097 * supply raw IP packets with no link-layer 2098 * header) and "isdY" devices to a new DLT_I4L_IP 2099 * type that has only an Ethernet packet type as 2100 * a link-layer header. 2101 * 2102 * But sometimes we seem to get random crap 2103 * in the link-layer header when capturing on 2104 * ISDN devices.... 2105 */ 2106 handle->linktype = DLT_RAW; 2107 } 2108 break; 2109 2110 #ifndef ARPHRD_CISCO 2111 #define ARPHRD_CISCO 513 /* previously ARPHRD_HDLC */ 2112 #endif 2113 case ARPHRD_CISCO: 2114 handle->linktype = DLT_C_HDLC; 2115 break; 2116 2117 /* Not sure if this is correct for all tunnels, but it 2118 * works for CIPE */ 2119 case ARPHRD_TUNNEL: 2120 #ifndef ARPHRD_SIT 2121 #define ARPHRD_SIT 776 /* From Linux 2.2.13 */ 2122 #endif 2123 case ARPHRD_SIT: 2124 case ARPHRD_CSLIP: 2125 case ARPHRD_SLIP6: 2126 case ARPHRD_CSLIP6: 2127 case ARPHRD_ADAPT: 2128 case ARPHRD_SLIP: 2129 #ifndef ARPHRD_RAWHDLC 2130 #define ARPHRD_RAWHDLC 518 2131 #endif 2132 case ARPHRD_RAWHDLC: 2133 #ifndef ARPHRD_DLCI 2134 #define ARPHRD_DLCI 15 2135 #endif 2136 case ARPHRD_DLCI: 2137 /* 2138 * XXX - should some of those be mapped to DLT_LINUX_SLL 2139 * instead? Should we just map all of them to DLT_LINUX_SLL? 2140 */ 2141 handle->linktype = DLT_RAW; 2142 break; 2143 2144 #ifndef ARPHRD_FRAD 2145 #define ARPHRD_FRAD 770 2146 #endif 2147 case ARPHRD_FRAD: 2148 handle->linktype = DLT_FRELAY; 2149 break; 2150 2151 case ARPHRD_LOCALTLK: 2152 handle->linktype = DLT_LTALK; 2153 break; 2154 2155 case 18: 2156 /* 2157 * RFC 4338 defines an encapsulation for IP and ARP 2158 * packets that's compatible with the RFC 2625 2159 * encapsulation, but that uses a different ARP 2160 * hardware type and hardware addresses. 
That 2161 * ARP hardware type is 18; Linux doesn't define 2162 * any ARPHRD_ value as 18, but if it ever officially 2163 * supports RFC 4338-style IP-over-FC, it should define 2164 * one. 2165 * 2166 * For now, we map it to DLT_IP_OVER_FC, in the hopes 2167 * that this will encourage its use in the future, 2168 * should Linux ever officially support RFC 4338-style 2169 * IP-over-FC. 2170 */ 2171 handle->linktype = DLT_IP_OVER_FC; 2172 break; 2173 2174 #ifndef ARPHRD_FCPP 2175 #define ARPHRD_FCPP 784 2176 #endif 2177 case ARPHRD_FCPP: 2178 #ifndef ARPHRD_FCAL 2179 #define ARPHRD_FCAL 785 2180 #endif 2181 case ARPHRD_FCAL: 2182 #ifndef ARPHRD_FCPL 2183 #define ARPHRD_FCPL 786 2184 #endif 2185 case ARPHRD_FCPL: 2186 #ifndef ARPHRD_FCFABRIC 2187 #define ARPHRD_FCFABRIC 787 2188 #endif 2189 case ARPHRD_FCFABRIC: 2190 /* 2191 * Back in 2002, Donald Lee at Cray wanted a DLT_ for 2192 * IP-over-FC: 2193 * 2194 * https://www.mail-archive.com/tcpdump-workers@sandelman.ottawa.on.ca/msg01043.html 2195 * 2196 * and one was assigned. 2197 * 2198 * In a later private discussion (spun off from a message 2199 * on the ethereal-users list) on how to get that DLT_ 2200 * value in libpcap on Linux, I ended up deciding that 2201 * the best thing to do would be to have him tweak the 2202 * driver to set the ARPHRD_ value to some ARPHRD_FCxx 2203 * type, and map all those types to DLT_IP_OVER_FC: 2204 * 2205 * I've checked into the libpcap and tcpdump CVS tree 2206 * support for DLT_IP_OVER_FC. In order to use that, 2207 * you'd have to modify your modified driver to return 2208 * one of the ARPHRD_FCxxx types, in "fcLINUXfcp.c" - 2209 * change it to set "dev->type" to ARPHRD_FCFABRIC, for 2210 * example (the exact value doesn't matter, it can be 2211 * any of ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, or 2212 * ARPHRD_FCFABRIC). 
2213 * 2214 * 11 years later, Christian Svensson wanted to map 2215 * various ARPHRD_ values to DLT_FC_2 and 2216 * DLT_FC_2_WITH_FRAME_DELIMS for raw Fibre Channel 2217 * frames: 2218 * 2219 * https://github.com/mcr/libpcap/pull/29 2220 * 2221 * There doesn't seem to be any network drivers that uses 2222 * any of the ARPHRD_FC* values for IP-over-FC, and 2223 * it's not exactly clear what the "Dummy types for non 2224 * ARP hardware" are supposed to mean (link-layer 2225 * header type? Physical network type?), so it's 2226 * not exactly clear why the ARPHRD_FC* types exist 2227 * in the first place. 2228 * 2229 * For now, we map them to DLT_FC_2, and provide an 2230 * option of DLT_FC_2_WITH_FRAME_DELIMS, as well as 2231 * DLT_IP_OVER_FC just in case there's some old 2232 * driver out there that uses one of those types for 2233 * IP-over-FC on which somebody wants to capture 2234 * packets. 2235 */ 2236 handle->linktype = DLT_FC_2; 2237 handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 3); 2238 if (handle->dlt_list == NULL) { 2239 pcapint_fmt_errmsg_for_errno(handle->errbuf, 2240 PCAP_ERRBUF_SIZE, errno, "malloc"); 2241 return (PCAP_ERROR); 2242 } 2243 handle->dlt_list[0] = DLT_FC_2; 2244 handle->dlt_list[1] = DLT_FC_2_WITH_FRAME_DELIMS; 2245 handle->dlt_list[2] = DLT_IP_OVER_FC; 2246 handle->dlt_count = 3; 2247 break; 2248 2249 #ifndef ARPHRD_IRDA 2250 #define ARPHRD_IRDA 783 2251 #endif 2252 case ARPHRD_IRDA: 2253 /* Don't expect IP packet out of this interfaces... */ 2254 handle->linktype = DLT_LINUX_IRDA; 2255 /* We need to save packet direction for IrDA decoding, 2256 * so let's use "Linux-cooked" mode. Jean II 2257 * 2258 * XXX - this is handled in setup_socket(). 
*/ 2259 /* handlep->cooked = 1; */ 2260 break; 2261 2262 /* ARPHRD_LAPD is unofficial and randomly allocated, if reallocation 2263 * is needed, please report it to <daniele@orlandi.com> */ 2264 #ifndef ARPHRD_LAPD 2265 #define ARPHRD_LAPD 8445 2266 #endif 2267 case ARPHRD_LAPD: 2268 /* Don't expect IP packet out of this interfaces... */ 2269 handle->linktype = DLT_LINUX_LAPD; 2270 break; 2271 2272 #ifndef ARPHRD_NONE 2273 #define ARPHRD_NONE 0xFFFE 2274 #endif 2275 case ARPHRD_NONE: 2276 /* 2277 * No link-layer header; packets are just IP 2278 * packets, so use DLT_RAW. 2279 */ 2280 handle->linktype = DLT_RAW; 2281 break; 2282 2283 #ifndef ARPHRD_IEEE802154 2284 #define ARPHRD_IEEE802154 804 2285 #endif 2286 case ARPHRD_IEEE802154: 2287 handle->linktype = DLT_IEEE802_15_4_NOFCS; 2288 break; 2289 2290 #ifndef ARPHRD_NETLINK 2291 #define ARPHRD_NETLINK 824 2292 #endif 2293 case ARPHRD_NETLINK: 2294 handle->linktype = DLT_NETLINK; 2295 /* 2296 * We need to use cooked mode, so that in sll_protocol we 2297 * pick up the netlink protocol type such as NETLINK_ROUTE, 2298 * NETLINK_GENERIC, NETLINK_FIB_LOOKUP, etc. 2299 * 2300 * XXX - this is handled in setup_socket(). 2301 */ 2302 /* handlep->cooked = 1; */ 2303 break; 2304 2305 #ifndef ARPHRD_VSOCKMON 2306 #define ARPHRD_VSOCKMON 826 2307 #endif 2308 case ARPHRD_VSOCKMON: 2309 handle->linktype = DLT_VSOCK; 2310 break; 2311 2312 default: 2313 handle->linktype = -1; 2314 break; 2315 } 2316 return (0); 2317 } 2318 2319 /* 2320 * Try to set up a PF_PACKET socket. 2321 * Returns 0 or a PCAP_WARNING_ value on success and a PCAP_ERROR_ value 2322 * on failure. 
2323 */ 2324 static int 2325 setup_socket(pcap_t *handle, int is_any_device) 2326 { 2327 struct pcap_linux *handlep = handle->priv; 2328 const char *device = handle->opt.device; 2329 int status = 0; 2330 int sock_fd, arptype; 2331 int val; 2332 int err = 0; 2333 struct packet_mreq mr; 2334 #if defined(SO_BPF_EXTENSIONS) && defined(SKF_AD_VLAN_TAG_PRESENT) 2335 int bpf_extensions; 2336 socklen_t len = sizeof(bpf_extensions); 2337 #endif 2338 2339 /* 2340 * Open a socket with protocol family packet. If cooked is true, 2341 * we open a SOCK_DGRAM socket for the cooked interface, otherwise 2342 * we open a SOCK_RAW socket for the raw interface. 2343 * 2344 * The protocol is set to 0. This means we will receive no 2345 * packets until we "bind" the socket with a non-zero 2346 * protocol. This allows us to setup the ring buffers without 2347 * dropping any packets. 2348 */ 2349 sock_fd = is_any_device ? 2350 socket(PF_PACKET, SOCK_DGRAM, 0) : 2351 socket(PF_PACKET, SOCK_RAW, 0); 2352 2353 if (sock_fd == -1) { 2354 if (errno == EPERM || errno == EACCES) { 2355 /* 2356 * You don't have permission to open the 2357 * socket. 2358 */ 2359 status = PCAP_ERROR_PERM_DENIED; 2360 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 2361 "Attempt to create packet socket failed - CAP_NET_RAW may be required"); 2362 } else if (errno == EAFNOSUPPORT) { 2363 /* 2364 * PF_PACKET sockets not supported. 2365 * Perhaps we're running on the WSL1 module 2366 * in the Windows NT kernel rather than on 2367 * a real Linux kernel. 2368 */ 2369 status = PCAP_ERROR_CAPTURE_NOTSUP; 2370 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 2371 "PF_PACKET sockets not supported - is this WSL1?"); 2372 } else { 2373 /* 2374 * Other error. 2375 */ 2376 status = PCAP_ERROR; 2377 } 2378 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 2379 errno, "socket"); 2380 return status; 2381 } 2382 2383 /* 2384 * Get the interface index of the loopback device. 
2385 * If the attempt fails, don't fail, just set the 2386 * "handlep->lo_ifindex" to -1. 2387 * 2388 * XXX - can there be more than one device that loops 2389 * packets back, i.e. devices other than "lo"? If so, 2390 * we'd need to find them all, and have an array of 2391 * indices for them, and check all of them in 2392 * "pcap_read_packet()". 2393 */ 2394 handlep->lo_ifindex = iface_get_id(sock_fd, "lo", handle->errbuf); 2395 2396 /* 2397 * Default value for offset to align link-layer payload 2398 * on a 4-byte boundary. 2399 */ 2400 handle->offset = 0; 2401 2402 /* 2403 * What kind of frames do we have to deal with? Fall back 2404 * to cooked mode if we have an unknown interface type 2405 * or a type we know doesn't work well in raw mode. 2406 */ 2407 if (!is_any_device) { 2408 /* Assume for now we don't need cooked mode. */ 2409 handlep->cooked = 0; 2410 2411 if (handle->opt.rfmon) { 2412 /* 2413 * We were asked to turn on monitor mode. 2414 * Do so before we get the link-layer type, 2415 * because entering monitor mode could change 2416 * the link-layer type. 2417 */ 2418 err = enter_rfmon_mode(handle, sock_fd, device); 2419 if (err < 0) { 2420 /* Hard failure */ 2421 close(sock_fd); 2422 return err; 2423 } 2424 if (err == 0) { 2425 /* 2426 * Nothing worked for turning monitor mode 2427 * on. 2428 */ 2429 close(sock_fd); 2430 2431 return PCAP_ERROR_RFMON_NOTSUP; 2432 } 2433 2434 /* 2435 * Either monitor mode has been turned on for 2436 * the device, or we've been given a different 2437 * device to open for monitor mode. If we've 2438 * been given a different device, use it. 
2439 */ 2440 if (handlep->mondevice != NULL) 2441 device = handlep->mondevice; 2442 } 2443 arptype = iface_get_arptype(sock_fd, device, handle->errbuf); 2444 if (arptype < 0) { 2445 close(sock_fd); 2446 return arptype; 2447 } 2448 status = map_arphrd_to_dlt(handle, arptype, device, 1); 2449 if (status < 0) { 2450 close(sock_fd); 2451 return status; 2452 } 2453 if (handle->linktype == -1 || 2454 handle->linktype == DLT_LINUX_SLL || 2455 handle->linktype == DLT_LINUX_IRDA || 2456 handle->linktype == DLT_LINUX_LAPD || 2457 handle->linktype == DLT_NETLINK || 2458 (handle->linktype == DLT_EN10MB && 2459 (strncmp("isdn", device, 4) == 0 || 2460 strncmp("isdY", device, 4) == 0))) { 2461 /* 2462 * Unknown interface type (-1), or a 2463 * device we explicitly chose to run 2464 * in cooked mode (e.g., PPP devices), 2465 * or an ISDN device (whose link-layer 2466 * type we can only determine by using 2467 * APIs that may be different on different 2468 * kernels) - reopen in cooked mode. 2469 * 2470 * If the type is unknown, return a warning; 2471 * map_arphrd_to_dlt() has already set the 2472 * warning message. 2473 */ 2474 if (close(sock_fd) == -1) { 2475 pcapint_fmt_errmsg_for_errno(handle->errbuf, 2476 PCAP_ERRBUF_SIZE, errno, "close"); 2477 return PCAP_ERROR; 2478 } 2479 sock_fd = socket(PF_PACKET, SOCK_DGRAM, 0); 2480 if (sock_fd < 0) { 2481 /* 2482 * Fatal error. We treat this as 2483 * a generic error; we already know 2484 * that we were able to open a 2485 * PF_PACKET/SOCK_RAW socket, so 2486 * any failure is a "this shouldn't 2487 * happen" case. 2488 */ 2489 pcapint_fmt_errmsg_for_errno(handle->errbuf, 2490 PCAP_ERRBUF_SIZE, errno, "socket"); 2491 return PCAP_ERROR; 2492 } 2493 handlep->cooked = 1; 2494 2495 /* 2496 * Get rid of any link-layer type list 2497 * we allocated - this only supports cooked 2498 * capture. 
2499 */ 2500 if (handle->dlt_list != NULL) { 2501 free(handle->dlt_list); 2502 handle->dlt_list = NULL; 2503 handle->dlt_count = 0; 2504 } 2505 2506 if (handle->linktype == -1) { 2507 /* 2508 * Warn that we're falling back on 2509 * cooked mode; we may want to 2510 * update "map_arphrd_to_dlt()" 2511 * to handle the new type. 2512 */ 2513 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 2514 "arptype %d not " 2515 "supported by libpcap - " 2516 "falling back to cooked " 2517 "socket", 2518 arptype); 2519 status = PCAP_WARNING; 2520 } 2521 2522 /* 2523 * IrDA capture is not a real "cooked" capture, 2524 * it's IrLAP frames, not IP packets. The 2525 * same applies to LAPD capture. 2526 */ 2527 if (handle->linktype != DLT_LINUX_IRDA && 2528 handle->linktype != DLT_LINUX_LAPD && 2529 handle->linktype != DLT_NETLINK) 2530 handle->linktype = DLT_LINUX_SLL; 2531 } 2532 2533 handlep->ifindex = iface_get_id(sock_fd, device, 2534 handle->errbuf); 2535 if (handlep->ifindex == -1) { 2536 close(sock_fd); 2537 return PCAP_ERROR; 2538 } 2539 2540 if ((err = iface_bind(sock_fd, handlep->ifindex, 2541 handle->errbuf, 0)) != 0) { 2542 close(sock_fd); 2543 return err; 2544 } 2545 } else { 2546 /* 2547 * The "any" device. 2548 */ 2549 if (handle->opt.rfmon) { 2550 /* 2551 * It doesn't support monitor mode. 2552 */ 2553 close(sock_fd); 2554 return PCAP_ERROR_RFMON_NOTSUP; 2555 } 2556 2557 /* 2558 * It uses cooked mode. 2559 * Support both DLT_LINUX_SLL and DLT_LINUX_SLL2. 2560 */ 2561 handlep->cooked = 1; 2562 handle->linktype = DLT_LINUX_SLL; 2563 handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 2); 2564 if (handle->dlt_list == NULL) { 2565 pcapint_fmt_errmsg_for_errno(handle->errbuf, 2566 PCAP_ERRBUF_SIZE, errno, "malloc"); 2567 return (PCAP_ERROR); 2568 } 2569 handle->dlt_list[0] = DLT_LINUX_SLL; 2570 handle->dlt_list[1] = DLT_LINUX_SLL2; 2571 handle->dlt_count = 2; 2572 2573 /* 2574 * We're not bound to a device. 
2575 * For now, we're using this as an indication 2576 * that we can't transmit; stop doing that only 2577 * if we figure out how to transmit in cooked 2578 * mode. 2579 */ 2580 handlep->ifindex = -1; 2581 } 2582 2583 /* 2584 * Select promiscuous mode on if "promisc" is set. 2585 * 2586 * Do not turn allmulti mode on if we don't select 2587 * promiscuous mode - on some devices (e.g., Orinoco 2588 * wireless interfaces), allmulti mode isn't supported 2589 * and the driver implements it by turning promiscuous 2590 * mode on, and that screws up the operation of the 2591 * card as a normal networking interface, and on no 2592 * other platform I know of does starting a non- 2593 * promiscuous capture affect which multicast packets 2594 * are received by the interface. 2595 */ 2596 2597 /* 2598 * Hmm, how can we set promiscuous mode on all interfaces? 2599 * I am not sure if that is possible at all. For now, we 2600 * silently ignore attempts to turn promiscuous mode on 2601 * for the "any" device (so you don't have to explicitly 2602 * disable it in programs such as tcpdump). 2603 */ 2604 2605 if (!is_any_device && handle->opt.promisc) { 2606 memset(&mr, 0, sizeof(mr)); 2607 mr.mr_ifindex = handlep->ifindex; 2608 mr.mr_type = PACKET_MR_PROMISC; 2609 if (setsockopt(sock_fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, 2610 &mr, sizeof(mr)) == -1) { 2611 pcapint_fmt_errmsg_for_errno(handle->errbuf, 2612 PCAP_ERRBUF_SIZE, errno, "setsockopt (PACKET_ADD_MEMBERSHIP)"); 2613 close(sock_fd); 2614 return PCAP_ERROR; 2615 } 2616 } 2617 2618 /* 2619 * Enable auxiliary data and reserve room for reconstructing 2620 * VLAN headers. 2621 * 2622 * XXX - is enabling auxiliary data necessary, now that we 2623 * only support memory-mapped capture? The kernel's memory-mapped 2624 * capture code doesn't seem to check whether auxiliary data 2625 * is enabled, it seems to provide it whether it is or not. 
2626 */ 2627 val = 1; 2628 if (setsockopt(sock_fd, SOL_PACKET, PACKET_AUXDATA, &val, 2629 sizeof(val)) == -1 && errno != ENOPROTOOPT) { 2630 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 2631 errno, "setsockopt (PACKET_AUXDATA)"); 2632 close(sock_fd); 2633 return PCAP_ERROR; 2634 } 2635 handle->offset += VLAN_TAG_LEN; 2636 2637 /* 2638 * If we're in cooked mode, make the snapshot length 2639 * large enough to hold a "cooked mode" header plus 2640 * 1 byte of packet data (so we don't pass a byte 2641 * count of 0 to "recvfrom()"). 2642 * XXX - we don't know whether this will be DLT_LINUX_SLL 2643 * or DLT_LINUX_SLL2, so make sure it's big enough for 2644 * a DLT_LINUX_SLL2 "cooked mode" header; a snapshot length 2645 * that small is silly anyway. 2646 */ 2647 if (handlep->cooked) { 2648 if (handle->snapshot < SLL2_HDR_LEN + 1) 2649 handle->snapshot = SLL2_HDR_LEN + 1; 2650 } 2651 handle->bufsize = handle->snapshot; 2652 2653 /* 2654 * Set the offset at which to insert VLAN tags. 2655 */ 2656 set_vlan_offset(handle); 2657 2658 if (handle->opt.tstamp_precision == PCAP_TSTAMP_PRECISION_NANO) { 2659 int nsec_tstamps = 1; 2660 2661 if (setsockopt(sock_fd, SOL_SOCKET, SO_TIMESTAMPNS, &nsec_tstamps, sizeof(nsec_tstamps)) < 0) { 2662 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, "setsockopt: unable to set SO_TIMESTAMPNS"); 2663 close(sock_fd); 2664 return PCAP_ERROR; 2665 } 2666 } 2667 2668 /* 2669 * We've succeeded. Save the socket FD in the pcap structure. 2670 */ 2671 handle->fd = sock_fd; 2672 2673 #if defined(SO_BPF_EXTENSIONS) && defined(SKF_AD_VLAN_TAG_PRESENT) 2674 /* 2675 * Can we generate special code for VLAN checks? 2676 * (XXX - what if we need the special code but it's not supported 2677 * by the OS? Is that possible?) 2678 */ 2679 if (getsockopt(sock_fd, SOL_SOCKET, SO_BPF_EXTENSIONS, 2680 &bpf_extensions, &len) == 0) { 2681 if (bpf_extensions >= SKF_AD_VLAN_TAG_PRESENT) { 2682 /* 2683 * Yes, we can. Request that we do so. 
2684 */ 2685 handle->bpf_codegen_flags |= BPF_SPECIAL_VLAN_HANDLING; 2686 } 2687 } 2688 #endif /* defined(SO_BPF_EXTENSIONS) && defined(SKF_AD_VLAN_TAG_PRESENT) */ 2689 2690 return status; 2691 } 2692 2693 /* 2694 * Attempt to setup memory-mapped access. 2695 * 2696 * On success, returns 0 if there are no warnings or a PCAP_WARNING_ code 2697 * if there is a warning. 2698 * 2699 * On error, returns the appropriate error code; if that is PCAP_ERROR, 2700 * sets handle->errbuf to the appropriate message. 2701 */ 2702 static int 2703 setup_mmapped(pcap_t *handle) 2704 { 2705 struct pcap_linux *handlep = handle->priv; 2706 int status; 2707 2708 /* 2709 * Attempt to allocate a buffer to hold the contents of one 2710 * packet, for use by the oneshot callback. 2711 */ 2712 handlep->oneshot_buffer = malloc(handle->snapshot); 2713 if (handlep->oneshot_buffer == NULL) { 2714 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 2715 errno, "can't allocate oneshot buffer"); 2716 return PCAP_ERROR; 2717 } 2718 2719 if (handle->opt.buffer_size == 0) { 2720 /* by default request 2M for the ring buffer */ 2721 handle->opt.buffer_size = 2*1024*1024; 2722 } 2723 status = prepare_tpacket_socket(handle); 2724 if (status == -1) { 2725 free(handlep->oneshot_buffer); 2726 handlep->oneshot_buffer = NULL; 2727 return PCAP_ERROR; 2728 } 2729 status = create_ring(handle); 2730 if (status < 0) { 2731 /* 2732 * Error attempting to enable memory-mapped capture; 2733 * fail. The return value is the status to return. 2734 */ 2735 free(handlep->oneshot_buffer); 2736 handlep->oneshot_buffer = NULL; 2737 return status; 2738 } 2739 2740 /* 2741 * Success. status has been set either to 0 if there are no 2742 * warnings or to a PCAP_WARNING_ value if there is a warning. 2743 * 2744 * handle->offset is used to get the current position into the rx ring. 2745 * handle->cc is used to store the ring size. 2746 */ 2747 2748 /* 2749 * Set the timeout to use in poll() before returning. 
2750 */ 2751 set_poll_timeout(handlep); 2752 2753 return status; 2754 } 2755 2756 /* 2757 * Attempt to set the socket to the specified version of the memory-mapped 2758 * header. 2759 * 2760 * Return 0 if we succeed; return 1 if we fail because that version isn't 2761 * supported; return -1 on any other error, and set handle->errbuf. 2762 */ 2763 static int 2764 init_tpacket(pcap_t *handle, int version, const char *version_str) 2765 { 2766 struct pcap_linux *handlep = handle->priv; 2767 int val = version; 2768 socklen_t len = sizeof(val); 2769 2770 /* 2771 * Probe whether kernel supports the specified TPACKET version; 2772 * this also gets the length of the header for that version. 2773 * 2774 * This socket option was introduced in 2.6.27, which was 2775 * also the first release with TPACKET_V2 support. 2776 */ 2777 if (getsockopt(handle->fd, SOL_PACKET, PACKET_HDRLEN, &val, &len) < 0) { 2778 if (errno == EINVAL) { 2779 /* 2780 * EINVAL means this specific version of TPACKET 2781 * is not supported. Tell the caller they can try 2782 * with a different one; if they've run out of 2783 * others to try, let them set the error message 2784 * appropriately. 2785 */ 2786 return 1; 2787 } 2788 2789 /* 2790 * All other errors are fatal. 2791 */ 2792 if (errno == ENOPROTOOPT) { 2793 /* 2794 * PACKET_HDRLEN isn't supported, which means 2795 * that memory-mapped capture isn't supported. 2796 * Indicate that in the message. 2797 */ 2798 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 2799 "Kernel doesn't support memory-mapped capture; a 2.6.27 or later 2.x kernel is required, with CONFIG_PACKET_MMAP specified for 2.x kernels"); 2800 } else { 2801 /* 2802 * Some unexpected error. 
2803 */ 2804 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 2805 errno, "can't get %s header len on packet socket", 2806 version_str); 2807 } 2808 return -1; 2809 } 2810 handlep->tp_hdrlen = val; 2811 2812 val = version; 2813 if (setsockopt(handle->fd, SOL_PACKET, PACKET_VERSION, &val, 2814 sizeof(val)) < 0) { 2815 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 2816 errno, "can't activate %s on packet socket", version_str); 2817 return -1; 2818 } 2819 handlep->tp_version = version; 2820 2821 return 0; 2822 } 2823 2824 /* 2825 * Attempt to set the socket to version 3 of the memory-mapped header and, 2826 * if that fails because version 3 isn't supported, attempt to fall 2827 * back to version 2. If version 2 isn't supported, just fail. 2828 * 2829 * Return 0 if we succeed and -1 on any other error, and set handle->errbuf. 2830 */ 2831 static int 2832 prepare_tpacket_socket(pcap_t *handle) 2833 { 2834 int ret; 2835 2836 #ifdef HAVE_TPACKET3 2837 /* 2838 * Try setting the version to TPACKET_V3. 2839 * 2840 * The only mode in which buffering is done on PF_PACKET 2841 * sockets, so that packets might not be delivered 2842 * immediately, is TPACKET_V3 mode. 2843 * 2844 * The buffering cannot be disabled in that mode, so 2845 * if the user has requested immediate mode, we don't 2846 * use TPACKET_V3. 2847 */ 2848 if (!handle->opt.immediate) { 2849 ret = init_tpacket(handle, TPACKET_V3, "TPACKET_V3"); 2850 if (ret == 0) { 2851 /* 2852 * Success. 2853 */ 2854 return 0; 2855 } 2856 if (ret == -1) { 2857 /* 2858 * We failed for some reason other than "the 2859 * kernel doesn't support TPACKET_V3". 2860 */ 2861 return -1; 2862 } 2863 2864 /* 2865 * This means it returned 1, which means "the kernel 2866 * doesn't support TPACKET_V3"; try TPACKET_V2. 2867 */ 2868 } 2869 #endif /* HAVE_TPACKET3 */ 2870 2871 /* 2872 * Try setting the version to TPACKET_V2. 
2873 */ 2874 ret = init_tpacket(handle, TPACKET_V2, "TPACKET_V2"); 2875 if (ret == 0) { 2876 /* 2877 * Success. 2878 */ 2879 return 0; 2880 } 2881 2882 if (ret == 1) { 2883 /* 2884 * OK, the kernel supports memory-mapped capture, but 2885 * not TPACKET_V2. Set the error message appropriately. 2886 */ 2887 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 2888 "Kernel doesn't support TPACKET_V2; a 2.6.27 or later kernel is required"); 2889 } 2890 2891 /* 2892 * We failed. 2893 */ 2894 return -1; 2895 } 2896 2897 #define MAX(a,b) ((a)>(b)?(a):(b)) 2898 2899 /* 2900 * Attempt to set up memory-mapped access. 2901 * 2902 * On success, returns 0 if there are no warnings or to a PCAP_WARNING_ code 2903 * if there is a warning. 2904 * 2905 * On error, returns the appropriate error code; if that is PCAP_ERROR, 2906 * sets handle->errbuf to the appropriate message. 2907 */ 2908 static int 2909 create_ring(pcap_t *handle) 2910 { 2911 struct pcap_linux *handlep = handle->priv; 2912 unsigned i, j, frames_per_block; 2913 #ifdef HAVE_TPACKET3 2914 /* 2915 * For sockets using TPACKET_V2, the extra stuff at the end of a 2916 * struct tpacket_req3 will be ignored, so this is OK even for 2917 * those sockets. 2918 */ 2919 struct tpacket_req3 req; 2920 #else 2921 struct tpacket_req req; 2922 #endif 2923 socklen_t len; 2924 unsigned int sk_type, tp_reserve, maclen, tp_hdrlen, netoff, macoff; 2925 unsigned int frame_size; 2926 int status; 2927 2928 /* 2929 * Start out assuming no warnings. 2930 */ 2931 status = 0; 2932 2933 /* 2934 * Reserve space for VLAN tag reconstruction. 2935 */ 2936 tp_reserve = VLAN_TAG_LEN; 2937 2938 /* 2939 * If we're capturing in cooked mode, reserve space for 2940 * a DLT_LINUX_SLL2 header; we don't know yet whether 2941 * we'll be using DLT_LINUX_SLL or DLT_LINUX_SLL2, as 2942 * that can be changed on an open device, so we reserve 2943 * space for the larger of the two. 
2944 * 2945 * XXX - we assume that the kernel is still adding 2946 * 16 bytes of extra space, so we subtract 16 from 2947 * SLL2_HDR_LEN to get the additional space needed. 2948 * (Are they doing that for DLT_LINUX_SLL, the link- 2949 * layer header for which is 16 bytes?) 2950 * 2951 * XXX - should we use TPACKET_ALIGN(SLL2_HDR_LEN - 16)? 2952 */ 2953 if (handlep->cooked) 2954 tp_reserve += SLL2_HDR_LEN - 16; 2955 2956 /* 2957 * Try to request that amount of reserve space. 2958 * This must be done before creating the ring buffer. 2959 */ 2960 len = sizeof(tp_reserve); 2961 if (setsockopt(handle->fd, SOL_PACKET, PACKET_RESERVE, 2962 &tp_reserve, len) < 0) { 2963 pcapint_fmt_errmsg_for_errno(handle->errbuf, 2964 PCAP_ERRBUF_SIZE, errno, 2965 "setsockopt (PACKET_RESERVE)"); 2966 return PCAP_ERROR; 2967 } 2968 2969 switch (handlep->tp_version) { 2970 2971 case TPACKET_V2: 2972 /* Note that with large snapshot length (say 256K, which is 2973 * the default for recent versions of tcpdump, Wireshark, 2974 * TShark, dumpcap or 64K, the value that "-s 0" has given for 2975 * a long time with tcpdump), if we use the snapshot 2976 * length to calculate the frame length, only a few frames 2977 * will be available in the ring even with pretty 2978 * large ring size (and a lot of memory will be unused). 2979 * 2980 * Ideally, we should choose a frame length based on the 2981 * minimum of the specified snapshot length and the maximum 2982 * packet size. That's not as easy as it sounds; consider, 2983 * for example, an 802.11 interface in monitor mode, where 2984 * the frame would include a radiotap header, where the 2985 * maximum radiotap header length is device-dependent. 2986 * 2987 * So, for now, we just do this for Ethernet devices, where 2988 * there's no metadata header, and the link-layer header is 2989 * fixed length. 
We can get the maximum packet size by 2990 * adding 18, the Ethernet header length plus the CRC length 2991 * (just in case we happen to get the CRC in the packet), to 2992 * the MTU of the interface; we fetch the MTU in the hopes 2993 * that it reflects support for jumbo frames. (Even if the 2994 * interface is just being used for passive snooping, the 2995 * driver might set the size of buffers in the receive ring 2996 * based on the MTU, so that the MTU limits the maximum size 2997 * of packets that we can receive.) 2998 * 2999 * If segmentation/fragmentation or receive offload are 3000 * enabled, we can get reassembled/aggregated packets larger 3001 * than MTU, but bounded to 65535 plus the Ethernet overhead, 3002 * due to kernel and protocol constraints */ 3003 frame_size = handle->snapshot; 3004 if (handle->linktype == DLT_EN10MB) { 3005 unsigned int max_frame_len; 3006 int mtu; 3007 int offload; 3008 3009 mtu = iface_get_mtu(handle->fd, handle->opt.device, 3010 handle->errbuf); 3011 if (mtu == -1) 3012 return PCAP_ERROR; 3013 offload = iface_get_offload(handle); 3014 if (offload == -1) 3015 return PCAP_ERROR; 3016 if (offload) 3017 max_frame_len = MAX(mtu, 65535); 3018 else 3019 max_frame_len = mtu; 3020 max_frame_len += 18; 3021 3022 if (frame_size > max_frame_len) 3023 frame_size = max_frame_len; 3024 } 3025 3026 /* NOTE: calculus matching those in tpacket_rcv() 3027 * in linux-2.6/net/packet/af_packet.c 3028 */ 3029 len = sizeof(sk_type); 3030 if (getsockopt(handle->fd, SOL_SOCKET, SO_TYPE, &sk_type, 3031 &len) < 0) { 3032 pcapint_fmt_errmsg_for_errno(handle->errbuf, 3033 PCAP_ERRBUF_SIZE, errno, "getsockopt (SO_TYPE)"); 3034 return PCAP_ERROR; 3035 } 3036 maclen = (sk_type == SOCK_DGRAM) ? 
0 : MAX_LINKHEADER_SIZE; 3037 /* XXX: in the kernel maclen is calculated from 3038 * LL_ALLOCATED_SPACE(dev) and vnet_hdr.hdr_len 3039 * in: packet_snd() in linux-2.6/net/packet/af_packet.c 3040 * then packet_alloc_skb() in linux-2.6/net/packet/af_packet.c 3041 * then sock_alloc_send_pskb() in linux-2.6/net/core/sock.c 3042 * but I see no way to get those sizes in userspace, 3043 * like for instance with an ifreq ioctl(); 3044 * the best thing I've found so far is MAX_HEADER in 3045 * the kernel part of linux-2.6/include/linux/netdevice.h 3046 * which goes up to 128+48=176; since pcap-linux.c 3047 * defines a MAX_LINKHEADER_SIZE of 256 which is 3048 * greater than that, let's use it.. maybe is it even 3049 * large enough to directly replace macoff.. 3050 */ 3051 tp_hdrlen = TPACKET_ALIGN(handlep->tp_hdrlen) + sizeof(struct sockaddr_ll) ; 3052 netoff = TPACKET_ALIGN(tp_hdrlen + (maclen < 16 ? 16 : maclen)) + tp_reserve; 3053 /* NOTE: AFAICS tp_reserve may break the TPACKET_ALIGN 3054 * of netoff, which contradicts 3055 * linux-2.6/Documentation/networking/packet_mmap.txt 3056 * documenting that: 3057 * "- Gap, chosen so that packet data (Start+tp_net) 3058 * aligns to TPACKET_ALIGNMENT=16" 3059 */ 3060 /* NOTE: in linux-2.6/include/linux/skbuff.h: 3061 * "CPUs often take a performance hit 3062 * when accessing unaligned memory locations" 3063 */ 3064 macoff = netoff - maclen; 3065 req.tp_frame_size = TPACKET_ALIGN(macoff + frame_size); 3066 /* 3067 * Round the buffer size up to a multiple of the 3068 * frame size (rather than rounding down, which 3069 * would give a buffer smaller than our caller asked 3070 * for, and possibly give zero frames if the requested 3071 * buffer size is too small for one frame). 
3072 */ 3073 req.tp_frame_nr = (handle->opt.buffer_size + req.tp_frame_size - 1)/req.tp_frame_size; 3074 break; 3075 3076 #ifdef HAVE_TPACKET3 3077 case TPACKET_V3: 3078 /* The "frames" for this are actually buffers that 3079 * contain multiple variable-sized frames. 3080 * 3081 * We pick a "frame" size of MAXIMUM_SNAPLEN to leave 3082 * enough room for at least one reasonably-sized packet 3083 * in the "frame". */ 3084 req.tp_frame_size = MAXIMUM_SNAPLEN; 3085 /* 3086 * Round the buffer size up to a multiple of the 3087 * "frame" size (rather than rounding down, which 3088 * would give a buffer smaller than our caller asked 3089 * for, and possibly give zero "frames" if the requested 3090 * buffer size is too small for one "frame"). 3091 */ 3092 req.tp_frame_nr = (handle->opt.buffer_size + req.tp_frame_size - 1)/req.tp_frame_size; 3093 break; 3094 #endif 3095 default: 3096 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3097 "Internal error: unknown TPACKET_ value %u", 3098 handlep->tp_version); 3099 return PCAP_ERROR; 3100 } 3101 3102 /* compute the minimum block size that will handle this frame. 3103 * The block has to be page size aligned. 3104 * The max block size allowed by the kernel is arch-dependent and 3105 * it's not explicitly checked here. */ 3106 req.tp_block_size = getpagesize(); 3107 while (req.tp_block_size < req.tp_frame_size) 3108 req.tp_block_size <<= 1; 3109 3110 frames_per_block = req.tp_block_size/req.tp_frame_size; 3111 3112 /* 3113 * PACKET_TIMESTAMP was added after linux/net_tstamp.h was, 3114 * so we check for PACKET_TIMESTAMP. We check for 3115 * linux/net_tstamp.h just in case a system somehow has 3116 * PACKET_TIMESTAMP but not linux/net_tstamp.h; that might 3117 * be unnecessary. 
3118 * 3119 * SIOCSHWTSTAMP was introduced in the patch that introduced 3120 * linux/net_tstamp.h, so we don't bother checking whether 3121 * SIOCSHWTSTAMP is defined (if your Linux system has 3122 * linux/net_tstamp.h but doesn't define SIOCSHWTSTAMP, your 3123 * Linux system is badly broken). 3124 */ 3125 #if defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) 3126 /* 3127 * If we were told to do so, ask the kernel and the driver 3128 * to use hardware timestamps. 3129 * 3130 * Hardware timestamps are only supported with mmapped 3131 * captures. 3132 */ 3133 if (handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER || 3134 handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER_UNSYNCED) { 3135 struct hwtstamp_config hwconfig; 3136 struct ifreq ifr; 3137 int timesource; 3138 3139 /* 3140 * Ask for hardware time stamps on all packets, 3141 * including transmitted packets. 3142 */ 3143 memset(&hwconfig, 0, sizeof(hwconfig)); 3144 hwconfig.tx_type = HWTSTAMP_TX_ON; 3145 hwconfig.rx_filter = HWTSTAMP_FILTER_ALL; 3146 3147 memset(&ifr, 0, sizeof(ifr)); 3148 pcapint_strlcpy(ifr.ifr_name, handle->opt.device, sizeof(ifr.ifr_name)); 3149 ifr.ifr_data = (void *)&hwconfig; 3150 3151 /* 3152 * This may require CAP_NET_ADMIN. 3153 */ 3154 if (ioctl(handle->fd, SIOCSHWTSTAMP, &ifr) < 0) { 3155 switch (errno) { 3156 3157 case EPERM: 3158 /* 3159 * Treat this as an error, as the 3160 * user should try to run this 3161 * with the appropriate privileges - 3162 * and, if they can't, shouldn't 3163 * try requesting hardware time stamps. 3164 */ 3165 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3166 "Attempt to set hardware timestamp failed - CAP_NET_ADMIN may be required"); 3167 return PCAP_ERROR_PERM_DENIED; 3168 3169 case EOPNOTSUPP: 3170 case ERANGE: 3171 /* 3172 * Treat this as a warning, as the 3173 * only way to fix the warning is to 3174 * get an adapter that supports hardware 3175 * time stamps for *all* packets. 
3176 * (ERANGE means "we support hardware 3177 * time stamps, but for packets matching 3178 * that particular filter", so it means 3179 * "we don't support hardware time stamps 3180 * for all incoming packets" here.) 3181 * 3182 * We'll just fall back on the standard 3183 * host time stamps. 3184 */ 3185 status = PCAP_WARNING_TSTAMP_TYPE_NOTSUP; 3186 break; 3187 3188 default: 3189 pcapint_fmt_errmsg_for_errno(handle->errbuf, 3190 PCAP_ERRBUF_SIZE, errno, 3191 "SIOCSHWTSTAMP failed"); 3192 return PCAP_ERROR; 3193 } 3194 } else { 3195 /* 3196 * Well, that worked. Now specify the type of 3197 * hardware time stamp we want for this 3198 * socket. 3199 */ 3200 if (handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER) { 3201 /* 3202 * Hardware timestamp, synchronized 3203 * with the system clock. 3204 */ 3205 timesource = SOF_TIMESTAMPING_SYS_HARDWARE; 3206 } else { 3207 /* 3208 * PCAP_TSTAMP_ADAPTER_UNSYNCED - hardware 3209 * timestamp, not synchronized with the 3210 * system clock. 3211 */ 3212 timesource = SOF_TIMESTAMPING_RAW_HARDWARE; 3213 } 3214 if (setsockopt(handle->fd, SOL_PACKET, PACKET_TIMESTAMP, 3215 (void *)×ource, sizeof(timesource))) { 3216 pcapint_fmt_errmsg_for_errno(handle->errbuf, 3217 PCAP_ERRBUF_SIZE, errno, 3218 "can't set PACKET_TIMESTAMP"); 3219 return PCAP_ERROR; 3220 } 3221 } 3222 } 3223 #endif /* HAVE_LINUX_NET_TSTAMP_H && PACKET_TIMESTAMP */ 3224 3225 /* ask the kernel to create the ring */ 3226 retry: 3227 req.tp_block_nr = req.tp_frame_nr / frames_per_block; 3228 3229 /* req.tp_frame_nr is requested to match frames_per_block*req.tp_block_nr */ 3230 req.tp_frame_nr = req.tp_block_nr * frames_per_block; 3231 3232 #ifdef HAVE_TPACKET3 3233 /* timeout value to retire block - use the configured buffering timeout, or default if <0. 
*/ 3234 if (handlep->timeout > 0) { 3235 /* Use the user specified timeout as the block timeout */ 3236 req.tp_retire_blk_tov = handlep->timeout; 3237 } else if (handlep->timeout == 0) { 3238 /* 3239 * In pcap, this means "infinite timeout"; TPACKET_V3 3240 * doesn't support that, so just set it to UINT_MAX 3241 * milliseconds. In the TPACKET_V3 loop, if the 3242 * timeout is 0, and we haven't yet seen any packets, 3243 * and we block and still don't have any packets, we 3244 * keep blocking until we do. 3245 */ 3246 req.tp_retire_blk_tov = UINT_MAX; 3247 } else { 3248 /* 3249 * XXX - this is not valid; use 0, meaning "have the 3250 * kernel pick a default", for now. 3251 */ 3252 req.tp_retire_blk_tov = 0; 3253 } 3254 /* private data not used */ 3255 req.tp_sizeof_priv = 0; 3256 /* Rx ring - feature request bits - none (rxhash will not be filled) */ 3257 req.tp_feature_req_word = 0; 3258 #endif 3259 3260 if (setsockopt(handle->fd, SOL_PACKET, PACKET_RX_RING, 3261 (void *) &req, sizeof(req))) { 3262 if ((errno == ENOMEM) && (req.tp_block_nr > 1)) { 3263 /* 3264 * Memory failure; try to reduce the requested ring 3265 * size. 3266 * 3267 * We used to reduce this by half -- do 5% instead. 3268 * That may result in more iterations and a longer 3269 * startup, but the user will be much happier with 3270 * the resulting buffer size. 
3271 */ 3272 if (req.tp_frame_nr < 20) 3273 req.tp_frame_nr -= 1; 3274 else 3275 req.tp_frame_nr -= req.tp_frame_nr/20; 3276 goto retry; 3277 } 3278 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 3279 errno, "can't create rx ring on packet socket"); 3280 return PCAP_ERROR; 3281 } 3282 3283 /* memory map the rx ring */ 3284 handlep->mmapbuflen = req.tp_block_nr * req.tp_block_size; 3285 handlep->mmapbuf = mmap(0, handlep->mmapbuflen, 3286 PROT_READ|PROT_WRITE, MAP_SHARED, handle->fd, 0); 3287 if (handlep->mmapbuf == MAP_FAILED) { 3288 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 3289 errno, "can't mmap rx ring"); 3290 3291 /* clear the allocated ring on error*/ 3292 destroy_ring(handle); 3293 return PCAP_ERROR; 3294 } 3295 3296 /* allocate a ring for each frame header pointer*/ 3297 handle->cc = req.tp_frame_nr; 3298 handle->buffer = malloc(handle->cc * sizeof(union thdr *)); 3299 if (!handle->buffer) { 3300 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 3301 errno, "can't allocate ring of frame headers"); 3302 3303 destroy_ring(handle); 3304 return PCAP_ERROR; 3305 } 3306 3307 /* fill the header ring with proper frame ptr*/ 3308 handle->offset = 0; 3309 for (i=0; i<req.tp_block_nr; ++i) { 3310 u_char *base = &handlep->mmapbuf[i*req.tp_block_size]; 3311 for (j=0; j<frames_per_block; ++j, ++handle->offset) { 3312 RING_GET_CURRENT_FRAME(handle) = base; 3313 base += req.tp_frame_size; 3314 } 3315 } 3316 3317 handle->bufsize = req.tp_frame_size; 3318 handle->offset = 0; 3319 return status; 3320 } 3321 3322 /* free all ring related resources*/ 3323 static void 3324 destroy_ring(pcap_t *handle) 3325 { 3326 struct pcap_linux *handlep = handle->priv; 3327 3328 /* 3329 * Tell the kernel to destroy the ring. 3330 * We don't check for setsockopt failure, as 1) we can't recover 3331 * from an error and 2) we might not yet have set it up in the 3332 * first place. 
3333 */ 3334 struct tpacket_req req; 3335 memset(&req, 0, sizeof(req)); 3336 (void)setsockopt(handle->fd, SOL_PACKET, PACKET_RX_RING, 3337 (void *) &req, sizeof(req)); 3338 3339 /* if ring is mapped, unmap it*/ 3340 if (handlep->mmapbuf) { 3341 /* do not test for mmap failure, as we can't recover from any error */ 3342 (void)munmap(handlep->mmapbuf, handlep->mmapbuflen); 3343 handlep->mmapbuf = NULL; 3344 } 3345 } 3346 3347 /* 3348 * Special one-shot callback, used for pcap_next() and pcap_next_ex(), 3349 * for Linux mmapped capture. 3350 * 3351 * The problem is that pcap_next() and pcap_next_ex() expect the packet 3352 * data handed to the callback to be valid after the callback returns, 3353 * but pcap_read_linux_mmap() has to release that packet as soon as 3354 * the callback returns (otherwise, the kernel thinks there's still 3355 * at least one unprocessed packet available in the ring, so a select() 3356 * will immediately return indicating that there's data to process), so, 3357 * in the callback, we have to make a copy of the packet. 3358 * 3359 * Yes, this means that, if the capture is using the ring buffer, using 3360 * pcap_next() or pcap_next_ex() requires more copies than using 3361 * pcap_loop() or pcap_dispatch(). If that bothers you, don't use 3362 * pcap_next() or pcap_next_ex(). 
3363 */ 3364 static void 3365 pcapint_oneshot_linux(u_char *user, const struct pcap_pkthdr *h, 3366 const u_char *bytes) 3367 { 3368 struct oneshot_userdata *sp = (struct oneshot_userdata *)user; 3369 pcap_t *handle = sp->pd; 3370 struct pcap_linux *handlep = handle->priv; 3371 3372 *sp->hdr = *h; 3373 memcpy(handlep->oneshot_buffer, bytes, h->caplen); 3374 *sp->pkt = handlep->oneshot_buffer; 3375 } 3376 3377 static int 3378 pcap_getnonblock_linux(pcap_t *handle) 3379 { 3380 struct pcap_linux *handlep = handle->priv; 3381 3382 /* use negative value of timeout to indicate non blocking ops */ 3383 return (handlep->timeout<0); 3384 } 3385 3386 static int 3387 pcap_setnonblock_linux(pcap_t *handle, int nonblock) 3388 { 3389 struct pcap_linux *handlep = handle->priv; 3390 3391 /* 3392 * Set the file descriptor to the requested mode, as we use 3393 * it for sending packets. 3394 */ 3395 if (pcapint_setnonblock_fd(handle, nonblock) == -1) 3396 return -1; 3397 3398 /* 3399 * Map each value to their corresponding negation to 3400 * preserve the timeout value provided with pcap_set_timeout. 3401 */ 3402 if (nonblock) { 3403 /* 3404 * We're setting the mode to non-blocking mode. 3405 */ 3406 if (handlep->timeout >= 0) { 3407 /* 3408 * Indicate that we're switching to 3409 * non-blocking mode. 3410 */ 3411 handlep->timeout = ~handlep->timeout; 3412 } 3413 if (handlep->poll_breakloop_fd != -1) { 3414 /* Close the eventfd; we do not need it in nonblock mode. */ 3415 close(handlep->poll_breakloop_fd); 3416 handlep->poll_breakloop_fd = -1; 3417 } 3418 } else { 3419 /* 3420 * We're setting the mode to blocking mode. 3421 */ 3422 if (handlep->poll_breakloop_fd == -1) { 3423 /* If we did not have an eventfd, open one now that we are blocking. 
*/ 3424 if ( ( handlep->poll_breakloop_fd = eventfd(0, EFD_NONBLOCK) ) == -1 ) { 3425 pcapint_fmt_errmsg_for_errno(handle->errbuf, 3426 PCAP_ERRBUF_SIZE, errno, 3427 "could not open eventfd"); 3428 return -1; 3429 } 3430 } 3431 if (handlep->timeout < 0) { 3432 handlep->timeout = ~handlep->timeout; 3433 } 3434 } 3435 /* Update the timeout to use in poll(). */ 3436 set_poll_timeout(handlep); 3437 return 0; 3438 } 3439 3440 /* 3441 * Get the status field of the ring buffer frame at a specified offset. 3442 */ 3443 static inline u_int 3444 pcap_get_ring_frame_status(pcap_t *handle, int offset) 3445 { 3446 struct pcap_linux *handlep = handle->priv; 3447 union thdr h; 3448 3449 h.raw = RING_GET_FRAME_AT(handle, offset); 3450 switch (handlep->tp_version) { 3451 case TPACKET_V2: 3452 return __atomic_load_n(&h.h2->tp_status, __ATOMIC_ACQUIRE); 3453 break; 3454 #ifdef HAVE_TPACKET3 3455 case TPACKET_V3: 3456 return __atomic_load_n(&h.h3->hdr.bh1.block_status, __ATOMIC_ACQUIRE); 3457 break; 3458 #endif 3459 } 3460 /* This should not happen. */ 3461 return 0; 3462 } 3463 3464 /* 3465 * Block waiting for frames to be available. 3466 */ 3467 static int pcap_wait_for_frames_mmap(pcap_t *handle) 3468 { 3469 struct pcap_linux *handlep = handle->priv; 3470 int timeout; 3471 struct ifreq ifr; 3472 int ret; 3473 struct pollfd pollinfo[2]; 3474 int numpollinfo; 3475 pollinfo[0].fd = handle->fd; 3476 pollinfo[0].events = POLLIN; 3477 if ( handlep->poll_breakloop_fd == -1 ) { 3478 numpollinfo = 1; 3479 pollinfo[1].revents = 0; 3480 /* 3481 * We set pollinfo[1].revents to zero, even though 3482 * numpollinfo = 1 meaning that poll() doesn't see 3483 * pollinfo[1], so that we do not have to add a 3484 * conditional of numpollinfo > 1 below when we 3485 * test pollinfo[1].revents. 
3486 */ 3487 } else { 3488 pollinfo[1].fd = handlep->poll_breakloop_fd; 3489 pollinfo[1].events = POLLIN; 3490 numpollinfo = 2; 3491 } 3492 3493 /* 3494 * Keep polling until we either get some packets to read, see 3495 * that we got told to break out of the loop, get a fatal error, 3496 * or discover that the device went away. 3497 * 3498 * In non-blocking mode, we must still do one poll() to catch 3499 * any pending error indications, but the poll() has a timeout 3500 * of 0, so that it doesn't block, and we quit after that one 3501 * poll(). 3502 * 3503 * If we've seen an ENETDOWN, it might be the first indication 3504 * that the device went away, or it might just be that it was 3505 * configured down. Unfortunately, there's no guarantee that 3506 * the device has actually been removed as an interface, because: 3507 * 3508 * 1) if, as appears to be the case at least some of the time, 3509 * the PF_PACKET socket code first gets a NETDEV_DOWN indication 3510 * for the device and then gets a NETDEV_UNREGISTER indication 3511 * for it, the first indication will cause a wakeup with ENETDOWN 3512 * but won't set the packet socket's field for the interface index 3513 * to -1, and the second indication won't cause a wakeup (because 3514 * the first indication also caused the protocol hook to be 3515 * unregistered) but will set the packet socket's field for the 3516 * interface index to -1; 3517 * 3518 * 2) even if just a NETDEV_UNREGISTER indication is registered, 3519 * the packet socket's field for the interface index only gets 3520 * set to -1 after the wakeup, so there's a small but non-zero 3521 * risk that a thread blocked waiting for the wakeup will get 3522 * to the "fetch the socket name" code before the interface index 3523 * gets set to -1, so it'll get the old interface index. 
3524 * 3525 * Therefore, if we got an ENETDOWN and haven't seen a packet 3526 * since then, we assume that we might be waiting for the interface 3527 * to disappear, and poll with a timeout to try again in a short 3528 * period of time. If we *do* see a packet, the interface has 3529 * come back up again, and is *definitely* still there, so we 3530 * don't need to poll. 3531 */ 3532 for (;;) { 3533 /* 3534 * Yes, we do this even in non-blocking mode, as it's 3535 * the only way to get error indications from a 3536 * tpacket socket. 3537 * 3538 * The timeout is 0 in non-blocking mode, so poll() 3539 * returns immediately. 3540 */ 3541 timeout = handlep->poll_timeout; 3542 3543 /* 3544 * If we got an ENETDOWN and haven't gotten an indication 3545 * that the device has gone away or that the device is up, 3546 * we don't yet know for certain whether the device has 3547 * gone away or not, do a poll() with a 1-millisecond timeout, 3548 * as we have to poll indefinitely for "device went away" 3549 * indications until we either get one or see that the 3550 * device is up. 3551 */ 3552 if (handlep->netdown) { 3553 if (timeout != 0) 3554 timeout = 1; 3555 } 3556 ret = poll(pollinfo, numpollinfo, timeout); 3557 if (ret < 0) { 3558 /* 3559 * Error. If it's not EINTR, report it. 3560 */ 3561 if (errno != EINTR) { 3562 pcapint_fmt_errmsg_for_errno(handle->errbuf, 3563 PCAP_ERRBUF_SIZE, errno, 3564 "can't poll on packet socket"); 3565 return PCAP_ERROR; 3566 } 3567 3568 /* 3569 * It's EINTR; if we were told to break out of 3570 * the loop, do so. 3571 */ 3572 if (handle->break_loop) { 3573 handle->break_loop = 0; 3574 return PCAP_ERROR_BREAK; 3575 } 3576 } else if (ret > 0) { 3577 /* 3578 * OK, some descriptor is ready. 3579 * Check the socket descriptor first. 3580 * 3581 * As I read the Linux man page, pollinfo[0].revents 3582 * will either be POLLIN, POLLERR, POLLHUP, or POLLNVAL. 
3583 */ 3584 if (pollinfo[0].revents == POLLIN) { 3585 /* 3586 * OK, we may have packets to 3587 * read. 3588 */ 3589 break; 3590 } 3591 if (pollinfo[0].revents != 0) { 3592 /* 3593 * There's some indication other than 3594 * "you can read on this descriptor" on 3595 * the descriptor. 3596 */ 3597 if (pollinfo[0].revents & POLLNVAL) { 3598 snprintf(handle->errbuf, 3599 PCAP_ERRBUF_SIZE, 3600 "Invalid polling request on packet socket"); 3601 return PCAP_ERROR; 3602 } 3603 if (pollinfo[0].revents & (POLLHUP | POLLRDHUP)) { 3604 snprintf(handle->errbuf, 3605 PCAP_ERRBUF_SIZE, 3606 "Hangup on packet socket"); 3607 return PCAP_ERROR; 3608 } 3609 if (pollinfo[0].revents & POLLERR) { 3610 /* 3611 * Get the error. 3612 */ 3613 int err; 3614 socklen_t errlen; 3615 3616 errlen = sizeof(err); 3617 if (getsockopt(handle->fd, SOL_SOCKET, 3618 SO_ERROR, &err, &errlen) == -1) { 3619 /* 3620 * The call *itself* returned 3621 * an error; make *that* 3622 * the error. 3623 */ 3624 err = errno; 3625 } 3626 3627 /* 3628 * OK, we have the error. 3629 */ 3630 if (err == ENETDOWN) { 3631 /* 3632 * The device on which we're 3633 * capturing went away or the 3634 * interface was taken down. 3635 * 3636 * We don't know for certain 3637 * which happened, and the 3638 * next poll() may indicate 3639 * that there are packets 3640 * to be read, so just set 3641 * a flag to get us to do 3642 * checks later, and set 3643 * the required select 3644 * timeout to 1 millisecond 3645 * so that event loops that 3646 * check our socket descriptor 3647 * also time out so that 3648 * they can call us and we 3649 * can do the checks. 3650 */ 3651 handlep->netdown = 1; 3652 handle->required_select_timeout = &netdown_timeout; 3653 } else if (err == 0) { 3654 /* 3655 * This shouldn't happen, so 3656 * report a special indication 3657 * that it did. 
3658 */ 3659 snprintf(handle->errbuf, 3660 PCAP_ERRBUF_SIZE, 3661 "Error condition on packet socket: Reported error was 0"); 3662 return PCAP_ERROR; 3663 } else { 3664 pcapint_fmt_errmsg_for_errno(handle->errbuf, 3665 PCAP_ERRBUF_SIZE, 3666 err, 3667 "Error condition on packet socket"); 3668 return PCAP_ERROR; 3669 } 3670 } 3671 } 3672 /* 3673 * Now check the event device. 3674 */ 3675 if (pollinfo[1].revents & POLLIN) { 3676 ssize_t nread; 3677 uint64_t value; 3678 3679 /* 3680 * This should never fail, but, just 3681 * in case.... 3682 */ 3683 nread = read(handlep->poll_breakloop_fd, &value, 3684 sizeof(value)); 3685 if (nread == -1) { 3686 pcapint_fmt_errmsg_for_errno(handle->errbuf, 3687 PCAP_ERRBUF_SIZE, 3688 errno, 3689 "Error reading from event FD"); 3690 return PCAP_ERROR; 3691 } 3692 3693 /* 3694 * According to the Linux read(2) man 3695 * page, read() will transfer at most 3696 * 2^31-1 bytes, so the return value is 3697 * either -1 or a value between 0 3698 * and 2^31-1, so it's non-negative. 3699 * 3700 * Cast it to size_t to squelch 3701 * warnings from the compiler; add this 3702 * comment to squelch warnings from 3703 * humans reading the code. :-) 3704 * 3705 * Don't treat an EOF as an error, but 3706 * *do* treat a short read as an error; 3707 * that "shouldn't happen", but.... 3708 */ 3709 if (nread != 0 && 3710 (size_t)nread < sizeof(value)) { 3711 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3712 "Short read from event FD: expected %zu, got %zd", 3713 sizeof(value), nread); 3714 return PCAP_ERROR; 3715 } 3716 3717 /* 3718 * This event gets signaled by a 3719 * pcap_breakloop() call; if we were told 3720 * to break out of the loop, do so. 
3721 */ 3722 if (handle->break_loop) { 3723 handle->break_loop = 0; 3724 return PCAP_ERROR_BREAK; 3725 } 3726 } 3727 } 3728 3729 /* 3730 * Either: 3731 * 3732 * 1) we got neither an error from poll() nor any 3733 * readable descriptors, in which case there 3734 * are no packets waiting to read 3735 * 3736 * or 3737 * 3738 * 2) We got readable descriptors but the PF_PACKET 3739 * socket wasn't one of them, in which case there 3740 * are no packets waiting to read 3741 * 3742 * so, if we got an ENETDOWN, we've drained whatever 3743 * packets were available to read at the point of the 3744 * ENETDOWN. 3745 * 3746 * So, if we got an ENETDOWN and haven't gotten an indication 3747 * that the device has gone away or that the device is up, 3748 * we don't yet know for certain whether the device has 3749 * gone away or not, check whether the device exists and is 3750 * up. 3751 */ 3752 if (handlep->netdown) { 3753 if (!device_still_exists(handle)) { 3754 /* 3755 * The device doesn't exist any more; 3756 * report that. 3757 * 3758 * XXX - we should really return an 3759 * appropriate error for that, but 3760 * pcap_dispatch() etc. aren't documented 3761 * as having error returns other than 3762 * PCAP_ERROR or PCAP_ERROR_BREAK. 3763 */ 3764 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3765 "The interface disappeared"); 3766 return PCAP_ERROR; 3767 } 3768 3769 /* 3770 * The device still exists; try to see if it's up. 3771 */ 3772 memset(&ifr, 0, sizeof(ifr)); 3773 pcapint_strlcpy(ifr.ifr_name, handlep->device, 3774 sizeof(ifr.ifr_name)); 3775 if (ioctl(handle->fd, SIOCGIFFLAGS, &ifr) == -1) { 3776 if (errno == ENXIO || errno == ENODEV) { 3777 /* 3778 * OK, *now* it's gone. 3779 * 3780 * XXX - see above comment. 
3781 */ 3782 snprintf(handle->errbuf, 3783 PCAP_ERRBUF_SIZE, 3784 "The interface disappeared"); 3785 return PCAP_ERROR; 3786 } else { 3787 pcapint_fmt_errmsg_for_errno(handle->errbuf, 3788 PCAP_ERRBUF_SIZE, errno, 3789 "%s: Can't get flags", 3790 handlep->device); 3791 return PCAP_ERROR; 3792 } 3793 } 3794 if (ifr.ifr_flags & IFF_UP) { 3795 /* 3796 * It's up, so it definitely still exists. 3797 * Cancel the ENETDOWN indication - we 3798 * presumably got it due to the interface 3799 * going down rather than the device going 3800 * away - and revert to "no required select 3801 * timeout. 3802 */ 3803 handlep->netdown = 0; 3804 handle->required_select_timeout = NULL; 3805 } 3806 } 3807 3808 /* 3809 * If we're in non-blocking mode, just quit now, rather 3810 * than spinning in a loop doing poll()s that immediately 3811 * time out if there's no indication on any descriptor. 3812 */ 3813 if (handlep->poll_timeout == 0) 3814 break; 3815 } 3816 return 0; 3817 } 3818 3819 /* handle a single memory mapped packet */ 3820 static int pcap_handle_packet_mmap( 3821 pcap_t *handle, 3822 pcap_handler callback, 3823 u_char *user, 3824 unsigned char *frame, 3825 unsigned int tp_len, 3826 unsigned int tp_mac, 3827 unsigned int tp_snaplen, 3828 unsigned int tp_sec, 3829 unsigned int tp_usec, 3830 int tp_vlan_tci_valid, 3831 __u16 tp_vlan_tci, 3832 __u16 tp_vlan_tpid) 3833 { 3834 struct pcap_linux *handlep = handle->priv; 3835 unsigned char *bp; 3836 struct sockaddr_ll *sll; 3837 struct pcap_pkthdr pcaphdr; 3838 unsigned int snaplen = tp_snaplen; 3839 struct utsname utsname; 3840 3841 /* perform sanity check on internal offset. */ 3842 if (tp_mac + tp_snaplen > handle->bufsize) { 3843 /* 3844 * Report some system information as a debugging aid. 
3845 */ 3846 if (uname(&utsname) != -1) { 3847 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3848 "corrupted frame on kernel ring mac " 3849 "offset %u + caplen %u > frame len %d " 3850 "(kernel %.32s version %s, machine %.16s)", 3851 tp_mac, tp_snaplen, handle->bufsize, 3852 utsname.release, utsname.version, 3853 utsname.machine); 3854 } else { 3855 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3856 "corrupted frame on kernel ring mac " 3857 "offset %u + caplen %u > frame len %d", 3858 tp_mac, tp_snaplen, handle->bufsize); 3859 } 3860 return -1; 3861 } 3862 3863 /* run filter on received packet 3864 * If the kernel filtering is enabled we need to run the 3865 * filter until all the frames present into the ring 3866 * at filter creation time are processed. 3867 * In this case, blocks_to_filter_in_userland is used 3868 * as a counter for the packet we need to filter. 3869 * Note: alternatively it could be possible to stop applying 3870 * the filter when the ring became empty, but it can possibly 3871 * happen a lot later... */ 3872 bp = frame + tp_mac; 3873 3874 /* if required build in place the sll header*/ 3875 sll = (void *)(frame + TPACKET_ALIGN(handlep->tp_hdrlen)); 3876 if (handlep->cooked) { 3877 if (handle->linktype == DLT_LINUX_SLL2) { 3878 struct sll2_header *hdrp; 3879 3880 /* 3881 * The kernel should have left us with enough 3882 * space for an sll header; back up the packet 3883 * data pointer into that space, as that'll be 3884 * the beginning of the packet we pass to the 3885 * callback. 3886 */ 3887 bp -= SLL2_HDR_LEN; 3888 3889 /* 3890 * Let's make sure that's past the end of 3891 * the tpacket header, i.e. >= 3892 * ((u_char *)thdr + TPACKET_HDRLEN), so we 3893 * don't step on the header when we construct 3894 * the sll header. 
3895 */ 3896 if (bp < (u_char *)frame + 3897 TPACKET_ALIGN(handlep->tp_hdrlen) + 3898 sizeof(struct sockaddr_ll)) { 3899 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3900 "cooked-mode frame doesn't have room for sll header"); 3901 return -1; 3902 } 3903 3904 /* 3905 * OK, that worked; construct the sll header. 3906 */ 3907 hdrp = (struct sll2_header *)bp; 3908 hdrp->sll2_protocol = sll->sll_protocol; 3909 hdrp->sll2_reserved_mbz = 0; 3910 hdrp->sll2_if_index = htonl(sll->sll_ifindex); 3911 hdrp->sll2_hatype = htons(sll->sll_hatype); 3912 hdrp->sll2_pkttype = sll->sll_pkttype; 3913 hdrp->sll2_halen = sll->sll_halen; 3914 memcpy(hdrp->sll2_addr, sll->sll_addr, SLL_ADDRLEN); 3915 3916 snaplen += sizeof(struct sll2_header); 3917 } else { 3918 struct sll_header *hdrp; 3919 3920 /* 3921 * The kernel should have left us with enough 3922 * space for an sll header; back up the packet 3923 * data pointer into that space, as that'll be 3924 * the beginning of the packet we pass to the 3925 * callback. 3926 */ 3927 bp -= SLL_HDR_LEN; 3928 3929 /* 3930 * Let's make sure that's past the end of 3931 * the tpacket header, i.e. >= 3932 * ((u_char *)thdr + TPACKET_HDRLEN), so we 3933 * don't step on the header when we construct 3934 * the sll header. 3935 */ 3936 if (bp < (u_char *)frame + 3937 TPACKET_ALIGN(handlep->tp_hdrlen) + 3938 sizeof(struct sockaddr_ll)) { 3939 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3940 "cooked-mode frame doesn't have room for sll header"); 3941 return -1; 3942 } 3943 3944 /* 3945 * OK, that worked; construct the sll header. 
3946 */ 3947 hdrp = (struct sll_header *)bp; 3948 hdrp->sll_pkttype = htons(sll->sll_pkttype); 3949 hdrp->sll_hatype = htons(sll->sll_hatype); 3950 hdrp->sll_halen = htons(sll->sll_halen); 3951 memcpy(hdrp->sll_addr, sll->sll_addr, SLL_ADDRLEN); 3952 hdrp->sll_protocol = sll->sll_protocol; 3953 3954 snaplen += sizeof(struct sll_header); 3955 } 3956 } else { 3957 /* 3958 * If this is a packet from a CAN device, so that 3959 * sll->sll_hatype is ARPHRD_CAN, then, as we're 3960 * not capturing in cooked mode, its link-layer 3961 * type is DLT_CAN_SOCKETCAN. Fix up the header 3962 * provided by the code below us to match what 3963 * DLT_CAN_SOCKETCAN is expected to provide. 3964 */ 3965 if (sll->sll_hatype == ARPHRD_CAN) { 3966 pcap_can_socketcan_hdr *canhdr = (pcap_can_socketcan_hdr *)bp; 3967 uint16_t protocol = ntohs(sll->sll_protocol); 3968 3969 /* 3970 * Check the protocol field from the sll header. 3971 * If it's one of the known CAN protocol types, 3972 * make sure the appropriate flags are set, so 3973 * that a program can tell what type of frame 3974 * it is. 3975 * 3976 * The two flags are: 3977 * 3978 * CANFD_FDF, which is in the fd_flags field 3979 * of the CAN classic/CAN FD header; 3980 * 3981 * CANXL_XLF, which is in the flags field 3982 * of the CAN XL header, which overlaps 3983 * the payload_length field of the CAN 3984 * classic/CAN FD header. 3985 */ 3986 switch (protocol) { 3987 3988 case LINUX_SLL_P_CAN: 3989 /* 3990 * CAN classic. 3991 * 3992 * Zero out the fd_flags and reserved 3993 * fields, in case they're uninitialized 3994 * crap, and clear the CANXL_XLF bit in 3995 * the payload_length field. 3996 * 3997 * This means that the CANFD_FDF flag isn't 3998 * set in the fd_flags field, and that 3999 * the CANXL_XLF bit isn't set in the 4000 * payload_length field, so this frame 4001 * will appear to be a CAN classic frame. 
4002 */ 4003 canhdr->payload_length &= ~CANXL_XLF; 4004 canhdr->fd_flags = 0; 4005 canhdr->reserved1 = 0; 4006 canhdr->reserved2 = 0; 4007 break; 4008 4009 case LINUX_SLL_P_CANFD: 4010 /* 4011 * Set CANFD_FDF in the fd_flags field, 4012 * and clear the CANXL_XLF bit in the 4013 * payload_length field, so this frame 4014 * will appear to be a CAN FD frame. 4015 */ 4016 canhdr->payload_length &= ~CANXL_XLF; 4017 canhdr->fd_flags |= CANFD_FDF; 4018 4019 /* 4020 * Zero out all the unknown bits in fd_flags 4021 * and clear the reserved fields, so that 4022 * a program reading this can assume that 4023 * CANFD_FDF is set because we set it, not 4024 * because some uninitialized crap was 4025 * provided in the fd_flags field. 4026 * 4027 * (At least some LINKTYPE_CAN_SOCKETCAN 4028 * files attached to Wireshark bugs had 4029 * uninitialized junk there, so it does 4030 * happen.) 4031 * 4032 * Update this if Linux adds more flag bits 4033 * to the fd_flags field or uses either of 4034 * the reserved fields for FD frames. 4035 */ 4036 canhdr->fd_flags &= (CANFD_FDF|CANFD_ESI|CANFD_BRS); 4037 canhdr->reserved1 = 0; 4038 canhdr->reserved2 = 0; 4039 break; 4040 4041 case LINUX_SLL_P_CANXL: 4042 /* 4043 * CAN XL frame. 4044 * 4045 * Make sure the CANXL_XLF bit is set in 4046 * the payload_length field, so that 4047 * this frame will appear to be a 4048 * CAN XL frame. 4049 */ 4050 canhdr->payload_length |= CANXL_XLF; 4051 break; 4052 } 4053 4054 /* 4055 * Put multi-byte header fields in a byte-order 4056 *-independent format. 4057 */ 4058 if (canhdr->payload_length & CANXL_XLF) { 4059 /* 4060 * This is a CAN XL frame. 4061 * 4062 * DLT_CAN_SOCKETCAN is specified as having 4063 * the Priority ID/VCID field in big-- 4064 * endian byte order, and the payload length 4065 * and Acceptance Field in little-endian byte 4066 * order. but capturing on a CAN device 4067 * provides them in host byte order. 4068 * Convert them to the appropriate byte 4069 * orders. 
4070 * 4071 * The reason we put the first field 4072 * into big-endian byte order is that 4073 * older libpcap code, ignorant of 4074 * CAN XL, treated it as the CAN ID 4075 * field and put it into big-endian 4076 * byte order, and we don't want to 4077 * break code that understands CAN XL 4078 * headers, and treats that field as 4079 * being big-endian. 4080 * 4081 * The other fields are put in little- 4082 * endian byte order is that older 4083 * libpcap code, ignorant of CAN XL, 4084 * left those fields alone, and the 4085 * processors on which the CAN XL 4086 * frames were captured are likely 4087 * to be little-endian processors. 4088 */ 4089 pcap_can_socketcan_xl_hdr *canxl_hdr = (pcap_can_socketcan_xl_hdr *)bp; 4090 4091 #if __BYTE_ORDER == __LITTLE_ENDIAN 4092 /* 4093 * We're capturing on a little-endian 4094 * machine, so we put the priority/VCID 4095 * field into big-endian byte order, and 4096 * leave the payload length and acceptance 4097 * field in little-endian byte order. 4098 */ 4099 /* Byte-swap priority/VCID. */ 4100 canxl_hdr->priority_vcid = SWAPLONG(canxl_hdr->priority_vcid); 4101 #elif __BYTE_ORDER == __BIG_ENDIAN 4102 /* 4103 * We're capturing on a big-endian 4104 * machine, so we want to leave the 4105 * priority/VCID field alone, and byte-swap 4106 * the payload length and acceptance 4107 * fields to little-endian. 4108 */ 4109 /* Byte-swap the payload length */ 4110 canxl_hdr->payload_length = SWAPSHORT(canxl_hdr->payload_length); 4111 4112 /* 4113 * Byte-swap the acceptance field. 4114 * 4115 * XXX - is it just a 4-octet string, 4116 * not in any byte order? 4117 */ 4118 canxl_hdr->acceptance_field = SWAPLONG(canxl_hdr->acceptance_field); 4119 #else 4120 #error "Unknown byte order" 4121 #endif 4122 } else { 4123 /* 4124 * CAN or CAN FD frame. 4125 * 4126 * DLT_CAN_SOCKETCAN is specified as having 4127 * the CAN ID and flags in network byte 4128 * order, but capturing on a CAN device 4129 * provides it in host byte order. 
Convert 4130 * it to network byte order. 4131 */ 4132 canhdr->can_id = htonl(canhdr->can_id); 4133 } 4134 } 4135 } 4136 4137 if (handlep->filter_in_userland && handle->fcode.bf_insns) { 4138 struct pcap_bpf_aux_data aux_data; 4139 4140 aux_data.vlan_tag_present = tp_vlan_tci_valid; 4141 aux_data.vlan_tag = tp_vlan_tci & 0x0fff; 4142 4143 if (pcapint_filter_with_aux_data(handle->fcode.bf_insns, 4144 bp, 4145 tp_len, 4146 snaplen, 4147 &aux_data) == 0) 4148 return 0; 4149 } 4150 4151 if (!linux_check_direction(handle, sll)) 4152 return 0; 4153 4154 /* get required packet info from ring header */ 4155 pcaphdr.ts.tv_sec = tp_sec; 4156 pcaphdr.ts.tv_usec = tp_usec; 4157 pcaphdr.caplen = tp_snaplen; 4158 pcaphdr.len = tp_len; 4159 4160 /* if required build in place the sll header*/ 4161 if (handlep->cooked) { 4162 /* update packet len */ 4163 if (handle->linktype == DLT_LINUX_SLL2) { 4164 pcaphdr.caplen += SLL2_HDR_LEN; 4165 pcaphdr.len += SLL2_HDR_LEN; 4166 } else { 4167 pcaphdr.caplen += SLL_HDR_LEN; 4168 pcaphdr.len += SLL_HDR_LEN; 4169 } 4170 } 4171 4172 if (tp_vlan_tci_valid && 4173 handlep->vlan_offset != -1 && 4174 tp_snaplen >= (unsigned int) handlep->vlan_offset) 4175 { 4176 struct vlan_tag *tag; 4177 4178 /* 4179 * Move everything in the header, except the type field, 4180 * down VLAN_TAG_LEN bytes, to allow us to insert the 4181 * VLAN tag between that stuff and the type field. 4182 */ 4183 bp -= VLAN_TAG_LEN; 4184 memmove(bp, bp + VLAN_TAG_LEN, handlep->vlan_offset); 4185 4186 /* 4187 * Now insert the tag. 4188 */ 4189 tag = (struct vlan_tag *)(bp + handlep->vlan_offset); 4190 tag->vlan_tpid = htons(tp_vlan_tpid); 4191 tag->vlan_tci = htons(tp_vlan_tci); 4192 4193 /* 4194 * Add the tag to the packet lengths. 
4195 */ 4196 pcaphdr.caplen += VLAN_TAG_LEN; 4197 pcaphdr.len += VLAN_TAG_LEN; 4198 } 4199 4200 /* 4201 * The only way to tell the kernel to cut off the 4202 * packet at a snapshot length is with a filter program; 4203 * if there's no filter program, the kernel won't cut 4204 * the packet off. 4205 * 4206 * Trim the snapshot length to be no longer than the 4207 * specified snapshot length. 4208 * 4209 * XXX - an alternative is to put a filter, consisting 4210 * of a "ret <snaplen>" instruction, on the socket 4211 * in the activate routine, so that the truncation is 4212 * done in the kernel even if nobody specified a filter; 4213 * that means that less buffer space is consumed in 4214 * the memory-mapped buffer. 4215 */ 4216 if (pcaphdr.caplen > (bpf_u_int32)handle->snapshot) 4217 pcaphdr.caplen = handle->snapshot; 4218 4219 /* pass the packet to the user */ 4220 callback(user, &pcaphdr, bp); 4221 4222 return 1; 4223 } 4224 4225 static int 4226 pcap_read_linux_mmap_v2(pcap_t *handle, int max_packets, pcap_handler callback, 4227 u_char *user) 4228 { 4229 struct pcap_linux *handlep = handle->priv; 4230 union thdr h; 4231 int pkts = 0; 4232 int ret; 4233 4234 /* wait for frames availability.*/ 4235 h.raw = RING_GET_CURRENT_FRAME(handle); 4236 if (!packet_mmap_acquire(h.h2)) { 4237 /* 4238 * The current frame is owned by the kernel; wait for 4239 * a frame to be handed to us. 4240 */ 4241 ret = pcap_wait_for_frames_mmap(handle); 4242 if (ret) { 4243 return ret; 4244 } 4245 } 4246 4247 /* 4248 * This can conceivably process more than INT_MAX packets, 4249 * which would overflow the packet count, causing it either 4250 * to look like a negative number, and thus cause us to 4251 * return a value that looks like an error, or overflow 4252 * back into positive territory, and thus cause us to 4253 * return a too-low count. 
4254 * 4255 * Therefore, if the packet count is unlimited, we clip 4256 * it at INT_MAX; this routine is not expected to 4257 * process packets indefinitely, so that's not an issue. 4258 */ 4259 if (PACKET_COUNT_IS_UNLIMITED(max_packets)) 4260 max_packets = INT_MAX; 4261 4262 while (pkts < max_packets) { 4263 /* 4264 * Get the current ring buffer frame, and break if 4265 * it's still owned by the kernel. 4266 */ 4267 h.raw = RING_GET_CURRENT_FRAME(handle); 4268 if (!packet_mmap_acquire(h.h2)) 4269 break; 4270 4271 ret = pcap_handle_packet_mmap( 4272 handle, 4273 callback, 4274 user, 4275 h.raw, 4276 h.h2->tp_len, 4277 h.h2->tp_mac, 4278 h.h2->tp_snaplen, 4279 h.h2->tp_sec, 4280 handle->opt.tstamp_precision == PCAP_TSTAMP_PRECISION_NANO ? h.h2->tp_nsec : h.h2->tp_nsec / 1000, 4281 VLAN_VALID(h.h2, h.h2), 4282 h.h2->tp_vlan_tci, 4283 VLAN_TPID(h.h2, h.h2)); 4284 if (ret == 1) { 4285 pkts++; 4286 } else if (ret < 0) { 4287 return ret; 4288 } 4289 4290 /* 4291 * Hand this block back to the kernel, and, if we're 4292 * counting blocks that need to be filtered in userland 4293 * after having been filtered by the kernel, count 4294 * the one we've just processed. 4295 */ 4296 packet_mmap_release(h.h2); 4297 if (handlep->blocks_to_filter_in_userland > 0) { 4298 handlep->blocks_to_filter_in_userland--; 4299 if (handlep->blocks_to_filter_in_userland == 0) { 4300 /* 4301 * No more blocks need to be filtered 4302 * in userland. 
4303 */ 4304 handlep->filter_in_userland = 0; 4305 } 4306 } 4307 4308 /* next block */ 4309 if (++handle->offset >= handle->cc) 4310 handle->offset = 0; 4311 4312 /* check for break loop condition*/ 4313 if (handle->break_loop) { 4314 handle->break_loop = 0; 4315 return PCAP_ERROR_BREAK; 4316 } 4317 } 4318 return pkts; 4319 } 4320 4321 #ifdef HAVE_TPACKET3 4322 static int 4323 pcap_read_linux_mmap_v3(pcap_t *handle, int max_packets, pcap_handler callback, 4324 u_char *user) 4325 { 4326 struct pcap_linux *handlep = handle->priv; 4327 union thdr h; 4328 int pkts = 0; 4329 int ret; 4330 4331 again: 4332 if (handlep->current_packet == NULL) { 4333 /* wait for frames availability.*/ 4334 h.raw = RING_GET_CURRENT_FRAME(handle); 4335 if (!packet_mmap_v3_acquire(h.h3)) { 4336 /* 4337 * The current frame is owned by the kernel; wait 4338 * for a frame to be handed to us. 4339 */ 4340 ret = pcap_wait_for_frames_mmap(handle); 4341 if (ret) { 4342 return ret; 4343 } 4344 } 4345 } 4346 h.raw = RING_GET_CURRENT_FRAME(handle); 4347 if (!packet_mmap_v3_acquire(h.h3)) { 4348 if (pkts == 0 && handlep->timeout == 0) { 4349 /* Block until we see a packet. */ 4350 goto again; 4351 } 4352 return pkts; 4353 } 4354 4355 /* 4356 * This can conceivably process more than INT_MAX packets, 4357 * which would overflow the packet count, causing it either 4358 * to look like a negative number, and thus cause us to 4359 * return a value that looks like an error, or overflow 4360 * back into positive territory, and thus cause us to 4361 * return a too-low count. 4362 * 4363 * Therefore, if the packet count is unlimited, we clip 4364 * it at INT_MAX; this routine is not expected to 4365 * process packets indefinitely, so that's not an issue. 
4366 */ 4367 if (PACKET_COUNT_IS_UNLIMITED(max_packets)) 4368 max_packets = INT_MAX; 4369 4370 while (pkts < max_packets) { 4371 int packets_to_read; 4372 4373 if (handlep->current_packet == NULL) { 4374 h.raw = RING_GET_CURRENT_FRAME(handle); 4375 if (!packet_mmap_v3_acquire(h.h3)) 4376 break; 4377 4378 handlep->current_packet = h.raw + h.h3->hdr.bh1.offset_to_first_pkt; 4379 handlep->packets_left = h.h3->hdr.bh1.num_pkts; 4380 } 4381 packets_to_read = handlep->packets_left; 4382 4383 if (packets_to_read > (max_packets - pkts)) { 4384 /* 4385 * There are more packets in the buffer than 4386 * the number of packets we have left to 4387 * process to get up to the maximum number 4388 * of packets to process. Only process enough 4389 * of them to get us up to that maximum. 4390 */ 4391 packets_to_read = max_packets - pkts; 4392 } 4393 4394 while (packets_to_read-- && !handle->break_loop) { 4395 struct tpacket3_hdr* tp3_hdr = (struct tpacket3_hdr*) handlep->current_packet; 4396 ret = pcap_handle_packet_mmap( 4397 handle, 4398 callback, 4399 user, 4400 handlep->current_packet, 4401 tp3_hdr->tp_len, 4402 tp3_hdr->tp_mac, 4403 tp3_hdr->tp_snaplen, 4404 tp3_hdr->tp_sec, 4405 handle->opt.tstamp_precision == PCAP_TSTAMP_PRECISION_NANO ? tp3_hdr->tp_nsec : tp3_hdr->tp_nsec / 1000, 4406 VLAN_VALID(tp3_hdr, &tp3_hdr->hv1), 4407 tp3_hdr->hv1.tp_vlan_tci, 4408 VLAN_TPID(tp3_hdr, &tp3_hdr->hv1)); 4409 if (ret == 1) { 4410 pkts++; 4411 } else if (ret < 0) { 4412 handlep->current_packet = NULL; 4413 return ret; 4414 } 4415 handlep->current_packet += tp3_hdr->tp_next_offset; 4416 handlep->packets_left--; 4417 } 4418 4419 if (handlep->packets_left <= 0) { 4420 /* 4421 * Hand this block back to the kernel, and, if 4422 * we're counting blocks that need to be 4423 * filtered in userland after having been 4424 * filtered by the kernel, count the one we've 4425 * just processed. 
4426 */ 4427 packet_mmap_v3_release(h.h3); 4428 if (handlep->blocks_to_filter_in_userland > 0) { 4429 handlep->blocks_to_filter_in_userland--; 4430 if (handlep->blocks_to_filter_in_userland == 0) { 4431 /* 4432 * No more blocks need to be filtered 4433 * in userland. 4434 */ 4435 handlep->filter_in_userland = 0; 4436 } 4437 } 4438 4439 /* next block */ 4440 if (++handle->offset >= handle->cc) 4441 handle->offset = 0; 4442 4443 handlep->current_packet = NULL; 4444 } 4445 4446 /* check for break loop condition*/ 4447 if (handle->break_loop) { 4448 handle->break_loop = 0; 4449 return PCAP_ERROR_BREAK; 4450 } 4451 } 4452 if (pkts == 0 && handlep->timeout == 0) { 4453 /* Block until we see a packet. */ 4454 goto again; 4455 } 4456 return pkts; 4457 } 4458 #endif /* HAVE_TPACKET3 */ 4459 4460 /* 4461 * Attach the given BPF code to the packet capture device. 4462 */ 4463 static int 4464 pcap_setfilter_linux(pcap_t *handle, struct bpf_program *filter) 4465 { 4466 struct pcap_linux *handlep; 4467 struct sock_fprog fcode; 4468 int can_filter_in_kernel; 4469 int err = 0; 4470 int n, offset; 4471 4472 if (!handle) 4473 return -1; 4474 if (!filter) { 4475 pcapint_strlcpy(handle->errbuf, "setfilter: No filter specified", 4476 PCAP_ERRBUF_SIZE); 4477 return -1; 4478 } 4479 4480 handlep = handle->priv; 4481 4482 /* Make our private copy of the filter */ 4483 4484 if (pcapint_install_bpf_program(handle, filter) < 0) 4485 /* pcapint_install_bpf_program() filled in errbuf */ 4486 return -1; 4487 4488 /* 4489 * Run user level packet filter by default. Will be overridden if 4490 * installing a kernel filter succeeds. 4491 */ 4492 handlep->filter_in_userland = 1; 4493 4494 /* Install kernel level filter if possible */ 4495 4496 #ifdef USHRT_MAX 4497 if (handle->fcode.bf_len > USHRT_MAX) { 4498 /* 4499 * fcode.len is an unsigned short for current kernel. 4500 * I have yet to see BPF-Code with that much 4501 * instructions but still it is possible. 
So for the 4502 * sake of correctness I added this check. 4503 */ 4504 fprintf(stderr, "Warning: Filter too complex for kernel\n"); 4505 fcode.len = 0; 4506 fcode.filter = NULL; 4507 can_filter_in_kernel = 0; 4508 } else 4509 #endif /* USHRT_MAX */ 4510 { 4511 /* 4512 * Oh joy, the Linux kernel uses struct sock_fprog instead 4513 * of struct bpf_program and of course the length field is 4514 * of different size. Pointed out by Sebastian 4515 * 4516 * Oh, and we also need to fix it up so that all "ret" 4517 * instructions with non-zero operands have MAXIMUM_SNAPLEN 4518 * as the operand if we're not capturing in memory-mapped 4519 * mode, and so that, if we're in cooked mode, all memory- 4520 * reference instructions use special magic offsets in 4521 * references to the link-layer header and assume that the 4522 * link-layer payload begins at 0; "fix_program()" will do 4523 * that. 4524 */ 4525 switch (fix_program(handle, &fcode)) { 4526 4527 case -1: 4528 default: 4529 /* 4530 * Fatal error; just quit. 4531 * (The "default" case shouldn't happen; we 4532 * return -1 for that reason.) 4533 */ 4534 return -1; 4535 4536 case 0: 4537 /* 4538 * The program performed checks that we can't make 4539 * work in the kernel. 4540 */ 4541 can_filter_in_kernel = 0; 4542 break; 4543 4544 case 1: 4545 /* 4546 * We have a filter that'll work in the kernel. 4547 */ 4548 can_filter_in_kernel = 1; 4549 break; 4550 } 4551 } 4552 4553 /* 4554 * NOTE: at this point, we've set both the "len" and "filter" 4555 * fields of "fcode". As of the 2.6.32.4 kernel, at least, 4556 * those are the only members of the "sock_fprog" structure, 4557 * so we initialize every member of that structure. 4558 * 4559 * If there is anything in "fcode" that is not initialized, 4560 * it is either a field added in a later kernel, or it's 4561 * padding. 4562 * 4563 * If a new field is added, this code needs to be updated 4564 * to set it correctly. 
4565 * 4566 * If there are no other fields, then: 4567 * 4568 * if the Linux kernel looks at the padding, it's 4569 * buggy; 4570 * 4571 * if the Linux kernel doesn't look at the padding, 4572 * then if some tool complains that we're passing 4573 * uninitialized data to the kernel, then the tool 4574 * is buggy and needs to understand that it's just 4575 * padding. 4576 */ 4577 if (can_filter_in_kernel) { 4578 if ((err = set_kernel_filter(handle, &fcode)) == 0) 4579 { 4580 /* 4581 * Installation succeeded - using kernel filter, 4582 * so userland filtering not needed. 4583 */ 4584 handlep->filter_in_userland = 0; 4585 } 4586 else if (err == -1) /* Non-fatal error */ 4587 { 4588 /* 4589 * Print a warning if we weren't able to install 4590 * the filter for a reason other than "this kernel 4591 * isn't configured to support socket filters. 4592 */ 4593 if (errno == ENOMEM) { 4594 /* 4595 * Either a kernel memory allocation 4596 * failure occurred, or there's too 4597 * much "other/option memory" allocated 4598 * for this socket. Suggest that they 4599 * increase the "other/option memory" 4600 * limit. 4601 */ 4602 fprintf(stderr, 4603 "Warning: Couldn't allocate kernel memory for filter: try increasing net.core.optmem_max with sysctl\n"); 4604 } else if (errno != ENOPROTOOPT && errno != EOPNOTSUPP) { 4605 fprintf(stderr, 4606 "Warning: Kernel filter failed: %s\n", 4607 pcap_strerror(errno)); 4608 } 4609 } 4610 } 4611 4612 /* 4613 * If we're not using the kernel filter, get rid of any kernel 4614 * filter that might've been there before, e.g. because the 4615 * previous filter could work in the kernel, or because some other 4616 * code attached a filter to the socket by some means other than 4617 * calling "pcap_setfilter()". Otherwise, the kernel filter may 4618 * filter out packets that would pass the new userland filter. 
4619 */ 4620 if (handlep->filter_in_userland) { 4621 if (reset_kernel_filter(handle) == -1) { 4622 pcapint_fmt_errmsg_for_errno(handle->errbuf, 4623 PCAP_ERRBUF_SIZE, errno, 4624 "can't remove kernel filter"); 4625 err = -2; /* fatal error */ 4626 } 4627 } 4628 4629 /* 4630 * Free up the copy of the filter that was made by "fix_program()". 4631 */ 4632 if (fcode.filter != NULL) 4633 free(fcode.filter); 4634 4635 if (err == -2) 4636 /* Fatal error */ 4637 return -1; 4638 4639 /* 4640 * If we're filtering in userland, there's nothing to do; 4641 * the new filter will be used for the next packet. 4642 */ 4643 if (handlep->filter_in_userland) 4644 return 0; 4645 4646 /* 4647 * We're filtering in the kernel; the packets present in 4648 * all blocks currently in the ring were already filtered 4649 * by the old filter, and so will need to be filtered in 4650 * userland by the new filter. 4651 * 4652 * Get an upper bound for the number of such blocks; first, 4653 * walk the ring backward and count the free blocks. 4654 */ 4655 offset = handle->offset; 4656 if (--offset < 0) 4657 offset = handle->cc - 1; 4658 for (n=0; n < handle->cc; ++n) { 4659 if (--offset < 0) 4660 offset = handle->cc - 1; 4661 if (pcap_get_ring_frame_status(handle, offset) != TP_STATUS_KERNEL) 4662 break; 4663 } 4664 4665 /* 4666 * If we found free blocks, decrement the count of free 4667 * blocks by 1, just in case we lost a race with another 4668 * thread of control that was adding a packet while 4669 * we were counting and that had run the filter before 4670 * we changed it. 4671 * 4672 * XXX - could there be more than one block added in 4673 * this fashion? 4674 * 4675 * XXX - is there a way to avoid that race, e.g. somehow 4676 * wait for all packets that passed the old filter to 4677 * be added to the ring? 
4678 */ 4679 if (n != 0) 4680 n--; 4681 4682 /* 4683 * Set the count of blocks worth of packets to filter 4684 * in userland to the total number of blocks in the 4685 * ring minus the number of free blocks we found, and 4686 * turn on userland filtering. (The count of blocks 4687 * worth of packets to filter in userland is guaranteed 4688 * not to be zero - n, above, couldn't be set to a 4689 * value > handle->cc, and if it were equal to 4690 * handle->cc, it wouldn't be zero, and thus would 4691 * be decremented to handle->cc - 1.) 4692 */ 4693 handlep->blocks_to_filter_in_userland = handle->cc - n; 4694 handlep->filter_in_userland = 1; 4695 4696 return 0; 4697 } 4698 4699 /* 4700 * Return the index of the given device name. Fill ebuf and return 4701 * -1 on failure. 4702 */ 4703 static int 4704 iface_get_id(int fd, const char *device, char *ebuf) 4705 { 4706 struct ifreq ifr; 4707 4708 memset(&ifr, 0, sizeof(ifr)); 4709 pcapint_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name)); 4710 4711 if (ioctl(fd, SIOCGIFINDEX, &ifr) == -1) { 4712 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4713 errno, "SIOCGIFINDEX"); 4714 return -1; 4715 } 4716 4717 return ifr.ifr_ifindex; 4718 } 4719 4720 /* 4721 * Bind the socket associated with FD to the given device. 4722 * Return 0 on success or a PCAP_ERROR_ value on a hard error. 4723 */ 4724 static int 4725 iface_bind(int fd, int ifindex, char *ebuf, int protocol) 4726 { 4727 struct sockaddr_ll sll; 4728 int ret, err; 4729 socklen_t errlen = sizeof(err); 4730 4731 memset(&sll, 0, sizeof(sll)); 4732 sll.sll_family = AF_PACKET; 4733 sll.sll_ifindex = ifindex < 0 ? 
0 : ifindex; 4734 sll.sll_protocol = protocol; 4735 4736 if (bind(fd, (struct sockaddr *) &sll, sizeof(sll)) == -1) { 4737 if (errno == ENETDOWN) { 4738 /* 4739 * Return a "network down" indication, so that 4740 * the application can report that rather than 4741 * saying we had a mysterious failure and 4742 * suggest that they report a problem to the 4743 * libpcap developers. 4744 */ 4745 return PCAP_ERROR_IFACE_NOT_UP; 4746 } 4747 if (errno == ENODEV) { 4748 /* 4749 * There's nothing more to say, so clear the 4750 * error message. 4751 */ 4752 ebuf[0] = '\0'; 4753 ret = PCAP_ERROR_NO_SUCH_DEVICE; 4754 } else { 4755 ret = PCAP_ERROR; 4756 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4757 errno, "bind"); 4758 } 4759 return ret; 4760 } 4761 4762 /* Any pending errors, e.g., network is down? */ 4763 4764 if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errlen) == -1) { 4765 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4766 errno, "getsockopt (SO_ERROR)"); 4767 return PCAP_ERROR; 4768 } 4769 4770 if (err == ENETDOWN) { 4771 /* 4772 * Return a "network down" indication, so that 4773 * the application can report that rather than 4774 * saying we had a mysterious failure and 4775 * suggest that they report a problem to the 4776 * libpcap developers. 4777 */ 4778 return PCAP_ERROR_IFACE_NOT_UP; 4779 } else if (err > 0) { 4780 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4781 err, "bind"); 4782 return PCAP_ERROR; 4783 } 4784 4785 return 0; 4786 } 4787 4788 /* 4789 * Try to enter monitor mode. 4790 * If we have libnl, try to create a new monitor-mode device and 4791 * capture on that; otherwise, just say "not supported". 
4792 */ 4793 #ifdef HAVE_LIBNL 4794 static int 4795 enter_rfmon_mode(pcap_t *handle, int sock_fd, const char *device) 4796 { 4797 struct pcap_linux *handlep = handle->priv; 4798 int ret; 4799 char phydev_path[PATH_MAX+1]; 4800 struct nl80211_state nlstate; 4801 struct ifreq ifr; 4802 u_int n; 4803 4804 /* 4805 * Is this a mac80211 device? 4806 */ 4807 ret = get_mac80211_phydev(handle, device, phydev_path, PATH_MAX); 4808 if (ret < 0) 4809 return ret; /* error */ 4810 if (ret == 0) 4811 return 0; /* no error, but not mac80211 device */ 4812 4813 /* 4814 * XXX - is this already a monN device? 4815 * If so, we're done. 4816 */ 4817 4818 /* 4819 * OK, it's apparently a mac80211 device. 4820 * Try to find an unused monN device for it. 4821 */ 4822 ret = nl80211_init(handle, &nlstate, device); 4823 if (ret != 0) 4824 return ret; 4825 for (n = 0; n < UINT_MAX; n++) { 4826 /* 4827 * Try mon{n}. 4828 */ 4829 char mondevice[3+10+1]; /* mon{UINT_MAX}\0 */ 4830 4831 snprintf(mondevice, sizeof mondevice, "mon%u", n); 4832 ret = add_mon_if(handle, sock_fd, &nlstate, device, mondevice); 4833 if (ret == 1) { 4834 /* 4835 * Success. We don't clean up the libnl state 4836 * yet, as we'll be using it later. 4837 */ 4838 goto added; 4839 } 4840 if (ret < 0) { 4841 /* 4842 * Hard failure. Just return ret; handle->errbuf 4843 * has already been set. 4844 */ 4845 nl80211_cleanup(&nlstate); 4846 return ret; 4847 } 4848 } 4849 4850 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 4851 "%s: No free monN interfaces", device); 4852 nl80211_cleanup(&nlstate); 4853 return PCAP_ERROR; 4854 4855 added: 4856 4857 #if 0 4858 /* 4859 * Sleep for .1 seconds. 4860 */ 4861 delay.tv_sec = 0; 4862 delay.tv_nsec = 500000000; 4863 nanosleep(&delay, NULL); 4864 #endif 4865 4866 /* 4867 * If we haven't already done so, arrange to have 4868 * "pcap_close_all()" called when we exit. 
4869 */ 4870 if (!pcapint_do_addexit(handle)) { 4871 /* 4872 * "atexit()" failed; don't put the interface 4873 * in rfmon mode, just give up. 4874 */ 4875 del_mon_if(handle, sock_fd, &nlstate, device, 4876 handlep->mondevice); 4877 nl80211_cleanup(&nlstate); 4878 return PCAP_ERROR; 4879 } 4880 4881 /* 4882 * Now configure the monitor interface up. 4883 */ 4884 memset(&ifr, 0, sizeof(ifr)); 4885 pcapint_strlcpy(ifr.ifr_name, handlep->mondevice, sizeof(ifr.ifr_name)); 4886 if (ioctl(sock_fd, SIOCGIFFLAGS, &ifr) == -1) { 4887 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 4888 errno, "%s: Can't get flags for %s", device, 4889 handlep->mondevice); 4890 del_mon_if(handle, sock_fd, &nlstate, device, 4891 handlep->mondevice); 4892 nl80211_cleanup(&nlstate); 4893 return PCAP_ERROR; 4894 } 4895 ifr.ifr_flags |= IFF_UP|IFF_RUNNING; 4896 if (ioctl(sock_fd, SIOCSIFFLAGS, &ifr) == -1) { 4897 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 4898 errno, "%s: Can't set flags for %s", device, 4899 handlep->mondevice); 4900 del_mon_if(handle, sock_fd, &nlstate, device, 4901 handlep->mondevice); 4902 nl80211_cleanup(&nlstate); 4903 return PCAP_ERROR; 4904 } 4905 4906 /* 4907 * Success. Clean up the libnl state. 4908 */ 4909 nl80211_cleanup(&nlstate); 4910 4911 /* 4912 * Note that we have to delete the monitor device when we close 4913 * the handle. 4914 */ 4915 handlep->must_do_on_close |= MUST_DELETE_MONIF; 4916 4917 /* 4918 * Add this to the list of pcaps to close when we exit. 4919 */ 4920 pcapint_add_to_pcaps_to_close(handle); 4921 4922 return 1; 4923 } 4924 #else /* HAVE_LIBNL */ 4925 static int 4926 enter_rfmon_mode(pcap_t *handle _U_, int sock_fd _U_, const char *device _U_) 4927 { 4928 /* 4929 * We don't have libnl, so we can't do monitor mode. 4930 */ 4931 return 0; 4932 } 4933 #endif /* HAVE_LIBNL */ 4934 4935 #if defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) 4936 /* 4937 * Map SOF_TIMESTAMPING_ values to PCAP_TSTAMP_ values. 
4938 */ 4939 static const struct { 4940 int soft_timestamping_val; 4941 int pcap_tstamp_val; 4942 } sof_ts_type_map[3] = { 4943 { SOF_TIMESTAMPING_SOFTWARE, PCAP_TSTAMP_HOST }, 4944 { SOF_TIMESTAMPING_SYS_HARDWARE, PCAP_TSTAMP_ADAPTER }, 4945 { SOF_TIMESTAMPING_RAW_HARDWARE, PCAP_TSTAMP_ADAPTER_UNSYNCED } 4946 }; 4947 #define NUM_SOF_TIMESTAMPING_TYPES (sizeof sof_ts_type_map / sizeof sof_ts_type_map[0]) 4948 4949 /* 4950 * Set the list of time stamping types to include all types. 4951 */ 4952 static int 4953 iface_set_all_ts_types(pcap_t *handle, char *ebuf) 4954 { 4955 u_int i; 4956 4957 handle->tstamp_type_list = malloc(NUM_SOF_TIMESTAMPING_TYPES * sizeof(u_int)); 4958 if (handle->tstamp_type_list == NULL) { 4959 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4960 errno, "malloc"); 4961 return -1; 4962 } 4963 for (i = 0; i < NUM_SOF_TIMESTAMPING_TYPES; i++) 4964 handle->tstamp_type_list[i] = sof_ts_type_map[i].pcap_tstamp_val; 4965 handle->tstamp_type_count = NUM_SOF_TIMESTAMPING_TYPES; 4966 return 0; 4967 } 4968 4969 /* 4970 * Get a list of time stamp types. 4971 */ 4972 #ifdef ETHTOOL_GET_TS_INFO 4973 static int 4974 iface_get_ts_types(const char *device, pcap_t *handle, char *ebuf) 4975 { 4976 int fd; 4977 struct ifreq ifr; 4978 struct ethtool_ts_info info; 4979 int num_ts_types; 4980 u_int i, j; 4981 4982 /* 4983 * This doesn't apply to the "any" device; you can't say "turn on 4984 * hardware time stamping for all devices that exist now and arrange 4985 * that it be turned on for any device that appears in the future", 4986 * and not all devices even necessarily *support* hardware time 4987 * stamping, so don't report any time stamp types. 4988 */ 4989 if (strcmp(device, "any") == 0) { 4990 handle->tstamp_type_list = NULL; 4991 return 0; 4992 } 4993 4994 /* 4995 * Create a socket from which to fetch time stamping capabilities. 
4996 */ 4997 fd = get_if_ioctl_socket(); 4998 if (fd < 0) { 4999 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 5000 errno, "socket for SIOCETHTOOL(ETHTOOL_GET_TS_INFO)"); 5001 return -1; 5002 } 5003 5004 memset(&ifr, 0, sizeof(ifr)); 5005 pcapint_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name)); 5006 memset(&info, 0, sizeof(info)); 5007 info.cmd = ETHTOOL_GET_TS_INFO; 5008 ifr.ifr_data = (caddr_t)&info; 5009 if (ioctl(fd, SIOCETHTOOL, &ifr) == -1) { 5010 int save_errno = errno; 5011 5012 close(fd); 5013 switch (save_errno) { 5014 5015 case EOPNOTSUPP: 5016 case EINVAL: 5017 /* 5018 * OK, this OS version or driver doesn't support 5019 * asking for the time stamping types, so let's 5020 * just return all the possible types. 5021 */ 5022 if (iface_set_all_ts_types(handle, ebuf) == -1) 5023 return -1; 5024 return 0; 5025 5026 case ENODEV: 5027 /* 5028 * OK, no such device. 5029 * The user will find that out when they try to 5030 * activate the device; just return an empty 5031 * list of time stamp types. 5032 */ 5033 handle->tstamp_type_list = NULL; 5034 return 0; 5035 5036 default: 5037 /* 5038 * Other error. 5039 */ 5040 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 5041 save_errno, 5042 "%s: SIOCETHTOOL(ETHTOOL_GET_TS_INFO) ioctl failed", 5043 device); 5044 return -1; 5045 } 5046 } 5047 close(fd); 5048 5049 /* 5050 * Do we support hardware time stamping of *all* packets? 5051 */ 5052 if (!(info.rx_filters & (1 << HWTSTAMP_FILTER_ALL))) { 5053 /* 5054 * No, so don't report any time stamp types. 5055 * 5056 * XXX - some devices either don't report 5057 * HWTSTAMP_FILTER_ALL when they do support it, or 5058 * report HWTSTAMP_FILTER_ALL but map it to only 5059 * time stamping a few PTP packets. See 5060 * http://marc.info/?l=linux-netdev&m=146318183529571&w=2 5061 * 5062 * Maybe that got fixed later. 
5063 */ 5064 handle->tstamp_type_list = NULL; 5065 return 0; 5066 } 5067 5068 num_ts_types = 0; 5069 for (i = 0; i < NUM_SOF_TIMESTAMPING_TYPES; i++) { 5070 if (info.so_timestamping & sof_ts_type_map[i].soft_timestamping_val) 5071 num_ts_types++; 5072 } 5073 if (num_ts_types != 0) { 5074 handle->tstamp_type_list = malloc(num_ts_types * sizeof(u_int)); 5075 if (handle->tstamp_type_list == NULL) { 5076 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 5077 errno, "malloc"); 5078 return -1; 5079 } 5080 for (i = 0, j = 0; i < NUM_SOF_TIMESTAMPING_TYPES; i++) { 5081 if (info.so_timestamping & sof_ts_type_map[i].soft_timestamping_val) { 5082 handle->tstamp_type_list[j] = sof_ts_type_map[i].pcap_tstamp_val; 5083 j++; 5084 } 5085 } 5086 handle->tstamp_type_count = num_ts_types; 5087 } else 5088 handle->tstamp_type_list = NULL; 5089 5090 return 0; 5091 } 5092 #else /* ETHTOOL_GET_TS_INFO */ 5093 static int 5094 iface_get_ts_types(const char *device, pcap_t *handle, char *ebuf) 5095 { 5096 /* 5097 * This doesn't apply to the "any" device; you can't say "turn on 5098 * hardware time stamping for all devices that exist now and arrange 5099 * that it be turned on for any device that appears in the future", 5100 * and not all devices even necessarily *support* hardware time 5101 * stamping, so don't report any time stamp types. 5102 */ 5103 if (strcmp(device, "any") == 0) { 5104 handle->tstamp_type_list = NULL; 5105 return 0; 5106 } 5107 5108 /* 5109 * We don't have an ioctl to use to ask what's supported, 5110 * so say we support everything. 5111 */ 5112 if (iface_set_all_ts_types(handle, ebuf) == -1) 5113 return -1; 5114 return 0; 5115 } 5116 #endif /* ETHTOOL_GET_TS_INFO */ 5117 #else /* defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) */ 5118 static int 5119 iface_get_ts_types(const char *device _U_, pcap_t *p _U_, char *ebuf _U_) 5120 { 5121 /* 5122 * Nothing to fetch, so it always "succeeds". 
5123 */ 5124 return 0; 5125 } 5126 #endif /* defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) */ 5127 5128 /* 5129 * Find out if we have any form of fragmentation/reassembly offloading. 5130 * 5131 * We do so using SIOCETHTOOL checking for various types of offloading; 5132 * if SIOCETHTOOL isn't defined, or we don't have any #defines for any 5133 * of the types of offloading, there's nothing we can do to check, so 5134 * we just say "no, we don't". 5135 * 5136 * We treat EOPNOTSUPP, EINVAL and, if eperm_ok is true, EPERM as 5137 * indications that the operation isn't supported. We do EPERM 5138 * weirdly because the SIOCETHTOOL code in later kernels 1) doesn't 5139 * support ETHTOOL_GUFO, 2) also doesn't include it in the list 5140 * of ethtool operations that don't require CAP_NET_ADMIN privileges, 5141 * and 3) does the "is this permitted" check before doing the "is 5142 * this even supported" check, so it fails with "this is not permitted" 5143 * rather than "this is not even supported". To work around this 5144 * annoyance, we only treat EPERM as an error for the first feature, 5145 * and assume that they all do the same permission checks, so if the 5146 * first one is allowed all the others are allowed if supported. 
5147 */ 5148 #if defined(SIOCETHTOOL) && (defined(ETHTOOL_GTSO) || defined(ETHTOOL_GUFO) || defined(ETHTOOL_GGSO) || defined(ETHTOOL_GFLAGS) || defined(ETHTOOL_GGRO)) 5149 static int 5150 iface_ethtool_flag_ioctl(pcap_t *handle, int cmd, const char *cmdname, 5151 int eperm_ok) 5152 { 5153 struct ifreq ifr; 5154 struct ethtool_value eval; 5155 5156 memset(&ifr, 0, sizeof(ifr)); 5157 pcapint_strlcpy(ifr.ifr_name, handle->opt.device, sizeof(ifr.ifr_name)); 5158 eval.cmd = cmd; 5159 eval.data = 0; 5160 ifr.ifr_data = (caddr_t)&eval; 5161 if (ioctl(handle->fd, SIOCETHTOOL, &ifr) == -1) { 5162 if (errno == EOPNOTSUPP || errno == EINVAL || 5163 (errno == EPERM && eperm_ok)) { 5164 /* 5165 * OK, let's just return 0, which, in our 5166 * case, either means "no, what we're asking 5167 * about is not enabled" or "all the flags 5168 * are clear (i.e., nothing is enabled)". 5169 */ 5170 return 0; 5171 } 5172 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 5173 errno, "%s: SIOCETHTOOL(%s) ioctl failed", 5174 handle->opt.device, cmdname); 5175 return -1; 5176 } 5177 return eval.data; 5178 } 5179 5180 /* 5181 * XXX - it's annoying that we have to check for offloading at all, but, 5182 * given that we have to, it's still annoying that we have to check for 5183 * particular types of offloading, especially that shiny new types of 5184 * offloading may be added - and, worse, may not be checkable with 5185 * a particular ETHTOOL_ operation; ETHTOOL_GFEATURES would, in 5186 * theory, give those to you, but the actual flags being used are 5187 * opaque (defined in a non-uapi header), and there doesn't seem to 5188 * be any obvious way to ask the kernel what all the offloading flags 5189 * are - at best, you can ask for a set of strings(!) to get *names* 5190 * for various flags. 
(That whole mechanism appears to have been 5191 * designed for the sole purpose of letting ethtool report flags 5192 * by name and set flags by name, with the names having no semantics 5193 * ethtool understands.) 5194 */ 5195 static int 5196 iface_get_offload(pcap_t *handle) 5197 { 5198 int ret; 5199 5200 #ifdef ETHTOOL_GTSO 5201 ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GTSO, "ETHTOOL_GTSO", 0); 5202 if (ret == -1) 5203 return -1; 5204 if (ret) 5205 return 1; /* TCP segmentation offloading on */ 5206 #endif 5207 5208 #ifdef ETHTOOL_GGSO 5209 /* 5210 * XXX - will this cause large unsegmented packets to be 5211 * handed to PF_PACKET sockets on transmission? If not, 5212 * this need not be checked. 5213 */ 5214 ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GGSO, "ETHTOOL_GGSO", 0); 5215 if (ret == -1) 5216 return -1; 5217 if (ret) 5218 return 1; /* generic segmentation offloading on */ 5219 #endif 5220 5221 #ifdef ETHTOOL_GFLAGS 5222 ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GFLAGS, "ETHTOOL_GFLAGS", 0); 5223 if (ret == -1) 5224 return -1; 5225 if (ret & ETH_FLAG_LRO) 5226 return 1; /* large receive offloading on */ 5227 #endif 5228 5229 #ifdef ETHTOOL_GGRO 5230 /* 5231 * XXX - will this cause large reassembled packets to be 5232 * handed to PF_PACKET sockets on receipt? If not, 5233 * this need not be checked. 5234 */ 5235 ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GGRO, "ETHTOOL_GGRO", 0); 5236 if (ret == -1) 5237 return -1; 5238 if (ret) 5239 return 1; /* generic (large) receive offloading on */ 5240 #endif 5241 5242 #ifdef ETHTOOL_GUFO 5243 /* 5244 * Do this one last, as support for it was removed in later 5245 * kernels, and it fails with EPERM on those kernels rather 5246 * than with EOPNOTSUPP (see explanation in comment for 5247 * iface_ethtool_flag_ioctl()). 
5248 */ 5249 ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GUFO, "ETHTOOL_GUFO", 1); 5250 if (ret == -1) 5251 return -1; 5252 if (ret) 5253 return 1; /* UDP fragmentation offloading on */ 5254 #endif 5255 5256 return 0; 5257 } 5258 #else /* SIOCETHTOOL */ 5259 static int 5260 iface_get_offload(pcap_t *handle _U_) 5261 { 5262 /* 5263 * XXX - do we need to get this information if we don't 5264 * have the ethtool ioctls? If so, how do we do that? 5265 */ 5266 return 0; 5267 } 5268 #endif /* SIOCETHTOOL */ 5269 5270 static struct dsa_proto { 5271 const char *name; 5272 bpf_u_int32 linktype; 5273 } dsa_protos[] = { 5274 /* 5275 * None is special and indicates that the interface does not have 5276 * any tagging protocol configured, and is therefore a standard 5277 * Ethernet interface. 5278 */ 5279 { "none", DLT_EN10MB }, 5280 { "brcm", DLT_DSA_TAG_BRCM }, 5281 { "brcm-prepend", DLT_DSA_TAG_BRCM_PREPEND }, 5282 { "dsa", DLT_DSA_TAG_DSA }, 5283 { "edsa", DLT_DSA_TAG_EDSA }, 5284 }; 5285 5286 static int 5287 iface_dsa_get_proto_info(const char *device, pcap_t *handle) 5288 { 5289 char *pathstr; 5290 unsigned int i; 5291 /* 5292 * Make this significantly smaller than PCAP_ERRBUF_SIZE; 5293 * the tag *shouldn't* have some huge long name, and making 5294 * it smaller keeps newer versions of GCC from whining that 5295 * the error message if we don't support the tag could 5296 * overflow the error message buffer. 
5297 */ 5298 char buf[128]; 5299 ssize_t r; 5300 int fd; 5301 5302 fd = asprintf(&pathstr, "/sys/class/net/%s/dsa/tagging", device); 5303 if (fd < 0) { 5304 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 5305 fd, "asprintf"); 5306 return PCAP_ERROR; 5307 } 5308 5309 fd = open(pathstr, O_RDONLY); 5310 free(pathstr); 5311 /* 5312 * This is not fatal, kernel >= 4.20 *might* expose this attribute 5313 */ 5314 if (fd < 0) 5315 return 0; 5316 5317 r = read(fd, buf, sizeof(buf) - 1); 5318 if (r <= 0) { 5319 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 5320 errno, "read"); 5321 close(fd); 5322 return PCAP_ERROR; 5323 } 5324 close(fd); 5325 5326 /* 5327 * Buffer should be LF terminated. 5328 */ 5329 if (buf[r - 1] == '\n') 5330 r--; 5331 buf[r] = '\0'; 5332 5333 for (i = 0; i < sizeof(dsa_protos) / sizeof(dsa_protos[0]); i++) { 5334 if (strlen(dsa_protos[i].name) == (size_t)r && 5335 strcmp(buf, dsa_protos[i].name) == 0) { 5336 handle->linktype = dsa_protos[i].linktype; 5337 switch (dsa_protos[i].linktype) { 5338 case DLT_EN10MB: 5339 return 0; 5340 default: 5341 return 1; 5342 } 5343 } 5344 } 5345 5346 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 5347 "unsupported DSA tag: %s", buf); 5348 5349 return PCAP_ERROR; 5350 } 5351 5352 /* 5353 * Query the kernel for the MTU of the given interface. 5354 */ 5355 static int 5356 iface_get_mtu(int fd, const char *device, char *ebuf) 5357 { 5358 struct ifreq ifr; 5359 5360 if (!device) 5361 return BIGGER_THAN_ALL_MTUS; 5362 5363 memset(&ifr, 0, sizeof(ifr)); 5364 pcapint_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name)); 5365 5366 if (ioctl(fd, SIOCGIFMTU, &ifr) == -1) { 5367 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 5368 errno, "SIOCGIFMTU"); 5369 return -1; 5370 } 5371 5372 return ifr.ifr_mtu; 5373 } 5374 5375 /* 5376 * Get the hardware type of the given interface as ARPHRD_xxx constant. 
5377 */ 5378 static int 5379 iface_get_arptype(int fd, const char *device, char *ebuf) 5380 { 5381 struct ifreq ifr; 5382 int ret; 5383 5384 memset(&ifr, 0, sizeof(ifr)); 5385 pcapint_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name)); 5386 5387 if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) { 5388 if (errno == ENODEV) { 5389 /* 5390 * No such device. 5391 * 5392 * There's nothing more to say, so clear 5393 * the error message. 5394 */ 5395 ret = PCAP_ERROR_NO_SUCH_DEVICE; 5396 ebuf[0] = '\0'; 5397 } else { 5398 ret = PCAP_ERROR; 5399 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 5400 errno, "SIOCGIFHWADDR"); 5401 } 5402 return ret; 5403 } 5404 5405 return ifr.ifr_hwaddr.sa_family; 5406 } 5407 5408 static int 5409 fix_program(pcap_t *handle, struct sock_fprog *fcode) 5410 { 5411 struct pcap_linux *handlep = handle->priv; 5412 size_t prog_size; 5413 register int i; 5414 register struct bpf_insn *p; 5415 struct bpf_insn *f; 5416 int len; 5417 5418 /* 5419 * Make a copy of the filter, and modify that copy if 5420 * necessary. 5421 */ 5422 prog_size = sizeof(*handle->fcode.bf_insns) * handle->fcode.bf_len; 5423 len = handle->fcode.bf_len; 5424 f = (struct bpf_insn *)malloc(prog_size); 5425 if (f == NULL) { 5426 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 5427 errno, "malloc"); 5428 return -1; 5429 } 5430 memcpy(f, handle->fcode.bf_insns, prog_size); 5431 fcode->len = len; 5432 fcode->filter = (struct sock_filter *) f; 5433 5434 for (i = 0; i < len; ++i) { 5435 p = &f[i]; 5436 /* 5437 * What type of instruction is this? 5438 */ 5439 switch (BPF_CLASS(p->code)) { 5440 5441 case BPF_LD: 5442 case BPF_LDX: 5443 /* 5444 * It's a load instruction; is it loading 5445 * from the packet? 5446 */ 5447 switch (BPF_MODE(p->code)) { 5448 5449 case BPF_ABS: 5450 case BPF_IND: 5451 case BPF_MSH: 5452 /* 5453 * Yes; are we in cooked mode? 5454 */ 5455 if (handlep->cooked) { 5456 /* 5457 * Yes, so we need to fix this 5458 * instruction. 
5459 */ 5460 if (fix_offset(handle, p) < 0) { 5461 /* 5462 * We failed to do so. 5463 * Return 0, so our caller 5464 * knows to punt to userland. 5465 */ 5466 return 0; 5467 } 5468 } 5469 break; 5470 } 5471 break; 5472 } 5473 } 5474 return 1; /* we succeeded */ 5475 } 5476 5477 static int 5478 fix_offset(pcap_t *handle, struct bpf_insn *p) 5479 { 5480 /* 5481 * Existing references to auxiliary data shouldn't be adjusted. 5482 * 5483 * Note that SKF_AD_OFF is negative, but p->k is unsigned, so 5484 * we use >= and cast SKF_AD_OFF to unsigned. 5485 */ 5486 if (p->k >= (bpf_u_int32)SKF_AD_OFF) 5487 return 0; 5488 if (handle->linktype == DLT_LINUX_SLL2) { 5489 /* 5490 * What's the offset? 5491 */ 5492 if (p->k >= SLL2_HDR_LEN) { 5493 /* 5494 * It's within the link-layer payload; that starts 5495 * at an offset of 0, as far as the kernel packet 5496 * filter is concerned, so subtract the length of 5497 * the link-layer header. 5498 */ 5499 p->k -= SLL2_HDR_LEN; 5500 } else if (p->k == 0) { 5501 /* 5502 * It's the protocol field; map it to the 5503 * special magic kernel offset for that field. 5504 */ 5505 p->k = SKF_AD_OFF + SKF_AD_PROTOCOL; 5506 } else if (p->k == 4) { 5507 /* 5508 * It's the ifindex field; map it to the 5509 * special magic kernel offset for that field. 5510 */ 5511 p->k = SKF_AD_OFF + SKF_AD_IFINDEX; 5512 } else if (p->k == 10) { 5513 /* 5514 * It's the packet type field; map it to the 5515 * special magic kernel offset for that field. 5516 */ 5517 p->k = SKF_AD_OFF + SKF_AD_PKTTYPE; 5518 } else if ((bpf_int32)(p->k) > 0) { 5519 /* 5520 * It's within the header, but it's not one of 5521 * those fields; we can't do that in the kernel, 5522 * so punt to userland. 5523 */ 5524 return -1; 5525 } 5526 } else { 5527 /* 5528 * What's the offset? 
5529 */ 5530 if (p->k >= SLL_HDR_LEN) { 5531 /* 5532 * It's within the link-layer payload; that starts 5533 * at an offset of 0, as far as the kernel packet 5534 * filter is concerned, so subtract the length of 5535 * the link-layer header. 5536 */ 5537 p->k -= SLL_HDR_LEN; 5538 } else if (p->k == 0) { 5539 /* 5540 * It's the packet type field; map it to the 5541 * special magic kernel offset for that field. 5542 */ 5543 p->k = SKF_AD_OFF + SKF_AD_PKTTYPE; 5544 } else if (p->k == 14) { 5545 /* 5546 * It's the protocol field; map it to the 5547 * special magic kernel offset for that field. 5548 */ 5549 p->k = SKF_AD_OFF + SKF_AD_PROTOCOL; 5550 } else if ((bpf_int32)(p->k) > 0) { 5551 /* 5552 * It's within the header, but it's not one of 5553 * those fields; we can't do that in the kernel, 5554 * so punt to userland. 5555 */ 5556 return -1; 5557 } 5558 } 5559 return 0; 5560 } 5561 5562 static int 5563 set_kernel_filter(pcap_t *handle, struct sock_fprog *fcode) 5564 { 5565 int total_filter_on = 0; 5566 int save_mode; 5567 int ret; 5568 int save_errno; 5569 5570 /* 5571 * The socket filter code doesn't discard all packets queued 5572 * up on the socket when the filter is changed; this means 5573 * that packets that don't match the new filter may show up 5574 * after the new filter is put onto the socket, if those 5575 * packets haven't yet been read. 5576 * 5577 * This means, for example, that if you do a tcpdump capture 5578 * with a filter, the first few packets in the capture might 5579 * be packets that wouldn't have passed the filter. 5580 * 5581 * We therefore discard all packets queued up on the socket 5582 * when setting a kernel filter. (This isn't an issue for 5583 * userland filters, as the userland filtering is done after 5584 * packets are queued up.) 5585 * 5586 * To flush those packets, we put the socket in read-only mode, 5587 * and read packets from the socket until there are no more to 5588 * read. 
5589 * 5590 * In order to keep that from being an infinite loop - i.e., 5591 * to keep more packets from arriving while we're draining 5592 * the queue - we put the "total filter", which is a filter 5593 * that rejects all packets, onto the socket before draining 5594 * the queue. 5595 * 5596 * This code deliberately ignores any errors, so that you may 5597 * get bogus packets if an error occurs, rather than having 5598 * the filtering done in userland even if it could have been 5599 * done in the kernel. 5600 */ 5601 if (setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER, 5602 &total_fcode, sizeof(total_fcode)) == 0) { 5603 char drain[1]; 5604 5605 /* 5606 * Note that we've put the total filter onto the socket. 5607 */ 5608 total_filter_on = 1; 5609 5610 /* 5611 * Save the socket's current mode, and put it in 5612 * non-blocking mode; we drain it by reading packets 5613 * until we get an error (which is normally a 5614 * "nothing more to be read" error). 5615 */ 5616 save_mode = fcntl(handle->fd, F_GETFL, 0); 5617 if (save_mode == -1) { 5618 pcapint_fmt_errmsg_for_errno(handle->errbuf, 5619 PCAP_ERRBUF_SIZE, errno, 5620 "can't get FD flags when changing filter"); 5621 return -2; 5622 } 5623 if (fcntl(handle->fd, F_SETFL, save_mode | O_NONBLOCK) < 0) { 5624 pcapint_fmt_errmsg_for_errno(handle->errbuf, 5625 PCAP_ERRBUF_SIZE, errno, 5626 "can't set nonblocking mode when changing filter"); 5627 return -2; 5628 } 5629 while (recv(handle->fd, &drain, sizeof drain, MSG_TRUNC) >= 0) 5630 ; 5631 save_errno = errno; 5632 if (save_errno != EAGAIN) { 5633 /* 5634 * Fatal error. 5635 * 5636 * If we can't restore the mode or reset the 5637 * kernel filter, there's nothing we can do. 
5638 */ 5639 (void)fcntl(handle->fd, F_SETFL, save_mode); 5640 (void)reset_kernel_filter(handle); 5641 pcapint_fmt_errmsg_for_errno(handle->errbuf, 5642 PCAP_ERRBUF_SIZE, save_errno, 5643 "recv failed when changing filter"); 5644 return -2; 5645 } 5646 if (fcntl(handle->fd, F_SETFL, save_mode) == -1) { 5647 pcapint_fmt_errmsg_for_errno(handle->errbuf, 5648 PCAP_ERRBUF_SIZE, errno, 5649 "can't restore FD flags when changing filter"); 5650 return -2; 5651 } 5652 } 5653 5654 /* 5655 * Now attach the new filter. 5656 */ 5657 ret = setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER, 5658 fcode, sizeof(*fcode)); 5659 if (ret == -1 && total_filter_on) { 5660 /* 5661 * Well, we couldn't set that filter on the socket, 5662 * but we could set the total filter on the socket. 5663 * 5664 * This could, for example, mean that the filter was 5665 * too big to put into the kernel, so we'll have to 5666 * filter in userland; in any case, we'll be doing 5667 * filtering in userland, so we need to remove the 5668 * total filter so we see packets. 5669 */ 5670 save_errno = errno; 5671 5672 /* 5673 * If this fails, we're really screwed; we have the 5674 * total filter on the socket, and it won't come off. 5675 * Report it as a fatal error. 5676 */ 5677 if (reset_kernel_filter(handle) == -1) { 5678 pcapint_fmt_errmsg_for_errno(handle->errbuf, 5679 PCAP_ERRBUF_SIZE, errno, 5680 "can't remove kernel total filter"); 5681 return -2; /* fatal error */ 5682 } 5683 5684 errno = save_errno; 5685 } 5686 return ret; 5687 } 5688 5689 static int 5690 reset_kernel_filter(pcap_t *handle) 5691 { 5692 int ret; 5693 /* 5694 * setsockopt() barfs unless it get a dummy parameter. 5695 * valgrind whines unless the value is initialized, 5696 * as it has no idea that setsockopt() ignores its 5697 * parameter. 
5698 */ 5699 int dummy = 0; 5700 5701 ret = setsockopt(handle->fd, SOL_SOCKET, SO_DETACH_FILTER, 5702 &dummy, sizeof(dummy)); 5703 /* 5704 * Ignore ENOENT - it means "we don't have a filter", so there 5705 * was no filter to remove, and there's still no filter. 5706 * 5707 * Also ignore ENONET, as a lot of kernel versions had a 5708 * typo where ENONET, rather than ENOENT, was returned. 5709 */ 5710 if (ret == -1 && errno != ENOENT && errno != ENONET) 5711 return -1; 5712 return 0; 5713 } 5714 5715 int 5716 pcap_set_protocol_linux(pcap_t *p, int protocol) 5717 { 5718 if (pcapint_check_activated(p)) 5719 return (PCAP_ERROR_ACTIVATED); 5720 p->opt.protocol = protocol; 5721 return (0); 5722 } 5723 5724 /* 5725 * Libpcap version string. 5726 */ 5727 const char * 5728 pcap_lib_version(void) 5729 { 5730 #if defined(HAVE_TPACKET3) 5731 return (PCAP_VERSION_STRING " (with TPACKET_V3)"); 5732 #else 5733 return (PCAP_VERSION_STRING " (with TPACKET_V2)"); 5734 #endif 5735 } 5736