1 /* 2 * pcap-linux.c: Packet capture interface to the Linux kernel 3 * 4 * Copyright (c) 2000 Torsten Landschoff <torsten@debian.org> 5 * Sebastian Krahmer <krahmer@cs.uni-potsdam.de> 6 * 7 * License: BSD 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in 17 * the documentation and/or other materials provided with the 18 * distribution. 19 * 3. The names of the authors may not be used to endorse or promote 20 * products derived from this software without specific prior 21 * written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 24 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 25 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 26 * 27 * Modifications: Added PACKET_MMAP support 28 * Paolo Abeni <paolo.abeni@email.it> 29 * Added TPACKET_V3 support 30 * Gabor Tatarka <gabor.tatarka@ericsson.com> 31 * 32 * based on previous works of: 33 * Simon Patarin <patarin@cs.unibo.it> 34 * Phil Wood <cpw@lanl.gov> 35 * 36 * Monitor-mode support for mac80211 includes code taken from the iw 37 * command; the copyright notice for that code is 38 * 39 * Copyright (c) 2007, 2008 Johannes Berg 40 * Copyright (c) 2007 Andy Lutomirski 41 * Copyright (c) 2007 Mike Kershaw 42 * Copyright (c) 2008 Gábor Stefanik 43 * 44 * All rights reserved. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted provided that the following conditions 48 * are met: 49 * 1. 
Redistributions of source code must retain the above copyright 50 * notice, this list of conditions and the following disclaimer. 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 3. The name of the author may not be used to endorse or promote products 55 * derived from this software without specific prior written permission. 56 * 57 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 58 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 59 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 60 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 61 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 62 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 63 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 64 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 65 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 66 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 67 * SUCH DAMAGE. 
68 */ 69 70 71 #define _GNU_SOURCE 72 73 #ifdef HAVE_CONFIG_H 74 #include <config.h> 75 #endif 76 77 #include <errno.h> 78 #include <stdio.h> 79 #include <stdlib.h> 80 #include <unistd.h> 81 #include <fcntl.h> 82 #include <string.h> 83 #include <limits.h> 84 #include <sys/stat.h> 85 #include <sys/socket.h> 86 #include <sys/ioctl.h> 87 #include <sys/utsname.h> 88 #include <sys/mman.h> 89 #include <linux/if.h> 90 #include <linux/if_packet.h> 91 #include <linux/sockios.h> 92 #include <linux/ethtool.h> 93 #include <netinet/in.h> 94 #include <linux/if_ether.h> 95 #include <linux/if_arp.h> 96 #include <poll.h> 97 #include <dirent.h> 98 #include <sys/eventfd.h> 99 100 #include "pcap-int.h" 101 #include "pcap/sll.h" 102 #include "pcap/vlan.h" 103 #include "pcap/can_socketcan.h" 104 105 #include "diag-control.h" 106 107 /* 108 * We require TPACKET_V2 support. 109 */ 110 #ifndef TPACKET2_HDRLEN 111 #error "Libpcap will only work if TPACKET_V2 is supported; you must build for a 2.6.27 or later kernel" 112 #endif 113 114 /* check for memory mapped access avaibility. We assume every needed 115 * struct is defined if the macro TPACKET_HDRLEN is defined, because it 116 * uses many ring related structs and macros */ 117 #ifdef TPACKET3_HDRLEN 118 # define HAVE_TPACKET3 119 #endif /* TPACKET3_HDRLEN */ 120 121 /* 122 * Not all compilers that are used to compile code to run on Linux have 123 * these builtins. For example, older versions of GCC don't, and at 124 * least some people are doing cross-builds for MIPS with older versions 125 * of GCC. 
126 */ 127 #ifndef HAVE___ATOMIC_LOAD_N 128 #define __atomic_load_n(ptr, memory_model) (*(ptr)) 129 #endif 130 #ifndef HAVE___ATOMIC_STORE_N 131 #define __atomic_store_n(ptr, val, memory_model) *(ptr) = (val) 132 #endif 133 134 #define packet_mmap_acquire(pkt) \ 135 (__atomic_load_n(&pkt->tp_status, __ATOMIC_ACQUIRE) != TP_STATUS_KERNEL) 136 #define packet_mmap_release(pkt) \ 137 (__atomic_store_n(&pkt->tp_status, TP_STATUS_KERNEL, __ATOMIC_RELEASE)) 138 #define packet_mmap_v3_acquire(pkt) \ 139 (__atomic_load_n(&pkt->hdr.bh1.block_status, __ATOMIC_ACQUIRE) != TP_STATUS_KERNEL) 140 #define packet_mmap_v3_release(pkt) \ 141 (__atomic_store_n(&pkt->hdr.bh1.block_status, TP_STATUS_KERNEL, __ATOMIC_RELEASE)) 142 143 #include <linux/types.h> 144 #include <linux/filter.h> 145 146 #ifdef HAVE_LINUX_NET_TSTAMP_H 147 #include <linux/net_tstamp.h> 148 #endif 149 150 /* 151 * For checking whether a device is a bonding device. 152 */ 153 #include <linux/if_bonding.h> 154 155 /* 156 * Got libnl? 157 */ 158 #ifdef HAVE_LIBNL 159 #include <linux/nl80211.h> 160 161 #include <netlink/genl/genl.h> 162 #include <netlink/genl/family.h> 163 #include <netlink/genl/ctrl.h> 164 #include <netlink/msg.h> 165 #include <netlink/attr.h> 166 #endif /* HAVE_LIBNL */ 167 168 #ifndef HAVE_SOCKLEN_T 169 typedef int socklen_t; 170 #endif 171 172 #define MAX_LINKHEADER_SIZE 256 173 174 /* 175 * When capturing on all interfaces we use this as the buffer size. 176 * Should be bigger then all MTUs that occur in real life. 177 * 64kB should be enough for now. 178 */ 179 #define BIGGER_THAN_ALL_MTUS (64*1024) 180 181 /* 182 * Private data for capturing on Linux PF_PACKET sockets. 
*/
struct pcap_linux {
	long long sysfs_dropped; /* packets reported dropped by /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors */
	struct pcap_stat stat;	/* NOTE(review): presumably the statistics accumulated for this handle - confirm against pcap_stats_linux() */

	char	*device;	/* device name; a strdup()ed copy made at activation time and freed on cleanup */
	int	filter_in_userland; /* must filter in userland */
	int	blocks_to_filter_in_userland; /* NOTE(review): looks like a count of ring blocks still needing userland filtering - confirm against the filter-setting code */
	int	must_do_on_close; /* stuff we must do when we close; a bit mask of the MUST_ flags below */
	int	timeout;	/* timeout for buffering; copied from the handle's opt.timeout at activation time */
	int	cooked;		/* using SOCK_DGRAM rather than SOCK_RAW */
	int	ifindex;	/* interface index of device we're bound to */
	int	lo_ifindex;	/* interface index of the loopback device */
	int	netdown;	/* we got an ENETDOWN and haven't resolved it */
	bpf_u_int32 oldmode;	/* mode to restore when turning monitor mode off */
	char	*mondevice;	/* mac80211 monitor device we created; heap-allocated, freed on cleanup */
	u_char	*mmapbuf;	/* memory-mapped region pointer */
	size_t	mmapbuflen;	/* size of region */
	int	vlan_offset;	/* offset at which to insert vlan tags; if -1, don't insert */
	u_int	tp_version;	/* version of tpacket_hdr for mmaped ring */
	u_int	tp_hdrlen;	/* hdrlen of tpacket_hdr for mmaped ring */
	u_char	*oneshot_buffer; /* buffer for copy of packet */
	int	poll_timeout;	/* timeout to use in poll(): -1 blocks forever, 0 doesn't wait at all (see set_poll_timeout()) */
#ifdef HAVE_TPACKET3
	unsigned char *current_packet; /* Current packet within the TPACKET_V3 block. Move to next block if NULL. */
	int packets_left; /* Unhandled packets left within the block from previous call to pcap_read_linux_mmap_v3 in case of TPACKET_V3. */
#endif
	int poll_breakloop_fd; /* fd to an eventfd to break from blocking operations; -1 once it has been closed */
};

/*
 * Stuff to do when we close.
 *
 * These are bit flags, OR'ed together in the must_do_on_close member
 * of struct pcap_linux.
 */
#define MUST_CLEAR_RFMON	0x00000001	/* clear rfmon (monitor) mode */
#define MUST_DELETE_MONIF	0x00000002	/* delete monitor-mode interface */

/*
 * Prototypes for internal functions and methods.
221 */ 222 static int get_if_flags(const char *, bpf_u_int32 *, char *); 223 static int is_wifi(const char *); 224 static void map_arphrd_to_dlt(pcap_t *, int, const char *, int); 225 static int pcap_activate_linux(pcap_t *); 226 static int setup_socket(pcap_t *, int); 227 static int setup_mmapped(pcap_t *, int *); 228 static int pcap_can_set_rfmon_linux(pcap_t *); 229 static int pcap_inject_linux(pcap_t *, const void *, int); 230 static int pcap_stats_linux(pcap_t *, struct pcap_stat *); 231 static int pcap_setfilter_linux(pcap_t *, struct bpf_program *); 232 static int pcap_setdirection_linux(pcap_t *, pcap_direction_t); 233 static int pcap_set_datalink_linux(pcap_t *, int); 234 static void pcap_cleanup_linux(pcap_t *); 235 236 union thdr { 237 struct tpacket2_hdr *h2; 238 #ifdef HAVE_TPACKET3 239 struct tpacket_block_desc *h3; 240 #endif 241 u_char *raw; 242 }; 243 244 #define RING_GET_FRAME_AT(h, offset) (((u_char **)h->buffer)[(offset)]) 245 #define RING_GET_CURRENT_FRAME(h) RING_GET_FRAME_AT(h, h->offset) 246 247 static void destroy_ring(pcap_t *handle); 248 static int create_ring(pcap_t *handle, int *status); 249 static int prepare_tpacket_socket(pcap_t *handle); 250 static int pcap_read_linux_mmap_v2(pcap_t *, int, pcap_handler , u_char *); 251 #ifdef HAVE_TPACKET3 252 static int pcap_read_linux_mmap_v3(pcap_t *, int, pcap_handler , u_char *); 253 #endif 254 static int pcap_setnonblock_linux(pcap_t *p, int nonblock); 255 static int pcap_getnonblock_linux(pcap_t *p); 256 static void pcap_oneshot_linux(u_char *user, const struct pcap_pkthdr *h, 257 const u_char *bytes); 258 259 /* 260 * In pre-3.0 kernels, the tp_vlan_tci field is set to whatever the 261 * vlan_tci field in the skbuff is. 0 can either mean "not on a VLAN" 262 * or "on VLAN 0". There is no flag set in the tp_status field to 263 * distinguish between them. 
264 * 265 * In 3.0 and later kernels, if there's a VLAN tag present, the tp_vlan_tci 266 * field is set to the VLAN tag, and the TP_STATUS_VLAN_VALID flag is set 267 * in the tp_status field, otherwise the tp_vlan_tci field is set to 0 and 268 * the TP_STATUS_VLAN_VALID flag isn't set in the tp_status field. 269 * 270 * With a pre-3.0 kernel, we cannot distinguish between packets with no 271 * VLAN tag and packets on VLAN 0, so we will mishandle some packets, and 272 * there's nothing we can do about that. 273 * 274 * So, on those systems, which never set the TP_STATUS_VLAN_VALID flag, we 275 * continue the behavior of earlier libpcaps, wherein we treated packets 276 * with a VLAN tag of 0 as being packets without a VLAN tag rather than packets 277 * on VLAN 0. We do this by treating packets with a tp_vlan_tci of 0 and 278 * with the TP_STATUS_VLAN_VALID flag not set in tp_status as not having 279 * VLAN tags. This does the right thing on 3.0 and later kernels, and 280 * continues the old unfixably-imperfect behavior on pre-3.0 kernels. 281 * 282 * If TP_STATUS_VLAN_VALID isn't defined, we test it as the 0x10 bit; it 283 * has that value in 3.0 and later kernels. 284 */ 285 #ifdef TP_STATUS_VLAN_VALID 286 #define VLAN_VALID(hdr, hv) ((hv)->tp_vlan_tci != 0 || ((hdr)->tp_status & TP_STATUS_VLAN_VALID)) 287 #else 288 /* 289 * This is being compiled on a system that lacks TP_STATUS_VLAN_VALID, 290 * so we testwith the value it has in the 3.0 and later kernels, so 291 * we can test it if we're running on a system that has it. (If we're 292 * running on a system that doesn't have it, it won't be set in the 293 * tp_status field, so the tests of it will always fail; that means 294 * we behave the way we did before we introduced this macro.) 
295 */ 296 #define VLAN_VALID(hdr, hv) ((hv)->tp_vlan_tci != 0 || ((hdr)->tp_status & 0x10)) 297 #endif 298 299 #ifdef TP_STATUS_VLAN_TPID_VALID 300 # define VLAN_TPID(hdr, hv) (((hv)->tp_vlan_tpid || ((hdr)->tp_status & TP_STATUS_VLAN_TPID_VALID)) ? (hv)->tp_vlan_tpid : ETH_P_8021Q) 301 #else 302 # define VLAN_TPID(hdr, hv) ETH_P_8021Q 303 #endif 304 305 /* 306 * Required select timeout if we're polling for an "interface disappeared" 307 * indication - 1 millisecond. 308 */ 309 static const struct timeval netdown_timeout = { 310 0, 1000 /* 1000 microseconds = 1 millisecond */ 311 }; 312 313 /* 314 * Wrap some ioctl calls 315 */ 316 static int iface_get_id(int fd, const char *device, char *ebuf); 317 static int iface_get_mtu(int fd, const char *device, char *ebuf); 318 static int iface_get_arptype(int fd, const char *device, char *ebuf); 319 static int iface_bind(int fd, int ifindex, char *ebuf, int protocol); 320 static int enter_rfmon_mode(pcap_t *handle, int sock_fd, 321 const char *device); 322 static int iface_get_ts_types(const char *device, pcap_t *handle, 323 char *ebuf); 324 static int iface_get_offload(pcap_t *handle); 325 326 static int fix_program(pcap_t *handle, struct sock_fprog *fcode); 327 static int fix_offset(pcap_t *handle, struct bpf_insn *p); 328 static int set_kernel_filter(pcap_t *handle, struct sock_fprog *fcode); 329 static int reset_kernel_filter(pcap_t *handle); 330 331 static struct sock_filter total_insn 332 = BPF_STMT(BPF_RET | BPF_K, 0); 333 static struct sock_fprog total_fcode 334 = { 1, &total_insn }; 335 336 static int iface_dsa_get_proto_info(const char *device, pcap_t *handle); 337 338 pcap_t * 339 pcap_create_interface(const char *device, char *ebuf) 340 { 341 pcap_t *handle; 342 343 handle = PCAP_CREATE_COMMON(ebuf, struct pcap_linux); 344 if (handle == NULL) 345 return NULL; 346 347 handle->activate_op = pcap_activate_linux; 348 handle->can_set_rfmon_op = pcap_can_set_rfmon_linux; 349 350 /* 351 * See what time stamp types we 
support. 352 */ 353 if (iface_get_ts_types(device, handle, ebuf) == -1) { 354 pcap_close(handle); 355 return NULL; 356 } 357 358 /* 359 * We claim that we support microsecond and nanosecond time 360 * stamps. 361 * 362 * XXX - with adapter-supplied time stamps, can we choose 363 * microsecond or nanosecond time stamps on arbitrary 364 * adapters? 365 */ 366 handle->tstamp_precision_list = malloc(2 * sizeof(u_int)); 367 if (handle->tstamp_precision_list == NULL) { 368 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 369 errno, "malloc"); 370 pcap_close(handle); 371 return NULL; 372 } 373 handle->tstamp_precision_list[0] = PCAP_TSTAMP_PRECISION_MICRO; 374 handle->tstamp_precision_list[1] = PCAP_TSTAMP_PRECISION_NANO; 375 handle->tstamp_precision_count = 2; 376 377 struct pcap_linux *handlep = handle->priv; 378 handlep->poll_breakloop_fd = eventfd(0, EFD_NONBLOCK); 379 380 return handle; 381 } 382 383 #ifdef HAVE_LIBNL 384 /* 385 * If interface {if_name} is a mac80211 driver, the file 386 * /sys/class/net/{if_name}/phy80211 is a symlink to 387 * /sys/class/ieee80211/{phydev_name}, for some {phydev_name}. 388 * 389 * On Fedora 9, with a 2.6.26.3-29 kernel, my Zydas stick, at 390 * least, has a "wmaster0" device and a "wlan0" device; the 391 * latter is the one with the IP address. Both show up in 392 * "tcpdump -D" output. Capturing on the wmaster0 device 393 * captures with 802.11 headers. 394 * 395 * airmon-ng searches through /sys/class/net for devices named 396 * monN, starting with mon0; as soon as one *doesn't* exist, 397 * it chooses that as the monitor device name. If the "iw" 398 * command exists, it does 399 * 400 * iw dev {if_name} interface add {monif_name} type monitor 401 * 402 * where {monif_name} is the monitor device. It then (sigh) sleeps 403 * .1 second, and then configures the device up. 
Otherwise, if 404 * /sys/class/ieee80211/{phydev_name}/add_iface is a file, it writes 405 * {mondev_name}, without a newline, to that file, and again (sigh) 406 * sleeps .1 second, and then iwconfig's that device into monitor 407 * mode and configures it up. Otherwise, you can't do monitor mode. 408 * 409 * All these devices are "glued" together by having the 410 * /sys/class/net/{if_name}/phy80211 links pointing to the same 411 * place, so, given a wmaster, wlan, or mon device, you can 412 * find the other devices by looking for devices with 413 * the same phy80211 link. 414 * 415 * To turn monitor mode off, delete the monitor interface, 416 * either with 417 * 418 * iw dev {monif_name} interface del 419 * 420 * or by sending {monif_name}, with no NL, down 421 * /sys/class/ieee80211/{phydev_name}/remove_iface 422 * 423 * Note: if you try to create a monitor device named "monN", and 424 * there's already a "monN" device, it fails, as least with 425 * the netlink interface (which is what iw uses), with a return 426 * value of -ENFILE. (Return values are negative errnos.) We 427 * could probably use that to find an unused device. 428 * 429 * Yes, you can have multiple monitor devices for a given 430 * physical device. 431 */ 432 433 /* 434 * Is this a mac80211 device? If so, fill in the physical device path and 435 * return 1; if not, return 0. On an error, fill in handle->errbuf and 436 * return PCAP_ERROR. 437 */ 438 static int 439 get_mac80211_phydev(pcap_t *handle, const char *device, char *phydev_path, 440 size_t phydev_max_pathlen) 441 { 442 char *pathstr; 443 ssize_t bytes_read; 444 445 /* 446 * Generate the path string for the symlink to the physical device. 
447 */ 448 if (asprintf(&pathstr, "/sys/class/net/%s/phy80211", device) == -1) { 449 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 450 "%s: Can't generate path name string for /sys/class/net device", 451 device); 452 return PCAP_ERROR; 453 } 454 bytes_read = readlink(pathstr, phydev_path, phydev_max_pathlen); 455 if (bytes_read == -1) { 456 if (errno == ENOENT || errno == EINVAL) { 457 /* 458 * Doesn't exist, or not a symlink; assume that 459 * means it's not a mac80211 device. 460 */ 461 free(pathstr); 462 return 0; 463 } 464 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 465 errno, "%s: Can't readlink %s", device, pathstr); 466 free(pathstr); 467 return PCAP_ERROR; 468 } 469 free(pathstr); 470 phydev_path[bytes_read] = '\0'; 471 return 1; 472 } 473 474 struct nl80211_state { 475 struct nl_sock *nl_sock; 476 struct nl_cache *nl_cache; 477 struct genl_family *nl80211; 478 }; 479 480 static int 481 nl80211_init(pcap_t *handle, struct nl80211_state *state, const char *device) 482 { 483 int err; 484 485 state->nl_sock = nl_socket_alloc(); 486 if (!state->nl_sock) { 487 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 488 "%s: failed to allocate netlink handle", device); 489 return PCAP_ERROR; 490 } 491 492 if (genl_connect(state->nl_sock)) { 493 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 494 "%s: failed to connect to generic netlink", device); 495 goto out_handle_destroy; 496 } 497 498 err = genl_ctrl_alloc_cache(state->nl_sock, &state->nl_cache); 499 if (err < 0) { 500 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 501 "%s: failed to allocate generic netlink cache: %s", 502 device, nl_geterror(-err)); 503 goto out_handle_destroy; 504 } 505 506 state->nl80211 = genl_ctrl_search_by_name(state->nl_cache, "nl80211"); 507 if (!state->nl80211) { 508 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 509 "%s: nl80211 not found", device); 510 goto out_cache_free; 511 } 512 513 return 0; 514 515 out_cache_free: 516 nl_cache_free(state->nl_cache); 517 out_handle_destroy: 518 
nl_socket_free(state->nl_sock); 519 return PCAP_ERROR; 520 } 521 522 static void 523 nl80211_cleanup(struct nl80211_state *state) 524 { 525 genl_family_put(state->nl80211); 526 nl_cache_free(state->nl_cache); 527 nl_socket_free(state->nl_sock); 528 } 529 530 static int 531 del_mon_if(pcap_t *handle, int sock_fd, struct nl80211_state *state, 532 const char *device, const char *mondevice); 533 534 static int 535 add_mon_if(pcap_t *handle, int sock_fd, struct nl80211_state *state, 536 const char *device, const char *mondevice) 537 { 538 struct pcap_linux *handlep = handle->priv; 539 int ifindex; 540 struct nl_msg *msg; 541 int err; 542 543 ifindex = iface_get_id(sock_fd, device, handle->errbuf); 544 if (ifindex == -1) 545 return PCAP_ERROR; 546 547 msg = nlmsg_alloc(); 548 if (!msg) { 549 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 550 "%s: failed to allocate netlink msg", device); 551 return PCAP_ERROR; 552 } 553 554 genlmsg_put(msg, 0, 0, genl_family_get_id(state->nl80211), 0, 555 0, NL80211_CMD_NEW_INTERFACE, 0); 556 NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, ifindex); 557 DIAG_OFF_NARROWING 558 NLA_PUT_STRING(msg, NL80211_ATTR_IFNAME, mondevice); 559 DIAG_ON_NARROWING 560 NLA_PUT_U32(msg, NL80211_ATTR_IFTYPE, NL80211_IFTYPE_MONITOR); 561 562 err = nl_send_auto_complete(state->nl_sock, msg); 563 if (err < 0) { 564 if (err == -NLE_FAILURE) { 565 /* 566 * Device not available; our caller should just 567 * keep trying. (libnl 2.x maps ENFILE to 568 * NLE_FAILURE; it can also map other errors 569 * to that, but there's not much we can do 570 * about that.) 571 */ 572 nlmsg_free(msg); 573 return 0; 574 } else { 575 /* 576 * Real failure, not just "that device is not 577 * available. 
578 */ 579 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 580 "%s: nl_send_auto_complete failed adding %s interface: %s", 581 device, mondevice, nl_geterror(-err)); 582 nlmsg_free(msg); 583 return PCAP_ERROR; 584 } 585 } 586 err = nl_wait_for_ack(state->nl_sock); 587 if (err < 0) { 588 if (err == -NLE_FAILURE) { 589 /* 590 * Device not available; our caller should just 591 * keep trying. (libnl 2.x maps ENFILE to 592 * NLE_FAILURE; it can also map other errors 593 * to that, but there's not much we can do 594 * about that.) 595 */ 596 nlmsg_free(msg); 597 return 0; 598 } else { 599 /* 600 * Real failure, not just "that device is not 601 * available. 602 */ 603 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 604 "%s: nl_wait_for_ack failed adding %s interface: %s", 605 device, mondevice, nl_geterror(-err)); 606 nlmsg_free(msg); 607 return PCAP_ERROR; 608 } 609 } 610 611 /* 612 * Success. 613 */ 614 nlmsg_free(msg); 615 616 /* 617 * Try to remember the monitor device. 618 */ 619 handlep->mondevice = strdup(mondevice); 620 if (handlep->mondevice == NULL) { 621 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 622 errno, "strdup"); 623 /* 624 * Get rid of the monitor device. 
625 */ 626 del_mon_if(handle, sock_fd, state, device, mondevice); 627 return PCAP_ERROR; 628 } 629 return 1; 630 631 nla_put_failure: 632 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 633 "%s: nl_put failed adding %s interface", 634 device, mondevice); 635 nlmsg_free(msg); 636 return PCAP_ERROR; 637 } 638 639 static int 640 del_mon_if(pcap_t *handle, int sock_fd, struct nl80211_state *state, 641 const char *device, const char *mondevice) 642 { 643 int ifindex; 644 struct nl_msg *msg; 645 int err; 646 647 ifindex = iface_get_id(sock_fd, mondevice, handle->errbuf); 648 if (ifindex == -1) 649 return PCAP_ERROR; 650 651 msg = nlmsg_alloc(); 652 if (!msg) { 653 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 654 "%s: failed to allocate netlink msg", device); 655 return PCAP_ERROR; 656 } 657 658 genlmsg_put(msg, 0, 0, genl_family_get_id(state->nl80211), 0, 659 0, NL80211_CMD_DEL_INTERFACE, 0); 660 NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, ifindex); 661 662 err = nl_send_auto_complete(state->nl_sock, msg); 663 if (err < 0) { 664 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 665 "%s: nl_send_auto_complete failed deleting %s interface: %s", 666 device, mondevice, nl_geterror(-err)); 667 nlmsg_free(msg); 668 return PCAP_ERROR; 669 } 670 err = nl_wait_for_ack(state->nl_sock); 671 if (err < 0) { 672 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 673 "%s: nl_wait_for_ack failed adding %s interface: %s", 674 device, mondevice, nl_geterror(-err)); 675 nlmsg_free(msg); 676 return PCAP_ERROR; 677 } 678 679 /* 680 * Success. 
681 */ 682 nlmsg_free(msg); 683 return 1; 684 685 nla_put_failure: 686 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 687 "%s: nl_put failed deleting %s interface", 688 device, mondevice); 689 nlmsg_free(msg); 690 return PCAP_ERROR; 691 } 692 #endif /* HAVE_LIBNL */ 693 694 static int pcap_protocol(pcap_t *handle) 695 { 696 int protocol; 697 698 protocol = handle->opt.protocol; 699 if (protocol == 0) 700 protocol = ETH_P_ALL; 701 702 return htons(protocol); 703 } 704 705 static int 706 pcap_can_set_rfmon_linux(pcap_t *handle) 707 { 708 #ifdef HAVE_LIBNL 709 char phydev_path[PATH_MAX+1]; 710 int ret; 711 #endif 712 713 if (strcmp(handle->opt.device, "any") == 0) { 714 /* 715 * Monitor mode makes no sense on the "any" device. 716 */ 717 return 0; 718 } 719 720 #ifdef HAVE_LIBNL 721 /* 722 * Bleah. There doesn't seem to be a way to ask a mac80211 723 * device, through libnl, whether it supports monitor mode; 724 * we'll just check whether the device appears to be a 725 * mac80211 device and, if so, assume the device supports 726 * monitor mode. 727 */ 728 ret = get_mac80211_phydev(handle, handle->opt.device, phydev_path, 729 PATH_MAX); 730 if (ret < 0) 731 return ret; /* error */ 732 if (ret == 1) 733 return 1; /* mac80211 device */ 734 #endif 735 736 return 0; 737 } 738 739 /* 740 * Grabs the number of missed packets by the interface from 741 * /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors. 742 * 743 * Compared to /proc/net/dev this avoids counting software drops, 744 * but may be unimplemented and just return 0. 745 * The author has found no straigthforward way to check for support. 
746 */ 747 static long long int 748 linux_get_stat(const char * if_name, const char * stat) { 749 ssize_t bytes_read; 750 int fd; 751 char buffer[PATH_MAX]; 752 753 snprintf(buffer, sizeof(buffer), "/sys/class/net/%s/statistics/%s", if_name, stat); 754 fd = open(buffer, O_RDONLY); 755 if (fd == -1) 756 return 0; 757 758 bytes_read = read(fd, buffer, sizeof(buffer) - 1); 759 close(fd); 760 if (bytes_read == -1) 761 return 0; 762 buffer[bytes_read] = '\0'; 763 764 return strtoll(buffer, NULL, 10); 765 } 766 767 static long long int 768 linux_if_drops(const char * if_name) 769 { 770 long long int missed = linux_get_stat(if_name, "rx_missed_errors"); 771 long long int fifo = linux_get_stat(if_name, "rx_fifo_errors"); 772 return missed + fifo; 773 } 774 775 776 /* 777 * Monitor mode is kind of interesting because we have to reset the 778 * interface before exiting. The problem can't really be solved without 779 * some daemon taking care of managing usage counts. If we put the 780 * interface into monitor mode, we set a flag indicating that we must 781 * take it out of that mode when the interface is closed, and, when 782 * closing the interface, if that flag is set we take it out of monitor 783 * mode. 784 */ 785 786 static void pcap_cleanup_linux( pcap_t *handle ) 787 { 788 struct pcap_linux *handlep = handle->priv; 789 #ifdef HAVE_LIBNL 790 struct nl80211_state nlstate; 791 int ret; 792 #endif /* HAVE_LIBNL */ 793 794 if (handlep->must_do_on_close != 0) { 795 /* 796 * There's something we have to do when closing this 797 * pcap_t. 
798 */ 799 #ifdef HAVE_LIBNL 800 if (handlep->must_do_on_close & MUST_DELETE_MONIF) { 801 ret = nl80211_init(handle, &nlstate, handlep->device); 802 if (ret >= 0) { 803 ret = del_mon_if(handle, handle->fd, &nlstate, 804 handlep->device, handlep->mondevice); 805 nl80211_cleanup(&nlstate); 806 } 807 if (ret < 0) { 808 fprintf(stderr, 809 "Can't delete monitor interface %s (%s).\n" 810 "Please delete manually.\n", 811 handlep->mondevice, handle->errbuf); 812 } 813 } 814 #endif /* HAVE_LIBNL */ 815 816 /* 817 * Take this pcap out of the list of pcaps for which we 818 * have to take the interface out of some mode. 819 */ 820 pcap_remove_from_pcaps_to_close(handle); 821 } 822 823 if (handle->fd != -1) { 824 /* 825 * Destroy the ring buffer (assuming we've set it up), 826 * and unmap it if it's mapped. 827 */ 828 destroy_ring(handle); 829 } 830 831 if (handlep->oneshot_buffer != NULL) { 832 free(handlep->oneshot_buffer); 833 handlep->oneshot_buffer = NULL; 834 } 835 836 if (handlep->mondevice != NULL) { 837 free(handlep->mondevice); 838 handlep->mondevice = NULL; 839 } 840 if (handlep->device != NULL) { 841 free(handlep->device); 842 handlep->device = NULL; 843 } 844 845 if (handlep->poll_breakloop_fd != -1) { 846 close(handlep->poll_breakloop_fd); 847 handlep->poll_breakloop_fd = -1; 848 } 849 pcap_cleanup_live_common(handle); 850 } 851 852 #ifdef HAVE_TPACKET3 853 /* 854 * Some versions of TPACKET_V3 have annoying bugs/misfeatures 855 * around which we have to work. Determine if we have those 856 * problems or not. 857 * 3.19 is the first release with a fixed version of 858 * TPACKET_V3. We treat anything before that as 859 * not having a fixed version; that may really mean 860 * it has *no* version. 861 */ 862 static int has_broken_tpacket_v3(void) 863 { 864 struct utsname utsname; 865 const char *release; 866 long major, minor; 867 int matches, verlen; 868 869 /* No version information, assume broken. 
*/ 870 if (uname(&utsname) == -1) 871 return 1; 872 release = utsname.release; 873 874 /* A malformed version, ditto. */ 875 matches = sscanf(release, "%ld.%ld%n", &major, &minor, &verlen); 876 if (matches != 2) 877 return 1; 878 if (release[verlen] != '.' && release[verlen] != '\0') 879 return 1; 880 881 /* OK, a fixed version. */ 882 if (major > 3 || (major == 3 && minor >= 19)) 883 return 0; 884 885 /* Too old :( */ 886 return 1; 887 } 888 #endif 889 890 /* 891 * Set the timeout to be used in poll() with memory-mapped packet capture. 892 */ 893 static void 894 set_poll_timeout(struct pcap_linux *handlep) 895 { 896 #ifdef HAVE_TPACKET3 897 int broken_tpacket_v3 = has_broken_tpacket_v3(); 898 #endif 899 if (handlep->timeout == 0) { 900 #ifdef HAVE_TPACKET3 901 /* 902 * XXX - due to a set of (mis)features in the TPACKET_V3 903 * kernel code prior to the 3.19 kernel, blocking forever 904 * with a TPACKET_V3 socket can, if few packets are 905 * arriving and passing the socket filter, cause most 906 * packets to be dropped. See libpcap issue #335 for the 907 * full painful story. 908 * 909 * The workaround is to have poll() time out very quickly, 910 * so we grab the frames handed to us, and return them to 911 * the kernel, ASAP. 912 */ 913 if (handlep->tp_version == TPACKET_V3 && broken_tpacket_v3) 914 handlep->poll_timeout = 1; /* don't block for very long */ 915 else 916 #endif 917 handlep->poll_timeout = -1; /* block forever */ 918 } else if (handlep->timeout > 0) { 919 #ifdef HAVE_TPACKET3 920 /* 921 * For TPACKET_V3, the timeout is handled by the kernel, 922 * so block forever; that way, we don't get extra timeouts. 923 * Don't do that if we have a broken TPACKET_V3, though. 
924 */ 925 if (handlep->tp_version == TPACKET_V3 && !broken_tpacket_v3) 926 handlep->poll_timeout = -1; /* block forever, let TPACKET_V3 wake us up */ 927 else 928 #endif 929 handlep->poll_timeout = handlep->timeout; /* block for that amount of time */ 930 } else { 931 /* 932 * Non-blocking mode; we call poll() to pick up error 933 * indications, but we don't want it to wait for 934 * anything. 935 */ 936 handlep->poll_timeout = 0; 937 } 938 } 939 940 static void pcap_breakloop_linux(pcap_t *handle) 941 { 942 pcap_breakloop_common(handle); 943 struct pcap_linux *handlep = handle->priv; 944 945 uint64_t value = 1; 946 /* XXX - what if this fails? */ 947 if (handlep->poll_breakloop_fd != -1) 948 (void)write(handlep->poll_breakloop_fd, &value, sizeof(value)); 949 } 950 951 /* 952 * Set the offset at which to insert VLAN tags. 953 * That should be the offset of the type field. 954 */ 955 static void 956 set_vlan_offset(pcap_t *handle) 957 { 958 struct pcap_linux *handlep = handle->priv; 959 960 switch (handle->linktype) { 961 962 case DLT_EN10MB: 963 /* 964 * The type field is after the destination and source 965 * MAC address. 966 */ 967 handlep->vlan_offset = 2 * ETH_ALEN; 968 break; 969 970 case DLT_LINUX_SLL: 971 /* 972 * The type field is in the last 2 bytes of the 973 * DLT_LINUX_SLL header. 974 */ 975 handlep->vlan_offset = SLL_HDR_LEN - 2; 976 break; 977 978 default: 979 handlep->vlan_offset = -1; /* unknown */ 980 break; 981 } 982 } 983 984 /* 985 * Get a handle for a live capture from the given device. You can 986 * pass NULL as device to get all packages (without link level 987 * information of course). If you pass 1 as promisc the interface 988 * will be set to promiscuous mode (XXX: I think this usage should 989 * be deprecated and functions be added to select that later allow 990 * modification of that values -- Torsten). 
991 */ 992 static int 993 pcap_activate_linux(pcap_t *handle) 994 { 995 struct pcap_linux *handlep = handle->priv; 996 const char *device; 997 int is_any_device; 998 struct ifreq ifr; 999 int status = 0; 1000 int status2 = 0; 1001 int ret; 1002 1003 device = handle->opt.device; 1004 1005 /* 1006 * Make sure the name we were handed will fit into the ioctls we 1007 * might perform on the device; if not, return a "No such device" 1008 * indication, as the Linux kernel shouldn't support creating 1009 * a device whose name won't fit into those ioctls. 1010 * 1011 * "Will fit" means "will fit, complete with a null terminator", 1012 * so if the length, which does *not* include the null terminator, 1013 * is greater than *or equal to* the size of the field into which 1014 * we'll be copying it, that won't fit. 1015 */ 1016 if (strlen(device) >= sizeof(ifr.ifr_name)) { 1017 /* 1018 * There's nothing more to say, so clear the error 1019 * message. 1020 */ 1021 handle->errbuf[0] = '\0'; 1022 status = PCAP_ERROR_NO_SUCH_DEVICE; 1023 goto fail; 1024 } 1025 1026 /* 1027 * Turn a negative snapshot value (invalid), a snapshot value of 1028 * 0 (unspecified), or a value bigger than the normal maximum 1029 * value, into the maximum allowed value. 1030 * 1031 * If some application really *needs* a bigger snapshot 1032 * length, we should just increase MAXIMUM_SNAPLEN. 1033 */ 1034 if (handle->snapshot <= 0 || handle->snapshot > MAXIMUM_SNAPLEN) 1035 handle->snapshot = MAXIMUM_SNAPLEN; 1036 1037 handlep->device = strdup(device); 1038 if (handlep->device == NULL) { 1039 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 1040 errno, "strdup"); 1041 status = PCAP_ERROR; 1042 goto fail; 1043 } 1044 1045 /* 1046 * The "any" device is a special device which causes us not 1047 * to bind to a particular device and thus to look at all 1048 * devices. 
1049 */ 1050 is_any_device = (strcmp(device, "any") == 0); 1051 if (is_any_device) { 1052 if (handle->opt.promisc) { 1053 handle->opt.promisc = 0; 1054 /* Just a warning. */ 1055 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 1056 "Promiscuous mode not supported on the \"any\" device"); 1057 status = PCAP_WARNING_PROMISC_NOTSUP; 1058 } 1059 } 1060 1061 /* copy timeout value */ 1062 handlep->timeout = handle->opt.timeout; 1063 1064 /* 1065 * If we're in promiscuous mode, then we probably want 1066 * to see when the interface drops packets too, so get an 1067 * initial count from 1068 * /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors 1069 */ 1070 if (handle->opt.promisc) 1071 handlep->sysfs_dropped = linux_if_drops(handlep->device); 1072 1073 /* 1074 * If the "any" device is specified, try to open a SOCK_DGRAM. 1075 * Otherwise, open a SOCK_RAW. 1076 */ 1077 ret = setup_socket(handle, is_any_device); 1078 if (ret < 0) { 1079 /* 1080 * Fatal error; the return value is the error code, 1081 * and handle->errbuf has been set to an appropriate 1082 * error message. 1083 */ 1084 status = ret; 1085 goto fail; 1086 } 1087 /* 1088 * Success. 1089 * Try to set up memory-mapped access. 1090 */ 1091 ret = setup_mmapped(handle, &status); 1092 if (ret == -1) { 1093 /* 1094 * We failed to set up to use it, or the 1095 * kernel supports it, but we failed to 1096 * enable it. status has been set to the 1097 * error status to return and, if it's 1098 * PCAP_ERROR, handle->errbuf contains 1099 * the error message. 1100 */ 1101 goto fail; 1102 } 1103 1104 /* 1105 * We succeeded. status has been set to the status to return, 1106 * which might be 0, or might be a PCAP_WARNING_ value. 1107 */ 1108 /* 1109 * Now that we have activated the mmap ring, we can 1110 * set the correct protocol. 
1111 */ 1112 if ((status2 = iface_bind(handle->fd, handlep->ifindex, 1113 handle->errbuf, pcap_protocol(handle))) != 0) { 1114 status = status2; 1115 goto fail; 1116 } 1117 1118 handle->inject_op = pcap_inject_linux; 1119 handle->setfilter_op = pcap_setfilter_linux; 1120 handle->setdirection_op = pcap_setdirection_linux; 1121 handle->set_datalink_op = pcap_set_datalink_linux; 1122 handle->setnonblock_op = pcap_setnonblock_linux; 1123 handle->getnonblock_op = pcap_getnonblock_linux; 1124 handle->cleanup_op = pcap_cleanup_linux; 1125 handle->stats_op = pcap_stats_linux; 1126 handle->breakloop_op = pcap_breakloop_linux; 1127 1128 switch (handlep->tp_version) { 1129 1130 case TPACKET_V2: 1131 handle->read_op = pcap_read_linux_mmap_v2; 1132 break; 1133 #ifdef HAVE_TPACKET3 1134 case TPACKET_V3: 1135 handle->read_op = pcap_read_linux_mmap_v3; 1136 break; 1137 #endif 1138 } 1139 handle->oneshot_callback = pcap_oneshot_linux; 1140 handle->selectable_fd = handle->fd; 1141 1142 return status; 1143 1144 fail: 1145 pcap_cleanup_linux(handle); 1146 return status; 1147 } 1148 1149 static int 1150 pcap_set_datalink_linux(pcap_t *handle, int dlt) 1151 { 1152 handle->linktype = dlt; 1153 1154 /* 1155 * Update the offset at which to insert VLAN tags for the 1156 * new link-layer type. 1157 */ 1158 set_vlan_offset(handle); 1159 1160 return 0; 1161 } 1162 1163 /* 1164 * linux_check_direction() 1165 * 1166 * Do checks based on packet direction. 1167 */ 1168 static inline int 1169 linux_check_direction(const pcap_t *handle, const struct sockaddr_ll *sll) 1170 { 1171 struct pcap_linux *handlep = handle->priv; 1172 1173 if (sll->sll_pkttype == PACKET_OUTGOING) { 1174 /* 1175 * Outgoing packet. 1176 * If this is from the loopback device, reject it; 1177 * we'll see the packet as an incoming packet as well, 1178 * and we don't want to see it twice. 
1179 */ 1180 if (sll->sll_ifindex == handlep->lo_ifindex) 1181 return 0; 1182 1183 /* 1184 * If this is an outgoing CAN or CAN FD frame, and 1185 * the user doesn't only want outgoing packets, 1186 * reject it; CAN devices and drivers, and the CAN 1187 * stack, always arrange to loop back transmitted 1188 * packets, so they also appear as incoming packets. 1189 * We don't want duplicate packets, and we can't 1190 * easily distinguish packets looped back by the CAN 1191 * layer than those received by the CAN layer, so we 1192 * eliminate this packet instead. 1193 * 1194 * We check whether this is a CAN or CAN FD frame 1195 * by checking whether the device's hardware type 1196 * is ARPHRD_CAN. 1197 */ 1198 if (sll->sll_hatype == ARPHRD_CAN && 1199 handle->direction != PCAP_D_OUT) 1200 return 0; 1201 1202 /* 1203 * If the user only wants incoming packets, reject it. 1204 */ 1205 if (handle->direction == PCAP_D_IN) 1206 return 0; 1207 } else { 1208 /* 1209 * Incoming packet. 1210 * If the user only wants outgoing packets, reject it. 1211 */ 1212 if (handle->direction == PCAP_D_OUT) 1213 return 0; 1214 } 1215 return 1; 1216 } 1217 1218 /* 1219 * Check whether the device to which the pcap_t is bound still exists. 1220 * We do so by asking what address the socket is bound to, and checking 1221 * whether the ifindex in the address is -1, meaning "that device is gone", 1222 * or some other value, meaning "that device still exists". 1223 */ 1224 static int 1225 device_still_exists(pcap_t *handle) 1226 { 1227 struct pcap_linux *handlep = handle->priv; 1228 struct sockaddr_ll addr; 1229 socklen_t addr_len; 1230 1231 /* 1232 * If handlep->ifindex is -1, the socket isn't bound, meaning 1233 * we're capturing on the "any" device; that device never 1234 * disappears. (It should also never be configured down, so 1235 * we shouldn't even get here, but let's make sure.) 
1236 */ 1237 if (handlep->ifindex == -1) 1238 return (1); /* it's still here */ 1239 1240 /* 1241 * OK, now try to get the address for the socket. 1242 */ 1243 addr_len = sizeof (addr); 1244 if (getsockname(handle->fd, (struct sockaddr *) &addr, &addr_len) == -1) { 1245 /* 1246 * Error - report an error and return -1. 1247 */ 1248 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 1249 errno, "getsockname failed"); 1250 return (-1); 1251 } 1252 if (addr.sll_ifindex == -1) { 1253 /* 1254 * This means the device went away. 1255 */ 1256 return (0); 1257 } 1258 1259 /* 1260 * The device presumably just went down. 1261 */ 1262 return (1); 1263 } 1264 1265 static int 1266 pcap_inject_linux(pcap_t *handle, const void *buf, int size) 1267 { 1268 struct pcap_linux *handlep = handle->priv; 1269 int ret; 1270 1271 if (handlep->ifindex == -1) { 1272 /* 1273 * We don't support sending on the "any" device. 1274 */ 1275 pcap_strlcpy(handle->errbuf, 1276 "Sending packets isn't supported on the \"any\" device", 1277 PCAP_ERRBUF_SIZE); 1278 return (-1); 1279 } 1280 1281 if (handlep->cooked) { 1282 /* 1283 * We don't support sending on cooked-mode sockets. 1284 * 1285 * XXX - how do you send on a bound cooked-mode 1286 * socket? 1287 * Is a "sendto()" required there? 1288 */ 1289 pcap_strlcpy(handle->errbuf, 1290 "Sending packets isn't supported in cooked mode", 1291 PCAP_ERRBUF_SIZE); 1292 return (-1); 1293 } 1294 1295 ret = (int)send(handle->fd, buf, size, 0); 1296 if (ret == -1) { 1297 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 1298 errno, "send"); 1299 return (-1); 1300 } 1301 return (ret); 1302 } 1303 1304 /* 1305 * Get the statistics for the given packet capture handle. 
1306 */ 1307 static int 1308 pcap_stats_linux(pcap_t *handle, struct pcap_stat *stats) 1309 { 1310 struct pcap_linux *handlep = handle->priv; 1311 #ifdef HAVE_TPACKET3 1312 /* 1313 * For sockets using TPACKET_V2, the extra stuff at the end 1314 * of a struct tpacket_stats_v3 will not be filled in, and 1315 * we don't look at it so this is OK even for those sockets. 1316 * In addition, the PF_PACKET socket code in the kernel only 1317 * uses the length parameter to compute how much data to 1318 * copy out and to indicate how much data was copied out, so 1319 * it's OK to base it on the size of a struct tpacket_stats. 1320 * 1321 * XXX - it's probably OK, in fact, to just use a 1322 * struct tpacket_stats for V3 sockets, as we don't 1323 * care about the tp_freeze_q_cnt stat. 1324 */ 1325 struct tpacket_stats_v3 kstats; 1326 #else /* HAVE_TPACKET3 */ 1327 struct tpacket_stats kstats; 1328 #endif /* HAVE_TPACKET3 */ 1329 socklen_t len = sizeof (struct tpacket_stats); 1330 1331 long long if_dropped = 0; 1332 1333 /* 1334 * To fill in ps_ifdrop, we parse 1335 * /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors 1336 * for the numbers 1337 */ 1338 if (handle->opt.promisc) 1339 { 1340 /* 1341 * XXX - is there any reason to do this by remembering 1342 * the last counts value, subtracting it from the 1343 * current counts value, and adding that to stat.ps_ifdrop, 1344 * maintaining stat.ps_ifdrop as a count, rather than just 1345 * saving the *initial* counts value and setting 1346 * stat.ps_ifdrop to the difference between the current 1347 * value and the initial value? 1348 * 1349 * One reason might be to handle the count wrapping 1350 * around, on platforms where the count is 32 bits 1351 * and where you might get more than 2^32 dropped 1352 * packets; is there any other reason? 1353 * 1354 * (We maintain the count as a long long int so that, 1355 * if the kernel maintains the counts as 64-bit even 1356 * on 32-bit platforms, we can handle the real count. 
1357 * 1358 * Unfortunately, we can't report 64-bit counts; we 1359 * need a better API for reporting statistics, such as 1360 * one that reports them in a style similar to the 1361 * pcapng Interface Statistics Block, so that 1) the 1362 * counts are 64-bit, 2) it's easier to add new statistics 1363 * without breaking the ABI, and 3) it's easier to 1364 * indicate to a caller that wants one particular 1365 * statistic that it's not available by just not supplying 1366 * it.) 1367 */ 1368 if_dropped = handlep->sysfs_dropped; 1369 handlep->sysfs_dropped = linux_if_drops(handlep->device); 1370 handlep->stat.ps_ifdrop += (u_int)(handlep->sysfs_dropped - if_dropped); 1371 } 1372 1373 /* 1374 * Try to get the packet counts from the kernel. 1375 */ 1376 if (getsockopt(handle->fd, SOL_PACKET, PACKET_STATISTICS, 1377 &kstats, &len) > -1) { 1378 /* 1379 * "ps_recv" counts only packets that *passed* the 1380 * filter, not packets that didn't pass the filter. 1381 * This includes packets later dropped because we 1382 * ran out of buffer space. 1383 * 1384 * "ps_drop" counts packets dropped because we ran 1385 * out of buffer space. It doesn't count packets 1386 * dropped by the interface driver. It counts only 1387 * packets that passed the filter. 1388 * 1389 * See above for ps_ifdrop. 1390 * 1391 * Both statistics include packets not yet read from 1392 * the kernel by libpcap, and thus not yet seen by 1393 * the application. 1394 * 1395 * In "linux/net/packet/af_packet.c", at least in 2.6.27 1396 * through 5.6 kernels, "tp_packets" is incremented for 1397 * every packet that passes the packet filter *and* is 1398 * successfully copied to the ring buffer; "tp_drops" is 1399 * incremented for every packet dropped because there's 1400 * not enough free space in the ring buffer. 
1401 * 1402 * When the statistics are returned for a PACKET_STATISTICS 1403 * "getsockopt()" call, "tp_drops" is added to "tp_packets", 1404 * so that "tp_packets" counts all packets handed to 1405 * the PF_PACKET socket, including packets dropped because 1406 * there wasn't room on the socket buffer - but not 1407 * including packets that didn't pass the filter. 1408 * 1409 * In the BSD BPF, the count of received packets is 1410 * incremented for every packet handed to BPF, regardless 1411 * of whether it passed the filter. 1412 * 1413 * We can't make "pcap_stats()" work the same on both 1414 * platforms, but the best approximation is to return 1415 * "tp_packets" as the count of packets and "tp_drops" 1416 * as the count of drops. 1417 * 1418 * Keep a running total because each call to 1419 * getsockopt(handle->fd, SOL_PACKET, PACKET_STATISTICS, .... 1420 * resets the counters to zero. 1421 */ 1422 handlep->stat.ps_recv += kstats.tp_packets; 1423 handlep->stat.ps_drop += kstats.tp_drops; 1424 *stats = handlep->stat; 1425 return 0; 1426 } 1427 1428 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, errno, 1429 "failed to get statistics from socket"); 1430 return -1; 1431 } 1432 1433 /* 1434 * Description string for the "any" device. 1435 */ 1436 static const char any_descr[] = "Pseudo-device that captures on all interfaces"; 1437 1438 /* 1439 * A PF_PACKET socket can be bound to any network interface. 1440 */ 1441 static int 1442 can_be_bound(const char *name _U_) 1443 { 1444 return (1); 1445 } 1446 1447 /* 1448 * Get a socket to use with various interface ioctls. 1449 */ 1450 static int 1451 get_if_ioctl_socket(void) 1452 { 1453 int fd; 1454 1455 /* 1456 * This is a bit ugly. 1457 * 1458 * There isn't a socket type that's guaranteed to work. 1459 * 1460 * AF_NETLINK will work *if* you have Netlink configured into the 1461 * kernel (can it be configured out if you have any networking 1462 * support at all?) 
*and* if you're running a sufficiently recent 1463 * kernel, but not all the kernels we support are sufficiently 1464 * recent - that feature was introduced in Linux 4.6. 1465 * 1466 * AF_UNIX will work *if* you have UNIX-domain sockets configured 1467 * into the kernel and *if* you're not on a system that doesn't 1468 * allow them - some SELinux systems don't allow you create them. 1469 * Most systems probably have them configured in, but not all systems 1470 * have them configured in and allow them to be created. 1471 * 1472 * AF_INET will work *if* you have IPv4 configured into the kernel, 1473 * but, apparently, some systems have network adapters but have 1474 * kernels without IPv4 support. 1475 * 1476 * AF_INET6 will work *if* you have IPv6 configured into the 1477 * kernel, but if you don't have AF_INET, you might not have 1478 * AF_INET6, either (that is, independently on its own grounds). 1479 * 1480 * AF_PACKET would work, except that some of these calls should 1481 * work even if you *don't* have capture permission (you should be 1482 * able to enumerate interfaces and get information about them 1483 * without capture permission; you shouldn't get a failure until 1484 * you try pcap_activate()). (If you don't allow programs to 1485 * get as much information as possible about interfaces if you 1486 * don't have permission to capture, you run the risk of users 1487 * asking "why isn't it showing XXX" - or, worse, if you don't 1488 * show interfaces *at all* if you don't have permission to 1489 * capture on them, "why do no interfaces show up?" - when the 1490 * real problem is a permissions problem. Error reports of that 1491 * type require a lot more back-and-forth to debug, as evidenced 1492 * by many Wireshark bugs/mailing list questions/Q&A questions.) 
1493 * 1494 * So: 1495 * 1496 * we first try an AF_NETLINK socket, where "try" includes 1497 * "try to do a device ioctl on it", as, in the future, once 1498 * pre-4.6 kernels are sufficiently rare, that will probably 1499 * be the mechanism most likely to work; 1500 * 1501 * if that fails, we try an AF_UNIX socket, as that's less 1502 * likely to be configured out on a networking-capable system 1503 * than is IP; 1504 * 1505 * if that fails, we try an AF_INET6 socket; 1506 * 1507 * if that fails, we try an AF_INET socket. 1508 */ 1509 fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); 1510 if (fd != -1) { 1511 /* 1512 * OK, let's make sure we can do an SIOCGIFNAME 1513 * ioctl. 1514 */ 1515 struct ifreq ifr; 1516 1517 memset(&ifr, 0, sizeof(ifr)); 1518 if (ioctl(fd, SIOCGIFNAME, &ifr) == 0 || 1519 errno != EOPNOTSUPP) { 1520 /* 1521 * It succeeded, or failed for some reason 1522 * other than "netlink sockets don't support 1523 * device ioctls". Go with the AF_NETLINK 1524 * socket. 1525 */ 1526 return (fd); 1527 } 1528 1529 /* 1530 * OK, that didn't work, so it's as bad as "netlink 1531 * sockets aren't available". Close the socket and 1532 * drive on. 1533 */ 1534 close(fd); 1535 } 1536 1537 /* 1538 * Now try an AF_UNIX socket. 1539 */ 1540 fd = socket(AF_UNIX, SOCK_RAW, 0); 1541 if (fd != -1) { 1542 /* 1543 * OK, we got it! 1544 */ 1545 return (fd); 1546 } 1547 1548 /* 1549 * Now try an AF_INET6 socket. 1550 */ 1551 fd = socket(AF_INET6, SOCK_DGRAM, 0); 1552 if (fd != -1) { 1553 return (fd); 1554 } 1555 1556 /* 1557 * Now try an AF_INET socket. 1558 * 1559 * XXX - if that fails, is there anything else we should try? 1560 * AF_CAN, for embedded systems in vehicles, in case they're 1561 * built without Internet protocol support? Any other socket 1562 * types popular in non-Internet embedded systems? 1563 */ 1564 return (socket(AF_INET, SOCK_DGRAM, 0)); 1565 } 1566 1567 /* 1568 * Get additional flags for a device, using SIOCGIFMEDIA. 
1569 */ 1570 static int 1571 get_if_flags(const char *name, bpf_u_int32 *flags, char *errbuf) 1572 { 1573 int sock; 1574 FILE *fh; 1575 unsigned int arptype; 1576 struct ifreq ifr; 1577 struct ethtool_value info; 1578 1579 if (*flags & PCAP_IF_LOOPBACK) { 1580 /* 1581 * Loopback devices aren't wireless, and "connected"/ 1582 * "disconnected" doesn't apply to them. 1583 */ 1584 *flags |= PCAP_IF_CONNECTION_STATUS_NOT_APPLICABLE; 1585 return 0; 1586 } 1587 1588 sock = get_if_ioctl_socket(); 1589 if (sock == -1) { 1590 pcap_fmt_errmsg_for_errno(errbuf, PCAP_ERRBUF_SIZE, errno, 1591 "Can't create socket to get ethtool information for %s", 1592 name); 1593 return -1; 1594 } 1595 1596 /* 1597 * OK, what type of network is this? 1598 * In particular, is it wired or wireless? 1599 */ 1600 if (is_wifi(name)) { 1601 /* 1602 * Wi-Fi, hence wireless. 1603 */ 1604 *flags |= PCAP_IF_WIRELESS; 1605 } else { 1606 /* 1607 * OK, what does /sys/class/net/{if_name}/type contain? 1608 * (We don't use that for Wi-Fi, as it'll report 1609 * "Ethernet", i.e. ARPHRD_ETHER, for non-monitor- 1610 * mode devices.) 1611 */ 1612 char *pathstr; 1613 1614 if (asprintf(&pathstr, "/sys/class/net/%s/type", name) == -1) { 1615 snprintf(errbuf, PCAP_ERRBUF_SIZE, 1616 "%s: Can't generate path name string for /sys/class/net device", 1617 name); 1618 close(sock); 1619 return -1; 1620 } 1621 fh = fopen(pathstr, "r"); 1622 if (fh != NULL) { 1623 if (fscanf(fh, "%u", &arptype) == 1) { 1624 /* 1625 * OK, we got an ARPHRD_ type; what is it? 1626 */ 1627 switch (arptype) { 1628 1629 case ARPHRD_LOOPBACK: 1630 /* 1631 * These are types to which 1632 * "connected" and "disconnected" 1633 * don't apply, so don't bother 1634 * asking about it. 1635 * 1636 * XXX - add other types? 
1637 */ 1638 close(sock); 1639 fclose(fh); 1640 free(pathstr); 1641 return 0; 1642 1643 case ARPHRD_IRDA: 1644 case ARPHRD_IEEE80211: 1645 case ARPHRD_IEEE80211_PRISM: 1646 case ARPHRD_IEEE80211_RADIOTAP: 1647 #ifdef ARPHRD_IEEE802154 1648 case ARPHRD_IEEE802154: 1649 #endif 1650 #ifdef ARPHRD_IEEE802154_MONITOR 1651 case ARPHRD_IEEE802154_MONITOR: 1652 #endif 1653 #ifdef ARPHRD_6LOWPAN 1654 case ARPHRD_6LOWPAN: 1655 #endif 1656 /* 1657 * Various wireless types. 1658 */ 1659 *flags |= PCAP_IF_WIRELESS; 1660 break; 1661 } 1662 } 1663 fclose(fh); 1664 } 1665 free(pathstr); 1666 } 1667 1668 #ifdef ETHTOOL_GLINK 1669 memset(&ifr, 0, sizeof(ifr)); 1670 pcap_strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); 1671 info.cmd = ETHTOOL_GLINK; 1672 /* 1673 * XXX - while Valgrind handles SIOCETHTOOL and knows that 1674 * the ETHTOOL_GLINK command sets the .data member of the 1675 * structure, Memory Sanitizer doesn't yet do so: 1676 * 1677 * https://bugs.llvm.org/show_bug.cgi?id=45814 1678 * 1679 * For now, we zero it out to squelch warnings; if the bug 1680 * in question is fixed, we can remove this. 1681 */ 1682 info.data = 0; 1683 ifr.ifr_data = (caddr_t)&info; 1684 if (ioctl(sock, SIOCETHTOOL, &ifr) == -1) { 1685 int save_errno = errno; 1686 1687 switch (save_errno) { 1688 1689 case EOPNOTSUPP: 1690 case EINVAL: 1691 /* 1692 * OK, this OS version or driver doesn't support 1693 * asking for this information. 1694 * XXX - distinguish between "this doesn't 1695 * support ethtool at all because it's not 1696 * that type of device" vs. "this doesn't 1697 * support ethtool even though it's that 1698 * type of device", and return "unknown". 1699 */ 1700 *flags |= PCAP_IF_CONNECTION_STATUS_NOT_APPLICABLE; 1701 close(sock); 1702 return 0; 1703 1704 case ENODEV: 1705 /* 1706 * OK, no such device. 1707 * The user will find that out when they try to 1708 * activate the device; just say "OK" and 1709 * don't set anything. 
1710 */ 1711 close(sock); 1712 return 0; 1713 1714 default: 1715 /* 1716 * Other error. 1717 */ 1718 pcap_fmt_errmsg_for_errno(errbuf, PCAP_ERRBUF_SIZE, 1719 save_errno, 1720 "%s: SIOCETHTOOL(ETHTOOL_GLINK) ioctl failed", 1721 name); 1722 close(sock); 1723 return -1; 1724 } 1725 } 1726 1727 /* 1728 * Is it connected? 1729 */ 1730 if (info.data) { 1731 /* 1732 * It's connected. 1733 */ 1734 *flags |= PCAP_IF_CONNECTION_STATUS_CONNECTED; 1735 } else { 1736 /* 1737 * It's disconnected. 1738 */ 1739 *flags |= PCAP_IF_CONNECTION_STATUS_DISCONNECTED; 1740 } 1741 #endif 1742 1743 close(sock); 1744 return 0; 1745 } 1746 1747 int 1748 pcap_platform_finddevs(pcap_if_list_t *devlistp, char *errbuf) 1749 { 1750 /* 1751 * Get the list of regular interfaces first. 1752 */ 1753 if (pcap_findalldevs_interfaces(devlistp, errbuf, can_be_bound, 1754 get_if_flags) == -1) 1755 return (-1); /* failure */ 1756 1757 /* 1758 * Add the "any" device. 1759 * As it refers to all network devices, not to any particular 1760 * network device, the notion of "connected" vs. "disconnected" 1761 * doesn't apply. 1762 */ 1763 if (add_dev(devlistp, "any", 1764 PCAP_IF_UP|PCAP_IF_RUNNING|PCAP_IF_CONNECTION_STATUS_NOT_APPLICABLE, 1765 any_descr, errbuf) == NULL) 1766 return (-1); 1767 1768 return (0); 1769 } 1770 1771 /* 1772 * Set direction flag: Which packets do we accept on a forwarding 1773 * single device? IN, OUT or both? 1774 */ 1775 static int 1776 pcap_setdirection_linux(pcap_t *handle, pcap_direction_t d) 1777 { 1778 /* 1779 * It's guaranteed, at this point, that d is a valid 1780 * direction value. 1781 */ 1782 handle->direction = d; 1783 return 0; 1784 } 1785 1786 static int 1787 is_wifi(const char *device) 1788 { 1789 char *pathstr; 1790 struct stat statb; 1791 1792 /* 1793 * See if there's a sysfs wireless directory for it. 1794 * If so, it's a wireless interface. 1795 */ 1796 if (asprintf(&pathstr, "/sys/class/net/%s/wireless", device) == -1) { 1797 /* 1798 * Just give up here. 
1799 */ 1800 return 0; 1801 } 1802 if (stat(pathstr, &statb) == 0) { 1803 free(pathstr); 1804 return 1; 1805 } 1806 free(pathstr); 1807 1808 return 0; 1809 } 1810 1811 /* 1812 * Linux uses the ARP hardware type to identify the type of an 1813 * interface. pcap uses the DLT_xxx constants for this. This 1814 * function takes a pointer to a "pcap_t", and an ARPHRD_xxx 1815 * constant, as arguments, and sets "handle->linktype" to the 1816 * appropriate DLT_XXX constant and sets "handle->offset" to 1817 * the appropriate value (to make "handle->offset" plus link-layer 1818 * header length be a multiple of 4, so that the link-layer payload 1819 * will be aligned on a 4-byte boundary when capturing packets). 1820 * (If the offset isn't set here, it'll be 0; add code as appropriate 1821 * for cases where it shouldn't be 0.) 1822 * 1823 * If "cooked_ok" is non-zero, we can use DLT_LINUX_SLL and capture 1824 * in cooked mode; otherwise, we can't use cooked mode, so we have 1825 * to pick some type that works in raw mode, or fail. 1826 * 1827 * Sets the link type to -1 if unable to map the type. 1828 */ 1829 static void map_arphrd_to_dlt(pcap_t *handle, int arptype, 1830 const char *device, int cooked_ok) 1831 { 1832 static const char cdma_rmnet[] = "cdma_rmnet"; 1833 1834 switch (arptype) { 1835 1836 case ARPHRD_ETHER: 1837 /* 1838 * For various annoying reasons having to do with DHCP 1839 * software, some versions of Android give the mobile- 1840 * phone-network interface an ARPHRD_ value of 1841 * ARPHRD_ETHER, even though the packets supplied by 1842 * that interface have no link-layer header, and begin 1843 * with an IP header, so that the ARPHRD_ value should 1844 * be ARPHRD_NONE. 1845 * 1846 * Detect those devices by checking the device name, and 1847 * use DLT_RAW for them. 1848 */ 1849 if (strncmp(device, cdma_rmnet, sizeof cdma_rmnet - 1) == 0) { 1850 handle->linktype = DLT_RAW; 1851 return; 1852 } 1853 1854 /* 1855 * Is this a real Ethernet device? 
If so, give it a 1856 * link-layer-type list with DLT_EN10MB and DLT_DOCSIS, so 1857 * that an application can let you choose it, in case you're 1858 * capturing DOCSIS traffic that a Cisco Cable Modem 1859 * Termination System is putting out onto an Ethernet (it 1860 * doesn't put an Ethernet header onto the wire, it puts raw 1861 * DOCSIS frames out on the wire inside the low-level 1862 * Ethernet framing). 1863 * 1864 * XXX - are there any other sorts of "fake Ethernet" that 1865 * have ARPHRD_ETHER but that shouldn't offer DLT_DOCSIS as 1866 * a Cisco CMTS won't put traffic onto it or get traffic 1867 * bridged onto it? ISDN is handled in "setup_socket()", 1868 * as we fall back on cooked mode there, and we use 1869 * is_wifi() to check for 802.11 devices; are there any 1870 * others? 1871 */ 1872 if (!is_wifi(device)) { 1873 int ret; 1874 1875 /* 1876 * This is not a Wi-Fi device but it could be 1877 * a DSA master/management network device. 1878 */ 1879 ret = iface_dsa_get_proto_info(device, handle); 1880 if (ret < 0) 1881 return; 1882 1883 if (ret == 1) { 1884 /* 1885 * This is a DSA master/management network 1886 * device linktype is already set by 1887 * iface_dsa_get_proto_info() set an 1888 * appropriate offset here. 1889 */ 1890 handle->offset = 2; 1891 break; 1892 } 1893 1894 /* 1895 * It's not a Wi-Fi device; offer DOCSIS. 1896 */ 1897 handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 2); 1898 /* 1899 * If that fails, just leave the list empty. 
1900 */ 1901 if (handle->dlt_list != NULL) { 1902 handle->dlt_list[0] = DLT_EN10MB; 1903 handle->dlt_list[1] = DLT_DOCSIS; 1904 handle->dlt_count = 2; 1905 } 1906 } 1907 /* FALLTHROUGH */ 1908 1909 case ARPHRD_METRICOM: 1910 case ARPHRD_LOOPBACK: 1911 handle->linktype = DLT_EN10MB; 1912 handle->offset = 2; 1913 break; 1914 1915 case ARPHRD_EETHER: 1916 handle->linktype = DLT_EN3MB; 1917 break; 1918 1919 case ARPHRD_AX25: 1920 handle->linktype = DLT_AX25_KISS; 1921 break; 1922 1923 case ARPHRD_PRONET: 1924 handle->linktype = DLT_PRONET; 1925 break; 1926 1927 case ARPHRD_CHAOS: 1928 handle->linktype = DLT_CHAOS; 1929 break; 1930 #ifndef ARPHRD_CAN 1931 #define ARPHRD_CAN 280 1932 #endif 1933 case ARPHRD_CAN: 1934 handle->linktype = DLT_CAN_SOCKETCAN; 1935 break; 1936 1937 #ifndef ARPHRD_IEEE802_TR 1938 #define ARPHRD_IEEE802_TR 800 /* From Linux 2.4 */ 1939 #endif 1940 case ARPHRD_IEEE802_TR: 1941 case ARPHRD_IEEE802: 1942 handle->linktype = DLT_IEEE802; 1943 handle->offset = 2; 1944 break; 1945 1946 case ARPHRD_ARCNET: 1947 handle->linktype = DLT_ARCNET_LINUX; 1948 break; 1949 1950 #ifndef ARPHRD_FDDI /* From Linux 2.2.13 */ 1951 #define ARPHRD_FDDI 774 1952 #endif 1953 case ARPHRD_FDDI: 1954 handle->linktype = DLT_FDDI; 1955 handle->offset = 3; 1956 break; 1957 1958 #ifndef ARPHRD_ATM /* FIXME: How to #include this? */ 1959 #define ARPHRD_ATM 19 1960 #endif 1961 case ARPHRD_ATM: 1962 /* 1963 * The Classical IP implementation in ATM for Linux 1964 * supports both what RFC 1483 calls "LLC Encapsulation", 1965 * in which each packet has an LLC header, possibly 1966 * with a SNAP header as well, prepended to it, and 1967 * what RFC 1483 calls "VC Based Multiplexing", in which 1968 * different virtual circuits carry different network 1969 * layer protocols, and no header is prepended to packets. 
1970 * 1971 * They both have an ARPHRD_ type of ARPHRD_ATM, so 1972 * you can't use the ARPHRD_ type to find out whether 1973 * captured packets will have an LLC header, and, 1974 * while there's a socket ioctl to *set* the encapsulation 1975 * type, there's no ioctl to *get* the encapsulation type. 1976 * 1977 * This means that 1978 * 1979 * programs that dissect Linux Classical IP frames 1980 * would have to check for an LLC header and, 1981 * depending on whether they see one or not, dissect 1982 * the frame as LLC-encapsulated or as raw IP (I 1983 * don't know whether there's any traffic other than 1984 * IP that would show up on the socket, or whether 1985 * there's any support for IPv6 in the Linux 1986 * Classical IP code); 1987 * 1988 * filter expressions would have to compile into 1989 * code that checks for an LLC header and does 1990 * the right thing. 1991 * 1992 * Both of those are a nuisance - and, at least on systems 1993 * that support PF_PACKET sockets, we don't have to put 1994 * up with those nuisances; instead, we can just capture 1995 * in cooked mode. That's what we'll do, if we can. 1996 * Otherwise, we'll just fail. 
1997 */ 1998 if (cooked_ok) 1999 handle->linktype = DLT_LINUX_SLL; 2000 else 2001 handle->linktype = -1; 2002 break; 2003 2004 #ifndef ARPHRD_IEEE80211 /* From Linux 2.4.6 */ 2005 #define ARPHRD_IEEE80211 801 2006 #endif 2007 case ARPHRD_IEEE80211: 2008 handle->linktype = DLT_IEEE802_11; 2009 break; 2010 2011 #ifndef ARPHRD_IEEE80211_PRISM /* From Linux 2.4.18 */ 2012 #define ARPHRD_IEEE80211_PRISM 802 2013 #endif 2014 case ARPHRD_IEEE80211_PRISM: 2015 handle->linktype = DLT_PRISM_HEADER; 2016 break; 2017 2018 #ifndef ARPHRD_IEEE80211_RADIOTAP /* new */ 2019 #define ARPHRD_IEEE80211_RADIOTAP 803 2020 #endif 2021 case ARPHRD_IEEE80211_RADIOTAP: 2022 handle->linktype = DLT_IEEE802_11_RADIO; 2023 break; 2024 2025 case ARPHRD_PPP: 2026 /* 2027 * Some PPP code in the kernel supplies no link-layer 2028 * header whatsoever to PF_PACKET sockets; other PPP 2029 * code supplies PPP link-layer headers ("syncppp.c"); 2030 * some PPP code might supply random link-layer 2031 * headers (PPP over ISDN - there's code in Ethereal, 2032 * for example, to cope with PPP-over-ISDN captures 2033 * with which the Ethereal developers have had to cope, 2034 * heuristically trying to determine which of the 2035 * oddball link-layer headers particular packets have). 2036 * 2037 * As such, we just punt, and run all PPP interfaces 2038 * in cooked mode, if we can; otherwise, we just treat 2039 * it as DLT_RAW, for now - if somebody needs to capture, 2040 * on a 2.0[.x] kernel, on PPP devices that supply a 2041 * link-layer header, they'll have to add code here to 2042 * map to the appropriate DLT_ type (possibly adding a 2043 * new DLT_ type, if necessary). 2044 */ 2045 if (cooked_ok) 2046 handle->linktype = DLT_LINUX_SLL; 2047 else { 2048 /* 2049 * XXX - handle ISDN types here? 
We can't fall 2050 * back on cooked sockets, so we'd have to 2051 * figure out from the device name what type of 2052 * link-layer encapsulation it's using, and map 2053 * that to an appropriate DLT_ value, meaning 2054 * we'd map "isdnN" devices to DLT_RAW (they 2055 * supply raw IP packets with no link-layer 2056 * header) and "isdY" devices to a new DLT_I4L_IP 2057 * type that has only an Ethernet packet type as 2058 * a link-layer header. 2059 * 2060 * But sometimes we seem to get random crap 2061 * in the link-layer header when capturing on 2062 * ISDN devices.... 2063 */ 2064 handle->linktype = DLT_RAW; 2065 } 2066 break; 2067 2068 #ifndef ARPHRD_CISCO 2069 #define ARPHRD_CISCO 513 /* previously ARPHRD_HDLC */ 2070 #endif 2071 case ARPHRD_CISCO: 2072 handle->linktype = DLT_C_HDLC; 2073 break; 2074 2075 /* Not sure if this is correct for all tunnels, but it 2076 * works for CIPE */ 2077 case ARPHRD_TUNNEL: 2078 #ifndef ARPHRD_SIT 2079 #define ARPHRD_SIT 776 /* From Linux 2.2.13 */ 2080 #endif 2081 case ARPHRD_SIT: 2082 case ARPHRD_CSLIP: 2083 case ARPHRD_SLIP6: 2084 case ARPHRD_CSLIP6: 2085 case ARPHRD_ADAPT: 2086 case ARPHRD_SLIP: 2087 #ifndef ARPHRD_RAWHDLC 2088 #define ARPHRD_RAWHDLC 518 2089 #endif 2090 case ARPHRD_RAWHDLC: 2091 #ifndef ARPHRD_DLCI 2092 #define ARPHRD_DLCI 15 2093 #endif 2094 case ARPHRD_DLCI: 2095 /* 2096 * XXX - should some of those be mapped to DLT_LINUX_SLL 2097 * instead? Should we just map all of them to DLT_LINUX_SLL? 2098 */ 2099 handle->linktype = DLT_RAW; 2100 break; 2101 2102 #ifndef ARPHRD_FRAD 2103 #define ARPHRD_FRAD 770 2104 #endif 2105 case ARPHRD_FRAD: 2106 handle->linktype = DLT_FRELAY; 2107 break; 2108 2109 case ARPHRD_LOCALTLK: 2110 handle->linktype = DLT_LTALK; 2111 break; 2112 2113 case 18: 2114 /* 2115 * RFC 4338 defines an encapsulation for IP and ARP 2116 * packets that's compatible with the RFC 2625 2117 * encapsulation, but that uses a different ARP 2118 * hardware type and hardware addresses. 
That 2119 * ARP hardware type is 18; Linux doesn't define 2120 * any ARPHRD_ value as 18, but if it ever officially 2121 * supports RFC 4338-style IP-over-FC, it should define 2122 * one. 2123 * 2124 * For now, we map it to DLT_IP_OVER_FC, in the hopes 2125 * that this will encourage its use in the future, 2126 * should Linux ever officially support RFC 4338-style 2127 * IP-over-FC. 2128 */ 2129 handle->linktype = DLT_IP_OVER_FC; 2130 break; 2131 2132 #ifndef ARPHRD_FCPP 2133 #define ARPHRD_FCPP 784 2134 #endif 2135 case ARPHRD_FCPP: 2136 #ifndef ARPHRD_FCAL 2137 #define ARPHRD_FCAL 785 2138 #endif 2139 case ARPHRD_FCAL: 2140 #ifndef ARPHRD_FCPL 2141 #define ARPHRD_FCPL 786 2142 #endif 2143 case ARPHRD_FCPL: 2144 #ifndef ARPHRD_FCFABRIC 2145 #define ARPHRD_FCFABRIC 787 2146 #endif 2147 case ARPHRD_FCFABRIC: 2148 /* 2149 * Back in 2002, Donald Lee at Cray wanted a DLT_ for 2150 * IP-over-FC: 2151 * 2152 * https://www.mail-archive.com/tcpdump-workers@sandelman.ottawa.on.ca/msg01043.html 2153 * 2154 * and one was assigned. 2155 * 2156 * In a later private discussion (spun off from a message 2157 * on the ethereal-users list) on how to get that DLT_ 2158 * value in libpcap on Linux, I ended up deciding that 2159 * the best thing to do would be to have him tweak the 2160 * driver to set the ARPHRD_ value to some ARPHRD_FCxx 2161 * type, and map all those types to DLT_IP_OVER_FC: 2162 * 2163 * I've checked into the libpcap and tcpdump CVS tree 2164 * support for DLT_IP_OVER_FC. In order to use that, 2165 * you'd have to modify your modified driver to return 2166 * one of the ARPHRD_FCxxx types, in "fcLINUXfcp.c" - 2167 * change it to set "dev->type" to ARPHRD_FCFABRIC, for 2168 * example (the exact value doesn't matter, it can be 2169 * any of ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, or 2170 * ARPHRD_FCFABRIC). 
2171 * 2172 * 11 years later, Christian Svensson wanted to map 2173 * various ARPHRD_ values to DLT_FC_2 and 2174 * DLT_FC_2_WITH_FRAME_DELIMS for raw Fibre Channel 2175 * frames: 2176 * 2177 * https://github.com/mcr/libpcap/pull/29 2178 * 2179 * There doesn't seem to be any network drivers that uses 2180 * any of the ARPHRD_FC* values for IP-over-FC, and 2181 * it's not exactly clear what the "Dummy types for non 2182 * ARP hardware" are supposed to mean (link-layer 2183 * header type? Physical network type?), so it's 2184 * not exactly clear why the ARPHRD_FC* types exist 2185 * in the first place. 2186 * 2187 * For now, we map them to DLT_FC_2, and provide an 2188 * option of DLT_FC_2_WITH_FRAME_DELIMS, as well as 2189 * DLT_IP_OVER_FC just in case there's some old 2190 * driver out there that uses one of those types for 2191 * IP-over-FC on which somebody wants to capture 2192 * packets. 2193 */ 2194 handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 3); 2195 /* 2196 * If that fails, just leave the list empty. 2197 */ 2198 if (handle->dlt_list != NULL) { 2199 handle->dlt_list[0] = DLT_FC_2; 2200 handle->dlt_list[1] = DLT_FC_2_WITH_FRAME_DELIMS; 2201 handle->dlt_list[2] = DLT_IP_OVER_FC; 2202 handle->dlt_count = 3; 2203 } 2204 handle->linktype = DLT_FC_2; 2205 break; 2206 2207 #ifndef ARPHRD_IRDA 2208 #define ARPHRD_IRDA 783 2209 #endif 2210 case ARPHRD_IRDA: 2211 /* Don't expect IP packet out of this interfaces... */ 2212 handle->linktype = DLT_LINUX_IRDA; 2213 /* We need to save packet direction for IrDA decoding, 2214 * so let's use "Linux-cooked" mode. Jean II 2215 * 2216 * XXX - this is handled in setup_socket(). */ 2217 /* handlep->cooked = 1; */ 2218 break; 2219 2220 /* ARPHRD_LAPD is unofficial and randomly allocated, if reallocation 2221 * is needed, please report it to <daniele@orlandi.com> */ 2222 #ifndef ARPHRD_LAPD 2223 #define ARPHRD_LAPD 8445 2224 #endif 2225 case ARPHRD_LAPD: 2226 /* Don't expect IP packet out of this interfaces... 
*/ 2227 handle->linktype = DLT_LINUX_LAPD; 2228 break; 2229 2230 #ifndef ARPHRD_NONE 2231 #define ARPHRD_NONE 0xFFFE 2232 #endif 2233 case ARPHRD_NONE: 2234 /* 2235 * No link-layer header; packets are just IP 2236 * packets, so use DLT_RAW. 2237 */ 2238 handle->linktype = DLT_RAW; 2239 break; 2240 2241 #ifndef ARPHRD_IEEE802154 2242 #define ARPHRD_IEEE802154 804 2243 #endif 2244 case ARPHRD_IEEE802154: 2245 handle->linktype = DLT_IEEE802_15_4_NOFCS; 2246 break; 2247 2248 #ifndef ARPHRD_NETLINK 2249 #define ARPHRD_NETLINK 824 2250 #endif 2251 case ARPHRD_NETLINK: 2252 handle->linktype = DLT_NETLINK; 2253 /* 2254 * We need to use cooked mode, so that in sll_protocol we 2255 * pick up the netlink protocol type such as NETLINK_ROUTE, 2256 * NETLINK_GENERIC, NETLINK_FIB_LOOKUP, etc. 2257 * 2258 * XXX - this is handled in setup_socket(). 2259 */ 2260 /* handlep->cooked = 1; */ 2261 break; 2262 2263 #ifndef ARPHRD_VSOCKMON 2264 #define ARPHRD_VSOCKMON 826 2265 #endif 2266 case ARPHRD_VSOCKMON: 2267 handle->linktype = DLT_VSOCK; 2268 break; 2269 2270 default: 2271 handle->linktype = -1; 2272 break; 2273 } 2274 } 2275 2276 static void 2277 set_dlt_list_cooked(pcap_t *handle) 2278 { 2279 /* 2280 * Support both DLT_LINUX_SLL and DLT_LINUX_SLL2. 2281 */ 2282 handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 2); 2283 2284 /* 2285 * If that failed, just leave the list empty. 2286 */ 2287 if (handle->dlt_list != NULL) { 2288 handle->dlt_list[0] = DLT_LINUX_SLL; 2289 handle->dlt_list[1] = DLT_LINUX_SLL2; 2290 handle->dlt_count = 2; 2291 } 2292 } 2293 2294 /* 2295 * Try to set up a PF_PACKET socket. 2296 * Returns 0 on success and a PCAP_ERROR_ value on failure. 
2297 */ 2298 static int 2299 setup_socket(pcap_t *handle, int is_any_device) 2300 { 2301 struct pcap_linux *handlep = handle->priv; 2302 const char *device = handle->opt.device; 2303 int status = 0; 2304 int sock_fd, arptype; 2305 int val; 2306 int err = 0; 2307 struct packet_mreq mr; 2308 #if defined(SO_BPF_EXTENSIONS) && defined(SKF_AD_VLAN_TAG_PRESENT) 2309 int bpf_extensions; 2310 socklen_t len = sizeof(bpf_extensions); 2311 #endif 2312 2313 /* 2314 * Open a socket with protocol family packet. If cooked is true, 2315 * we open a SOCK_DGRAM socket for the cooked interface, otherwise 2316 * we open a SOCK_RAW socket for the raw interface. 2317 * 2318 * The protocol is set to 0. This means we will receive no 2319 * packets until we "bind" the socket with a non-zero 2320 * protocol. This allows us to setup the ring buffers without 2321 * dropping any packets. 2322 */ 2323 sock_fd = is_any_device ? 2324 socket(PF_PACKET, SOCK_DGRAM, 0) : 2325 socket(PF_PACKET, SOCK_RAW, 0); 2326 2327 if (sock_fd == -1) { 2328 if (errno == EPERM || errno == EACCES) { 2329 /* 2330 * You don't have permission to open the 2331 * socket. 2332 */ 2333 status = PCAP_ERROR_PERM_DENIED; 2334 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 2335 "Attempt to create packet socket failed - CAP_NET_RAW may be required"); 2336 } else { 2337 /* 2338 * Other error. 2339 */ 2340 status = PCAP_ERROR; 2341 } 2342 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 2343 errno, "socket"); 2344 return status; 2345 } 2346 2347 /* 2348 * Get the interface index of the loopback device. 2349 * If the attempt fails, don't fail, just set the 2350 * "handlep->lo_ifindex" to -1. 2351 * 2352 * XXX - can there be more than one device that loops 2353 * packets back, i.e. devices other than "lo"? If so, 2354 * we'd need to find them all, and have an array of 2355 * indices for them, and check all of them in 2356 * "pcap_read_packet()". 
2357 */ 2358 handlep->lo_ifindex = iface_get_id(sock_fd, "lo", handle->errbuf); 2359 2360 /* 2361 * Default value for offset to align link-layer payload 2362 * on a 4-byte boundary. 2363 */ 2364 handle->offset = 0; 2365 2366 /* 2367 * What kind of frames do we have to deal with? Fall back 2368 * to cooked mode if we have an unknown interface type 2369 * or a type we know doesn't work well in raw mode. 2370 */ 2371 if (!is_any_device) { 2372 /* Assume for now we don't need cooked mode. */ 2373 handlep->cooked = 0; 2374 2375 if (handle->opt.rfmon) { 2376 /* 2377 * We were asked to turn on monitor mode. 2378 * Do so before we get the link-layer type, 2379 * because entering monitor mode could change 2380 * the link-layer type. 2381 */ 2382 err = enter_rfmon_mode(handle, sock_fd, device); 2383 if (err < 0) { 2384 /* Hard failure */ 2385 close(sock_fd); 2386 return err; 2387 } 2388 if (err == 0) { 2389 /* 2390 * Nothing worked for turning monitor mode 2391 * on. 2392 */ 2393 close(sock_fd); 2394 return PCAP_ERROR_RFMON_NOTSUP; 2395 } 2396 2397 /* 2398 * Either monitor mode has been turned on for 2399 * the device, or we've been given a different 2400 * device to open for monitor mode. If we've 2401 * been given a different device, use it. 
2402 */ 2403 if (handlep->mondevice != NULL) 2404 device = handlep->mondevice; 2405 } 2406 arptype = iface_get_arptype(sock_fd, device, handle->errbuf); 2407 if (arptype < 0) { 2408 close(sock_fd); 2409 return arptype; 2410 } 2411 map_arphrd_to_dlt(handle, arptype, device, 1); 2412 if (handle->linktype == -1 || 2413 handle->linktype == DLT_LINUX_SLL || 2414 handle->linktype == DLT_LINUX_IRDA || 2415 handle->linktype == DLT_LINUX_LAPD || 2416 handle->linktype == DLT_NETLINK || 2417 (handle->linktype == DLT_EN10MB && 2418 (strncmp("isdn", device, 4) == 0 || 2419 strncmp("isdY", device, 4) == 0))) { 2420 /* 2421 * Unknown interface type (-1), or a 2422 * device we explicitly chose to run 2423 * in cooked mode (e.g., PPP devices), 2424 * or an ISDN device (whose link-layer 2425 * type we can only determine by using 2426 * APIs that may be different on different 2427 * kernels) - reopen in cooked mode. 2428 * 2429 * If the type is unknown, return a warning; 2430 * map_arphrd_to_dlt() has already set the 2431 * warning message. 2432 */ 2433 if (close(sock_fd) == -1) { 2434 pcap_fmt_errmsg_for_errno(handle->errbuf, 2435 PCAP_ERRBUF_SIZE, errno, "close"); 2436 return PCAP_ERROR; 2437 } 2438 sock_fd = socket(PF_PACKET, SOCK_DGRAM, 0); 2439 if (sock_fd < 0) { 2440 /* 2441 * Fatal error. We treat this as 2442 * a generic error; we already know 2443 * that we were able to open a 2444 * PF_PACKET/SOCK_RAW socket, so 2445 * any failure is a "this shouldn't 2446 * happen" case. 2447 */ 2448 pcap_fmt_errmsg_for_errno(handle->errbuf, 2449 PCAP_ERRBUF_SIZE, errno, "socket"); 2450 return PCAP_ERROR; 2451 } 2452 handlep->cooked = 1; 2453 2454 /* 2455 * Get rid of any link-layer type list 2456 * we allocated - this only supports cooked 2457 * capture. 
2458 */ 2459 if (handle->dlt_list != NULL) { 2460 free(handle->dlt_list); 2461 handle->dlt_list = NULL; 2462 handle->dlt_count = 0; 2463 set_dlt_list_cooked(handle); 2464 } 2465 2466 if (handle->linktype == -1) { 2467 /* 2468 * Warn that we're falling back on 2469 * cooked mode; we may want to 2470 * update "map_arphrd_to_dlt()" 2471 * to handle the new type. 2472 */ 2473 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 2474 "arptype %d not " 2475 "supported by libpcap - " 2476 "falling back to cooked " 2477 "socket", 2478 arptype); 2479 } 2480 2481 /* 2482 * IrDA capture is not a real "cooked" capture, 2483 * it's IrLAP frames, not IP packets. The 2484 * same applies to LAPD capture. 2485 */ 2486 if (handle->linktype != DLT_LINUX_IRDA && 2487 handle->linktype != DLT_LINUX_LAPD && 2488 handle->linktype != DLT_NETLINK) 2489 handle->linktype = DLT_LINUX_SLL; 2490 if (handle->linktype == -1) { 2491 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 2492 "unknown arptype %d, defaulting to cooked mode", 2493 arptype); 2494 status = PCAP_WARNING; 2495 } 2496 } 2497 2498 handlep->ifindex = iface_get_id(sock_fd, device, 2499 handle->errbuf); 2500 if (handlep->ifindex == -1) { 2501 close(sock_fd); 2502 return PCAP_ERROR; 2503 } 2504 2505 if ((err = iface_bind(sock_fd, handlep->ifindex, 2506 handle->errbuf, 0)) != 0) { 2507 close(sock_fd); 2508 return err; 2509 } 2510 } else { 2511 /* 2512 * The "any" device. 2513 */ 2514 if (handle->opt.rfmon) { 2515 /* 2516 * It doesn't support monitor mode. 2517 */ 2518 close(sock_fd); 2519 return PCAP_ERROR_RFMON_NOTSUP; 2520 } 2521 2522 /* 2523 * It uses cooked mode. 2524 */ 2525 handlep->cooked = 1; 2526 handle->linktype = DLT_LINUX_SLL; 2527 handle->dlt_list = NULL; 2528 handle->dlt_count = 0; 2529 set_dlt_list_cooked(handle); 2530 2531 /* 2532 * We're not bound to a device. 2533 * For now, we're using this as an indication 2534 * that we can't transmit; stop doing that only 2535 * if we figure out how to transmit in cooked 2536 * mode. 
2537 */ 2538 handlep->ifindex = -1; 2539 } 2540 2541 /* 2542 * Select promiscuous mode on if "promisc" is set. 2543 * 2544 * Do not turn allmulti mode on if we don't select 2545 * promiscuous mode - on some devices (e.g., Orinoco 2546 * wireless interfaces), allmulti mode isn't supported 2547 * and the driver implements it by turning promiscuous 2548 * mode on, and that screws up the operation of the 2549 * card as a normal networking interface, and on no 2550 * other platform I know of does starting a non- 2551 * promiscuous capture affect which multicast packets 2552 * are received by the interface. 2553 */ 2554 2555 /* 2556 * Hmm, how can we set promiscuous mode on all interfaces? 2557 * I am not sure if that is possible at all. For now, we 2558 * silently ignore attempts to turn promiscuous mode on 2559 * for the "any" device (so you don't have to explicitly 2560 * disable it in programs such as tcpdump). 2561 */ 2562 2563 if (!is_any_device && handle->opt.promisc) { 2564 memset(&mr, 0, sizeof(mr)); 2565 mr.mr_ifindex = handlep->ifindex; 2566 mr.mr_type = PACKET_MR_PROMISC; 2567 if (setsockopt(sock_fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, 2568 &mr, sizeof(mr)) == -1) { 2569 pcap_fmt_errmsg_for_errno(handle->errbuf, 2570 PCAP_ERRBUF_SIZE, errno, "setsockopt (PACKET_ADD_MEMBERSHIP)"); 2571 close(sock_fd); 2572 return PCAP_ERROR; 2573 } 2574 } 2575 2576 /* 2577 * Enable auxiliary data and reserve room for reconstructing 2578 * VLAN headers. 2579 * 2580 * XXX - is enabling auxiliary data necessary, now that we 2581 * only support memory-mapped capture? The kernel's memory-mapped 2582 * capture code doesn't seem to check whether auxiliary data 2583 * is enabled, it seems to provide it whether it is or not. 
2584 */ 2585 val = 1; 2586 if (setsockopt(sock_fd, SOL_PACKET, PACKET_AUXDATA, &val, 2587 sizeof(val)) == -1 && errno != ENOPROTOOPT) { 2588 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 2589 errno, "setsockopt (PACKET_AUXDATA)"); 2590 close(sock_fd); 2591 return PCAP_ERROR; 2592 } 2593 handle->offset += VLAN_TAG_LEN; 2594 2595 /* 2596 * If we're in cooked mode, make the snapshot length 2597 * large enough to hold a "cooked mode" header plus 2598 * 1 byte of packet data (so we don't pass a byte 2599 * count of 0 to "recvfrom()"). 2600 * XXX - we don't know whether this will be DLT_LINUX_SLL 2601 * or DLT_LINUX_SLL2, so make sure it's big enough for 2602 * a DLT_LINUX_SLL2 "cooked mode" header; a snapshot length 2603 * that small is silly anyway. 2604 */ 2605 if (handlep->cooked) { 2606 if (handle->snapshot < SLL2_HDR_LEN + 1) 2607 handle->snapshot = SLL2_HDR_LEN + 1; 2608 } 2609 handle->bufsize = handle->snapshot; 2610 2611 /* 2612 * Set the offset at which to insert VLAN tags. 2613 */ 2614 set_vlan_offset(handle); 2615 2616 if (handle->opt.tstamp_precision == PCAP_TSTAMP_PRECISION_NANO) { 2617 int nsec_tstamps = 1; 2618 2619 if (setsockopt(sock_fd, SOL_SOCKET, SO_TIMESTAMPNS, &nsec_tstamps, sizeof(nsec_tstamps)) < 0) { 2620 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, "setsockopt: unable to set SO_TIMESTAMPNS"); 2621 close(sock_fd); 2622 return PCAP_ERROR; 2623 } 2624 } 2625 2626 /* 2627 * We've succeeded. Save the socket FD in the pcap structure. 2628 */ 2629 handle->fd = sock_fd; 2630 2631 #if defined(SO_BPF_EXTENSIONS) && defined(SKF_AD_VLAN_TAG_PRESENT) 2632 /* 2633 * Can we generate special code for VLAN checks? 2634 * (XXX - what if we need the special code but it's not supported 2635 * by the OS? Is that possible?) 2636 */ 2637 if (getsockopt(sock_fd, SOL_SOCKET, SO_BPF_EXTENSIONS, 2638 &bpf_extensions, &len) == 0) { 2639 if (bpf_extensions >= SKF_AD_VLAN_TAG_PRESENT) { 2640 /* 2641 * Yes, we can. Request that we do so. 
2642 */ 2643 handle->bpf_codegen_flags |= BPF_SPECIAL_VLAN_HANDLING; 2644 } 2645 } 2646 #endif /* defined(SO_BPF_EXTENSIONS) && defined(SKF_AD_VLAN_TAG_PRESENT) */ 2647 2648 return status; 2649 } 2650 2651 /* 2652 * Attempt to setup memory-mapped access. 2653 * 2654 * On success, returns 1, and sets *status to 0 if there are no warnings 2655 * or to a PCAP_WARNING_ code if there is a warning. 2656 * 2657 * On error, returns -1, and sets *status to the appropriate error code; 2658 * if that is PCAP_ERROR, sets handle->errbuf to the appropriate message. 2659 */ 2660 static int 2661 setup_mmapped(pcap_t *handle, int *status) 2662 { 2663 struct pcap_linux *handlep = handle->priv; 2664 int ret; 2665 2666 /* 2667 * Attempt to allocate a buffer to hold the contents of one 2668 * packet, for use by the oneshot callback. 2669 */ 2670 handlep->oneshot_buffer = malloc(handle->snapshot); 2671 if (handlep->oneshot_buffer == NULL) { 2672 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 2673 errno, "can't allocate oneshot buffer"); 2674 *status = PCAP_ERROR; 2675 return -1; 2676 } 2677 2678 if (handle->opt.buffer_size == 0) { 2679 /* by default request 2M for the ring buffer */ 2680 handle->opt.buffer_size = 2*1024*1024; 2681 } 2682 ret = prepare_tpacket_socket(handle); 2683 if (ret == -1) { 2684 free(handlep->oneshot_buffer); 2685 handlep->oneshot_buffer = NULL; 2686 *status = PCAP_ERROR; 2687 return ret; 2688 } 2689 ret = create_ring(handle, status); 2690 if (ret == -1) { 2691 /* 2692 * Error attempting to enable memory-mapped capture; 2693 * fail. create_ring() has set *status. 2694 */ 2695 free(handlep->oneshot_buffer); 2696 handlep->oneshot_buffer = NULL; 2697 return -1; 2698 } 2699 2700 /* 2701 * Success. *status has been set either to 0 if there are no 2702 * warnings or to a PCAP_WARNING_ value if there is a warning. 2703 * 2704 * handle->offset is used to get the current position into the rx ring. 2705 * handle->cc is used to store the ring size. 
2706 */ 2707 2708 /* 2709 * Set the timeout to use in poll() before returning. 2710 */ 2711 set_poll_timeout(handlep); 2712 2713 return 1; 2714 } 2715 2716 /* 2717 * Attempt to set the socket to the specified version of the memory-mapped 2718 * header. 2719 * 2720 * Return 0 if we succeed; return 1 if we fail because that version isn't 2721 * supported; return -1 on any other error, and set handle->errbuf. 2722 */ 2723 static int 2724 init_tpacket(pcap_t *handle, int version, const char *version_str) 2725 { 2726 struct pcap_linux *handlep = handle->priv; 2727 int val = version; 2728 socklen_t len = sizeof(val); 2729 2730 /* 2731 * Probe whether kernel supports the specified TPACKET version; 2732 * this also gets the length of the header for that version. 2733 * 2734 * This socket option was introduced in 2.6.27, which was 2735 * also the first release with TPACKET_V2 support. 2736 */ 2737 if (getsockopt(handle->fd, SOL_PACKET, PACKET_HDRLEN, &val, &len) < 0) { 2738 if (errno == EINVAL) { 2739 /* 2740 * EINVAL means this specific version of TPACKET 2741 * is not supported. Tell the caller they can try 2742 * with a different one; if they've run out of 2743 * others to try, let them set the error message 2744 * appropriately. 2745 */ 2746 return 1; 2747 } 2748 2749 /* 2750 * All other errors are fatal. 2751 */ 2752 if (errno == ENOPROTOOPT) { 2753 /* 2754 * PACKET_HDRLEN isn't supported, which means 2755 * that memory-mapped capture isn't supported. 2756 * Indicate that in the message. 2757 */ 2758 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 2759 "Kernel doesn't support memory-mapped capture; a 2.6.27 or later 2.x kernel is required, with CONFIG_PACKET_MMAP specified for 2.x kernels"); 2760 } else { 2761 /* 2762 * Some unexpected error. 
2763 */ 2764 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 2765 errno, "can't get %s header len on packet socket", 2766 version_str); 2767 } 2768 return -1; 2769 } 2770 handlep->tp_hdrlen = val; 2771 2772 val = version; 2773 if (setsockopt(handle->fd, SOL_PACKET, PACKET_VERSION, &val, 2774 sizeof(val)) < 0) { 2775 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 2776 errno, "can't activate %s on packet socket", version_str); 2777 return -1; 2778 } 2779 handlep->tp_version = version; 2780 2781 return 0; 2782 } 2783 2784 /* 2785 * Attempt to set the socket to version 3 of the memory-mapped header and, 2786 * if that fails because version 3 isn't supported, attempt to fall 2787 * back to version 2. If version 2 isn't supported, just fail. 2788 * 2789 * Return 0 if we succeed and -1 on any other error, and set handle->errbuf. 2790 */ 2791 static int 2792 prepare_tpacket_socket(pcap_t *handle) 2793 { 2794 int ret; 2795 2796 #ifdef HAVE_TPACKET3 2797 /* 2798 * Try setting the version to TPACKET_V3. 2799 * 2800 * The only mode in which buffering is done on PF_PACKET 2801 * sockets, so that packets might not be delivered 2802 * immediately, is TPACKET_V3 mode. 2803 * 2804 * The buffering cannot be disabled in that mode, so 2805 * if the user has requested immediate mode, we don't 2806 * use TPACKET_V3. 2807 */ 2808 if (!handle->opt.immediate) { 2809 ret = init_tpacket(handle, TPACKET_V3, "TPACKET_V3"); 2810 if (ret == 0) { 2811 /* 2812 * Success. 2813 */ 2814 return 0; 2815 } 2816 if (ret == -1) { 2817 /* 2818 * We failed for some reason other than "the 2819 * kernel doesn't support TPACKET_V3". 2820 */ 2821 return -1; 2822 } 2823 2824 /* 2825 * This means it returned 1, which means "the kernel 2826 * doesn't support TPACKET_V3"; try TPACKET_V2. 2827 */ 2828 } 2829 #endif /* HAVE_TPACKET3 */ 2830 2831 /* 2832 * Try setting the version to TPACKET_V2. 
2833 */ 2834 ret = init_tpacket(handle, TPACKET_V2, "TPACKET_V2"); 2835 if (ret == 0) { 2836 /* 2837 * Success. 2838 */ 2839 return 0; 2840 } 2841 2842 if (ret == 1) { 2843 /* 2844 * OK, the kernel supports memory-mapped capture, but 2845 * not TPACKET_V2. Set the error message appropriately. 2846 */ 2847 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 2848 "Kernel doesn't support TPACKET_V2; a 2.6.27 or later kernel is required"); 2849 } 2850 2851 /* 2852 * We failed. 2853 */ 2854 return -1; 2855 } 2856 2857 #define MAX(a,b) ((a)>(b)?(a):(b)) 2858 2859 /* 2860 * Attempt to set up memory-mapped access. 2861 * 2862 * On success, returns 1, and sets *status to 0 if there are no warnings 2863 * or to a PCAP_WARNING_ code if there is a warning. 2864 * 2865 * On error, returns -1, and sets *status to the appropriate error code; 2866 * if that is PCAP_ERROR, sets handle->errbuf to the appropriate message. 2867 */ 2868 static int 2869 create_ring(pcap_t *handle, int *status) 2870 { 2871 struct pcap_linux *handlep = handle->priv; 2872 unsigned i, j, frames_per_block; 2873 #ifdef HAVE_TPACKET3 2874 /* 2875 * For sockets using TPACKET_V2, the extra stuff at the end of a 2876 * struct tpacket_req3 will be ignored, so this is OK even for 2877 * those sockets. 2878 */ 2879 struct tpacket_req3 req; 2880 #else 2881 struct tpacket_req req; 2882 #endif 2883 socklen_t len; 2884 unsigned int sk_type, tp_reserve, maclen, tp_hdrlen, netoff, macoff; 2885 unsigned int frame_size; 2886 2887 /* 2888 * Start out assuming no warnings or errors. 2889 */ 2890 *status = 0; 2891 2892 /* 2893 * Reserve space for VLAN tag reconstruction. 2894 */ 2895 tp_reserve = VLAN_TAG_LEN; 2896 2897 /* 2898 * If we're capturing in cooked mode, reserve space for 2899 * a DLT_LINUX_SLL2 header; we don't know yet whether 2900 * we'll be using DLT_LINUX_SLL or DLT_LINUX_SLL2, as 2901 * that can be changed on an open device, so we reserve 2902 * space for the larger of the two. 
2903 * 2904 * XXX - we assume that the kernel is still adding 2905 * 16 bytes of extra space, so we subtract 16 from 2906 * SLL2_HDR_LEN to get the additional space needed. 2907 * (Are they doing that for DLT_LINUX_SLL, the link- 2908 * layer header for which is 16 bytes?) 2909 * 2910 * XXX - should we use TPACKET_ALIGN(SLL2_HDR_LEN - 16)? 2911 */ 2912 if (handlep->cooked) 2913 tp_reserve += SLL2_HDR_LEN - 16; 2914 2915 /* 2916 * Try to request that amount of reserve space. 2917 * This must be done before creating the ring buffer. 2918 */ 2919 len = sizeof(tp_reserve); 2920 if (setsockopt(handle->fd, SOL_PACKET, PACKET_RESERVE, 2921 &tp_reserve, len) < 0) { 2922 pcap_fmt_errmsg_for_errno(handle->errbuf, 2923 PCAP_ERRBUF_SIZE, errno, 2924 "setsockopt (PACKET_RESERVE)"); 2925 *status = PCAP_ERROR; 2926 return -1; 2927 } 2928 2929 switch (handlep->tp_version) { 2930 2931 case TPACKET_V2: 2932 /* Note that with large snapshot length (say 256K, which is 2933 * the default for recent versions of tcpdump, Wireshark, 2934 * TShark, dumpcap or 64K, the value that "-s 0" has given for 2935 * a long time with tcpdump), if we use the snapshot 2936 * length to calculate the frame length, only a few frames 2937 * will be available in the ring even with pretty 2938 * large ring size (and a lot of memory will be unused). 2939 * 2940 * Ideally, we should choose a frame length based on the 2941 * minimum of the specified snapshot length and the maximum 2942 * packet size. That's not as easy as it sounds; consider, 2943 * for example, an 802.11 interface in monitor mode, where 2944 * the frame would include a radiotap header, where the 2945 * maximum radiotap header length is device-dependent. 2946 * 2947 * So, for now, we just do this for Ethernet devices, where 2948 * there's no metadata header, and the link-layer header is 2949 * fixed length. 
We can get the maximum packet size by 2950 * adding 18, the Ethernet header length plus the CRC length 2951 * (just in case we happen to get the CRC in the packet), to 2952 * the MTU of the interface; we fetch the MTU in the hopes 2953 * that it reflects support for jumbo frames. (Even if the 2954 * interface is just being used for passive snooping, the 2955 * driver might set the size of buffers in the receive ring 2956 * based on the MTU, so that the MTU limits the maximum size 2957 * of packets that we can receive.) 2958 * 2959 * If segmentation/fragmentation or receive offload are 2960 * enabled, we can get reassembled/aggregated packets larger 2961 * than MTU, but bounded to 65535 plus the Ethernet overhead, 2962 * due to kernel and protocol constraints */ 2963 frame_size = handle->snapshot; 2964 if (handle->linktype == DLT_EN10MB) { 2965 unsigned int max_frame_len; 2966 int mtu; 2967 int offload; 2968 2969 mtu = iface_get_mtu(handle->fd, handle->opt.device, 2970 handle->errbuf); 2971 if (mtu == -1) { 2972 *status = PCAP_ERROR; 2973 return -1; 2974 } 2975 offload = iface_get_offload(handle); 2976 if (offload == -1) { 2977 *status = PCAP_ERROR; 2978 return -1; 2979 } 2980 if (offload) 2981 max_frame_len = MAX(mtu, 65535); 2982 else 2983 max_frame_len = mtu; 2984 max_frame_len += 18; 2985 2986 if (frame_size > max_frame_len) 2987 frame_size = max_frame_len; 2988 } 2989 2990 /* NOTE: calculus matching those in tpacket_rcv() 2991 * in linux-2.6/net/packet/af_packet.c 2992 */ 2993 len = sizeof(sk_type); 2994 if (getsockopt(handle->fd, SOL_SOCKET, SO_TYPE, &sk_type, 2995 &len) < 0) { 2996 pcap_fmt_errmsg_for_errno(handle->errbuf, 2997 PCAP_ERRBUF_SIZE, errno, "getsockopt (SO_TYPE)"); 2998 *status = PCAP_ERROR; 2999 return -1; 3000 } 3001 maclen = (sk_type == SOCK_DGRAM) ? 
0 : MAX_LINKHEADER_SIZE; 3002 /* XXX: in the kernel maclen is calculated from 3003 * LL_ALLOCATED_SPACE(dev) and vnet_hdr.hdr_len 3004 * in: packet_snd() in linux-2.6/net/packet/af_packet.c 3005 * then packet_alloc_skb() in linux-2.6/net/packet/af_packet.c 3006 * then sock_alloc_send_pskb() in linux-2.6/net/core/sock.c 3007 * but I see no way to get those sizes in userspace, 3008 * like for instance with an ifreq ioctl(); 3009 * the best thing I've found so far is MAX_HEADER in 3010 * the kernel part of linux-2.6/include/linux/netdevice.h 3011 * which goes up to 128+48=176; since pcap-linux.c 3012 * defines a MAX_LINKHEADER_SIZE of 256 which is 3013 * greater than that, let's use it.. maybe is it even 3014 * large enough to directly replace macoff.. 3015 */ 3016 tp_hdrlen = TPACKET_ALIGN(handlep->tp_hdrlen) + sizeof(struct sockaddr_ll) ; 3017 netoff = TPACKET_ALIGN(tp_hdrlen + (maclen < 16 ? 16 : maclen)) + tp_reserve; 3018 /* NOTE: AFAICS tp_reserve may break the TPACKET_ALIGN 3019 * of netoff, which contradicts 3020 * linux-2.6/Documentation/networking/packet_mmap.txt 3021 * documenting that: 3022 * "- Gap, chosen so that packet data (Start+tp_net) 3023 * aligns to TPACKET_ALIGNMENT=16" 3024 */ 3025 /* NOTE: in linux-2.6/include/linux/skbuff.h: 3026 * "CPUs often take a performance hit 3027 * when accessing unaligned memory locations" 3028 */ 3029 macoff = netoff - maclen; 3030 req.tp_frame_size = TPACKET_ALIGN(macoff + frame_size); 3031 /* 3032 * Round the buffer size up to a multiple of the 3033 * frame size (rather than rounding down, which 3034 * would give a buffer smaller than our caller asked 3035 * for, and possibly give zero frames if the requested 3036 * buffer size is too small for one frame). 
3037 */ 3038 req.tp_frame_nr = (handle->opt.buffer_size + req.tp_frame_size - 1)/req.tp_frame_size; 3039 break; 3040 3041 #ifdef HAVE_TPACKET3 3042 case TPACKET_V3: 3043 /* The "frames" for this are actually buffers that 3044 * contain multiple variable-sized frames. 3045 * 3046 * We pick a "frame" size of MAXIMUM_SNAPLEN to leave 3047 * enough room for at least one reasonably-sized packet 3048 * in the "frame". */ 3049 req.tp_frame_size = MAXIMUM_SNAPLEN; 3050 /* 3051 * Round the buffer size up to a multiple of the 3052 * "frame" size (rather than rounding down, which 3053 * would give a buffer smaller than our caller asked 3054 * for, and possibly give zero "frames" if the requested 3055 * buffer size is too small for one "frame"). 3056 */ 3057 req.tp_frame_nr = (handle->opt.buffer_size + req.tp_frame_size - 1)/req.tp_frame_size; 3058 break; 3059 #endif 3060 default: 3061 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3062 "Internal error: unknown TPACKET_ value %u", 3063 handlep->tp_version); 3064 *status = PCAP_ERROR; 3065 return -1; 3066 } 3067 3068 /* compute the minimum block size that will handle this frame. 3069 * The block has to be page size aligned. 3070 * The max block size allowed by the kernel is arch-dependent and 3071 * it's not explicitly checked here. */ 3072 req.tp_block_size = getpagesize(); 3073 while (req.tp_block_size < req.tp_frame_size) 3074 req.tp_block_size <<= 1; 3075 3076 frames_per_block = req.tp_block_size/req.tp_frame_size; 3077 3078 /* 3079 * PACKET_TIMESTAMP was added after linux/net_tstamp.h was, 3080 * so we check for PACKET_TIMESTAMP. We check for 3081 * linux/net_tstamp.h just in case a system somehow has 3082 * PACKET_TIMESTAMP but not linux/net_tstamp.h; that might 3083 * be unnecessary. 
3084 * 3085 * SIOCSHWTSTAMP was introduced in the patch that introduced 3086 * linux/net_tstamp.h, so we don't bother checking whether 3087 * SIOCSHWTSTAMP is defined (if your Linux system has 3088 * linux/net_tstamp.h but doesn't define SIOCSHWTSTAMP, your 3089 * Linux system is badly broken). 3090 */ 3091 #if defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) 3092 /* 3093 * If we were told to do so, ask the kernel and the driver 3094 * to use hardware timestamps. 3095 * 3096 * Hardware timestamps are only supported with mmapped 3097 * captures. 3098 */ 3099 if (handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER || 3100 handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER_UNSYNCED) { 3101 struct hwtstamp_config hwconfig; 3102 struct ifreq ifr; 3103 int timesource; 3104 3105 /* 3106 * Ask for hardware time stamps on all packets, 3107 * including transmitted packets. 3108 */ 3109 memset(&hwconfig, 0, sizeof(hwconfig)); 3110 hwconfig.tx_type = HWTSTAMP_TX_ON; 3111 hwconfig.rx_filter = HWTSTAMP_FILTER_ALL; 3112 3113 memset(&ifr, 0, sizeof(ifr)); 3114 pcap_strlcpy(ifr.ifr_name, handle->opt.device, sizeof(ifr.ifr_name)); 3115 ifr.ifr_data = (void *)&hwconfig; 3116 3117 /* 3118 * This may require CAP_NET_ADMIN. 3119 */ 3120 if (ioctl(handle->fd, SIOCSHWTSTAMP, &ifr) < 0) { 3121 switch (errno) { 3122 3123 case EPERM: 3124 /* 3125 * Treat this as an error, as the 3126 * user should try to run this 3127 * with the appropriate privileges - 3128 * and, if they can't, shouldn't 3129 * try requesting hardware time stamps. 3130 */ 3131 *status = PCAP_ERROR_PERM_DENIED; 3132 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3133 "Attempt to set hardware timestamp failed - CAP_NET_ADMIN may be required"); 3134 return -1; 3135 3136 case EOPNOTSUPP: 3137 case ERANGE: 3138 /* 3139 * Treat this as a warning, as the 3140 * only way to fix the warning is to 3141 * get an adapter that supports hardware 3142 * time stamps for *all* packets. 
3143 * (ERANGE means "we support hardware 3144 * time stamps, but for packets matching 3145 * that particular filter", so it means 3146 * "we don't support hardware time stamps 3147 * for all incoming packets" here.) 3148 * 3149 * We'll just fall back on the standard 3150 * host time stamps. 3151 */ 3152 *status = PCAP_WARNING_TSTAMP_TYPE_NOTSUP; 3153 break; 3154 3155 default: 3156 pcap_fmt_errmsg_for_errno(handle->errbuf, 3157 PCAP_ERRBUF_SIZE, errno, 3158 "SIOCSHWTSTAMP failed"); 3159 *status = PCAP_ERROR; 3160 return -1; 3161 } 3162 } else { 3163 /* 3164 * Well, that worked. Now specify the type of 3165 * hardware time stamp we want for this 3166 * socket. 3167 */ 3168 if (handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER) { 3169 /* 3170 * Hardware timestamp, synchronized 3171 * with the system clock. 3172 */ 3173 timesource = SOF_TIMESTAMPING_SYS_HARDWARE; 3174 } else { 3175 /* 3176 * PCAP_TSTAMP_ADAPTER_UNSYNCED - hardware 3177 * timestamp, not synchronized with the 3178 * system clock. 3179 */ 3180 timesource = SOF_TIMESTAMPING_RAW_HARDWARE; 3181 } 3182 if (setsockopt(handle->fd, SOL_PACKET, PACKET_TIMESTAMP, 3183 (void *)×ource, sizeof(timesource))) { 3184 pcap_fmt_errmsg_for_errno(handle->errbuf, 3185 PCAP_ERRBUF_SIZE, errno, 3186 "can't set PACKET_TIMESTAMP"); 3187 *status = PCAP_ERROR; 3188 return -1; 3189 } 3190 } 3191 } 3192 #endif /* HAVE_LINUX_NET_TSTAMP_H && PACKET_TIMESTAMP */ 3193 3194 /* ask the kernel to create the ring */ 3195 retry: 3196 req.tp_block_nr = req.tp_frame_nr / frames_per_block; 3197 3198 /* req.tp_frame_nr is requested to match frames_per_block*req.tp_block_nr */ 3199 req.tp_frame_nr = req.tp_block_nr * frames_per_block; 3200 3201 #ifdef HAVE_TPACKET3 3202 /* timeout value to retire block - use the configured buffering timeout, or default if <0. 
*/ 3203 if (handlep->timeout > 0) { 3204 /* Use the user specified timeout as the block timeout */ 3205 req.tp_retire_blk_tov = handlep->timeout; 3206 } else if (handlep->timeout == 0) { 3207 /* 3208 * In pcap, this means "infinite timeout"; TPACKET_V3 3209 * doesn't support that, so just set it to UINT_MAX 3210 * milliseconds. In the TPACKET_V3 loop, if the 3211 * timeout is 0, and we haven't yet seen any packets, 3212 * and we block and still don't have any packets, we 3213 * keep blocking until we do. 3214 */ 3215 req.tp_retire_blk_tov = UINT_MAX; 3216 } else { 3217 /* 3218 * XXX - this is not valid; use 0, meaning "have the 3219 * kernel pick a default", for now. 3220 */ 3221 req.tp_retire_blk_tov = 0; 3222 } 3223 /* private data not used */ 3224 req.tp_sizeof_priv = 0; 3225 /* Rx ring - feature request bits - none (rxhash will not be filled) */ 3226 req.tp_feature_req_word = 0; 3227 #endif 3228 3229 if (setsockopt(handle->fd, SOL_PACKET, PACKET_RX_RING, 3230 (void *) &req, sizeof(req))) { 3231 if ((errno == ENOMEM) && (req.tp_block_nr > 1)) { 3232 /* 3233 * Memory failure; try to reduce the requested ring 3234 * size. 3235 * 3236 * We used to reduce this by half -- do 5% instead. 3237 * That may result in more iterations and a longer 3238 * startup, but the user will be much happier with 3239 * the resulting buffer size. 
3240 */ 3241 if (req.tp_frame_nr < 20) 3242 req.tp_frame_nr -= 1; 3243 else 3244 req.tp_frame_nr -= req.tp_frame_nr/20; 3245 goto retry; 3246 } 3247 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 3248 errno, "can't create rx ring on packet socket"); 3249 *status = PCAP_ERROR; 3250 return -1; 3251 } 3252 3253 /* memory map the rx ring */ 3254 handlep->mmapbuflen = req.tp_block_nr * req.tp_block_size; 3255 handlep->mmapbuf = mmap(0, handlep->mmapbuflen, 3256 PROT_READ|PROT_WRITE, MAP_SHARED, handle->fd, 0); 3257 if (handlep->mmapbuf == MAP_FAILED) { 3258 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 3259 errno, "can't mmap rx ring"); 3260 3261 /* clear the allocated ring on error*/ 3262 destroy_ring(handle); 3263 *status = PCAP_ERROR; 3264 return -1; 3265 } 3266 3267 /* allocate a ring for each frame header pointer*/ 3268 handle->cc = req.tp_frame_nr; 3269 handle->buffer = malloc(handle->cc * sizeof(union thdr *)); 3270 if (!handle->buffer) { 3271 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 3272 errno, "can't allocate ring of frame headers"); 3273 3274 destroy_ring(handle); 3275 *status = PCAP_ERROR; 3276 return -1; 3277 } 3278 3279 /* fill the header ring with proper frame ptr*/ 3280 handle->offset = 0; 3281 for (i=0; i<req.tp_block_nr; ++i) { 3282 u_char *base = &handlep->mmapbuf[i*req.tp_block_size]; 3283 for (j=0; j<frames_per_block; ++j, ++handle->offset) { 3284 RING_GET_CURRENT_FRAME(handle) = base; 3285 base += req.tp_frame_size; 3286 } 3287 } 3288 3289 handle->bufsize = req.tp_frame_size; 3290 handle->offset = 0; 3291 return 1; 3292 } 3293 3294 /* free all ring related resources*/ 3295 static void 3296 destroy_ring(pcap_t *handle) 3297 { 3298 struct pcap_linux *handlep = handle->priv; 3299 3300 /* 3301 * Tell the kernel to destroy the ring. 3302 * We don't check for setsockopt failure, as 1) we can't recover 3303 * from an error and 2) we might not yet have set it up in the 3304 * first place. 
3305 */ 3306 struct tpacket_req req; 3307 memset(&req, 0, sizeof(req)); 3308 (void)setsockopt(handle->fd, SOL_PACKET, PACKET_RX_RING, 3309 (void *) &req, sizeof(req)); 3310 3311 /* if ring is mapped, unmap it*/ 3312 if (handlep->mmapbuf) { 3313 /* do not test for mmap failure, as we can't recover from any error */ 3314 (void)munmap(handlep->mmapbuf, handlep->mmapbuflen); 3315 handlep->mmapbuf = NULL; 3316 } 3317 } 3318 3319 /* 3320 * Special one-shot callback, used for pcap_next() and pcap_next_ex(), 3321 * for Linux mmapped capture. 3322 * 3323 * The problem is that pcap_next() and pcap_next_ex() expect the packet 3324 * data handed to the callback to be valid after the callback returns, 3325 * but pcap_read_linux_mmap() has to release that packet as soon as 3326 * the callback returns (otherwise, the kernel thinks there's still 3327 * at least one unprocessed packet available in the ring, so a select() 3328 * will immediately return indicating that there's data to process), so, 3329 * in the callback, we have to make a copy of the packet. 3330 * 3331 * Yes, this means that, if the capture is using the ring buffer, using 3332 * pcap_next() or pcap_next_ex() requires more copies than using 3333 * pcap_loop() or pcap_dispatch(). If that bothers you, don't use 3334 * pcap_next() or pcap_next_ex(). 
3335 */ 3336 static void 3337 pcap_oneshot_linux(u_char *user, const struct pcap_pkthdr *h, 3338 const u_char *bytes) 3339 { 3340 struct oneshot_userdata *sp = (struct oneshot_userdata *)user; 3341 pcap_t *handle = sp->pd; 3342 struct pcap_linux *handlep = handle->priv; 3343 3344 *sp->hdr = *h; 3345 memcpy(handlep->oneshot_buffer, bytes, h->caplen); 3346 *sp->pkt = handlep->oneshot_buffer; 3347 } 3348 3349 static int 3350 pcap_getnonblock_linux(pcap_t *handle) 3351 { 3352 struct pcap_linux *handlep = handle->priv; 3353 3354 /* use negative value of timeout to indicate non blocking ops */ 3355 return (handlep->timeout<0); 3356 } 3357 3358 static int 3359 pcap_setnonblock_linux(pcap_t *handle, int nonblock) 3360 { 3361 struct pcap_linux *handlep = handle->priv; 3362 3363 /* 3364 * Set the file descriptor to non-blocking mode, as we use 3365 * it for sending packets. 3366 */ 3367 if (pcap_setnonblock_fd(handle, nonblock) == -1) 3368 return -1; 3369 3370 /* 3371 * Map each value to their corresponding negation to 3372 * preserve the timeout value provided with pcap_set_timeout. 3373 */ 3374 if (nonblock) { 3375 if (handlep->timeout >= 0) { 3376 /* 3377 * Indicate that we're switching to 3378 * non-blocking mode. 3379 */ 3380 handlep->timeout = ~handlep->timeout; 3381 } 3382 if (handlep->poll_breakloop_fd != -1) { 3383 /* Close the eventfd; we do not need it in nonblock mode. */ 3384 close(handlep->poll_breakloop_fd); 3385 handlep->poll_breakloop_fd = -1; 3386 } 3387 } else { 3388 if (handlep->poll_breakloop_fd == -1) { 3389 /* If we did not have an eventfd, open one now that we are blocking. 
*/ 3390 if ( ( handlep->poll_breakloop_fd = eventfd(0, EFD_NONBLOCK) ) == -1 ) { 3391 int save_errno = errno; 3392 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3393 "Could not open eventfd: %s", 3394 strerror(errno)); 3395 errno = save_errno; 3396 return -1; 3397 } 3398 } 3399 if (handlep->timeout < 0) { 3400 handlep->timeout = ~handlep->timeout; 3401 } 3402 } 3403 /* Update the timeout to use in poll(). */ 3404 set_poll_timeout(handlep); 3405 return 0; 3406 } 3407 3408 /* 3409 * Get the status field of the ring buffer frame at a specified offset. 3410 */ 3411 static inline u_int 3412 pcap_get_ring_frame_status(pcap_t *handle, int offset) 3413 { 3414 struct pcap_linux *handlep = handle->priv; 3415 union thdr h; 3416 3417 h.raw = RING_GET_FRAME_AT(handle, offset); 3418 switch (handlep->tp_version) { 3419 case TPACKET_V2: 3420 return __atomic_load_n(&h.h2->tp_status, __ATOMIC_ACQUIRE); 3421 break; 3422 #ifdef HAVE_TPACKET3 3423 case TPACKET_V3: 3424 return __atomic_load_n(&h.h3->hdr.bh1.block_status, __ATOMIC_ACQUIRE); 3425 break; 3426 #endif 3427 } 3428 /* This should not happen. */ 3429 return 0; 3430 } 3431 3432 /* 3433 * Block waiting for frames to be available. 3434 */ 3435 static int pcap_wait_for_frames_mmap(pcap_t *handle) 3436 { 3437 struct pcap_linux *handlep = handle->priv; 3438 int timeout; 3439 struct ifreq ifr; 3440 int ret; 3441 struct pollfd pollinfo[2]; 3442 int numpollinfo; 3443 pollinfo[0].fd = handle->fd; 3444 pollinfo[0].events = POLLIN; 3445 if ( handlep->poll_breakloop_fd == -1 ) { 3446 numpollinfo = 1; 3447 pollinfo[1].revents = 0; 3448 /* 3449 * We set pollinfo[1].revents to zero, even though 3450 * numpollinfo = 1 meaning that poll() doesn't see 3451 * pollinfo[1], so that we do not have to add a 3452 * conditional of numpollinfo > 1 below when we 3453 * test pollinfo[1].revents. 
3454 */ 3455 } else { 3456 pollinfo[1].fd = handlep->poll_breakloop_fd; 3457 pollinfo[1].events = POLLIN; 3458 numpollinfo = 2; 3459 } 3460 3461 /* 3462 * Keep polling until we either get some packets to read, see 3463 * that we got told to break out of the loop, get a fatal error, 3464 * or discover that the device went away. 3465 * 3466 * In non-blocking mode, we must still do one poll() to catch 3467 * any pending error indications, but the poll() has a timeout 3468 * of 0, so that it doesn't block, and we quit after that one 3469 * poll(). 3470 * 3471 * If we've seen an ENETDOWN, it might be the first indication 3472 * that the device went away, or it might just be that it was 3473 * configured down. Unfortunately, there's no guarantee that 3474 * the device has actually been removed as an interface, because: 3475 * 3476 * 1) if, as appears to be the case at least some of the time, 3477 * the PF_PACKET socket code first gets a NETDEV_DOWN indication 3478 * for the device and then gets a NETDEV_UNREGISTER indication 3479 * for it, the first indication will cause a wakeup with ENETDOWN 3480 * but won't set the packet socket's field for the interface index 3481 * to -1, and the second indication won't cause a wakeup (because 3482 * the first indication also caused the protocol hook to be 3483 * unregistered) but will set the packet socket's field for the 3484 * interface index to -1; 3485 * 3486 * 2) even if just a NETDEV_UNREGISTER indication is registered, 3487 * the packet socket's field for the interface index only gets 3488 * set to -1 after the wakeup, so there's a small but non-zero 3489 * risk that a thread blocked waiting for the wakeup will get 3490 * to the "fetch the socket name" code before the interface index 3491 * gets set to -1, so it'll get the old interface index. 
3492 * 3493 * Therefore, if we got an ENETDOWN and haven't seen a packet 3494 * since then, we assume that we might be waiting for the interface 3495 * to disappear, and poll with a timeout to try again in a short 3496 * period of time. If we *do* see a packet, the interface has 3497 * come back up again, and is *definitely* still there, so we 3498 * don't need to poll. 3499 */ 3500 for (;;) { 3501 /* 3502 * Yes, we do this even in non-blocking mode, as it's 3503 * the only way to get error indications from a 3504 * tpacket socket. 3505 * 3506 * The timeout is 0 in non-blocking mode, so poll() 3507 * returns immediately. 3508 */ 3509 timeout = handlep->poll_timeout; 3510 3511 /* 3512 * If we got an ENETDOWN and haven't gotten an indication 3513 * that the device has gone away or that the device is up, 3514 * we don't yet know for certain whether the device has 3515 * gone away or not, do a poll() with a 1-millisecond timeout, 3516 * as we have to poll indefinitely for "device went away" 3517 * indications until we either get one or see that the 3518 * device is up. 3519 */ 3520 if (handlep->netdown) { 3521 if (timeout != 0) 3522 timeout = 1; 3523 } 3524 ret = poll(pollinfo, numpollinfo, timeout); 3525 if (ret < 0) { 3526 /* 3527 * Error. If it's not EINTR, report it. 3528 */ 3529 if (errno != EINTR) { 3530 pcap_fmt_errmsg_for_errno(handle->errbuf, 3531 PCAP_ERRBUF_SIZE, errno, 3532 "can't poll on packet socket"); 3533 return PCAP_ERROR; 3534 } 3535 3536 /* 3537 * It's EINTR; if we were told to break out of 3538 * the loop, do so. 3539 */ 3540 if (handle->break_loop) { 3541 handle->break_loop = 0; 3542 return PCAP_ERROR_BREAK; 3543 } 3544 } else if (ret > 0) { 3545 /* 3546 * OK, some descriptor is ready. 3547 * Check the socket descriptor first. 3548 * 3549 * As I read the Linux man page, pollinfo[0].revents 3550 * will either be POLLIN, POLLERR, POLLHUP, or POLLNVAL. 
3551 */ 3552 if (pollinfo[0].revents == POLLIN) { 3553 /* 3554 * OK, we may have packets to 3555 * read. 3556 */ 3557 break; 3558 } 3559 if (pollinfo[0].revents != 0) { 3560 /* 3561 * There's some indication other than 3562 * "you can read on this descriptor" on 3563 * the descriptor. 3564 */ 3565 if (pollinfo[0].revents & POLLNVAL) { 3566 snprintf(handle->errbuf, 3567 PCAP_ERRBUF_SIZE, 3568 "Invalid polling request on packet socket"); 3569 return PCAP_ERROR; 3570 } 3571 if (pollinfo[0].revents & (POLLHUP | POLLRDHUP)) { 3572 snprintf(handle->errbuf, 3573 PCAP_ERRBUF_SIZE, 3574 "Hangup on packet socket"); 3575 return PCAP_ERROR; 3576 } 3577 if (pollinfo[0].revents & POLLERR) { 3578 /* 3579 * Get the error. 3580 */ 3581 int err; 3582 socklen_t errlen; 3583 3584 errlen = sizeof(err); 3585 if (getsockopt(handle->fd, SOL_SOCKET, 3586 SO_ERROR, &err, &errlen) == -1) { 3587 /* 3588 * The call *itself* returned 3589 * an error; make *that* 3590 * the error. 3591 */ 3592 err = errno; 3593 } 3594 3595 /* 3596 * OK, we have the error. 3597 */ 3598 if (err == ENETDOWN) { 3599 /* 3600 * The device on which we're 3601 * capturing went away or the 3602 * interface was taken down. 3603 * 3604 * We don't know for certain 3605 * which happened, and the 3606 * next poll() may indicate 3607 * that there are packets 3608 * to be read, so just set 3609 * a flag to get us to do 3610 * checks later, and set 3611 * the required select 3612 * timeout to 1 millisecond 3613 * so that event loops that 3614 * check our socket descriptor 3615 * also time out so that 3616 * they can call us and we 3617 * can do the checks. 3618 */ 3619 handlep->netdown = 1; 3620 handle->required_select_timeout = &netdown_timeout; 3621 } else if (err == 0) { 3622 /* 3623 * This shouldn't happen, so 3624 * report a special indication 3625 * that it did. 
3626 */ 3627 snprintf(handle->errbuf, 3628 PCAP_ERRBUF_SIZE, 3629 "Error condition on packet socket: Reported error was 0"); 3630 return PCAP_ERROR; 3631 } else { 3632 pcap_fmt_errmsg_for_errno(handle->errbuf, 3633 PCAP_ERRBUF_SIZE, 3634 err, 3635 "Error condition on packet socket"); 3636 return PCAP_ERROR; 3637 } 3638 } 3639 } 3640 /* 3641 * Now check the event device. 3642 */ 3643 if (pollinfo[1].revents & POLLIN) { 3644 ssize_t nread; 3645 uint64_t value; 3646 3647 /* 3648 * This should never fail, but, just 3649 * in case.... 3650 */ 3651 nread = read(handlep->poll_breakloop_fd, &value, 3652 sizeof(value)); 3653 if (nread == -1) { 3654 pcap_fmt_errmsg_for_errno(handle->errbuf, 3655 PCAP_ERRBUF_SIZE, 3656 errno, 3657 "Error reading from event FD"); 3658 return PCAP_ERROR; 3659 } 3660 3661 /* 3662 * According to the Linux read(2) man 3663 * page, read() will transfer at most 3664 * 2^31-1 bytes, so the return value is 3665 * either -1 or a value between 0 3666 * and 2^31-1, so it's non-negative. 3667 * 3668 * Cast it to size_t to squelch 3669 * warnings from the compiler; add this 3670 * comment to squelch warnings from 3671 * humans reading the code. :-) 3672 * 3673 * Don't treat an EOF as an error, but 3674 * *do* treat a short read as an error; 3675 * that "shouldn't happen", but.... 3676 */ 3677 if (nread != 0 && 3678 (size_t)nread < sizeof(value)) { 3679 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3680 "Short read from event FD: expected %zu, got %zd", 3681 sizeof(value), nread); 3682 return PCAP_ERROR; 3683 } 3684 3685 /* 3686 * This event gets signaled by a 3687 * pcap_breakloop() call; if we were told 3688 * to break out of the loop, do so. 
3689 */ 3690 if (handle->break_loop) { 3691 handle->break_loop = 0; 3692 return PCAP_ERROR_BREAK; 3693 } 3694 } 3695 } 3696 3697 /* 3698 * Either: 3699 * 3700 * 1) we got neither an error from poll() nor any 3701 * readable descriptors, in which case there 3702 * are no packets waiting to read 3703 * 3704 * or 3705 * 3706 * 2) We got readable descriptors but the PF_PACKET 3707 * socket wasn't one of them, in which case there 3708 * are no packets waiting to read 3709 * 3710 * so, if we got an ENETDOWN, we've drained whatever 3711 * packets were available to read at the point of the 3712 * ENETDOWN. 3713 * 3714 * So, if we got an ENETDOWN and haven't gotten an indication 3715 * that the device has gone away or that the device is up, 3716 * we don't yet know for certain whether the device has 3717 * gone away or not, check whether the device exists and is 3718 * up. 3719 */ 3720 if (handlep->netdown) { 3721 if (!device_still_exists(handle)) { 3722 /* 3723 * The device doesn't exist any more; 3724 * report that. 3725 * 3726 * XXX - we should really return an 3727 * appropriate error for that, but 3728 * pcap_dispatch() etc. aren't documented 3729 * as having error returns other than 3730 * PCAP_ERROR or PCAP_ERROR_BREAK. 3731 */ 3732 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3733 "The interface disappeared"); 3734 return PCAP_ERROR; 3735 } 3736 3737 /* 3738 * The device still exists; try to see if it's up. 3739 */ 3740 memset(&ifr, 0, sizeof(ifr)); 3741 pcap_strlcpy(ifr.ifr_name, handlep->device, 3742 sizeof(ifr.ifr_name)); 3743 if (ioctl(handle->fd, SIOCGIFFLAGS, &ifr) == -1) { 3744 if (errno == ENXIO || errno == ENODEV) { 3745 /* 3746 * OK, *now* it's gone. 3747 * 3748 * XXX - see above comment. 
3749 */ 3750 snprintf(handle->errbuf, 3751 PCAP_ERRBUF_SIZE, 3752 "The interface disappeared"); 3753 return PCAP_ERROR; 3754 } else { 3755 pcap_fmt_errmsg_for_errno(handle->errbuf, 3756 PCAP_ERRBUF_SIZE, errno, 3757 "%s: Can't get flags", 3758 handlep->device); 3759 return PCAP_ERROR; 3760 } 3761 } 3762 if (ifr.ifr_flags & IFF_UP) { 3763 /* 3764 * It's up, so it definitely still exists. 3765 * Cancel the ENETDOWN indication - we 3766 * presumably got it due to the interface 3767 * going down rather than the device going 3768 * away - and revert to "no required select 3769 * timeout. 3770 */ 3771 handlep->netdown = 0; 3772 handle->required_select_timeout = NULL; 3773 } 3774 } 3775 3776 /* 3777 * If we're in non-blocking mode, just quit now, rather 3778 * than spinning in a loop doing poll()s that immediately 3779 * time out if there's no indication on any descriptor. 3780 */ 3781 if (handlep->poll_timeout == 0) 3782 break; 3783 } 3784 return 0; 3785 } 3786 3787 /* handle a single memory mapped packet */ 3788 static int pcap_handle_packet_mmap( 3789 pcap_t *handle, 3790 pcap_handler callback, 3791 u_char *user, 3792 unsigned char *frame, 3793 unsigned int tp_len, 3794 unsigned int tp_mac, 3795 unsigned int tp_snaplen, 3796 unsigned int tp_sec, 3797 unsigned int tp_usec, 3798 int tp_vlan_tci_valid, 3799 __u16 tp_vlan_tci, 3800 __u16 tp_vlan_tpid) 3801 { 3802 struct pcap_linux *handlep = handle->priv; 3803 unsigned char *bp; 3804 struct sockaddr_ll *sll; 3805 struct pcap_pkthdr pcaphdr; 3806 pcap_can_socketcan_hdr *canhdr; 3807 unsigned int snaplen = tp_snaplen; 3808 struct utsname utsname; 3809 3810 /* perform sanity check on internal offset. */ 3811 if (tp_mac + tp_snaplen > handle->bufsize) { 3812 /* 3813 * Report some system information as a debugging aid. 
3814 */ 3815 if (uname(&utsname) != -1) { 3816 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3817 "corrupted frame on kernel ring mac " 3818 "offset %u + caplen %u > frame len %d " 3819 "(kernel %.32s version %s, machine %.16s)", 3820 tp_mac, tp_snaplen, handle->bufsize, 3821 utsname.release, utsname.version, 3822 utsname.machine); 3823 } else { 3824 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3825 "corrupted frame on kernel ring mac " 3826 "offset %u + caplen %u > frame len %d", 3827 tp_mac, tp_snaplen, handle->bufsize); 3828 } 3829 return -1; 3830 } 3831 3832 /* run filter on received packet 3833 * If the kernel filtering is enabled we need to run the 3834 * filter until all the frames present into the ring 3835 * at filter creation time are processed. 3836 * In this case, blocks_to_filter_in_userland is used 3837 * as a counter for the packet we need to filter. 3838 * Note: alternatively it could be possible to stop applying 3839 * the filter when the ring became empty, but it can possibly 3840 * happen a lot later... */ 3841 bp = frame + tp_mac; 3842 3843 /* if required build in place the sll header*/ 3844 sll = (void *)(frame + TPACKET_ALIGN(handlep->tp_hdrlen)); 3845 if (handlep->cooked) { 3846 if (handle->linktype == DLT_LINUX_SLL2) { 3847 struct sll2_header *hdrp; 3848 3849 /* 3850 * The kernel should have left us with enough 3851 * space for an sll header; back up the packet 3852 * data pointer into that space, as that'll be 3853 * the beginning of the packet we pass to the 3854 * callback. 3855 */ 3856 bp -= SLL2_HDR_LEN; 3857 3858 /* 3859 * Let's make sure that's past the end of 3860 * the tpacket header, i.e. >= 3861 * ((u_char *)thdr + TPACKET_HDRLEN), so we 3862 * don't step on the header when we construct 3863 * the sll header. 
3864 */ 3865 if (bp < (u_char *)frame + 3866 TPACKET_ALIGN(handlep->tp_hdrlen) + 3867 sizeof(struct sockaddr_ll)) { 3868 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3869 "cooked-mode frame doesn't have room for sll header"); 3870 return -1; 3871 } 3872 3873 /* 3874 * OK, that worked; construct the sll header. 3875 */ 3876 hdrp = (struct sll2_header *)bp; 3877 hdrp->sll2_protocol = sll->sll_protocol; 3878 hdrp->sll2_reserved_mbz = 0; 3879 hdrp->sll2_if_index = htonl(sll->sll_ifindex); 3880 hdrp->sll2_hatype = htons(sll->sll_hatype); 3881 hdrp->sll2_pkttype = sll->sll_pkttype; 3882 hdrp->sll2_halen = sll->sll_halen; 3883 memcpy(hdrp->sll2_addr, sll->sll_addr, SLL_ADDRLEN); 3884 3885 snaplen += sizeof(struct sll2_header); 3886 } else { 3887 struct sll_header *hdrp; 3888 3889 /* 3890 * The kernel should have left us with enough 3891 * space for an sll header; back up the packet 3892 * data pointer into that space, as that'll be 3893 * the beginning of the packet we pass to the 3894 * callback. 3895 */ 3896 bp -= SLL_HDR_LEN; 3897 3898 /* 3899 * Let's make sure that's past the end of 3900 * the tpacket header, i.e. >= 3901 * ((u_char *)thdr + TPACKET_HDRLEN), so we 3902 * don't step on the header when we construct 3903 * the sll header. 3904 */ 3905 if (bp < (u_char *)frame + 3906 TPACKET_ALIGN(handlep->tp_hdrlen) + 3907 sizeof(struct sockaddr_ll)) { 3908 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3909 "cooked-mode frame doesn't have room for sll header"); 3910 return -1; 3911 } 3912 3913 /* 3914 * OK, that worked; construct the sll header. 
3915 */ 3916 hdrp = (struct sll_header *)bp; 3917 hdrp->sll_pkttype = htons(sll->sll_pkttype); 3918 hdrp->sll_hatype = htons(sll->sll_hatype); 3919 hdrp->sll_halen = htons(sll->sll_halen); 3920 memcpy(hdrp->sll_addr, sll->sll_addr, SLL_ADDRLEN); 3921 hdrp->sll_protocol = sll->sll_protocol; 3922 3923 snaplen += sizeof(struct sll_header); 3924 } 3925 } else { 3926 /* 3927 * If this is a packet from a CAN device, so that 3928 * sll->sll_hatype is ARPHRD_CAN, then, as we're 3929 * not capturing in cooked mode, its link-layer 3930 * type is DLT_CAN_SOCKETCAN. Fix up the header 3931 * provided by the code below us to match what 3932 * DLT_CAN_SOCKETCAN is expected to provide. 3933 */ 3934 if (sll->sll_hatype == ARPHRD_CAN) { 3935 /* 3936 * DLT_CAN_SOCKETCAN is specified as having the 3937 * CAN ID and flags in network byte order, but 3938 * capturing on a CAN device provides it in host 3939 * byte order. Convert it to network byte order. 3940 */ 3941 canhdr = (pcap_can_socketcan_hdr *)bp; 3942 canhdr->can_id = htonl(canhdr->can_id); 3943 3944 /* 3945 * In addition, set the CANFD_FDF flag if 3946 * the protocol is LINUX_SLL_P_CANFD, as 3947 * the protocol field itself isn't in 3948 * the packet to indicate that it's a 3949 * CAN FD packet. 3950 */ 3951 uint16_t protocol = ntohs(sll->sll_protocol); 3952 if (protocol == LINUX_SLL_P_CANFD) { 3953 canhdr->fd_flags |= CANFD_FDF; 3954 3955 /* 3956 * Zero out all the unknown bits in 3957 * fd_flags and clear the reserved 3958 * fields, so that a program reading 3959 * this can assume that CANFD_FDF 3960 * is set because we set it, not 3961 * because some uninitialized crap 3962 * was provided in the fd_flags 3963 * field. 3964 * 3965 * (At least some LINKTYPE_CAN_SOCKETCAN 3966 * files attached to Wireshark bugs 3967 * had uninitialized junk there, so it 3968 * does happen.) 3969 * 3970 * Update this if Linux adds more flag 3971 * bits to the fd_flags field or uses 3972 * either of the reserved fields for 3973 * FD frames. 
3974 */ 3975 canhdr->fd_flags &= ~(CANFD_FDF|CANFD_ESI|CANFD_BRS); 3976 canhdr->reserved1 = 0; 3977 canhdr->reserved2 = 0; 3978 } else { 3979 /* 3980 * Clear CANFD_FDF if it's set (probably 3981 * again meaning that this field is 3982 * uninitialized junk). 3983 */ 3984 canhdr->fd_flags &= ~CANFD_FDF; 3985 } 3986 } 3987 } 3988 3989 if (handlep->filter_in_userland && handle->fcode.bf_insns) { 3990 struct pcap_bpf_aux_data aux_data; 3991 3992 aux_data.vlan_tag_present = tp_vlan_tci_valid; 3993 aux_data.vlan_tag = tp_vlan_tci & 0x0fff; 3994 3995 if (pcap_filter_with_aux_data(handle->fcode.bf_insns, 3996 bp, 3997 tp_len, 3998 snaplen, 3999 &aux_data) == 0) 4000 return 0; 4001 } 4002 4003 if (!linux_check_direction(handle, sll)) 4004 return 0; 4005 4006 /* get required packet info from ring header */ 4007 pcaphdr.ts.tv_sec = tp_sec; 4008 pcaphdr.ts.tv_usec = tp_usec; 4009 pcaphdr.caplen = tp_snaplen; 4010 pcaphdr.len = tp_len; 4011 4012 /* if required build in place the sll header*/ 4013 if (handlep->cooked) { 4014 /* update packet len */ 4015 if (handle->linktype == DLT_LINUX_SLL2) { 4016 pcaphdr.caplen += SLL2_HDR_LEN; 4017 pcaphdr.len += SLL2_HDR_LEN; 4018 } else { 4019 pcaphdr.caplen += SLL_HDR_LEN; 4020 pcaphdr.len += SLL_HDR_LEN; 4021 } 4022 } 4023 4024 if (tp_vlan_tci_valid && 4025 handlep->vlan_offset != -1 && 4026 tp_snaplen >= (unsigned int) handlep->vlan_offset) 4027 { 4028 struct vlan_tag *tag; 4029 4030 /* 4031 * Move everything in the header, except the type field, 4032 * down VLAN_TAG_LEN bytes, to allow us to insert the 4033 * VLAN tag between that stuff and the type field. 4034 */ 4035 bp -= VLAN_TAG_LEN; 4036 memmove(bp, bp + VLAN_TAG_LEN, handlep->vlan_offset); 4037 4038 /* 4039 * Now insert the tag. 4040 */ 4041 tag = (struct vlan_tag *)(bp + handlep->vlan_offset); 4042 tag->vlan_tpid = htons(tp_vlan_tpid); 4043 tag->vlan_tci = htons(tp_vlan_tci); 4044 4045 /* 4046 * Add the tag to the packet lengths. 
4047 */ 4048 pcaphdr.caplen += VLAN_TAG_LEN; 4049 pcaphdr.len += VLAN_TAG_LEN; 4050 } 4051 4052 /* 4053 * The only way to tell the kernel to cut off the 4054 * packet at a snapshot length is with a filter program; 4055 * if there's no filter program, the kernel won't cut 4056 * the packet off. 4057 * 4058 * Trim the snapshot length to be no longer than the 4059 * specified snapshot length. 4060 * 4061 * XXX - an alternative is to put a filter, consisting 4062 * of a "ret <snaplen>" instruction, on the socket 4063 * in the activate routine, so that the truncation is 4064 * done in the kernel even if nobody specified a filter; 4065 * that means that less buffer space is consumed in 4066 * the memory-mapped buffer. 4067 */ 4068 if (pcaphdr.caplen > (bpf_u_int32)handle->snapshot) 4069 pcaphdr.caplen = handle->snapshot; 4070 4071 /* pass the packet to the user */ 4072 callback(user, &pcaphdr, bp); 4073 4074 return 1; 4075 } 4076 4077 static int 4078 pcap_read_linux_mmap_v2(pcap_t *handle, int max_packets, pcap_handler callback, 4079 u_char *user) 4080 { 4081 struct pcap_linux *handlep = handle->priv; 4082 union thdr h; 4083 int pkts = 0; 4084 int ret; 4085 4086 /* wait for frames availability.*/ 4087 h.raw = RING_GET_CURRENT_FRAME(handle); 4088 if (!packet_mmap_acquire(h.h2)) { 4089 /* 4090 * The current frame is owned by the kernel; wait for 4091 * a frame to be handed to us. 4092 */ 4093 ret = pcap_wait_for_frames_mmap(handle); 4094 if (ret) { 4095 return ret; 4096 } 4097 } 4098 4099 /* 4100 * This can conceivably process more than INT_MAX packets, 4101 * which would overflow the packet count, causing it either 4102 * to look like a negative number, and thus cause us to 4103 * return a value that looks like an error, or overflow 4104 * back into positive territory, and thus cause us to 4105 * return a too-low count. 
4106 * 4107 * Therefore, if the packet count is unlimited, we clip 4108 * it at INT_MAX; this routine is not expected to 4109 * process packets indefinitely, so that's not an issue. 4110 */ 4111 if (PACKET_COUNT_IS_UNLIMITED(max_packets)) 4112 max_packets = INT_MAX; 4113 4114 while (pkts < max_packets) { 4115 /* 4116 * Get the current ring buffer frame, and break if 4117 * it's still owned by the kernel. 4118 */ 4119 h.raw = RING_GET_CURRENT_FRAME(handle); 4120 if (!packet_mmap_acquire(h.h2)) 4121 break; 4122 4123 ret = pcap_handle_packet_mmap( 4124 handle, 4125 callback, 4126 user, 4127 h.raw, 4128 h.h2->tp_len, 4129 h.h2->tp_mac, 4130 h.h2->tp_snaplen, 4131 h.h2->tp_sec, 4132 handle->opt.tstamp_precision == PCAP_TSTAMP_PRECISION_NANO ? h.h2->tp_nsec : h.h2->tp_nsec / 1000, 4133 VLAN_VALID(h.h2, h.h2), 4134 h.h2->tp_vlan_tci, 4135 VLAN_TPID(h.h2, h.h2)); 4136 if (ret == 1) { 4137 pkts++; 4138 } else if (ret < 0) { 4139 return ret; 4140 } 4141 4142 /* 4143 * Hand this block back to the kernel, and, if we're 4144 * counting blocks that need to be filtered in userland 4145 * after having been filtered by the kernel, count 4146 * the one we've just processed. 4147 */ 4148 packet_mmap_release(h.h2); 4149 if (handlep->blocks_to_filter_in_userland > 0) { 4150 handlep->blocks_to_filter_in_userland--; 4151 if (handlep->blocks_to_filter_in_userland == 0) { 4152 /* 4153 * No more blocks need to be filtered 4154 * in userland. 
4155 */ 4156 handlep->filter_in_userland = 0; 4157 } 4158 } 4159 4160 /* next block */ 4161 if (++handle->offset >= handle->cc) 4162 handle->offset = 0; 4163 4164 /* check for break loop condition*/ 4165 if (handle->break_loop) { 4166 handle->break_loop = 0; 4167 return PCAP_ERROR_BREAK; 4168 } 4169 } 4170 return pkts; 4171 } 4172 4173 #ifdef HAVE_TPACKET3 4174 static int 4175 pcap_read_linux_mmap_v3(pcap_t *handle, int max_packets, pcap_handler callback, 4176 u_char *user) 4177 { 4178 struct pcap_linux *handlep = handle->priv; 4179 union thdr h; 4180 int pkts = 0; 4181 int ret; 4182 4183 again: 4184 if (handlep->current_packet == NULL) { 4185 /* wait for frames availability.*/ 4186 h.raw = RING_GET_CURRENT_FRAME(handle); 4187 if (!packet_mmap_v3_acquire(h.h3)) { 4188 /* 4189 * The current frame is owned by the kernel; wait 4190 * for a frame to be handed to us. 4191 */ 4192 ret = pcap_wait_for_frames_mmap(handle); 4193 if (ret) { 4194 return ret; 4195 } 4196 } 4197 } 4198 h.raw = RING_GET_CURRENT_FRAME(handle); 4199 if (!packet_mmap_v3_acquire(h.h3)) { 4200 if (pkts == 0 && handlep->timeout == 0) { 4201 /* Block until we see a packet. */ 4202 goto again; 4203 } 4204 return pkts; 4205 } 4206 4207 /* 4208 * This can conceivably process more than INT_MAX packets, 4209 * which would overflow the packet count, causing it either 4210 * to look like a negative number, and thus cause us to 4211 * return a value that looks like an error, or overflow 4212 * back into positive territory, and thus cause us to 4213 * return a too-low count. 4214 * 4215 * Therefore, if the packet count is unlimited, we clip 4216 * it at INT_MAX; this routine is not expected to 4217 * process packets indefinitely, so that's not an issue. 
4218 */ 4219 if (PACKET_COUNT_IS_UNLIMITED(max_packets)) 4220 max_packets = INT_MAX; 4221 4222 while (pkts < max_packets) { 4223 int packets_to_read; 4224 4225 if (handlep->current_packet == NULL) { 4226 h.raw = RING_GET_CURRENT_FRAME(handle); 4227 if (!packet_mmap_v3_acquire(h.h3)) 4228 break; 4229 4230 handlep->current_packet = h.raw + h.h3->hdr.bh1.offset_to_first_pkt; 4231 handlep->packets_left = h.h3->hdr.bh1.num_pkts; 4232 } 4233 packets_to_read = handlep->packets_left; 4234 4235 if (packets_to_read > (max_packets - pkts)) { 4236 /* 4237 * There are more packets in the buffer than 4238 * the number of packets we have left to 4239 * process to get up to the maximum number 4240 * of packets to process. Only process enough 4241 * of them to get us up to that maximum. 4242 */ 4243 packets_to_read = max_packets - pkts; 4244 } 4245 4246 while (packets_to_read-- && !handle->break_loop) { 4247 struct tpacket3_hdr* tp3_hdr = (struct tpacket3_hdr*) handlep->current_packet; 4248 ret = pcap_handle_packet_mmap( 4249 handle, 4250 callback, 4251 user, 4252 handlep->current_packet, 4253 tp3_hdr->tp_len, 4254 tp3_hdr->tp_mac, 4255 tp3_hdr->tp_snaplen, 4256 tp3_hdr->tp_sec, 4257 handle->opt.tstamp_precision == PCAP_TSTAMP_PRECISION_NANO ? tp3_hdr->tp_nsec : tp3_hdr->tp_nsec / 1000, 4258 VLAN_VALID(tp3_hdr, &tp3_hdr->hv1), 4259 tp3_hdr->hv1.tp_vlan_tci, 4260 VLAN_TPID(tp3_hdr, &tp3_hdr->hv1)); 4261 if (ret == 1) { 4262 pkts++; 4263 } else if (ret < 0) { 4264 handlep->current_packet = NULL; 4265 return ret; 4266 } 4267 handlep->current_packet += tp3_hdr->tp_next_offset; 4268 handlep->packets_left--; 4269 } 4270 4271 if (handlep->packets_left <= 0) { 4272 /* 4273 * Hand this block back to the kernel, and, if 4274 * we're counting blocks that need to be 4275 * filtered in userland after having been 4276 * filtered by the kernel, count the one we've 4277 * just processed. 
4278 */ 4279 packet_mmap_v3_release(h.h3); 4280 if (handlep->blocks_to_filter_in_userland > 0) { 4281 handlep->blocks_to_filter_in_userland--; 4282 if (handlep->blocks_to_filter_in_userland == 0) { 4283 /* 4284 * No more blocks need to be filtered 4285 * in userland. 4286 */ 4287 handlep->filter_in_userland = 0; 4288 } 4289 } 4290 4291 /* next block */ 4292 if (++handle->offset >= handle->cc) 4293 handle->offset = 0; 4294 4295 handlep->current_packet = NULL; 4296 } 4297 4298 /* check for break loop condition*/ 4299 if (handle->break_loop) { 4300 handle->break_loop = 0; 4301 return PCAP_ERROR_BREAK; 4302 } 4303 } 4304 if (pkts == 0 && handlep->timeout == 0) { 4305 /* Block until we see a packet. */ 4306 goto again; 4307 } 4308 return pkts; 4309 } 4310 #endif /* HAVE_TPACKET3 */ 4311 4312 /* 4313 * Attach the given BPF code to the packet capture device. 4314 */ 4315 static int 4316 pcap_setfilter_linux(pcap_t *handle, struct bpf_program *filter) 4317 { 4318 struct pcap_linux *handlep; 4319 struct sock_fprog fcode; 4320 int can_filter_in_kernel; 4321 int err = 0; 4322 int n, offset; 4323 4324 if (!handle) 4325 return -1; 4326 if (!filter) { 4327 pcap_strlcpy(handle->errbuf, "setfilter: No filter specified", 4328 PCAP_ERRBUF_SIZE); 4329 return -1; 4330 } 4331 4332 handlep = handle->priv; 4333 4334 /* Make our private copy of the filter */ 4335 4336 if (install_bpf_program(handle, filter) < 0) 4337 /* install_bpf_program() filled in errbuf */ 4338 return -1; 4339 4340 /* 4341 * Run user level packet filter by default. Will be overridden if 4342 * installing a kernel filter succeeds. 4343 */ 4344 handlep->filter_in_userland = 1; 4345 4346 /* Install kernel level filter if possible */ 4347 4348 #ifdef USHRT_MAX 4349 if (handle->fcode.bf_len > USHRT_MAX) { 4350 /* 4351 * fcode.len is an unsigned short for current kernel. 4352 * I have yet to see BPF-Code with that much 4353 * instructions but still it is possible. So for the 4354 * sake of correctness I added this check. 
4355 */ 4356 fprintf(stderr, "Warning: Filter too complex for kernel\n"); 4357 fcode.len = 0; 4358 fcode.filter = NULL; 4359 can_filter_in_kernel = 0; 4360 } else 4361 #endif /* USHRT_MAX */ 4362 { 4363 /* 4364 * Oh joy, the Linux kernel uses struct sock_fprog instead 4365 * of struct bpf_program and of course the length field is 4366 * of different size. Pointed out by Sebastian 4367 * 4368 * Oh, and we also need to fix it up so that all "ret" 4369 * instructions with non-zero operands have MAXIMUM_SNAPLEN 4370 * as the operand if we're not capturing in memory-mapped 4371 * mode, and so that, if we're in cooked mode, all memory- 4372 * reference instructions use special magic offsets in 4373 * references to the link-layer header and assume that the 4374 * link-layer payload begins at 0; "fix_program()" will do 4375 * that. 4376 */ 4377 switch (fix_program(handle, &fcode)) { 4378 4379 case -1: 4380 default: 4381 /* 4382 * Fatal error; just quit. 4383 * (The "default" case shouldn't happen; we 4384 * return -1 for that reason.) 4385 */ 4386 return -1; 4387 4388 case 0: 4389 /* 4390 * The program performed checks that we can't make 4391 * work in the kernel. 4392 */ 4393 can_filter_in_kernel = 0; 4394 break; 4395 4396 case 1: 4397 /* 4398 * We have a filter that'll work in the kernel. 4399 */ 4400 can_filter_in_kernel = 1; 4401 break; 4402 } 4403 } 4404 4405 /* 4406 * NOTE: at this point, we've set both the "len" and "filter" 4407 * fields of "fcode". As of the 2.6.32.4 kernel, at least, 4408 * those are the only members of the "sock_fprog" structure, 4409 * so we initialize every member of that structure. 4410 * 4411 * If there is anything in "fcode" that is not initialized, 4412 * it is either a field added in a later kernel, or it's 4413 * padding. 4414 * 4415 * If a new field is added, this code needs to be updated 4416 * to set it correctly. 
4417 * 4418 * If there are no other fields, then: 4419 * 4420 * if the Linux kernel looks at the padding, it's 4421 * buggy; 4422 * 4423 * if the Linux kernel doesn't look at the padding, 4424 * then if some tool complains that we're passing 4425 * uninitialized data to the kernel, then the tool 4426 * is buggy and needs to understand that it's just 4427 * padding. 4428 */ 4429 if (can_filter_in_kernel) { 4430 if ((err = set_kernel_filter(handle, &fcode)) == 0) 4431 { 4432 /* 4433 * Installation succeeded - using kernel filter, 4434 * so userland filtering not needed. 4435 */ 4436 handlep->filter_in_userland = 0; 4437 } 4438 else if (err == -1) /* Non-fatal error */ 4439 { 4440 /* 4441 * Print a warning if we weren't able to install 4442 * the filter for a reason other than "this kernel 4443 * isn't configured to support socket filters. 4444 */ 4445 if (errno == ENOMEM) { 4446 /* 4447 * Either a kernel memory allocation 4448 * failure occurred, or there's too 4449 * much "other/option memory" allocated 4450 * for this socket. Suggest that they 4451 * increase the "other/option memory" 4452 * limit. 4453 */ 4454 fprintf(stderr, 4455 "Warning: Couldn't allocate kernel memory for filter: try increasing net.core.optmem_max with sysctl\n"); 4456 } else if (errno != ENOPROTOOPT && errno != EOPNOTSUPP) { 4457 fprintf(stderr, 4458 "Warning: Kernel filter failed: %s\n", 4459 pcap_strerror(errno)); 4460 } 4461 } 4462 } 4463 4464 /* 4465 * If we're not using the kernel filter, get rid of any kernel 4466 * filter that might've been there before, e.g. because the 4467 * previous filter could work in the kernel, or because some other 4468 * code attached a filter to the socket by some means other than 4469 * calling "pcap_setfilter()". Otherwise, the kernel filter may 4470 * filter out packets that would pass the new userland filter. 
4471 */ 4472 if (handlep->filter_in_userland) { 4473 if (reset_kernel_filter(handle) == -1) { 4474 pcap_fmt_errmsg_for_errno(handle->errbuf, 4475 PCAP_ERRBUF_SIZE, errno, 4476 "can't remove kernel filter"); 4477 err = -2; /* fatal error */ 4478 } 4479 } 4480 4481 /* 4482 * Free up the copy of the filter that was made by "fix_program()". 4483 */ 4484 if (fcode.filter != NULL) 4485 free(fcode.filter); 4486 4487 if (err == -2) 4488 /* Fatal error */ 4489 return -1; 4490 4491 /* 4492 * If we're filtering in userland, there's nothing to do; 4493 * the new filter will be used for the next packet. 4494 */ 4495 if (handlep->filter_in_userland) 4496 return 0; 4497 4498 /* 4499 * We're filtering in the kernel; the packets present in 4500 * all blocks currently in the ring were already filtered 4501 * by the old filter, and so will need to be filtered in 4502 * userland by the new filter. 4503 * 4504 * Get an upper bound for the number of such blocks; first, 4505 * walk the ring backward and count the free blocks. 4506 */ 4507 offset = handle->offset; 4508 if (--offset < 0) 4509 offset = handle->cc - 1; 4510 for (n=0; n < handle->cc; ++n) { 4511 if (--offset < 0) 4512 offset = handle->cc - 1; 4513 if (pcap_get_ring_frame_status(handle, offset) != TP_STATUS_KERNEL) 4514 break; 4515 } 4516 4517 /* 4518 * If we found free blocks, decrement the count of free 4519 * blocks by 1, just in case we lost a race with another 4520 * thread of control that was adding a packet while 4521 * we were counting and that had run the filter before 4522 * we changed it. 4523 * 4524 * XXX - could there be more than one block added in 4525 * this fashion? 4526 * 4527 * XXX - is there a way to avoid that race, e.g. somehow 4528 * wait for all packets that passed the old filter to 4529 * be added to the ring? 
4530 */ 4531 if (n != 0) 4532 n--; 4533 4534 /* 4535 * Set the count of blocks worth of packets to filter 4536 * in userland to the total number of blocks in the 4537 * ring minus the number of free blocks we found, and 4538 * turn on userland filtering. (The count of blocks 4539 * worth of packets to filter in userland is guaranteed 4540 * not to be zero - n, above, couldn't be set to a 4541 * value > handle->cc, and if it were equal to 4542 * handle->cc, it wouldn't be zero, and thus would 4543 * be decremented to handle->cc - 1.) 4544 */ 4545 handlep->blocks_to_filter_in_userland = handle->cc - n; 4546 handlep->filter_in_userland = 1; 4547 4548 return 0; 4549 } 4550 4551 /* 4552 * Return the index of the given device name. Fill ebuf and return 4553 * -1 on failure. 4554 */ 4555 static int 4556 iface_get_id(int fd, const char *device, char *ebuf) 4557 { 4558 struct ifreq ifr; 4559 4560 memset(&ifr, 0, sizeof(ifr)); 4561 pcap_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name)); 4562 4563 if (ioctl(fd, SIOCGIFINDEX, &ifr) == -1) { 4564 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4565 errno, "SIOCGIFINDEX"); 4566 return -1; 4567 } 4568 4569 return ifr.ifr_ifindex; 4570 } 4571 4572 /* 4573 * Bind the socket associated with FD to the given device. 4574 * Return 0 on success or a PCAP_ERROR_ value on a hard error. 4575 */ 4576 static int 4577 iface_bind(int fd, int ifindex, char *ebuf, int protocol) 4578 { 4579 struct sockaddr_ll sll; 4580 int ret, err; 4581 socklen_t errlen = sizeof(err); 4582 4583 memset(&sll, 0, sizeof(sll)); 4584 sll.sll_family = AF_PACKET; 4585 sll.sll_ifindex = ifindex < 0 ? 
0 : ifindex; 4586 sll.sll_protocol = protocol; 4587 4588 if (bind(fd, (struct sockaddr *) &sll, sizeof(sll)) == -1) { 4589 if (errno == ENETDOWN) { 4590 /* 4591 * Return a "network down" indication, so that 4592 * the application can report that rather than 4593 * saying we had a mysterious failure and 4594 * suggest that they report a problem to the 4595 * libpcap developers. 4596 */ 4597 return PCAP_ERROR_IFACE_NOT_UP; 4598 } 4599 if (errno == ENODEV) { 4600 /* 4601 * There's nothing more to say, so clear the 4602 * error message. 4603 */ 4604 ebuf[0] = '\0'; 4605 ret = PCAP_ERROR_NO_SUCH_DEVICE; 4606 } else { 4607 ret = PCAP_ERROR; 4608 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4609 errno, "bind"); 4610 } 4611 return ret; 4612 } 4613 4614 /* Any pending errors, e.g., network is down? */ 4615 4616 if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errlen) == -1) { 4617 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4618 errno, "getsockopt (SO_ERROR)"); 4619 return PCAP_ERROR; 4620 } 4621 4622 if (err == ENETDOWN) { 4623 /* 4624 * Return a "network down" indication, so that 4625 * the application can report that rather than 4626 * saying we had a mysterious failure and 4627 * suggest that they report a problem to the 4628 * libpcap developers. 4629 */ 4630 return PCAP_ERROR_IFACE_NOT_UP; 4631 } else if (err > 0) { 4632 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4633 err, "bind"); 4634 return PCAP_ERROR; 4635 } 4636 4637 return 0; 4638 } 4639 4640 /* 4641 * Try to enter monitor mode. 4642 * If we have libnl, try to create a new monitor-mode device and 4643 * capture on that; otherwise, just say "not supported". 4644 */ 4645 #ifdef HAVE_LIBNL 4646 static int 4647 enter_rfmon_mode(pcap_t *handle, int sock_fd, const char *device) 4648 { 4649 struct pcap_linux *handlep = handle->priv; 4650 int ret; 4651 char phydev_path[PATH_MAX+1]; 4652 struct nl80211_state nlstate; 4653 struct ifreq ifr; 4654 u_int n; 4655 4656 /* 4657 * Is this a mac80211 device? 
4658 */ 4659 ret = get_mac80211_phydev(handle, device, phydev_path, PATH_MAX); 4660 if (ret < 0) 4661 return ret; /* error */ 4662 if (ret == 0) 4663 return 0; /* no error, but not mac80211 device */ 4664 4665 /* 4666 * XXX - is this already a monN device? 4667 * If so, we're done. 4668 */ 4669 4670 /* 4671 * OK, it's apparently a mac80211 device. 4672 * Try to find an unused monN device for it. 4673 */ 4674 ret = nl80211_init(handle, &nlstate, device); 4675 if (ret != 0) 4676 return ret; 4677 for (n = 0; n < UINT_MAX; n++) { 4678 /* 4679 * Try mon{n}. 4680 */ 4681 char mondevice[3+10+1]; /* mon{UINT_MAX}\0 */ 4682 4683 snprintf(mondevice, sizeof mondevice, "mon%u", n); 4684 ret = add_mon_if(handle, sock_fd, &nlstate, device, mondevice); 4685 if (ret == 1) { 4686 /* 4687 * Success. We don't clean up the libnl state 4688 * yet, as we'll be using it later. 4689 */ 4690 goto added; 4691 } 4692 if (ret < 0) { 4693 /* 4694 * Hard failure. Just return ret; handle->errbuf 4695 * has already been set. 4696 */ 4697 nl80211_cleanup(&nlstate); 4698 return ret; 4699 } 4700 } 4701 4702 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 4703 "%s: No free monN interfaces", device); 4704 nl80211_cleanup(&nlstate); 4705 return PCAP_ERROR; 4706 4707 added: 4708 4709 #if 0 4710 /* 4711 * Sleep for .1 seconds. 4712 */ 4713 delay.tv_sec = 0; 4714 delay.tv_nsec = 500000000; 4715 nanosleep(&delay, NULL); 4716 #endif 4717 4718 /* 4719 * If we haven't already done so, arrange to have 4720 * "pcap_close_all()" called when we exit. 4721 */ 4722 if (!pcap_do_addexit(handle)) { 4723 /* 4724 * "atexit()" failed; don't put the interface 4725 * in rfmon mode, just give up. 4726 */ 4727 del_mon_if(handle, sock_fd, &nlstate, device, 4728 handlep->mondevice); 4729 nl80211_cleanup(&nlstate); 4730 return PCAP_ERROR; 4731 } 4732 4733 /* 4734 * Now configure the monitor interface up. 
4735 */ 4736 memset(&ifr, 0, sizeof(ifr)); 4737 pcap_strlcpy(ifr.ifr_name, handlep->mondevice, sizeof(ifr.ifr_name)); 4738 if (ioctl(sock_fd, SIOCGIFFLAGS, &ifr) == -1) { 4739 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 4740 errno, "%s: Can't get flags for %s", device, 4741 handlep->mondevice); 4742 del_mon_if(handle, sock_fd, &nlstate, device, 4743 handlep->mondevice); 4744 nl80211_cleanup(&nlstate); 4745 return PCAP_ERROR; 4746 } 4747 ifr.ifr_flags |= IFF_UP|IFF_RUNNING; 4748 if (ioctl(sock_fd, SIOCSIFFLAGS, &ifr) == -1) { 4749 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 4750 errno, "%s: Can't set flags for %s", device, 4751 handlep->mondevice); 4752 del_mon_if(handle, sock_fd, &nlstate, device, 4753 handlep->mondevice); 4754 nl80211_cleanup(&nlstate); 4755 return PCAP_ERROR; 4756 } 4757 4758 /* 4759 * Success. Clean up the libnl state. 4760 */ 4761 nl80211_cleanup(&nlstate); 4762 4763 /* 4764 * Note that we have to delete the monitor device when we close 4765 * the handle. 4766 */ 4767 handlep->must_do_on_close |= MUST_DELETE_MONIF; 4768 4769 /* 4770 * Add this to the list of pcaps to close when we exit. 4771 */ 4772 pcap_add_to_pcaps_to_close(handle); 4773 4774 return 1; 4775 } 4776 #else /* HAVE_LIBNL */ 4777 static int 4778 enter_rfmon_mode(pcap_t *handle _U_, int sock_fd _U_, const char *device _U_) 4779 { 4780 /* 4781 * We don't have libnl, so we can't do monitor mode. 4782 */ 4783 return 0; 4784 } 4785 #endif /* HAVE_LIBNL */ 4786 4787 #if defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) 4788 /* 4789 * Map SOF_TIMESTAMPING_ values to PCAP_TSTAMP_ values. 
4790 */ 4791 static const struct { 4792 int soft_timestamping_val; 4793 int pcap_tstamp_val; 4794 } sof_ts_type_map[3] = { 4795 { SOF_TIMESTAMPING_SOFTWARE, PCAP_TSTAMP_HOST }, 4796 { SOF_TIMESTAMPING_SYS_HARDWARE, PCAP_TSTAMP_ADAPTER }, 4797 { SOF_TIMESTAMPING_RAW_HARDWARE, PCAP_TSTAMP_ADAPTER_UNSYNCED } 4798 }; 4799 #define NUM_SOF_TIMESTAMPING_TYPES (sizeof sof_ts_type_map / sizeof sof_ts_type_map[0]) 4800 4801 /* 4802 * Set the list of time stamping types to include all types. 4803 */ 4804 static int 4805 iface_set_all_ts_types(pcap_t *handle, char *ebuf) 4806 { 4807 u_int i; 4808 4809 handle->tstamp_type_list = malloc(NUM_SOF_TIMESTAMPING_TYPES * sizeof(u_int)); 4810 if (handle->tstamp_type_list == NULL) { 4811 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4812 errno, "malloc"); 4813 return -1; 4814 } 4815 for (i = 0; i < NUM_SOF_TIMESTAMPING_TYPES; i++) 4816 handle->tstamp_type_list[i] = sof_ts_type_map[i].pcap_tstamp_val; 4817 handle->tstamp_type_count = NUM_SOF_TIMESTAMPING_TYPES; 4818 return 0; 4819 } 4820 4821 /* 4822 * Get a list of time stamp types. 4823 */ 4824 #ifdef ETHTOOL_GET_TS_INFO 4825 static int 4826 iface_get_ts_types(const char *device, pcap_t *handle, char *ebuf) 4827 { 4828 int fd; 4829 struct ifreq ifr; 4830 struct ethtool_ts_info info; 4831 int num_ts_types; 4832 u_int i, j; 4833 4834 /* 4835 * This doesn't apply to the "any" device; you can't say "turn on 4836 * hardware time stamping for all devices that exist now and arrange 4837 * that it be turned on for any device that appears in the future", 4838 * and not all devices even necessarily *support* hardware time 4839 * stamping, so don't report any time stamp types. 4840 */ 4841 if (strcmp(device, "any") == 0) { 4842 handle->tstamp_type_list = NULL; 4843 return 0; 4844 } 4845 4846 /* 4847 * Create a socket from which to fetch time stamping capabilities. 
4848 */ 4849 fd = get_if_ioctl_socket(); 4850 if (fd < 0) { 4851 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4852 errno, "socket for SIOCETHTOOL(ETHTOOL_GET_TS_INFO)"); 4853 return -1; 4854 } 4855 4856 memset(&ifr, 0, sizeof(ifr)); 4857 pcap_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name)); 4858 memset(&info, 0, sizeof(info)); 4859 info.cmd = ETHTOOL_GET_TS_INFO; 4860 ifr.ifr_data = (caddr_t)&info; 4861 if (ioctl(fd, SIOCETHTOOL, &ifr) == -1) { 4862 int save_errno = errno; 4863 4864 close(fd); 4865 switch (save_errno) { 4866 4867 case EOPNOTSUPP: 4868 case EINVAL: 4869 /* 4870 * OK, this OS version or driver doesn't support 4871 * asking for the time stamping types, so let's 4872 * just return all the possible types. 4873 */ 4874 if (iface_set_all_ts_types(handle, ebuf) == -1) 4875 return -1; 4876 return 0; 4877 4878 case ENODEV: 4879 /* 4880 * OK, no such device. 4881 * The user will find that out when they try to 4882 * activate the device; just return an empty 4883 * list of time stamp types. 4884 */ 4885 handle->tstamp_type_list = NULL; 4886 return 0; 4887 4888 default: 4889 /* 4890 * Other error. 4891 */ 4892 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4893 save_errno, 4894 "%s: SIOCETHTOOL(ETHTOOL_GET_TS_INFO) ioctl failed", 4895 device); 4896 return -1; 4897 } 4898 } 4899 close(fd); 4900 4901 /* 4902 * Do we support hardware time stamping of *all* packets? 4903 */ 4904 if (!(info.rx_filters & (1 << HWTSTAMP_FILTER_ALL))) { 4905 /* 4906 * No, so don't report any time stamp types. 4907 * 4908 * XXX - some devices either don't report 4909 * HWTSTAMP_FILTER_ALL when they do support it, or 4910 * report HWTSTAMP_FILTER_ALL but map it to only 4911 * time stamping a few PTP packets. See 4912 * http://marc.info/?l=linux-netdev&m=146318183529571&w=2 4913 * 4914 * Maybe that got fixed later. 
4915 */ 4916 handle->tstamp_type_list = NULL; 4917 return 0; 4918 } 4919 4920 num_ts_types = 0; 4921 for (i = 0; i < NUM_SOF_TIMESTAMPING_TYPES; i++) { 4922 if (info.so_timestamping & sof_ts_type_map[i].soft_timestamping_val) 4923 num_ts_types++; 4924 } 4925 if (num_ts_types != 0) { 4926 handle->tstamp_type_list = malloc(num_ts_types * sizeof(u_int)); 4927 if (handle->tstamp_type_list == NULL) { 4928 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4929 errno, "malloc"); 4930 return -1; 4931 } 4932 for (i = 0, j = 0; i < NUM_SOF_TIMESTAMPING_TYPES; i++) { 4933 if (info.so_timestamping & sof_ts_type_map[i].soft_timestamping_val) { 4934 handle->tstamp_type_list[j] = sof_ts_type_map[i].pcap_tstamp_val; 4935 j++; 4936 } 4937 } 4938 handle->tstamp_type_count = num_ts_types; 4939 } else 4940 handle->tstamp_type_list = NULL; 4941 4942 return 0; 4943 } 4944 #else /* ETHTOOL_GET_TS_INFO */ 4945 static int 4946 iface_get_ts_types(const char *device, pcap_t *handle, char *ebuf) 4947 { 4948 /* 4949 * This doesn't apply to the "any" device; you can't say "turn on 4950 * hardware time stamping for all devices that exist now and arrange 4951 * that it be turned on for any device that appears in the future", 4952 * and not all devices even necessarily *support* hardware time 4953 * stamping, so don't report any time stamp types. 4954 */ 4955 if (strcmp(device, "any") == 0) { 4956 handle->tstamp_type_list = NULL; 4957 return 0; 4958 } 4959 4960 /* 4961 * We don't have an ioctl to use to ask what's supported, 4962 * so say we support everything. 4963 */ 4964 if (iface_set_all_ts_types(handle, ebuf) == -1) 4965 return -1; 4966 return 0; 4967 } 4968 #endif /* ETHTOOL_GET_TS_INFO */ 4969 #else /* defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) */ 4970 static int 4971 iface_get_ts_types(const char *device _U_, pcap_t *p _U_, char *ebuf _U_) 4972 { 4973 /* 4974 * Nothing to fetch, so it always "succeeds". 
4975 */ 4976 return 0; 4977 } 4978 #endif /* defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) */ 4979 4980 /* 4981 * Find out if we have any form of fragmentation/reassembly offloading. 4982 * 4983 * We do so using SIOCETHTOOL checking for various types of offloading; 4984 * if SIOCETHTOOL isn't defined, or we don't have any #defines for any 4985 * of the types of offloading, there's nothing we can do to check, so 4986 * we just say "no, we don't". 4987 * 4988 * We treat EOPNOTSUPP, EINVAL and, if eperm_ok is true, EPERM as 4989 * indications that the operation isn't supported. We do EPERM 4990 * weirdly because the SIOCETHTOOL code in later kernels 1) doesn't 4991 * support ETHTOOL_GUFO, 2) also doesn't include it in the list 4992 * of ethtool operations that don't require CAP_NET_ADMIN privileges, 4993 * and 3) does the "is this permitted" check before doing the "is 4994 * this even supported" check, so it fails with "this is not permitted" 4995 * rather than "this is not even supported". To work around this 4996 * annoyance, we only treat EPERM as an error for the first feature, 4997 * and assume that they all do the same permission checks, so if the 4998 * first one is allowed all the others are allowed if supported. 
4999 */ 5000 #if defined(SIOCETHTOOL) && (defined(ETHTOOL_GTSO) || defined(ETHTOOL_GUFO) || defined(ETHTOOL_GGSO) || defined(ETHTOOL_GFLAGS) || defined(ETHTOOL_GGRO)) 5001 static int 5002 iface_ethtool_flag_ioctl(pcap_t *handle, int cmd, const char *cmdname, 5003 int eperm_ok) 5004 { 5005 struct ifreq ifr; 5006 struct ethtool_value eval; 5007 5008 memset(&ifr, 0, sizeof(ifr)); 5009 pcap_strlcpy(ifr.ifr_name, handle->opt.device, sizeof(ifr.ifr_name)); 5010 eval.cmd = cmd; 5011 eval.data = 0; 5012 ifr.ifr_data = (caddr_t)&eval; 5013 if (ioctl(handle->fd, SIOCETHTOOL, &ifr) == -1) { 5014 if (errno == EOPNOTSUPP || errno == EINVAL || 5015 (errno == EPERM && eperm_ok)) { 5016 /* 5017 * OK, let's just return 0, which, in our 5018 * case, either means "no, what we're asking 5019 * about is not enabled" or "all the flags 5020 * are clear (i.e., nothing is enabled)". 5021 */ 5022 return 0; 5023 } 5024 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 5025 errno, "%s: SIOCETHTOOL(%s) ioctl failed", 5026 handle->opt.device, cmdname); 5027 return -1; 5028 } 5029 return eval.data; 5030 } 5031 5032 /* 5033 * XXX - it's annoying that we have to check for offloading at all, but, 5034 * given that we have to, it's still annoying that we have to check for 5035 * particular types of offloading, especially that shiny new types of 5036 * offloading may be added - and, worse, may not be checkable with 5037 * a particular ETHTOOL_ operation; ETHTOOL_GFEATURES would, in 5038 * theory, give those to you, but the actual flags being used are 5039 * opaque (defined in a non-uapi header), and there doesn't seem to 5040 * be any obvious way to ask the kernel what all the offloading flags 5041 * are - at best, you can ask for a set of strings(!) to get *names* 5042 * for various flags. 
(That whole mechanism appears to have been 5043 * designed for the sole purpose of letting ethtool report flags 5044 * by name and set flags by name, with the names having no semantics 5045 * ethtool understands.) 5046 */ 5047 static int 5048 iface_get_offload(pcap_t *handle) 5049 { 5050 int ret; 5051 5052 #ifdef ETHTOOL_GTSO 5053 ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GTSO, "ETHTOOL_GTSO", 0); 5054 if (ret == -1) 5055 return -1; 5056 if (ret) 5057 return 1; /* TCP segmentation offloading on */ 5058 #endif 5059 5060 #ifdef ETHTOOL_GGSO 5061 /* 5062 * XXX - will this cause large unsegmented packets to be 5063 * handed to PF_PACKET sockets on transmission? If not, 5064 * this need not be checked. 5065 */ 5066 ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GGSO, "ETHTOOL_GGSO", 0); 5067 if (ret == -1) 5068 return -1; 5069 if (ret) 5070 return 1; /* generic segmentation offloading on */ 5071 #endif 5072 5073 #ifdef ETHTOOL_GFLAGS 5074 ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GFLAGS, "ETHTOOL_GFLAGS", 0); 5075 if (ret == -1) 5076 return -1; 5077 if (ret & ETH_FLAG_LRO) 5078 return 1; /* large receive offloading on */ 5079 #endif 5080 5081 #ifdef ETHTOOL_GGRO 5082 /* 5083 * XXX - will this cause large reassembled packets to be 5084 * handed to PF_PACKET sockets on receipt? If not, 5085 * this need not be checked. 5086 */ 5087 ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GGRO, "ETHTOOL_GGRO", 0); 5088 if (ret == -1) 5089 return -1; 5090 if (ret) 5091 return 1; /* generic (large) receive offloading on */ 5092 #endif 5093 5094 #ifdef ETHTOOL_GUFO 5095 /* 5096 * Do this one last, as support for it was removed in later 5097 * kernels, and it fails with EPERM on those kernels rather 5098 * than with EOPNOTSUPP (see explanation in comment for 5099 * iface_ethtool_flag_ioctl()). 
5100 */ 5101 ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GUFO, "ETHTOOL_GUFO", 1); 5102 if (ret == -1) 5103 return -1; 5104 if (ret) 5105 return 1; /* UDP fragmentation offloading on */ 5106 #endif 5107 5108 return 0; 5109 } 5110 #else /* SIOCETHTOOL */ 5111 static int 5112 iface_get_offload(pcap_t *handle _U_) 5113 { 5114 /* 5115 * XXX - do we need to get this information if we don't 5116 * have the ethtool ioctls? If so, how do we do that? 5117 */ 5118 return 0; 5119 } 5120 #endif /* SIOCETHTOOL */ 5121 5122 static struct dsa_proto { 5123 const char *name; 5124 bpf_u_int32 linktype; 5125 } dsa_protos[] = { 5126 /* 5127 * None is special and indicates that the interface does not have 5128 * any tagging protocol configured, and is therefore a standard 5129 * Ethernet interface. 5130 */ 5131 { "none", DLT_EN10MB }, 5132 { "brcm", DLT_DSA_TAG_BRCM }, 5133 { "brcm-prepend", DLT_DSA_TAG_BRCM_PREPEND }, 5134 { "dsa", DLT_DSA_TAG_DSA }, 5135 { "edsa", DLT_DSA_TAG_EDSA }, 5136 }; 5137 5138 static int 5139 iface_dsa_get_proto_info(const char *device, pcap_t *handle) 5140 { 5141 char *pathstr; 5142 unsigned int i; 5143 /* 5144 * Make this significantly smaller than PCAP_ERRBUF_SIZE; 5145 * the tag *shouldn't* have some huge long name, and making 5146 * it smaller keeps newer versions of GCC from whining that 5147 * the error message if we don't support the tag could 5148 * overflow the error message buffer. 
5149 */ 5150 char buf[128]; 5151 ssize_t r; 5152 int fd; 5153 5154 fd = asprintf(&pathstr, "/sys/class/net/%s/dsa/tagging", device); 5155 if (fd < 0) { 5156 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 5157 fd, "asprintf"); 5158 return PCAP_ERROR; 5159 } 5160 5161 fd = open(pathstr, O_RDONLY); 5162 free(pathstr); 5163 /* 5164 * This is not fatal, kernel >= 4.20 *might* expose this attribute 5165 */ 5166 if (fd < 0) 5167 return 0; 5168 5169 r = read(fd, buf, sizeof(buf) - 1); 5170 if (r <= 0) { 5171 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 5172 errno, "read"); 5173 close(fd); 5174 return PCAP_ERROR; 5175 } 5176 close(fd); 5177 5178 /* 5179 * Buffer should be LF terminated. 5180 */ 5181 if (buf[r - 1] == '\n') 5182 r--; 5183 buf[r] = '\0'; 5184 5185 for (i = 0; i < sizeof(dsa_protos) / sizeof(dsa_protos[0]); i++) { 5186 if (strlen(dsa_protos[i].name) == (size_t)r && 5187 strcmp(buf, dsa_protos[i].name) == 0) { 5188 handle->linktype = dsa_protos[i].linktype; 5189 switch (dsa_protos[i].linktype) { 5190 case DLT_EN10MB: 5191 return 0; 5192 default: 5193 return 1; 5194 } 5195 } 5196 } 5197 5198 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 5199 "unsupported DSA tag: %s", buf); 5200 5201 return PCAP_ERROR; 5202 } 5203 5204 /* 5205 * Query the kernel for the MTU of the given interface. 5206 */ 5207 static int 5208 iface_get_mtu(int fd, const char *device, char *ebuf) 5209 { 5210 struct ifreq ifr; 5211 5212 if (!device) 5213 return BIGGER_THAN_ALL_MTUS; 5214 5215 memset(&ifr, 0, sizeof(ifr)); 5216 pcap_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name)); 5217 5218 if (ioctl(fd, SIOCGIFMTU, &ifr) == -1) { 5219 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 5220 errno, "SIOCGIFMTU"); 5221 return -1; 5222 } 5223 5224 return ifr.ifr_mtu; 5225 } 5226 5227 /* 5228 * Get the hardware type of the given interface as ARPHRD_xxx constant. 
5229 */ 5230 static int 5231 iface_get_arptype(int fd, const char *device, char *ebuf) 5232 { 5233 struct ifreq ifr; 5234 int ret; 5235 5236 memset(&ifr, 0, sizeof(ifr)); 5237 pcap_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name)); 5238 5239 if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) { 5240 if (errno == ENODEV) { 5241 /* 5242 * No such device. 5243 * 5244 * There's nothing more to say, so clear 5245 * the error message. 5246 */ 5247 ret = PCAP_ERROR_NO_SUCH_DEVICE; 5248 ebuf[0] = '\0'; 5249 } else { 5250 ret = PCAP_ERROR; 5251 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 5252 errno, "SIOCGIFHWADDR"); 5253 } 5254 return ret; 5255 } 5256 5257 return ifr.ifr_hwaddr.sa_family; 5258 } 5259 5260 static int 5261 fix_program(pcap_t *handle, struct sock_fprog *fcode) 5262 { 5263 struct pcap_linux *handlep = handle->priv; 5264 size_t prog_size; 5265 register int i; 5266 register struct bpf_insn *p; 5267 struct bpf_insn *f; 5268 int len; 5269 5270 /* 5271 * Make a copy of the filter, and modify that copy if 5272 * necessary. 5273 */ 5274 prog_size = sizeof(*handle->fcode.bf_insns) * handle->fcode.bf_len; 5275 len = handle->fcode.bf_len; 5276 f = (struct bpf_insn *)malloc(prog_size); 5277 if (f == NULL) { 5278 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 5279 errno, "malloc"); 5280 return -1; 5281 } 5282 memcpy(f, handle->fcode.bf_insns, prog_size); 5283 fcode->len = len; 5284 fcode->filter = (struct sock_filter *) f; 5285 5286 for (i = 0; i < len; ++i) { 5287 p = &f[i]; 5288 /* 5289 * What type of instruction is this? 5290 */ 5291 switch (BPF_CLASS(p->code)) { 5292 5293 case BPF_LD: 5294 case BPF_LDX: 5295 /* 5296 * It's a load instruction; is it loading 5297 * from the packet? 5298 */ 5299 switch (BPF_MODE(p->code)) { 5300 5301 case BPF_ABS: 5302 case BPF_IND: 5303 case BPF_MSH: 5304 /* 5305 * Yes; are we in cooked mode? 5306 */ 5307 if (handlep->cooked) { 5308 /* 5309 * Yes, so we need to fix this 5310 * instruction. 
5311 */ 5312 if (fix_offset(handle, p) < 0) { 5313 /* 5314 * We failed to do so. 5315 * Return 0, so our caller 5316 * knows to punt to userland. 5317 */ 5318 return 0; 5319 } 5320 } 5321 break; 5322 } 5323 break; 5324 } 5325 } 5326 return 1; /* we succeeded */ 5327 } 5328 5329 static int 5330 fix_offset(pcap_t *handle, struct bpf_insn *p) 5331 { 5332 /* 5333 * Existing references to auxiliary data shouldn't be adjusted. 5334 * 5335 * Note that SKF_AD_OFF is negative, but p->k is unsigned, so 5336 * we use >= and cast SKF_AD_OFF to unsigned. 5337 */ 5338 if (p->k >= (bpf_u_int32)SKF_AD_OFF) 5339 return 0; 5340 if (handle->linktype == DLT_LINUX_SLL2) { 5341 /* 5342 * What's the offset? 5343 */ 5344 if (p->k >= SLL2_HDR_LEN) { 5345 /* 5346 * It's within the link-layer payload; that starts 5347 * at an offset of 0, as far as the kernel packet 5348 * filter is concerned, so subtract the length of 5349 * the link-layer header. 5350 */ 5351 p->k -= SLL2_HDR_LEN; 5352 } else if (p->k == 0) { 5353 /* 5354 * It's the protocol field; map it to the 5355 * special magic kernel offset for that field. 5356 */ 5357 p->k = SKF_AD_OFF + SKF_AD_PROTOCOL; 5358 } else if (p->k == 4) { 5359 /* 5360 * It's the ifindex field; map it to the 5361 * special magic kernel offset for that field. 5362 */ 5363 p->k = SKF_AD_OFF + SKF_AD_IFINDEX; 5364 } else if (p->k == 10) { 5365 /* 5366 * It's the packet type field; map it to the 5367 * special magic kernel offset for that field. 5368 */ 5369 p->k = SKF_AD_OFF + SKF_AD_PKTTYPE; 5370 } else if ((bpf_int32)(p->k) > 0) { 5371 /* 5372 * It's within the header, but it's not one of 5373 * those fields; we can't do that in the kernel, 5374 * so punt to userland. 5375 */ 5376 return -1; 5377 } 5378 } else { 5379 /* 5380 * What's the offset? 
5381 */ 5382 if (p->k >= SLL_HDR_LEN) { 5383 /* 5384 * It's within the link-layer payload; that starts 5385 * at an offset of 0, as far as the kernel packet 5386 * filter is concerned, so subtract the length of 5387 * the link-layer header. 5388 */ 5389 p->k -= SLL_HDR_LEN; 5390 } else if (p->k == 0) { 5391 /* 5392 * It's the packet type field; map it to the 5393 * special magic kernel offset for that field. 5394 */ 5395 p->k = SKF_AD_OFF + SKF_AD_PKTTYPE; 5396 } else if (p->k == 14) { 5397 /* 5398 * It's the protocol field; map it to the 5399 * special magic kernel offset for that field. 5400 */ 5401 p->k = SKF_AD_OFF + SKF_AD_PROTOCOL; 5402 } else if ((bpf_int32)(p->k) > 0) { 5403 /* 5404 * It's within the header, but it's not one of 5405 * those fields; we can't do that in the kernel, 5406 * so punt to userland. 5407 */ 5408 return -1; 5409 } 5410 } 5411 return 0; 5412 } 5413 5414 static int 5415 set_kernel_filter(pcap_t *handle, struct sock_fprog *fcode) 5416 { 5417 int total_filter_on = 0; 5418 int save_mode; 5419 int ret; 5420 int save_errno; 5421 5422 /* 5423 * The socket filter code doesn't discard all packets queued 5424 * up on the socket when the filter is changed; this means 5425 * that packets that don't match the new filter may show up 5426 * after the new filter is put onto the socket, if those 5427 * packets haven't yet been read. 5428 * 5429 * This means, for example, that if you do a tcpdump capture 5430 * with a filter, the first few packets in the capture might 5431 * be packets that wouldn't have passed the filter. 5432 * 5433 * We therefore discard all packets queued up on the socket 5434 * when setting a kernel filter. (This isn't an issue for 5435 * userland filters, as the userland filtering is done after 5436 * packets are queued up.) 5437 * 5438 * To flush those packets, we put the socket in read-only mode, 5439 * and read packets from the socket until there are no more to 5440 * read. 
5441 * 5442 * In order to keep that from being an infinite loop - i.e., 5443 * to keep more packets from arriving while we're draining 5444 * the queue - we put the "total filter", which is a filter 5445 * that rejects all packets, onto the socket before draining 5446 * the queue. 5447 * 5448 * This code deliberately ignores any errors, so that you may 5449 * get bogus packets if an error occurs, rather than having 5450 * the filtering done in userland even if it could have been 5451 * done in the kernel. 5452 */ 5453 if (setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER, 5454 &total_fcode, sizeof(total_fcode)) == 0) { 5455 char drain[1]; 5456 5457 /* 5458 * Note that we've put the total filter onto the socket. 5459 */ 5460 total_filter_on = 1; 5461 5462 /* 5463 * Save the socket's current mode, and put it in 5464 * non-blocking mode; we drain it by reading packets 5465 * until we get an error (which is normally a 5466 * "nothing more to be read" error). 5467 */ 5468 save_mode = fcntl(handle->fd, F_GETFL, 0); 5469 if (save_mode == -1) { 5470 pcap_fmt_errmsg_for_errno(handle->errbuf, 5471 PCAP_ERRBUF_SIZE, errno, 5472 "can't get FD flags when changing filter"); 5473 return -2; 5474 } 5475 if (fcntl(handle->fd, F_SETFL, save_mode | O_NONBLOCK) < 0) { 5476 pcap_fmt_errmsg_for_errno(handle->errbuf, 5477 PCAP_ERRBUF_SIZE, errno, 5478 "can't set nonblocking mode when changing filter"); 5479 return -2; 5480 } 5481 while (recv(handle->fd, &drain, sizeof drain, MSG_TRUNC) >= 0) 5482 ; 5483 save_errno = errno; 5484 if (save_errno != EAGAIN) { 5485 /* 5486 * Fatal error. 5487 * 5488 * If we can't restore the mode or reset the 5489 * kernel filter, there's nothing we can do. 
5490 */ 5491 (void)fcntl(handle->fd, F_SETFL, save_mode); 5492 (void)reset_kernel_filter(handle); 5493 pcap_fmt_errmsg_for_errno(handle->errbuf, 5494 PCAP_ERRBUF_SIZE, save_errno, 5495 "recv failed when changing filter"); 5496 return -2; 5497 } 5498 if (fcntl(handle->fd, F_SETFL, save_mode) == -1) { 5499 pcap_fmt_errmsg_for_errno(handle->errbuf, 5500 PCAP_ERRBUF_SIZE, errno, 5501 "can't restore FD flags when changing filter"); 5502 return -2; 5503 } 5504 } 5505 5506 /* 5507 * Now attach the new filter. 5508 */ 5509 ret = setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER, 5510 fcode, sizeof(*fcode)); 5511 if (ret == -1 && total_filter_on) { 5512 /* 5513 * Well, we couldn't set that filter on the socket, 5514 * but we could set the total filter on the socket. 5515 * 5516 * This could, for example, mean that the filter was 5517 * too big to put into the kernel, so we'll have to 5518 * filter in userland; in any case, we'll be doing 5519 * filtering in userland, so we need to remove the 5520 * total filter so we see packets. 5521 */ 5522 save_errno = errno; 5523 5524 /* 5525 * If this fails, we're really screwed; we have the 5526 * total filter on the socket, and it won't come off. 5527 * Report it as a fatal error. 5528 */ 5529 if (reset_kernel_filter(handle) == -1) { 5530 pcap_fmt_errmsg_for_errno(handle->errbuf, 5531 PCAP_ERRBUF_SIZE, errno, 5532 "can't remove kernel total filter"); 5533 return -2; /* fatal error */ 5534 } 5535 5536 errno = save_errno; 5537 } 5538 return ret; 5539 } 5540 5541 static int 5542 reset_kernel_filter(pcap_t *handle) 5543 { 5544 int ret; 5545 /* 5546 * setsockopt() barfs unless it get a dummy parameter. 5547 * valgrind whines unless the value is initialized, 5548 * as it has no idea that setsockopt() ignores its 5549 * parameter. 
5550 */ 5551 int dummy = 0; 5552 5553 ret = setsockopt(handle->fd, SOL_SOCKET, SO_DETACH_FILTER, 5554 &dummy, sizeof(dummy)); 5555 /* 5556 * Ignore ENOENT - it means "we don't have a filter", so there 5557 * was no filter to remove, and there's still no filter. 5558 * 5559 * Also ignore ENONET, as a lot of kernel versions had a 5560 * typo where ENONET, rather than ENOENT, was returned. 5561 */ 5562 if (ret == -1 && errno != ENOENT && errno != ENONET) 5563 return -1; 5564 return 0; 5565 } 5566 5567 int 5568 pcap_set_protocol_linux(pcap_t *p, int protocol) 5569 { 5570 if (pcap_check_activated(p)) 5571 return (PCAP_ERROR_ACTIVATED); 5572 p->opt.protocol = protocol; 5573 return (0); 5574 } 5575 5576 /* 5577 * Libpcap version string. 5578 */ 5579 const char * 5580 pcap_lib_version(void) 5581 { 5582 #if defined(HAVE_TPACKET3) 5583 return (PCAP_VERSION_STRING " (with TPACKET_V3)"); 5584 #else 5585 return (PCAP_VERSION_STRING " (with TPACKET_V2)"); 5586 #endif 5587 } 5588