xref: /freebsd/contrib/libpcap/pcap-dpdk.c (revision afdbf109c6a661a729938f68211054a0a50d38ac)
16f9cba8fSJoseph Mingrone /*
26f9cba8fSJoseph Mingrone  * Copyright (C) 2018 jingle YANG. All rights reserved.
36f9cba8fSJoseph Mingrone  *
46f9cba8fSJoseph Mingrone  * Redistribution and use in source and binary forms, with or without
56f9cba8fSJoseph Mingrone  * modification, are permitted provided that the following conditions
66f9cba8fSJoseph Mingrone  * are met:
76f9cba8fSJoseph Mingrone  *
86f9cba8fSJoseph Mingrone  *   1. Redistributions of source code must retain the above copyright
96f9cba8fSJoseph Mingrone  *      notice, this list of conditions and the following disclaimer.
106f9cba8fSJoseph Mingrone  *   2. Redistributions in binary form must reproduce the above copyright
116f9cba8fSJoseph Mingrone  *      notice, this list of conditions and the following disclaimer in the
126f9cba8fSJoseph Mingrone  *      documentation and/or other materials provided with the distribution.
136f9cba8fSJoseph Mingrone  *
146f9cba8fSJoseph Mingrone  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''AND
156f9cba8fSJoseph Mingrone  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
166f9cba8fSJoseph Mingrone  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
176f9cba8fSJoseph Mingrone  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
186f9cba8fSJoseph Mingrone  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
196f9cba8fSJoseph Mingrone  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
206f9cba8fSJoseph Mingrone  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
216f9cba8fSJoseph Mingrone  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
226f9cba8fSJoseph Mingrone  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
236f9cba8fSJoseph Mingrone  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
246f9cba8fSJoseph Mingrone  * SUCH DAMAGE.
256f9cba8fSJoseph Mingrone  */
266f9cba8fSJoseph Mingrone 
276f9cba8fSJoseph Mingrone /*
286f9cba8fSJoseph Mingrone Date: Dec 16, 2018
296f9cba8fSJoseph Mingrone 
306f9cba8fSJoseph Mingrone Description:
316f9cba8fSJoseph Mingrone 1. Pcap-dpdk provides libpcap the ability to use DPDK with the device name as dpdk:{portid}, such as dpdk:0.
326f9cba8fSJoseph Mingrone 2. DPDK is a set of libraries and drivers for fast packet processing. (https://www.dpdk.org/)
336f9cba8fSJoseph Mingrone 3. The testprogs/capturetest provides 6.4Gbps/800,000 pps on Intel 10-Gigabit X540-AT2 with DPDK 18.11.
346f9cba8fSJoseph Mingrone 
356f9cba8fSJoseph Mingrone Limitations:
366f9cba8fSJoseph Mingrone 1. DPDK support will be on if DPDK is available. Please set DIR for --with-dpdk[=DIR] with ./configure or -DDPDK_DIR[=DIR] with cmake if DPDK is installed manually.
2. Only dynamic linking against libdpdk.so is supported, because libdpdk.a will not work correctly.
3. Only the read operation is supported; packet injection is not supported yet.
396f9cba8fSJoseph Mingrone 
406f9cba8fSJoseph Mingrone Usage:
416f9cba8fSJoseph Mingrone 1. Compile DPDK as shared library and install.(https://github.com/DPDK/dpdk.git)
426f9cba8fSJoseph Mingrone 
436f9cba8fSJoseph Mingrone You shall modify the file $RTE_SDK/$RTE_TARGET/.config and set:
446f9cba8fSJoseph Mingrone CONFIG_RTE_BUILD_SHARED_LIB=y
456f9cba8fSJoseph Mingrone By the following command:
466f9cba8fSJoseph Mingrone sed -i 's/CONFIG_RTE_BUILD_SHARED_LIB=n/CONFIG_RTE_BUILD_SHARED_LIB=y/' $RTE_SDK/$RTE_TARGET/.config
476f9cba8fSJoseph Mingrone 
486f9cba8fSJoseph Mingrone 2. Launch l2fwd that is one of DPDK examples correctly, and get device information.
496f9cba8fSJoseph Mingrone 
506f9cba8fSJoseph Mingrone You shall learn how to bind nic with DPDK-compatible driver by $RTE_SDK/usertools/dpdk-devbind.py, such as igb_uio.
516f9cba8fSJoseph Mingrone And enable hugepages by dpdk-setup.sh
526f9cba8fSJoseph Mingrone 
536f9cba8fSJoseph Mingrone Then launch the l2fwd with dynamic driver support. For example:
546f9cba8fSJoseph Mingrone $RTE_SDK/examples/l2fwd/$RTE_TARGET/l2fwd -dlibrte_pmd_e1000.so -dlibrte_pmd_ixgbe.so -dlibrte_mempool_ring.so -- -p 0x1
556f9cba8fSJoseph Mingrone 
566f9cba8fSJoseph Mingrone 3. Compile libpcap with dpdk options.
576f9cba8fSJoseph Mingrone 
If DPDK is not found automatically, export the environment variables that were used when compiling DPDK, and then pass $RTE_SDK/$RTE_TARGET to --with-dpdk or -DDPDK_DIR.
596f9cba8fSJoseph Mingrone 
606f9cba8fSJoseph Mingrone export RTE_SDK={your DPDK base directory}
616f9cba8fSJoseph Mingrone export RTE_TARGET={your target name}
626f9cba8fSJoseph Mingrone 
636f9cba8fSJoseph Mingrone 3.1 With configure
646f9cba8fSJoseph Mingrone 
656f9cba8fSJoseph Mingrone ./configure --with-dpdk=$RTE_SDK/$RTE_TARGET && make -s all && make -s testprogs && make install
666f9cba8fSJoseph Mingrone 
676f9cba8fSJoseph Mingrone 3.2 With cmake
686f9cba8fSJoseph Mingrone 
696f9cba8fSJoseph Mingrone mkdir -p build && cd build && cmake -DDPDK_DIR=$RTE_SDK/$RTE_TARGET ../ && make -s all && make -s testprogs && make install
706f9cba8fSJoseph Mingrone 
716f9cba8fSJoseph Mingrone 4. Link your own program with libpcap, and use DPDK with the device name as dpdk:{portid}, such as dpdk:0.
726f9cba8fSJoseph Mingrone And you shall set DPDK configure options by environment variable DPDK_CFG
73*afdbf109SJoseph Mingrone For example, the testprogs/capturetest could be launched by:
746f9cba8fSJoseph Mingrone 
756f9cba8fSJoseph Mingrone env DPDK_CFG="--log-level=debug -l0 -dlibrte_pmd_e1000.so -dlibrte_pmd_ixgbe.so -dlibrte_mempool_ring.so" ./capturetest -i dpdk:0
766f9cba8fSJoseph Mingrone */
776f9cba8fSJoseph Mingrone 
786f9cba8fSJoseph Mingrone #include <config.h>
796f9cba8fSJoseph Mingrone 
806f9cba8fSJoseph Mingrone #include <errno.h>
816f9cba8fSJoseph Mingrone #include <netdb.h>
826f9cba8fSJoseph Mingrone #include <stdio.h>
836f9cba8fSJoseph Mingrone #include <stdlib.h>
846f9cba8fSJoseph Mingrone #include <string.h>
856f9cba8fSJoseph Mingrone #include <unistd.h>
866f9cba8fSJoseph Mingrone #include <limits.h> /* for INT_MAX */
876f9cba8fSJoseph Mingrone #include <time.h>
886f9cba8fSJoseph Mingrone 
896f9cba8fSJoseph Mingrone #include <sys/time.h>
906f9cba8fSJoseph Mingrone 
916f9cba8fSJoseph Mingrone //header for calling dpdk
926f9cba8fSJoseph Mingrone #include <rte_config.h>
936f9cba8fSJoseph Mingrone #include <rte_common.h>
946f9cba8fSJoseph Mingrone #include <rte_errno.h>
956f9cba8fSJoseph Mingrone #include <rte_log.h>
966f9cba8fSJoseph Mingrone #include <rte_malloc.h>
976f9cba8fSJoseph Mingrone #include <rte_memory.h>
986f9cba8fSJoseph Mingrone #include <rte_eal.h>
996f9cba8fSJoseph Mingrone #include <rte_launch.h>
1006f9cba8fSJoseph Mingrone #include <rte_atomic.h>
1016f9cba8fSJoseph Mingrone #include <rte_cycles.h>
1026f9cba8fSJoseph Mingrone #include <rte_lcore.h>
1036f9cba8fSJoseph Mingrone #include <rte_per_lcore.h>
1046f9cba8fSJoseph Mingrone #include <rte_branch_prediction.h>
1056f9cba8fSJoseph Mingrone #include <rte_interrupts.h>
1066f9cba8fSJoseph Mingrone #include <rte_random.h>
1076f9cba8fSJoseph Mingrone #include <rte_debug.h>
1086f9cba8fSJoseph Mingrone #include <rte_ether.h>
1096f9cba8fSJoseph Mingrone #include <rte_ethdev.h>
1106f9cba8fSJoseph Mingrone #include <rte_mempool.h>
1116f9cba8fSJoseph Mingrone #include <rte_mbuf.h>
1126f9cba8fSJoseph Mingrone #include <rte_bus.h>
1136f9cba8fSJoseph Mingrone 
1146f9cba8fSJoseph Mingrone #include "pcap-int.h"
1156f9cba8fSJoseph Mingrone #include "pcap-dpdk.h"
1166f9cba8fSJoseph Mingrone 
1176f9cba8fSJoseph Mingrone /*
1186f9cba8fSJoseph Mingrone  * Deal with API changes that break source compatibility.
1196f9cba8fSJoseph Mingrone  */
1206f9cba8fSJoseph Mingrone 
/* The Ethernet address structure is selected by a configure-time check. */
#ifdef HAVE_STRUCT_RTE_ETHER_ADDR
#define ETHER_ADDR_TYPE	struct rte_ether_addr
#else
#define ETHER_ADDR_TYPE	struct ether_addr
#endif

/* Snapshot length: the jumbo frame limit, under whichever name DPDK provides. */
#ifdef RTE_ETHER_MAX_JUMBO_FRAME_LEN
#define RTE_ETH_PCAP_SNAPLEN RTE_ETHER_MAX_JUMBO_FRAME_LEN
#else
#define RTE_ETH_PCAP_SNAPLEN ETHER_MAX_JUMBO_FRAME_LEN
#endif

/* Identification strings and messages. */
#define DPDK_LIB_NAME "libpcap_dpdk"
#define DPDK_DESC "Data Plane Development Kit (DPDK) Interface"
#define DPDK_ERR_PERM_MSG "permission denied, DPDK needs root permission"

/* DPDK configuration string handling. */
#define DPDK_DEF_LOG_LEV RTE_LOG_ERR
#define DPDK_ARGC_MAX 64
#define DPDK_CFG_MAX_LEN 1024
#define DPDK_CFG_ENV_NAME "DPDK_CFG"
#define DPDK_DEF_CFG "--log-level=error -l0 -dlibrte_pmd_e1000.so -dlibrte_pmd_ixgbe.so -dlibrte_mempool_ring.so"

/* Device naming: "dpdk:" followed by a decimal port id below DPDK_PORTID_MAX. */
#define DPDK_PREFIX "dpdk:"
#define DPDK_PORTID_MAX 65535U
#define DPDK_DEV_NAME_MAX 32
#define DPDK_DEV_DESC_MAX 512

/* Sizes of the textual MAC and PCI address buffers, and the fallback MAC string. */
#define DPDK_MAC_ADDR_SIZE 32
#define DPDK_DEF_MAC_ADDR "00:00:00:00:00:00"
#define DPDK_PCI_ADDR_SIZE 16

/* Delay, in milliseconds, between receive attempts while a blocking read waits. */
#define DPDK_DEF_MIN_SLEEP_MS 1

/* Memory pool and buffer names. */
#define MBUF_POOL_NAME "mbuf_pool"
#define DPDK_TX_BUF_NAME "tx_buffer"
//The number of elements in the mbuf pool.
#define DPDK_NB_MBUFS 8192U
#define MEMPOOL_CACHE_SIZE 256
/* Capacity of the array of mbuf pointers filled by one receive burst. */
#define MAX_PKT_BURST 32
// Configurable number of RX/TX ring descriptors
#define RTE_TEST_RX_DESC_DEFAULT 1024
#define RTE_TEST_TX_DESC_DEFAULT 1024

//
// This is set to 0 if we haven't initialized DPDK yet, 1 if we've
// successfully initialized it, a negative value, which is the negative
// of the rte_errno from rte_eal_init(), if we tried to initialize it
// and got an error.
//
static int is_dpdk_pre_inited = 0;

static char dpdk_cfg_buf[DPDK_CFG_MAX_LEN];

static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;

static struct rte_eth_dev_tx_buffer *tx_buffer;
1716f9cba8fSJoseph Mingrone 
/*
 * Reference point used to turn timer cycle counts into wall-clock
 * timestamps: the wall-clock time and the cycle counter are sampled
 * together, along with the timer frequency.
 */
struct dpdk_ts_helper {
	struct timeval start_time;	/* wall-clock time at the reference point */
	uint64_t start_cycles;		/* timer cycle counter at the reference point */
	uint64_t hz;			/* timer frequency, in cycles per second */
};
1776f9cba8fSJoseph Mingrone struct pcap_dpdk{
1786f9cba8fSJoseph Mingrone 	pcap_t * orig;
1796f9cba8fSJoseph Mingrone 	uint16_t portid; // portid of DPDK
1806f9cba8fSJoseph Mingrone 	int must_clear_promisc;
1816f9cba8fSJoseph Mingrone 	uint64_t bpf_drop;
1826f9cba8fSJoseph Mingrone 	int nonblock;
1836f9cba8fSJoseph Mingrone 	struct timeval required_select_timeout;
1846f9cba8fSJoseph Mingrone 	struct timeval prev_ts;
1856f9cba8fSJoseph Mingrone 	struct rte_eth_stats prev_stats;
1866f9cba8fSJoseph Mingrone 	struct timeval curr_ts;
1876f9cba8fSJoseph Mingrone 	struct rte_eth_stats curr_stats;
1886f9cba8fSJoseph Mingrone 	uint64_t pps;
1896f9cba8fSJoseph Mingrone 	uint64_t bps;
1906f9cba8fSJoseph Mingrone 	struct rte_mempool * pktmbuf_pool;
1916f9cba8fSJoseph Mingrone 	struct dpdk_ts_helper ts_helper;
1926f9cba8fSJoseph Mingrone 	ETHER_ADDR_TYPE eth_addr;
1936f9cba8fSJoseph Mingrone 	char mac_addr[DPDK_MAC_ADDR_SIZE];
1946f9cba8fSJoseph Mingrone 	char pci_addr[DPDK_PCI_ADDR_SIZE];
1956f9cba8fSJoseph Mingrone 	unsigned char pcap_tmp_buf[RTE_ETH_PCAP_SNAPLEN];
1966f9cba8fSJoseph Mingrone };
1976f9cba8fSJoseph Mingrone 
1986f9cba8fSJoseph Mingrone static struct rte_eth_conf port_conf = {
1996f9cba8fSJoseph Mingrone 	.rxmode = {
2006f9cba8fSJoseph Mingrone 		.split_hdr_size = 0,
2016f9cba8fSJoseph Mingrone 	},
2026f9cba8fSJoseph Mingrone 	.txmode = {
2036f9cba8fSJoseph Mingrone 		.mq_mode = ETH_MQ_TX_NONE,
2046f9cba8fSJoseph Mingrone 	},
2056f9cba8fSJoseph Mingrone };
2066f9cba8fSJoseph Mingrone 
2076f9cba8fSJoseph Mingrone static void	dpdk_fmt_errmsg_for_rte_errno(char *, size_t, int,
2086f9cba8fSJoseph Mingrone     PCAP_FORMAT_STRING(const char *), ...) PCAP_PRINTFLIKE(4, 5);
2096f9cba8fSJoseph Mingrone 
2106f9cba8fSJoseph Mingrone /*
2116f9cba8fSJoseph Mingrone  * Generate an error message based on a format, arguments, and an
2126f9cba8fSJoseph Mingrone  * rte_errno, with a message for the rte_errno after the formatted output.
2136f9cba8fSJoseph Mingrone  */
dpdk_fmt_errmsg_for_rte_errno(char * errbuf,size_t errbuflen,int errnum,const char * fmt,...)2146f9cba8fSJoseph Mingrone static void dpdk_fmt_errmsg_for_rte_errno(char *errbuf, size_t errbuflen,
2156f9cba8fSJoseph Mingrone     int errnum, const char *fmt, ...)
2166f9cba8fSJoseph Mingrone {
2176f9cba8fSJoseph Mingrone 	va_list ap;
2186f9cba8fSJoseph Mingrone 	size_t msglen;
2196f9cba8fSJoseph Mingrone 	char *p;
2206f9cba8fSJoseph Mingrone 	size_t errbuflen_remaining;
2216f9cba8fSJoseph Mingrone 
2226f9cba8fSJoseph Mingrone 	va_start(ap, fmt);
2236f9cba8fSJoseph Mingrone 	vsnprintf(errbuf, errbuflen, fmt, ap);
2246f9cba8fSJoseph Mingrone 	va_end(ap);
2256f9cba8fSJoseph Mingrone 	msglen = strlen(errbuf);
2266f9cba8fSJoseph Mingrone 
2276f9cba8fSJoseph Mingrone 	/*
2286f9cba8fSJoseph Mingrone 	 * Do we have enough space to append ": "?
2296f9cba8fSJoseph Mingrone 	 * Including the terminating '\0', that's 3 bytes.
2306f9cba8fSJoseph Mingrone 	 */
2316f9cba8fSJoseph Mingrone 	if (msglen + 3 > errbuflen) {
2326f9cba8fSJoseph Mingrone 		/* No - just give them what we've produced. */
2336f9cba8fSJoseph Mingrone 		return;
2346f9cba8fSJoseph Mingrone 	}
2356f9cba8fSJoseph Mingrone 	p = errbuf + msglen;
2366f9cba8fSJoseph Mingrone 	errbuflen_remaining = errbuflen - msglen;
2376f9cba8fSJoseph Mingrone 	*p++ = ':';
2386f9cba8fSJoseph Mingrone 	*p++ = ' ';
2396f9cba8fSJoseph Mingrone 	*p = '\0';
2406f9cba8fSJoseph Mingrone 	msglen += 2;
2416f9cba8fSJoseph Mingrone 	errbuflen_remaining -= 2;
2426f9cba8fSJoseph Mingrone 
2436f9cba8fSJoseph Mingrone 	/*
2446f9cba8fSJoseph Mingrone 	 * Now append the string for the error code.
2456f9cba8fSJoseph Mingrone 	 * rte_strerror() is thread-safe, at least as of dpdk 18.11,
2466f9cba8fSJoseph Mingrone 	 * unlike strerror() - it uses strerror_r() rather than strerror()
2476f9cba8fSJoseph Mingrone 	 * for UN*X errno values, and prints to what I assume is a per-thread
2486f9cba8fSJoseph Mingrone 	 * buffer (based on the "PER_LCORE" in "RTE_DEFINE_PER_LCORE" used
2496f9cba8fSJoseph Mingrone 	 * to declare the buffers statically) for DPDK errors.
2506f9cba8fSJoseph Mingrone 	 */
2516f9cba8fSJoseph Mingrone 	snprintf(p, errbuflen_remaining, "%s", rte_strerror(errnum));
2526f9cba8fSJoseph Mingrone }
2536f9cba8fSJoseph Mingrone 
dpdk_init_timer(struct pcap_dpdk * pd)2546f9cba8fSJoseph Mingrone static int dpdk_init_timer(struct pcap_dpdk *pd){
2556f9cba8fSJoseph Mingrone 	gettimeofday(&(pd->ts_helper.start_time),NULL);
2566f9cba8fSJoseph Mingrone 	pd->ts_helper.start_cycles = rte_get_timer_cycles();
2576f9cba8fSJoseph Mingrone 	pd->ts_helper.hz = rte_get_timer_hz();
2586f9cba8fSJoseph Mingrone 	if (pd->ts_helper.hz == 0){
2596f9cba8fSJoseph Mingrone 		return -1;
2606f9cba8fSJoseph Mingrone 	}
2616f9cba8fSJoseph Mingrone 	return 0;
2626f9cba8fSJoseph Mingrone }
calculate_timestamp(struct dpdk_ts_helper * helper,struct timeval * ts)2636f9cba8fSJoseph Mingrone static inline void calculate_timestamp(struct dpdk_ts_helper *helper,struct timeval *ts)
2646f9cba8fSJoseph Mingrone {
2656f9cba8fSJoseph Mingrone 	uint64_t cycles;
2666f9cba8fSJoseph Mingrone 	// delta
2676f9cba8fSJoseph Mingrone 	struct timeval cur_time;
2686f9cba8fSJoseph Mingrone 	cycles = rte_get_timer_cycles() - helper->start_cycles;
2696f9cba8fSJoseph Mingrone 	cur_time.tv_sec = (time_t)(cycles/helper->hz);
2706f9cba8fSJoseph Mingrone 	cur_time.tv_usec = (suseconds_t)((cycles%helper->hz)*1e6/helper->hz);
2716f9cba8fSJoseph Mingrone 	timeradd(&(helper->start_time), &cur_time, ts);
2726f9cba8fSJoseph Mingrone }
2736f9cba8fSJoseph Mingrone 
dpdk_gather_data(unsigned char * data,uint32_t len,struct rte_mbuf * mbuf)2746f9cba8fSJoseph Mingrone static uint32_t dpdk_gather_data(unsigned char *data, uint32_t len, struct rte_mbuf *mbuf)
2756f9cba8fSJoseph Mingrone {
2766f9cba8fSJoseph Mingrone 	uint32_t total_len = 0;
2776f9cba8fSJoseph Mingrone 	while (mbuf && (total_len+mbuf->data_len) < len ){
2786f9cba8fSJoseph Mingrone 		rte_memcpy(data+total_len, rte_pktmbuf_mtod(mbuf,void *),mbuf->data_len);
2796f9cba8fSJoseph Mingrone 		total_len+=mbuf->data_len;
2806f9cba8fSJoseph Mingrone 		mbuf=mbuf->next;
2816f9cba8fSJoseph Mingrone 	}
2826f9cba8fSJoseph Mingrone 	return total_len;
2836f9cba8fSJoseph Mingrone }
2846f9cba8fSJoseph Mingrone 
2856f9cba8fSJoseph Mingrone 
/*
 * Receive up to burst_cnt packets from queue 0 of the handle's port
 * into pkts_burst.
 *
 * In non-blocking mode a single receive attempt is made.  In blocking
 * mode the receive is retried, with a DPDK_DEF_MIN_SLEEP_MS delay
 * between attempts, until packets arrive, break_loop is set, or the
 * handle's timeout (in milliseconds) has been used up; a timeout of 0
 * means there is no time limit.
 *
 * Returns the number of packets received, possibly 0.
 */
static int dpdk_read_with_timeout(pcap_t *p, struct rte_mbuf **pkts_burst, const uint16_t burst_cnt){
	struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv);
	const int timeout_ms = p->opt.timeout;
	int waited_ms;
	int nb_rx = 0;

	if (pd->nonblock){
		// One attempt only, whatever it yields.
		return (int)rte_eth_rx_burst(pd->portid, 0, pkts_burst, burst_cnt);
	}

	for (waited_ms = 0;
	    timeout_ms == 0 || waited_ms < timeout_ms;
	    waited_ms += DPDK_DEF_MIN_SLEEP_MS){
		nb_rx = (int)rte_eth_rx_burst(pd->portid, 0, pkts_burst, burst_cnt);
		if (nb_rx != 0 || p->break_loop){
			// Either packets arrived or the caller asked us to stop.
			break;
		}
		// Busy-wait for a short while; per the original author,
		// usleep() would impact performance dramatically.
		rte_delay_us_block(DPDK_DEF_MIN_SLEEP_MS*1000);
	}
	return nb_rx;
}
3146f9cba8fSJoseph Mingrone 
/*
 * Read packets from the DPDK port and hand them to the callback.
 *
 *   p        capture handle
 *   max_cnt  maximum number of packets to receive; an "unlimited" count
 *            is clipped to INT_MAX
 *   cb       callback invoked for every packet accepted by the filter
 *   cb_arg   opaque argument passed through to cb
 *
 * Returns the number of packets received (packets rejected by the
 * filter are included in that count and also added to bpf_drop),
 * PCAP_ERROR_BREAK if break_loop was set, or PCAP_ERROR, with a message
 * in p->errbuf, if a buffer for a large multi-segment packet could not
 * be allocated.
 */
static int pcap_dpdk_dispatch(pcap_t *p, int max_cnt, pcap_handler cb, u_char *cb_arg)
{
	struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv);
	int burst_cnt = 0;
	int nb_rx = 0;
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
	struct rte_mbuf *m;
	struct pcap_pkthdr pcap_header;
	// In DPDK, pkt_len is sum of lengths for all segments. And data_len is for one segment
	uint32_t pkt_len = 0;
	uint32_t caplen = 0;
	u_char *bp = NULL;
	int i=0;
	unsigned int gather_len =0;
	int pkt_cnt = 0;
	u_char *large_buffer=NULL;
	int timeout_ms = p->opt.timeout;

	/*
	 * This can conceivably process more than INT_MAX packets,
	 * which would overflow the packet count, causing it either
	 * to look like a negative number, and thus cause us to
	 * return a value that looks like an error, or overflow
	 * back into positive territory, and thus cause us to
	 * return a too-low count.
	 *
	 * Therefore, if the packet count is unlimited, we clip
	 * it at INT_MAX; this routine is not expected to
	 * process packets indefinitely, so that's not an issue.
	 */
	if (PACKET_COUNT_IS_UNLIMITED(max_cnt))
		max_cnt = INT_MAX;

	while( pkt_cnt < max_cnt){
		if (p->break_loop){
			p->break_loop = 0;
			return PCAP_ERROR_BREAK;
		}
		// Never ask for more packets than are still wanted, so that
		// the total cannot exceed max_cnt; a single burst is also
		// limited by the size of pkts_burst.
		burst_cnt = max_cnt - pkt_cnt;
		if (burst_cnt > MAX_PKT_BURST){
			burst_cnt = MAX_PKT_BURST;
		}
		// read once in non-blocking mode, or try many times waiting for timeout_ms.
		// if timeout_ms == 0, it will be blocked until one packet arrives or break_loop is set.
		nb_rx = dpdk_read_with_timeout(p, pkts_burst, (uint16_t)burst_cnt);
		if (nb_rx == 0){
			if (pd->nonblock){
				RTE_LOG(DEBUG, USER1, "dpdk: no packets available in non-blocking mode.\n");
			}else{
				if (p->break_loop){
					RTE_LOG(DEBUG, USER1, "dpdk: no packets available and break_loop is set in blocking mode.\n");
					p->break_loop = 0;
					return PCAP_ERROR_BREAK;

				}
				RTE_LOG(DEBUG, USER1, "dpdk: no packets available for timeout %d ms in blocking mode.\n", timeout_ms);
			}
			// break if dpdk reads 0 packet, no matter in blocking(timeout) or non-blocking mode.
			break;
		}
		pkt_cnt += nb_rx;
		for ( i = 0; i < nb_rx; i++) {
			m = pkts_burst[i];
			calculate_timestamp(&(pd->ts_helper),&(pcap_header.ts));
			pkt_len = rte_pktmbuf_pkt_len(m);
			// caplen = min(pkt_len, p->snapshot);
			caplen = pkt_len < (uint32_t)p->snapshot ? pkt_len: (uint32_t)p->snapshot;
			pcap_header.caplen = caplen;
			pcap_header.len = pkt_len;
			// volatile prefetch
			rte_prefetch0(rte_pktmbuf_mtod(m, void *));
			bp = NULL;
			if (m->nb_segs == 1)
			{
				// Single segment: the data is already contiguous.
				bp = rte_pktmbuf_mtod(m, u_char *);
			}else{
				// use fast buffer pcap_tmp_buf if pkt_len is small, no need to call malloc and free
				if ( pkt_len <= RTE_ETH_PCAP_SNAPLEN)
				{
					gather_len = dpdk_gather_data(pd->pcap_tmp_buf, RTE_ETH_PCAP_SNAPLEN, m);
					bp = pd->pcap_tmp_buf;
				}else{
					// need call free later
					large_buffer = malloc(caplen > 0 ? caplen : 1);
					if (large_buffer == NULL){
						snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
						    "dpdk error: can't allocate %u bytes for a multi-segment packet",
						    caplen);
						// Release this packet and the rest of the
						// burst; nobody else holds these mbufs.
						for (; i < nb_rx; i++){
							rte_pktmbuf_free(pkts_burst[i]);
						}
						return PCAP_ERROR;
					}
					gather_len = dpdk_gather_data(large_buffer, caplen, m);
					bp = large_buffer;
				}
				// Never report more captured bytes than were
				// actually copied into the buffer.
				if (gather_len < pcap_header.caplen){
					pcap_header.caplen = gather_len;
				}
			}
			if (bp){
				if (p->fcode.bf_insns==NULL || pcapint_filter(p->fcode.bf_insns, bp, pcap_header.len, pcap_header.caplen)){
					cb(cb_arg, &pcap_header, bp);
				}else{
					pd->bpf_drop++;
				}
			}
			//free all pktmbuf
			rte_pktmbuf_free(m);
			free(large_buffer);
			large_buffer=NULL;
		}
	}
	return pkt_cnt;
}
4246f9cba8fSJoseph Mingrone 
/*
 * Packet injection entry point.  Injection is not implemented: the
 * buffer and size are ignored, an explanatory message is stored in
 * p->errbuf and PCAP_ERROR is returned.
 */
static int pcap_dpdk_inject(pcap_t *p, const void *buf _U_, int size _U_)
{
	static const char not_implemented[] =
	    "dpdk error: Inject function has not been implemented yet";

	pcapint_strlcpy(p->errbuf, not_implemented, PCAP_ERRBUF_SIZE);
	return PCAP_ERROR;
}
4336f9cba8fSJoseph Mingrone 
/*
 * Close a DPDK capture handle: disable promiscuous mode if the handle
 * is flagged as needing that, stop and close the port, then run the
 * common live-capture cleanup.  Does nothing if the handle has no
 * private data.
 */
static void pcap_dpdk_close(pcap_t *p)
{
	struct pcap_dpdk *pd = p->priv;

	if (pd != NULL){
		if (pd->must_clear_promisc){
			rte_eth_promiscuous_disable(pd->portid);
		}
		rte_eth_dev_stop(pd->portid);
		rte_eth_dev_close(pd->portid);
		pcapint_cleanup_live_common(p);
	}
}
4496f9cba8fSJoseph Mingrone 
nic_stats_display(struct pcap_dpdk * pd)4506f9cba8fSJoseph Mingrone static void nic_stats_display(struct pcap_dpdk *pd)
4516f9cba8fSJoseph Mingrone {
4526f9cba8fSJoseph Mingrone 	uint16_t portid = pd->portid;
4536f9cba8fSJoseph Mingrone 	struct rte_eth_stats stats;
4546f9cba8fSJoseph Mingrone 	rte_eth_stats_get(portid, &stats);
4556f9cba8fSJoseph Mingrone 	RTE_LOG(INFO,USER1, "portid:%d, RX-packets: %-10"PRIu64"  RX-errors:  %-10"PRIu64
4566f9cba8fSJoseph Mingrone 	       "  RX-bytes:  %-10"PRIu64"  RX-Imissed:  %-10"PRIu64"\n", portid, stats.ipackets, stats.ierrors,
4576f9cba8fSJoseph Mingrone 	       stats.ibytes,stats.imissed);
4586f9cba8fSJoseph Mingrone 	RTE_LOG(INFO,USER1, "portid:%d, RX-PPS: %-10"PRIu64" RX-Mbps: %.2lf\n", portid, pd->pps, pd->bps/1e6f );
4596f9cba8fSJoseph Mingrone }
4606f9cba8fSJoseph Mingrone 
/*
 * Fetch the port statistics, fill in *ps (when ps is not NULL) and
 * refresh the packet/bit rates kept in the private data.
 *
 *   ps_recv    packets received by the port
 *   ps_drop    port receive errors plus packets rejected by the filter
 *   ps_ifdrop  packets missed by the port
 *
 * The rates cover the interval since the previous call that measured a
 * non-zero elapsed time.  If no time has elapsed since then, the rates
 * and the baseline sample are left untouched, since a rate cannot be
 * computed over an empty interval.
 *
 * Always returns 0.
 */
static int pcap_dpdk_stats(pcap_t *p, struct pcap_stat *ps)
{
	struct pcap_dpdk *pd = p->priv;
	struct timeval delta_tm;
	uint64_t delta_pkt;
	uint64_t delta_bit;
	uint64_t delta_usec = 0;

	calculate_timestamp(&(pd->ts_helper), &(pd->curr_ts));
	rte_eth_stats_get(pd->portid,&(pd->curr_stats));
	if (ps){
		ps->ps_recv = pd->curr_stats.ipackets;
		ps->ps_drop = pd->curr_stats.ierrors;
		ps->ps_drop += pd->bpf_drop;
		ps->ps_ifdrop = pd->curr_stats.imissed;
	}

	delta_pkt = pd->curr_stats.ipackets - pd->prev_stats.ipackets;
	delta_bit = (pd->curr_stats.ibytes-pd->prev_stats.ibytes)*8;
	timersub(&(pd->curr_ts),&(pd->prev_ts), &delta_tm);
	// Integer arithmetic; a negative interval is treated as empty.
	if (delta_tm.tv_sec >= 0){
		delta_usec = (uint64_t)delta_tm.tv_sec*1000000U + (uint64_t)delta_tm.tv_usec;
	}
	RTE_LOG(DEBUG, USER1, "delta_usec: %-10"PRIu64" delta_pkt: %-10"PRIu64" delta_bit: %-10"PRIu64"\n", delta_usec, delta_pkt, delta_bit);

	if (delta_usec != 0){
		// Double precision: single-precision floats cannot represent
		// large counter deltas exactly.
		pd->pps = (uint64_t)((double)delta_pkt*1e6/(double)delta_usec);
		pd->bps = (uint64_t)((double)delta_bit*1e6/(double)delta_usec);
	}
	nic_stats_display(pd);
	if (delta_usec != 0){
		// Advance the baseline only when the interval was measurable,
		// so that nothing is lost from the next rate computation.
		pd->prev_stats = pd->curr_stats;
		pd->prev_ts = pd->curr_ts;
	}
	return 0;
}
4856f9cba8fSJoseph Mingrone 
/*
 * Record the requested blocking mode in the handle's private data.
 * Always returns 0.
 */
static int pcap_dpdk_setnonblock(pcap_t *p, int nonblock){
	((struct pcap_dpdk *)p->priv)->nonblock = nonblock;
	return 0;
}
4916f9cba8fSJoseph Mingrone 
/*
 * Return the blocking mode recorded in the handle's private data.
 */
static int pcap_dpdk_getnonblock(pcap_t *p){
	const struct pcap_dpdk *pd = p->priv;

	return pd->nonblock;
}
check_link_status(uint16_t portid,struct rte_eth_link * plink)4966f9cba8fSJoseph Mingrone static int check_link_status(uint16_t portid, struct rte_eth_link *plink)
4976f9cba8fSJoseph Mingrone {
4986f9cba8fSJoseph Mingrone 	// wait up to 9 seconds to get link status
4996f9cba8fSJoseph Mingrone 	rte_eth_link_get(portid, plink);
5006f9cba8fSJoseph Mingrone 	return plink->link_status == ETH_LINK_UP;
5016f9cba8fSJoseph Mingrone }
/*
 * Format an Ethernet address as "XX:XX:XX:XX:XX:XX" (upper-case hex).
 *
 *   addrp    address to format; if NULL, DPDK_DEF_MAC_ADDR is used
 *   mac_str  destination buffer
 *   len      size of mac_str, in bytes
 *
 * The result is always NUL-terminated and is truncated if the buffer is
 * too small.  Nothing is written if mac_str is NULL or len is not
 * positive.
 */
static void eth_addr_str(ETHER_ADDR_TYPE *addrp, char* mac_str, int len)
{
	if (mac_str == NULL || len <= 0){
		// No room at all; a non-positive length must not reach
		// snprintf(), where it would turn into a huge size_t.
		return;
	}
	if (addrp == NULL){
		snprintf(mac_str, (size_t)len, "%s", DPDK_DEF_MAC_ADDR);
		return;
	}
	snprintf(mac_str, (size_t)len, "%02X:%02X:%02X:%02X:%02X:%02X",
	    addrp->addr_bytes[0], addrp->addr_bytes[1], addrp->addr_bytes[2],
	    addrp->addr_bytes[3], addrp->addr_bytes[4], addrp->addr_bytes[5]);
}
5266f9cba8fSJoseph Mingrone // return portid by device name, otherwise return -1
portid_by_device(char * device)5276f9cba8fSJoseph Mingrone static uint16_t portid_by_device(char * device)
5286f9cba8fSJoseph Mingrone {
5296f9cba8fSJoseph Mingrone 	uint16_t ret = DPDK_PORTID_MAX;
530*afdbf109SJoseph Mingrone 	size_t len = strlen(device);
531*afdbf109SJoseph Mingrone 	size_t prefix_len = strlen(DPDK_PREFIX);
5326f9cba8fSJoseph Mingrone 	unsigned long ret_ul = 0L;
5336f9cba8fSJoseph Mingrone 	char *pEnd;
5346f9cba8fSJoseph Mingrone 	if (len<=prefix_len || strncmp(device, DPDK_PREFIX, prefix_len)) // check prefix dpdk:
5356f9cba8fSJoseph Mingrone 	{
5366f9cba8fSJoseph Mingrone 		return ret;
5376f9cba8fSJoseph Mingrone 	}
5386f9cba8fSJoseph Mingrone 	//check all chars are digital
5396f9cba8fSJoseph Mingrone 	for (int i=prefix_len; device[i]; i++){
5406f9cba8fSJoseph Mingrone 		if (device[i]<'0' || device[i]>'9'){
5416f9cba8fSJoseph Mingrone 			return ret;
5426f9cba8fSJoseph Mingrone 		}
5436f9cba8fSJoseph Mingrone 	}
5446f9cba8fSJoseph Mingrone 	ret_ul = strtoul(&(device[prefix_len]), &pEnd, 10);
5456f9cba8fSJoseph Mingrone 	if (pEnd == &(device[prefix_len]) || *pEnd != '\0'){
5466f9cba8fSJoseph Mingrone 		return ret;
5476f9cba8fSJoseph Mingrone 	}
5486f9cba8fSJoseph Mingrone 	// too large for portid
5496f9cba8fSJoseph Mingrone 	if (ret_ul >= DPDK_PORTID_MAX){
5506f9cba8fSJoseph Mingrone 		return ret;
5516f9cba8fSJoseph Mingrone 	}
5526f9cba8fSJoseph Mingrone 	ret = (uint16_t)ret_ul;
5536f9cba8fSJoseph Mingrone 	return ret;
5546f9cba8fSJoseph Mingrone }
5556f9cba8fSJoseph Mingrone 
parse_dpdk_cfg(char * dpdk_cfg,char ** dargv)5566f9cba8fSJoseph Mingrone static int parse_dpdk_cfg(char* dpdk_cfg,char** dargv)
5576f9cba8fSJoseph Mingrone {
5586f9cba8fSJoseph Mingrone 	int cnt=0;
5596f9cba8fSJoseph Mingrone 	memset(dargv,0,sizeof(dargv[0])*DPDK_ARGC_MAX);
5606f9cba8fSJoseph Mingrone 	//current process name
5616f9cba8fSJoseph Mingrone 	int skip_space = 1;
5626f9cba8fSJoseph Mingrone 	int i=0;
5636f9cba8fSJoseph Mingrone 	RTE_LOG(INFO, USER1,"dpdk cfg: %s\n",dpdk_cfg);
5646f9cba8fSJoseph Mingrone 	// find first non space char
5656f9cba8fSJoseph Mingrone 	// The last opt is NULL
5666f9cba8fSJoseph Mingrone 	for (i=0;dpdk_cfg[i] && cnt<DPDK_ARGC_MAX-1;i++){
5676f9cba8fSJoseph Mingrone 		if (skip_space && dpdk_cfg[i]!=' '){ // not space
5686f9cba8fSJoseph Mingrone 			skip_space=!skip_space; // skip normal char
5696f9cba8fSJoseph Mingrone 			dargv[cnt++] = dpdk_cfg+i;
5706f9cba8fSJoseph Mingrone 		}
571*afdbf109SJoseph Mingrone 		if (!skip_space && dpdk_cfg[i]==' '){ // find a space
5726f9cba8fSJoseph Mingrone 			dpdk_cfg[i]=0x00; // end of this opt
5736f9cba8fSJoseph Mingrone 			skip_space=!skip_space; // skip space char
5746f9cba8fSJoseph Mingrone 		}
5756f9cba8fSJoseph Mingrone 	}
5766f9cba8fSJoseph Mingrone 	dargv[cnt]=NULL;
5776f9cba8fSJoseph Mingrone 	return cnt;
5786f9cba8fSJoseph Mingrone }
5796f9cba8fSJoseph Mingrone 
5806f9cba8fSJoseph Mingrone // only called once
5816f9cba8fSJoseph Mingrone // Returns:
5826f9cba8fSJoseph Mingrone //
5836f9cba8fSJoseph Mingrone //    1 on success;
5846f9cba8fSJoseph Mingrone //
5856f9cba8fSJoseph Mingrone //    0 if "the EAL cannot initialize on this system", which we treat as
5866f9cba8fSJoseph Mingrone //    meaning "DPDK isn't available";
5876f9cba8fSJoseph Mingrone //
5886f9cba8fSJoseph Mingrone //    a PCAP_ERROR_ code for other errors.
5896f9cba8fSJoseph Mingrone //
5906f9cba8fSJoseph Mingrone // If eaccess_not_fatal is non-zero, treat "a permissions issue" the way
5916f9cba8fSJoseph Mingrone // we treat "the EAL cannot initialize on this system".  We use that
5926f9cba8fSJoseph Mingrone // when trying to find DPDK devices, as we don't want to fail to return
5936f9cba8fSJoseph Mingrone // *any* devices just because we can't support DPDK; when we're trying
5946f9cba8fSJoseph Mingrone // to open a device, we need to return a permissions error in that case.
dpdk_pre_init(char * ebuf,int eaccess_not_fatal)5956f9cba8fSJoseph Mingrone static int dpdk_pre_init(char * ebuf, int eaccess_not_fatal)
5966f9cba8fSJoseph Mingrone {
5976f9cba8fSJoseph Mingrone 	int dargv_cnt=0;
5986f9cba8fSJoseph Mingrone 	char *dargv[DPDK_ARGC_MAX];
5996f9cba8fSJoseph Mingrone 	char *ptr_dpdk_cfg = NULL;
6006f9cba8fSJoseph Mingrone 	int ret;
601*afdbf109SJoseph Mingrone 	// global var
6026f9cba8fSJoseph Mingrone 	if (is_dpdk_pre_inited != 0)
6036f9cba8fSJoseph Mingrone 	{
6046f9cba8fSJoseph Mingrone 		// already inited; did that succeed?
6056f9cba8fSJoseph Mingrone 		if (is_dpdk_pre_inited < 0)
6066f9cba8fSJoseph Mingrone 		{
6076f9cba8fSJoseph Mingrone 			// failed
6086f9cba8fSJoseph Mingrone 			goto error;
6096f9cba8fSJoseph Mingrone 		}
6106f9cba8fSJoseph Mingrone 		else
6116f9cba8fSJoseph Mingrone 		{
6126f9cba8fSJoseph Mingrone 			// succeeded
6136f9cba8fSJoseph Mingrone 			return 1;
6146f9cba8fSJoseph Mingrone 		}
6156f9cba8fSJoseph Mingrone 	}
6166f9cba8fSJoseph Mingrone 	// init EAL
6176f9cba8fSJoseph Mingrone 	ptr_dpdk_cfg = getenv(DPDK_CFG_ENV_NAME);
6186f9cba8fSJoseph Mingrone 	// set default log level to debug
6196f9cba8fSJoseph Mingrone 	rte_log_set_global_level(DPDK_DEF_LOG_LEV);
6206f9cba8fSJoseph Mingrone 	if (ptr_dpdk_cfg == NULL)
6216f9cba8fSJoseph Mingrone 	{
6226f9cba8fSJoseph Mingrone 		RTE_LOG(INFO,USER1,"env $DPDK_CFG is unset, so using default: %s\n",DPDK_DEF_CFG);
6236f9cba8fSJoseph Mingrone 		ptr_dpdk_cfg = DPDK_DEF_CFG;
6246f9cba8fSJoseph Mingrone 	}
6256f9cba8fSJoseph Mingrone 	memset(dpdk_cfg_buf,0,sizeof(dpdk_cfg_buf));
6266f9cba8fSJoseph Mingrone 	snprintf(dpdk_cfg_buf,DPDK_CFG_MAX_LEN-1,"%s %s",DPDK_LIB_NAME,ptr_dpdk_cfg);
6276f9cba8fSJoseph Mingrone 	dargv_cnt = parse_dpdk_cfg(dpdk_cfg_buf,dargv);
6286f9cba8fSJoseph Mingrone 	ret = rte_eal_init(dargv_cnt,dargv);
6296f9cba8fSJoseph Mingrone 	if (ret == -1)
6306f9cba8fSJoseph Mingrone 	{
6316f9cba8fSJoseph Mingrone 		// Indicate that we've called rte_eal_init() by setting
6326f9cba8fSJoseph Mingrone 		// is_dpdk_pre_inited to the negative of the error code,
6336f9cba8fSJoseph Mingrone 		// and process the error.
6346f9cba8fSJoseph Mingrone 		is_dpdk_pre_inited = -rte_errno;
6356f9cba8fSJoseph Mingrone 		goto error;
6366f9cba8fSJoseph Mingrone 	}
6376f9cba8fSJoseph Mingrone 	// init succeeded, so we do not need to do it again later.
6386f9cba8fSJoseph Mingrone 	is_dpdk_pre_inited = 1;
6396f9cba8fSJoseph Mingrone 	return 1;
6406f9cba8fSJoseph Mingrone 
6416f9cba8fSJoseph Mingrone error:
6426f9cba8fSJoseph Mingrone 	switch (-is_dpdk_pre_inited)
6436f9cba8fSJoseph Mingrone 	{
6446f9cba8fSJoseph Mingrone 		case EACCES:
6456f9cba8fSJoseph Mingrone 			// This "indicates a permissions issue.".
6466f9cba8fSJoseph Mingrone 			RTE_LOG(ERR, USER1, "%s\n", DPDK_ERR_PERM_MSG);
6476f9cba8fSJoseph Mingrone 			// If we were told to treat this as just meaning
6486f9cba8fSJoseph Mingrone 			// DPDK isn't available, do so.
6496f9cba8fSJoseph Mingrone 			if (eaccess_not_fatal)
6506f9cba8fSJoseph Mingrone 				return 0;
6516f9cba8fSJoseph Mingrone 			// Otherwise report a fatal error.
6526f9cba8fSJoseph Mingrone 			snprintf(ebuf, PCAP_ERRBUF_SIZE,
6536f9cba8fSJoseph Mingrone 			    "DPDK requires that it run as root");
6546f9cba8fSJoseph Mingrone 			return PCAP_ERROR_PERM_DENIED;
6556f9cba8fSJoseph Mingrone 
6566f9cba8fSJoseph Mingrone 		case EAGAIN:
6576f9cba8fSJoseph Mingrone 			// This "indicates either a bus or system
6586f9cba8fSJoseph Mingrone 			// resource was not available, setup may
6596f9cba8fSJoseph Mingrone 			// be attempted again."
6606f9cba8fSJoseph Mingrone 			// There's no such error in pcap, so I'm
6616f9cba8fSJoseph Mingrone 			// not sure what we should do here.
6626f9cba8fSJoseph Mingrone 			snprintf(ebuf, PCAP_ERRBUF_SIZE,
6636f9cba8fSJoseph Mingrone 			    "Bus or system resource was not available");
6646f9cba8fSJoseph Mingrone 			break;
6656f9cba8fSJoseph Mingrone 
6666f9cba8fSJoseph Mingrone 		case EALREADY:
6676f9cba8fSJoseph Mingrone 			// This "indicates that the rte_eal_init
6686f9cba8fSJoseph Mingrone 			// function has already been called, and
6696f9cba8fSJoseph Mingrone 			// cannot be called again."
6706f9cba8fSJoseph Mingrone 			// That's not an error; set the "we've
6716f9cba8fSJoseph Mingrone 			// been here before" flag and return
6726f9cba8fSJoseph Mingrone 			// success.
6736f9cba8fSJoseph Mingrone 			is_dpdk_pre_inited = 1;
6746f9cba8fSJoseph Mingrone 			return 1;
6756f9cba8fSJoseph Mingrone 
6766f9cba8fSJoseph Mingrone 		case EFAULT:
6776f9cba8fSJoseph Mingrone 			// This "indicates the tailq configuration
6786f9cba8fSJoseph Mingrone 			// name was not found in memory configuration."
6796f9cba8fSJoseph Mingrone 			snprintf(ebuf, PCAP_ERRBUF_SIZE,
6806f9cba8fSJoseph Mingrone 			    "The tailq configuration name was not found in the memory configuration");
6816f9cba8fSJoseph Mingrone 			return PCAP_ERROR;
6826f9cba8fSJoseph Mingrone 
6836f9cba8fSJoseph Mingrone 		case EINVAL:
6846f9cba8fSJoseph Mingrone 			// This "indicates invalid parameters were
6856f9cba8fSJoseph Mingrone 			// passed as argv/argc."  Those came from
6866f9cba8fSJoseph Mingrone 			// the configuration file.
6876f9cba8fSJoseph Mingrone 			snprintf(ebuf, PCAP_ERRBUF_SIZE,
6886f9cba8fSJoseph Mingrone 			    "The configuration file has invalid parameters");
6896f9cba8fSJoseph Mingrone 			break;
6906f9cba8fSJoseph Mingrone 
6916f9cba8fSJoseph Mingrone 		case ENOMEM:
6926f9cba8fSJoseph Mingrone 			// This "indicates failure likely caused by
6936f9cba8fSJoseph Mingrone 			// an out-of-memory condition."
6946f9cba8fSJoseph Mingrone 			snprintf(ebuf, PCAP_ERRBUF_SIZE,
6956f9cba8fSJoseph Mingrone 			    "Out of memory");
6966f9cba8fSJoseph Mingrone 			break;
6976f9cba8fSJoseph Mingrone 
6986f9cba8fSJoseph Mingrone 		case ENODEV:
6996f9cba8fSJoseph Mingrone 			// This "indicates memory setup issues."
7006f9cba8fSJoseph Mingrone 			snprintf(ebuf, PCAP_ERRBUF_SIZE,
7016f9cba8fSJoseph Mingrone 			    "An error occurred setting up memory");
7026f9cba8fSJoseph Mingrone 			break;
7036f9cba8fSJoseph Mingrone 
7046f9cba8fSJoseph Mingrone 		case ENOTSUP:
7056f9cba8fSJoseph Mingrone 			// This "indicates that the EAL cannot
7066f9cba8fSJoseph Mingrone 			// initialize on this system."  We treat
7076f9cba8fSJoseph Mingrone 			// that as meaning DPDK isn't available
7086f9cba8fSJoseph Mingrone 			// on this machine, rather than as a
7096f9cba8fSJoseph Mingrone 			// fatal error, and let our caller decide
7106f9cba8fSJoseph Mingrone 			// whether that's a fatal error (if trying
7116f9cba8fSJoseph Mingrone 			// to activate a DPDK device) or not (if
7126f9cba8fSJoseph Mingrone 			// trying to enumerate devices).
7136f9cba8fSJoseph Mingrone 			return 0;
7146f9cba8fSJoseph Mingrone 
7156f9cba8fSJoseph Mingrone 		case EPROTO:
7166f9cba8fSJoseph Mingrone 			// This "indicates that the PCI bus is
7176f9cba8fSJoseph Mingrone 			// either not present, or is not readable
7186f9cba8fSJoseph Mingrone 			// by the eal."  Does "the PCI bus is not
7196f9cba8fSJoseph Mingrone 			// present" mean "this machine has no PCI
7206f9cba8fSJoseph Mingrone 			// bus", which strikes me as a "not available"
7216f9cba8fSJoseph Mingrone 			// case?  If so, should "is not readable by
7226f9cba8fSJoseph Mingrone 			// the EAL" also something we should treat
7236f9cba8fSJoseph Mingrone 			// as a "not available" case?  If not, we
7246f9cba8fSJoseph Mingrone 			// can't distinguish between the two, so
7256f9cba8fSJoseph Mingrone 			// we're stuck.
7266f9cba8fSJoseph Mingrone 			snprintf(ebuf, PCAP_ERRBUF_SIZE,
7276f9cba8fSJoseph Mingrone 			    "PCI bus is not present or not readable by the EAL");
7286f9cba8fSJoseph Mingrone 			break;
7296f9cba8fSJoseph Mingrone 
7306f9cba8fSJoseph Mingrone 		case ENOEXEC:
7316f9cba8fSJoseph Mingrone 			// This "indicates that a service core
7326f9cba8fSJoseph Mingrone 			// failed to launch successfully."
7336f9cba8fSJoseph Mingrone 			snprintf(ebuf, PCAP_ERRBUF_SIZE,
7346f9cba8fSJoseph Mingrone 			    "A service core failed to launch successfully");
7356f9cba8fSJoseph Mingrone 			break;
7366f9cba8fSJoseph Mingrone 
7376f9cba8fSJoseph Mingrone 		default:
7386f9cba8fSJoseph Mingrone 			//
7396f9cba8fSJoseph Mingrone 			// That's not in the list of errors in
7406f9cba8fSJoseph Mingrone 			// the documentation; let it be reported
7416f9cba8fSJoseph Mingrone 			// as an error.
7426f9cba8fSJoseph Mingrone 			//
7436f9cba8fSJoseph Mingrone 			dpdk_fmt_errmsg_for_rte_errno(ebuf,
7446f9cba8fSJoseph Mingrone 			    PCAP_ERRBUF_SIZE, -is_dpdk_pre_inited,
7456f9cba8fSJoseph Mingrone 			    "dpdk error: dpdk_pre_init failed");
7466f9cba8fSJoseph Mingrone 			break;
7476f9cba8fSJoseph Mingrone 	}
7486f9cba8fSJoseph Mingrone 	// Error.
7496f9cba8fSJoseph Mingrone 	return PCAP_ERROR;
7506f9cba8fSJoseph Mingrone }
7516f9cba8fSJoseph Mingrone 
// Activate a DPDK capture handle.
//
// Initializes the EAL if needed, resolves the port id from
// p->opt.device, creates the mbuf pool, configures the port with one
// RX and one TX queue, starts it, optionally enables promiscuous mode,
// requires the link to be up, and finally fills in the pcap_t
// operations.
//
// Returns 0 on success.  On failure returns a negative PCAP_ERROR_
// code with a message in p->errbuf, after calling
// pcapint_cleanup_live_common() on p.
//
// NOTE(review): on failures after the mbuf pool, the TX buffer or the
// started port exist, nothing in this function releases them; confirm
// whether that is handled elsewhere.
static int pcap_dpdk_activate(pcap_t *p)
{
	struct pcap_dpdk *pd = p->priv;
	pd->orig = p;
	int ret = PCAP_ERROR;
	uint16_t nb_ports=0;
	uint16_t portid= DPDK_PORTID_MAX;
	unsigned nb_mbufs = DPDK_NB_MBUFS;
	struct rte_eth_rxconf rxq_conf;
	struct rte_eth_txconf txq_conf;
	struct rte_eth_conf local_port_conf = port_conf;
	struct rte_eth_dev_info dev_info;
	int is_port_up = 0;
	struct rte_eth_link link;

	// Both structures are read below after being handed to DPDK
	// getters whose results are not checked; start from zeroes so a
	// getter that leaves them untouched cannot cause a read of
	// indeterminate data.
	memset(&dev_info, 0, sizeof(dev_info));
	memset(&link, 0, sizeof(link));

	do{
		//init EAL; fail if we have insufficient permission
		char dpdk_pre_init_errbuf[PCAP_ERRBUF_SIZE];
		ret = dpdk_pre_init(dpdk_pre_init_errbuf, 0);
		if (ret < 0)
		{
			// This returns a negative value on an error.
			snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
			    "Can't open device %s: %s",
			    p->opt.device, dpdk_pre_init_errbuf);
			// ret is set to the correct error
			break;
		}
		if (ret == 0)
		{
			// This means DPDK isn't available on this machine.
			// Leave through the common error path, like the
			// failure case just above.
			snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
			    "Can't open device %s: DPDK is not available on this machine",
			    p->opt.device);
			ret = PCAP_ERROR_NO_SUCH_DEVICE;
			break;
		}

		ret = dpdk_init_timer(pd);
		if (ret<0)
		{
			snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
				"dpdk error: Init timer is zero with device %s",
				p->opt.device);
			ret = PCAP_ERROR;
			break;
		}

		nb_ports = rte_eth_dev_count_avail();
		if (nb_ports == 0)
		{
			snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
			    "dpdk error: No Ethernet ports");
			ret = PCAP_ERROR;
			break;
		}

		portid = portid_by_device(p->opt.device);
		if (portid == DPDK_PORTID_MAX){
			snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
			    "dpdk error: portid is invalid. device %s",
			    p->opt.device);
			ret = PCAP_ERROR_NO_SUCH_DEVICE;
			break;
		}

		pd->portid = portid;

		// Clamp the snapshot length to the supported range.
		if (p->snapshot <= 0 || p->snapshot > MAXIMUM_SNAPLEN)
		{
			p->snapshot = MAXIMUM_SNAPLEN;
		}
		// create the mbuf pool
		pd->pktmbuf_pool = rte_pktmbuf_pool_create(MBUF_POOL_NAME, nb_mbufs,
			MEMPOOL_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
			rte_socket_id());
		if (pd->pktmbuf_pool == NULL)
		{
			dpdk_fmt_errmsg_for_rte_errno(p->errbuf,
			    PCAP_ERRBUF_SIZE, rte_errno,
			    "dpdk error: Cannot init mbuf pool");
			ret = PCAP_ERROR;
			break;
		}
		// config dev
		rte_eth_dev_info_get(portid, &dev_info);
		if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
		{
			local_port_conf.txmode.offloads |=DEV_TX_OFFLOAD_MBUF_FAST_FREE;
		}
		// only support 1 queue
		ret = rte_eth_dev_configure(portid, 1, 1, &local_port_conf);
		if (ret < 0)
		{
			dpdk_fmt_errmsg_for_rte_errno(p->errbuf,
			    PCAP_ERRBUF_SIZE, -ret,
			    "dpdk error: Cannot configure device: port=%u",
			    portid);
			ret = PCAP_ERROR;
			break;
		}
		// adjust rx tx
		ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, &nb_txd);
		if (ret < 0)
		{
			dpdk_fmt_errmsg_for_rte_errno(p->errbuf,
			    PCAP_ERRBUF_SIZE, -ret,
			    "dpdk error: Cannot adjust number of descriptors: port=%u",
			    portid);
			ret = PCAP_ERROR;
			break;
		}
		// get MAC addr
		rte_eth_macaddr_get(portid, &(pd->eth_addr));
		eth_addr_str(&(pd->eth_addr), pd->mac_addr, DPDK_MAC_ADDR_SIZE-1);

		// init one RX queue
		rxq_conf = dev_info.default_rxconf;
		rxq_conf.offloads = local_port_conf.rxmode.offloads;
		ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
					     rte_eth_dev_socket_id(portid),
					     &rxq_conf,
					     pd->pktmbuf_pool);
		if (ret < 0)
		{
			dpdk_fmt_errmsg_for_rte_errno(p->errbuf,
			    PCAP_ERRBUF_SIZE, -ret,
			    "dpdk error: rte_eth_rx_queue_setup:port=%u",
			    portid);
			ret = PCAP_ERROR;
			break;
		}

		// init one TX queue
		txq_conf = dev_info.default_txconf;
		txq_conf.offloads = local_port_conf.txmode.offloads;
		ret = rte_eth_tx_queue_setup(portid, 0, nb_txd,
				rte_eth_dev_socket_id(portid),
				&txq_conf);
		if (ret < 0)
		{
			dpdk_fmt_errmsg_for_rte_errno(p->errbuf,
			    PCAP_ERRBUF_SIZE, -ret,
			    "dpdk error: rte_eth_tx_queue_setup:port=%u",
			    portid);
			ret = PCAP_ERROR;
			break;
		}
		// Initialize TX buffers
		tx_buffer = rte_zmalloc_socket(DPDK_TX_BUF_NAME,
				RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0,
				rte_eth_dev_socket_id(portid));
		if (tx_buffer == NULL)
		{
			snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
			    "dpdk error: Cannot allocate buffer for tx on port %u", portid);
			ret = PCAP_ERROR;
			break;
		}
		rte_eth_tx_buffer_init(tx_buffer, MAX_PKT_BURST);
		// Start device
		ret = rte_eth_dev_start(portid);
		if (ret < 0)
		{
			dpdk_fmt_errmsg_for_rte_errno(p->errbuf,
			    PCAP_ERRBUF_SIZE, -ret,
			    "dpdk error: rte_eth_dev_start:port=%u",
			    portid);
			ret = PCAP_ERROR;
			break;
		}
		// set promiscuous mode
		if (p->opt.promisc){
			pd->must_clear_promisc=1;
			rte_eth_promiscuous_enable(portid);
		}
		// check link status
		is_port_up = check_link_status(portid, &link);
		if (!is_port_up){
			snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
			    "dpdk error: link is down, port=%u",portid);
			ret = PCAP_ERROR_IFACE_NOT_UP;
			break;
		}
		// reset statistics, and record a baseline timestamp and
		// statistics snapshot
		rte_eth_stats_reset(pd->portid);
		calculate_timestamp(&(pd->ts_helper), &(pd->prev_ts));
		rte_eth_stats_get(pd->portid,&(pd->prev_stats));
		// format pcap_t (pd->portid and p->snapshot were already
		// set above)
		p->fd = pd->portid;
		p->linktype = DLT_EN10MB; // Ethernet, the 10MB is historical.
		p->selectable_fd = p->fd;
		p->read_op = pcap_dpdk_dispatch;
		p->inject_op = pcap_dpdk_inject;
		// Use libpcap's own BPF program installation for now.
		// DPDK provides its own BPF support, but that needs an ELF
		// file to be loaded as the filter.
		p->setfilter_op = pcapint_install_bpf_program;
		p->setdirection_op = NULL;
		p->set_datalink_op = NULL;
		p->getnonblock_op = pcap_dpdk_getnonblock;
		p->setnonblock_op = pcap_dpdk_setnonblock;
		p->stats_op = pcap_dpdk_stats;
		p->cleanup_op = pcap_dpdk_close;
		p->breakloop_op = pcapint_breakloop_common;
		// set default timeout
		pd->required_select_timeout.tv_sec = 0;
		pd->required_select_timeout.tv_usec = DPDK_DEF_MIN_SLEEP_MS*1000;
		p->required_select_timeout = &pd->required_select_timeout;
		ret = 0; // OK
	}while(0);

	if (ret <= PCAP_ERROR) // all kinds of error code
	{
		pcapint_cleanup_live_common(p);
	}else{
		rte_eth_dev_get_name_by_port(portid,pd->pci_addr);
		RTE_LOG(INFO, USER1,"Port %d device: %s, MAC:%s, PCI:%s\n", portid, p->opt.device, pd->mac_addr, pd->pci_addr);
		// The format string supplies the newline, so neither
		// duplex label carries one of its own.
		RTE_LOG(INFO, USER1,"Port %d Link Up. Speed %u Mbps - %s\n",
							portid, link.link_speed,
					(link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
						("full-duplex") : ("half-duplex"));
	}
	return ret;
}
9786f9cba8fSJoseph Mingrone 
9796f9cba8fSJoseph Mingrone // device name for dpdk should be in the form as dpdk:number, such as dpdk:0
pcap_dpdk_create(const char * device,char * ebuf,int * is_ours)9806f9cba8fSJoseph Mingrone pcap_t * pcap_dpdk_create(const char *device, char *ebuf, int *is_ours)
9816f9cba8fSJoseph Mingrone {
9826f9cba8fSJoseph Mingrone 	pcap_t *p=NULL;
9836f9cba8fSJoseph Mingrone 	*is_ours = 0;
9846f9cba8fSJoseph Mingrone 
9856f9cba8fSJoseph Mingrone 	*is_ours = !strncmp(device, "dpdk:", 5);
9866f9cba8fSJoseph Mingrone 	if (! *is_ours)
9876f9cba8fSJoseph Mingrone 		return NULL;
9886f9cba8fSJoseph Mingrone 	//memset will happen
9896f9cba8fSJoseph Mingrone 	p = PCAP_CREATE_COMMON(ebuf, struct pcap_dpdk);
9906f9cba8fSJoseph Mingrone 
9916f9cba8fSJoseph Mingrone 	if (p == NULL)
9926f9cba8fSJoseph Mingrone 		return NULL;
9936f9cba8fSJoseph Mingrone 	p->activate_op = pcap_dpdk_activate;
9946f9cba8fSJoseph Mingrone 	return p;
9956f9cba8fSJoseph Mingrone }
9966f9cba8fSJoseph Mingrone 
// Add one entry per available DPDK port to devlistp.
//
// Each entry is named DPDK_PREFIX followed by the port index, and its
// description carries the port's MAC address and device name.
//
// Returns 0 on success, including when DPDK is not available or there
// are no ports (no entries are added then), and PCAP_ERROR on failure,
// with a message in ebuf.
int pcap_dpdk_findalldevs(pcap_if_list_t *devlistp, char *ebuf)
{
	int ret=0;
	unsigned int nb_ports = 0;
	char dpdk_name[DPDK_DEV_NAME_MAX];
	char dpdk_desc[DPDK_DEV_DESC_MAX];
	ETHER_ADDR_TYPE eth_addr;
	char mac_addr[DPDK_MAC_ADDR_SIZE];
	char pci_addr[DPDK_PCI_ADDR_SIZE];
	do{
		// init EAL; return "DPDK not available" if we
		// have insufficient permission
		char dpdk_pre_init_errbuf[PCAP_ERRBUF_SIZE];
		ret = dpdk_pre_init(dpdk_pre_init_errbuf, 1);
		if (ret < 0)
		{
			// This returns a negative value on an error.
			snprintf(ebuf, PCAP_ERRBUF_SIZE,
			    "Can't look for DPDK devices: %s",
			    dpdk_pre_init_errbuf);
			ret = PCAP_ERROR;
			break;
		}
		if (ret == 0)
		{
			// This means DPDK isn't available on this machine.
			// That just means "don't return any devices".
			break;
		}
		// dpdk_pre_init() reports success as 1; from here on
		// ret holds this function's own status, where success
		// is 0 whether or not any port is found.
		ret = 0;
		nb_ports = rte_eth_dev_count_avail();
		// NOTE(review): this assumes the available ports are
		// numbered 0 .. nb_ports-1 - confirm.
		for (unsigned int i=0; i<nb_ports; i++){
			snprintf(dpdk_name, sizeof(dpdk_name),
			    "%s%u", DPDK_PREFIX, i);
			// mac addr; start from zeroes in case the query
			// leaves the address untouched
			memset(&eth_addr, 0, sizeof(eth_addr));
			rte_eth_macaddr_get(i, &eth_addr);
			eth_addr_str(&eth_addr,mac_addr,DPDK_MAC_ADDR_SIZE);
			// PCI addr; start from an empty string in case the
			// lookup leaves the buffer untouched
			// NOTE(review): the lookup is not told the buffer
			// size - confirm DPDK_PCI_ADDR_SIZE is large enough
			// for any name it can return.
			pci_addr[0] = '\0';
			rte_eth_dev_get_name_by_port(i,pci_addr);
			snprintf(dpdk_desc, sizeof(dpdk_desc),"%s %s, MAC:%s, PCI:%s", DPDK_DESC, dpdk_name, mac_addr, pci_addr);
			if (pcapint_add_dev(devlistp, dpdk_name, 0, dpdk_desc, ebuf)==NULL){
				ret = PCAP_ERROR;
				break;
			}
		}
	}while(0);
	return ret;
}
10506f9cba8fSJoseph Mingrone 
10516f9cba8fSJoseph Mingrone #ifdef DPDK_ONLY
10526f9cba8fSJoseph Mingrone /*
10536f9cba8fSJoseph Mingrone  * This libpcap build supports only DPDK, not regular network interfaces.
10546f9cba8fSJoseph Mingrone  */
10556f9cba8fSJoseph Mingrone 
10566f9cba8fSJoseph Mingrone /*
10576f9cba8fSJoseph Mingrone  * There are no regular interfaces, just DPDK interfaces.
10586f9cba8fSJoseph Mingrone  */
10596f9cba8fSJoseph Mingrone int
pcapint_platform_finddevs(pcap_if_list_t * devlistp _U_,char * errbuf)1060*afdbf109SJoseph Mingrone pcapint_platform_finddevs(pcap_if_list_t *devlistp _U_, char *errbuf)
10616f9cba8fSJoseph Mingrone {
10626f9cba8fSJoseph Mingrone 	return (0);
10636f9cba8fSJoseph Mingrone }
10646f9cba8fSJoseph Mingrone 
10656f9cba8fSJoseph Mingrone /*
10666f9cba8fSJoseph Mingrone  * Attempts to open a regular interface fail.
10676f9cba8fSJoseph Mingrone  */
10686f9cba8fSJoseph Mingrone pcap_t *
pcapint_create_interface(const char * device,char * errbuf)1069*afdbf109SJoseph Mingrone pcapint_create_interface(const char *device, char *errbuf)
10706f9cba8fSJoseph Mingrone {
10716f9cba8fSJoseph Mingrone 	snprintf(errbuf, PCAP_ERRBUF_SIZE,
10726f9cba8fSJoseph Mingrone 	    "This version of libpcap only supports DPDK");
10736f9cba8fSJoseph Mingrone 	return NULL;
10746f9cba8fSJoseph Mingrone }
10756f9cba8fSJoseph Mingrone 
10766f9cba8fSJoseph Mingrone /*
10776f9cba8fSJoseph Mingrone  * Libpcap version string.
10786f9cba8fSJoseph Mingrone  */
10796f9cba8fSJoseph Mingrone const char *
pcap_lib_version(void)10806f9cba8fSJoseph Mingrone pcap_lib_version(void)
10816f9cba8fSJoseph Mingrone {
10826f9cba8fSJoseph Mingrone 	return (PCAP_VERSION_STRING " (DPDK-only)");
10836f9cba8fSJoseph Mingrone }
10846f9cba8fSJoseph Mingrone #endif
1085