xref: /freebsd/sys/netinet/tcp_pcap.c (revision f7d5900aa063c6e9d19e6372ba981fc76aa326a2)
/*-
 * Copyright (c) 2015
 *	Jonathan Looney. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
2686a996e6SHiren Panchasara 
2786a996e6SHiren Panchasara #include <sys/queue.h>
2886a996e6SHiren Panchasara #include <sys/param.h>
2986a996e6SHiren Panchasara #include <sys/types.h>
3086a996e6SHiren Panchasara #include <sys/socket.h>
3186a996e6SHiren Panchasara #include <sys/socketvar.h>
3286a996e6SHiren Panchasara #include <sys/sysctl.h>
3386a996e6SHiren Panchasara #include <sys/systm.h>
3486a996e6SHiren Panchasara #include <sys/mbuf.h>
3586a996e6SHiren Panchasara #include <sys/eventhandler.h>
3686a996e6SHiren Panchasara #include <machine/atomic.h>
37*e68b3792SGleb Smirnoff #include <netinet/in.h>
38*e68b3792SGleb Smirnoff #include <netinet/in_pcb.h>
3986a996e6SHiren Panchasara #include <netinet/tcp_var.h>
4086a996e6SHiren Panchasara #include <netinet/tcp_pcap.h>
4186a996e6SHiren Panchasara 
/*
 * Number of bytes between the start of an mbuf's storage area (M_START)
 * and its current data pointer.
 * NOTE(review): the name suggests this is the leading-space computation
 * without a writability check -- confirm against M_LEADINGSPACE in mbuf.h.
 */
#define M_LEADINGSPACE_NOWRITE(m)	((m)->m_data - M_START(m))
4486a996e6SHiren Panchasara 
4524b9bb56SJonathan T. Looney int tcp_pcap_aggressive_free = 1;
4686a996e6SHiren Panchasara static int tcp_pcap_clusters_referenced_cur = 0;
4786a996e6SHiren Panchasara static int tcp_pcap_clusters_referenced_max = 0;
4886a996e6SHiren Panchasara 
4924b9bb56SJonathan T. Looney SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_aggressive_free,
5024b9bb56SJonathan T. Looney 	CTLFLAG_RW, &tcp_pcap_aggressive_free, 0,
5124b9bb56SJonathan T. Looney 	"Free saved packets when the memory system comes under pressure");
5286a996e6SHiren Panchasara SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_clusters_referenced_cur,
5386a996e6SHiren Panchasara 	CTLFLAG_RD, &tcp_pcap_clusters_referenced_cur, 0,
5486a996e6SHiren Panchasara 	"Number of clusters currently referenced on TCP PCAP queues");
5586a996e6SHiren Panchasara SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_clusters_referenced_max,
5686a996e6SHiren Panchasara 	CTLFLAG_RW, &tcp_pcap_clusters_referenced_max, 0,
5786a996e6SHiren Panchasara 	"Maximum number of clusters allowed to be referenced on TCP PCAP "
5886a996e6SHiren Panchasara 	"queues");
5986a996e6SHiren Panchasara 
6086a996e6SHiren Panchasara static int tcp_pcap_alloc_reuse_ext = 0;
6186a996e6SHiren Panchasara static int tcp_pcap_alloc_reuse_mbuf = 0;
6286a996e6SHiren Panchasara static int tcp_pcap_alloc_new_mbuf = 0;
6386a996e6SHiren Panchasara SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_reuse_ext,
6486a996e6SHiren Panchasara 	CTLFLAG_RD, &tcp_pcap_alloc_reuse_ext, 0,
6586a996e6SHiren Panchasara 	"Number of mbufs with external storage reused for the TCP PCAP "
6686a996e6SHiren Panchasara 	"functionality");
6786a996e6SHiren Panchasara SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_reuse_mbuf,
6886a996e6SHiren Panchasara 	CTLFLAG_RD, &tcp_pcap_alloc_reuse_mbuf, 0,
6986a996e6SHiren Panchasara 	"Number of mbufs with internal storage reused for the TCP PCAP "
7086a996e6SHiren Panchasara 	"functionality");
7186a996e6SHiren Panchasara SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_new_mbuf,
7286a996e6SHiren Panchasara 	CTLFLAG_RD, &tcp_pcap_alloc_new_mbuf, 0,
7386a996e6SHiren Panchasara 	"Number of new mbufs allocated for the TCP PCAP functionality");
7486a996e6SHiren Panchasara 
7586a996e6SHiren Panchasara VNET_DEFINE(int, tcp_pcap_packets) = 0;
7686a996e6SHiren Panchasara #define V_tcp_pcap_packets	VNET(tcp_pcap_packets)
77962d02b0SBjoern A. Zeeb SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_packets,
78bf6c6162SMichael Tuexen 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_pcap_packets), 0,
79962d02b0SBjoern A. Zeeb 	"Default number of packets saved per direction per TCPCB");
8086a996e6SHiren Panchasara 
8186a996e6SHiren Panchasara /* Initialize the values. */
8286a996e6SHiren Panchasara static void
tcp_pcap_max_set(void)83f87ec781SBjoern A. Zeeb tcp_pcap_max_set(void)
84f87ec781SBjoern A. Zeeb {
85f87ec781SBjoern A. Zeeb 
8686a996e6SHiren Panchasara 	tcp_pcap_clusters_referenced_max = nmbclusters / 4;
8786a996e6SHiren Panchasara }
8886a996e6SHiren Panchasara 
8986a996e6SHiren Panchasara void
tcp_pcap_init(void)90f87ec781SBjoern A. Zeeb tcp_pcap_init(void)
91f87ec781SBjoern A. Zeeb {
92f87ec781SBjoern A. Zeeb 
9386a996e6SHiren Panchasara 	tcp_pcap_max_set();
9486a996e6SHiren Panchasara 	EVENTHANDLER_REGISTER(nmbclusters_change, tcp_pcap_max_set,
9586a996e6SHiren Panchasara 		NULL, EVENTHANDLER_PRI_ANY);
9686a996e6SHiren Panchasara }
9786a996e6SHiren Panchasara 
9886a996e6SHiren Panchasara /*
9986a996e6SHiren Panchasara  * If we are below the maximum allowed cluster references,
10086a996e6SHiren Panchasara  * increment the reference count and return TRUE. Otherwise,
10186a996e6SHiren Panchasara  * leave the reference count alone and return FALSE.
10286a996e6SHiren Panchasara  */
10386a996e6SHiren Panchasara static __inline bool
tcp_pcap_take_cluster_reference(void)10486a996e6SHiren Panchasara tcp_pcap_take_cluster_reference(void)
10586a996e6SHiren Panchasara {
10686a996e6SHiren Panchasara 	if (atomic_fetchadd_int(&tcp_pcap_clusters_referenced_cur, 1) >=
10786a996e6SHiren Panchasara 		tcp_pcap_clusters_referenced_max) {
10886a996e6SHiren Panchasara 		atomic_add_int(&tcp_pcap_clusters_referenced_cur, -1);
10986a996e6SHiren Panchasara 		return FALSE;
11086a996e6SHiren Panchasara 	}
11186a996e6SHiren Panchasara 	return TRUE;
11286a996e6SHiren Panchasara }
11386a996e6SHiren Panchasara 
11486a996e6SHiren Panchasara /*
11586a996e6SHiren Panchasara  * For all the external entries in m, apply the given adjustment.
11686a996e6SHiren Panchasara  * This can be used to adjust the counter when an mbuf chain is
11786a996e6SHiren Panchasara  * copied or freed.
11886a996e6SHiren Panchasara  */
11986a996e6SHiren Panchasara static __inline void
tcp_pcap_adj_cluster_reference(struct mbuf * m,int adj)12086a996e6SHiren Panchasara tcp_pcap_adj_cluster_reference(struct mbuf *m, int adj)
12186a996e6SHiren Panchasara {
12286a996e6SHiren Panchasara 	while (m) {
12386a996e6SHiren Panchasara 		if (m->m_flags & M_EXT)
12486a996e6SHiren Panchasara 			atomic_add_int(&tcp_pcap_clusters_referenced_cur, adj);
12586a996e6SHiren Panchasara 
12686a996e6SHiren Panchasara 		m = m->m_next;
12786a996e6SHiren Panchasara 	}
12886a996e6SHiren Panchasara }
12986a996e6SHiren Panchasara 
13086a996e6SHiren Panchasara /*
13186a996e6SHiren Panchasara  * Free all mbufs in a chain, decrementing the reference count as
13286a996e6SHiren Panchasara  * necessary.
13386a996e6SHiren Panchasara  *
13486a996e6SHiren Panchasara  * Functions in this file should use this instead of m_freem() when
13586a996e6SHiren Panchasara  * they are freeing mbuf chains that may contain clusters that were
13686a996e6SHiren Panchasara  * already included in tcp_pcap_clusters_referenced_cur.
13786a996e6SHiren Panchasara  */
13886a996e6SHiren Panchasara static void
tcp_pcap_m_freem(struct mbuf * mb)13986a996e6SHiren Panchasara tcp_pcap_m_freem(struct mbuf *mb)
14086a996e6SHiren Panchasara {
14186a996e6SHiren Panchasara 	while (mb != NULL) {
14286a996e6SHiren Panchasara 		if (mb->m_flags & M_EXT)
14386a996e6SHiren Panchasara 			atomic_subtract_int(&tcp_pcap_clusters_referenced_cur,
14486a996e6SHiren Panchasara 			    1);
14586a996e6SHiren Panchasara 		mb = m_free(mb);
14686a996e6SHiren Panchasara 	}
14786a996e6SHiren Panchasara }
14886a996e6SHiren Panchasara 
14986a996e6SHiren Panchasara /*
15086a996e6SHiren Panchasara  * Copy data from m to n, where n cannot fit all the data we might
15186a996e6SHiren Panchasara  * want from m.
15286a996e6SHiren Panchasara  *
15386a996e6SHiren Panchasara  * Prioritize data like this:
15486a996e6SHiren Panchasara  * 1. TCP header
15586a996e6SHiren Panchasara  * 2. IP header
15686a996e6SHiren Panchasara  * 3. Data
15786a996e6SHiren Panchasara  */
15886a996e6SHiren Panchasara static void
tcp_pcap_copy_bestfit(struct tcphdr * th,struct mbuf * m,struct mbuf * n)15986a996e6SHiren Panchasara tcp_pcap_copy_bestfit(struct tcphdr *th, struct mbuf *m, struct mbuf *n)
16086a996e6SHiren Panchasara {
16186a996e6SHiren Panchasara 	struct mbuf *m_cur = m;
16286a996e6SHiren Panchasara 	int bytes_to_copy=0, trailing_data, skip=0, tcp_off;
16386a996e6SHiren Panchasara 
16486a996e6SHiren Panchasara 	/* Below, we assume these will be non-NULL. */
16586a996e6SHiren Panchasara 	KASSERT(th, ("%s: called with th == NULL", __func__));
16686a996e6SHiren Panchasara 	KASSERT(m, ("%s: called with m == NULL", __func__));
16786a996e6SHiren Panchasara 	KASSERT(n, ("%s: called with n == NULL", __func__));
16886a996e6SHiren Panchasara 
16986a996e6SHiren Panchasara 	/* We assume this initialization occurred elsewhere. */
17086a996e6SHiren Panchasara 	KASSERT(n->m_len == 0, ("%s: called with n->m_len=%d (expected 0)",
17186a996e6SHiren Panchasara 		__func__, n->m_len));
17286a996e6SHiren Panchasara 	KASSERT(n->m_data == M_START(n),
17386a996e6SHiren Panchasara 		("%s: called with n->m_data != M_START(n)", __func__));
17486a996e6SHiren Panchasara 
17586a996e6SHiren Panchasara 	/*
17686a996e6SHiren Panchasara 	 * Calculate the size of the TCP header. We use this often
17786a996e6SHiren Panchasara 	 * enough that it is worth just calculating at the start.
17886a996e6SHiren Panchasara 	 */
17986a996e6SHiren Panchasara 	tcp_off = th->th_off << 2;
18086a996e6SHiren Panchasara 
18186a996e6SHiren Panchasara 	/* Trim off leading empty mbufs. */
18286a996e6SHiren Panchasara 	while (m && m->m_len == 0)
18386a996e6SHiren Panchasara 		m = m->m_next;
18486a996e6SHiren Panchasara 
18586a996e6SHiren Panchasara 	if (m) {
18686a996e6SHiren Panchasara 		m_cur = m;
18786a996e6SHiren Panchasara 	}
18886a996e6SHiren Panchasara 	else {
18986a996e6SHiren Panchasara 		/*
19086a996e6SHiren Panchasara 		 * No data? Highly unusual. We would expect to at
19186a996e6SHiren Panchasara 		 * least see a TCP header in the mbuf.
19286a996e6SHiren Panchasara 		 * As we have a pointer to the TCP header, I guess
19386a996e6SHiren Panchasara 		 * we should just copy that. (???)
19486a996e6SHiren Panchasara 		 */
19586a996e6SHiren Panchasara fallback:
19686a996e6SHiren Panchasara 		bytes_to_copy = tcp_off;
19786a996e6SHiren Panchasara 		if (bytes_to_copy > M_SIZE(n))
19886a996e6SHiren Panchasara 			bytes_to_copy = M_SIZE(n);
19986a996e6SHiren Panchasara 		bcopy(th, n->m_data, bytes_to_copy);
20086a996e6SHiren Panchasara 		n->m_len = bytes_to_copy;
20186a996e6SHiren Panchasara 		return;
20286a996e6SHiren Panchasara 	}
20386a996e6SHiren Panchasara 
20486a996e6SHiren Panchasara 	/*
20586a996e6SHiren Panchasara 	 * Find TCP header. Record the total number of bytes up to,
20686a996e6SHiren Panchasara 	 * and including, the TCP header.
20786a996e6SHiren Panchasara 	 */
20886a996e6SHiren Panchasara 	while (m_cur) {
20986a996e6SHiren Panchasara 		if ((caddr_t) th >= (caddr_t) m_cur->m_data &&
21086a996e6SHiren Panchasara 			(caddr_t) th < (caddr_t) (m_cur->m_data + m_cur->m_len))
21186a996e6SHiren Panchasara 			break;
21286a996e6SHiren Panchasara 		bytes_to_copy += m_cur->m_len;
21386a996e6SHiren Panchasara 		m_cur = m_cur->m_next;
21486a996e6SHiren Panchasara 	}
21586a996e6SHiren Panchasara 	if (m_cur)
21686a996e6SHiren Panchasara 		bytes_to_copy += (caddr_t) th - (caddr_t) m_cur->m_data;
21786a996e6SHiren Panchasara 	else
21886a996e6SHiren Panchasara 		goto fallback;
21986a996e6SHiren Panchasara 	bytes_to_copy += tcp_off;
22086a996e6SHiren Panchasara 
22186a996e6SHiren Panchasara 	/*
22286a996e6SHiren Panchasara 	 * If we already want to copy more bytes than we can hold
22386a996e6SHiren Panchasara 	 * in the destination mbuf, skip leading bytes and copy
22486a996e6SHiren Panchasara 	 * what we can.
22586a996e6SHiren Panchasara 	 *
22686a996e6SHiren Panchasara 	 * Otherwise, consider trailing data.
22786a996e6SHiren Panchasara 	 */
22886a996e6SHiren Panchasara 	if (bytes_to_copy > M_SIZE(n)) {
22986a996e6SHiren Panchasara 		skip  = bytes_to_copy - M_SIZE(n);
23086a996e6SHiren Panchasara 		bytes_to_copy = M_SIZE(n);
23186a996e6SHiren Panchasara 	}
23286a996e6SHiren Panchasara 	else {
23386a996e6SHiren Panchasara 		/*
23486a996e6SHiren Panchasara 		 * Determine how much trailing data is in the chain.
23586a996e6SHiren Panchasara 		 * We start with the length of this mbuf (the one
23686a996e6SHiren Panchasara 		 * containing th) and subtract the size of the TCP
23786a996e6SHiren Panchasara 		 * header (tcp_off) and the size of the data prior
23886a996e6SHiren Panchasara 		 * to th (th - m_cur->m_data).
23986a996e6SHiren Panchasara 		 *
24086a996e6SHiren Panchasara 		 * This *should not* be negative, as the TCP code
24186a996e6SHiren Panchasara 		 * should put the whole TCP header in a single
24286a996e6SHiren Panchasara 		 * mbuf. But, it isn't a problem if it is. We will
24386a996e6SHiren Panchasara 		 * simple work off our negative balance as we look
24486a996e6SHiren Panchasara 		 * at subsequent mbufs.
24586a996e6SHiren Panchasara 		 */
24686a996e6SHiren Panchasara 		trailing_data = m_cur->m_len - tcp_off;
24786a996e6SHiren Panchasara 		trailing_data -= (caddr_t) th - (caddr_t) m_cur->m_data;
24886a996e6SHiren Panchasara 		m_cur = m_cur->m_next;
24986a996e6SHiren Panchasara 		while (m_cur) {
25086a996e6SHiren Panchasara 			trailing_data += m_cur->m_len;
25186a996e6SHiren Panchasara 			m_cur = m_cur->m_next;
25286a996e6SHiren Panchasara 		}
25386a996e6SHiren Panchasara 		if ((bytes_to_copy + trailing_data) > M_SIZE(n))
25486a996e6SHiren Panchasara 			bytes_to_copy = M_SIZE(n);
25586a996e6SHiren Panchasara 		else
25686a996e6SHiren Panchasara 			bytes_to_copy += trailing_data;
25786a996e6SHiren Panchasara 	}
25886a996e6SHiren Panchasara 
25986a996e6SHiren Panchasara 	m_copydata(m, skip, bytes_to_copy, n->m_data);
26086a996e6SHiren Panchasara 	n->m_len = bytes_to_copy;
26186a996e6SHiren Panchasara }
26286a996e6SHiren Panchasara 
26386a996e6SHiren Panchasara void
tcp_pcap_add(struct tcphdr * th,struct mbuf * m,struct mbufq * queue)26486a996e6SHiren Panchasara tcp_pcap_add(struct tcphdr *th, struct mbuf *m, struct mbufq *queue)
26586a996e6SHiren Panchasara {
26686a996e6SHiren Panchasara 	struct mbuf *n = NULL, *mhead;
26786a996e6SHiren Panchasara 
26886a996e6SHiren Panchasara 	KASSERT(th, ("%s: called with th == NULL", __func__));
26986a996e6SHiren Panchasara 	KASSERT(m, ("%s: called with m == NULL", __func__));
27086a996e6SHiren Panchasara 	KASSERT(queue, ("%s: called with queue == NULL", __func__));
27186a996e6SHiren Panchasara 
27286a996e6SHiren Panchasara 	/* We only care about data packets. */
27386a996e6SHiren Panchasara 	while (m && m->m_type != MT_DATA)
27486a996e6SHiren Panchasara 		m = m->m_next;
27586a996e6SHiren Panchasara 
27686a996e6SHiren Panchasara 	/* We only need to do something if we still have an mbuf. */
27786a996e6SHiren Panchasara 	if (!m)
27886a996e6SHiren Panchasara 		return;
27986a996e6SHiren Panchasara 
28086a996e6SHiren Panchasara 	/* If we are not saving mbufs, return now. */
28186a996e6SHiren Panchasara 	if (queue->mq_maxlen == 0)
28286a996e6SHiren Panchasara 		return;
28386a996e6SHiren Panchasara 
28486a996e6SHiren Panchasara 	/*
28586a996e6SHiren Panchasara 	 * Check to see if we will need to recycle mbufs.
28686a996e6SHiren Panchasara 	 *
28786a996e6SHiren Panchasara 	 * If we need to get rid of mbufs to stay below
28886a996e6SHiren Panchasara 	 * our packet count, try to reuse the mbuf. Once
28986a996e6SHiren Panchasara 	 * we already have a new mbuf (n), then we can
29086a996e6SHiren Panchasara 	 * simply free subsequent mbufs.
29186a996e6SHiren Panchasara 	 *
29286a996e6SHiren Panchasara 	 * Note that most of the logic in here is to deal
29386a996e6SHiren Panchasara 	 * with the reuse. If we are fine with constant
29486a996e6SHiren Panchasara 	 * mbuf allocs/deallocs, we could ditch this logic.
29586a996e6SHiren Panchasara 	 * But, it only seems to make sense to reuse
29686a996e6SHiren Panchasara 	 * mbufs we already have.
29786a996e6SHiren Panchasara 	 */
29886a996e6SHiren Panchasara 	while (mbufq_full(queue)) {
29986a996e6SHiren Panchasara 		mhead = mbufq_dequeue(queue);
30086a996e6SHiren Panchasara 
30186a996e6SHiren Panchasara 		if (n) {
30286a996e6SHiren Panchasara 			tcp_pcap_m_freem(mhead);
30386a996e6SHiren Panchasara 		}
30486a996e6SHiren Panchasara 		else {
30586a996e6SHiren Panchasara 			/*
30686a996e6SHiren Panchasara 			 * If this held an external cluster, try to
30786a996e6SHiren Panchasara 			 * detach the cluster. But, if we held the
30886a996e6SHiren Panchasara 			 * last reference, go through the normal
30986a996e6SHiren Panchasara 			 * free-ing process.
31086a996e6SHiren Panchasara 			 */
31161664ee7SGleb Smirnoff 			if (mhead->m_flags & M_EXTPG) {
31261664ee7SGleb Smirnoff 				/* Don't mess around with these. */
31361664ee7SGleb Smirnoff 				tcp_pcap_m_freem(mhead);
31461664ee7SGleb Smirnoff 				continue;
31561664ee7SGleb Smirnoff 			} else if (mhead->m_flags & M_EXT) {
31686a996e6SHiren Panchasara 				switch (mhead->m_ext.ext_type) {
31786a996e6SHiren Panchasara 				case EXT_SFBUF:
31886a996e6SHiren Panchasara 					/* Don't mess around with these. */
31986a996e6SHiren Panchasara 					tcp_pcap_m_freem(mhead);
32086a996e6SHiren Panchasara 					continue;
32186a996e6SHiren Panchasara 				default:
32286a996e6SHiren Panchasara 					if (atomic_fetchadd_int(
32386a996e6SHiren Panchasara 						mhead->m_ext.ext_cnt, -1) == 1)
32486a996e6SHiren Panchasara 					{
32586a996e6SHiren Panchasara 						/*
32686a996e6SHiren Panchasara 						 * We held the last reference
32786a996e6SHiren Panchasara 						 * on this cluster. Restore
32886a996e6SHiren Panchasara 						 * the reference count and put
32986a996e6SHiren Panchasara 						 * it back in the pool.
33086a996e6SHiren Panchasara 				 		 */
33186a996e6SHiren Panchasara 						*(mhead->m_ext.ext_cnt) = 1;
33286a996e6SHiren Panchasara 						tcp_pcap_m_freem(mhead);
33386a996e6SHiren Panchasara 						continue;
33486a996e6SHiren Panchasara 					}
33586a996e6SHiren Panchasara 					/*
33686a996e6SHiren Panchasara 					 * We were able to cleanly free the
33786a996e6SHiren Panchasara 					 * reference.
33886a996e6SHiren Panchasara 				 	 */
33986a996e6SHiren Panchasara 					atomic_subtract_int(
34086a996e6SHiren Panchasara 					    &tcp_pcap_clusters_referenced_cur,
34186a996e6SHiren Panchasara 					    1);
34286a996e6SHiren Panchasara 					tcp_pcap_alloc_reuse_ext++;
34386a996e6SHiren Panchasara 					break;
34486a996e6SHiren Panchasara 				}
34561664ee7SGleb Smirnoff 			} else {
34686a996e6SHiren Panchasara 				tcp_pcap_alloc_reuse_mbuf++;
34786a996e6SHiren Panchasara 			}
34886a996e6SHiren Panchasara 
34986a996e6SHiren Panchasara 			n = mhead;
35086a996e6SHiren Panchasara 			tcp_pcap_m_freem(n->m_next);
351b4b12e52SGleb Smirnoff 			m_init(n, M_NOWAIT, MT_DATA, 0);
35286a996e6SHiren Panchasara 		}
35386a996e6SHiren Panchasara 	}
35486a996e6SHiren Panchasara 
35586a996e6SHiren Panchasara 	/* Check to see if we need to get a new mbuf. */
35686a996e6SHiren Panchasara 	if (!n) {
35786a996e6SHiren Panchasara 		if (!(n = m_get(M_NOWAIT, MT_DATA)))
35886a996e6SHiren Panchasara 			return;
35986a996e6SHiren Panchasara 		tcp_pcap_alloc_new_mbuf++;
36086a996e6SHiren Panchasara 	}
36186a996e6SHiren Panchasara 
36286a996e6SHiren Panchasara 	/*
36386a996e6SHiren Panchasara 	 * What are we dealing with? If a cluster, attach it. Otherwise,
36486a996e6SHiren Panchasara 	 * try to copy the data from the beginning of the mbuf to the
36586a996e6SHiren Panchasara 	 * end of data. (There may be data between the start of the data
36686a996e6SHiren Panchasara 	 * area and the current data pointer. We want to get this, because
36786a996e6SHiren Panchasara 	 * it may contain header information that is useful.)
36886a996e6SHiren Panchasara 	 * In cases where that isn't possible, settle for what we can
36986a996e6SHiren Panchasara 	 * get.
37086a996e6SHiren Panchasara 	 */
37161664ee7SGleb Smirnoff 	if ((m->m_flags & (M_EXT | M_EXTPG)) &&
37261664ee7SGleb Smirnoff 	    tcp_pcap_take_cluster_reference()) {
37386a996e6SHiren Panchasara 		n->m_data = m->m_data;
37486a996e6SHiren Panchasara 		n->m_len = m->m_len;
37586a996e6SHiren Panchasara 		mb_dupcl(n, m);
37686a996e6SHiren Panchasara 	}
37786a996e6SHiren Panchasara 	else if (((m->m_data + m->m_len) - M_START(m)) <= M_SIZE(n)) {
37886a996e6SHiren Panchasara 		/*
37986a996e6SHiren Panchasara 		 * At this point, n is guaranteed to be a normal mbuf
38086a996e6SHiren Panchasara 		 * with no cluster and no packet header. Because the
38186a996e6SHiren Panchasara 		 * logic in this code block requires this, the assert
38286a996e6SHiren Panchasara 		 * is here to catch any instances where someone
38386a996e6SHiren Panchasara 		 * changes the logic to invalidate that assumption.
38486a996e6SHiren Panchasara 		 */
38586a996e6SHiren Panchasara 		KASSERT((n->m_flags & (M_EXT | M_PKTHDR)) == 0,
38686a996e6SHiren Panchasara 			("%s: Unexpected flags (%#x) for mbuf",
38786a996e6SHiren Panchasara 			__func__, n->m_flags));
38886a996e6SHiren Panchasara 		n->m_data = n->m_dat + M_LEADINGSPACE_NOWRITE(m);
38986a996e6SHiren Panchasara 		n->m_len = m->m_len;
3906edfd179SGleb Smirnoff 		if (m->m_flags & M_EXTPG)
39182334850SJohn Baldwin 			m_copydata(m, 0, m->m_len, n->m_data);
39282334850SJohn Baldwin 		else
39386a996e6SHiren Panchasara 			bcopy(M_START(m), n->m_dat,
39486a996e6SHiren Panchasara 			    m->m_len + M_LEADINGSPACE_NOWRITE(m));
39586a996e6SHiren Panchasara 	}
39686a996e6SHiren Panchasara 	else {
39786a996e6SHiren Panchasara 		/*
39886a996e6SHiren Panchasara 		 * This is the case where we need to "settle for what
39986a996e6SHiren Panchasara 		 * we can get". The most probable way to this code
40086a996e6SHiren Panchasara 		 * path is that we've already taken references to the
40186a996e6SHiren Panchasara 		 * maximum number of mbuf clusters we can, and the data
40286a996e6SHiren Panchasara 		 * is too long to fit in an mbuf's internal storage.
40386a996e6SHiren Panchasara 		 * Try for a "best fit".
40486a996e6SHiren Panchasara 		 */
40586a996e6SHiren Panchasara 		tcp_pcap_copy_bestfit(th, m, n);
40686a996e6SHiren Panchasara 
40786a996e6SHiren Panchasara 		/* Don't try to get additional data. */
40886a996e6SHiren Panchasara 		goto add_to_queue;
40986a996e6SHiren Panchasara 	}
41086a996e6SHiren Panchasara 
41186a996e6SHiren Panchasara 	if (m->m_next) {
41286a996e6SHiren Panchasara 		n->m_next = m_copym(m->m_next, 0, M_COPYALL, M_NOWAIT);
41386a996e6SHiren Panchasara 		tcp_pcap_adj_cluster_reference(n->m_next, 1);
41486a996e6SHiren Panchasara 	}
41586a996e6SHiren Panchasara 
41686a996e6SHiren Panchasara add_to_queue:
41786a996e6SHiren Panchasara 	/* Add the new mbuf to the list. */
41886a996e6SHiren Panchasara 	if (mbufq_enqueue(queue, n)) {
41986a996e6SHiren Panchasara 		/* This shouldn't happen. If INVARIANTS is defined, panic. */
42086a996e6SHiren Panchasara 		KASSERT(0, ("%s: mbufq was unexpectedly full!", __func__));
42186a996e6SHiren Panchasara 		tcp_pcap_m_freem(n);
42286a996e6SHiren Panchasara 	}
42386a996e6SHiren Panchasara }
42486a996e6SHiren Panchasara 
42586a996e6SHiren Panchasara void
tcp_pcap_drain(struct mbufq * queue)42686a996e6SHiren Panchasara tcp_pcap_drain(struct mbufq *queue)
42786a996e6SHiren Panchasara {
42886a996e6SHiren Panchasara 	struct mbuf *m;
42986a996e6SHiren Panchasara 	while ((m = mbufq_dequeue(queue)))
43086a996e6SHiren Panchasara 		tcp_pcap_m_freem(m);
43186a996e6SHiren Panchasara }
43286a996e6SHiren Panchasara 
43386a996e6SHiren Panchasara void
tcp_pcap_tcpcb_init(struct tcpcb * tp)43486a996e6SHiren Panchasara tcp_pcap_tcpcb_init(struct tcpcb *tp)
43586a996e6SHiren Panchasara {
43686a996e6SHiren Panchasara 	mbufq_init(&(tp->t_inpkts), V_tcp_pcap_packets);
43786a996e6SHiren Panchasara 	mbufq_init(&(tp->t_outpkts), V_tcp_pcap_packets);
43886a996e6SHiren Panchasara }
43986a996e6SHiren Panchasara 
44086a996e6SHiren Panchasara void
tcp_pcap_set_sock_max(struct mbufq * queue,int newval)44186a996e6SHiren Panchasara tcp_pcap_set_sock_max(struct mbufq *queue, int newval)
44286a996e6SHiren Panchasara {
44386a996e6SHiren Panchasara 	queue->mq_maxlen = newval;
44486a996e6SHiren Panchasara 	while (queue->mq_len > queue->mq_maxlen)
44586a996e6SHiren Panchasara 		tcp_pcap_m_freem(mbufq_dequeue(queue));
44686a996e6SHiren Panchasara }
44786a996e6SHiren Panchasara 
44886a996e6SHiren Panchasara int
tcp_pcap_get_sock_max(struct mbufq * queue)44986a996e6SHiren Panchasara tcp_pcap_get_sock_max(struct mbufq *queue)
45086a996e6SHiren Panchasara {
45186a996e6SHiren Panchasara 	return queue->mq_maxlen;
45286a996e6SHiren Panchasara }
453