tcp_hpts_internal.h (revision 2241cd10d6e0eaf16d1f3090f1d438543e127f0c) - OpenGrok cross reference for /freebsd/sys/netinet/tcp_hpts_internal.h

/*-
 * Copyright (c) 2025 Netflix, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef __tcp_hpts_internal_h__
#define __tcp_hpts_internal_h__

/*
 * TCP High Precision Timer System (HPTS) - Internal Definitions
 *
 * This header contains internal structures, constants, and interfaces that are
 * implemented in tcp_hpts.c but exposed to enable comprehensive unit testing of
 * the HPTS subsystem.
 */

#if defined(_KERNEL)

/*
 * The hpts uses a 102400 wheel. The wheel
 * defines the time in 10 usec increments (102400 x 10).
 * This gives a range of 10usec - 1024ms to place
 * an entry within. If the user requests more than
 * 1.024 second, a remaineder is attached and the hpts
 * when seeing the remainder will re-insert the
 * inpcb forward in time from where it is until
 * the remainder is zero.
 */

#define NUM_OF_HPTSI_SLOTS 102400

/* The number of connections after which the dynamic sleep logic kicks in. */
#define DEFAULT_CONNECTION_THRESHOLD 100

/*
 * The hpts uses a 102400 wheel. The wheel
 * defines the time in 10 usec increments (102400 x 10).
 * This gives a range of 10usec - 1024ms to place
 * an entry within. If the user requests more than
 * 1.024 second, a remaineder is attached and the hpts
 * when seeing the remainder will re-insert the
 * inpcb forward in time from where it is until
 * the remainder is zero.
 */

#define NUM_OF_HPTSI_SLOTS 102400

/* Convert microseconds to HPTS slots */
#define HPTS_USEC_TO_SLOTS(x) ((x+9) /10)

/* The number of connections after which the dynamic sleep logic kicks in. */
#define DEFAULT_CONNECTION_THRESHOLD 100

extern int tcp_bind_threads; 		/* Thread binding configuration
					 * (0=none, 1=cpu, 2=numa) */

/*
 * Abstraction layer controlling time, interrupts and callouts.
 */
struct tcp_hptsi_funcs {
	void (*microuptime)(struct timeval *tv);
	int (*swi_add)(struct intr_event **eventp, const char *name,
		driver_intr_t handler, void *arg, int pri, enum intr_type flags,
		void **cookiep);
	int (*swi_remove)(void *cookie);
	void (*swi_sched)(void *cookie, int flags);
	int (*intr_event_bind)(struct intr_event *ie, int cpu);
	int (*intr_event_bind_ithread_cpuset)(struct intr_event *ie,
		struct _cpuset *mask);
	void (*callout_init)(struct callout *c, int mpsafe);
	int (*callout_reset_sbt_on)(struct callout *c, sbintime_t sbt,
		sbintime_t precision, void (*func)(void *), void *arg, int cpu,
		int flags);
	int (*_callout_stop_safe)(struct callout *c, int flags);
};

/* Default function table for system operation */
extern const struct tcp_hptsi_funcs tcp_hptsi_default_funcs;

/* Each hpts has its own p_mtx which is used for locking */
#define	HPTS_MTX_ASSERT(hpts)	mtx_assert(&(hpts)->p_mtx, MA_OWNED)
#define	HPTS_LOCK(hpts)		mtx_lock(&(hpts)->p_mtx)
#define	HPTS_TRYLOCK(hpts)	mtx_trylock(&(hpts)->p_mtx)
#define	HPTS_UNLOCK(hpts)	mtx_unlock(&(hpts)->p_mtx)

struct tcp_hpts_entry {
	/* Cache line 0x00 */
	struct mtx p_mtx;		/* Mutex for hpts */
	struct timeval p_mysleep;	/* Our min sleep time */
	uint64_t syscall_cnt;
	uint64_t sleeping;		/* What the actual sleep was (if sleeping) */
	uint16_t p_hpts_active; 	/* Flag that says hpts is awake  */
	uint8_t p_wheel_complete; 	/* have we completed the wheel arc walk? */
	uint32_t p_runningslot; 	/* Current slot we are at if we are running */
	uint32_t p_prev_slot;		/* Previous slot we were on */
	uint32_t p_cur_slot;		/* Current slot in wheel hpts is draining */
	uint32_t p_nxt_slot;		/* The next slot outside the current range
					 * of slots that the hpts is running on. */
	int32_t p_on_queue_cnt;		/* Count on queue in this hpts */
	uint8_t p_direct_wake :1, 	/* boolean */
		p_on_min_sleep:1, 	/* boolean */
		p_hpts_wake_scheduled:1,/* boolean */
		hit_callout_thresh:1,
		p_avail:4;
	uint8_t p_fill[3];		/* Fill to 32 bits */
	/* Cache line 0x40 */
	struct hptsh {
		TAILQ_HEAD(, tcpcb)	head;
		uint32_t		count;
		uint32_t		gencnt;
	} *p_hptss;			/* Hptsi wheel */
	uint32_t p_hpts_sleep_time;	/* Current sleep interval having a max
					 * of 255ms */
	uint32_t overidden_sleep;	/* what was overrided by min-sleep for logging */
	uint32_t saved_curslot;		/* for logging */
	uint32_t saved_prev_slot;	/* for logging */
	uint32_t p_delayed_by;		/* How much were we delayed by */
	/* Cache line 0x80 */
	struct sysctl_ctx_list hpts_ctx;
	struct sysctl_oid *hpts_root;
	struct intr_event *ie;
	void *ie_cookie;
	uint16_t p_cpu;			/* The hpts CPU */
	struct tcp_hptsi *p_hptsi;	/* Back pointer to parent hptsi structure */
	/* There is extra space in here */
	/* Cache line 0x100 */
	struct callout co __aligned(CACHE_LINE_SIZE);
}               __aligned(CACHE_LINE_SIZE);

struct tcp_hptsi {
	struct cpu_group **grps;
	struct tcp_hpts_entry **rp_ent;	/* Array of hptss */
	uint32_t *cts_last_ran;
	uint32_t grp_cnt;
	uint32_t rp_num_hptss;		/* Number of hpts threads */
	struct hpts_domain_info {
		int count;
		int cpu[MAXCPU];
	} domains[MAXMEMDOM];		/* Per-NUMA domain CPU assignments */
	const struct tcp_hptsi_funcs *funcs;	/* Function table for testability */
};

/*
 * Core tcp_hptsi structure manipulation functions.
 */
struct tcp_hptsi* tcp_hptsi_create(const struct tcp_hptsi_funcs *funcs,
	bool enable_sysctl);
void tcp_hptsi_destroy(struct tcp_hptsi *pace);
void tcp_hptsi_start(struct tcp_hptsi *pace);
void tcp_hptsi_stop(struct tcp_hptsi *pace);
uint16_t tcp_hptsi_random_cpu(struct tcp_hptsi *pace);
int32_t tcp_hptsi(struct tcp_hpts_entry *hpts, bool from_callout);

void tcp_hpts_wake(struct tcp_hpts_entry *hpts);

/*
 * LRO HPTS initialization and uninitialization, only for internal use by the
 * HPTS code.
 */
void tcp_lro_hpts_init(void);
void tcp_lro_hpts_uninit(void);

#endif /* defined(_KERNEL) */
#endif /* __tcp_hpts_internal_h__ */