xref: /freebsd/sys/netinet/tcp_hpts_internal.h (revision 2241cd10d6e0eaf16d1f3090f1d438543e127f0c)
1 /*-
2  * Copyright (c) 2025 Netflix, Inc.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 #ifndef __tcp_hpts_internal_h__
27 #define __tcp_hpts_internal_h__
28 
29 /*
30  * TCP High Precision Timer System (HPTS) - Internal Definitions
31  *
32  * This header contains internal structures, constants, and interfaces that are
33  * implemented in tcp_hpts.c but exposed to enable comprehensive unit testing of
34  * the HPTS subsystem.
35  */
36 
37 #if defined(_KERNEL)
38 
39 /*
40  * The hpts uses a 102400 wheel. The wheel
41  * defines the time in 10 usec increments (102400 x 10).
42  * This gives a range of 10usec - 1024ms to place
43  * an entry within. If the user requests more than
44  * 1.024 second, a remaineder is attached and the hpts
45  * when seeing the remainder will re-insert the
46  * inpcb forward in time from where it is until
47  * the remainder is zero.
48  */
49 
50 #define NUM_OF_HPTSI_SLOTS 102400
51 
52 /* The number of connections after which the dynamic sleep logic kicks in. */
53 #define DEFAULT_CONNECTION_THRESHOLD 100
54 
55 /*
56  * The hpts uses a 102400 wheel. The wheel
57  * defines the time in 10 usec increments (102400 x 10).
58  * This gives a range of 10usec - 1024ms to place
59  * an entry within. If the user requests more than
60  * 1.024 second, a remaineder is attached and the hpts
61  * when seeing the remainder will re-insert the
62  * inpcb forward in time from where it is until
63  * the remainder is zero.
64  */
65 
66 #define NUM_OF_HPTSI_SLOTS 102400
67 
68 /* Convert microseconds to HPTS slots */
69 #define HPTS_USEC_TO_SLOTS(x) ((x+9) /10)
70 
71 /* The number of connections after which the dynamic sleep logic kicks in. */
72 #define DEFAULT_CONNECTION_THRESHOLD 100
73 
/* Thread binding configuration (0=none, 1=cpu, 2=numa). */
extern int tcp_bind_threads;
76 
77 /*
78  * Abstraction layer controlling time, interrupts and callouts.
79  */
80 struct tcp_hptsi_funcs {
81 	void (*microuptime)(struct timeval *tv);
82 	int (*swi_add)(struct intr_event **eventp, const char *name,
83 		driver_intr_t handler, void *arg, int pri, enum intr_type flags,
84 		void **cookiep);
85 	int (*swi_remove)(void *cookie);
86 	void (*swi_sched)(void *cookie, int flags);
87 	int (*intr_event_bind)(struct intr_event *ie, int cpu);
88 	int (*intr_event_bind_ithread_cpuset)(struct intr_event *ie,
89 		struct _cpuset *mask);
90 	void (*callout_init)(struct callout *c, int mpsafe);
91 	int (*callout_reset_sbt_on)(struct callout *c, sbintime_t sbt,
92 		sbintime_t precision, void (*func)(void *), void *arg, int cpu,
93 		int flags);
94 	int (*_callout_stop_safe)(struct callout *c, int flags);
95 };
96 
/* The default function table, used for system operation. */
extern const struct tcp_hptsi_funcs tcp_hptsi_default_funcs;
99 
/*
 * Locking helpers. Each hpts carries its own p_mtx; these macros take a
 * pointer to the hpts and operate on that mutex.
 */
#define	HPTS_LOCK(h)		mtx_lock(&(h)->p_mtx)
#define	HPTS_TRYLOCK(h)		mtx_trylock(&(h)->p_mtx)
#define	HPTS_UNLOCK(h)		mtx_unlock(&(h)->p_mtx)
/* Assert that the hpts mutex is owned (MA_OWNED). */
#define	HPTS_MTX_ASSERT(h)	mtx_assert(&(h)->p_mtx, MA_OWNED)
105 
106 struct tcp_hpts_entry {
107 	/* Cache line 0x00 */
108 	struct mtx p_mtx;		/* Mutex for hpts */
109 	struct timeval p_mysleep;	/* Our min sleep time */
110 	uint64_t syscall_cnt;
111 	uint64_t sleeping;		/* What the actual sleep was (if sleeping) */
112 	uint16_t p_hpts_active; 	/* Flag that says hpts is awake  */
113 	uint8_t p_wheel_complete; 	/* have we completed the wheel arc walk? */
114 	uint32_t p_runningslot; 	/* Current slot we are at if we are running */
115 	uint32_t p_prev_slot;		/* Previous slot we were on */
116 	uint32_t p_cur_slot;		/* Current slot in wheel hpts is draining */
117 	uint32_t p_nxt_slot;		/* The next slot outside the current range
118 					 * of slots that the hpts is running on. */
119 	int32_t p_on_queue_cnt;		/* Count on queue in this hpts */
120 	uint8_t p_direct_wake :1, 	/* boolean */
121 		p_on_min_sleep:1, 	/* boolean */
122 		p_hpts_wake_scheduled:1,/* boolean */
123 		hit_callout_thresh:1,
124 		p_avail:4;
125 	uint8_t p_fill[3];		/* Fill to 32 bits */
126 	/* Cache line 0x40 */
127 	struct hptsh {
128 		TAILQ_HEAD(, tcpcb)	head;
129 		uint32_t		count;
130 		uint32_t		gencnt;
131 	} *p_hptss;			/* Hptsi wheel */
132 	uint32_t p_hpts_sleep_time;	/* Current sleep interval having a max
133 					 * of 255ms */
134 	uint32_t overidden_sleep;	/* what was overrided by min-sleep for logging */
135 	uint32_t saved_curslot;		/* for logging */
136 	uint32_t saved_prev_slot;	/* for logging */
137 	uint32_t p_delayed_by;		/* How much were we delayed by */
138 	/* Cache line 0x80 */
139 	struct sysctl_ctx_list hpts_ctx;
140 	struct sysctl_oid *hpts_root;
141 	struct intr_event *ie;
142 	void *ie_cookie;
143 	uint16_t p_cpu;			/* The hpts CPU */
144 	struct tcp_hptsi *p_hptsi;	/* Back pointer to parent hptsi structure */
145 	/* There is extra space in here */
146 	/* Cache line 0x100 */
147 	struct callout co __aligned(CACHE_LINE_SIZE);
148 }               __aligned(CACHE_LINE_SIZE);
149 
150 struct tcp_hptsi {
151 	struct cpu_group **grps;
152 	struct tcp_hpts_entry **rp_ent;	/* Array of hptss */
153 	uint32_t *cts_last_ran;
154 	uint32_t grp_cnt;
155 	uint32_t rp_num_hptss;		/* Number of hpts threads */
156 	struct hpts_domain_info {
157 		int count;
158 		int cpu[MAXCPU];
159 	} domains[MAXMEMDOM];		/* Per-NUMA domain CPU assignments */
160 	const struct tcp_hptsi_funcs *funcs;	/* Function table for testability */
161 };
162 
/*
 * Core tcp_hptsi structure manipulation functions.
 *
 * tcp_hptsi_create() takes the function table to use and a flag selecting
 * whether sysctl is enabled; the remaining routines operate on the
 * returned structure or on one of its hpts entries.
 */
struct tcp_hptsi *tcp_hptsi_create(const struct tcp_hptsi_funcs *funcs,
    bool enable_sysctl);
void tcp_hptsi_destroy(struct tcp_hptsi *pace);
void tcp_hptsi_start(struct tcp_hptsi *pace);
void tcp_hptsi_stop(struct tcp_hptsi *pace);
uint16_t tcp_hptsi_random_cpu(struct tcp_hptsi *pace);
int32_t tcp_hptsi(struct tcp_hpts_entry *hpts, bool from_callout);

void tcp_hpts_wake(struct tcp_hpts_entry *hpts);
175 
/*
 * Initialization and uninitialization of the LRO side of HPTS. These are
 * for internal use by the HPTS code only.
 */
void tcp_lro_hpts_init(void);
void tcp_lro_hpts_uninit(void);
182 
183 #endif /* defined(_KERNEL) */
184 #endif /* __tcp_hpts_internal_h__ */
185