/*-
 * Copyright (c) 2025 Netflix, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef __tcp_hpts_internal_h__
#define __tcp_hpts_internal_h__

/*
 * TCP High Precision Timer System (HPTS) - Internal Definitions
 *
 * This header contains internal structures, constants, and interfaces that are
 * implemented in tcp_hpts.c but exposed to enable comprehensive unit testing of
 * the HPTS subsystem.
 */

#if defined(_KERNEL)

/*
 * The hpts uses a 102400 wheel. The wheel
 * defines the time in 10 usec increments (102400 x 10).
 * This gives a range of 10usec - 1024ms to place
 * an entry within. If the user requests more than
 * 1.024 second, a remainder is attached and the hpts
 * when seeing the remainder will re-insert the
 * inpcb forward in time from where it is until
 * the remainder is zero.
 */
#define NUM_OF_HPTSI_SLOTS 102400

/*
 * Convert microseconds to HPTS slots, rounding up to the next whole slot
 * (one slot is 10 usec).  The argument is parenthesized so that expressions
 * containing lower-precedence operators (e.g. shifts or ?:) convert
 * correctly; it is evaluated exactly once.
 */
#define HPTS_USEC_TO_SLOTS(x) (((x) + 9) / 10)

/* The number of connections after which the dynamic sleep logic kicks in. */
#define DEFAULT_CONNECTION_THRESHOLD 100

extern int tcp_bind_threads;		/* Thread binding configuration
					 * (0=none, 1=cpu, 2=numa) */

/*
 * Abstraction layer controlling time, interrupts and callouts.
 *
 * Every access to these facilities is expressed as a function pointer so
 * that a replacement table can be supplied (see the funcs member of
 * struct tcp_hptsi, which exists for testability).
 */
struct tcp_hptsi_funcs {
	void (*microuptime)(struct timeval *tv);
	int (*swi_add)(struct intr_event **eventp, const char *name,
	    driver_intr_t handler, void *arg, int pri,
	    enum intr_type flags, void **cookiep);
	int (*swi_remove)(void *cookie);
	void (*swi_sched)(void *cookie, int flags);
	int (*intr_event_bind)(struct intr_event *ie, int cpu);
	int (*intr_event_bind_ithread_cpuset)(struct intr_event *ie,
	    struct _cpuset *mask);
	void (*callout_init)(struct callout *c, int mpsafe);
	int (*callout_reset_sbt_on)(struct callout *c, sbintime_t sbt,
	    sbintime_t precision, void (*func)(void *), void *arg,
	    int cpu, int flags);
	int (*_callout_stop_safe)(struct callout *c, int flags);
};

/* Default function table for system operation */
extern const struct tcp_hptsi_funcs tcp_hptsi_default_funcs;

/* Each hpts has its own p_mtx which is used for locking */
#define HPTS_MTX_ASSERT(hpts)	mtx_assert(&(hpts)->p_mtx, MA_OWNED)
#define HPTS_LOCK(hpts)		mtx_lock(&(hpts)->p_mtx)
#define HPTS_TRYLOCK(hpts)	mtx_trylock(&(hpts)->p_mtx)
#define HPTS_UNLOCK(hpts)	mtx_unlock(&(hpts)->p_mtx)

/*
 * State of a single hpts instance: its lock, its position on the wheel,
 * the wheel itself (p_hptss), logging scratch fields and the software
 * interrupt / callout handles.  The "Cache line" markers below record the
 * intended field grouping; the structure and its callout are aligned to
 * CACHE_LINE_SIZE.
 */
struct tcp_hpts_entry {
	/* Cache line 0x00 */
	struct mtx p_mtx;	/* Mutex for hpts */
	struct timeval p_mysleep;	/* Our min sleep time */
	uint64_t syscall_cnt;
	uint64_t sleeping;	/* What the actual sleep was (if sleeping) */
	uint16_t p_hpts_active; /* Flag that says hpts is awake  */
	uint8_t p_wheel_complete; /* have we completed the wheel arc walk? */
	uint32_t p_runningslot; /* Current slot we are at if we are running */
	uint32_t p_prev_slot;	/* Previous slot we were on */
	uint32_t p_cur_slot;	/* Current slot in wheel hpts is draining */
	uint32_t p_nxt_slot;	/* The next slot outside the current range
				 * of slots that the hpts is running on. */
	int32_t p_on_queue_cnt;	/* Count on queue in this hpts */
	uint8_t p_direct_wake :1, /* boolean */
		p_on_min_sleep:1, /* boolean */
		p_hpts_wake_scheduled:1,/* boolean */
		hit_callout_thresh:1,
		p_avail:4;
	uint8_t p_fill[3];	  /* Fill to 32 bits */
	/* Cache line 0x40 */
	struct hptsh {
		TAILQ_HEAD(, tcpcb)	head;
		uint32_t		count;
		uint32_t		gencnt;
	} *p_hptss;			/* Hptsi wheel */
	uint32_t p_hpts_sleep_time;	/* Current sleep interval having a max
					 * of 255ms */
	uint32_t overidden_sleep;	/* what was overridden by min-sleep for logging */
	uint32_t saved_curslot;		/* for logging */
	uint32_t saved_prev_slot;	/* for logging */
	uint32_t p_delayed_by;	/* How much were we delayed by */
	/* Cache line 0x80 */
	struct sysctl_ctx_list hpts_ctx;
	struct sysctl_oid *hpts_root;
	struct intr_event *ie;
	void *ie_cookie;
	uint16_t p_cpu;		/* The hpts CPU */
	struct tcp_hptsi *p_hptsi;	/* Back pointer to parent hptsi structure */
	/* There is extra space in here */
	/* Cache line 0x100 */
	struct callout co __aligned(CACHE_LINE_SIZE);
} __aligned(CACHE_LINE_SIZE);

/*
 * Top-level HPTS container: the array of per-thread hpts entries, the
 * per-NUMA-domain CPU assignments and the function table used to reach
 * time, interrupt and callout services.
 */
struct tcp_hptsi {
	struct cpu_group **grps;
	struct tcp_hpts_entry **rp_ent;	/* Array of hptss */
	uint32_t *cts_last_ran;
	uint32_t grp_cnt;
	uint32_t rp_num_hptss;		/* Number of hpts threads */
	struct hpts_domain_info {
		int count;
		int cpu[MAXCPU];
	} domains[MAXMEMDOM];		/* Per-NUMA domain CPU assignments */
	const struct tcp_hptsi_funcs *funcs;	/* Function table for testability */
};

/*
 * Core tcp_hptsi structure manipulation functions.
 */
struct tcp_hptsi *tcp_hptsi_create(const struct tcp_hptsi_funcs *funcs,
    bool enable_sysctl);
void tcp_hptsi_destroy(struct tcp_hptsi *pace);
void tcp_hptsi_start(struct tcp_hptsi *pace);
void tcp_hptsi_stop(struct tcp_hptsi *pace);
uint16_t tcp_hptsi_random_cpu(struct tcp_hptsi *pace);
int32_t tcp_hptsi(struct tcp_hpts_entry *hpts, bool from_callout);
void tcp_hpts_wake(struct tcp_hpts_entry *hpts);

/*
 * LRO HPTS initialization and uninitialization, only for internal use by the
 * HPTS code.
 */
void tcp_lro_hpts_init(void);
void tcp_lro_hpts_uninit(void);

#endif /* defined(_KERNEL) */
#endif /* __tcp_hpts_internal_h__ */