/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _NET_RPS_H
#define _NET_RPS_H

#include <linux/types.h>
#include <linux/static_key.h>
#include <net/sock.h>
#include <net/hotdata.h>

#ifdef CONFIG_RPS
#include <net/rps-types.h>

extern struct static_key_false rps_needed;
extern struct static_key_false rfs_needed;

/*
 * This structure holds an RPS map which can be of variable length.  The
 * map is an array of CPUs.
 */
struct rps_map {
	unsigned int	len;
	struct rcu_head	rcu;
	u16		cpus[];
};
#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16)))

/*
 * The rps_dev_flow structure contains the mapping of a flow to a CPU, the
 * tail pointer for that CPU's input queue at the time of last enqueue, a
 * hardware filter index, and the hash of the flow if aRFS is enabled.
 */
struct rps_dev_flow {
	u16		cpu;
	u16		filter;
	unsigned int	last_qtail;
#ifdef CONFIG_RFS_ACCEL
	u32		hash;
#endif
};
#define RPS_NO_FILTER 0xffff

/*
 * The rps_sock_flow_table contains mappings of flows to the last CPU
 * on which they were processed by the application (set in recvmsg).
 * Each entry is a 32bit value. Upper part is the high-order bits
 * of flow hash, lower part is CPU number.
 * rps_cpu_mask is used to partition the space, depending on number of
 * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1
 * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f,
 * meaning we use 32-6=26 bits for the hash.
 */
struct rps_sock_flow_table {
	u32	ent;
};

#define RPS_NO_CPU 0xffff

static inline void rps_record_sock_flow(rps_tag_ptr tag_ptr, u32 hash)
{
	unsigned int index = hash & rps_tag_to_mask(tag_ptr);
	u32 val = hash & ~net_hotdata.rps_cpu_mask;
	struct rps_sock_flow_table *table;

	/* We only give a hint, preemption can change CPU under us */
	val |= raw_smp_processor_id();

	table = rps_tag_to_table(tag_ptr);
	/* The following WRITE_ONCE() is paired with the READ_ONCE()
	 * here, and another one in get_rps_cpu().
	 */
	if (READ_ONCE(table[index].ent) != val)
		WRITE_ONCE(table[index].ent, val);
}
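
/*
 * Worked example of the encoding done above (illustrative only; the
 * numbers assume a machine where 64 CPUs are possible, so
 * net_hotdata.rps_cpu_mask == 0x3f as described in the comment on
 * struct rps_sock_flow_table):
 *
 *	hash = 0x12345678, recorded while running on CPU 3:
 *	val  = (0x12345678 & ~0x3f) | 3 = 0x12345643
 *
 * The upper 26 bits identify the flow and the low 6 bits name the CPU
 * that last ran recvmsg() for it; get_rps_cpu() can later match the
 * upper bits against a packet hash and steer the flow towards CPU 3.
 */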

static inline void _sock_rps_record_flow_hash(__u32 hash)
{
	rps_tag_ptr tag_ptr;

	if (!hash)
		return;
	rcu_read_lock();
	tag_ptr = READ_ONCE(net_hotdata.rps_sock_flow_table);
	if (tag_ptr)
		rps_record_sock_flow(tag_ptr, hash);
	rcu_read_unlock();
}

static inline void _sock_rps_record_flow(const struct sock *sk)
{
	/* Reading sk->sk_rxhash might incur an expensive cache line
	 * miss.
	 *
	 * TCP_ESTABLISHED does cover almost all states where RFS
	 * might be useful, and is cheaper [1] than testing :
	 *	IPv4: inet_sk(sk)->inet_daddr
	 *	IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
	 * OR	an additional socket flag
	 * [1] : sk_state and sk_prot are in the same cache line.
	 */
	if (sk->sk_state == TCP_ESTABLISHED) {
		/* This READ_ONCE() is paired with the WRITE_ONCE()
		 * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
		 */
		_sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
	}
}

static inline void _sock_rps_delete_flow(const struct sock *sk)
{
	struct rps_sock_flow_table *table;
	rps_tag_ptr tag_ptr;
	u32 hash, index;

	hash = READ_ONCE(sk->sk_rxhash);
	if (!hash)
		return;

	rcu_read_lock();
	tag_ptr = READ_ONCE(net_hotdata.rps_sock_flow_table);
	if (tag_ptr) {
		index = hash & rps_tag_to_mask(tag_ptr);
		table = rps_tag_to_table(tag_ptr);
		if (READ_ONCE(table[index].ent) != RPS_NO_CPU)
			WRITE_ONCE(table[index].ent, RPS_NO_CPU);
	}
	rcu_read_unlock();
}
#endif /* CONFIG_RPS */

static inline bool rfs_is_needed(void)
{
#ifdef CONFIG_RPS
	return static_branch_unlikely(&rfs_needed);
#else
	return false;
#endif
}

static inline void sock_rps_record_flow_hash(__u32 hash)
{
#ifdef CONFIG_RPS
	if (!rfs_is_needed())
		return;

	_sock_rps_record_flow_hash(hash);
#endif
}

static inline void sock_rps_record_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
	if (!rfs_is_needed())
		return;

	_sock_rps_record_flow(sk);
#endif
}

static inline void sock_rps_delete_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
	if (!rfs_is_needed())
		return;

	_sock_rps_delete_flow(sk);
#endif
}

static inline u32 rps_input_queue_tail_incr(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	return ++sd->input_queue_tail;
#else
	return 0;
#endif
}

static inline void rps_input_queue_tail_save(u32 *dest, u32 tail)
{
#ifdef CONFIG_RPS
	WRITE_ONCE(*dest, tail);
#endif
}

static inline void rps_input_queue_head_add(struct softnet_data *sd, int val)
{
#ifdef CONFIG_RPS
	WRITE_ONCE(sd->input_queue_head, sd->input_queue_head + val);
#endif
}

static inline void rps_input_queue_head_incr(struct softnet_data *sd)
{
	rps_input_queue_head_add(sd, 1);
}

#endif /* _NET_RPS_H */
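
#if 0	/* Illustrative sketch, never compiled: how the wrappers above
	 * are typically consumed. example_recvmsg() and example_destroy()
	 * are hypothetical functions; in the tree, tcp_recvmsg() is a
	 * canonical caller of sock_rps_record_flow().
	 */
static int example_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
			   int flags, int *addr_len)
{
	/* Hint that this flow is now consumed on this CPU, so RFS can
	 * steer future packets of the flow here. This is a static-key
	 * no-op unless rfs_needed has been enabled.
	 */
	sock_rps_record_flow(sk);

	/* ... actual receive work ... */
	return 0;
}

static void example_destroy(struct sock *sk)
{
	/* On socket teardown, reset the flow table entry to RPS_NO_CPU
	 * so a stale CPU hint does not outlive the socket.
	 */
	sock_rps_delete_flow(sk);
}
#endif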