/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _NET_RPS_H
#define _NET_RPS_H

#include <linux/types.h>
#include <linux/static_key.h>
#include <net/sock.h>
#include <net/hotdata.h>

#ifdef CONFIG_RPS
#include <net/rps-types.h>

extern struct static_key_false rps_needed;
extern struct static_key_false rfs_needed;

/*
 * This structure holds an RPS map which can be of variable length.  The
 * map is an array of CPUs.
 */
struct rps_map {
	unsigned int	len;
	struct rcu_head	rcu;
	u16		cpus[];
};
#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16)))
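
/*
 * Illustrative sizing example: a map covering 4 CPUs occupies
 * RPS_MAP_SIZE(4) bytes, i.e. the fixed header (len + rcu) plus
 * 4 * sizeof(u16) = 8 bytes for the cpus[] array.
 */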

/*
 * The rps_dev_flow structure contains the mapping of a flow to a CPU, the
 * tail pointer for that CPU's input queue at the time of last enqueue, a
 * hardware filter index, and the hash of the flow if aRFS is enabled.
 */
struct rps_dev_flow {
	u16		cpu;
	u16		filter;
	unsigned int	last_qtail;
#ifdef CONFIG_RFS_ACCEL
	u32		hash;
#endif
};
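/* Value of rps_dev_flow::filter when no hardware (aRFS) filter is installed. */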
#define RPS_NO_FILTER 0xffff

/*
 * The rps_sock_flow_table contains mappings of flows to the last CPU
 * on which they were processed by the application (set in recvmsg).
 * Each entry is a 32-bit value: the upper part holds the high-order
 * bits of the flow hash, the lower part the CPU number.
 * rps_cpu_mask is used to partition the space, depending on the number
 * of possible CPUs: rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1
 * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f,
 * meaning we use 32 - 6 = 26 bits for the hash.
 */
struct rps_sock_flow_table {
	u32	ent;
};

#define RPS_NO_CPU 0xffff
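
/*
 * Worked example (illustrative, assuming 64 possible CPUs, so
 * rps_cpu_mask == 0x3f): recording hash 0xabcd1234 on CPU 5 stores
 *
 *	(0xabcd1234 & ~0x3f) | 5 == 0xabcd1205
 *
 * in the slot indexed by hash & rps_tag_to_mask(tag_ptr).  An entry
 * holding RPS_NO_CPU is unused or has been deleted.
 */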

static inline void rps_record_sock_flow(rps_tag_ptr tag_ptr, u32 hash)
{
	unsigned int index = hash & rps_tag_to_mask(tag_ptr);
	u32 val = hash & ~net_hotdata.rps_cpu_mask;
	struct rps_sock_flow_table *table;

	/* We only give a hint, preemption can change CPU under us */
	val |= raw_smp_processor_id();

	table = rps_tag_to_table(tag_ptr);
	/* The following WRITE_ONCE() is paired with the READ_ONCE()
	 * here, and another one in get_rps_cpu().
	 * Checking first avoids dirtying the cache line when the entry
	 * is already up to date.
	 */
	if (READ_ONCE(table[index].ent) != val)
		WRITE_ONCE(table[index].ent, val);
}

static inline void _sock_rps_record_flow_hash(__u32 hash)
{
	rps_tag_ptr tag_ptr;

	if (!hash)
		return;
	rcu_read_lock();
	tag_ptr = rcu_dereference(net_hotdata.rps_sock_flow_table);
	if (tag_ptr)
		rps_record_sock_flow(tag_ptr, hash);
	rcu_read_unlock();
}

static inline void _sock_rps_record_flow(const struct sock *sk)
{
	/* Reading sk->sk_rxhash might incur an expensive cache line
	 * miss.
	 *
	 * TCP_ESTABLISHED does cover almost all states where RFS
	 * might be useful, and is cheaper [1] than testing :
	 *	IPv4: inet_sk(sk)->inet_daddr
	 *	IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
	 * OR	an additional socket flag
	 * [1] : sk_state and sk_prot are in the same cache line.
	 */
	if (sk->sk_state == TCP_ESTABLISHED) {
		/* This READ_ONCE() is paired with the WRITE_ONCE()
		 * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
		 */
		_sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
	}
}

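/* Clear the sock flow table entry for @sk's flow, typically when the
 * socket goes away, so a stale entry stops steering packets to a CPU
 * the application no longer runs on.
 */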
static inline void _sock_rps_delete_flow(const struct sock *sk)
{
	struct rps_sock_flow_table *table;
	rps_tag_ptr tag_ptr;
	u32 hash, index;

	hash = READ_ONCE(sk->sk_rxhash);
	if (!hash)
		return;

	rcu_read_lock();
	tag_ptr = rcu_dereference(net_hotdata.rps_sock_flow_table);
	if (tag_ptr) {
		index = hash & rps_tag_to_mask(tag_ptr);
		table = rps_tag_to_table(tag_ptr);
		if (READ_ONCE(table[index].ent) != RPS_NO_CPU)
			WRITE_ONCE(table[index].ent, RPS_NO_CPU);
	}
	rcu_read_unlock();
}
#endif /* CONFIG_RPS */

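/* Cheap test for "is RFS active at all?": rfs_needed is a static key,
 * so when RFS is disabled this compiles down to a patched-out branch
 * rather than a memory load.
 */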
static inline bool rfs_is_needed(void)
{
#ifdef CONFIG_RPS
	return static_branch_unlikely(&rfs_needed);
#else
	return false;
#endif
}

static inline void sock_rps_record_flow_hash(__u32 hash)
{
#ifdef CONFIG_RPS
	if (!rfs_is_needed())
		return;

	_sock_rps_record_flow_hash(hash);
#endif
}

static inline void sock_rps_record_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
	if (!rfs_is_needed())
		return;

	_sock_rps_record_flow(sk);
#endif
}

static inline void sock_rps_delete_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
	if (!rfs_is_needed())
		return;

	_sock_rps_delete_flow(sk);
#endif
}

static inline u32 rps_input_queue_tail_incr(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	return ++sd->input_queue_tail;
#else
	return 0;
#endif
}

static inline void rps_input_queue_tail_save(u32 *dest, u32 tail)
{
#ifdef CONFIG_RPS
	WRITE_ONCE(*dest, tail);
#endif
}

static inline void rps_input_queue_head_add(struct softnet_data *sd, int val)
{
#ifdef CONFIG_RPS
	WRITE_ONCE(sd->input_queue_head, sd->input_queue_head + val);
#endif
}

static inline void rps_input_queue_head_incr(struct softnet_data *sd)
{
	rps_input_queue_head_add(sd, 1);
}
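
/*
 * The tail and head counters above form a producer/consumer pair for a
 * CPU's backlog: the tail advances on enqueue, the head on dequeue, and
 * rps_dev_flow::last_qtail remembers where a flow last enqueued so RFS
 * can tell when that queue has drained.  Sketch of the enqueue-side
 * pattern (illustrative only; the actual callers live in net/core/dev.c):
 *
 *	tail = rps_input_queue_tail_incr(sd);
 *	rps_input_queue_tail_save(&rflow->last_qtail, tail);
 *	...
 *	rps_input_queue_head_incr(sd);	// once a packet is processed
 */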

#endif /* _NET_RPS_H */