xref: /linux/lib/dim/net_dim.c (revision a83c29e1d145cca5240952100acd1cd60f25fb5f)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
4  */
5 
6 #include <linux/dim.h>
7 #include <linux/rtnetlink.h>
8 
/*
 * Net DIM profiles:
 *        There is a different set of profiles for each CQ period mode.
 *        There is a different set of profiles for RX and TX CQs.
 *        Each profile list must contain exactly NET_DIM_PARAMS_NUM_PROFILES
 *        entries.
 *
 * Every entry below sets the .usec and .pkts fields of a struct dim_cq_moder.
 * Within each list .usec never decreases from one entry to the next.
 */

/* RX, EQE list: .usec goes 1 -> 256, .pkts is the same constant throughout. */
#define NET_DIM_RX_EQE_PROFILES { \
	{.usec = 1,   .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
	{.usec = 8,   .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
	{.usec = 64,  .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
	{.usec = 128, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
	{.usec = 256, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}  \
}

/* RX, CQE list: .usec goes 2 -> 64 while .pkts goes 256 -> 64. */
#define NET_DIM_RX_CQE_PROFILES { \
	{.usec = 2,  .pkts = 256,},             \
	{.usec = 8,  .pkts = 128,},             \
	{.usec = 16, .pkts = 64,},              \
	{.usec = 32, .pkts = 64,},              \
	{.usec = 64, .pkts = 64,}               \
}

/* TX, EQE list: .usec goes 1 -> 128, .pkts is the same constant throughout. */
#define NET_DIM_TX_EQE_PROFILES { \
	{.usec = 1,   .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,},  \
	{.usec = 8,   .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,},  \
	{.usec = 32,  .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,},  \
	{.usec = 64,  .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,},  \
	{.usec = 128, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}   \
}

/* TX, CQE list: .usec goes 5 -> 64 while .pkts goes 128 -> 32. */
#define NET_DIM_TX_CQE_PROFILES { \
	{.usec = 5,  .pkts = 128,},  \
	{.usec = 8,  .pkts = 64,},  \
	{.usec = 16, .pkts = 32,},  \
	{.usec = 32, .pkts = 32,},  \
	{.usec = 64, .pkts = 32,}   \
}
46 
/*
 * Built-in RX profiles, indexed as rx_profile[cq_period_mode][profile index].
 * By initializer order, row 0 is the EQE list and row 1 is the CQE list.
 * NOTE(review): this relies on the EQE period mode having value 0 and the CQE
 * period mode having value 1 -- confirm against the enum in <linux/dim.h>.
 */
static const struct dim_cq_moder
rx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
	NET_DIM_RX_EQE_PROFILES,
	NET_DIM_RX_CQE_PROFILES,
};
52 
/*
 * Built-in TX profiles, indexed as tx_profile[cq_period_mode][profile index].
 * By initializer order, row 0 is the EQE list and row 1 is the CQE list.
 * NOTE(review): this relies on the EQE period mode having value 0 and the CQE
 * period mode having value 1 -- confirm against the enum in <linux/dim.h>.
 */
static const struct dim_cq_moder
tx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
	NET_DIM_TX_EQE_PROFILES,
	NET_DIM_TX_CQE_PROFILES,
};
58 
59 struct dim_cq_moder
60 net_dim_get_rx_moderation(u8 cq_period_mode, int ix)
61 {
62 	struct dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix];
63 
64 	cq_moder.cq_period_mode = cq_period_mode;
65 	return cq_moder;
66 }
67 EXPORT_SYMBOL(net_dim_get_rx_moderation);
68 
69 struct dim_cq_moder
70 net_dim_get_def_rx_moderation(u8 cq_period_mode)
71 {
72 	u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
73 			NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
74 
75 	return net_dim_get_rx_moderation(cq_period_mode, profile_ix);
76 }
77 EXPORT_SYMBOL(net_dim_get_def_rx_moderation);
78 
79 struct dim_cq_moder
80 net_dim_get_tx_moderation(u8 cq_period_mode, int ix)
81 {
82 	struct dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix];
83 
84 	cq_moder.cq_period_mode = cq_period_mode;
85 	return cq_moder;
86 }
87 EXPORT_SYMBOL(net_dim_get_tx_moderation);
88 
89 struct dim_cq_moder
90 net_dim_get_def_tx_moderation(u8 cq_period_mode)
91 {
92 	u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
93 			NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
94 
95 	return net_dim_get_tx_moderation(cq_period_mode, profile_ix);
96 }
97 EXPORT_SYMBOL(net_dim_get_def_tx_moderation);
98 
99 int net_dim_init_irq_moder(struct net_device *dev, u8 profile_flags,
100 			   u8 coal_flags, u8 rx_mode, u8 tx_mode,
101 			   void (*rx_dim_work)(struct work_struct *work),
102 			   void (*tx_dim_work)(struct work_struct *work))
103 {
104 	struct dim_cq_moder *rxp = NULL, *txp;
105 	struct dim_irq_moder *moder;
106 	int len;
107 
108 	dev->irq_moder = kzalloc(sizeof(*dev->irq_moder), GFP_KERNEL);
109 	if (!dev->irq_moder)
110 		return -ENOMEM;
111 
112 	moder = dev->irq_moder;
113 	len = NET_DIM_PARAMS_NUM_PROFILES * sizeof(*moder->rx_profile);
114 
115 	moder->coal_flags = coal_flags;
116 	moder->profile_flags = profile_flags;
117 
118 	if (profile_flags & DIM_PROFILE_RX) {
119 		moder->rx_dim_work = rx_dim_work;
120 		moder->dim_rx_mode = rx_mode;
121 		rxp = kmemdup(rx_profile[rx_mode], len, GFP_KERNEL);
122 		if (!rxp)
123 			goto free_moder;
124 
125 		rcu_assign_pointer(moder->rx_profile, rxp);
126 	}
127 
128 	if (profile_flags & DIM_PROFILE_TX) {
129 		moder->tx_dim_work = tx_dim_work;
130 		moder->dim_tx_mode = tx_mode;
131 		txp = kmemdup(tx_profile[tx_mode], len, GFP_KERNEL);
132 		if (!txp)
133 			goto free_rxp;
134 
135 		rcu_assign_pointer(moder->tx_profile, txp);
136 	}
137 
138 	return 0;
139 
140 free_rxp:
141 	kfree(rxp);
142 free_moder:
143 	kfree(moder);
144 	return -ENOMEM;
145 }
146 EXPORT_SYMBOL(net_dim_init_irq_moder);
147 
148 /* RTNL lock is held. */
149 void net_dim_free_irq_moder(struct net_device *dev)
150 {
151 	struct dim_cq_moder *rxp, *txp;
152 
153 	if (!dev->irq_moder)
154 		return;
155 
156 	rxp = rtnl_dereference(dev->irq_moder->rx_profile);
157 	txp = rtnl_dereference(dev->irq_moder->tx_profile);
158 
159 	rcu_assign_pointer(dev->irq_moder->rx_profile, NULL);
160 	rcu_assign_pointer(dev->irq_moder->tx_profile, NULL);
161 
162 	kfree_rcu(rxp, rcu);
163 	kfree_rcu(txp, rcu);
164 	kfree(dev->irq_moder);
165 }
166 EXPORT_SYMBOL(net_dim_free_irq_moder);
167 
168 void net_dim_setting(struct net_device *dev, struct dim *dim, bool is_tx)
169 {
170 	struct dim_irq_moder *irq_moder = dev->irq_moder;
171 
172 	if (!irq_moder)
173 		return;
174 
175 	if (is_tx) {
176 		INIT_WORK(&dim->work, irq_moder->tx_dim_work);
177 		dim->mode = READ_ONCE(irq_moder->dim_tx_mode);
178 		return;
179 	}
180 
181 	INIT_WORK(&dim->work, irq_moder->rx_dim_work);
182 	dim->mode = READ_ONCE(irq_moder->dim_rx_mode);
183 }
184 EXPORT_SYMBOL(net_dim_setting);
185 
/* Synchronously cancel @dim's work item via cancel_work_sync(). */
void net_dim_work_cancel(struct dim *dim)
{
	cancel_work_sync(&dim->work);
}
EXPORT_SYMBOL(net_dim_work_cancel);
191 
192 struct dim_cq_moder net_dim_get_rx_irq_moder(struct net_device *dev,
193 					     struct dim *dim)
194 {
195 	struct dim_cq_moder res, *profile;
196 
197 	rcu_read_lock();
198 	profile = rcu_dereference(dev->irq_moder->rx_profile);
199 	res = profile[dim->profile_ix];
200 	rcu_read_unlock();
201 
202 	res.cq_period_mode = dim->mode;
203 
204 	return res;
205 }
206 EXPORT_SYMBOL(net_dim_get_rx_irq_moder);
207 
208 struct dim_cq_moder net_dim_get_tx_irq_moder(struct net_device *dev,
209 					     struct dim *dim)
210 {
211 	struct dim_cq_moder res, *profile;
212 
213 	rcu_read_lock();
214 	profile = rcu_dereference(dev->irq_moder->tx_profile);
215 	res = profile[dim->profile_ix];
216 	rcu_read_unlock();
217 
218 	res.cq_period_mode = dim->mode;
219 
220 	return res;
221 }
222 EXPORT_SYMBOL(net_dim_get_tx_irq_moder);
223 
/*
 * Store @rx_mode as the device's RX period mode.  WRITE_ONCE() pairs with the
 * READ_ONCE() in net_dim_setting().  @dev->irq_moder is not checked for NULL.
 */
void net_dim_set_rx_mode(struct net_device *dev, u8 rx_mode)
{
	WRITE_ONCE(dev->irq_moder->dim_rx_mode, rx_mode);
}
EXPORT_SYMBOL(net_dim_set_rx_mode);
229 
/*
 * Store @tx_mode as the device's TX period mode.  WRITE_ONCE() pairs with the
 * READ_ONCE() in net_dim_setting().  @dev->irq_moder is not checked for NULL.
 */
void net_dim_set_tx_mode(struct net_device *dev, u8 tx_mode)
{
	WRITE_ONCE(dev->irq_moder->dim_tx_mode, tx_mode);
}
EXPORT_SYMBOL(net_dim_set_tx_mode);
235 
236 static int net_dim_step(struct dim *dim)
237 {
238 	if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2))
239 		return DIM_TOO_TIRED;
240 
241 	switch (dim->tune_state) {
242 	case DIM_PARKING_ON_TOP:
243 	case DIM_PARKING_TIRED:
244 		break;
245 	case DIM_GOING_RIGHT:
246 		if (dim->profile_ix == (NET_DIM_PARAMS_NUM_PROFILES - 1))
247 			return DIM_ON_EDGE;
248 		dim->profile_ix++;
249 		dim->steps_right++;
250 		break;
251 	case DIM_GOING_LEFT:
252 		if (dim->profile_ix == 0)
253 			return DIM_ON_EDGE;
254 		dim->profile_ix--;
255 		dim->steps_left++;
256 		break;
257 	}
258 
259 	dim->tired++;
260 	return DIM_STEPPED;
261 }
262 
263 static void net_dim_exit_parking(struct dim *dim)
264 {
265 	dim->tune_state = dim->profile_ix ? DIM_GOING_LEFT : DIM_GOING_RIGHT;
266 	net_dim_step(dim);
267 }
268 
269 static int net_dim_stats_compare(struct dim_stats *curr,
270 				 struct dim_stats *prev)
271 {
272 	if (!prev->bpms)
273 		return curr->bpms ? DIM_STATS_BETTER : DIM_STATS_SAME;
274 
275 	if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms))
276 		return (curr->bpms > prev->bpms) ? DIM_STATS_BETTER :
277 						   DIM_STATS_WORSE;
278 
279 	if (!prev->ppms)
280 		return curr->ppms ? DIM_STATS_BETTER :
281 				    DIM_STATS_SAME;
282 
283 	if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms))
284 		return (curr->ppms > prev->ppms) ? DIM_STATS_BETTER :
285 						   DIM_STATS_WORSE;
286 
287 	if (!prev->epms)
288 		return DIM_STATS_SAME;
289 
290 	if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms))
291 		return (curr->epms < prev->epms) ? DIM_STATS_BETTER :
292 						   DIM_STATS_WORSE;
293 
294 	return DIM_STATS_SAME;
295 }
296 
/*
 * Advance @dim's tuning state machine by one sample.
 *
 * @curr_stats: statistics of the measurement that just finished
 * @dim:        state to update (tune_state, profile_ix, tired, prev_stats, ...)
 *
 * Returns true when dim->profile_ix differs from its value on entry.
 */
static bool net_dim_decision(struct dim_stats *curr_stats, struct dim *dim)
{
	int prev_state = dim->tune_state;
	int prev_ix = dim->profile_ix;
	int stats_res;
	int step_res;

	switch (dim->tune_state) {
	case DIM_PARKING_ON_TOP:
		/* Stay parked until the stats change in either direction. */
		stats_res = net_dim_stats_compare(curr_stats,
						  &dim->prev_stats);
		if (stats_res != DIM_STATS_SAME)
			net_dim_exit_parking(dim);
		break;

	case DIM_PARKING_TIRED:
		/* Count the tired counter down; resume moving when it hits zero. */
		dim->tired--;
		if (!dim->tired)
			net_dim_exit_parking(dim);
		break;

	case DIM_GOING_RIGHT:
	case DIM_GOING_LEFT:
		/* Anything short of an improvement reverses direction. */
		stats_res = net_dim_stats_compare(curr_stats,
						  &dim->prev_stats);
		if (stats_res != DIM_STATS_BETTER)
			dim_turn(dim);

		if (dim_on_top(dim)) {
			dim_park_on_top(dim);
			break;
		}

		/* DIM_STEPPED needs no further action here. */
		step_res = net_dim_step(dim);
		switch (step_res) {
		case DIM_ON_EDGE:
			dim_park_on_top(dim);
			break;
		case DIM_TOO_TIRED:
			dim_park_tired(dim);
			break;
		}

		break;
	}

	/*
	 * Keep the old reference stats only when the state was, and still is,
	 * DIM_PARKING_ON_TOP; in every other case record the current stats.
	 */
	if (prev_state != DIM_PARKING_ON_TOP ||
	    dim->tune_state != DIM_PARKING_ON_TOP)
		dim->prev_stats = *curr_stats;

	return dim->profile_ix != prev_ix;
}
349 
/*
 * Feed a new sample into @dim and drive its measurement state machine.
 *
 * @dim:        DIM state
 * @end_sample: latest counters (passed by value)
 *
 * DIM_START_MEASURE:       record @end_sample as the start sample and switch
 *                          to DIM_MEASURE_IN_PROGRESS.
 * DIM_MEASURE_IN_PROGRESS: once at least DIM_NEVENTS events have elapsed and
 *                          stats could be calculated, run the decision step.
 *                          If the profile changed, switch to
 *                          DIM_APPLY_NEW_PROFILE and schedule dim->work;
 *                          otherwise start a fresh measurement.
 * DIM_APPLY_NEW_PROFILE:   nothing is done here.
 */
void net_dim(struct dim *dim, struct dim_sample end_sample)
{
	struct dim_stats curr_stats;
	u16 nevents;

	switch (dim->state) {
	case DIM_MEASURE_IN_PROGRESS:
		/* Event-counter delta since the start sample, computed over a u16 width. */
		nevents = BIT_GAP(BITS_PER_TYPE(u16),
				  end_sample.event_ctr,
				  dim->start_sample.event_ctr);
		if (nevents < DIM_NEVENTS)
			break;
		/* No usable stats: keep the current measurement running. */
		if (!dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats))
			break;
		if (net_dim_decision(&curr_stats, dim)) {
			dim->state = DIM_APPLY_NEW_PROFILE;
			schedule_work(&dim->work);
			break;
		}
		/* Profile unchanged: restart measuring from this sample. */
		fallthrough;
	case DIM_START_MEASURE:
		dim_update_sample(end_sample.event_ctr, end_sample.pkt_ctr,
				  end_sample.byte_ctr, &dim->start_sample);
		dim->state = DIM_MEASURE_IN_PROGRESS;
		break;
	case DIM_APPLY_NEW_PROFILE:
		break;
	}
}
EXPORT_SYMBOL(net_dim);
380