1 /*-
2 * Copyright (c) 2013-2020, Mellanox Technologies, Ltd. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 */
25
26 #include "opt_rss.h"
27 #include "opt_ratelimit.h"
28
29 #include <dev/mlx5/mlx5_ib/mlx5_ib.h>
30 #include <dev/mlx5/cmd.h>
31
/*
 * String tables for the congestion sysctls, generated by expanding each
 * list macro with MLX5_IB_STATS_DESC.  mlx5_ib_init_congestion() uses
 * entry [2 * x] as the sysctl name and entry [2 * x + 1] as its
 * description, so every list item is expected to contribute exactly two
 * strings, in that order.
 */
static const char *mlx5_ib_cong_params_desc[] = {
	MLX5_IB_CONG_PARAMS(MLX5_IB_STATS_DESC)
};

static const char *mlx5_ib_cong_status_desc[] = {
	MLX5_IB_CONG_STATUS(MLX5_IB_STATS_DESC)
};

static const char *mlx5_ib_cong_stats_desc[] = {
	MLX5_IB_CONG_STATS(MLX5_IB_STATS_DESC)
};
43
/*
 * Slot number of "field" inside the u64 arg[] array of
 * struct mlx5_ib_congestion: the byte distance between the field and
 * arg[0], divided by the size of one u64 slot.
 */
#define MLX5_IB_INDEX(field) ( \
    (__offsetof(struct mlx5_ib_congestion, field) - \
    __offsetof(struct mlx5_ib_congestion, arg[0])) / sizeof(u64))
/*
 * (1 << N) - 1 where N is __mlx5_bit_sz(type, field); presumably N is the
 * field width in bits, making this the largest value the field can hold.
 */
#define MLX5_IB_FLD_MAX(type, field) ((1ULL << __mlx5_bit_sz(type, field)) - 1ULL)
/*
 * Clamp "var" to MLX5_IB_FLD_MAX(type, field) and store it with MLX5_SET().
 * "var" is evaluated several times and is assigned to, so it has to be a
 * modifiable lvalue without side effects.
 */
#define MLX5_IB_SET_CLIPPED(type, ptr, field, var) do { \
	/* rangecheck */ \
	if ((var) > MLX5_IB_FLD_MAX(type, field)) \
		(var) = MLX5_IB_FLD_MAX(type, field); \
	/* set value */ \
	MLX5_SET(type, ptr, field, var); \
} while (0)
55
/*
 * Wrappers around the per-device congestion sx lock
 * (dev->congestion.lock): exclusive acquire, exclusive release and
 * "held exclusively by me" test.
 */
#define CONG_LOCK(dev) sx_xlock(&(dev)->congestion.lock)
#define CONG_UNLOCK(dev) sx_xunlock(&(dev)->congestion.lock)
#define CONG_LOCKED(dev) sx_xlocked(&(dev)->congestion.lock)
59
/*
 * Select bits OR-ed into the attribute mask by
 * mlx5_ib_set_cc_param_mask_val(); mlx5_ib_set_cc_params() writes that mask
 * into the field_select area of the modify command.  Bits 0 and 6 are not
 * assigned here.
 */
#define MLX5_IB_RP_CLAMP_TGT_RATE_ATTR BIT(1)
#define MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR BIT(2)
#define MLX5_IB_RP_TIME_RESET_ATTR BIT(3)
#define MLX5_IB_RP_BYTE_RESET_ATTR BIT(4)
#define MLX5_IB_RP_THRESHOLD_ATTR BIT(5)
#define MLX5_IB_RP_AI_RATE_ATTR BIT(7)
#define MLX5_IB_RP_HAI_RATE_ATTR BIT(8)
#define MLX5_IB_RP_MIN_DEC_FAC_ATTR BIT(9)
#define MLX5_IB_RP_MIN_RATE_ATTR BIT(10)
#define MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR BIT(11)
#define MLX5_IB_RP_DCE_TCP_G_ATTR BIT(12)
#define MLX5_IB_RP_DCE_TCP_RTT_ATTR BIT(13)
#define MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR BIT(14)
#define MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR BIT(15)
#define MLX5_IB_RP_GD_ATTR BIT(16)

/*
 * The NP select bits reuse bit numbers that are also used by RP select
 * bits above; the two sets are never mixed because a modify command
 * carries a single cong_protocol value.
 */
#define MLX5_IB_NP_CNP_DSCP_ATTR BIT(3)
#define MLX5_IB_NP_CNP_PRIO_MODE_ATTR BIT(4)
78
/*
 * Node selector handed to mlx5_cmd_query_cong_params() and written into the
 * cong_protocol field of the modify command.  The value 0 is not a valid
 * node; mlx5_ib_get_all_cc_params() uses it as "nothing queried yet".
 */
enum mlx5_ib_cong_node_type {
	MLX5_IB_RROCE_ECN_RP = 1,
	MLX5_IB_RROCE_ECN_NP = 2,
};
83
84 static enum mlx5_ib_cong_node_type
mlx5_ib_param_to_node(u32 index)85 mlx5_ib_param_to_node(u32 index)
86 {
87
88 if (index >= MLX5_IB_INDEX(rp_clamp_tgt_rate) &&
89 index <= MLX5_IB_INDEX(rp_gd))
90 return MLX5_IB_RROCE_ECN_RP;
91 else
92 return MLX5_IB_RROCE_ECN_NP;
93 }
94
95 static u64
mlx5_get_cc_param_val(void * field,u32 index)96 mlx5_get_cc_param_val(void *field, u32 index)
97 {
98
99 switch (index) {
100 case MLX5_IB_INDEX(rp_clamp_tgt_rate):
101 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
102 clamp_tgt_rate);
103 case MLX5_IB_INDEX(rp_clamp_tgt_rate_ati):
104 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
105 clamp_tgt_rate_after_time_inc);
106 case MLX5_IB_INDEX(rp_time_reset):
107 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
108 rpg_time_reset);
109 case MLX5_IB_INDEX(rp_byte_reset):
110 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
111 rpg_byte_reset);
112 case MLX5_IB_INDEX(rp_threshold):
113 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
114 rpg_threshold);
115 case MLX5_IB_INDEX(rp_ai_rate):
116 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
117 rpg_ai_rate);
118 case MLX5_IB_INDEX(rp_hai_rate):
119 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
120 rpg_hai_rate);
121 case MLX5_IB_INDEX(rp_min_dec_fac):
122 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
123 rpg_min_dec_fac);
124 case MLX5_IB_INDEX(rp_min_rate):
125 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
126 rpg_min_rate);
127 case MLX5_IB_INDEX(rp_rate_to_set_on_first_cnp):
128 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
129 rate_to_set_on_first_cnp);
130 case MLX5_IB_INDEX(rp_dce_tcp_g):
131 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
132 dce_tcp_g);
133 case MLX5_IB_INDEX(rp_dce_tcp_rtt):
134 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
135 dce_tcp_rtt);
136 case MLX5_IB_INDEX(rp_rate_reduce_monitor_period):
137 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
138 rate_reduce_monitor_period);
139 case MLX5_IB_INDEX(rp_initial_alpha_value):
140 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
141 initial_alpha_value);
142 case MLX5_IB_INDEX(rp_gd):
143 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
144 rpg_gd);
145 case MLX5_IB_INDEX(np_cnp_dscp):
146 return MLX5_GET(cong_control_r_roce_ecn_np, field,
147 cnp_dscp);
148 case MLX5_IB_INDEX(np_cnp_prio_mode):
149 return MLX5_GET(cong_control_r_roce_ecn_np, field,
150 cnp_prio_mode);
151 case MLX5_IB_INDEX(np_cnp_prio):
152 return MLX5_GET(cong_control_r_roce_ecn_np, field,
153 cnp_802p_prio);
154 default:
155 return 0;
156 }
157 }
158
159 static void
mlx5_ib_set_cc_param_mask_val(void * field,u32 index,u64 var,u32 * attr_mask)160 mlx5_ib_set_cc_param_mask_val(void *field, u32 index,
161 u64 var, u32 *attr_mask)
162 {
163
164 switch (index) {
165 case MLX5_IB_INDEX(rp_clamp_tgt_rate):
166 *attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATTR;
167 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
168 clamp_tgt_rate, var);
169 break;
170 case MLX5_IB_INDEX(rp_clamp_tgt_rate_ati):
171 *attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR;
172 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
173 clamp_tgt_rate_after_time_inc, var);
174 break;
175 case MLX5_IB_INDEX(rp_time_reset):
176 *attr_mask |= MLX5_IB_RP_TIME_RESET_ATTR;
177 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
178 rpg_time_reset, var);
179 break;
180 case MLX5_IB_INDEX(rp_byte_reset):
181 *attr_mask |= MLX5_IB_RP_BYTE_RESET_ATTR;
182 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
183 rpg_byte_reset, var);
184 break;
185 case MLX5_IB_INDEX(rp_threshold):
186 *attr_mask |= MLX5_IB_RP_THRESHOLD_ATTR;
187 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
188 rpg_threshold, var);
189 break;
190 case MLX5_IB_INDEX(rp_ai_rate):
191 *attr_mask |= MLX5_IB_RP_AI_RATE_ATTR;
192 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
193 rpg_ai_rate, var);
194 break;
195 case MLX5_IB_INDEX(rp_hai_rate):
196 *attr_mask |= MLX5_IB_RP_HAI_RATE_ATTR;
197 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
198 rpg_hai_rate, var);
199 break;
200 case MLX5_IB_INDEX(rp_min_dec_fac):
201 *attr_mask |= MLX5_IB_RP_MIN_DEC_FAC_ATTR;
202 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
203 rpg_min_dec_fac, var);
204 break;
205 case MLX5_IB_INDEX(rp_min_rate):
206 *attr_mask |= MLX5_IB_RP_MIN_RATE_ATTR;
207 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
208 rpg_min_rate, var);
209 break;
210 case MLX5_IB_INDEX(rp_rate_to_set_on_first_cnp):
211 *attr_mask |= MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR;
212 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
213 rate_to_set_on_first_cnp, var);
214 break;
215 case MLX5_IB_INDEX(rp_dce_tcp_g):
216 *attr_mask |= MLX5_IB_RP_DCE_TCP_G_ATTR;
217 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
218 dce_tcp_g, var);
219 break;
220 case MLX5_IB_INDEX(rp_dce_tcp_rtt):
221 *attr_mask |= MLX5_IB_RP_DCE_TCP_RTT_ATTR;
222 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
223 dce_tcp_rtt, var);
224 break;
225 case MLX5_IB_INDEX(rp_rate_reduce_monitor_period):
226 *attr_mask |= MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR;
227 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
228 rate_reduce_monitor_period, var);
229 break;
230 case MLX5_IB_INDEX(rp_initial_alpha_value):
231 *attr_mask |= MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR;
232 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
233 initial_alpha_value, var);
234 break;
235 case MLX5_IB_INDEX(rp_gd):
236 *attr_mask |= MLX5_IB_RP_GD_ATTR;
237 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
238 rpg_gd, var);
239 break;
240 case MLX5_IB_INDEX(np_cnp_dscp):
241 *attr_mask |= MLX5_IB_NP_CNP_DSCP_ATTR;
242 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_np, field, cnp_dscp, var);
243 break;
244 case MLX5_IB_INDEX(np_cnp_prio_mode):
245 *attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
246 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_np, field, cnp_prio_mode, var);
247 break;
248 case MLX5_IB_INDEX(np_cnp_prio):
249 *attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
250 MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, 0);
251 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_np, field, cnp_802p_prio, var);
252 break;
253 default:
254 break;
255 }
256 }
257
258 static int
mlx5_ib_get_all_cc_params(struct mlx5_ib_dev * dev)259 mlx5_ib_get_all_cc_params(struct mlx5_ib_dev *dev)
260 {
261 int outlen = MLX5_ST_SZ_BYTES(query_cong_params_out);
262 enum mlx5_ib_cong_node_type node = 0;
263 void *out;
264 void *field;
265 u32 x;
266 int err = 0;
267
268 out = kzalloc(outlen, GFP_KERNEL);
269 if (!out)
270 return -ENOMEM;
271
272 /* get the current values */
273 for (x = 0; x != MLX5_IB_CONG_PARAMS_NUM; x++) {
274 if (node != mlx5_ib_param_to_node(x)) {
275 node = mlx5_ib_param_to_node(x);
276
277 err = mlx5_cmd_query_cong_params(dev->mdev, node, out, outlen);
278 if (err)
279 break;
280 }
281 field = MLX5_ADDR_OF(query_cong_params_out, out, congestion_parameters);
282 dev->congestion.arg[x] = mlx5_get_cc_param_val(field, x);
283 }
284 kfree(out);
285 return err;
286 }
287
288 static int
mlx5_ib_set_cc_params(struct mlx5_ib_dev * dev,u32 index,u64 var)289 mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, u32 index, u64 var)
290 {
291 int inlen = MLX5_ST_SZ_BYTES(modify_cong_params_in);
292 enum mlx5_ib_cong_node_type node;
293 u32 attr_mask = 0;
294 void *field;
295 void *in;
296 int err;
297
298 in = kzalloc(inlen, GFP_KERNEL);
299 if (!in)
300 return -ENOMEM;
301
302 MLX5_SET(modify_cong_params_in, in, opcode,
303 MLX5_CMD_OP_MODIFY_CONG_PARAMS);
304
305 node = mlx5_ib_param_to_node(index);
306 MLX5_SET(modify_cong_params_in, in, cong_protocol, node);
307
308 field = MLX5_ADDR_OF(modify_cong_params_in, in, congestion_parameters);
309 mlx5_ib_set_cc_param_mask_val(field, index, var, &attr_mask);
310
311 field = MLX5_ADDR_OF(modify_cong_params_in, in, field_select);
312 MLX5_SET(field_select_r_roce_rp, field, field_select_r_roce_rp,
313 attr_mask);
314
315 err = mlx5_cmd_modify_cong_params(dev->mdev, in, inlen);
316 kfree(in);
317
318 return err;
319 }
320
321 static int
mlx5_ib_cong_params_handler(SYSCTL_HANDLER_ARGS)322 mlx5_ib_cong_params_handler(SYSCTL_HANDLER_ARGS)
323 {
324 struct mlx5_ib_dev *dev = arg1;
325 u64 value;
326 int error;
327
328 CONG_LOCK(dev);
329 value = dev->congestion.arg[arg2];
330 if (req != NULL) {
331 error = sysctl_handle_64(oidp, &value, 0, req);
332 if (error || req->newptr == NULL ||
333 value == dev->congestion.arg[arg2])
334 goto done;
335
336 /* assign new value */
337 dev->congestion.arg[arg2] = value;
338 } else {
339 error = 0;
340 }
341 if (!MLX5_CAP_GEN(dev->mdev, cc_modify_allowed))
342 error = EPERM;
343 else {
344 error = -mlx5_ib_set_cc_params(dev, MLX5_IB_INDEX(arg[arg2]),
345 dev->congestion.arg[arg2]);
346 }
347 done:
348 CONG_UNLOCK(dev);
349
350 return (error);
351 }
352
353 static int
mlx5_ib_get_all_cc_status(struct mlx5_ib_dev * dev)354 mlx5_ib_get_all_cc_status(struct mlx5_ib_dev *dev)
355 {
356 const int outlen = MLX5_ST_SZ_BYTES(query_cong_status_out);
357 uint32_t out[MLX5_ST_SZ_DW(query_cong_status_out)] = {};
358 int error;
359
360 #define MLX5_IB_CONG_STATUS_READ(a,b,c,d,e,node,prio,field) do { \
361 error = mlx5_cmd_query_cong_status(dev->mdev, node, prio, out, outlen); \
362 if (error) \
363 goto done; \
364 dev->congestion.c = MLX5_GET(query_cong_status_out, out, field); \
365 } while (0);
366
367 MLX5_IB_CONG_STATUS(MLX5_IB_CONG_STATUS_READ);
368 done:
369 return (error);
370 }
371
372 static int
mlx5_ib_cong_status_handler(SYSCTL_HANDLER_ARGS)373 mlx5_ib_cong_status_handler(SYSCTL_HANDLER_ARGS)
374 {
375 const int inlen = MLX5_ST_SZ_BYTES(modify_cong_status_in);
376 uint32_t in[MLX5_ST_SZ_DW(modify_cong_status_in)] = {};
377 struct mlx5_ib_dev *dev = arg1;
378 u64 value;
379 int error;
380
381 CONG_LOCK(dev);
382 value = dev->congestion.arg[arg2];
383 if (req != NULL) {
384 error = sysctl_handle_64(oidp, &value, 0, req);
385 /* convert value into a boolean */
386 value = value ? 1 : 0;
387 if (error || req->newptr == NULL ||
388 value == dev->congestion.arg[arg2])
389 goto done;
390
391 /* assign new binary value */
392 dev->congestion.arg[arg2] = value;
393 } else {
394 error = 0;
395 }
396 if (!MLX5_CAP_GEN(dev->mdev, cc_modify_allowed))
397 error = EPERM;
398 else switch (arg2) {
399 #define MLX5_IB_CONG_STATUS_WRITE(a,b,c,d,e,node,prio,field) \
400 case MLX5_IB_INDEX(c): \
401 MLX5_SET(modify_cong_status_in, in, opcode, \
402 MLX5_CMD_OP_MODIFY_CONG_STATUS); \
403 MLX5_SET(modify_cong_status_in, in, priority, prio); \
404 MLX5_SET(modify_cong_status_in, in, cong_protocol, node); \
405 MLX5_SET(modify_cong_status_in, in, field, value); \
406 error = -mlx5_cmd_modify_cong_status(dev->mdev, in, inlen); \
407 break;
408 MLX5_IB_CONG_STATUS(MLX5_IB_CONG_STATUS_WRITE)
409 default:
410 error = EINVAL;
411 break;
412 }
413 done:
414 CONG_UNLOCK(dev);
415
416 return (error);
417 }
418
/*
 * Read a 64-bit quantity that the layout "t" exposes as two separate
 * fields, f_high and f_low: the high part is shifted up by 32 bits and
 * OR-ed with the low part.
 */
#define MLX5_GET_UNALIGNED_64(t,p,f) \
    (((u64)MLX5_GET(t,p,f##_high) << 32) | MLX5_GET(t,p,f##_low))
421
422 static void
mlx5_ib_read_cong_stats(struct work_struct * work)423 mlx5_ib_read_cong_stats(struct work_struct *work)
424 {
425 struct mlx5_ib_dev *dev =
426 container_of(work, struct mlx5_ib_dev, congestion.dwork.work);
427 const int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
428 void *out;
429
430 out = kzalloc(outlen, GFP_KERNEL);
431 if (!out)
432 goto done;
433
434 CONG_LOCK(dev);
435 if (mlx5_cmd_query_cong_counter(dev->mdev, 0, out, outlen))
436 memset(out, 0, outlen);
437
438 dev->congestion.syndrome =
439 MLX5_GET(query_cong_statistics_out, out, syndrome);
440 dev->congestion.rp_cur_flows =
441 MLX5_GET(query_cong_statistics_out, out, rp_cur_flows);
442 dev->congestion.sum_flows =
443 MLX5_GET(query_cong_statistics_out, out, sum_flows);
444 dev->congestion.rp_cnp_ignored =
445 MLX5_GET_UNALIGNED_64(query_cong_statistics_out, out, rp_cnp_ignored);
446 dev->congestion.rp_cnp_handled =
447 MLX5_GET_UNALIGNED_64(query_cong_statistics_out, out, rp_cnp_handled);
448 dev->congestion.time_stamp =
449 MLX5_GET_UNALIGNED_64(query_cong_statistics_out, out, time_stamp);
450 dev->congestion.accumulators_period =
451 MLX5_GET(query_cong_statistics_out, out, accumulators_period);
452 dev->congestion.np_ecn_marked_roce_packets =
453 MLX5_GET_UNALIGNED_64(query_cong_statistics_out, out, np_ecn_marked_roce_packets);
454 dev->congestion.np_cnp_sent =
455 MLX5_GET_UNALIGNED_64(query_cong_statistics_out, out, np_cnp_sent);
456
457 CONG_UNLOCK(dev);
458 kfree(out);
459
460 done:
461 schedule_delayed_work(&dev->congestion.dwork, hz);
462 }
463
464 void
mlx5_ib_cleanup_congestion(struct mlx5_ib_dev * dev)465 mlx5_ib_cleanup_congestion(struct mlx5_ib_dev *dev)
466 {
467
468 while (cancel_delayed_work_sync(&dev->congestion.dwork))
469 ;
470 sysctl_ctx_free(&dev->congestion.ctx);
471 sx_destroy(&dev->congestion.lock);
472 }
473
474 int
mlx5_ib_init_congestion(struct mlx5_ib_dev * dev)475 mlx5_ib_init_congestion(struct mlx5_ib_dev *dev)
476 {
477 struct sysctl_ctx_list *ctx;
478 struct sysctl_oid *parent;
479 struct sysctl_oid *node;
480 int err;
481 u32 x;
482
483 ctx = &dev->congestion.ctx;
484 sysctl_ctx_init(ctx);
485 sx_init(&dev->congestion.lock, "mlx5ibcong");
486 INIT_DELAYED_WORK(&dev->congestion.dwork, mlx5_ib_read_cong_stats);
487
488 if (!MLX5_CAP_GEN(dev->mdev, cc_query_allowed))
489 return (0);
490
491 err = mlx5_ib_get_all_cc_params(dev);
492 if (err)
493 return (err);
494
495 err = mlx5_ib_get_all_cc_status(dev);
496 if (err)
497 return (err);
498
499 parent = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(dev->ib_dev.dev.kobj.oidp),
500 OID_AUTO, "cong", CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
501 "Congestion control");
502 if (parent == NULL)
503 return (-ENOMEM);
504
505 node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(parent),
506 OID_AUTO, "conf", CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
507 "Configuration");
508 if (node == NULL) {
509 sysctl_ctx_free(&dev->congestion.ctx);
510 return (-ENOMEM);
511 }
512
513 for (x = 0; x != MLX5_IB_CONG_PARAMS_NUM; x++) {
514 SYSCTL_ADD_PROC(ctx,
515 SYSCTL_CHILDREN(node), OID_AUTO,
516 mlx5_ib_cong_params_desc[2 * x],
517 CTLTYPE_U64 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
518 dev, x, &mlx5_ib_cong_params_handler, "QU",
519 mlx5_ib_cong_params_desc[2 * x + 1]);
520 }
521
522 for (x = 0; x != MLX5_IB_CONG_STATUS_NUM; x++) {
523 SYSCTL_ADD_PROC(ctx,
524 SYSCTL_CHILDREN(node), OID_AUTO,
525 mlx5_ib_cong_status_desc[2 * x],
526 CTLTYPE_U64 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
527 dev, x + MLX5_IB_CONG_PARAMS_NUM + MLX5_IB_CONG_STATS_NUM,
528 &mlx5_ib_cong_status_handler, "QU",
529 mlx5_ib_cong_status_desc[2 * x + 1]);
530 }
531
532 node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(parent),
533 OID_AUTO, "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
534 "Statistics");
535 if (node == NULL) {
536 sysctl_ctx_free(&dev->congestion.ctx);
537 return (-ENOMEM);
538 }
539
540 for (x = 0; x != MLX5_IB_CONG_STATS_NUM; x++) {
541 /* read-only SYSCTLs */
542 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
543 mlx5_ib_cong_stats_desc[2 * x],
544 CTLFLAG_RD | CTLFLAG_MPSAFE,
545 &dev->congestion.arg[x + MLX5_IB_CONG_PARAMS_NUM],
546 0, mlx5_ib_cong_stats_desc[2 * x + 1]);
547 }
548 schedule_delayed_work(&dev->congestion.dwork, hz);
549 return (0);
550 }
551