/*-
 * Copyright (c) 2013-2020, Mellanox Technologies, Ltd. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "mlx5_ib.h"

#include <dev/mlx5/cmd.h>

static const char *mlx5_ib_cong_params_desc[] = {
	MLX5_IB_CONG_PARAMS(MLX5_IB_STATS_DESC)
};

static const char *mlx5_ib_cong_stats_desc[] = {
	MLX5_IB_CONG_STATS(MLX5_IB_STATS_DESC)
};

#define MLX5_IB_INDEX(field) ( \
    (__offsetof(struct mlx5_ib_congestion, field) - \
     __offsetof(struct mlx5_ib_congestion, arg[0])) / sizeof(u64))
#define MLX5_IB_FLD_MAX(type, field) ((1ULL << __mlx5_bit_sz(type, field)) - 1ULL)
#define MLX5_IB_SET_CLIPPED(type, ptr, field, var) do { \
	/* rangecheck */ \
	if ((var) > MLX5_IB_FLD_MAX(type, field)) \
		(var) = MLX5_IB_FLD_MAX(type, field); \
	/* set value */ \
	MLX5_SET(type, ptr, field, var); \
} while (0)

#define CONG_LOCK(dev) sx_xlock(&(dev)->congestion.lock)
#define CONG_UNLOCK(dev) sx_xunlock(&(dev)->congestion.lock)
#define CONG_LOCKED(dev) sx_xlocked(&(dev)->congestion.lock)

#define MLX5_IB_RP_CLAMP_TGT_RATE_ATTR			BIT(1)
#define MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR		BIT(2)
#define MLX5_IB_RP_TIME_RESET_ATTR			BIT(3)
#define MLX5_IB_RP_BYTE_RESET_ATTR			BIT(4)
#define MLX5_IB_RP_THRESHOLD_ATTR			BIT(5)
#define MLX5_IB_RP_AI_RATE_ATTR				BIT(7)
#define MLX5_IB_RP_HAI_RATE_ATTR			BIT(8)
#define MLX5_IB_RP_MIN_DEC_FAC_ATTR			BIT(9)
#define MLX5_IB_RP_MIN_RATE_ATTR			BIT(10)
#define MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR	BIT(11)
#define MLX5_IB_RP_DCE_TCP_G_ATTR			BIT(12)
#define MLX5_IB_RP_DCE_TCP_RTT_ATTR			BIT(13)
#define MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR	BIT(14)
#define MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR		BIT(15)
#define MLX5_IB_RP_GD_ATTR				BIT(16)

#define MLX5_IB_NP_CNP_DSCP_ATTR			BIT(3)
#define MLX5_IB_NP_CNP_PRIO_MODE_ATTR			BIT(4)

enum mlx5_ib_cong_node_type {
	MLX5_IB_RROCE_ECN_RP = 1,
	MLX5_IB_RROCE_ECN_NP = 2,
};

static enum mlx5_ib_cong_node_type
mlx5_ib_param_to_node(u32 index)
{

	if (index >= MLX5_IB_INDEX(rp_clamp_tgt_rate) &&
	    index <= MLX5_IB_INDEX(rp_gd))
		return MLX5_IB_RROCE_ECN_RP;
	else
		return MLX5_IB_RROCE_ECN_NP;
}

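/*
 * Extract a single congestion control parameter, identified by its
 * index into the congestion.arg[] array, from a queried RP or NP
 * parameter block.
 */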
static u64
mlx5_get_cc_param_val(void *field, u32 index)
{

	switch (index) {
	case MLX5_IB_INDEX(rp_clamp_tgt_rate):
		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
		    clamp_tgt_rate);
	case MLX5_IB_INDEX(rp_clamp_tgt_rate_ati):
		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
		    clamp_tgt_rate_after_time_inc);
	case MLX5_IB_INDEX(rp_time_reset):
		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
		    rpg_time_reset);
	case MLX5_IB_INDEX(rp_byte_reset):
		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
		    rpg_byte_reset);
	case MLX5_IB_INDEX(rp_threshold):
		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
		    rpg_threshold);
	case MLX5_IB_INDEX(rp_ai_rate):
		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
		    rpg_ai_rate);
	case MLX5_IB_INDEX(rp_hai_rate):
		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
		    rpg_hai_rate);
	case MLX5_IB_INDEX(rp_min_dec_fac):
		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
		    rpg_min_dec_fac);
	case MLX5_IB_INDEX(rp_min_rate):
		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
		    rpg_min_rate);
	case MLX5_IB_INDEX(rp_rate_to_set_on_first_cnp):
		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
		    rate_to_set_on_first_cnp);
	case MLX5_IB_INDEX(rp_dce_tcp_g):
		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
		    dce_tcp_g);
	case MLX5_IB_INDEX(rp_dce_tcp_rtt):
		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
		    dce_tcp_rtt);
	case MLX5_IB_INDEX(rp_rate_reduce_monitor_period):
		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
		    rate_reduce_monitor_period);
	case MLX5_IB_INDEX(rp_initial_alpha_value):
		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
		    initial_alpha_value);
	case MLX5_IB_INDEX(rp_gd):
		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
		    rpg_gd);
	case MLX5_IB_INDEX(np_cnp_dscp):
		return MLX5_GET(cong_control_r_roce_ecn_np, field,
		    cnp_dscp);
	case MLX5_IB_INDEX(np_cnp_prio_mode):
		return MLX5_GET(cong_control_r_roce_ecn_np, field,
		    cnp_prio_mode);
	case MLX5_IB_INDEX(np_cnp_prio):
		return MLX5_GET(cong_control_r_roce_ecn_np, field,
		    cnp_802p_prio);
	default:
		return 0;
	}
}

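/*
 * Write a new parameter value into the RP or NP parameter block,
 * clipping it to the maximum value representable by the destination
 * field, and record the matching field_select bit so that only this
 * parameter is updated by the firmware command.
 */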
static void
mlx5_ib_set_cc_param_mask_val(void *field, u32 index,
    u64 var, u32 *attr_mask)
{

	switch (index) {
	case MLX5_IB_INDEX(rp_clamp_tgt_rate):
		*attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATTR;
		MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
		    clamp_tgt_rate, var);
		break;
	case MLX5_IB_INDEX(rp_clamp_tgt_rate_ati):
		*attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR;
		MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
		    clamp_tgt_rate_after_time_inc, var);
		break;
	case MLX5_IB_INDEX(rp_time_reset):
		*attr_mask |= MLX5_IB_RP_TIME_RESET_ATTR;
		MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
		    rpg_time_reset, var);
		break;
	case MLX5_IB_INDEX(rp_byte_reset):
		*attr_mask |= MLX5_IB_RP_BYTE_RESET_ATTR;
		MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
		    rpg_byte_reset, var);
		break;
	case MLX5_IB_INDEX(rp_threshold):
		*attr_mask |= MLX5_IB_RP_THRESHOLD_ATTR;
		MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
		    rpg_threshold, var);
		break;
	case MLX5_IB_INDEX(rp_ai_rate):
		*attr_mask |= MLX5_IB_RP_AI_RATE_ATTR;
		MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
		    rpg_ai_rate, var);
		break;
	case MLX5_IB_INDEX(rp_hai_rate):
		*attr_mask |= MLX5_IB_RP_HAI_RATE_ATTR;
		MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
		    rpg_hai_rate, var);
		break;
	case MLX5_IB_INDEX(rp_min_dec_fac):
		*attr_mask |= MLX5_IB_RP_MIN_DEC_FAC_ATTR;
		MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
		    rpg_min_dec_fac, var);
		break;
	case MLX5_IB_INDEX(rp_min_rate):
		*attr_mask |= MLX5_IB_RP_MIN_RATE_ATTR;
		MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
		    rpg_min_rate, var);
		break;
	case MLX5_IB_INDEX(rp_rate_to_set_on_first_cnp):
		*attr_mask |= MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR;
		MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
		    rate_to_set_on_first_cnp, var);
		break;
	case MLX5_IB_INDEX(rp_dce_tcp_g):
		*attr_mask |= MLX5_IB_RP_DCE_TCP_G_ATTR;
		MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
		    dce_tcp_g, var);
		break;
	case MLX5_IB_INDEX(rp_dce_tcp_rtt):
		*attr_mask |= MLX5_IB_RP_DCE_TCP_RTT_ATTR;
		MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
		    dce_tcp_rtt, var);
		break;
	case MLX5_IB_INDEX(rp_rate_reduce_monitor_period):
		*attr_mask |= MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR;
		MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
		    rate_reduce_monitor_period, var);
		break;
	case MLX5_IB_INDEX(rp_initial_alpha_value):
		*attr_mask |= MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR;
		MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
		    initial_alpha_value, var);
		break;
	case MLX5_IB_INDEX(rp_gd):
		*attr_mask |= MLX5_IB_RP_GD_ATTR;
		MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
		    rpg_gd, var);
		break;
	case MLX5_IB_INDEX(np_cnp_dscp):
		*attr_mask |= MLX5_IB_NP_CNP_DSCP_ATTR;
		MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_np, field, cnp_dscp, var);
		break;
	case MLX5_IB_INDEX(np_cnp_prio_mode):
		*attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
		MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_np, field, cnp_prio_mode, var);
		break;
	case MLX5_IB_INDEX(np_cnp_prio):
		*attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
		MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, 0);
		MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_np, field, cnp_802p_prio, var);
		break;
	default:
		break;
	}
}

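/*
 * Query the current RP and NP congestion control parameters from
 * firmware and cache them in the congestion.arg[] array.
 */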
static int
mlx5_ib_get_all_cc_params(struct mlx5_ib_dev *dev)
{
	int outlen = MLX5_ST_SZ_BYTES(query_cong_params_out);
	enum mlx5_ib_cong_node_type node = 0;
	void *out;
	void *field;
	u32 x;
	int err = 0;

	out = kzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	/* get the current values */
	for (x = 0; x != MLX5_IB_CONG_PARAMS_NUM; x++) {
		if (node != mlx5_ib_param_to_node(x)) {
			node = mlx5_ib_param_to_node(x);

			err = mlx5_cmd_query_cong_params(dev->mdev, node, out, outlen);
			if (err)
				break;
		}
		field = MLX5_ADDR_OF(query_cong_params_out, out, congestion_parameters);
		dev->congestion.arg[x] = mlx5_get_cc_param_val(field, x);
	}
	kfree(out);
	return err;
}

static int
mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, u32 index, u64 var)
{
	int inlen = MLX5_ST_SZ_BYTES(modify_cong_params_in);
	enum mlx5_ib_cong_node_type node;
	u32 attr_mask = 0;
	void *field;
	void *in;
	int err;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(modify_cong_params_in, in, opcode,
	    MLX5_CMD_OP_MODIFY_CONG_PARAMS);

	node = mlx5_ib_param_to_node(index);
	MLX5_SET(modify_cong_params_in, in, cong_protocol, node);

	field = MLX5_ADDR_OF(modify_cong_params_in, in, congestion_parameters);
	mlx5_ib_set_cc_param_mask_val(field, index, var, &attr_mask);

	field = MLX5_ADDR_OF(modify_cong_params_in, in, field_select);
	MLX5_SET(field_select_r_roce_rp, field, field_select_r_roce_rp,
	    attr_mask);

	err = mlx5_cmd_modify_cong_params(dev->mdev, in, inlen);
	kfree(in);

	return err;
}

static int
mlx5_ib_cong_params_handler(SYSCTL_HANDLER_ARGS)
{
	struct mlx5_ib_dev *dev = arg1;
	u64 value;
	int error;

	CONG_LOCK(dev);
	value = dev->congestion.arg[arg2];
	if (req != NULL) {
		error = sysctl_handle_64(oidp, &value, 0, req);
		if (error || req->newptr == NULL ||
		    value == dev->congestion.arg[arg2])
			goto done;

		/* assign new value */
		dev->congestion.arg[arg2] = value;
	} else {
		error = 0;
	}
	if (!MLX5_CAP_GEN(dev->mdev, cc_modify_allowed))
		error = EPERM;
	else {
		error = -mlx5_ib_set_cc_params(dev, MLX5_IB_INDEX(arg[arg2]),
		    dev->congestion.arg[arg2]);
	}
done:
	CONG_UNLOCK(dev);

	return (error);
}

#define MLX5_GET_UNALIGNED_64(t,p,f) \
    (((u64)MLX5_GET(t,p,f##_high) << 32) | MLX5_GET(t,p,f##_low))

static void
mlx5_ib_read_cong_stats(struct work_struct *work)
{
	struct mlx5_ib_dev *dev =
	    container_of(work, struct mlx5_ib_dev, congestion.dwork.work);
	const int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
	void *out;

	out = kzalloc(outlen, GFP_KERNEL);
	if (!out)
		goto done;

	CONG_LOCK(dev);
	if (mlx5_cmd_query_cong_counter(dev->mdev, 0, out, outlen))
		memset(out, 0, outlen);

	dev->congestion.syndrome =
	    MLX5_GET(query_cong_statistics_out, out, syndrome);
	dev->congestion.rp_cur_flows =
	    MLX5_GET(query_cong_statistics_out, out, rp_cur_flows);
	dev->congestion.sum_flows =
	    MLX5_GET(query_cong_statistics_out, out, sum_flows);
	dev->congestion.rp_cnp_ignored =
	    MLX5_GET_UNALIGNED_64(query_cong_statistics_out, out, rp_cnp_ignored);
	dev->congestion.rp_cnp_handled =
	    MLX5_GET_UNALIGNED_64(query_cong_statistics_out, out, rp_cnp_handled);
	dev->congestion.time_stamp =
	    MLX5_GET_UNALIGNED_64(query_cong_statistics_out, out, time_stamp);
	dev->congestion.accumulators_period =
	    MLX5_GET(query_cong_statistics_out, out, accumulators_period);
	dev->congestion.np_ecn_marked_roce_packets =
	    MLX5_GET_UNALIGNED_64(query_cong_statistics_out, out, np_ecn_marked_roce_packets);
	dev->congestion.np_cnp_sent =
	    MLX5_GET_UNALIGNED_64(query_cong_statistics_out, out, np_cnp_sent);

	CONG_UNLOCK(dev);
	kfree(out);

done:
	schedule_delayed_work(&dev->congestion.dwork, hz);
}

void
mlx5_ib_cleanup_congestion(struct mlx5_ib_dev *dev)
{

	while (cancel_delayed_work_sync(&dev->congestion.dwork))
		;
	sysctl_ctx_free(&dev->congestion.ctx);
	sx_destroy(&dev->congestion.lock);
}

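/*
 * Create the "cong" sysctl tree, with "conf" holding the writable
 * congestion control parameters and "stats" the read-only counters,
 * and start the periodic statistics readback.
 */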
int
mlx5_ib_init_congestion(struct mlx5_ib_dev *dev)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *parent;
	struct sysctl_oid *node;
	int err;
	u32 x;

	ctx = &dev->congestion.ctx;
	sysctl_ctx_init(ctx);
	sx_init(&dev->congestion.lock, "mlx5ibcong");
	INIT_DELAYED_WORK(&dev->congestion.dwork, mlx5_ib_read_cong_stats);

	if (!MLX5_CAP_GEN(dev->mdev, cc_query_allowed))
		return (0);

	err = mlx5_ib_get_all_cc_params(dev);
	if (err)
		return (err);

	parent = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(dev->ib_dev.dev.kobj.oidp),
	    OID_AUTO, "cong", CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
	    "Congestion control");
	if (parent == NULL)
		return (-ENOMEM);

	node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(parent),
	    OID_AUTO, "conf", CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
	    "Configuration");
	if (node == NULL) {
		sysctl_ctx_free(&dev->congestion.ctx);
		return (-ENOMEM);
	}

	for (x = 0; x != MLX5_IB_CONG_PARAMS_NUM; x++) {
		SYSCTL_ADD_PROC(ctx,
		    SYSCTL_CHILDREN(node), OID_AUTO,
		    mlx5_ib_cong_params_desc[2 * x],
		    CTLTYPE_U64 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
		    dev, x, &mlx5_ib_cong_params_handler, "QU",
		    mlx5_ib_cong_params_desc[2 * x + 1]);
	}

	node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(parent),
	    OID_AUTO, "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
	    "Statistics");
	if (node == NULL) {
		sysctl_ctx_free(&dev->congestion.ctx);
		return (-ENOMEM);
	}

	for (x = 0; x != MLX5_IB_CONG_STATS_NUM; x++) {
		/* read-only SYSCTLs */
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
		    mlx5_ib_cong_stats_desc[2 * x],
		    CTLFLAG_RD | CTLFLAG_MPSAFE,
		    &dev->congestion.arg[x + MLX5_IB_CONG_PARAMS_NUM],
		    0, mlx5_ib_cong_stats_desc[2 * x + 1]);
	}
	schedule_delayed_work(&dev->congestion.dwork, hz);
	return (0);
}