1 /*- 2 * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 * 25 * $FreeBSD$ 26 */ 27 28 #include "mlx5_ib.h" 29 30 #include <dev/mlx5/cmd.h> 31 32 static const char *mlx5_ib_cong_params_desc[] = { 33 MLX5_IB_CONG_PARAMS(MLX5_IB_STATS_DESC) 34 }; 35 36 static const char *mlx5_ib_cong_stats_desc[] = { 37 MLX5_IB_CONG_STATS(MLX5_IB_STATS_DESC) 38 }; 39 40 #define MLX5_IB_INDEX(field) (__offsetof(struct mlx5_ib_congestion, field) / sizeof(u64)) 41 #define MLX5_IB_FLD_MAX(type, field) ((1ULL << __mlx5_bit_sz(type, field)) - 1ULL) 42 #define MLX5_IB_SET_CLIPPED(type, ptr, field, var) do { \ 43 /* rangecheck */ \ 44 if ((var) > MLX5_IB_FLD_MAX(type, field)) \ 45 (var) = MLX5_IB_FLD_MAX(type, field); \ 46 /* set value */ \ 47 MLX5_SET(type, ptr, field, var); \ 48 } while (0) 49 50 #define CONG_LOCK(dev) sx_xlock(&(dev)->congestion.lock) 51 #define CONG_UNLOCK(dev) sx_xunlock(&(dev)->congestion.lock) 52 #define CONG_LOCKED(dev) sx_xlocked(&(dev)->congestion.lock) 53 54 #define MLX5_IB_RP_CLAMP_TGT_RATE_ATTR BIT(1) 55 #define MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR BIT(2) 56 #define MLX5_IB_RP_TIME_RESET_ATTR BIT(3) 57 #define MLX5_IB_RP_BYTE_RESET_ATTR BIT(4) 58 #define MLX5_IB_RP_THRESHOLD_ATTR BIT(5) 59 #define MLX5_IB_RP_AI_RATE_ATTR BIT(7) 60 #define MLX5_IB_RP_HAI_RATE_ATTR BIT(8) 61 #define MLX5_IB_RP_MIN_DEC_FAC_ATTR BIT(9) 62 #define MLX5_IB_RP_MIN_RATE_ATTR BIT(10) 63 #define MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR BIT(11) 64 #define MLX5_IB_RP_DCE_TCP_G_ATTR BIT(12) 65 #define MLX5_IB_RP_DCE_TCP_RTT_ATTR BIT(13) 66 #define MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR BIT(14) 67 #define MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR BIT(15) 68 #define MLX5_IB_RP_GD_ATTR BIT(16) 69 70 #define MLX5_IB_NP_CNP_DSCP_ATTR BIT(3) 71 #define MLX5_IB_NP_CNP_PRIO_MODE_ATTR BIT(4) 72 73 enum mlx5_ib_cong_node_type { 74 MLX5_IB_RROCE_ECN_RP = 1, 75 MLX5_IB_RROCE_ECN_NP = 2, 76 }; 77 78 static enum mlx5_ib_cong_node_type 79 mlx5_ib_param_to_node(u32 index) 80 { 81 82 if (index >= MLX5_IB_INDEX(rp_clamp_tgt_rate) && 83 index <= MLX5_IB_INDEX(rp_gd)) 84 return MLX5_IB_RROCE_ECN_RP; 85 else 86 return MLX5_IB_RROCE_ECN_NP; 87 } 88 89 static u64 90 mlx5_get_cc_param_val(void *field, u32 index) 91 { 92 93 switch (index) { 94 case MLX5_IB_INDEX(rp_clamp_tgt_rate): 95 return MLX5_GET(cong_control_r_roce_ecn_rp, field, 96 clamp_tgt_rate); 97 case MLX5_IB_INDEX(rp_clamp_tgt_rate_ati): 98 return MLX5_GET(cong_control_r_roce_ecn_rp, field, 99 clamp_tgt_rate_after_time_inc); 100 case MLX5_IB_INDEX(rp_time_reset): 101 return MLX5_GET(cong_control_r_roce_ecn_rp, field, 102 rpg_time_reset); 103 case MLX5_IB_INDEX(rp_byte_reset): 104 return MLX5_GET(cong_control_r_roce_ecn_rp, field, 105 rpg_byte_reset); 106 case MLX5_IB_INDEX(rp_threshold): 107 return MLX5_GET(cong_control_r_roce_ecn_rp, field, 108 rpg_threshold); 109 case MLX5_IB_INDEX(rp_ai_rate): 110 return MLX5_GET(cong_control_r_roce_ecn_rp, field, 111 rpg_ai_rate); 112 case MLX5_IB_INDEX(rp_hai_rate): 113 return MLX5_GET(cong_control_r_roce_ecn_rp, field, 114 rpg_hai_rate); 115 case MLX5_IB_INDEX(rp_min_dec_fac): 116 return MLX5_GET(cong_control_r_roce_ecn_rp, field, 117 rpg_min_dec_fac); 118 case MLX5_IB_INDEX(rp_min_rate): 119 return MLX5_GET(cong_control_r_roce_ecn_rp, field, 120 rpg_min_rate); 121 case MLX5_IB_INDEX(rp_rate_to_set_on_first_cnp): 122 return MLX5_GET(cong_control_r_roce_ecn_rp, field, 123 rate_to_set_on_first_cnp); 124 case MLX5_IB_INDEX(rp_dce_tcp_g): 125 return MLX5_GET(cong_control_r_roce_ecn_rp, field, 126 dce_tcp_g); 127 case MLX5_IB_INDEX(rp_dce_tcp_rtt): 128 return MLX5_GET(cong_control_r_roce_ecn_rp, field, 129 dce_tcp_rtt); 130 case MLX5_IB_INDEX(rp_rate_reduce_monitor_period): 131 return MLX5_GET(cong_control_r_roce_ecn_rp, field, 132 rate_reduce_monitor_period); 133 case MLX5_IB_INDEX(rp_initial_alpha_value): 134 return MLX5_GET(cong_control_r_roce_ecn_rp, field, 135 initial_alpha_value); 136 case MLX5_IB_INDEX(rp_gd): 137 return MLX5_GET(cong_control_r_roce_ecn_rp, field, 138 rpg_gd); 139 case MLX5_IB_INDEX(np_cnp_dscp): 140 return MLX5_GET(cong_control_r_roce_ecn_np, field, 141 cnp_dscp); 142 case MLX5_IB_INDEX(np_cnp_prio_mode): 143 return MLX5_GET(cong_control_r_roce_ecn_np, field, 144 cnp_prio_mode); 145 case MLX5_IB_INDEX(np_cnp_prio): 146 return MLX5_GET(cong_control_r_roce_ecn_np, field, 147 cnp_802p_prio); 148 default: 149 return 0; 150 } 151 } 152 153 static void 154 mlx5_ib_set_cc_param_mask_val(void *field, u32 index, 155 u64 var, u32 *attr_mask) 156 { 157 158 switch (index) { 159 case MLX5_IB_INDEX(rp_clamp_tgt_rate): 160 *attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATTR; 161 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field, 162 clamp_tgt_rate, var); 163 break; 164 case MLX5_IB_INDEX(rp_clamp_tgt_rate_ati): 165 *attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR; 166 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field, 167 clamp_tgt_rate_after_time_inc, var); 168 break; 169 case MLX5_IB_INDEX(rp_time_reset): 170 *attr_mask |= MLX5_IB_RP_TIME_RESET_ATTR; 171 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field, 172 rpg_time_reset, var); 173 break; 174 case MLX5_IB_INDEX(rp_byte_reset): 175 *attr_mask |= MLX5_IB_RP_BYTE_RESET_ATTR; 176 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field, 177 rpg_byte_reset, var); 178 break; 179 case MLX5_IB_INDEX(rp_threshold): 180 *attr_mask |= MLX5_IB_RP_THRESHOLD_ATTR; 181 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field, 182 rpg_threshold, var); 183 break; 184 case MLX5_IB_INDEX(rp_ai_rate): 185 *attr_mask |= MLX5_IB_RP_AI_RATE_ATTR; 186 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field, 187 rpg_ai_rate, var); 188 break; 189 case MLX5_IB_INDEX(rp_hai_rate): 190 *attr_mask |= MLX5_IB_RP_HAI_RATE_ATTR; 191 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field, 192 rpg_hai_rate, var); 193 break; 194 case MLX5_IB_INDEX(rp_min_dec_fac): 195 *attr_mask |= MLX5_IB_RP_MIN_DEC_FAC_ATTR; 196 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field, 197 rpg_min_dec_fac, var); 198 break; 199 case MLX5_IB_INDEX(rp_min_rate): 200 *attr_mask |= MLX5_IB_RP_MIN_RATE_ATTR; 201 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field, 202 rpg_min_rate, var); 203 break; 204 case MLX5_IB_INDEX(rp_rate_to_set_on_first_cnp): 205 *attr_mask |= MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR; 206 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field, 207 rate_to_set_on_first_cnp, var); 208 break; 209 case MLX5_IB_INDEX(rp_dce_tcp_g): 210 *attr_mask |= MLX5_IB_RP_DCE_TCP_G_ATTR; 211 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field, 212 dce_tcp_g, var); 213 break; 214 case MLX5_IB_INDEX(rp_dce_tcp_rtt): 215 *attr_mask |= MLX5_IB_RP_DCE_TCP_RTT_ATTR; 216 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field, 217 dce_tcp_rtt, var); 218 break; 219 case MLX5_IB_INDEX(rp_rate_reduce_monitor_period): 220 *attr_mask |= MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR; 221 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field, 222 rate_reduce_monitor_period, var); 223 break; 224 case MLX5_IB_INDEX(rp_initial_alpha_value): 225 *attr_mask |= MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR; 226 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field, 227 initial_alpha_value, var); 228 break; 229 case MLX5_IB_INDEX(rp_gd): 230 *attr_mask |= MLX5_IB_RP_GD_ATTR; 231 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field, 232 rpg_gd, var); 233 break; 234 case MLX5_IB_INDEX(np_cnp_dscp): 235 *attr_mask |= MLX5_IB_NP_CNP_DSCP_ATTR; 236 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_np, field, cnp_dscp, var); 237 break; 238 case MLX5_IB_INDEX(np_cnp_prio_mode): 239 *attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR; 240 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_np, field, cnp_prio_mode, var); 241 break; 242 case MLX5_IB_INDEX(np_cnp_prio): 243 *attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR; 244 MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, 0); 245 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_np, field, cnp_802p_prio, var); 246 break; 247 default: 248 break; 249 } 250 } 251 252 static int 253 mlx5_ib_get_all_cc_params(struct mlx5_ib_dev *dev) 254 { 255 int outlen = MLX5_ST_SZ_BYTES(query_cong_params_out); 256 enum mlx5_ib_cong_node_type node = 0; 257 void *out; 258 void *field; 259 u32 x; 260 int err = 0; 261 262 out = kzalloc(outlen, GFP_KERNEL); 263 if (!out) 264 return -ENOMEM; 265 266 /* get the current values */ 267 for (x = 0; x != MLX5_IB_CONG_PARAMS_NUM; x++) { 268 if (node != mlx5_ib_param_to_node(x)) { 269 node = mlx5_ib_param_to_node(x); 270 271 err = mlx5_cmd_query_cong_params(dev->mdev, node, out, outlen); 272 if (err) 273 break; 274 } 275 field = MLX5_ADDR_OF(query_cong_params_out, out, congestion_parameters); 276 dev->congestion.arg[x] = mlx5_get_cc_param_val(field, x); 277 } 278 kfree(out); 279 return err; 280 } 281 282 static int 283 mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, u32 index, u64 var) 284 { 285 int inlen = MLX5_ST_SZ_BYTES(modify_cong_params_in); 286 enum mlx5_ib_cong_node_type node; 287 u32 attr_mask = 0; 288 void *field; 289 void *in; 290 int err; 291 292 in = kzalloc(inlen, GFP_KERNEL); 293 if (!in) 294 return -ENOMEM; 295 296 MLX5_SET(modify_cong_params_in, in, opcode, 297 MLX5_CMD_OP_MODIFY_CONG_PARAMS); 298 299 node = mlx5_ib_param_to_node(index); 300 MLX5_SET(modify_cong_params_in, in, cong_protocol, node); 301 302 field = MLX5_ADDR_OF(modify_cong_params_in, in, congestion_parameters); 303 mlx5_ib_set_cc_param_mask_val(field, index, var, &attr_mask); 304 305 field = MLX5_ADDR_OF(modify_cong_params_in, in, field_select); 306 MLX5_SET(field_select_r_roce_rp, field, field_select_r_roce_rp, 307 attr_mask); 308 309 err = mlx5_cmd_modify_cong_params(dev->mdev, in, inlen); 310 kfree(in); 311 312 return err; 313 } 314 315 static int 316 mlx5_ib_cong_params_handler(SYSCTL_HANDLER_ARGS) 317 { 318 struct mlx5_ib_dev *dev = arg1; 319 u64 value; 320 int error; 321 322 CONG_LOCK(dev); 323 value = dev->congestion.arg[arg2]; 324 if (req != NULL) { 325 error = sysctl_handle_64(oidp, &value, 0, req); 326 if (error || req->newptr == NULL || 327 value == dev->congestion.arg[arg2]) 328 goto done; 329 330 /* assign new value */ 331 dev->congestion.arg[arg2] = value; 332 } else { 333 error = 0; 334 } 335 if (!MLX5_CAP_GEN(dev->mdev, cc_modify_allowed)) 336 error = EPERM; 337 else { 338 error = -mlx5_ib_set_cc_params(dev, MLX5_IB_INDEX(arg[arg2]), 339 dev->congestion.arg[arg2]); 340 } 341 done: 342 CONG_UNLOCK(dev); 343 344 return (error); 345 } 346 347 #define MLX5_GET_UNALIGNED_64(t,p,f) \ 348 (((u64)MLX5_GET(t,p,f##_high) << 32) | MLX5_GET(t,p,f##_low)) 349 350 static void 351 mlx5_ib_read_cong_stats(struct work_struct *work) 352 { 353 struct mlx5_ib_dev *dev = 354 container_of(work, struct mlx5_ib_dev, congestion.dwork.work); 355 const int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out); 356 void *out; 357 358 out = kzalloc(outlen, GFP_KERNEL); 359 if (!out) 360 goto done; 361 362 CONG_LOCK(dev); 363 if (mlx5_cmd_query_cong_counter(dev->mdev, 0, out, outlen)) 364 memset(out, 0, outlen); 365 366 dev->congestion.syndrome = 367 MLX5_GET(query_cong_statistics_out, out, syndrome); 368 dev->congestion.rp_cur_flows = 369 MLX5_GET(query_cong_statistics_out, out, rp_cur_flows); 370 dev->congestion.sum_flows = 371 MLX5_GET(query_cong_statistics_out, out, sum_flows); 372 dev->congestion.rp_cnp_ignored = 373 MLX5_GET_UNALIGNED_64(query_cong_statistics_out, out, rp_cnp_ignored); 374 dev->congestion.rp_cnp_handled = 375 MLX5_GET_UNALIGNED_64(query_cong_statistics_out, out, rp_cnp_handled); 376 dev->congestion.time_stamp = 377 MLX5_GET_UNALIGNED_64(query_cong_statistics_out, out, time_stamp); 378 dev->congestion.accumulators_period = 379 MLX5_GET(query_cong_statistics_out, out, accumulators_period); 380 dev->congestion.np_ecn_marked_roce_packets = 381 MLX5_GET_UNALIGNED_64(query_cong_statistics_out, out, np_ecn_marked_roce_packets); 382 dev->congestion.np_cnp_sent = 383 MLX5_GET_UNALIGNED_64(query_cong_statistics_out, out, np_cnp_sent); 384 385 CONG_UNLOCK(dev); 386 kfree(out); 387 388 done: 389 schedule_delayed_work(&dev->congestion.dwork, hz); 390 } 391 392 void 393 mlx5_ib_cleanup_congestion(struct mlx5_ib_dev *dev) 394 { 395 396 while (cancel_delayed_work_sync(&dev->congestion.dwork)) 397 ; 398 sysctl_ctx_free(&dev->congestion.ctx); 399 sx_destroy(&dev->congestion.lock); 400 } 401 402 int 403 mlx5_ib_init_congestion(struct mlx5_ib_dev *dev) 404 { 405 struct sysctl_ctx_list *ctx; 406 struct sysctl_oid *parent; 407 struct sysctl_oid *node; 408 int err; 409 u32 x; 410 411 ctx = &dev->congestion.ctx; 412 sysctl_ctx_init(ctx); 413 sx_init(&dev->congestion.lock, "mlx5ibcong"); 414 INIT_DELAYED_WORK(&dev->congestion.dwork, mlx5_ib_read_cong_stats); 415 416 if (!MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) 417 return (0); 418 419 err = mlx5_ib_get_all_cc_params(dev); 420 if (err) 421 return (err); 422 423 parent = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(dev->ib_dev.dev.kobj.oidp), 424 OID_AUTO, "cong", CTLFLAG_RW, NULL, "Congestion control"); 425 if (parent == NULL) 426 return (-ENOMEM); 427 428 node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(parent), 429 OID_AUTO, "conf", CTLFLAG_RW, NULL, "Configuration"); 430 if (node == NULL) { 431 sysctl_ctx_free(&dev->congestion.ctx); 432 return (-ENOMEM); 433 } 434 435 for (x = 0; x != MLX5_IB_CONG_PARAMS_NUM; x++) { 436 SYSCTL_ADD_PROC(ctx, 437 SYSCTL_CHILDREN(node), OID_AUTO, 438 mlx5_ib_cong_params_desc[2 * x], 439 CTLTYPE_U64 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 440 dev, x, &mlx5_ib_cong_params_handler, "QU", 441 mlx5_ib_cong_params_desc[2 * x + 1]); 442 } 443 444 node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(parent), 445 OID_AUTO, "stats", CTLFLAG_RD, NULL, "Statistics"); 446 if (node == NULL) { 447 sysctl_ctx_free(&dev->congestion.ctx); 448 return (-ENOMEM); 449 } 450 451 for (x = 0; x != MLX5_IB_CONG_STATS_NUM; x++) { 452 /* read-only SYSCTLs */ 453 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, 454 mlx5_ib_cong_stats_desc[2 * x], 455 CTLFLAG_RD | CTLFLAG_MPSAFE, 456 &dev->congestion.arg[x + MLX5_IB_CONG_PARAMS_NUM], 457 0, mlx5_ib_cong_stats_desc[2 * x + 1]); 458 } 459 schedule_delayed_work(&dev->congestion.dwork, hz); 460 return (0); 461 } 462