1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ 3 4 #include "eswitch.h" 5 #include "lib/mlx5.h" 6 #include "esw/qos.h" 7 #include "en/port.h" 8 #define CREATE_TRACE_POINTS 9 #include "diag/qos_tracepoint.h" 10 11 /* Minimum supported BW share value by the HW is 1 Mbit/sec */ 12 #define MLX5_MIN_BW_SHARE 1 13 14 /* Holds rate nodes associated with an E-Switch. */ 15 struct mlx5_qos_domain { 16 /* Serializes access to all qos changes in the qos domain. */ 17 struct mutex lock; 18 /* List of all mlx5_esw_sched_nodes. */ 19 struct list_head nodes; 20 }; 21 22 static void esw_qos_lock(struct mlx5_eswitch *esw) 23 { 24 mutex_lock(&esw->qos.domain->lock); 25 } 26 27 static void esw_qos_unlock(struct mlx5_eswitch *esw) 28 { 29 mutex_unlock(&esw->qos.domain->lock); 30 } 31 32 static void esw_assert_qos_lock_held(struct mlx5_eswitch *esw) 33 { 34 lockdep_assert_held(&esw->qos.domain->lock); 35 } 36 37 static struct mlx5_qos_domain *esw_qos_domain_alloc(void) 38 { 39 struct mlx5_qos_domain *qos_domain; 40 41 qos_domain = kzalloc(sizeof(*qos_domain), GFP_KERNEL); 42 if (!qos_domain) 43 return NULL; 44 45 mutex_init(&qos_domain->lock); 46 INIT_LIST_HEAD(&qos_domain->nodes); 47 48 return qos_domain; 49 } 50 51 static int esw_qos_domain_init(struct mlx5_eswitch *esw) 52 { 53 esw->qos.domain = esw_qos_domain_alloc(); 54 55 return esw->qos.domain ? 0 : -ENOMEM; 56 } 57 58 static void esw_qos_domain_release(struct mlx5_eswitch *esw) 59 { 60 kfree(esw->qos.domain); 61 esw->qos.domain = NULL; 62 } 63 64 enum sched_node_type { 65 SCHED_NODE_TYPE_VPORTS_TSAR, 66 SCHED_NODE_TYPE_VPORT, 67 SCHED_NODE_TYPE_TC_ARBITER_TSAR, 68 SCHED_NODE_TYPE_RATE_LIMITER, 69 SCHED_NODE_TYPE_VPORT_TC, 70 SCHED_NODE_TYPE_VPORTS_TC_TSAR, 71 }; 72 73 static const char * const sched_node_type_str[] = { 74 [SCHED_NODE_TYPE_VPORTS_TSAR] = "vports TSAR", 75 [SCHED_NODE_TYPE_VPORT] = "vport", 76 [SCHED_NODE_TYPE_TC_ARBITER_TSAR] = "TC Arbiter TSAR", 77 [SCHED_NODE_TYPE_RATE_LIMITER] = "Rate Limiter", 78 [SCHED_NODE_TYPE_VPORT_TC] = "vport TC", 79 [SCHED_NODE_TYPE_VPORTS_TC_TSAR] = "vports TC TSAR", 80 }; 81 82 struct mlx5_esw_sched_node { 83 u32 ix; 84 /* Bandwidth parameters. */ 85 u32 max_rate; 86 u32 min_rate; 87 /* A computed value indicating relative min_rate between node's children. */ 88 u32 bw_share; 89 /* The parent node in the rate hierarchy. */ 90 struct mlx5_esw_sched_node *parent; 91 /* Entry in the parent node's children list. */ 92 struct list_head entry; 93 /* The type of this node in the rate hierarchy. */ 94 enum sched_node_type type; 95 /* The eswitch this node belongs to. */ 96 struct mlx5_eswitch *esw; 97 /* The children nodes of this node, empty list for leaf nodes. */ 98 struct list_head children; 99 /* Valid only if this node is associated with a vport. */ 100 struct mlx5_vport *vport; 101 /* Level in the hierarchy. The root node level is 1. */ 102 u8 level; 103 /* Valid only when this node represents a traffic class. */ 104 u8 tc; 105 /* Valid only for a TC arbiter node or vport TC arbiter. */ 106 u32 tc_bw[DEVLINK_RATE_TCS_MAX]; 107 }; 108 109 static void esw_qos_node_attach_to_parent(struct mlx5_esw_sched_node *node) 110 { 111 if (!node->parent) { 112 /* Root children are assigned a depth level of 2. 
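 * The root TSAR itself sits at level 1 (see the 'level' field of
 * struct mlx5_esw_sched_node), so nodes attached directly under it start
 * at level 2.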
*/ 113 node->level = 2; 114 list_add_tail(&node->entry, &node->esw->qos.domain->nodes); 115 } else { 116 node->level = node->parent->level + 1; 117 list_add_tail(&node->entry, &node->parent->children); 118 } 119 } 120 121 static int esw_qos_num_tcs(struct mlx5_core_dev *dev) 122 { 123 int num_tcs = mlx5_max_tc(dev) + 1; 124 125 return num_tcs < DEVLINK_RATE_TCS_MAX ? num_tcs : DEVLINK_RATE_TCS_MAX; 126 } 127 128 static void 129 esw_qos_node_set_parent(struct mlx5_esw_sched_node *node, struct mlx5_esw_sched_node *parent) 130 { 131 list_del_init(&node->entry); 132 node->parent = parent; 133 if (parent) 134 node->esw = parent->esw; 135 esw_qos_node_attach_to_parent(node); 136 } 137 138 static void esw_qos_nodes_set_parent(struct list_head *nodes, 139 struct mlx5_esw_sched_node *parent) 140 { 141 struct mlx5_esw_sched_node *node, *tmp; 142 143 list_for_each_entry_safe(node, tmp, nodes, entry) { 144 esw_qos_node_set_parent(node, parent); 145 if (!list_empty(&node->children) && 146 parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) { 147 struct mlx5_esw_sched_node *child; 148 149 list_for_each_entry(child, &node->children, entry) { 150 struct mlx5_vport *vport = child->vport; 151 152 if (vport) 153 vport->qos.sched_node->parent = parent; 154 } 155 } 156 } 157 } 158 159 void mlx5_esw_qos_vport_qos_free(struct mlx5_vport *vport) 160 { 161 if (vport->qos.sched_nodes) { 162 int num_tcs = esw_qos_num_tcs(vport->qos.sched_node->esw->dev); 163 int i; 164 165 for (i = 0; i < num_tcs; i++) 166 kfree(vport->qos.sched_nodes[i]); 167 kfree(vport->qos.sched_nodes); 168 } 169 170 kfree(vport->qos.sched_node); 171 memset(&vport->qos, 0, sizeof(vport->qos)); 172 } 173 174 u32 mlx5_esw_qos_vport_get_sched_elem_ix(const struct mlx5_vport *vport) 175 { 176 if (!vport->qos.sched_node) 177 return 0; 178 179 return vport->qos.sched_node->ix; 180 } 181 182 struct mlx5_esw_sched_node * 183 mlx5_esw_qos_vport_get_parent(const struct mlx5_vport *vport) 184 { 185 if (!vport->qos.sched_node) 186 return NULL; 187 188 return vport->qos.sched_node->parent; 189 } 190 191 static void esw_qos_sched_elem_warn(struct mlx5_esw_sched_node *node, int err, const char *op) 192 { 193 switch (node->type) { 194 case SCHED_NODE_TYPE_VPORTS_TC_TSAR: 195 esw_warn(node->esw->dev, 196 "E-Switch %s %s scheduling element failed (tc=%d,err=%d)\n", 197 op, sched_node_type_str[node->type], node->tc, err); 198 break; 199 case SCHED_NODE_TYPE_VPORT_TC: 200 esw_warn(node->esw->dev, 201 "E-Switch %s %s scheduling element failed (vport=%d,tc=%d,err=%d)\n", 202 op, 203 sched_node_type_str[node->type], 204 node->vport->vport, node->tc, err); 205 break; 206 case SCHED_NODE_TYPE_VPORT: 207 esw_warn(node->esw->dev, 208 "E-Switch %s %s scheduling element failed (vport=%d,err=%d)\n", 209 op, sched_node_type_str[node->type], node->vport->vport, err); 210 break; 211 case SCHED_NODE_TYPE_RATE_LIMITER: 212 case SCHED_NODE_TYPE_TC_ARBITER_TSAR: 213 case SCHED_NODE_TYPE_VPORTS_TSAR: 214 esw_warn(node->esw->dev, 215 "E-Switch %s %s scheduling element failed (err=%d)\n", 216 op, sched_node_type_str[node->type], err); 217 break; 218 default: 219 esw_warn(node->esw->dev, 220 "E-Switch %s scheduling element failed (err=%d)\n", 221 op, err); 222 break; 223 } 224 } 225 226 static int esw_qos_node_create_sched_element(struct mlx5_esw_sched_node *node, void *ctx, 227 struct netlink_ext_ack *extack) 228 { 229 int err; 230 231 err = mlx5_create_scheduling_element_cmd(node->esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, ctx, 232 &node->ix); 233 if (err) { 234 
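/* Note: if the create command failed, no firmware scheduling element exists
 * for this node, so callers only free the software node and never issue a
 * destroy command for it.
 */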
esw_qos_sched_elem_warn(node, err, "create"); 235 NL_SET_ERR_MSG_MOD(extack, "E-Switch create scheduling element failed"); 236 } 237 238 return err; 239 } 240 241 static int esw_qos_node_destroy_sched_element(struct mlx5_esw_sched_node *node, 242 struct netlink_ext_ack *extack) 243 { 244 int err; 245 246 err = mlx5_destroy_scheduling_element_cmd(node->esw->dev, 247 SCHEDULING_HIERARCHY_E_SWITCH, 248 node->ix); 249 if (err) { 250 esw_qos_sched_elem_warn(node, err, "destroy"); 251 NL_SET_ERR_MSG_MOD(extack, "E-Switch destroying scheduling element failed."); 252 } 253 254 return err; 255 } 256 257 static int esw_qos_sched_elem_config(struct mlx5_esw_sched_node *node, u32 max_rate, u32 bw_share, 258 struct netlink_ext_ack *extack) 259 { 260 u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; 261 struct mlx5_core_dev *dev = node->esw->dev; 262 u32 bitmask = 0; 263 int err; 264 265 if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) 266 return -EOPNOTSUPP; 267 268 if (bw_share && (!MLX5_CAP_QOS(dev, esw_bw_share) || 269 MLX5_CAP_QOS(dev, max_tsar_bw_share) < MLX5_MIN_BW_SHARE)) 270 return -EOPNOTSUPP; 271 272 if (node->max_rate == max_rate && node->bw_share == bw_share) 273 return 0; 274 275 if (node->max_rate != max_rate) { 276 MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate); 277 bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; 278 } 279 if (node->bw_share != bw_share) { 280 MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); 281 bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE; 282 } 283 284 err = mlx5_modify_scheduling_element_cmd(dev, 285 SCHEDULING_HIERARCHY_E_SWITCH, 286 sched_ctx, 287 node->ix, 288 bitmask); 289 if (err) { 290 esw_qos_sched_elem_warn(node, err, "modify"); 291 NL_SET_ERR_MSG_MOD(extack, "E-Switch modify scheduling element failed"); 292 293 return err; 294 } 295 296 node->max_rate = max_rate; 297 node->bw_share = bw_share; 298 if (node->type == SCHED_NODE_TYPE_VPORTS_TSAR) 299 trace_mlx5_esw_node_qos_config(dev, node, node->ix, bw_share, max_rate); 300 else if (node->type == SCHED_NODE_TYPE_VPORT) 301 trace_mlx5_esw_vport_qos_config(dev, node->vport, bw_share, max_rate); 302 303 return 0; 304 } 305 306 static int esw_qos_create_rate_limit_element(struct mlx5_esw_sched_node *node, 307 struct netlink_ext_ack *extack) 308 { 309 u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; 310 311 if (!mlx5_qos_element_type_supported( 312 node->esw->dev, 313 SCHEDULING_CONTEXT_ELEMENT_TYPE_RATE_LIMIT, 314 SCHEDULING_HIERARCHY_E_SWITCH)) 315 return -EOPNOTSUPP; 316 317 MLX5_SET(scheduling_context, sched_ctx, max_average_bw, node->max_rate); 318 MLX5_SET(scheduling_context, sched_ctx, element_type, 319 SCHEDULING_CONTEXT_ELEMENT_TYPE_RATE_LIMIT); 320 321 return esw_qos_node_create_sched_element(node, sched_ctx, extack); 322 } 323 324 static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw, 325 struct mlx5_esw_sched_node *parent) 326 { 327 struct list_head *nodes = parent ? &parent->children : &esw->qos.domain->nodes; 328 u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); 329 struct mlx5_esw_sched_node *node; 330 u32 max_guarantee = 0; 331 332 /* Find max min_rate across all nodes. 333 * This will correspond to fw_max_bw_share in the final bw_share calculation. 
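 * Illustrative example (made-up numbers): with max_tsar_bw_share = 100 and a
 * largest configured min_rate of 1000 Mbps, the divider is 1000 / 100 = 10,
 * so a sibling with min_rate = 250 Mbps later gets
 * bw_share = DIV_ROUND_UP(250, 10) = 25, while the 1000 Mbps node gets the
 * full 100.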
 */
        list_for_each_entry(node, nodes, entry) {
                if (node->esw == esw && node->ix != esw->qos.root_tsar_ix &&
                    node->min_rate > max_guarantee)
                        max_guarantee = node->min_rate;
        }

        if (max_guarantee)
                return max_t(u32, max_guarantee / fw_max_bw_share, 1);

        /* If no node has min_rate configured, a divider of 0 sets all
         * nodes' bw_share to 0, effectively disabling min guarantees.
         */
        return 0;
}

static u32 esw_qos_calc_bw_share(u32 value, u32 divider, u32 fw_max)
{
        if (!divider)
                return 0;
        return min_t(u32, fw_max,
                     max_t(u32,
                           DIV_ROUND_UP(value, divider), MLX5_MIN_BW_SHARE));
}

static void esw_qos_update_sched_node_bw_share(struct mlx5_esw_sched_node *node,
                                               u32 divider,
                                               struct netlink_ext_ack *extack)
{
        u32 fw_max_bw_share = MLX5_CAP_QOS(node->esw->dev, max_tsar_bw_share);
        u32 bw_share;

        bw_share = esw_qos_calc_bw_share(node->min_rate, divider, fw_max_bw_share);

        esw_qos_sched_elem_config(node, node->max_rate, bw_share, extack);
}

static void esw_qos_normalize_min_rate(struct mlx5_eswitch *esw,
                                       struct mlx5_esw_sched_node *parent,
                                       struct netlink_ext_ack *extack)
{
        struct list_head *nodes = parent ? &parent->children : &esw->qos.domain->nodes;
        u32 divider = esw_qos_calculate_min_rate_divider(esw, parent);
        struct mlx5_esw_sched_node *node;

        list_for_each_entry(node, nodes, entry) {
                if (node->esw != esw || node->ix == esw->qos.root_tsar_ix)
                        continue;

                /* Vports TC TSARs don't have a minimum rate configured,
                 * so there's no need to update the bw_share on them.
                 */
                if (node->type != SCHED_NODE_TYPE_VPORTS_TC_TSAR) {
                        esw_qos_update_sched_node_bw_share(node, divider,
                                                           extack);
                }

                if (list_empty(&node->children))
                        continue;

                esw_qos_normalize_min_rate(node->esw, node, extack);
        }
}

static u32 esw_qos_calculate_tc_bw_divider(u32 *tc_bw)
{
        u32 total = 0;
        int i;

        for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++)
                total += tc_bw[i];

        /* If total is zero, tc-bw config is disabled and we shouldn't reach
         * here.
         */
        return WARN_ON(!total) ?
1 : total; 410 } 411 412 static int esw_qos_set_node_min_rate(struct mlx5_esw_sched_node *node, 413 u32 min_rate, struct netlink_ext_ack *extack) 414 { 415 struct mlx5_eswitch *esw = node->esw; 416 417 if (min_rate == node->min_rate) 418 return 0; 419 420 node->min_rate = min_rate; 421 esw_qos_normalize_min_rate(esw, node->parent, extack); 422 423 return 0; 424 } 425 426 static int 427 esw_qos_create_node_sched_elem(struct mlx5_core_dev *dev, u32 parent_element_id, 428 u32 max_rate, u32 bw_share, u32 *tsar_ix) 429 { 430 u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; 431 void *attr; 432 433 if (!mlx5_qos_element_type_supported(dev, 434 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR, 435 SCHEDULING_HIERARCHY_E_SWITCH) || 436 !mlx5_qos_tsar_type_supported(dev, 437 TSAR_ELEMENT_TSAR_TYPE_DWRR, 438 SCHEDULING_HIERARCHY_E_SWITCH)) 439 return -EOPNOTSUPP; 440 441 MLX5_SET(scheduling_context, tsar_ctx, element_type, 442 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); 443 MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, 444 parent_element_id); 445 MLX5_SET(scheduling_context, tsar_ctx, max_average_bw, max_rate); 446 MLX5_SET(scheduling_context, tsar_ctx, bw_share, bw_share); 447 attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); 448 MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR); 449 450 return mlx5_create_scheduling_element_cmd(dev, 451 SCHEDULING_HIERARCHY_E_SWITCH, 452 tsar_ctx, 453 tsar_ix); 454 } 455 456 static int 457 esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node *vport_node, 458 struct netlink_ext_ack *extack) 459 { 460 struct mlx5_esw_sched_node *parent = vport_node->parent; 461 u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; 462 struct mlx5_core_dev *dev = vport_node->esw->dev; 463 void *attr; 464 465 if (!mlx5_qos_element_type_supported( 466 dev, 467 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT, 468 SCHEDULING_HIERARCHY_E_SWITCH)) 469 return -EOPNOTSUPP; 470 471 MLX5_SET(scheduling_context, sched_ctx, element_type, 472 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); 473 attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); 474 MLX5_SET(vport_element, attr, vport_number, vport_node->vport->vport); 475 MLX5_SET(scheduling_context, sched_ctx, parent_element_id, 476 parent ? 
parent->ix : vport_node->esw->qos.root_tsar_ix); 477 MLX5_SET(scheduling_context, sched_ctx, max_average_bw, 478 vport_node->max_rate); 479 480 return esw_qos_node_create_sched_element(vport_node, sched_ctx, extack); 481 } 482 483 static int 484 esw_qos_vport_tc_create_sched_element(struct mlx5_esw_sched_node *vport_tc_node, 485 u32 rate_limit_elem_ix, 486 struct netlink_ext_ack *extack) 487 { 488 u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; 489 struct mlx5_core_dev *dev = vport_tc_node->esw->dev; 490 void *attr; 491 492 if (!mlx5_qos_element_type_supported( 493 dev, 494 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC, 495 SCHEDULING_HIERARCHY_E_SWITCH)) 496 return -EOPNOTSUPP; 497 498 MLX5_SET(scheduling_context, sched_ctx, element_type, 499 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC); 500 attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); 501 MLX5_SET(vport_tc_element, attr, vport_number, 502 vport_tc_node->vport->vport); 503 MLX5_SET(vport_tc_element, attr, traffic_class, vport_tc_node->tc); 504 MLX5_SET(scheduling_context, sched_ctx, max_bw_obj_id, 505 rate_limit_elem_ix); 506 MLX5_SET(scheduling_context, sched_ctx, parent_element_id, 507 vport_tc_node->parent->ix); 508 MLX5_SET(scheduling_context, sched_ctx, bw_share, 509 vport_tc_node->bw_share); 510 511 return esw_qos_node_create_sched_element(vport_tc_node, sched_ctx, 512 extack); 513 } 514 515 static struct mlx5_esw_sched_node * 516 __esw_qos_alloc_node(struct mlx5_eswitch *esw, u32 tsar_ix, enum sched_node_type type, 517 struct mlx5_esw_sched_node *parent) 518 { 519 struct mlx5_esw_sched_node *node; 520 521 node = kzalloc(sizeof(*node), GFP_KERNEL); 522 if (!node) 523 return NULL; 524 525 node->esw = esw; 526 node->ix = tsar_ix; 527 node->type = type; 528 node->parent = parent; 529 INIT_LIST_HEAD(&node->children); 530 esw_qos_node_attach_to_parent(node); 531 if (!parent) { 532 /* The caller is responsible for inserting the node into the 533 * parent list if necessary. This function can also be used with 534 * a NULL parent, which doesn't necessarily indicate that it 535 * refers to the root scheduling element. 
536 */ 537 list_del_init(&node->entry); 538 } 539 540 return node; 541 } 542 543 static void __esw_qos_free_node(struct mlx5_esw_sched_node *node) 544 { 545 list_del(&node->entry); 546 kfree(node); 547 } 548 549 static void esw_qos_destroy_node(struct mlx5_esw_sched_node *node, struct netlink_ext_ack *extack) 550 { 551 esw_qos_node_destroy_sched_element(node, extack); 552 __esw_qos_free_node(node); 553 } 554 555 static int esw_qos_create_vports_tc_node(struct mlx5_esw_sched_node *parent, 556 u8 tc, struct netlink_ext_ack *extack) 557 { 558 u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; 559 struct mlx5_core_dev *dev = parent->esw->dev; 560 struct mlx5_esw_sched_node *vports_tc_node; 561 void *attr; 562 int err; 563 564 if (!mlx5_qos_element_type_supported( 565 dev, 566 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR, 567 SCHEDULING_HIERARCHY_E_SWITCH) || 568 !mlx5_qos_tsar_type_supported(dev, 569 TSAR_ELEMENT_TSAR_TYPE_DWRR, 570 SCHEDULING_HIERARCHY_E_SWITCH)) 571 return -EOPNOTSUPP; 572 573 vports_tc_node = __esw_qos_alloc_node(parent->esw, 0, 574 SCHED_NODE_TYPE_VPORTS_TC_TSAR, 575 parent); 576 if (!vports_tc_node) { 577 NL_SET_ERR_MSG_MOD(extack, "E-Switch alloc node failed"); 578 esw_warn(dev, "Failed to alloc vports TC node (tc=%d)\n", tc); 579 return -ENOMEM; 580 } 581 582 attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); 583 MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR); 584 MLX5_SET(tsar_element, attr, traffic_class, tc); 585 MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, parent->ix); 586 MLX5_SET(scheduling_context, tsar_ctx, element_type, 587 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); 588 589 err = esw_qos_node_create_sched_element(vports_tc_node, tsar_ctx, 590 extack); 591 if (err) 592 goto err_create_sched_element; 593 594 vports_tc_node->tc = tc; 595 596 return 0; 597 598 err_create_sched_element: 599 __esw_qos_free_node(vports_tc_node); 600 return err; 601 } 602 603 static void 604 esw_qos_tc_arbiter_get_bw_shares(struct mlx5_esw_sched_node *tc_arbiter_node, 605 u32 *tc_bw) 606 { 607 memcpy(tc_bw, tc_arbiter_node->tc_bw, sizeof(tc_arbiter_node->tc_bw)); 608 } 609 610 static void 611 esw_qos_set_tc_arbiter_bw_shares(struct mlx5_esw_sched_node *tc_arbiter_node, 612 u32 *tc_bw, struct netlink_ext_ack *extack) 613 { 614 struct mlx5_eswitch *esw = tc_arbiter_node->esw; 615 struct mlx5_esw_sched_node *vports_tc_node; 616 u32 divider, fw_max_bw_share; 617 618 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); 619 divider = esw_qos_calculate_tc_bw_divider(tc_bw); 620 list_for_each_entry(vports_tc_node, &tc_arbiter_node->children, entry) { 621 u8 tc = vports_tc_node->tc; 622 u32 bw_share; 623 624 tc_arbiter_node->tc_bw[tc] = tc_bw[tc]; 625 bw_share = tc_bw[tc] * fw_max_bw_share; 626 bw_share = esw_qos_calc_bw_share(bw_share, divider, 627 fw_max_bw_share); 628 esw_qos_sched_elem_config(vports_tc_node, 0, bw_share, extack); 629 } 630 } 631 632 static void 633 esw_qos_destroy_vports_tc_nodes(struct mlx5_esw_sched_node *tc_arbiter_node, 634 struct netlink_ext_ack *extack) 635 { 636 struct mlx5_esw_sched_node *vports_tc_node, *tmp; 637 638 list_for_each_entry_safe(vports_tc_node, tmp, 639 &tc_arbiter_node->children, entry) 640 esw_qos_destroy_node(vports_tc_node, extack); 641 } 642 643 static int 644 esw_qos_create_vports_tc_nodes(struct mlx5_esw_sched_node *tc_arbiter_node, 645 struct netlink_ext_ack *extack) 646 { 647 struct mlx5_eswitch *esw = tc_arbiter_node->esw; 648 int err, i, num_tcs = esw_qos_num_tcs(esw->dev); 649 650 for (i 
= 0; i < num_tcs; i++) { 651 err = esw_qos_create_vports_tc_node(tc_arbiter_node, i, extack); 652 if (err) 653 goto err_tc_node_create; 654 } 655 656 return 0; 657 658 err_tc_node_create: 659 esw_qos_destroy_vports_tc_nodes(tc_arbiter_node, NULL); 660 return err; 661 } 662 663 static int esw_qos_create_tc_arbiter_sched_elem( 664 struct mlx5_esw_sched_node *tc_arbiter_node, 665 struct netlink_ext_ack *extack) 666 { 667 u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; 668 u32 tsar_parent_ix; 669 void *attr; 670 671 if (!mlx5_qos_tsar_type_supported(tc_arbiter_node->esw->dev, 672 TSAR_ELEMENT_TSAR_TYPE_TC_ARB, 673 SCHEDULING_HIERARCHY_E_SWITCH)) { 674 NL_SET_ERR_MSG_MOD(extack, 675 "E-Switch TC Arbiter scheduling element is not supported"); 676 return -EOPNOTSUPP; 677 } 678 679 attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); 680 MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_TC_ARB); 681 tsar_parent_ix = tc_arbiter_node->parent ? tc_arbiter_node->parent->ix : 682 tc_arbiter_node->esw->qos.root_tsar_ix; 683 MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, 684 tsar_parent_ix); 685 MLX5_SET(scheduling_context, tsar_ctx, element_type, 686 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); 687 MLX5_SET(scheduling_context, tsar_ctx, max_average_bw, 688 tc_arbiter_node->max_rate); 689 MLX5_SET(scheduling_context, tsar_ctx, bw_share, 690 tc_arbiter_node->bw_share); 691 692 return esw_qos_node_create_sched_element(tc_arbiter_node, tsar_ctx, 693 extack); 694 } 695 696 static struct mlx5_esw_sched_node * 697 __esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct mlx5_esw_sched_node *parent, 698 struct netlink_ext_ack *extack) 699 { 700 struct mlx5_esw_sched_node *node; 701 u32 tsar_ix; 702 int err; 703 704 err = esw_qos_create_node_sched_elem(esw->dev, esw->qos.root_tsar_ix, 0, 705 0, &tsar_ix); 706 if (err) { 707 NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for node failed"); 708 return ERR_PTR(err); 709 } 710 711 node = __esw_qos_alloc_node(esw, tsar_ix, SCHED_NODE_TYPE_VPORTS_TSAR, parent); 712 if (!node) { 713 NL_SET_ERR_MSG_MOD(extack, "E-Switch alloc node failed"); 714 err = -ENOMEM; 715 goto err_alloc_node; 716 } 717 718 list_add_tail(&node->entry, &esw->qos.domain->nodes); 719 esw_qos_normalize_min_rate(esw, NULL, extack); 720 trace_mlx5_esw_node_qos_create(esw->dev, node, node->ix); 721 722 return node; 723 724 err_alloc_node: 725 if (mlx5_destroy_scheduling_element_cmd(esw->dev, 726 SCHEDULING_HIERARCHY_E_SWITCH, 727 tsar_ix)) 728 NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for node failed"); 729 return ERR_PTR(err); 730 } 731 732 static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack); 733 static void esw_qos_put(struct mlx5_eswitch *esw); 734 735 static struct mlx5_esw_sched_node * 736 esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) 737 { 738 struct mlx5_esw_sched_node *node; 739 int err; 740 741 esw_assert_qos_lock_held(esw); 742 if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth)) 743 return ERR_PTR(-EOPNOTSUPP); 744 745 err = esw_qos_get(esw, extack); 746 if (err) 747 return ERR_PTR(err); 748 749 node = __esw_qos_create_vports_sched_node(esw, NULL, extack); 750 if (IS_ERR(node)) 751 esw_qos_put(esw); 752 753 return node; 754 } 755 756 static void __esw_qos_destroy_node(struct mlx5_esw_sched_node *node, struct netlink_ext_ack *extack) 757 { 758 struct mlx5_eswitch *esw = node->esw; 759 760 if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) 761 
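/* A TC arbiter TSAR owns one vports-TC TSAR child per traffic class;
 * destroy those children before the arbiter element itself is removed.
 */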
esw_qos_destroy_vports_tc_nodes(node, extack); 762 763 trace_mlx5_esw_node_qos_destroy(esw->dev, node, node->ix); 764 esw_qos_destroy_node(node, extack); 765 esw_qos_normalize_min_rate(esw, NULL, extack); 766 } 767 768 static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) 769 { 770 struct mlx5_core_dev *dev = esw->dev; 771 int err; 772 773 if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) 774 return -EOPNOTSUPP; 775 776 err = esw_qos_create_node_sched_elem(esw->dev, 0, 0, 0, 777 &esw->qos.root_tsar_ix); 778 if (err) { 779 esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err); 780 return err; 781 } 782 783 refcount_set(&esw->qos.refcnt, 1); 784 785 return 0; 786 } 787 788 static void esw_qos_destroy(struct mlx5_eswitch *esw) 789 { 790 int err; 791 792 err = mlx5_destroy_scheduling_element_cmd(esw->dev, 793 SCHEDULING_HIERARCHY_E_SWITCH, 794 esw->qos.root_tsar_ix); 795 if (err) 796 esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err); 797 } 798 799 static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) 800 { 801 int err = 0; 802 803 esw_assert_qos_lock_held(esw); 804 if (!refcount_inc_not_zero(&esw->qos.refcnt)) { 805 /* esw_qos_create() set refcount to 1 only on success. 806 * No need to decrement on failure. 807 */ 808 err = esw_qos_create(esw, extack); 809 } 810 811 return err; 812 } 813 814 static void esw_qos_put(struct mlx5_eswitch *esw) 815 { 816 esw_assert_qos_lock_held(esw); 817 if (refcount_dec_and_test(&esw->qos.refcnt)) 818 esw_qos_destroy(esw); 819 } 820 821 static void 822 esw_qos_tc_arbiter_scheduling_teardown(struct mlx5_esw_sched_node *node, 823 struct netlink_ext_ack *extack) 824 { 825 /* Clean up all Vports TC nodes within the TC arbiter node. */ 826 esw_qos_destroy_vports_tc_nodes(node, extack); 827 /* Destroy the scheduling element for the TC arbiter node itself. */ 828 esw_qos_node_destroy_sched_element(node, extack); 829 } 830 831 static int esw_qos_tc_arbiter_scheduling_setup(struct mlx5_esw_sched_node *node, 832 struct netlink_ext_ack *extack) 833 { 834 u32 curr_ix = node->ix; 835 int err; 836 837 err = esw_qos_create_tc_arbiter_sched_elem(node, extack); 838 if (err) 839 return err; 840 /* Initialize the vports TC nodes within created TC arbiter TSAR. */ 841 err = esw_qos_create_vports_tc_nodes(node, extack); 842 if (err) 843 goto err_vports_tc_nodes; 844 845 node->type = SCHED_NODE_TYPE_TC_ARBITER_TSAR; 846 847 return 0; 848 849 err_vports_tc_nodes: 850 /* If initialization fails, clean up the scheduling element 851 * for the TC arbiter node. 
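 * The saved element index is then restored (node->ix = curr_ix) so the node
 * is left as it was before the setup attempt.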
852 */ 853 esw_qos_node_destroy_sched_element(node, NULL); 854 node->ix = curr_ix; 855 return err; 856 } 857 858 static int 859 esw_qos_create_vport_tc_sched_node(struct mlx5_vport *vport, 860 u32 rate_limit_elem_ix, 861 struct mlx5_esw_sched_node *vports_tc_node, 862 struct netlink_ext_ack *extack) 863 { 864 struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; 865 struct mlx5_esw_sched_node *vport_tc_node; 866 u8 tc = vports_tc_node->tc; 867 int err; 868 869 vport_tc_node = __esw_qos_alloc_node(vport_node->esw, 0, 870 SCHED_NODE_TYPE_VPORT_TC, 871 vports_tc_node); 872 if (!vport_tc_node) 873 return -ENOMEM; 874 875 vport_tc_node->min_rate = vport_node->min_rate; 876 vport_tc_node->tc = tc; 877 vport_tc_node->vport = vport; 878 err = esw_qos_vport_tc_create_sched_element(vport_tc_node, 879 rate_limit_elem_ix, 880 extack); 881 if (err) 882 goto err_out; 883 884 vport->qos.sched_nodes[tc] = vport_tc_node; 885 886 return 0; 887 err_out: 888 __esw_qos_free_node(vport_tc_node); 889 return err; 890 } 891 892 static void 893 esw_qos_destroy_vport_tc_sched_elements(struct mlx5_vport *vport, 894 struct netlink_ext_ack *extack) 895 { 896 int i, num_tcs = esw_qos_num_tcs(vport->qos.sched_node->esw->dev); 897 898 for (i = 0; i < num_tcs; i++) { 899 if (vport->qos.sched_nodes[i]) { 900 __esw_qos_destroy_node(vport->qos.sched_nodes[i], 901 extack); 902 } 903 } 904 905 kfree(vport->qos.sched_nodes); 906 vport->qos.sched_nodes = NULL; 907 } 908 909 static int 910 esw_qos_create_vport_tc_sched_elements(struct mlx5_vport *vport, 911 enum sched_node_type type, 912 struct netlink_ext_ack *extack) 913 { 914 struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; 915 struct mlx5_esw_sched_node *tc_arbiter_node, *vports_tc_node; 916 int err, num_tcs = esw_qos_num_tcs(vport_node->esw->dev); 917 u32 rate_limit_elem_ix; 918 919 vport->qos.sched_nodes = kcalloc(num_tcs, 920 sizeof(struct mlx5_esw_sched_node *), 921 GFP_KERNEL); 922 if (!vport->qos.sched_nodes) { 923 NL_SET_ERR_MSG_MOD(extack, 924 "Allocating the vport TC scheduling elements failed."); 925 return -ENOMEM; 926 } 927 928 rate_limit_elem_ix = type == SCHED_NODE_TYPE_RATE_LIMITER ? 929 vport_node->ix : 0; 930 tc_arbiter_node = type == SCHED_NODE_TYPE_RATE_LIMITER ? 931 vport_node->parent : vport_node; 932 list_for_each_entry(vports_tc_node, &tc_arbiter_node->children, entry) { 933 err = esw_qos_create_vport_tc_sched_node(vport, 934 rate_limit_elem_ix, 935 vports_tc_node, 936 extack); 937 if (err) 938 goto err_create_vport_tc; 939 } 940 941 return 0; 942 943 err_create_vport_tc: 944 esw_qos_destroy_vport_tc_sched_elements(vport, NULL); 945 946 return err; 947 } 948 949 static int 950 esw_qos_vport_tc_enable(struct mlx5_vport *vport, enum sched_node_type type, 951 struct netlink_ext_ack *extack) 952 { 953 struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; 954 struct mlx5_esw_sched_node *parent = vport_node->parent; 955 int err; 956 957 if (type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) { 958 int new_level, max_level; 959 960 /* Increase the parent's level by 2 to account for both the 961 * TC arbiter and the vports TC scheduling element. 962 */ 963 new_level = (parent ? 
parent->level : 2) + 2; 964 max_level = 1 << MLX5_CAP_QOS(vport_node->esw->dev, 965 log_esw_max_sched_depth); 966 if (new_level > max_level) { 967 NL_SET_ERR_MSG_FMT_MOD(extack, 968 "TC arbitration on leafs is not supported beyond max depth %d", 969 max_level); 970 return -EOPNOTSUPP; 971 } 972 } 973 974 esw_assert_qos_lock_held(vport->dev->priv.eswitch); 975 976 if (type == SCHED_NODE_TYPE_RATE_LIMITER) 977 err = esw_qos_create_rate_limit_element(vport_node, extack); 978 else 979 err = esw_qos_tc_arbiter_scheduling_setup(vport_node, extack); 980 if (err) 981 return err; 982 983 /* Rate limiters impact multiple nodes not directly connected to them 984 * and are not direct members of the QoS hierarchy. 985 * Unlink it from the parent to reflect that. 986 */ 987 if (type == SCHED_NODE_TYPE_RATE_LIMITER) { 988 list_del_init(&vport_node->entry); 989 vport_node->level = 0; 990 } 991 992 err = esw_qos_create_vport_tc_sched_elements(vport, type, extack); 993 if (err) 994 goto err_sched_nodes; 995 996 return 0; 997 998 err_sched_nodes: 999 if (type == SCHED_NODE_TYPE_RATE_LIMITER) { 1000 esw_qos_node_destroy_sched_element(vport_node, NULL); 1001 esw_qos_node_attach_to_parent(vport_node); 1002 } else { 1003 esw_qos_tc_arbiter_scheduling_teardown(vport_node, NULL); 1004 } 1005 return err; 1006 } 1007 1008 static void esw_qos_vport_tc_disable(struct mlx5_vport *vport, 1009 struct netlink_ext_ack *extack) 1010 { 1011 struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; 1012 enum sched_node_type curr_type = vport_node->type; 1013 1014 esw_qos_destroy_vport_tc_sched_elements(vport, extack); 1015 1016 if (curr_type == SCHED_NODE_TYPE_RATE_LIMITER) 1017 esw_qos_node_destroy_sched_element(vport_node, extack); 1018 else 1019 esw_qos_tc_arbiter_scheduling_teardown(vport_node, extack); 1020 } 1021 1022 static int esw_qos_set_vport_tcs_min_rate(struct mlx5_vport *vport, 1023 u32 min_rate, 1024 struct netlink_ext_ack *extack) 1025 { 1026 struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; 1027 int err, i, num_tcs = esw_qos_num_tcs(vport_node->esw->dev); 1028 1029 for (i = 0; i < num_tcs; i++) { 1030 err = esw_qos_set_node_min_rate(vport->qos.sched_nodes[i], 1031 min_rate, extack); 1032 if (err) 1033 goto err_out; 1034 } 1035 vport_node->min_rate = min_rate; 1036 1037 return 0; 1038 err_out: 1039 for (--i; i >= 0; i--) { 1040 esw_qos_set_node_min_rate(vport->qos.sched_nodes[i], 1041 vport_node->min_rate, extack); 1042 } 1043 return err; 1044 } 1045 1046 static void esw_qos_vport_disable(struct mlx5_vport *vport, struct netlink_ext_ack *extack) 1047 { 1048 struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; 1049 enum sched_node_type curr_type = vport_node->type; 1050 1051 if (curr_type == SCHED_NODE_TYPE_VPORT) 1052 esw_qos_node_destroy_sched_element(vport_node, extack); 1053 else 1054 esw_qos_vport_tc_disable(vport, extack); 1055 1056 vport_node->bw_share = 0; 1057 memset(vport_node->tc_bw, 0, sizeof(vport_node->tc_bw)); 1058 list_del_init(&vport_node->entry); 1059 esw_qos_normalize_min_rate(vport_node->esw, vport_node->parent, extack); 1060 1061 trace_mlx5_esw_vport_qos_destroy(vport_node->esw->dev, vport); 1062 } 1063 1064 static int esw_qos_vport_enable(struct mlx5_vport *vport, 1065 enum sched_node_type type, 1066 struct mlx5_esw_sched_node *parent, 1067 struct netlink_ext_ack *extack) 1068 { 1069 struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; 1070 int err; 1071 1072 esw_assert_qos_lock_held(vport->dev->priv.eswitch); 1073 1074 
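/* Attach the vport node to its parent first, then create the matching
 * scheduling element: a plain vport gets a single vport element, while the
 * TC-related types are set up through esw_qos_vport_tc_enable().
 */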
esw_qos_node_set_parent(vport_node, parent); 1075 if (type == SCHED_NODE_TYPE_VPORT) 1076 err = esw_qos_vport_create_sched_element(vport_node, extack); 1077 else 1078 err = esw_qos_vport_tc_enable(vport, type, extack); 1079 if (err) 1080 return err; 1081 1082 vport_node->type = type; 1083 esw_qos_normalize_min_rate(vport_node->esw, parent, extack); 1084 trace_mlx5_esw_vport_qos_create(vport->dev, vport, vport_node->max_rate, 1085 vport_node->bw_share); 1086 1087 return 0; 1088 } 1089 1090 static int mlx5_esw_qos_vport_enable(struct mlx5_vport *vport, enum sched_node_type type, 1091 struct mlx5_esw_sched_node *parent, u32 max_rate, 1092 u32 min_rate, struct netlink_ext_ack *extack) 1093 { 1094 struct mlx5_eswitch *esw = vport->dev->priv.eswitch; 1095 struct mlx5_esw_sched_node *sched_node; 1096 struct mlx5_eswitch *parent_esw; 1097 int err; 1098 1099 esw_assert_qos_lock_held(esw); 1100 err = esw_qos_get(esw, extack); 1101 if (err) 1102 return err; 1103 1104 parent_esw = parent ? parent->esw : esw; 1105 sched_node = __esw_qos_alloc_node(parent_esw, 0, type, parent); 1106 if (!sched_node) { 1107 esw_qos_put(esw); 1108 return -ENOMEM; 1109 } 1110 if (!parent) 1111 list_add_tail(&sched_node->entry, &esw->qos.domain->nodes); 1112 1113 sched_node->max_rate = max_rate; 1114 sched_node->min_rate = min_rate; 1115 sched_node->vport = vport; 1116 vport->qos.sched_node = sched_node; 1117 err = esw_qos_vport_enable(vport, type, parent, extack); 1118 if (err) { 1119 __esw_qos_free_node(sched_node); 1120 esw_qos_put(esw); 1121 vport->qos.sched_node = NULL; 1122 } 1123 1124 return err; 1125 } 1126 1127 static void mlx5_esw_qos_vport_disable_locked(struct mlx5_vport *vport) 1128 { 1129 struct mlx5_eswitch *esw = vport->dev->priv.eswitch; 1130 1131 esw_assert_qos_lock_held(esw); 1132 if (!vport->qos.sched_node) 1133 return; 1134 1135 esw_qos_vport_disable(vport, NULL); 1136 mlx5_esw_qos_vport_qos_free(vport); 1137 esw_qos_put(esw); 1138 } 1139 1140 void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport) 1141 { 1142 struct mlx5_eswitch *esw = vport->dev->priv.eswitch; 1143 struct mlx5_esw_sched_node *parent; 1144 1145 lockdep_assert_held(&esw->state_lock); 1146 esw_qos_lock(esw); 1147 if (!vport->qos.sched_node) 1148 goto unlock; 1149 1150 parent = vport->qos.sched_node->parent; 1151 WARN(parent, "Disabling QoS on port before detaching it from node"); 1152 1153 mlx5_esw_qos_vport_disable_locked(vport); 1154 unlock: 1155 esw_qos_unlock(esw); 1156 } 1157 1158 static int mlx5_esw_qos_set_vport_max_rate(struct mlx5_vport *vport, u32 max_rate, 1159 struct netlink_ext_ack *extack) 1160 { 1161 struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; 1162 1163 esw_assert_qos_lock_held(vport->dev->priv.eswitch); 1164 1165 if (!vport_node) 1166 return mlx5_esw_qos_vport_enable(vport, SCHED_NODE_TYPE_VPORT, NULL, max_rate, 0, 1167 extack); 1168 else 1169 return esw_qos_sched_elem_config(vport_node, max_rate, vport_node->bw_share, 1170 extack); 1171 } 1172 1173 static int mlx5_esw_qos_set_vport_min_rate(struct mlx5_vport *vport, u32 min_rate, 1174 struct netlink_ext_ack *extack) 1175 { 1176 struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; 1177 1178 esw_assert_qos_lock_held(vport->dev->priv.eswitch); 1179 1180 if (!vport_node) 1181 return mlx5_esw_qos_vport_enable(vport, SCHED_NODE_TYPE_VPORT, NULL, 0, min_rate, 1182 extack); 1183 else if (vport_node->type == SCHED_NODE_TYPE_RATE_LIMITER) 1184 return esw_qos_set_vport_tcs_min_rate(vport, min_rate, extack); 1185 else 1186 return 
esw_qos_set_node_min_rate(vport_node, min_rate, extack); 1187 } 1188 1189 int mlx5_esw_qos_set_vport_rate(struct mlx5_vport *vport, u32 max_rate, u32 min_rate) 1190 { 1191 struct mlx5_eswitch *esw = vport->dev->priv.eswitch; 1192 int err; 1193 1194 esw_qos_lock(esw); 1195 err = mlx5_esw_qos_set_vport_min_rate(vport, min_rate, NULL); 1196 if (!err) 1197 err = mlx5_esw_qos_set_vport_max_rate(vport, max_rate, NULL); 1198 esw_qos_unlock(esw); 1199 return err; 1200 } 1201 1202 bool mlx5_esw_qos_get_vport_rate(struct mlx5_vport *vport, u32 *max_rate, u32 *min_rate) 1203 { 1204 struct mlx5_eswitch *esw = vport->dev->priv.eswitch; 1205 bool enabled; 1206 1207 esw_qos_lock(esw); 1208 enabled = !!vport->qos.sched_node; 1209 if (enabled) { 1210 *max_rate = vport->qos.sched_node->max_rate; 1211 *min_rate = vport->qos.sched_node->min_rate; 1212 } 1213 esw_qos_unlock(esw); 1214 return enabled; 1215 } 1216 1217 static int esw_qos_vport_tc_check_type(enum sched_node_type curr_type, 1218 enum sched_node_type new_type, 1219 struct netlink_ext_ack *extack) 1220 { 1221 if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && 1222 new_type == SCHED_NODE_TYPE_RATE_LIMITER) { 1223 NL_SET_ERR_MSG_MOD(extack, 1224 "Cannot switch from vport-level TC arbitration to node-level TC arbitration"); 1225 return -EOPNOTSUPP; 1226 } 1227 1228 if (curr_type == SCHED_NODE_TYPE_RATE_LIMITER && 1229 new_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) { 1230 NL_SET_ERR_MSG_MOD(extack, 1231 "Cannot switch from node-level TC arbitration to vport-level TC arbitration"); 1232 return -EOPNOTSUPP; 1233 } 1234 1235 return 0; 1236 } 1237 1238 static int esw_qos_vport_update(struct mlx5_vport *vport, 1239 enum sched_node_type type, 1240 struct mlx5_esw_sched_node *parent, 1241 struct netlink_ext_ack *extack) 1242 { 1243 struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; 1244 struct mlx5_esw_sched_node *curr_parent = vport_node->parent; 1245 enum sched_node_type curr_type = vport_node->type; 1246 u32 curr_tc_bw[DEVLINK_RATE_TCS_MAX] = {0}; 1247 int err; 1248 1249 esw_assert_qos_lock_held(vport->dev->priv.eswitch); 1250 if (curr_type == type && curr_parent == parent) 1251 return 0; 1252 1253 err = esw_qos_vport_tc_check_type(curr_type, type, extack); 1254 if (err) 1255 return err; 1256 1257 if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type) 1258 esw_qos_tc_arbiter_get_bw_shares(vport_node, curr_tc_bw); 1259 1260 esw_qos_vport_disable(vport, extack); 1261 1262 err = esw_qos_vport_enable(vport, type, parent, extack); 1263 if (err) { 1264 esw_qos_vport_enable(vport, curr_type, curr_parent, NULL); 1265 extack = NULL; 1266 } 1267 1268 if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type) { 1269 esw_qos_set_tc_arbiter_bw_shares(vport_node, curr_tc_bw, 1270 extack); 1271 } 1272 1273 return err; 1274 } 1275 1276 static int esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent, 1277 struct netlink_ext_ack *extack) 1278 { 1279 struct mlx5_eswitch *esw = vport->dev->priv.eswitch; 1280 struct mlx5_esw_sched_node *curr_parent; 1281 enum sched_node_type type; 1282 1283 esw_assert_qos_lock_held(esw); 1284 curr_parent = vport->qos.sched_node->parent; 1285 if (curr_parent == parent) 1286 return 0; 1287 1288 /* Set vport QoS type based on parent node type if different from 1289 * default QoS; otherwise, use the vport's current QoS type. 
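 * Concretely: moving under a TC arbiter parent turns the vport into a rate
 * limiter member, leaving a TC arbiter parent reverts it to a plain vport
 * element, and any other move keeps the current type.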
1290 */ 1291 if (parent && parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) 1292 type = SCHED_NODE_TYPE_RATE_LIMITER; 1293 else if (curr_parent && 1294 curr_parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) 1295 type = SCHED_NODE_TYPE_VPORT; 1296 else 1297 type = vport->qos.sched_node->type; 1298 1299 return esw_qos_vport_update(vport, type, parent, extack); 1300 } 1301 1302 static void 1303 esw_qos_switch_vport_tcs_to_vport(struct mlx5_esw_sched_node *tc_arbiter_node, 1304 struct mlx5_esw_sched_node *node, 1305 struct netlink_ext_ack *extack) 1306 { 1307 struct mlx5_esw_sched_node *vports_tc_node, *vport_tc_node, *tmp; 1308 1309 vports_tc_node = list_first_entry(&tc_arbiter_node->children, 1310 struct mlx5_esw_sched_node, 1311 entry); 1312 1313 list_for_each_entry_safe(vport_tc_node, tmp, &vports_tc_node->children, 1314 entry) 1315 esw_qos_vport_update_parent(vport_tc_node->vport, node, extack); 1316 } 1317 1318 static int esw_qos_switch_tc_arbiter_node_to_vports( 1319 struct mlx5_esw_sched_node *tc_arbiter_node, 1320 struct mlx5_esw_sched_node *node, 1321 struct netlink_ext_ack *extack) 1322 { 1323 u32 parent_tsar_ix = node->parent ? 1324 node->parent->ix : node->esw->qos.root_tsar_ix; 1325 int err; 1326 1327 err = esw_qos_create_node_sched_elem(node->esw->dev, parent_tsar_ix, 1328 node->max_rate, node->bw_share, 1329 &node->ix); 1330 if (err) { 1331 NL_SET_ERR_MSG_MOD(extack, 1332 "Failed to create scheduling element for vports node when disabling vports TC QoS"); 1333 return err; 1334 } 1335 1336 node->type = SCHED_NODE_TYPE_VPORTS_TSAR; 1337 1338 /* Disable TC QoS for vports in the arbiter node. */ 1339 esw_qos_switch_vport_tcs_to_vport(tc_arbiter_node, node, extack); 1340 1341 return 0; 1342 } 1343 1344 static int esw_qos_switch_vports_node_to_tc_arbiter( 1345 struct mlx5_esw_sched_node *node, 1346 struct mlx5_esw_sched_node *tc_arbiter_node, 1347 struct netlink_ext_ack *extack) 1348 { 1349 struct mlx5_esw_sched_node *vport_node, *tmp; 1350 struct mlx5_vport *vport; 1351 int err; 1352 1353 /* Enable TC QoS for each vport in the node. */ 1354 list_for_each_entry_safe(vport_node, tmp, &node->children, entry) { 1355 vport = vport_node->vport; 1356 err = esw_qos_vport_update_parent(vport, tc_arbiter_node, 1357 extack); 1358 if (err) 1359 goto err_out; 1360 } 1361 1362 /* Destroy the current vports node TSAR. */ 1363 err = mlx5_destroy_scheduling_element_cmd(node->esw->dev, 1364 SCHEDULING_HIERARCHY_E_SWITCH, 1365 node->ix); 1366 if (err) 1367 goto err_out; 1368 1369 return 0; 1370 err_out: 1371 /* Restore vports back into the node if an error occurs. */ 1372 esw_qos_switch_vport_tcs_to_vport(tc_arbiter_node, node, NULL); 1373 1374 return err; 1375 } 1376 1377 static struct mlx5_esw_sched_node * 1378 esw_qos_move_node(struct mlx5_esw_sched_node *curr_node) 1379 { 1380 struct mlx5_esw_sched_node *new_node; 1381 1382 new_node = __esw_qos_alloc_node(curr_node->esw, curr_node->ix, 1383 curr_node->type, NULL); 1384 if (!new_node) 1385 return ERR_PTR(-ENOMEM); 1386 1387 esw_qos_nodes_set_parent(&curr_node->children, new_node); 1388 return new_node; 1389 } 1390 1391 static int esw_qos_node_disable_tc_arbitration(struct mlx5_esw_sched_node *node, 1392 struct netlink_ext_ack *extack) 1393 { 1394 struct mlx5_esw_sched_node *curr_node; 1395 int err; 1396 1397 if (node->type != SCHED_NODE_TYPE_TC_ARBITER_TSAR) 1398 return 0; 1399 1400 /* Allocate a new rate node to hold the current state, which will allow 1401 * for restoring the vports back to this node after disabling TC 1402 * arbitration. 
1403 */ 1404 curr_node = esw_qos_move_node(node); 1405 if (IS_ERR(curr_node)) { 1406 NL_SET_ERR_MSG_MOD(extack, "Failed setting up vports node"); 1407 return PTR_ERR(curr_node); 1408 } 1409 1410 /* Disable TC QoS for all vports, and assign them back to the node. */ 1411 err = esw_qos_switch_tc_arbiter_node_to_vports(curr_node, node, extack); 1412 if (err) 1413 goto err_out; 1414 1415 /* Clean up the TC arbiter node after disabling TC QoS for vports. */ 1416 esw_qos_tc_arbiter_scheduling_teardown(curr_node, extack); 1417 goto out; 1418 err_out: 1419 esw_qos_nodes_set_parent(&curr_node->children, node); 1420 out: 1421 __esw_qos_free_node(curr_node); 1422 return err; 1423 } 1424 1425 static int esw_qos_node_enable_tc_arbitration(struct mlx5_esw_sched_node *node, 1426 struct netlink_ext_ack *extack) 1427 { 1428 struct mlx5_esw_sched_node *curr_node, *child; 1429 int err, new_level, max_level; 1430 1431 if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) 1432 return 0; 1433 1434 /* Increase the hierarchy level by one to account for the additional 1435 * vports TC scheduling node, and verify that the new level does not 1436 * exceed the maximum allowed depth. 1437 */ 1438 new_level = node->level + 1; 1439 max_level = 1 << MLX5_CAP_QOS(node->esw->dev, log_esw_max_sched_depth); 1440 if (new_level > max_level) { 1441 NL_SET_ERR_MSG_FMT_MOD(extack, 1442 "TC arbitration on nodes is not supported beyond max depth %d", 1443 max_level); 1444 return -EOPNOTSUPP; 1445 } 1446 1447 /* Ensure the node does not contain non-leaf children before assigning 1448 * TC bandwidth. 1449 */ 1450 if (!list_empty(&node->children)) { 1451 list_for_each_entry(child, &node->children, entry) { 1452 if (!child->vport) { 1453 NL_SET_ERR_MSG_MOD(extack, 1454 "Cannot configure TC bandwidth on a node with non-leaf children"); 1455 return -EOPNOTSUPP; 1456 } 1457 } 1458 } 1459 1460 /* Allocate a new node that will store the information of the current 1461 * node. This will be used later to restore the node if necessary. 1462 */ 1463 curr_node = esw_qos_move_node(node); 1464 if (IS_ERR(curr_node)) { 1465 NL_SET_ERR_MSG_MOD(extack, "Failed setting up node TC QoS"); 1466 return PTR_ERR(curr_node); 1467 } 1468 1469 /* Initialize the TC arbiter node for QoS management. 1470 * This step prepares the node for handling Traffic Class arbitration. 1471 */ 1472 err = esw_qos_tc_arbiter_scheduling_setup(node, extack); 1473 if (err) 1474 goto err_setup; 1475 1476 /* Enable TC QoS for each vport within the current node. */ 1477 err = esw_qos_switch_vports_node_to_tc_arbiter(curr_node, node, extack); 1478 if (err) 1479 goto err_switch_vports; 1480 goto out; 1481 1482 err_switch_vports: 1483 esw_qos_tc_arbiter_scheduling_teardown(node, NULL); 1484 node->ix = curr_node->ix; 1485 node->type = curr_node->type; 1486 err_setup: 1487 esw_qos_nodes_set_parent(&curr_node->children, node); 1488 out: 1489 __esw_qos_free_node(curr_node); 1490 return err; 1491 } 1492 1493 static u32 mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev *mdev) 1494 { 1495 struct ethtool_link_ksettings lksettings; 1496 struct net_device *slave, *master; 1497 u32 speed = SPEED_UNKNOWN; 1498 1499 /* Lock ensures a stable reference to master and slave netdevice 1500 * while port speed of master is queried. 
 */
        ASSERT_RTNL();

        slave = mlx5_uplink_netdev_get(mdev);
        if (!slave)
                goto out;

        master = netdev_master_upper_dev_get(slave);
        if (master && !__ethtool_get_link_ksettings(master, &lksettings))
                speed = lksettings.base.speed;

out:
        mlx5_uplink_netdev_put(mdev, slave);
        return speed;
}

static int mlx5_esw_qos_max_link_speed_get(struct mlx5_core_dev *mdev, u32 *link_speed_max,
                                           bool hold_rtnl_lock, struct netlink_ext_ack *extack)
{
        int err;

        if (!mlx5_lag_is_active(mdev))
                goto skip_lag;

        if (hold_rtnl_lock)
                rtnl_lock();

        *link_speed_max = mlx5_esw_qos_lag_link_speed_get_locked(mdev);

        if (hold_rtnl_lock)
                rtnl_unlock();

        if (*link_speed_max != (u32)SPEED_UNKNOWN)
                return 0;

skip_lag:
        err = mlx5_port_max_linkspeed(mdev, link_speed_max);
        if (err)
                NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");

        return err;
}

static int mlx5_esw_qos_link_speed_verify(struct mlx5_core_dev *mdev,
                                          const char *name, u32 link_speed_max,
                                          u64 value, struct netlink_ext_ack *extack)
{
        if (value > link_speed_max) {
                pr_err("%s rate value %lluMbps exceeds link maximum speed %u.\n",
                       name, value, link_speed_max);
                NL_SET_ERR_MSG_MOD(extack, "TX rate value exceeds link maximum speed");
                return -EINVAL;
        }

        return 0;
}

int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
{
        struct mlx5_vport *vport;
        u32 link_speed_max;
        int err;

        vport = mlx5_eswitch_get_vport(esw, vport_num);
        if (IS_ERR(vport))
                return PTR_ERR(vport);

        if (rate_mbps) {
                err = mlx5_esw_qos_max_link_speed_get(esw->dev, &link_speed_max, false, NULL);
                if (err)
                        return err;

                err = mlx5_esw_qos_link_speed_verify(esw->dev, "Police",
                                                     link_speed_max, rate_mbps, NULL);
                if (err)
                        return err;
        }

        esw_qos_lock(esw);
        err = mlx5_esw_qos_set_vport_max_rate(vport, rate_mbps, NULL);
        esw_qos_unlock(esw);

        return err;
}

#define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */

/* Converts a rate passed by pointer in bytes per second into megabits per
 * second, rewriting the pointed-to value. Returns an error if the converted
 * rate exceeds the link speed or is not a whole multiple of 1 Mbps.
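 * Illustrative example (made-up numbers): a tx_max of 1250000000 Bps divides
 * evenly by MLX5_LINKSPEED_UNIT (125000) and converts to 10000 Mbps (10 Gbps).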
1591 */ 1592 static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name, 1593 u64 *rate, struct netlink_ext_ack *extack) 1594 { 1595 u32 link_speed_max, remainder; 1596 u64 value; 1597 int err; 1598 1599 value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &remainder); 1600 if (remainder) { 1601 pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n", 1602 name, *rate); 1603 NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps"); 1604 return -EINVAL; 1605 } 1606 1607 err = mlx5_esw_qos_max_link_speed_get(mdev, &link_speed_max, true, extack); 1608 if (err) 1609 return err; 1610 1611 err = mlx5_esw_qos_link_speed_verify(mdev, name, link_speed_max, value, extack); 1612 if (err) 1613 return err; 1614 1615 *rate = value; 1616 return 0; 1617 } 1618 1619 static bool esw_qos_validate_unsupported_tc_bw(struct mlx5_eswitch *esw, 1620 u32 *tc_bw) 1621 { 1622 int i, num_tcs = esw_qos_num_tcs(esw->dev); 1623 1624 for (i = num_tcs; i < DEVLINK_RATE_TCS_MAX; i++) { 1625 if (tc_bw[i]) 1626 return false; 1627 } 1628 1629 return true; 1630 } 1631 1632 static bool esw_qos_vport_validate_unsupported_tc_bw(struct mlx5_vport *vport, 1633 u32 *tc_bw) 1634 { 1635 struct mlx5_esw_sched_node *node = vport->qos.sched_node; 1636 struct mlx5_eswitch *esw = vport->dev->priv.eswitch; 1637 1638 esw = (node && node->parent) ? node->parent->esw : esw; 1639 1640 return esw_qos_validate_unsupported_tc_bw(esw, tc_bw); 1641 } 1642 1643 static bool esw_qos_tc_bw_disabled(u32 *tc_bw) 1644 { 1645 int i; 1646 1647 for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++) { 1648 if (tc_bw[i]) 1649 return false; 1650 } 1651 1652 return true; 1653 } 1654 1655 static void esw_vport_qos_prune_empty(struct mlx5_vport *vport) 1656 { 1657 struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; 1658 1659 esw_assert_qos_lock_held(vport->dev->priv.eswitch); 1660 if (!vport_node) 1661 return; 1662 1663 if (vport_node->parent || vport_node->max_rate || 1664 vport_node->min_rate || !esw_qos_tc_bw_disabled(vport_node->tc_bw)) 1665 return; 1666 1667 mlx5_esw_qos_vport_disable_locked(vport); 1668 } 1669 1670 int mlx5_esw_qos_init(struct mlx5_eswitch *esw) 1671 { 1672 if (esw->qos.domain) 1673 return 0; /* Nothing to change. 
*/ 1674 1675 return esw_qos_domain_init(esw); 1676 } 1677 1678 void mlx5_esw_qos_cleanup(struct mlx5_eswitch *esw) 1679 { 1680 if (esw->qos.domain) 1681 esw_qos_domain_release(esw); 1682 } 1683 1684 /* Eswitch devlink rate API */ 1685 1686 int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv, 1687 u64 tx_share, struct netlink_ext_ack *extack) 1688 { 1689 struct mlx5_vport *vport = priv; 1690 struct mlx5_eswitch *esw; 1691 int err; 1692 1693 esw = vport->dev->priv.eswitch; 1694 if (!mlx5_esw_allowed(esw)) 1695 return -EPERM; 1696 1697 err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack); 1698 if (err) 1699 return err; 1700 1701 esw_qos_lock(esw); 1702 err = mlx5_esw_qos_set_vport_min_rate(vport, tx_share, extack); 1703 if (err) 1704 goto out; 1705 esw_vport_qos_prune_empty(vport); 1706 out: 1707 esw_qos_unlock(esw); 1708 return err; 1709 } 1710 1711 int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv, 1712 u64 tx_max, struct netlink_ext_ack *extack) 1713 { 1714 struct mlx5_vport *vport = priv; 1715 struct mlx5_eswitch *esw; 1716 int err; 1717 1718 esw = vport->dev->priv.eswitch; 1719 if (!mlx5_esw_allowed(esw)) 1720 return -EPERM; 1721 1722 err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack); 1723 if (err) 1724 return err; 1725 1726 esw_qos_lock(esw); 1727 err = mlx5_esw_qos_set_vport_max_rate(vport, tx_max, extack); 1728 if (err) 1729 goto out; 1730 esw_vport_qos_prune_empty(vport); 1731 out: 1732 esw_qos_unlock(esw); 1733 return err; 1734 } 1735 1736 int mlx5_esw_devlink_rate_leaf_tc_bw_set(struct devlink_rate *rate_leaf, 1737 void *priv, 1738 u32 *tc_bw, 1739 struct netlink_ext_ack *extack) 1740 { 1741 struct mlx5_esw_sched_node *vport_node; 1742 struct mlx5_vport *vport = priv; 1743 struct mlx5_eswitch *esw; 1744 bool disable; 1745 int err = 0; 1746 1747 esw = vport->dev->priv.eswitch; 1748 if (!mlx5_esw_allowed(esw)) 1749 return -EPERM; 1750 1751 disable = esw_qos_tc_bw_disabled(tc_bw); 1752 esw_qos_lock(esw); 1753 1754 if (!esw_qos_vport_validate_unsupported_tc_bw(vport, tc_bw)) { 1755 NL_SET_ERR_MSG_MOD(extack, 1756 "E-Switch traffic classes number is not supported"); 1757 err = -EOPNOTSUPP; 1758 goto unlock; 1759 } 1760 1761 vport_node = vport->qos.sched_node; 1762 if (disable && !vport_node) 1763 goto unlock; 1764 1765 if (disable) { 1766 if (vport_node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) 1767 err = esw_qos_vport_update(vport, SCHED_NODE_TYPE_VPORT, 1768 vport_node->parent, extack); 1769 esw_vport_qos_prune_empty(vport); 1770 goto unlock; 1771 } 1772 1773 if (!vport_node) { 1774 err = mlx5_esw_qos_vport_enable(vport, 1775 SCHED_NODE_TYPE_TC_ARBITER_TSAR, 1776 NULL, 0, 0, extack); 1777 vport_node = vport->qos.sched_node; 1778 } else { 1779 err = esw_qos_vport_update(vport, 1780 SCHED_NODE_TYPE_TC_ARBITER_TSAR, 1781 vport_node->parent, extack); 1782 } 1783 if (!err) 1784 esw_qos_set_tc_arbiter_bw_shares(vport_node, tc_bw, extack); 1785 unlock: 1786 esw_qos_unlock(esw); 1787 return err; 1788 } 1789 1790 int mlx5_esw_devlink_rate_node_tc_bw_set(struct devlink_rate *rate_node, 1791 void *priv, 1792 u32 *tc_bw, 1793 struct netlink_ext_ack *extack) 1794 { 1795 struct mlx5_esw_sched_node *node = priv; 1796 struct mlx5_eswitch *esw = node->esw; 1797 bool disable; 1798 int err; 1799 1800 if (!esw_qos_validate_unsupported_tc_bw(esw, tc_bw)) { 1801 NL_SET_ERR_MSG_MOD(extack, 1802 "E-Switch traffic classes number is not supported"); 1803 return -EOPNOTSUPP; 1804 } 1805 1806 
disable = esw_qos_tc_bw_disabled(tc_bw); 1807 esw_qos_lock(esw); 1808 if (disable) { 1809 err = esw_qos_node_disable_tc_arbitration(node, extack); 1810 goto unlock; 1811 } 1812 1813 err = esw_qos_node_enable_tc_arbitration(node, extack); 1814 if (!err) 1815 esw_qos_set_tc_arbiter_bw_shares(node, tc_bw, extack); 1816 unlock: 1817 esw_qos_unlock(esw); 1818 return err; 1819 } 1820 1821 int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv, 1822 u64 tx_share, struct netlink_ext_ack *extack) 1823 { 1824 struct mlx5_esw_sched_node *node = priv; 1825 struct mlx5_eswitch *esw = node->esw; 1826 int err; 1827 1828 err = esw_qos_devlink_rate_to_mbps(esw->dev, "tx_share", &tx_share, extack); 1829 if (err) 1830 return err; 1831 1832 esw_qos_lock(esw); 1833 err = esw_qos_set_node_min_rate(node, tx_share, extack); 1834 esw_qos_unlock(esw); 1835 return err; 1836 } 1837 1838 int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv, 1839 u64 tx_max, struct netlink_ext_ack *extack) 1840 { 1841 struct mlx5_esw_sched_node *node = priv; 1842 struct mlx5_eswitch *esw = node->esw; 1843 int err; 1844 1845 err = esw_qos_devlink_rate_to_mbps(esw->dev, "tx_max", &tx_max, extack); 1846 if (err) 1847 return err; 1848 1849 esw_qos_lock(esw); 1850 err = esw_qos_sched_elem_config(node, tx_max, node->bw_share, extack); 1851 esw_qos_unlock(esw); 1852 return err; 1853 } 1854 1855 int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv, 1856 struct netlink_ext_ack *extack) 1857 { 1858 struct mlx5_esw_sched_node *node; 1859 struct mlx5_eswitch *esw; 1860 int err = 0; 1861 1862 esw = mlx5_devlink_eswitch_get(rate_node->devlink); 1863 if (IS_ERR(esw)) 1864 return PTR_ERR(esw); 1865 1866 esw_qos_lock(esw); 1867 if (esw->mode != MLX5_ESWITCH_OFFLOADS) { 1868 NL_SET_ERR_MSG_MOD(extack, 1869 "Rate node creation supported only in switchdev mode"); 1870 err = -EOPNOTSUPP; 1871 goto unlock; 1872 } 1873 1874 node = esw_qos_create_vports_sched_node(esw, extack); 1875 if (IS_ERR(node)) { 1876 err = PTR_ERR(node); 1877 goto unlock; 1878 } 1879 1880 *priv = node; 1881 unlock: 1882 esw_qos_unlock(esw); 1883 return err; 1884 } 1885 1886 int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv, 1887 struct netlink_ext_ack *extack) 1888 { 1889 struct mlx5_esw_sched_node *node = priv; 1890 struct mlx5_eswitch *esw = node->esw; 1891 1892 esw_qos_lock(esw); 1893 __esw_qos_destroy_node(node, extack); 1894 esw_qos_put(esw); 1895 esw_qos_unlock(esw); 1896 return 0; 1897 } 1898 1899 int mlx5_esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent, 1900 struct netlink_ext_ack *extack) 1901 { 1902 struct mlx5_eswitch *esw = vport->dev->priv.eswitch; 1903 int err = 0; 1904 1905 if (parent && parent->esw != esw) { 1906 NL_SET_ERR_MSG_MOD(extack, "Cross E-Switch scheduling is not supported"); 1907 return -EOPNOTSUPP; 1908 } 1909 1910 esw_qos_lock(esw); 1911 if (!vport->qos.sched_node && parent) { 1912 enum sched_node_type type; 1913 1914 type = parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR ? 
		type = parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR ?
		       SCHED_NODE_TYPE_RATE_LIMITER : SCHED_NODE_TYPE_VPORT;
		err = mlx5_esw_qos_vport_enable(vport, type, parent, 0, 0,
						extack);
	} else if (vport->qos.sched_node) {
		err = esw_qos_vport_update_parent(vport, parent, extack);
	}
	esw_qos_unlock(esw);
	return err;
}

int mlx5_esw_devlink_rate_leaf_parent_set(struct devlink_rate *devlink_rate,
					  struct devlink_rate *parent,
					  void *priv, void *parent_priv,
					  struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node = parent ? parent_priv : NULL;
	struct mlx5_vport *vport = priv;
	int err;

	err = mlx5_esw_qos_vport_update_parent(vport, node, extack);
	if (!err) {
		struct mlx5_eswitch *esw = vport->dev->priv.eswitch;

		esw_qos_lock(esw);
		esw_vport_qos_prune_empty(vport);
		esw_qos_unlock(esw);
	}

	return err;
}

static bool esw_qos_is_node_empty(struct mlx5_esw_sched_node *node)
{
	if (list_empty(&node->children))
		return true;

	if (node->type != SCHED_NODE_TYPE_TC_ARBITER_TSAR)
		return false;

	node = list_first_entry(&node->children, struct mlx5_esw_sched_node,
				entry);

	return esw_qos_is_node_empty(node);
}

static int
mlx5_esw_qos_node_validate_set_parent(struct mlx5_esw_sched_node *node,
				      struct mlx5_esw_sched_node *parent,
				      struct netlink_ext_ack *extack)
{
	u8 new_level, max_level;

	if (parent && parent->esw != node->esw) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Cannot assign node to another E-Switch");
		return -EOPNOTSUPP;
	}

	if (!esw_qos_is_node_empty(node)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Cannot reassign a node that contains rate objects");
		return -EOPNOTSUPP;
	}

	if (parent && parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Cannot attach a node to a parent with TC bandwidth configured");
		return -EOPNOTSUPP;
	}

	new_level = parent ? parent->level + 1 : 2;
	if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
		/* Increase by one to account for the vports TC scheduling
		 * element.
		 */
		new_level += 1;
	}

	max_level = 1 << MLX5_CAP_QOS(node->esw->dev, log_esw_max_sched_depth);
	if (new_level > max_level) {
		NL_SET_ERR_MSG_FMT_MOD(extack,
				       "Node hierarchy depth %d exceeds the maximum supported level %d",
				       new_level, max_level);
		return -EOPNOTSUPP;
	}

	return 0;
}

/* Reparenting a TC arbiter node: save its per-TC shares, tear down its
 * scheduling elements, rebuild them under the new parent (falling back to the
 * current parent on failure) and reapply the shares.
 */
static int
esw_qos_tc_arbiter_node_update_parent(struct mlx5_esw_sched_node *node,
				      struct mlx5_esw_sched_node *parent,
				      struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *curr_parent = node->parent;
	u32 curr_tc_bw[DEVLINK_RATE_TCS_MAX] = {0};
	struct mlx5_eswitch *esw = node->esw;
	int err;

	esw_qos_tc_arbiter_get_bw_shares(node, curr_tc_bw);
	esw_qos_tc_arbiter_scheduling_teardown(node, extack);
	esw_qos_node_set_parent(node, parent);
	err = esw_qos_tc_arbiter_scheduling_setup(node, extack);
	if (err) {
		esw_qos_node_set_parent(node, curr_parent);
		if (esw_qos_tc_arbiter_scheduling_setup(node, extack)) {
			esw_warn(esw->dev, "Node restore QoS failed\n");
			return err;
		}
	}
	esw_qos_set_tc_arbiter_bw_shares(node, curr_tc_bw, extack);

	return err;
}

/* Reparenting a vports TSAR node: recreate its scheduling element under the
 * new parent (or under the root TSAR when parent is NULL), restoring the
 * previous element if the new one cannot be created.
 */
static int esw_qos_vports_node_update_parent(struct mlx5_esw_sched_node *node,
					     struct mlx5_esw_sched_node *parent,
					     struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *curr_parent = node->parent;
	struct mlx5_eswitch *esw = node->esw;
	u32 parent_ix;
	int err;

	parent_ix = parent ? parent->ix : node->esw->qos.root_tsar_ix;
	mlx5_destroy_scheduling_element_cmd(esw->dev,
					    SCHEDULING_HIERARCHY_E_SWITCH,
					    node->ix);
	err = esw_qos_create_node_sched_elem(esw->dev, parent_ix,
					     node->max_rate, 0, &node->ix);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Failed to create a node under the new hierarchy.");
		if (esw_qos_create_node_sched_elem(esw->dev, curr_parent->ix,
						   node->max_rate,
						   node->bw_share,
						   &node->ix))
			esw_warn(esw->dev, "Node restore QoS failed\n");

		return err;
	}
	esw_qos_node_set_parent(node, parent);
	node->bw_share = 0;

	return 0;
}

static int mlx5_esw_qos_node_update_parent(struct mlx5_esw_sched_node *node,
					   struct mlx5_esw_sched_node *parent,
					   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *curr_parent;
	struct mlx5_eswitch *esw = node->esw;
	int err;

	err = mlx5_esw_qos_node_validate_set_parent(node, parent, extack);
	if (err)
		return err;

	esw_qos_lock(esw);
	curr_parent = node->parent;
	if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
		err = esw_qos_tc_arbiter_node_update_parent(node, parent,
							    extack);
	} else {
		err = esw_qos_vports_node_update_parent(node, parent, extack);
	}

	if (err)
		goto out;

	esw_qos_normalize_min_rate(esw, curr_parent, extack);
	esw_qos_normalize_min_rate(esw, parent, extack);

out:
	esw_qos_unlock(esw);

	return err;
}

int mlx5_esw_devlink_rate_node_parent_set(struct devlink_rate *devlink_rate,
					  struct devlink_rate *parent,
					  void *priv, void *parent_priv,
					  struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node = priv, *parent_node;

	if (!parent)
		return mlx5_esw_qos_node_update_parent(node, NULL, extack);

	parent_node = parent_priv;
	return mlx5_esw_qos_node_update_parent(node, parent_node, extack);
}
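
/* Example of exercising the devlink rate callbacks above from userspace with
 * the iproute2 "devlink" tool. The PCI address, port index and node name are
 * illustrative only, and the exact option syntax may vary between iproute2
 * versions:
 *
 *   # Create a rate node (group) and cap it at 100 Mbit/s:
 *   devlink port function rate add pci/0000:08:00.0/group1
 *   devlink port function rate set pci/0000:08:00.0/group1 tx_max 100mbit
 *
 *   # Guarantee a vport leaf 10 Mbit/s and attach it to the node:
 *   devlink port function rate set pci/0000:08:00.0/1 tx_share 10mbit
 *   devlink port function rate set pci/0000:08:00.0/1 parent group1
 */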