// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
 */

#include "mlx5_ib.h"
#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
#include "counters.h"
#include "ib_rep.h"
#include "qp.h"

struct mlx5_ib_counter {
	const char *name;
	size_t offset;
	u32 type;
};

#define INIT_Q_COUNTER(_name)		\
	{ .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}

#define INIT_VPORT_Q_COUNTER(_name)		\
	{ .name = "vport_" #_name, .offset =	\
	  MLX5_BYTE_OFF(query_q_counter_out, _name)}

static const struct mlx5_ib_counter basic_q_cnts[] = {
	INIT_Q_COUNTER(rx_write_requests),
	INIT_Q_COUNTER(rx_read_requests),
	INIT_Q_COUNTER(rx_atomic_requests),
	INIT_Q_COUNTER(rx_dct_connect),
	INIT_Q_COUNTER(out_of_buffer),
};

static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
	INIT_Q_COUNTER(out_of_sequence),
};

static const struct mlx5_ib_counter retrans_q_cnts[] = {
	INIT_Q_COUNTER(duplicate_request),
	INIT_Q_COUNTER(rnr_nak_retry_err),
	INIT_Q_COUNTER(packet_seq_err),
	INIT_Q_COUNTER(implied_nak_seq_err),
	INIT_Q_COUNTER(local_ack_timeout_err),
};

static const struct mlx5_ib_counter vport_basic_q_cnts[] = {
	INIT_VPORT_Q_COUNTER(rx_write_requests),
	INIT_VPORT_Q_COUNTER(rx_read_requests),
	INIT_VPORT_Q_COUNTER(rx_atomic_requests),
	INIT_VPORT_Q_COUNTER(rx_dct_connect),
	INIT_VPORT_Q_COUNTER(out_of_buffer),
};

static const struct mlx5_ib_counter vport_out_of_seq_q_cnts[] = {
	INIT_VPORT_Q_COUNTER(out_of_sequence),
};

static const struct mlx5_ib_counter vport_retrans_q_cnts[] = {
	INIT_VPORT_Q_COUNTER(duplicate_request),
	INIT_VPORT_Q_COUNTER(rnr_nak_retry_err),
	INIT_VPORT_Q_COUNTER(packet_seq_err),
	INIT_VPORT_Q_COUNTER(implied_nak_seq_err),
	INIT_VPORT_Q_COUNTER(local_ack_timeout_err),
};

#define INIT_CONG_COUNTER(_name)		\
	{ .name = #_name, .offset =	\
		MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}

static const struct mlx5_ib_counter cong_cnts[] = {
	INIT_CONG_COUNTER(rp_cnp_ignored),
	INIT_CONG_COUNTER(rp_cnp_handled),
	INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
	INIT_CONG_COUNTER(np_cnp_sent),
};

static const struct mlx5_ib_counter extended_err_cnts[] = {
	INIT_Q_COUNTER(resp_local_length_error),
	INIT_Q_COUNTER(resp_cqe_error),
	INIT_Q_COUNTER(req_cqe_error),
	INIT_Q_COUNTER(req_remote_invalid_request),
	INIT_Q_COUNTER(req_remote_access_errors),
	INIT_Q_COUNTER(resp_remote_access_errors),
	INIT_Q_COUNTER(resp_cqe_flush_error),
	INIT_Q_COUNTER(req_cqe_flush_error),
	INIT_Q_COUNTER(req_transport_retries_exceeded),
	INIT_Q_COUNTER(req_rnr_retries_exceeded),
};

static const struct mlx5_ib_counter roce_accl_cnts[] = {
	INIT_Q_COUNTER(roce_adp_retrans),
	INIT_Q_COUNTER(roce_adp_retrans_to),
	INIT_Q_COUNTER(roce_slow_restart),
	INIT_Q_COUNTER(roce_slow_restart_cnps),
	INIT_Q_COUNTER(roce_slow_restart_trans),
};

static const struct mlx5_ib_counter vport_extended_err_cnts[] = {
	INIT_VPORT_Q_COUNTER(resp_local_length_error),
	INIT_VPORT_Q_COUNTER(resp_cqe_error),
	INIT_VPORT_Q_COUNTER(req_cqe_error),
	INIT_VPORT_Q_COUNTER(req_remote_invalid_request),
	INIT_VPORT_Q_COUNTER(req_remote_access_errors),
	INIT_VPORT_Q_COUNTER(resp_remote_access_errors),
	INIT_VPORT_Q_COUNTER(resp_cqe_flush_error),
	INIT_VPORT_Q_COUNTER(req_cqe_flush_error),
	INIT_VPORT_Q_COUNTER(req_transport_retries_exceeded),
	INIT_VPORT_Q_COUNTER(req_rnr_retries_exceeded),
};

static const struct mlx5_ib_counter vport_roce_accl_cnts[] = {
	INIT_VPORT_Q_COUNTER(roce_adp_retrans),
	INIT_VPORT_Q_COUNTER(roce_adp_retrans_to),
	INIT_VPORT_Q_COUNTER(roce_slow_restart),
	INIT_VPORT_Q_COUNTER(roce_slow_restart_cnps),
	INIT_VPORT_Q_COUNTER(roce_slow_restart_trans),
};

#define INIT_EXT_PPCNT_COUNTER(_name)		\
	{ .name = #_name, .offset =	\
	  MLX5_BYTE_OFF(ppcnt_reg, \
		      counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}

static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
	INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
};

#define INIT_OP_COUNTER(_name, _type)		\
	{ .name = #_name, .type = MLX5_IB_OPCOUNTER_##_type}

static const struct mlx5_ib_counter basic_op_cnts[] = {
	INIT_OP_COUNTER(cc_rx_ce_pkts, CC_RX_CE_PKTS),
};

static const struct mlx5_ib_counter rdmarx_cnp_op_cnts[] = {
	INIT_OP_COUNTER(cc_rx_cnp_pkts, CC_RX_CNP_PKTS),
};

static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = {
	INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS),
};

static int mlx5_ib_read_counters(struct ib_counters *counters,
				 struct ib_counters_read_attr *read_attr,
				 struct uverbs_attr_bundle *attrs)
{
	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
	struct mlx5_read_counters_attr mread_attr = {};
	struct mlx5_ib_flow_counters_desc *desc;
	int ret, i;

	mutex_lock(&mcounters->mcntrs_mutex);
	if (mcounters->cntrs_max_index > read_attr->ncounters) {
		ret = -EINVAL;
		goto err_bound;
	}

	mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
				 GFP_KERNEL);
	if (!mread_attr.out) {
		ret = -ENOMEM;
		goto err_bound;
	}

	mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
	mread_attr.flags = read_attr->flags;
	ret = mcounters->read_counters(counters->device, &mread_attr);
	if (ret)
		goto err_read;

	/* do the pass over the counters data array to assign according to the
	 * descriptions and indexing pairs
	 */
	desc = mcounters->counters_data;
	for (i = 0; i < mcounters->ncounters; i++)
		read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];

err_read:
	kfree(mread_attr.out);
err_bound:
	mutex_unlock(&mcounters->mcntrs_mutex);
	return ret;
}

static int mlx5_ib_destroy_counters(struct ib_counters *counters)
{
	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);

	mlx5_ib_counters_clear_description(counters);
	if (mcounters->hw_cntrs_hndl)
		mlx5_fc_destroy(to_mdev(counters->device)->mdev,
				mcounters->hw_cntrs_hndl);
	return 0;
}

static int mlx5_ib_create_counters(struct ib_counters *counters,
				   struct uverbs_attr_bundle *attrs)
{
	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);

	mutex_init(&mcounters->mcntrs_mutex);
	return 0;
}

static bool vport_qcounters_supported(struct mlx5_ib_dev *dev)
{
	return MLX5_CAP_GEN(dev->mdev, q_counter_other_vport) &&
	       MLX5_CAP_GEN(dev->mdev, q_counter_aggregation);
}

static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
						   u32 port_num)
{
	if ((is_mdev_switchdev_mode(dev->mdev) &&
	     !vport_qcounters_supported(dev)) || !port_num)
		return &dev->port[0].cnts;

	return is_mdev_switchdev_mode(dev->mdev) ?
	       &dev->port[1].cnts : &dev->port[port_num - 1].cnts;
}

/**
 * mlx5_ib_get_counters_id - Returns counters id to use for device+port
 * @dev: Pointer to mlx5 IB device
 * @port_num: Zero based port number
 *
 * mlx5_ib_get_counters_id() returns the counter set id to use for the
 * given device and port combination, in both switchdev and non-switchdev
 * mode of the parent device.
 */
u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num)
{
	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num + 1);

	return cnts->set_id;
}

static struct rdma_hw_stats *do_alloc_stats(const struct mlx5_ib_counters *cnts)
{
	struct rdma_hw_stats *stats;
	u32 num_hw_counters;
	int i;

	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
			  cnts->num_ext_ppcnt_counters;
	stats = rdma_alloc_hw_stats_struct(cnts->descs,
					   num_hw_counters +
					   cnts->num_op_counters,
					   RDMA_HW_STATS_DEFAULT_LIFESPAN);
	if (!stats)
		return NULL;

	for (i = 0; i < cnts->num_op_counters; i++)
		set_bit(num_hw_counters + i, stats->is_disabled);

	return stats;
}

static struct rdma_hw_stats *
mlx5_ib_alloc_hw_device_stats(struct ib_device *ibdev)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	const struct mlx5_ib_counters *cnts = &dev->port[0].cnts;

	return do_alloc_stats(cnts);
}

static struct rdma_hw_stats *
mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);

	return do_alloc_stats(cnts);
}

static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
				    const struct mlx5_ib_counters *cnts,
				    struct rdma_hw_stats *stats,
				    u16 set_id)
{
	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
	__be32 val;
	int ret, i;

	MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
	MLX5_SET(query_q_counter_in, in, counter_set_id, set_id);
	ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
	if (ret)
		return ret;

	for (i = 0; i < cnts->num_q_counters; i++) {
		val = *(__be32 *)((void *)out + cnts->offsets[i]);
		stats->value[i] = (u64)be32_to_cpu(val);
	}

	return 0;
}

static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
					    const struct mlx5_ib_counters *cnts,
					    struct rdma_hw_stats *stats)
{
	int offset = cnts->num_q_counters + cnts->num_cong_counters;
	u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
	int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
	int ret, i;
	void *out;

	out = kvzalloc(sz, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	MLX5_SET(ppcnt_reg, in, local_port, 1);
	MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
	ret = mlx5_core_access_reg(dev->mdev, in, sz, out, sz, MLX5_REG_PPCNT,
				   0, 0);
	if (ret)
		goto free;

	for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
		stats->value[i + offset] =
			be64_to_cpup((__be64 *)(out +
				     cnts->offsets[i + offset]));
free:
	kvfree(out);
	return ret;
}

static int mlx5_ib_query_q_counters_vport(struct mlx5_ib_dev *dev,
					   u32 port_num,
					   const struct mlx5_ib_counters *cnts,
					   struct rdma_hw_stats *stats)

{
	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
	struct mlx5_core_dev *mdev;
	__be32 val;
	int ret, i;

	if (!dev->port[port_num].rep ||
	    dev->port[port_num].rep->vport == MLX5_VPORT_UPLINK)
		return 0;

	mdev = mlx5_eswitch_get_core_dev(dev->port[port_num].rep->esw);
	if (!mdev)
		return -EOPNOTSUPP;

	MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
	MLX5_SET(query_q_counter_in, in, other_vport, 1);
	MLX5_SET(query_q_counter_in, in, vport_number,
		 dev->port[port_num].rep->vport);
	MLX5_SET(query_q_counter_in, in, aggregate, 1);
	ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
	if (ret)
		return ret;

	for (i = 0; i < cnts->num_q_counters; i++) {
		val = *(__be32 *)((void *)out + cnts->offsets[i]);
		stats->value[i] = (u64)be32_to_cpu(val);
	}

	return 0;
}

static int do_get_hw_stats(struct ib_device *ibdev,
			   struct rdma_hw_stats *stats,
			   u32 port_num, int index)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
	struct mlx5_core_dev *mdev;
	int ret, num_counters;

	if (!stats)
		return -EINVAL;

	num_counters = cnts->num_q_counters +
		       cnts->num_cong_counters +
		       cnts->num_ext_ppcnt_counters;

	if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0)
		ret = mlx5_ib_query_q_counters_vport(dev, port_num - 1, cnts,
						     stats);
	else
		ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats,
					       cnts->set_id);
	if (ret)
		return ret;

	/* We don't expose device counters over Vports */
	if (is_mdev_switchdev_mode(dev->mdev) && port_num != 0)
		goto done;

	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
		ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
		if (ret)
			return ret;
	}

	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
		if (!port_num)
			port_num = 1;
		mdev = mlx5_ib_get_native_port_mdev(dev, port_num, NULL);
		if (!mdev) {
			/* If the port is not affiliated yet, it's in down
			 * state, which doesn't have any counters yet, so it
			 * would be zero. So no need to read from the HCA.
			 */
			goto done;
		}
		ret = mlx5_lag_query_cong_counters(dev->mdev,
						   stats->value +
						   cnts->num_q_counters,
						   cnts->num_cong_counters,
						   cnts->offsets +
						   cnts->num_q_counters);

		mlx5_ib_put_native_port_mdev(dev, port_num);
		if (ret)
			return ret;
	}

done:
	return num_counters;
}

static int do_get_op_stat(struct ib_device *ibdev,
			  struct rdma_hw_stats *stats,
			  u32 port_num, int index)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	const struct mlx5_ib_counters *cnts;
	const struct mlx5_ib_op_fc *opfcs;
	u64 packets = 0, bytes;
	u32 type;
	int ret;

	cnts = get_counters(dev, port_num);

	opfcs = cnts->opfcs;
	type = *(u32 *)cnts->descs[index].priv;
	if (type >= MLX5_IB_OPCOUNTER_MAX)
		return -EINVAL;

	if (!opfcs[type].fc)
		goto out;

	ret = mlx5_fc_query(dev->mdev, opfcs[type].fc,
			    &packets, &bytes);
	if (ret)
		return ret;

out:
	stats->value[index] = packets;
	return index;
}

static int do_get_op_stats(struct ib_device *ibdev,
			   struct rdma_hw_stats *stats,
			   u32 port_num)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	const struct mlx5_ib_counters *cnts;
	int index, ret, num_hw_counters;

	cnts = get_counters(dev, port_num);
	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
			  cnts->num_ext_ppcnt_counters;
	for (index = num_hw_counters;
	     index < (num_hw_counters + cnts->num_op_counters); index++) {
		ret = do_get_op_stat(ibdev, stats, port_num, index);
		if (ret != index)
			return ret;
	}

	return cnts->num_op_counters;
}

static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
				struct rdma_hw_stats *stats,
				u32 port_num, int index)
{
	int num_counters, num_hw_counters, num_op_counters;
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	const struct mlx5_ib_counters *cnts;

	cnts = get_counters(dev, port_num);
	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
			  cnts->num_ext_ppcnt_counters;
	num_counters = num_hw_counters + cnts->num_op_counters;

	if (index < 0 || index > num_counters)
		return -EINVAL;
	else if (index > 0 && index < num_hw_counters)
		return do_get_hw_stats(ibdev, stats, port_num, index);
	else if (index >= num_hw_counters && index < num_counters)
		return do_get_op_stat(ibdev, stats, port_num, index);

	num_hw_counters = do_get_hw_stats(ibdev, stats, port_num, index);
	if (num_hw_counters < 0)
		return num_hw_counters;

	num_op_counters = do_get_op_stats(ibdev, stats, port_num);
	if (num_op_counters < 0)
		return num_op_counters;

	return num_hw_counters + num_op_counters;
}

static struct rdma_hw_stats *
mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
{
	struct mlx5_ib_dev *dev = to_mdev(counter->device);
	const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);

	return do_alloc_stats(cnts);
}

static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
{
	struct mlx5_ib_dev *dev = to_mdev(counter->device);
	const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);

	return mlx5_ib_query_q_counters(dev->mdev, cnts,
					counter->stats, counter->id);
}

static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
{
	struct mlx5_ib_dev *dev = to_mdev(counter->device);
	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};

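	/* A counter id of 0 means no HW queue counter was allocated for this
	 * rdma counter (see mlx5_ib_counter_bind_qp()), so there is nothing
	 * to release.
	 */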
	if (!counter->id)
		return 0;

	MLX5_SET(dealloc_q_counter_in, in, opcode,
		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
	MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
	return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
}

static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
				   struct ib_qp *qp)
{
	struct mlx5_ib_dev *dev = to_mdev(qp->device);
	int err;

	if (!counter->id) {
		u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
		u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};

		MLX5_SET(alloc_q_counter_in, in, opcode,
			 MLX5_CMD_OP_ALLOC_Q_COUNTER);
		MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID);
		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
		if (err)
			return err;
		counter->id =
			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
	}

	err = mlx5_ib_qp_set_counter(qp, counter);
	if (err)
		goto fail_set_counter;

	return 0;

fail_set_counter:
	mlx5_ib_counter_dealloc(counter);
	counter->id = 0;

	return err;
}

static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
{
	return mlx5_ib_qp_set_counter(qp, NULL);
}

static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
				  struct rdma_stat_desc *descs, size_t *offsets,
				  u32 port_num)
{
	bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
			port_num != MLX5_VPORT_PF;
	const struct mlx5_ib_counter *names;
	int j = 0, i, size;

	names = is_vport ? vport_basic_q_cnts : basic_q_cnts;
	size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
			  ARRAY_SIZE(basic_q_cnts);
	for (i = 0; i < size; i++, j++) {
		descs[j].name = names[i].name;
		offsets[j] = names[i].offset;
	}

	names = is_vport ? vport_out_of_seq_q_cnts : out_of_seq_q_cnts;
	size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
			  ARRAY_SIZE(out_of_seq_q_cnts);
	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
		for (i = 0; i < size; i++, j++) {
			descs[j].name = names[i].name;
			offsets[j] = names[i].offset;
		}
	}

	names = is_vport ? vport_retrans_q_cnts : retrans_q_cnts;
	size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
			  ARRAY_SIZE(retrans_q_cnts);
	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
		for (i = 0; i < size; i++, j++) {
			descs[j].name = names[i].name;
			offsets[j] = names[i].offset;
		}
	}

	names = is_vport ? vport_extended_err_cnts : extended_err_cnts;
	size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
			  ARRAY_SIZE(extended_err_cnts);
	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
		for (i = 0; i < size; i++, j++) {
			descs[j].name = names[i].name;
			offsets[j] = names[i].offset;
		}
	}

	names = is_vport ? vport_roce_accl_cnts : roce_accl_cnts;
	size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
			  ARRAY_SIZE(roce_accl_cnts);
	if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
		for (i = 0; i < size; i++, j++) {
			descs[j].name = names[i].name;
			offsets[j] = names[i].offset;
		}
	}

	if (is_vport)
		return;

	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
		for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
			descs[j].name = cong_cnts[i].name;
			offsets[j] = cong_cnts[i].offset;
		}
	}

	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
		for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
			descs[j].name = ext_ppcnt_cnts[i].name;
			offsets[j] = ext_ppcnt_cnts[i].offset;
		}
	}

	for (i = 0; i < ARRAY_SIZE(basic_op_cnts); i++, j++) {
		descs[j].name = basic_op_cnts[i].name;
		descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
		descs[j].priv = &basic_op_cnts[i].type;
	}

	if (MLX5_CAP_FLOWTABLE(dev->mdev,
			       ft_field_support_2_nic_receive_rdma.bth_opcode)) {
		for (i = 0; i < ARRAY_SIZE(rdmarx_cnp_op_cnts); i++, j++) {
			descs[j].name = rdmarx_cnp_op_cnts[i].name;
			descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
			descs[j].priv = &rdmarx_cnp_op_cnts[i].type;
		}
	}

	if (MLX5_CAP_FLOWTABLE(dev->mdev,
			       ft_field_support_2_nic_transmit_rdma.bth_opcode)) {
		for (i = 0; i < ARRAY_SIZE(rdmatx_cnp_op_cnts); i++, j++) {
			descs[j].name = rdmatx_cnp_op_cnts[i].name;
			descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
			descs[j].priv = &rdmatx_cnp_op_cnts[i].type;
		}
	}
}


static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
				    struct mlx5_ib_counters *cnts, u32 port_num)
{
	bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
			port_num != MLX5_VPORT_PF;
	u32 num_counters, num_op_counters = 0, size;

	size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
			  ARRAY_SIZE(basic_q_cnts);
	num_counters = size;

	size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
			  ARRAY_SIZE(out_of_seq_q_cnts);
	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
		num_counters += size;

	size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
			  ARRAY_SIZE(retrans_q_cnts);
	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
		num_counters += size;

	size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
			  ARRAY_SIZE(extended_err_cnts);
	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
		num_counters += size;

	size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
			  ARRAY_SIZE(roce_accl_cnts);
	if (MLX5_CAP_GEN(dev->mdev, roce_accl))
		num_counters += size;

	cnts->num_q_counters = num_counters;

	if (is_vport)
		goto skip_non_qcounters;

	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
		cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
		num_counters += ARRAY_SIZE(cong_cnts);
	}
	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
		cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
		num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
	}

	num_op_counters = ARRAY_SIZE(basic_op_cnts);

	if (MLX5_CAP_FLOWTABLE(dev->mdev,
			       ft_field_support_2_nic_receive_rdma.bth_opcode))
		num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts);

	if (MLX5_CAP_FLOWTABLE(dev->mdev,
			       ft_field_support_2_nic_transmit_rdma.bth_opcode))
		num_op_counters += ARRAY_SIZE(rdmatx_cnp_op_cnts);

skip_non_qcounters:
	cnts->num_op_counters = num_op_counters;
	num_counters += num_op_counters;
	cnts->descs = kcalloc(num_counters,
			      sizeof(struct rdma_stat_desc), GFP_KERNEL);
	if (!cnts->descs)
		return -ENOMEM;

	cnts->offsets = kcalloc(num_counters,
				sizeof(*cnts->offsets), GFP_KERNEL);
	if (!cnts->offsets)
		goto err;

	return 0;

err:
	kfree(cnts->descs);
	cnts->descs = NULL;
	return -ENOMEM;
}

static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
	int num_cnt_ports = dev->num_ports;
	int i, j;

	if (is_mdev_switchdev_mode(dev->mdev))
		num_cnt_ports = min(2, num_cnt_ports);

	MLX5_SET(dealloc_q_counter_in, in, opcode,
		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);

	for (i = 0; i < num_cnt_ports; i++) {
		if (dev->port[i].cnts.set_id) {
			MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
				 dev->port[i].cnts.set_id);
			mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
		}
		kfree(dev->port[i].cnts.descs);
		kfree(dev->port[i].cnts.offsets);

		for (j = 0; j < MLX5_IB_OPCOUNTER_MAX; j++) {
			if (!dev->port[i].cnts.opfcs[j].fc)
				continue;

			if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
				mlx5_ib_fs_remove_op_fc(dev,
					&dev->port[i].cnts.opfcs[j], j);
			mlx5_fc_destroy(dev->mdev,
					dev->port[i].cnts.opfcs[j].fc);
			dev->port[i].cnts.opfcs[j].fc = NULL;
		}
	}
}

static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
{
	u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
	u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
	int num_cnt_ports = dev->num_ports;
	int err = 0;
	int i;
	bool is_shared;

	MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
	is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;

	/*
	 * In switchdev mode we need to allocate two sets of port counters:
	 * one holds the device Q_counters, which are essentially the real
	 * Q_counters of this device, while the other is used as a helper
	 * for the PF to query all other vports.
	 */
	if (is_mdev_switchdev_mode(dev->mdev))
		num_cnt_ports = min(2, num_cnt_ports);

	for (i = 0; i < num_cnt_ports; i++) {
		err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts, i);
		if (err)
			goto err_alloc;

		mlx5_ib_fill_counters(dev, dev->port[i].cnts.descs,
				      dev->port[i].cnts.offsets, i);

		MLX5_SET(alloc_q_counter_in, in, uid,
			 is_shared ? MLX5_SHARED_RESOURCE_UID : 0);

		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
		if (err) {
			mlx5_ib_warn(dev,
				     "couldn't allocate queue counter for port %d, err %d\n",
				     i + 1, err);
			goto err_alloc;
		}

		dev->port[i].cnts.set_id =
			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
	}
	return 0;

err_alloc:
	mlx5_ib_dealloc_counters(dev);
	return err;
}

static int read_flow_counters(struct ib_device *ibdev,
			      struct mlx5_read_counters_attr *read_attr)
{
	struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
	struct mlx5_ib_dev *dev = to_mdev(ibdev);

	return mlx5_fc_query(dev->mdev, fc,
			     &read_attr->out[IB_COUNTER_PACKETS],
			     &read_attr->out[IB_COUNTER_BYTES]);
}

/* flow counters currently expose two counters: packets and bytes */
#define FLOW_COUNTERS_NUM 2
static int counters_set_description(
	struct ib_counters *counters, enum mlx5_ib_counters_type counters_type,
	struct mlx5_ib_flow_counters_desc *desc_data, u32 ncounters)
{
	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
	u32 cntrs_max_index = 0;
	int i;

	if (counters_type != MLX5_IB_COUNTERS_FLOW)
		return -EINVAL;

	/* init the fields for the object */
	mcounters->type = counters_type;
	mcounters->read_counters = read_flow_counters;
	mcounters->counters_num = FLOW_COUNTERS_NUM;
	mcounters->ncounters = ncounters;
	/* each counter entry has both a description and an index pair */
	for (i = 0; i < ncounters; i++) {
		if (desc_data[i].description > IB_COUNTER_BYTES)
			return -EINVAL;

		if (cntrs_max_index <= desc_data[i].index)
			cntrs_max_index = desc_data[i].index + 1;
	}

	mutex_lock(&mcounters->mcntrs_mutex);
	mcounters->counters_data = desc_data;
	mcounters->cntrs_max_index = cntrs_max_index;
	mutex_unlock(&mcounters->mcntrs_mutex);

	return 0;
}

#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
				   struct mlx5_ib_create_flow *ucmd)
{
	struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
	struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
	struct mlx5_ib_flow_counters_desc *desc_data = NULL;
	bool hw_hndl = false;
	int ret = 0;

	if (ucmd && ucmd->ncounters_data != 0) {
		cntrs_data = ucmd->data;
		if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
			return -EINVAL;

		desc_data = kcalloc(cntrs_data->ncounters,
				    sizeof(*desc_data),
				    GFP_KERNEL);
		if (!desc_data)
			return -ENOMEM;

		if (copy_from_user(desc_data,
				   u64_to_user_ptr(cntrs_data->counters_data),
				   sizeof(*desc_data) * cntrs_data->ncounters)) {
			ret = -EFAULT;
			goto free;
		}
	}

	if (!mcounters->hw_cntrs_hndl) {
		mcounters->hw_cntrs_hndl = mlx5_fc_create(
			to_mdev(ibcounters->device)->mdev, false);
		if (IS_ERR(mcounters->hw_cntrs_hndl)) {
			ret = PTR_ERR(mcounters->hw_cntrs_hndl);
			goto free;
		}
		hw_hndl = true;
	}

	if (desc_data) {
		/* counters already bound to at least one flow */
		if (mcounters->cntrs_max_index) {
			ret = -EINVAL;
			goto free_hndl;
		}

		ret = counters_set_description(ibcounters,
					       MLX5_IB_COUNTERS_FLOW,
					       desc_data,
					       cntrs_data->ncounters);
		if (ret)
			goto free_hndl;

	} else if (!mcounters->cntrs_max_index) {
		/* counters not bound yet, must have udata passed */
		ret = -EINVAL;
		goto free_hndl;
	}

	return 0;

free_hndl:
	if (hw_hndl) {
		mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
				mcounters->hw_cntrs_hndl);
		mcounters->hw_cntrs_hndl = NULL;
	}
free:
	kfree(desc_data);
	return ret;
}

void mlx5_ib_counters_clear_description(struct ib_counters *counters)
{
	struct mlx5_ib_mcounters *mcounters;

	if (!counters || atomic_read(&counters->usecnt) != 1)
		return;

	mcounters = to_mcounters(counters);

	mutex_lock(&mcounters->mcntrs_mutex);
	kfree(mcounters->counters_data);
	mcounters->counters_data = NULL;
	mcounters->cntrs_max_index = 0;
	mutex_unlock(&mcounters->mcntrs_mutex);
}

static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
			       unsigned int index, bool enable)
{
	struct mlx5_ib_dev *dev = to_mdev(device);
	struct mlx5_ib_counters *cnts;
	struct mlx5_ib_op_fc *opfc;
	u32 num_hw_counters, type;
	int ret;

	cnts = &dev->port[port - 1].cnts;
	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
			  cnts->num_ext_ppcnt_counters;
	if (index < num_hw_counters ||
	    index >= (num_hw_counters + cnts->num_op_counters))
		return -EINVAL;

	if (!(cnts->descs[index].flags & IB_STAT_FLAG_OPTIONAL))
		return -EINVAL;

	type = *(u32 *)cnts->descs[index].priv;
	if (type >= MLX5_IB_OPCOUNTER_MAX)
		return -EINVAL;

	opfc = &cnts->opfcs[type];

	if (enable) {
		if (opfc->fc)
			return -EEXIST;

		opfc->fc = mlx5_fc_create(dev->mdev, false);
		if (IS_ERR(opfc->fc))
			return PTR_ERR(opfc->fc);

		ret = mlx5_ib_fs_add_op_fc(dev, port, opfc, type);
		if (ret) {
			mlx5_fc_destroy(dev->mdev, opfc->fc);
			opfc->fc = NULL;
		}
		return ret;
	}

	if (!opfc->fc)
		return -EINVAL;

	mlx5_ib_fs_remove_op_fc(dev, opfc, type);
	mlx5_fc_destroy(dev->mdev, opfc->fc);
	opfc->fc = NULL;
	return 0;
}

static const struct ib_device_ops hw_stats_ops = {
	.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
	.get_hw_stats = mlx5_ib_get_hw_stats,
	.counter_bind_qp = mlx5_ib_counter_bind_qp,
	.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
	.counter_dealloc = mlx5_ib_counter_dealloc,
	.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
	.counter_update_stats = mlx5_ib_counter_update_stats,
	.modify_hw_stat = IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) ?
			  mlx5_ib_modify_stat : NULL,
};

static const struct ib_device_ops hw_switchdev_vport_op = {
	.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
};

static const struct ib_device_ops hw_switchdev_stats_ops = {
	.alloc_hw_device_stats = mlx5_ib_alloc_hw_device_stats,
	.get_hw_stats = mlx5_ib_get_hw_stats,
	.counter_bind_qp = mlx5_ib_counter_bind_qp,
	.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
	.counter_dealloc = mlx5_ib_counter_dealloc,
	.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
	.counter_update_stats = mlx5_ib_counter_update_stats,
};

static const struct ib_device_ops counters_ops = {
	.create_counters = mlx5_ib_create_counters,
	.destroy_counters = mlx5_ib_destroy_counters,
	.read_counters = mlx5_ib_read_counters,

	INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
};

int mlx5_ib_counters_init(struct mlx5_ib_dev *dev)
{
	ib_set_device_ops(&dev->ib_dev, &counters_ops);

	if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
		return 0;

	if (is_mdev_switchdev_mode(dev->mdev)) {
		ib_set_device_ops(&dev->ib_dev, &hw_switchdev_stats_ops);
		if (vport_qcounters_supported(dev))
			ib_set_device_ops(&dev->ib_dev, &hw_switchdev_vport_op);
	} else
		ib_set_device_ops(&dev->ib_dev, &hw_stats_ops);
	return mlx5_ib_alloc_counters(dev);
}

void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev)
{
	if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
		return;

	mlx5_ib_dealloc_counters(dev);
}