1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* 3 * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. 4 */ 5 6 #include "mlx5_ib.h" 7 #include <linux/mlx5/eswitch.h> 8 #include <linux/mlx5/vport.h> 9 #include "counters.h" 10 #include "ib_rep.h" 11 #include "qp.h" 12 13 struct mlx5_ib_counter { 14 const char *name; 15 size_t offset; 16 u32 type; 17 }; 18 19 #define INIT_Q_COUNTER(_name) \ 20 { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)} 21 22 #define INIT_VPORT_Q_COUNTER(_name) \ 23 { .name = "vport_" #_name, .offset = \ 24 MLX5_BYTE_OFF(query_q_counter_out, _name)} 25 26 static const struct mlx5_ib_counter basic_q_cnts[] = { 27 INIT_Q_COUNTER(rx_write_requests), 28 INIT_Q_COUNTER(rx_read_requests), 29 INIT_Q_COUNTER(rx_atomic_requests), 30 INIT_Q_COUNTER(rx_dct_connect), 31 INIT_Q_COUNTER(out_of_buffer), 32 }; 33 34 static const struct mlx5_ib_counter out_of_seq_q_cnts[] = { 35 INIT_Q_COUNTER(out_of_sequence), 36 }; 37 38 static const struct mlx5_ib_counter retrans_q_cnts[] = { 39 INIT_Q_COUNTER(duplicate_request), 40 INIT_Q_COUNTER(rnr_nak_retry_err), 41 INIT_Q_COUNTER(packet_seq_err), 42 INIT_Q_COUNTER(implied_nak_seq_err), 43 INIT_Q_COUNTER(local_ack_timeout_err), 44 }; 45 46 static const struct mlx5_ib_counter vport_basic_q_cnts[] = { 47 INIT_VPORT_Q_COUNTER(rx_write_requests), 48 INIT_VPORT_Q_COUNTER(rx_read_requests), 49 INIT_VPORT_Q_COUNTER(rx_atomic_requests), 50 INIT_VPORT_Q_COUNTER(rx_dct_connect), 51 INIT_VPORT_Q_COUNTER(out_of_buffer), 52 }; 53 54 static const struct mlx5_ib_counter vport_out_of_seq_q_cnts[] = { 55 INIT_VPORT_Q_COUNTER(out_of_sequence), 56 }; 57 58 static const struct mlx5_ib_counter vport_retrans_q_cnts[] = { 59 INIT_VPORT_Q_COUNTER(duplicate_request), 60 INIT_VPORT_Q_COUNTER(rnr_nak_retry_err), 61 INIT_VPORT_Q_COUNTER(packet_seq_err), 62 INIT_VPORT_Q_COUNTER(implied_nak_seq_err), 63 INIT_VPORT_Q_COUNTER(local_ack_timeout_err), 64 }; 65 66 #define INIT_CONG_COUNTER(_name) \ 67 { .name = #_name, .offset = \ 68 MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)} 69 70 static const struct mlx5_ib_counter cong_cnts[] = { 71 INIT_CONG_COUNTER(rp_cnp_ignored), 72 INIT_CONG_COUNTER(rp_cnp_handled), 73 INIT_CONG_COUNTER(np_ecn_marked_roce_packets), 74 INIT_CONG_COUNTER(np_cnp_sent), 75 }; 76 77 static const struct mlx5_ib_counter extended_err_cnts[] = { 78 INIT_Q_COUNTER(resp_local_length_error), 79 INIT_Q_COUNTER(resp_cqe_error), 80 INIT_Q_COUNTER(req_cqe_error), 81 INIT_Q_COUNTER(req_remote_invalid_request), 82 INIT_Q_COUNTER(req_remote_access_errors), 83 INIT_Q_COUNTER(resp_remote_access_errors), 84 INIT_Q_COUNTER(resp_cqe_flush_error), 85 INIT_Q_COUNTER(req_cqe_flush_error), 86 INIT_Q_COUNTER(req_transport_retries_exceeded), 87 INIT_Q_COUNTER(req_rnr_retries_exceeded), 88 }; 89 90 static const struct mlx5_ib_counter roce_accl_cnts[] = { 91 INIT_Q_COUNTER(roce_adp_retrans), 92 INIT_Q_COUNTER(roce_adp_retrans_to), 93 INIT_Q_COUNTER(roce_slow_restart), 94 INIT_Q_COUNTER(roce_slow_restart_cnps), 95 INIT_Q_COUNTER(roce_slow_restart_trans), 96 }; 97 98 static const struct mlx5_ib_counter vport_extended_err_cnts[] = { 99 INIT_VPORT_Q_COUNTER(resp_local_length_error), 100 INIT_VPORT_Q_COUNTER(resp_cqe_error), 101 INIT_VPORT_Q_COUNTER(req_cqe_error), 102 INIT_VPORT_Q_COUNTER(req_remote_invalid_request), 103 INIT_VPORT_Q_COUNTER(req_remote_access_errors), 104 INIT_VPORT_Q_COUNTER(resp_remote_access_errors), 105 INIT_VPORT_Q_COUNTER(resp_cqe_flush_error), 106 INIT_VPORT_Q_COUNTER(req_cqe_flush_error), 107 INIT_VPORT_Q_COUNTER(req_transport_retries_exceeded), 108 INIT_VPORT_Q_COUNTER(req_rnr_retries_exceeded), 109 }; 110 111 static const struct mlx5_ib_counter vport_roce_accl_cnts[] = { 112 INIT_VPORT_Q_COUNTER(roce_adp_retrans), 113 INIT_VPORT_Q_COUNTER(roce_adp_retrans_to), 114 INIT_VPORT_Q_COUNTER(roce_slow_restart), 115 INIT_VPORT_Q_COUNTER(roce_slow_restart_cnps), 116 INIT_VPORT_Q_COUNTER(roce_slow_restart_trans), 117 }; 118 119 #define INIT_EXT_PPCNT_COUNTER(_name) \ 120 { .name = #_name, .offset = \ 121 MLX5_BYTE_OFF(ppcnt_reg, \ 122 counter_set.eth_extended_cntrs_grp_data_layout._name##_high)} 123 124 static const struct mlx5_ib_counter ext_ppcnt_cnts[] = { 125 INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated), 126 }; 127 128 #define INIT_OP_COUNTER(_name, _type) \ 129 { .name = #_name, .type = MLX5_IB_OPCOUNTER_##_type} 130 131 static const struct mlx5_ib_counter basic_op_cnts[] = { 132 INIT_OP_COUNTER(cc_rx_ce_pkts, CC_RX_CE_PKTS), 133 }; 134 135 static const struct mlx5_ib_counter rdmarx_cnp_op_cnts[] = { 136 INIT_OP_COUNTER(cc_rx_cnp_pkts, CC_RX_CNP_PKTS), 137 }; 138 139 static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = { 140 INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS), 141 }; 142 143 static int mlx5_ib_read_counters(struct ib_counters *counters, 144 struct ib_counters_read_attr *read_attr, 145 struct uverbs_attr_bundle *attrs) 146 { 147 struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); 148 struct mlx5_read_counters_attr mread_attr = {}; 149 struct mlx5_ib_flow_counters_desc *desc; 150 int ret, i; 151 152 mutex_lock(&mcounters->mcntrs_mutex); 153 if (mcounters->cntrs_max_index > read_attr->ncounters) { 154 ret = -EINVAL; 155 goto err_bound; 156 } 157 158 mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64), 159 GFP_KERNEL); 160 if (!mread_attr.out) { 161 ret = -ENOMEM; 162 goto err_bound; 163 } 164 165 mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl; 166 mread_attr.flags = read_attr->flags; 167 ret = mcounters->read_counters(counters->device, &mread_attr); 168 if (ret) 169 goto err_read; 170 171 /* do the pass over the counters data array to assign according to the 172 * descriptions and indexing pairs 173 */ 174 desc = mcounters->counters_data; 175 for (i = 0; i < mcounters->ncounters; i++) 176 read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description]; 177 178 err_read: 179 kfree(mread_attr.out); 180 err_bound: 181 mutex_unlock(&mcounters->mcntrs_mutex); 182 return ret; 183 } 184 185 static int mlx5_ib_destroy_counters(struct ib_counters *counters) 186 { 187 struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); 188 189 mlx5_ib_counters_clear_description(counters); 190 if (mcounters->hw_cntrs_hndl) 191 mlx5_fc_destroy(to_mdev(counters->device)->mdev, 192 mcounters->hw_cntrs_hndl); 193 return 0; 194 } 195 196 static int mlx5_ib_create_counters(struct ib_counters *counters, 197 struct uverbs_attr_bundle *attrs) 198 { 199 struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); 200 201 mutex_init(&mcounters->mcntrs_mutex); 202 return 0; 203 } 204 205 static bool vport_qcounters_supported(struct mlx5_ib_dev *dev) 206 { 207 return MLX5_CAP_GEN(dev->mdev, q_counter_other_vport) && 208 MLX5_CAP_GEN(dev->mdev, q_counter_aggregation); 209 } 210 211 static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev, 212 u32 port_num) 213 { 214 if ((is_mdev_switchdev_mode(dev->mdev) && 215 !vport_qcounters_supported(dev)) || !port_num) 216 return &dev->port[0].cnts; 217 218 return is_mdev_switchdev_mode(dev->mdev) ? 219 &dev->port[1].cnts : &dev->port[port_num - 1].cnts; 220 } 221 222 /** 223 * mlx5_ib_get_counters_id - Returns counters id to use for device+port 224 * @dev: Pointer to mlx5 IB device 225 * @port_num: Zero based port number 226 * 227 * mlx5_ib_get_counters_id() Returns counters set id to use for given 228 * device port combination in switchdev and non switchdev mode of the 229 * parent device. 230 */ 231 u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num) 232 { 233 const struct mlx5_ib_counters *cnts = get_counters(dev, port_num + 1); 234 235 return cnts->set_id; 236 } 237 238 static struct rdma_hw_stats *do_alloc_stats(const struct mlx5_ib_counters *cnts) 239 { 240 struct rdma_hw_stats *stats; 241 u32 num_hw_counters; 242 int i; 243 244 num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters + 245 cnts->num_ext_ppcnt_counters; 246 stats = rdma_alloc_hw_stats_struct(cnts->descs, 247 num_hw_counters + 248 cnts->num_op_counters, 249 RDMA_HW_STATS_DEFAULT_LIFESPAN); 250 if (!stats) 251 return NULL; 252 253 for (i = 0; i < cnts->num_op_counters; i++) 254 set_bit(num_hw_counters + i, stats->is_disabled); 255 256 return stats; 257 } 258 259 static struct rdma_hw_stats * 260 mlx5_ib_alloc_hw_device_stats(struct ib_device *ibdev) 261 { 262 struct mlx5_ib_dev *dev = to_mdev(ibdev); 263 const struct mlx5_ib_counters *cnts = &dev->port[0].cnts; 264 265 return do_alloc_stats(cnts); 266 } 267 268 static struct rdma_hw_stats * 269 mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num) 270 { 271 struct mlx5_ib_dev *dev = to_mdev(ibdev); 272 const struct mlx5_ib_counters *cnts = get_counters(dev, port_num); 273 274 return do_alloc_stats(cnts); 275 } 276 277 static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev, 278 const struct mlx5_ib_counters *cnts, 279 struct rdma_hw_stats *stats, 280 u16 set_id) 281 { 282 u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {}; 283 u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {}; 284 __be32 val; 285 int ret, i; 286 287 MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER); 288 MLX5_SET(query_q_counter_in, in, counter_set_id, set_id); 289 ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out); 290 if (ret) 291 return ret; 292 293 for (i = 0; i < cnts->num_q_counters; i++) { 294 val = *(__be32 *)((void *)out + cnts->offsets[i]); 295 stats->value[i] = (u64)be32_to_cpu(val); 296 } 297 298 return 0; 299 } 300 301 static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev, 302 const struct mlx5_ib_counters *cnts, 303 struct rdma_hw_stats *stats) 304 { 305 int offset = cnts->num_q_counters + cnts->num_cong_counters; 306 u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; 307 int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); 308 int ret, i; 309 void *out; 310 311 out = kvzalloc(sz, GFP_KERNEL); 312 if (!out) 313 return -ENOMEM; 314 315 MLX5_SET(ppcnt_reg, in, local_port, 1); 316 MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP); 317 ret = mlx5_core_access_reg(dev->mdev, in, sz, out, sz, MLX5_REG_PPCNT, 318 0, 0); 319 if (ret) 320 goto free; 321 322 for (i = 0; i < cnts->num_ext_ppcnt_counters; i++) 323 stats->value[i + offset] = 324 be64_to_cpup((__be64 *)(out + 325 cnts->offsets[i + offset])); 326 free: 327 kvfree(out); 328 return ret; 329 } 330 331 static int mlx5_ib_query_q_counters_vport(struct mlx5_ib_dev *dev, 332 u32 port_num, 333 const struct mlx5_ib_counters *cnts, 334 struct rdma_hw_stats *stats) 335 336 { 337 u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {}; 338 u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {}; 339 struct mlx5_core_dev *mdev; 340 __be32 val; 341 int ret, i; 342 343 if (!dev->port[port_num].rep || 344 dev->port[port_num].rep->vport == MLX5_VPORT_UPLINK) 345 return 0; 346 347 mdev = mlx5_eswitch_get_core_dev(dev->port[port_num].rep->esw); 348 if (!mdev) 349 return -EOPNOTSUPP; 350 351 MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER); 352 MLX5_SET(query_q_counter_in, in, other_vport, 1); 353 MLX5_SET(query_q_counter_in, in, vport_number, 354 dev->port[port_num].rep->vport); 355 MLX5_SET(query_q_counter_in, in, aggregate, 1); 356 ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out); 357 if (ret) 358 return ret; 359 360 for (i = 0; i < cnts->num_q_counters; i++) { 361 val = *(__be32 *)((void *)out + cnts->offsets[i]); 362 stats->value[i] = (u64)be32_to_cpu(val); 363 } 364 365 return 0; 366 } 367 368 static int do_get_hw_stats(struct ib_device *ibdev, 369 struct rdma_hw_stats *stats, 370 u32 port_num, int index) 371 { 372 struct mlx5_ib_dev *dev = to_mdev(ibdev); 373 const struct mlx5_ib_counters *cnts = get_counters(dev, port_num); 374 struct mlx5_core_dev *mdev; 375 int ret, num_counters; 376 377 if (!stats) 378 return -EINVAL; 379 380 num_counters = cnts->num_q_counters + 381 cnts->num_cong_counters + 382 cnts->num_ext_ppcnt_counters; 383 384 if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0) 385 ret = mlx5_ib_query_q_counters_vport(dev, port_num - 1, cnts, 386 stats); 387 else 388 ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, 389 cnts->set_id); 390 if (ret) 391 return ret; 392 393 /* We don't expose device counters over Vports */ 394 if (is_mdev_switchdev_mode(dev->mdev) && port_num != 0) 395 goto done; 396 397 if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { 398 ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats); 399 if (ret) 400 return ret; 401 } 402 403 if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { 404 if (!port_num) 405 port_num = 1; 406 mdev = mlx5_ib_get_native_port_mdev(dev, port_num, NULL); 407 if (!mdev) { 408 /* If port is not affiliated yet, its in down state 409 * which doesn't have any counters yet, so it would be 410 * zero. So no need to read from the HCA. 411 */ 412 goto done; 413 } 414 ret = mlx5_lag_query_cong_counters(dev->mdev, 415 stats->value + 416 cnts->num_q_counters, 417 cnts->num_cong_counters, 418 cnts->offsets + 419 cnts->num_q_counters); 420 421 mlx5_ib_put_native_port_mdev(dev, port_num); 422 if (ret) 423 return ret; 424 } 425 426 done: 427 return num_counters; 428 } 429 430 static int do_get_op_stat(struct ib_device *ibdev, 431 struct rdma_hw_stats *stats, 432 u32 port_num, int index) 433 { 434 struct mlx5_ib_dev *dev = to_mdev(ibdev); 435 const struct mlx5_ib_counters *cnts; 436 const struct mlx5_ib_op_fc *opfcs; 437 u64 packets = 0, bytes; 438 u32 type; 439 int ret; 440 441 cnts = get_counters(dev, port_num); 442 443 opfcs = cnts->opfcs; 444 type = *(u32 *)cnts->descs[index].priv; 445 if (type >= MLX5_IB_OPCOUNTER_MAX) 446 return -EINVAL; 447 448 if (!opfcs[type].fc) 449 goto out; 450 451 ret = mlx5_fc_query(dev->mdev, opfcs[type].fc, 452 &packets, &bytes); 453 if (ret) 454 return ret; 455 456 out: 457 stats->value[index] = packets; 458 return index; 459 } 460 461 static int do_get_op_stats(struct ib_device *ibdev, 462 struct rdma_hw_stats *stats, 463 u32 port_num) 464 { 465 struct mlx5_ib_dev *dev = to_mdev(ibdev); 466 const struct mlx5_ib_counters *cnts; 467 int index, ret, num_hw_counters; 468 469 cnts = get_counters(dev, port_num); 470 num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters + 471 cnts->num_ext_ppcnt_counters; 472 for (index = num_hw_counters; 473 index < (num_hw_counters + cnts->num_op_counters); index++) { 474 ret = do_get_op_stat(ibdev, stats, port_num, index); 475 if (ret != index) 476 return ret; 477 } 478 479 return cnts->num_op_counters; 480 } 481 482 static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, 483 struct rdma_hw_stats *stats, 484 u32 port_num, int index) 485 { 486 int num_counters, num_hw_counters, num_op_counters; 487 struct mlx5_ib_dev *dev = to_mdev(ibdev); 488 const struct mlx5_ib_counters *cnts; 489 490 cnts = get_counters(dev, port_num); 491 num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters + 492 cnts->num_ext_ppcnt_counters; 493 num_counters = num_hw_counters + cnts->num_op_counters; 494 495 if (index < 0 || index > num_counters) 496 return -EINVAL; 497 else if (index > 0 && index < num_hw_counters) 498 return do_get_hw_stats(ibdev, stats, port_num, index); 499 else if (index >= num_hw_counters && index < num_counters) 500 return do_get_op_stat(ibdev, stats, port_num, index); 501 502 num_hw_counters = do_get_hw_stats(ibdev, stats, port_num, index); 503 if (num_hw_counters < 0) 504 return num_hw_counters; 505 506 num_op_counters = do_get_op_stats(ibdev, stats, port_num); 507 if (num_op_counters < 0) 508 return num_op_counters; 509 510 return num_hw_counters + num_op_counters; 511 } 512 513 static struct rdma_hw_stats * 514 mlx5_ib_counter_alloc_stats(struct rdma_counter *counter) 515 { 516 struct mlx5_ib_dev *dev = to_mdev(counter->device); 517 const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port); 518 519 return do_alloc_stats(cnts); 520 } 521 522 static int mlx5_ib_counter_update_stats(struct rdma_counter *counter) 523 { 524 struct mlx5_ib_dev *dev = to_mdev(counter->device); 525 const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port); 526 527 return mlx5_ib_query_q_counters(dev->mdev, cnts, 528 counter->stats, counter->id); 529 } 530 531 static int mlx5_ib_counter_dealloc(struct rdma_counter *counter) 532 { 533 struct mlx5_ib_dev *dev = to_mdev(counter->device); 534 u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; 535 536 if (!counter->id) 537 return 0; 538 539 MLX5_SET(dealloc_q_counter_in, in, opcode, 540 MLX5_CMD_OP_DEALLOC_Q_COUNTER); 541 MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id); 542 return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in); 543 } 544 545 static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter, 546 struct ib_qp *qp) 547 { 548 struct mlx5_ib_dev *dev = to_mdev(qp->device); 549 bool new = false; 550 int err; 551 552 if (!counter->id) { 553 u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {}; 554 u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {}; 555 556 MLX5_SET(alloc_q_counter_in, in, opcode, 557 MLX5_CMD_OP_ALLOC_Q_COUNTER); 558 MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID); 559 err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out); 560 if (err) 561 return err; 562 counter->id = 563 MLX5_GET(alloc_q_counter_out, out, counter_set_id); 564 new = true; 565 } 566 567 err = mlx5_ib_qp_set_counter(qp, counter); 568 if (err) 569 goto fail_set_counter; 570 571 return 0; 572 573 fail_set_counter: 574 if (new) { 575 mlx5_ib_counter_dealloc(counter); 576 counter->id = 0; 577 } 578 579 return err; 580 } 581 582 static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp) 583 { 584 return mlx5_ib_qp_set_counter(qp, NULL); 585 } 586 587 static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, 588 struct rdma_stat_desc *descs, size_t *offsets, 589 u32 port_num) 590 { 591 bool is_vport = is_mdev_switchdev_mode(dev->mdev) && 592 port_num != MLX5_VPORT_PF; 593 const struct mlx5_ib_counter *names; 594 int j = 0, i, size; 595 596 names = is_vport ? vport_basic_q_cnts : basic_q_cnts; 597 size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) : 598 ARRAY_SIZE(basic_q_cnts); 599 for (i = 0; i < size; i++, j++) { 600 descs[j].name = names[i].name; 601 offsets[j] = names[i].offset; 602 } 603 604 names = is_vport ? vport_out_of_seq_q_cnts : out_of_seq_q_cnts; 605 size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) : 606 ARRAY_SIZE(out_of_seq_q_cnts); 607 if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) { 608 for (i = 0; i < size; i++, j++) { 609 descs[j].name = names[i].name; 610 offsets[j] = names[i].offset; 611 } 612 } 613 614 names = is_vport ? vport_retrans_q_cnts : retrans_q_cnts; 615 size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) : 616 ARRAY_SIZE(retrans_q_cnts); 617 if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) { 618 for (i = 0; i < size; i++, j++) { 619 descs[j].name = names[i].name; 620 offsets[j] = names[i].offset; 621 } 622 } 623 624 names = is_vport ? vport_extended_err_cnts : extended_err_cnts; 625 size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) : 626 ARRAY_SIZE(extended_err_cnts); 627 if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) { 628 for (i = 0; i < size; i++, j++) { 629 descs[j].name = names[i].name; 630 offsets[j] = names[i].offset; 631 } 632 } 633 634 names = is_vport ? vport_roce_accl_cnts : roce_accl_cnts; 635 size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) : 636 ARRAY_SIZE(roce_accl_cnts); 637 if (MLX5_CAP_GEN(dev->mdev, roce_accl)) { 638 for (i = 0; i < size; i++, j++) { 639 descs[j].name = names[i].name; 640 offsets[j] = names[i].offset; 641 } 642 } 643 644 if (is_vport) 645 return; 646 647 if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { 648 for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) { 649 descs[j].name = cong_cnts[i].name; 650 offsets[j] = cong_cnts[i].offset; 651 } 652 } 653 654 if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { 655 for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) { 656 descs[j].name = ext_ppcnt_cnts[i].name; 657 offsets[j] = ext_ppcnt_cnts[i].offset; 658 } 659 } 660 661 for (i = 0; i < ARRAY_SIZE(basic_op_cnts); i++, j++) { 662 descs[j].name = basic_op_cnts[i].name; 663 descs[j].flags |= IB_STAT_FLAG_OPTIONAL; 664 descs[j].priv = &basic_op_cnts[i].type; 665 } 666 667 if (MLX5_CAP_FLOWTABLE(dev->mdev, 668 ft_field_support_2_nic_receive_rdma.bth_opcode)) { 669 for (i = 0; i < ARRAY_SIZE(rdmarx_cnp_op_cnts); i++, j++) { 670 descs[j].name = rdmarx_cnp_op_cnts[i].name; 671 descs[j].flags |= IB_STAT_FLAG_OPTIONAL; 672 descs[j].priv = &rdmarx_cnp_op_cnts[i].type; 673 } 674 } 675 676 if (MLX5_CAP_FLOWTABLE(dev->mdev, 677 ft_field_support_2_nic_transmit_rdma.bth_opcode)) { 678 for (i = 0; i < ARRAY_SIZE(rdmatx_cnp_op_cnts); i++, j++) { 679 descs[j].name = rdmatx_cnp_op_cnts[i].name; 680 descs[j].flags |= IB_STAT_FLAG_OPTIONAL; 681 descs[j].priv = &rdmatx_cnp_op_cnts[i].type; 682 } 683 } 684 } 685 686 687 static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, 688 struct mlx5_ib_counters *cnts, u32 port_num) 689 { 690 bool is_vport = is_mdev_switchdev_mode(dev->mdev) && 691 port_num != MLX5_VPORT_PF; 692 u32 num_counters, num_op_counters = 0, size; 693 694 size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) : 695 ARRAY_SIZE(basic_q_cnts); 696 num_counters = size; 697 698 size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) : 699 ARRAY_SIZE(out_of_seq_q_cnts); 700 if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) 701 num_counters += size; 702 703 size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) : 704 ARRAY_SIZE(retrans_q_cnts); 705 if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) 706 num_counters += size; 707 708 size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) : 709 ARRAY_SIZE(extended_err_cnts); 710 if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) 711 num_counters += size; 712 713 size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) : 714 ARRAY_SIZE(roce_accl_cnts); 715 if (MLX5_CAP_GEN(dev->mdev, roce_accl)) 716 num_counters += size; 717 718 cnts->num_q_counters = num_counters; 719 720 if (is_vport) 721 goto skip_non_qcounters; 722 723 if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { 724 cnts->num_cong_counters = ARRAY_SIZE(cong_cnts); 725 num_counters += ARRAY_SIZE(cong_cnts); 726 } 727 if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { 728 cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts); 729 num_counters += ARRAY_SIZE(ext_ppcnt_cnts); 730 } 731 732 num_op_counters = ARRAY_SIZE(basic_op_cnts); 733 734 if (MLX5_CAP_FLOWTABLE(dev->mdev, 735 ft_field_support_2_nic_receive_rdma.bth_opcode)) 736 num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts); 737 738 if (MLX5_CAP_FLOWTABLE(dev->mdev, 739 ft_field_support_2_nic_transmit_rdma.bth_opcode)) 740 num_op_counters += ARRAY_SIZE(rdmatx_cnp_op_cnts); 741 742 skip_non_qcounters: 743 cnts->num_op_counters = num_op_counters; 744 num_counters += num_op_counters; 745 cnts->descs = kcalloc(num_counters, 746 sizeof(struct rdma_stat_desc), GFP_KERNEL); 747 if (!cnts->descs) 748 return -ENOMEM; 749 750 cnts->offsets = kcalloc(num_counters, 751 sizeof(*cnts->offsets), GFP_KERNEL); 752 if (!cnts->offsets) 753 goto err; 754 755 return 0; 756 757 err: 758 kfree(cnts->descs); 759 cnts->descs = NULL; 760 return -ENOMEM; 761 } 762 763 static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) 764 { 765 u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; 766 int num_cnt_ports = dev->num_ports; 767 int i, j; 768 769 if (is_mdev_switchdev_mode(dev->mdev)) 770 num_cnt_ports = min(2, num_cnt_ports); 771 772 MLX5_SET(dealloc_q_counter_in, in, opcode, 773 MLX5_CMD_OP_DEALLOC_Q_COUNTER); 774 775 for (i = 0; i < num_cnt_ports; i++) { 776 if (dev->port[i].cnts.set_id) { 777 MLX5_SET(dealloc_q_counter_in, in, counter_set_id, 778 dev->port[i].cnts.set_id); 779 mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in); 780 } 781 kfree(dev->port[i].cnts.descs); 782 kfree(dev->port[i].cnts.offsets); 783 784 for (j = 0; j < MLX5_IB_OPCOUNTER_MAX; j++) { 785 if (!dev->port[i].cnts.opfcs[j].fc) 786 continue; 787 788 if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)) 789 mlx5_ib_fs_remove_op_fc(dev, 790 &dev->port[i].cnts.opfcs[j], j); 791 mlx5_fc_destroy(dev->mdev, 792 dev->port[i].cnts.opfcs[j].fc); 793 dev->port[i].cnts.opfcs[j].fc = NULL; 794 } 795 } 796 } 797 798 static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) 799 { 800 u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {}; 801 u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {}; 802 int num_cnt_ports = dev->num_ports; 803 int err = 0; 804 int i; 805 bool is_shared; 806 807 MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); 808 is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0; 809 810 /* 811 * In switchdev we need to allocate two ports, one that is used for 812 * the device Q_counters and it is essentially the real Q_counters of 813 * this device, while the other is used as a helper for PF to be able to 814 * query all other vports. 815 */ 816 if (is_mdev_switchdev_mode(dev->mdev)) 817 num_cnt_ports = min(2, num_cnt_ports); 818 819 for (i = 0; i < num_cnt_ports; i++) { 820 err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts, i); 821 if (err) 822 goto err_alloc; 823 824 mlx5_ib_fill_counters(dev, dev->port[i].cnts.descs, 825 dev->port[i].cnts.offsets, i); 826 827 MLX5_SET(alloc_q_counter_in, in, uid, 828 is_shared ? MLX5_SHARED_RESOURCE_UID : 0); 829 830 err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out); 831 if (err) { 832 mlx5_ib_warn(dev, 833 "couldn't allocate queue counter for port %d, err %d\n", 834 i + 1, err); 835 goto err_alloc; 836 } 837 838 dev->port[i].cnts.set_id = 839 MLX5_GET(alloc_q_counter_out, out, counter_set_id); 840 } 841 return 0; 842 843 err_alloc: 844 mlx5_ib_dealloc_counters(dev); 845 return err; 846 } 847 848 static int read_flow_counters(struct ib_device *ibdev, 849 struct mlx5_read_counters_attr *read_attr) 850 { 851 struct mlx5_fc *fc = read_attr->hw_cntrs_hndl; 852 struct mlx5_ib_dev *dev = to_mdev(ibdev); 853 854 return mlx5_fc_query(dev->mdev, fc, 855 &read_attr->out[IB_COUNTER_PACKETS], 856 &read_attr->out[IB_COUNTER_BYTES]); 857 } 858 859 /* flow counters currently expose two counters packets and bytes */ 860 #define FLOW_COUNTERS_NUM 2 861 static int counters_set_description( 862 struct ib_counters *counters, enum mlx5_ib_counters_type counters_type, 863 struct mlx5_ib_flow_counters_desc *desc_data, u32 ncounters) 864 { 865 struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); 866 u32 cntrs_max_index = 0; 867 int i; 868 869 if (counters_type != MLX5_IB_COUNTERS_FLOW) 870 return -EINVAL; 871 872 /* init the fields for the object */ 873 mcounters->type = counters_type; 874 mcounters->read_counters = read_flow_counters; 875 mcounters->counters_num = FLOW_COUNTERS_NUM; 876 mcounters->ncounters = ncounters; 877 /* each counter entry have both description and index pair */ 878 for (i = 0; i < ncounters; i++) { 879 if (desc_data[i].description > IB_COUNTER_BYTES) 880 return -EINVAL; 881 882 if (cntrs_max_index <= desc_data[i].index) 883 cntrs_max_index = desc_data[i].index + 1; 884 } 885 886 mutex_lock(&mcounters->mcntrs_mutex); 887 mcounters->counters_data = desc_data; 888 mcounters->cntrs_max_index = cntrs_max_index; 889 mutex_unlock(&mcounters->mcntrs_mutex); 890 891 return 0; 892 } 893 894 #define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2)) 895 int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters, 896 struct mlx5_ib_create_flow *ucmd) 897 { 898 struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters); 899 struct mlx5_ib_flow_counters_data *cntrs_data = NULL; 900 struct mlx5_ib_flow_counters_desc *desc_data = NULL; 901 bool hw_hndl = false; 902 int ret = 0; 903 904 if (ucmd && ucmd->ncounters_data != 0) { 905 cntrs_data = ucmd->data; 906 if (cntrs_data->ncounters > MAX_COUNTERS_NUM) 907 return -EINVAL; 908 909 desc_data = kcalloc(cntrs_data->ncounters, 910 sizeof(*desc_data), 911 GFP_KERNEL); 912 if (!desc_data) 913 return -ENOMEM; 914 915 if (copy_from_user(desc_data, 916 u64_to_user_ptr(cntrs_data->counters_data), 917 sizeof(*desc_data) * cntrs_data->ncounters)) { 918 ret = -EFAULT; 919 goto free; 920 } 921 } 922 923 if (!mcounters->hw_cntrs_hndl) { 924 mcounters->hw_cntrs_hndl = mlx5_fc_create( 925 to_mdev(ibcounters->device)->mdev, false); 926 if (IS_ERR(mcounters->hw_cntrs_hndl)) { 927 ret = PTR_ERR(mcounters->hw_cntrs_hndl); 928 goto free; 929 } 930 hw_hndl = true; 931 } 932 933 if (desc_data) { 934 /* counters already bound to at least one flow */ 935 if (mcounters->cntrs_max_index) { 936 ret = -EINVAL; 937 goto free_hndl; 938 } 939 940 ret = counters_set_description(ibcounters, 941 MLX5_IB_COUNTERS_FLOW, 942 desc_data, 943 cntrs_data->ncounters); 944 if (ret) 945 goto free_hndl; 946 947 } else if (!mcounters->cntrs_max_index) { 948 /* counters not bound yet, must have udata passed */ 949 ret = -EINVAL; 950 goto free_hndl; 951 } 952 953 return 0; 954 955 free_hndl: 956 if (hw_hndl) { 957 mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev, 958 mcounters->hw_cntrs_hndl); 959 mcounters->hw_cntrs_hndl = NULL; 960 } 961 free: 962 kfree(desc_data); 963 return ret; 964 } 965 966 void mlx5_ib_counters_clear_description(struct ib_counters *counters) 967 { 968 struct mlx5_ib_mcounters *mcounters; 969 970 if (!counters || atomic_read(&counters->usecnt) != 1) 971 return; 972 973 mcounters = to_mcounters(counters); 974 975 mutex_lock(&mcounters->mcntrs_mutex); 976 kfree(mcounters->counters_data); 977 mcounters->counters_data = NULL; 978 mcounters->cntrs_max_index = 0; 979 mutex_unlock(&mcounters->mcntrs_mutex); 980 } 981 982 static int mlx5_ib_modify_stat(struct ib_device *device, u32 port, 983 unsigned int index, bool enable) 984 { 985 struct mlx5_ib_dev *dev = to_mdev(device); 986 struct mlx5_ib_counters *cnts; 987 struct mlx5_ib_op_fc *opfc; 988 u32 num_hw_counters, type; 989 int ret; 990 991 cnts = &dev->port[port - 1].cnts; 992 num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters + 993 cnts->num_ext_ppcnt_counters; 994 if (index < num_hw_counters || 995 index >= (num_hw_counters + cnts->num_op_counters)) 996 return -EINVAL; 997 998 if (!(cnts->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) 999 return -EINVAL; 1000 1001 type = *(u32 *)cnts->descs[index].priv; 1002 if (type >= MLX5_IB_OPCOUNTER_MAX) 1003 return -EINVAL; 1004 1005 opfc = &cnts->opfcs[type]; 1006 1007 if (enable) { 1008 if (opfc->fc) 1009 return -EEXIST; 1010 1011 opfc->fc = mlx5_fc_create(dev->mdev, false); 1012 if (IS_ERR(opfc->fc)) 1013 return PTR_ERR(opfc->fc); 1014 1015 ret = mlx5_ib_fs_add_op_fc(dev, port, opfc, type); 1016 if (ret) { 1017 mlx5_fc_destroy(dev->mdev, opfc->fc); 1018 opfc->fc = NULL; 1019 } 1020 return ret; 1021 } 1022 1023 if (!opfc->fc) 1024 return -EINVAL; 1025 1026 mlx5_ib_fs_remove_op_fc(dev, opfc, type); 1027 mlx5_fc_destroy(dev->mdev, opfc->fc); 1028 opfc->fc = NULL; 1029 return 0; 1030 } 1031 1032 static const struct ib_device_ops hw_stats_ops = { 1033 .alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats, 1034 .get_hw_stats = mlx5_ib_get_hw_stats, 1035 .counter_bind_qp = mlx5_ib_counter_bind_qp, 1036 .counter_unbind_qp = mlx5_ib_counter_unbind_qp, 1037 .counter_dealloc = mlx5_ib_counter_dealloc, 1038 .counter_alloc_stats = mlx5_ib_counter_alloc_stats, 1039 .counter_update_stats = mlx5_ib_counter_update_stats, 1040 .modify_hw_stat = IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) ? 1041 mlx5_ib_modify_stat : NULL, 1042 }; 1043 1044 static const struct ib_device_ops hw_switchdev_vport_op = { 1045 .alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats, 1046 }; 1047 1048 static const struct ib_device_ops hw_switchdev_stats_ops = { 1049 .alloc_hw_device_stats = mlx5_ib_alloc_hw_device_stats, 1050 .get_hw_stats = mlx5_ib_get_hw_stats, 1051 .counter_bind_qp = mlx5_ib_counter_bind_qp, 1052 .counter_unbind_qp = mlx5_ib_counter_unbind_qp, 1053 .counter_dealloc = mlx5_ib_counter_dealloc, 1054 .counter_alloc_stats = mlx5_ib_counter_alloc_stats, 1055 .counter_update_stats = mlx5_ib_counter_update_stats, 1056 }; 1057 1058 static const struct ib_device_ops counters_ops = { 1059 .create_counters = mlx5_ib_create_counters, 1060 .destroy_counters = mlx5_ib_destroy_counters, 1061 .read_counters = mlx5_ib_read_counters, 1062 1063 INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs), 1064 }; 1065 1066 int mlx5_ib_counters_init(struct mlx5_ib_dev *dev) 1067 { 1068 ib_set_device_ops(&dev->ib_dev, &counters_ops); 1069 1070 if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) 1071 return 0; 1072 1073 if (is_mdev_switchdev_mode(dev->mdev)) { 1074 ib_set_device_ops(&dev->ib_dev, &hw_switchdev_stats_ops); 1075 if (vport_qcounters_supported(dev)) 1076 ib_set_device_ops(&dev->ib_dev, &hw_switchdev_vport_op); 1077 } else 1078 ib_set_device_ops(&dev->ib_dev, &hw_stats_ops); 1079 return mlx5_ib_alloc_counters(dev); 1080 } 1081 1082 void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev) 1083 { 1084 if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) 1085 return; 1086 1087 mlx5_ib_dealloc_counters(dev); 1088 } 1089