1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* 3 * Copyright (c) 2019 Mellanox Technologies. All rights reserved. 4 */ 5 #include <rdma/ib_verbs.h> 6 #include <rdma/rdma_counter.h> 7 8 #include "core_priv.h" 9 #include "restrack.h" 10 11 #define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE | RDMA_COUNTER_MASK_PID) 12 13 static int __counter_set_mode(struct rdma_port_counter *port_counter, 14 enum rdma_nl_counter_mode new_mode, 15 enum rdma_nl_counter_mask new_mask, 16 bool bind_opcnt) 17 { 18 if (new_mode == RDMA_COUNTER_MODE_AUTO) { 19 if (new_mask & (~ALL_AUTO_MODE_MASKS)) 20 return -EINVAL; 21 if (port_counter->num_counters) 22 return -EBUSY; 23 } 24 25 port_counter->mode.mode = new_mode; 26 port_counter->mode.mask = new_mask; 27 port_counter->mode.bind_opcnt = bind_opcnt; 28 return 0; 29 } 30 31 /* 32 * rdma_counter_set_auto_mode() - Turn on/off per-port auto mode 33 * 34 * @dev: Device to operate 35 * @port: Port to use 36 * @mask: Mask to configure 37 * @extack: Message to the user 38 * 39 * Return 0 on success. If counter mode wasn't changed then it is considered 40 * as success as well. 41 * Return -EBUSY when changing to auto mode while there are bounded counters. 42 * 43 */ 44 int rdma_counter_set_auto_mode(struct ib_device *dev, u32 port, 45 enum rdma_nl_counter_mask mask, 46 bool bind_opcnt, 47 struct netlink_ext_ack *extack) 48 { 49 struct rdma_port_counter *port_counter; 50 enum rdma_nl_counter_mode mode; 51 int ret; 52 53 port_counter = &dev->port_data[port].port_counter; 54 if (!port_counter->hstats) 55 return -EOPNOTSUPP; 56 57 mutex_lock(&port_counter->lock); 58 if (mask) 59 mode = RDMA_COUNTER_MODE_AUTO; 60 else 61 mode = (port_counter->num_counters) ? RDMA_COUNTER_MODE_MANUAL : 62 RDMA_COUNTER_MODE_NONE; 63 64 if (port_counter->mode.mode == mode && 65 port_counter->mode.mask == mask && 66 port_counter->mode.bind_opcnt == bind_opcnt) { 67 ret = 0; 68 goto out; 69 } 70 71 ret = __counter_set_mode(port_counter, mode, mask, bind_opcnt); 72 73 out: 74 mutex_unlock(&port_counter->lock); 75 if (ret == -EBUSY) 76 NL_SET_ERR_MSG( 77 extack, 78 "Modifying auto mode is not allowed when there is a bound QP"); 79 return ret; 80 } 81 82 static void auto_mode_init_counter(struct rdma_counter *counter, 83 const struct ib_qp *qp, 84 enum rdma_nl_counter_mask new_mask) 85 { 86 struct auto_mode_param *param = &counter->mode.param; 87 88 counter->mode.mode = RDMA_COUNTER_MODE_AUTO; 89 counter->mode.mask = new_mask; 90 91 if (new_mask & RDMA_COUNTER_MASK_QP_TYPE) 92 param->qp_type = qp->qp_type; 93 } 94 95 static int __rdma_counter_bind_qp(struct rdma_counter *counter, 96 struct ib_qp *qp, u32 port) 97 { 98 int ret; 99 100 if (qp->counter) 101 return -EINVAL; 102 103 if (!qp->device->ops.counter_bind_qp) 104 return -EOPNOTSUPP; 105 106 mutex_lock(&counter->lock); 107 ret = qp->device->ops.counter_bind_qp(counter, qp, port); 108 mutex_unlock(&counter->lock); 109 110 return ret; 111 } 112 113 int rdma_counter_modify(struct ib_device *dev, u32 port, 114 unsigned int index, bool enable) 115 { 116 struct rdma_hw_stats *stats; 117 int ret = 0; 118 119 if (!dev->ops.modify_hw_stat) 120 return -EOPNOTSUPP; 121 122 stats = ib_get_hw_stats_port(dev, port); 123 if (!stats || index >= stats->num_counters || 124 !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) 125 return -EINVAL; 126 127 mutex_lock(&stats->lock); 128 129 if (enable != test_bit(index, stats->is_disabled)) 130 goto out; 131 132 ret = dev->ops.modify_hw_stat(dev, port, index, enable); 133 if (ret) 134 goto out; 135 136 if (enable) 137 clear_bit(index, stats->is_disabled); 138 else 139 set_bit(index, stats->is_disabled); 140 out: 141 mutex_unlock(&stats->lock); 142 return ret; 143 } 144 145 static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port, 146 struct ib_qp *qp, 147 enum rdma_nl_counter_mode mode, 148 bool bind_opcnt) 149 { 150 struct rdma_port_counter *port_counter; 151 struct rdma_counter *counter; 152 int ret; 153 154 if (!dev->ops.counter_dealloc || !dev->ops.counter_alloc_stats) 155 return NULL; 156 157 counter = rdma_zalloc_drv_obj(dev, rdma_counter); 158 if (!counter) 159 return NULL; 160 161 counter->device = dev; 162 counter->port = port; 163 164 dev->ops.counter_init(counter); 165 166 rdma_restrack_new(&counter->res, RDMA_RESTRACK_COUNTER); 167 counter->stats = dev->ops.counter_alloc_stats(counter); 168 if (!counter->stats) 169 goto err_stats; 170 171 port_counter = &dev->port_data[port].port_counter; 172 mutex_lock(&port_counter->lock); 173 switch (mode) { 174 case RDMA_COUNTER_MODE_MANUAL: 175 ret = __counter_set_mode(port_counter, RDMA_COUNTER_MODE_MANUAL, 176 0, bind_opcnt); 177 if (ret) { 178 mutex_unlock(&port_counter->lock); 179 goto err_mode; 180 } 181 break; 182 case RDMA_COUNTER_MODE_AUTO: 183 auto_mode_init_counter(counter, qp, port_counter->mode.mask); 184 break; 185 default: 186 ret = -EOPNOTSUPP; 187 mutex_unlock(&port_counter->lock); 188 goto err_mode; 189 } 190 191 port_counter->num_counters++; 192 mutex_unlock(&port_counter->lock); 193 194 counter->mode.mode = mode; 195 counter->mode.bind_opcnt = bind_opcnt; 196 kref_init(&counter->kref); 197 mutex_init(&counter->lock); 198 199 ret = __rdma_counter_bind_qp(counter, qp, port); 200 if (ret) 201 goto err_mode; 202 203 rdma_restrack_parent_name(&counter->res, &qp->res); 204 rdma_restrack_add(&counter->res); 205 return counter; 206 207 err_mode: 208 rdma_free_hw_stats_struct(counter->stats); 209 err_stats: 210 rdma_restrack_put(&counter->res); 211 kfree(counter); 212 return NULL; 213 } 214 215 static void rdma_counter_free(struct rdma_counter *counter) 216 { 217 struct rdma_port_counter *port_counter; 218 219 port_counter = &counter->device->port_data[counter->port].port_counter; 220 mutex_lock(&port_counter->lock); 221 port_counter->num_counters--; 222 if (!port_counter->num_counters && 223 (port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL)) 224 __counter_set_mode(port_counter, RDMA_COUNTER_MODE_NONE, 0, 225 false); 226 227 mutex_unlock(&port_counter->lock); 228 229 rdma_restrack_del(&counter->res); 230 rdma_free_hw_stats_struct(counter->stats); 231 kfree(counter); 232 } 233 234 static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, 235 enum rdma_nl_counter_mask auto_mask) 236 { 237 struct auto_mode_param *param = &counter->mode.param; 238 bool match = true; 239 240 if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE) 241 match &= (param->qp_type == qp->qp_type); 242 243 if (auto_mask & RDMA_COUNTER_MASK_PID) 244 match &= (task_pid_nr(counter->res.task) == 245 task_pid_nr(qp->res.task)); 246 247 return match; 248 } 249 250 static int __rdma_counter_unbind_qp(struct ib_qp *qp, u32 port) 251 { 252 struct rdma_counter *counter = qp->counter; 253 int ret; 254 255 if (!qp->device->ops.counter_unbind_qp) 256 return -EOPNOTSUPP; 257 258 mutex_lock(&counter->lock); 259 ret = qp->device->ops.counter_unbind_qp(qp, port); 260 mutex_unlock(&counter->lock); 261 262 return ret; 263 } 264 265 static void counter_history_stat_update(struct rdma_counter *counter) 266 { 267 struct ib_device *dev = counter->device; 268 struct rdma_port_counter *port_counter; 269 int i; 270 271 port_counter = &dev->port_data[counter->port].port_counter; 272 if (!port_counter->hstats) 273 return; 274 275 rdma_counter_query_stats(counter); 276 277 for (i = 0; i < counter->stats->num_counters; i++) 278 port_counter->hstats->value[i] += counter->stats->value[i]; 279 } 280 281 /* 282 * rdma_get_counter_auto_mode - Find the counter that @qp should be bound 283 * with in auto mode 284 * 285 * Return: The counter (with ref-count increased) if found 286 */ 287 static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp, 288 u32 port) 289 { 290 struct rdma_port_counter *port_counter; 291 struct rdma_counter *counter = NULL; 292 struct ib_device *dev = qp->device; 293 struct rdma_restrack_entry *res; 294 struct rdma_restrack_root *rt; 295 unsigned long id = 0; 296 297 port_counter = &dev->port_data[port].port_counter; 298 rt = &dev->res[RDMA_RESTRACK_COUNTER]; 299 xa_lock(&rt->xa); 300 xa_for_each(&rt->xa, id, res) { 301 counter = container_of(res, struct rdma_counter, res); 302 if ((counter->device != qp->device) || (counter->port != port)) 303 goto next; 304 305 if (auto_mode_match(qp, counter, port_counter->mode.mask)) 306 break; 307 next: 308 counter = NULL; 309 } 310 311 if (counter && !kref_get_unless_zero(&counter->kref)) 312 counter = NULL; 313 314 xa_unlock(&rt->xa); 315 return counter; 316 } 317 318 static void counter_release(struct kref *kref) 319 { 320 struct rdma_counter *counter; 321 322 counter = container_of(kref, struct rdma_counter, kref); 323 counter_history_stat_update(counter); 324 counter->device->ops.counter_dealloc(counter); 325 rdma_counter_free(counter); 326 } 327 328 /* 329 * rdma_counter_bind_qp_auto - Check and bind the QP to a counter base on 330 * the auto-mode rule 331 */ 332 int rdma_counter_bind_qp_auto(struct ib_qp *qp, u32 port) 333 { 334 struct rdma_port_counter *port_counter; 335 struct ib_device *dev = qp->device; 336 struct rdma_counter *counter; 337 int ret; 338 339 if (!rdma_restrack_is_tracked(&qp->res) || rdma_is_kernel_res(&qp->res)) 340 return 0; 341 342 if (!rdma_is_port_valid(dev, port)) 343 return -EINVAL; 344 345 port_counter = &dev->port_data[port].port_counter; 346 if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) 347 return 0; 348 349 counter = rdma_get_counter_auto_mode(qp, port); 350 if (counter) { 351 ret = __rdma_counter_bind_qp(counter, qp, port); 352 if (ret) { 353 kref_put(&counter->kref, counter_release); 354 return ret; 355 } 356 } else { 357 counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_AUTO, 358 port_counter->mode.bind_opcnt); 359 if (!counter) 360 return -ENOMEM; 361 } 362 363 return 0; 364 } 365 366 /* 367 * rdma_counter_unbind_qp - Unbind a qp from a counter 368 * @force: 369 * true - Decrease the counter ref-count anyway (e.g., qp destroy) 370 */ 371 int rdma_counter_unbind_qp(struct ib_qp *qp, u32 port, bool force) 372 { 373 struct rdma_counter *counter = qp->counter; 374 int ret; 375 376 if (!counter) 377 return -EINVAL; 378 379 ret = __rdma_counter_unbind_qp(qp, port); 380 if (ret && !force) 381 return ret; 382 383 kref_put(&counter->kref, counter_release); 384 return 0; 385 } 386 387 int rdma_counter_query_stats(struct rdma_counter *counter) 388 { 389 struct ib_device *dev = counter->device; 390 int ret; 391 392 if (!dev->ops.counter_update_stats) 393 return -EINVAL; 394 395 mutex_lock(&counter->lock); 396 ret = dev->ops.counter_update_stats(counter); 397 mutex_unlock(&counter->lock); 398 399 return ret; 400 } 401 402 static u64 get_running_counters_hwstat_sum(struct ib_device *dev, 403 u32 port, u32 index) 404 { 405 struct rdma_restrack_entry *res; 406 struct rdma_restrack_root *rt; 407 struct rdma_counter *counter; 408 unsigned long id = 0; 409 u64 sum = 0; 410 411 rt = &dev->res[RDMA_RESTRACK_COUNTER]; 412 xa_lock(&rt->xa); 413 xa_for_each(&rt->xa, id, res) { 414 if (!rdma_restrack_get(res)) 415 continue; 416 417 xa_unlock(&rt->xa); 418 419 counter = container_of(res, struct rdma_counter, res); 420 if ((counter->device != dev) || (counter->port != port) || 421 rdma_counter_query_stats(counter)) 422 goto next; 423 424 sum += counter->stats->value[index]; 425 426 next: 427 xa_lock(&rt->xa); 428 rdma_restrack_put(res); 429 } 430 431 xa_unlock(&rt->xa); 432 return sum; 433 } 434 435 /* 436 * rdma_counter_get_hwstat_value() - Get the sum value of all counters on a 437 * specific port, including the running ones and history data 438 */ 439 u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u32 port, u32 index) 440 { 441 struct rdma_port_counter *port_counter; 442 u64 sum; 443 444 port_counter = &dev->port_data[port].port_counter; 445 if (!port_counter->hstats) 446 return 0; 447 448 sum = get_running_counters_hwstat_sum(dev, port, index); 449 sum += port_counter->hstats->value[index]; 450 451 return sum; 452 } 453 454 static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num) 455 { 456 struct rdma_restrack_entry *res = NULL; 457 struct ib_qp *qp = NULL; 458 459 res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_QP, qp_num); 460 if (IS_ERR(res)) 461 return NULL; 462 463 qp = container_of(res, struct ib_qp, res); 464 if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) 465 goto err; 466 467 return qp; 468 469 err: 470 rdma_restrack_put(res); 471 return NULL; 472 } 473 474 static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev, 475 u32 counter_id) 476 { 477 struct rdma_restrack_entry *res; 478 struct rdma_counter *counter; 479 480 res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_COUNTER, counter_id); 481 if (IS_ERR(res)) 482 return NULL; 483 484 counter = container_of(res, struct rdma_counter, res); 485 kref_get(&counter->kref); 486 rdma_restrack_put(res); 487 488 return counter; 489 } 490 491 /* 492 * rdma_counter_bind_qpn() - Bind QP @qp_num to counter @counter_id 493 */ 494 int rdma_counter_bind_qpn(struct ib_device *dev, u32 port, 495 u32 qp_num, u32 counter_id) 496 { 497 struct rdma_port_counter *port_counter; 498 struct rdma_counter *counter; 499 struct ib_qp *qp; 500 int ret; 501 502 port_counter = &dev->port_data[port].port_counter; 503 if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO) 504 return -EINVAL; 505 506 qp = rdma_counter_get_qp(dev, qp_num); 507 if (!qp) 508 return -ENOENT; 509 510 counter = rdma_get_counter_by_id(dev, counter_id); 511 if (!counter) { 512 ret = -ENOENT; 513 goto err; 514 } 515 516 if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) { 517 ret = -EINVAL; 518 goto err_task; 519 } 520 521 if ((counter->device != qp->device) || (counter->port != qp->port)) { 522 ret = -EINVAL; 523 goto err_task; 524 } 525 526 ret = __rdma_counter_bind_qp(counter, qp, port); 527 if (ret) 528 goto err_task; 529 530 rdma_restrack_put(&qp->res); 531 return 0; 532 533 err_task: 534 kref_put(&counter->kref, counter_release); 535 err: 536 rdma_restrack_put(&qp->res); 537 return ret; 538 } 539 540 /* 541 * rdma_counter_bind_qpn_alloc() - Alloc a counter and bind QP @qp_num to it 542 * The id of new counter is returned in @counter_id 543 */ 544 int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u32 port, 545 u32 qp_num, u32 *counter_id) 546 { 547 struct rdma_port_counter *port_counter; 548 struct rdma_counter *counter; 549 struct ib_qp *qp; 550 int ret; 551 552 if (!rdma_is_port_valid(dev, port)) 553 return -EINVAL; 554 555 port_counter = &dev->port_data[port].port_counter; 556 if (!port_counter->hstats) 557 return -EOPNOTSUPP; 558 559 if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO) 560 return -EINVAL; 561 562 qp = rdma_counter_get_qp(dev, qp_num); 563 if (!qp) 564 return -ENOENT; 565 566 if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) { 567 ret = -EINVAL; 568 goto err; 569 } 570 571 counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_MANUAL, true); 572 if (!counter) { 573 ret = -ENOMEM; 574 goto err; 575 } 576 577 if (counter_id) 578 *counter_id = counter->id; 579 580 rdma_restrack_put(&qp->res); 581 return 0; 582 583 err: 584 rdma_restrack_put(&qp->res); 585 return ret; 586 } 587 588 /* 589 * rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter 590 */ 591 int rdma_counter_unbind_qpn(struct ib_device *dev, u32 port, 592 u32 qp_num, u32 counter_id) 593 { 594 struct rdma_port_counter *port_counter; 595 struct ib_qp *qp; 596 int ret; 597 598 if (!rdma_is_port_valid(dev, port)) 599 return -EINVAL; 600 601 qp = rdma_counter_get_qp(dev, qp_num); 602 if (!qp) 603 return -ENOENT; 604 605 if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) { 606 ret = -EINVAL; 607 goto out; 608 } 609 610 port_counter = &dev->port_data[port].port_counter; 611 if (!qp->counter || qp->counter->id != counter_id || 612 port_counter->mode.mode != RDMA_COUNTER_MODE_MANUAL) { 613 ret = -EINVAL; 614 goto out; 615 } 616 617 ret = rdma_counter_unbind_qp(qp, port, false); 618 619 out: 620 rdma_restrack_put(&qp->res); 621 return ret; 622 } 623 624 int rdma_counter_get_mode(struct ib_device *dev, u32 port, 625 enum rdma_nl_counter_mode *mode, 626 enum rdma_nl_counter_mask *mask, 627 bool *opcnt) 628 { 629 struct rdma_port_counter *port_counter; 630 631 port_counter = &dev->port_data[port].port_counter; 632 *mode = port_counter->mode.mode; 633 *mask = port_counter->mode.mask; 634 *opcnt = port_counter->mode.bind_opcnt; 635 636 return 0; 637 } 638 639 void rdma_counter_init(struct ib_device *dev) 640 { 641 struct rdma_port_counter *port_counter; 642 u32 port, i; 643 644 if (!dev->port_data) 645 return; 646 647 rdma_for_each_port(dev, port) { 648 port_counter = &dev->port_data[port].port_counter; 649 port_counter->mode.mode = RDMA_COUNTER_MODE_NONE; 650 mutex_init(&port_counter->lock); 651 652 if (!dev->ops.alloc_hw_port_stats) 653 continue; 654 655 port_counter->hstats = dev->ops.alloc_hw_port_stats(dev, port); 656 if (!port_counter->hstats) 657 goto fail; 658 } 659 660 return; 661 662 fail: 663 for (i = port; i >= rdma_start_port(dev); i--) { 664 port_counter = &dev->port_data[port].port_counter; 665 rdma_free_hw_stats_struct(port_counter->hstats); 666 port_counter->hstats = NULL; 667 mutex_destroy(&port_counter->lock); 668 } 669 } 670 671 void rdma_counter_release(struct ib_device *dev) 672 { 673 struct rdma_port_counter *port_counter; 674 u32 port; 675 676 rdma_for_each_port(dev, port) { 677 port_counter = &dev->port_data[port].port_counter; 678 rdma_free_hw_stats_struct(port_counter->hstats); 679 mutex_destroy(&port_counter->lock); 680 } 681 } 682