1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* 3 * Copyright (c) 2019 Mellanox Technologies. All rights reserved. 4 */ 5 #include <rdma/ib_verbs.h> 6 #include <rdma/rdma_counter.h> 7 8 #include "core_priv.h" 9 #include "restrack.h" 10 11 #define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE | RDMA_COUNTER_MASK_PID) 12 13 static int __counter_set_mode(struct rdma_port_counter *port_counter, 14 enum rdma_nl_counter_mode new_mode, 15 enum rdma_nl_counter_mask new_mask) 16 { 17 if (new_mode == RDMA_COUNTER_MODE_AUTO) { 18 if (new_mask & (~ALL_AUTO_MODE_MASKS)) 19 return -EINVAL; 20 if (port_counter->num_counters) 21 return -EBUSY; 22 } 23 24 port_counter->mode.mode = new_mode; 25 port_counter->mode.mask = new_mask; 26 return 0; 27 } 28 29 /* 30 * rdma_counter_set_auto_mode() - Turn on/off per-port auto mode 31 * 32 * @dev: Device to operate 33 * @port: Port to use 34 * @mask: Mask to configure 35 * @extack: Message to the user 36 * 37 * Return 0 on success. If counter mode wasn't changed then it is considered 38 * as success as well. 39 * Return -EBUSY when changing to auto mode while there are bounded counters. 40 * 41 */ 42 int rdma_counter_set_auto_mode(struct ib_device *dev, u32 port, 43 enum rdma_nl_counter_mask mask, 44 struct netlink_ext_ack *extack) 45 { 46 struct rdma_port_counter *port_counter; 47 enum rdma_nl_counter_mode mode; 48 int ret; 49 50 port_counter = &dev->port_data[port].port_counter; 51 if (!port_counter->hstats) 52 return -EOPNOTSUPP; 53 54 mutex_lock(&port_counter->lock); 55 if (mask) 56 mode = RDMA_COUNTER_MODE_AUTO; 57 else 58 mode = (port_counter->num_counters) ? RDMA_COUNTER_MODE_MANUAL : 59 RDMA_COUNTER_MODE_NONE; 60 61 if (port_counter->mode.mode == mode && 62 port_counter->mode.mask == mask) { 63 ret = 0; 64 goto out; 65 } 66 67 ret = __counter_set_mode(port_counter, mode, mask); 68 69 out: 70 mutex_unlock(&port_counter->lock); 71 if (ret == -EBUSY) 72 NL_SET_ERR_MSG( 73 extack, 74 "Modifying auto mode is not allowed when there is a bound QP"); 75 return ret; 76 } 77 78 static void auto_mode_init_counter(struct rdma_counter *counter, 79 const struct ib_qp *qp, 80 enum rdma_nl_counter_mask new_mask) 81 { 82 struct auto_mode_param *param = &counter->mode.param; 83 84 counter->mode.mode = RDMA_COUNTER_MODE_AUTO; 85 counter->mode.mask = new_mask; 86 87 if (new_mask & RDMA_COUNTER_MASK_QP_TYPE) 88 param->qp_type = qp->qp_type; 89 } 90 91 static int __rdma_counter_bind_qp(struct rdma_counter *counter, 92 struct ib_qp *qp) 93 { 94 int ret; 95 96 if (qp->counter) 97 return -EINVAL; 98 99 if (!qp->device->ops.counter_bind_qp) 100 return -EOPNOTSUPP; 101 102 mutex_lock(&counter->lock); 103 ret = qp->device->ops.counter_bind_qp(counter, qp); 104 mutex_unlock(&counter->lock); 105 106 return ret; 107 } 108 109 int rdma_counter_modify(struct ib_device *dev, u32 port, 110 unsigned int index, bool enable) 111 { 112 struct rdma_hw_stats *stats; 113 int ret = 0; 114 115 if (!dev->ops.modify_hw_stat) 116 return -EOPNOTSUPP; 117 118 stats = ib_get_hw_stats_port(dev, port); 119 if (!stats || index >= stats->num_counters || 120 !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) 121 return -EINVAL; 122 123 mutex_lock(&stats->lock); 124 125 if (enable != test_bit(index, stats->is_disabled)) 126 goto out; 127 128 ret = dev->ops.modify_hw_stat(dev, port, index, enable); 129 if (ret) 130 goto out; 131 132 if (enable) 133 clear_bit(index, stats->is_disabled); 134 else 135 set_bit(index, stats->is_disabled); 136 out: 137 mutex_unlock(&stats->lock); 138 return ret; 139 } 140 141 static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port, 142 struct ib_qp *qp, 143 enum rdma_nl_counter_mode mode) 144 { 145 struct rdma_port_counter *port_counter; 146 struct rdma_counter *counter; 147 int ret; 148 149 if (!dev->ops.counter_dealloc || !dev->ops.counter_alloc_stats) 150 return NULL; 151 152 counter = kzalloc(sizeof(*counter), GFP_KERNEL); 153 if (!counter) 154 return NULL; 155 156 counter->device = dev; 157 counter->port = port; 158 159 rdma_restrack_new(&counter->res, RDMA_RESTRACK_COUNTER); 160 counter->stats = dev->ops.counter_alloc_stats(counter); 161 if (!counter->stats) 162 goto err_stats; 163 164 port_counter = &dev->port_data[port].port_counter; 165 mutex_lock(&port_counter->lock); 166 switch (mode) { 167 case RDMA_COUNTER_MODE_MANUAL: 168 ret = __counter_set_mode(port_counter, RDMA_COUNTER_MODE_MANUAL, 169 0); 170 if (ret) { 171 mutex_unlock(&port_counter->lock); 172 goto err_mode; 173 } 174 break; 175 case RDMA_COUNTER_MODE_AUTO: 176 auto_mode_init_counter(counter, qp, port_counter->mode.mask); 177 break; 178 default: 179 ret = -EOPNOTSUPP; 180 mutex_unlock(&port_counter->lock); 181 goto err_mode; 182 } 183 184 port_counter->num_counters++; 185 mutex_unlock(&port_counter->lock); 186 187 counter->mode.mode = mode; 188 kref_init(&counter->kref); 189 mutex_init(&counter->lock); 190 191 ret = __rdma_counter_bind_qp(counter, qp); 192 if (ret) 193 goto err_mode; 194 195 rdma_restrack_parent_name(&counter->res, &qp->res); 196 rdma_restrack_add(&counter->res); 197 return counter; 198 199 err_mode: 200 rdma_free_hw_stats_struct(counter->stats); 201 err_stats: 202 rdma_restrack_put(&counter->res); 203 kfree(counter); 204 return NULL; 205 } 206 207 static void rdma_counter_free(struct rdma_counter *counter) 208 { 209 struct rdma_port_counter *port_counter; 210 211 port_counter = &counter->device->port_data[counter->port].port_counter; 212 mutex_lock(&port_counter->lock); 213 port_counter->num_counters--; 214 if (!port_counter->num_counters && 215 (port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL)) 216 __counter_set_mode(port_counter, RDMA_COUNTER_MODE_NONE, 0); 217 218 mutex_unlock(&port_counter->lock); 219 220 rdma_restrack_del(&counter->res); 221 rdma_free_hw_stats_struct(counter->stats); 222 kfree(counter); 223 } 224 225 static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, 226 enum rdma_nl_counter_mask auto_mask) 227 { 228 struct auto_mode_param *param = &counter->mode.param; 229 bool match = true; 230 231 if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE) 232 match &= (param->qp_type == qp->qp_type); 233 234 if (auto_mask & RDMA_COUNTER_MASK_PID) 235 match &= (task_pid_nr(counter->res.task) == 236 task_pid_nr(qp->res.task)); 237 238 return match; 239 } 240 241 static int __rdma_counter_unbind_qp(struct ib_qp *qp) 242 { 243 struct rdma_counter *counter = qp->counter; 244 int ret; 245 246 if (!qp->device->ops.counter_unbind_qp) 247 return -EOPNOTSUPP; 248 249 mutex_lock(&counter->lock); 250 ret = qp->device->ops.counter_unbind_qp(qp); 251 mutex_unlock(&counter->lock); 252 253 return ret; 254 } 255 256 static void counter_history_stat_update(struct rdma_counter *counter) 257 { 258 struct ib_device *dev = counter->device; 259 struct rdma_port_counter *port_counter; 260 int i; 261 262 port_counter = &dev->port_data[counter->port].port_counter; 263 if (!port_counter->hstats) 264 return; 265 266 rdma_counter_query_stats(counter); 267 268 for (i = 0; i < counter->stats->num_counters; i++) 269 port_counter->hstats->value[i] += counter->stats->value[i]; 270 } 271 272 /* 273 * rdma_get_counter_auto_mode - Find the counter that @qp should be bound 274 * with in auto mode 275 * 276 * Return: The counter (with ref-count increased) if found 277 */ 278 static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp, 279 u32 port) 280 { 281 struct rdma_port_counter *port_counter; 282 struct rdma_counter *counter = NULL; 283 struct ib_device *dev = qp->device; 284 struct rdma_restrack_entry *res; 285 struct rdma_restrack_root *rt; 286 unsigned long id = 0; 287 288 port_counter = &dev->port_data[port].port_counter; 289 rt = &dev->res[RDMA_RESTRACK_COUNTER]; 290 xa_lock(&rt->xa); 291 xa_for_each(&rt->xa, id, res) { 292 counter = container_of(res, struct rdma_counter, res); 293 if ((counter->device != qp->device) || (counter->port != port)) 294 goto next; 295 296 if (auto_mode_match(qp, counter, port_counter->mode.mask)) 297 break; 298 next: 299 counter = NULL; 300 } 301 302 if (counter && !kref_get_unless_zero(&counter->kref)) 303 counter = NULL; 304 305 xa_unlock(&rt->xa); 306 return counter; 307 } 308 309 static void counter_release(struct kref *kref) 310 { 311 struct rdma_counter *counter; 312 313 counter = container_of(kref, struct rdma_counter, kref); 314 counter_history_stat_update(counter); 315 counter->device->ops.counter_dealloc(counter); 316 rdma_counter_free(counter); 317 } 318 319 /* 320 * rdma_counter_bind_qp_auto - Check and bind the QP to a counter base on 321 * the auto-mode rule 322 */ 323 int rdma_counter_bind_qp_auto(struct ib_qp *qp, u32 port) 324 { 325 struct rdma_port_counter *port_counter; 326 struct ib_device *dev = qp->device; 327 struct rdma_counter *counter; 328 int ret; 329 330 if (!rdma_restrack_is_tracked(&qp->res) || rdma_is_kernel_res(&qp->res)) 331 return 0; 332 333 if (!rdma_is_port_valid(dev, port)) 334 return -EINVAL; 335 336 port_counter = &dev->port_data[port].port_counter; 337 if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) 338 return 0; 339 340 counter = rdma_get_counter_auto_mode(qp, port); 341 if (counter) { 342 ret = __rdma_counter_bind_qp(counter, qp); 343 if (ret) { 344 kref_put(&counter->kref, counter_release); 345 return ret; 346 } 347 } else { 348 counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_AUTO); 349 if (!counter) 350 return -ENOMEM; 351 } 352 353 return 0; 354 } 355 356 /* 357 * rdma_counter_unbind_qp - Unbind a qp from a counter 358 * @force: 359 * true - Decrease the counter ref-count anyway (e.g., qp destroy) 360 */ 361 int rdma_counter_unbind_qp(struct ib_qp *qp, bool force) 362 { 363 struct rdma_counter *counter = qp->counter; 364 int ret; 365 366 if (!counter) 367 return -EINVAL; 368 369 ret = __rdma_counter_unbind_qp(qp); 370 if (ret && !force) 371 return ret; 372 373 kref_put(&counter->kref, counter_release); 374 return 0; 375 } 376 377 int rdma_counter_query_stats(struct rdma_counter *counter) 378 { 379 struct ib_device *dev = counter->device; 380 int ret; 381 382 if (!dev->ops.counter_update_stats) 383 return -EINVAL; 384 385 mutex_lock(&counter->lock); 386 ret = dev->ops.counter_update_stats(counter); 387 mutex_unlock(&counter->lock); 388 389 return ret; 390 } 391 392 static u64 get_running_counters_hwstat_sum(struct ib_device *dev, 393 u32 port, u32 index) 394 { 395 struct rdma_restrack_entry *res; 396 struct rdma_restrack_root *rt; 397 struct rdma_counter *counter; 398 unsigned long id = 0; 399 u64 sum = 0; 400 401 rt = &dev->res[RDMA_RESTRACK_COUNTER]; 402 xa_lock(&rt->xa); 403 xa_for_each(&rt->xa, id, res) { 404 if (!rdma_restrack_get(res)) 405 continue; 406 407 xa_unlock(&rt->xa); 408 409 counter = container_of(res, struct rdma_counter, res); 410 if ((counter->device != dev) || (counter->port != port) || 411 rdma_counter_query_stats(counter)) 412 goto next; 413 414 sum += counter->stats->value[index]; 415 416 next: 417 xa_lock(&rt->xa); 418 rdma_restrack_put(res); 419 } 420 421 xa_unlock(&rt->xa); 422 return sum; 423 } 424 425 /* 426 * rdma_counter_get_hwstat_value() - Get the sum value of all counters on a 427 * specific port, including the running ones and history data 428 */ 429 u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u32 port, u32 index) 430 { 431 struct rdma_port_counter *port_counter; 432 u64 sum; 433 434 port_counter = &dev->port_data[port].port_counter; 435 if (!port_counter->hstats) 436 return 0; 437 438 sum = get_running_counters_hwstat_sum(dev, port, index); 439 sum += port_counter->hstats->value[index]; 440 441 return sum; 442 } 443 444 static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num) 445 { 446 struct rdma_restrack_entry *res = NULL; 447 struct ib_qp *qp = NULL; 448 449 res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_QP, qp_num); 450 if (IS_ERR(res)) 451 return NULL; 452 453 qp = container_of(res, struct ib_qp, res); 454 if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) 455 goto err; 456 457 return qp; 458 459 err: 460 rdma_restrack_put(res); 461 return NULL; 462 } 463 464 static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev, 465 u32 counter_id) 466 { 467 struct rdma_restrack_entry *res; 468 struct rdma_counter *counter; 469 470 res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_COUNTER, counter_id); 471 if (IS_ERR(res)) 472 return NULL; 473 474 counter = container_of(res, struct rdma_counter, res); 475 kref_get(&counter->kref); 476 rdma_restrack_put(res); 477 478 return counter; 479 } 480 481 /* 482 * rdma_counter_bind_qpn() - Bind QP @qp_num to counter @counter_id 483 */ 484 int rdma_counter_bind_qpn(struct ib_device *dev, u32 port, 485 u32 qp_num, u32 counter_id) 486 { 487 struct rdma_port_counter *port_counter; 488 struct rdma_counter *counter; 489 struct ib_qp *qp; 490 int ret; 491 492 port_counter = &dev->port_data[port].port_counter; 493 if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO) 494 return -EINVAL; 495 496 qp = rdma_counter_get_qp(dev, qp_num); 497 if (!qp) 498 return -ENOENT; 499 500 counter = rdma_get_counter_by_id(dev, counter_id); 501 if (!counter) { 502 ret = -ENOENT; 503 goto err; 504 } 505 506 if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) { 507 ret = -EINVAL; 508 goto err_task; 509 } 510 511 if ((counter->device != qp->device) || (counter->port != qp->port)) { 512 ret = -EINVAL; 513 goto err_task; 514 } 515 516 ret = __rdma_counter_bind_qp(counter, qp); 517 if (ret) 518 goto err_task; 519 520 rdma_restrack_put(&qp->res); 521 return 0; 522 523 err_task: 524 kref_put(&counter->kref, counter_release); 525 err: 526 rdma_restrack_put(&qp->res); 527 return ret; 528 } 529 530 /* 531 * rdma_counter_bind_qpn_alloc() - Alloc a counter and bind QP @qp_num to it 532 * The id of new counter is returned in @counter_id 533 */ 534 int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u32 port, 535 u32 qp_num, u32 *counter_id) 536 { 537 struct rdma_port_counter *port_counter; 538 struct rdma_counter *counter; 539 struct ib_qp *qp; 540 int ret; 541 542 if (!rdma_is_port_valid(dev, port)) 543 return -EINVAL; 544 545 port_counter = &dev->port_data[port].port_counter; 546 if (!port_counter->hstats) 547 return -EOPNOTSUPP; 548 549 if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO) 550 return -EINVAL; 551 552 qp = rdma_counter_get_qp(dev, qp_num); 553 if (!qp) 554 return -ENOENT; 555 556 if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) { 557 ret = -EINVAL; 558 goto err; 559 } 560 561 counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_MANUAL); 562 if (!counter) { 563 ret = -ENOMEM; 564 goto err; 565 } 566 567 if (counter_id) 568 *counter_id = counter->id; 569 570 rdma_restrack_put(&qp->res); 571 return 0; 572 573 err: 574 rdma_restrack_put(&qp->res); 575 return ret; 576 } 577 578 /* 579 * rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter 580 */ 581 int rdma_counter_unbind_qpn(struct ib_device *dev, u32 port, 582 u32 qp_num, u32 counter_id) 583 { 584 struct rdma_port_counter *port_counter; 585 struct ib_qp *qp; 586 int ret; 587 588 if (!rdma_is_port_valid(dev, port)) 589 return -EINVAL; 590 591 qp = rdma_counter_get_qp(dev, qp_num); 592 if (!qp) 593 return -ENOENT; 594 595 if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) { 596 ret = -EINVAL; 597 goto out; 598 } 599 600 port_counter = &dev->port_data[port].port_counter; 601 if (!qp->counter || qp->counter->id != counter_id || 602 port_counter->mode.mode != RDMA_COUNTER_MODE_MANUAL) { 603 ret = -EINVAL; 604 goto out; 605 } 606 607 ret = rdma_counter_unbind_qp(qp, false); 608 609 out: 610 rdma_restrack_put(&qp->res); 611 return ret; 612 } 613 614 int rdma_counter_get_mode(struct ib_device *dev, u32 port, 615 enum rdma_nl_counter_mode *mode, 616 enum rdma_nl_counter_mask *mask) 617 { 618 struct rdma_port_counter *port_counter; 619 620 port_counter = &dev->port_data[port].port_counter; 621 *mode = port_counter->mode.mode; 622 *mask = port_counter->mode.mask; 623 624 return 0; 625 } 626 627 void rdma_counter_init(struct ib_device *dev) 628 { 629 struct rdma_port_counter *port_counter; 630 u32 port, i; 631 632 if (!dev->port_data) 633 return; 634 635 rdma_for_each_port(dev, port) { 636 port_counter = &dev->port_data[port].port_counter; 637 port_counter->mode.mode = RDMA_COUNTER_MODE_NONE; 638 mutex_init(&port_counter->lock); 639 640 if (!dev->ops.alloc_hw_port_stats) 641 continue; 642 643 port_counter->hstats = dev->ops.alloc_hw_port_stats(dev, port); 644 if (!port_counter->hstats) 645 goto fail; 646 } 647 648 return; 649 650 fail: 651 for (i = port; i >= rdma_start_port(dev); i--) { 652 port_counter = &dev->port_data[port].port_counter; 653 rdma_free_hw_stats_struct(port_counter->hstats); 654 port_counter->hstats = NULL; 655 mutex_destroy(&port_counter->lock); 656 } 657 } 658 659 void rdma_counter_release(struct ib_device *dev) 660 { 661 struct rdma_port_counter *port_counter; 662 u32 port; 663 664 rdma_for_each_port(dev, port) { 665 port_counter = &dev->port_data[port].port_counter; 666 rdma_free_hw_stats_struct(port_counter->hstats); 667 mutex_destroy(&port_counter->lock); 668 } 669 } 670