// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2019 Mellanox Technologies. All rights reserved.
 */
#include <rdma/ib_verbs.h>
#include <rdma/rdma_counter.h>

#include "core_priv.h"
#include "restrack.h"

#define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE | RDMA_COUNTER_MASK_PID)

static int __counter_set_mode(struct rdma_port_counter *port_counter,
			      enum rdma_nl_counter_mode new_mode,
			      enum rdma_nl_counter_mask new_mask)
{
	if (new_mode == RDMA_COUNTER_MODE_AUTO) {
		if (new_mask & (~ALL_AUTO_MODE_MASKS))
			return -EINVAL;
		if (port_counter->num_counters)
			return -EBUSY;
	}

	port_counter->mode.mode = new_mode;
	port_counter->mode.mask = new_mask;
	return 0;
}

/*
 * rdma_counter_set_auto_mode() - Turn on/off per-port auto mode
 *
 * @dev: Device to operate on
 * @port: Port to use
 * @mask: Mask to configure
 * @extack: Message to the user
 *
 * Return 0 on success; leaving the counter mode unchanged is also
 * considered success.
 * Return -EBUSY when changing to auto mode while there are bound counters.
 */
int rdma_counter_set_auto_mode(struct ib_device *dev, u32 port,
			       enum rdma_nl_counter_mask mask,
			       struct netlink_ext_ack *extack)
{
	struct rdma_port_counter *port_counter;
	enum rdma_nl_counter_mode mode;
	int ret;

	port_counter = &dev->port_data[port].port_counter;
	if (!port_counter->hstats)
		return -EOPNOTSUPP;

	mutex_lock(&port_counter->lock);
	if (mask)
		mode = RDMA_COUNTER_MODE_AUTO;
	else
		mode = (port_counter->num_counters) ? RDMA_COUNTER_MODE_MANUAL :
						      RDMA_COUNTER_MODE_NONE;

	if (port_counter->mode.mode == mode &&
	    port_counter->mode.mask == mask) {
		ret = 0;
		goto out;
	}

	ret = __counter_set_mode(port_counter, mode, mask);

out:
	mutex_unlock(&port_counter->lock);
	if (ret == -EBUSY)
		NL_SET_ERR_MSG(
			extack,
			"Modifying auto mode is not allowed when there is a bound QP");
	return ret;
}

static void auto_mode_init_counter(struct rdma_counter *counter,
				   const struct ib_qp *qp,
				   enum rdma_nl_counter_mask new_mask)
{
	struct auto_mode_param *param = &counter->mode.param;

	counter->mode.mode = RDMA_COUNTER_MODE_AUTO;
	counter->mode.mask = new_mask;

	if (new_mask & RDMA_COUNTER_MASK_QP_TYPE)
		param->qp_type = qp->qp_type;
}

static int __rdma_counter_bind_qp(struct rdma_counter *counter,
				  struct ib_qp *qp)
{
	int ret;

	if (qp->counter)
		return -EINVAL;

	if (!qp->device->ops.counter_bind_qp)
		return -EOPNOTSUPP;

	mutex_lock(&counter->lock);
	ret = qp->device->ops.counter_bind_qp(counter, qp);
	mutex_unlock(&counter->lock);

	return ret;
}
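
/*
 * Example: a minimal sketch of how a caller (e.g., a netlink handler)
 * might enable per-port auto mode with both supported grouping criteria.
 * example_enable_auto_mode() and its hard-coded mask are illustrative
 * assumptions, not kernel API.
 */
static int __maybe_unused example_enable_auto_mode(struct ib_device *dev,
						   u32 port,
						   struct netlink_ext_ack *extack)
{
	/* Group QPs by type and by owning PID, the two bits in ALL_AUTO_MODE_MASKS */
	return rdma_counter_set_auto_mode(dev, port,
					  RDMA_COUNTER_MASK_QP_TYPE |
						  RDMA_COUNTER_MASK_PID,
					  extack);
}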

static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port,
					   struct ib_qp *qp,
					   enum rdma_nl_counter_mode mode)
{
	struct rdma_port_counter *port_counter;
	struct rdma_counter *counter;
	int ret;

	if (!dev->ops.counter_dealloc || !dev->ops.counter_alloc_stats)
		return NULL;

	counter = kzalloc(sizeof(*counter), GFP_KERNEL);
	if (!counter)
		return NULL;

	counter->device = dev;
	counter->port = port;

	rdma_restrack_new(&counter->res, RDMA_RESTRACK_COUNTER);
	counter->stats = dev->ops.counter_alloc_stats(counter);
	if (!counter->stats)
		goto err_stats;

	port_counter = &dev->port_data[port].port_counter;
	mutex_lock(&port_counter->lock);
	switch (mode) {
	case RDMA_COUNTER_MODE_MANUAL:
		ret = __counter_set_mode(port_counter, RDMA_COUNTER_MODE_MANUAL,
					 0);
		if (ret) {
			mutex_unlock(&port_counter->lock);
			goto err_mode;
		}
		break;
	case RDMA_COUNTER_MODE_AUTO:
		auto_mode_init_counter(counter, qp, port_counter->mode.mask);
		break;
	default:
		ret = -EOPNOTSUPP;
		mutex_unlock(&port_counter->lock);
		goto err_mode;
	}

	port_counter->num_counters++;
	mutex_unlock(&port_counter->lock);

	counter->mode.mode = mode;
	kref_init(&counter->kref);
	mutex_init(&counter->lock);

	ret = __rdma_counter_bind_qp(counter, qp);
	if (ret)
		goto err_mode;

	rdma_restrack_parent_name(&counter->res, &qp->res);
	rdma_restrack_add(&counter->res);
	return counter;

err_mode:
	kfree(counter->stats);
err_stats:
	rdma_restrack_put(&counter->res);
	kfree(counter);
	return NULL;
}

static void rdma_counter_free(struct rdma_counter *counter)
{
	struct rdma_port_counter *port_counter;

	port_counter = &counter->device->port_data[counter->port].port_counter;
	mutex_lock(&port_counter->lock);
	port_counter->num_counters--;
	if (!port_counter->num_counters &&
	    (port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL))
		__counter_set_mode(port_counter, RDMA_COUNTER_MODE_NONE, 0);

	mutex_unlock(&port_counter->lock);

	rdma_restrack_del(&counter->res);
	kfree(counter->stats);
	kfree(counter);
}

static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter,
			    enum rdma_nl_counter_mask auto_mask)
{
	struct auto_mode_param *param = &counter->mode.param;
	bool match = true;

	if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE)
		match &= (param->qp_type == qp->qp_type);

	if (auto_mask & RDMA_COUNTER_MASK_PID)
		match &= (task_pid_nr(counter->res.task) ==
			  task_pid_nr(qp->res.task));

	return match;
}

static int __rdma_counter_unbind_qp(struct ib_qp *qp)
{
	struct rdma_counter *counter = qp->counter;
	int ret;

	if (!qp->device->ops.counter_unbind_qp)
		return -EOPNOTSUPP;

	mutex_lock(&counter->lock);
	ret = qp->device->ops.counter_unbind_qp(qp);
	mutex_unlock(&counter->lock);

	return ret;
}

static void counter_history_stat_update(struct rdma_counter *counter)
{
	struct ib_device *dev = counter->device;
	struct rdma_port_counter *port_counter;
	int i;

	port_counter = &dev->port_data[counter->port].port_counter;
	if (!port_counter->hstats)
		return;

	rdma_counter_query_stats(counter);

	for (i = 0; i < counter->stats->num_counters; i++)
		port_counter->hstats->value[i] += counter->stats->value[i];
}
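
/*
 * Example: an illustrative helper (assumed, not in this file) showing the
 * sharing rule that auto_mode_match() implements: two QPs land on the same
 * auto-mode counter only if every criterion selected by @mask agrees.
 */
static bool __maybe_unused example_qps_share_counter(const struct ib_qp *a,
						     const struct ib_qp *b,
						     enum rdma_nl_counter_mask mask)
{
	if ((mask & RDMA_COUNTER_MASK_QP_TYPE) && a->qp_type != b->qp_type)
		return false;

	if ((mask & RDMA_COUNTER_MASK_PID) &&
	    task_pid_nr(a->res.task) != task_pid_nr(b->res.task))
		return false;

	return true;
}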

/*
 * rdma_get_counter_auto_mode - Find the counter to which @qp should be
 *   bound in auto mode
 *
 * Return: The counter (with ref-count increased) if found
 */
static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp,
						       u32 port)
{
	struct rdma_port_counter *port_counter;
	struct rdma_counter *counter = NULL;
	struct ib_device *dev = qp->device;
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	unsigned long id = 0;

	port_counter = &dev->port_data[port].port_counter;
	rt = &dev->res[RDMA_RESTRACK_COUNTER];
	xa_lock(&rt->xa);
	xa_for_each(&rt->xa, id, res) {
		counter = container_of(res, struct rdma_counter, res);
		if ((counter->device != qp->device) || (counter->port != port))
			goto next;

		if (auto_mode_match(qp, counter, port_counter->mode.mask))
			break;
next:
		counter = NULL;
	}

	if (counter && !kref_get_unless_zero(&counter->kref))
		counter = NULL;

	xa_unlock(&rt->xa);
	return counter;
}

static void counter_release(struct kref *kref)
{
	struct rdma_counter *counter;

	counter = container_of(kref, struct rdma_counter, kref);
	counter_history_stat_update(counter);
	counter->device->ops.counter_dealloc(counter);
	rdma_counter_free(counter);
}

/*
 * rdma_counter_bind_qp_auto - Check and bind the QP to a counter based on
 *   the auto-mode rule
 */
int rdma_counter_bind_qp_auto(struct ib_qp *qp, u32 port)
{
	struct rdma_port_counter *port_counter;
	struct ib_device *dev = qp->device;
	struct rdma_counter *counter;
	int ret;

	if (!rdma_restrack_is_tracked(&qp->res) || rdma_is_kernel_res(&qp->res))
		return 0;

	if (!rdma_is_port_valid(dev, port))
		return -EINVAL;

	port_counter = &dev->port_data[port].port_counter;
	if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO)
		return 0;

	counter = rdma_get_counter_auto_mode(qp, port);
	if (counter) {
		ret = __rdma_counter_bind_qp(counter, qp);
		if (ret) {
			kref_put(&counter->kref, counter_release);
			return ret;
		}
	} else {
		counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_AUTO);
		if (!counter)
			return -ENOMEM;
	}

	return 0;
}

/*
 * rdma_counter_unbind_qp - Unbind a QP from a counter
 * @force:
 *   true - Decrease the counter ref-count anyway (e.g., on QP destroy)
 */
int rdma_counter_unbind_qp(struct ib_qp *qp, bool force)
{
	struct rdma_counter *counter = qp->counter;
	int ret;

	if (!counter)
		return -EINVAL;

	ret = __rdma_counter_unbind_qp(qp);
	if (ret && !force)
		return ret;

	kref_put(&counter->kref, counter_release);
	return 0;
}

int rdma_counter_query_stats(struct rdma_counter *counter)
{
	struct ib_device *dev = counter->device;
	int ret;

	if (!dev->ops.counter_update_stats)
		return -EINVAL;

	mutex_lock(&counter->lock);
	ret = dev->ops.counter_update_stats(counter);
	mutex_unlock(&counter->lock);

	return ret;
}

static u64 get_running_counters_hwstat_sum(struct ib_device *dev,
					   u32 port, u32 index)
{
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	struct rdma_counter *counter;
	unsigned long id = 0;
	u64 sum = 0;

	rt = &dev->res[RDMA_RESTRACK_COUNTER];
	xa_lock(&rt->xa);
	xa_for_each(&rt->xa, id, res) {
		if (!rdma_restrack_get(res))
			continue;

		xa_unlock(&rt->xa);

		counter = container_of(res, struct rdma_counter, res);
		if ((counter->device != dev) || (counter->port != port) ||
		    rdma_counter_query_stats(counter))
			goto next;

		sum += counter->stats->value[index];

next:
		xa_lock(&rt->xa);
		rdma_restrack_put(res);
	}

	xa_unlock(&rt->xa);
	return sum;
}
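
/*
 * Example: a sketch of the teardown side (the caller and its name are
 * assumptions). On QP destruction the binding must be dropped even if the
 * driver's unbind hook fails, hence force=true.
 */
static void __maybe_unused example_qp_destroy_cleanup(struct ib_qp *qp)
{
	if (qp->counter)
		rdma_counter_unbind_qp(qp, true);
}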

/*
 * rdma_counter_get_hwstat_value() - Get the sum value of all counters on a
 *   specific port, including the running ones and the history data
 */
u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u32 port, u32 index)
{
	struct rdma_port_counter *port_counter;
	u64 sum;

	port_counter = &dev->port_data[port].port_counter;
	if (!port_counter->hstats)
		return 0;

	sum = get_running_counters_hwstat_sum(dev, port, index);
	sum += port_counter->hstats->value[index];

	return sum;
}

static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num)
{
	struct rdma_restrack_entry *res = NULL;
	struct ib_qp *qp = NULL;

	res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_QP, qp_num);
	if (IS_ERR(res))
		return NULL;

	qp = container_of(res, struct ib_qp, res);
	if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
		goto err;

	return qp;

err:
	rdma_restrack_put(res);
	return NULL;
}

static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev,
						   u32 counter_id)
{
	struct rdma_restrack_entry *res;
	struct rdma_counter *counter;

	res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_COUNTER, counter_id);
	if (IS_ERR(res))
		return NULL;

	counter = container_of(res, struct rdma_counter, res);
	kref_get(&counter->kref);
	rdma_restrack_put(res);

	return counter;
}

/*
 * rdma_counter_bind_qpn() - Bind QP @qp_num to counter @counter_id
 */
int rdma_counter_bind_qpn(struct ib_device *dev, u32 port,
			  u32 qp_num, u32 counter_id)
{
	struct rdma_port_counter *port_counter;
	struct rdma_counter *counter;
	struct ib_qp *qp;
	int ret;

	port_counter = &dev->port_data[port].port_counter;
	if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO)
		return -EINVAL;

	qp = rdma_counter_get_qp(dev, qp_num);
	if (!qp)
		return -ENOENT;

	counter = rdma_get_counter_by_id(dev, counter_id);
	if (!counter) {
		ret = -ENOENT;
		goto err;
	}

	if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) {
		ret = -EINVAL;
		goto err_task;
	}

	if ((counter->device != qp->device) || (counter->port != qp->port)) {
		ret = -EINVAL;
		goto err_task;
	}

	ret = __rdma_counter_bind_qp(counter, qp);
	if (ret)
		goto err_task;

	rdma_restrack_put(&qp->res);
	return 0;

err_task:
	kref_put(&counter->kref, counter_release);
err:
	rdma_restrack_put(&qp->res);
	return ret;
}

/*
 * rdma_counter_bind_qpn_alloc() - Allocate a counter and bind QP @qp_num
 *   to it. The ID of the new counter is returned in @counter_id.
 */
int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u32 port,
				u32 qp_num, u32 *counter_id)
{
	struct rdma_port_counter *port_counter;
	struct rdma_counter *counter;
	struct ib_qp *qp;
	int ret;

	if (!rdma_is_port_valid(dev, port))
		return -EINVAL;

	port_counter = &dev->port_data[port].port_counter;
	if (!port_counter->hstats)
		return -EOPNOTSUPP;

	if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO)
		return -EINVAL;

	qp = rdma_counter_get_qp(dev, qp_num);
	if (!qp)
		return -ENOENT;

	if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) {
		ret = -EINVAL;
		goto err;
	}

	counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_MANUAL);
	if (!counter) {
		ret = -ENOMEM;
		goto err;
	}

	if (counter_id)
		*counter_id = counter->id;

	rdma_restrack_put(&qp->res);
	return 0;

err:
	rdma_restrack_put(&qp->res);
	return ret;
}
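
/*
 * Example: a sketch of the manual-mode flow (the function name and the
 * pr_debug message are illustrative): allocate a dedicated counter for one
 * QP and report the new counter ID, as a stat-set netlink request would.
 */
static int __maybe_unused example_manual_bind(struct ib_device *dev, u32 port,
					      u32 qp_num)
{
	u32 counter_id;
	int ret;

	ret = rdma_counter_bind_qpn_alloc(dev, port, qp_num, &counter_id);
	if (ret)
		return ret;

	pr_debug("QP %u bound to new counter %u\n", qp_num, counter_id);
	return 0;
}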

/*
 * rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter
 */
int rdma_counter_unbind_qpn(struct ib_device *dev, u32 port,
			    u32 qp_num, u32 counter_id)
{
	struct rdma_port_counter *port_counter;
	struct ib_qp *qp;
	int ret;

	if (!rdma_is_port_valid(dev, port))
		return -EINVAL;

	qp = rdma_counter_get_qp(dev, qp_num);
	if (!qp)
		return -ENOENT;

	if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) {
		ret = -EINVAL;
		goto out;
	}

	port_counter = &dev->port_data[port].port_counter;
	if (!qp->counter || qp->counter->id != counter_id ||
	    port_counter->mode.mode != RDMA_COUNTER_MODE_MANUAL) {
		ret = -EINVAL;
		goto out;
	}

	ret = rdma_counter_unbind_qp(qp, false);

out:
	rdma_restrack_put(&qp->res);
	return ret;
}

int rdma_counter_get_mode(struct ib_device *dev, u32 port,
			  enum rdma_nl_counter_mode *mode,
			  enum rdma_nl_counter_mask *mask)
{
	struct rdma_port_counter *port_counter;

	port_counter = &dev->port_data[port].port_counter;
	*mode = port_counter->mode.mode;
	*mask = port_counter->mode.mask;

	return 0;
}

void rdma_counter_init(struct ib_device *dev)
{
	struct rdma_port_counter *port_counter;
	u32 port, i;

	if (!dev->port_data)
		return;

	rdma_for_each_port(dev, port) {
		port_counter = &dev->port_data[port].port_counter;
		port_counter->mode.mode = RDMA_COUNTER_MODE_NONE;
		mutex_init(&port_counter->lock);

		if (!dev->ops.alloc_hw_port_stats)
			continue;

		port_counter->hstats = dev->ops.alloc_hw_port_stats(dev, port);
		if (!port_counter->hstats)
			goto fail;
	}

	return;

fail:
	/* Unwind every port initialized before the failing one */
	for (i = port; i >= rdma_start_port(dev); i--) {
		port_counter = &dev->port_data[i].port_counter;
		kfree(port_counter->hstats);
		port_counter->hstats = NULL;
		mutex_destroy(&port_counter->lock);
	}
}

void rdma_counter_release(struct ib_device *dev)
{
	struct rdma_port_counter *port_counter;
	u32 port;

	rdma_for_each_port(dev, port) {
		port_counter = &dev->port_data[port].port_counter;
		kfree(port_counter->hstats);
		mutex_destroy(&port_counter->lock);
	}
}
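
/*
 * Example: a read-side sketch (assumed helper, not part of this file)
 * combining the query helpers above: report the port's counter mode and
 * return one aggregated hardware stat.
 */
static u64 __maybe_unused example_port_stat_snapshot(struct ib_device *dev,
						     u32 port, u32 index)
{
	enum rdma_nl_counter_mode mode;
	enum rdma_nl_counter_mask mask;

	rdma_counter_get_mode(dev, port, &mode, &mask);
	pr_debug("port %u: counter mode %d, mask 0x%x\n", port, mode, mask);

	/* Running counters plus the history accumulated by counter_release() */
	return rdma_counter_get_hwstat_value(dev, port, index);
}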