// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2019 Mellanox Technologies. All rights reserved.
 */
#include <rdma/ib_verbs.h>
#include <rdma/rdma_counter.h>

#include "core_priv.h"
#include "restrack.h"

#define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE)

static int __counter_set_mode(struct rdma_counter_mode *curr,
			      enum rdma_nl_counter_mode new_mode,
			      enum rdma_nl_counter_mask new_mask)
{
	if ((new_mode == RDMA_COUNTER_MODE_AUTO) &&
	    ((new_mask & (~ALL_AUTO_MODE_MASKS)) ||
	     (curr->mode != RDMA_COUNTER_MODE_NONE)))
		return -EINVAL;

	curr->mode = new_mode;
	curr->mask = new_mask;
	return 0;
}

/**
 * rdma_counter_set_auto_mode() - Turn on/off per-port auto mode
 *
 * When @on is true, @mask must be set; when @on is false, the port goes
 * into manual mode if any counter is still allocated, so that the user
 * can keep accessing the counters manually.
 */
int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port,
			       bool on, enum rdma_nl_counter_mask mask)
{
	struct rdma_port_counter *port_counter;
	int ret;

	port_counter = &dev->port_data[port].port_counter;
	if (!port_counter->hstats)
		return -EOPNOTSUPP;

	mutex_lock(&port_counter->lock);
	if (on) {
		ret = __counter_set_mode(&port_counter->mode,
					 RDMA_COUNTER_MODE_AUTO, mask);
	} else {
		if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) {
			ret = -EINVAL;
			goto out;
		}

		if (port_counter->num_counters)
			ret = __counter_set_mode(&port_counter->mode,
						 RDMA_COUNTER_MODE_MANUAL, 0);
		else
			ret = __counter_set_mode(&port_counter->mode,
						 RDMA_COUNTER_MODE_NONE, 0);
	}

out:
	mutex_unlock(&port_counter->lock);
	return ret;
}

static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port,
					       enum rdma_nl_counter_mode mode)
{
	struct rdma_port_counter *port_counter;
	struct rdma_counter *counter;
	int ret;

	if (!dev->ops.counter_dealloc || !dev->ops.counter_alloc_stats)
		return NULL;

	counter = kzalloc(sizeof(*counter), GFP_KERNEL);
	if (!counter)
		return NULL;

	counter->device = dev;
	counter->port = port;
	counter->res.type = RDMA_RESTRACK_COUNTER;
	counter->stats = dev->ops.counter_alloc_stats(counter);
	if (!counter->stats)
		goto err_stats;

	port_counter = &dev->port_data[port].port_counter;
	mutex_lock(&port_counter->lock);
	if (mode == RDMA_COUNTER_MODE_MANUAL) {
		ret = __counter_set_mode(&port_counter->mode,
					 RDMA_COUNTER_MODE_MANUAL, 0);
		if (ret)
			goto err_mode;
	}

	port_counter->num_counters++;
	mutex_unlock(&port_counter->lock);

	counter->mode.mode = mode;
	kref_init(&counter->kref);
	mutex_init(&counter->lock);

	return counter;

err_mode:
	mutex_unlock(&port_counter->lock);
	kfree(counter->stats);
err_stats:
	kfree(counter);
	return NULL;
}

static void rdma_counter_free(struct rdma_counter *counter)
{
	struct rdma_port_counter *port_counter;

	port_counter = &counter->device->port_data[counter->port].port_counter;
	mutex_lock(&port_counter->lock);
	port_counter->num_counters--;
	if (!port_counter->num_counters &&
	    (port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL))
		__counter_set_mode(&port_counter->mode, RDMA_COUNTER_MODE_NONE,
				   0);

	mutex_unlock(&port_counter->lock);

	rdma_restrack_del(&counter->res);
	kfree(counter->stats);
	kfree(counter);
}
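
/*
 * Initialize the auto-mode parameters of a freshly allocated counter.
 * The attributes recorded here (currently only the QP type) are what
 * auto_mode_match() compares when deciding whether a later QP may share
 * this counter.
 */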
static void auto_mode_init_counter(struct rdma_counter *counter,
				   const struct ib_qp *qp,
				   enum rdma_nl_counter_mask new_mask)
{
	struct auto_mode_param *param = &counter->mode.param;

	counter->mode.mode = RDMA_COUNTER_MODE_AUTO;
	counter->mode.mask = new_mask;

	if (new_mask & RDMA_COUNTER_MASK_QP_TYPE)
		param->qp_type = qp->qp_type;
}

static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter,
			    enum rdma_nl_counter_mask auto_mask)
{
	struct auto_mode_param *param = &counter->mode.param;
	bool match = true;

	if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res))
		return false;

	/* Ensure that the counter belongs to the right PID */
	if (!rdma_is_kernel_res(&counter->res) &&
	    !rdma_is_kernel_res(&qp->res) &&
	    (task_pid_vnr(counter->res.task) != current->pid))
		return false;

	if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE)
		match &= (param->qp_type == qp->qp_type);

	return match;
}

static int __rdma_counter_bind_qp(struct rdma_counter *counter,
				  struct ib_qp *qp)
{
	int ret;

	if (qp->counter)
		return -EINVAL;

	if (!qp->device->ops.counter_bind_qp)
		return -EOPNOTSUPP;

	mutex_lock(&counter->lock);
	ret = qp->device->ops.counter_bind_qp(counter, qp);
	mutex_unlock(&counter->lock);

	return ret;
}

static int __rdma_counter_unbind_qp(struct ib_qp *qp)
{
	struct rdma_counter *counter = qp->counter;
	int ret;

	if (!qp->device->ops.counter_unbind_qp)
		return -EOPNOTSUPP;

	mutex_lock(&counter->lock);
	ret = qp->device->ops.counter_unbind_qp(qp);
	mutex_unlock(&counter->lock);

	return ret;
}

/* Fold the stats of a dying counter into the per-port history total */
static void counter_history_stat_update(const struct rdma_counter *counter)
{
	struct ib_device *dev = counter->device;
	struct rdma_port_counter *port_counter;
	int i;

	port_counter = &dev->port_data[counter->port].port_counter;
	if (!port_counter->hstats)
		return;

	for (i = 0; i < counter->stats->num_counters; i++)
		port_counter->hstats->value[i] += counter->stats->value[i];
}

/**
 * rdma_get_counter_auto_mode - Find the counter that @qp should be bound
 *   with in auto mode
 *
 * Return: The counter (with ref-count increased) if found
 */
static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp,
						       u8 port)
{
	struct rdma_port_counter *port_counter;
	struct rdma_counter *counter = NULL;
	struct ib_device *dev = qp->device;
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	unsigned long id = 0;

	port_counter = &dev->port_data[port].port_counter;
	rt = &dev->res[RDMA_RESTRACK_COUNTER];
	xa_lock(&rt->xa);
	xa_for_each(&rt->xa, id, res) {
		if (!rdma_is_visible_in_pid_ns(res))
			continue;

		counter = container_of(res, struct rdma_counter, res);
		if ((counter->device != qp->device) || (counter->port != port))
			goto next;

		if (auto_mode_match(qp, counter, port_counter->mode.mask))
			break;
next:
		counter = NULL;
	}

	if (counter && !kref_get_unless_zero(&counter->kref))
		counter = NULL;

	xa_unlock(&rt->xa);
	return counter;
}
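
/*
 * Register the counter in restrack with the same ownership as the QP it
 * serves: kernel QPs pass on their kern_name, user QPs their task, so
 * the counter is listed in the right pid namespace.
 */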
static void rdma_counter_res_add(struct rdma_counter *counter,
				 struct ib_qp *qp)
{
	if (rdma_is_kernel_res(&qp->res)) {
		rdma_restrack_set_task(&counter->res, qp->res.kern_name);
		rdma_restrack_kadd(&counter->res);
	} else {
		rdma_restrack_attach_task(&counter->res, qp->res.task);
		rdma_restrack_uadd(&counter->res);
	}
}

static void counter_release(struct kref *kref)
{
	struct rdma_counter *counter;

	counter = container_of(kref, struct rdma_counter, kref);
	counter_history_stat_update(counter);
	counter->device->ops.counter_dealloc(counter);
	rdma_counter_free(counter);
}

/**
 * rdma_counter_bind_qp_auto - Check and bind the QP to a counter based on
 *   the auto-mode rule
 */
int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port)
{
	struct rdma_port_counter *port_counter;
	struct ib_device *dev = qp->device;
	struct rdma_counter *counter;
	int ret;

	if (!rdma_is_port_valid(dev, port))
		return -EINVAL;

	port_counter = &dev->port_data[port].port_counter;
	if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO)
		return 0;

	counter = rdma_get_counter_auto_mode(qp, port);
	if (counter) {
		ret = __rdma_counter_bind_qp(counter, qp);
		if (ret) {
			kref_put(&counter->kref, counter_release);
			return ret;
		}
	} else {
		counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_AUTO);
		if (!counter)
			return -ENOMEM;

		auto_mode_init_counter(counter, qp, port_counter->mode.mask);

		ret = __rdma_counter_bind_qp(counter, qp);
		if (ret) {
			rdma_counter_free(counter);
			return ret;
		}

		rdma_counter_res_add(counter, qp);
	}

	return 0;
}

/**
 * rdma_counter_unbind_qp - Unbind a qp from a counter
 * @force:
 *   true - Decrease the counter ref-count anyway (e.g., qp destroy)
 */
int rdma_counter_unbind_qp(struct ib_qp *qp, bool force)
{
	struct rdma_counter *counter = qp->counter;
	int ret;

	if (!counter)
		return -EINVAL;

	ret = __rdma_counter_unbind_qp(qp);
	if (ret && !force)
		return ret;

	kref_put(&counter->kref, counter_release);
	return 0;
}

int rdma_counter_query_stats(struct rdma_counter *counter)
{
	struct ib_device *dev = counter->device;
	int ret;

	if (!dev->ops.counter_update_stats)
		return -EINVAL;

	mutex_lock(&counter->lock);
	ret = dev->ops.counter_update_stats(counter);
	mutex_unlock(&counter->lock);

	return ret;
}
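
/*
 * Sum hwstat @index over all live counters on @dev/@port.  The xarray
 * lock is dropped around rdma_counter_query_stats(), which may sleep in
 * the driver; the restrack reference taken beforehand keeps each entry
 * valid across the unlocked window.
 */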
static u64 get_running_counters_hwstat_sum(struct ib_device *dev,
					   u8 port, u32 index)
{
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	struct rdma_counter *counter;
	unsigned long id = 0;
	u64 sum = 0;

	rt = &dev->res[RDMA_RESTRACK_COUNTER];
	xa_lock(&rt->xa);
	xa_for_each(&rt->xa, id, res) {
		if (!rdma_restrack_get(res))
			continue;

		xa_unlock(&rt->xa);

		counter = container_of(res, struct rdma_counter, res);
		if ((counter->device != dev) || (counter->port != port) ||
		    rdma_counter_query_stats(counter))
			goto next;

		sum += counter->stats->value[index];

next:
		xa_lock(&rt->xa);
		rdma_restrack_put(res);
	}

	xa_unlock(&rt->xa);
	return sum;
}

/**
 * rdma_counter_get_hwstat_value() - Get the sum value of all counters on a
 *   specific port, including the running ones and history data
 */
u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u8 port, u32 index)
{
	struct rdma_port_counter *port_counter;
	u64 sum;

	port_counter = &dev->port_data[port].port_counter;
	if (!port_counter->hstats)
		return 0;

	sum = get_running_counters_hwstat_sum(dev, port, index);
	sum += port_counter->hstats->value[index];

	return sum;
}

/*
 * Look up a QP by number, taking a restrack reference.  Raw packet QPs
 * are only handed out to CAP_NET_RAW-capable callers.
 */
static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num)
{
	struct rdma_restrack_entry *res = NULL;
	struct ib_qp *qp = NULL;

	res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_QP, qp_num);
	if (IS_ERR(res))
		return NULL;

	if (!rdma_is_visible_in_pid_ns(res))
		goto err;

	qp = container_of(res, struct ib_qp, res);
	if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
		goto err;

	return qp;

err:
	/* Put the entry itself: @qp is still NULL on the first error path */
	rdma_restrack_put(res);
	return NULL;
}

static int rdma_counter_bind_qp_manual(struct rdma_counter *counter,
				       struct ib_qp *qp)
{
	if ((counter->device != qp->device) || (counter->port != qp->port))
		return -EINVAL;

	return __rdma_counter_bind_qp(counter, qp);
}

static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev,
						   u32 counter_id)
{
	struct rdma_restrack_entry *res;
	struct rdma_counter *counter;

	res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_COUNTER, counter_id);
	if (IS_ERR(res))
		return NULL;

	if (!rdma_is_visible_in_pid_ns(res)) {
		rdma_restrack_put(res);
		return NULL;
	}

	counter = container_of(res, struct rdma_counter, res);
	kref_get(&counter->kref);
	rdma_restrack_put(res);

	return counter;
}

/**
 * rdma_counter_bind_qpn() - Bind QP @qp_num to counter @counter_id
 */
int rdma_counter_bind_qpn(struct ib_device *dev, u8 port,
			  u32 qp_num, u32 counter_id)
{
	struct rdma_counter *counter;
	struct ib_qp *qp;
	int ret;

	qp = rdma_counter_get_qp(dev, qp_num);
	if (!qp)
		return -ENOENT;

	counter = rdma_get_counter_by_id(dev, counter_id);
	if (!counter) {
		ret = -ENOENT;
		goto err;
	}

	if (counter->res.task != qp->res.task) {
		ret = -EINVAL;
		goto err_task;
	}

	ret = rdma_counter_bind_qp_manual(counter, qp);
	if (ret)
		goto err_task;

	rdma_restrack_put(&qp->res);
	return 0;

err_task:
	kref_put(&counter->kref, counter_release);
err:
	rdma_restrack_put(&qp->res);
	return ret;
}
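
/*
 * A minimal sketch of the manual-mode life cycle driven through the two
 * entry points below (hypothetical caller; in practice a netlink handler
 * resolves @dev, @port and @qpn and validates them before calling in):
 *
 *	u32 cntn;
 *	int err;
 *
 *	err = rdma_counter_bind_qpn_alloc(dev, port, qpn, &cntn);
 *	if (!err)
 *		err = rdma_counter_unbind_qpn(dev, port, qpn, cntn);
 */
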
/**
 * rdma_counter_bind_qpn_alloc() - Alloc a counter and bind QP @qp_num to it
 *   The ID of the new counter is returned in @counter_id
 */
int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port,
				u32 qp_num, u32 *counter_id)
{
	struct rdma_counter *counter;
	struct ib_qp *qp;
	int ret;

	if (!rdma_is_port_valid(dev, port))
		return -EINVAL;

	if (!dev->port_data[port].port_counter.hstats)
		return -EOPNOTSUPP;

	qp = rdma_counter_get_qp(dev, qp_num);
	if (!qp)
		return -ENOENT;

	if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) {
		ret = -EINVAL;
		goto err;
	}

	counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_MANUAL);
	if (!counter) {
		ret = -ENOMEM;
		goto err;
	}

	ret = rdma_counter_bind_qp_manual(counter, qp);
	if (ret)
		goto err_bind;

	if (counter_id)
		*counter_id = counter->id;

	rdma_counter_res_add(counter, qp);

	rdma_restrack_put(&qp->res);
	return ret;

err_bind:
	rdma_counter_free(counter);
err:
	rdma_restrack_put(&qp->res);
	return ret;
}

/**
 * rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter
 */
int rdma_counter_unbind_qpn(struct ib_device *dev, u8 port,
			    u32 qp_num, u32 counter_id)
{
	struct rdma_port_counter *port_counter;
	struct ib_qp *qp;
	int ret;

	if (!rdma_is_port_valid(dev, port))
		return -EINVAL;

	qp = rdma_counter_get_qp(dev, qp_num);
	if (!qp)
		return -ENOENT;

	if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) {
		ret = -EINVAL;
		goto out;
	}

	port_counter = &dev->port_data[port].port_counter;
	if (!qp->counter || qp->counter->id != counter_id ||
	    port_counter->mode.mode != RDMA_COUNTER_MODE_MANUAL) {
		ret = -EINVAL;
		goto out;
	}

	ret = rdma_counter_unbind_qp(qp, false);

out:
	rdma_restrack_put(&qp->res);
	return ret;
}

int rdma_counter_get_mode(struct ib_device *dev, u8 port,
			  enum rdma_nl_counter_mode *mode,
			  enum rdma_nl_counter_mask *mask)
{
	struct rdma_port_counter *port_counter;

	port_counter = &dev->port_data[port].port_counter;
	*mode = port_counter->mode.mode;
	*mask = port_counter->mode.mask;

	return 0;
}

/* Set up per-port counter state and allocate the hwstat history buffers */
void rdma_counter_init(struct ib_device *dev)
{
	struct rdma_port_counter *port_counter;
	u32 port;

	if (!dev->port_data)
		return;

	rdma_for_each_port(dev, port) {
		port_counter = &dev->port_data[port].port_counter;
		port_counter->mode.mode = RDMA_COUNTER_MODE_NONE;
		mutex_init(&port_counter->lock);

		if (!dev->ops.alloc_hw_stats)
			continue;

		port_counter->hstats = dev->ops.alloc_hw_stats(dev, port);
		if (!port_counter->hstats)
			goto fail;
	}

	return;

fail:
	rdma_for_each_port(dev, port) {
		port_counter = &dev->port_data[port].port_counter;
		kfree(port_counter->hstats);
		port_counter->hstats = NULL;
	}
}

void rdma_counter_release(struct ib_device *dev)
{
	struct rdma_port_counter *port_counter;
	u32 port;

	rdma_for_each_port(dev, port) {
		port_counter = &dev->port_data[port].port_counter;
		kfree(port_counter->hstats);
	}
}