1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * RDMA resource limiting controller for cgroups. 4 * 5 * Used to allow a cgroup hierarchy to stop processes from consuming 6 * additional RDMA resources after a certain limit is reached. 7 * 8 * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com> 9 */ 10 11 #include <linux/bitops.h> 12 #include <linux/limits.h> 13 #include <linux/slab.h> 14 #include <linux/seq_file.h> 15 #include <linux/cgroup.h> 16 #include <linux/parser.h> 17 #include <linux/cgroup_rdma.h> 18 19 #define RDMACG_MAX_STR "max" 20 21 enum rdmacg_limit_tokens { 22 RDMACG_HCA_HANDLE_VAL, 23 RDMACG_HCA_HANDLE_MAX, 24 RDMACG_HCA_OBJECT_VAL, 25 RDMACG_HCA_OBJECT_MAX, 26 NR_RDMACG_LIMIT_TOKENS, 27 }; 28 29 static const match_table_t rdmacg_limit_tokens = { 30 { RDMACG_HCA_HANDLE_VAL, "hca_handle=%d" }, 31 { RDMACG_HCA_HANDLE_MAX, "hca_handle=max" }, 32 { RDMACG_HCA_OBJECT_VAL, "hca_object=%d" }, 33 { RDMACG_HCA_OBJECT_MAX, "hca_object=max" }, 34 { NR_RDMACG_LIMIT_TOKENS, NULL }, 35 }; 36 37 /* 38 * Protects list of resource pools maintained on per cgroup basis 39 * and rdma device list. 40 */ 41 static DEFINE_MUTEX(rdmacg_mutex); 42 static LIST_HEAD(rdmacg_devices); 43 44 enum rdmacg_file_type { 45 RDMACG_RESOURCE_TYPE_MAX, 46 RDMACG_RESOURCE_TYPE_STAT, 47 RDMACG_RESOURCE_TYPE_PEAK, 48 }; 49 50 /* 51 * resource table definition as to be seen by the user. 52 * Need to add entries to it when more resources are 53 * added/defined at IB verb/core layer. 54 */ 55 static char const *rdmacg_resource_names[] = { 56 [RDMACG_RESOURCE_HCA_HANDLE] = "hca_handle", 57 [RDMACG_RESOURCE_HCA_OBJECT] = "hca_object", 58 }; 59 60 /* resource tracker for each resource of rdma cgroup */ 61 struct rdmacg_resource { 62 int max; 63 int usage; 64 int peak; 65 }; 66 67 /* 68 * resource pool object which represents per cgroup, per device 69 * resources. There are multiple instances of this object per cgroup, 70 * therefore it cannot be embedded within rdma_cgroup structure. It 71 * is maintained as list. 72 */ 73 struct rdmacg_resource_pool { 74 struct rdmacg_device *device; 75 struct rdmacg_resource resources[RDMACG_RESOURCE_MAX]; 76 77 struct list_head cg_node; 78 struct list_head dev_node; 79 80 /* count active user tasks of this pool */ 81 u64 usage_sum; 82 /* total number counts which are set to max */ 83 int num_max_cnt; 84 85 /* per-resource event counters */ 86 u64 events_max[RDMACG_RESOURCE_MAX]; 87 u64 events_alloc_fail[RDMACG_RESOURCE_MAX]; 88 u64 events_local_max[RDMACG_RESOURCE_MAX]; 89 u64 events_local_alloc_fail[RDMACG_RESOURCE_MAX]; 90 }; 91 92 static struct rdma_cgroup *css_rdmacg(struct cgroup_subsys_state *css) 93 { 94 return container_of(css, struct rdma_cgroup, css); 95 } 96 97 static struct rdma_cgroup *parent_rdmacg(struct rdma_cgroup *cg) 98 { 99 return css_rdmacg(cg->css.parent); 100 } 101 102 static inline struct rdma_cgroup *get_current_rdmacg(void) 103 { 104 return css_rdmacg(task_get_css(current, rdma_cgrp_id)); 105 } 106 107 static void set_resource_limit(struct rdmacg_resource_pool *rpool, 108 int index, int new_max) 109 { 110 if (new_max == S32_MAX) { 111 if (rpool->resources[index].max != S32_MAX) 112 rpool->num_max_cnt++; 113 } else { 114 if (rpool->resources[index].max == S32_MAX) 115 rpool->num_max_cnt--; 116 } 117 rpool->resources[index].max = new_max; 118 } 119 120 static void set_all_resource_max_limit(struct rdmacg_resource_pool *rpool) 121 { 122 int i; 123 124 for (i = 0; i < RDMACG_RESOURCE_MAX; i++) 125 set_resource_limit(rpool, i, S32_MAX); 126 } 127 128 static void free_cg_rpool_locked(struct rdmacg_resource_pool *rpool) 129 { 130 lockdep_assert_held(&rdmacg_mutex); 131 132 list_del(&rpool->cg_node); 133 list_del(&rpool->dev_node); 134 kfree(rpool); 135 } 136 137 static bool rpool_has_persistent_state(struct rdmacg_resource_pool *rpool) 138 { 139 int i; 140 141 /* 142 * Keep the rpool alive if any peak value is non-zero, 143 * so that rdma.peak persists as a historical high- 144 * watermark even after all resources are freed. 145 */ 146 for (i = 0; i < RDMACG_RESOURCE_MAX; i++) { 147 if (rpool->resources[i].peak || 148 rpool->events_max[i] || 149 rpool->events_local_max[i] || 150 rpool->events_alloc_fail[i] || 151 rpool->events_local_alloc_fail[i]) 152 return true; 153 } 154 return false; 155 } 156 157 static struct rdmacg_resource_pool * 158 find_cg_rpool_locked(struct rdma_cgroup *cg, 159 struct rdmacg_device *device) 160 161 { 162 struct rdmacg_resource_pool *pool; 163 164 lockdep_assert_held(&rdmacg_mutex); 165 166 list_for_each_entry(pool, &cg->rpools, cg_node) 167 if (pool->device == device) 168 return pool; 169 170 return NULL; 171 } 172 173 static struct rdmacg_resource_pool * 174 get_cg_rpool_locked(struct rdma_cgroup *cg, struct rdmacg_device *device) 175 { 176 struct rdmacg_resource_pool *rpool; 177 178 rpool = find_cg_rpool_locked(cg, device); 179 if (rpool) 180 return rpool; 181 182 rpool = kzalloc_obj(*rpool); 183 if (!rpool) 184 return ERR_PTR(-ENOMEM); 185 186 rpool->device = device; 187 set_all_resource_max_limit(rpool); 188 189 INIT_LIST_HEAD(&rpool->cg_node); 190 INIT_LIST_HEAD(&rpool->dev_node); 191 list_add_tail(&rpool->cg_node, &cg->rpools); 192 list_add_tail(&rpool->dev_node, &device->rpools); 193 return rpool; 194 } 195 196 /** 197 * uncharge_cg_locked - uncharge resource for rdma cgroup 198 * @cg: pointer to cg to uncharge and all parents in hierarchy 199 * @device: pointer to rdmacg device 200 * @index: index of the resource to uncharge in cg (resource pool) 201 * 202 * It also frees the resource pool which was created as part of 203 * charging operation when there are no resources attached to 204 * resource pool. 205 */ 206 static void 207 uncharge_cg_locked(struct rdma_cgroup *cg, 208 struct rdmacg_device *device, 209 enum rdmacg_resource_type index) 210 { 211 struct rdmacg_resource_pool *rpool; 212 213 rpool = find_cg_rpool_locked(cg, device); 214 215 /* 216 * rpool cannot be null at this stage. Let kernel operate in case 217 * if there a bug in IB stack or rdma controller, instead of crashing 218 * the system. 219 */ 220 if (unlikely(!rpool)) { 221 pr_warn("Invalid device %p or rdma cgroup %p\n", device, cg); 222 return; 223 } 224 225 rpool->resources[index].usage--; 226 227 /* 228 * A negative count (or overflow) is invalid, 229 * it indicates a bug in the rdma controller. 230 */ 231 WARN_ON_ONCE(rpool->resources[index].usage < 0); 232 rpool->usage_sum--; 233 if (rpool->usage_sum == 0 && 234 rpool->num_max_cnt == RDMACG_RESOURCE_MAX) { 235 if (!rpool_has_persistent_state(rpool)) { 236 /* 237 * No user of the rpool and all entries are set to max, so 238 * safe to delete this rpool. 239 */ 240 free_cg_rpool_locked(rpool); 241 } 242 } 243 } 244 245 /** 246 * rdmacg_event_locked - fire event when resource allocation exceeds limit 247 * @cg: requesting cgroup 248 * @over_cg: cgroup whose limit was exceeded 249 * @device: rdma device 250 * @index: resource type index 251 * 252 * Must be called under rdmacg_mutex. Updates event counters in the 253 * resource pools of @cg and @over_cg, propagates hierarchical max 254 * events from @over_cg (including itself) upward, and notifies 255 * userspace via cgroup_file_notify(). 256 */ 257 static void rdmacg_event_locked(struct rdma_cgroup *cg, 258 struct rdma_cgroup *over_cg, 259 struct rdmacg_device *device, 260 enum rdmacg_resource_type index) 261 { 262 struct rdmacg_resource_pool *rpool; 263 struct rdma_cgroup *p; 264 265 lockdep_assert_held(&rdmacg_mutex); 266 267 /* Increment local alloc_fail in requesting cgroup */ 268 rpool = find_cg_rpool_locked(cg, device); 269 if (rpool) { 270 rpool->events_local_alloc_fail[index]++; 271 cgroup_file_notify(&cg->events_local_file); 272 } 273 274 /* Increment local max in the over-limit cgroup */ 275 rpool = find_cg_rpool_locked(over_cg, device); 276 if (rpool) { 277 rpool->events_local_max[index]++; 278 cgroup_file_notify(&over_cg->events_local_file); 279 } 280 281 /* Propagate hierarchical max events upward */ 282 for (p = over_cg; parent_rdmacg(p); p = parent_rdmacg(p)) { 283 rpool = get_cg_rpool_locked(p, device); 284 if (!IS_ERR(rpool)) { 285 rpool->events_max[index]++; 286 cgroup_file_notify(&p->events_file); 287 } 288 } 289 /* Propagate hierarchical alloc_fail from requesting cgroup upward */ 290 for (p = cg; parent_rdmacg(p); p = parent_rdmacg(p)) { 291 rpool = get_cg_rpool_locked(p, device); 292 if (!IS_ERR(rpool)) { 293 rpool->events_alloc_fail[index]++; 294 cgroup_file_notify(&p->events_file); 295 } 296 } 297 } 298 299 /** 300 * rdmacg_uncharge_hierarchy - hierarchically uncharge rdma resource count 301 * @cg: pointer to cg to uncharge and all parents in hierarchy 302 * @device: pointer to rdmacg device 303 * @stop_cg: while traversing hirerchy, when meet with stop_cg cgroup 304 * stop uncharging 305 * @index: index of the resource to uncharge in cg in given resource pool 306 */ 307 static void rdmacg_uncharge_hierarchy(struct rdma_cgroup *cg, 308 struct rdmacg_device *device, 309 struct rdma_cgroup *stop_cg, 310 enum rdmacg_resource_type index) 311 { 312 struct rdma_cgroup *p; 313 314 mutex_lock(&rdmacg_mutex); 315 316 for (p = cg; p != stop_cg; p = parent_rdmacg(p)) 317 uncharge_cg_locked(p, device, index); 318 319 mutex_unlock(&rdmacg_mutex); 320 321 css_put(&cg->css); 322 } 323 324 /** 325 * rdmacg_uncharge - hierarchically uncharge rdma resource count 326 * @cg: pointer to cg to uncharge and all parents in hierarchy 327 * @device: pointer to rdmacg device 328 * @index: index of the resource to uncharge in cgroup in given resource pool 329 */ 330 void rdmacg_uncharge(struct rdma_cgroup *cg, 331 struct rdmacg_device *device, 332 enum rdmacg_resource_type index) 333 { 334 if (index >= RDMACG_RESOURCE_MAX) 335 return; 336 337 rdmacg_uncharge_hierarchy(cg, device, NULL, index); 338 } 339 EXPORT_SYMBOL(rdmacg_uncharge); 340 341 /** 342 * rdmacg_try_charge - hierarchically try to charge the rdma resource 343 * @rdmacg: pointer to rdma cgroup which will own this resource 344 * @device: pointer to rdmacg device 345 * @index: index of the resource to charge in cgroup (resource pool) 346 * 347 * This function follows charging resource in hierarchical way. 348 * It will fail if the charge would cause the new value to exceed the 349 * hierarchical limit. 350 * Returns 0 if the charge succeeded, otherwise -EAGAIN, -ENOMEM or -EINVAL. 351 * Returns pointer to rdmacg for this resource when charging is successful. 352 * 353 * Charger needs to account resources on two criteria. 354 * (a) per cgroup & (b) per device resource usage. 355 * Per cgroup resource usage ensures that tasks of cgroup doesn't cross 356 * the configured limits. Per device provides granular configuration 357 * in multi device usage. It allocates resource pool in the hierarchy 358 * for each parent it come across for first resource. Later on resource 359 * pool will be available. Therefore it will be much faster thereon 360 * to charge/uncharge. 361 */ 362 int rdmacg_try_charge(struct rdma_cgroup **rdmacg, 363 struct rdmacg_device *device, 364 enum rdmacg_resource_type index) 365 { 366 struct rdma_cgroup *cg, *p; 367 struct rdmacg_resource_pool *rpool; 368 s64 new; 369 int ret = 0; 370 371 if (index >= RDMACG_RESOURCE_MAX) 372 return -EINVAL; 373 374 /* 375 * hold on to css, as cgroup can be removed but resource 376 * accounting happens on css. 377 */ 378 cg = get_current_rdmacg(); 379 380 mutex_lock(&rdmacg_mutex); 381 for (p = cg; p; p = parent_rdmacg(p)) { 382 rpool = get_cg_rpool_locked(p, device); 383 if (IS_ERR(rpool)) { 384 ret = PTR_ERR(rpool); 385 goto err; 386 } else { 387 new = (s64)rpool->resources[index].usage + 1; 388 if (new > rpool->resources[index].max) { 389 ret = -EAGAIN; 390 goto err; 391 } else { 392 rpool->resources[index].usage = new; 393 rpool->usage_sum++; 394 } 395 } 396 } 397 /* Update peak only after all charges succeed */ 398 for (p = cg; p; p = parent_rdmacg(p)) { 399 rpool = find_cg_rpool_locked(p, device); 400 if (rpool && rpool->resources[index].usage > rpool->resources[index].peak) 401 rpool->resources[index].peak = rpool->resources[index].usage; 402 } 403 mutex_unlock(&rdmacg_mutex); 404 405 *rdmacg = cg; 406 return 0; 407 408 err: 409 if (ret == -EAGAIN) 410 rdmacg_event_locked(cg, p, device, index); 411 mutex_unlock(&rdmacg_mutex); 412 rdmacg_uncharge_hierarchy(cg, device, p, index); 413 return ret; 414 } 415 EXPORT_SYMBOL(rdmacg_try_charge); 416 417 /** 418 * rdmacg_register_device - register rdmacg device to rdma controller. 419 * @device: pointer to rdmacg device whose resources need to be accounted. 420 * 421 * If IB stack wish a device to participate in rdma cgroup resource 422 * tracking, it must invoke this API to register with rdma cgroup before 423 * any user space application can start using the RDMA resources. 424 */ 425 void rdmacg_register_device(struct rdmacg_device *device) 426 { 427 INIT_LIST_HEAD(&device->dev_node); 428 INIT_LIST_HEAD(&device->rpools); 429 430 mutex_lock(&rdmacg_mutex); 431 list_add_tail(&device->dev_node, &rdmacg_devices); 432 mutex_unlock(&rdmacg_mutex); 433 } 434 EXPORT_SYMBOL(rdmacg_register_device); 435 436 /** 437 * rdmacg_unregister_device - unregister rdmacg device from rdma controller. 438 * @device: pointer to rdmacg device which was previously registered with rdma 439 * controller using rdmacg_register_device(). 440 * 441 * IB stack must invoke this after all the resources of the IB device 442 * are destroyed and after ensuring that no more resources will be created 443 * when this API is invoked. 444 */ 445 void rdmacg_unregister_device(struct rdmacg_device *device) 446 { 447 struct rdmacg_resource_pool *rpool, *tmp; 448 449 /* 450 * Synchronize with any active resource settings, 451 * usage query happening via configfs. 452 */ 453 mutex_lock(&rdmacg_mutex); 454 list_del_init(&device->dev_node); 455 456 /* 457 * Now that this device is off the cgroup list, its safe to free 458 * all the rpool resources. 459 */ 460 list_for_each_entry_safe(rpool, tmp, &device->rpools, dev_node) 461 free_cg_rpool_locked(rpool); 462 463 mutex_unlock(&rdmacg_mutex); 464 } 465 EXPORT_SYMBOL(rdmacg_unregister_device); 466 467 static struct rdmacg_device *rdmacg_get_device_locked(const char *name) 468 { 469 struct rdmacg_device *device; 470 471 lockdep_assert_held(&rdmacg_mutex); 472 473 list_for_each_entry(device, &rdmacg_devices, dev_node) 474 if (!strcmp(name, device->name)) 475 return device; 476 477 return NULL; 478 } 479 480 static ssize_t rdmacg_resource_set_max(struct kernfs_open_file *of, 481 char *buf, size_t nbytes, loff_t off) 482 { 483 struct rdma_cgroup *cg = css_rdmacg(of_css(of)); 484 const char *dev_name; 485 struct rdmacg_resource_pool *rpool; 486 struct rdmacg_device *device; 487 char *options = strstrip(buf); 488 char *p; 489 int *new_limits; 490 unsigned long enables = 0; 491 int i = 0, ret = 0; 492 493 /* extract the device name first */ 494 dev_name = strsep(&options, " "); 495 if (!dev_name) { 496 ret = -EINVAL; 497 goto err; 498 } 499 500 new_limits = kzalloc_objs(int, RDMACG_RESOURCE_MAX); 501 if (!new_limits) { 502 ret = -ENOMEM; 503 goto err; 504 } 505 506 /* parse resource limit tokens */ 507 while ((p = strsep(&options, " \t\n"))) { 508 substring_t args[MAX_OPT_ARGS]; 509 int tok, intval; 510 511 if (!*p) 512 continue; 513 514 tok = match_token(p, rdmacg_limit_tokens, args); 515 switch (tok) { 516 case RDMACG_HCA_HANDLE_VAL: 517 if (match_int(&args[0], &intval) || intval < 0) { 518 ret = -EINVAL; 519 goto parse_err; 520 } 521 new_limits[RDMACG_RESOURCE_HCA_HANDLE] = intval; 522 enables |= BIT(RDMACG_RESOURCE_HCA_HANDLE); 523 break; 524 case RDMACG_HCA_HANDLE_MAX: 525 new_limits[RDMACG_RESOURCE_HCA_HANDLE] = S32_MAX; 526 enables |= BIT(RDMACG_RESOURCE_HCA_HANDLE); 527 break; 528 case RDMACG_HCA_OBJECT_VAL: 529 if (match_int(&args[0], &intval) || intval < 0) { 530 ret = -EINVAL; 531 goto parse_err; 532 } 533 new_limits[RDMACG_RESOURCE_HCA_OBJECT] = intval; 534 enables |= BIT(RDMACG_RESOURCE_HCA_OBJECT); 535 break; 536 case RDMACG_HCA_OBJECT_MAX: 537 new_limits[RDMACG_RESOURCE_HCA_OBJECT] = S32_MAX; 538 enables |= BIT(RDMACG_RESOURCE_HCA_OBJECT); 539 break; 540 default: 541 ret = -EINVAL; 542 goto parse_err; 543 } 544 } 545 546 /* acquire lock to synchronize with hot plug devices */ 547 mutex_lock(&rdmacg_mutex); 548 549 device = rdmacg_get_device_locked(dev_name); 550 if (!device) { 551 ret = -ENODEV; 552 goto dev_err; 553 } 554 555 rpool = get_cg_rpool_locked(cg, device); 556 if (IS_ERR(rpool)) { 557 ret = PTR_ERR(rpool); 558 goto dev_err; 559 } 560 561 /* now set the new limits of the rpool */ 562 for_each_set_bit(i, &enables, RDMACG_RESOURCE_MAX) 563 set_resource_limit(rpool, i, new_limits[i]); 564 565 if (rpool->usage_sum == 0 && 566 rpool->num_max_cnt == RDMACG_RESOURCE_MAX) { 567 if (!rpool_has_persistent_state(rpool)) { 568 /* 569 * No user of the rpool and all entries are set to max, so 570 * safe to delete this rpool. 571 */ 572 free_cg_rpool_locked(rpool); 573 } 574 } 575 576 dev_err: 577 mutex_unlock(&rdmacg_mutex); 578 579 parse_err: 580 kfree(new_limits); 581 582 err: 583 return ret ?: nbytes; 584 } 585 586 static void print_rpool_values(struct seq_file *sf, 587 struct rdmacg_resource_pool *rpool) 588 { 589 enum rdmacg_file_type sf_type; 590 int i; 591 u32 value; 592 593 sf_type = seq_cft(sf)->private; 594 595 for (i = 0; i < RDMACG_RESOURCE_MAX; i++) { 596 seq_puts(sf, rdmacg_resource_names[i]); 597 seq_putc(sf, '='); 598 if (sf_type == RDMACG_RESOURCE_TYPE_MAX) { 599 if (rpool) 600 value = rpool->resources[i].max; 601 else 602 value = S32_MAX; 603 } else if (sf_type == RDMACG_RESOURCE_TYPE_PEAK) { 604 value = rpool ? rpool->resources[i].peak : 0; 605 } else { 606 if (rpool) 607 value = rpool->resources[i].usage; 608 else 609 value = 0; 610 } 611 612 if (value == S32_MAX) 613 seq_puts(sf, RDMACG_MAX_STR); 614 else 615 seq_printf(sf, "%d", value); 616 seq_putc(sf, ' '); 617 } 618 } 619 620 static int rdmacg_resource_read(struct seq_file *sf, void *v) 621 { 622 struct rdmacg_device *device; 623 struct rdmacg_resource_pool *rpool; 624 struct rdma_cgroup *cg = css_rdmacg(seq_css(sf)); 625 626 mutex_lock(&rdmacg_mutex); 627 628 list_for_each_entry(device, &rdmacg_devices, dev_node) { 629 seq_printf(sf, "%s ", device->name); 630 631 rpool = find_cg_rpool_locked(cg, device); 632 print_rpool_values(sf, rpool); 633 634 seq_putc(sf, '\n'); 635 } 636 637 mutex_unlock(&rdmacg_mutex); 638 return 0; 639 } 640 641 static int rdmacg_events_show(struct seq_file *sf, void *v) 642 { 643 struct rdma_cgroup *cg = css_rdmacg(seq_css(sf)); 644 struct rdmacg_resource_pool *rpool; 645 struct rdmacg_device *device; 646 int i; 647 648 mutex_lock(&rdmacg_mutex); 649 650 list_for_each_entry(device, &rdmacg_devices, dev_node) { 651 rpool = find_cg_rpool_locked(cg, device); 652 653 seq_printf(sf, "%s ", device->name); 654 for (i = 0; i < RDMACG_RESOURCE_MAX; i++) { 655 seq_printf(sf, "%s.max=%llu %s.alloc_fail=%llu", 656 rdmacg_resource_names[i], 657 rpool ? rpool->events_max[i] : 0ULL, 658 rdmacg_resource_names[i], 659 rpool ? rpool->events_alloc_fail[i] : 0ULL); 660 if (i < RDMACG_RESOURCE_MAX - 1) 661 seq_putc(sf, ' '); 662 } 663 seq_putc(sf, '\n'); 664 } 665 666 mutex_unlock(&rdmacg_mutex); 667 return 0; 668 } 669 670 static int rdmacg_events_local_show(struct seq_file *sf, void *v) 671 { 672 struct rdma_cgroup *cg = css_rdmacg(seq_css(sf)); 673 struct rdmacg_resource_pool *rpool; 674 struct rdmacg_device *device; 675 int i; 676 677 mutex_lock(&rdmacg_mutex); 678 679 list_for_each_entry(device, &rdmacg_devices, dev_node) { 680 rpool = find_cg_rpool_locked(cg, device); 681 682 seq_printf(sf, "%s ", device->name); 683 for (i = 0; i < RDMACG_RESOURCE_MAX; i++) { 684 seq_printf(sf, "%s.max=%llu %s.alloc_fail=%llu", 685 rdmacg_resource_names[i], 686 rpool ? rpool->events_local_max[i] : 0ULL, 687 rdmacg_resource_names[i], 688 rpool ? rpool->events_local_alloc_fail[i] : 0ULL); 689 if (i < RDMACG_RESOURCE_MAX - 1) 690 seq_putc(sf, ' '); 691 } 692 seq_putc(sf, '\n'); 693 } 694 695 mutex_unlock(&rdmacg_mutex); 696 return 0; 697 } 698 699 static struct cftype rdmacg_files[] = { 700 { 701 .name = "max", 702 .write = rdmacg_resource_set_max, 703 .seq_show = rdmacg_resource_read, 704 .private = RDMACG_RESOURCE_TYPE_MAX, 705 .flags = CFTYPE_NOT_ON_ROOT, 706 }, 707 { 708 .name = "current", 709 .seq_show = rdmacg_resource_read, 710 .private = RDMACG_RESOURCE_TYPE_STAT, 711 .flags = CFTYPE_NOT_ON_ROOT, 712 }, 713 { 714 .name = "peak", 715 .seq_show = rdmacg_resource_read, 716 .private = RDMACG_RESOURCE_TYPE_PEAK, 717 .flags = CFTYPE_NOT_ON_ROOT, 718 }, 719 { 720 .name = "events", 721 .seq_show = rdmacg_events_show, 722 .file_offset = offsetof(struct rdma_cgroup, events_file), 723 .flags = CFTYPE_NOT_ON_ROOT, 724 }, 725 { 726 .name = "events.local", 727 .seq_show = rdmacg_events_local_show, 728 .file_offset = offsetof(struct rdma_cgroup, events_local_file), 729 .flags = CFTYPE_NOT_ON_ROOT, 730 }, 731 { } /* terminate */ 732 }; 733 734 static struct cgroup_subsys_state * 735 rdmacg_css_alloc(struct cgroup_subsys_state *parent) 736 { 737 struct rdma_cgroup *cg; 738 739 cg = kzalloc_obj(*cg); 740 if (!cg) 741 return ERR_PTR(-ENOMEM); 742 743 INIT_LIST_HEAD(&cg->rpools); 744 return &cg->css; 745 } 746 747 static void rdmacg_css_free(struct cgroup_subsys_state *css) 748 { 749 struct rdma_cgroup *cg = css_rdmacg(css); 750 struct rdmacg_resource_pool *rpool, *tmp; 751 752 /* Clean up rpools kept alive by non-zero peak values */ 753 mutex_lock(&rdmacg_mutex); 754 list_for_each_entry_safe(rpool, tmp, &cg->rpools, cg_node) 755 free_cg_rpool_locked(rpool); 756 mutex_unlock(&rdmacg_mutex); 757 758 kfree(cg); 759 } 760 761 /** 762 * rdmacg_css_offline - cgroup css_offline callback 763 * @css: css of interest 764 * 765 * This function is called when @css is about to go away and responsible 766 * for shooting down all rdmacg associated with @css. As part of that it 767 * marks all the resource pool entries to max value, so that when resources are 768 * uncharged, associated resource pool can be freed as well. 769 */ 770 static void rdmacg_css_offline(struct cgroup_subsys_state *css) 771 { 772 struct rdma_cgroup *cg = css_rdmacg(css); 773 struct rdmacg_resource_pool *rpool; 774 775 mutex_lock(&rdmacg_mutex); 776 777 list_for_each_entry(rpool, &cg->rpools, cg_node) 778 set_all_resource_max_limit(rpool); 779 780 mutex_unlock(&rdmacg_mutex); 781 } 782 783 struct cgroup_subsys rdma_cgrp_subsys = { 784 .css_alloc = rdmacg_css_alloc, 785 .css_free = rdmacg_css_free, 786 .css_offline = rdmacg_css_offline, 787 .legacy_cftypes = rdmacg_files, 788 .dfl_cftypes = rdmacg_files, 789 }; 790