/*
 * Copyright(c) 2015, 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
#include <linux/topology.h>
#include <linux/cpumask.h>
#include <linux/module.h>

#include "hfi.h"
#include "affinity.h"
#include "sdma.h"
#include "trace.h"

struct hfi1_affinity_node_list node_affinity = {
	.list = LIST_HEAD_INIT(node_affinity.list),
	.lock = __SPIN_LOCK_UNLOCKED(&node_affinity.lock),
};

/* Names of IRQ types, indexed by enum irq_type */
static const char * const irq_type_names[] = {
	"SDMA",
	"RCVCTXT",
	"GENERAL",
	"OTHER",
};

/* Per NUMA node count of HFI devices */
static unsigned int *hfi1_per_node_cntr;

static inline void init_cpu_mask_set(struct cpu_mask_set *set)
{
	cpumask_clear(&set->mask);
	cpumask_clear(&set->used);
	set->gen = 0;
}

/* Initialize non-HT cpu cores mask */
void init_real_cpu_mask(void)
{
	int possible, curr_cpu, i, ht;

	cpumask_clear(&node_affinity.real_cpu_mask);

	/* Start with cpu online mask as the real cpu mask */
	cpumask_copy(&node_affinity.real_cpu_mask, cpu_online_mask);

	/*
	 * Remove HT cores from the real cpu mask. Do this in two steps below.
	 */
	possible = cpumask_weight(&node_affinity.real_cpu_mask);
	ht = cpumask_weight(topology_sibling_cpumask(
				cpumask_first(&node_affinity.real_cpu_mask)));
	/*
	 * Step 1. Skip over the first N HT siblings and use them as the
	 * "real" cores. Assumes that HT cores are not enumerated in
	 * succession (except in the single core case).
	 */
	curr_cpu = cpumask_first(&node_affinity.real_cpu_mask);
	for (i = 0; i < possible / ht; i++)
		curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
	/*
	 * Step 2. Remove the remaining HT siblings. Use cpumask_next() to
	 * skip any gaps.
	 */
	for (; i < possible; i++) {
		cpumask_clear_cpu(curr_cpu, &node_affinity.real_cpu_mask);
		curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
	}
}

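/*
 * Snapshot the online CPU/NUMA topology, build the "real" (non-HT) CPU
 * mask, and count the HFI devices present on each NUMA node by walking
 * the PCI devices matching hfi1_pci_tbl. The per-node device counts are
 * used later to size the kernel receive context CPU reservations.
 * Returns 0 on success or -ENOMEM if the per-node counter array cannot
 * be allocated.
 */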
int node_affinity_init(void)
{
	int node;
	struct pci_dev *dev = NULL;
	const struct pci_device_id *ids = hfi1_pci_tbl;

	cpumask_clear(&node_affinity.proc.used);
	cpumask_copy(&node_affinity.proc.mask, cpu_online_mask);

	node_affinity.proc.gen = 0;
	node_affinity.num_core_siblings =
		cpumask_weight(topology_sibling_cpumask(
			cpumask_first(&node_affinity.proc.mask)
			));
	node_affinity.num_online_nodes = num_online_nodes();
	node_affinity.num_online_cpus = num_online_cpus();

	/*
	 * The real cpu mask is part of the affinity struct but it has to be
	 * initialized early. It is needed to calculate the number of user
	 * contexts in set_up_context_variables().
	 */
	init_real_cpu_mask();

	hfi1_per_node_cntr = kcalloc(num_possible_nodes(),
				     sizeof(*hfi1_per_node_cntr), GFP_KERNEL);
	if (!hfi1_per_node_cntr)
		return -ENOMEM;

	while (ids->vendor) {
		dev = NULL;
		while ((dev = pci_get_device(ids->vendor, ids->device, dev))) {
			node = pcibus_to_node(dev->bus);
			if (node < 0)
				node = numa_node_id();

			hfi1_per_node_cntr[node]++;
		}
		ids++;
	}

	return 0;
}

void node_affinity_destroy(void)
{
	struct list_head *pos, *q;
	struct hfi1_affinity_node *entry;

	spin_lock(&node_affinity.lock);
	list_for_each_safe(pos, q, &node_affinity.list) {
		entry = list_entry(pos, struct hfi1_affinity_node,
				   list);
		list_del(pos);
		kfree(entry);
	}
	spin_unlock(&node_affinity.lock);
	kfree(hfi1_per_node_cntr);
}

static struct hfi1_affinity_node *node_affinity_allocate(int node)
{
	struct hfi1_affinity_node *entry;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return NULL;
	entry->node = node;
	INIT_LIST_HEAD(&entry->list);

	return entry;
}

/*
 * It appends an entry to the list.
 * It *must* be called with node_affinity.lock held.
 */
static void node_affinity_add_tail(struct hfi1_affinity_node *entry)
{
	list_add_tail(&entry->list, &node_affinity.list);
}

/* It must be called with node_affinity.lock held */
static struct hfi1_affinity_node *node_affinity_lookup(int node)
{
	struct list_head *pos;
	struct hfi1_affinity_node *entry;

	list_for_each(pos, &node_affinity.list) {
		entry = list_entry(pos, struct hfi1_affinity_node, list);
		if (entry->node == node)
			return entry;
	}

	return NULL;
}

/*
 * Interrupt affinity.
 *
 * Non-receive interrupts get a default mask that starts as the node's
 * "real" (non-HT) CPUs, with the CPUs reserved for the general/control
 * context and the kernel receive contexts removed.
 *
 * Receive interrupts get node-relative CPUs starting after the
 * general/control context CPU, wrapping back around as necessary.
 */
int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
{
	int node = pcibus_to_node(dd->pcidev->bus);
	struct hfi1_affinity_node *entry;
	const struct cpumask *local_mask;
	int curr_cpu, possible, i;

	if (node < 0)
		node = numa_node_id();
	dd->node = node;

	local_mask = cpumask_of_node(dd->node);
	if (cpumask_first(local_mask) >= nr_cpu_ids)
		local_mask = topology_core_cpumask(0);

	spin_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);
	spin_unlock(&node_affinity.lock);

	/*
	 * If this is the first time this NUMA node's affinity is used,
	 * create an entry in the global affinity structure and initialize it.
	 */
	if (!entry) {
		entry = node_affinity_allocate(node);
		if (!entry) {
			dd_dev_err(dd,
				   "Unable to allocate global affinity node\n");
			return -ENOMEM;
		}
		init_cpu_mask_set(&entry->def_intr);
		init_cpu_mask_set(&entry->rcv_intr);
		cpumask_clear(&entry->general_intr_mask);
		/* Use the "real" cpu mask of this node as the default */
		cpumask_and(&entry->def_intr.mask, &node_affinity.real_cpu_mask,
			    local_mask);

		/* fill in the receive list */
		possible = cpumask_weight(&entry->def_intr.mask);
		curr_cpu = cpumask_first(&entry->def_intr.mask);

		if (possible == 1) {
			/* only one CPU, everyone will use it */
			cpumask_set_cpu(curr_cpu, &entry->rcv_intr.mask);
			cpumask_set_cpu(curr_cpu, &entry->general_intr_mask);
		} else {
			/*
			 * The general/control context will be the first CPU in
			 * the default list, so it is removed from the default
			 * list and added to the general interrupt list.
			 */
			cpumask_clear_cpu(curr_cpu, &entry->def_intr.mask);
			cpumask_set_cpu(curr_cpu, &entry->general_intr_mask);
			curr_cpu = cpumask_next(curr_cpu,
						&entry->def_intr.mask);

			/*
			 * Remove the remaining kernel receive queues from
			 * the default list and add them to the receive list.
			 */
			for (i = 0;
			     i < (dd->n_krcv_queues - 1) *
				  hfi1_per_node_cntr[dd->node];
			     i++) {
				cpumask_clear_cpu(curr_cpu,
						  &entry->def_intr.mask);
				cpumask_set_cpu(curr_cpu,
						&entry->rcv_intr.mask);
				curr_cpu = cpumask_next(curr_cpu,
							&entry->def_intr.mask);
				if (curr_cpu >= nr_cpu_ids)
					break;
			}

			/*
			 * If there ends up being 0 CPU cores leftover for SDMA
			 * engines, use the same CPU cores as general/control
			 * context.
			 */
			if (cpumask_weight(&entry->def_intr.mask) == 0)
				cpumask_copy(&entry->def_intr.mask,
					     &entry->general_intr_mask);
		}

		spin_lock(&node_affinity.lock);
		node_affinity_add_tail(entry);
		spin_unlock(&node_affinity.lock);
	}

	return 0;
}

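/*
 * Pick a CPU for an MSI-X vector and set its affinity hint.
 *
 * SDMA and non-control receive context interrupts are drawn from the
 * node's def_intr/rcv_intr sets, rotating through unused CPUs and
 * bumping the generation count once a set is exhausted. General and
 * control context interrupts always land on the dedicated
 * general_intr_mask CPU and are not accounted in the sets.
 * Returns 0 on success, -ENOMEM or -EINVAL on failure.
 */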
int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
{
	int ret;
	cpumask_var_t diff;
	struct hfi1_affinity_node *entry;
	struct cpu_mask_set *set = NULL;
	struct sdma_engine *sde = NULL;
	struct hfi1_ctxtdata *rcd = NULL;
	char extra[64];
	int cpu = -1;

	extra[0] = '\0';
	cpumask_clear(&msix->mask);

	ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
	if (!ret)
		return -ENOMEM;

	spin_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);
	spin_unlock(&node_affinity.lock);

	switch (msix->type) {
	case IRQ_SDMA:
		sde = (struct sdma_engine *)msix->arg;
		scnprintf(extra, 64, "engine %u", sde->this_idx);
		set = &entry->def_intr;
		break;
	case IRQ_GENERAL:
		cpu = cpumask_first(&entry->general_intr_mask);
		break;
	case IRQ_RCVCTXT:
		rcd = (struct hfi1_ctxtdata *)msix->arg;
		if (rcd->ctxt == HFI1_CTRL_CTXT)
			cpu = cpumask_first(&entry->general_intr_mask);
		else
			set = &entry->rcv_intr;
		scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
		break;
	default:
		dd_dev_err(dd, "Invalid IRQ type %d\n", msix->type);
		/* Don't leak the scratch cpumask on the error path */
		free_cpumask_var(diff);
		return -EINVAL;
	}

	/*
	 * The general and control contexts are placed on a particular
	 * CPU, which is set above. Skip accounting for it. Everything else
	 * finds its CPU here.
	 */
	if (cpu == -1 && set) {
		spin_lock(&node_affinity.lock);
		if (cpumask_equal(&set->mask, &set->used)) {
			/*
			 * We've used up all the CPUs, bump up the generation
			 * and reset the 'used' map
			 */
			set->gen++;
			cpumask_clear(&set->used);
		}
		cpumask_andnot(diff, &set->mask, &set->used);
		cpu = cpumask_first(diff);
		cpumask_set_cpu(cpu, &set->used);
		spin_unlock(&node_affinity.lock);
	}

	switch (msix->type) {
	case IRQ_SDMA:
		sde->cpu = cpu;
		break;
	case IRQ_GENERAL:
	case IRQ_RCVCTXT:
	case IRQ_OTHER:
		break;
	}

	cpumask_set_cpu(cpu, &msix->mask);
	dd_dev_info(dd, "IRQ vector: %u, type %s %s -> cpu: %d\n",
		    msix->msix.vector, irq_type_names[msix->type],
		    extra, cpu);
	irq_set_affinity_hint(msix->msix.vector, &msix->mask);

	free_cpumask_var(diff);
	return 0;
}

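/*
 * Undo hfi1_get_irq_affinity(): return the CPU(s) recorded in msix->mask
 * to the owning interrupt set (SDMA or receive) and clear the affinity
 * hint. General and control context interrupts carry no accounting, so
 * only the hint and the mask are cleared for them.
 */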
void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
			   struct hfi1_msix_entry *msix)
{
	struct cpu_mask_set *set = NULL;
	struct hfi1_ctxtdata *rcd;
	struct hfi1_affinity_node *entry;

	spin_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);
	spin_unlock(&node_affinity.lock);

	switch (msix->type) {
	case IRQ_SDMA:
		set = &entry->def_intr;
		break;
	case IRQ_GENERAL:
		/* Don't do accounting for general contexts */
		break;
	case IRQ_RCVCTXT:
		rcd = (struct hfi1_ctxtdata *)msix->arg;
		/* Don't do accounting for control contexts */
		if (rcd->ctxt != HFI1_CTRL_CTXT)
			set = &entry->rcv_intr;
		break;
	default:
		return;
	}

	if (set) {
		spin_lock(&node_affinity.lock);
		cpumask_andnot(&set->used, &set->used, &msix->mask);
		if (cpumask_empty(&set->used) && set->gen) {
			set->gen--;
			cpumask_copy(&set->used, &set->mask);
		}
		spin_unlock(&node_affinity.lock);
	}

	irq_set_affinity_hint(msix->msix.vector, NULL);
	cpumask_clear(&msix->mask);
}

/* This should be called with node_affinity.lock held */
static void find_hw_thread_mask(uint hw_thread_no, cpumask_var_t hw_thread_mask,
				struct hfi1_affinity_node_list *affinity)
{
	int possible, curr_cpu, i;
	uint num_cores_per_socket = node_affinity.num_online_cpus /
					affinity->num_core_siblings /
						node_affinity.num_online_nodes;

	cpumask_copy(hw_thread_mask, &affinity->proc.mask);
	if (affinity->num_core_siblings > 0) {
		/* Removing other siblings not needed for now */
		possible = cpumask_weight(hw_thread_mask);
		curr_cpu = cpumask_first(hw_thread_mask);
		for (i = 0;
		     i < num_cores_per_socket * node_affinity.num_online_nodes;
		     i++)
			curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);

		for (; i < possible; i++) {
			cpumask_clear_cpu(curr_cpu, hw_thread_mask);
			curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);
		}

		/* Identifying correct HW threads within physical cores */
		cpumask_shift_left(hw_thread_mask, hw_thread_mask,
				   num_cores_per_socket *
				   node_affinity.num_online_nodes *
				   hw_thread_no);
	}
}

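/*
 * Recommend a CPU for a user process opening a context on the device
 * whose NUMA node is @node and mark it as used in the global process
 * CPU set. If the process has already been pinned to a single CPU,
 * that CPU is accounted and returned. If it has some other non-default
 * affinity, or if no suitable CPU can be found, -1 is returned and no
 * CPU is reserved.
 */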
int hfi1_get_proc_affinity(int node)
{
	int cpu = -1, ret, i;
	struct hfi1_affinity_node *entry;
	cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask;
	const struct cpumask *node_mask,
		*proc_mask = tsk_cpus_allowed(current);
	struct hfi1_affinity_node_list *affinity = &node_affinity;
	struct cpu_mask_set *set = &affinity->proc;

	/*
	 * check whether process/context affinity has already
	 * been set
	 */
	if (cpumask_weight(proc_mask) == 1) {
		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl",
			  current->pid, current->comm,
			  cpumask_pr_args(proc_mask));
		/*
		 * Mark the pre-set CPU as used. This is atomic so we don't
		 * need the lock
		 */
		cpu = cpumask_first(proc_mask);
		cpumask_set_cpu(cpu, &set->used);
		goto done;
	} else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) {
		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl",
			  current->pid, current->comm,
			  cpumask_pr_args(proc_mask));
		goto done;
	}

	/*
	 * The process does not have a preset CPU affinity so find one to
	 * recommend using the following algorithm:
	 *
	 * For each user process that is opening a context on HFI Y:
	 *  a) If all cores are filled, reinitialize the bitmask
	 *  b) Fill real cores first, then HT cores (First set of HT
	 *     cores on all physical cores, then second set of HT cores,
	 *     and so on) in the following order:
	 *
	 *     1. Same NUMA node as HFI Y and not running an IRQ
	 *        handler
	 *     2. Same NUMA node as HFI Y and running an IRQ handler
	 *     3. Different NUMA node to HFI Y and not running an IRQ
	 *        handler
	 *     4. Different NUMA node to HFI Y and running an IRQ
	 *        handler
	 *  c) Mark core as filled in the bitmask. As user processes are
	 *     done, clear cores from the bitmask.
	 */

	ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
	if (!ret)
		goto done;
	ret = zalloc_cpumask_var(&hw_thread_mask, GFP_KERNEL);
	if (!ret)
		goto free_diff;
	ret = zalloc_cpumask_var(&available_mask, GFP_KERNEL);
	if (!ret)
		goto free_hw_thread_mask;
	ret = zalloc_cpumask_var(&intrs_mask, GFP_KERNEL);
	if (!ret)
		goto free_available_mask;

	spin_lock(&affinity->lock);
	/*
	 * If we've used all available HW threads, clear the mask and start
	 * overloading.
	 */
	if (cpumask_equal(&set->mask, &set->used)) {
		set->gen++;
		cpumask_clear(&set->used);
	}

	/*
	 * If the NUMA node has CPUs used by interrupt handlers, include them
	 * in the interrupt handler mask.
	 */
	entry = node_affinity_lookup(node);
	if (entry) {
		cpumask_copy(intrs_mask, (entry->def_intr.gen ?
					  &entry->def_intr.mask :
					  &entry->def_intr.used));
		cpumask_or(intrs_mask, intrs_mask, (entry->rcv_intr.gen ?
						    &entry->rcv_intr.mask :
						    &entry->rcv_intr.used));
		cpumask_or(intrs_mask, intrs_mask, &entry->general_intr_mask);
	}
	hfi1_cdbg(PROC, "CPUs used by interrupts: %*pbl",
		  cpumask_pr_args(intrs_mask));

	cpumask_copy(hw_thread_mask, &set->mask);

	/*
	 * If HT cores are enabled, identify which HW threads within the
	 * physical cores should be used.
	 */
	if (affinity->num_core_siblings > 0) {
		for (i = 0; i < affinity->num_core_siblings; i++) {
			find_hw_thread_mask(i, hw_thread_mask, affinity);

			/*
			 * If there's at least one available core for this HW
			 * thread number, stop looking for a core.
			 *
			 * diff will always be not empty at least once in this
			 * loop as the used mask gets reset when
			 * (set->mask == set->used) before this loop.
			 */
			cpumask_andnot(diff, hw_thread_mask, &set->used);
			if (!cpumask_empty(diff))
				break;
		}
	}
	hfi1_cdbg(PROC, "Same available HW thread on all physical CPUs: %*pbl",
		  cpumask_pr_args(hw_thread_mask));

	node_mask = cpumask_of_node(node);
	hfi1_cdbg(PROC, "Device on NUMA %u, CPUs %*pbl", node,
		  cpumask_pr_args(node_mask));

	/* Get cpumask of available CPUs on preferred NUMA */
	cpumask_and(available_mask, hw_thread_mask, node_mask);
	cpumask_andnot(available_mask, available_mask, &set->used);
	hfi1_cdbg(PROC, "Available CPUs on NUMA %u: %*pbl", node,
		  cpumask_pr_args(available_mask));

	/*
	 * At first, we don't want to place processes on the same
	 * CPUs as interrupt handlers. Then, CPUs running interrupt
	 * handlers are used.
	 *
	 * 1) If diff is not empty, then there are CPUs not running
	 *    interrupt handlers available, so diff gets copied
	 *    over to available_mask.
	 * 2) If diff is empty, then all CPUs not running interrupt
	 *    handlers are taken, so available_mask contains all
	 *    available CPUs running interrupt handlers.
	 * 3) If available_mask is empty, then all CPUs on the
	 *    preferred NUMA node are taken, so other NUMA nodes are
	 *    used for process assignments using the same method as
	 *    the preferred NUMA node.
	 */
	cpumask_andnot(diff, available_mask, intrs_mask);
	if (!cpumask_empty(diff))
		cpumask_copy(available_mask, diff);

	/* If we don't have CPUs on the preferred node, use other NUMA nodes */
	if (cpumask_empty(available_mask)) {
		cpumask_andnot(available_mask, hw_thread_mask, &set->used);
		/* Excluding preferred NUMA cores */
		cpumask_andnot(available_mask, available_mask, node_mask);
		hfi1_cdbg(PROC,
			  "Preferred NUMA node cores are taken, cores available in other NUMA nodes: %*pbl",
			  cpumask_pr_args(available_mask));

		/*
		 * At first, we don't want to place processes on the same
		 * CPUs as interrupt handlers.
		 */
		cpumask_andnot(diff, available_mask, intrs_mask);
		if (!cpumask_empty(diff))
			cpumask_copy(available_mask, diff);
	}
	hfi1_cdbg(PROC, "Possible CPUs for process: %*pbl",
		  cpumask_pr_args(available_mask));

	cpu = cpumask_first(available_mask);
	if (cpu >= nr_cpu_ids) /* empty */
		cpu = -1;
	else
		cpumask_set_cpu(cpu, &set->used);
	spin_unlock(&affinity->lock);
	hfi1_cdbg(PROC, "Process assigned to CPU %d", cpu);

	free_cpumask_var(intrs_mask);
free_available_mask:
	free_cpumask_var(available_mask);
free_hw_thread_mask:
	free_cpumask_var(hw_thread_mask);
free_diff:
	free_cpumask_var(diff);
done:
	return cpu;
}

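/*
 * Return a CPU previously handed out by hfi1_get_proc_affinity() to the
 * pool available for future process placement. A negative @cpu (no CPU
 * was reserved) is a no-op.
 */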
void hfi1_put_proc_affinity(int cpu)
{
	struct hfi1_affinity_node_list *affinity = &node_affinity;
	struct cpu_mask_set *set = &affinity->proc;

	if (cpu < 0)
		return;
	spin_lock(&affinity->lock);
	cpumask_clear_cpu(cpu, &set->used);
	hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu);
	if (cpumask_empty(&set->used) && set->gen) {
		set->gen--;
		cpumask_copy(&set->used, &set->mask);
	}
	spin_unlock(&affinity->lock);
}

/* Prevents concurrent reads and writes of the sdma_affinity attrib */
static DEFINE_MUTEX(sdma_affinity_mutex);

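/*
 * Store handler for the sdma_affinity attribute: parse a CPU list from
 * @buf, make it the new default (SDMA) interrupt mask for the device's
 * NUMA node, and re-run CPU selection for every SDMA MSI-X vector.
 * Returns the number of characters consumed on success or a negative
 * errno.
 */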
int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
			   size_t count)
{
	struct hfi1_affinity_node *entry;
	struct cpumask mask;
	int ret, i;

	spin_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);
	spin_unlock(&node_affinity.lock);

	if (!entry)
		return -EINVAL;

	ret = cpulist_parse(buf, &mask);
	if (ret)
		return ret;

	if (!cpumask_subset(&mask, cpu_online_mask) || cpumask_empty(&mask)) {
		dd_dev_warn(dd, "Invalid CPU mask\n");
		return -EINVAL;
	}

	mutex_lock(&sdma_affinity_mutex);
	/* reset the SDMA interrupt affinity details */
	init_cpu_mask_set(&entry->def_intr);
	cpumask_copy(&entry->def_intr.mask, &mask);
	/*
	 * Reassign the affinity for each SDMA interrupt.
	 */
	for (i = 0; i < dd->num_msix_entries; i++) {
		struct hfi1_msix_entry *msix;

		msix = &dd->msix_entries[i];
		if (msix->type != IRQ_SDMA)
			continue;

		ret = hfi1_get_irq_affinity(dd, msix);

		if (ret)
			break;
	}

	mutex_unlock(&sdma_affinity_mutex);
	return ret ? ret : strnlen(buf, PAGE_SIZE);
}

int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf)
{
	struct hfi1_affinity_node *entry;

	spin_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);
	spin_unlock(&node_affinity.lock);

	if (!entry)
		return -EINVAL;

	mutex_lock(&sdma_affinity_mutex);
	cpumap_print_to_pagebuf(true, buf, &entry->def_intr.mask);
	mutex_unlock(&sdma_affinity_mutex);
	return strnlen(buf, PAGE_SIZE);
}