/*
 * Copyright(c) 2015 - 2017 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/topology.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/interrupt.h>

#include "hfi.h"
#include "affinity.h"
#include "sdma.h"
#include "trace.h"

struct hfi1_affinity_node_list node_affinity = {
	.list = LIST_HEAD_INIT(node_affinity.list),
	.lock = __MUTEX_INITIALIZER(node_affinity.lock)
};

/* Names of IRQ types, indexed by enum irq_type */
static const char * const irq_type_names[] = {
	"SDMA",
	"RCVCTXT",
	"GENERAL",
	"OTHER",
};

/* Per NUMA node count of HFI devices */
static unsigned int *hfi1_per_node_cntr;

static inline void init_cpu_mask_set(struct cpu_mask_set *set)
{
	cpumask_clear(&set->mask);
	cpumask_clear(&set->used);
	set->gen = 0;
}

/* Initialize non-HT cpu cores mask */
void init_real_cpu_mask(void)
{
	int possible, curr_cpu, i, ht;

	cpumask_clear(&node_affinity.real_cpu_mask);

	/* Start with cpu online mask as the real cpu mask */
	cpumask_copy(&node_affinity.real_cpu_mask, cpu_online_mask);

	/*
	 * Remove HT cores from the real cpu mask. Do this in two steps below.
	 */
	possible = cpumask_weight(&node_affinity.real_cpu_mask);
	ht = cpumask_weight(topology_sibling_cpumask(
				cpumask_first(&node_affinity.real_cpu_mask)));
	/*
	 * Step 1. Skip over the first N HT siblings and use them as the
	 * "real" cores. Assumes that HT cores are not enumerated in
	 * succession (except in the single core case).
	 */
	curr_cpu = cpumask_first(&node_affinity.real_cpu_mask);
	for (i = 0; i < possible / ht; i++)
		curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
	/*
	 * Step 2. Remove the remaining HT siblings. Use cpumask_next() to
	 * skip any gaps.
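	 *
	 * For example (a hypothetical layout): with 8 online CPUs and two
	 * threads per core enumerated as 0-3 (first siblings) and 4-7
	 * (second siblings), step 1 keeps CPUs 0-3 and step 2 clears CPUs
	 * 4-7, leaving real_cpu_mask = 0-3.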
	 */
	for (; i < possible; i++) {
		cpumask_clear_cpu(curr_cpu, &node_affinity.real_cpu_mask);
		curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
	}
}

int node_affinity_init(void)
{
	int node;
	struct pci_dev *dev = NULL;
	const struct pci_device_id *ids = hfi1_pci_tbl;

	cpumask_clear(&node_affinity.proc.used);
	cpumask_copy(&node_affinity.proc.mask, cpu_online_mask);

	node_affinity.proc.gen = 0;
	node_affinity.num_core_siblings =
				cpumask_weight(topology_sibling_cpumask(
					cpumask_first(&node_affinity.proc.mask)
					));
	node_affinity.num_possible_nodes = num_possible_nodes();
	node_affinity.num_online_nodes = num_online_nodes();
	node_affinity.num_online_cpus = num_online_cpus();

	/*
	 * The real cpu mask is part of the affinity struct but it has to be
	 * initialized early. It is needed to calculate the number of user
	 * contexts in set_up_context_variables().
	 */
	init_real_cpu_mask();

	hfi1_per_node_cntr = kcalloc(node_affinity.num_possible_nodes,
				     sizeof(*hfi1_per_node_cntr), GFP_KERNEL);
	if (!hfi1_per_node_cntr)
		return -ENOMEM;

	while (ids->vendor) {
		dev = NULL;
		while ((dev = pci_get_device(ids->vendor, ids->device, dev))) {
			node = pcibus_to_node(dev->bus);
			if (node < 0)
				node = numa_node_id();

			hfi1_per_node_cntr[node]++;
		}
		ids++;
	}

	return 0;
}

void node_affinity_destroy(void)
{
	struct list_head *pos, *q;
	struct hfi1_affinity_node *entry;

	mutex_lock(&node_affinity.lock);
	list_for_each_safe(pos, q, &node_affinity.list) {
		entry = list_entry(pos, struct hfi1_affinity_node,
				   list);
		list_del(pos);
		kfree(entry);
	}
	mutex_unlock(&node_affinity.lock);
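	/* Free the per-node device counters allocated in node_affinity_init() */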
	kfree(hfi1_per_node_cntr);
}

static struct hfi1_affinity_node *node_affinity_allocate(int node)
{
	struct hfi1_affinity_node *entry;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return NULL;
	entry->node = node;
	INIT_LIST_HEAD(&entry->list);

	return entry;
}

/*
 * It appends an entry to the list.
 * It *must* be called with node_affinity.lock held.
 */
static void node_affinity_add_tail(struct hfi1_affinity_node *entry)
{
	list_add_tail(&entry->list, &node_affinity.list);
}

/* It must be called with node_affinity.lock held */
static struct hfi1_affinity_node *node_affinity_lookup(int node)
{
	struct list_head *pos;
	struct hfi1_affinity_node *entry;

	list_for_each(pos, &node_affinity.list) {
		entry = list_entry(pos, struct hfi1_affinity_node, list);
		if (entry->node == node)
			return entry;
	}

	return NULL;
}

/*
 * Interrupt affinity.
 *
 * non-rcv avail gets a default mask that
 * starts as possible cpus with threads reset
 * and each rcv avail reset.
 *
 * rcv avail gets node relative 1 wrapping back
 * to the node relative 1 as necessary.
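 *
 * Concretely (a summary of hfi1_dev_affinity_init() below, offered as
 * orientation rather than a guarantee): the node's "real" (non-HT) CPUs
 * form the default mask, the first CPU is reserved for the general/control
 * context, the next CPUs are peeled off for the kernel receive contexts,
 * and whatever remains serves the SDMA engines.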
 *
 */
int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
{
	int node = pcibus_to_node(dd->pcidev->bus);
	struct hfi1_affinity_node *entry;
	const struct cpumask *local_mask;
	int curr_cpu, possible, i;

	if (node < 0)
		node = numa_node_id();
	dd->node = node;

	local_mask = cpumask_of_node(dd->node);
	if (cpumask_first(local_mask) >= nr_cpu_ids)
		local_mask = topology_core_cpumask(0);

	mutex_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);

	/*
	 * If this is the first time this NUMA node's affinity is used,
	 * create an entry in the global affinity structure and initialize it.
	 */
	if (!entry) {
		entry = node_affinity_allocate(node);
		if (!entry) {
			dd_dev_err(dd,
				   "Unable to allocate global affinity node\n");
			mutex_unlock(&node_affinity.lock);
			return -ENOMEM;
		}
		init_cpu_mask_set(&entry->def_intr);
		init_cpu_mask_set(&entry->rcv_intr);
		cpumask_clear(&entry->general_intr_mask);
		/* Use the "real" cpu mask of this node as the default */
		cpumask_and(&entry->def_intr.mask, &node_affinity.real_cpu_mask,
			    local_mask);

		/* fill in the receive list */
		possible = cpumask_weight(&entry->def_intr.mask);
		curr_cpu = cpumask_first(&entry->def_intr.mask);

		if (possible == 1) {
			/* only one CPU, everyone will use it */
			cpumask_set_cpu(curr_cpu, &entry->rcv_intr.mask);
			cpumask_set_cpu(curr_cpu, &entry->general_intr_mask);
		} else {
			/*
			 * The general/control context will be the first CPU in
			 * the default list, so it is removed from the default
			 * list and added to the general interrupt list.
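			 *
			 * For example (hypothetical): if def_intr starts as
			 * CPUs {0,2,4,6}, CPU 0 moves to general_intr_mask,
			 * the following CPUs move to rcv_intr for the kernel
			 * receive contexts, and any CPUs left in def_intr
			 * are kept for the SDMA engines.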
			 */
			cpumask_clear_cpu(curr_cpu, &entry->def_intr.mask);
			cpumask_set_cpu(curr_cpu, &entry->general_intr_mask);
			curr_cpu = cpumask_next(curr_cpu,
						&entry->def_intr.mask);

			/*
			 * Remove the remaining kernel receive queues from
			 * the default list and add them to the receive list.
			 */
			for (i = 0;
			     i < (dd->n_krcv_queues - 1) *
				  hfi1_per_node_cntr[dd->node];
			     i++) {
				cpumask_clear_cpu(curr_cpu,
						  &entry->def_intr.mask);
				cpumask_set_cpu(curr_cpu,
						&entry->rcv_intr.mask);
				curr_cpu = cpumask_next(curr_cpu,
							&entry->def_intr.mask);
				if (curr_cpu >= nr_cpu_ids)
					break;
			}

			/*
			 * If there ends up being 0 CPU cores leftover for SDMA
			 * engines, use the same CPU cores as general/control
			 * context.
			 */
			if (cpumask_weight(&entry->def_intr.mask) == 0)
				cpumask_copy(&entry->def_intr.mask,
					     &entry->general_intr_mask);
		}

		node_affinity_add_tail(entry);
	}
	mutex_unlock(&node_affinity.lock);
	return 0;
}

/*
 * Function updates the irq affinity hint for msix after it has been changed
 * by the user using the /proc/irq interface. This function only accepts
 * one cpu in the mask.
 */
static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
{
	struct sdma_engine *sde = msix->arg;
	struct hfi1_devdata *dd = sde->dd;
	struct hfi1_affinity_node *entry;
	struct cpu_mask_set *set;
	int i, old_cpu;

	if (cpu > num_online_cpus() || cpu == sde->cpu)
		return;

	mutex_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);
	if (!entry)
		goto unlock;

	old_cpu = sde->cpu;
	sde->cpu = cpu;
	cpumask_clear(&msix->mask);
	cpumask_set_cpu(cpu, &msix->mask);
	dd_dev_dbg(dd, "IRQ: %u, type %s engine %u -> cpu: %d\n",
		   msix->irq, irq_type_names[msix->type],
		   sde->this_idx, cpu);
	irq_set_affinity_hint(msix->irq, &msix->mask);

	/*
	 * Set the new cpu in the hfi1_affinity_node and clean
	 * the old cpu if it is not used by any other IRQ
	 */
	set = &entry->def_intr;
	cpumask_set_cpu(cpu, &set->mask);
	cpumask_set_cpu(cpu, &set->used);
	for (i = 0; i < dd->num_msix_entries; i++) {
		struct hfi1_msix_entry *other_msix;

		other_msix = &dd->msix_entries[i];
		if (other_msix->type != IRQ_SDMA || other_msix == msix)
			continue;

		if (cpumask_test_cpu(old_cpu, &other_msix->mask))
			goto unlock;
	}
	cpumask_clear_cpu(old_cpu, &set->mask);
	cpumask_clear_cpu(old_cpu, &set->used);
unlock:
	mutex_unlock(&node_affinity.lock);
}

static void hfi1_irq_notifier_notify(struct irq_affinity_notify *notify,
				     const cpumask_t *mask)
{
	int cpu = cpumask_first(mask);
	struct hfi1_msix_entry *msix = container_of(notify,
						    struct hfi1_msix_entry,
						    notify);

	/* Only one CPU configuration supported currently */
	hfi1_update_sdma_affinity(msix, cpu);
}

static void hfi1_irq_notifier_release(struct kref *ref)
{
	/*
	 * This is required by affinity notifier. We don't have anything to
	 * free here.
	 */
}

static void hfi1_setup_sdma_notifier(struct hfi1_msix_entry *msix)
{
	struct irq_affinity_notify *notify = &msix->notify;

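	/*
	 * Wire up the IRQ affinity notifier so that a user write to
	 * /proc/irq/<N>/smp_affinity is propagated back into the SDMA
	 * engine's preferred CPU via hfi1_update_sdma_affinity().
	 */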
	notify->irq = msix->irq;
	notify->notify = hfi1_irq_notifier_notify;
	notify->release = hfi1_irq_notifier_release;

	if (irq_set_affinity_notifier(notify->irq, notify))
		pr_err("Failed to register sdma irq affinity notifier for irq %d\n",
		       notify->irq);
}

static void hfi1_cleanup_sdma_notifier(struct hfi1_msix_entry *msix)
{
	struct irq_affinity_notify *notify = &msix->notify;

	if (irq_set_affinity_notifier(notify->irq, NULL))
		pr_err("Failed to cleanup sdma irq affinity notifier for irq %d\n",
		       notify->irq);
}

/*
 * Function sets the irq affinity for msix.
 * It *must* be called with node_affinity.lock held.
 */
static int get_irq_affinity(struct hfi1_devdata *dd,
			    struct hfi1_msix_entry *msix)
{
	int ret;
	cpumask_var_t diff;
	struct hfi1_affinity_node *entry;
	struct cpu_mask_set *set = NULL;
	struct sdma_engine *sde = NULL;
	struct hfi1_ctxtdata *rcd = NULL;
	char extra[64];
	int cpu = -1;

	extra[0] = '\0';
	cpumask_clear(&msix->mask);

	ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
	if (!ret)
		return -ENOMEM;

	entry = node_affinity_lookup(dd->node);

	switch (msix->type) {
	case IRQ_SDMA:
		sde = (struct sdma_engine *)msix->arg;
		scnprintf(extra, 64, "engine %u", sde->this_idx);
		set = &entry->def_intr;
		break;
	case IRQ_GENERAL:
		cpu = cpumask_first(&entry->general_intr_mask);
		break;
	case IRQ_RCVCTXT:
		rcd = (struct hfi1_ctxtdata *)msix->arg;
		if (rcd->ctxt == HFI1_CTRL_CTXT)
			cpu = cpumask_first(&entry->general_intr_mask);
		else
			set = &entry->rcv_intr;
		scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
		break;
	default:
		dd_dev_err(dd, "Invalid IRQ type %d\n", msix->type);
		free_cpumask_var(diff);
		return -EINVAL;
	}

	/*
	 * The general and control contexts are placed on a particular
	 * CPU, which is set above. Skip accounting for it. Everything else
	 * finds its CPU here.
	 */
	if (cpu == -1 && set) {
		if (cpumask_equal(&set->mask, &set->used)) {
			/*
			 * We've used up all the CPUs, bump up the generation
			 * and reset the 'used' map
			 */
			set->gen++;
			cpumask_clear(&set->used);
		}
		cpumask_andnot(diff, &set->mask, &set->used);
		cpu = cpumask_first(diff);
		cpumask_set_cpu(cpu, &set->used);
	}

	cpumask_set_cpu(cpu, &msix->mask);
	dd_dev_info(dd, "IRQ: %u, type %s %s -> cpu: %d\n",
		    msix->irq, irq_type_names[msix->type],
		    extra, cpu);
	irq_set_affinity_hint(msix->irq, &msix->mask);

	if (msix->type == IRQ_SDMA) {
		sde->cpu = cpu;
		hfi1_setup_sdma_notifier(msix);
	}

	free_cpumask_var(diff);
	return 0;
}

int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
{
	int ret;

	mutex_lock(&node_affinity.lock);
	ret = get_irq_affinity(dd, msix);
	mutex_unlock(&node_affinity.lock);
	return ret;
}

void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
			   struct hfi1_msix_entry *msix)
{
	struct cpu_mask_set *set = NULL;
	struct hfi1_ctxtdata *rcd;
	struct hfi1_affinity_node *entry;

	mutex_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);

	switch (msix->type) {
	case IRQ_SDMA:
		set = &entry->def_intr;
		hfi1_cleanup_sdma_notifier(msix);
		break;
	case IRQ_GENERAL:
		/* Don't do accounting for general contexts */
		break;
	case IRQ_RCVCTXT:
		rcd = (struct hfi1_ctxtdata *)msix->arg;
		/* Don't do accounting for control contexts */
		if (rcd->ctxt != HFI1_CTRL_CTXT)
			set = &entry->rcv_intr;
		break;
	default:
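		/* Unknown IRQ type: no accounting to undo, just drop the lock */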
		mutex_unlock(&node_affinity.lock);
		return;
	}

	if (set) {
		cpumask_andnot(&set->used, &set->used, &msix->mask);
		if (cpumask_empty(&set->used) && set->gen) {
			set->gen--;
			cpumask_copy(&set->used, &set->mask);
		}
	}

	irq_set_affinity_hint(msix->irq, NULL);
	cpumask_clear(&msix->mask);
	mutex_unlock(&node_affinity.lock);
}

/* This should be called with node_affinity.lock held */
static void find_hw_thread_mask(uint hw_thread_no, cpumask_var_t hw_thread_mask,
				struct hfi1_affinity_node_list *affinity)
{
	int possible, curr_cpu, i;
	uint num_cores_per_socket = node_affinity.num_online_cpus /
					affinity->num_core_siblings /
						node_affinity.num_online_nodes;

	cpumask_copy(hw_thread_mask, &affinity->proc.mask);
	if (affinity->num_core_siblings > 0) {
		/* Removing other siblings not needed for now */
		possible = cpumask_weight(hw_thread_mask);
		curr_cpu = cpumask_first(hw_thread_mask);
		for (i = 0;
		     i < num_cores_per_socket * node_affinity.num_online_nodes;
		     i++)
			curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);

		for (; i < possible; i++) {
			cpumask_clear_cpu(curr_cpu, hw_thread_mask);
			curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);
		}

		/* Identifying correct HW threads within physical cores */
		cpumask_shift_left(hw_thread_mask, hw_thread_mask,
				   num_cores_per_socket *
				   node_affinity.num_online_nodes *
				   hw_thread_no);
	}
}

int hfi1_get_proc_affinity(int node)
{
	int cpu = -1, ret, i;
	struct hfi1_affinity_node *entry;
	cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask;
	const struct cpumask *node_mask,
		*proc_mask = &current->cpus_allowed;
	struct hfi1_affinity_node_list *affinity = &node_affinity;
	struct cpu_mask_set *set = &affinity->proc;

	/*
	 * check whether process/context affinity has already
	 * been set
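	 * (e.g. pinned by the user with taskset/sched_setaffinity); a
	 * single pre-set CPU is honored as-is below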
	 */
	if (cpumask_weight(proc_mask) == 1) {
		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl",
			  current->pid, current->comm,
			  cpumask_pr_args(proc_mask));
		/*
		 * Mark the pre-set CPU as used. This is atomic so we don't
		 * need the lock
		 */
		cpu = cpumask_first(proc_mask);
		cpumask_set_cpu(cpu, &set->used);
		goto done;
	} else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) {
		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl",
			  current->pid, current->comm,
			  cpumask_pr_args(proc_mask));
		goto done;
	}

	/*
	 * The process does not have a preset CPU affinity so find one to
	 * recommend using the following algorithm:
	 *
	 * For each user process that is opening a context on HFI Y:
	 *  a) If all cores are filled, reinitialize the bitmask
	 *  b) Fill real cores first, then HT cores (First set of HT
	 *     cores on all physical cores, then second set of HT cores,
	 *     and so on) in the following order:
	 *
	 *     1. Same NUMA node as HFI Y and not running an IRQ
	 *        handler
	 *     2. Same NUMA node as HFI Y and running an IRQ handler
	 *     3. Different NUMA node to HFI Y and not running an IRQ
	 *        handler
	 *     4. Different NUMA node to HFI Y and running an IRQ
	 *        handler
	 *  c) Mark core as filled in the bitmask. As user processes are
	 *     done, clear cores from the bitmask.
	 */

	ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
	if (!ret)
		goto done;
	ret = zalloc_cpumask_var(&hw_thread_mask, GFP_KERNEL);
	if (!ret)
		goto free_diff;
	ret = zalloc_cpumask_var(&available_mask, GFP_KERNEL);
	if (!ret)
		goto free_hw_thread_mask;
	ret = zalloc_cpumask_var(&intrs_mask, GFP_KERNEL);
	if (!ret)
		goto free_available_mask;

	mutex_lock(&affinity->lock);
	/*
	 * If we've used all available HW threads, clear the mask and start
	 * overloading.
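	 *
	 * set->gen counts how many times the used mask has wrapped this
	 * way, letting hfi1_put_proc_affinity() unwind the overloading as
	 * processes exit.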
	 */
	if (cpumask_equal(&set->mask, &set->used)) {
		set->gen++;
		cpumask_clear(&set->used);
	}

	/*
	 * If NUMA node has CPUs used by interrupt handlers, include them in the
	 * interrupt handler mask.
	 */
	entry = node_affinity_lookup(node);
	if (entry) {
		cpumask_copy(intrs_mask, (entry->def_intr.gen ?
					  &entry->def_intr.mask :
					  &entry->def_intr.used));
		cpumask_or(intrs_mask, intrs_mask, (entry->rcv_intr.gen ?
						    &entry->rcv_intr.mask :
						    &entry->rcv_intr.used));
		cpumask_or(intrs_mask, intrs_mask, &entry->general_intr_mask);
	}
	hfi1_cdbg(PROC, "CPUs used by interrupts: %*pbl",
		  cpumask_pr_args(intrs_mask));

	cpumask_copy(hw_thread_mask, &set->mask);

	/*
	 * If HT cores are enabled, identify which HW threads within the
	 * physical cores should be used.
	 */
	if (affinity->num_core_siblings > 0) {
		for (i = 0; i < affinity->num_core_siblings; i++) {
			find_hw_thread_mask(i, hw_thread_mask, affinity);

			/*
			 * If there's at least one available core for this HW
			 * thread number, stop looking for a core.
			 *
			 * diff will always be not empty at least once in this
			 * loop as the used mask gets reset when
			 * (set->mask == set->used) before this loop.
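			 *
			 * For example (hypothetical): with two HW threads per
			 * core, pass i == 0 probes the first sibling of every
			 * core and i == 1 the second, so the first pass that
			 * still has an unused CPU wins.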
			 */
			cpumask_andnot(diff, hw_thread_mask, &set->used);
			if (!cpumask_empty(diff))
				break;
		}
	}
	hfi1_cdbg(PROC, "Same available HW thread on all physical CPUs: %*pbl",
		  cpumask_pr_args(hw_thread_mask));

	node_mask = cpumask_of_node(node);
	hfi1_cdbg(PROC, "Device on NUMA %u, CPUs %*pbl", node,
		  cpumask_pr_args(node_mask));

	/* Get cpumask of available CPUs on preferred NUMA */
	cpumask_and(available_mask, hw_thread_mask, node_mask);
	cpumask_andnot(available_mask, available_mask, &set->used);
	hfi1_cdbg(PROC, "Available CPUs on NUMA %u: %*pbl", node,
		  cpumask_pr_args(available_mask));

	/*
	 * At first, we don't want to place processes on the same
	 * CPUs as interrupt handlers. Then, CPUs running interrupt
	 * handlers are used.
	 *
	 * 1) If diff is not empty, then there are CPUs available that are
	 *    not running interrupt handlers, so diff gets copied over to
	 *    available_mask.
	 * 2) If diff is empty, then all CPUs not running interrupt
	 *    handlers are taken, so available_mask contains all
	 *    available CPUs running interrupt handlers.
	 * 3) If available_mask is empty, then all CPUs on the
	 *    preferred NUMA node are taken, so other NUMA nodes are
	 *    used for process assignments using the same method as
	 *    the preferred NUMA node.
	 */
	cpumask_andnot(diff, available_mask, intrs_mask);
	if (!cpumask_empty(diff))
		cpumask_copy(available_mask, diff);

	/* If we don't have CPUs on the preferred node, use other NUMA nodes */
	if (cpumask_empty(available_mask)) {
		cpumask_andnot(available_mask, hw_thread_mask, &set->used);
		/* Excluding preferred NUMA cores */
		cpumask_andnot(available_mask, available_mask, node_mask);
		hfi1_cdbg(PROC,
			  "Preferred NUMA node cores are taken, cores available in other NUMA nodes: %*pbl",
			  cpumask_pr_args(available_mask));

		/*
		 * At first, we don't want to place processes on the same
		 * CPUs as interrupt handlers.
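		 * (the same interrupt-avoidance preference applied on the
		 * preferred node above)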
		 */
		cpumask_andnot(diff, available_mask, intrs_mask);
		if (!cpumask_empty(diff))
			cpumask_copy(available_mask, diff);
	}
	hfi1_cdbg(PROC, "Possible CPUs for process: %*pbl",
		  cpumask_pr_args(available_mask));

	cpu = cpumask_first(available_mask);
	if (cpu >= nr_cpu_ids) /* empty */
		cpu = -1;
	else
		cpumask_set_cpu(cpu, &set->used);

	mutex_unlock(&affinity->lock);
	hfi1_cdbg(PROC, "Process assigned to CPU %d", cpu);

	free_cpumask_var(intrs_mask);
free_available_mask:
	free_cpumask_var(available_mask);
free_hw_thread_mask:
	free_cpumask_var(hw_thread_mask);
free_diff:
	free_cpumask_var(diff);
done:
	return cpu;
}

void hfi1_put_proc_affinity(int cpu)
{
	struct hfi1_affinity_node_list *affinity = &node_affinity;
	struct cpu_mask_set *set = &affinity->proc;

	if (cpu < 0)
		return;

	mutex_lock(&affinity->lock);
	cpumask_clear_cpu(cpu, &set->used);
	hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu);
	if (cpumask_empty(&set->used) && set->gen) {
		set->gen--;
		cpumask_copy(&set->used, &set->mask);
	}
	mutex_unlock(&affinity->lock);
}