/* xref: /linux/drivers/infiniband/hw/hfi1/affinity.c (revision bb7dde8784913c06ccd1456bed6dcc5ebd0b3c24) */
/*
 * Copyright(c) 2015 - 2017 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
#include <linux/topology.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/interrupt.h>

#include "hfi.h"
#include "affinity.h"
#include "sdma.h"
#include "trace.h"

struct hfi1_affinity_node_list node_affinity = {
	.list = LIST_HEAD_INIT(node_affinity.list),
	.lock = __MUTEX_INITIALIZER(node_affinity.lock)
};

/* Names of IRQ types, indexed by enum irq_type */
static const char * const irq_type_names[] = {
	"SDMA",
	"RCVCTXT",
	"GENERAL",
	"OTHER",
};

/* Per NUMA node count of HFI devices */
static unsigned int *hfi1_per_node_cntr;

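/*
 * Allocation scheme used by struct cpu_mask_set throughout this file:
 * 'mask' holds the candidate CPUs, 'used' marks the CPUs currently
 * handed out, and 'gen' counts how many times 'used' has filled up and
 * been reset, so that CPUs are overloaded one full round at a time
 * rather than at random.
 */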
static inline void init_cpu_mask_set(struct cpu_mask_set *set)
{
	cpumask_clear(&set->mask);
	cpumask_clear(&set->used);
	set->gen = 0;
}

/* Initialize non-HT cpu cores mask */
void init_real_cpu_mask(void)
{
	int possible, curr_cpu, i, ht;

	cpumask_clear(&node_affinity.real_cpu_mask);

	/* Start with cpu online mask as the real cpu mask */
	cpumask_copy(&node_affinity.real_cpu_mask, cpu_online_mask);

	/*
	 * Remove HT cores from the real cpu mask.  Do this in two steps below.
	 */
	possible = cpumask_weight(&node_affinity.real_cpu_mask);
	ht = cpumask_weight(topology_sibling_cpumask(
				cpumask_first(&node_affinity.real_cpu_mask)));
	/*
	 * Step 1.  Skip over the first N HT siblings and use them as the
	 * "real" cores.  Assumes that HT cores are not enumerated in
	 * succession (except in the single core case).
	 */
	curr_cpu = cpumask_first(&node_affinity.real_cpu_mask);
	for (i = 0; i < possible / ht; i++)
		curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
	/*
	 * Step 2.  Remove the remaining HT siblings.  Use cpumask_next() to
	 * skip any gaps.
	 */
	for (; i < possible; i++) {
		cpumask_clear_cpu(curr_cpu, &node_affinity.real_cpu_mask);
		curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
	}
}

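/*
 * Worked example for init_real_cpu_mask() on a hypothetical topology:
 * 8 online CPUs, 2 HT threads per core, siblings enumerated as
 * (0,4), (1,5), (2,6), (3,7).  Then possible = 8 and ht = 2; step 1
 * advances the cursor past CPUs 0-3 (the "real" cores), and step 2
 * clears CPUs 4-7, leaving real_cpu_mask = 0-3.
 */
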
int node_affinity_init(void)
{
	int node;
	struct pci_dev *dev = NULL;
	const struct pci_device_id *ids = hfi1_pci_tbl;

	cpumask_clear(&node_affinity.proc.used);
	cpumask_copy(&node_affinity.proc.mask, cpu_online_mask);

	node_affinity.proc.gen = 0;
	node_affinity.num_core_siblings =
				cpumask_weight(topology_sibling_cpumask(
					cpumask_first(&node_affinity.proc.mask)
					));
	node_affinity.num_possible_nodes = num_possible_nodes();
	node_affinity.num_online_nodes = num_online_nodes();
	node_affinity.num_online_cpus = num_online_cpus();

	/*
	 * The real cpu mask is part of the affinity struct but it has to be
	 * initialized early. It is needed to calculate the number of user
	 * contexts in set_up_context_variables().
	 */
	init_real_cpu_mask();

	hfi1_per_node_cntr = kcalloc(node_affinity.num_possible_nodes,
				     sizeof(*hfi1_per_node_cntr), GFP_KERNEL);
	if (!hfi1_per_node_cntr)
		return -ENOMEM;

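	/*
	 * Walk the PCI device table and count how many HFI devices sit on
	 * each NUMA node; hfi1_dev_affinity_init() later uses these counts
	 * when carving up CPUs for the kernel receive contexts.
	 */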
	while (ids->vendor) {
		dev = NULL;
		while ((dev = pci_get_device(ids->vendor, ids->device, dev))) {
			node = pcibus_to_node(dev->bus);
			if (node < 0)
				node = numa_node_id();

			hfi1_per_node_cntr[node]++;
		}
		ids++;
	}

	return 0;
}

void node_affinity_destroy(void)
{
	struct list_head *pos, *q;
	struct hfi1_affinity_node *entry;

	mutex_lock(&node_affinity.lock);
	list_for_each_safe(pos, q, &node_affinity.list) {
		entry = list_entry(pos, struct hfi1_affinity_node,
				   list);
		list_del(pos);
		kfree(entry);
	}
	mutex_unlock(&node_affinity.lock);
	kfree(hfi1_per_node_cntr);
}

static struct hfi1_affinity_node *node_affinity_allocate(int node)
{
	struct hfi1_affinity_node *entry;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return NULL;
	entry->node = node;
	INIT_LIST_HEAD(&entry->list);

	return entry;
}

/*
 * It appends an entry to the list.
 * It *must* be called with node_affinity.lock held.
 */
static void node_affinity_add_tail(struct hfi1_affinity_node *entry)
{
	list_add_tail(&entry->list, &node_affinity.list);
}

/* It must be called with node_affinity.lock held */
static struct hfi1_affinity_node *node_affinity_lookup(int node)
{
	struct list_head *pos;
	struct hfi1_affinity_node *entry;

	list_for_each(pos, &node_affinity.list) {
		entry = list_entry(pos, struct hfi1_affinity_node, list);
		if (entry->node == node)
			return entry;
	}

	return NULL;
}

/*
 * Interrupt affinity.
 *
 * Non-receive-available interrupts get a default mask that starts as
 * the possible CPUs with HT sibling threads cleared and with each
 * receive-available CPU cleared.
 *
 * Receive-available interrupts get node-relative CPU 1 onward,
 * wrapping back to node-relative CPU 1 as necessary.
 */
int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
{
	int node = pcibus_to_node(dd->pcidev->bus);
	struct hfi1_affinity_node *entry;
	const struct cpumask *local_mask;
	int curr_cpu, possible, i;

	if (node < 0)
		node = numa_node_id();
	dd->node = node;

	local_mask = cpumask_of_node(dd->node);
	if (cpumask_first(local_mask) >= nr_cpu_ids)
		local_mask = topology_core_cpumask(0);

	mutex_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);

	/*
	 * If this is the first time this NUMA node's affinity is used,
	 * create an entry in the global affinity structure and initialize it.
	 */
	if (!entry) {
		entry = node_affinity_allocate(node);
		if (!entry) {
			dd_dev_err(dd,
				   "Unable to allocate global affinity node\n");
			mutex_unlock(&node_affinity.lock);
			return -ENOMEM;
		}
		init_cpu_mask_set(&entry->def_intr);
		init_cpu_mask_set(&entry->rcv_intr);
		cpumask_clear(&entry->general_intr_mask);
		/* Use the "real" cpu mask of this node as the default */
		cpumask_and(&entry->def_intr.mask, &node_affinity.real_cpu_mask,
			    local_mask);

		/* fill in the receive list */
		possible = cpumask_weight(&entry->def_intr.mask);
		curr_cpu = cpumask_first(&entry->def_intr.mask);

		if (possible == 1) {
			/* only one CPU, everyone will use it */
			cpumask_set_cpu(curr_cpu, &entry->rcv_intr.mask);
			cpumask_set_cpu(curr_cpu, &entry->general_intr_mask);
		} else {
			/*
			 * The general/control context will be the first CPU in
			 * the default list, so it is removed from the default
			 * list and added to the general interrupt list.
			 */
			cpumask_clear_cpu(curr_cpu, &entry->def_intr.mask);
			cpumask_set_cpu(curr_cpu, &entry->general_intr_mask);
			curr_cpu = cpumask_next(curr_cpu,
						&entry->def_intr.mask);

			/*
			 * Remove the remaining kernel receive queues from
			 * the default list and add them to the receive list.
			 */
			for (i = 0;
			     i < (dd->n_krcv_queues - 1) *
				  hfi1_per_node_cntr[dd->node];
			     i++) {
				cpumask_clear_cpu(curr_cpu,
						  &entry->def_intr.mask);
				cpumask_set_cpu(curr_cpu,
						&entry->rcv_intr.mask);
				curr_cpu = cpumask_next(curr_cpu,
							&entry->def_intr.mask);
				if (curr_cpu >= nr_cpu_ids)
					break;
			}

			/*
			 * If there ends up being 0 CPU cores leftover for SDMA
			 * engines, use the same CPU cores as general/control
			 * context.
			 */
			if (cpumask_weight(&entry->def_intr.mask) == 0)
				cpumask_copy(&entry->def_intr.mask,
					     &entry->general_intr_mask);
		}

		node_affinity_add_tail(entry);
	}
	mutex_unlock(&node_affinity.lock);
	return 0;
}

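/*
 * Worked example for hfi1_dev_affinity_init() under an assumed layout:
 * one HFI on a node whose "real" CPUs are 0-11 and n_krcv_queues == 3
 * (one control plus two kernel receive contexts).  The first CPU (0)
 * becomes general_intr_mask, the next two (1-2) become rcv_intr.mask,
 * and the remainder (3-11) stay in def_intr.mask for the SDMA engines.
 */
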
/*
 * Function updates the IRQ affinity hint for an MSI-X vector after it
 * has been changed by the user via the /proc/irq interface. Only one
 * CPU in the mask is accepted.
 */
static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
{
	struct sdma_engine *sde = msix->arg;
	struct hfi1_devdata *dd = sde->dd;
	struct hfi1_affinity_node *entry;
	struct cpu_mask_set *set;
	int i, old_cpu;

	if (cpu > num_online_cpus() || cpu == sde->cpu)
		return;

	mutex_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);
	if (!entry)
		goto unlock;

	old_cpu = sde->cpu;
	sde->cpu = cpu;
	cpumask_clear(&msix->mask);
	cpumask_set_cpu(cpu, &msix->mask);
	dd_dev_dbg(dd, "IRQ: %u, type %s engine %u -> cpu: %d\n",
		   msix->irq, irq_type_names[msix->type],
		   sde->this_idx, cpu);
	irq_set_affinity_hint(msix->irq, &msix->mask);

	/*
	 * Set the new cpu in the hfi1_affinity_node and clean
	 * the old cpu if it is not used by any other IRQ
	 */
	set = &entry->def_intr;
	cpumask_set_cpu(cpu, &set->mask);
	cpumask_set_cpu(cpu, &set->used);
	for (i = 0; i < dd->num_msix_entries; i++) {
		struct hfi1_msix_entry *other_msix;

		other_msix = &dd->msix_entries[i];
		if (other_msix->type != IRQ_SDMA || other_msix == msix)
			continue;

		if (cpumask_test_cpu(old_cpu, &other_msix->mask))
			goto unlock;
	}
	cpumask_clear_cpu(old_cpu, &set->mask);
	cpumask_clear_cpu(old_cpu, &set->used);
unlock:
	mutex_unlock(&node_affinity.lock);
}

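/*
 * Notifier callback invoked by the IRQ core when userspace rewrites an
 * SDMA vector's affinity (e.g. through /proc/irq/<N>/smp_affinity).
 * Only the first CPU of the new mask is honored.
 */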
static void hfi1_irq_notifier_notify(struct irq_affinity_notify *notify,
				     const cpumask_t *mask)
{
	int cpu = cpumask_first(mask);
	struct hfi1_msix_entry *msix = container_of(notify,
						    struct hfi1_msix_entry,
						    notify);

	/* Only one CPU configuration supported currently */
	hfi1_update_sdma_affinity(msix, cpu);
}

static void hfi1_irq_notifier_release(struct kref *ref)
{
	/*
	 * This is required by affinity notifier. We don't have anything to
	 * free here.
	 */
}

static void hfi1_setup_sdma_notifier(struct hfi1_msix_entry *msix)
{
	struct irq_affinity_notify *notify = &msix->notify;

	notify->irq = msix->irq;
	notify->notify = hfi1_irq_notifier_notify;
	notify->release = hfi1_irq_notifier_release;

	if (irq_set_affinity_notifier(notify->irq, notify))
		pr_err("Failed to register sdma irq affinity notifier for irq %d\n",
		       notify->irq);
}

static void hfi1_cleanup_sdma_notifier(struct hfi1_msix_entry *msix)
{
	struct irq_affinity_notify *notify = &msix->notify;

	if (irq_set_affinity_notifier(notify->irq, NULL))
		pr_err("Failed to cleanup sdma irq affinity notifier for irq %d\n",
		       notify->irq);
}

/*
 * Function sets the irq affinity for msix.
 * It *must* be called with node_affinity.lock held.
 */
static int get_irq_affinity(struct hfi1_devdata *dd,
			    struct hfi1_msix_entry *msix)
{
	int ret;
	cpumask_var_t diff;
	struct hfi1_affinity_node *entry;
	struct cpu_mask_set *set = NULL;
	struct sdma_engine *sde = NULL;
	struct hfi1_ctxtdata *rcd = NULL;
	char extra[64];
	int cpu = -1;

	extra[0] = '\0';
	cpumask_clear(&msix->mask);

	ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
	if (!ret)
		return -ENOMEM;

	entry = node_affinity_lookup(dd->node);

	switch (msix->type) {
	case IRQ_SDMA:
		sde = (struct sdma_engine *)msix->arg;
		scnprintf(extra, 64, "engine %u", sde->this_idx);
		set = &entry->def_intr;
		break;
	case IRQ_GENERAL:
		cpu = cpumask_first(&entry->general_intr_mask);
		break;
	case IRQ_RCVCTXT:
		rcd = (struct hfi1_ctxtdata *)msix->arg;
		if (rcd->ctxt == HFI1_CTRL_CTXT)
			cpu = cpumask_first(&entry->general_intr_mask);
		else
			set = &entry->rcv_intr;
		scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
		break;
	default:
		dd_dev_err(dd, "Invalid IRQ type %d\n", msix->type);
		/* Don't leak the cpumask allocated above on this error path */
		free_cpumask_var(diff);
		return -EINVAL;
	}

	/*
	 * The general and control contexts are placed on a particular
	 * CPU, which is set above. Skip accounting for it. Everything else
	 * finds its CPU here.
	 */
	if (cpu == -1 && set) {
		if (cpumask_equal(&set->mask, &set->used)) {
			/*
			 * We've used up all the CPUs, bump up the generation
			 * and reset the 'used' map
			 */
			set->gen++;
			cpumask_clear(&set->used);
		}
		cpumask_andnot(diff, &set->mask, &set->used);
		cpu = cpumask_first(diff);
		cpumask_set_cpu(cpu, &set->used);
	}

	cpumask_set_cpu(cpu, &msix->mask);
	dd_dev_info(dd, "IRQ: %u, type %s %s -> cpu: %d\n",
		    msix->irq, irq_type_names[msix->type],
		    extra, cpu);
	irq_set_affinity_hint(msix->irq, &msix->mask);

	if (msix->type == IRQ_SDMA) {
		sde->cpu = cpu;
		hfi1_setup_sdma_notifier(msix);
	}

	free_cpumask_var(diff);
	return 0;
}

int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
{
	int ret;

	mutex_lock(&node_affinity.lock);
	ret = get_irq_affinity(dd, msix);
	mutex_unlock(&node_affinity.lock);
	return ret;
}

void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
			   struct hfi1_msix_entry *msix)
{
	struct cpu_mask_set *set = NULL;
	struct hfi1_ctxtdata *rcd;
	struct hfi1_affinity_node *entry;

	mutex_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);

	switch (msix->type) {
	case IRQ_SDMA:
		set = &entry->def_intr;
		hfi1_cleanup_sdma_notifier(msix);
		break;
	case IRQ_GENERAL:
		/* Don't do accounting for general contexts */
		break;
	case IRQ_RCVCTXT:
		rcd = (struct hfi1_ctxtdata *)msix->arg;
		/* Don't do accounting for control contexts */
		if (rcd->ctxt != HFI1_CTRL_CTXT)
			set = &entry->rcv_intr;
		break;
	default:
		mutex_unlock(&node_affinity.lock);
		return;
	}

	if (set) {
		cpumask_andnot(&set->used, &set->used, &msix->mask);
		if (cpumask_empty(&set->used) && set->gen) {
			set->gen--;
			cpumask_copy(&set->used, &set->mask);
		}
	}

	irq_set_affinity_hint(msix->irq, NULL);
	cpumask_clear(&msix->mask);
	mutex_unlock(&node_affinity.lock);
}

/* This should be called with node_affinity.lock held */
static void find_hw_thread_mask(uint hw_thread_no, cpumask_var_t hw_thread_mask,
				struct hfi1_affinity_node_list *affinity)
{
	int possible, curr_cpu, i;
	uint num_cores_per_socket = node_affinity.num_online_cpus /
					affinity->num_core_siblings /
						node_affinity.num_online_nodes;

	cpumask_copy(hw_thread_mask, &affinity->proc.mask);
	if (affinity->num_core_siblings > 0) {
		/* Removing other siblings not needed for now */
		possible = cpumask_weight(hw_thread_mask);
		curr_cpu = cpumask_first(hw_thread_mask);
		for (i = 0;
		     i < num_cores_per_socket * node_affinity.num_online_nodes;
		     i++)
			curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);

		for (; i < possible; i++) {
			cpumask_clear_cpu(curr_cpu, hw_thread_mask);
			curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);
		}

		/* Identifying correct HW threads within physical cores */
		cpumask_shift_left(hw_thread_mask, hw_thread_mask,
				   num_cores_per_socket *
				   node_affinity.num_online_nodes *
				   hw_thread_no);
	}
}

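/*
 * Worked example for find_hw_thread_mask() on a hypothetical topology:
 * 2 sockets x 4 cores x 2 HT threads = 16 online CPUs, enumerated so
 * that CPUs 0-7 are the first HT thread of each core and CPUs 8-15 the
 * second (sibling pairs (0,8), (1,9), ...).  num_cores_per_socket is
 * 16 / 2 / 2 = 4, so the mask is first trimmed to CPUs 0-7 and then
 * shifted left by 8 * hw_thread_no: hw_thread_no == 0 selects CPUs
 * 0-7, hw_thread_no == 1 selects CPUs 8-15.
 */
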
int hfi1_get_proc_affinity(int node)
{
	int cpu = -1, ret, i;
	struct hfi1_affinity_node *entry;
	cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask;
	const struct cpumask *node_mask,
		*proc_mask = &current->cpus_allowed;
	struct hfi1_affinity_node_list *affinity = &node_affinity;
	struct cpu_mask_set *set = &affinity->proc;

	/*
	 * check whether process/context affinity has already
	 * been set
	 */
	if (cpumask_weight(proc_mask) == 1) {
		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl",
			  current->pid, current->comm,
			  cpumask_pr_args(proc_mask));
		/*
		 * Mark the pre-set CPU as used. This is atomic so we don't
		 * need the lock
		 */
		cpu = cpumask_first(proc_mask);
		cpumask_set_cpu(cpu, &set->used);
		goto done;
	} else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) {
		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl",
			  current->pid, current->comm,
			  cpumask_pr_args(proc_mask));
		goto done;
	}

	/*
	 * The process does not have a preset CPU affinity so find one to
	 * recommend using the following algorithm:
	 *
	 * For each user process that is opening a context on HFI Y:
	 *  a) If all cores are filled, reinitialize the bitmask
	 *  b) Fill real cores first, then HT cores (first set of HT
	 *     cores on all physical cores, then second set of HT cores,
	 *     and so on) in the following order:
	 *
	 *     1. Same NUMA node as HFI Y and not running an IRQ
	 *        handler
	 *     2. Same NUMA node as HFI Y and running an IRQ handler
	 *     3. Different NUMA node to HFI Y and not running an IRQ
	 *        handler
	 *     4. Different NUMA node to HFI Y and running an IRQ
	 *        handler
	 *  c) Mark core as filled in the bitmask. As user processes are
	 *     done, clear cores from the bitmask.
	 */
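	/*
	 * For instance (hypothetical scenario): on a two-socket system, a
	 * process opening a context on an HFI in socket 0 is first placed
	 * on a socket-0 core with no hfi1 IRQ handler; only when those are
	 * exhausted does it share with IRQ cores, and only after that does
	 * it spill over to socket 1.
	 */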

	ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
	if (!ret)
		goto done;
	ret = zalloc_cpumask_var(&hw_thread_mask, GFP_KERNEL);
	if (!ret)
		goto free_diff;
	ret = zalloc_cpumask_var(&available_mask, GFP_KERNEL);
	if (!ret)
		goto free_hw_thread_mask;
	ret = zalloc_cpumask_var(&intrs_mask, GFP_KERNEL);
	if (!ret)
		goto free_available_mask;

	mutex_lock(&affinity->lock);
	/*
	 * If we've used all available HW threads, clear the mask and start
	 * overloading.
	 */
	if (cpumask_equal(&set->mask, &set->used)) {
		set->gen++;
		cpumask_clear(&set->used);
	}

	/*
	 * If NUMA node has CPUs used by interrupt handlers, include them in the
	 * interrupt handler mask.
	 */
	entry = node_affinity_lookup(node);
	if (entry) {
		cpumask_copy(intrs_mask, (entry->def_intr.gen ?
					  &entry->def_intr.mask :
					  &entry->def_intr.used));
		cpumask_or(intrs_mask, intrs_mask, (entry->rcv_intr.gen ?
						    &entry->rcv_intr.mask :
						    &entry->rcv_intr.used));
		cpumask_or(intrs_mask, intrs_mask, &entry->general_intr_mask);
	}
	hfi1_cdbg(PROC, "CPUs used by interrupts: %*pbl",
		  cpumask_pr_args(intrs_mask));

	cpumask_copy(hw_thread_mask, &set->mask);

	/*
	 * If HT cores are enabled, identify which HW threads within the
	 * physical cores should be used.
	 */
	if (affinity->num_core_siblings > 0) {
		for (i = 0; i < affinity->num_core_siblings; i++) {
			find_hw_thread_mask(i, hw_thread_mask, affinity);

			/*
			 * If there's at least one available core for this HW
			 * thread number, stop looking for a core.
			 *
			 * diff will always be non-empty at least once in this
			 * loop, as the used mask gets reset when
			 * (set->mask == set->used) before this loop.
			 */
			cpumask_andnot(diff, hw_thread_mask, &set->used);
			if (!cpumask_empty(diff))
				break;
		}
	}
	hfi1_cdbg(PROC, "Same available HW thread on all physical CPUs: %*pbl",
		  cpumask_pr_args(hw_thread_mask));

	node_mask = cpumask_of_node(node);
	hfi1_cdbg(PROC, "Device on NUMA %u, CPUs %*pbl", node,
		  cpumask_pr_args(node_mask));

	/* Get cpumask of available CPUs on preferred NUMA */
	cpumask_and(available_mask, hw_thread_mask, node_mask);
	cpumask_andnot(available_mask, available_mask, &set->used);
	hfi1_cdbg(PROC, "Available CPUs on NUMA %u: %*pbl", node,
		  cpumask_pr_args(available_mask));

	/*
	 * At first, we don't want to place processes on the same
	 * CPUs as interrupt handlers. Then, CPUs running interrupt
	 * handlers are used.
	 *
	 * 1) If diff is not empty, then there are CPUs not running
	 *    interrupt handlers available, so diff gets copied
	 *    over to available_mask.
	 * 2) If diff is empty, then all CPUs not running interrupt
	 *    handlers are taken, so available_mask contains all
	 *    available CPUs running interrupt handlers.
	 * 3) If available_mask is empty, then all CPUs on the
	 *    preferred NUMA node are taken, so other NUMA nodes are
	 *    used for process assignments using the same method as
	 *    the preferred NUMA node.
	 */
	cpumask_andnot(diff, available_mask, intrs_mask);
	if (!cpumask_empty(diff))
		cpumask_copy(available_mask, diff);

	/* If we don't have CPUs on the preferred node, use other NUMA nodes */
	if (cpumask_empty(available_mask)) {
		cpumask_andnot(available_mask, hw_thread_mask, &set->used);
		/* Excluding preferred NUMA cores */
		cpumask_andnot(available_mask, available_mask, node_mask);
		hfi1_cdbg(PROC,
			  "Preferred NUMA node cores are taken, cores available in other NUMA nodes: %*pbl",
			  cpumask_pr_args(available_mask));

		/*
		 * At first, we don't want to place processes on the same
		 * CPUs as interrupt handlers.
		 */
		cpumask_andnot(diff, available_mask, intrs_mask);
		if (!cpumask_empty(diff))
			cpumask_copy(available_mask, diff);
	}
	hfi1_cdbg(PROC, "Possible CPUs for process: %*pbl",
		  cpumask_pr_args(available_mask));

	cpu = cpumask_first(available_mask);
	if (cpu >= nr_cpu_ids) /* empty */
		cpu = -1;
	else
		cpumask_set_cpu(cpu, &set->used);

	mutex_unlock(&affinity->lock);
	hfi1_cdbg(PROC, "Process assigned to CPU %d", cpu);

	free_cpumask_var(intrs_mask);
free_available_mask:
	free_cpumask_var(available_mask);
free_hw_thread_mask:
	free_cpumask_var(hw_thread_mask);
free_diff:
	free_cpumask_var(diff);
done:
	return cpu;
}

void hfi1_put_proc_affinity(int cpu)
{
	struct hfi1_affinity_node_list *affinity = &node_affinity;
	struct cpu_mask_set *set = &affinity->proc;

	if (cpu < 0)
		return;

	mutex_lock(&affinity->lock);
	cpumask_clear_cpu(cpu, &set->used);
	hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu);
	if (cpumask_empty(&set->used) && set->gen) {
		set->gen--;
		cpumask_copy(&set->used, &set->mask);
	}
	mutex_unlock(&affinity->lock);
}