xref: /linux/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c (revision 8e621c9a337555c914cf1664605edfaa6f839774)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2019 Mellanox Technologies. */
3 
4 #include <linux/pci.h>
5 #include <linux/interrupt.h>
6 #include <linux/notifier.h>
7 #include <linux/mlx5/driver.h>
8 #include <linux/mlx5/vport.h>
9 #include "mlx5_core.h"
10 #include "mlx5_irq.h"
11 #include "pci_irq.h"
12 #include "lib/sf.h"
13 #include "lib/eq.h"
14 #ifdef CONFIG_RFS_ACCEL
15 #include <linux/cpu_rmap.h>
16 #endif
17 
/* Number of SFs served by a single SF control IRQ */
#define MLX5_SFS_PER_CTRL_IRQ 64
/* Per-SF MSI-X budget used when sizing the total number of vectors */
#define MLX5_MAX_MSIX_PER_SF 256
/* Upper bound on the number of SF control IRQs */
#define MLX5_IRQ_CTRL_SF_MAX 8
/* min num of vectors for SFs to be enabled */
#define MLX5_IRQ_VEC_COMP_BASE_SF 2
/* Pool index of the first completion vector; index 0 is the async vector */
#define MLX5_IRQ_VEC_COMP_BASE 1

/* EQ sharing thresholds handed to irq_pool_alloc() for the completion (COMP)
 * and control (CTRL) pools.
 */
#define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
#define MLX5_EQ_SHARE_IRQ_MIN_COMP (1)
#define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4)
29 
30 struct mlx5_irq {
31 	struct atomic_notifier_head nh;
32 	cpumask_var_t mask;
33 	char name[MLX5_MAX_IRQ_FORMATTED_NAME];
34 	struct mlx5_irq_pool *pool;
35 	int refcount;
36 	struct msi_map map;
37 	u32 pool_index;
38 };
39 
/* The IRQ pools of a device. Only pcif_pool is always created; the two SF
 * pools stay unset when irq_pools_init() decides not to create them.
 */
struct mlx5_irq_table {
	/* vectors of the PCI function itself */
	struct mlx5_irq_pool *pcif_pool;
	/* control vectors shared by SFs */
	struct mlx5_irq_pool *sf_ctrl_pool;
	/* completion vectors shared by SFs */
	struct mlx5_irq_pool *sf_comp_pool;
};
45 
/* Translate a function index into a vport number.
 * Regular functions map one-to-one. EC VF functions are offset from
 * mlx5_core_ec_vf_vport_base(), with function 1 landing on the base.
 */
static int mlx5_core_func_to_vport(const struct mlx5_core_dev *dev,
				   int func,
				   bool ec_vf_func)
{
	return ec_vf_func ? mlx5_core_ec_vf_vport_base(dev) + func - 1 : func;
}
54 
55 /**
56  * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors
57  *                                   to be assigned to each VF.
58  * @dev: PF to work on
59  * @num_vfs: Number of enabled VFs
60  */
mlx5_get_default_msix_vec_count(struct mlx5_core_dev * dev,int num_vfs)61 int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs)
62 {
63 	int num_vf_msix, min_msix, max_msix;
64 
65 	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
66 	if (!num_vf_msix)
67 		return 0;
68 
69 	min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
70 	max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
71 
72 	/* Limit maximum number of MSI-X vectors so the default configuration
73 	 * has some available in the pool. This will allow the user to increase
74 	 * the number of vectors in a VF without having to first size-down other
75 	 * VFs.
76 	 */
77 	return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix);
78 }
79 
80 /**
81  * mlx5_set_msix_vec_count - Set dynamically allocated MSI-X on the VF
82  * @dev: PF to work on
83  * @function_id: Internal PCI VF function IDd
84  * @msix_vec_count: Number of MSI-X vectors to set
85  */
mlx5_set_msix_vec_count(struct mlx5_core_dev * dev,int function_id,int msix_vec_count)86 int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
87 			    int msix_vec_count)
88 {
89 	int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
90 	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
91 	void *hca_cap = NULL, *query_cap = NULL, *cap;
92 	int num_vf_msix, min_msix, max_msix;
93 	bool ec_vf_function;
94 	int vport;
95 	int ret;
96 
97 	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
98 	if (!num_vf_msix)
99 		return 0;
100 
101 	if (!MLX5_CAP_GEN(dev, vport_group_manager) || !mlx5_core_is_pf(dev))
102 		return -EOPNOTSUPP;
103 
104 	min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
105 	max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
106 
107 	if (msix_vec_count < min_msix)
108 		return -EINVAL;
109 
110 	if (msix_vec_count > max_msix)
111 		return -EOVERFLOW;
112 
113 	query_cap = kvzalloc(query_sz, GFP_KERNEL);
114 	hca_cap = kvzalloc(set_sz, GFP_KERNEL);
115 	if (!hca_cap || !query_cap) {
116 		ret = -ENOMEM;
117 		goto out;
118 	}
119 
120 	ec_vf_function = mlx5_core_ec_sriov_enabled(dev);
121 	vport = mlx5_core_func_to_vport(dev, function_id, ec_vf_function);
122 	ret = mlx5_vport_get_other_func_general_cap(dev, vport, query_cap);
123 	if (ret)
124 		goto out;
125 
126 	cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
127 	memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability),
128 	       MLX5_UN_SZ_BYTES(hca_cap_union));
129 	MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count);
130 
131 	MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
132 	MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1);
133 	MLX5_SET(set_hca_cap_in, hca_cap, ec_vf_function, ec_vf_function);
134 	MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id);
135 
136 	MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
137 		 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
138 	ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
139 out:
140 	kvfree(hca_cap);
141 	kvfree(query_cap);
142 	return ret;
143 }
144 
145 /* mlx5_system_free_irq - Free an IRQ
146  * @irq: IRQ to free
147  *
148  * Free the IRQ and other resources such as rmap from the system.
149  * BUT doesn't free or remove reference from mlx5.
150  * This function is very important for the shutdown flow, where we need to
151  * cleanup system resources but keep mlx5 objects alive,
152  * see mlx5_irq_table_free_irqs().
153  */
mlx5_system_free_irq(struct mlx5_irq * irq)154 static void mlx5_system_free_irq(struct mlx5_irq *irq)
155 {
156 	struct mlx5_irq_pool *pool = irq->pool;
157 #ifdef CONFIG_RFS_ACCEL
158 	struct cpu_rmap *rmap;
159 #endif
160 
161 	/* free_irq requires that affinity_hint and rmap will be cleared before
162 	 * calling it. To satisfy this requirement, we call
163 	 * irq_cpu_rmap_remove() to remove the notifier
164 	 */
165 	irq_update_affinity_hint(irq->map.virq, NULL);
166 #ifdef CONFIG_RFS_ACCEL
167 	rmap = mlx5_eq_table_get_rmap(pool->dev);
168 	if (rmap)
169 		irq_cpu_rmap_remove(rmap, irq->map.virq);
170 #endif
171 
172 	free_irq(irq->map.virq, &irq->nh);
173 	if (irq->map.index && pci_msix_can_alloc_dyn(pool->dev->pdev))
174 		pci_msix_free_irq(pool->dev->pdev, irq->map);
175 }
176 
irq_release(struct mlx5_irq * irq)177 static void irq_release(struct mlx5_irq *irq)
178 {
179 	struct mlx5_irq_pool *pool = irq->pool;
180 
181 	xa_erase(&pool->irqs, irq->pool_index);
182 	mlx5_system_free_irq(irq);
183 	free_cpumask_var(irq->mask);
184 	kfree(irq);
185 }
186 
mlx5_irq_put(struct mlx5_irq * irq)187 int mlx5_irq_put(struct mlx5_irq *irq)
188 {
189 	struct mlx5_irq_pool *pool = irq->pool;
190 	int ret = 0;
191 
192 	mutex_lock(&pool->lock);
193 	irq->refcount--;
194 	if (!irq->refcount) {
195 		irq_release(irq);
196 		ret = 1;
197 	}
198 	mutex_unlock(&pool->lock);
199 	return ret;
200 }
201 
mlx5_irq_read_locked(struct mlx5_irq * irq)202 int mlx5_irq_read_locked(struct mlx5_irq *irq)
203 {
204 	lockdep_assert_held(&irq->pool->lock);
205 	return irq->refcount;
206 }
207 
mlx5_irq_get_locked(struct mlx5_irq * irq)208 int mlx5_irq_get_locked(struct mlx5_irq *irq)
209 {
210 	lockdep_assert_held(&irq->pool->lock);
211 	if (WARN_ON_ONCE(!irq->refcount))
212 		return 0;
213 	irq->refcount++;
214 	return 1;
215 }
216 
irq_get(struct mlx5_irq * irq)217 static int irq_get(struct mlx5_irq *irq)
218 {
219 	int err;
220 
221 	mutex_lock(&irq->pool->lock);
222 	err = mlx5_irq_get_locked(irq);
223 	mutex_unlock(&irq->pool->lock);
224 	return err;
225 }
226 
irq_int_handler(int irq,void * nh)227 static irqreturn_t irq_int_handler(int irq, void *nh)
228 {
229 	atomic_notifier_call_chain(nh, 0, NULL);
230 	return IRQ_HANDLED;
231 }
232 
irq_sf_set_name(struct mlx5_irq_pool * pool,char * name,int vecidx)233 static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
234 {
235 	snprintf(name, MLX5_MAX_IRQ_NAME, "%s%d", pool->name, vecidx);
236 }
237 
irq_set_name(struct mlx5_irq_pool * pool,char * name,int vecidx)238 static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
239 {
240 	if (!pool->xa_num_irqs.max) {
241 		/* in case we only have a single irq for the device */
242 		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_combined%d", vecidx);
243 		return;
244 	}
245 
246 	if (!vecidx) {
247 		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
248 		return;
249 	}
250 
251 	vecidx -= MLX5_IRQ_VEC_COMP_BASE;
252 	snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
253 }
254 
mlx5_irq_alloc(struct mlx5_irq_pool * pool,int i,struct irq_affinity_desc * af_desc,struct cpu_rmap ** rmap)255 struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
256 				struct irq_affinity_desc *af_desc,
257 				struct cpu_rmap **rmap)
258 {
259 	struct mlx5_core_dev *dev = pool->dev;
260 	char name[MLX5_MAX_IRQ_NAME];
261 	struct mlx5_irq *irq;
262 	int err;
263 
264 	irq = kzalloc(sizeof(*irq), GFP_KERNEL);
265 	if (!irq || !zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
266 		kfree(irq);
267 		return ERR_PTR(-ENOMEM);
268 	}
269 
270 	if (!i || !pci_msix_can_alloc_dyn(dev->pdev)) {
271 		/* The vector at index 0 is always statically allocated. If
272 		 * dynamic irq is not supported all vectors are statically
273 		 * allocated. In both cases just get the irq number and set
274 		 * the index.
275 		 */
276 		irq->map.virq = pci_irq_vector(dev->pdev, i);
277 		irq->map.index = i;
278 	} else {
279 		irq->map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, af_desc);
280 		if (!irq->map.virq) {
281 			err = irq->map.index;
282 			goto err_alloc_irq;
283 		}
284 	}
285 
286 	if (i && rmap && *rmap) {
287 #ifdef CONFIG_RFS_ACCEL
288 		err = irq_cpu_rmap_add(*rmap, irq->map.virq);
289 		if (err)
290 			goto err_irq_rmap;
291 #endif
292 	}
293 	if (!mlx5_irq_pool_is_sf_pool(pool))
294 		irq_set_name(pool, name, i);
295 	else
296 		irq_sf_set_name(pool, name, i);
297 	ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
298 	snprintf(irq->name, MLX5_MAX_IRQ_FORMATTED_NAME,
299 		 MLX5_IRQ_NAME_FORMAT_STR, name, pci_name(dev->pdev));
300 	err = request_irq(irq->map.virq, irq_int_handler, 0, irq->name,
301 			  &irq->nh);
302 	if (err) {
303 		mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
304 		goto err_req_irq;
305 	}
306 
307 	if (af_desc) {
308 		cpumask_copy(irq->mask, &af_desc->mask);
309 		irq_set_affinity_and_hint(irq->map.virq, irq->mask);
310 	}
311 	irq->pool = pool;
312 	irq->refcount = 1;
313 	irq->pool_index = i;
314 	err = xa_err(xa_store(&pool->irqs, irq->pool_index, irq, GFP_KERNEL));
315 	if (err) {
316 		mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
317 			      irq->pool_index, err);
318 		goto err_xa;
319 	}
320 	return irq;
321 err_xa:
322 	if (af_desc)
323 		irq_update_affinity_hint(irq->map.virq, NULL);
324 	free_irq(irq->map.virq, &irq->nh);
325 err_req_irq:
326 #ifdef CONFIG_RFS_ACCEL
327 	if (i && rmap && *rmap)
328 		irq_cpu_rmap_remove(*rmap, irq->map.virq);
329 err_irq_rmap:
330 #endif
331 	if (i && pci_msix_can_alloc_dyn(dev->pdev))
332 		pci_msix_free_irq(dev->pdev, irq->map);
333 err_alloc_irq:
334 	free_cpumask_var(irq->mask);
335 	kfree(irq);
336 	return ERR_PTR(err);
337 }
338 
mlx5_irq_attach_nb(struct mlx5_irq * irq,struct notifier_block * nb)339 int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
340 {
341 	int ret;
342 
343 	ret = irq_get(irq);
344 	if (!ret)
345 		/* Something very bad happens here, we are enabling EQ
346 		 * on non-existing IRQ.
347 		 */
348 		return -ENOENT;
349 	ret = atomic_notifier_chain_register(&irq->nh, nb);
350 	if (ret)
351 		mlx5_irq_put(irq);
352 	return ret;
353 }
354 
mlx5_irq_detach_nb(struct mlx5_irq * irq,struct notifier_block * nb)355 int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
356 {
357 	int err = 0;
358 
359 	err = atomic_notifier_chain_unregister(&irq->nh, nb);
360 	mlx5_irq_put(irq);
361 	return err;
362 }
363 
mlx5_irq_get_affinity_mask(struct mlx5_irq * irq)364 struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
365 {
366 	return irq->mask;
367 }
368 
mlx5_irq_get_irq(const struct mlx5_irq * irq)369 int mlx5_irq_get_irq(const struct mlx5_irq *irq)
370 {
371 	return irq->map.virq;
372 }
373 
mlx5_irq_get_index(struct mlx5_irq * irq)374 int mlx5_irq_get_index(struct mlx5_irq *irq)
375 {
376 	return irq->map.index;
377 }
378 
mlx5_irq_get_pool(struct mlx5_irq * irq)379 struct mlx5_irq_pool *mlx5_irq_get_pool(struct mlx5_irq *irq)
380 {
381 	return irq->pool;
382 }
383 
384 /* irq_pool API */
385 
386 /* requesting an irq from a given pool according to given index */
387 static struct mlx5_irq *
irq_pool_request_vector(struct mlx5_irq_pool * pool,int vecidx,struct irq_affinity_desc * af_desc,struct cpu_rmap ** rmap)388 irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
389 			struct irq_affinity_desc *af_desc,
390 			struct cpu_rmap **rmap)
391 {
392 	struct mlx5_irq *irq;
393 
394 	mutex_lock(&pool->lock);
395 	irq = xa_load(&pool->irqs, vecidx);
396 	if (irq) {
397 		mlx5_irq_get_locked(irq);
398 		goto unlock;
399 	}
400 	irq = mlx5_irq_alloc(pool, vecidx, af_desc, rmap);
401 unlock:
402 	mutex_unlock(&pool->lock);
403 	return irq;
404 }
405 
sf_ctrl_irq_pool_get(struct mlx5_irq_table * irq_table)406 static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table)
407 {
408 	return irq_table->sf_ctrl_pool;
409 }
410 
411 static struct mlx5_irq_pool *
sf_comp_irq_pool_get(struct mlx5_irq_table * irq_table)412 sf_comp_irq_pool_get(struct mlx5_irq_table *irq_table)
413 {
414 	return irq_table->sf_comp_pool;
415 }
416 
417 struct mlx5_irq_pool *
mlx5_irq_table_get_comp_irq_pool(struct mlx5_core_dev * dev)418 mlx5_irq_table_get_comp_irq_pool(struct mlx5_core_dev *dev)
419 {
420 	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
421 	struct mlx5_irq_pool *pool = NULL;
422 
423 	if (mlx5_core_is_sf(dev))
424 		pool = sf_comp_irq_pool_get(irq_table);
425 
426 	/* In some configs, there won't be a pool of SFs IRQs. Hence, returning
427 	 * the PF IRQs pool in case the SF pool doesn't exist.
428 	 */
429 	return pool ? pool : irq_table->pcif_pool;
430 }
431 
ctrl_irq_pool_get(struct mlx5_core_dev * dev)432 static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
433 {
434 	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
435 	struct mlx5_irq_pool *pool = NULL;
436 
437 	if (mlx5_core_is_sf(dev))
438 		pool = sf_ctrl_irq_pool_get(irq_table);
439 
440 	/* In some configs, there won't be a pool of SFs IRQs. Hence, returning
441 	 * the PF IRQs pool in case the SF pool doesn't exist.
442 	 */
443 	return pool ? pool : irq_table->pcif_pool;
444 }
445 
_mlx5_irq_release(struct mlx5_irq * irq)446 static void _mlx5_irq_release(struct mlx5_irq *irq)
447 {
448 	synchronize_irq(irq->map.virq);
449 	mlx5_irq_put(irq);
450 }
451 
/**
 * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system.
 * @dev: mlx5 device that is releasing the IRQ.
 * @ctrl_irq: ctrl IRQ to be released.
 *
 * The release itself is delegated to mlx5_irq_affinity_irq_release().
 */
void mlx5_ctrl_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *ctrl_irq)
{
	mlx5_irq_affinity_irq_release(dev, ctrl_irq);
}
461 
462 /**
463  * mlx5_ctrl_irq_request - request a ctrl IRQ for mlx5 device.
464  * @dev: mlx5 device that requesting the IRQ.
465  *
466  * This function returns a pointer to IRQ, or ERR_PTR in case of error.
467  */
mlx5_ctrl_irq_request(struct mlx5_core_dev * dev)468 struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
469 {
470 	struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev);
471 	struct irq_affinity_desc *af_desc;
472 	struct mlx5_irq *irq;
473 
474 	af_desc = kvzalloc(sizeof(*af_desc), GFP_KERNEL);
475 	if (!af_desc)
476 		return ERR_PTR(-ENOMEM);
477 
478 	cpumask_copy(&af_desc->mask, cpu_online_mask);
479 	af_desc->is_managed = false;
480 	if (!mlx5_irq_pool_is_sf_pool(pool)) {
481 		/* In case we are allocating a control IRQ from a pci device's pool.
482 		 * This can happen also for a SF if the SFs pool is empty.
483 		 */
484 		if (!pool->xa_num_irqs.max) {
485 			cpumask_clear(&af_desc->mask);
486 			/* In case we only have a single IRQ for PF/VF */
487 			cpumask_set_cpu(cpumask_first(cpu_online_mask), &af_desc->mask);
488 		}
489 		/* Allocate the IRQ in index 0. The vector was already allocated */
490 		irq = irq_pool_request_vector(pool, 0, af_desc, NULL);
491 	} else {
492 		irq = mlx5_irq_affinity_request(dev, pool, af_desc);
493 	}
494 
495 	kvfree(af_desc);
496 
497 	return irq;
498 }
499 
500 /**
501  * mlx5_irq_request - request an IRQ for mlx5 PF/VF device.
502  * @dev: mlx5 device that requesting the IRQ.
503  * @vecidx: vector index of the IRQ. This argument is ignore if affinity is
504  * provided.
505  * @af_desc: affinity descriptor for this IRQ.
506  * @rmap: pointer to reverse map pointer for completion interrupts
507  *
508  * This function returns a pointer to IRQ, or ERR_PTR in case of error.
509  */
mlx5_irq_request(struct mlx5_core_dev * dev,u16 vecidx,struct irq_affinity_desc * af_desc,struct cpu_rmap ** rmap)510 struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
511 				  struct irq_affinity_desc *af_desc,
512 				  struct cpu_rmap **rmap)
513 {
514 	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
515 	struct mlx5_irq_pool *pool;
516 	struct mlx5_irq *irq;
517 
518 	pool = irq_table->pcif_pool;
519 	irq = irq_pool_request_vector(pool, vecidx, af_desc, rmap);
520 	if (IS_ERR(irq))
521 		return irq;
522 	mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
523 		      irq->map.virq, cpumask_pr_args(&af_desc->mask),
524 		      irq->refcount / MLX5_EQ_REFS_PER_IRQ);
525 	return irq;
526 }
527 
/**
 * mlx5_irq_release_vector - release one IRQ back to the system.
 * @irq: the irq to release.
 *
 * Waits for running handlers of the IRQ and drops one reference on it.
 */
void mlx5_irq_release_vector(struct mlx5_irq *irq)
{
	_mlx5_irq_release(irq);
}
536 
537 /**
538  * mlx5_irq_request_vector - request one IRQ for mlx5 device.
539  * @dev: mlx5 device that is requesting the IRQ.
540  * @cpu: CPU to bind the IRQ to.
541  * @vecidx: vector index to request an IRQ for.
542  * @rmap: pointer to reverse map pointer for completion interrupts
543  *
544  * Each IRQ is bound to at most 1 CPU.
545  * This function is requests one IRQ, for the given @vecidx.
546  *
547  * This function returns a pointer to the irq on success, or an error pointer
548  * in case of an error.
549  */
mlx5_irq_request_vector(struct mlx5_core_dev * dev,u16 cpu,u16 vecidx,struct cpu_rmap ** rmap)550 struct mlx5_irq *mlx5_irq_request_vector(struct mlx5_core_dev *dev, u16 cpu,
551 					 u16 vecidx, struct cpu_rmap **rmap)
552 {
553 	struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
554 	struct mlx5_irq_pool *pool = table->pcif_pool;
555 	int offset = MLX5_IRQ_VEC_COMP_BASE;
556 	struct irq_affinity_desc *af_desc;
557 	struct mlx5_irq *irq;
558 
559 	af_desc = kvzalloc(sizeof(*af_desc), GFP_KERNEL);
560 	if (!af_desc)
561 		return ERR_PTR(-ENOMEM);
562 
563 	if (!pool->xa_num_irqs.max)
564 		offset = 0;
565 
566 	af_desc->is_managed = false;
567 	cpumask_clear(&af_desc->mask);
568 	cpumask_set_cpu(cpu, &af_desc->mask);
569 
570 	irq = mlx5_irq_request(dev, vecidx + offset, af_desc, rmap);
571 
572 	kvfree(af_desc);
573 
574 	return irq;
575 }
576 
577 static struct mlx5_irq_pool *
irq_pool_alloc(struct mlx5_core_dev * dev,int start,int size,char * name,u32 min_threshold,u32 max_threshold)578 irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
579 	       u32 min_threshold, u32 max_threshold)
580 {
581 	struct mlx5_irq_pool *pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
582 
583 	if (!pool)
584 		return ERR_PTR(-ENOMEM);
585 	pool->dev = dev;
586 	mutex_init(&pool->lock);
587 	xa_init_flags(&pool->irqs, XA_FLAGS_ALLOC);
588 	pool->xa_num_irqs.min = start;
589 	pool->xa_num_irqs.max = start + size - 1;
590 	if (name)
591 		snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS,
592 			 "%s", name);
593 	pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ;
594 	pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ;
595 	mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
596 		      name ? name : "mlx5_pcif_pool", size, start);
597 	return pool;
598 }
599 
irq_pool_free(struct mlx5_irq_pool * pool)600 static void irq_pool_free(struct mlx5_irq_pool *pool)
601 {
602 	struct mlx5_irq *irq;
603 	unsigned long index;
604 
605 	/* There are cases in which we are destroying the irq_table before
606 	 * freeing all the IRQs, fast teardown for example. Hence, free the irqs
607 	 * which might not have been freed.
608 	 */
609 	xa_for_each(&pool->irqs, index, irq)
610 		irq_release(irq);
611 	xa_destroy(&pool->irqs);
612 	mutex_destroy(&pool->lock);
613 	kfree(pool->irqs_per_cpu);
614 	kvfree(pool);
615 }
616 
/* Create the IRQ pools of @dev.
 * @dev: device whose irq_table is populated
 * @sf_vec: number of vectors available for the SF pools
 * @pcif_vec: number of vectors of the PCI function pool
 * @dynamic_vec: true when MSI-X vectors can be allocated dynamically
 *
 * The PCI function pool is always created. The SF control and completion
 * pools are created only when the device has SFs and enough vectors; in the
 * other cases the SF pool pointers are left NULL and 0 is returned.
 *
 * Returns 0 on success or a negative errno; on failure no pool is left
 * allocated and every pool pointer in the table is NULL.
 */
static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec,
			  bool dynamic_vec)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;
	int sf_vec_available = sf_vec;
	int num_sf_ctrl;
	int err;

	/* The table is allocated in mlx5_irq_table_init() and is not freed by
	 * mlx5_irq_table_destroy(), so it may still carry SF pool pointers
	 * from an earlier create/destroy cycle. Reset them so the early
	 * "no SF pools" returns below can't leave stale pointers behind.
	 */
	table->sf_ctrl_pool = NULL;
	table->sf_comp_pool = NULL;

	/* init pcif_pool */
	table->pcif_pool = irq_pool_alloc(dev, 0, pcif_vec, NULL,
					  MLX5_EQ_SHARE_IRQ_MIN_COMP,
					  MLX5_EQ_SHARE_IRQ_MAX_COMP);
	if (IS_ERR(table->pcif_pool)) {
		err = PTR_ERR(table->pcif_pool);
		table->pcif_pool = NULL;
		return err;
	}
	if (!mlx5_sf_max_functions(dev))
		return 0;
	if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
		mlx5_core_dbg(dev, "Not enough IRQs for SFs. SF may run at lower performance\n");
		return 0;
	}

	/* init sf_ctrl_pool */
	num_sf_ctrl = DIV_ROUND_UP(mlx5_sf_max_functions(dev),
				   MLX5_SFS_PER_CTRL_IRQ);
	num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
	if (!dynamic_vec && (num_sf_ctrl + 1) > sf_vec_available) {
		mlx5_core_dbg(dev,
			      "Not enough IRQs for SFs control and completion pool, required=%d avail=%d\n",
			      num_sf_ctrl + 1, sf_vec_available);
		return 0;
	}

	table->sf_ctrl_pool = irq_pool_alloc(dev, pcif_vec, num_sf_ctrl,
					     "mlx5_sf_ctrl",
					     MLX5_EQ_SHARE_IRQ_MIN_CTRL,
					     MLX5_EQ_SHARE_IRQ_MAX_CTRL);
	if (IS_ERR(table->sf_ctrl_pool)) {
		err = PTR_ERR(table->sf_ctrl_pool);
		goto err_pf;
	}
	sf_vec_available -= num_sf_ctrl;

	/* init sf_comp_pool, remaining vectors are for the SF completions */
	table->sf_comp_pool = irq_pool_alloc(dev, pcif_vec + num_sf_ctrl,
					     sf_vec_available, "mlx5_sf_comp",
					     MLX5_EQ_SHARE_IRQ_MIN_COMP,
					     MLX5_EQ_SHARE_IRQ_MAX_COMP);
	if (IS_ERR(table->sf_comp_pool)) {
		err = PTR_ERR(table->sf_comp_pool);
		goto err_sf_ctrl;
	}

	table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL);
	if (!table->sf_comp_pool->irqs_per_cpu) {
		err = -ENOMEM;
		goto err_irqs_per_cpu;
	}

	return 0;

err_irqs_per_cpu:
	irq_pool_free(table->sf_comp_pool);
err_sf_ctrl:
	irq_pool_free(table->sf_ctrl_pool);
err_pf:
	irq_pool_free(table->pcif_pool);
	/* Don't leave ERR_PTR values or freed pools behind in the table. */
	table->sf_comp_pool = NULL;
	table->sf_ctrl_pool = NULL;
	table->pcif_pool = NULL;
	return err;
}
685 
irq_pools_destroy(struct mlx5_irq_table * table)686 static void irq_pools_destroy(struct mlx5_irq_table *table)
687 {
688 	if (table->sf_ctrl_pool) {
689 		irq_pool_free(table->sf_comp_pool);
690 		irq_pool_free(table->sf_ctrl_pool);
691 	}
692 	irq_pool_free(table->pcif_pool);
693 }
694 
mlx5_irq_pool_free_irqs(struct mlx5_irq_pool * pool)695 static void mlx5_irq_pool_free_irqs(struct mlx5_irq_pool *pool)
696 {
697 	struct mlx5_irq *irq;
698 	unsigned long index;
699 
700 	xa_for_each(&pool->irqs, index, irq)
701 		mlx5_system_free_irq(irq);
702 
703 }
704 
mlx5_irq_pools_free_irqs(struct mlx5_irq_table * table)705 static void mlx5_irq_pools_free_irqs(struct mlx5_irq_table *table)
706 {
707 	if (table->sf_ctrl_pool) {
708 		mlx5_irq_pool_free_irqs(table->sf_comp_pool);
709 		mlx5_irq_pool_free_irqs(table->sf_ctrl_pool);
710 	}
711 	mlx5_irq_pool_free_irqs(table->pcif_pool);
712 }
713 
714 /* irq_table API */
715 
mlx5_irq_table_init(struct mlx5_core_dev * dev)716 int mlx5_irq_table_init(struct mlx5_core_dev *dev)
717 {
718 	struct mlx5_irq_table *irq_table;
719 
720 	if (mlx5_core_is_sf(dev))
721 		return 0;
722 
723 	irq_table = kvzalloc_node(sizeof(*irq_table), GFP_KERNEL,
724 				  dev->priv.numa_node);
725 	if (!irq_table)
726 		return -ENOMEM;
727 
728 	dev->priv.irq_table = irq_table;
729 	return 0;
730 }
731 
mlx5_irq_table_cleanup(struct mlx5_core_dev * dev)732 void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
733 {
734 	if (mlx5_core_is_sf(dev))
735 		return;
736 
737 	kvfree(dev->priv.irq_table);
738 }
739 
mlx5_irq_table_get_num_comp(struct mlx5_irq_table * table)740 int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
741 {
742 	if (!table->pcif_pool->xa_num_irqs.max)
743 		return 1;
744 	return table->pcif_pool->xa_num_irqs.max - table->pcif_pool->xa_num_irqs.min;
745 }
746 
mlx5_irq_table_create(struct mlx5_core_dev * dev)747 int mlx5_irq_table_create(struct mlx5_core_dev *dev)
748 {
749 	int num_eqs = mlx5_max_eq_cap_get(dev);
750 	bool dynamic_vec;
751 	int total_vec;
752 	int pcif_vec;
753 	int req_vec;
754 	int err;
755 	int n;
756 
757 	if (mlx5_core_is_sf(dev))
758 		return 0;
759 
760 	/* PCI PF vectors usage is limited by online cpus, device EQs and
761 	 * PCI MSI-X capability.
762 	 */
763 	pcif_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
764 	pcif_vec = min_t(int, pcif_vec, num_eqs);
765 	pcif_vec = min_t(int, pcif_vec, pci_msix_vec_count(dev->pdev));
766 
767 	total_vec = pcif_vec;
768 	if (mlx5_sf_max_functions(dev))
769 		total_vec += MLX5_MAX_MSIX_PER_SF * mlx5_sf_max_functions(dev);
770 	total_vec = min_t(int, total_vec, pci_msix_vec_count(dev->pdev));
771 
772 	req_vec = pci_msix_can_alloc_dyn(dev->pdev) ? 1 : total_vec;
773 	n = pci_alloc_irq_vectors(dev->pdev, 1, req_vec, PCI_IRQ_MSIX);
774 	if (n < 0)
775 		return n;
776 
777 	/* Further limit vectors of the pools based on platform for non dynamic case */
778 	dynamic_vec = pci_msix_can_alloc_dyn(dev->pdev);
779 	if (!dynamic_vec) {
780 		pcif_vec = min_t(int, n, pcif_vec);
781 		total_vec = min_t(int, n, total_vec);
782 	}
783 
784 	err = irq_pools_init(dev, total_vec - pcif_vec, pcif_vec, dynamic_vec);
785 	if (err)
786 		pci_free_irq_vectors(dev->pdev);
787 
788 	return err;
789 }
790 
mlx5_irq_table_destroy(struct mlx5_core_dev * dev)791 void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
792 {
793 	struct mlx5_irq_table *table = dev->priv.irq_table;
794 
795 	if (mlx5_core_is_sf(dev))
796 		return;
797 
798 	/* There are cases where IRQs still will be in used when we reaching
799 	 * to here. Hence, making sure all the irqs are released.
800 	 */
801 	irq_pools_destroy(table);
802 	pci_free_irq_vectors(dev->pdev);
803 }
804 
mlx5_irq_table_free_irqs(struct mlx5_core_dev * dev)805 void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev)
806 {
807 	struct mlx5_irq_table *table = dev->priv.irq_table;
808 
809 	if (mlx5_core_is_sf(dev))
810 		return;
811 
812 	mlx5_irq_pools_free_irqs(table);
813 	pci_free_irq_vectors(dev->pdev);
814 }
815 
mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table * table)816 int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
817 {
818 	if (table->sf_comp_pool)
819 		return min_t(int, num_online_cpus(),
820 			     table->sf_comp_pool->xa_num_irqs.max -
821 			     table->sf_comp_pool->xa_num_irqs.min + 1);
822 	else
823 		return mlx5_irq_table_get_num_comp(table);
824 }
825 
mlx5_irq_table_get(struct mlx5_core_dev * dev)826 struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev)
827 {
828 #ifdef CONFIG_MLX5_SF
829 	if (mlx5_core_is_sf(dev))
830 		return dev->priv.parent_mdev->priv.irq_table;
831 #endif
832 	return dev->priv.irq_table;
833 }
834