xref: /linux/drivers/net/ethernet/mellanox/mlxsw/pci.c (revision 54fd6bd42e7bd351802ff1d193a2e33e4bfb1836)
1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */
3 
4 #include <linux/kernel.h>
5 #include <linux/module.h>
6 #include <linux/export.h>
7 #include <linux/err.h>
8 #include <linux/device.h>
9 #include <linux/pci.h>
10 #include <linux/interrupt.h>
11 #include <linux/types.h>
12 #include <linux/skbuff.h>
13 #include <linux/if_vlan.h>
14 #include <linux/log2.h>
15 #include <linux/string.h>
16 #include <net/page_pool/helpers.h>
17 
18 #include "pci_hw.h"
19 #include "pci.h"
20 #include "core.h"
21 #include "cmd.h"
22 #include "port.h"
23 #include "resources.h"
24 #include "txheader.h"
25 
/*
 * 32-bit big-endian register accessors. 'reg' is given without its
 * MLXSW_PCI_ prefix; the prefix is pasted on and the result is used as an
 * offset from the device's 'hw_addr' base.
 */
#define mlxsw_pci_read32(mlxsw_pci, reg) \
	ioread32be((mlxsw_pci)->hw_addr + (MLXSW_PCI_ ## reg))
#define mlxsw_pci_write32(mlxsw_pci, reg, val) \
	iowrite32be((val), (mlxsw_pci)->hw_addr + (MLXSW_PCI_ ## reg))
30 
/*
 * Kinds of hardware queues. The values are used as array indices (doorbell
 * offset tables, mlxsw_pci::queues[]), so they are spelled out explicitly.
 */
enum mlxsw_pci_queue_type {
	MLXSW_PCI_QUEUE_TYPE_SDQ = 0,
	MLXSW_PCI_QUEUE_TYPE_RDQ = 1,
	MLXSW_PCI_QUEUE_TYPE_CQ = 2,
	MLXSW_PCI_QUEUE_TYPE_EQ = 3,
};

/* Number of enumerators in enum mlxsw_pci_queue_type. */
#define MLXSW_PCI_QUEUE_TYPE_COUNT	4
39 
/* Role of a completion queue: serving a send queue or a receive queue. */
enum mlxsw_pci_cq_type {
	MLXSW_PCI_CQ_SDQ = 0,
	MLXSW_PCI_CQ_RDQ = 1,
};
44 
45 static const u16 mlxsw_pci_doorbell_type_offset[] = {
46 	MLXSW_PCI_DOORBELL_SDQ_OFFSET,	/* for type MLXSW_PCI_QUEUE_TYPE_SDQ */
47 	MLXSW_PCI_DOORBELL_RDQ_OFFSET,	/* for type MLXSW_PCI_QUEUE_TYPE_RDQ */
48 	MLXSW_PCI_DOORBELL_CQ_OFFSET,	/* for type MLXSW_PCI_QUEUE_TYPE_CQ */
49 	MLXSW_PCI_DOORBELL_EQ_OFFSET,	/* for type MLXSW_PCI_QUEUE_TYPE_EQ */
50 };
51 
52 static const u16 mlxsw_pci_doorbell_arm_type_offset[] = {
53 	0, /* unused */
54 	0, /* unused */
55 	MLXSW_PCI_DOORBELL_ARM_CQ_OFFSET, /* for type MLXSW_PCI_QUEUE_TYPE_CQ */
56 	MLXSW_PCI_DOORBELL_ARM_EQ_OFFSET, /* for type MLXSW_PCI_QUEUE_TYPE_EQ */
57 };
58 
59 struct mlxsw_pci_mem_item {
60 	char *buf;
61 	dma_addr_t mapaddr;
62 	size_t size;
63 };
64 
65 struct mlxsw_pci_queue_elem_info {
66 	struct page *pages[MLXSW_PCI_WQE_SG_ENTRIES];
67 	char *elem; /* pointer to actual dma mapped element mem chunk */
68 	struct {
69 		struct sk_buff *skb;
70 	} sdq;
71 };
72 
73 struct mlxsw_pci_queue {
74 	spinlock_t lock; /* for queue accesses */
75 	struct mlxsw_pci_mem_item mem_item;
76 	struct mlxsw_pci_queue_elem_info *elem_info;
77 	u16 producer_counter;
78 	u16 consumer_counter;
79 	u16 count; /* number of elements in queue */
80 	u8 num; /* queue number */
81 	u8 elem_size; /* size of one element */
82 	enum mlxsw_pci_queue_type type;
83 	struct mlxsw_pci *pci;
84 	union {
85 		struct {
86 			enum mlxsw_pci_cqe_v v;
87 			struct mlxsw_pci_queue *dq;
88 			struct napi_struct napi;
89 			struct page_pool *page_pool;
90 		} cq;
91 		struct {
92 			struct tasklet_struct tasklet;
93 		} eq;
94 		struct {
95 			struct mlxsw_pci_queue *cq;
96 		} rdq;
97 	} u;
98 };
99 
100 struct mlxsw_pci_queue_type_group {
101 	struct mlxsw_pci_queue *q;
102 	u8 count; /* number of queues in group */
103 };
104 
105 struct mlxsw_pci {
106 	struct pci_dev *pdev;
107 	u8 __iomem *hw_addr;
108 	u64 free_running_clock_offset;
109 	u64 utc_sec_offset;
110 	u64 utc_nsec_offset;
111 	bool lag_mode_support;
112 	bool cff_support;
113 	enum mlxsw_cmd_mbox_config_profile_lag_mode lag_mode;
114 	enum mlxsw_cmd_mbox_config_profile_flood_mode flood_mode;
115 	u8 num_sg_entries; /* Number of scatter/gather entries for packets. */
116 	struct mlxsw_pci_queue_type_group queues[MLXSW_PCI_QUEUE_TYPE_COUNT];
117 	u32 doorbell_offset;
118 	struct mlxsw_core *core;
119 	struct {
120 		struct mlxsw_pci_mem_item *items;
121 		unsigned int count;
122 	} fw_area;
123 	struct {
124 		struct mlxsw_pci_mem_item out_mbox;
125 		struct mlxsw_pci_mem_item in_mbox;
126 		struct mutex lock; /* Lock access to command registers */
127 		struct {
128 			u8 status;
129 			u64 out_param;
130 		} comp;
131 	} cmd;
132 	struct mlxsw_bus_info bus_info;
133 	const struct pci_device_id *id;
134 	enum mlxsw_pci_cqe_v max_cqe_ver; /* Maximal supported CQE version */
135 	u8 num_cqs; /* Number of CQs */
136 	u8 num_sdqs; /* Number of SDQs */
137 	bool skip_reset;
138 	struct net_device *napi_dev_tx;
139 	struct net_device *napi_dev_rx;
140 };
141 
142 static int mlxsw_pci_napi_devs_init(struct mlxsw_pci *mlxsw_pci)
143 {
144 	int err;
145 
146 	mlxsw_pci->napi_dev_tx = alloc_netdev_dummy(0);
147 	if (!mlxsw_pci->napi_dev_tx)
148 		return -ENOMEM;
149 	strscpy(mlxsw_pci->napi_dev_tx->name, "mlxsw_tx",
150 		sizeof(mlxsw_pci->napi_dev_tx->name));
151 
152 	mlxsw_pci->napi_dev_rx = alloc_netdev_dummy(0);
153 	if (!mlxsw_pci->napi_dev_rx) {
154 		err = -ENOMEM;
155 		goto err_alloc_rx;
156 	}
157 	strscpy(mlxsw_pci->napi_dev_rx->name, "mlxsw_rx",
158 		sizeof(mlxsw_pci->napi_dev_rx->name));
159 	netif_threaded_enable(mlxsw_pci->napi_dev_rx);
160 
161 	return 0;
162 
163 err_alloc_rx:
164 	free_netdev(mlxsw_pci->napi_dev_tx);
165 	return err;
166 }
167 
168 static void mlxsw_pci_napi_devs_fini(struct mlxsw_pci *mlxsw_pci)
169 {
170 	free_netdev(mlxsw_pci->napi_dev_rx);
171 	free_netdev(mlxsw_pci->napi_dev_tx);
172 }
173 
174 static char *__mlxsw_pci_queue_elem_get(struct mlxsw_pci_queue *q,
175 					size_t elem_size, int elem_index)
176 {
177 	return q->mem_item.buf + (elem_size * elem_index);
178 }
179 
180 static struct mlxsw_pci_queue_elem_info *
181 mlxsw_pci_queue_elem_info_get(struct mlxsw_pci_queue *q, int elem_index)
182 {
183 	return &q->elem_info[elem_index];
184 }
185 
186 static struct mlxsw_pci_queue_elem_info *
187 mlxsw_pci_queue_elem_info_producer_get(struct mlxsw_pci_queue *q)
188 {
189 	int index = q->producer_counter & (q->count - 1);
190 
191 	if ((u16) (q->producer_counter - q->consumer_counter) == q->count)
192 		return NULL;
193 	return mlxsw_pci_queue_elem_info_get(q, index);
194 }
195 
196 static struct mlxsw_pci_queue_elem_info *
197 mlxsw_pci_queue_elem_info_consumer_get(struct mlxsw_pci_queue *q)
198 {
199 	int index = q->consumer_counter & (q->count - 1);
200 
201 	return mlxsw_pci_queue_elem_info_get(q, index);
202 }
203 
204 static char *mlxsw_pci_queue_elem_get(struct mlxsw_pci_queue *q, int elem_index)
205 {
206 	return mlxsw_pci_queue_elem_info_get(q, elem_index)->elem;
207 }
208 
209 static bool mlxsw_pci_elem_hw_owned(struct mlxsw_pci_queue *q, bool owner_bit)
210 {
211 	return owner_bit != !!(q->consumer_counter & q->count);
212 }
213 
214 static struct mlxsw_pci_queue_type_group *
215 mlxsw_pci_queue_type_group_get(struct mlxsw_pci *mlxsw_pci,
216 			       enum mlxsw_pci_queue_type q_type)
217 {
218 	return &mlxsw_pci->queues[q_type];
219 }
220 
221 static struct mlxsw_pci_queue *
222 __mlxsw_pci_queue_get(struct mlxsw_pci *mlxsw_pci,
223 		      enum mlxsw_pci_queue_type q_type, u8 q_num)
224 {
225 	return &mlxsw_pci->queues[q_type].q[q_num];
226 }
227 
228 static struct mlxsw_pci_queue *mlxsw_pci_sdq_get(struct mlxsw_pci *mlxsw_pci,
229 						 u8 q_num)
230 {
231 	return __mlxsw_pci_queue_get(mlxsw_pci,
232 				     MLXSW_PCI_QUEUE_TYPE_SDQ, q_num);
233 }
234 
235 static struct mlxsw_pci_queue *mlxsw_pci_cq_get(struct mlxsw_pci *mlxsw_pci,
236 						u8 q_num)
237 {
238 	return __mlxsw_pci_queue_get(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_CQ, q_num);
239 }
240 
241 static struct mlxsw_pci_queue *mlxsw_pci_eq_get(struct mlxsw_pci *mlxsw_pci)
242 {
243 	/* There is only one EQ at index 0. */
244 	return __mlxsw_pci_queue_get(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_EQ, 0);
245 }
246 
247 static void __mlxsw_pci_queue_doorbell_set(struct mlxsw_pci *mlxsw_pci,
248 					   struct mlxsw_pci_queue *q,
249 					   u16 val)
250 {
251 	mlxsw_pci_write32(mlxsw_pci,
252 			  DOORBELL(mlxsw_pci->doorbell_offset,
253 				   mlxsw_pci_doorbell_type_offset[q->type],
254 				   q->num), val);
255 }
256 
257 static void __mlxsw_pci_queue_doorbell_arm_set(struct mlxsw_pci *mlxsw_pci,
258 					       struct mlxsw_pci_queue *q,
259 					       u16 val)
260 {
261 	mlxsw_pci_write32(mlxsw_pci,
262 			  DOORBELL(mlxsw_pci->doorbell_offset,
263 				   mlxsw_pci_doorbell_arm_type_offset[q->type],
264 				   q->num), val);
265 }
266 
267 static void mlxsw_pci_queue_doorbell_producer_ring(struct mlxsw_pci *mlxsw_pci,
268 						   struct mlxsw_pci_queue *q)
269 {
270 	wmb(); /* ensure all writes are done before we ring a bell */
271 	__mlxsw_pci_queue_doorbell_set(mlxsw_pci, q, q->producer_counter);
272 }
273 
274 static void mlxsw_pci_queue_doorbell_consumer_ring(struct mlxsw_pci *mlxsw_pci,
275 						   struct mlxsw_pci_queue *q)
276 {
277 	wmb(); /* ensure all writes are done before we ring a bell */
278 	__mlxsw_pci_queue_doorbell_set(mlxsw_pci, q,
279 				       q->consumer_counter + q->count);
280 }
281 
282 static void
283 mlxsw_pci_queue_doorbell_arm_consumer_ring(struct mlxsw_pci *mlxsw_pci,
284 					   struct mlxsw_pci_queue *q)
285 {
286 	wmb(); /* ensure all writes are done before we ring a bell */
287 	__mlxsw_pci_queue_doorbell_arm_set(mlxsw_pci, q, q->consumer_counter);
288 }
289 
290 static dma_addr_t __mlxsw_pci_queue_page_get(struct mlxsw_pci_queue *q,
291 					     int page_index)
292 {
293 	return q->mem_item.mapaddr + MLXSW_PCI_PAGE_SIZE * page_index;
294 }
295 
296 static int mlxsw_pci_sdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
297 			      struct mlxsw_pci_queue *q)
298 {
299 	struct mlxsw_pci_queue *cq;
300 	int tclass;
301 	u8 cq_num;
302 	int lp;
303 	int i;
304 	int err;
305 
306 	q->producer_counter = 0;
307 	q->consumer_counter = 0;
308 	tclass = q->num == MLXSW_PCI_SDQ_EMAD_INDEX ? MLXSW_PCI_SDQ_EMAD_TC :
309 						      MLXSW_PCI_SDQ_CTL_TC;
310 	lp = q->num == MLXSW_PCI_SDQ_EMAD_INDEX ? MLXSW_CMD_MBOX_SW2HW_DQ_SDQ_LP_IGNORE_WQE :
311 						  MLXSW_CMD_MBOX_SW2HW_DQ_SDQ_LP_WQE;
312 
313 	/* Set CQ of same number of this SDQ. */
314 	cq_num = q->num;
315 	mlxsw_cmd_mbox_sw2hw_dq_cq_set(mbox, cq_num);
316 	mlxsw_cmd_mbox_sw2hw_dq_sdq_lp_set(mbox, lp);
317 	mlxsw_cmd_mbox_sw2hw_dq_sdq_tclass_set(mbox, tclass);
318 	mlxsw_cmd_mbox_sw2hw_dq_log2_dq_sz_set(mbox, 3); /* 8 pages */
319 	for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) {
320 		dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i);
321 
322 		mlxsw_cmd_mbox_sw2hw_dq_pa_set(mbox, i, mapaddr);
323 	}
324 
325 	err = mlxsw_cmd_sw2hw_sdq(mlxsw_pci->core, mbox, q->num);
326 	if (err)
327 		return err;
328 
329 	cq = mlxsw_pci_cq_get(mlxsw_pci, cq_num);
330 	cq->u.cq.dq = q;
331 	mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q);
332 	return 0;
333 }
334 
335 static void mlxsw_pci_sdq_fini(struct mlxsw_pci *mlxsw_pci,
336 			       struct mlxsw_pci_queue *q)
337 {
338 	mlxsw_cmd_hw2sw_sdq(mlxsw_pci->core, q->num);
339 }
340 
/* Headroom left in front of the packet data in the first Rx page. */
#define MLXSW_PCI_SKB_HEADROOM (NET_SKB_PAD + NET_IP_ALIGN)

/*
 * Part of the first Rx page that is not available for packet data: the
 * headroom in front plus the aligned skb_shared_info.
 */
#define MLXSW_PCI_RX_BUF_SW_OVERHEAD					\
	(MLXSW_PCI_SKB_HEADROOM +					\
	 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
346 
347 static void
348 mlxsw_pci_wqe_rx_frag_set(struct mlxsw_pci *mlxsw_pci, struct page *page,
349 			  char *wqe, int index, size_t frag_len)
350 {
351 	dma_addr_t mapaddr;
352 
353 	mapaddr = page_pool_get_dma_addr(page);
354 
355 	if (index == 0) {
356 		mapaddr += MLXSW_PCI_SKB_HEADROOM;
357 		frag_len = frag_len - MLXSW_PCI_RX_BUF_SW_OVERHEAD;
358 	}
359 
360 	mlxsw_pci_wqe_address_set(wqe, index, mapaddr);
361 	mlxsw_pci_wqe_byte_count_set(wqe, index, frag_len);
362 }
363 
364 static int mlxsw_pci_wqe_frag_map(struct mlxsw_pci *mlxsw_pci, char *wqe,
365 				  int index, char *frag_data, size_t frag_len,
366 				  int direction)
367 {
368 	struct pci_dev *pdev = mlxsw_pci->pdev;
369 	dma_addr_t mapaddr;
370 
371 	mapaddr = dma_map_single(&pdev->dev, frag_data, frag_len, direction);
372 	if (unlikely(dma_mapping_error(&pdev->dev, mapaddr))) {
373 		dev_err_ratelimited(&pdev->dev, "failed to dma map tx frag\n");
374 		return -EIO;
375 	}
376 	mlxsw_pci_wqe_address_set(wqe, index, mapaddr);
377 	mlxsw_pci_wqe_byte_count_set(wqe, index, frag_len);
378 	return 0;
379 }
380 
381 static void mlxsw_pci_wqe_frag_unmap(struct mlxsw_pci *mlxsw_pci, char *wqe,
382 				     int index, int direction)
383 {
384 	struct pci_dev *pdev = mlxsw_pci->pdev;
385 	size_t frag_len = mlxsw_pci_wqe_byte_count_get(wqe, index);
386 	dma_addr_t mapaddr = mlxsw_pci_wqe_address_get(wqe, index);
387 
388 	if (!frag_len)
389 		return;
390 	dma_unmap_single(&pdev->dev, mapaddr, frag_len, direction);
391 }
392 
393 static struct sk_buff *mlxsw_pci_rdq_build_skb(struct mlxsw_pci_queue *q,
394 					       struct page *pages[],
395 					       u16 byte_count)
396 {
397 	struct mlxsw_pci_queue *cq = q->u.rdq.cq;
398 	unsigned int linear_data_size;
399 	struct page_pool *page_pool;
400 	struct sk_buff *skb;
401 	int page_index = 0;
402 	bool linear_only;
403 	void *data;
404 
405 	linear_only = byte_count + MLXSW_PCI_RX_BUF_SW_OVERHEAD <= PAGE_SIZE;
406 	linear_data_size = linear_only ? byte_count :
407 					 PAGE_SIZE -
408 					 MLXSW_PCI_RX_BUF_SW_OVERHEAD;
409 
410 	page_pool = cq->u.cq.page_pool;
411 	page_pool_dma_sync_for_cpu(page_pool, pages[page_index],
412 				   MLXSW_PCI_SKB_HEADROOM, linear_data_size);
413 
414 	data = page_address(pages[page_index]);
415 	net_prefetch(data);
416 
417 	skb = napi_build_skb(data, PAGE_SIZE);
418 	if (unlikely(!skb))
419 		return ERR_PTR(-ENOMEM);
420 
421 	skb_reserve(skb, MLXSW_PCI_SKB_HEADROOM);
422 	skb_put(skb, linear_data_size);
423 
424 	if (linear_only)
425 		return skb;
426 
427 	byte_count -= linear_data_size;
428 	page_index++;
429 
430 	while (byte_count > 0) {
431 		unsigned int frag_size;
432 		struct page *page;
433 
434 		page = pages[page_index];
435 		frag_size = min(byte_count, PAGE_SIZE);
436 		page_pool_dma_sync_for_cpu(page_pool, page, 0, frag_size);
437 		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
438 				page, 0, frag_size, PAGE_SIZE);
439 		byte_count -= frag_size;
440 		page_index++;
441 	}
442 
443 	return skb;
444 }
445 
446 static int mlxsw_pci_rdq_page_alloc(struct mlxsw_pci_queue *q,
447 				    struct mlxsw_pci_queue_elem_info *elem_info,
448 				    int index)
449 {
450 	struct mlxsw_pci_queue *cq = q->u.rdq.cq;
451 	char *wqe = elem_info->elem;
452 	struct page *page;
453 
454 	page = page_pool_dev_alloc_pages(cq->u.cq.page_pool);
455 	if (unlikely(!page))
456 		return -ENOMEM;
457 
458 	mlxsw_pci_wqe_rx_frag_set(q->pci, page, wqe, index, PAGE_SIZE);
459 	elem_info->pages[index] = page;
460 	return 0;
461 }
462 
463 static void mlxsw_pci_rdq_page_free(struct mlxsw_pci_queue *q,
464 				    struct mlxsw_pci_queue_elem_info *elem_info,
465 				    int index)
466 {
467 	struct mlxsw_pci_queue *cq = q->u.rdq.cq;
468 
469 	page_pool_put_page(cq->u.cq.page_pool, elem_info->pages[index], -1,
470 			   false);
471 }
472 
473 static u8 mlxsw_pci_num_sg_entries_get(u16 byte_count)
474 {
475 	return DIV_ROUND_UP(byte_count + MLXSW_PCI_RX_BUF_SW_OVERHEAD,
476 			    PAGE_SIZE);
477 }
478 
479 static int
480 mlxsw_pci_elem_info_pages_ref_store(const struct mlxsw_pci_queue *q,
481 				    const struct mlxsw_pci_queue_elem_info *el,
482 				    u16 byte_count, struct page *pages[],
483 				    u8 *p_num_sg_entries)
484 {
485 	u8 num_sg_entries;
486 	int i;
487 
488 	num_sg_entries = mlxsw_pci_num_sg_entries_get(byte_count);
489 	if (WARN_ON_ONCE(num_sg_entries > q->pci->num_sg_entries))
490 		return -EINVAL;
491 
492 	for (i = 0; i < num_sg_entries; i++)
493 		pages[i] = el->pages[i];
494 
495 	*p_num_sg_entries = num_sg_entries;
496 	return 0;
497 }
498 
499 static int
500 mlxsw_pci_rdq_pages_alloc(struct mlxsw_pci_queue *q,
501 			  struct mlxsw_pci_queue_elem_info *elem_info,
502 			  u8 num_sg_entries)
503 {
504 	struct page *old_pages[MLXSW_PCI_WQE_SG_ENTRIES];
505 	struct mlxsw_pci_queue *cq = q->u.rdq.cq;
506 	int i, err;
507 
508 	for (i = 0; i < num_sg_entries; i++) {
509 		old_pages[i] = elem_info->pages[i];
510 		err = mlxsw_pci_rdq_page_alloc(q, elem_info, i);
511 		if (err) {
512 			dev_err_ratelimited(&q->pci->pdev->dev, "Failed to alloc page\n");
513 			goto err_page_alloc;
514 		}
515 	}
516 
517 	return 0;
518 
519 err_page_alloc:
520 	for (i--; i >= 0; i--)
521 		page_pool_recycle_direct(cq->u.cq.page_pool, old_pages[i]);
522 
523 	return err;
524 }
525 
526 static void
527 mlxsw_pci_rdq_pages_recycle(struct mlxsw_pci_queue *q, struct page *pages[],
528 			    u8 num_sg_entries)
529 {
530 	struct mlxsw_pci_queue *cq = q->u.rdq.cq;
531 	int i;
532 
533 	for (i = 0; i < num_sg_entries; i++)
534 		page_pool_recycle_direct(cq->u.cq.page_pool, pages[i]);
535 }
536 
537 static int mlxsw_pci_rdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
538 			      struct mlxsw_pci_queue *q)
539 {
540 	struct mlxsw_pci_queue_elem_info *elem_info;
541 	u8 sdq_count = mlxsw_pci->num_sdqs;
542 	struct mlxsw_pci_queue *cq;
543 	u8 cq_num;
544 	int i, j;
545 	int err;
546 
547 	q->producer_counter = 0;
548 	q->consumer_counter = 0;
549 
550 	/* Set CQ of same number of this RDQ with base
551 	 * above SDQ count as the lower ones are assigned to SDQs.
552 	 */
553 	cq_num = sdq_count + q->num;
554 	mlxsw_cmd_mbox_sw2hw_dq_cq_set(mbox, cq_num);
555 	mlxsw_cmd_mbox_sw2hw_dq_log2_dq_sz_set(mbox, 3); /* 8 pages */
556 	for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) {
557 		dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i);
558 
559 		mlxsw_cmd_mbox_sw2hw_dq_pa_set(mbox, i, mapaddr);
560 	}
561 
562 	err = mlxsw_cmd_sw2hw_rdq(mlxsw_pci->core, mbox, q->num);
563 	if (err)
564 		return err;
565 
566 	cq = mlxsw_pci_cq_get(mlxsw_pci, cq_num);
567 	cq->u.cq.dq = q;
568 	q->u.rdq.cq = cq;
569 
570 	mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q);
571 
572 	for (i = 0; i < q->count; i++) {
573 		elem_info = mlxsw_pci_queue_elem_info_producer_get(q);
574 		BUG_ON(!elem_info);
575 
576 		for (j = 0; j < mlxsw_pci->num_sg_entries; j++) {
577 			err = mlxsw_pci_rdq_page_alloc(q, elem_info, j);
578 			if (err)
579 				goto rollback;
580 		}
581 		/* Everything is set up, ring doorbell to pass elem to HW */
582 		q->producer_counter++;
583 		mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q);
584 	}
585 
586 	return 0;
587 
588 rollback:
589 	for (i--; i >= 0; i--) {
590 		elem_info = mlxsw_pci_queue_elem_info_get(q, i);
591 		for (j--; j >= 0; j--)
592 			mlxsw_pci_rdq_page_free(q, elem_info, j);
593 		j = mlxsw_pci->num_sg_entries;
594 	}
595 	q->u.rdq.cq = NULL;
596 	cq->u.cq.dq = NULL;
597 	mlxsw_cmd_hw2sw_rdq(mlxsw_pci->core, q->num);
598 
599 	return err;
600 }
601 
602 static void mlxsw_pci_rdq_fini(struct mlxsw_pci *mlxsw_pci,
603 			       struct mlxsw_pci_queue *q)
604 {
605 	struct mlxsw_pci_queue_elem_info *elem_info;
606 	int i, j;
607 
608 	mlxsw_cmd_hw2sw_rdq(mlxsw_pci->core, q->num);
609 	for (i = 0; i < q->count; i++) {
610 		elem_info = mlxsw_pci_queue_elem_info_get(q, i);
611 		for (j = 0; j < mlxsw_pci->num_sg_entries; j++)
612 			mlxsw_pci_rdq_page_free(q, elem_info, j);
613 	}
614 }
615 
616 static void mlxsw_pci_cq_pre_init(struct mlxsw_pci *mlxsw_pci,
617 				  struct mlxsw_pci_queue *q)
618 {
619 	q->u.cq.v = mlxsw_pci->max_cqe_ver;
620 
621 	if (q->u.cq.v == MLXSW_PCI_CQE_V2 &&
622 	    q->num < mlxsw_pci->num_sdqs &&
623 	    !mlxsw_core_sdq_supports_cqe_v2(mlxsw_pci->core))
624 		q->u.cq.v = MLXSW_PCI_CQE_V1;
625 }
626 
627 static unsigned int mlxsw_pci_read32_off(struct mlxsw_pci *mlxsw_pci,
628 					 ptrdiff_t off)
629 {
630 	return ioread32be(mlxsw_pci->hw_addr + off);
631 }
632 
633 static void mlxsw_pci_skb_cb_ts_set(struct mlxsw_pci *mlxsw_pci,
634 				    struct sk_buff *skb,
635 				    enum mlxsw_pci_cqe_v cqe_v, char *cqe)
636 {
637 	u8 ts_type;
638 
639 	if (cqe_v != MLXSW_PCI_CQE_V2)
640 		return;
641 
642 	ts_type = mlxsw_pci_cqe2_time_stamp_type_get(cqe);
643 
644 	if (ts_type != MLXSW_PCI_CQE_TIME_STAMP_TYPE_UTC &&
645 	    ts_type != MLXSW_PCI_CQE_TIME_STAMP_TYPE_MIRROR_UTC)
646 		return;
647 
648 	mlxsw_skb_cb(skb)->cqe_ts.sec = mlxsw_pci_cqe2_time_stamp_sec_get(cqe);
649 	mlxsw_skb_cb(skb)->cqe_ts.nsec =
650 		mlxsw_pci_cqe2_time_stamp_nsec_get(cqe);
651 }
652 
653 static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci,
654 				     struct mlxsw_pci_queue *q,
655 				     u16 consumer_counter_limit,
656 				     enum mlxsw_pci_cqe_v cqe_v,
657 				     char *cqe, int budget)
658 {
659 	struct pci_dev *pdev = mlxsw_pci->pdev;
660 	struct mlxsw_pci_queue_elem_info *elem_info;
661 	struct mlxsw_tx_info tx_info;
662 	char *wqe;
663 	struct sk_buff *skb;
664 	int i;
665 
666 	spin_lock(&q->lock);
667 	elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
668 	tx_info = mlxsw_skb_cb(elem_info->sdq.skb)->tx_info;
669 	skb = elem_info->sdq.skb;
670 	wqe = elem_info->elem;
671 	for (i = 0; i < MLXSW_PCI_WQE_SG_ENTRIES; i++)
672 		mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, i, DMA_TO_DEVICE);
673 
674 	if (unlikely(!tx_info.is_emad &&
675 		     skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
676 		mlxsw_pci_skb_cb_ts_set(mlxsw_pci, skb, cqe_v, cqe);
677 		mlxsw_core_ptp_transmitted(mlxsw_pci->core, skb,
678 					   tx_info.local_port);
679 		skb = NULL;
680 	}
681 
682 	if (skb)
683 		napi_consume_skb(skb, budget);
684 	elem_info->sdq.skb = NULL;
685 
686 	if (q->consumer_counter++ != consumer_counter_limit)
687 		dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in SDQ\n");
688 	spin_unlock(&q->lock);
689 }
690 
691 static void mlxsw_pci_cqe_rdq_md_tx_port_init(struct sk_buff *skb,
692 					      const char *cqe)
693 {
694 	struct mlxsw_skb_cb *cb = mlxsw_skb_cb(skb);
695 
696 	if (mlxsw_pci_cqe2_tx_lag_get(cqe)) {
697 		cb->rx_md_info.tx_port_is_lag = true;
698 		cb->rx_md_info.tx_lag_id = mlxsw_pci_cqe2_tx_lag_id_get(cqe);
699 		cb->rx_md_info.tx_lag_port_index =
700 			mlxsw_pci_cqe2_tx_lag_subport_get(cqe);
701 	} else {
702 		cb->rx_md_info.tx_port_is_lag = false;
703 		cb->rx_md_info.tx_sys_port =
704 			mlxsw_pci_cqe2_tx_system_port_get(cqe);
705 	}
706 
707 	if (cb->rx_md_info.tx_sys_port != MLXSW_PCI_CQE2_TX_PORT_MULTI_PORT &&
708 	    cb->rx_md_info.tx_sys_port != MLXSW_PCI_CQE2_TX_PORT_INVALID)
709 		cb->rx_md_info.tx_port_valid = 1;
710 	else
711 		cb->rx_md_info.tx_port_valid = 0;
712 }
713 
714 static void mlxsw_pci_cqe_rdq_md_init(struct sk_buff *skb, const char *cqe)
715 {
716 	struct mlxsw_skb_cb *cb = mlxsw_skb_cb(skb);
717 
718 	cb->rx_md_info.tx_congestion = mlxsw_pci_cqe2_mirror_cong_get(cqe);
719 	if (cb->rx_md_info.tx_congestion != MLXSW_PCI_CQE2_MIRROR_CONG_INVALID)
720 		cb->rx_md_info.tx_congestion_valid = 1;
721 	else
722 		cb->rx_md_info.tx_congestion_valid = 0;
723 	cb->rx_md_info.tx_congestion <<= MLXSW_PCI_CQE2_MIRROR_CONG_SHIFT;
724 
725 	cb->rx_md_info.latency = mlxsw_pci_cqe2_mirror_latency_get(cqe);
726 	if (cb->rx_md_info.latency != MLXSW_PCI_CQE2_MIRROR_LATENCY_INVALID)
727 		cb->rx_md_info.latency_valid = 1;
728 	else
729 		cb->rx_md_info.latency_valid = 0;
730 
731 	cb->rx_md_info.tx_tc = mlxsw_pci_cqe2_mirror_tclass_get(cqe);
732 	if (cb->rx_md_info.tx_tc != MLXSW_PCI_CQE2_MIRROR_TCLASS_INVALID)
733 		cb->rx_md_info.tx_tc_valid = 1;
734 	else
735 		cb->rx_md_info.tx_tc_valid = 0;
736 
737 	mlxsw_pci_cqe_rdq_md_tx_port_init(skb, cqe);
738 }
739 
740 static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
741 				     struct napi_struct *napi,
742 				     struct mlxsw_pci_queue *q,
743 				     u16 consumer_counter_limit,
744 				     enum mlxsw_pci_cqe_v cqe_v, char *cqe)
745 {
746 	struct pci_dev *pdev = mlxsw_pci->pdev;
747 	struct page *pages[MLXSW_PCI_WQE_SG_ENTRIES];
748 	struct mlxsw_pci_queue_elem_info *elem_info;
749 	struct mlxsw_rx_info rx_info = {};
750 	struct sk_buff *skb;
751 	u8 num_sg_entries;
752 	u16 byte_count;
753 	int err;
754 
755 	elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
756 
757 	if (q->consumer_counter++ != consumer_counter_limit)
758 		dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n");
759 
760 	byte_count = mlxsw_pci_cqe_byte_count_get(cqe);
761 	if (mlxsw_pci_cqe_crc_get(cqe_v, cqe))
762 		byte_count -= ETH_FCS_LEN;
763 
764 	err = mlxsw_pci_elem_info_pages_ref_store(q, elem_info, byte_count,
765 						  pages, &num_sg_entries);
766 	if (err)
767 		goto out;
768 
769 	err = mlxsw_pci_rdq_pages_alloc(q, elem_info, num_sg_entries);
770 	if (err)
771 		goto out;
772 
773 	skb = mlxsw_pci_rdq_build_skb(q, pages, byte_count);
774 	if (IS_ERR(skb)) {
775 		dev_err_ratelimited(&pdev->dev, "Failed to build skb for RDQ\n");
776 		mlxsw_pci_rdq_pages_recycle(q, pages, num_sg_entries);
777 		goto out;
778 	}
779 
780 	skb_mark_for_recycle(skb);
781 
782 	if (mlxsw_pci_cqe_lag_get(cqe_v, cqe)) {
783 		rx_info.is_lag = true;
784 		rx_info.u.lag_id = mlxsw_pci_cqe_lag_id_get(cqe_v, cqe);
785 		rx_info.lag_port_index =
786 			mlxsw_pci_cqe_lag_subport_get(cqe_v, cqe);
787 	} else {
788 		rx_info.is_lag = false;
789 		rx_info.u.sys_port = mlxsw_pci_cqe_system_port_get(cqe);
790 	}
791 
792 	rx_info.trap_id = mlxsw_pci_cqe_trap_id_get(cqe);
793 
794 	if (rx_info.trap_id == MLXSW_TRAP_ID_DISCARD_INGRESS_ACL ||
795 	    rx_info.trap_id == MLXSW_TRAP_ID_DISCARD_EGRESS_ACL) {
796 		u32 cookie_index = 0;
797 
798 		if (mlxsw_pci->max_cqe_ver >= MLXSW_PCI_CQE_V2)
799 			cookie_index = mlxsw_pci_cqe2_user_def_val_orig_pkt_len_get(cqe);
800 		mlxsw_skb_cb(skb)->rx_md_info.cookie_index = cookie_index;
801 	} else if (rx_info.trap_id >= MLXSW_TRAP_ID_MIRROR_SESSION0 &&
802 		   rx_info.trap_id <= MLXSW_TRAP_ID_MIRROR_SESSION7 &&
803 		   mlxsw_pci->max_cqe_ver >= MLXSW_PCI_CQE_V2) {
804 		rx_info.mirror_reason = mlxsw_pci_cqe2_mirror_reason_get(cqe);
805 		mlxsw_pci_cqe_rdq_md_init(skb, cqe);
806 	} else if (rx_info.trap_id == MLXSW_TRAP_ID_PKT_SAMPLE &&
807 		   mlxsw_pci->max_cqe_ver >= MLXSW_PCI_CQE_V2) {
808 		mlxsw_pci_cqe_rdq_md_tx_port_init(skb, cqe);
809 	}
810 
811 	mlxsw_pci_skb_cb_ts_set(mlxsw_pci, skb, cqe_v, cqe);
812 	mlxsw_skb_cb(skb)->rx_md_info.napi = napi;
813 
814 	mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info);
815 
816 out:
817 	q->producer_counter++;
818 	return;
819 }
820 
821 static char *mlxsw_pci_cq_sw_cqe_get(struct mlxsw_pci_queue *q)
822 {
823 	struct mlxsw_pci_queue_elem_info *elem_info;
824 	char *elem;
825 	bool owner_bit;
826 
827 	elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
828 	elem = elem_info->elem;
829 	owner_bit = mlxsw_pci_cqe_owner_get(q->u.cq.v, elem);
830 	if (mlxsw_pci_elem_hw_owned(q, owner_bit))
831 		return NULL;
832 	q->consumer_counter++;
833 	rmb(); /* make sure we read owned bit before the rest of elem */
834 	return elem;
835 }
836 
837 static bool mlxsw_pci_cq_cqe_to_handle(struct mlxsw_pci_queue *q)
838 {
839 	struct mlxsw_pci_queue_elem_info *elem_info;
840 	bool owner_bit;
841 
842 	elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
843 	owner_bit = mlxsw_pci_cqe_owner_get(q->u.cq.v, elem_info->elem);
844 	return !mlxsw_pci_elem_hw_owned(q, owner_bit);
845 }
846 
847 static int mlxsw_pci_napi_poll_cq_rx(struct napi_struct *napi, int budget)
848 {
849 	struct mlxsw_pci_queue *q = container_of(napi, struct mlxsw_pci_queue,
850 						 u.cq.napi);
851 	struct mlxsw_pci_queue *rdq = q->u.cq.dq;
852 	struct mlxsw_pci *mlxsw_pci = q->pci;
853 	int work_done = 0;
854 	char *cqe;
855 
856 	/* If the budget is 0, Rx processing should be skipped. */
857 	if (unlikely(!budget))
858 		return 0;
859 
860 	while ((cqe = mlxsw_pci_cq_sw_cqe_get(q))) {
861 		u16 wqe_counter = mlxsw_pci_cqe_wqe_counter_get(cqe);
862 		u8 sendq = mlxsw_pci_cqe_sr_get(q->u.cq.v, cqe);
863 		u8 dqn = mlxsw_pci_cqe_dqn_get(q->u.cq.v, cqe);
864 
865 		if (unlikely(sendq)) {
866 			WARN_ON_ONCE(1);
867 			continue;
868 		}
869 
870 		if (unlikely(dqn != rdq->num)) {
871 			WARN_ON_ONCE(1);
872 			continue;
873 		}
874 
875 		mlxsw_pci_cqe_rdq_handle(mlxsw_pci, napi, rdq,
876 					 wqe_counter, q->u.cq.v, cqe);
877 
878 		if (++work_done == budget)
879 			break;
880 	}
881 
882 	mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q);
883 	mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, rdq);
884 
885 	if (work_done < budget)
886 		goto processing_completed;
887 
888 	/* The driver still has outstanding work to do, budget was exhausted.
889 	 * Return exactly budget. In that case, the NAPI instance will be polled
890 	 * again.
891 	 */
892 	if (mlxsw_pci_cq_cqe_to_handle(q))
893 		goto out;
894 
895 	/* The driver processed all the completions and handled exactly
896 	 * 'budget'. Return 'budget - 1' to distinguish from the case that
897 	 * driver still has completions to handle.
898 	 */
899 	if (work_done == budget)
900 		work_done--;
901 
902 processing_completed:
903 	if (napi_complete_done(napi, work_done))
904 		mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
905 out:
906 	return work_done;
907 }
908 
909 static int mlxsw_pci_napi_poll_cq_tx(struct napi_struct *napi, int budget)
910 {
911 	struct mlxsw_pci_queue *q = container_of(napi, struct mlxsw_pci_queue,
912 						 u.cq.napi);
913 	struct mlxsw_pci_queue *sdq = q->u.cq.dq;
914 	struct mlxsw_pci *mlxsw_pci = q->pci;
915 	int work_done = 0;
916 	char *cqe;
917 
918 	while ((cqe = mlxsw_pci_cq_sw_cqe_get(q))) {
919 		u16 wqe_counter = mlxsw_pci_cqe_wqe_counter_get(cqe);
920 		u8 sendq = mlxsw_pci_cqe_sr_get(q->u.cq.v, cqe);
921 		u8 dqn = mlxsw_pci_cqe_dqn_get(q->u.cq.v, cqe);
922 		char ncqe[MLXSW_PCI_CQE_SIZE_MAX];
923 
924 		if (unlikely(!sendq)) {
925 			WARN_ON_ONCE(1);
926 			continue;
927 		}
928 
929 		if (unlikely(dqn != sdq->num)) {
930 			WARN_ON_ONCE(1);
931 			continue;
932 		}
933 
934 		memcpy(ncqe, cqe, q->elem_size);
935 		mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q);
936 
937 		mlxsw_pci_cqe_sdq_handle(mlxsw_pci, sdq,
938 					 wqe_counter, q->u.cq.v, ncqe, budget);
939 
940 		work_done++;
941 	}
942 
943 	/* If the budget is 0 napi_complete_done() should never be called. */
944 	if (unlikely(!budget))
945 		goto processing_completed;
946 
947 	work_done = min(work_done, budget - 1);
948 	if (unlikely(!napi_complete_done(napi, work_done)))
949 		goto out;
950 
951 processing_completed:
952 	mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
953 out:
954 	return work_done;
955 }
956 
957 static enum mlxsw_pci_cq_type
958 mlxsw_pci_cq_type(const struct mlxsw_pci *mlxsw_pci,
959 		  const struct mlxsw_pci_queue *q)
960 {
961 	/* Each CQ is mapped to one DQ. The first 'num_sdqs' queues are used
962 	 * for SDQs and the rest are used for RDQs.
963 	 */
964 	if (q->num < mlxsw_pci->num_sdqs)
965 		return MLXSW_PCI_CQ_SDQ;
966 
967 	return MLXSW_PCI_CQ_RDQ;
968 }
969 
970 static void mlxsw_pci_cq_napi_setup(struct mlxsw_pci_queue *q,
971 				    enum mlxsw_pci_cq_type cq_type)
972 {
973 	struct mlxsw_pci *mlxsw_pci = q->pci;
974 
975 	switch (cq_type) {
976 	case MLXSW_PCI_CQ_SDQ:
977 		netif_napi_add(mlxsw_pci->napi_dev_tx, &q->u.cq.napi,
978 			       mlxsw_pci_napi_poll_cq_tx);
979 		break;
980 	case MLXSW_PCI_CQ_RDQ:
981 		netif_napi_add(mlxsw_pci->napi_dev_rx, &q->u.cq.napi,
982 			       mlxsw_pci_napi_poll_cq_rx);
983 		break;
984 	}
985 }
986 
987 static void mlxsw_pci_cq_napi_teardown(struct mlxsw_pci_queue *q)
988 {
989 	netif_napi_del(&q->u.cq.napi);
990 }
991 
992 static int mlxsw_pci_cq_page_pool_init(struct mlxsw_pci_queue *q,
993 				       enum mlxsw_pci_cq_type cq_type)
994 {
995 	struct page_pool_params pp_params = {};
996 	struct mlxsw_pci *mlxsw_pci = q->pci;
997 	struct page_pool *page_pool;
998 
999 	if (cq_type != MLXSW_PCI_CQ_RDQ)
1000 		return 0;
1001 
1002 	pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
1003 	pp_params.pool_size = MLXSW_PCI_WQE_COUNT * mlxsw_pci->num_sg_entries;
1004 	pp_params.nid = dev_to_node(&mlxsw_pci->pdev->dev);
1005 	pp_params.dev = &mlxsw_pci->pdev->dev;
1006 	pp_params.napi = &q->u.cq.napi;
1007 	pp_params.dma_dir = DMA_FROM_DEVICE;
1008 	pp_params.max_len = PAGE_SIZE;
1009 
1010 	page_pool = page_pool_create(&pp_params);
1011 	if (IS_ERR(page_pool))
1012 		return PTR_ERR(page_pool);
1013 
1014 	q->u.cq.page_pool = page_pool;
1015 	return 0;
1016 }
1017 
1018 static void mlxsw_pci_cq_page_pool_fini(struct mlxsw_pci_queue *q,
1019 					enum mlxsw_pci_cq_type cq_type)
1020 {
1021 	if (cq_type != MLXSW_PCI_CQ_RDQ)
1022 		return;
1023 
1024 	page_pool_destroy(q->u.cq.page_pool);
1025 }
1026 
1027 static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
1028 			     struct mlxsw_pci_queue *q)
1029 {
1030 	enum mlxsw_pci_cq_type cq_type = mlxsw_pci_cq_type(mlxsw_pci, q);
1031 	int i;
1032 	int err;
1033 
1034 	q->consumer_counter = 0;
1035 
1036 	for (i = 0; i < q->count; i++) {
1037 		char *elem = mlxsw_pci_queue_elem_get(q, i);
1038 
1039 		mlxsw_pci_cqe_owner_set(q->u.cq.v, elem, 1);
1040 	}
1041 
1042 	if (q->u.cq.v == MLXSW_PCI_CQE_V1)
1043 		mlxsw_cmd_mbox_sw2hw_cq_cqe_ver_set(mbox,
1044 				MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_1);
1045 	else if (q->u.cq.v == MLXSW_PCI_CQE_V2)
1046 		mlxsw_cmd_mbox_sw2hw_cq_cqe_ver_set(mbox,
1047 				MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_2);
1048 
1049 	mlxsw_cmd_mbox_sw2hw_cq_c_eqn_set(mbox, MLXSW_PCI_EQ_COMP_NUM);
1050 	mlxsw_cmd_mbox_sw2hw_cq_st_set(mbox, 0);
1051 	mlxsw_cmd_mbox_sw2hw_cq_log_cq_size_set(mbox, ilog2(q->count));
1052 	for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) {
1053 		dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i);
1054 
1055 		mlxsw_cmd_mbox_sw2hw_cq_pa_set(mbox, i, mapaddr);
1056 	}
1057 	err = mlxsw_cmd_sw2hw_cq(mlxsw_pci->core, mbox, q->num);
1058 	if (err)
1059 		return err;
1060 	mlxsw_pci_cq_napi_setup(q, cq_type);
1061 
1062 	err = mlxsw_pci_cq_page_pool_init(q, cq_type);
1063 	if (err)
1064 		goto err_page_pool_init;
1065 
1066 	napi_enable(&q->u.cq.napi);
1067 	mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q);
1068 	mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
1069 	return 0;
1070 
1071 err_page_pool_init:
1072 	mlxsw_pci_cq_napi_teardown(q);
1073 	return err;
1074 }
1075 
1076 static void mlxsw_pci_cq_fini(struct mlxsw_pci *mlxsw_pci,
1077 			      struct mlxsw_pci_queue *q)
1078 {
1079 	enum mlxsw_pci_cq_type cq_type = mlxsw_pci_cq_type(mlxsw_pci, q);
1080 
1081 	napi_disable(&q->u.cq.napi);
1082 	mlxsw_pci_cq_page_pool_fini(q, cq_type);
1083 	mlxsw_pci_cq_napi_teardown(q);
1084 	mlxsw_cmd_hw2sw_cq(mlxsw_pci->core, q->num);
1085 }
1086 
1087 static u16 mlxsw_pci_cq_elem_count(const struct mlxsw_pci_queue *q)
1088 {
1089 	return q->u.cq.v == MLXSW_PCI_CQE_V2 ? MLXSW_PCI_CQE2_COUNT :
1090 					     MLXSW_PCI_CQE01_COUNT;
1091 }
1092 
1093 static u8 mlxsw_pci_cq_elem_size(const struct mlxsw_pci_queue *q)
1094 {
1095 	return q->u.cq.v == MLXSW_PCI_CQE_V2 ? MLXSW_PCI_CQE2_SIZE :
1096 					       MLXSW_PCI_CQE01_SIZE;
1097 }
1098 
1099 static char *mlxsw_pci_eq_sw_eqe_get(struct mlxsw_pci_queue *q)
1100 {
1101 	struct mlxsw_pci_queue_elem_info *elem_info;
1102 	char *elem;
1103 	bool owner_bit;
1104 
1105 	elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
1106 	elem = elem_info->elem;
1107 	owner_bit = mlxsw_pci_eqe_owner_get(elem);
1108 	if (mlxsw_pci_elem_hw_owned(q, owner_bit))
1109 		return NULL;
1110 	q->consumer_counter++;
1111 	rmb(); /* make sure we read owned bit before the rest of elem */
1112 	return elem;
1113 }
1114 
1115 static void mlxsw_pci_eq_tasklet(struct tasklet_struct *t)
1116 {
1117 	unsigned long active_cqns[BITS_TO_LONGS(MLXSW_PCI_CQS_MAX)];
1118 	struct mlxsw_pci_queue *q = from_tasklet(q, t, u.eq.tasklet);
1119 	struct mlxsw_pci *mlxsw_pci = q->pci;
1120 	int credits = q->count >> 1;
1121 	u8 cqn, cq_count;
1122 	int items = 0;
1123 	char *eqe;
1124 
1125 	memset(&active_cqns, 0, sizeof(active_cqns));
1126 
1127 	while ((eqe = mlxsw_pci_eq_sw_eqe_get(q))) {
1128 		cqn = mlxsw_pci_eqe_cqn_get(eqe);
1129 		set_bit(cqn, active_cqns);
1130 
1131 		if (++items == credits)
1132 			break;
1133 	}
1134 
1135 	if (!items)
1136 		return;
1137 
1138 	mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q);
1139 	mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
1140 
1141 	cq_count = mlxsw_pci->num_cqs;
1142 	for_each_set_bit(cqn, active_cqns, cq_count) {
1143 		q = mlxsw_pci_cq_get(mlxsw_pci, cqn);
1144 		napi_schedule(&q->u.cq.napi);
1145 	}
1146 }
1147 
1148 static int mlxsw_pci_eq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
1149 			     struct mlxsw_pci_queue *q)
1150 {
1151 	int i;
1152 	int err;
1153 
1154 	/* We expect to initialize only one EQ, which gets num=0 as it is
1155 	 * located at index zero. We use the EQ as EQ1, so set the number for
1156 	 * future use.
1157 	 */
1158 	WARN_ON_ONCE(q->num);
1159 	q->num = MLXSW_PCI_EQ_COMP_NUM;
1160 
1161 	q->consumer_counter = 0;
1162 
1163 	for (i = 0; i < q->count; i++) {
1164 		char *elem = mlxsw_pci_queue_elem_get(q, i);
1165 
1166 		mlxsw_pci_eqe_owner_set(elem, 1);
1167 	}
1168 
1169 	mlxsw_cmd_mbox_sw2hw_eq_int_msix_set(mbox, 1); /* MSI-X used */
1170 	mlxsw_cmd_mbox_sw2hw_eq_st_set(mbox, 1); /* armed */
1171 	mlxsw_cmd_mbox_sw2hw_eq_log_eq_size_set(mbox, ilog2(q->count));
1172 	for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) {
1173 		dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i);
1174 
1175 		mlxsw_cmd_mbox_sw2hw_eq_pa_set(mbox, i, mapaddr);
1176 	}
1177 	err = mlxsw_cmd_sw2hw_eq(mlxsw_pci->core, mbox, q->num);
1178 	if (err)
1179 		return err;
1180 	tasklet_setup(&q->u.eq.tasklet, mlxsw_pci_eq_tasklet);
1181 	mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q);
1182 	mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
1183 	return 0;
1184 }
1185 
1186 static void mlxsw_pci_eq_fini(struct mlxsw_pci *mlxsw_pci,
1187 			      struct mlxsw_pci_queue *q)
1188 {
1189 	mlxsw_cmd_hw2sw_eq(mlxsw_pci->core, q->num);
1190 }
1191 
1192 struct mlxsw_pci_queue_ops {
1193 	const char *name;
1194 	enum mlxsw_pci_queue_type type;
1195 	void (*pre_init)(struct mlxsw_pci *mlxsw_pci,
1196 			 struct mlxsw_pci_queue *q);
1197 	int (*init)(struct mlxsw_pci *mlxsw_pci, char *mbox,
1198 		    struct mlxsw_pci_queue *q);
1199 	void (*fini)(struct mlxsw_pci *mlxsw_pci,
1200 		     struct mlxsw_pci_queue *q);
1201 	u16 (*elem_count_f)(const struct mlxsw_pci_queue *q);
1202 	u8 (*elem_size_f)(const struct mlxsw_pci_queue *q);
1203 	u16 elem_count;
1204 	u8 elem_size;
1205 };
1206 
1207 static const struct mlxsw_pci_queue_ops mlxsw_pci_sdq_ops = {
1208 	.type		= MLXSW_PCI_QUEUE_TYPE_SDQ,
1209 	.init		= mlxsw_pci_sdq_init,
1210 	.fini		= mlxsw_pci_sdq_fini,
1211 	.elem_count	= MLXSW_PCI_WQE_COUNT,
1212 	.elem_size	= MLXSW_PCI_WQE_SIZE,
1213 };
1214 
1215 static const struct mlxsw_pci_queue_ops mlxsw_pci_rdq_ops = {
1216 	.type		= MLXSW_PCI_QUEUE_TYPE_RDQ,
1217 	.init		= mlxsw_pci_rdq_init,
1218 	.fini		= mlxsw_pci_rdq_fini,
1219 	.elem_count	= MLXSW_PCI_WQE_COUNT,
1220 	.elem_size	= MLXSW_PCI_WQE_SIZE
1221 };
1222 
1223 static const struct mlxsw_pci_queue_ops mlxsw_pci_cq_ops = {
1224 	.type		= MLXSW_PCI_QUEUE_TYPE_CQ,
1225 	.pre_init	= mlxsw_pci_cq_pre_init,
1226 	.init		= mlxsw_pci_cq_init,
1227 	.fini		= mlxsw_pci_cq_fini,
1228 	.elem_count_f	= mlxsw_pci_cq_elem_count,
1229 	.elem_size_f	= mlxsw_pci_cq_elem_size
1230 };
1231 
1232 static const struct mlxsw_pci_queue_ops mlxsw_pci_eq_ops = {
1233 	.type		= MLXSW_PCI_QUEUE_TYPE_EQ,
1234 	.init		= mlxsw_pci_eq_init,
1235 	.fini		= mlxsw_pci_eq_fini,
1236 	.elem_count	= MLXSW_PCI_EQE_COUNT,
1237 	.elem_size	= MLXSW_PCI_EQE_SIZE
1238 };
1239 
1240 static int mlxsw_pci_queue_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
1241 				const struct mlxsw_pci_queue_ops *q_ops,
1242 				struct mlxsw_pci_queue *q, u8 q_num)
1243 {
1244 	struct mlxsw_pci_mem_item *mem_item = &q->mem_item;
1245 	int i;
1246 	int err;
1247 
1248 	q->num = q_num;
1249 	if (q_ops->pre_init)
1250 		q_ops->pre_init(mlxsw_pci, q);
1251 
1252 	spin_lock_init(&q->lock);
1253 	q->count = q_ops->elem_count_f ? q_ops->elem_count_f(q) :
1254 					 q_ops->elem_count;
1255 	q->elem_size = q_ops->elem_size_f ? q_ops->elem_size_f(q) :
1256 					    q_ops->elem_size;
1257 	q->type = q_ops->type;
1258 	q->pci = mlxsw_pci;
1259 
1260 	mem_item->size = MLXSW_PCI_AQ_SIZE;
1261 	mem_item->buf = dma_alloc_coherent(&mlxsw_pci->pdev->dev,
1262 					   mem_item->size, &mem_item->mapaddr,
1263 					   GFP_KERNEL);
1264 	if (!mem_item->buf)
1265 		return -ENOMEM;
1266 
1267 	q->elem_info = kcalloc(q->count, sizeof(*q->elem_info), GFP_KERNEL);
1268 	if (!q->elem_info) {
1269 		err = -ENOMEM;
1270 		goto err_elem_info_alloc;
1271 	}
1272 
1273 	/* Initialize dma mapped elements info elem_info for
1274 	 * future easy access.
1275 	 */
1276 	for (i = 0; i < q->count; i++) {
1277 		struct mlxsw_pci_queue_elem_info *elem_info;
1278 
1279 		elem_info = mlxsw_pci_queue_elem_info_get(q, i);
1280 		elem_info->elem =
1281 			__mlxsw_pci_queue_elem_get(q, q->elem_size, i);
1282 	}
1283 
1284 	mlxsw_cmd_mbox_zero(mbox);
1285 	err = q_ops->init(mlxsw_pci, mbox, q);
1286 	if (err)
1287 		goto err_q_ops_init;
1288 	return 0;
1289 
1290 err_q_ops_init:
1291 	kfree(q->elem_info);
1292 err_elem_info_alloc:
1293 	dma_free_coherent(&mlxsw_pci->pdev->dev, mem_item->size,
1294 			  mem_item->buf, mem_item->mapaddr);
1295 	return err;
1296 }
1297 
1298 static void mlxsw_pci_queue_fini(struct mlxsw_pci *mlxsw_pci,
1299 				 const struct mlxsw_pci_queue_ops *q_ops,
1300 				 struct mlxsw_pci_queue *q)
1301 {
1302 	struct mlxsw_pci_mem_item *mem_item = &q->mem_item;
1303 
1304 	q_ops->fini(mlxsw_pci, q);
1305 	kfree(q->elem_info);
1306 	dma_free_coherent(&mlxsw_pci->pdev->dev, mem_item->size,
1307 			  mem_item->buf, mem_item->mapaddr);
1308 }
1309 
1310 static int mlxsw_pci_queue_group_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
1311 				      const struct mlxsw_pci_queue_ops *q_ops,
1312 				      u8 num_qs)
1313 {
1314 	struct mlxsw_pci_queue_type_group *queue_group;
1315 	int i;
1316 	int err;
1317 
1318 	queue_group = mlxsw_pci_queue_type_group_get(mlxsw_pci, q_ops->type);
1319 	queue_group->q = kcalloc(num_qs, sizeof(*queue_group->q), GFP_KERNEL);
1320 	if (!queue_group->q)
1321 		return -ENOMEM;
1322 
1323 	for (i = 0; i < num_qs; i++) {
1324 		err = mlxsw_pci_queue_init(mlxsw_pci, mbox, q_ops,
1325 					   &queue_group->q[i], i);
1326 		if (err)
1327 			goto err_queue_init;
1328 	}
1329 	queue_group->count = num_qs;
1330 
1331 	return 0;
1332 
1333 err_queue_init:
1334 	for (i--; i >= 0; i--)
1335 		mlxsw_pci_queue_fini(mlxsw_pci, q_ops, &queue_group->q[i]);
1336 	kfree(queue_group->q);
1337 	return err;
1338 }
1339 
1340 static void mlxsw_pci_queue_group_fini(struct mlxsw_pci *mlxsw_pci,
1341 				       const struct mlxsw_pci_queue_ops *q_ops)
1342 {
1343 	struct mlxsw_pci_queue_type_group *queue_group;
1344 	int i;
1345 
1346 	queue_group = mlxsw_pci_queue_type_group_get(mlxsw_pci, q_ops->type);
1347 	for (i = 0; i < queue_group->count; i++)
1348 		mlxsw_pci_queue_fini(mlxsw_pci, q_ops, &queue_group->q[i]);
1349 	kfree(queue_group->q);
1350 }
1351 
1352 static int mlxsw_pci_aqs_init(struct mlxsw_pci *mlxsw_pci, char *mbox)
1353 {
1354 	struct pci_dev *pdev = mlxsw_pci->pdev;
1355 	u8 num_sdqs;
1356 	u8 sdq_log2sz;
1357 	u8 num_rdqs;
1358 	u8 rdq_log2sz;
1359 	u8 num_cqs;
1360 	u8 cq_log2sz;
1361 	u8 cqv2_log2sz;
1362 	u8 num_eqs;
1363 	u8 eq_log2sz;
1364 	int err;
1365 
1366 	mlxsw_cmd_mbox_zero(mbox);
1367 	err = mlxsw_cmd_query_aq_cap(mlxsw_pci->core, mbox);
1368 	if (err)
1369 		return err;
1370 
1371 	num_sdqs = mlxsw_cmd_mbox_query_aq_cap_max_num_sdqs_get(mbox);
1372 	sdq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_sdq_sz_get(mbox);
1373 	num_rdqs = mlxsw_cmd_mbox_query_aq_cap_max_num_rdqs_get(mbox);
1374 	rdq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_rdq_sz_get(mbox);
1375 	num_cqs = mlxsw_cmd_mbox_query_aq_cap_max_num_cqs_get(mbox);
1376 	cq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_cq_sz_get(mbox);
1377 	cqv2_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_cqv2_sz_get(mbox);
1378 	num_eqs = mlxsw_cmd_mbox_query_aq_cap_max_num_eqs_get(mbox);
1379 	eq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_eq_sz_get(mbox);
1380 
1381 	if (num_sdqs + num_rdqs > num_cqs ||
1382 	    num_sdqs < MLXSW_PCI_SDQS_MIN ||
1383 	    num_cqs > MLXSW_PCI_CQS_MAX || num_eqs != MLXSW_PCI_EQS_MAX) {
1384 		dev_err(&pdev->dev, "Unsupported number of queues\n");
1385 		return -EINVAL;
1386 	}
1387 
1388 	if ((1 << sdq_log2sz != MLXSW_PCI_WQE_COUNT) ||
1389 	    (1 << rdq_log2sz != MLXSW_PCI_WQE_COUNT) ||
1390 	    (1 << cq_log2sz != MLXSW_PCI_CQE01_COUNT) ||
1391 	    (mlxsw_pci->max_cqe_ver == MLXSW_PCI_CQE_V2 &&
1392 	     (1 << cqv2_log2sz != MLXSW_PCI_CQE2_COUNT)) ||
1393 	    (1 << eq_log2sz != MLXSW_PCI_EQE_COUNT)) {
1394 		dev_err(&pdev->dev, "Unsupported number of async queue descriptors\n");
1395 		return -EINVAL;
1396 	}
1397 
1398 	mlxsw_pci->num_cqs = num_cqs;
1399 	mlxsw_pci->num_sdqs = num_sdqs;
1400 
1401 	err = mlxsw_pci_queue_group_init(mlxsw_pci, mbox, &mlxsw_pci_eq_ops,
1402 					 MLXSW_PCI_EQS_COUNT);
1403 	if (err) {
1404 		dev_err(&pdev->dev, "Failed to initialize event queues\n");
1405 		return err;
1406 	}
1407 
1408 	err = mlxsw_pci_queue_group_init(mlxsw_pci, mbox, &mlxsw_pci_cq_ops,
1409 					 num_cqs);
1410 	if (err) {
1411 		dev_err(&pdev->dev, "Failed to initialize completion queues\n");
1412 		goto err_cqs_init;
1413 	}
1414 
1415 	err = mlxsw_pci_queue_group_init(mlxsw_pci, mbox, &mlxsw_pci_sdq_ops,
1416 					 num_sdqs);
1417 	if (err) {
1418 		dev_err(&pdev->dev, "Failed to initialize send descriptor queues\n");
1419 		goto err_sdqs_init;
1420 	}
1421 
1422 	err = mlxsw_pci_queue_group_init(mlxsw_pci, mbox, &mlxsw_pci_rdq_ops,
1423 					 num_rdqs);
1424 	if (err) {
1425 		dev_err(&pdev->dev, "Failed to initialize receive descriptor queues\n");
1426 		goto err_rdqs_init;
1427 	}
1428 
1429 	return 0;
1430 
1431 err_rdqs_init:
1432 	mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_sdq_ops);
1433 err_sdqs_init:
1434 	mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_cq_ops);
1435 err_cqs_init:
1436 	mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_eq_ops);
1437 	return err;
1438 }
1439 
1440 static void mlxsw_pci_aqs_fini(struct mlxsw_pci *mlxsw_pci)
1441 {
1442 	mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_rdq_ops);
1443 	mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_sdq_ops);
1444 	mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_cq_ops);
1445 	mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_eq_ops);
1446 }
1447 
1448 static void
1449 mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci,
1450 				     char *mbox, int index,
1451 				     const struct mlxsw_swid_config *swid)
1452 {
1453 	u8 mask = 0;
1454 
1455 	if (swid->used_type) {
1456 		mlxsw_cmd_mbox_config_profile_swid_config_type_set(
1457 			mbox, index, swid->type);
1458 		mask |= 1;
1459 	}
1460 	if (swid->used_properties) {
1461 		mlxsw_cmd_mbox_config_profile_swid_config_properties_set(
1462 			mbox, index, swid->properties);
1463 		mask |= 2;
1464 	}
1465 	mlxsw_cmd_mbox_config_profile_swid_config_mask_set(mbox, index, mask);
1466 }
1467 
1468 static int
1469 mlxsw_pci_profile_get_kvd_sizes(const struct mlxsw_pci *mlxsw_pci,
1470 				const struct mlxsw_config_profile *profile,
1471 				struct mlxsw_res *res)
1472 {
1473 	u64 single_size, double_size, linear_size;
1474 	int err;
1475 
1476 	err = mlxsw_core_kvd_sizes_get(mlxsw_pci->core, profile,
1477 				       &single_size, &double_size,
1478 				       &linear_size);
1479 	if (err)
1480 		return err;
1481 
1482 	MLXSW_RES_SET(res, KVD_SINGLE_SIZE, single_size);
1483 	MLXSW_RES_SET(res, KVD_DOUBLE_SIZE, double_size);
1484 	MLXSW_RES_SET(res, KVD_LINEAR_SIZE, linear_size);
1485 
1486 	return 0;
1487 }
1488 
1489 static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox,
1490 				    const struct mlxsw_config_profile *profile,
1491 				    struct mlxsw_res *res)
1492 {
1493 	int i;
1494 	int err;
1495 
1496 	mlxsw_cmd_mbox_zero(mbox);
1497 
1498 	if (profile->used_max_vepa_channels) {
1499 		mlxsw_cmd_mbox_config_profile_set_max_vepa_channels_set(
1500 			mbox, 1);
1501 		mlxsw_cmd_mbox_config_profile_max_vepa_channels_set(
1502 			mbox, profile->max_vepa_channels);
1503 	}
1504 	if (profile->used_max_lag) {
1505 		mlxsw_cmd_mbox_config_profile_set_max_lag_set(mbox, 1);
1506 		mlxsw_cmd_mbox_config_profile_max_lag_set(mbox,
1507 							  profile->max_lag);
1508 	}
1509 	if (profile->used_max_mid) {
1510 		mlxsw_cmd_mbox_config_profile_set_max_mid_set(
1511 			mbox, 1);
1512 		mlxsw_cmd_mbox_config_profile_max_mid_set(
1513 			mbox, profile->max_mid);
1514 	}
1515 	if (profile->used_max_pgt) {
1516 		mlxsw_cmd_mbox_config_profile_set_max_pgt_set(
1517 			mbox, 1);
1518 		mlxsw_cmd_mbox_config_profile_max_pgt_set(
1519 			mbox, profile->max_pgt);
1520 	}
1521 	if (profile->used_max_system_port) {
1522 		mlxsw_cmd_mbox_config_profile_set_max_system_port_set(
1523 			mbox, 1);
1524 		mlxsw_cmd_mbox_config_profile_max_system_port_set(
1525 			mbox, profile->max_system_port);
1526 	}
1527 	if (profile->used_max_vlan_groups) {
1528 		mlxsw_cmd_mbox_config_profile_set_max_vlan_groups_set(
1529 			mbox, 1);
1530 		mlxsw_cmd_mbox_config_profile_max_vlan_groups_set(
1531 			mbox, profile->max_vlan_groups);
1532 	}
1533 	if (profile->used_max_regions) {
1534 		mlxsw_cmd_mbox_config_profile_set_max_regions_set(
1535 			mbox, 1);
1536 		mlxsw_cmd_mbox_config_profile_max_regions_set(
1537 			mbox, profile->max_regions);
1538 	}
1539 	if (profile->used_flood_tables) {
1540 		mlxsw_cmd_mbox_config_profile_set_flood_tables_set(
1541 			mbox, 1);
1542 		mlxsw_cmd_mbox_config_profile_max_flood_tables_set(
1543 			mbox, profile->max_flood_tables);
1544 		mlxsw_cmd_mbox_config_profile_max_vid_flood_tables_set(
1545 			mbox, profile->max_vid_flood_tables);
1546 		mlxsw_cmd_mbox_config_profile_max_fid_offset_flood_tables_set(
1547 			mbox, profile->max_fid_offset_flood_tables);
1548 		mlxsw_cmd_mbox_config_profile_fid_offset_flood_table_size_set(
1549 			mbox, profile->fid_offset_flood_table_size);
1550 		mlxsw_cmd_mbox_config_profile_max_fid_flood_tables_set(
1551 			mbox, profile->max_fid_flood_tables);
1552 		mlxsw_cmd_mbox_config_profile_fid_flood_table_size_set(
1553 			mbox, profile->fid_flood_table_size);
1554 	}
1555 	if (profile->flood_mode_prefer_cff && mlxsw_pci->cff_support) {
1556 		enum mlxsw_cmd_mbox_config_profile_flood_mode flood_mode =
1557 			MLXSW_CMD_MBOX_CONFIG_PROFILE_FLOOD_MODE_CFF;
1558 
1559 		mlxsw_cmd_mbox_config_profile_set_flood_mode_set(mbox, 1);
1560 		mlxsw_cmd_mbox_config_profile_flood_mode_set(mbox, flood_mode);
1561 		mlxsw_pci->flood_mode = flood_mode;
1562 	} else if (profile->used_flood_mode) {
1563 		mlxsw_cmd_mbox_config_profile_set_flood_mode_set(
1564 			mbox, 1);
1565 		mlxsw_cmd_mbox_config_profile_flood_mode_set(
1566 			mbox, profile->flood_mode);
1567 		mlxsw_pci->flood_mode = profile->flood_mode;
1568 	} else {
1569 		WARN_ON(1);
1570 		return -EINVAL;
1571 	}
1572 	if (profile->used_max_ib_mc) {
1573 		mlxsw_cmd_mbox_config_profile_set_max_ib_mc_set(
1574 			mbox, 1);
1575 		mlxsw_cmd_mbox_config_profile_max_ib_mc_set(
1576 			mbox, profile->max_ib_mc);
1577 	}
1578 	if (profile->used_max_pkey) {
1579 		mlxsw_cmd_mbox_config_profile_set_max_pkey_set(
1580 			mbox, 1);
1581 		mlxsw_cmd_mbox_config_profile_max_pkey_set(
1582 			mbox, profile->max_pkey);
1583 	}
1584 	if (profile->used_ar_sec) {
1585 		mlxsw_cmd_mbox_config_profile_set_ar_sec_set(
1586 			mbox, 1);
1587 		mlxsw_cmd_mbox_config_profile_ar_sec_set(
1588 			mbox, profile->ar_sec);
1589 	}
1590 	if (profile->used_adaptive_routing_group_cap) {
1591 		mlxsw_cmd_mbox_config_profile_set_adaptive_routing_group_cap_set(
1592 			mbox, 1);
1593 		mlxsw_cmd_mbox_config_profile_adaptive_routing_group_cap_set(
1594 			mbox, profile->adaptive_routing_group_cap);
1595 	}
1596 	if (profile->used_ubridge) {
1597 		mlxsw_cmd_mbox_config_profile_set_ubridge_set(mbox, 1);
1598 		mlxsw_cmd_mbox_config_profile_ubridge_set(mbox,
1599 							  profile->ubridge);
1600 	}
1601 	if (profile->used_kvd_sizes && MLXSW_RES_VALID(res, KVD_SIZE)) {
1602 		err = mlxsw_pci_profile_get_kvd_sizes(mlxsw_pci, profile, res);
1603 		if (err)
1604 			return err;
1605 
1606 		mlxsw_cmd_mbox_config_profile_set_kvd_linear_size_set(mbox, 1);
1607 		mlxsw_cmd_mbox_config_profile_kvd_linear_size_set(mbox,
1608 					MLXSW_RES_GET(res, KVD_LINEAR_SIZE));
1609 		mlxsw_cmd_mbox_config_profile_set_kvd_hash_single_size_set(mbox,
1610 									   1);
1611 		mlxsw_cmd_mbox_config_profile_kvd_hash_single_size_set(mbox,
1612 					MLXSW_RES_GET(res, KVD_SINGLE_SIZE));
1613 		mlxsw_cmd_mbox_config_profile_set_kvd_hash_double_size_set(
1614 								mbox, 1);
1615 		mlxsw_cmd_mbox_config_profile_kvd_hash_double_size_set(mbox,
1616 					MLXSW_RES_GET(res, KVD_DOUBLE_SIZE));
1617 	}
1618 
1619 	for (i = 0; i < MLXSW_CONFIG_PROFILE_SWID_COUNT; i++)
1620 		mlxsw_pci_config_profile_swid_config(mlxsw_pci, mbox, i,
1621 						     &profile->swid_config[i]);
1622 
1623 	if (mlxsw_pci->max_cqe_ver > MLXSW_PCI_CQE_V0) {
1624 		mlxsw_cmd_mbox_config_profile_set_cqe_version_set(mbox, 1);
1625 		mlxsw_cmd_mbox_config_profile_cqe_version_set(mbox, 1);
1626 	}
1627 
1628 	if (profile->used_cqe_time_stamp_type) {
1629 		mlxsw_cmd_mbox_config_profile_set_cqe_time_stamp_type_set(mbox,
1630 									  1);
1631 		mlxsw_cmd_mbox_config_profile_cqe_time_stamp_type_set(mbox,
1632 					profile->cqe_time_stamp_type);
1633 	}
1634 
1635 	if (profile->lag_mode_prefer_sw && mlxsw_pci->lag_mode_support) {
1636 		enum mlxsw_cmd_mbox_config_profile_lag_mode lag_mode =
1637 			MLXSW_CMD_MBOX_CONFIG_PROFILE_LAG_MODE_SW;
1638 
1639 		mlxsw_cmd_mbox_config_profile_set_lag_mode_set(mbox, 1);
1640 		mlxsw_cmd_mbox_config_profile_lag_mode_set(mbox, lag_mode);
1641 		mlxsw_pci->lag_mode = lag_mode;
1642 	} else {
1643 		mlxsw_pci->lag_mode = MLXSW_CMD_MBOX_CONFIG_PROFILE_LAG_MODE_FW;
1644 	}
1645 	return mlxsw_cmd_config_profile_set(mlxsw_pci->core, mbox);
1646 }
1647 
1648 static int mlxsw_pci_boardinfo(struct mlxsw_pci *mlxsw_pci, char *mbox)
1649 {
1650 	struct mlxsw_bus_info *bus_info = &mlxsw_pci->bus_info;
1651 	int err;
1652 
1653 	mlxsw_cmd_mbox_zero(mbox);
1654 	err = mlxsw_cmd_boardinfo(mlxsw_pci->core, mbox);
1655 	if (err)
1656 		return err;
1657 	mlxsw_cmd_mbox_boardinfo_vsd_memcpy_from(mbox, bus_info->vsd);
1658 	mlxsw_cmd_mbox_boardinfo_psid_memcpy_from(mbox, bus_info->psid);
1659 	return 0;
1660 }
1661 
1662 static int mlxsw_pci_fw_area_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
1663 				  u16 num_pages)
1664 {
1665 	struct mlxsw_pci_mem_item *mem_item;
1666 	int nent = 0;
1667 	int i;
1668 	int err;
1669 
1670 	mlxsw_pci->fw_area.items = kcalloc(num_pages, sizeof(*mem_item),
1671 					   GFP_KERNEL);
1672 	if (!mlxsw_pci->fw_area.items)
1673 		return -ENOMEM;
1674 	mlxsw_pci->fw_area.count = num_pages;
1675 
1676 	mlxsw_cmd_mbox_zero(mbox);
1677 	for (i = 0; i < num_pages; i++) {
1678 		mem_item = &mlxsw_pci->fw_area.items[i];
1679 
1680 		mem_item->size = MLXSW_PCI_PAGE_SIZE;
1681 		mem_item->buf = dma_alloc_coherent(&mlxsw_pci->pdev->dev,
1682 						   mem_item->size,
1683 						   &mem_item->mapaddr, GFP_KERNEL);
1684 		if (!mem_item->buf) {
1685 			err = -ENOMEM;
1686 			goto err_alloc;
1687 		}
1688 		mlxsw_cmd_mbox_map_fa_pa_set(mbox, nent, mem_item->mapaddr);
1689 		mlxsw_cmd_mbox_map_fa_log2size_set(mbox, nent, 0); /* 1 page */
1690 		if (++nent == MLXSW_CMD_MAP_FA_VPM_ENTRIES_MAX) {
1691 			err = mlxsw_cmd_map_fa(mlxsw_pci->core, mbox, nent);
1692 			if (err)
1693 				goto err_cmd_map_fa;
1694 			nent = 0;
1695 			mlxsw_cmd_mbox_zero(mbox);
1696 		}
1697 	}
1698 
1699 	if (nent) {
1700 		err = mlxsw_cmd_map_fa(mlxsw_pci->core, mbox, nent);
1701 		if (err)
1702 			goto err_cmd_map_fa;
1703 	}
1704 
1705 	return 0;
1706 
1707 err_cmd_map_fa:
1708 err_alloc:
1709 	for (i--; i >= 0; i--) {
1710 		mem_item = &mlxsw_pci->fw_area.items[i];
1711 
1712 		dma_free_coherent(&mlxsw_pci->pdev->dev, mem_item->size,
1713 				  mem_item->buf, mem_item->mapaddr);
1714 	}
1715 	kfree(mlxsw_pci->fw_area.items);
1716 	return err;
1717 }
1718 
1719 static void mlxsw_pci_fw_area_fini(struct mlxsw_pci *mlxsw_pci)
1720 {
1721 	struct mlxsw_pci_mem_item *mem_item;
1722 	int i;
1723 
1724 	mlxsw_cmd_unmap_fa(mlxsw_pci->core);
1725 
1726 	for (i = 0; i < mlxsw_pci->fw_area.count; i++) {
1727 		mem_item = &mlxsw_pci->fw_area.items[i];
1728 
1729 		dma_free_coherent(&mlxsw_pci->pdev->dev, mem_item->size,
1730 				  mem_item->buf, mem_item->mapaddr);
1731 	}
1732 	kfree(mlxsw_pci->fw_area.items);
1733 }
1734 
1735 static irqreturn_t mlxsw_pci_eq_irq_handler(int irq, void *dev_id)
1736 {
1737 	struct mlxsw_pci *mlxsw_pci = dev_id;
1738 	struct mlxsw_pci_queue *q;
1739 
1740 	q = mlxsw_pci_eq_get(mlxsw_pci);
1741 	tasklet_schedule(&q->u.eq.tasklet);
1742 	return IRQ_HANDLED;
1743 }
1744 
1745 static int mlxsw_pci_mbox_alloc(struct mlxsw_pci *mlxsw_pci,
1746 				struct mlxsw_pci_mem_item *mbox)
1747 {
1748 	struct pci_dev *pdev = mlxsw_pci->pdev;
1749 	int err = 0;
1750 
1751 	mbox->size = MLXSW_CMD_MBOX_SIZE;
1752 	mbox->buf = dma_alloc_coherent(&pdev->dev, MLXSW_CMD_MBOX_SIZE,
1753 				       &mbox->mapaddr, GFP_KERNEL);
1754 	if (!mbox->buf) {
1755 		dev_err(&pdev->dev, "Failed allocating memory for mailbox\n");
1756 		err = -ENOMEM;
1757 	}
1758 
1759 	return err;
1760 }
1761 
1762 static void mlxsw_pci_mbox_free(struct mlxsw_pci *mlxsw_pci,
1763 				struct mlxsw_pci_mem_item *mbox)
1764 {
1765 	struct pci_dev *pdev = mlxsw_pci->pdev;
1766 
1767 	dma_free_coherent(&pdev->dev, MLXSW_CMD_MBOX_SIZE, mbox->buf,
1768 			  mbox->mapaddr);
1769 }
1770 
1771 static int mlxsw_pci_sys_ready_wait(struct mlxsw_pci *mlxsw_pci,
1772 				    const struct pci_device_id *id,
1773 				    u32 *p_sys_status)
1774 {
1775 	unsigned long end;
1776 	u32 val;
1777 
1778 	/* We must wait for the HW to become responsive. */
1779 	msleep(MLXSW_PCI_SW_RESET_WAIT_MSECS);
1780 
1781 	end = jiffies + msecs_to_jiffies(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS);
1782 	do {
1783 		val = mlxsw_pci_read32(mlxsw_pci, FW_READY);
1784 		if ((val & MLXSW_PCI_FW_READY_MASK) == MLXSW_PCI_FW_READY_MAGIC)
1785 			return 0;
1786 		cond_resched();
1787 	} while (time_before(jiffies, end));
1788 
1789 	*p_sys_status = val & MLXSW_PCI_FW_READY_MASK;
1790 
1791 	return -EBUSY;
1792 }
1793 
1794 static int mlxsw_pci_reset_at_pci_disable(struct mlxsw_pci *mlxsw_pci,
1795 					  bool pci_reset_sbr_supported)
1796 {
1797 	struct pci_dev *pdev = mlxsw_pci->pdev;
1798 	char mrsr_pl[MLXSW_REG_MRSR_LEN];
1799 	struct pci_dev *bridge;
1800 	int err;
1801 
1802 	if (!pci_reset_sbr_supported) {
1803 		pci_dbg(pdev, "Performing PCI hot reset instead of \"all reset\"\n");
1804 		goto sbr;
1805 	}
1806 
1807 	mlxsw_reg_mrsr_pack(mrsr_pl,
1808 			    MLXSW_REG_MRSR_COMMAND_RESET_AT_PCI_DISABLE);
1809 	err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl);
1810 	if (err)
1811 		return err;
1812 
1813 sbr:
1814 	device_lock_assert(&pdev->dev);
1815 
1816 	bridge = pci_upstream_bridge(pdev);
1817 	if (bridge)
1818 		pci_cfg_access_lock(bridge);
1819 	pci_cfg_access_lock(pdev);
1820 	pci_save_state(pdev);
1821 
1822 	err = __pci_reset_function_locked(pdev);
1823 	if (err)
1824 		pci_err(pdev, "PCI function reset failed with %d\n", err);
1825 
1826 	pci_restore_state(pdev);
1827 	pci_cfg_access_unlock(pdev);
1828 	if (bridge)
1829 		pci_cfg_access_unlock(bridge);
1830 
1831 	return err;
1832 }
1833 
1834 static int mlxsw_pci_reset_sw(struct mlxsw_pci *mlxsw_pci)
1835 {
1836 	char mrsr_pl[MLXSW_REG_MRSR_LEN];
1837 
1838 	mlxsw_reg_mrsr_pack(mrsr_pl, MLXSW_REG_MRSR_COMMAND_SOFTWARE_RESET);
1839 	return mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl);
1840 }
1841 
1842 static int
1843 mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id)
1844 {
1845 	struct pci_dev *pdev = mlxsw_pci->pdev;
1846 	bool pci_reset_sbr_supported = false;
1847 	char mcam_pl[MLXSW_REG_MCAM_LEN];
1848 	bool pci_reset_supported = false;
1849 	u32 sys_status;
1850 	int err;
1851 
1852 	err = mlxsw_pci_sys_ready_wait(mlxsw_pci, id, &sys_status);
1853 	if (err) {
1854 		dev_err(&pdev->dev, "Failed to reach system ready status before reset. Status is 0x%x\n",
1855 			sys_status);
1856 		return err;
1857 	}
1858 
1859 	/* PCI core already issued a PCI reset, do not issue another reset. */
1860 	if (mlxsw_pci->skip_reset)
1861 		return 0;
1862 
1863 	mlxsw_reg_mcam_pack(mcam_pl,
1864 			    MLXSW_REG_MCAM_FEATURE_GROUP_ENHANCED_FEATURES);
1865 	err = mlxsw_reg_query(mlxsw_pci->core, MLXSW_REG(mcam), mcam_pl);
1866 	if (!err) {
1867 		mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET,
1868 				      &pci_reset_supported);
1869 		mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET_SBR,
1870 				      &pci_reset_sbr_supported);
1871 	}
1872 
1873 	if (pci_reset_supported) {
1874 		pci_dbg(pdev, "Starting PCI reset flow\n");
1875 		err = mlxsw_pci_reset_at_pci_disable(mlxsw_pci,
1876 						     pci_reset_sbr_supported);
1877 	} else {
1878 		pci_dbg(pdev, "Starting software reset flow\n");
1879 		err = mlxsw_pci_reset_sw(mlxsw_pci);
1880 	}
1881 	if (err)
1882 		return err;
1883 
1884 	err = mlxsw_pci_sys_ready_wait(mlxsw_pci, id, &sys_status);
1885 	if (err) {
1886 		dev_err(&pdev->dev, "Failed to reach system ready status after reset. Status is 0x%x\n",
1887 			sys_status);
1888 		return err;
1889 	}
1890 
1891 	return 0;
1892 }
1893 
1894 static int mlxsw_pci_alloc_irq_vectors(struct mlxsw_pci *mlxsw_pci)
1895 {
1896 	int err;
1897 
1898 	err = pci_alloc_irq_vectors(mlxsw_pci->pdev, 1, 1, PCI_IRQ_MSIX);
1899 	if (err < 0)
1900 		dev_err(&mlxsw_pci->pdev->dev, "MSI-X init failed\n");
1901 	return err;
1902 }
1903 
1904 static void mlxsw_pci_free_irq_vectors(struct mlxsw_pci *mlxsw_pci)
1905 {
1906 	pci_free_irq_vectors(mlxsw_pci->pdev);
1907 }
1908 
1909 static void mlxsw_pci_num_sg_entries_set(struct mlxsw_pci *mlxsw_pci)
1910 {
1911 	u8 num_sg_entries;
1912 
1913 	num_sg_entries = mlxsw_pci_num_sg_entries_get(MLXSW_PORT_MAX_MTU);
1914 	mlxsw_pci->num_sg_entries = min(num_sg_entries,
1915 					MLXSW_PCI_WQE_SG_ENTRIES);
1916 
1917 	WARN_ON(num_sg_entries > MLXSW_PCI_WQE_SG_ENTRIES);
1918 }
1919 
1920 static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
1921 			  const struct mlxsw_config_profile *profile,
1922 			  struct mlxsw_res *res)
1923 {
1924 	struct mlxsw_pci *mlxsw_pci = bus_priv;
1925 	struct pci_dev *pdev = mlxsw_pci->pdev;
1926 	char *mbox;
1927 	u16 num_pages;
1928 	int err;
1929 
1930 	mlxsw_pci->core = mlxsw_core;
1931 
1932 	mbox = mlxsw_cmd_mbox_alloc();
1933 	if (!mbox)
1934 		return -ENOMEM;
1935 
1936 	err = mlxsw_pci_reset(mlxsw_pci, mlxsw_pci->id);
1937 	if (err)
1938 		goto err_reset;
1939 
1940 	err = mlxsw_pci_alloc_irq_vectors(mlxsw_pci);
1941 	if (err < 0) {
1942 		dev_err(&pdev->dev, "MSI-X init failed\n");
1943 		goto err_alloc_irq;
1944 	}
1945 
1946 	err = mlxsw_cmd_query_fw(mlxsw_core, mbox);
1947 	if (err)
1948 		goto err_query_fw;
1949 
1950 	mlxsw_pci->bus_info.fw_rev.major =
1951 		mlxsw_cmd_mbox_query_fw_fw_rev_major_get(mbox);
1952 	mlxsw_pci->bus_info.fw_rev.minor =
1953 		mlxsw_cmd_mbox_query_fw_fw_rev_minor_get(mbox);
1954 	mlxsw_pci->bus_info.fw_rev.subminor =
1955 		mlxsw_cmd_mbox_query_fw_fw_rev_subminor_get(mbox);
1956 
1957 	if (mlxsw_cmd_mbox_query_fw_cmd_interface_rev_get(mbox) != 1) {
1958 		dev_err(&pdev->dev, "Unsupported cmd interface revision ID queried from hw\n");
1959 		err = -EINVAL;
1960 		goto err_iface_rev;
1961 	}
1962 	if (mlxsw_cmd_mbox_query_fw_doorbell_page_bar_get(mbox) != 0) {
1963 		dev_err(&pdev->dev, "Unsupported doorbell page bar queried from hw\n");
1964 		err = -EINVAL;
1965 		goto err_doorbell_page_bar;
1966 	}
1967 
1968 	mlxsw_pci->doorbell_offset =
1969 		mlxsw_cmd_mbox_query_fw_doorbell_page_offset_get(mbox);
1970 
1971 	if (mlxsw_cmd_mbox_query_fw_fr_rn_clk_bar_get(mbox) != 0) {
1972 		dev_err(&pdev->dev, "Unsupported free running clock BAR queried from hw\n");
1973 		err = -EINVAL;
1974 		goto err_fr_rn_clk_bar;
1975 	}
1976 
1977 	mlxsw_pci->free_running_clock_offset =
1978 		mlxsw_cmd_mbox_query_fw_free_running_clock_offset_get(mbox);
1979 
1980 	if (mlxsw_cmd_mbox_query_fw_utc_sec_bar_get(mbox) != 0) {
1981 		dev_err(&pdev->dev, "Unsupported UTC sec BAR queried from hw\n");
1982 		err = -EINVAL;
1983 		goto err_utc_sec_bar;
1984 	}
1985 
1986 	mlxsw_pci->utc_sec_offset =
1987 		mlxsw_cmd_mbox_query_fw_utc_sec_offset_get(mbox);
1988 
1989 	if (mlxsw_cmd_mbox_query_fw_utc_nsec_bar_get(mbox) != 0) {
1990 		dev_err(&pdev->dev, "Unsupported UTC nsec BAR queried from hw\n");
1991 		err = -EINVAL;
1992 		goto err_utc_nsec_bar;
1993 	}
1994 
1995 	mlxsw_pci->utc_nsec_offset =
1996 		mlxsw_cmd_mbox_query_fw_utc_nsec_offset_get(mbox);
1997 
1998 	mlxsw_pci->lag_mode_support =
1999 		mlxsw_cmd_mbox_query_fw_lag_mode_support_get(mbox);
2000 	mlxsw_pci->cff_support =
2001 		mlxsw_cmd_mbox_query_fw_cff_support_get(mbox);
2002 
2003 	num_pages = mlxsw_cmd_mbox_query_fw_fw_pages_get(mbox);
2004 	err = mlxsw_pci_fw_area_init(mlxsw_pci, mbox, num_pages);
2005 	if (err)
2006 		goto err_fw_area_init;
2007 
2008 	err = mlxsw_pci_boardinfo(mlxsw_pci, mbox);
2009 	if (err)
2010 		goto err_boardinfo;
2011 
2012 	err = mlxsw_core_resources_query(mlxsw_core, mbox, res);
2013 	if (err)
2014 		goto err_query_resources;
2015 
2016 	if (MLXSW_CORE_RES_VALID(mlxsw_core, CQE_V2) &&
2017 	    MLXSW_CORE_RES_GET(mlxsw_core, CQE_V2))
2018 		mlxsw_pci->max_cqe_ver = MLXSW_PCI_CQE_V2;
2019 	else if (MLXSW_CORE_RES_VALID(mlxsw_core, CQE_V1) &&
2020 		 MLXSW_CORE_RES_GET(mlxsw_core, CQE_V1))
2021 		mlxsw_pci->max_cqe_ver = MLXSW_PCI_CQE_V1;
2022 	else if ((MLXSW_CORE_RES_VALID(mlxsw_core, CQE_V0) &&
2023 		  MLXSW_CORE_RES_GET(mlxsw_core, CQE_V0)) ||
2024 		 !MLXSW_CORE_RES_VALID(mlxsw_core, CQE_V0)) {
2025 		mlxsw_pci->max_cqe_ver = MLXSW_PCI_CQE_V0;
2026 	} else {
2027 		dev_err(&pdev->dev, "Invalid supported CQE version combination reported\n");
2028 		goto err_cqe_v_check;
2029 	}
2030 
2031 	err = mlxsw_pci_config_profile(mlxsw_pci, mbox, profile, res);
2032 	if (err)
2033 		goto err_config_profile;
2034 
2035 	/* Some resources depend on details of config_profile, such as unified
2036 	 * bridge model. Query the resources again to get correct values.
2037 	 */
2038 	err = mlxsw_core_resources_query(mlxsw_core, mbox, res);
2039 	if (err)
2040 		goto err_requery_resources;
2041 
2042 	mlxsw_pci_num_sg_entries_set(mlxsw_pci);
2043 
2044 	err = mlxsw_pci_napi_devs_init(mlxsw_pci);
2045 	if (err)
2046 		goto err_napi_devs_init;
2047 
2048 	err = mlxsw_pci_aqs_init(mlxsw_pci, mbox);
2049 	if (err)
2050 		goto err_aqs_init;
2051 
2052 	err = request_irq(pci_irq_vector(pdev, 0),
2053 			  mlxsw_pci_eq_irq_handler, 0,
2054 			  mlxsw_pci->bus_info.device_kind, mlxsw_pci);
2055 	if (err) {
2056 		dev_err(&pdev->dev, "IRQ request failed\n");
2057 		goto err_request_eq_irq;
2058 	}
2059 
2060 	goto mbox_put;
2061 
2062 err_request_eq_irq:
2063 	mlxsw_pci_aqs_fini(mlxsw_pci);
2064 err_aqs_init:
2065 	mlxsw_pci_napi_devs_fini(mlxsw_pci);
2066 err_napi_devs_init:
2067 err_requery_resources:
2068 err_config_profile:
2069 err_cqe_v_check:
2070 err_query_resources:
2071 err_boardinfo:
2072 	mlxsw_pci_fw_area_fini(mlxsw_pci);
2073 err_fw_area_init:
2074 err_utc_nsec_bar:
2075 err_utc_sec_bar:
2076 err_fr_rn_clk_bar:
2077 err_doorbell_page_bar:
2078 err_iface_rev:
2079 err_query_fw:
2080 	mlxsw_pci_free_irq_vectors(mlxsw_pci);
2081 err_alloc_irq:
2082 err_reset:
2083 mbox_put:
2084 	mlxsw_cmd_mbox_free(mbox);
2085 	return err;
2086 }
2087 
2088 static void mlxsw_pci_fini(void *bus_priv)
2089 {
2090 	struct mlxsw_pci *mlxsw_pci = bus_priv;
2091 
2092 	free_irq(pci_irq_vector(mlxsw_pci->pdev, 0), mlxsw_pci);
2093 	mlxsw_pci_aqs_fini(mlxsw_pci);
2094 	mlxsw_pci_napi_devs_fini(mlxsw_pci);
2095 	mlxsw_pci_fw_area_fini(mlxsw_pci);
2096 	mlxsw_pci_free_irq_vectors(mlxsw_pci);
2097 }
2098 
2099 static int mlxsw_pci_txhdr_construct(struct sk_buff *skb,
2100 				     const struct mlxsw_txhdr_info *txhdr_info)
2101 {
2102 	const struct mlxsw_tx_info tx_info = txhdr_info->tx_info;
2103 	char *txhdr;
2104 
2105 	if (skb_cow_head(skb, MLXSW_TXHDR_LEN))
2106 		return -ENOMEM;
2107 
2108 	txhdr = skb_push(skb, MLXSW_TXHDR_LEN);
2109 	memset(txhdr, 0, MLXSW_TXHDR_LEN);
2110 
2111 	mlxsw_tx_hdr_version_set(txhdr, MLXSW_TXHDR_VERSION_1);
2112 	mlxsw_tx_hdr_proto_set(txhdr, MLXSW_TXHDR_PROTO_ETH);
2113 	mlxsw_tx_hdr_swid_set(txhdr, 0);
2114 
2115 	if (unlikely(txhdr_info->data)) {
2116 		u16 fid = txhdr_info->max_fid + tx_info.local_port - 1;
2117 
2118 		mlxsw_tx_hdr_rx_is_router_set(txhdr, true);
2119 		mlxsw_tx_hdr_fid_valid_set(txhdr, true);
2120 		mlxsw_tx_hdr_fid_set(txhdr, fid);
2121 		mlxsw_tx_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_DATA);
2122 	} else {
2123 		mlxsw_tx_hdr_ctl_set(txhdr, MLXSW_TXHDR_ETH_CTL);
2124 		mlxsw_tx_hdr_control_tclass_set(txhdr, 1);
2125 		mlxsw_tx_hdr_port_mid_set(txhdr, tx_info.local_port);
2126 		mlxsw_tx_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_CONTROL);
2127 	}
2128 
2129 	return 0;
2130 }
2131 
2132 static struct mlxsw_pci_queue *
2133 mlxsw_pci_sdq_pick(struct mlxsw_pci *mlxsw_pci,
2134 		   const struct mlxsw_tx_info *tx_info)
2135 {
2136 	u8 ctl_sdq_count = mlxsw_pci->num_sdqs - 1;
2137 	u8 sdqn;
2138 
2139 	if (tx_info->is_emad) {
2140 		sdqn = MLXSW_PCI_SDQ_EMAD_INDEX;
2141 	} else {
2142 		BUILD_BUG_ON(MLXSW_PCI_SDQ_EMAD_INDEX != 0);
2143 		sdqn = 1 + (tx_info->local_port % ctl_sdq_count);
2144 	}
2145 
2146 	return mlxsw_pci_sdq_get(mlxsw_pci, sdqn);
2147 }
2148 
2149 static bool mlxsw_pci_skb_transmit_busy(void *bus_priv,
2150 					const struct mlxsw_tx_info *tx_info)
2151 {
2152 	struct mlxsw_pci *mlxsw_pci = bus_priv;
2153 	struct mlxsw_pci_queue *q = mlxsw_pci_sdq_pick(mlxsw_pci, tx_info);
2154 
2155 	return !mlxsw_pci_queue_elem_info_producer_get(q);
2156 }
2157 
2158 static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb,
2159 				  const struct mlxsw_txhdr_info *txhdr_info)
2160 {
2161 	struct mlxsw_pci *mlxsw_pci = bus_priv;
2162 	struct mlxsw_pci_queue *q;
2163 	struct mlxsw_pci_queue_elem_info *elem_info;
2164 	char *wqe;
2165 	int i;
2166 	int err;
2167 
2168 	err = mlxsw_pci_txhdr_construct(skb, txhdr_info);
2169 	if (err)
2170 		return err;
2171 
2172 	if (skb_shinfo(skb)->nr_frags > MLXSW_PCI_WQE_SG_ENTRIES - 1) {
2173 		err = skb_linearize(skb);
2174 		if (err)
2175 			return err;
2176 	}
2177 
2178 	q = mlxsw_pci_sdq_pick(mlxsw_pci, &txhdr_info->tx_info);
2179 	spin_lock_bh(&q->lock);
2180 	elem_info = mlxsw_pci_queue_elem_info_producer_get(q);
2181 	if (!elem_info) {
2182 		/* queue is full */
2183 		err = -EAGAIN;
2184 		goto unlock;
2185 	}
2186 	mlxsw_skb_cb(skb)->tx_info = txhdr_info->tx_info;
2187 	elem_info->sdq.skb = skb;
2188 
2189 	wqe = elem_info->elem;
2190 	mlxsw_pci_wqe_c_set(wqe, 1); /* always report completion */
2191 	mlxsw_pci_wqe_lp_set(wqe, 0);
2192 	mlxsw_pci_wqe_type_set(wqe, MLXSW_PCI_WQE_TYPE_ETHERNET);
2193 
2194 	err = mlxsw_pci_wqe_frag_map(mlxsw_pci, wqe, 0, skb->data,
2195 				     skb_headlen(skb), DMA_TO_DEVICE);
2196 	if (err)
2197 		goto unlock;
2198 
2199 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2200 		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2201 
2202 		err = mlxsw_pci_wqe_frag_map(mlxsw_pci, wqe, i + 1,
2203 					     skb_frag_address(frag),
2204 					     skb_frag_size(frag),
2205 					     DMA_TO_DEVICE);
2206 		if (err)
2207 			goto unmap_frags;
2208 	}
2209 
2210 	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
2211 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
2212 
2213 	/* Set unused sq entries byte count to zero. */
2214 	for (i++; i < MLXSW_PCI_WQE_SG_ENTRIES; i++)
2215 		mlxsw_pci_wqe_byte_count_set(wqe, i, 0);
2216 
2217 	mlxsw_pci_wqe_ipcs_set(wqe, skb->ip_summed == CHECKSUM_PARTIAL);
2218 
2219 	/* Everything is set up, ring producer doorbell to get HW going */
2220 	q->producer_counter++;
2221 	mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q);
2222 
2223 	goto unlock;
2224 
2225 unmap_frags:
2226 	for (; i >= 0; i--)
2227 		mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, i, DMA_TO_DEVICE);
2228 unlock:
2229 	spin_unlock_bh(&q->lock);
2230 	return err;
2231 }
2232 
2233 static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod,
2234 			      u32 in_mod, bool out_mbox_direct,
2235 			      char *in_mbox, size_t in_mbox_size,
2236 			      char *out_mbox, size_t out_mbox_size,
2237 			      u8 *p_status)
2238 {
2239 	struct mlxsw_pci *mlxsw_pci = bus_priv;
2240 	dma_addr_t in_mapaddr = 0, out_mapaddr = 0;
2241 	unsigned long timeout = msecs_to_jiffies(MLXSW_PCI_CIR_TIMEOUT_MSECS);
2242 	unsigned long end;
2243 	bool wait_done;
2244 	int err;
2245 
2246 	*p_status = MLXSW_CMD_STATUS_OK;
2247 
2248 	err = mutex_lock_interruptible(&mlxsw_pci->cmd.lock);
2249 	if (err)
2250 		return err;
2251 
2252 	if (in_mbox) {
2253 		memcpy(mlxsw_pci->cmd.in_mbox.buf, in_mbox, in_mbox_size);
2254 		in_mapaddr = mlxsw_pci->cmd.in_mbox.mapaddr;
2255 	}
2256 	mlxsw_pci_write32(mlxsw_pci, CIR_IN_PARAM_HI, upper_32_bits(in_mapaddr));
2257 	mlxsw_pci_write32(mlxsw_pci, CIR_IN_PARAM_LO, lower_32_bits(in_mapaddr));
2258 
2259 	if (out_mbox)
2260 		out_mapaddr = mlxsw_pci->cmd.out_mbox.mapaddr;
2261 	mlxsw_pci_write32(mlxsw_pci, CIR_OUT_PARAM_HI, upper_32_bits(out_mapaddr));
2262 	mlxsw_pci_write32(mlxsw_pci, CIR_OUT_PARAM_LO, lower_32_bits(out_mapaddr));
2263 
2264 	mlxsw_pci_write32(mlxsw_pci, CIR_IN_MODIFIER, in_mod);
2265 	mlxsw_pci_write32(mlxsw_pci, CIR_TOKEN, 0);
2266 
2267 	wait_done = false;
2268 
2269 	wmb(); /* all needs to be written before we write control register */
2270 	mlxsw_pci_write32(mlxsw_pci, CIR_CTRL,
2271 			  MLXSW_PCI_CIR_CTRL_GO_BIT |
2272 			  (opcode_mod << MLXSW_PCI_CIR_CTRL_OPCODE_MOD_SHIFT) |
2273 			  opcode);
2274 
2275 	end = jiffies + timeout;
2276 	do {
2277 		u32 ctrl = mlxsw_pci_read32(mlxsw_pci, CIR_CTRL);
2278 
2279 		if (!(ctrl & MLXSW_PCI_CIR_CTRL_GO_BIT)) {
2280 			wait_done = true;
2281 			*p_status = ctrl >> MLXSW_PCI_CIR_CTRL_STATUS_SHIFT;
2282 			break;
2283 		}
2284 		cond_resched();
2285 	} while (time_before(jiffies, end));
2286 
2287 	err = 0;
2288 	if (wait_done) {
2289 		if (*p_status)
2290 			err = -EIO;
2291 	} else {
2292 		err = -ETIMEDOUT;
2293 	}
2294 
2295 	if (!err && out_mbox && out_mbox_direct) {
2296 		/* Some commands don't use output param as address to mailbox
2297 		 * but they store output directly into registers. In that case,
2298 		 * copy registers into mbox buffer.
2299 		 */
2300 		__be32 tmp;
2301 
2302 		tmp = cpu_to_be32(mlxsw_pci_read32(mlxsw_pci,
2303 						   CIR_OUT_PARAM_HI));
2304 		memcpy(out_mbox, &tmp, sizeof(tmp));
2305 		tmp = cpu_to_be32(mlxsw_pci_read32(mlxsw_pci,
2306 						   CIR_OUT_PARAM_LO));
2307 		memcpy(out_mbox + sizeof(tmp), &tmp, sizeof(tmp));
2308 	} else if (!err && out_mbox) {
2309 		memcpy(out_mbox, mlxsw_pci->cmd.out_mbox.buf, out_mbox_size);
2310 	}
2311 
2312 	mutex_unlock(&mlxsw_pci->cmd.lock);
2313 
2314 	return err;
2315 }
2316 
2317 static u32 mlxsw_pci_read_frc_h(void *bus_priv)
2318 {
2319 	struct mlxsw_pci *mlxsw_pci = bus_priv;
2320 	u64 frc_offset_h;
2321 
2322 	frc_offset_h = mlxsw_pci->free_running_clock_offset;
2323 	return mlxsw_pci_read32_off(mlxsw_pci, frc_offset_h);
2324 }
2325 
2326 static u32 mlxsw_pci_read_frc_l(void *bus_priv)
2327 {
2328 	struct mlxsw_pci *mlxsw_pci = bus_priv;
2329 	u64 frc_offset_l;
2330 
2331 	frc_offset_l = mlxsw_pci->free_running_clock_offset + 4;
2332 	return mlxsw_pci_read32_off(mlxsw_pci, frc_offset_l);
2333 }
2334 
2335 static u32 mlxsw_pci_read_utc_sec(void *bus_priv)
2336 {
2337 	struct mlxsw_pci *mlxsw_pci = bus_priv;
2338 
2339 	return mlxsw_pci_read32_off(mlxsw_pci, mlxsw_pci->utc_sec_offset);
2340 }
2341 
2342 static u32 mlxsw_pci_read_utc_nsec(void *bus_priv)
2343 {
2344 	struct mlxsw_pci *mlxsw_pci = bus_priv;
2345 
2346 	return mlxsw_pci_read32_off(mlxsw_pci, mlxsw_pci->utc_nsec_offset);
2347 }
2348 
2349 static enum mlxsw_cmd_mbox_config_profile_lag_mode
2350 mlxsw_pci_lag_mode(void *bus_priv)
2351 {
2352 	struct mlxsw_pci *mlxsw_pci = bus_priv;
2353 
2354 	return mlxsw_pci->lag_mode;
2355 }
2356 
2357 static enum mlxsw_cmd_mbox_config_profile_flood_mode
2358 mlxsw_pci_flood_mode(void *bus_priv)
2359 {
2360 	struct mlxsw_pci *mlxsw_pci = bus_priv;
2361 
2362 	return mlxsw_pci->flood_mode;
2363 }
2364 
2365 static const struct mlxsw_bus mlxsw_pci_bus = {
2366 	.kind			= "pci",
2367 	.init			= mlxsw_pci_init,
2368 	.fini			= mlxsw_pci_fini,
2369 	.skb_transmit_busy	= mlxsw_pci_skb_transmit_busy,
2370 	.skb_transmit		= mlxsw_pci_skb_transmit,
2371 	.cmd_exec		= mlxsw_pci_cmd_exec,
2372 	.read_frc_h		= mlxsw_pci_read_frc_h,
2373 	.read_frc_l		= mlxsw_pci_read_frc_l,
2374 	.read_utc_sec		= mlxsw_pci_read_utc_sec,
2375 	.read_utc_nsec		= mlxsw_pci_read_utc_nsec,
2376 	.lag_mode		= mlxsw_pci_lag_mode,
2377 	.flood_mode		= mlxsw_pci_flood_mode,
2378 	.features		= MLXSW_BUS_F_TXRX | MLXSW_BUS_F_RESET,
2379 };
2380 
2381 static int mlxsw_pci_cmd_init(struct mlxsw_pci *mlxsw_pci)
2382 {
2383 	int err;
2384 
2385 	mutex_init(&mlxsw_pci->cmd.lock);
2386 
2387 	err = mlxsw_pci_mbox_alloc(mlxsw_pci, &mlxsw_pci->cmd.in_mbox);
2388 	if (err)
2389 		goto err_in_mbox_alloc;
2390 
2391 	err = mlxsw_pci_mbox_alloc(mlxsw_pci, &mlxsw_pci->cmd.out_mbox);
2392 	if (err)
2393 		goto err_out_mbox_alloc;
2394 
2395 	return 0;
2396 
2397 err_out_mbox_alloc:
2398 	mlxsw_pci_mbox_free(mlxsw_pci, &mlxsw_pci->cmd.in_mbox);
2399 err_in_mbox_alloc:
2400 	mutex_destroy(&mlxsw_pci->cmd.lock);
2401 	return err;
2402 }
2403 
2404 static void mlxsw_pci_cmd_fini(struct mlxsw_pci *mlxsw_pci)
2405 {
2406 	mlxsw_pci_mbox_free(mlxsw_pci, &mlxsw_pci->cmd.out_mbox);
2407 	mlxsw_pci_mbox_free(mlxsw_pci, &mlxsw_pci->cmd.in_mbox);
2408 	mutex_destroy(&mlxsw_pci->cmd.lock);
2409 }
2410 
2411 static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
2412 {
2413 	const char *driver_name = dev_driver_string(&pdev->dev);
2414 	struct mlxsw_pci *mlxsw_pci;
2415 	int err;
2416 
2417 	mlxsw_pci = kzalloc(sizeof(*mlxsw_pci), GFP_KERNEL);
2418 	if (!mlxsw_pci)
2419 		return -ENOMEM;
2420 
2421 	err = pci_enable_device(pdev);
2422 	if (err) {
2423 		dev_err(&pdev->dev, "pci_enable_device failed\n");
2424 		goto err_pci_enable_device;
2425 	}
2426 
2427 	err = pci_request_regions(pdev, driver_name);
2428 	if (err) {
2429 		dev_err(&pdev->dev, "pci_request_regions failed\n");
2430 		goto err_pci_request_regions;
2431 	}
2432 
2433 	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
2434 	if (err) {
2435 		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
2436 		if (err) {
2437 			dev_err(&pdev->dev, "dma_set_mask failed\n");
2438 			goto err_pci_set_dma_mask;
2439 		}
2440 	}
2441 
2442 	if (pci_resource_len(pdev, 0) < MLXSW_PCI_BAR0_SIZE) {
2443 		dev_err(&pdev->dev, "invalid PCI region size\n");
2444 		err = -EINVAL;
2445 		goto err_pci_resource_len_check;
2446 	}
2447 
2448 	mlxsw_pci->hw_addr = ioremap(pci_resource_start(pdev, 0),
2449 				     pci_resource_len(pdev, 0));
2450 	if (!mlxsw_pci->hw_addr) {
2451 		dev_err(&pdev->dev, "ioremap failed\n");
2452 		err = -EIO;
2453 		goto err_ioremap;
2454 	}
2455 	pci_set_master(pdev);
2456 
2457 	mlxsw_pci->pdev = pdev;
2458 	pci_set_drvdata(pdev, mlxsw_pci);
2459 
2460 	err = mlxsw_pci_cmd_init(mlxsw_pci);
2461 	if (err)
2462 		goto err_pci_cmd_init;
2463 
2464 	mlxsw_pci->bus_info.device_kind = driver_name;
2465 	mlxsw_pci->bus_info.device_name = pci_name(mlxsw_pci->pdev);
2466 	mlxsw_pci->bus_info.dev = &pdev->dev;
2467 	mlxsw_pci->bus_info.read_clock_capable = true;
2468 	mlxsw_pci->id = id;
2469 
2470 	err = mlxsw_core_bus_device_register(&mlxsw_pci->bus_info,
2471 					     &mlxsw_pci_bus, mlxsw_pci, false,
2472 					     NULL, NULL);
2473 	if (err) {
2474 		dev_err(&pdev->dev, "cannot register bus device\n");
2475 		goto err_bus_device_register;
2476 	}
2477 
2478 	return 0;
2479 
2480 err_bus_device_register:
2481 	mlxsw_pci_cmd_fini(mlxsw_pci);
2482 err_pci_cmd_init:
2483 	iounmap(mlxsw_pci->hw_addr);
2484 err_ioremap:
2485 err_pci_resource_len_check:
2486 err_pci_set_dma_mask:
2487 	pci_release_regions(pdev);
2488 err_pci_request_regions:
2489 	pci_disable_device(pdev);
2490 err_pci_enable_device:
2491 	kfree(mlxsw_pci);
2492 	return err;
2493 }
2494 
2495 static void mlxsw_pci_remove(struct pci_dev *pdev)
2496 {
2497 	struct mlxsw_pci *mlxsw_pci = pci_get_drvdata(pdev);
2498 
2499 	mlxsw_core_bus_device_unregister(mlxsw_pci->core, false);
2500 	mlxsw_pci_cmd_fini(mlxsw_pci);
2501 	iounmap(mlxsw_pci->hw_addr);
2502 	pci_release_regions(mlxsw_pci->pdev);
2503 	pci_disable_device(mlxsw_pci->pdev);
2504 	kfree(mlxsw_pci);
2505 }
2506 
2507 static void mlxsw_pci_reset_prepare(struct pci_dev *pdev)
2508 {
2509 	struct mlxsw_pci *mlxsw_pci = pci_get_drvdata(pdev);
2510 
2511 	mlxsw_core_bus_device_unregister(mlxsw_pci->core, false);
2512 }
2513 
2514 static void mlxsw_pci_reset_done(struct pci_dev *pdev)
2515 {
2516 	struct mlxsw_pci *mlxsw_pci = pci_get_drvdata(pdev);
2517 
2518 	mlxsw_pci->skip_reset = true;
2519 	mlxsw_core_bus_device_register(&mlxsw_pci->bus_info, &mlxsw_pci_bus,
2520 				       mlxsw_pci, false, NULL, NULL);
2521 	mlxsw_pci->skip_reset = false;
2522 }
2523 
2524 static const struct pci_error_handlers mlxsw_pci_err_handler = {
2525 	.reset_prepare = mlxsw_pci_reset_prepare,
2526 	.reset_done = mlxsw_pci_reset_done,
2527 };
2528 
2529 int mlxsw_pci_driver_register(struct pci_driver *pci_driver)
2530 {
2531 	pci_driver->probe = mlxsw_pci_probe;
2532 	pci_driver->remove = mlxsw_pci_remove;
2533 	pci_driver->shutdown = mlxsw_pci_remove;
2534 	pci_driver->err_handler = &mlxsw_pci_err_handler;
2535 	return pci_register_driver(pci_driver);
2536 }
2537 EXPORT_SYMBOL(mlxsw_pci_driver_register);
2538 
/* Counterpart of mlxsw_pci_driver_register(): unregister @pci_driver from the
 * PCI core.
 */
void mlxsw_pci_driver_unregister(struct pci_driver *pci_driver)
{
	pci_unregister_driver(pci_driver);
}
EXPORT_SYMBOL(mlxsw_pci_driver_unregister);
2544 
2545 static int __init mlxsw_pci_module_init(void)
2546 {
2547 	return 0;
2548 }
2549 
2550 static void __exit mlxsw_pci_module_exit(void)
2551 {
2552 }
2553 
2554 module_init(mlxsw_pci_module_init);
2555 module_exit(mlxsw_pci_module_exit);
2556 
2557 MODULE_LICENSE("Dual BSD/GPL");
2558 MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>");
2559 MODULE_DESCRIPTION("Mellanox switch PCI interface driver");
2560