xref: /linux/drivers/net/ethernet/microsoft/mana/mana_en.c (revision af2d6148d2a159e1a0862bce5a2c88c1618a2b27)
1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2 /* Copyright (c) 2021, Microsoft Corporation. */
3 
4 #include <uapi/linux/bpf.h>
5 
6 #include <linux/debugfs.h>
7 #include <linux/inetdevice.h>
8 #include <linux/etherdevice.h>
9 #include <linux/ethtool.h>
10 #include <linux/filter.h>
11 #include <linux/mm.h>
12 #include <linux/pci.h>
13 
14 #include <net/checksum.h>
15 #include <net/ip6_checksum.h>
16 #include <net/netdev_lock.h>
17 #include <net/page_pool/helpers.h>
18 #include <net/xdp.h>
19 
20 #include <net/mana/mana.h>
21 #include <net/mana/mana_auxiliary.h>
22 
23 static DEFINE_IDA(mana_adev_ida);
24 
25 static int mana_adev_idx_alloc(void)
26 {
27 	return ida_alloc(&mana_adev_ida, GFP_KERNEL);
28 }
29 
30 static void mana_adev_idx_free(int idx)
31 {
32 	ida_free(&mana_adev_ida, idx);
33 }
34 
35 static ssize_t mana_dbg_q_read(struct file *filp, char __user *buf, size_t count,
36 			       loff_t *pos)
37 {
38 	struct gdma_queue *gdma_q = filp->private_data;
39 
40 	return simple_read_from_buffer(buf, count, pos, gdma_q->queue_mem_ptr,
41 				       gdma_q->queue_size);
42 }
43 
44 static const struct file_operations mana_dbg_q_fops = {
45 	.owner  = THIS_MODULE,
46 	.open   = simple_open,
47 	.read   = mana_dbg_q_read,
48 };
49 
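/* Decide whether an error on this port is worth logging; default to logging
 * when the GDMA context is not available yet.
 */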
50 static bool mana_en_need_log(struct mana_port_context *apc, int err)
51 {
52 	if (apc && apc->ac && apc->ac->gdma_dev &&
53 	    apc->ac->gdma_dev->gdma_context)
54 		return mana_need_log(apc->ac->gdma_dev->gdma_context, err);
55 	else
56 		return true;
57 }
58 
59 /* Microsoft Azure Network Adapter (MANA) functions */
60 
61 static int mana_open(struct net_device *ndev)
62 {
63 	struct mana_port_context *apc = netdev_priv(ndev);
64 	int err;
65 
66 	err = mana_alloc_queues(ndev);
67 	if (err) {
68 		netdev_err(ndev, "%s failed to allocate queues: %d\n", __func__, err);
69 		return err;
70 	}
71 
72 	apc->port_is_up = true;
73 
74 	/* Ensure port state updated before txq state */
75 	smp_wmb();
76 
77 	netif_carrier_on(ndev);
78 	netif_tx_wake_all_queues(ndev);
79 	netdev_dbg(ndev, "%s successful\n", __func__);
80 	return 0;
81 }
82 
83 static int mana_close(struct net_device *ndev)
84 {
85 	struct mana_port_context *apc = netdev_priv(ndev);
86 
87 	if (!apc->port_is_up)
88 		return 0;
89 
90 	return mana_detach(ndev, true);
91 }
92 
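/* The SQ has room for another packet only if a maximum-size WQE fits */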
93 static bool mana_can_tx(struct gdma_queue *wq)
94 {
95 	return mana_gd_wq_avail_space(wq) >= MAX_TX_WQE_SIZE;
96 }
97 
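/* Return the L4 protocol (TCP or UDP) whose checksum can be offloaded, or 0 */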
98 static unsigned int mana_checksum_info(struct sk_buff *skb)
99 {
100 	if (skb->protocol == htons(ETH_P_IP)) {
101 		struct iphdr *ip = ip_hdr(skb);
102 
103 		if (ip->protocol == IPPROTO_TCP)
104 			return IPPROTO_TCP;
105 
106 		if (ip->protocol == IPPROTO_UDP)
107 			return IPPROTO_UDP;
108 	} else if (skb->protocol == htons(ETH_P_IPV6)) {
109 		struct ipv6hdr *ip6 = ipv6_hdr(skb);
110 
111 		if (ip6->nexthdr == IPPROTO_TCP)
112 			return IPPROTO_TCP;
113 
114 		if (ip6->nexthdr == IPPROTO_UDP)
115 			return IPPROTO_UDP;
116 	}
117 
118 	/* No csum offloading */
119 	return 0;
120 }
121 
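/* Record one DMA-mapped SGE both in the unmap bookkeeping (ash) and in the
 * WQE request.
 */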
122 static void mana_add_sge(struct mana_tx_package *tp, struct mana_skb_head *ash,
123 			 int sg_i, dma_addr_t da, int sge_len, u32 gpa_mkey)
124 {
125 	ash->dma_handle[sg_i] = da;
126 	ash->size[sg_i] = sge_len;
127 
128 	tp->wqe_req.sgl[sg_i].address = da;
129 	tp->wqe_req.sgl[sg_i].mem_key = gpa_mkey;
130 	tp->wqe_req.sgl[sg_i].size = sge_len;
131 }
132 
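/* DMA-map the skb for transmission: the linear part takes one SGE (two for
 * GSO when the headers are shorter than the linear part), followed by one
 * SGE per page fragment.
 */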
133 static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc,
134 			struct mana_tx_package *tp, int gso_hs)
135 {
136 	struct mana_skb_head *ash = (struct mana_skb_head *)skb->head;
137 	int hsg = 1; /* num of SGEs of linear part */
138 	struct gdma_dev *gd = apc->ac->gdma_dev;
139 	int skb_hlen = skb_headlen(skb);
140 	int sge0_len, sge1_len = 0;
141 	struct gdma_context *gc;
142 	struct device *dev;
143 	skb_frag_t *frag;
144 	dma_addr_t da;
145 	int sg_i;
146 	int i;
147 
148 	gc = gd->gdma_context;
149 	dev = gc->dev;
150 
151 	if (gso_hs && gso_hs < skb_hlen) {
152 		sge0_len = gso_hs;
153 		sge1_len = skb_hlen - gso_hs;
154 	} else {
155 		sge0_len = skb_hlen;
156 	}
157 
158 	da = dma_map_single(dev, skb->data, sge0_len, DMA_TO_DEVICE);
159 	if (dma_mapping_error(dev, da))
160 		return -ENOMEM;
161 
162 	mana_add_sge(tp, ash, 0, da, sge0_len, gd->gpa_mkey);
163 
164 	if (sge1_len) {
165 		sg_i = 1;
166 		da = dma_map_single(dev, skb->data + sge0_len, sge1_len,
167 				    DMA_TO_DEVICE);
168 		if (dma_mapping_error(dev, da))
169 			goto frag_err;
170 
171 		mana_add_sge(tp, ash, sg_i, da, sge1_len, gd->gpa_mkey);
172 		hsg = 2;
173 	}
174 
175 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
176 		sg_i = hsg + i;
177 
178 		frag = &skb_shinfo(skb)->frags[i];
179 		da = skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag),
180 				      DMA_TO_DEVICE);
181 		if (dma_mapping_error(dev, da))
182 			goto frag_err;
183 
184 		mana_add_sge(tp, ash, sg_i, da, skb_frag_size(frag),
185 			     gd->gpa_mkey);
186 	}
187 
188 	return 0;
189 
190 frag_err:
191 	if (net_ratelimit())
192 		netdev_err(apc->ndev, "Failed to map skb of size %u to DMA\n",
193 			   skb->len);
194 	for (i = sg_i - 1; i >= hsg; i--)
195 		dma_unmap_page(dev, ash->dma_handle[i], ash->size[i],
196 			       DMA_TO_DEVICE);
197 
198 	for (i = hsg - 1; i >= 0; i--)
199 		dma_unmap_single(dev, ash->dma_handle[i], ash->size[i],
200 				 DMA_TO_DEVICE);
201 
202 	return -ENOMEM;
203 }
204 
205 /* Handle the case when the GSO SKB linear length is too large.
206  * The MANA NIC requires GSO packets to put only the packet header in SGE0,
207  * so we need 2 SGEs when the skb linear part contains more than just the
208  * header.
209  * Return a positive value for the number of SGEs, or a negative value
210  * for an error.
211  */
212 static int mana_fix_skb_head(struct net_device *ndev, struct sk_buff *skb,
213 			     int gso_hs)
214 {
215 	int num_sge = 1 + skb_shinfo(skb)->nr_frags;
216 	int skb_hlen = skb_headlen(skb);
217 
218 	if (gso_hs < skb_hlen) {
219 		num_sge++;
220 	} else if (gso_hs > skb_hlen) {
221 		if (net_ratelimit())
222 			netdev_err(ndev,
223 				   "TX nonlinear head: hs:%d, skb_hlen:%d\n",
224 				   gso_hs, skb_hlen);
225 
226 		return -EINVAL;
227 	}
228 
229 	return num_sge;
230 }
231 
232 /* Get the GSO packet's header size */
233 static int mana_get_gso_hs(struct sk_buff *skb)
234 {
235 	int gso_hs;
236 
237 	if (skb->encapsulation) {
238 		gso_hs = skb_inner_tcp_all_headers(skb);
239 	} else {
240 		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
241 			gso_hs = skb_transport_offset(skb) +
242 				 sizeof(struct udphdr);
243 		} else {
244 			gso_hs = skb_tcp_all_headers(skb);
245 		}
246 	}
247 
248 	return gso_hs;
249 }
250 
251 netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
252 {
253 	enum mana_tx_pkt_format pkt_fmt = MANA_SHORT_PKT_FMT;
254 	struct mana_port_context *apc = netdev_priv(ndev);
255 	int gso_hs = 0; /* zero for non-GSO pkts */
256 	u16 txq_idx = skb_get_queue_mapping(skb);
257 	struct gdma_dev *gd = apc->ac->gdma_dev;
258 	bool ipv4 = false, ipv6 = false;
259 	struct mana_tx_package pkg = {};
260 	struct netdev_queue *net_txq;
261 	struct mana_stats_tx *tx_stats;
262 	struct gdma_queue *gdma_sq;
263 	int err, len, num_gso_seg;
264 	unsigned int csum_type;
265 	struct mana_txq *txq;
266 	struct mana_cq *cq;
267 
268 	if (unlikely(!apc->port_is_up))
269 		goto tx_drop;
270 
271 	if (skb_cow_head(skb, MANA_HEADROOM))
272 		goto tx_drop_count;
273 
274 	if (unlikely(ipv6_hopopt_jumbo_remove(skb)))
275 		goto tx_drop_count;
276 
277 	txq = &apc->tx_qp[txq_idx].txq;
278 	gdma_sq = txq->gdma_sq;
279 	cq = &apc->tx_qp[txq_idx].tx_cq;
280 	tx_stats = &txq->stats;
281 
282 	pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
283 	pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
284 
285 	if (txq->vp_offset > MANA_SHORT_VPORT_OFFSET_MAX) {
286 		pkg.tx_oob.l_oob.long_vp_offset = txq->vp_offset;
287 		pkt_fmt = MANA_LONG_PKT_FMT;
288 	} else {
289 		pkg.tx_oob.s_oob.short_vp_offset = txq->vp_offset;
290 	}
291 
292 	if (skb_vlan_tag_present(skb)) {
293 		pkt_fmt = MANA_LONG_PKT_FMT;
294 		pkg.tx_oob.l_oob.inject_vlan_pri_tag = 1;
295 		pkg.tx_oob.l_oob.pcp = skb_vlan_tag_get_prio(skb);
296 		pkg.tx_oob.l_oob.dei = skb_vlan_tag_get_cfi(skb);
297 		pkg.tx_oob.l_oob.vlan_id = skb_vlan_tag_get_id(skb);
298 	}
299 
300 	pkg.tx_oob.s_oob.pkt_fmt = pkt_fmt;
301 
302 	if (pkt_fmt == MANA_SHORT_PKT_FMT) {
303 		pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_short_oob);
304 		u64_stats_update_begin(&tx_stats->syncp);
305 		tx_stats->short_pkt_fmt++;
306 		u64_stats_update_end(&tx_stats->syncp);
307 	} else {
308 		pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_oob);
309 		u64_stats_update_begin(&tx_stats->syncp);
310 		tx_stats->long_pkt_fmt++;
311 		u64_stats_update_end(&tx_stats->syncp);
312 	}
313 
314 	pkg.wqe_req.inline_oob_data = &pkg.tx_oob;
315 	pkg.wqe_req.flags = 0;
316 	pkg.wqe_req.client_data_unit = 0;
317 
318 	pkg.wqe_req.num_sge = 1 + skb_shinfo(skb)->nr_frags;
319 
320 	if (skb->protocol == htons(ETH_P_IP))
321 		ipv4 = true;
322 	else if (skb->protocol == htons(ETH_P_IPV6))
323 		ipv6 = true;
324 
325 	if (skb_is_gso(skb)) {
326 		int num_sge;
327 
328 		gso_hs = mana_get_gso_hs(skb);
329 
330 		num_sge = mana_fix_skb_head(ndev, skb, gso_hs);
331 		if (num_sge > 0)
332 			pkg.wqe_req.num_sge = num_sge;
333 		else
334 			goto tx_drop_count;
335 
336 		u64_stats_update_begin(&tx_stats->syncp);
337 		if (skb->encapsulation) {
338 			tx_stats->tso_inner_packets++;
339 			tx_stats->tso_inner_bytes += skb->len - gso_hs;
340 		} else {
341 			tx_stats->tso_packets++;
342 			tx_stats->tso_bytes += skb->len - gso_hs;
343 		}
344 		u64_stats_update_end(&tx_stats->syncp);
345 
346 		pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4;
347 		pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6;
348 
349 		pkg.tx_oob.s_oob.comp_iphdr_csum = 1;
350 		pkg.tx_oob.s_oob.comp_tcp_csum = 1;
351 		pkg.tx_oob.s_oob.trans_off = skb_transport_offset(skb);
352 
353 		pkg.wqe_req.client_data_unit = skb_shinfo(skb)->gso_size;
354 		pkg.wqe_req.flags = GDMA_WR_OOB_IN_SGL | GDMA_WR_PAD_BY_SGE0;
355 		if (ipv4) {
356 			ip_hdr(skb)->tot_len = 0;
357 			ip_hdr(skb)->check = 0;
358 			tcp_hdr(skb)->check =
359 				~csum_tcpudp_magic(ip_hdr(skb)->saddr,
360 						   ip_hdr(skb)->daddr, 0,
361 						   IPPROTO_TCP, 0);
362 		} else {
363 			ipv6_hdr(skb)->payload_len = 0;
364 			tcp_hdr(skb)->check =
365 				~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
366 						 &ipv6_hdr(skb)->daddr, 0,
367 						 IPPROTO_TCP, 0);
368 		}
369 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
370 		csum_type = mana_checksum_info(skb);
371 
372 		u64_stats_update_begin(&tx_stats->syncp);
373 		tx_stats->csum_partial++;
374 		u64_stats_update_end(&tx_stats->syncp);
375 
376 		if (csum_type == IPPROTO_TCP) {
377 			pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4;
378 			pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6;
379 
380 			pkg.tx_oob.s_oob.comp_tcp_csum = 1;
381 			pkg.tx_oob.s_oob.trans_off = skb_transport_offset(skb);
382 
383 		} else if (csum_type == IPPROTO_UDP) {
384 			pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4;
385 			pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6;
386 
387 			pkg.tx_oob.s_oob.comp_udp_csum = 1;
388 		} else {
389 			/* Can't do offload of this type of checksum */
390 			if (skb_checksum_help(skb))
391 				goto tx_drop_count;
392 		}
393 	}
394 
395 	WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES);
396 
397 	if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
398 		pkg.wqe_req.sgl = pkg.sgl_array;
399 	} else {
400 		pkg.sgl_ptr = kmalloc_array(pkg.wqe_req.num_sge,
401 					    sizeof(struct gdma_sge),
402 					    GFP_ATOMIC);
403 		if (!pkg.sgl_ptr)
404 			goto tx_drop_count;
405 
406 		pkg.wqe_req.sgl = pkg.sgl_ptr;
407 	}
408 
409 	if (mana_map_skb(skb, apc, &pkg, gso_hs)) {
410 		u64_stats_update_begin(&tx_stats->syncp);
411 		tx_stats->mana_map_err++;
412 		u64_stats_update_end(&tx_stats->syncp);
413 		goto free_sgl_ptr;
414 	}
415 
416 	skb_queue_tail(&txq->pending_skbs, skb);
417 
418 	len = skb->len;
419 	num_gso_seg = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
420 	net_txq = netdev_get_tx_queue(ndev, txq_idx);
421 
422 	err = mana_gd_post_work_request(gdma_sq, &pkg.wqe_req,
423 					(struct gdma_posted_wqe_info *)skb->cb);
424 	if (!mana_can_tx(gdma_sq)) {
425 		netif_tx_stop_queue(net_txq);
426 		apc->eth_stats.stop_queue++;
427 	}
428 
429 	if (err) {
430 		(void)skb_dequeue_tail(&txq->pending_skbs);
431 		netdev_warn(ndev, "Failed to post TX OOB: %d\n", err);
432 		err = NETDEV_TX_BUSY;
433 		goto tx_busy;
434 	}
435 
436 	err = NETDEV_TX_OK;
437 	atomic_inc(&txq->pending_sends);
438 
439 	mana_gd_wq_ring_doorbell(gd->gdma_context, gdma_sq);
440 
441 	/* skb may be freed after mana_gd_post_work_request. Do not use it. */
442 	skb = NULL;
443 
444 	/* Populate the packet and byte counters based on post-GSO packet
445 	 * calculations.
446 	 */
447 	tx_stats = &txq->stats;
448 	u64_stats_update_begin(&tx_stats->syncp);
449 	tx_stats->packets += num_gso_seg;
450 	tx_stats->bytes += len + ((num_gso_seg - 1) * gso_hs);
451 	u64_stats_update_end(&tx_stats->syncp);
452 
453 tx_busy:
454 	if (netif_tx_queue_stopped(net_txq) && mana_can_tx(gdma_sq)) {
455 		netif_tx_wake_queue(net_txq);
456 		apc->eth_stats.wake_queue++;
457 	}
458 
459 	kfree(pkg.sgl_ptr);
460 	return err;
461 
462 free_sgl_ptr:
463 	kfree(pkg.sgl_ptr);
464 tx_drop_count:
465 	ndev->stats.tx_dropped++;
466 tx_drop:
467 	dev_kfree_skb_any(skb);
468 	return NETDEV_TX_OK;
469 }
470 
471 static void mana_get_stats64(struct net_device *ndev,
472 			     struct rtnl_link_stats64 *st)
473 {
474 	struct mana_port_context *apc = netdev_priv(ndev);
475 	unsigned int num_queues = apc->num_queues;
476 	struct mana_stats_rx *rx_stats;
477 	struct mana_stats_tx *tx_stats;
478 	unsigned int start;
479 	u64 packets, bytes;
480 	int q;
481 
482 	if (!apc->port_is_up)
483 		return;
484 
485 	netdev_stats_to_stats64(st, &ndev->stats);
486 
487 	for (q = 0; q < num_queues; q++) {
488 		rx_stats = &apc->rxqs[q]->stats;
489 
490 		do {
491 			start = u64_stats_fetch_begin(&rx_stats->syncp);
492 			packets = rx_stats->packets;
493 			bytes = rx_stats->bytes;
494 		} while (u64_stats_fetch_retry(&rx_stats->syncp, start));
495 
496 		st->rx_packets += packets;
497 		st->rx_bytes += bytes;
498 	}
499 
500 	for (q = 0; q < num_queues; q++) {
501 		tx_stats = &apc->tx_qp[q].txq.stats;
502 
503 		do {
504 			start = u64_stats_fetch_begin(&tx_stats->syncp);
505 			packets = tx_stats->packets;
506 			bytes = tx_stats->bytes;
507 		} while (u64_stats_fetch_retry(&tx_stats->syncp, start));
508 
509 		st->tx_packets += packets;
510 		st->tx_bytes += bytes;
511 	}
512 }
513 
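/* Pick a TX queue from the RSS indirection table by flow hash, and cache the
 * choice in the socket when possible so the flow stays on one queue.
 */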
514 static int mana_get_tx_queue(struct net_device *ndev, struct sk_buff *skb,
515 			     int old_q)
516 {
517 	struct mana_port_context *apc = netdev_priv(ndev);
518 	u32 hash = skb_get_hash(skb);
519 	struct sock *sk = skb->sk;
520 	int txq;
521 
522 	txq = apc->indir_table[hash & (apc->indir_table_sz - 1)];
523 
524 	if (txq != old_q && sk && sk_fullsock(sk) &&
525 	    rcu_access_pointer(sk->sk_dst_cache))
526 		sk_tx_queue_set(sk, txq);
527 
528 	return txq;
529 }
530 
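/* Use the socket's cached TX queue if still valid; otherwise fall back to the
 * recorded RX queue or the RSS indirection table.
 */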
531 static u16 mana_select_queue(struct net_device *ndev, struct sk_buff *skb,
532 			     struct net_device *sb_dev)
533 {
534 	int txq;
535 
536 	if (ndev->real_num_tx_queues == 1)
537 		return 0;
538 
539 	txq = sk_tx_queue_get(skb->sk);
540 
541 	if (txq < 0 || skb->ooo_okay || txq >= ndev->real_num_tx_queues) {
542 		if (skb_rx_queue_recorded(skb))
543 			txq = skb_get_rx_queue(skb);
544 		else
545 			txq = mana_get_tx_queue(ndev, skb, txq);
546 	}
547 
548 	return txq;
549 }
550 
551 /* Release pre-allocated RX buffers */
552 void mana_pre_dealloc_rxbufs(struct mana_port_context *mpc)
553 {
554 	struct device *dev;
555 	int i;
556 
557 	dev = mpc->ac->gdma_dev->gdma_context->dev;
558 
559 	if (!mpc->rxbufs_pre)
560 		goto out1;
561 
562 	if (!mpc->das_pre)
563 		goto out2;
564 
565 	while (mpc->rxbpre_total) {
566 		i = --mpc->rxbpre_total;
567 		dma_unmap_single(dev, mpc->das_pre[i], mpc->rxbpre_datasize,
568 				 DMA_FROM_DEVICE);
569 		put_page(virt_to_head_page(mpc->rxbufs_pre[i]));
570 	}
571 
572 	kfree(mpc->das_pre);
573 	mpc->das_pre = NULL;
574 
575 out2:
576 	kfree(mpc->rxbufs_pre);
577 	mpc->rxbufs_pre = NULL;
578 
579 out1:
580 	mpc->rxbpre_datasize = 0;
581 	mpc->rxbpre_alloc_size = 0;
582 	mpc->rxbpre_headroom = 0;
583 }
584 
585 /* Get a buffer from the pre-allocated RX buffers */
586 static void *mana_get_rxbuf_pre(struct mana_rxq *rxq, dma_addr_t *da)
587 {
588 	struct net_device *ndev = rxq->ndev;
589 	struct mana_port_context *mpc;
590 	void *va;
591 
592 	mpc = netdev_priv(ndev);
593 
594 	if (!mpc->rxbufs_pre || !mpc->das_pre || !mpc->rxbpre_total) {
595 		netdev_err(ndev, "No RX pre-allocated bufs\n");
596 		return NULL;
597 	}
598 
599 	/* Check sizes to catch unexpected coding errors */
600 	if (mpc->rxbpre_datasize != rxq->datasize) {
601 		netdev_err(ndev, "rxbpre_datasize mismatch: %u: %u\n",
602 			   mpc->rxbpre_datasize, rxq->datasize);
603 		return NULL;
604 	}
605 
606 	if (mpc->rxbpre_alloc_size != rxq->alloc_size) {
607 		netdev_err(ndev, "rxbpre_alloc_size mismatch: %u: %u\n",
608 			   mpc->rxbpre_alloc_size, rxq->alloc_size);
609 		return NULL;
610 	}
611 
612 	if (mpc->rxbpre_headroom != rxq->headroom) {
613 		netdev_err(ndev, "rxbpre_headroom mismatch: %u: %u\n",
614 			   mpc->rxbpre_headroom, rxq->headroom);
615 		return NULL;
616 	}
617 
618 	mpc->rxbpre_total--;
619 
620 	*da = mpc->das_pre[mpc->rxbpre_total];
621 	va = mpc->rxbufs_pre[mpc->rxbpre_total];
622 	mpc->rxbufs_pre[mpc->rxbpre_total] = NULL;
623 
624 	/* Deallocate the array after all buffers are gone */
625 	if (!mpc->rxbpre_total)
626 		mana_pre_dealloc_rxbufs(mpc);
627 
628 	return va;
629 }
630 
631 /* Get RX buffer's data size, alloc size, XDP headroom based on MTU */
632 static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size,
633 			       u32 *headroom)
634 {
635 	if (mtu > MANA_XDP_MTU_MAX)
636 		*headroom = 0; /* no support for XDP */
637 	else
638 		*headroom = XDP_PACKET_HEADROOM;
639 
640 	*alloc_size = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + *headroom);
641 
642 	/* Using a page pool in this case, so alloc_size is at least PAGE_SIZE */
643 	if (*alloc_size < PAGE_SIZE)
644 		*alloc_size = PAGE_SIZE;
645 
646 	*datasize = mtu + ETH_HLEN;
647 }
648 
649 int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu, int num_queues)
650 {
651 	struct device *dev;
652 	struct page *page;
653 	dma_addr_t da;
654 	int num_rxb;
655 	void *va;
656 	int i;
657 
658 	mana_get_rxbuf_cfg(new_mtu, &mpc->rxbpre_datasize,
659 			   &mpc->rxbpre_alloc_size, &mpc->rxbpre_headroom);
660 
661 	dev = mpc->ac->gdma_dev->gdma_context->dev;
662 
663 	num_rxb = num_queues * mpc->rx_queue_size;
664 
665 	WARN(mpc->rxbufs_pre, "mana rxbufs_pre exists\n");
666 	mpc->rxbufs_pre = kmalloc_array(num_rxb, sizeof(void *), GFP_KERNEL);
667 	if (!mpc->rxbufs_pre)
668 		goto error;
669 
670 	mpc->das_pre = kmalloc_array(num_rxb, sizeof(dma_addr_t), GFP_KERNEL);
671 	if (!mpc->das_pre)
672 		goto error;
673 
674 	mpc->rxbpre_total = 0;
675 
676 	for (i = 0; i < num_rxb; i++) {
677 		page = dev_alloc_pages(get_order(mpc->rxbpre_alloc_size));
678 		if (!page)
679 			goto error;
680 
681 		va = page_to_virt(page);
682 
683 		da = dma_map_single(dev, va + mpc->rxbpre_headroom,
684 				    mpc->rxbpre_datasize, DMA_FROM_DEVICE);
685 		if (dma_mapping_error(dev, da)) {
686 			put_page(page);
687 			goto error;
688 		}
689 
690 		mpc->rxbufs_pre[i] = va;
691 		mpc->das_pre[i] = da;
692 		mpc->rxbpre_total = i + 1;
693 	}
694 
695 	return 0;
696 
697 error:
698 	netdev_err(mpc->ndev, "Failed to pre-allocate RX buffers for %d queues\n", num_queues);
699 	mana_pre_dealloc_rxbufs(mpc);
700 	return -ENOMEM;
701 }
702 
703 static int mana_change_mtu(struct net_device *ndev, int new_mtu)
704 {
705 	struct mana_port_context *mpc = netdev_priv(ndev);
706 	unsigned int old_mtu = ndev->mtu;
707 	int err;
708 
709 	/* Pre-allocate buffers to prevent failure in mana_attach later */
710 	err = mana_pre_alloc_rxbufs(mpc, new_mtu, mpc->num_queues);
711 	if (err) {
712 		netdev_err(ndev, "Insufficient memory for new MTU\n");
713 		return err;
714 	}
715 
716 	err = mana_detach(ndev, false);
717 	if (err) {
718 		netdev_err(ndev, "mana_detach failed: %d\n", err);
719 		goto out;
720 	}
721 
722 	WRITE_ONCE(ndev->mtu, new_mtu);
723 
724 	err = mana_attach(ndev);
725 	if (err) {
726 		netdev_err(ndev, "mana_attach failed: %d\n", err);
727 		WRITE_ONCE(ndev->mtu, old_mtu);
728 	}
729 
730 out:
731 	mana_pre_dealloc_rxbufs(mpc);
732 	return err;
733 }
734 
735 static int mana_shaper_set(struct net_shaper_binding *binding,
736 			   const struct net_shaper *shaper,
737 			   struct netlink_ext_ack *extack)
738 {
739 	struct mana_port_context *apc = netdev_priv(binding->netdev);
740 	u32 old_speed, rate;
741 	int err;
742 
743 	if (shaper->handle.scope != NET_SHAPER_SCOPE_NETDEV) {
744 		NL_SET_ERR_MSG_MOD(extack, "net shaper scope should be netdev");
745 		return -EINVAL;
746 	}
747 
748 	if (apc->handle.id && shaper->handle.id != apc->handle.id) {
749 		NL_SET_ERR_MSG_MOD(extack, "Cannot create multiple shapers");
750 		return -EOPNOTSUPP;
751 	}
752 
753 	if (!shaper->bw_max || (shaper->bw_max % 100000000)) {
754 		NL_SET_ERR_MSG_MOD(extack, "Please use multiples of 100Mbps for bandwidth");
755 		return -EINVAL;
756 	}
757 
758 	rate = div_u64(shaper->bw_max, 1000); /* Convert bps to Kbps */
759 	rate = div_u64(rate, 1000);	      /* Convert Kbps to Mbps */
760 
761 	/* Get current speed */
762 	err = mana_query_link_cfg(apc);
763 	old_speed = (err) ? SPEED_UNKNOWN : apc->speed;
764 
765 	if (!err) {
766 		err = mana_set_bw_clamp(apc, rate, TRI_STATE_TRUE);
767 		apc->speed = (err) ? old_speed : rate;
768 		apc->handle = (err) ? apc->handle : shaper->handle;
769 	}
770 
771 	return err;
772 }
773 
774 static int mana_shaper_del(struct net_shaper_binding *binding,
775 			   const struct net_shaper_handle *handle,
776 			   struct netlink_ext_ack *extack)
777 {
778 	struct mana_port_context *apc = netdev_priv(binding->netdev);
779 	int err;
780 
781 	err = mana_set_bw_clamp(apc, 0, TRI_STATE_FALSE);
782 
783 	if (!err) {
784 		/* Reset mana port context parameters */
785 		apc->handle.id = 0;
786 		apc->handle.scope = NET_SHAPER_SCOPE_UNSPEC;
787 		apc->speed = 0;
788 	}
789 
790 	return err;
791 }
792 
793 static void mana_shaper_cap(struct net_shaper_binding *binding,
794 			    enum net_shaper_scope scope,
795 			    unsigned long *flags)
796 {
797 	*flags = BIT(NET_SHAPER_A_CAPS_SUPPORT_BW_MAX) |
798 		 BIT(NET_SHAPER_A_CAPS_SUPPORT_METRIC_BPS);
799 }
800 
801 static const struct net_shaper_ops mana_shaper_ops = {
802 	.set = mana_shaper_set,
803 	.delete = mana_shaper_del,
804 	.capabilities = mana_shaper_cap,
805 };
806 
807 static const struct net_device_ops mana_devops = {
808 	.ndo_open		= mana_open,
809 	.ndo_stop		= mana_close,
810 	.ndo_select_queue	= mana_select_queue,
811 	.ndo_start_xmit		= mana_start_xmit,
812 	.ndo_validate_addr	= eth_validate_addr,
813 	.ndo_get_stats64	= mana_get_stats64,
814 	.ndo_bpf		= mana_bpf,
815 	.ndo_xdp_xmit		= mana_xdp_xmit,
816 	.ndo_change_mtu		= mana_change_mtu,
817 	.net_shaper_ops         = &mana_shaper_ops,
818 };
819 
820 static void mana_cleanup_port_context(struct mana_port_context *apc)
821 {
822 	/* Make sure subsequent cleanup attempts don't end up
823 	 * removing an already-cleaned dentry pointer left over
824 	 * from an earlier call.
825 	 */
826 	debugfs_remove(apc->mana_port_debugfs);
827 	apc->mana_port_debugfs = NULL;
828 	kfree(apc->rxqs);
829 	apc->rxqs = NULL;
830 }
831 
832 static void mana_cleanup_indir_table(struct mana_port_context *apc)
833 {
834 	apc->indir_table_sz = 0;
835 	kfree(apc->indir_table);
836 	kfree(apc->rxobj_table);
837 }
838 
839 static int mana_init_port_context(struct mana_port_context *apc)
840 {
841 	apc->rxqs = kcalloc(apc->num_queues, sizeof(struct mana_rxq *),
842 			    GFP_KERNEL);
843 
844 	return !apc->rxqs ? -ENOMEM : 0;
845 }
846 
847 static int mana_send_request(struct mana_context *ac, void *in_buf,
848 			     u32 in_len, void *out_buf, u32 out_len)
849 {
850 	struct gdma_context *gc = ac->gdma_dev->gdma_context;
851 	struct gdma_resp_hdr *resp = out_buf;
852 	struct gdma_req_hdr *req = in_buf;
853 	struct device *dev = gc->dev;
854 	static atomic_t activity_id;
855 	int err;
856 
857 	req->dev_id = gc->mana.dev_id;
858 	req->activity_id = atomic_inc_return(&activity_id);
859 
860 	err = mana_gd_send_request(gc, in_len, in_buf, out_len,
861 				   out_buf);
862 	if (err || resp->status) {
863 		if (err == -EOPNOTSUPP)
864 			return err;
865 
866 		if (req->req.msg_type != MANA_QUERY_PHY_STAT &&
867 		    mana_need_log(gc, err))
868 			dev_err(dev, "Failed to send mana message: %d, 0x%x\n",
869 				err, resp->status);
870 		return err ? err : -EPROTO;
871 	}
872 
873 	if (req->dev_id.as_uint32 != resp->dev_id.as_uint32 ||
874 	    req->activity_id != resp->activity_id) {
875 		dev_err(dev, "Unexpected mana message response: %x,%x,%x,%x\n",
876 			req->dev_id.as_uint32, resp->dev_id.as_uint32,
877 			req->activity_id, resp->activity_id);
878 		return -EPROTO;
879 	}
880 
881 	return 0;
882 }
883 
884 static int mana_verify_resp_hdr(const struct gdma_resp_hdr *resp_hdr,
885 				const enum mana_command_code expected_code,
886 				const u32 min_size)
887 {
888 	if (resp_hdr->response.msg_type != expected_code)
889 		return -EPROTO;
890 
891 	if (resp_hdr->response.msg_version < GDMA_MESSAGE_V1)
892 		return -EPROTO;
893 
894 	if (resp_hdr->response.msg_size < min_size)
895 		return -EPROTO;
896 
897 	return 0;
898 }
899 
900 static int mana_pf_register_hw_vport(struct mana_port_context *apc)
901 {
902 	struct mana_register_hw_vport_resp resp = {};
903 	struct mana_register_hw_vport_req req = {};
904 	int err;
905 
906 	mana_gd_init_req_hdr(&req.hdr, MANA_REGISTER_HW_PORT,
907 			     sizeof(req), sizeof(resp));
908 	req.attached_gfid = 1;
909 	req.is_pf_default_vport = 1;
910 	req.allow_all_ether_types = 1;
911 
912 	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
913 				sizeof(resp));
914 	if (err) {
915 		netdev_err(apc->ndev, "Failed to register hw vPort: %d\n", err);
916 		return err;
917 	}
918 
919 	err = mana_verify_resp_hdr(&resp.hdr, MANA_REGISTER_HW_PORT,
920 				   sizeof(resp));
921 	if (err || resp.hdr.status) {
922 		netdev_err(apc->ndev, "Failed to register hw vPort: %d, 0x%x\n",
923 			   err, resp.hdr.status);
924 		return err ? err : -EPROTO;
925 	}
926 
927 	apc->port_handle = resp.hw_vport_handle;
928 	return 0;
929 }
930 
931 static void mana_pf_deregister_hw_vport(struct mana_port_context *apc)
932 {
933 	struct mana_deregister_hw_vport_resp resp = {};
934 	struct mana_deregister_hw_vport_req req = {};
935 	int err;
936 
937 	mana_gd_init_req_hdr(&req.hdr, MANA_DEREGISTER_HW_PORT,
938 			     sizeof(req), sizeof(resp));
939 	req.hw_vport_handle = apc->port_handle;
940 
941 	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
942 				sizeof(resp));
943 	if (err) {
944 		if (mana_en_need_log(apc, err))
945 			netdev_err(apc->ndev, "Failed to unregister hw vPort: %d\n",
946 				   err);
947 
948 		return;
949 	}
950 
951 	err = mana_verify_resp_hdr(&resp.hdr, MANA_DEREGISTER_HW_PORT,
952 				   sizeof(resp));
953 	if (err || resp.hdr.status)
954 		netdev_err(apc->ndev,
955 			   "Failed to deregister hw vPort: %d, 0x%x\n",
956 			   err, resp.hdr.status);
957 }
958 
959 static int mana_pf_register_filter(struct mana_port_context *apc)
960 {
961 	struct mana_register_filter_resp resp = {};
962 	struct mana_register_filter_req req = {};
963 	int err;
964 
965 	mana_gd_init_req_hdr(&req.hdr, MANA_REGISTER_FILTER,
966 			     sizeof(req), sizeof(resp));
967 	req.vport = apc->port_handle;
968 	memcpy(req.mac_addr, apc->mac_addr, ETH_ALEN);
969 
970 	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
971 				sizeof(resp));
972 	if (err) {
973 		netdev_err(apc->ndev, "Failed to register filter: %d\n", err);
974 		return err;
975 	}
976 
977 	err = mana_verify_resp_hdr(&resp.hdr, MANA_REGISTER_FILTER,
978 				   sizeof(resp));
979 	if (err || resp.hdr.status) {
980 		netdev_err(apc->ndev, "Failed to register filter: %d, 0x%x\n",
981 			   err, resp.hdr.status);
982 		return err ? err : -EPROTO;
983 	}
984 
985 	apc->pf_filter_handle = resp.filter_handle;
986 	return 0;
987 }
988 
989 static void mana_pf_deregister_filter(struct mana_port_context *apc)
990 {
991 	struct mana_deregister_filter_resp resp = {};
992 	struct mana_deregister_filter_req req = {};
993 	int err;
994 
995 	mana_gd_init_req_hdr(&req.hdr, MANA_DEREGISTER_FILTER,
996 			     sizeof(req), sizeof(resp));
997 	req.filter_handle = apc->pf_filter_handle;
998 
999 	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
1000 				sizeof(resp));
1001 	if (err) {
1002 		if (mana_en_need_log(apc, err))
1003 			netdev_err(apc->ndev, "Failed to unregister filter: %d\n",
1004 				   err);
1005 
1006 		return;
1007 	}
1008 
1009 	err = mana_verify_resp_hdr(&resp.hdr, MANA_DEREGISTER_FILTER,
1010 				   sizeof(resp));
1011 	if (err || resp.hdr.status)
1012 		netdev_err(apc->ndev,
1013 			   "Failed to deregister filter: %d, 0x%x\n",
1014 			   err, resp.hdr.status);
1015 }
1016 
1017 static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver,
1018 				 u32 proto_minor_ver, u32 proto_micro_ver,
1019 				 u16 *max_num_vports, u8 *bm_hostmode)
1020 {
1021 	struct gdma_context *gc = ac->gdma_dev->gdma_context;
1022 	struct mana_query_device_cfg_resp resp = {};
1023 	struct mana_query_device_cfg_req req = {};
1024 	struct device *dev = gc->dev;
1025 	int err = 0;
1026 
1027 	mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_DEV_CONFIG,
1028 			     sizeof(req), sizeof(resp));
1029 
1030 	req.hdr.resp.msg_version = GDMA_MESSAGE_V3;
1031 
1032 	req.proto_major_ver = proto_major_ver;
1033 	req.proto_minor_ver = proto_minor_ver;
1034 	req.proto_micro_ver = proto_micro_ver;
1035 
1036 	err = mana_send_request(ac, &req, sizeof(req), &resp, sizeof(resp));
1037 	if (err) {
1038 		dev_err(dev, "Failed to query config: %d\n", err);
1039 		return err;
1040 	}
1041 
1042 	err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_DEV_CONFIG,
1043 				   sizeof(resp));
1044 	if (err || resp.hdr.status) {
1045 		dev_err(dev, "Invalid query result: %d, 0x%x\n", err,
1046 			resp.hdr.status);
1047 		if (!err)
1048 			err = -EPROTO;
1049 		return err;
1050 	}
1051 
1052 	*max_num_vports = resp.max_num_vports;
1053 
1054 	if (resp.hdr.response.msg_version >= GDMA_MESSAGE_V2)
1055 		gc->adapter_mtu = resp.adapter_mtu;
1056 	else
1057 		gc->adapter_mtu = ETH_FRAME_LEN;
1058 
1059 	if (resp.hdr.response.msg_version >= GDMA_MESSAGE_V3)
1060 		*bm_hostmode = resp.bm_hostmode;
1061 	else
1062 		*bm_hostmode = 0;
1063 
1064 	debugfs_create_u16("adapter-MTU", 0400, gc->mana_pci_debugfs, &gc->adapter_mtu);
1065 
1066 	return 0;
1067 }
1068 
1069 static int mana_query_vport_cfg(struct mana_port_context *apc, u32 vport_index,
1070 				u32 *max_sq, u32 *max_rq, u32 *num_indir_entry)
1071 {
1072 	struct mana_query_vport_cfg_resp resp = {};
1073 	struct mana_query_vport_cfg_req req = {};
1074 	int err;
1075 
1076 	mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_VPORT_CONFIG,
1077 			     sizeof(req), sizeof(resp));
1078 
1079 	req.vport_index = vport_index;
1080 
1081 	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
1082 				sizeof(resp));
1083 	if (err)
1084 		return err;
1085 
1086 	err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_VPORT_CONFIG,
1087 				   sizeof(resp));
1088 	if (err)
1089 		return err;
1090 
1091 	if (resp.hdr.status)
1092 		return -EPROTO;
1093 
1094 	*max_sq = resp.max_num_sq;
1095 	*max_rq = resp.max_num_rq;
1096 	if (resp.num_indirection_ent > 0 &&
1097 	    resp.num_indirection_ent <= MANA_INDIRECT_TABLE_MAX_SIZE &&
1098 	    is_power_of_2(resp.num_indirection_ent)) {
1099 		*num_indir_entry = resp.num_indirection_ent;
1100 	} else {
1101 		netdev_warn(apc->ndev,
1102 			    "Setting indirection table size to default %d for vPort %d\n",
1103 			    MANA_INDIRECT_TABLE_DEF_SIZE, apc->port_idx);
1104 		*num_indir_entry = MANA_INDIRECT_TABLE_DEF_SIZE;
1105 	}
1106 
1107 	apc->port_handle = resp.vport;
1108 	ether_addr_copy(apc->mac_addr, resp.mac_addr);
1109 
1110 	return 0;
1111 }
1112 
1113 void mana_uncfg_vport(struct mana_port_context *apc)
1114 {
1115 	mutex_lock(&apc->vport_mutex);
1116 	apc->vport_use_count--;
1117 	WARN_ON(apc->vport_use_count < 0);
1118 	mutex_unlock(&apc->vport_mutex);
1119 }
1120 EXPORT_SYMBOL_NS(mana_uncfg_vport, "NET_MANA");
1121 
1122 int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id,
1123 		   u32 doorbell_pg_id)
1124 {
1125 	struct mana_config_vport_resp resp = {};
1126 	struct mana_config_vport_req req = {};
1127 	int err;
1128 
1129 	/* This function is used to program the Ethernet port in the hardware
1130 	 * table. It can be called from the Ethernet driver or the RDMA driver.
1131 	 *
1132 	 * For Ethernet usage, the hardware supports only one active user on a
1133 	 * physical port. The driver checks the port usage before programming
1134 	 * the hardware when creating the RAW QP (RDMA driver) or exposing the
1135 	 * device to the kernel NET layer (Ethernet driver).
1136 	 *
1137 	 * Because the RDMA driver doesn't know in advance which QP type the
1138 	 * user will create, it exposes the device with all its ports. The user
1139 	 * may not be able to create a RAW QP on a port if this port is already
1140 	 * in use by the Ethernet driver in the kernel.
1141 	 *
1142 	 * This physical port limitation only applies to the RAW QP. For RC QP,
1143 	 * the hardware doesn't have this limitation. The user can create RC
1144 	 * QPs on a physical port up to the hardware limits independent of the
1145 	 * Ethernet usage on the same port.
1146 	 */
1147 	mutex_lock(&apc->vport_mutex);
1148 	if (apc->vport_use_count > 0) {
1149 		mutex_unlock(&apc->vport_mutex);
1150 		return -EBUSY;
1151 	}
1152 	apc->vport_use_count++;
1153 	mutex_unlock(&apc->vport_mutex);
1154 
1155 	mana_gd_init_req_hdr(&req.hdr, MANA_CONFIG_VPORT_TX,
1156 			     sizeof(req), sizeof(resp));
1157 	req.vport = apc->port_handle;
1158 	req.pdid = protection_dom_id;
1159 	req.doorbell_pageid = doorbell_pg_id;
1160 
1161 	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
1162 				sizeof(resp));
1163 	if (err) {
1164 		netdev_err(apc->ndev, "Failed to configure vPort: %d\n", err);
1165 		goto out;
1166 	}
1167 
1168 	err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_TX,
1169 				   sizeof(resp));
1170 	if (err || resp.hdr.status) {
1171 		netdev_err(apc->ndev, "Failed to configure vPort: %d, 0x%x\n",
1172 			   err, resp.hdr.status);
1173 		if (!err)
1174 			err = -EPROTO;
1175 
1176 		goto out;
1177 	}
1178 
1179 	apc->tx_shortform_allowed = resp.short_form_allowed;
1180 	apc->tx_vp_offset = resp.tx_vport_offset;
1181 
1182 	netdev_info(apc->ndev, "Configured vPort %llu PD %u DB %u\n",
1183 		    apc->port_handle, protection_dom_id, doorbell_pg_id);
1184 out:
1185 	if (err)
1186 		mana_uncfg_vport(apc);
1187 
1188 	return err;
1189 }
1190 EXPORT_SYMBOL_NS(mana_cfg_vport, "NET_MANA");
1191 
1192 static int mana_cfg_vport_steering(struct mana_port_context *apc,
1193 				   enum TRI_STATE rx,
1194 				   bool update_default_rxobj, bool update_key,
1195 				   bool update_tab)
1196 {
1197 	struct mana_cfg_rx_steer_req_v2 *req;
1198 	struct mana_cfg_rx_steer_resp resp = {};
1199 	struct net_device *ndev = apc->ndev;
1200 	u32 req_buf_size;
1201 	int err;
1202 
1203 	req_buf_size = struct_size(req, indir_tab, apc->indir_table_sz);
1204 	req = kzalloc(req_buf_size, GFP_KERNEL);
1205 	if (!req)
1206 		return -ENOMEM;
1207 
1208 	mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size,
1209 			     sizeof(resp));
1210 
1211 	req->hdr.req.msg_version = GDMA_MESSAGE_V2;
1212 
1213 	req->vport = apc->port_handle;
1214 	req->num_indir_entries = apc->indir_table_sz;
1215 	req->indir_tab_offset = offsetof(struct mana_cfg_rx_steer_req_v2,
1216 					 indir_tab);
1217 	req->rx_enable = rx;
1218 	req->rss_enable = apc->rss_state;
1219 	req->update_default_rxobj = update_default_rxobj;
1220 	req->update_hashkey = update_key;
1221 	req->update_indir_tab = update_tab;
1222 	req->default_rxobj = apc->default_rxobj;
1223 	req->cqe_coalescing_enable = 0;
1224 
1225 	if (update_key)
1226 		memcpy(&req->hashkey, apc->hashkey, MANA_HASH_KEY_SIZE);
1227 
1228 	if (update_tab)
1229 		memcpy(req->indir_tab, apc->rxobj_table,
1230 		       flex_array_size(req, indir_tab, req->num_indir_entries));
1231 
1232 	err = mana_send_request(apc->ac, req, req_buf_size, &resp,
1233 				sizeof(resp));
1234 	if (err) {
1235 		if (mana_en_need_log(apc, err))
1236 			netdev_err(ndev, "Failed to configure vPort RX: %d\n", err);
1237 
1238 		goto out;
1239 	}
1240 
1241 	err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_RX,
1242 				   sizeof(resp));
1243 	if (err) {
1244 		netdev_err(ndev, "vPort RX configuration failed: %d\n", err);
1245 		goto out;
1246 	}
1247 
1248 	if (resp.hdr.status) {
1249 		netdev_err(ndev, "vPort RX configuration failed: 0x%x\n",
1250 			   resp.hdr.status);
1251 		err = -EPROTO;
1252 	}
1253 
1254 	netdev_info(ndev, "Configured steering vPort %llu entries %u\n",
1255 		    apc->port_handle, apc->indir_table_sz);
1256 out:
1257 	kfree(req);
1258 	return err;
1259 }
1260 
1261 int mana_query_link_cfg(struct mana_port_context *apc)
1262 {
1263 	struct net_device *ndev = apc->ndev;
1264 	struct mana_query_link_config_resp resp = {};
1265 	struct mana_query_link_config_req req = {};
1266 	int err;
1267 
1268 	mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_LINK_CONFIG,
1269 			     sizeof(req), sizeof(resp));
1270 
1271 	req.vport = apc->port_handle;
1272 	req.hdr.resp.msg_version = GDMA_MESSAGE_V2;
1273 
1274 	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
1275 				sizeof(resp));
1276 
1277 	if (err) {
1278 		if (err == -EOPNOTSUPP) {
1279 			netdev_info_once(ndev, "MANA_QUERY_LINK_CONFIG not supported\n");
1280 			return err;
1281 		}
1282 		netdev_err(ndev, "Failed to query link config: %d\n", err);
1283 		return err;
1284 	}
1285 
1286 	err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_LINK_CONFIG,
1287 				   sizeof(resp));
1288 
1289 	if (err || resp.hdr.status) {
1290 		netdev_err(ndev, "Failed to query link config: %d, 0x%x\n", err,
1291 			   resp.hdr.status);
1292 		if (!err)
1293 			err = -EOPNOTSUPP;
1294 		return err;
1295 	}
1296 
1297 	if (resp.qos_unconfigured) {
1298 		err = -EINVAL;
1299 		return err;
1300 	}
1301 	apc->speed = resp.link_speed_mbps;
1302 	apc->max_speed = resp.qos_speed_mbps;
1303 	return 0;
1304 }
1305 
1306 int mana_set_bw_clamp(struct mana_port_context *apc, u32 speed,
1307 		      int enable_clamping)
1308 {
1309 	struct mana_set_bw_clamp_resp resp = {};
1310 	struct mana_set_bw_clamp_req req = {};
1311 	struct net_device *ndev = apc->ndev;
1312 	int err;
1313 
1314 	mana_gd_init_req_hdr(&req.hdr, MANA_SET_BW_CLAMP,
1315 			     sizeof(req), sizeof(resp));
1316 	req.vport = apc->port_handle;
1317 	req.link_speed_mbps = speed;
1318 	req.enable_clamping = enable_clamping;
1319 
1320 	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
1321 				sizeof(resp));
1322 
1323 	if (err) {
1324 		if (err == -EOPNOTSUPP) {
1325 			netdev_info_once(ndev, "MANA_SET_BW_CLAMP not supported\n");
1326 			return err;
1327 		}
1328 		netdev_err(ndev, "Failed to set bandwidth clamp for speed %u, err = %d\n",
1329 			   speed, err);
1330 		return err;
1331 	}
1332 
1333 	err = mana_verify_resp_hdr(&resp.hdr, MANA_SET_BW_CLAMP,
1334 				   sizeof(resp));
1335 
1336 	if (err || resp.hdr.status) {
1337 		netdev_err(ndev, "Failed to set bandwidth clamp: %d, 0x%x\n", err,
1338 			   resp.hdr.status);
1339 		if (!err)
1340 			err = -EOPNOTSUPP;
1341 		return err;
1342 	}
1343 
1344 	if (resp.qos_unconfigured)
1345 		netdev_info(ndev, "QoS is unconfigured\n");
1346 
1347 	return 0;
1348 }
1349 
1350 int mana_create_wq_obj(struct mana_port_context *apc,
1351 		       mana_handle_t vport,
1352 		       u32 wq_type, struct mana_obj_spec *wq_spec,
1353 		       struct mana_obj_spec *cq_spec,
1354 		       mana_handle_t *wq_obj)
1355 {
1356 	struct mana_create_wqobj_resp resp = {};
1357 	struct mana_create_wqobj_req req = {};
1358 	struct net_device *ndev = apc->ndev;
1359 	int err;
1360 
1361 	mana_gd_init_req_hdr(&req.hdr, MANA_CREATE_WQ_OBJ,
1362 			     sizeof(req), sizeof(resp));
1363 	req.vport = vport;
1364 	req.wq_type = wq_type;
1365 	req.wq_gdma_region = wq_spec->gdma_region;
1366 	req.cq_gdma_region = cq_spec->gdma_region;
1367 	req.wq_size = wq_spec->queue_size;
1368 	req.cq_size = cq_spec->queue_size;
1369 	req.cq_moderation_ctx_id = cq_spec->modr_ctx_id;
1370 	req.cq_parent_qid = cq_spec->attached_eq;
1371 
1372 	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
1373 				sizeof(resp));
1374 	if (err) {
1375 		netdev_err(ndev, "Failed to create WQ object: %d\n", err);
1376 		goto out;
1377 	}
1378 
1379 	err = mana_verify_resp_hdr(&resp.hdr, MANA_CREATE_WQ_OBJ,
1380 				   sizeof(resp));
1381 	if (err || resp.hdr.status) {
1382 		netdev_err(ndev, "Failed to create WQ object: %d, 0x%x\n", err,
1383 			   resp.hdr.status);
1384 		if (!err)
1385 			err = -EPROTO;
1386 		goto out;
1387 	}
1388 
1389 	if (resp.wq_obj == INVALID_MANA_HANDLE) {
1390 		netdev_err(ndev, "Got an invalid WQ object handle\n");
1391 		err = -EPROTO;
1392 		goto out;
1393 	}
1394 
1395 	*wq_obj = resp.wq_obj;
1396 	wq_spec->queue_index = resp.wq_id;
1397 	cq_spec->queue_index = resp.cq_id;
1398 
1399 	return 0;
1400 out:
1401 	return err;
1402 }
1403 EXPORT_SYMBOL_NS(mana_create_wq_obj, "NET_MANA");
1404 
1405 void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type,
1406 			 mana_handle_t wq_obj)
1407 {
1408 	struct mana_destroy_wqobj_resp resp = {};
1409 	struct mana_destroy_wqobj_req req = {};
1410 	struct net_device *ndev = apc->ndev;
1411 	int err;
1412 
1413 	mana_gd_init_req_hdr(&req.hdr, MANA_DESTROY_WQ_OBJ,
1414 			     sizeof(req), sizeof(resp));
1415 	req.wq_type = wq_type;
1416 	req.wq_obj_handle = wq_obj;
1417 
1418 	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
1419 				sizeof(resp));
1420 	if (err) {
1421 		if (mana_en_need_log(apc, err))
1422 			netdev_err(ndev, "Failed to destroy WQ object: %d\n", err);
1423 
1424 		return;
1425 	}
1426 
1427 	err = mana_verify_resp_hdr(&resp.hdr, MANA_DESTROY_WQ_OBJ,
1428 				   sizeof(resp));
1429 	if (err || resp.hdr.status)
1430 		netdev_err(ndev, "Failed to destroy WQ object: %d, 0x%x\n", err,
1431 			   resp.hdr.status);
1432 }
1433 EXPORT_SYMBOL_NS(mana_destroy_wq_obj, "NET_MANA");
1434 
1435 static void mana_destroy_eq(struct mana_context *ac)
1436 {
1437 	struct gdma_context *gc = ac->gdma_dev->gdma_context;
1438 	struct gdma_queue *eq;
1439 	int i;
1440 
1441 	if (!ac->eqs)
1442 		return;
1443 
1444 	debugfs_remove_recursive(ac->mana_eqs_debugfs);
1445 	ac->mana_eqs_debugfs = NULL;
1446 
1447 	for (i = 0; i < gc->max_num_queues; i++) {
1448 		eq = ac->eqs[i].eq;
1449 		if (!eq)
1450 			continue;
1451 
1452 		mana_gd_destroy_queue(gc, eq);
1453 	}
1454 
1455 	kfree(ac->eqs);
1456 	ac->eqs = NULL;
1457 }
1458 
1459 static void mana_create_eq_debugfs(struct mana_context *ac, int i)
1460 {
1461 	struct mana_eq eq = ac->eqs[i];
1462 	char eqnum[32];
1463 
1464 	sprintf(eqnum, "eq%d", i);
1465 	eq.mana_eq_debugfs = debugfs_create_dir(eqnum, ac->mana_eqs_debugfs);
1466 	debugfs_create_u32("head", 0400, eq.mana_eq_debugfs, &eq.eq->head);
1467 	debugfs_create_u32("tail", 0400, eq.mana_eq_debugfs, &eq.eq->tail);
1468 	debugfs_create_file("eq_dump", 0400, eq.mana_eq_debugfs, eq.eq, &mana_dbg_q_fops);
1469 }
1470 
1471 static int mana_create_eq(struct mana_context *ac)
1472 {
1473 	struct gdma_dev *gd = ac->gdma_dev;
1474 	struct gdma_context *gc = gd->gdma_context;
1475 	struct gdma_queue_spec spec = {};
1476 	int err;
1477 	int i;
1478 
1479 	ac->eqs = kcalloc(gc->max_num_queues, sizeof(struct mana_eq),
1480 			  GFP_KERNEL);
1481 	if (!ac->eqs)
1482 		return -ENOMEM;
1483 
1484 	spec.type = GDMA_EQ;
1485 	spec.monitor_avl_buf = false;
1486 	spec.queue_size = EQ_SIZE;
1487 	spec.eq.callback = NULL;
1488 	spec.eq.context = ac->eqs;
1489 	spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
1490 
1491 	ac->mana_eqs_debugfs = debugfs_create_dir("EQs", gc->mana_pci_debugfs);
1492 
1493 	for (i = 0; i < gc->max_num_queues; i++) {
1494 		spec.eq.msix_index = (i + 1) % gc->num_msix_usable;
1495 		err = mana_gd_create_mana_eq(gd, &spec, &ac->eqs[i].eq);
1496 		if (err) {
1497 			dev_err(gc->dev, "Failed to create EQ %d : %d\n", i, err);
1498 			goto out;
1499 		}
1500 		mana_create_eq_debugfs(ac, i);
1501 	}
1502 
1503 	return 0;
1504 out:
1505 	mana_destroy_eq(ac);
1506 	return err;
1507 }
1508 
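/* Issue a MANA_FENCE_RQ request and wait (up to 10s) for the fence completion
 * signaled by the corresponding RX CQE.
 */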
1509 static int mana_fence_rq(struct mana_port_context *apc, struct mana_rxq *rxq)
1510 {
1511 	struct mana_fence_rq_resp resp = {};
1512 	struct mana_fence_rq_req req = {};
1513 	int err;
1514 
1515 	init_completion(&rxq->fence_event);
1516 
1517 	mana_gd_init_req_hdr(&req.hdr, MANA_FENCE_RQ,
1518 			     sizeof(req), sizeof(resp));
1519 	req.wq_obj_handle =  rxq->rxobj;
1520 
1521 	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
1522 				sizeof(resp));
1523 	if (err) {
1524 		netdev_err(apc->ndev, "Failed to fence RQ %u: %d\n",
1525 			   rxq->rxq_idx, err);
1526 		return err;
1527 	}
1528 
1529 	err = mana_verify_resp_hdr(&resp.hdr, MANA_FENCE_RQ, sizeof(resp));
1530 	if (err || resp.hdr.status) {
1531 		netdev_err(apc->ndev, "Failed to fence RQ %u: %d, 0x%x\n",
1532 			   rxq->rxq_idx, err, resp.hdr.status);
1533 		if (!err)
1534 			err = -EPROTO;
1535 
1536 		return err;
1537 	}
1538 
1539 	if (wait_for_completion_timeout(&rxq->fence_event, 10 * HZ) == 0) {
1540 		netdev_err(apc->ndev, "Failed to fence RQ %u: timed out\n",
1541 			   rxq->rxq_idx);
1542 		return -ETIMEDOUT;
1543 	}
1544 
1545 	return 0;
1546 }
1547 
1548 static void mana_fence_rqs(struct mana_port_context *apc)
1549 {
1550 	unsigned int rxq_idx;
1551 	struct mana_rxq *rxq;
1552 	int err;
1553 
1554 	for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) {
1555 		rxq = apc->rxqs[rxq_idx];
1556 		err = mana_fence_rq(apc, rxq);
1557 
1558 		/* In case of any error, sleep briefly instead of waiting for the fence. */
1559 		if (err)
1560 			msleep(100);
1561 	}
1562 }
1563 
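/* Advance the WQ tail by num_units of completed work, with a sanity check
 * that the tail never moves past the head.
 */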
1564 static int mana_move_wq_tail(struct gdma_queue *wq, u32 num_units)
1565 {
1566 	u32 used_space_old;
1567 	u32 used_space_new;
1568 
1569 	used_space_old = wq->head - wq->tail;
1570 	used_space_new = wq->head - (wq->tail + num_units);
1571 
1572 	if (WARN_ON_ONCE(used_space_new > used_space_old))
1573 		return -ERANGE;
1574 
1575 	wq->tail += num_units;
1576 	return 0;
1577 }
1578 
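/* Undo the DMA mappings set up by mana_map_skb() */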
1579 static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
1580 {
1581 	struct mana_skb_head *ash = (struct mana_skb_head *)skb->head;
1582 	struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
1583 	struct device *dev = gc->dev;
1584 	int hsg, i;
1585 
1586 	/* Number of SGEs of linear part */
1587 	hsg = (skb_is_gso(skb) && skb_headlen(skb) > ash->size[0]) ? 2 : 1;
1588 
1589 	for (i = 0; i < hsg; i++)
1590 		dma_unmap_single(dev, ash->dma_handle[i], ash->size[i],
1591 				 DMA_TO_DEVICE);
1592 
1593 	for (i = hsg; i < skb_shinfo(skb)->nr_frags + hsg; i++)
1594 		dma_unmap_page(dev, ash->dma_handle[i], ash->size[i],
1595 			       DMA_TO_DEVICE);
1596 }
1597 
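/* Process TX completions: unmap and free the sent skbs, advance the SQ tail,
 * and wake the TX queue if it was stopped and space is available again.
 */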
1598 static void mana_poll_tx_cq(struct mana_cq *cq)
1599 {
1600 	struct gdma_comp *completions = cq->gdma_comp_buf;
1601 	struct gdma_posted_wqe_info *wqe_info;
1602 	unsigned int pkt_transmitted = 0;
1603 	unsigned int wqe_unit_cnt = 0;
1604 	struct mana_txq *txq = cq->txq;
1605 	struct mana_port_context *apc;
1606 	struct netdev_queue *net_txq;
1607 	struct gdma_queue *gdma_wq;
1608 	unsigned int avail_space;
1609 	struct net_device *ndev;
1610 	struct sk_buff *skb;
1611 	bool txq_stopped;
1612 	int comp_read;
1613 	int i;
1614 
1615 	ndev = txq->ndev;
1616 	apc = netdev_priv(ndev);
1617 
1618 	comp_read = mana_gd_poll_cq(cq->gdma_cq, completions,
1619 				    CQE_POLLING_BUFFER);
1620 
1621 	if (comp_read < 1)
1622 		return;
1623 
1624 	for (i = 0; i < comp_read; i++) {
1625 		struct mana_tx_comp_oob *cqe_oob;
1626 
1627 		if (WARN_ON_ONCE(!completions[i].is_sq))
1628 			return;
1629 
1630 		cqe_oob = (struct mana_tx_comp_oob *)completions[i].cqe_data;
1631 		if (WARN_ON_ONCE(cqe_oob->cqe_hdr.client_type !=
1632 				 MANA_CQE_COMPLETION))
1633 			return;
1634 
1635 		switch (cqe_oob->cqe_hdr.cqe_type) {
1636 		case CQE_TX_OKAY:
1637 			break;
1638 
1639 		case CQE_TX_SA_DROP:
1640 		case CQE_TX_MTU_DROP:
1641 		case CQE_TX_INVALID_OOB:
1642 		case CQE_TX_INVALID_ETH_TYPE:
1643 		case CQE_TX_HDR_PROCESSING_ERROR:
1644 		case CQE_TX_VF_DISABLED:
1645 		case CQE_TX_VPORT_IDX_OUT_OF_RANGE:
1646 		case CQE_TX_VPORT_DISABLED:
1647 		case CQE_TX_VLAN_TAGGING_VIOLATION:
1648 			if (net_ratelimit())
1649 				netdev_err(ndev, "TX: CQE error %d\n",
1650 					   cqe_oob->cqe_hdr.cqe_type);
1651 
1652 			apc->eth_stats.tx_cqe_err++;
1653 			break;
1654 
1655 		default:
1656 			/* If the CQE type is unknown, log an error,
1657 			 * and still free the SKB, update tail, etc.
1658 			 */
1659 			if (net_ratelimit())
1660 				netdev_err(ndev, "TX: unknown CQE type %d\n",
1661 					   cqe_oob->cqe_hdr.cqe_type);
1662 
1663 			apc->eth_stats.tx_cqe_unknown_type++;
1664 			break;
1665 		}
1666 
1667 		if (WARN_ON_ONCE(txq->gdma_txq_id != completions[i].wq_num))
1668 			return;
1669 
1670 		skb = skb_dequeue(&txq->pending_skbs);
1671 		if (WARN_ON_ONCE(!skb))
1672 			return;
1673 
1674 		wqe_info = (struct gdma_posted_wqe_info *)skb->cb;
1675 		wqe_unit_cnt += wqe_info->wqe_size_in_bu;
1676 
1677 		mana_unmap_skb(skb, apc);
1678 
1679 		napi_consume_skb(skb, cq->budget);
1680 
1681 		pkt_transmitted++;
1682 	}
1683 
1684 	if (WARN_ON_ONCE(wqe_unit_cnt == 0))
1685 		return;
1686 
1687 	mana_move_wq_tail(txq->gdma_sq, wqe_unit_cnt);
1688 
1689 	gdma_wq = txq->gdma_sq;
1690 	avail_space = mana_gd_wq_avail_space(gdma_wq);
1691 
1692 	/* Ensure tail updated before checking q stop */
1693 	smp_mb();
1694 
1695 	net_txq = txq->net_txq;
1696 	txq_stopped = netif_tx_queue_stopped(net_txq);
1697 
1698 	/* Ensure checking txq_stopped before apc->port_is_up. */
1699 	smp_rmb();
1700 
1701 	if (txq_stopped && apc->port_is_up && avail_space >= MAX_TX_WQE_SIZE) {
1702 		netif_tx_wake_queue(net_txq);
1703 		apc->eth_stats.wake_queue++;
1704 	}
1705 
1706 	if (atomic_sub_return(pkt_transmitted, &txq->pending_sends) < 0)
1707 		WARN_ON_ONCE(1);
1708 
1709 	cq->work_done = pkt_transmitted;
1710 }
1711 
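/* Post the receive WQE for the current RX buffer slot and advance buf_index */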
1712 static void mana_post_pkt_rxq(struct mana_rxq *rxq)
1713 {
1714 	struct mana_recv_buf_oob *recv_buf_oob;
1715 	u32 curr_index;
1716 	int err;
1717 
1718 	curr_index = rxq->buf_index++;
1719 	if (rxq->buf_index == rxq->num_rx_buf)
1720 		rxq->buf_index = 0;
1721 
1722 	recv_buf_oob = &rxq->rx_oobs[curr_index];
1723 
1724 	err = mana_gd_post_work_request(rxq->gdma_rq, &recv_buf_oob->wqe_req,
1725 					&recv_buf_oob->wqe_inf);
1726 	if (WARN_ON_ONCE(err))
1727 		return;
1728 
1729 	WARN_ON_ONCE(recv_buf_oob->wqe_inf.wqe_size_in_bu != 1);
1730 }
1731 
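/* Build an skb around the RX buffer, honoring the XDP-adjusted data layout
 * when an XDP program has run.
 */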
1732 static struct sk_buff *mana_build_skb(struct mana_rxq *rxq, void *buf_va,
1733 				      uint pkt_len, struct xdp_buff *xdp)
1734 {
1735 	struct sk_buff *skb = napi_build_skb(buf_va, rxq->alloc_size);
1736 
1737 	if (!skb)
1738 		return NULL;
1739 
1740 	if (xdp->data_hard_start) {
1741 		u32 metasize = xdp->data - xdp->data_meta;
1742 
1743 		skb_reserve(skb, xdp->data - xdp->data_hard_start);
1744 		skb_put(skb, xdp->data_end - xdp->data);
1745 		if (metasize)
1746 			skb_metadata_set(skb, metasize);
1747 		return skb;
1748 	}
1749 
1750 	skb_reserve(skb, rxq->headroom);
1751 	skb_put(skb, pkt_len);
1752 
1753 	return skb;
1754 }
1755 
1756 static void mana_rx_skb(void *buf_va, bool from_pool,
1757 			struct mana_rxcomp_oob *cqe, struct mana_rxq *rxq)
1758 {
1759 	struct mana_stats_rx *rx_stats = &rxq->stats;
1760 	struct net_device *ndev = rxq->ndev;
1761 	uint pkt_len = cqe->ppi[0].pkt_len;
1762 	u16 rxq_idx = rxq->rxq_idx;
1763 	struct napi_struct *napi;
1764 	struct xdp_buff xdp = {};
1765 	struct sk_buff *skb;
1766 	u32 hash_value;
1767 	u32 act;
1768 
1769 	rxq->rx_cq.work_done++;
1770 	napi = &rxq->rx_cq.napi;
1771 
1772 	if (!buf_va) {
1773 		++ndev->stats.rx_dropped;
1774 		return;
1775 	}
1776 
1777 	act = mana_run_xdp(ndev, rxq, &xdp, buf_va, pkt_len);
1778 
1779 	if (act == XDP_REDIRECT && !rxq->xdp_rc)
1780 		return;
1781 
1782 	if (act != XDP_PASS && act != XDP_TX)
1783 		goto drop_xdp;
1784 
1785 	skb = mana_build_skb(rxq, buf_va, pkt_len, &xdp);
1786 
1787 	if (!skb)
1788 		goto drop;
1789 
1790 	if (from_pool)
1791 		skb_mark_for_recycle(skb);
1792 
1793 	skb->dev = napi->dev;
1794 
1795 	skb->protocol = eth_type_trans(skb, ndev);
1796 	skb_checksum_none_assert(skb);
1797 	skb_record_rx_queue(skb, rxq_idx);
1798 
1799 	if ((ndev->features & NETIF_F_RXCSUM) && cqe->rx_iphdr_csum_succeed) {
1800 		if (cqe->rx_tcp_csum_succeed || cqe->rx_udp_csum_succeed)
1801 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1802 	}
1803 
1804 	if (cqe->rx_hashtype != 0 && (ndev->features & NETIF_F_RXHASH)) {
1805 		hash_value = cqe->ppi[0].pkt_hash;
1806 
1807 		if (cqe->rx_hashtype & MANA_HASH_L4)
1808 			skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L4);
1809 		else
1810 			skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L3);
1811 	}
1812 
1813 	if (cqe->rx_vlantag_present) {
1814 		u16 vlan_tci = cqe->rx_vlan_id;
1815 
1816 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci);
1817 	}
1818 
1819 	u64_stats_update_begin(&rx_stats->syncp);
1820 	rx_stats->packets++;
1821 	rx_stats->bytes += pkt_len;
1822 
1823 	if (act == XDP_TX)
1824 		rx_stats->xdp_tx++;
1825 	u64_stats_update_end(&rx_stats->syncp);
1826 
1827 	if (act == XDP_TX) {
1828 		skb_set_queue_mapping(skb, rxq_idx);
1829 		mana_xdp_tx(skb, ndev);
1830 		return;
1831 	}
1832 
1833 	napi_gro_receive(napi, skb);
1834 
1835 	return;
1836 
1837 drop_xdp:
1838 	u64_stats_update_begin(&rx_stats->syncp);
1839 	rx_stats->xdp_drop++;
1840 	u64_stats_update_end(&rx_stats->syncp);
1841 
1842 drop:
1843 	if (from_pool) {
1844 		page_pool_recycle_direct(rxq->page_pool,
1845 					 virt_to_head_page(buf_va));
1846 	} else {
1847 		WARN_ON_ONCE(rxq->xdp_save_va);
1848 		/* Save for reuse */
1849 		rxq->xdp_save_va = buf_va;
1850 	}
1851 
1852 	++ndev->stats.rx_dropped;
1853 
1854 	return;
1855 }
1856 
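/* Get an RX buffer: reuse the buffer saved by XDP if there is one, otherwise
 * allocate from the page pool, then DMA-map it.
 */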
1857 static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
1858 			     dma_addr_t *da, bool *from_pool)
1859 {
1860 	struct page *page;
1861 	void *va;
1862 
1863 	*from_pool = false;
1864 
1865 	/* Reuse XDP dropped page if available */
1866 	if (rxq->xdp_save_va) {
1867 		va = rxq->xdp_save_va;
1868 		rxq->xdp_save_va = NULL;
1869 	} else {
1870 		page = page_pool_dev_alloc_pages(rxq->page_pool);
1871 		if (!page)
1872 			return NULL;
1873 
1874 		*from_pool = true;
1875 		va = page_to_virt(page);
1876 	}
1877 
1878 	*da = dma_map_single(dev, va + rxq->headroom, rxq->datasize,
1879 			     DMA_FROM_DEVICE);
1880 	if (dma_mapping_error(dev, *da)) {
1881 		if (*from_pool)
1882 			page_pool_put_full_page(rxq->page_pool, page, false);
1883 		else
1884 			put_page(virt_to_head_page(va));
1885 
1886 		return NULL;
1887 	}
1888 
1889 	return va;
1890 }
1891 
1892 /* Allocate frag for rx buffer, and save the old buf */
1893 static void mana_refill_rx_oob(struct device *dev, struct mana_rxq *rxq,
1894 			       struct mana_recv_buf_oob *rxoob, void **old_buf,
1895 			       bool *old_fp)
1896 {
1897 	bool from_pool;
1898 	dma_addr_t da;
1899 	void *va;
1900 
1901 	va = mana_get_rxfrag(rxq, dev, &da, &from_pool);
1902 	if (!va)
1903 		return;
1904 
1905 	dma_unmap_single(dev, rxoob->sgl[0].address, rxq->datasize,
1906 			 DMA_FROM_DEVICE);
1907 	*old_buf = rxoob->buf_va;
1908 	*old_fp = rxoob->from_pool;
1909 
1910 	rxoob->buf_va = va;
1911 	rxoob->sgl[0].address = da;
1912 	rxoob->from_pool = from_pool;
1913 }
1914 
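/* Handle one RX CQE: refill the buffer slot, pass the old buffer up the stack
 * via mana_rx_skb(), then advance the RQ tail and repost the WQE.
 */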
1915 static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
1916 				struct gdma_comp *cqe)
1917 {
1918 	struct mana_rxcomp_oob *oob = (struct mana_rxcomp_oob *)cqe->cqe_data;
1919 	struct gdma_context *gc = rxq->gdma_rq->gdma_dev->gdma_context;
1920 	struct net_device *ndev = rxq->ndev;
1921 	struct mana_recv_buf_oob *rxbuf_oob;
1922 	struct mana_port_context *apc;
1923 	struct device *dev = gc->dev;
1924 	void *old_buf = NULL;
1925 	u32 curr, pktlen;
1926 	bool old_fp;
1927 
1928 	apc = netdev_priv(ndev);
1929 
1930 	switch (oob->cqe_hdr.cqe_type) {
1931 	case CQE_RX_OKAY:
1932 		break;
1933 
1934 	case CQE_RX_TRUNCATED:
1935 		++ndev->stats.rx_dropped;
1936 		rxbuf_oob = &rxq->rx_oobs[rxq->buf_index];
1937 		netdev_warn_once(ndev, "Dropped a truncated packet\n");
1938 		goto drop;
1939 
1940 	case CQE_RX_COALESCED_4:
1941 		netdev_err(ndev, "RX coalescing is unsupported\n");
1942 		apc->eth_stats.rx_coalesced_err++;
1943 		return;
1944 
1945 	case CQE_RX_OBJECT_FENCE:
1946 		complete(&rxq->fence_event);
1947 		return;
1948 
1949 	default:
1950 		netdev_err(ndev, "Unknown RX CQE type = %d\n",
1951 			   oob->cqe_hdr.cqe_type);
1952 		apc->eth_stats.rx_cqe_unknown_type++;
1953 		return;
1954 	}
1955 
1956 	pktlen = oob->ppi[0].pkt_len;
1957 
1958 	if (pktlen == 0) {
1959 		/* Data packets should never have a packet length of zero */
1960 		netdev_err(ndev, "RX pkt len=0, rq=%u, cq=%u, rxobj=0x%llx\n",
1961 			   rxq->gdma_id, cq->gdma_id, rxq->rxobj);
1962 		return;
1963 	}
1964 
1965 	curr = rxq->buf_index;
1966 	rxbuf_oob = &rxq->rx_oobs[curr];
1967 	WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1);
1968 
1969 	mana_refill_rx_oob(dev, rxq, rxbuf_oob, &old_buf, &old_fp);
1970 
1971 	/* Unsuccessful refill will have old_buf == NULL.
1972 	 * In this case, mana_rx_skb() will drop the packet.
1973 	 */
1974 	mana_rx_skb(old_buf, old_fp, oob, rxq);
1975 
1976 drop:
1977 	mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu);
1978 
1979 	mana_post_pkt_rxq(rxq);
1980 }
1981 
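/* Drain up to CQE_POLLING_BUFFER completions from the RX CQ, process each of
 * them, then ring the RQ doorbell once for the whole batch and flush any
 * deferred XDP redirects.
 */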
1982 static void mana_poll_rx_cq(struct mana_cq *cq)
1983 {
1984 	struct gdma_comp *comp = cq->gdma_comp_buf;
1985 	struct mana_rxq *rxq = cq->rxq;
1986 	int comp_read, i;
1987 
1988 	comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER);
1989 	WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER);
1990 
1991 	rxq->xdp_flush = false;
1992 
1993 	for (i = 0; i < comp_read; i++) {
1994 		if (WARN_ON_ONCE(comp[i].is_sq))
1995 			return;
1996 
1997 		/* verify recv cqe references the right rxq */
1998 		if (WARN_ON_ONCE(comp[i].wq_num != cq->rxq->gdma_id))
1999 			return;
2000 
2001 		mana_process_rx_cqe(rxq, cq, &comp[i]);
2002 	}
2003 
2004 	if (comp_read > 0) {
2005 		struct gdma_context *gc = rxq->gdma_rq->gdma_dev->gdma_context;
2006 
2007 		mana_gd_wq_ring_doorbell(gc, rxq->gdma_rq);
2008 	}
2009 
2010 	if (rxq->xdp_flush)
2011 		xdp_do_flush();
2012 }
2013 
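/* Common CQ handler for RX and TX: poll the CQ, then either re-arm it when
 * the NAPI budget was not exhausted, or ring a plain (unarmed) doorbell once
 * enough work has accumulated (see the wraparound comment below).
 */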
2014 static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
2015 {
2016 	struct mana_cq *cq = context;
2017 	int w;
2018 
2019 	WARN_ON_ONCE(cq->gdma_cq != gdma_queue);
2020 
2021 	if (cq->type == MANA_CQ_TYPE_RX)
2022 		mana_poll_rx_cq(cq);
2023 	else
2024 		mana_poll_tx_cq(cq);
2025 
2026 	w = cq->work_done;
2027 	cq->work_done_since_doorbell += w;
2028 
2029 	if (w < cq->budget) {
2030 		mana_gd_ring_cq(gdma_queue, SET_ARM_BIT);
2031 		cq->work_done_since_doorbell = 0;
2032 		napi_complete_done(&cq->napi, w);
2033 	} else if (cq->work_done_since_doorbell >
2034 		   cq->gdma_cq->queue_size / COMP_ENTRY_SIZE * 4) {
2035 		/* MANA hardware requires at least one doorbell ring every 8
2036 		 * wraparounds of the CQ even if there is no need to arm it.
2037 		 * This driver rings the doorbell as soon as we have exceeded
2038 		 * 4 wraparounds.
2039 		 */
2040 		mana_gd_ring_cq(gdma_queue, 0);
2041 		cq->work_done_since_doorbell = 0;
2042 	}
2043 
2044 	return w;
2045 }
2046 
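/* NAPI poll callback: drain the associated CQ and report the work done,
 * capped at the budget.
 */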
2047 static int mana_poll(struct napi_struct *napi, int budget)
2048 {
2049 	struct mana_cq *cq = container_of(napi, struct mana_cq, napi);
2050 	int w;
2051 
2052 	cq->work_done = 0;
2053 	cq->budget = budget;
2054 
2055 	w = mana_cq_handler(cq, cq->gdma_cq);
2056 
2057 	return min(w, budget);
2058 }
2059 
2060 static void mana_schedule_napi(void *context, struct gdma_queue *gdma_queue)
2061 {
2062 	struct mana_cq *cq = context;
2063 
2064 	napi_schedule_irqoff(&cq->napi);
2065 }
2066 
2067 static void mana_deinit_cq(struct mana_port_context *apc, struct mana_cq *cq)
2068 {
2069 	struct gdma_dev *gd = apc->ac->gdma_dev;
2070 
2071 	if (!cq->gdma_cq)
2072 		return;
2073 
2074 	mana_gd_destroy_queue(gd->gdma_context, cq->gdma_cq);
2075 }
2076 
2077 static void mana_deinit_txq(struct mana_port_context *apc, struct mana_txq *txq)
2078 {
2079 	struct gdma_dev *gd = apc->ac->gdma_dev;
2080 
2081 	if (!txq->gdma_sq)
2082 		return;
2083 
2084 	mana_gd_destroy_queue(gd->gdma_context, txq->gdma_sq);
2085 }
2086 
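/* Tear down every TX queue pair: remove its debugfs entries, quiesce and
 * delete NAPI under the netdev instance lock, destroy the WQ object bound to
 * the vPort, and release the completion and send queues.
 */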
2087 static void mana_destroy_txq(struct mana_port_context *apc)
2088 {
2089 	struct napi_struct *napi;
2090 	int i;
2091 
2092 	if (!apc->tx_qp)
2093 		return;
2094 
2095 	for (i = 0; i < apc->num_queues; i++) {
2096 		debugfs_remove_recursive(apc->tx_qp[i].mana_tx_debugfs);
2097 		apc->tx_qp[i].mana_tx_debugfs = NULL;
2098 
2099 		napi = &apc->tx_qp[i].tx_cq.napi;
2100 		if (apc->tx_qp[i].txq.napi_initialized) {
2101 			napi_synchronize(napi);
2102 			netdev_lock_ops_to_full(napi->dev);
2103 			napi_disable_locked(napi);
2104 			netif_napi_del_locked(napi);
2105 			netdev_unlock_full_to_ops(napi->dev);
2106 			apc->tx_qp[i].txq.napi_initialized = false;
2107 		}
2108 		mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);
2109 
2110 		mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);
2111 
2112 		mana_deinit_txq(apc, &apc->tx_qp[i].txq);
2113 	}
2114 
2115 	kfree(apc->tx_qp);
2116 	apc->tx_qp = NULL;
2117 }
2118 
2119 static void mana_create_txq_debugfs(struct mana_port_context *apc, int idx)
2120 {
2121 	struct mana_tx_qp *tx_qp = &apc->tx_qp[idx];
2122 	char qnum[32];
2123 
2124 	sprintf(qnum, "TX-%d", idx);
2125 	tx_qp->mana_tx_debugfs = debugfs_create_dir(qnum, apc->mana_port_debugfs);
2126 	debugfs_create_u32("sq_head", 0400, tx_qp->mana_tx_debugfs,
2127 			   &tx_qp->txq.gdma_sq->head);
2128 	debugfs_create_u32("sq_tail", 0400, tx_qp->mana_tx_debugfs,
2129 			   &tx_qp->txq.gdma_sq->tail);
2130 	debugfs_create_u32("sq_pend_skb_qlen", 0400, tx_qp->mana_tx_debugfs,
2131 			   &tx_qp->txq.pending_skbs.qlen);
2132 	debugfs_create_u32("cq_head", 0400, tx_qp->mana_tx_debugfs,
2133 			   &tx_qp->tx_cq.gdma_cq->head);
2134 	debugfs_create_u32("cq_tail", 0400, tx_qp->mana_tx_debugfs,
2135 			   &tx_qp->tx_cq.gdma_cq->tail);
2136 	debugfs_create_u32("cq_budget", 0400, tx_qp->mana_tx_debugfs,
2137 			   &tx_qp->tx_cq.budget);
2138 	debugfs_create_file("txq_dump", 0400, tx_qp->mana_tx_debugfs,
2139 			    tx_qp->txq.gdma_sq, &mana_dbg_q_fops);
2140 	debugfs_create_file("cq_dump", 0400, tx_qp->mana_tx_debugfs,
2141 			    tx_qp->tx_cq.gdma_cq, &mana_dbg_q_fops);
2142 }
2143 
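/* Create one SQ and its CQ per TX queue: allocate the GDMA queues, bind them
 * to the vPort as a WQ object, expose debugfs entries, then register and
 * enable NAPI and arm the CQ.
 */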
2144 static int mana_create_txq(struct mana_port_context *apc,
2145 			   struct net_device *net)
2146 {
2147 	struct mana_context *ac = apc->ac;
2148 	struct gdma_dev *gd = ac->gdma_dev;
2149 	struct mana_obj_spec wq_spec;
2150 	struct mana_obj_spec cq_spec;
2151 	struct gdma_queue_spec spec;
2152 	struct gdma_context *gc;
2153 	struct mana_txq *txq;
2154 	struct mana_cq *cq;
2155 	u32 txq_size;
2156 	u32 cq_size;
2157 	int err;
2158 	int i;
2159 
2160 	apc->tx_qp = kcalloc(apc->num_queues, sizeof(struct mana_tx_qp),
2161 			     GFP_KERNEL);
2162 	if (!apc->tx_qp)
2163 		return -ENOMEM;
2164 
2165 	/* The minimum size of a WQE is 32 bytes, hence
2166 	 * apc->tx_queue_size represents the maximum number of WQEs
2167 	 * the SQ can store. This value is then used to size the other
2168 	 * queues to prevent overflow.
2169 	 * Note that txq_size is always MANA_PAGE_ALIGNED: the minimum
2170 	 * value of apc->tx_queue_size is 128, which makes txq_size
2171 	 * 128 * 32 = 4096, and all larger values of apc->tx_queue_size
2172 	 * are powers of two.
2173 	 */
2174 	txq_size = apc->tx_queue_size * 32;
2175 
2176 	cq_size = apc->tx_queue_size * COMP_ENTRY_SIZE;
2177 
2178 	gc = gd->gdma_context;
2179 
2180 	for (i = 0; i < apc->num_queues; i++) {
2181 		apc->tx_qp[i].tx_object = INVALID_MANA_HANDLE;
2182 
2183 		/* Create SQ */
2184 		txq = &apc->tx_qp[i].txq;
2185 
2186 		u64_stats_init(&txq->stats.syncp);
2187 		txq->ndev = net;
2188 		txq->net_txq = netdev_get_tx_queue(net, i);
2189 		txq->vp_offset = apc->tx_vp_offset;
2190 		txq->napi_initialized = false;
2191 		skb_queue_head_init(&txq->pending_skbs);
2192 
2193 		memset(&spec, 0, sizeof(spec));
2194 		spec.type = GDMA_SQ;
2195 		spec.monitor_avl_buf = true;
2196 		spec.queue_size = txq_size;
2197 		err = mana_gd_create_mana_wq_cq(gd, &spec, &txq->gdma_sq);
2198 		if (err)
2199 			goto out;
2200 
2201 		/* Create SQ's CQ */
2202 		cq = &apc->tx_qp[i].tx_cq;
2203 		cq->type = MANA_CQ_TYPE_TX;
2204 
2205 		cq->txq = txq;
2206 
2207 		memset(&spec, 0, sizeof(spec));
2208 		spec.type = GDMA_CQ;
2209 		spec.monitor_avl_buf = false;
2210 		spec.queue_size = cq_size;
2211 		spec.cq.callback = mana_schedule_napi;
2212 		spec.cq.parent_eq = ac->eqs[i].eq;
2213 		spec.cq.context = cq;
2214 		err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq);
2215 		if (err)
2216 			goto out;
2217 
2218 		memset(&wq_spec, 0, sizeof(wq_spec));
2219 		memset(&cq_spec, 0, sizeof(cq_spec));
2220 
2221 		wq_spec.gdma_region = txq->gdma_sq->mem_info.dma_region_handle;
2222 		wq_spec.queue_size = txq->gdma_sq->queue_size;
2223 
2224 		cq_spec.gdma_region = cq->gdma_cq->mem_info.dma_region_handle;
2225 		cq_spec.queue_size = cq->gdma_cq->queue_size;
2226 		cq_spec.modr_ctx_id = 0;
2227 		cq_spec.attached_eq = cq->gdma_cq->cq.parent->id;
2228 
2229 		err = mana_create_wq_obj(apc, apc->port_handle, GDMA_SQ,
2230 					 &wq_spec, &cq_spec,
2231 					 &apc->tx_qp[i].tx_object);
2232 
2233 		if (err)
2234 			goto out;
2235 
2236 		txq->gdma_sq->id = wq_spec.queue_index;
2237 		cq->gdma_cq->id = cq_spec.queue_index;
2238 
2239 		txq->gdma_sq->mem_info.dma_region_handle =
2240 			GDMA_INVALID_DMA_REGION;
2241 		cq->gdma_cq->mem_info.dma_region_handle =
2242 			GDMA_INVALID_DMA_REGION;
2243 
2244 		txq->gdma_txq_id = txq->gdma_sq->id;
2245 
2246 		cq->gdma_id = cq->gdma_cq->id;
2247 
2248 		if (WARN_ON(cq->gdma_id >= gc->max_num_cqs)) {
2249 			err = -EINVAL;
2250 			goto out;
2251 		}
2252 
2253 		gc->cq_table[cq->gdma_id] = cq->gdma_cq;
2254 
2255 		mana_create_txq_debugfs(apc, i);
2256 
2257 		set_bit(NAPI_STATE_NO_BUSY_POLL, &cq->napi.state);
2258 		netdev_lock_ops_to_full(net);
2259 		netif_napi_add_locked(net, &cq->napi, mana_poll);
2260 		napi_enable_locked(&cq->napi);
2261 		netdev_unlock_full_to_ops(net);
2262 		txq->napi_initialized = true;
2263 
2264 		mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
2265 	}
2266 
2267 	return 0;
2268 out:
2269 	netdev_err(net, "Failed to create %d TX queues, %d\n",
2270 		   apc->num_queues, err);
2271 	mana_destroy_txq(apc);
2272 	return err;
2273 }
2274 
2275 static void mana_destroy_rxq(struct mana_port_context *apc,
2276 			     struct mana_rxq *rxq, bool napi_initialized)
2278 {
2279 	struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
2280 	struct mana_recv_buf_oob *rx_oob;
2281 	struct device *dev = gc->dev;
2282 	struct napi_struct *napi;
2283 	struct page *page;
2284 	int i;
2285 
2286 	if (!rxq)
2287 		return;
2288 
2289 	debugfs_remove_recursive(rxq->mana_rx_debugfs);
2290 	rxq->mana_rx_debugfs = NULL;
2291 
2292 	napi = &rxq->rx_cq.napi;
2293 
2294 	if (napi_initialized) {
2295 		napi_synchronize(napi);
2296 
2297 		netdev_lock_ops_to_full(napi->dev);
2298 		napi_disable_locked(napi);
2299 		netif_napi_del_locked(napi);
2300 		netdev_unlock_full_to_ops(napi->dev);
2301 	}
2302 	xdp_rxq_info_unreg(&rxq->xdp_rxq);
2303 
2304 	mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);
2305 
2306 	mana_deinit_cq(apc, &rxq->rx_cq);
2307 
2308 	if (rxq->xdp_save_va)
2309 		put_page(virt_to_head_page(rxq->xdp_save_va));
2310 
2311 	for (i = 0; i < rxq->num_rx_buf; i++) {
2312 		rx_oob = &rxq->rx_oobs[i];
2313 
2314 		if (!rx_oob->buf_va)
2315 			continue;
2316 
2317 		dma_unmap_single(dev, rx_oob->sgl[0].address,
2318 				 rx_oob->sgl[0].size, DMA_FROM_DEVICE);
2319 
2320 		page = virt_to_head_page(rx_oob->buf_va);
2321 
2322 		if (rx_oob->from_pool)
2323 			page_pool_put_full_page(rxq->page_pool, page, false);
2324 		else
2325 			put_page(page);
2326 
2327 		rx_oob->buf_va = NULL;
2328 	}
2329 
2330 	page_pool_destroy(rxq->page_pool);
2331 
2332 	if (rxq->gdma_rq)
2333 		mana_gd_destroy_queue(gc, rxq->gdma_rq);
2334 
2335 	kfree(rxq);
2336 }
2337 
2338 static int mana_fill_rx_oob(struct mana_recv_buf_oob *rx_oob, u32 mem_key,
2339 			    struct mana_rxq *rxq, struct device *dev)
2340 {
2341 	struct mana_port_context *mpc = netdev_priv(rxq->ndev);
2342 	bool from_pool = false;
2343 	dma_addr_t da;
2344 	void *va;
2345 
2346 	if (mpc->rxbufs_pre)
2347 		va = mana_get_rxbuf_pre(rxq, &da);
2348 	else
2349 		va = mana_get_rxfrag(rxq, dev, &da, &from_pool);
2350 
2351 	if (!va)
2352 		return -ENOMEM;
2353 
2354 	rx_oob->buf_va = va;
2355 	rx_oob->from_pool = from_pool;
2356 
2357 	rx_oob->sgl[0].address = da;
2358 	rx_oob->sgl[0].size = rxq->datasize;
2359 	rx_oob->sgl[0].mem_key = mem_key;
2360 
2361 	return 0;
2362 }
2363 
2364 #define MANA_WQE_HEADER_SIZE 16
2365 #define MANA_WQE_SGE_SIZE 16
2366 
2367 static int mana_alloc_rx_wqe(struct mana_port_context *apc,
2368 			     struct mana_rxq *rxq, u32 *rxq_size, u32 *cq_size)
2369 {
2370 	struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
2371 	struct mana_recv_buf_oob *rx_oob;
2372 	struct device *dev = gc->dev;
2373 	u32 buf_idx;
2374 	int ret;
2375 
2376 	WARN_ON(rxq->datasize == 0);
2377 
2378 	*rxq_size = 0;
2379 	*cq_size = 0;
2380 
2381 	for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) {
2382 		rx_oob = &rxq->rx_oobs[buf_idx];
2383 		memset(rx_oob, 0, sizeof(*rx_oob));
2384 
2385 		rx_oob->num_sge = 1;
2386 
2387 		ret = mana_fill_rx_oob(rx_oob, apc->ac->gdma_dev->gpa_mkey, rxq,
2388 				       dev);
2389 		if (ret)
2390 			return ret;
2391 
2392 		rx_oob->wqe_req.sgl = rx_oob->sgl;
2393 		rx_oob->wqe_req.num_sge = rx_oob->num_sge;
2394 		rx_oob->wqe_req.inline_oob_size = 0;
2395 		rx_oob->wqe_req.inline_oob_data = NULL;
2396 		rx_oob->wqe_req.flags = 0;
2397 		rx_oob->wqe_req.client_data_unit = 0;
2398 
2399 		*rxq_size += ALIGN(MANA_WQE_HEADER_SIZE +
2400 				   MANA_WQE_SGE_SIZE * rx_oob->num_sge, 32);
2401 		*cq_size += COMP_ENTRY_SIZE;
2402 	}
2403 
2404 	return 0;
2405 }
2406 
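/* Post one receive WQE for every preallocated RX buffer and ring the RQ
 * doorbell so the hardware can start filling the buffers.
 */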
2407 static int mana_push_wqe(struct mana_rxq *rxq)
2408 {
2409 	struct mana_recv_buf_oob *rx_oob;
2410 	u32 buf_idx;
2411 	int err;
2412 
2413 	for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) {
2414 		rx_oob = &rxq->rx_oobs[buf_idx];
2415 
2416 		err = mana_gd_post_and_ring(rxq->gdma_rq, &rx_oob->wqe_req,
2417 					    &rx_oob->wqe_inf);
2418 		if (err)
2419 			return -ENOSPC;
2420 	}
2421 
2422 	return 0;
2423 }
2424 
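/* Create the page pool backing this RX queue: sized to the RX queue depth,
 * allocated on the device's NUMA node, and tied to the queue's NAPI context
 * so pages can be recycled directly.
 */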
2425 static int mana_create_page_pool(struct mana_rxq *rxq, struct gdma_context *gc)
2426 {
2427 	struct mana_port_context *mpc = netdev_priv(rxq->ndev);
2428 	struct page_pool_params pprm = {};
2429 	int ret;
2430 
2431 	pprm.pool_size = mpc->rx_queue_size;
2432 	pprm.nid = gc->numa_node;
2433 	pprm.napi = &rxq->rx_cq.napi;
2434 	pprm.netdev = rxq->ndev;
2435 	pprm.order = get_order(rxq->alloc_size);
2436 
2437 	rxq->page_pool = page_pool_create(&pprm);
2438 
2439 	if (IS_ERR(rxq->page_pool)) {
2440 		ret = PTR_ERR(rxq->page_pool);
2441 		rxq->page_pool = NULL;
2442 		return ret;
2443 	}
2444 
2445 	return 0;
2446 }
2447 
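/* Create one RX queue: set up its page pool and receive buffers, create the
 * GDMA RQ and CQ, bind them to the vPort as a WQ object, post the initial
 * WQEs, register NAPI and the XDP RX queue info, and arm the CQ. On failure
 * the partially created queue is destroyed and NULL is returned.
 */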
2448 static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
2449 					u32 rxq_idx, struct mana_eq *eq,
2450 					struct net_device *ndev)
2451 {
2452 	struct gdma_dev *gd = apc->ac->gdma_dev;
2453 	struct mana_obj_spec wq_spec;
2454 	struct mana_obj_spec cq_spec;
2455 	struct gdma_queue_spec spec;
2456 	struct mana_cq *cq = NULL;
2457 	struct gdma_context *gc;
2458 	u32 cq_size, rq_size;
2459 	struct mana_rxq *rxq;
2460 	int err;
2461 
2462 	gc = gd->gdma_context;
2463 
2464 	rxq = kzalloc(struct_size(rxq, rx_oobs, apc->rx_queue_size),
2465 		      GFP_KERNEL);
2466 	if (!rxq)
2467 		return NULL;
2468 
2469 	rxq->ndev = ndev;
2470 	rxq->num_rx_buf = apc->rx_queue_size;
2471 	rxq->rxq_idx = rxq_idx;
2472 	rxq->rxobj = INVALID_MANA_HANDLE;
2473 
2474 	mana_get_rxbuf_cfg(ndev->mtu, &rxq->datasize, &rxq->alloc_size,
2475 			   &rxq->headroom);
2476 
2477 	/* Create page pool for RX queue */
2478 	err = mana_create_page_pool(rxq, gc);
2479 	if (err) {
2480 		netdev_err(ndev, "Create page pool err:%d\n", err);
2481 		goto out;
2482 	}
2483 
2484 	err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size);
2485 	if (err)
2486 		goto out;
2487 
2488 	rq_size = MANA_PAGE_ALIGN(rq_size);
2489 	cq_size = MANA_PAGE_ALIGN(cq_size);
2490 
2491 	/* Create RQ */
2492 	memset(&spec, 0, sizeof(spec));
2493 	spec.type = GDMA_RQ;
2494 	spec.monitor_avl_buf = true;
2495 	spec.queue_size = rq_size;
2496 	err = mana_gd_create_mana_wq_cq(gd, &spec, &rxq->gdma_rq);
2497 	if (err)
2498 		goto out;
2499 
2500 	/* Create RQ's CQ */
2501 	cq = &rxq->rx_cq;
2502 	cq->type = MANA_CQ_TYPE_RX;
2503 	cq->rxq = rxq;
2504 
2505 	memset(&spec, 0, sizeof(spec));
2506 	spec.type = GDMA_CQ;
2507 	spec.monitor_avl_buf = false;
2508 	spec.queue_size = cq_size;
2509 	spec.cq.callback = mana_schedule_napi;
2510 	spec.cq.parent_eq = eq->eq;
2511 	spec.cq.context = cq;
2512 	err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq);
2513 	if (err)
2514 		goto out;
2515 
2516 	memset(&wq_spec, 0, sizeof(wq_spec));
2517 	memset(&cq_spec, 0, sizeof(cq_spec));
2518 	wq_spec.gdma_region = rxq->gdma_rq->mem_info.dma_region_handle;
2519 	wq_spec.queue_size = rxq->gdma_rq->queue_size;
2520 
2521 	cq_spec.gdma_region = cq->gdma_cq->mem_info.dma_region_handle;
2522 	cq_spec.queue_size = cq->gdma_cq->queue_size;
2523 	cq_spec.modr_ctx_id = 0;
2524 	cq_spec.attached_eq = cq->gdma_cq->cq.parent->id;
2525 
2526 	err = mana_create_wq_obj(apc, apc->port_handle, GDMA_RQ,
2527 				 &wq_spec, &cq_spec, &rxq->rxobj);
2528 	if (err)
2529 		goto out;
2530 
2531 	rxq->gdma_rq->id = wq_spec.queue_index;
2532 	cq->gdma_cq->id = cq_spec.queue_index;
2533 
2534 	rxq->gdma_rq->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION;
2535 	cq->gdma_cq->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION;
2536 
2537 	rxq->gdma_id = rxq->gdma_rq->id;
2538 	cq->gdma_id = cq->gdma_cq->id;
2539 
2540 	err = mana_push_wqe(rxq);
2541 	if (err)
2542 		goto out;
2543 
2544 	if (WARN_ON(cq->gdma_id >= gc->max_num_cqs)) {
2545 		err = -EINVAL;
2546 		goto out;
2547 	}
2548 
2549 	gc->cq_table[cq->gdma_id] = cq->gdma_cq;
2550 
2551 	netdev_lock_ops_to_full(ndev);
2552 	netif_napi_add_weight_locked(ndev, &cq->napi, mana_poll, 1);
2553 	netdev_unlock_full_to_ops(ndev);
2554 
2555 	WARN_ON(xdp_rxq_info_reg(&rxq->xdp_rxq, ndev, rxq_idx,
2556 				 cq->napi.napi_id));
2557 	WARN_ON(xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, MEM_TYPE_PAGE_POOL,
2558 					   rxq->page_pool));
2559 
2560 	netdev_lock_ops_to_full(ndev);
2561 	napi_enable_locked(&cq->napi);
2562 	netdev_unlock_full_to_ops(ndev);
2563 
2564 	mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
2565 out:
2566 	if (!err)
2567 		return rxq;
2568 
2569 	netdev_err(ndev, "Failed to create RXQ: err = %d\n", err);
2570 
2571 	mana_destroy_rxq(apc, rxq, false);
2572 
2573 	if (cq)
2574 		mana_deinit_cq(apc, cq);
2575 
2576 	return NULL;
2577 }
2578 
2579 static void mana_create_rxq_debugfs(struct mana_port_context *apc, int idx)
2580 {
2581 	struct mana_rxq *rxq;
2582 	char qnum[32];
2583 
2584 	rxq = apc->rxqs[idx];
2585 
2586 	sprintf(qnum, "RX-%d", idx);
2587 	rxq->mana_rx_debugfs = debugfs_create_dir(qnum, apc->mana_port_debugfs);
2588 	debugfs_create_u32("rq_head", 0400, rxq->mana_rx_debugfs, &rxq->gdma_rq->head);
2589 	debugfs_create_u32("rq_tail", 0400, rxq->mana_rx_debugfs, &rxq->gdma_rq->tail);
2590 	debugfs_create_u32("rq_nbuf", 0400, rxq->mana_rx_debugfs, &rxq->num_rx_buf);
2591 	debugfs_create_u32("cq_head", 0400, rxq->mana_rx_debugfs,
2592 			   &rxq->rx_cq.gdma_cq->head);
2593 	debugfs_create_u32("cq_tail", 0400, rxq->mana_rx_debugfs,
2594 			   &rxq->rx_cq.gdma_cq->tail);
2595 	debugfs_create_u32("cq_budget", 0400, rxq->mana_rx_debugfs, &rxq->rx_cq.budget);
2596 	debugfs_create_file("rxq_dump", 0400, rxq->mana_rx_debugfs, rxq->gdma_rq, &mana_dbg_q_fops);
2597 	debugfs_create_file("cq_dump", 0400, rxq->mana_rx_debugfs, rxq->rx_cq.gdma_cq,
2598 			    &mana_dbg_q_fops);
2599 }
2600 
2601 static int mana_add_rx_queues(struct mana_port_context *apc,
2602 			      struct net_device *ndev)
2603 {
2604 	struct mana_context *ac = apc->ac;
2605 	struct mana_rxq *rxq;
2606 	int err = 0;
2607 	int i;
2608 
2609 	for (i = 0; i < apc->num_queues; i++) {
2610 		rxq = mana_create_rxq(apc, i, &ac->eqs[i], ndev);
2611 		if (!rxq) {
2612 			err = -ENOMEM;
2613 			netdev_err(ndev, "Failed to create rxq %d : %d\n", i, err);
2614 			goto out;
2615 		}
2616 
2617 		u64_stats_init(&rxq->stats.syncp);
2618 
2619 		apc->rxqs[i] = rxq;
2620 
2621 		mana_create_rxq_debugfs(apc, i);
2622 	}
2623 
2624 	apc->default_rxobj = apc->rxqs[0]->rxobj;
2625 out:
2626 	return err;
2627 }
2628 
2629 static void mana_destroy_vport(struct mana_port_context *apc)
2630 {
2631 	struct gdma_dev *gd = apc->ac->gdma_dev;
2632 	struct mana_rxq *rxq;
2633 	u32 rxq_idx;
2634 
2635 	for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) {
2636 		rxq = apc->rxqs[rxq_idx];
2637 		if (!rxq)
2638 			continue;
2639 
2640 		mana_destroy_rxq(apc, rxq, true);
2641 		apc->rxqs[rxq_idx] = NULL;
2642 	}
2643 
2644 	mana_destroy_txq(apc);
2645 	mana_uncfg_vport(apc);
2646 
2647 	if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode)
2648 		mana_pf_deregister_hw_vport(apc);
2649 }
2650 
2651 static int mana_create_vport(struct mana_port_context *apc,
2652 			     struct net_device *net)
2653 {
2654 	struct gdma_dev *gd = apc->ac->gdma_dev;
2655 	int err;
2656 
2657 	apc->default_rxobj = INVALID_MANA_HANDLE;
2658 
2659 	if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) {
2660 		err = mana_pf_register_hw_vport(apc);
2661 		if (err)
2662 			return err;
2663 	}
2664 
2665 	err = mana_cfg_vport(apc, gd->pdid, gd->doorbell);
2666 	if (err)
2667 		return err;
2668 
2669 	return mana_create_txq(apc, net);
2670 }
2671 
2672 static int mana_rss_table_alloc(struct mana_port_context *apc)
2673 {
2674 	if (!apc->indir_table_sz) {
2675 		netdev_err(apc->ndev,
2676 			   "Indirection table size not set for vPort %d\n",
2677 			   apc->port_idx);
2678 		return -EINVAL;
2679 	}
2680 
2681 	apc->indir_table = kcalloc(apc->indir_table_sz, sizeof(u32), GFP_KERNEL);
2682 	if (!apc->indir_table)
2683 		return -ENOMEM;
2684 
2685 	apc->rxobj_table = kcalloc(apc->indir_table_sz, sizeof(mana_handle_t), GFP_KERNEL);
2686 	if (!apc->rxobj_table) {
2687 		kfree(apc->indir_table);
2688 		return -ENOMEM;
2689 	}
2690 
2691 	return 0;
2692 }
2693 
2694 static void mana_rss_table_init(struct mana_port_context *apc)
2695 {
2696 	int i;
2697 
2698 	for (i = 0; i < apc->indir_table_sz; i++)
2699 		apc->indir_table[i] =
2700 			ethtool_rxfh_indir_default(i, apc->num_queues);
2701 }
2702 
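/* Apply the RSS configuration to the vPort: optionally refresh the RX object
 * table from the indirection table, push the steering settings to the
 * hardware, then fence the RQs so the update has taken effect before
 * returning.
 */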
2703 int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx,
2704 		    bool update_hash, bool update_tab)
2705 {
2706 	u32 queue_idx;
2707 	int err;
2708 	int i;
2709 
2710 	if (update_tab) {
2711 		for (i = 0; i < apc->indir_table_sz; i++) {
2712 			queue_idx = apc->indir_table[i];
2713 			apc->rxobj_table[i] = apc->rxqs[queue_idx]->rxobj;
2714 		}
2715 	}
2716 
2717 	err = mana_cfg_vport_steering(apc, rx, true, update_hash, update_tab);
2718 	if (err)
2719 		return err;
2720 
2721 	mana_fence_rqs(apc);
2722 
2723 	return 0;
2724 }
2725 
2726 void mana_query_gf_stats(struct mana_port_context *apc)
2727 {
2728 	struct mana_query_gf_stat_resp resp = {};
2729 	struct mana_query_gf_stat_req req = {};
2730 	struct net_device *ndev = apc->ndev;
2731 	int err;
2732 
2733 	mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_GF_STAT,
2734 			     sizeof(req), sizeof(resp));
2735 	req.hdr.resp.msg_version = GDMA_MESSAGE_V2;
2736 	req.req_stats = STATISTICS_FLAGS_RX_DISCARDS_NO_WQE |
2737 			STATISTICS_FLAGS_RX_ERRORS_VPORT_DISABLED |
2738 			STATISTICS_FLAGS_HC_RX_BYTES |
2739 			STATISTICS_FLAGS_HC_RX_UCAST_PACKETS |
2740 			STATISTICS_FLAGS_HC_RX_UCAST_BYTES |
2741 			STATISTICS_FLAGS_HC_RX_MCAST_PACKETS |
2742 			STATISTICS_FLAGS_HC_RX_MCAST_BYTES |
2743 			STATISTICS_FLAGS_HC_RX_BCAST_PACKETS |
2744 			STATISTICS_FLAGS_HC_RX_BCAST_BYTES |
2745 			STATISTICS_FLAGS_TX_ERRORS_GF_DISABLED |
2746 			STATISTICS_FLAGS_TX_ERRORS_VPORT_DISABLED |
2747 			STATISTICS_FLAGS_TX_ERRORS_INVAL_VPORT_OFFSET_PACKETS |
2748 			STATISTICS_FLAGS_TX_ERRORS_VLAN_ENFORCEMENT |
2749 			STATISTICS_FLAGS_TX_ERRORS_ETH_TYPE_ENFORCEMENT |
2750 			STATISTICS_FLAGS_TX_ERRORS_SA_ENFORCEMENT |
2751 			STATISTICS_FLAGS_TX_ERRORS_SQPDID_ENFORCEMENT |
2752 			STATISTICS_FLAGS_TX_ERRORS_CQPDID_ENFORCEMENT |
2753 			STATISTICS_FLAGS_TX_ERRORS_MTU_VIOLATION |
2754 			STATISTICS_FLAGS_TX_ERRORS_INVALID_OOB |
2755 			STATISTICS_FLAGS_HC_TX_BYTES |
2756 			STATISTICS_FLAGS_HC_TX_UCAST_PACKETS |
2757 			STATISTICS_FLAGS_HC_TX_UCAST_BYTES |
2758 			STATISTICS_FLAGS_HC_TX_MCAST_PACKETS |
2759 			STATISTICS_FLAGS_HC_TX_MCAST_BYTES |
2760 			STATISTICS_FLAGS_HC_TX_BCAST_PACKETS |
2761 			STATISTICS_FLAGS_HC_TX_BCAST_BYTES |
2762 			STATISTICS_FLAGS_TX_ERRORS_GDMA_ERROR;
2763 
2764 	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
2765 				sizeof(resp));
2766 	if (err) {
2767 		netdev_err(ndev, "Failed to query GF stats: %d\n", err);
2768 		return;
2769 	}
2770 	err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_GF_STAT,
2771 				   sizeof(resp));
2772 	if (err || resp.hdr.status) {
2773 		netdev_err(ndev, "Failed to query GF stats: %d, 0x%x\n", err,
2774 			   resp.hdr.status);
2775 		return;
2776 	}
2777 
2778 	apc->eth_stats.hc_rx_discards_no_wqe = resp.rx_discards_nowqe;
2779 	apc->eth_stats.hc_rx_err_vport_disabled = resp.rx_err_vport_disabled;
2780 	apc->eth_stats.hc_rx_bytes = resp.hc_rx_bytes;
2781 	apc->eth_stats.hc_rx_ucast_pkts = resp.hc_rx_ucast_pkts;
2782 	apc->eth_stats.hc_rx_ucast_bytes = resp.hc_rx_ucast_bytes;
2783 	apc->eth_stats.hc_rx_bcast_pkts = resp.hc_rx_bcast_pkts;
2784 	apc->eth_stats.hc_rx_bcast_bytes = resp.hc_rx_bcast_bytes;
2785 	apc->eth_stats.hc_rx_mcast_pkts = resp.hc_rx_mcast_pkts;
2786 	apc->eth_stats.hc_rx_mcast_bytes = resp.hc_rx_mcast_bytes;
2787 	apc->eth_stats.hc_tx_err_gf_disabled = resp.tx_err_gf_disabled;
2788 	apc->eth_stats.hc_tx_err_vport_disabled = resp.tx_err_vport_disabled;
2789 	apc->eth_stats.hc_tx_err_inval_vportoffset_pkt =
2790 					     resp.tx_err_inval_vport_offset_pkt;
2791 	apc->eth_stats.hc_tx_err_vlan_enforcement =
2792 					     resp.tx_err_vlan_enforcement;
2793 	apc->eth_stats.hc_tx_err_eth_type_enforcement =
2794 					     resp.tx_err_ethtype_enforcement;
2795 	apc->eth_stats.hc_tx_err_sa_enforcement = resp.tx_err_SA_enforcement;
2796 	apc->eth_stats.hc_tx_err_sqpdid_enforcement =
2797 					     resp.tx_err_SQPDID_enforcement;
2798 	apc->eth_stats.hc_tx_err_cqpdid_enforcement =
2799 					     resp.tx_err_CQPDID_enforcement;
2800 	apc->eth_stats.hc_tx_err_mtu_violation = resp.tx_err_mtu_violation;
2801 	apc->eth_stats.hc_tx_err_inval_oob = resp.tx_err_inval_oob;
2802 	apc->eth_stats.hc_tx_bytes = resp.hc_tx_bytes;
2803 	apc->eth_stats.hc_tx_ucast_pkts = resp.hc_tx_ucast_pkts;
2804 	apc->eth_stats.hc_tx_ucast_bytes = resp.hc_tx_ucast_bytes;
2805 	apc->eth_stats.hc_tx_bcast_pkts = resp.hc_tx_bcast_pkts;
2806 	apc->eth_stats.hc_tx_bcast_bytes = resp.hc_tx_bcast_bytes;
2807 	apc->eth_stats.hc_tx_mcast_pkts = resp.hc_tx_mcast_pkts;
2808 	apc->eth_stats.hc_tx_mcast_bytes = resp.hc_tx_mcast_bytes;
2809 	apc->eth_stats.hc_tx_err_gdma = resp.tx_err_gdma;
2810 }
2811 
2812 void mana_query_phy_stats(struct mana_port_context *apc)
2813 {
2814 	struct mana_query_phy_stat_resp resp = {};
2815 	struct mana_query_phy_stat_req req = {};
2816 	struct net_device *ndev = apc->ndev;
2817 	int err;
2818 
2819 	mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_PHY_STAT,
2820 			     sizeof(req), sizeof(resp));
2821 	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
2822 				sizeof(resp));
2823 	if (err)
2824 		return;
2825 
2826 	err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_PHY_STAT,
2827 				   sizeof(resp));
2828 	if (err || resp.hdr.status) {
2829 		netdev_err(ndev,
2830 			   "Failed to query PHY stats: %d, resp:0x%x\n",
2831 				err, resp.hdr.status);
2832 		return;
2833 	}
2834 
2835 	/* Aggregate drop counters */
2836 	apc->phy_stats.rx_pkt_drop_phy = resp.rx_pkt_drop_phy;
2837 	apc->phy_stats.tx_pkt_drop_phy = resp.tx_pkt_drop_phy;
2838 
2839 	/* Per-TC traffic counters */
2840 	apc->phy_stats.rx_pkt_tc0_phy = resp.rx_pkt_tc0_phy;
2841 	apc->phy_stats.tx_pkt_tc0_phy = resp.tx_pkt_tc0_phy;
2842 	apc->phy_stats.rx_pkt_tc1_phy = resp.rx_pkt_tc1_phy;
2843 	apc->phy_stats.tx_pkt_tc1_phy = resp.tx_pkt_tc1_phy;
2844 	apc->phy_stats.rx_pkt_tc2_phy = resp.rx_pkt_tc2_phy;
2845 	apc->phy_stats.tx_pkt_tc2_phy = resp.tx_pkt_tc2_phy;
2846 	apc->phy_stats.rx_pkt_tc3_phy = resp.rx_pkt_tc3_phy;
2847 	apc->phy_stats.tx_pkt_tc3_phy = resp.tx_pkt_tc3_phy;
2848 	apc->phy_stats.rx_pkt_tc4_phy = resp.rx_pkt_tc4_phy;
2849 	apc->phy_stats.tx_pkt_tc4_phy = resp.tx_pkt_tc4_phy;
2850 	apc->phy_stats.rx_pkt_tc5_phy = resp.rx_pkt_tc5_phy;
2851 	apc->phy_stats.tx_pkt_tc5_phy = resp.tx_pkt_tc5_phy;
2852 	apc->phy_stats.rx_pkt_tc6_phy = resp.rx_pkt_tc6_phy;
2853 	apc->phy_stats.tx_pkt_tc6_phy = resp.tx_pkt_tc6_phy;
2854 	apc->phy_stats.rx_pkt_tc7_phy = resp.rx_pkt_tc7_phy;
2855 	apc->phy_stats.tx_pkt_tc7_phy = resp.tx_pkt_tc7_phy;
2856 
2857 	/* Per-TC byte counters */
2858 	apc->phy_stats.rx_byte_tc0_phy = resp.rx_byte_tc0_phy;
2859 	apc->phy_stats.tx_byte_tc0_phy = resp.tx_byte_tc0_phy;
2860 	apc->phy_stats.rx_byte_tc1_phy = resp.rx_byte_tc1_phy;
2861 	apc->phy_stats.tx_byte_tc1_phy = resp.tx_byte_tc1_phy;
2862 	apc->phy_stats.rx_byte_tc2_phy = resp.rx_byte_tc2_phy;
2863 	apc->phy_stats.tx_byte_tc2_phy = resp.tx_byte_tc2_phy;
2864 	apc->phy_stats.rx_byte_tc3_phy = resp.rx_byte_tc3_phy;
2865 	apc->phy_stats.tx_byte_tc3_phy = resp.tx_byte_tc3_phy;
2866 	apc->phy_stats.rx_byte_tc4_phy = resp.rx_byte_tc4_phy;
2867 	apc->phy_stats.tx_byte_tc4_phy = resp.tx_byte_tc4_phy;
2868 	apc->phy_stats.rx_byte_tc5_phy = resp.rx_byte_tc5_phy;
2869 	apc->phy_stats.tx_byte_tc5_phy = resp.tx_byte_tc5_phy;
2870 	apc->phy_stats.rx_byte_tc6_phy = resp.rx_byte_tc6_phy;
2871 	apc->phy_stats.tx_byte_tc6_phy = resp.tx_byte_tc6_phy;
2872 	apc->phy_stats.rx_byte_tc7_phy = resp.rx_byte_tc7_phy;
2873 	apc->phy_stats.tx_byte_tc7_phy = resp.tx_byte_tc7_phy;
2874 
2875 	/* Per-TC pause counters */
2876 	apc->phy_stats.rx_pause_tc0_phy = resp.rx_pause_tc0_phy;
2877 	apc->phy_stats.tx_pause_tc0_phy = resp.tx_pause_tc0_phy;
2878 	apc->phy_stats.rx_pause_tc1_phy = resp.rx_pause_tc1_phy;
2879 	apc->phy_stats.tx_pause_tc1_phy = resp.tx_pause_tc1_phy;
2880 	apc->phy_stats.rx_pause_tc2_phy = resp.rx_pause_tc2_phy;
2881 	apc->phy_stats.tx_pause_tc2_phy = resp.tx_pause_tc2_phy;
2882 	apc->phy_stats.rx_pause_tc3_phy = resp.rx_pause_tc3_phy;
2883 	apc->phy_stats.tx_pause_tc3_phy = resp.tx_pause_tc3_phy;
2884 	apc->phy_stats.rx_pause_tc4_phy = resp.rx_pause_tc4_phy;
2885 	apc->phy_stats.tx_pause_tc4_phy = resp.tx_pause_tc4_phy;
2886 	apc->phy_stats.rx_pause_tc5_phy = resp.rx_pause_tc5_phy;
2887 	apc->phy_stats.tx_pause_tc5_phy = resp.tx_pause_tc5_phy;
2888 	apc->phy_stats.rx_pause_tc6_phy = resp.rx_pause_tc6_phy;
2889 	apc->phy_stats.tx_pause_tc6_phy = resp.tx_pause_tc6_phy;
2890 	apc->phy_stats.rx_pause_tc7_phy = resp.rx_pause_tc7_phy;
2891 	apc->phy_stats.tx_pause_tc7_phy = resp.tx_pause_tc7_phy;
2892 }
2893 
2894 static int mana_init_port(struct net_device *ndev)
2895 {
2896 	struct mana_port_context *apc = netdev_priv(ndev);
2897 	struct gdma_dev *gd = apc->ac->gdma_dev;
2898 	u32 max_txq, max_rxq, max_queues;
2899 	int port_idx = apc->port_idx;
2900 	struct gdma_context *gc;
2901 	char vport[32];
2902 	int err;
2903 
2904 	err = mana_init_port_context(apc);
2905 	if (err)
2906 		return err;
2907 
2908 	gc = gd->gdma_context;
2909 
2910 	err = mana_query_vport_cfg(apc, port_idx, &max_txq, &max_rxq,
2911 				   &apc->indir_table_sz);
2912 	if (err) {
2913 		netdev_err(ndev, "Failed to query info for vPort %d\n",
2914 			   port_idx);
2915 		goto reset_apc;
2916 	}
2917 
2918 	max_queues = min_t(u32, max_txq, max_rxq);
2919 	if (apc->max_queues > max_queues)
2920 		apc->max_queues = max_queues;
2921 
2922 	if (apc->num_queues > apc->max_queues)
2923 		apc->num_queues = apc->max_queues;
2924 
2925 	eth_hw_addr_set(ndev, apc->mac_addr);
2926 	sprintf(vport, "vport%d", port_idx);
2927 	apc->mana_port_debugfs = debugfs_create_dir(vport, gc->mana_pci_debugfs);
2928 	return 0;
2929 
2930 reset_apc:
2931 	mana_cleanup_port_context(apc);
2932 	return err;
2933 }
2934 
2935 int mana_alloc_queues(struct net_device *ndev)
2936 {
2937 	struct mana_port_context *apc = netdev_priv(ndev);
2938 	struct gdma_dev *gd = apc->ac->gdma_dev;
2939 	int err;
2940 
2941 	err = mana_create_vport(apc, ndev);
2942 	if (err) {
2943 		netdev_err(ndev, "Failed to create vPort %u : %d\n", apc->port_idx, err);
2944 		return err;
2945 	}
2946 
2947 	err = netif_set_real_num_tx_queues(ndev, apc->num_queues);
2948 	if (err) {
2949 		netdev_err(ndev,
2950 			   "netif_set_real_num_tx_queues () failed for ndev with num_queues %u : %d\n",
2951 			   apc->num_queues, err);
2952 		goto destroy_vport;
2953 	}
2954 
2955 	err = mana_add_rx_queues(apc, ndev);
2956 	if (err)
2957 		goto destroy_vport;
2958 
2959 	apc->rss_state = apc->num_queues > 1 ? TRI_STATE_TRUE : TRI_STATE_FALSE;
2960 
2961 	err = netif_set_real_num_rx_queues(ndev, apc->num_queues);
2962 	if (err) {
2963 		netdev_err(ndev,
2964 			   "netif_set_real_num_rx_queues () failed for ndev with num_queues %u : %d\n",
2965 			   apc->num_queues, err);
2966 		goto destroy_vport;
2967 	}
2968 
2969 	mana_rss_table_init(apc);
2970 
2971 	err = mana_config_rss(apc, TRI_STATE_TRUE, true, true);
2972 	if (err) {
2973 		netdev_err(ndev, "Failed to configure RSS table: %d\n", err);
2974 		goto destroy_vport;
2975 	}
2976 
2977 	if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) {
2978 		err = mana_pf_register_filter(apc);
2979 		if (err)
2980 			goto destroy_vport;
2981 	}
2982 
2983 	mana_chn_setxdp(apc, mana_xdp_get(apc));
2984 
2985 	return 0;
2986 
2987 destroy_vport:
2988 	mana_destroy_vport(apc);
2989 	return err;
2990 }
2991 
2992 int mana_attach(struct net_device *ndev)
2993 {
2994 	struct mana_port_context *apc = netdev_priv(ndev);
2995 	int err;
2996 
2997 	ASSERT_RTNL();
2998 
2999 	err = mana_init_port(ndev);
3000 	if (err)
3001 		return err;
3002 
3003 	if (apc->port_st_save) {
3004 		err = mana_alloc_queues(ndev);
3005 		if (err) {
3006 			mana_cleanup_port_context(apc);
3007 			return err;
3008 		}
3009 	}
3010 
3011 	apc->port_is_up = apc->port_st_save;
3012 
3013 	/* Ensure port state updated before txq state */
3014 	smp_wmb();
3015 
3016 	if (apc->port_is_up)
3017 		netif_carrier_on(ndev);
3018 
3019 	netif_device_attach(ndev);
3020 
3021 	return 0;
3022 }
3023 
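/* Quiesce and destroy the queues of a port that is going down: detach XDP,
 * drain or discard in-flight TX packets, disable RSS steering, and finally
 * destroy the vPort.
 */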
3024 static int mana_dealloc_queues(struct net_device *ndev)
3025 {
3026 	struct mana_port_context *apc = netdev_priv(ndev);
3027 	unsigned long timeout = jiffies + 120 * HZ;
3028 	struct gdma_dev *gd = apc->ac->gdma_dev;
3029 	struct mana_txq *txq;
3030 	struct sk_buff *skb;
3031 	int i, err;
3032 	u32 tsleep;
3033 
3034 	if (apc->port_is_up)
3035 		return -EINVAL;
3036 
3037 	mana_chn_setxdp(apc, NULL);
3038 
3039 	if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode)
3040 		mana_pf_deregister_filter(apc);
3041 
3042 	/* No packet can be transmitted now since apc->port_is_up is false.
3043 	 * There is still a tiny chance that mana_poll_tx_cq() can re-enable
3044 	 * a txq because it may not yet have observed apc->port_is_up being
3045 	 * cleared to false, but that doesn't matter since mana_start_xmit()
3046 	 * drops any new packets while apc->port_is_up is false.
3047 	 *
3048 	 * Drain all the in-flight TX packets.
3049 	 * A timeout of 120 seconds is used across all the queues; it breaks
3050 	 * the loop below when the hardware is not responding.
3051 	 * The value of 120 was chosen to accommodate the maximum number of
3052 	 * queues.
3053 	 */
3054 
3055 	for (i = 0; i < apc->num_queues; i++) {
3056 		txq = &apc->tx_qp[i].txq;
3057 		tsleep = 1000;
3058 		while (atomic_read(&txq->pending_sends) > 0 &&
3059 		       time_before(jiffies, timeout)) {
3060 			usleep_range(tsleep, tsleep + 1000);
3061 			tsleep <<= 1;
3062 		}
3063 		if (atomic_read(&txq->pending_sends)) {
3064 			err = pcie_flr(to_pci_dev(gd->gdma_context->dev));
3065 			if (err) {
3066 				netdev_err(ndev, "flr failed %d with %d pkts pending in txq %u\n",
3067 					   err, atomic_read(&txq->pending_sends),
3068 					   txq->gdma_txq_id);
3069 			}
3070 			break;
3071 		}
3072 	}
3073 
3074 	for (i = 0; i < apc->num_queues; i++) {
3075 		txq = &apc->tx_qp[i].txq;
3076 		while ((skb = skb_dequeue(&txq->pending_skbs))) {
3077 			mana_unmap_skb(skb, apc);
3078 			dev_kfree_skb_any(skb);
3079 		}
3080 		atomic_set(&txq->pending_sends, 0);
3081 	}
3082 	/* At this point the queues can no longer be woken up, because
3083 	 * mana_poll_tx_cq() can no longer be running.
3084 	 */
3085 
3086 	apc->rss_state = TRI_STATE_FALSE;
3087 	err = mana_config_rss(apc, TRI_STATE_FALSE, false, false);
3088 	if (err && mana_en_need_log(apc, err))
3089 		netdev_err(ndev, "Failed to disable vPort: %d\n", err);
3090 
3091 	/* Even in err case, still need to cleanup the vPort */
3092 	mana_destroy_vport(apc);
3093 
3094 	return 0;
3095 }
3096 
3097 int mana_detach(struct net_device *ndev, bool from_close)
3098 {
3099 	struct mana_port_context *apc = netdev_priv(ndev);
3100 	int err;
3101 
3102 	ASSERT_RTNL();
3103 
3104 	apc->port_st_save = apc->port_is_up;
3105 	apc->port_is_up = false;
3106 
3107 	/* Ensure port state updated before txq state */
3108 	smp_wmb();
3109 
3110 	netif_tx_disable(ndev);
3111 	netif_carrier_off(ndev);
3112 
3113 	if (apc->port_st_save) {
3114 		err = mana_dealloc_queues(ndev);
3115 		if (err) {
3116 			netdev_err(ndev, "%s failed to deallocate queues: %d\n", __func__, err);
3117 			return err;
3118 		}
3119 	}
3120 
3121 	if (!from_close) {
3122 		netif_device_detach(ndev);
3123 		mana_cleanup_port_context(apc);
3124 	}
3125 
3126 	return 0;
3127 }
3128 
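/* Allocate and register the net_device for one vPort: initialize the port
 * context with default queue sizes, query the vPort configuration, allocate
 * the RSS tables, and advertise the supported features before registering
 * the netdev.
 */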
3129 static int mana_probe_port(struct mana_context *ac, int port_idx,
3130 			   struct net_device **ndev_storage)
3131 {
3132 	struct gdma_context *gc = ac->gdma_dev->gdma_context;
3133 	struct mana_port_context *apc;
3134 	struct net_device *ndev;
3135 	int err;
3136 
3137 	ndev = alloc_etherdev_mq(sizeof(struct mana_port_context),
3138 				 gc->max_num_queues);
3139 	if (!ndev)
3140 		return -ENOMEM;
3141 
3142 	*ndev_storage = ndev;
3143 
3144 	apc = netdev_priv(ndev);
3145 	apc->ac = ac;
3146 	apc->ndev = ndev;
3147 	apc->max_queues = gc->max_num_queues;
3148 	apc->num_queues = gc->max_num_queues;
3149 	apc->tx_queue_size = DEF_TX_BUFFERS_PER_QUEUE;
3150 	apc->rx_queue_size = DEF_RX_BUFFERS_PER_QUEUE;
3151 	apc->port_handle = INVALID_MANA_HANDLE;
3152 	apc->pf_filter_handle = INVALID_MANA_HANDLE;
3153 	apc->port_idx = port_idx;
3154 
3155 	mutex_init(&apc->vport_mutex);
3156 	apc->vport_use_count = 0;
3157 
3158 	ndev->netdev_ops = &mana_devops;
3159 	ndev->ethtool_ops = &mana_ethtool_ops;
3160 	ndev->mtu = ETH_DATA_LEN;
3161 	ndev->max_mtu = gc->adapter_mtu - ETH_HLEN;
3162 	ndev->min_mtu = ETH_MIN_MTU;
3163 	ndev->needed_headroom = MANA_HEADROOM;
3164 	ndev->dev_port = port_idx;
3165 	SET_NETDEV_DEV(ndev, gc->dev);
3166 
3167 	netif_set_tso_max_size(ndev, GSO_MAX_SIZE);
3168 
3169 	netif_carrier_off(ndev);
3170 
3171 	netdev_rss_key_fill(apc->hashkey, MANA_HASH_KEY_SIZE);
3172 
3173 	err = mana_init_port(ndev);
3174 	if (err)
3175 		goto free_net;
3176 
3177 	err = mana_rss_table_alloc(apc);
3178 	if (err)
3179 		goto reset_apc;
3180 
3181 	netdev_lockdep_set_classes(ndev);
3182 
3183 	ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
3184 	ndev->hw_features |= NETIF_F_RXCSUM;
3185 	ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
3186 	ndev->hw_features |= NETIF_F_RXHASH;
3187 	ndev->features = ndev->hw_features | NETIF_F_HW_VLAN_CTAG_TX |
3188 			 NETIF_F_HW_VLAN_CTAG_RX;
3189 	ndev->vlan_features = ndev->features;
3190 	xdp_set_features_flag(ndev, NETDEV_XDP_ACT_BASIC |
3191 			      NETDEV_XDP_ACT_REDIRECT |
3192 			      NETDEV_XDP_ACT_NDO_XMIT);
3193 
3194 	err = register_netdev(ndev);
3195 	if (err) {
3196 		netdev_err(ndev, "Unable to register netdev.\n");
3197 		goto free_indir;
3198 	}
3199 
3200 	debugfs_create_u32("current_speed", 0400, apc->mana_port_debugfs, &apc->speed);
3201 
3202 	return 0;
3203 
3204 free_indir:
3205 	mana_cleanup_indir_table(apc);
3206 reset_apc:
3207 	mana_cleanup_port_context(apc);
3208 free_net:
3209 	*ndev_storage = NULL;
3210 	netdev_err(ndev, "Failed to probe vPort %d: %d\n", port_idx, err);
3211 	free_netdev(ndev);
3212 	return err;
3213 }
3214 
3215 static void adev_release(struct device *dev)
3216 {
3217 	struct mana_adev *madev = container_of(dev, struct mana_adev, adev.dev);
3218 
3219 	kfree(madev);
3220 }
3221 
3222 static void remove_adev(struct gdma_dev *gd)
3223 {
3224 	struct auxiliary_device *adev = gd->adev;
3225 	int id = adev->id;
3226 
3227 	auxiliary_device_delete(adev);
3228 	auxiliary_device_uninit(adev);
3229 
3230 	mana_adev_idx_free(id);
3231 	gd->adev = NULL;
3232 }
3233 
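/* Create an auxiliary device for the given GDMA device. On success the
 * auxiliary device core owns the mana_adev, which is freed by adev_release()
 * when the device is released.
 */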
3234 static int add_adev(struct gdma_dev *gd, const char *name)
3235 {
3236 	struct auxiliary_device *adev;
3237 	struct mana_adev *madev;
3238 	int ret;
3239 
3240 	madev = kzalloc(sizeof(*madev), GFP_KERNEL);
3241 	if (!madev)
3242 		return -ENOMEM;
3243 
3244 	adev = &madev->adev;
3245 	ret = mana_adev_idx_alloc();
3246 	if (ret < 0)
3247 		goto idx_fail;
3248 	adev->id = ret;
3249 
3250 	adev->name = name;
3251 	adev->dev.parent = gd->gdma_context->dev;
3252 	adev->dev.release = adev_release;
3253 	madev->mdev = gd;
3254 
3255 	ret = auxiliary_device_init(adev);
3256 	if (ret)
3257 		goto init_fail;
3258 
3259 	/* madev is owned by the auxiliary device */
3260 	madev = NULL;
3261 	ret = auxiliary_device_add(adev);
3262 	if (ret)
3263 		goto add_fail;
3264 
3265 	gd->adev = adev;
3266 	dev_dbg(gd->gdma_context->dev,
3267 		"Auxiliary device added successfully\n");
3268 	return 0;
3269 
3270 add_fail:
3271 	auxiliary_device_uninit(adev);
3272 
3273 init_fail:
3274 	mana_adev_idx_free(adev->id);
3275 
3276 idx_fail:
3277 	kfree(madev);
3278 
3279 	return ret;
3280 }
3281 
3282 static void mana_rdma_service_handle(struct work_struct *work)
3283 {
3284 	struct mana_service_work *serv_work =
3285 		container_of(work, struct mana_service_work, work);
3286 	struct gdma_dev *gd = serv_work->gdma_dev;
3287 	struct device *dev = gd->gdma_context->dev;
3288 	int ret;
3289 
3290 	if (READ_ONCE(gd->rdma_teardown))
3291 		goto out;
3292 
3293 	switch (serv_work->event) {
3294 	case GDMA_SERVICE_TYPE_RDMA_SUSPEND:
3295 		if (!gd->adev || gd->is_suspended)
3296 			break;
3297 
3298 		remove_adev(gd);
3299 		gd->is_suspended = true;
3300 		break;
3301 
3302 	case GDMA_SERVICE_TYPE_RDMA_RESUME:
3303 		if (!gd->is_suspended)
3304 			break;
3305 
3306 		ret = add_adev(gd, "rdma");
3307 		if (ret)
3308 			dev_err(dev, "Failed to add adev on resume: %d\n", ret);
3309 		else
3310 			gd->is_suspended = false;
3311 		break;
3312 
3313 	default:
3314 		dev_warn(dev, "unknown adev service event %u\n",
3315 			 serv_work->event);
3316 		break;
3317 	}
3318 
3319 out:
3320 	kfree(serv_work);
3321 }
3322 
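/* Queue a suspend/resume request for the RDMA auxiliary device. The work item
 * is allocated with GFP_ATOMIC and handled asynchronously on gc->service_wq by
 * mana_rdma_service_handle().
 */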
3323 int mana_rdma_service_event(struct gdma_context *gc, enum gdma_service_type event)
3324 {
3325 	struct gdma_dev *gd = &gc->mana_ib;
3326 	struct mana_service_work *serv_work;
3327 
3328 	if (gd->dev_id.type != GDMA_DEVICE_MANA_IB) {
3329 		/* The RDMA device is not detected on the PCI device */
3330 		return 0;
3331 	}
3332 
3333 	serv_work = kzalloc(sizeof(*serv_work), GFP_ATOMIC);
3334 	if (!serv_work)
3335 		return -ENOMEM;
3336 
3337 	serv_work->event = event;
3338 	serv_work->gdma_dev = gd;
3339 
3340 	INIT_WORK(&serv_work->work, mana_rdma_service_handle);
3341 	queue_work(gc->service_wq, &serv_work->work);
3342 
3343 	return 0;
3344 }
3345 
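/* Probe (or resume) the Ethernet function: register the GDMA device, create
 * the EQs, query the device configuration, probe each vPort (or re-attach it
 * when resuming), and finally add the "eth" auxiliary device.
 */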
3346 int mana_probe(struct gdma_dev *gd, bool resuming)
3347 {
3348 	struct gdma_context *gc = gd->gdma_context;
3349 	struct mana_context *ac = gd->driver_data;
3350 	struct device *dev = gc->dev;
3351 	u8 bm_hostmode = 0;
3352 	u16 num_ports = 0;
3353 	int err;
3354 	int i;
3355 
3356 	dev_info(dev,
3357 		 "Microsoft Azure Network Adapter protocol version: %d.%d.%d\n",
3358 		 MANA_MAJOR_VERSION, MANA_MINOR_VERSION, MANA_MICRO_VERSION);
3359 
3360 	err = mana_gd_register_device(gd);
3361 	if (err)
3362 		return err;
3363 
3364 	if (!resuming) {
3365 		ac = kzalloc(sizeof(*ac), GFP_KERNEL);
3366 		if (!ac)
3367 			return -ENOMEM;
3368 
3369 		ac->gdma_dev = gd;
3370 		gd->driver_data = ac;
3371 	}
3372 
3373 	err = mana_create_eq(ac);
3374 	if (err) {
3375 		dev_err(dev, "Failed to create EQs: %d\n", err);
3376 		goto out;
3377 	}
3378 
3379 	err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION,
3380 				    MANA_MICRO_VERSION, &num_ports, &bm_hostmode);
3381 	if (err)
3382 		goto out;
3383 
3384 	ac->bm_hostmode = bm_hostmode;
3385 
3386 	if (!resuming) {
3387 		ac->num_ports = num_ports;
3388 	} else {
3389 		if (ac->num_ports != num_ports) {
3390 			dev_err(dev, "The number of vPorts changed: %d->%d\n",
3391 				ac->num_ports, num_ports);
3392 			err = -EPROTO;
3393 			goto out;
3394 		}
3395 	}
3396 
3397 	if (ac->num_ports == 0)
3398 		dev_err(dev, "Failed to detect any vPort\n");
3399 
3400 	if (ac->num_ports > MAX_PORTS_IN_MANA_DEV)
3401 		ac->num_ports = MAX_PORTS_IN_MANA_DEV;
3402 
3403 	if (!resuming) {
3404 		for (i = 0; i < ac->num_ports; i++) {
3405 			err = mana_probe_port(ac, i, &ac->ports[i]);
3406 			/* We log the port for which the probe failed and stop
3407 			 * probing the subsequent ports.
3408 			 * Note that the ports that probed successfully keep
3409 			 * running, unless add_adev() fails too.
3410 			 */
3411 			if (err) {
3412 				dev_err(dev, "Probe Failed for port %d\n", i);
3413 				break;
3414 			}
3415 		}
3416 	} else {
3417 		for (i = 0; i < ac->num_ports; i++) {
3418 			rtnl_lock();
3419 			err = mana_attach(ac->ports[i]);
3420 			rtnl_unlock();
3421 			/* We log the port for which the attach failed and stop
3422 			 * attaching the subsequent ports.
3423 			 * Note that the ports that attached successfully keep
3424 			 * running, unless add_adev() fails too.
3425 			 */
3426 			if (err) {
3427 				dev_err(dev, "Attach Failed for port %d\n", i);
3428 				break;
3429 			}
3430 		}
3431 	}
3432 
3433 	err = add_adev(gd, "eth");
3434 out:
3435 	if (err) {
3436 		mana_remove(gd, false);
3437 	} else {
3438 		dev_dbg(dev, "gd=%p, id=%u, num_ports=%d, type=%u, instance=%u\n",
3439 			gd, gd->dev_id.as_uint32, ac->num_ports,
3440 			gd->dev_id.type, gd->dev_id.instance);
3441 		dev_dbg(dev, "%s succeeded\n", __func__);
3442 	}
3443 
3444 	return err;
3445 }
3446 
3447 void mana_remove(struct gdma_dev *gd, bool suspending)
3448 {
3449 	struct gdma_context *gc = gd->gdma_context;
3450 	struct mana_context *ac = gd->driver_data;
3451 	struct mana_port_context *apc;
3452 	struct device *dev = gc->dev;
3453 	struct net_device *ndev;
3454 	int err;
3455 	int i;
3456 
3457 	/* The adev currently doesn't support suspending; always remove it */
3458 	if (gd->adev)
3459 		remove_adev(gd);
3460 
3461 	for (i = 0; i < ac->num_ports; i++) {
3462 		ndev = ac->ports[i];
3463 		if (!ndev) {
3464 			if (i == 0)
3465 				dev_err(dev, "No net device to remove\n");
3466 			goto out;
3467 		}
3468 		apc = netdev_priv(ndev);
3469 
3470 		/* All cleanup actions should stay after rtnl_lock(), otherwise
3471 		 * other functions may access partially cleaned up data.
3472 		 */
3473 		rtnl_lock();
3474 
3475 		err = mana_detach(ndev, false);
3476 		if (err)
3477 			netdev_err(ndev, "Failed to detach vPort %d: %d\n",
3478 				   i, err);
3479 
3480 		if (suspending) {
3481 			/* No need to unregister the ndev. */
3482 			rtnl_unlock();
3483 			continue;
3484 		}
3485 
3486 		unregister_netdevice(ndev);
3487 		mana_cleanup_indir_table(apc);
3488 
3489 		rtnl_unlock();
3490 
3491 		free_netdev(ndev);
3492 	}
3493 
3494 	mana_destroy_eq(ac);
3495 out:
3496 	mana_gd_deregister_device(gd);
3497 
3498 	if (suspending)
3499 		return;
3500 
3501 	gd->driver_data = NULL;
3502 	gd->gdma_context = NULL;
3503 	kfree(ac);
3504 	dev_dbg(dev, "%s succeeded\n", __func__);
3505 }
3506 
3507 int mana_rdma_probe(struct gdma_dev *gd)
3508 {
3509 	int err = 0;
3510 
3511 	if (gd->dev_id.type != GDMA_DEVICE_MANA_IB) {
3512 		/* The RDMA device is not detected on the PCI device */
3513 		return err;
3514 	}
3515 
3516 	err = mana_gd_register_device(gd);
3517 	if (err)
3518 		return err;
3519 
3520 	err = add_adev(gd, "rdma");
3521 	if (err)
3522 		mana_gd_deregister_device(gd);
3523 
3524 	return err;
3525 }
3526 
3527 void mana_rdma_remove(struct gdma_dev *gd)
3528 {
3529 	struct gdma_context *gc = gd->gdma_context;
3530 
3531 	if (gd->dev_id.type != GDMA_DEVICE_MANA_IB) {
3532 		/* The RDMA device is not detected on the PCI device */
3533 		return;
3534 	}
3535 
3536 	WRITE_ONCE(gd->rdma_teardown, true);
3537 	flush_workqueue(gc->service_wq);
3538 
3539 	if (gd->adev)
3540 		remove_adev(gd);
3541 
3542 	mana_gd_deregister_device(gd);
3543 }
3544 
3545 struct net_device *mana_get_primary_netdev(struct mana_context *ac,
3546 					   u32 port_index,
3547 					   netdevice_tracker *tracker)
3548 {
3549 	struct net_device *ndev;
3550 
3551 	if (port_index >= ac->num_ports)
3552 		return NULL;
3553 
3554 	rcu_read_lock();
3555 
3556 	/* If mana is used in netvsc, the upper netdevice should be returned. */
3557 	ndev = netdev_master_upper_dev_get_rcu(ac->ports[port_index]);
3558 
3559 	/* If there is no upper device, use the parent Ethernet device */
3560 	if (!ndev)
3561 		ndev = ac->ports[port_index];
3562 
3563 	netdev_hold(ndev, tracker, GFP_ATOMIC);
3564 	rcu_read_unlock();
3565 
3566 	return ndev;
3567 }
3568 EXPORT_SYMBOL_NS(mana_get_primary_netdev, "NET_MANA");
3569