xref: /linux/drivers/net/ethernet/fungible/funeth/funeth_main.c (revision 288440de9e5fdb4a3ff73864850f080c1250fc81)
1 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
2 
3 #include <linux/bpf.h>
4 #include <linux/crash_dump.h>
5 #include <linux/etherdevice.h>
6 #include <linux/ethtool.h>
7 #include <linux/filter.h>
8 #include <linux/idr.h>
9 #include <linux/if_vlan.h>
10 #include <linux/module.h>
11 #include <linux/netdevice.h>
12 #include <linux/pci.h>
13 #include <linux/rtnetlink.h>
14 #include <linux/inetdevice.h>
15 
16 #include "funeth.h"
17 #include "funeth_devlink.h"
18 #include "funeth_ktls.h"
19 #include "fun_port.h"
20 #include "fun_queue.h"
21 #include "funeth_txrx.h"
22 
/* Depths for the admin SQ, CQ and RQ. Their consumers are outside this part
 * of the file.
 */
#define ADMIN_SQ_DEPTH 32
#define ADMIN_CQ_DEPTH 64
#define ADMIN_RQ_DEPTH 16

/* Default number of Tx/Rx queues (unsigned so it mixes with unsigned counts). */
#define FUN_DFLT_QUEUES 16U
29 
30 enum {
31 	FUN_SERV_RES_CHANGE = FUN_SERV_FIRST_AVAIL,
32 	FUN_SERV_DEL_PORTS,
33 };
34 
35 static const struct pci_device_id funeth_id_table[] = {
36 	{ PCI_VDEVICE(FUNGIBLE, 0x0101) },
37 	{ PCI_VDEVICE(FUNGIBLE, 0x0181) },
38 	{ 0, }
39 };
40 
41 /* Issue a port write admin command with @n key/value pairs. */
42 static int fun_port_write_cmds(struct funeth_priv *fp, unsigned int n,
43 			       const int *keys, const u64 *data)
44 {
45 	unsigned int cmd_size, i;
46 	union {
47 		struct fun_admin_port_req req;
48 		struct fun_admin_port_rsp rsp;
49 		u8 v[ADMIN_SQE_SIZE];
50 	} cmd;
51 
52 	cmd_size = offsetof(struct fun_admin_port_req, u.write.write48) +
53 		n * sizeof(struct fun_admin_write48_req);
54 	if (cmd_size > sizeof(cmd) || cmd_size > ADMIN_RSP_MAX_LEN)
55 		return -EINVAL;
56 
57 	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_PORT,
58 						    cmd_size);
59 	cmd.req.u.write =
60 		FUN_ADMIN_PORT_WRITE_REQ_INIT(FUN_ADMIN_SUBOP_WRITE, 0,
61 					      fp->netdev->dev_port);
62 	for (i = 0; i < n; i++)
63 		cmd.req.u.write.write48[i] =
64 			FUN_ADMIN_WRITE48_REQ_INIT(keys[i], data[i]);
65 
66 	return fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common,
67 					 &cmd.rsp, cmd_size, 0);
68 }
69 
70 int fun_port_write_cmd(struct funeth_priv *fp, int key, u64 data)
71 {
72 	return fun_port_write_cmds(fp, 1, &key, &data);
73 }
74 
75 /* Issue a port read admin command with @n key/value pairs. */
76 static int fun_port_read_cmds(struct funeth_priv *fp, unsigned int n,
77 			      const int *keys, u64 *data)
78 {
79 	const struct fun_admin_read48_rsp *r48rsp;
80 	unsigned int cmd_size, i;
81 	int rc;
82 	union {
83 		struct fun_admin_port_req req;
84 		struct fun_admin_port_rsp rsp;
85 		u8 v[ADMIN_SQE_SIZE];
86 	} cmd;
87 
88 	cmd_size = offsetof(struct fun_admin_port_req, u.read.read48) +
89 		n * sizeof(struct fun_admin_read48_req);
90 	if (cmd_size > sizeof(cmd) || cmd_size > ADMIN_RSP_MAX_LEN)
91 		return -EINVAL;
92 
93 	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_PORT,
94 						    cmd_size);
95 	cmd.req.u.read =
96 		FUN_ADMIN_PORT_READ_REQ_INIT(FUN_ADMIN_SUBOP_READ, 0,
97 					     fp->netdev->dev_port);
98 	for (i = 0; i < n; i++)
99 		cmd.req.u.read.read48[i] = FUN_ADMIN_READ48_REQ_INIT(keys[i]);
100 
101 	rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common,
102 				       &cmd.rsp, cmd_size, 0);
103 	if (rc)
104 		return rc;
105 
106 	for (r48rsp = cmd.rsp.u.read.read48, i = 0; i < n; i++, r48rsp++) {
107 		data[i] = FUN_ADMIN_READ48_RSP_DATA_G(r48rsp->key_to_data);
108 		dev_dbg(fp->fdev->dev,
109 			"port_read_rsp lport=%u (key_to_data=0x%llx) key=%d data:%lld retval:%lld",
110 			fp->lport, r48rsp->key_to_data, keys[i], data[i],
111 			FUN_ADMIN_READ48_RSP_RET_G(r48rsp->key_to_data));
112 	}
113 	return 0;
114 }
115 
116 int fun_port_read_cmd(struct funeth_priv *fp, int key, u64 *data)
117 {
118 	return fun_port_read_cmds(fp, 1, &key, data);
119 }
120 
121 static void fun_report_link(struct net_device *netdev)
122 {
123 	if (netif_carrier_ok(netdev)) {
124 		const struct funeth_priv *fp = netdev_priv(netdev);
125 		const char *fec = "", *pause = "";
126 		int speed = fp->link_speed;
127 		char unit = 'M';
128 
129 		if (fp->link_speed >= SPEED_1000) {
130 			speed /= 1000;
131 			unit = 'G';
132 		}
133 
134 		if (fp->active_fec & FUN_PORT_FEC_RS)
135 			fec = ", RS-FEC";
136 		else if (fp->active_fec & FUN_PORT_FEC_FC)
137 			fec = ", BASER-FEC";
138 
139 		if ((fp->active_fc & FUN_PORT_CAP_PAUSE_MASK) == FUN_PORT_CAP_PAUSE_MASK)
140 			pause = ", Tx/Rx PAUSE";
141 		else if (fp->active_fc & FUN_PORT_CAP_RX_PAUSE)
142 			pause = ", Rx PAUSE";
143 		else if (fp->active_fc & FUN_PORT_CAP_TX_PAUSE)
144 			pause = ", Tx PAUSE";
145 
146 		netdev_info(netdev, "Link up at %d %cb/s full-duplex%s%s\n",
147 			    speed, unit, pause, fec);
148 	} else {
149 		netdev_info(netdev, "Link down\n");
150 	}
151 }
152 
153 static int fun_adi_write(struct fun_dev *fdev, enum fun_admin_adi_attr attr,
154 			 unsigned int adi_id, const struct fun_adi_param *param)
155 {
156 	struct fun_admin_adi_req req = {
157 		.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_ADI,
158 						     sizeof(req)),
159 		.u.write.subop = FUN_ADMIN_SUBOP_WRITE,
160 		.u.write.attribute = attr,
161 		.u.write.id = cpu_to_be32(adi_id),
162 		.u.write.param = *param
163 	};
164 
165 	return fun_submit_admin_sync_cmd(fdev, &req.common, NULL, 0, 0);
166 }
167 
168 /* Configure RSS for the given port. @op determines whether a new RSS context
169  * is to be created or whether an existing one should be reconfigured. The
170  * remaining parameters specify the hashing algorithm, key, and indirection
171  * table.
172  *
173  * This initiates packet delivery to the Rx queues set in the indirection
174  * table.
175  */
176 int fun_config_rss(struct net_device *dev, int algo, const u8 *key,
177 		   const u32 *qtable, u8 op)
178 {
179 	struct funeth_priv *fp = netdev_priv(dev);
180 	unsigned int table_len = fp->indir_table_nentries;
181 	unsigned int len = FUN_ETH_RSS_MAX_KEY_SIZE + sizeof(u32) * table_len;
182 	struct funeth_rxq **rxqs = rtnl_dereference(fp->rxqs);
183 	union {
184 		struct {
185 			struct fun_admin_rss_req req;
186 			struct fun_dataop_gl gl;
187 		};
188 		struct fun_admin_generic_create_rsp rsp;
189 	} cmd;
190 	__be32 *indir_tab;
191 	u16 flags;
192 	int rc;
193 
194 	if (op != FUN_ADMIN_SUBOP_CREATE && fp->rss_hw_id == FUN_HCI_ID_INVALID)
195 		return -EINVAL;
196 
197 	flags = op == FUN_ADMIN_SUBOP_CREATE ?
198 			FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR : 0;
199 	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_RSS,
200 						    sizeof(cmd));
201 	cmd.req.u.create =
202 		FUN_ADMIN_RSS_CREATE_REQ_INIT(op, flags, fp->rss_hw_id,
203 					      dev->dev_port, algo,
204 					      FUN_ETH_RSS_MAX_KEY_SIZE,
205 					      table_len, 0,
206 					      FUN_ETH_RSS_MAX_KEY_SIZE);
207 	cmd.req.u.create.dataop = FUN_DATAOP_HDR_INIT(1, 0, 1, 0, len);
208 	fun_dataop_gl_init(&cmd.gl, 0, 0, len, fp->rss_dma_addr);
209 
210 	/* write the key and indirection table into the RSS DMA area */
211 	memcpy(fp->rss_cfg, key, FUN_ETH_RSS_MAX_KEY_SIZE);
212 	indir_tab = fp->rss_cfg + FUN_ETH_RSS_MAX_KEY_SIZE;
213 	for (rc = 0; rc < table_len; rc++)
214 		*indir_tab++ = cpu_to_be32(rxqs[*qtable++]->hw_cqid);
215 
216 	rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common,
217 				       &cmd.rsp, sizeof(cmd.rsp), 0);
218 	if (!rc && op == FUN_ADMIN_SUBOP_CREATE)
219 		fp->rss_hw_id = be32_to_cpu(cmd.rsp.id);
220 	return rc;
221 }
222 
223 /* Destroy the HW RSS conntext associated with the given port. This also stops
224  * all packet delivery to our Rx queues.
225  */
226 static void fun_destroy_rss(struct funeth_priv *fp)
227 {
228 	if (fp->rss_hw_id != FUN_HCI_ID_INVALID) {
229 		fun_res_destroy(fp->fdev, FUN_ADMIN_OP_RSS, 0, fp->rss_hw_id);
230 		fp->rss_hw_id = FUN_HCI_ID_INVALID;
231 	}
232 }
233 
234 static void fun_irq_aff_notify(struct irq_affinity_notify *notify,
235 			       const cpumask_t *mask)
236 {
237 	struct fun_irq *p = container_of(notify, struct fun_irq, aff_notify);
238 
239 	cpumask_copy(&p->affinity_mask, mask);
240 }
241 
242 static void fun_irq_aff_release(struct kref __always_unused *ref)
243 {
244 }
245 
246 /* Allocate an IRQ structure, assign an MSI-X index and initial affinity to it,
247  * and add it to the IRQ XArray.
248  */
249 static struct fun_irq *fun_alloc_qirq(struct funeth_priv *fp, unsigned int idx,
250 				      int node, unsigned int xa_idx_offset)
251 {
252 	struct fun_irq *irq;
253 	int cpu, res;
254 
255 	cpu = cpumask_local_spread(idx, node);
256 	node = cpu_to_mem(cpu);
257 
258 	irq = kzalloc_node(sizeof(*irq), GFP_KERNEL, node);
259 	if (!irq)
260 		return ERR_PTR(-ENOMEM);
261 
262 	res = fun_reserve_irqs(fp->fdev, 1, &irq->irq_idx);
263 	if (res != 1)
264 		goto free_irq;
265 
266 	res = xa_insert(&fp->irqs, idx + xa_idx_offset, irq, GFP_KERNEL);
267 	if (res)
268 		goto release_irq;
269 
270 	irq->irq = pci_irq_vector(fp->pdev, irq->irq_idx);
271 	cpumask_set_cpu(cpu, &irq->affinity_mask);
272 	irq->aff_notify.notify = fun_irq_aff_notify;
273 	irq->aff_notify.release = fun_irq_aff_release;
274 	irq->state = FUN_IRQ_INIT;
275 	return irq;
276 
277 release_irq:
278 	fun_release_irqs(fp->fdev, 1, &irq->irq_idx);
279 free_irq:
280 	kfree(irq);
281 	return ERR_PTR(res);
282 }
283 
284 static void fun_free_qirq(struct funeth_priv *fp, struct fun_irq *irq)
285 {
286 	netif_napi_del(&irq->napi);
287 	fun_release_irqs(fp->fdev, 1, &irq->irq_idx);
288 	kfree(irq);
289 }
290 
291 /* Release the IRQs reserved for Tx/Rx queues that aren't being used. */
292 static void fun_prune_queue_irqs(struct net_device *dev)
293 {
294 	struct funeth_priv *fp = netdev_priv(dev);
295 	unsigned int nreleased = 0;
296 	struct fun_irq *irq;
297 	unsigned long idx;
298 
299 	xa_for_each(&fp->irqs, idx, irq) {
300 		if (irq->txq || irq->rxq)  /* skip those in use */
301 			continue;
302 
303 		xa_erase(&fp->irqs, idx);
304 		fun_free_qirq(fp, irq);
305 		nreleased++;
306 		if (idx < fp->rx_irq_ofst)
307 			fp->num_tx_irqs--;
308 		else
309 			fp->num_rx_irqs--;
310 	}
311 	netif_info(fp, intr, dev, "Released %u queue IRQs\n", nreleased);
312 }
313 
314 /* Reserve IRQs, one per queue, to acommodate the requested queue numbers @ntx
315  * and @nrx. IRQs are added incrementally to those we already have.
316  * We hold on to allocated IRQs until garbage collection of unused IRQs is
317  * separately requested.
318  */
319 static int fun_alloc_queue_irqs(struct net_device *dev, unsigned int ntx,
320 				unsigned int nrx)
321 {
322 	struct funeth_priv *fp = netdev_priv(dev);
323 	int node = dev_to_node(&fp->pdev->dev);
324 	struct fun_irq *irq;
325 	unsigned int i;
326 
327 	for (i = fp->num_tx_irqs; i < ntx; i++) {
328 		irq = fun_alloc_qirq(fp, i, node, 0);
329 		if (IS_ERR(irq))
330 			return PTR_ERR(irq);
331 
332 		fp->num_tx_irqs++;
333 		netif_napi_add_tx(dev, &irq->napi, fun_txq_napi_poll);
334 	}
335 
336 	for (i = fp->num_rx_irqs; i < nrx; i++) {
337 		irq = fun_alloc_qirq(fp, i, node, fp->rx_irq_ofst);
338 		if (IS_ERR(irq))
339 			return PTR_ERR(irq);
340 
341 		fp->num_rx_irqs++;
342 		netif_napi_add(dev, &irq->napi, fun_rxq_napi_poll,
343 			       NAPI_POLL_WEIGHT);
344 	}
345 
346 	netif_info(fp, intr, dev, "Reserved %u/%u IRQs for Tx/Rx queues\n",
347 		   ntx, nrx);
348 	return 0;
349 }
350 
/* Pass Tx queues @start..@nqs-1 to funeth_txq_free() with target @state,
 * stopping at the first empty slot. Each slot is replaced by the value
 * funeth_txq_free() returns.
 */
static void free_txqs(struct funeth_txq **txqs, unsigned int nqs,
		      unsigned int start, int state)
{
	unsigned int q;

	for (q = start; q < nqs; q++) {
		if (!txqs[q])
			break;
		txqs[q] = funeth_txq_free(txqs[q], state);
	}
}
359 
360 static int alloc_txqs(struct net_device *dev, struct funeth_txq **txqs,
361 		      unsigned int nqs, unsigned int depth, unsigned int start,
362 		      int state)
363 {
364 	struct funeth_priv *fp = netdev_priv(dev);
365 	unsigned int i;
366 	int err;
367 
368 	for (i = start; i < nqs; i++) {
369 		err = funeth_txq_create(dev, i, depth, xa_load(&fp->irqs, i),
370 					state, &txqs[i]);
371 		if (err) {
372 			free_txqs(txqs, nqs, start, FUN_QSTATE_DESTROYED);
373 			return err;
374 		}
375 	}
376 	return 0;
377 }
378 
/* Pass Rx queues @start..@nqs-1 to funeth_rxq_free() with target @state,
 * stopping at the first empty slot. Each slot is replaced by the value
 * funeth_rxq_free() returns.
 */
static void free_rxqs(struct funeth_rxq **rxqs, unsigned int nqs,
		      unsigned int start, int state)
{
	unsigned int q;

	for (q = start; q < nqs; q++) {
		if (!rxqs[q])
			break;
		rxqs[q] = funeth_rxq_free(rxqs[q], state);
	}
}
387 
388 static int alloc_rxqs(struct net_device *dev, struct funeth_rxq **rxqs,
389 		      unsigned int nqs, unsigned int ncqe, unsigned int nrqe,
390 		      unsigned int start, int state)
391 {
392 	struct funeth_priv *fp = netdev_priv(dev);
393 	unsigned int i;
394 	int err;
395 
396 	for (i = start; i < nqs; i++) {
397 		err = funeth_rxq_create(dev, i, ncqe, nrqe,
398 					xa_load(&fp->irqs, i + fp->rx_irq_ofst),
399 					state, &rxqs[i]);
400 		if (err) {
401 			free_rxqs(rxqs, nqs, start, FUN_QSTATE_DESTROYED);
402 			return err;
403 		}
404 	}
405 	return 0;
406 }
407 
408 static void free_xdpqs(struct funeth_txq **xdpqs, unsigned int nqs,
409 		       unsigned int start, int state)
410 {
411 	unsigned int i;
412 
413 	for (i = start; i < nqs && xdpqs[i]; i++)
414 		xdpqs[i] = funeth_txq_free(xdpqs[i], state);
415 
416 	if (state == FUN_QSTATE_DESTROYED)
417 		kfree(xdpqs);
418 }
419 
420 static struct funeth_txq **alloc_xdpqs(struct net_device *dev, unsigned int nqs,
421 				       unsigned int depth, unsigned int start,
422 				       int state)
423 {
424 	struct funeth_txq **xdpqs;
425 	unsigned int i;
426 	int err;
427 
428 	xdpqs = kcalloc(nqs, sizeof(*xdpqs), GFP_KERNEL);
429 	if (!xdpqs)
430 		return ERR_PTR(-ENOMEM);
431 
432 	for (i = start; i < nqs; i++) {
433 		err = funeth_txq_create(dev, i, depth, NULL, state, &xdpqs[i]);
434 		if (err) {
435 			free_xdpqs(xdpqs, nqs, start, FUN_QSTATE_DESTROYED);
436 			return ERR_PTR(err);
437 		}
438 	}
439 	return xdpqs;
440 }
441 
442 static void fun_free_rings(struct net_device *netdev, struct fun_qset *qset)
443 {
444 	struct funeth_priv *fp = netdev_priv(netdev);
445 	struct funeth_txq **xdpqs = qset->xdpqs;
446 	struct funeth_rxq **rxqs = qset->rxqs;
447 
448 	/* qset may not specify any queues to operate on. In that case the
449 	 * currently installed queues are implied.
450 	 */
451 	if (!rxqs) {
452 		rxqs = rtnl_dereference(fp->rxqs);
453 		xdpqs = rtnl_dereference(fp->xdpqs);
454 		qset->txqs = fp->txqs;
455 		qset->nrxqs = netdev->real_num_rx_queues;
456 		qset->ntxqs = netdev->real_num_tx_queues;
457 		qset->nxdpqs = fp->num_xdpqs;
458 	}
459 	if (!rxqs)
460 		return;
461 
462 	if (rxqs == rtnl_dereference(fp->rxqs)) {
463 		rcu_assign_pointer(fp->rxqs, NULL);
464 		rcu_assign_pointer(fp->xdpqs, NULL);
465 		synchronize_net();
466 		fp->txqs = NULL;
467 	}
468 
469 	free_rxqs(rxqs, qset->nrxqs, qset->rxq_start, qset->state);
470 	free_txqs(qset->txqs, qset->ntxqs, qset->txq_start, qset->state);
471 	free_xdpqs(xdpqs, qset->nxdpqs, qset->xdpq_start, qset->state);
472 	if (qset->state == FUN_QSTATE_DESTROYED)
473 		kfree(rxqs);
474 
475 	/* Tell the caller which queues were operated on. */
476 	qset->rxqs = rxqs;
477 	qset->xdpqs = xdpqs;
478 }
479 
480 static int fun_alloc_rings(struct net_device *netdev, struct fun_qset *qset)
481 {
482 	struct funeth_txq **xdpqs = NULL, **txqs;
483 	struct funeth_rxq **rxqs;
484 	int err;
485 
486 	err = fun_alloc_queue_irqs(netdev, qset->ntxqs, qset->nrxqs);
487 	if (err)
488 		return err;
489 
490 	rxqs = kcalloc(qset->ntxqs + qset->nrxqs, sizeof(*rxqs), GFP_KERNEL);
491 	if (!rxqs)
492 		return -ENOMEM;
493 
494 	if (qset->nxdpqs) {
495 		xdpqs = alloc_xdpqs(netdev, qset->nxdpqs, qset->sq_depth,
496 				    qset->xdpq_start, qset->state);
497 		if (IS_ERR(xdpqs)) {
498 			err = PTR_ERR(xdpqs);
499 			goto free_qvec;
500 		}
501 	}
502 
503 	txqs = (struct funeth_txq **)&rxqs[qset->nrxqs];
504 	err = alloc_txqs(netdev, txqs, qset->ntxqs, qset->sq_depth,
505 			 qset->txq_start, qset->state);
506 	if (err)
507 		goto free_xdpqs;
508 
509 	err = alloc_rxqs(netdev, rxqs, qset->nrxqs, qset->cq_depth,
510 			 qset->rq_depth, qset->rxq_start, qset->state);
511 	if (err)
512 		goto free_txqs;
513 
514 	qset->rxqs = rxqs;
515 	qset->txqs = txqs;
516 	qset->xdpqs = xdpqs;
517 	return 0;
518 
519 free_txqs:
520 	free_txqs(txqs, qset->ntxqs, qset->txq_start, FUN_QSTATE_DESTROYED);
521 free_xdpqs:
522 	free_xdpqs(xdpqs, qset->nxdpqs, qset->xdpq_start, FUN_QSTATE_DESTROYED);
523 free_qvec:
524 	kfree(rxqs);
525 	return err;
526 }
527 
528 /* Take queues to the next level. Presently this means creating them on the
529  * device.
530  */
531 static int fun_advance_ring_state(struct net_device *dev, struct fun_qset *qset)
532 {
533 	struct funeth_priv *fp = netdev_priv(dev);
534 	int i, err;
535 
536 	for (i = 0; i < qset->nrxqs; i++) {
537 		err = fun_rxq_create_dev(qset->rxqs[i],
538 					 xa_load(&fp->irqs,
539 						 i + fp->rx_irq_ofst));
540 		if (err)
541 			goto out;
542 	}
543 
544 	for (i = 0; i < qset->ntxqs; i++) {
545 		err = fun_txq_create_dev(qset->txqs[i], xa_load(&fp->irqs, i));
546 		if (err)
547 			goto out;
548 	}
549 
550 	for (i = 0; i < qset->nxdpqs; i++) {
551 		err = fun_txq_create_dev(qset->xdpqs[i], NULL);
552 		if (err)
553 			goto out;
554 	}
555 
556 	return 0;
557 
558 out:
559 	fun_free_rings(dev, qset);
560 	return err;
561 }
562 
563 static int fun_port_create(struct net_device *netdev)
564 {
565 	struct funeth_priv *fp = netdev_priv(netdev);
566 	union {
567 		struct fun_admin_port_req req;
568 		struct fun_admin_port_rsp rsp;
569 	} cmd;
570 	int rc;
571 
572 	if (fp->lport != INVALID_LPORT)
573 		return 0;
574 
575 	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_PORT,
576 						    sizeof(cmd.req));
577 	cmd.req.u.create =
578 		FUN_ADMIN_PORT_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE, 0,
579 					       netdev->dev_port);
580 
581 	rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common, &cmd.rsp,
582 				       sizeof(cmd.rsp), 0);
583 
584 	if (!rc)
585 		fp->lport = be16_to_cpu(cmd.rsp.u.create.lport);
586 	return rc;
587 }
588 
589 static int fun_port_destroy(struct net_device *netdev)
590 {
591 	struct funeth_priv *fp = netdev_priv(netdev);
592 
593 	if (fp->lport == INVALID_LPORT)
594 		return 0;
595 
596 	fp->lport = INVALID_LPORT;
597 	return fun_res_destroy(fp->fdev, FUN_ADMIN_OP_PORT, 0,
598 			       netdev->dev_port);
599 }
600 
601 static int fun_eth_create(struct funeth_priv *fp)
602 {
603 	union {
604 		struct fun_admin_eth_req req;
605 		struct fun_admin_generic_create_rsp rsp;
606 	} cmd;
607 	int rc;
608 
609 	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_ETH,
610 						    sizeof(cmd.req));
611 	cmd.req.u.create = FUN_ADMIN_ETH_CREATE_REQ_INIT(
612 				FUN_ADMIN_SUBOP_CREATE,
613 				FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR,
614 				0, fp->netdev->dev_port);
615 
616 	rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common, &cmd.rsp,
617 				       sizeof(cmd.rsp), 0);
618 	return rc ? rc : be32_to_cpu(cmd.rsp.id);
619 }
620 
621 static int fun_vi_create(struct funeth_priv *fp)
622 {
623 	struct fun_admin_vi_req req = {
624 		.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_VI,
625 						     sizeof(req)),
626 		.u.create = FUN_ADMIN_VI_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE,
627 							 0,
628 							 fp->netdev->dev_port,
629 							 fp->netdev->dev_port)
630 	};
631 
632 	return fun_submit_admin_sync_cmd(fp->fdev, &req.common, NULL, 0, 0);
633 }
634 
635 /* Helper to create an ETH flow and bind an SQ to it.
636  * Returns the ETH id (>= 0) on success or a negative error.
637  */
638 int fun_create_and_bind_tx(struct funeth_priv *fp, u32 sqid)
639 {
640 	int rc, ethid;
641 
642 	ethid = fun_eth_create(fp);
643 	if (ethid >= 0) {
644 		rc = fun_bind(fp->fdev, FUN_ADMIN_BIND_TYPE_EPSQ, sqid,
645 			      FUN_ADMIN_BIND_TYPE_ETH, ethid);
646 		if (rc) {
647 			fun_res_destroy(fp->fdev, FUN_ADMIN_OP_ETH, 0, ethid);
648 			ethid = rc;
649 		}
650 	}
651 	return ethid;
652 }
653 
654 static irqreturn_t fun_queue_irq_handler(int irq, void *data)
655 {
656 	struct fun_irq *p = data;
657 
658 	if (p->rxq) {
659 		prefetch(p->rxq->next_cqe_info);
660 		p->rxq->irq_cnt++;
661 	}
662 	napi_schedule_irqoff(&p->napi);
663 	return IRQ_HANDLED;
664 }
665 
666 static int fun_enable_irqs(struct net_device *dev)
667 {
668 	struct funeth_priv *fp = netdev_priv(dev);
669 	unsigned long idx, last;
670 	unsigned int qidx;
671 	struct fun_irq *p;
672 	const char *qtype;
673 	int err;
674 
675 	xa_for_each(&fp->irqs, idx, p) {
676 		if (p->txq) {
677 			qtype = "tx";
678 			qidx = p->txq->qidx;
679 		} else if (p->rxq) {
680 			qtype = "rx";
681 			qidx = p->rxq->qidx;
682 		} else {
683 			continue;
684 		}
685 
686 		if (p->state != FUN_IRQ_INIT)
687 			continue;
688 
689 		snprintf(p->name, sizeof(p->name) - 1, "%s-%s-%u", dev->name,
690 			 qtype, qidx);
691 		err = request_irq(p->irq, fun_queue_irq_handler, 0, p->name, p);
692 		if (err) {
693 			netdev_err(dev, "Failed to allocate IRQ %u, err %d\n",
694 				   p->irq, err);
695 			goto unroll;
696 		}
697 		p->state = FUN_IRQ_REQUESTED;
698 	}
699 
700 	xa_for_each(&fp->irqs, idx, p) {
701 		if (p->state != FUN_IRQ_REQUESTED)
702 			continue;
703 		irq_set_affinity_notifier(p->irq, &p->aff_notify);
704 		irq_set_affinity_and_hint(p->irq, &p->affinity_mask);
705 		napi_enable(&p->napi);
706 		p->state = FUN_IRQ_ENABLED;
707 	}
708 
709 	return 0;
710 
711 unroll:
712 	last = idx - 1;
713 	xa_for_each_range(&fp->irqs, idx, p, 0, last)
714 		if (p->state == FUN_IRQ_REQUESTED) {
715 			free_irq(p->irq, p);
716 			p->state = FUN_IRQ_INIT;
717 		}
718 
719 	return err;
720 }
721 
722 static void fun_disable_one_irq(struct fun_irq *irq)
723 {
724 	napi_disable(&irq->napi);
725 	irq_set_affinity_notifier(irq->irq, NULL);
726 	irq_update_affinity_hint(irq->irq, NULL);
727 	free_irq(irq->irq, irq);
728 	irq->state = FUN_IRQ_INIT;
729 }
730 
731 static void fun_disable_irqs(struct net_device *dev)
732 {
733 	struct funeth_priv *fp = netdev_priv(dev);
734 	struct fun_irq *p;
735 	unsigned long idx;
736 
737 	xa_for_each(&fp->irqs, idx, p)
738 		if (p->state == FUN_IRQ_ENABLED)
739 			fun_disable_one_irq(p);
740 }
741 
742 static void fun_down(struct net_device *dev, struct fun_qset *qset)
743 {
744 	struct funeth_priv *fp = netdev_priv(dev);
745 
746 	/* If we don't have queues the data path is already down.
747 	 * Note netif_running(dev) may be true.
748 	 */
749 	if (!rcu_access_pointer(fp->rxqs))
750 		return;
751 
752 	/* It is also down if the queues aren't on the device. */
753 	if (fp->txqs[0]->init_state >= FUN_QSTATE_INIT_FULL) {
754 		netif_info(fp, ifdown, dev,
755 			   "Tearing down data path on device\n");
756 		fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_DISABLE, 0);
757 
758 		netif_carrier_off(dev);
759 		netif_tx_disable(dev);
760 
761 		fun_destroy_rss(fp);
762 		fun_res_destroy(fp->fdev, FUN_ADMIN_OP_VI, 0, dev->dev_port);
763 		fun_disable_irqs(dev);
764 	}
765 
766 	fun_free_rings(dev, qset);
767 }
768 
769 static int fun_up(struct net_device *dev, struct fun_qset *qset)
770 {
771 	static const int port_keys[] = {
772 		FUN_ADMIN_PORT_KEY_STATS_DMA_LOW,
773 		FUN_ADMIN_PORT_KEY_STATS_DMA_HIGH,
774 		FUN_ADMIN_PORT_KEY_ENABLE
775 	};
776 
777 	struct funeth_priv *fp = netdev_priv(dev);
778 	u64 vals[] = {
779 		lower_32_bits(fp->stats_dma_addr),
780 		upper_32_bits(fp->stats_dma_addr),
781 		FUN_PORT_FLAG_ENABLE_NOTIFY
782 	};
783 	int err;
784 
785 	netif_info(fp, ifup, dev, "Setting up data path on device\n");
786 
787 	if (qset->rxqs[0]->init_state < FUN_QSTATE_INIT_FULL) {
788 		err = fun_advance_ring_state(dev, qset);
789 		if (err)
790 			return err;
791 	}
792 
793 	err = fun_vi_create(fp);
794 	if (err)
795 		goto free_queues;
796 
797 	fp->txqs = qset->txqs;
798 	rcu_assign_pointer(fp->rxqs, qset->rxqs);
799 	rcu_assign_pointer(fp->xdpqs, qset->xdpqs);
800 
801 	err = fun_enable_irqs(dev);
802 	if (err)
803 		goto destroy_vi;
804 
805 	if (fp->rss_cfg) {
806 		err = fun_config_rss(dev, fp->hash_algo, fp->rss_key,
807 				     fp->indir_table, FUN_ADMIN_SUBOP_CREATE);
808 	} else {
809 		/* The non-RSS case has only 1 queue. */
810 		err = fun_bind(fp->fdev, FUN_ADMIN_BIND_TYPE_VI, dev->dev_port,
811 			       FUN_ADMIN_BIND_TYPE_EPCQ,
812 			       qset->rxqs[0]->hw_cqid);
813 	}
814 	if (err)
815 		goto disable_irqs;
816 
817 	err = fun_port_write_cmds(fp, 3, port_keys, vals);
818 	if (err)
819 		goto free_rss;
820 
821 	netif_tx_start_all_queues(dev);
822 	return 0;
823 
824 free_rss:
825 	fun_destroy_rss(fp);
826 disable_irqs:
827 	fun_disable_irqs(dev);
828 destroy_vi:
829 	fun_res_destroy(fp->fdev, FUN_ADMIN_OP_VI, 0, dev->dev_port);
830 free_queues:
831 	fun_free_rings(dev, qset);
832 	return err;
833 }
834 
835 static int funeth_open(struct net_device *netdev)
836 {
837 	struct funeth_priv *fp = netdev_priv(netdev);
838 	struct fun_qset qset = {
839 		.nrxqs = netdev->real_num_rx_queues,
840 		.ntxqs = netdev->real_num_tx_queues,
841 		.nxdpqs = fp->num_xdpqs,
842 		.cq_depth = fp->cq_depth,
843 		.rq_depth = fp->rq_depth,
844 		.sq_depth = fp->sq_depth,
845 		.state = FUN_QSTATE_INIT_FULL,
846 	};
847 	int rc;
848 
849 	rc = fun_alloc_rings(netdev, &qset);
850 	if (rc)
851 		return rc;
852 
853 	rc = fun_up(netdev, &qset);
854 	if (rc) {
855 		qset.state = FUN_QSTATE_DESTROYED;
856 		fun_free_rings(netdev, &qset);
857 	}
858 
859 	return rc;
860 }
861 
862 static int funeth_close(struct net_device *netdev)
863 {
864 	struct fun_qset qset = { .state = FUN_QSTATE_DESTROYED };
865 
866 	fun_down(netdev, &qset);
867 	return 0;
868 }
869 
870 static void fun_get_stats64(struct net_device *netdev,
871 			    struct rtnl_link_stats64 *stats)
872 {
873 	struct funeth_priv *fp = netdev_priv(netdev);
874 	struct funeth_txq **xdpqs;
875 	struct funeth_rxq **rxqs;
876 	unsigned int i, start;
877 
878 	stats->tx_packets = fp->tx_packets;
879 	stats->tx_bytes   = fp->tx_bytes;
880 	stats->tx_dropped = fp->tx_dropped;
881 
882 	stats->rx_packets = fp->rx_packets;
883 	stats->rx_bytes   = fp->rx_bytes;
884 	stats->rx_dropped = fp->rx_dropped;
885 
886 	rcu_read_lock();
887 	rxqs = rcu_dereference(fp->rxqs);
888 	if (!rxqs)
889 		goto unlock;
890 
891 	for (i = 0; i < netdev->real_num_tx_queues; i++) {
892 		struct funeth_txq_stats txs;
893 
894 		FUN_QSTAT_READ(fp->txqs[i], start, txs);
895 		stats->tx_packets += txs.tx_pkts;
896 		stats->tx_bytes   += txs.tx_bytes;
897 		stats->tx_dropped += txs.tx_map_err;
898 	}
899 
900 	for (i = 0; i < netdev->real_num_rx_queues; i++) {
901 		struct funeth_rxq_stats rxs;
902 
903 		FUN_QSTAT_READ(rxqs[i], start, rxs);
904 		stats->rx_packets += rxs.rx_pkts;
905 		stats->rx_bytes   += rxs.rx_bytes;
906 		stats->rx_dropped += rxs.rx_map_err + rxs.rx_mem_drops;
907 	}
908 
909 	xdpqs = rcu_dereference(fp->xdpqs);
910 	if (!xdpqs)
911 		goto unlock;
912 
913 	for (i = 0; i < fp->num_xdpqs; i++) {
914 		struct funeth_txq_stats txs;
915 
916 		FUN_QSTAT_READ(xdpqs[i], start, txs);
917 		stats->tx_packets += txs.tx_pkts;
918 		stats->tx_bytes   += txs.tx_bytes;
919 	}
920 unlock:
921 	rcu_read_unlock();
922 }
923 
924 static int fun_change_mtu(struct net_device *netdev, int new_mtu)
925 {
926 	struct funeth_priv *fp = netdev_priv(netdev);
927 	int rc;
928 
929 	rc = fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_MTU, new_mtu);
930 	if (!rc)
931 		netdev->mtu = new_mtu;
932 	return rc;
933 }
934 
935 static int fun_set_macaddr(struct net_device *netdev, void *addr)
936 {
937 	struct funeth_priv *fp = netdev_priv(netdev);
938 	struct sockaddr *saddr = addr;
939 	int rc;
940 
941 	if (!is_valid_ether_addr(saddr->sa_data))
942 		return -EADDRNOTAVAIL;
943 
944 	if (ether_addr_equal(netdev->dev_addr, saddr->sa_data))
945 		return 0;
946 
947 	rc = fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_MACADDR,
948 				ether_addr_to_u64(saddr->sa_data));
949 	if (!rc)
950 		eth_hw_addr_set(netdev, saddr->sa_data);
951 	return rc;
952 }
953 
954 static int fun_get_port_attributes(struct net_device *netdev)
955 {
956 	static const int keys[] = {
957 		FUN_ADMIN_PORT_KEY_MACADDR, FUN_ADMIN_PORT_KEY_CAPABILITIES,
958 		FUN_ADMIN_PORT_KEY_ADVERT, FUN_ADMIN_PORT_KEY_MTU
959 	};
960 	static const int phys_keys[] = {
961 		FUN_ADMIN_PORT_KEY_LANE_ATTRS,
962 	};
963 
964 	struct funeth_priv *fp = netdev_priv(netdev);
965 	u64 data[ARRAY_SIZE(keys)];
966 	u8 mac[ETH_ALEN];
967 	int i, rc;
968 
969 	rc = fun_port_read_cmds(fp, ARRAY_SIZE(keys), keys, data);
970 	if (rc)
971 		return rc;
972 
973 	for (i = 0; i < ARRAY_SIZE(keys); i++) {
974 		switch (keys[i]) {
975 		case FUN_ADMIN_PORT_KEY_MACADDR:
976 			u64_to_ether_addr(data[i], mac);
977 			if (is_zero_ether_addr(mac)) {
978 				eth_hw_addr_random(netdev);
979 			} else if (is_valid_ether_addr(mac)) {
980 				eth_hw_addr_set(netdev, mac);
981 			} else {
982 				netdev_err(netdev,
983 					   "device provided a bad MAC address %pM\n",
984 					   mac);
985 				return -EINVAL;
986 			}
987 			break;
988 
989 		case FUN_ADMIN_PORT_KEY_CAPABILITIES:
990 			fp->port_caps = data[i];
991 			break;
992 
993 		case FUN_ADMIN_PORT_KEY_ADVERT:
994 			fp->advertising = data[i];
995 			break;
996 
997 		case FUN_ADMIN_PORT_KEY_MTU:
998 			netdev->mtu = data[i];
999 			break;
1000 		}
1001 	}
1002 
1003 	if (!(fp->port_caps & FUN_PORT_CAP_VPORT)) {
1004 		rc = fun_port_read_cmds(fp, ARRAY_SIZE(phys_keys), phys_keys,
1005 					data);
1006 		if (rc)
1007 			return rc;
1008 
1009 		fp->lane_attrs = data[0];
1010 	}
1011 
1012 	if (netdev->addr_assign_type == NET_ADDR_RANDOM)
1013 		return fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_MACADDR,
1014 					  ether_addr_to_u64(netdev->dev_addr));
1015 	return 0;
1016 }
1017 
1018 static int fun_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
1019 {
1020 	const struct funeth_priv *fp = netdev_priv(dev);
1021 
1022 	return copy_to_user(ifr->ifr_data, &fp->hwtstamp_cfg,
1023 			    sizeof(fp->hwtstamp_cfg)) ? -EFAULT : 0;
1024 }
1025 
1026 static int fun_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
1027 {
1028 	struct funeth_priv *fp = netdev_priv(dev);
1029 	struct hwtstamp_config cfg;
1030 
1031 	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
1032 		return -EFAULT;
1033 
1034 	/* no TX HW timestamps */
1035 	cfg.tx_type = HWTSTAMP_TX_OFF;
1036 
1037 	switch (cfg.rx_filter) {
1038 	case HWTSTAMP_FILTER_NONE:
1039 		break;
1040 	case HWTSTAMP_FILTER_ALL:
1041 	case HWTSTAMP_FILTER_SOME:
1042 	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
1043 	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
1044 	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
1045 	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
1046 	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
1047 	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
1048 	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
1049 	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
1050 	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
1051 	case HWTSTAMP_FILTER_PTP_V2_EVENT:
1052 	case HWTSTAMP_FILTER_PTP_V2_SYNC:
1053 	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
1054 	case HWTSTAMP_FILTER_NTP_ALL:
1055 		cfg.rx_filter = HWTSTAMP_FILTER_ALL;
1056 		break;
1057 	default:
1058 		return -ERANGE;
1059 	}
1060 
1061 	fp->hwtstamp_cfg = cfg;
1062 	return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
1063 }
1064 
1065 static int fun_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1066 {
1067 	switch (cmd) {
1068 	case SIOCSHWTSTAMP:
1069 		return fun_hwtstamp_set(dev, ifr);
1070 	case SIOCGHWTSTAMP:
1071 		return fun_hwtstamp_get(dev, ifr);
1072 	default:
1073 		return -EOPNOTSUPP;
1074 	}
1075 }
1076 
1077 /* Prepare the queues for XDP. */
1078 static int fun_enter_xdp(struct net_device *dev, struct bpf_prog *prog)
1079 {
1080 	struct funeth_priv *fp = netdev_priv(dev);
1081 	unsigned int i, nqs = num_online_cpus();
1082 	struct funeth_txq **xdpqs;
1083 	struct funeth_rxq **rxqs;
1084 	int err;
1085 
1086 	xdpqs = alloc_xdpqs(dev, nqs, fp->sq_depth, 0, FUN_QSTATE_INIT_FULL);
1087 	if (IS_ERR(xdpqs))
1088 		return PTR_ERR(xdpqs);
1089 
1090 	rxqs = rtnl_dereference(fp->rxqs);
1091 	for (i = 0; i < dev->real_num_rx_queues; i++) {
1092 		err = fun_rxq_set_bpf(rxqs[i], prog);
1093 		if (err)
1094 			goto out;
1095 	}
1096 
1097 	fp->num_xdpqs = nqs;
1098 	rcu_assign_pointer(fp->xdpqs, xdpqs);
1099 	return 0;
1100 out:
1101 	while (i--)
1102 		fun_rxq_set_bpf(rxqs[i], NULL);
1103 
1104 	free_xdpqs(xdpqs, nqs, 0, FUN_QSTATE_DESTROYED);
1105 	return err;
1106 }
1107 
1108 /* Set the queues for non-XDP operation. */
1109 static void fun_end_xdp(struct net_device *dev)
1110 {
1111 	struct funeth_priv *fp = netdev_priv(dev);
1112 	struct funeth_txq **xdpqs;
1113 	struct funeth_rxq **rxqs;
1114 	unsigned int i;
1115 
1116 	xdpqs = rtnl_dereference(fp->xdpqs);
1117 	rcu_assign_pointer(fp->xdpqs, NULL);
1118 	synchronize_net();
1119 	/* at this point both Rx and Tx XDP processing has ended */
1120 
1121 	free_xdpqs(xdpqs, fp->num_xdpqs, 0, FUN_QSTATE_DESTROYED);
1122 	fp->num_xdpqs = 0;
1123 
1124 	rxqs = rtnl_dereference(fp->rxqs);
1125 	for (i = 0; i < dev->real_num_rx_queues; i++)
1126 		fun_rxq_set_bpf(rxqs[i], NULL);
1127 }
1128 
1129 #define XDP_MAX_MTU \
1130 	(PAGE_SIZE - FUN_XDP_HEADROOM - VLAN_ETH_HLEN - FUN_RX_TAILROOM)
1131 
1132 static int fun_xdp_setup(struct net_device *dev, struct netdev_bpf *xdp)
1133 {
1134 	struct bpf_prog *old_prog, *prog = xdp->prog;
1135 	struct funeth_priv *fp = netdev_priv(dev);
1136 	int i, err;
1137 
1138 	/* XDP uses at most one buffer */
1139 	if (prog && dev->mtu > XDP_MAX_MTU) {
1140 		netdev_err(dev, "device MTU %u too large for XDP\n", dev->mtu);
1141 		NL_SET_ERR_MSG_MOD(xdp->extack,
1142 				   "Device MTU too large for XDP");
1143 		return -EINVAL;
1144 	}
1145 
1146 	if (!netif_running(dev)) {
1147 		fp->num_xdpqs = prog ? num_online_cpus() : 0;
1148 	} else if (prog && !fp->xdp_prog) {
1149 		err = fun_enter_xdp(dev, prog);
1150 		if (err) {
1151 			NL_SET_ERR_MSG_MOD(xdp->extack,
1152 					   "Failed to set queues for XDP.");
1153 			return err;
1154 		}
1155 	} else if (!prog && fp->xdp_prog) {
1156 		fun_end_xdp(dev);
1157 	} else {
1158 		struct funeth_rxq **rxqs = rtnl_dereference(fp->rxqs);
1159 
1160 		for (i = 0; i < dev->real_num_rx_queues; i++)
1161 			WRITE_ONCE(rxqs[i]->xdp_prog, prog);
1162 	}
1163 
1164 	dev->max_mtu = prog ? XDP_MAX_MTU : FUN_MAX_MTU;
1165 	old_prog = xchg(&fp->xdp_prog, prog);
1166 	if (old_prog)
1167 		bpf_prog_put(old_prog);
1168 
1169 	return 0;
1170 }
1171 
1172 static int fun_xdp(struct net_device *dev, struct netdev_bpf *xdp)
1173 {
1174 	switch (xdp->command) {
1175 	case XDP_SETUP_PROG:
1176 		return fun_xdp_setup(dev, xdp);
1177 	default:
1178 		return -EINVAL;
1179 	}
1180 }
1181 
1182 static struct devlink_port *fun_get_devlink_port(struct net_device *netdev)
1183 {
1184 	struct funeth_priv *fp = netdev_priv(netdev);
1185 
1186 	return &fp->dl_port;
1187 }
1188 
1189 static int fun_init_vports(struct fun_ethdev *ed, unsigned int n)
1190 {
1191 	if (ed->num_vports)
1192 		return -EINVAL;
1193 
1194 	ed->vport_info = kvcalloc(n, sizeof(*ed->vport_info), GFP_KERNEL);
1195 	if (!ed->vport_info)
1196 		return -ENOMEM;
1197 	ed->num_vports = n;
1198 	return 0;
1199 }
1200 
1201 static void fun_free_vports(struct fun_ethdev *ed)
1202 {
1203 	kvfree(ed->vport_info);
1204 	ed->vport_info = NULL;
1205 	ed->num_vports = 0;
1206 }
1207 
1208 static struct fun_vport_info *fun_get_vport(struct fun_ethdev *ed,
1209 					    unsigned int vport)
1210 {
1211 	if (!ed->vport_info || vport >= ed->num_vports)
1212 		return NULL;
1213 
1214 	return ed->vport_info + vport;
1215 }
1216 
1217 static int fun_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
1218 {
1219 	struct funeth_priv *fp = netdev_priv(dev);
1220 	struct fun_adi_param mac_param = {};
1221 	struct fun_dev *fdev = fp->fdev;
1222 	struct fun_ethdev *ed = to_fun_ethdev(fdev);
1223 	struct fun_vport_info *vi;
1224 	int rc = -EINVAL;
1225 
1226 	if (is_multicast_ether_addr(mac))
1227 		return -EINVAL;
1228 
1229 	mutex_lock(&ed->state_mutex);
1230 	vi = fun_get_vport(ed, vf);
1231 	if (!vi)
1232 		goto unlock;
1233 
1234 	mac_param.u.mac = FUN_ADI_MAC_INIT(ether_addr_to_u64(mac));
1235 	rc = fun_adi_write(fdev, FUN_ADMIN_ADI_ATTR_MACADDR, vf + 1,
1236 			   &mac_param);
1237 	if (!rc)
1238 		ether_addr_copy(vi->mac, mac);
1239 unlock:
1240 	mutex_unlock(&ed->state_mutex);
1241 	return rc;
1242 }
1243 
1244 static int fun_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos,
1245 			   __be16 vlan_proto)
1246 {
1247 	struct funeth_priv *fp = netdev_priv(dev);
1248 	struct fun_adi_param vlan_param = {};
1249 	struct fun_dev *fdev = fp->fdev;
1250 	struct fun_ethdev *ed = to_fun_ethdev(fdev);
1251 	struct fun_vport_info *vi;
1252 	int rc = -EINVAL;
1253 
1254 	if (vlan > 4095 || qos > 7)
1255 		return -EINVAL;
1256 	if (vlan_proto && vlan_proto != htons(ETH_P_8021Q) &&
1257 	    vlan_proto != htons(ETH_P_8021AD))
1258 		return -EINVAL;
1259 
1260 	mutex_lock(&ed->state_mutex);
1261 	vi = fun_get_vport(ed, vf);
1262 	if (!vi)
1263 		goto unlock;
1264 
1265 	vlan_param.u.vlan = FUN_ADI_VLAN_INIT(be16_to_cpu(vlan_proto),
1266 					      ((u16)qos << VLAN_PRIO_SHIFT) | vlan);
1267 	rc = fun_adi_write(fdev, FUN_ADMIN_ADI_ATTR_VLAN, vf + 1, &vlan_param);
1268 	if (!rc) {
1269 		vi->vlan = vlan;
1270 		vi->qos = qos;
1271 		vi->vlan_proto = vlan_proto;
1272 	}
1273 unlock:
1274 	mutex_unlock(&ed->state_mutex);
1275 	return rc;
1276 }
1277 
1278 static int fun_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
1279 			   int max_tx_rate)
1280 {
1281 	struct funeth_priv *fp = netdev_priv(dev);
1282 	struct fun_adi_param rate_param = {};
1283 	struct fun_dev *fdev = fp->fdev;
1284 	struct fun_ethdev *ed = to_fun_ethdev(fdev);
1285 	struct fun_vport_info *vi;
1286 	int rc = -EINVAL;
1287 
1288 	if (min_tx_rate)
1289 		return -EINVAL;
1290 
1291 	mutex_lock(&ed->state_mutex);
1292 	vi = fun_get_vport(ed, vf);
1293 	if (!vi)
1294 		goto unlock;
1295 
1296 	rate_param.u.rate = FUN_ADI_RATE_INIT(max_tx_rate);
1297 	rc = fun_adi_write(fdev, FUN_ADMIN_ADI_ATTR_RATE, vf + 1, &rate_param);
1298 	if (!rc)
1299 		vi->max_rate = max_tx_rate;
1300 unlock:
1301 	mutex_unlock(&ed->state_mutex);
1302 	return rc;
1303 }
1304 
1305 static int fun_get_vf_config(struct net_device *dev, int vf,
1306 			     struct ifla_vf_info *ivi)
1307 {
1308 	struct funeth_priv *fp = netdev_priv(dev);
1309 	struct fun_ethdev *ed = to_fun_ethdev(fp->fdev);
1310 	const struct fun_vport_info *vi;
1311 
1312 	mutex_lock(&ed->state_mutex);
1313 	vi = fun_get_vport(ed, vf);
1314 	if (!vi)
1315 		goto unlock;
1316 
1317 	memset(ivi, 0, sizeof(*ivi));
1318 	ivi->vf = vf;
1319 	ether_addr_copy(ivi->mac, vi->mac);
1320 	ivi->vlan = vi->vlan;
1321 	ivi->qos = vi->qos;
1322 	ivi->vlan_proto = vi->vlan_proto;
1323 	ivi->max_tx_rate = vi->max_rate;
1324 	ivi->spoofchk = vi->spoofchk;
1325 unlock:
1326 	mutex_unlock(&ed->state_mutex);
1327 	return vi ? 0 : -EINVAL;
1328 }
1329 
1330 static void fun_uninit(struct net_device *dev)
1331 {
1332 	struct funeth_priv *fp = netdev_priv(dev);
1333 
1334 	fun_prune_queue_irqs(dev);
1335 	xa_destroy(&fp->irqs);
1336 }
1337 
1338 static const struct net_device_ops fun_netdev_ops = {
1339 	.ndo_open		= funeth_open,
1340 	.ndo_stop		= funeth_close,
1341 	.ndo_start_xmit		= fun_start_xmit,
1342 	.ndo_get_stats64	= fun_get_stats64,
1343 	.ndo_change_mtu		= fun_change_mtu,
1344 	.ndo_set_mac_address	= fun_set_macaddr,
1345 	.ndo_validate_addr	= eth_validate_addr,
1346 	.ndo_eth_ioctl		= fun_ioctl,
1347 	.ndo_uninit		= fun_uninit,
1348 	.ndo_bpf		= fun_xdp,
1349 	.ndo_xdp_xmit		= fun_xdp_xmit_frames,
1350 	.ndo_set_vf_mac		= fun_set_vf_mac,
1351 	.ndo_set_vf_vlan	= fun_set_vf_vlan,
1352 	.ndo_set_vf_rate	= fun_set_vf_rate,
1353 	.ndo_get_vf_config	= fun_get_vf_config,
1354 	.ndo_get_devlink_port	= fun_get_devlink_port,
1355 };
1356 
/* GSO feature bits for tunnelled traffic */
#define GSO_ENCAP_FLAGS \
	(NETIF_F_GSO_GRE | NETIF_F_GSO_IPXIP4 | NETIF_F_GSO_IPXIP6 | \
	 NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM)

/* segmentation offload feature bits */
#define TSO_FLAGS \
	(NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN | NETIF_F_GSO_UDP_L4)

/* mask applied to the device features to obtain the VLAN features */
#define VLAN_FEAT \
	(NETIF_F_SG | NETIF_F_HW_CSUM | TSO_FLAGS | GSO_ENCAP_FLAGS | \
	 NETIF_F_HIGHDMA)
1364 
1365 static void fun_dflt_rss_indir(struct funeth_priv *fp, unsigned int nrx)
1366 {
1367 	unsigned int i;
1368 
1369 	for (i = 0; i < fp->indir_table_nentries; i++)
1370 		fp->indir_table[i] = ethtool_rxfh_indir_default(i, nrx);
1371 }
1372 
1373 /* Reset the RSS indirection table to equal distribution across the current
1374  * number of Rx queues. Called at init time and whenever the number of Rx
1375  * queues changes subsequently. Note that this may also resize the indirection
1376  * table.
1377  */
1378 static void fun_reset_rss_indir(struct net_device *dev, unsigned int nrx)
1379 {
1380 	struct funeth_priv *fp = netdev_priv(dev);
1381 
1382 	if (!fp->rss_cfg)
1383 		return;
1384 
1385 	/* Set the table size to the max possible that allows an equal number
1386 	 * of occurrences of each CQ.
1387 	 */
1388 	fp->indir_table_nentries = rounddown(FUN_ETH_RSS_MAX_INDIR_ENT, nrx);
1389 	fun_dflt_rss_indir(fp, nrx);
1390 }
1391 
1392 /* Update the RSS LUT to contain only queues in [0, nrx). Normally this will
1393  * update the LUT to an equal distribution among nrx queues, If @only_if_needed
1394  * is set the LUT is left unchanged if it already does not reference any queues
1395  * >= nrx.
1396  */
1397 static int fun_rss_set_qnum(struct net_device *dev, unsigned int nrx,
1398 			    bool only_if_needed)
1399 {
1400 	struct funeth_priv *fp = netdev_priv(dev);
1401 	u32 old_lut[FUN_ETH_RSS_MAX_INDIR_ENT];
1402 	unsigned int i, oldsz;
1403 	int err;
1404 
1405 	if (!fp->rss_cfg)
1406 		return 0;
1407 
1408 	if (only_if_needed) {
1409 		for (i = 0; i < fp->indir_table_nentries; i++)
1410 			if (fp->indir_table[i] >= nrx)
1411 				break;
1412 
1413 		if (i >= fp->indir_table_nentries)
1414 			return 0;
1415 	}
1416 
1417 	memcpy(old_lut, fp->indir_table, sizeof(old_lut));
1418 	oldsz = fp->indir_table_nentries;
1419 	fun_reset_rss_indir(dev, nrx);
1420 
1421 	err = fun_config_rss(dev, fp->hash_algo, fp->rss_key,
1422 			     fp->indir_table, FUN_ADMIN_SUBOP_MODIFY);
1423 	if (!err)
1424 		return 0;
1425 
1426 	memcpy(fp->indir_table, old_lut, sizeof(old_lut));
1427 	fp->indir_table_nentries = oldsz;
1428 	return err;
1429 }
1430 
1431 /* Allocate the DMA area for the RSS configuration commands to the device, and
1432  * initialize the hash, hash key, indirection table size and its entries to
1433  * their defaults. The indirection table defaults to equal distribution across
1434  * the Rx queues.
1435  */
1436 static int fun_init_rss(struct net_device *dev)
1437 {
1438 	struct funeth_priv *fp = netdev_priv(dev);
1439 	size_t size = sizeof(fp->rss_key) + sizeof(fp->indir_table);
1440 
1441 	fp->rss_hw_id = FUN_HCI_ID_INVALID;
1442 	if (!(fp->port_caps & FUN_PORT_CAP_OFFLOADS))
1443 		return 0;
1444 
1445 	fp->rss_cfg = dma_alloc_coherent(&fp->pdev->dev, size,
1446 					 &fp->rss_dma_addr, GFP_KERNEL);
1447 	if (!fp->rss_cfg)
1448 		return -ENOMEM;
1449 
1450 	fp->hash_algo = FUN_ETH_RSS_ALG_TOEPLITZ;
1451 	netdev_rss_key_fill(fp->rss_key, sizeof(fp->rss_key));
1452 	fun_reset_rss_indir(dev, dev->real_num_rx_queues);
1453 	return 0;
1454 }
1455 
1456 static void fun_free_rss(struct funeth_priv *fp)
1457 {
1458 	if (fp->rss_cfg) {
1459 		dma_free_coherent(&fp->pdev->dev,
1460 				  sizeof(fp->rss_key) + sizeof(fp->indir_table),
1461 				  fp->rss_cfg, fp->rss_dma_addr);
1462 		fp->rss_cfg = NULL;
1463 	}
1464 }
1465 
1466 void fun_set_ring_count(struct net_device *netdev, unsigned int ntx,
1467 			unsigned int nrx)
1468 {
1469 	netif_set_real_num_tx_queues(netdev, ntx);
1470 	if (nrx != netdev->real_num_rx_queues) {
1471 		netif_set_real_num_rx_queues(netdev, nrx);
1472 		fun_reset_rss_indir(netdev, nrx);
1473 	}
1474 }
1475 
1476 static int fun_init_stats_area(struct funeth_priv *fp)
1477 {
1478 	unsigned int nstats;
1479 
1480 	if (!(fp->port_caps & FUN_PORT_CAP_STATS))
1481 		return 0;
1482 
1483 	nstats = PORT_MAC_RX_STATS_MAX + PORT_MAC_TX_STATS_MAX +
1484 		 PORT_MAC_FEC_STATS_MAX;
1485 
1486 	fp->stats = dma_alloc_coherent(&fp->pdev->dev, nstats * sizeof(u64),
1487 				       &fp->stats_dma_addr, GFP_KERNEL);
1488 	if (!fp->stats)
1489 		return -ENOMEM;
1490 	return 0;
1491 }
1492 
1493 static void fun_free_stats_area(struct funeth_priv *fp)
1494 {
1495 	unsigned int nstats;
1496 
1497 	if (fp->stats) {
1498 		nstats = PORT_MAC_RX_STATS_MAX + PORT_MAC_TX_STATS_MAX;
1499 		dma_free_coherent(&fp->pdev->dev, nstats * sizeof(u64),
1500 				  fp->stats, fp->stats_dma_addr);
1501 		fp->stats = NULL;
1502 	}
1503 }
1504 
1505 static int fun_dl_port_register(struct net_device *netdev)
1506 {
1507 	struct funeth_priv *fp = netdev_priv(netdev);
1508 	struct devlink *dl = priv_to_devlink(fp->fdev);
1509 	struct devlink_port_attrs attrs = {};
1510 	unsigned int idx;
1511 
1512 	if (fp->port_caps & FUN_PORT_CAP_VPORT) {
1513 		attrs.flavour = DEVLINK_PORT_FLAVOUR_VIRTUAL;
1514 		idx = fp->lport;
1515 	} else {
1516 		idx = netdev->dev_port;
1517 		attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
1518 		attrs.lanes = fp->lane_attrs & 7;
1519 		if (fp->lane_attrs & FUN_PORT_LANE_SPLIT) {
1520 			attrs.split = 1;
1521 			attrs.phys.port_number = fp->lport & ~3;
1522 			attrs.phys.split_subport_number = fp->lport & 3;
1523 		} else {
1524 			attrs.phys.port_number = fp->lport;
1525 		}
1526 	}
1527 
1528 	devlink_port_attrs_set(&fp->dl_port, &attrs);
1529 
1530 	return devlink_port_register(dl, &fp->dl_port, idx);
1531 }
1532 
1533 /* Determine the max Tx/Rx queues for a port. */
1534 static int fun_max_qs(struct fun_ethdev *ed, unsigned int *ntx,
1535 		      unsigned int *nrx)
1536 {
1537 	int neth;
1538 
1539 	if (ed->num_ports > 1 || is_kdump_kernel()) {
1540 		*ntx = 1;
1541 		*nrx = 1;
1542 		return 0;
1543 	}
1544 
1545 	neth = fun_get_res_count(&ed->fdev, FUN_ADMIN_OP_ETH);
1546 	if (neth < 0)
1547 		return neth;
1548 
1549 	/* We determine the max number of queues based on the CPU
1550 	 * cores, device interrupts and queues, RSS size, and device Tx flows.
1551 	 *
1552 	 * - At least 1 Rx and 1 Tx queues.
1553 	 * - At most 1 Rx/Tx queue per core.
1554 	 * - Each Rx/Tx queue needs 1 SQ.
1555 	 */
1556 	*ntx = min(ed->nsqs_per_port - 1, num_online_cpus());
1557 	*nrx = *ntx;
1558 	if (*ntx > neth)
1559 		*ntx = neth;
1560 	if (*nrx > FUN_ETH_RSS_MAX_INDIR_ENT)
1561 		*nrx = FUN_ETH_RSS_MAX_INDIR_ENT;
1562 	return 0;
1563 }
1564 
1565 static void fun_queue_defaults(struct net_device *dev, unsigned int nsqs)
1566 {
1567 	unsigned int ntx, nrx;
1568 
1569 	ntx = min(dev->num_tx_queues, FUN_DFLT_QUEUES);
1570 	nrx = min(dev->num_rx_queues, FUN_DFLT_QUEUES);
1571 	if (ntx <= nrx) {
1572 		ntx = min(ntx, nsqs / 2);
1573 		nrx = min(nrx, nsqs - ntx);
1574 	} else {
1575 		nrx = min(nrx, nsqs / 2);
1576 		ntx = min(ntx, nsqs - nrx);
1577 	}
1578 
1579 	netif_set_real_num_tx_queues(dev, ntx);
1580 	netif_set_real_num_rx_queues(dev, nrx);
1581 }
1582 
1583 /* Replace the existing Rx/Tx/XDP queues with equal number of queues with
1584  * different settings, e.g. depth. This is a disruptive replacement that
1585  * temporarily shuts down the data path and should be limited to changes that
1586  * can't be applied to live queues. The old queues are always discarded.
1587  */
1588 int fun_replace_queues(struct net_device *dev, struct fun_qset *newqs,
1589 		       struct netlink_ext_ack *extack)
1590 {
1591 	struct fun_qset oldqs = { .state = FUN_QSTATE_DESTROYED };
1592 	struct funeth_priv *fp = netdev_priv(dev);
1593 	int err;
1594 
1595 	newqs->nrxqs = dev->real_num_rx_queues;
1596 	newqs->ntxqs = dev->real_num_tx_queues;
1597 	newqs->nxdpqs = fp->num_xdpqs;
1598 	newqs->state = FUN_QSTATE_INIT_SW;
1599 	err = fun_alloc_rings(dev, newqs);
1600 	if (err) {
1601 		NL_SET_ERR_MSG_MOD(extack,
1602 				   "Unable to allocate memory for new queues, keeping current settings");
1603 		return err;
1604 	}
1605 
1606 	fun_down(dev, &oldqs);
1607 
1608 	err = fun_up(dev, newqs);
1609 	if (!err)
1610 		return 0;
1611 
1612 	/* The new queues couldn't be installed. We do not retry the old queues
1613 	 * as they are the same to the device as the new queues and would
1614 	 * similarly fail.
1615 	 */
1616 	newqs->state = FUN_QSTATE_DESTROYED;
1617 	fun_free_rings(dev, newqs);
1618 	NL_SET_ERR_MSG_MOD(extack, "Unable to restore the data path with the new queues.");
1619 	return err;
1620 }
1621 
1622 /* Change the number of Rx/Tx queues of a device while it is up. This is done
1623  * by incrementally adding/removing queues to meet the new requirements while
1624  * handling ongoing traffic.
1625  */
1626 int fun_change_num_queues(struct net_device *dev, unsigned int ntx,
1627 			  unsigned int nrx)
1628 {
1629 	unsigned int keep_tx = min(dev->real_num_tx_queues, ntx);
1630 	unsigned int keep_rx = min(dev->real_num_rx_queues, nrx);
1631 	struct funeth_priv *fp = netdev_priv(dev);
1632 	struct fun_qset oldqs = {
1633 		.rxqs = rtnl_dereference(fp->rxqs),
1634 		.txqs = fp->txqs,
1635 		.nrxqs = dev->real_num_rx_queues,
1636 		.ntxqs = dev->real_num_tx_queues,
1637 		.rxq_start = keep_rx,
1638 		.txq_start = keep_tx,
1639 		.state = FUN_QSTATE_DESTROYED
1640 	};
1641 	struct fun_qset newqs = {
1642 		.nrxqs = nrx,
1643 		.ntxqs = ntx,
1644 		.rxq_start = keep_rx,
1645 		.txq_start = keep_tx,
1646 		.cq_depth = fp->cq_depth,
1647 		.rq_depth = fp->rq_depth,
1648 		.sq_depth = fp->sq_depth,
1649 		.state = FUN_QSTATE_INIT_FULL
1650 	};
1651 	int i, err;
1652 
1653 	err = fun_alloc_rings(dev, &newqs);
1654 	if (err)
1655 		goto free_irqs;
1656 
1657 	err = fun_enable_irqs(dev); /* of any newly added queues */
1658 	if (err)
1659 		goto free_rings;
1660 
1661 	/* copy the queues we are keeping to the new set */
1662 	memcpy(newqs.rxqs, oldqs.rxqs, keep_rx * sizeof(*oldqs.rxqs));
1663 	memcpy(newqs.txqs, fp->txqs, keep_tx * sizeof(*fp->txqs));
1664 
1665 	if (nrx < dev->real_num_rx_queues) {
1666 		err = fun_rss_set_qnum(dev, nrx, true);
1667 		if (err)
1668 			goto disable_tx_irqs;
1669 
1670 		for (i = nrx; i < dev->real_num_rx_queues; i++)
1671 			fun_disable_one_irq(container_of(oldqs.rxqs[i]->napi,
1672 							 struct fun_irq, napi));
1673 
1674 		netif_set_real_num_rx_queues(dev, nrx);
1675 	}
1676 
1677 	if (ntx < dev->real_num_tx_queues)
1678 		netif_set_real_num_tx_queues(dev, ntx);
1679 
1680 	rcu_assign_pointer(fp->rxqs, newqs.rxqs);
1681 	fp->txqs = newqs.txqs;
1682 	synchronize_net();
1683 
1684 	if (ntx > dev->real_num_tx_queues)
1685 		netif_set_real_num_tx_queues(dev, ntx);
1686 
1687 	if (nrx > dev->real_num_rx_queues) {
1688 		netif_set_real_num_rx_queues(dev, nrx);
1689 		fun_rss_set_qnum(dev, nrx, false);
1690 	}
1691 
1692 	/* disable interrupts of any excess Tx queues */
1693 	for (i = keep_tx; i < oldqs.ntxqs; i++)
1694 		fun_disable_one_irq(oldqs.txqs[i]->irq);
1695 
1696 	fun_free_rings(dev, &oldqs);
1697 	fun_prune_queue_irqs(dev);
1698 	return 0;
1699 
1700 disable_tx_irqs:
1701 	for (i = oldqs.ntxqs; i < ntx; i++)
1702 		fun_disable_one_irq(newqs.txqs[i]->irq);
1703 free_rings:
1704 	newqs.state = FUN_QSTATE_DESTROYED;
1705 	fun_free_rings(dev, &newqs);
1706 free_irqs:
1707 	fun_prune_queue_irqs(dev);
1708 	return err;
1709 }
1710 
1711 static int fun_create_netdev(struct fun_ethdev *ed, unsigned int portid)
1712 {
1713 	struct fun_dev *fdev = &ed->fdev;
1714 	struct net_device *netdev;
1715 	struct funeth_priv *fp;
1716 	unsigned int ntx, nrx;
1717 	int rc;
1718 
1719 	rc = fun_max_qs(ed, &ntx, &nrx);
1720 	if (rc)
1721 		return rc;
1722 
1723 	netdev = alloc_etherdev_mqs(sizeof(*fp), ntx, nrx);
1724 	if (!netdev) {
1725 		rc = -ENOMEM;
1726 		goto done;
1727 	}
1728 
1729 	netdev->dev_port = portid;
1730 	fun_queue_defaults(netdev, ed->nsqs_per_port);
1731 
1732 	fp = netdev_priv(netdev);
1733 	fp->fdev = fdev;
1734 	fp->pdev = to_pci_dev(fdev->dev);
1735 	fp->netdev = netdev;
1736 	xa_init(&fp->irqs);
1737 	fp->rx_irq_ofst = ntx;
1738 	seqcount_init(&fp->link_seq);
1739 
1740 	fp->lport = INVALID_LPORT;
1741 	rc = fun_port_create(netdev);
1742 	if (rc)
1743 		goto free_netdev;
1744 
1745 	/* bind port to admin CQ for async events */
1746 	rc = fun_bind(fdev, FUN_ADMIN_BIND_TYPE_PORT, portid,
1747 		      FUN_ADMIN_BIND_TYPE_EPCQ, 0);
1748 	if (rc)
1749 		goto destroy_port;
1750 
1751 	rc = fun_get_port_attributes(netdev);
1752 	if (rc)
1753 		goto destroy_port;
1754 
1755 	rc = fun_init_rss(netdev);
1756 	if (rc)
1757 		goto destroy_port;
1758 
1759 	rc = fun_init_stats_area(fp);
1760 	if (rc)
1761 		goto free_rss;
1762 
1763 	SET_NETDEV_DEV(netdev, fdev->dev);
1764 	netdev->netdev_ops = &fun_netdev_ops;
1765 
1766 	netdev->hw_features = NETIF_F_SG | NETIF_F_RXHASH | NETIF_F_RXCSUM;
1767 	if (fp->port_caps & FUN_PORT_CAP_OFFLOADS)
1768 		netdev->hw_features |= NETIF_F_HW_CSUM | TSO_FLAGS;
1769 	if (fp->port_caps & FUN_PORT_CAP_ENCAP_OFFLOADS)
1770 		netdev->hw_features |= GSO_ENCAP_FLAGS;
1771 
1772 	netdev->features |= netdev->hw_features | NETIF_F_HIGHDMA;
1773 	netdev->vlan_features = netdev->features & VLAN_FEAT;
1774 	netdev->mpls_features = netdev->vlan_features;
1775 	netdev->hw_enc_features = netdev->hw_features;
1776 
1777 	netdev->min_mtu = ETH_MIN_MTU;
1778 	netdev->max_mtu = FUN_MAX_MTU;
1779 
1780 	fun_set_ethtool_ops(netdev);
1781 
1782 	/* configurable parameters */
1783 	fp->sq_depth = min(SQ_DEPTH, fdev->q_depth);
1784 	fp->cq_depth = min(CQ_DEPTH, fdev->q_depth);
1785 	fp->rq_depth = min_t(unsigned int, RQ_DEPTH, fdev->q_depth);
1786 	fp->rx_coal_usec  = CQ_INTCOAL_USEC;
1787 	fp->rx_coal_count = CQ_INTCOAL_NPKT;
1788 	fp->tx_coal_usec  = SQ_INTCOAL_USEC;
1789 	fp->tx_coal_count = SQ_INTCOAL_NPKT;
1790 	fp->cq_irq_db = FUN_IRQ_CQ_DB(fp->rx_coal_usec, fp->rx_coal_count);
1791 
1792 	rc = fun_dl_port_register(netdev);
1793 	if (rc)
1794 		goto free_stats;
1795 
1796 	fp->ktls_id = FUN_HCI_ID_INVALID;
1797 	fun_ktls_init(netdev);            /* optional, failure OK */
1798 
1799 	netif_carrier_off(netdev);
1800 	ed->netdevs[portid] = netdev;
1801 	rc = register_netdev(netdev);
1802 	if (rc)
1803 		goto unreg_devlink;
1804 
1805 	if (fp->dl_port.devlink)
1806 		devlink_port_type_eth_set(&fp->dl_port, netdev);
1807 
1808 	return 0;
1809 
1810 unreg_devlink:
1811 	ed->netdevs[portid] = NULL;
1812 	fun_ktls_cleanup(fp);
1813 	if (fp->dl_port.devlink)
1814 		devlink_port_unregister(&fp->dl_port);
1815 free_stats:
1816 	fun_free_stats_area(fp);
1817 free_rss:
1818 	fun_free_rss(fp);
1819 destroy_port:
1820 	fun_port_destroy(netdev);
1821 free_netdev:
1822 	free_netdev(netdev);
1823 done:
1824 	dev_err(fdev->dev, "couldn't allocate port %u, error %d", portid, rc);
1825 	return rc;
1826 }
1827 
1828 static void fun_destroy_netdev(struct net_device *netdev)
1829 {
1830 	struct funeth_priv *fp;
1831 
1832 	fp = netdev_priv(netdev);
1833 	if (fp->dl_port.devlink) {
1834 		devlink_port_type_clear(&fp->dl_port);
1835 		devlink_port_unregister(&fp->dl_port);
1836 	}
1837 	unregister_netdev(netdev);
1838 	fun_ktls_cleanup(fp);
1839 	fun_free_stats_area(fp);
1840 	fun_free_rss(fp);
1841 	fun_port_destroy(netdev);
1842 	free_netdev(netdev);
1843 }
1844 
1845 static int fun_create_ports(struct fun_ethdev *ed, unsigned int nports)
1846 {
1847 	struct fun_dev *fd = &ed->fdev;
1848 	int i, rc;
1849 
1850 	/* The admin queue takes 1 IRQ and 2 SQs. */
1851 	ed->nsqs_per_port = min(fd->num_irqs - 1,
1852 				fd->kern_end_qid - 2) / nports;
1853 	if (ed->nsqs_per_port < 2) {
1854 		dev_err(fd->dev, "Too few SQs for %u ports", nports);
1855 		return -EINVAL;
1856 	}
1857 
1858 	ed->netdevs = kcalloc(nports, sizeof(*ed->netdevs), GFP_KERNEL);
1859 	if (!ed->netdevs)
1860 		return -ENOMEM;
1861 
1862 	ed->num_ports = nports;
1863 	for (i = 0; i < nports; i++) {
1864 		rc = fun_create_netdev(ed, i);
1865 		if (rc)
1866 			goto free_netdevs;
1867 	}
1868 
1869 	return 0;
1870 
1871 free_netdevs:
1872 	while (i)
1873 		fun_destroy_netdev(ed->netdevs[--i]);
1874 	kfree(ed->netdevs);
1875 	ed->netdevs = NULL;
1876 	ed->num_ports = 0;
1877 	return rc;
1878 }
1879 
1880 static void fun_destroy_ports(struct fun_ethdev *ed)
1881 {
1882 	unsigned int i;
1883 
1884 	for (i = 0; i < ed->num_ports; i++)
1885 		fun_destroy_netdev(ed->netdevs[i]);
1886 
1887 	kfree(ed->netdevs);
1888 	ed->netdevs = NULL;
1889 	ed->num_ports = 0;
1890 }
1891 
1892 static void fun_update_link_state(const struct fun_ethdev *ed,
1893 				  const struct fun_admin_port_notif *notif)
1894 {
1895 	unsigned int port_idx = be16_to_cpu(notif->id);
1896 	struct net_device *netdev;
1897 	struct funeth_priv *fp;
1898 
1899 	if (port_idx >= ed->num_ports)
1900 		return;
1901 
1902 	netdev = ed->netdevs[port_idx];
1903 	fp = netdev_priv(netdev);
1904 
1905 	write_seqcount_begin(&fp->link_seq);
1906 	fp->link_speed = be32_to_cpu(notif->speed) * 10;  /* 10 Mbps->Mbps */
1907 	fp->active_fc = notif->flow_ctrl;
1908 	fp->active_fec = notif->fec;
1909 	fp->xcvr_type = notif->xcvr_type;
1910 	fp->link_down_reason = notif->link_down_reason;
1911 	fp->lp_advertising = be64_to_cpu(notif->lp_advertising);
1912 
1913 	if ((notif->link_state | notif->missed_events) & FUN_PORT_FLAG_MAC_DOWN)
1914 		netif_carrier_off(netdev);
1915 	if (notif->link_state & FUN_PORT_FLAG_MAC_UP)
1916 		netif_carrier_on(netdev);
1917 
1918 	write_seqcount_end(&fp->link_seq);
1919 	fun_report_link(netdev);
1920 }
1921 
1922 /* handler for async events delivered through the admin CQ */
1923 static void fun_event_cb(struct fun_dev *fdev, void *entry)
1924 {
1925 	u8 op = ((struct fun_admin_rsp_common *)entry)->op;
1926 
1927 	if (op == FUN_ADMIN_OP_PORT) {
1928 		const struct fun_admin_port_notif *rsp = entry;
1929 
1930 		if (rsp->subop == FUN_ADMIN_SUBOP_NOTIFY) {
1931 			fun_update_link_state(to_fun_ethdev(fdev), rsp);
1932 		} else if (rsp->subop == FUN_ADMIN_SUBOP_RES_COUNT) {
1933 			const struct fun_admin_res_count_rsp *r = entry;
1934 
1935 			if (r->count.data)
1936 				set_bit(FUN_SERV_RES_CHANGE, &fdev->service_flags);
1937 			else
1938 				set_bit(FUN_SERV_DEL_PORTS, &fdev->service_flags);
1939 			fun_serv_sched(fdev);
1940 		} else {
1941 			dev_info(fdev->dev, "adminq event unexpected op %u subop %u",
1942 				 op, rsp->subop);
1943 		}
1944 	} else {
1945 		dev_info(fdev->dev, "adminq event unexpected op %u", op);
1946 	}
1947 }
1948 
1949 /* handler for pending work managed by the service task */
1950 static void fun_service_cb(struct fun_dev *fdev)
1951 {
1952 	struct fun_ethdev *ed = to_fun_ethdev(fdev);
1953 	int rc;
1954 
1955 	if (test_and_clear_bit(FUN_SERV_DEL_PORTS, &fdev->service_flags))
1956 		fun_destroy_ports(ed);
1957 
1958 	if (!test_and_clear_bit(FUN_SERV_RES_CHANGE, &fdev->service_flags))
1959 		return;
1960 
1961 	rc = fun_get_res_count(fdev, FUN_ADMIN_OP_PORT);
1962 	if (rc < 0 || rc == ed->num_ports)
1963 		return;
1964 
1965 	if (ed->num_ports)
1966 		fun_destroy_ports(ed);
1967 	if (rc)
1968 		fun_create_ports(ed, rc);
1969 }
1970 
1971 static int funeth_sriov_configure(struct pci_dev *pdev, int nvfs)
1972 {
1973 	struct fun_dev *fdev = pci_get_drvdata(pdev);
1974 	struct fun_ethdev *ed = to_fun_ethdev(fdev);
1975 	int rc;
1976 
1977 	if (nvfs == 0) {
1978 		if (pci_vfs_assigned(pdev)) {
1979 			dev_warn(&pdev->dev,
1980 				 "Cannot disable SR-IOV while VFs are assigned\n");
1981 			return -EPERM;
1982 		}
1983 
1984 		mutex_lock(&ed->state_mutex);
1985 		fun_free_vports(ed);
1986 		mutex_unlock(&ed->state_mutex);
1987 		pci_disable_sriov(pdev);
1988 		return 0;
1989 	}
1990 
1991 	rc = pci_enable_sriov(pdev, nvfs);
1992 	if (rc)
1993 		return rc;
1994 
1995 	mutex_lock(&ed->state_mutex);
1996 	rc = fun_init_vports(ed, nvfs);
1997 	mutex_unlock(&ed->state_mutex);
1998 	if (rc) {
1999 		pci_disable_sriov(pdev);
2000 		return rc;
2001 	}
2002 
2003 	return nvfs;
2004 }
2005 
2006 static int funeth_probe(struct pci_dev *pdev, const struct pci_device_id *id)
2007 {
2008 	struct fun_dev_params aqreq = {
2009 		.cqe_size_log2 = ilog2(ADMIN_CQE_SIZE),
2010 		.sqe_size_log2 = ilog2(ADMIN_SQE_SIZE),
2011 		.cq_depth      = ADMIN_CQ_DEPTH,
2012 		.sq_depth      = ADMIN_SQ_DEPTH,
2013 		.rq_depth      = ADMIN_RQ_DEPTH,
2014 		.min_msix      = 2,              /* 1 Rx + 1 Tx */
2015 		.event_cb      = fun_event_cb,
2016 		.serv_cb       = fun_service_cb,
2017 	};
2018 	struct devlink *devlink;
2019 	struct fun_ethdev *ed;
2020 	struct fun_dev *fdev;
2021 	int rc;
2022 
2023 	devlink = fun_devlink_alloc(&pdev->dev);
2024 	if (!devlink) {
2025 		dev_err(&pdev->dev, "devlink alloc failed\n");
2026 		return -ENOMEM;
2027 	}
2028 
2029 	ed = devlink_priv(devlink);
2030 	mutex_init(&ed->state_mutex);
2031 
2032 	fdev = &ed->fdev;
2033 	rc = fun_dev_enable(fdev, pdev, &aqreq, KBUILD_MODNAME);
2034 	if (rc)
2035 		goto free_devlink;
2036 
2037 	rc = fun_get_res_count(fdev, FUN_ADMIN_OP_PORT);
2038 	if (rc > 0)
2039 		rc = fun_create_ports(ed, rc);
2040 	if (rc < 0)
2041 		goto disable_dev;
2042 
2043 	fun_serv_restart(fdev);
2044 	fun_devlink_register(devlink);
2045 	return 0;
2046 
2047 disable_dev:
2048 	fun_dev_disable(fdev);
2049 free_devlink:
2050 	mutex_destroy(&ed->state_mutex);
2051 	fun_devlink_free(devlink);
2052 	return rc;
2053 }
2054 
2055 static void funeth_remove(struct pci_dev *pdev)
2056 {
2057 	struct fun_dev *fdev = pci_get_drvdata(pdev);
2058 	struct devlink *devlink;
2059 	struct fun_ethdev *ed;
2060 
2061 	ed = to_fun_ethdev(fdev);
2062 	devlink = priv_to_devlink(ed);
2063 	fun_devlink_unregister(devlink);
2064 
2065 #ifdef CONFIG_PCI_IOV
2066 	funeth_sriov_configure(pdev, 0);
2067 #endif
2068 
2069 	fun_serv_stop(fdev);
2070 	fun_destroy_ports(ed);
2071 	fun_dev_disable(fdev);
2072 	mutex_destroy(&ed->state_mutex);
2073 
2074 	fun_devlink_free(devlink);
2075 }
2076 
2077 static struct pci_driver funeth_driver = {
2078 	.name		 = KBUILD_MODNAME,
2079 	.id_table	 = funeth_id_table,
2080 	.probe		 = funeth_probe,
2081 	.remove		 = funeth_remove,
2082 	.shutdown	 = funeth_remove,
2083 	.sriov_configure = funeth_sriov_configure,
2084 };
2085 
2086 module_pci_driver(funeth_driver);
2087 
2088 MODULE_AUTHOR("Dimitris Michailidis <dmichail@fungible.com>");
2089 MODULE_DESCRIPTION("Fungible Ethernet Network Driver");
2090 MODULE_LICENSE("Dual BSD/GPL");
2091 MODULE_DEVICE_TABLE(pci, funeth_id_table);
2092