xref: /linux/drivers/net/ethernet/sfc/ef10.c (revision d458cdf712e0c671e8e819abb16ecd6e44f9daec)
1 /****************************************************************************
2  * Driver for Solarflare network controllers and boards
3  * Copyright 2012-2013 Solarflare Communications Inc.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 as published
7  * by the Free Software Foundation, incorporated herein by reference.
8  */
9 
10 #include "net_driver.h"
11 #include "ef10_regs.h"
12 #include "io.h"
13 #include "mcdi.h"
14 #include "mcdi_pcol.h"
15 #include "nic.h"
16 #include "workarounds.h"
17 #include <linux/in.h>
18 #include <linux/jhash.h>
19 #include <linux/wait.h>
20 #include <linux/workqueue.h>
21 
22 /* Hardware control for EF10 architecture including 'Huntington'. */
23 
24 #define EFX_EF10_DRVGEN_EV		7
25 enum {
26 	EFX_EF10_TEST = 1,
27 	EFX_EF10_REFILL,
28 };
29 
30 /* The reserved RSS context value */
31 #define EFX_EF10_RSS_CONTEXT_INVALID	0xffffffff
32 
33 /* The filter table(s) are managed by firmware and we have write-only
34  * access.  When removing filters we must identify them to the
35  * firmware by a 64-bit handle, but this is too wide for Linux kernel
36  * interfaces (32-bit for RX NFC, 16-bit for RFS).  Also, we need to
37  * be able to tell in advance whether a requested insertion will
38  * replace an existing filter.  Therefore we maintain a software hash
39  * table, which should be at least as large as the hardware hash
40  * table.
41  *
42  * Huntington has a single 8K filter table shared between all filter
43  * types and both ports.
44  */
45 #define HUNT_FILTER_TBL_ROWS 8192
46 
47 struct efx_ef10_filter_table {
48 /* The RX match field masks supported by this fw & hw, in order of priority */
49 	enum efx_filter_match_flags rx_match_flags[
50 		MC_CMD_GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES_MAXNUM];
51 	unsigned int rx_match_count;
52 
53 	struct {
54 		unsigned long spec;	/* pointer to spec plus flag bits */
55 /* BUSY flag indicates that an update is in progress.  STACK_OLD is
56  * used to mark and sweep stack-owned MAC filters.
57  */
58 #define EFX_EF10_FILTER_FLAG_BUSY	1UL
59 #define EFX_EF10_FILTER_FLAG_STACK_OLD	2UL
60 #define EFX_EF10_FILTER_FLAGS		3UL
61 		u64 handle;		/* firmware handle */
62 	} *entry;
63 	wait_queue_head_t waitq;
64 /* Shadow of net_device address lists, guarded by mac_lock */
65 #define EFX_EF10_FILTER_STACK_UC_MAX	32
66 #define EFX_EF10_FILTER_STACK_MC_MAX	256
67 	struct {
68 		u8 addr[ETH_ALEN];
69 		u16 id;
70 	} stack_uc_list[EFX_EF10_FILTER_STACK_UC_MAX],
71 	  stack_mc_list[EFX_EF10_FILTER_STACK_MC_MAX];
72 	int stack_uc_count;		/* negative for PROMISC */
73 	int stack_mc_count;		/* negative for PROMISC/ALLMULTI */
74 };
75 
76 /* An arbitrary search limit for the software hash table */
77 #define EFX_EF10_FILTER_SEARCH_LIMIT 200
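/* Illustrative sketch only (the real insertion logic lives in
 * efx_ef10_filter_insert(), beyond this excerpt): lookups probe the
 * software hash table linearly from the spec's hash bucket and give up
 * after EFX_EF10_FILTER_SEARCH_LIMIT slots, roughly:
 *
 *	hash = hash_of(spec);	// hypothetical helper
 *	for (depth = 1; depth <= EFX_EF10_FILTER_SEARCH_LIMIT; depth++) {
 *		i = (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1);
 *		// stop if slot i is free or matches spec
 *	}
 *
 * bounding the cost of an insertion even when the table is nearly full.
 */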
78 
79 static void efx_ef10_rx_push_indir_table(struct efx_nic *efx);
80 static void efx_ef10_rx_free_indir_table(struct efx_nic *efx);
81 static void efx_ef10_filter_table_remove(struct efx_nic *efx);
82 
83 static int efx_ef10_get_warm_boot_count(struct efx_nic *efx)
84 {
85 	efx_dword_t reg;
86 
87 	efx_readd(efx, &reg, ER_DZ_BIU_MC_SFT_STATUS);
88 	return EFX_DWORD_FIELD(reg, EFX_WORD_1) == 0xb007 ?
89 		EFX_DWORD_FIELD(reg, EFX_WORD_0) : -EIO;
90 }
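/* Note: 0xb007 reads as "boot" - the MC presumably writes this
 * signature to the upper word of ER_DZ_BIU_MC_SFT_STATUS once it has
 * booted, with the warm boot count in the lower word; any other value
 * means the MC is absent or still rebooting, hence the -EIO.
 */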
91 
92 static unsigned int efx_ef10_mem_map_size(struct efx_nic *efx)
93 {
94 	return resource_size(&efx->pci_dev->resource[EFX_MEM_BAR]);
95 }
96 
97 static int efx_ef10_init_datapath_caps(struct efx_nic *efx)
98 {
99 	MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CAPABILITIES_OUT_LEN);
100 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
101 	size_t outlen;
102 	int rc;
103 
104 	BUILD_BUG_ON(MC_CMD_GET_CAPABILITIES_IN_LEN != 0);
105 
106 	rc = efx_mcdi_rpc(efx, MC_CMD_GET_CAPABILITIES, NULL, 0,
107 			  outbuf, sizeof(outbuf), &outlen);
108 	if (rc)
109 		return rc;
110 	if (outlen < sizeof(outbuf)) {
111 		netif_err(efx, drv, efx->net_dev,
112 			  "unable to read datapath firmware capabilities\n");
113 		return -EIO;
114 	}
115 
116 	nic_data->datapath_caps =
117 		MCDI_DWORD(outbuf, GET_CAPABILITIES_OUT_FLAGS1);
118 
119 	if (!(nic_data->datapath_caps &
120 	      (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN))) {
121 		netif_err(efx, drv, efx->net_dev,
122 			  "current firmware does not support TSO\n");
123 		return -ENODEV;
124 	}
125 
126 	if (!(nic_data->datapath_caps &
127 	      (1 << MC_CMD_GET_CAPABILITIES_OUT_RX_PREFIX_LEN_14_LBN))) {
128 		netif_err(efx, probe, efx->net_dev,
129 			  "current firmware does not support an RX prefix\n");
130 		return -ENODEV;
131 	}
132 
133 	return 0;
134 }
135 
136 static int efx_ef10_get_sysclk_freq(struct efx_nic *efx)
137 {
138 	MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CLOCK_OUT_LEN);
139 	int rc;
140 
141 	rc = efx_mcdi_rpc(efx, MC_CMD_GET_CLOCK, NULL, 0,
142 			  outbuf, sizeof(outbuf), NULL);
143 	if (rc)
144 		return rc;
145 	rc = MCDI_DWORD(outbuf, GET_CLOCK_OUT_SYS_FREQ);
146 	return rc > 0 ? rc : -ERANGE;
147 }
148 
149 static int efx_ef10_get_mac_address(struct efx_nic *efx, u8 *mac_address)
150 {
151 	MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_MAC_ADDRESSES_OUT_LEN);
152 	size_t outlen;
153 	int rc;
154 
155 	BUILD_BUG_ON(MC_CMD_GET_MAC_ADDRESSES_IN_LEN != 0);
156 
157 	rc = efx_mcdi_rpc(efx, MC_CMD_GET_MAC_ADDRESSES, NULL, 0,
158 			  outbuf, sizeof(outbuf), &outlen);
159 	if (rc)
160 		return rc;
161 	if (outlen < MC_CMD_GET_MAC_ADDRESSES_OUT_LEN)
162 		return -EIO;
163 
164 	memcpy(mac_address,
165 	       MCDI_PTR(outbuf, GET_MAC_ADDRESSES_OUT_MAC_ADDR_BASE), ETH_ALEN);
166 	return 0;
167 }
168 
169 static int efx_ef10_probe(struct efx_nic *efx)
170 {
171 	struct efx_ef10_nic_data *nic_data;
172 	int i, rc;
173 
174 	/* We can have one VI for each 8K region.  However, we need
175 	 * multiple TX queues per channel.
176 	 */
177 	efx->max_channels =
178 		min_t(unsigned int,
179 		      EFX_MAX_CHANNELS,
180 		      resource_size(&efx->pci_dev->resource[EFX_MEM_BAR]) /
181 		      (EFX_VI_PAGE_SIZE * EFX_TXQ_TYPES));
182 	BUG_ON(efx->max_channels == 0);
183 
184 	nic_data = kzalloc(sizeof(*nic_data), GFP_KERNEL);
185 	if (!nic_data)
186 		return -ENOMEM;
187 	efx->nic_data = nic_data;
188 
189 	rc = efx_nic_alloc_buffer(efx, &nic_data->mcdi_buf,
190 				  8 + MCDI_CTL_SDU_LEN_MAX_V2, GFP_KERNEL);
191 	if (rc)
192 		goto fail1;
193 
194 	/* Get the MC's warm boot count.  In case it's rebooting right
195 	 * now, be prepared to retry.
196 	 */
197 	i = 0;
198 	for (;;) {
199 		rc = efx_ef10_get_warm_boot_count(efx);
200 		if (rc >= 0)
201 			break;
202 		if (++i == 5)
203 			goto fail2;
204 		ssleep(1);
205 	}
206 	nic_data->warm_boot_count = rc;
207 
208 	nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID;
209 
210 	/* In case we're recovering from a crash (kexec), we want to
211 	 * cancel any outstanding request by the previous user of this
212 	 * function.  We send a special message using the least
213 	 * significant bits of the 'high' (doorbell) register.
214 	 */
215 	_efx_writed(efx, cpu_to_le32(1), ER_DZ_MC_DB_HWRD);
216 
217 	rc = efx_mcdi_init(efx);
218 	if (rc)
219 		goto fail2;
220 
221 	/* Reset (most) configuration for this function */
222 	rc = efx_mcdi_reset(efx, RESET_TYPE_ALL);
223 	if (rc)
224 		goto fail3;
225 
226 	/* Enable event logging */
227 	rc = efx_mcdi_log_ctrl(efx, true, false, 0);
228 	if (rc)
229 		goto fail3;
230 
231 	rc = efx_ef10_init_datapath_caps(efx);
232 	if (rc < 0)
233 		goto fail3;
234 
235 	efx->rx_packet_len_offset =
236 		ES_DZ_RX_PREFIX_PKTLEN_OFST - ES_DZ_RX_PREFIX_SIZE;
237 
238 	rc = efx_mcdi_port_get_number(efx);
239 	if (rc < 0)
240 		goto fail3;
241 	efx->port_num = rc;
242 
243 	rc = efx_ef10_get_mac_address(efx, efx->net_dev->perm_addr);
244 	if (rc)
245 		goto fail3;
246 
247 	rc = efx_ef10_get_sysclk_freq(efx);
248 	if (rc < 0)
249 		goto fail3;
250 	efx->timer_quantum_ns = 1536000 / rc; /* 1536 cycles */
251 
252 	/* Check whether firmware supports bug 35388 workaround */
253 	rc = efx_mcdi_set_workaround(efx, MC_CMD_WORKAROUND_BUG35388, true);
254 	if (rc == 0)
255 		nic_data->workaround_35388 = true;
256 	else if (rc != -ENOSYS && rc != -ENOENT)
257 		goto fail3;
258 	netif_dbg(efx, probe, efx->net_dev,
259 		  "workaround for bug 35388 is %sabled\n",
260 		  nic_data->workaround_35388 ? "en" : "dis");
261 
262 	rc = efx_mcdi_mon_probe(efx);
263 	if (rc)
264 		goto fail3;
265 
266 	return 0;
267 
268 fail3:
269 	efx_mcdi_fini(efx);
270 fail2:
271 	efx_nic_free_buffer(efx, &nic_data->mcdi_buf);
272 fail1:
273 	kfree(nic_data);
274 	efx->nic_data = NULL;
275 	return rc;
276 }
277 
278 static int efx_ef10_free_vis(struct efx_nic *efx)
279 {
280 	int rc = efx_mcdi_rpc(efx, MC_CMD_FREE_VIS, NULL, 0, NULL, 0, NULL);
281 
282 	/* -EALREADY means nothing to free, so ignore */
283 	if (rc == -EALREADY)
284 		rc = 0;
285 	return rc;
286 }
287 
288 #ifdef EFX_USE_PIO
289 
290 static void efx_ef10_free_piobufs(struct efx_nic *efx)
291 {
292 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
293 	MCDI_DECLARE_BUF(inbuf, MC_CMD_FREE_PIOBUF_IN_LEN);
294 	unsigned int i;
295 	int rc;
296 
297 	BUILD_BUG_ON(MC_CMD_FREE_PIOBUF_OUT_LEN != 0);
298 
299 	for (i = 0; i < nic_data->n_piobufs; i++) {
300 		MCDI_SET_DWORD(inbuf, FREE_PIOBUF_IN_PIOBUF_HANDLE,
301 			       nic_data->piobuf_handle[i]);
302 		rc = efx_mcdi_rpc(efx, MC_CMD_FREE_PIOBUF, inbuf, sizeof(inbuf),
303 				  NULL, 0, NULL);
304 		WARN_ON(rc);
305 	}
306 
307 	nic_data->n_piobufs = 0;
308 }
309 
310 static int efx_ef10_alloc_piobufs(struct efx_nic *efx, unsigned int n)
311 {
312 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
313 	MCDI_DECLARE_BUF(outbuf, MC_CMD_ALLOC_PIOBUF_OUT_LEN);
314 	unsigned int i;
315 	size_t outlen;
316 	int rc = 0;
317 
318 	BUILD_BUG_ON(MC_CMD_ALLOC_PIOBUF_IN_LEN != 0);
319 
320 	for (i = 0; i < n; i++) {
321 		rc = efx_mcdi_rpc(efx, MC_CMD_ALLOC_PIOBUF, NULL, 0,
322 				  outbuf, sizeof(outbuf), &outlen);
323 		if (rc)
324 			break;
325 		if (outlen < MC_CMD_ALLOC_PIOBUF_OUT_LEN) {
326 			rc = -EIO;
327 			break;
328 		}
329 		nic_data->piobuf_handle[i] =
330 			MCDI_DWORD(outbuf, ALLOC_PIOBUF_OUT_PIOBUF_HANDLE);
331 		netif_dbg(efx, probe, efx->net_dev,
332 			  "allocated PIO buffer %u handle %x\n", i,
333 			  nic_data->piobuf_handle[i]);
334 	}
335 
336 	nic_data->n_piobufs = i;
337 	if (rc)
338 		efx_ef10_free_piobufs(efx);
339 	return rc;
340 }
341 
342 static int efx_ef10_link_piobufs(struct efx_nic *efx)
343 {
344 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
345 	MCDI_DECLARE_BUF(inbuf,
346 			 max(MC_CMD_LINK_PIOBUF_IN_LEN,
347 			     MC_CMD_UNLINK_PIOBUF_IN_LEN));
348 	struct efx_channel *channel;
349 	struct efx_tx_queue *tx_queue;
350 	unsigned int offset, index;
351 	int rc;
352 
353 	BUILD_BUG_ON(MC_CMD_LINK_PIOBUF_OUT_LEN != 0);
354 	BUILD_BUG_ON(MC_CMD_UNLINK_PIOBUF_OUT_LEN != 0);
355 
356 	/* Link a buffer to each VI in the write-combining mapping */
357 	for (index = 0; index < nic_data->n_piobufs; ++index) {
358 		MCDI_SET_DWORD(inbuf, LINK_PIOBUF_IN_PIOBUF_HANDLE,
359 			       nic_data->piobuf_handle[index]);
360 		MCDI_SET_DWORD(inbuf, LINK_PIOBUF_IN_TXQ_INSTANCE,
361 			       nic_data->pio_write_vi_base + index);
362 		rc = efx_mcdi_rpc(efx, MC_CMD_LINK_PIOBUF,
363 				  inbuf, MC_CMD_LINK_PIOBUF_IN_LEN,
364 				  NULL, 0, NULL);
365 		if (rc) {
366 			netif_err(efx, drv, efx->net_dev,
367 				  "failed to link VI %u to PIO buffer %u (%d)\n",
368 				  nic_data->pio_write_vi_base + index, index,
369 				  rc);
370 			goto fail;
371 		}
372 		netif_dbg(efx, probe, efx->net_dev,
373 			  "linked VI %u to PIO buffer %u\n",
374 			  nic_data->pio_write_vi_base + index, index);
375 	}
376 
377 	/* Link a buffer to each TX queue */
378 	efx_for_each_channel(channel, efx) {
379 		efx_for_each_channel_tx_queue(tx_queue, channel) {
380 			/* We assign the PIO buffers to queues in
381 			 * reverse order to allow for the following
382 			 * special case.
383 			 */
384 			offset = ((efx->tx_channel_offset + efx->n_tx_channels -
385 				   tx_queue->channel->channel - 1) *
386 				  efx_piobuf_size);
387 			index = offset / ER_DZ_TX_PIOBUF_SIZE;
388 			offset = offset % ER_DZ_TX_PIOBUF_SIZE;
389 
390 			/* When the host page size is 4K, the first
391 			 * host page in the WC mapping may be within
392 			 * the same VI page as the last TX queue.  We
393 			 * can only link one buffer to each VI.
394 			 */
395 			if (tx_queue->queue == nic_data->pio_write_vi_base) {
396 				BUG_ON(index != 0);
397 				rc = 0;
398 			} else {
399 				MCDI_SET_DWORD(inbuf,
400 					       LINK_PIOBUF_IN_PIOBUF_HANDLE,
401 					       nic_data->piobuf_handle[index]);
402 				MCDI_SET_DWORD(inbuf,
403 					       LINK_PIOBUF_IN_TXQ_INSTANCE,
404 					       tx_queue->queue);
405 				rc = efx_mcdi_rpc(efx, MC_CMD_LINK_PIOBUF,
406 						  inbuf, MC_CMD_LINK_PIOBUF_IN_LEN,
407 						  NULL, 0, NULL);
408 			}
409 
410 			if (rc) {
411 				/* This is non-fatal; the TX path just
412 				 * won't use PIO for this queue
413 				 */
414 				netif_err(efx, drv, efx->net_dev,
415 					  "failed to link VI %u to PIO buffer %u (%d)\n",
416 					  tx_queue->queue, index, rc);
417 				tx_queue->piobuf = NULL;
418 			} else {
419 				tx_queue->piobuf =
420 					nic_data->pio_write_base +
421 					index * EFX_VI_PAGE_SIZE + offset;
422 				tx_queue->piobuf_offset = offset;
423 				netif_dbg(efx, probe, efx->net_dev,
424 					  "linked VI %u to PIO buffer %u offset %x addr %p\n",
425 					  tx_queue->queue, index,
426 					  tx_queue->piobuf_offset,
427 					  tx_queue->piobuf);
428 			}
429 		}
430 	}
431 
432 	return 0;
433 
434 fail:
435 	while (index--) {
436 		MCDI_SET_DWORD(inbuf, UNLINK_PIOBUF_IN_TXQ_INSTANCE,
437 			       nic_data->pio_write_vi_base + index);
438 		efx_mcdi_rpc(efx, MC_CMD_UNLINK_PIOBUF,
439 			     inbuf, MC_CMD_UNLINK_PIOBUF_IN_LEN,
440 			     NULL, 0, NULL);
441 	}
442 	return rc;
443 }
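/* Worked example of the reverse-order assignment above, assuming
 * efx_piobuf_size == 256 and ER_DZ_TX_PIOBUF_SIZE == 2048 (i.e. 8 copy
 * regions per PIO buffer), tx_channel_offset == 0, n_tx_channels == 4:
 * the queue on channel 3 gets offset (4 - 3 - 1) * 256 = 0, index 0,
 * while the queue on channel 0 gets offset (4 - 0 - 1) * 256 = 768,
 * also index 0.  The highest-numbered channel thus lands at offset 0 of
 * buffer 0, which is what makes the pio_write_vi_base special case
 * above line up.
 */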
444 
445 #else /* !EFX_USE_PIO */
446 
447 static int efx_ef10_alloc_piobufs(struct efx_nic *efx, unsigned int n)
448 {
449 	return n == 0 ? 0 : -ENOBUFS;
450 }
451 
452 static int efx_ef10_link_piobufs(struct efx_nic *efx)
453 {
454 	return 0;
455 }
456 
457 static void efx_ef10_free_piobufs(struct efx_nic *efx)
458 {
459 }
460 
461 #endif /* EFX_USE_PIO */
462 
463 static void efx_ef10_remove(struct efx_nic *efx)
464 {
465 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
466 	int rc;
467 
468 	efx_mcdi_mon_remove(efx);
469 
470 	/* This must run after efx_ptp_remove_channel(), once no filters remain */
471 	efx_ef10_rx_free_indir_table(efx);
472 
473 	if (nic_data->wc_membase)
474 		iounmap(nic_data->wc_membase);
475 
476 	rc = efx_ef10_free_vis(efx);
477 	WARN_ON(rc != 0);
478 
479 	if (!nic_data->must_restore_piobufs)
480 		efx_ef10_free_piobufs(efx);
481 
482 	efx_mcdi_fini(efx);
483 	efx_nic_free_buffer(efx, &nic_data->mcdi_buf);
484 	kfree(nic_data);
485 }
486 
487 static int efx_ef10_alloc_vis(struct efx_nic *efx,
488 			      unsigned int min_vis, unsigned int max_vis)
489 {
490 	MCDI_DECLARE_BUF(inbuf, MC_CMD_ALLOC_VIS_IN_LEN);
491 	MCDI_DECLARE_BUF(outbuf, MC_CMD_ALLOC_VIS_OUT_LEN);
492 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
493 	size_t outlen;
494 	int rc;
495 
496 	MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MIN_VI_COUNT, min_vis);
497 	MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MAX_VI_COUNT, max_vis);
498 	rc = efx_mcdi_rpc(efx, MC_CMD_ALLOC_VIS, inbuf, sizeof(inbuf),
499 			  outbuf, sizeof(outbuf), &outlen);
500 	if (rc != 0)
501 		return rc;
502 
503 	if (outlen < MC_CMD_ALLOC_VIS_OUT_LEN)
504 		return -EIO;
505 
506 	netif_dbg(efx, drv, efx->net_dev, "base VI is 0x%03x\n",
507 		  MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE));
508 
509 	nic_data->vi_base = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE);
510 	nic_data->n_allocated_vis = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_COUNT);
511 	return 0;
512 }
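/* Note: ALLOC_VIS is a min/max request - firmware may grant any count
 * in [min_vis, max_vis] - so callers must check n_allocated_vis rather
 * than assume max_vis was granted (see the PIO fallback in
 * efx_ef10_dimension_resources() below).
 */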
513 
514 /* Note that the failure path of this function does not free
515  * resources, as this will be done by efx_ef10_remove().
516  */
517 static int efx_ef10_dimension_resources(struct efx_nic *efx)
518 {
519 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
520 	unsigned int uc_mem_map_size, wc_mem_map_size;
521 	unsigned int min_vis, pio_write_vi_base, max_vis;
522 	void __iomem *membase;
523 	int rc;
524 
525 	min_vis = max(efx->n_channels, efx->n_tx_channels * EFX_TXQ_TYPES);
526 
527 #ifdef EFX_USE_PIO
528 	/* Try to allocate PIO buffers if wanted and if the full
529 	 * number of PIO buffers would be sufficient to allocate one
530 	 * copy-buffer per TX channel.  Failure is non-fatal, as there
531 	 * are only a small number of PIO buffers shared between all
532 	 * functions of the controller.
533 	 */
534 	if (efx_piobuf_size != 0 &&
535 	    ER_DZ_TX_PIOBUF_SIZE / efx_piobuf_size * EF10_TX_PIOBUF_COUNT >=
536 	    efx->n_tx_channels) {
537 		unsigned int n_piobufs =
538 			DIV_ROUND_UP(efx->n_tx_channels,
539 				     ER_DZ_TX_PIOBUF_SIZE / efx_piobuf_size);
540 
541 		rc = efx_ef10_alloc_piobufs(efx, n_piobufs);
542 		if (rc)
543 			netif_err(efx, probe, efx->net_dev,
544 				  "failed to allocate PIO buffers (%d)\n", rc);
545 		else
546 			netif_dbg(efx, probe, efx->net_dev,
547 				  "allocated %u PIO buffers\n", n_piobufs);
548 	}
549 #else
550 	nic_data->n_piobufs = 0;
551 #endif
552 
553 	/* PIO buffers should be mapped with write-combining enabled,
554 	 * and we want to make single UC and WC mappings rather than
555 	 * several of each (in fact that's the only option if host
556 	 * page size is >4K).  So we may allocate some extra VIs just
557 	 * for writing PIO buffers through.
558 	 */
559 	uc_mem_map_size = PAGE_ALIGN((min_vis - 1) * EFX_VI_PAGE_SIZE +
560 				     ER_DZ_TX_PIOBUF);
561 	if (nic_data->n_piobufs) {
562 		pio_write_vi_base = uc_mem_map_size / EFX_VI_PAGE_SIZE;
563 		wc_mem_map_size = (PAGE_ALIGN((pio_write_vi_base +
564 					       nic_data->n_piobufs) *
565 					      EFX_VI_PAGE_SIZE) -
566 				   uc_mem_map_size);
567 		max_vis = pio_write_vi_base + nic_data->n_piobufs;
568 	} else {
569 		pio_write_vi_base = 0;
570 		wc_mem_map_size = 0;
571 		max_vis = min_vis;
572 	}
573 
574 	/* In case the last attached driver failed to free VIs, do it now */
575 	rc = efx_ef10_free_vis(efx);
576 	if (rc != 0)
577 		return rc;
578 
579 	rc = efx_ef10_alloc_vis(efx, min_vis, max_vis);
580 	if (rc != 0)
581 		return rc;
582 
583 	/* If we didn't get enough VIs to map all the PIO buffers, free the
584 	 * PIO buffers
585 	 */
586 	if (nic_data->n_piobufs &&
587 	    nic_data->n_allocated_vis <
588 	    pio_write_vi_base + nic_data->n_piobufs) {
589 		netif_dbg(efx, probe, efx->net_dev,
590 			  "%u VIs are not sufficient to map %u PIO buffers\n",
591 			  nic_data->n_allocated_vis, nic_data->n_piobufs);
592 		efx_ef10_free_piobufs(efx);
593 	}
594 
595 	/* Shrink the original UC mapping of the memory BAR */
596 	membase = ioremap_nocache(efx->membase_phys, uc_mem_map_size);
597 	if (!membase) {
598 		netif_err(efx, probe, efx->net_dev,
599 			  "could not shrink memory BAR to %x\n",
600 			  uc_mem_map_size);
601 		return -ENOMEM;
602 	}
603 	iounmap(efx->membase);
604 	efx->membase = membase;
605 
606 	/* Set up the WC mapping if needed */
607 	if (wc_mem_map_size) {
608 		nic_data->wc_membase = ioremap_wc(efx->membase_phys +
609 						  uc_mem_map_size,
610 						  wc_mem_map_size);
611 		if (!nic_data->wc_membase) {
612 			netif_err(efx, probe, efx->net_dev,
613 				  "could not allocate WC mapping of size %x\n",
614 				  wc_mem_map_size);
615 			return -ENOMEM;
616 		}
617 		nic_data->pio_write_vi_base = pio_write_vi_base;
618 		nic_data->pio_write_base =
619 			nic_data->wc_membase +
620 			(pio_write_vi_base * EFX_VI_PAGE_SIZE + ER_DZ_TX_PIOBUF -
621 			 uc_mem_map_size);
622 
623 		rc = efx_ef10_link_piobufs(efx);
624 		if (rc)
625 			efx_ef10_free_piobufs(efx);
626 	}
627 
628 	netif_dbg(efx, probe, efx->net_dev,
629 		  "memory BAR at %pa (virtual %p+%x UC, %p+%x WC)\n",
630 		  &efx->membase_phys, efx->membase, uc_mem_map_size,
631 		  nic_data->wc_membase, wc_mem_map_size);
632 
633 	return 0;
634 }
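/* Illustrative layout for the mapping split above, assuming 4K host
 * pages, EFX_VI_PAGE_SIZE == 8K, ER_DZ_TX_PIOBUF == 0x1000, min_vis == 4
 * and n_piobufs == 2:
 *
 *	uc_mem_map_size   = PAGE_ALIGN(3 * 8K + 4K)   = 28K
 *	pio_write_vi_base = 28K / 8K                  = 3
 *	wc_mem_map_size   = PAGE_ALIGN(5 * 8K) - 28K  = 12K
 *	max_vis           = 3 + 2                     = 5
 *
 * VI 3 is then shared between the last TX queue (via the UC mapping)
 * and the first PIO buffer (via the WC mapping) - exactly the special
 * case handled in efx_ef10_link_piobufs().
 */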
635 
636 static int efx_ef10_init_nic(struct efx_nic *efx)
637 {
638 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
639 	int rc;
640 
641 	if (nic_data->must_check_datapath_caps) {
642 		rc = efx_ef10_init_datapath_caps(efx);
643 		if (rc)
644 			return rc;
645 		nic_data->must_check_datapath_caps = false;
646 	}
647 
648 	if (nic_data->must_realloc_vis) {
649 		/* We cannot let the number of VIs change now */
650 		rc = efx_ef10_alloc_vis(efx, nic_data->n_allocated_vis,
651 					nic_data->n_allocated_vis);
652 		if (rc)
653 			return rc;
654 		nic_data->must_realloc_vis = false;
655 	}
656 
657 	if (nic_data->must_restore_piobufs && nic_data->n_piobufs) {
658 		rc = efx_ef10_alloc_piobufs(efx, nic_data->n_piobufs);
659 		if (rc == 0) {
660 			rc = efx_ef10_link_piobufs(efx);
661 			if (rc)
662 				efx_ef10_free_piobufs(efx);
663 		}
664 
665 		/* Log an error on failure, but this is non-fatal */
666 		if (rc)
667 			netif_err(efx, drv, efx->net_dev,
668 				  "failed to restore PIO buffers (%d)\n", rc);
669 		nic_data->must_restore_piobufs = false;
670 	}
671 
672 	efx_ef10_rx_push_indir_table(efx);
673 	return 0;
674 }
675 
676 static int efx_ef10_map_reset_flags(u32 *flags)
677 {
678 	enum {
679 		EF10_RESET_PORT = ((ETH_RESET_MAC | ETH_RESET_PHY) <<
680 				   ETH_RESET_SHARED_SHIFT),
681 		EF10_RESET_MC = ((ETH_RESET_DMA | ETH_RESET_FILTER |
682 				  ETH_RESET_OFFLOAD | ETH_RESET_MAC |
683 				  ETH_RESET_PHY | ETH_RESET_MGMT) <<
684 				 ETH_RESET_SHARED_SHIFT)
685 	};
686 
687 	/* We assume for now that our PCI function is permitted to
688 	 * reset everything.
689 	 */
690 
691 	if ((*flags & EF10_RESET_MC) == EF10_RESET_MC) {
692 		*flags &= ~EF10_RESET_MC;
693 		return RESET_TYPE_WORLD;
694 	}
695 
696 	if ((*flags & EF10_RESET_PORT) == EF10_RESET_PORT) {
697 		*flags &= ~EF10_RESET_PORT;
698 		return RESET_TYPE_ALL;
699 	}
700 
701 	/* No invisible reset is implemented */
702 
703 	return -EINVAL;
704 }
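/* Usage sketch (hedged - the caller is the generic ethtool reset path):
 *
 *	u32 flags = ETH_RESET_ALL;
 *	rc = efx->type->map_reset_flags(&flags);
 *	// rc == RESET_TYPE_WORLD; the MC-reset bits are cleared from
 *	// flags, and any bits still set are reported back to userland
 *	// as unhandled.
 */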
705 
706 #define EF10_DMA_STAT(ext_name, mcdi_name)			\
707 	[EF10_STAT_ ## ext_name] =				\
708 	{ #ext_name, 64, 8 * MC_CMD_MAC_ ## mcdi_name }
709 #define EF10_DMA_INVIS_STAT(int_name, mcdi_name)		\
710 	[EF10_STAT_ ## int_name] =				\
711 	{ NULL, 64, 8 * MC_CMD_MAC_ ## mcdi_name }
712 #define EF10_OTHER_STAT(ext_name)				\
713 	[EF10_STAT_ ## ext_name] = { #ext_name, 0, 0 }
714 
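/* For reference, the first entry below, EF10_DMA_STAT(tx_bytes,
 * TX_BYTES), expands to
 *	[EF10_STAT_tx_bytes] = { "tx_bytes", 64, 8 * MC_CMD_MAC_TX_BYTES }
 * i.e. a 64-bit counter whose byte offset in the stats DMA buffer is
 * eight times its MCDI MAC-stats index (each counter occupies 8 bytes).
 * EF10_OTHER_STAT entries have no DMA source and are derived in
 * software (see efx_ef10_try_update_nic_stats()).
 */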
715 static const struct efx_hw_stat_desc efx_ef10_stat_desc[EF10_STAT_COUNT] = {
716 	EF10_DMA_STAT(tx_bytes, TX_BYTES),
717 	EF10_DMA_STAT(tx_packets, TX_PKTS),
718 	EF10_DMA_STAT(tx_pause, TX_PAUSE_PKTS),
719 	EF10_DMA_STAT(tx_control, TX_CONTROL_PKTS),
720 	EF10_DMA_STAT(tx_unicast, TX_UNICAST_PKTS),
721 	EF10_DMA_STAT(tx_multicast, TX_MULTICAST_PKTS),
722 	EF10_DMA_STAT(tx_broadcast, TX_BROADCAST_PKTS),
723 	EF10_DMA_STAT(tx_lt64, TX_LT64_PKTS),
724 	EF10_DMA_STAT(tx_64, TX_64_PKTS),
725 	EF10_DMA_STAT(tx_65_to_127, TX_65_TO_127_PKTS),
726 	EF10_DMA_STAT(tx_128_to_255, TX_128_TO_255_PKTS),
727 	EF10_DMA_STAT(tx_256_to_511, TX_256_TO_511_PKTS),
728 	EF10_DMA_STAT(tx_512_to_1023, TX_512_TO_1023_PKTS),
729 	EF10_DMA_STAT(tx_1024_to_15xx, TX_1024_TO_15XX_PKTS),
730 	EF10_DMA_STAT(tx_15xx_to_jumbo, TX_15XX_TO_JUMBO_PKTS),
731 	EF10_DMA_STAT(rx_bytes, RX_BYTES),
732 	EF10_DMA_INVIS_STAT(rx_bytes_minus_good_bytes, RX_BAD_BYTES),
733 	EF10_OTHER_STAT(rx_good_bytes),
734 	EF10_OTHER_STAT(rx_bad_bytes),
735 	EF10_DMA_STAT(rx_packets, RX_PKTS),
736 	EF10_DMA_STAT(rx_good, RX_GOOD_PKTS),
737 	EF10_DMA_STAT(rx_bad, RX_BAD_FCS_PKTS),
738 	EF10_DMA_STAT(rx_pause, RX_PAUSE_PKTS),
739 	EF10_DMA_STAT(rx_control, RX_CONTROL_PKTS),
740 	EF10_DMA_STAT(rx_unicast, RX_UNICAST_PKTS),
741 	EF10_DMA_STAT(rx_multicast, RX_MULTICAST_PKTS),
742 	EF10_DMA_STAT(rx_broadcast, RX_BROADCAST_PKTS),
743 	EF10_DMA_STAT(rx_lt64, RX_UNDERSIZE_PKTS),
744 	EF10_DMA_STAT(rx_64, RX_64_PKTS),
745 	EF10_DMA_STAT(rx_65_to_127, RX_65_TO_127_PKTS),
746 	EF10_DMA_STAT(rx_128_to_255, RX_128_TO_255_PKTS),
747 	EF10_DMA_STAT(rx_256_to_511, RX_256_TO_511_PKTS),
748 	EF10_DMA_STAT(rx_512_to_1023, RX_512_TO_1023_PKTS),
749 	EF10_DMA_STAT(rx_1024_to_15xx, RX_1024_TO_15XX_PKTS),
750 	EF10_DMA_STAT(rx_15xx_to_jumbo, RX_15XX_TO_JUMBO_PKTS),
751 	EF10_DMA_STAT(rx_gtjumbo, RX_GTJUMBO_PKTS),
752 	EF10_DMA_STAT(rx_bad_gtjumbo, RX_JABBER_PKTS),
753 	EF10_DMA_STAT(rx_overflow, RX_OVERFLOW_PKTS),
754 	EF10_DMA_STAT(rx_align_error, RX_ALIGN_ERROR_PKTS),
755 	EF10_DMA_STAT(rx_length_error, RX_LENGTH_ERROR_PKTS),
756 	EF10_DMA_STAT(rx_nodesc_drops, RX_NODESC_DROPS),
757 };
758 
759 #define HUNT_COMMON_STAT_MASK ((1ULL << EF10_STAT_tx_bytes) |		\
760 			       (1ULL << EF10_STAT_tx_packets) |		\
761 			       (1ULL << EF10_STAT_tx_pause) |		\
762 			       (1ULL << EF10_STAT_tx_unicast) |		\
763 			       (1ULL << EF10_STAT_tx_multicast) |	\
764 			       (1ULL << EF10_STAT_tx_broadcast) |	\
765 			       (1ULL << EF10_STAT_rx_bytes) |		\
766 			       (1ULL << EF10_STAT_rx_bytes_minus_good_bytes) | \
767 			       (1ULL << EF10_STAT_rx_good_bytes) |	\
768 			       (1ULL << EF10_STAT_rx_bad_bytes) |	\
769 			       (1ULL << EF10_STAT_rx_packets) |		\
770 			       (1ULL << EF10_STAT_rx_good) |		\
771 			       (1ULL << EF10_STAT_rx_bad) |		\
772 			       (1ULL << EF10_STAT_rx_pause) |		\
773 			       (1ULL << EF10_STAT_rx_control) |		\
774 			       (1ULL << EF10_STAT_rx_unicast) |		\
775 			       (1ULL << EF10_STAT_rx_multicast) |	\
776 			       (1ULL << EF10_STAT_rx_broadcast) |	\
777 			       (1ULL << EF10_STAT_rx_lt64) |		\
778 			       (1ULL << EF10_STAT_rx_64) |		\
779 			       (1ULL << EF10_STAT_rx_65_to_127) |	\
780 			       (1ULL << EF10_STAT_rx_128_to_255) |	\
781 			       (1ULL << EF10_STAT_rx_256_to_511) |	\
782 			       (1ULL << EF10_STAT_rx_512_to_1023) |	\
783 			       (1ULL << EF10_STAT_rx_1024_to_15xx) |	\
784 			       (1ULL << EF10_STAT_rx_15xx_to_jumbo) |	\
785 			       (1ULL << EF10_STAT_rx_gtjumbo) |		\
786 			       (1ULL << EF10_STAT_rx_bad_gtjumbo) |	\
787 			       (1ULL << EF10_STAT_rx_overflow) |	\
788 			       (1ULL << EF10_STAT_rx_nodesc_drops))
789 
790 /* These statistics are only provided by the 10G MAC.  For a 10G/40G
791  * switchable port we do not expose these because they might not
792  * include all the packets they should.
793  */
794 #define HUNT_10G_ONLY_STAT_MASK ((1ULL << EF10_STAT_tx_control) |	\
795 				 (1ULL << EF10_STAT_tx_lt64) |		\
796 				 (1ULL << EF10_STAT_tx_64) |		\
797 				 (1ULL << EF10_STAT_tx_65_to_127) |	\
798 				 (1ULL << EF10_STAT_tx_128_to_255) |	\
799 				 (1ULL << EF10_STAT_tx_256_to_511) |	\
800 				 (1ULL << EF10_STAT_tx_512_to_1023) |	\
801 				 (1ULL << EF10_STAT_tx_1024_to_15xx) |	\
802 				 (1ULL << EF10_STAT_tx_15xx_to_jumbo))
803 
804 /* These statistics are only provided by the 40G MAC.  For a 10G/40G
805  * switchable port we do expose these because the errors will otherwise
806  * be silent.
807  */
808 #define HUNT_40G_EXTRA_STAT_MASK ((1ULL << EF10_STAT_rx_align_error) |	\
809 				  (1ULL << EF10_STAT_rx_length_error))
810 
811 #if BITS_PER_LONG == 64
812 #define STAT_MASK_BITMAP(bits) (bits)
813 #else
814 #define STAT_MASK_BITMAP(bits) (bits) & 0xffffffff, (bits) >> 32
815 #endif
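/* On 32-bit builds STAT_MASK_BITMAP(bits) expands to two initialisers,
 * low dword then high dword, so that e.g. the 48-bit mask
 * 0x9abc12345678 is stored as { 0x12345678, 0x9abc } - matching the
 * unsigned long[] bitmap layout consumed by for_each_set_bit().
 */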
816 
817 static const unsigned long *efx_ef10_stat_mask(struct efx_nic *efx)
818 {
819 	static const unsigned long hunt_40g_stat_mask[] = {
820 		STAT_MASK_BITMAP(HUNT_COMMON_STAT_MASK |
821 				 HUNT_40G_EXTRA_STAT_MASK)
822 	};
823 	static const unsigned long hunt_10g_only_stat_mask[] = {
824 		STAT_MASK_BITMAP(HUNT_COMMON_STAT_MASK |
825 				 HUNT_10G_ONLY_STAT_MASK)
826 	};
827 	u32 port_caps = efx_mcdi_phy_get_caps(efx);
828 
829 	if (port_caps & (1 << MC_CMD_PHY_CAP_40000FDX_LBN))
830 		return hunt_40g_stat_mask;
831 	else
832 		return hunt_10g_only_stat_mask;
833 }
834 
835 static size_t efx_ef10_describe_stats(struct efx_nic *efx, u8 *names)
836 {
837 	return efx_nic_describe_stats(efx_ef10_stat_desc, EF10_STAT_COUNT,
838 				      efx_ef10_stat_mask(efx), names);
839 }
840 
841 static int efx_ef10_try_update_nic_stats(struct efx_nic *efx)
842 {
843 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
844 	const unsigned long *stats_mask = efx_ef10_stat_mask(efx);
845 	__le64 generation_start, generation_end;
846 	u64 *stats = nic_data->stats;
847 	__le64 *dma_stats;
848 
849 	dma_stats = efx->stats_buffer.addr;
851 
852 	generation_end = dma_stats[MC_CMD_MAC_GENERATION_END];
853 	if (generation_end == EFX_MC_STATS_GENERATION_INVALID)
854 		return 0;
855 	rmb();
856 	efx_nic_update_stats(efx_ef10_stat_desc, EF10_STAT_COUNT, stats_mask,
857 			     stats, efx->stats_buffer.addr, false);
858 	generation_start = dma_stats[MC_CMD_MAC_GENERATION_START];
859 	if (generation_end != generation_start)
860 		return -EAGAIN;
861 
862 	/* Update derived statistics */
863 	stats[EF10_STAT_rx_good_bytes] =
864 		stats[EF10_STAT_rx_bytes] -
865 		stats[EF10_STAT_rx_bytes_minus_good_bytes];
866 	efx_update_diff_stat(&stats[EF10_STAT_rx_bad_bytes],
867 			     stats[EF10_STAT_rx_bytes_minus_good_bytes]);
868 
869 	return 0;
870 }
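/* The generation counts above implement a seqlock-like protocol: the MC
 * (presumably) writes GENERATION_START, then the statistics, then
 * GENERATION_END.  We read in the reverse order, with an rmb() after
 * reading GENERATION_END, so START == END afterwards guarantees a
 * consistent snapshot; otherwise a stats DMA raced us and we return
 * -EAGAIN for the caller to retry.
 */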
871 
872 
873 static size_t efx_ef10_update_stats(struct efx_nic *efx, u64 *full_stats,
874 				    struct rtnl_link_stats64 *core_stats)
875 {
876 	const unsigned long *mask = efx_ef10_stat_mask(efx);
877 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
878 	u64 *stats = nic_data->stats;
879 	size_t stats_count = 0, index;
880 	int retry;
881 
882 	/* If we're unlucky enough to read statistics during the DMA, wait
883 	 * up to 10ms for it to finish (typically takes <500us)
884 	 */
885 	for (retry = 0; retry < 100; ++retry) {
886 		if (efx_ef10_try_update_nic_stats(efx) == 0)
887 			break;
888 		udelay(100);
889 	}
890 
891 	if (full_stats) {
892 		for_each_set_bit(index, mask, EF10_STAT_COUNT) {
893 			if (efx_ef10_stat_desc[index].name) {
894 				*full_stats++ = stats[index];
895 				++stats_count;
896 			}
897 		}
898 	}
899 
900 	if (core_stats) {
901 		core_stats->rx_packets = stats[EF10_STAT_rx_packets];
902 		core_stats->tx_packets = stats[EF10_STAT_tx_packets];
903 		core_stats->rx_bytes = stats[EF10_STAT_rx_bytes];
904 		core_stats->tx_bytes = stats[EF10_STAT_tx_bytes];
905 		core_stats->rx_dropped = stats[EF10_STAT_rx_nodesc_drops];
906 		core_stats->multicast = stats[EF10_STAT_rx_multicast];
907 		core_stats->rx_length_errors =
908 			stats[EF10_STAT_rx_gtjumbo] +
909 			stats[EF10_STAT_rx_length_error];
910 		core_stats->rx_crc_errors = stats[EF10_STAT_rx_bad];
911 		core_stats->rx_frame_errors = stats[EF10_STAT_rx_align_error];
912 		core_stats->rx_fifo_errors = stats[EF10_STAT_rx_overflow];
913 		core_stats->rx_errors = (core_stats->rx_length_errors +
914 					 core_stats->rx_crc_errors +
915 					 core_stats->rx_frame_errors);
916 	}
917 
918 	return stats_count;
919 }
920 
921 static void efx_ef10_push_irq_moderation(struct efx_channel *channel)
922 {
923 	struct efx_nic *efx = channel->efx;
924 	unsigned int mode, value;
925 	efx_dword_t timer_cmd;
926 
927 	if (channel->irq_moderation) {
928 		mode = 3;
929 		value = channel->irq_moderation - 1;
930 	} else {
931 		mode = 0;
932 		value = 0;
933 	}
934 
935 	if (EFX_EF10_WORKAROUND_35388(efx)) {
936 		EFX_POPULATE_DWORD_3(timer_cmd, ERF_DD_EVQ_IND_TIMER_FLAGS,
937 				     EFE_DD_EVQ_IND_TIMER_FLAGS,
938 				     ERF_DD_EVQ_IND_TIMER_MODE, mode,
939 				     ERF_DD_EVQ_IND_TIMER_VAL, value);
940 		efx_writed_page(efx, &timer_cmd, ER_DD_EVQ_INDIRECT,
941 				channel->channel);
942 	} else {
943 		EFX_POPULATE_DWORD_2(timer_cmd, ERF_DZ_TC_TIMER_MODE, mode,
944 				     ERF_DZ_TC_TIMER_VAL, value);
945 		efx_writed_page(efx, &timer_cmd, ER_DZ_EVQ_TMR,
946 				channel->channel);
947 	}
948 }
949 
950 static void efx_ef10_get_wol(struct efx_nic *efx, struct ethtool_wolinfo *wol)
951 {
952 	wol->supported = 0;
953 	wol->wolopts = 0;
954 	memset(&wol->sopass, 0, sizeof(wol->sopass));
955 }
956 
957 static int efx_ef10_set_wol(struct efx_nic *efx, u32 type)
958 {
959 	if (type != 0)
960 		return -EINVAL;
961 	return 0;
962 }
963 
964 static void efx_ef10_mcdi_request(struct efx_nic *efx,
965 				  const efx_dword_t *hdr, size_t hdr_len,
966 				  const efx_dword_t *sdu, size_t sdu_len)
967 {
968 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
969 	u8 *pdu = nic_data->mcdi_buf.addr;
970 
971 	memcpy(pdu, hdr, hdr_len);
972 	memcpy(pdu + hdr_len, sdu, sdu_len);
973 	wmb();
974 
975 	/* The hardware provides 'low' and 'high' (doorbell) registers
976 	 * for passing the 64-bit address of an MCDI request to
977 	 * firmware.  However the dwords are swapped by firmware.  The
978 	 * least significant bits of the doorbell are then 0 for all
979 	 * MCDI requests due to alignment.
980 	 */
981 	_efx_writed(efx, cpu_to_le32((u64)nic_data->mcdi_buf.dma_addr >> 32),
982 		    ER_DZ_MC_DB_LWRD);
983 	_efx_writed(efx, cpu_to_le32((u32)nic_data->mcdi_buf.dma_addr),
984 		    ER_DZ_MC_DB_HWRD);
985 }
986 
987 static bool efx_ef10_mcdi_poll_response(struct efx_nic *efx)
988 {
989 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
990 	const efx_dword_t hdr = *(const efx_dword_t *)nic_data->mcdi_buf.addr;
991 
992 	rmb();
993 	return EFX_DWORD_FIELD(hdr, MCDI_HEADER_RESPONSE);
994 }
995 
996 static void
997 efx_ef10_mcdi_read_response(struct efx_nic *efx, efx_dword_t *outbuf,
998 			    size_t offset, size_t outlen)
999 {
1000 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
1001 	const u8 *pdu = nic_data->mcdi_buf.addr;
1002 
1003 	memcpy(outbuf, pdu + offset, outlen);
1004 }
1005 
1006 static int efx_ef10_mcdi_poll_reboot(struct efx_nic *efx)
1007 {
1008 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
1009 	int rc;
1010 
1011 	rc = efx_ef10_get_warm_boot_count(efx);
1012 	if (rc < 0) {
1013 		/* The firmware is presumably in the process of
1014 		 * rebooting.  However, we are supposed to report each
1015 		 * reboot just once, so we must only do that once we
1016 		 * can read and store the updated warm boot count.
1017 		 */
1018 		return 0;
1019 	}
1020 
1021 	if (rc == nic_data->warm_boot_count)
1022 		return 0;
1023 
1024 	nic_data->warm_boot_count = rc;
1025 
1026 	/* All our allocations have been reset */
1027 	nic_data->must_realloc_vis = true;
1028 	nic_data->must_restore_filters = true;
1029 	nic_data->must_restore_piobufs = true;
1030 	nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID;
1031 
1032 	/* The datapath firmware might have been changed */
1033 	nic_data->must_check_datapath_caps = true;
1034 
1035 	/* MAC statistics have been cleared on the NIC; clear the local
1036 	 * statistic that we update with efx_update_diff_stat().
1037 	 */
1038 	nic_data->stats[EF10_STAT_rx_bad_bytes] = 0;
1039 
1040 	return -EIO;
1041 }
1042 
1043 /* Handle an MSI interrupt
1044  *
1045  * Handle an MSI hardware interrupt.  This routine schedules event
1046  * queue processing.  No interrupt acknowledgement cycle is necessary.
1047  * Also, we never need to check that the interrupt is for us, since
1048  * MSI interrupts cannot be shared.
1049  */
1050 static irqreturn_t efx_ef10_msi_interrupt(int irq, void *dev_id)
1051 {
1052 	struct efx_msi_context *context = dev_id;
1053 	struct efx_nic *efx = context->efx;
1054 
1055 	netif_vdbg(efx, intr, efx->net_dev,
1056 		   "IRQ %d on CPU %d\n", irq, raw_smp_processor_id());
1057 
1058 	if (likely(ACCESS_ONCE(efx->irq_soft_enabled))) {
1059 		/* Note test interrupts */
1060 		if (context->index == efx->irq_level)
1061 			efx->last_irq_cpu = raw_smp_processor_id();
1062 
1063 		/* Schedule processing of the channel */
1064 		efx_schedule_channel_irq(efx->channel[context->index]);
1065 	}
1066 
1067 	return IRQ_HANDLED;
1068 }
1069 
1070 static irqreturn_t efx_ef10_legacy_interrupt(int irq, void *dev_id)
1071 {
1072 	struct efx_nic *efx = dev_id;
1073 	bool soft_enabled = ACCESS_ONCE(efx->irq_soft_enabled);
1074 	struct efx_channel *channel;
1075 	efx_dword_t reg;
1076 	u32 queues;
1077 
1078 	/* Read the ISR which also ACKs the interrupts */
1079 	efx_readd(efx, &reg, ER_DZ_BIU_INT_ISR);
1080 	queues = EFX_DWORD_FIELD(reg, ERF_DZ_ISR_REG);
1081 
1082 	if (queues == 0)
1083 		return IRQ_NONE;
1084 
1085 	if (likely(soft_enabled)) {
1086 		/* Note test interrupts */
1087 		if (queues & (1U << efx->irq_level))
1088 			efx->last_irq_cpu = raw_smp_processor_id();
1089 
1090 		efx_for_each_channel(channel, efx) {
1091 			if (queues & 1)
1092 				efx_schedule_channel_irq(channel);
1093 			queues >>= 1;
1094 		}
1095 	}
1096 
1097 	netif_vdbg(efx, intr, efx->net_dev,
1098 		   "IRQ %d on CPU %d status " EFX_DWORD_FMT "\n",
1099 		   irq, raw_smp_processor_id(), EFX_DWORD_VAL(reg));
1100 
1101 	return IRQ_HANDLED;
1102 }
1103 
1104 static void efx_ef10_irq_test_generate(struct efx_nic *efx)
1105 {
1106 	MCDI_DECLARE_BUF(inbuf, MC_CMD_TRIGGER_INTERRUPT_IN_LEN);
1107 
1108 	BUILD_BUG_ON(MC_CMD_TRIGGER_INTERRUPT_OUT_LEN != 0);
1109 
1110 	MCDI_SET_DWORD(inbuf, TRIGGER_INTERRUPT_IN_INTR_LEVEL, efx->irq_level);
1111 	(void) efx_mcdi_rpc(efx, MC_CMD_TRIGGER_INTERRUPT,
1112 			    inbuf, sizeof(inbuf), NULL, 0, NULL);
1113 }
1114 
1115 static int efx_ef10_tx_probe(struct efx_tx_queue *tx_queue)
1116 {
1117 	return efx_nic_alloc_buffer(tx_queue->efx, &tx_queue->txd.buf,
1118 				    (tx_queue->ptr_mask + 1) *
1119 				    sizeof(efx_qword_t),
1120 				    GFP_KERNEL);
1121 }
1122 
1123 /* This writes to the TX_DESC_WPTR and also pushes data */
1124 static inline void efx_ef10_push_tx_desc(struct efx_tx_queue *tx_queue,
1125 					 const efx_qword_t *txd)
1126 {
1127 	unsigned int write_ptr;
1128 	efx_oword_t reg;
1129 
1130 	write_ptr = tx_queue->write_count & tx_queue->ptr_mask;
1131 	EFX_POPULATE_OWORD_1(reg, ERF_DZ_TX_DESC_WPTR, write_ptr);
1132 	reg.qword[0] = *txd;
1133 	efx_writeo_page(tx_queue->efx, &reg,
1134 			ER_DZ_TX_DESC_UPD, tx_queue->queue);
1135 }
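/* Note: this is the "TX push" path - the 16-byte write sends the first
 * descriptor together with the new write pointer in one doorbell, so
 * the NIC can begin processing without first DMA-fetching the
 * descriptor from host memory.  Compare efx_ef10_notify_tx_desc(),
 * which writes only the pointer.
 */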
1136 
1137 static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
1138 {
1139 	MCDI_DECLARE_BUF(inbuf, MC_CMD_INIT_TXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 /
1140 						       EFX_BUF_SIZE));
1141 	MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_TXQ_OUT_LEN);
1142 	bool csum_offload = tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD;
1143 	size_t entries = tx_queue->txd.buf.len / EFX_BUF_SIZE;
1144 	struct efx_channel *channel = tx_queue->channel;
1145 	struct efx_nic *efx = tx_queue->efx;
1146 	size_t inlen, outlen;
1147 	dma_addr_t dma_addr;
1148 	efx_qword_t *txd;
1149 	int rc;
1150 	int i;
1151 
1152 	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_SIZE, tx_queue->ptr_mask + 1);
1153 	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_TARGET_EVQ, channel->channel);
1154 	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_LABEL, tx_queue->queue);
1155 	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_INSTANCE, tx_queue->queue);
1156 	MCDI_POPULATE_DWORD_2(inbuf, INIT_TXQ_IN_FLAGS,
1157 			      INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload,
1158 			      INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload);
1159 	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_OWNER_ID, 0);
1160 	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_PORT_ID, EVB_PORT_ID_ASSIGNED);
1161 
1162 	dma_addr = tx_queue->txd.buf.dma_addr;
1163 
1164 	netif_dbg(efx, hw, efx->net_dev, "pushing TXQ %d. %zu entries (%llx)\n",
1165 		  tx_queue->queue, entries, (u64)dma_addr);
1166 
1167 	for (i = 0; i < entries; ++i) {
1168 		MCDI_SET_ARRAY_QWORD(inbuf, INIT_TXQ_IN_DMA_ADDR, i, dma_addr);
1169 		dma_addr += EFX_BUF_SIZE;
1170 	}
1171 
1172 	inlen = MC_CMD_INIT_TXQ_IN_LEN(entries);
1173 
1174 	rc = efx_mcdi_rpc(efx, MC_CMD_INIT_TXQ, inbuf, inlen,
1175 			  outbuf, sizeof(outbuf), &outlen);
1176 	if (rc)
1177 		goto fail;
1178 
1179 	/* A previous user of this TX queue might have set us up the
1180 	 * bomb by writing a descriptor to the TX push collector but
1181 	 * not the doorbell.  (Each collector belongs to a port, not a
1182 	 * queue or function, so cannot easily be reset.)  We must
1183 	 * attempt to push a no-op descriptor in its place.
1184 	 */
1185 	tx_queue->buffer[0].flags = EFX_TX_BUF_OPTION;
1186 	tx_queue->insert_count = 1;
1187 	txd = efx_tx_desc(tx_queue, 0);
1188 	EFX_POPULATE_QWORD_4(*txd,
1189 			     ESF_DZ_TX_DESC_IS_OPT, true,
1190 			     ESF_DZ_TX_OPTION_TYPE,
1191 			     ESE_DZ_TX_OPTION_DESC_CRC_CSUM,
1192 			     ESF_DZ_TX_OPTION_UDP_TCP_CSUM, csum_offload,
1193 			     ESF_DZ_TX_OPTION_IP_CSUM, csum_offload);
1194 	tx_queue->write_count = 1;
1195 	wmb();
1196 	efx_ef10_push_tx_desc(tx_queue, txd);
1197 
1198 	return;
1199 
1200 fail:
1201 	WARN_ON(true);
1202 	netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
1203 }
1204 
1205 static void efx_ef10_tx_fini(struct efx_tx_queue *tx_queue)
1206 {
1207 	MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_TXQ_IN_LEN);
1208 	MCDI_DECLARE_BUF(outbuf, MC_CMD_FINI_TXQ_OUT_LEN);
1209 	struct efx_nic *efx = tx_queue->efx;
1210 	size_t outlen;
1211 	int rc;
1212 
1213 	MCDI_SET_DWORD(inbuf, FINI_TXQ_IN_INSTANCE,
1214 		       tx_queue->queue);
1215 
1216 	rc = efx_mcdi_rpc(efx, MC_CMD_FINI_TXQ, inbuf, sizeof(inbuf),
1217 			  outbuf, sizeof(outbuf), &outlen);
1218 
1219 	if (rc && rc != -EALREADY)
1220 		goto fail;
1221 
1222 	return;
1223 
1224 fail:
1225 	netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
1226 }
1227 
1228 static void efx_ef10_tx_remove(struct efx_tx_queue *tx_queue)
1229 {
1230 	efx_nic_free_buffer(tx_queue->efx, &tx_queue->txd.buf);
1231 }
1232 
1233 /* This writes to the TX_DESC_WPTR; write pointer for TX descriptor ring */
1234 static inline void efx_ef10_notify_tx_desc(struct efx_tx_queue *tx_queue)
1235 {
1236 	unsigned int write_ptr;
1237 	efx_dword_t reg;
1238 
1239 	write_ptr = tx_queue->write_count & tx_queue->ptr_mask;
1240 	EFX_POPULATE_DWORD_1(reg, ERF_DZ_TX_DESC_WPTR_DWORD, write_ptr);
1241 	efx_writed_page(tx_queue->efx, &reg,
1242 			ER_DZ_TX_DESC_UPD_DWORD, tx_queue->queue);
1243 }
1244 
1245 static void efx_ef10_tx_write(struct efx_tx_queue *tx_queue)
1246 {
1247 	unsigned int old_write_count = tx_queue->write_count;
1248 	struct efx_tx_buffer *buffer;
1249 	unsigned int write_ptr;
1250 	efx_qword_t *txd;
1251 
1252 	BUG_ON(tx_queue->write_count == tx_queue->insert_count);
1253 
1254 	do {
1255 		write_ptr = tx_queue->write_count & tx_queue->ptr_mask;
1256 		buffer = &tx_queue->buffer[write_ptr];
1257 		txd = efx_tx_desc(tx_queue, write_ptr);
1258 		++tx_queue->write_count;
1259 
1260 		/* Create TX descriptor ring entry */
1261 		if (buffer->flags & EFX_TX_BUF_OPTION) {
1262 			*txd = buffer->option;
1263 		} else {
1264 			BUILD_BUG_ON(EFX_TX_BUF_CONT != 1);
1265 			EFX_POPULATE_QWORD_3(
1266 				*txd,
1267 				ESF_DZ_TX_KER_CONT,
1268 				buffer->flags & EFX_TX_BUF_CONT,
1269 				ESF_DZ_TX_KER_BYTE_CNT, buffer->len,
1270 				ESF_DZ_TX_KER_BUF_ADDR, buffer->dma_addr);
1271 		}
1272 	} while (tx_queue->write_count != tx_queue->insert_count);
1273 
1274 	wmb(); /* Ensure descriptors are written before they are fetched */
1275 
1276 	if (efx_nic_may_push_tx_desc(tx_queue, old_write_count)) {
1277 		txd = efx_tx_desc(tx_queue,
1278 				  old_write_count & tx_queue->ptr_mask);
1279 		efx_ef10_push_tx_desc(tx_queue, txd);
1280 		++tx_queue->pushes;
1281 	} else {
1282 		efx_ef10_notify_tx_desc(tx_queue);
1283 	}
1284 }
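/* A push is only attempted when efx_nic_may_push_tx_desc() says the
 * queue was previously empty and exactly one new descriptor was written
 * (a brief summary of its check in nic.h); in all other cases a plain
 * write-pointer doorbell is sufficient and cheaper.
 */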
1285 
1286 static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context)
1287 {
1288 	MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_ALLOC_IN_LEN);
1289 	MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN);
1290 	size_t outlen;
1291 	int rc;
1292 
1293 	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_UPSTREAM_PORT_ID,
1294 		       EVB_PORT_ID_ASSIGNED);
1295 	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_TYPE,
1296 		       MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_EXCLUSIVE);
1297 	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_NUM_QUEUES,
1298 		       EFX_MAX_CHANNELS);
1299 
1300 	rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_ALLOC, inbuf, sizeof(inbuf),
1301 		outbuf, sizeof(outbuf), &outlen);
1302 	if (rc != 0)
1303 		return rc;
1304 
1305 	if (outlen < MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN)
1306 		return -EIO;
1307 
1308 	*context = MCDI_DWORD(outbuf, RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID);
1309 
1310 	return 0;
1311 }
1312 
1313 static void efx_ef10_free_rss_context(struct efx_nic *efx, u32 context)
1314 {
1315 	MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_FREE_IN_LEN);
1316 	int rc;
1317 
1318 	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_FREE_IN_RSS_CONTEXT_ID,
1319 		       context);
1320 
1321 	rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_FREE, inbuf, sizeof(inbuf),
1322 			    NULL, 0, NULL);
1323 	WARN_ON(rc != 0);
1324 }
1325 
1326 static int efx_ef10_populate_rss_table(struct efx_nic *efx, u32 context)
1327 {
1328 	MCDI_DECLARE_BUF(tablebuf, MC_CMD_RSS_CONTEXT_SET_TABLE_IN_LEN);
1329 	MCDI_DECLARE_BUF(keybuf, MC_CMD_RSS_CONTEXT_SET_KEY_IN_LEN);
1330 	int i, rc;
1331 
1332 	MCDI_SET_DWORD(tablebuf, RSS_CONTEXT_SET_TABLE_IN_RSS_CONTEXT_ID,
1333 		       context);
1334 	BUILD_BUG_ON(ARRAY_SIZE(efx->rx_indir_table) !=
1335 		     MC_CMD_RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE_LEN);
1336 
1337 	for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); ++i)
1338 		MCDI_PTR(tablebuf,
1339 			 RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE)[i] =
1340 				(u8) efx->rx_indir_table[i];
1341 
1342 	rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_TABLE, tablebuf,
1343 			  sizeof(tablebuf), NULL, 0, NULL);
1344 	if (rc != 0)
1345 		return rc;
1346 
1347 	MCDI_SET_DWORD(keybuf, RSS_CONTEXT_SET_KEY_IN_RSS_CONTEXT_ID,
1348 		       context);
1349 	BUILD_BUG_ON(ARRAY_SIZE(efx->rx_hash_key) !=
1350 		     MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN);
1351 	for (i = 0; i < ARRAY_SIZE(efx->rx_hash_key); ++i)
1352 		MCDI_PTR(keybuf, RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY)[i] =
1353 			efx->rx_hash_key[i];
1354 
1355 	return efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_KEY, keybuf,
1356 			    sizeof(keybuf), NULL, 0, NULL);
1357 }
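/* Note on sizes: the BUILD_BUG_ON()s above pin the driver tables to the
 * MCDI layout - one byte per indirection table entry (hence the (u8)
 * cast; queue numbers must fit in 8 bits) and the usual 40-byte
 * Toeplitz hash key.
 */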
1358 
1359 static void efx_ef10_rx_free_indir_table(struct efx_nic *efx)
1360 {
1361 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
1362 
1363 	if (nic_data->rx_rss_context != EFX_EF10_RSS_CONTEXT_INVALID)
1364 		efx_ef10_free_rss_context(efx, nic_data->rx_rss_context);
1365 	nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID;
1366 }
1367 
1368 static void efx_ef10_rx_push_indir_table(struct efx_nic *efx)
1369 {
1370 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
1371 	int rc;
1372 
1373 	netif_dbg(efx, drv, efx->net_dev, "pushing RX indirection table\n");
1374 
1375 	if (nic_data->rx_rss_context == EFX_EF10_RSS_CONTEXT_INVALID) {
1376 		rc = efx_ef10_alloc_rss_context(efx, &nic_data->rx_rss_context);
1377 		if (rc != 0)
1378 			goto fail;
1379 	}
1380 
1381 	rc = efx_ef10_populate_rss_table(efx, nic_data->rx_rss_context);
1382 	if (rc != 0)
1383 		goto fail;
1384 
1385 	return;
1386 
1387 fail:
1388 	netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
1389 }
1390 
1391 static int efx_ef10_rx_probe(struct efx_rx_queue *rx_queue)
1392 {
1393 	return efx_nic_alloc_buffer(rx_queue->efx, &rx_queue->rxd.buf,
1394 				    (rx_queue->ptr_mask + 1) *
1395 				    sizeof(efx_qword_t),
1396 				    GFP_KERNEL);
1397 }
1398 
1399 static void efx_ef10_rx_init(struct efx_rx_queue *rx_queue)
1400 {
1401 	MCDI_DECLARE_BUF(inbuf,
1402 			 MC_CMD_INIT_RXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 /
1403 						EFX_BUF_SIZE));
1404 	MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_RXQ_OUT_LEN);
1405 	struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
1406 	size_t entries = rx_queue->rxd.buf.len / EFX_BUF_SIZE;
1407 	struct efx_nic *efx = rx_queue->efx;
1408 	size_t inlen, outlen;
1409 	dma_addr_t dma_addr;
1410 	int rc;
1411 	int i;
1412 
1413 	rx_queue->scatter_n = 0;
1414 	rx_queue->scatter_len = 0;
1415 
1416 	MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_SIZE, rx_queue->ptr_mask + 1);
1417 	MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_TARGET_EVQ, channel->channel);
1418 	MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_LABEL, efx_rx_queue_index(rx_queue));
1419 	MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_INSTANCE,
1420 		       efx_rx_queue_index(rx_queue));
1421 	MCDI_POPULATE_DWORD_1(inbuf, INIT_RXQ_IN_FLAGS,
1422 			      INIT_RXQ_IN_FLAG_PREFIX, 1);
1423 	MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_OWNER_ID, 0);
1424 	MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_PORT_ID, EVB_PORT_ID_ASSIGNED);
1425 
1426 	dma_addr = rx_queue->rxd.buf.dma_addr;
1427 
1428 	netif_dbg(efx, hw, efx->net_dev, "pushing RXQ %d. %zu entries (%llx)\n",
1429 		  efx_rx_queue_index(rx_queue), entries, (u64)dma_addr);
1430 
1431 	for (i = 0; i < entries; ++i) {
1432 		MCDI_SET_ARRAY_QWORD(inbuf, INIT_RXQ_IN_DMA_ADDR, i, dma_addr);
1433 		dma_addr += EFX_BUF_SIZE;
1434 	}
1435 
1436 	inlen = MC_CMD_INIT_RXQ_IN_LEN(entries);
1437 
1438 	rc = efx_mcdi_rpc(efx, MC_CMD_INIT_RXQ, inbuf, inlen,
1439 			  outbuf, sizeof(outbuf), &outlen);
1440 	if (rc)
1441 		goto fail;
1442 
1443 	return;
1444 
1445 fail:
1446 	WARN_ON(true);
1447 	netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
1448 }
1449 
1450 static void efx_ef10_rx_fini(struct efx_rx_queue *rx_queue)
1451 {
1452 	MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_RXQ_IN_LEN);
1453 	MCDI_DECLARE_BUF(outbuf, MC_CMD_FINI_RXQ_OUT_LEN);
1454 	struct efx_nic *efx = rx_queue->efx;
1455 	size_t outlen;
1456 	int rc;
1457 
1458 	MCDI_SET_DWORD(inbuf, FINI_RXQ_IN_INSTANCE,
1459 		       efx_rx_queue_index(rx_queue));
1460 
1461 	rc = efx_mcdi_rpc(efx, MC_CMD_FINI_RXQ, inbuf, sizeof(inbuf),
1462 			  outbuf, sizeof(outbuf), &outlen);
1463 
1464 	if (rc && rc != -EALREADY)
1465 		goto fail;
1466 
1467 	return;
1468 
1469 fail:
1470 	netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
1471 }
1472 
1473 static void efx_ef10_rx_remove(struct efx_rx_queue *rx_queue)
1474 {
1475 	efx_nic_free_buffer(rx_queue->efx, &rx_queue->rxd.buf);
1476 }
1477 
1478 /* This creates an entry in the RX descriptor queue */
1479 static inline void
1480 efx_ef10_build_rx_desc(struct efx_rx_queue *rx_queue, unsigned int index)
1481 {
1482 	struct efx_rx_buffer *rx_buf;
1483 	efx_qword_t *rxd;
1484 
1485 	rxd = efx_rx_desc(rx_queue, index);
1486 	rx_buf = efx_rx_buffer(rx_queue, index);
1487 	EFX_POPULATE_QWORD_2(*rxd,
1488 			     ESF_DZ_RX_KER_BYTE_CNT, rx_buf->len,
1489 			     ESF_DZ_RX_KER_BUF_ADDR, rx_buf->dma_addr);
1490 }
1491 
1492 static void efx_ef10_rx_write(struct efx_rx_queue *rx_queue)
1493 {
1494 	struct efx_nic *efx = rx_queue->efx;
1495 	unsigned int write_count;
1496 	efx_dword_t reg;
1497 
1498 	/* Firmware requires that RX_DESC_WPTR be a multiple of 8 */
1499 	write_count = rx_queue->added_count & ~7;
1500 	if (rx_queue->notified_count == write_count)
1501 		return;
1502 
1503 	do
1504 		efx_ef10_build_rx_desc(
1505 			rx_queue,
1506 			rx_queue->notified_count & rx_queue->ptr_mask);
1507 	while (++rx_queue->notified_count != write_count);
1508 
1509 	wmb();
1510 	EFX_POPULATE_DWORD_1(reg, ERF_DZ_RX_DESC_WPTR,
1511 			     write_count & rx_queue->ptr_mask);
1512 	efx_writed_page(efx, &reg, ER_DZ_RX_DESC_UPD,
1513 			efx_rx_queue_index(rx_queue));
1514 }
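/* Worked example of the multiple-of-8 rule above: with added_count ==
 * 21 and notified_count == 16, write_count = 21 & ~7 = 16 and nothing
 * is pushed; once added_count reaches 24, descriptors 16-23 are built
 * and a single doorbell write advances RX_DESC_WPTR to 24 & ptr_mask.
 */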
1515 
1516 static efx_mcdi_async_completer efx_ef10_rx_defer_refill_complete;
1517 
1518 static void efx_ef10_rx_defer_refill(struct efx_rx_queue *rx_queue)
1519 {
1520 	struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
1521 	MCDI_DECLARE_BUF(inbuf, MC_CMD_DRIVER_EVENT_IN_LEN);
1522 	efx_qword_t event;
1523 
1524 	EFX_POPULATE_QWORD_2(event,
1525 			     ESF_DZ_EV_CODE, EFX_EF10_DRVGEN_EV,
1526 			     ESF_DZ_EV_DATA, EFX_EF10_REFILL);
1527 
1528 	MCDI_SET_DWORD(inbuf, DRIVER_EVENT_IN_EVQ, channel->channel);
1529 
1530 	/* MCDI_SET_QWORD is not appropriate here since EFX_POPULATE_* has
1531 	 * already swapped the data to little-endian order.
1532 	 */
1533 	memcpy(MCDI_PTR(inbuf, DRIVER_EVENT_IN_DATA), &event.u64[0],
1534 	       sizeof(efx_qword_t));
1535 
1536 	efx_mcdi_rpc_async(channel->efx, MC_CMD_DRIVER_EVENT,
1537 			   inbuf, sizeof(inbuf), 0,
1538 			   efx_ef10_rx_defer_refill_complete, 0);
1539 }
1540 
1541 static void
1542 efx_ef10_rx_defer_refill_complete(struct efx_nic *efx, unsigned long cookie,
1543 				  int rc, efx_dword_t *outbuf,
1544 				  size_t outlen_actual)
1545 {
1546 	/* nothing to do */
1547 }
1548 
1549 static int efx_ef10_ev_probe(struct efx_channel *channel)
1550 {
1551 	return efx_nic_alloc_buffer(channel->efx, &channel->eventq.buf,
1552 				    (channel->eventq_mask + 1) *
1553 				    sizeof(efx_qword_t),
1554 				    GFP_KERNEL);
1555 }
1556 
1557 static int efx_ef10_ev_init(struct efx_channel *channel)
1558 {
1559 	MCDI_DECLARE_BUF(inbuf,
1560 			 MC_CMD_INIT_EVQ_IN_LEN(EFX_MAX_EVQ_SIZE * 8 /
1561 						EFX_BUF_SIZE));
1562 	MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_EVQ_OUT_LEN);
1563 	size_t entries = channel->eventq.buf.len / EFX_BUF_SIZE;
1564 	struct efx_nic *efx = channel->efx;
1565 	struct efx_ef10_nic_data *nic_data;
1566 	bool supports_rx_merge;
1567 	size_t inlen, outlen;
1568 	dma_addr_t dma_addr;
1569 	int rc;
1570 	int i;
1571 
1572 	nic_data = efx->nic_data;
1573 	supports_rx_merge =
1574 		!!(nic_data->datapath_caps &
1575 		   1 << MC_CMD_GET_CAPABILITIES_OUT_RX_BATCHING_LBN);
1576 
1577 	/* Fill event queue with all ones (i.e. empty events) */
1578 	memset(channel->eventq.buf.addr, 0xff, channel->eventq.buf.len);
1579 
1580 	MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_SIZE, channel->eventq_mask + 1);
1581 	MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_INSTANCE, channel->channel);
1582 	/* INIT_EVQ expects index in vector table, not absolute */
1583 	MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_IRQ_NUM, channel->channel);
1584 	MCDI_POPULATE_DWORD_4(inbuf, INIT_EVQ_IN_FLAGS,
1585 			      INIT_EVQ_IN_FLAG_INTERRUPTING, 1,
1586 			      INIT_EVQ_IN_FLAG_RX_MERGE, 1,
1587 			      INIT_EVQ_IN_FLAG_TX_MERGE, 1,
1588 			      INIT_EVQ_IN_FLAG_CUT_THRU, !supports_rx_merge);
1589 	MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_MODE,
1590 		       MC_CMD_INIT_EVQ_IN_TMR_MODE_DIS);
1591 	MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_LOAD, 0);
1592 	MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_RELOAD, 0);
1593 	MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_MODE,
1594 		       MC_CMD_INIT_EVQ_IN_COUNT_MODE_DIS);
1595 	MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_THRSHLD, 0);
1596 
1597 	dma_addr = channel->eventq.buf.dma_addr;
1598 	for (i = 0; i < entries; ++i) {
1599 		MCDI_SET_ARRAY_QWORD(inbuf, INIT_EVQ_IN_DMA_ADDR, i, dma_addr);
1600 		dma_addr += EFX_BUF_SIZE;
1601 	}
1602 
1603 	inlen = MC_CMD_INIT_EVQ_IN_LEN(entries);
1604 
1605 	rc = efx_mcdi_rpc(efx, MC_CMD_INIT_EVQ, inbuf, inlen,
1606 			  outbuf, sizeof(outbuf), &outlen);
1607 	if (rc)
1608 		goto fail;
1609 
1610 	/* IRQ return is ignored */
1611 
1612 	return 0;
1613 
1614 fail:
1615 	netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
1616 	return rc;
1617 }
1618 
1619 static void efx_ef10_ev_fini(struct efx_channel *channel)
1620 {
1621 	MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_EVQ_IN_LEN);
1622 	MCDI_DECLARE_BUF(outbuf, MC_CMD_FINI_EVQ_OUT_LEN);
1623 	struct efx_nic *efx = channel->efx;
1624 	size_t outlen;
1625 	int rc;
1626 
1627 	MCDI_SET_DWORD(inbuf, FINI_EVQ_IN_INSTANCE, channel->channel);
1628 
1629 	rc = efx_mcdi_rpc(efx, MC_CMD_FINI_EVQ, inbuf, sizeof(inbuf),
1630 			  outbuf, sizeof(outbuf), &outlen);
1631 
1632 	if (rc && rc != -EALREADY)
1633 		goto fail;
1634 
1635 	return;
1636 
1637 fail:
1638 	netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
1639 }
1640 
1641 static void efx_ef10_ev_remove(struct efx_channel *channel)
1642 {
1643 	efx_nic_free_buffer(channel->efx, &channel->eventq.buf);
1644 }
1645 
1646 static void efx_ef10_handle_rx_wrong_queue(struct efx_rx_queue *rx_queue,
1647 					   unsigned int rx_queue_label)
1648 {
1649 	struct efx_nic *efx = rx_queue->efx;
1650 
1651 	netif_info(efx, hw, efx->net_dev,
1652 		   "rx event arrived on queue %d labeled as queue %u\n",
1653 		   efx_rx_queue_index(rx_queue), rx_queue_label);
1654 
1655 	efx_schedule_reset(efx, RESET_TYPE_DISABLE);
1656 }
1657 
1658 static void
1659 efx_ef10_handle_rx_bad_lbits(struct efx_rx_queue *rx_queue,
1660 			     unsigned int actual, unsigned int expected)
1661 {
1662 	unsigned int dropped = (actual - expected) & rx_queue->ptr_mask;
1663 	struct efx_nic *efx = rx_queue->efx;
1664 
1665 	netif_info(efx, hw, efx->net_dev,
1666 		   "dropped %d events (index=%d expected=%d)\n",
1667 		   dropped, actual, expected);
1668 
1669 	efx_schedule_reset(efx, RESET_TYPE_DISABLE);
1670 }
1671 
1672 /* A partially received RX packet was aborted; clean up. */
1673 static void efx_ef10_handle_rx_abort(struct efx_rx_queue *rx_queue)
1674 {
1675 	unsigned int rx_desc_ptr;
1676 
1677 	WARN_ON(rx_queue->scatter_n == 0);
1678 
1679 	netif_dbg(rx_queue->efx, hw, rx_queue->efx->net_dev,
1680 		  "scattered RX aborted (dropping %u buffers)\n",
1681 		  rx_queue->scatter_n);
1682 
1683 	rx_desc_ptr = rx_queue->removed_count & rx_queue->ptr_mask;
1684 
1685 	efx_rx_packet(rx_queue, rx_desc_ptr, rx_queue->scatter_n,
1686 		      0, EFX_RX_PKT_DISCARD);
1687 
1688 	rx_queue->removed_count += rx_queue->scatter_n;
1689 	rx_queue->scatter_n = 0;
1690 	rx_queue->scatter_len = 0;
1691 	++efx_rx_queue_channel(rx_queue)->n_rx_nodesc_trunc;
1692 }
1693 
1694 static int efx_ef10_handle_rx_event(struct efx_channel *channel,
1695 				    const efx_qword_t *event)
1696 {
1697 	unsigned int rx_bytes, next_ptr_lbits, rx_queue_label, rx_l4_class;
1698 	unsigned int n_descs, n_packets, i;
1699 	struct efx_nic *efx = channel->efx;
1700 	struct efx_rx_queue *rx_queue;
1701 	bool rx_cont;
1702 	u16 flags = 0;
1703 
1704 	if (unlikely(ACCESS_ONCE(efx->reset_pending)))
1705 		return 0;
1706 
1707 	/* Basic packet information */
1708 	rx_bytes = EFX_QWORD_FIELD(*event, ESF_DZ_RX_BYTES);
1709 	next_ptr_lbits = EFX_QWORD_FIELD(*event, ESF_DZ_RX_DSC_PTR_LBITS);
1710 	rx_queue_label = EFX_QWORD_FIELD(*event, ESF_DZ_RX_QLABEL);
1711 	rx_l4_class = EFX_QWORD_FIELD(*event, ESF_DZ_RX_L4_CLASS);
1712 	rx_cont = EFX_QWORD_FIELD(*event, ESF_DZ_RX_CONT);
1713 
1714 	WARN_ON(EFX_QWORD_FIELD(*event, ESF_DZ_RX_DROP_EVENT));
1715 
1716 	rx_queue = efx_channel_get_rx_queue(channel);
1717 
1718 	if (unlikely(rx_queue_label != efx_rx_queue_index(rx_queue)))
1719 		efx_ef10_handle_rx_wrong_queue(rx_queue, rx_queue_label);
1720 
1721 	n_descs = ((next_ptr_lbits - rx_queue->removed_count) &
1722 		   ((1 << ESF_DZ_RX_DSC_PTR_LBITS_WIDTH) - 1));
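	/* Illustration (not driver code): the event carries only the
	 * low bits of the descriptor pointer, so the subtraction must
	 * wrap.  With a hypothetical field width of 4,
	 * removed_count == 14 and next_ptr_lbits == 3 give
	 * n_descs = (3 - 14) & 0xf == 5.
	 */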
1723 
1724 	if (n_descs != rx_queue->scatter_n + 1) {
1725 		/* Detect an RX abort: the event consumed no new descriptors */
1726 		if (unlikely(n_descs == rx_queue->scatter_n)) {
1727 			WARN_ON(rx_bytes != 0);
1728 			efx_ef10_handle_rx_abort(rx_queue);
1729 			return 0;
1730 		}
1731 
1732 		if (unlikely(rx_queue->scatter_n != 0)) {
1733 			/* Scattered packet completions cannot be
1734 			 * merged, so something has gone wrong.
1735 			 */
1736 			efx_ef10_handle_rx_bad_lbits(
1737 				rx_queue, next_ptr_lbits,
1738 				(rx_queue->removed_count +
1739 				 rx_queue->scatter_n + 1) &
1740 				((1 << ESF_DZ_RX_DSC_PTR_LBITS_WIDTH) - 1));
1741 			return 0;
1742 		}
1743 
1744 		/* Merged completion for multiple non-scattered packets */
1745 		rx_queue->scatter_n = 1;
1746 		rx_queue->scatter_len = 0;
1747 		n_packets = n_descs;
1748 		++channel->n_rx_merge_events;
1749 		channel->n_rx_merge_packets += n_packets;
1750 		flags |= EFX_RX_PKT_PREFIX_LEN;
1751 	} else {
1752 		++rx_queue->scatter_n;
1753 		rx_queue->scatter_len += rx_bytes;
1754 		if (rx_cont)
1755 			return 0;
1756 		n_packets = 1;
1757 	}
1758 
1759 	if (unlikely(EFX_QWORD_FIELD(*event, ESF_DZ_RX_ECRC_ERR)))
1760 		flags |= EFX_RX_PKT_DISCARD;
1761 
1762 	if (unlikely(EFX_QWORD_FIELD(*event, ESF_DZ_RX_IPCKSUM_ERR))) {
1763 		channel->n_rx_ip_hdr_chksum_err += n_packets;
1764 	} else if (unlikely(EFX_QWORD_FIELD(*event,
1765 					    ESF_DZ_RX_TCPUDP_CKSUM_ERR))) {
1766 		channel->n_rx_tcp_udp_chksum_err += n_packets;
1767 	} else if (rx_l4_class == ESE_DZ_L4_CLASS_TCP ||
1768 		   rx_l4_class == ESE_DZ_L4_CLASS_UDP) {
1769 		flags |= EFX_RX_PKT_CSUMMED;
1770 	}
1771 
1772 	if (rx_l4_class == ESE_DZ_L4_CLASS_TCP)
1773 		flags |= EFX_RX_PKT_TCP;
1774 
1775 	channel->irq_mod_score += 2 * n_packets;
1776 
1777 	/* Handle received packet(s) */
1778 	for (i = 0; i < n_packets; i++) {
1779 		efx_rx_packet(rx_queue,
1780 			      rx_queue->removed_count & rx_queue->ptr_mask,
1781 			      rx_queue->scatter_n, rx_queue->scatter_len,
1782 			      flags);
1783 		rx_queue->removed_count += rx_queue->scatter_n;
1784 	}
1785 
1786 	rx_queue->scatter_n = 0;
1787 	rx_queue->scatter_len = 0;
1788 
1789 	return n_packets;
1790 }
1791 
1792 static int
1793 efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
1794 {
1795 	struct efx_nic *efx = channel->efx;
1796 	struct efx_tx_queue *tx_queue;
1797 	unsigned int tx_ev_desc_ptr;
1798 	unsigned int tx_ev_q_label;
1799 	int tx_descs = 0;
1800 
1801 	if (unlikely(ACCESS_ONCE(efx->reset_pending)))
1802 		return 0;
1803 
1804 	if (unlikely(EFX_QWORD_FIELD(*event, ESF_DZ_TX_DROP_EVENT)))
1805 		return 0;
1806 
1807 	/* Transmit completion */
1808 	tx_ev_desc_ptr = EFX_QWORD_FIELD(*event, ESF_DZ_TX_DESCR_INDX);
1809 	tx_ev_q_label = EFX_QWORD_FIELD(*event, ESF_DZ_TX_QLABEL);
1810 	tx_queue = efx_channel_get_tx_queue(channel,
1811 					    tx_ev_q_label % EFX_TXQ_TYPES);
1812 	tx_descs = ((tx_ev_desc_ptr + 1 - tx_queue->read_count) &
1813 		    tx_queue->ptr_mask);
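	/* Illustration (not driver code): the completion index wraps
	 * modulo the ring size.  With ptr_mask == 1023,
	 * read_count == 1020 and tx_ev_desc_ptr == 3,
	 * tx_descs = (3 + 1 - 1020) & 1023 == 8.
	 */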
1814 	efx_xmit_done(tx_queue, tx_ev_desc_ptr & tx_queue->ptr_mask);
1815 
1816 	return tx_descs;
1817 }
1818 
1819 static void
1820 efx_ef10_handle_driver_event(struct efx_channel *channel, efx_qword_t *event)
1821 {
1822 	struct efx_nic *efx = channel->efx;
1823 	int subcode;
1824 
1825 	subcode = EFX_QWORD_FIELD(*event, ESF_DZ_DRV_SUB_CODE);
1826 
1827 	switch (subcode) {
1828 	case ESE_DZ_DRV_TIMER_EV:
1829 	case ESE_DZ_DRV_WAKE_UP_EV:
1830 		break;
1831 	case ESE_DZ_DRV_START_UP_EV:
1832 		/* Event queue initialisation complete; nothing to do */
1833 		break;
1834 	default:
1835 		netif_err(efx, hw, efx->net_dev,
1836 			  "channel %d unknown driver event type %d"
1837 			  " (data " EFX_QWORD_FMT ")\n",
1838 			  channel->channel, subcode,
1839 			  EFX_QWORD_VAL(*event));
1841 	}
1842 }
1843 
1844 static void efx_ef10_handle_driver_generated_event(struct efx_channel *channel,
1845 						   efx_qword_t *event)
1846 {
1847 	struct efx_nic *efx = channel->efx;
1848 	u32 subcode;
1849 
1850 	subcode = EFX_QWORD_FIELD(*event, EFX_DWORD_0);
1851 
1852 	switch (subcode) {
1853 	case EFX_EF10_TEST:
1854 		channel->event_test_cpu = raw_smp_processor_id();
1855 		break;
1856 	case EFX_EF10_REFILL:
1857 		/* The queue must be empty, so we won't receive any rx
1858 		 * events; therefore efx_process_channel() will not
1859 		 * refill the queue.  Refill it here.
1860 		 */
1861 		efx_fast_push_rx_descriptors(&channel->rx_queue);
1862 		break;
1863 	default:
1864 		netif_err(efx, hw, efx->net_dev,
1865 			  "channel %d unknown driver event type %u"
1866 			  " (data " EFX_QWORD_FMT ")\n",
1867 			  channel->channel, (unsigned) subcode,
1868 			  EFX_QWORD_VAL(*event));
1869 	}
1870 }
1871 
1872 static int efx_ef10_ev_process(struct efx_channel *channel, int quota)
1873 {
1874 	struct efx_nic *efx = channel->efx;
1875 	efx_qword_t event, *p_event;
1876 	unsigned int read_ptr;
1877 	int ev_code;
1878 	int tx_descs = 0;
1879 	int spent = 0;
1880 
1881 	read_ptr = channel->eventq_read_ptr;
1882 
1883 	for (;;) {
1884 		p_event = efx_event(channel, read_ptr);
1885 		event = *p_event;
1886 
1887 		if (!efx_event_present(&event))
1888 			break;
1889 
1890 		EFX_SET_QWORD(*p_event);
1891 
1892 		++read_ptr;
1893 
1894 		ev_code = EFX_QWORD_FIELD(event, ESF_DZ_EV_CODE);
1895 
1896 		netif_vdbg(efx, drv, efx->net_dev,
1897 			   "processing event on %d " EFX_QWORD_FMT "\n",
1898 			   channel->channel, EFX_QWORD_VAL(event));
1899 
1900 		switch (ev_code) {
1901 		case ESE_DZ_EV_CODE_MCDI_EV:
1902 			efx_mcdi_process_event(channel, &event);
1903 			break;
1904 		case ESE_DZ_EV_CODE_RX_EV:
1905 			spent += efx_ef10_handle_rx_event(channel, &event);
1906 			if (spent >= quota) {
1907 				/* XXX can we split a merged event to
1908 				 * avoid going over-quota?
1909 				 */
1910 				spent = quota;
1911 				goto out;
1912 			}
1913 			break;
1914 		case ESE_DZ_EV_CODE_TX_EV:
1915 			tx_descs += efx_ef10_handle_tx_event(channel, &event);
1916 			if (tx_descs > efx->txq_entries) {
1917 				spent = quota;
1918 				goto out;
1919 			} else if (++spent == quota) {
1920 				goto out;
1921 			}
1922 			break;
1923 		case ESE_DZ_EV_CODE_DRIVER_EV:
1924 			efx_ef10_handle_driver_event(channel, &event);
1925 			if (++spent == quota)
1926 				goto out;
1927 			break;
1928 		case EFX_EF10_DRVGEN_EV:
1929 			efx_ef10_handle_driver_generated_event(channel, &event);
1930 			break;
1931 		default:
1932 			netif_err(efx, hw, efx->net_dev,
1933 				  "channel %d unknown event type %d"
1934 				  " (data " EFX_QWORD_FMT ")\n",
1935 				  channel->channel, ev_code,
1936 				  EFX_QWORD_VAL(event));
1937 		}
1938 	}
1939 
1940 out:
1941 	channel->eventq_read_ptr = read_ptr;
1942 	return spent;
1943 }
1944 
1945 static void efx_ef10_ev_read_ack(struct efx_channel *channel)
1946 {
1947 	struct efx_nic *efx = channel->efx;
1948 	efx_dword_t rptr;
1949 
1950 	if (EFX_EF10_WORKAROUND_35388(efx)) {
1951 		BUILD_BUG_ON(EFX_MIN_EVQ_SIZE <
1952 			     (1 << ERF_DD_EVQ_IND_RPTR_WIDTH));
1953 		BUILD_BUG_ON(EFX_MAX_EVQ_SIZE >
1954 			     (1 << 2 * ERF_DD_EVQ_IND_RPTR_WIDTH));
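		/* The masked read pointer is written in two halves of
		 * ERF_DD_EVQ_IND_RPTR_WIDTH bits each.  Illustration
		 * (not driver code) with a hypothetical width of 8: a
		 * masked pointer of 0x1234 is written as 0x12 (HIGH)
		 * then 0x34 (LOW).
		 */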
1955 
1956 		EFX_POPULATE_DWORD_2(rptr, ERF_DD_EVQ_IND_RPTR_FLAGS,
1957 				     EFE_DD_EVQ_IND_RPTR_FLAGS_HIGH,
1958 				     ERF_DD_EVQ_IND_RPTR,
1959 				     (channel->eventq_read_ptr &
1960 				      channel->eventq_mask) >>
1961 				     ERF_DD_EVQ_IND_RPTR_WIDTH);
1962 		efx_writed_page(efx, &rptr, ER_DD_EVQ_INDIRECT,
1963 				channel->channel);
1964 		EFX_POPULATE_DWORD_2(rptr, ERF_DD_EVQ_IND_RPTR_FLAGS,
1965 				     EFE_DD_EVQ_IND_RPTR_FLAGS_LOW,
1966 				     ERF_DD_EVQ_IND_RPTR,
1967 				     channel->eventq_read_ptr &
1968 				     ((1 << ERF_DD_EVQ_IND_RPTR_WIDTH) - 1));
1969 		efx_writed_page(efx, &rptr, ER_DD_EVQ_INDIRECT,
1970 				channel->channel);
1971 	} else {
1972 		EFX_POPULATE_DWORD_1(rptr, ERF_DZ_EVQ_RPTR,
1973 				     channel->eventq_read_ptr &
1974 				     channel->eventq_mask);
1975 		efx_writed_page(efx, &rptr, ER_DZ_EVQ_RPTR, channel->channel);
1976 	}
1977 }
1978 
1979 static void efx_ef10_ev_test_generate(struct efx_channel *channel)
1980 {
1981 	MCDI_DECLARE_BUF(inbuf, MC_CMD_DRIVER_EVENT_IN_LEN);
1982 	struct efx_nic *efx = channel->efx;
1983 	efx_qword_t event;
1984 	int rc;
1985 
1986 	EFX_POPULATE_QWORD_2(event,
1987 			     ESF_DZ_EV_CODE, EFX_EF10_DRVGEN_EV,
1988 			     ESF_DZ_EV_DATA, EFX_EF10_TEST);
1989 
1990 	MCDI_SET_DWORD(inbuf, DRIVER_EVENT_IN_EVQ, channel->channel);
1991 
1992 	/* MCDI_SET_QWORD is not appropriate here since EFX_POPULATE_* has
1993 	 * already swapped the data to little-endian order.
1994 	 */
1995 	memcpy(MCDI_PTR(inbuf, DRIVER_EVENT_IN_DATA), &event.u64[0],
1996 	       sizeof(efx_qword_t));
1997 
1998 	rc = efx_mcdi_rpc(efx, MC_CMD_DRIVER_EVENT, inbuf, sizeof(inbuf),
1999 			  NULL, 0, NULL);
2000 	if (rc != 0)
2001 		goto fail;
2002 
2003 	return;
2004 
2005 fail:
2006 	WARN_ON(true);
2007 	netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
2008 }
2009 
2010 void efx_ef10_handle_drain_event(struct efx_nic *efx)
2011 {
2012 	if (atomic_dec_and_test(&efx->active_queues))
2013 		wake_up(&efx->flush_wq);
2014 
2015 	WARN_ON(atomic_read(&efx->active_queues) < 0);
2016 }
2017 
2018 static int efx_ef10_fini_dmaq(struct efx_nic *efx)
2019 {
2020 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
2021 	struct efx_channel *channel;
2022 	struct efx_tx_queue *tx_queue;
2023 	struct efx_rx_queue *rx_queue;
2024 	int pending;
2025 
2026 	/* If the MC has just rebooted, the TX/RX queues will have already been
2027 	 * torn down, but efx->active_queues needs to be set to zero.
2028 	 */
2029 	if (nic_data->must_realloc_vis) {
2030 		atomic_set(&efx->active_queues, 0);
2031 		return 0;
2032 	}
2033 
2034 	/* Do not attempt to write to the NIC during EEH recovery */
2035 	if (efx->state != STATE_RECOVERY) {
2036 		efx_for_each_channel(channel, efx) {
2037 			efx_for_each_channel_rx_queue(rx_queue, channel)
2038 				efx_ef10_rx_fini(rx_queue);
2039 			efx_for_each_channel_tx_queue(tx_queue, channel)
2040 				efx_ef10_tx_fini(tx_queue);
2041 		}
2042 
2043 		wait_event_timeout(efx->flush_wq,
2044 				   atomic_read(&efx->active_queues) == 0,
2045 				   msecs_to_jiffies(EFX_MAX_FLUSH_TIME));
2046 		pending = atomic_read(&efx->active_queues);
2047 		if (pending) {
2048 			netif_err(efx, hw, efx->net_dev, "failed to flush %d queues\n",
2049 				  pending);
2050 			return -ETIMEDOUT;
2051 		}
2052 	}
2053 
2054 	return 0;
2055 }
2056 
2057 static bool efx_ef10_filter_equal(const struct efx_filter_spec *left,
2058 				  const struct efx_filter_spec *right)
2059 {
2060 	if ((left->match_flags ^ right->match_flags) |
2061 	    ((left->flags ^ right->flags) &
2062 	     (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX)))
2063 		return false;
2064 
2065 	return memcmp(&left->outer_vid, &right->outer_vid,
2066 		      sizeof(struct efx_filter_spec) -
2067 		      offsetof(struct efx_filter_spec, outer_vid)) == 0;
2068 }
2069 
2070 static unsigned int efx_ef10_filter_hash(const struct efx_filter_spec *spec)
2071 {
2072 	BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3);
2073 	return jhash2((const u32 *)&spec->outer_vid,
2074 		      (sizeof(struct efx_filter_spec) -
2075 		       offsetof(struct efx_filter_spec, outer_vid)) / 4,
2076 		      0);
2077 	/* XXX should we randomise the initval? */
2078 }
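/* Note that efx_ef10_filter_equal() and efx_ef10_filter_hash() cover
 * exactly the same byte range, [outer_vid, end of spec), so two specs
 * that differ only in earlier fields such as dmaq_id compare equal
 * and hash to the same bucket.
 */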
2079 
2080 /* Decide whether a filter should be exclusive or should allow
2081  * delivery to additional recipients.  Currently we decide that
2082  * filters for specific local unicast MAC and IP addresses are
2083  * exclusive.
2084  */
2085 static bool efx_ef10_filter_is_exclusive(const struct efx_filter_spec *spec)
2086 {
2087 	if (spec->match_flags & EFX_FILTER_MATCH_LOC_MAC &&
2088 	    !is_multicast_ether_addr(spec->loc_mac))
2089 		return true;
2090 
2091 	if ((spec->match_flags &
2092 	     (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) ==
2093 	    (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) {
2094 		if (spec->ether_type == htons(ETH_P_IP) &&
2095 		    !ipv4_is_multicast(spec->loc_host[0]))
2096 			return true;
2097 		if (spec->ether_type == htons(ETH_P_IPV6) &&
2098 		    ((const u8 *)spec->loc_host)[0] != 0xff)
2099 			return true;
2100 	}
2101 
2102 	return false;
2103 }
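/* Illustration (not driver code): a filter matching the local unicast
 * MAC 00:0f:53:01:02:03 or the local IP 192.0.2.1 is exclusive; one
 * matching a multicast destination such as 224.0.0.1 is not, and may
 * be subscribed to by several recipients.
 */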
2104 
2105 static struct efx_filter_spec *
2106 efx_ef10_filter_entry_spec(const struct efx_ef10_filter_table *table,
2107 			   unsigned int filter_idx)
2108 {
2109 	return (struct efx_filter_spec *)(table->entry[filter_idx].spec &
2110 					  ~EFX_EF10_FILTER_FLAGS);
2111 }
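/* Illustration (not driver code): kmalloc()ed specs are aligned to at
 * least 4 bytes, so the two low bits of the pointer are free to hold
 * the BUSY and STACK_OLD flags.  A busy entry whose spec sits at a
 * hypothetical address 0x...b40 stores 0x...b41 in spec; masking with
 * ~EFX_EF10_FILTER_FLAGS recovers the pointer and masking with
 * EFX_EF10_FILTER_FLAGS recovers the flags.
 */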
2112 
2113 static unsigned int
2114 efx_ef10_filter_entry_flags(const struct efx_ef10_filter_table *table,
2115 			   unsigned int filter_idx)
2116 {
2117 	return table->entry[filter_idx].spec & EFX_EF10_FILTER_FLAGS;
2118 }
2119 
2120 static void
2121 efx_ef10_filter_set_entry(struct efx_ef10_filter_table *table,
2122 			  unsigned int filter_idx,
2123 			  const struct efx_filter_spec *spec,
2124 			  unsigned int flags)
2125 {
2126 	table->entry[filter_idx].spec = (unsigned long)spec | flags;
2127 }
2128 
2129 static void efx_ef10_filter_push_prep(struct efx_nic *efx,
2130 				      const struct efx_filter_spec *spec,
2131 				      efx_dword_t *inbuf, u64 handle,
2132 				      bool replacing)
2133 {
2134 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
2135 
2136 	memset(inbuf, 0, MC_CMD_FILTER_OP_IN_LEN);
2137 
2138 	if (replacing) {
2139 		MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
2140 			       MC_CMD_FILTER_OP_IN_OP_REPLACE);
2141 		MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE, handle);
2142 	} else {
2143 		u32 match_fields = 0;
2144 
2145 		MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
2146 			       efx_ef10_filter_is_exclusive(spec) ?
2147 			       MC_CMD_FILTER_OP_IN_OP_INSERT :
2148 			       MC_CMD_FILTER_OP_IN_OP_SUBSCRIBE);
2149 
2150 		/* Convert match flags and values.  Unlike almost
2151 		 * everything else in MCDI, these fields are in
2152 		 * network byte order.
2153 		 */
2154 		if (spec->match_flags & EFX_FILTER_MATCH_LOC_MAC_IG)
2155 			match_fields |=
2156 				is_multicast_ether_addr(spec->loc_mac) ?
2157 				1 << MC_CMD_FILTER_OP_IN_MATCH_UNKNOWN_MCAST_DST_LBN :
2158 				1 << MC_CMD_FILTER_OP_IN_MATCH_UNKNOWN_UCAST_DST_LBN;
2159 #define COPY_FIELD(gen_flag, gen_field, mcdi_field)			     \
2160 		if (spec->match_flags & EFX_FILTER_MATCH_ ## gen_flag) {     \
2161 			match_fields |=					     \
2162 				1 << MC_CMD_FILTER_OP_IN_MATCH_ ##	     \
2163 				mcdi_field ## _LBN;			     \
2164 			BUILD_BUG_ON(					     \
2165 				MC_CMD_FILTER_OP_IN_ ## mcdi_field ## _LEN < \
2166 				sizeof(spec->gen_field));		     \
2167 			memcpy(MCDI_PTR(inbuf, FILTER_OP_IN_ ##	mcdi_field), \
2168 			       &spec->gen_field, sizeof(spec->gen_field));   \
2169 		}
2170 		COPY_FIELD(REM_HOST, rem_host, SRC_IP);
2171 		COPY_FIELD(LOC_HOST, loc_host, DST_IP);
2172 		COPY_FIELD(REM_MAC, rem_mac, SRC_MAC);
2173 		COPY_FIELD(REM_PORT, rem_port, SRC_PORT);
2174 		COPY_FIELD(LOC_MAC, loc_mac, DST_MAC);
2175 		COPY_FIELD(LOC_PORT, loc_port, DST_PORT);
2176 		COPY_FIELD(ETHER_TYPE, ether_type, ETHER_TYPE);
2177 		COPY_FIELD(INNER_VID, inner_vid, INNER_VLAN);
2178 		COPY_FIELD(OUTER_VID, outer_vid, OUTER_VLAN);
2179 		COPY_FIELD(IP_PROTO, ip_proto, IP_PROTO);
2180 #undef COPY_FIELD
2181 		MCDI_SET_DWORD(inbuf, FILTER_OP_IN_MATCH_FIELDS,
2182 			       match_fields);
2183 	}
2184 
2185 	MCDI_SET_DWORD(inbuf, FILTER_OP_IN_PORT_ID, EVB_PORT_ID_ASSIGNED);
2186 	MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_DEST,
2187 		       spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP ?
2188 		       MC_CMD_FILTER_OP_IN_RX_DEST_DROP :
2189 		       MC_CMD_FILTER_OP_IN_RX_DEST_HOST);
2190 	MCDI_SET_DWORD(inbuf, FILTER_OP_IN_TX_DEST,
2191 		       MC_CMD_FILTER_OP_IN_TX_DEST_DEFAULT);
2192 	MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_QUEUE, spec->dmaq_id);
2193 	MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_MODE,
2194 		       (spec->flags & EFX_FILTER_FLAG_RX_RSS) ?
2195 		       MC_CMD_FILTER_OP_IN_RX_MODE_RSS :
2196 		       MC_CMD_FILTER_OP_IN_RX_MODE_SIMPLE);
2197 	if (spec->flags & EFX_FILTER_FLAG_RX_RSS)
2198 		MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_CONTEXT,
2199 			       spec->rss_context !=
2200 			       EFX_FILTER_RSS_CONTEXT_DEFAULT ?
2201 			       spec->rss_context : nic_data->rx_rss_context);
2202 }
2203 
2204 static int efx_ef10_filter_push(struct efx_nic *efx,
2205 				const struct efx_filter_spec *spec,
2206 				u64 *handle, bool replacing)
2207 {
2208 	MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN);
2209 	MCDI_DECLARE_BUF(outbuf, MC_CMD_FILTER_OP_OUT_LEN);
2210 	int rc;
2211 
2212 	efx_ef10_filter_push_prep(efx, spec, inbuf, *handle, replacing);
2213 	rc = efx_mcdi_rpc(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf),
2214 			  outbuf, sizeof(outbuf), NULL);
2215 	if (rc == 0)
2216 		*handle = MCDI_QWORD(outbuf, FILTER_OP_OUT_HANDLE);
2217 	return rc;
2218 }
2219 
2220 static int efx_ef10_filter_rx_match_pri(struct efx_ef10_filter_table *table,
2221 					enum efx_filter_match_flags match_flags)
2222 {
2223 	unsigned int match_pri;
2224 
2225 	for (match_pri = 0;
2226 	     match_pri < table->rx_match_count;
2227 	     match_pri++)
2228 		if (table->rx_match_flags[match_pri] == match_flags)
2229 			return match_pri;
2230 
2231 	return -EPROTONOSUPPORT;
2232 }
2233 
2234 static s32 efx_ef10_filter_insert(struct efx_nic *efx,
2235 				  struct efx_filter_spec *spec,
2236 				  bool replace_equal)
2237 {
2238 	struct efx_ef10_filter_table *table = efx->filter_state;
2239 	DECLARE_BITMAP(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT);
2240 	struct efx_filter_spec *saved_spec;
2241 	unsigned int match_pri, hash;
2242 	unsigned int priv_flags;
2243 	bool replacing = false;
2244 	int ins_index = -1;
2245 	DEFINE_WAIT(wait);
2246 	bool is_mc_recip;
2247 	s32 rc;
2248 
2249 	/* For now, only support RX filters */
2250 	if ((spec->flags & (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX)) !=
2251 	    EFX_FILTER_FLAG_RX)
2252 		return -EINVAL;
2253 
2254 	rc = efx_ef10_filter_rx_match_pri(table, spec->match_flags);
2255 	if (rc < 0)
2256 		return rc;
2257 	match_pri = rc;
2258 
2259 	hash = efx_ef10_filter_hash(spec);
2260 	is_mc_recip = efx_filter_is_mc_recipient(spec);
2261 	if (is_mc_recip)
2262 		bitmap_zero(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT);
2263 
2264 	/* Find any existing filters with the same match tuple or
2265 	 * else a free slot to insert at.  If any of them are busy,
2266 	 * we have to wait and retry.
2267 	 */
2268 	for (;;) {
2269 		unsigned int depth = 1;
2270 		unsigned int i;
2271 
2272 		spin_lock_bh(&efx->filter_lock);
2273 
2274 		for (;;) {
2275 			i = (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1);
2276 			saved_spec = efx_ef10_filter_entry_spec(table, i);
2277 
2278 			if (!saved_spec) {
2279 				if (ins_index < 0)
2280 					ins_index = i;
2281 			} else if (efx_ef10_filter_equal(spec, saved_spec)) {
2282 				if (table->entry[i].spec &
2283 				    EFX_EF10_FILTER_FLAG_BUSY)
2284 					break;
2285 				if (spec->priority < saved_spec->priority &&
2286 				    !(saved_spec->priority ==
2287 				      EFX_FILTER_PRI_REQUIRED &&
2288 				      saved_spec->flags &
2289 				      EFX_FILTER_FLAG_RX_STACK)) {
2290 					rc = -EPERM;
2291 					goto out_unlock;
2292 				}
2293 				if (!is_mc_recip) {
2294 					/* This is the only one */
2295 					if (spec->priority ==
2296 					    saved_spec->priority &&
2297 					    !replace_equal) {
2298 						rc = -EEXIST;
2299 						goto out_unlock;
2300 					}
2301 					ins_index = i;
2302 					goto found;
2303 				} else if (spec->priority >
2304 					   saved_spec->priority ||
2305 					   (spec->priority ==
2306 					    saved_spec->priority &&
2307 					    replace_equal)) {
2308 					if (ins_index < 0)
2309 						ins_index = i;
2310 					else
2311 						__set_bit(depth, mc_rem_map);
2312 				}
2313 			}
2314 
2315 			/* Once we reach the maximum search depth, use
2316 			 * the first suitable slot or return -EBUSY if
2317 			 * there was none
2318 			 */
2319 			if (depth == EFX_EF10_FILTER_SEARCH_LIMIT) {
2320 				if (ins_index < 0) {
2321 					rc = -EBUSY;
2322 					goto out_unlock;
2323 				}
2324 				goto found;
2325 			}
2326 
2327 			++depth;
2328 		}
2329 
2330 		prepare_to_wait(&table->waitq, &wait, TASK_UNINTERRUPTIBLE);
2331 		spin_unlock_bh(&efx->filter_lock);
2332 		schedule();
2333 	}
2334 
2335 found:
2336 	/* Create a software table entry if necessary, and mark it
2337 	 * busy.  We might yet fail to insert, but any attempt to
2338 	 * insert a conflicting filter while we're waiting for the
2339 	 * firmware must find the busy entry.
2340 	 */
2341 	saved_spec = efx_ef10_filter_entry_spec(table, ins_index);
2342 	if (saved_spec) {
2343 		if (spec->flags & EFX_FILTER_FLAG_RX_STACK) {
2344 			/* Just make sure it won't be removed */
2345 			saved_spec->flags |= EFX_FILTER_FLAG_RX_STACK;
2346 			table->entry[ins_index].spec &=
2347 				~EFX_EF10_FILTER_FLAG_STACK_OLD;
2348 			rc = ins_index;
2349 			goto out_unlock;
2350 		}
2351 		replacing = true;
2352 		priv_flags = efx_ef10_filter_entry_flags(table, ins_index);
2353 	} else {
2354 		saved_spec = kmalloc(sizeof(*spec), GFP_ATOMIC);
2355 		if (!saved_spec) {
2356 			rc = -ENOMEM;
2357 			goto out_unlock;
2358 		}
2359 		*saved_spec = *spec;
2360 		priv_flags = 0;
2361 	}
2362 	efx_ef10_filter_set_entry(table, ins_index, saved_spec,
2363 				  priv_flags | EFX_EF10_FILTER_FLAG_BUSY);
2364 
2365 	/* Mark lower-priority multicast recipients busy prior to removal */
2366 	if (is_mc_recip) {
2367 		unsigned int depth, i;
2368 
2369 		for (depth = 0; depth < EFX_EF10_FILTER_SEARCH_LIMIT; depth++) {
2370 			i = (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1);
2371 			if (test_bit(depth, mc_rem_map))
2372 				table->entry[i].spec |=
2373 					EFX_EF10_FILTER_FLAG_BUSY;
2374 		}
2375 	}
2376 
2377 	spin_unlock_bh(&efx->filter_lock);
2378 
2379 	rc = efx_ef10_filter_push(efx, spec, &table->entry[ins_index].handle,
2380 				  replacing);
2381 
2382 	/* Finalise the software table entry */
2383 	spin_lock_bh(&efx->filter_lock);
2384 	if (rc == 0) {
2385 		if (replacing) {
2386 			/* Update the fields that may differ */
2387 			saved_spec->priority = spec->priority;
2388 			saved_spec->flags &= EFX_FILTER_FLAG_RX_STACK;
2389 			saved_spec->flags |= spec->flags;
2390 			saved_spec->rss_context = spec->rss_context;
2391 			saved_spec->dmaq_id = spec->dmaq_id;
2392 		}
2393 	} else if (!replacing) {
2394 		kfree(saved_spec);
2395 		saved_spec = NULL;
2396 	}
2397 	efx_ef10_filter_set_entry(table, ins_index, saved_spec, priv_flags);
2398 
2399 	/* Remove and finalise entries for lower-priority multicast
2400 	 * recipients
2401 	 */
2402 	if (is_mc_recip) {
2403 		MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN);
2404 		unsigned int depth, i;
2405 
2406 		memset(inbuf, 0, sizeof(inbuf));
2407 
2408 		for (depth = 0; depth < EFX_EF10_FILTER_SEARCH_LIMIT; depth++) {
2409 			if (!test_bit(depth, mc_rem_map))
2410 				continue;
2411 
2412 			i = (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1);
2413 			saved_spec = efx_ef10_filter_entry_spec(table, i);
2414 			priv_flags = efx_ef10_filter_entry_flags(table, i);
2415 
2416 			if (rc == 0) {
2417 				spin_unlock_bh(&efx->filter_lock);
2418 				MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
2419 					       MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE);
2420 				MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE,
2421 					       table->entry[i].handle);
2422 				rc = efx_mcdi_rpc(efx, MC_CMD_FILTER_OP,
2423 						  inbuf, sizeof(inbuf),
2424 						  NULL, 0, NULL);
2425 				spin_lock_bh(&efx->filter_lock);
2426 			}
2427 
2428 			if (rc == 0) {
2429 				kfree(saved_spec);
2430 				saved_spec = NULL;
2431 				priv_flags = 0;
2432 			} else {
2433 				priv_flags &= ~EFX_EF10_FILTER_FLAG_BUSY;
2434 			}
2435 			efx_ef10_filter_set_entry(table, i, saved_spec,
2436 						  priv_flags);
2437 		}
2438 	}
2439 
2440 	/* If successful, return the inserted filter ID */
2441 	if (rc == 0)
2442 		rc = match_pri * HUNT_FILTER_TBL_ROWS + ins_index;
2443 
2444 	wake_up_all(&table->waitq);
2445 out_unlock:
2446 	spin_unlock_bh(&efx->filter_lock);
2447 	finish_wait(&table->waitq, &wait);
2448 	return rc;
2449 }
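/* Illustration (not driver code) of the filter ID returned above:
 * with HUNT_FILTER_TBL_ROWS == 8192, match_pri == 2 and
 * ins_index == 100, the ID is 2 * 8192 + 100 == 16484.  The lookup
 * paths recover the table index as 16484 % 8192 == 100 and the match
 * priority as 16484 / 8192 == 2.
 */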
2450 
2451 static void efx_ef10_filter_update_rx_scatter(struct efx_nic *efx)
2452 {
2453 	/* No need to do anything here on EF10 */
2454 }
2455 
2456 /* Remove a filter.
2457  * If !stack_requested, remove by ID; the ID encodes the match
2458  * priority and may come from userland, so it must be range-checked.
2459  * If stack_requested, remove by raw table index.
2460  */
2461 static int efx_ef10_filter_remove_internal(struct efx_nic *efx,
2462 					   enum efx_filter_priority priority,
2463 					   u32 filter_id, bool stack_requested)
2464 {
2465 	unsigned int filter_idx = filter_id % HUNT_FILTER_TBL_ROWS;
2466 	struct efx_ef10_filter_table *table = efx->filter_state;
2467 	MCDI_DECLARE_BUF(inbuf,
2468 			 MC_CMD_FILTER_OP_IN_HANDLE_OFST +
2469 			 MC_CMD_FILTER_OP_IN_HANDLE_LEN);
2470 	struct efx_filter_spec *spec;
2471 	DEFINE_WAIT(wait);
2472 	int rc;
2473 
2474 	/* Find the software table entry and mark it busy.  Don't
2475 	 * remove it yet; any attempt to update while we're waiting
2476 	 * for the firmware must find the busy entry.
2477 	 */
2478 	for (;;) {
2479 		spin_lock_bh(&efx->filter_lock);
2480 		if (!(table->entry[filter_idx].spec &
2481 		      EFX_EF10_FILTER_FLAG_BUSY))
2482 			break;
2483 		prepare_to_wait(&table->waitq, &wait, TASK_UNINTERRUPTIBLE);
2484 		spin_unlock_bh(&efx->filter_lock);
2485 		schedule();
2486 	}
2487 	spec = efx_ef10_filter_entry_spec(table, filter_idx);
2488 	if (!spec || spec->priority > priority ||
2489 	    (!stack_requested &&
2490 	     efx_ef10_filter_rx_match_pri(table, spec->match_flags) !=
2491 	     filter_id / HUNT_FILTER_TBL_ROWS)) {
2492 		rc = -ENOENT;
2493 		goto out_unlock;
2494 	}
2495 	table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_BUSY;
2496 	spin_unlock_bh(&efx->filter_lock);
2497 
2498 	if (spec->flags & EFX_FILTER_FLAG_RX_STACK && !stack_requested) {
2499 		/* Reset steering of a stack-owned filter */
2500 
2501 		struct efx_filter_spec new_spec = *spec;
2502 
2503 		new_spec.priority = EFX_FILTER_PRI_REQUIRED;
2504 		new_spec.flags = (EFX_FILTER_FLAG_RX |
2505 				  EFX_FILTER_FLAG_RX_RSS |
2506 				  EFX_FILTER_FLAG_RX_STACK);
2507 		new_spec.dmaq_id = 0;
2508 		new_spec.rss_context = EFX_FILTER_RSS_CONTEXT_DEFAULT;
2509 		rc = efx_ef10_filter_push(efx, &new_spec,
2510 					  &table->entry[filter_idx].handle,
2511 					  true);
2512 
2513 		spin_lock_bh(&efx->filter_lock);
2514 		if (rc == 0)
2515 			*spec = new_spec;
2516 	} else {
2517 		/* Really remove the filter */
2518 
2519 		MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
2520 			       efx_ef10_filter_is_exclusive(spec) ?
2521 			       MC_CMD_FILTER_OP_IN_OP_REMOVE :
2522 			       MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE);
2523 		MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE,
2524 			       table->entry[filter_idx].handle);
2525 		rc = efx_mcdi_rpc(efx, MC_CMD_FILTER_OP,
2526 				  inbuf, sizeof(inbuf), NULL, 0, NULL);
2527 
2528 		spin_lock_bh(&efx->filter_lock);
2529 		if (rc == 0) {
2530 			kfree(spec);
2531 			efx_ef10_filter_set_entry(table, filter_idx, NULL, 0);
2532 		}
2533 	}
2534 	table->entry[filter_idx].spec &= ~EFX_EF10_FILTER_FLAG_BUSY;
2535 	wake_up_all(&table->waitq);
2536 out_unlock:
2537 	spin_unlock_bh(&efx->filter_lock);
2538 	finish_wait(&table->waitq, &wait);
2539 	return rc;
2540 }
2541 
2542 static int efx_ef10_filter_remove_safe(struct efx_nic *efx,
2543 				       enum efx_filter_priority priority,
2544 				       u32 filter_id)
2545 {
2546 	return efx_ef10_filter_remove_internal(efx, priority, filter_id, false);
2547 }
2548 
2549 static int efx_ef10_filter_get_safe(struct efx_nic *efx,
2550 				    enum efx_filter_priority priority,
2551 				    u32 filter_id, struct efx_filter_spec *spec)
2552 {
2553 	unsigned int filter_idx = filter_id % HUNT_FILTER_TBL_ROWS;
2554 	struct efx_ef10_filter_table *table = efx->filter_state;
2555 	const struct efx_filter_spec *saved_spec;
2556 	int rc;
2557 
2558 	spin_lock_bh(&efx->filter_lock);
2559 	saved_spec = efx_ef10_filter_entry_spec(table, filter_idx);
2560 	if (saved_spec && saved_spec->priority == priority &&
2561 	    efx_ef10_filter_rx_match_pri(table, saved_spec->match_flags) ==
2562 	    filter_id / HUNT_FILTER_TBL_ROWS) {
2563 		*spec = *saved_spec;
2564 		rc = 0;
2565 	} else {
2566 		rc = -ENOENT;
2567 	}
2568 	spin_unlock_bh(&efx->filter_lock);
2569 	return rc;
2570 }
2571 
2572 static void efx_ef10_filter_clear_rx(struct efx_nic *efx,
2573 				     enum efx_filter_priority priority)
2574 {
2575 	/* TODO */
2576 }
2577 
2578 static u32 efx_ef10_filter_count_rx_used(struct efx_nic *efx,
2579 					 enum efx_filter_priority priority)
2580 {
2581 	struct efx_ef10_filter_table *table = efx->filter_state;
2582 	unsigned int filter_idx;
2583 	s32 count = 0;
2584 
2585 	spin_lock_bh(&efx->filter_lock);
2586 	for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) {
2587 		if (table->entry[filter_idx].spec &&
2588 		    efx_ef10_filter_entry_spec(table, filter_idx)->priority ==
2589 		    priority)
2590 			++count;
2591 	}
2592 	spin_unlock_bh(&efx->filter_lock);
2593 	return count;
2594 }
2595 
2596 static u32 efx_ef10_filter_get_rx_id_limit(struct efx_nic *efx)
2597 {
2598 	struct efx_ef10_filter_table *table = efx->filter_state;
2599 
2600 	return table->rx_match_count * HUNT_FILTER_TBL_ROWS;
2601 }
2602 
2603 static s32 efx_ef10_filter_get_rx_ids(struct efx_nic *efx,
2604 				      enum efx_filter_priority priority,
2605 				      u32 *buf, u32 size)
2606 {
2607 	struct efx_ef10_filter_table *table = efx->filter_state;
2608 	struct efx_filter_spec *spec;
2609 	unsigned int filter_idx;
2610 	s32 count = 0;
2611 
2612 	spin_lock_bh(&efx->filter_lock);
2613 	for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) {
2614 		spec = efx_ef10_filter_entry_spec(table, filter_idx);
2615 		if (spec && spec->priority == priority) {
2616 			if (count == size) {
2617 				count = -EMSGSIZE;
2618 				break;
2619 			}
2620 			buf[count++] = (efx_ef10_filter_rx_match_pri(
2621 						table, spec->match_flags) *
2622 					HUNT_FILTER_TBL_ROWS +
2623 					filter_idx);
2624 		}
2625 	}
2626 	spin_unlock_bh(&efx->filter_lock);
2627 	return count;
2628 }
2629 
2630 #ifdef CONFIG_RFS_ACCEL
2631 
2632 static efx_mcdi_async_completer efx_ef10_filter_rfs_insert_complete;
2633 
2634 static s32 efx_ef10_filter_rfs_insert(struct efx_nic *efx,
2635 				      struct efx_filter_spec *spec)
2636 {
2637 	struct efx_ef10_filter_table *table = efx->filter_state;
2638 	MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN);
2639 	struct efx_filter_spec *saved_spec;
2640 	unsigned int hash, i, depth = 1;
2641 	bool replacing = false;
2642 	int ins_index = -1;
2643 	u64 cookie;
2644 	s32 rc;
2645 
2646 	/* Must be an RX filter without RSS and not for a multicast
2647 	 * destination address (RFS only works for connected sockets).
2648 	 * These restrictions allow us to pass only a tiny amount of
2649 	 * data through to the completion function.
2650 	 */
2651 	EFX_WARN_ON_PARANOID(spec->flags !=
2652 			     (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_RX_SCATTER));
2653 	EFX_WARN_ON_PARANOID(spec->priority != EFX_FILTER_PRI_HINT);
2654 	EFX_WARN_ON_PARANOID(efx_filter_is_mc_recipient(spec));
2655 
2656 	hash = efx_ef10_filter_hash(spec);
2657 
2658 	spin_lock_bh(&efx->filter_lock);
2659 
2660 	/* Find any existing filter with the same match tuple or else
2661 	 * a free slot to insert at.  If an existing filter is busy,
2662 	 * we have to give up.
2663 	 */
2664 	for (;;) {
2665 		i = (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1);
2666 		saved_spec = efx_ef10_filter_entry_spec(table, i);
2667 
2668 		if (!saved_spec) {
2669 			if (ins_index < 0)
2670 				ins_index = i;
2671 		} else if (efx_ef10_filter_equal(spec, saved_spec)) {
2672 			if (table->entry[i].spec & EFX_EF10_FILTER_FLAG_BUSY) {
2673 				rc = -EBUSY;
2674 				goto fail_unlock;
2675 			}
2676 			EFX_WARN_ON_PARANOID(saved_spec->flags &
2677 					     EFX_FILTER_FLAG_RX_STACK);
2678 			if (spec->priority < saved_spec->priority) {
2679 				rc = -EPERM;
2680 				goto fail_unlock;
2681 			}
2682 			ins_index = i;
2683 			break;
2684 		}
2685 
2686 		/* Once we reach the maximum search depth, use the
2687 		 * first suitable slot or return -EBUSY if there was
2688 		 * none
2689 		 */
2690 		if (depth == EFX_EF10_FILTER_SEARCH_LIMIT) {
2691 			if (ins_index < 0) {
2692 				rc = -EBUSY;
2693 				goto fail_unlock;
2694 			}
2695 			break;
2696 		}
2697 
2698 		++depth;
2699 	}
2700 
2701 	/* Create a software table entry if necessary, and mark it
2702 	 * busy.  We might yet fail to insert, but any attempt to
2703 	 * insert a conflicting filter while we're waiting for the
2704 	 * firmware must find the busy entry.
2705 	 */
2706 	saved_spec = efx_ef10_filter_entry_spec(table, ins_index);
2707 	if (saved_spec) {
2708 		replacing = true;
2709 	} else {
2710 		saved_spec = kmalloc(sizeof(*spec), GFP_ATOMIC);
2711 		if (!saved_spec) {
2712 			rc = -ENOMEM;
2713 			goto fail_unlock;
2714 		}
2715 		*saved_spec = *spec;
2716 	}
2717 	efx_ef10_filter_set_entry(table, ins_index, saved_spec,
2718 				  EFX_EF10_FILTER_FLAG_BUSY);
2719 
2720 	spin_unlock_bh(&efx->filter_lock);
2721 
2722 	/* Pack up the variables needed on completion */
2723 	cookie = replacing << 31 | ins_index << 16 | spec->dmaq_id;
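	/* Illustration (not driver code): replacing a filter at
	 * ins_index 5 that steers to RX queue 3 gives
	 * cookie == (1 << 31) | (5 << 16) | 3 == 0x80050003, which
	 * efx_ef10_filter_rfs_insert_complete() unpacks again.
	 */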
2724 
2725 	efx_ef10_filter_push_prep(efx, spec, inbuf,
2726 				  table->entry[ins_index].handle, replacing);
2727 	efx_mcdi_rpc_async(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf),
2728 			   MC_CMD_FILTER_OP_OUT_LEN,
2729 			   efx_ef10_filter_rfs_insert_complete, cookie);
2730 
2731 	return ins_index;
2732 
2733 fail_unlock:
2734 	spin_unlock_bh(&efx->filter_lock);
2735 	return rc;
2736 }
2737 
2738 static void
2739 efx_ef10_filter_rfs_insert_complete(struct efx_nic *efx, unsigned long cookie,
2740 				    int rc, efx_dword_t *outbuf,
2741 				    size_t outlen_actual)
2742 {
2743 	struct efx_ef10_filter_table *table = efx->filter_state;
2744 	unsigned int ins_index, dmaq_id;
2745 	struct efx_filter_spec *spec;
2746 	bool replacing;
2747 
2748 	/* Unpack the cookie */
2749 	replacing = cookie >> 31;
2750 	ins_index = (cookie >> 16) & (HUNT_FILTER_TBL_ROWS - 1);
2751 	dmaq_id = cookie & 0xffff;
2752 
2753 	spin_lock_bh(&efx->filter_lock);
2754 	spec = efx_ef10_filter_entry_spec(table, ins_index);
2755 	if (rc == 0) {
2756 		table->entry[ins_index].handle =
2757 			MCDI_QWORD(outbuf, FILTER_OP_OUT_HANDLE);
2758 		if (replacing)
2759 			spec->dmaq_id = dmaq_id;
2760 	} else if (!replacing) {
2761 		kfree(spec);
2762 		spec = NULL;
2763 	}
2764 	efx_ef10_filter_set_entry(table, ins_index, spec, 0);
2765 	spin_unlock_bh(&efx->filter_lock);
2766 
2767 	wake_up_all(&table->waitq);
2768 }
2769 
2770 static void
2771 efx_ef10_filter_rfs_expire_complete(struct efx_nic *efx,
2772 				    unsigned long filter_idx,
2773 				    int rc, efx_dword_t *outbuf,
2774 				    size_t outlen_actual);
2775 
2776 static bool efx_ef10_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id,
2777 					   unsigned int filter_idx)
2778 {
2779 	struct efx_ef10_filter_table *table = efx->filter_state;
2780 	struct efx_filter_spec *spec =
2781 		efx_ef10_filter_entry_spec(table, filter_idx);
2782 	MCDI_DECLARE_BUF(inbuf,
2783 			 MC_CMD_FILTER_OP_IN_HANDLE_OFST +
2784 			 MC_CMD_FILTER_OP_IN_HANDLE_LEN);
2785 
2786 	if (!spec ||
2787 	    (table->entry[filter_idx].spec & EFX_EF10_FILTER_FLAG_BUSY) ||
2788 	    spec->priority != EFX_FILTER_PRI_HINT ||
2789 	    !rps_may_expire_flow(efx->net_dev, spec->dmaq_id,
2790 				 flow_id, filter_idx))
2791 		return false;
2792 
2793 	MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
2794 		       MC_CMD_FILTER_OP_IN_OP_REMOVE);
2795 	MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE,
2796 		       table->entry[filter_idx].handle);
2797 	if (efx_mcdi_rpc_async(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf), 0,
2798 			       efx_ef10_filter_rfs_expire_complete, filter_idx))
2799 		return false;
2800 
2801 	table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_BUSY;
2802 	return true;
2803 }
2804 
2805 static void
2806 efx_ef10_filter_rfs_expire_complete(struct efx_nic *efx,
2807 				    unsigned long filter_idx,
2808 				    int rc, efx_dword_t *outbuf,
2809 				    size_t outlen_actual)
2810 {
2811 	struct efx_ef10_filter_table *table = efx->filter_state;
2812 	struct efx_filter_spec *spec =
2813 		efx_ef10_filter_entry_spec(table, filter_idx);
2814 
2815 	spin_lock_bh(&efx->filter_lock);
2816 	if (rc == 0) {
2817 		kfree(spec);
2818 		efx_ef10_filter_set_entry(table, filter_idx, NULL, 0);
2819 	}
2820 	table->entry[filter_idx].spec &= ~EFX_EF10_FILTER_FLAG_BUSY;
2821 	wake_up_all(&table->waitq);
2822 	spin_unlock_bh(&efx->filter_lock);
2823 }
2824 
2825 #endif /* CONFIG_RFS_ACCEL */
2826 
2827 static int efx_ef10_filter_match_flags_from_mcdi(u32 mcdi_flags)
2828 {
2829 	int match_flags = 0;
2830 
2831 #define MAP_FLAG(gen_flag, mcdi_field) {				\
2832 		u32 old_mcdi_flags = mcdi_flags;			\
2833 		mcdi_flags &= ~(1 << MC_CMD_FILTER_OP_IN_MATCH_ ##	\
2834 				mcdi_field ## _LBN);			\
2835 		if (mcdi_flags != old_mcdi_flags)			\
2836 			match_flags |= EFX_FILTER_MATCH_ ## gen_flag;	\
2837 	}
2838 	MAP_FLAG(LOC_MAC_IG, UNKNOWN_UCAST_DST);
2839 	MAP_FLAG(LOC_MAC_IG, UNKNOWN_MCAST_DST);
2840 	MAP_FLAG(REM_HOST, SRC_IP);
2841 	MAP_FLAG(LOC_HOST, DST_IP);
2842 	MAP_FLAG(REM_MAC, SRC_MAC);
2843 	MAP_FLAG(REM_PORT, SRC_PORT);
2844 	MAP_FLAG(LOC_MAC, DST_MAC);
2845 	MAP_FLAG(LOC_PORT, DST_PORT);
2846 	MAP_FLAG(ETHER_TYPE, ETHER_TYPE);
2847 	MAP_FLAG(INNER_VID, INNER_VLAN);
2848 	MAP_FLAG(OUTER_VID, OUTER_VLAN);
2849 	MAP_FLAG(IP_PROTO, IP_PROTO);
2850 #undef MAP_FLAG
2851 
2852 	/* Did we map them all? */
2853 	if (mcdi_flags)
2854 		return -EINVAL;
2855 
2856 	return match_flags;
2857 }
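/* Illustration (not driver code): an mcdi_flags value with only the
 * SRC_IP and DST_IP bits set maps to EFX_FILTER_MATCH_REM_HOST |
 * EFX_FILTER_MATCH_LOC_HOST.  Any bit still set after all the
 * MAP_FLAG() invocations names a match type the driver does not know,
 * so the whole combination is rejected with -EINVAL.
 */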
2858 
2859 static int efx_ef10_filter_table_probe(struct efx_nic *efx)
2860 {
2861 	MCDI_DECLARE_BUF(inbuf, MC_CMD_GET_PARSER_DISP_INFO_IN_LEN);
2862 	MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PARSER_DISP_INFO_OUT_LENMAX);
2863 	unsigned int pd_match_pri, pd_match_count;
2864 	struct efx_ef10_filter_table *table;
2865 	size_t outlen;
2866 	int rc;
2867 
2868 	table = kzalloc(sizeof(*table), GFP_KERNEL);
2869 	if (!table)
2870 		return -ENOMEM;
2871 
2872 	/* Find out which RX filter types are supported, and their priorities */
2873 	MCDI_SET_DWORD(inbuf, GET_PARSER_DISP_INFO_IN_OP,
2874 		       MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_RX_MATCHES);
2875 	rc = efx_mcdi_rpc(efx, MC_CMD_GET_PARSER_DISP_INFO,
2876 			  inbuf, sizeof(inbuf), outbuf, sizeof(outbuf),
2877 			  &outlen);
2878 	if (rc)
2879 		goto fail;
2880 	pd_match_count = MCDI_VAR_ARRAY_LEN(
2881 		outlen, GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES);
2882 	table->rx_match_count = 0;
2883 
2884 	for (pd_match_pri = 0; pd_match_pri < pd_match_count; pd_match_pri++) {
2885 		u32 mcdi_flags =
2886 			MCDI_ARRAY_DWORD(
2887 				outbuf,
2888 				GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES,
2889 				pd_match_pri);
2890 		rc = efx_ef10_filter_match_flags_from_mcdi(mcdi_flags);
2891 		if (rc < 0) {
2892 			netif_dbg(efx, probe, efx->net_dev,
2893 				  "%s: fw flags %#x pri %u not supported in driver\n",
2894 				  __func__, mcdi_flags, pd_match_pri);
2895 		} else {
2896 			netif_dbg(efx, probe, efx->net_dev,
2897 				  "%s: fw flags %#x pri %u supported as driver flags %#x pri %u\n",
2898 				  __func__, mcdi_flags, pd_match_pri,
2899 				  rc, table->rx_match_count);
2900 			table->rx_match_flags[table->rx_match_count++] = rc;
2901 		}
2902 	}
2903 
2904 	table->entry = vzalloc(HUNT_FILTER_TBL_ROWS * sizeof(*table->entry));
2905 	if (!table->entry) {
2906 		rc = -ENOMEM;
2907 		goto fail;
2908 	}
2909 
2910 	efx->filter_state = table;
2911 	init_waitqueue_head(&table->waitq);
2912 	return 0;
2913 
2914 fail:
2915 	kfree(table);
2916 	return rc;
2917 }
2918 
2919 static void efx_ef10_filter_table_restore(struct efx_nic *efx)
2920 {
2921 	struct efx_ef10_filter_table *table = efx->filter_state;
2922 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
2923 	struct efx_filter_spec *spec;
2924 	unsigned int filter_idx;
2925 	bool failed = false;
2926 	int rc;
2927 
2928 	if (!nic_data->must_restore_filters)
2929 		return;
2930 
2931 	spin_lock_bh(&efx->filter_lock);
2932 
2933 	for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) {
2934 		spec = efx_ef10_filter_entry_spec(table, filter_idx);
2935 		if (!spec)
2936 			continue;
2937 
2938 		table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_BUSY;
2939 		spin_unlock_bh(&efx->filter_lock);
2940 
2941 		rc = efx_ef10_filter_push(efx, spec,
2942 					  &table->entry[filter_idx].handle,
2943 					  false);
2944 		if (rc)
2945 			failed = true;
2946 
2947 		spin_lock_bh(&efx->filter_lock);
2948 		if (rc) {
2949 			kfree(spec);
2950 			efx_ef10_filter_set_entry(table, filter_idx, NULL, 0);
2951 		} else {
2952 			table->entry[filter_idx].spec &=
2953 				~EFX_EF10_FILTER_FLAG_BUSY;
2954 		}
2955 	}
2956 
2957 	spin_unlock_bh(&efx->filter_lock);
2958 
2959 	if (failed)
2960 		netif_err(efx, hw, efx->net_dev,
2961 			  "unable to restore all filters\n");
2962 	else
2963 		nic_data->must_restore_filters = false;
2964 }
2965 
2966 static void efx_ef10_filter_table_remove(struct efx_nic *efx)
2967 {
2968 	struct efx_ef10_filter_table *table = efx->filter_state;
2969 	MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN);
2970 	struct efx_filter_spec *spec;
2971 	unsigned int filter_idx;
2972 	int rc;
2973 
2974 	for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) {
2975 		spec = efx_ef10_filter_entry_spec(table, filter_idx);
2976 		if (!spec)
2977 			continue;
2978 
2979 		MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
2980 			       efx_ef10_filter_is_exclusive(spec) ?
2981 			       MC_CMD_FILTER_OP_IN_OP_REMOVE :
2982 			       MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE);
2983 		MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE,
2984 			       table->entry[filter_idx].handle);
2985 		rc = efx_mcdi_rpc(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf),
2986 				  NULL, 0, NULL);
2987 
2988 		WARN_ON(rc != 0);
2989 		kfree(spec);
2990 	}
2991 
2992 	vfree(table->entry);
2993 	kfree(table);
2994 }
2995 
2996 static void efx_ef10_filter_sync_rx_mode(struct efx_nic *efx)
2997 {
2998 	struct efx_ef10_filter_table *table = efx->filter_state;
2999 	struct net_device *net_dev = efx->net_dev;
3000 	struct efx_filter_spec spec;
3001 	bool remove_failed = false;
3002 	struct netdev_hw_addr *uc;
3003 	struct netdev_hw_addr *mc;
3004 	unsigned int filter_idx;
3005 	int i, n, rc;
3006 
3007 	if (!efx_dev_registered(efx))
3008 		return;
3009 
3010 	/* Mark old filters that may need to be removed */
3011 	spin_lock_bh(&efx->filter_lock);
3012 	n = table->stack_uc_count < 0 ? 1 : table->stack_uc_count;
3013 	for (i = 0; i < n; i++) {
3014 		filter_idx = table->stack_uc_list[i].id % HUNT_FILTER_TBL_ROWS;
3015 		table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_STACK_OLD;
3016 	}
3017 	n = table->stack_mc_count < 0 ? 1 : table->stack_mc_count;
3018 	for (i = 0; i < n; i++) {
3019 		filter_idx = table->stack_mc_list[i].id % HUNT_FILTER_TBL_ROWS;
3020 		table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_STACK_OLD;
3021 	}
3022 	spin_unlock_bh(&efx->filter_lock);
3023 
3024 	/* Copy/convert the address lists; add the primary station
3025 	 * address and broadcast address
3026 	 */
3027 	netif_addr_lock_bh(net_dev);
3028 	if (net_dev->flags & IFF_PROMISC ||
3029 	    netdev_uc_count(net_dev) >= EFX_EF10_FILTER_STACK_UC_MAX) {
3030 		table->stack_uc_count = -1;
3031 	} else {
3032 		table->stack_uc_count = 1 + netdev_uc_count(net_dev);
3033 		memcpy(table->stack_uc_list[0].addr, net_dev->dev_addr,
3034 		       ETH_ALEN);
3035 		i = 1;
3036 		netdev_for_each_uc_addr(uc, net_dev) {
3037 			memcpy(table->stack_uc_list[i].addr,
3038 			       uc->addr, ETH_ALEN);
3039 			i++;
3040 		}
3041 	}
3042 	if (net_dev->flags & (IFF_PROMISC | IFF_ALLMULTI) ||
3043 	    netdev_mc_count(net_dev) >= EFX_EF10_FILTER_STACK_MC_MAX) {
3044 		table->stack_mc_count = -1;
3045 	} else {
3046 		table->stack_mc_count = 1 + netdev_mc_count(net_dev);
3047 		eth_broadcast_addr(table->stack_mc_list[0].addr);
3048 		i = 1;
3049 		netdev_for_each_mc_addr(mc, net_dev) {
3050 			memcpy(table->stack_mc_list[i].addr,
3051 			       mc->addr, ETH_ALEN);
3052 			i++;
3053 		}
3054 	}
3055 	netif_addr_unlock_bh(net_dev);
3056 
3057 	/* Insert/renew unicast filters */
3058 	if (table->stack_uc_count >= 0) {
3059 		for (i = 0; i < table->stack_uc_count; i++) {
3060 			efx_filter_init_rx(&spec, EFX_FILTER_PRI_REQUIRED,
3061 					   EFX_FILTER_FLAG_RX_RSS |
3062 					   EFX_FILTER_FLAG_RX_STACK,
3063 					   0);
3064 			efx_filter_set_eth_local(&spec, EFX_FILTER_VID_UNSPEC,
3065 						 table->stack_uc_list[i].addr);
3066 			rc = efx_ef10_filter_insert(efx, &spec, true);
3067 			if (rc < 0) {
3068 				/* Fall back to unicast-promisc */
3069 				while (i--)
3070 					efx_ef10_filter_remove_safe(
3071 						efx, EFX_FILTER_PRI_REQUIRED,
3072 						table->stack_uc_list[i].id);
3073 				table->stack_uc_count = -1;
3074 				break;
3075 			}
3076 			table->stack_uc_list[i].id = rc;
3077 		}
3078 	}
3079 	if (table->stack_uc_count < 0) {
3080 		efx_filter_init_rx(&spec, EFX_FILTER_PRI_REQUIRED,
3081 				   EFX_FILTER_FLAG_RX_RSS |
3082 				   EFX_FILTER_FLAG_RX_STACK,
3083 				   0);
3084 		efx_filter_set_uc_def(&spec);
3085 		rc = efx_ef10_filter_insert(efx, &spec, true);
3086 		if (rc < 0) {
3087 			WARN_ON(1);
3088 			table->stack_uc_count = 0;
3089 		} else {
3090 			table->stack_uc_list[0].id = rc;
3091 		}
3092 	}
3093 
3094 	/* Insert/renew multicast filters */
3095 	if (table->stack_mc_count >= 0) {
3096 		for (i = 0; i < table->stack_mc_count; i++) {
3097 			efx_filter_init_rx(&spec, EFX_FILTER_PRI_REQUIRED,
3098 					   EFX_FILTER_FLAG_RX_RSS |
3099 					   EFX_FILTER_FLAG_RX_STACK,
3100 					   0);
3101 			efx_filter_set_eth_local(&spec, EFX_FILTER_VID_UNSPEC,
3102 						 table->stack_mc_list[i].addr);
3103 			rc = efx_ef10_filter_insert(efx, &spec, true);
3104 			if (rc < 0) {
3105 				/* Fall back to multicast-promisc */
3106 				while (i--)
3107 					efx_ef10_filter_remove_safe(
3108 						efx, EFX_FILTER_PRI_REQUIRED,
3109 						table->stack_mc_list[i].id);
3110 				table->stack_mc_count = -1;
3111 				break;
3112 			}
3113 			table->stack_mc_list[i].id = rc;
3114 		}
3115 	}
3116 	if (table->stack_mc_count < 0) {
3117 		efx_filter_init_rx(&spec, EFX_FILTER_PRI_REQUIRED,
3118 				   EFX_FILTER_FLAG_RX_RSS |
3119 				   EFX_FILTER_FLAG_RX_STACK,
3120 				   0);
3121 		efx_filter_set_mc_def(&spec);
3122 		rc = efx_ef10_filter_insert(efx, &spec, true);
3123 		if (rc < 0) {
3124 			WARN_ON(1);
3125 			table->stack_mc_count = 0;
3126 		} else {
3127 			table->stack_mc_list[0].id = rc;
3128 		}
3129 	}
3130 
3131 	/* Remove filters that weren't renewed.  Since nothing else
3132 	 * changes the STACK_OLD flag or removes these filters, we
3133 	 * don't need to hold the filter_lock while scanning for
3134 	 * these filters.
3135 	 */
3136 	for (i = 0; i < HUNT_FILTER_TBL_ROWS; i++) {
3137 		if (ACCESS_ONCE(table->entry[i].spec) &
3138 		    EFX_EF10_FILTER_FLAG_STACK_OLD) {
3139 			if (efx_ef10_filter_remove_internal(efx,
3140 					EFX_FILTER_PRI_REQUIRED,
3141 					i, true) < 0)
3142 				remove_failed = true;
3143 		}
3144 	}
3145 	WARN_ON(remove_failed);
3146 }
3147 
3148 static int efx_ef10_mac_reconfigure(struct efx_nic *efx)
3149 {
3150 	efx_ef10_filter_sync_rx_mode(efx);
3151 
3152 	return efx_mcdi_set_mac(efx);
3153 }
3154 
3155 #ifdef CONFIG_SFC_MTD
3156 
3157 struct efx_ef10_nvram_type_info {
3158 	u16 type, type_mask;
3159 	u8 port;
3160 	const char *name;
3161 };
3162 
3163 static const struct efx_ef10_nvram_type_info efx_ef10_nvram_types[] = {
3164 	{ NVRAM_PARTITION_TYPE_MC_FIRMWARE,	   0,    0, "sfc_mcfw" },
3165 	{ NVRAM_PARTITION_TYPE_MC_FIRMWARE_BACKUP, 0,    0, "sfc_mcfw_backup" },
3166 	{ NVRAM_PARTITION_TYPE_EXPANSION_ROM,	   0,    0, "sfc_exp_rom" },
3167 	{ NVRAM_PARTITION_TYPE_STATIC_CONFIG,	   0,    0, "sfc_static_cfg" },
3168 	{ NVRAM_PARTITION_TYPE_DYNAMIC_CONFIG,	   0,    0, "sfc_dynamic_cfg" },
3169 	{ NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT0, 0,   0, "sfc_exp_rom_cfg" },
3170 	{ NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT1, 0,   1, "sfc_exp_rom_cfg" },
3171 	{ NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT2, 0,   2, "sfc_exp_rom_cfg" },
3172 	{ NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT3, 0,   3, "sfc_exp_rom_cfg" },
3173 	{ NVRAM_PARTITION_TYPE_PHY_MIN,		   0xff, 0, "sfc_phy_fw" },
3174 };
3175 
3176 static int efx_ef10_mtd_probe_partition(struct efx_nic *efx,
3177 					struct efx_mcdi_mtd_partition *part,
3178 					unsigned int type)
3179 {
3180 	MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_METADATA_IN_LEN);
3181 	MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_METADATA_OUT_LENMAX);
3182 	const struct efx_ef10_nvram_type_info *info;
3183 	size_t size, erase_size, outlen;
3184 	bool protected;
3185 	int rc;
3186 
3187 	for (info = efx_ef10_nvram_types; ; info++) {
3188 		if (info ==
3189 		    efx_ef10_nvram_types + ARRAY_SIZE(efx_ef10_nvram_types))
3190 			return -ENODEV;
3191 		if ((type & ~info->type_mask) == info->type)
3192 			break;
3193 	}
3194 	if (info->port != efx_port_num(efx))
3195 		return -ENODEV;
3196 
3197 	rc = efx_mcdi_nvram_info(efx, type, &size, &erase_size, &protected);
3198 	if (rc)
3199 		return rc;
3200 	if (protected)
3201 		return -ENODEV; /* hide it */
3202 
3203 	part->nvram_type = type;
3204 
3205 	MCDI_SET_DWORD(inbuf, NVRAM_METADATA_IN_TYPE, type);
3206 	rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_METADATA, inbuf, sizeof(inbuf),
3207 			  outbuf, sizeof(outbuf), &outlen);
3208 	if (rc)
3209 		return rc;
3210 	if (outlen < MC_CMD_NVRAM_METADATA_OUT_LENMIN)
3211 		return -EIO;
3212 	if (MCDI_DWORD(outbuf, NVRAM_METADATA_OUT_FLAGS) &
3213 	    (1 << MC_CMD_NVRAM_METADATA_OUT_SUBTYPE_VALID_LBN))
3214 		part->fw_subtype = MCDI_DWORD(outbuf,
3215 					      NVRAM_METADATA_OUT_SUBTYPE);
3216 
3217 	part->common.dev_type_name = "EF10 NVRAM manager";
3218 	part->common.type_name = info->name;
3219 
3220 	part->common.mtd.type = MTD_NORFLASH;
3221 	part->common.mtd.flags = MTD_CAP_NORFLASH;
3222 	part->common.mtd.size = size;
3223 	part->common.mtd.erasesize = erase_size;
3224 
3225 	return 0;
3226 }
3227 
3228 static int efx_ef10_mtd_probe(struct efx_nic *efx)
3229 {
3230 	MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_PARTITIONS_OUT_LENMAX);
3231 	struct efx_mcdi_mtd_partition *parts;
3232 	size_t outlen, n_parts_total, i, n_parts;
3233 	unsigned int type;
3234 	int rc;
3235 
3236 	ASSERT_RTNL();
3237 
3238 	BUILD_BUG_ON(MC_CMD_NVRAM_PARTITIONS_IN_LEN != 0);
3239 	rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_PARTITIONS, NULL, 0,
3240 			  outbuf, sizeof(outbuf), &outlen);
3241 	if (rc)
3242 		return rc;
3243 	if (outlen < MC_CMD_NVRAM_PARTITIONS_OUT_LENMIN)
3244 		return -EIO;
3245 
3246 	n_parts_total = MCDI_DWORD(outbuf, NVRAM_PARTITIONS_OUT_NUM_PARTITIONS);
3247 	if (n_parts_total >
3248 	    MCDI_VAR_ARRAY_LEN(outlen, NVRAM_PARTITIONS_OUT_TYPE_ID))
3249 		return -EIO;
3250 
3251 	parts = kcalloc(n_parts_total, sizeof(*parts), GFP_KERNEL);
3252 	if (!parts)
3253 		return -ENOMEM;
3254 
3255 	n_parts = 0;
3256 	for (i = 0; i < n_parts_total; i++) {
3257 		type = MCDI_ARRAY_DWORD(outbuf, NVRAM_PARTITIONS_OUT_TYPE_ID,
3258 					i);
3259 		rc = efx_ef10_mtd_probe_partition(efx, &parts[n_parts], type);
3260 		if (rc == 0)
3261 			n_parts++;
3262 		else if (rc != -ENODEV)
3263 			goto fail;
3264 	}
3265 
3266 	rc = efx_mtd_add(efx, &parts[0].common, n_parts, sizeof(*parts));
3267 fail:
3268 	if (rc)
3269 		kfree(parts);
3270 	return rc;
3271 }
3272 
3273 #endif /* CONFIG_SFC_MTD */
3274 
3275 static void efx_ef10_ptp_write_host_time(struct efx_nic *efx, u32 host_time)
3276 {
3277 	_efx_writed(efx, cpu_to_le32(host_time), ER_DZ_MC_DB_LWRD);
3278 }
3279 
3280 const struct efx_nic_type efx_hunt_a0_nic_type = {
3281 	.mem_map_size = efx_ef10_mem_map_size,
3282 	.probe = efx_ef10_probe,
3283 	.remove = efx_ef10_remove,
3284 	.dimension_resources = efx_ef10_dimension_resources,
3285 	.init = efx_ef10_init_nic,
3286 	.fini = efx_port_dummy_op_void,
3287 	.map_reset_reason = efx_mcdi_map_reset_reason,
3288 	.map_reset_flags = efx_ef10_map_reset_flags,
3289 	.reset = efx_mcdi_reset,
3290 	.probe_port = efx_mcdi_port_probe,
3291 	.remove_port = efx_mcdi_port_remove,
3292 	.fini_dmaq = efx_ef10_fini_dmaq,
3293 	.describe_stats = efx_ef10_describe_stats,
3294 	.update_stats = efx_ef10_update_stats,
3295 	.start_stats = efx_mcdi_mac_start_stats,
3296 	.stop_stats = efx_mcdi_mac_stop_stats,
3297 	.set_id_led = efx_mcdi_set_id_led,
3298 	.push_irq_moderation = efx_ef10_push_irq_moderation,
3299 	.reconfigure_mac = efx_ef10_mac_reconfigure,
3300 	.check_mac_fault = efx_mcdi_mac_check_fault,
3301 	.reconfigure_port = efx_mcdi_port_reconfigure,
3302 	.get_wol = efx_ef10_get_wol,
3303 	.set_wol = efx_ef10_set_wol,
3304 	.resume_wol = efx_port_dummy_op_void,
3305 	/* TODO: test_chip */
3306 	.test_nvram = efx_mcdi_nvram_test_all,
3307 	.mcdi_request = efx_ef10_mcdi_request,
3308 	.mcdi_poll_response = efx_ef10_mcdi_poll_response,
3309 	.mcdi_read_response = efx_ef10_mcdi_read_response,
3310 	.mcdi_poll_reboot = efx_ef10_mcdi_poll_reboot,
3311 	.irq_enable_master = efx_port_dummy_op_void,
3312 	.irq_test_generate = efx_ef10_irq_test_generate,
3313 	.irq_disable_non_ev = efx_port_dummy_op_void,
3314 	.irq_handle_msi = efx_ef10_msi_interrupt,
3315 	.irq_handle_legacy = efx_ef10_legacy_interrupt,
3316 	.tx_probe = efx_ef10_tx_probe,
3317 	.tx_init = efx_ef10_tx_init,
3318 	.tx_remove = efx_ef10_tx_remove,
3319 	.tx_write = efx_ef10_tx_write,
3320 	.rx_push_indir_table = efx_ef10_rx_push_indir_table,
3321 	.rx_probe = efx_ef10_rx_probe,
3322 	.rx_init = efx_ef10_rx_init,
3323 	.rx_remove = efx_ef10_rx_remove,
3324 	.rx_write = efx_ef10_rx_write,
3325 	.rx_defer_refill = efx_ef10_rx_defer_refill,
3326 	.ev_probe = efx_ef10_ev_probe,
3327 	.ev_init = efx_ef10_ev_init,
3328 	.ev_fini = efx_ef10_ev_fini,
3329 	.ev_remove = efx_ef10_ev_remove,
3330 	.ev_process = efx_ef10_ev_process,
3331 	.ev_read_ack = efx_ef10_ev_read_ack,
3332 	.ev_test_generate = efx_ef10_ev_test_generate,
3333 	.filter_table_probe = efx_ef10_filter_table_probe,
3334 	.filter_table_restore = efx_ef10_filter_table_restore,
3335 	.filter_table_remove = efx_ef10_filter_table_remove,
3336 	.filter_update_rx_scatter = efx_ef10_filter_update_rx_scatter,
3337 	.filter_insert = efx_ef10_filter_insert,
3338 	.filter_remove_safe = efx_ef10_filter_remove_safe,
3339 	.filter_get_safe = efx_ef10_filter_get_safe,
3340 	.filter_clear_rx = efx_ef10_filter_clear_rx,
3341 	.filter_count_rx_used = efx_ef10_filter_count_rx_used,
3342 	.filter_get_rx_id_limit = efx_ef10_filter_get_rx_id_limit,
3343 	.filter_get_rx_ids = efx_ef10_filter_get_rx_ids,
3344 #ifdef CONFIG_RFS_ACCEL
3345 	.filter_rfs_insert = efx_ef10_filter_rfs_insert,
3346 	.filter_rfs_expire_one = efx_ef10_filter_rfs_expire_one,
3347 #endif
3348 #ifdef CONFIG_SFC_MTD
3349 	.mtd_probe = efx_ef10_mtd_probe,
3350 	.mtd_rename = efx_mcdi_mtd_rename,
3351 	.mtd_read = efx_mcdi_mtd_read,
3352 	.mtd_erase = efx_mcdi_mtd_erase,
3353 	.mtd_write = efx_mcdi_mtd_write,
3354 	.mtd_sync = efx_mcdi_mtd_sync,
3355 #endif
3356 	.ptp_write_host_time = efx_ef10_ptp_write_host_time,
3357 
3358 	.revision = EFX_REV_HUNT_A0,
3359 	.max_dma_mask = DMA_BIT_MASK(ESF_DZ_TX_KER_BUF_ADDR_WIDTH),
3360 	.rx_prefix_size = ES_DZ_RX_PREFIX_SIZE,
3361 	.rx_hash_offset = ES_DZ_RX_PREFIX_HASH_OFST,
3362 	.can_rx_scatter = true,
3363 	.always_rx_scatter = true,
3364 	.max_interrupt_mode = EFX_INT_MODE_MSIX,
3365 	.timer_period_max = 1 << ERF_DD_EVQ_IND_TIMER_VAL_WIDTH,
3366 	.offload_features = (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
3367 			     NETIF_F_RXHASH | NETIF_F_NTUPLE),
3368 	.mcdi_max_ver = 2,
3369 	.max_rx_ip_filters = HUNT_FILTER_TBL_ROWS,
3370 };
3371