xref: /linux/drivers/net/ethernet/google/gve/gve_main.c (revision 7110f24f9e33979fd704f7a4a595a9d3e9bdacb7)
1 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
2 /* Google virtual Ethernet (gve) driver
3  *
4  * Copyright (C) 2015-2024 Google LLC
5  */
6 
7 #include <linux/bpf.h>
8 #include <linux/cpumask.h>
9 #include <linux/etherdevice.h>
10 #include <linux/filter.h>
11 #include <linux/interrupt.h>
12 #include <linux/irq.h>
13 #include <linux/module.h>
14 #include <linux/pci.h>
15 #include <linux/sched.h>
16 #include <linux/timer.h>
17 #include <linux/workqueue.h>
18 #include <linux/utsname.h>
19 #include <linux/version.h>
20 #include <net/netdev_queues.h>
21 #include <net/sch_generic.h>
22 #include <net/xdp_sock_drv.h>
23 #include "gve.h"
24 #include "gve_dqo.h"
25 #include "gve_adminq.h"
26 #include "gve_register.h"
27 #include "gve_utils.h"
28 
/* Default RX copybreak value */
#define GVE_DEFAULT_RX_COPYBREAK	256

/* Message level mask: driver and link messages */
#define DEFAULT_MSG_LEVEL	(NETIF_MSG_DRV | NETIF_MSG_LINK)
/* Driver version string and the prefix string that goes with it */
#define GVE_VERSION		"1.0.0"
#define GVE_VERSION_PREFIX	"GVE-"

/* Minimum amount of time between queue kicks in msec (10 seconds) */
#define MIN_TX_TIMEOUT_GAP	(10 * 1000)

char gve_driver_name[] = "gve";
const char gve_version_str[] = GVE_VERSION;
static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
41 
42 static int gve_verify_driver_compatibility(struct gve_priv *priv)
43 {
44 	int err;
45 	struct gve_driver_info *driver_info;
46 	dma_addr_t driver_info_bus;
47 
48 	driver_info = dma_alloc_coherent(&priv->pdev->dev,
49 					 sizeof(struct gve_driver_info),
50 					 &driver_info_bus, GFP_KERNEL);
51 	if (!driver_info)
52 		return -ENOMEM;
53 
54 	*driver_info = (struct gve_driver_info) {
55 		.os_type = 1, /* Linux */
56 		.os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
57 		.os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
58 		.os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
59 		.driver_capability_flags = {
60 			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
61 			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
62 			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
63 			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
64 		},
65 	};
66 	strscpy(driver_info->os_version_str1, utsname()->release,
67 		sizeof(driver_info->os_version_str1));
68 	strscpy(driver_info->os_version_str2, utsname()->version,
69 		sizeof(driver_info->os_version_str2));
70 
71 	err = gve_adminq_verify_driver_compatibility(priv,
72 						     sizeof(struct gve_driver_info),
73 						     driver_info_bus);
74 
75 	/* It's ok if the device doesn't support this */
76 	if (err == -EOPNOTSUPP)
77 		err = 0;
78 
79 	dma_free_coherent(&priv->pdev->dev,
80 			  sizeof(struct gve_driver_info),
81 			  driver_info, driver_info_bus);
82 	return err;
83 }
84 
85 static netdev_features_t gve_features_check(struct sk_buff *skb,
86 					    struct net_device *dev,
87 					    netdev_features_t features)
88 {
89 	struct gve_priv *priv = netdev_priv(dev);
90 
91 	if (!gve_is_gqi(priv))
92 		return gve_features_check_dqo(skb, dev, features);
93 
94 	return features;
95 }
96 
97 static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
98 {
99 	struct gve_priv *priv = netdev_priv(dev);
100 
101 	if (gve_is_gqi(priv))
102 		return gve_tx(skb, dev);
103 	else
104 		return gve_tx_dqo(skb, dev);
105 }
106 
107 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
108 {
109 	struct gve_priv *priv = netdev_priv(dev);
110 	unsigned int start;
111 	u64 packets, bytes;
112 	int num_tx_queues;
113 	int ring;
114 
115 	num_tx_queues = gve_num_tx_queues(priv);
116 	if (priv->rx) {
117 		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
118 			do {
119 				start =
120 				  u64_stats_fetch_begin(&priv->rx[ring].statss);
121 				packets = priv->rx[ring].rpackets;
122 				bytes = priv->rx[ring].rbytes;
123 			} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
124 						       start));
125 			s->rx_packets += packets;
126 			s->rx_bytes += bytes;
127 		}
128 	}
129 	if (priv->tx) {
130 		for (ring = 0; ring < num_tx_queues; ring++) {
131 			do {
132 				start =
133 				  u64_stats_fetch_begin(&priv->tx[ring].statss);
134 				packets = priv->tx[ring].pkt_done;
135 				bytes = priv->tx[ring].bytes_done;
136 			} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
137 						       start));
138 			s->tx_packets += packets;
139 			s->tx_bytes += bytes;
140 		}
141 	}
142 }
143 
144 static int gve_alloc_flow_rule_caches(struct gve_priv *priv)
145 {
146 	struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;
147 	int err = 0;
148 
149 	if (!priv->max_flow_rules)
150 		return 0;
151 
152 	flow_rules_cache->rules_cache =
153 		kvcalloc(GVE_FLOW_RULES_CACHE_SIZE, sizeof(*flow_rules_cache->rules_cache),
154 			 GFP_KERNEL);
155 	if (!flow_rules_cache->rules_cache) {
156 		dev_err(&priv->pdev->dev, "Cannot alloc flow rules cache\n");
157 		return -ENOMEM;
158 	}
159 
160 	flow_rules_cache->rule_ids_cache =
161 		kvcalloc(GVE_FLOW_RULE_IDS_CACHE_SIZE, sizeof(*flow_rules_cache->rule_ids_cache),
162 			 GFP_KERNEL);
163 	if (!flow_rules_cache->rule_ids_cache) {
164 		dev_err(&priv->pdev->dev, "Cannot alloc flow rule ids cache\n");
165 		err = -ENOMEM;
166 		goto free_rules_cache;
167 	}
168 
169 	return 0;
170 
171 free_rules_cache:
172 	kvfree(flow_rules_cache->rules_cache);
173 	flow_rules_cache->rules_cache = NULL;
174 	return err;
175 }
176 
177 static void gve_free_flow_rule_caches(struct gve_priv *priv)
178 {
179 	struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;
180 
181 	kvfree(flow_rules_cache->rule_ids_cache);
182 	flow_rules_cache->rule_ids_cache = NULL;
183 	kvfree(flow_rules_cache->rules_cache);
184 	flow_rules_cache->rules_cache = NULL;
185 }
186 
187 static int gve_alloc_counter_array(struct gve_priv *priv)
188 {
189 	priv->counter_array =
190 		dma_alloc_coherent(&priv->pdev->dev,
191 				   priv->num_event_counters *
192 				   sizeof(*priv->counter_array),
193 				   &priv->counter_array_bus, GFP_KERNEL);
194 	if (!priv->counter_array)
195 		return -ENOMEM;
196 
197 	return 0;
198 }
199 
200 static void gve_free_counter_array(struct gve_priv *priv)
201 {
202 	if (!priv->counter_array)
203 		return;
204 
205 	dma_free_coherent(&priv->pdev->dev,
206 			  priv->num_event_counters *
207 			  sizeof(*priv->counter_array),
208 			  priv->counter_array, priv->counter_array_bus);
209 	priv->counter_array = NULL;
210 }
211 
212 /* NIC requests to report stats */
213 static void gve_stats_report_task(struct work_struct *work)
214 {
215 	struct gve_priv *priv = container_of(work, struct gve_priv,
216 					     stats_report_task);
217 	if (gve_get_do_report_stats(priv)) {
218 		gve_handle_report_stats(priv);
219 		gve_clear_do_report_stats(priv);
220 	}
221 }
222 
223 static void gve_stats_report_schedule(struct gve_priv *priv)
224 {
225 	if (!gve_get_probe_in_progress(priv) &&
226 	    !gve_get_reset_in_progress(priv)) {
227 		gve_set_do_report_stats(priv);
228 		queue_work(priv->gve_wq, &priv->stats_report_task);
229 	}
230 }
231 
232 static void gve_stats_report_timer(struct timer_list *t)
233 {
234 	struct gve_priv *priv = from_timer(priv, t, stats_report_timer);
235 
236 	mod_timer(&priv->stats_report_timer,
237 		  round_jiffies(jiffies +
238 		  msecs_to_jiffies(priv->stats_report_timer_period)));
239 	gve_stats_report_schedule(priv);
240 }
241 
242 static int gve_alloc_stats_report(struct gve_priv *priv)
243 {
244 	int tx_stats_num, rx_stats_num;
245 
246 	tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
247 		       gve_num_tx_queues(priv);
248 	rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
249 		       priv->rx_cfg.num_queues;
250 	priv->stats_report_len = struct_size(priv->stats_report, stats,
251 					     size_add(tx_stats_num, rx_stats_num));
252 	priv->stats_report =
253 		dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
254 				   &priv->stats_report_bus, GFP_KERNEL);
255 	if (!priv->stats_report)
256 		return -ENOMEM;
257 	/* Set up timer for the report-stats task */
258 	timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
259 	priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
260 	return 0;
261 }
262 
263 static void gve_free_stats_report(struct gve_priv *priv)
264 {
265 	if (!priv->stats_report)
266 		return;
267 
268 	del_timer_sync(&priv->stats_report_timer);
269 	dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
270 			  priv->stats_report, priv->stats_report_bus);
271 	priv->stats_report = NULL;
272 }
273 
274 static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
275 {
276 	struct gve_priv *priv = arg;
277 
278 	queue_work(priv->gve_wq, &priv->service_task);
279 	return IRQ_HANDLED;
280 }
281 
282 static irqreturn_t gve_intr(int irq, void *arg)
283 {
284 	struct gve_notify_block *block = arg;
285 	struct gve_priv *priv = block->priv;
286 
287 	iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
288 	napi_schedule_irqoff(&block->napi);
289 	return IRQ_HANDLED;
290 }
291 
292 static irqreturn_t gve_intr_dqo(int irq, void *arg)
293 {
294 	struct gve_notify_block *block = arg;
295 
296 	/* Interrupts are automatically masked */
297 	napi_schedule_irqoff(&block->napi);
298 	return IRQ_HANDLED;
299 }
300 
301 static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq)
302 {
303 	int cpu_curr = smp_processor_id();
304 	const struct cpumask *aff_mask;
305 
306 	aff_mask = irq_get_effective_affinity_mask(irq);
307 	if (unlikely(!aff_mask))
308 		return 1;
309 
310 	return cpumask_test_cpu(cpu_curr, aff_mask);
311 }
312 
313 int gve_napi_poll(struct napi_struct *napi, int budget)
314 {
315 	struct gve_notify_block *block;
316 	__be32 __iomem *irq_doorbell;
317 	bool reschedule = false;
318 	struct gve_priv *priv;
319 	int work_done = 0;
320 
321 	block = container_of(napi, struct gve_notify_block, napi);
322 	priv = block->priv;
323 
324 	if (block->tx) {
325 		if (block->tx->q_num < priv->tx_cfg.num_queues)
326 			reschedule |= gve_tx_poll(block, budget);
327 		else if (budget)
328 			reschedule |= gve_xdp_poll(block, budget);
329 	}
330 
331 	if (!budget)
332 		return 0;
333 
334 	if (block->rx) {
335 		work_done = gve_rx_poll(block, budget);
336 
337 		/* Poll XSK TX as part of RX NAPI. Setup re-poll based on max of
338 		 * TX and RX work done.
339 		 */
340 		if (priv->xdp_prog)
341 			work_done = max_t(int, work_done,
342 					  gve_xsk_tx_poll(block, budget));
343 
344 		reschedule |= work_done == budget;
345 	}
346 
347 	if (reschedule)
348 		return budget;
349 
350        /* Complete processing - don't unmask irq if busy polling is enabled */
351 	if (likely(napi_complete_done(napi, work_done))) {
352 		irq_doorbell = gve_irq_doorbell(priv, block);
353 		iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
354 
355 		/* Ensure IRQ ACK is visible before we check pending work.
356 		 * If queue had issued updates, it would be truly visible.
357 		 */
358 		mb();
359 
360 		if (block->tx)
361 			reschedule |= gve_tx_clean_pending(priv, block->tx);
362 		if (block->rx)
363 			reschedule |= gve_rx_work_pending(block->rx);
364 
365 		if (reschedule && napi_schedule(napi))
366 			iowrite32be(GVE_IRQ_MASK, irq_doorbell);
367 	}
368 	return work_done;
369 }
370 
371 int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
372 {
373 	struct gve_notify_block *block =
374 		container_of(napi, struct gve_notify_block, napi);
375 	struct gve_priv *priv = block->priv;
376 	bool reschedule = false;
377 	int work_done = 0;
378 
379 	if (block->tx)
380 		reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
381 
382 	if (!budget)
383 		return 0;
384 
385 	if (block->rx) {
386 		work_done = gve_rx_poll_dqo(block, budget);
387 		reschedule |= work_done == budget;
388 	}
389 
390 	if (reschedule) {
391 		/* Reschedule by returning budget only if already on the correct
392 		 * cpu.
393 		 */
394 		if (likely(gve_is_napi_on_home_cpu(priv, block->irq)))
395 			return budget;
396 
397 		/* If not on the cpu with which this queue's irq has affinity
398 		 * with, we avoid rescheduling napi and arm the irq instead so
399 		 * that napi gets rescheduled back eventually onto the right
400 		 * cpu.
401 		 */
402 		if (work_done == budget)
403 			work_done--;
404 	}
405 
406 	if (likely(napi_complete_done(napi, work_done))) {
407 		/* Enable interrupts again.
408 		 *
409 		 * We don't need to repoll afterwards because HW supports the
410 		 * PCI MSI-X PBA feature.
411 		 *
412 		 * Another interrupt would be triggered if a new event came in
413 		 * since the last one.
414 		 */
415 		gve_write_irq_doorbell_dqo(priv, block,
416 					   GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
417 	}
418 
419 	return work_done;
420 }
421 
422 static int gve_alloc_notify_blocks(struct gve_priv *priv)
423 {
424 	int num_vecs_requested = priv->num_ntfy_blks + 1;
425 	unsigned int active_cpus;
426 	int vecs_enabled;
427 	int i, j;
428 	int err;
429 
430 	priv->msix_vectors = kvcalloc(num_vecs_requested,
431 				      sizeof(*priv->msix_vectors), GFP_KERNEL);
432 	if (!priv->msix_vectors)
433 		return -ENOMEM;
434 	for (i = 0; i < num_vecs_requested; i++)
435 		priv->msix_vectors[i].entry = i;
436 	vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
437 					     GVE_MIN_MSIX, num_vecs_requested);
438 	if (vecs_enabled < 0) {
439 		dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
440 			GVE_MIN_MSIX, vecs_enabled);
441 		err = vecs_enabled;
442 		goto abort_with_msix_vectors;
443 	}
444 	if (vecs_enabled != num_vecs_requested) {
445 		int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
446 		int vecs_per_type = new_num_ntfy_blks / 2;
447 		int vecs_left = new_num_ntfy_blks % 2;
448 
449 		priv->num_ntfy_blks = new_num_ntfy_blks;
450 		priv->mgmt_msix_idx = priv->num_ntfy_blks;
451 		priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
452 						vecs_per_type);
453 		priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
454 						vecs_per_type + vecs_left);
455 		dev_err(&priv->pdev->dev,
456 			"Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
457 			vecs_enabled, priv->tx_cfg.max_queues,
458 			priv->rx_cfg.max_queues);
459 		if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
460 			priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
461 		if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
462 			priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
463 	}
464 	/* Half the notification blocks go to TX and half to RX */
465 	active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
466 
467 	/* Setup Management Vector  - the last vector */
468 	snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
469 		 pci_name(priv->pdev));
470 	err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
471 			  gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
472 	if (err) {
473 		dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
474 		goto abort_with_msix_enabled;
475 	}
476 	priv->irq_db_indices =
477 		dma_alloc_coherent(&priv->pdev->dev,
478 				   priv->num_ntfy_blks *
479 				   sizeof(*priv->irq_db_indices),
480 				   &priv->irq_db_indices_bus, GFP_KERNEL);
481 	if (!priv->irq_db_indices) {
482 		err = -ENOMEM;
483 		goto abort_with_mgmt_vector;
484 	}
485 
486 	priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
487 				     sizeof(*priv->ntfy_blocks), GFP_KERNEL);
488 	if (!priv->ntfy_blocks) {
489 		err = -ENOMEM;
490 		goto abort_with_irq_db_indices;
491 	}
492 
493 	/* Setup the other blocks - the first n-1 vectors */
494 	for (i = 0; i < priv->num_ntfy_blks; i++) {
495 		struct gve_notify_block *block = &priv->ntfy_blocks[i];
496 		int msix_idx = i;
497 
498 		snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s",
499 			 i, pci_name(priv->pdev));
500 		block->priv = priv;
501 		err = request_irq(priv->msix_vectors[msix_idx].vector,
502 				  gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
503 				  0, block->name, block);
504 		if (err) {
505 			dev_err(&priv->pdev->dev,
506 				"Failed to receive msix vector %d\n", i);
507 			goto abort_with_some_ntfy_blocks;
508 		}
509 		block->irq = priv->msix_vectors[msix_idx].vector;
510 		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
511 				      get_cpu_mask(i % active_cpus));
512 		block->irq_db_index = &priv->irq_db_indices[i].index;
513 	}
514 	return 0;
515 abort_with_some_ntfy_blocks:
516 	for (j = 0; j < i; j++) {
517 		struct gve_notify_block *block = &priv->ntfy_blocks[j];
518 		int msix_idx = j;
519 
520 		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
521 				      NULL);
522 		free_irq(priv->msix_vectors[msix_idx].vector, block);
523 		block->irq = 0;
524 	}
525 	kvfree(priv->ntfy_blocks);
526 	priv->ntfy_blocks = NULL;
527 abort_with_irq_db_indices:
528 	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
529 			  sizeof(*priv->irq_db_indices),
530 			  priv->irq_db_indices, priv->irq_db_indices_bus);
531 	priv->irq_db_indices = NULL;
532 abort_with_mgmt_vector:
533 	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
534 abort_with_msix_enabled:
535 	pci_disable_msix(priv->pdev);
536 abort_with_msix_vectors:
537 	kvfree(priv->msix_vectors);
538 	priv->msix_vectors = NULL;
539 	return err;
540 }
541 
542 static void gve_free_notify_blocks(struct gve_priv *priv)
543 {
544 	int i;
545 
546 	if (!priv->msix_vectors)
547 		return;
548 
549 	/* Free the irqs */
550 	for (i = 0; i < priv->num_ntfy_blks; i++) {
551 		struct gve_notify_block *block = &priv->ntfy_blocks[i];
552 		int msix_idx = i;
553 
554 		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
555 				      NULL);
556 		free_irq(priv->msix_vectors[msix_idx].vector, block);
557 		block->irq = 0;
558 	}
559 	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
560 	kvfree(priv->ntfy_blocks);
561 	priv->ntfy_blocks = NULL;
562 	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
563 			  sizeof(*priv->irq_db_indices),
564 			  priv->irq_db_indices, priv->irq_db_indices_bus);
565 	priv->irq_db_indices = NULL;
566 	pci_disable_msix(priv->pdev);
567 	kvfree(priv->msix_vectors);
568 	priv->msix_vectors = NULL;
569 }
570 
571 static int gve_setup_device_resources(struct gve_priv *priv)
572 {
573 	int err;
574 
575 	err = gve_alloc_flow_rule_caches(priv);
576 	if (err)
577 		return err;
578 	err = gve_alloc_counter_array(priv);
579 	if (err)
580 		goto abort_with_flow_rule_caches;
581 	err = gve_alloc_notify_blocks(priv);
582 	if (err)
583 		goto abort_with_counter;
584 	err = gve_alloc_stats_report(priv);
585 	if (err)
586 		goto abort_with_ntfy_blocks;
587 	err = gve_adminq_configure_device_resources(priv,
588 						    priv->counter_array_bus,
589 						    priv->num_event_counters,
590 						    priv->irq_db_indices_bus,
591 						    priv->num_ntfy_blks);
592 	if (unlikely(err)) {
593 		dev_err(&priv->pdev->dev,
594 			"could not setup device_resources: err=%d\n", err);
595 		err = -ENXIO;
596 		goto abort_with_stats_report;
597 	}
598 
599 	if (!gve_is_gqi(priv)) {
600 		priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
601 					       GFP_KERNEL);
602 		if (!priv->ptype_lut_dqo) {
603 			err = -ENOMEM;
604 			goto abort_with_stats_report;
605 		}
606 		err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
607 		if (err) {
608 			dev_err(&priv->pdev->dev,
609 				"Failed to get ptype map: err=%d\n", err);
610 			goto abort_with_ptype_lut;
611 		}
612 	}
613 
614 	err = gve_adminq_report_stats(priv, priv->stats_report_len,
615 				      priv->stats_report_bus,
616 				      GVE_STATS_REPORT_TIMER_PERIOD);
617 	if (err)
618 		dev_err(&priv->pdev->dev,
619 			"Failed to report stats: err=%d\n", err);
620 	gve_set_device_resources_ok(priv);
621 	return 0;
622 
623 abort_with_ptype_lut:
624 	kvfree(priv->ptype_lut_dqo);
625 	priv->ptype_lut_dqo = NULL;
626 abort_with_stats_report:
627 	gve_free_stats_report(priv);
628 abort_with_ntfy_blocks:
629 	gve_free_notify_blocks(priv);
630 abort_with_counter:
631 	gve_free_counter_array(priv);
632 abort_with_flow_rule_caches:
633 	gve_free_flow_rule_caches(priv);
634 
635 	return err;
636 }
637 
638 static void gve_trigger_reset(struct gve_priv *priv);
639 
640 static void gve_teardown_device_resources(struct gve_priv *priv)
641 {
642 	int err;
643 
644 	/* Tell device its resources are being freed */
645 	if (gve_get_device_resources_ok(priv)) {
646 		err = gve_flow_rules_reset(priv);
647 		if (err) {
648 			dev_err(&priv->pdev->dev,
649 				"Failed to reset flow rules: err=%d\n", err);
650 			gve_trigger_reset(priv);
651 		}
652 		/* detach the stats report */
653 		err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
654 		if (err) {
655 			dev_err(&priv->pdev->dev,
656 				"Failed to detach stats report: err=%d\n", err);
657 			gve_trigger_reset(priv);
658 		}
659 		err = gve_adminq_deconfigure_device_resources(priv);
660 		if (err) {
661 			dev_err(&priv->pdev->dev,
662 				"Could not deconfigure device resources: err=%d\n",
663 				err);
664 			gve_trigger_reset(priv);
665 		}
666 	}
667 
668 	kvfree(priv->ptype_lut_dqo);
669 	priv->ptype_lut_dqo = NULL;
670 
671 	gve_free_flow_rule_caches(priv);
672 	gve_free_counter_array(priv);
673 	gve_free_notify_blocks(priv);
674 	gve_free_stats_report(priv);
675 	gve_clear_device_resources_ok(priv);
676 }
677 
678 static int gve_unregister_qpl(struct gve_priv *priv,
679 			      struct gve_queue_page_list *qpl)
680 {
681 	int err;
682 
683 	if (!qpl)
684 		return 0;
685 
686 	err = gve_adminq_unregister_page_list(priv, qpl->id);
687 	if (err) {
688 		netif_err(priv, drv, priv->dev,
689 			  "Failed to unregister queue page list %d\n",
690 			  qpl->id);
691 		return err;
692 	}
693 
694 	priv->num_registered_pages -= qpl->num_entries;
695 	return 0;
696 }
697 
698 static int gve_register_qpl(struct gve_priv *priv,
699 			    struct gve_queue_page_list *qpl)
700 {
701 	int pages;
702 	int err;
703 
704 	if (!qpl)
705 		return 0;
706 
707 	pages = qpl->num_entries;
708 
709 	if (pages + priv->num_registered_pages > priv->max_registered_pages) {
710 		netif_err(priv, drv, priv->dev,
711 			  "Reached max number of registered pages %llu > %llu\n",
712 			  pages + priv->num_registered_pages,
713 			  priv->max_registered_pages);
714 		return -EINVAL;
715 	}
716 
717 	err = gve_adminq_register_page_list(priv, qpl);
718 	if (err) {
719 		netif_err(priv, drv, priv->dev,
720 			  "failed to register queue page list %d\n",
721 			  qpl->id);
722 		return err;
723 	}
724 
725 	priv->num_registered_pages += pages;
726 	return 0;
727 }
728 
729 static struct gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx)
730 {
731 	struct gve_tx_ring *tx = &priv->tx[idx];
732 
733 	if (gve_is_gqi(priv))
734 		return tx->tx_fifo.qpl;
735 	else
736 		return tx->dqo.qpl;
737 }
738 
739 static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx)
740 {
741 	struct gve_rx_ring *rx = &priv->rx[idx];
742 
743 	if (gve_is_gqi(priv))
744 		return rx->data.qpl;
745 	else
746 		return rx->dqo.qpl;
747 }
748 
/* Register the queue page lists of the XDP TX queues, starting at the
 * first XDP TX queue id.
 *
 * Return: 0 on success, otherwise the first registration error.
 */
static int gve_register_xdp_qpls(struct gve_priv *priv)
{
	int first = gve_xdp_tx_start_queue_id(priv);
	int qid;
	int ret;

	for (qid = first; qid < first + gve_num_xdp_qpls(priv); qid++) {
		ret = gve_register_qpl(priv, gve_tx_get_qpl(priv, qid));
		/* This failure will trigger a reset - no need to clean up */
		if (ret)
			return ret;
	}

	return 0;
}
764 
765 static int gve_register_qpls(struct gve_priv *priv)
766 {
767 	int num_tx_qpls, num_rx_qpls;
768 	int err;
769 	int i;
770 
771 	num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv),
772 				      gve_is_qpl(priv));
773 	num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
774 
775 	for (i = 0; i < num_tx_qpls; i++) {
776 		err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i));
777 		if (err)
778 			return err;
779 	}
780 
781 	for (i = 0; i < num_rx_qpls; i++) {
782 		err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i));
783 		if (err)
784 			return err;
785 	}
786 
787 	return 0;
788 }
789 
/* Unregister the queue page lists of the XDP TX queues, starting at the
 * first XDP TX queue id.
 *
 * Return: 0 on success, otherwise the first unregistration error.
 */
static int gve_unregister_xdp_qpls(struct gve_priv *priv)
{
	int first = gve_xdp_tx_start_queue_id(priv);
	int qid;
	int ret;

	for (qid = first; qid < first + gve_num_xdp_qpls(priv); qid++) {
		ret = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, qid));
		/* This failure will trigger a reset - no need to clean */
		if (ret)
			return ret;
	}

	return 0;
}
805 
806 static int gve_unregister_qpls(struct gve_priv *priv)
807 {
808 	int num_tx_qpls, num_rx_qpls;
809 	int err;
810 	int i;
811 
812 	num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv),
813 				      gve_is_qpl(priv));
814 	num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
815 
816 	for (i = 0; i < num_tx_qpls; i++) {
817 		err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i));
818 		/* This failure will trigger a reset - no need to clean */
819 		if (err)
820 			return err;
821 	}
822 
823 	for (i = 0; i < num_rx_qpls; i++) {
824 		err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i));
825 		/* This failure will trigger a reset - no need to clean */
826 		if (err)
827 			return err;
828 	}
829 	return 0;
830 }
831 
832 static int gve_create_xdp_rings(struct gve_priv *priv)
833 {
834 	int err;
835 
836 	err = gve_adminq_create_tx_queues(priv,
837 					  gve_xdp_tx_start_queue_id(priv),
838 					  priv->num_xdp_queues);
839 	if (err) {
840 		netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n",
841 			  priv->num_xdp_queues);
842 		/* This failure will trigger a reset - no need to clean
843 		 * up
844 		 */
845 		return err;
846 	}
847 	netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n",
848 		  priv->num_xdp_queues);
849 
850 	return 0;
851 }
852 
853 static int gve_create_rings(struct gve_priv *priv)
854 {
855 	int num_tx_queues = gve_num_tx_queues(priv);
856 	int err;
857 	int i;
858 
859 	err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues);
860 	if (err) {
861 		netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
862 			  num_tx_queues);
863 		/* This failure will trigger a reset - no need to clean
864 		 * up
865 		 */
866 		return err;
867 	}
868 	netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
869 		  num_tx_queues);
870 
871 	err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
872 	if (err) {
873 		netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
874 			  priv->rx_cfg.num_queues);
875 		/* This failure will trigger a reset - no need to clean
876 		 * up
877 		 */
878 		return err;
879 	}
880 	netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
881 		  priv->rx_cfg.num_queues);
882 
883 	if (gve_is_gqi(priv)) {
884 		/* Rx data ring has been prefilled with packet buffers at queue
885 		 * allocation time.
886 		 *
887 		 * Write the doorbell to provide descriptor slots and packet
888 		 * buffers to the NIC.
889 		 */
890 		for (i = 0; i < priv->rx_cfg.num_queues; i++)
891 			gve_rx_write_doorbell(priv, &priv->rx[i]);
892 	} else {
893 		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
894 			/* Post buffers and ring doorbell. */
895 			gve_rx_post_buffers_dqo(&priv->rx[i]);
896 		}
897 	}
898 
899 	return 0;
900 }
901 
902 static void init_xdp_sync_stats(struct gve_priv *priv)
903 {
904 	int start_id = gve_xdp_tx_start_queue_id(priv);
905 	int i;
906 
907 	/* Init stats */
908 	for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
909 		int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
910 
911 		u64_stats_init(&priv->tx[i].statss);
912 		priv->tx[i].ntfy_id = ntfy_idx;
913 	}
914 }
915 
916 static void gve_init_sync_stats(struct gve_priv *priv)
917 {
918 	int i;
919 
920 	for (i = 0; i < priv->tx_cfg.num_queues; i++)
921 		u64_stats_init(&priv->tx[i].statss);
922 
923 	/* Init stats for XDP TX queues */
924 	init_xdp_sync_stats(priv);
925 
926 	for (i = 0; i < priv->rx_cfg.num_queues; i++)
927 		u64_stats_init(&priv->rx[i].statss);
928 }
929 
930 static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv,
931 				      struct gve_tx_alloc_rings_cfg *cfg)
932 {
933 	int num_xdp_queues = priv->xdp_prog ? priv->rx_cfg.num_queues : 0;
934 
935 	cfg->qcfg = &priv->tx_cfg;
936 	cfg->raw_addressing = !gve_is_qpl(priv);
937 	cfg->ring_size = priv->tx_desc_cnt;
938 	cfg->start_idx = 0;
939 	cfg->num_rings = priv->tx_cfg.num_queues + num_xdp_queues;
940 	cfg->tx = priv->tx;
941 }
942 
943 static void gve_tx_stop_rings(struct gve_priv *priv, int start_id, int num_rings)
944 {
945 	int i;
946 
947 	if (!priv->tx)
948 		return;
949 
950 	for (i = start_id; i < start_id + num_rings; i++) {
951 		if (gve_is_gqi(priv))
952 			gve_tx_stop_ring_gqi(priv, i);
953 		else
954 			gve_tx_stop_ring_dqo(priv, i);
955 	}
956 }
957 
/* Start @num_rings TX rings starting at @start_id, using the start routine
 * of the queue format in use.
 */
static void gve_tx_start_rings(struct gve_priv *priv, int start_id,
			       int num_rings)
{
	int end_id = start_id + num_rings;
	int qid;

	for (qid = start_id; qid < end_id; qid++) {
		if (!gve_is_gqi(priv))
			gve_tx_start_ring_dqo(priv, qid);
		else
			gve_tx_start_ring_gqi(priv, qid);
	}
}
970 
971 static int gve_alloc_xdp_rings(struct gve_priv *priv)
972 {
973 	struct gve_tx_alloc_rings_cfg cfg = {0};
974 	int err = 0;
975 
976 	if (!priv->num_xdp_queues)
977 		return 0;
978 
979 	gve_tx_get_curr_alloc_cfg(priv, &cfg);
980 	cfg.start_idx = gve_xdp_tx_start_queue_id(priv);
981 	cfg.num_rings = priv->num_xdp_queues;
982 
983 	err = gve_tx_alloc_rings_gqi(priv, &cfg);
984 	if (err)
985 		return err;
986 
987 	gve_tx_start_rings(priv, cfg.start_idx, cfg.num_rings);
988 	init_xdp_sync_stats(priv);
989 
990 	return 0;
991 }
992 
/* Allocate the TX rings, then the RX rings, with the allocators of the
 * queue format in use. The TX rings are freed again if the RX allocation
 * fails.
 *
 * Return: 0 on success, otherwise the allocation error.
 */
static int gve_queues_mem_alloc(struct gve_priv *priv,
				struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
				struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
	int ret;

	ret = gve_is_gqi(priv) ? gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg) :
				 gve_tx_alloc_rings_dqo(priv, tx_alloc_cfg);
	if (ret)
		return ret;

	ret = gve_is_gqi(priv) ? gve_rx_alloc_rings_gqi(priv, rx_alloc_cfg) :
				 gve_rx_alloc_rings_dqo(priv, rx_alloc_cfg);
	if (!ret)
		return 0;

	/* RX allocation failed: undo the TX allocation */
	if (gve_is_gqi(priv))
		gve_tx_free_rings_gqi(priv, tx_alloc_cfg);
	else
		gve_tx_free_rings_dqo(priv, tx_alloc_cfg);

	return ret;
}
1022 
1023 static int gve_destroy_xdp_rings(struct gve_priv *priv)
1024 {
1025 	int start_id;
1026 	int err;
1027 
1028 	start_id = gve_xdp_tx_start_queue_id(priv);
1029 	err = gve_adminq_destroy_tx_queues(priv,
1030 					   start_id,
1031 					   priv->num_xdp_queues);
1032 	if (err) {
1033 		netif_err(priv, drv, priv->dev,
1034 			  "failed to destroy XDP queues\n");
1035 		/* This failure will trigger a reset - no need to clean up */
1036 		return err;
1037 	}
1038 	netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n");
1039 
1040 	return 0;
1041 }
1042 
1043 static int gve_destroy_rings(struct gve_priv *priv)
1044 {
1045 	int num_tx_queues = gve_num_tx_queues(priv);
1046 	int err;
1047 
1048 	err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues);
1049 	if (err) {
1050 		netif_err(priv, drv, priv->dev,
1051 			  "failed to destroy tx queues\n");
1052 		/* This failure will trigger a reset - no need to clean up */
1053 		return err;
1054 	}
1055 	netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
1056 	err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
1057 	if (err) {
1058 		netif_err(priv, drv, priv->dev,
1059 			  "failed to destroy rx queues\n");
1060 		/* This failure will trigger a reset - no need to clean up */
1061 		return err;
1062 	}
1063 	netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
1064 	return 0;
1065 }
1066 
1067 static void gve_free_xdp_rings(struct gve_priv *priv)
1068 {
1069 	struct gve_tx_alloc_rings_cfg cfg = {0};
1070 
1071 	gve_tx_get_curr_alloc_cfg(priv, &cfg);
1072 	cfg.start_idx = gve_xdp_tx_start_queue_id(priv);
1073 	cfg.num_rings = priv->num_xdp_queues;
1074 
1075 	if (priv->tx) {
1076 		gve_tx_stop_rings(priv, cfg.start_idx, cfg.num_rings);
1077 		gve_tx_free_rings_gqi(priv, &cfg);
1078 	}
1079 }
1080 
/* Free the TX and RX ring memory described by the two configs, using the
 * routines that match the queue format in use (GQI or DQO).
 */
static void gve_queues_mem_free(struct gve_priv *priv,
				struct gve_tx_alloc_rings_cfg *tx_cfg,
				struct gve_rx_alloc_rings_cfg *rx_cfg)
{
	if (!gve_is_gqi(priv)) {
		gve_tx_free_rings_dqo(priv, tx_cfg);
		gve_rx_free_rings_dqo(priv, rx_cfg);
		return;
	}

	gve_tx_free_rings_gqi(priv, tx_cfg);
	gve_rx_free_rings_gqi(priv, rx_cfg);
}
1093 
1094 int gve_alloc_page(struct gve_priv *priv, struct device *dev,
1095 		   struct page **page, dma_addr_t *dma,
1096 		   enum dma_data_direction dir, gfp_t gfp_flags)
1097 {
1098 	*page = alloc_page(gfp_flags);
1099 	if (!*page) {
1100 		priv->page_alloc_fail++;
1101 		return -ENOMEM;
1102 	}
1103 	*dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
1104 	if (dma_mapping_error(dev, *dma)) {
1105 		priv->dma_mapping_error++;
1106 		put_page(*page);
1107 		return -ENOMEM;
1108 	}
1109 	return 0;
1110 }
1111 
1112 struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv,
1113 						      u32 id, int pages)
1114 {
1115 	struct gve_queue_page_list *qpl;
1116 	int err;
1117 	int i;
1118 
1119 	qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL);
1120 	if (!qpl)
1121 		return NULL;
1122 
1123 	qpl->id = id;
1124 	qpl->num_entries = 0;
1125 	qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
1126 	if (!qpl->pages)
1127 		goto abort;
1128 
1129 	qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL);
1130 	if (!qpl->page_buses)
1131 		goto abort;
1132 
1133 	for (i = 0; i < pages; i++) {
1134 		err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
1135 				     &qpl->page_buses[i],
1136 				     gve_qpl_dma_dir(priv, id), GFP_KERNEL);
1137 		if (err)
1138 			goto abort;
1139 		qpl->num_entries++;
1140 	}
1141 
1142 	return qpl;
1143 
1144 abort:
1145 	gve_free_queue_page_list(priv, qpl, id);
1146 	return NULL;
1147 }
1148 
1149 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
1150 		   enum dma_data_direction dir)
1151 {
1152 	if (!dma_mapping_error(dev, dma))
1153 		dma_unmap_page(dev, dma, PAGE_SIZE, dir);
1154 	if (page)
1155 		put_page(page);
1156 }
1157 
1158 void gve_free_queue_page_list(struct gve_priv *priv,
1159 			      struct gve_queue_page_list *qpl,
1160 			      u32 id)
1161 {
1162 	int i;
1163 
1164 	if (!qpl)
1165 		return;
1166 	if (!qpl->pages)
1167 		goto free_qpl;
1168 	if (!qpl->page_buses)
1169 		goto free_pages;
1170 
1171 	for (i = 0; i < qpl->num_entries; i++)
1172 		gve_free_page(&priv->pdev->dev, qpl->pages[i],
1173 			      qpl->page_buses[i], gve_qpl_dma_dir(priv, id));
1174 
1175 	kvfree(qpl->page_buses);
1176 	qpl->page_buses = NULL;
1177 free_pages:
1178 	kvfree(qpl->pages);
1179 	qpl->pages = NULL;
1180 free_qpl:
1181 	kvfree(qpl);
1182 }
1183 
1184 /* Use this to schedule a reset when the device is capable of continuing
1185  * to handle other requests in its current state. If it is not, do a reset
1186  * in thread instead.
1187  */
1188 void gve_schedule_reset(struct gve_priv *priv)
1189 {
1190 	gve_set_do_reset(priv);
1191 	queue_work(priv->gve_wq, &priv->service_task);
1192 }
1193 
1194 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
1195 static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
1196 static void gve_turndown(struct gve_priv *priv);
1197 static void gve_turnup(struct gve_priv *priv);
1198 
1199 static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
1200 {
1201 	struct napi_struct *napi;
1202 	struct gve_rx_ring *rx;
1203 	int err = 0;
1204 	int i, j;
1205 	u32 tx_qid;
1206 
1207 	if (!priv->num_xdp_queues)
1208 		return 0;
1209 
1210 	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1211 		rx = &priv->rx[i];
1212 		napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1213 
1214 		err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i,
1215 				       napi->napi_id);
1216 		if (err)
1217 			goto err;
1218 		err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
1219 						 MEM_TYPE_PAGE_SHARED, NULL);
1220 		if (err)
1221 			goto err;
1222 		rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
1223 		if (rx->xsk_pool) {
1224 			err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
1225 					       napi->napi_id);
1226 			if (err)
1227 				goto err;
1228 			err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
1229 							 MEM_TYPE_XSK_BUFF_POOL, NULL);
1230 			if (err)
1231 				goto err;
1232 			xsk_pool_set_rxq_info(rx->xsk_pool,
1233 					      &rx->xsk_rxq);
1234 		}
1235 	}
1236 
1237 	for (i = 0; i < priv->num_xdp_queues; i++) {
1238 		tx_qid = gve_xdp_tx_queue_id(priv, i);
1239 		priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
1240 	}
1241 	return 0;
1242 
1243 err:
1244 	for (j = i; j >= 0; j--) {
1245 		rx = &priv->rx[j];
1246 		if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
1247 			xdp_rxq_info_unreg(&rx->xdp_rxq);
1248 		if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
1249 			xdp_rxq_info_unreg(&rx->xsk_rxq);
1250 	}
1251 	return err;
1252 }
1253 
1254 static void gve_unreg_xdp_info(struct gve_priv *priv)
1255 {
1256 	int i, tx_qid;
1257 
1258 	if (!priv->num_xdp_queues)
1259 		return;
1260 
1261 	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1262 		struct gve_rx_ring *rx = &priv->rx[i];
1263 
1264 		xdp_rxq_info_unreg(&rx->xdp_rxq);
1265 		if (rx->xsk_pool) {
1266 			xdp_rxq_info_unreg(&rx->xsk_rxq);
1267 			rx->xsk_pool = NULL;
1268 		}
1269 	}
1270 
1271 	for (i = 0; i < priv->num_xdp_queues; i++) {
1272 		tx_qid = gve_xdp_tx_queue_id(priv, i);
1273 		priv->tx[tx_qid].xsk_pool = NULL;
1274 	}
1275 }
1276 
1277 static void gve_drain_page_cache(struct gve_priv *priv)
1278 {
1279 	int i;
1280 
1281 	for (i = 0; i < priv->rx_cfg.num_queues; i++)
1282 		page_frag_cache_drain(&priv->rx[i].page_cache);
1283 }
1284 
1285 static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv,
1286 				      struct gve_rx_alloc_rings_cfg *cfg)
1287 {
1288 	cfg->qcfg = &priv->rx_cfg;
1289 	cfg->qcfg_tx = &priv->tx_cfg;
1290 	cfg->raw_addressing = !gve_is_qpl(priv);
1291 	cfg->enable_header_split = priv->header_split_enabled;
1292 	cfg->ring_size = priv->rx_desc_cnt;
1293 	cfg->packet_buffer_size = gve_is_gqi(priv) ?
1294 				  GVE_DEFAULT_RX_BUFFER_SIZE :
1295 				  priv->data_buffer_size_dqo;
1296 	cfg->rx = priv->rx;
1297 }
1298 
/* Snapshot the current TX and RX allocation settings of @priv into the two
 * caller-provided config structures.
 */
void gve_get_curr_alloc_cfgs(struct gve_priv *priv,
			     struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
			     struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
	/* TX side first, then RX side */
	gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg);

	gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg);
}
1306 
/* Start RX ring @i using the routine for the active queue format. */
static void gve_rx_start_ring(struct gve_priv *priv, int i)
{
	if (!gve_is_gqi(priv)) {
		gve_rx_start_ring_dqo(priv, i);
		return;
	}

	gve_rx_start_ring_gqi(priv, i);
}
1314 
/* Start RX rings 0 .. @num_rings - 1. */
static void gve_rx_start_rings(struct gve_priv *priv, int num_rings)
{
	int ring = 0;

	while (ring < num_rings) {
		gve_rx_start_ring(priv, ring);
		ring++;
	}
}
1322 
/* Stop RX ring @i using the routine for the active queue format. */
static void gve_rx_stop_ring(struct gve_priv *priv, int i)
{
	if (!gve_is_gqi(priv)) {
		gve_rx_stop_ring_dqo(priv, i);
		return;
	}

	gve_rx_stop_ring_gqi(priv, i);
}
1330 
1331 static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings)
1332 {
1333 	int i;
1334 
1335 	if (!priv->rx)
1336 		return;
1337 
1338 	for (i = 0; i < num_rings; i++)
1339 		gve_rx_stop_ring(priv, i);
1340 }
1341 
1342 static void gve_queues_mem_remove(struct gve_priv *priv)
1343 {
1344 	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
1345 	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
1346 
1347 	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1348 	gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1349 	priv->tx = NULL;
1350 	priv->rx = NULL;
1351 }
1352 
1353 /* The passed-in queue memory is stored into priv and the queues are made live.
1354  * No memory is allocated. Passed-in memory is freed on errors.
1355  */
1356 static int gve_queues_start(struct gve_priv *priv,
1357 			    struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1358 			    struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1359 {
1360 	struct net_device *dev = priv->dev;
1361 	int err;
1362 
1363 	/* Record new resources into priv */
1364 	priv->tx = tx_alloc_cfg->tx;
1365 	priv->rx = rx_alloc_cfg->rx;
1366 
1367 	/* Record new configs into priv */
1368 	priv->tx_cfg = *tx_alloc_cfg->qcfg;
1369 	priv->rx_cfg = *rx_alloc_cfg->qcfg;
1370 	priv->tx_desc_cnt = tx_alloc_cfg->ring_size;
1371 	priv->rx_desc_cnt = rx_alloc_cfg->ring_size;
1372 
1373 	if (priv->xdp_prog)
1374 		priv->num_xdp_queues = priv->rx_cfg.num_queues;
1375 	else
1376 		priv->num_xdp_queues = 0;
1377 
1378 	gve_tx_start_rings(priv, 0, tx_alloc_cfg->num_rings);
1379 	gve_rx_start_rings(priv, rx_alloc_cfg->qcfg->num_queues);
1380 	gve_init_sync_stats(priv);
1381 
1382 	err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
1383 	if (err)
1384 		goto stop_and_free_rings;
1385 	err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
1386 	if (err)
1387 		goto stop_and_free_rings;
1388 
1389 	err = gve_reg_xdp_info(priv, dev);
1390 	if (err)
1391 		goto stop_and_free_rings;
1392 
1393 	err = gve_register_qpls(priv);
1394 	if (err)
1395 		goto reset;
1396 
1397 	priv->header_split_enabled = rx_alloc_cfg->enable_header_split;
1398 	priv->data_buffer_size_dqo = rx_alloc_cfg->packet_buffer_size;
1399 
1400 	err = gve_create_rings(priv);
1401 	if (err)
1402 		goto reset;
1403 
1404 	gve_set_device_rings_ok(priv);
1405 
1406 	if (gve_get_report_stats(priv))
1407 		mod_timer(&priv->stats_report_timer,
1408 			  round_jiffies(jiffies +
1409 				msecs_to_jiffies(priv->stats_report_timer_period)));
1410 
1411 	gve_turnup(priv);
1412 	queue_work(priv->gve_wq, &priv->service_task);
1413 	priv->interface_up_cnt++;
1414 	return 0;
1415 
1416 reset:
1417 	if (gve_get_reset_in_progress(priv))
1418 		goto stop_and_free_rings;
1419 	gve_reset_and_teardown(priv, true);
1420 	/* if this fails there is nothing we can do so just ignore the return */
1421 	gve_reset_recovery(priv, false);
1422 	/* return the original error */
1423 	return err;
1424 stop_and_free_rings:
1425 	gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv));
1426 	gve_rx_stop_rings(priv, priv->rx_cfg.num_queues);
1427 	gve_queues_mem_remove(priv);
1428 	return err;
1429 }
1430 
1431 static int gve_open(struct net_device *dev)
1432 {
1433 	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
1434 	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
1435 	struct gve_priv *priv = netdev_priv(dev);
1436 	int err;
1437 
1438 	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1439 
1440 	err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1441 	if (err)
1442 		return err;
1443 
1444 	/* No need to free on error: ownership of resources is lost after
1445 	 * calling gve_queues_start.
1446 	 */
1447 	err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1448 	if (err)
1449 		return err;
1450 
1451 	return 0;
1452 }
1453 
1454 static int gve_queues_stop(struct gve_priv *priv)
1455 {
1456 	int err;
1457 
1458 	netif_carrier_off(priv->dev);
1459 	if (gve_get_device_rings_ok(priv)) {
1460 		gve_turndown(priv);
1461 		gve_drain_page_cache(priv);
1462 		err = gve_destroy_rings(priv);
1463 		if (err)
1464 			goto err;
1465 		err = gve_unregister_qpls(priv);
1466 		if (err)
1467 			goto err;
1468 		gve_clear_device_rings_ok(priv);
1469 	}
1470 	del_timer_sync(&priv->stats_report_timer);
1471 
1472 	gve_unreg_xdp_info(priv);
1473 
1474 	gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv));
1475 	gve_rx_stop_rings(priv, priv->rx_cfg.num_queues);
1476 
1477 	priv->interface_down_cnt++;
1478 	return 0;
1479 
1480 err:
1481 	/* This must have been called from a reset due to the rtnl lock
1482 	 * so just return at this point.
1483 	 */
1484 	if (gve_get_reset_in_progress(priv))
1485 		return err;
1486 	/* Otherwise reset before returning */
1487 	gve_reset_and_teardown(priv, true);
1488 	return gve_reset_recovery(priv, false);
1489 }
1490 
/* ndo_stop handler: stop the queues and, only if that succeeded, free the
 * queue memory installed in priv.
 */
static int gve_close(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	int ret = gve_queues_stop(priv);

	if (!ret)
		gve_queues_mem_remove(priv);

	return ret;
}
1503 
1504 static int gve_remove_xdp_queues(struct gve_priv *priv)
1505 {
1506 	int err;
1507 
1508 	err = gve_destroy_xdp_rings(priv);
1509 	if (err)
1510 		return err;
1511 
1512 	err = gve_unregister_xdp_qpls(priv);
1513 	if (err)
1514 		return err;
1515 
1516 	gve_unreg_xdp_info(priv);
1517 	gve_free_xdp_rings(priv);
1518 
1519 	priv->num_xdp_queues = 0;
1520 	return 0;
1521 }
1522 
1523 static int gve_add_xdp_queues(struct gve_priv *priv)
1524 {
1525 	int err;
1526 
1527 	priv->num_xdp_queues = priv->rx_cfg.num_queues;
1528 
1529 	err = gve_alloc_xdp_rings(priv);
1530 	if (err)
1531 		goto err;
1532 
1533 	err = gve_reg_xdp_info(priv, priv->dev);
1534 	if (err)
1535 		goto free_xdp_rings;
1536 
1537 	err = gve_register_xdp_qpls(priv);
1538 	if (err)
1539 		goto free_xdp_rings;
1540 
1541 	err = gve_create_xdp_rings(priv);
1542 	if (err)
1543 		goto free_xdp_rings;
1544 
1545 	return 0;
1546 
1547 free_xdp_rings:
1548 	gve_free_xdp_rings(priv);
1549 err:
1550 	priv->num_xdp_queues = 0;
1551 	return err;
1552 }
1553 
1554 static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
1555 {
1556 	if (!gve_get_napi_enabled(priv))
1557 		return;
1558 
1559 	if (link_status == netif_carrier_ok(priv->dev))
1560 		return;
1561 
1562 	if (link_status) {
1563 		netdev_info(priv->dev, "Device link is up.\n");
1564 		netif_carrier_on(priv->dev);
1565 	} else {
1566 		netdev_info(priv->dev, "Device link is down.\n");
1567 		netif_carrier_off(priv->dev);
1568 	}
1569 }
1570 
1571 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
1572 		       struct netlink_ext_ack *extack)
1573 {
1574 	struct bpf_prog *old_prog;
1575 	int err = 0;
1576 	u32 status;
1577 
1578 	old_prog = READ_ONCE(priv->xdp_prog);
1579 	if (!netif_running(priv->dev)) {
1580 		WRITE_ONCE(priv->xdp_prog, prog);
1581 		if (old_prog)
1582 			bpf_prog_put(old_prog);
1583 		return 0;
1584 	}
1585 
1586 	gve_turndown(priv);
1587 	if (!old_prog && prog) {
1588 		// Allocate XDP TX queues if an XDP program is
1589 		// being installed
1590 		err = gve_add_xdp_queues(priv);
1591 		if (err)
1592 			goto out;
1593 	} else if (old_prog && !prog) {
1594 		// Remove XDP TX queues if an XDP program is
1595 		// being uninstalled
1596 		err = gve_remove_xdp_queues(priv);
1597 		if (err)
1598 			goto out;
1599 	}
1600 	WRITE_ONCE(priv->xdp_prog, prog);
1601 	if (old_prog)
1602 		bpf_prog_put(old_prog);
1603 
1604 out:
1605 	gve_turnup(priv);
1606 	status = ioread32be(&priv->reg_bar0->device_status);
1607 	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1608 	return err;
1609 }
1610 
1611 static int gve_xsk_pool_enable(struct net_device *dev,
1612 			       struct xsk_buff_pool *pool,
1613 			       u16 qid)
1614 {
1615 	struct gve_priv *priv = netdev_priv(dev);
1616 	struct napi_struct *napi;
1617 	struct gve_rx_ring *rx;
1618 	int tx_qid;
1619 	int err;
1620 
1621 	if (qid >= priv->rx_cfg.num_queues) {
1622 		dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid);
1623 		return -EINVAL;
1624 	}
1625 	if (xsk_pool_get_rx_frame_size(pool) <
1626 	     priv->dev->max_mtu + sizeof(struct ethhdr)) {
1627 		dev_err(&priv->pdev->dev, "xsk pool frame_len too small");
1628 		return -EINVAL;
1629 	}
1630 
1631 	err = xsk_pool_dma_map(pool, &priv->pdev->dev,
1632 			       DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1633 	if (err)
1634 		return err;
1635 
1636 	/* If XDP prog is not installed or interface is down, return. */
1637 	if (!priv->xdp_prog || !netif_running(dev))
1638 		return 0;
1639 
1640 	rx = &priv->rx[qid];
1641 	napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1642 	err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id);
1643 	if (err)
1644 		goto err;
1645 
1646 	err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
1647 					 MEM_TYPE_XSK_BUFF_POOL, NULL);
1648 	if (err)
1649 		goto err;
1650 
1651 	xsk_pool_set_rxq_info(pool, &rx->xsk_rxq);
1652 	rx->xsk_pool = pool;
1653 
1654 	tx_qid = gve_xdp_tx_queue_id(priv, qid);
1655 	priv->tx[tx_qid].xsk_pool = pool;
1656 
1657 	return 0;
1658 err:
1659 	if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
1660 		xdp_rxq_info_unreg(&rx->xsk_rxq);
1661 
1662 	xsk_pool_dma_unmap(pool,
1663 			   DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1664 	return err;
1665 }
1666 
1667 static int gve_xsk_pool_disable(struct net_device *dev,
1668 				u16 qid)
1669 {
1670 	struct gve_priv *priv = netdev_priv(dev);
1671 	struct napi_struct *napi_rx;
1672 	struct napi_struct *napi_tx;
1673 	struct xsk_buff_pool *pool;
1674 	int tx_qid;
1675 
1676 	pool = xsk_get_pool_from_qid(dev, qid);
1677 	if (!pool)
1678 		return -EINVAL;
1679 	if (qid >= priv->rx_cfg.num_queues)
1680 		return -EINVAL;
1681 
1682 	/* If XDP prog is not installed or interface is down, unmap DMA and
1683 	 * return.
1684 	 */
1685 	if (!priv->xdp_prog || !netif_running(dev))
1686 		goto done;
1687 
1688 	napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
1689 	napi_disable(napi_rx); /* make sure current rx poll is done */
1690 
1691 	tx_qid = gve_xdp_tx_queue_id(priv, qid);
1692 	napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
1693 	napi_disable(napi_tx); /* make sure current tx poll is done */
1694 
1695 	priv->rx[qid].xsk_pool = NULL;
1696 	xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
1697 	priv->tx[tx_qid].xsk_pool = NULL;
1698 	smp_mb(); /* Make sure it is visible to the workers on datapath */
1699 
1700 	napi_enable(napi_rx);
1701 	if (gve_rx_work_pending(&priv->rx[qid]))
1702 		napi_schedule(napi_rx);
1703 
1704 	napi_enable(napi_tx);
1705 	if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
1706 		napi_schedule(napi_tx);
1707 
1708 done:
1709 	xsk_pool_dma_unmap(pool,
1710 			   DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1711 	return 0;
1712 }
1713 
1714 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
1715 {
1716 	struct gve_priv *priv = netdev_priv(dev);
1717 	struct napi_struct *napi;
1718 
1719 	if (!gve_get_napi_enabled(priv))
1720 		return -ENETDOWN;
1721 
1722 	if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog)
1723 		return -EINVAL;
1724 
1725 	napi = &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_id)].napi;
1726 	if (!napi_if_scheduled_mark_missed(napi)) {
1727 		/* Call local_bh_enable to trigger SoftIRQ processing */
1728 		local_bh_disable();
1729 		napi_schedule(napi);
1730 		local_bh_enable();
1731 	}
1732 
1733 	return 0;
1734 }
1735 
1736 static int verify_xdp_configuration(struct net_device *dev)
1737 {
1738 	struct gve_priv *priv = netdev_priv(dev);
1739 
1740 	if (dev->features & NETIF_F_LRO) {
1741 		netdev_warn(dev, "XDP is not supported when LRO is on.\n");
1742 		return -EOPNOTSUPP;
1743 	}
1744 
1745 	if (priv->queue_format != GVE_GQI_QPL_FORMAT) {
1746 		netdev_warn(dev, "XDP is not supported in mode %d.\n",
1747 			    priv->queue_format);
1748 		return -EOPNOTSUPP;
1749 	}
1750 
1751 	if (dev->mtu > GVE_DEFAULT_RX_BUFFER_SIZE - sizeof(struct ethhdr) - GVE_RX_PAD) {
1752 		netdev_warn(dev, "XDP is not supported for mtu %d.\n",
1753 			    dev->mtu);
1754 		return -EOPNOTSUPP;
1755 	}
1756 
1757 	if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues ||
1758 	    (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) {
1759 		netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d",
1760 			    priv->rx_cfg.num_queues,
1761 			    priv->tx_cfg.num_queues,
1762 			    priv->tx_cfg.max_queues);
1763 		return -EINVAL;
1764 	}
1765 	return 0;
1766 }
1767 
1768 static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
1769 {
1770 	struct gve_priv *priv = netdev_priv(dev);
1771 	int err;
1772 
1773 	err = verify_xdp_configuration(dev);
1774 	if (err)
1775 		return err;
1776 	switch (xdp->command) {
1777 	case XDP_SETUP_PROG:
1778 		return gve_set_xdp(priv, xdp->prog, xdp->extack);
1779 	case XDP_SETUP_XSK_POOL:
1780 		if (xdp->xsk.pool)
1781 			return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id);
1782 		else
1783 			return gve_xsk_pool_disable(dev, xdp->xsk.queue_id);
1784 	default:
1785 		return -EINVAL;
1786 	}
1787 }
1788 
1789 int gve_flow_rules_reset(struct gve_priv *priv)
1790 {
1791 	if (!priv->max_flow_rules)
1792 		return 0;
1793 
1794 	return gve_adminq_reset_flow_rules(priv);
1795 }
1796 
1797 int gve_adjust_config(struct gve_priv *priv,
1798 		      struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1799 		      struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1800 {
1801 	int err;
1802 
1803 	/* Allocate resources for the new confiugration */
1804 	err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg);
1805 	if (err) {
1806 		netif_err(priv, drv, priv->dev,
1807 			  "Adjust config failed to alloc new queues");
1808 		return err;
1809 	}
1810 
1811 	/* Teardown the device and free existing resources */
1812 	err = gve_close(priv->dev);
1813 	if (err) {
1814 		netif_err(priv, drv, priv->dev,
1815 			  "Adjust config failed to close old queues");
1816 		gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg);
1817 		return err;
1818 	}
1819 
1820 	/* Bring the device back up again with the new resources. */
1821 	err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg);
1822 	if (err) {
1823 		netif_err(priv, drv, priv->dev,
1824 			  "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n");
1825 		/* No need to free on error: ownership of resources is lost after
1826 		 * calling gve_queues_start.
1827 		 */
1828 		gve_turndown(priv);
1829 		return err;
1830 	}
1831 
1832 	return 0;
1833 }
1834 
1835 int gve_adjust_queues(struct gve_priv *priv,
1836 		      struct gve_queue_config new_rx_config,
1837 		      struct gve_queue_config new_tx_config)
1838 {
1839 	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
1840 	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
1841 	int num_xdp_queues;
1842 	int err;
1843 
1844 	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1845 
1846 	/* Relay the new config from ethtool */
1847 	tx_alloc_cfg.qcfg = &new_tx_config;
1848 	rx_alloc_cfg.qcfg_tx = &new_tx_config;
1849 	rx_alloc_cfg.qcfg = &new_rx_config;
1850 	tx_alloc_cfg.num_rings = new_tx_config.num_queues;
1851 
1852 	/* Add dedicated XDP TX queues if enabled. */
1853 	num_xdp_queues = priv->xdp_prog ? new_rx_config.num_queues : 0;
1854 	tx_alloc_cfg.num_rings += num_xdp_queues;
1855 
1856 	if (netif_running(priv->dev)) {
1857 		err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1858 		return err;
1859 	}
1860 	/* Set the config for the next up. */
1861 	priv->tx_cfg = new_tx_config;
1862 	priv->rx_cfg = new_rx_config;
1863 
1864 	return 0;
1865 }
1866 
1867 static void gve_turndown(struct gve_priv *priv)
1868 {
1869 	int idx;
1870 
1871 	if (netif_carrier_ok(priv->dev))
1872 		netif_carrier_off(priv->dev);
1873 
1874 	if (!gve_get_napi_enabled(priv))
1875 		return;
1876 
1877 	/* Disable napi to prevent more work from coming in */
1878 	for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1879 		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1880 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1881 
1882 		if (!gve_tx_was_added_to_block(priv, idx))
1883 			continue;
1884 
1885 		if (idx < priv->tx_cfg.num_queues)
1886 			netif_queue_set_napi(priv->dev, idx,
1887 					     NETDEV_QUEUE_TYPE_TX, NULL);
1888 
1889 		napi_disable(&block->napi);
1890 	}
1891 	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1892 		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1893 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1894 
1895 		if (!gve_rx_was_added_to_block(priv, idx))
1896 			continue;
1897 
1898 		netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX,
1899 				     NULL);
1900 		napi_disable(&block->napi);
1901 	}
1902 
1903 	/* Stop tx queues */
1904 	netif_tx_disable(priv->dev);
1905 
1906 	gve_clear_napi_enabled(priv);
1907 	gve_clear_report_stats(priv);
1908 
1909 	/* Make sure that all traffic is finished processing. */
1910 	synchronize_net();
1911 }
1912 
1913 static void gve_turnup(struct gve_priv *priv)
1914 {
1915 	int idx;
1916 
1917 	/* Start the tx queues */
1918 	netif_tx_start_all_queues(priv->dev);
1919 
1920 	/* Enable napi and unmask interrupts for all queues */
1921 	for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1922 		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1923 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1924 
1925 		if (!gve_tx_was_added_to_block(priv, idx))
1926 			continue;
1927 
1928 		napi_enable(&block->napi);
1929 
1930 		if (idx < priv->tx_cfg.num_queues)
1931 			netif_queue_set_napi(priv->dev, idx,
1932 					     NETDEV_QUEUE_TYPE_TX,
1933 					     &block->napi);
1934 
1935 		if (gve_is_gqi(priv)) {
1936 			iowrite32be(0, gve_irq_doorbell(priv, block));
1937 		} else {
1938 			gve_set_itr_coalesce_usecs_dqo(priv, block,
1939 						       priv->tx_coalesce_usecs);
1940 		}
1941 
1942 		/* Any descs written by the NIC before this barrier will be
1943 		 * handled by the one-off napi schedule below. Whereas any
1944 		 * descs after the barrier will generate interrupts.
1945 		 */
1946 		mb();
1947 		napi_schedule(&block->napi);
1948 	}
1949 	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1950 		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1951 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1952 
1953 		if (!gve_rx_was_added_to_block(priv, idx))
1954 			continue;
1955 
1956 		napi_enable(&block->napi);
1957 		netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX,
1958 				     &block->napi);
1959 
1960 		if (gve_is_gqi(priv)) {
1961 			iowrite32be(0, gve_irq_doorbell(priv, block));
1962 		} else {
1963 			gve_set_itr_coalesce_usecs_dqo(priv, block,
1964 						       priv->rx_coalesce_usecs);
1965 		}
1966 
1967 		/* Any descs written by the NIC before this barrier will be
1968 		 * handled by the one-off napi schedule below. Whereas any
1969 		 * descs after the barrier will generate interrupts.
1970 		 */
1971 		mb();
1972 		napi_schedule(&block->napi);
1973 	}
1974 
1975 	gve_set_napi_enabled(priv);
1976 }
1977 
1978 static void gve_turnup_and_check_status(struct gve_priv *priv)
1979 {
1980 	u32 status;
1981 
1982 	gve_turnup(priv);
1983 	status = ioread32be(&priv->reg_bar0->device_status);
1984 	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1985 }
1986 
1987 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
1988 {
1989 	struct gve_notify_block *block;
1990 	struct gve_tx_ring *tx = NULL;
1991 	struct gve_priv *priv;
1992 	u32 last_nic_done;
1993 	u32 current_time;
1994 	u32 ntfy_idx;
1995 
1996 	netdev_info(dev, "Timeout on tx queue, %d", txqueue);
1997 	priv = netdev_priv(dev);
1998 	if (txqueue > priv->tx_cfg.num_queues)
1999 		goto reset;
2000 
2001 	ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
2002 	if (ntfy_idx >= priv->num_ntfy_blks)
2003 		goto reset;
2004 
2005 	block = &priv->ntfy_blocks[ntfy_idx];
2006 	tx = block->tx;
2007 
2008 	current_time = jiffies_to_msecs(jiffies);
2009 	if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
2010 		goto reset;
2011 
2012 	/* Check to see if there are missed completions, which will allow us to
2013 	 * kick the queue.
2014 	 */
2015 	last_nic_done = gve_tx_load_event_counter(priv, tx);
2016 	if (last_nic_done - tx->done) {
2017 		netdev_info(dev, "Kicking queue %d", txqueue);
2018 		iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
2019 		napi_schedule(&block->napi);
2020 		tx->last_kick_msec = current_time;
2021 		goto out;
2022 	} // Else reset.
2023 
2024 reset:
2025 	gve_schedule_reset(priv);
2026 
2027 out:
2028 	if (tx)
2029 		tx->queue_timeout++;
2030 	priv->tx_timeo_cnt++;
2031 }
2032 
2033 u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit)
2034 {
2035 	if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE)
2036 		return GVE_MAX_RX_BUFFER_SIZE;
2037 	else
2038 		return GVE_DEFAULT_RX_BUFFER_SIZE;
2039 }
2040 
2041 /* header-split is not supported on non-DQO_RDA yet even if device advertises it */
2042 bool gve_header_split_supported(const struct gve_priv *priv)
2043 {
2044 	return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT;
2045 }
2046 
2047 int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split)
2048 {
2049 	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
2050 	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
2051 	bool enable_hdr_split;
2052 	int err = 0;
2053 
2054 	if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN)
2055 		return 0;
2056 
2057 	if (!gve_header_split_supported(priv)) {
2058 		dev_err(&priv->pdev->dev, "Header-split not supported\n");
2059 		return -EOPNOTSUPP;
2060 	}
2061 
2062 	if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED)
2063 		enable_hdr_split = true;
2064 	else
2065 		enable_hdr_split = false;
2066 
2067 	if (enable_hdr_split == priv->header_split_enabled)
2068 		return 0;
2069 
2070 	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
2071 
2072 	rx_alloc_cfg.enable_header_split = enable_hdr_split;
2073 	rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split);
2074 
2075 	if (netif_running(priv->dev))
2076 		err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
2077 	return err;
2078 }
2079 
2080 static int gve_set_features(struct net_device *netdev,
2081 			    netdev_features_t features)
2082 {
2083 	const netdev_features_t orig_features = netdev->features;
2084 	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
2085 	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
2086 	struct gve_priv *priv = netdev_priv(netdev);
2087 	int err;
2088 
2089 	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
2090 
2091 	if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
2092 		netdev->features ^= NETIF_F_LRO;
2093 		if (netif_running(netdev)) {
2094 			err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
2095 			if (err)
2096 				goto revert_features;
2097 		}
2098 	}
2099 	if ((netdev->features & NETIF_F_NTUPLE) && !(features & NETIF_F_NTUPLE)) {
2100 		err = gve_flow_rules_reset(priv);
2101 		if (err)
2102 			goto revert_features;
2103 	}
2104 
2105 	return 0;
2106 
2107 revert_features:
2108 	netdev->features = orig_features;
2109 	return err;
2110 }
2111 
2112 static const struct net_device_ops gve_netdev_ops = {
2113 	.ndo_start_xmit		=	gve_start_xmit,
2114 	.ndo_features_check	=	gve_features_check,
2115 	.ndo_open		=	gve_open,
2116 	.ndo_stop		=	gve_close,
2117 	.ndo_get_stats64	=	gve_get_stats,
2118 	.ndo_tx_timeout         =       gve_tx_timeout,
2119 	.ndo_set_features	=	gve_set_features,
2120 	.ndo_bpf		=	gve_xdp,
2121 	.ndo_xdp_xmit		=	gve_xdp_xmit,
2122 	.ndo_xsk_wakeup		=	gve_xsk_wakeup,
2123 };
2124 
2125 static void gve_handle_status(struct gve_priv *priv, u32 status)
2126 {
2127 	if (GVE_DEVICE_STATUS_RESET_MASK & status) {
2128 		dev_info(&priv->pdev->dev, "Device requested reset.\n");
2129 		gve_set_do_reset(priv);
2130 	}
2131 	if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
2132 		priv->stats_report_trigger_cnt++;
2133 		gve_set_do_report_stats(priv);
2134 	}
2135 }
2136 
2137 static void gve_handle_reset(struct gve_priv *priv)
2138 {
2139 	/* A service task will be scheduled at the end of probe to catch any
2140 	 * resets that need to happen, and we don't want to reset until
2141 	 * probe is done.
2142 	 */
2143 	if (gve_get_probe_in_progress(priv))
2144 		return;
2145 
2146 	if (gve_get_do_reset(priv)) {
2147 		rtnl_lock();
2148 		gve_reset(priv, false);
2149 		rtnl_unlock();
2150 	}
2151 }
2152 
2153 void gve_handle_report_stats(struct gve_priv *priv)
2154 {
2155 	struct stats *stats = priv->stats_report->stats;
2156 	int idx, stats_idx = 0;
2157 	unsigned int start = 0;
2158 	u64 tx_bytes;
2159 
2160 	if (!gve_get_report_stats(priv))
2161 		return;
2162 
2163 	be64_add_cpu(&priv->stats_report->written_count, 1);
2164 	/* tx stats */
2165 	if (priv->tx) {
2166 		for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
2167 			u32 last_completion = 0;
2168 			u32 tx_frames = 0;
2169 
2170 			/* DQO doesn't currently support these metrics. */
2171 			if (gve_is_gqi(priv)) {
2172 				last_completion = priv->tx[idx].done;
2173 				tx_frames = priv->tx[idx].req;
2174 			}
2175 
2176 			do {
2177 				start = u64_stats_fetch_begin(&priv->tx[idx].statss);
2178 				tx_bytes = priv->tx[idx].bytes_done;
2179 			} while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
2180 			stats[stats_idx++] = (struct stats) {
2181 				.stat_name = cpu_to_be32(TX_WAKE_CNT),
2182 				.value = cpu_to_be64(priv->tx[idx].wake_queue),
2183 				.queue_id = cpu_to_be32(idx),
2184 			};
2185 			stats[stats_idx++] = (struct stats) {
2186 				.stat_name = cpu_to_be32(TX_STOP_CNT),
2187 				.value = cpu_to_be64(priv->tx[idx].stop_queue),
2188 				.queue_id = cpu_to_be32(idx),
2189 			};
2190 			stats[stats_idx++] = (struct stats) {
2191 				.stat_name = cpu_to_be32(TX_FRAMES_SENT),
2192 				.value = cpu_to_be64(tx_frames),
2193 				.queue_id = cpu_to_be32(idx),
2194 			};
2195 			stats[stats_idx++] = (struct stats) {
2196 				.stat_name = cpu_to_be32(TX_BYTES_SENT),
2197 				.value = cpu_to_be64(tx_bytes),
2198 				.queue_id = cpu_to_be32(idx),
2199 			};
2200 			stats[stats_idx++] = (struct stats) {
2201 				.stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
2202 				.value = cpu_to_be64(last_completion),
2203 				.queue_id = cpu_to_be32(idx),
2204 			};
2205 			stats[stats_idx++] = (struct stats) {
2206 				.stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
2207 				.value = cpu_to_be64(priv->tx[idx].queue_timeout),
2208 				.queue_id = cpu_to_be32(idx),
2209 			};
2210 		}
2211 	}
2212 	/* rx stats */
2213 	if (priv->rx) {
2214 		for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
2215 			stats[stats_idx++] = (struct stats) {
2216 				.stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
2217 				.value = cpu_to_be64(priv->rx[idx].desc.seqno),
2218 				.queue_id = cpu_to_be32(idx),
2219 			};
2220 			stats[stats_idx++] = (struct stats) {
2221 				.stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
2222 				.value = cpu_to_be64(priv->rx[0].fill_cnt),
2223 				.queue_id = cpu_to_be32(idx),
2224 			};
2225 		}
2226 	}
2227 }
2228 
2229 /* Handle NIC status register changes, reset requests and report stats */
2230 static void gve_service_task(struct work_struct *work)
2231 {
2232 	struct gve_priv *priv = container_of(work, struct gve_priv,
2233 					     service_task);
2234 	u32 status = ioread32be(&priv->reg_bar0->device_status);
2235 
2236 	gve_handle_status(priv, status);
2237 
2238 	gve_handle_reset(priv);
2239 	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
2240 }
2241 
2242 static void gve_set_netdev_xdp_features(struct gve_priv *priv)
2243 {
2244 	xdp_features_t xdp_features;
2245 
2246 	if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
2247 		xdp_features = NETDEV_XDP_ACT_BASIC;
2248 		xdp_features |= NETDEV_XDP_ACT_REDIRECT;
2249 		xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
2250 		xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
2251 	} else {
2252 		xdp_features = 0;
2253 	}
2254 
2255 	xdp_set_features_flag(priv->dev, xdp_features);
2256 }
2257 
2258 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
2259 {
2260 	int num_ntfy;
2261 	int err;
2262 
2263 	/* Set up the adminq */
2264 	err = gve_adminq_alloc(&priv->pdev->dev, priv);
2265 	if (err) {
2266 		dev_err(&priv->pdev->dev,
2267 			"Failed to alloc admin queue: err=%d\n", err);
2268 		return err;
2269 	}
2270 
2271 	err = gve_verify_driver_compatibility(priv);
2272 	if (err) {
2273 		dev_err(&priv->pdev->dev,
2274 			"Could not verify driver compatibility: err=%d\n", err);
2275 		goto err;
2276 	}
2277 
2278 	priv->num_registered_pages = 0;
2279 
2280 	if (skip_describe_device)
2281 		goto setup_device;
2282 
2283 	priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
2284 	/* Get the initial information we need from the device */
2285 	err = gve_adminq_describe_device(priv);
2286 	if (err) {
2287 		dev_err(&priv->pdev->dev,
2288 			"Could not get device information: err=%d\n", err);
2289 		goto err;
2290 	}
2291 	priv->dev->mtu = priv->dev->max_mtu;
2292 	num_ntfy = pci_msix_vec_count(priv->pdev);
2293 	if (num_ntfy <= 0) {
2294 		dev_err(&priv->pdev->dev,
2295 			"could not count MSI-x vectors: err=%d\n", num_ntfy);
2296 		err = num_ntfy;
2297 		goto err;
2298 	} else if (num_ntfy < GVE_MIN_MSIX) {
2299 		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
2300 			GVE_MIN_MSIX, num_ntfy);
2301 		err = -EINVAL;
2302 		goto err;
2303 	}
2304 
2305 	/* Big TCP is only supported on DQ*/
2306 	if (!gve_is_gqi(priv))
2307 		netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);
2308 
2309 	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
2310 	/* gvnic has one Notification Block per MSI-x vector, except for the
2311 	 * management vector
2312 	 */
2313 	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
2314 	priv->mgmt_msix_idx = priv->num_ntfy_blks;
2315 
2316 	priv->tx_cfg.max_queues =
2317 		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
2318 	priv->rx_cfg.max_queues =
2319 		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);
2320 
2321 	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
2322 	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
2323 	if (priv->default_num_queues > 0) {
2324 		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
2325 						priv->tx_cfg.num_queues);
2326 		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
2327 						priv->rx_cfg.num_queues);
2328 	}
2329 
2330 	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
2331 		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
2332 	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
2333 		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
2334 
2335 	if (!gve_is_gqi(priv)) {
2336 		priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
2337 		priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
2338 	}
2339 
2340 setup_device:
2341 	gve_set_netdev_xdp_features(priv);
2342 	err = gve_setup_device_resources(priv);
2343 	if (!err)
2344 		return 0;
2345 err:
2346 	gve_adminq_free(&priv->pdev->dev, priv);
2347 	return err;
2348 }
2349 
2350 static void gve_teardown_priv_resources(struct gve_priv *priv)
2351 {
2352 	gve_teardown_device_resources(priv);
2353 	gve_adminq_free(&priv->pdev->dev, priv);
2354 }
2355 
/* Trigger a device reset; releasing the admin queue is what resets it. */
static void gve_trigger_reset(struct gve_priv *priv)
{
	gve_adminq_release(priv);
}
2361 
2362 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
2363 {
2364 	gve_trigger_reset(priv);
2365 	/* With the reset having already happened, close cannot fail */
2366 	if (was_up)
2367 		gve_close(priv->dev);
2368 	gve_teardown_priv_resources(priv);
2369 }
2370 
2371 static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
2372 {
2373 	int err;
2374 
2375 	err = gve_init_priv(priv, true);
2376 	if (err)
2377 		goto err;
2378 	if (was_up) {
2379 		err = gve_open(priv->dev);
2380 		if (err)
2381 			goto err;
2382 	}
2383 	return 0;
2384 err:
2385 	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
2386 	gve_turndown(priv);
2387 	return err;
2388 }
2389 
2390 int gve_reset(struct gve_priv *priv, bool attempt_teardown)
2391 {
2392 	bool was_up = netif_running(priv->dev);
2393 	int err;
2394 
2395 	dev_info(&priv->pdev->dev, "Performing reset\n");
2396 	gve_clear_do_reset(priv);
2397 	gve_set_reset_in_progress(priv);
2398 	/* If we aren't attempting to teardown normally, just go turndown and
2399 	 * reset right away.
2400 	 */
2401 	if (!attempt_teardown) {
2402 		gve_turndown(priv);
2403 		gve_reset_and_teardown(priv, was_up);
2404 	} else {
2405 		/* Otherwise attempt to close normally */
2406 		if (was_up) {
2407 			err = gve_close(priv->dev);
2408 			/* If that fails reset as we did above */
2409 			if (err)
2410 				gve_reset_and_teardown(priv, was_up);
2411 		}
2412 		/* Clean up any remaining resources */
2413 		gve_teardown_priv_resources(priv);
2414 	}
2415 
2416 	/* Set it all back up */
2417 	err = gve_reset_recovery(priv, was_up);
2418 	gve_clear_reset_in_progress(priv);
2419 	priv->reset_cnt++;
2420 	priv->interface_up_cnt = 0;
2421 	priv->interface_down_cnt = 0;
2422 	priv->stats_report_trigger_cnt = 0;
2423 	return err;
2424 }
2425 
2426 static void gve_write_version(u8 __iomem *driver_version_register)
2427 {
2428 	const char *c = gve_version_prefix;
2429 
2430 	while (*c) {
2431 		writeb(*c, driver_version_register);
2432 		c++;
2433 	}
2434 
2435 	c = gve_version_str;
2436 	while (*c) {
2437 		writeb(*c, driver_version_register);
2438 		c++;
2439 	}
2440 	writeb('\n', driver_version_register);
2441 }
2442 
2443 static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx)
2444 {
2445 	struct gve_priv *priv = netdev_priv(dev);
2446 	struct gve_rx_ring *gve_per_q_mem;
2447 	int err;
2448 
2449 	if (!priv->rx)
2450 		return -EAGAIN;
2451 
2452 	/* Destroying queue 0 while other queues exist is not supported in DQO */
2453 	if (!gve_is_gqi(priv) && idx == 0)
2454 		return -ERANGE;
2455 
2456 	/* Single-queue destruction requires quiescence on all queues */
2457 	gve_turndown(priv);
2458 
2459 	/* This failure will trigger a reset - no need to clean up */
2460 	err = gve_adminq_destroy_single_rx_queue(priv, idx);
2461 	if (err)
2462 		return err;
2463 
2464 	if (gve_is_qpl(priv)) {
2465 		/* This failure will trigger a reset - no need to clean up */
2466 		err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx));
2467 		if (err)
2468 			return err;
2469 	}
2470 
2471 	gve_rx_stop_ring(priv, idx);
2472 
2473 	/* Turn the unstopped queues back up */
2474 	gve_turnup_and_check_status(priv);
2475 
2476 	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
2477 	*gve_per_q_mem = priv->rx[idx];
2478 	memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
2479 	return 0;
2480 }
2481 
2482 static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem)
2483 {
2484 	struct gve_priv *priv = netdev_priv(dev);
2485 	struct gve_rx_alloc_rings_cfg cfg = {0};
2486 	struct gve_rx_ring *gve_per_q_mem;
2487 
2488 	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
2489 	gve_rx_get_curr_alloc_cfg(priv, &cfg);
2490 
2491 	if (gve_is_gqi(priv))
2492 		gve_rx_free_ring_gqi(priv, gve_per_q_mem, &cfg);
2493 	else
2494 		gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg);
2495 }
2496 
2497 static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem,
2498 				  int idx)
2499 {
2500 	struct gve_priv *priv = netdev_priv(dev);
2501 	struct gve_rx_alloc_rings_cfg cfg = {0};
2502 	struct gve_rx_ring *gve_per_q_mem;
2503 	int err;
2504 
2505 	if (!priv->rx)
2506 		return -EAGAIN;
2507 
2508 	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
2509 	gve_rx_get_curr_alloc_cfg(priv, &cfg);
2510 
2511 	if (gve_is_gqi(priv))
2512 		err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx);
2513 	else
2514 		err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx);
2515 
2516 	return err;
2517 }
2518 
2519 static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx)
2520 {
2521 	struct gve_priv *priv = netdev_priv(dev);
2522 	struct gve_rx_ring *gve_per_q_mem;
2523 	int err;
2524 
2525 	if (!priv->rx)
2526 		return -EAGAIN;
2527 
2528 	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
2529 	priv->rx[idx] = *gve_per_q_mem;
2530 
2531 	/* Single-queue creation requires quiescence on all queues */
2532 	gve_turndown(priv);
2533 
2534 	gve_rx_start_ring(priv, idx);
2535 
2536 	if (gve_is_qpl(priv)) {
2537 		/* This failure will trigger a reset - no need to clean up */
2538 		err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx));
2539 		if (err)
2540 			goto abort;
2541 	}
2542 
2543 	/* This failure will trigger a reset - no need to clean up */
2544 	err = gve_adminq_create_single_rx_queue(priv, idx);
2545 	if (err)
2546 		goto abort;
2547 
2548 	if (gve_is_gqi(priv))
2549 		gve_rx_write_doorbell(priv, &priv->rx[idx]);
2550 	else
2551 		gve_rx_post_buffers_dqo(&priv->rx[idx]);
2552 
2553 	/* Turn the unstopped queues back up */
2554 	gve_turnup_and_check_status(priv);
2555 	return 0;
2556 
2557 abort:
2558 	gve_rx_stop_ring(priv, idx);
2559 
2560 	/* All failures in this func result in a reset, by clearing the struct
2561 	 * at idx, we prevent a double free when that reset runs. The reset,
2562 	 * which needs the rtnl lock, will not run till this func returns and
2563 	 * its caller gives up the lock.
2564 	 */
2565 	memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
2566 	return err;
2567 }
2568 
2569 static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = {
2570 	.ndo_queue_mem_size	=	sizeof(struct gve_rx_ring),
2571 	.ndo_queue_mem_alloc	=	gve_rx_queue_mem_alloc,
2572 	.ndo_queue_mem_free	=	gve_rx_queue_mem_free,
2573 	.ndo_queue_start	=	gve_rx_queue_start,
2574 	.ndo_queue_stop		=	gve_rx_queue_stop,
2575 };
2576 
2577 static void gve_get_rx_queue_stats(struct net_device *dev, int idx,
2578 				   struct netdev_queue_stats_rx *rx_stats)
2579 {
2580 	struct gve_priv *priv = netdev_priv(dev);
2581 	struct gve_rx_ring *rx = &priv->rx[idx];
2582 	unsigned int start;
2583 
2584 	do {
2585 		start = u64_stats_fetch_begin(&rx->statss);
2586 		rx_stats->packets = rx->rpackets;
2587 		rx_stats->bytes = rx->rbytes;
2588 		rx_stats->alloc_fail = rx->rx_skb_alloc_fail +
2589 				       rx->rx_buf_alloc_fail;
2590 	} while (u64_stats_fetch_retry(&rx->statss, start));
2591 }
2592 
2593 static void gve_get_tx_queue_stats(struct net_device *dev, int idx,
2594 				   struct netdev_queue_stats_tx *tx_stats)
2595 {
2596 	struct gve_priv *priv = netdev_priv(dev);
2597 	struct gve_tx_ring *tx = &priv->tx[idx];
2598 	unsigned int start;
2599 
2600 	do {
2601 		start = u64_stats_fetch_begin(&tx->statss);
2602 		tx_stats->packets = tx->pkt_done;
2603 		tx_stats->bytes = tx->bytes_done;
2604 	} while (u64_stats_fetch_retry(&tx->statss, start));
2605 }
2606 
2607 static void gve_get_base_stats(struct net_device *dev,
2608 			       struct netdev_queue_stats_rx *rx,
2609 			       struct netdev_queue_stats_tx *tx)
2610 {
2611 	rx->packets = 0;
2612 	rx->bytes = 0;
2613 	rx->alloc_fail = 0;
2614 
2615 	tx->packets = 0;
2616 	tx->bytes = 0;
2617 }
2618 
2619 static const struct netdev_stat_ops gve_stat_ops = {
2620 	.get_queue_stats_rx	= gve_get_rx_queue_stats,
2621 	.get_queue_stats_tx	= gve_get_tx_queue_stats,
2622 	.get_base_stats		= gve_get_base_stats,
2623 };
2624 
2625 static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
2626 {
2627 	int max_tx_queues, max_rx_queues;
2628 	struct net_device *dev;
2629 	__be32 __iomem *db_bar;
2630 	struct gve_registers __iomem *reg_bar;
2631 	struct gve_priv *priv;
2632 	int err;
2633 
2634 	err = pci_enable_device(pdev);
2635 	if (err)
2636 		return err;
2637 
2638 	err = pci_request_regions(pdev, gve_driver_name);
2639 	if (err)
2640 		goto abort_with_enabled;
2641 
2642 	pci_set_master(pdev);
2643 
2644 	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
2645 	if (err) {
2646 		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
2647 		goto abort_with_pci_region;
2648 	}
2649 
2650 	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
2651 	if (!reg_bar) {
2652 		dev_err(&pdev->dev, "Failed to map pci bar!\n");
2653 		err = -ENOMEM;
2654 		goto abort_with_pci_region;
2655 	}
2656 
2657 	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
2658 	if (!db_bar) {
2659 		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
2660 		err = -ENOMEM;
2661 		goto abort_with_reg_bar;
2662 	}
2663 
2664 	gve_write_version(&reg_bar->driver_version);
2665 	/* Get max queues to alloc etherdev */
2666 	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
2667 	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
2668 	/* Alloc and setup the netdev and priv */
2669 	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
2670 	if (!dev) {
2671 		dev_err(&pdev->dev, "could not allocate netdev\n");
2672 		err = -ENOMEM;
2673 		goto abort_with_db_bar;
2674 	}
2675 	SET_NETDEV_DEV(dev, &pdev->dev);
2676 	pci_set_drvdata(pdev, dev);
2677 	dev->ethtool_ops = &gve_ethtool_ops;
2678 	dev->netdev_ops = &gve_netdev_ops;
2679 	dev->queue_mgmt_ops = &gve_queue_mgmt_ops;
2680 	dev->stat_ops = &gve_stat_ops;
2681 
2682 	/* Set default and supported features.
2683 	 *
2684 	 * Features might be set in other locations as well (such as
2685 	 * `gve_adminq_describe_device`).
2686 	 */
2687 	dev->hw_features = NETIF_F_HIGHDMA;
2688 	dev->hw_features |= NETIF_F_SG;
2689 	dev->hw_features |= NETIF_F_HW_CSUM;
2690 	dev->hw_features |= NETIF_F_TSO;
2691 	dev->hw_features |= NETIF_F_TSO6;
2692 	dev->hw_features |= NETIF_F_TSO_ECN;
2693 	dev->hw_features |= NETIF_F_RXCSUM;
2694 	dev->hw_features |= NETIF_F_RXHASH;
2695 	dev->features = dev->hw_features;
2696 	dev->watchdog_timeo = 5 * HZ;
2697 	dev->min_mtu = ETH_MIN_MTU;
2698 	netif_carrier_off(dev);
2699 
2700 	priv = netdev_priv(dev);
2701 	priv->dev = dev;
2702 	priv->pdev = pdev;
2703 	priv->msg_enable = DEFAULT_MSG_LEVEL;
2704 	priv->reg_bar0 = reg_bar;
2705 	priv->db_bar2 = db_bar;
2706 	priv->service_task_flags = 0x0;
2707 	priv->state_flags = 0x0;
2708 	priv->ethtool_flags = 0x0;
2709 	priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE;
2710 	priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;
2711 
2712 	gve_set_probe_in_progress(priv);
2713 	priv->gve_wq = alloc_ordered_workqueue("gve", 0);
2714 	if (!priv->gve_wq) {
2715 		dev_err(&pdev->dev, "Could not allocate workqueue");
2716 		err = -ENOMEM;
2717 		goto abort_with_netdev;
2718 	}
2719 	INIT_WORK(&priv->service_task, gve_service_task);
2720 	INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
2721 	priv->tx_cfg.max_queues = max_tx_queues;
2722 	priv->rx_cfg.max_queues = max_rx_queues;
2723 
2724 	err = gve_init_priv(priv, false);
2725 	if (err)
2726 		goto abort_with_wq;
2727 
2728 	err = register_netdev(dev);
2729 	if (err)
2730 		goto abort_with_gve_init;
2731 
2732 	dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
2733 	dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
2734 	gve_clear_probe_in_progress(priv);
2735 	queue_work(priv->gve_wq, &priv->service_task);
2736 	return 0;
2737 
2738 abort_with_gve_init:
2739 	gve_teardown_priv_resources(priv);
2740 
2741 abort_with_wq:
2742 	destroy_workqueue(priv->gve_wq);
2743 
2744 abort_with_netdev:
2745 	free_netdev(dev);
2746 
2747 abort_with_db_bar:
2748 	pci_iounmap(pdev, db_bar);
2749 
2750 abort_with_reg_bar:
2751 	pci_iounmap(pdev, reg_bar);
2752 
2753 abort_with_pci_region:
2754 	pci_release_regions(pdev);
2755 
2756 abort_with_enabled:
2757 	pci_disable_device(pdev);
2758 	return err;
2759 }
2760 
2761 static void gve_remove(struct pci_dev *pdev)
2762 {
2763 	struct net_device *netdev = pci_get_drvdata(pdev);
2764 	struct gve_priv *priv = netdev_priv(netdev);
2765 	__be32 __iomem *db_bar = priv->db_bar2;
2766 	void __iomem *reg_bar = priv->reg_bar0;
2767 
2768 	unregister_netdev(netdev);
2769 	gve_teardown_priv_resources(priv);
2770 	destroy_workqueue(priv->gve_wq);
2771 	free_netdev(netdev);
2772 	pci_iounmap(pdev, db_bar);
2773 	pci_iounmap(pdev, reg_bar);
2774 	pci_release_regions(pdev);
2775 	pci_disable_device(pdev);
2776 }
2777 
2778 static void gve_shutdown(struct pci_dev *pdev)
2779 {
2780 	struct net_device *netdev = pci_get_drvdata(pdev);
2781 	struct gve_priv *priv = netdev_priv(netdev);
2782 	bool was_up = netif_running(priv->dev);
2783 
2784 	rtnl_lock();
2785 	if (was_up && gve_close(priv->dev)) {
2786 		/* If the dev was up, attempt to close, if close fails, reset */
2787 		gve_reset_and_teardown(priv, was_up);
2788 	} else {
2789 		/* If the dev wasn't up or close worked, finish tearing down */
2790 		gve_teardown_priv_resources(priv);
2791 	}
2792 	rtnl_unlock();
2793 }
2794 
2795 #ifdef CONFIG_PM
2796 static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
2797 {
2798 	struct net_device *netdev = pci_get_drvdata(pdev);
2799 	struct gve_priv *priv = netdev_priv(netdev);
2800 	bool was_up = netif_running(priv->dev);
2801 
2802 	priv->suspend_cnt++;
2803 	rtnl_lock();
2804 	if (was_up && gve_close(priv->dev)) {
2805 		/* If the dev was up, attempt to close, if close fails, reset */
2806 		gve_reset_and_teardown(priv, was_up);
2807 	} else {
2808 		/* If the dev wasn't up or close worked, finish tearing down */
2809 		gve_teardown_priv_resources(priv);
2810 	}
2811 	priv->up_before_suspend = was_up;
2812 	rtnl_unlock();
2813 	return 0;
2814 }
2815 
2816 static int gve_resume(struct pci_dev *pdev)
2817 {
2818 	struct net_device *netdev = pci_get_drvdata(pdev);
2819 	struct gve_priv *priv = netdev_priv(netdev);
2820 	int err;
2821 
2822 	priv->resume_cnt++;
2823 	rtnl_lock();
2824 	err = gve_reset_recovery(priv, priv->up_before_suspend);
2825 	rtnl_unlock();
2826 	return err;
2827 }
2828 #endif /* CONFIG_PM */
2829 
2830 static const struct pci_device_id gve_id_table[] = {
2831 	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
2832 	{ }
2833 };
2834 
2835 static struct pci_driver gve_driver = {
2836 	.name		= gve_driver_name,
2837 	.id_table	= gve_id_table,
2838 	.probe		= gve_probe,
2839 	.remove		= gve_remove,
2840 	.shutdown	= gve_shutdown,
2841 #ifdef CONFIG_PM
2842 	.suspend        = gve_suspend,
2843 	.resume         = gve_resume,
2844 #endif
2845 };
2846 
2847 module_pci_driver(gve_driver);
2848 
2849 MODULE_DEVICE_TABLE(pci, gve_id_table);
2850 MODULE_AUTHOR("Google, Inc.");
2851 MODULE_DESCRIPTION("Google Virtual NIC Driver");
2852 MODULE_LICENSE("Dual MIT/GPL");
2853 MODULE_VERSION(GVE_VERSION);
2854