xref: /linux/drivers/net/ethernet/google/gve/gve_main.c (revision 673f816b9e1e92d1f70e1bf5f21b531e0ff9ad6c)
1 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
2 /* Google virtual Ethernet (gve) driver
3  *
4  * Copyright (C) 2015-2021 Google, Inc.
5  */
6 
7 #include <linux/bpf.h>
8 #include <linux/cpumask.h>
9 #include <linux/etherdevice.h>
10 #include <linux/filter.h>
11 #include <linux/interrupt.h>
12 #include <linux/irq.h>
13 #include <linux/module.h>
14 #include <linux/pci.h>
15 #include <linux/sched.h>
16 #include <linux/timer.h>
17 #include <linux/workqueue.h>
18 #include <linux/utsname.h>
19 #include <linux/version.h>
20 #include <net/netdev_queues.h>
21 #include <net/sch_generic.h>
22 #include <net/xdp_sock_drv.h>
23 #include "gve.h"
24 #include "gve_dqo.h"
25 #include "gve_adminq.h"
26 #include "gve_register.h"
27 #include "gve_utils.h"
28 
/* Default RX copybreak setting. */
#define GVE_DEFAULT_RX_COPYBREAK	(256)

/* Message classes enabled by default: driver and link messages. */
#define DEFAULT_MSG_LEVEL	(NETIF_MSG_DRV | NETIF_MSG_LINK)

/* Driver version string and its prefix. */
#define GVE_VERSION_PREFIX	"GVE-"
#define GVE_VERSION		"1.0.0"

/* Minimum amount of time between queue kicks, in msec (10 seconds). */
#define MIN_TX_TIMEOUT_GAP	(10 * 1000)

char gve_driver_name[] = "gve";
const char gve_version_str[] = GVE_VERSION;
static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
41 
42 static int gve_verify_driver_compatibility(struct gve_priv *priv)
43 {
44 	int err;
45 	struct gve_driver_info *driver_info;
46 	dma_addr_t driver_info_bus;
47 
48 	driver_info = dma_alloc_coherent(&priv->pdev->dev,
49 					 sizeof(struct gve_driver_info),
50 					 &driver_info_bus, GFP_KERNEL);
51 	if (!driver_info)
52 		return -ENOMEM;
53 
54 	*driver_info = (struct gve_driver_info) {
55 		.os_type = 1, /* Linux */
56 		.os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
57 		.os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
58 		.os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
59 		.driver_capability_flags = {
60 			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
61 			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
62 			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
63 			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
64 		},
65 	};
66 	strscpy(driver_info->os_version_str1, utsname()->release,
67 		sizeof(driver_info->os_version_str1));
68 	strscpy(driver_info->os_version_str2, utsname()->version,
69 		sizeof(driver_info->os_version_str2));
70 
71 	err = gve_adminq_verify_driver_compatibility(priv,
72 						     sizeof(struct gve_driver_info),
73 						     driver_info_bus);
74 
75 	/* It's ok if the device doesn't support this */
76 	if (err == -EOPNOTSUPP)
77 		err = 0;
78 
79 	dma_free_coherent(&priv->pdev->dev,
80 			  sizeof(struct gve_driver_info),
81 			  driver_info, driver_info_bus);
82 	return err;
83 }
84 
85 static netdev_features_t gve_features_check(struct sk_buff *skb,
86 					    struct net_device *dev,
87 					    netdev_features_t features)
88 {
89 	struct gve_priv *priv = netdev_priv(dev);
90 
91 	if (!gve_is_gqi(priv))
92 		return gve_features_check_dqo(skb, dev, features);
93 
94 	return features;
95 }
96 
97 static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
98 {
99 	struct gve_priv *priv = netdev_priv(dev);
100 
101 	if (gve_is_gqi(priv))
102 		return gve_tx(skb, dev);
103 	else
104 		return gve_tx_dqo(skb, dev);
105 }
106 
107 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
108 {
109 	struct gve_priv *priv = netdev_priv(dev);
110 	unsigned int start;
111 	u64 packets, bytes;
112 	int num_tx_queues;
113 	int ring;
114 
115 	num_tx_queues = gve_num_tx_queues(priv);
116 	if (priv->rx) {
117 		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
118 			do {
119 				start =
120 				  u64_stats_fetch_begin(&priv->rx[ring].statss);
121 				packets = priv->rx[ring].rpackets;
122 				bytes = priv->rx[ring].rbytes;
123 			} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
124 						       start));
125 			s->rx_packets += packets;
126 			s->rx_bytes += bytes;
127 		}
128 	}
129 	if (priv->tx) {
130 		for (ring = 0; ring < num_tx_queues; ring++) {
131 			do {
132 				start =
133 				  u64_stats_fetch_begin(&priv->tx[ring].statss);
134 				packets = priv->tx[ring].pkt_done;
135 				bytes = priv->tx[ring].bytes_done;
136 			} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
137 						       start));
138 			s->tx_packets += packets;
139 			s->tx_bytes += bytes;
140 		}
141 	}
142 }
143 
144 static int gve_alloc_counter_array(struct gve_priv *priv)
145 {
146 	priv->counter_array =
147 		dma_alloc_coherent(&priv->pdev->dev,
148 				   priv->num_event_counters *
149 				   sizeof(*priv->counter_array),
150 				   &priv->counter_array_bus, GFP_KERNEL);
151 	if (!priv->counter_array)
152 		return -ENOMEM;
153 
154 	return 0;
155 }
156 
157 static void gve_free_counter_array(struct gve_priv *priv)
158 {
159 	if (!priv->counter_array)
160 		return;
161 
162 	dma_free_coherent(&priv->pdev->dev,
163 			  priv->num_event_counters *
164 			  sizeof(*priv->counter_array),
165 			  priv->counter_array, priv->counter_array_bus);
166 	priv->counter_array = NULL;
167 }
168 
169 /* NIC requests to report stats */
170 static void gve_stats_report_task(struct work_struct *work)
171 {
172 	struct gve_priv *priv = container_of(work, struct gve_priv,
173 					     stats_report_task);
174 	if (gve_get_do_report_stats(priv)) {
175 		gve_handle_report_stats(priv);
176 		gve_clear_do_report_stats(priv);
177 	}
178 }
179 
180 static void gve_stats_report_schedule(struct gve_priv *priv)
181 {
182 	if (!gve_get_probe_in_progress(priv) &&
183 	    !gve_get_reset_in_progress(priv)) {
184 		gve_set_do_report_stats(priv);
185 		queue_work(priv->gve_wq, &priv->stats_report_task);
186 	}
187 }
188 
189 static void gve_stats_report_timer(struct timer_list *t)
190 {
191 	struct gve_priv *priv = from_timer(priv, t, stats_report_timer);
192 
193 	mod_timer(&priv->stats_report_timer,
194 		  round_jiffies(jiffies +
195 		  msecs_to_jiffies(priv->stats_report_timer_period)));
196 	gve_stats_report_schedule(priv);
197 }
198 
199 static int gve_alloc_stats_report(struct gve_priv *priv)
200 {
201 	int tx_stats_num, rx_stats_num;
202 
203 	tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
204 		       gve_num_tx_queues(priv);
205 	rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
206 		       priv->rx_cfg.num_queues;
207 	priv->stats_report_len = struct_size(priv->stats_report, stats,
208 					     size_add(tx_stats_num, rx_stats_num));
209 	priv->stats_report =
210 		dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
211 				   &priv->stats_report_bus, GFP_KERNEL);
212 	if (!priv->stats_report)
213 		return -ENOMEM;
214 	/* Set up timer for the report-stats task */
215 	timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
216 	priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
217 	return 0;
218 }
219 
220 static void gve_free_stats_report(struct gve_priv *priv)
221 {
222 	if (!priv->stats_report)
223 		return;
224 
225 	del_timer_sync(&priv->stats_report_timer);
226 	dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
227 			  priv->stats_report, priv->stats_report_bus);
228 	priv->stats_report = NULL;
229 }
230 
231 static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
232 {
233 	struct gve_priv *priv = arg;
234 
235 	queue_work(priv->gve_wq, &priv->service_task);
236 	return IRQ_HANDLED;
237 }
238 
239 static irqreturn_t gve_intr(int irq, void *arg)
240 {
241 	struct gve_notify_block *block = arg;
242 	struct gve_priv *priv = block->priv;
243 
244 	iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
245 	napi_schedule_irqoff(&block->napi);
246 	return IRQ_HANDLED;
247 }
248 
249 static irqreturn_t gve_intr_dqo(int irq, void *arg)
250 {
251 	struct gve_notify_block *block = arg;
252 
253 	/* Interrupts are automatically masked */
254 	napi_schedule_irqoff(&block->napi);
255 	return IRQ_HANDLED;
256 }
257 
258 static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq)
259 {
260 	int cpu_curr = smp_processor_id();
261 	const struct cpumask *aff_mask;
262 
263 	aff_mask = irq_get_effective_affinity_mask(irq);
264 	if (unlikely(!aff_mask))
265 		return 1;
266 
267 	return cpumask_test_cpu(cpu_curr, aff_mask);
268 }
269 
270 int gve_napi_poll(struct napi_struct *napi, int budget)
271 {
272 	struct gve_notify_block *block;
273 	__be32 __iomem *irq_doorbell;
274 	bool reschedule = false;
275 	struct gve_priv *priv;
276 	int work_done = 0;
277 
278 	block = container_of(napi, struct gve_notify_block, napi);
279 	priv = block->priv;
280 
281 	if (block->tx) {
282 		if (block->tx->q_num < priv->tx_cfg.num_queues)
283 			reschedule |= gve_tx_poll(block, budget);
284 		else if (budget)
285 			reschedule |= gve_xdp_poll(block, budget);
286 	}
287 
288 	if (!budget)
289 		return 0;
290 
291 	if (block->rx) {
292 		work_done = gve_rx_poll(block, budget);
293 		reschedule |= work_done == budget;
294 	}
295 
296 	if (reschedule)
297 		return budget;
298 
299        /* Complete processing - don't unmask irq if busy polling is enabled */
300 	if (likely(napi_complete_done(napi, work_done))) {
301 		irq_doorbell = gve_irq_doorbell(priv, block);
302 		iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
303 
304 		/* Ensure IRQ ACK is visible before we check pending work.
305 		 * If queue had issued updates, it would be truly visible.
306 		 */
307 		mb();
308 
309 		if (block->tx)
310 			reschedule |= gve_tx_clean_pending(priv, block->tx);
311 		if (block->rx)
312 			reschedule |= gve_rx_work_pending(block->rx);
313 
314 		if (reschedule && napi_schedule(napi))
315 			iowrite32be(GVE_IRQ_MASK, irq_doorbell);
316 	}
317 	return work_done;
318 }
319 
320 int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
321 {
322 	struct gve_notify_block *block =
323 		container_of(napi, struct gve_notify_block, napi);
324 	struct gve_priv *priv = block->priv;
325 	bool reschedule = false;
326 	int work_done = 0;
327 
328 	if (block->tx)
329 		reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
330 
331 	if (!budget)
332 		return 0;
333 
334 	if (block->rx) {
335 		work_done = gve_rx_poll_dqo(block, budget);
336 		reschedule |= work_done == budget;
337 	}
338 
339 	if (reschedule) {
340 		/* Reschedule by returning budget only if already on the correct
341 		 * cpu.
342 		 */
343 		if (likely(gve_is_napi_on_home_cpu(priv, block->irq)))
344 			return budget;
345 
346 		/* If not on the cpu with which this queue's irq has affinity
347 		 * with, we avoid rescheduling napi and arm the irq instead so
348 		 * that napi gets rescheduled back eventually onto the right
349 		 * cpu.
350 		 */
351 		if (work_done == budget)
352 			work_done--;
353 	}
354 
355 	if (likely(napi_complete_done(napi, work_done))) {
356 		/* Enable interrupts again.
357 		 *
358 		 * We don't need to repoll afterwards because HW supports the
359 		 * PCI MSI-X PBA feature.
360 		 *
361 		 * Another interrupt would be triggered if a new event came in
362 		 * since the last one.
363 		 */
364 		gve_write_irq_doorbell_dqo(priv, block,
365 					   GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
366 	}
367 
368 	return work_done;
369 }
370 
371 static int gve_alloc_notify_blocks(struct gve_priv *priv)
372 {
373 	int num_vecs_requested = priv->num_ntfy_blks + 1;
374 	unsigned int active_cpus;
375 	int vecs_enabled;
376 	int i, j;
377 	int err;
378 
379 	priv->msix_vectors = kvcalloc(num_vecs_requested,
380 				      sizeof(*priv->msix_vectors), GFP_KERNEL);
381 	if (!priv->msix_vectors)
382 		return -ENOMEM;
383 	for (i = 0; i < num_vecs_requested; i++)
384 		priv->msix_vectors[i].entry = i;
385 	vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
386 					     GVE_MIN_MSIX, num_vecs_requested);
387 	if (vecs_enabled < 0) {
388 		dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
389 			GVE_MIN_MSIX, vecs_enabled);
390 		err = vecs_enabled;
391 		goto abort_with_msix_vectors;
392 	}
393 	if (vecs_enabled != num_vecs_requested) {
394 		int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
395 		int vecs_per_type = new_num_ntfy_blks / 2;
396 		int vecs_left = new_num_ntfy_blks % 2;
397 
398 		priv->num_ntfy_blks = new_num_ntfy_blks;
399 		priv->mgmt_msix_idx = priv->num_ntfy_blks;
400 		priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
401 						vecs_per_type);
402 		priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
403 						vecs_per_type + vecs_left);
404 		dev_err(&priv->pdev->dev,
405 			"Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
406 			vecs_enabled, priv->tx_cfg.max_queues,
407 			priv->rx_cfg.max_queues);
408 		if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
409 			priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
410 		if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
411 			priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
412 	}
413 	/* Half the notification blocks go to TX and half to RX */
414 	active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
415 
416 	/* Setup Management Vector  - the last vector */
417 	snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
418 		 pci_name(priv->pdev));
419 	err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
420 			  gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
421 	if (err) {
422 		dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
423 		goto abort_with_msix_enabled;
424 	}
425 	priv->irq_db_indices =
426 		dma_alloc_coherent(&priv->pdev->dev,
427 				   priv->num_ntfy_blks *
428 				   sizeof(*priv->irq_db_indices),
429 				   &priv->irq_db_indices_bus, GFP_KERNEL);
430 	if (!priv->irq_db_indices) {
431 		err = -ENOMEM;
432 		goto abort_with_mgmt_vector;
433 	}
434 
435 	priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
436 				     sizeof(*priv->ntfy_blocks), GFP_KERNEL);
437 	if (!priv->ntfy_blocks) {
438 		err = -ENOMEM;
439 		goto abort_with_irq_db_indices;
440 	}
441 
442 	/* Setup the other blocks - the first n-1 vectors */
443 	for (i = 0; i < priv->num_ntfy_blks; i++) {
444 		struct gve_notify_block *block = &priv->ntfy_blocks[i];
445 		int msix_idx = i;
446 
447 		snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s",
448 			 i, pci_name(priv->pdev));
449 		block->priv = priv;
450 		err = request_irq(priv->msix_vectors[msix_idx].vector,
451 				  gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
452 				  0, block->name, block);
453 		if (err) {
454 			dev_err(&priv->pdev->dev,
455 				"Failed to receive msix vector %d\n", i);
456 			goto abort_with_some_ntfy_blocks;
457 		}
458 		block->irq = priv->msix_vectors[msix_idx].vector;
459 		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
460 				      get_cpu_mask(i % active_cpus));
461 		block->irq_db_index = &priv->irq_db_indices[i].index;
462 	}
463 	return 0;
464 abort_with_some_ntfy_blocks:
465 	for (j = 0; j < i; j++) {
466 		struct gve_notify_block *block = &priv->ntfy_blocks[j];
467 		int msix_idx = j;
468 
469 		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
470 				      NULL);
471 		free_irq(priv->msix_vectors[msix_idx].vector, block);
472 		block->irq = 0;
473 	}
474 	kvfree(priv->ntfy_blocks);
475 	priv->ntfy_blocks = NULL;
476 abort_with_irq_db_indices:
477 	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
478 			  sizeof(*priv->irq_db_indices),
479 			  priv->irq_db_indices, priv->irq_db_indices_bus);
480 	priv->irq_db_indices = NULL;
481 abort_with_mgmt_vector:
482 	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
483 abort_with_msix_enabled:
484 	pci_disable_msix(priv->pdev);
485 abort_with_msix_vectors:
486 	kvfree(priv->msix_vectors);
487 	priv->msix_vectors = NULL;
488 	return err;
489 }
490 
491 static void gve_free_notify_blocks(struct gve_priv *priv)
492 {
493 	int i;
494 
495 	if (!priv->msix_vectors)
496 		return;
497 
498 	/* Free the irqs */
499 	for (i = 0; i < priv->num_ntfy_blks; i++) {
500 		struct gve_notify_block *block = &priv->ntfy_blocks[i];
501 		int msix_idx = i;
502 
503 		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
504 				      NULL);
505 		free_irq(priv->msix_vectors[msix_idx].vector, block);
506 		block->irq = 0;
507 	}
508 	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
509 	kvfree(priv->ntfy_blocks);
510 	priv->ntfy_blocks = NULL;
511 	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
512 			  sizeof(*priv->irq_db_indices),
513 			  priv->irq_db_indices, priv->irq_db_indices_bus);
514 	priv->irq_db_indices = NULL;
515 	pci_disable_msix(priv->pdev);
516 	kvfree(priv->msix_vectors);
517 	priv->msix_vectors = NULL;
518 }
519 
520 static int gve_setup_device_resources(struct gve_priv *priv)
521 {
522 	int err;
523 
524 	err = gve_alloc_counter_array(priv);
525 	if (err)
526 		return err;
527 	err = gve_alloc_notify_blocks(priv);
528 	if (err)
529 		goto abort_with_counter;
530 	err = gve_alloc_stats_report(priv);
531 	if (err)
532 		goto abort_with_ntfy_blocks;
533 	err = gve_adminq_configure_device_resources(priv,
534 						    priv->counter_array_bus,
535 						    priv->num_event_counters,
536 						    priv->irq_db_indices_bus,
537 						    priv->num_ntfy_blks);
538 	if (unlikely(err)) {
539 		dev_err(&priv->pdev->dev,
540 			"could not setup device_resources: err=%d\n", err);
541 		err = -ENXIO;
542 		goto abort_with_stats_report;
543 	}
544 
545 	if (!gve_is_gqi(priv)) {
546 		priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
547 					       GFP_KERNEL);
548 		if (!priv->ptype_lut_dqo) {
549 			err = -ENOMEM;
550 			goto abort_with_stats_report;
551 		}
552 		err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
553 		if (err) {
554 			dev_err(&priv->pdev->dev,
555 				"Failed to get ptype map: err=%d\n", err);
556 			goto abort_with_ptype_lut;
557 		}
558 	}
559 
560 	err = gve_adminq_report_stats(priv, priv->stats_report_len,
561 				      priv->stats_report_bus,
562 				      GVE_STATS_REPORT_TIMER_PERIOD);
563 	if (err)
564 		dev_err(&priv->pdev->dev,
565 			"Failed to report stats: err=%d\n", err);
566 	gve_set_device_resources_ok(priv);
567 	return 0;
568 
569 abort_with_ptype_lut:
570 	kvfree(priv->ptype_lut_dqo);
571 	priv->ptype_lut_dqo = NULL;
572 abort_with_stats_report:
573 	gve_free_stats_report(priv);
574 abort_with_ntfy_blocks:
575 	gve_free_notify_blocks(priv);
576 abort_with_counter:
577 	gve_free_counter_array(priv);
578 
579 	return err;
580 }
581 
582 static void gve_trigger_reset(struct gve_priv *priv);
583 
584 static void gve_teardown_device_resources(struct gve_priv *priv)
585 {
586 	int err;
587 
588 	/* Tell device its resources are being freed */
589 	if (gve_get_device_resources_ok(priv)) {
590 		/* detach the stats report */
591 		err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
592 		if (err) {
593 			dev_err(&priv->pdev->dev,
594 				"Failed to detach stats report: err=%d\n", err);
595 			gve_trigger_reset(priv);
596 		}
597 		err = gve_adminq_deconfigure_device_resources(priv);
598 		if (err) {
599 			dev_err(&priv->pdev->dev,
600 				"Could not deconfigure device resources: err=%d\n",
601 				err);
602 			gve_trigger_reset(priv);
603 		}
604 	}
605 
606 	kvfree(priv->ptype_lut_dqo);
607 	priv->ptype_lut_dqo = NULL;
608 
609 	gve_free_counter_array(priv);
610 	gve_free_notify_blocks(priv);
611 	gve_free_stats_report(priv);
612 	gve_clear_device_resources_ok(priv);
613 }
614 
615 static int gve_unregister_qpl(struct gve_priv *priv,
616 			      struct gve_queue_page_list *qpl)
617 {
618 	int err;
619 
620 	if (!qpl)
621 		return 0;
622 
623 	err = gve_adminq_unregister_page_list(priv, qpl->id);
624 	if (err) {
625 		netif_err(priv, drv, priv->dev,
626 			  "Failed to unregister queue page list %d\n",
627 			  qpl->id);
628 		return err;
629 	}
630 
631 	priv->num_registered_pages -= qpl->num_entries;
632 	return 0;
633 }
634 
635 static int gve_register_qpl(struct gve_priv *priv,
636 			    struct gve_queue_page_list *qpl)
637 {
638 	int pages;
639 	int err;
640 
641 	if (!qpl)
642 		return 0;
643 
644 	pages = qpl->num_entries;
645 
646 	if (pages + priv->num_registered_pages > priv->max_registered_pages) {
647 		netif_err(priv, drv, priv->dev,
648 			  "Reached max number of registered pages %llu > %llu\n",
649 			  pages + priv->num_registered_pages,
650 			  priv->max_registered_pages);
651 		return -EINVAL;
652 	}
653 
654 	err = gve_adminq_register_page_list(priv, qpl);
655 	if (err) {
656 		netif_err(priv, drv, priv->dev,
657 			  "failed to register queue page list %d\n",
658 			  qpl->id);
659 		return err;
660 	}
661 
662 	priv->num_registered_pages += pages;
663 	return 0;
664 }
665 
666 static struct gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx)
667 {
668 	struct gve_tx_ring *tx = &priv->tx[idx];
669 
670 	if (gve_is_gqi(priv))
671 		return tx->tx_fifo.qpl;
672 	else
673 		return tx->dqo.qpl;
674 }
675 
676 static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx)
677 {
678 	struct gve_rx_ring *rx = &priv->rx[idx];
679 
680 	if (gve_is_gqi(priv))
681 		return rx->data.qpl;
682 	else
683 		return rx->dqo.qpl;
684 }
685 
/* Register the queue page lists of the XDP TX queues, starting at the first
 * XDP TX queue id. Stops at, and returns, the first error; returns 0 when
 * all registrations succeed.
 */
static int gve_register_xdp_qpls(struct gve_priv *priv)
{
	int first = gve_xdp_tx_start_queue_id(priv);
	int qid;
	int ret;

	for (qid = first; qid < first + gve_num_xdp_qpls(priv); qid++) {
		ret = gve_register_qpl(priv, gve_tx_get_qpl(priv, qid));
		/* This failure will trigger a reset - no need to clean up */
		if (ret)
			return ret;
	}

	return 0;
}
701 
702 static int gve_register_qpls(struct gve_priv *priv)
703 {
704 	int num_tx_qpls, num_rx_qpls;
705 	int err;
706 	int i;
707 
708 	num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv),
709 				      gve_is_qpl(priv));
710 	num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
711 
712 	for (i = 0; i < num_tx_qpls; i++) {
713 		err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i));
714 		if (err)
715 			return err;
716 	}
717 
718 	for (i = 0; i < num_rx_qpls; i++) {
719 		err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i));
720 		if (err)
721 			return err;
722 	}
723 
724 	return 0;
725 }
726 
/* Unregister the queue page lists of the XDP TX queues, starting at the
 * first XDP TX queue id. Stops at, and returns, the first error; returns 0
 * when all succeed.
 */
static int gve_unregister_xdp_qpls(struct gve_priv *priv)
{
	int first = gve_xdp_tx_start_queue_id(priv);
	int qid;
	int ret;

	for (qid = first; qid < first + gve_num_xdp_qpls(priv); qid++) {
		ret = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, qid));
		/* This failure will trigger a reset - no need to clean */
		if (ret)
			return ret;
	}

	return 0;
}
742 
743 static int gve_unregister_qpls(struct gve_priv *priv)
744 {
745 	int num_tx_qpls, num_rx_qpls;
746 	int err;
747 	int i;
748 
749 	num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv),
750 				      gve_is_qpl(priv));
751 	num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
752 
753 	for (i = 0; i < num_tx_qpls; i++) {
754 		err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i));
755 		/* This failure will trigger a reset - no need to clean */
756 		if (err)
757 			return err;
758 	}
759 
760 	for (i = 0; i < num_rx_qpls; i++) {
761 		err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i));
762 		/* This failure will trigger a reset - no need to clean */
763 		if (err)
764 			return err;
765 	}
766 	return 0;
767 }
768 
769 static int gve_create_xdp_rings(struct gve_priv *priv)
770 {
771 	int err;
772 
773 	err = gve_adminq_create_tx_queues(priv,
774 					  gve_xdp_tx_start_queue_id(priv),
775 					  priv->num_xdp_queues);
776 	if (err) {
777 		netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n",
778 			  priv->num_xdp_queues);
779 		/* This failure will trigger a reset - no need to clean
780 		 * up
781 		 */
782 		return err;
783 	}
784 	netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n",
785 		  priv->num_xdp_queues);
786 
787 	return 0;
788 }
789 
790 static int gve_create_rings(struct gve_priv *priv)
791 {
792 	int num_tx_queues = gve_num_tx_queues(priv);
793 	int err;
794 	int i;
795 
796 	err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues);
797 	if (err) {
798 		netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
799 			  num_tx_queues);
800 		/* This failure will trigger a reset - no need to clean
801 		 * up
802 		 */
803 		return err;
804 	}
805 	netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
806 		  num_tx_queues);
807 
808 	err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
809 	if (err) {
810 		netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
811 			  priv->rx_cfg.num_queues);
812 		/* This failure will trigger a reset - no need to clean
813 		 * up
814 		 */
815 		return err;
816 	}
817 	netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
818 		  priv->rx_cfg.num_queues);
819 
820 	if (gve_is_gqi(priv)) {
821 		/* Rx data ring has been prefilled with packet buffers at queue
822 		 * allocation time.
823 		 *
824 		 * Write the doorbell to provide descriptor slots and packet
825 		 * buffers to the NIC.
826 		 */
827 		for (i = 0; i < priv->rx_cfg.num_queues; i++)
828 			gve_rx_write_doorbell(priv, &priv->rx[i]);
829 	} else {
830 		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
831 			/* Post buffers and ring doorbell. */
832 			gve_rx_post_buffers_dqo(&priv->rx[i]);
833 		}
834 	}
835 
836 	return 0;
837 }
838 
839 static void init_xdp_sync_stats(struct gve_priv *priv)
840 {
841 	int start_id = gve_xdp_tx_start_queue_id(priv);
842 	int i;
843 
844 	/* Init stats */
845 	for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
846 		int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
847 
848 		u64_stats_init(&priv->tx[i].statss);
849 		priv->tx[i].ntfy_id = ntfy_idx;
850 	}
851 }
852 
853 static void gve_init_sync_stats(struct gve_priv *priv)
854 {
855 	int i;
856 
857 	for (i = 0; i < priv->tx_cfg.num_queues; i++)
858 		u64_stats_init(&priv->tx[i].statss);
859 
860 	/* Init stats for XDP TX queues */
861 	init_xdp_sync_stats(priv);
862 
863 	for (i = 0; i < priv->rx_cfg.num_queues; i++)
864 		u64_stats_init(&priv->rx[i].statss);
865 }
866 
867 static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv,
868 				      struct gve_tx_alloc_rings_cfg *cfg)
869 {
870 	cfg->qcfg = &priv->tx_cfg;
871 	cfg->raw_addressing = !gve_is_qpl(priv);
872 	cfg->ring_size = priv->tx_desc_cnt;
873 	cfg->start_idx = 0;
874 	cfg->num_rings = gve_num_tx_queues(priv);
875 	cfg->tx = priv->tx;
876 }
877 
878 static void gve_tx_stop_rings(struct gve_priv *priv, int start_id, int num_rings)
879 {
880 	int i;
881 
882 	if (!priv->tx)
883 		return;
884 
885 	for (i = start_id; i < start_id + num_rings; i++) {
886 		if (gve_is_gqi(priv))
887 			gve_tx_stop_ring_gqi(priv, i);
888 		else
889 			gve_tx_stop_ring_dqo(priv, i);
890 	}
891 }
892 
/* Start @num_rings TX rings starting at @start_id, using the GQI or DQO
 * start routine as appropriate.
 */
static void gve_tx_start_rings(struct gve_priv *priv, int start_id,
			       int num_rings)
{
	int qid;

	for (qid = start_id; qid < start_id + num_rings; qid++) {
		if (!gve_is_gqi(priv))
			gve_tx_start_ring_dqo(priv, qid);
		else
			gve_tx_start_ring_gqi(priv, qid);
	}
}
905 
906 static int gve_alloc_xdp_rings(struct gve_priv *priv)
907 {
908 	struct gve_tx_alloc_rings_cfg cfg = {0};
909 	int err = 0;
910 
911 	if (!priv->num_xdp_queues)
912 		return 0;
913 
914 	gve_tx_get_curr_alloc_cfg(priv, &cfg);
915 	cfg.start_idx = gve_xdp_tx_start_queue_id(priv);
916 	cfg.num_rings = priv->num_xdp_queues;
917 
918 	err = gve_tx_alloc_rings_gqi(priv, &cfg);
919 	if (err)
920 		return err;
921 
922 	gve_tx_start_rings(priv, cfg.start_idx, cfg.num_rings);
923 	init_xdp_sync_stats(priv);
924 
925 	return 0;
926 }
927 
/* Allocate the TX rings and then the RX rings for the queue format in use.
 *
 * If the RX allocation fails, the TX rings allocated here are freed again.
 * Returns 0 on success or the error of the failing allocator.
 */
static int gve_queues_mem_alloc(struct gve_priv *priv,
				struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
				struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
	int ret;

	ret = gve_is_gqi(priv) ? gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg) :
				 gve_tx_alloc_rings_dqo(priv, tx_alloc_cfg);
	if (ret)
		return ret;

	ret = gve_is_gqi(priv) ? gve_rx_alloc_rings_gqi(priv, rx_alloc_cfg) :
				 gve_rx_alloc_rings_dqo(priv, rx_alloc_cfg);
	if (!ret)
		return 0;

	/* RX allocation failed: give the TX rings back. */
	if (gve_is_gqi(priv))
		gve_tx_free_rings_gqi(priv, tx_alloc_cfg);
	else
		gve_tx_free_rings_dqo(priv, tx_alloc_cfg);

	return ret;
}
957 
958 static int gve_destroy_xdp_rings(struct gve_priv *priv)
959 {
960 	int start_id;
961 	int err;
962 
963 	start_id = gve_xdp_tx_start_queue_id(priv);
964 	err = gve_adminq_destroy_tx_queues(priv,
965 					   start_id,
966 					   priv->num_xdp_queues);
967 	if (err) {
968 		netif_err(priv, drv, priv->dev,
969 			  "failed to destroy XDP queues\n");
970 		/* This failure will trigger a reset - no need to clean up */
971 		return err;
972 	}
973 	netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n");
974 
975 	return 0;
976 }
977 
978 static int gve_destroy_rings(struct gve_priv *priv)
979 {
980 	int num_tx_queues = gve_num_tx_queues(priv);
981 	int err;
982 
983 	err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues);
984 	if (err) {
985 		netif_err(priv, drv, priv->dev,
986 			  "failed to destroy tx queues\n");
987 		/* This failure will trigger a reset - no need to clean up */
988 		return err;
989 	}
990 	netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
991 	err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
992 	if (err) {
993 		netif_err(priv, drv, priv->dev,
994 			  "failed to destroy rx queues\n");
995 		/* This failure will trigger a reset - no need to clean up */
996 		return err;
997 	}
998 	netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
999 	return 0;
1000 }
1001 
1002 static void gve_free_xdp_rings(struct gve_priv *priv)
1003 {
1004 	struct gve_tx_alloc_rings_cfg cfg = {0};
1005 
1006 	gve_tx_get_curr_alloc_cfg(priv, &cfg);
1007 	cfg.start_idx = gve_xdp_tx_start_queue_id(priv);
1008 	cfg.num_rings = priv->num_xdp_queues;
1009 
1010 	if (priv->tx) {
1011 		gve_tx_stop_rings(priv, cfg.start_idx, cfg.num_rings);
1012 		gve_tx_free_rings_gqi(priv, &cfg);
1013 	}
1014 }
1015 
/* Free the TX rings and then the RX rings described by @tx_cfg and @rx_cfg,
 * using the routines of the queue format in use.
 */
static void gve_queues_mem_free(struct gve_priv *priv,
				struct gve_tx_alloc_rings_cfg *tx_cfg,
				struct gve_rx_alloc_rings_cfg *rx_cfg)
{
	if (!gve_is_gqi(priv)) {
		gve_tx_free_rings_dqo(priv, tx_cfg);
		gve_rx_free_rings_dqo(priv, rx_cfg);
		return;
	}

	gve_tx_free_rings_gqi(priv, tx_cfg);
	gve_rx_free_rings_gqi(priv, rx_cfg);
}
1028 
1029 int gve_alloc_page(struct gve_priv *priv, struct device *dev,
1030 		   struct page **page, dma_addr_t *dma,
1031 		   enum dma_data_direction dir, gfp_t gfp_flags)
1032 {
1033 	*page = alloc_page(gfp_flags);
1034 	if (!*page) {
1035 		priv->page_alloc_fail++;
1036 		return -ENOMEM;
1037 	}
1038 	*dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
1039 	if (dma_mapping_error(dev, *dma)) {
1040 		priv->dma_mapping_error++;
1041 		put_page(*page);
1042 		return -ENOMEM;
1043 	}
1044 	return 0;
1045 }
1046 
1047 struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv,
1048 						      u32 id, int pages)
1049 {
1050 	struct gve_queue_page_list *qpl;
1051 	int err;
1052 	int i;
1053 
1054 	qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL);
1055 	if (!qpl)
1056 		return NULL;
1057 
1058 	qpl->id = id;
1059 	qpl->num_entries = 0;
1060 	qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
1061 	if (!qpl->pages)
1062 		goto abort;
1063 
1064 	qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL);
1065 	if (!qpl->page_buses)
1066 		goto abort;
1067 
1068 	for (i = 0; i < pages; i++) {
1069 		err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
1070 				     &qpl->page_buses[i],
1071 				     gve_qpl_dma_dir(priv, id), GFP_KERNEL);
1072 		if (err)
1073 			goto abort;
1074 		qpl->num_entries++;
1075 	}
1076 
1077 	return qpl;
1078 
1079 abort:
1080 	gve_free_queue_page_list(priv, qpl, id);
1081 	return NULL;
1082 }
1083 
1084 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
1085 		   enum dma_data_direction dir)
1086 {
1087 	if (!dma_mapping_error(dev, dma))
1088 		dma_unmap_page(dev, dma, PAGE_SIZE, dir);
1089 	if (page)
1090 		put_page(page);
1091 }
1092 
1093 void gve_free_queue_page_list(struct gve_priv *priv,
1094 			      struct gve_queue_page_list *qpl,
1095 			      u32 id)
1096 {
1097 	int i;
1098 
1099 	if (!qpl)
1100 		return;
1101 	if (!qpl->pages)
1102 		goto free_qpl;
1103 	if (!qpl->page_buses)
1104 		goto free_pages;
1105 
1106 	for (i = 0; i < qpl->num_entries; i++)
1107 		gve_free_page(&priv->pdev->dev, qpl->pages[i],
1108 			      qpl->page_buses[i], gve_qpl_dma_dir(priv, id));
1109 
1110 	kvfree(qpl->page_buses);
1111 	qpl->page_buses = NULL;
1112 free_pages:
1113 	kvfree(qpl->pages);
1114 	qpl->pages = NULL;
1115 free_qpl:
1116 	kvfree(qpl);
1117 }
1118 
1119 /* Use this to schedule a reset when the device is capable of continuing
1120  * to handle other requests in its current state. If it is not, do a reset
1121  * in thread instead.
1122  */
1123 void gve_schedule_reset(struct gve_priv *priv)
1124 {
1125 	gve_set_do_reset(priv);
1126 	queue_work(priv->gve_wq, &priv->service_task);
1127 }
1128 
1129 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
1130 static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
1131 static void gve_turndown(struct gve_priv *priv);
1132 static void gve_turnup(struct gve_priv *priv);
1133 
1134 static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
1135 {
1136 	struct napi_struct *napi;
1137 	struct gve_rx_ring *rx;
1138 	int err = 0;
1139 	int i, j;
1140 	u32 tx_qid;
1141 
1142 	if (!priv->num_xdp_queues)
1143 		return 0;
1144 
1145 	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1146 		rx = &priv->rx[i];
1147 		napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1148 
1149 		err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i,
1150 				       napi->napi_id);
1151 		if (err)
1152 			goto err;
1153 		err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
1154 						 MEM_TYPE_PAGE_SHARED, NULL);
1155 		if (err)
1156 			goto err;
1157 		rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
1158 		if (rx->xsk_pool) {
1159 			err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
1160 					       napi->napi_id);
1161 			if (err)
1162 				goto err;
1163 			err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
1164 							 MEM_TYPE_XSK_BUFF_POOL, NULL);
1165 			if (err)
1166 				goto err;
1167 			xsk_pool_set_rxq_info(rx->xsk_pool,
1168 					      &rx->xsk_rxq);
1169 		}
1170 	}
1171 
1172 	for (i = 0; i < priv->num_xdp_queues; i++) {
1173 		tx_qid = gve_xdp_tx_queue_id(priv, i);
1174 		priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
1175 	}
1176 	return 0;
1177 
1178 err:
1179 	for (j = i; j >= 0; j--) {
1180 		rx = &priv->rx[j];
1181 		if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
1182 			xdp_rxq_info_unreg(&rx->xdp_rxq);
1183 		if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
1184 			xdp_rxq_info_unreg(&rx->xsk_rxq);
1185 	}
1186 	return err;
1187 }
1188 
1189 static void gve_unreg_xdp_info(struct gve_priv *priv)
1190 {
1191 	int i, tx_qid;
1192 
1193 	if (!priv->num_xdp_queues)
1194 		return;
1195 
1196 	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1197 		struct gve_rx_ring *rx = &priv->rx[i];
1198 
1199 		xdp_rxq_info_unreg(&rx->xdp_rxq);
1200 		if (rx->xsk_pool) {
1201 			xdp_rxq_info_unreg(&rx->xsk_rxq);
1202 			rx->xsk_pool = NULL;
1203 		}
1204 	}
1205 
1206 	for (i = 0; i < priv->num_xdp_queues; i++) {
1207 		tx_qid = gve_xdp_tx_queue_id(priv, i);
1208 		priv->tx[tx_qid].xsk_pool = NULL;
1209 	}
1210 }
1211 
1212 static void gve_drain_page_cache(struct gve_priv *priv)
1213 {
1214 	int i;
1215 
1216 	for (i = 0; i < priv->rx_cfg.num_queues; i++)
1217 		page_frag_cache_drain(&priv->rx[i].page_cache);
1218 }
1219 
1220 static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv,
1221 				      struct gve_rx_alloc_rings_cfg *cfg)
1222 {
1223 	cfg->qcfg = &priv->rx_cfg;
1224 	cfg->qcfg_tx = &priv->tx_cfg;
1225 	cfg->raw_addressing = !gve_is_qpl(priv);
1226 	cfg->enable_header_split = priv->header_split_enabled;
1227 	cfg->ring_size = priv->rx_desc_cnt;
1228 	cfg->packet_buffer_size = gve_is_gqi(priv) ?
1229 				  GVE_DEFAULT_RX_BUFFER_SIZE :
1230 				  priv->data_buffer_size_dqo;
1231 	cfg->rx = priv->rx;
1232 }
1233 
/* Populate both ring allocation configs from the current state of @priv:
 * the TX config first, then the RX config.
 */
void gve_get_curr_alloc_cfgs(struct gve_priv *priv,
			     struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
			     struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
	gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg);

	gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg);
}
1241 
/* Start RX ring @i using the GQI or DQO start routine, as selected by
 * gve_is_gqi().
 */
static void gve_rx_start_ring(struct gve_priv *priv, int i)
{
	if (!gve_is_gqi(priv)) {
		gve_rx_start_ring_dqo(priv, i);
		return;
	}

	gve_rx_start_ring_gqi(priv, i);
}
1249 
/* Start RX rings 0 .. @num_rings - 1 in ascending order. */
static void gve_rx_start_rings(struct gve_priv *priv, int num_rings)
{
	int ring = 0;

	while (ring < num_rings) {
		gve_rx_start_ring(priv, ring);
		ring++;
	}
}
1257 
/* Stop RX ring @i using the GQI or DQO stop routine, as selected by
 * gve_is_gqi().
 */
static void gve_rx_stop_ring(struct gve_priv *priv, int i)
{
	if (!gve_is_gqi(priv)) {
		gve_rx_stop_ring_dqo(priv, i);
		return;
	}

	gve_rx_stop_ring_gqi(priv, i);
}
1265 
1266 static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings)
1267 {
1268 	int i;
1269 
1270 	if (!priv->rx)
1271 		return;
1272 
1273 	for (i = 0; i < num_rings; i++)
1274 		gve_rx_stop_ring(priv, i);
1275 }
1276 
1277 static void gve_queues_mem_remove(struct gve_priv *priv)
1278 {
1279 	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
1280 	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
1281 
1282 	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1283 	gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1284 	priv->tx = NULL;
1285 	priv->rx = NULL;
1286 }
1287 
1288 /* The passed-in queue memory is stored into priv and the queues are made live.
1289  * No memory is allocated. Passed-in memory is freed on errors.
1290  */
1291 static int gve_queues_start(struct gve_priv *priv,
1292 			    struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1293 			    struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1294 {
1295 	struct net_device *dev = priv->dev;
1296 	int err;
1297 
1298 	/* Record new resources into priv */
1299 	priv->tx = tx_alloc_cfg->tx;
1300 	priv->rx = rx_alloc_cfg->rx;
1301 
1302 	/* Record new configs into priv */
1303 	priv->tx_cfg = *tx_alloc_cfg->qcfg;
1304 	priv->rx_cfg = *rx_alloc_cfg->qcfg;
1305 	priv->tx_desc_cnt = tx_alloc_cfg->ring_size;
1306 	priv->rx_desc_cnt = rx_alloc_cfg->ring_size;
1307 
1308 	if (priv->xdp_prog)
1309 		priv->num_xdp_queues = priv->rx_cfg.num_queues;
1310 	else
1311 		priv->num_xdp_queues = 0;
1312 
1313 	gve_tx_start_rings(priv, 0, tx_alloc_cfg->num_rings);
1314 	gve_rx_start_rings(priv, rx_alloc_cfg->qcfg->num_queues);
1315 	gve_init_sync_stats(priv);
1316 
1317 	err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
1318 	if (err)
1319 		goto stop_and_free_rings;
1320 	err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
1321 	if (err)
1322 		goto stop_and_free_rings;
1323 
1324 	err = gve_reg_xdp_info(priv, dev);
1325 	if (err)
1326 		goto stop_and_free_rings;
1327 
1328 	err = gve_register_qpls(priv);
1329 	if (err)
1330 		goto reset;
1331 
1332 	priv->header_split_enabled = rx_alloc_cfg->enable_header_split;
1333 	priv->data_buffer_size_dqo = rx_alloc_cfg->packet_buffer_size;
1334 
1335 	err = gve_create_rings(priv);
1336 	if (err)
1337 		goto reset;
1338 
1339 	gve_set_device_rings_ok(priv);
1340 
1341 	if (gve_get_report_stats(priv))
1342 		mod_timer(&priv->stats_report_timer,
1343 			  round_jiffies(jiffies +
1344 				msecs_to_jiffies(priv->stats_report_timer_period)));
1345 
1346 	gve_turnup(priv);
1347 	queue_work(priv->gve_wq, &priv->service_task);
1348 	priv->interface_up_cnt++;
1349 	return 0;
1350 
1351 reset:
1352 	if (gve_get_reset_in_progress(priv))
1353 		goto stop_and_free_rings;
1354 	gve_reset_and_teardown(priv, true);
1355 	/* if this fails there is nothing we can do so just ignore the return */
1356 	gve_reset_recovery(priv, false);
1357 	/* return the original error */
1358 	return err;
1359 stop_and_free_rings:
1360 	gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv));
1361 	gve_rx_stop_rings(priv, priv->rx_cfg.num_queues);
1362 	gve_queues_mem_remove(priv);
1363 	return err;
1364 }
1365 
1366 static int gve_open(struct net_device *dev)
1367 {
1368 	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
1369 	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
1370 	struct gve_priv *priv = netdev_priv(dev);
1371 	int err;
1372 
1373 	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1374 
1375 	err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1376 	if (err)
1377 		return err;
1378 
1379 	/* No need to free on error: ownership of resources is lost after
1380 	 * calling gve_queues_start.
1381 	 */
1382 	err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1383 	if (err)
1384 		return err;
1385 
1386 	return 0;
1387 }
1388 
1389 static int gve_queues_stop(struct gve_priv *priv)
1390 {
1391 	int err;
1392 
1393 	netif_carrier_off(priv->dev);
1394 	if (gve_get_device_rings_ok(priv)) {
1395 		gve_turndown(priv);
1396 		gve_drain_page_cache(priv);
1397 		err = gve_destroy_rings(priv);
1398 		if (err)
1399 			goto err;
1400 		err = gve_unregister_qpls(priv);
1401 		if (err)
1402 			goto err;
1403 		gve_clear_device_rings_ok(priv);
1404 	}
1405 	del_timer_sync(&priv->stats_report_timer);
1406 
1407 	gve_unreg_xdp_info(priv);
1408 
1409 	gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv));
1410 	gve_rx_stop_rings(priv, priv->rx_cfg.num_queues);
1411 
1412 	priv->interface_down_cnt++;
1413 	return 0;
1414 
1415 err:
1416 	/* This must have been called from a reset due to the rtnl lock
1417 	 * so just return at this point.
1418 	 */
1419 	if (gve_get_reset_in_progress(priv))
1420 		return err;
1421 	/* Otherwise reset before returning */
1422 	gve_reset_and_teardown(priv, true);
1423 	return gve_reset_recovery(priv, false);
1424 }
1425 
/* ndo_stop handler: stop the queues and, if that succeeded, free their
 * memory. Returns the result of gve_queues_stop().
 */
static int gve_close(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	int rc = gve_queues_stop(priv);

	if (!rc)
		gve_queues_mem_remove(priv);

	return rc;
}
1438 
1439 static int gve_remove_xdp_queues(struct gve_priv *priv)
1440 {
1441 	int err;
1442 
1443 	err = gve_destroy_xdp_rings(priv);
1444 	if (err)
1445 		return err;
1446 
1447 	err = gve_unregister_xdp_qpls(priv);
1448 	if (err)
1449 		return err;
1450 
1451 	gve_unreg_xdp_info(priv);
1452 	gve_free_xdp_rings(priv);
1453 
1454 	priv->num_xdp_queues = 0;
1455 	return 0;
1456 }
1457 
1458 static int gve_add_xdp_queues(struct gve_priv *priv)
1459 {
1460 	int err;
1461 
1462 	priv->num_xdp_queues = priv->rx_cfg.num_queues;
1463 
1464 	err = gve_alloc_xdp_rings(priv);
1465 	if (err)
1466 		goto err;
1467 
1468 	err = gve_reg_xdp_info(priv, priv->dev);
1469 	if (err)
1470 		goto free_xdp_rings;
1471 
1472 	err = gve_register_xdp_qpls(priv);
1473 	if (err)
1474 		goto free_xdp_rings;
1475 
1476 	err = gve_create_xdp_rings(priv);
1477 	if (err)
1478 		goto free_xdp_rings;
1479 
1480 	return 0;
1481 
1482 free_xdp_rings:
1483 	gve_free_xdp_rings(priv);
1484 err:
1485 	priv->num_xdp_queues = 0;
1486 	return err;
1487 }
1488 
1489 static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
1490 {
1491 	if (!gve_get_napi_enabled(priv))
1492 		return;
1493 
1494 	if (link_status == netif_carrier_ok(priv->dev))
1495 		return;
1496 
1497 	if (link_status) {
1498 		netdev_info(priv->dev, "Device link is up.\n");
1499 		netif_carrier_on(priv->dev);
1500 	} else {
1501 		netdev_info(priv->dev, "Device link is down.\n");
1502 		netif_carrier_off(priv->dev);
1503 	}
1504 }
1505 
1506 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
1507 		       struct netlink_ext_ack *extack)
1508 {
1509 	struct bpf_prog *old_prog;
1510 	int err = 0;
1511 	u32 status;
1512 
1513 	old_prog = READ_ONCE(priv->xdp_prog);
1514 	if (!netif_carrier_ok(priv->dev)) {
1515 		WRITE_ONCE(priv->xdp_prog, prog);
1516 		if (old_prog)
1517 			bpf_prog_put(old_prog);
1518 		return 0;
1519 	}
1520 
1521 	gve_turndown(priv);
1522 	if (!old_prog && prog) {
1523 		// Allocate XDP TX queues if an XDP program is
1524 		// being installed
1525 		err = gve_add_xdp_queues(priv);
1526 		if (err)
1527 			goto out;
1528 	} else if (old_prog && !prog) {
1529 		// Remove XDP TX queues if an XDP program is
1530 		// being uninstalled
1531 		err = gve_remove_xdp_queues(priv);
1532 		if (err)
1533 			goto out;
1534 	}
1535 	WRITE_ONCE(priv->xdp_prog, prog);
1536 	if (old_prog)
1537 		bpf_prog_put(old_prog);
1538 
1539 out:
1540 	gve_turnup(priv);
1541 	status = ioread32be(&priv->reg_bar0->device_status);
1542 	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1543 	return err;
1544 }
1545 
1546 static int gve_xsk_pool_enable(struct net_device *dev,
1547 			       struct xsk_buff_pool *pool,
1548 			       u16 qid)
1549 {
1550 	struct gve_priv *priv = netdev_priv(dev);
1551 	struct napi_struct *napi;
1552 	struct gve_rx_ring *rx;
1553 	int tx_qid;
1554 	int err;
1555 
1556 	if (qid >= priv->rx_cfg.num_queues) {
1557 		dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid);
1558 		return -EINVAL;
1559 	}
1560 	if (xsk_pool_get_rx_frame_size(pool) <
1561 	     priv->dev->max_mtu + sizeof(struct ethhdr)) {
1562 		dev_err(&priv->pdev->dev, "xsk pool frame_len too small");
1563 		return -EINVAL;
1564 	}
1565 
1566 	err = xsk_pool_dma_map(pool, &priv->pdev->dev,
1567 			       DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1568 	if (err)
1569 		return err;
1570 
1571 	/* If XDP prog is not installed, return */
1572 	if (!priv->xdp_prog)
1573 		return 0;
1574 
1575 	rx = &priv->rx[qid];
1576 	napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1577 	err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id);
1578 	if (err)
1579 		goto err;
1580 
1581 	err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
1582 					 MEM_TYPE_XSK_BUFF_POOL, NULL);
1583 	if (err)
1584 		goto err;
1585 
1586 	xsk_pool_set_rxq_info(pool, &rx->xsk_rxq);
1587 	rx->xsk_pool = pool;
1588 
1589 	tx_qid = gve_xdp_tx_queue_id(priv, qid);
1590 	priv->tx[tx_qid].xsk_pool = pool;
1591 
1592 	return 0;
1593 err:
1594 	if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
1595 		xdp_rxq_info_unreg(&rx->xsk_rxq);
1596 
1597 	xsk_pool_dma_unmap(pool,
1598 			   DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1599 	return err;
1600 }
1601 
1602 static int gve_xsk_pool_disable(struct net_device *dev,
1603 				u16 qid)
1604 {
1605 	struct gve_priv *priv = netdev_priv(dev);
1606 	struct napi_struct *napi_rx;
1607 	struct napi_struct *napi_tx;
1608 	struct xsk_buff_pool *pool;
1609 	int tx_qid;
1610 
1611 	pool = xsk_get_pool_from_qid(dev, qid);
1612 	if (!pool)
1613 		return -EINVAL;
1614 	if (qid >= priv->rx_cfg.num_queues)
1615 		return -EINVAL;
1616 
1617 	/* If XDP prog is not installed, unmap DMA and return */
1618 	if (!priv->xdp_prog)
1619 		goto done;
1620 
1621 	tx_qid = gve_xdp_tx_queue_id(priv, qid);
1622 	if (!netif_running(dev)) {
1623 		priv->rx[qid].xsk_pool = NULL;
1624 		xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
1625 		priv->tx[tx_qid].xsk_pool = NULL;
1626 		goto done;
1627 	}
1628 
1629 	napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
1630 	napi_disable(napi_rx); /* make sure current rx poll is done */
1631 
1632 	napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
1633 	napi_disable(napi_tx); /* make sure current tx poll is done */
1634 
1635 	priv->rx[qid].xsk_pool = NULL;
1636 	xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
1637 	priv->tx[tx_qid].xsk_pool = NULL;
1638 	smp_mb(); /* Make sure it is visible to the workers on datapath */
1639 
1640 	napi_enable(napi_rx);
1641 	if (gve_rx_work_pending(&priv->rx[qid]))
1642 		napi_schedule(napi_rx);
1643 
1644 	napi_enable(napi_tx);
1645 	if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
1646 		napi_schedule(napi_tx);
1647 
1648 done:
1649 	xsk_pool_dma_unmap(pool,
1650 			   DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1651 	return 0;
1652 }
1653 
1654 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
1655 {
1656 	struct gve_priv *priv = netdev_priv(dev);
1657 	int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id);
1658 
1659 	if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog)
1660 		return -EINVAL;
1661 
1662 	if (flags & XDP_WAKEUP_TX) {
1663 		struct gve_tx_ring *tx = &priv->tx[tx_queue_id];
1664 		struct napi_struct *napi =
1665 			&priv->ntfy_blocks[tx->ntfy_id].napi;
1666 
1667 		if (!napi_if_scheduled_mark_missed(napi)) {
1668 			/* Call local_bh_enable to trigger SoftIRQ processing */
1669 			local_bh_disable();
1670 			napi_schedule(napi);
1671 			local_bh_enable();
1672 		}
1673 
1674 		tx->xdp_xsk_wakeup++;
1675 	}
1676 
1677 	return 0;
1678 }
1679 
1680 static int verify_xdp_configuration(struct net_device *dev)
1681 {
1682 	struct gve_priv *priv = netdev_priv(dev);
1683 
1684 	if (dev->features & NETIF_F_LRO) {
1685 		netdev_warn(dev, "XDP is not supported when LRO is on.\n");
1686 		return -EOPNOTSUPP;
1687 	}
1688 
1689 	if (priv->queue_format != GVE_GQI_QPL_FORMAT) {
1690 		netdev_warn(dev, "XDP is not supported in mode %d.\n",
1691 			    priv->queue_format);
1692 		return -EOPNOTSUPP;
1693 	}
1694 
1695 	if (dev->mtu > GVE_DEFAULT_RX_BUFFER_SIZE - sizeof(struct ethhdr) - GVE_RX_PAD) {
1696 		netdev_warn(dev, "XDP is not supported for mtu %d.\n",
1697 			    dev->mtu);
1698 		return -EOPNOTSUPP;
1699 	}
1700 
1701 	if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues ||
1702 	    (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) {
1703 		netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d",
1704 			    priv->rx_cfg.num_queues,
1705 			    priv->tx_cfg.num_queues,
1706 			    priv->tx_cfg.max_queues);
1707 		return -EINVAL;
1708 	}
1709 	return 0;
1710 }
1711 
1712 static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
1713 {
1714 	struct gve_priv *priv = netdev_priv(dev);
1715 	int err;
1716 
1717 	err = verify_xdp_configuration(dev);
1718 	if (err)
1719 		return err;
1720 	switch (xdp->command) {
1721 	case XDP_SETUP_PROG:
1722 		return gve_set_xdp(priv, xdp->prog, xdp->extack);
1723 	case XDP_SETUP_XSK_POOL:
1724 		if (xdp->xsk.pool)
1725 			return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id);
1726 		else
1727 			return gve_xsk_pool_disable(dev, xdp->xsk.queue_id);
1728 	default:
1729 		return -EINVAL;
1730 	}
1731 }
1732 
1733 int gve_adjust_config(struct gve_priv *priv,
1734 		      struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1735 		      struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1736 {
1737 	int err;
1738 
1739 	/* Allocate resources for the new confiugration */
1740 	err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg);
1741 	if (err) {
1742 		netif_err(priv, drv, priv->dev,
1743 			  "Adjust config failed to alloc new queues");
1744 		return err;
1745 	}
1746 
1747 	/* Teardown the device and free existing resources */
1748 	err = gve_close(priv->dev);
1749 	if (err) {
1750 		netif_err(priv, drv, priv->dev,
1751 			  "Adjust config failed to close old queues");
1752 		gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg);
1753 		return err;
1754 	}
1755 
1756 	/* Bring the device back up again with the new resources. */
1757 	err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg);
1758 	if (err) {
1759 		netif_err(priv, drv, priv->dev,
1760 			  "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n");
1761 		/* No need to free on error: ownership of resources is lost after
1762 		 * calling gve_queues_start.
1763 		 */
1764 		gve_turndown(priv);
1765 		return err;
1766 	}
1767 
1768 	return 0;
1769 }
1770 
1771 int gve_adjust_queues(struct gve_priv *priv,
1772 		      struct gve_queue_config new_rx_config,
1773 		      struct gve_queue_config new_tx_config)
1774 {
1775 	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
1776 	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
1777 	int err;
1778 
1779 	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1780 
1781 	/* Relay the new config from ethtool */
1782 	tx_alloc_cfg.qcfg = &new_tx_config;
1783 	rx_alloc_cfg.qcfg_tx = &new_tx_config;
1784 	rx_alloc_cfg.qcfg = &new_rx_config;
1785 	tx_alloc_cfg.num_rings = new_tx_config.num_queues;
1786 
1787 	if (netif_carrier_ok(priv->dev)) {
1788 		err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1789 		return err;
1790 	}
1791 	/* Set the config for the next up. */
1792 	priv->tx_cfg = new_tx_config;
1793 	priv->rx_cfg = new_rx_config;
1794 
1795 	return 0;
1796 }
1797 
1798 static void gve_turndown(struct gve_priv *priv)
1799 {
1800 	int idx;
1801 
1802 	if (netif_carrier_ok(priv->dev))
1803 		netif_carrier_off(priv->dev);
1804 
1805 	if (!gve_get_napi_enabled(priv))
1806 		return;
1807 
1808 	/* Disable napi to prevent more work from coming in */
1809 	for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1810 		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1811 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1812 
1813 		if (!gve_tx_was_added_to_block(priv, idx))
1814 			continue;
1815 		napi_disable(&block->napi);
1816 	}
1817 	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1818 		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1819 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1820 
1821 		if (!gve_rx_was_added_to_block(priv, idx))
1822 			continue;
1823 		napi_disable(&block->napi);
1824 	}
1825 
1826 	/* Stop tx queues */
1827 	netif_tx_disable(priv->dev);
1828 
1829 	gve_clear_napi_enabled(priv);
1830 	gve_clear_report_stats(priv);
1831 }
1832 
1833 static void gve_turnup(struct gve_priv *priv)
1834 {
1835 	int idx;
1836 
1837 	/* Start the tx queues */
1838 	netif_tx_start_all_queues(priv->dev);
1839 
1840 	/* Enable napi and unmask interrupts for all queues */
1841 	for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1842 		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1843 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1844 
1845 		if (!gve_tx_was_added_to_block(priv, idx))
1846 			continue;
1847 
1848 		napi_enable(&block->napi);
1849 		if (gve_is_gqi(priv)) {
1850 			iowrite32be(0, gve_irq_doorbell(priv, block));
1851 		} else {
1852 			gve_set_itr_coalesce_usecs_dqo(priv, block,
1853 						       priv->tx_coalesce_usecs);
1854 		}
1855 
1856 		/* Any descs written by the NIC before this barrier will be
1857 		 * handled by the one-off napi schedule below. Whereas any
1858 		 * descs after the barrier will generate interrupts.
1859 		 */
1860 		mb();
1861 		napi_schedule(&block->napi);
1862 	}
1863 	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1864 		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1865 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1866 
1867 		if (!gve_rx_was_added_to_block(priv, idx))
1868 			continue;
1869 
1870 		napi_enable(&block->napi);
1871 		if (gve_is_gqi(priv)) {
1872 			iowrite32be(0, gve_irq_doorbell(priv, block));
1873 		} else {
1874 			gve_set_itr_coalesce_usecs_dqo(priv, block,
1875 						       priv->rx_coalesce_usecs);
1876 		}
1877 
1878 		/* Any descs written by the NIC before this barrier will be
1879 		 * handled by the one-off napi schedule below. Whereas any
1880 		 * descs after the barrier will generate interrupts.
1881 		 */
1882 		mb();
1883 		napi_schedule(&block->napi);
1884 	}
1885 
1886 	gve_set_napi_enabled(priv);
1887 }
1888 
1889 static void gve_turnup_and_check_status(struct gve_priv *priv)
1890 {
1891 	u32 status;
1892 
1893 	gve_turnup(priv);
1894 	status = ioread32be(&priv->reg_bar0->device_status);
1895 	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1896 }
1897 
1898 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
1899 {
1900 	struct gve_notify_block *block;
1901 	struct gve_tx_ring *tx = NULL;
1902 	struct gve_priv *priv;
1903 	u32 last_nic_done;
1904 	u32 current_time;
1905 	u32 ntfy_idx;
1906 
1907 	netdev_info(dev, "Timeout on tx queue, %d", txqueue);
1908 	priv = netdev_priv(dev);
1909 	if (txqueue > priv->tx_cfg.num_queues)
1910 		goto reset;
1911 
1912 	ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
1913 	if (ntfy_idx >= priv->num_ntfy_blks)
1914 		goto reset;
1915 
1916 	block = &priv->ntfy_blocks[ntfy_idx];
1917 	tx = block->tx;
1918 
1919 	current_time = jiffies_to_msecs(jiffies);
1920 	if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
1921 		goto reset;
1922 
1923 	/* Check to see if there are missed completions, which will allow us to
1924 	 * kick the queue.
1925 	 */
1926 	last_nic_done = gve_tx_load_event_counter(priv, tx);
1927 	if (last_nic_done - tx->done) {
1928 		netdev_info(dev, "Kicking queue %d", txqueue);
1929 		iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
1930 		napi_schedule(&block->napi);
1931 		tx->last_kick_msec = current_time;
1932 		goto out;
1933 	} // Else reset.
1934 
1935 reset:
1936 	gve_schedule_reset(priv);
1937 
1938 out:
1939 	if (tx)
1940 		tx->queue_timeout++;
1941 	priv->tx_timeo_cnt++;
1942 }
1943 
1944 u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit)
1945 {
1946 	if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE)
1947 		return GVE_MAX_RX_BUFFER_SIZE;
1948 	else
1949 		return GVE_DEFAULT_RX_BUFFER_SIZE;
1950 }
1951 
1952 /* header-split is not supported on non-DQO_RDA yet even if device advertises it */
1953 bool gve_header_split_supported(const struct gve_priv *priv)
1954 {
1955 	return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT;
1956 }
1957 
1958 int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split)
1959 {
1960 	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
1961 	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
1962 	bool enable_hdr_split;
1963 	int err = 0;
1964 
1965 	if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN)
1966 		return 0;
1967 
1968 	if (!gve_header_split_supported(priv)) {
1969 		dev_err(&priv->pdev->dev, "Header-split not supported\n");
1970 		return -EOPNOTSUPP;
1971 	}
1972 
1973 	if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED)
1974 		enable_hdr_split = true;
1975 	else
1976 		enable_hdr_split = false;
1977 
1978 	if (enable_hdr_split == priv->header_split_enabled)
1979 		return 0;
1980 
1981 	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1982 
1983 	rx_alloc_cfg.enable_header_split = enable_hdr_split;
1984 	rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split);
1985 
1986 	if (netif_running(priv->dev))
1987 		err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1988 	return err;
1989 }
1990 
1991 static int gve_set_features(struct net_device *netdev,
1992 			    netdev_features_t features)
1993 {
1994 	const netdev_features_t orig_features = netdev->features;
1995 	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
1996 	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
1997 	struct gve_priv *priv = netdev_priv(netdev);
1998 	int err;
1999 
2000 	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
2001 
2002 	if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
2003 		netdev->features ^= NETIF_F_LRO;
2004 		if (netif_carrier_ok(netdev)) {
2005 			err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
2006 			if (err) {
2007 				/* Revert the change on error. */
2008 				netdev->features = orig_features;
2009 				return err;
2010 			}
2011 		}
2012 	}
2013 
2014 	return 0;
2015 }
2016 
2017 static const struct net_device_ops gve_netdev_ops = {
2018 	.ndo_start_xmit		=	gve_start_xmit,
2019 	.ndo_features_check	=	gve_features_check,
2020 	.ndo_open		=	gve_open,
2021 	.ndo_stop		=	gve_close,
2022 	.ndo_get_stats64	=	gve_get_stats,
2023 	.ndo_tx_timeout         =       gve_tx_timeout,
2024 	.ndo_set_features	=	gve_set_features,
2025 	.ndo_bpf		=	gve_xdp,
2026 	.ndo_xdp_xmit		=	gve_xdp_xmit,
2027 	.ndo_xsk_wakeup		=	gve_xsk_wakeup,
2028 };
2029 
2030 static void gve_handle_status(struct gve_priv *priv, u32 status)
2031 {
2032 	if (GVE_DEVICE_STATUS_RESET_MASK & status) {
2033 		dev_info(&priv->pdev->dev, "Device requested reset.\n");
2034 		gve_set_do_reset(priv);
2035 	}
2036 	if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
2037 		priv->stats_report_trigger_cnt++;
2038 		gve_set_do_report_stats(priv);
2039 	}
2040 }
2041 
2042 static void gve_handle_reset(struct gve_priv *priv)
2043 {
2044 	/* A service task will be scheduled at the end of probe to catch any
2045 	 * resets that need to happen, and we don't want to reset until
2046 	 * probe is done.
2047 	 */
2048 	if (gve_get_probe_in_progress(priv))
2049 		return;
2050 
2051 	if (gve_get_do_reset(priv)) {
2052 		rtnl_lock();
2053 		gve_reset(priv, false);
2054 		rtnl_unlock();
2055 	}
2056 }
2057 
2058 void gve_handle_report_stats(struct gve_priv *priv)
2059 {
2060 	struct stats *stats = priv->stats_report->stats;
2061 	int idx, stats_idx = 0;
2062 	unsigned int start = 0;
2063 	u64 tx_bytes;
2064 
2065 	if (!gve_get_report_stats(priv))
2066 		return;
2067 
2068 	be64_add_cpu(&priv->stats_report->written_count, 1);
2069 	/* tx stats */
2070 	if (priv->tx) {
2071 		for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
2072 			u32 last_completion = 0;
2073 			u32 tx_frames = 0;
2074 
2075 			/* DQO doesn't currently support these metrics. */
2076 			if (gve_is_gqi(priv)) {
2077 				last_completion = priv->tx[idx].done;
2078 				tx_frames = priv->tx[idx].req;
2079 			}
2080 
2081 			do {
2082 				start = u64_stats_fetch_begin(&priv->tx[idx].statss);
2083 				tx_bytes = priv->tx[idx].bytes_done;
2084 			} while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
2085 			stats[stats_idx++] = (struct stats) {
2086 				.stat_name = cpu_to_be32(TX_WAKE_CNT),
2087 				.value = cpu_to_be64(priv->tx[idx].wake_queue),
2088 				.queue_id = cpu_to_be32(idx),
2089 			};
2090 			stats[stats_idx++] = (struct stats) {
2091 				.stat_name = cpu_to_be32(TX_STOP_CNT),
2092 				.value = cpu_to_be64(priv->tx[idx].stop_queue),
2093 				.queue_id = cpu_to_be32(idx),
2094 			};
2095 			stats[stats_idx++] = (struct stats) {
2096 				.stat_name = cpu_to_be32(TX_FRAMES_SENT),
2097 				.value = cpu_to_be64(tx_frames),
2098 				.queue_id = cpu_to_be32(idx),
2099 			};
2100 			stats[stats_idx++] = (struct stats) {
2101 				.stat_name = cpu_to_be32(TX_BYTES_SENT),
2102 				.value = cpu_to_be64(tx_bytes),
2103 				.queue_id = cpu_to_be32(idx),
2104 			};
2105 			stats[stats_idx++] = (struct stats) {
2106 				.stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
2107 				.value = cpu_to_be64(last_completion),
2108 				.queue_id = cpu_to_be32(idx),
2109 			};
2110 			stats[stats_idx++] = (struct stats) {
2111 				.stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
2112 				.value = cpu_to_be64(priv->tx[idx].queue_timeout),
2113 				.queue_id = cpu_to_be32(idx),
2114 			};
2115 		}
2116 	}
2117 	/* rx stats */
2118 	if (priv->rx) {
2119 		for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
2120 			stats[stats_idx++] = (struct stats) {
2121 				.stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
2122 				.value = cpu_to_be64(priv->rx[idx].desc.seqno),
2123 				.queue_id = cpu_to_be32(idx),
2124 			};
2125 			stats[stats_idx++] = (struct stats) {
2126 				.stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
2127 				.value = cpu_to_be64(priv->rx[0].fill_cnt),
2128 				.queue_id = cpu_to_be32(idx),
2129 			};
2130 		}
2131 	}
2132 }
2133 
2134 /* Handle NIC status register changes, reset requests and report stats */
2135 static void gve_service_task(struct work_struct *work)
2136 {
2137 	struct gve_priv *priv = container_of(work, struct gve_priv,
2138 					     service_task);
2139 	u32 status = ioread32be(&priv->reg_bar0->device_status);
2140 
2141 	gve_handle_status(priv, status);
2142 
2143 	gve_handle_reset(priv);
2144 	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
2145 }
2146 
2147 static void gve_set_netdev_xdp_features(struct gve_priv *priv)
2148 {
2149 	if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
2150 		priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC;
2151 		priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT;
2152 		priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
2153 		priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
2154 	} else {
2155 		priv->dev->xdp_features = 0;
2156 	}
2157 }
2158 
2159 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
2160 {
2161 	int num_ntfy;
2162 	int err;
2163 
2164 	/* Set up the adminq */
2165 	err = gve_adminq_alloc(&priv->pdev->dev, priv);
2166 	if (err) {
2167 		dev_err(&priv->pdev->dev,
2168 			"Failed to alloc admin queue: err=%d\n", err);
2169 		return err;
2170 	}
2171 
2172 	err = gve_verify_driver_compatibility(priv);
2173 	if (err) {
2174 		dev_err(&priv->pdev->dev,
2175 			"Could not verify driver compatibility: err=%d\n", err);
2176 		goto err;
2177 	}
2178 
2179 	priv->num_registered_pages = 0;
2180 
2181 	if (skip_describe_device)
2182 		goto setup_device;
2183 
2184 	priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
2185 	/* Get the initial information we need from the device */
2186 	err = gve_adminq_describe_device(priv);
2187 	if (err) {
2188 		dev_err(&priv->pdev->dev,
2189 			"Could not get device information: err=%d\n", err);
2190 		goto err;
2191 	}
2192 	priv->dev->mtu = priv->dev->max_mtu;
2193 	num_ntfy = pci_msix_vec_count(priv->pdev);
2194 	if (num_ntfy <= 0) {
2195 		dev_err(&priv->pdev->dev,
2196 			"could not count MSI-x vectors: err=%d\n", num_ntfy);
2197 		err = num_ntfy;
2198 		goto err;
2199 	} else if (num_ntfy < GVE_MIN_MSIX) {
2200 		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
2201 			GVE_MIN_MSIX, num_ntfy);
2202 		err = -EINVAL;
2203 		goto err;
2204 	}
2205 
2206 	/* Big TCP is only supported on DQ*/
2207 	if (!gve_is_gqi(priv))
2208 		netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);
2209 
2210 	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
2211 	/* gvnic has one Notification Block per MSI-x vector, except for the
2212 	 * management vector
2213 	 */
2214 	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
2215 	priv->mgmt_msix_idx = priv->num_ntfy_blks;
2216 
2217 	priv->tx_cfg.max_queues =
2218 		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
2219 	priv->rx_cfg.max_queues =
2220 		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);
2221 
2222 	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
2223 	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
2224 	if (priv->default_num_queues > 0) {
2225 		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
2226 						priv->tx_cfg.num_queues);
2227 		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
2228 						priv->rx_cfg.num_queues);
2229 	}
2230 
2231 	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
2232 		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
2233 	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
2234 		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
2235 
2236 	if (!gve_is_gqi(priv)) {
2237 		priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
2238 		priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
2239 	}
2240 
2241 setup_device:
2242 	gve_set_netdev_xdp_features(priv);
2243 	err = gve_setup_device_resources(priv);
2244 	if (!err)
2245 		return 0;
2246 err:
2247 	gve_adminq_free(&priv->pdev->dev, priv);
2248 	return err;
2249 }
2250 
2251 static void gve_teardown_priv_resources(struct gve_priv *priv)
2252 {
2253 	gve_teardown_device_resources(priv);
2254 	gve_adminq_free(&priv->pdev->dev, priv);
2255 }
2256 
/* Trigger a device reset. Releasing the admin queue is what resets the
 * device.
 */
static void gve_trigger_reset(struct gve_priv *priv)
{
	gve_adminq_release(priv);
}
2262 
2263 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
2264 {
2265 	gve_trigger_reset(priv);
2266 	/* With the reset having already happened, close cannot fail */
2267 	if (was_up)
2268 		gve_close(priv->dev);
2269 	gve_teardown_priv_resources(priv);
2270 }
2271 
2272 static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
2273 {
2274 	int err;
2275 
2276 	err = gve_init_priv(priv, true);
2277 	if (err)
2278 		goto err;
2279 	if (was_up) {
2280 		err = gve_open(priv->dev);
2281 		if (err)
2282 			goto err;
2283 	}
2284 	return 0;
2285 err:
2286 	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
2287 	gve_turndown(priv);
2288 	return err;
2289 }
2290 
2291 int gve_reset(struct gve_priv *priv, bool attempt_teardown)
2292 {
2293 	bool was_up = netif_carrier_ok(priv->dev);
2294 	int err;
2295 
2296 	dev_info(&priv->pdev->dev, "Performing reset\n");
2297 	gve_clear_do_reset(priv);
2298 	gve_set_reset_in_progress(priv);
2299 	/* If we aren't attempting to teardown normally, just go turndown and
2300 	 * reset right away.
2301 	 */
2302 	if (!attempt_teardown) {
2303 		gve_turndown(priv);
2304 		gve_reset_and_teardown(priv, was_up);
2305 	} else {
2306 		/* Otherwise attempt to close normally */
2307 		if (was_up) {
2308 			err = gve_close(priv->dev);
2309 			/* If that fails reset as we did above */
2310 			if (err)
2311 				gve_reset_and_teardown(priv, was_up);
2312 		}
2313 		/* Clean up any remaining resources */
2314 		gve_teardown_priv_resources(priv);
2315 	}
2316 
2317 	/* Set it all back up */
2318 	err = gve_reset_recovery(priv, was_up);
2319 	gve_clear_reset_in_progress(priv);
2320 	priv->reset_cnt++;
2321 	priv->interface_up_cnt = 0;
2322 	priv->interface_down_cnt = 0;
2323 	priv->stats_report_trigger_cnt = 0;
2324 	return err;
2325 }
2326 
2327 static void gve_write_version(u8 __iomem *driver_version_register)
2328 {
2329 	const char *c = gve_version_prefix;
2330 
2331 	while (*c) {
2332 		writeb(*c, driver_version_register);
2333 		c++;
2334 	}
2335 
2336 	c = gve_version_str;
2337 	while (*c) {
2338 		writeb(*c, driver_version_register);
2339 		c++;
2340 	}
2341 	writeb('\n', driver_version_register);
2342 }
2343 
2344 static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx)
2345 {
2346 	struct gve_priv *priv = netdev_priv(dev);
2347 	struct gve_rx_ring *gve_per_q_mem;
2348 	int err;
2349 
2350 	if (!priv->rx)
2351 		return -EAGAIN;
2352 
2353 	/* Destroying queue 0 while other queues exist is not supported in DQO */
2354 	if (!gve_is_gqi(priv) && idx == 0)
2355 		return -ERANGE;
2356 
2357 	/* Single-queue destruction requires quiescence on all queues */
2358 	gve_turndown(priv);
2359 
2360 	/* This failure will trigger a reset - no need to clean up */
2361 	err = gve_adminq_destroy_single_rx_queue(priv, idx);
2362 	if (err)
2363 		return err;
2364 
2365 	if (gve_is_qpl(priv)) {
2366 		/* This failure will trigger a reset - no need to clean up */
2367 		err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx));
2368 		if (err)
2369 			return err;
2370 	}
2371 
2372 	gve_rx_stop_ring(priv, idx);
2373 
2374 	/* Turn the unstopped queues back up */
2375 	gve_turnup_and_check_status(priv);
2376 
2377 	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
2378 	*gve_per_q_mem = priv->rx[idx];
2379 	memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
2380 	return 0;
2381 }
2382 
2383 static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem)
2384 {
2385 	struct gve_priv *priv = netdev_priv(dev);
2386 	struct gve_rx_alloc_rings_cfg cfg = {0};
2387 	struct gve_rx_ring *gve_per_q_mem;
2388 
2389 	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
2390 	gve_rx_get_curr_alloc_cfg(priv, &cfg);
2391 
2392 	if (gve_is_gqi(priv))
2393 		gve_rx_free_ring_gqi(priv, gve_per_q_mem, &cfg);
2394 	else
2395 		gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg);
2396 }
2397 
2398 static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem,
2399 				  int idx)
2400 {
2401 	struct gve_priv *priv = netdev_priv(dev);
2402 	struct gve_rx_alloc_rings_cfg cfg = {0};
2403 	struct gve_rx_ring *gve_per_q_mem;
2404 	int err;
2405 
2406 	if (!priv->rx)
2407 		return -EAGAIN;
2408 
2409 	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
2410 	gve_rx_get_curr_alloc_cfg(priv, &cfg);
2411 
2412 	if (gve_is_gqi(priv))
2413 		err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx);
2414 	else
2415 		err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx);
2416 
2417 	return err;
2418 }
2419 
2420 static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx)
2421 {
2422 	struct gve_priv *priv = netdev_priv(dev);
2423 	struct gve_rx_ring *gve_per_q_mem;
2424 	int err;
2425 
2426 	if (!priv->rx)
2427 		return -EAGAIN;
2428 
2429 	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
2430 	priv->rx[idx] = *gve_per_q_mem;
2431 
2432 	/* Single-queue creation requires quiescence on all queues */
2433 	gve_turndown(priv);
2434 
2435 	gve_rx_start_ring(priv, idx);
2436 
2437 	if (gve_is_qpl(priv)) {
2438 		/* This failure will trigger a reset - no need to clean up */
2439 		err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx));
2440 		if (err)
2441 			goto abort;
2442 	}
2443 
2444 	/* This failure will trigger a reset - no need to clean up */
2445 	err = gve_adminq_create_single_rx_queue(priv, idx);
2446 	if (err)
2447 		goto abort;
2448 
2449 	if (gve_is_gqi(priv))
2450 		gve_rx_write_doorbell(priv, &priv->rx[idx]);
2451 	else
2452 		gve_rx_post_buffers_dqo(&priv->rx[idx]);
2453 
2454 	/* Turn the unstopped queues back up */
2455 	gve_turnup_and_check_status(priv);
2456 	return 0;
2457 
2458 abort:
2459 	gve_rx_stop_ring(priv, idx);
2460 
2461 	/* All failures in this func result in a reset, by clearing the struct
2462 	 * at idx, we prevent a double free when that reset runs. The reset,
2463 	 * which needs the rtnl lock, will not run till this func returns and
2464 	 * its caller gives up the lock.
2465 	 */
2466 	memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
2467 	return err;
2468 }
2469 
2470 static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = {
2471 	.ndo_queue_mem_size	=	sizeof(struct gve_rx_ring),
2472 	.ndo_queue_mem_alloc	=	gve_rx_queue_mem_alloc,
2473 	.ndo_queue_mem_free	=	gve_rx_queue_mem_free,
2474 	.ndo_queue_start	=	gve_rx_queue_start,
2475 	.ndo_queue_stop		=	gve_rx_queue_stop,
2476 };
2477 
2478 static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
2479 {
2480 	int max_tx_queues, max_rx_queues;
2481 	struct net_device *dev;
2482 	__be32 __iomem *db_bar;
2483 	struct gve_registers __iomem *reg_bar;
2484 	struct gve_priv *priv;
2485 	int err;
2486 
2487 	err = pci_enable_device(pdev);
2488 	if (err)
2489 		return err;
2490 
2491 	err = pci_request_regions(pdev, gve_driver_name);
2492 	if (err)
2493 		goto abort_with_enabled;
2494 
2495 	pci_set_master(pdev);
2496 
2497 	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
2498 	if (err) {
2499 		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
2500 		goto abort_with_pci_region;
2501 	}
2502 
2503 	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
2504 	if (!reg_bar) {
2505 		dev_err(&pdev->dev, "Failed to map pci bar!\n");
2506 		err = -ENOMEM;
2507 		goto abort_with_pci_region;
2508 	}
2509 
2510 	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
2511 	if (!db_bar) {
2512 		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
2513 		err = -ENOMEM;
2514 		goto abort_with_reg_bar;
2515 	}
2516 
2517 	gve_write_version(&reg_bar->driver_version);
2518 	/* Get max queues to alloc etherdev */
2519 	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
2520 	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
2521 	/* Alloc and setup the netdev and priv */
2522 	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
2523 	if (!dev) {
2524 		dev_err(&pdev->dev, "could not allocate netdev\n");
2525 		err = -ENOMEM;
2526 		goto abort_with_db_bar;
2527 	}
2528 	SET_NETDEV_DEV(dev, &pdev->dev);
2529 	pci_set_drvdata(pdev, dev);
2530 	dev->ethtool_ops = &gve_ethtool_ops;
2531 	dev->netdev_ops = &gve_netdev_ops;
2532 	dev->queue_mgmt_ops = &gve_queue_mgmt_ops;
2533 
2534 	/* Set default and supported features.
2535 	 *
2536 	 * Features might be set in other locations as well (such as
2537 	 * `gve_adminq_describe_device`).
2538 	 */
2539 	dev->hw_features = NETIF_F_HIGHDMA;
2540 	dev->hw_features |= NETIF_F_SG;
2541 	dev->hw_features |= NETIF_F_HW_CSUM;
2542 	dev->hw_features |= NETIF_F_TSO;
2543 	dev->hw_features |= NETIF_F_TSO6;
2544 	dev->hw_features |= NETIF_F_TSO_ECN;
2545 	dev->hw_features |= NETIF_F_RXCSUM;
2546 	dev->hw_features |= NETIF_F_RXHASH;
2547 	dev->features = dev->hw_features;
2548 	dev->watchdog_timeo = 5 * HZ;
2549 	dev->min_mtu = ETH_MIN_MTU;
2550 	netif_carrier_off(dev);
2551 
2552 	priv = netdev_priv(dev);
2553 	priv->dev = dev;
2554 	priv->pdev = pdev;
2555 	priv->msg_enable = DEFAULT_MSG_LEVEL;
2556 	priv->reg_bar0 = reg_bar;
2557 	priv->db_bar2 = db_bar;
2558 	priv->service_task_flags = 0x0;
2559 	priv->state_flags = 0x0;
2560 	priv->ethtool_flags = 0x0;
2561 	priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE;
2562 	priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;
2563 
2564 	gve_set_probe_in_progress(priv);
2565 	priv->gve_wq = alloc_ordered_workqueue("gve", 0);
2566 	if (!priv->gve_wq) {
2567 		dev_err(&pdev->dev, "Could not allocate workqueue");
2568 		err = -ENOMEM;
2569 		goto abort_with_netdev;
2570 	}
2571 	INIT_WORK(&priv->service_task, gve_service_task);
2572 	INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
2573 	priv->tx_cfg.max_queues = max_tx_queues;
2574 	priv->rx_cfg.max_queues = max_rx_queues;
2575 
2576 	err = gve_init_priv(priv, false);
2577 	if (err)
2578 		goto abort_with_wq;
2579 
2580 	err = register_netdev(dev);
2581 	if (err)
2582 		goto abort_with_gve_init;
2583 
2584 	dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
2585 	dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
2586 	gve_clear_probe_in_progress(priv);
2587 	queue_work(priv->gve_wq, &priv->service_task);
2588 	return 0;
2589 
2590 abort_with_gve_init:
2591 	gve_teardown_priv_resources(priv);
2592 
2593 abort_with_wq:
2594 	destroy_workqueue(priv->gve_wq);
2595 
2596 abort_with_netdev:
2597 	free_netdev(dev);
2598 
2599 abort_with_db_bar:
2600 	pci_iounmap(pdev, db_bar);
2601 
2602 abort_with_reg_bar:
2603 	pci_iounmap(pdev, reg_bar);
2604 
2605 abort_with_pci_region:
2606 	pci_release_regions(pdev);
2607 
2608 abort_with_enabled:
2609 	pci_disable_device(pdev);
2610 	return err;
2611 }
2612 
2613 static void gve_remove(struct pci_dev *pdev)
2614 {
2615 	struct net_device *netdev = pci_get_drvdata(pdev);
2616 	struct gve_priv *priv = netdev_priv(netdev);
2617 	__be32 __iomem *db_bar = priv->db_bar2;
2618 	void __iomem *reg_bar = priv->reg_bar0;
2619 
2620 	unregister_netdev(netdev);
2621 	gve_teardown_priv_resources(priv);
2622 	destroy_workqueue(priv->gve_wq);
2623 	free_netdev(netdev);
2624 	pci_iounmap(pdev, db_bar);
2625 	pci_iounmap(pdev, reg_bar);
2626 	pci_release_regions(pdev);
2627 	pci_disable_device(pdev);
2628 }
2629 
2630 static void gve_shutdown(struct pci_dev *pdev)
2631 {
2632 	struct net_device *netdev = pci_get_drvdata(pdev);
2633 	struct gve_priv *priv = netdev_priv(netdev);
2634 	bool was_up = netif_carrier_ok(priv->dev);
2635 
2636 	rtnl_lock();
2637 	if (was_up && gve_close(priv->dev)) {
2638 		/* If the dev was up, attempt to close, if close fails, reset */
2639 		gve_reset_and_teardown(priv, was_up);
2640 	} else {
2641 		/* If the dev wasn't up or close worked, finish tearing down */
2642 		gve_teardown_priv_resources(priv);
2643 	}
2644 	rtnl_unlock();
2645 }
2646 
2647 #ifdef CONFIG_PM
2648 static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
2649 {
2650 	struct net_device *netdev = pci_get_drvdata(pdev);
2651 	struct gve_priv *priv = netdev_priv(netdev);
2652 	bool was_up = netif_carrier_ok(priv->dev);
2653 
2654 	priv->suspend_cnt++;
2655 	rtnl_lock();
2656 	if (was_up && gve_close(priv->dev)) {
2657 		/* If the dev was up, attempt to close, if close fails, reset */
2658 		gve_reset_and_teardown(priv, was_up);
2659 	} else {
2660 		/* If the dev wasn't up or close worked, finish tearing down */
2661 		gve_teardown_priv_resources(priv);
2662 	}
2663 	priv->up_before_suspend = was_up;
2664 	rtnl_unlock();
2665 	return 0;
2666 }
2667 
2668 static int gve_resume(struct pci_dev *pdev)
2669 {
2670 	struct net_device *netdev = pci_get_drvdata(pdev);
2671 	struct gve_priv *priv = netdev_priv(netdev);
2672 	int err;
2673 
2674 	priv->resume_cnt++;
2675 	rtnl_lock();
2676 	err = gve_reset_recovery(priv, priv->up_before_suspend);
2677 	rtnl_unlock();
2678 	return err;
2679 }
2680 #endif /* CONFIG_PM */
2681 
2682 static const struct pci_device_id gve_id_table[] = {
2683 	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
2684 	{ }
2685 };
2686 
2687 static struct pci_driver gve_driver = {
2688 	.name		= gve_driver_name,
2689 	.id_table	= gve_id_table,
2690 	.probe		= gve_probe,
2691 	.remove		= gve_remove,
2692 	.shutdown	= gve_shutdown,
2693 #ifdef CONFIG_PM
2694 	.suspend        = gve_suspend,
2695 	.resume         = gve_resume,
2696 #endif
2697 };
2698 
2699 module_pci_driver(gve_driver);
2700 
2701 MODULE_DEVICE_TABLE(pci, gve_id_table);
2702 MODULE_AUTHOR("Google, Inc.");
2703 MODULE_DESCRIPTION("Google Virtual NIC Driver");
2704 MODULE_LICENSE("Dual MIT/GPL");
2705 MODULE_VERSION(GVE_VERSION);
2706