xref: /linux/drivers/net/ethernet/google/gve/gve_main.c (revision 1a562c0d44974d3cf89c6cc5c34c708c08af420e)
1 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
2 /* Google virtual Ethernet (gve) driver
3  *
4  * Copyright (C) 2015-2021 Google, Inc.
5  */
6 
7 #include <linux/bpf.h>
8 #include <linux/cpumask.h>
9 #include <linux/etherdevice.h>
10 #include <linux/filter.h>
11 #include <linux/interrupt.h>
12 #include <linux/module.h>
13 #include <linux/pci.h>
14 #include <linux/sched.h>
15 #include <linux/timer.h>
16 #include <linux/workqueue.h>
17 #include <linux/utsname.h>
18 #include <linux/version.h>
19 #include <net/sch_generic.h>
20 #include <net/xdp_sock_drv.h>
21 #include "gve.h"
22 #include "gve_dqo.h"
23 #include "gve_adminq.h"
24 #include "gve_register.h"
25 
/* Default RX copybreak value (presumably a byte threshold - confirm at the
 * place where it is consumed).
 */
#define GVE_DEFAULT_RX_COPYBREAK	(256)

/* Message-level mask: driver and link messages. */
#define DEFAULT_MSG_LEVEL	(NETIF_MSG_DRV | NETIF_MSG_LINK)
#define GVE_VERSION		"1.0.0"
#define GVE_VERSION_PREFIX	"GVE-"

/* Minimum gap between two queue kicks, in milliseconds (10 seconds). */
#define MIN_TX_TIMEOUT_GAP	(10 * 1000)

/* Driver name and version strings. */
char gve_driver_name[] = "gve";
const char gve_version_str[] = GVE_VERSION;
static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
38 
39 static int gve_verify_driver_compatibility(struct gve_priv *priv)
40 {
41 	int err;
42 	struct gve_driver_info *driver_info;
43 	dma_addr_t driver_info_bus;
44 
45 	driver_info = dma_alloc_coherent(&priv->pdev->dev,
46 					 sizeof(struct gve_driver_info),
47 					 &driver_info_bus, GFP_KERNEL);
48 	if (!driver_info)
49 		return -ENOMEM;
50 
51 	*driver_info = (struct gve_driver_info) {
52 		.os_type = 1, /* Linux */
53 		.os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
54 		.os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
55 		.os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
56 		.driver_capability_flags = {
57 			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
58 			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
59 			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
60 			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
61 		},
62 	};
63 	strscpy(driver_info->os_version_str1, utsname()->release,
64 		sizeof(driver_info->os_version_str1));
65 	strscpy(driver_info->os_version_str2, utsname()->version,
66 		sizeof(driver_info->os_version_str2));
67 
68 	err = gve_adminq_verify_driver_compatibility(priv,
69 						     sizeof(struct gve_driver_info),
70 						     driver_info_bus);
71 
72 	/* It's ok if the device doesn't support this */
73 	if (err == -EOPNOTSUPP)
74 		err = 0;
75 
76 	dma_free_coherent(&priv->pdev->dev,
77 			  sizeof(struct gve_driver_info),
78 			  driver_info, driver_info_bus);
79 	return err;
80 }
81 
82 static netdev_features_t gve_features_check(struct sk_buff *skb,
83 					    struct net_device *dev,
84 					    netdev_features_t features)
85 {
86 	struct gve_priv *priv = netdev_priv(dev);
87 
88 	if (!gve_is_gqi(priv))
89 		return gve_features_check_dqo(skb, dev, features);
90 
91 	return features;
92 }
93 
94 static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
95 {
96 	struct gve_priv *priv = netdev_priv(dev);
97 
98 	if (gve_is_gqi(priv))
99 		return gve_tx(skb, dev);
100 	else
101 		return gve_tx_dqo(skb, dev);
102 }
103 
104 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
105 {
106 	struct gve_priv *priv = netdev_priv(dev);
107 	unsigned int start;
108 	u64 packets, bytes;
109 	int num_tx_queues;
110 	int ring;
111 
112 	num_tx_queues = gve_num_tx_queues(priv);
113 	if (priv->rx) {
114 		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
115 			do {
116 				start =
117 				  u64_stats_fetch_begin(&priv->rx[ring].statss);
118 				packets = priv->rx[ring].rpackets;
119 				bytes = priv->rx[ring].rbytes;
120 			} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
121 						       start));
122 			s->rx_packets += packets;
123 			s->rx_bytes += bytes;
124 		}
125 	}
126 	if (priv->tx) {
127 		for (ring = 0; ring < num_tx_queues; ring++) {
128 			do {
129 				start =
130 				  u64_stats_fetch_begin(&priv->tx[ring].statss);
131 				packets = priv->tx[ring].pkt_done;
132 				bytes = priv->tx[ring].bytes_done;
133 			} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
134 						       start));
135 			s->tx_packets += packets;
136 			s->tx_bytes += bytes;
137 		}
138 	}
139 }
140 
141 static int gve_alloc_counter_array(struct gve_priv *priv)
142 {
143 	priv->counter_array =
144 		dma_alloc_coherent(&priv->pdev->dev,
145 				   priv->num_event_counters *
146 				   sizeof(*priv->counter_array),
147 				   &priv->counter_array_bus, GFP_KERNEL);
148 	if (!priv->counter_array)
149 		return -ENOMEM;
150 
151 	return 0;
152 }
153 
154 static void gve_free_counter_array(struct gve_priv *priv)
155 {
156 	if (!priv->counter_array)
157 		return;
158 
159 	dma_free_coherent(&priv->pdev->dev,
160 			  priv->num_event_counters *
161 			  sizeof(*priv->counter_array),
162 			  priv->counter_array, priv->counter_array_bus);
163 	priv->counter_array = NULL;
164 }
165 
166 /* NIC requests to report stats */
167 static void gve_stats_report_task(struct work_struct *work)
168 {
169 	struct gve_priv *priv = container_of(work, struct gve_priv,
170 					     stats_report_task);
171 	if (gve_get_do_report_stats(priv)) {
172 		gve_handle_report_stats(priv);
173 		gve_clear_do_report_stats(priv);
174 	}
175 }
176 
177 static void gve_stats_report_schedule(struct gve_priv *priv)
178 {
179 	if (!gve_get_probe_in_progress(priv) &&
180 	    !gve_get_reset_in_progress(priv)) {
181 		gve_set_do_report_stats(priv);
182 		queue_work(priv->gve_wq, &priv->stats_report_task);
183 	}
184 }
185 
186 static void gve_stats_report_timer(struct timer_list *t)
187 {
188 	struct gve_priv *priv = from_timer(priv, t, stats_report_timer);
189 
190 	mod_timer(&priv->stats_report_timer,
191 		  round_jiffies(jiffies +
192 		  msecs_to_jiffies(priv->stats_report_timer_period)));
193 	gve_stats_report_schedule(priv);
194 }
195 
196 static int gve_alloc_stats_report(struct gve_priv *priv)
197 {
198 	int tx_stats_num, rx_stats_num;
199 
200 	tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
201 		       gve_num_tx_queues(priv);
202 	rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
203 		       priv->rx_cfg.num_queues;
204 	priv->stats_report_len = struct_size(priv->stats_report, stats,
205 					     size_add(tx_stats_num, rx_stats_num));
206 	priv->stats_report =
207 		dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
208 				   &priv->stats_report_bus, GFP_KERNEL);
209 	if (!priv->stats_report)
210 		return -ENOMEM;
211 	/* Set up timer for the report-stats task */
212 	timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
213 	priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
214 	return 0;
215 }
216 
217 static void gve_free_stats_report(struct gve_priv *priv)
218 {
219 	if (!priv->stats_report)
220 		return;
221 
222 	del_timer_sync(&priv->stats_report_timer);
223 	dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
224 			  priv->stats_report, priv->stats_report_bus);
225 	priv->stats_report = NULL;
226 }
227 
228 static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
229 {
230 	struct gve_priv *priv = arg;
231 
232 	queue_work(priv->gve_wq, &priv->service_task);
233 	return IRQ_HANDLED;
234 }
235 
236 static irqreturn_t gve_intr(int irq, void *arg)
237 {
238 	struct gve_notify_block *block = arg;
239 	struct gve_priv *priv = block->priv;
240 
241 	iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
242 	napi_schedule_irqoff(&block->napi);
243 	return IRQ_HANDLED;
244 }
245 
246 static irqreturn_t gve_intr_dqo(int irq, void *arg)
247 {
248 	struct gve_notify_block *block = arg;
249 
250 	/* Interrupts are automatically masked */
251 	napi_schedule_irqoff(&block->napi);
252 	return IRQ_HANDLED;
253 }
254 
255 static int gve_napi_poll(struct napi_struct *napi, int budget)
256 {
257 	struct gve_notify_block *block;
258 	__be32 __iomem *irq_doorbell;
259 	bool reschedule = false;
260 	struct gve_priv *priv;
261 	int work_done = 0;
262 
263 	block = container_of(napi, struct gve_notify_block, napi);
264 	priv = block->priv;
265 
266 	if (block->tx) {
267 		if (block->tx->q_num < priv->tx_cfg.num_queues)
268 			reschedule |= gve_tx_poll(block, budget);
269 		else if (budget)
270 			reschedule |= gve_xdp_poll(block, budget);
271 	}
272 
273 	if (!budget)
274 		return 0;
275 
276 	if (block->rx) {
277 		work_done = gve_rx_poll(block, budget);
278 		reschedule |= work_done == budget;
279 	}
280 
281 	if (reschedule)
282 		return budget;
283 
284        /* Complete processing - don't unmask irq if busy polling is enabled */
285 	if (likely(napi_complete_done(napi, work_done))) {
286 		irq_doorbell = gve_irq_doorbell(priv, block);
287 		iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
288 
289 		/* Ensure IRQ ACK is visible before we check pending work.
290 		 * If queue had issued updates, it would be truly visible.
291 		 */
292 		mb();
293 
294 		if (block->tx)
295 			reschedule |= gve_tx_clean_pending(priv, block->tx);
296 		if (block->rx)
297 			reschedule |= gve_rx_work_pending(block->rx);
298 
299 		if (reschedule && napi_schedule(napi))
300 			iowrite32be(GVE_IRQ_MASK, irq_doorbell);
301 	}
302 	return work_done;
303 }
304 
305 static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
306 {
307 	struct gve_notify_block *block =
308 		container_of(napi, struct gve_notify_block, napi);
309 	struct gve_priv *priv = block->priv;
310 	bool reschedule = false;
311 	int work_done = 0;
312 
313 	if (block->tx)
314 		reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
315 
316 	if (!budget)
317 		return 0;
318 
319 	if (block->rx) {
320 		work_done = gve_rx_poll_dqo(block, budget);
321 		reschedule |= work_done == budget;
322 	}
323 
324 	if (reschedule)
325 		return budget;
326 
327 	if (likely(napi_complete_done(napi, work_done))) {
328 		/* Enable interrupts again.
329 		 *
330 		 * We don't need to repoll afterwards because HW supports the
331 		 * PCI MSI-X PBA feature.
332 		 *
333 		 * Another interrupt would be triggered if a new event came in
334 		 * since the last one.
335 		 */
336 		gve_write_irq_doorbell_dqo(priv, block,
337 					   GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
338 	}
339 
340 	return work_done;
341 }
342 
343 static int gve_alloc_notify_blocks(struct gve_priv *priv)
344 {
345 	int num_vecs_requested = priv->num_ntfy_blks + 1;
346 	unsigned int active_cpus;
347 	int vecs_enabled;
348 	int i, j;
349 	int err;
350 
351 	priv->msix_vectors = kvcalloc(num_vecs_requested,
352 				      sizeof(*priv->msix_vectors), GFP_KERNEL);
353 	if (!priv->msix_vectors)
354 		return -ENOMEM;
355 	for (i = 0; i < num_vecs_requested; i++)
356 		priv->msix_vectors[i].entry = i;
357 	vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
358 					     GVE_MIN_MSIX, num_vecs_requested);
359 	if (vecs_enabled < 0) {
360 		dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
361 			GVE_MIN_MSIX, vecs_enabled);
362 		err = vecs_enabled;
363 		goto abort_with_msix_vectors;
364 	}
365 	if (vecs_enabled != num_vecs_requested) {
366 		int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
367 		int vecs_per_type = new_num_ntfy_blks / 2;
368 		int vecs_left = new_num_ntfy_blks % 2;
369 
370 		priv->num_ntfy_blks = new_num_ntfy_blks;
371 		priv->mgmt_msix_idx = priv->num_ntfy_blks;
372 		priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
373 						vecs_per_type);
374 		priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
375 						vecs_per_type + vecs_left);
376 		dev_err(&priv->pdev->dev,
377 			"Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
378 			vecs_enabled, priv->tx_cfg.max_queues,
379 			priv->rx_cfg.max_queues);
380 		if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
381 			priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
382 		if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
383 			priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
384 	}
385 	/* Half the notification blocks go to TX and half to RX */
386 	active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
387 
388 	/* Setup Management Vector  - the last vector */
389 	snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
390 		 pci_name(priv->pdev));
391 	err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
392 			  gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
393 	if (err) {
394 		dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
395 		goto abort_with_msix_enabled;
396 	}
397 	priv->irq_db_indices =
398 		dma_alloc_coherent(&priv->pdev->dev,
399 				   priv->num_ntfy_blks *
400 				   sizeof(*priv->irq_db_indices),
401 				   &priv->irq_db_indices_bus, GFP_KERNEL);
402 	if (!priv->irq_db_indices) {
403 		err = -ENOMEM;
404 		goto abort_with_mgmt_vector;
405 	}
406 
407 	priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
408 				     sizeof(*priv->ntfy_blocks), GFP_KERNEL);
409 	if (!priv->ntfy_blocks) {
410 		err = -ENOMEM;
411 		goto abort_with_irq_db_indices;
412 	}
413 
414 	/* Setup the other blocks - the first n-1 vectors */
415 	for (i = 0; i < priv->num_ntfy_blks; i++) {
416 		struct gve_notify_block *block = &priv->ntfy_blocks[i];
417 		int msix_idx = i;
418 
419 		snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s",
420 			 i, pci_name(priv->pdev));
421 		block->priv = priv;
422 		err = request_irq(priv->msix_vectors[msix_idx].vector,
423 				  gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
424 				  0, block->name, block);
425 		if (err) {
426 			dev_err(&priv->pdev->dev,
427 				"Failed to receive msix vector %d\n", i);
428 			goto abort_with_some_ntfy_blocks;
429 		}
430 		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
431 				      get_cpu_mask(i % active_cpus));
432 		block->irq_db_index = &priv->irq_db_indices[i].index;
433 	}
434 	return 0;
435 abort_with_some_ntfy_blocks:
436 	for (j = 0; j < i; j++) {
437 		struct gve_notify_block *block = &priv->ntfy_blocks[j];
438 		int msix_idx = j;
439 
440 		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
441 				      NULL);
442 		free_irq(priv->msix_vectors[msix_idx].vector, block);
443 	}
444 	kvfree(priv->ntfy_blocks);
445 	priv->ntfy_blocks = NULL;
446 abort_with_irq_db_indices:
447 	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
448 			  sizeof(*priv->irq_db_indices),
449 			  priv->irq_db_indices, priv->irq_db_indices_bus);
450 	priv->irq_db_indices = NULL;
451 abort_with_mgmt_vector:
452 	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
453 abort_with_msix_enabled:
454 	pci_disable_msix(priv->pdev);
455 abort_with_msix_vectors:
456 	kvfree(priv->msix_vectors);
457 	priv->msix_vectors = NULL;
458 	return err;
459 }
460 
461 static void gve_free_notify_blocks(struct gve_priv *priv)
462 {
463 	int i;
464 
465 	if (!priv->msix_vectors)
466 		return;
467 
468 	/* Free the irqs */
469 	for (i = 0; i < priv->num_ntfy_blks; i++) {
470 		struct gve_notify_block *block = &priv->ntfy_blocks[i];
471 		int msix_idx = i;
472 
473 		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
474 				      NULL);
475 		free_irq(priv->msix_vectors[msix_idx].vector, block);
476 	}
477 	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
478 	kvfree(priv->ntfy_blocks);
479 	priv->ntfy_blocks = NULL;
480 	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
481 			  sizeof(*priv->irq_db_indices),
482 			  priv->irq_db_indices, priv->irq_db_indices_bus);
483 	priv->irq_db_indices = NULL;
484 	pci_disable_msix(priv->pdev);
485 	kvfree(priv->msix_vectors);
486 	priv->msix_vectors = NULL;
487 }
488 
489 static int gve_setup_device_resources(struct gve_priv *priv)
490 {
491 	int err;
492 
493 	err = gve_alloc_counter_array(priv);
494 	if (err)
495 		return err;
496 	err = gve_alloc_notify_blocks(priv);
497 	if (err)
498 		goto abort_with_counter;
499 	err = gve_alloc_stats_report(priv);
500 	if (err)
501 		goto abort_with_ntfy_blocks;
502 	err = gve_adminq_configure_device_resources(priv,
503 						    priv->counter_array_bus,
504 						    priv->num_event_counters,
505 						    priv->irq_db_indices_bus,
506 						    priv->num_ntfy_blks);
507 	if (unlikely(err)) {
508 		dev_err(&priv->pdev->dev,
509 			"could not setup device_resources: err=%d\n", err);
510 		err = -ENXIO;
511 		goto abort_with_stats_report;
512 	}
513 
514 	if (!gve_is_gqi(priv)) {
515 		priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
516 					       GFP_KERNEL);
517 		if (!priv->ptype_lut_dqo) {
518 			err = -ENOMEM;
519 			goto abort_with_stats_report;
520 		}
521 		err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
522 		if (err) {
523 			dev_err(&priv->pdev->dev,
524 				"Failed to get ptype map: err=%d\n", err);
525 			goto abort_with_ptype_lut;
526 		}
527 	}
528 
529 	err = gve_adminq_report_stats(priv, priv->stats_report_len,
530 				      priv->stats_report_bus,
531 				      GVE_STATS_REPORT_TIMER_PERIOD);
532 	if (err)
533 		dev_err(&priv->pdev->dev,
534 			"Failed to report stats: err=%d\n", err);
535 	gve_set_device_resources_ok(priv);
536 	return 0;
537 
538 abort_with_ptype_lut:
539 	kvfree(priv->ptype_lut_dqo);
540 	priv->ptype_lut_dqo = NULL;
541 abort_with_stats_report:
542 	gve_free_stats_report(priv);
543 abort_with_ntfy_blocks:
544 	gve_free_notify_blocks(priv);
545 abort_with_counter:
546 	gve_free_counter_array(priv);
547 
548 	return err;
549 }
550 
551 static void gve_trigger_reset(struct gve_priv *priv);
552 
553 static void gve_teardown_device_resources(struct gve_priv *priv)
554 {
555 	int err;
556 
557 	/* Tell device its resources are being freed */
558 	if (gve_get_device_resources_ok(priv)) {
559 		/* detach the stats report */
560 		err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
561 		if (err) {
562 			dev_err(&priv->pdev->dev,
563 				"Failed to detach stats report: err=%d\n", err);
564 			gve_trigger_reset(priv);
565 		}
566 		err = gve_adminq_deconfigure_device_resources(priv);
567 		if (err) {
568 			dev_err(&priv->pdev->dev,
569 				"Could not deconfigure device resources: err=%d\n",
570 				err);
571 			gve_trigger_reset(priv);
572 		}
573 	}
574 
575 	kvfree(priv->ptype_lut_dqo);
576 	priv->ptype_lut_dqo = NULL;
577 
578 	gve_free_counter_array(priv);
579 	gve_free_notify_blocks(priv);
580 	gve_free_stats_report(priv);
581 	gve_clear_device_resources_ok(priv);
582 }
583 
584 static void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
585 			 int (*gve_poll)(struct napi_struct *, int))
586 {
587 	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
588 
589 	netif_napi_add(priv->dev, &block->napi, gve_poll);
590 }
591 
592 static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
593 {
594 	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
595 
596 	netif_napi_del(&block->napi);
597 }
598 
599 static int gve_register_xdp_qpls(struct gve_priv *priv)
600 {
601 	int start_id;
602 	int err;
603 	int i;
604 
605 	start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
606 	for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
607 		err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
608 		if (err) {
609 			netif_err(priv, drv, priv->dev,
610 				  "failed to register queue page list %d\n",
611 				  priv->qpls[i].id);
612 			/* This failure will trigger a reset - no need to clean
613 			 * up
614 			 */
615 			return err;
616 		}
617 	}
618 	return 0;
619 }
620 
621 static int gve_register_qpls(struct gve_priv *priv)
622 {
623 	int start_id;
624 	int err;
625 	int i;
626 
627 	start_id = gve_tx_start_qpl_id(priv);
628 	for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
629 		err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
630 		if (err) {
631 			netif_err(priv, drv, priv->dev,
632 				  "failed to register queue page list %d\n",
633 				  priv->qpls[i].id);
634 			/* This failure will trigger a reset - no need to clean
635 			 * up
636 			 */
637 			return err;
638 		}
639 	}
640 
641 	start_id = gve_rx_start_qpl_id(priv);
642 	for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
643 		err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
644 		if (err) {
645 			netif_err(priv, drv, priv->dev,
646 				  "failed to register queue page list %d\n",
647 				  priv->qpls[i].id);
648 			/* This failure will trigger a reset - no need to clean
649 			 * up
650 			 */
651 			return err;
652 		}
653 	}
654 	return 0;
655 }
656 
657 static int gve_unregister_xdp_qpls(struct gve_priv *priv)
658 {
659 	int start_id;
660 	int err;
661 	int i;
662 
663 	start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
664 	for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
665 		err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
666 		/* This failure will trigger a reset - no need to clean up */
667 		if (err) {
668 			netif_err(priv, drv, priv->dev,
669 				  "Failed to unregister queue page list %d\n",
670 				  priv->qpls[i].id);
671 			return err;
672 		}
673 	}
674 	return 0;
675 }
676 
677 static int gve_unregister_qpls(struct gve_priv *priv)
678 {
679 	int start_id;
680 	int err;
681 	int i;
682 
683 	start_id = gve_tx_start_qpl_id(priv);
684 	for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
685 		err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
686 		/* This failure will trigger a reset - no need to clean up */
687 		if (err) {
688 			netif_err(priv, drv, priv->dev,
689 				  "Failed to unregister queue page list %d\n",
690 				  priv->qpls[i].id);
691 			return err;
692 		}
693 	}
694 
695 	start_id = gve_rx_start_qpl_id(priv);
696 	for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
697 		err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
698 		/* This failure will trigger a reset - no need to clean up */
699 		if (err) {
700 			netif_err(priv, drv, priv->dev,
701 				  "Failed to unregister queue page list %d\n",
702 				  priv->qpls[i].id);
703 			return err;
704 		}
705 	}
706 	return 0;
707 }
708 
709 static int gve_create_xdp_rings(struct gve_priv *priv)
710 {
711 	int err;
712 
713 	err = gve_adminq_create_tx_queues(priv,
714 					  gve_xdp_tx_start_queue_id(priv),
715 					  priv->num_xdp_queues);
716 	if (err) {
717 		netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n",
718 			  priv->num_xdp_queues);
719 		/* This failure will trigger a reset - no need to clean
720 		 * up
721 		 */
722 		return err;
723 	}
724 	netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n",
725 		  priv->num_xdp_queues);
726 
727 	return 0;
728 }
729 
730 static int gve_create_rings(struct gve_priv *priv)
731 {
732 	int num_tx_queues = gve_num_tx_queues(priv);
733 	int err;
734 	int i;
735 
736 	err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues);
737 	if (err) {
738 		netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
739 			  num_tx_queues);
740 		/* This failure will trigger a reset - no need to clean
741 		 * up
742 		 */
743 		return err;
744 	}
745 	netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
746 		  num_tx_queues);
747 
748 	err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
749 	if (err) {
750 		netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
751 			  priv->rx_cfg.num_queues);
752 		/* This failure will trigger a reset - no need to clean
753 		 * up
754 		 */
755 		return err;
756 	}
757 	netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
758 		  priv->rx_cfg.num_queues);
759 
760 	if (gve_is_gqi(priv)) {
761 		/* Rx data ring has been prefilled with packet buffers at queue
762 		 * allocation time.
763 		 *
764 		 * Write the doorbell to provide descriptor slots and packet
765 		 * buffers to the NIC.
766 		 */
767 		for (i = 0; i < priv->rx_cfg.num_queues; i++)
768 			gve_rx_write_doorbell(priv, &priv->rx[i]);
769 	} else {
770 		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
771 			/* Post buffers and ring doorbell. */
772 			gve_rx_post_buffers_dqo(&priv->rx[i]);
773 		}
774 	}
775 
776 	return 0;
777 }
778 
779 static void add_napi_init_xdp_sync_stats(struct gve_priv *priv,
780 					 int (*napi_poll)(struct napi_struct *napi,
781 							  int budget))
782 {
783 	int start_id = gve_xdp_tx_start_queue_id(priv);
784 	int i;
785 
786 	/* Add xdp tx napi & init sync stats*/
787 	for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
788 		int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
789 
790 		u64_stats_init(&priv->tx[i].statss);
791 		priv->tx[i].ntfy_id = ntfy_idx;
792 		gve_add_napi(priv, ntfy_idx, napi_poll);
793 	}
794 }
795 
796 static void add_napi_init_sync_stats(struct gve_priv *priv,
797 				     int (*napi_poll)(struct napi_struct *napi,
798 						      int budget))
799 {
800 	int i;
801 
802 	/* Add tx napi & init sync stats*/
803 	for (i = 0; i < gve_num_tx_queues(priv); i++) {
804 		int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
805 
806 		u64_stats_init(&priv->tx[i].statss);
807 		priv->tx[i].ntfy_id = ntfy_idx;
808 		gve_add_napi(priv, ntfy_idx, napi_poll);
809 	}
810 	/* Add rx napi  & init sync stats*/
811 	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
812 		int ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
813 
814 		u64_stats_init(&priv->rx[i].statss);
815 		priv->rx[i].ntfy_id = ntfy_idx;
816 		gve_add_napi(priv, ntfy_idx, napi_poll);
817 	}
818 }
819 
/* Free TX rings through the format-specific routine.  @start_id and
 * @num_rings are only passed on for GQI; the DQO routine takes no range.
 */
static void gve_tx_free_rings(struct gve_priv *priv, int start_id, int num_rings)
{
	if (!gve_is_gqi(priv)) {
		gve_tx_free_rings_dqo(priv);
		return;
	}

	gve_tx_free_rings_gqi(priv, start_id, num_rings);
}
828 
829 static int gve_alloc_xdp_rings(struct gve_priv *priv)
830 {
831 	int start_id;
832 	int err = 0;
833 
834 	if (!priv->num_xdp_queues)
835 		return 0;
836 
837 	start_id = gve_xdp_tx_start_queue_id(priv);
838 	err = gve_tx_alloc_rings(priv, start_id, priv->num_xdp_queues);
839 	if (err)
840 		return err;
841 	add_napi_init_xdp_sync_stats(priv, gve_napi_poll);
842 
843 	return 0;
844 }
845 
846 static int gve_alloc_rings(struct gve_priv *priv)
847 {
848 	int err;
849 
850 	/* Setup tx rings */
851 	priv->tx = kvcalloc(priv->tx_cfg.max_queues, sizeof(*priv->tx),
852 			    GFP_KERNEL);
853 	if (!priv->tx)
854 		return -ENOMEM;
855 
856 	if (gve_is_gqi(priv))
857 		err = gve_tx_alloc_rings(priv, 0, gve_num_tx_queues(priv));
858 	else
859 		err = gve_tx_alloc_rings_dqo(priv);
860 	if (err)
861 		goto free_tx;
862 
863 	/* Setup rx rings */
864 	priv->rx = kvcalloc(priv->rx_cfg.max_queues, sizeof(*priv->rx),
865 			    GFP_KERNEL);
866 	if (!priv->rx) {
867 		err = -ENOMEM;
868 		goto free_tx_queue;
869 	}
870 
871 	if (gve_is_gqi(priv))
872 		err = gve_rx_alloc_rings(priv);
873 	else
874 		err = gve_rx_alloc_rings_dqo(priv);
875 	if (err)
876 		goto free_rx;
877 
878 	if (gve_is_gqi(priv))
879 		add_napi_init_sync_stats(priv, gve_napi_poll);
880 	else
881 		add_napi_init_sync_stats(priv, gve_napi_poll_dqo);
882 
883 	return 0;
884 
885 free_rx:
886 	kvfree(priv->rx);
887 	priv->rx = NULL;
888 free_tx_queue:
889 	gve_tx_free_rings(priv, 0, gve_num_tx_queues(priv));
890 free_tx:
891 	kvfree(priv->tx);
892 	priv->tx = NULL;
893 	return err;
894 }
895 
896 static int gve_destroy_xdp_rings(struct gve_priv *priv)
897 {
898 	int start_id;
899 	int err;
900 
901 	start_id = gve_xdp_tx_start_queue_id(priv);
902 	err = gve_adminq_destroy_tx_queues(priv,
903 					   start_id,
904 					   priv->num_xdp_queues);
905 	if (err) {
906 		netif_err(priv, drv, priv->dev,
907 			  "failed to destroy XDP queues\n");
908 		/* This failure will trigger a reset - no need to clean up */
909 		return err;
910 	}
911 	netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n");
912 
913 	return 0;
914 }
915 
916 static int gve_destroy_rings(struct gve_priv *priv)
917 {
918 	int num_tx_queues = gve_num_tx_queues(priv);
919 	int err;
920 
921 	err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues);
922 	if (err) {
923 		netif_err(priv, drv, priv->dev,
924 			  "failed to destroy tx queues\n");
925 		/* This failure will trigger a reset - no need to clean up */
926 		return err;
927 	}
928 	netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
929 	err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
930 	if (err) {
931 		netif_err(priv, drv, priv->dev,
932 			  "failed to destroy rx queues\n");
933 		/* This failure will trigger a reset - no need to clean up */
934 		return err;
935 	}
936 	netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
937 	return 0;
938 }
939 
/* Free the RX rings through the routine matching the queue format. */
static void gve_rx_free_rings(struct gve_priv *priv)
{
	if (!gve_is_gqi(priv)) {
		gve_rx_free_rings_dqo(priv);
		return;
	}

	gve_rx_free_rings_gqi(priv);
}
947 
948 static void gve_free_xdp_rings(struct gve_priv *priv)
949 {
950 	int ntfy_idx, start_id;
951 	int i;
952 
953 	start_id = gve_xdp_tx_start_queue_id(priv);
954 	if (priv->tx) {
955 		for (i = start_id; i <  start_id + priv->num_xdp_queues; i++) {
956 			ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
957 			gve_remove_napi(priv, ntfy_idx);
958 		}
959 		gve_tx_free_rings(priv, start_id, priv->num_xdp_queues);
960 	}
961 }
962 
963 static void gve_free_rings(struct gve_priv *priv)
964 {
965 	int num_tx_queues = gve_num_tx_queues(priv);
966 	int ntfy_idx;
967 	int i;
968 
969 	if (priv->tx) {
970 		for (i = 0; i < num_tx_queues; i++) {
971 			ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
972 			gve_remove_napi(priv, ntfy_idx);
973 		}
974 		gve_tx_free_rings(priv, 0, num_tx_queues);
975 		kvfree(priv->tx);
976 		priv->tx = NULL;
977 	}
978 	if (priv->rx) {
979 		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
980 			ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
981 			gve_remove_napi(priv, ntfy_idx);
982 		}
983 		gve_rx_free_rings(priv);
984 		kvfree(priv->rx);
985 		priv->rx = NULL;
986 	}
987 }
988 
989 int gve_alloc_page(struct gve_priv *priv, struct device *dev,
990 		   struct page **page, dma_addr_t *dma,
991 		   enum dma_data_direction dir, gfp_t gfp_flags)
992 {
993 	*page = alloc_page(gfp_flags);
994 	if (!*page) {
995 		priv->page_alloc_fail++;
996 		return -ENOMEM;
997 	}
998 	*dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
999 	if (dma_mapping_error(dev, *dma)) {
1000 		priv->dma_mapping_error++;
1001 		put_page(*page);
1002 		return -ENOMEM;
1003 	}
1004 	return 0;
1005 }
1006 
1007 static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
1008 				     int pages)
1009 {
1010 	struct gve_queue_page_list *qpl = &priv->qpls[id];
1011 	int err;
1012 	int i;
1013 
1014 	if (pages + priv->num_registered_pages > priv->max_registered_pages) {
1015 		netif_err(priv, drv, priv->dev,
1016 			  "Reached max number of registered pages %llu > %llu\n",
1017 			  pages + priv->num_registered_pages,
1018 			  priv->max_registered_pages);
1019 		return -EINVAL;
1020 	}
1021 
1022 	qpl->id = id;
1023 	qpl->num_entries = 0;
1024 	qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
1025 	/* caller handles clean up */
1026 	if (!qpl->pages)
1027 		return -ENOMEM;
1028 	qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL);
1029 	/* caller handles clean up */
1030 	if (!qpl->page_buses)
1031 		return -ENOMEM;
1032 
1033 	for (i = 0; i < pages; i++) {
1034 		err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
1035 				     &qpl->page_buses[i],
1036 				     gve_qpl_dma_dir(priv, id), GFP_KERNEL);
1037 		/* caller handles clean up */
1038 		if (err)
1039 			return -ENOMEM;
1040 		qpl->num_entries++;
1041 	}
1042 	priv->num_registered_pages += pages;
1043 
1044 	return 0;
1045 }
1046 
1047 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
1048 		   enum dma_data_direction dir)
1049 {
1050 	if (!dma_mapping_error(dev, dma))
1051 		dma_unmap_page(dev, dma, PAGE_SIZE, dir);
1052 	if (page)
1053 		put_page(page);
1054 }
1055 
1056 static void gve_free_queue_page_list(struct gve_priv *priv, u32 id)
1057 {
1058 	struct gve_queue_page_list *qpl = &priv->qpls[id];
1059 	int i;
1060 
1061 	if (!qpl->pages)
1062 		return;
1063 	if (!qpl->page_buses)
1064 		goto free_pages;
1065 
1066 	for (i = 0; i < qpl->num_entries; i++)
1067 		gve_free_page(&priv->pdev->dev, qpl->pages[i],
1068 			      qpl->page_buses[i], gve_qpl_dma_dir(priv, id));
1069 
1070 	kvfree(qpl->page_buses);
1071 	qpl->page_buses = NULL;
1072 free_pages:
1073 	kvfree(qpl->pages);
1074 	qpl->pages = NULL;
1075 	priv->num_registered_pages -= qpl->num_entries;
1076 }
1077 
1078 static int gve_alloc_xdp_qpls(struct gve_priv *priv)
1079 {
1080 	int start_id;
1081 	int i, j;
1082 	int err;
1083 
1084 	start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
1085 	for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
1086 		err = gve_alloc_queue_page_list(priv, i,
1087 						priv->tx_pages_per_qpl);
1088 		if (err)
1089 			goto free_qpls;
1090 	}
1091 
1092 	return 0;
1093 
1094 free_qpls:
1095 	for (j = start_id; j <= i; j++)
1096 		gve_free_queue_page_list(priv, j);
1097 	return err;
1098 }
1099 
1100 static int gve_alloc_qpls(struct gve_priv *priv)
1101 {
1102 	int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
1103 	int page_count;
1104 	int start_id;
1105 	int i, j;
1106 	int err;
1107 
1108 	if (!gve_is_qpl(priv))
1109 		return 0;
1110 
1111 	priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL);
1112 	if (!priv->qpls)
1113 		return -ENOMEM;
1114 
1115 	start_id = gve_tx_start_qpl_id(priv);
1116 	page_count = priv->tx_pages_per_qpl;
1117 	for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
1118 		err = gve_alloc_queue_page_list(priv, i,
1119 						page_count);
1120 		if (err)
1121 			goto free_qpls;
1122 	}
1123 
1124 	start_id = gve_rx_start_qpl_id(priv);
1125 
1126 	/* For GQI_QPL number of pages allocated have 1:1 relationship with
1127 	 * number of descriptors. For DQO, number of pages required are
1128 	 * more than descriptors (because of out of order completions).
1129 	 */
1130 	page_count = priv->queue_format == GVE_GQI_QPL_FORMAT ?
1131 		priv->rx_data_slot_cnt : priv->rx_pages_per_qpl;
1132 	for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
1133 		err = gve_alloc_queue_page_list(priv, i,
1134 						page_count);
1135 		if (err)
1136 			goto free_qpls;
1137 	}
1138 
1139 	priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(max_queues) *
1140 				     sizeof(unsigned long) * BITS_PER_BYTE;
1141 	priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues),
1142 					    sizeof(unsigned long), GFP_KERNEL);
1143 	if (!priv->qpl_cfg.qpl_id_map) {
1144 		err = -ENOMEM;
1145 		goto free_qpls;
1146 	}
1147 
1148 	return 0;
1149 
1150 free_qpls:
1151 	for (j = 0; j <= i; j++)
1152 		gve_free_queue_page_list(priv, j);
1153 	kvfree(priv->qpls);
1154 	priv->qpls = NULL;
1155 	return err;
1156 }
1157 
/* Release the queue page lists that back the XDP TX queues. */
static void gve_free_xdp_qpls(struct gve_priv *priv)
{
	int first = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
	int id = first;

	while (id < first + gve_num_xdp_qpls(priv)) {
		gve_free_queue_page_list(priv, id);
		id++;
	}
}
1167 
1168 static void gve_free_qpls(struct gve_priv *priv)
1169 {
1170 	int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
1171 	int i;
1172 
1173 	if (!priv->qpls)
1174 		return;
1175 
1176 	kvfree(priv->qpl_cfg.qpl_id_map);
1177 	priv->qpl_cfg.qpl_id_map = NULL;
1178 
1179 	for (i = 0; i < max_queues; i++)
1180 		gve_free_queue_page_list(priv, i);
1181 
1182 	kvfree(priv->qpls);
1183 	priv->qpls = NULL;
1184 }
1185 
1186 /* Use this to schedule a reset when the device is capable of continuing
1187  * to handle other requests in its current state. If it is not, do a reset
1188  * in thread instead.
1189  */
1190 void gve_schedule_reset(struct gve_priv *priv)
1191 {
1192 	gve_set_do_reset(priv);
1193 	queue_work(priv->gve_wq, &priv->service_task);
1194 }
1195 
1196 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
1197 static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
1198 static void gve_turndown(struct gve_priv *priv);
1199 static void gve_turnup(struct gve_priv *priv);
1200 
1201 static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
1202 {
1203 	struct napi_struct *napi;
1204 	struct gve_rx_ring *rx;
1205 	int err = 0;
1206 	int i, j;
1207 	u32 tx_qid;
1208 
1209 	if (!priv->num_xdp_queues)
1210 		return 0;
1211 
1212 	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1213 		rx = &priv->rx[i];
1214 		napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1215 
1216 		err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i,
1217 				       napi->napi_id);
1218 		if (err)
1219 			goto err;
1220 		err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
1221 						 MEM_TYPE_PAGE_SHARED, NULL);
1222 		if (err)
1223 			goto err;
1224 		rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
1225 		if (rx->xsk_pool) {
1226 			err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
1227 					       napi->napi_id);
1228 			if (err)
1229 				goto err;
1230 			err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
1231 							 MEM_TYPE_XSK_BUFF_POOL, NULL);
1232 			if (err)
1233 				goto err;
1234 			xsk_pool_set_rxq_info(rx->xsk_pool,
1235 					      &rx->xsk_rxq);
1236 		}
1237 	}
1238 
1239 	for (i = 0; i < priv->num_xdp_queues; i++) {
1240 		tx_qid = gve_xdp_tx_queue_id(priv, i);
1241 		priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
1242 	}
1243 	return 0;
1244 
1245 err:
1246 	for (j = i; j >= 0; j--) {
1247 		rx = &priv->rx[j];
1248 		if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
1249 			xdp_rxq_info_unreg(&rx->xdp_rxq);
1250 		if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
1251 			xdp_rxq_info_unreg(&rx->xsk_rxq);
1252 	}
1253 	return err;
1254 }
1255 
1256 static void gve_unreg_xdp_info(struct gve_priv *priv)
1257 {
1258 	int i, tx_qid;
1259 
1260 	if (!priv->num_xdp_queues)
1261 		return;
1262 
1263 	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1264 		struct gve_rx_ring *rx = &priv->rx[i];
1265 
1266 		xdp_rxq_info_unreg(&rx->xdp_rxq);
1267 		if (rx->xsk_pool) {
1268 			xdp_rxq_info_unreg(&rx->xsk_rxq);
1269 			rx->xsk_pool = NULL;
1270 		}
1271 	}
1272 
1273 	for (i = 0; i < priv->num_xdp_queues; i++) {
1274 		tx_qid = gve_xdp_tx_queue_id(priv, i);
1275 		priv->tx[tx_qid].xsk_pool = NULL;
1276 	}
1277 }
1278 
1279 static void gve_drain_page_cache(struct gve_priv *priv)
1280 {
1281 	struct page_frag_cache *nc;
1282 	int i;
1283 
1284 	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1285 		nc = &priv->rx[i].page_cache;
1286 		if (nc->va) {
1287 			__page_frag_cache_drain(virt_to_page(nc->va),
1288 						nc->pagecnt_bias);
1289 			nc->va = NULL;
1290 		}
1291 	}
1292 }
1293 
1294 static int gve_open(struct net_device *dev)
1295 {
1296 	struct gve_priv *priv = netdev_priv(dev);
1297 	int err;
1298 
1299 	if (priv->xdp_prog)
1300 		priv->num_xdp_queues = priv->rx_cfg.num_queues;
1301 	else
1302 		priv->num_xdp_queues = 0;
1303 
1304 	err = gve_alloc_qpls(priv);
1305 	if (err)
1306 		return err;
1307 
1308 	err = gve_alloc_rings(priv);
1309 	if (err)
1310 		goto free_qpls;
1311 
1312 	err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
1313 	if (err)
1314 		goto free_rings;
1315 	err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
1316 	if (err)
1317 		goto free_rings;
1318 
1319 	err = gve_reg_xdp_info(priv, dev);
1320 	if (err)
1321 		goto free_rings;
1322 
1323 	err = gve_register_qpls(priv);
1324 	if (err)
1325 		goto reset;
1326 
1327 	if (!gve_is_gqi(priv)) {
1328 		/* Hard code this for now. This may be tuned in the future for
1329 		 * performance.
1330 		 */
1331 		priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE;
1332 	}
1333 	err = gve_create_rings(priv);
1334 	if (err)
1335 		goto reset;
1336 
1337 	gve_set_device_rings_ok(priv);
1338 
1339 	if (gve_get_report_stats(priv))
1340 		mod_timer(&priv->stats_report_timer,
1341 			  round_jiffies(jiffies +
1342 				msecs_to_jiffies(priv->stats_report_timer_period)));
1343 
1344 	gve_turnup(priv);
1345 	queue_work(priv->gve_wq, &priv->service_task);
1346 	priv->interface_up_cnt++;
1347 	return 0;
1348 
1349 free_rings:
1350 	gve_free_rings(priv);
1351 free_qpls:
1352 	gve_free_qpls(priv);
1353 	return err;
1354 
1355 reset:
1356 	/* This must have been called from a reset due to the rtnl lock
1357 	 * so just return at this point.
1358 	 */
1359 	if (gve_get_reset_in_progress(priv))
1360 		return err;
1361 	/* Otherwise reset before returning */
1362 	gve_reset_and_teardown(priv, true);
1363 	/* if this fails there is nothing we can do so just ignore the return */
1364 	gve_reset_recovery(priv, false);
1365 	/* return the original error */
1366 	return err;
1367 }
1368 
1369 static int gve_close(struct net_device *dev)
1370 {
1371 	struct gve_priv *priv = netdev_priv(dev);
1372 	int err;
1373 
1374 	netif_carrier_off(dev);
1375 	if (gve_get_device_rings_ok(priv)) {
1376 		gve_turndown(priv);
1377 		gve_drain_page_cache(priv);
1378 		err = gve_destroy_rings(priv);
1379 		if (err)
1380 			goto err;
1381 		err = gve_unregister_qpls(priv);
1382 		if (err)
1383 			goto err;
1384 		gve_clear_device_rings_ok(priv);
1385 	}
1386 	del_timer_sync(&priv->stats_report_timer);
1387 
1388 	gve_unreg_xdp_info(priv);
1389 	gve_free_rings(priv);
1390 	gve_free_qpls(priv);
1391 	priv->interface_down_cnt++;
1392 	return 0;
1393 
1394 err:
1395 	/* This must have been called from a reset due to the rtnl lock
1396 	 * so just return at this point.
1397 	 */
1398 	if (gve_get_reset_in_progress(priv))
1399 		return err;
1400 	/* Otherwise reset before returning */
1401 	gve_reset_and_teardown(priv, true);
1402 	return gve_reset_recovery(priv, false);
1403 }
1404 
1405 static int gve_remove_xdp_queues(struct gve_priv *priv)
1406 {
1407 	int err;
1408 
1409 	err = gve_destroy_xdp_rings(priv);
1410 	if (err)
1411 		return err;
1412 
1413 	err = gve_unregister_xdp_qpls(priv);
1414 	if (err)
1415 		return err;
1416 
1417 	gve_unreg_xdp_info(priv);
1418 	gve_free_xdp_rings(priv);
1419 	gve_free_xdp_qpls(priv);
1420 	priv->num_xdp_queues = 0;
1421 	return 0;
1422 }
1423 
1424 static int gve_add_xdp_queues(struct gve_priv *priv)
1425 {
1426 	int err;
1427 
1428 	priv->num_xdp_queues = priv->tx_cfg.num_queues;
1429 
1430 	err = gve_alloc_xdp_qpls(priv);
1431 	if (err)
1432 		goto err;
1433 
1434 	err = gve_alloc_xdp_rings(priv);
1435 	if (err)
1436 		goto free_xdp_qpls;
1437 
1438 	err = gve_reg_xdp_info(priv, priv->dev);
1439 	if (err)
1440 		goto free_xdp_rings;
1441 
1442 	err = gve_register_xdp_qpls(priv);
1443 	if (err)
1444 		goto free_xdp_rings;
1445 
1446 	err = gve_create_xdp_rings(priv);
1447 	if (err)
1448 		goto free_xdp_rings;
1449 
1450 	return 0;
1451 
1452 free_xdp_rings:
1453 	gve_free_xdp_rings(priv);
1454 free_xdp_qpls:
1455 	gve_free_xdp_qpls(priv);
1456 err:
1457 	priv->num_xdp_queues = 0;
1458 	return err;
1459 }
1460 
1461 static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
1462 {
1463 	if (!gve_get_napi_enabled(priv))
1464 		return;
1465 
1466 	if (link_status == netif_carrier_ok(priv->dev))
1467 		return;
1468 
1469 	if (link_status) {
1470 		netdev_info(priv->dev, "Device link is up.\n");
1471 		netif_carrier_on(priv->dev);
1472 	} else {
1473 		netdev_info(priv->dev, "Device link is down.\n");
1474 		netif_carrier_off(priv->dev);
1475 	}
1476 }
1477 
1478 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
1479 		       struct netlink_ext_ack *extack)
1480 {
1481 	struct bpf_prog *old_prog;
1482 	int err = 0;
1483 	u32 status;
1484 
1485 	old_prog = READ_ONCE(priv->xdp_prog);
1486 	if (!netif_carrier_ok(priv->dev)) {
1487 		WRITE_ONCE(priv->xdp_prog, prog);
1488 		if (old_prog)
1489 			bpf_prog_put(old_prog);
1490 		return 0;
1491 	}
1492 
1493 	gve_turndown(priv);
1494 	if (!old_prog && prog) {
1495 		// Allocate XDP TX queues if an XDP program is
1496 		// being installed
1497 		err = gve_add_xdp_queues(priv);
1498 		if (err)
1499 			goto out;
1500 	} else if (old_prog && !prog) {
1501 		// Remove XDP TX queues if an XDP program is
1502 		// being uninstalled
1503 		err = gve_remove_xdp_queues(priv);
1504 		if (err)
1505 			goto out;
1506 	}
1507 	WRITE_ONCE(priv->xdp_prog, prog);
1508 	if (old_prog)
1509 		bpf_prog_put(old_prog);
1510 
1511 out:
1512 	gve_turnup(priv);
1513 	status = ioread32be(&priv->reg_bar0->device_status);
1514 	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1515 	return err;
1516 }
1517 
1518 static int gve_xsk_pool_enable(struct net_device *dev,
1519 			       struct xsk_buff_pool *pool,
1520 			       u16 qid)
1521 {
1522 	struct gve_priv *priv = netdev_priv(dev);
1523 	struct napi_struct *napi;
1524 	struct gve_rx_ring *rx;
1525 	int tx_qid;
1526 	int err;
1527 
1528 	if (qid >= priv->rx_cfg.num_queues) {
1529 		dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid);
1530 		return -EINVAL;
1531 	}
1532 	if (xsk_pool_get_rx_frame_size(pool) <
1533 	     priv->dev->max_mtu + sizeof(struct ethhdr)) {
1534 		dev_err(&priv->pdev->dev, "xsk pool frame_len too small");
1535 		return -EINVAL;
1536 	}
1537 
1538 	err = xsk_pool_dma_map(pool, &priv->pdev->dev,
1539 			       DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1540 	if (err)
1541 		return err;
1542 
1543 	/* If XDP prog is not installed, return */
1544 	if (!priv->xdp_prog)
1545 		return 0;
1546 
1547 	rx = &priv->rx[qid];
1548 	napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1549 	err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id);
1550 	if (err)
1551 		goto err;
1552 
1553 	err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
1554 					 MEM_TYPE_XSK_BUFF_POOL, NULL);
1555 	if (err)
1556 		goto err;
1557 
1558 	xsk_pool_set_rxq_info(pool, &rx->xsk_rxq);
1559 	rx->xsk_pool = pool;
1560 
1561 	tx_qid = gve_xdp_tx_queue_id(priv, qid);
1562 	priv->tx[tx_qid].xsk_pool = pool;
1563 
1564 	return 0;
1565 err:
1566 	if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
1567 		xdp_rxq_info_unreg(&rx->xsk_rxq);
1568 
1569 	xsk_pool_dma_unmap(pool,
1570 			   DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1571 	return err;
1572 }
1573 
1574 static int gve_xsk_pool_disable(struct net_device *dev,
1575 				u16 qid)
1576 {
1577 	struct gve_priv *priv = netdev_priv(dev);
1578 	struct napi_struct *napi_rx;
1579 	struct napi_struct *napi_tx;
1580 	struct xsk_buff_pool *pool;
1581 	int tx_qid;
1582 
1583 	pool = xsk_get_pool_from_qid(dev, qid);
1584 	if (!pool)
1585 		return -EINVAL;
1586 	if (qid >= priv->rx_cfg.num_queues)
1587 		return -EINVAL;
1588 
1589 	/* If XDP prog is not installed, unmap DMA and return */
1590 	if (!priv->xdp_prog)
1591 		goto done;
1592 
1593 	tx_qid = gve_xdp_tx_queue_id(priv, qid);
1594 	if (!netif_running(dev)) {
1595 		priv->rx[qid].xsk_pool = NULL;
1596 		xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
1597 		priv->tx[tx_qid].xsk_pool = NULL;
1598 		goto done;
1599 	}
1600 
1601 	napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
1602 	napi_disable(napi_rx); /* make sure current rx poll is done */
1603 
1604 	napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
1605 	napi_disable(napi_tx); /* make sure current tx poll is done */
1606 
1607 	priv->rx[qid].xsk_pool = NULL;
1608 	xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
1609 	priv->tx[tx_qid].xsk_pool = NULL;
1610 	smp_mb(); /* Make sure it is visible to the workers on datapath */
1611 
1612 	napi_enable(napi_rx);
1613 	if (gve_rx_work_pending(&priv->rx[qid]))
1614 		napi_schedule(napi_rx);
1615 
1616 	napi_enable(napi_tx);
1617 	if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
1618 		napi_schedule(napi_tx);
1619 
1620 done:
1621 	xsk_pool_dma_unmap(pool,
1622 			   DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1623 	return 0;
1624 }
1625 
1626 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
1627 {
1628 	struct gve_priv *priv = netdev_priv(dev);
1629 	int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id);
1630 
1631 	if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog)
1632 		return -EINVAL;
1633 
1634 	if (flags & XDP_WAKEUP_TX) {
1635 		struct gve_tx_ring *tx = &priv->tx[tx_queue_id];
1636 		struct napi_struct *napi =
1637 			&priv->ntfy_blocks[tx->ntfy_id].napi;
1638 
1639 		if (!napi_if_scheduled_mark_missed(napi)) {
1640 			/* Call local_bh_enable to trigger SoftIRQ processing */
1641 			local_bh_disable();
1642 			napi_schedule(napi);
1643 			local_bh_enable();
1644 		}
1645 
1646 		tx->xdp_xsk_wakeup++;
1647 	}
1648 
1649 	return 0;
1650 }
1651 
1652 static int verify_xdp_configuration(struct net_device *dev)
1653 {
1654 	struct gve_priv *priv = netdev_priv(dev);
1655 
1656 	if (dev->features & NETIF_F_LRO) {
1657 		netdev_warn(dev, "XDP is not supported when LRO is on.\n");
1658 		return -EOPNOTSUPP;
1659 	}
1660 
1661 	if (priv->queue_format != GVE_GQI_QPL_FORMAT) {
1662 		netdev_warn(dev, "XDP is not supported in mode %d.\n",
1663 			    priv->queue_format);
1664 		return -EOPNOTSUPP;
1665 	}
1666 
1667 	if (dev->mtu > GVE_DEFAULT_RX_BUFFER_SIZE - sizeof(struct ethhdr) - GVE_RX_PAD) {
1668 		netdev_warn(dev, "XDP is not supported for mtu %d.\n",
1669 			    dev->mtu);
1670 		return -EOPNOTSUPP;
1671 	}
1672 
1673 	if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues ||
1674 	    (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) {
1675 		netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d",
1676 			    priv->rx_cfg.num_queues,
1677 			    priv->tx_cfg.num_queues,
1678 			    priv->tx_cfg.max_queues);
1679 		return -EINVAL;
1680 	}
1681 	return 0;
1682 }
1683 
1684 static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
1685 {
1686 	struct gve_priv *priv = netdev_priv(dev);
1687 	int err;
1688 
1689 	err = verify_xdp_configuration(dev);
1690 	if (err)
1691 		return err;
1692 	switch (xdp->command) {
1693 	case XDP_SETUP_PROG:
1694 		return gve_set_xdp(priv, xdp->prog, xdp->extack);
1695 	case XDP_SETUP_XSK_POOL:
1696 		if (xdp->xsk.pool)
1697 			return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id);
1698 		else
1699 			return gve_xsk_pool_disable(dev, xdp->xsk.queue_id);
1700 	default:
1701 		return -EINVAL;
1702 	}
1703 }
1704 
1705 int gve_adjust_queues(struct gve_priv *priv,
1706 		      struct gve_queue_config new_rx_config,
1707 		      struct gve_queue_config new_tx_config)
1708 {
1709 	int err;
1710 
1711 	if (netif_carrier_ok(priv->dev)) {
1712 		/* To make this process as simple as possible we teardown the
1713 		 * device, set the new configuration, and then bring the device
1714 		 * up again.
1715 		 */
1716 		err = gve_close(priv->dev);
1717 		/* we have already tried to reset in close,
1718 		 * just fail at this point
1719 		 */
1720 		if (err)
1721 			return err;
1722 		priv->tx_cfg = new_tx_config;
1723 		priv->rx_cfg = new_rx_config;
1724 
1725 		err = gve_open(priv->dev);
1726 		if (err)
1727 			goto err;
1728 
1729 		return 0;
1730 	}
1731 	/* Set the config for the next up. */
1732 	priv->tx_cfg = new_tx_config;
1733 	priv->rx_cfg = new_rx_config;
1734 
1735 	return 0;
1736 err:
1737 	netif_err(priv, drv, priv->dev,
1738 		  "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
1739 	gve_turndown(priv);
1740 	return err;
1741 }
1742 
1743 static void gve_turndown(struct gve_priv *priv)
1744 {
1745 	int idx;
1746 
1747 	if (netif_carrier_ok(priv->dev))
1748 		netif_carrier_off(priv->dev);
1749 
1750 	if (!gve_get_napi_enabled(priv))
1751 		return;
1752 
1753 	/* Disable napi to prevent more work from coming in */
1754 	for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1755 		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1756 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1757 
1758 		napi_disable(&block->napi);
1759 	}
1760 	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1761 		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1762 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1763 
1764 		napi_disable(&block->napi);
1765 	}
1766 
1767 	/* Stop tx queues */
1768 	netif_tx_disable(priv->dev);
1769 
1770 	gve_clear_napi_enabled(priv);
1771 	gve_clear_report_stats(priv);
1772 }
1773 
1774 static void gve_turnup(struct gve_priv *priv)
1775 {
1776 	int idx;
1777 
1778 	/* Start the tx queues */
1779 	netif_tx_start_all_queues(priv->dev);
1780 
1781 	/* Enable napi and unmask interrupts for all queues */
1782 	for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1783 		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1784 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1785 
1786 		napi_enable(&block->napi);
1787 		if (gve_is_gqi(priv)) {
1788 			iowrite32be(0, gve_irq_doorbell(priv, block));
1789 		} else {
1790 			gve_set_itr_coalesce_usecs_dqo(priv, block,
1791 						       priv->tx_coalesce_usecs);
1792 		}
1793 	}
1794 	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1795 		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1796 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1797 
1798 		napi_enable(&block->napi);
1799 		if (gve_is_gqi(priv)) {
1800 			iowrite32be(0, gve_irq_doorbell(priv, block));
1801 		} else {
1802 			gve_set_itr_coalesce_usecs_dqo(priv, block,
1803 						       priv->rx_coalesce_usecs);
1804 		}
1805 	}
1806 
1807 	gve_set_napi_enabled(priv);
1808 }
1809 
1810 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
1811 {
1812 	struct gve_notify_block *block;
1813 	struct gve_tx_ring *tx = NULL;
1814 	struct gve_priv *priv;
1815 	u32 last_nic_done;
1816 	u32 current_time;
1817 	u32 ntfy_idx;
1818 
1819 	netdev_info(dev, "Timeout on tx queue, %d", txqueue);
1820 	priv = netdev_priv(dev);
1821 	if (txqueue > priv->tx_cfg.num_queues)
1822 		goto reset;
1823 
1824 	ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
1825 	if (ntfy_idx >= priv->num_ntfy_blks)
1826 		goto reset;
1827 
1828 	block = &priv->ntfy_blocks[ntfy_idx];
1829 	tx = block->tx;
1830 
1831 	current_time = jiffies_to_msecs(jiffies);
1832 	if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
1833 		goto reset;
1834 
1835 	/* Check to see if there are missed completions, which will allow us to
1836 	 * kick the queue.
1837 	 */
1838 	last_nic_done = gve_tx_load_event_counter(priv, tx);
1839 	if (last_nic_done - tx->done) {
1840 		netdev_info(dev, "Kicking queue %d", txqueue);
1841 		iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
1842 		napi_schedule(&block->napi);
1843 		tx->last_kick_msec = current_time;
1844 		goto out;
1845 	} // Else reset.
1846 
1847 reset:
1848 	gve_schedule_reset(priv);
1849 
1850 out:
1851 	if (tx)
1852 		tx->queue_timeout++;
1853 	priv->tx_timeo_cnt++;
1854 }
1855 
1856 static int gve_set_features(struct net_device *netdev,
1857 			    netdev_features_t features)
1858 {
1859 	const netdev_features_t orig_features = netdev->features;
1860 	struct gve_priv *priv = netdev_priv(netdev);
1861 	int err;
1862 
1863 	if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
1864 		netdev->features ^= NETIF_F_LRO;
1865 		if (netif_carrier_ok(netdev)) {
1866 			/* To make this process as simple as possible we
1867 			 * teardown the device, set the new configuration,
1868 			 * and then bring the device up again.
1869 			 */
1870 			err = gve_close(netdev);
1871 			/* We have already tried to reset in close, just fail
1872 			 * at this point.
1873 			 */
1874 			if (err)
1875 				goto err;
1876 
1877 			err = gve_open(netdev);
1878 			if (err)
1879 				goto err;
1880 		}
1881 	}
1882 
1883 	return 0;
1884 err:
1885 	/* Reverts the change on error. */
1886 	netdev->features = orig_features;
1887 	netif_err(priv, drv, netdev,
1888 		  "Set features failed! !!! DISABLING ALL QUEUES !!!\n");
1889 	return err;
1890 }
1891 
1892 static const struct net_device_ops gve_netdev_ops = {
1893 	.ndo_start_xmit		=	gve_start_xmit,
1894 	.ndo_features_check	=	gve_features_check,
1895 	.ndo_open		=	gve_open,
1896 	.ndo_stop		=	gve_close,
1897 	.ndo_get_stats64	=	gve_get_stats,
1898 	.ndo_tx_timeout         =       gve_tx_timeout,
1899 	.ndo_set_features	=	gve_set_features,
1900 	.ndo_bpf		=	gve_xdp,
1901 	.ndo_xdp_xmit		=	gve_xdp_xmit,
1902 	.ndo_xsk_wakeup		=	gve_xsk_wakeup,
1903 };
1904 
1905 static void gve_handle_status(struct gve_priv *priv, u32 status)
1906 {
1907 	if (GVE_DEVICE_STATUS_RESET_MASK & status) {
1908 		dev_info(&priv->pdev->dev, "Device requested reset.\n");
1909 		gve_set_do_reset(priv);
1910 	}
1911 	if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
1912 		priv->stats_report_trigger_cnt++;
1913 		gve_set_do_report_stats(priv);
1914 	}
1915 }
1916 
1917 static void gve_handle_reset(struct gve_priv *priv)
1918 {
1919 	/* A service task will be scheduled at the end of probe to catch any
1920 	 * resets that need to happen, and we don't want to reset until
1921 	 * probe is done.
1922 	 */
1923 	if (gve_get_probe_in_progress(priv))
1924 		return;
1925 
1926 	if (gve_get_do_reset(priv)) {
1927 		rtnl_lock();
1928 		gve_reset(priv, false);
1929 		rtnl_unlock();
1930 	}
1931 }
1932 
1933 void gve_handle_report_stats(struct gve_priv *priv)
1934 {
1935 	struct stats *stats = priv->stats_report->stats;
1936 	int idx, stats_idx = 0;
1937 	unsigned int start = 0;
1938 	u64 tx_bytes;
1939 
1940 	if (!gve_get_report_stats(priv))
1941 		return;
1942 
1943 	be64_add_cpu(&priv->stats_report->written_count, 1);
1944 	/* tx stats */
1945 	if (priv->tx) {
1946 		for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1947 			u32 last_completion = 0;
1948 			u32 tx_frames = 0;
1949 
1950 			/* DQO doesn't currently support these metrics. */
1951 			if (gve_is_gqi(priv)) {
1952 				last_completion = priv->tx[idx].done;
1953 				tx_frames = priv->tx[idx].req;
1954 			}
1955 
1956 			do {
1957 				start = u64_stats_fetch_begin(&priv->tx[idx].statss);
1958 				tx_bytes = priv->tx[idx].bytes_done;
1959 			} while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
1960 			stats[stats_idx++] = (struct stats) {
1961 				.stat_name = cpu_to_be32(TX_WAKE_CNT),
1962 				.value = cpu_to_be64(priv->tx[idx].wake_queue),
1963 				.queue_id = cpu_to_be32(idx),
1964 			};
1965 			stats[stats_idx++] = (struct stats) {
1966 				.stat_name = cpu_to_be32(TX_STOP_CNT),
1967 				.value = cpu_to_be64(priv->tx[idx].stop_queue),
1968 				.queue_id = cpu_to_be32(idx),
1969 			};
1970 			stats[stats_idx++] = (struct stats) {
1971 				.stat_name = cpu_to_be32(TX_FRAMES_SENT),
1972 				.value = cpu_to_be64(tx_frames),
1973 				.queue_id = cpu_to_be32(idx),
1974 			};
1975 			stats[stats_idx++] = (struct stats) {
1976 				.stat_name = cpu_to_be32(TX_BYTES_SENT),
1977 				.value = cpu_to_be64(tx_bytes),
1978 				.queue_id = cpu_to_be32(idx),
1979 			};
1980 			stats[stats_idx++] = (struct stats) {
1981 				.stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
1982 				.value = cpu_to_be64(last_completion),
1983 				.queue_id = cpu_to_be32(idx),
1984 			};
1985 			stats[stats_idx++] = (struct stats) {
1986 				.stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
1987 				.value = cpu_to_be64(priv->tx[idx].queue_timeout),
1988 				.queue_id = cpu_to_be32(idx),
1989 			};
1990 		}
1991 	}
1992 	/* rx stats */
1993 	if (priv->rx) {
1994 		for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1995 			stats[stats_idx++] = (struct stats) {
1996 				.stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
1997 				.value = cpu_to_be64(priv->rx[idx].desc.seqno),
1998 				.queue_id = cpu_to_be32(idx),
1999 			};
2000 			stats[stats_idx++] = (struct stats) {
2001 				.stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
2002 				.value = cpu_to_be64(priv->rx[0].fill_cnt),
2003 				.queue_id = cpu_to_be32(idx),
2004 			};
2005 		}
2006 	}
2007 }
2008 
2009 /* Handle NIC status register changes, reset requests and report stats */
2010 static void gve_service_task(struct work_struct *work)
2011 {
2012 	struct gve_priv *priv = container_of(work, struct gve_priv,
2013 					     service_task);
2014 	u32 status = ioread32be(&priv->reg_bar0->device_status);
2015 
2016 	gve_handle_status(priv, status);
2017 
2018 	gve_handle_reset(priv);
2019 	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
2020 }
2021 
2022 static void gve_set_netdev_xdp_features(struct gve_priv *priv)
2023 {
2024 	if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
2025 		priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC;
2026 		priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT;
2027 		priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
2028 		priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
2029 	} else {
2030 		priv->dev->xdp_features = 0;
2031 	}
2032 }
2033 
2034 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
2035 {
2036 	int num_ntfy;
2037 	int err;
2038 
2039 	/* Set up the adminq */
2040 	err = gve_adminq_alloc(&priv->pdev->dev, priv);
2041 	if (err) {
2042 		dev_err(&priv->pdev->dev,
2043 			"Failed to alloc admin queue: err=%d\n", err);
2044 		return err;
2045 	}
2046 
2047 	err = gve_verify_driver_compatibility(priv);
2048 	if (err) {
2049 		dev_err(&priv->pdev->dev,
2050 			"Could not verify driver compatibility: err=%d\n", err);
2051 		goto err;
2052 	}
2053 
2054 	if (skip_describe_device)
2055 		goto setup_device;
2056 
2057 	priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
2058 	/* Get the initial information we need from the device */
2059 	err = gve_adminq_describe_device(priv);
2060 	if (err) {
2061 		dev_err(&priv->pdev->dev,
2062 			"Could not get device information: err=%d\n", err);
2063 		goto err;
2064 	}
2065 	priv->dev->mtu = priv->dev->max_mtu;
2066 	num_ntfy = pci_msix_vec_count(priv->pdev);
2067 	if (num_ntfy <= 0) {
2068 		dev_err(&priv->pdev->dev,
2069 			"could not count MSI-x vectors: err=%d\n", num_ntfy);
2070 		err = num_ntfy;
2071 		goto err;
2072 	} else if (num_ntfy < GVE_MIN_MSIX) {
2073 		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
2074 			GVE_MIN_MSIX, num_ntfy);
2075 		err = -EINVAL;
2076 		goto err;
2077 	}
2078 
2079 	/* Big TCP is only supported on DQ*/
2080 	if (!gve_is_gqi(priv))
2081 		netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);
2082 
2083 	priv->num_registered_pages = 0;
2084 	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
2085 	/* gvnic has one Notification Block per MSI-x vector, except for the
2086 	 * management vector
2087 	 */
2088 	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
2089 	priv->mgmt_msix_idx = priv->num_ntfy_blks;
2090 
2091 	priv->tx_cfg.max_queues =
2092 		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
2093 	priv->rx_cfg.max_queues =
2094 		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);
2095 
2096 	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
2097 	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
2098 	if (priv->default_num_queues > 0) {
2099 		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
2100 						priv->tx_cfg.num_queues);
2101 		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
2102 						priv->rx_cfg.num_queues);
2103 	}
2104 
2105 	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
2106 		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
2107 	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
2108 		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
2109 
2110 	if (!gve_is_gqi(priv)) {
2111 		priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
2112 		priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
2113 	}
2114 
2115 setup_device:
2116 	gve_set_netdev_xdp_features(priv);
2117 	err = gve_setup_device_resources(priv);
2118 	if (!err)
2119 		return 0;
2120 err:
2121 	gve_adminq_free(&priv->pdev->dev, priv);
2122 	return err;
2123 }
2124 
2125 static void gve_teardown_priv_resources(struct gve_priv *priv)
2126 {
2127 	gve_teardown_device_resources(priv);
2128 	gve_adminq_free(&priv->pdev->dev, priv);
2129 }
2130 
/* Trigger a device reset; releasing the admin queue is what resets the
 * device.
 */
static void gve_trigger_reset(struct gve_priv *priv)
{
	gve_adminq_release(priv);
}
2136 
2137 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
2138 {
2139 	gve_trigger_reset(priv);
2140 	/* With the reset having already happened, close cannot fail */
2141 	if (was_up)
2142 		gve_close(priv->dev);
2143 	gve_teardown_priv_resources(priv);
2144 }
2145 
2146 static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
2147 {
2148 	int err;
2149 
2150 	err = gve_init_priv(priv, true);
2151 	if (err)
2152 		goto err;
2153 	if (was_up) {
2154 		err = gve_open(priv->dev);
2155 		if (err)
2156 			goto err;
2157 	}
2158 	return 0;
2159 err:
2160 	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
2161 	gve_turndown(priv);
2162 	return err;
2163 }
2164 
2165 int gve_reset(struct gve_priv *priv, bool attempt_teardown)
2166 {
2167 	bool was_up = netif_carrier_ok(priv->dev);
2168 	int err;
2169 
2170 	dev_info(&priv->pdev->dev, "Performing reset\n");
2171 	gve_clear_do_reset(priv);
2172 	gve_set_reset_in_progress(priv);
2173 	/* If we aren't attempting to teardown normally, just go turndown and
2174 	 * reset right away.
2175 	 */
2176 	if (!attempt_teardown) {
2177 		gve_turndown(priv);
2178 		gve_reset_and_teardown(priv, was_up);
2179 	} else {
2180 		/* Otherwise attempt to close normally */
2181 		if (was_up) {
2182 			err = gve_close(priv->dev);
2183 			/* If that fails reset as we did above */
2184 			if (err)
2185 				gve_reset_and_teardown(priv, was_up);
2186 		}
2187 		/* Clean up any remaining resources */
2188 		gve_teardown_priv_resources(priv);
2189 	}
2190 
2191 	/* Set it all back up */
2192 	err = gve_reset_recovery(priv, was_up);
2193 	gve_clear_reset_in_progress(priv);
2194 	priv->reset_cnt++;
2195 	priv->interface_up_cnt = 0;
2196 	priv->interface_down_cnt = 0;
2197 	priv->stats_report_trigger_cnt = 0;
2198 	return err;
2199 }
2200 
2201 static void gve_write_version(u8 __iomem *driver_version_register)
2202 {
2203 	const char *c = gve_version_prefix;
2204 
2205 	while (*c) {
2206 		writeb(*c, driver_version_register);
2207 		c++;
2208 	}
2209 
2210 	c = gve_version_str;
2211 	while (*c) {
2212 		writeb(*c, driver_version_register);
2213 		c++;
2214 	}
2215 	writeb('\n', driver_version_register);
2216 }
2217 
2218 static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
2219 {
2220 	int max_tx_queues, max_rx_queues;
2221 	struct net_device *dev;
2222 	__be32 __iomem *db_bar;
2223 	struct gve_registers __iomem *reg_bar;
2224 	struct gve_priv *priv;
2225 	int err;
2226 
2227 	err = pci_enable_device(pdev);
2228 	if (err)
2229 		return err;
2230 
2231 	err = pci_request_regions(pdev, gve_driver_name);
2232 	if (err)
2233 		goto abort_with_enabled;
2234 
2235 	pci_set_master(pdev);
2236 
2237 	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
2238 	if (err) {
2239 		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
2240 		goto abort_with_pci_region;
2241 	}
2242 
2243 	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
2244 	if (!reg_bar) {
2245 		dev_err(&pdev->dev, "Failed to map pci bar!\n");
2246 		err = -ENOMEM;
2247 		goto abort_with_pci_region;
2248 	}
2249 
2250 	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
2251 	if (!db_bar) {
2252 		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
2253 		err = -ENOMEM;
2254 		goto abort_with_reg_bar;
2255 	}
2256 
2257 	gve_write_version(&reg_bar->driver_version);
2258 	/* Get max queues to alloc etherdev */
2259 	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
2260 	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
2261 	/* Alloc and setup the netdev and priv */
2262 	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
2263 	if (!dev) {
2264 		dev_err(&pdev->dev, "could not allocate netdev\n");
2265 		err = -ENOMEM;
2266 		goto abort_with_db_bar;
2267 	}
2268 	SET_NETDEV_DEV(dev, &pdev->dev);
2269 	pci_set_drvdata(pdev, dev);
2270 	dev->ethtool_ops = &gve_ethtool_ops;
2271 	dev->netdev_ops = &gve_netdev_ops;
2272 
2273 	/* Set default and supported features.
2274 	 *
2275 	 * Features might be set in other locations as well (such as
2276 	 * `gve_adminq_describe_device`).
2277 	 */
2278 	dev->hw_features = NETIF_F_HIGHDMA;
2279 	dev->hw_features |= NETIF_F_SG;
2280 	dev->hw_features |= NETIF_F_HW_CSUM;
2281 	dev->hw_features |= NETIF_F_TSO;
2282 	dev->hw_features |= NETIF_F_TSO6;
2283 	dev->hw_features |= NETIF_F_TSO_ECN;
2284 	dev->hw_features |= NETIF_F_RXCSUM;
2285 	dev->hw_features |= NETIF_F_RXHASH;
2286 	dev->features = dev->hw_features;
2287 	dev->watchdog_timeo = 5 * HZ;
2288 	dev->min_mtu = ETH_MIN_MTU;
2289 	netif_carrier_off(dev);
2290 
2291 	priv = netdev_priv(dev);
2292 	priv->dev = dev;
2293 	priv->pdev = pdev;
2294 	priv->msg_enable = DEFAULT_MSG_LEVEL;
2295 	priv->reg_bar0 = reg_bar;
2296 	priv->db_bar2 = db_bar;
2297 	priv->service_task_flags = 0x0;
2298 	priv->state_flags = 0x0;
2299 	priv->ethtool_flags = 0x0;
2300 
2301 	gve_set_probe_in_progress(priv);
2302 	priv->gve_wq = alloc_ordered_workqueue("gve", 0);
2303 	if (!priv->gve_wq) {
2304 		dev_err(&pdev->dev, "Could not allocate workqueue");
2305 		err = -ENOMEM;
2306 		goto abort_with_netdev;
2307 	}
2308 	INIT_WORK(&priv->service_task, gve_service_task);
2309 	INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
2310 	priv->tx_cfg.max_queues = max_tx_queues;
2311 	priv->rx_cfg.max_queues = max_rx_queues;
2312 
2313 	err = gve_init_priv(priv, false);
2314 	if (err)
2315 		goto abort_with_wq;
2316 
2317 	err = register_netdev(dev);
2318 	if (err)
2319 		goto abort_with_gve_init;
2320 
2321 	dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
2322 	dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
2323 	gve_clear_probe_in_progress(priv);
2324 	queue_work(priv->gve_wq, &priv->service_task);
2325 	return 0;
2326 
2327 abort_with_gve_init:
2328 	gve_teardown_priv_resources(priv);
2329 
2330 abort_with_wq:
2331 	destroy_workqueue(priv->gve_wq);
2332 
2333 abort_with_netdev:
2334 	free_netdev(dev);
2335 
2336 abort_with_db_bar:
2337 	pci_iounmap(pdev, db_bar);
2338 
2339 abort_with_reg_bar:
2340 	pci_iounmap(pdev, reg_bar);
2341 
2342 abort_with_pci_region:
2343 	pci_release_regions(pdev);
2344 
2345 abort_with_enabled:
2346 	pci_disable_device(pdev);
2347 	return err;
2348 }
2349 
2350 static void gve_remove(struct pci_dev *pdev)
2351 {
2352 	struct net_device *netdev = pci_get_drvdata(pdev);
2353 	struct gve_priv *priv = netdev_priv(netdev);
2354 	__be32 __iomem *db_bar = priv->db_bar2;
2355 	void __iomem *reg_bar = priv->reg_bar0;
2356 
2357 	unregister_netdev(netdev);
2358 	gve_teardown_priv_resources(priv);
2359 	destroy_workqueue(priv->gve_wq);
2360 	free_netdev(netdev);
2361 	pci_iounmap(pdev, db_bar);
2362 	pci_iounmap(pdev, reg_bar);
2363 	pci_release_regions(pdev);
2364 	pci_disable_device(pdev);
2365 }
2366 
2367 static void gve_shutdown(struct pci_dev *pdev)
2368 {
2369 	struct net_device *netdev = pci_get_drvdata(pdev);
2370 	struct gve_priv *priv = netdev_priv(netdev);
2371 	bool was_up = netif_carrier_ok(priv->dev);
2372 
2373 	rtnl_lock();
2374 	if (was_up && gve_close(priv->dev)) {
2375 		/* If the dev was up, attempt to close, if close fails, reset */
2376 		gve_reset_and_teardown(priv, was_up);
2377 	} else {
2378 		/* If the dev wasn't up or close worked, finish tearing down */
2379 		gve_teardown_priv_resources(priv);
2380 	}
2381 	rtnl_unlock();
2382 }
2383 
2384 #ifdef CONFIG_PM
2385 static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
2386 {
2387 	struct net_device *netdev = pci_get_drvdata(pdev);
2388 	struct gve_priv *priv = netdev_priv(netdev);
2389 	bool was_up = netif_carrier_ok(priv->dev);
2390 
2391 	priv->suspend_cnt++;
2392 	rtnl_lock();
2393 	if (was_up && gve_close(priv->dev)) {
2394 		/* If the dev was up, attempt to close, if close fails, reset */
2395 		gve_reset_and_teardown(priv, was_up);
2396 	} else {
2397 		/* If the dev wasn't up or close worked, finish tearing down */
2398 		gve_teardown_priv_resources(priv);
2399 	}
2400 	priv->up_before_suspend = was_up;
2401 	rtnl_unlock();
2402 	return 0;
2403 }
2404 
2405 static int gve_resume(struct pci_dev *pdev)
2406 {
2407 	struct net_device *netdev = pci_get_drvdata(pdev);
2408 	struct gve_priv *priv = netdev_priv(netdev);
2409 	int err;
2410 
2411 	priv->resume_cnt++;
2412 	rtnl_lock();
2413 	err = gve_reset_recovery(priv, priv->up_before_suspend);
2414 	rtnl_unlock();
2415 	return err;
2416 }
2417 #endif /* CONFIG_PM */
2418 
2419 static const struct pci_device_id gve_id_table[] = {
2420 	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
2421 	{ }
2422 };
2423 
2424 static struct pci_driver gve_driver = {
2425 	.name		= gve_driver_name,
2426 	.id_table	= gve_id_table,
2427 	.probe		= gve_probe,
2428 	.remove		= gve_remove,
2429 	.shutdown	= gve_shutdown,
2430 #ifdef CONFIG_PM
2431 	.suspend        = gve_suspend,
2432 	.resume         = gve_resume,
2433 #endif
2434 };
2435 
2436 module_pci_driver(gve_driver);
2437 
2438 MODULE_DEVICE_TABLE(pci, gve_id_table);
2439 MODULE_AUTHOR("Google, Inc.");
2440 MODULE_DESCRIPTION("Google Virtual NIC Driver");
2441 MODULE_LICENSE("Dual MIT/GPL");
2442 MODULE_VERSION(GVE_VERSION);
2443