// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2024 Google LLC
 */

#include <linux/bpf.h>
#include <linux/cpumask.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/utsname.h>
#include <linux/version.h>
#include <net/netdev_queues.h>
#include <net/sch_generic.h>
#include <net/xdp_sock_drv.h>
#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_register.h"
#include "gve_utils.h"

#define GVE_DEFAULT_RX_COPYBREAK (256)

#define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK)
#define GVE_VERSION "1.0.0"
#define GVE_VERSION_PREFIX "GVE-"

// Minimum amount of time between queue kicks in msec (10 seconds)
#define MIN_TX_TIMEOUT_GAP (1000 * 10)

char gve_driver_name[] = "gve";
const char gve_version_str[] = GVE_VERSION;
static const char gve_version_prefix[] = GVE_VERSION_PREFIX;

static int gve_verify_driver_compatibility(struct gve_priv *priv)
43 {
44 int err;
45 struct gve_driver_info *driver_info;
46 dma_addr_t driver_info_bus;
47
48 driver_info = dma_alloc_coherent(&priv->pdev->dev,
49 sizeof(struct gve_driver_info),
50 &driver_info_bus, GFP_KERNEL);
51 if (!driver_info)
52 return -ENOMEM;
53
54 *driver_info = (struct gve_driver_info) {
55 .os_type = 1, /* Linux */
56 .os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
57 .os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
58 .os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
59 .driver_capability_flags = {
60 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
61 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
62 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
63 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
64 },
65 };
66 strscpy(driver_info->os_version_str1, utsname()->release,
67 sizeof(driver_info->os_version_str1));
68 strscpy(driver_info->os_version_str2, utsname()->version,
69 sizeof(driver_info->os_version_str2));
70
71 err = gve_adminq_verify_driver_compatibility(priv,
72 sizeof(struct gve_driver_info),
73 driver_info_bus);
74
75 /* It's ok if the device doesn't support this */
76 if (err == -EOPNOTSUPP)
77 err = 0;
78
79 dma_free_coherent(&priv->pdev->dev,
80 sizeof(struct gve_driver_info),
81 driver_info, driver_info_bus);
82 return err;
83 }
84
static netdev_features_t gve_features_check(struct sk_buff *skb,
86 struct net_device *dev,
87 netdev_features_t features)
88 {
89 struct gve_priv *priv = netdev_priv(dev);
90
91 if (!gve_is_gqi(priv))
92 return gve_features_check_dqo(skb, dev, features);
93
94 return features;
95 }
96
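/* ndo_start_xmit handler: dispatch to the GQI or DQO transmit path. */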
static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
98 {
99 struct gve_priv *priv = netdev_priv(dev);
100
101 if (gve_is_gqi(priv))
102 return gve_tx(skb, dev);
103 else
104 return gve_tx_dqo(skb, dev);
105 }
106
static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
108 {
109 struct gve_priv *priv = netdev_priv(dev);
110 unsigned int start;
111 u64 packets, bytes;
112 int num_tx_queues;
113 int ring;
114
115 num_tx_queues = gve_num_tx_queues(priv);
116 if (priv->rx) {
117 for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
118 do {
119 start =
120 u64_stats_fetch_begin(&priv->rx[ring].statss);
121 packets = priv->rx[ring].rpackets;
122 bytes = priv->rx[ring].rbytes;
123 } while (u64_stats_fetch_retry(&priv->rx[ring].statss,
124 start));
125 s->rx_packets += packets;
126 s->rx_bytes += bytes;
127 }
128 }
129 if (priv->tx) {
130 for (ring = 0; ring < num_tx_queues; ring++) {
131 do {
132 start =
133 u64_stats_fetch_begin(&priv->tx[ring].statss);
134 packets = priv->tx[ring].pkt_done;
135 bytes = priv->tx[ring].bytes_done;
136 } while (u64_stats_fetch_retry(&priv->tx[ring].statss,
137 start));
138 s->tx_packets += packets;
139 s->tx_bytes += bytes;
140 }
141 }
142 }
143
static int gve_alloc_flow_rule_caches(struct gve_priv *priv)
145 {
146 struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;
147 int err = 0;
148
149 if (!priv->max_flow_rules)
150 return 0;
151
152 flow_rules_cache->rules_cache =
153 kvcalloc(GVE_FLOW_RULES_CACHE_SIZE, sizeof(*flow_rules_cache->rules_cache),
154 GFP_KERNEL);
155 if (!flow_rules_cache->rules_cache) {
156 dev_err(&priv->pdev->dev, "Cannot alloc flow rules cache\n");
157 return -ENOMEM;
158 }
159
160 flow_rules_cache->rule_ids_cache =
161 kvcalloc(GVE_FLOW_RULE_IDS_CACHE_SIZE, sizeof(*flow_rules_cache->rule_ids_cache),
162 GFP_KERNEL);
163 if (!flow_rules_cache->rule_ids_cache) {
164 dev_err(&priv->pdev->dev, "Cannot alloc flow rule ids cache\n");
165 err = -ENOMEM;
166 goto free_rules_cache;
167 }
168
169 return 0;
170
171 free_rules_cache:
172 kvfree(flow_rules_cache->rules_cache);
173 flow_rules_cache->rules_cache = NULL;
174 return err;
175 }
176
static void gve_free_flow_rule_caches(struct gve_priv *priv)
178 {
179 struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;
180
181 kvfree(flow_rules_cache->rule_ids_cache);
182 flow_rules_cache->rule_ids_cache = NULL;
183 kvfree(flow_rules_cache->rules_cache);
184 flow_rules_cache->rules_cache = NULL;
185 }
186
static int gve_alloc_counter_array(struct gve_priv *priv)
188 {
189 priv->counter_array =
190 dma_alloc_coherent(&priv->pdev->dev,
191 priv->num_event_counters *
192 sizeof(*priv->counter_array),
193 &priv->counter_array_bus, GFP_KERNEL);
194 if (!priv->counter_array)
195 return -ENOMEM;
196
197 return 0;
198 }
199
static void gve_free_counter_array(struct gve_priv *priv)
201 {
202 if (!priv->counter_array)
203 return;
204
205 dma_free_coherent(&priv->pdev->dev,
206 priv->num_event_counters *
207 sizeof(*priv->counter_array),
208 priv->counter_array, priv->counter_array_bus);
209 priv->counter_array = NULL;
210 }
211
212 /* NIC requests to report stats */
static void gve_stats_report_task(struct work_struct *work)
214 {
215 struct gve_priv *priv = container_of(work, struct gve_priv,
216 stats_report_task);
217 if (gve_get_do_report_stats(priv)) {
218 gve_handle_report_stats(priv);
219 gve_clear_do_report_stats(priv);
220 }
221 }
222
static void gve_stats_report_schedule(struct gve_priv *priv)
224 {
225 if (!gve_get_probe_in_progress(priv) &&
226 !gve_get_reset_in_progress(priv)) {
227 gve_set_do_report_stats(priv);
228 queue_work(priv->gve_wq, &priv->stats_report_task);
229 }
230 }
231
static void gve_stats_report_timer(struct timer_list *t)
233 {
234 struct gve_priv *priv = from_timer(priv, t, stats_report_timer);
235
236 mod_timer(&priv->stats_report_timer,
237 round_jiffies(jiffies +
238 msecs_to_jiffies(priv->stats_report_timer_period)));
239 gve_stats_report_schedule(priv);
240 }
241
static int gve_alloc_stats_report(struct gve_priv *priv)
243 {
244 int tx_stats_num, rx_stats_num;
245
246 tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
247 gve_num_tx_queues(priv);
248 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
249 priv->rx_cfg.num_queues;
250 priv->stats_report_len = struct_size(priv->stats_report, stats,
251 size_add(tx_stats_num, rx_stats_num));
252 priv->stats_report =
253 dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
254 &priv->stats_report_bus, GFP_KERNEL);
255 if (!priv->stats_report)
256 return -ENOMEM;
257 /* Set up timer for the report-stats task */
258 timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
259 priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
260 return 0;
261 }
262
static void gve_free_stats_report(struct gve_priv *priv)
264 {
265 if (!priv->stats_report)
266 return;
267
268 del_timer_sync(&priv->stats_report_timer);
269 dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
270 priv->stats_report, priv->stats_report_bus);
271 priv->stats_report = NULL;
272 }
273
static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
275 {
276 struct gve_priv *priv = arg;
277
278 queue_work(priv->gve_wq, &priv->service_task);
279 return IRQ_HANDLED;
280 }
281
static irqreturn_t gve_intr(int irq, void *arg)
283 {
284 struct gve_notify_block *block = arg;
285 struct gve_priv *priv = block->priv;
286
287 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
288 napi_schedule_irqoff(&block->napi);
289 return IRQ_HANDLED;
290 }
291
static irqreturn_t gve_intr_dqo(int irq, void *arg)
293 {
294 struct gve_notify_block *block = arg;
295
296 /* Interrupts are automatically masked */
297 napi_schedule_irqoff(&block->napi);
298 return IRQ_HANDLED;
299 }
300
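/* Return true if we are running on the CPU that this queue's IRQ has affinity
 * with; assume true if no effective affinity mask is available.
 */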
static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq)
302 {
303 int cpu_curr = smp_processor_id();
304 const struct cpumask *aff_mask;
305
306 aff_mask = irq_get_effective_affinity_mask(irq);
307 if (unlikely(!aff_mask))
308 return 1;
309
310 return cpumask_test_cpu(cpu_curr, aff_mask);
311 }
312
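/* NAPI poll handler for the GQI queue format. */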
int gve_napi_poll(struct napi_struct *napi, int budget)
314 {
315 struct gve_notify_block *block;
316 __be32 __iomem *irq_doorbell;
317 bool reschedule = false;
318 struct gve_priv *priv;
319 int work_done = 0;
320
321 block = container_of(napi, struct gve_notify_block, napi);
322 priv = block->priv;
323
324 if (block->tx) {
325 if (block->tx->q_num < priv->tx_cfg.num_queues)
326 reschedule |= gve_tx_poll(block, budget);
327 else if (budget)
328 reschedule |= gve_xdp_poll(block, budget);
329 }
330
331 if (!budget)
332 return 0;
333
334 if (block->rx) {
335 work_done = gve_rx_poll(block, budget);
336 reschedule |= work_done == budget;
337 }
338
339 if (reschedule)
340 return budget;
341
342 /* Complete processing - don't unmask irq if busy polling is enabled */
343 if (likely(napi_complete_done(napi, work_done))) {
344 irq_doorbell = gve_irq_doorbell(priv, block);
345 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
346
347 /* Ensure IRQ ACK is visible before we check pending work.
348 * If queue had issued updates, it would be truly visible.
349 */
350 mb();
351
352 if (block->tx)
353 reschedule |= gve_tx_clean_pending(priv, block->tx);
354 if (block->rx)
355 reschedule |= gve_rx_work_pending(block->rx);
356
357 if (reschedule && napi_schedule(napi))
358 iowrite32be(GVE_IRQ_MASK, irq_doorbell);
359 }
360 return work_done;
361 }
362
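/* NAPI poll handler for the DQO queue format. */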
int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
364 {
365 struct gve_notify_block *block =
366 container_of(napi, struct gve_notify_block, napi);
367 struct gve_priv *priv = block->priv;
368 bool reschedule = false;
369 int work_done = 0;
370
371 if (block->tx)
372 reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
373
374 if (!budget)
375 return 0;
376
377 if (block->rx) {
378 work_done = gve_rx_poll_dqo(block, budget);
379 reschedule |= work_done == budget;
380 }
381
382 if (reschedule) {
383 /* Reschedule by returning budget only if already on the correct
384 * cpu.
385 */
386 if (likely(gve_is_napi_on_home_cpu(priv, block->irq)))
387 return budget;
388
389 /* If not on the cpu with which this queue's irq has affinity
390 * with, we avoid rescheduling napi and arm the irq instead so
391 * that napi gets rescheduled back eventually onto the right
392 * cpu.
393 */
394 if (work_done == budget)
395 work_done--;
396 }
397
398 if (likely(napi_complete_done(napi, work_done))) {
399 /* Enable interrupts again.
400 *
401 * We don't need to repoll afterwards because HW supports the
402 * PCI MSI-X PBA feature.
403 *
404 * Another interrupt would be triggered if a new event came in
405 * since the last one.
406 */
407 gve_write_irq_doorbell_dqo(priv, block,
408 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
409 }
410
411 return work_done;
412 }
413
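/* Allocate MSI-X vectors and per-queue notification blocks. The last vector
 * is reserved for the management interrupt.
 */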
static int gve_alloc_notify_blocks(struct gve_priv *priv)
415 {
416 int num_vecs_requested = priv->num_ntfy_blks + 1;
417 unsigned int active_cpus;
418 int vecs_enabled;
419 int i, j;
420 int err;
421
422 priv->msix_vectors = kvcalloc(num_vecs_requested,
423 sizeof(*priv->msix_vectors), GFP_KERNEL);
424 if (!priv->msix_vectors)
425 return -ENOMEM;
426 for (i = 0; i < num_vecs_requested; i++)
427 priv->msix_vectors[i].entry = i;
428 vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
429 GVE_MIN_MSIX, num_vecs_requested);
430 if (vecs_enabled < 0) {
431 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
432 GVE_MIN_MSIX, vecs_enabled);
433 err = vecs_enabled;
434 goto abort_with_msix_vectors;
435 }
436 if (vecs_enabled != num_vecs_requested) {
437 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
438 int vecs_per_type = new_num_ntfy_blks / 2;
439 int vecs_left = new_num_ntfy_blks % 2;
440
441 priv->num_ntfy_blks = new_num_ntfy_blks;
442 priv->mgmt_msix_idx = priv->num_ntfy_blks;
443 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
444 vecs_per_type);
445 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
446 vecs_per_type + vecs_left);
447 dev_err(&priv->pdev->dev,
448 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
449 vecs_enabled, priv->tx_cfg.max_queues,
450 priv->rx_cfg.max_queues);
451 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
452 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
453 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
454 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
455 }
456 /* Half the notification blocks go to TX and half to RX */
457 active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
458
459 /* Setup Management Vector - the last vector */
460 snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
461 pci_name(priv->pdev));
462 err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
463 gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
464 if (err) {
465 dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
466 goto abort_with_msix_enabled;
467 }
468 priv->irq_db_indices =
469 dma_alloc_coherent(&priv->pdev->dev,
470 priv->num_ntfy_blks *
471 sizeof(*priv->irq_db_indices),
472 &priv->irq_db_indices_bus, GFP_KERNEL);
473 if (!priv->irq_db_indices) {
474 err = -ENOMEM;
475 goto abort_with_mgmt_vector;
476 }
477
478 priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
479 sizeof(*priv->ntfy_blocks), GFP_KERNEL);
480 if (!priv->ntfy_blocks) {
481 err = -ENOMEM;
482 goto abort_with_irq_db_indices;
483 }
484
485 /* Setup the other blocks - the first n-1 vectors */
486 for (i = 0; i < priv->num_ntfy_blks; i++) {
487 struct gve_notify_block *block = &priv->ntfy_blocks[i];
488 int msix_idx = i;
489
490 snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s",
491 i, pci_name(priv->pdev));
492 block->priv = priv;
493 err = request_irq(priv->msix_vectors[msix_idx].vector,
494 gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
495 0, block->name, block);
496 if (err) {
497 dev_err(&priv->pdev->dev,
498 "Failed to receive msix vector %d\n", i);
499 goto abort_with_some_ntfy_blocks;
500 }
501 block->irq = priv->msix_vectors[msix_idx].vector;
502 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
503 get_cpu_mask(i % active_cpus));
504 block->irq_db_index = &priv->irq_db_indices[i].index;
505 }
506 return 0;
507 abort_with_some_ntfy_blocks:
508 for (j = 0; j < i; j++) {
509 struct gve_notify_block *block = &priv->ntfy_blocks[j];
510 int msix_idx = j;
511
512 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
513 NULL);
514 free_irq(priv->msix_vectors[msix_idx].vector, block);
515 block->irq = 0;
516 }
517 kvfree(priv->ntfy_blocks);
518 priv->ntfy_blocks = NULL;
519 abort_with_irq_db_indices:
520 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
521 sizeof(*priv->irq_db_indices),
522 priv->irq_db_indices, priv->irq_db_indices_bus);
523 priv->irq_db_indices = NULL;
524 abort_with_mgmt_vector:
525 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
526 abort_with_msix_enabled:
527 pci_disable_msix(priv->pdev);
528 abort_with_msix_vectors:
529 kvfree(priv->msix_vectors);
530 priv->msix_vectors = NULL;
531 return err;
532 }
533
static void gve_free_notify_blocks(struct gve_priv *priv)
535 {
536 int i;
537
538 if (!priv->msix_vectors)
539 return;
540
541 /* Free the irqs */
542 for (i = 0; i < priv->num_ntfy_blks; i++) {
543 struct gve_notify_block *block = &priv->ntfy_blocks[i];
544 int msix_idx = i;
545
546 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
547 NULL);
548 free_irq(priv->msix_vectors[msix_idx].vector, block);
549 block->irq = 0;
550 }
551 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
552 kvfree(priv->ntfy_blocks);
553 priv->ntfy_blocks = NULL;
554 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
555 sizeof(*priv->irq_db_indices),
556 priv->irq_db_indices, priv->irq_db_indices_bus);
557 priv->irq_db_indices = NULL;
558 pci_disable_msix(priv->pdev);
559 kvfree(priv->msix_vectors);
560 priv->msix_vectors = NULL;
561 }
562
static int gve_setup_device_resources(struct gve_priv *priv)
564 {
565 int err;
566
567 err = gve_alloc_flow_rule_caches(priv);
568 if (err)
569 return err;
570 err = gve_alloc_counter_array(priv);
571 if (err)
572 goto abort_with_flow_rule_caches;
573 err = gve_alloc_notify_blocks(priv);
574 if (err)
575 goto abort_with_counter;
576 err = gve_alloc_stats_report(priv);
577 if (err)
578 goto abort_with_ntfy_blocks;
579 err = gve_adminq_configure_device_resources(priv,
580 priv->counter_array_bus,
581 priv->num_event_counters,
582 priv->irq_db_indices_bus,
583 priv->num_ntfy_blks);
584 if (unlikely(err)) {
585 dev_err(&priv->pdev->dev,
586 "could not setup device_resources: err=%d\n", err);
587 err = -ENXIO;
588 goto abort_with_stats_report;
589 }
590
591 if (!gve_is_gqi(priv)) {
592 priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
593 GFP_KERNEL);
594 if (!priv->ptype_lut_dqo) {
595 err = -ENOMEM;
596 goto abort_with_stats_report;
597 }
598 err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
599 if (err) {
600 dev_err(&priv->pdev->dev,
601 "Failed to get ptype map: err=%d\n", err);
602 goto abort_with_ptype_lut;
603 }
604 }
605
606 err = gve_adminq_report_stats(priv, priv->stats_report_len,
607 priv->stats_report_bus,
608 GVE_STATS_REPORT_TIMER_PERIOD);
609 if (err)
610 dev_err(&priv->pdev->dev,
611 "Failed to report stats: err=%d\n", err);
612 gve_set_device_resources_ok(priv);
613 return 0;
614
615 abort_with_ptype_lut:
616 kvfree(priv->ptype_lut_dqo);
617 priv->ptype_lut_dqo = NULL;
618 abort_with_stats_report:
619 gve_free_stats_report(priv);
620 abort_with_ntfy_blocks:
621 gve_free_notify_blocks(priv);
622 abort_with_counter:
623 gve_free_counter_array(priv);
624 abort_with_flow_rule_caches:
625 gve_free_flow_rule_caches(priv);
626
627 return err;
628 }
629
630 static void gve_trigger_reset(struct gve_priv *priv);
631
static void gve_teardown_device_resources(struct gve_priv *priv)
633 {
634 int err;
635
636 /* Tell device its resources are being freed */
637 if (gve_get_device_resources_ok(priv)) {
638 err = gve_flow_rules_reset(priv);
639 if (err) {
640 dev_err(&priv->pdev->dev,
641 "Failed to reset flow rules: err=%d\n", err);
642 gve_trigger_reset(priv);
643 }
644 /* detach the stats report */
645 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
646 if (err) {
647 dev_err(&priv->pdev->dev,
648 "Failed to detach stats report: err=%d\n", err);
649 gve_trigger_reset(priv);
650 }
651 err = gve_adminq_deconfigure_device_resources(priv);
652 if (err) {
653 dev_err(&priv->pdev->dev,
654 "Could not deconfigure device resources: err=%d\n",
655 err);
656 gve_trigger_reset(priv);
657 }
658 }
659
660 kvfree(priv->ptype_lut_dqo);
661 priv->ptype_lut_dqo = NULL;
662
663 gve_free_flow_rule_caches(priv);
664 gve_free_counter_array(priv);
665 gve_free_notify_blocks(priv);
666 gve_free_stats_report(priv);
667 gve_clear_device_resources_ok(priv);
668 }
669
static int gve_unregister_qpl(struct gve_priv *priv,
671 struct gve_queue_page_list *qpl)
672 {
673 int err;
674
675 if (!qpl)
676 return 0;
677
678 err = gve_adminq_unregister_page_list(priv, qpl->id);
679 if (err) {
680 netif_err(priv, drv, priv->dev,
681 "Failed to unregister queue page list %d\n",
682 qpl->id);
683 return err;
684 }
685
686 priv->num_registered_pages -= qpl->num_entries;
687 return 0;
688 }
689
static int gve_register_qpl(struct gve_priv *priv,
691 struct gve_queue_page_list *qpl)
692 {
693 int pages;
694 int err;
695
696 if (!qpl)
697 return 0;
698
699 pages = qpl->num_entries;
700
701 if (pages + priv->num_registered_pages > priv->max_registered_pages) {
702 netif_err(priv, drv, priv->dev,
703 "Reached max number of registered pages %llu > %llu\n",
704 pages + priv->num_registered_pages,
705 priv->max_registered_pages);
706 return -EINVAL;
707 }
708
709 err = gve_adminq_register_page_list(priv, qpl);
710 if (err) {
711 netif_err(priv, drv, priv->dev,
712 "failed to register queue page list %d\n",
713 qpl->id);
714 return err;
715 }
716
717 priv->num_registered_pages += pages;
718 return 0;
719 }
720
static struct gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx)
722 {
723 struct gve_tx_ring *tx = &priv->tx[idx];
724
725 if (gve_is_gqi(priv))
726 return tx->tx_fifo.qpl;
727 else
728 return tx->dqo.qpl;
729 }
730
static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx)
732 {
733 struct gve_rx_ring *rx = &priv->rx[idx];
734
735 if (gve_is_gqi(priv))
736 return rx->data.qpl;
737 else
738 return rx->dqo.qpl;
739 }
740
static int gve_register_xdp_qpls(struct gve_priv *priv)
742 {
743 int start_id;
744 int err;
745 int i;
746
747 start_id = gve_xdp_tx_start_queue_id(priv);
748 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
749 err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i));
750 /* This failure will trigger a reset - no need to clean up */
751 if (err)
752 return err;
753 }
754 return 0;
755 }
756
static int gve_register_qpls(struct gve_priv *priv)
758 {
759 int num_tx_qpls, num_rx_qpls;
760 int err;
761 int i;
762
763 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv),
764 gve_is_qpl(priv));
765 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
766
767 for (i = 0; i < num_tx_qpls; i++) {
768 err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i));
769 if (err)
770 return err;
771 }
772
773 for (i = 0; i < num_rx_qpls; i++) {
774 err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i));
775 if (err)
776 return err;
777 }
778
779 return 0;
780 }
781
static int gve_unregister_xdp_qpls(struct gve_priv *priv)
783 {
784 int start_id;
785 int err;
786 int i;
787
788 start_id = gve_xdp_tx_start_queue_id(priv);
789 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
790 err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i));
791 /* This failure will trigger a reset - no need to clean */
792 if (err)
793 return err;
794 }
795 return 0;
796 }
797
static int gve_unregister_qpls(struct gve_priv *priv)
799 {
800 int num_tx_qpls, num_rx_qpls;
801 int err;
802 int i;
803
804 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv),
805 gve_is_qpl(priv));
806 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
807
808 for (i = 0; i < num_tx_qpls; i++) {
809 err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i));
810 /* This failure will trigger a reset - no need to clean */
811 if (err)
812 return err;
813 }
814
815 for (i = 0; i < num_rx_qpls; i++) {
816 err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i));
817 /* This failure will trigger a reset - no need to clean */
818 if (err)
819 return err;
820 }
821 return 0;
822 }
823
static int gve_create_xdp_rings(struct gve_priv *priv)
825 {
826 int err;
827
828 err = gve_adminq_create_tx_queues(priv,
829 gve_xdp_tx_start_queue_id(priv),
830 priv->num_xdp_queues);
831 if (err) {
832 netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n",
833 priv->num_xdp_queues);
834 /* This failure will trigger a reset - no need to clean
835 * up
836 */
837 return err;
838 }
839 netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n",
840 priv->num_xdp_queues);
841
842 return 0;
843 }
844
static int gve_create_rings(struct gve_priv *priv)
846 {
847 int num_tx_queues = gve_num_tx_queues(priv);
848 int err;
849 int i;
850
851 err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues);
852 if (err) {
853 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
854 num_tx_queues);
855 /* This failure will trigger a reset - no need to clean
856 * up
857 */
858 return err;
859 }
860 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
861 num_tx_queues);
862
863 err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
864 if (err) {
865 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
866 priv->rx_cfg.num_queues);
867 /* This failure will trigger a reset - no need to clean
868 * up
869 */
870 return err;
871 }
872 netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
873 priv->rx_cfg.num_queues);
874
875 if (gve_is_gqi(priv)) {
876 /* Rx data ring has been prefilled with packet buffers at queue
877 * allocation time.
878 *
879 * Write the doorbell to provide descriptor slots and packet
880 * buffers to the NIC.
881 */
882 for (i = 0; i < priv->rx_cfg.num_queues; i++)
883 gve_rx_write_doorbell(priv, &priv->rx[i]);
884 } else {
885 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
886 /* Post buffers and ring doorbell. */
887 gve_rx_post_buffers_dqo(&priv->rx[i]);
888 }
889 }
890
891 return 0;
892 }
893
static void init_xdp_sync_stats(struct gve_priv *priv)
895 {
896 int start_id = gve_xdp_tx_start_queue_id(priv);
897 int i;
898
899 /* Init stats */
900 for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
901 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
902
903 u64_stats_init(&priv->tx[i].statss);
904 priv->tx[i].ntfy_id = ntfy_idx;
905 }
906 }
907
static void gve_init_sync_stats(struct gve_priv *priv)
909 {
910 int i;
911
912 for (i = 0; i < priv->tx_cfg.num_queues; i++)
913 u64_stats_init(&priv->tx[i].statss);
914
915 /* Init stats for XDP TX queues */
916 init_xdp_sync_stats(priv);
917
918 for (i = 0; i < priv->rx_cfg.num_queues; i++)
919 u64_stats_init(&priv->rx[i].statss);
920 }
921
static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv,
923 struct gve_tx_alloc_rings_cfg *cfg)
924 {
925 cfg->qcfg = &priv->tx_cfg;
926 cfg->raw_addressing = !gve_is_qpl(priv);
927 cfg->ring_size = priv->tx_desc_cnt;
928 cfg->start_idx = 0;
929 cfg->num_rings = gve_num_tx_queues(priv);
930 cfg->tx = priv->tx;
931 }
932
static void gve_tx_stop_rings(struct gve_priv *priv, int start_id, int num_rings)
934 {
935 int i;
936
937 if (!priv->tx)
938 return;
939
940 for (i = start_id; i < start_id + num_rings; i++) {
941 if (gve_is_gqi(priv))
942 gve_tx_stop_ring_gqi(priv, i);
943 else
944 gve_tx_stop_ring_dqo(priv, i);
945 }
946 }
947
static void gve_tx_start_rings(struct gve_priv *priv, int start_id,
949 int num_rings)
950 {
951 int i;
952
953 for (i = start_id; i < start_id + num_rings; i++) {
954 if (gve_is_gqi(priv))
955 gve_tx_start_ring_gqi(priv, i);
956 else
957 gve_tx_start_ring_dqo(priv, i);
958 }
959 }
960
static int gve_alloc_xdp_rings(struct gve_priv *priv)
962 {
963 struct gve_tx_alloc_rings_cfg cfg = {0};
964 int err = 0;
965
966 if (!priv->num_xdp_queues)
967 return 0;
968
969 gve_tx_get_curr_alloc_cfg(priv, &cfg);
970 cfg.start_idx = gve_xdp_tx_start_queue_id(priv);
971 cfg.num_rings = priv->num_xdp_queues;
972
973 err = gve_tx_alloc_rings_gqi(priv, &cfg);
974 if (err)
975 return err;
976
977 gve_tx_start_rings(priv, cfg.start_idx, cfg.num_rings);
978 init_xdp_sync_stats(priv);
979
980 return 0;
981 }
982
static int gve_queues_mem_alloc(struct gve_priv *priv,
984 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
985 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
986 {
987 int err;
988
989 if (gve_is_gqi(priv))
990 err = gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg);
991 else
992 err = gve_tx_alloc_rings_dqo(priv, tx_alloc_cfg);
993 if (err)
994 return err;
995
996 if (gve_is_gqi(priv))
997 err = gve_rx_alloc_rings_gqi(priv, rx_alloc_cfg);
998 else
999 err = gve_rx_alloc_rings_dqo(priv, rx_alloc_cfg);
1000 if (err)
1001 goto free_tx;
1002
1003 return 0;
1004
1005 free_tx:
1006 if (gve_is_gqi(priv))
1007 gve_tx_free_rings_gqi(priv, tx_alloc_cfg);
1008 else
1009 gve_tx_free_rings_dqo(priv, tx_alloc_cfg);
1010 return err;
1011 }
1012
static int gve_destroy_xdp_rings(struct gve_priv *priv)
1014 {
1015 int start_id;
1016 int err;
1017
1018 start_id = gve_xdp_tx_start_queue_id(priv);
1019 err = gve_adminq_destroy_tx_queues(priv,
1020 start_id,
1021 priv->num_xdp_queues);
1022 if (err) {
1023 netif_err(priv, drv, priv->dev,
1024 "failed to destroy XDP queues\n");
1025 /* This failure will trigger a reset - no need to clean up */
1026 return err;
1027 }
1028 netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n");
1029
1030 return 0;
1031 }
1032
static int gve_destroy_rings(struct gve_priv *priv)
1034 {
1035 int num_tx_queues = gve_num_tx_queues(priv);
1036 int err;
1037
1038 err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues);
1039 if (err) {
1040 netif_err(priv, drv, priv->dev,
1041 "failed to destroy tx queues\n");
1042 /* This failure will trigger a reset - no need to clean up */
1043 return err;
1044 }
1045 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
1046 err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
1047 if (err) {
1048 netif_err(priv, drv, priv->dev,
1049 "failed to destroy rx queues\n");
1050 /* This failure will trigger a reset - no need to clean up */
1051 return err;
1052 }
1053 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
1054 return 0;
1055 }
1056
static void gve_free_xdp_rings(struct gve_priv *priv)
1058 {
1059 struct gve_tx_alloc_rings_cfg cfg = {0};
1060
1061 gve_tx_get_curr_alloc_cfg(priv, &cfg);
1062 cfg.start_idx = gve_xdp_tx_start_queue_id(priv);
1063 cfg.num_rings = priv->num_xdp_queues;
1064
1065 if (priv->tx) {
1066 gve_tx_stop_rings(priv, cfg.start_idx, cfg.num_rings);
1067 gve_tx_free_rings_gqi(priv, &cfg);
1068 }
1069 }
1070
static void gve_queues_mem_free(struct gve_priv *priv,
1072 struct gve_tx_alloc_rings_cfg *tx_cfg,
1073 struct gve_rx_alloc_rings_cfg *rx_cfg)
1074 {
1075 if (gve_is_gqi(priv)) {
1076 gve_tx_free_rings_gqi(priv, tx_cfg);
1077 gve_rx_free_rings_gqi(priv, rx_cfg);
1078 } else {
1079 gve_tx_free_rings_dqo(priv, tx_cfg);
1080 gve_rx_free_rings_dqo(priv, rx_cfg);
1081 }
1082 }
1083
int gve_alloc_page(struct gve_priv *priv, struct device *dev,
1085 struct page **page, dma_addr_t *dma,
1086 enum dma_data_direction dir, gfp_t gfp_flags)
1087 {
1088 *page = alloc_page(gfp_flags);
1089 if (!*page) {
1090 priv->page_alloc_fail++;
1091 return -ENOMEM;
1092 }
1093 *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
1094 if (dma_mapping_error(dev, *dma)) {
1095 priv->dma_mapping_error++;
1096 put_page(*page);
1097 return -ENOMEM;
1098 }
1099 return 0;
1100 }
1101
struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv,
1103 u32 id, int pages)
1104 {
1105 struct gve_queue_page_list *qpl;
1106 int err;
1107 int i;
1108
1109 qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL);
1110 if (!qpl)
1111 return NULL;
1112
1113 qpl->id = id;
1114 qpl->num_entries = 0;
1115 qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
1116 if (!qpl->pages)
1117 goto abort;
1118
1119 qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL);
1120 if (!qpl->page_buses)
1121 goto abort;
1122
1123 for (i = 0; i < pages; i++) {
1124 err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
1125 &qpl->page_buses[i],
1126 gve_qpl_dma_dir(priv, id), GFP_KERNEL);
1127 if (err)
1128 goto abort;
1129 qpl->num_entries++;
1130 }
1131
1132 return qpl;
1133
1134 abort:
1135 gve_free_queue_page_list(priv, qpl, id);
1136 return NULL;
1137 }
1138
void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
1140 enum dma_data_direction dir)
1141 {
1142 if (!dma_mapping_error(dev, dma))
1143 dma_unmap_page(dev, dma, PAGE_SIZE, dir);
1144 if (page)
1145 put_page(page);
1146 }
1147
void gve_free_queue_page_list(struct gve_priv *priv,
1149 struct gve_queue_page_list *qpl,
1150 u32 id)
1151 {
1152 int i;
1153
1154 if (!qpl)
1155 return;
1156 if (!qpl->pages)
1157 goto free_qpl;
1158 if (!qpl->page_buses)
1159 goto free_pages;
1160
1161 for (i = 0; i < qpl->num_entries; i++)
1162 gve_free_page(&priv->pdev->dev, qpl->pages[i],
1163 qpl->page_buses[i], gve_qpl_dma_dir(priv, id));
1164
1165 kvfree(qpl->page_buses);
1166 qpl->page_buses = NULL;
1167 free_pages:
1168 kvfree(qpl->pages);
1169 qpl->pages = NULL;
1170 free_qpl:
1171 kvfree(qpl);
1172 }
1173
1174 /* Use this to schedule a reset when the device is capable of continuing
1175 * to handle other requests in its current state. If it is not, do a reset
1176 * in thread instead.
1177 */
void gve_schedule_reset(struct gve_priv *priv)
1179 {
1180 gve_set_do_reset(priv);
1181 queue_work(priv->gve_wq, &priv->service_task);
1182 }
1183
1184 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
1185 static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
1186 static void gve_turndown(struct gve_priv *priv);
1187 static void gve_turnup(struct gve_priv *priv);
1188
static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
1190 {
1191 struct napi_struct *napi;
1192 struct gve_rx_ring *rx;
1193 int err = 0;
1194 int i, j;
1195 u32 tx_qid;
1196
1197 if (!priv->num_xdp_queues)
1198 return 0;
1199
1200 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1201 rx = &priv->rx[i];
1202 napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1203
1204 err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i,
1205 napi->napi_id);
1206 if (err)
1207 goto err;
1208 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
1209 MEM_TYPE_PAGE_SHARED, NULL);
1210 if (err)
1211 goto err;
1212 rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
1213 if (rx->xsk_pool) {
1214 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
1215 napi->napi_id);
1216 if (err)
1217 goto err;
1218 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
1219 MEM_TYPE_XSK_BUFF_POOL, NULL);
1220 if (err)
1221 goto err;
1222 xsk_pool_set_rxq_info(rx->xsk_pool,
1223 &rx->xsk_rxq);
1224 }
1225 }
1226
1227 for (i = 0; i < priv->num_xdp_queues; i++) {
1228 tx_qid = gve_xdp_tx_queue_id(priv, i);
1229 priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
1230 }
1231 return 0;
1232
1233 err:
1234 for (j = i; j >= 0; j--) {
1235 rx = &priv->rx[j];
1236 if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
1237 xdp_rxq_info_unreg(&rx->xdp_rxq);
1238 if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
1239 xdp_rxq_info_unreg(&rx->xsk_rxq);
1240 }
1241 return err;
1242 }
1243
static void gve_unreg_xdp_info(struct gve_priv *priv)
1245 {
1246 int i, tx_qid;
1247
1248 if (!priv->num_xdp_queues)
1249 return;
1250
1251 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1252 struct gve_rx_ring *rx = &priv->rx[i];
1253
1254 xdp_rxq_info_unreg(&rx->xdp_rxq);
1255 if (rx->xsk_pool) {
1256 xdp_rxq_info_unreg(&rx->xsk_rxq);
1257 rx->xsk_pool = NULL;
1258 }
1259 }
1260
1261 for (i = 0; i < priv->num_xdp_queues; i++) {
1262 tx_qid = gve_xdp_tx_queue_id(priv, i);
1263 priv->tx[tx_qid].xsk_pool = NULL;
1264 }
1265 }
1266
static void gve_drain_page_cache(struct gve_priv *priv)
1268 {
1269 int i;
1270
1271 for (i = 0; i < priv->rx_cfg.num_queues; i++)
1272 page_frag_cache_drain(&priv->rx[i].page_cache);
1273 }
1274
static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv,
1276 struct gve_rx_alloc_rings_cfg *cfg)
1277 {
1278 cfg->qcfg = &priv->rx_cfg;
1279 cfg->qcfg_tx = &priv->tx_cfg;
1280 cfg->raw_addressing = !gve_is_qpl(priv);
1281 cfg->enable_header_split = priv->header_split_enabled;
1282 cfg->ring_size = priv->rx_desc_cnt;
1283 cfg->packet_buffer_size = gve_is_gqi(priv) ?
1284 GVE_DEFAULT_RX_BUFFER_SIZE :
1285 priv->data_buffer_size_dqo;
1286 cfg->rx = priv->rx;
1287 }
1288
void gve_get_curr_alloc_cfgs(struct gve_priv *priv,
1290 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1291 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1292 {
1293 gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg);
1294 gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg);
1295 }
1296
static void gve_rx_start_ring(struct gve_priv *priv, int i)
1298 {
1299 if (gve_is_gqi(priv))
1300 gve_rx_start_ring_gqi(priv, i);
1301 else
1302 gve_rx_start_ring_dqo(priv, i);
1303 }
1304
static void gve_rx_start_rings(struct gve_priv *priv, int num_rings)
1306 {
1307 int i;
1308
1309 for (i = 0; i < num_rings; i++)
1310 gve_rx_start_ring(priv, i);
1311 }
1312
static void gve_rx_stop_ring(struct gve_priv *priv, int i)
1314 {
1315 if (gve_is_gqi(priv))
1316 gve_rx_stop_ring_gqi(priv, i);
1317 else
1318 gve_rx_stop_ring_dqo(priv, i);
1319 }
1320
static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings)
1322 {
1323 int i;
1324
1325 if (!priv->rx)
1326 return;
1327
1328 for (i = 0; i < num_rings; i++)
1329 gve_rx_stop_ring(priv, i);
1330 }
1331
static void gve_queues_mem_remove(struct gve_priv *priv)
1333 {
1334 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
1335 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
1336
1337 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1338 gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1339 priv->tx = NULL;
1340 priv->rx = NULL;
1341 }
1342
1343 /* The passed-in queue memory is stored into priv and the queues are made live.
1344 * No memory is allocated. Passed-in memory is freed on errors.
1345 */
static int gve_queues_start(struct gve_priv *priv,
1347 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1348 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1349 {
1350 struct net_device *dev = priv->dev;
1351 int err;
1352
1353 /* Record new resources into priv */
1354 priv->tx = tx_alloc_cfg->tx;
1355 priv->rx = rx_alloc_cfg->rx;
1356
1357 /* Record new configs into priv */
1358 priv->tx_cfg = *tx_alloc_cfg->qcfg;
1359 priv->rx_cfg = *rx_alloc_cfg->qcfg;
1360 priv->tx_desc_cnt = tx_alloc_cfg->ring_size;
1361 priv->rx_desc_cnt = rx_alloc_cfg->ring_size;
1362
1363 if (priv->xdp_prog)
1364 priv->num_xdp_queues = priv->rx_cfg.num_queues;
1365 else
1366 priv->num_xdp_queues = 0;
1367
1368 gve_tx_start_rings(priv, 0, tx_alloc_cfg->num_rings);
1369 gve_rx_start_rings(priv, rx_alloc_cfg->qcfg->num_queues);
1370 gve_init_sync_stats(priv);
1371
1372 err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
1373 if (err)
1374 goto stop_and_free_rings;
1375 err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
1376 if (err)
1377 goto stop_and_free_rings;
1378
1379 err = gve_reg_xdp_info(priv, dev);
1380 if (err)
1381 goto stop_and_free_rings;
1382
1383 err = gve_register_qpls(priv);
1384 if (err)
1385 goto reset;
1386
1387 priv->header_split_enabled = rx_alloc_cfg->enable_header_split;
1388 priv->data_buffer_size_dqo = rx_alloc_cfg->packet_buffer_size;
1389
1390 err = gve_create_rings(priv);
1391 if (err)
1392 goto reset;
1393
1394 gve_set_device_rings_ok(priv);
1395
1396 if (gve_get_report_stats(priv))
1397 mod_timer(&priv->stats_report_timer,
1398 round_jiffies(jiffies +
1399 msecs_to_jiffies(priv->stats_report_timer_period)));
1400
1401 gve_turnup(priv);
1402 queue_work(priv->gve_wq, &priv->service_task);
1403 priv->interface_up_cnt++;
1404 return 0;
1405
1406 reset:
1407 if (gve_get_reset_in_progress(priv))
1408 goto stop_and_free_rings;
1409 gve_reset_and_teardown(priv, true);
1410 /* if this fails there is nothing we can do so just ignore the return */
1411 gve_reset_recovery(priv, false);
1412 /* return the original error */
1413 return err;
1414 stop_and_free_rings:
1415 gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv));
1416 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues);
1417 gve_queues_mem_remove(priv);
1418 return err;
1419 }
1420
static int gve_open(struct net_device *dev)
1422 {
1423 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
1424 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
1425 struct gve_priv *priv = netdev_priv(dev);
1426 int err;
1427
1428 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1429
1430 err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1431 if (err)
1432 return err;
1433
1434 /* No need to free on error: ownership of resources is lost after
1435 * calling gve_queues_start.
1436 */
1437 err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1438 if (err)
1439 return err;
1440
1441 return 0;
1442 }
1443
static int gve_queues_stop(struct gve_priv *priv)
1445 {
1446 int err;
1447
1448 netif_carrier_off(priv->dev);
1449 if (gve_get_device_rings_ok(priv)) {
1450 gve_turndown(priv);
1451 gve_drain_page_cache(priv);
1452 err = gve_destroy_rings(priv);
1453 if (err)
1454 goto err;
1455 err = gve_unregister_qpls(priv);
1456 if (err)
1457 goto err;
1458 gve_clear_device_rings_ok(priv);
1459 }
1460 del_timer_sync(&priv->stats_report_timer);
1461
1462 gve_unreg_xdp_info(priv);
1463
1464 gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv));
1465 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues);
1466
1467 priv->interface_down_cnt++;
1468 return 0;
1469
1470 err:
1471 /* This must have been called from a reset due to the rtnl lock
1472 * so just return at this point.
1473 */
1474 if (gve_get_reset_in_progress(priv))
1475 return err;
1476 /* Otherwise reset before returning */
1477 gve_reset_and_teardown(priv, true);
1478 return gve_reset_recovery(priv, false);
1479 }
1480
static int gve_close(struct net_device *dev)
1482 {
1483 struct gve_priv *priv = netdev_priv(dev);
1484 int err;
1485
1486 err = gve_queues_stop(priv);
1487 if (err)
1488 return err;
1489
1490 gve_queues_mem_remove(priv);
1491 return 0;
1492 }
1493
static int gve_remove_xdp_queues(struct gve_priv *priv)
1495 {
1496 int err;
1497
1498 err = gve_destroy_xdp_rings(priv);
1499 if (err)
1500 return err;
1501
1502 err = gve_unregister_xdp_qpls(priv);
1503 if (err)
1504 return err;
1505
1506 gve_unreg_xdp_info(priv);
1507 gve_free_xdp_rings(priv);
1508
1509 priv->num_xdp_queues = 0;
1510 return 0;
1511 }
1512
static int gve_add_xdp_queues(struct gve_priv *priv)
1514 {
1515 int err;
1516
1517 priv->num_xdp_queues = priv->rx_cfg.num_queues;
1518
1519 err = gve_alloc_xdp_rings(priv);
1520 if (err)
1521 goto err;
1522
1523 err = gve_reg_xdp_info(priv, priv->dev);
1524 if (err)
1525 goto free_xdp_rings;
1526
1527 err = gve_register_xdp_qpls(priv);
1528 if (err)
1529 goto free_xdp_rings;
1530
1531 err = gve_create_xdp_rings(priv);
1532 if (err)
1533 goto free_xdp_rings;
1534
1535 return 0;
1536
1537 free_xdp_rings:
1538 gve_free_xdp_rings(priv);
1539 err:
1540 priv->num_xdp_queues = 0;
1541 return err;
1542 }
1543
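/* Update carrier state to match the device-reported link status. */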
static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
1545 {
1546 if (!gve_get_napi_enabled(priv))
1547 return;
1548
1549 if (link_status == netif_carrier_ok(priv->dev))
1550 return;
1551
1552 if (link_status) {
1553 netdev_info(priv->dev, "Device link is up.\n");
1554 netif_carrier_on(priv->dev);
1555 } else {
1556 netdev_info(priv->dev, "Device link is down.\n");
1557 netif_carrier_off(priv->dev);
1558 }
1559 }
1560
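/* Install or remove an XDP program, adding or removing the dedicated XDP TX
 * queues as needed.
 */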
static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
1562 struct netlink_ext_ack *extack)
1563 {
1564 struct bpf_prog *old_prog;
1565 int err = 0;
1566 u32 status;
1567
1568 old_prog = READ_ONCE(priv->xdp_prog);
1569 if (!netif_running(priv->dev)) {
1570 WRITE_ONCE(priv->xdp_prog, prog);
1571 if (old_prog)
1572 bpf_prog_put(old_prog);
1573 return 0;
1574 }
1575
1576 gve_turndown(priv);
1577 if (!old_prog && prog) {
1578 // Allocate XDP TX queues if an XDP program is
1579 // being installed
1580 err = gve_add_xdp_queues(priv);
1581 if (err)
1582 goto out;
1583 } else if (old_prog && !prog) {
1584 // Remove XDP TX queues if an XDP program is
1585 // being uninstalled
1586 err = gve_remove_xdp_queues(priv);
1587 if (err)
1588 goto out;
1589 }
1590 WRITE_ONCE(priv->xdp_prog, prog);
1591 if (old_prog)
1592 bpf_prog_put(old_prog);
1593
1594 out:
1595 gve_turnup(priv);
1596 status = ioread32be(&priv->reg_bar0->device_status);
1597 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1598 return err;
1599 }
1600
static int gve_xsk_pool_enable(struct net_device *dev,
1602 struct xsk_buff_pool *pool,
1603 u16 qid)
1604 {
1605 struct gve_priv *priv = netdev_priv(dev);
1606 struct napi_struct *napi;
1607 struct gve_rx_ring *rx;
1608 int tx_qid;
1609 int err;
1610
1611 if (qid >= priv->rx_cfg.num_queues) {
1612 dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid);
1613 return -EINVAL;
1614 }
1615 if (xsk_pool_get_rx_frame_size(pool) <
1616 priv->dev->max_mtu + sizeof(struct ethhdr)) {
1617 dev_err(&priv->pdev->dev, "xsk pool frame_len too small");
1618 return -EINVAL;
1619 }
1620
1621 err = xsk_pool_dma_map(pool, &priv->pdev->dev,
1622 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1623 if (err)
1624 return err;
1625
1626 /* If XDP prog is not installed, return */
1627 if (!priv->xdp_prog)
1628 return 0;
1629
1630 rx = &priv->rx[qid];
1631 napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1632 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id);
1633 if (err)
1634 goto err;
1635
1636 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
1637 MEM_TYPE_XSK_BUFF_POOL, NULL);
1638 if (err)
1639 goto err;
1640
1641 xsk_pool_set_rxq_info(pool, &rx->xsk_rxq);
1642 rx->xsk_pool = pool;
1643
1644 tx_qid = gve_xdp_tx_queue_id(priv, qid);
1645 priv->tx[tx_qid].xsk_pool = pool;
1646
1647 return 0;
1648 err:
1649 if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
1650 xdp_rxq_info_unreg(&rx->xsk_rxq);
1651
1652 xsk_pool_dma_unmap(pool,
1653 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1654 return err;
1655 }
1656
static int gve_xsk_pool_disable(struct net_device *dev,
1658 u16 qid)
1659 {
1660 struct gve_priv *priv = netdev_priv(dev);
1661 struct napi_struct *napi_rx;
1662 struct napi_struct *napi_tx;
1663 struct xsk_buff_pool *pool;
1664 int tx_qid;
1665
1666 pool = xsk_get_pool_from_qid(dev, qid);
1667 if (!pool)
1668 return -EINVAL;
1669 if (qid >= priv->rx_cfg.num_queues)
1670 return -EINVAL;
1671
1672 /* If XDP prog is not installed, unmap DMA and return */
1673 if (!priv->xdp_prog)
1674 goto done;
1675
1676 tx_qid = gve_xdp_tx_queue_id(priv, qid);
1677 if (!netif_running(dev)) {
1678 priv->rx[qid].xsk_pool = NULL;
1679 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
1680 priv->tx[tx_qid].xsk_pool = NULL;
1681 goto done;
1682 }
1683
1684 napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
1685 napi_disable(napi_rx); /* make sure current rx poll is done */
1686
1687 napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
1688 napi_disable(napi_tx); /* make sure current tx poll is done */
1689
1690 priv->rx[qid].xsk_pool = NULL;
1691 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
1692 priv->tx[tx_qid].xsk_pool = NULL;
1693 smp_mb(); /* Make sure it is visible to the workers on datapath */
1694
1695 napi_enable(napi_rx);
1696 if (gve_rx_work_pending(&priv->rx[qid]))
1697 napi_schedule(napi_rx);
1698
1699 napi_enable(napi_tx);
1700 if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
1701 napi_schedule(napi_tx);
1702
1703 done:
1704 xsk_pool_dma_unmap(pool,
1705 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1706 return 0;
1707 }
1708
static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
1710 {
1711 struct gve_priv *priv = netdev_priv(dev);
1712 int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id);
1713
1714 if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog)
1715 return -EINVAL;
1716
1717 if (flags & XDP_WAKEUP_TX) {
1718 struct gve_tx_ring *tx = &priv->tx[tx_queue_id];
1719 struct napi_struct *napi =
1720 &priv->ntfy_blocks[tx->ntfy_id].napi;
1721
1722 if (!napi_if_scheduled_mark_missed(napi)) {
1723 /* Call local_bh_enable to trigger SoftIRQ processing */
1724 local_bh_disable();
1725 napi_schedule(napi);
1726 local_bh_enable();
1727 }
1728
1729 tx->xdp_xsk_wakeup++;
1730 }
1731
1732 return 0;
1733 }
1734
static int verify_xdp_configuration(struct net_device *dev)
1736 {
1737 struct gve_priv *priv = netdev_priv(dev);
1738
1739 if (dev->features & NETIF_F_LRO) {
1740 netdev_warn(dev, "XDP is not supported when LRO is on.\n");
1741 return -EOPNOTSUPP;
1742 }
1743
1744 if (priv->queue_format != GVE_GQI_QPL_FORMAT) {
1745 netdev_warn(dev, "XDP is not supported in mode %d.\n",
1746 priv->queue_format);
1747 return -EOPNOTSUPP;
1748 }
1749
1750 if (dev->mtu > GVE_DEFAULT_RX_BUFFER_SIZE - sizeof(struct ethhdr) - GVE_RX_PAD) {
1751 netdev_warn(dev, "XDP is not supported for mtu %d.\n",
1752 dev->mtu);
1753 return -EOPNOTSUPP;
1754 }
1755
1756 if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues ||
1757 (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) {
1758 netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d",
1759 priv->rx_cfg.num_queues,
1760 priv->tx_cfg.num_queues,
1761 priv->tx_cfg.max_queues);
1762 return -EINVAL;
1763 }
1764 return 0;
1765 }
1766
static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
1768 {
1769 struct gve_priv *priv = netdev_priv(dev);
1770 int err;
1771
1772 err = verify_xdp_configuration(dev);
1773 if (err)
1774 return err;
1775 switch (xdp->command) {
1776 case XDP_SETUP_PROG:
1777 return gve_set_xdp(priv, xdp->prog, xdp->extack);
1778 case XDP_SETUP_XSK_POOL:
1779 if (xdp->xsk.pool)
1780 return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id);
1781 else
1782 return gve_xsk_pool_disable(dev, xdp->xsk.queue_id);
1783 default:
1784 return -EINVAL;
1785 }
1786 }
1787
int gve_flow_rules_reset(struct gve_priv *priv)
1789 {
1790 if (!priv->max_flow_rules)
1791 return 0;
1792
1793 return gve_adminq_reset_flow_rules(priv);
1794 }
1795
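/* Apply a new queue configuration by tearing down the existing queues and
 * bringing the device back up with the newly allocated resources.
 */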
int gve_adjust_config(struct gve_priv *priv,
1797 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1798 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1799 {
1800 int err;
1801
	/* Allocate resources for the new configuration */
1803 err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg);
1804 if (err) {
1805 netif_err(priv, drv, priv->dev,
1806 "Adjust config failed to alloc new queues");
1807 return err;
1808 }
1809
1810 /* Teardown the device and free existing resources */
1811 err = gve_close(priv->dev);
1812 if (err) {
1813 netif_err(priv, drv, priv->dev,
1814 "Adjust config failed to close old queues");
1815 gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg);
1816 return err;
1817 }
1818
1819 /* Bring the device back up again with the new resources. */
1820 err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg);
1821 if (err) {
1822 netif_err(priv, drv, priv->dev,
1823 "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n");
1824 /* No need to free on error: ownership of resources is lost after
1825 * calling gve_queues_start.
1826 */
1827 gve_turndown(priv);
1828 return err;
1829 }
1830
1831 return 0;
1832 }
1833
int gve_adjust_queues(struct gve_priv *priv,
1835 struct gve_queue_config new_rx_config,
1836 struct gve_queue_config new_tx_config)
1837 {
1838 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
1839 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
1840 int err;
1841
1842 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1843
1844 /* Relay the new config from ethtool */
1845 tx_alloc_cfg.qcfg = &new_tx_config;
1846 rx_alloc_cfg.qcfg_tx = &new_tx_config;
1847 rx_alloc_cfg.qcfg = &new_rx_config;
1848 tx_alloc_cfg.num_rings = new_tx_config.num_queues;
1849
1850 if (netif_running(priv->dev)) {
1851 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1852 return err;
1853 }
1854 /* Set the config for the next up. */
1855 priv->tx_cfg = new_tx_config;
1856 priv->rx_cfg = new_rx_config;
1857
1858 return 0;
1859 }
1860
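/* Quiesce the data path: turn carrier off, disable NAPI, and stop the tx
 * queues.
 */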
static void gve_turndown(struct gve_priv *priv)
1862 {
1863 int idx;
1864
1865 if (netif_carrier_ok(priv->dev))
1866 netif_carrier_off(priv->dev);
1867
1868 if (!gve_get_napi_enabled(priv))
1869 return;
1870
1871 /* Disable napi to prevent more work from coming in */
1872 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1873 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1874 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1875
1876 if (!gve_tx_was_added_to_block(priv, idx))
1877 continue;
1878 napi_disable(&block->napi);
1879 }
1880 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1881 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1882 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1883
1884 if (!gve_rx_was_added_to_block(priv, idx))
1885 continue;
1886 napi_disable(&block->napi);
1887 }
1888
1889 /* Stop tx queues */
1890 netif_tx_disable(priv->dev);
1891
1892 gve_clear_napi_enabled(priv);
1893 gve_clear_report_stats(priv);
1894 }
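/* Note: gve_turndown() quiesces in the order carrier off -> NAPI disable ->
 * TX queues disabled, so by the time netif_tx_disable() runs no NAPI context
 * is still polling the rings.
 */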
1895
1896 static void gve_turnup(struct gve_priv *priv)
1897 {
1898 int idx;
1899
1900 /* Start the tx queues */
1901 netif_tx_start_all_queues(priv->dev);
1902
1903 /* Enable napi and unmask interrupts for all queues */
1904 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1905 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1906 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1907
1908 if (!gve_tx_was_added_to_block(priv, idx))
1909 continue;
1910
1911 napi_enable(&block->napi);
1912 if (gve_is_gqi(priv)) {
1913 iowrite32be(0, gve_irq_doorbell(priv, block));
1914 } else {
1915 gve_set_itr_coalesce_usecs_dqo(priv, block,
1916 priv->tx_coalesce_usecs);
1917 }
1918
1919 /* Any descs written by the NIC before this barrier will be
1920 * handled by the one-off napi schedule below. Whereas any
1921 * descs after the barrier will generate interrupts.
1922 */
1923 mb();
1924 napi_schedule(&block->napi);
1925 }
1926 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1927 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1928 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1929
1930 if (!gve_rx_was_added_to_block(priv, idx))
1931 continue;
1932
1933 napi_enable(&block->napi);
1934 if (gve_is_gqi(priv)) {
1935 iowrite32be(0, gve_irq_doorbell(priv, block));
1936 } else {
1937 gve_set_itr_coalesce_usecs_dqo(priv, block,
1938 priv->rx_coalesce_usecs);
1939 }
1940
1941 /* Any descs written by the NIC before this barrier will be
1942 * handled by the one-off napi schedule below. Whereas any
1943 * descs after the barrier will generate interrupts.
1944 */
1945 mb();
1946 napi_schedule(&block->napi);
1947 }
1948
1949 gve_set_napi_enabled(priv);
1950 }
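/* Note: on GQI the interrupt is unmasked by writing 0 to the per-block IRQ
 * doorbell, while on DQO the equivalent step is reprogramming the ITR with the
 * configured coalescing interval; in both cases the mb() + napi_schedule()
 * pair above catches descriptors that landed while the interrupt was masked.
 */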
1951
1952 static void gve_turnup_and_check_status(struct gve_priv *priv)
1953 {
1954 u32 status;
1955
1956 gve_turnup(priv);
1957 status = ioread32be(&priv->reg_bar0->device_status);
1958 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1959 }
1960
1961 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
1962 {
1963 struct gve_notify_block *block;
1964 struct gve_tx_ring *tx = NULL;
1965 struct gve_priv *priv;
1966 u32 last_nic_done;
1967 u32 current_time;
1968 u32 ntfy_idx;
1969
1970 	netdev_info(dev, "Timeout on tx queue %d", txqueue);
1971 priv = netdev_priv(dev);
1972 if (txqueue > priv->tx_cfg.num_queues)
1973 goto reset;
1974
1975 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
1976 if (ntfy_idx >= priv->num_ntfy_blks)
1977 goto reset;
1978
1979 block = &priv->ntfy_blocks[ntfy_idx];
1980 tx = block->tx;
1981
1982 current_time = jiffies_to_msecs(jiffies);
1983 if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
1984 goto reset;
1985
1986 /* Check to see if there are missed completions, which will allow us to
1987 * kick the queue.
1988 */
1989 last_nic_done = gve_tx_load_event_counter(priv, tx);
1990 if (last_nic_done - tx->done) {
1991 netdev_info(dev, "Kicking queue %d", txqueue);
1992 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
1993 napi_schedule(&block->napi);
1994 tx->last_kick_msec = current_time;
1995 goto out;
1996 } // Else reset.
1997
1998 reset:
1999 gve_schedule_reset(priv);
2000
2001 out:
2002 if (tx)
2003 tx->queue_timeout++;
2004 priv->tx_timeo_cnt++;
2005 }
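/* Worked example (illustrative): with MIN_TX_TIMEOUT_GAP of 10000 msec, a
 * queue already kicked at t = 4000 msec cannot be kicked again for a timeout
 * reported before t = 14000 msec; such a report, or a timeout with no missed
 * completions outstanding, escalates straight to gve_schedule_reset().
 */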
2006
2007 u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit)
2008 {
2009 if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE)
2010 return GVE_MAX_RX_BUFFER_SIZE;
2011 else
2012 return GVE_DEFAULT_RX_BUFFER_SIZE;
2013 }
2014
2015 /* Header split is not yet supported on non-DQO_RDA queue formats, even if the device advertises it */
2016 bool gve_header_split_supported(const struct gve_priv *priv)
2017 {
2018 return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT;
2019 }
2020
2021 int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split)
2022 {
2023 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
2024 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
2025 bool enable_hdr_split;
2026 int err = 0;
2027
2028 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN)
2029 return 0;
2030
2031 if (!gve_header_split_supported(priv)) {
2032 dev_err(&priv->pdev->dev, "Header-split not supported\n");
2033 return -EOPNOTSUPP;
2034 }
2035
2036 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED)
2037 enable_hdr_split = true;
2038 else
2039 enable_hdr_split = false;
2040
2041 if (enable_hdr_split == priv->header_split_enabled)
2042 return 0;
2043
2044 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
2045
2046 rx_alloc_cfg.enable_header_split = enable_hdr_split;
2047 rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split);
2048
2049 if (netif_running(priv->dev))
2050 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
2051 return err;
2052 }
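/* Illustrative usage (assumption): header split is toggled through the
 * ethtool ring parameters, e.g. roughly:
 *
 *   ethtool -G eth0 tcp-data-split on
 *
 * Enabling it also raises the RX packet buffer size via gve_get_pkt_buf_size()
 * when the device supports the larger buffer.
 */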
2053
2054 static int gve_set_features(struct net_device *netdev,
2055 netdev_features_t features)
2056 {
2057 const netdev_features_t orig_features = netdev->features;
2058 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
2059 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
2060 struct gve_priv *priv = netdev_priv(netdev);
2061 int err;
2062
2063 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
2064
2065 if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
2066 netdev->features ^= NETIF_F_LRO;
2067 if (netif_running(netdev)) {
2068 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
2069 if (err)
2070 goto revert_features;
2071 }
2072 }
2073 if ((netdev->features & NETIF_F_NTUPLE) && !(features & NETIF_F_NTUPLE)) {
2074 err = gve_flow_rules_reset(priv);
2075 if (err)
2076 goto revert_features;
2077 }
2078
2079 return 0;
2080
2081 revert_features:
2082 netdev->features = orig_features;
2083 return err;
2084 }
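/* Note: toggling LRO (e.g. "ethtool -K eth0 lro off") requires a full queue
 * reallocation via gve_adjust_config() when the interface is up, whereas
 * clearing NETIF_F_NTUPLE only resets the installed flow rules; on any
 * failure netdev->features is rolled back to its previous value.
 */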
2085
2086 static const struct net_device_ops gve_netdev_ops = {
2087 .ndo_start_xmit = gve_start_xmit,
2088 .ndo_features_check = gve_features_check,
2089 .ndo_open = gve_open,
2090 .ndo_stop = gve_close,
2091 .ndo_get_stats64 = gve_get_stats,
2092 .ndo_tx_timeout = gve_tx_timeout,
2093 .ndo_set_features = gve_set_features,
2094 .ndo_bpf = gve_xdp,
2095 .ndo_xdp_xmit = gve_xdp_xmit,
2096 .ndo_xsk_wakeup = gve_xsk_wakeup,
2097 };
2098
2099 static void gve_handle_status(struct gve_priv *priv, u32 status)
2100 {
2101 if (GVE_DEVICE_STATUS_RESET_MASK & status) {
2102 dev_info(&priv->pdev->dev, "Device requested reset.\n");
2103 gve_set_do_reset(priv);
2104 }
2105 if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
2106 priv->stats_report_trigger_cnt++;
2107 gve_set_do_report_stats(priv);
2108 }
2109 }
2110
2111 static void gve_handle_reset(struct gve_priv *priv)
2112 {
2113 /* A service task will be scheduled at the end of probe to catch any
2114 * resets that need to happen, and we don't want to reset until
2115 * probe is done.
2116 */
2117 if (gve_get_probe_in_progress(priv))
2118 return;
2119
2120 if (gve_get_do_reset(priv)) {
2121 rtnl_lock();
2122 gve_reset(priv, false);
2123 rtnl_unlock();
2124 }
2125 }
2126
2127 void gve_handle_report_stats(struct gve_priv *priv)
2128 {
2129 struct stats *stats = priv->stats_report->stats;
2130 int idx, stats_idx = 0;
2131 unsigned int start = 0;
2132 u64 tx_bytes;
2133
2134 if (!gve_get_report_stats(priv))
2135 return;
2136
2137 be64_add_cpu(&priv->stats_report->written_count, 1);
2138 /* tx stats */
2139 if (priv->tx) {
2140 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
2141 u32 last_completion = 0;
2142 u32 tx_frames = 0;
2143
2144 /* DQO doesn't currently support these metrics. */
2145 if (gve_is_gqi(priv)) {
2146 last_completion = priv->tx[idx].done;
2147 tx_frames = priv->tx[idx].req;
2148 }
2149
2150 do {
2151 start = u64_stats_fetch_begin(&priv->tx[idx].statss);
2152 tx_bytes = priv->tx[idx].bytes_done;
2153 } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
2154 stats[stats_idx++] = (struct stats) {
2155 .stat_name = cpu_to_be32(TX_WAKE_CNT),
2156 .value = cpu_to_be64(priv->tx[idx].wake_queue),
2157 .queue_id = cpu_to_be32(idx),
2158 };
2159 stats[stats_idx++] = (struct stats) {
2160 .stat_name = cpu_to_be32(TX_STOP_CNT),
2161 .value = cpu_to_be64(priv->tx[idx].stop_queue),
2162 .queue_id = cpu_to_be32(idx),
2163 };
2164 stats[stats_idx++] = (struct stats) {
2165 .stat_name = cpu_to_be32(TX_FRAMES_SENT),
2166 .value = cpu_to_be64(tx_frames),
2167 .queue_id = cpu_to_be32(idx),
2168 };
2169 stats[stats_idx++] = (struct stats) {
2170 .stat_name = cpu_to_be32(TX_BYTES_SENT),
2171 .value = cpu_to_be64(tx_bytes),
2172 .queue_id = cpu_to_be32(idx),
2173 };
2174 stats[stats_idx++] = (struct stats) {
2175 .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
2176 .value = cpu_to_be64(last_completion),
2177 .queue_id = cpu_to_be32(idx),
2178 };
2179 stats[stats_idx++] = (struct stats) {
2180 .stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
2181 .value = cpu_to_be64(priv->tx[idx].queue_timeout),
2182 .queue_id = cpu_to_be32(idx),
2183 };
2184 }
2185 }
2186 /* rx stats */
2187 if (priv->rx) {
2188 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
2189 stats[stats_idx++] = (struct stats) {
2190 .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
2191 .value = cpu_to_be64(priv->rx[idx].desc.seqno),
2192 .queue_id = cpu_to_be32(idx),
2193 };
2194 stats[stats_idx++] = (struct stats) {
2195 .stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
2196 .value = cpu_to_be64(priv->rx[0].fill_cnt),
2197 .queue_id = cpu_to_be32(idx),
2198 };
2199 }
2200 }
2201 }
2202
2203 /* Handle NIC status register changes, reset requests and report stats */
2204 static void gve_service_task(struct work_struct *work)
2205 {
2206 struct gve_priv *priv = container_of(work, struct gve_priv,
2207 service_task);
2208 u32 status = ioread32be(&priv->reg_bar0->device_status);
2209
2210 gve_handle_status(priv, status);
2211
2212 gve_handle_reset(priv);
2213 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
2214 }
2215
2216 static void gve_set_netdev_xdp_features(struct gve_priv *priv)
2217 {
2218 if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
2219 priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC;
2220 priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT;
2221 priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
2222 priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
2223 } else {
2224 priv->dev->xdp_features = 0;
2225 }
2226 }
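/* Note: XDP features (basic, redirect, ndo_xmit and AF_XDP zero-copy) are
 * only advertised when the device is using the GQI QPL queue format; other
 * queue formats expose an empty xdp_features mask.
 */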
2227
2228 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
2229 {
2230 int num_ntfy;
2231 int err;
2232
2233 /* Set up the adminq */
2234 err = gve_adminq_alloc(&priv->pdev->dev, priv);
2235 if (err) {
2236 dev_err(&priv->pdev->dev,
2237 "Failed to alloc admin queue: err=%d\n", err);
2238 return err;
2239 }
2240
2241 err = gve_verify_driver_compatibility(priv);
2242 if (err) {
2243 dev_err(&priv->pdev->dev,
2244 "Could not verify driver compatibility: err=%d\n", err);
2245 goto err;
2246 }
2247
2248 priv->num_registered_pages = 0;
2249
2250 if (skip_describe_device)
2251 goto setup_device;
2252
2253 priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
2254 /* Get the initial information we need from the device */
2255 err = gve_adminq_describe_device(priv);
2256 if (err) {
2257 dev_err(&priv->pdev->dev,
2258 "Could not get device information: err=%d\n", err);
2259 goto err;
2260 }
2261 priv->dev->mtu = priv->dev->max_mtu;
2262 num_ntfy = pci_msix_vec_count(priv->pdev);
2263 if (num_ntfy <= 0) {
2264 dev_err(&priv->pdev->dev,
2265 "could not count MSI-x vectors: err=%d\n", num_ntfy);
2266 err = num_ntfy;
2267 goto err;
2268 } else if (num_ntfy < GVE_MIN_MSIX) {
2269 dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
2270 GVE_MIN_MSIX, num_ntfy);
2271 err = -EINVAL;
2272 goto err;
2273 }
2274
2275 	/* Big TCP is only supported on DQ */
2276 if (!gve_is_gqi(priv))
2277 netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);
2278
2279 priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
2280 /* gvnic has one Notification Block per MSI-x vector, except for the
2281 * management vector
2282 */
2283 priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
2284 priv->mgmt_msix_idx = priv->num_ntfy_blks;
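	/* Worked example (illustrative): with 17 MSI-X vectors, one is reserved
	 * for management and the remainder is rounded down to an even count:
	 * (17 - 1) & ~0x1 = 16 notification blocks, i.e. at most 8 TX and 8 RX
	 * queues after the max_queues clamping below.
	 */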
2285
2286 priv->tx_cfg.max_queues =
2287 min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
2288 priv->rx_cfg.max_queues =
2289 min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);
2290
2291 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
2292 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
2293 if (priv->default_num_queues > 0) {
2294 priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
2295 priv->tx_cfg.num_queues);
2296 priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
2297 priv->rx_cfg.num_queues);
2298 }
2299
2300 dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
2301 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
2302 dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
2303 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
2304
2305 if (!gve_is_gqi(priv)) {
2306 priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
2307 priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
2308 }
2309
2310 setup_device:
2311 gve_set_netdev_xdp_features(priv);
2312 err = gve_setup_device_resources(priv);
2313 if (!err)
2314 return 0;
2315 err:
2316 gve_adminq_free(&priv->pdev->dev, priv);
2317 return err;
2318 }
2319
2320 static void gve_teardown_priv_resources(struct gve_priv *priv)
2321 {
2322 gve_teardown_device_resources(priv);
2323 gve_adminq_free(&priv->pdev->dev, priv);
2324 }
2325
2326 static void gve_trigger_reset(struct gve_priv *priv)
2327 {
2328 /* Reset the device by releasing the AQ */
2329 gve_adminq_release(priv);
2330 }
2331
2332 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
2333 {
2334 gve_trigger_reset(priv);
2335 /* With the reset having already happened, close cannot fail */
2336 if (was_up)
2337 gve_close(priv->dev);
2338 gve_teardown_priv_resources(priv);
2339 }
2340
2341 static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
2342 {
2343 int err;
2344
2345 err = gve_init_priv(priv, true);
2346 if (err)
2347 goto err;
2348 if (was_up) {
2349 err = gve_open(priv->dev);
2350 if (err)
2351 goto err;
2352 }
2353 return 0;
2354 err:
2355 dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
2356 gve_turndown(priv);
2357 return err;
2358 }
2359
2360 int gve_reset(struct gve_priv *priv, bool attempt_teardown)
2361 {
2362 bool was_up = netif_running(priv->dev);
2363 int err;
2364
2365 dev_info(&priv->pdev->dev, "Performing reset\n");
2366 gve_clear_do_reset(priv);
2367 gve_set_reset_in_progress(priv);
2368 	/* If we aren't attempting to tear down normally, just turn down and
2369 * reset right away.
2370 */
2371 if (!attempt_teardown) {
2372 gve_turndown(priv);
2373 gve_reset_and_teardown(priv, was_up);
2374 } else {
2375 /* Otherwise attempt to close normally */
2376 if (was_up) {
2377 err = gve_close(priv->dev);
2378 /* If that fails reset as we did above */
2379 if (err)
2380 gve_reset_and_teardown(priv, was_up);
2381 }
2382 /* Clean up any remaining resources */
2383 gve_teardown_priv_resources(priv);
2384 }
2385
2386 /* Set it all back up */
2387 err = gve_reset_recovery(priv, was_up);
2388 gve_clear_reset_in_progress(priv);
2389 priv->reset_cnt++;
2390 priv->interface_up_cnt = 0;
2391 priv->interface_down_cnt = 0;
2392 priv->stats_report_trigger_cnt = 0;
2393 return err;
2394 }
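/* Note: gve_reset() is the common recovery path; the service task reaches it
 * through gve_handle_reset() with attempt_teardown == false, which skips the
 * graceful gve_close() and tears the device down immediately before rebuilding
 * it in gve_reset_recovery().
 */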
2395
2396 static void gve_write_version(u8 __iomem *driver_version_register)
2397 {
2398 const char *c = gve_version_prefix;
2399
2400 while (*c) {
2401 writeb(*c, driver_version_register);
2402 c++;
2403 }
2404
2405 c = gve_version_str;
2406 while (*c) {
2407 writeb(*c, driver_version_register);
2408 c++;
2409 }
2410 writeb('\n', driver_version_register);
2411 }
2412
2413 static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx)
2414 {
2415 struct gve_priv *priv = netdev_priv(dev);
2416 struct gve_rx_ring *gve_per_q_mem;
2417 int err;
2418
2419 if (!priv->rx)
2420 return -EAGAIN;
2421
2422 /* Destroying queue 0 while other queues exist is not supported in DQO */
2423 if (!gve_is_gqi(priv) && idx == 0)
2424 return -ERANGE;
2425
2426 /* Single-queue destruction requires quiescence on all queues */
2427 gve_turndown(priv);
2428
2429 /* This failure will trigger a reset - no need to clean up */
2430 err = gve_adminq_destroy_single_rx_queue(priv, idx);
2431 if (err)
2432 return err;
2433
2434 if (gve_is_qpl(priv)) {
2435 /* This failure will trigger a reset - no need to clean up */
2436 err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx));
2437 if (err)
2438 return err;
2439 }
2440
2441 gve_rx_stop_ring(priv, idx);
2442
2443 /* Turn the unstopped queues back up */
2444 gve_turnup_and_check_status(priv);
2445
2446 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
2447 *gve_per_q_mem = priv->rx[idx];
2448 memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
2449 return 0;
2450 }
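/* Note: the per-queue stop path above (and the start path below) briefly
 * turns down all queues via gve_turndown()/gve_turnup_and_check_status(),
 * because single-queue destruction and creation require the whole device to
 * be quiescent; the stopped ring's state is handed back to the caller through
 * per_q_mem so it can later be freed or restarted.
 */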
2451
2452 static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem)
2453 {
2454 struct gve_priv *priv = netdev_priv(dev);
2455 struct gve_rx_alloc_rings_cfg cfg = {0};
2456 struct gve_rx_ring *gve_per_q_mem;
2457
2458 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
2459 gve_rx_get_curr_alloc_cfg(priv, &cfg);
2460
2461 if (gve_is_gqi(priv))
2462 gve_rx_free_ring_gqi(priv, gve_per_q_mem, &cfg);
2463 else
2464 gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg);
2465 }
2466
2467 static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem,
2468 int idx)
2469 {
2470 struct gve_priv *priv = netdev_priv(dev);
2471 struct gve_rx_alloc_rings_cfg cfg = {0};
2472 struct gve_rx_ring *gve_per_q_mem;
2473 int err;
2474
2475 if (!priv->rx)
2476 return -EAGAIN;
2477
2478 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
2479 gve_rx_get_curr_alloc_cfg(priv, &cfg);
2480
2481 if (gve_is_gqi(priv))
2482 err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx);
2483 else
2484 err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx);
2485
2486 return err;
2487 }
2488
2489 static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx)
2490 {
2491 struct gve_priv *priv = netdev_priv(dev);
2492 struct gve_rx_ring *gve_per_q_mem;
2493 int err;
2494
2495 if (!priv->rx)
2496 return -EAGAIN;
2497
2498 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
2499 priv->rx[idx] = *gve_per_q_mem;
2500
2501 /* Single-queue creation requires quiescence on all queues */
2502 gve_turndown(priv);
2503
2504 gve_rx_start_ring(priv, idx);
2505
2506 if (gve_is_qpl(priv)) {
2507 /* This failure will trigger a reset - no need to clean up */
2508 err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx));
2509 if (err)
2510 goto abort;
2511 }
2512
2513 /* This failure will trigger a reset - no need to clean up */
2514 err = gve_adminq_create_single_rx_queue(priv, idx);
2515 if (err)
2516 goto abort;
2517
2518 if (gve_is_gqi(priv))
2519 gve_rx_write_doorbell(priv, &priv->rx[idx]);
2520 else
2521 gve_rx_post_buffers_dqo(&priv->rx[idx]);
2522
2523 /* Turn the unstopped queues back up */
2524 gve_turnup_and_check_status(priv);
2525 return 0;
2526
2527 abort:
2528 gve_rx_stop_ring(priv, idx);
2529
2530 /* All failures in this func result in a reset, by clearing the struct
2531 * at idx, we prevent a double free when that reset runs. The reset,
2532 * which needs the rtnl lock, will not run till this func returns and
2533 * its caller gives up the lock.
2534 */
2535 memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
2536 return err;
2537 }
2538
2539 static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = {
2540 .ndo_queue_mem_size = sizeof(struct gve_rx_ring),
2541 .ndo_queue_mem_alloc = gve_rx_queue_mem_alloc,
2542 .ndo_queue_mem_free = gve_rx_queue_mem_free,
2543 .ndo_queue_start = gve_rx_queue_start,
2544 .ndo_queue_stop = gve_rx_queue_stop,
2545 };
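/* Note (assumption): these netdev_queue_mgmt_ops let the core stop, free,
 * reallocate and restart a single RX queue without a full device reset, which
 * is what per-queue reconfiguration interfaces in newer kernels build on;
 * only RX queues are covered here, sized by struct gve_rx_ring.
 */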
2546
2547 static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
2548 {
2549 int max_tx_queues, max_rx_queues;
2550 struct net_device *dev;
2551 __be32 __iomem *db_bar;
2552 struct gve_registers __iomem *reg_bar;
2553 struct gve_priv *priv;
2554 int err;
2555
2556 err = pci_enable_device(pdev);
2557 if (err)
2558 return err;
2559
2560 err = pci_request_regions(pdev, gve_driver_name);
2561 if (err)
2562 goto abort_with_enabled;
2563
2564 pci_set_master(pdev);
2565
2566 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
2567 if (err) {
2568 dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
2569 goto abort_with_pci_region;
2570 }
2571
2572 reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
2573 if (!reg_bar) {
2574 dev_err(&pdev->dev, "Failed to map pci bar!\n");
2575 err = -ENOMEM;
2576 goto abort_with_pci_region;
2577 }
2578
2579 db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
2580 if (!db_bar) {
2581 dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
2582 err = -ENOMEM;
2583 goto abort_with_reg_bar;
2584 }
2585
2586 	gve_write_version(&reg_bar->driver_version);
2587 	/* Get max queues to alloc etherdev */
2588 	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
2589 	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
2590 /* Alloc and setup the netdev and priv */
2591 dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
2592 if (!dev) {
2593 dev_err(&pdev->dev, "could not allocate netdev\n");
2594 err = -ENOMEM;
2595 goto abort_with_db_bar;
2596 }
2597 SET_NETDEV_DEV(dev, &pdev->dev);
2598 pci_set_drvdata(pdev, dev);
2599 dev->ethtool_ops = &gve_ethtool_ops;
2600 dev->netdev_ops = &gve_netdev_ops;
2601 dev->queue_mgmt_ops = &gve_queue_mgmt_ops;
2602
2603 /* Set default and supported features.
2604 *
2605 * Features might be set in other locations as well (such as
2606 * `gve_adminq_describe_device`).
2607 */
2608 dev->hw_features = NETIF_F_HIGHDMA;
2609 dev->hw_features |= NETIF_F_SG;
2610 dev->hw_features |= NETIF_F_HW_CSUM;
2611 dev->hw_features |= NETIF_F_TSO;
2612 dev->hw_features |= NETIF_F_TSO6;
2613 dev->hw_features |= NETIF_F_TSO_ECN;
2614 dev->hw_features |= NETIF_F_RXCSUM;
2615 dev->hw_features |= NETIF_F_RXHASH;
2616 dev->features = dev->hw_features;
2617 dev->watchdog_timeo = 5 * HZ;
2618 dev->min_mtu = ETH_MIN_MTU;
2619 netif_carrier_off(dev);
2620
2621 priv = netdev_priv(dev);
2622 priv->dev = dev;
2623 priv->pdev = pdev;
2624 priv->msg_enable = DEFAULT_MSG_LEVEL;
2625 priv->reg_bar0 = reg_bar;
2626 priv->db_bar2 = db_bar;
2627 priv->service_task_flags = 0x0;
2628 priv->state_flags = 0x0;
2629 priv->ethtool_flags = 0x0;
2630 priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE;
2631 priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;
2632
2633 gve_set_probe_in_progress(priv);
2634 priv->gve_wq = alloc_ordered_workqueue("gve", 0);
2635 if (!priv->gve_wq) {
2636 dev_err(&pdev->dev, "Could not allocate workqueue");
2637 err = -ENOMEM;
2638 goto abort_with_netdev;
2639 }
2640 INIT_WORK(&priv->service_task, gve_service_task);
2641 INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
2642 priv->tx_cfg.max_queues = max_tx_queues;
2643 priv->rx_cfg.max_queues = max_rx_queues;
2644
2645 err = gve_init_priv(priv, false);
2646 if (err)
2647 goto abort_with_wq;
2648
2649 err = register_netdev(dev);
2650 if (err)
2651 goto abort_with_gve_init;
2652
2653 dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
2654 dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
2655 gve_clear_probe_in_progress(priv);
2656 queue_work(priv->gve_wq, &priv->service_task);
2657 return 0;
2658
2659 abort_with_gve_init:
2660 gve_teardown_priv_resources(priv);
2661
2662 abort_with_wq:
2663 destroy_workqueue(priv->gve_wq);
2664
2665 abort_with_netdev:
2666 free_netdev(dev);
2667
2668 abort_with_db_bar:
2669 pci_iounmap(pdev, db_bar);
2670
2671 abort_with_reg_bar:
2672 pci_iounmap(pdev, reg_bar);
2673
2674 abort_with_pci_region:
2675 pci_release_regions(pdev);
2676
2677 abort_with_enabled:
2678 pci_disable_device(pdev);
2679 return err;
2680 }
2681
2682 static void gve_remove(struct pci_dev *pdev)
2683 {
2684 struct net_device *netdev = pci_get_drvdata(pdev);
2685 struct gve_priv *priv = netdev_priv(netdev);
2686 __be32 __iomem *db_bar = priv->db_bar2;
2687 void __iomem *reg_bar = priv->reg_bar0;
2688
2689 unregister_netdev(netdev);
2690 gve_teardown_priv_resources(priv);
2691 destroy_workqueue(priv->gve_wq);
2692 free_netdev(netdev);
2693 pci_iounmap(pdev, db_bar);
2694 pci_iounmap(pdev, reg_bar);
2695 pci_release_regions(pdev);
2696 pci_disable_device(pdev);
2697 }
2698
2699 static void gve_shutdown(struct pci_dev *pdev)
2700 {
2701 struct net_device *netdev = pci_get_drvdata(pdev);
2702 struct gve_priv *priv = netdev_priv(netdev);
2703 bool was_up = netif_running(priv->dev);
2704
2705 rtnl_lock();
2706 if (was_up && gve_close(priv->dev)) {
2707 /* If the dev was up, attempt to close, if close fails, reset */
2708 gve_reset_and_teardown(priv, was_up);
2709 } else {
2710 /* If the dev wasn't up or close worked, finish tearing down */
2711 gve_teardown_priv_resources(priv);
2712 }
2713 rtnl_unlock();
2714 }
2715
2716 #ifdef CONFIG_PM
2717 static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
2718 {
2719 struct net_device *netdev = pci_get_drvdata(pdev);
2720 struct gve_priv *priv = netdev_priv(netdev);
2721 bool was_up = netif_running(priv->dev);
2722
2723 priv->suspend_cnt++;
2724 rtnl_lock();
2725 if (was_up && gve_close(priv->dev)) {
2726 /* If the dev was up, attempt to close, if close fails, reset */
2727 gve_reset_and_teardown(priv, was_up);
2728 } else {
2729 /* If the dev wasn't up or close worked, finish tearing down */
2730 gve_teardown_priv_resources(priv);
2731 }
2732 priv->up_before_suspend = was_up;
2733 rtnl_unlock();
2734 return 0;
2735 }
2736
2737 static int gve_resume(struct pci_dev *pdev)
2738 {
2739 struct net_device *netdev = pci_get_drvdata(pdev);
2740 struct gve_priv *priv = netdev_priv(netdev);
2741 int err;
2742
2743 priv->resume_cnt++;
2744 rtnl_lock();
2745 err = gve_reset_recovery(priv, priv->up_before_suspend);
2746 rtnl_unlock();
2747 return err;
2748 }
2749 #endif /* CONFIG_PM */
2750
2751 static const struct pci_device_id gve_id_table[] = {
2752 { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
2753 { }
2754 };
2755
2756 static struct pci_driver gve_driver = {
2757 .name = gve_driver_name,
2758 .id_table = gve_id_table,
2759 .probe = gve_probe,
2760 .remove = gve_remove,
2761 .shutdown = gve_shutdown,
2762 #ifdef CONFIG_PM
2763 .suspend = gve_suspend,
2764 .resume = gve_resume,
2765 #endif
2766 };
2767
2768 module_pci_driver(gve_driver);
2769
2770 MODULE_DEVICE_TABLE(pci, gve_id_table);
2771 MODULE_AUTHOR("Google, Inc.");
2772 MODULE_DESCRIPTION("Google Virtual NIC Driver");
2773 MODULE_LICENSE("Dual MIT/GPL");
2774 MODULE_VERSION(GVE_VERSION);
2775