1 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
2 /* Google virtual Ethernet (gve) driver
3 *
4 * Copyright (C) 2015-2024 Google LLC
5 */
6
7 #include <linux/bpf.h>
8 #include <linux/cpumask.h>
9 #include <linux/etherdevice.h>
10 #include <linux/filter.h>
11 #include <linux/interrupt.h>
12 #include <linux/irq.h>
13 #include <linux/module.h>
14 #include <linux/pci.h>
15 #include <linux/sched.h>
16 #include <linux/timer.h>
17 #include <linux/workqueue.h>
18 #include <linux/utsname.h>
19 #include <linux/version.h>
20 #include <net/netdev_queues.h>
21 #include <net/sch_generic.h>
22 #include <net/xdp_sock_drv.h>
23 #include "gve.h"
24 #include "gve_dqo.h"
25 #include "gve_adminq.h"
26 #include "gve_register.h"
27 #include "gve_utils.h"
28
29 #define GVE_DEFAULT_RX_COPYBREAK (256)
30
31 #define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK)
32 #define GVE_VERSION "1.0.0"
33 #define GVE_VERSION_PREFIX "GVE-"
34
35 // Minimum amount of time between queue kicks in msec (10 seconds)
36 #define MIN_TX_TIMEOUT_GAP (1000 * 10)
37
38 char gve_driver_name[] = "gve";
39 const char gve_version_str[] = GVE_VERSION;
40 static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
41
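/* Report driver and OS version information to the device over the admin
 * queue. A device that does not implement this command returns -EOPNOTSUPP,
 * which is treated as success.
 */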
42 static int gve_verify_driver_compatibility(struct gve_priv *priv)
43 {
44 int err;
45 struct gve_driver_info *driver_info;
46 dma_addr_t driver_info_bus;
47
48 driver_info = dma_alloc_coherent(&priv->pdev->dev,
49 sizeof(struct gve_driver_info),
50 &driver_info_bus, GFP_KERNEL);
51 if (!driver_info)
52 return -ENOMEM;
53
54 *driver_info = (struct gve_driver_info) {
55 .os_type = 1, /* Linux */
56 .os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
57 .os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
58 .os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
59 .driver_capability_flags = {
60 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
61 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
62 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
63 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
64 },
65 };
66 strscpy(driver_info->os_version_str1, utsname()->release,
67 sizeof(driver_info->os_version_str1));
68 strscpy(driver_info->os_version_str2, utsname()->version,
69 sizeof(driver_info->os_version_str2));
70
71 err = gve_adminq_verify_driver_compatibility(priv,
72 sizeof(struct gve_driver_info),
73 driver_info_bus);
74
75 /* It's ok if the device doesn't support this */
76 if (err == -EOPNOTSUPP)
77 err = 0;
78
79 dma_free_coherent(&priv->pdev->dev,
80 sizeof(struct gve_driver_info),
81 driver_info, driver_info_bus);
82 return err;
83 }
84
85 static netdev_features_t gve_features_check(struct sk_buff *skb,
86 struct net_device *dev,
87 netdev_features_t features)
88 {
89 struct gve_priv *priv = netdev_priv(dev);
90
91 if (!gve_is_gqi(priv))
92 return gve_features_check_dqo(skb, dev, features);
93
94 return features;
95 }
96
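/* Transmit entry point: dispatch the skb to the GQI or DQO datapath based on
 * the negotiated queue format.
 */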
97 static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
98 {
99 struct gve_priv *priv = netdev_priv(dev);
100
101 if (gve_is_gqi(priv))
102 return gve_tx(skb, dev);
103 else
104 return gve_tx_dqo(skb, dev);
105 }
106
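/* Aggregate per-ring rx/tx packet and byte counters into rtnl_link_stats64,
 * re-reading each ring's counters via the u64_stats retry loop so the values
 * stay consistent.
 */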
107 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
108 {
109 struct gve_priv *priv = netdev_priv(dev);
110 unsigned int start;
111 u64 packets, bytes;
112 int num_tx_queues;
113 int ring;
114
115 num_tx_queues = gve_num_tx_queues(priv);
116 if (priv->rx) {
117 for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
118 do {
119 start =
120 u64_stats_fetch_begin(&priv->rx[ring].statss);
121 packets = priv->rx[ring].rpackets;
122 bytes = priv->rx[ring].rbytes;
123 } while (u64_stats_fetch_retry(&priv->rx[ring].statss,
124 start));
125 s->rx_packets += packets;
126 s->rx_bytes += bytes;
127 }
128 }
129 if (priv->tx) {
130 for (ring = 0; ring < num_tx_queues; ring++) {
131 do {
132 start =
133 u64_stats_fetch_begin(&priv->tx[ring].statss);
134 packets = priv->tx[ring].pkt_done;
135 bytes = priv->tx[ring].bytes_done;
136 } while (u64_stats_fetch_retry(&priv->tx[ring].statss,
137 start));
138 s->tx_packets += packets;
139 s->tx_bytes += bytes;
140 }
141 }
142 }
143
144 static int gve_alloc_flow_rule_caches(struct gve_priv *priv)
145 {
146 struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;
147 int err = 0;
148
149 if (!priv->max_flow_rules)
150 return 0;
151
152 flow_rules_cache->rules_cache =
153 kvcalloc(GVE_FLOW_RULES_CACHE_SIZE, sizeof(*flow_rules_cache->rules_cache),
154 GFP_KERNEL);
155 if (!flow_rules_cache->rules_cache) {
156 dev_err(&priv->pdev->dev, "Cannot alloc flow rules cache\n");
157 return -ENOMEM;
158 }
159
160 flow_rules_cache->rule_ids_cache =
161 kvcalloc(GVE_FLOW_RULE_IDS_CACHE_SIZE, sizeof(*flow_rules_cache->rule_ids_cache),
162 GFP_KERNEL);
163 if (!flow_rules_cache->rule_ids_cache) {
164 dev_err(&priv->pdev->dev, "Cannot alloc flow rule ids cache\n");
165 err = -ENOMEM;
166 goto free_rules_cache;
167 }
168
169 return 0;
170
171 free_rules_cache:
172 kvfree(flow_rules_cache->rules_cache);
173 flow_rules_cache->rules_cache = NULL;
174 return err;
175 }
176
177 static void gve_free_flow_rule_caches(struct gve_priv *priv)
178 {
179 struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;
180
181 kvfree(flow_rules_cache->rule_ids_cache);
182 flow_rules_cache->rule_ids_cache = NULL;
183 kvfree(flow_rules_cache->rules_cache);
184 flow_rules_cache->rules_cache = NULL;
185 }
186
187 static int gve_alloc_counter_array(struct gve_priv *priv)
188 {
189 priv->counter_array =
190 dma_alloc_coherent(&priv->pdev->dev,
191 priv->num_event_counters *
192 sizeof(*priv->counter_array),
193 &priv->counter_array_bus, GFP_KERNEL);
194 if (!priv->counter_array)
195 return -ENOMEM;
196
197 return 0;
198 }
199
200 static void gve_free_counter_array(struct gve_priv *priv)
201 {
202 if (!priv->counter_array)
203 return;
204
205 dma_free_coherent(&priv->pdev->dev,
206 priv->num_event_counters *
207 sizeof(*priv->counter_array),
208 priv->counter_array, priv->counter_array_bus);
209 priv->counter_array = NULL;
210 }
211
212 /* NIC requests to report stats */
213 static void gve_stats_report_task(struct work_struct *work)
214 {
215 struct gve_priv *priv = container_of(work, struct gve_priv,
216 stats_report_task);
217 if (gve_get_do_report_stats(priv)) {
218 gve_handle_report_stats(priv);
219 gve_clear_do_report_stats(priv);
220 }
221 }
222
223 static void gve_stats_report_schedule(struct gve_priv *priv)
224 {
225 if (!gve_get_probe_in_progress(priv) &&
226 !gve_get_reset_in_progress(priv)) {
227 gve_set_do_report_stats(priv);
228 queue_work(priv->gve_wq, &priv->stats_report_task);
229 }
230 }
231
232 static void gve_stats_report_timer(struct timer_list *t)
233 {
234 struct gve_priv *priv = from_timer(priv, t, stats_report_timer);
235
236 mod_timer(&priv->stats_report_timer,
237 round_jiffies(jiffies +
238 msecs_to_jiffies(priv->stats_report_timer_period)));
239 gve_stats_report_schedule(priv);
240 }
241
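/* Allocate the DMA-coherent stats report shared with the device, sized for
 * both driver- and NIC-reported stats on every tx and rx queue, and set up
 * (but do not yet arm) the periodic report timer.
 */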
242 static int gve_alloc_stats_report(struct gve_priv *priv)
243 {
244 int tx_stats_num, rx_stats_num;
245
246 tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
247 gve_num_tx_queues(priv);
248 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
249 priv->rx_cfg.num_queues;
250 priv->stats_report_len = struct_size(priv->stats_report, stats,
251 size_add(tx_stats_num, rx_stats_num));
252 priv->stats_report =
253 dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
254 &priv->stats_report_bus, GFP_KERNEL);
255 if (!priv->stats_report)
256 return -ENOMEM;
257 /* Set up timer for the report-stats task */
258 timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
259 priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
260 return 0;
261 }
262
263 static void gve_free_stats_report(struct gve_priv *priv)
264 {
265 if (!priv->stats_report)
266 return;
267
268 del_timer_sync(&priv->stats_report_timer);
269 dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
270 priv->stats_report, priv->stats_report_bus);
271 priv->stats_report = NULL;
272 }
273
274 static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
275 {
276 struct gve_priv *priv = arg;
277
278 queue_work(priv->gve_wq, &priv->service_task);
279 return IRQ_HANDLED;
280 }
281
282 static irqreturn_t gve_intr(int irq, void *arg)
283 {
284 struct gve_notify_block *block = arg;
285 struct gve_priv *priv = block->priv;
286
287 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
288 napi_schedule_irqoff(&block->napi);
289 return IRQ_HANDLED;
290 }
291
292 static irqreturn_t gve_intr_dqo(int irq, void *arg)
293 {
294 struct gve_notify_block *block = arg;
295
296 /* Interrupts are automatically masked */
297 napi_schedule_irqoff(&block->napi);
298 return IRQ_HANDLED;
299 }
300
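/* Return true if the current cpu is in the irq's effective affinity mask.
 * The DQO napi poll uses this to decide whether to repoll locally or re-arm
 * the irq so napi migrates back to its home cpu.
 */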
301 static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq)
302 {
303 int cpu_curr = smp_processor_id();
304 const struct cpumask *aff_mask;
305
306 aff_mask = irq_get_effective_affinity_mask(irq);
307 if (unlikely(!aff_mask))
308 return 1;
309
310 return cpumask_test_cpu(cpu_curr, aff_mask);
311 }
312
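/* GQI napi poll: clean tx (or XDP tx) work, poll rx up to budget, then ack
 * the irq and re-check for work that raced with the ack before returning.
 */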
313 int gve_napi_poll(struct napi_struct *napi, int budget)
314 {
315 struct gve_notify_block *block;
316 __be32 __iomem *irq_doorbell;
317 bool reschedule = false;
318 struct gve_priv *priv;
319 int work_done = 0;
320
321 block = container_of(napi, struct gve_notify_block, napi);
322 priv = block->priv;
323
324 if (block->tx) {
325 if (block->tx->q_num < priv->tx_cfg.num_queues)
326 reschedule |= gve_tx_poll(block, budget);
327 else if (budget)
328 reschedule |= gve_xdp_poll(block, budget);
329 }
330
331 if (!budget)
332 return 0;
333
334 if (block->rx) {
335 work_done = gve_rx_poll(block, budget);
336 reschedule |= work_done == budget;
337 }
338
339 if (reschedule)
340 return budget;
341
342 /* Complete processing - don't unmask irq if busy polling is enabled */
343 if (likely(napi_complete_done(napi, work_done))) {
344 irq_doorbell = gve_irq_doorbell(priv, block);
345 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
346
347 /* Ensure IRQ ACK is visible before we check pending work.
348 * If the queue had issued updates, they would be truly visible.
349 */
350 mb();
351
352 if (block->tx)
353 reschedule |= gve_tx_clean_pending(priv, block->tx);
354 if (block->rx)
355 reschedule |= gve_rx_work_pending(block->rx);
356
357 if (reschedule && napi_schedule(napi))
358 iowrite32be(GVE_IRQ_MASK, irq_doorbell);
359 }
360 return work_done;
361 }
362
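/* DQO napi poll: tx completions are always cleaned; rx is polled only with a
 * non-zero budget. When more work remains, repoll only if running on the
 * irq's home cpu, otherwise complete and let the re-armed irq reschedule napi
 * on the right cpu.
 */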
363 int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
364 {
365 struct gve_notify_block *block =
366 container_of(napi, struct gve_notify_block, napi);
367 struct gve_priv *priv = block->priv;
368 bool reschedule = false;
369 int work_done = 0;
370
371 if (block->tx)
372 reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
373
374 if (!budget)
375 return 0;
376
377 if (block->rx) {
378 work_done = gve_rx_poll_dqo(block, budget);
379 reschedule |= work_done == budget;
380 }
381
382 if (reschedule) {
383 /* Reschedule by returning budget only if already on the correct
384 * cpu.
385 */
386 if (likely(gve_is_napi_on_home_cpu(priv, block->irq)))
387 return budget;
388
389 /* If not on the cpu with which this queue's irq has affinity,
390 * we avoid rescheduling napi and arm the irq instead so
391 * that napi gets rescheduled back eventually onto the right
392 * cpu.
393 */
394 if (work_done == budget)
395 work_done--;
396 }
397
398 if (likely(napi_complete_done(napi, work_done))) {
399 /* Enable interrupts again.
400 *
401 * We don't need to repoll afterwards because HW supports the
402 * PCI MSI-X PBA feature.
403 *
404 * Another interrupt would be triggered if a new event came in
405 * since the last one.
406 */
407 gve_write_irq_doorbell_dqo(priv, block,
408 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
409 }
410
411 return work_done;
412 }
413
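/* Allocate and request MSI-X vectors: one per notification block plus one
 * management vector. If fewer vectors are granted than requested, shrink the
 * tx/rx max queue counts to fit what was enabled.
 */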
414 static int gve_alloc_notify_blocks(struct gve_priv *priv)
415 {
416 int num_vecs_requested = priv->num_ntfy_blks + 1;
417 unsigned int active_cpus;
418 int vecs_enabled;
419 int i, j;
420 int err;
421
422 priv->msix_vectors = kvcalloc(num_vecs_requested,
423 sizeof(*priv->msix_vectors), GFP_KERNEL);
424 if (!priv->msix_vectors)
425 return -ENOMEM;
426 for (i = 0; i < num_vecs_requested; i++)
427 priv->msix_vectors[i].entry = i;
428 vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
429 GVE_MIN_MSIX, num_vecs_requested);
430 if (vecs_enabled < 0) {
431 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
432 GVE_MIN_MSIX, vecs_enabled);
433 err = vecs_enabled;
434 goto abort_with_msix_vectors;
435 }
436 if (vecs_enabled != num_vecs_requested) {
437 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
438 int vecs_per_type = new_num_ntfy_blks / 2;
439 int vecs_left = new_num_ntfy_blks % 2;
440
441 priv->num_ntfy_blks = new_num_ntfy_blks;
442 priv->mgmt_msix_idx = priv->num_ntfy_blks;
443 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
444 vecs_per_type);
445 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
446 vecs_per_type + vecs_left);
447 dev_err(&priv->pdev->dev,
448 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
449 vecs_enabled, priv->tx_cfg.max_queues,
450 priv->rx_cfg.max_queues);
451 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
452 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
453 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
454 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
455 }
456 /* Half the notification blocks go to TX and half to RX */
457 active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
458
459 /* Setup Management Vector - the last vector */
460 snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
461 pci_name(priv->pdev));
462 err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
463 gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
464 if (err) {
465 dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
466 goto abort_with_msix_enabled;
467 }
468 priv->irq_db_indices =
469 dma_alloc_coherent(&priv->pdev->dev,
470 priv->num_ntfy_blks *
471 sizeof(*priv->irq_db_indices),
472 &priv->irq_db_indices_bus, GFP_KERNEL);
473 if (!priv->irq_db_indices) {
474 err = -ENOMEM;
475 goto abort_with_mgmt_vector;
476 }
477
478 priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
479 sizeof(*priv->ntfy_blocks), GFP_KERNEL);
480 if (!priv->ntfy_blocks) {
481 err = -ENOMEM;
482 goto abort_with_irq_db_indices;
483 }
484
485 /* Setup the other blocks - the first n-1 vectors */
486 for (i = 0; i < priv->num_ntfy_blks; i++) {
487 struct gve_notify_block *block = &priv->ntfy_blocks[i];
488 int msix_idx = i;
489
490 snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s",
491 i, pci_name(priv->pdev));
492 block->priv = priv;
493 err = request_irq(priv->msix_vectors[msix_idx].vector,
494 gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
495 0, block->name, block);
496 if (err) {
497 dev_err(&priv->pdev->dev,
498 "Failed to receive msix vector %d\n", i);
499 goto abort_with_some_ntfy_blocks;
500 }
501 block->irq = priv->msix_vectors[msix_idx].vector;
502 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
503 get_cpu_mask(i % active_cpus));
504 block->irq_db_index = &priv->irq_db_indices[i].index;
505 }
506 return 0;
507 abort_with_some_ntfy_blocks:
508 for (j = 0; j < i; j++) {
509 struct gve_notify_block *block = &priv->ntfy_blocks[j];
510 int msix_idx = j;
511
512 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
513 NULL);
514 free_irq(priv->msix_vectors[msix_idx].vector, block);
515 block->irq = 0;
516 }
517 kvfree(priv->ntfy_blocks);
518 priv->ntfy_blocks = NULL;
519 abort_with_irq_db_indices:
520 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
521 sizeof(*priv->irq_db_indices),
522 priv->irq_db_indices, priv->irq_db_indices_bus);
523 priv->irq_db_indices = NULL;
524 abort_with_mgmt_vector:
525 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
526 abort_with_msix_enabled:
527 pci_disable_msix(priv->pdev);
528 abort_with_msix_vectors:
529 kvfree(priv->msix_vectors);
530 priv->msix_vectors = NULL;
531 return err;
532 }
533
534 static void gve_free_notify_blocks(struct gve_priv *priv)
535 {
536 int i;
537
538 if (!priv->msix_vectors)
539 return;
540
541 /* Free the irqs */
542 for (i = 0; i < priv->num_ntfy_blks; i++) {
543 struct gve_notify_block *block = &priv->ntfy_blocks[i];
544 int msix_idx = i;
545
546 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
547 NULL);
548 free_irq(priv->msix_vectors[msix_idx].vector, block);
549 block->irq = 0;
550 }
551 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
552 kvfree(priv->ntfy_blocks);
553 priv->ntfy_blocks = NULL;
554 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
555 sizeof(*priv->irq_db_indices),
556 priv->irq_db_indices, priv->irq_db_indices_bus);
557 priv->irq_db_indices = NULL;
558 pci_disable_msix(priv->pdev);
559 kvfree(priv->msix_vectors);
560 priv->msix_vectors = NULL;
561 }
562
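/* Allocate the host resources shared with the device (flow rule caches,
 * event counters, notification blocks, stats report) and register them with
 * the device over the admin queue.
 */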
563 static int gve_setup_device_resources(struct gve_priv *priv)
564 {
565 int err;
566
567 err = gve_alloc_flow_rule_caches(priv);
568 if (err)
569 return err;
570 err = gve_alloc_counter_array(priv);
571 if (err)
572 goto abort_with_flow_rule_caches;
573 err = gve_alloc_notify_blocks(priv);
574 if (err)
575 goto abort_with_counter;
576 err = gve_alloc_stats_report(priv);
577 if (err)
578 goto abort_with_ntfy_blocks;
579 err = gve_adminq_configure_device_resources(priv,
580 priv->counter_array_bus,
581 priv->num_event_counters,
582 priv->irq_db_indices_bus,
583 priv->num_ntfy_blks);
584 if (unlikely(err)) {
585 dev_err(&priv->pdev->dev,
586 "could not setup device_resources: err=%d\n", err);
587 err = -ENXIO;
588 goto abort_with_stats_report;
589 }
590
591 if (!gve_is_gqi(priv)) {
592 priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
593 GFP_KERNEL);
594 if (!priv->ptype_lut_dqo) {
595 err = -ENOMEM;
596 goto abort_with_stats_report;
597 }
598 err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
599 if (err) {
600 dev_err(&priv->pdev->dev,
601 "Failed to get ptype map: err=%d\n", err);
602 goto abort_with_ptype_lut;
603 }
604 }
605
606 err = gve_adminq_report_stats(priv, priv->stats_report_len,
607 priv->stats_report_bus,
608 GVE_STATS_REPORT_TIMER_PERIOD);
609 if (err)
610 dev_err(&priv->pdev->dev,
611 "Failed to report stats: err=%d\n", err);
612 gve_set_device_resources_ok(priv);
613 return 0;
614
615 abort_with_ptype_lut:
616 kvfree(priv->ptype_lut_dqo);
617 priv->ptype_lut_dqo = NULL;
618 abort_with_stats_report:
619 gve_free_stats_report(priv);
620 abort_with_ntfy_blocks:
621 gve_free_notify_blocks(priv);
622 abort_with_counter:
623 gve_free_counter_array(priv);
624 abort_with_flow_rule_caches:
625 gve_free_flow_rule_caches(priv);
626
627 return err;
628 }
629
630 static void gve_trigger_reset(struct gve_priv *priv);
631
632 static void gve_teardown_device_resources(struct gve_priv *priv)
633 {
634 int err;
635
636 /* Tell device its resources are being freed */
637 if (gve_get_device_resources_ok(priv)) {
638 err = gve_flow_rules_reset(priv);
639 if (err) {
640 dev_err(&priv->pdev->dev,
641 "Failed to reset flow rules: err=%d\n", err);
642 gve_trigger_reset(priv);
643 }
644 /* detach the stats report */
645 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
646 if (err) {
647 dev_err(&priv->pdev->dev,
648 "Failed to detach stats report: err=%d\n", err);
649 gve_trigger_reset(priv);
650 }
651 err = gve_adminq_deconfigure_device_resources(priv);
652 if (err) {
653 dev_err(&priv->pdev->dev,
654 "Could not deconfigure device resources: err=%d\n",
655 err);
656 gve_trigger_reset(priv);
657 }
658 }
659
660 kvfree(priv->ptype_lut_dqo);
661 priv->ptype_lut_dqo = NULL;
662
663 gve_free_flow_rule_caches(priv);
664 gve_free_counter_array(priv);
665 gve_free_notify_blocks(priv);
666 gve_free_stats_report(priv);
667 gve_clear_device_resources_ok(priv);
668 }
669
670 static int gve_unregister_qpl(struct gve_priv *priv,
671 struct gve_queue_page_list *qpl)
672 {
673 int err;
674
675 if (!qpl)
676 return 0;
677
678 err = gve_adminq_unregister_page_list(priv, qpl->id);
679 if (err) {
680 netif_err(priv, drv, priv->dev,
681 "Failed to unregister queue page list %d\n",
682 qpl->id);
683 return err;
684 }
685
686 priv->num_registered_pages -= qpl->num_entries;
687 return 0;
688 }
689
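/* Register a queue page list with the device, enforcing the device limit on
 * the total number of registered pages.
 */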
690 static int gve_register_qpl(struct gve_priv *priv,
691 struct gve_queue_page_list *qpl)
692 {
693 int pages;
694 int err;
695
696 if (!qpl)
697 return 0;
698
699 pages = qpl->num_entries;
700
701 if (pages + priv->num_registered_pages > priv->max_registered_pages) {
702 netif_err(priv, drv, priv->dev,
703 "Reached max number of registered pages %llu > %llu\n",
704 pages + priv->num_registered_pages,
705 priv->max_registered_pages);
706 return -EINVAL;
707 }
708
709 err = gve_adminq_register_page_list(priv, qpl);
710 if (err) {
711 netif_err(priv, drv, priv->dev,
712 "failed to register queue page list %d\n",
713 qpl->id);
714 return err;
715 }
716
717 priv->num_registered_pages += pages;
718 return 0;
719 }
720
721 static struct gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx)
722 {
723 struct gve_tx_ring *tx = &priv->tx[idx];
724
725 if (gve_is_gqi(priv))
726 return tx->tx_fifo.qpl;
727 else
728 return tx->dqo.qpl;
729 }
730
731 static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx)
732 {
733 struct gve_rx_ring *rx = &priv->rx[idx];
734
735 if (gve_is_gqi(priv))
736 return rx->data.qpl;
737 else
738 return rx->dqo.qpl;
739 }
740
741 static int gve_register_xdp_qpls(struct gve_priv *priv)
742 {
743 int start_id;
744 int err;
745 int i;
746
747 start_id = gve_xdp_tx_start_queue_id(priv);
748 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
749 err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i));
750 /* This failure will trigger a reset - no need to clean up */
751 if (err)
752 return err;
753 }
754 return 0;
755 }
756
757 static int gve_register_qpls(struct gve_priv *priv)
758 {
759 int num_tx_qpls, num_rx_qpls;
760 int err;
761 int i;
762
763 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv),
764 gve_is_qpl(priv));
765 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
766
767 for (i = 0; i < num_tx_qpls; i++) {
768 err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i));
769 if (err)
770 return err;
771 }
772
773 for (i = 0; i < num_rx_qpls; i++) {
774 err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i));
775 if (err)
776 return err;
777 }
778
779 return 0;
780 }
781
782 static int gve_unregister_xdp_qpls(struct gve_priv *priv)
783 {
784 int start_id;
785 int err;
786 int i;
787
788 start_id = gve_xdp_tx_start_queue_id(priv);
789 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
790 err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i));
791 /* This failure will trigger a reset - no need to clean */
792 if (err)
793 return err;
794 }
795 return 0;
796 }
797
798 static int gve_unregister_qpls(struct gve_priv *priv)
799 {
800 int num_tx_qpls, num_rx_qpls;
801 int err;
802 int i;
803
804 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv),
805 gve_is_qpl(priv));
806 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
807
808 for (i = 0; i < num_tx_qpls; i++) {
809 err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i));
810 /* This failure will trigger a reset - no need to clean */
811 if (err)
812 return err;
813 }
814
815 for (i = 0; i < num_rx_qpls; i++) {
816 err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i));
817 /* This failure will trigger a reset - no need to clean */
818 if (err)
819 return err;
820 }
821 return 0;
822 }
823
824 static int gve_create_xdp_rings(struct gve_priv *priv)
825 {
826 int err;
827
828 err = gve_adminq_create_tx_queues(priv,
829 gve_xdp_tx_start_queue_id(priv),
830 priv->num_xdp_queues);
831 if (err) {
832 netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n",
833 priv->num_xdp_queues);
834 /* This failure will trigger a reset - no need to clean
835 * up
836 */
837 return err;
838 }
839 netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n",
840 priv->num_xdp_queues);
841
842 return 0;
843 }
844
845 static int gve_create_rings(struct gve_priv *priv)
846 {
847 int num_tx_queues = gve_num_tx_queues(priv);
848 int err;
849 int i;
850
851 err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues);
852 if (err) {
853 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
854 num_tx_queues);
855 /* This failure will trigger a reset - no need to clean
856 * up
857 */
858 return err;
859 }
860 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
861 num_tx_queues);
862
863 err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
864 if (err) {
865 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
866 priv->rx_cfg.num_queues);
867 /* This failure will trigger a reset - no need to clean
868 * up
869 */
870 return err;
871 }
872 netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
873 priv->rx_cfg.num_queues);
874
875 if (gve_is_gqi(priv)) {
876 /* Rx data ring has been prefilled with packet buffers at queue
877 * allocation time.
878 *
879 * Write the doorbell to provide descriptor slots and packet
880 * buffers to the NIC.
881 */
882 for (i = 0; i < priv->rx_cfg.num_queues; i++)
883 gve_rx_write_doorbell(priv, &priv->rx[i]);
884 } else {
885 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
886 /* Post buffers and ring doorbell. */
887 gve_rx_post_buffers_dqo(&priv->rx[i]);
888 }
889 }
890
891 return 0;
892 }
893
894 static void init_xdp_sync_stats(struct gve_priv *priv)
895 {
896 int start_id = gve_xdp_tx_start_queue_id(priv);
897 int i;
898
899 /* Init stats */
900 for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
901 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
902
903 u64_stats_init(&priv->tx[i].statss);
904 priv->tx[i].ntfy_id = ntfy_idx;
905 }
906 }
907
908 static void gve_init_sync_stats(struct gve_priv *priv)
909 {
910 int i;
911
912 for (i = 0; i < priv->tx_cfg.num_queues; i++)
913 u64_stats_init(&priv->tx[i].statss);
914
915 /* Init stats for XDP TX queues */
916 init_xdp_sync_stats(priv);
917
918 for (i = 0; i < priv->rx_cfg.num_queues; i++)
919 u64_stats_init(&priv->rx[i].statss);
920 }
921
922 static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv,
923 struct gve_tx_alloc_rings_cfg *cfg)
924 {
925 cfg->qcfg = &priv->tx_cfg;
926 cfg->raw_addressing = !gve_is_qpl(priv);
927 cfg->ring_size = priv->tx_desc_cnt;
928 cfg->start_idx = 0;
929 cfg->num_rings = gve_num_tx_queues(priv);
930 cfg->tx = priv->tx;
931 }
932
933 static void gve_tx_stop_rings(struct gve_priv *priv, int start_id, int num_rings)
934 {
935 int i;
936
937 if (!priv->tx)
938 return;
939
940 for (i = start_id; i < start_id + num_rings; i++) {
941 if (gve_is_gqi(priv))
942 gve_tx_stop_ring_gqi(priv, i);
943 else
944 gve_tx_stop_ring_dqo(priv, i);
945 }
946 }
947
948 static void gve_tx_start_rings(struct gve_priv *priv, int start_id,
949 int num_rings)
950 {
951 int i;
952
953 for (i = start_id; i < start_id + num_rings; i++) {
954 if (gve_is_gqi(priv))
955 gve_tx_start_ring_gqi(priv, i);
956 else
957 gve_tx_start_ring_dqo(priv, i);
958 }
959 }
960
961 static int gve_alloc_xdp_rings(struct gve_priv *priv)
962 {
963 struct gve_tx_alloc_rings_cfg cfg = {0};
964 int err = 0;
965
966 if (!priv->num_xdp_queues)
967 return 0;
968
969 gve_tx_get_curr_alloc_cfg(priv, &cfg);
970 cfg.start_idx = gve_xdp_tx_start_queue_id(priv);
971 cfg.num_rings = priv->num_xdp_queues;
972
973 err = gve_tx_alloc_rings_gqi(priv, &cfg);
974 if (err)
975 return err;
976
977 gve_tx_start_rings(priv, cfg.start_idx, cfg.num_rings);
978 init_xdp_sync_stats(priv);
979
980 return 0;
981 }
982
983 static int gve_queues_mem_alloc(struct gve_priv *priv,
984 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
985 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
986 {
987 int err;
988
989 if (gve_is_gqi(priv))
990 err = gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg);
991 else
992 err = gve_tx_alloc_rings_dqo(priv, tx_alloc_cfg);
993 if (err)
994 return err;
995
996 if (gve_is_gqi(priv))
997 err = gve_rx_alloc_rings_gqi(priv, rx_alloc_cfg);
998 else
999 err = gve_rx_alloc_rings_dqo(priv, rx_alloc_cfg);
1000 if (err)
1001 goto free_tx;
1002
1003 return 0;
1004
1005 free_tx:
1006 if (gve_is_gqi(priv))
1007 gve_tx_free_rings_gqi(priv, tx_alloc_cfg);
1008 else
1009 gve_tx_free_rings_dqo(priv, tx_alloc_cfg);
1010 return err;
1011 }
1012
1013 static int gve_destroy_xdp_rings(struct gve_priv *priv)
1014 {
1015 int start_id;
1016 int err;
1017
1018 start_id = gve_xdp_tx_start_queue_id(priv);
1019 err = gve_adminq_destroy_tx_queues(priv,
1020 start_id,
1021 priv->num_xdp_queues);
1022 if (err) {
1023 netif_err(priv, drv, priv->dev,
1024 "failed to destroy XDP queues\n");
1025 /* This failure will trigger a reset - no need to clean up */
1026 return err;
1027 }
1028 netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n");
1029
1030 return 0;
1031 }
1032
1033 static int gve_destroy_rings(struct gve_priv *priv)
1034 {
1035 int num_tx_queues = gve_num_tx_queues(priv);
1036 int err;
1037
1038 err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues);
1039 if (err) {
1040 netif_err(priv, drv, priv->dev,
1041 "failed to destroy tx queues\n");
1042 /* This failure will trigger a reset - no need to clean up */
1043 return err;
1044 }
1045 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
1046 err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
1047 if (err) {
1048 netif_err(priv, drv, priv->dev,
1049 "failed to destroy rx queues\n");
1050 /* This failure will trigger a reset - no need to clean up */
1051 return err;
1052 }
1053 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
1054 return 0;
1055 }
1056
1057 static void gve_free_xdp_rings(struct gve_priv *priv)
1058 {
1059 struct gve_tx_alloc_rings_cfg cfg = {0};
1060
1061 gve_tx_get_curr_alloc_cfg(priv, &cfg);
1062 cfg.start_idx = gve_xdp_tx_start_queue_id(priv);
1063 cfg.num_rings = priv->num_xdp_queues;
1064
1065 if (priv->tx) {
1066 gve_tx_stop_rings(priv, cfg.start_idx, cfg.num_rings);
1067 gve_tx_free_rings_gqi(priv, &cfg);
1068 }
1069 }
1070
1071 static void gve_queues_mem_free(struct gve_priv *priv,
1072 struct gve_tx_alloc_rings_cfg *tx_cfg,
1073 struct gve_rx_alloc_rings_cfg *rx_cfg)
1074 {
1075 if (gve_is_gqi(priv)) {
1076 gve_tx_free_rings_gqi(priv, tx_cfg);
1077 gve_rx_free_rings_gqi(priv, rx_cfg);
1078 } else {
1079 gve_tx_free_rings_dqo(priv, tx_cfg);
1080 gve_rx_free_rings_dqo(priv, rx_cfg);
1081 }
1082 }
1083
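/* Allocate a single page and DMA-map it for the device, counting allocation
 * and mapping failures in the driver stats.
 */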
1084 int gve_alloc_page(struct gve_priv *priv, struct device *dev,
1085 struct page **page, dma_addr_t *dma,
1086 enum dma_data_direction dir, gfp_t gfp_flags)
1087 {
1088 *page = alloc_page(gfp_flags);
1089 if (!*page) {
1090 priv->page_alloc_fail++;
1091 return -ENOMEM;
1092 }
1093 *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
1094 if (dma_mapping_error(dev, *dma)) {
1095 priv->dma_mapping_error++;
1096 put_page(*page);
1097 return -ENOMEM;
1098 }
1099 return 0;
1100 }
1101
1102 struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv,
1103 u32 id, int pages)
1104 {
1105 struct gve_queue_page_list *qpl;
1106 int err;
1107 int i;
1108
1109 qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL);
1110 if (!qpl)
1111 return NULL;
1112
1113 qpl->id = id;
1114 qpl->num_entries = 0;
1115 qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
1116 if (!qpl->pages)
1117 goto abort;
1118
1119 qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL);
1120 if (!qpl->page_buses)
1121 goto abort;
1122
1123 for (i = 0; i < pages; i++) {
1124 err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
1125 &qpl->page_buses[i],
1126 gve_qpl_dma_dir(priv, id), GFP_KERNEL);
1127 if (err)
1128 goto abort;
1129 qpl->num_entries++;
1130 }
1131
1132 return qpl;
1133
1134 abort:
1135 gve_free_queue_page_list(priv, qpl, id);
1136 return NULL;
1137 }
1138
1139 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
1140 enum dma_data_direction dir)
1141 {
1142 if (!dma_mapping_error(dev, dma))
1143 dma_unmap_page(dev, dma, PAGE_SIZE, dir);
1144 if (page)
1145 put_page(page);
1146 }
1147
1148 void gve_free_queue_page_list(struct gve_priv *priv,
1149 struct gve_queue_page_list *qpl,
1150 u32 id)
1151 {
1152 int i;
1153
1154 if (!qpl)
1155 return;
1156 if (!qpl->pages)
1157 goto free_qpl;
1158 if (!qpl->page_buses)
1159 goto free_pages;
1160
1161 for (i = 0; i < qpl->num_entries; i++)
1162 gve_free_page(&priv->pdev->dev, qpl->pages[i],
1163 qpl->page_buses[i], gve_qpl_dma_dir(priv, id));
1164
1165 kvfree(qpl->page_buses);
1166 qpl->page_buses = NULL;
1167 free_pages:
1168 kvfree(qpl->pages);
1169 qpl->pages = NULL;
1170 free_qpl:
1171 kvfree(qpl);
1172 }
1173
1174 /* Use this to schedule a reset when the device is capable of continuing
1175 * to handle other requests in its current state. If it is not, do a reset
1176 * in thread instead.
1177 */
1178 void gve_schedule_reset(struct gve_priv *priv)
1179 {
1180 gve_set_do_reset(priv);
1181 queue_work(priv->gve_wq, &priv->service_task);
1182 }
1183
1184 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
1185 static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
1186 static void gve_turndown(struct gve_priv *priv);
1187 static void gve_turnup(struct gve_priv *priv);
1188
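/* Register xdp_rxq info for every rx queue and, where an XSK pool is bound
 * to a queue, register the zero-copy rxq and point the pool at it. On error,
 * unregister everything registered so far.
 */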
1189 static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
1190 {
1191 struct napi_struct *napi;
1192 struct gve_rx_ring *rx;
1193 int err = 0;
1194 int i, j;
1195 u32 tx_qid;
1196
1197 if (!priv->num_xdp_queues)
1198 return 0;
1199
1200 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1201 rx = &priv->rx[i];
1202 napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1203
1204 err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i,
1205 napi->napi_id);
1206 if (err)
1207 goto err;
1208 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
1209 MEM_TYPE_PAGE_SHARED, NULL);
1210 if (err)
1211 goto err;
1212 rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
1213 if (rx->xsk_pool) {
1214 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
1215 napi->napi_id);
1216 if (err)
1217 goto err;
1218 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
1219 MEM_TYPE_XSK_BUFF_POOL, NULL);
1220 if (err)
1221 goto err;
1222 xsk_pool_set_rxq_info(rx->xsk_pool,
1223 &rx->xsk_rxq);
1224 }
1225 }
1226
1227 for (i = 0; i < priv->num_xdp_queues; i++) {
1228 tx_qid = gve_xdp_tx_queue_id(priv, i);
1229 priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
1230 }
1231 return 0;
1232
1233 err:
1234 for (j = i; j >= 0; j--) {
1235 rx = &priv->rx[j];
1236 if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
1237 xdp_rxq_info_unreg(&rx->xdp_rxq);
1238 if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
1239 xdp_rxq_info_unreg(&rx->xsk_rxq);
1240 }
1241 return err;
1242 }
1243
1244 static void gve_unreg_xdp_info(struct gve_priv *priv)
1245 {
1246 int i, tx_qid;
1247
1248 if (!priv->num_xdp_queues)
1249 return;
1250
1251 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1252 struct gve_rx_ring *rx = &priv->rx[i];
1253
1254 xdp_rxq_info_unreg(&rx->xdp_rxq);
1255 if (rx->xsk_pool) {
1256 xdp_rxq_info_unreg(&rx->xsk_rxq);
1257 rx->xsk_pool = NULL;
1258 }
1259 }
1260
1261 for (i = 0; i < priv->num_xdp_queues; i++) {
1262 tx_qid = gve_xdp_tx_queue_id(priv, i);
1263 priv->tx[tx_qid].xsk_pool = NULL;
1264 }
1265 }
1266
1267 static void gve_drain_page_cache(struct gve_priv *priv)
1268 {
1269 int i;
1270
1271 for (i = 0; i < priv->rx_cfg.num_queues; i++)
1272 page_frag_cache_drain(&priv->rx[i].page_cache);
1273 }
1274
1275 static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv,
1276 struct gve_rx_alloc_rings_cfg *cfg)
1277 {
1278 cfg->qcfg = &priv->rx_cfg;
1279 cfg->qcfg_tx = &priv->tx_cfg;
1280 cfg->raw_addressing = !gve_is_qpl(priv);
1281 cfg->enable_header_split = priv->header_split_enabled;
1282 cfg->ring_size = priv->rx_desc_cnt;
1283 cfg->packet_buffer_size = gve_is_gqi(priv) ?
1284 GVE_DEFAULT_RX_BUFFER_SIZE :
1285 priv->data_buffer_size_dqo;
1286 cfg->rx = priv->rx;
1287 }
1288
1289 void gve_get_curr_alloc_cfgs(struct gve_priv *priv,
1290 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1291 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1292 {
1293 gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg);
1294 gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg);
1295 }
1296
1297 static void gve_rx_start_ring(struct gve_priv *priv, int i)
1298 {
1299 if (gve_is_gqi(priv))
1300 gve_rx_start_ring_gqi(priv, i);
1301 else
1302 gve_rx_start_ring_dqo(priv, i);
1303 }
1304
1305 static void gve_rx_start_rings(struct gve_priv *priv, int num_rings)
1306 {
1307 int i;
1308
1309 for (i = 0; i < num_rings; i++)
1310 gve_rx_start_ring(priv, i);
1311 }
1312
1313 static void gve_rx_stop_ring(struct gve_priv *priv, int i)
1314 {
1315 if (gve_is_gqi(priv))
1316 gve_rx_stop_ring_gqi(priv, i);
1317 else
1318 gve_rx_stop_ring_dqo(priv, i);
1319 }
1320
1321 static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings)
1322 {
1323 int i;
1324
1325 if (!priv->rx)
1326 return;
1327
1328 for (i = 0; i < num_rings; i++)
1329 gve_rx_stop_ring(priv, i);
1330 }
1331
1332 static void gve_queues_mem_remove(struct gve_priv *priv)
1333 {
1334 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
1335 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
1336
1337 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1338 gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1339 priv->tx = NULL;
1340 priv->rx = NULL;
1341 }
1342
1343 /* The passed-in queue memory is stored into priv and the queues are made live.
1344 * No memory is allocated. Passed-in memory is freed on errors.
1345 */
1346 static int gve_queues_start(struct gve_priv *priv,
1347 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1348 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1349 {
1350 struct net_device *dev = priv->dev;
1351 int err;
1352
1353 /* Record new resources into priv */
1354 priv->tx = tx_alloc_cfg->tx;
1355 priv->rx = rx_alloc_cfg->rx;
1356
1357 /* Record new configs into priv */
1358 priv->tx_cfg = *tx_alloc_cfg->qcfg;
1359 priv->rx_cfg = *rx_alloc_cfg->qcfg;
1360 priv->tx_desc_cnt = tx_alloc_cfg->ring_size;
1361 priv->rx_desc_cnt = rx_alloc_cfg->ring_size;
1362
1363 if (priv->xdp_prog)
1364 priv->num_xdp_queues = priv->rx_cfg.num_queues;
1365 else
1366 priv->num_xdp_queues = 0;
1367
1368 gve_tx_start_rings(priv, 0, tx_alloc_cfg->num_rings);
1369 gve_rx_start_rings(priv, rx_alloc_cfg->qcfg->num_queues);
1370 gve_init_sync_stats(priv);
1371
1372 err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
1373 if (err)
1374 goto stop_and_free_rings;
1375 err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
1376 if (err)
1377 goto stop_and_free_rings;
1378
1379 err = gve_reg_xdp_info(priv, dev);
1380 if (err)
1381 goto stop_and_free_rings;
1382
1383 err = gve_register_qpls(priv);
1384 if (err)
1385 goto reset;
1386
1387 priv->header_split_enabled = rx_alloc_cfg->enable_header_split;
1388 priv->data_buffer_size_dqo = rx_alloc_cfg->packet_buffer_size;
1389
1390 err = gve_create_rings(priv);
1391 if (err)
1392 goto reset;
1393
1394 gve_set_device_rings_ok(priv);
1395
1396 if (gve_get_report_stats(priv))
1397 mod_timer(&priv->stats_report_timer,
1398 round_jiffies(jiffies +
1399 msecs_to_jiffies(priv->stats_report_timer_period)));
1400
1401 gve_turnup(priv);
1402 queue_work(priv->gve_wq, &priv->service_task);
1403 priv->interface_up_cnt++;
1404 return 0;
1405
1406 reset:
1407 if (gve_get_reset_in_progress(priv))
1408 goto stop_and_free_rings;
1409 gve_reset_and_teardown(priv, true);
1410 /* if this fails there is nothing we can do so just ignore the return */
1411 gve_reset_recovery(priv, false);
1412 /* return the original error */
1413 return err;
1414 stop_and_free_rings:
1415 gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv));
1416 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues);
1417 gve_queues_mem_remove(priv);
1418 return err;
1419 }
1420
1421 static int gve_open(struct net_device *dev)
1422 {
1423 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
1424 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
1425 struct gve_priv *priv = netdev_priv(dev);
1426 int err;
1427
1428 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1429
1430 err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1431 if (err)
1432 return err;
1433
1434 /* No need to free on error: ownership of resources is lost after
1435 * calling gve_queues_start.
1436 */
1437 err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1438 if (err)
1439 return err;
1440
1441 return 0;
1442 }
1443
1444 static int gve_queues_stop(struct gve_priv *priv)
1445 {
1446 int err;
1447
1448 netif_carrier_off(priv->dev);
1449 if (gve_get_device_rings_ok(priv)) {
1450 gve_turndown(priv);
1451 gve_drain_page_cache(priv);
1452 err = gve_destroy_rings(priv);
1453 if (err)
1454 goto err;
1455 err = gve_unregister_qpls(priv);
1456 if (err)
1457 goto err;
1458 gve_clear_device_rings_ok(priv);
1459 }
1460 del_timer_sync(&priv->stats_report_timer);
1461
1462 gve_unreg_xdp_info(priv);
1463
1464 gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv));
1465 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues);
1466
1467 priv->interface_down_cnt++;
1468 return 0;
1469
1470 err:
1471 /* This must have been called from a reset due to the rtnl lock
1472 * so just return at this point.
1473 */
1474 if (gve_get_reset_in_progress(priv))
1475 return err;
1476 /* Otherwise reset before returning */
1477 gve_reset_and_teardown(priv, true);
1478 return gve_reset_recovery(priv, false);
1479 }
1480
1481 static int gve_close(struct net_device *dev)
1482 {
1483 struct gve_priv *priv = netdev_priv(dev);
1484 int err;
1485
1486 err = gve_queues_stop(priv);
1487 if (err)
1488 return err;
1489
1490 gve_queues_mem_remove(priv);
1491 return 0;
1492 }
1493
1494 static int gve_remove_xdp_queues(struct gve_priv *priv)
1495 {
1496 int err;
1497
1498 err = gve_destroy_xdp_rings(priv);
1499 if (err)
1500 return err;
1501
1502 err = gve_unregister_xdp_qpls(priv);
1503 if (err)
1504 return err;
1505
1506 gve_unreg_xdp_info(priv);
1507 gve_free_xdp_rings(priv);
1508
1509 priv->num_xdp_queues = 0;
1510 return 0;
1511 }
1512
1513 static int gve_add_xdp_queues(struct gve_priv *priv)
1514 {
1515 int err;
1516
1517 priv->num_xdp_queues = priv->rx_cfg.num_queues;
1518
1519 err = gve_alloc_xdp_rings(priv);
1520 if (err)
1521 goto err;
1522
1523 err = gve_reg_xdp_info(priv, priv->dev);
1524 if (err)
1525 goto free_xdp_rings;
1526
1527 err = gve_register_xdp_qpls(priv);
1528 if (err)
1529 goto free_xdp_rings;
1530
1531 err = gve_create_xdp_rings(priv);
1532 if (err)
1533 goto free_xdp_rings;
1534
1535 return 0;
1536
1537 free_xdp_rings:
1538 gve_free_xdp_rings(priv);
1539 err:
1540 priv->num_xdp_queues = 0;
1541 return err;
1542 }
1543
1544 static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
1545 {
1546 if (!gve_get_napi_enabled(priv))
1547 return;
1548
1549 if (link_status == netif_carrier_ok(priv->dev))
1550 return;
1551
1552 if (link_status) {
1553 netdev_info(priv->dev, "Device link is up.\n");
1554 netif_carrier_on(priv->dev);
1555 } else {
1556 netdev_info(priv->dev, "Device link is down.\n");
1557 netif_carrier_off(priv->dev);
1558 }
1559 }
1560
1561 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
1562 struct netlink_ext_ack *extack)
1563 {
1564 struct bpf_prog *old_prog;
1565 int err = 0;
1566 u32 status;
1567
1568 old_prog = READ_ONCE(priv->xdp_prog);
1569 if (!netif_running(priv->dev)) {
1570 WRITE_ONCE(priv->xdp_prog, prog);
1571 if (old_prog)
1572 bpf_prog_put(old_prog);
1573 return 0;
1574 }
1575
1576 gve_turndown(priv);
1577 if (!old_prog && prog) {
1578 // Allocate XDP TX queues if an XDP program is
1579 // being installed
1580 err = gve_add_xdp_queues(priv);
1581 if (err)
1582 goto out;
1583 } else if (old_prog && !prog) {
1584 // Remove XDP TX queues if an XDP program is
1585 // being uninstalled
1586 err = gve_remove_xdp_queues(priv);
1587 if (err)
1588 goto out;
1589 }
1590 WRITE_ONCE(priv->xdp_prog, prog);
1591 if (old_prog)
1592 bpf_prog_put(old_prog);
1593
1594 out:
1595 gve_turnup(priv);
1596 status = ioread32be(&priv->reg_bar0->device_status);
1597 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1598 return err;
1599 }
1600
1601 static int gve_xsk_pool_enable(struct net_device *dev,
1602 struct xsk_buff_pool *pool,
1603 u16 qid)
1604 {
1605 struct gve_priv *priv = netdev_priv(dev);
1606 struct napi_struct *napi;
1607 struct gve_rx_ring *rx;
1608 int tx_qid;
1609 int err;
1610
1611 if (qid >= priv->rx_cfg.num_queues) {
1612 dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid);
1613 return -EINVAL;
1614 }
1615 if (xsk_pool_get_rx_frame_size(pool) <
1616 priv->dev->max_mtu + sizeof(struct ethhdr)) {
1617 dev_err(&priv->pdev->dev, "xsk pool frame_len too small");
1618 return -EINVAL;
1619 }
1620
1621 err = xsk_pool_dma_map(pool, &priv->pdev->dev,
1622 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1623 if (err)
1624 return err;
1625
1626 /* If XDP prog is not installed, return */
1627 if (!priv->xdp_prog)
1628 return 0;
1629
1630 rx = &priv->rx[qid];
1631 napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1632 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id);
1633 if (err)
1634 goto err;
1635
1636 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
1637 MEM_TYPE_XSK_BUFF_POOL, NULL);
1638 if (err)
1639 goto err;
1640
1641 xsk_pool_set_rxq_info(pool, &rx->xsk_rxq);
1642 rx->xsk_pool = pool;
1643
1644 tx_qid = gve_xdp_tx_queue_id(priv, qid);
1645 priv->tx[tx_qid].xsk_pool = pool;
1646
1647 return 0;
1648 err:
1649 if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
1650 xdp_rxq_info_unreg(&rx->xsk_rxq);
1651
1652 xsk_pool_dma_unmap(pool,
1653 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1654 return err;
1655 }
1656
1657 static int gve_xsk_pool_disable(struct net_device *dev,
1658 u16 qid)
1659 {
1660 struct gve_priv *priv = netdev_priv(dev);
1661 struct napi_struct *napi_rx;
1662 struct napi_struct *napi_tx;
1663 struct xsk_buff_pool *pool;
1664 int tx_qid;
1665
1666 pool = xsk_get_pool_from_qid(dev, qid);
1667 if (!pool)
1668 return -EINVAL;
1669 if (qid >= priv->rx_cfg.num_queues)
1670 return -EINVAL;
1671
1672 /* If XDP prog is not installed, unmap DMA and return */
1673 if (!priv->xdp_prog)
1674 goto done;
1675
1676 tx_qid = gve_xdp_tx_queue_id(priv, qid);
1677 if (!netif_running(dev)) {
1678 priv->rx[qid].xsk_pool = NULL;
1679 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
1680 priv->tx[tx_qid].xsk_pool = NULL;
1681 goto done;
1682 }
1683
1684 napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
1685 napi_disable(napi_rx); /* make sure current rx poll is done */
1686
1687 napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
1688 napi_disable(napi_tx); /* make sure current tx poll is done */
1689
1690 priv->rx[qid].xsk_pool = NULL;
1691 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
1692 priv->tx[tx_qid].xsk_pool = NULL;
1693 smp_mb(); /* Make sure it is visible to the workers on datapath */
1694
1695 napi_enable(napi_rx);
1696 if (gve_rx_work_pending(&priv->rx[qid]))
1697 napi_schedule(napi_rx);
1698
1699 napi_enable(napi_tx);
1700 if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
1701 napi_schedule(napi_tx);
1702
1703 done:
1704 xsk_pool_dma_unmap(pool,
1705 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1706 return 0;
1707 }
1708
1709 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
1710 {
1711 struct gve_priv *priv = netdev_priv(dev);
1712 int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id);
1713
1714 if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog)
1715 return -EINVAL;
1716
1717 if (flags & XDP_WAKEUP_TX) {
1718 struct gve_tx_ring *tx = &priv->tx[tx_queue_id];
1719 struct napi_struct *napi =
1720 &priv->ntfy_blocks[tx->ntfy_id].napi;
1721
1722 if (!napi_if_scheduled_mark_missed(napi)) {
1723 /* Call local_bh_enable to trigger SoftIRQ processing */
1724 local_bh_disable();
1725 napi_schedule(napi);
1726 local_bh_enable();
1727 }
1728
1729 tx->xdp_xsk_wakeup++;
1730 }
1731
1732 return 0;
1733 }
1734
1735 static int verify_xdp_configuration(struct net_device *dev)
1736 {
1737 struct gve_priv *priv = netdev_priv(dev);
1738
1739 if (dev->features & NETIF_F_LRO) {
1740 netdev_warn(dev, "XDP is not supported when LRO is on.\n");
1741 return -EOPNOTSUPP;
1742 }
1743
1744 if (priv->queue_format != GVE_GQI_QPL_FORMAT) {
1745 netdev_warn(dev, "XDP is not supported in mode %d.\n",
1746 priv->queue_format);
1747 return -EOPNOTSUPP;
1748 }
1749
1750 if (dev->mtu > GVE_DEFAULT_RX_BUFFER_SIZE - sizeof(struct ethhdr) - GVE_RX_PAD) {
1751 netdev_warn(dev, "XDP is not supported for mtu %d.\n",
1752 dev->mtu);
1753 return -EOPNOTSUPP;
1754 }
1755
1756 if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues ||
1757 (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) {
1758 netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d",
1759 priv->rx_cfg.num_queues,
1760 priv->tx_cfg.num_queues,
1761 priv->tx_cfg.max_queues);
1762 return -EINVAL;
1763 }
1764 return 0;
1765 }
1766
1767 static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
1768 {
1769 struct gve_priv *priv = netdev_priv(dev);
1770 int err;
1771
1772 err = verify_xdp_configuration(dev);
1773 if (err)
1774 return err;
1775 switch (xdp->command) {
1776 case XDP_SETUP_PROG:
1777 return gve_set_xdp(priv, xdp->prog, xdp->extack);
1778 case XDP_SETUP_XSK_POOL:
1779 if (xdp->xsk.pool)
1780 return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id);
1781 else
1782 return gve_xsk_pool_disable(dev, xdp->xsk.queue_id);
1783 default:
1784 return -EINVAL;
1785 }
1786 }
1787
1788 int gve_flow_rules_reset(struct gve_priv *priv)
1789 {
1790 if (!priv->max_flow_rules)
1791 return 0;
1792
1793 return gve_adminq_reset_flow_rules(priv);
1794 }
1795
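/* Apply a new queue configuration on a running interface: allocate the new
 * queue memory first, close the old queues, then restart on the new
 * resources. If the restart fails, the device is left turned down.
 */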
1796 int gve_adjust_config(struct gve_priv *priv,
1797 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1798 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1799 {
1800 int err;
1801
1802 /* Allocate resources for the new configuration */
1803 err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg);
1804 if (err) {
1805 netif_err(priv, drv, priv->dev,
1806 "Adjust config failed to alloc new queues");
1807 return err;
1808 }
1809
1810 /* Teardown the device and free existing resources */
1811 err = gve_close(priv->dev);
1812 if (err) {
1813 netif_err(priv, drv, priv->dev,
1814 "Adjust config failed to close old queues");
1815 gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg);
1816 return err;
1817 }
1818
1819 /* Bring the device back up again with the new resources. */
1820 err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg);
1821 if (err) {
1822 netif_err(priv, drv, priv->dev,
1823 "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n");
1824 /* No need to free on error: ownership of resources is lost after
1825 * calling gve_queues_start.
1826 */
1827 gve_turndown(priv);
1828 return err;
1829 }
1830
1831 return 0;
1832 }
1833
1834 int gve_adjust_queues(struct gve_priv *priv,
1835 struct gve_queue_config new_rx_config,
1836 struct gve_queue_config new_tx_config)
1837 {
1838 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
1839 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
1840 int err;
1841
1842 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1843
1844 /* Relay the new config from ethtool */
1845 tx_alloc_cfg.qcfg = &new_tx_config;
1846 rx_alloc_cfg.qcfg_tx = &new_tx_config;
1847 rx_alloc_cfg.qcfg = &new_rx_config;
1848 tx_alloc_cfg.num_rings = new_tx_config.num_queues;
1849
1850 if (netif_running(priv->dev)) {
1851 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1852 return err;
1853 }
1854 /* Set the config for the next up. */
1855 priv->tx_cfg = new_tx_config;
1856 priv->rx_cfg = new_rx_config;
1857
1858 return 0;
1859 }
1860
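/* Quiesce the data path: drop the carrier, disable napi on every tx/rx
 * notification block, clear the queue/napi mappings and stop the tx queues.
 */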
1861 static void gve_turndown(struct gve_priv *priv)
1862 {
1863 int idx;
1864
1865 if (netif_carrier_ok(priv->dev))
1866 netif_carrier_off(priv->dev);
1867
1868 if (!gve_get_napi_enabled(priv))
1869 return;
1870
1871 /* Disable napi to prevent more work from coming in */
1872 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1873 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1874 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1875
1876 if (!gve_tx_was_added_to_block(priv, idx))
1877 continue;
1878
1879 if (idx < priv->tx_cfg.num_queues)
1880 netif_queue_set_napi(priv->dev, idx,
1881 NETDEV_QUEUE_TYPE_TX, NULL);
1882
1883 napi_disable(&block->napi);
1884 }
1885 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1886 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1887 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1888
1889 if (!gve_rx_was_added_to_block(priv, idx))
1890 continue;
1891
1892 netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX,
1893 NULL);
1894 napi_disable(&block->napi);
1895 }
1896
1897 /* Stop tx queues */
1898 netif_tx_disable(priv->dev);
1899
1900 gve_clear_napi_enabled(priv);
1901 gve_clear_report_stats(priv);
1902 }
1903
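/* Re-enable the data path: start the TX queues, enable NAPI, restore
 * interrupt moderation (or unmask the IRQ doorbell on GQI), and schedule a
 * one-off NAPI pass per queue to handle descriptors that arrived while the
 * queue was quiesced.
 */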
1904 static void gve_turnup(struct gve_priv *priv)
1905 {
1906 int idx;
1907
1908 /* Start the tx queues */
1909 netif_tx_start_all_queues(priv->dev);
1910
1911 /* Enable napi and unmask interrupts for all queues */
1912 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1913 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1914 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1915
1916 if (!gve_tx_was_added_to_block(priv, idx))
1917 continue;
1918
1919 napi_enable(&block->napi);
1920
1921 if (idx < priv->tx_cfg.num_queues)
1922 netif_queue_set_napi(priv->dev, idx,
1923 NETDEV_QUEUE_TYPE_TX,
1924 &block->napi);
1925
1926 if (gve_is_gqi(priv)) {
1927 iowrite32be(0, gve_irq_doorbell(priv, block));
1928 } else {
1929 gve_set_itr_coalesce_usecs_dqo(priv, block,
1930 priv->tx_coalesce_usecs);
1931 }
1932
1933 /* Any descs written by the NIC before this barrier will be
1934 * handled by the one-off napi schedule below. Whereas any
1935 * descs after the barrier will generate interrupts.
1936 */
1937 mb();
1938 napi_schedule(&block->napi);
1939 }
1940 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1941 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1942 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1943
1944 if (!gve_rx_was_added_to_block(priv, idx))
1945 continue;
1946
1947 napi_enable(&block->napi);
1948 netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX,
1949 &block->napi);
1950
1951 if (gve_is_gqi(priv)) {
1952 iowrite32be(0, gve_irq_doorbell(priv, block));
1953 } else {
1954 gve_set_itr_coalesce_usecs_dqo(priv, block,
1955 priv->rx_coalesce_usecs);
1956 }
1957
1958 /* Any descs written by the NIC before this barrier will be
1959 * handled by the one-off napi schedule below. Whereas any
1960 * descs after the barrier will generate interrupts.
1961 */
1962 mb();
1963 napi_schedule(&block->napi);
1964 }
1965
1966 gve_set_napi_enabled(priv);
1967 }
1968
1969 static void gve_turnup_and_check_status(struct gve_priv *priv)
1970 {
1971 u32 status;
1972
1973 gve_turnup(priv);
1974 status = ioread32be(&priv->reg_bar0->device_status);
1975 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1976 }
1977
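/* .ndo_tx_timeout handler: if the stalled queue has missed completions and
 * has not been kicked within MIN_TX_TIMEOUT_GAP, kick it by writing its IRQ
 * doorbell and rescheduling NAPI; otherwise schedule a device reset.
 */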
1978 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
1979 {
1980 struct gve_notify_block *block;
1981 struct gve_tx_ring *tx = NULL;
1982 struct gve_priv *priv;
1983 u32 last_nic_done;
1984 u32 current_time;
1985 u32 ntfy_idx;
1986
1987 netdev_info(dev, "Timeout on tx queue, %d", txqueue);
1988 priv = netdev_priv(dev);
1989 if (txqueue > priv->tx_cfg.num_queues)
1990 goto reset;
1991
1992 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
1993 if (ntfy_idx >= priv->num_ntfy_blks)
1994 goto reset;
1995
1996 block = &priv->ntfy_blocks[ntfy_idx];
1997 tx = block->tx;
1998
1999 current_time = jiffies_to_msecs(jiffies);
2000 if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
2001 goto reset;
2002
2003 /* Check to see if there are missed completions, which will allow us to
2004 * kick the queue.
2005 */
2006 last_nic_done = gve_tx_load_event_counter(priv, tx);
2007 if (last_nic_done - tx->done) {
2008 netdev_info(dev, "Kicking queue %d", txqueue);
2009 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
2010 napi_schedule(&block->napi);
2011 tx->last_kick_msec = current_time;
2012 goto out;
2013 } // Else reset.
2014
2015 reset:
2016 gve_schedule_reset(priv);
2017
2018 out:
2019 if (tx)
2020 tx->queue_timeout++;
2021 priv->tx_timeo_cnt++;
2022 }
2023
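/* Pick the RX packet buffer size: the large buffer when header-split is
 * requested and the device supports it, the default size otherwise.
 */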
2024 u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit)
2025 {
2026 if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE)
2027 return GVE_MAX_RX_BUFFER_SIZE;
2028 else
2029 return GVE_DEFAULT_RX_BUFFER_SIZE;
2030 }
2031
2032 /* Header-split is not yet supported on non-DQO_RDA queue formats, even if the device advertises it */
2033 bool gve_header_split_supported(const struct gve_priv *priv)
2034 {
2035 return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT;
2036 }
2037
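/* Enable or disable RX header-split based on an ethtool request, adjusting
 * the packet buffer size to match and reconfiguring the rings if the
 * interface is running.
 */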
2038 int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split)
2039 {
2040 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
2041 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
2042 bool enable_hdr_split;
2043 int err = 0;
2044
2045 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN)
2046 return 0;
2047
2048 if (!gve_header_split_supported(priv)) {
2049 dev_err(&priv->pdev->dev, "Header-split not supported\n");
2050 return -EOPNOTSUPP;
2051 }
2052
2053 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED)
2054 enable_hdr_split = true;
2055 else
2056 enable_hdr_split = false;
2057
2058 if (enable_hdr_split == priv->header_split_enabled)
2059 return 0;
2060
2061 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
2062
2063 rx_alloc_cfg.enable_header_split = enable_hdr_split;
2064 rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split);
2065
2066 if (netif_running(priv->dev))
2067 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
2068 return err;
2069 }
2070
2071 static int gve_set_features(struct net_device *netdev,
2072 netdev_features_t features)
2073 {
2074 const netdev_features_t orig_features = netdev->features;
2075 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
2076 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
2077 struct gve_priv *priv = netdev_priv(netdev);
2078 int err;
2079
2080 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
2081
2082 if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
2083 netdev->features ^= NETIF_F_LRO;
2084 if (netif_running(netdev)) {
2085 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
2086 if (err)
2087 goto revert_features;
2088 }
2089 }
2090 if ((netdev->features & NETIF_F_NTUPLE) && !(features & NETIF_F_NTUPLE)) {
2091 err = gve_flow_rules_reset(priv);
2092 if (err)
2093 goto revert_features;
2094 }
2095
2096 return 0;
2097
2098 revert_features:
2099 netdev->features = orig_features;
2100 return err;
2101 }
2102
2103 static const struct net_device_ops gve_netdev_ops = {
2104 .ndo_start_xmit = gve_start_xmit,
2105 .ndo_features_check = gve_features_check,
2106 .ndo_open = gve_open,
2107 .ndo_stop = gve_close,
2108 .ndo_get_stats64 = gve_get_stats,
2109 .ndo_tx_timeout = gve_tx_timeout,
2110 .ndo_set_features = gve_set_features,
2111 .ndo_bpf = gve_xdp,
2112 .ndo_xdp_xmit = gve_xdp_xmit,
2113 .ndo_xsk_wakeup = gve_xsk_wakeup,
2114 };
2115
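/* Translate device status register bits into driver actions: latch a reset
 * request and/or a stats-report request for the service task to act on.
 */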
2116 static void gve_handle_status(struct gve_priv *priv, u32 status)
2117 {
2118 if (GVE_DEVICE_STATUS_RESET_MASK & status) {
2119 dev_info(&priv->pdev->dev, "Device requested reset.\n");
2120 gve_set_do_reset(priv);
2121 }
2122 if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
2123 priv->stats_report_trigger_cnt++;
2124 gve_set_do_report_stats(priv);
2125 }
2126 }
2127
2128 static void gve_handle_reset(struct gve_priv *priv)
2129 {
2130 /* A service task will be scheduled at the end of probe to catch any
2131 * resets that need to happen, and we don't want to reset until
2132 * probe is done.
2133 */
2134 if (gve_get_probe_in_progress(priv))
2135 return;
2136
2137 if (gve_get_do_reset(priv)) {
2138 rtnl_lock();
2139 gve_reset(priv, false);
2140 rtnl_unlock();
2141 }
2142 }
2143
2144 void gve_handle_report_stats(struct gve_priv *priv)
2145 {
2146 struct stats *stats = priv->stats_report->stats;
2147 int idx, stats_idx = 0;
2148 unsigned int start = 0;
2149 u64 tx_bytes;
2150
2151 if (!gve_get_report_stats(priv))
2152 return;
2153
2154 be64_add_cpu(&priv->stats_report->written_count, 1);
2155 /* tx stats */
2156 if (priv->tx) {
2157 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
2158 u32 last_completion = 0;
2159 u32 tx_frames = 0;
2160
2161 /* DQO doesn't currently support these metrics. */
2162 if (gve_is_gqi(priv)) {
2163 last_completion = priv->tx[idx].done;
2164 tx_frames = priv->tx[idx].req;
2165 }
2166
2167 do {
2168 start = u64_stats_fetch_begin(&priv->tx[idx].statss);
2169 tx_bytes = priv->tx[idx].bytes_done;
2170 } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
2171 stats[stats_idx++] = (struct stats) {
2172 .stat_name = cpu_to_be32(TX_WAKE_CNT),
2173 .value = cpu_to_be64(priv->tx[idx].wake_queue),
2174 .queue_id = cpu_to_be32(idx),
2175 };
2176 stats[stats_idx++] = (struct stats) {
2177 .stat_name = cpu_to_be32(TX_STOP_CNT),
2178 .value = cpu_to_be64(priv->tx[idx].stop_queue),
2179 .queue_id = cpu_to_be32(idx),
2180 };
2181 stats[stats_idx++] = (struct stats) {
2182 .stat_name = cpu_to_be32(TX_FRAMES_SENT),
2183 .value = cpu_to_be64(tx_frames),
2184 .queue_id = cpu_to_be32(idx),
2185 };
2186 stats[stats_idx++] = (struct stats) {
2187 .stat_name = cpu_to_be32(TX_BYTES_SENT),
2188 .value = cpu_to_be64(tx_bytes),
2189 .queue_id = cpu_to_be32(idx),
2190 };
2191 stats[stats_idx++] = (struct stats) {
2192 .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
2193 .value = cpu_to_be64(last_completion),
2194 .queue_id = cpu_to_be32(idx),
2195 };
2196 stats[stats_idx++] = (struct stats) {
2197 .stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
2198 .value = cpu_to_be64(priv->tx[idx].queue_timeout),
2199 .queue_id = cpu_to_be32(idx),
2200 };
2201 }
2202 }
2203 /* rx stats */
2204 if (priv->rx) {
2205 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
2206 stats[stats_idx++] = (struct stats) {
2207 .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
2208 .value = cpu_to_be64(priv->rx[idx].desc.seqno),
2209 .queue_id = cpu_to_be32(idx),
2210 };
2211 stats[stats_idx++] = (struct stats) {
2212 .stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
2213 .value = cpu_to_be64(priv->rx[idx].fill_cnt),
2214 .queue_id = cpu_to_be32(idx),
2215 };
2216 }
2217 }
2218 }
2219
2220 /* Handle NIC status register changes, reset requests and report stats */
2221 static void gve_service_task(struct work_struct *work)
2222 {
2223 struct gve_priv *priv = container_of(work, struct gve_priv,
2224 service_task);
2225 u32 status = ioread32be(&priv->reg_bar0->device_status);
2226
2227 gve_handle_status(priv, status);
2228
2229 gve_handle_reset(priv);
2230 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
2231 }
2232
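/* Advertise XDP features: only the GQI-QPL queue format currently supports
 * XDP (basic, redirect, ndo_xmit and AF_XDP zero-copy).
 */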
2233 static void gve_set_netdev_xdp_features(struct gve_priv *priv)
2234 {
2235 if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
2236 priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC;
2237 priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT;
2238 priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
2239 priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
2240 } else {
2241 priv->dev->xdp_features = 0;
2242 }
2243 }
2244
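/* Device bring-up: allocate the admin queue, verify driver compatibility,
 * describe the device, size the MSI-X notification blocks and queue limits,
 * then set up device resources. With skip_describe_device the previously
 * discovered configuration is reused (reset/resume path).
 */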
2245 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
2246 {
2247 int num_ntfy;
2248 int err;
2249
2250 /* Set up the adminq */
2251 err = gve_adminq_alloc(&priv->pdev->dev, priv);
2252 if (err) {
2253 dev_err(&priv->pdev->dev,
2254 "Failed to alloc admin queue: err=%d\n", err);
2255 return err;
2256 }
2257
2258 err = gve_verify_driver_compatibility(priv);
2259 if (err) {
2260 dev_err(&priv->pdev->dev,
2261 "Could not verify driver compatibility: err=%d\n", err);
2262 goto err;
2263 }
2264
2265 priv->num_registered_pages = 0;
2266
2267 if (skip_describe_device)
2268 goto setup_device;
2269
2270 priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
2271 /* Get the initial information we need from the device */
2272 err = gve_adminq_describe_device(priv);
2273 if (err) {
2274 dev_err(&priv->pdev->dev,
2275 "Could not get device information: err=%d\n", err);
2276 goto err;
2277 }
2278 priv->dev->mtu = priv->dev->max_mtu;
2279 num_ntfy = pci_msix_vec_count(priv->pdev);
2280 if (num_ntfy <= 0) {
2281 dev_err(&priv->pdev->dev,
2282 "could not count MSI-x vectors: err=%d\n", num_ntfy);
2283 err = num_ntfy;
2284 goto err;
2285 } else if (num_ntfy < GVE_MIN_MSIX) {
2286 dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
2287 GVE_MIN_MSIX, num_ntfy);
2288 err = -EINVAL;
2289 goto err;
2290 }
2291
2292 	/* Big TCP is only supported on DQ */
2293 if (!gve_is_gqi(priv))
2294 netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);
2295
2296 priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
2297 /* gvnic has one Notification Block per MSI-x vector, except for the
2298 * management vector
2299 */
2300 priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
2301 priv->mgmt_msix_idx = priv->num_ntfy_blks;
2302
2303 priv->tx_cfg.max_queues =
2304 min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
2305 priv->rx_cfg.max_queues =
2306 min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);
2307
2308 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
2309 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
2310 if (priv->default_num_queues > 0) {
2311 priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
2312 priv->tx_cfg.num_queues);
2313 priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
2314 priv->rx_cfg.num_queues);
2315 }
2316
2317 dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
2318 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
2319 dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
2320 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
2321
2322 if (!gve_is_gqi(priv)) {
2323 priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
2324 priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
2325 }
2326
2327 setup_device:
2328 gve_set_netdev_xdp_features(priv);
2329 err = gve_setup_device_resources(priv);
2330 if (!err)
2331 return 0;
2332 err:
2333 gve_adminq_free(&priv->pdev->dev, priv);
2334 return err;
2335 }
2336
2337 static void gve_teardown_priv_resources(struct gve_priv *priv)
2338 {
2339 gve_teardown_device_resources(priv);
2340 gve_adminq_free(&priv->pdev->dev, priv);
2341 }
2342
2343 static void gve_trigger_reset(struct gve_priv *priv)
2344 {
2345 /* Reset the device by releasing the AQ */
2346 gve_adminq_release(priv);
2347 }
2348
2349 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
2350 {
2351 gve_trigger_reset(priv);
2352 /* With the reset having already happened, close cannot fail */
2353 if (was_up)
2354 gve_close(priv->dev);
2355 gve_teardown_priv_resources(priv);
2356 }
2357
2358 static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
2359 {
2360 int err;
2361
2362 err = gve_init_priv(priv, true);
2363 if (err)
2364 goto err;
2365 if (was_up) {
2366 err = gve_open(priv->dev);
2367 if (err)
2368 goto err;
2369 }
2370 return 0;
2371 err:
2372 dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
2373 gve_turndown(priv);
2374 return err;
2375 }
2376
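/* Full device reset: either tear down immediately (attempt_teardown ==
 * false) or try a normal close first, then re-initialize and, if the
 * interface was up, reopen it.
 */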
2377 int gve_reset(struct gve_priv *priv, bool attempt_teardown)
2378 {
2379 bool was_up = netif_running(priv->dev);
2380 int err;
2381
2382 dev_info(&priv->pdev->dev, "Performing reset\n");
2383 gve_clear_do_reset(priv);
2384 gve_set_reset_in_progress(priv);
2385 /* If we aren't attempting to teardown normally, just go turndown and
2386 * reset right away.
2387 */
2388 if (!attempt_teardown) {
2389 gve_turndown(priv);
2390 gve_reset_and_teardown(priv, was_up);
2391 } else {
2392 /* Otherwise attempt to close normally */
2393 if (was_up) {
2394 err = gve_close(priv->dev);
2395 /* If that fails reset as we did above */
2396 if (err)
2397 gve_reset_and_teardown(priv, was_up);
2398 }
2399 /* Clean up any remaining resources */
2400 gve_teardown_priv_resources(priv);
2401 }
2402
2403 /* Set it all back up */
2404 err = gve_reset_recovery(priv, was_up);
2405 gve_clear_reset_in_progress(priv);
2406 priv->reset_cnt++;
2407 priv->interface_up_cnt = 0;
2408 priv->interface_down_cnt = 0;
2409 priv->stats_report_trigger_cnt = 0;
2410 return err;
2411 }
2412
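/* Report the driver version to the device by streaming the prefix and
 * version strings, byte by byte, into the driver version register.
 */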
2413 static void gve_write_version(u8 __iomem *driver_version_register)
2414 {
2415 const char *c = gve_version_prefix;
2416
2417 while (*c) {
2418 writeb(*c, driver_version_register);
2419 c++;
2420 }
2421
2422 c = gve_version_str;
2423 while (*c) {
2424 writeb(*c, driver_version_register);
2425 c++;
2426 }
2427 writeb('\n', driver_version_register);
2428 }
2429
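/* Queue-mgmt op: stop a single RX queue. The whole data path is quiesced
 * while the queue is destroyed on the device, then the remaining queues are
 * turned back up and the ring state is handed back via per_q_mem.
 */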
2430 static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx)
2431 {
2432 struct gve_priv *priv = netdev_priv(dev);
2433 struct gve_rx_ring *gve_per_q_mem;
2434 int err;
2435
2436 if (!priv->rx)
2437 return -EAGAIN;
2438
2439 /* Destroying queue 0 while other queues exist is not supported in DQO */
2440 if (!gve_is_gqi(priv) && idx == 0)
2441 return -ERANGE;
2442
2443 /* Single-queue destruction requires quiescence on all queues */
2444 gve_turndown(priv);
2445
2446 /* This failure will trigger a reset - no need to clean up */
2447 err = gve_adminq_destroy_single_rx_queue(priv, idx);
2448 if (err)
2449 return err;
2450
2451 if (gve_is_qpl(priv)) {
2452 /* This failure will trigger a reset - no need to clean up */
2453 err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx));
2454 if (err)
2455 return err;
2456 }
2457
2458 gve_rx_stop_ring(priv, idx);
2459
2460 /* Turn the unstopped queues back up */
2461 gve_turnup_and_check_status(priv);
2462
2463 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
2464 *gve_per_q_mem = priv->rx[idx];
2465 memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
2466 return 0;
2467 }
2468
2469 static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem)
2470 {
2471 struct gve_priv *priv = netdev_priv(dev);
2472 struct gve_rx_alloc_rings_cfg cfg = {0};
2473 struct gve_rx_ring *gve_per_q_mem;
2474
2475 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
2476 gve_rx_get_curr_alloc_cfg(priv, &cfg);
2477
2478 if (gve_is_gqi(priv))
2479 gve_rx_free_ring_gqi(priv, gve_per_q_mem, &cfg);
2480 else
2481 gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg);
2482 }
2483
2484 static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem,
2485 int idx)
2486 {
2487 struct gve_priv *priv = netdev_priv(dev);
2488 struct gve_rx_alloc_rings_cfg cfg = {0};
2489 struct gve_rx_ring *gve_per_q_mem;
2490 int err;
2491
2492 if (!priv->rx)
2493 return -EAGAIN;
2494
2495 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
2496 gve_rx_get_curr_alloc_cfg(priv, &cfg);
2497
2498 if (gve_is_gqi(priv))
2499 err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx);
2500 else
2501 err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx);
2502
2503 return err;
2504 }
2505
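/* Queue-mgmt op: restart a single RX queue from previously allocated ring
 * memory. As with stop, all queues are quiesced around the per-queue device
 * commands; any failure here results in a scheduled reset.
 */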
2506 static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx)
2507 {
2508 struct gve_priv *priv = netdev_priv(dev);
2509 struct gve_rx_ring *gve_per_q_mem;
2510 int err;
2511
2512 if (!priv->rx)
2513 return -EAGAIN;
2514
2515 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
2516 priv->rx[idx] = *gve_per_q_mem;
2517
2518 /* Single-queue creation requires quiescence on all queues */
2519 gve_turndown(priv);
2520
2521 gve_rx_start_ring(priv, idx);
2522
2523 if (gve_is_qpl(priv)) {
2524 /* This failure will trigger a reset - no need to clean up */
2525 err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx));
2526 if (err)
2527 goto abort;
2528 }
2529
2530 /* This failure will trigger a reset - no need to clean up */
2531 err = gve_adminq_create_single_rx_queue(priv, idx);
2532 if (err)
2533 goto abort;
2534
2535 if (gve_is_gqi(priv))
2536 gve_rx_write_doorbell(priv, &priv->rx[idx]);
2537 else
2538 gve_rx_post_buffers_dqo(&priv->rx[idx]);
2539
2540 /* Turn the unstopped queues back up */
2541 gve_turnup_and_check_status(priv);
2542 return 0;
2543
2544 abort:
2545 gve_rx_stop_ring(priv, idx);
2546
2547 /* All failures in this func result in a reset, by clearing the struct
2548 * at idx, we prevent a double free when that reset runs. The reset,
2549 * which needs the rtnl lock, will not run till this func returns and
2550 * its caller gives up the lock.
2551 */
2552 memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
2553 return err;
2554 }
2555
2556 static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = {
2557 .ndo_queue_mem_size = sizeof(struct gve_rx_ring),
2558 .ndo_queue_mem_alloc = gve_rx_queue_mem_alloc,
2559 .ndo_queue_mem_free = gve_rx_queue_mem_free,
2560 .ndo_queue_start = gve_rx_queue_start,
2561 .ndo_queue_stop = gve_rx_queue_stop,
2562 };
2563
2564 static void gve_get_rx_queue_stats(struct net_device *dev, int idx,
2565 struct netdev_queue_stats_rx *rx_stats)
2566 {
2567 struct gve_priv *priv = netdev_priv(dev);
2568 struct gve_rx_ring *rx = &priv->rx[idx];
2569 unsigned int start;
2570
2571 do {
2572 start = u64_stats_fetch_begin(&rx->statss);
2573 rx_stats->packets = rx->rpackets;
2574 rx_stats->bytes = rx->rbytes;
2575 rx_stats->alloc_fail = rx->rx_skb_alloc_fail +
2576 rx->rx_buf_alloc_fail;
2577 } while (u64_stats_fetch_retry(&rx->statss, start));
2578 }
2579
2580 static void gve_get_tx_queue_stats(struct net_device *dev, int idx,
2581 struct netdev_queue_stats_tx *tx_stats)
2582 {
2583 struct gve_priv *priv = netdev_priv(dev);
2584 struct gve_tx_ring *tx = &priv->tx[idx];
2585 unsigned int start;
2586
2587 do {
2588 start = u64_stats_fetch_begin(&tx->statss);
2589 tx_stats->packets = tx->pkt_done;
2590 tx_stats->bytes = tx->bytes_done;
2591 } while (u64_stats_fetch_retry(&tx->statss, start));
2592 }
2593
2594 static void gve_get_base_stats(struct net_device *dev,
2595 struct netdev_queue_stats_rx *rx,
2596 struct netdev_queue_stats_tx *tx)
2597 {
2598 rx->packets = 0;
2599 rx->bytes = 0;
2600 rx->alloc_fail = 0;
2601
2602 tx->packets = 0;
2603 tx->bytes = 0;
2604 }
2605
2606 static const struct netdev_stat_ops gve_stat_ops = {
2607 .get_queue_stats_rx = gve_get_rx_queue_stats,
2608 .get_queue_stats_tx = gve_get_tx_queue_stats,
2609 .get_base_stats = gve_get_base_stats,
2610 };
2611
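/* PCI probe: map the register and doorbell BARs, allocate the netdev from
 * the device's advertised queue limits, initialize private state and work
 * items, then register the netdev and kick the service task.
 */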
2612 static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
2613 {
2614 int max_tx_queues, max_rx_queues;
2615 struct net_device *dev;
2616 __be32 __iomem *db_bar;
2617 struct gve_registers __iomem *reg_bar;
2618 struct gve_priv *priv;
2619 int err;
2620
2621 err = pci_enable_device(pdev);
2622 if (err)
2623 return err;
2624
2625 err = pci_request_regions(pdev, gve_driver_name);
2626 if (err)
2627 goto abort_with_enabled;
2628
2629 pci_set_master(pdev);
2630
2631 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
2632 if (err) {
2633 dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
2634 goto abort_with_pci_region;
2635 }
2636
2637 reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
2638 if (!reg_bar) {
2639 dev_err(&pdev->dev, "Failed to map pci bar!\n");
2640 err = -ENOMEM;
2641 goto abort_with_pci_region;
2642 }
2643
2644 db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
2645 if (!db_bar) {
2646 dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
2647 err = -ENOMEM;
2648 goto abort_with_reg_bar;
2649 }
2650
2651 	gve_write_version(&reg_bar->driver_version);
2652 	/* Get max queues to alloc etherdev */
2653 	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
2654 	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
2655 /* Alloc and setup the netdev and priv */
2656 dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
2657 if (!dev) {
2658 dev_err(&pdev->dev, "could not allocate netdev\n");
2659 err = -ENOMEM;
2660 goto abort_with_db_bar;
2661 }
2662 SET_NETDEV_DEV(dev, &pdev->dev);
2663 pci_set_drvdata(pdev, dev);
2664 dev->ethtool_ops = &gve_ethtool_ops;
2665 dev->netdev_ops = &gve_netdev_ops;
2666 dev->queue_mgmt_ops = &gve_queue_mgmt_ops;
2667 dev->stat_ops = &gve_stat_ops;
2668
2669 /* Set default and supported features.
2670 *
2671 * Features might be set in other locations as well (such as
2672 * `gve_adminq_describe_device`).
2673 */
2674 dev->hw_features = NETIF_F_HIGHDMA;
2675 dev->hw_features |= NETIF_F_SG;
2676 dev->hw_features |= NETIF_F_HW_CSUM;
2677 dev->hw_features |= NETIF_F_TSO;
2678 dev->hw_features |= NETIF_F_TSO6;
2679 dev->hw_features |= NETIF_F_TSO_ECN;
2680 dev->hw_features |= NETIF_F_RXCSUM;
2681 dev->hw_features |= NETIF_F_RXHASH;
2682 dev->features = dev->hw_features;
2683 dev->watchdog_timeo = 5 * HZ;
2684 dev->min_mtu = ETH_MIN_MTU;
2685 netif_carrier_off(dev);
2686
2687 priv = netdev_priv(dev);
2688 priv->dev = dev;
2689 priv->pdev = pdev;
2690 priv->msg_enable = DEFAULT_MSG_LEVEL;
2691 priv->reg_bar0 = reg_bar;
2692 priv->db_bar2 = db_bar;
2693 priv->service_task_flags = 0x0;
2694 priv->state_flags = 0x0;
2695 priv->ethtool_flags = 0x0;
2696 priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE;
2697 priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;
2698
2699 gve_set_probe_in_progress(priv);
2700 priv->gve_wq = alloc_ordered_workqueue("gve", 0);
2701 if (!priv->gve_wq) {
2702 dev_err(&pdev->dev, "Could not allocate workqueue");
2703 err = -ENOMEM;
2704 goto abort_with_netdev;
2705 }
2706 INIT_WORK(&priv->service_task, gve_service_task);
2707 INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
2708 priv->tx_cfg.max_queues = max_tx_queues;
2709 priv->rx_cfg.max_queues = max_rx_queues;
2710
2711 err = gve_init_priv(priv, false);
2712 if (err)
2713 goto abort_with_wq;
2714
2715 err = register_netdev(dev);
2716 if (err)
2717 goto abort_with_gve_init;
2718
2719 dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
2720 dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
2721 gve_clear_probe_in_progress(priv);
2722 queue_work(priv->gve_wq, &priv->service_task);
2723 return 0;
2724
2725 abort_with_gve_init:
2726 gve_teardown_priv_resources(priv);
2727
2728 abort_with_wq:
2729 destroy_workqueue(priv->gve_wq);
2730
2731 abort_with_netdev:
2732 free_netdev(dev);
2733
2734 abort_with_db_bar:
2735 pci_iounmap(pdev, db_bar);
2736
2737 abort_with_reg_bar:
2738 pci_iounmap(pdev, reg_bar);
2739
2740 abort_with_pci_region:
2741 pci_release_regions(pdev);
2742
2743 abort_with_enabled:
2744 pci_disable_device(pdev);
2745 return err;
2746 }
2747
2748 static void gve_remove(struct pci_dev *pdev)
2749 {
2750 struct net_device *netdev = pci_get_drvdata(pdev);
2751 struct gve_priv *priv = netdev_priv(netdev);
2752 __be32 __iomem *db_bar = priv->db_bar2;
2753 void __iomem *reg_bar = priv->reg_bar0;
2754
2755 unregister_netdev(netdev);
2756 gve_teardown_priv_resources(priv);
2757 destroy_workqueue(priv->gve_wq);
2758 free_netdev(netdev);
2759 pci_iounmap(pdev, db_bar);
2760 pci_iounmap(pdev, reg_bar);
2761 pci_release_regions(pdev);
2762 pci_disable_device(pdev);
2763 }
2764
2765 static void gve_shutdown(struct pci_dev *pdev)
2766 {
2767 struct net_device *netdev = pci_get_drvdata(pdev);
2768 struct gve_priv *priv = netdev_priv(netdev);
2769 bool was_up = netif_running(priv->dev);
2770
2771 rtnl_lock();
2772 if (was_up && gve_close(priv->dev)) {
2773 /* If the dev was up, attempt to close, if close fails, reset */
2774 gve_reset_and_teardown(priv, was_up);
2775 } else {
2776 /* If the dev wasn't up or close worked, finish tearing down */
2777 gve_teardown_priv_resources(priv);
2778 }
2779 rtnl_unlock();
2780 }
2781
2782 #ifdef CONFIG_PM
2783 static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
2784 {
2785 struct net_device *netdev = pci_get_drvdata(pdev);
2786 struct gve_priv *priv = netdev_priv(netdev);
2787 bool was_up = netif_running(priv->dev);
2788
2789 priv->suspend_cnt++;
2790 rtnl_lock();
2791 if (was_up && gve_close(priv->dev)) {
2792 /* If the dev was up, attempt to close, if close fails, reset */
2793 gve_reset_and_teardown(priv, was_up);
2794 } else {
2795 /* If the dev wasn't up or close worked, finish tearing down */
2796 gve_teardown_priv_resources(priv);
2797 }
2798 priv->up_before_suspend = was_up;
2799 rtnl_unlock();
2800 return 0;
2801 }
2802
2803 static int gve_resume(struct pci_dev *pdev)
2804 {
2805 struct net_device *netdev = pci_get_drvdata(pdev);
2806 struct gve_priv *priv = netdev_priv(netdev);
2807 int err;
2808
2809 priv->resume_cnt++;
2810 rtnl_lock();
2811 err = gve_reset_recovery(priv, priv->up_before_suspend);
2812 rtnl_unlock();
2813 return err;
2814 }
2815 #endif /* CONFIG_PM */
2816
2817 static const struct pci_device_id gve_id_table[] = {
2818 { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
2819 { }
2820 };
2821
2822 static struct pci_driver gve_driver = {
2823 .name = gve_driver_name,
2824 .id_table = gve_id_table,
2825 .probe = gve_probe,
2826 .remove = gve_remove,
2827 .shutdown = gve_shutdown,
2828 #ifdef CONFIG_PM
2829 .suspend = gve_suspend,
2830 .resume = gve_resume,
2831 #endif
2832 };
2833
2834 module_pci_driver(gve_driver);
2835
2836 MODULE_DEVICE_TABLE(pci, gve_id_table);
2837 MODULE_AUTHOR("Google, Inc.");
2838 MODULE_DESCRIPTION("Google Virtual NIC Driver");
2839 MODULE_LICENSE("Dual MIT/GPL");
2840 MODULE_VERSION(GVE_VERSION);
2841