1 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
2 /* Google virtual Ethernet (gve) driver
3 *
4 * Copyright (C) 2015-2024 Google LLC
5 */
6
7 #include <linux/bpf.h>
8 #include <linux/cpumask.h>
9 #include <linux/etherdevice.h>
10 #include <linux/filter.h>
11 #include <linux/interrupt.h>
12 #include <linux/irq.h>
13 #include <linux/module.h>
14 #include <linux/pci.h>
15 #include <linux/sched.h>
16 #include <linux/timer.h>
17 #include <linux/workqueue.h>
18 #include <linux/utsname.h>
19 #include <linux/version.h>
20 #include <net/netdev_queues.h>
21 #include <net/sch_generic.h>
22 #include <net/xdp_sock_drv.h>
23 #include "gve.h"
24 #include "gve_dqo.h"
25 #include "gve_adminq.h"
26 #include "gve_register.h"
27 #include "gve_utils.h"
28
29 #define GVE_DEFAULT_RX_COPYBREAK (256)
30
31 #define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK)
32 #define GVE_VERSION "1.0.0"
33 #define GVE_VERSION_PREFIX "GVE-"
34
35 // Minimum amount of time between queue kicks in msec (10 seconds)
36 #define MIN_TX_TIMEOUT_GAP (1000 * 10)
37
38 char gve_driver_name[] = "gve";
39 const char gve_version_str[] = GVE_VERSION;
40 static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
41
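/* Report driver and OS version information to the device over the admin
 * queue; a device that does not support this command is not treated as an
 * error.
 */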
42 static int gve_verify_driver_compatibility(struct gve_priv *priv)
43 {
44 int err;
45 struct gve_driver_info *driver_info;
46 dma_addr_t driver_info_bus;
47
48 driver_info = dma_alloc_coherent(&priv->pdev->dev,
49 sizeof(struct gve_driver_info),
50 &driver_info_bus, GFP_KERNEL);
51 if (!driver_info)
52 return -ENOMEM;
53
54 *driver_info = (struct gve_driver_info) {
55 .os_type = 1, /* Linux */
56 .os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
57 .os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
58 .os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
59 .driver_capability_flags = {
60 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
61 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
62 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
63 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
64 },
65 };
66 strscpy(driver_info->os_version_str1, utsname()->release,
67 sizeof(driver_info->os_version_str1));
68 strscpy(driver_info->os_version_str2, utsname()->version,
69 sizeof(driver_info->os_version_str2));
70
71 err = gve_adminq_verify_driver_compatibility(priv,
72 sizeof(struct gve_driver_info),
73 driver_info_bus);
74
75 /* It's ok if the device doesn't support this */
76 if (err == -EOPNOTSUPP)
77 err = 0;
78
79 dma_free_coherent(&priv->pdev->dev,
80 sizeof(struct gve_driver_info),
81 driver_info, driver_info_bus);
82 return err;
83 }
84
85 static netdev_features_t gve_features_check(struct sk_buff *skb,
86 struct net_device *dev,
87 netdev_features_t features)
88 {
89 struct gve_priv *priv = netdev_priv(dev);
90
91 if (!gve_is_gqi(priv))
92 return gve_features_check_dqo(skb, dev, features);
93
94 return features;
95 }
96
97 static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
98 {
99 struct gve_priv *priv = netdev_priv(dev);
100
101 if (gve_is_gqi(priv))
102 return gve_tx(skb, dev);
103 else
104 return gve_tx_dqo(skb, dev);
105 }
106
107 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
108 {
109 struct gve_priv *priv = netdev_priv(dev);
110 unsigned int start;
111 u64 packets, bytes;
112 int num_tx_queues;
113 int ring;
114
115 num_tx_queues = gve_num_tx_queues(priv);
116 if (priv->rx) {
117 for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
118 do {
119 start =
120 u64_stats_fetch_begin(&priv->rx[ring].statss);
121 packets = priv->rx[ring].rpackets;
122 bytes = priv->rx[ring].rbytes;
123 } while (u64_stats_fetch_retry(&priv->rx[ring].statss,
124 start));
125 s->rx_packets += packets;
126 s->rx_bytes += bytes;
127 }
128 }
129 if (priv->tx) {
130 for (ring = 0; ring < num_tx_queues; ring++) {
131 do {
132 start =
133 u64_stats_fetch_begin(&priv->tx[ring].statss);
134 packets = priv->tx[ring].pkt_done;
135 bytes = priv->tx[ring].bytes_done;
136 } while (u64_stats_fetch_retry(&priv->tx[ring].statss,
137 start));
138 s->tx_packets += packets;
139 s->tx_bytes += bytes;
140 }
141 }
142 }
143
144 static int gve_alloc_flow_rule_caches(struct gve_priv *priv)
145 {
146 struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;
147 int err = 0;
148
149 if (!priv->max_flow_rules)
150 return 0;
151
152 flow_rules_cache->rules_cache =
153 kvcalloc(GVE_FLOW_RULES_CACHE_SIZE, sizeof(*flow_rules_cache->rules_cache),
154 GFP_KERNEL);
155 if (!flow_rules_cache->rules_cache) {
156 dev_err(&priv->pdev->dev, "Cannot alloc flow rules cache\n");
157 return -ENOMEM;
158 }
159
160 flow_rules_cache->rule_ids_cache =
161 kvcalloc(GVE_FLOW_RULE_IDS_CACHE_SIZE, sizeof(*flow_rules_cache->rule_ids_cache),
162 GFP_KERNEL);
163 if (!flow_rules_cache->rule_ids_cache) {
164 dev_err(&priv->pdev->dev, "Cannot alloc flow rule ids cache\n");
165 err = -ENOMEM;
166 goto free_rules_cache;
167 }
168
169 return 0;
170
171 free_rules_cache:
172 kvfree(flow_rules_cache->rules_cache);
173 flow_rules_cache->rules_cache = NULL;
174 return err;
175 }
176
177 static void gve_free_flow_rule_caches(struct gve_priv *priv)
178 {
179 struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;
180
181 kvfree(flow_rules_cache->rule_ids_cache);
182 flow_rules_cache->rule_ids_cache = NULL;
183 kvfree(flow_rules_cache->rules_cache);
184 flow_rules_cache->rules_cache = NULL;
185 }
186
187 static int gve_alloc_rss_config_cache(struct gve_priv *priv)
188 {
189 struct gve_rss_config *rss_config = &priv->rss_config;
190
191 if (!priv->cache_rss_config)
192 return 0;
193
194 rss_config->hash_key = kcalloc(priv->rss_key_size,
195 sizeof(rss_config->hash_key[0]),
196 GFP_KERNEL);
197 if (!rss_config->hash_key)
198 return -ENOMEM;
199
200 rss_config->hash_lut = kcalloc(priv->rss_lut_size,
201 sizeof(rss_config->hash_lut[0]),
202 GFP_KERNEL);
203 if (!rss_config->hash_lut)
204 goto free_rss_key_cache;
205
206 return 0;
207
208 free_rss_key_cache:
209 kfree(rss_config->hash_key);
210 rss_config->hash_key = NULL;
211 return -ENOMEM;
212 }
213
214 static void gve_free_rss_config_cache(struct gve_priv *priv)
215 {
216 struct gve_rss_config *rss_config = &priv->rss_config;
217
218 kfree(rss_config->hash_key);
219 kfree(rss_config->hash_lut);
220
221 memset(rss_config, 0, sizeof(*rss_config));
222 }
223
224 static int gve_alloc_counter_array(struct gve_priv *priv)
225 {
226 priv->counter_array =
227 dma_alloc_coherent(&priv->pdev->dev,
228 priv->num_event_counters *
229 sizeof(*priv->counter_array),
230 &priv->counter_array_bus, GFP_KERNEL);
231 if (!priv->counter_array)
232 return -ENOMEM;
233
234 return 0;
235 }
236
237 static void gve_free_counter_array(struct gve_priv *priv)
238 {
239 if (!priv->counter_array)
240 return;
241
242 dma_free_coherent(&priv->pdev->dev,
243 priv->num_event_counters *
244 sizeof(*priv->counter_array),
245 priv->counter_array, priv->counter_array_bus);
246 priv->counter_array = NULL;
247 }
248
249 /* NIC requests to report stats */
250 static void gve_stats_report_task(struct work_struct *work)
251 {
252 struct gve_priv *priv = container_of(work, struct gve_priv,
253 stats_report_task);
254 if (gve_get_do_report_stats(priv)) {
255 gve_handle_report_stats(priv);
256 gve_clear_do_report_stats(priv);
257 }
258 }
259
260 static void gve_stats_report_schedule(struct gve_priv *priv)
261 {
262 if (!gve_get_probe_in_progress(priv) &&
263 !gve_get_reset_in_progress(priv)) {
264 gve_set_do_report_stats(priv);
265 queue_work(priv->gve_wq, &priv->stats_report_task);
266 }
267 }
268
269 static void gve_stats_report_timer(struct timer_list *t)
270 {
271 struct gve_priv *priv = timer_container_of(priv, t,
272 stats_report_timer);
273
274 mod_timer(&priv->stats_report_timer,
275 round_jiffies(jiffies +
276 msecs_to_jiffies(priv->stats_report_timer_period)));
277 gve_stats_report_schedule(priv);
278 }
279
280 static int gve_alloc_stats_report(struct gve_priv *priv)
281 {
282 int tx_stats_num, rx_stats_num;
283
284 tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
285 gve_num_tx_queues(priv);
286 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
287 priv->rx_cfg.num_queues;
288 priv->stats_report_len = struct_size(priv->stats_report, stats,
289 size_add(tx_stats_num, rx_stats_num));
290 priv->stats_report =
291 dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
292 &priv->stats_report_bus, GFP_KERNEL);
293 if (!priv->stats_report)
294 return -ENOMEM;
295 /* Set up timer for the report-stats task */
296 timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
297 priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
298 return 0;
299 }
300
301 static void gve_free_stats_report(struct gve_priv *priv)
302 {
303 if (!priv->stats_report)
304 return;
305
306 timer_delete_sync(&priv->stats_report_timer);
307 dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
308 priv->stats_report, priv->stats_report_bus);
309 priv->stats_report = NULL;
310 }
311
312 static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
313 {
314 struct gve_priv *priv = arg;
315
316 queue_work(priv->gve_wq, &priv->service_task);
317 return IRQ_HANDLED;
318 }
319
320 static irqreturn_t gve_intr(int irq, void *arg)
321 {
322 struct gve_notify_block *block = arg;
323 struct gve_priv *priv = block->priv;
324
325 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
326 napi_schedule_irqoff(&block->napi);
327 return IRQ_HANDLED;
328 }
329
330 static irqreturn_t gve_intr_dqo(int irq, void *arg)
331 {
332 struct gve_notify_block *block = arg;
333
334 /* Interrupts are automatically masked */
335 napi_schedule_irqoff(&block->napi);
336 return IRQ_HANDLED;
337 }
338
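/* Return true if the current CPU is in the IRQ's effective affinity mask
 * (or if no affinity mask is available).
 */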
339 static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq)
340 {
341 int cpu_curr = smp_processor_id();
342 const struct cpumask *aff_mask;
343
344 aff_mask = irq_get_effective_affinity_mask(irq);
345 if (unlikely(!aff_mask))
346 return 1;
347
348 return cpumask_test_cpu(cpu_curr, aff_mask);
349 }
350
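/* GQI NAPI poll: service TX (and XDP TX) completions, then RX; re-arm the
 * IRQ doorbell once all pending work has been handled.
 */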
351 int gve_napi_poll(struct napi_struct *napi, int budget)
352 {
353 struct gve_notify_block *block;
354 __be32 __iomem *irq_doorbell;
355 bool reschedule = false;
356 struct gve_priv *priv;
357 int work_done = 0;
358
359 block = container_of(napi, struct gve_notify_block, napi);
360 priv = block->priv;
361
362 if (block->tx) {
363 if (block->tx->q_num < priv->tx_cfg.num_queues)
364 reschedule |= gve_tx_poll(block, budget);
365 else if (budget)
366 reschedule |= gve_xdp_poll(block, budget);
367 }
368
369 if (!budget)
370 return 0;
371
372 if (block->rx) {
373 work_done = gve_rx_poll(block, budget);
374
375 /* Poll XSK TX as part of RX NAPI. Set up re-poll based on max of
376 * TX and RX work done.
377 */
378 if (priv->xdp_prog)
379 work_done = max_t(int, work_done,
380 gve_xsk_tx_poll(block, budget));
381
382 reschedule |= work_done == budget;
383 }
384
385 if (reschedule)
386 return budget;
387
388 /* Complete processing - don't unmask irq if busy polling is enabled */
389 if (likely(napi_complete_done(napi, work_done))) {
390 irq_doorbell = gve_irq_doorbell(priv, block);
391 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
392
393 /* Ensure IRQ ACK is visible before we check pending work.
394 * If the queue has issued updates, they will be visible here.
395 */
396 mb();
397
398 if (block->tx)
399 reschedule |= gve_tx_clean_pending(priv, block->tx);
400 if (block->rx)
401 reschedule |= gve_rx_work_pending(block->rx);
402
403 if (reschedule && napi_schedule(napi))
404 iowrite32be(GVE_IRQ_MASK, irq_doorbell);
405 }
406 return work_done;
407 }
408
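/* DQO NAPI poll: service TX completions and RX; only reschedule from the
 * CPU the IRQ has affinity with, otherwise re-arm the interrupt.
 */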
409 int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
410 {
411 struct gve_notify_block *block =
412 container_of(napi, struct gve_notify_block, napi);
413 struct gve_priv *priv = block->priv;
414 bool reschedule = false;
415 int work_done = 0;
416
417 if (block->tx)
418 reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
419
420 if (!budget)
421 return 0;
422
423 if (block->rx) {
424 work_done = gve_rx_poll_dqo(block, budget);
425 reschedule |= work_done == budget;
426 }
427
428 if (reschedule) {
429 /* Reschedule by returning budget only if already on the correct
430 * cpu.
431 */
432 if (likely(gve_is_napi_on_home_cpu(priv, block->irq)))
433 return budget;
434
435 /* If not on the cpu with which this queue's irq has affinity,
436 * we avoid rescheduling napi and arm the irq instead so
437 * that napi gets rescheduled back eventually onto the right
438 * cpu.
439 */
440 if (work_done == budget)
441 work_done--;
442 }
443
444 if (likely(napi_complete_done(napi, work_done))) {
445 /* Enable interrupts again.
446 *
447 * We don't need to repoll afterwards because HW supports the
448 * PCI MSI-X PBA feature.
449 *
450 * Another interrupt would be triggered if a new event came in
451 * since the last one.
452 */
453 gve_write_irq_doorbell_dqo(priv, block,
454 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
455 }
456
457 return work_done;
458 }
459
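/* Enable MSI-X, request the management vector plus one vector per
 * notification block, and spread the block IRQs across online CPUs.
 */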
460 static int gve_alloc_notify_blocks(struct gve_priv *priv)
461 {
462 int num_vecs_requested = priv->num_ntfy_blks + 1;
463 unsigned int active_cpus;
464 int vecs_enabled;
465 int i, j;
466 int err;
467
468 priv->msix_vectors = kvcalloc(num_vecs_requested,
469 sizeof(*priv->msix_vectors), GFP_KERNEL);
470 if (!priv->msix_vectors)
471 return -ENOMEM;
472 for (i = 0; i < num_vecs_requested; i++)
473 priv->msix_vectors[i].entry = i;
474 vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
475 GVE_MIN_MSIX, num_vecs_requested);
476 if (vecs_enabled < 0) {
477 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
478 GVE_MIN_MSIX, vecs_enabled);
479 err = vecs_enabled;
480 goto abort_with_msix_vectors;
481 }
482 if (vecs_enabled != num_vecs_requested) {
483 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
484 int vecs_per_type = new_num_ntfy_blks / 2;
485 int vecs_left = new_num_ntfy_blks % 2;
486
487 priv->num_ntfy_blks = new_num_ntfy_blks;
488 priv->mgmt_msix_idx = priv->num_ntfy_blks;
489 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
490 vecs_per_type);
491 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
492 vecs_per_type + vecs_left);
493 dev_err(&priv->pdev->dev,
494 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
495 vecs_enabled, priv->tx_cfg.max_queues,
496 priv->rx_cfg.max_queues);
497 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
498 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
499 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
500 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
501 }
502 /* Half the notification blocks go to TX and half to RX */
503 active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
504
505 /* Setup Management Vector - the last vector */
506 snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
507 pci_name(priv->pdev));
508 err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
509 gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
510 if (err) {
511 dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
512 goto abort_with_msix_enabled;
513 }
514 priv->irq_db_indices =
515 dma_alloc_coherent(&priv->pdev->dev,
516 priv->num_ntfy_blks *
517 sizeof(*priv->irq_db_indices),
518 &priv->irq_db_indices_bus, GFP_KERNEL);
519 if (!priv->irq_db_indices) {
520 err = -ENOMEM;
521 goto abort_with_mgmt_vector;
522 }
523
524 priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
525 sizeof(*priv->ntfy_blocks), GFP_KERNEL);
526 if (!priv->ntfy_blocks) {
527 err = -ENOMEM;
528 goto abort_with_irq_db_indices;
529 }
530
531 /* Setup the other blocks - the first n-1 vectors */
532 for (i = 0; i < priv->num_ntfy_blks; i++) {
533 struct gve_notify_block *block = &priv->ntfy_blocks[i];
534 int msix_idx = i;
535
536 snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s",
537 i, pci_name(priv->pdev));
538 block->priv = priv;
539 err = request_irq(priv->msix_vectors[msix_idx].vector,
540 gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
541 0, block->name, block);
542 if (err) {
543 dev_err(&priv->pdev->dev,
544 "Failed to receive msix vector %d\n", i);
545 goto abort_with_some_ntfy_blocks;
546 }
547 block->irq = priv->msix_vectors[msix_idx].vector;
548 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
549 get_cpu_mask(i % active_cpus));
550 block->irq_db_index = &priv->irq_db_indices[i].index;
551 }
552 return 0;
553 abort_with_some_ntfy_blocks:
554 for (j = 0; j < i; j++) {
555 struct gve_notify_block *block = &priv->ntfy_blocks[j];
556 int msix_idx = j;
557
558 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
559 NULL);
560 free_irq(priv->msix_vectors[msix_idx].vector, block);
561 block->irq = 0;
562 }
563 kvfree(priv->ntfy_blocks);
564 priv->ntfy_blocks = NULL;
565 abort_with_irq_db_indices:
566 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
567 sizeof(*priv->irq_db_indices),
568 priv->irq_db_indices, priv->irq_db_indices_bus);
569 priv->irq_db_indices = NULL;
570 abort_with_mgmt_vector:
571 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
572 abort_with_msix_enabled:
573 pci_disable_msix(priv->pdev);
574 abort_with_msix_vectors:
575 kvfree(priv->msix_vectors);
576 priv->msix_vectors = NULL;
577 return err;
578 }
579
580 static void gve_free_notify_blocks(struct gve_priv *priv)
581 {
582 int i;
583
584 if (!priv->msix_vectors)
585 return;
586
587 /* Free the irqs */
588 for (i = 0; i < priv->num_ntfy_blks; i++) {
589 struct gve_notify_block *block = &priv->ntfy_blocks[i];
590 int msix_idx = i;
591
592 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
593 NULL);
594 free_irq(priv->msix_vectors[msix_idx].vector, block);
595 block->irq = 0;
596 }
597 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
598 kvfree(priv->ntfy_blocks);
599 priv->ntfy_blocks = NULL;
600 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
601 sizeof(*priv->irq_db_indices),
602 priv->irq_db_indices, priv->irq_db_indices_bus);
603 priv->irq_db_indices = NULL;
604 pci_disable_msix(priv->pdev);
605 kvfree(priv->msix_vectors);
606 priv->msix_vectors = NULL;
607 }
608
609 static int gve_setup_device_resources(struct gve_priv *priv)
610 {
611 int err;
612
613 err = gve_alloc_flow_rule_caches(priv);
614 if (err)
615 return err;
616 err = gve_alloc_rss_config_cache(priv);
617 if (err)
618 goto abort_with_flow_rule_caches;
619 err = gve_alloc_counter_array(priv);
620 if (err)
621 goto abort_with_rss_config_cache;
622 err = gve_alloc_notify_blocks(priv);
623 if (err)
624 goto abort_with_counter;
625 err = gve_alloc_stats_report(priv);
626 if (err)
627 goto abort_with_ntfy_blocks;
628 err = gve_adminq_configure_device_resources(priv,
629 priv->counter_array_bus,
630 priv->num_event_counters,
631 priv->irq_db_indices_bus,
632 priv->num_ntfy_blks);
633 if (unlikely(err)) {
634 dev_err(&priv->pdev->dev,
635 "could not setup device_resources: err=%d\n", err);
636 err = -ENXIO;
637 goto abort_with_stats_report;
638 }
639
640 if (!gve_is_gqi(priv)) {
641 priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
642 GFP_KERNEL);
643 if (!priv->ptype_lut_dqo) {
644 err = -ENOMEM;
645 goto abort_with_stats_report;
646 }
647 err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
648 if (err) {
649 dev_err(&priv->pdev->dev,
650 "Failed to get ptype map: err=%d\n", err);
651 goto abort_with_ptype_lut;
652 }
653 }
654
655 err = gve_init_rss_config(priv, priv->rx_cfg.num_queues);
656 if (err) {
657 dev_err(&priv->pdev->dev, "Failed to init RSS config");
658 goto abort_with_ptype_lut;
659 }
660
661 err = gve_adminq_report_stats(priv, priv->stats_report_len,
662 priv->stats_report_bus,
663 GVE_STATS_REPORT_TIMER_PERIOD);
664 if (err)
665 dev_err(&priv->pdev->dev,
666 "Failed to report stats: err=%d\n", err);
667 gve_set_device_resources_ok(priv);
668 return 0;
669
670 abort_with_ptype_lut:
671 kvfree(priv->ptype_lut_dqo);
672 priv->ptype_lut_dqo = NULL;
673 abort_with_stats_report:
674 gve_free_stats_report(priv);
675 abort_with_ntfy_blocks:
676 gve_free_notify_blocks(priv);
677 abort_with_counter:
678 gve_free_counter_array(priv);
679 abort_with_rss_config_cache:
680 gve_free_rss_config_cache(priv);
681 abort_with_flow_rule_caches:
682 gve_free_flow_rule_caches(priv);
683
684 return err;
685 }
686
687 static void gve_trigger_reset(struct gve_priv *priv);
688
689 static void gve_teardown_device_resources(struct gve_priv *priv)
690 {
691 int err;
692
693 /* Tell device its resources are being freed */
694 if (gve_get_device_resources_ok(priv)) {
695 err = gve_flow_rules_reset(priv);
696 if (err) {
697 dev_err(&priv->pdev->dev,
698 "Failed to reset flow rules: err=%d\n", err);
699 gve_trigger_reset(priv);
700 }
701 /* detach the stats report */
702 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
703 if (err) {
704 dev_err(&priv->pdev->dev,
705 "Failed to detach stats report: err=%d\n", err);
706 gve_trigger_reset(priv);
707 }
708 err = gve_adminq_deconfigure_device_resources(priv);
709 if (err) {
710 dev_err(&priv->pdev->dev,
711 "Could not deconfigure device resources: err=%d\n",
712 err);
713 gve_trigger_reset(priv);
714 }
715 }
716
717 kvfree(priv->ptype_lut_dqo);
718 priv->ptype_lut_dqo = NULL;
719
720 gve_free_flow_rule_caches(priv);
721 gve_free_rss_config_cache(priv);
722 gve_free_counter_array(priv);
723 gve_free_notify_blocks(priv);
724 gve_free_stats_report(priv);
725 gve_clear_device_resources_ok(priv);
726 }
727
728 static int gve_unregister_qpl(struct gve_priv *priv,
729 struct gve_queue_page_list *qpl)
730 {
731 int err;
732
733 if (!qpl)
734 return 0;
735
736 err = gve_adminq_unregister_page_list(priv, qpl->id);
737 if (err) {
738 netif_err(priv, drv, priv->dev,
739 "Failed to unregister queue page list %d\n",
740 qpl->id);
741 return err;
742 }
743
744 priv->num_registered_pages -= qpl->num_entries;
745 return 0;
746 }
747
748 static int gve_register_qpl(struct gve_priv *priv,
749 struct gve_queue_page_list *qpl)
750 {
751 int pages;
752 int err;
753
754 if (!qpl)
755 return 0;
756
757 pages = qpl->num_entries;
758
759 if (pages + priv->num_registered_pages > priv->max_registered_pages) {
760 netif_err(priv, drv, priv->dev,
761 "Reached max number of registered pages %llu > %llu\n",
762 pages + priv->num_registered_pages,
763 priv->max_registered_pages);
764 return -EINVAL;
765 }
766
767 err = gve_adminq_register_page_list(priv, qpl);
768 if (err) {
769 netif_err(priv, drv, priv->dev,
770 "failed to register queue page list %d\n",
771 qpl->id);
772 return err;
773 }
774
775 priv->num_registered_pages += pages;
776 return 0;
777 }
778
779 static struct gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx)
780 {
781 struct gve_tx_ring *tx = &priv->tx[idx];
782
783 if (gve_is_gqi(priv))
784 return tx->tx_fifo.qpl;
785 else
786 return tx->dqo.qpl;
787 }
788
789 static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx)
790 {
791 struct gve_rx_ring *rx = &priv->rx[idx];
792
793 if (gve_is_gqi(priv))
794 return rx->data.qpl;
795 else
796 return rx->dqo.qpl;
797 }
798
799 static int gve_register_qpls(struct gve_priv *priv)
800 {
801 int num_tx_qpls, num_rx_qpls;
802 int err;
803 int i;
804
805 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv));
806 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
807
808 for (i = 0; i < num_tx_qpls; i++) {
809 err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i));
810 if (err)
811 return err;
812 }
813
814 for (i = 0; i < num_rx_qpls; i++) {
815 err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i));
816 if (err)
817 return err;
818 }
819
820 return 0;
821 }
822
823 static int gve_unregister_qpls(struct gve_priv *priv)
824 {
825 int num_tx_qpls, num_rx_qpls;
826 int err;
827 int i;
828
829 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv));
830 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
831
832 for (i = 0; i < num_tx_qpls; i++) {
833 err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i));
834 /* This failure will trigger a reset - no need to clean */
835 if (err)
836 return err;
837 }
838
839 for (i = 0; i < num_rx_qpls; i++) {
840 err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i));
841 /* This failure will trigger a reset - no need to clean */
842 if (err)
843 return err;
844 }
845 return 0;
846 }
847
848 static int gve_create_rings(struct gve_priv *priv)
849 {
850 int num_tx_queues = gve_num_tx_queues(priv);
851 int err;
852 int i;
853
854 err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues);
855 if (err) {
856 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
857 num_tx_queues);
858 /* This failure will trigger a reset - no need to clean
859 * up
860 */
861 return err;
862 }
863 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
864 num_tx_queues);
865
866 err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
867 if (err) {
868 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
869 priv->rx_cfg.num_queues);
870 /* This failure will trigger a reset - no need to clean
871 * up
872 */
873 return err;
874 }
875 netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
876 priv->rx_cfg.num_queues);
877
878 if (gve_is_gqi(priv)) {
879 /* Rx data ring has been prefilled with packet buffers at queue
880 * allocation time.
881 *
882 * Write the doorbell to provide descriptor slots and packet
883 * buffers to the NIC.
884 */
885 for (i = 0; i < priv->rx_cfg.num_queues; i++)
886 gve_rx_write_doorbell(priv, &priv->rx[i]);
887 } else {
888 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
889 /* Post buffers and ring doorbell. */
890 gve_rx_post_buffers_dqo(&priv->rx[i]);
891 }
892 }
893
894 return 0;
895 }
896
897 static void init_xdp_sync_stats(struct gve_priv *priv)
898 {
899 int start_id = gve_xdp_tx_start_queue_id(priv);
900 int i;
901
902 /* Init stats */
903 for (i = start_id; i < start_id + priv->tx_cfg.num_xdp_queues; i++) {
904 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
905
906 u64_stats_init(&priv->tx[i].statss);
907 priv->tx[i].ntfy_id = ntfy_idx;
908 }
909 }
910
911 static void gve_init_sync_stats(struct gve_priv *priv)
912 {
913 int i;
914
915 for (i = 0; i < priv->tx_cfg.num_queues; i++)
916 u64_stats_init(&priv->tx[i].statss);
917
918 /* Init stats for XDP TX queues */
919 init_xdp_sync_stats(priv);
920
921 for (i = 0; i < priv->rx_cfg.num_queues; i++)
922 u64_stats_init(&priv->rx[i].statss);
923 }
924
925 static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv,
926 struct gve_tx_alloc_rings_cfg *cfg)
927 {
928 cfg->qcfg = &priv->tx_cfg;
929 cfg->raw_addressing = !gve_is_qpl(priv);
930 cfg->ring_size = priv->tx_desc_cnt;
931 cfg->num_xdp_rings = cfg->qcfg->num_xdp_queues;
932 cfg->tx = priv->tx;
933 }
934
935 static void gve_tx_stop_rings(struct gve_priv *priv, int num_rings)
936 {
937 int i;
938
939 if (!priv->tx)
940 return;
941
942 for (i = 0; i < num_rings; i++) {
943 if (gve_is_gqi(priv))
944 gve_tx_stop_ring_gqi(priv, i);
945 else
946 gve_tx_stop_ring_dqo(priv, i);
947 }
948 }
949
950 static void gve_tx_start_rings(struct gve_priv *priv, int num_rings)
951 {
952 int i;
953
954 for (i = 0; i < num_rings; i++) {
955 if (gve_is_gqi(priv))
956 gve_tx_start_ring_gqi(priv, i);
957 else
958 gve_tx_start_ring_dqo(priv, i);
959 }
960 }
961
962 static int gve_queues_mem_alloc(struct gve_priv *priv,
963 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
964 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
965 {
966 int err;
967
968 if (gve_is_gqi(priv))
969 err = gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg);
970 else
971 err = gve_tx_alloc_rings_dqo(priv, tx_alloc_cfg);
972 if (err)
973 return err;
974
975 if (gve_is_gqi(priv))
976 err = gve_rx_alloc_rings_gqi(priv, rx_alloc_cfg);
977 else
978 err = gve_rx_alloc_rings_dqo(priv, rx_alloc_cfg);
979 if (err)
980 goto free_tx;
981
982 return 0;
983
984 free_tx:
985 if (gve_is_gqi(priv))
986 gve_tx_free_rings_gqi(priv, tx_alloc_cfg);
987 else
988 gve_tx_free_rings_dqo(priv, tx_alloc_cfg);
989 return err;
990 }
991
992 static int gve_destroy_rings(struct gve_priv *priv)
993 {
994 int num_tx_queues = gve_num_tx_queues(priv);
995 int err;
996
997 err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues);
998 if (err) {
999 netif_err(priv, drv, priv->dev,
1000 "failed to destroy tx queues\n");
1001 /* This failure will trigger a reset - no need to clean up */
1002 return err;
1003 }
1004 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
1005 err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
1006 if (err) {
1007 netif_err(priv, drv, priv->dev,
1008 "failed to destroy rx queues\n");
1009 /* This failure will trigger a reset - no need to clean up */
1010 return err;
1011 }
1012 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
1013 return 0;
1014 }
1015
1016 static void gve_queues_mem_free(struct gve_priv *priv,
1017 struct gve_tx_alloc_rings_cfg *tx_cfg,
1018 struct gve_rx_alloc_rings_cfg *rx_cfg)
1019 {
1020 if (gve_is_gqi(priv)) {
1021 gve_tx_free_rings_gqi(priv, tx_cfg);
1022 gve_rx_free_rings_gqi(priv, rx_cfg);
1023 } else {
1024 gve_tx_free_rings_dqo(priv, tx_cfg);
1025 gve_rx_free_rings_dqo(priv, rx_cfg);
1026 }
1027 }
1028
1029 int gve_alloc_page(struct gve_priv *priv, struct device *dev,
1030 struct page **page, dma_addr_t *dma,
1031 enum dma_data_direction dir, gfp_t gfp_flags)
1032 {
1033 *page = alloc_page(gfp_flags);
1034 if (!*page) {
1035 priv->page_alloc_fail++;
1036 return -ENOMEM;
1037 }
1038 *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
1039 if (dma_mapping_error(dev, *dma)) {
1040 priv->dma_mapping_error++;
1041 put_page(*page);
1042 return -ENOMEM;
1043 }
1044 return 0;
1045 }
1046
1047 struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv,
1048 u32 id, int pages)
1049 {
1050 struct gve_queue_page_list *qpl;
1051 int err;
1052 int i;
1053
1054 qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL);
1055 if (!qpl)
1056 return NULL;
1057
1058 qpl->id = id;
1059 qpl->num_entries = 0;
1060 qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
1061 if (!qpl->pages)
1062 goto abort;
1063
1064 qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL);
1065 if (!qpl->page_buses)
1066 goto abort;
1067
1068 for (i = 0; i < pages; i++) {
1069 err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
1070 &qpl->page_buses[i],
1071 gve_qpl_dma_dir(priv, id), GFP_KERNEL);
1072 if (err)
1073 goto abort;
1074 qpl->num_entries++;
1075 }
1076
1077 return qpl;
1078
1079 abort:
1080 gve_free_queue_page_list(priv, qpl, id);
1081 return NULL;
1082 }
1083
1084 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
1085 enum dma_data_direction dir)
1086 {
1087 if (!dma_mapping_error(dev, dma))
1088 dma_unmap_page(dev, dma, PAGE_SIZE, dir);
1089 if (page)
1090 put_page(page);
1091 }
1092
1093 void gve_free_queue_page_list(struct gve_priv *priv,
1094 struct gve_queue_page_list *qpl,
1095 u32 id)
1096 {
1097 int i;
1098
1099 if (!qpl)
1100 return;
1101 if (!qpl->pages)
1102 goto free_qpl;
1103 if (!qpl->page_buses)
1104 goto free_pages;
1105
1106 for (i = 0; i < qpl->num_entries; i++)
1107 gve_free_page(&priv->pdev->dev, qpl->pages[i],
1108 qpl->page_buses[i], gve_qpl_dma_dir(priv, id));
1109
1110 kvfree(qpl->page_buses);
1111 qpl->page_buses = NULL;
1112 free_pages:
1113 kvfree(qpl->pages);
1114 qpl->pages = NULL;
1115 free_qpl:
1116 kvfree(qpl);
1117 }
1118
1119 /* Use this to schedule a reset when the device is capable of continuing
1120 * to handle other requests in its current state. If it is not, do a reset
1121 * in thread instead.
1122 */
1123 void gve_schedule_reset(struct gve_priv *priv)
1124 {
1125 gve_set_do_reset(priv);
1126 queue_work(priv->gve_wq, &priv->service_task);
1127 }
1128
1129 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
1130 static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
1131 static void gve_turndown(struct gve_priv *priv);
1132 static void gve_turnup(struct gve_priv *priv);
1133
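/* Register XDP RX queue info and memory models for every RX ring, and bind
 * any XSK pools to their corresponding XDP TX rings.
 */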
1134 static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
1135 {
1136 struct napi_struct *napi;
1137 struct gve_rx_ring *rx;
1138 int err = 0;
1139 int i, j;
1140 u32 tx_qid;
1141
1142 if (!priv->tx_cfg.num_xdp_queues)
1143 return 0;
1144
1145 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1146 rx = &priv->rx[i];
1147 napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1148
1149 err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i,
1150 napi->napi_id);
1151 if (err)
1152 goto err;
1153 if (gve_is_qpl(priv))
1154 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
1155 MEM_TYPE_PAGE_SHARED,
1156 NULL);
1157 else
1158 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
1159 MEM_TYPE_PAGE_POOL,
1160 rx->dqo.page_pool);
1161 if (err)
1162 goto err;
1163 rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
1164 if (rx->xsk_pool) {
1165 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
1166 napi->napi_id);
1167 if (err)
1168 goto err;
1169 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
1170 MEM_TYPE_XSK_BUFF_POOL, NULL);
1171 if (err)
1172 goto err;
1173 xsk_pool_set_rxq_info(rx->xsk_pool,
1174 &rx->xsk_rxq);
1175 }
1176 }
1177
1178 for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) {
1179 tx_qid = gve_xdp_tx_queue_id(priv, i);
1180 priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
1181 }
1182 return 0;
1183
1184 err:
1185 for (j = i; j >= 0; j--) {
1186 rx = &priv->rx[j];
1187 if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
1188 xdp_rxq_info_unreg(&rx->xdp_rxq);
1189 if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
1190 xdp_rxq_info_unreg(&rx->xsk_rxq);
1191 }
1192 return err;
1193 }
1194
1195 static void gve_unreg_xdp_info(struct gve_priv *priv)
1196 {
1197 int i, tx_qid;
1198
1199 if (!priv->tx_cfg.num_xdp_queues || !priv->rx || !priv->tx)
1200 return;
1201
1202 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1203 struct gve_rx_ring *rx = &priv->rx[i];
1204
1205 xdp_rxq_info_unreg(&rx->xdp_rxq);
1206 if (rx->xsk_pool) {
1207 xdp_rxq_info_unreg(&rx->xsk_rxq);
1208 rx->xsk_pool = NULL;
1209 }
1210 }
1211
1212 for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) {
1213 tx_qid = gve_xdp_tx_queue_id(priv, i);
1214 priv->tx[tx_qid].xsk_pool = NULL;
1215 }
1216 }
1217
1218 static void gve_drain_page_cache(struct gve_priv *priv)
1219 {
1220 int i;
1221
1222 for (i = 0; i < priv->rx_cfg.num_queues; i++)
1223 page_frag_cache_drain(&priv->rx[i].page_cache);
1224 }
1225
1226 static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv,
1227 struct gve_rx_alloc_rings_cfg *cfg)
1228 {
1229 cfg->qcfg_rx = &priv->rx_cfg;
1230 cfg->qcfg_tx = &priv->tx_cfg;
1231 cfg->raw_addressing = !gve_is_qpl(priv);
1232 cfg->enable_header_split = priv->header_split_enabled;
1233 cfg->ring_size = priv->rx_desc_cnt;
1234 cfg->packet_buffer_size = priv->rx_cfg.packet_buffer_size;
1235 cfg->rx = priv->rx;
1236 cfg->xdp = !!cfg->qcfg_tx->num_xdp_queues;
1237 }
1238
1239 void gve_get_curr_alloc_cfgs(struct gve_priv *priv,
1240 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1241 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1242 {
1243 gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg);
1244 gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg);
1245 }
1246
1247 static void gve_rx_start_ring(struct gve_priv *priv, int i)
1248 {
1249 if (gve_is_gqi(priv))
1250 gve_rx_start_ring_gqi(priv, i);
1251 else
1252 gve_rx_start_ring_dqo(priv, i);
1253 }
1254
1255 static void gve_rx_start_rings(struct gve_priv *priv, int num_rings)
1256 {
1257 int i;
1258
1259 for (i = 0; i < num_rings; i++)
1260 gve_rx_start_ring(priv, i);
1261 }
1262
1263 static void gve_rx_stop_ring(struct gve_priv *priv, int i)
1264 {
1265 if (gve_is_gqi(priv))
1266 gve_rx_stop_ring_gqi(priv, i);
1267 else
1268 gve_rx_stop_ring_dqo(priv, i);
1269 }
1270
1271 static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings)
1272 {
1273 int i;
1274
1275 if (!priv->rx)
1276 return;
1277
1278 for (i = 0; i < num_rings; i++)
1279 gve_rx_stop_ring(priv, i);
1280 }
1281
1282 static void gve_queues_mem_remove(struct gve_priv *priv)
1283 {
1284 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
1285 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
1286
1287 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1288 gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1289 priv->tx = NULL;
1290 priv->rx = NULL;
1291 }
1292
1293 /* The passed-in queue memory is stored into priv and the queues are made live.
1294 * No memory is allocated. Passed-in memory is freed on errors.
1295 */
1296 static int gve_queues_start(struct gve_priv *priv,
1297 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1298 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1299 {
1300 struct net_device *dev = priv->dev;
1301 int err;
1302
1303 /* Record new resources into priv */
1304 priv->tx = tx_alloc_cfg->tx;
1305 priv->rx = rx_alloc_cfg->rx;
1306
1307 /* Record new configs into priv */
1308 priv->tx_cfg = *tx_alloc_cfg->qcfg;
1309 priv->tx_cfg.num_xdp_queues = tx_alloc_cfg->num_xdp_rings;
1310 priv->rx_cfg = *rx_alloc_cfg->qcfg_rx;
1311 priv->tx_desc_cnt = tx_alloc_cfg->ring_size;
1312 priv->rx_desc_cnt = rx_alloc_cfg->ring_size;
1313
1314 gve_tx_start_rings(priv, gve_num_tx_queues(priv));
1315 gve_rx_start_rings(priv, rx_alloc_cfg->qcfg_rx->num_queues);
1316 gve_init_sync_stats(priv);
1317
1318 err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
1319 if (err)
1320 goto stop_and_free_rings;
1321 err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
1322 if (err)
1323 goto stop_and_free_rings;
1324
1325 err = gve_reg_xdp_info(priv, dev);
1326 if (err)
1327 goto stop_and_free_rings;
1328
1329 if (rx_alloc_cfg->reset_rss) {
1330 err = gve_init_rss_config(priv, priv->rx_cfg.num_queues);
1331 if (err)
1332 goto reset;
1333 }
1334
1335 err = gve_register_qpls(priv);
1336 if (err)
1337 goto reset;
1338
1339 priv->header_split_enabled = rx_alloc_cfg->enable_header_split;
1340 priv->rx_cfg.packet_buffer_size = rx_alloc_cfg->packet_buffer_size;
1341
1342 err = gve_create_rings(priv);
1343 if (err)
1344 goto reset;
1345
1346 gve_set_device_rings_ok(priv);
1347
1348 if (gve_get_report_stats(priv))
1349 mod_timer(&priv->stats_report_timer,
1350 round_jiffies(jiffies +
1351 msecs_to_jiffies(priv->stats_report_timer_period)));
1352
1353 gve_turnup(priv);
1354 queue_work(priv->gve_wq, &priv->service_task);
1355 priv->interface_up_cnt++;
1356 return 0;
1357
1358 reset:
1359 if (gve_get_reset_in_progress(priv))
1360 goto stop_and_free_rings;
1361 gve_reset_and_teardown(priv, true);
1362 /* if this fails there is nothing we can do so just ignore the return */
1363 gve_reset_recovery(priv, false);
1364 /* return the original error */
1365 return err;
1366 stop_and_free_rings:
1367 gve_tx_stop_rings(priv, gve_num_tx_queues(priv));
1368 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues);
1369 gve_queues_mem_remove(priv);
1370 return err;
1371 }
1372
1373 static int gve_open(struct net_device *dev)
1374 {
1375 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
1376 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
1377 struct gve_priv *priv = netdev_priv(dev);
1378 int err;
1379
1380 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1381
1382 err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1383 if (err)
1384 return err;
1385
1386 /* No need to free on error: ownership of resources is lost after
1387 * calling gve_queues_start.
1388 */
1389 err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1390 if (err)
1391 return err;
1392
1393 return 0;
1394 }
1395
1396 static int gve_queues_stop(struct gve_priv *priv)
1397 {
1398 int err;
1399
1400 netif_carrier_off(priv->dev);
1401 if (gve_get_device_rings_ok(priv)) {
1402 gve_turndown(priv);
1403 gve_drain_page_cache(priv);
1404 err = gve_destroy_rings(priv);
1405 if (err)
1406 goto err;
1407 err = gve_unregister_qpls(priv);
1408 if (err)
1409 goto err;
1410 gve_clear_device_rings_ok(priv);
1411 }
1412 timer_delete_sync(&priv->stats_report_timer);
1413
1414 gve_unreg_xdp_info(priv);
1415
1416 gve_tx_stop_rings(priv, gve_num_tx_queues(priv));
1417 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues);
1418
1419 priv->interface_down_cnt++;
1420 return 0;
1421
1422 err:
1423 /* This must have been called from a reset due to the rtnl lock
1424 * so just return at this point.
1425 */
1426 if (gve_get_reset_in_progress(priv))
1427 return err;
1428 /* Otherwise reset before returning */
1429 gve_reset_and_teardown(priv, true);
1430 return gve_reset_recovery(priv, false);
1431 }
1432
1433 static int gve_close(struct net_device *dev)
1434 {
1435 struct gve_priv *priv = netdev_priv(dev);
1436 int err;
1437
1438 err = gve_queues_stop(priv);
1439 if (err)
1440 return err;
1441
1442 gve_queues_mem_remove(priv);
1443 return 0;
1444 }
1445
1446 static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
1447 {
1448 if (!gve_get_napi_enabled(priv))
1449 return;
1450
1451 if (link_status == netif_carrier_ok(priv->dev))
1452 return;
1453
1454 if (link_status) {
1455 netdev_info(priv->dev, "Device link is up.\n");
1456 netif_carrier_on(priv->dev);
1457 } else {
1458 netdev_info(priv->dev, "Device link is down.\n");
1459 netif_carrier_off(priv->dev);
1460 }
1461 }
1462
1463 static int gve_configure_rings_xdp(struct gve_priv *priv,
1464 u16 num_xdp_rings)
1465 {
1466 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
1467 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
1468
1469 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1470 tx_alloc_cfg.num_xdp_rings = num_xdp_rings;
1471
1472 rx_alloc_cfg.xdp = !!num_xdp_rings;
1473 return gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1474 }
1475
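/* Install or remove an XDP program. If the interface is running, the XDP
 * TX rings are reconfigured to match.
 */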
1476 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
1477 struct netlink_ext_ack *extack)
1478 {
1479 struct bpf_prog *old_prog;
1480 int err = 0;
1481 u32 status;
1482
1483 old_prog = READ_ONCE(priv->xdp_prog);
1484 if (!netif_running(priv->dev)) {
1485 WRITE_ONCE(priv->xdp_prog, prog);
1486 if (old_prog)
1487 bpf_prog_put(old_prog);
1488
1489 /* Update priv XDP queue configuration */
1490 priv->tx_cfg.num_xdp_queues = priv->xdp_prog ?
1491 priv->rx_cfg.num_queues : 0;
1492 return 0;
1493 }
1494
1495 if (!old_prog && prog)
1496 err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues);
1497 else if (old_prog && !prog)
1498 err = gve_configure_rings_xdp(priv, 0);
1499
1500 if (err)
1501 goto out;
1502
1503 WRITE_ONCE(priv->xdp_prog, prog);
1504 if (old_prog)
1505 bpf_prog_put(old_prog);
1506
1507 out:
1508 status = ioread32be(&priv->reg_bar0->device_status);
1509 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1510 return err;
1511 }
1512
1513 static int gve_xsk_pool_enable(struct net_device *dev,
1514 struct xsk_buff_pool *pool,
1515 u16 qid)
1516 {
1517 struct gve_priv *priv = netdev_priv(dev);
1518 struct napi_struct *napi;
1519 struct gve_rx_ring *rx;
1520 int tx_qid;
1521 int err;
1522
1523 if (qid >= priv->rx_cfg.num_queues) {
1524 dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid);
1525 return -EINVAL;
1526 }
1527 if (xsk_pool_get_rx_frame_size(pool) <
1528 priv->dev->max_mtu + sizeof(struct ethhdr)) {
1529 dev_err(&priv->pdev->dev, "xsk pool frame_len too small");
1530 return -EINVAL;
1531 }
1532
1533 err = xsk_pool_dma_map(pool, &priv->pdev->dev,
1534 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1535 if (err)
1536 return err;
1537
1538 /* If XDP prog is not installed or interface is down, return. */
1539 if (!priv->xdp_prog || !netif_running(dev))
1540 return 0;
1541
1542 rx = &priv->rx[qid];
1543 napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1544 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id);
1545 if (err)
1546 goto err;
1547
1548 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
1549 MEM_TYPE_XSK_BUFF_POOL, NULL);
1550 if (err)
1551 goto err;
1552
1553 xsk_pool_set_rxq_info(pool, &rx->xsk_rxq);
1554 rx->xsk_pool = pool;
1555
1556 tx_qid = gve_xdp_tx_queue_id(priv, qid);
1557 priv->tx[tx_qid].xsk_pool = pool;
1558
1559 return 0;
1560 err:
1561 if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
1562 xdp_rxq_info_unreg(&rx->xsk_rxq);
1563
1564 xsk_pool_dma_unmap(pool,
1565 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1566 return err;
1567 }
1568
1569 static int gve_xsk_pool_disable(struct net_device *dev,
1570 u16 qid)
1571 {
1572 struct gve_priv *priv = netdev_priv(dev);
1573 struct napi_struct *napi_rx;
1574 struct napi_struct *napi_tx;
1575 struct xsk_buff_pool *pool;
1576 int tx_qid;
1577
1578 pool = xsk_get_pool_from_qid(dev, qid);
1579 if (!pool)
1580 return -EINVAL;
1581 if (qid >= priv->rx_cfg.num_queues)
1582 return -EINVAL;
1583
1584 /* If XDP prog is not installed or interface is down, unmap DMA and
1585 * return.
1586 */
1587 if (!priv->xdp_prog || !netif_running(dev))
1588 goto done;
1589
1590 napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
1591 napi_disable(napi_rx); /* make sure current rx poll is done */
1592
1593 tx_qid = gve_xdp_tx_queue_id(priv, qid);
1594 napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
1595 napi_disable(napi_tx); /* make sure current tx poll is done */
1596
1597 priv->rx[qid].xsk_pool = NULL;
1598 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
1599 priv->tx[tx_qid].xsk_pool = NULL;
1600 smp_mb(); /* Make sure it is visible to the workers on datapath */
1601
1602 napi_enable(napi_rx);
1603 if (gve_rx_work_pending(&priv->rx[qid]))
1604 napi_schedule(napi_rx);
1605
1606 napi_enable(napi_tx);
1607 if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
1608 napi_schedule(napi_tx);
1609
1610 done:
1611 xsk_pool_dma_unmap(pool,
1612 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1613 return 0;
1614 }
1615
1616 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
1617 {
1618 struct gve_priv *priv = netdev_priv(dev);
1619 struct napi_struct *napi;
1620
1621 if (!gve_get_napi_enabled(priv))
1622 return -ENETDOWN;
1623
1624 if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog)
1625 return -EINVAL;
1626
1627 napi = &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_id)].napi;
1628 if (!napi_if_scheduled_mark_missed(napi)) {
1629 /* Call local_bh_enable to trigger SoftIRQ processing */
1630 local_bh_disable();
1631 napi_schedule(napi);
1632 local_bh_enable();
1633 }
1634
1635 return 0;
1636 }
1637
1638 static int verify_xdp_configuration(struct net_device *dev)
1639 {
1640 struct gve_priv *priv = netdev_priv(dev);
1641 u16 max_xdp_mtu;
1642
1643 if (dev->features & NETIF_F_LRO) {
1644 netdev_warn(dev, "XDP is not supported when LRO is on.\n");
1645 return -EOPNOTSUPP;
1646 }
1647
1648 if (priv->queue_format != GVE_GQI_QPL_FORMAT) {
1649 netdev_warn(dev, "XDP is not supported in mode %d.\n",
1650 priv->queue_format);
1651 return -EOPNOTSUPP;
1652 }
1653
1654 max_xdp_mtu = priv->rx_cfg.packet_buffer_size - sizeof(struct ethhdr);
1655 if (priv->queue_format == GVE_GQI_QPL_FORMAT)
1656 max_xdp_mtu -= GVE_RX_PAD;
1657
1658 if (dev->mtu > max_xdp_mtu) {
1659 netdev_warn(dev, "XDP is not supported for mtu %d.\n",
1660 dev->mtu);
1661 return -EOPNOTSUPP;
1662 }
1663
1664 if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues ||
1665 (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) {
1666 netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d",
1667 priv->rx_cfg.num_queues,
1668 priv->tx_cfg.num_queues,
1669 priv->tx_cfg.max_queues);
1670 return -EINVAL;
1671 }
1672 return 0;
1673 }
1674
1675 static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
1676 {
1677 struct gve_priv *priv = netdev_priv(dev);
1678 int err;
1679
1680 err = verify_xdp_configuration(dev);
1681 if (err)
1682 return err;
1683 switch (xdp->command) {
1684 case XDP_SETUP_PROG:
1685 return gve_set_xdp(priv, xdp->prog, xdp->extack);
1686 case XDP_SETUP_XSK_POOL:
1687 if (xdp->xsk.pool)
1688 return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id);
1689 else
1690 return gve_xsk_pool_disable(dev, xdp->xsk.queue_id);
1691 default:
1692 return -EINVAL;
1693 }
1694 }
1695
1696 int gve_init_rss_config(struct gve_priv *priv, u16 num_queues)
1697 {
1698 struct gve_rss_config *rss_config = &priv->rss_config;
1699 struct ethtool_rxfh_param rxfh = {0};
1700 u16 i;
1701
1702 if (!priv->cache_rss_config)
1703 return 0;
1704
1705 for (i = 0; i < priv->rss_lut_size; i++)
1706 rss_config->hash_lut[i] =
1707 ethtool_rxfh_indir_default(i, num_queues);
1708
1709 netdev_rss_key_fill(rss_config->hash_key, priv->rss_key_size);
1710
1711 rxfh.hfunc = ETH_RSS_HASH_TOP;
1712
1713 return gve_adminq_configure_rss(priv, &rxfh);
1714 }
1715
1716 int gve_flow_rules_reset(struct gve_priv *priv)
1717 {
1718 if (!priv->max_flow_rules)
1719 return 0;
1720
1721 return gve_adminq_reset_flow_rules(priv);
1722 }
1723
1724 int gve_adjust_config(struct gve_priv *priv,
1725 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1726 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1727 {
1728 int err;
1729
1730 /* Allocate resources for the new configuration */
1731 err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg);
1732 if (err) {
1733 netif_err(priv, drv, priv->dev,
1734 "Adjust config failed to alloc new queues");
1735 return err;
1736 }
1737
1738 /* Teardown the device and free existing resources */
1739 err = gve_close(priv->dev);
1740 if (err) {
1741 netif_err(priv, drv, priv->dev,
1742 "Adjust config failed to close old queues");
1743 gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg);
1744 return err;
1745 }
1746
1747 /* Bring the device back up again with the new resources. */
1748 err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg);
1749 if (err) {
1750 netif_err(priv, drv, priv->dev,
1751 "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n");
1752 /* No need to free on error: ownership of resources is lost after
1753 * calling gve_queues_start.
1754 */
1755 gve_turndown(priv);
1756 return err;
1757 }
1758
1759 return 0;
1760 }
1761
1762 int gve_adjust_queues(struct gve_priv *priv,
1763 struct gve_rx_queue_config new_rx_config,
1764 struct gve_tx_queue_config new_tx_config,
1765 bool reset_rss)
1766 {
1767 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
1768 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
1769 int err;
1770
1771 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1772
1773 /* Relay the new config from ethtool */
1774 tx_alloc_cfg.qcfg = &new_tx_config;
1775 rx_alloc_cfg.qcfg_tx = &new_tx_config;
1776 rx_alloc_cfg.qcfg_rx = &new_rx_config;
1777 rx_alloc_cfg.reset_rss = reset_rss;
1778
1779 if (netif_running(priv->dev)) {
1780 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
1781 return err;
1782 }
1783 /* Set the config for the next up. */
1784 if (reset_rss) {
1785 err = gve_init_rss_config(priv, new_rx_config.num_queues);
1786 if (err)
1787 return err;
1788 }
1789 priv->tx_cfg = new_tx_config;
1790 priv->rx_cfg = new_rx_config;
1791
1792 return 0;
1793 }
1794
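/* Quiesce the data path: detach queue NAPIs, disable NAPI polling and stop
 * the TX queues.
 */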
1795 static void gve_turndown(struct gve_priv *priv)
1796 {
1797 int idx;
1798
1799 if (netif_carrier_ok(priv->dev))
1800 netif_carrier_off(priv->dev);
1801
1802 if (!gve_get_napi_enabled(priv))
1803 return;
1804
1805 /* Disable napi to prevent more work from coming in */
1806 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1807 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1808 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1809
1810 if (!gve_tx_was_added_to_block(priv, idx))
1811 continue;
1812
1813 if (idx < priv->tx_cfg.num_queues)
1814 netif_queue_set_napi(priv->dev, idx,
1815 NETDEV_QUEUE_TYPE_TX, NULL);
1816
1817 napi_disable_locked(&block->napi);
1818 }
1819 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1820 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1821 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1822
1823 if (!gve_rx_was_added_to_block(priv, idx))
1824 continue;
1825
1826 netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX,
1827 NULL);
1828 napi_disable_locked(&block->napi);
1829 }
1830
1831 /* Stop tx queues */
1832 netif_tx_disable(priv->dev);
1833
1834 xdp_features_clear_redirect_target_locked(priv->dev);
1835
1836 gve_clear_napi_enabled(priv);
1837 gve_clear_report_stats(priv);
1838
1839 /* Make sure that all traffic is finished processing. */
1840 synchronize_net();
1841 }
1842
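/* Bring the data path back up: start TX queues, re-enable NAPI and unmask
 * (or re-arm) the per-queue interrupts.
 */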
1843 static void gve_turnup(struct gve_priv *priv)
1844 {
1845 int idx;
1846
1847 /* Start the tx queues */
1848 netif_tx_start_all_queues(priv->dev);
1849
1850 /* Enable napi and unmask interrupts for all queues */
1851 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1852 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1853 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1854
1855 if (!gve_tx_was_added_to_block(priv, idx))
1856 continue;
1857
1858 napi_enable_locked(&block->napi);
1859
1860 if (idx < priv->tx_cfg.num_queues)
1861 netif_queue_set_napi(priv->dev, idx,
1862 NETDEV_QUEUE_TYPE_TX,
1863 &block->napi);
1864
1865 if (gve_is_gqi(priv)) {
1866 iowrite32be(0, gve_irq_doorbell(priv, block));
1867 } else {
1868 gve_set_itr_coalesce_usecs_dqo(priv, block,
1869 priv->tx_coalesce_usecs);
1870 }
1871
1872 /* Any descs written by the NIC before this barrier will be
1873 * handled by the one-off napi schedule below. Whereas any
1874 * descs after the barrier will generate interrupts.
1875 */
1876 mb();
1877 napi_schedule(&block->napi);
1878 }
1879 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1880 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1881 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1882
1883 if (!gve_rx_was_added_to_block(priv, idx))
1884 continue;
1885
1886 napi_enable_locked(&block->napi);
1887 netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX,
1888 &block->napi);
1889
1890 if (gve_is_gqi(priv)) {
1891 iowrite32be(0, gve_irq_doorbell(priv, block));
1892 } else {
1893 gve_set_itr_coalesce_usecs_dqo(priv, block,
1894 priv->rx_coalesce_usecs);
1895 }
1896
1897 /* Any descs written by the NIC before this barrier will be
1898 * handled by the one-off napi schedule below. Whereas any
1899 * descs after the barrier will generate interrupts.
1900 */
1901 mb();
1902 napi_schedule(&block->napi);
1903 }
1904
1905 if (priv->tx_cfg.num_xdp_queues && gve_supports_xdp_xmit(priv))
1906 xdp_features_set_redirect_target_locked(priv->dev, false);
1907
1908 gve_set_napi_enabled(priv);
1909 }
1910
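/* Turn the queues back up and refresh the link state from the device status
 * register.
 */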
1911 static void gve_turnup_and_check_status(struct gve_priv *priv)
1912 {
1913 u32 status;
1914
1915 gve_turnup(priv);
1916 status = ioread32be(&priv->reg_bar0->device_status);
1917 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1918 }
1919
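/* Stack-invoked TX timeout handler. If the NIC has completions the driver
 * has not processed yet, kick the queue by scheduling its NAPI poll
 * (rate-limited by MIN_TX_TIMEOUT_GAP); otherwise schedule a device reset.
 */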
1920 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
1921 {
1922 struct gve_notify_block *block;
1923 struct gve_tx_ring *tx = NULL;
1924 struct gve_priv *priv;
1925 u32 last_nic_done;
1926 u32 current_time;
1927 u32 ntfy_idx;
1928
1929 netdev_info(dev, "Timeout on tx queue, %d", txqueue);
1930 priv = netdev_priv(dev);
1931 if (txqueue > priv->tx_cfg.num_queues)
1932 goto reset;
1933
1934 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
1935 if (ntfy_idx >= priv->num_ntfy_blks)
1936 goto reset;
1937
1938 block = &priv->ntfy_blocks[ntfy_idx];
1939 tx = block->tx;
1940
1941 current_time = jiffies_to_msecs(jiffies);
1942 if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
1943 goto reset;
1944
1945 /* Check to see if there are missed completions, which will allow us to
1946 * kick the queue.
1947 */
1948 last_nic_done = gve_tx_load_event_counter(priv, tx);
1949 if (last_nic_done - tx->done) {
1950 netdev_info(dev, "Kicking queue %d", txqueue);
1951 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
1952 napi_schedule(&block->napi);
1953 tx->last_kick_msec = current_time;
1954 goto out;
1955 } // Else reset.
1956
1957 reset:
1958 gve_schedule_reset(priv);
1959
1960 out:
1961 if (tx)
1962 tx->queue_timeout++;
1963 priv->tx_timeo_cnt++;
1964 }
1965
1966 u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit)
1967 {
1968 if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE)
1969 return GVE_MAX_RX_BUFFER_SIZE;
1970 else
1971 return GVE_DEFAULT_RX_BUFFER_SIZE;
1972 }
1973
1974 /* Header split is not yet supported on non-DQO_RDA queue formats, even if the device advertises it */
1975 bool gve_header_split_supported(const struct gve_priv *priv)
1976 {
1977 return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT;
1978 }
1979
1980 int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split)
1981 {
1982 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
1983 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
1984 bool enable_hdr_split;
1985 int err = 0;
1986
1987 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN)
1988 return 0;
1989
1990 if (!gve_header_split_supported(priv)) {
1991 dev_err(&priv->pdev->dev, "Header-split not supported\n");
1992 return -EOPNOTSUPP;
1993 }
1994
1995 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED)
1996 enable_hdr_split = true;
1997 else
1998 enable_hdr_split = false;
1999
2000 if (enable_hdr_split == priv->header_split_enabled)
2001 return 0;
2002
2003 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
2004
2005 rx_alloc_cfg.enable_header_split = enable_hdr_split;
2006 rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split);
2007
2008 if (netif_running(priv->dev))
2009 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
2010 return err;
2011 }
2012
2013 static int gve_set_features(struct net_device *netdev,
2014 netdev_features_t features)
2015 {
2016 const netdev_features_t orig_features = netdev->features;
2017 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
2018 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
2019 struct gve_priv *priv = netdev_priv(netdev);
2020 int err;
2021
2022 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
2023
2024 if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
2025 netdev->features ^= NETIF_F_LRO;
2026 if (netif_running(netdev)) {
2027 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
2028 if (err)
2029 goto revert_features;
2030 }
2031 }
2032 if ((netdev->features & NETIF_F_NTUPLE) && !(features & NETIF_F_NTUPLE)) {
2033 err = gve_flow_rules_reset(priv);
2034 if (err)
2035 goto revert_features;
2036 }
2037
2038 return 0;
2039
2040 revert_features:
2041 netdev->features = orig_features;
2042 return err;
2043 }
2044
2045 static const struct net_device_ops gve_netdev_ops = {
2046 .ndo_start_xmit = gve_start_xmit,
2047 .ndo_features_check = gve_features_check,
2048 .ndo_open = gve_open,
2049 .ndo_stop = gve_close,
2050 .ndo_get_stats64 = gve_get_stats,
2051 .ndo_tx_timeout = gve_tx_timeout,
2052 .ndo_set_features = gve_set_features,
2053 .ndo_bpf = gve_xdp,
2054 .ndo_xdp_xmit = gve_xdp_xmit,
2055 .ndo_xsk_wakeup = gve_xsk_wakeup,
2056 };
2057
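/* Translate device status bits into driver actions: flag a reset and/or a
 * stats report when the device requests one.
 */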
2058 static void gve_handle_status(struct gve_priv *priv, u32 status)
2059 {
2060 if (GVE_DEVICE_STATUS_RESET_MASK & status) {
2061 dev_info(&priv->pdev->dev, "Device requested reset.\n");
2062 gve_set_do_reset(priv);
2063 }
2064 if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
2065 priv->stats_report_trigger_cnt++;
2066 gve_set_do_report_stats(priv);
2067 }
2068 }
2069
2070 static void gve_handle_reset(struct gve_priv *priv)
2071 {
2072 /* A service task will be scheduled at the end of probe to catch any
2073 * resets that need to happen, and we don't want to reset until
2074 * probe is done.
2075 */
2076 if (gve_get_probe_in_progress(priv))
2077 return;
2078
2079 if (gve_get_do_reset(priv)) {
2080 rtnl_lock();
2081 netdev_lock(priv->dev);
2082 gve_reset(priv, false);
2083 netdev_unlock(priv->dev);
2084 rtnl_unlock();
2085 }
2086 }
2087
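/* Fill the DMA stats report shared with the device with per-queue TX and RX
 * counters, and bump written_count so the device sees a fresh snapshot.
 */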
2088 void gve_handle_report_stats(struct gve_priv *priv)
2089 {
2090 struct stats *stats = priv->stats_report->stats;
2091 int idx, stats_idx = 0;
2092 unsigned int start = 0;
2093 u64 tx_bytes;
2094
2095 if (!gve_get_report_stats(priv))
2096 return;
2097
2098 be64_add_cpu(&priv->stats_report->written_count, 1);
2099 /* tx stats */
2100 if (priv->tx) {
2101 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
2102 u32 last_completion = 0;
2103 u32 tx_frames = 0;
2104
2105 /* DQO doesn't currently support these metrics. */
2106 if (gve_is_gqi(priv)) {
2107 last_completion = priv->tx[idx].done;
2108 tx_frames = priv->tx[idx].req;
2109 }
2110
2111 do {
2112 start = u64_stats_fetch_begin(&priv->tx[idx].statss);
2113 tx_bytes = priv->tx[idx].bytes_done;
2114 } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
2115 stats[stats_idx++] = (struct stats) {
2116 .stat_name = cpu_to_be32(TX_WAKE_CNT),
2117 .value = cpu_to_be64(priv->tx[idx].wake_queue),
2118 .queue_id = cpu_to_be32(idx),
2119 };
2120 stats[stats_idx++] = (struct stats) {
2121 .stat_name = cpu_to_be32(TX_STOP_CNT),
2122 .value = cpu_to_be64(priv->tx[idx].stop_queue),
2123 .queue_id = cpu_to_be32(idx),
2124 };
2125 stats[stats_idx++] = (struct stats) {
2126 .stat_name = cpu_to_be32(TX_FRAMES_SENT),
2127 .value = cpu_to_be64(tx_frames),
2128 .queue_id = cpu_to_be32(idx),
2129 };
2130 stats[stats_idx++] = (struct stats) {
2131 .stat_name = cpu_to_be32(TX_BYTES_SENT),
2132 .value = cpu_to_be64(tx_bytes),
2133 .queue_id = cpu_to_be32(idx),
2134 };
2135 stats[stats_idx++] = (struct stats) {
2136 .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
2137 .value = cpu_to_be64(last_completion),
2138 .queue_id = cpu_to_be32(idx),
2139 };
2140 stats[stats_idx++] = (struct stats) {
2141 .stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
2142 .value = cpu_to_be64(priv->tx[idx].queue_timeout),
2143 .queue_id = cpu_to_be32(idx),
2144 };
2145 }
2146 }
2147 /* rx stats */
2148 if (priv->rx) {
2149 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
2150 stats[stats_idx++] = (struct stats) {
2151 .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
2152 .value = cpu_to_be64(priv->rx[idx].desc.seqno),
2153 .queue_id = cpu_to_be32(idx),
2154 };
2155 stats[stats_idx++] = (struct stats) {
2156 .stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
2157 .value = cpu_to_be64(priv->rx[idx].fill_cnt),
2158 .queue_id = cpu_to_be32(idx),
2159 };
2160 }
2161 }
2162 }
2163
2164 /* Handle NIC status register changes, reset requests and report stats */
2165 static void gve_service_task(struct work_struct *work)
2166 {
2167 struct gve_priv *priv = container_of(work, struct gve_priv,
2168 service_task);
2169 u32 status = ioread32be(&priv->reg_bar0->device_status);
2170
2171 gve_handle_status(priv, status);
2172
2173 gve_handle_reset(priv);
2174 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
2175 }
2176
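/* Advertise XDP features to the stack; only the GQI-QPL queue format
 * currently supports XDP (basic, redirect and AF_XDP zero-copy).
 */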
2177 static void gve_set_netdev_xdp_features(struct gve_priv *priv)
2178 {
2179 xdp_features_t xdp_features;
2180
2181 if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
2182 xdp_features = NETDEV_XDP_ACT_BASIC;
2183 xdp_features |= NETDEV_XDP_ACT_REDIRECT;
2184 xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
2185 } else {
2186 xdp_features = 0;
2187 }
2188
2189 xdp_set_features_flag_locked(priv->dev, xdp_features);
2190 }
2191
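/* Bring up device-level state: allocate the admin queue, verify driver
 * compatibility, optionally (re)describe the device, size the notification
 * blocks from the available MSI-X vectors, derive default queue counts and
 * allocate device resources.
 */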
2192 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
2193 {
2194 int num_ntfy;
2195 int err;
2196
2197 /* Set up the adminq */
2198 err = gve_adminq_alloc(&priv->pdev->dev, priv);
2199 if (err) {
2200 dev_err(&priv->pdev->dev,
2201 "Failed to alloc admin queue: err=%d\n", err);
2202 return err;
2203 }
2204
2205 err = gve_verify_driver_compatibility(priv);
2206 if (err) {
2207 dev_err(&priv->pdev->dev,
2208 "Could not verify driver compatibility: err=%d\n", err);
2209 goto err;
2210 }
2211
2212 priv->num_registered_pages = 0;
2213
2214 if (skip_describe_device)
2215 goto setup_device;
2216
2217 priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
2218 /* Get the initial information we need from the device */
2219 err = gve_adminq_describe_device(priv);
2220 if (err) {
2221 dev_err(&priv->pdev->dev,
2222 "Could not get device information: err=%d\n", err);
2223 goto err;
2224 }
2225 priv->dev->mtu = priv->dev->max_mtu;
2226 num_ntfy = pci_msix_vec_count(priv->pdev);
2227 if (num_ntfy <= 0) {
2228 dev_err(&priv->pdev->dev,
2229 "could not count MSI-x vectors: err=%d\n", num_ntfy);
2230 err = num_ntfy;
2231 goto err;
2232 } else if (num_ntfy < GVE_MIN_MSIX) {
2233 dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
2234 GVE_MIN_MSIX, num_ntfy);
2235 err = -EINVAL;
2236 goto err;
2237 }
2238
2239 /* Big TCP is only supported on DQ */
2240 if (!gve_is_gqi(priv))
2241 netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);
2242
2243 priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
2244 /* gvnic has one Notification Block per MSI-x vector, except for the
2245 * management vector
2246 */
2247 priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
2248 priv->mgmt_msix_idx = priv->num_ntfy_blks;
2249
2250 priv->tx_cfg.max_queues =
2251 min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
2252 priv->rx_cfg.max_queues =
2253 min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);
2254
2255 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
2256 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
2257 if (priv->default_num_queues > 0) {
2258 priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
2259 priv->tx_cfg.num_queues);
2260 priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
2261 priv->rx_cfg.num_queues);
2262 }
2263 priv->tx_cfg.num_xdp_queues = 0;
2264
2265 dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
2266 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
2267 dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
2268 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
2269
2270 if (!gve_is_gqi(priv)) {
2271 priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
2272 priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
2273 }
2274
2275 setup_device:
2276 gve_set_netdev_xdp_features(priv);
2277 err = gve_setup_device_resources(priv);
2278 if (!err)
2279 return 0;
2280 err:
2281 gve_adminq_free(&priv->pdev->dev, priv);
2282 return err;
2283 }
2284
2285 static void gve_teardown_priv_resources(struct gve_priv *priv)
2286 {
2287 gve_teardown_device_resources(priv);
2288 gve_adminq_free(&priv->pdev->dev, priv);
2289 }
2290
2291 static void gve_trigger_reset(struct gve_priv *priv)
2292 {
2293 /* Reset the device by releasing the AQ */
2294 gve_adminq_release(priv);
2295 }
2296
2297 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
2298 {
2299 gve_trigger_reset(priv);
2300 /* With the reset having already happened, close cannot fail */
2301 if (was_up)
2302 gve_close(priv->dev);
2303 gve_teardown_priv_resources(priv);
2304 }
2305
2306 static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
2307 {
2308 int err;
2309
2310 err = gve_init_priv(priv, true);
2311 if (err)
2312 goto err;
2313 if (was_up) {
2314 err = gve_open(priv->dev);
2315 if (err)
2316 goto err;
2317 }
2318 return 0;
2319 err:
2320 dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
2321 gve_turndown(priv);
2322 return err;
2323 }
2324
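/* Perform a full device reset: quiesce and tear down the existing queues
 * and resources (gracefully via gve_close() when possible, forcibly
 * otherwise), then rebuild everything through gve_reset_recovery().
 */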
2325 int gve_reset(struct gve_priv *priv, bool attempt_teardown)
2326 {
2327 bool was_up = netif_running(priv->dev);
2328 int err;
2329
2330 dev_info(&priv->pdev->dev, "Performing reset\n");
2331 gve_clear_do_reset(priv);
2332 gve_set_reset_in_progress(priv);
2333 /* If we aren't attempting to teardown normally, just go turndown and
2334 * reset right away.
2335 */
2336 if (!attempt_teardown) {
2337 gve_turndown(priv);
2338 gve_reset_and_teardown(priv, was_up);
2339 } else {
2340 /* Otherwise attempt to close normally */
2341 if (was_up) {
2342 err = gve_close(priv->dev);
2343 /* If that fails reset as we did above */
2344 if (err)
2345 gve_reset_and_teardown(priv, was_up);
2346 }
2347 /* Clean up any remaining resources */
2348 gve_teardown_priv_resources(priv);
2349 }
2350
2351 /* Set it all back up */
2352 err = gve_reset_recovery(priv, was_up);
2353 gve_clear_reset_in_progress(priv);
2354 priv->reset_cnt++;
2355 priv->interface_up_cnt = 0;
2356 priv->interface_down_cnt = 0;
2357 priv->stats_report_trigger_cnt = 0;
2358 return err;
2359 }
2360
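/* Report the driver version to the device by writing the prefix and version
 * strings, one byte at a time, to the version register.
 */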
2361 static void gve_write_version(u8 __iomem *driver_version_register)
2362 {
2363 const char *c = gve_version_prefix;
2364
2365 while (*c) {
2366 writeb(*c, driver_version_register);
2367 c++;
2368 }
2369
2370 c = gve_version_str;
2371 while (*c) {
2372 writeb(*c, driver_version_register);
2373 c++;
2374 }
2375 writeb('\n', driver_version_register);
2376 }
2377
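/* Queue-management op: quiesce all queues, destroy RX queue @idx on the
 * device and hand its ring state back to the core in @per_q_mem.
 */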
2378 static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx)
2379 {
2380 struct gve_priv *priv = netdev_priv(dev);
2381 struct gve_rx_ring *gve_per_q_mem;
2382 int err;
2383
2384 if (!priv->rx)
2385 return -EAGAIN;
2386
2387 /* Destroying queue 0 while other queues exist is not supported in DQO */
2388 if (!gve_is_gqi(priv) && idx == 0)
2389 return -ERANGE;
2390
2391 /* Single-queue destruction requires quiescence on all queues */
2392 gve_turndown(priv);
2393
2394 /* This failure will trigger a reset - no need to clean up */
2395 err = gve_adminq_destroy_single_rx_queue(priv, idx);
2396 if (err)
2397 return err;
2398
2399 if (gve_is_qpl(priv)) {
2400 /* This failure will trigger a reset - no need to clean up */
2401 err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx));
2402 if (err)
2403 return err;
2404 }
2405
2406 gve_rx_stop_ring(priv, idx);
2407
2408 /* Turn the unstopped queues back up */
2409 gve_turnup_and_check_status(priv);
2410
2411 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
2412 *gve_per_q_mem = priv->rx[idx];
2413 memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
2414 return 0;
2415 }
2416
2417 static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem)
2418 {
2419 struct gve_priv *priv = netdev_priv(dev);
2420 struct gve_rx_alloc_rings_cfg cfg = {0};
2421 struct gve_rx_ring *gve_per_q_mem;
2422
2423 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
2424 gve_rx_get_curr_alloc_cfg(priv, &cfg);
2425
2426 if (gve_is_gqi(priv))
2427 gve_rx_free_ring_gqi(priv, gve_per_q_mem, &cfg);
2428 else
2429 gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg);
2430 }
2431
2432 static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem,
2433 int idx)
2434 {
2435 struct gve_priv *priv = netdev_priv(dev);
2436 struct gve_rx_alloc_rings_cfg cfg = {0};
2437 struct gve_rx_ring *gve_per_q_mem;
2438 int err;
2439
2440 if (!priv->rx)
2441 return -EAGAIN;
2442
2443 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
2444 gve_rx_get_curr_alloc_cfg(priv, &cfg);
2445
2446 if (gve_is_gqi(priv))
2447 err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx);
2448 else
2449 err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx);
2450
2451 return err;
2452 }
2453
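/* Queue-management op: adopt the ring state from @per_q_mem, quiesce all
 * queues, create RX queue @idx on the device and bring the data path back up.
 */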
2454 static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx)
2455 {
2456 struct gve_priv *priv = netdev_priv(dev);
2457 struct gve_rx_ring *gve_per_q_mem;
2458 int err;
2459
2460 if (!priv->rx)
2461 return -EAGAIN;
2462
2463 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
2464 priv->rx[idx] = *gve_per_q_mem;
2465
2466 /* Single-queue creation requires quiescence on all queues */
2467 gve_turndown(priv);
2468
2469 gve_rx_start_ring(priv, idx);
2470
2471 if (gve_is_qpl(priv)) {
2472 /* This failure will trigger a reset - no need to clean up */
2473 err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx));
2474 if (err)
2475 goto abort;
2476 }
2477
2478 /* This failure will trigger a reset - no need to clean up */
2479 err = gve_adminq_create_single_rx_queue(priv, idx);
2480 if (err)
2481 goto abort;
2482
2483 if (gve_is_gqi(priv))
2484 gve_rx_write_doorbell(priv, &priv->rx[idx]);
2485 else
2486 gve_rx_post_buffers_dqo(&priv->rx[idx]);
2487
2488 /* Turn the unstopped queues back up */
2489 gve_turnup_and_check_status(priv);
2490 return 0;
2491
2492 abort:
2493 gve_rx_stop_ring(priv, idx);
2494
2495 /* All failures in this func result in a reset, by clearing the struct
2496 * at idx, we prevent a double free when that reset runs. The reset,
2497 * which needs the rtnl lock, will not run till this func returns and
2498 * its caller gives up the lock.
2499 */
2500 memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
2501 return err;
2502 }
2503
2504 static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = {
2505 .ndo_queue_mem_size = sizeof(struct gve_rx_ring),
2506 .ndo_queue_mem_alloc = gve_rx_queue_mem_alloc,
2507 .ndo_queue_mem_free = gve_rx_queue_mem_free,
2508 .ndo_queue_start = gve_rx_queue_start,
2509 .ndo_queue_stop = gve_rx_queue_stop,
2510 };
2511
2512 static void gve_get_rx_queue_stats(struct net_device *dev, int idx,
2513 struct netdev_queue_stats_rx *rx_stats)
2514 {
2515 struct gve_priv *priv = netdev_priv(dev);
2516 struct gve_rx_ring *rx = &priv->rx[idx];
2517 unsigned int start;
2518
2519 do {
2520 start = u64_stats_fetch_begin(&rx->statss);
2521 rx_stats->packets = rx->rpackets;
2522 rx_stats->bytes = rx->rbytes;
2523 rx_stats->alloc_fail = rx->rx_skb_alloc_fail +
2524 rx->rx_buf_alloc_fail;
2525 } while (u64_stats_fetch_retry(&rx->statss, start));
2526 }
2527
2528 static void gve_get_tx_queue_stats(struct net_device *dev, int idx,
2529 struct netdev_queue_stats_tx *tx_stats)
2530 {
2531 struct gve_priv *priv = netdev_priv(dev);
2532 struct gve_tx_ring *tx = &priv->tx[idx];
2533 unsigned int start;
2534
2535 do {
2536 start = u64_stats_fetch_begin(&tx->statss);
2537 tx_stats->packets = tx->pkt_done;
2538 tx_stats->bytes = tx->bytes_done;
2539 } while (u64_stats_fetch_retry(&tx->statss, start));
2540 }
2541
2542 static void gve_get_base_stats(struct net_device *dev,
2543 struct netdev_queue_stats_rx *rx,
2544 struct netdev_queue_stats_tx *tx)
2545 {
2546 rx->packets = 0;
2547 rx->bytes = 0;
2548 rx->alloc_fail = 0;
2549
2550 tx->packets = 0;
2551 tx->bytes = 0;
2552 }
2553
2554 static const struct netdev_stat_ops gve_stat_ops = {
2555 .get_queue_stats_rx = gve_get_rx_queue_stats,
2556 .get_queue_stats_tx = gve_get_tx_queue_stats,
2557 .get_base_stats = gve_get_base_stats,
2558 };
2559
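/* PCI probe: map the register and doorbell BARs, allocate the netdev with
 * the device's advertised maximum queue counts, initialize private state and
 * register the interface.
 */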
2560 static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
2561 {
2562 int max_tx_queues, max_rx_queues;
2563 struct net_device *dev;
2564 __be32 __iomem *db_bar;
2565 struct gve_registers __iomem *reg_bar;
2566 struct gve_priv *priv;
2567 int err;
2568
2569 err = pci_enable_device(pdev);
2570 if (err)
2571 return err;
2572
2573 err = pci_request_regions(pdev, gve_driver_name);
2574 if (err)
2575 goto abort_with_enabled;
2576
2577 pci_set_master(pdev);
2578
2579 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
2580 if (err) {
2581 dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
2582 goto abort_with_pci_region;
2583 }
2584
2585 reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
2586 if (!reg_bar) {
2587 dev_err(&pdev->dev, "Failed to map pci bar!\n");
2588 err = -ENOMEM;
2589 goto abort_with_pci_region;
2590 }
2591
2592 db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
2593 if (!db_bar) {
2594 dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
2595 err = -ENOMEM;
2596 goto abort_with_reg_bar;
2597 }
2598
2599 gve_write_version(&reg_bar->driver_version);
2600 /* Get max queues to alloc etherdev */
2601 max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
2602 max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
2603 /* Alloc and setup the netdev and priv */
2604 dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
2605 if (!dev) {
2606 dev_err(&pdev->dev, "could not allocate netdev\n");
2607 err = -ENOMEM;
2608 goto abort_with_db_bar;
2609 }
2610 SET_NETDEV_DEV(dev, &pdev->dev);
2611 pci_set_drvdata(pdev, dev);
2612 dev->ethtool_ops = &gve_ethtool_ops;
2613 dev->netdev_ops = &gve_netdev_ops;
2614 dev->queue_mgmt_ops = &gve_queue_mgmt_ops;
2615 dev->stat_ops = &gve_stat_ops;
2616
2617 /* Set default and supported features.
2618 *
2619 * Features might be set in other locations as well (such as
2620 * `gve_adminq_describe_device`).
2621 */
2622 dev->hw_features = NETIF_F_HIGHDMA;
2623 dev->hw_features |= NETIF_F_SG;
2624 dev->hw_features |= NETIF_F_HW_CSUM;
2625 dev->hw_features |= NETIF_F_TSO;
2626 dev->hw_features |= NETIF_F_TSO6;
2627 dev->hw_features |= NETIF_F_TSO_ECN;
2628 dev->hw_features |= NETIF_F_RXCSUM;
2629 dev->hw_features |= NETIF_F_RXHASH;
2630 dev->features = dev->hw_features;
2631 dev->watchdog_timeo = 5 * HZ;
2632 dev->min_mtu = ETH_MIN_MTU;
2633 netif_carrier_off(dev);
2634
2635 priv = netdev_priv(dev);
2636 priv->dev = dev;
2637 priv->pdev = pdev;
2638 priv->msg_enable = DEFAULT_MSG_LEVEL;
2639 priv->reg_bar0 = reg_bar;
2640 priv->db_bar2 = db_bar;
2641 priv->service_task_flags = 0x0;
2642 priv->state_flags = 0x0;
2643 priv->ethtool_flags = 0x0;
2644 priv->rx_cfg.packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;
2645 priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;
2646
2647 gve_set_probe_in_progress(priv);
2648 priv->gve_wq = alloc_ordered_workqueue("gve", 0);
2649 if (!priv->gve_wq) {
2650 dev_err(&pdev->dev, "Could not allocate workqueue");
2651 err = -ENOMEM;
2652 goto abort_with_netdev;
2653 }
2654 INIT_WORK(&priv->service_task, gve_service_task);
2655 INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
2656 priv->tx_cfg.max_queues = max_tx_queues;
2657 priv->rx_cfg.max_queues = max_rx_queues;
2658
2659 err = gve_init_priv(priv, false);
2660 if (err)
2661 goto abort_with_wq;
2662
2663 if (!gve_is_gqi(priv) && !gve_is_qpl(priv))
2664 dev->netmem_tx = true;
2665
2666 err = register_netdev(dev);
2667 if (err)
2668 goto abort_with_gve_init;
2669
2670 dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
2671 dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
2672 gve_clear_probe_in_progress(priv);
2673 queue_work(priv->gve_wq, &priv->service_task);
2674 return 0;
2675
2676 abort_with_gve_init:
2677 gve_teardown_priv_resources(priv);
2678
2679 abort_with_wq:
2680 destroy_workqueue(priv->gve_wq);
2681
2682 abort_with_netdev:
2683 free_netdev(dev);
2684
2685 abort_with_db_bar:
2686 pci_iounmap(pdev, db_bar);
2687
2688 abort_with_reg_bar:
2689 pci_iounmap(pdev, reg_bar);
2690
2691 abort_with_pci_region:
2692 pci_release_regions(pdev);
2693
2694 abort_with_enabled:
2695 pci_disable_device(pdev);
2696 return err;
2697 }
2698
2699 static void gve_remove(struct pci_dev *pdev)
2700 {
2701 struct net_device *netdev = pci_get_drvdata(pdev);
2702 struct gve_priv *priv = netdev_priv(netdev);
2703 __be32 __iomem *db_bar = priv->db_bar2;
2704 void __iomem *reg_bar = priv->reg_bar0;
2705
2706 unregister_netdev(netdev);
2707 gve_teardown_priv_resources(priv);
2708 destroy_workqueue(priv->gve_wq);
2709 free_netdev(netdev);
2710 pci_iounmap(pdev, db_bar);
2711 pci_iounmap(pdev, reg_bar);
2712 pci_release_regions(pdev);
2713 pci_disable_device(pdev);
2714 }
2715
2716 static void gve_shutdown(struct pci_dev *pdev)
2717 {
2718 struct net_device *netdev = pci_get_drvdata(pdev);
2719 struct gve_priv *priv = netdev_priv(netdev);
2720 bool was_up = netif_running(priv->dev);
2721
2722 rtnl_lock();
2723 netdev_lock(netdev);
2724 if (was_up && gve_close(priv->dev)) {
2725 /* If the dev was up, attempt to close, if close fails, reset */
2726 gve_reset_and_teardown(priv, was_up);
2727 } else {
2728 /* If the dev wasn't up or close worked, finish tearing down */
2729 gve_teardown_priv_resources(priv);
2730 }
2731 netdev_unlock(netdev);
2732 rtnl_unlock();
2733 }
2734
2735 #ifdef CONFIG_PM
2736 static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
2737 {
2738 struct net_device *netdev = pci_get_drvdata(pdev);
2739 struct gve_priv *priv = netdev_priv(netdev);
2740 bool was_up = netif_running(priv->dev);
2741
2742 priv->suspend_cnt++;
2743 rtnl_lock();
2744 netdev_lock(netdev);
2745 if (was_up && gve_close(priv->dev)) {
2746 /* If the dev was up, attempt to close, if close fails, reset */
2747 gve_reset_and_teardown(priv, was_up);
2748 } else {
2749 /* If the dev wasn't up or close worked, finish tearing down */
2750 gve_teardown_priv_resources(priv);
2751 }
2752 priv->up_before_suspend = was_up;
2753 netdev_unlock(netdev);
2754 rtnl_unlock();
2755 return 0;
2756 }
2757
2758 static int gve_resume(struct pci_dev *pdev)
2759 {
2760 struct net_device *netdev = pci_get_drvdata(pdev);
2761 struct gve_priv *priv = netdev_priv(netdev);
2762 int err;
2763
2764 priv->resume_cnt++;
2765 rtnl_lock();
2766 netdev_lock(netdev);
2767 err = gve_reset_recovery(priv, priv->up_before_suspend);
2768 netdev_unlock(netdev);
2769 rtnl_unlock();
2770 return err;
2771 }
2772 #endif /* CONFIG_PM */
2773
2774 static const struct pci_device_id gve_id_table[] = {
2775 { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
2776 { }
2777 };
2778
2779 static struct pci_driver gve_driver = {
2780 .name = gve_driver_name,
2781 .id_table = gve_id_table,
2782 .probe = gve_probe,
2783 .remove = gve_remove,
2784 .shutdown = gve_shutdown,
2785 #ifdef CONFIG_PM
2786 .suspend = gve_suspend,
2787 .resume = gve_resume,
2788 #endif
2789 };
2790
2791 module_pci_driver(gve_driver);
2792
2793 MODULE_DEVICE_TABLE(pci, gve_id_table);
2794 MODULE_AUTHOR("Google, Inc.");
2795 MODULE_DESCRIPTION("Google Virtual NIC Driver");
2796 MODULE_LICENSE("Dual MIT/GPL");
2797 MODULE_VERSION(GVE_VERSION);
2798