1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* A network driver using virtio.
3 *
4 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
5 */
6 //#define DEBUG
7 #include <linux/netdevice.h>
8 #include <linux/etherdevice.h>
9 #include <linux/ethtool.h>
10 #include <linux/module.h>
11 #include <linux/virtio.h>
12 #include <linux/virtio_net.h>
13 #include <linux/bpf.h>
14 #include <linux/bpf_trace.h>
15 #include <linux/scatterlist.h>
16 #include <linux/if_vlan.h>
17 #include <linux/slab.h>
18 #include <linux/cpu.h>
19 #include <linux/average.h>
20 #include <linux/filter.h>
21 #include <linux/kernel.h>
22 #include <linux/dim.h>
23 #include <net/route.h>
24 #include <net/xdp.h>
25 #include <net/net_failover.h>
26 #include <net/netdev_rx_queue.h>
27 #include <net/netdev_queues.h>
28 #include <net/xdp_sock_drv.h>
29
30 static int napi_weight = NAPI_POLL_WEIGHT;
31 module_param(napi_weight, int, 0444);
32
33 static bool csum = true, gso = true, napi_tx = true;
34 module_param(csum, bool, 0444);
35 module_param(gso, bool, 0444);
36 module_param(napi_tx, bool, 0644);
37
38 /* FIXME: MTU in config. */
39 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
40 #define GOOD_COPY_LEN 128
41
42 #define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
43
44 /* Separating two types of XDP xmit */
45 #define VIRTIO_XDP_TX BIT(0)
46 #define VIRTIO_XDP_REDIR BIT(1)
47
48 #define VIRTIO_XDP_FLAG BIT(0)
49 #define VIRTIO_ORPHAN_FLAG BIT(1)
50
51 /* RX packet size EWMA. The average packet size is used to determine the packet
52 * buffer size when refilling RX rings. As the entire RX ring may be refilled
53 * at once, the weight is chosen so that the EWMA will be insensitive to short-
54 * term, transient changes in packet size.
55 */
56 DECLARE_EWMA(pkt_len, 0, 64)
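/* DECLARE_EWMA(pkt_len, 0, 64) (see <linux/average.h>) generates struct
 * ewma_pkt_len plus the ewma_pkt_len_init/add/read() helpers used for
 * mrg_avg_pkt_len below; with a weight reciprocal of 64, each new sample
 * only contributes 1/64 to the running average, which is what keeps the
 * estimate insensitive to short bursts of unusual packet sizes.
 */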
57
58 #define VIRTNET_DRIVER_VERSION "1.0.0"
59
60 static const unsigned long guest_offloads[] = {
61 VIRTIO_NET_F_GUEST_TSO4,
62 VIRTIO_NET_F_GUEST_TSO6,
63 VIRTIO_NET_F_GUEST_ECN,
64 VIRTIO_NET_F_GUEST_UFO,
65 VIRTIO_NET_F_GUEST_CSUM,
66 VIRTIO_NET_F_GUEST_USO4,
67 VIRTIO_NET_F_GUEST_USO6,
68 VIRTIO_NET_F_GUEST_HDRLEN
69 };
70
71 #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
72 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
73 (1ULL << VIRTIO_NET_F_GUEST_ECN) | \
74 (1ULL << VIRTIO_NET_F_GUEST_UFO) | \
75 (1ULL << VIRTIO_NET_F_GUEST_USO4) | \
76 (1ULL << VIRTIO_NET_F_GUEST_USO6))
77
78 struct virtnet_stat_desc {
79 char desc[ETH_GSTRING_LEN];
80 size_t offset;
81 size_t qstat_offset;
82 };
83
84 struct virtnet_sq_free_stats {
85 u64 packets;
86 u64 bytes;
87 u64 napi_packets;
88 u64 napi_bytes;
89 };
90
91 struct virtnet_sq_stats {
92 struct u64_stats_sync syncp;
93 u64_stats_t packets;
94 u64_stats_t bytes;
95 u64_stats_t xdp_tx;
96 u64_stats_t xdp_tx_drops;
97 u64_stats_t kicks;
98 u64_stats_t tx_timeouts;
99 u64_stats_t stop;
100 u64_stats_t wake;
101 };
102
103 struct virtnet_rq_stats {
104 struct u64_stats_sync syncp;
105 u64_stats_t packets;
106 u64_stats_t bytes;
107 u64_stats_t drops;
108 u64_stats_t xdp_packets;
109 u64_stats_t xdp_tx;
110 u64_stats_t xdp_redirects;
111 u64_stats_t xdp_drops;
112 u64_stats_t kicks;
113 };
114
115 #define VIRTNET_SQ_STAT(name, m) {name, offsetof(struct virtnet_sq_stats, m), -1}
116 #define VIRTNET_RQ_STAT(name, m) {name, offsetof(struct virtnet_rq_stats, m), -1}
117
118 #define VIRTNET_SQ_STAT_QSTAT(name, m) \
119 { \
120 name, \
121 offsetof(struct virtnet_sq_stats, m), \
122 offsetof(struct netdev_queue_stats_tx, m), \
123 }
124
125 #define VIRTNET_RQ_STAT_QSTAT(name, m) \
126 { \
127 name, \
128 offsetof(struct virtnet_rq_stats, m), \
129 offsetof(struct netdev_queue_stats_rx, m), \
130 }
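/* For illustration, VIRTNET_RQ_STAT_QSTAT("packets", packets) expands to
 * { "packets", offsetof(struct virtnet_rq_stats, packets),
 *   offsetof(struct netdev_queue_stats_rx, packets) },
 * i.e. one descriptor tying an ethtool string to both the driver-private
 * counter and the matching netdev queue-stat field. The plain
 * VIRTNET_SQ_STAT()/VIRTNET_RQ_STAT() variants set qstat_offset to -1 to
 * mark stats that have no netdev qstat counterpart.
 */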
131
132 static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = {
133 VIRTNET_SQ_STAT("xdp_tx", xdp_tx),
134 VIRTNET_SQ_STAT("xdp_tx_drops", xdp_tx_drops),
135 VIRTNET_SQ_STAT("kicks", kicks),
136 VIRTNET_SQ_STAT("tx_timeouts", tx_timeouts),
137 };
138
139 static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
140 VIRTNET_RQ_STAT("drops", drops),
141 VIRTNET_RQ_STAT("xdp_packets", xdp_packets),
142 VIRTNET_RQ_STAT("xdp_tx", xdp_tx),
143 VIRTNET_RQ_STAT("xdp_redirects", xdp_redirects),
144 VIRTNET_RQ_STAT("xdp_drops", xdp_drops),
145 VIRTNET_RQ_STAT("kicks", kicks),
146 };
147
148 static const struct virtnet_stat_desc virtnet_sq_stats_desc_qstat[] = {
149 VIRTNET_SQ_STAT_QSTAT("packets", packets),
150 VIRTNET_SQ_STAT_QSTAT("bytes", bytes),
151 VIRTNET_SQ_STAT_QSTAT("stop", stop),
152 VIRTNET_SQ_STAT_QSTAT("wake", wake),
153 };
154
155 static const struct virtnet_stat_desc virtnet_rq_stats_desc_qstat[] = {
156 VIRTNET_RQ_STAT_QSTAT("packets", packets),
157 VIRTNET_RQ_STAT_QSTAT("bytes", bytes),
158 };
159
160 #define VIRTNET_STATS_DESC_CQ(name) \
161 {#name, offsetof(struct virtio_net_stats_cvq, name), -1}
162
163 #define VIRTNET_STATS_DESC_RX(class, name) \
164 {#name, offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), -1}
165
166 #define VIRTNET_STATS_DESC_TX(class, name) \
167 {#name, offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), -1}
168
169
170 static const struct virtnet_stat_desc virtnet_stats_cvq_desc[] = {
171 VIRTNET_STATS_DESC_CQ(command_num),
172 VIRTNET_STATS_DESC_CQ(ok_num),
173 };
174
175 static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = {
176 VIRTNET_STATS_DESC_RX(basic, packets),
177 VIRTNET_STATS_DESC_RX(basic, bytes),
178
179 VIRTNET_STATS_DESC_RX(basic, notifications),
180 VIRTNET_STATS_DESC_RX(basic, interrupts),
181 };
182
183 static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = {
184 VIRTNET_STATS_DESC_TX(basic, packets),
185 VIRTNET_STATS_DESC_TX(basic, bytes),
186
187 VIRTNET_STATS_DESC_TX(basic, notifications),
188 VIRTNET_STATS_DESC_TX(basic, interrupts),
189 };
190
191 static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = {
192 VIRTNET_STATS_DESC_RX(csum, needs_csum),
193 };
194
195 static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = {
196 VIRTNET_STATS_DESC_TX(gso, gso_packets_noseg),
197 VIRTNET_STATS_DESC_TX(gso, gso_bytes_noseg),
198 };
199
200 static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = {
201 VIRTNET_STATS_DESC_RX(speed, ratelimit_bytes),
202 };
203
204 static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = {
205 VIRTNET_STATS_DESC_TX(speed, ratelimit_bytes),
206 };
207
208 #define VIRTNET_STATS_DESC_RX_QSTAT(class, name, qstat_field) \
209 { \
210 #name, \
211 offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), \
212 offsetof(struct netdev_queue_stats_rx, qstat_field), \
213 }
214
215 #define VIRTNET_STATS_DESC_TX_QSTAT(class, name, qstat_field) \
216 { \
217 #name, \
218 offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), \
219 offsetof(struct netdev_queue_stats_tx, qstat_field), \
220 }
221
222 static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc_qstat[] = {
223 VIRTNET_STATS_DESC_RX_QSTAT(basic, drops, hw_drops),
224 VIRTNET_STATS_DESC_RX_QSTAT(basic, drop_overruns, hw_drop_overruns),
225 };
226
227 static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc_qstat[] = {
228 VIRTNET_STATS_DESC_TX_QSTAT(basic, drops, hw_drops),
229 VIRTNET_STATS_DESC_TX_QSTAT(basic, drop_malformed, hw_drop_errors),
230 };
231
232 static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc_qstat[] = {
233 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_valid, csum_unnecessary),
234 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_none, csum_none),
235 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_bad, csum_bad),
236 };
237
238 static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc_qstat[] = {
239 VIRTNET_STATS_DESC_TX_QSTAT(csum, csum_none, csum_none),
240 VIRTNET_STATS_DESC_TX_QSTAT(csum, needs_csum, needs_csum),
241 };
242
243 static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc_qstat[] = {
244 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets, hw_gro_packets),
245 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes, hw_gro_bytes),
246 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets_coalesced, hw_gro_wire_packets),
247 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes_coalesced, hw_gro_wire_bytes),
248 };
249
250 static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc_qstat[] = {
251 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_packets, hw_gso_packets),
252 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_bytes, hw_gso_bytes),
253 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments, hw_gso_wire_packets),
254 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments_bytes, hw_gso_wire_bytes),
255 };
256
257 static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc_qstat[] = {
258 VIRTNET_STATS_DESC_RX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits),
259 };
260
261 static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc_qstat[] = {
262 VIRTNET_STATS_DESC_TX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits),
263 };
264
265 #define VIRTNET_Q_TYPE_RX 0
266 #define VIRTNET_Q_TYPE_TX 1
267 #define VIRTNET_Q_TYPE_CQ 2
268
269 struct virtnet_interrupt_coalesce {
270 u32 max_packets;
271 u32 max_usecs;
272 };
273
274 /* The DMA information for the pages allocated at one time (one page_frag fill). */
275 struct virtnet_rq_dma {
276 dma_addr_t addr;
277 u32 ref;
278 u16 len;
279 u16 need_sync;
280 };
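/* Layout used by virtnet_rq_alloc(): the struct virtnet_rq_dma sits at the
 * very start of each page-frag page, followed by the receive buffers carved
 * out of the same page:
 *
 *	page: [ virtnet_rq_dma | buf0 | buf1 | ... ]
 *
 * The page is DMA-mapped once (dma->addr/dma->len) and dma->ref counts the
 * buffers still in flight, so the mapping is only torn down when the last
 * buffer comes back (see virtnet_rq_unmap()).
 */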
281
282 /* Internal representation of a send virtqueue */
283 struct send_queue {
284 /* Virtqueue associated with this send_queue */
285 struct virtqueue *vq;
286
287 /* TX: fragments + linear part + virtio header */
288 struct scatterlist sg[MAX_SKB_FRAGS + 2];
289
290 /* Name of the send queue: output.$index */
291 char name[16];
292
293 struct virtnet_sq_stats stats;
294
295 struct virtnet_interrupt_coalesce intr_coal;
296
297 struct napi_struct napi;
298
299 /* Record whether sq is in reset state. */
300 bool reset;
301 };
302
303 /* Internal representation of a receive virtqueue */
304 struct receive_queue {
305 /* Virtqueue associated with this receive_queue */
306 struct virtqueue *vq;
307
308 struct napi_struct napi;
309
310 struct bpf_prog __rcu *xdp_prog;
311
312 struct virtnet_rq_stats stats;
313
314 /* The number of rx notifications */
315 u16 calls;
316
317 /* Is dynamic interrupt moderation enabled? */
318 bool dim_enabled;
319
320 /* Used to protect dim_enabled and intr_coal */
321 struct mutex dim_lock;
322
323 /* Dynamic Interrupt Moderation */
324 struct dim dim;
325
326 u32 packets_in_napi;
327
328 struct virtnet_interrupt_coalesce intr_coal;
329
330 /* Chain pages by the private ptr. */
331 struct page *pages;
332
333 /* Average packet length for mergeable receive buffers. */
334 struct ewma_pkt_len mrg_avg_pkt_len;
335
336 /* Page frag for packet buffer allocation. */
337 struct page_frag alloc_frag;
338
339 /* RX: fragments + linear part + virtio header */
340 struct scatterlist sg[MAX_SKB_FRAGS + 2];
341
342 /* Min single buffer size for mergeable buffers case. */
343 unsigned int min_buf_len;
344
345 /* Name of this receive queue: input.$index */
346 char name[16];
347
348 struct xdp_rxq_info xdp_rxq;
349
350 /* Record the last dma info to free after new pages are allocated. */
351 struct virtnet_rq_dma *last_dma;
352
353 struct xsk_buff_pool *xsk_pool;
354
355 /* xdp rxq used by xsk */
356 struct xdp_rxq_info xsk_rxq_info;
357
358 struct xdp_buff **xsk_buffs;
359
360 /* Driver does the DMA mapping itself */
361 bool do_dma;
362 };
363
364 /* This structure can hold an RSS message with the maximum settings for the indirection table and key size.
365 * Note that the default structure describing the RSS configuration, virtio_net_rss_config,
366 * carries the same information but cannot hold a variable-sized table.
367 * In any case, the structure is passed to the virtio hw through sg_buf, split into parts
368 * because the table size may differ according to the device configuration.
369 */
370 #define VIRTIO_NET_RSS_MAX_KEY_SIZE 40
371 struct virtio_net_ctrl_rss {
372 u32 hash_types;
373 u16 indirection_table_mask;
374 u16 unclassified_queue;
375 u16 hash_cfg_reserved; /* for HASH_CONFIG (see virtio_net_hash_config for details) */
376 u16 max_tx_vq;
377 u8 hash_key_length;
378 u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE];
379
380 u16 *indirection_table;
381 };
382
383 /* Control VQ buffers: protected by the rtnl lock */
384 struct control_buf {
385 struct virtio_net_ctrl_hdr hdr;
386 virtio_net_ctrl_ack status;
387 };
388
389 struct virtnet_info {
390 struct virtio_device *vdev;
391 struct virtqueue *cvq;
392 struct net_device *dev;
393 struct send_queue *sq;
394 struct receive_queue *rq;
395 unsigned int status;
396
397 /* Max # of queue pairs supported by the device */
398 u16 max_queue_pairs;
399
400 /* # of queue pairs currently used by the driver */
401 u16 curr_queue_pairs;
402
403 /* # of XDP queue pairs currently used by the driver */
404 u16 xdp_queue_pairs;
405
406 /* xdp_queue_pairs may be 0 even while an XDP program is loaded, so track that state with this flag. */
407 bool xdp_enabled;
408
409 /* I like... big packets and I cannot lie! */
410 bool big_packets;
411
412 /* number of sg entries allocated for big packets */
413 unsigned int big_packets_num_skbfrags;
414
415 /* Host will merge rx buffers for big packets (shake it! shake it!) */
416 bool mergeable_rx_bufs;
417
418 /* Host supports rss and/or hash report */
419 bool has_rss;
420 bool has_rss_hash_report;
421 u8 rss_key_size;
422 u16 rss_indir_table_size;
423 u32 rss_hash_types_supported;
424 u32 rss_hash_types_saved;
425 struct virtio_net_ctrl_rss rss;
426
427 /* Has control virtqueue */
428 bool has_cvq;
429
430 /* Lock to protect the control VQ */
431 struct mutex cvq_lock;
432
433 /* Host can handle any s/g split between our header and packet data */
434 bool any_header_sg;
435
436 /* Packet virtio header size */
437 u8 hdr_len;
438
439 /* Work struct for delayed refilling if we run low on memory. */
440 struct delayed_work refill;
441
442 /* Is delayed refill enabled? */
443 bool refill_enabled;
444
445 /* The lock to synchronize the access to refill_enabled */
446 spinlock_t refill_lock;
447
448 /* Work struct for config space updates */
449 struct work_struct config_work;
450
451 /* Work struct for setting rx mode */
452 struct work_struct rx_mode_work;
453
454 /* OK to queue work setting RX mode? */
455 bool rx_mode_work_enabled;
456
457 /* Is the affinity hint set for the virtqueues? */
458 bool affinity_hint_set;
459
460 /* CPU hotplug instances for online & dead */
461 struct hlist_node node;
462 struct hlist_node node_dead;
463
464 struct control_buf *ctrl;
465
466 /* Ethtool settings */
467 u8 duplex;
468 u32 speed;
469
470 /* Is rx dynamic interrupt moderation enabled? */
471 bool rx_dim_enabled;
472
473 /* Interrupt coalescing settings */
474 struct virtnet_interrupt_coalesce intr_coal_tx;
475 struct virtnet_interrupt_coalesce intr_coal_rx;
476
477 unsigned long guest_offloads;
478 unsigned long guest_offloads_capable;
479
480 /* failover when STANDBY feature enabled */
481 struct failover *failover;
482
483 u64 device_stats_cap;
484 };
485
486 struct padded_vnet_hdr {
487 struct virtio_net_hdr_v1_hash hdr;
488 /*
489 * hdr is in a separate sg buffer, and the data sg buffer shares the same page
490 * with this header sg. This padding makes the next sg 16-byte aligned
491 * after the header.
492 */
493 char padding[12];
494 };
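/* For reference: struct virtio_net_hdr_v1_hash is 20 bytes (the 12-byte
 * virtio_net_hdr_v1 plus the hash_value/hash_report/padding fields), so the
 * 12 bytes of padding above bring the header sg to 32 bytes and leave the
 * data sg that follows in the same page 16-byte aligned.
 */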
495
496 struct virtio_net_common_hdr {
497 union {
498 struct virtio_net_hdr hdr;
499 struct virtio_net_hdr_mrg_rxbuf mrg_hdr;
500 struct virtio_net_hdr_v1_hash hash_v1_hdr;
501 };
502 };
503
504 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
505 static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
506 struct net_device *dev,
507 unsigned int *xdp_xmit,
508 struct virtnet_rq_stats *stats);
509 static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq,
510 struct sk_buff *skb, u8 flags);
511 static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb,
512 struct sk_buff *curr_skb,
513 struct page *page, void *buf,
514 int len, int truesize);
515
516 static int rss_indirection_table_alloc(struct virtio_net_ctrl_rss *rss, u16 indir_table_size)
517 {
518 if (!indir_table_size) {
519 rss->indirection_table = NULL;
520 return 0;
521 }
522
523 rss->indirection_table = kmalloc_array(indir_table_size, sizeof(u16), GFP_KERNEL);
524 if (!rss->indirection_table)
525 return -ENOMEM;
526
527 return 0;
528 }
529
530 static void rss_indirection_table_free(struct virtio_net_ctrl_rss *rss)
531 {
532 kfree(rss->indirection_table);
533 }
534
535 static bool is_xdp_frame(void *ptr)
536 {
537 return (unsigned long)ptr & VIRTIO_XDP_FLAG;
538 }
539
540 static void *xdp_to_ptr(struct xdp_frame *ptr)
541 {
542 return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG);
543 }
544
545 static struct xdp_frame *ptr_to_xdp(void *ptr)
546 {
547 return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
548 }
549
550 static bool is_orphan_skb(void *ptr)
551 {
552 return (unsigned long)ptr & VIRTIO_ORPHAN_FLAG;
553 }
554
555 static void *skb_to_ptr(struct sk_buff *skb, bool orphan)
556 {
557 return (void *)((unsigned long)skb | (orphan ? VIRTIO_ORPHAN_FLAG : 0));
558 }
559
560 static struct sk_buff *ptr_to_skb(void *ptr)
561 {
562 return (struct sk_buff *)((unsigned long)ptr & ~VIRTIO_ORPHAN_FLAG);
563 }
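/* The send virtqueue token is a tagged pointer: skbs and xdp_frames are at
 * least 4-byte aligned, so the two low bits are free to encode what kind of
 * buffer was queued. For example, queuing an xdp_frame located at the
 * (hypothetical) address 0xffff888012345000 stores 0xffff888012345001
 * (VIRTIO_XDP_FLAG set), and ptr_to_xdp() masks the flag back off on
 * completion; skb_to_ptr()/ptr_to_skb() do the same with VIRTIO_ORPHAN_FLAG
 * for orphaned skbs.
 */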
564
565 static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
566 bool in_napi, struct virtnet_sq_free_stats *stats)
567 {
568 unsigned int len;
569 void *ptr;
570
571 while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
572 if (!is_xdp_frame(ptr)) {
573 struct sk_buff *skb = ptr_to_skb(ptr);
574
575 pr_debug("Sent skb %p\n", skb);
576
577 if (is_orphan_skb(ptr)) {
578 stats->packets++;
579 stats->bytes += skb->len;
580 } else {
581 stats->napi_packets++;
582 stats->napi_bytes += skb->len;
583 }
584 napi_consume_skb(skb, in_napi);
585 } else {
586 struct xdp_frame *frame = ptr_to_xdp(ptr);
587
588 stats->packets++;
589 stats->bytes += xdp_get_frame_len(frame);
590 xdp_return_frame(frame);
591 }
592 }
593 netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes);
594 }
595
596 /* Converting between virtqueue no. and kernel tx/rx queue no.
597 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
598 */
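/* Example with max_queue_pairs == 2:
 *	vq index: 0    1    2    3    4
 *	queue:    rx0  tx0  rx1  tx1  cvq
 * so vq2rxq(vq2) == 1, vq2txq(vq3) == 1, rxq2vq(1) == 2 and txq2vq(1) == 3,
 * while vq_type() reports index 4 (== max_queue_pairs * 2) as the control vq.
 */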
599 static int vq2txq(struct virtqueue *vq)
600 {
601 return (vq->index - 1) / 2;
602 }
603
604 static int txq2vq(int txq)
605 {
606 return txq * 2 + 1;
607 }
608
609 static int vq2rxq(struct virtqueue *vq)
610 {
611 return vq->index / 2;
612 }
613
614 static int rxq2vq(int rxq)
615 {
616 return rxq * 2;
617 }
618
619 static int vq_type(struct virtnet_info *vi, int qid)
620 {
621 if (qid == vi->max_queue_pairs * 2)
622 return VIRTNET_Q_TYPE_CQ;
623
624 if (qid % 2)
625 return VIRTNET_Q_TYPE_TX;
626
627 return VIRTNET_Q_TYPE_RX;
628 }
629
630 static inline struct virtio_net_common_hdr *
631 skb_vnet_common_hdr(struct sk_buff *skb)
632 {
633 return (struct virtio_net_common_hdr *)skb->cb;
634 }
635
636 /*
637 * page->private is used to chain pages for big packets; put the whole
638 * most recently used list at the beginning for reuse
639 */
640 static void give_pages(struct receive_queue *rq, struct page *page)
641 {
642 struct page *end;
643
644 /* Find end of list, sew whole thing into vi->rq.pages. */
645 for (end = page; end->private; end = (struct page *)end->private);
646 end->private = (unsigned long)rq->pages;
647 rq->pages = page;
648 }
649
650 static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
651 {
652 struct page *p = rq->pages;
653
654 if (p) {
655 rq->pages = (struct page *)p->private;
656 /* clear private here, it is used to chain pages */
657 p->private = 0;
658 } else
659 p = alloc_page(gfp_mask);
660 return p;
661 }
662
663 static void virtnet_rq_free_buf(struct virtnet_info *vi,
664 struct receive_queue *rq, void *buf)
665 {
666 if (vi->mergeable_rx_bufs)
667 put_page(virt_to_head_page(buf));
668 else if (vi->big_packets)
669 give_pages(rq, buf);
670 else
671 put_page(virt_to_head_page(buf));
672 }
673
674 static void enable_delayed_refill(struct virtnet_info *vi)
675 {
676 spin_lock_bh(&vi->refill_lock);
677 vi->refill_enabled = true;
678 spin_unlock_bh(&vi->refill_lock);
679 }
680
681 static void disable_delayed_refill(struct virtnet_info *vi)
682 {
683 spin_lock_bh(&vi->refill_lock);
684 vi->refill_enabled = false;
685 spin_unlock_bh(&vi->refill_lock);
686 }
687
688 static void enable_rx_mode_work(struct virtnet_info *vi)
689 {
690 rtnl_lock();
691 vi->rx_mode_work_enabled = true;
692 rtnl_unlock();
693 }
694
695 static void disable_rx_mode_work(struct virtnet_info *vi)
696 {
697 rtnl_lock();
698 vi->rx_mode_work_enabled = false;
699 rtnl_unlock();
700 }
701
702 static void virtqueue_napi_schedule(struct napi_struct *napi,
703 struct virtqueue *vq)
704 {
705 if (napi_schedule_prep(napi)) {
706 virtqueue_disable_cb(vq);
707 __napi_schedule(napi);
708 }
709 }
710
711 static bool virtqueue_napi_complete(struct napi_struct *napi,
712 struct virtqueue *vq, int processed)
713 {
714 int opaque;
715
716 opaque = virtqueue_enable_cb_prepare(vq);
717 if (napi_complete_done(napi, processed)) {
718 if (unlikely(virtqueue_poll(vq, opaque)))
719 virtqueue_napi_schedule(napi, vq);
720 else
721 return true;
722 } else {
723 virtqueue_disable_cb(vq);
724 }
725
726 return false;
727 }
728
729 static void skb_xmit_done(struct virtqueue *vq)
730 {
731 struct virtnet_info *vi = vq->vdev->priv;
732 struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi;
733
734 /* Suppress further interrupts. */
735 virtqueue_disable_cb(vq);
736
737 if (napi->weight)
738 virtqueue_napi_schedule(napi, vq);
739 else
740 /* We were probably waiting for more output buffers. */
741 netif_wake_subqueue(vi->dev, vq2txq(vq));
742 }
743
744 #define MRG_CTX_HEADER_SHIFT 22
745 static void *mergeable_len_to_ctx(unsigned int truesize,
746 unsigned int headroom)
747 {
748 return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize);
749 }
750
751 static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx)
752 {
753 return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT;
754 }
755
756 static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
757 {
758 return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
759 }
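/* The mergeable-buffer context packs headroom and truesize into one
 * pointer-sized value: bits 22 and up hold the headroom, the low 22 bits the
 * truesize (so truesize must stay below 4 MB). For instance, a headroom of
 * 256 bytes and a truesize of 1536 bytes is encoded as
 * (256 << 22) | 1536 == 0x40000600.
 */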
760
761 static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen,
762 unsigned int headroom,
763 unsigned int len)
764 {
765 struct sk_buff *skb;
766
767 skb = build_skb(buf, buflen);
768 if (unlikely(!skb))
769 return NULL;
770
771 skb_reserve(skb, headroom);
772 skb_put(skb, len);
773
774 return skb;
775 }
776
777 /* Called from bottom half context */
778 static struct sk_buff *page_to_skb(struct virtnet_info *vi,
779 struct receive_queue *rq,
780 struct page *page, unsigned int offset,
781 unsigned int len, unsigned int truesize,
782 unsigned int headroom)
783 {
784 struct sk_buff *skb;
785 struct virtio_net_common_hdr *hdr;
786 unsigned int copy, hdr_len, hdr_padded_len;
787 struct page *page_to_free = NULL;
788 int tailroom, shinfo_size;
789 char *p, *hdr_p, *buf;
790
791 p = page_address(page) + offset;
792 hdr_p = p;
793
794 hdr_len = vi->hdr_len;
795 if (vi->mergeable_rx_bufs)
796 hdr_padded_len = hdr_len;
797 else
798 hdr_padded_len = sizeof(struct padded_vnet_hdr);
799
800 buf = p - headroom;
801 len -= hdr_len;
802 offset += hdr_padded_len;
803 p += hdr_padded_len;
804 tailroom = truesize - headroom - hdr_padded_len - len;
805
806 shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
807
808 if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
809 skb = virtnet_build_skb(buf, truesize, p - buf, len);
810 if (unlikely(!skb))
811 return NULL;
812
813 page = (struct page *)page->private;
814 if (page)
815 give_pages(rq, page);
816 goto ok;
817 }
818
819 /* copy small packet so we can reuse these pages for small data */
820 skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
821 if (unlikely(!skb))
822 return NULL;
823
824 /* Copy the whole frame if it fits in skb->head, otherwise
825 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed.
826 */
827 if (len <= skb_tailroom(skb))
828 copy = len;
829 else
830 copy = ETH_HLEN;
831 skb_put_data(skb, p, copy);
832
833 len -= copy;
834 offset += copy;
835
836 if (vi->mergeable_rx_bufs) {
837 if (len)
838 skb_add_rx_frag(skb, 0, page, offset, len, truesize);
839 else
840 page_to_free = page;
841 goto ok;
842 }
843
844 /*
845 * Verify that we can indeed put this data into a skb.
846 * This is here to handle cases when the device erroneously
847 * tries to receive more than is possible. This is usually
848 * the case of a broken device.
849 */
850 if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
851 net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
852 dev_kfree_skb(skb);
853 return NULL;
854 }
855 BUG_ON(offset >= PAGE_SIZE);
856 while (len) {
857 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
858 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
859 frag_size, truesize);
860 len -= frag_size;
861 page = (struct page *)page->private;
862 offset = 0;
863 }
864
865 if (page)
866 give_pages(rq, page);
867
868 ok:
869 hdr = skb_vnet_common_hdr(skb);
870 memcpy(hdr, hdr_p, hdr_len);
871 if (page_to_free)
872 put_page(page_to_free);
873
874 return skb;
875 }
876
877 static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len)
878 {
879 struct page *page = virt_to_head_page(buf);
880 struct virtnet_rq_dma *dma;
881 void *head;
882 int offset;
883
884 head = page_address(page);
885
886 dma = head;
887
888 --dma->ref;
889
890 if (dma->need_sync && len) {
891 offset = buf - (head + sizeof(*dma));
892
893 virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr,
894 offset, len,
895 DMA_FROM_DEVICE);
896 }
897
898 if (dma->ref)
899 return;
900
901 virtqueue_dma_unmap_single_attrs(rq->vq, dma->addr, dma->len,
902 DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
903 put_page(page);
904 }
905
906 static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx)
907 {
908 void *buf;
909
910 buf = virtqueue_get_buf_ctx(rq->vq, len, ctx);
911 if (buf && rq->do_dma)
912 virtnet_rq_unmap(rq, buf, *len);
913
914 return buf;
915 }
916
917 static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len)
918 {
919 struct virtnet_rq_dma *dma;
920 dma_addr_t addr;
921 u32 offset;
922 void *head;
923
924 if (!rq->do_dma) {
925 sg_init_one(rq->sg, buf, len);
926 return;
927 }
928
929 head = page_address(rq->alloc_frag.page);
930
931 offset = buf - head;
932
933 dma = head;
934
935 addr = dma->addr - sizeof(*dma) + offset;
936
937 sg_init_table(rq->sg, 1);
938 rq->sg[0].dma_address = addr;
939 rq->sg[0].length = len;
940 }
941
942 static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp)
943 {
944 struct page_frag *alloc_frag = &rq->alloc_frag;
945 struct virtnet_rq_dma *dma;
946 void *buf, *head;
947 dma_addr_t addr;
948
949 if (unlikely(!skb_page_frag_refill(size, alloc_frag, gfp)))
950 return NULL;
951
952 head = page_address(alloc_frag->page);
953
954 if (rq->do_dma) {
955 dma = head;
956
957 /* new pages */
958 if (!alloc_frag->offset) {
959 if (rq->last_dma) {
960 /* Now that the new page is allocated, the last dma
961 * will no longer be used, so it can be unmapped
962 * once its ref drops to 0.
963 */
964 virtnet_rq_unmap(rq, rq->last_dma, 0);
965 rq->last_dma = NULL;
966 }
967
968 dma->len = alloc_frag->size - sizeof(*dma);
969
970 addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1,
971 dma->len, DMA_FROM_DEVICE, 0);
972 if (virtqueue_dma_mapping_error(rq->vq, addr))
973 return NULL;
974
975 dma->addr = addr;
976 dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr);
977
978 /* Add a reference to dma to prevent the entire dma from
979 * being released during error handling. This reference
980 * will be freed after the pages are no longer used.
981 */
982 get_page(alloc_frag->page);
983 dma->ref = 1;
984 alloc_frag->offset = sizeof(*dma);
985
986 rq->last_dma = dma;
987 }
988
989 ++dma->ref;
990 }
991
992 buf = head + alloc_frag->offset;
993
994 get_page(alloc_frag->page);
995 alloc_frag->offset += size;
996
997 return buf;
998 }
999
1000 static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
1001 {
1002 struct virtnet_info *vi = vq->vdev->priv;
1003 struct receive_queue *rq;
1004 int i = vq2rxq(vq);
1005
1006 rq = &vi->rq[i];
1007
1008 if (rq->xsk_pool) {
1009 xsk_buff_free((struct xdp_buff *)buf);
1010 return;
1011 }
1012
1013 if (rq->do_dma)
1014 virtnet_rq_unmap(rq, buf, 0);
1015
1016 virtnet_rq_free_buf(vi, rq, buf);
1017 }
1018
1019 static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
1020 bool in_napi)
1021 {
1022 struct virtnet_sq_free_stats stats = {0};
1023
1024 __free_old_xmit(sq, txq, in_napi, &stats);
1025
1026 /* Avoid overhead when no packets have been processed;
1027 * this happens when called speculatively from start_xmit.
1028 */
1029 if (!stats.packets && !stats.napi_packets)
1030 return;
1031
1032 u64_stats_update_begin(&sq->stats.syncp);
1033 u64_stats_add(&sq->stats.bytes, stats.bytes + stats.napi_bytes);
1034 u64_stats_add(&sq->stats.packets, stats.packets + stats.napi_packets);
1035 u64_stats_update_end(&sq->stats.syncp);
1036 }
1037
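/* When XDP is active, the last xdp_queue_pairs of the curr_queue_pairs send
 * queues are reserved for XDP_TX/ndo_xdp_xmit and carry raw buffers rather
 * than skbs. E.g. with curr_queue_pairs == 4 and xdp_queue_pairs == 2,
 * queues 0-1 carry skbs while queues 2-3 are XDP raw-buffer queues.
 */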
1038 static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
1039 {
1040 if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
1041 return false;
1042 else if (q < vi->curr_queue_pairs)
1043 return true;
1044 else
1045 return false;
1046 }
1047
1048 static void check_sq_full_and_disable(struct virtnet_info *vi,
1049 struct net_device *dev,
1050 struct send_queue *sq)
1051 {
1052 bool use_napi = sq->napi.weight;
1053 int qnum;
1054
1055 qnum = sq - vi->sq;
1056
1057 /* If running out of space, stop queue to avoid getting packets that we
1058 * are then unable to transmit.
1059 * An alternative would be to force queuing layer to requeue the skb by
1060 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
1061 * returned in a normal path of operation: it means that the driver is not
1062 * maintaining the TX queue stop/start state properly, which causes
1063 * the stack to do a non-trivial amount of useless work.
1064 * Since most packets only take 1 or 2 ring slots, stopping the queue
1065 * early means 16 slots are typically wasted.
1066 */
1067 if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
1068 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
1069
1070 netif_tx_stop_queue(txq);
1071 u64_stats_update_begin(&sq->stats.syncp);
1072 u64_stats_inc(&sq->stats.stop);
1073 u64_stats_update_end(&sq->stats.syncp);
1074 if (use_napi) {
1075 if (unlikely(!virtqueue_enable_cb_delayed(sq->vq)))
1076 virtqueue_napi_schedule(&sq->napi, sq->vq);
1077 } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
1078 /* More just got used, free them then recheck. */
1079 free_old_xmit(sq, txq, false);
1080 if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
1081 netif_start_subqueue(dev, qnum);
1082 u64_stats_update_begin(&sq->stats.syncp);
1083 u64_stats_inc(&sq->stats.wake);
1084 u64_stats_update_end(&sq->stats.syncp);
1085 virtqueue_disable_cb(sq->vq);
1086 }
1087 }
1088 }
1089 }
1090
1091 static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
1092 {
1093 sg->dma_address = addr;
1094 sg->length = len;
1095 }
1096
1097 static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi,
1098 struct receive_queue *rq, void *buf, u32 len)
1099 {
1100 struct xdp_buff *xdp;
1101 u32 bufsize;
1102
1103 xdp = (struct xdp_buff *)buf;
1104
1105 bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool) + vi->hdr_len;
1106
1107 if (unlikely(len > bufsize)) {
1108 pr_debug("%s: rx error: len %u exceeds truesize %u\n",
1109 vi->dev->name, len, bufsize);
1110 DEV_STATS_INC(vi->dev, rx_length_errors);
1111 xsk_buff_free(xdp);
1112 return NULL;
1113 }
1114
1115 xsk_buff_set_size(xdp, len);
1116 xsk_buff_dma_sync_for_cpu(xdp);
1117
1118 return xdp;
1119 }
1120
1121 static struct sk_buff *xsk_construct_skb(struct receive_queue *rq,
1122 struct xdp_buff *xdp)
1123 {
1124 unsigned int metasize = xdp->data - xdp->data_meta;
1125 struct sk_buff *skb;
1126 unsigned int size;
1127
1128 size = xdp->data_end - xdp->data_hard_start;
1129 skb = napi_alloc_skb(&rq->napi, size);
1130 if (unlikely(!skb)) {
1131 xsk_buff_free(xdp);
1132 return NULL;
1133 }
1134
1135 skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);
1136
1137 size = xdp->data_end - xdp->data_meta;
1138 memcpy(__skb_put(skb, size), xdp->data_meta, size);
1139
1140 if (metasize) {
1141 __skb_pull(skb, metasize);
1142 skb_metadata_set(skb, metasize);
1143 }
1144
1145 xsk_buff_free(xdp);
1146
1147 return skb;
1148 }
1149
1150 static struct sk_buff *virtnet_receive_xsk_small(struct net_device *dev, struct virtnet_info *vi,
1151 struct receive_queue *rq, struct xdp_buff *xdp,
1152 unsigned int *xdp_xmit,
1153 struct virtnet_rq_stats *stats)
1154 {
1155 struct bpf_prog *prog;
1156 u32 ret;
1157
1158 ret = XDP_PASS;
1159 rcu_read_lock();
1160 prog = rcu_dereference(rq->xdp_prog);
1161 if (prog)
1162 ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
1163 rcu_read_unlock();
1164
1165 switch (ret) {
1166 case XDP_PASS:
1167 return xsk_construct_skb(rq, xdp);
1168
1169 case XDP_TX:
1170 case XDP_REDIRECT:
1171 return NULL;
1172
1173 default:
1174 /* drop packet */
1175 xsk_buff_free(xdp);
1176 u64_stats_inc(&stats->drops);
1177 return NULL;
1178 }
1179 }
1180
1181 static void xsk_drop_follow_bufs(struct net_device *dev,
1182 struct receive_queue *rq,
1183 u32 num_buf,
1184 struct virtnet_rq_stats *stats)
1185 {
1186 struct xdp_buff *xdp;
1187 u32 len;
1188
1189 while (num_buf-- > 1) {
1190 xdp = virtqueue_get_buf(rq->vq, &len);
1191 if (unlikely(!xdp)) {
1192 pr_debug("%s: rx error: %d buffers missing\n",
1193 dev->name, num_buf);
1194 DEV_STATS_INC(dev, rx_length_errors);
1195 break;
1196 }
1197 u64_stats_add(&stats->bytes, len);
1198 xsk_buff_free(xdp);
1199 }
1200 }
1201
1202 static int xsk_append_merge_buffer(struct virtnet_info *vi,
1203 struct receive_queue *rq,
1204 struct sk_buff *head_skb,
1205 u32 num_buf,
1206 struct virtio_net_hdr_mrg_rxbuf *hdr,
1207 struct virtnet_rq_stats *stats)
1208 {
1209 struct sk_buff *curr_skb;
1210 struct xdp_buff *xdp;
1211 u32 len, truesize;
1212 struct page *page;
1213 void *buf;
1214
1215 curr_skb = head_skb;
1216
1217 while (--num_buf) {
1218 buf = virtqueue_get_buf(rq->vq, &len);
1219 if (unlikely(!buf)) {
1220 pr_debug("%s: rx error: %d buffers out of %d missing\n",
1221 vi->dev->name, num_buf,
1222 virtio16_to_cpu(vi->vdev,
1223 hdr->num_buffers));
1224 DEV_STATS_INC(vi->dev, rx_length_errors);
1225 return -EINVAL;
1226 }
1227
1228 u64_stats_add(&stats->bytes, len);
1229
1230 xdp = buf_to_xdp(vi, rq, buf, len);
1231 if (!xdp)
1232 goto err;
1233
1234 buf = napi_alloc_frag(len);
1235 if (!buf) {
1236 xsk_buff_free(xdp);
1237 goto err;
1238 }
1239
1240 memcpy(buf, xdp->data - vi->hdr_len, len);
1241
1242 xsk_buff_free(xdp);
1243
1244 page = virt_to_page(buf);
1245
1246 truesize = len;
1247
1248 curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page,
1249 buf, len, truesize);
1250 if (!curr_skb) {
1251 put_page(page);
1252 goto err;
1253 }
1254 }
1255
1256 return 0;
1257
1258 err:
1259 xsk_drop_follow_bufs(vi->dev, rq, num_buf, stats);
1260 return -EINVAL;
1261 }
1262
1263 static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct virtnet_info *vi,
1264 struct receive_queue *rq, struct xdp_buff *xdp,
1265 unsigned int *xdp_xmit,
1266 struct virtnet_rq_stats *stats)
1267 {
1268 struct virtio_net_hdr_mrg_rxbuf *hdr;
1269 struct bpf_prog *prog;
1270 struct sk_buff *skb;
1271 u32 ret, num_buf;
1272
1273 hdr = xdp->data - vi->hdr_len;
1274 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
1275
1276 ret = XDP_PASS;
1277 rcu_read_lock();
1278 prog = rcu_dereference(rq->xdp_prog);
1279 /* TODO: support multi buffer. */
1280 if (prog && num_buf == 1)
1281 ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
1282 rcu_read_unlock();
1283
1284 switch (ret) {
1285 case XDP_PASS:
1286 skb = xsk_construct_skb(rq, xdp);
1287 if (!skb)
1288 goto drop_bufs;
1289
1290 if (xsk_append_merge_buffer(vi, rq, skb, num_buf, hdr, stats)) {
1291 dev_kfree_skb(skb);
1292 goto drop;
1293 }
1294
1295 return skb;
1296
1297 case XDP_TX:
1298 case XDP_REDIRECT:
1299 return NULL;
1300
1301 default:
1302 /* drop packet */
1303 xsk_buff_free(xdp);
1304 }
1305
1306 drop_bufs:
1307 xsk_drop_follow_bufs(dev, rq, num_buf, stats);
1308
1309 drop:
1310 u64_stats_inc(&stats->drops);
1311 return NULL;
1312 }
1313
1314 static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queue *rq,
1315 void *buf, u32 len,
1316 unsigned int *xdp_xmit,
1317 struct virtnet_rq_stats *stats)
1318 {
1319 struct net_device *dev = vi->dev;
1320 struct sk_buff *skb = NULL;
1321 struct xdp_buff *xdp;
1322 u8 flags;
1323
1324 len -= vi->hdr_len;
1325
1326 u64_stats_add(&stats->bytes, len);
1327
1328 xdp = buf_to_xdp(vi, rq, buf, len);
1329 if (!xdp)
1330 return;
1331
1332 if (unlikely(len < ETH_HLEN)) {
1333 pr_debug("%s: short packet %i\n", dev->name, len);
1334 DEV_STATS_INC(dev, rx_length_errors);
1335 xsk_buff_free(xdp);
1336 return;
1337 }
1338
1339 flags = ((struct virtio_net_common_hdr *)(xdp->data - vi->hdr_len))->hdr.flags;
1340
1341 if (!vi->mergeable_rx_bufs)
1342 skb = virtnet_receive_xsk_small(dev, vi, rq, xdp, xdp_xmit, stats);
1343 else
1344 skb = virtnet_receive_xsk_merge(dev, vi, rq, xdp, xdp_xmit, stats);
1345
1346 if (skb)
1347 virtnet_receive_done(vi, rq, skb, flags);
1348 }
1349
1350 static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue *rq,
1351 struct xsk_buff_pool *pool, gfp_t gfp)
1352 {
1353 struct xdp_buff **xsk_buffs;
1354 dma_addr_t addr;
1355 int err = 0;
1356 u32 len, i;
1357 int num;
1358
1359 xsk_buffs = rq->xsk_buffs;
1360
1361 num = xsk_buff_alloc_batch(pool, xsk_buffs, rq->vq->num_free);
1362 if (!num)
1363 return -ENOMEM;
1364
1365 len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len;
1366
1367 for (i = 0; i < num; ++i) {
1368 /* Use part of XDP_PACKET_HEADROOM as the virtnet hdr space.
1369 * We assume XDP_PACKET_HEADROOM is larger than vi->hdr_len.
1370 * (see function virtnet_xsk_pool_enable)
1371 */
1372 addr = xsk_buff_xdp_get_dma(xsk_buffs[i]) - vi->hdr_len;
1373
1374 sg_init_table(rq->sg, 1);
1375 sg_fill_dma(rq->sg, addr, len);
1376
1377 err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, xsk_buffs[i], gfp);
1378 if (err)
1379 goto err;
1380 }
1381
1382 return num;
1383
1384 err:
1385 for (; i < num; ++i)
1386 xsk_buff_free(xsk_buffs[i]);
1387
1388 return err;
1389 }
1390
1391 static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag)
1392 {
1393 struct virtnet_info *vi = netdev_priv(dev);
1394 struct send_queue *sq;
1395
1396 if (!netif_running(dev))
1397 return -ENETDOWN;
1398
1399 if (qid >= vi->curr_queue_pairs)
1400 return -EINVAL;
1401
1402 sq = &vi->sq[qid];
1403
1404 if (napi_if_scheduled_mark_missed(&sq->napi))
1405 return 0;
1406
1407 local_bh_disable();
1408 virtqueue_napi_schedule(&sq->napi, sq->vq);
1409 local_bh_enable();
1410
1411 return 0;
1412 }
1413
1414 static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
1415 struct send_queue *sq,
1416 struct xdp_frame *xdpf)
1417 {
1418 struct virtio_net_hdr_mrg_rxbuf *hdr;
1419 struct skb_shared_info *shinfo;
1420 u8 nr_frags = 0;
1421 int err, i;
1422
1423 if (unlikely(xdpf->headroom < vi->hdr_len))
1424 return -EOVERFLOW;
1425
1426 if (unlikely(xdp_frame_has_frags(xdpf))) {
1427 shinfo = xdp_get_shared_info_from_frame(xdpf);
1428 nr_frags = shinfo->nr_frags;
1429 }
1430
1431 /* In the wrapping function virtnet_xdp_xmit(), we need to free
1432 * up the pending old buffers, where we need to calculate the
1433 * position of skb_shared_info in xdp_get_frame_len() and
1434 * xdp_return_frame(), which involves xdpf->data and
1435 * xdpf->headroom. Therefore, we need to update the value of
1436 * headroom synchronously here.
1437 */
1438 xdpf->headroom -= vi->hdr_len;
1439 xdpf->data -= vi->hdr_len;
1440 /* Zero header and leave csum up to XDP layers */
1441 hdr = xdpf->data;
1442 memset(hdr, 0, vi->hdr_len);
1443 xdpf->len += vi->hdr_len;
1444
1445 sg_init_table(sq->sg, nr_frags + 1);
1446 sg_set_buf(sq->sg, xdpf->data, xdpf->len);
1447 for (i = 0; i < nr_frags; i++) {
1448 skb_frag_t *frag = &shinfo->frags[i];
1449
1450 sg_set_page(&sq->sg[i + 1], skb_frag_page(frag),
1451 skb_frag_size(frag), skb_frag_off(frag));
1452 }
1453
1454 err = virtqueue_add_outbuf(sq->vq, sq->sg, nr_frags + 1,
1455 xdp_to_ptr(xdpf), GFP_ATOMIC);
1456 if (unlikely(err))
1457 return -ENOSPC; /* Caller handle free/refcnt */
1458
1459 return 0;
1460 }
1461
1462 /* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on
1463 * the current cpu, so it does not need to be locked.
1464 *
1465 * Here we use a macro instead of inline functions because we have to deal with
1466 * three issues at the same time: 1. the choice of sq; 2. deciding whether to
1467 * lock/unlock the txq; 3. keeping sparse happy. It is difficult for two inline
1468 * functions to solve all three problems at the same time.
1469 */
1470 #define virtnet_xdp_get_sq(vi) ({ \
1471 int cpu = smp_processor_id(); \
1472 struct netdev_queue *txq; \
1473 typeof(vi) v = (vi); \
1474 unsigned int qp; \
1475 \
1476 if (v->curr_queue_pairs > nr_cpu_ids) { \
1477 qp = v->curr_queue_pairs - v->xdp_queue_pairs; \
1478 qp += cpu; \
1479 txq = netdev_get_tx_queue(v->dev, qp); \
1480 __netif_tx_acquire(txq); \
1481 } else { \
1482 qp = cpu % v->curr_queue_pairs; \
1483 txq = netdev_get_tx_queue(v->dev, qp); \
1484 __netif_tx_lock(txq, cpu); \
1485 } \
1486 v->sq + qp; \
1487 })
1488
1489 #define virtnet_xdp_put_sq(vi, q) { \
1490 struct netdev_queue *txq; \
1491 typeof(vi) v = (vi); \
1492 \
1493 txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \
1494 if (v->curr_queue_pairs > nr_cpu_ids) \
1495 __netif_tx_release(txq); \
1496 else \
1497 __netif_tx_unlock(txq); \
1498 }
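/* Typical usage, as in virtnet_xdp_xmit() below:
 *
 *	sq = virtnet_xdp_get_sq(vi);
 *	... queue frames on sq->vq ...
 *	virtnet_xdp_put_sq(vi, sq);
 *
 * The get side picks (and, when shared, locks) the tx queue for this CPU and
 * the put side releases it again, which is why the two must always be paired.
 */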
1499
1500 static int virtnet_xdp_xmit(struct net_device *dev,
1501 int n, struct xdp_frame **frames, u32 flags)
1502 {
1503 struct virtnet_info *vi = netdev_priv(dev);
1504 struct virtnet_sq_free_stats stats = {0};
1505 struct receive_queue *rq = vi->rq;
1506 struct bpf_prog *xdp_prog;
1507 struct send_queue *sq;
1508 int nxmit = 0;
1509 int kicks = 0;
1510 int ret;
1511 int i;
1512
1513 /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
1514 * indicates XDP resources have been successfully allocated.
1515 */
1516 xdp_prog = rcu_access_pointer(rq->xdp_prog);
1517 if (!xdp_prog)
1518 return -ENXIO;
1519
1520 sq = virtnet_xdp_get_sq(vi);
1521
1522 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
1523 ret = -EINVAL;
1524 goto out;
1525 }
1526
1527 /* Free up any pending old buffers before queueing new ones. */
1528 __free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq),
1529 false, &stats);
1530
1531 for (i = 0; i < n; i++) {
1532 struct xdp_frame *xdpf = frames[i];
1533
1534 if (__virtnet_xdp_xmit_one(vi, sq, xdpf))
1535 break;
1536 nxmit++;
1537 }
1538 ret = nxmit;
1539
1540 if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq))
1541 check_sq_full_and_disable(vi, dev, sq);
1542
1543 if (flags & XDP_XMIT_FLUSH) {
1544 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
1545 kicks = 1;
1546 }
1547 out:
1548 u64_stats_update_begin(&sq->stats.syncp);
1549 u64_stats_add(&sq->stats.bytes, stats.bytes);
1550 u64_stats_add(&sq->stats.packets, stats.packets);
1551 u64_stats_add(&sq->stats.xdp_tx, n);
1552 u64_stats_add(&sq->stats.xdp_tx_drops, n - nxmit);
1553 u64_stats_add(&sq->stats.kicks, kicks);
1554 u64_stats_update_end(&sq->stats.syncp);
1555
1556 virtnet_xdp_put_sq(vi, sq);
1557 return ret;
1558 }
1559
1560 static void put_xdp_frags(struct xdp_buff *xdp)
1561 {
1562 struct skb_shared_info *shinfo;
1563 struct page *xdp_page;
1564 int i;
1565
1566 if (xdp_buff_has_frags(xdp)) {
1567 shinfo = xdp_get_shared_info_from_buff(xdp);
1568 for (i = 0; i < shinfo->nr_frags; i++) {
1569 xdp_page = skb_frag_page(&shinfo->frags[i]);
1570 put_page(xdp_page);
1571 }
1572 }
1573 }
1574
1575 static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
1576 struct net_device *dev,
1577 unsigned int *xdp_xmit,
1578 struct virtnet_rq_stats *stats)
1579 {
1580 struct xdp_frame *xdpf;
1581 int err;
1582 u32 act;
1583
1584 act = bpf_prog_run_xdp(xdp_prog, xdp);
1585 u64_stats_inc(&stats->xdp_packets);
1586
1587 switch (act) {
1588 case XDP_PASS:
1589 return act;
1590
1591 case XDP_TX:
1592 u64_stats_inc(&stats->xdp_tx);
1593 xdpf = xdp_convert_buff_to_frame(xdp);
1594 if (unlikely(!xdpf)) {
1595 netdev_dbg(dev, "convert buff to frame failed for xdp\n");
1596 return XDP_DROP;
1597 }
1598
1599 err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
1600 if (unlikely(!err)) {
1601 xdp_return_frame_rx_napi(xdpf);
1602 } else if (unlikely(err < 0)) {
1603 trace_xdp_exception(dev, xdp_prog, act);
1604 return XDP_DROP;
1605 }
1606 *xdp_xmit |= VIRTIO_XDP_TX;
1607 return act;
1608
1609 case XDP_REDIRECT:
1610 u64_stats_inc(&stats->xdp_redirects);
1611 err = xdp_do_redirect(dev, xdp, xdp_prog);
1612 if (err)
1613 return XDP_DROP;
1614
1615 *xdp_xmit |= VIRTIO_XDP_REDIR;
1616 return act;
1617
1618 default:
1619 bpf_warn_invalid_xdp_action(dev, xdp_prog, act);
1620 fallthrough;
1621 case XDP_ABORTED:
1622 trace_xdp_exception(dev, xdp_prog, act);
1623 fallthrough;
1624 case XDP_DROP:
1625 return XDP_DROP;
1626 }
1627 }
1628
1629 static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
1630 {
1631 return vi->xdp_enabled ? XDP_PACKET_HEADROOM : 0;
1632 }
1633
1634 /* We copy the packet for XDP in the following cases:
1635 *
1636 * 1) Packet is scattered across multiple rx buffers.
1637 * 2) Headroom space is insufficient.
1638 *
1639 * This is inefficient but it's a temporary condition that
1640 * we hit right after XDP is enabled and until the queue is refilled
1641 * with large buffers with sufficient headroom - so it should affect
1642 * at most a queue's worth of packets.
1643 * Afterwards, the conditions to enable
1644 * XDP should preclude the underlying device from sending packets
1645 * across multiple buffers (num_buf > 1), and we make sure buffers
1646 * have enough headroom.
1647 */
1648 static struct page *xdp_linearize_page(struct receive_queue *rq,
1649 int *num_buf,
1650 struct page *p,
1651 int offset,
1652 int page_off,
1653 unsigned int *len)
1654 {
1655 int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1656 struct page *page;
1657
1658 if (page_off + *len + tailroom > PAGE_SIZE)
1659 return NULL;
1660
1661 page = alloc_page(GFP_ATOMIC);
1662 if (!page)
1663 return NULL;
1664
1665 memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
1666 page_off += *len;
1667
1668 while (--*num_buf) {
1669 unsigned int buflen;
1670 void *buf;
1671 int off;
1672
1673 buf = virtnet_rq_get_buf(rq, &buflen, NULL);
1674 if (unlikely(!buf))
1675 goto err_buf;
1676
1677 p = virt_to_head_page(buf);
1678 off = buf - page_address(p);
1679
1680 /* guard against a misconfigured or uncooperative backend that
1681 * is sending packets larger than the MTU.
1682 */
1683 if ((page_off + buflen + tailroom) > PAGE_SIZE) {
1684 put_page(p);
1685 goto err_buf;
1686 }
1687
1688 memcpy(page_address(page) + page_off,
1689 page_address(p) + off, buflen);
1690 page_off += buflen;
1691 put_page(p);
1692 }
1693
1694 /* Headroom does not contribute to packet length */
1695 *len = page_off - XDP_PACKET_HEADROOM;
1696 return page;
1697 err_buf:
1698 __free_pages(page, 0);
1699 return NULL;
1700 }
1701
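/* Rough layout of a small-mode receive buffer, as assumed below:
 *
 *	buf
 *	+-- VIRTNET_RX_PAD + xdp_headroom	(header_offset)
 *	+-- virtio-net header (vi->hdr_len)
 *	+-- packet data (up to GOOD_PACKET_LEN)
 *	+-- struct skb_shared_info		(tail, for build_skb())
 *
 * headroom = vi->hdr_len + header_offset, and buflen rounds
 * GOOD_PACKET_LEN + headroom and the shared info up with SKB_DATA_ALIGN().
 */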
1702 static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi,
1703 unsigned int xdp_headroom,
1704 void *buf,
1705 unsigned int len)
1706 {
1707 unsigned int header_offset;
1708 unsigned int headroom;
1709 unsigned int buflen;
1710 struct sk_buff *skb;
1711
1712 header_offset = VIRTNET_RX_PAD + xdp_headroom;
1713 headroom = vi->hdr_len + header_offset;
1714 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
1715 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1716
1717 skb = virtnet_build_skb(buf, buflen, headroom, len);
1718 if (unlikely(!skb))
1719 return NULL;
1720
1721 buf += header_offset;
1722 memcpy(skb_vnet_common_hdr(skb), buf, vi->hdr_len);
1723
1724 return skb;
1725 }
1726
1727 static struct sk_buff *receive_small_xdp(struct net_device *dev,
1728 struct virtnet_info *vi,
1729 struct receive_queue *rq,
1730 struct bpf_prog *xdp_prog,
1731 void *buf,
1732 unsigned int xdp_headroom,
1733 unsigned int len,
1734 unsigned int *xdp_xmit,
1735 struct virtnet_rq_stats *stats)
1736 {
1737 unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
1738 unsigned int headroom = vi->hdr_len + header_offset;
1739 struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
1740 struct page *page = virt_to_head_page(buf);
1741 struct page *xdp_page;
1742 unsigned int buflen;
1743 struct xdp_buff xdp;
1744 struct sk_buff *skb;
1745 unsigned int metasize = 0;
1746 u32 act;
1747
1748 if (unlikely(hdr->hdr.gso_type))
1749 goto err_xdp;
1750
1751 /* Partially checksummed packets must be dropped. */
1752 if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))
1753 goto err_xdp;
1754
1755 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
1756 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1757
1758 if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
1759 int offset = buf - page_address(page) + header_offset;
1760 unsigned int tlen = len + vi->hdr_len;
1761 int num_buf = 1;
1762
1763 xdp_headroom = virtnet_get_headroom(vi);
1764 header_offset = VIRTNET_RX_PAD + xdp_headroom;
1765 headroom = vi->hdr_len + header_offset;
1766 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
1767 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1768 xdp_page = xdp_linearize_page(rq, &num_buf, page,
1769 offset, header_offset,
1770 &tlen);
1771 if (!xdp_page)
1772 goto err_xdp;
1773
1774 buf = page_address(xdp_page);
1775 put_page(page);
1776 page = xdp_page;
1777 }
1778
1779 xdp_init_buff(&xdp, buflen, &rq->xdp_rxq);
1780 xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len,
1781 xdp_headroom, len, true);
1782
1783 act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);
1784
1785 switch (act) {
1786 case XDP_PASS:
1787 /* Recalculate length in case bpf program changed it */
1788 len = xdp.data_end - xdp.data;
1789 metasize = xdp.data - xdp.data_meta;
1790 break;
1791
1792 case XDP_TX:
1793 case XDP_REDIRECT:
1794 goto xdp_xmit;
1795
1796 default:
1797 goto err_xdp;
1798 }
1799
1800 skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len);
1801 if (unlikely(!skb))
1802 goto err;
1803
1804 if (metasize)
1805 skb_metadata_set(skb, metasize);
1806
1807 return skb;
1808
1809 err_xdp:
1810 u64_stats_inc(&stats->xdp_drops);
1811 err:
1812 u64_stats_inc(&stats->drops);
1813 put_page(page);
1814 xdp_xmit:
1815 return NULL;
1816 }
1817
1818 static struct sk_buff *receive_small(struct net_device *dev,
1819 struct virtnet_info *vi,
1820 struct receive_queue *rq,
1821 void *buf, void *ctx,
1822 unsigned int len,
1823 unsigned int *xdp_xmit,
1824 struct virtnet_rq_stats *stats)
1825 {
1826 unsigned int xdp_headroom = (unsigned long)ctx;
1827 struct page *page = virt_to_head_page(buf);
1828 struct sk_buff *skb;
1829
1830 /* We passed the address of the virtnet header to the virtio core,
1831 * so truncate the padding.
1832 */
1833 buf -= VIRTNET_RX_PAD + xdp_headroom;
1834
1835 len -= vi->hdr_len;
1836 u64_stats_add(&stats->bytes, len);
1837
1838 if (unlikely(len > GOOD_PACKET_LEN)) {
1839 pr_debug("%s: rx error: len %u exceeds max size %d\n",
1840 dev->name, len, GOOD_PACKET_LEN);
1841 DEV_STATS_INC(dev, rx_length_errors);
1842 goto err;
1843 }
1844
1845 if (unlikely(vi->xdp_enabled)) {
1846 struct bpf_prog *xdp_prog;
1847
1848 rcu_read_lock();
1849 xdp_prog = rcu_dereference(rq->xdp_prog);
1850 if (xdp_prog) {
1851 skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf,
1852 xdp_headroom, len, xdp_xmit,
1853 stats);
1854 rcu_read_unlock();
1855 return skb;
1856 }
1857 rcu_read_unlock();
1858 }
1859
1860 skb = receive_small_build_skb(vi, xdp_headroom, buf, len);
1861 if (likely(skb))
1862 return skb;
1863
1864 err:
1865 u64_stats_inc(&stats->drops);
1866 put_page(page);
1867 return NULL;
1868 }
1869
1870 static struct sk_buff *receive_big(struct net_device *dev,
1871 struct virtnet_info *vi,
1872 struct receive_queue *rq,
1873 void *buf,
1874 unsigned int len,
1875 struct virtnet_rq_stats *stats)
1876 {
1877 struct page *page = buf;
1878 struct sk_buff *skb =
1879 page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0);
1880
1881 u64_stats_add(&stats->bytes, len - vi->hdr_len);
1882 if (unlikely(!skb))
1883 goto err;
1884
1885 return skb;
1886
1887 err:
1888 u64_stats_inc(&stats->drops);
1889 give_pages(rq, page);
1890 return NULL;
1891 }
1892
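/* On error, drop the remaining buffers that belong to a partially
 * received mergeable packet, accounting their bytes as received.
 */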
1893 static void mergeable_buf_free(struct receive_queue *rq, int num_buf,
1894 struct net_device *dev,
1895 struct virtnet_rq_stats *stats)
1896 {
1897 struct page *page;
1898 void *buf;
1899 int len;
1900
1901 while (num_buf-- > 1) {
1902 buf = virtnet_rq_get_buf(rq, &len, NULL);
1903 if (unlikely(!buf)) {
1904 pr_debug("%s: rx error: %d buffers missing\n",
1905 dev->name, num_buf);
1906 DEV_STATS_INC(dev, rx_length_errors);
1907 break;
1908 }
1909 u64_stats_add(&stats->bytes, len);
1910 page = virt_to_head_page(buf);
1911 put_page(page);
1912 }
1913 }
1914
1915 /* Why not use xdp_build_skb_from_frame()?
1916  * The XDP core assumes that xdp frags are PAGE_SIZE in length, while in
1917  * virtio-net there are 2 points that do not match its requirements:
1918  *  1. The size of the prefilled buffer is not fixed before xdp is set.
1919  *  2. xdp_build_skb_from_frame() does additional checks that we don't need,
1920  *  like eth_type_trans() (which virtio-net does in receive_buf()).
1921  */
1922 static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev,
1923 struct virtnet_info *vi,
1924 struct xdp_buff *xdp,
1925 unsigned int xdp_frags_truesz)
1926 {
1927 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
1928 unsigned int headroom, data_len;
1929 struct sk_buff *skb;
1930 int metasize;
1931 u8 nr_frags;
1932
1933 if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) {
1934 pr_debug("Error building skb as missing reserved tailroom for xdp");
1935 return NULL;
1936 }
1937
1938 if (unlikely(xdp_buff_has_frags(xdp)))
1939 nr_frags = sinfo->nr_frags;
1940
1941 skb = build_skb(xdp->data_hard_start, xdp->frame_sz);
1942 if (unlikely(!skb))
1943 return NULL;
1944
1945 headroom = xdp->data - xdp->data_hard_start;
1946 data_len = xdp->data_end - xdp->data;
1947 skb_reserve(skb, headroom);
1948 __skb_put(skb, data_len);
1949
1950 metasize = xdp->data - xdp->data_meta;
1951 metasize = metasize > 0 ? metasize : 0;
1952 if (metasize)
1953 skb_metadata_set(skb, metasize);
1954
1955 if (unlikely(xdp_buff_has_frags(xdp)))
1956 xdp_update_skb_shared_info(skb, nr_frags,
1957 sinfo->xdp_frags_size,
1958 xdp_frags_truesz,
1959 xdp_buff_is_frag_pfmemalloc(xdp));
1960
1961 return skb;
1962 }
1963
1964 /* TODO: build xdp in big mode */
1965 static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
1966 struct virtnet_info *vi,
1967 struct receive_queue *rq,
1968 struct xdp_buff *xdp,
1969 void *buf,
1970 unsigned int len,
1971 unsigned int frame_sz,
1972 int *num_buf,
1973 unsigned int *xdp_frags_truesize,
1974 struct virtnet_rq_stats *stats)
1975 {
1976 struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
1977 unsigned int headroom, tailroom, room;
1978 unsigned int truesize, cur_frag_size;
1979 struct skb_shared_info *shinfo;
1980 unsigned int xdp_frags_truesz = 0;
1981 struct page *page;
1982 skb_frag_t *frag;
1983 int offset;
1984 void *ctx;
1985
1986 xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq);
1987 xdp_prepare_buff(xdp, buf - XDP_PACKET_HEADROOM,
1988 XDP_PACKET_HEADROOM + vi->hdr_len, len - vi->hdr_len, true);
1989
1990 if (!*num_buf)
1991 return 0;
1992
1993 if (*num_buf > 1) {
1994 		/* To build a multi-buffer xdp_buff, we need to set
1995 		 * the frags flag (XDP_FLAGS_HAS_FRAGS) on the
1996 		 * xdp_buff before adding frags to it.
1997 		 */
1998 if (!xdp_buff_has_frags(xdp))
1999 xdp_buff_set_frags_flag(xdp);
2000
2001 shinfo = xdp_get_shared_info_from_buff(xdp);
2002 shinfo->nr_frags = 0;
2003 shinfo->xdp_frags_size = 0;
2004 }
2005
2006 if (*num_buf > MAX_SKB_FRAGS + 1)
2007 return -EINVAL;
2008
2009 while (--*num_buf > 0) {
2010 buf = virtnet_rq_get_buf(rq, &len, &ctx);
2011 if (unlikely(!buf)) {
2012 pr_debug("%s: rx error: %d buffers out of %d missing\n",
2013 dev->name, *num_buf,
2014 virtio16_to_cpu(vi->vdev, hdr->num_buffers));
2015 DEV_STATS_INC(dev, rx_length_errors);
2016 goto err;
2017 }
2018
2019 u64_stats_add(&stats->bytes, len);
2020 page = virt_to_head_page(buf);
2021 offset = buf - page_address(page);
2022
2023 truesize = mergeable_ctx_to_truesize(ctx);
2024 headroom = mergeable_ctx_to_headroom(ctx);
2025 tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
2026 room = SKB_DATA_ALIGN(headroom + tailroom);
2027
2028 cur_frag_size = truesize;
2029 xdp_frags_truesz += cur_frag_size;
2030 if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) {
2031 put_page(page);
2032 pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
2033 dev->name, len, (unsigned long)(truesize - room));
2034 DEV_STATS_INC(dev, rx_length_errors);
2035 goto err;
2036 }
2037
2038 frag = &shinfo->frags[shinfo->nr_frags++];
2039 skb_frag_fill_page_desc(frag, page, offset, len);
2040 if (page_is_pfmemalloc(page))
2041 xdp_buff_set_frag_pfmemalloc(xdp);
2042
2043 shinfo->xdp_frags_size += len;
2044 }
2045
2046 *xdp_frags_truesize = xdp_frags_truesz;
2047 return 0;
2048
2049 err:
2050 put_xdp_frags(xdp);
2051 return -EINVAL;
2052 }
2053
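/* Prepare a mergeable buffer for XDP: reject GSO and partially
 * checksummed packets, and if the prefilled buffer lacks the required
 * headroom (or the program does not support frags), linearize or copy
 * the data into a freshly allocated page with XDP_PACKET_HEADROOM.
 */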
2054 static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
2055 struct receive_queue *rq,
2056 struct bpf_prog *xdp_prog,
2057 void *ctx,
2058 unsigned int *frame_sz,
2059 int *num_buf,
2060 struct page **page,
2061 int offset,
2062 unsigned int *len,
2063 struct virtio_net_hdr_mrg_rxbuf *hdr)
2064 {
2065 unsigned int truesize = mergeable_ctx_to_truesize(ctx);
2066 unsigned int headroom = mergeable_ctx_to_headroom(ctx);
2067 struct page *xdp_page;
2068 unsigned int xdp_room;
2069
2070 /* Transient failure which in theory could occur if
2071 * in-flight packets from before XDP was enabled reach
2072 * the receive path after XDP is loaded.
2073 */
2074 if (unlikely(hdr->hdr.gso_type))
2075 return NULL;
2076
2077 /* Partially checksummed packets must be dropped. */
2078 if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))
2079 return NULL;
2080
2081 	/* The XDP core assumes the frag size is PAGE_SIZE, but buffers
2082 	 * with headroom may add a hole to the truesize, which
2083 	 * makes their length exceed PAGE_SIZE. So we disable the
2084 	 * hole mechanism for xdp. See add_recvbuf_mergeable().
2085 	 */
2086 *frame_sz = truesize;
2087
2088 if (likely(headroom >= virtnet_get_headroom(vi) &&
2089 (*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) {
2090 return page_address(*page) + offset;
2091 }
2092
2093 	/* This happens when the headroom is not enough because
2094 	 * the buffer was prefilled before XDP was set.
2095 	 * This should only happen for the first several packets.
2096 	 * In fact, a vq reset could be used here to help us clean up
2097 	 * the prefilled buffers, but many existing devices do not
2098 	 * support it, and we don't want to bother users who are
2099 	 * using XDP normally.
2100 	 */
2101 if (!xdp_prog->aux->xdp_has_frags) {
2102 /* linearize data for XDP */
2103 xdp_page = xdp_linearize_page(rq, num_buf,
2104 *page, offset,
2105 XDP_PACKET_HEADROOM,
2106 len);
2107 if (!xdp_page)
2108 return NULL;
2109 } else {
2110 xdp_room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM +
2111 sizeof(struct skb_shared_info));
2112 if (*len + xdp_room > PAGE_SIZE)
2113 return NULL;
2114
2115 xdp_page = alloc_page(GFP_ATOMIC);
2116 if (!xdp_page)
2117 return NULL;
2118
2119 memcpy(page_address(xdp_page) + XDP_PACKET_HEADROOM,
2120 page_address(*page) + offset, *len);
2121 }
2122
2123 *frame_sz = PAGE_SIZE;
2124
2125 put_page(*page);
2126
2127 *page = xdp_page;
2128
2129 return page_address(*page) + XDP_PACKET_HEADROOM;
2130 }
2131
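/* XDP path for mergeable buffers: assemble the (possibly multi-frag)
 * xdp_buff, run the program, and build an skb on XDP_PASS. XDP_TX and
 * XDP_REDIRECT consume the buffer, so NULL is returned to the caller.
 */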
2132 static struct sk_buff *receive_mergeable_xdp(struct net_device *dev,
2133 struct virtnet_info *vi,
2134 struct receive_queue *rq,
2135 struct bpf_prog *xdp_prog,
2136 void *buf,
2137 void *ctx,
2138 unsigned int len,
2139 unsigned int *xdp_xmit,
2140 struct virtnet_rq_stats *stats)
2141 {
2142 struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
2143 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
2144 struct page *page = virt_to_head_page(buf);
2145 int offset = buf - page_address(page);
2146 unsigned int xdp_frags_truesz = 0;
2147 struct sk_buff *head_skb;
2148 unsigned int frame_sz;
2149 struct xdp_buff xdp;
2150 void *data;
2151 u32 act;
2152 int err;
2153
2154 data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, &frame_sz, &num_buf, &page,
2155 offset, &len, hdr);
2156 if (unlikely(!data))
2157 goto err_xdp;
2158
2159 err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz,
2160 &num_buf, &xdp_frags_truesz, stats);
2161 if (unlikely(err))
2162 goto err_xdp;
2163
2164 act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);
2165
2166 switch (act) {
2167 case XDP_PASS:
2168 head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz);
2169 if (unlikely(!head_skb))
2170 break;
2171 return head_skb;
2172
2173 case XDP_TX:
2174 case XDP_REDIRECT:
2175 return NULL;
2176
2177 default:
2178 break;
2179 }
2180
2181 put_xdp_frags(&xdp);
2182
2183 err_xdp:
2184 put_page(page);
2185 mergeable_buf_free(rq, num_buf, dev, stats);
2186
2187 u64_stats_inc(&stats->xdp_drops);
2188 u64_stats_inc(&stats->drops);
2189 return NULL;
2190 }
2191
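/* Append one receive buffer to the skb being built. When the current
 * skb runs out of frag slots, a zero-sized skb is chained via frag_list;
 * adjacent buffers on the same page are coalesced when possible.
 */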
2192 static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb,
2193 struct sk_buff *curr_skb,
2194 struct page *page, void *buf,
2195 int len, int truesize)
2196 {
2197 int num_skb_frags;
2198 int offset;
2199
2200 num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
2201 if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
2202 struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC);
2203
2204 if (unlikely(!nskb))
2205 return NULL;
2206
2207 if (curr_skb == head_skb)
2208 skb_shinfo(curr_skb)->frag_list = nskb;
2209 else
2210 curr_skb->next = nskb;
2211 curr_skb = nskb;
2212 head_skb->truesize += nskb->truesize;
2213 num_skb_frags = 0;
2214 }
2215
2216 if (curr_skb != head_skb) {
2217 head_skb->data_len += len;
2218 head_skb->len += len;
2219 head_skb->truesize += truesize;
2220 }
2221
2222 offset = buf - page_address(page);
2223 if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
2224 put_page(page);
2225 skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
2226 len, truesize);
2227 } else {
2228 skb_add_rx_frag(curr_skb, num_skb_frags, page,
2229 offset, len, truesize);
2230 }
2231
2232 return curr_skb;
2233 }
2234
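/* Receive path for mergeable rx buffers: the first buffer carries the
 * virtio-net header and num_buffers; the remaining buffers are appended
 * as frags. Takes the XDP path instead when a program is attached.
 */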
2235 static struct sk_buff *receive_mergeable(struct net_device *dev,
2236 struct virtnet_info *vi,
2237 struct receive_queue *rq,
2238 void *buf,
2239 void *ctx,
2240 unsigned int len,
2241 unsigned int *xdp_xmit,
2242 struct virtnet_rq_stats *stats)
2243 {
2244 struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
2245 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
2246 struct page *page = virt_to_head_page(buf);
2247 int offset = buf - page_address(page);
2248 struct sk_buff *head_skb, *curr_skb;
2249 unsigned int truesize = mergeable_ctx_to_truesize(ctx);
2250 unsigned int headroom = mergeable_ctx_to_headroom(ctx);
2251 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
2252 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
2253
2254 head_skb = NULL;
2255 u64_stats_add(&stats->bytes, len - vi->hdr_len);
2256
2257 if (unlikely(len > truesize - room)) {
2258 pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
2259 dev->name, len, (unsigned long)(truesize - room));
2260 DEV_STATS_INC(dev, rx_length_errors);
2261 goto err_skb;
2262 }
2263
2264 if (unlikely(vi->xdp_enabled)) {
2265 struct bpf_prog *xdp_prog;
2266
2267 rcu_read_lock();
2268 xdp_prog = rcu_dereference(rq->xdp_prog);
2269 if (xdp_prog) {
2270 head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx,
2271 len, xdp_xmit, stats);
2272 rcu_read_unlock();
2273 return head_skb;
2274 }
2275 rcu_read_unlock();
2276 }
2277
2278 head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom);
2279 curr_skb = head_skb;
2280
2281 if (unlikely(!curr_skb))
2282 goto err_skb;
2283 while (--num_buf) {
2284 buf = virtnet_rq_get_buf(rq, &len, &ctx);
2285 if (unlikely(!buf)) {
2286 pr_debug("%s: rx error: %d buffers out of %d missing\n",
2287 dev->name, num_buf,
2288 virtio16_to_cpu(vi->vdev,
2289 hdr->num_buffers));
2290 DEV_STATS_INC(dev, rx_length_errors);
2291 goto err_buf;
2292 }
2293
2294 u64_stats_add(&stats->bytes, len);
2295 page = virt_to_head_page(buf);
2296
2297 truesize = mergeable_ctx_to_truesize(ctx);
2298 headroom = mergeable_ctx_to_headroom(ctx);
2299 tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
2300 room = SKB_DATA_ALIGN(headroom + tailroom);
2301 if (unlikely(len > truesize - room)) {
2302 pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
2303 dev->name, len, (unsigned long)(truesize - room));
2304 DEV_STATS_INC(dev, rx_length_errors);
2305 goto err_skb;
2306 }
2307
2308 curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page,
2309 buf, len, truesize);
2310 if (!curr_skb)
2311 goto err_skb;
2312 }
2313
2314 ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
2315 return head_skb;
2316
2317 err_skb:
2318 put_page(page);
2319 mergeable_buf_free(rq, num_buf, dev, stats);
2320
2321 err_buf:
2322 u64_stats_inc(&stats->drops);
2323 dev_kfree_skb(head_skb);
2324 return NULL;
2325 }
2326
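/* Translate the device's hash report into a kernel packet hash type
 * (L4 for TCP/UDP reports, L3 for plain IP reports) and record the
 * reported hash value in the skb.
 */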
2327 static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash,
2328 struct sk_buff *skb)
2329 {
2330 enum pkt_hash_types rss_hash_type;
2331
2332 if (!hdr_hash || !skb)
2333 return;
2334
2335 switch (__le16_to_cpu(hdr_hash->hash_report)) {
2336 case VIRTIO_NET_HASH_REPORT_TCPv4:
2337 case VIRTIO_NET_HASH_REPORT_UDPv4:
2338 case VIRTIO_NET_HASH_REPORT_TCPv6:
2339 case VIRTIO_NET_HASH_REPORT_UDPv6:
2340 case VIRTIO_NET_HASH_REPORT_TCPv6_EX:
2341 case VIRTIO_NET_HASH_REPORT_UDPv6_EX:
2342 rss_hash_type = PKT_HASH_TYPE_L4;
2343 break;
2344 case VIRTIO_NET_HASH_REPORT_IPv4:
2345 case VIRTIO_NET_HASH_REPORT_IPv6:
2346 case VIRTIO_NET_HASH_REPORT_IPv6_EX:
2347 rss_hash_type = PKT_HASH_TYPE_L3;
2348 break;
2349 case VIRTIO_NET_HASH_REPORT_NONE:
2350 default:
2351 rss_hash_type = PKT_HASH_TYPE_NONE;
2352 }
2353 skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type);
2354 }
2355
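/* Final per-packet receive processing: apply the RSS hash report,
 * honour VIRTIO_NET_HDR_F_DATA_VALID, convert the virtio-net header
 * into skb GSO/csum state and hand the skb to GRO.
 */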
2356 static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq,
2357 struct sk_buff *skb, u8 flags)
2358 {
2359 struct virtio_net_common_hdr *hdr;
2360 struct net_device *dev = vi->dev;
2361
2362 hdr = skb_vnet_common_hdr(skb);
2363 if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report)
2364 virtio_skb_set_hash(&hdr->hash_v1_hdr, skb);
2365
2366 if (flags & VIRTIO_NET_HDR_F_DATA_VALID)
2367 skb->ip_summed = CHECKSUM_UNNECESSARY;
2368
2369 if (virtio_net_hdr_to_skb(skb, &hdr->hdr,
2370 virtio_is_little_endian(vi->vdev))) {
2371 net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n",
2372 dev->name, hdr->hdr.gso_type,
2373 hdr->hdr.gso_size);
2374 goto frame_err;
2375 }
2376
2377 skb_record_rx_queue(skb, vq2rxq(rq->vq));
2378 skb->protocol = eth_type_trans(skb, dev);
2379 pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
2380 ntohs(skb->protocol), skb->len, skb->pkt_type);
2381
2382 napi_gro_receive(&rq->napi, skb);
2383 return;
2384
2385 frame_err:
2386 DEV_STATS_INC(dev, rx_frame_errors);
2387 dev_kfree_skb(skb);
2388 }
2389
2390 static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
2391 void *buf, unsigned int len, void **ctx,
2392 unsigned int *xdp_xmit,
2393 struct virtnet_rq_stats *stats)
2394 {
2395 struct net_device *dev = vi->dev;
2396 struct sk_buff *skb;
2397 u8 flags;
2398
2399 if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
2400 pr_debug("%s: short packet %i\n", dev->name, len);
2401 DEV_STATS_INC(dev, rx_length_errors);
2402 virtnet_rq_free_buf(vi, rq, buf);
2403 return;
2404 }
2405
2406 /* 1. Save the flags early, as the XDP program might overwrite them.
2407 * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID
2408 * stay valid after XDP processing.
2409 * 2. XDP doesn't work with partially checksummed packets (refer to
2410 * virtnet_xdp_set()), so packets marked as
2411 * VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing.
2412 */
2413 flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags;
2414
2415 if (vi->mergeable_rx_bufs)
2416 skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit,
2417 stats);
2418 else if (vi->big_packets)
2419 skb = receive_big(dev, vi, rq, buf, len, stats);
2420 else
2421 skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats);
2422
2423 if (unlikely(!skb))
2424 return;
2425
2426 virtnet_receive_done(vi, rq, skb, flags);
2427 }
2428
2429 /* Unlike mergeable buffers, all buffers here are allocated with the
2430  * same size, apart from the headroom. For this reason we do
2431  * not need to use mergeable_len_to_ctx here - it is enough
2432  * to store the headroom as the context, ignoring the truesize.
2433  */
2434 static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
2435 gfp_t gfp)
2436 {
2437 char *buf;
2438 unsigned int xdp_headroom = virtnet_get_headroom(vi);
2439 void *ctx = (void *)(unsigned long)xdp_headroom;
2440 int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom;
2441 int err;
2442
2443 len = SKB_DATA_ALIGN(len) +
2444 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
2445
2446 buf = virtnet_rq_alloc(rq, len, gfp);
2447 if (unlikely(!buf))
2448 return -ENOMEM;
2449
2450 buf += VIRTNET_RX_PAD + xdp_headroom;
2451
2452 virtnet_rq_init_one_sg(rq, buf, vi->hdr_len + GOOD_PACKET_LEN);
2453
2454 err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
2455 if (err < 0) {
2456 if (rq->do_dma)
2457 virtnet_rq_unmap(rq, buf, 0);
2458 put_page(virt_to_head_page(buf));
2459 }
2460
2461 return err;
2462 }
2463
2464 static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
2465 gfp_t gfp)
2466 {
2467 struct page *first, *list = NULL;
2468 char *p;
2469 int i, err, offset;
2470
2471 sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2);
2472
2473 /* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */
2474 for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) {
2475 first = get_a_page(rq, gfp);
2476 if (!first) {
2477 if (list)
2478 give_pages(rq, list);
2479 return -ENOMEM;
2480 }
2481 sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE);
2482
2483 /* chain new page in list head to match sg */
2484 first->private = (unsigned long)list;
2485 list = first;
2486 }
2487
2488 first = get_a_page(rq, gfp);
2489 if (!first) {
2490 give_pages(rq, list);
2491 return -ENOMEM;
2492 }
2493 p = page_address(first);
2494
2495 /* rq->sg[0], rq->sg[1] share the same page */
2496 	/* a separate rq->sg[0] for the header - required in case !any_header_sg */
2497 sg_set_buf(&rq->sg[0], p, vi->hdr_len);
2498
2499 /* rq->sg[1] for data packet, from offset */
2500 offset = sizeof(struct padded_vnet_hdr);
2501 sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);
2502
2503 /* chain first in list head */
2504 first->private = (unsigned long)list;
2505 err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2,
2506 first, gfp);
2507 if (err < 0)
2508 give_pages(rq, first);
2509
2510 return err;
2511 }
2512
2513 static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
2514 struct ewma_pkt_len *avg_pkt_len,
2515 unsigned int room)
2516 {
2517 struct virtnet_info *vi = rq->vq->vdev->priv;
2518 const size_t hdr_len = vi->hdr_len;
2519 unsigned int len;
2520
2521 if (room)
2522 return PAGE_SIZE - room;
2523
2524 len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
2525 rq->min_buf_len, PAGE_SIZE - hdr_len);
2526
2527 return ALIGN(len, L1_CACHE_BYTES);
2528 }
2529
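/* Post one mergeable receive buffer, sized from the EWMA of recent
 * packet lengths (or PAGE_SIZE minus the XDP room when headroom is
 * reserved), and encode truesize/headroom in the buffer context.
 */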
2530 static int add_recvbuf_mergeable(struct virtnet_info *vi,
2531 struct receive_queue *rq, gfp_t gfp)
2532 {
2533 struct page_frag *alloc_frag = &rq->alloc_frag;
2534 unsigned int headroom = virtnet_get_headroom(vi);
2535 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
2536 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
2537 unsigned int len, hole;
2538 void *ctx;
2539 char *buf;
2540 int err;
2541
2542 	/* Extra tailroom is needed to satisfy XDP's assumption. This
2543 	 * means rx frag coalescing won't work, but since we've
2544 	 * disabled GSO for XDP it won't be a big issue.
2545 	 */
2546 len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
2547
2548 buf = virtnet_rq_alloc(rq, len + room, gfp);
2549 if (unlikely(!buf))
2550 return -ENOMEM;
2551
2552 buf += headroom; /* advance address leaving hole at front of pkt */
2553 hole = alloc_frag->size - alloc_frag->offset;
2554 if (hole < len + room) {
2555 /* To avoid internal fragmentation, if there is very likely not
2556 * enough space for another buffer, add the remaining space to
2557 * the current buffer.
2558 * XDP core assumes that frame_size of xdp_buff and the length
2559 * of the frag are PAGE_SIZE, so we disable the hole mechanism.
2560 */
2561 if (!headroom)
2562 len += hole;
2563 alloc_frag->offset += hole;
2564 }
2565
2566 virtnet_rq_init_one_sg(rq, buf, len);
2567
2568 ctx = mergeable_len_to_ctx(len + room, headroom);
2569 err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
2570 if (err < 0) {
2571 if (rq->do_dma)
2572 virtnet_rq_unmap(rq, buf, 0);
2573 put_page(virt_to_head_page(buf));
2574 }
2575
2576 return err;
2577 }
2578
2579 /*
2580 * Returns false if we couldn't fill entirely (OOM).
2581 *
2582 * Normally run in the receive path, but can also be run from ndo_open
2583 * before we're receiving packets, or from refill_work which is
2584 * careful to disable receiving (using napi_disable).
2585 */
2586 static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
2587 gfp_t gfp)
2588 {
2589 int err;
2590
2591 if (rq->xsk_pool) {
2592 err = virtnet_add_recvbuf_xsk(vi, rq, rq->xsk_pool, gfp);
2593 goto kick;
2594 }
2595
2596 do {
2597 if (vi->mergeable_rx_bufs)
2598 err = add_recvbuf_mergeable(vi, rq, gfp);
2599 else if (vi->big_packets)
2600 err = add_recvbuf_big(vi, rq, gfp);
2601 else
2602 err = add_recvbuf_small(vi, rq, gfp);
2603
2604 if (err)
2605 break;
2606 } while (rq->vq->num_free);
2607
2608 kick:
2609 if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) {
2610 unsigned long flags;
2611
2612 flags = u64_stats_update_begin_irqsave(&rq->stats.syncp);
2613 u64_stats_inc(&rq->stats.kicks);
2614 u64_stats_update_end_irqrestore(&rq->stats.syncp, flags);
2615 }
2616
2617 return err != -ENOMEM;
2618 }
2619
2620 static void skb_recv_done(struct virtqueue *rvq)
2621 {
2622 struct virtnet_info *vi = rvq->vdev->priv;
2623 struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];
2624
2625 rq->calls++;
2626 virtqueue_napi_schedule(&rq->napi, rvq);
2627 }
2628
2629 static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
2630 {
2631 napi_enable(napi);
2632
2633 	/* If all buffers were filled by the other side before we enabled NAPI,
2634 	 * we won't get another interrupt, so process any outstanding packets now.
2635 	 * Call local_bh_enable() afterwards to trigger softirq processing.
2636 	 */
2637 local_bh_disable();
2638 virtqueue_napi_schedule(napi, vq);
2639 local_bh_enable();
2640 }
2641
2642 static void virtnet_napi_tx_enable(struct virtnet_info *vi,
2643 struct virtqueue *vq,
2644 struct napi_struct *napi)
2645 {
2646 if (!napi->weight)
2647 return;
2648
2649 /* Tx napi touches cachelines on the cpu handling tx interrupts. Only
2650 * enable the feature if this is likely affine with the transmit path.
2651 */
2652 if (!vi->affinity_hint_set) {
2653 napi->weight = 0;
2654 return;
2655 }
2656
2657 return virtnet_napi_enable(vq, napi);
2658 }
2659
2660 static void virtnet_napi_tx_disable(struct napi_struct *napi)
2661 {
2662 if (napi->weight)
2663 napi_disable(napi);
2664 }
2665
2666 static void refill_work(struct work_struct *work)
2667 {
2668 struct virtnet_info *vi =
2669 container_of(work, struct virtnet_info, refill.work);
2670 bool still_empty;
2671 int i;
2672
2673 for (i = 0; i < vi->curr_queue_pairs; i++) {
2674 struct receive_queue *rq = &vi->rq[i];
2675
2676 napi_disable(&rq->napi);
2677 still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
2678 virtnet_napi_enable(rq->vq, &rq->napi);
2679
2680 		/* In theory, this can happen: if we don't get any buffers in,
2681 		 * we will *never* try to fill again.
2682 		 */
2683 if (still_empty)
2684 schedule_delayed_work(&vi->refill, HZ/2);
2685 }
2686 }
2687
2688 static int virtnet_receive_xsk_bufs(struct virtnet_info *vi,
2689 struct receive_queue *rq,
2690 int budget,
2691 unsigned int *xdp_xmit,
2692 struct virtnet_rq_stats *stats)
2693 {
2694 unsigned int len;
2695 int packets = 0;
2696 void *buf;
2697
2698 while (packets < budget) {
2699 buf = virtqueue_get_buf(rq->vq, &len);
2700 if (!buf)
2701 break;
2702
2703 virtnet_receive_xsk_buf(vi, rq, buf, len, xdp_xmit, stats);
2704 packets++;
2705 }
2706
2707 return packets;
2708 }
2709
2710 static int virtnet_receive_packets(struct virtnet_info *vi,
2711 struct receive_queue *rq,
2712 int budget,
2713 unsigned int *xdp_xmit,
2714 struct virtnet_rq_stats *stats)
2715 {
2716 unsigned int len;
2717 int packets = 0;
2718 void *buf;
2719
2720 if (!vi->big_packets || vi->mergeable_rx_bufs) {
2721 void *ctx;
2722 while (packets < budget &&
2723 (buf = virtnet_rq_get_buf(rq, &len, &ctx))) {
2724 receive_buf(vi, rq, buf, len, ctx, xdp_xmit, stats);
2725 packets++;
2726 }
2727 } else {
2728 while (packets < budget &&
2729 (buf = virtnet_rq_get_buf(rq, &len, NULL)) != NULL) {
2730 receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats);
2731 packets++;
2732 }
2733 }
2734
2735 return packets;
2736 }
2737
2738 static int virtnet_receive(struct receive_queue *rq, int budget,
2739 unsigned int *xdp_xmit)
2740 {
2741 struct virtnet_info *vi = rq->vq->vdev->priv;
2742 struct virtnet_rq_stats stats = {};
2743 int i, packets;
2744
2745 if (rq->xsk_pool)
2746 packets = virtnet_receive_xsk_bufs(vi, rq, budget, xdp_xmit, &stats);
2747 else
2748 packets = virtnet_receive_packets(vi, rq, budget, xdp_xmit, &stats);
2749
2750 if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
2751 if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {
2752 spin_lock(&vi->refill_lock);
2753 if (vi->refill_enabled)
2754 schedule_delayed_work(&vi->refill, 0);
2755 spin_unlock(&vi->refill_lock);
2756 }
2757 }
2758
2759 u64_stats_set(&stats.packets, packets);
2760 u64_stats_update_begin(&rq->stats.syncp);
2761 for (i = 0; i < ARRAY_SIZE(virtnet_rq_stats_desc); i++) {
2762 size_t offset = virtnet_rq_stats_desc[i].offset;
2763 u64_stats_t *item, *src;
2764
2765 item = (u64_stats_t *)((u8 *)&rq->stats + offset);
2766 src = (u64_stats_t *)((u8 *)&stats + offset);
2767 u64_stats_add(item, u64_stats_read(src));
2768 }
2769
2770 u64_stats_add(&rq->stats.packets, u64_stats_read(&stats.packets));
2771 u64_stats_add(&rq->stats.bytes, u64_stats_read(&stats.bytes));
2772
2773 u64_stats_update_end(&rq->stats.syncp);
2774
2775 return packets;
2776 }
2777
2778 static void virtnet_poll_cleantx(struct receive_queue *rq, int budget)
2779 {
2780 struct virtnet_info *vi = rq->vq->vdev->priv;
2781 unsigned int index = vq2rxq(rq->vq);
2782 struct send_queue *sq = &vi->sq[index];
2783 struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);
2784
2785 if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))
2786 return;
2787
2788 if (__netif_tx_trylock(txq)) {
2789 if (sq->reset) {
2790 __netif_tx_unlock(txq);
2791 return;
2792 }
2793
2794 do {
2795 virtqueue_disable_cb(sq->vq);
2796 free_old_xmit(sq, txq, !!budget);
2797 } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
2798
2799 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
2800 if (netif_tx_queue_stopped(txq)) {
2801 u64_stats_update_begin(&sq->stats.syncp);
2802 u64_stats_inc(&sq->stats.wake);
2803 u64_stats_update_end(&sq->stats.syncp);
2804 }
2805 netif_tx_wake_queue(txq);
2806 }
2807
2808 __netif_tx_unlock(txq);
2809 }
2810 }
2811
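/* Feed the per-queue interrupt/packet/byte counters into net_dim() so
 * the dynamic interrupt moderation algorithm can pick new coalescing
 * parameters for this receive queue.
 */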
2812 static void virtnet_rx_dim_update(struct virtnet_info *vi, struct receive_queue *rq)
2813 {
2814 struct dim_sample cur_sample = {};
2815
2816 if (!rq->packets_in_napi)
2817 return;
2818
2819 	/* No protection is needed when fetching the stats, since the fetcher
2820 	 * and the updater of the stats run in the same context.
2821 	 */
2822 dim_update_sample(rq->calls,
2823 u64_stats_read(&rq->stats.packets),
2824 u64_stats_read(&rq->stats.bytes),
2825 &cur_sample);
2826
2827 net_dim(&rq->dim, cur_sample);
2828 rq->packets_in_napi = 0;
2829 }
2830
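/* Receive NAPI poll handler: clean completed transmissions sharing the
 * queue pair, receive up to @budget packets, flush any XDP redirects,
 * and kick the XDP transmit queue if XDP_TX was used.
 */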
2831 static int virtnet_poll(struct napi_struct *napi, int budget)
2832 {
2833 struct receive_queue *rq =
2834 container_of(napi, struct receive_queue, napi);
2835 struct virtnet_info *vi = rq->vq->vdev->priv;
2836 struct send_queue *sq;
2837 unsigned int received;
2838 unsigned int xdp_xmit = 0;
2839 bool napi_complete;
2840
2841 virtnet_poll_cleantx(rq, budget);
2842
2843 received = virtnet_receive(rq, budget, &xdp_xmit);
2844 rq->packets_in_napi += received;
2845
2846 if (xdp_xmit & VIRTIO_XDP_REDIR)
2847 xdp_do_flush();
2848
2849 /* Out of packets? */
2850 if (received < budget) {
2851 napi_complete = virtqueue_napi_complete(napi, rq->vq, received);
2852 /* Intentionally not taking dim_lock here. This may result in a
2853 * spurious net_dim call. But if that happens virtnet_rx_dim_work
2854 * will not act on the scheduled work.
2855 */
2856 if (napi_complete && rq->dim_enabled)
2857 virtnet_rx_dim_update(vi, rq);
2858 }
2859
2860 if (xdp_xmit & VIRTIO_XDP_TX) {
2861 sq = virtnet_xdp_get_sq(vi);
2862 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
2863 u64_stats_update_begin(&sq->stats.syncp);
2864 u64_stats_inc(&sq->stats.kicks);
2865 u64_stats_update_end(&sq->stats.syncp);
2866 }
2867 virtnet_xdp_put_sq(vi, sq);
2868 }
2869
2870 return received;
2871 }
2872
2873 static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index)
2874 {
2875 virtnet_napi_tx_disable(&vi->sq[qp_index].napi);
2876 napi_disable(&vi->rq[qp_index].napi);
2877 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
2878 }
2879
2880 static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
2881 {
2882 struct net_device *dev = vi->dev;
2883 int err;
2884
2885 err = xdp_rxq_info_reg(&vi->rq[qp_index].xdp_rxq, dev, qp_index,
2886 vi->rq[qp_index].napi.napi_id);
2887 if (err < 0)
2888 return err;
2889
2890 err = xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq,
2891 MEM_TYPE_PAGE_SHARED, NULL);
2892 if (err < 0)
2893 goto err_xdp_reg_mem_model;
2894
2895 netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, qp_index));
2896 virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi);
2897 virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi);
2898
2899 return 0;
2900
2901 err_xdp_reg_mem_model:
2902 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
2903 return err;
2904 }
2905
2906 static void virtnet_cancel_dim(struct virtnet_info *vi, struct dim *dim)
2907 {
2908 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
2909 return;
2910 net_dim_work_cancel(dim);
2911 }
2912
2913 static void virtnet_update_settings(struct virtnet_info *vi)
2914 {
2915 u32 speed;
2916 u8 duplex;
2917
2918 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX))
2919 return;
2920
2921 virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed);
2922
2923 if (ethtool_validate_speed(speed))
2924 vi->speed = speed;
2925
2926 virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex);
2927
2928 if (ethtool_validate_duplex(duplex))
2929 vi->duplex = duplex;
2930 }
2931
2932 static int virtnet_open(struct net_device *dev)
2933 {
2934 struct virtnet_info *vi = netdev_priv(dev);
2935 int i, err;
2936
2937 enable_delayed_refill(vi);
2938
2939 for (i = 0; i < vi->max_queue_pairs; i++) {
2940 if (i < vi->curr_queue_pairs)
2941 			/* Make sure we have some buffers: if OOM, use the refill workqueue. */
2942 if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
2943 schedule_delayed_work(&vi->refill, 0);
2944
2945 err = virtnet_enable_queue_pair(vi, i);
2946 if (err < 0)
2947 goto err_enable_qp;
2948 }
2949
2950 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
2951 if (vi->status & VIRTIO_NET_S_LINK_UP)
2952 netif_carrier_on(vi->dev);
2953 virtio_config_driver_enable(vi->vdev);
2954 } else {
2955 vi->status = VIRTIO_NET_S_LINK_UP;
2956 netif_carrier_on(dev);
2957 }
2958
2959 return 0;
2960
2961 err_enable_qp:
2962 disable_delayed_refill(vi);
2963 cancel_delayed_work_sync(&vi->refill);
2964
2965 for (i--; i >= 0; i--) {
2966 virtnet_disable_queue_pair(vi, i);
2967 virtnet_cancel_dim(vi, &vi->rq[i].dim);
2968 }
2969
2970 return err;
2971 }
2972
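/* Transmit NAPI poll handler: reclaim completed skbs under the txq
 * lock, wake the queue if enough descriptors are free again, and
 * re-arm the virtqueue callback before completing NAPI.
 */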
2973 static int virtnet_poll_tx(struct napi_struct *napi, int budget)
2974 {
2975 struct send_queue *sq = container_of(napi, struct send_queue, napi);
2976 struct virtnet_info *vi = sq->vq->vdev->priv;
2977 unsigned int index = vq2txq(sq->vq);
2978 struct netdev_queue *txq;
2979 int opaque;
2980 bool done;
2981
2982 if (unlikely(is_xdp_raw_buffer_queue(vi, index))) {
2983 /* We don't need to enable cb for XDP */
2984 napi_complete_done(napi, 0);
2985 return 0;
2986 }
2987
2988 txq = netdev_get_tx_queue(vi->dev, index);
2989 __netif_tx_lock(txq, raw_smp_processor_id());
2990 virtqueue_disable_cb(sq->vq);
2991 free_old_xmit(sq, txq, !!budget);
2992
2993 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
2994 if (netif_tx_queue_stopped(txq)) {
2995 u64_stats_update_begin(&sq->stats.syncp);
2996 u64_stats_inc(&sq->stats.wake);
2997 u64_stats_update_end(&sq->stats.syncp);
2998 }
2999 netif_tx_wake_queue(txq);
3000 }
3001
3002 opaque = virtqueue_enable_cb_prepare(sq->vq);
3003
3004 done = napi_complete_done(napi, 0);
3005
3006 if (!done)
3007 virtqueue_disable_cb(sq->vq);
3008
3009 __netif_tx_unlock(txq);
3010
3011 if (done) {
3012 if (unlikely(virtqueue_poll(sq->vq, opaque))) {
3013 if (napi_schedule_prep(napi)) {
3014 __netif_tx_lock(txq, raw_smp_processor_id());
3015 virtqueue_disable_cb(sq->vq);
3016 __netif_tx_unlock(txq);
3017 __napi_schedule(napi);
3018 }
3019 }
3020 }
3021
3022 return 0;
3023 }
3024
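/* Build the scatterlist for one skb and add it to the transmit
 * virtqueue. The virtio-net header is pushed in front of the data when
 * there is room and alignment allows; otherwise it goes in a separate
 * sg entry.
 */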
3025 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan)
3026 {
3027 struct virtio_net_hdr_mrg_rxbuf *hdr;
3028 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
3029 struct virtnet_info *vi = sq->vq->vdev->priv;
3030 int num_sg;
3031 unsigned hdr_len = vi->hdr_len;
3032 bool can_push;
3033
3034 pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
3035
3036 can_push = vi->any_header_sg &&
3037 !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
3038 !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;
3039 /* Even if we can, don't push here yet as this would skew
3040 * csum_start offset below. */
3041 if (can_push)
3042 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len);
3043 else
3044 hdr = &skb_vnet_common_hdr(skb)->mrg_hdr;
3045
3046 if (virtio_net_hdr_from_skb(skb, &hdr->hdr,
3047 virtio_is_little_endian(vi->vdev), false,
3048 0))
3049 return -EPROTO;
3050
3051 if (vi->mergeable_rx_bufs)
3052 hdr->num_buffers = 0;
3053
3054 sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2));
3055 if (can_push) {
3056 __skb_push(skb, hdr_len);
3057 num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
3058 if (unlikely(num_sg < 0))
3059 return num_sg;
3060 /* Pull header back to avoid skew in tx bytes calculations. */
3061 __skb_pull(skb, hdr_len);
3062 } else {
3063 sg_set_buf(sq->sg, hdr, hdr_len);
3064 num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len);
3065 if (unlikely(num_sg < 0))
3066 return num_sg;
3067 num_sg++;
3068 }
3069 return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg,
3070 skb_to_ptr(skb, orphan), GFP_ATOMIC);
3071 }
3072
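/* ndo_start_xmit: free old completed buffers, queue the skb, stop the
 * queue if it is about to fill up, and kick the device unless more
 * packets are expected (xmit_more).
 */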
3073 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
3074 {
3075 struct virtnet_info *vi = netdev_priv(dev);
3076 int qnum = skb_get_queue_mapping(skb);
3077 struct send_queue *sq = &vi->sq[qnum];
3078 int err;
3079 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
3080 bool xmit_more = netdev_xmit_more();
3081 bool use_napi = sq->napi.weight;
3082 bool kick;
3083
3084 /* Free up any pending old buffers before queueing new ones. */
3085 do {
3086 if (use_napi)
3087 virtqueue_disable_cb(sq->vq);
3088
3089 free_old_xmit(sq, txq, false);
3090
3091 } while (use_napi && !xmit_more &&
3092 unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
3093
3094 /* timestamp packet in software */
3095 skb_tx_timestamp(skb);
3096
3097 /* Try to transmit */
3098 err = xmit_skb(sq, skb, !use_napi);
3099
3100 /* This should not happen! */
3101 if (unlikely(err)) {
3102 DEV_STATS_INC(dev, tx_fifo_errors);
3103 if (net_ratelimit())
3104 dev_warn(&dev->dev,
3105 "Unexpected TXQ (%d) queue failure: %d\n",
3106 qnum, err);
3107 DEV_STATS_INC(dev, tx_dropped);
3108 dev_kfree_skb_any(skb);
3109 return NETDEV_TX_OK;
3110 }
3111
3112 /* Don't wait up for transmitted skbs to be freed. */
3113 if (!use_napi) {
3114 skb_orphan(skb);
3115 nf_reset_ct(skb);
3116 }
3117
3118 check_sq_full_and_disable(vi, dev, sq);
3119
3120 kick = use_napi ? __netdev_tx_sent_queue(txq, skb->len, xmit_more) :
3121 !xmit_more || netif_xmit_stopped(txq);
3122 if (kick) {
3123 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
3124 u64_stats_update_begin(&sq->stats.syncp);
3125 u64_stats_inc(&sq->stats.kicks);
3126 u64_stats_update_end(&sq->stats.syncp);
3127 }
3128 }
3129
3130 return NETDEV_TX_OK;
3131 }
3132
3133 static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
3134 {
3135 bool running = netif_running(vi->dev);
3136
3137 if (running) {
3138 napi_disable(&rq->napi);
3139 virtnet_cancel_dim(vi, &rq->dim);
3140 }
3141 }
3142
3143 static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
3144 {
3145 bool running = netif_running(vi->dev);
3146
3147 if (!try_fill_recv(vi, rq, GFP_KERNEL))
3148 schedule_delayed_work(&vi->refill, 0);
3149
3150 if (running)
3151 virtnet_napi_enable(rq->vq, &rq->napi);
3152 }
3153
3154 static int virtnet_rx_resize(struct virtnet_info *vi,
3155 struct receive_queue *rq, u32 ring_num)
3156 {
3157 int err, qindex;
3158
3159 qindex = rq - vi->rq;
3160
3161 virtnet_rx_pause(vi, rq);
3162
3163 err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_unmap_free_buf);
3164 if (err)
3165 netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err);
3166
3167 virtnet_rx_resume(vi, rq);
3168 return err;
3169 }
3170
3171 static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq)
3172 {
3173 bool running = netif_running(vi->dev);
3174 struct netdev_queue *txq;
3175 int qindex;
3176
3177 qindex = sq - vi->sq;
3178
3179 if (running)
3180 virtnet_napi_tx_disable(&sq->napi);
3181
3182 txq = netdev_get_tx_queue(vi->dev, qindex);
3183
3184 	/* 1. wait for all in-flight xmit to complete
3185 	 * 2. fix the race between netif_stop_subqueue() and netif_start_subqueue()
3186 	 */
3187 __netif_tx_lock_bh(txq);
3188
3189 /* Prevent rx poll from accessing sq. */
3190 sq->reset = true;
3191
3192 /* Prevent the upper layer from trying to send packets. */
3193 netif_stop_subqueue(vi->dev, qindex);
3194
3195 __netif_tx_unlock_bh(txq);
3196 }
3197
3198 static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq)
3199 {
3200 bool running = netif_running(vi->dev);
3201 struct netdev_queue *txq;
3202 int qindex;
3203
3204 qindex = sq - vi->sq;
3205
3206 txq = netdev_get_tx_queue(vi->dev, qindex);
3207
3208 __netif_tx_lock_bh(txq);
3209 sq->reset = false;
3210 netif_tx_wake_queue(txq);
3211 __netif_tx_unlock_bh(txq);
3212
3213 if (running)
3214 virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
3215 }
3216
3217 static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq,
3218 u32 ring_num)
3219 {
3220 int qindex, err;
3221
3222 qindex = sq - vi->sq;
3223
3224 virtnet_tx_pause(vi, sq);
3225
3226 err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf);
3227 if (err)
3228 netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err);
3229
3230 virtnet_tx_resume(vi, sq);
3231
3232 return err;
3233 }
3234
3235 /*
3236 * Send command via the control virtqueue and check status. Commands
3237 * supported by the hypervisor, as indicated by feature bits, should
3238 * never fail unless improperly formatted.
3239 */
3240 static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd,
3241 struct scatterlist *out,
3242 struct scatterlist *in)
3243 {
3244 struct scatterlist *sgs[5], hdr, stat;
3245 u32 out_num = 0, tmp, in_num = 0;
3246 bool ok;
3247 int ret;
3248
3249 /* Caller should know better */
3250 BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
3251
3252 mutex_lock(&vi->cvq_lock);
3253 vi->ctrl->status = ~0;
3254 vi->ctrl->hdr.class = class;
3255 vi->ctrl->hdr.cmd = cmd;
3256 /* Add header */
3257 sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr));
3258 sgs[out_num++] = &hdr;
3259
3260 if (out)
3261 sgs[out_num++] = out;
3262
3263 /* Add return status. */
3264 sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status));
3265 sgs[out_num + in_num++] = &stat;
3266
3267 if (in)
3268 sgs[out_num + in_num++] = in;
3269
3270 BUG_ON(out_num + in_num > ARRAY_SIZE(sgs));
3271 ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC);
3272 if (ret < 0) {
3273 dev_warn(&vi->vdev->dev,
3274 			 "Failed to add sgs for command vq: %d.\n", ret);
3275 mutex_unlock(&vi->cvq_lock);
3276 return false;
3277 }
3278
3279 if (unlikely(!virtqueue_kick(vi->cvq)))
3280 goto unlock;
3281
3282 /* Spin for a response, the kick causes an ioport write, trapping
3283 * into the hypervisor, so the request should be handled immediately.
3284 */
3285 while (!virtqueue_get_buf(vi->cvq, &tmp) &&
3286 !virtqueue_is_broken(vi->cvq)) {
3287 cond_resched();
3288 cpu_relax();
3289 }
3290
3291 unlock:
3292 ok = vi->ctrl->status == VIRTIO_NET_OK;
3293 mutex_unlock(&vi->cvq_lock);
3294 return ok;
3295 }
3296
3297 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
3298 struct scatterlist *out)
3299 {
3300 return virtnet_send_command_reply(vi, class, cmd, out, NULL);
3301 }
3302
3303 static int virtnet_set_mac_address(struct net_device *dev, void *p)
3304 {
3305 struct virtnet_info *vi = netdev_priv(dev);
3306 struct virtio_device *vdev = vi->vdev;
3307 int ret;
3308 struct sockaddr *addr;
3309 struct scatterlist sg;
3310
3311 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY))
3312 return -EOPNOTSUPP;
3313
3314 addr = kmemdup(p, sizeof(*addr), GFP_KERNEL);
3315 if (!addr)
3316 return -ENOMEM;
3317
3318 ret = eth_prepare_mac_addr_change(dev, addr);
3319 if (ret)
3320 goto out;
3321
3322 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
3323 sg_init_one(&sg, addr->sa_data, dev->addr_len);
3324 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
3325 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
3326 dev_warn(&vdev->dev,
3327 "Failed to set mac address by vq command.\n");
3328 ret = -EINVAL;
3329 goto out;
3330 }
3331 } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
3332 !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3333 unsigned int i;
3334
3335 /* Naturally, this has an atomicity problem. */
3336 for (i = 0; i < dev->addr_len; i++)
3337 virtio_cwrite8(vdev,
3338 offsetof(struct virtio_net_config, mac) +
3339 i, addr->sa_data[i]);
3340 }
3341
3342 eth_commit_mac_addr_change(dev, p);
3343 ret = 0;
3344
3345 out:
3346 kfree(addr);
3347 return ret;
3348 }
3349
3350 static void virtnet_stats(struct net_device *dev,
3351 struct rtnl_link_stats64 *tot)
3352 {
3353 struct virtnet_info *vi = netdev_priv(dev);
3354 unsigned int start;
3355 int i;
3356
3357 for (i = 0; i < vi->max_queue_pairs; i++) {
3358 u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops;
3359 struct receive_queue *rq = &vi->rq[i];
3360 struct send_queue *sq = &vi->sq[i];
3361
3362 do {
3363 start = u64_stats_fetch_begin(&sq->stats.syncp);
3364 tpackets = u64_stats_read(&sq->stats.packets);
3365 tbytes = u64_stats_read(&sq->stats.bytes);
3366 terrors = u64_stats_read(&sq->stats.tx_timeouts);
3367 } while (u64_stats_fetch_retry(&sq->stats.syncp, start));
3368
3369 do {
3370 start = u64_stats_fetch_begin(&rq->stats.syncp);
3371 rpackets = u64_stats_read(&rq->stats.packets);
3372 rbytes = u64_stats_read(&rq->stats.bytes);
3373 rdrops = u64_stats_read(&rq->stats.drops);
3374 } while (u64_stats_fetch_retry(&rq->stats.syncp, start));
3375
3376 tot->rx_packets += rpackets;
3377 tot->tx_packets += tpackets;
3378 tot->rx_bytes += rbytes;
3379 tot->tx_bytes += tbytes;
3380 tot->rx_dropped += rdrops;
3381 tot->tx_errors += terrors;
3382 }
3383
3384 tot->tx_dropped = DEV_STATS_READ(dev, tx_dropped);
3385 tot->tx_fifo_errors = DEV_STATS_READ(dev, tx_fifo_errors);
3386 tot->rx_length_errors = DEV_STATS_READ(dev, rx_length_errors);
3387 tot->rx_frame_errors = DEV_STATS_READ(dev, rx_frame_errors);
3388 }
3389
3390 static void virtnet_ack_link_announce(struct virtnet_info *vi)
3391 {
3392 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE,
3393 VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL))
3394 dev_warn(&vi->dev->dev, "Failed to ack link announce.\n");
3395 }
3396
3397 static bool virtnet_commit_rss_command(struct virtnet_info *vi);
3398
3399 static void virtnet_rss_update_by_qpairs(struct virtnet_info *vi, u16 queue_pairs)
3400 {
3401 u32 indir_val = 0;
3402 int i = 0;
3403
3404 for (; i < vi->rss_indir_table_size; ++i) {
3405 indir_val = ethtool_rxfh_indir_default(i, queue_pairs);
3406 vi->rss.indirection_table[i] = indir_val;
3407 }
3408 vi->rss.max_tx_vq = queue_pairs;
3409 }
3410
3411 static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
3412 {
3413 struct virtio_net_ctrl_mq *mq __free(kfree) = NULL;
3414 struct virtio_net_ctrl_rss old_rss;
3415 struct net_device *dev = vi->dev;
3416 struct scatterlist sg;
3417
3418 if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
3419 return 0;
3420
3421 	/* First check whether we need to update RSS. Do the update only if both
3422 	 * (1) RSS is enabled and (2) there is no user configuration.
3423 	 *
3424 	 * During RSS command processing, the device updates queue_pairs using rss.max_tx_vq. That is,
3425 	 * the device updates queue_pairs together with RSS, so we can skip the separate queue_pairs
3426 	 * update (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET below) and return directly.
3427 	 */
3428 if (vi->has_rss && !netif_is_rxfh_configured(dev)) {
3429 memcpy(&old_rss, &vi->rss, sizeof(old_rss));
3430 if (rss_indirection_table_alloc(&vi->rss, vi->rss_indir_table_size)) {
3431 vi->rss.indirection_table = old_rss.indirection_table;
3432 return -ENOMEM;
3433 }
3434
3435 virtnet_rss_update_by_qpairs(vi, queue_pairs);
3436
3437 if (!virtnet_commit_rss_command(vi)) {
3438 /* restore ctrl_rss if commit_rss_command failed */
3439 rss_indirection_table_free(&vi->rss);
3440 memcpy(&vi->rss, &old_rss, sizeof(old_rss));
3441
3442 dev_warn(&dev->dev, "Fail to set num of queue pairs to %d, because committing RSS failed\n",
3443 queue_pairs);
3444 return -EINVAL;
3445 }
3446 rss_indirection_table_free(&old_rss);
3447 goto succ;
3448 }
3449
3450 mq = kzalloc(sizeof(*mq), GFP_KERNEL);
3451 if (!mq)
3452 return -ENOMEM;
3453
3454 mq->virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs);
3455 sg_init_one(&sg, mq, sizeof(*mq));
3456
3457 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
3458 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) {
3459 dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n",
3460 queue_pairs);
3461 return -EINVAL;
3462 }
3463 succ:
3464 vi->curr_queue_pairs = queue_pairs;
3465 	/* virtnet_open() will refill when the device goes up. */
3466 if (dev->flags & IFF_UP)
3467 schedule_delayed_work(&vi->refill, 0);
3468
3469 return 0;
3470 }
3471
3472 static int virtnet_close(struct net_device *dev)
3473 {
3474 struct virtnet_info *vi = netdev_priv(dev);
3475 int i;
3476
3477 /* Make sure NAPI doesn't schedule refill work */
3478 disable_delayed_refill(vi);
3479 /* Make sure refill_work doesn't re-enable napi! */
3480 cancel_delayed_work_sync(&vi->refill);
3481 /* Prevent the config change callback from changing carrier
3482 * after close
3483 */
3484 virtio_config_driver_disable(vi->vdev);
3485 /* Stop getting status/speed updates: we don't care until next
3486 * open
3487 */
3488 cancel_work_sync(&vi->config_work);
3489
3490 for (i = 0; i < vi->max_queue_pairs; i++) {
3491 virtnet_disable_queue_pair(vi, i);
3492 virtnet_cancel_dim(vi, &vi->rq[i].dim);
3493 }
3494
3495 netif_carrier_off(dev);
3496
3497 return 0;
3498 }
3499
3500 static void virtnet_rx_mode_work(struct work_struct *work)
3501 {
3502 struct virtnet_info *vi =
3503 container_of(work, struct virtnet_info, rx_mode_work);
3504 u8 *promisc_allmulti __free(kfree) = NULL;
3505 struct net_device *dev = vi->dev;
3506 struct scatterlist sg[2];
3507 struct virtio_net_ctrl_mac *mac_data;
3508 struct netdev_hw_addr *ha;
3509 int uc_count;
3510 int mc_count;
3511 void *buf;
3512 int i;
3513
3514 /* We can't dynamically set ndo_set_rx_mode, so return gracefully */
3515 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
3516 return;
3517
3518 promisc_allmulti = kzalloc(sizeof(*promisc_allmulti), GFP_KERNEL);
3519 if (!promisc_allmulti) {
3520 dev_warn(&dev->dev, "Failed to set RX mode, no memory.\n");
3521 return;
3522 }
3523
3524 rtnl_lock();
3525
3526 *promisc_allmulti = !!(dev->flags & IFF_PROMISC);
3527 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti));
3528
3529 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
3530 VIRTIO_NET_CTRL_RX_PROMISC, sg))
3531 dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
3532 *promisc_allmulti ? "en" : "dis");
3533
3534 *promisc_allmulti = !!(dev->flags & IFF_ALLMULTI);
3535 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti));
3536
3537 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
3538 VIRTIO_NET_CTRL_RX_ALLMULTI, sg))
3539 dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
3540 *promisc_allmulti ? "en" : "dis");
3541
3542 netif_addr_lock_bh(dev);
3543
3544 uc_count = netdev_uc_count(dev);
3545 mc_count = netdev_mc_count(dev);
3546 /* MAC filter - use one buffer for both lists */
3547 buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
3548 (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
3549 mac_data = buf;
3550 if (!buf) {
3551 netif_addr_unlock_bh(dev);
3552 rtnl_unlock();
3553 return;
3554 }
3555
3556 sg_init_table(sg, 2);
3557
3558 /* Store the unicast list and count in the front of the buffer */
3559 mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count);
3560 i = 0;
3561 netdev_for_each_uc_addr(ha, dev)
3562 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
3563
3564 sg_set_buf(&sg[0], mac_data,
3565 sizeof(mac_data->entries) + (uc_count * ETH_ALEN));
3566
3567 /* multicast list and count fill the end */
3568 mac_data = (void *)&mac_data->macs[uc_count][0];
3569
3570 mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count);
3571 i = 0;
3572 netdev_for_each_mc_addr(ha, dev)
3573 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
3574
3575 netif_addr_unlock_bh(dev);
3576
3577 sg_set_buf(&sg[1], mac_data,
3578 sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
3579
3580 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
3581 VIRTIO_NET_CTRL_MAC_TABLE_SET, sg))
3582 dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
3583
3584 rtnl_unlock();
3585
3586 kfree(buf);
3587 }
3588
3589 static void virtnet_set_rx_mode(struct net_device *dev)
3590 {
3591 struct virtnet_info *vi = netdev_priv(dev);
3592
3593 if (vi->rx_mode_work_enabled)
3594 schedule_work(&vi->rx_mode_work);
3595 }
3596
3597 static int virtnet_vlan_rx_add_vid(struct net_device *dev,
3598 __be16 proto, u16 vid)
3599 {
3600 struct virtnet_info *vi = netdev_priv(dev);
3601 __virtio16 *_vid __free(kfree) = NULL;
3602 struct scatterlist sg;
3603
3604 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL);
3605 if (!_vid)
3606 return -ENOMEM;
3607
3608 *_vid = cpu_to_virtio16(vi->vdev, vid);
3609 sg_init_one(&sg, _vid, sizeof(*_vid));
3610
3611 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
3612 VIRTIO_NET_CTRL_VLAN_ADD, &sg))
3613 dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
3614 return 0;
3615 }
3616
3617 static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
3618 __be16 proto, u16 vid)
3619 {
3620 struct virtnet_info *vi = netdev_priv(dev);
3621 __virtio16 *_vid __free(kfree) = NULL;
3622 struct scatterlist sg;
3623
3624 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL);
3625 if (!_vid)
3626 return -ENOMEM;
3627
3628 *_vid = cpu_to_virtio16(vi->vdev, vid);
3629 sg_init_one(&sg, _vid, sizeof(*_vid));
3630
3631 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
3632 VIRTIO_NET_CTRL_VLAN_DEL, &sg))
3633 dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
3634 return 0;
3635 }
3636
3637 static void virtnet_clean_affinity(struct virtnet_info *vi)
3638 {
3639 int i;
3640
3641 if (vi->affinity_hint_set) {
3642 for (i = 0; i < vi->max_queue_pairs; i++) {
3643 virtqueue_set_affinity(vi->rq[i].vq, NULL);
3644 virtqueue_set_affinity(vi->sq[i].vq, NULL);
3645 }
3646
3647 vi->affinity_hint_set = false;
3648 }
3649 }
3650
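/* Spread the online CPUs across the currently active queue pairs: each pair
 * gets a contiguous group of roughly num_online_cpus() / curr_queue_pairs
 * CPUs, with any remainder ("stragglers") going to the first pairs. The same
 * mask is used for the virtqueue interrupt affinity hint and for XPS.
 */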
3651 static void virtnet_set_affinity(struct virtnet_info *vi)
3652 {
3653 cpumask_var_t mask;
3654 int stragglers;
3655 int group_size;
3656 int i, j, cpu;
3657 int num_cpu;
3658 int stride;
3659
3660 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
3661 virtnet_clean_affinity(vi);
3662 return;
3663 }
3664
3665 num_cpu = num_online_cpus();
3666 stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1);
3667 stragglers = num_cpu >= vi->curr_queue_pairs ?
3668 num_cpu % vi->curr_queue_pairs :
3669 0;
3670 cpu = cpumask_first(cpu_online_mask);
3671
3672 for (i = 0; i < vi->curr_queue_pairs; i++) {
3673 group_size = stride + (i < stragglers ? 1 : 0);
3674
3675 for (j = 0; j < group_size; j++) {
3676 cpumask_set_cpu(cpu, mask);
3677 cpu = cpumask_next_wrap(cpu, cpu_online_mask,
3678 nr_cpu_ids, false);
3679 }
3680 virtqueue_set_affinity(vi->rq[i].vq, mask);
3681 virtqueue_set_affinity(vi->sq[i].vq, mask);
3682 __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS);
3683 cpumask_clear(mask);
3684 }
3685
3686 vi->affinity_hint_set = true;
3687 free_cpumask_var(mask);
3688 }
3689
3690 static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node)
3691 {
3692 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
3693 node);
3694 virtnet_set_affinity(vi);
3695 return 0;
3696 }
3697
3698 static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node)
3699 {
3700 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
3701 node_dead);
3702 virtnet_set_affinity(vi);
3703 return 0;
3704 }
3705
3706 static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node)
3707 {
3708 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
3709 node);
3710
3711 virtnet_clean_affinity(vi);
3712 return 0;
3713 }
3714
3715 static enum cpuhp_state virtionet_online;
3716
3717 static int virtnet_cpu_notif_add(struct virtnet_info *vi)
3718 {
3719 int ret;
3720
3721 ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node);
3722 if (ret)
3723 return ret;
3724 ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD,
3725 &vi->node_dead);
3726 if (!ret)
3727 return ret;
3728 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
3729 return ret;
3730 }
3731
3732 static void virtnet_cpu_notif_remove(struct virtnet_info *vi)
3733 {
3734 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
3735 cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD,
3736 &vi->node_dead);
3737 }
3738
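/* Program the interrupt coalescing parameters of a single virtqueue via the
 * VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET control command.
 */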
3739 static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi,
3740 u16 vqn, u32 max_usecs, u32 max_packets)
3741 {
3742 struct virtio_net_ctrl_coal_vq *coal_vq __free(kfree) = NULL;
3743 struct scatterlist sgs;
3744
3745 coal_vq = kzalloc(sizeof(*coal_vq), GFP_KERNEL);
3746 if (!coal_vq)
3747 return -ENOMEM;
3748
3749 coal_vq->vqn = cpu_to_le16(vqn);
3750 coal_vq->coal.max_usecs = cpu_to_le32(max_usecs);
3751 coal_vq->coal.max_packets = cpu_to_le32(max_packets);
3752 sg_init_one(&sgs, coal_vq, sizeof(*coal_vq));
3753
3754 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
3755 VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET,
3756 &sgs))
3757 return -EINVAL;
3758
3759 return 0;
3760 }
3761
3762 static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info *vi,
3763 u16 queue, u32 max_usecs,
3764 u32 max_packets)
3765 {
3766 int err;
3767
3768 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
3769 return -EOPNOTSUPP;
3770
3771 err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue),
3772 max_usecs, max_packets);
3773 if (err)
3774 return err;
3775
3776 vi->rq[queue].intr_coal.max_usecs = max_usecs;
3777 vi->rq[queue].intr_coal.max_packets = max_packets;
3778
3779 return 0;
3780 }
3781
3782 static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info *vi,
3783 u16 queue, u32 max_usecs,
3784 u32 max_packets)
3785 {
3786 int err;
3787
3788 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
3789 return -EOPNOTSUPP;
3790
3791 err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue),
3792 max_usecs, max_packets);
3793 if (err)
3794 return err;
3795
3796 vi->sq[queue].intr_coal.max_usecs = max_usecs;
3797 vi->sq[queue].intr_coal.max_packets = max_packets;
3798
3799 return 0;
3800 }
3801
3802 static void virtnet_get_ringparam(struct net_device *dev,
3803 struct ethtool_ringparam *ring,
3804 struct kernel_ethtool_ringparam *kernel_ring,
3805 struct netlink_ext_ack *extack)
3806 {
3807 struct virtnet_info *vi = netdev_priv(dev);
3808
3809 ring->rx_max_pending = vi->rq[0].vq->num_max;
3810 ring->tx_max_pending = vi->sq[0].vq->num_max;
3811 ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq);
3812 ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq);
3813 }
3814
3815 static int virtnet_set_ringparam(struct net_device *dev,
3816 struct ethtool_ringparam *ring,
3817 struct kernel_ethtool_ringparam *kernel_ring,
3818 struct netlink_ext_ack *extack)
3819 {
3820 struct virtnet_info *vi = netdev_priv(dev);
3821 u32 rx_pending, tx_pending;
3822 struct receive_queue *rq;
3823 struct send_queue *sq;
3824 int i, err;
3825
3826 if (ring->rx_mini_pending || ring->rx_jumbo_pending)
3827 return -EINVAL;
3828
3829 rx_pending = virtqueue_get_vring_size(vi->rq[0].vq);
3830 tx_pending = virtqueue_get_vring_size(vi->sq[0].vq);
3831
3832 if (ring->rx_pending == rx_pending &&
3833 ring->tx_pending == tx_pending)
3834 return 0;
3835
3836 if (ring->rx_pending > vi->rq[0].vq->num_max)
3837 return -EINVAL;
3838
3839 if (ring->tx_pending > vi->sq[0].vq->num_max)
3840 return -EINVAL;
3841
3842 for (i = 0; i < vi->max_queue_pairs; i++) {
3843 rq = vi->rq + i;
3844 sq = vi->sq + i;
3845
3846 if (ring->tx_pending != tx_pending) {
3847 err = virtnet_tx_resize(vi, sq, ring->tx_pending);
3848 if (err)
3849 return err;
3850
3851 /* Upon disabling and re-enabling a transmit virtqueue, the device must
3852 * set the coalescing parameters of the virtqueue to those configured
3853 * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver
3854 * did not set any TX coalescing parameters, to 0.
3855 */
3856 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, i,
3857 vi->intr_coal_tx.max_usecs,
3858 vi->intr_coal_tx.max_packets);
3859
3860 /* Don't break the tx resize action if the vq coalescing is not
3861 * supported. The same is true for rx resize below.
3862 */
3863 if (err && err != -EOPNOTSUPP)
3864 return err;
3865 }
3866
3867 if (ring->rx_pending != rx_pending) {
3868 err = virtnet_rx_resize(vi, rq, ring->rx_pending);
3869 if (err)
3870 return err;
3871
3872 			/* The reason is the same as for the transmit virtqueue reset above. */
3873 mutex_lock(&vi->rq[i].dim_lock);
3874 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, i,
3875 vi->intr_coal_rx.max_usecs,
3876 vi->intr_coal_rx.max_packets);
3877 mutex_unlock(&vi->rq[i].dim_lock);
3878 if (err && err != -EOPNOTSUPP)
3879 return err;
3880 }
3881 }
3882
3883 return 0;
3884 }
3885
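/* Push the current RSS/hash configuration to the device. The layout of
 * struct virtio_net_ctrl_rss is split across four scatterlist entries:
 * the hash config, the indirection table (or a single reserved entry when
 * only hash reporting is negotiated), max_tx_vq, and the hash key.
 */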
3886 static bool virtnet_commit_rss_command(struct virtnet_info *vi)
3887 {
3888 struct net_device *dev = vi->dev;
3889 struct scatterlist sgs[4];
3890 unsigned int sg_buf_size;
3891
3892 /* prepare sgs */
3893 sg_init_table(sgs, 4);
3894
3895 sg_buf_size = offsetof(struct virtio_net_ctrl_rss, hash_cfg_reserved);
3896 sg_set_buf(&sgs[0], &vi->rss, sg_buf_size);
3897
3898 if (vi->has_rss) {
3899 sg_buf_size = sizeof(uint16_t) * vi->rss_indir_table_size;
3900 sg_set_buf(&sgs[1], vi->rss.indirection_table, sg_buf_size);
3901 } else {
3902 sg_set_buf(&sgs[1], &vi->rss.hash_cfg_reserved, sizeof(uint16_t));
3903 }
3904
3905 sg_buf_size = offsetof(struct virtio_net_ctrl_rss, key)
3906 - offsetof(struct virtio_net_ctrl_rss, max_tx_vq);
3907 sg_set_buf(&sgs[2], &vi->rss.max_tx_vq, sg_buf_size);
3908
3909 sg_buf_size = vi->rss_key_size;
3910 sg_set_buf(&sgs[3], vi->rss.key, sg_buf_size);
3911
3912 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
3913 vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG
3914 : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs))
3915 goto err;
3916
3917 return true;
3918
3919 err:
3920 dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n");
3921 return false;
3922
3923 }
3924
3925 static void virtnet_init_default_rss(struct virtnet_info *vi)
3926 {
3927 vi->rss.hash_types = vi->rss_hash_types_supported;
3928 vi->rss_hash_types_saved = vi->rss_hash_types_supported;
3929 vi->rss.indirection_table_mask = vi->rss_indir_table_size
3930 ? vi->rss_indir_table_size - 1 : 0;
3931 vi->rss.unclassified_queue = 0;
3932
3933 virtnet_rss_update_by_qpairs(vi, vi->curr_queue_pairs);
3934
3935 vi->rss.hash_key_length = vi->rss_key_size;
3936
3937 netdev_rss_key_fill(vi->rss.key, vi->rss_key_size);
3938 }
3939
3940 static void virtnet_get_hashflow(const struct virtnet_info *vi, struct ethtool_rxnfc *info)
3941 {
3942 info->data = 0;
3943 switch (info->flow_type) {
3944 case TCP_V4_FLOW:
3945 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
3946 info->data = RXH_IP_SRC | RXH_IP_DST |
3947 RXH_L4_B_0_1 | RXH_L4_B_2_3;
3948 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
3949 info->data = RXH_IP_SRC | RXH_IP_DST;
3950 }
3951 break;
3952 case TCP_V6_FLOW:
3953 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
3954 info->data = RXH_IP_SRC | RXH_IP_DST |
3955 RXH_L4_B_0_1 | RXH_L4_B_2_3;
3956 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
3957 info->data = RXH_IP_SRC | RXH_IP_DST;
3958 }
3959 break;
3960 case UDP_V4_FLOW:
3961 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
3962 info->data = RXH_IP_SRC | RXH_IP_DST |
3963 RXH_L4_B_0_1 | RXH_L4_B_2_3;
3964 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
3965 info->data = RXH_IP_SRC | RXH_IP_DST;
3966 }
3967 break;
3968 case UDP_V6_FLOW:
3969 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
3970 info->data = RXH_IP_SRC | RXH_IP_DST |
3971 RXH_L4_B_0_1 | RXH_L4_B_2_3;
3972 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
3973 info->data = RXH_IP_SRC | RXH_IP_DST;
3974 }
3975 break;
3976 case IPV4_FLOW:
3977 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4)
3978 info->data = RXH_IP_SRC | RXH_IP_DST;
3979
3980 break;
3981 case IPV6_FLOW:
3982 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6)
3983 info->data = RXH_IP_SRC | RXH_IP_DST;
3984
3985 break;
3986 default:
3987 info->data = 0;
3988 break;
3989 }
3990 }
3991
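/* Translate an ethtool ETHTOOL_SRXFH request into the virtio RSS hash type
 * bitmap. Returns false if the requested flow type or field combination is
 * not supported by the device.
 */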
3992 static bool virtnet_set_hashflow(struct virtnet_info *vi, struct ethtool_rxnfc *info)
3993 {
3994 u32 new_hashtypes = vi->rss_hash_types_saved;
3995 bool is_disable = info->data & RXH_DISCARD;
3996 bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3);
3997
3998 /* supports only 'sd', 'sdfn' and 'r' */
3999 if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable))
4000 return false;
4001
4002 switch (info->flow_type) {
4003 case TCP_V4_FLOW:
4004 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4);
4005 if (!is_disable)
4006 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4
4007 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0);
4008 break;
4009 case UDP_V4_FLOW:
4010 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4);
4011 if (!is_disable)
4012 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4
4013 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0);
4014 break;
4015 case IPV4_FLOW:
4016 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4;
4017 if (!is_disable)
4018 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4;
4019 break;
4020 case TCP_V6_FLOW:
4021 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6);
4022 if (!is_disable)
4023 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6
4024 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0);
4025 break;
4026 case UDP_V6_FLOW:
4027 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6);
4028 if (!is_disable)
4029 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6
4030 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0);
4031 break;
4032 case IPV6_FLOW:
4033 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6;
4034 if (!is_disable)
4035 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6;
4036 break;
4037 default:
4038 /* unsupported flow */
4039 return false;
4040 }
4041
4042 	/* if an unsupported hashtype was requested, reject it */
4043 if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported))
4044 return false;
4045
4046 if (new_hashtypes != vi->rss_hash_types_saved) {
4047 vi->rss_hash_types_saved = new_hashtypes;
4048 vi->rss.hash_types = vi->rss_hash_types_saved;
4049 if (vi->dev->features & NETIF_F_RXHASH)
4050 return virtnet_commit_rss_command(vi);
4051 }
4052
4053 return true;
4054 }
4055
4056 static void virtnet_get_drvinfo(struct net_device *dev,
4057 struct ethtool_drvinfo *info)
4058 {
4059 struct virtnet_info *vi = netdev_priv(dev);
4060 struct virtio_device *vdev = vi->vdev;
4061
4062 strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
4063 strscpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version));
4064 strscpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info));
4065
4066 }
4067
4068 /* TODO: Eliminate OOO packets during switching */
4069 static int virtnet_set_channels(struct net_device *dev,
4070 struct ethtool_channels *channels)
4071 {
4072 struct virtnet_info *vi = netdev_priv(dev);
4073 u16 queue_pairs = channels->combined_count;
4074 int err;
4075
4076 /* We don't support separate rx/tx channels.
4077 * We don't allow setting 'other' channels.
4078 */
4079 if (channels->rx_count || channels->tx_count || channels->other_count)
4080 return -EINVAL;
4081
4082 if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0)
4083 return -EINVAL;
4084
4085 	/* For now we don't support modifying channels while XDP is loaded.
4086 	 * Also, when XDP is loaded, all RX queues have XDP programs, so we
4087 	 * only need to check a single RX queue.
4088 */
4089 if (vi->rq[0].xdp_prog)
4090 return -EINVAL;
4091
4092 cpus_read_lock();
4093 err = virtnet_set_queues(vi, queue_pairs);
4094 if (err) {
4095 cpus_read_unlock();
4096 goto err;
4097 }
4098 virtnet_set_affinity(vi);
4099 cpus_read_unlock();
4100
4101 netif_set_real_num_tx_queues(dev, queue_pairs);
4102 netif_set_real_num_rx_queues(dev, queue_pairs);
4103 err:
4104 return err;
4105 }
4106
4107 static void virtnet_stats_sprintf(u8 **p, const char *fmt, const char *noq_fmt,
4108 int num, int qid, const struct virtnet_stat_desc *desc)
4109 {
4110 int i;
4111
4112 if (qid < 0) {
4113 for (i = 0; i < num; ++i)
4114 ethtool_sprintf(p, noq_fmt, desc[i].desc);
4115 } else {
4116 for (i = 0; i < num; ++i)
4117 ethtool_sprintf(p, fmt, qid, desc[i].desc);
4118 }
4119 }
4120
4121 /* qid == -1: generate the rx/tx queue total fields */
4122 static void virtnet_get_stats_string(struct virtnet_info *vi, int type, int qid, u8 **data)
4123 {
4124 const struct virtnet_stat_desc *desc;
4125 const char *fmt, *noq_fmt;
4126 u8 *p = *data;
4127 u32 num;
4128
4129 if (type == VIRTNET_Q_TYPE_CQ && qid >= 0) {
4130 noq_fmt = "cq_hw_%s";
4131
4132 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) {
4133 desc = &virtnet_stats_cvq_desc[0];
4134 num = ARRAY_SIZE(virtnet_stats_cvq_desc);
4135
4136 virtnet_stats_sprintf(&p, NULL, noq_fmt, num, -1, desc);
4137 }
4138 }
4139
4140 if (type == VIRTNET_Q_TYPE_RX) {
4141 fmt = "rx%u_%s";
4142 noq_fmt = "rx_%s";
4143
4144 desc = &virtnet_rq_stats_desc[0];
4145 num = ARRAY_SIZE(virtnet_rq_stats_desc);
4146
4147 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);
4148
4149 fmt = "rx%u_hw_%s";
4150 noq_fmt = "rx_hw_%s";
4151
4152 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
4153 desc = &virtnet_stats_rx_basic_desc[0];
4154 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc);
4155
4156 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);
4157 }
4158
4159 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
4160 desc = &virtnet_stats_rx_csum_desc[0];
4161 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc);
4162
4163 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);
4164 }
4165
4166 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
4167 desc = &virtnet_stats_rx_speed_desc[0];
4168 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc);
4169
4170 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);
4171 }
4172 }
4173
4174 if (type == VIRTNET_Q_TYPE_TX) {
4175 fmt = "tx%u_%s";
4176 noq_fmt = "tx_%s";
4177
4178 desc = &virtnet_sq_stats_desc[0];
4179 num = ARRAY_SIZE(virtnet_sq_stats_desc);
4180
4181 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);
4182
4183 fmt = "tx%u_hw_%s";
4184 noq_fmt = "tx_hw_%s";
4185
4186 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
4187 desc = &virtnet_stats_tx_basic_desc[0];
4188 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc);
4189
4190 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);
4191 }
4192
4193 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
4194 desc = &virtnet_stats_tx_gso_desc[0];
4195 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc);
4196
4197 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);
4198 }
4199
4200 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
4201 desc = &virtnet_stats_tx_speed_desc[0];
4202 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc);
4203
4204 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc);
4205 }
4206 }
4207
4208 *data = p;
4209 }
4210
4211 struct virtnet_stats_ctx {
4212 	/* The stats are written to qstats or ethtool -S. */
4213 bool to_qstat;
4214
4215 /* Used to calculate the offset inside the output buffer. */
4216 u32 desc_num[3];
4217
4218 /* The actual supported stat types. */
4219 u64 bitmap[3];
4220
4221 /* Used to calculate the reply buffer size. */
4222 u32 size[3];
4223
4224 /* Record the output buffer. */
4225 u64 *data;
4226 };
4227
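/* Initialize the stats context: record which device stat types are supported
 * per queue type, how many descriptors each queue type contributes to the
 * output buffer, and the size of the expected device reply.
 */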
4228 static void virtnet_stats_ctx_init(struct virtnet_info *vi,
4229 struct virtnet_stats_ctx *ctx,
4230 u64 *data, bool to_qstat)
4231 {
4232 u32 queue_type;
4233
4234 ctx->data = data;
4235 ctx->to_qstat = to_qstat;
4236
4237 if (to_qstat) {
4238 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc_qstat);
4239 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc_qstat);
4240
4241 queue_type = VIRTNET_Q_TYPE_RX;
4242
4243 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
4244 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC;
4245 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat);
4246 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic);
4247 }
4248
4249 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
4250 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM;
4251 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat);
4252 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum);
4253 }
4254
4255 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) {
4256 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_GSO;
4257 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat);
4258 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_gso);
4259 }
4260
4261 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
4262 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED;
4263 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat);
4264 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed);
4265 }
4266
4267 queue_type = VIRTNET_Q_TYPE_TX;
4268
4269 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
4270 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC;
4271 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat);
4272 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic);
4273 }
4274
4275 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) {
4276 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_CSUM;
4277 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat);
4278 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_csum);
4279 }
4280
4281 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
4282 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO;
4283 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat);
4284 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso);
4285 }
4286
4287 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
4288 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED;
4289 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat);
4290 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed);
4291 }
4292
4293 return;
4294 }
4295
4296 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc);
4297 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc);
4298
4299 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) {
4300 queue_type = VIRTNET_Q_TYPE_CQ;
4301
4302 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_CVQ;
4303 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_cvq_desc);
4304 ctx->size[queue_type] += sizeof(struct virtio_net_stats_cvq);
4305 }
4306
4307 queue_type = VIRTNET_Q_TYPE_RX;
4308
4309 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
4310 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC;
4311 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc);
4312 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic);
4313 }
4314
4315 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
4316 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM;
4317 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc);
4318 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum);
4319 }
4320
4321 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
4322 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED;
4323 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc);
4324 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed);
4325 }
4326
4327 queue_type = VIRTNET_Q_TYPE_TX;
4328
4329 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
4330 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC;
4331 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc);
4332 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic);
4333 }
4334
4335 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
4336 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO;
4337 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc);
4338 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso);
4339 }
4340
4341 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
4342 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED;
4343 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc);
4344 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed);
4345 }
4346 }
4347
4348 /* stats_sum_queue - Calculate the sum of the same fields in sq or rq.
4349 * @sum: the position to store the sum values
4350  * @num: number of fields per queue
4351  * @q_value: pointer to the fields of the first queue
4352  * @q_num: number of queues
4353 */
4354 static void stats_sum_queue(u64 *sum, u32 num, u64 *q_value, u32 q_num)
4355 {
4356 u32 step = num;
4357 int i, j;
4358 u64 *p;
4359
4360 for (i = 0; i < num; ++i) {
4361 p = sum + i;
4362 *p = 0;
4363
4364 for (j = 0; j < q_num; ++j)
4365 *p += *(q_value + i + j * step);
4366 }
4367 }
4368
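/* Sum the per-queue values into the rx/tx "total" fields that occupy the
 * head of the ethtool -S output buffer.
 */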
4369 static void virtnet_fill_total_fields(struct virtnet_info *vi,
4370 struct virtnet_stats_ctx *ctx)
4371 {
4372 u64 *data, *first_rx_q, *first_tx_q;
4373 u32 num_cq, num_rx, num_tx;
4374
4375 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ];
4376 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX];
4377 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX];
4378
4379 first_rx_q = ctx->data + num_rx + num_tx + num_cq;
4380 first_tx_q = first_rx_q + vi->curr_queue_pairs * num_rx;
4381
4382 data = ctx->data;
4383
4384 stats_sum_queue(data, num_rx, first_rx_q, vi->curr_queue_pairs);
4385
4386 data = ctx->data + num_rx;
4387
4388 stats_sum_queue(data, num_tx, first_tx_q, vi->curr_queue_pairs);
4389 }
4390
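/* Fill the netdev qstats output. For driver stats, copy the per-queue
 * counters directly; for device replies, pick the descriptor table that
 * matches the reply type and scatter the values to their qstat offsets.
 */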
4391 static void virtnet_fill_stats_qstat(struct virtnet_info *vi, u32 qid,
4392 struct virtnet_stats_ctx *ctx,
4393 const u8 *base, bool drv_stats, u8 reply_type)
4394 {
4395 const struct virtnet_stat_desc *desc;
4396 const u64_stats_t *v_stat;
4397 u64 offset, bitmap;
4398 const __le64 *v;
4399 u32 queue_type;
4400 int i, num;
4401
4402 queue_type = vq_type(vi, qid);
4403 bitmap = ctx->bitmap[queue_type];
4404
4405 if (drv_stats) {
4406 if (queue_type == VIRTNET_Q_TYPE_RX) {
4407 desc = &virtnet_rq_stats_desc_qstat[0];
4408 num = ARRAY_SIZE(virtnet_rq_stats_desc_qstat);
4409 } else {
4410 desc = &virtnet_sq_stats_desc_qstat[0];
4411 num = ARRAY_SIZE(virtnet_sq_stats_desc_qstat);
4412 }
4413
4414 for (i = 0; i < num; ++i) {
4415 offset = desc[i].qstat_offset / sizeof(*ctx->data);
4416 v_stat = (const u64_stats_t *)(base + desc[i].offset);
4417 ctx->data[offset] = u64_stats_read(v_stat);
4418 }
4419 return;
4420 }
4421
4422 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
4423 desc = &virtnet_stats_rx_basic_desc_qstat[0];
4424 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat);
4425 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC)
4426 goto found;
4427 }
4428
4429 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
4430 desc = &virtnet_stats_rx_csum_desc_qstat[0];
4431 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat);
4432 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM)
4433 goto found;
4434 }
4435
4436 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_GSO) {
4437 desc = &virtnet_stats_rx_gso_desc_qstat[0];
4438 num = ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat);
4439 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_GSO)
4440 goto found;
4441 }
4442
4443 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
4444 desc = &virtnet_stats_rx_speed_desc_qstat[0];
4445 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat);
4446 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED)
4447 goto found;
4448 }
4449
4450 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
4451 desc = &virtnet_stats_tx_basic_desc_qstat[0];
4452 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat);
4453 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC)
4454 goto found;
4455 }
4456
4457 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_CSUM) {
4458 desc = &virtnet_stats_tx_csum_desc_qstat[0];
4459 num = ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat);
4460 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_CSUM)
4461 goto found;
4462 }
4463
4464 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
4465 desc = &virtnet_stats_tx_gso_desc_qstat[0];
4466 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat);
4467 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO)
4468 goto found;
4469 }
4470
4471 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
4472 desc = &virtnet_stats_tx_speed_desc_qstat[0];
4473 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat);
4474 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED)
4475 goto found;
4476 }
4477
4478 return;
4479
4480 found:
4481 for (i = 0; i < num; ++i) {
4482 offset = desc[i].qstat_offset / sizeof(*ctx->data);
4483 v = (const __le64 *)(base + desc[i].offset);
4484 ctx->data[offset] = le64_to_cpu(*v);
4485 }
4486 }
4487
4488 /* virtnet_fill_stats - copy the stats to qstats or ethtool -S
4489 * The stats source is the device or the driver.
4490 *
4491 * @vi: virtio net info
4492 * @qid: the vq id
4493  * @ctx: stats ctx (initialized by virtnet_stats_ctx_init())
4494 * @base: pointer to the device reply or the driver stats structure.
4495 * @drv_stats: designate the base type (device reply, driver stats)
4496  * @reply_type: the type of the device reply (if drv_stats is true, this must be zero)
4497 */
4498 static void virtnet_fill_stats(struct virtnet_info *vi, u32 qid,
4499 struct virtnet_stats_ctx *ctx,
4500 const u8 *base, bool drv_stats, u8 reply_type)
4501 {
4502 u32 queue_type, num_rx, num_tx, num_cq;
4503 const struct virtnet_stat_desc *desc;
4504 const u64_stats_t *v_stat;
4505 u64 offset, bitmap;
4506 const __le64 *v;
4507 int i, num;
4508
4509 if (ctx->to_qstat)
4510 return virtnet_fill_stats_qstat(vi, qid, ctx, base, drv_stats, reply_type);
4511
4512 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ];
4513 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX];
4514 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX];
4515
4516 queue_type = vq_type(vi, qid);
4517 bitmap = ctx->bitmap[queue_type];
4518
4519 /* skip the total fields of pairs */
4520 offset = num_rx + num_tx;
4521
4522 if (queue_type == VIRTNET_Q_TYPE_TX) {
4523 offset += num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2);
4524
4525 num = ARRAY_SIZE(virtnet_sq_stats_desc);
4526 if (drv_stats) {
4527 desc = &virtnet_sq_stats_desc[0];
4528 goto drv_stats;
4529 }
4530
4531 offset += num;
4532
4533 } else if (queue_type == VIRTNET_Q_TYPE_RX) {
4534 offset += num_cq + num_rx * (qid / 2);
4535
4536 num = ARRAY_SIZE(virtnet_rq_stats_desc);
4537 if (drv_stats) {
4538 desc = &virtnet_rq_stats_desc[0];
4539 goto drv_stats;
4540 }
4541
4542 offset += num;
4543 }
4544
4545 if (bitmap & VIRTIO_NET_STATS_TYPE_CVQ) {
4546 desc = &virtnet_stats_cvq_desc[0];
4547 num = ARRAY_SIZE(virtnet_stats_cvq_desc);
4548 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_CVQ)
4549 goto found;
4550
4551 offset += num;
4552 }
4553
4554 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
4555 desc = &virtnet_stats_rx_basic_desc[0];
4556 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc);
4557 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC)
4558 goto found;
4559
4560 offset += num;
4561 }
4562
4563 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
4564 desc = &virtnet_stats_rx_csum_desc[0];
4565 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc);
4566 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM)
4567 goto found;
4568
4569 offset += num;
4570 }
4571
4572 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) {
4573 desc = &virtnet_stats_rx_speed_desc[0];
4574 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc);
4575 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED)
4576 goto found;
4577
4578 offset += num;
4579 }
4580
4581 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
4582 desc = &virtnet_stats_tx_basic_desc[0];
4583 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc);
4584 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC)
4585 goto found;
4586
4587 offset += num;
4588 }
4589
4590 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
4591 desc = &virtnet_stats_tx_gso_desc[0];
4592 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc);
4593 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO)
4594 goto found;
4595
4596 offset += num;
4597 }
4598
4599 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) {
4600 desc = &virtnet_stats_tx_speed_desc[0];
4601 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc);
4602 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED)
4603 goto found;
4604
4605 offset += num;
4606 }
4607
4608 return;
4609
4610 found:
4611 for (i = 0; i < num; ++i) {
4612 v = (const __le64 *)(base + desc[i].offset);
4613 ctx->data[offset + i] = le64_to_cpu(*v);
4614 }
4615
4616 return;
4617
4618 drv_stats:
4619 for (i = 0; i < num; ++i) {
4620 v_stat = (const u64_stats_t *)(base + desc[i].offset);
4621 ctx->data[offset + i] = u64_stats_read(v_stat);
4622 }
4623 }
4624
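/* Issue a VIRTIO_NET_CTRL_STATS_GET command and walk the variable-sized
 * reply: each entry starts with a virtio_net_stats_reply_hdr giving its
 * own size, vq index and reply type.
 */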
4625 static int __virtnet_get_hw_stats(struct virtnet_info *vi,
4626 struct virtnet_stats_ctx *ctx,
4627 struct virtio_net_ctrl_queue_stats *req,
4628 int req_size, void *reply, int res_size)
4629 {
4630 struct virtio_net_stats_reply_hdr *hdr;
4631 struct scatterlist sgs_in, sgs_out;
4632 void *p;
4633 u32 qid;
4634 int ok;
4635
4636 sg_init_one(&sgs_out, req, req_size);
4637 sg_init_one(&sgs_in, reply, res_size);
4638
4639 ok = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS,
4640 VIRTIO_NET_CTRL_STATS_GET,
4641 &sgs_out, &sgs_in);
4642
4643 if (!ok)
4644 return ok;
4645
4646 for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) {
4647 hdr = p;
4648 qid = le16_to_cpu(hdr->vq_index);
4649 virtnet_fill_stats(vi, qid, ctx, p, false, hdr->type);
4650 }
4651
4652 return 0;
4653 }
4654
4655 static void virtnet_make_stat_req(struct virtnet_info *vi,
4656 struct virtnet_stats_ctx *ctx,
4657 struct virtio_net_ctrl_queue_stats *req,
4658 int qid, int *idx)
4659 {
4660 int qtype = vq_type(vi, qid);
4661 u64 bitmap = ctx->bitmap[qtype];
4662
4663 if (!bitmap)
4664 return;
4665
4666 req->stats[*idx].vq_index = cpu_to_le16(qid);
4667 req->stats[*idx].types_bitmap[0] = cpu_to_le64(bitmap);
4668 *idx += 1;
4669 }
4670
4671 /* qid: -1: get stats of all vq.
4672  * >= 0: get the stats for the specified vq. This must not be the cvq.
4673 */
4674 static int virtnet_get_hw_stats(struct virtnet_info *vi,
4675 struct virtnet_stats_ctx *ctx, int qid)
4676 {
4677 int qnum, i, j, res_size, qtype, last_vq, first_vq;
4678 struct virtio_net_ctrl_queue_stats *req;
4679 bool enable_cvq;
4680 void *reply;
4681 int ok;
4682
4683 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS))
4684 return 0;
4685
4686 if (qid == -1) {
4687 last_vq = vi->curr_queue_pairs * 2 - 1;
4688 first_vq = 0;
4689 enable_cvq = true;
4690 } else {
4691 last_vq = qid;
4692 first_vq = qid;
4693 enable_cvq = false;
4694 }
4695
4696 qnum = 0;
4697 res_size = 0;
4698 for (i = first_vq; i <= last_vq ; ++i) {
4699 qtype = vq_type(vi, i);
4700 if (ctx->bitmap[qtype]) {
4701 ++qnum;
4702 res_size += ctx->size[qtype];
4703 }
4704 }
4705
4706 if (enable_cvq && ctx->bitmap[VIRTNET_Q_TYPE_CQ]) {
4707 res_size += ctx->size[VIRTNET_Q_TYPE_CQ];
4708 qnum += 1;
4709 }
4710
4711 req = kcalloc(qnum, sizeof(*req), GFP_KERNEL);
4712 if (!req)
4713 return -ENOMEM;
4714
4715 reply = kmalloc(res_size, GFP_KERNEL);
4716 if (!reply) {
4717 kfree(req);
4718 return -ENOMEM;
4719 }
4720
4721 j = 0;
4722 for (i = first_vq; i <= last_vq ; ++i)
4723 virtnet_make_stat_req(vi, ctx, req, i, &j);
4724
4725 if (enable_cvq)
4726 virtnet_make_stat_req(vi, ctx, req, vi->max_queue_pairs * 2, &j);
4727
4728 ok = __virtnet_get_hw_stats(vi, ctx, req, sizeof(*req) * j, reply, res_size);
4729
4730 kfree(req);
4731 kfree(reply);
4732
4733 return ok;
4734 }
4735
4736 static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
4737 {
4738 struct virtnet_info *vi = netdev_priv(dev);
4739 unsigned int i;
4740 u8 *p = data;
4741
4742 switch (stringset) {
4743 case ETH_SS_STATS:
4744 /* Generate the total field names. */
4745 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, -1, &p);
4746 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, -1, &p);
4747
4748 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_CQ, 0, &p);
4749
4750 for (i = 0; i < vi->curr_queue_pairs; ++i)
4751 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, i, &p);
4752
4753 for (i = 0; i < vi->curr_queue_pairs; ++i)
4754 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, i, &p);
4755 break;
4756 }
4757 }
4758
4759 static int virtnet_get_sset_count(struct net_device *dev, int sset)
4760 {
4761 struct virtnet_info *vi = netdev_priv(dev);
4762 struct virtnet_stats_ctx ctx = {0};
4763 u32 pair_count;
4764
4765 switch (sset) {
4766 case ETH_SS_STATS:
4767 virtnet_stats_ctx_init(vi, &ctx, NULL, false);
4768
4769 pair_count = ctx.desc_num[VIRTNET_Q_TYPE_RX] + ctx.desc_num[VIRTNET_Q_TYPE_TX];
4770
4771 return pair_count + ctx.desc_num[VIRTNET_Q_TYPE_CQ] +
4772 vi->curr_queue_pairs * pair_count;
4773 default:
4774 return -EOPNOTSUPP;
4775 }
4776 }
4777
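/* ethtool -S handler: fetch the device (hardware) stats once for all queues,
 * then copy the driver per-queue counters under the u64_stats retry loop,
 * and finally compute the rx/tx totals.
 */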
4778 static void virtnet_get_ethtool_stats(struct net_device *dev,
4779 struct ethtool_stats *stats, u64 *data)
4780 {
4781 struct virtnet_info *vi = netdev_priv(dev);
4782 struct virtnet_stats_ctx ctx = {0};
4783 unsigned int start, i;
4784 const u8 *stats_base;
4785
4786 virtnet_stats_ctx_init(vi, &ctx, data, false);
4787 if (virtnet_get_hw_stats(vi, &ctx, -1))
4788 dev_warn(&vi->dev->dev, "Failed to get hw stats.\n");
4789
4790 for (i = 0; i < vi->curr_queue_pairs; i++) {
4791 struct receive_queue *rq = &vi->rq[i];
4792 struct send_queue *sq = &vi->sq[i];
4793
4794 stats_base = (const u8 *)&rq->stats;
4795 do {
4796 start = u64_stats_fetch_begin(&rq->stats.syncp);
4797 virtnet_fill_stats(vi, i * 2, &ctx, stats_base, true, 0);
4798 } while (u64_stats_fetch_retry(&rq->stats.syncp, start));
4799
4800 stats_base = (const u8 *)&sq->stats;
4801 do {
4802 start = u64_stats_fetch_begin(&sq->stats.syncp);
4803 virtnet_fill_stats(vi, i * 2 + 1, &ctx, stats_base, true, 0);
4804 } while (u64_stats_fetch_retry(&sq->stats.syncp, start));
4805 }
4806
4807 virtnet_fill_total_fields(vi, &ctx);
4808 }
4809
4810 static void virtnet_get_channels(struct net_device *dev,
4811 struct ethtool_channels *channels)
4812 {
4813 struct virtnet_info *vi = netdev_priv(dev);
4814
4815 channels->combined_count = vi->curr_queue_pairs;
4816 channels->max_combined = vi->max_queue_pairs;
4817 channels->max_other = 0;
4818 channels->rx_count = 0;
4819 channels->tx_count = 0;
4820 channels->other_count = 0;
4821 }
4822
4823 static int virtnet_set_link_ksettings(struct net_device *dev,
4824 const struct ethtool_link_ksettings *cmd)
4825 {
4826 struct virtnet_info *vi = netdev_priv(dev);
4827
4828 return ethtool_virtdev_set_link_ksettings(dev, cmd,
4829 &vi->speed, &vi->duplex);
4830 }
4831
4832 static int virtnet_get_link_ksettings(struct net_device *dev,
4833 struct ethtool_link_ksettings *cmd)
4834 {
4835 struct virtnet_info *vi = netdev_priv(dev);
4836
4837 cmd->base.speed = vi->speed;
4838 cmd->base.duplex = vi->duplex;
4839 cmd->base.port = PORT_OTHER;
4840
4841 return 0;
4842 }
4843
4844 static int virtnet_send_tx_notf_coal_cmds(struct virtnet_info *vi,
4845 struct ethtool_coalesce *ec)
4846 {
4847 struct virtio_net_ctrl_coal_tx *coal_tx __free(kfree) = NULL;
4848 struct scatterlist sgs_tx;
4849 int i;
4850
4851 coal_tx = kzalloc(sizeof(*coal_tx), GFP_KERNEL);
4852 if (!coal_tx)
4853 return -ENOMEM;
4854
4855 coal_tx->tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs);
4856 coal_tx->tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames);
4857 sg_init_one(&sgs_tx, coal_tx, sizeof(*coal_tx));
4858
4859 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
4860 VIRTIO_NET_CTRL_NOTF_COAL_TX_SET,
4861 &sgs_tx))
4862 return -EINVAL;
4863
4864 vi->intr_coal_tx.max_usecs = ec->tx_coalesce_usecs;
4865 vi->intr_coal_tx.max_packets = ec->tx_max_coalesced_frames;
4866 for (i = 0; i < vi->max_queue_pairs; i++) {
4867 vi->sq[i].intr_coal.max_usecs = ec->tx_coalesce_usecs;
4868 vi->sq[i].intr_coal.max_packets = ec->tx_max_coalesced_frames;
4869 }
4870
4871 return 0;
4872 }
4873
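/* Apply global RX coalescing settings. Enabling adaptive RX coalescing (DIM)
 * only toggles the per-queue dim_enabled flags; otherwise the new
 * usecs/frames values are sent to the device and mirrored into both the
 * global and per-queue software state.
 */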
4874 static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi,
4875 struct ethtool_coalesce *ec)
4876 {
4877 struct virtio_net_ctrl_coal_rx *coal_rx __free(kfree) = NULL;
4878 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce;
4879 struct scatterlist sgs_rx;
4880 int i;
4881
4882 if (rx_ctrl_dim_on && !virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
4883 return -EOPNOTSUPP;
4884
4885 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != vi->intr_coal_rx.max_usecs ||
4886 ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets))
4887 return -EINVAL;
4888
4889 if (rx_ctrl_dim_on && !vi->rx_dim_enabled) {
4890 vi->rx_dim_enabled = true;
4891 for (i = 0; i < vi->max_queue_pairs; i++) {
4892 mutex_lock(&vi->rq[i].dim_lock);
4893 vi->rq[i].dim_enabled = true;
4894 mutex_unlock(&vi->rq[i].dim_lock);
4895 }
4896 return 0;
4897 }
4898
4899 coal_rx = kzalloc(sizeof(*coal_rx), GFP_KERNEL);
4900 if (!coal_rx)
4901 return -ENOMEM;
4902
4903 if (!rx_ctrl_dim_on && vi->rx_dim_enabled) {
4904 vi->rx_dim_enabled = false;
4905 for (i = 0; i < vi->max_queue_pairs; i++) {
4906 mutex_lock(&vi->rq[i].dim_lock);
4907 vi->rq[i].dim_enabled = false;
4908 mutex_unlock(&vi->rq[i].dim_lock);
4909 }
4910 }
4911
4912 	/* Since the per-queue coalescing params can be set,
4913 	 * we need to apply the new global params even if they
4914 	 * are not updated.
4915 */
4916 coal_rx->rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs);
4917 coal_rx->rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames);
4918 sg_init_one(&sgs_rx, coal_rx, sizeof(*coal_rx));
4919
4920 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
4921 VIRTIO_NET_CTRL_NOTF_COAL_RX_SET,
4922 &sgs_rx))
4923 return -EINVAL;
4924
4925 vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs;
4926 vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames;
4927 for (i = 0; i < vi->max_queue_pairs; i++) {
4928 mutex_lock(&vi->rq[i].dim_lock);
4929 vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs;
4930 vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames;
4931 mutex_unlock(&vi->rq[i].dim_lock);
4932 }
4933
4934 return 0;
4935 }
4936
4937 static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi,
4938 struct ethtool_coalesce *ec)
4939 {
4940 int err;
4941
4942 err = virtnet_send_tx_notf_coal_cmds(vi, ec);
4943 if (err)
4944 return err;
4945
4946 err = virtnet_send_rx_notf_coal_cmds(vi, ec);
4947 if (err)
4948 return err;
4949
4950 return 0;
4951 }
4952
4953 static int virtnet_send_rx_notf_coal_vq_cmds(struct virtnet_info *vi,
4954 struct ethtool_coalesce *ec,
4955 u16 queue)
4956 {
4957 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce;
4958 u32 max_usecs, max_packets;
4959 bool cur_rx_dim;
4960 int err;
4961
4962 mutex_lock(&vi->rq[queue].dim_lock);
4963 cur_rx_dim = vi->rq[queue].dim_enabled;
4964 max_usecs = vi->rq[queue].intr_coal.max_usecs;
4965 max_packets = vi->rq[queue].intr_coal.max_packets;
4966
4967 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != max_usecs ||
4968 ec->rx_max_coalesced_frames != max_packets)) {
4969 mutex_unlock(&vi->rq[queue].dim_lock);
4970 return -EINVAL;
4971 }
4972
4973 if (rx_ctrl_dim_on && !cur_rx_dim) {
4974 vi->rq[queue].dim_enabled = true;
4975 mutex_unlock(&vi->rq[queue].dim_lock);
4976 return 0;
4977 }
4978
4979 if (!rx_ctrl_dim_on && cur_rx_dim)
4980 vi->rq[queue].dim_enabled = false;
4981
4982 /* If no params are updated, userspace ethtool will
4983 * reject the modification.
4984 */
4985 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue,
4986 ec->rx_coalesce_usecs,
4987 ec->rx_max_coalesced_frames);
4988 mutex_unlock(&vi->rq[queue].dim_lock);
4989 return err;
4990 }
4991
4992 static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi,
4993 struct ethtool_coalesce *ec,
4994 u16 queue)
4995 {
4996 int err;
4997
4998 err = virtnet_send_rx_notf_coal_vq_cmds(vi, ec, queue);
4999 if (err)
5000 return err;
5001
5002 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, queue,
5003 ec->tx_coalesce_usecs,
5004 ec->tx_max_coalesced_frames);
5005 if (err)
5006 return err;
5007
5008 return 0;
5009 }
5010
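/* Deferred DIM work: if adaptive coalescing is still enabled for this RX
 * queue and the suggested moderation differs from what is currently
 * programmed, push the new usecs/packets values to the device.
 */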
5011 static void virtnet_rx_dim_work(struct work_struct *work)
5012 {
5013 struct dim *dim = container_of(work, struct dim, work);
5014 struct receive_queue *rq = container_of(dim,
5015 struct receive_queue, dim);
5016 struct virtnet_info *vi = rq->vq->vdev->priv;
5017 struct net_device *dev = vi->dev;
5018 struct dim_cq_moder update_moder;
5019 int qnum, err;
5020
5021 qnum = rq - vi->rq;
5022
5023 mutex_lock(&rq->dim_lock);
5024 if (!rq->dim_enabled)
5025 goto out;
5026
5027 update_moder = net_dim_get_rx_irq_moder(dev, dim);
5028 if (update_moder.usec != rq->intr_coal.max_usecs ||
5029 update_moder.pkts != rq->intr_coal.max_packets) {
5030 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum,
5031 update_moder.usec,
5032 update_moder.pkts);
5033 if (err)
5034 pr_debug("%s: Failed to send dim parameters on rxq%d\n",
5035 dev->name, qnum);
5036 }
5037 out:
5038 dim->state = DIM_START_MEASURE;
5039 mutex_unlock(&rq->dim_lock);
5040 }
5041
5042 static int virtnet_coal_params_supported(struct ethtool_coalesce *ec)
5043 {
5044 /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL
5045 * or VIRTIO_NET_F_VQ_NOTF_COAL feature is negotiated.
5046 */
5047 if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs)
5048 return -EOPNOTSUPP;
5049
5050 if (ec->tx_max_coalesced_frames > 1 ||
5051 ec->rx_max_coalesced_frames != 1)
5052 return -EINVAL;
5053
5054 return 0;
5055 }
5056
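/* Decide whether the TX NAPI weight needs to change. The weight can only be
 * changed while the interface is down; otherwise return -EBUSY.
 */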
5057 static int virtnet_should_update_vq_weight(int dev_flags, int weight,
5058 int vq_weight, bool *should_update)
5059 {
5060 if (weight ^ vq_weight) {
5061 if (dev_flags & IFF_UP)
5062 return -EBUSY;
5063 *should_update = true;
5064 }
5065
5066 return 0;
5067 }
5068
5069 static int virtnet_set_coalesce(struct net_device *dev,
5070 struct ethtool_coalesce *ec,
5071 struct kernel_ethtool_coalesce *kernel_coal,
5072 struct netlink_ext_ack *extack)
5073 {
5074 struct virtnet_info *vi = netdev_priv(dev);
5075 int ret, queue_number, napi_weight;
5076 bool update_napi = false;
5077
5078 /* Can't change NAPI weight if the link is up */
5079 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
5080 for (queue_number = 0; queue_number < vi->max_queue_pairs; queue_number++) {
5081 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight,
5082 vi->sq[queue_number].napi.weight,
5083 &update_napi);
5084 if (ret)
5085 return ret;
5086
5087 if (update_napi) {
5088 /* All queues that belong to [queue_number, vi->max_queue_pairs] will be
5089 * updated for the sake of simplicity, which might not be necessary
5090 */
5091 break;
5092 }
5093 }
5094
5095 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL))
5096 ret = virtnet_send_notf_coal_cmds(vi, ec);
5097 else
5098 ret = virtnet_coal_params_supported(ec);
5099
5100 if (ret)
5101 return ret;
5102
5103 if (update_napi) {
5104 for (; queue_number < vi->max_queue_pairs; queue_number++)
5105 vi->sq[queue_number].napi.weight = napi_weight;
5106 }
5107
5108 return ret;
5109 }
5110
5111 static int virtnet_get_coalesce(struct net_device *dev,
5112 struct ethtool_coalesce *ec,
5113 struct kernel_ethtool_coalesce *kernel_coal,
5114 struct netlink_ext_ack *extack)
5115 {
5116 struct virtnet_info *vi = netdev_priv(dev);
5117
5118 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
5119 ec->rx_coalesce_usecs = vi->intr_coal_rx.max_usecs;
5120 ec->tx_coalesce_usecs = vi->intr_coal_tx.max_usecs;
5121 ec->tx_max_coalesced_frames = vi->intr_coal_tx.max_packets;
5122 ec->rx_max_coalesced_frames = vi->intr_coal_rx.max_packets;
5123 ec->use_adaptive_rx_coalesce = vi->rx_dim_enabled;
5124 } else {
5125 ec->rx_max_coalesced_frames = 1;
5126
5127 if (vi->sq[0].napi.weight)
5128 ec->tx_max_coalesced_frames = 1;
5129 }
5130
5131 return 0;
5132 }
5133
5134 static int virtnet_set_per_queue_coalesce(struct net_device *dev,
5135 u32 queue,
5136 struct ethtool_coalesce *ec)
5137 {
5138 struct virtnet_info *vi = netdev_priv(dev);
5139 int ret, napi_weight;
5140 bool update_napi = false;
5141
5142 if (queue >= vi->max_queue_pairs)
5143 return -EINVAL;
5144
5145 /* Can't change NAPI weight if the link is up */
5146 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
5147 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight,
5148 vi->sq[queue].napi.weight,
5149 &update_napi);
5150 if (ret)
5151 return ret;
5152
5153 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
5154 ret = virtnet_send_notf_coal_vq_cmds(vi, ec, queue);
5155 else
5156 ret = virtnet_coal_params_supported(ec);
5157
5158 if (ret)
5159 return ret;
5160
5161 if (update_napi)
5162 vi->sq[queue].napi.weight = napi_weight;
5163
5164 return 0;
5165 }
5166
5167 static int virtnet_get_per_queue_coalesce(struct net_device *dev,
5168 u32 queue,
5169 struct ethtool_coalesce *ec)
5170 {
5171 struct virtnet_info *vi = netdev_priv(dev);
5172
5173 if (queue >= vi->max_queue_pairs)
5174 return -EINVAL;
5175
5176 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) {
5177 mutex_lock(&vi->rq[queue].dim_lock);
5178 ec->rx_coalesce_usecs = vi->rq[queue].intr_coal.max_usecs;
5179 ec->tx_coalesce_usecs = vi->sq[queue].intr_coal.max_usecs;
5180 ec->tx_max_coalesced_frames = vi->sq[queue].intr_coal.max_packets;
5181 ec->rx_max_coalesced_frames = vi->rq[queue].intr_coal.max_packets;
5182 ec->use_adaptive_rx_coalesce = vi->rq[queue].dim_enabled;
5183 mutex_unlock(&vi->rq[queue].dim_lock);
5184 } else {
5185 ec->rx_max_coalesced_frames = 1;
5186
5187 if (vi->sq[queue].napi.weight)
5188 ec->tx_max_coalesced_frames = 1;
5189 }
5190
5191 return 0;
5192 }
5193
5194 static void virtnet_init_settings(struct net_device *dev)
5195 {
5196 struct virtnet_info *vi = netdev_priv(dev);
5197
5198 vi->speed = SPEED_UNKNOWN;
5199 vi->duplex = DUPLEX_UNKNOWN;
5200 }
5201
5202 static u32 virtnet_get_rxfh_key_size(struct net_device *dev)
5203 {
5204 return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size;
5205 }
5206
5207 static u32 virtnet_get_rxfh_indir_size(struct net_device *dev)
5208 {
5209 return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size;
5210 }
5211
5212 static int virtnet_get_rxfh(struct net_device *dev,
5213 struct ethtool_rxfh_param *rxfh)
5214 {
5215 struct virtnet_info *vi = netdev_priv(dev);
5216 int i;
5217
5218 if (rxfh->indir) {
5219 for (i = 0; i < vi->rss_indir_table_size; ++i)
5220 rxfh->indir[i] = vi->rss.indirection_table[i];
5221 }
5222
5223 if (rxfh->key)
5224 memcpy(rxfh->key, vi->rss.key, vi->rss_key_size);
5225
5226 rxfh->hfunc = ETH_RSS_HASH_TOP;
5227
5228 return 0;
5229 }
5230
5231 static int virtnet_set_rxfh(struct net_device *dev,
5232 struct ethtool_rxfh_param *rxfh,
5233 struct netlink_ext_ack *extack)
5234 {
5235 struct virtnet_info *vi = netdev_priv(dev);
5236 bool update = false;
5237 int i;
5238
5239 if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE &&
5240 rxfh->hfunc != ETH_RSS_HASH_TOP)
5241 return -EOPNOTSUPP;
5242
5243 if (rxfh->indir) {
5244 if (!vi->has_rss)
5245 return -EOPNOTSUPP;
5246
5247 for (i = 0; i < vi->rss_indir_table_size; ++i)
5248 vi->rss.indirection_table[i] = rxfh->indir[i];
5249 update = true;
5250 }
5251
5252 if (rxfh->key) {
5253 		/* If either _F_HASH_REPORT or _F_RSS is negotiated, the
5254 		 * device provides hash calculation capabilities; that is,
5255 		 * hash_key is configured.
5256 */
5257 if (!vi->has_rss && !vi->has_rss_hash_report)
5258 return -EOPNOTSUPP;
5259
5260 memcpy(vi->rss.key, rxfh->key, vi->rss_key_size);
5261 update = true;
5262 }
5263
5264 if (update)
5265 virtnet_commit_rss_command(vi);
5266
5267 return 0;
5268 }
5269
5270 static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs)
5271 {
5272 struct virtnet_info *vi = netdev_priv(dev);
5273 int rc = 0;
5274
5275 switch (info->cmd) {
5276 case ETHTOOL_GRXRINGS:
5277 info->data = vi->curr_queue_pairs;
5278 break;
5279 case ETHTOOL_GRXFH:
5280 virtnet_get_hashflow(vi, info);
5281 break;
5282 default:
5283 rc = -EOPNOTSUPP;
5284 }
5285
5286 return rc;
5287 }
5288
5289 static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info)
5290 {
5291 struct virtnet_info *vi = netdev_priv(dev);
5292 int rc = 0;
5293
5294 switch (info->cmd) {
5295 case ETHTOOL_SRXFH:
5296 if (!virtnet_set_hashflow(vi, info))
5297 rc = -EINVAL;
5298
5299 break;
5300 default:
5301 rc = -EOPNOTSUPP;
5302 }
5303
5304 return rc;
5305 }
5306
5307 static const struct ethtool_ops virtnet_ethtool_ops = {
5308 .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES |
5309 ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX,
5310 .get_drvinfo = virtnet_get_drvinfo,
5311 .get_link = ethtool_op_get_link,
5312 .get_ringparam = virtnet_get_ringparam,
5313 .set_ringparam = virtnet_set_ringparam,
5314 .get_strings = virtnet_get_strings,
5315 .get_sset_count = virtnet_get_sset_count,
5316 .get_ethtool_stats = virtnet_get_ethtool_stats,
5317 .set_channels = virtnet_set_channels,
5318 .get_channels = virtnet_get_channels,
5319 .get_ts_info = ethtool_op_get_ts_info,
5320 .get_link_ksettings = virtnet_get_link_ksettings,
5321 .set_link_ksettings = virtnet_set_link_ksettings,
5322 .set_coalesce = virtnet_set_coalesce,
5323 .get_coalesce = virtnet_get_coalesce,
5324 .set_per_queue_coalesce = virtnet_set_per_queue_coalesce,
5325 .get_per_queue_coalesce = virtnet_get_per_queue_coalesce,
5326 .get_rxfh_key_size = virtnet_get_rxfh_key_size,
5327 .get_rxfh_indir_size = virtnet_get_rxfh_indir_size,
5328 .get_rxfh = virtnet_get_rxfh,
5329 .set_rxfh = virtnet_set_rxfh,
5330 .get_rxnfc = virtnet_get_rxnfc,
5331 .set_rxnfc = virtnet_set_rxnfc,
5332 };
5333
5334 static void virtnet_get_queue_stats_rx(struct net_device *dev, int i,
5335 struct netdev_queue_stats_rx *stats)
5336 {
5337 struct virtnet_info *vi = netdev_priv(dev);
5338 struct receive_queue *rq = &vi->rq[i];
5339 struct virtnet_stats_ctx ctx = {0};
5340
5341 virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true);
5342
5343 virtnet_get_hw_stats(vi, &ctx, i * 2);
5344 virtnet_fill_stats(vi, i * 2, &ctx, (void *)&rq->stats, true, 0);
5345 }
5346
5347 static void virtnet_get_queue_stats_tx(struct net_device *dev, int i,
5348 struct netdev_queue_stats_tx *stats)
5349 {
5350 struct virtnet_info *vi = netdev_priv(dev);
5351 struct send_queue *sq = &vi->sq[i];
5352 struct virtnet_stats_ctx ctx = {0};
5353
5354 virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true);
5355
5356 virtnet_get_hw_stats(vi, &ctx, i * 2 + 1);
5357 virtnet_fill_stats(vi, i * 2 + 1, &ctx, (void *)&sq->stats, true, 0);
5358 }
5359
5360 static void virtnet_get_base_stats(struct net_device *dev,
5361 struct netdev_queue_stats_rx *rx,
5362 struct netdev_queue_stats_tx *tx)
5363 {
5364 struct virtnet_info *vi = netdev_priv(dev);
5365
5366 /* The queue stats of virtio-net are never reset, so the base stats
5367 * reported here are all 0.
5368 */
5369 rx->bytes = 0;
5370 rx->packets = 0;
5371
5372 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) {
5373 rx->hw_drops = 0;
5374 rx->hw_drop_overruns = 0;
5375 }
5376
5377 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) {
5378 rx->csum_unnecessary = 0;
5379 rx->csum_none = 0;
5380 rx->csum_bad = 0;
5381 }
5382
5383 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) {
5384 rx->hw_gro_packets = 0;
5385 rx->hw_gro_bytes = 0;
5386 rx->hw_gro_wire_packets = 0;
5387 rx->hw_gro_wire_bytes = 0;
5388 }
5389
5390 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED)
5391 rx->hw_drop_ratelimits = 0;
5392
5393 tx->bytes = 0;
5394 tx->packets = 0;
5395 tx->stop = 0;
5396 tx->wake = 0;
5397
5398 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) {
5399 tx->hw_drops = 0;
5400 tx->hw_drop_errors = 0;
5401 }
5402
5403 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) {
5404 tx->csum_none = 0;
5405 tx->needs_csum = 0;
5406 }
5407
5408 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) {
5409 tx->hw_gso_packets = 0;
5410 tx->hw_gso_bytes = 0;
5411 tx->hw_gso_wire_packets = 0;
5412 tx->hw_gso_wire_bytes = 0;
5413 }
5414
5415 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED)
5416 tx->hw_drop_ratelimits = 0;
5417 }
5418
5419 static const struct netdev_stat_ops virtnet_stat_ops = {
5420 .get_queue_stats_rx = virtnet_get_queue_stats_rx,
5421 .get_queue_stats_tx = virtnet_get_queue_stats_tx,
5422 .get_base_stats = virtnet_get_base_stats,
5423 };
5424
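/* Quiesce the device for suspend: flush the config and rx-mode workers,
 * detach the netdev under the tx lock and close it if it was running.
 */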
5425 static void virtnet_freeze_down(struct virtio_device *vdev)
5426 {
5427 struct virtnet_info *vi = vdev->priv;
5428
5429 /* Make sure no work handler is accessing the device */
5430 flush_work(&vi->config_work);
5431 disable_rx_mode_work(vi);
5432 flush_work(&vi->rx_mode_work);
5433
5434 netif_tx_lock_bh(vi->dev);
5435 netif_device_detach(vi->dev);
5436 netif_tx_unlock_bh(vi->dev);
5437 if (netif_running(vi->dev))
5438 virtnet_close(vi->dev);
5439 }
5440
5441 static int init_vqs(struct virtnet_info *vi);
5442
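/* Counterpart of virtnet_freeze_down(): re-create the virtqueues, mark the
 * device ready, re-enable refill/rx-mode work and re-open and attach the
 * netdev if it was running before the freeze.
 */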
5443 static int virtnet_restore_up(struct virtio_device *vdev)
5444 {
5445 struct virtnet_info *vi = vdev->priv;
5446 int err;
5447
5448 err = init_vqs(vi);
5449 if (err)
5450 return err;
5451
5452 virtio_device_ready(vdev);
5453
5454 enable_delayed_refill(vi);
5455 enable_rx_mode_work(vi);
5456
5457 if (netif_running(vi->dev)) {
5458 err = virtnet_open(vi->dev);
5459 if (err)
5460 return err;
5461 }
5462
5463 netif_tx_lock_bh(vi->dev);
5464 netif_device_attach(vi->dev);
5465 netif_tx_unlock_bh(vi->dev);
5466 return err;
5467 }
5468
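/* Program the guest offload bitmap via the VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET
 * control command.
 */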
5469 static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads)
5470 {
5471 __virtio64 *_offloads __free(kfree) = NULL;
5472 struct scatterlist sg;
5473
5474 _offloads = kzalloc(sizeof(*_offloads), GFP_KERNEL);
5475 if (!_offloads)
5476 return -ENOMEM;
5477
5478 *_offloads = cpu_to_virtio64(vi->vdev, offloads);
5479
5480 sg_init_one(&sg, _offloads, sizeof(*_offloads));
5481
5482 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
5483 VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) {
5484 dev_warn(&vi->dev->dev, "Failed to set guest offloads.\n");
5485 return -EINVAL;
5486 }
5487
5488 return 0;
5489 }
5490
5491 static int virtnet_clear_guest_offloads(struct virtnet_info *vi)
5492 {
5493 u64 offloads = 0;
5494
5495 if (!vi->guest_offloads)
5496 return 0;
5497
5498 return virtnet_set_guest_offloads(vi, offloads);
5499 }
5500
5501 static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
5502 {
5503 u64 offloads = vi->guest_offloads;
5504
5505 if (!vi->guest_offloads)
5506 return 0;
5507
5508 return virtnet_set_guest_offloads(vi, offloads);
5509 }
5510
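/* Bind an XSK buffer pool to an RX queue (or unbind it when @pool is NULL):
 * register the xsk xdp_rxq_info, then reset the receive virtqueue while RX
 * is paused so old buffers are released before switching modes.
 */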
5511 static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct receive_queue *rq,
5512 struct xsk_buff_pool *pool)
5513 {
5514 int err, qindex;
5515
5516 qindex = rq - vi->rq;
5517
5518 if (pool) {
5519 err = xdp_rxq_info_reg(&rq->xsk_rxq_info, vi->dev, qindex, rq->napi.napi_id);
5520 if (err < 0)
5521 return err;
5522
5523 err = xdp_rxq_info_reg_mem_model(&rq->xsk_rxq_info,
5524 MEM_TYPE_XSK_BUFF_POOL, NULL);
5525 if (err < 0)
5526 goto unreg;
5527
5528 xsk_pool_set_rxq_info(pool, &rq->xsk_rxq_info);
5529 }
5530
5531 virtnet_rx_pause(vi, rq);
5532
5533 err = virtqueue_reset(rq->vq, virtnet_rq_unmap_free_buf);
5534 if (err) {
5535 netdev_err(vi->dev, "reset rx fail: rx queue index: %d err: %d\n", qindex, err);
5536
5537 pool = NULL;
5538 }
5539
5540 rq->xsk_pool = pool;
5541
5542 virtnet_rx_resume(vi, rq);
5543
5544 if (pool)
5545 return 0;
5546
5547 unreg:
5548 xdp_rxq_info_unreg(&rq->xsk_rxq_info);
5549 return err;
5550 }
5551
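/* Enable AF_XDP zero-copy on queue pair @qid: validate headroom and the
 * rq/sq DMA-device constraints, allocate the per-queue xsk_buff array,
 * DMA-map the pool and bind it to the RX queue.
 */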
5552 static int virtnet_xsk_pool_enable(struct net_device *dev,
5553 struct xsk_buff_pool *pool,
5554 u16 qid)
5555 {
5556 struct virtnet_info *vi = netdev_priv(dev);
5557 struct receive_queue *rq;
5558 struct device *dma_dev;
5559 struct send_queue *sq;
5560 int err, size;
5561
5562 if (vi->hdr_len > xsk_pool_get_headroom(pool))
5563 return -EINVAL;
5564
5565 /* In big_packets mode, XDP cannot work, so there is no need to
5566 * initialize the xsk state of the rq.
5567 */
5568 if (vi->big_packets && !vi->mergeable_rx_bufs)
5569 return -ENOENT;
5570
5571 if (qid >= vi->curr_queue_pairs)
5572 return -EINVAL;
5573
5574 sq = &vi->sq[qid];
5575 rq = &vi->rq[qid];
5576
5577 /* xsk assumes that tx and rx share the same dma device. AF_XDP
5578 * may use one buffer to receive from the rx and reuse this buffer to
5579 * send by the tx, so the dma dev of sq and rq must be the same one.
5580 *
5581 * But vq->dma_dev allows each vq to have its own dma dev, so check
5582 * that the dma devs of the rq and the sq are the same device.
5583 */
5584 if (virtqueue_dma_dev(rq->vq) != virtqueue_dma_dev(sq->vq))
5585 return -EINVAL;
5586
5587 dma_dev = virtqueue_dma_dev(rq->vq);
5588 if (!dma_dev)
5589 return -EINVAL;
5590
5591 size = virtqueue_get_vring_size(rq->vq);
5592
5593 rq->xsk_buffs = kvcalloc(size, sizeof(*rq->xsk_buffs), GFP_KERNEL);
5594 if (!rq->xsk_buffs)
5595 return -ENOMEM;
5596
5597 err = xsk_pool_dma_map(pool, dma_dev, 0);
5598 if (err)
5599 goto err_xsk_map;
5600
5601 err = virtnet_rq_bind_xsk_pool(vi, rq, pool);
5602 if (err)
5603 goto err_rq;
5604
5605 return 0;
5606
5607 err_rq:
5608 xsk_pool_dma_unmap(pool, 0);
5609 err_xsk_map:
5610 return err;
5611 }
5612
5613 static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid)
5614 {
5615 struct virtnet_info *vi = netdev_priv(dev);
5616 struct xsk_buff_pool *pool;
5617 struct receive_queue *rq;
5618 int err;
5619
5620 if (qid >= vi->curr_queue_pairs)
5621 return -EINVAL;
5622
5623 rq = &vi->rq[qid];
5624
5625 pool = rq->xsk_pool;
5626
5627 err = virtnet_rq_bind_xsk_pool(vi, rq, NULL);
5628
5629 xsk_pool_dma_unmap(pool, 0);
5630
5631 kvfree(rq->xsk_buffs);
5632
5633 return err;
5634 }
5635
5636 static int virtnet_xsk_pool_setup(struct net_device *dev, struct netdev_bpf *xdp)
5637 {
5638 if (xdp->xsk.pool)
5639 return virtnet_xsk_pool_enable(dev, xdp->xsk.pool,
5640 xdp->xsk.queue_id);
5641 else
5642 return virtnet_xsk_pool_disable(dev, xdp->xsk.queue_id);
5643 }
5644
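/* Attach or detach an XDP program. This validates offload/MTU constraints,
 * tries to reserve extra TX queues for XDP_TX (falling back to a slower
 * locked tx mode if not enough queues are available), swaps the per-RX-queue
 * xdp_prog pointers, and clears the guest offloads while a program is
 * installed (restoring them on removal).
 */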
5645 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
5646 struct netlink_ext_ack *extack)
5647 {
5648 unsigned int room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM +
5649 sizeof(struct skb_shared_info));
5650 unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN;
5651 struct virtnet_info *vi = netdev_priv(dev);
5652 struct bpf_prog *old_prog;
5653 u16 xdp_qp = 0, curr_qp;
5654 int i, err;
5655
5656 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
5657 && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
5658 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
5659 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
5660 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
5661 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) ||
5662 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) ||
5663 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) {
5664 NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first");
5665 return -EOPNOTSUPP;
5666 }
5667
5668 if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
5669 NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required");
5670 return -EINVAL;
5671 }
5672
5673 if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) {
5674 NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags");
5675 netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz);
5676 return -EINVAL;
5677 }
5678
5679 curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs;
5680 if (prog)
5681 xdp_qp = nr_cpu_ids;
5682
5683 /* XDP requires extra queues for XDP_TX */
5684 if (curr_qp + xdp_qp > vi->max_queue_pairs) {
5685 netdev_warn_once(dev, "XDP requests %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n",
5686 curr_qp + xdp_qp, vi->max_queue_pairs);
5687 xdp_qp = 0;
5688 }
5689
5690 old_prog = rtnl_dereference(vi->rq[0].xdp_prog);
5691 if (!prog && !old_prog)
5692 return 0;
5693
5694 if (prog)
5695 bpf_prog_add(prog, vi->max_queue_pairs - 1);
5696
5697 /* Make sure NAPI is not using any XDP TX queues for RX. */
5698 if (netif_running(dev)) {
5699 for (i = 0; i < vi->max_queue_pairs; i++) {
5700 napi_disable(&vi->rq[i].napi);
5701 virtnet_napi_tx_disable(&vi->sq[i].napi);
5702 }
5703 }
5704
5705 if (!prog) {
5706 for (i = 0; i < vi->max_queue_pairs; i++) {
5707 rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
5708 if (i == 0)
5709 virtnet_restore_guest_offloads(vi);
5710 }
5711 synchronize_net();
5712 }
5713
5714 err = virtnet_set_queues(vi, curr_qp + xdp_qp);
5715 if (err)
5716 goto err;
5717 netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
5718 vi->xdp_queue_pairs = xdp_qp;
5719
5720 if (prog) {
5721 vi->xdp_enabled = true;
5722 for (i = 0; i < vi->max_queue_pairs; i++) {
5723 rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
5724 if (i == 0 && !old_prog)
5725 virtnet_clear_guest_offloads(vi);
5726 }
5727 if (!old_prog)
5728 xdp_features_set_redirect_target(dev, true);
5729 } else {
5730 xdp_features_clear_redirect_target(dev);
5731 vi->xdp_enabled = false;
5732 }
5733
5734 for (i = 0; i < vi->max_queue_pairs; i++) {
5735 if (old_prog)
5736 bpf_prog_put(old_prog);
5737 if (netif_running(dev)) {
5738 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
5739 virtnet_napi_tx_enable(vi, vi->sq[i].vq,
5740 &vi->sq[i].napi);
5741 }
5742 }
5743
5744 return 0;
5745
5746 err:
5747 if (!prog) {
5748 virtnet_clear_guest_offloads(vi);
5749 for (i = 0; i < vi->max_queue_pairs; i++)
5750 rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog);
5751 }
5752
5753 if (netif_running(dev)) {
5754 for (i = 0; i < vi->max_queue_pairs; i++) {
5755 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
5756 virtnet_napi_tx_enable(vi, vi->sq[i].vq,
5757 &vi->sq[i].napi);
5758 }
5759 }
5760 if (prog)
5761 bpf_prog_sub(prog, vi->max_queue_pairs - 1);
5762 return err;
5763 }
5764
5765 static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
5766 {
5767 switch (xdp->command) {
5768 case XDP_SETUP_PROG:
5769 return virtnet_xdp_set(dev, xdp->prog, xdp->extack);
5770 case XDP_SETUP_XSK_POOL:
5771 return virtnet_xsk_pool_setup(dev, xdp);
5772 default:
5773 return -EINVAL;
5774 }
5775 }
5776
5777 static int virtnet_get_phys_port_name(struct net_device *dev, char *buf,
5778 size_t len)
5779 {
5780 struct virtnet_info *vi = netdev_priv(dev);
5781 int ret;
5782
5783 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY))
5784 return -EOPNOTSUPP;
5785
5786 ret = snprintf(buf, len, "sby");
5787 if (ret >= len)
5788 return -EOPNOTSUPP;
5789
5790 return 0;
5791 }
5792
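/* ndo_set_features handler: NETIF_F_GRO_HW is backed by the guest receive
 * offloads and NETIF_F_RXHASH by the RSS hash_types, so toggle the
 * corresponding device state when either flag changes.
 */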
5793 static int virtnet_set_features(struct net_device *dev,
5794 netdev_features_t features)
5795 {
5796 struct virtnet_info *vi = netdev_priv(dev);
5797 u64 offloads;
5798 int err;
5799
5800 if ((dev->features ^ features) & NETIF_F_GRO_HW) {
5801 if (vi->xdp_enabled)
5802 return -EBUSY;
5803
5804 if (features & NETIF_F_GRO_HW)
5805 offloads = vi->guest_offloads_capable;
5806 else
5807 offloads = vi->guest_offloads_capable &
5808 ~GUEST_OFFLOAD_GRO_HW_MASK;
5809
5810 err = virtnet_set_guest_offloads(vi, offloads);
5811 if (err)
5812 return err;
5813 vi->guest_offloads = offloads;
5814 }
5815
5816 if ((dev->features ^ features) & NETIF_F_RXHASH) {
5817 if (features & NETIF_F_RXHASH)
5818 vi->rss.hash_types = vi->rss_hash_types_saved;
5819 else
5820 vi->rss.hash_types = VIRTIO_NET_HASH_REPORT_NONE;
5821
5822 if (!virtnet_commit_rss_command(vi))
5823 return -EINVAL;
5824 }
5825
5826 return 0;
5827 }
5828
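/* ndo_tx_timeout handler: account the stall in the per-queue tx_timeouts
 * counter and log which send queue got stuck.
 */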
5829 static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue)
5830 {
5831 struct virtnet_info *priv = netdev_priv(dev);
5832 struct send_queue *sq = &priv->sq[txqueue];
5833 struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue);
5834
5835 u64_stats_update_begin(&sq->stats.syncp);
5836 u64_stats_inc(&sq->stats.tx_timeouts);
5837 u64_stats_update_end(&sq->stats.syncp);
5838
5839 netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n",
5840 txqueue, sq->name, sq->vq->index, sq->vq->name,
5841 jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start)));
5842 }
5843
5844 static int virtnet_init_irq_moder(struct virtnet_info *vi)
5845 {
5846 u8 profile_flags = 0, coal_flags = 0;
5847 int ret, i;
5848
5849 profile_flags |= DIM_PROFILE_RX;
5850 coal_flags |= DIM_COALESCE_USEC | DIM_COALESCE_PKTS;
5851 ret = net_dim_init_irq_moder(vi->dev, profile_flags, coal_flags,
5852 DIM_CQ_PERIOD_MODE_START_FROM_EQE,
5853 0, virtnet_rx_dim_work, NULL);
5854
5855 if (ret)
5856 return ret;
5857
5858 for (i = 0; i < vi->max_queue_pairs; i++)
5859 net_dim_setting(vi->dev, &vi->rq[i].dim, false);
5860
5861 return 0;
5862 }
5863
5864 static void virtnet_free_irq_moder(struct virtnet_info *vi)
5865 {
5866 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
5867 return;
5868
5869 rtnl_lock();
5870 net_dim_free_irq_moder(vi->dev);
5871 rtnl_unlock();
5872 }
5873
5874 static const struct net_device_ops virtnet_netdev = {
5875 .ndo_open = virtnet_open,
5876 .ndo_stop = virtnet_close,
5877 .ndo_start_xmit = start_xmit,
5878 .ndo_validate_addr = eth_validate_addr,
5879 .ndo_set_mac_address = virtnet_set_mac_address,
5880 .ndo_set_rx_mode = virtnet_set_rx_mode,
5881 .ndo_get_stats64 = virtnet_stats,
5882 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
5883 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
5884 .ndo_bpf = virtnet_xdp,
5885 .ndo_xdp_xmit = virtnet_xdp_xmit,
5886 .ndo_xsk_wakeup = virtnet_xsk_wakeup,
5887 .ndo_features_check = passthru_features_check,
5888 .ndo_get_phys_port_name = virtnet_get_phys_port_name,
5889 .ndo_set_features = virtnet_set_features,
5890 .ndo_tx_timeout = virtnet_tx_timeout,
5891 };
5892
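/* Config-change worker: acknowledge link announcements and propagate the
 * device's link state to the networking core (carrier and tx queues).
 */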
5893 static void virtnet_config_changed_work(struct work_struct *work)
5894 {
5895 struct virtnet_info *vi =
5896 container_of(work, struct virtnet_info, config_work);
5897 u16 v;
5898
5899 if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS,
5900 struct virtio_net_config, status, &v) < 0)
5901 return;
5902
5903 if (v & VIRTIO_NET_S_ANNOUNCE) {
5904 netdev_notify_peers(vi->dev);
5905 virtnet_ack_link_announce(vi);
5906 }
5907
5908 /* Ignore unknown (future) status bits */
5909 v &= VIRTIO_NET_S_LINK_UP;
5910
5911 if (vi->status == v)
5912 return;
5913
5914 vi->status = v;
5915
5916 if (vi->status & VIRTIO_NET_S_LINK_UP) {
5917 virtnet_update_settings(vi);
5918 netif_carrier_on(vi->dev);
5919 netif_tx_wake_all_queues(vi->dev);
5920 } else {
5921 netif_carrier_off(vi->dev);
5922 netif_tx_stop_all_queues(vi->dev);
5923 }
5924 }
5925
5926 static void virtnet_config_changed(struct virtio_device *vdev)
5927 {
5928 struct virtnet_info *vi = vdev->priv;
5929
5930 schedule_work(&vi->config_work);
5931 }
5932
5933 static void virtnet_free_queues(struct virtnet_info *vi)
5934 {
5935 int i;
5936
5937 for (i = 0; i < vi->max_queue_pairs; i++) {
5938 __netif_napi_del(&vi->rq[i].napi);
5939 __netif_napi_del(&vi->sq[i].napi);
5940 }
5941
5942 /* We called __netif_napi_del(), so we must respect an RCU grace
5943 * period before freeing vi->rq.
5944 */
5945 synchronize_net();
5946
5947 kfree(vi->rq);
5948 kfree(vi->sq);
5949 kfree(vi->ctrl);
5950 }
5951
5952 static void _free_receive_bufs(struct virtnet_info *vi)
5953 {
5954 struct bpf_prog *old_prog;
5955 int i;
5956
5957 for (i = 0; i < vi->max_queue_pairs; i++) {
5958 while (vi->rq[i].pages)
5959 __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
5960
5961 old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
5962 RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL);
5963 if (old_prog)
5964 bpf_prog_put(old_prog);
5965 }
5966 }
5967
5968 static void free_receive_bufs(struct virtnet_info *vi)
5969 {
5970 rtnl_lock();
5971 _free_receive_bufs(vi);
5972 rtnl_unlock();
5973 }
5974
5975 static void free_receive_page_frags(struct virtnet_info *vi)
5976 {
5977 int i;
5978 for (i = 0; i < vi->max_queue_pairs; i++)
5979 if (vi->rq[i].alloc_frag.page) {
5980 if (vi->rq[i].do_dma && vi->rq[i].last_dma)
5981 virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0);
5982 put_page(vi->rq[i].alloc_frag.page);
5983 }
5984 }
5985
5986 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf)
5987 {
5988 if (!is_xdp_frame(buf))
5989 dev_kfree_skb(buf);
5990 else
5991 xdp_return_frame(ptr_to_xdp(buf));
5992 }
5993
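/* Detach and free any buffers still queued in the send and receive
 * virtqueues; skbs are freed and xdp_frames are returned.
 */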
5994 static void free_unused_bufs(struct virtnet_info *vi)
5995 {
5996 void *buf;
5997 int i;
5998
5999 for (i = 0; i < vi->max_queue_pairs; i++) {
6000 struct virtqueue *vq = vi->sq[i].vq;
6001 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
6002 virtnet_sq_free_unused_buf(vq, buf);
6003 cond_resched();
6004 }
6005
6006 for (i = 0; i < vi->max_queue_pairs; i++) {
6007 struct virtqueue *vq = vi->rq[i].vq;
6008
6009 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
6010 virtnet_rq_unmap_free_buf(vq, buf);
6011 cond_resched();
6012 }
6013 }
6014
6015 static void virtnet_del_vqs(struct virtnet_info *vi)
6016 {
6017 struct virtio_device *vdev = vi->vdev;
6018
6019 virtnet_clean_affinity(vi);
6020
6021 vdev->config->del_vqs(vdev);
6022
6023 virtnet_free_queues(vi);
6024 }
6025
6026 /* How large should a single buffer be so a queue full of these can fit at
6027 * least one full packet?
6028 * Logic below assumes the mergeable buffer header is used.
6029 */
6030 static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq)
6031 {
6032 const unsigned int hdr_len = vi->hdr_len;
6033 unsigned int rq_size = virtqueue_get_vring_size(vq);
6034 unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu;
6035 unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len;
6036 unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size);
6037
6038 return max(max(min_buf_len, hdr_len) - hdr_len,
6039 (unsigned int)GOOD_PACKET_LEN);
6040 }
6041
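/* Build the virtqueue layout (RX/TX per queue pair plus an optional control
 * vq), name each queue and hand the whole set to virtio_find_vqs().
 */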
6042 static int virtnet_find_vqs(struct virtnet_info *vi)
6043 {
6044 struct virtqueue_info *vqs_info;
6045 struct virtqueue **vqs;
6046 int ret = -ENOMEM;
6047 int total_vqs;
6048 bool *ctx;
6049 u16 i;
6050
6051 /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
6052 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by
6053 * possible control vq.
6054 */
6055 total_vqs = vi->max_queue_pairs * 2 +
6056 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);
6057
6058 /* Allocate space for find_vqs parameters */
6059 vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL);
6060 if (!vqs)
6061 goto err_vq;
6062 vqs_info = kcalloc(total_vqs, sizeof(*vqs_info), GFP_KERNEL);
6063 if (!vqs_info)
6064 goto err_vqs_info;
6065 if (!vi->big_packets || vi->mergeable_rx_bufs) {
6066 ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL);
6067 if (!ctx)
6068 goto err_ctx;
6069 } else {
6070 ctx = NULL;
6071 }
6072
6073 /* Parameters for control virtqueue, if any */
6074 if (vi->has_cvq) {
6075 vqs_info[total_vqs - 1].name = "control";
6076 }
6077
6078 /* Allocate/initialize parameters for send/receive virtqueues */
6079 for (i = 0; i < vi->max_queue_pairs; i++) {
6080 vqs_info[rxq2vq(i)].callback = skb_recv_done;
6081 vqs_info[txq2vq(i)].callback = skb_xmit_done;
6082 sprintf(vi->rq[i].name, "input.%u", i);
6083 sprintf(vi->sq[i].name, "output.%u", i);
6084 vqs_info[rxq2vq(i)].name = vi->rq[i].name;
6085 vqs_info[txq2vq(i)].name = vi->sq[i].name;
6086 if (ctx)
6087 vqs_info[rxq2vq(i)].ctx = true;
6088 }
6089
6090 ret = virtio_find_vqs(vi->vdev, total_vqs, vqs, vqs_info, NULL);
6091 if (ret)
6092 goto err_find;
6093
6094 if (vi->has_cvq) {
6095 vi->cvq = vqs[total_vqs - 1];
6096 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
6097 vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
6098 }
6099
6100 for (i = 0; i < vi->max_queue_pairs; i++) {
6101 vi->rq[i].vq = vqs[rxq2vq(i)];
6102 vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq);
6103 vi->sq[i].vq = vqs[txq2vq(i)];
6104 }
6105
6106 /* Success path: ret == 0, fall through to free the temporary arrays. */
6107
6108
6109 err_find:
6110 kfree(ctx);
6111 err_ctx:
6112 kfree(vqs_info);
6113 err_vqs_info:
6114 kfree(vqs);
6115 err_vq:
6116 return ret;
6117 }
6118
6119 static int virtnet_alloc_queues(struct virtnet_info *vi)
6120 {
6121 int i;
6122
6123 if (vi->has_cvq) {
6124 vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL);
6125 if (!vi->ctrl)
6126 goto err_ctrl;
6127 } else {
6128 vi->ctrl = NULL;
6129 }
6130 vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL);
6131 if (!vi->sq)
6132 goto err_sq;
6133 vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL);
6134 if (!vi->rq)
6135 goto err_rq;
6136
6137 INIT_DELAYED_WORK(&vi->refill, refill_work);
6138 for (i = 0; i < vi->max_queue_pairs; i++) {
6139 vi->rq[i].pages = NULL;
6140 netif_napi_add_weight(vi->dev, &vi->rq[i].napi, virtnet_poll,
6141 napi_weight);
6142 netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi,
6143 virtnet_poll_tx,
6144 napi_tx ? napi_weight : 0);
6145
6146 sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
6147 ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len);
6148 sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
6149
6150 u64_stats_init(&vi->rq[i].stats.syncp);
6151 u64_stats_init(&vi->sq[i].stats.syncp);
6152 mutex_init(&vi->rq[i].dim_lock);
6153 }
6154
6155 return 0;
6156
6157 err_rq:
6158 kfree(vi->sq);
6159 err_sq:
6160 kfree(vi->ctrl);
6161 err_ctrl:
6162 return -ENOMEM;
6163 }
6164
6165 static int init_vqs(struct virtnet_info *vi)
6166 {
6167 int ret;
6168
6169 /* Allocate send & receive queues */
6170 ret = virtnet_alloc_queues(vi);
6171 if (ret)
6172 goto err;
6173
6174 ret = virtnet_find_vqs(vi);
6175 if (ret)
6176 goto err_free;
6177
6178 cpus_read_lock();
6179 virtnet_set_affinity(vi);
6180 cpus_read_unlock();
6181
6182 return 0;
6183
6184 err_free:
6185 virtnet_free_queues(vi);
6186 err:
6187 return ret;
6188 }
6189
6190 #ifdef CONFIG_SYSFS
6191 static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
6192 char *buf)
6193 {
6194 struct virtnet_info *vi = netdev_priv(queue->dev);
6195 unsigned int queue_index = get_netdev_rx_queue_index(queue);
6196 unsigned int headroom = virtnet_get_headroom(vi);
6197 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
6198 struct ewma_pkt_len *avg;
6199
6200 BUG_ON(queue_index >= vi->max_queue_pairs);
6201 avg = &vi->rq[queue_index].mrg_avg_pkt_len;
6202 return sprintf(buf, "%u\n",
6203 get_mergeable_buf_len(&vi->rq[queue_index], avg,
6204 SKB_DATA_ALIGN(headroom + tailroom)));
6205 }
6206
6207 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
6208 __ATTR_RO(mergeable_rx_buffer_size);
6209
6210 static struct attribute *virtio_net_mrg_rx_attrs[] = {
6211 &mergeable_rx_buffer_size_attribute.attr,
6212 NULL
6213 };
6214
6215 static const struct attribute_group virtio_net_mrg_rx_group = {
6216 .name = "virtio_net",
6217 .attrs = virtio_net_mrg_rx_attrs
6218 };
6219 #endif
6220
6221 static bool virtnet_fail_on_feature(struct virtio_device *vdev,
6222 unsigned int fbit,
6223 const char *fname, const char *dname)
6224 {
6225 if (!virtio_has_feature(vdev, fbit))
6226 return false;
6227
6228 dev_err(&vdev->dev, "device advertises feature %s but not %s",
6229 fname, dname);
6230
6231 return true;
6232 }
6233
6234 #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \
6235 virtnet_fail_on_feature(vdev, fbit, #fbit, dbit)
6236
6237 static bool virtnet_validate_features(struct virtio_device *vdev)
6238 {
6239 if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) &&
6240 (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX,
6241 "VIRTIO_NET_F_CTRL_VQ") ||
6242 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN,
6243 "VIRTIO_NET_F_CTRL_VQ") ||
6244 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE,
6245 "VIRTIO_NET_F_CTRL_VQ") ||
6246 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") ||
6247 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR,
6248 "VIRTIO_NET_F_CTRL_VQ") ||
6249 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS,
6250 "VIRTIO_NET_F_CTRL_VQ") ||
6251 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT,
6252 "VIRTIO_NET_F_CTRL_VQ") ||
6253 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL,
6254 "VIRTIO_NET_F_CTRL_VQ") ||
6255 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_VQ_NOTF_COAL,
6256 "VIRTIO_NET_F_CTRL_VQ"))) {
6257 return false;
6258 }
6259
6260 return true;
6261 }
6262
6263 #define MIN_MTU ETH_MIN_MTU
6264 #define MAX_MTU ETH_MAX_MTU
6265
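/* Pre-probe validation: require config space access, check feature
 * dependencies, and drop the MTU/STANDBY features whose prerequisites are
 * not met.
 */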
6266 static int virtnet_validate(struct virtio_device *vdev)
6267 {
6268 if (!vdev->config->get) {
6269 dev_err(&vdev->dev, "%s failure: config access disabled\n",
6270 __func__);
6271 return -EINVAL;
6272 }
6273
6274 if (!virtnet_validate_features(vdev))
6275 return -EINVAL;
6276
6277 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
6278 int mtu = virtio_cread16(vdev,
6279 offsetof(struct virtio_net_config,
6280 mtu));
6281 if (mtu < MIN_MTU)
6282 __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU);
6283 }
6284
6285 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) &&
6286 !virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
6287 dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby");
6288 __virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY);
6289 }
6290
6291 return 0;
6292 }
6293
6294 static bool virtnet_check_guest_gso(const struct virtnet_info *vi)
6295 {
6296 return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
6297 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
6298 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
6299 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
6300 (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) &&
6301 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6));
6302 }
6303
6304 static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu)
6305 {
6306 bool guest_gso = virtnet_check_guest_gso(vi);
6307
6308 /* If the device can receive ANY guest GSO packets, regardless of mtu,
6309 * allocate buffers of maximum size, otherwise limit them to the mtu
6310 * size only.
6311 */
6312 if (mtu > ETH_DATA_LEN || guest_gso) {
6313 vi->big_packets = true;
6314 vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE);
6315 }
6316 }
6317
6318 #define VIRTIO_NET_HASH_REPORT_MAX_TABLE 10
6319 static enum xdp_rss_hash_type
6320 virtnet_xdp_rss_type[VIRTIO_NET_HASH_REPORT_MAX_TABLE] = {
6321 [VIRTIO_NET_HASH_REPORT_NONE] = XDP_RSS_TYPE_NONE,
6322 [VIRTIO_NET_HASH_REPORT_IPv4] = XDP_RSS_TYPE_L3_IPV4,
6323 [VIRTIO_NET_HASH_REPORT_TCPv4] = XDP_RSS_TYPE_L4_IPV4_TCP,
6324 [VIRTIO_NET_HASH_REPORT_UDPv4] = XDP_RSS_TYPE_L4_IPV4_UDP,
6325 [VIRTIO_NET_HASH_REPORT_IPv6] = XDP_RSS_TYPE_L3_IPV6,
6326 [VIRTIO_NET_HASH_REPORT_TCPv6] = XDP_RSS_TYPE_L4_IPV6_TCP,
6327 [VIRTIO_NET_HASH_REPORT_UDPv6] = XDP_RSS_TYPE_L4_IPV6_UDP,
6328 [VIRTIO_NET_HASH_REPORT_IPv6_EX] = XDP_RSS_TYPE_L3_IPV6_EX,
6329 [VIRTIO_NET_HASH_REPORT_TCPv6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX,
6330 [VIRTIO_NET_HASH_REPORT_UDPv6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX
6331 };
6332
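/* XDP rx_hash metadata op: report the device-computed RSS hash and its type,
 * taken from the virtio_net_hdr_v1_hash that precedes the packet data.
 */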
6333 static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash,
6334 enum xdp_rss_hash_type *rss_type)
6335 {
6336 const struct xdp_buff *xdp = (void *)_ctx;
6337 struct virtio_net_hdr_v1_hash *hdr_hash;
6338 struct virtnet_info *vi;
6339 u16 hash_report;
6340
6341 if (!(xdp->rxq->dev->features & NETIF_F_RXHASH))
6342 return -ENODATA;
6343
6344 vi = netdev_priv(xdp->rxq->dev);
6345 hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len);
6346 hash_report = __le16_to_cpu(hdr_hash->hash_report);
6347
6348 if (hash_report >= VIRTIO_NET_HASH_REPORT_MAX_TABLE)
6349 hash_report = VIRTIO_NET_HASH_REPORT_NONE;
6350
6351 *rss_type = virtnet_xdp_rss_type[hash_report];
6352 *hash = __le32_to_cpu(hdr_hash->hash_value);
6353 return 0;
6354 }
6355
6356 static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = {
6357 .xmo_rx_hash = virtnet_xdp_rx_hash,
6358 };
6359
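/* Device probe: size the queue pairs from the config space, allocate the
 * netdev, translate negotiated virtio features into netdev features
 * (checksum, TSO/USO, GRO_HW, RSS), create the virtqueues and register the
 * device before marking it ready.
 */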
6360 static int virtnet_probe(struct virtio_device *vdev)
6361 {
6362 int i, err = -ENOMEM;
6363 struct net_device *dev;
6364 struct virtnet_info *vi;
6365 u16 max_queue_pairs;
6366 int mtu = 0;
6367
6368 /* Find if host supports multiqueue/rss virtio_net device */
6369 max_queue_pairs = 1;
6370 if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS))
6371 max_queue_pairs =
6372 virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs));
6373
6374 /* We need at least 2 queues */
6375 if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
6376 max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
6377 !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
6378 max_queue_pairs = 1;
6379
6380 /* Allocate ourselves a network device with room for our info */
6381 dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
6382 if (!dev)
6383 return -ENOMEM;
6384
6385 /* Set up network device as normal. */
6386 dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE |
6387 IFF_TX_SKB_NO_LINEAR;
6388 dev->netdev_ops = &virtnet_netdev;
6389 dev->stat_ops = &virtnet_stat_ops;
6390 dev->features = NETIF_F_HIGHDMA;
6391
6392 dev->ethtool_ops = &virtnet_ethtool_ops;
6393 SET_NETDEV_DEV(dev, &vdev->dev);
6394
6395 /* Do we support "hardware" checksums? */
6396 if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
6397 /* This opens up the world of extra features. */
6398 dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG;
6399 if (csum)
6400 dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
6401
6402 if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
6403 dev->hw_features |= NETIF_F_TSO
6404 | NETIF_F_TSO_ECN | NETIF_F_TSO6;
6405 }
6406 /* Individual feature bits: what can host handle? */
6407 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
6408 dev->hw_features |= NETIF_F_TSO;
6409 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
6410 dev->hw_features |= NETIF_F_TSO6;
6411 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
6412 dev->hw_features |= NETIF_F_TSO_ECN;
6413 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO))
6414 dev->hw_features |= NETIF_F_GSO_UDP_L4;
6415
6416 dev->features |= NETIF_F_GSO_ROBUST;
6417
6418 if (gso)
6419 dev->features |= dev->hw_features & NETIF_F_ALL_TSO;
6420 /* (!csum && gso) case will be fixed by register_netdev() */
6421 }
6422
6423 /* 1. With VIRTIO_NET_F_GUEST_CSUM negotiation, the driver doesn't
6424 * need to calculate checksums for partially checksummed packets,
6425 * as they're considered valid by the upper layer.
6426 * 2. Without VIRTIO_NET_F_GUEST_CSUM negotiation, the driver only
6427 * receives fully checksummed packets. The device may assist in
6428 * validating these packets' checksums, so the driver won't have to.
6429 */
6430 dev->features |= NETIF_F_RXCSUM;
6431
6432 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
6433 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6))
6434 dev->features |= NETIF_F_GRO_HW;
6435 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS))
6436 dev->hw_features |= NETIF_F_GRO_HW;
6437
6438 dev->vlan_features = dev->features;
6439 dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT;
6440
6441 /* MTU range: 68 - 65535 */
6442 dev->min_mtu = MIN_MTU;
6443 dev->max_mtu = MAX_MTU;
6444
6445 /* Configuration may specify what MAC to use. Otherwise random. */
6446 if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
6447 u8 addr[ETH_ALEN];
6448
6449 virtio_cread_bytes(vdev,
6450 offsetof(struct virtio_net_config, mac),
6451 addr, ETH_ALEN);
6452 eth_hw_addr_set(dev, addr);
6453 } else {
6454 eth_hw_addr_random(dev);
6455 dev_info(&vdev->dev, "Assigned random MAC address %pM\n",
6456 dev->dev_addr);
6457 }
6458
6459 /* Set up our device-specific information */
6460 vi = netdev_priv(dev);
6461 vi->dev = dev;
6462 vi->vdev = vdev;
6463 vdev->priv = vi;
6464
6465 INIT_WORK(&vi->config_work, virtnet_config_changed_work);
6466 INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work);
6467 spin_lock_init(&vi->refill_lock);
6468
6469 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) {
6470 vi->mergeable_rx_bufs = true;
6471 dev->xdp_features |= NETDEV_XDP_ACT_RX_SG;
6472 }
6473
6474 if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
6475 vi->has_rss_hash_report = true;
6476
6477 if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) {
6478 vi->has_rss = true;
6479
6480 vi->rss_indir_table_size =
6481 virtio_cread16(vdev, offsetof(struct virtio_net_config,
6482 rss_max_indirection_table_length));
6483 }
6484 err = rss_indirection_table_alloc(&vi->rss, vi->rss_indir_table_size);
6485 if (err)
6486 goto free;
6487
6488 if (vi->has_rss || vi->has_rss_hash_report) {
6489 vi->rss_key_size =
6490 virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size));
6491 if (vi->rss_key_size > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
6492 dev_err(&vdev->dev, "rss_max_key_size=%u exceeds the limit %u.\n",
6493 vi->rss_key_size, VIRTIO_NET_RSS_MAX_KEY_SIZE);
6494 err = -EINVAL;
6495 goto free;
6496 }
6497
6498 vi->rss_hash_types_supported =
6499 virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types));
6500 vi->rss_hash_types_supported &=
6501 ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX |
6502 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
6503 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX);
6504
6505 dev->hw_features |= NETIF_F_RXHASH;
6506 dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops;
6507 }
6508
6509 if (vi->has_rss_hash_report)
6510 vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash);
6511 else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) ||
6512 virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
6513 vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
6514 else
6515 vi->hdr_len = sizeof(struct virtio_net_hdr);
6516
6517 if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) ||
6518 virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
6519 vi->any_header_sg = true;
6520
6521 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
6522 vi->has_cvq = true;
6523
6524 mutex_init(&vi->cvq_lock);
6525
6526 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
6527 mtu = virtio_cread16(vdev,
6528 offsetof(struct virtio_net_config,
6529 mtu));
6530 if (mtu < dev->min_mtu) {
6531 /* Should never trigger: MTU was previously validated
6532 * in virtnet_validate.
6533 */
6534 dev_err(&vdev->dev,
6535 "device MTU appears to have changed it is now %d < %d",
6536 mtu, dev->min_mtu);
6537 err = -EINVAL;
6538 goto free;
6539 }
6540
6541 dev->mtu = mtu;
6542 dev->max_mtu = mtu;
6543 }
6544
6545 virtnet_set_big_packets(vi, mtu);
6546
6547 if (vi->any_header_sg)
6548 dev->needed_headroom = vi->hdr_len;
6549
6550 /* Enable multiqueue by default */
6551 if (num_online_cpus() >= max_queue_pairs)
6552 vi->curr_queue_pairs = max_queue_pairs;
6553 else
6554 vi->curr_queue_pairs = num_online_cpus();
6555 vi->max_queue_pairs = max_queue_pairs;
6556
6557 /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
6558 err = init_vqs(vi);
6559 if (err)
6560 goto free;
6561
6562 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
6563 vi->intr_coal_rx.max_usecs = 0;
6564 vi->intr_coal_tx.max_usecs = 0;
6565 vi->intr_coal_rx.max_packets = 0;
6566
6567 /* Keep the default values of the coalescing parameters
6568 * aligned with the default napi_tx state.
6569 */
6570 if (vi->sq[0].napi.weight)
6571 vi->intr_coal_tx.max_packets = 1;
6572 else
6573 vi->intr_coal_tx.max_packets = 0;
6574 }
6575
6576 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) {
6577 /* The reason is the same as VIRTIO_NET_F_NOTF_COAL. */
6578 for (i = 0; i < vi->max_queue_pairs; i++)
6579 if (vi->sq[i].napi.weight)
6580 vi->sq[i].intr_coal.max_packets = 1;
6581
6582 err = virtnet_init_irq_moder(vi);
6583 if (err)
6584 goto free;
6585 }
6586
6587 #ifdef CONFIG_SYSFS
6588 if (vi->mergeable_rx_bufs)
6589 dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group;
6590 #endif
6591 netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
6592 netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
6593
6594 virtnet_init_settings(dev);
6595
6596 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
6597 vi->failover = net_failover_create(vi->dev);
6598 if (IS_ERR(vi->failover)) {
6599 err = PTR_ERR(vi->failover);
6600 goto free_vqs;
6601 }
6602 }
6603
6604 if (vi->has_rss || vi->has_rss_hash_report)
6605 virtnet_init_default_rss(vi);
6606
6607 enable_rx_mode_work(vi);
6608
6609 /* serialize netdev register + virtio_device_ready() with ndo_open() */
6610 rtnl_lock();
6611
6612 err = register_netdevice(dev);
6613 if (err) {
6614 pr_debug("virtio_net: registering device failed\n");
6615 rtnl_unlock();
6616 goto free_failover;
6617 }
6618
6619 /* Disable config change notification until ndo_open. */
6620 virtio_config_driver_disable(vi->vdev);
6621
6622 virtio_device_ready(vdev);
6623
6624 if (vi->has_rss || vi->has_rss_hash_report) {
6625 if (!virtnet_commit_rss_command(vi)) {
6626 dev_warn(&vdev->dev, "RSS disabled because committing failed.\n");
6627 dev->hw_features &= ~NETIF_F_RXHASH;
6628 vi->has_rss_hash_report = false;
6629 vi->has_rss = false;
6630 }
6631 }
6632
6633 virtnet_set_queues(vi, vi->curr_queue_pairs);
6634
6635 /* A random MAC address has been assigned; notify the device.
6636 * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there
6637 * because many devices work fine without the MAC being set explicitly.
6638 */
6639 if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
6640 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
6641 struct scatterlist sg;
6642
6643 sg_init_one(&sg, dev->dev_addr, dev->addr_len);
6644 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
6645 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
6646 pr_debug("virtio_net: setting MAC address failed\n");
6647 rtnl_unlock();
6648 err = -EINVAL;
6649 goto free_unregister_netdev;
6650 }
6651 }
6652
6653 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) {
6654 struct virtio_net_stats_capabilities *stats_cap __free(kfree) = NULL;
6655 struct scatterlist sg;
6656 __le64 v;
6657
6658 stats_cap = kzalloc(sizeof(*stats_cap), GFP_KERNEL);
6659 if (!stats_cap) {
6660 rtnl_unlock();
6661 err = -ENOMEM;
6662 goto free_unregister_netdev;
6663 }
6664
6665 sg_init_one(&sg, stats_cap, sizeof(*stats_cap));
6666
6667 if (!virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS,
6668 VIRTIO_NET_CTRL_STATS_QUERY,
6669 NULL, &sg)) {
6670 pr_debug("virtio_net: fail to get stats capability\n");
6671 rtnl_unlock();
6672 err = -EINVAL;
6673 goto free_unregister_netdev;
6674 }
6675
6676 v = stats_cap->supported_stats_types[0];
6677 vi->device_stats_cap = le64_to_cpu(v);
6678 }
6679
6680 /* Assume link up if device can't report link status,
6681 * otherwise get link status from config. */
6682 netif_carrier_off(dev);
6683 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
6684 virtnet_config_changed_work(&vi->config_work);
6685 } else {
6686 vi->status = VIRTIO_NET_S_LINK_UP;
6687 virtnet_update_settings(vi);
6688 netif_carrier_on(dev);
6689 }
6690
6691 for (i = 0; i < ARRAY_SIZE(guest_offloads); i++)
6692 if (virtio_has_feature(vi->vdev, guest_offloads[i]))
6693 set_bit(guest_offloads[i], &vi->guest_offloads);
6694 vi->guest_offloads_capable = vi->guest_offloads;
6695
6696 rtnl_unlock();
6697
6698 err = virtnet_cpu_notif_add(vi);
6699 if (err) {
6700 pr_debug("virtio_net: registering cpu notifier failed\n");
6701 goto free_unregister_netdev;
6702 }
6703
6704 pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
6705 dev->name, max_queue_pairs);
6706
6707 return 0;
6708
6709 free_unregister_netdev:
6710 unregister_netdev(dev);
6711 free_failover:
6712 net_failover_destroy(vi->failover);
6713 free_vqs:
6714 virtio_reset_device(vdev);
6715 cancel_delayed_work_sync(&vi->refill);
6716 free_receive_page_frags(vi);
6717 virtnet_del_vqs(vi);
6718 free:
6719 free_netdev(dev);
6720 return err;
6721 }
6722
6723 static void remove_vq_common(struct virtnet_info *vi)
6724 {
6725 virtio_reset_device(vi->vdev);
6726
6727 /* Free unused buffers in both send and recv, if any. */
6728 free_unused_bufs(vi);
6729
6730 free_receive_bufs(vi);
6731
6732 free_receive_page_frags(vi);
6733
6734 virtnet_del_vqs(vi);
6735 }
6736
6737 static void virtnet_remove(struct virtio_device *vdev)
6738 {
6739 struct virtnet_info *vi = vdev->priv;
6740
6741 virtnet_cpu_notif_remove(vi);
6742
6743 /* Make sure no work handler is accessing the device. */
6744 flush_work(&vi->config_work);
6745 disable_rx_mode_work(vi);
6746 flush_work(&vi->rx_mode_work);
6747
6748 virtnet_free_irq_moder(vi);
6749
6750 unregister_netdev(vi->dev);
6751
6752 net_failover_destroy(vi->failover);
6753
6754 remove_vq_common(vi);
6755
6756 rss_indirection_table_free(&vi->rss);
6757
6758 free_netdev(vi->dev);
6759 }
6760
6761 static __maybe_unused int virtnet_freeze(struct virtio_device *vdev)
6762 {
6763 struct virtnet_info *vi = vdev->priv;
6764
6765 virtnet_cpu_notif_remove(vi);
6766 virtnet_freeze_down(vdev);
6767 remove_vq_common(vi);
6768
6769 return 0;
6770 }
6771
6772 static __maybe_unused int virtnet_restore(struct virtio_device *vdev)
6773 {
6774 struct virtnet_info *vi = vdev->priv;
6775 int err;
6776
6777 err = virtnet_restore_up(vdev);
6778 if (err)
6779 return err;
6780 virtnet_set_queues(vi, vi->curr_queue_pairs);
6781
6782 err = virtnet_cpu_notif_add(vi);
6783 if (err) {
6784 virtnet_freeze_down(vdev);
6785 remove_vq_common(vi);
6786 return err;
6787 }
6788
6789 return 0;
6790 }
6791
6792 static struct virtio_device_id id_table[] = {
6793 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
6794 { 0 },
6795 };
6796
6797 #define VIRTNET_FEATURES \
6798 VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \
6799 VIRTIO_NET_F_MAC, \
6800 VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
6801 VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
6802 VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \
6803 VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \
6804 VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \
6805 VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
6806 VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
6807 VIRTIO_NET_F_CTRL_MAC_ADDR, \
6808 VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
6809 VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
6810 VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \
6811 VIRTIO_NET_F_VQ_NOTF_COAL, \
6812 VIRTIO_NET_F_GUEST_HDRLEN, VIRTIO_NET_F_DEVICE_STATS
6813
6814 static unsigned int features[] = {
6815 VIRTNET_FEATURES,
6816 };
6817
6818 static unsigned int features_legacy[] = {
6819 VIRTNET_FEATURES,
6820 VIRTIO_NET_F_GSO,
6821 VIRTIO_F_ANY_LAYOUT,
6822 };
6823
6824 static struct virtio_driver virtio_net_driver = {
6825 .feature_table = features,
6826 .feature_table_size = ARRAY_SIZE(features),
6827 .feature_table_legacy = features_legacy,
6828 .feature_table_size_legacy = ARRAY_SIZE(features_legacy),
6829 .driver.name = KBUILD_MODNAME,
6830 .id_table = id_table,
6831 .validate = virtnet_validate,
6832 .probe = virtnet_probe,
6833 .remove = virtnet_remove,
6834 .config_changed = virtnet_config_changed,
6835 #ifdef CONFIG_PM_SLEEP
6836 .freeze = virtnet_freeze,
6837 .restore = virtnet_restore,
6838 #endif
6839 };
6840
6841 static __init int virtio_net_driver_init(void)
6842 {
6843 int ret;
6844
6845 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online",
6846 virtnet_cpu_online,
6847 virtnet_cpu_down_prep);
6848 if (ret < 0)
6849 goto out;
6850 virtionet_online = ret;
6851 ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead",
6852 NULL, virtnet_cpu_dead);
6853 if (ret)
6854 goto err_dead;
6855 ret = register_virtio_driver(&virtio_net_driver);
6856 if (ret)
6857 goto err_virtio;
6858 return 0;
6859 err_virtio:
6860 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
6861 err_dead:
6862 cpuhp_remove_multi_state(virtionet_online);
6863 out:
6864 return ret;
6865 }
6866 module_init(virtio_net_driver_init);
6867
6868 static __exit void virtio_net_driver_exit(void)
6869 {
6870 unregister_virtio_driver(&virtio_net_driver);
6871 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
6872 cpuhp_remove_multi_state(virtionet_online);
6873 }
6874 module_exit(virtio_net_driver_exit);
6875
6876 MODULE_DEVICE_TABLE(virtio, id_table);
6877 MODULE_DESCRIPTION("Virtio network driver");
6878 MODULE_LICENSE("GPL");
6879