1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* A network driver using virtio. 3 * 4 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation 5 */ 6 //#define DEBUG 7 #include <linux/netdevice.h> 8 #include <linux/etherdevice.h> 9 #include <linux/ethtool.h> 10 #include <linux/module.h> 11 #include <linux/virtio.h> 12 #include <linux/virtio_net.h> 13 #include <linux/bpf.h> 14 #include <linux/bpf_trace.h> 15 #include <linux/scatterlist.h> 16 #include <linux/if_vlan.h> 17 #include <linux/slab.h> 18 #include <linux/cpu.h> 19 #include <linux/average.h> 20 #include <linux/filter.h> 21 #include <linux/kernel.h> 22 #include <net/route.h> 23 #include <net/xdp.h> 24 #include <net/net_failover.h> 25 26 static int napi_weight = NAPI_POLL_WEIGHT; 27 module_param(napi_weight, int, 0444); 28 29 static bool csum = true, gso = true, napi_tx = true; 30 module_param(csum, bool, 0444); 31 module_param(gso, bool, 0444); 32 module_param(napi_tx, bool, 0644); 33 34 /* FIXME: MTU in config. */ 35 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN) 36 #define GOOD_COPY_LEN 128 37 38 #define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) 39 40 /* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */ 41 #define VIRTIO_XDP_HEADROOM 256 42 43 /* Separating two types of XDP xmit */ 44 #define VIRTIO_XDP_TX BIT(0) 45 #define VIRTIO_XDP_REDIR BIT(1) 46 47 #define VIRTIO_XDP_FLAG BIT(0) 48 49 /* RX packet size EWMA. The average packet size is used to determine the packet 50 * buffer size when refilling RX rings. As the entire RX ring may be refilled 51 * at once, the weight is chosen so that the EWMA will be insensitive to short- 52 * term, transient changes in packet size. 53 */ 54 DECLARE_EWMA(pkt_len, 0, 64) 55 56 #define VIRTNET_DRIVER_VERSION "1.0.0" 57 58 static const unsigned long guest_offloads[] = { 59 VIRTIO_NET_F_GUEST_TSO4, 60 VIRTIO_NET_F_GUEST_TSO6, 61 VIRTIO_NET_F_GUEST_ECN, 62 VIRTIO_NET_F_GUEST_UFO, 63 VIRTIO_NET_F_GUEST_CSUM, 64 VIRTIO_NET_F_GUEST_USO4, 65 VIRTIO_NET_F_GUEST_USO6 66 }; 67 68 #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ 69 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ 70 (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ 71 (1ULL << VIRTIO_NET_F_GUEST_UFO) | \ 72 (1ULL << VIRTIO_NET_F_GUEST_USO4) | \ 73 (1ULL << VIRTIO_NET_F_GUEST_USO6)) 74 75 struct virtnet_stat_desc { 76 char desc[ETH_GSTRING_LEN]; 77 size_t offset; 78 }; 79 80 struct virtnet_sq_stats { 81 struct u64_stats_sync syncp; 82 u64 packets; 83 u64 bytes; 84 u64 xdp_tx; 85 u64 xdp_tx_drops; 86 u64 kicks; 87 u64 tx_timeouts; 88 }; 89 90 struct virtnet_rq_stats { 91 struct u64_stats_sync syncp; 92 u64 packets; 93 u64 bytes; 94 u64 drops; 95 u64 xdp_packets; 96 u64 xdp_tx; 97 u64 xdp_redirects; 98 u64 xdp_drops; 99 u64 kicks; 100 }; 101 102 #define VIRTNET_SQ_STAT(m) offsetof(struct virtnet_sq_stats, m) 103 #define VIRTNET_RQ_STAT(m) offsetof(struct virtnet_rq_stats, m) 104 105 static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = { 106 { "packets", VIRTNET_SQ_STAT(packets) }, 107 { "bytes", VIRTNET_SQ_STAT(bytes) }, 108 { "xdp_tx", VIRTNET_SQ_STAT(xdp_tx) }, 109 { "xdp_tx_drops", VIRTNET_SQ_STAT(xdp_tx_drops) }, 110 { "kicks", VIRTNET_SQ_STAT(kicks) }, 111 { "tx_timeouts", VIRTNET_SQ_STAT(tx_timeouts) }, 112 }; 113 114 static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = { 115 { "packets", VIRTNET_RQ_STAT(packets) }, 116 { "bytes", VIRTNET_RQ_STAT(bytes) }, 117 { "drops", VIRTNET_RQ_STAT(drops) }, 118 { "xdp_packets", VIRTNET_RQ_STAT(xdp_packets) }, 119 { 
"xdp_tx", VIRTNET_RQ_STAT(xdp_tx) }, 120 { "xdp_redirects", VIRTNET_RQ_STAT(xdp_redirects) }, 121 { "xdp_drops", VIRTNET_RQ_STAT(xdp_drops) }, 122 { "kicks", VIRTNET_RQ_STAT(kicks) }, 123 }; 124 125 #define VIRTNET_SQ_STATS_LEN ARRAY_SIZE(virtnet_sq_stats_desc) 126 #define VIRTNET_RQ_STATS_LEN ARRAY_SIZE(virtnet_rq_stats_desc) 127 128 /* Internal representation of a send virtqueue */ 129 struct send_queue { 130 /* Virtqueue associated with this send _queue */ 131 struct virtqueue *vq; 132 133 /* TX: fragments + linear part + virtio header */ 134 struct scatterlist sg[MAX_SKB_FRAGS + 2]; 135 136 /* Name of the send queue: output.$index */ 137 char name[16]; 138 139 struct virtnet_sq_stats stats; 140 141 struct napi_struct napi; 142 143 /* Record whether sq is in reset state. */ 144 bool reset; 145 }; 146 147 /* Internal representation of a receive virtqueue */ 148 struct receive_queue { 149 /* Virtqueue associated with this receive_queue */ 150 struct virtqueue *vq; 151 152 struct napi_struct napi; 153 154 struct bpf_prog __rcu *xdp_prog; 155 156 struct virtnet_rq_stats stats; 157 158 /* Chain pages by the private ptr. */ 159 struct page *pages; 160 161 /* Average packet length for mergeable receive buffers. */ 162 struct ewma_pkt_len mrg_avg_pkt_len; 163 164 /* Page frag for packet buffer allocation. */ 165 struct page_frag alloc_frag; 166 167 /* RX: fragments + linear part + virtio header */ 168 struct scatterlist sg[MAX_SKB_FRAGS + 2]; 169 170 /* Min single buffer size for mergeable buffers case. */ 171 unsigned int min_buf_len; 172 173 /* Name of this receive queue: input.$index */ 174 char name[16]; 175 176 struct xdp_rxq_info xdp_rxq; 177 }; 178 179 /* This structure can contain rss message with maximum settings for indirection table and keysize 180 * Note, that default structure that describes RSS configuration virtio_net_rss_config 181 * contains same info but can't handle table values. 182 * In any case, structure would be passed to virtio hw through sg_buf split by parts 183 * because table sizes may be differ according to the device configuration. 184 */ 185 #define VIRTIO_NET_RSS_MAX_KEY_SIZE 40 186 #define VIRTIO_NET_RSS_MAX_TABLE_LEN 128 187 struct virtio_net_ctrl_rss { 188 u32 hash_types; 189 u16 indirection_table_mask; 190 u16 unclassified_queue; 191 u16 indirection_table[VIRTIO_NET_RSS_MAX_TABLE_LEN]; 192 u16 max_tx_vq; 193 u8 hash_key_length; 194 u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE]; 195 }; 196 197 /* Control VQ buffers: protected by the rtnl lock */ 198 struct control_buf { 199 struct virtio_net_ctrl_hdr hdr; 200 virtio_net_ctrl_ack status; 201 struct virtio_net_ctrl_mq mq; 202 u8 promisc; 203 u8 allmulti; 204 __virtio16 vid; 205 __virtio64 offloads; 206 struct virtio_net_ctrl_rss rss; 207 }; 208 209 struct virtnet_info { 210 struct virtio_device *vdev; 211 struct virtqueue *cvq; 212 struct net_device *dev; 213 struct send_queue *sq; 214 struct receive_queue *rq; 215 unsigned int status; 216 217 /* Max # of queue pairs supported by the device */ 218 u16 max_queue_pairs; 219 220 /* # of queue pairs currently used by the driver */ 221 u16 curr_queue_pairs; 222 223 /* # of XDP queue pairs currently used by the driver */ 224 u16 xdp_queue_pairs; 225 226 /* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */ 227 bool xdp_enabled; 228 229 /* I like... big packets and I cannot lie! 
 */
	bool big_packets;

	/* number of sg entries allocated for big packets */
	unsigned int big_packets_num_skbfrags;

	/* Host will merge rx buffers for big packets (shake it! shake it!) */
	bool mergeable_rx_bufs;

	/* Host supports rss and/or hash report */
	bool has_rss;
	bool has_rss_hash_report;
	u8 rss_key_size;
	u16 rss_indir_table_size;
	u32 rss_hash_types_supported;
	u32 rss_hash_types_saved;

	/* Has control virtqueue */
	bool has_cvq;

	/* Host can handle any s/g split between our header and packet data */
	bool any_header_sg;

	/* Packet virtio header size */
	u8 hdr_len;

	/* Work struct for delayed refilling if we run low on memory. */
	struct delayed_work refill;

	/* Is delayed refill enabled? */
	bool refill_enabled;

	/* The lock to synchronize the access to refill_enabled */
	spinlock_t refill_lock;

	/* Work struct for config space updates */
	struct work_struct config_work;

	/* Is the affinity hint set for the virtqueues? */
	bool affinity_hint_set;

	/* CPU hotplug instances for online & dead */
	struct hlist_node node;
	struct hlist_node node_dead;

	struct control_buf *ctrl;

	/* Ethtool settings */
	u8 duplex;
	u32 speed;

	/* Interrupt coalescing settings */
	u32 tx_usecs;
	u32 rx_usecs;
	u32 tx_max_packets;
	u32 rx_max_packets;

	unsigned long guest_offloads;
	unsigned long guest_offloads_capable;

	/* failover when STANDBY feature enabled */
	struct failover *failover;
};

struct padded_vnet_hdr {
	struct virtio_net_hdr_v1_hash hdr;
	/*
	 * hdr is in a separate sg buffer, and the data sg buffer shares the
	 * same page with this header sg. This padding makes the next sg
	 * 16 byte aligned after the header.
	 */
	char padding[12];
};

static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf);
static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);

static bool is_xdp_frame(void *ptr)
{
	return (unsigned long)ptr & VIRTIO_XDP_FLAG;
}

static void *xdp_to_ptr(struct xdp_frame *ptr)
{
	return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG);
}

static struct xdp_frame *ptr_to_xdp(void *ptr)
{
	return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
}

/* Converting between virtqueue no. and kernel tx/rx queue no.
 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
 */
static int vq2txq(struct virtqueue *vq)
{
	return (vq->index - 1) / 2;
}

static int txq2vq(int txq)
{
	return txq * 2 + 1;
}

static int vq2rxq(struct virtqueue *vq)
{
	return vq->index / 2;
}

static int rxq2vq(int rxq)
{
	return rxq * 2;
}

static inline struct virtio_net_hdr_mrg_rxbuf *skb_vnet_hdr(struct sk_buff *skb)
{
	return (struct virtio_net_hdr_mrg_rxbuf *)skb->cb;
}

/*
 * private is used to chain pages for big packets; put the whole
 * most recently used list at the beginning for reuse.
 */
static void give_pages(struct receive_queue *rq, struct page *page)
{
	struct page *end;

	/* Find end of list, sew whole thing into vi->rq.pages.
*/ 358 for (end = page; end->private; end = (struct page *)end->private); 359 end->private = (unsigned long)rq->pages; 360 rq->pages = page; 361 } 362 363 static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask) 364 { 365 struct page *p = rq->pages; 366 367 if (p) { 368 rq->pages = (struct page *)p->private; 369 /* clear private here, it is used to chain pages */ 370 p->private = 0; 371 } else 372 p = alloc_page(gfp_mask); 373 return p; 374 } 375 376 static void enable_delayed_refill(struct virtnet_info *vi) 377 { 378 spin_lock_bh(&vi->refill_lock); 379 vi->refill_enabled = true; 380 spin_unlock_bh(&vi->refill_lock); 381 } 382 383 static void disable_delayed_refill(struct virtnet_info *vi) 384 { 385 spin_lock_bh(&vi->refill_lock); 386 vi->refill_enabled = false; 387 spin_unlock_bh(&vi->refill_lock); 388 } 389 390 static void virtqueue_napi_schedule(struct napi_struct *napi, 391 struct virtqueue *vq) 392 { 393 if (napi_schedule_prep(napi)) { 394 virtqueue_disable_cb(vq); 395 __napi_schedule(napi); 396 } 397 } 398 399 static void virtqueue_napi_complete(struct napi_struct *napi, 400 struct virtqueue *vq, int processed) 401 { 402 int opaque; 403 404 opaque = virtqueue_enable_cb_prepare(vq); 405 if (napi_complete_done(napi, processed)) { 406 if (unlikely(virtqueue_poll(vq, opaque))) 407 virtqueue_napi_schedule(napi, vq); 408 } else { 409 virtqueue_disable_cb(vq); 410 } 411 } 412 413 static void skb_xmit_done(struct virtqueue *vq) 414 { 415 struct virtnet_info *vi = vq->vdev->priv; 416 struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi; 417 418 /* Suppress further interrupts. */ 419 virtqueue_disable_cb(vq); 420 421 if (napi->weight) 422 virtqueue_napi_schedule(napi, vq); 423 else 424 /* We were probably waiting for more output buffers. 
*/ 425 netif_wake_subqueue(vi->dev, vq2txq(vq)); 426 } 427 428 #define MRG_CTX_HEADER_SHIFT 22 429 static void *mergeable_len_to_ctx(unsigned int truesize, 430 unsigned int headroom) 431 { 432 return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize); 433 } 434 435 static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx) 436 { 437 return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT; 438 } 439 440 static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx) 441 { 442 return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1); 443 } 444 445 /* Called from bottom half context */ 446 static struct sk_buff *page_to_skb(struct virtnet_info *vi, 447 struct receive_queue *rq, 448 struct page *page, unsigned int offset, 449 unsigned int len, unsigned int truesize) 450 { 451 struct sk_buff *skb; 452 struct virtio_net_hdr_mrg_rxbuf *hdr; 453 unsigned int copy, hdr_len, hdr_padded_len; 454 struct page *page_to_free = NULL; 455 int tailroom, shinfo_size; 456 char *p, *hdr_p, *buf; 457 458 p = page_address(page) + offset; 459 hdr_p = p; 460 461 hdr_len = vi->hdr_len; 462 if (vi->mergeable_rx_bufs) 463 hdr_padded_len = hdr_len; 464 else 465 hdr_padded_len = sizeof(struct padded_vnet_hdr); 466 467 buf = p; 468 len -= hdr_len; 469 offset += hdr_padded_len; 470 p += hdr_padded_len; 471 tailroom = truesize - hdr_padded_len - len; 472 473 shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 474 475 /* copy small packet so we can reuse these pages */ 476 if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) { 477 skb = build_skb(buf, truesize); 478 if (unlikely(!skb)) 479 return NULL; 480 481 skb_reserve(skb, p - buf); 482 skb_put(skb, len); 483 484 page = (struct page *)page->private; 485 if (page) 486 give_pages(rq, page); 487 goto ok; 488 } 489 490 /* copy small packet so we can reuse these pages for small data */ 491 skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN); 492 if (unlikely(!skb)) 493 return NULL; 494 495 /* Copy all frame if it fits skb->head, otherwise 496 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed. 497 */ 498 if (len <= skb_tailroom(skb)) 499 copy = len; 500 else 501 copy = ETH_HLEN; 502 skb_put_data(skb, p, copy); 503 504 len -= copy; 505 offset += copy; 506 507 if (vi->mergeable_rx_bufs) { 508 if (len) 509 skb_add_rx_frag(skb, 0, page, offset, len, truesize); 510 else 511 page_to_free = page; 512 goto ok; 513 } 514 515 /* 516 * Verify that we can indeed put this data into a skb. 517 * This is here to handle cases when the device erroneously 518 * tries to receive more than is possible. This is usually 519 * the case of a broken device. 
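	 * The frame is dropped below instead, since the frag-filling loop
	 * that follows has no other bound on the number of pages it attaches.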
 */
	if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
		net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
		dev_kfree_skb(skb);
		return NULL;
	}
	BUG_ON(offset >= PAGE_SIZE);
	while (len) {
		unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
				frag_size, truesize);
		len -= frag_size;
		page = (struct page *)page->private;
		offset = 0;
	}

	if (page)
		give_pages(rq, page);

ok:
	hdr = skb_vnet_hdr(skb);
	memcpy(hdr, hdr_p, hdr_len);
	if (page_to_free)
		put_page(page_to_free);

	return skb;
}

static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
				  struct send_queue *sq,
				  struct xdp_frame *xdpf)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr;
	struct skb_shared_info *shinfo;
	u8 nr_frags = 0;
	int err, i;

	if (unlikely(xdpf->headroom < vi->hdr_len))
		return -EOVERFLOW;

	if (unlikely(xdp_frame_has_frags(xdpf))) {
		shinfo = xdp_get_shared_info_from_frame(xdpf);
		nr_frags = shinfo->nr_frags;
	}

	/* In the wrapping function virtnet_xdp_xmit() we need to free up the
	 * pending old buffers; there xdp_get_frame_len() and
	 * xdp_return_frame() work out the position of skb_shared_info from
	 * xdpf->data and xdpf->headroom. Therefore the headroom must be
	 * updated here, in sync with the data pointer.
	 */
	xdpf->headroom -= vi->hdr_len;
	xdpf->data -= vi->hdr_len;
	/* Zero header and leave csum up to XDP layers */
	hdr = xdpf->data;
	memset(hdr, 0, vi->hdr_len);
	xdpf->len += vi->hdr_len;

	sg_init_table(sq->sg, nr_frags + 1);
	sg_set_buf(sq->sg, xdpf->data, xdpf->len);
	for (i = 0; i < nr_frags; i++) {
		skb_frag_t *frag = &shinfo->frags[i];

		sg_set_page(&sq->sg[i + 1], skb_frag_page(frag),
			    skb_frag_size(frag), skb_frag_off(frag));
	}

	err = virtqueue_add_outbuf(sq->vq, sq->sg, nr_frags + 1,
				   xdp_to_ptr(xdpf), GFP_ATOMIC);
	if (unlikely(err))
		return -ENOSPC; /* Caller handles free/refcnt */

	return 0;
}

/* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on
 * the current cpu, so it does not need to be locked.
 *
 * Here we use a macro instead of inline functions because we have to deal with
 * three issues at the same time: 1. the choice of sq, 2. conditionally taking
 * and releasing the txq lock, and 3. keeping sparse happy. It is difficult
 * for two inline functions to solve these three problems at the same time.
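 *
 * The macro below therefore picks a dedicated per-CPU sq (taking the txq
 * only via __netif_tx_acquire(), to keep sparse happy) when
 * curr_queue_pairs > nr_cpu_ids, and otherwise falls back to a shared sq
 * chosen as cpu % curr_queue_pairs, which must be protected by
 * __netif_tx_lock().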
603 */ 604 #define virtnet_xdp_get_sq(vi) ({ \ 605 int cpu = smp_processor_id(); \ 606 struct netdev_queue *txq; \ 607 typeof(vi) v = (vi); \ 608 unsigned int qp; \ 609 \ 610 if (v->curr_queue_pairs > nr_cpu_ids) { \ 611 qp = v->curr_queue_pairs - v->xdp_queue_pairs; \ 612 qp += cpu; \ 613 txq = netdev_get_tx_queue(v->dev, qp); \ 614 __netif_tx_acquire(txq); \ 615 } else { \ 616 qp = cpu % v->curr_queue_pairs; \ 617 txq = netdev_get_tx_queue(v->dev, qp); \ 618 __netif_tx_lock(txq, cpu); \ 619 } \ 620 v->sq + qp; \ 621 }) 622 623 #define virtnet_xdp_put_sq(vi, q) { \ 624 struct netdev_queue *txq; \ 625 typeof(vi) v = (vi); \ 626 \ 627 txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \ 628 if (v->curr_queue_pairs > nr_cpu_ids) \ 629 __netif_tx_release(txq); \ 630 else \ 631 __netif_tx_unlock(txq); \ 632 } 633 634 static int virtnet_xdp_xmit(struct net_device *dev, 635 int n, struct xdp_frame **frames, u32 flags) 636 { 637 struct virtnet_info *vi = netdev_priv(dev); 638 struct receive_queue *rq = vi->rq; 639 struct bpf_prog *xdp_prog; 640 struct send_queue *sq; 641 unsigned int len; 642 int packets = 0; 643 int bytes = 0; 644 int nxmit = 0; 645 int kicks = 0; 646 void *ptr; 647 int ret; 648 int i; 649 650 /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this 651 * indicate XDP resources have been successfully allocated. 652 */ 653 xdp_prog = rcu_access_pointer(rq->xdp_prog); 654 if (!xdp_prog) 655 return -ENXIO; 656 657 sq = virtnet_xdp_get_sq(vi); 658 659 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { 660 ret = -EINVAL; 661 goto out; 662 } 663 664 /* Free up any pending old buffers before queueing new ones. */ 665 while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { 666 if (likely(is_xdp_frame(ptr))) { 667 struct xdp_frame *frame = ptr_to_xdp(ptr); 668 669 bytes += xdp_get_frame_len(frame); 670 xdp_return_frame(frame); 671 } else { 672 struct sk_buff *skb = ptr; 673 674 bytes += skb->len; 675 napi_consume_skb(skb, false); 676 } 677 packets++; 678 } 679 680 for (i = 0; i < n; i++) { 681 struct xdp_frame *xdpf = frames[i]; 682 683 if (__virtnet_xdp_xmit_one(vi, sq, xdpf)) 684 break; 685 nxmit++; 686 } 687 ret = nxmit; 688 689 if (flags & XDP_XMIT_FLUSH) { 690 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) 691 kicks = 1; 692 } 693 out: 694 u64_stats_update_begin(&sq->stats.syncp); 695 sq->stats.bytes += bytes; 696 sq->stats.packets += packets; 697 sq->stats.xdp_tx += n; 698 sq->stats.xdp_tx_drops += n - nxmit; 699 sq->stats.kicks += kicks; 700 u64_stats_update_end(&sq->stats.syncp); 701 702 virtnet_xdp_put_sq(vi, sq); 703 return ret; 704 } 705 706 static unsigned int virtnet_get_headroom(struct virtnet_info *vi) 707 { 708 return vi->xdp_enabled ? VIRTIO_XDP_HEADROOM : 0; 709 } 710 711 /* We copy the packet for XDP in the following cases: 712 * 713 * 1) Packet is scattered across multiple rx buffers. 714 * 2) Headroom space is insufficient. 715 * 716 * This is inefficient but it's a temporary condition that 717 * we hit right after XDP is enabled and until queue is refilled 718 * with large buffers with sufficient headroom - so it should affect 719 * at most queue size packets. 720 * Afterwards, the conditions to enable 721 * XDP should preclude the underlying device from sending packets 722 * across multiple buffers (num_buf > 1), and we make sure buffers 723 * have enough headroom. 
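 *
 * xdp_linearize_page() below performs that copy: it collapses the scattered
 * buffers of one frame into a single freshly allocated page, at the offset
 * (headroom) supplied by the caller.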
724 */ 725 static struct page *xdp_linearize_page(struct receive_queue *rq, 726 u16 *num_buf, 727 struct page *p, 728 int offset, 729 int page_off, 730 unsigned int *len) 731 { 732 struct page *page = alloc_page(GFP_ATOMIC); 733 734 if (!page) 735 return NULL; 736 737 memcpy(page_address(page) + page_off, page_address(p) + offset, *len); 738 page_off += *len; 739 740 while (--*num_buf) { 741 int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 742 unsigned int buflen; 743 void *buf; 744 int off; 745 746 buf = virtqueue_get_buf(rq->vq, &buflen); 747 if (unlikely(!buf)) 748 goto err_buf; 749 750 p = virt_to_head_page(buf); 751 off = buf - page_address(p); 752 753 /* guard against a misconfigured or uncooperative backend that 754 * is sending packet larger than the MTU. 755 */ 756 if ((page_off + buflen + tailroom) > PAGE_SIZE) { 757 put_page(p); 758 goto err_buf; 759 } 760 761 memcpy(page_address(page) + page_off, 762 page_address(p) + off, buflen); 763 page_off += buflen; 764 put_page(p); 765 } 766 767 /* Headroom does not contribute to packet length */ 768 *len = page_off - VIRTIO_XDP_HEADROOM; 769 return page; 770 err_buf: 771 __free_pages(page, 0); 772 return NULL; 773 } 774 775 static struct sk_buff *receive_small(struct net_device *dev, 776 struct virtnet_info *vi, 777 struct receive_queue *rq, 778 void *buf, void *ctx, 779 unsigned int len, 780 unsigned int *xdp_xmit, 781 struct virtnet_rq_stats *stats) 782 { 783 struct sk_buff *skb; 784 struct bpf_prog *xdp_prog; 785 unsigned int xdp_headroom = (unsigned long)ctx; 786 unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom; 787 unsigned int headroom = vi->hdr_len + header_offset; 788 unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 789 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 790 struct page *page = virt_to_head_page(buf); 791 unsigned int delta = 0; 792 struct page *xdp_page; 793 int err; 794 unsigned int metasize = 0; 795 796 len -= vi->hdr_len; 797 stats->bytes += len; 798 799 if (unlikely(len > GOOD_PACKET_LEN)) { 800 pr_debug("%s: rx error: len %u exceeds max size %d\n", 801 dev->name, len, GOOD_PACKET_LEN); 802 dev->stats.rx_length_errors++; 803 goto err; 804 } 805 806 if (likely(!vi->xdp_enabled)) { 807 xdp_prog = NULL; 808 goto skip_xdp; 809 } 810 811 rcu_read_lock(); 812 xdp_prog = rcu_dereference(rq->xdp_prog); 813 if (xdp_prog) { 814 struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset; 815 struct xdp_frame *xdpf; 816 struct xdp_buff xdp; 817 void *orig_data; 818 u32 act; 819 820 if (unlikely(hdr->hdr.gso_type)) 821 goto err_xdp; 822 823 if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) { 824 int offset = buf - page_address(page) + header_offset; 825 unsigned int tlen = len + vi->hdr_len; 826 u16 num_buf = 1; 827 828 xdp_headroom = virtnet_get_headroom(vi); 829 header_offset = VIRTNET_RX_PAD + xdp_headroom; 830 headroom = vi->hdr_len + header_offset; 831 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 832 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 833 xdp_page = xdp_linearize_page(rq, &num_buf, page, 834 offset, header_offset, 835 &tlen); 836 if (!xdp_page) 837 goto err_xdp; 838 839 buf = page_address(xdp_page); 840 put_page(page); 841 page = xdp_page; 842 } 843 844 xdp_init_buff(&xdp, buflen, &rq->xdp_rxq); 845 xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len, 846 xdp_headroom, len, true); 847 orig_data = xdp.data; 848 act = bpf_prog_run_xdp(xdp_prog, &xdp); 849 stats->xdp_packets++; 850 851 switch (act) { 852 case XDP_PASS: 853 /* Recalculate length in 
case bpf program changed it */ 854 delta = orig_data - xdp.data; 855 len = xdp.data_end - xdp.data; 856 metasize = xdp.data - xdp.data_meta; 857 break; 858 case XDP_TX: 859 stats->xdp_tx++; 860 xdpf = xdp_convert_buff_to_frame(&xdp); 861 if (unlikely(!xdpf)) 862 goto err_xdp; 863 err = virtnet_xdp_xmit(dev, 1, &xdpf, 0); 864 if (unlikely(!err)) { 865 xdp_return_frame_rx_napi(xdpf); 866 } else if (unlikely(err < 0)) { 867 trace_xdp_exception(vi->dev, xdp_prog, act); 868 goto err_xdp; 869 } 870 *xdp_xmit |= VIRTIO_XDP_TX; 871 rcu_read_unlock(); 872 goto xdp_xmit; 873 case XDP_REDIRECT: 874 stats->xdp_redirects++; 875 err = xdp_do_redirect(dev, &xdp, xdp_prog); 876 if (err) 877 goto err_xdp; 878 *xdp_xmit |= VIRTIO_XDP_REDIR; 879 rcu_read_unlock(); 880 goto xdp_xmit; 881 default: 882 bpf_warn_invalid_xdp_action(vi->dev, xdp_prog, act); 883 fallthrough; 884 case XDP_ABORTED: 885 trace_xdp_exception(vi->dev, xdp_prog, act); 886 goto err_xdp; 887 case XDP_DROP: 888 goto err_xdp; 889 } 890 } 891 rcu_read_unlock(); 892 893 skip_xdp: 894 skb = build_skb(buf, buflen); 895 if (!skb) 896 goto err; 897 skb_reserve(skb, headroom - delta); 898 skb_put(skb, len); 899 if (!xdp_prog) { 900 buf += header_offset; 901 memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len); 902 } /* keep zeroed vnet hdr since XDP is loaded */ 903 904 if (metasize) 905 skb_metadata_set(skb, metasize); 906 907 return skb; 908 909 err_xdp: 910 rcu_read_unlock(); 911 stats->xdp_drops++; 912 err: 913 stats->drops++; 914 put_page(page); 915 xdp_xmit: 916 return NULL; 917 } 918 919 static struct sk_buff *receive_big(struct net_device *dev, 920 struct virtnet_info *vi, 921 struct receive_queue *rq, 922 void *buf, 923 unsigned int len, 924 struct virtnet_rq_stats *stats) 925 { 926 struct page *page = buf; 927 struct sk_buff *skb = 928 page_to_skb(vi, rq, page, 0, len, PAGE_SIZE); 929 930 stats->bytes += len - vi->hdr_len; 931 if (unlikely(!skb)) 932 goto err; 933 934 return skb; 935 936 err: 937 stats->drops++; 938 give_pages(rq, page); 939 return NULL; 940 } 941 942 /* Why not use xdp_build_skb_from_frame() ? 943 * XDP core assumes that xdp frags are PAGE_SIZE in length, while in 944 * virtio-net there are 2 points that do not match its requirements: 945 * 1. The size of the prefilled buffer is not fixed before xdp is set. 946 * 2. xdp_build_skb_from_frame() does more checks that we don't need, 947 * like eth_type_trans() (which virtio-net does in receive_buf()). 948 */ 949 static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev, 950 struct virtnet_info *vi, 951 struct xdp_buff *xdp, 952 unsigned int xdp_frags_truesz) 953 { 954 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 955 unsigned int headroom, data_len; 956 struct sk_buff *skb; 957 int metasize; 958 u8 nr_frags; 959 960 if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) { 961 pr_debug("Error building skb as missing reserved tailroom for xdp"); 962 return NULL; 963 } 964 965 if (unlikely(xdp_buff_has_frags(xdp))) 966 nr_frags = sinfo->nr_frags; 967 968 skb = build_skb(xdp->data_hard_start, xdp->frame_sz); 969 if (unlikely(!skb)) 970 return NULL; 971 972 headroom = xdp->data - xdp->data_hard_start; 973 data_len = xdp->data_end - xdp->data; 974 skb_reserve(skb, headroom); 975 __skb_put(skb, data_len); 976 977 metasize = xdp->data - xdp->data_meta; 978 metasize = metasize > 0 ? 
metasize : 0; 979 if (metasize) 980 skb_metadata_set(skb, metasize); 981 982 if (unlikely(xdp_buff_has_frags(xdp))) 983 xdp_update_skb_shared_info(skb, nr_frags, 984 sinfo->xdp_frags_size, 985 xdp_frags_truesz, 986 xdp_buff_is_frag_pfmemalloc(xdp)); 987 988 return skb; 989 } 990 991 /* TODO: build xdp in big mode */ 992 static int virtnet_build_xdp_buff_mrg(struct net_device *dev, 993 struct virtnet_info *vi, 994 struct receive_queue *rq, 995 struct xdp_buff *xdp, 996 void *buf, 997 unsigned int len, 998 unsigned int frame_sz, 999 u16 *num_buf, 1000 unsigned int *xdp_frags_truesize, 1001 struct virtnet_rq_stats *stats) 1002 { 1003 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 1004 unsigned int headroom, tailroom, room; 1005 unsigned int truesize, cur_frag_size; 1006 struct skb_shared_info *shinfo; 1007 unsigned int xdp_frags_truesz = 0; 1008 struct page *page; 1009 skb_frag_t *frag; 1010 int offset; 1011 void *ctx; 1012 1013 xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq); 1014 xdp_prepare_buff(xdp, buf - VIRTIO_XDP_HEADROOM, 1015 VIRTIO_XDP_HEADROOM + vi->hdr_len, len - vi->hdr_len, true); 1016 1017 if (*num_buf > 1) { 1018 /* If we want to build multi-buffer xdp, we need 1019 * to specify that the flags of xdp_buff have the 1020 * XDP_FLAGS_HAS_FRAG bit. 1021 */ 1022 if (!xdp_buff_has_frags(xdp)) 1023 xdp_buff_set_frags_flag(xdp); 1024 1025 shinfo = xdp_get_shared_info_from_buff(xdp); 1026 shinfo->nr_frags = 0; 1027 shinfo->xdp_frags_size = 0; 1028 } 1029 1030 if ((*num_buf - 1) > MAX_SKB_FRAGS) 1031 return -EINVAL; 1032 1033 while ((--*num_buf) >= 1) { 1034 buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx); 1035 if (unlikely(!buf)) { 1036 pr_debug("%s: rx error: %d buffers out of %d missing\n", 1037 dev->name, *num_buf, 1038 virtio16_to_cpu(vi->vdev, hdr->num_buffers)); 1039 dev->stats.rx_length_errors++; 1040 return -EINVAL; 1041 } 1042 1043 stats->bytes += len; 1044 page = virt_to_head_page(buf); 1045 offset = buf - page_address(page); 1046 1047 truesize = mergeable_ctx_to_truesize(ctx); 1048 headroom = mergeable_ctx_to_headroom(ctx); 1049 tailroom = headroom ? 
sizeof(struct skb_shared_info) : 0; 1050 room = SKB_DATA_ALIGN(headroom + tailroom); 1051 1052 cur_frag_size = truesize; 1053 xdp_frags_truesz += cur_frag_size; 1054 if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) { 1055 put_page(page); 1056 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 1057 dev->name, len, (unsigned long)(truesize - room)); 1058 dev->stats.rx_length_errors++; 1059 return -EINVAL; 1060 } 1061 1062 frag = &shinfo->frags[shinfo->nr_frags++]; 1063 __skb_frag_set_page(frag, page); 1064 skb_frag_off_set(frag, offset); 1065 skb_frag_size_set(frag, len); 1066 if (page_is_pfmemalloc(page)) 1067 xdp_buff_set_frag_pfmemalloc(xdp); 1068 1069 shinfo->xdp_frags_size += len; 1070 } 1071 1072 *xdp_frags_truesize = xdp_frags_truesz; 1073 return 0; 1074 } 1075 1076 static struct sk_buff *receive_mergeable(struct net_device *dev, 1077 struct virtnet_info *vi, 1078 struct receive_queue *rq, 1079 void *buf, 1080 void *ctx, 1081 unsigned int len, 1082 unsigned int *xdp_xmit, 1083 struct virtnet_rq_stats *stats) 1084 { 1085 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 1086 u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 1087 struct page *page = virt_to_head_page(buf); 1088 int offset = buf - page_address(page); 1089 struct sk_buff *head_skb, *curr_skb; 1090 struct bpf_prog *xdp_prog; 1091 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 1092 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 1093 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 1094 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); 1095 unsigned int frame_sz, xdp_room; 1096 int err; 1097 1098 head_skb = NULL; 1099 stats->bytes += len - vi->hdr_len; 1100 1101 if (unlikely(len > truesize - room)) { 1102 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 1103 dev->name, len, (unsigned long)(truesize - room)); 1104 dev->stats.rx_length_errors++; 1105 goto err_skb; 1106 } 1107 1108 if (likely(!vi->xdp_enabled)) { 1109 xdp_prog = NULL; 1110 goto skip_xdp; 1111 } 1112 1113 rcu_read_lock(); 1114 xdp_prog = rcu_dereference(rq->xdp_prog); 1115 if (xdp_prog) { 1116 unsigned int xdp_frags_truesz = 0; 1117 struct skb_shared_info *shinfo; 1118 struct xdp_frame *xdpf; 1119 struct page *xdp_page; 1120 struct xdp_buff xdp; 1121 void *data; 1122 u32 act; 1123 int i; 1124 1125 /* Transient failure which in theory could occur if 1126 * in-flight packets from before XDP was enabled reach 1127 * the receive path after XDP is loaded. 1128 */ 1129 if (unlikely(hdr->hdr.gso_type)) 1130 goto err_xdp; 1131 1132 /* Now XDP core assumes frag size is PAGE_SIZE, but buffers 1133 * with headroom may add hole in truesize, which 1134 * make their length exceed PAGE_SIZE. So we disabled the 1135 * hole mechanism for xdp. See add_recvbuf_mergeable(). 1136 */ 1137 frame_sz = truesize; 1138 1139 /* This happens when headroom is not enough because 1140 * of the buffer was prefilled before XDP is set. 1141 * This should only happen for the first several packets. 1142 * In fact, vq reset can be used here to help us clean up 1143 * the prefilled buffers, but many existing devices do not 1144 * support it, and we don't want to bother users who are 1145 * using xdp normally. 
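		 *
		 * The two fallbacks below handle this: linearize the frame
		 * into a fresh page when the program cannot take frags, or
		 * copy it into a page with enough headroom when only the
		 * headroom is short.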
1146 */ 1147 if (!xdp_prog->aux->xdp_has_frags && 1148 (num_buf > 1 || headroom < virtnet_get_headroom(vi))) { 1149 /* linearize data for XDP */ 1150 xdp_page = xdp_linearize_page(rq, &num_buf, 1151 page, offset, 1152 VIRTIO_XDP_HEADROOM, 1153 &len); 1154 frame_sz = PAGE_SIZE; 1155 1156 if (!xdp_page) 1157 goto err_xdp; 1158 offset = VIRTIO_XDP_HEADROOM; 1159 } else if (unlikely(headroom < virtnet_get_headroom(vi))) { 1160 xdp_room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM + 1161 sizeof(struct skb_shared_info)); 1162 if (len + xdp_room > PAGE_SIZE) 1163 goto err_xdp; 1164 1165 xdp_page = alloc_page(GFP_ATOMIC); 1166 if (!xdp_page) 1167 goto err_xdp; 1168 1169 memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM, 1170 page_address(page) + offset, len); 1171 frame_sz = PAGE_SIZE; 1172 offset = VIRTIO_XDP_HEADROOM; 1173 } else { 1174 xdp_page = page; 1175 } 1176 1177 data = page_address(xdp_page) + offset; 1178 err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz, 1179 &num_buf, &xdp_frags_truesz, stats); 1180 if (unlikely(err)) 1181 goto err_xdp_frags; 1182 1183 act = bpf_prog_run_xdp(xdp_prog, &xdp); 1184 stats->xdp_packets++; 1185 1186 switch (act) { 1187 case XDP_PASS: 1188 if (unlikely(xdp_page != page)) 1189 put_page(page); 1190 head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz); 1191 rcu_read_unlock(); 1192 return head_skb; 1193 case XDP_TX: 1194 stats->xdp_tx++; 1195 xdpf = xdp_convert_buff_to_frame(&xdp); 1196 if (unlikely(!xdpf)) { 1197 netdev_dbg(dev, "convert buff to frame failed for xdp\n"); 1198 goto err_xdp_frags; 1199 } 1200 err = virtnet_xdp_xmit(dev, 1, &xdpf, 0); 1201 if (unlikely(!err)) { 1202 xdp_return_frame_rx_napi(xdpf); 1203 } else if (unlikely(err < 0)) { 1204 trace_xdp_exception(vi->dev, xdp_prog, act); 1205 goto err_xdp_frags; 1206 } 1207 *xdp_xmit |= VIRTIO_XDP_TX; 1208 if (unlikely(xdp_page != page)) 1209 put_page(page); 1210 rcu_read_unlock(); 1211 goto xdp_xmit; 1212 case XDP_REDIRECT: 1213 stats->xdp_redirects++; 1214 err = xdp_do_redirect(dev, &xdp, xdp_prog); 1215 if (err) 1216 goto err_xdp_frags; 1217 *xdp_xmit |= VIRTIO_XDP_REDIR; 1218 if (unlikely(xdp_page != page)) 1219 put_page(page); 1220 rcu_read_unlock(); 1221 goto xdp_xmit; 1222 default: 1223 bpf_warn_invalid_xdp_action(vi->dev, xdp_prog, act); 1224 fallthrough; 1225 case XDP_ABORTED: 1226 trace_xdp_exception(vi->dev, xdp_prog, act); 1227 fallthrough; 1228 case XDP_DROP: 1229 goto err_xdp_frags; 1230 } 1231 err_xdp_frags: 1232 if (unlikely(xdp_page != page)) 1233 __free_pages(xdp_page, 0); 1234 1235 if (xdp_buff_has_frags(&xdp)) { 1236 shinfo = xdp_get_shared_info_from_buff(&xdp); 1237 for (i = 0; i < shinfo->nr_frags; i++) { 1238 xdp_page = skb_frag_page(&shinfo->frags[i]); 1239 put_page(xdp_page); 1240 } 1241 } 1242 1243 goto err_xdp; 1244 } 1245 rcu_read_unlock(); 1246 1247 skip_xdp: 1248 head_skb = page_to_skb(vi, rq, page, offset, len, truesize); 1249 curr_skb = head_skb; 1250 1251 if (unlikely(!curr_skb)) 1252 goto err_skb; 1253 while (--num_buf) { 1254 int num_skb_frags; 1255 1256 buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx); 1257 if (unlikely(!buf)) { 1258 pr_debug("%s: rx error: %d buffers out of %d missing\n", 1259 dev->name, num_buf, 1260 virtio16_to_cpu(vi->vdev, 1261 hdr->num_buffers)); 1262 dev->stats.rx_length_errors++; 1263 goto err_buf; 1264 } 1265 1266 stats->bytes += len; 1267 page = virt_to_head_page(buf); 1268 1269 truesize = mergeable_ctx_to_truesize(ctx); 1270 headroom = mergeable_ctx_to_headroom(ctx); 1271 tailroom = headroom ? 
sizeof(struct skb_shared_info) : 0; 1272 room = SKB_DATA_ALIGN(headroom + tailroom); 1273 if (unlikely(len > truesize - room)) { 1274 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 1275 dev->name, len, (unsigned long)(truesize - room)); 1276 dev->stats.rx_length_errors++; 1277 goto err_skb; 1278 } 1279 1280 num_skb_frags = skb_shinfo(curr_skb)->nr_frags; 1281 if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) { 1282 struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC); 1283 1284 if (unlikely(!nskb)) 1285 goto err_skb; 1286 if (curr_skb == head_skb) 1287 skb_shinfo(curr_skb)->frag_list = nskb; 1288 else 1289 curr_skb->next = nskb; 1290 curr_skb = nskb; 1291 head_skb->truesize += nskb->truesize; 1292 num_skb_frags = 0; 1293 } 1294 if (curr_skb != head_skb) { 1295 head_skb->data_len += len; 1296 head_skb->len += len; 1297 head_skb->truesize += truesize; 1298 } 1299 offset = buf - page_address(page); 1300 if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) { 1301 put_page(page); 1302 skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1, 1303 len, truesize); 1304 } else { 1305 skb_add_rx_frag(curr_skb, num_skb_frags, page, 1306 offset, len, truesize); 1307 } 1308 } 1309 1310 ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len); 1311 return head_skb; 1312 1313 err_xdp: 1314 rcu_read_unlock(); 1315 stats->xdp_drops++; 1316 err_skb: 1317 put_page(page); 1318 while (num_buf-- > 1) { 1319 buf = virtqueue_get_buf(rq->vq, &len); 1320 if (unlikely(!buf)) { 1321 pr_debug("%s: rx error: %d buffers missing\n", 1322 dev->name, num_buf); 1323 dev->stats.rx_length_errors++; 1324 break; 1325 } 1326 stats->bytes += len; 1327 page = virt_to_head_page(buf); 1328 put_page(page); 1329 } 1330 err_buf: 1331 stats->drops++; 1332 dev_kfree_skb(head_skb); 1333 xdp_xmit: 1334 return NULL; 1335 } 1336 1337 static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, 1338 struct sk_buff *skb) 1339 { 1340 enum pkt_hash_types rss_hash_type; 1341 1342 if (!hdr_hash || !skb) 1343 return; 1344 1345 switch (__le16_to_cpu(hdr_hash->hash_report)) { 1346 case VIRTIO_NET_HASH_REPORT_TCPv4: 1347 case VIRTIO_NET_HASH_REPORT_UDPv4: 1348 case VIRTIO_NET_HASH_REPORT_TCPv6: 1349 case VIRTIO_NET_HASH_REPORT_UDPv6: 1350 case VIRTIO_NET_HASH_REPORT_TCPv6_EX: 1351 case VIRTIO_NET_HASH_REPORT_UDPv6_EX: 1352 rss_hash_type = PKT_HASH_TYPE_L4; 1353 break; 1354 case VIRTIO_NET_HASH_REPORT_IPv4: 1355 case VIRTIO_NET_HASH_REPORT_IPv6: 1356 case VIRTIO_NET_HASH_REPORT_IPv6_EX: 1357 rss_hash_type = PKT_HASH_TYPE_L3; 1358 break; 1359 case VIRTIO_NET_HASH_REPORT_NONE: 1360 default: 1361 rss_hash_type = PKT_HASH_TYPE_NONE; 1362 } 1363 skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type); 1364 } 1365 1366 static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, 1367 void *buf, unsigned int len, void **ctx, 1368 unsigned int *xdp_xmit, 1369 struct virtnet_rq_stats *stats) 1370 { 1371 struct net_device *dev = vi->dev; 1372 struct sk_buff *skb; 1373 struct virtio_net_hdr_mrg_rxbuf *hdr; 1374 1375 if (unlikely(len < vi->hdr_len + ETH_HLEN)) { 1376 pr_debug("%s: short packet %i\n", dev->name, len); 1377 dev->stats.rx_length_errors++; 1378 virtnet_rq_free_unused_buf(rq->vq, buf); 1379 return; 1380 } 1381 1382 if (vi->mergeable_rx_bufs) 1383 skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, 1384 stats); 1385 else if (vi->big_packets) 1386 skb = receive_big(dev, vi, rq, buf, len, stats); 1387 else 1388 skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); 1389 1390 if 
(unlikely(!skb)) 1391 return; 1392 1393 hdr = skb_vnet_hdr(skb); 1394 if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report) 1395 virtio_skb_set_hash((const struct virtio_net_hdr_v1_hash *)hdr, skb); 1396 1397 if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID) 1398 skb->ip_summed = CHECKSUM_UNNECESSARY; 1399 1400 if (virtio_net_hdr_to_skb(skb, &hdr->hdr, 1401 virtio_is_little_endian(vi->vdev))) { 1402 net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n", 1403 dev->name, hdr->hdr.gso_type, 1404 hdr->hdr.gso_size); 1405 goto frame_err; 1406 } 1407 1408 skb_record_rx_queue(skb, vq2rxq(rq->vq)); 1409 skb->protocol = eth_type_trans(skb, dev); 1410 pr_debug("Receiving skb proto 0x%04x len %i type %i\n", 1411 ntohs(skb->protocol), skb->len, skb->pkt_type); 1412 1413 napi_gro_receive(&rq->napi, skb); 1414 return; 1415 1416 frame_err: 1417 dev->stats.rx_frame_errors++; 1418 dev_kfree_skb(skb); 1419 } 1420 1421 /* Unlike mergeable buffers, all buffers are allocated to the 1422 * same size, except for the headroom. For this reason we do 1423 * not need to use mergeable_len_to_ctx here - it is enough 1424 * to store the headroom as the context ignoring the truesize. 1425 */ 1426 static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, 1427 gfp_t gfp) 1428 { 1429 struct page_frag *alloc_frag = &rq->alloc_frag; 1430 char *buf; 1431 unsigned int xdp_headroom = virtnet_get_headroom(vi); 1432 void *ctx = (void *)(unsigned long)xdp_headroom; 1433 int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom; 1434 int err; 1435 1436 len = SKB_DATA_ALIGN(len) + 1437 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1438 if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp))) 1439 return -ENOMEM; 1440 1441 buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; 1442 get_page(alloc_frag->page); 1443 alloc_frag->offset += len; 1444 sg_init_one(rq->sg, buf + VIRTNET_RX_PAD + xdp_headroom, 1445 vi->hdr_len + GOOD_PACKET_LEN); 1446 err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); 1447 if (err < 0) 1448 put_page(virt_to_head_page(buf)); 1449 return err; 1450 } 1451 1452 static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, 1453 gfp_t gfp) 1454 { 1455 struct page *first, *list = NULL; 1456 char *p; 1457 int i, err, offset; 1458 1459 sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2); 1460 1461 /* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */ 1462 for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) { 1463 first = get_a_page(rq, gfp); 1464 if (!first) { 1465 if (list) 1466 give_pages(rq, list); 1467 return -ENOMEM; 1468 } 1469 sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE); 1470 1471 /* chain new page in list head to match sg */ 1472 first->private = (unsigned long)list; 1473 list = first; 1474 } 1475 1476 first = get_a_page(rq, gfp); 1477 if (!first) { 1478 give_pages(rq, list); 1479 return -ENOMEM; 1480 } 1481 p = page_address(first); 1482 1483 /* rq->sg[0], rq->sg[1] share the same page */ 1484 /* a separated rq->sg[0] for header - required in case !any_header_sg */ 1485 sg_set_buf(&rq->sg[0], p, vi->hdr_len); 1486 1487 /* rq->sg[1] for data packet, from offset */ 1488 offset = sizeof(struct padded_vnet_hdr); 1489 sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset); 1490 1491 /* chain first in list head */ 1492 first->private = (unsigned long)list; 1493 err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2, 1494 first, gfp); 1495 if (err < 0) 1496 give_pages(rq, 
first); 1497 1498 return err; 1499 } 1500 1501 static unsigned int get_mergeable_buf_len(struct receive_queue *rq, 1502 struct ewma_pkt_len *avg_pkt_len, 1503 unsigned int room) 1504 { 1505 struct virtnet_info *vi = rq->vq->vdev->priv; 1506 const size_t hdr_len = vi->hdr_len; 1507 unsigned int len; 1508 1509 if (room) 1510 return PAGE_SIZE - room; 1511 1512 len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len), 1513 rq->min_buf_len, PAGE_SIZE - hdr_len); 1514 1515 return ALIGN(len, L1_CACHE_BYTES); 1516 } 1517 1518 static int add_recvbuf_mergeable(struct virtnet_info *vi, 1519 struct receive_queue *rq, gfp_t gfp) 1520 { 1521 struct page_frag *alloc_frag = &rq->alloc_frag; 1522 unsigned int headroom = virtnet_get_headroom(vi); 1523 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 1524 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); 1525 char *buf; 1526 void *ctx; 1527 int err; 1528 unsigned int len, hole; 1529 1530 /* Extra tailroom is needed to satisfy XDP's assumption. This 1531 * means rx frags coalescing won't work, but consider we've 1532 * disabled GSO for XDP, it won't be a big issue. 1533 */ 1534 len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); 1535 if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp))) 1536 return -ENOMEM; 1537 1538 buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; 1539 buf += headroom; /* advance address leaving hole at front of pkt */ 1540 get_page(alloc_frag->page); 1541 alloc_frag->offset += len + room; 1542 hole = alloc_frag->size - alloc_frag->offset; 1543 if (hole < len + room) { 1544 /* To avoid internal fragmentation, if there is very likely not 1545 * enough space for another buffer, add the remaining space to 1546 * the current buffer. 1547 * XDP core assumes that frame_size of xdp_buff and the length 1548 * of the frag are PAGE_SIZE, so we disable the hole mechanism. 1549 */ 1550 if (!headroom) 1551 len += hole; 1552 alloc_frag->offset += hole; 1553 } 1554 1555 sg_init_one(rq->sg, buf, len); 1556 ctx = mergeable_len_to_ctx(len + room, headroom); 1557 err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); 1558 if (err < 0) 1559 put_page(virt_to_head_page(buf)); 1560 1561 return err; 1562 } 1563 1564 /* 1565 * Returns false if we couldn't fill entirely (OOM). 1566 * 1567 * Normally run in the receive path, but can also be run from ndo_open 1568 * before we're receiving packets, or from refill_work which is 1569 * careful to disable receiving (using napi_disable). 
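 *
 * The device is only kicked (and the kick counted in the rx stats) when
 * virtqueue_kick_prepare() reports that a notification is actually needed.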
1570 */ 1571 static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq, 1572 gfp_t gfp) 1573 { 1574 int err; 1575 bool oom; 1576 1577 do { 1578 if (vi->mergeable_rx_bufs) 1579 err = add_recvbuf_mergeable(vi, rq, gfp); 1580 else if (vi->big_packets) 1581 err = add_recvbuf_big(vi, rq, gfp); 1582 else 1583 err = add_recvbuf_small(vi, rq, gfp); 1584 1585 oom = err == -ENOMEM; 1586 if (err) 1587 break; 1588 } while (rq->vq->num_free); 1589 if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) { 1590 unsigned long flags; 1591 1592 flags = u64_stats_update_begin_irqsave(&rq->stats.syncp); 1593 rq->stats.kicks++; 1594 u64_stats_update_end_irqrestore(&rq->stats.syncp, flags); 1595 } 1596 1597 return !oom; 1598 } 1599 1600 static void skb_recv_done(struct virtqueue *rvq) 1601 { 1602 struct virtnet_info *vi = rvq->vdev->priv; 1603 struct receive_queue *rq = &vi->rq[vq2rxq(rvq)]; 1604 1605 virtqueue_napi_schedule(&rq->napi, rvq); 1606 } 1607 1608 static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi) 1609 { 1610 napi_enable(napi); 1611 1612 /* If all buffers were filled by other side before we napi_enabled, we 1613 * won't get another interrupt, so process any outstanding packets now. 1614 * Call local_bh_enable after to trigger softIRQ processing. 1615 */ 1616 local_bh_disable(); 1617 virtqueue_napi_schedule(napi, vq); 1618 local_bh_enable(); 1619 } 1620 1621 static void virtnet_napi_tx_enable(struct virtnet_info *vi, 1622 struct virtqueue *vq, 1623 struct napi_struct *napi) 1624 { 1625 if (!napi->weight) 1626 return; 1627 1628 /* Tx napi touches cachelines on the cpu handling tx interrupts. Only 1629 * enable the feature if this is likely affine with the transmit path. 1630 */ 1631 if (!vi->affinity_hint_set) { 1632 napi->weight = 0; 1633 return; 1634 } 1635 1636 return virtnet_napi_enable(vq, napi); 1637 } 1638 1639 static void virtnet_napi_tx_disable(struct napi_struct *napi) 1640 { 1641 if (napi->weight) 1642 napi_disable(napi); 1643 } 1644 1645 static void refill_work(struct work_struct *work) 1646 { 1647 struct virtnet_info *vi = 1648 container_of(work, struct virtnet_info, refill.work); 1649 bool still_empty; 1650 int i; 1651 1652 for (i = 0; i < vi->curr_queue_pairs; i++) { 1653 struct receive_queue *rq = &vi->rq[i]; 1654 1655 napi_disable(&rq->napi); 1656 still_empty = !try_fill_recv(vi, rq, GFP_KERNEL); 1657 virtnet_napi_enable(rq->vq, &rq->napi); 1658 1659 /* In theory, this can happen: if we don't get any buffers in 1660 * we will *never* try to fill again. 
1661 */ 1662 if (still_empty) 1663 schedule_delayed_work(&vi->refill, HZ/2); 1664 } 1665 } 1666 1667 static int virtnet_receive(struct receive_queue *rq, int budget, 1668 unsigned int *xdp_xmit) 1669 { 1670 struct virtnet_info *vi = rq->vq->vdev->priv; 1671 struct virtnet_rq_stats stats = {}; 1672 unsigned int len; 1673 void *buf; 1674 int i; 1675 1676 if (!vi->big_packets || vi->mergeable_rx_bufs) { 1677 void *ctx; 1678 1679 while (stats.packets < budget && 1680 (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) { 1681 receive_buf(vi, rq, buf, len, ctx, xdp_xmit, &stats); 1682 stats.packets++; 1683 } 1684 } else { 1685 while (stats.packets < budget && 1686 (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) { 1687 receive_buf(vi, rq, buf, len, NULL, xdp_xmit, &stats); 1688 stats.packets++; 1689 } 1690 } 1691 1692 if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { 1693 if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { 1694 spin_lock(&vi->refill_lock); 1695 if (vi->refill_enabled) 1696 schedule_delayed_work(&vi->refill, 0); 1697 spin_unlock(&vi->refill_lock); 1698 } 1699 } 1700 1701 u64_stats_update_begin(&rq->stats.syncp); 1702 for (i = 0; i < VIRTNET_RQ_STATS_LEN; i++) { 1703 size_t offset = virtnet_rq_stats_desc[i].offset; 1704 u64 *item; 1705 1706 item = (u64 *)((u8 *)&rq->stats + offset); 1707 *item += *(u64 *)((u8 *)&stats + offset); 1708 } 1709 u64_stats_update_end(&rq->stats.syncp); 1710 1711 return stats.packets; 1712 } 1713 1714 static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi) 1715 { 1716 unsigned int len; 1717 unsigned int packets = 0; 1718 unsigned int bytes = 0; 1719 void *ptr; 1720 1721 while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { 1722 if (likely(!is_xdp_frame(ptr))) { 1723 struct sk_buff *skb = ptr; 1724 1725 pr_debug("Sent skb %p\n", skb); 1726 1727 bytes += skb->len; 1728 napi_consume_skb(skb, in_napi); 1729 } else { 1730 struct xdp_frame *frame = ptr_to_xdp(ptr); 1731 1732 bytes += xdp_get_frame_len(frame); 1733 xdp_return_frame(frame); 1734 } 1735 packets++; 1736 } 1737 1738 /* Avoid overhead when no packets have been processed 1739 * happens when called speculatively from start_xmit. 
1740 */ 1741 if (!packets) 1742 return; 1743 1744 u64_stats_update_begin(&sq->stats.syncp); 1745 sq->stats.bytes += bytes; 1746 sq->stats.packets += packets; 1747 u64_stats_update_end(&sq->stats.syncp); 1748 } 1749 1750 static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) 1751 { 1752 if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) 1753 return false; 1754 else if (q < vi->curr_queue_pairs) 1755 return true; 1756 else 1757 return false; 1758 } 1759 1760 static void virtnet_poll_cleantx(struct receive_queue *rq) 1761 { 1762 struct virtnet_info *vi = rq->vq->vdev->priv; 1763 unsigned int index = vq2rxq(rq->vq); 1764 struct send_queue *sq = &vi->sq[index]; 1765 struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index); 1766 1767 if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index)) 1768 return; 1769 1770 if (__netif_tx_trylock(txq)) { 1771 if (sq->reset) { 1772 __netif_tx_unlock(txq); 1773 return; 1774 } 1775 1776 do { 1777 virtqueue_disable_cb(sq->vq); 1778 free_old_xmit_skbs(sq, true); 1779 } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 1780 1781 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) 1782 netif_tx_wake_queue(txq); 1783 1784 __netif_tx_unlock(txq); 1785 } 1786 } 1787 1788 static int virtnet_poll(struct napi_struct *napi, int budget) 1789 { 1790 struct receive_queue *rq = 1791 container_of(napi, struct receive_queue, napi); 1792 struct virtnet_info *vi = rq->vq->vdev->priv; 1793 struct send_queue *sq; 1794 unsigned int received; 1795 unsigned int xdp_xmit = 0; 1796 1797 virtnet_poll_cleantx(rq); 1798 1799 received = virtnet_receive(rq, budget, &xdp_xmit); 1800 1801 /* Out of packets? */ 1802 if (received < budget) 1803 virtqueue_napi_complete(napi, rq->vq, received); 1804 1805 if (xdp_xmit & VIRTIO_XDP_REDIR) 1806 xdp_do_flush(); 1807 1808 if (xdp_xmit & VIRTIO_XDP_TX) { 1809 sq = virtnet_xdp_get_sq(vi); 1810 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 1811 u64_stats_update_begin(&sq->stats.syncp); 1812 sq->stats.kicks++; 1813 u64_stats_update_end(&sq->stats.syncp); 1814 } 1815 virtnet_xdp_put_sq(vi, sq); 1816 } 1817 1818 return received; 1819 } 1820 1821 static int virtnet_open(struct net_device *dev) 1822 { 1823 struct virtnet_info *vi = netdev_priv(dev); 1824 int i, err; 1825 1826 enable_delayed_refill(vi); 1827 1828 for (i = 0; i < vi->max_queue_pairs; i++) { 1829 if (i < vi->curr_queue_pairs) 1830 /* Make sure we have some buffers: if oom use wq. 
*/ 1831 if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) 1832 schedule_delayed_work(&vi->refill, 0); 1833 1834 err = xdp_rxq_info_reg(&vi->rq[i].xdp_rxq, dev, i, vi->rq[i].napi.napi_id); 1835 if (err < 0) 1836 return err; 1837 1838 err = xdp_rxq_info_reg_mem_model(&vi->rq[i].xdp_rxq, 1839 MEM_TYPE_PAGE_SHARED, NULL); 1840 if (err < 0) { 1841 xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq); 1842 return err; 1843 } 1844 1845 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); 1846 virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi); 1847 } 1848 1849 return 0; 1850 } 1851 1852 static int virtnet_poll_tx(struct napi_struct *napi, int budget) 1853 { 1854 struct send_queue *sq = container_of(napi, struct send_queue, napi); 1855 struct virtnet_info *vi = sq->vq->vdev->priv; 1856 unsigned int index = vq2txq(sq->vq); 1857 struct netdev_queue *txq; 1858 int opaque; 1859 bool done; 1860 1861 if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { 1862 /* We don't need to enable cb for XDP */ 1863 napi_complete_done(napi, 0); 1864 return 0; 1865 } 1866 1867 txq = netdev_get_tx_queue(vi->dev, index); 1868 __netif_tx_lock(txq, raw_smp_processor_id()); 1869 virtqueue_disable_cb(sq->vq); 1870 free_old_xmit_skbs(sq, true); 1871 1872 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) 1873 netif_tx_wake_queue(txq); 1874 1875 opaque = virtqueue_enable_cb_prepare(sq->vq); 1876 1877 done = napi_complete_done(napi, 0); 1878 1879 if (!done) 1880 virtqueue_disable_cb(sq->vq); 1881 1882 __netif_tx_unlock(txq); 1883 1884 if (done) { 1885 if (unlikely(virtqueue_poll(sq->vq, opaque))) { 1886 if (napi_schedule_prep(napi)) { 1887 __netif_tx_lock(txq, raw_smp_processor_id()); 1888 virtqueue_disable_cb(sq->vq); 1889 __netif_tx_unlock(txq); 1890 __napi_schedule(napi); 1891 } 1892 } 1893 } 1894 1895 return 0; 1896 } 1897 1898 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) 1899 { 1900 struct virtio_net_hdr_mrg_rxbuf *hdr; 1901 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; 1902 struct virtnet_info *vi = sq->vq->vdev->priv; 1903 int num_sg; 1904 unsigned hdr_len = vi->hdr_len; 1905 bool can_push; 1906 1907 pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); 1908 1909 can_push = vi->any_header_sg && 1910 !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) && 1911 !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len; 1912 /* Even if we can, don't push here yet as this would skew 1913 * csum_start offset below. */ 1914 if (can_push) 1915 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len); 1916 else 1917 hdr = skb_vnet_hdr(skb); 1918 1919 if (virtio_net_hdr_from_skb(skb, &hdr->hdr, 1920 virtio_is_little_endian(vi->vdev), false, 1921 0)) 1922 return -EPROTO; 1923 1924 if (vi->mergeable_rx_bufs) 1925 hdr->num_buffers = 0; 1926 1927 sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2)); 1928 if (can_push) { 1929 __skb_push(skb, hdr_len); 1930 num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len); 1931 if (unlikely(num_sg < 0)) 1932 return num_sg; 1933 /* Pull header back to avoid skew in tx bytes calculations. 
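		 * The push above was only needed so that skb_to_sgvec() would
		 * include the virtio header in the scatterlist.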
*/ 1934 __skb_pull(skb, hdr_len); 1935 } else { 1936 sg_set_buf(sq->sg, hdr, hdr_len); 1937 num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len); 1938 if (unlikely(num_sg < 0)) 1939 return num_sg; 1940 num_sg++; 1941 } 1942 return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC); 1943 } 1944 1945 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) 1946 { 1947 struct virtnet_info *vi = netdev_priv(dev); 1948 int qnum = skb_get_queue_mapping(skb); 1949 struct send_queue *sq = &vi->sq[qnum]; 1950 int err; 1951 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 1952 bool kick = !netdev_xmit_more(); 1953 bool use_napi = sq->napi.weight; 1954 1955 /* Free up any pending old buffers before queueing new ones. */ 1956 do { 1957 if (use_napi) 1958 virtqueue_disable_cb(sq->vq); 1959 1960 free_old_xmit_skbs(sq, false); 1961 1962 } while (use_napi && kick && 1963 unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 1964 1965 /* timestamp packet in software */ 1966 skb_tx_timestamp(skb); 1967 1968 /* Try to transmit */ 1969 err = xmit_skb(sq, skb); 1970 1971 /* This should not happen! */ 1972 if (unlikely(err)) { 1973 dev->stats.tx_fifo_errors++; 1974 if (net_ratelimit()) 1975 dev_warn(&dev->dev, 1976 "Unexpected TXQ (%d) queue failure: %d\n", 1977 qnum, err); 1978 dev->stats.tx_dropped++; 1979 dev_kfree_skb_any(skb); 1980 return NETDEV_TX_OK; 1981 } 1982 1983 /* Don't wait up for transmitted skbs to be freed. */ 1984 if (!use_napi) { 1985 skb_orphan(skb); 1986 nf_reset_ct(skb); 1987 } 1988 1989 /* If running out of space, stop queue to avoid getting packets that we 1990 * are then unable to transmit. 1991 * An alternative would be to force queuing layer to requeue the skb by 1992 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be 1993 * returned in a normal path of operation: it means that driver is not 1994 * maintaining the TX queue stop/start state properly, and causes 1995 * the stack to do a non-trivial amount of useless work. 1996 * Since most packets only take 1 or 2 ring slots, stopping the queue 1997 * early means 16 slots are typically wasted. 1998 */ 1999 if (sq->vq->num_free < 2+MAX_SKB_FRAGS) { 2000 netif_stop_subqueue(dev, qnum); 2001 if (use_napi) { 2002 if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) 2003 virtqueue_napi_schedule(&sq->napi, sq->vq); 2004 } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { 2005 /* More just got used, free them then recheck. 
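			 * (non-NAPI path only: with tx NAPI the queue is woken
			 * from the NAPI poll handlers instead).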
*/ 2006 free_old_xmit_skbs(sq, false); 2007 if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { 2008 netif_start_subqueue(dev, qnum); 2009 virtqueue_disable_cb(sq->vq); 2010 } 2011 } 2012 } 2013 2014 if (kick || netif_xmit_stopped(txq)) { 2015 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 2016 u64_stats_update_begin(&sq->stats.syncp); 2017 sq->stats.kicks++; 2018 u64_stats_update_end(&sq->stats.syncp); 2019 } 2020 } 2021 2022 return NETDEV_TX_OK; 2023 } 2024 2025 static int virtnet_rx_resize(struct virtnet_info *vi, 2026 struct receive_queue *rq, u32 ring_num) 2027 { 2028 bool running = netif_running(vi->dev); 2029 int err, qindex; 2030 2031 qindex = rq - vi->rq; 2032 2033 if (running) 2034 napi_disable(&rq->napi); 2035 2036 err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_free_unused_buf); 2037 if (err) 2038 netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err); 2039 2040 if (!try_fill_recv(vi, rq, GFP_KERNEL)) 2041 schedule_delayed_work(&vi->refill, 0); 2042 2043 if (running) 2044 virtnet_napi_enable(rq->vq, &rq->napi); 2045 return err; 2046 } 2047 2048 static int virtnet_tx_resize(struct virtnet_info *vi, 2049 struct send_queue *sq, u32 ring_num) 2050 { 2051 bool running = netif_running(vi->dev); 2052 struct netdev_queue *txq; 2053 int err, qindex; 2054 2055 qindex = sq - vi->sq; 2056 2057 if (running) 2058 virtnet_napi_tx_disable(&sq->napi); 2059 2060 txq = netdev_get_tx_queue(vi->dev, qindex); 2061 2062 /* 1. wait all ximt complete 2063 * 2. fix the race of netif_stop_subqueue() vs netif_start_subqueue() 2064 */ 2065 __netif_tx_lock_bh(txq); 2066 2067 /* Prevent rx poll from accessing sq. */ 2068 sq->reset = true; 2069 2070 /* Prevent the upper layer from trying to send packets. */ 2071 netif_stop_subqueue(vi->dev, qindex); 2072 2073 __netif_tx_unlock_bh(txq); 2074 2075 err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf); 2076 if (err) 2077 netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err); 2078 2079 __netif_tx_lock_bh(txq); 2080 sq->reset = false; 2081 netif_tx_wake_queue(txq); 2082 __netif_tx_unlock_bh(txq); 2083 2084 if (running) 2085 virtnet_napi_tx_enable(vi, sq->vq, &sq->napi); 2086 return err; 2087 } 2088 2089 /* 2090 * Send command via the control virtqueue and check status. Commands 2091 * supported by the hypervisor, as indicated by feature bits, should 2092 * never fail unless improperly formatted. 2093 */ 2094 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, 2095 struct scatterlist *out) 2096 { 2097 struct scatterlist *sgs[4], hdr, stat; 2098 unsigned out_num = 0, tmp; 2099 int ret; 2100 2101 /* Caller should know better */ 2102 BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)); 2103 2104 vi->ctrl->status = ~0; 2105 vi->ctrl->hdr.class = class; 2106 vi->ctrl->hdr.cmd = cmd; 2107 /* Add header */ 2108 sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr)); 2109 sgs[out_num++] = &hdr; 2110 2111 if (out) 2112 sgs[out_num++] = out; 2113 2114 /* Add return status. 
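* The status byte is the single device-writable buffer (the in-count of 1 passed to virtqueue_add_sgs() below); the device writes VIRTIO_NET_OK into it on success.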
*/ 2115 sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status)); 2116 sgs[out_num] = &stat; 2117 2118 BUG_ON(out_num + 1 > ARRAY_SIZE(sgs)); 2119 ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC); 2120 if (ret < 0) { 2121 dev_warn(&vi->vdev->dev, 2122 "Failed to add sgs for command vq: %d\n.", ret); 2123 return false; 2124 } 2125 2126 if (unlikely(!virtqueue_kick(vi->cvq))) 2127 return vi->ctrl->status == VIRTIO_NET_OK; 2128 2129 /* Spin for a response, the kick causes an ioport write, trapping 2130 * into the hypervisor, so the request should be handled immediately. 2131 */ 2132 while (!virtqueue_get_buf(vi->cvq, &tmp) && 2133 !virtqueue_is_broken(vi->cvq)) 2134 cpu_relax(); 2135 2136 return vi->ctrl->status == VIRTIO_NET_OK; 2137 } 2138 2139 static int virtnet_set_mac_address(struct net_device *dev, void *p) 2140 { 2141 struct virtnet_info *vi = netdev_priv(dev); 2142 struct virtio_device *vdev = vi->vdev; 2143 int ret; 2144 struct sockaddr *addr; 2145 struct scatterlist sg; 2146 2147 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 2148 return -EOPNOTSUPP; 2149 2150 addr = kmemdup(p, sizeof(*addr), GFP_KERNEL); 2151 if (!addr) 2152 return -ENOMEM; 2153 2154 ret = eth_prepare_mac_addr_change(dev, addr); 2155 if (ret) 2156 goto out; 2157 2158 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 2159 sg_init_one(&sg, addr->sa_data, dev->addr_len); 2160 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 2161 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 2162 dev_warn(&vdev->dev, 2163 "Failed to set mac address by vq command.\n"); 2164 ret = -EINVAL; 2165 goto out; 2166 } 2167 } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 2168 !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { 2169 unsigned int i; 2170 2171 /* Naturally, this has an atomicity problem. 
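* Legacy devices without VIRTIO_NET_F_CTRL_MAC_ADDR only expose the MAC through config space, and virtio_cwrite8() updates it one byte at a time, so the device may briefly observe a mix of the old and new address.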
*/ 2172 for (i = 0; i < dev->addr_len; i++) 2173 virtio_cwrite8(vdev, 2174 offsetof(struct virtio_net_config, mac) + 2175 i, addr->sa_data[i]); 2176 } 2177 2178 eth_commit_mac_addr_change(dev, p); 2179 ret = 0; 2180 2181 out: 2182 kfree(addr); 2183 return ret; 2184 } 2185 2186 static void virtnet_stats(struct net_device *dev, 2187 struct rtnl_link_stats64 *tot) 2188 { 2189 struct virtnet_info *vi = netdev_priv(dev); 2190 unsigned int start; 2191 int i; 2192 2193 for (i = 0; i < vi->max_queue_pairs; i++) { 2194 u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops; 2195 struct receive_queue *rq = &vi->rq[i]; 2196 struct send_queue *sq = &vi->sq[i]; 2197 2198 do { 2199 start = u64_stats_fetch_begin(&sq->stats.syncp); 2200 tpackets = sq->stats.packets; 2201 tbytes = sq->stats.bytes; 2202 terrors = sq->stats.tx_timeouts; 2203 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 2204 2205 do { 2206 start = u64_stats_fetch_begin(&rq->stats.syncp); 2207 rpackets = rq->stats.packets; 2208 rbytes = rq->stats.bytes; 2209 rdrops = rq->stats.drops; 2210 } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); 2211 2212 tot->rx_packets += rpackets; 2213 tot->tx_packets += tpackets; 2214 tot->rx_bytes += rbytes; 2215 tot->tx_bytes += tbytes; 2216 tot->rx_dropped += rdrops; 2217 tot->tx_errors += terrors; 2218 } 2219 2220 tot->tx_dropped = dev->stats.tx_dropped; 2221 tot->tx_fifo_errors = dev->stats.tx_fifo_errors; 2222 tot->rx_length_errors = dev->stats.rx_length_errors; 2223 tot->rx_frame_errors = dev->stats.rx_frame_errors; 2224 } 2225 2226 static void virtnet_ack_link_announce(struct virtnet_info *vi) 2227 { 2228 rtnl_lock(); 2229 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE, 2230 VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL)) 2231 dev_warn(&vi->dev->dev, "Failed to ack link announce.\n"); 2232 rtnl_unlock(); 2233 } 2234 2235 static int _virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) 2236 { 2237 struct scatterlist sg; 2238 struct net_device *dev = vi->dev; 2239 2240 if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ)) 2241 return 0; 2242 2243 vi->ctrl->mq.virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs); 2244 sg_init_one(&sg, &vi->ctrl->mq, sizeof(vi->ctrl->mq)); 2245 2246 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 2247 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) { 2248 dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n", 2249 queue_pairs); 2250 return -EINVAL; 2251 } else { 2252 vi->curr_queue_pairs = queue_pairs; 2253 /* virtnet_open() will refill when device is going to up. */ 2254 if (dev->flags & IFF_UP) 2255 schedule_delayed_work(&vi->refill, 0); 2256 } 2257 2258 return 0; 2259 } 2260 2261 static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) 2262 { 2263 int err; 2264 2265 rtnl_lock(); 2266 err = _virtnet_set_queues(vi, queue_pairs); 2267 rtnl_unlock(); 2268 return err; 2269 } 2270 2271 static int virtnet_close(struct net_device *dev) 2272 { 2273 struct virtnet_info *vi = netdev_priv(dev); 2274 int i; 2275 2276 /* Make sure NAPI doesn't schedule refill work */ 2277 disable_delayed_refill(vi); 2278 /* Make sure refill_work doesn't re-enable napi! 
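* cancel_delayed_work_sync() below also waits for a refill_work instance that is already running, so it cannot re-enable a napi that the per-queue loop below is about to disable.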
*/ 2279 cancel_delayed_work_sync(&vi->refill); 2280 2281 for (i = 0; i < vi->max_queue_pairs; i++) { 2282 xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq); 2283 napi_disable(&vi->rq[i].napi); 2284 virtnet_napi_tx_disable(&vi->sq[i].napi); 2285 } 2286 2287 return 0; 2288 } 2289 2290 static void virtnet_set_rx_mode(struct net_device *dev) 2291 { 2292 struct virtnet_info *vi = netdev_priv(dev); 2293 struct scatterlist sg[2]; 2294 struct virtio_net_ctrl_mac *mac_data; 2295 struct netdev_hw_addr *ha; 2296 int uc_count; 2297 int mc_count; 2298 void *buf; 2299 int i; 2300 2301 /* We can't dynamically set ndo_set_rx_mode, so return gracefully */ 2302 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX)) 2303 return; 2304 2305 vi->ctrl->promisc = ((dev->flags & IFF_PROMISC) != 0); 2306 vi->ctrl->allmulti = ((dev->flags & IFF_ALLMULTI) != 0); 2307 2308 sg_init_one(sg, &vi->ctrl->promisc, sizeof(vi->ctrl->promisc)); 2309 2310 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 2311 VIRTIO_NET_CTRL_RX_PROMISC, sg)) 2312 dev_warn(&dev->dev, "Failed to %sable promisc mode.\n", 2313 vi->ctrl->promisc ? "en" : "dis"); 2314 2315 sg_init_one(sg, &vi->ctrl->allmulti, sizeof(vi->ctrl->allmulti)); 2316 2317 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 2318 VIRTIO_NET_CTRL_RX_ALLMULTI, sg)) 2319 dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n", 2320 vi->ctrl->allmulti ? "en" : "dis"); 2321 2322 uc_count = netdev_uc_count(dev); 2323 mc_count = netdev_mc_count(dev); 2324 /* MAC filter - use one buffer for both lists */ 2325 buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) + 2326 (2 * sizeof(mac_data->entries)), GFP_ATOMIC); 2327 mac_data = buf; 2328 if (!buf) 2329 return; 2330 2331 sg_init_table(sg, 2); 2332 2333 /* Store the unicast list and count in the front of the buffer */ 2334 mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count); 2335 i = 0; 2336 netdev_for_each_uc_addr(ha, dev) 2337 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 2338 2339 sg_set_buf(&sg[0], mac_data, 2340 sizeof(mac_data->entries) + (uc_count * ETH_ALEN)); 2341 2342 /* multicast list and count fill the end */ 2343 mac_data = (void *)&mac_data->macs[uc_count][0]; 2344 2345 mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count); 2346 i = 0; 2347 netdev_for_each_mc_addr(ha, dev) 2348 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 2349 2350 sg_set_buf(&sg[1], mac_data, 2351 sizeof(mac_data->entries) + (mc_count * ETH_ALEN)); 2352 2353 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 2354 VIRTIO_NET_CTRL_MAC_TABLE_SET, sg)) 2355 dev_warn(&dev->dev, "Failed to set MAC filter table.\n"); 2356 2357 kfree(buf); 2358 } 2359 2360 static int virtnet_vlan_rx_add_vid(struct net_device *dev, 2361 __be16 proto, u16 vid) 2362 { 2363 struct virtnet_info *vi = netdev_priv(dev); 2364 struct scatterlist sg; 2365 2366 vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid); 2367 sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid)); 2368 2369 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 2370 VIRTIO_NET_CTRL_VLAN_ADD, &sg)) 2371 dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid); 2372 return 0; 2373 } 2374 2375 static int virtnet_vlan_rx_kill_vid(struct net_device *dev, 2376 __be16 proto, u16 vid) 2377 { 2378 struct virtnet_info *vi = netdev_priv(dev); 2379 struct scatterlist sg; 2380 2381 vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid); 2382 sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid)); 2383 2384 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 2385 VIRTIO_NET_CTRL_VLAN_DEL, &sg)) 2386 dev_warn(&dev->dev, "Failed to kill 
VLAN ID %d.\n", vid); 2387 return 0; 2388 } 2389 2390 static void virtnet_clean_affinity(struct virtnet_info *vi) 2391 { 2392 int i; 2393 2394 if (vi->affinity_hint_set) { 2395 for (i = 0; i < vi->max_queue_pairs; i++) { 2396 virtqueue_set_affinity(vi->rq[i].vq, NULL); 2397 virtqueue_set_affinity(vi->sq[i].vq, NULL); 2398 } 2399 2400 vi->affinity_hint_set = false; 2401 } 2402 } 2403 2404 static void virtnet_set_affinity(struct virtnet_info *vi) 2405 { 2406 cpumask_var_t mask; 2407 int stragglers; 2408 int group_size; 2409 int i, j, cpu; 2410 int num_cpu; 2411 int stride; 2412 2413 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { 2414 virtnet_clean_affinity(vi); 2415 return; 2416 } 2417 2418 num_cpu = num_online_cpus(); 2419 stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1); 2420 stragglers = num_cpu >= vi->curr_queue_pairs ? 2421 num_cpu % vi->curr_queue_pairs : 2422 0; 2423 cpu = cpumask_first(cpu_online_mask); 2424 2425 for (i = 0; i < vi->curr_queue_pairs; i++) { 2426 group_size = stride + (i < stragglers ? 1 : 0); 2427 2428 for (j = 0; j < group_size; j++) { 2429 cpumask_set_cpu(cpu, mask); 2430 cpu = cpumask_next_wrap(cpu, cpu_online_mask, 2431 nr_cpu_ids, false); 2432 } 2433 virtqueue_set_affinity(vi->rq[i].vq, mask); 2434 virtqueue_set_affinity(vi->sq[i].vq, mask); 2435 __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS); 2436 cpumask_clear(mask); 2437 } 2438 2439 vi->affinity_hint_set = true; 2440 free_cpumask_var(mask); 2441 } 2442 2443 static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node) 2444 { 2445 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 2446 node); 2447 virtnet_set_affinity(vi); 2448 return 0; 2449 } 2450 2451 static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node) 2452 { 2453 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 2454 node_dead); 2455 virtnet_set_affinity(vi); 2456 return 0; 2457 } 2458 2459 static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node) 2460 { 2461 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 2462 node); 2463 2464 virtnet_clean_affinity(vi); 2465 return 0; 2466 } 2467 2468 static enum cpuhp_state virtionet_online; 2469 2470 static int virtnet_cpu_notif_add(struct virtnet_info *vi) 2471 { 2472 int ret; 2473 2474 ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node); 2475 if (ret) 2476 return ret; 2477 ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD, 2478 &vi->node_dead); 2479 if (!ret) 2480 return ret; 2481 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 2482 return ret; 2483 } 2484 2485 static void virtnet_cpu_notif_remove(struct virtnet_info *vi) 2486 { 2487 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 2488 cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD, 2489 &vi->node_dead); 2490 } 2491 2492 static void virtnet_get_ringparam(struct net_device *dev, 2493 struct ethtool_ringparam *ring, 2494 struct kernel_ethtool_ringparam *kernel_ring, 2495 struct netlink_ext_ack *extack) 2496 { 2497 struct virtnet_info *vi = netdev_priv(dev); 2498 2499 ring->rx_max_pending = vi->rq[0].vq->num_max; 2500 ring->tx_max_pending = vi->sq[0].vq->num_max; 2501 ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 2502 ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 2503 } 2504 2505 static int virtnet_set_ringparam(struct net_device *dev, 2506 struct ethtool_ringparam *ring, 2507 struct kernel_ethtool_ringparam *kernel_ring, 2508 struct 
netlink_ext_ack *extack) 2509 { 2510 struct virtnet_info *vi = netdev_priv(dev); 2511 u32 rx_pending, tx_pending; 2512 struct receive_queue *rq; 2513 struct send_queue *sq; 2514 int i, err; 2515 2516 if (ring->rx_mini_pending || ring->rx_jumbo_pending) 2517 return -EINVAL; 2518 2519 rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 2520 tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 2521 2522 if (ring->rx_pending == rx_pending && 2523 ring->tx_pending == tx_pending) 2524 return 0; 2525 2526 if (ring->rx_pending > vi->rq[0].vq->num_max) 2527 return -EINVAL; 2528 2529 if (ring->tx_pending > vi->sq[0].vq->num_max) 2530 return -EINVAL; 2531 2532 for (i = 0; i < vi->max_queue_pairs; i++) { 2533 rq = vi->rq + i; 2534 sq = vi->sq + i; 2535 2536 if (ring->tx_pending != tx_pending) { 2537 err = virtnet_tx_resize(vi, sq, ring->tx_pending); 2538 if (err) 2539 return err; 2540 } 2541 2542 if (ring->rx_pending != rx_pending) { 2543 err = virtnet_rx_resize(vi, rq, ring->rx_pending); 2544 if (err) 2545 return err; 2546 } 2547 } 2548 2549 return 0; 2550 } 2551 2552 static bool virtnet_commit_rss_command(struct virtnet_info *vi) 2553 { 2554 struct net_device *dev = vi->dev; 2555 struct scatterlist sgs[4]; 2556 unsigned int sg_buf_size; 2557 2558 /* prepare sgs */ 2559 sg_init_table(sgs, 4); 2560 2561 sg_buf_size = offsetof(struct virtio_net_ctrl_rss, indirection_table); 2562 sg_set_buf(&sgs[0], &vi->ctrl->rss, sg_buf_size); 2563 2564 sg_buf_size = sizeof(uint16_t) * (vi->ctrl->rss.indirection_table_mask + 1); 2565 sg_set_buf(&sgs[1], vi->ctrl->rss.indirection_table, sg_buf_size); 2566 2567 sg_buf_size = offsetof(struct virtio_net_ctrl_rss, key) 2568 - offsetof(struct virtio_net_ctrl_rss, max_tx_vq); 2569 sg_set_buf(&sgs[2], &vi->ctrl->rss.max_tx_vq, sg_buf_size); 2570 2571 sg_buf_size = vi->rss_key_size; 2572 sg_set_buf(&sgs[3], vi->ctrl->rss.key, sg_buf_size); 2573 2574 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 2575 vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG 2576 : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs)) { 2577 dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n"); 2578 return false; 2579 } 2580 return true; 2581 } 2582 2583 static void virtnet_init_default_rss(struct virtnet_info *vi) 2584 { 2585 u32 indir_val = 0; 2586 int i = 0; 2587 2588 vi->ctrl->rss.hash_types = vi->rss_hash_types_supported; 2589 vi->rss_hash_types_saved = vi->rss_hash_types_supported; 2590 vi->ctrl->rss.indirection_table_mask = vi->rss_indir_table_size 2591 ? 
vi->rss_indir_table_size - 1 : 0; 2592 vi->ctrl->rss.unclassified_queue = 0; 2593 2594 for (; i < vi->rss_indir_table_size; ++i) { 2595 indir_val = ethtool_rxfh_indir_default(i, vi->curr_queue_pairs); 2596 vi->ctrl->rss.indirection_table[i] = indir_val; 2597 } 2598 2599 vi->ctrl->rss.max_tx_vq = vi->curr_queue_pairs; 2600 vi->ctrl->rss.hash_key_length = vi->rss_key_size; 2601 2602 netdev_rss_key_fill(vi->ctrl->rss.key, vi->rss_key_size); 2603 } 2604 2605 static void virtnet_get_hashflow(const struct virtnet_info *vi, struct ethtool_rxnfc *info) 2606 { 2607 info->data = 0; 2608 switch (info->flow_type) { 2609 case TCP_V4_FLOW: 2610 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) { 2611 info->data = RXH_IP_SRC | RXH_IP_DST | 2612 RXH_L4_B_0_1 | RXH_L4_B_2_3; 2613 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 2614 info->data = RXH_IP_SRC | RXH_IP_DST; 2615 } 2616 break; 2617 case TCP_V6_FLOW: 2618 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) { 2619 info->data = RXH_IP_SRC | RXH_IP_DST | 2620 RXH_L4_B_0_1 | RXH_L4_B_2_3; 2621 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 2622 info->data = RXH_IP_SRC | RXH_IP_DST; 2623 } 2624 break; 2625 case UDP_V4_FLOW: 2626 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) { 2627 info->data = RXH_IP_SRC | RXH_IP_DST | 2628 RXH_L4_B_0_1 | RXH_L4_B_2_3; 2629 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 2630 info->data = RXH_IP_SRC | RXH_IP_DST; 2631 } 2632 break; 2633 case UDP_V6_FLOW: 2634 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) { 2635 info->data = RXH_IP_SRC | RXH_IP_DST | 2636 RXH_L4_B_0_1 | RXH_L4_B_2_3; 2637 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 2638 info->data = RXH_IP_SRC | RXH_IP_DST; 2639 } 2640 break; 2641 case IPV4_FLOW: 2642 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) 2643 info->data = RXH_IP_SRC | RXH_IP_DST; 2644 2645 break; 2646 case IPV6_FLOW: 2647 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) 2648 info->data = RXH_IP_SRC | RXH_IP_DST; 2649 2650 break; 2651 default: 2652 info->data = 0; 2653 break; 2654 } 2655 } 2656 2657 static bool virtnet_set_hashflow(struct virtnet_info *vi, struct ethtool_rxnfc *info) 2658 { 2659 u32 new_hashtypes = vi->rss_hash_types_saved; 2660 bool is_disable = info->data & RXH_DISCARD; 2661 bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3); 2662 2663 /* supports only 'sd', 'sdfn' and 'r' */ 2664 if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable)) 2665 return false; 2666 2667 switch (info->flow_type) { 2668 case TCP_V4_FLOW: 2669 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4); 2670 if (!is_disable) 2671 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 2672 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0); 2673 break; 2674 case UDP_V4_FLOW: 2675 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4); 2676 if (!is_disable) 2677 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 2678 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0); 2679 break; 2680 case IPV4_FLOW: 2681 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4; 2682 if (!is_disable) 2683 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4; 2684 break; 2685 case TCP_V6_FLOW: 2686 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6); 2687 if (!is_disable) 2688 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 2689 | (is_l4 ? 
VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0); 2690 break; 2691 case UDP_V6_FLOW: 2692 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6); 2693 if (!is_disable) 2694 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 2695 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0); 2696 break; 2697 case IPV6_FLOW: 2698 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6; 2699 if (!is_disable) 2700 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6; 2701 break; 2702 default: 2703 /* unsupported flow */ 2704 return false; 2705 } 2706 2707 /* if unsupported hashtype was set */ 2708 if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported)) 2709 return false; 2710 2711 if (new_hashtypes != vi->rss_hash_types_saved) { 2712 vi->rss_hash_types_saved = new_hashtypes; 2713 vi->ctrl->rss.hash_types = vi->rss_hash_types_saved; 2714 if (vi->dev->features & NETIF_F_RXHASH) 2715 return virtnet_commit_rss_command(vi); 2716 } 2717 2718 return true; 2719 } 2720 2721 static void virtnet_get_drvinfo(struct net_device *dev, 2722 struct ethtool_drvinfo *info) 2723 { 2724 struct virtnet_info *vi = netdev_priv(dev); 2725 struct virtio_device *vdev = vi->vdev; 2726 2727 strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); 2728 strscpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version)); 2729 strscpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info)); 2730 2731 } 2732 2733 /* TODO: Eliminate OOO packets during switching */ 2734 static int virtnet_set_channels(struct net_device *dev, 2735 struct ethtool_channels *channels) 2736 { 2737 struct virtnet_info *vi = netdev_priv(dev); 2738 u16 queue_pairs = channels->combined_count; 2739 int err; 2740 2741 /* We don't support separate rx/tx channels. 2742 * We don't allow setting 'other' channels. 2743 */ 2744 if (channels->rx_count || channels->tx_count || channels->other_count) 2745 return -EINVAL; 2746 2747 if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0) 2748 return -EINVAL; 2749 2750 /* For now we don't support modifying channels while XDP is loaded 2751 * also when XDP is loaded all RX queues have XDP programs so we only 2752 * need to check a single RX queue. 
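* Rejecting the change here is conservative: resizing the channel count under an attached program would also have to rework the extra TX queues reserved for XDP_TX (see virtnet_xdp_set()).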
2753 */ 2754 if (vi->rq[0].xdp_prog) 2755 return -EINVAL; 2756 2757 cpus_read_lock(); 2758 err = _virtnet_set_queues(vi, queue_pairs); 2759 if (err) { 2760 cpus_read_unlock(); 2761 goto err; 2762 } 2763 virtnet_set_affinity(vi); 2764 cpus_read_unlock(); 2765 2766 netif_set_real_num_tx_queues(dev, queue_pairs); 2767 netif_set_real_num_rx_queues(dev, queue_pairs); 2768 err: 2769 return err; 2770 } 2771 2772 static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) 2773 { 2774 struct virtnet_info *vi = netdev_priv(dev); 2775 unsigned int i, j; 2776 u8 *p = data; 2777 2778 switch (stringset) { 2779 case ETH_SS_STATS: 2780 for (i = 0; i < vi->curr_queue_pairs; i++) { 2781 for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) 2782 ethtool_sprintf(&p, "rx_queue_%u_%s", i, 2783 virtnet_rq_stats_desc[j].desc); 2784 } 2785 2786 for (i = 0; i < vi->curr_queue_pairs; i++) { 2787 for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) 2788 ethtool_sprintf(&p, "tx_queue_%u_%s", i, 2789 virtnet_sq_stats_desc[j].desc); 2790 } 2791 break; 2792 } 2793 } 2794 2795 static int virtnet_get_sset_count(struct net_device *dev, int sset) 2796 { 2797 struct virtnet_info *vi = netdev_priv(dev); 2798 2799 switch (sset) { 2800 case ETH_SS_STATS: 2801 return vi->curr_queue_pairs * (VIRTNET_RQ_STATS_LEN + 2802 VIRTNET_SQ_STATS_LEN); 2803 default: 2804 return -EOPNOTSUPP; 2805 } 2806 } 2807 2808 static void virtnet_get_ethtool_stats(struct net_device *dev, 2809 struct ethtool_stats *stats, u64 *data) 2810 { 2811 struct virtnet_info *vi = netdev_priv(dev); 2812 unsigned int idx = 0, start, i, j; 2813 const u8 *stats_base; 2814 size_t offset; 2815 2816 for (i = 0; i < vi->curr_queue_pairs; i++) { 2817 struct receive_queue *rq = &vi->rq[i]; 2818 2819 stats_base = (u8 *)&rq->stats; 2820 do { 2821 start = u64_stats_fetch_begin(&rq->stats.syncp); 2822 for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) { 2823 offset = virtnet_rq_stats_desc[j].offset; 2824 data[idx + j] = *(u64 *)(stats_base + offset); 2825 } 2826 } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); 2827 idx += VIRTNET_RQ_STATS_LEN; 2828 } 2829 2830 for (i = 0; i < vi->curr_queue_pairs; i++) { 2831 struct send_queue *sq = &vi->sq[i]; 2832 2833 stats_base = (u8 *)&sq->stats; 2834 do { 2835 start = u64_stats_fetch_begin(&sq->stats.syncp); 2836 for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) { 2837 offset = virtnet_sq_stats_desc[j].offset; 2838 data[idx + j] = *(u64 *)(stats_base + offset); 2839 } 2840 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 2841 idx += VIRTNET_SQ_STATS_LEN; 2842 } 2843 } 2844 2845 static void virtnet_get_channels(struct net_device *dev, 2846 struct ethtool_channels *channels) 2847 { 2848 struct virtnet_info *vi = netdev_priv(dev); 2849 2850 channels->combined_count = vi->curr_queue_pairs; 2851 channels->max_combined = vi->max_queue_pairs; 2852 channels->max_other = 0; 2853 channels->rx_count = 0; 2854 channels->tx_count = 0; 2855 channels->other_count = 0; 2856 } 2857 2858 static int virtnet_set_link_ksettings(struct net_device *dev, 2859 const struct ethtool_link_ksettings *cmd) 2860 { 2861 struct virtnet_info *vi = netdev_priv(dev); 2862 2863 return ethtool_virtdev_set_link_ksettings(dev, cmd, 2864 &vi->speed, &vi->duplex); 2865 } 2866 2867 static int virtnet_get_link_ksettings(struct net_device *dev, 2868 struct ethtool_link_ksettings *cmd) 2869 { 2870 struct virtnet_info *vi = netdev_priv(dev); 2871 2872 cmd->base.speed = vi->speed; 2873 cmd->base.duplex = vi->duplex; 2874 cmd->base.port = PORT_OTHER; 2875 2876 return 0; 2877 } 2878 
2879 static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, 2880 struct ethtool_coalesce *ec) 2881 { 2882 struct scatterlist sgs_tx, sgs_rx; 2883 struct virtio_net_ctrl_coal_tx coal_tx; 2884 struct virtio_net_ctrl_coal_rx coal_rx; 2885 2886 coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); 2887 coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); 2888 sg_init_one(&sgs_tx, &coal_tx, sizeof(coal_tx)); 2889 2890 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 2891 VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, 2892 &sgs_tx)) 2893 return -EINVAL; 2894 2895 /* Save parameters */ 2896 vi->tx_usecs = ec->tx_coalesce_usecs; 2897 vi->tx_max_packets = ec->tx_max_coalesced_frames; 2898 2899 coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); 2900 coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); 2901 sg_init_one(&sgs_rx, &coal_rx, sizeof(coal_rx)); 2902 2903 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 2904 VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, 2905 &sgs_rx)) 2906 return -EINVAL; 2907 2908 /* Save parameters */ 2909 vi->rx_usecs = ec->rx_coalesce_usecs; 2910 vi->rx_max_packets = ec->rx_max_coalesced_frames; 2911 2912 return 0; 2913 } 2914 2915 static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) 2916 { 2917 /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL 2918 * feature is negotiated. 2919 */ 2920 if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) 2921 return -EOPNOTSUPP; 2922 2923 if (ec->tx_max_coalesced_frames > 1 || 2924 ec->rx_max_coalesced_frames != 1) 2925 return -EINVAL; 2926 2927 return 0; 2928 } 2929 2930 static int virtnet_set_coalesce(struct net_device *dev, 2931 struct ethtool_coalesce *ec, 2932 struct kernel_ethtool_coalesce *kernel_coal, 2933 struct netlink_ext_ack *extack) 2934 { 2935 struct virtnet_info *vi = netdev_priv(dev); 2936 int ret, i, napi_weight; 2937 bool update_napi = false; 2938 2939 /* Can't change NAPI weight if the link is up */ 2940 napi_weight = ec->tx_max_coalesced_frames ? 
NAPI_POLL_WEIGHT : 0; 2941 if (napi_weight ^ vi->sq[0].napi.weight) { 2942 if (dev->flags & IFF_UP) 2943 return -EBUSY; 2944 else 2945 update_napi = true; 2946 } 2947 2948 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) 2949 ret = virtnet_send_notf_coal_cmds(vi, ec); 2950 else 2951 ret = virtnet_coal_params_supported(ec); 2952 2953 if (ret) 2954 return ret; 2955 2956 if (update_napi) { 2957 for (i = 0; i < vi->max_queue_pairs; i++) 2958 vi->sq[i].napi.weight = napi_weight; 2959 } 2960 2961 return ret; 2962 } 2963 2964 static int virtnet_get_coalesce(struct net_device *dev, 2965 struct ethtool_coalesce *ec, 2966 struct kernel_ethtool_coalesce *kernel_coal, 2967 struct netlink_ext_ack *extack) 2968 { 2969 struct virtnet_info *vi = netdev_priv(dev); 2970 2971 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 2972 ec->rx_coalesce_usecs = vi->rx_usecs; 2973 ec->tx_coalesce_usecs = vi->tx_usecs; 2974 ec->tx_max_coalesced_frames = vi->tx_max_packets; 2975 ec->rx_max_coalesced_frames = vi->rx_max_packets; 2976 } else { 2977 ec->rx_max_coalesced_frames = 1; 2978 2979 if (vi->sq[0].napi.weight) 2980 ec->tx_max_coalesced_frames = 1; 2981 } 2982 2983 return 0; 2984 } 2985 2986 static void virtnet_init_settings(struct net_device *dev) 2987 { 2988 struct virtnet_info *vi = netdev_priv(dev); 2989 2990 vi->speed = SPEED_UNKNOWN; 2991 vi->duplex = DUPLEX_UNKNOWN; 2992 } 2993 2994 static void virtnet_update_settings(struct virtnet_info *vi) 2995 { 2996 u32 speed; 2997 u8 duplex; 2998 2999 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX)) 3000 return; 3001 3002 virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed); 3003 3004 if (ethtool_validate_speed(speed)) 3005 vi->speed = speed; 3006 3007 virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex); 3008 3009 if (ethtool_validate_duplex(duplex)) 3010 vi->duplex = duplex; 3011 } 3012 3013 static u32 virtnet_get_rxfh_key_size(struct net_device *dev) 3014 { 3015 return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size; 3016 } 3017 3018 static u32 virtnet_get_rxfh_indir_size(struct net_device *dev) 3019 { 3020 return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size; 3021 } 3022 3023 static int virtnet_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, u8 *hfunc) 3024 { 3025 struct virtnet_info *vi = netdev_priv(dev); 3026 int i; 3027 3028 if (indir) { 3029 for (i = 0; i < vi->rss_indir_table_size; ++i) 3030 indir[i] = vi->ctrl->rss.indirection_table[i]; 3031 } 3032 3033 if (key) 3034 memcpy(key, vi->ctrl->rss.key, vi->rss_key_size); 3035 3036 if (hfunc) 3037 *hfunc = ETH_RSS_HASH_TOP; 3038 3039 return 0; 3040 } 3041 3042 static int virtnet_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, const u8 hfunc) 3043 { 3044 struct virtnet_info *vi = netdev_priv(dev); 3045 int i; 3046 3047 if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) 3048 return -EOPNOTSUPP; 3049 3050 if (indir) { 3051 for (i = 0; i < vi->rss_indir_table_size; ++i) 3052 vi->ctrl->rss.indirection_table[i] = indir[i]; 3053 } 3054 if (key) 3055 memcpy(vi->ctrl->rss.key, key, vi->rss_key_size); 3056 3057 virtnet_commit_rss_command(vi); 3058 3059 return 0; 3060 } 3061 3062 static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) 3063 { 3064 struct virtnet_info *vi = netdev_priv(dev); 3065 int rc = 0; 3066 3067 switch (info->cmd) { 3068 case ETHTOOL_GRXRINGS: 3069 info->data = vi->curr_queue_pairs; 3070 break; 3071 case ETHTOOL_GRXFH: 3072 
virtnet_get_hashflow(vi, info); 3073 break; 3074 default: 3075 rc = -EOPNOTSUPP; 3076 } 3077 3078 return rc; 3079 } 3080 3081 static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) 3082 { 3083 struct virtnet_info *vi = netdev_priv(dev); 3084 int rc = 0; 3085 3086 switch (info->cmd) { 3087 case ETHTOOL_SRXFH: 3088 if (!virtnet_set_hashflow(vi, info)) 3089 rc = -EINVAL; 3090 3091 break; 3092 default: 3093 rc = -EOPNOTSUPP; 3094 } 3095 3096 return rc; 3097 } 3098 3099 static const struct ethtool_ops virtnet_ethtool_ops = { 3100 .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | 3101 ETHTOOL_COALESCE_USECS, 3102 .get_drvinfo = virtnet_get_drvinfo, 3103 .get_link = ethtool_op_get_link, 3104 .get_ringparam = virtnet_get_ringparam, 3105 .set_ringparam = virtnet_set_ringparam, 3106 .get_strings = virtnet_get_strings, 3107 .get_sset_count = virtnet_get_sset_count, 3108 .get_ethtool_stats = virtnet_get_ethtool_stats, 3109 .set_channels = virtnet_set_channels, 3110 .get_channels = virtnet_get_channels, 3111 .get_ts_info = ethtool_op_get_ts_info, 3112 .get_link_ksettings = virtnet_get_link_ksettings, 3113 .set_link_ksettings = virtnet_set_link_ksettings, 3114 .set_coalesce = virtnet_set_coalesce, 3115 .get_coalesce = virtnet_get_coalesce, 3116 .get_rxfh_key_size = virtnet_get_rxfh_key_size, 3117 .get_rxfh_indir_size = virtnet_get_rxfh_indir_size, 3118 .get_rxfh = virtnet_get_rxfh, 3119 .set_rxfh = virtnet_set_rxfh, 3120 .get_rxnfc = virtnet_get_rxnfc, 3121 .set_rxnfc = virtnet_set_rxnfc, 3122 }; 3123 3124 static void virtnet_freeze_down(struct virtio_device *vdev) 3125 { 3126 struct virtnet_info *vi = vdev->priv; 3127 3128 /* Make sure no work handler is accessing the device */ 3129 flush_work(&vi->config_work); 3130 3131 netif_tx_lock_bh(vi->dev); 3132 netif_device_detach(vi->dev); 3133 netif_tx_unlock_bh(vi->dev); 3134 if (netif_running(vi->dev)) 3135 virtnet_close(vi->dev); 3136 } 3137 3138 static int init_vqs(struct virtnet_info *vi); 3139 3140 static int virtnet_restore_up(struct virtio_device *vdev) 3141 { 3142 struct virtnet_info *vi = vdev->priv; 3143 int err; 3144 3145 err = init_vqs(vi); 3146 if (err) 3147 return err; 3148 3149 virtio_device_ready(vdev); 3150 3151 enable_delayed_refill(vi); 3152 3153 if (netif_running(vi->dev)) { 3154 err = virtnet_open(vi->dev); 3155 if (err) 3156 return err; 3157 } 3158 3159 netif_tx_lock_bh(vi->dev); 3160 netif_device_attach(vi->dev); 3161 netif_tx_unlock_bh(vi->dev); 3162 return err; 3163 } 3164 3165 static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads) 3166 { 3167 struct scatterlist sg; 3168 vi->ctrl->offloads = cpu_to_virtio64(vi->vdev, offloads); 3169 3170 sg_init_one(&sg, &vi->ctrl->offloads, sizeof(vi->ctrl->offloads)); 3171 3172 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS, 3173 VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) { 3174 dev_warn(&vi->dev->dev, "Fail to set guest offload.\n"); 3175 return -EINVAL; 3176 } 3177 3178 return 0; 3179 } 3180 3181 static int virtnet_clear_guest_offloads(struct virtnet_info *vi) 3182 { 3183 u64 offloads = 0; 3184 3185 if (!vi->guest_offloads) 3186 return 0; 3187 3188 return virtnet_set_guest_offloads(vi, offloads); 3189 } 3190 3191 static int virtnet_restore_guest_offloads(struct virtnet_info *vi) 3192 { 3193 u64 offloads = vi->guest_offloads; 3194 3195 if (!vi->guest_offloads) 3196 return 0; 3197 3198 return virtnet_set_guest_offloads(vi, offloads); 3199 } 3200 3201 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, 3202 
struct netlink_ext_ack *extack) 3203 { 3204 unsigned int room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM + 3205 sizeof(struct skb_shared_info)); 3206 unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN; 3207 struct virtnet_info *vi = netdev_priv(dev); 3208 struct bpf_prog *old_prog; 3209 u16 xdp_qp = 0, curr_qp; 3210 int i, err; 3211 3212 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) 3213 && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 3214 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 3215 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 3216 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 3217 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) || 3218 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) || 3219 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) { 3220 NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first"); 3221 return -EOPNOTSUPP; 3222 } 3223 3224 if (vi->mergeable_rx_bufs && !vi->any_header_sg) { 3225 NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required"); 3226 return -EINVAL; 3227 } 3228 3229 if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) { 3230 NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags"); 3231 netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz); 3232 return -EINVAL; 3233 } 3234 3235 curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs; 3236 if (prog) 3237 xdp_qp = nr_cpu_ids; 3238 3239 /* XDP requires extra queues for XDP_TX */ 3240 if (curr_qp + xdp_qp > vi->max_queue_pairs) { 3241 netdev_warn_once(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n", 3242 curr_qp + xdp_qp, vi->max_queue_pairs); 3243 xdp_qp = 0; 3244 } 3245 3246 old_prog = rtnl_dereference(vi->rq[0].xdp_prog); 3247 if (!prog && !old_prog) 3248 return 0; 3249 3250 if (prog) 3251 bpf_prog_add(prog, vi->max_queue_pairs - 1); 3252 3253 /* Make sure NAPI is not using any XDP TX queues for RX. 
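* Every RX and TX napi instance is stopped before rq->xdp_prog and the queue accounting are updated, so no CPU can still be polling with the old XDP state while the swap takes place.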
*/ 3254 if (netif_running(dev)) { 3255 for (i = 0; i < vi->max_queue_pairs; i++) { 3256 napi_disable(&vi->rq[i].napi); 3257 virtnet_napi_tx_disable(&vi->sq[i].napi); 3258 } 3259 } 3260 3261 if (!prog) { 3262 for (i = 0; i < vi->max_queue_pairs; i++) { 3263 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 3264 if (i == 0) 3265 virtnet_restore_guest_offloads(vi); 3266 } 3267 synchronize_net(); 3268 } 3269 3270 err = _virtnet_set_queues(vi, curr_qp + xdp_qp); 3271 if (err) 3272 goto err; 3273 netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); 3274 vi->xdp_queue_pairs = xdp_qp; 3275 3276 if (prog) { 3277 vi->xdp_enabled = true; 3278 for (i = 0; i < vi->max_queue_pairs; i++) { 3279 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 3280 if (i == 0 && !old_prog) 3281 virtnet_clear_guest_offloads(vi); 3282 } 3283 } else { 3284 vi->xdp_enabled = false; 3285 } 3286 3287 for (i = 0; i < vi->max_queue_pairs; i++) { 3288 if (old_prog) 3289 bpf_prog_put(old_prog); 3290 if (netif_running(dev)) { 3291 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); 3292 virtnet_napi_tx_enable(vi, vi->sq[i].vq, 3293 &vi->sq[i].napi); 3294 } 3295 } 3296 3297 return 0; 3298 3299 err: 3300 if (!prog) { 3301 virtnet_clear_guest_offloads(vi); 3302 for (i = 0; i < vi->max_queue_pairs; i++) 3303 rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); 3304 } 3305 3306 if (netif_running(dev)) { 3307 for (i = 0; i < vi->max_queue_pairs; i++) { 3308 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); 3309 virtnet_napi_tx_enable(vi, vi->sq[i].vq, 3310 &vi->sq[i].napi); 3311 } 3312 } 3313 if (prog) 3314 bpf_prog_sub(prog, vi->max_queue_pairs - 1); 3315 return err; 3316 } 3317 3318 static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) 3319 { 3320 switch (xdp->command) { 3321 case XDP_SETUP_PROG: 3322 return virtnet_xdp_set(dev, xdp->prog, xdp->extack); 3323 default: 3324 return -EINVAL; 3325 } 3326 } 3327 3328 static int virtnet_get_phys_port_name(struct net_device *dev, char *buf, 3329 size_t len) 3330 { 3331 struct virtnet_info *vi = netdev_priv(dev); 3332 int ret; 3333 3334 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 3335 return -EOPNOTSUPP; 3336 3337 ret = snprintf(buf, len, "sby"); 3338 if (ret >= len) 3339 return -EOPNOTSUPP; 3340 3341 return 0; 3342 } 3343 3344 static int virtnet_set_features(struct net_device *dev, 3345 netdev_features_t features) 3346 { 3347 struct virtnet_info *vi = netdev_priv(dev); 3348 u64 offloads; 3349 int err; 3350 3351 if ((dev->features ^ features) & NETIF_F_GRO_HW) { 3352 if (vi->xdp_enabled) 3353 return -EBUSY; 3354 3355 if (features & NETIF_F_GRO_HW) 3356 offloads = vi->guest_offloads_capable; 3357 else 3358 offloads = vi->guest_offloads_capable & 3359 ~GUEST_OFFLOAD_GRO_HW_MASK; 3360 3361 err = virtnet_set_guest_offloads(vi, offloads); 3362 if (err) 3363 return err; 3364 vi->guest_offloads = offloads; 3365 } 3366 3367 if ((dev->features ^ features) & NETIF_F_RXHASH) { 3368 if (features & NETIF_F_RXHASH) 3369 vi->ctrl->rss.hash_types = vi->rss_hash_types_saved; 3370 else 3371 vi->ctrl->rss.hash_types = VIRTIO_NET_HASH_REPORT_NONE; 3372 3373 if (!virtnet_commit_rss_command(vi)) 3374 return -EINVAL; 3375 } 3376 3377 return 0; 3378 } 3379 3380 static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue) 3381 { 3382 struct virtnet_info *priv = netdev_priv(dev); 3383 struct send_queue *sq = &priv->sq[txqueue]; 3384 struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue); 3385 3386 u64_stats_update_begin(&sq->stats.syncp); 3387 sq->stats.tx_timeouts++; 3388 
u64_stats_update_end(&sq->stats.syncp); 3389 3390 netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n", 3391 txqueue, sq->name, sq->vq->index, sq->vq->name, 3392 jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start))); 3393 } 3394 3395 static const struct net_device_ops virtnet_netdev = { 3396 .ndo_open = virtnet_open, 3397 .ndo_stop = virtnet_close, 3398 .ndo_start_xmit = start_xmit, 3399 .ndo_validate_addr = eth_validate_addr, 3400 .ndo_set_mac_address = virtnet_set_mac_address, 3401 .ndo_set_rx_mode = virtnet_set_rx_mode, 3402 .ndo_get_stats64 = virtnet_stats, 3403 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, 3404 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, 3405 .ndo_bpf = virtnet_xdp, 3406 .ndo_xdp_xmit = virtnet_xdp_xmit, 3407 .ndo_features_check = passthru_features_check, 3408 .ndo_get_phys_port_name = virtnet_get_phys_port_name, 3409 .ndo_set_features = virtnet_set_features, 3410 .ndo_tx_timeout = virtnet_tx_timeout, 3411 }; 3412 3413 static void virtnet_config_changed_work(struct work_struct *work) 3414 { 3415 struct virtnet_info *vi = 3416 container_of(work, struct virtnet_info, config_work); 3417 u16 v; 3418 3419 if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS, 3420 struct virtio_net_config, status, &v) < 0) 3421 return; 3422 3423 if (v & VIRTIO_NET_S_ANNOUNCE) { 3424 netdev_notify_peers(vi->dev); 3425 virtnet_ack_link_announce(vi); 3426 } 3427 3428 /* Ignore unknown (future) status bits */ 3429 v &= VIRTIO_NET_S_LINK_UP; 3430 3431 if (vi->status == v) 3432 return; 3433 3434 vi->status = v; 3435 3436 if (vi->status & VIRTIO_NET_S_LINK_UP) { 3437 virtnet_update_settings(vi); 3438 netif_carrier_on(vi->dev); 3439 netif_tx_wake_all_queues(vi->dev); 3440 } else { 3441 netif_carrier_off(vi->dev); 3442 netif_tx_stop_all_queues(vi->dev); 3443 } 3444 } 3445 3446 static void virtnet_config_changed(struct virtio_device *vdev) 3447 { 3448 struct virtnet_info *vi = vdev->priv; 3449 3450 schedule_work(&vi->config_work); 3451 } 3452 3453 static void virtnet_free_queues(struct virtnet_info *vi) 3454 { 3455 int i; 3456 3457 for (i = 0; i < vi->max_queue_pairs; i++) { 3458 __netif_napi_del(&vi->rq[i].napi); 3459 __netif_napi_del(&vi->sq[i].napi); 3460 } 3461 3462 /* We called __netif_napi_del(), 3463 * we need to respect an RCU grace period before freeing vi->rq 3464 */ 3465 synchronize_net(); 3466 3467 kfree(vi->rq); 3468 kfree(vi->sq); 3469 kfree(vi->ctrl); 3470 } 3471 3472 static void _free_receive_bufs(struct virtnet_info *vi) 3473 { 3474 struct bpf_prog *old_prog; 3475 int i; 3476 3477 for (i = 0; i < vi->max_queue_pairs; i++) { 3478 while (vi->rq[i].pages) 3479 __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0); 3480 3481 old_prog = rtnl_dereference(vi->rq[i].xdp_prog); 3482 RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL); 3483 if (old_prog) 3484 bpf_prog_put(old_prog); 3485 } 3486 } 3487 3488 static void free_receive_bufs(struct virtnet_info *vi) 3489 { 3490 rtnl_lock(); 3491 _free_receive_bufs(vi); 3492 rtnl_unlock(); 3493 } 3494 3495 static void free_receive_page_frags(struct virtnet_info *vi) 3496 { 3497 int i; 3498 for (i = 0; i < vi->max_queue_pairs; i++) 3499 if (vi->rq[i].alloc_frag.page) 3500 put_page(vi->rq[i].alloc_frag.page); 3501 } 3502 3503 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf) 3504 { 3505 if (!is_xdp_frame(buf)) 3506 dev_kfree_skb(buf); 3507 else 3508 xdp_return_frame(ptr_to_xdp(buf)); 3509 } 3510 3511 static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf) 3512 { 3513 struct 
virtnet_info *vi = vq->vdev->priv; 3514 int i = vq2rxq(vq); 3515 3516 if (vi->mergeable_rx_bufs) 3517 put_page(virt_to_head_page(buf)); 3518 else if (vi->big_packets) 3519 give_pages(&vi->rq[i], buf); 3520 else 3521 put_page(virt_to_head_page(buf)); 3522 } 3523 3524 static void free_unused_bufs(struct virtnet_info *vi) 3525 { 3526 void *buf; 3527 int i; 3528 3529 for (i = 0; i < vi->max_queue_pairs; i++) { 3530 struct virtqueue *vq = vi->sq[i].vq; 3531 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 3532 virtnet_sq_free_unused_buf(vq, buf); 3533 } 3534 3535 for (i = 0; i < vi->max_queue_pairs; i++) { 3536 struct virtqueue *vq = vi->rq[i].vq; 3537 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 3538 virtnet_rq_free_unused_buf(vq, buf); 3539 } 3540 } 3541 3542 static void virtnet_del_vqs(struct virtnet_info *vi) 3543 { 3544 struct virtio_device *vdev = vi->vdev; 3545 3546 virtnet_clean_affinity(vi); 3547 3548 vdev->config->del_vqs(vdev); 3549 3550 virtnet_free_queues(vi); 3551 } 3552 3553 /* How large should a single buffer be so a queue full of these can fit at 3554 * least one full packet? 3555 * Logic below assumes the mergeable buffer header is used. 3556 */ 3557 static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq) 3558 { 3559 const unsigned int hdr_len = vi->hdr_len; 3560 unsigned int rq_size = virtqueue_get_vring_size(vq); 3561 unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu; 3562 unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len; 3563 unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size); 3564 3565 return max(max(min_buf_len, hdr_len) - hdr_len, 3566 (unsigned int)GOOD_PACKET_LEN); 3567 } 3568 3569 static int virtnet_find_vqs(struct virtnet_info *vi) 3570 { 3571 vq_callback_t **callbacks; 3572 struct virtqueue **vqs; 3573 int ret = -ENOMEM; 3574 int i, total_vqs; 3575 const char **names; 3576 bool *ctx; 3577 3578 /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by 3579 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by 3580 * possible control vq. 
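* That is, queue pair i is expected at vqs[2 * i] (RX) and vqs[2 * i + 1] (TX), and the control vq, when negotiated, takes the last slot at index total_vqs - 1.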
3581 */ 3582 total_vqs = vi->max_queue_pairs * 2 + 3583 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ); 3584 3585 /* Allocate space for find_vqs parameters */ 3586 vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL); 3587 if (!vqs) 3588 goto err_vq; 3589 callbacks = kmalloc_array(total_vqs, sizeof(*callbacks), GFP_KERNEL); 3590 if (!callbacks) 3591 goto err_callback; 3592 names = kmalloc_array(total_vqs, sizeof(*names), GFP_KERNEL); 3593 if (!names) 3594 goto err_names; 3595 if (!vi->big_packets || vi->mergeable_rx_bufs) { 3596 ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL); 3597 if (!ctx) 3598 goto err_ctx; 3599 } else { 3600 ctx = NULL; 3601 } 3602 3603 /* Parameters for control virtqueue, if any */ 3604 if (vi->has_cvq) { 3605 callbacks[total_vqs - 1] = NULL; 3606 names[total_vqs - 1] = "control"; 3607 } 3608 3609 /* Allocate/initialize parameters for send/receive virtqueues */ 3610 for (i = 0; i < vi->max_queue_pairs; i++) { 3611 callbacks[rxq2vq(i)] = skb_recv_done; 3612 callbacks[txq2vq(i)] = skb_xmit_done; 3613 sprintf(vi->rq[i].name, "input.%d", i); 3614 sprintf(vi->sq[i].name, "output.%d", i); 3615 names[rxq2vq(i)] = vi->rq[i].name; 3616 names[txq2vq(i)] = vi->sq[i].name; 3617 if (ctx) 3618 ctx[rxq2vq(i)] = true; 3619 } 3620 3621 ret = virtio_find_vqs_ctx(vi->vdev, total_vqs, vqs, callbacks, 3622 names, ctx, NULL); 3623 if (ret) 3624 goto err_find; 3625 3626 if (vi->has_cvq) { 3627 vi->cvq = vqs[total_vqs - 1]; 3628 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) 3629 vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; 3630 } 3631 3632 for (i = 0; i < vi->max_queue_pairs; i++) { 3633 vi->rq[i].vq = vqs[rxq2vq(i)]; 3634 vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq); 3635 vi->sq[i].vq = vqs[txq2vq(i)]; 3636 } 3637 3638 /* run here: ret == 0. */ 3639 3640 3641 err_find: 3642 kfree(ctx); 3643 err_ctx: 3644 kfree(names); 3645 err_names: 3646 kfree(callbacks); 3647 err_callback: 3648 kfree(vqs); 3649 err_vq: 3650 return ret; 3651 } 3652 3653 static int virtnet_alloc_queues(struct virtnet_info *vi) 3654 { 3655 int i; 3656 3657 if (vi->has_cvq) { 3658 vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL); 3659 if (!vi->ctrl) 3660 goto err_ctrl; 3661 } else { 3662 vi->ctrl = NULL; 3663 } 3664 vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL); 3665 if (!vi->sq) 3666 goto err_sq; 3667 vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL); 3668 if (!vi->rq) 3669 goto err_rq; 3670 3671 INIT_DELAYED_WORK(&vi->refill, refill_work); 3672 for (i = 0; i < vi->max_queue_pairs; i++) { 3673 vi->rq[i].pages = NULL; 3674 netif_napi_add_weight(vi->dev, &vi->rq[i].napi, virtnet_poll, 3675 napi_weight); 3676 netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi, 3677 virtnet_poll_tx, 3678 napi_tx ? 
napi_weight : 0); 3679 3680 sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); 3681 ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len); 3682 sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg)); 3683 3684 u64_stats_init(&vi->rq[i].stats.syncp); 3685 u64_stats_init(&vi->sq[i].stats.syncp); 3686 } 3687 3688 return 0; 3689 3690 err_rq: 3691 kfree(vi->sq); 3692 err_sq: 3693 kfree(vi->ctrl); 3694 err_ctrl: 3695 return -ENOMEM; 3696 } 3697 3698 static int init_vqs(struct virtnet_info *vi) 3699 { 3700 int ret; 3701 3702 /* Allocate send & receive queues */ 3703 ret = virtnet_alloc_queues(vi); 3704 if (ret) 3705 goto err; 3706 3707 ret = virtnet_find_vqs(vi); 3708 if (ret) 3709 goto err_free; 3710 3711 cpus_read_lock(); 3712 virtnet_set_affinity(vi); 3713 cpus_read_unlock(); 3714 3715 return 0; 3716 3717 err_free: 3718 virtnet_free_queues(vi); 3719 err: 3720 return ret; 3721 } 3722 3723 #ifdef CONFIG_SYSFS 3724 static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue, 3725 char *buf) 3726 { 3727 struct virtnet_info *vi = netdev_priv(queue->dev); 3728 unsigned int queue_index = get_netdev_rx_queue_index(queue); 3729 unsigned int headroom = virtnet_get_headroom(vi); 3730 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 3731 struct ewma_pkt_len *avg; 3732 3733 BUG_ON(queue_index >= vi->max_queue_pairs); 3734 avg = &vi->rq[queue_index].mrg_avg_pkt_len; 3735 return sprintf(buf, "%u\n", 3736 get_mergeable_buf_len(&vi->rq[queue_index], avg, 3737 SKB_DATA_ALIGN(headroom + tailroom))); 3738 } 3739 3740 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute = 3741 __ATTR_RO(mergeable_rx_buffer_size); 3742 3743 static struct attribute *virtio_net_mrg_rx_attrs[] = { 3744 &mergeable_rx_buffer_size_attribute.attr, 3745 NULL 3746 }; 3747 3748 static const struct attribute_group virtio_net_mrg_rx_group = { 3749 .name = "virtio_net", 3750 .attrs = virtio_net_mrg_rx_attrs 3751 }; 3752 #endif 3753 3754 static bool virtnet_fail_on_feature(struct virtio_device *vdev, 3755 unsigned int fbit, 3756 const char *fname, const char *dname) 3757 { 3758 if (!virtio_has_feature(vdev, fbit)) 3759 return false; 3760 3761 dev_err(&vdev->dev, "device advertises feature %s but not %s", 3762 fname, dname); 3763 3764 return true; 3765 } 3766 3767 #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \ 3768 virtnet_fail_on_feature(vdev, fbit, #fbit, dbit) 3769 3770 static bool virtnet_validate_features(struct virtio_device *vdev) 3771 { 3772 if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) && 3773 (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX, 3774 "VIRTIO_NET_F_CTRL_VQ") || 3775 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN, 3776 "VIRTIO_NET_F_CTRL_VQ") || 3777 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE, 3778 "VIRTIO_NET_F_CTRL_VQ") || 3779 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") || 3780 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR, 3781 "VIRTIO_NET_F_CTRL_VQ") || 3782 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, 3783 "VIRTIO_NET_F_CTRL_VQ") || 3784 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, 3785 "VIRTIO_NET_F_CTRL_VQ") || 3786 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, 3787 "VIRTIO_NET_F_CTRL_VQ"))) { 3788 return false; 3789 } 3790 3791 return true; 3792 } 3793 3794 #define MIN_MTU ETH_MIN_MTU 3795 #define MAX_MTU ETH_MAX_MTU 3796 3797 static int virtnet_validate(struct virtio_device *vdev) 3798 { 3799 if (!vdev->config->get) { 3800 dev_err(&vdev->dev, "%s failure: config access disabled\n", 3801 __func__); 3802 return -EINVAL; 3803 } 3804 3805 if 
(!virtnet_validate_features(vdev)) 3806 return -EINVAL; 3807 3808 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 3809 int mtu = virtio_cread16(vdev, 3810 offsetof(struct virtio_net_config, 3811 mtu)); 3812 if (mtu < MIN_MTU) 3813 __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU); 3814 } 3815 3816 return 0; 3817 } 3818 3819 static bool virtnet_check_guest_gso(const struct virtnet_info *vi) 3820 { 3821 return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 3822 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 3823 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 3824 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 3825 (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) && 3826 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6)); 3827 } 3828 3829 static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu) 3830 { 3831 bool guest_gso = virtnet_check_guest_gso(vi); 3832 3833 /* If device can receive ANY guest GSO packets, regardless of mtu, 3834 * allocate packets of maximum size, otherwise limit it to only 3835 * mtu size worth only. 3836 */ 3837 if (mtu > ETH_DATA_LEN || guest_gso) { 3838 vi->big_packets = true; 3839 vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE); 3840 } 3841 } 3842 3843 static int virtnet_probe(struct virtio_device *vdev) 3844 { 3845 int i, err = -ENOMEM; 3846 struct net_device *dev; 3847 struct virtnet_info *vi; 3848 u16 max_queue_pairs; 3849 int mtu = 0; 3850 3851 /* Find if host supports multiqueue/rss virtio_net device */ 3852 max_queue_pairs = 1; 3853 if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) 3854 max_queue_pairs = 3855 virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); 3856 3857 /* We need at least 2 queue's */ 3858 if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 3859 max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || 3860 !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 3861 max_queue_pairs = 1; 3862 3863 /* Allocate ourselves a network device with room for our info */ 3864 dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs); 3865 if (!dev) 3866 return -ENOMEM; 3867 3868 /* Set up network device as normal. */ 3869 dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE | 3870 IFF_TX_SKB_NO_LINEAR; 3871 dev->netdev_ops = &virtnet_netdev; 3872 dev->features = NETIF_F_HIGHDMA; 3873 3874 dev->ethtool_ops = &virtnet_ethtool_ops; 3875 SET_NETDEV_DEV(dev, &vdev->dev); 3876 3877 /* Do we support "hardware" checksums? */ 3878 if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { 3879 /* This opens up the world of extra features. */ 3880 dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG; 3881 if (csum) 3882 dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; 3883 3884 if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { 3885 dev->hw_features |= NETIF_F_TSO 3886 | NETIF_F_TSO_ECN | NETIF_F_TSO6; 3887 } 3888 /* Individual feature bits: what can host handle? 
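* VIRTIO_NET_F_GSO above is the legacy catch-all bit (it is only listed in features_legacy[]); the per-type HOST_TSO4, HOST_TSO6, HOST_ECN and HOST_USO bits below let the device advertise each offload separately.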
*/ 3889 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4)) 3890 dev->hw_features |= NETIF_F_TSO; 3891 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6)) 3892 dev->hw_features |= NETIF_F_TSO6; 3893 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) 3894 dev->hw_features |= NETIF_F_TSO_ECN; 3895 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO)) 3896 dev->hw_features |= NETIF_F_GSO_UDP_L4; 3897 3898 dev->features |= NETIF_F_GSO_ROBUST; 3899 3900 if (gso) 3901 dev->features |= dev->hw_features & NETIF_F_ALL_TSO; 3902 /* (!csum && gso) case will be fixed by register_netdev() */ 3903 } 3904 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM)) 3905 dev->features |= NETIF_F_RXCSUM; 3906 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || 3907 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) 3908 dev->features |= NETIF_F_GRO_HW; 3909 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) 3910 dev->hw_features |= NETIF_F_GRO_HW; 3911 3912 dev->vlan_features = dev->features; 3913 3914 /* MTU range: 68 - 65535 */ 3915 dev->min_mtu = MIN_MTU; 3916 dev->max_mtu = MAX_MTU; 3917 3918 /* Configuration may specify what MAC to use. Otherwise random. */ 3919 if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 3920 u8 addr[ETH_ALEN]; 3921 3922 virtio_cread_bytes(vdev, 3923 offsetof(struct virtio_net_config, mac), 3924 addr, ETH_ALEN); 3925 eth_hw_addr_set(dev, addr); 3926 } else { 3927 eth_hw_addr_random(dev); 3928 } 3929 3930 /* Set up our device-specific information */ 3931 vi = netdev_priv(dev); 3932 vi->dev = dev; 3933 vi->vdev = vdev; 3934 vdev->priv = vi; 3935 3936 INIT_WORK(&vi->config_work, virtnet_config_changed_work); 3937 spin_lock_init(&vi->refill_lock); 3938 3939 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) 3940 vi->mergeable_rx_bufs = true; 3941 3942 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 3943 vi->rx_usecs = 0; 3944 vi->tx_usecs = 0; 3945 vi->tx_max_packets = 0; 3946 vi->rx_max_packets = 0; 3947 } 3948 3949 if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) 3950 vi->has_rss_hash_report = true; 3951 3952 if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) 3953 vi->has_rss = true; 3954 3955 if (vi->has_rss || vi->has_rss_hash_report) { 3956 vi->rss_indir_table_size = 3957 virtio_cread16(vdev, offsetof(struct virtio_net_config, 3958 rss_max_indirection_table_length)); 3959 vi->rss_key_size = 3960 virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); 3961 3962 vi->rss_hash_types_supported = 3963 virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types)); 3964 vi->rss_hash_types_supported &= 3965 ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX | 3966 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | 3967 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX); 3968 3969 dev->hw_features |= NETIF_F_RXHASH; 3970 } 3971 3972 if (vi->has_rss_hash_report) 3973 vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); 3974 else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) || 3975 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 3976 vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); 3977 else 3978 vi->hdr_len = sizeof(struct virtio_net_hdr); 3979 3980 if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || 3981 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 3982 vi->any_header_sg = true; 3983 3984 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 3985 vi->has_cvq = true; 3986 3987 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 3988 mtu = virtio_cread16(vdev, 3989 offsetof(struct virtio_net_config, 3990 mtu)); 3991 if (mtu < dev->min_mtu) { 3992 /* Should 
	if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
		mtu = virtio_cread16(vdev,
				     offsetof(struct virtio_net_config,
					      mtu));
		if (mtu < dev->min_mtu) {
			/* Should never trigger: MTU was previously validated
			 * in virtnet_validate.
			 */
			dev_err(&vdev->dev,
				"device MTU appears to have changed, it is now %d < %d\n",
				mtu, dev->min_mtu);
			err = -EINVAL;
			goto free;
		}

		dev->mtu = mtu;
		dev->max_mtu = mtu;
	}

	virtnet_set_big_packets(vi, mtu);

	if (vi->any_header_sg)
		dev->needed_headroom = vi->hdr_len;

	/* Enable multiqueue by default */
	if (num_online_cpus() >= max_queue_pairs)
		vi->curr_queue_pairs = max_queue_pairs;
	else
		vi->curr_queue_pairs = num_online_cpus();
	vi->max_queue_pairs = max_queue_pairs;

	/* Allocate/initialize the rx/tx queues, and invoke find_vqs */
	err = init_vqs(vi);
	if (err)
		goto free;

#ifdef CONFIG_SYSFS
	if (vi->mergeable_rx_bufs)
		dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group;
#endif
	netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
	netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);

	virtnet_init_settings(dev);

	if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
		vi->failover = net_failover_create(vi->dev);
		if (IS_ERR(vi->failover)) {
			err = PTR_ERR(vi->failover);
			goto free_vqs;
		}
	}

	if (vi->has_rss || vi->has_rss_hash_report)
		virtnet_init_default_rss(vi);

	/* serialize netdev register + virtio_device_ready() with ndo_open() */
	rtnl_lock();

	err = register_netdevice(dev);
	if (err) {
		pr_debug("virtio_net: registering device failed\n");
		rtnl_unlock();
		goto free_failover;
	}

	virtio_device_ready(vdev);

	rtnl_unlock();

	err = virtnet_cpu_notif_add(vi);
	if (err) {
		pr_debug("virtio_net: registering cpu notifier failed\n");
		goto free_unregister_netdev;
	}

	virtnet_set_queues(vi, vi->curr_queue_pairs);

	/* Assume link up if device can't report link status,
	 * otherwise get link status from config.
	 */
	netif_carrier_off(dev);
	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
		schedule_work(&vi->config_work);
	} else {
		vi->status = VIRTIO_NET_S_LINK_UP;
		virtnet_update_settings(vi);
		netif_carrier_on(dev);
	}

	for (i = 0; i < ARRAY_SIZE(guest_offloads); i++)
		if (virtio_has_feature(vi->vdev, guest_offloads[i]))
			set_bit(guest_offloads[i], &vi->guest_offloads);
	vi->guest_offloads_capable = vi->guest_offloads;

	pr_debug("virtnet: registered device %s with %d RX and TX vqs\n",
		 dev->name, max_queue_pairs);

	return 0;

free_unregister_netdev:
	unregister_netdev(dev);
free_failover:
	net_failover_destroy(vi->failover);
free_vqs:
	virtio_reset_device(vdev);
	cancel_delayed_work_sync(&vi->refill);
	free_receive_page_frags(vi);
	virtnet_del_vqs(vi);
free:
	free_netdev(dev);
	return err;
}

static void remove_vq_common(struct virtnet_info *vi)
{
	virtio_reset_device(vi->vdev);

	/* Free unused buffers in both send and recv, if any. */
	free_unused_bufs(vi);

	free_receive_bufs(vi);

	free_receive_page_frags(vi);

	virtnet_del_vqs(vi);
}

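/* Teardown for device removal: stop the CPU-hotplug callbacks and the
 * config-change worker before unregistering the netdev, then reset the
 * device and release virtqueue resources via remove_vq_common() once no
 * datapath or control path can touch them.
 */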
static void virtnet_remove(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;

	virtnet_cpu_notif_remove(vi);

	/* Make sure no work handler is accessing the device. */
	flush_work(&vi->config_work);

	unregister_netdev(vi->dev);

	net_failover_destroy(vi->failover);

	remove_vq_common(vi);

	free_netdev(vi->dev);
}

static __maybe_unused int virtnet_freeze(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;

	virtnet_cpu_notif_remove(vi);
	virtnet_freeze_down(vdev);
	remove_vq_common(vi);

	return 0;
}

static __maybe_unused int virtnet_restore(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;
	int err;

	err = virtnet_restore_up(vdev);
	if (err)
		return err;
	virtnet_set_queues(vi, vi->curr_queue_pairs);

	err = virtnet_cpu_notif_add(vi);
	if (err) {
		virtnet_freeze_down(vdev);
		remove_vq_common(vi);
		return err;
	}

	return 0;
}

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

#define VIRTNET_FEATURES \
	VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \
	VIRTIO_NET_F_MAC, \
	VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
	VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
	VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \
	VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \
	VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \
	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
	VIRTIO_NET_F_CTRL_MAC_ADDR, \
	VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
	VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
	VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL

static unsigned int features[] = {
	VIRTNET_FEATURES,
};

static unsigned int features_legacy[] = {
	VIRTNET_FEATURES,
	VIRTIO_NET_F_GSO,
	VIRTIO_F_ANY_LAYOUT,
};

static struct virtio_driver virtio_net_driver = {
	.feature_table = features,
	.feature_table_size = ARRAY_SIZE(features),
	.feature_table_legacy = features_legacy,
	.feature_table_size_legacy = ARRAY_SIZE(features_legacy),
	.driver.name = KBUILD_MODNAME,
	.driver.owner = THIS_MODULE,
	.id_table = id_table,
	.validate = virtnet_validate,
	.probe = virtnet_probe,
	.remove = virtnet_remove,
	.config_changed = virtnet_config_changed,
#ifdef CONFIG_PM_SLEEP
	.freeze = virtnet_freeze,
	.restore = virtnet_restore,
#endif
};

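/* Module init registers two CPU-hotplug multi-instance states before the
 * virtio driver itself: a dynamic "online" state (virtnet_cpu_online /
 * virtnet_cpu_down_prep) and CPUHP_VIRT_NET_DEAD (virtnet_cpu_dead) for
 * cleanup after a CPU goes offline; the error labels unwind in the reverse
 * order of setup.
 */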
static __init int virtio_net_driver_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online",
				      virtnet_cpu_online,
				      virtnet_cpu_down_prep);
	if (ret < 0)
		goto out;
	virtionet_online = ret;
	ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead",
				      NULL, virtnet_cpu_dead);
	if (ret)
		goto err_dead;
	ret = register_virtio_driver(&virtio_net_driver);
	if (ret)
		goto err_virtio;
	return 0;
err_virtio:
	cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
err_dead:
	cpuhp_remove_multi_state(virtionet_online);
out:
	return ret;
}
module_init(virtio_net_driver_init);

static __exit void virtio_net_driver_exit(void)
{
	unregister_virtio_driver(&virtio_net_driver);
	cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
	cpuhp_remove_multi_state(virtionet_online);
}
module_exit(virtio_net_driver_exit);

MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio network driver");
MODULE_LICENSE("GPL");