1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* A network driver using virtio. 3 * 4 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation 5 */ 6 //#define DEBUG 7 #include <linux/netdevice.h> 8 #include <linux/etherdevice.h> 9 #include <linux/ethtool.h> 10 #include <linux/module.h> 11 #include <linux/virtio.h> 12 #include <linux/virtio_net.h> 13 #include <linux/bpf.h> 14 #include <linux/bpf_trace.h> 15 #include <linux/scatterlist.h> 16 #include <linux/if_vlan.h> 17 #include <linux/slab.h> 18 #include <linux/cpu.h> 19 #include <linux/average.h> 20 #include <linux/filter.h> 21 #include <linux/kernel.h> 22 #include <linux/dim.h> 23 #include <net/route.h> 24 #include <net/xdp.h> 25 #include <net/net_failover.h> 26 #include <net/netdev_rx_queue.h> 27 #include <net/netdev_queues.h> 28 #include <net/xdp_sock_drv.h> 29 30 static int napi_weight = NAPI_POLL_WEIGHT; 31 module_param(napi_weight, int, 0444); 32 33 static bool csum = true, gso = true, napi_tx = true; 34 module_param(csum, bool, 0444); 35 module_param(gso, bool, 0444); 36 module_param(napi_tx, bool, 0644); 37 38 /* FIXME: MTU in config. */ 39 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN) 40 #define GOOD_COPY_LEN 128 41 42 #define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) 43 44 /* Separating two types of XDP xmit */ 45 #define VIRTIO_XDP_TX BIT(0) 46 #define VIRTIO_XDP_REDIR BIT(1) 47 48 #define VIRTIO_XDP_FLAG BIT(0) 49 #define VIRTIO_ORPHAN_FLAG BIT(1) 50 51 /* RX packet size EWMA. The average packet size is used to determine the packet 52 * buffer size when refilling RX rings. As the entire RX ring may be refilled 53 * at once, the weight is chosen so that the EWMA will be insensitive to short- 54 * term, transient changes in packet size. 55 */ 56 DECLARE_EWMA(pkt_len, 0, 64) 57 58 #define VIRTNET_DRIVER_VERSION "1.0.0" 59 60 static const unsigned long guest_offloads[] = { 61 VIRTIO_NET_F_GUEST_TSO4, 62 VIRTIO_NET_F_GUEST_TSO6, 63 VIRTIO_NET_F_GUEST_ECN, 64 VIRTIO_NET_F_GUEST_UFO, 65 VIRTIO_NET_F_GUEST_CSUM, 66 VIRTIO_NET_F_GUEST_USO4, 67 VIRTIO_NET_F_GUEST_USO6, 68 VIRTIO_NET_F_GUEST_HDRLEN 69 }; 70 71 #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ 72 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ 73 (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ 74 (1ULL << VIRTIO_NET_F_GUEST_UFO) | \ 75 (1ULL << VIRTIO_NET_F_GUEST_USO4) | \ 76 (1ULL << VIRTIO_NET_F_GUEST_USO6)) 77 78 struct virtnet_stat_desc { 79 char desc[ETH_GSTRING_LEN]; 80 size_t offset; 81 size_t qstat_offset; 82 }; 83 84 struct virtnet_sq_free_stats { 85 u64 packets; 86 u64 bytes; 87 u64 napi_packets; 88 u64 napi_bytes; 89 }; 90 91 struct virtnet_sq_stats { 92 struct u64_stats_sync syncp; 93 u64_stats_t packets; 94 u64_stats_t bytes; 95 u64_stats_t xdp_tx; 96 u64_stats_t xdp_tx_drops; 97 u64_stats_t kicks; 98 u64_stats_t tx_timeouts; 99 u64_stats_t stop; 100 u64_stats_t wake; 101 }; 102 103 struct virtnet_rq_stats { 104 struct u64_stats_sync syncp; 105 u64_stats_t packets; 106 u64_stats_t bytes; 107 u64_stats_t drops; 108 u64_stats_t xdp_packets; 109 u64_stats_t xdp_tx; 110 u64_stats_t xdp_redirects; 111 u64_stats_t xdp_drops; 112 u64_stats_t kicks; 113 }; 114 115 #define VIRTNET_SQ_STAT(name, m) {name, offsetof(struct virtnet_sq_stats, m), -1} 116 #define VIRTNET_RQ_STAT(name, m) {name, offsetof(struct virtnet_rq_stats, m), -1} 117 118 #define VIRTNET_SQ_STAT_QSTAT(name, m) \ 119 { \ 120 name, \ 121 offsetof(struct virtnet_sq_stats, m), \ 122 offsetof(struct netdev_queue_stats_tx, m), \ 123 } 124 125 #define 
VIRTNET_RQ_STAT_QSTAT(name, m) \ 126 { \ 127 name, \ 128 offsetof(struct virtnet_rq_stats, m), \ 129 offsetof(struct netdev_queue_stats_rx, m), \ 130 } 131 132 static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = { 133 VIRTNET_SQ_STAT("xdp_tx", xdp_tx), 134 VIRTNET_SQ_STAT("xdp_tx_drops", xdp_tx_drops), 135 VIRTNET_SQ_STAT("kicks", kicks), 136 VIRTNET_SQ_STAT("tx_timeouts", tx_timeouts), 137 }; 138 139 static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = { 140 VIRTNET_RQ_STAT("drops", drops), 141 VIRTNET_RQ_STAT("xdp_packets", xdp_packets), 142 VIRTNET_RQ_STAT("xdp_tx", xdp_tx), 143 VIRTNET_RQ_STAT("xdp_redirects", xdp_redirects), 144 VIRTNET_RQ_STAT("xdp_drops", xdp_drops), 145 VIRTNET_RQ_STAT("kicks", kicks), 146 }; 147 148 static const struct virtnet_stat_desc virtnet_sq_stats_desc_qstat[] = { 149 VIRTNET_SQ_STAT_QSTAT("packets", packets), 150 VIRTNET_SQ_STAT_QSTAT("bytes", bytes), 151 VIRTNET_SQ_STAT_QSTAT("stop", stop), 152 VIRTNET_SQ_STAT_QSTAT("wake", wake), 153 }; 154 155 static const struct virtnet_stat_desc virtnet_rq_stats_desc_qstat[] = { 156 VIRTNET_RQ_STAT_QSTAT("packets", packets), 157 VIRTNET_RQ_STAT_QSTAT("bytes", bytes), 158 }; 159 160 #define VIRTNET_STATS_DESC_CQ(name) \ 161 {#name, offsetof(struct virtio_net_stats_cvq, name), -1} 162 163 #define VIRTNET_STATS_DESC_RX(class, name) \ 164 {#name, offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), -1} 165 166 #define VIRTNET_STATS_DESC_TX(class, name) \ 167 {#name, offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), -1} 168 169 170 static const struct virtnet_stat_desc virtnet_stats_cvq_desc[] = { 171 VIRTNET_STATS_DESC_CQ(command_num), 172 VIRTNET_STATS_DESC_CQ(ok_num), 173 }; 174 175 static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = { 176 VIRTNET_STATS_DESC_RX(basic, packets), 177 VIRTNET_STATS_DESC_RX(basic, bytes), 178 179 VIRTNET_STATS_DESC_RX(basic, notifications), 180 VIRTNET_STATS_DESC_RX(basic, interrupts), 181 }; 182 183 static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = { 184 VIRTNET_STATS_DESC_TX(basic, packets), 185 VIRTNET_STATS_DESC_TX(basic, bytes), 186 187 VIRTNET_STATS_DESC_TX(basic, notifications), 188 VIRTNET_STATS_DESC_TX(basic, interrupts), 189 }; 190 191 static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = { 192 VIRTNET_STATS_DESC_RX(csum, needs_csum), 193 }; 194 195 static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = { 196 VIRTNET_STATS_DESC_TX(gso, gso_packets_noseg), 197 VIRTNET_STATS_DESC_TX(gso, gso_bytes_noseg), 198 }; 199 200 static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = { 201 VIRTNET_STATS_DESC_RX(speed, ratelimit_bytes), 202 }; 203 204 static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = { 205 VIRTNET_STATS_DESC_TX(speed, ratelimit_bytes), 206 }; 207 208 #define VIRTNET_STATS_DESC_RX_QSTAT(class, name, qstat_field) \ 209 { \ 210 #name, \ 211 offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), \ 212 offsetof(struct netdev_queue_stats_rx, qstat_field), \ 213 } 214 215 #define VIRTNET_STATS_DESC_TX_QSTAT(class, name, qstat_field) \ 216 { \ 217 #name, \ 218 offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), \ 219 offsetof(struct netdev_queue_stats_tx, qstat_field), \ 220 } 221 222 static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc_qstat[] = { 223 VIRTNET_STATS_DESC_RX_QSTAT(basic, drops, hw_drops), 224 VIRTNET_STATS_DESC_RX_QSTAT(basic, drop_overruns, hw_drop_overruns), 225 }; 226 227 static const struct 
virtnet_stat_desc virtnet_stats_tx_basic_desc_qstat[] = { 228 VIRTNET_STATS_DESC_TX_QSTAT(basic, drops, hw_drops), 229 VIRTNET_STATS_DESC_TX_QSTAT(basic, drop_malformed, hw_drop_errors), 230 }; 231 232 static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc_qstat[] = { 233 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_valid, csum_unnecessary), 234 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_none, csum_none), 235 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_bad, csum_bad), 236 }; 237 238 static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc_qstat[] = { 239 VIRTNET_STATS_DESC_TX_QSTAT(csum, csum_none, csum_none), 240 VIRTNET_STATS_DESC_TX_QSTAT(csum, needs_csum, needs_csum), 241 }; 242 243 static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc_qstat[] = { 244 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets, hw_gro_packets), 245 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes, hw_gro_bytes), 246 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets_coalesced, hw_gro_wire_packets), 247 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes_coalesced, hw_gro_wire_bytes), 248 }; 249 250 static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc_qstat[] = { 251 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_packets, hw_gso_packets), 252 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_bytes, hw_gso_bytes), 253 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments, hw_gso_wire_packets), 254 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments_bytes, hw_gso_wire_bytes), 255 }; 256 257 static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc_qstat[] = { 258 VIRTNET_STATS_DESC_RX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits), 259 }; 260 261 static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc_qstat[] = { 262 VIRTNET_STATS_DESC_TX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits), 263 }; 264 265 #define VIRTNET_Q_TYPE_RX 0 266 #define VIRTNET_Q_TYPE_TX 1 267 #define VIRTNET_Q_TYPE_CQ 2 268 269 struct virtnet_interrupt_coalesce { 270 u32 max_packets; 271 u32 max_usecs; 272 }; 273 274 /* The dma information of pages allocated at a time. */ 275 struct virtnet_rq_dma { 276 dma_addr_t addr; 277 u32 ref; 278 u16 len; 279 u16 need_sync; 280 }; 281 282 /* Internal representation of a send virtqueue */ 283 struct send_queue { 284 /* Virtqueue associated with this send _queue */ 285 struct virtqueue *vq; 286 287 /* TX: fragments + linear part + virtio header */ 288 struct scatterlist sg[MAX_SKB_FRAGS + 2]; 289 290 /* Name of the send queue: output.$index */ 291 char name[16]; 292 293 struct virtnet_sq_stats stats; 294 295 struct virtnet_interrupt_coalesce intr_coal; 296 297 struct napi_struct napi; 298 299 /* Record whether sq is in reset state. */ 300 bool reset; 301 }; 302 303 /* Internal representation of a receive virtqueue */ 304 struct receive_queue { 305 /* Virtqueue associated with this receive_queue */ 306 struct virtqueue *vq; 307 308 struct napi_struct napi; 309 310 struct bpf_prog __rcu *xdp_prog; 311 312 struct virtnet_rq_stats stats; 313 314 /* The number of rx notifications */ 315 u16 calls; 316 317 /* Is dynamic interrupt moderation enabled? */ 318 bool dim_enabled; 319 320 /* Used to protect dim_enabled and inter_coal */ 321 struct mutex dim_lock; 322 323 /* Dynamic Interrupt Moderation */ 324 struct dim dim; 325 326 u32 packets_in_napi; 327 328 struct virtnet_interrupt_coalesce intr_coal; 329 330 /* Chain pages by the private ptr. */ 331 struct page *pages; 332 333 /* Average packet length for mergeable receive buffers. */ 334 struct ewma_pkt_len mrg_avg_pkt_len; 335 336 /* Page frag for packet buffer allocation. 
*/ 337 struct page_frag alloc_frag; 338 339 /* RX: fragments + linear part + virtio header */ 340 struct scatterlist sg[MAX_SKB_FRAGS + 2]; 341 342 /* Min single buffer size for mergeable buffers case. */ 343 unsigned int min_buf_len; 344 345 /* Name of this receive queue: input.$index */ 346 char name[16]; 347 348 struct xdp_rxq_info xdp_rxq; 349 350 /* Record the last dma info to free after new pages is allocated. */ 351 struct virtnet_rq_dma *last_dma; 352 353 struct xsk_buff_pool *xsk_pool; 354 355 /* xdp rxq used by xsk */ 356 struct xdp_rxq_info xsk_rxq_info; 357 358 struct xdp_buff **xsk_buffs; 359 }; 360 361 /* This structure can contain rss message with maximum settings for indirection table and keysize 362 * Note, that default structure that describes RSS configuration virtio_net_rss_config 363 * contains same info but can't handle table values. 364 * In any case, structure would be passed to virtio hw through sg_buf split by parts 365 * because table sizes may be differ according to the device configuration. 366 */ 367 #define VIRTIO_NET_RSS_MAX_KEY_SIZE 40 368 #define VIRTIO_NET_RSS_MAX_TABLE_LEN 128 369 struct virtio_net_ctrl_rss { 370 u32 hash_types; 371 u16 indirection_table_mask; 372 u16 unclassified_queue; 373 u16 indirection_table[VIRTIO_NET_RSS_MAX_TABLE_LEN]; 374 u16 max_tx_vq; 375 u8 hash_key_length; 376 u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE]; 377 }; 378 379 /* Control VQ buffers: protected by the rtnl lock */ 380 struct control_buf { 381 struct virtio_net_ctrl_hdr hdr; 382 virtio_net_ctrl_ack status; 383 }; 384 385 struct virtnet_info { 386 struct virtio_device *vdev; 387 struct virtqueue *cvq; 388 struct net_device *dev; 389 struct send_queue *sq; 390 struct receive_queue *rq; 391 unsigned int status; 392 393 /* Max # of queue pairs supported by the device */ 394 u16 max_queue_pairs; 395 396 /* # of queue pairs currently used by the driver */ 397 u16 curr_queue_pairs; 398 399 /* # of XDP queue pairs currently used by the driver */ 400 u16 xdp_queue_pairs; 401 402 /* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */ 403 bool xdp_enabled; 404 405 /* I like... big packets and I cannot lie! */ 406 bool big_packets; 407 408 /* number of sg entries allocated for big packets */ 409 unsigned int big_packets_num_skbfrags; 410 411 /* Host will merge rx buffers for big packets (shake it! shake it!) */ 412 bool mergeable_rx_bufs; 413 414 /* Host supports rss and/or hash report */ 415 bool has_rss; 416 bool has_rss_hash_report; 417 u8 rss_key_size; 418 u16 rss_indir_table_size; 419 u32 rss_hash_types_supported; 420 u32 rss_hash_types_saved; 421 struct virtio_net_ctrl_rss rss; 422 423 /* Has control virtqueue */ 424 bool has_cvq; 425 426 /* Lock to protect the control VQ */ 427 struct mutex cvq_lock; 428 429 /* Host can handle any s/g split between our header and packet data */ 430 bool any_header_sg; 431 432 /* Packet virtio header size */ 433 u8 hdr_len; 434 435 /* Work struct for delayed refilling if we run low on memory. */ 436 struct delayed_work refill; 437 438 /* Is delayed refill enabled? */ 439 bool refill_enabled; 440 441 /* The lock to synchronize the access to refill_enabled */ 442 spinlock_t refill_lock; 443 444 /* Work struct for config space updates */ 445 struct work_struct config_work; 446 447 /* Work struct for setting rx mode */ 448 struct work_struct rx_mode_work; 449 450 /* OK to queue work setting RX mode? */ 451 bool rx_mode_work_enabled; 452 453 /* Does the affinity hint is set for virtqueues? 
*/ 454 bool affinity_hint_set; 455 456 /* CPU hotplug instances for online & dead */ 457 struct hlist_node node; 458 struct hlist_node node_dead; 459 460 struct control_buf *ctrl; 461 462 /* Ethtool settings */ 463 u8 duplex; 464 u32 speed; 465 466 /* Is rx dynamic interrupt moderation enabled? */ 467 bool rx_dim_enabled; 468 469 /* Interrupt coalescing settings */ 470 struct virtnet_interrupt_coalesce intr_coal_tx; 471 struct virtnet_interrupt_coalesce intr_coal_rx; 472 473 unsigned long guest_offloads; 474 unsigned long guest_offloads_capable; 475 476 /* failover when STANDBY feature enabled */ 477 struct failover *failover; 478 479 u64 device_stats_cap; 480 }; 481 482 struct padded_vnet_hdr { 483 struct virtio_net_hdr_v1_hash hdr; 484 /* 485 * hdr is in a separate sg buffer, and data sg buffer shares same page 486 * with this header sg. This padding makes next sg 16 byte aligned 487 * after the header. 488 */ 489 char padding[12]; 490 }; 491 492 struct virtio_net_common_hdr { 493 union { 494 struct virtio_net_hdr hdr; 495 struct virtio_net_hdr_mrg_rxbuf mrg_hdr; 496 struct virtio_net_hdr_v1_hash hash_v1_hdr; 497 }; 498 }; 499 500 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf); 501 static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp, 502 struct net_device *dev, 503 unsigned int *xdp_xmit, 504 struct virtnet_rq_stats *stats); 505 static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, 506 struct sk_buff *skb, u8 flags); 507 static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb, 508 struct sk_buff *curr_skb, 509 struct page *page, void *buf, 510 int len, int truesize); 511 512 static bool is_xdp_frame(void *ptr) 513 { 514 return (unsigned long)ptr & VIRTIO_XDP_FLAG; 515 } 516 517 static void *xdp_to_ptr(struct xdp_frame *ptr) 518 { 519 return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG); 520 } 521 522 static struct xdp_frame *ptr_to_xdp(void *ptr) 523 { 524 return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG); 525 } 526 527 static bool is_orphan_skb(void *ptr) 528 { 529 return (unsigned long)ptr & VIRTIO_ORPHAN_FLAG; 530 } 531 532 static void *skb_to_ptr(struct sk_buff *skb, bool orphan) 533 { 534 return (void *)((unsigned long)skb | (orphan ? VIRTIO_ORPHAN_FLAG : 0)); 535 } 536 537 static struct sk_buff *ptr_to_skb(void *ptr) 538 { 539 return (struct sk_buff *)((unsigned long)ptr & ~VIRTIO_ORPHAN_FLAG); 540 } 541 542 static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq, 543 bool in_napi, struct virtnet_sq_free_stats *stats) 544 { 545 unsigned int len; 546 void *ptr; 547 548 while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { 549 if (!is_xdp_frame(ptr)) { 550 struct sk_buff *skb = ptr_to_skb(ptr); 551 552 pr_debug("Sent skb %p\n", skb); 553 554 if (is_orphan_skb(ptr)) { 555 stats->packets++; 556 stats->bytes += skb->len; 557 } else { 558 stats->napi_packets++; 559 stats->napi_bytes += skb->len; 560 } 561 napi_consume_skb(skb, in_napi); 562 } else { 563 struct xdp_frame *frame = ptr_to_xdp(ptr); 564 565 stats->packets++; 566 stats->bytes += xdp_get_frame_len(frame); 567 xdp_return_frame(frame); 568 } 569 } 570 netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes); 571 } 572 573 /* Converting between virtqueue no. and kernel tx/rx queue no. 574 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 
2N:rxN 2N+1:txN 2N+2:cvq 575 */ 576 static int vq2txq(struct virtqueue *vq) 577 { 578 return (vq->index - 1) / 2; 579 } 580 581 static int txq2vq(int txq) 582 { 583 return txq * 2 + 1; 584 } 585 586 static int vq2rxq(struct virtqueue *vq) 587 { 588 return vq->index / 2; 589 } 590 591 static int rxq2vq(int rxq) 592 { 593 return rxq * 2; 594 } 595 596 static int vq_type(struct virtnet_info *vi, int qid) 597 { 598 if (qid == vi->max_queue_pairs * 2) 599 return VIRTNET_Q_TYPE_CQ; 600 601 if (qid % 2) 602 return VIRTNET_Q_TYPE_TX; 603 604 return VIRTNET_Q_TYPE_RX; 605 } 606 607 static inline struct virtio_net_common_hdr * 608 skb_vnet_common_hdr(struct sk_buff *skb) 609 { 610 return (struct virtio_net_common_hdr *)skb->cb; 611 } 612 613 /* 614 * private is used to chain pages for big packets, put the whole 615 * most recent used list in the beginning for reuse 616 */ 617 static void give_pages(struct receive_queue *rq, struct page *page) 618 { 619 struct page *end; 620 621 /* Find end of list, sew whole thing into vi->rq.pages. */ 622 for (end = page; end->private; end = (struct page *)end->private); 623 end->private = (unsigned long)rq->pages; 624 rq->pages = page; 625 } 626 627 static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask) 628 { 629 struct page *p = rq->pages; 630 631 if (p) { 632 rq->pages = (struct page *)p->private; 633 /* clear private here, it is used to chain pages */ 634 p->private = 0; 635 } else 636 p = alloc_page(gfp_mask); 637 return p; 638 } 639 640 static void virtnet_rq_free_buf(struct virtnet_info *vi, 641 struct receive_queue *rq, void *buf) 642 { 643 if (vi->mergeable_rx_bufs) 644 put_page(virt_to_head_page(buf)); 645 else if (vi->big_packets) 646 give_pages(rq, buf); 647 else 648 put_page(virt_to_head_page(buf)); 649 } 650 651 static void enable_delayed_refill(struct virtnet_info *vi) 652 { 653 spin_lock_bh(&vi->refill_lock); 654 vi->refill_enabled = true; 655 spin_unlock_bh(&vi->refill_lock); 656 } 657 658 static void disable_delayed_refill(struct virtnet_info *vi) 659 { 660 spin_lock_bh(&vi->refill_lock); 661 vi->refill_enabled = false; 662 spin_unlock_bh(&vi->refill_lock); 663 } 664 665 static void enable_rx_mode_work(struct virtnet_info *vi) 666 { 667 rtnl_lock(); 668 vi->rx_mode_work_enabled = true; 669 rtnl_unlock(); 670 } 671 672 static void disable_rx_mode_work(struct virtnet_info *vi) 673 { 674 rtnl_lock(); 675 vi->rx_mode_work_enabled = false; 676 rtnl_unlock(); 677 } 678 679 static void virtqueue_napi_schedule(struct napi_struct *napi, 680 struct virtqueue *vq) 681 { 682 if (napi_schedule_prep(napi)) { 683 virtqueue_disable_cb(vq); 684 __napi_schedule(napi); 685 } 686 } 687 688 static bool virtqueue_napi_complete(struct napi_struct *napi, 689 struct virtqueue *vq, int processed) 690 { 691 int opaque; 692 693 opaque = virtqueue_enable_cb_prepare(vq); 694 if (napi_complete_done(napi, processed)) { 695 if (unlikely(virtqueue_poll(vq, opaque))) 696 virtqueue_napi_schedule(napi, vq); 697 else 698 return true; 699 } else { 700 virtqueue_disable_cb(vq); 701 } 702 703 return false; 704 } 705 706 static void skb_xmit_done(struct virtqueue *vq) 707 { 708 struct virtnet_info *vi = vq->vdev->priv; 709 struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi; 710 711 /* Suppress further interrupts. */ 712 virtqueue_disable_cb(vq); 713 714 if (napi->weight) 715 virtqueue_napi_schedule(napi, vq); 716 else 717 /* We were probably waiting for more output buffers. 
*/ 718 netif_wake_subqueue(vi->dev, vq2txq(vq)); 719 } 720 721 #define MRG_CTX_HEADER_SHIFT 22 722 static void *mergeable_len_to_ctx(unsigned int truesize, 723 unsigned int headroom) 724 { 725 return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize); 726 } 727 728 static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx) 729 { 730 return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT; 731 } 732 733 static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx) 734 { 735 return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1); 736 } 737 738 static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen, 739 unsigned int headroom, 740 unsigned int len) 741 { 742 struct sk_buff *skb; 743 744 skb = build_skb(buf, buflen); 745 if (unlikely(!skb)) 746 return NULL; 747 748 skb_reserve(skb, headroom); 749 skb_put(skb, len); 750 751 return skb; 752 } 753 754 /* Called from bottom half context */ 755 static struct sk_buff *page_to_skb(struct virtnet_info *vi, 756 struct receive_queue *rq, 757 struct page *page, unsigned int offset, 758 unsigned int len, unsigned int truesize, 759 unsigned int headroom) 760 { 761 struct sk_buff *skb; 762 struct virtio_net_common_hdr *hdr; 763 unsigned int copy, hdr_len, hdr_padded_len; 764 struct page *page_to_free = NULL; 765 int tailroom, shinfo_size; 766 char *p, *hdr_p, *buf; 767 768 p = page_address(page) + offset; 769 hdr_p = p; 770 771 hdr_len = vi->hdr_len; 772 if (vi->mergeable_rx_bufs) 773 hdr_padded_len = hdr_len; 774 else 775 hdr_padded_len = sizeof(struct padded_vnet_hdr); 776 777 buf = p - headroom; 778 len -= hdr_len; 779 offset += hdr_padded_len; 780 p += hdr_padded_len; 781 tailroom = truesize - headroom - hdr_padded_len - len; 782 783 shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 784 785 if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) { 786 skb = virtnet_build_skb(buf, truesize, p - buf, len); 787 if (unlikely(!skb)) 788 return NULL; 789 790 page = (struct page *)page->private; 791 if (page) 792 give_pages(rq, page); 793 goto ok; 794 } 795 796 /* copy small packet so we can reuse these pages for small data */ 797 skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN); 798 if (unlikely(!skb)) 799 return NULL; 800 801 /* Copy all frame if it fits skb->head, otherwise 802 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed. 803 */ 804 if (len <= skb_tailroom(skb)) 805 copy = len; 806 else 807 copy = ETH_HLEN; 808 skb_put_data(skb, p, copy); 809 810 len -= copy; 811 offset += copy; 812 813 if (vi->mergeable_rx_bufs) { 814 if (len) 815 skb_add_rx_frag(skb, 0, page, offset, len, truesize); 816 else 817 page_to_free = page; 818 goto ok; 819 } 820 821 /* 822 * Verify that we can indeed put this data into a skb. 823 * This is here to handle cases when the device erroneously 824 * tries to receive more than is possible. This is usually 825 * the case of a broken device. 
826 */ 827 if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) { 828 net_dbg_ratelimited("%s: too much data\n", skb->dev->name); 829 dev_kfree_skb(skb); 830 return NULL; 831 } 832 BUG_ON(offset >= PAGE_SIZE); 833 while (len) { 834 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len); 835 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset, 836 frag_size, truesize); 837 len -= frag_size; 838 page = (struct page *)page->private; 839 offset = 0; 840 } 841 842 if (page) 843 give_pages(rq, page); 844 845 ok: 846 hdr = skb_vnet_common_hdr(skb); 847 memcpy(hdr, hdr_p, hdr_len); 848 if (page_to_free) 849 put_page(page_to_free); 850 851 return skb; 852 } 853 854 static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len) 855 { 856 struct page *page = virt_to_head_page(buf); 857 struct virtnet_rq_dma *dma; 858 void *head; 859 int offset; 860 861 head = page_address(page); 862 863 dma = head; 864 865 --dma->ref; 866 867 if (dma->need_sync && len) { 868 offset = buf - (head + sizeof(*dma)); 869 870 virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr, 871 offset, len, 872 DMA_FROM_DEVICE); 873 } 874 875 if (dma->ref) 876 return; 877 878 virtqueue_dma_unmap_single_attrs(rq->vq, dma->addr, dma->len, 879 DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); 880 put_page(page); 881 } 882 883 static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx) 884 { 885 void *buf; 886 887 buf = virtqueue_get_buf_ctx(rq->vq, len, ctx); 888 if (buf) 889 virtnet_rq_unmap(rq, buf, *len); 890 891 return buf; 892 } 893 894 static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len) 895 { 896 struct virtnet_rq_dma *dma; 897 dma_addr_t addr; 898 u32 offset; 899 void *head; 900 901 head = page_address(rq->alloc_frag.page); 902 903 offset = buf - head; 904 905 dma = head; 906 907 addr = dma->addr - sizeof(*dma) + offset; 908 909 sg_init_table(rq->sg, 1); 910 rq->sg[0].dma_address = addr; 911 rq->sg[0].length = len; 912 } 913 914 static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp) 915 { 916 struct page_frag *alloc_frag = &rq->alloc_frag; 917 struct virtnet_rq_dma *dma; 918 void *buf, *head; 919 dma_addr_t addr; 920 921 if (unlikely(!skb_page_frag_refill(size, alloc_frag, gfp))) 922 return NULL; 923 924 head = page_address(alloc_frag->page); 925 926 dma = head; 927 928 /* new pages */ 929 if (!alloc_frag->offset) { 930 if (rq->last_dma) { 931 /* Now, the new page is allocated, the last dma 932 * will not be used. So the dma can be unmapped 933 * if the ref is 0. 934 */ 935 virtnet_rq_unmap(rq, rq->last_dma, 0); 936 rq->last_dma = NULL; 937 } 938 939 dma->len = alloc_frag->size - sizeof(*dma); 940 941 addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1, 942 dma->len, DMA_FROM_DEVICE, 0); 943 if (virtqueue_dma_mapping_error(rq->vq, addr)) 944 return NULL; 945 946 dma->addr = addr; 947 dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr); 948 949 /* Add a reference to dma to prevent the entire dma from 950 * being released during error handling. This reference 951 * will be freed after the pages are no longer used. 
952 */ 953 get_page(alloc_frag->page); 954 dma->ref = 1; 955 alloc_frag->offset = sizeof(*dma); 956 957 rq->last_dma = dma; 958 } 959 960 ++dma->ref; 961 962 buf = head + alloc_frag->offset; 963 964 get_page(alloc_frag->page); 965 alloc_frag->offset += size; 966 967 return buf; 968 } 969 970 static void virtnet_rq_set_premapped(struct virtnet_info *vi) 971 { 972 int i; 973 974 /* disable for big mode */ 975 if (!vi->mergeable_rx_bufs && vi->big_packets) 976 return; 977 978 for (i = 0; i < vi->max_queue_pairs; i++) 979 /* error should never happen */ 980 BUG_ON(virtqueue_set_dma_premapped(vi->rq[i].vq)); 981 } 982 983 static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf) 984 { 985 struct virtnet_info *vi = vq->vdev->priv; 986 struct receive_queue *rq; 987 int i = vq2rxq(vq); 988 989 rq = &vi->rq[i]; 990 991 if (rq->xsk_pool) { 992 xsk_buff_free((struct xdp_buff *)buf); 993 return; 994 } 995 996 if (!vi->big_packets || vi->mergeable_rx_bufs) 997 virtnet_rq_unmap(rq, buf, 0); 998 999 virtnet_rq_free_buf(vi, rq, buf); 1000 } 1001 1002 static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq, 1003 bool in_napi) 1004 { 1005 struct virtnet_sq_free_stats stats = {0}; 1006 1007 __free_old_xmit(sq, txq, in_napi, &stats); 1008 1009 /* Avoid overhead when no packets have been processed 1010 * happens when called speculatively from start_xmit. 1011 */ 1012 if (!stats.packets && !stats.napi_packets) 1013 return; 1014 1015 u64_stats_update_begin(&sq->stats.syncp); 1016 u64_stats_add(&sq->stats.bytes, stats.bytes + stats.napi_bytes); 1017 u64_stats_add(&sq->stats.packets, stats.packets + stats.napi_packets); 1018 u64_stats_update_end(&sq->stats.syncp); 1019 } 1020 1021 static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) 1022 { 1023 if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) 1024 return false; 1025 else if (q < vi->curr_queue_pairs) 1026 return true; 1027 else 1028 return false; 1029 } 1030 1031 static void check_sq_full_and_disable(struct virtnet_info *vi, 1032 struct net_device *dev, 1033 struct send_queue *sq) 1034 { 1035 bool use_napi = sq->napi.weight; 1036 int qnum; 1037 1038 qnum = sq - vi->sq; 1039 1040 /* If running out of space, stop queue to avoid getting packets that we 1041 * are then unable to transmit. 1042 * An alternative would be to force queuing layer to requeue the skb by 1043 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be 1044 * returned in a normal path of operation: it means that driver is not 1045 * maintaining the TX queue stop/start state properly, and causes 1046 * the stack to do a non-trivial amount of useless work. 1047 * Since most packets only take 1 or 2 ring slots, stopping the queue 1048 * early means 16 slots are typically wasted. 1049 */ 1050 if (sq->vq->num_free < 2+MAX_SKB_FRAGS) { 1051 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 1052 1053 netif_tx_stop_queue(txq); 1054 u64_stats_update_begin(&sq->stats.syncp); 1055 u64_stats_inc(&sq->stats.stop); 1056 u64_stats_update_end(&sq->stats.syncp); 1057 if (use_napi) { 1058 if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) 1059 virtqueue_napi_schedule(&sq->napi, sq->vq); 1060 } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { 1061 /* More just got used, free them then recheck. 
*/ 1062 free_old_xmit(sq, txq, false); 1063 if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { 1064 netif_start_subqueue(dev, qnum); 1065 u64_stats_update_begin(&sq->stats.syncp); 1066 u64_stats_inc(&sq->stats.wake); 1067 u64_stats_update_end(&sq->stats.syncp); 1068 virtqueue_disable_cb(sq->vq); 1069 } 1070 } 1071 } 1072 } 1073 1074 static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len) 1075 { 1076 sg->dma_address = addr; 1077 sg->length = len; 1078 } 1079 1080 static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi, 1081 struct receive_queue *rq, void *buf, u32 len) 1082 { 1083 struct xdp_buff *xdp; 1084 u32 bufsize; 1085 1086 xdp = (struct xdp_buff *)buf; 1087 1088 bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool) + vi->hdr_len; 1089 1090 if (unlikely(len > bufsize)) { 1091 pr_debug("%s: rx error: len %u exceeds truesize %u\n", 1092 vi->dev->name, len, bufsize); 1093 DEV_STATS_INC(vi->dev, rx_length_errors); 1094 xsk_buff_free(xdp); 1095 return NULL; 1096 } 1097 1098 xsk_buff_set_size(xdp, len); 1099 xsk_buff_dma_sync_for_cpu(xdp); 1100 1101 return xdp; 1102 } 1103 1104 static struct sk_buff *xsk_construct_skb(struct receive_queue *rq, 1105 struct xdp_buff *xdp) 1106 { 1107 unsigned int metasize = xdp->data - xdp->data_meta; 1108 struct sk_buff *skb; 1109 unsigned int size; 1110 1111 size = xdp->data_end - xdp->data_hard_start; 1112 skb = napi_alloc_skb(&rq->napi, size); 1113 if (unlikely(!skb)) { 1114 xsk_buff_free(xdp); 1115 return NULL; 1116 } 1117 1118 skb_reserve(skb, xdp->data_meta - xdp->data_hard_start); 1119 1120 size = xdp->data_end - xdp->data_meta; 1121 memcpy(__skb_put(skb, size), xdp->data_meta, size); 1122 1123 if (metasize) { 1124 __skb_pull(skb, metasize); 1125 skb_metadata_set(skb, metasize); 1126 } 1127 1128 xsk_buff_free(xdp); 1129 1130 return skb; 1131 } 1132 1133 static struct sk_buff *virtnet_receive_xsk_small(struct net_device *dev, struct virtnet_info *vi, 1134 struct receive_queue *rq, struct xdp_buff *xdp, 1135 unsigned int *xdp_xmit, 1136 struct virtnet_rq_stats *stats) 1137 { 1138 struct bpf_prog *prog; 1139 u32 ret; 1140 1141 ret = XDP_PASS; 1142 rcu_read_lock(); 1143 prog = rcu_dereference(rq->xdp_prog); 1144 if (prog) 1145 ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats); 1146 rcu_read_unlock(); 1147 1148 switch (ret) { 1149 case XDP_PASS: 1150 return xsk_construct_skb(rq, xdp); 1151 1152 case XDP_TX: 1153 case XDP_REDIRECT: 1154 return NULL; 1155 1156 default: 1157 /* drop packet */ 1158 xsk_buff_free(xdp); 1159 u64_stats_inc(&stats->drops); 1160 return NULL; 1161 } 1162 } 1163 1164 static void xsk_drop_follow_bufs(struct net_device *dev, 1165 struct receive_queue *rq, 1166 u32 num_buf, 1167 struct virtnet_rq_stats *stats) 1168 { 1169 struct xdp_buff *xdp; 1170 u32 len; 1171 1172 while (num_buf-- > 1) { 1173 xdp = virtqueue_get_buf(rq->vq, &len); 1174 if (unlikely(!xdp)) { 1175 pr_debug("%s: rx error: %d buffers missing\n", 1176 dev->name, num_buf); 1177 DEV_STATS_INC(dev, rx_length_errors); 1178 break; 1179 } 1180 u64_stats_add(&stats->bytes, len); 1181 xsk_buff_free(xdp); 1182 } 1183 } 1184 1185 static int xsk_append_merge_buffer(struct virtnet_info *vi, 1186 struct receive_queue *rq, 1187 struct sk_buff *head_skb, 1188 u32 num_buf, 1189 struct virtio_net_hdr_mrg_rxbuf *hdr, 1190 struct virtnet_rq_stats *stats) 1191 { 1192 struct sk_buff *curr_skb; 1193 struct xdp_buff *xdp; 1194 u32 len, truesize; 1195 struct page *page; 1196 void *buf; 1197 1198 curr_skb = head_skb; 1199 1200 while (--num_buf) { 1201 buf = 
virtqueue_get_buf(rq->vq, &len); 1202 if (unlikely(!buf)) { 1203 pr_debug("%s: rx error: %d buffers out of %d missing\n", 1204 vi->dev->name, num_buf, 1205 virtio16_to_cpu(vi->vdev, 1206 hdr->num_buffers)); 1207 DEV_STATS_INC(vi->dev, rx_length_errors); 1208 return -EINVAL; 1209 } 1210 1211 u64_stats_add(&stats->bytes, len); 1212 1213 xdp = buf_to_xdp(vi, rq, buf, len); 1214 if (!xdp) 1215 goto err; 1216 1217 buf = napi_alloc_frag(len); 1218 if (!buf) { 1219 xsk_buff_free(xdp); 1220 goto err; 1221 } 1222 1223 memcpy(buf, xdp->data - vi->hdr_len, len); 1224 1225 xsk_buff_free(xdp); 1226 1227 page = virt_to_page(buf); 1228 1229 truesize = len; 1230 1231 curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, 1232 buf, len, truesize); 1233 if (!curr_skb) { 1234 put_page(page); 1235 goto err; 1236 } 1237 } 1238 1239 return 0; 1240 1241 err: 1242 xsk_drop_follow_bufs(vi->dev, rq, num_buf, stats); 1243 return -EINVAL; 1244 } 1245 1246 static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct virtnet_info *vi, 1247 struct receive_queue *rq, struct xdp_buff *xdp, 1248 unsigned int *xdp_xmit, 1249 struct virtnet_rq_stats *stats) 1250 { 1251 struct virtio_net_hdr_mrg_rxbuf *hdr; 1252 struct bpf_prog *prog; 1253 struct sk_buff *skb; 1254 u32 ret, num_buf; 1255 1256 hdr = xdp->data - vi->hdr_len; 1257 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 1258 1259 ret = XDP_PASS; 1260 rcu_read_lock(); 1261 prog = rcu_dereference(rq->xdp_prog); 1262 /* TODO: support multi buffer. */ 1263 if (prog && num_buf == 1) 1264 ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats); 1265 rcu_read_unlock(); 1266 1267 switch (ret) { 1268 case XDP_PASS: 1269 skb = xsk_construct_skb(rq, xdp); 1270 if (!skb) 1271 goto drop_bufs; 1272 1273 if (xsk_append_merge_buffer(vi, rq, skb, num_buf, hdr, stats)) { 1274 dev_kfree_skb(skb); 1275 goto drop; 1276 } 1277 1278 return skb; 1279 1280 case XDP_TX: 1281 case XDP_REDIRECT: 1282 return NULL; 1283 1284 default: 1285 /* drop packet */ 1286 xsk_buff_free(xdp); 1287 } 1288 1289 drop_bufs: 1290 xsk_drop_follow_bufs(dev, rq, num_buf, stats); 1291 1292 drop: 1293 u64_stats_inc(&stats->drops); 1294 return NULL; 1295 } 1296 1297 static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queue *rq, 1298 void *buf, u32 len, 1299 unsigned int *xdp_xmit, 1300 struct virtnet_rq_stats *stats) 1301 { 1302 struct net_device *dev = vi->dev; 1303 struct sk_buff *skb = NULL; 1304 struct xdp_buff *xdp; 1305 u8 flags; 1306 1307 len -= vi->hdr_len; 1308 1309 u64_stats_add(&stats->bytes, len); 1310 1311 xdp = buf_to_xdp(vi, rq, buf, len); 1312 if (!xdp) 1313 return; 1314 1315 if (unlikely(len < ETH_HLEN)) { 1316 pr_debug("%s: short packet %i\n", dev->name, len); 1317 DEV_STATS_INC(dev, rx_length_errors); 1318 xsk_buff_free(xdp); 1319 return; 1320 } 1321 1322 flags = ((struct virtio_net_common_hdr *)(xdp->data - vi->hdr_len))->hdr.flags; 1323 1324 if (!vi->mergeable_rx_bufs) 1325 skb = virtnet_receive_xsk_small(dev, vi, rq, xdp, xdp_xmit, stats); 1326 else 1327 skb = virtnet_receive_xsk_merge(dev, vi, rq, xdp, xdp_xmit, stats); 1328 1329 if (skb) 1330 virtnet_receive_done(vi, rq, skb, flags); 1331 } 1332 1333 static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue *rq, 1334 struct xsk_buff_pool *pool, gfp_t gfp) 1335 { 1336 struct xdp_buff **xsk_buffs; 1337 dma_addr_t addr; 1338 int err = 0; 1339 u32 len, i; 1340 int num; 1341 1342 xsk_buffs = rq->xsk_buffs; 1343 1344 num = xsk_buff_alloc_batch(pool, xsk_buffs, 
rq->vq->num_free); 1345 if (!num) 1346 return -ENOMEM; 1347 1348 len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len; 1349 1350 for (i = 0; i < num; ++i) { 1351 /* Use the part of XDP_PACKET_HEADROOM as the virtnet hdr space. 1352 * We assume XDP_PACKET_HEADROOM is larger than hdr->len. 1353 * (see function virtnet_xsk_pool_enable) 1354 */ 1355 addr = xsk_buff_xdp_get_dma(xsk_buffs[i]) - vi->hdr_len; 1356 1357 sg_init_table(rq->sg, 1); 1358 sg_fill_dma(rq->sg, addr, len); 1359 1360 err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, xsk_buffs[i], gfp); 1361 if (err) 1362 goto err; 1363 } 1364 1365 return num; 1366 1367 err: 1368 for (; i < num; ++i) 1369 xsk_buff_free(xsk_buffs[i]); 1370 1371 return err; 1372 } 1373 1374 static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag) 1375 { 1376 struct virtnet_info *vi = netdev_priv(dev); 1377 struct send_queue *sq; 1378 1379 if (!netif_running(dev)) 1380 return -ENETDOWN; 1381 1382 if (qid >= vi->curr_queue_pairs) 1383 return -EINVAL; 1384 1385 sq = &vi->sq[qid]; 1386 1387 if (napi_if_scheduled_mark_missed(&sq->napi)) 1388 return 0; 1389 1390 local_bh_disable(); 1391 virtqueue_napi_schedule(&sq->napi, sq->vq); 1392 local_bh_enable(); 1393 1394 return 0; 1395 } 1396 1397 static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, 1398 struct send_queue *sq, 1399 struct xdp_frame *xdpf) 1400 { 1401 struct virtio_net_hdr_mrg_rxbuf *hdr; 1402 struct skb_shared_info *shinfo; 1403 u8 nr_frags = 0; 1404 int err, i; 1405 1406 if (unlikely(xdpf->headroom < vi->hdr_len)) 1407 return -EOVERFLOW; 1408 1409 if (unlikely(xdp_frame_has_frags(xdpf))) { 1410 shinfo = xdp_get_shared_info_from_frame(xdpf); 1411 nr_frags = shinfo->nr_frags; 1412 } 1413 1414 /* In wrapping function virtnet_xdp_xmit(), we need to free 1415 * up the pending old buffers, where we need to calculate the 1416 * position of skb_shared_info in xdp_get_frame_len() and 1417 * xdp_return_frame(), which will involve to xdpf->data and 1418 * xdpf->headroom. Therefore, we need to update the value of 1419 * headroom synchronously here. 1420 */ 1421 xdpf->headroom -= vi->hdr_len; 1422 xdpf->data -= vi->hdr_len; 1423 /* Zero header and leave csum up to XDP layers */ 1424 hdr = xdpf->data; 1425 memset(hdr, 0, vi->hdr_len); 1426 xdpf->len += vi->hdr_len; 1427 1428 sg_init_table(sq->sg, nr_frags + 1); 1429 sg_set_buf(sq->sg, xdpf->data, xdpf->len); 1430 for (i = 0; i < nr_frags; i++) { 1431 skb_frag_t *frag = &shinfo->frags[i]; 1432 1433 sg_set_page(&sq->sg[i + 1], skb_frag_page(frag), 1434 skb_frag_size(frag), skb_frag_off(frag)); 1435 } 1436 1437 err = virtqueue_add_outbuf(sq->vq, sq->sg, nr_frags + 1, 1438 xdp_to_ptr(xdpf), GFP_ATOMIC); 1439 if (unlikely(err)) 1440 return -ENOSPC; /* Caller handle free/refcnt */ 1441 1442 return 0; 1443 } 1444 1445 /* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on 1446 * the current cpu, so it does not need to be locked. 1447 * 1448 * Here we use marco instead of inline functions because we have to deal with 1449 * three issues at the same time: 1. the choice of sq. 2. judge and execute the 1450 * lock/unlock of txq 3. make sparse happy. It is difficult for two inline 1451 * functions to perfectly solve these three problems at the same time. 
1452 */ 1453 #define virtnet_xdp_get_sq(vi) ({ \ 1454 int cpu = smp_processor_id(); \ 1455 struct netdev_queue *txq; \ 1456 typeof(vi) v = (vi); \ 1457 unsigned int qp; \ 1458 \ 1459 if (v->curr_queue_pairs > nr_cpu_ids) { \ 1460 qp = v->curr_queue_pairs - v->xdp_queue_pairs; \ 1461 qp += cpu; \ 1462 txq = netdev_get_tx_queue(v->dev, qp); \ 1463 __netif_tx_acquire(txq); \ 1464 } else { \ 1465 qp = cpu % v->curr_queue_pairs; \ 1466 txq = netdev_get_tx_queue(v->dev, qp); \ 1467 __netif_tx_lock(txq, cpu); \ 1468 } \ 1469 v->sq + qp; \ 1470 }) 1471 1472 #define virtnet_xdp_put_sq(vi, q) { \ 1473 struct netdev_queue *txq; \ 1474 typeof(vi) v = (vi); \ 1475 \ 1476 txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \ 1477 if (v->curr_queue_pairs > nr_cpu_ids) \ 1478 __netif_tx_release(txq); \ 1479 else \ 1480 __netif_tx_unlock(txq); \ 1481 } 1482 1483 static int virtnet_xdp_xmit(struct net_device *dev, 1484 int n, struct xdp_frame **frames, u32 flags) 1485 { 1486 struct virtnet_info *vi = netdev_priv(dev); 1487 struct virtnet_sq_free_stats stats = {0}; 1488 struct receive_queue *rq = vi->rq; 1489 struct bpf_prog *xdp_prog; 1490 struct send_queue *sq; 1491 int nxmit = 0; 1492 int kicks = 0; 1493 int ret; 1494 int i; 1495 1496 /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this 1497 * indicate XDP resources have been successfully allocated. 1498 */ 1499 xdp_prog = rcu_access_pointer(rq->xdp_prog); 1500 if (!xdp_prog) 1501 return -ENXIO; 1502 1503 sq = virtnet_xdp_get_sq(vi); 1504 1505 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { 1506 ret = -EINVAL; 1507 goto out; 1508 } 1509 1510 /* Free up any pending old buffers before queueing new ones. */ 1511 __free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), 1512 false, &stats); 1513 1514 for (i = 0; i < n; i++) { 1515 struct xdp_frame *xdpf = frames[i]; 1516 1517 if (__virtnet_xdp_xmit_one(vi, sq, xdpf)) 1518 break; 1519 nxmit++; 1520 } 1521 ret = nxmit; 1522 1523 if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq)) 1524 check_sq_full_and_disable(vi, dev, sq); 1525 1526 if (flags & XDP_XMIT_FLUSH) { 1527 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) 1528 kicks = 1; 1529 } 1530 out: 1531 u64_stats_update_begin(&sq->stats.syncp); 1532 u64_stats_add(&sq->stats.bytes, stats.bytes); 1533 u64_stats_add(&sq->stats.packets, stats.packets); 1534 u64_stats_add(&sq->stats.xdp_tx, n); 1535 u64_stats_add(&sq->stats.xdp_tx_drops, n - nxmit); 1536 u64_stats_add(&sq->stats.kicks, kicks); 1537 u64_stats_update_end(&sq->stats.syncp); 1538 1539 virtnet_xdp_put_sq(vi, sq); 1540 return ret; 1541 } 1542 1543 static void put_xdp_frags(struct xdp_buff *xdp) 1544 { 1545 struct skb_shared_info *shinfo; 1546 struct page *xdp_page; 1547 int i; 1548 1549 if (xdp_buff_has_frags(xdp)) { 1550 shinfo = xdp_get_shared_info_from_buff(xdp); 1551 for (i = 0; i < shinfo->nr_frags; i++) { 1552 xdp_page = skb_frag_page(&shinfo->frags[i]); 1553 put_page(xdp_page); 1554 } 1555 } 1556 } 1557 1558 static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp, 1559 struct net_device *dev, 1560 unsigned int *xdp_xmit, 1561 struct virtnet_rq_stats *stats) 1562 { 1563 struct xdp_frame *xdpf; 1564 int err; 1565 u32 act; 1566 1567 act = bpf_prog_run_xdp(xdp_prog, xdp); 1568 u64_stats_inc(&stats->xdp_packets); 1569 1570 switch (act) { 1571 case XDP_PASS: 1572 return act; 1573 1574 case XDP_TX: 1575 u64_stats_inc(&stats->xdp_tx); 1576 xdpf = xdp_convert_buff_to_frame(xdp); 1577 if (unlikely(!xdpf)) { 1578 netdev_dbg(dev, "convert buff to frame failed for xdp\n"); 
1579 return XDP_DROP; 1580 } 1581 1582 err = virtnet_xdp_xmit(dev, 1, &xdpf, 0); 1583 if (unlikely(!err)) { 1584 xdp_return_frame_rx_napi(xdpf); 1585 } else if (unlikely(err < 0)) { 1586 trace_xdp_exception(dev, xdp_prog, act); 1587 return XDP_DROP; 1588 } 1589 *xdp_xmit |= VIRTIO_XDP_TX; 1590 return act; 1591 1592 case XDP_REDIRECT: 1593 u64_stats_inc(&stats->xdp_redirects); 1594 err = xdp_do_redirect(dev, xdp, xdp_prog); 1595 if (err) 1596 return XDP_DROP; 1597 1598 *xdp_xmit |= VIRTIO_XDP_REDIR; 1599 return act; 1600 1601 default: 1602 bpf_warn_invalid_xdp_action(dev, xdp_prog, act); 1603 fallthrough; 1604 case XDP_ABORTED: 1605 trace_xdp_exception(dev, xdp_prog, act); 1606 fallthrough; 1607 case XDP_DROP: 1608 return XDP_DROP; 1609 } 1610 } 1611 1612 static unsigned int virtnet_get_headroom(struct virtnet_info *vi) 1613 { 1614 return vi->xdp_enabled ? XDP_PACKET_HEADROOM : 0; 1615 } 1616 1617 /* We copy the packet for XDP in the following cases: 1618 * 1619 * 1) Packet is scattered across multiple rx buffers. 1620 * 2) Headroom space is insufficient. 1621 * 1622 * This is inefficient but it's a temporary condition that 1623 * we hit right after XDP is enabled and until queue is refilled 1624 * with large buffers with sufficient headroom - so it should affect 1625 * at most queue size packets. 1626 * Afterwards, the conditions to enable 1627 * XDP should preclude the underlying device from sending packets 1628 * across multiple buffers (num_buf > 1), and we make sure buffers 1629 * have enough headroom. 1630 */ 1631 static struct page *xdp_linearize_page(struct receive_queue *rq, 1632 int *num_buf, 1633 struct page *p, 1634 int offset, 1635 int page_off, 1636 unsigned int *len) 1637 { 1638 int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1639 struct page *page; 1640 1641 if (page_off + *len + tailroom > PAGE_SIZE) 1642 return NULL; 1643 1644 page = alloc_page(GFP_ATOMIC); 1645 if (!page) 1646 return NULL; 1647 1648 memcpy(page_address(page) + page_off, page_address(p) + offset, *len); 1649 page_off += *len; 1650 1651 while (--*num_buf) { 1652 unsigned int buflen; 1653 void *buf; 1654 int off; 1655 1656 buf = virtnet_rq_get_buf(rq, &buflen, NULL); 1657 if (unlikely(!buf)) 1658 goto err_buf; 1659 1660 p = virt_to_head_page(buf); 1661 off = buf - page_address(p); 1662 1663 /* guard against a misconfigured or uncooperative backend that 1664 * is sending packet larger than the MTU. 
1665 */ 1666 if ((page_off + buflen + tailroom) > PAGE_SIZE) { 1667 put_page(p); 1668 goto err_buf; 1669 } 1670 1671 memcpy(page_address(page) + page_off, 1672 page_address(p) + off, buflen); 1673 page_off += buflen; 1674 put_page(p); 1675 } 1676 1677 /* Headroom does not contribute to packet length */ 1678 *len = page_off - XDP_PACKET_HEADROOM; 1679 return page; 1680 err_buf: 1681 __free_pages(page, 0); 1682 return NULL; 1683 } 1684 1685 static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi, 1686 unsigned int xdp_headroom, 1687 void *buf, 1688 unsigned int len) 1689 { 1690 unsigned int header_offset; 1691 unsigned int headroom; 1692 unsigned int buflen; 1693 struct sk_buff *skb; 1694 1695 header_offset = VIRTNET_RX_PAD + xdp_headroom; 1696 headroom = vi->hdr_len + header_offset; 1697 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1698 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1699 1700 skb = virtnet_build_skb(buf, buflen, headroom, len); 1701 if (unlikely(!skb)) 1702 return NULL; 1703 1704 buf += header_offset; 1705 memcpy(skb_vnet_common_hdr(skb), buf, vi->hdr_len); 1706 1707 return skb; 1708 } 1709 1710 static struct sk_buff *receive_small_xdp(struct net_device *dev, 1711 struct virtnet_info *vi, 1712 struct receive_queue *rq, 1713 struct bpf_prog *xdp_prog, 1714 void *buf, 1715 unsigned int xdp_headroom, 1716 unsigned int len, 1717 unsigned int *xdp_xmit, 1718 struct virtnet_rq_stats *stats) 1719 { 1720 unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom; 1721 unsigned int headroom = vi->hdr_len + header_offset; 1722 struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset; 1723 struct page *page = virt_to_head_page(buf); 1724 struct page *xdp_page; 1725 unsigned int buflen; 1726 struct xdp_buff xdp; 1727 struct sk_buff *skb; 1728 unsigned int metasize = 0; 1729 u32 act; 1730 1731 if (unlikely(hdr->hdr.gso_type)) 1732 goto err_xdp; 1733 1734 /* Partially checksummed packets must be dropped. 
*/ 1735 if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) 1736 goto err_xdp; 1737 1738 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1739 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1740 1741 if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) { 1742 int offset = buf - page_address(page) + header_offset; 1743 unsigned int tlen = len + vi->hdr_len; 1744 int num_buf = 1; 1745 1746 xdp_headroom = virtnet_get_headroom(vi); 1747 header_offset = VIRTNET_RX_PAD + xdp_headroom; 1748 headroom = vi->hdr_len + header_offset; 1749 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1750 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1751 xdp_page = xdp_linearize_page(rq, &num_buf, page, 1752 offset, header_offset, 1753 &tlen); 1754 if (!xdp_page) 1755 goto err_xdp; 1756 1757 buf = page_address(xdp_page); 1758 put_page(page); 1759 page = xdp_page; 1760 } 1761 1762 xdp_init_buff(&xdp, buflen, &rq->xdp_rxq); 1763 xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len, 1764 xdp_headroom, len, true); 1765 1766 act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); 1767 1768 switch (act) { 1769 case XDP_PASS: 1770 /* Recalculate length in case bpf program changed it */ 1771 len = xdp.data_end - xdp.data; 1772 metasize = xdp.data - xdp.data_meta; 1773 break; 1774 1775 case XDP_TX: 1776 case XDP_REDIRECT: 1777 goto xdp_xmit; 1778 1779 default: 1780 goto err_xdp; 1781 } 1782 1783 skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len); 1784 if (unlikely(!skb)) 1785 goto err; 1786 1787 if (metasize) 1788 skb_metadata_set(skb, metasize); 1789 1790 return skb; 1791 1792 err_xdp: 1793 u64_stats_inc(&stats->xdp_drops); 1794 err: 1795 u64_stats_inc(&stats->drops); 1796 put_page(page); 1797 xdp_xmit: 1798 return NULL; 1799 } 1800 1801 static struct sk_buff *receive_small(struct net_device *dev, 1802 struct virtnet_info *vi, 1803 struct receive_queue *rq, 1804 void *buf, void *ctx, 1805 unsigned int len, 1806 unsigned int *xdp_xmit, 1807 struct virtnet_rq_stats *stats) 1808 { 1809 unsigned int xdp_headroom = (unsigned long)ctx; 1810 struct page *page = virt_to_head_page(buf); 1811 struct sk_buff *skb; 1812 1813 len -= vi->hdr_len; 1814 u64_stats_add(&stats->bytes, len); 1815 1816 if (unlikely(len > GOOD_PACKET_LEN)) { 1817 pr_debug("%s: rx error: len %u exceeds max size %d\n", 1818 dev->name, len, GOOD_PACKET_LEN); 1819 DEV_STATS_INC(dev, rx_length_errors); 1820 goto err; 1821 } 1822 1823 if (unlikely(vi->xdp_enabled)) { 1824 struct bpf_prog *xdp_prog; 1825 1826 rcu_read_lock(); 1827 xdp_prog = rcu_dereference(rq->xdp_prog); 1828 if (xdp_prog) { 1829 skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf, 1830 xdp_headroom, len, xdp_xmit, 1831 stats); 1832 rcu_read_unlock(); 1833 return skb; 1834 } 1835 rcu_read_unlock(); 1836 } 1837 1838 skb = receive_small_build_skb(vi, xdp_headroom, buf, len); 1839 if (likely(skb)) 1840 return skb; 1841 1842 err: 1843 u64_stats_inc(&stats->drops); 1844 put_page(page); 1845 return NULL; 1846 } 1847 1848 static struct sk_buff *receive_big(struct net_device *dev, 1849 struct virtnet_info *vi, 1850 struct receive_queue *rq, 1851 void *buf, 1852 unsigned int len, 1853 struct virtnet_rq_stats *stats) 1854 { 1855 struct page *page = buf; 1856 struct sk_buff *skb = 1857 page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0); 1858 1859 u64_stats_add(&stats->bytes, len - vi->hdr_len); 1860 if (unlikely(!skb)) 1861 goto err; 1862 1863 return skb; 1864 1865 err: 1866 u64_stats_inc(&stats->drops); 1867 give_pages(rq, page); 1868 return NULL; 1869 } 1870 1871 
static void mergeable_buf_free(struct receive_queue *rq, int num_buf, 1872 struct net_device *dev, 1873 struct virtnet_rq_stats *stats) 1874 { 1875 struct page *page; 1876 void *buf; 1877 int len; 1878 1879 while (num_buf-- > 1) { 1880 buf = virtnet_rq_get_buf(rq, &len, NULL); 1881 if (unlikely(!buf)) { 1882 pr_debug("%s: rx error: %d buffers missing\n", 1883 dev->name, num_buf); 1884 DEV_STATS_INC(dev, rx_length_errors); 1885 break; 1886 } 1887 u64_stats_add(&stats->bytes, len); 1888 page = virt_to_head_page(buf); 1889 put_page(page); 1890 } 1891 } 1892 1893 /* Why not use xdp_build_skb_from_frame() ? 1894 * XDP core assumes that xdp frags are PAGE_SIZE in length, while in 1895 * virtio-net there are 2 points that do not match its requirements: 1896 * 1. The size of the prefilled buffer is not fixed before xdp is set. 1897 * 2. xdp_build_skb_from_frame() does more checks that we don't need, 1898 * like eth_type_trans() (which virtio-net does in receive_buf()). 1899 */ 1900 static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev, 1901 struct virtnet_info *vi, 1902 struct xdp_buff *xdp, 1903 unsigned int xdp_frags_truesz) 1904 { 1905 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 1906 unsigned int headroom, data_len; 1907 struct sk_buff *skb; 1908 int metasize; 1909 u8 nr_frags; 1910 1911 if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) { 1912 pr_debug("Error building skb as missing reserved tailroom for xdp"); 1913 return NULL; 1914 } 1915 1916 if (unlikely(xdp_buff_has_frags(xdp))) 1917 nr_frags = sinfo->nr_frags; 1918 1919 skb = build_skb(xdp->data_hard_start, xdp->frame_sz); 1920 if (unlikely(!skb)) 1921 return NULL; 1922 1923 headroom = xdp->data - xdp->data_hard_start; 1924 data_len = xdp->data_end - xdp->data; 1925 skb_reserve(skb, headroom); 1926 __skb_put(skb, data_len); 1927 1928 metasize = xdp->data - xdp->data_meta; 1929 metasize = metasize > 0 ? metasize : 0; 1930 if (metasize) 1931 skb_metadata_set(skb, metasize); 1932 1933 if (unlikely(xdp_buff_has_frags(xdp))) 1934 xdp_update_skb_shared_info(skb, nr_frags, 1935 sinfo->xdp_frags_size, 1936 xdp_frags_truesz, 1937 xdp_buff_is_frag_pfmemalloc(xdp)); 1938 1939 return skb; 1940 } 1941 1942 /* TODO: build xdp in big mode */ 1943 static int virtnet_build_xdp_buff_mrg(struct net_device *dev, 1944 struct virtnet_info *vi, 1945 struct receive_queue *rq, 1946 struct xdp_buff *xdp, 1947 void *buf, 1948 unsigned int len, 1949 unsigned int frame_sz, 1950 int *num_buf, 1951 unsigned int *xdp_frags_truesize, 1952 struct virtnet_rq_stats *stats) 1953 { 1954 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 1955 unsigned int headroom, tailroom, room; 1956 unsigned int truesize, cur_frag_size; 1957 struct skb_shared_info *shinfo; 1958 unsigned int xdp_frags_truesz = 0; 1959 struct page *page; 1960 skb_frag_t *frag; 1961 int offset; 1962 void *ctx; 1963 1964 xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq); 1965 xdp_prepare_buff(xdp, buf - XDP_PACKET_HEADROOM, 1966 XDP_PACKET_HEADROOM + vi->hdr_len, len - vi->hdr_len, true); 1967 1968 if (!*num_buf) 1969 return 0; 1970 1971 if (*num_buf > 1) { 1972 /* If we want to build multi-buffer xdp, we need 1973 * to specify that the flags of xdp_buff have the 1974 * XDP_FLAGS_HAS_FRAG bit. 
1975 */ 1976 if (!xdp_buff_has_frags(xdp)) 1977 xdp_buff_set_frags_flag(xdp); 1978 1979 shinfo = xdp_get_shared_info_from_buff(xdp); 1980 shinfo->nr_frags = 0; 1981 shinfo->xdp_frags_size = 0; 1982 } 1983 1984 if (*num_buf > MAX_SKB_FRAGS + 1) 1985 return -EINVAL; 1986 1987 while (--*num_buf > 0) { 1988 buf = virtnet_rq_get_buf(rq, &len, &ctx); 1989 if (unlikely(!buf)) { 1990 pr_debug("%s: rx error: %d buffers out of %d missing\n", 1991 dev->name, *num_buf, 1992 virtio16_to_cpu(vi->vdev, hdr->num_buffers)); 1993 DEV_STATS_INC(dev, rx_length_errors); 1994 goto err; 1995 } 1996 1997 u64_stats_add(&stats->bytes, len); 1998 page = virt_to_head_page(buf); 1999 offset = buf - page_address(page); 2000 2001 truesize = mergeable_ctx_to_truesize(ctx); 2002 headroom = mergeable_ctx_to_headroom(ctx); 2003 tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 2004 room = SKB_DATA_ALIGN(headroom + tailroom); 2005 2006 cur_frag_size = truesize; 2007 xdp_frags_truesz += cur_frag_size; 2008 if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) { 2009 put_page(page); 2010 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 2011 dev->name, len, (unsigned long)(truesize - room)); 2012 DEV_STATS_INC(dev, rx_length_errors); 2013 goto err; 2014 } 2015 2016 frag = &shinfo->frags[shinfo->nr_frags++]; 2017 skb_frag_fill_page_desc(frag, page, offset, len); 2018 if (page_is_pfmemalloc(page)) 2019 xdp_buff_set_frag_pfmemalloc(xdp); 2020 2021 shinfo->xdp_frags_size += len; 2022 } 2023 2024 *xdp_frags_truesize = xdp_frags_truesz; 2025 return 0; 2026 2027 err: 2028 put_xdp_frags(xdp); 2029 return -EINVAL; 2030 } 2031 2032 static void *mergeable_xdp_get_buf(struct virtnet_info *vi, 2033 struct receive_queue *rq, 2034 struct bpf_prog *xdp_prog, 2035 void *ctx, 2036 unsigned int *frame_sz, 2037 int *num_buf, 2038 struct page **page, 2039 int offset, 2040 unsigned int *len, 2041 struct virtio_net_hdr_mrg_rxbuf *hdr) 2042 { 2043 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 2044 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 2045 struct page *xdp_page; 2046 unsigned int xdp_room; 2047 2048 /* Transient failure which in theory could occur if 2049 * in-flight packets from before XDP was enabled reach 2050 * the receive path after XDP is loaded. 2051 */ 2052 if (unlikely(hdr->hdr.gso_type)) 2053 return NULL; 2054 2055 /* Partially checksummed packets must be dropped. */ 2056 if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) 2057 return NULL; 2058 2059 /* Now XDP core assumes frag size is PAGE_SIZE, but buffers 2060 * with headroom may add hole in truesize, which 2061 * make their length exceed PAGE_SIZE. So we disabled the 2062 * hole mechanism for xdp. See add_recvbuf_mergeable(). 2063 */ 2064 *frame_sz = truesize; 2065 2066 if (likely(headroom >= virtnet_get_headroom(vi) && 2067 (*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) { 2068 return page_address(*page) + offset; 2069 } 2070 2071 /* This happens when headroom is not enough because 2072 * of the buffer was prefilled before XDP is set. 2073 * This should only happen for the first several packets. 2074 * In fact, vq reset can be used here to help us clean up 2075 * the prefilled buffers, but many existing devices do not 2076 * support it, and we don't want to bother users who are 2077 * using xdp normally. 
2078 */ 2079 if (!xdp_prog->aux->xdp_has_frags) { 2080 /* linearize data for XDP */ 2081 xdp_page = xdp_linearize_page(rq, num_buf, 2082 *page, offset, 2083 XDP_PACKET_HEADROOM, 2084 len); 2085 if (!xdp_page) 2086 return NULL; 2087 } else { 2088 xdp_room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM + 2089 sizeof(struct skb_shared_info)); 2090 if (*len + xdp_room > PAGE_SIZE) 2091 return NULL; 2092 2093 xdp_page = alloc_page(GFP_ATOMIC); 2094 if (!xdp_page) 2095 return NULL; 2096 2097 memcpy(page_address(xdp_page) + XDP_PACKET_HEADROOM, 2098 page_address(*page) + offset, *len); 2099 } 2100 2101 *frame_sz = PAGE_SIZE; 2102 2103 put_page(*page); 2104 2105 *page = xdp_page; 2106 2107 return page_address(*page) + XDP_PACKET_HEADROOM; 2108 } 2109 2110 static struct sk_buff *receive_mergeable_xdp(struct net_device *dev, 2111 struct virtnet_info *vi, 2112 struct receive_queue *rq, 2113 struct bpf_prog *xdp_prog, 2114 void *buf, 2115 void *ctx, 2116 unsigned int len, 2117 unsigned int *xdp_xmit, 2118 struct virtnet_rq_stats *stats) 2119 { 2120 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2121 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 2122 struct page *page = virt_to_head_page(buf); 2123 int offset = buf - page_address(page); 2124 unsigned int xdp_frags_truesz = 0; 2125 struct sk_buff *head_skb; 2126 unsigned int frame_sz; 2127 struct xdp_buff xdp; 2128 void *data; 2129 u32 act; 2130 int err; 2131 2132 data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, &frame_sz, &num_buf, &page, 2133 offset, &len, hdr); 2134 if (unlikely(!data)) 2135 goto err_xdp; 2136 2137 err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz, 2138 &num_buf, &xdp_frags_truesz, stats); 2139 if (unlikely(err)) 2140 goto err_xdp; 2141 2142 act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); 2143 2144 switch (act) { 2145 case XDP_PASS: 2146 head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz); 2147 if (unlikely(!head_skb)) 2148 break; 2149 return head_skb; 2150 2151 case XDP_TX: 2152 case XDP_REDIRECT: 2153 return NULL; 2154 2155 default: 2156 break; 2157 } 2158 2159 put_xdp_frags(&xdp); 2160 2161 err_xdp: 2162 put_page(page); 2163 mergeable_buf_free(rq, num_buf, dev, stats); 2164 2165 u64_stats_inc(&stats->xdp_drops); 2166 u64_stats_inc(&stats->drops); 2167 return NULL; 2168 } 2169 2170 static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb, 2171 struct sk_buff *curr_skb, 2172 struct page *page, void *buf, 2173 int len, int truesize) 2174 { 2175 int num_skb_frags; 2176 int offset; 2177 2178 num_skb_frags = skb_shinfo(curr_skb)->nr_frags; 2179 if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) { 2180 struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC); 2181 2182 if (unlikely(!nskb)) 2183 return NULL; 2184 2185 if (curr_skb == head_skb) 2186 skb_shinfo(curr_skb)->frag_list = nskb; 2187 else 2188 curr_skb->next = nskb; 2189 curr_skb = nskb; 2190 head_skb->truesize += nskb->truesize; 2191 num_skb_frags = 0; 2192 } 2193 2194 if (curr_skb != head_skb) { 2195 head_skb->data_len += len; 2196 head_skb->len += len; 2197 head_skb->truesize += truesize; 2198 } 2199 2200 offset = buf - page_address(page); 2201 if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) { 2202 put_page(page); 2203 skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1, 2204 len, truesize); 2205 } else { 2206 skb_add_rx_frag(curr_skb, num_skb_frags, page, 2207 offset, len, truesize); 2208 } 2209 2210 return curr_skb; 2211 } 2212 2213 static struct sk_buff *receive_mergeable(struct net_device *dev, 2214 
struct virtnet_info *vi, 2215 struct receive_queue *rq, 2216 void *buf, 2217 void *ctx, 2218 unsigned int len, 2219 unsigned int *xdp_xmit, 2220 struct virtnet_rq_stats *stats) 2221 { 2222 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2223 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 2224 struct page *page = virt_to_head_page(buf); 2225 int offset = buf - page_address(page); 2226 struct sk_buff *head_skb, *curr_skb; 2227 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 2228 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 2229 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 2230 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); 2231 2232 head_skb = NULL; 2233 u64_stats_add(&stats->bytes, len - vi->hdr_len); 2234 2235 if (unlikely(len > truesize - room)) { 2236 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 2237 dev->name, len, (unsigned long)(truesize - room)); 2238 DEV_STATS_INC(dev, rx_length_errors); 2239 goto err_skb; 2240 } 2241 2242 if (unlikely(vi->xdp_enabled)) { 2243 struct bpf_prog *xdp_prog; 2244 2245 rcu_read_lock(); 2246 xdp_prog = rcu_dereference(rq->xdp_prog); 2247 if (xdp_prog) { 2248 head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx, 2249 len, xdp_xmit, stats); 2250 rcu_read_unlock(); 2251 return head_skb; 2252 } 2253 rcu_read_unlock(); 2254 } 2255 2256 head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom); 2257 curr_skb = head_skb; 2258 2259 if (unlikely(!curr_skb)) 2260 goto err_skb; 2261 while (--num_buf) { 2262 buf = virtnet_rq_get_buf(rq, &len, &ctx); 2263 if (unlikely(!buf)) { 2264 pr_debug("%s: rx error: %d buffers out of %d missing\n", 2265 dev->name, num_buf, 2266 virtio16_to_cpu(vi->vdev, 2267 hdr->num_buffers)); 2268 DEV_STATS_INC(dev, rx_length_errors); 2269 goto err_buf; 2270 } 2271 2272 u64_stats_add(&stats->bytes, len); 2273 page = virt_to_head_page(buf); 2274 2275 truesize = mergeable_ctx_to_truesize(ctx); 2276 headroom = mergeable_ctx_to_headroom(ctx); 2277 tailroom = headroom ? 
sizeof(struct skb_shared_info) : 0; 2278 room = SKB_DATA_ALIGN(headroom + tailroom); 2279 if (unlikely(len > truesize - room)) { 2280 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 2281 dev->name, len, (unsigned long)(truesize - room)); 2282 DEV_STATS_INC(dev, rx_length_errors); 2283 goto err_skb; 2284 } 2285 2286 curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, 2287 buf, len, truesize); 2288 if (!curr_skb) 2289 goto err_skb; 2290 } 2291 2292 ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len); 2293 return head_skb; 2294 2295 err_skb: 2296 put_page(page); 2297 mergeable_buf_free(rq, num_buf, dev, stats); 2298 2299 err_buf: 2300 u64_stats_inc(&stats->drops); 2301 dev_kfree_skb(head_skb); 2302 return NULL; 2303 } 2304 2305 static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, 2306 struct sk_buff *skb) 2307 { 2308 enum pkt_hash_types rss_hash_type; 2309 2310 if (!hdr_hash || !skb) 2311 return; 2312 2313 switch (__le16_to_cpu(hdr_hash->hash_report)) { 2314 case VIRTIO_NET_HASH_REPORT_TCPv4: 2315 case VIRTIO_NET_HASH_REPORT_UDPv4: 2316 case VIRTIO_NET_HASH_REPORT_TCPv6: 2317 case VIRTIO_NET_HASH_REPORT_UDPv6: 2318 case VIRTIO_NET_HASH_REPORT_TCPv6_EX: 2319 case VIRTIO_NET_HASH_REPORT_UDPv6_EX: 2320 rss_hash_type = PKT_HASH_TYPE_L4; 2321 break; 2322 case VIRTIO_NET_HASH_REPORT_IPv4: 2323 case VIRTIO_NET_HASH_REPORT_IPv6: 2324 case VIRTIO_NET_HASH_REPORT_IPv6_EX: 2325 rss_hash_type = PKT_HASH_TYPE_L3; 2326 break; 2327 case VIRTIO_NET_HASH_REPORT_NONE: 2328 default: 2329 rss_hash_type = PKT_HASH_TYPE_NONE; 2330 } 2331 skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type); 2332 } 2333 2334 static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, 2335 struct sk_buff *skb, u8 flags) 2336 { 2337 struct virtio_net_common_hdr *hdr; 2338 struct net_device *dev = vi->dev; 2339 2340 hdr = skb_vnet_common_hdr(skb); 2341 if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report) 2342 virtio_skb_set_hash(&hdr->hash_v1_hdr, skb); 2343 2344 if (flags & VIRTIO_NET_HDR_F_DATA_VALID) 2345 skb->ip_summed = CHECKSUM_UNNECESSARY; 2346 2347 if (virtio_net_hdr_to_skb(skb, &hdr->hdr, 2348 virtio_is_little_endian(vi->vdev))) { 2349 net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n", 2350 dev->name, hdr->hdr.gso_type, 2351 hdr->hdr.gso_size); 2352 goto frame_err; 2353 } 2354 2355 skb_record_rx_queue(skb, vq2rxq(rq->vq)); 2356 skb->protocol = eth_type_trans(skb, dev); 2357 pr_debug("Receiving skb proto 0x%04x len %i type %i\n", 2358 ntohs(skb->protocol), skb->len, skb->pkt_type); 2359 2360 napi_gro_receive(&rq->napi, skb); 2361 return; 2362 2363 frame_err: 2364 DEV_STATS_INC(dev, rx_frame_errors); 2365 dev_kfree_skb(skb); 2366 } 2367 2368 static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, 2369 void *buf, unsigned int len, void **ctx, 2370 unsigned int *xdp_xmit, 2371 struct virtnet_rq_stats *stats) 2372 { 2373 struct net_device *dev = vi->dev; 2374 struct sk_buff *skb; 2375 u8 flags; 2376 2377 if (unlikely(len < vi->hdr_len + ETH_HLEN)) { 2378 pr_debug("%s: short packet %i\n", dev->name, len); 2379 DEV_STATS_INC(dev, rx_length_errors); 2380 virtnet_rq_free_buf(vi, rq, buf); 2381 return; 2382 } 2383 2384 /* 1. Save the flags early, as the XDP program might overwrite them. 2385 * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID 2386 * stay valid after XDP processing. 2387 * 2. 
XDP doesn't work with partially checksummed packets (refer to 2388 * virtnet_xdp_set()), so packets marked as 2389 * VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing. 2390 */ 2391 flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; 2392 2393 if (vi->mergeable_rx_bufs) 2394 skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, 2395 stats); 2396 else if (vi->big_packets) 2397 skb = receive_big(dev, vi, rq, buf, len, stats); 2398 else 2399 skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); 2400 2401 if (unlikely(!skb)) 2402 return; 2403 2404 virtnet_receive_done(vi, rq, skb, flags); 2405 } 2406 2407 /* Unlike mergeable buffers, all buffers are allocated to the 2408 * same size, except for the headroom. For this reason we do 2409 * not need to use mergeable_len_to_ctx here - it is enough 2410 * to store the headroom as the context ignoring the truesize. 2411 */ 2412 static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, 2413 gfp_t gfp) 2414 { 2415 char *buf; 2416 unsigned int xdp_headroom = virtnet_get_headroom(vi); 2417 void *ctx = (void *)(unsigned long)xdp_headroom; 2418 int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom; 2419 int err; 2420 2421 len = SKB_DATA_ALIGN(len) + 2422 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 2423 2424 buf = virtnet_rq_alloc(rq, len, gfp); 2425 if (unlikely(!buf)) 2426 return -ENOMEM; 2427 2428 virtnet_rq_init_one_sg(rq, buf + VIRTNET_RX_PAD + xdp_headroom, 2429 vi->hdr_len + GOOD_PACKET_LEN); 2430 2431 err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); 2432 if (err < 0) { 2433 virtnet_rq_unmap(rq, buf, 0); 2434 put_page(virt_to_head_page(buf)); 2435 } 2436 2437 return err; 2438 } 2439 2440 static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, 2441 gfp_t gfp) 2442 { 2443 struct page *first, *list = NULL; 2444 char *p; 2445 int i, err, offset; 2446 2447 sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2); 2448 2449 /* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */ 2450 for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) { 2451 first = get_a_page(rq, gfp); 2452 if (!first) { 2453 if (list) 2454 give_pages(rq, list); 2455 return -ENOMEM; 2456 } 2457 sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE); 2458 2459 /* chain new page in list head to match sg */ 2460 first->private = (unsigned long)list; 2461 list = first; 2462 } 2463 2464 first = get_a_page(rq, gfp); 2465 if (!first) { 2466 give_pages(rq, list); 2467 return -ENOMEM; 2468 } 2469 p = page_address(first); 2470 2471 /* rq->sg[0], rq->sg[1] share the same page */ 2472 /* a separated rq->sg[0] for header - required in case !any_header_sg */ 2473 sg_set_buf(&rq->sg[0], p, vi->hdr_len); 2474 2475 /* rq->sg[1] for data packet, from offset */ 2476 offset = sizeof(struct padded_vnet_hdr); 2477 sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset); 2478 2479 /* chain first in list head */ 2480 first->private = (unsigned long)list; 2481 err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2, 2482 first, gfp); 2483 if (err < 0) 2484 give_pages(rq, first); 2485 2486 return err; 2487 } 2488 2489 static unsigned int get_mergeable_buf_len(struct receive_queue *rq, 2490 struct ewma_pkt_len *avg_pkt_len, 2491 unsigned int room) 2492 { 2493 struct virtnet_info *vi = rq->vq->vdev->priv; 2494 const size_t hdr_len = vi->hdr_len; 2495 unsigned int len; 2496 2497 if (room) 2498 return PAGE_SIZE - room; 2499 2500 len = hdr_len + clamp_t(unsigned int, 
ewma_pkt_len_read(avg_pkt_len), 2501 rq->min_buf_len, PAGE_SIZE - hdr_len); 2502 2503 return ALIGN(len, L1_CACHE_BYTES); 2504 } 2505 2506 static int add_recvbuf_mergeable(struct virtnet_info *vi, 2507 struct receive_queue *rq, gfp_t gfp) 2508 { 2509 struct page_frag *alloc_frag = &rq->alloc_frag; 2510 unsigned int headroom = virtnet_get_headroom(vi); 2511 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 2512 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); 2513 unsigned int len, hole; 2514 void *ctx; 2515 char *buf; 2516 int err; 2517 2518 /* Extra tailroom is needed to satisfy XDP's assumption. This 2519 * means rx frags coalescing won't work, but consider we've 2520 * disabled GSO for XDP, it won't be a big issue. 2521 */ 2522 len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); 2523 2524 buf = virtnet_rq_alloc(rq, len + room, gfp); 2525 if (unlikely(!buf)) 2526 return -ENOMEM; 2527 2528 buf += headroom; /* advance address leaving hole at front of pkt */ 2529 hole = alloc_frag->size - alloc_frag->offset; 2530 if (hole < len + room) { 2531 /* To avoid internal fragmentation, if there is very likely not 2532 * enough space for another buffer, add the remaining space to 2533 * the current buffer. 2534 * XDP core assumes that frame_size of xdp_buff and the length 2535 * of the frag are PAGE_SIZE, so we disable the hole mechanism. 2536 */ 2537 if (!headroom) 2538 len += hole; 2539 alloc_frag->offset += hole; 2540 } 2541 2542 virtnet_rq_init_one_sg(rq, buf, len); 2543 2544 ctx = mergeable_len_to_ctx(len + room, headroom); 2545 err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); 2546 if (err < 0) { 2547 virtnet_rq_unmap(rq, buf, 0); 2548 put_page(virt_to_head_page(buf)); 2549 } 2550 2551 return err; 2552 } 2553 2554 /* 2555 * Returns false if we couldn't fill entirely (OOM). 2556 * 2557 * Normally run in the receive path, but can also be run from ndo_open 2558 * before we're receiving packets, or from refill_work which is 2559 * careful to disable receiving (using napi_disable). 2560 */ 2561 static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq, 2562 gfp_t gfp) 2563 { 2564 int err; 2565 2566 if (rq->xsk_pool) { 2567 err = virtnet_add_recvbuf_xsk(vi, rq, rq->xsk_pool, gfp); 2568 goto kick; 2569 } 2570 2571 do { 2572 if (vi->mergeable_rx_bufs) 2573 err = add_recvbuf_mergeable(vi, rq, gfp); 2574 else if (vi->big_packets) 2575 err = add_recvbuf_big(vi, rq, gfp); 2576 else 2577 err = add_recvbuf_small(vi, rq, gfp); 2578 2579 if (err) 2580 break; 2581 } while (rq->vq->num_free); 2582 2583 kick: 2584 if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) { 2585 unsigned long flags; 2586 2587 flags = u64_stats_update_begin_irqsave(&rq->stats.syncp); 2588 u64_stats_inc(&rq->stats.kicks); 2589 u64_stats_update_end_irqrestore(&rq->stats.syncp, flags); 2590 } 2591 2592 return err != -ENOMEM; 2593 } 2594 2595 static void skb_recv_done(struct virtqueue *rvq) 2596 { 2597 struct virtnet_info *vi = rvq->vdev->priv; 2598 struct receive_queue *rq = &vi->rq[vq2rxq(rvq)]; 2599 2600 rq->calls++; 2601 virtqueue_napi_schedule(&rq->napi, rvq); 2602 } 2603 2604 static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi) 2605 { 2606 napi_enable(napi); 2607 2608 /* If all buffers were filled by other side before we napi_enabled, we 2609 * won't get another interrupt, so process any outstanding packets now. 2610 * Call local_bh_enable after to trigger softIRQ processing. 
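 * Scheduling NAPI between local_bh_disable()/local_bh_enable() guarantees the raised NET_RX softirq runs on this CPU as soon as BHs are re-enabled, instead of waiting for the next device interrupt.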
2611 */ 2612 local_bh_disable(); 2613 virtqueue_napi_schedule(napi, vq); 2614 local_bh_enable(); 2615 } 2616 2617 static void virtnet_napi_tx_enable(struct virtnet_info *vi, 2618 struct virtqueue *vq, 2619 struct napi_struct *napi) 2620 { 2621 if (!napi->weight) 2622 return; 2623 2624 /* Tx napi touches cachelines on the cpu handling tx interrupts. Only 2625 * enable the feature if this is likely affine with the transmit path. 2626 */ 2627 if (!vi->affinity_hint_set) { 2628 napi->weight = 0; 2629 return; 2630 } 2631 2632 return virtnet_napi_enable(vq, napi); 2633 } 2634 2635 static void virtnet_napi_tx_disable(struct napi_struct *napi) 2636 { 2637 if (napi->weight) 2638 napi_disable(napi); 2639 } 2640 2641 static void refill_work(struct work_struct *work) 2642 { 2643 struct virtnet_info *vi = 2644 container_of(work, struct virtnet_info, refill.work); 2645 bool still_empty; 2646 int i; 2647 2648 for (i = 0; i < vi->curr_queue_pairs; i++) { 2649 struct receive_queue *rq = &vi->rq[i]; 2650 2651 napi_disable(&rq->napi); 2652 still_empty = !try_fill_recv(vi, rq, GFP_KERNEL); 2653 virtnet_napi_enable(rq->vq, &rq->napi); 2654 2655 /* In theory, this can happen: if we don't get any buffers in 2656 * we will *never* try to fill again. 2657 */ 2658 if (still_empty) 2659 schedule_delayed_work(&vi->refill, HZ/2); 2660 } 2661 } 2662 2663 static int virtnet_receive_xsk_bufs(struct virtnet_info *vi, 2664 struct receive_queue *rq, 2665 int budget, 2666 unsigned int *xdp_xmit, 2667 struct virtnet_rq_stats *stats) 2668 { 2669 unsigned int len; 2670 int packets = 0; 2671 void *buf; 2672 2673 while (packets < budget) { 2674 buf = virtqueue_get_buf(rq->vq, &len); 2675 if (!buf) 2676 break; 2677 2678 virtnet_receive_xsk_buf(vi, rq, buf, len, xdp_xmit, stats); 2679 packets++; 2680 } 2681 2682 return packets; 2683 } 2684 2685 static int virtnet_receive_packets(struct virtnet_info *vi, 2686 struct receive_queue *rq, 2687 int budget, 2688 unsigned int *xdp_xmit, 2689 struct virtnet_rq_stats *stats) 2690 { 2691 unsigned int len; 2692 int packets = 0; 2693 void *buf; 2694 2695 if (!vi->big_packets || vi->mergeable_rx_bufs) { 2696 void *ctx; 2697 while (packets < budget && 2698 (buf = virtnet_rq_get_buf(rq, &len, &ctx))) { 2699 receive_buf(vi, rq, buf, len, ctx, xdp_xmit, stats); 2700 packets++; 2701 } 2702 } else { 2703 while (packets < budget && 2704 (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) { 2705 receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats); 2706 packets++; 2707 } 2708 } 2709 2710 return packets; 2711 } 2712 2713 static int virtnet_receive(struct receive_queue *rq, int budget, 2714 unsigned int *xdp_xmit) 2715 { 2716 struct virtnet_info *vi = rq->vq->vdev->priv; 2717 struct virtnet_rq_stats stats = {}; 2718 int i, packets; 2719 2720 if (rq->xsk_pool) 2721 packets = virtnet_receive_xsk_bufs(vi, rq, budget, xdp_xmit, &stats); 2722 else 2723 packets = virtnet_receive_packets(vi, rq, budget, xdp_xmit, &stats); 2724 2725 if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { 2726 if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { 2727 spin_lock(&vi->refill_lock); 2728 if (vi->refill_enabled) 2729 schedule_delayed_work(&vi->refill, 0); 2730 spin_unlock(&vi->refill_lock); 2731 } 2732 } 2733 2734 u64_stats_set(&stats.packets, packets); 2735 u64_stats_update_begin(&rq->stats.syncp); 2736 for (i = 0; i < ARRAY_SIZE(virtnet_rq_stats_desc); i++) { 2737 size_t offset = virtnet_rq_stats_desc[i].offset; 2738 u64_stats_t *item, *src; 2739 2740 item = (u64_stats_t *)((u8 *)&rq->stats + 
offset); 2741 src = (u64_stats_t *)((u8 *)&stats + offset); 2742 u64_stats_add(item, u64_stats_read(src)); 2743 } 2744 2745 u64_stats_add(&rq->stats.packets, u64_stats_read(&stats.packets)); 2746 u64_stats_add(&rq->stats.bytes, u64_stats_read(&stats.bytes)); 2747 2748 u64_stats_update_end(&rq->stats.syncp); 2749 2750 return packets; 2751 } 2752 2753 static void virtnet_poll_cleantx(struct receive_queue *rq, int budget) 2754 { 2755 struct virtnet_info *vi = rq->vq->vdev->priv; 2756 unsigned int index = vq2rxq(rq->vq); 2757 struct send_queue *sq = &vi->sq[index]; 2758 struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index); 2759 2760 if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index)) 2761 return; 2762 2763 if (__netif_tx_trylock(txq)) { 2764 if (sq->reset) { 2765 __netif_tx_unlock(txq); 2766 return; 2767 } 2768 2769 do { 2770 virtqueue_disable_cb(sq->vq); 2771 free_old_xmit(sq, txq, !!budget); 2772 } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 2773 2774 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) { 2775 if (netif_tx_queue_stopped(txq)) { 2776 u64_stats_update_begin(&sq->stats.syncp); 2777 u64_stats_inc(&sq->stats.wake); 2778 u64_stats_update_end(&sq->stats.syncp); 2779 } 2780 netif_tx_wake_queue(txq); 2781 } 2782 2783 __netif_tx_unlock(txq); 2784 } 2785 } 2786 2787 static void virtnet_rx_dim_update(struct virtnet_info *vi, struct receive_queue *rq) 2788 { 2789 struct dim_sample cur_sample = {}; 2790 2791 if (!rq->packets_in_napi) 2792 return; 2793 2794 /* Don't need protection when fetching stats, since fetcher and 2795 * updater of the stats are in same context 2796 */ 2797 dim_update_sample(rq->calls, 2798 u64_stats_read(&rq->stats.packets), 2799 u64_stats_read(&rq->stats.bytes), 2800 &cur_sample); 2801 2802 net_dim(&rq->dim, cur_sample); 2803 rq->packets_in_napi = 0; 2804 } 2805 2806 static int virtnet_poll(struct napi_struct *napi, int budget) 2807 { 2808 struct receive_queue *rq = 2809 container_of(napi, struct receive_queue, napi); 2810 struct virtnet_info *vi = rq->vq->vdev->priv; 2811 struct send_queue *sq; 2812 unsigned int received; 2813 unsigned int xdp_xmit = 0; 2814 bool napi_complete; 2815 2816 virtnet_poll_cleantx(rq, budget); 2817 2818 received = virtnet_receive(rq, budget, &xdp_xmit); 2819 rq->packets_in_napi += received; 2820 2821 if (xdp_xmit & VIRTIO_XDP_REDIR) 2822 xdp_do_flush(); 2823 2824 /* Out of packets? */ 2825 if (received < budget) { 2826 napi_complete = virtqueue_napi_complete(napi, rq->vq, received); 2827 /* Intentionally not taking dim_lock here. This may result in a 2828 * spurious net_dim call. But if that happens virtnet_rx_dim_work 2829 * will not act on the scheduled work. 
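 * (virtnet_rx_dim_update() only feeds a fresh sample to net_dim(), which schedules its work item only when the sample warrants changing the moderation profile.)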
2830 */ 2831 if (napi_complete && rq->dim_enabled) 2832 virtnet_rx_dim_update(vi, rq); 2833 } 2834 2835 if (xdp_xmit & VIRTIO_XDP_TX) { 2836 sq = virtnet_xdp_get_sq(vi); 2837 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 2838 u64_stats_update_begin(&sq->stats.syncp); 2839 u64_stats_inc(&sq->stats.kicks); 2840 u64_stats_update_end(&sq->stats.syncp); 2841 } 2842 virtnet_xdp_put_sq(vi, sq); 2843 } 2844 2845 return received; 2846 } 2847 2848 static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index) 2849 { 2850 virtnet_napi_tx_disable(&vi->sq[qp_index].napi); 2851 napi_disable(&vi->rq[qp_index].napi); 2852 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 2853 } 2854 2855 static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index) 2856 { 2857 struct net_device *dev = vi->dev; 2858 int err; 2859 2860 err = xdp_rxq_info_reg(&vi->rq[qp_index].xdp_rxq, dev, qp_index, 2861 vi->rq[qp_index].napi.napi_id); 2862 if (err < 0) 2863 return err; 2864 2865 err = xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq, 2866 MEM_TYPE_PAGE_SHARED, NULL); 2867 if (err < 0) 2868 goto err_xdp_reg_mem_model; 2869 2870 virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi); 2871 netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, qp_index)); 2872 virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi); 2873 2874 return 0; 2875 2876 err_xdp_reg_mem_model: 2877 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 2878 return err; 2879 } 2880 2881 static void virtnet_cancel_dim(struct virtnet_info *vi, struct dim *dim) 2882 { 2883 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 2884 return; 2885 net_dim_work_cancel(dim); 2886 } 2887 2888 static int virtnet_open(struct net_device *dev) 2889 { 2890 struct virtnet_info *vi = netdev_priv(dev); 2891 int i, err; 2892 2893 enable_delayed_refill(vi); 2894 2895 for (i = 0; i < vi->max_queue_pairs; i++) { 2896 if (i < vi->curr_queue_pairs) 2897 /* Make sure we have some buffers: if oom use wq. 
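 * refill_work() keeps retrying with GFP_KERNEL, rescheduling itself every HZ/2 until the ring can be filled.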
*/ 2898 if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) 2899 schedule_delayed_work(&vi->refill, 0); 2900 2901 err = virtnet_enable_queue_pair(vi, i); 2902 if (err < 0) 2903 goto err_enable_qp; 2904 } 2905 2906 return 0; 2907 2908 err_enable_qp: 2909 disable_delayed_refill(vi); 2910 cancel_delayed_work_sync(&vi->refill); 2911 2912 for (i--; i >= 0; i--) { 2913 virtnet_disable_queue_pair(vi, i); 2914 virtnet_cancel_dim(vi, &vi->rq[i].dim); 2915 } 2916 2917 return err; 2918 } 2919 2920 static int virtnet_poll_tx(struct napi_struct *napi, int budget) 2921 { 2922 struct send_queue *sq = container_of(napi, struct send_queue, napi); 2923 struct virtnet_info *vi = sq->vq->vdev->priv; 2924 unsigned int index = vq2txq(sq->vq); 2925 struct netdev_queue *txq; 2926 int opaque; 2927 bool done; 2928 2929 if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { 2930 /* We don't need to enable cb for XDP */ 2931 napi_complete_done(napi, 0); 2932 return 0; 2933 } 2934 2935 txq = netdev_get_tx_queue(vi->dev, index); 2936 __netif_tx_lock(txq, raw_smp_processor_id()); 2937 virtqueue_disable_cb(sq->vq); 2938 free_old_xmit(sq, txq, !!budget); 2939 2940 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) { 2941 if (netif_tx_queue_stopped(txq)) { 2942 u64_stats_update_begin(&sq->stats.syncp); 2943 u64_stats_inc(&sq->stats.wake); 2944 u64_stats_update_end(&sq->stats.syncp); 2945 } 2946 netif_tx_wake_queue(txq); 2947 } 2948 2949 opaque = virtqueue_enable_cb_prepare(sq->vq); 2950 2951 done = napi_complete_done(napi, 0); 2952 2953 if (!done) 2954 virtqueue_disable_cb(sq->vq); 2955 2956 __netif_tx_unlock(txq); 2957 2958 if (done) { 2959 if (unlikely(virtqueue_poll(sq->vq, opaque))) { 2960 if (napi_schedule_prep(napi)) { 2961 __netif_tx_lock(txq, raw_smp_processor_id()); 2962 virtqueue_disable_cb(sq->vq); 2963 __netif_tx_unlock(txq); 2964 __napi_schedule(napi); 2965 } 2966 } 2967 } 2968 2969 return 0; 2970 } 2971 2972 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) 2973 { 2974 struct virtio_net_hdr_mrg_rxbuf *hdr; 2975 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; 2976 struct virtnet_info *vi = sq->vq->vdev->priv; 2977 int num_sg; 2978 unsigned hdr_len = vi->hdr_len; 2979 bool can_push; 2980 2981 pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); 2982 2983 can_push = vi->any_header_sg && 2984 !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) && 2985 !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len; 2986 /* Even if we can, don't push here yet as this would skew 2987 * csum_start offset below. */ 2988 if (can_push) 2989 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len); 2990 else 2991 hdr = &skb_vnet_common_hdr(skb)->mrg_hdr; 2992 2993 if (virtio_net_hdr_from_skb(skb, &hdr->hdr, 2994 virtio_is_little_endian(vi->vdev), false, 2995 0)) 2996 return -EPROTO; 2997 2998 if (vi->mergeable_rx_bufs) 2999 hdr->num_buffers = 0; 3000 3001 sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2)); 3002 if (can_push) { 3003 __skb_push(skb, hdr_len); 3004 num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len); 3005 if (unlikely(num_sg < 0)) 3006 return num_sg; 3007 /* Pull header back to avoid skew in tx bytes calculations. 
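 * skb->len is later passed to __netdev_tx_sent_queue() in start_xmit() for BQL accounting, so it must not include the virtio header that was pushed above only to build the sg list.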
*/ 3008 __skb_pull(skb, hdr_len); 3009 } else { 3010 sg_set_buf(sq->sg, hdr, hdr_len); 3011 num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len); 3012 if (unlikely(num_sg < 0)) 3013 return num_sg; 3014 num_sg++; 3015 } 3016 return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, 3017 skb_to_ptr(skb, orphan), GFP_ATOMIC); 3018 } 3019 3020 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) 3021 { 3022 struct virtnet_info *vi = netdev_priv(dev); 3023 int qnum = skb_get_queue_mapping(skb); 3024 struct send_queue *sq = &vi->sq[qnum]; 3025 int err; 3026 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 3027 bool xmit_more = netdev_xmit_more(); 3028 bool use_napi = sq->napi.weight; 3029 bool kick; 3030 3031 /* Free up any pending old buffers before queueing new ones. */ 3032 do { 3033 if (use_napi) 3034 virtqueue_disable_cb(sq->vq); 3035 3036 free_old_xmit(sq, txq, false); 3037 3038 } while (use_napi && !xmit_more && 3039 unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 3040 3041 /* timestamp packet in software */ 3042 skb_tx_timestamp(skb); 3043 3044 /* Try to transmit */ 3045 err = xmit_skb(sq, skb, !use_napi); 3046 3047 /* This should not happen! */ 3048 if (unlikely(err)) { 3049 DEV_STATS_INC(dev, tx_fifo_errors); 3050 if (net_ratelimit()) 3051 dev_warn(&dev->dev, 3052 "Unexpected TXQ (%d) queue failure: %d\n", 3053 qnum, err); 3054 DEV_STATS_INC(dev, tx_dropped); 3055 dev_kfree_skb_any(skb); 3056 return NETDEV_TX_OK; 3057 } 3058 3059 /* Don't wait up for transmitted skbs to be freed. */ 3060 if (!use_napi) { 3061 skb_orphan(skb); 3062 nf_reset_ct(skb); 3063 } 3064 3065 check_sq_full_and_disable(vi, dev, sq); 3066 3067 kick = use_napi ? __netdev_tx_sent_queue(txq, skb->len, xmit_more) : 3068 !xmit_more || netif_xmit_stopped(txq); 3069 if (kick) { 3070 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 3071 u64_stats_update_begin(&sq->stats.syncp); 3072 u64_stats_inc(&sq->stats.kicks); 3073 u64_stats_update_end(&sq->stats.syncp); 3074 } 3075 } 3076 3077 return NETDEV_TX_OK; 3078 } 3079 3080 static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) 3081 { 3082 bool running = netif_running(vi->dev); 3083 3084 if (running) { 3085 napi_disable(&rq->napi); 3086 virtnet_cancel_dim(vi, &rq->dim); 3087 } 3088 } 3089 3090 static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq) 3091 { 3092 bool running = netif_running(vi->dev); 3093 3094 if (!try_fill_recv(vi, rq, GFP_KERNEL)) 3095 schedule_delayed_work(&vi->refill, 0); 3096 3097 if (running) 3098 virtnet_napi_enable(rq->vq, &rq->napi); 3099 } 3100 3101 static int virtnet_rx_resize(struct virtnet_info *vi, 3102 struct receive_queue *rq, u32 ring_num) 3103 { 3104 int err, qindex; 3105 3106 qindex = rq - vi->rq; 3107 3108 virtnet_rx_pause(vi, rq); 3109 3110 err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_unmap_free_buf); 3111 if (err) 3112 netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err); 3113 3114 virtnet_rx_resume(vi, rq); 3115 return err; 3116 } 3117 3118 static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq) 3119 { 3120 bool running = netif_running(vi->dev); 3121 struct netdev_queue *txq; 3122 int qindex; 3123 3124 qindex = sq - vi->sq; 3125 3126 if (running) 3127 virtnet_napi_tx_disable(&sq->napi); 3128 3129 txq = netdev_get_tx_queue(vi->dev, qindex); 3130 3131 /* 1. wait for all in-flight xmit to complete 3132 * 2.
fix the race of netif_stop_subqueue() vs netif_start_subqueue() 3133 */ 3134 __netif_tx_lock_bh(txq); 3135 3136 /* Prevent rx poll from accessing sq. */ 3137 sq->reset = true; 3138 3139 /* Prevent the upper layer from trying to send packets. */ 3140 netif_stop_subqueue(vi->dev, qindex); 3141 3142 __netif_tx_unlock_bh(txq); 3143 } 3144 3145 static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq) 3146 { 3147 bool running = netif_running(vi->dev); 3148 struct netdev_queue *txq; 3149 int qindex; 3150 3151 qindex = sq - vi->sq; 3152 3153 txq = netdev_get_tx_queue(vi->dev, qindex); 3154 3155 __netif_tx_lock_bh(txq); 3156 sq->reset = false; 3157 netif_tx_wake_queue(txq); 3158 __netif_tx_unlock_bh(txq); 3159 3160 if (running) 3161 virtnet_napi_tx_enable(vi, sq->vq, &sq->napi); 3162 } 3163 3164 static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq, 3165 u32 ring_num) 3166 { 3167 int qindex, err; 3168 3169 qindex = sq - vi->sq; 3170 3171 virtnet_tx_pause(vi, sq); 3172 3173 err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf); 3174 if (err) 3175 netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err); 3176 3177 virtnet_tx_resume(vi, sq); 3178 3179 return err; 3180 } 3181 3182 /* 3183 * Send command via the control virtqueue and check status. Commands 3184 * supported by the hypervisor, as indicated by feature bits, should 3185 * never fail unless improperly formatted. 3186 */ 3187 static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd, 3188 struct scatterlist *out, 3189 struct scatterlist *in) 3190 { 3191 struct scatterlist *sgs[5], hdr, stat; 3192 u32 out_num = 0, tmp, in_num = 0; 3193 bool ok; 3194 int ret; 3195 3196 /* Caller should know better */ 3197 BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)); 3198 3199 mutex_lock(&vi->cvq_lock); 3200 vi->ctrl->status = ~0; 3201 vi->ctrl->hdr.class = class; 3202 vi->ctrl->hdr.cmd = cmd; 3203 /* Add header */ 3204 sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr)); 3205 sgs[out_num++] = &hdr; 3206 3207 if (out) 3208 sgs[out_num++] = out; 3209 3210 /* Add return status. */ 3211 sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status)); 3212 sgs[out_num + in_num++] = &stat; 3213 3214 if (in) 3215 sgs[out_num + in_num++] = in; 3216 3217 BUG_ON(out_num + in_num > ARRAY_SIZE(sgs)); 3218 ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC); 3219 if (ret < 0) { 3220 dev_warn(&vi->vdev->dev, 3221 "Failed to add sgs for command vq: %d\n.", ret); 3222 mutex_unlock(&vi->cvq_lock); 3223 return false; 3224 } 3225 3226 if (unlikely(!virtqueue_kick(vi->cvq))) 3227 goto unlock; 3228 3229 /* Spin for a response, the kick causes an ioport write, trapping 3230 * into the hypervisor, so the request should be handled immediately. 
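 * cond_resched() keeps the wait preemptible, and the virtqueue_is_broken() check lets us give up if the device has been reset or removed underneath us.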
3231 */ 3232 while (!virtqueue_get_buf(vi->cvq, &tmp) && 3233 !virtqueue_is_broken(vi->cvq)) { 3234 cond_resched(); 3235 cpu_relax(); 3236 } 3237 3238 unlock: 3239 ok = vi->ctrl->status == VIRTIO_NET_OK; 3240 mutex_unlock(&vi->cvq_lock); 3241 return ok; 3242 } 3243 3244 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, 3245 struct scatterlist *out) 3246 { 3247 return virtnet_send_command_reply(vi, class, cmd, out, NULL); 3248 } 3249 3250 static int virtnet_set_mac_address(struct net_device *dev, void *p) 3251 { 3252 struct virtnet_info *vi = netdev_priv(dev); 3253 struct virtio_device *vdev = vi->vdev; 3254 int ret; 3255 struct sockaddr *addr; 3256 struct scatterlist sg; 3257 3258 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 3259 return -EOPNOTSUPP; 3260 3261 addr = kmemdup(p, sizeof(*addr), GFP_KERNEL); 3262 if (!addr) 3263 return -ENOMEM; 3264 3265 ret = eth_prepare_mac_addr_change(dev, addr); 3266 if (ret) 3267 goto out; 3268 3269 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 3270 sg_init_one(&sg, addr->sa_data, dev->addr_len); 3271 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 3272 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 3273 dev_warn(&vdev->dev, 3274 "Failed to set mac address by vq command.\n"); 3275 ret = -EINVAL; 3276 goto out; 3277 } 3278 } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 3279 !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { 3280 unsigned int i; 3281 3282 /* Naturally, this has an atomicity problem. */ 3283 for (i = 0; i < dev->addr_len; i++) 3284 virtio_cwrite8(vdev, 3285 offsetof(struct virtio_net_config, mac) + 3286 i, addr->sa_data[i]); 3287 } 3288 3289 eth_commit_mac_addr_change(dev, p); 3290 ret = 0; 3291 3292 out: 3293 kfree(addr); 3294 return ret; 3295 } 3296 3297 static void virtnet_stats(struct net_device *dev, 3298 struct rtnl_link_stats64 *tot) 3299 { 3300 struct virtnet_info *vi = netdev_priv(dev); 3301 unsigned int start; 3302 int i; 3303 3304 for (i = 0; i < vi->max_queue_pairs; i++) { 3305 u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops; 3306 struct receive_queue *rq = &vi->rq[i]; 3307 struct send_queue *sq = &vi->sq[i]; 3308 3309 do { 3310 start = u64_stats_fetch_begin(&sq->stats.syncp); 3311 tpackets = u64_stats_read(&sq->stats.packets); 3312 tbytes = u64_stats_read(&sq->stats.bytes); 3313 terrors = u64_stats_read(&sq->stats.tx_timeouts); 3314 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 3315 3316 do { 3317 start = u64_stats_fetch_begin(&rq->stats.syncp); 3318 rpackets = u64_stats_read(&rq->stats.packets); 3319 rbytes = u64_stats_read(&rq->stats.bytes); 3320 rdrops = u64_stats_read(&rq->stats.drops); 3321 } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); 3322 3323 tot->rx_packets += rpackets; 3324 tot->tx_packets += tpackets; 3325 tot->rx_bytes += rbytes; 3326 tot->tx_bytes += tbytes; 3327 tot->rx_dropped += rdrops; 3328 tot->tx_errors += terrors; 3329 } 3330 3331 tot->tx_dropped = DEV_STATS_READ(dev, tx_dropped); 3332 tot->tx_fifo_errors = DEV_STATS_READ(dev, tx_fifo_errors); 3333 tot->rx_length_errors = DEV_STATS_READ(dev, rx_length_errors); 3334 tot->rx_frame_errors = DEV_STATS_READ(dev, rx_frame_errors); 3335 } 3336 3337 static void virtnet_ack_link_announce(struct virtnet_info *vi) 3338 { 3339 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE, 3340 VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL)) 3341 dev_warn(&vi->dev->dev, "Failed to ack link announce.\n"); 3342 } 3343 3344 static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) 3345 { 3346 
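	/* Ask the device to activate queue_pairs virtqueue pairs via VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET; this is a no-op when there is no control vq or VIRTIO_NET_F_MQ was not negotiated (early return below). */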
struct virtio_net_ctrl_mq *mq __free(kfree) = NULL; 3347 struct scatterlist sg; 3348 struct net_device *dev = vi->dev; 3349 3350 if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ)) 3351 return 0; 3352 3353 mq = kzalloc(sizeof(*mq), GFP_KERNEL); 3354 if (!mq) 3355 return -ENOMEM; 3356 3357 mq->virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs); 3358 sg_init_one(&sg, mq, sizeof(*mq)); 3359 3360 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 3361 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) { 3362 dev_warn(&dev->dev, "Failed to set num of queue pairs to %d\n", 3363 queue_pairs); 3364 return -EINVAL; 3365 } else { 3366 vi->curr_queue_pairs = queue_pairs; 3367 /* virtnet_open() will refill when the device is brought up. */ 3368 if (dev->flags & IFF_UP) 3369 schedule_delayed_work(&vi->refill, 0); 3370 } 3371 3372 return 0; 3373 } 3374 3375 static int virtnet_close(struct net_device *dev) 3376 { 3377 struct virtnet_info *vi = netdev_priv(dev); 3378 int i; 3379 3380 /* Make sure NAPI doesn't schedule refill work */ 3381 disable_delayed_refill(vi); 3382 /* Make sure refill_work doesn't re-enable napi! */ 3383 cancel_delayed_work_sync(&vi->refill); 3384 3385 for (i = 0; i < vi->max_queue_pairs; i++) { 3386 virtnet_disable_queue_pair(vi, i); 3387 virtnet_cancel_dim(vi, &vi->rq[i].dim); 3388 } 3389 3390 return 0; 3391 } 3392 3393 static void virtnet_rx_mode_work(struct work_struct *work) 3394 { 3395 struct virtnet_info *vi = 3396 container_of(work, struct virtnet_info, rx_mode_work); 3397 u8 *promisc_allmulti __free(kfree) = NULL; 3398 struct net_device *dev = vi->dev; 3399 struct scatterlist sg[2]; 3400 struct virtio_net_ctrl_mac *mac_data; 3401 struct netdev_hw_addr *ha; 3402 int uc_count; 3403 int mc_count; 3404 void *buf; 3405 int i; 3406 3407 /* We can't dynamically set ndo_set_rx_mode, so return gracefully */ 3408 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX)) 3409 return; 3410 3411 promisc_allmulti = kzalloc(sizeof(*promisc_allmulti), GFP_KERNEL); 3412 if (!promisc_allmulti) { 3413 dev_warn(&dev->dev, "Failed to set RX mode, no memory.\n"); 3414 return; 3415 } 3416 3417 rtnl_lock(); 3418 3419 *promisc_allmulti = !!(dev->flags & IFF_PROMISC); 3420 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); 3421 3422 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 3423 VIRTIO_NET_CTRL_RX_PROMISC, sg)) 3424 dev_warn(&dev->dev, "Failed to %sable promisc mode.\n", 3425 *promisc_allmulti ? "en" : "dis"); 3426 3427 *promisc_allmulti = !!(dev->flags & IFF_ALLMULTI); 3428 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); 3429 3430 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 3431 VIRTIO_NET_CTRL_RX_ALLMULTI, sg)) 3432 dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n", 3433 *promisc_allmulti ?
"en" : "dis"); 3434 3435 netif_addr_lock_bh(dev); 3436 3437 uc_count = netdev_uc_count(dev); 3438 mc_count = netdev_mc_count(dev); 3439 /* MAC filter - use one buffer for both lists */ 3440 buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) + 3441 (2 * sizeof(mac_data->entries)), GFP_ATOMIC); 3442 mac_data = buf; 3443 if (!buf) { 3444 netif_addr_unlock_bh(dev); 3445 rtnl_unlock(); 3446 return; 3447 } 3448 3449 sg_init_table(sg, 2); 3450 3451 /* Store the unicast list and count in the front of the buffer */ 3452 mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count); 3453 i = 0; 3454 netdev_for_each_uc_addr(ha, dev) 3455 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 3456 3457 sg_set_buf(&sg[0], mac_data, 3458 sizeof(mac_data->entries) + (uc_count * ETH_ALEN)); 3459 3460 /* multicast list and count fill the end */ 3461 mac_data = (void *)&mac_data->macs[uc_count][0]; 3462 3463 mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count); 3464 i = 0; 3465 netdev_for_each_mc_addr(ha, dev) 3466 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 3467 3468 netif_addr_unlock_bh(dev); 3469 3470 sg_set_buf(&sg[1], mac_data, 3471 sizeof(mac_data->entries) + (mc_count * ETH_ALEN)); 3472 3473 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 3474 VIRTIO_NET_CTRL_MAC_TABLE_SET, sg)) 3475 dev_warn(&dev->dev, "Failed to set MAC filter table.\n"); 3476 3477 rtnl_unlock(); 3478 3479 kfree(buf); 3480 } 3481 3482 static void virtnet_set_rx_mode(struct net_device *dev) 3483 { 3484 struct virtnet_info *vi = netdev_priv(dev); 3485 3486 if (vi->rx_mode_work_enabled) 3487 schedule_work(&vi->rx_mode_work); 3488 } 3489 3490 static int virtnet_vlan_rx_add_vid(struct net_device *dev, 3491 __be16 proto, u16 vid) 3492 { 3493 struct virtnet_info *vi = netdev_priv(dev); 3494 __virtio16 *_vid __free(kfree) = NULL; 3495 struct scatterlist sg; 3496 3497 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL); 3498 if (!_vid) 3499 return -ENOMEM; 3500 3501 *_vid = cpu_to_virtio16(vi->vdev, vid); 3502 sg_init_one(&sg, _vid, sizeof(*_vid)); 3503 3504 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 3505 VIRTIO_NET_CTRL_VLAN_ADD, &sg)) 3506 dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid); 3507 return 0; 3508 } 3509 3510 static int virtnet_vlan_rx_kill_vid(struct net_device *dev, 3511 __be16 proto, u16 vid) 3512 { 3513 struct virtnet_info *vi = netdev_priv(dev); 3514 __virtio16 *_vid __free(kfree) = NULL; 3515 struct scatterlist sg; 3516 3517 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL); 3518 if (!_vid) 3519 return -ENOMEM; 3520 3521 *_vid = cpu_to_virtio16(vi->vdev, vid); 3522 sg_init_one(&sg, _vid, sizeof(*_vid)); 3523 3524 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 3525 VIRTIO_NET_CTRL_VLAN_DEL, &sg)) 3526 dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid); 3527 return 0; 3528 } 3529 3530 static void virtnet_clean_affinity(struct virtnet_info *vi) 3531 { 3532 int i; 3533 3534 if (vi->affinity_hint_set) { 3535 for (i = 0; i < vi->max_queue_pairs; i++) { 3536 virtqueue_set_affinity(vi->rq[i].vq, NULL); 3537 virtqueue_set_affinity(vi->sq[i].vq, NULL); 3538 } 3539 3540 vi->affinity_hint_set = false; 3541 } 3542 } 3543 3544 static void virtnet_set_affinity(struct virtnet_info *vi) 3545 { 3546 cpumask_var_t mask; 3547 int stragglers; 3548 int group_size; 3549 int i, j, cpu; 3550 int num_cpu; 3551 int stride; 3552 3553 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { 3554 virtnet_clean_affinity(vi); 3555 return; 3556 } 3557 3558 num_cpu = num_online_cpus(); 3559 stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1); 3560 
stragglers = num_cpu >= vi->curr_queue_pairs ? 3561 num_cpu % vi->curr_queue_pairs : 3562 0; 3563 cpu = cpumask_first(cpu_online_mask); 3564 3565 for (i = 0; i < vi->curr_queue_pairs; i++) { 3566 group_size = stride + (i < stragglers ? 1 : 0); 3567 3568 for (j = 0; j < group_size; j++) { 3569 cpumask_set_cpu(cpu, mask); 3570 cpu = cpumask_next_wrap(cpu, cpu_online_mask, 3571 nr_cpu_ids, false); 3572 } 3573 virtqueue_set_affinity(vi->rq[i].vq, mask); 3574 virtqueue_set_affinity(vi->sq[i].vq, mask); 3575 __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS); 3576 cpumask_clear(mask); 3577 } 3578 3579 vi->affinity_hint_set = true; 3580 free_cpumask_var(mask); 3581 } 3582 3583 static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node) 3584 { 3585 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 3586 node); 3587 virtnet_set_affinity(vi); 3588 return 0; 3589 } 3590 3591 static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node) 3592 { 3593 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 3594 node_dead); 3595 virtnet_set_affinity(vi); 3596 return 0; 3597 } 3598 3599 static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node) 3600 { 3601 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 3602 node); 3603 3604 virtnet_clean_affinity(vi); 3605 return 0; 3606 } 3607 3608 static enum cpuhp_state virtionet_online; 3609 3610 static int virtnet_cpu_notif_add(struct virtnet_info *vi) 3611 { 3612 int ret; 3613 3614 ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node); 3615 if (ret) 3616 return ret; 3617 ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD, 3618 &vi->node_dead); 3619 if (!ret) 3620 return ret; 3621 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 3622 return ret; 3623 } 3624 3625 static void virtnet_cpu_notif_remove(struct virtnet_info *vi) 3626 { 3627 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 3628 cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD, 3629 &vi->node_dead); 3630 } 3631 3632 static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi, 3633 u16 vqn, u32 max_usecs, u32 max_packets) 3634 { 3635 struct virtio_net_ctrl_coal_vq *coal_vq __free(kfree) = NULL; 3636 struct scatterlist sgs; 3637 3638 coal_vq = kzalloc(sizeof(*coal_vq), GFP_KERNEL); 3639 if (!coal_vq) 3640 return -ENOMEM; 3641 3642 coal_vq->vqn = cpu_to_le16(vqn); 3643 coal_vq->coal.max_usecs = cpu_to_le32(max_usecs); 3644 coal_vq->coal.max_packets = cpu_to_le32(max_packets); 3645 sg_init_one(&sgs, coal_vq, sizeof(*coal_vq)); 3646 3647 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 3648 VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET, 3649 &sgs)) 3650 return -EINVAL; 3651 3652 return 0; 3653 } 3654 3655 static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info *vi, 3656 u16 queue, u32 max_usecs, 3657 u32 max_packets) 3658 { 3659 int err; 3660 3661 err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue), 3662 max_usecs, max_packets); 3663 if (err) 3664 return err; 3665 3666 vi->rq[queue].intr_coal.max_usecs = max_usecs; 3667 vi->rq[queue].intr_coal.max_packets = max_packets; 3668 3669 return 0; 3670 } 3671 3672 static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info *vi, 3673 u16 queue, u32 max_usecs, 3674 u32 max_packets) 3675 { 3676 int err; 3677 3678 err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue), 3679 max_usecs, max_packets); 3680 if (err) 3681 return err; 3682 3683 vi->sq[queue].intr_coal.max_usecs = max_usecs; 3684 
vi->sq[queue].intr_coal.max_packets = max_packets; 3685 3686 return 0; 3687 } 3688 3689 static void virtnet_get_ringparam(struct net_device *dev, 3690 struct ethtool_ringparam *ring, 3691 struct kernel_ethtool_ringparam *kernel_ring, 3692 struct netlink_ext_ack *extack) 3693 { 3694 struct virtnet_info *vi = netdev_priv(dev); 3695 3696 ring->rx_max_pending = vi->rq[0].vq->num_max; 3697 ring->tx_max_pending = vi->sq[0].vq->num_max; 3698 ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 3699 ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 3700 } 3701 3702 static int virtnet_set_ringparam(struct net_device *dev, 3703 struct ethtool_ringparam *ring, 3704 struct kernel_ethtool_ringparam *kernel_ring, 3705 struct netlink_ext_ack *extack) 3706 { 3707 struct virtnet_info *vi = netdev_priv(dev); 3708 u32 rx_pending, tx_pending; 3709 struct receive_queue *rq; 3710 struct send_queue *sq; 3711 int i, err; 3712 3713 if (ring->rx_mini_pending || ring->rx_jumbo_pending) 3714 return -EINVAL; 3715 3716 rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 3717 tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 3718 3719 if (ring->rx_pending == rx_pending && 3720 ring->tx_pending == tx_pending) 3721 return 0; 3722 3723 if (ring->rx_pending > vi->rq[0].vq->num_max) 3724 return -EINVAL; 3725 3726 if (ring->tx_pending > vi->sq[0].vq->num_max) 3727 return -EINVAL; 3728 3729 for (i = 0; i < vi->max_queue_pairs; i++) { 3730 rq = vi->rq + i; 3731 sq = vi->sq + i; 3732 3733 if (ring->tx_pending != tx_pending) { 3734 err = virtnet_tx_resize(vi, sq, ring->tx_pending); 3735 if (err) 3736 return err; 3737 3738 /* Upon disabling and re-enabling a transmit virtqueue, the device must 3739 * set the coalescing parameters of the virtqueue to those configured 3740 * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver 3741 * did not set any TX coalescing parameters, to 0. 3742 */ 3743 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, i, 3744 vi->intr_coal_tx.max_usecs, 3745 vi->intr_coal_tx.max_packets); 3746 if (err) 3747 return err; 3748 } 3749 3750 if (ring->rx_pending != rx_pending) { 3751 err = virtnet_rx_resize(vi, rq, ring->rx_pending); 3752 if (err) 3753 return err; 3754 3755 /* The reason is same as the transmit virtqueue reset */ 3756 mutex_lock(&vi->rq[i].dim_lock); 3757 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, i, 3758 vi->intr_coal_rx.max_usecs, 3759 vi->intr_coal_rx.max_packets); 3760 mutex_unlock(&vi->rq[i].dim_lock); 3761 if (err) 3762 return err; 3763 } 3764 } 3765 3766 return 0; 3767 } 3768 3769 static bool virtnet_commit_rss_command(struct virtnet_info *vi) 3770 { 3771 struct net_device *dev = vi->dev; 3772 struct scatterlist sgs[4]; 3773 unsigned int sg_buf_size; 3774 3775 /* prepare sgs */ 3776 sg_init_table(sgs, 4); 3777 3778 sg_buf_size = offsetof(struct virtio_net_ctrl_rss, indirection_table); 3779 sg_set_buf(&sgs[0], &vi->rss, sg_buf_size); 3780 3781 sg_buf_size = sizeof(uint16_t) * (vi->rss.indirection_table_mask + 1); 3782 sg_set_buf(&sgs[1], vi->rss.indirection_table, sg_buf_size); 3783 3784 sg_buf_size = offsetof(struct virtio_net_ctrl_rss, key) 3785 - offsetof(struct virtio_net_ctrl_rss, max_tx_vq); 3786 sg_set_buf(&sgs[2], &vi->rss.max_tx_vq, sg_buf_size); 3787 3788 sg_buf_size = vi->rss_key_size; 3789 sg_set_buf(&sgs[3], vi->rss.key, sg_buf_size); 3790 3791 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 3792 vi->has_rss ? 
VIRTIO_NET_CTRL_MQ_RSS_CONFIG 3793 : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs)) 3794 goto err; 3795 3796 return true; 3797 3798 err: 3799 dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n"); 3800 return false; 3801 3802 } 3803 3804 static void virtnet_init_default_rss(struct virtnet_info *vi) 3805 { 3806 u32 indir_val = 0; 3807 int i = 0; 3808 3809 vi->rss.hash_types = vi->rss_hash_types_supported; 3810 vi->rss_hash_types_saved = vi->rss_hash_types_supported; 3811 vi->rss.indirection_table_mask = vi->rss_indir_table_size 3812 ? vi->rss_indir_table_size - 1 : 0; 3813 vi->rss.unclassified_queue = 0; 3814 3815 for (; i < vi->rss_indir_table_size; ++i) { 3816 indir_val = ethtool_rxfh_indir_default(i, vi->curr_queue_pairs); 3817 vi->rss.indirection_table[i] = indir_val; 3818 } 3819 3820 vi->rss.max_tx_vq = vi->has_rss ? vi->curr_queue_pairs : 0; 3821 vi->rss.hash_key_length = vi->rss_key_size; 3822 3823 netdev_rss_key_fill(vi->rss.key, vi->rss_key_size); 3824 } 3825 3826 static void virtnet_get_hashflow(const struct virtnet_info *vi, struct ethtool_rxnfc *info) 3827 { 3828 info->data = 0; 3829 switch (info->flow_type) { 3830 case TCP_V4_FLOW: 3831 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) { 3832 info->data = RXH_IP_SRC | RXH_IP_DST | 3833 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3834 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 3835 info->data = RXH_IP_SRC | RXH_IP_DST; 3836 } 3837 break; 3838 case TCP_V6_FLOW: 3839 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) { 3840 info->data = RXH_IP_SRC | RXH_IP_DST | 3841 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3842 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 3843 info->data = RXH_IP_SRC | RXH_IP_DST; 3844 } 3845 break; 3846 case UDP_V4_FLOW: 3847 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) { 3848 info->data = RXH_IP_SRC | RXH_IP_DST | 3849 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3850 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 3851 info->data = RXH_IP_SRC | RXH_IP_DST; 3852 } 3853 break; 3854 case UDP_V6_FLOW: 3855 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) { 3856 info->data = RXH_IP_SRC | RXH_IP_DST | 3857 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3858 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 3859 info->data = RXH_IP_SRC | RXH_IP_DST; 3860 } 3861 break; 3862 case IPV4_FLOW: 3863 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) 3864 info->data = RXH_IP_SRC | RXH_IP_DST; 3865 3866 break; 3867 case IPV6_FLOW: 3868 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) 3869 info->data = RXH_IP_SRC | RXH_IP_DST; 3870 3871 break; 3872 default: 3873 info->data = 0; 3874 break; 3875 } 3876 } 3877 3878 static bool virtnet_set_hashflow(struct virtnet_info *vi, struct ethtool_rxnfc *info) 3879 { 3880 u32 new_hashtypes = vi->rss_hash_types_saved; 3881 bool is_disable = info->data & RXH_DISCARD; 3882 bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3); 3883 3884 /* supports only 'sd', 'sdfn' and 'r' */ 3885 if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable)) 3886 return false; 3887 3888 switch (info->flow_type) { 3889 case TCP_V4_FLOW: 3890 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4); 3891 if (!is_disable) 3892 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 3893 | (is_l4 ? 
VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0); 3894 break; 3895 case UDP_V4_FLOW: 3896 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4); 3897 if (!is_disable) 3898 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 3899 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0); 3900 break; 3901 case IPV4_FLOW: 3902 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4; 3903 if (!is_disable) 3904 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4; 3905 break; 3906 case TCP_V6_FLOW: 3907 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6); 3908 if (!is_disable) 3909 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 3910 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0); 3911 break; 3912 case UDP_V6_FLOW: 3913 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6); 3914 if (!is_disable) 3915 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 3916 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0); 3917 break; 3918 case IPV6_FLOW: 3919 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6; 3920 if (!is_disable) 3921 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6; 3922 break; 3923 default: 3924 /* unsupported flow */ 3925 return false; 3926 } 3927 3928 /* if unsupported hashtype was set */ 3929 if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported)) 3930 return false; 3931 3932 if (new_hashtypes != vi->rss_hash_types_saved) { 3933 vi->rss_hash_types_saved = new_hashtypes; 3934 vi->rss.hash_types = vi->rss_hash_types_saved; 3935 if (vi->dev->features & NETIF_F_RXHASH) 3936 return virtnet_commit_rss_command(vi); 3937 } 3938 3939 return true; 3940 } 3941 3942 static void virtnet_get_drvinfo(struct net_device *dev, 3943 struct ethtool_drvinfo *info) 3944 { 3945 struct virtnet_info *vi = netdev_priv(dev); 3946 struct virtio_device *vdev = vi->vdev; 3947 3948 strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); 3949 strscpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version)); 3950 strscpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info)); 3951 3952 } 3953 3954 /* TODO: Eliminate OOO packets during switching */ 3955 static int virtnet_set_channels(struct net_device *dev, 3956 struct ethtool_channels *channels) 3957 { 3958 struct virtnet_info *vi = netdev_priv(dev); 3959 u16 queue_pairs = channels->combined_count; 3960 int err; 3961 3962 /* We don't support separate rx/tx channels. 3963 * We don't allow setting 'other' channels. 3964 */ 3965 if (channels->rx_count || channels->tx_count || channels->other_count) 3966 return -EINVAL; 3967 3968 if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0) 3969 return -EINVAL; 3970 3971 /* For now we don't support modifying channels while XDP is loaded 3972 * also when XDP is loaded all RX queues have XDP programs so we only 3973 * need to check a single RX queue. 
3974 */ 3975 if (vi->rq[0].xdp_prog) 3976 return -EINVAL; 3977 3978 cpus_read_lock(); 3979 err = virtnet_set_queues(vi, queue_pairs); 3980 if (err) { 3981 cpus_read_unlock(); 3982 goto err; 3983 } 3984 virtnet_set_affinity(vi); 3985 cpus_read_unlock(); 3986 3987 netif_set_real_num_tx_queues(dev, queue_pairs); 3988 netif_set_real_num_rx_queues(dev, queue_pairs); 3989 err: 3990 return err; 3991 } 3992 3993 static void virtnet_stats_sprintf(u8 **p, const char *fmt, const char *noq_fmt, 3994 int num, int qid, const struct virtnet_stat_desc *desc) 3995 { 3996 int i; 3997 3998 if (qid < 0) { 3999 for (i = 0; i < num; ++i) 4000 ethtool_sprintf(p, noq_fmt, desc[i].desc); 4001 } else { 4002 for (i = 0; i < num; ++i) 4003 ethtool_sprintf(p, fmt, qid, desc[i].desc); 4004 } 4005 } 4006 4007 /* qid == -1: for rx/tx queue total field */ 4008 static void virtnet_get_stats_string(struct virtnet_info *vi, int type, int qid, u8 **data) 4009 { 4010 const struct virtnet_stat_desc *desc; 4011 const char *fmt, *noq_fmt; 4012 u8 *p = *data; 4013 u32 num; 4014 4015 if (type == VIRTNET_Q_TYPE_CQ && qid >= 0) { 4016 noq_fmt = "cq_hw_%s"; 4017 4018 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { 4019 desc = &virtnet_stats_cvq_desc[0]; 4020 num = ARRAY_SIZE(virtnet_stats_cvq_desc); 4021 4022 virtnet_stats_sprintf(&p, NULL, noq_fmt, num, -1, desc); 4023 } 4024 } 4025 4026 if (type == VIRTNET_Q_TYPE_RX) { 4027 fmt = "rx%u_%s"; 4028 noq_fmt = "rx_%s"; 4029 4030 desc = &virtnet_rq_stats_desc[0]; 4031 num = ARRAY_SIZE(virtnet_rq_stats_desc); 4032 4033 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4034 4035 fmt = "rx%u_hw_%s"; 4036 noq_fmt = "rx_hw_%s"; 4037 4038 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4039 desc = &virtnet_stats_rx_basic_desc[0]; 4040 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4041 4042 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4043 } 4044 4045 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4046 desc = &virtnet_stats_rx_csum_desc[0]; 4047 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4048 4049 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4050 } 4051 4052 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4053 desc = &virtnet_stats_rx_speed_desc[0]; 4054 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4055 4056 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4057 } 4058 } 4059 4060 if (type == VIRTNET_Q_TYPE_TX) { 4061 fmt = "tx%u_%s"; 4062 noq_fmt = "tx_%s"; 4063 4064 desc = &virtnet_sq_stats_desc[0]; 4065 num = ARRAY_SIZE(virtnet_sq_stats_desc); 4066 4067 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4068 4069 fmt = "tx%u_hw_%s"; 4070 noq_fmt = "tx_hw_%s"; 4071 4072 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4073 desc = &virtnet_stats_tx_basic_desc[0]; 4074 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4075 4076 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4077 } 4078 4079 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4080 desc = &virtnet_stats_tx_gso_desc[0]; 4081 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4082 4083 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4084 } 4085 4086 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4087 desc = &virtnet_stats_tx_speed_desc[0]; 4088 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4089 4090 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4091 } 4092 } 4093 4094 *data = p; 4095 } 4096 4097 struct virtnet_stats_ctx { 4098 /* The stats are write to qstats or ethtool -S */ 4099 
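/* When to_qstat is false (the ethtool -S path), ctx->data is laid out as
 * the rx totals, then the tx totals, then the cq device stats, then one
 * per-queue block for each rx queue followed by one for each tx queue,
 * matching the string order produced by virtnet_get_stats_string().
 */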
bool to_qstat; 4100 4101 /* Used to calculate the offset inside the output buffer. */ 4102 u32 desc_num[3]; 4103 4104 /* The actual supported stat types. */ 4105 u32 bitmap[3]; 4106 4107 /* Used to calculate the reply buffer size. */ 4108 u32 size[3]; 4109 4110 /* Record the output buffer. */ 4111 u64 *data; 4112 }; 4113 4114 static void virtnet_stats_ctx_init(struct virtnet_info *vi, 4115 struct virtnet_stats_ctx *ctx, 4116 u64 *data, bool to_qstat) 4117 { 4118 u32 queue_type; 4119 4120 ctx->data = data; 4121 ctx->to_qstat = to_qstat; 4122 4123 if (to_qstat) { 4124 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); 4125 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); 4126 4127 queue_type = VIRTNET_Q_TYPE_RX; 4128 4129 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4130 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; 4131 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); 4132 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); 4133 } 4134 4135 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4136 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; 4137 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); 4138 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); 4139 } 4140 4141 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 4142 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_GSO; 4143 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); 4144 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_gso); 4145 } 4146 4147 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4148 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; 4149 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); 4150 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); 4151 } 4152 4153 queue_type = VIRTNET_Q_TYPE_TX; 4154 4155 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4156 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; 4157 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); 4158 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); 4159 } 4160 4161 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 4162 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_CSUM; 4163 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); 4164 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_csum); 4165 } 4166 4167 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4168 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; 4169 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); 4170 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); 4171 } 4172 4173 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4174 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; 4175 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); 4176 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); 4177 } 4178 4179 return; 4180 } 4181 4182 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc); 4183 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc); 4184 4185 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { 4186 queue_type = VIRTNET_Q_TYPE_CQ; 4187 4188 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_CVQ; 4189 ctx->desc_num[queue_type] += 
ARRAY_SIZE(virtnet_stats_cvq_desc); 4190 ctx->size[queue_type] += sizeof(struct virtio_net_stats_cvq); 4191 } 4192 4193 queue_type = VIRTNET_Q_TYPE_RX; 4194 4195 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4196 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; 4197 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4198 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); 4199 } 4200 4201 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4202 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; 4203 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4204 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); 4205 } 4206 4207 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4208 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; 4209 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4210 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); 4211 } 4212 4213 queue_type = VIRTNET_Q_TYPE_TX; 4214 4215 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4216 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; 4217 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4218 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); 4219 } 4220 4221 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4222 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; 4223 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4224 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); 4225 } 4226 4227 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4228 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; 4229 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4230 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); 4231 } 4232 } 4233 4234 /* stats_sum_queue - Calculate the sum of the same fields in sq or rq. 
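 * Each queue contributes a block of @num consecutive u64 values, so
 * field i of queue j is read from q_value[i + j * @num].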
4235 * @sum: the position to store the sum values 4236 * @num: field num 4237 * @q_value: the first queue fields 4238 * @q_num: number of the queues 4239 */ 4240 static void stats_sum_queue(u64 *sum, u32 num, u64 *q_value, u32 q_num) 4241 { 4242 u32 step = num; 4243 int i, j; 4244 u64 *p; 4245 4246 for (i = 0; i < num; ++i) { 4247 p = sum + i; 4248 *p = 0; 4249 4250 for (j = 0; j < q_num; ++j) 4251 *p += *(q_value + i + j * step); 4252 } 4253 } 4254 4255 static void virtnet_fill_total_fields(struct virtnet_info *vi, 4256 struct virtnet_stats_ctx *ctx) 4257 { 4258 u64 *data, *first_rx_q, *first_tx_q; 4259 u32 num_cq, num_rx, num_tx; 4260 4261 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; 4262 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; 4263 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; 4264 4265 first_rx_q = ctx->data + num_rx + num_tx + num_cq; 4266 first_tx_q = first_rx_q + vi->curr_queue_pairs * num_rx; 4267 4268 data = ctx->data; 4269 4270 stats_sum_queue(data, num_rx, first_rx_q, vi->curr_queue_pairs); 4271 4272 data = ctx->data + num_rx; 4273 4274 stats_sum_queue(data, num_tx, first_tx_q, vi->curr_queue_pairs); 4275 } 4276 4277 static void virtnet_fill_stats_qstat(struct virtnet_info *vi, u32 qid, 4278 struct virtnet_stats_ctx *ctx, 4279 const u8 *base, bool drv_stats, u8 reply_type) 4280 { 4281 const struct virtnet_stat_desc *desc; 4282 const u64_stats_t *v_stat; 4283 u64 offset, bitmap; 4284 const __le64 *v; 4285 u32 queue_type; 4286 int i, num; 4287 4288 queue_type = vq_type(vi, qid); 4289 bitmap = ctx->bitmap[queue_type]; 4290 4291 if (drv_stats) { 4292 if (queue_type == VIRTNET_Q_TYPE_RX) { 4293 desc = &virtnet_rq_stats_desc_qstat[0]; 4294 num = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); 4295 } else { 4296 desc = &virtnet_sq_stats_desc_qstat[0]; 4297 num = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); 4298 } 4299 4300 for (i = 0; i < num; ++i) { 4301 offset = desc[i].qstat_offset / sizeof(*ctx->data); 4302 v_stat = (const u64_stats_t *)(base + desc[i].offset); 4303 ctx->data[offset] = u64_stats_read(v_stat); 4304 } 4305 return; 4306 } 4307 4308 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4309 desc = &virtnet_stats_rx_basic_desc_qstat[0]; 4310 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); 4311 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) 4312 goto found; 4313 } 4314 4315 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4316 desc = &virtnet_stats_rx_csum_desc_qstat[0]; 4317 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); 4318 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) 4319 goto found; 4320 } 4321 4322 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 4323 desc = &virtnet_stats_rx_gso_desc_qstat[0]; 4324 num = ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); 4325 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_GSO) 4326 goto found; 4327 } 4328 4329 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4330 desc = &virtnet_stats_rx_speed_desc_qstat[0]; 4331 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); 4332 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) 4333 goto found; 4334 } 4335 4336 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4337 desc = &virtnet_stats_tx_basic_desc_qstat[0]; 4338 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); 4339 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) 4340 goto found; 4341 } 4342 4343 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 4344 desc = &virtnet_stats_tx_csum_desc_qstat[0]; 4345 num = ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); 4346 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_CSUM) 4347 goto found; 4348 
} 4349 4350 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4351 desc = &virtnet_stats_tx_gso_desc_qstat[0]; 4352 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); 4353 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) 4354 goto found; 4355 } 4356 4357 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4358 desc = &virtnet_stats_tx_speed_desc_qstat[0]; 4359 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); 4360 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) 4361 goto found; 4362 } 4363 4364 return; 4365 4366 found: 4367 for (i = 0; i < num; ++i) { 4368 offset = desc[i].qstat_offset / sizeof(*ctx->data); 4369 v = (const __le64 *)(base + desc[i].offset); 4370 ctx->data[offset] = le64_to_cpu(*v); 4371 } 4372 } 4373 4374 /* virtnet_fill_stats - copy the stats to qstats or ethtool -S 4375 * The stats source is the device or the driver. 4376 * 4377 * @vi: virtio net info 4378 * @qid: the vq id 4379 * @ctx: stats ctx (initialized by virtnet_stats_ctx_init()) 4380 * @base: pointer to the device reply or the driver stats structure. 4381 * @drv_stats: designates the base type (true: driver stats, false: device reply) 4382 * @reply_type: the type of the device reply (if @drv_stats is true, this must be zero) 4383 */ 4384 static void virtnet_fill_stats(struct virtnet_info *vi, u32 qid, 4385 struct virtnet_stats_ctx *ctx, 4386 const u8 *base, bool drv_stats, u8 reply_type) 4387 { 4388 u32 queue_type, num_rx, num_tx, num_cq; 4389 const struct virtnet_stat_desc *desc; 4390 const u64_stats_t *v_stat; 4391 u64 offset, bitmap; 4392 const __le64 *v; 4393 int i, num; 4394 4395 if (ctx->to_qstat) 4396 return virtnet_fill_stats_qstat(vi, qid, ctx, base, drv_stats, reply_type); 4397 4398 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; 4399 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; 4400 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; 4401 4402 queue_type = vq_type(vi, qid); 4403 bitmap = ctx->bitmap[queue_type]; 4404 4405 /* skip the total fields of pairs */ 4406 offset = num_rx + num_tx; 4407 4408 if (queue_type == VIRTNET_Q_TYPE_TX) { 4409 offset += num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2); 4410 4411 num = ARRAY_SIZE(virtnet_sq_stats_desc); 4412 if (drv_stats) { 4413 desc = &virtnet_sq_stats_desc[0]; 4414 goto drv_stats; 4415 } 4416 4417 offset += num; 4418 4419 } else if (queue_type == VIRTNET_Q_TYPE_RX) { 4420 offset += num_cq + num_rx * (qid / 2); 4421 4422 num = ARRAY_SIZE(virtnet_rq_stats_desc); 4423 if (drv_stats) { 4424 desc = &virtnet_rq_stats_desc[0]; 4425 goto drv_stats; 4426 } 4427 4428 offset += num; 4429 } 4430 4431 if (bitmap & VIRTIO_NET_STATS_TYPE_CVQ) { 4432 desc = &virtnet_stats_cvq_desc[0]; 4433 num = ARRAY_SIZE(virtnet_stats_cvq_desc); 4434 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_CVQ) 4435 goto found; 4436 4437 offset += num; 4438 } 4439 4440 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4441 desc = &virtnet_stats_rx_basic_desc[0]; 4442 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4443 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) 4444 goto found; 4445 4446 offset += num; 4447 } 4448 4449 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4450 desc = &virtnet_stats_rx_csum_desc[0]; 4451 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4452 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) 4453 goto found; 4454 4455 offset += num; 4456 } 4457 4458 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4459 desc = &virtnet_stats_rx_speed_desc[0]; 4460 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4461 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) 4462 goto found; 4463 4464
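/* Not the requested reply block: account for its slot and move on. */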
offset += num; 4465 } 4466 4467 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4468 desc = &virtnet_stats_tx_basic_desc[0]; 4469 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4470 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) 4471 goto found; 4472 4473 offset += num; 4474 } 4475 4476 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4477 desc = &virtnet_stats_tx_gso_desc[0]; 4478 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4479 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) 4480 goto found; 4481 4482 offset += num; 4483 } 4484 4485 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4486 desc = &virtnet_stats_tx_speed_desc[0]; 4487 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4488 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) 4489 goto found; 4490 4491 offset += num; 4492 } 4493 4494 return; 4495 4496 found: 4497 for (i = 0; i < num; ++i) { 4498 v = (const __le64 *)(base + desc[i].offset); 4499 ctx->data[offset + i] = le64_to_cpu(*v); 4500 } 4501 4502 return; 4503 4504 drv_stats: 4505 for (i = 0; i < num; ++i) { 4506 v_stat = (const u64_stats_t *)(base + desc[i].offset); 4507 ctx->data[offset + i] = u64_stats_read(v_stat); 4508 } 4509 } 4510 4511 static int __virtnet_get_hw_stats(struct virtnet_info *vi, 4512 struct virtnet_stats_ctx *ctx, 4513 struct virtio_net_ctrl_queue_stats *req, 4514 int req_size, void *reply, int res_size) 4515 { 4516 struct virtio_net_stats_reply_hdr *hdr; 4517 struct scatterlist sgs_in, sgs_out; 4518 void *p; 4519 u32 qid; 4520 int ok; 4521 4522 sg_init_one(&sgs_out, req, req_size); 4523 sg_init_one(&sgs_in, reply, res_size); 4524 4525 ok = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, 4526 VIRTIO_NET_CTRL_STATS_GET, 4527 &sgs_out, &sgs_in); 4528 4529 if (!ok) 4530 return ok; 4531 4532 for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) { 4533 hdr = p; 4534 qid = le16_to_cpu(hdr->vq_index); 4535 virtnet_fill_stats(vi, qid, ctx, p, false, hdr->type); 4536 } 4537 4538 return 0; 4539 } 4540 4541 static void virtnet_make_stat_req(struct virtnet_info *vi, 4542 struct virtnet_stats_ctx *ctx, 4543 struct virtio_net_ctrl_queue_stats *req, 4544 int qid, int *idx) 4545 { 4546 int qtype = vq_type(vi, qid); 4547 u64 bitmap = ctx->bitmap[qtype]; 4548 4549 if (!bitmap) 4550 return; 4551 4552 req->stats[*idx].vq_index = cpu_to_le16(qid); 4553 req->stats[*idx].types_bitmap[0] = cpu_to_le64(bitmap); 4554 *idx += 1; 4555 } 4556 4557 /* qid == -1: get stats of all vqs. 4558 * qid >= 0: get the stats for the specified vq. This must not be the cvq.
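 * The cvq stats, when supported, are only requested on the qid == -1 path.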
4559 */ 4560 static int virtnet_get_hw_stats(struct virtnet_info *vi, 4561 struct virtnet_stats_ctx *ctx, int qid) 4562 { 4563 int qnum, i, j, res_size, qtype, last_vq, first_vq; 4564 struct virtio_net_ctrl_queue_stats *req; 4565 bool enable_cvq; 4566 void *reply; 4567 int ok; 4568 4569 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) 4570 return 0; 4571 4572 if (qid == -1) { 4573 last_vq = vi->curr_queue_pairs * 2 - 1; 4574 first_vq = 0; 4575 enable_cvq = true; 4576 } else { 4577 last_vq = qid; 4578 first_vq = qid; 4579 enable_cvq = false; 4580 } 4581 4582 qnum = 0; 4583 res_size = 0; 4584 for (i = first_vq; i <= last_vq ; ++i) { 4585 qtype = vq_type(vi, i); 4586 if (ctx->bitmap[qtype]) { 4587 ++qnum; 4588 res_size += ctx->size[qtype]; 4589 } 4590 } 4591 4592 if (enable_cvq && ctx->bitmap[VIRTNET_Q_TYPE_CQ]) { 4593 res_size += ctx->size[VIRTNET_Q_TYPE_CQ]; 4594 qnum += 1; 4595 } 4596 4597 req = kcalloc(qnum, sizeof(*req), GFP_KERNEL); 4598 if (!req) 4599 return -ENOMEM; 4600 4601 reply = kmalloc(res_size, GFP_KERNEL); 4602 if (!reply) { 4603 kfree(req); 4604 return -ENOMEM; 4605 } 4606 4607 j = 0; 4608 for (i = first_vq; i <= last_vq ; ++i) 4609 virtnet_make_stat_req(vi, ctx, req, i, &j); 4610 4611 if (enable_cvq) 4612 virtnet_make_stat_req(vi, ctx, req, vi->max_queue_pairs * 2, &j); 4613 4614 ok = __virtnet_get_hw_stats(vi, ctx, req, sizeof(*req) * j, reply, res_size); 4615 4616 kfree(req); 4617 kfree(reply); 4618 4619 return ok; 4620 } 4621 4622 static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) 4623 { 4624 struct virtnet_info *vi = netdev_priv(dev); 4625 unsigned int i; 4626 u8 *p = data; 4627 4628 switch (stringset) { 4629 case ETH_SS_STATS: 4630 /* Generate the total field names. */ 4631 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, -1, &p); 4632 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, -1, &p); 4633 4634 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_CQ, 0, &p); 4635 4636 for (i = 0; i < vi->curr_queue_pairs; ++i) 4637 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, i, &p); 4638 4639 for (i = 0; i < vi->curr_queue_pairs; ++i) 4640 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, i, &p); 4641 break; 4642 } 4643 } 4644 4645 static int virtnet_get_sset_count(struct net_device *dev, int sset) 4646 { 4647 struct virtnet_info *vi = netdev_priv(dev); 4648 struct virtnet_stats_ctx ctx = {0}; 4649 u32 pair_count; 4650 4651 switch (sset) { 4652 case ETH_SS_STATS: 4653 virtnet_stats_ctx_init(vi, &ctx, NULL, false); 4654 4655 pair_count = ctx.desc_num[VIRTNET_Q_TYPE_RX] + ctx.desc_num[VIRTNET_Q_TYPE_TX]; 4656 4657 return pair_count + ctx.desc_num[VIRTNET_Q_TYPE_CQ] + 4658 vi->curr_queue_pairs * pair_count; 4659 default: 4660 return -EOPNOTSUPP; 4661 } 4662 } 4663 4664 static void virtnet_get_ethtool_stats(struct net_device *dev, 4665 struct ethtool_stats *stats, u64 *data) 4666 { 4667 struct virtnet_info *vi = netdev_priv(dev); 4668 struct virtnet_stats_ctx ctx = {0}; 4669 unsigned int start, i; 4670 const u8 *stats_base; 4671 4672 virtnet_stats_ctx_init(vi, &ctx, data, false); 4673 if (virtnet_get_hw_stats(vi, &ctx, -1)) 4674 dev_warn(&vi->dev->dev, "Failed to get hw stats.\n"); 4675 4676 for (i = 0; i < vi->curr_queue_pairs; i++) { 4677 struct receive_queue *rq = &vi->rq[i]; 4678 struct send_queue *sq = &vi->sq[i]; 4679 4680 stats_base = (const u8 *)&rq->stats; 4681 do { 4682 start = u64_stats_fetch_begin(&rq->stats.syncp); 4683 virtnet_fill_stats(vi, i * 2, &ctx, stats_base, true, 0); 4684 } while (u64_stats_fetch_retry(&rq->stats.syncp, 
start)); 4685 4686 stats_base = (const u8 *)&sq->stats; 4687 do { 4688 start = u64_stats_fetch_begin(&sq->stats.syncp); 4689 virtnet_fill_stats(vi, i * 2 + 1, &ctx, stats_base, true, 0); 4690 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 4691 } 4692 4693 virtnet_fill_total_fields(vi, &ctx); 4694 } 4695 4696 static void virtnet_get_channels(struct net_device *dev, 4697 struct ethtool_channels *channels) 4698 { 4699 struct virtnet_info *vi = netdev_priv(dev); 4700 4701 channels->combined_count = vi->curr_queue_pairs; 4702 channels->max_combined = vi->max_queue_pairs; 4703 channels->max_other = 0; 4704 channels->rx_count = 0; 4705 channels->tx_count = 0; 4706 channels->other_count = 0; 4707 } 4708 4709 static int virtnet_set_link_ksettings(struct net_device *dev, 4710 const struct ethtool_link_ksettings *cmd) 4711 { 4712 struct virtnet_info *vi = netdev_priv(dev); 4713 4714 return ethtool_virtdev_set_link_ksettings(dev, cmd, 4715 &vi->speed, &vi->duplex); 4716 } 4717 4718 static int virtnet_get_link_ksettings(struct net_device *dev, 4719 struct ethtool_link_ksettings *cmd) 4720 { 4721 struct virtnet_info *vi = netdev_priv(dev); 4722 4723 cmd->base.speed = vi->speed; 4724 cmd->base.duplex = vi->duplex; 4725 cmd->base.port = PORT_OTHER; 4726 4727 return 0; 4728 } 4729 4730 static int virtnet_send_tx_notf_coal_cmds(struct virtnet_info *vi, 4731 struct ethtool_coalesce *ec) 4732 { 4733 struct virtio_net_ctrl_coal_tx *coal_tx __free(kfree) = NULL; 4734 struct scatterlist sgs_tx; 4735 int i; 4736 4737 coal_tx = kzalloc(sizeof(*coal_tx), GFP_KERNEL); 4738 if (!coal_tx) 4739 return -ENOMEM; 4740 4741 coal_tx->tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); 4742 coal_tx->tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); 4743 sg_init_one(&sgs_tx, coal_tx, sizeof(*coal_tx)); 4744 4745 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 4746 VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, 4747 &sgs_tx)) 4748 return -EINVAL; 4749 4750 vi->intr_coal_tx.max_usecs = ec->tx_coalesce_usecs; 4751 vi->intr_coal_tx.max_packets = ec->tx_max_coalesced_frames; 4752 for (i = 0; i < vi->max_queue_pairs; i++) { 4753 vi->sq[i].intr_coal.max_usecs = ec->tx_coalesce_usecs; 4754 vi->sq[i].intr_coal.max_packets = ec->tx_max_coalesced_frames; 4755 } 4756 4757 return 0; 4758 } 4759 4760 static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi, 4761 struct ethtool_coalesce *ec) 4762 { 4763 struct virtio_net_ctrl_coal_rx *coal_rx __free(kfree) = NULL; 4764 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; 4765 struct scatterlist sgs_rx; 4766 int i; 4767 4768 if (rx_ctrl_dim_on && !virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 4769 return -EOPNOTSUPP; 4770 4771 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != vi->intr_coal_rx.max_usecs || 4772 ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets)) 4773 return -EINVAL; 4774 4775 if (rx_ctrl_dim_on && !vi->rx_dim_enabled) { 4776 vi->rx_dim_enabled = true; 4777 for (i = 0; i < vi->max_queue_pairs; i++) { 4778 mutex_lock(&vi->rq[i].dim_lock); 4779 vi->rq[i].dim_enabled = true; 4780 mutex_unlock(&vi->rq[i].dim_lock); 4781 } 4782 return 0; 4783 } 4784 4785 coal_rx = kzalloc(sizeof(*coal_rx), GFP_KERNEL); 4786 if (!coal_rx) 4787 return -ENOMEM; 4788 4789 if (!rx_ctrl_dim_on && vi->rx_dim_enabled) { 4790 vi->rx_dim_enabled = false; 4791 for (i = 0; i < vi->max_queue_pairs; i++) { 4792 mutex_lock(&vi->rq[i].dim_lock); 4793 vi->rq[i].dim_enabled = false; 4794 mutex_unlock(&vi->rq[i].dim_lock); 4795 } 4796 } 4797 4798 /* Since the per-queue 
coalescing params can be set, 4799 * we need to apply the new global params even if they 4800 * are not updated. 4801 */ 4802 coal_rx->rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); 4803 coal_rx->rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); 4804 sg_init_one(&sgs_rx, coal_rx, sizeof(*coal_rx)); 4805 4806 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 4807 VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, 4808 &sgs_rx)) 4809 return -EINVAL; 4810 4811 vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs; 4812 vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames; 4813 for (i = 0; i < vi->max_queue_pairs; i++) { 4814 mutex_lock(&vi->rq[i].dim_lock); 4815 vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs; 4816 vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames; 4817 mutex_unlock(&vi->rq[i].dim_lock); 4818 } 4819 4820 return 0; 4821 } 4822 4823 static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, 4824 struct ethtool_coalesce *ec) 4825 { 4826 int err; 4827 4828 err = virtnet_send_tx_notf_coal_cmds(vi, ec); 4829 if (err) 4830 return err; 4831 4832 err = virtnet_send_rx_notf_coal_cmds(vi, ec); 4833 if (err) 4834 return err; 4835 4836 return 0; 4837 } 4838 4839 static int virtnet_send_rx_notf_coal_vq_cmds(struct virtnet_info *vi, 4840 struct ethtool_coalesce *ec, 4841 u16 queue) 4842 { 4843 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; 4844 u32 max_usecs, max_packets; 4845 bool cur_rx_dim; 4846 int err; 4847 4848 mutex_lock(&vi->rq[queue].dim_lock); 4849 cur_rx_dim = vi->rq[queue].dim_enabled; 4850 max_usecs = vi->rq[queue].intr_coal.max_usecs; 4851 max_packets = vi->rq[queue].intr_coal.max_packets; 4852 4853 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != max_usecs || 4854 ec->rx_max_coalesced_frames != max_packets)) { 4855 mutex_unlock(&vi->rq[queue].dim_lock); 4856 return -EINVAL; 4857 } 4858 4859 if (rx_ctrl_dim_on && !cur_rx_dim) { 4860 vi->rq[queue].dim_enabled = true; 4861 mutex_unlock(&vi->rq[queue].dim_lock); 4862 return 0; 4863 } 4864 4865 if (!rx_ctrl_dim_on && cur_rx_dim) 4866 vi->rq[queue].dim_enabled = false; 4867 4868 /* If no params are updated, userspace ethtool will 4869 * reject the modification.
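 * There is thus no need to compare against the current values here.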
4870 */ 4871 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue, 4872 ec->rx_coalesce_usecs, 4873 ec->rx_max_coalesced_frames); 4874 mutex_unlock(&vi->rq[queue].dim_lock); 4875 return err; 4876 } 4877 4878 static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi, 4879 struct ethtool_coalesce *ec, 4880 u16 queue) 4881 { 4882 int err; 4883 4884 err = virtnet_send_rx_notf_coal_vq_cmds(vi, ec, queue); 4885 if (err) 4886 return err; 4887 4888 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, queue, 4889 ec->tx_coalesce_usecs, 4890 ec->tx_max_coalesced_frames); 4891 if (err) 4892 return err; 4893 4894 return 0; 4895 } 4896 4897 static void virtnet_rx_dim_work(struct work_struct *work) 4898 { 4899 struct dim *dim = container_of(work, struct dim, work); 4900 struct receive_queue *rq = container_of(dim, 4901 struct receive_queue, dim); 4902 struct virtnet_info *vi = rq->vq->vdev->priv; 4903 struct net_device *dev = vi->dev; 4904 struct dim_cq_moder update_moder; 4905 int qnum, err; 4906 4907 qnum = rq - vi->rq; 4908 4909 mutex_lock(&rq->dim_lock); 4910 if (!rq->dim_enabled) 4911 goto out; 4912 4913 update_moder = net_dim_get_rx_irq_moder(dev, dim); 4914 if (update_moder.usec != rq->intr_coal.max_usecs || 4915 update_moder.pkts != rq->intr_coal.max_packets) { 4916 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum, 4917 update_moder.usec, 4918 update_moder.pkts); 4919 if (err) 4920 pr_debug("%s: Failed to send dim parameters on rxq%d\n", 4921 dev->name, qnum); 4922 } 4923 out: 4924 dim->state = DIM_START_MEASURE; 4925 mutex_unlock(&rq->dim_lock); 4926 } 4927 4928 static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) 4929 { 4930 /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL 4931 * or VIRTIO_NET_F_VQ_NOTF_COAL feature is negotiated. 4932 */ 4933 if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) 4934 return -EOPNOTSUPP; 4935 4936 if (ec->tx_max_coalesced_frames > 1 || 4937 ec->rx_max_coalesced_frames != 1) 4938 return -EINVAL; 4939 4940 return 0; 4941 } 4942 4943 static int virtnet_should_update_vq_weight(int dev_flags, int weight, 4944 int vq_weight, bool *should_update) 4945 { 4946 if (weight ^ vq_weight) { 4947 if (dev_flags & IFF_UP) 4948 return -EBUSY; 4949 *should_update = true; 4950 } 4951 4952 return 0; 4953 } 4954 4955 static int virtnet_set_coalesce(struct net_device *dev, 4956 struct ethtool_coalesce *ec, 4957 struct kernel_ethtool_coalesce *kernel_coal, 4958 struct netlink_ext_ack *extack) 4959 { 4960 struct virtnet_info *vi = netdev_priv(dev); 4961 int ret, queue_number, napi_weight; 4962 bool update_napi = false; 4963 4964 /* Can't change NAPI weight if the link is up */ 4965 napi_weight = ec->tx_max_coalesced_frames ? 
NAPI_POLL_WEIGHT : 0; 4966 for (queue_number = 0; queue_number < vi->max_queue_pairs; queue_number++) { 4967 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 4968 vi->sq[queue_number].napi.weight, 4969 &update_napi); 4970 if (ret) 4971 return ret; 4972 4973 if (update_napi) { 4974 /* All queues that belong to [queue_number, vi->max_queue_pairs] will be 4975 * updated for the sake of simplicity, which might not be necessary 4976 */ 4977 break; 4978 } 4979 } 4980 4981 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) 4982 ret = virtnet_send_notf_coal_cmds(vi, ec); 4983 else 4984 ret = virtnet_coal_params_supported(ec); 4985 4986 if (ret) 4987 return ret; 4988 4989 if (update_napi) { 4990 for (; queue_number < vi->max_queue_pairs; queue_number++) 4991 vi->sq[queue_number].napi.weight = napi_weight; 4992 } 4993 4994 return ret; 4995 } 4996 4997 static int virtnet_get_coalesce(struct net_device *dev, 4998 struct ethtool_coalesce *ec, 4999 struct kernel_ethtool_coalesce *kernel_coal, 5000 struct netlink_ext_ack *extack) 5001 { 5002 struct virtnet_info *vi = netdev_priv(dev); 5003 5004 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 5005 ec->rx_coalesce_usecs = vi->intr_coal_rx.max_usecs; 5006 ec->tx_coalesce_usecs = vi->intr_coal_tx.max_usecs; 5007 ec->tx_max_coalesced_frames = vi->intr_coal_tx.max_packets; 5008 ec->rx_max_coalesced_frames = vi->intr_coal_rx.max_packets; 5009 ec->use_adaptive_rx_coalesce = vi->rx_dim_enabled; 5010 } else { 5011 ec->rx_max_coalesced_frames = 1; 5012 5013 if (vi->sq[0].napi.weight) 5014 ec->tx_max_coalesced_frames = 1; 5015 } 5016 5017 return 0; 5018 } 5019 5020 static int virtnet_set_per_queue_coalesce(struct net_device *dev, 5021 u32 queue, 5022 struct ethtool_coalesce *ec) 5023 { 5024 struct virtnet_info *vi = netdev_priv(dev); 5025 int ret, napi_weight; 5026 bool update_napi = false; 5027 5028 if (queue >= vi->max_queue_pairs) 5029 return -EINVAL; 5030 5031 /* Can't change NAPI weight if the link is up */ 5032 napi_weight = ec->tx_max_coalesced_frames ? 
NAPI_POLL_WEIGHT : 0; 5033 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 5034 vi->sq[queue].napi.weight, 5035 &update_napi); 5036 if (ret) 5037 return ret; 5038 5039 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 5040 ret = virtnet_send_notf_coal_vq_cmds(vi, ec, queue); 5041 else 5042 ret = virtnet_coal_params_supported(ec); 5043 5044 if (ret) 5045 return ret; 5046 5047 if (update_napi) 5048 vi->sq[queue].napi.weight = napi_weight; 5049 5050 return 0; 5051 } 5052 5053 static int virtnet_get_per_queue_coalesce(struct net_device *dev, 5054 u32 queue, 5055 struct ethtool_coalesce *ec) 5056 { 5057 struct virtnet_info *vi = netdev_priv(dev); 5058 5059 if (queue >= vi->max_queue_pairs) 5060 return -EINVAL; 5061 5062 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 5063 mutex_lock(&vi->rq[queue].dim_lock); 5064 ec->rx_coalesce_usecs = vi->rq[queue].intr_coal.max_usecs; 5065 ec->tx_coalesce_usecs = vi->sq[queue].intr_coal.max_usecs; 5066 ec->tx_max_coalesced_frames = vi->sq[queue].intr_coal.max_packets; 5067 ec->rx_max_coalesced_frames = vi->rq[queue].intr_coal.max_packets; 5068 ec->use_adaptive_rx_coalesce = vi->rq[queue].dim_enabled; 5069 mutex_unlock(&vi->rq[queue].dim_lock); 5070 } else { 5071 ec->rx_max_coalesced_frames = 1; 5072 5073 if (vi->sq[queue].napi.weight) 5074 ec->tx_max_coalesced_frames = 1; 5075 } 5076 5077 return 0; 5078 } 5079 5080 static void virtnet_init_settings(struct net_device *dev) 5081 { 5082 struct virtnet_info *vi = netdev_priv(dev); 5083 5084 vi->speed = SPEED_UNKNOWN; 5085 vi->duplex = DUPLEX_UNKNOWN; 5086 } 5087 5088 static void virtnet_update_settings(struct virtnet_info *vi) 5089 { 5090 u32 speed; 5091 u8 duplex; 5092 5093 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX)) 5094 return; 5095 5096 virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed); 5097 5098 if (ethtool_validate_speed(speed)) 5099 vi->speed = speed; 5100 5101 virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex); 5102 5103 if (ethtool_validate_duplex(duplex)) 5104 vi->duplex = duplex; 5105 } 5106 5107 static u32 virtnet_get_rxfh_key_size(struct net_device *dev) 5108 { 5109 return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size; 5110 } 5111 5112 static u32 virtnet_get_rxfh_indir_size(struct net_device *dev) 5113 { 5114 return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size; 5115 } 5116 5117 static int virtnet_get_rxfh(struct net_device *dev, 5118 struct ethtool_rxfh_param *rxfh) 5119 { 5120 struct virtnet_info *vi = netdev_priv(dev); 5121 int i; 5122 5123 if (rxfh->indir) { 5124 for (i = 0; i < vi->rss_indir_table_size; ++i) 5125 rxfh->indir[i] = vi->rss.indirection_table[i]; 5126 } 5127 5128 if (rxfh->key) 5129 memcpy(rxfh->key, vi->rss.key, vi->rss_key_size); 5130 5131 rxfh->hfunc = ETH_RSS_HASH_TOP; 5132 5133 return 0; 5134 } 5135 5136 static int virtnet_set_rxfh(struct net_device *dev, 5137 struct ethtool_rxfh_param *rxfh, 5138 struct netlink_ext_ack *extack) 5139 { 5140 struct virtnet_info *vi = netdev_priv(dev); 5141 bool update = false; 5142 int i; 5143 5144 if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && 5145 rxfh->hfunc != ETH_RSS_HASH_TOP) 5146 return -EOPNOTSUPP; 5147 5148 if (rxfh->indir) { 5149 if (!vi->has_rss) 5150 return -EOPNOTSUPP; 5151 5152 for (i = 0; i < vi->rss_indir_table_size; ++i) 5153 vi->rss.indirection_table[i] = rxfh->indir[i]; 5154 update = true; 5155 } 5156 5157 if (rxfh->key) { 5158 /* If either _F_HASH_REPORT or _F_RSS are negotiated, the 5159 * device provides 
hash calculation capabilities, that is, 5160 * hash_key is configured. 5161 */ 5162 if (!vi->has_rss && !vi->has_rss_hash_report) 5163 return -EOPNOTSUPP; 5164 5165 memcpy(vi->rss.key, rxfh->key, vi->rss_key_size); 5166 update = true; 5167 } 5168 5169 if (update) 5170 virtnet_commit_rss_command(vi); 5171 5172 return 0; 5173 } 5174 5175 static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) 5176 { 5177 struct virtnet_info *vi = netdev_priv(dev); 5178 int rc = 0; 5179 5180 switch (info->cmd) { 5181 case ETHTOOL_GRXRINGS: 5182 info->data = vi->curr_queue_pairs; 5183 break; 5184 case ETHTOOL_GRXFH: 5185 virtnet_get_hashflow(vi, info); 5186 break; 5187 default: 5188 rc = -EOPNOTSUPP; 5189 } 5190 5191 return rc; 5192 } 5193 5194 static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) 5195 { 5196 struct virtnet_info *vi = netdev_priv(dev); 5197 int rc = 0; 5198 5199 switch (info->cmd) { 5200 case ETHTOOL_SRXFH: 5201 if (!virtnet_set_hashflow(vi, info)) 5202 rc = -EINVAL; 5203 5204 break; 5205 default: 5206 rc = -EOPNOTSUPP; 5207 } 5208 5209 return rc; 5210 } 5211 5212 static const struct ethtool_ops virtnet_ethtool_ops = { 5213 .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | 5214 ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX, 5215 .get_drvinfo = virtnet_get_drvinfo, 5216 .get_link = ethtool_op_get_link, 5217 .get_ringparam = virtnet_get_ringparam, 5218 .set_ringparam = virtnet_set_ringparam, 5219 .get_strings = virtnet_get_strings, 5220 .get_sset_count = virtnet_get_sset_count, 5221 .get_ethtool_stats = virtnet_get_ethtool_stats, 5222 .set_channels = virtnet_set_channels, 5223 .get_channels = virtnet_get_channels, 5224 .get_ts_info = ethtool_op_get_ts_info, 5225 .get_link_ksettings = virtnet_get_link_ksettings, 5226 .set_link_ksettings = virtnet_set_link_ksettings, 5227 .set_coalesce = virtnet_set_coalesce, 5228 .get_coalesce = virtnet_get_coalesce, 5229 .set_per_queue_coalesce = virtnet_set_per_queue_coalesce, 5230 .get_per_queue_coalesce = virtnet_get_per_queue_coalesce, 5231 .get_rxfh_key_size = virtnet_get_rxfh_key_size, 5232 .get_rxfh_indir_size = virtnet_get_rxfh_indir_size, 5233 .get_rxfh = virtnet_get_rxfh, 5234 .set_rxfh = virtnet_set_rxfh, 5235 .get_rxnfc = virtnet_get_rxnfc, 5236 .set_rxnfc = virtnet_set_rxnfc, 5237 }; 5238 5239 static void virtnet_get_queue_stats_rx(struct net_device *dev, int i, 5240 struct netdev_queue_stats_rx *stats) 5241 { 5242 struct virtnet_info *vi = netdev_priv(dev); 5243 struct receive_queue *rq = &vi->rq[i]; 5244 struct virtnet_stats_ctx ctx = {0}; 5245 5246 virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); 5247 5248 virtnet_get_hw_stats(vi, &ctx, i * 2); 5249 virtnet_fill_stats(vi, i * 2, &ctx, (void *)&rq->stats, true, 0); 5250 } 5251 5252 static void virtnet_get_queue_stats_tx(struct net_device *dev, int i, 5253 struct netdev_queue_stats_tx *stats) 5254 { 5255 struct virtnet_info *vi = netdev_priv(dev); 5256 struct send_queue *sq = &vi->sq[i]; 5257 struct virtnet_stats_ctx ctx = {0}; 5258 5259 virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); 5260 5261 virtnet_get_hw_stats(vi, &ctx, i * 2 + 1); 5262 virtnet_fill_stats(vi, i * 2 + 1, &ctx, (void *)&sq->stats, true, 0); 5263 } 5264 5265 static void virtnet_get_base_stats(struct net_device *dev, 5266 struct netdev_queue_stats_rx *rx, 5267 struct netdev_queue_stats_tx *tx) 5268 { 5269 struct virtnet_info *vi = netdev_priv(dev); 5270 5271 /* The queue stats of the virtio-net will not be reset. 
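 * The netdev queue stats core adds this base to the per-queue sums, so a
 * zero base simply reports the accumulated counters.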
So here we 5272 * return 0. 5273 */ 5274 rx->bytes = 0; 5275 rx->packets = 0; 5276 5277 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 5278 rx->hw_drops = 0; 5279 rx->hw_drop_overruns = 0; 5280 } 5281 5282 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 5283 rx->csum_unnecessary = 0; 5284 rx->csum_none = 0; 5285 rx->csum_bad = 0; 5286 } 5287 5288 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 5289 rx->hw_gro_packets = 0; 5290 rx->hw_gro_bytes = 0; 5291 rx->hw_gro_wire_packets = 0; 5292 rx->hw_gro_wire_bytes = 0; 5293 } 5294 5295 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) 5296 rx->hw_drop_ratelimits = 0; 5297 5298 tx->bytes = 0; 5299 tx->packets = 0; 5300 tx->stop = 0; 5301 tx->wake = 0; 5302 5303 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 5304 tx->hw_drops = 0; 5305 tx->hw_drop_errors = 0; 5306 } 5307 5308 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 5309 tx->csum_none = 0; 5310 tx->needs_csum = 0; 5311 } 5312 5313 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 5314 tx->hw_gso_packets = 0; 5315 tx->hw_gso_bytes = 0; 5316 tx->hw_gso_wire_packets = 0; 5317 tx->hw_gso_wire_bytes = 0; 5318 } 5319 5320 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) 5321 tx->hw_drop_ratelimits = 0; 5322 } 5323 5324 static const struct netdev_stat_ops virtnet_stat_ops = { 5325 .get_queue_stats_rx = virtnet_get_queue_stats_rx, 5326 .get_queue_stats_tx = virtnet_get_queue_stats_tx, 5327 .get_base_stats = virtnet_get_base_stats, 5328 }; 5329 5330 static void virtnet_freeze_down(struct virtio_device *vdev) 5331 { 5332 struct virtnet_info *vi = vdev->priv; 5333 5334 /* Make sure no work handler is accessing the device */ 5335 flush_work(&vi->config_work); 5336 disable_rx_mode_work(vi); 5337 flush_work(&vi->rx_mode_work); 5338 5339 netif_tx_lock_bh(vi->dev); 5340 netif_device_detach(vi->dev); 5341 netif_tx_unlock_bh(vi->dev); 5342 if (netif_running(vi->dev)) 5343 virtnet_close(vi->dev); 5344 } 5345 5346 static int init_vqs(struct virtnet_info *vi); 5347 5348 static int virtnet_restore_up(struct virtio_device *vdev) 5349 { 5350 struct virtnet_info *vi = vdev->priv; 5351 int err; 5352 5353 err = init_vqs(vi); 5354 if (err) 5355 return err; 5356 5357 virtio_device_ready(vdev); 5358 5359 enable_delayed_refill(vi); 5360 enable_rx_mode_work(vi); 5361 5362 if (netif_running(vi->dev)) { 5363 err = virtnet_open(vi->dev); 5364 if (err) 5365 return err; 5366 } 5367 5368 netif_tx_lock_bh(vi->dev); 5369 netif_device_attach(vi->dev); 5370 netif_tx_unlock_bh(vi->dev); 5371 return err; 5372 } 5373 5374 static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads) 5375 { 5376 __virtio64 *_offloads __free(kfree) = NULL; 5377 struct scatterlist sg; 5378 5379 _offloads = kzalloc(sizeof(*_offloads), GFP_KERNEL); 5380 if (!_offloads) 5381 return -ENOMEM; 5382 5383 *_offloads = cpu_to_virtio64(vi->vdev, offloads); 5384 5385 sg_init_one(&sg, _offloads, sizeof(*_offloads)); 5386 5387 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS, 5388 VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) { 5389 dev_warn(&vi->dev->dev, "Fail to set guest offload.\n"); 5390 return -EINVAL; 5391 } 5392 5393 return 0; 5394 } 5395 5396 static int virtnet_clear_guest_offloads(struct virtnet_info *vi) 5397 { 5398 u64 offloads = 0; 5399 5400 if (!vi->guest_offloads) 5401 return 0; 5402 5403 return virtnet_set_guest_offloads(vi, offloads); 5404 } 5405 5406 static int virtnet_restore_guest_offloads(struct virtnet_info *vi) 5407 { 5408 u64 
offloads = vi->guest_offloads; 5409 5410 if (!vi->guest_offloads) 5411 return 0; 5412 5413 return virtnet_set_guest_offloads(vi, offloads); 5414 } 5415 5416 static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct receive_queue *rq, 5417 struct xsk_buff_pool *pool) 5418 { 5419 int err, qindex; 5420 5421 qindex = rq - vi->rq; 5422 5423 if (pool) { 5424 err = xdp_rxq_info_reg(&rq->xsk_rxq_info, vi->dev, qindex, rq->napi.napi_id); 5425 if (err < 0) 5426 return err; 5427 5428 err = xdp_rxq_info_reg_mem_model(&rq->xsk_rxq_info, 5429 MEM_TYPE_XSK_BUFF_POOL, NULL); 5430 if (err < 0) 5431 goto unreg; 5432 5433 xsk_pool_set_rxq_info(pool, &rq->xsk_rxq_info); 5434 } 5435 5436 virtnet_rx_pause(vi, rq); 5437 5438 err = virtqueue_reset(rq->vq, virtnet_rq_unmap_free_buf); 5439 if (err) { 5440 netdev_err(vi->dev, "reset rx fail: rx queue index: %d err: %d\n", qindex, err); 5441 5442 pool = NULL; 5443 } 5444 5445 rq->xsk_pool = pool; 5446 5447 virtnet_rx_resume(vi, rq); 5448 5449 if (pool) 5450 return 0; 5451 5452 unreg: 5453 xdp_rxq_info_unreg(&rq->xsk_rxq_info); 5454 return err; 5455 } 5456 5457 static int virtnet_xsk_pool_enable(struct net_device *dev, 5458 struct xsk_buff_pool *pool, 5459 u16 qid) 5460 { 5461 struct virtnet_info *vi = netdev_priv(dev); 5462 struct receive_queue *rq; 5463 struct device *dma_dev; 5464 struct send_queue *sq; 5465 int err, size; 5466 5467 if (vi->hdr_len > xsk_pool_get_headroom(pool)) 5468 return -EINVAL; 5469 5470 /* In big_packets mode, xdp cannot work, so there is no need to 5471 * initialize the xsk state of the rq. 5472 */ 5473 if (vi->big_packets && !vi->mergeable_rx_bufs) 5474 return -ENOENT; 5475 5476 if (qid >= vi->curr_queue_pairs) 5477 return -EINVAL; 5478 5479 sq = &vi->sq[qid]; 5480 rq = &vi->rq[qid]; 5481 5482 /* AF_XDP assumes that rx and tx share the same DMA device: a buffer 5483 * received on the rx queue may be reused to transmit on the tx queue, 5484 * so the DMA dev of the sq and the rq must be the same one. 5485 * 5486 * But vq->dma_dev allows each vq to have its own DMA dev, so check 5487 * that the rq and the sq really use the same device.
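 * (The pool is then DMA-mapped once against that shared device below.)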
5488 */ 5489 if (virtqueue_dma_dev(rq->vq) != virtqueue_dma_dev(sq->vq)) 5490 return -EINVAL; 5491 5492 dma_dev = virtqueue_dma_dev(rq->vq); 5493 if (!dma_dev) 5494 return -EINVAL; 5495 5496 size = virtqueue_get_vring_size(rq->vq); 5497 5498 rq->xsk_buffs = kvcalloc(size, sizeof(*rq->xsk_buffs), GFP_KERNEL); 5499 if (!rq->xsk_buffs) 5500 return -ENOMEM; 5501 5502 err = xsk_pool_dma_map(pool, dma_dev, 0); 5503 if (err) 5504 goto err_xsk_map; 5505 5506 err = virtnet_rq_bind_xsk_pool(vi, rq, pool); 5507 if (err) 5508 goto err_rq; 5509 5510 return 0; 5511 5512 err_rq: 5513 xsk_pool_dma_unmap(pool, 0); 5514 err_xsk_map: 5515 return err; 5516 } 5517 5518 static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid) 5519 { 5520 struct virtnet_info *vi = netdev_priv(dev); 5521 struct xsk_buff_pool *pool; 5522 struct receive_queue *rq; 5523 int err; 5524 5525 if (qid >= vi->curr_queue_pairs) 5526 return -EINVAL; 5527 5528 rq = &vi->rq[qid]; 5529 5530 pool = rq->xsk_pool; 5531 5532 err = virtnet_rq_bind_xsk_pool(vi, rq, NULL); 5533 5534 xsk_pool_dma_unmap(pool, 0); 5535 5536 kvfree(rq->xsk_buffs); 5537 5538 return err; 5539 } 5540 5541 static int virtnet_xsk_pool_setup(struct net_device *dev, struct netdev_bpf *xdp) 5542 { 5543 if (xdp->xsk.pool) 5544 return virtnet_xsk_pool_enable(dev, xdp->xsk.pool, 5545 xdp->xsk.queue_id); 5546 else 5547 return virtnet_xsk_pool_disable(dev, xdp->xsk.queue_id); 5548 } 5549 5550 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, 5551 struct netlink_ext_ack *extack) 5552 { 5553 unsigned int room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM + 5554 sizeof(struct skb_shared_info)); 5555 unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN; 5556 struct virtnet_info *vi = netdev_priv(dev); 5557 struct bpf_prog *old_prog; 5558 u16 xdp_qp = 0, curr_qp; 5559 int i, err; 5560 5561 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) 5562 && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 5563 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 5564 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 5565 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 5566 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) || 5567 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) || 5568 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) { 5569 NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first"); 5570 return -EOPNOTSUPP; 5571 } 5572 5573 if (vi->mergeable_rx_bufs && !vi->any_header_sg) { 5574 NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required"); 5575 return -EINVAL; 5576 } 5577 5578 if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) { 5579 NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags"); 5580 netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz); 5581 return -EINVAL; 5582 } 5583 5584 curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs; 5585 if (prog) 5586 xdp_qp = nr_cpu_ids; 5587 5588 /* XDP requires extra queues for XDP_TX */ 5589 if (curr_qp + xdp_qp > vi->max_queue_pairs) { 5590 netdev_warn_once(dev, "XDP request %i queues but max is %i. 
XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n", 5591 curr_qp + xdp_qp, vi->max_queue_pairs); 5592 xdp_qp = 0; 5593 } 5594 5595 old_prog = rtnl_dereference(vi->rq[0].xdp_prog); 5596 if (!prog && !old_prog) 5597 return 0; 5598 5599 if (prog) 5600 bpf_prog_add(prog, vi->max_queue_pairs - 1); 5601 5602 /* Make sure NAPI is not using any XDP TX queues for RX. */ 5603 if (netif_running(dev)) { 5604 for (i = 0; i < vi->max_queue_pairs; i++) { 5605 napi_disable(&vi->rq[i].napi); 5606 virtnet_napi_tx_disable(&vi->sq[i].napi); 5607 } 5608 } 5609 5610 if (!prog) { 5611 for (i = 0; i < vi->max_queue_pairs; i++) { 5612 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 5613 if (i == 0) 5614 virtnet_restore_guest_offloads(vi); 5615 } 5616 synchronize_net(); 5617 } 5618 5619 err = virtnet_set_queues(vi, curr_qp + xdp_qp); 5620 if (err) 5621 goto err; 5622 netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); 5623 vi->xdp_queue_pairs = xdp_qp; 5624 5625 if (prog) { 5626 vi->xdp_enabled = true; 5627 for (i = 0; i < vi->max_queue_pairs; i++) { 5628 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 5629 if (i == 0 && !old_prog) 5630 virtnet_clear_guest_offloads(vi); 5631 } 5632 if (!old_prog) 5633 xdp_features_set_redirect_target(dev, true); 5634 } else { 5635 xdp_features_clear_redirect_target(dev); 5636 vi->xdp_enabled = false; 5637 } 5638 5639 for (i = 0; i < vi->max_queue_pairs; i++) { 5640 if (old_prog) 5641 bpf_prog_put(old_prog); 5642 if (netif_running(dev)) { 5643 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); 5644 virtnet_napi_tx_enable(vi, vi->sq[i].vq, 5645 &vi->sq[i].napi); 5646 } 5647 } 5648 5649 return 0; 5650 5651 err: 5652 if (!prog) { 5653 virtnet_clear_guest_offloads(vi); 5654 for (i = 0; i < vi->max_queue_pairs; i++) 5655 rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); 5656 } 5657 5658 if (netif_running(dev)) { 5659 for (i = 0; i < vi->max_queue_pairs; i++) { 5660 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); 5661 virtnet_napi_tx_enable(vi, vi->sq[i].vq, 5662 &vi->sq[i].napi); 5663 } 5664 } 5665 if (prog) 5666 bpf_prog_sub(prog, vi->max_queue_pairs - 1); 5667 return err; 5668 } 5669 5670 static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) 5671 { 5672 switch (xdp->command) { 5673 case XDP_SETUP_PROG: 5674 return virtnet_xdp_set(dev, xdp->prog, xdp->extack); 5675 case XDP_SETUP_XSK_POOL: 5676 return virtnet_xsk_pool_setup(dev, xdp); 5677 default: 5678 return -EINVAL; 5679 } 5680 } 5681 5682 static int virtnet_get_phys_port_name(struct net_device *dev, char *buf, 5683 size_t len) 5684 { 5685 struct virtnet_info *vi = netdev_priv(dev); 5686 int ret; 5687 5688 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 5689 return -EOPNOTSUPP; 5690 5691 ret = snprintf(buf, len, "sby"); 5692 if (ret >= len) 5693 return -EOPNOTSUPP; 5694 5695 return 0; 5696 } 5697 5698 static int virtnet_set_features(struct net_device *dev, 5699 netdev_features_t features) 5700 { 5701 struct virtnet_info *vi = netdev_priv(dev); 5702 u64 offloads; 5703 int err; 5704 5705 if ((dev->features ^ features) & NETIF_F_GRO_HW) { 5706 if (vi->xdp_enabled) 5707 return -EBUSY; 5708 5709 if (features & NETIF_F_GRO_HW) 5710 offloads = vi->guest_offloads_capable; 5711 else 5712 offloads = vi->guest_offloads_capable & 5713 ~GUEST_OFFLOAD_GRO_HW_MASK; 5714 5715 err = virtnet_set_guest_offloads(vi, offloads); 5716 if (err) 5717 return err; 5718 vi->guest_offloads = offloads; 5719 } 5720 5721 if ((dev->features ^ features) & NETIF_F_RXHASH) { 5722 if (features & NETIF_F_RXHASH) 5723 
vi->rss.hash_types = vi->rss_hash_types_saved; 5724 else 5725 vi->rss.hash_types = VIRTIO_NET_HASH_REPORT_NONE; 5726 5727 if (!virtnet_commit_rss_command(vi)) 5728 return -EINVAL; 5729 } 5730 5731 return 0; 5732 } 5733 5734 static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue) 5735 { 5736 struct virtnet_info *priv = netdev_priv(dev); 5737 struct send_queue *sq = &priv->sq[txqueue]; 5738 struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue); 5739 5740 u64_stats_update_begin(&sq->stats.syncp); 5741 u64_stats_inc(&sq->stats.tx_timeouts); 5742 u64_stats_update_end(&sq->stats.syncp); 5743 5744 netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n", 5745 txqueue, sq->name, sq->vq->index, sq->vq->name, 5746 jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start))); 5747 } 5748 5749 static int virtnet_init_irq_moder(struct virtnet_info *vi) 5750 { 5751 u8 profile_flags = 0, coal_flags = 0; 5752 int ret, i; 5753 5754 profile_flags |= DIM_PROFILE_RX; 5755 coal_flags |= DIM_COALESCE_USEC | DIM_COALESCE_PKTS; 5756 ret = net_dim_init_irq_moder(vi->dev, profile_flags, coal_flags, 5757 DIM_CQ_PERIOD_MODE_START_FROM_EQE, 5758 0, virtnet_rx_dim_work, NULL); 5759 5760 if (ret) 5761 return ret; 5762 5763 for (i = 0; i < vi->max_queue_pairs; i++) 5764 net_dim_setting(vi->dev, &vi->rq[i].dim, false); 5765 5766 return 0; 5767 } 5768 5769 static void virtnet_free_irq_moder(struct virtnet_info *vi) 5770 { 5771 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 5772 return; 5773 5774 rtnl_lock(); 5775 net_dim_free_irq_moder(vi->dev); 5776 rtnl_unlock(); 5777 } 5778 5779 static const struct net_device_ops virtnet_netdev = { 5780 .ndo_open = virtnet_open, 5781 .ndo_stop = virtnet_close, 5782 .ndo_start_xmit = start_xmit, 5783 .ndo_validate_addr = eth_validate_addr, 5784 .ndo_set_mac_address = virtnet_set_mac_address, 5785 .ndo_set_rx_mode = virtnet_set_rx_mode, 5786 .ndo_get_stats64 = virtnet_stats, 5787 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, 5788 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, 5789 .ndo_bpf = virtnet_xdp, 5790 .ndo_xdp_xmit = virtnet_xdp_xmit, 5791 .ndo_xsk_wakeup = virtnet_xsk_wakeup, 5792 .ndo_features_check = passthru_features_check, 5793 .ndo_get_phys_port_name = virtnet_get_phys_port_name, 5794 .ndo_set_features = virtnet_set_features, 5795 .ndo_tx_timeout = virtnet_tx_timeout, 5796 }; 5797 5798 static void virtnet_config_changed_work(struct work_struct *work) 5799 { 5800 struct virtnet_info *vi = 5801 container_of(work, struct virtnet_info, config_work); 5802 u16 v; 5803 5804 if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS, 5805 struct virtio_net_config, status, &v) < 0) 5806 return; 5807 5808 if (v & VIRTIO_NET_S_ANNOUNCE) { 5809 netdev_notify_peers(vi->dev); 5810 virtnet_ack_link_announce(vi); 5811 } 5812 5813 /* Ignore unknown (future) status bits */ 5814 v &= VIRTIO_NET_S_LINK_UP; 5815 5816 if (vi->status == v) 5817 return; 5818 5819 vi->status = v; 5820 5821 if (vi->status & VIRTIO_NET_S_LINK_UP) { 5822 virtnet_update_settings(vi); 5823 netif_carrier_on(vi->dev); 5824 netif_tx_wake_all_queues(vi->dev); 5825 } else { 5826 netif_carrier_off(vi->dev); 5827 netif_tx_stop_all_queues(vi->dev); 5828 } 5829 } 5830 5831 static void virtnet_config_changed(struct virtio_device *vdev) 5832 { 5833 struct virtnet_info *vi = vdev->priv; 5834 5835 schedule_work(&vi->config_work); 5836 } 5837 5838 static void virtnet_free_queues(struct virtnet_info *vi) 5839 { 5840 int i; 5841 5842 for (i = 0; i < 
vi->max_queue_pairs; i++) { 5843 __netif_napi_del(&vi->rq[i].napi); 5844 __netif_napi_del(&vi->sq[i].napi); 5845 } 5846 5847 /* We called __netif_napi_del(), 5848 * we need to respect an RCU grace period before freeing vi->rq 5849 */ 5850 synchronize_net(); 5851 5852 kfree(vi->rq); 5853 kfree(vi->sq); 5854 kfree(vi->ctrl); 5855 } 5856 5857 static void _free_receive_bufs(struct virtnet_info *vi) 5858 { 5859 struct bpf_prog *old_prog; 5860 int i; 5861 5862 for (i = 0; i < vi->max_queue_pairs; i++) { 5863 while (vi->rq[i].pages) 5864 __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0); 5865 5866 old_prog = rtnl_dereference(vi->rq[i].xdp_prog); 5867 RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL); 5868 if (old_prog) 5869 bpf_prog_put(old_prog); 5870 } 5871 } 5872 5873 static void free_receive_bufs(struct virtnet_info *vi) 5874 { 5875 rtnl_lock(); 5876 _free_receive_bufs(vi); 5877 rtnl_unlock(); 5878 } 5879 5880 static void free_receive_page_frags(struct virtnet_info *vi) 5881 { 5882 int i; 5883 for (i = 0; i < vi->max_queue_pairs; i++) 5884 if (vi->rq[i].alloc_frag.page) { 5885 if (vi->rq[i].last_dma) 5886 virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0); 5887 put_page(vi->rq[i].alloc_frag.page); 5888 } 5889 } 5890 5891 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf) 5892 { 5893 if (!is_xdp_frame(buf)) 5894 dev_kfree_skb(buf); 5895 else 5896 xdp_return_frame(ptr_to_xdp(buf)); 5897 } 5898 5899 static void free_unused_bufs(struct virtnet_info *vi) 5900 { 5901 void *buf; 5902 int i; 5903 5904 for (i = 0; i < vi->max_queue_pairs; i++) { 5905 struct virtqueue *vq = vi->sq[i].vq; 5906 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 5907 virtnet_sq_free_unused_buf(vq, buf); 5908 cond_resched(); 5909 } 5910 5911 for (i = 0; i < vi->max_queue_pairs; i++) { 5912 struct virtqueue *vq = vi->rq[i].vq; 5913 5914 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 5915 virtnet_rq_unmap_free_buf(vq, buf); 5916 cond_resched(); 5917 } 5918 } 5919 5920 static void virtnet_del_vqs(struct virtnet_info *vi) 5921 { 5922 struct virtio_device *vdev = vi->vdev; 5923 5924 virtnet_clean_affinity(vi); 5925 5926 vdev->config->del_vqs(vdev); 5927 5928 virtnet_free_queues(vi); 5929 } 5930 5931 /* How large should a single buffer be so a queue full of these can fit at 5932 * least one full packet? 5933 * Logic below assumes the mergeable buffer header is used. 5934 */ 5935 static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq) 5936 { 5937 const unsigned int hdr_len = vi->hdr_len; 5938 unsigned int rq_size = virtqueue_get_vring_size(vq); 5939 unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu; 5940 unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len; 5941 unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size); 5942 5943 return max(max(min_buf_len, hdr_len) - hdr_len, 5944 (unsigned int)GOOD_PACKET_LEN); 5945 } 5946 5947 static int virtnet_find_vqs(struct virtnet_info *vi) 5948 { 5949 struct virtqueue_info *vqs_info; 5950 struct virtqueue **vqs; 5951 int ret = -ENOMEM; 5952 int total_vqs; 5953 bool *ctx; 5954 u16 i; 5955 5956 /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by 5957 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by 5958 * possible control vq. 
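 * For example, with two queue pairs and a control vq the order is
 * rx0, tx0, rx1, tx1, cvq (see rxq2vq() and txq2vq()).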
5959 */ 5960 total_vqs = vi->max_queue_pairs * 2 + 5961 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ); 5962 5963 /* Allocate space for find_vqs parameters */ 5964 vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL); 5965 if (!vqs) 5966 goto err_vq; 5967 vqs_info = kcalloc(total_vqs, sizeof(*vqs_info), GFP_KERNEL); 5968 if (!vqs_info) 5969 goto err_vqs_info; 5970 if (!vi->big_packets || vi->mergeable_rx_bufs) { 5971 ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL); 5972 if (!ctx) 5973 goto err_ctx; 5974 } else { 5975 ctx = NULL; 5976 } 5977 5978 /* Parameters for control virtqueue, if any */ 5979 if (vi->has_cvq) { 5980 vqs_info[total_vqs - 1].name = "control"; 5981 } 5982 5983 /* Allocate/initialize parameters for send/receive virtqueues */ 5984 for (i = 0; i < vi->max_queue_pairs; i++) { 5985 vqs_info[rxq2vq(i)].callback = skb_recv_done; 5986 vqs_info[txq2vq(i)].callback = skb_xmit_done; 5987 sprintf(vi->rq[i].name, "input.%u", i); 5988 sprintf(vi->sq[i].name, "output.%u", i); 5989 vqs_info[rxq2vq(i)].name = vi->rq[i].name; 5990 vqs_info[txq2vq(i)].name = vi->sq[i].name; 5991 if (ctx) 5992 vqs_info[rxq2vq(i)].ctx = true; 5993 } 5994 5995 ret = virtio_find_vqs(vi->vdev, total_vqs, vqs, vqs_info, NULL); 5996 if (ret) 5997 goto err_find; 5998 5999 if (vi->has_cvq) { 6000 vi->cvq = vqs[total_vqs - 1]; 6001 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) 6002 vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; 6003 } 6004 6005 for (i = 0; i < vi->max_queue_pairs; i++) { 6006 vi->rq[i].vq = vqs[rxq2vq(i)]; 6007 vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq); 6008 vi->sq[i].vq = vqs[txq2vq(i)]; 6009 } 6010 6011 /* run here: ret == 0. */ 6012 6013 6014 err_find: 6015 kfree(ctx); 6016 err_ctx: 6017 kfree(vqs_info); 6018 err_vqs_info: 6019 kfree(vqs); 6020 err_vq: 6021 return ret; 6022 } 6023 6024 static int virtnet_alloc_queues(struct virtnet_info *vi) 6025 { 6026 int i; 6027 6028 if (vi->has_cvq) { 6029 vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL); 6030 if (!vi->ctrl) 6031 goto err_ctrl; 6032 } else { 6033 vi->ctrl = NULL; 6034 } 6035 vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL); 6036 if (!vi->sq) 6037 goto err_sq; 6038 vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL); 6039 if (!vi->rq) 6040 goto err_rq; 6041 6042 INIT_DELAYED_WORK(&vi->refill, refill_work); 6043 for (i = 0; i < vi->max_queue_pairs; i++) { 6044 vi->rq[i].pages = NULL; 6045 netif_napi_add_weight(vi->dev, &vi->rq[i].napi, virtnet_poll, 6046 napi_weight); 6047 netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi, 6048 virtnet_poll_tx, 6049 napi_tx ? 
napi_weight : 0); 6050 6051 sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); 6052 ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len); 6053 sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg)); 6054 6055 u64_stats_init(&vi->rq[i].stats.syncp); 6056 u64_stats_init(&vi->sq[i].stats.syncp); 6057 mutex_init(&vi->rq[i].dim_lock); 6058 } 6059 6060 return 0; 6061 6062 err_rq: 6063 kfree(vi->sq); 6064 err_sq: 6065 kfree(vi->ctrl); 6066 err_ctrl: 6067 return -ENOMEM; 6068 } 6069 6070 static int init_vqs(struct virtnet_info *vi) 6071 { 6072 int ret; 6073 6074 /* Allocate send & receive queues */ 6075 ret = virtnet_alloc_queues(vi); 6076 if (ret) 6077 goto err; 6078 6079 ret = virtnet_find_vqs(vi); 6080 if (ret) 6081 goto err_free; 6082 6083 virtnet_rq_set_premapped(vi); 6084 6085 cpus_read_lock(); 6086 virtnet_set_affinity(vi); 6087 cpus_read_unlock(); 6088 6089 return 0; 6090 6091 err_free: 6092 virtnet_free_queues(vi); 6093 err: 6094 return ret; 6095 } 6096 6097 #ifdef CONFIG_SYSFS 6098 static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue, 6099 char *buf) 6100 { 6101 struct virtnet_info *vi = netdev_priv(queue->dev); 6102 unsigned int queue_index = get_netdev_rx_queue_index(queue); 6103 unsigned int headroom = virtnet_get_headroom(vi); 6104 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 6105 struct ewma_pkt_len *avg; 6106 6107 BUG_ON(queue_index >= vi->max_queue_pairs); 6108 avg = &vi->rq[queue_index].mrg_avg_pkt_len; 6109 return sprintf(buf, "%u\n", 6110 get_mergeable_buf_len(&vi->rq[queue_index], avg, 6111 SKB_DATA_ALIGN(headroom + tailroom))); 6112 } 6113 6114 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute = 6115 __ATTR_RO(mergeable_rx_buffer_size); 6116 6117 static struct attribute *virtio_net_mrg_rx_attrs[] = { 6118 &mergeable_rx_buffer_size_attribute.attr, 6119 NULL 6120 }; 6121 6122 static const struct attribute_group virtio_net_mrg_rx_group = { 6123 .name = "virtio_net", 6124 .attrs = virtio_net_mrg_rx_attrs 6125 }; 6126 #endif 6127 6128 static bool virtnet_fail_on_feature(struct virtio_device *vdev, 6129 unsigned int fbit, 6130 const char *fname, const char *dname) 6131 { 6132 if (!virtio_has_feature(vdev, fbit)) 6133 return false; 6134 6135 dev_err(&vdev->dev, "device advertises feature %s but not %s", 6136 fname, dname); 6137 6138 return true; 6139 } 6140 6141 #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \ 6142 virtnet_fail_on_feature(vdev, fbit, #fbit, dbit) 6143 6144 static bool virtnet_validate_features(struct virtio_device *vdev) 6145 { 6146 if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) && 6147 (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX, 6148 "VIRTIO_NET_F_CTRL_VQ") || 6149 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN, 6150 "VIRTIO_NET_F_CTRL_VQ") || 6151 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE, 6152 "VIRTIO_NET_F_CTRL_VQ") || 6153 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") || 6154 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR, 6155 "VIRTIO_NET_F_CTRL_VQ") || 6156 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, 6157 "VIRTIO_NET_F_CTRL_VQ") || 6158 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, 6159 "VIRTIO_NET_F_CTRL_VQ") || 6160 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, 6161 "VIRTIO_NET_F_CTRL_VQ") || 6162 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_VQ_NOTF_COAL, 6163 "VIRTIO_NET_F_CTRL_VQ"))) { 6164 return false; 6165 } 6166 6167 return true; 6168 } 6169 6170 #define MIN_MTU ETH_MIN_MTU 6171 #define MAX_MTU ETH_MAX_MTU 6172 6173 static int virtnet_validate(struct virtio_device *vdev) 6174 
{ 6175 if (!vdev->config->get) { 6176 dev_err(&vdev->dev, "%s failure: config access disabled\n", 6177 __func__); 6178 return -EINVAL; 6179 } 6180 6181 if (!virtnet_validate_features(vdev)) 6182 return -EINVAL; 6183 6184 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 6185 int mtu = virtio_cread16(vdev, 6186 offsetof(struct virtio_net_config, 6187 mtu)); 6188 if (mtu < MIN_MTU) 6189 __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU); 6190 } 6191 6192 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) && 6193 !virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 6194 dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby"); 6195 __virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY); 6196 } 6197 6198 return 0; 6199 } 6200 6201 static bool virtnet_check_guest_gso(const struct virtnet_info *vi) 6202 { 6203 return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 6204 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 6205 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 6206 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 6207 (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) && 6208 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6)); 6209 } 6210 6211 static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu) 6212 { 6213 bool guest_gso = virtnet_check_guest_gso(vi); 6214 6215 /* If device can receive ANY guest GSO packets, regardless of mtu, 6216 * allocate packets of maximum size, otherwise limit it to only 6217 * mtu size worth only. 6218 */ 6219 if (mtu > ETH_DATA_LEN || guest_gso) { 6220 vi->big_packets = true; 6221 vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE); 6222 } 6223 } 6224 6225 #define VIRTIO_NET_HASH_REPORT_MAX_TABLE 10 6226 static enum xdp_rss_hash_type 6227 virtnet_xdp_rss_type[VIRTIO_NET_HASH_REPORT_MAX_TABLE] = { 6228 [VIRTIO_NET_HASH_REPORT_NONE] = XDP_RSS_TYPE_NONE, 6229 [VIRTIO_NET_HASH_REPORT_IPv4] = XDP_RSS_TYPE_L3_IPV4, 6230 [VIRTIO_NET_HASH_REPORT_TCPv4] = XDP_RSS_TYPE_L4_IPV4_TCP, 6231 [VIRTIO_NET_HASH_REPORT_UDPv4] = XDP_RSS_TYPE_L4_IPV4_UDP, 6232 [VIRTIO_NET_HASH_REPORT_IPv6] = XDP_RSS_TYPE_L3_IPV6, 6233 [VIRTIO_NET_HASH_REPORT_TCPv6] = XDP_RSS_TYPE_L4_IPV6_TCP, 6234 [VIRTIO_NET_HASH_REPORT_UDPv6] = XDP_RSS_TYPE_L4_IPV6_UDP, 6235 [VIRTIO_NET_HASH_REPORT_IPv6_EX] = XDP_RSS_TYPE_L3_IPV6_EX, 6236 [VIRTIO_NET_HASH_REPORT_TCPv6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX, 6237 [VIRTIO_NET_HASH_REPORT_UDPv6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX 6238 }; 6239 6240 static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, 6241 enum xdp_rss_hash_type *rss_type) 6242 { 6243 const struct xdp_buff *xdp = (void *)_ctx; 6244 struct virtio_net_hdr_v1_hash *hdr_hash; 6245 struct virtnet_info *vi; 6246 u16 hash_report; 6247 6248 if (!(xdp->rxq->dev->features & NETIF_F_RXHASH)) 6249 return -ENODATA; 6250 6251 vi = netdev_priv(xdp->rxq->dev); 6252 hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len); 6253 hash_report = __le16_to_cpu(hdr_hash->hash_report); 6254 6255 if (hash_report >= VIRTIO_NET_HASH_REPORT_MAX_TABLE) 6256 hash_report = VIRTIO_NET_HASH_REPORT_NONE; 6257 6258 *rss_type = virtnet_xdp_rss_type[hash_report]; 6259 *hash = __le32_to_cpu(hdr_hash->hash_value); 6260 return 0; 6261 } 6262 6263 static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = { 6264 .xmo_rx_hash = virtnet_xdp_rx_hash, 6265 }; 6266 6267 static int virtnet_probe(struct virtio_device *vdev) 6268 { 6269 int i, err = -ENOMEM; 6270 struct net_device *dev; 
6271 struct virtnet_info *vi; 6272 u16 max_queue_pairs; 6273 int mtu = 0; 6274 6275 /* Find if host supports multiqueue/rss virtio_net device */ 6276 max_queue_pairs = 1; 6277 if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) 6278 max_queue_pairs = 6279 virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); 6280 6281 /* We need at least 2 queue's */ 6282 if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 6283 max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || 6284 !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 6285 max_queue_pairs = 1; 6286 6287 /* Allocate ourselves a network device with room for our info */ 6288 dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs); 6289 if (!dev) 6290 return -ENOMEM; 6291 6292 /* Set up network device as normal. */ 6293 dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE | 6294 IFF_TX_SKB_NO_LINEAR; 6295 dev->netdev_ops = &virtnet_netdev; 6296 dev->stat_ops = &virtnet_stat_ops; 6297 dev->features = NETIF_F_HIGHDMA; 6298 6299 dev->ethtool_ops = &virtnet_ethtool_ops; 6300 SET_NETDEV_DEV(dev, &vdev->dev); 6301 6302 /* Do we support "hardware" checksums? */ 6303 if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { 6304 /* This opens up the world of extra features. */ 6305 dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG; 6306 if (csum) 6307 dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; 6308 6309 if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { 6310 dev->hw_features |= NETIF_F_TSO 6311 | NETIF_F_TSO_ECN | NETIF_F_TSO6; 6312 } 6313 /* Individual feature bits: what can host handle? */ 6314 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4)) 6315 dev->hw_features |= NETIF_F_TSO; 6316 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6)) 6317 dev->hw_features |= NETIF_F_TSO6; 6318 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) 6319 dev->hw_features |= NETIF_F_TSO_ECN; 6320 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO)) 6321 dev->hw_features |= NETIF_F_GSO_UDP_L4; 6322 6323 dev->features |= NETIF_F_GSO_ROBUST; 6324 6325 if (gso) 6326 dev->features |= dev->hw_features & NETIF_F_ALL_TSO; 6327 /* (!csum && gso) case will be fixed by register_netdev() */ 6328 } 6329 6330 /* 1. With VIRTIO_NET_F_GUEST_CSUM negotiation, the driver doesn't 6331 * need to calculate checksums for partially checksummed packets, 6332 * as they're considered valid by the upper layer. 6333 * 2. Without VIRTIO_NET_F_GUEST_CSUM negotiation, the driver only 6334 * receives fully checksummed packets. The device may assist in 6335 * validating these packets' checksums, so the driver won't have to. 6336 */ 6337 dev->features |= NETIF_F_RXCSUM; 6338 6339 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || 6340 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) 6341 dev->features |= NETIF_F_GRO_HW; 6342 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) 6343 dev->hw_features |= NETIF_F_GRO_HW; 6344 6345 dev->vlan_features = dev->features; 6346 dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; 6347 6348 /* MTU range: 68 - 65535 */ 6349 dev->min_mtu = MIN_MTU; 6350 dev->max_mtu = MAX_MTU; 6351 6352 /* Configuration may specify what MAC to use. Otherwise random. 
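* When VIRTIO_NET_F_CTRL_MAC_ADDR has also been negotiated, a randomly
* generated address is pushed back to the device later in probe.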
*/ 6353 if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 6354 u8 addr[ETH_ALEN]; 6355 6356 virtio_cread_bytes(vdev, 6357 offsetof(struct virtio_net_config, mac), 6358 addr, ETH_ALEN); 6359 eth_hw_addr_set(dev, addr); 6360 } else { 6361 eth_hw_addr_random(dev); 6362 dev_info(&vdev->dev, "Assigned random MAC address %pM\n", 6363 dev->dev_addr); 6364 } 6365 6366 /* Set up our device-specific information */ 6367 vi = netdev_priv(dev); 6368 vi->dev = dev; 6369 vi->vdev = vdev; 6370 vdev->priv = vi; 6371 6372 INIT_WORK(&vi->config_work, virtnet_config_changed_work); 6373 INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work); 6374 spin_lock_init(&vi->refill_lock); 6375 6376 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) { 6377 vi->mergeable_rx_bufs = true; 6378 dev->xdp_features |= NETDEV_XDP_ACT_RX_SG; 6379 } 6380 6381 if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) 6382 vi->has_rss_hash_report = true; 6383 6384 if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) { 6385 vi->has_rss = true; 6386 6387 vi->rss_indir_table_size = 6388 virtio_cread16(vdev, offsetof(struct virtio_net_config, 6389 rss_max_indirection_table_length)); 6390 } 6391 6392 if (vi->has_rss || vi->has_rss_hash_report) { 6393 vi->rss_key_size = 6394 virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); 6395 6396 vi->rss_hash_types_supported = 6397 virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types)); 6398 vi->rss_hash_types_supported &= 6399 ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX | 6400 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | 6401 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX); 6402 6403 dev->hw_features |= NETIF_F_RXHASH; 6404 dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops; 6405 } 6406 6407 if (vi->has_rss_hash_report) 6408 vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); 6409 else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) || 6410 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 6411 vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); 6412 else 6413 vi->hdr_len = sizeof(struct virtio_net_hdr); 6414 6415 if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || 6416 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 6417 vi->any_header_sg = true; 6418 6419 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 6420 vi->has_cvq = true; 6421 6422 mutex_init(&vi->cvq_lock); 6423 6424 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 6425 mtu = virtio_cread16(vdev, 6426 offsetof(struct virtio_net_config, 6427 mtu)); 6428 if (mtu < dev->min_mtu) { 6429 /* Should never trigger: MTU was previously validated 6430 * in virtnet_validate. 
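* virtnet_validate() clears VIRTIO_NET_F_MTU when the advertised MTU is
* below MIN_MTU, so reaching this branch means the device changed its
* configuration after validation.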
6431 */ 6432 dev_err(&vdev->dev, 6433 "device MTU appears to have changed it is now %d < %d", 6434 mtu, dev->min_mtu); 6435 err = -EINVAL; 6436 goto free; 6437 } 6438 6439 dev->mtu = mtu; 6440 dev->max_mtu = mtu; 6441 } 6442 6443 virtnet_set_big_packets(vi, mtu); 6444 6445 if (vi->any_header_sg) 6446 dev->needed_headroom = vi->hdr_len; 6447 6448 /* Enable multiqueue by default */ 6449 if (num_online_cpus() >= max_queue_pairs) 6450 vi->curr_queue_pairs = max_queue_pairs; 6451 else 6452 vi->curr_queue_pairs = num_online_cpus(); 6453 vi->max_queue_pairs = max_queue_pairs; 6454 6455 /* Allocate/initialize the rx/tx queues, and invoke find_vqs */ 6456 err = init_vqs(vi); 6457 if (err) 6458 goto free; 6459 6460 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 6461 vi->intr_coal_rx.max_usecs = 0; 6462 vi->intr_coal_tx.max_usecs = 0; 6463 vi->intr_coal_rx.max_packets = 0; 6464 6465 /* Keep the default values of the coalescing parameters 6466 * aligned with the default napi_tx state. 6467 */ 6468 if (vi->sq[0].napi.weight) 6469 vi->intr_coal_tx.max_packets = 1; 6470 else 6471 vi->intr_coal_tx.max_packets = 0; 6472 } 6473 6474 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 6475 /* The reason is the same as VIRTIO_NET_F_NOTF_COAL. */ 6476 for (i = 0; i < vi->max_queue_pairs; i++) 6477 if (vi->sq[i].napi.weight) 6478 vi->sq[i].intr_coal.max_packets = 1; 6479 6480 err = virtnet_init_irq_moder(vi); 6481 if (err) 6482 goto free; 6483 } 6484 6485 #ifdef CONFIG_SYSFS 6486 if (vi->mergeable_rx_bufs) 6487 dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group; 6488 #endif 6489 netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs); 6490 netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs); 6491 6492 virtnet_init_settings(dev); 6493 6494 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 6495 vi->failover = net_failover_create(vi->dev); 6496 if (IS_ERR(vi->failover)) { 6497 err = PTR_ERR(vi->failover); 6498 goto free_vqs; 6499 } 6500 } 6501 6502 if (vi->has_rss || vi->has_rss_hash_report) 6503 virtnet_init_default_rss(vi); 6504 6505 enable_rx_mode_work(vi); 6506 6507 /* serialize netdev register + virtio_device_ready() with ndo_open() */ 6508 rtnl_lock(); 6509 6510 err = register_netdevice(dev); 6511 if (err) { 6512 pr_debug("virtio_net: registering device failed\n"); 6513 rtnl_unlock(); 6514 goto free_failover; 6515 } 6516 6517 virtio_device_ready(vdev); 6518 6519 virtnet_set_queues(vi, vi->curr_queue_pairs); 6520 6521 /* a random MAC address has been assigned, notify the device. 
6522 * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there 6523 * because many devices work fine without getting MAC explicitly 6524 */ 6525 if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 6526 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 6527 struct scatterlist sg; 6528 6529 sg_init_one(&sg, dev->dev_addr, dev->addr_len); 6530 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 6531 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 6532 pr_debug("virtio_net: setting MAC address failed\n"); 6533 rtnl_unlock(); 6534 err = -EINVAL; 6535 goto free_unregister_netdev; 6536 } 6537 } 6538 6539 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) { 6540 struct virtio_net_stats_capabilities *stats_cap __free(kfree) = NULL; 6541 struct scatterlist sg; 6542 __le64 v; 6543 6544 stats_cap = kzalloc(sizeof(*stats_cap), GFP_KERNEL); 6545 if (!stats_cap) { 6546 rtnl_unlock(); 6547 err = -ENOMEM; 6548 goto free_unregister_netdev; 6549 } 6550 6551 sg_init_one(&sg, stats_cap, sizeof(*stats_cap)); 6552 6553 if (!virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, 6554 VIRTIO_NET_CTRL_STATS_QUERY, 6555 NULL, &sg)) { 6556 pr_debug("virtio_net: fail to get stats capability\n"); 6557 rtnl_unlock(); 6558 err = -EINVAL; 6559 goto free_unregister_netdev; 6560 } 6561 6562 v = stats_cap->supported_stats_types[0]; 6563 vi->device_stats_cap = le64_to_cpu(v); 6564 } 6565 6566 rtnl_unlock(); 6567 6568 err = virtnet_cpu_notif_add(vi); 6569 if (err) { 6570 pr_debug("virtio_net: registering cpu notifier failed\n"); 6571 goto free_unregister_netdev; 6572 } 6573 6574 /* Assume link up if device can't report link status, 6575 otherwise get link status from config. */ 6576 netif_carrier_off(dev); 6577 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { 6578 schedule_work(&vi->config_work); 6579 } else { 6580 vi->status = VIRTIO_NET_S_LINK_UP; 6581 virtnet_update_settings(vi); 6582 netif_carrier_on(dev); 6583 } 6584 6585 for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) 6586 if (virtio_has_feature(vi->vdev, guest_offloads[i])) 6587 set_bit(guest_offloads[i], &vi->guest_offloads); 6588 vi->guest_offloads_capable = vi->guest_offloads; 6589 6590 pr_debug("virtnet: registered device %s with %d RX and TX vq's\n", 6591 dev->name, max_queue_pairs); 6592 6593 return 0; 6594 6595 free_unregister_netdev: 6596 unregister_netdev(dev); 6597 free_failover: 6598 net_failover_destroy(vi->failover); 6599 free_vqs: 6600 virtio_reset_device(vdev); 6601 cancel_delayed_work_sync(&vi->refill); 6602 free_receive_page_frags(vi); 6603 virtnet_del_vqs(vi); 6604 free: 6605 free_netdev(dev); 6606 return err; 6607 } 6608 6609 static void remove_vq_common(struct virtnet_info *vi) 6610 { 6611 virtio_reset_device(vi->vdev); 6612 6613 /* Free unused buffers in both send and recv, if any. */ 6614 free_unused_bufs(vi); 6615 6616 free_receive_bufs(vi); 6617 6618 free_receive_page_frags(vi); 6619 6620 virtnet_del_vqs(vi); 6621 } 6622 6623 static void virtnet_remove(struct virtio_device *vdev) 6624 { 6625 struct virtnet_info *vi = vdev->priv; 6626 6627 virtnet_cpu_notif_remove(vi); 6628 6629 /* Make sure no work handler is accessing the device. 
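* config_work is scheduled from virtnet_config_changed() and rx_mode_work
* from the ndo_set_rx_mode path, so flush both (and disable further
* rx_mode_work scheduling) before tearing the device down.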
*/ 6630 flush_work(&vi->config_work); 6631 disable_rx_mode_work(vi); 6632 flush_work(&vi->rx_mode_work); 6633 6634 virtnet_free_irq_moder(vi); 6635 6636 unregister_netdev(vi->dev); 6637 6638 net_failover_destroy(vi->failover); 6639 6640 remove_vq_common(vi); 6641 6642 free_netdev(vi->dev); 6643 } 6644 6645 static __maybe_unused int virtnet_freeze(struct virtio_device *vdev) 6646 { 6647 struct virtnet_info *vi = vdev->priv; 6648 6649 virtnet_cpu_notif_remove(vi); 6650 virtnet_freeze_down(vdev); 6651 remove_vq_common(vi); 6652 6653 return 0; 6654 } 6655 6656 static __maybe_unused int virtnet_restore(struct virtio_device *vdev) 6657 { 6658 struct virtnet_info *vi = vdev->priv; 6659 int err; 6660 6661 err = virtnet_restore_up(vdev); 6662 if (err) 6663 return err; 6664 virtnet_set_queues(vi, vi->curr_queue_pairs); 6665 6666 err = virtnet_cpu_notif_add(vi); 6667 if (err) { 6668 virtnet_freeze_down(vdev); 6669 remove_vq_common(vi); 6670 return err; 6671 } 6672 6673 return 0; 6674 } 6675 6676 static struct virtio_device_id id_table[] = { 6677 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, 6678 { 0 }, 6679 }; 6680 6681 #define VIRTNET_FEATURES \ 6682 VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \ 6683 VIRTIO_NET_F_MAC, \ 6684 VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \ 6685 VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \ 6686 VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \ 6687 VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \ 6688 VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \ 6689 VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \ 6690 VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ 6691 VIRTIO_NET_F_CTRL_MAC_ADDR, \ 6692 VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ 6693 VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ 6694 VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \ 6695 VIRTIO_NET_F_VQ_NOTF_COAL, \ 6696 VIRTIO_NET_F_GUEST_HDRLEN, VIRTIO_NET_F_DEVICE_STATS 6697 6698 static unsigned int features[] = { 6699 VIRTNET_FEATURES, 6700 }; 6701 6702 static unsigned int features_legacy[] = { 6703 VIRTNET_FEATURES, 6704 VIRTIO_NET_F_GSO, 6705 VIRTIO_F_ANY_LAYOUT, 6706 }; 6707 6708 static struct virtio_driver virtio_net_driver = { 6709 .feature_table = features, 6710 .feature_table_size = ARRAY_SIZE(features), 6711 .feature_table_legacy = features_legacy, 6712 .feature_table_size_legacy = ARRAY_SIZE(features_legacy), 6713 .driver.name = KBUILD_MODNAME, 6714 .id_table = id_table, 6715 .validate = virtnet_validate, 6716 .probe = virtnet_probe, 6717 .remove = virtnet_remove, 6718 .config_changed = virtnet_config_changed, 6719 #ifdef CONFIG_PM_SLEEP 6720 .freeze = virtnet_freeze, 6721 .restore = virtnet_restore, 6722 #endif 6723 }; 6724 6725 static __init int virtio_net_driver_init(void) 6726 { 6727 int ret; 6728 6729 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online", 6730 virtnet_cpu_online, 6731 virtnet_cpu_down_prep); 6732 if (ret < 0) 6733 goto out; 6734 virtionet_online = ret; 6735 ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead", 6736 NULL, virtnet_cpu_dead); 6737 if (ret) 6738 goto err_dead; 6739 ret = register_virtio_driver(&virtio_net_driver); 6740 if (ret) 6741 goto err_virtio; 6742 return 0; 6743 err_virtio: 6744 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 6745 err_dead: 6746 cpuhp_remove_multi_state(virtionet_online); 6747 out: 6748 return ret; 6749 } 6750 module_init(virtio_net_driver_init); 6751 6752 static __exit void 
virtio_net_driver_exit(void) 6753 { 6754 unregister_virtio_driver(&virtio_net_driver); 6755 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 6756 cpuhp_remove_multi_state(virtionet_online); 6757 } 6758 module_exit(virtio_net_driver_exit); 6759 6760 MODULE_DEVICE_TABLE(virtio, id_table); 6761 MODULE_DESCRIPTION("Virtio network driver"); 6762 MODULE_LICENSE("GPL"); 6763