// SPDX-License-Identifier: GPL-2.0-or-later
/* A network driver using virtio.
 *
 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
 */
//#define DEBUG
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_net.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/scatterlist.h>
#include <linux/if_vlan.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/average.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/dim.h>
#include <net/route.h>
#include <net/xdp.h>
#include <net/net_failover.h>
#include <net/netdev_rx_queue.h>
#include <net/netdev_queues.h>
#include <net/xdp_sock_drv.h>

static int napi_weight = NAPI_POLL_WEIGHT;
module_param(napi_weight, int, 0444);

static bool csum = true, gso = true, napi_tx = true;
module_param(csum, bool, 0444);
module_param(gso, bool, 0444);
module_param(napi_tx, bool, 0644);

/* FIXME: MTU in config. */
#define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
#define GOOD_COPY_LEN	128

#define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)

/* Separating two types of XDP xmit */
#define VIRTIO_XDP_TX		BIT(0)
#define VIRTIO_XDP_REDIR	BIT(1)

#define VIRTIO_XDP_FLAG		BIT(0)
#define VIRTIO_ORPHAN_FLAG	BIT(1)

/* RX packet size EWMA. The average packet size is used to determine the packet
 * buffer size when refilling RX rings. As the entire RX ring may be refilled
 * at once, the weight is chosen so that the EWMA will be insensitive to short-
 * term, transient changes in packet size.
 */
DECLARE_EWMA(pkt_len, 0, 64)
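/* Rough worked example of the weight above (illustrative, not from the
 * original source): with DECLARE_EWMA(pkt_len, 0, 64) each new sample
 * contributes about 1/64 of the average, i.e. roughly
 * avg = (63 * avg + pkt_len) / 64, so a short burst of large packets barely
 * moves the average used for refill buffer sizing.
 */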
#define VIRTNET_DRIVER_VERSION "1.0.0"

static const unsigned long guest_offloads[] = {
	VIRTIO_NET_F_GUEST_TSO4,
	VIRTIO_NET_F_GUEST_TSO6,
	VIRTIO_NET_F_GUEST_ECN,
	VIRTIO_NET_F_GUEST_UFO,
	VIRTIO_NET_F_GUEST_CSUM,
	VIRTIO_NET_F_GUEST_USO4,
	VIRTIO_NET_F_GUEST_USO6,
	VIRTIO_NET_F_GUEST_HDRLEN
};

#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
				   (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
				   (1ULL << VIRTIO_NET_F_GUEST_ECN)  | \
				   (1ULL << VIRTIO_NET_F_GUEST_UFO)  | \
				   (1ULL << VIRTIO_NET_F_GUEST_USO4) | \
				   (1ULL << VIRTIO_NET_F_GUEST_USO6))

struct virtnet_stat_desc {
	char desc[ETH_GSTRING_LEN];
	size_t offset;
	size_t qstat_offset;
};

struct virtnet_sq_free_stats {
	u64 packets;
	u64 bytes;
	u64 napi_packets;
	u64 napi_bytes;
};

struct virtnet_sq_stats {
	struct u64_stats_sync syncp;
	u64_stats_t packets;
	u64_stats_t bytes;
	u64_stats_t xdp_tx;
	u64_stats_t xdp_tx_drops;
	u64_stats_t kicks;
	u64_stats_t tx_timeouts;
	u64_stats_t stop;
	u64_stats_t wake;
};

struct virtnet_rq_stats {
	struct u64_stats_sync syncp;
	u64_stats_t packets;
	u64_stats_t bytes;
	u64_stats_t drops;
	u64_stats_t xdp_packets;
	u64_stats_t xdp_tx;
	u64_stats_t xdp_redirects;
	u64_stats_t xdp_drops;
	u64_stats_t kicks;
};

#define VIRTNET_SQ_STAT(name, m) {name, offsetof(struct virtnet_sq_stats, m), -1}
#define VIRTNET_RQ_STAT(name, m) {name, offsetof(struct virtnet_rq_stats, m), -1}

#define VIRTNET_SQ_STAT_QSTAT(name, m)				\
	{							\
		name,						\
		offsetof(struct virtnet_sq_stats, m),		\
		offsetof(struct netdev_queue_stats_tx, m),	\
	}

#define VIRTNET_RQ_STAT_QSTAT(name, m)				\
	{							\
		name,						\
		offsetof(struct virtnet_rq_stats, m),		\
		offsetof(struct netdev_queue_stats_rx, m),	\
	}

static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = {
	VIRTNET_SQ_STAT("xdp_tx", xdp_tx),
	VIRTNET_SQ_STAT("xdp_tx_drops", xdp_tx_drops),
	VIRTNET_SQ_STAT("kicks", kicks),
	VIRTNET_SQ_STAT("tx_timeouts", tx_timeouts),
};

static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
	VIRTNET_RQ_STAT("drops", drops),
	VIRTNET_RQ_STAT("xdp_packets", xdp_packets),
	VIRTNET_RQ_STAT("xdp_tx", xdp_tx),
	VIRTNET_RQ_STAT("xdp_redirects", xdp_redirects),
	VIRTNET_RQ_STAT("xdp_drops", xdp_drops),
	VIRTNET_RQ_STAT("kicks", kicks),
};

static const struct virtnet_stat_desc virtnet_sq_stats_desc_qstat[] = {
	VIRTNET_SQ_STAT_QSTAT("packets", packets),
	VIRTNET_SQ_STAT_QSTAT("bytes", bytes),
	VIRTNET_SQ_STAT_QSTAT("stop", stop),
	VIRTNET_SQ_STAT_QSTAT("wake", wake),
};

static const struct virtnet_stat_desc virtnet_rq_stats_desc_qstat[] = {
	VIRTNET_RQ_STAT_QSTAT("packets", packets),
	VIRTNET_RQ_STAT_QSTAT("bytes", bytes),
};

#define VIRTNET_STATS_DESC_CQ(name) \
	{#name, offsetof(struct virtio_net_stats_cvq, name), -1}

#define VIRTNET_STATS_DESC_RX(class, name) \
	{#name, offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), -1}

#define VIRTNET_STATS_DESC_TX(class, name) \
	{#name, offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), -1}


static const struct virtnet_stat_desc virtnet_stats_cvq_desc[] = {
	VIRTNET_STATS_DESC_CQ(command_num),
	VIRTNET_STATS_DESC_CQ(ok_num),
};

static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = {
	VIRTNET_STATS_DESC_RX(basic, packets),
	VIRTNET_STATS_DESC_RX(basic, bytes),

	VIRTNET_STATS_DESC_RX(basic, notifications),
	VIRTNET_STATS_DESC_RX(basic, interrupts),
};

static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = {
	VIRTNET_STATS_DESC_TX(basic, packets),
	VIRTNET_STATS_DESC_TX(basic, bytes),

	VIRTNET_STATS_DESC_TX(basic, notifications),
	VIRTNET_STATS_DESC_TX(basic, interrupts),
};

static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = {
	VIRTNET_STATS_DESC_RX(csum, needs_csum),
};

static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = {
	VIRTNET_STATS_DESC_TX(gso, gso_packets_noseg),
	VIRTNET_STATS_DESC_TX(gso, gso_bytes_noseg),
};

static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = {
	VIRTNET_STATS_DESC_RX(speed, ratelimit_bytes),
};

static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = {
	VIRTNET_STATS_DESC_TX(speed, ratelimit_bytes),
};

#define VIRTNET_STATS_DESC_RX_QSTAT(class, name, qstat_field)			\
	{									\
		#name,								\
		offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name),	\
		offsetof(struct netdev_queue_stats_rx, qstat_field),		\
	}

#define VIRTNET_STATS_DESC_TX_QSTAT(class, name, qstat_field)			\
	{									\
		#name,								\
		offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name),	\
		offsetof(struct netdev_queue_stats_tx, qstat_field),		\
	}

static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc_qstat[] = {
	VIRTNET_STATS_DESC_RX_QSTAT(basic, drops, hw_drops),
	VIRTNET_STATS_DESC_RX_QSTAT(basic, drop_overruns, hw_drop_overruns),
};

static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc_qstat[] = {
	VIRTNET_STATS_DESC_TX_QSTAT(basic, drops, hw_drops),
	VIRTNET_STATS_DESC_TX_QSTAT(basic, drop_malformed, hw_drop_errors),
};

static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc_qstat[] = {
	VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_valid, csum_unnecessary),
	VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_none, csum_none),
	VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_bad, csum_bad),
};

static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc_qstat[] = {
	VIRTNET_STATS_DESC_TX_QSTAT(csum, csum_none, csum_none),
	VIRTNET_STATS_DESC_TX_QSTAT(csum, needs_csum, needs_csum),
};

static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc_qstat[] = {
	VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets, hw_gro_packets),
	VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes, hw_gro_bytes),
	VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets_coalesced, hw_gro_wire_packets),
	VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes_coalesced, hw_gro_wire_bytes),
};

static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc_qstat[] = {
	VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_packets, hw_gso_packets),
	VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_bytes, hw_gso_bytes),
	VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments, hw_gso_wire_packets),
	VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments_bytes, hw_gso_wire_bytes),
};

static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc_qstat[] = {
	VIRTNET_STATS_DESC_RX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits),
};

static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc_qstat[] = {
	VIRTNET_STATS_DESC_TX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits),
};

#define VIRTNET_Q_TYPE_RX 0
#define VIRTNET_Q_TYPE_TX 1
#define VIRTNET_Q_TYPE_CQ 2

struct virtnet_interrupt_coalesce {
	u32 max_packets;
	u32 max_usecs;
};

/* The dma information of pages allocated at a time. */
struct virtnet_rq_dma {
	dma_addr_t addr;
	u32 ref;
	u16 len;
	u16 need_sync;
};

/* Internal representation of a send virtqueue */
struct send_queue {
	/* Virtqueue associated with this send_queue */
	struct virtqueue *vq;

	/* TX: fragments + linear part + virtio header */
	struct scatterlist sg[MAX_SKB_FRAGS + 2];

	/* Name of the send queue: output.$index */
	char name[16];

	struct virtnet_sq_stats stats;

	struct virtnet_interrupt_coalesce intr_coal;

	struct napi_struct napi;

	/* Record whether sq is in reset state. */
	bool reset;
};

/* Internal representation of a receive virtqueue */
struct receive_queue {
	/* Virtqueue associated with this receive_queue */
	struct virtqueue *vq;

	struct napi_struct napi;

	struct bpf_prog __rcu *xdp_prog;

	struct virtnet_rq_stats stats;

	/* The number of rx notifications */
	u16 calls;

	/* Is dynamic interrupt moderation enabled? */
	bool dim_enabled;

	/* Used to protect dim_enabled and intr_coal */
	struct mutex dim_lock;

	/* Dynamic Interrupt Moderation */
	struct dim dim;

	u32 packets_in_napi;

	struct virtnet_interrupt_coalesce intr_coal;

	/* Chain pages by the private ptr. */
	struct page *pages;

	/* Average packet length for mergeable receive buffers. */
	struct ewma_pkt_len mrg_avg_pkt_len;

	/* Page frag for packet buffer allocation. */
	struct page_frag alloc_frag;

	/* RX: fragments + linear part + virtio header */
	struct scatterlist sg[MAX_SKB_FRAGS + 2];

	/* Min single buffer size for mergeable buffers case. */
	unsigned int min_buf_len;

	/* Name of this receive queue: input.$index */
	char name[16];

	struct xdp_rxq_info xdp_rxq;

	/* Record the last dma info to free after new pages are allocated. */
	struct virtnet_rq_dma *last_dma;

	struct xsk_buff_pool *xsk_pool;

	/* xdp rxq used by xsk */
	struct xdp_rxq_info xsk_rxq_info;

	struct xdp_buff **xsk_buffs;
};

/* This structure can contain an rss message with maximum settings for the
 * indirection table and key size.
 * Note that the default structure describing the RSS configuration,
 * virtio_net_rss_config, contains the same info but can't hold the table
 * values themselves.
 * In any case, the structure is passed to the virtio hw through sg_buf split
 * into parts because the table sizes may differ according to the device
 * configuration.
 */
#define VIRTIO_NET_RSS_MAX_KEY_SIZE     40
#define VIRTIO_NET_RSS_MAX_TABLE_LEN    128
struct virtio_net_ctrl_rss {
	u32 hash_types;
	u16 indirection_table_mask;
	u16 unclassified_queue;
	u16 indirection_table[VIRTIO_NET_RSS_MAX_TABLE_LEN];
	u16 max_tx_vq;
	u8 hash_key_length;
	u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE];
};

/* Control VQ buffers: protected by the rtnl lock */
struct control_buf {
	struct virtio_net_ctrl_hdr hdr;
	virtio_net_ctrl_ack status;
};

struct virtnet_info {
	struct virtio_device *vdev;
	struct virtqueue *cvq;
	struct net_device *dev;
	struct send_queue *sq;
	struct receive_queue *rq;
	unsigned int status;

	/* Max # of queue pairs supported by the device */
	u16 max_queue_pairs;

	/* # of queue pairs currently used by the driver */
	u16 curr_queue_pairs;

	/* # of XDP queue pairs currently used by the driver */
	u16 xdp_queue_pairs;

	/* xdp_queue_pairs may be 0 when xdp is already loaded. So add this. */
	bool xdp_enabled;

	/* I like... big packets and I cannot lie! */
	bool big_packets;

	/* number of sg entries allocated for big packets */
	unsigned int big_packets_num_skbfrags;

	/* Host will merge rx buffers for big packets (shake it! shake it!) */
	bool mergeable_rx_bufs;

	/* Host supports rss and/or hash report */
	bool has_rss;
	bool has_rss_hash_report;
	u8 rss_key_size;
	u16 rss_indir_table_size;
	u32 rss_hash_types_supported;
	u32 rss_hash_types_saved;
	struct virtio_net_ctrl_rss rss;

	/* Has control virtqueue */
	bool has_cvq;

	/* Lock to protect the control VQ */
	struct mutex cvq_lock;

	/* Host can handle any s/g split between our header and packet data */
	bool any_header_sg;

	/* Packet virtio header size */
	u8 hdr_len;

	/* Work struct for delayed refilling if we run low on memory. */
	struct delayed_work refill;

	/* Is delayed refill enabled? */
	bool refill_enabled;

	/* The lock to synchronize the access to refill_enabled */
	spinlock_t refill_lock;

	/* Work struct for config space updates */
	struct work_struct config_work;

	/* Work struct for setting rx mode */
	struct work_struct rx_mode_work;

	/* OK to queue work setting RX mode? */
	bool rx_mode_work_enabled;

	/* Is the affinity hint set for virtqueues? */
	bool affinity_hint_set;

	/* CPU hotplug instances for online & dead */
	struct hlist_node node;
	struct hlist_node node_dead;

	struct control_buf *ctrl;

	/* Ethtool settings */
	u8 duplex;
	u32 speed;

	/* Is rx dynamic interrupt moderation enabled? */
	bool rx_dim_enabled;

	/* Interrupt coalescing settings */
	struct virtnet_interrupt_coalesce intr_coal_tx;
	struct virtnet_interrupt_coalesce intr_coal_rx;

	unsigned long guest_offloads;
	unsigned long guest_offloads_capable;

	/* failover when STANDBY feature enabled */
	struct failover *failover;

	u64 device_stats_cap;
};

struct padded_vnet_hdr {
	struct virtio_net_hdr_v1_hash hdr;
	/*
	 * hdr is in a separate sg buffer, and data sg buffer shares same page
	 * with this header sg. This padding makes next sg 16 byte aligned
	 * after the header.
	 */
	char padding[12];
};
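/* Illustration of the padding arithmetic (not from the original source,
 * assuming no extra compiler padding): struct virtio_net_hdr_v1_hash is
 * 20 bytes on current headers, so 20 + 12 bytes of padding places the data
 * sg entry at offset 32, i.e. on a 16-byte aligned boundary after the header.
 */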

struct virtio_net_common_hdr {
	union {
		struct virtio_net_hdr hdr;
		struct virtio_net_hdr_mrg_rxbuf	mrg_hdr;
		struct virtio_net_hdr_v1_hash hash_v1_hdr;
	};
};

static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
			       struct net_device *dev,
			       unsigned int *xdp_xmit,
			       struct virtnet_rq_stats *stats);
static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq,
				 struct sk_buff *skb, u8 flags);
static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb,
					       struct sk_buff *curr_skb,
					       struct page *page, void *buf,
					       int len, int truesize);

static bool is_xdp_frame(void *ptr)
{
	return (unsigned long)ptr & VIRTIO_XDP_FLAG;
}

static void *xdp_to_ptr(struct xdp_frame *ptr)
{
	return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG);
}

static struct xdp_frame *ptr_to_xdp(void *ptr)
{
	return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
}

static bool is_orphan_skb(void *ptr)
{
	return (unsigned long)ptr & VIRTIO_ORPHAN_FLAG;
}

static void *skb_to_ptr(struct sk_buff *skb, bool orphan)
{
	return (void *)((unsigned long)skb | (orphan ? VIRTIO_ORPHAN_FLAG : 0));
}

static struct sk_buff *ptr_to_skb(void *ptr)
{
	return (struct sk_buff *)((unsigned long)ptr & ~VIRTIO_ORPHAN_FLAG);
}
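/* Example of the tagging scheme above (illustrative only): the sk_buff and
 * xdp_frame pointers stored as virtqueue tokens are at least 4-byte aligned,
 * so their two low bits are free to carry VIRTIO_XDP_FLAG and
 * VIRTIO_ORPHAN_FLAG. E.g. xdp_to_ptr() sets bit 0 and ptr_to_xdp() masks it
 * off again before the pointer is dereferenced.
 */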

static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
			    bool in_napi, struct virtnet_sq_free_stats *stats)
{
	unsigned int len;
	void *ptr;

	while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
		if (!is_xdp_frame(ptr)) {
			struct sk_buff *skb = ptr_to_skb(ptr);

			pr_debug("Sent skb %p\n", skb);

			if (is_orphan_skb(ptr)) {
				stats->packets++;
				stats->bytes += skb->len;
			} else {
				stats->napi_packets++;
				stats->napi_bytes += skb->len;
			}
			napi_consume_skb(skb, in_napi);
		} else {
			struct xdp_frame *frame = ptr_to_xdp(ptr);

			stats->packets++;
			stats->bytes += xdp_get_frame_len(frame);
			xdp_return_frame(frame);
		}
	}
	netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes);
}

/* Converting between virtqueue no. and kernel tx/rx queue no.
 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
 */
static int vq2txq(struct virtqueue *vq)
{
	return (vq->index - 1) / 2;
}

static int txq2vq(int txq)
{
	return txq * 2 + 1;
}

static int vq2rxq(struct virtqueue *vq)
{
	return vq->index / 2;
}

static int rxq2vq(int rxq)
{
	return rxq * 2;
}

static int vq_type(struct virtnet_info *vi, int qid)
{
	if (qid == vi->max_queue_pairs * 2)
		return VIRTNET_Q_TYPE_CQ;

	if (qid % 2)
		return VIRTNET_Q_TYPE_TX;

	return VIRTNET_Q_TYPE_RX;
}
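/* Worked example of the mapping above (illustrative only): with two queue
 * pairs the virtqueues are 0:rx0 1:tx0 2:rx1 3:tx1 4:cvq, so vq2txq() maps
 * virtqueue 3 to tx queue (3 - 1) / 2 = 1, rxq2vq(1) returns virtqueue 2,
 * and vq_type() classifies qid 4 (== max_queue_pairs * 2) as the control vq.
 */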

static inline struct virtio_net_common_hdr *
skb_vnet_common_hdr(struct sk_buff *skb)
{
	return (struct virtio_net_common_hdr *)skb->cb;
}

/*
 * private is used to chain pages for big packets, put the whole
 * most recently used list at the beginning for reuse
 */
static void give_pages(struct receive_queue *rq, struct page *page)
{
	struct page *end;

	/* Find end of list, sew whole thing into vi->rq.pages. */
	for (end = page; end->private; end = (struct page *)end->private);
	end->private = (unsigned long)rq->pages;
	rq->pages = page;
}

static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
{
	struct page *p = rq->pages;

	if (p) {
		rq->pages = (struct page *)p->private;
		/* clear private here, it is used to chain pages */
		p->private = 0;
	} else
		p = alloc_page(gfp_mask);
	return p;
}

static void virtnet_rq_free_buf(struct virtnet_info *vi,
				struct receive_queue *rq, void *buf)
{
	if (vi->mergeable_rx_bufs)
		put_page(virt_to_head_page(buf));
	else if (vi->big_packets)
		give_pages(rq, buf);
	else
		put_page(virt_to_head_page(buf));
}

static void enable_delayed_refill(struct virtnet_info *vi)
{
	spin_lock_bh(&vi->refill_lock);
	vi->refill_enabled = true;
	spin_unlock_bh(&vi->refill_lock);
}

static void disable_delayed_refill(struct virtnet_info *vi)
{
	spin_lock_bh(&vi->refill_lock);
	vi->refill_enabled = false;
	spin_unlock_bh(&vi->refill_lock);
}

static void enable_rx_mode_work(struct virtnet_info *vi)
{
	rtnl_lock();
	vi->rx_mode_work_enabled = true;
	rtnl_unlock();
}

static void disable_rx_mode_work(struct virtnet_info *vi)
{
	rtnl_lock();
	vi->rx_mode_work_enabled = false;
	rtnl_unlock();
}

static void virtqueue_napi_schedule(struct napi_struct *napi,
				    struct virtqueue *vq)
{
	if (napi_schedule_prep(napi)) {
		virtqueue_disable_cb(vq);
		__napi_schedule(napi);
	}
}

static bool virtqueue_napi_complete(struct napi_struct *napi,
				    struct virtqueue *vq, int processed)
{
	int opaque;

	opaque = virtqueue_enable_cb_prepare(vq);
	if (napi_complete_done(napi, processed)) {
		if (unlikely(virtqueue_poll(vq, opaque)))
			virtqueue_napi_schedule(napi, vq);
		else
			return true;
	} else {
		virtqueue_disable_cb(vq);
	}

	return false;
}

static void skb_xmit_done(struct virtqueue *vq)
{
	struct virtnet_info *vi = vq->vdev->priv;
	struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi;

	/* Suppress further interrupts. */
	virtqueue_disable_cb(vq);

	if (napi->weight)
		virtqueue_napi_schedule(napi, vq);
	else
		/* We were probably waiting for more output buffers. */
		netif_wake_subqueue(vi->dev, vq2txq(vq));
}

#define MRG_CTX_HEADER_SHIFT 22
static void *mergeable_len_to_ctx(unsigned int truesize,
				  unsigned int headroom)
{
	return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize);
}

static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx)
{
	return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT;
}

static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
{
	return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
}
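/* Illustration of the encoding above (not part of the original source): the
 * context is a fake pointer packing the headroom in the upper bits and the
 * truesize in the low 22 bits, e.g. mergeable_len_to_ctx(1536, 256) yields
 * (256 << 22) | 1536, from which the two helpers recover 256 and 1536.
 */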

static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen,
					 unsigned int headroom,
					 unsigned int len)
{
	struct sk_buff *skb;

	skb = build_skb(buf, buflen);
	if (unlikely(!skb))
		return NULL;

	skb_reserve(skb, headroom);
	skb_put(skb, len);

	return skb;
}

/* Called from bottom half context */
static struct sk_buff *page_to_skb(struct virtnet_info *vi,
				   struct receive_queue *rq,
				   struct page *page, unsigned int offset,
				   unsigned int len, unsigned int truesize,
				   unsigned int headroom)
{
	struct sk_buff *skb;
	struct virtio_net_common_hdr *hdr;
	unsigned int copy, hdr_len, hdr_padded_len;
	struct page *page_to_free = NULL;
	int tailroom, shinfo_size;
	char *p, *hdr_p, *buf;

	p = page_address(page) + offset;
	hdr_p = p;

	hdr_len = vi->hdr_len;
	if (vi->mergeable_rx_bufs)
		hdr_padded_len = hdr_len;
	else
		hdr_padded_len = sizeof(struct padded_vnet_hdr);

	buf = p - headroom;
	len -= hdr_len;
	offset += hdr_padded_len;
	p += hdr_padded_len;
	tailroom = truesize - headroom - hdr_padded_len - len;

	shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
		skb = virtnet_build_skb(buf, truesize, p - buf, len);
		if (unlikely(!skb))
			return NULL;

		page = (struct page *)page->private;
		if (page)
			give_pages(rq, page);
		goto ok;
	}

	/* copy small packet so we can reuse these pages for small data */
	skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
	if (unlikely(!skb))
		return NULL;

	/* Copy all frame if it fits skb->head, otherwise
	 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed.
	 */
	if (len <= skb_tailroom(skb))
		copy = len;
	else
		copy = ETH_HLEN;
	skb_put_data(skb, p, copy);

	len -= copy;
	offset += copy;

	if (vi->mergeable_rx_bufs) {
		if (len)
			skb_add_rx_frag(skb, 0, page, offset, len, truesize);
		else
			page_to_free = page;
		goto ok;
	}

	/*
	 * Verify that we can indeed put this data into a skb.
	 * This is here to handle cases when the device erroneously
	 * tries to receive more than is possible. This is usually
	 * the case of a broken device.
	 */
	if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
		net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
		dev_kfree_skb(skb);
		return NULL;
	}
	BUG_ON(offset >= PAGE_SIZE);
	while (len) {
		unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);

		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
				frag_size, truesize);
		len -= frag_size;
		page = (struct page *)page->private;
		offset = 0;
	}

	if (page)
		give_pages(rq, page);

ok:
	hdr = skb_vnet_common_hdr(skb);
	memcpy(hdr, hdr_p, hdr_len);
	if (page_to_free)
		put_page(page_to_free);

	return skb;
}

static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len)
{
	struct page *page = virt_to_head_page(buf);
	struct virtnet_rq_dma *dma;
	void *head;
	int offset;

	head = page_address(page);

	dma = head;

	--dma->ref;

	if (dma->need_sync && len) {
		offset = buf - (head + sizeof(*dma));

		virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr,
							offset, len,
							DMA_FROM_DEVICE);
	}

	if (dma->ref)
		return;

	virtqueue_dma_unmap_single_attrs(rq->vq, dma->addr, dma->len,
					 DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
	put_page(page);
}

static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx)
{
	void *buf;

	buf = virtqueue_get_buf_ctx(rq->vq, len, ctx);
	if (buf)
		virtnet_rq_unmap(rq, buf, *len);

	return buf;
}

static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len)
{
	struct virtnet_rq_dma *dma;
	dma_addr_t addr;
	u32 offset;
	void *head;

	head = page_address(rq->alloc_frag.page);

	offset = buf - head;

	dma = head;

	addr = dma->addr - sizeof(*dma) + offset;

	sg_init_table(rq->sg, 1);
	rq->sg[0].dma_address = addr;
	rq->sg[0].length = len;
}

static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp)
{
	struct page_frag *alloc_frag = &rq->alloc_frag;
	struct virtnet_rq_dma *dma;
	void *buf, *head;
	dma_addr_t addr;

	if (unlikely(!skb_page_frag_refill(size, alloc_frag, gfp)))
		return NULL;

	head = page_address(alloc_frag->page);

	dma = head;

	/* new pages */
	if (!alloc_frag->offset) {
		if (rq->last_dma) {
			/* Now that the new page is allocated, the last dma
			 * will not be used. So the dma can be unmapped
			 * if the ref is 0.
			 */
			virtnet_rq_unmap(rq, rq->last_dma, 0);
			rq->last_dma = NULL;
		}

		dma->len = alloc_frag->size - sizeof(*dma);

		addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1,
						      dma->len, DMA_FROM_DEVICE, 0);
		if (virtqueue_dma_mapping_error(rq->vq, addr))
			return NULL;

		dma->addr = addr;
		dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr);

		/* Add a reference to dma to prevent the entire dma from
		 * being released during error handling. This reference
		 * will be freed after the pages are no longer used.
		 */
		get_page(alloc_frag->page);
		dma->ref = 1;
		alloc_frag->offset = sizeof(*dma);

		rq->last_dma = dma;
	}

	++dma->ref;

	buf = head + alloc_frag->offset;

	get_page(alloc_frag->page);
	alloc_frag->offset += size;

	return buf;
}

static void virtnet_rq_set_premapped(struct virtnet_info *vi)
{
	int i;

	/* disable for big mode */
	if (!vi->mergeable_rx_bufs && vi->big_packets)
		return;

	for (i = 0; i < vi->max_queue_pairs; i++)
		/* error should never happen */
		BUG_ON(virtqueue_set_dma_premapped(vi->rq[i].vq));
}

static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
{
	struct virtnet_info *vi = vq->vdev->priv;
	struct receive_queue *rq;
	int i = vq2rxq(vq);

	rq = &vi->rq[i];

	if (rq->xsk_pool) {
		xsk_buff_free((struct xdp_buff *)buf);
		return;
	}

	if (!vi->big_packets || vi->mergeable_rx_bufs)
		virtnet_rq_unmap(rq, buf, 0);

	virtnet_rq_free_buf(vi, rq, buf);
}

static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
			  bool in_napi)
{
	struct virtnet_sq_free_stats stats = {0};

	__free_old_xmit(sq, txq, in_napi, &stats);

	/* Avoid the overhead when no packets have been processed; this
	 * happens when called speculatively from start_xmit.
	 */
	if (!stats.packets && !stats.napi_packets)
		return;

	u64_stats_update_begin(&sq->stats.syncp);
	u64_stats_add(&sq->stats.bytes, stats.bytes + stats.napi_bytes);
	u64_stats_add(&sq->stats.packets, stats.packets + stats.napi_packets);
	u64_stats_update_end(&sq->stats.syncp);
}

static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
{
	if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
		return false;
	else if (q < vi->curr_queue_pairs)
		return true;
	else
		return false;
}

static void check_sq_full_and_disable(struct virtnet_info *vi,
				      struct net_device *dev,
				      struct send_queue *sq)
{
	bool use_napi = sq->napi.weight;
	int qnum;

	qnum = sq - vi->sq;

	/* If running out of space, stop queue to avoid getting packets that we
	 * are then unable to transmit.
	 * An alternative would be to force queuing layer to requeue the skb by
	 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
	 * returned in a normal path of operation: it means that driver is not
	 * maintaining the TX queue stop/start state properly, and causes
	 * the stack to do a non-trivial amount of useless work.
	 * Since most packets only take 1 or 2 ring slots, stopping the queue
	 * early means 16 slots are typically wasted.
	 */
	if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);

		netif_tx_stop_queue(txq);
		u64_stats_update_begin(&sq->stats.syncp);
		u64_stats_inc(&sq->stats.stop);
		u64_stats_update_end(&sq->stats.syncp);
		if (use_napi) {
			if (unlikely(!virtqueue_enable_cb_delayed(sq->vq)))
				virtqueue_napi_schedule(&sq->napi, sq->vq);
		} else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
			/* More just got used, free them then recheck. */
			free_old_xmit(sq, txq, false);
			if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
				netif_start_subqueue(dev, qnum);
				u64_stats_update_begin(&sq->stats.syncp);
				u64_stats_inc(&sq->stats.wake);
				u64_stats_update_end(&sq->stats.syncp);
				virtqueue_disable_cb(sq->vq);
			}
		}
	}
}

static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
{
	sg->dma_address = addr;
	sg->length = len;
}

static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi,
				   struct receive_queue *rq, void *buf, u32 len)
{
	struct xdp_buff *xdp;
	u32 bufsize;

	xdp = (struct xdp_buff *)buf;

	bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool) + vi->hdr_len;

	if (unlikely(len > bufsize)) {
		pr_debug("%s: rx error: len %u exceeds truesize %u\n",
			 vi->dev->name, len, bufsize);
		DEV_STATS_INC(vi->dev, rx_length_errors);
		xsk_buff_free(xdp);
		return NULL;
	}

	xsk_buff_set_size(xdp, len);
	xsk_buff_dma_sync_for_cpu(xdp);

	return xdp;
}

static struct sk_buff *xsk_construct_skb(struct receive_queue *rq,
					 struct xdp_buff *xdp)
{
	unsigned int metasize = xdp->data - xdp->data_meta;
	struct sk_buff *skb;
	unsigned int size;

	size = xdp->data_end - xdp->data_hard_start;
	skb = napi_alloc_skb(&rq->napi, size);
	if (unlikely(!skb)) {
		xsk_buff_free(xdp);
		return NULL;
	}

	skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);

	size = xdp->data_end - xdp->data_meta;
	memcpy(__skb_put(skb, size), xdp->data_meta, size);

	if (metasize) {
		__skb_pull(skb, metasize);
		skb_metadata_set(skb, metasize);
	}

	xsk_buff_free(xdp);

	return skb;
}

static struct sk_buff *virtnet_receive_xsk_small(struct net_device *dev, struct virtnet_info *vi,
						 struct receive_queue *rq, struct xdp_buff *xdp,
						 unsigned int *xdp_xmit,
						 struct virtnet_rq_stats *stats)
{
	struct bpf_prog *prog;
	u32 ret;

	ret = XDP_PASS;
	rcu_read_lock();
	prog = rcu_dereference(rq->xdp_prog);
	if (prog)
		ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
	rcu_read_unlock();

	switch (ret) {
	case XDP_PASS:
		return xsk_construct_skb(rq, xdp);

	case XDP_TX:
	case XDP_REDIRECT:
		return NULL;

	default:
		/* drop packet */
		xsk_buff_free(xdp);
		u64_stats_inc(&stats->drops);
		return NULL;
	}
}

static void xsk_drop_follow_bufs(struct net_device *dev,
				 struct receive_queue *rq,
				 u32 num_buf,
				 struct virtnet_rq_stats *stats)
{
	struct xdp_buff *xdp;
	u32 len;

	while (num_buf-- > 1) {
		xdp = virtqueue_get_buf(rq->vq, &len);
		if (unlikely(!xdp)) {
			pr_debug("%s: rx error: %d buffers missing\n",
				 dev->name, num_buf);
			DEV_STATS_INC(dev, rx_length_errors);
			break;
		}
		u64_stats_add(&stats->bytes, len);
		xsk_buff_free(xdp);
	}
}

static int xsk_append_merge_buffer(struct virtnet_info *vi,
				   struct receive_queue *rq,
				   struct sk_buff *head_skb,
				   u32 num_buf,
				   struct virtio_net_hdr_mrg_rxbuf *hdr,
				   struct virtnet_rq_stats *stats)
{
	struct sk_buff *curr_skb;
	struct xdp_buff *xdp;
	u32 len, truesize;
	struct page *page;
	void *buf;

	curr_skb = head_skb;

	while (--num_buf) {
		buf = virtqueue_get_buf(rq->vq, &len);
		if (unlikely(!buf)) {
			pr_debug("%s: rx error: %d buffers out of %d missing\n",
				 vi->dev->name, num_buf,
				 virtio16_to_cpu(vi->vdev,
						 hdr->num_buffers));
			DEV_STATS_INC(vi->dev, rx_length_errors);
			return -EINVAL;
		}

		u64_stats_add(&stats->bytes, len);

		xdp = buf_to_xdp(vi, rq, buf, len);
		if (!xdp)
			goto err;

		buf = napi_alloc_frag(len);
		if (!buf) {
			xsk_buff_free(xdp);
			goto err;
		}

		memcpy(buf, xdp->data - vi->hdr_len, len);

		xsk_buff_free(xdp);

		page = virt_to_page(buf);

		truesize = len;

		curr_skb  = virtnet_skb_append_frag(head_skb, curr_skb, page,
						    buf, len, truesize);
		if (!curr_skb) {
			put_page(page);
			goto err;
		}
	}

	return 0;

err:
	xsk_drop_follow_bufs(vi->dev, rq, num_buf, stats);
	return -EINVAL;
}

static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct virtnet_info *vi,
						 struct receive_queue *rq, struct xdp_buff *xdp,
						 unsigned int *xdp_xmit,
						 struct virtnet_rq_stats *stats)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr;
	struct bpf_prog *prog;
	struct sk_buff *skb;
	u32 ret, num_buf;

	hdr = xdp->data - vi->hdr_len;
	num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);

	ret = XDP_PASS;
	rcu_read_lock();
	prog = rcu_dereference(rq->xdp_prog);
	/* TODO: support multi buffer. */
	if (prog && num_buf == 1)
		ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
	rcu_read_unlock();

	switch (ret) {
	case XDP_PASS:
		skb = xsk_construct_skb(rq, xdp);
		if (!skb)
			goto drop_bufs;

		if (xsk_append_merge_buffer(vi, rq, skb, num_buf, hdr, stats)) {
			dev_kfree_skb(skb);
			goto drop;
		}

		return skb;

	case XDP_TX:
	case XDP_REDIRECT:
		return NULL;

	default:
		/* drop packet */
		xsk_buff_free(xdp);
	}

drop_bufs:
	xsk_drop_follow_bufs(dev, rq, num_buf, stats);

drop:
	u64_stats_inc(&stats->drops);
	return NULL;
}

static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queue *rq,
				    void *buf, u32 len,
				    unsigned int *xdp_xmit,
				    struct virtnet_rq_stats *stats)
{
	struct net_device *dev = vi->dev;
	struct sk_buff *skb = NULL;
	struct xdp_buff *xdp;
	u8 flags;

	len -= vi->hdr_len;

	u64_stats_add(&stats->bytes, len);

	xdp = buf_to_xdp(vi, rq, buf, len);
	if (!xdp)
		return;

	if (unlikely(len < ETH_HLEN)) {
		pr_debug("%s: short packet %i\n", dev->name, len);
		DEV_STATS_INC(dev, rx_length_errors);
		xsk_buff_free(xdp);
		return;
	}

	flags = ((struct virtio_net_common_hdr *)(xdp->data - vi->hdr_len))->hdr.flags;

	if (!vi->mergeable_rx_bufs)
		skb = virtnet_receive_xsk_small(dev, vi, rq, xdp, xdp_xmit, stats);
	else
		skb = virtnet_receive_xsk_merge(dev, vi, rq, xdp, xdp_xmit, stats);

	if (skb)
		virtnet_receive_done(vi, rq, skb, flags);
}

static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue *rq,
				   struct xsk_buff_pool *pool, gfp_t gfp)
{
	struct xdp_buff **xsk_buffs;
	dma_addr_t addr;
	int err = 0;
	u32 len, i;
	int num;

	xsk_buffs = rq->xsk_buffs;

	num = xsk_buff_alloc_batch(pool, xsk_buffs, rq->vq->num_free);
	if (!num)
		return -ENOMEM;

	len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len;

	for (i = 0; i < num; ++i) {
		/* Use part of XDP_PACKET_HEADROOM as the virtnet hdr space.
		 * We assume XDP_PACKET_HEADROOM is larger than hdr->len.
		 * (see function virtnet_xsk_pool_enable)
		 */
		addr = xsk_buff_xdp_get_dma(xsk_buffs[i]) - vi->hdr_len;

		sg_init_table(rq->sg, 1);
		sg_fill_dma(rq->sg, addr, len);

		err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, xsk_buffs[i], gfp);
		if (err)
			goto err;
	}

	return num;

err:
	for (; i < num; ++i)
		xsk_buff_free(xsk_buffs[i]);

	return err;
}

static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct send_queue *sq;

	if (!netif_running(dev))
		return -ENETDOWN;

	if (qid >= vi->curr_queue_pairs)
		return -EINVAL;

	sq = &vi->sq[qid];

	if (napi_if_scheduled_mark_missed(&sq->napi))
		return 0;

	local_bh_disable();
	virtqueue_napi_schedule(&sq->napi, sq->vq);
	local_bh_enable();

	return 0;
}

static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
				  struct send_queue *sq,
				  struct xdp_frame *xdpf)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr;
	struct skb_shared_info *shinfo;
	u8 nr_frags = 0;
	int err, i;

	if (unlikely(xdpf->headroom < vi->hdr_len))
		return -EOVERFLOW;

	if (unlikely(xdp_frame_has_frags(xdpf))) {
		shinfo = xdp_get_shared_info_from_frame(xdpf);
		nr_frags = shinfo->nr_frags;
	}

	/* In the wrapping function virtnet_xdp_xmit(), we need to free
	 * up the pending old buffers, where we need to calculate the
	 * position of skb_shared_info in xdp_get_frame_len() and
	 * xdp_return_frame(), which will involve xdpf->data and
	 * xdpf->headroom. Therefore, we need to update the value of
	 * headroom synchronously here.
	 */
	xdpf->headroom -= vi->hdr_len;
	xdpf->data -= vi->hdr_len;
	/* Zero header and leave csum up to XDP layers */
	hdr = xdpf->data;
	memset(hdr, 0, vi->hdr_len);
	xdpf->len += vi->hdr_len;

	sg_init_table(sq->sg, nr_frags + 1);
	sg_set_buf(sq->sg, xdpf->data, xdpf->len);
	for (i = 0; i < nr_frags; i++) {
		skb_frag_t *frag = &shinfo->frags[i];

		sg_set_page(&sq->sg[i + 1], skb_frag_page(frag),
			    skb_frag_size(frag), skb_frag_off(frag));
	}

	err = virtqueue_add_outbuf(sq->vq, sq->sg, nr_frags + 1,
				   xdp_to_ptr(xdpf), GFP_ATOMIC);
	if (unlikely(err))
		return -ENOSPC; /* Caller handle free/refcnt */

	return 0;
}

/* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on
 * the current cpu, so it does not need to be locked.
 *
 * Here we use a macro instead of inline functions because we have to deal with
 * three issues at the same time: 1. the choice of sq. 2. judge and execute the
 * lock/unlock of txq. 3. make sparse happy. It is difficult for two inline
 * functions to perfectly solve these three problems at the same time.
 */
#define virtnet_xdp_get_sq(vi) ({					\
	int cpu = smp_processor_id();					\
	struct netdev_queue *txq;					\
	typeof(vi) v = (vi);						\
	unsigned int qp;						\
									\
	if (v->curr_queue_pairs > nr_cpu_ids) {				\
		qp = v->curr_queue_pairs - v->xdp_queue_pairs;		\
		qp += cpu;						\
		txq = netdev_get_tx_queue(v->dev, qp);			\
		__netif_tx_acquire(txq);				\
	} else {							\
		qp = cpu % v->curr_queue_pairs;				\
		txq = netdev_get_tx_queue(v->dev, qp);			\
		__netif_tx_lock(txq, cpu);				\
	}								\
	v->sq + qp;							\
})

#define virtnet_xdp_put_sq(vi, q) {					\
	struct netdev_queue *txq;					\
	typeof(vi) v = (vi);						\
									\
	txq = netdev_get_tx_queue(v->dev, (q) - v->sq);			\
	if (v->curr_queue_pairs > nr_cpu_ids)				\
		__netif_tx_release(txq);				\
	else								\
		__netif_tx_unlock(txq);					\
}
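/* Typical usage of the pair above, as in virtnet_xdp_xmit() below
 * (sketch only):
 *
 *	sq = virtnet_xdp_get_sq(vi);
 *	... queue XDP frames on sq->vq ...
 *	virtnet_xdp_put_sq(vi, sq);
 */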

static int virtnet_xdp_xmit(struct net_device *dev,
			    int n, struct xdp_frame **frames, u32 flags)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct virtnet_sq_free_stats stats = {0};
	struct receive_queue *rq = vi->rq;
	struct bpf_prog *xdp_prog;
	struct send_queue *sq;
	int nxmit = 0;
	int kicks = 0;
	int ret;
	int i;

	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
	 * indicate XDP resources have been successfully allocated.
	 */
	xdp_prog = rcu_access_pointer(rq->xdp_prog);
	if (!xdp_prog)
		return -ENXIO;

	sq = virtnet_xdp_get_sq(vi);

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
		ret = -EINVAL;
		goto out;
	}

	/* Free up any pending old buffers before queueing new ones. */
	__free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq),
			false, &stats);

	for (i = 0; i < n; i++) {
		struct xdp_frame *xdpf = frames[i];

		if (__virtnet_xdp_xmit_one(vi, sq, xdpf))
			break;
		nxmit++;
	}
	ret = nxmit;

	if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq))
		check_sq_full_and_disable(vi, dev, sq);

	if (flags & XDP_XMIT_FLUSH) {
		if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
			kicks = 1;
	}
out:
	u64_stats_update_begin(&sq->stats.syncp);
	u64_stats_add(&sq->stats.bytes, stats.bytes);
	u64_stats_add(&sq->stats.packets, stats.packets);
	u64_stats_add(&sq->stats.xdp_tx, n);
	u64_stats_add(&sq->stats.xdp_tx_drops, n - nxmit);
	u64_stats_add(&sq->stats.kicks, kicks);
	u64_stats_update_end(&sq->stats.syncp);

	virtnet_xdp_put_sq(vi, sq);
	return ret;
}

static void put_xdp_frags(struct xdp_buff *xdp)
{
	struct skb_shared_info *shinfo;
	struct page *xdp_page;
	int i;

	if (xdp_buff_has_frags(xdp)) {
		shinfo = xdp_get_shared_info_from_buff(xdp);
		for (i = 0; i < shinfo->nr_frags; i++) {
			xdp_page = skb_frag_page(&shinfo->frags[i]);
			put_page(xdp_page);
		}
	}
}

static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
			       struct net_device *dev,
			       unsigned int *xdp_xmit,
			       struct virtnet_rq_stats *stats)
{
	struct xdp_frame *xdpf;
	int err;
	u32 act;

	act = bpf_prog_run_xdp(xdp_prog, xdp);
	u64_stats_inc(&stats->xdp_packets);

	switch (act) {
	case XDP_PASS:
		return act;

	case XDP_TX:
		u64_stats_inc(&stats->xdp_tx);
		xdpf = xdp_convert_buff_to_frame(xdp);
		if (unlikely(!xdpf)) {
			netdev_dbg(dev, "convert buff to frame failed for xdp\n");
			return XDP_DROP;
		}

		err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
		if (unlikely(!err)) {
			xdp_return_frame_rx_napi(xdpf);
		} else if (unlikely(err < 0)) {
			trace_xdp_exception(dev, xdp_prog, act);
			return XDP_DROP;
		}
		*xdp_xmit |= VIRTIO_XDP_TX;
		return act;

	case XDP_REDIRECT:
		u64_stats_inc(&stats->xdp_redirects);
		err = xdp_do_redirect(dev, xdp, xdp_prog);
		if (err)
			return XDP_DROP;

		*xdp_xmit |= VIRTIO_XDP_REDIR;
		return act;

	default:
		bpf_warn_invalid_xdp_action(dev, xdp_prog, act);
		fallthrough;
	case XDP_ABORTED:
		trace_xdp_exception(dev, xdp_prog, act);
		fallthrough;
	case XDP_DROP:
		return XDP_DROP;
	}
}

static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
{
	return vi->xdp_enabled ? XDP_PACKET_HEADROOM : 0;
}

/* We copy the packet for XDP in the following cases:
 *
 * 1) Packet is scattered across multiple rx buffers.
 * 2) Headroom space is insufficient.
 *
 * This is inefficient but it's a temporary condition that
 * we hit right after XDP is enabled and until queue is refilled
 * with large buffers with sufficient headroom - so it should affect
 * at most queue size packets.
 * Afterwards, the conditions to enable
 * XDP should preclude the underlying device from sending packets
 * across multiple buffers (num_buf > 1), and we make sure buffers
 * have enough headroom.
 */
static struct page *xdp_linearize_page(struct receive_queue *rq,
				       int *num_buf,
				       struct page *p,
				       int offset,
				       int page_off,
				       unsigned int *len)
{
	int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	struct page *page;

	if (page_off + *len + tailroom > PAGE_SIZE)
		return NULL;

	page = alloc_page(GFP_ATOMIC);
	if (!page)
		return NULL;

	memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
	page_off += *len;

	while (--*num_buf) {
		unsigned int buflen;
		void *buf;
		int off;

		buf = virtnet_rq_get_buf(rq, &buflen, NULL);
		if (unlikely(!buf))
			goto err_buf;

		p = virt_to_head_page(buf);
		off = buf - page_address(p);

		/* guard against a misconfigured or uncooperative backend that
		 * is sending packets larger than the MTU.
		 */
		if ((page_off + buflen + tailroom) > PAGE_SIZE) {
			put_page(p);
			goto err_buf;
		}

		memcpy(page_address(page) + page_off,
		       page_address(p) + off, buflen);
		page_off += buflen;
		put_page(p);
	}

	/* Headroom does not contribute to packet length */
	*len = page_off - XDP_PACKET_HEADROOM;
	return page;
err_buf:
	__free_pages(page, 0);
	return NULL;
}

static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi,
					       unsigned int xdp_headroom,
					       void *buf,
					       unsigned int len)
{
	unsigned int header_offset;
	unsigned int headroom;
	unsigned int buflen;
	struct sk_buff *skb;

	header_offset = VIRTNET_RX_PAD + xdp_headroom;
	headroom = vi->hdr_len + header_offset;
	buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
		 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	skb = virtnet_build_skb(buf, buflen, headroom, len);
	if (unlikely(!skb))
		return NULL;

	buf += header_offset;
	memcpy(skb_vnet_common_hdr(skb), buf, vi->hdr_len);

	return skb;
}
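/* Rough layout of a small-mode receive buffer as assumed above (illustrative
 * only; offsets depend on xdp_headroom):
 *
 *	buf
 *	|<- VIRTNET_RX_PAD ->|<- xdp_headroom ->|<- vi->hdr_len ->|<- data ...
 *
 * header_offset = VIRTNET_RX_PAD + xdp_headroom points at the virtio header,
 * and the skb data starts vi->hdr_len bytes after that.
 */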

static struct sk_buff *receive_small_xdp(struct net_device *dev,
					 struct virtnet_info *vi,
					 struct receive_queue *rq,
					 struct bpf_prog *xdp_prog,
					 void *buf,
					 unsigned int xdp_headroom,
					 unsigned int len,
					 unsigned int *xdp_xmit,
					 struct virtnet_rq_stats *stats)
{
	unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
	unsigned int headroom = vi->hdr_len + header_offset;
	struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
	struct page *page = virt_to_head_page(buf);
	struct page *xdp_page;
	unsigned int buflen;
	struct xdp_buff xdp;
	struct sk_buff *skb;
	unsigned int metasize = 0;
	u32 act;

	if (unlikely(hdr->hdr.gso_type))
		goto err_xdp;

	/* Partially checksummed packets must be dropped. */
	if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))
		goto err_xdp;

	buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
		 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
		int offset = buf - page_address(page) + header_offset;
		unsigned int tlen = len + vi->hdr_len;
		int num_buf = 1;

		xdp_headroom = virtnet_get_headroom(vi);
		header_offset = VIRTNET_RX_PAD + xdp_headroom;
		headroom = vi->hdr_len + header_offset;
		buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
		xdp_page = xdp_linearize_page(rq, &num_buf, page,
					      offset, header_offset,
					      &tlen);
		if (!xdp_page)
			goto err_xdp;

		buf = page_address(xdp_page);
		put_page(page);
		page = xdp_page;
	}

	xdp_init_buff(&xdp, buflen, &rq->xdp_rxq);
	xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len,
			 xdp_headroom, len, true);

	act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);

	switch (act) {
	case XDP_PASS:
		/* Recalculate length in case bpf program changed it */
		len = xdp.data_end - xdp.data;
		metasize = xdp.data - xdp.data_meta;
		break;

	case XDP_TX:
	case XDP_REDIRECT:
		goto xdp_xmit;

	default:
		goto err_xdp;
	}

	skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len);
	if (unlikely(!skb))
		goto err;

	if (metasize)
		skb_metadata_set(skb, metasize);

	return skb;

err_xdp:
	u64_stats_inc(&stats->xdp_drops);
err:
	u64_stats_inc(&stats->drops);
	put_page(page);
xdp_xmit:
	return NULL;
}

static struct sk_buff *receive_small(struct net_device *dev,
				     struct virtnet_info *vi,
				     struct receive_queue *rq,
				     void *buf, void *ctx,
				     unsigned int len,
				     unsigned int *xdp_xmit,
				     struct virtnet_rq_stats *stats)
{
	unsigned int xdp_headroom = (unsigned long)ctx;
	struct page *page = virt_to_head_page(buf);
	struct sk_buff *skb;

	len -= vi->hdr_len;
	u64_stats_add(&stats->bytes, len);

	if (unlikely(len > GOOD_PACKET_LEN)) {
		pr_debug("%s: rx error: len %u exceeds max size %d\n",
			 dev->name, len, GOOD_PACKET_LEN);
		DEV_STATS_INC(dev, rx_length_errors);
		goto err;
	}

	if (unlikely(vi->xdp_enabled)) {
		struct bpf_prog *xdp_prog;

		rcu_read_lock();
		xdp_prog = rcu_dereference(rq->xdp_prog);
		if (xdp_prog) {
			skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf,
						xdp_headroom, len, xdp_xmit,
						stats);
			rcu_read_unlock();
			return skb;
		}
		rcu_read_unlock();
	}

	skb = receive_small_build_skb(vi, xdp_headroom, buf, len);
	if (likely(skb))
		return skb;

err:
	u64_stats_inc(&stats->drops);
	put_page(page);
	return NULL;
}

static struct sk_buff *receive_big(struct net_device *dev,
				   struct virtnet_info *vi,
				   struct receive_queue *rq,
				   void *buf,
				   unsigned int len,
				   struct virtnet_rq_stats *stats)
{
	struct page *page = buf;
	struct sk_buff *skb =
		page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0);

	u64_stats_add(&stats->bytes, len - vi->hdr_len);
	if (unlikely(!skb))
		goto err;

	return skb;

err:
	u64_stats_inc(&stats->drops);
	give_pages(rq, page);
	return NULL;
}

static void mergeable_buf_free(struct receive_queue *rq, int num_buf,
			       struct net_device *dev,
			       struct virtnet_rq_stats *stats)
{
	struct page *page;
	void *buf;
	int len;

	while (num_buf-- > 1) {
		buf = virtnet_rq_get_buf(rq, &len, NULL);
		if (unlikely(!buf)) {
			pr_debug("%s: rx error: %d buffers missing\n",
				 dev->name, num_buf);
			DEV_STATS_INC(dev, rx_length_errors);
			break;
		}
		u64_stats_add(&stats->bytes, len);
		page = virt_to_head_page(buf);
		put_page(page);
	}
}

/* Why not use xdp_build_skb_from_frame() ?
 * XDP core assumes that xdp frags are PAGE_SIZE in length, while in
 * virtio-net there are 2 points that do not match its requirements:
 * 1. The size of the prefilled buffer is not fixed before xdp is set.
 * 2. xdp_build_skb_from_frame() does more checks that we don't need,
 *    like eth_type_trans() (which virtio-net does in receive_buf()).
 */
static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev,
					       struct virtnet_info *vi,
					       struct xdp_buff *xdp,
					       unsigned int xdp_frags_truesz)
{
	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
	unsigned int headroom, data_len;
	struct sk_buff *skb;
	int metasize;
	u8 nr_frags;

	if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) {
		pr_debug("Error building skb as missing reserved tailroom for xdp");
		return NULL;
	}

	if (unlikely(xdp_buff_has_frags(xdp)))
		nr_frags = sinfo->nr_frags;

	skb = build_skb(xdp->data_hard_start, xdp->frame_sz);
	if (unlikely(!skb))
		return NULL;

	headroom = xdp->data - xdp->data_hard_start;
	data_len = xdp->data_end - xdp->data;
	skb_reserve(skb, headroom);
	__skb_put(skb, data_len);

	metasize = xdp->data - xdp->data_meta;
	metasize = metasize > 0 ? metasize : 0;
	if (metasize)
		skb_metadata_set(skb, metasize);

	if (unlikely(xdp_buff_has_frags(xdp)))
		xdp_update_skb_shared_info(skb, nr_frags,
					   sinfo->xdp_frags_size,
					   xdp_frags_truesz,
					   xdp_buff_is_frag_pfmemalloc(xdp));

	return skb;
}

/* TODO: build xdp in big mode */
static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
				      struct virtnet_info *vi,
				      struct receive_queue *rq,
				      struct xdp_buff *xdp,
				      void *buf,
				      unsigned int len,
				      unsigned int frame_sz,
				      int *num_buf,
				      unsigned int *xdp_frags_truesize,
				      struct virtnet_rq_stats *stats)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
	unsigned int headroom, tailroom, room;
	unsigned int truesize, cur_frag_size;
	struct skb_shared_info *shinfo;
	unsigned int xdp_frags_truesz = 0;
	struct page *page;
	skb_frag_t *frag;
	int offset;
	void *ctx;

	xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq);
	xdp_prepare_buff(xdp, buf - XDP_PACKET_HEADROOM,
			 XDP_PACKET_HEADROOM + vi->hdr_len, len - vi->hdr_len, true);

	if (!*num_buf)
		return 0;

	if (*num_buf > 1) {
		/* If we want to build multi-buffer xdp, we need
		 * to set the XDP_FLAGS_HAS_FRAG bit in the xdp_buff
		 * flags.
		 */
		if (!xdp_buff_has_frags(xdp))
			xdp_buff_set_frags_flag(xdp);

		shinfo = xdp_get_shared_info_from_buff(xdp);
		shinfo->nr_frags = 0;
		shinfo->xdp_frags_size = 0;
	}

	if (*num_buf > MAX_SKB_FRAGS + 1)
		return -EINVAL;

	while (--*num_buf > 0) {
		buf = virtnet_rq_get_buf(rq, &len, &ctx);
		if (unlikely(!buf)) {
			pr_debug("%s: rx error: %d buffers out of %d missing\n",
				 dev->name, *num_buf,
				 virtio16_to_cpu(vi->vdev, hdr->num_buffers));
			DEV_STATS_INC(dev, rx_length_errors);
			goto err;
		}

		u64_stats_add(&stats->bytes, len);
		page = virt_to_head_page(buf);
		offset = buf - page_address(page);

		truesize = mergeable_ctx_to_truesize(ctx);
		headroom = mergeable_ctx_to_headroom(ctx);
		tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
		room = SKB_DATA_ALIGN(headroom + tailroom);

		cur_frag_size = truesize;
		xdp_frags_truesz += cur_frag_size;
		if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) {
			put_page(page);
			pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
				 dev->name, len, (unsigned long)(truesize - room));
			DEV_STATS_INC(dev, rx_length_errors);
			goto err;
		}

		frag = &shinfo->frags[shinfo->nr_frags++];
		skb_frag_fill_page_desc(frag, page, offset, len);
		if (page_is_pfmemalloc(page))
			xdp_buff_set_frag_pfmemalloc(xdp);

		shinfo->xdp_frags_size += len;
	}

	*xdp_frags_truesize = xdp_frags_truesz;
	return 0;

err:
	put_xdp_frags(xdp);
	return -EINVAL;
}

static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
				   struct receive_queue *rq,
				   struct bpf_prog *xdp_prog,
				   void *ctx,
				   unsigned int *frame_sz,
				   int *num_buf,
				   struct page **page,
				   int offset,
				   unsigned int *len,
				   struct virtio_net_hdr_mrg_rxbuf *hdr)
{
	unsigned int truesize = mergeable_ctx_to_truesize(ctx);
	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
	struct page *xdp_page;
	unsigned int xdp_room;

	/* Transient failure which in theory could occur if
	 * in-flight packets from before XDP was enabled reach
	 * the receive path after XDP is loaded.
	 */
	if (unlikely(hdr->hdr.gso_type))
		return NULL;

	/* Partially checksummed packets must be dropped. */
	if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))
		return NULL;

	/* Now the XDP core assumes the frag size is PAGE_SIZE, but buffers
	 * with headroom may add a hole in truesize, which
	 * makes their length exceed PAGE_SIZE. So we disable the
	 * hole mechanism for xdp. See add_recvbuf_mergeable().
	 */
	*frame_sz = truesize;

	if (likely(headroom >= virtnet_get_headroom(vi) &&
		   (*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) {
		return page_address(*page) + offset;
	}

	/* This happens when headroom is not enough because
	 * the buffer was prefilled before XDP was set.
	 * This should only happen for the first several packets.
	 * In fact, vq reset can be used here to help us clean up
	 * the prefilled buffers, but many existing devices do not
	 * support it, and we don't want to bother users who are
	 * using xdp normally.
	 */
	if (!xdp_prog->aux->xdp_has_frags) {
		/* linearize data for XDP */
		xdp_page = xdp_linearize_page(rq, num_buf,
					      *page, offset,
					      XDP_PACKET_HEADROOM,
					      len);
		if (!xdp_page)
			return NULL;
	} else {
		xdp_room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM +
					  sizeof(struct skb_shared_info));
		if (*len + xdp_room > PAGE_SIZE)
			return NULL;

		xdp_page = alloc_page(GFP_ATOMIC);
		if (!xdp_page)
			return NULL;

		memcpy(page_address(xdp_page) + XDP_PACKET_HEADROOM,
		       page_address(*page) + offset, *len);
	}

	*frame_sz = PAGE_SIZE;

	put_page(*page);

	*page = xdp_page;

	return page_address(*page) + XDP_PACKET_HEADROOM;
}

static struct sk_buff *receive_mergeable_xdp(struct net_device *dev,
					     struct virtnet_info *vi,
					     struct receive_queue *rq,
					     struct bpf_prog *xdp_prog,
					     void *buf,
					     void *ctx,
					     unsigned int len,
					     unsigned int *xdp_xmit,
					     struct virtnet_rq_stats *stats)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
	int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
	struct page *page = virt_to_head_page(buf);
	int offset = buf - page_address(page);
	unsigned int xdp_frags_truesz = 0;
	struct sk_buff *head_skb;
	unsigned int frame_sz;
	struct xdp_buff xdp;
	void *data;
	u32 act;
	int err;

	data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, &frame_sz, &num_buf, &page,
				     offset, &len, hdr);
	if (unlikely(!data))
		goto err_xdp;

	err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz,
					 &num_buf, &xdp_frags_truesz, stats);
	if (unlikely(err))
		goto err_xdp;

	act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);

	switch (act) {
	case XDP_PASS:
		head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz);
		if (unlikely(!head_skb))
			break;
		return head_skb;

	case XDP_TX:
	case XDP_REDIRECT:
		return NULL;

	default:
		break;
	}

	put_xdp_frags(&xdp);

err_xdp:
	put_page(page);
	mergeable_buf_free(rq, num_buf, dev, stats);

	u64_stats_inc(&stats->xdp_drops);
	u64_stats_inc(&stats->drops);
	return NULL;
}

static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb,
					       struct sk_buff *curr_skb,
					       struct page *page, void *buf,
					       int len, int truesize)
{
	int num_skb_frags;
	int offset;

	num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
	if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
		struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC);

		if (unlikely(!nskb))
			return NULL;

		if (curr_skb == head_skb)
			skb_shinfo(curr_skb)->frag_list = nskb;
		else
			curr_skb->next = nskb;
		curr_skb = nskb;
		head_skb->truesize += nskb->truesize;
		num_skb_frags = 0;
	}

	if (curr_skb != head_skb) {
		head_skb->data_len += len;
		head_skb->len += len;
		head_skb->truesize += truesize;
	}

	offset = buf - page_address(page);
	if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
		put_page(page);
		skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
				     len, truesize);
	} else {
		skb_add_rx_frag(curr_skb, num_skb_frags, page,
				offset, len, truesize);
	}

	return curr_skb;
}

static struct sk_buff *receive_mergeable(struct net_device *dev,
					 struct virtnet_info *vi,
					 struct receive_queue *rq,
					 void *buf,
					 void *ctx,
					 unsigned int len,
					 unsigned int *xdp_xmit,
					 struct virtnet_rq_stats *stats)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
	int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
	struct page *page = virt_to_head_page(buf);
	int offset = buf - page_address(page);
	struct sk_buff *head_skb, *curr_skb;
	unsigned int truesize = mergeable_ctx_to_truesize(ctx);
	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
	unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
	unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);

	head_skb = NULL;
	u64_stats_add(&stats->bytes, len - vi->hdr_len);

	if (unlikely(len > truesize - room)) {
		pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
			 dev->name, len, (unsigned long)(truesize - room));
		DEV_STATS_INC(dev, rx_length_errors);
		goto err_skb;
	}

	if (unlikely(vi->xdp_enabled)) {
		struct bpf_prog *xdp_prog;

		rcu_read_lock();
		xdp_prog = rcu_dereference(rq->xdp_prog);
		if (xdp_prog) {
			head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx,
							 len, xdp_xmit, stats);
			rcu_read_unlock();
			return head_skb;
		}
		rcu_read_unlock();
	}

	head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom);
	curr_skb = head_skb;

	if (unlikely(!curr_skb))
		goto err_skb;
	while (--num_buf) {
		buf = virtnet_rq_get_buf(rq, &len, &ctx);
		if (unlikely(!buf)) {
			pr_debug("%s: rx error: %d buffers out of %d missing\n",
				 dev->name, num_buf,
				 virtio16_to_cpu(vi->vdev,
						 hdr->num_buffers));
			DEV_STATS_INC(dev, rx_length_errors);
			goto err_buf;
		}

		u64_stats_add(&stats->bytes, len);
		page = virt_to_head_page(buf);

		truesize = mergeable_ctx_to_truesize(ctx);
		headroom = mergeable_ctx_to_headroom(ctx);
		tailroom = headroom ?
sizeof(struct skb_shared_info) : 0; 2278 room = SKB_DATA_ALIGN(headroom + tailroom); 2279 if (unlikely(len > truesize - room)) { 2280 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 2281 dev->name, len, (unsigned long)(truesize - room)); 2282 DEV_STATS_INC(dev, rx_length_errors); 2283 goto err_skb; 2284 } 2285 2286 curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, 2287 buf, len, truesize); 2288 if (!curr_skb) 2289 goto err_skb; 2290 } 2291 2292 ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len); 2293 return head_skb; 2294 2295 err_skb: 2296 put_page(page); 2297 mergeable_buf_free(rq, num_buf, dev, stats); 2298 2299 err_buf: 2300 u64_stats_inc(&stats->drops); 2301 dev_kfree_skb(head_skb); 2302 return NULL; 2303 } 2304 2305 static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, 2306 struct sk_buff *skb) 2307 { 2308 enum pkt_hash_types rss_hash_type; 2309 2310 if (!hdr_hash || !skb) 2311 return; 2312 2313 switch (__le16_to_cpu(hdr_hash->hash_report)) { 2314 case VIRTIO_NET_HASH_REPORT_TCPv4: 2315 case VIRTIO_NET_HASH_REPORT_UDPv4: 2316 case VIRTIO_NET_HASH_REPORT_TCPv6: 2317 case VIRTIO_NET_HASH_REPORT_UDPv6: 2318 case VIRTIO_NET_HASH_REPORT_TCPv6_EX: 2319 case VIRTIO_NET_HASH_REPORT_UDPv6_EX: 2320 rss_hash_type = PKT_HASH_TYPE_L4; 2321 break; 2322 case VIRTIO_NET_HASH_REPORT_IPv4: 2323 case VIRTIO_NET_HASH_REPORT_IPv6: 2324 case VIRTIO_NET_HASH_REPORT_IPv6_EX: 2325 rss_hash_type = PKT_HASH_TYPE_L3; 2326 break; 2327 case VIRTIO_NET_HASH_REPORT_NONE: 2328 default: 2329 rss_hash_type = PKT_HASH_TYPE_NONE; 2330 } 2331 skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type); 2332 } 2333 2334 static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, 2335 struct sk_buff *skb, u8 flags) 2336 { 2337 struct virtio_net_common_hdr *hdr; 2338 struct net_device *dev = vi->dev; 2339 2340 hdr = skb_vnet_common_hdr(skb); 2341 if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report) 2342 virtio_skb_set_hash(&hdr->hash_v1_hdr, skb); 2343 2344 if (flags & VIRTIO_NET_HDR_F_DATA_VALID) 2345 skb->ip_summed = CHECKSUM_UNNECESSARY; 2346 2347 if (virtio_net_hdr_to_skb(skb, &hdr->hdr, 2348 virtio_is_little_endian(vi->vdev))) { 2349 net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n", 2350 dev->name, hdr->hdr.gso_type, 2351 hdr->hdr.gso_size); 2352 goto frame_err; 2353 } 2354 2355 skb_record_rx_queue(skb, vq2rxq(rq->vq)); 2356 skb->protocol = eth_type_trans(skb, dev); 2357 pr_debug("Receiving skb proto 0x%04x len %i type %i\n", 2358 ntohs(skb->protocol), skb->len, skb->pkt_type); 2359 2360 napi_gro_receive(&rq->napi, skb); 2361 return; 2362 2363 frame_err: 2364 DEV_STATS_INC(dev, rx_frame_errors); 2365 dev_kfree_skb(skb); 2366 } 2367 2368 static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, 2369 void *buf, unsigned int len, void **ctx, 2370 unsigned int *xdp_xmit, 2371 struct virtnet_rq_stats *stats) 2372 { 2373 struct net_device *dev = vi->dev; 2374 struct sk_buff *skb; 2375 u8 flags; 2376 2377 if (unlikely(len < vi->hdr_len + ETH_HLEN)) { 2378 pr_debug("%s: short packet %i\n", dev->name, len); 2379 DEV_STATS_INC(dev, rx_length_errors); 2380 virtnet_rq_free_buf(vi, rq, buf); 2381 return; 2382 } 2383 2384 /* 1. Save the flags early, as the XDP program might overwrite them. 2385 * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID 2386 * stay valid after XDP processing. 2387 * 2. 
XDP doesn't work with partially checksummed packets (refer to 2388 * virtnet_xdp_set()), so packets marked as 2389 * VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing. 2390 */ 2391 flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; 2392 2393 if (vi->mergeable_rx_bufs) 2394 skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, 2395 stats); 2396 else if (vi->big_packets) 2397 skb = receive_big(dev, vi, rq, buf, len, stats); 2398 else 2399 skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); 2400 2401 if (unlikely(!skb)) 2402 return; 2403 2404 virtnet_receive_done(vi, rq, skb, flags); 2405 } 2406 2407 /* Unlike mergeable buffers, all buffers are allocated to the 2408 * same size, except for the headroom. For this reason we do 2409 * not need to use mergeable_len_to_ctx here - it is enough 2410 * to store the headroom as the context ignoring the truesize. 2411 */ 2412 static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, 2413 gfp_t gfp) 2414 { 2415 char *buf; 2416 unsigned int xdp_headroom = virtnet_get_headroom(vi); 2417 void *ctx = (void *)(unsigned long)xdp_headroom; 2418 int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom; 2419 int err; 2420 2421 len = SKB_DATA_ALIGN(len) + 2422 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 2423 2424 buf = virtnet_rq_alloc(rq, len, gfp); 2425 if (unlikely(!buf)) 2426 return -ENOMEM; 2427 2428 virtnet_rq_init_one_sg(rq, buf + VIRTNET_RX_PAD + xdp_headroom, 2429 vi->hdr_len + GOOD_PACKET_LEN); 2430 2431 err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); 2432 if (err < 0) { 2433 virtnet_rq_unmap(rq, buf, 0); 2434 put_page(virt_to_head_page(buf)); 2435 } 2436 2437 return err; 2438 } 2439 2440 static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, 2441 gfp_t gfp) 2442 { 2443 struct page *first, *list = NULL; 2444 char *p; 2445 int i, err, offset; 2446 2447 sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2); 2448 2449 /* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */ 2450 for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) { 2451 first = get_a_page(rq, gfp); 2452 if (!first) { 2453 if (list) 2454 give_pages(rq, list); 2455 return -ENOMEM; 2456 } 2457 sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE); 2458 2459 /* chain new page in list head to match sg */ 2460 first->private = (unsigned long)list; 2461 list = first; 2462 } 2463 2464 first = get_a_page(rq, gfp); 2465 if (!first) { 2466 give_pages(rq, list); 2467 return -ENOMEM; 2468 } 2469 p = page_address(first); 2470 2471 /* rq->sg[0], rq->sg[1] share the same page */ 2472 /* a separated rq->sg[0] for header - required in case !any_header_sg */ 2473 sg_set_buf(&rq->sg[0], p, vi->hdr_len); 2474 2475 /* rq->sg[1] for data packet, from offset */ 2476 offset = sizeof(struct padded_vnet_hdr); 2477 sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset); 2478 2479 /* chain first in list head */ 2480 first->private = (unsigned long)list; 2481 err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2, 2482 first, gfp); 2483 if (err < 0) 2484 give_pages(rq, first); 2485 2486 return err; 2487 } 2488 2489 static unsigned int get_mergeable_buf_len(struct receive_queue *rq, 2490 struct ewma_pkt_len *avg_pkt_len, 2491 unsigned int room) 2492 { 2493 struct virtnet_info *vi = rq->vq->vdev->priv; 2494 const size_t hdr_len = vi->hdr_len; 2495 unsigned int len; 2496 2497 if (room) 2498 return PAGE_SIZE - room; 2499 2500 len = hdr_len + clamp_t(unsigned int, 
ewma_pkt_len_read(avg_pkt_len), 2501 rq->min_buf_len, PAGE_SIZE - hdr_len); 2502 2503 return ALIGN(len, L1_CACHE_BYTES); 2504 } 2505 2506 static int add_recvbuf_mergeable(struct virtnet_info *vi, 2507 struct receive_queue *rq, gfp_t gfp) 2508 { 2509 struct page_frag *alloc_frag = &rq->alloc_frag; 2510 unsigned int headroom = virtnet_get_headroom(vi); 2511 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 2512 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); 2513 unsigned int len, hole; 2514 void *ctx; 2515 char *buf; 2516 int err; 2517 2518 /* Extra tailroom is needed to satisfy XDP's assumption. This 2519 * means rx frags coalescing won't work, but consider we've 2520 * disabled GSO for XDP, it won't be a big issue. 2521 */ 2522 len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); 2523 2524 buf = virtnet_rq_alloc(rq, len + room, gfp); 2525 if (unlikely(!buf)) 2526 return -ENOMEM; 2527 2528 buf += headroom; /* advance address leaving hole at front of pkt */ 2529 hole = alloc_frag->size - alloc_frag->offset; 2530 if (hole < len + room) { 2531 /* To avoid internal fragmentation, if there is very likely not 2532 * enough space for another buffer, add the remaining space to 2533 * the current buffer. 2534 * XDP core assumes that frame_size of xdp_buff and the length 2535 * of the frag are PAGE_SIZE, so we disable the hole mechanism. 2536 */ 2537 if (!headroom) 2538 len += hole; 2539 alloc_frag->offset += hole; 2540 } 2541 2542 virtnet_rq_init_one_sg(rq, buf, len); 2543 2544 ctx = mergeable_len_to_ctx(len + room, headroom); 2545 err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); 2546 if (err < 0) { 2547 virtnet_rq_unmap(rq, buf, 0); 2548 put_page(virt_to_head_page(buf)); 2549 } 2550 2551 return err; 2552 } 2553 2554 /* 2555 * Returns false if we couldn't fill entirely (OOM). 2556 * 2557 * Normally run in the receive path, but can also be run from ndo_open 2558 * before we're receiving packets, or from refill_work which is 2559 * careful to disable receiving (using napi_disable). 2560 */ 2561 static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq, 2562 gfp_t gfp) 2563 { 2564 int err; 2565 2566 if (rq->xsk_pool) { 2567 err = virtnet_add_recvbuf_xsk(vi, rq, rq->xsk_pool, gfp); 2568 goto kick; 2569 } 2570 2571 do { 2572 if (vi->mergeable_rx_bufs) 2573 err = add_recvbuf_mergeable(vi, rq, gfp); 2574 else if (vi->big_packets) 2575 err = add_recvbuf_big(vi, rq, gfp); 2576 else 2577 err = add_recvbuf_small(vi, rq, gfp); 2578 2579 if (err) 2580 break; 2581 } while (rq->vq->num_free); 2582 2583 kick: 2584 if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) { 2585 unsigned long flags; 2586 2587 flags = u64_stats_update_begin_irqsave(&rq->stats.syncp); 2588 u64_stats_inc(&rq->stats.kicks); 2589 u64_stats_update_end_irqrestore(&rq->stats.syncp, flags); 2590 } 2591 2592 return err != -ENOMEM; 2593 } 2594 2595 static void skb_recv_done(struct virtqueue *rvq) 2596 { 2597 struct virtnet_info *vi = rvq->vdev->priv; 2598 struct receive_queue *rq = &vi->rq[vq2rxq(rvq)]; 2599 2600 rq->calls++; 2601 virtqueue_napi_schedule(&rq->napi, rvq); 2602 } 2603 2604 static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi) 2605 { 2606 napi_enable(napi); 2607 2608 /* If all buffers were filled by other side before we napi_enabled, we 2609 * won't get another interrupt, so process any outstanding packets now. 2610 * Call local_bh_enable after to trigger softIRQ processing. 
2611 */ 2612 local_bh_disable(); 2613 virtqueue_napi_schedule(napi, vq); 2614 local_bh_enable(); 2615 } 2616 2617 static void virtnet_napi_tx_enable(struct virtnet_info *vi, 2618 struct virtqueue *vq, 2619 struct napi_struct *napi) 2620 { 2621 if (!napi->weight) 2622 return; 2623 2624 /* Tx napi touches cachelines on the cpu handling tx interrupts. Only 2625 * enable the feature if this is likely affine with the transmit path. 2626 */ 2627 if (!vi->affinity_hint_set) { 2628 napi->weight = 0; 2629 return; 2630 } 2631 2632 return virtnet_napi_enable(vq, napi); 2633 } 2634 2635 static void virtnet_napi_tx_disable(struct napi_struct *napi) 2636 { 2637 if (napi->weight) 2638 napi_disable(napi); 2639 } 2640 2641 static void refill_work(struct work_struct *work) 2642 { 2643 struct virtnet_info *vi = 2644 container_of(work, struct virtnet_info, refill.work); 2645 bool still_empty; 2646 int i; 2647 2648 for (i = 0; i < vi->curr_queue_pairs; i++) { 2649 struct receive_queue *rq = &vi->rq[i]; 2650 2651 napi_disable(&rq->napi); 2652 still_empty = !try_fill_recv(vi, rq, GFP_KERNEL); 2653 virtnet_napi_enable(rq->vq, &rq->napi); 2654 2655 /* In theory, this can happen: if we don't get any buffers in 2656 * we will *never* try to fill again. 2657 */ 2658 if (still_empty) 2659 schedule_delayed_work(&vi->refill, HZ/2); 2660 } 2661 } 2662 2663 static int virtnet_receive_xsk_bufs(struct virtnet_info *vi, 2664 struct receive_queue *rq, 2665 int budget, 2666 unsigned int *xdp_xmit, 2667 struct virtnet_rq_stats *stats) 2668 { 2669 unsigned int len; 2670 int packets = 0; 2671 void *buf; 2672 2673 while (packets < budget) { 2674 buf = virtqueue_get_buf(rq->vq, &len); 2675 if (!buf) 2676 break; 2677 2678 virtnet_receive_xsk_buf(vi, rq, buf, len, xdp_xmit, stats); 2679 packets++; 2680 } 2681 2682 return packets; 2683 } 2684 2685 static int virtnet_receive_packets(struct virtnet_info *vi, 2686 struct receive_queue *rq, 2687 int budget, 2688 unsigned int *xdp_xmit, 2689 struct virtnet_rq_stats *stats) 2690 { 2691 unsigned int len; 2692 int packets = 0; 2693 void *buf; 2694 2695 if (!vi->big_packets || vi->mergeable_rx_bufs) { 2696 void *ctx; 2697 while (packets < budget && 2698 (buf = virtnet_rq_get_buf(rq, &len, &ctx))) { 2699 receive_buf(vi, rq, buf, len, ctx, xdp_xmit, stats); 2700 packets++; 2701 } 2702 } else { 2703 while (packets < budget && 2704 (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) { 2705 receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats); 2706 packets++; 2707 } 2708 } 2709 2710 return packets; 2711 } 2712 2713 static int virtnet_receive(struct receive_queue *rq, int budget, 2714 unsigned int *xdp_xmit) 2715 { 2716 struct virtnet_info *vi = rq->vq->vdev->priv; 2717 struct virtnet_rq_stats stats = {}; 2718 int i, packets; 2719 2720 if (rq->xsk_pool) 2721 packets = virtnet_receive_xsk_bufs(vi, rq, budget, xdp_xmit, &stats); 2722 else 2723 packets = virtnet_receive_packets(vi, rq, budget, xdp_xmit, &stats); 2724 2725 if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { 2726 if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { 2727 spin_lock(&vi->refill_lock); 2728 if (vi->refill_enabled) 2729 schedule_delayed_work(&vi->refill, 0); 2730 spin_unlock(&vi->refill_lock); 2731 } 2732 } 2733 2734 u64_stats_set(&stats.packets, packets); 2735 u64_stats_update_begin(&rq->stats.syncp); 2736 for (i = 0; i < ARRAY_SIZE(virtnet_rq_stats_desc); i++) { 2737 size_t offset = virtnet_rq_stats_desc[i].offset; 2738 u64_stats_t *item, *src; 2739 2740 item = (u64_stats_t *)((u8 *)&rq->stats + 
offset); 2741 src = (u64_stats_t *)((u8 *)&stats + offset); 2742 u64_stats_add(item, u64_stats_read(src)); 2743 } 2744 2745 u64_stats_add(&rq->stats.packets, u64_stats_read(&stats.packets)); 2746 u64_stats_add(&rq->stats.bytes, u64_stats_read(&stats.bytes)); 2747 2748 u64_stats_update_end(&rq->stats.syncp); 2749 2750 return packets; 2751 } 2752 2753 static void virtnet_poll_cleantx(struct receive_queue *rq, int budget) 2754 { 2755 struct virtnet_info *vi = rq->vq->vdev->priv; 2756 unsigned int index = vq2rxq(rq->vq); 2757 struct send_queue *sq = &vi->sq[index]; 2758 struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index); 2759 2760 if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index)) 2761 return; 2762 2763 if (__netif_tx_trylock(txq)) { 2764 if (sq->reset) { 2765 __netif_tx_unlock(txq); 2766 return; 2767 } 2768 2769 do { 2770 virtqueue_disable_cb(sq->vq); 2771 free_old_xmit(sq, txq, !!budget); 2772 } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 2773 2774 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) { 2775 if (netif_tx_queue_stopped(txq)) { 2776 u64_stats_update_begin(&sq->stats.syncp); 2777 u64_stats_inc(&sq->stats.wake); 2778 u64_stats_update_end(&sq->stats.syncp); 2779 } 2780 netif_tx_wake_queue(txq); 2781 } 2782 2783 __netif_tx_unlock(txq); 2784 } 2785 } 2786 2787 static void virtnet_rx_dim_update(struct virtnet_info *vi, struct receive_queue *rq) 2788 { 2789 struct dim_sample cur_sample = {}; 2790 2791 if (!rq->packets_in_napi) 2792 return; 2793 2794 /* Don't need protection when fetching stats, since fetcher and 2795 * updater of the stats are in same context 2796 */ 2797 dim_update_sample(rq->calls, 2798 u64_stats_read(&rq->stats.packets), 2799 u64_stats_read(&rq->stats.bytes), 2800 &cur_sample); 2801 2802 net_dim(&rq->dim, cur_sample); 2803 rq->packets_in_napi = 0; 2804 } 2805 2806 static int virtnet_poll(struct napi_struct *napi, int budget) 2807 { 2808 struct receive_queue *rq = 2809 container_of(napi, struct receive_queue, napi); 2810 struct virtnet_info *vi = rq->vq->vdev->priv; 2811 struct send_queue *sq; 2812 unsigned int received; 2813 unsigned int xdp_xmit = 0; 2814 bool napi_complete; 2815 2816 virtnet_poll_cleantx(rq, budget); 2817 2818 received = virtnet_receive(rq, budget, &xdp_xmit); 2819 rq->packets_in_napi += received; 2820 2821 if (xdp_xmit & VIRTIO_XDP_REDIR) 2822 xdp_do_flush(); 2823 2824 /* Out of packets? */ 2825 if (received < budget) { 2826 napi_complete = virtqueue_napi_complete(napi, rq->vq, received); 2827 /* Intentionally not taking dim_lock here. This may result in a 2828 * spurious net_dim call. But if that happens virtnet_rx_dim_work 2829 * will not act on the scheduled work. 
2830 */ 2831 if (napi_complete && rq->dim_enabled) 2832 virtnet_rx_dim_update(vi, rq); 2833 } 2834 2835 if (xdp_xmit & VIRTIO_XDP_TX) { 2836 sq = virtnet_xdp_get_sq(vi); 2837 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 2838 u64_stats_update_begin(&sq->stats.syncp); 2839 u64_stats_inc(&sq->stats.kicks); 2840 u64_stats_update_end(&sq->stats.syncp); 2841 } 2842 virtnet_xdp_put_sq(vi, sq); 2843 } 2844 2845 return received; 2846 } 2847 2848 static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index) 2849 { 2850 virtnet_napi_tx_disable(&vi->sq[qp_index].napi); 2851 napi_disable(&vi->rq[qp_index].napi); 2852 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 2853 } 2854 2855 static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index) 2856 { 2857 struct net_device *dev = vi->dev; 2858 int err; 2859 2860 err = xdp_rxq_info_reg(&vi->rq[qp_index].xdp_rxq, dev, qp_index, 2861 vi->rq[qp_index].napi.napi_id); 2862 if (err < 0) 2863 return err; 2864 2865 err = xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq, 2866 MEM_TYPE_PAGE_SHARED, NULL); 2867 if (err < 0) 2868 goto err_xdp_reg_mem_model; 2869 2870 netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, qp_index)); 2871 virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi); 2872 virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi); 2873 2874 return 0; 2875 2876 err_xdp_reg_mem_model: 2877 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 2878 return err; 2879 } 2880 2881 static void virtnet_cancel_dim(struct virtnet_info *vi, struct dim *dim) 2882 { 2883 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 2884 return; 2885 net_dim_work_cancel(dim); 2886 } 2887 2888 static int virtnet_open(struct net_device *dev) 2889 { 2890 struct virtnet_info *vi = netdev_priv(dev); 2891 int i, err; 2892 2893 enable_delayed_refill(vi); 2894 2895 for (i = 0; i < vi->max_queue_pairs; i++) { 2896 if (i < vi->curr_queue_pairs) 2897 /* Make sure we have some buffers: if oom use wq. 
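 * (A false return from try_fill_recv() means the ring could not be filled
 * completely because allocations failed; refill_work re-runs it from
 * process context and re-arms itself every HZ/2 until the ring fills.)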
*/ 2898 if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) 2899 schedule_delayed_work(&vi->refill, 0); 2900 2901 err = virtnet_enable_queue_pair(vi, i); 2902 if (err < 0) 2903 goto err_enable_qp; 2904 } 2905 2906 return 0; 2907 2908 err_enable_qp: 2909 disable_delayed_refill(vi); 2910 cancel_delayed_work_sync(&vi->refill); 2911 2912 for (i--; i >= 0; i--) { 2913 virtnet_disable_queue_pair(vi, i); 2914 virtnet_cancel_dim(vi, &vi->rq[i].dim); 2915 } 2916 2917 return err; 2918 } 2919 2920 static int virtnet_poll_tx(struct napi_struct *napi, int budget) 2921 { 2922 struct send_queue *sq = container_of(napi, struct send_queue, napi); 2923 struct virtnet_info *vi = sq->vq->vdev->priv; 2924 unsigned int index = vq2txq(sq->vq); 2925 struct netdev_queue *txq; 2926 int opaque; 2927 bool done; 2928 2929 if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { 2930 /* We don't need to enable cb for XDP */ 2931 napi_complete_done(napi, 0); 2932 return 0; 2933 } 2934 2935 txq = netdev_get_tx_queue(vi->dev, index); 2936 __netif_tx_lock(txq, raw_smp_processor_id()); 2937 virtqueue_disable_cb(sq->vq); 2938 free_old_xmit(sq, txq, !!budget); 2939 2940 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) { 2941 if (netif_tx_queue_stopped(txq)) { 2942 u64_stats_update_begin(&sq->stats.syncp); 2943 u64_stats_inc(&sq->stats.wake); 2944 u64_stats_update_end(&sq->stats.syncp); 2945 } 2946 netif_tx_wake_queue(txq); 2947 } 2948 2949 opaque = virtqueue_enable_cb_prepare(sq->vq); 2950 2951 done = napi_complete_done(napi, 0); 2952 2953 if (!done) 2954 virtqueue_disable_cb(sq->vq); 2955 2956 __netif_tx_unlock(txq); 2957 2958 if (done) { 2959 if (unlikely(virtqueue_poll(sq->vq, opaque))) { 2960 if (napi_schedule_prep(napi)) { 2961 __netif_tx_lock(txq, raw_smp_processor_id()); 2962 virtqueue_disable_cb(sq->vq); 2963 __netif_tx_unlock(txq); 2964 __napi_schedule(napi); 2965 } 2966 } 2967 } 2968 2969 return 0; 2970 } 2971 2972 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) 2973 { 2974 struct virtio_net_hdr_mrg_rxbuf *hdr; 2975 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; 2976 struct virtnet_info *vi = sq->vq->vdev->priv; 2977 int num_sg; 2978 unsigned hdr_len = vi->hdr_len; 2979 bool can_push; 2980 2981 pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); 2982 2983 can_push = vi->any_header_sg && 2984 !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) && 2985 !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len; 2986 /* Even if we can, don't push here yet as this would skew 2987 * csum_start offset below. */ 2988 if (can_push) 2989 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len); 2990 else 2991 hdr = &skb_vnet_common_hdr(skb)->mrg_hdr; 2992 2993 if (virtio_net_hdr_from_skb(skb, &hdr->hdr, 2994 virtio_is_little_endian(vi->vdev), false, 2995 0)) 2996 return -EPROTO; 2997 2998 if (vi->mergeable_rx_bufs) 2999 hdr->num_buffers = 0; 3000 3001 sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2)); 3002 if (can_push) { 3003 __skb_push(skb, hdr_len); 3004 num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len); 3005 if (unlikely(num_sg < 0)) 3006 return num_sg; 3007 /* Pull header back to avoid skew in tx bytes calculations. 
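 * (skb_to_sgvec() above ran with the virtio header pushed, so skb->len
 * temporarily included hdr_len; pulling it back restores the on-wire
 * length that BQL accounting and the tx byte counters rely on.)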
*/ 3008 __skb_pull(skb, hdr_len); 3009 } else { 3010 sg_set_buf(sq->sg, hdr, hdr_len); 3011 num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len); 3012 if (unlikely(num_sg < 0)) 3013 return num_sg; 3014 num_sg++; 3015 } 3016 return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, 3017 skb_to_ptr(skb, orphan), GFP_ATOMIC); 3018 } 3019 3020 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) 3021 { 3022 struct virtnet_info *vi = netdev_priv(dev); 3023 int qnum = skb_get_queue_mapping(skb); 3024 struct send_queue *sq = &vi->sq[qnum]; 3025 int err; 3026 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 3027 bool xmit_more = netdev_xmit_more(); 3028 bool use_napi = sq->napi.weight; 3029 bool kick; 3030 3031 /* Free up any pending old buffers before queueing new ones. */ 3032 do { 3033 if (use_napi) 3034 virtqueue_disable_cb(sq->vq); 3035 3036 free_old_xmit(sq, txq, false); 3037 3038 } while (use_napi && !xmit_more && 3039 unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 3040 3041 /* timestamp packet in software */ 3042 skb_tx_timestamp(skb); 3043 3044 /* Try to transmit */ 3045 err = xmit_skb(sq, skb, !use_napi); 3046 3047 /* This should not happen! */ 3048 if (unlikely(err)) { 3049 DEV_STATS_INC(dev, tx_fifo_errors); 3050 if (net_ratelimit()) 3051 dev_warn(&dev->dev, 3052 "Unexpected TXQ (%d) queue failure: %d\n", 3053 qnum, err); 3054 DEV_STATS_INC(dev, tx_dropped); 3055 dev_kfree_skb_any(skb); 3056 return NETDEV_TX_OK; 3057 } 3058 3059 /* Don't wait up for transmitted skbs to be freed. */ 3060 if (!use_napi) { 3061 skb_orphan(skb); 3062 nf_reset_ct(skb); 3063 } 3064 3065 check_sq_full_and_disable(vi, dev, sq); 3066 3067 kick = use_napi ? __netdev_tx_sent_queue(txq, skb->len, xmit_more) : 3068 !xmit_more || netif_xmit_stopped(txq); 3069 if (kick) { 3070 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 3071 u64_stats_update_begin(&sq->stats.syncp); 3072 u64_stats_inc(&sq->stats.kicks); 3073 u64_stats_update_end(&sq->stats.syncp); 3074 } 3075 } 3076 3077 return NETDEV_TX_OK; 3078 } 3079 3080 static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) 3081 { 3082 bool running = netif_running(vi->dev); 3083 3084 if (running) { 3085 napi_disable(&rq->napi); 3086 virtnet_cancel_dim(vi, &rq->dim); 3087 } 3088 } 3089 3090 static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq) 3091 { 3092 bool running = netif_running(vi->dev); 3093 3094 if (!try_fill_recv(vi, rq, GFP_KERNEL)) 3095 schedule_delayed_work(&vi->refill, 0); 3096 3097 if (running) 3098 virtnet_napi_enable(rq->vq, &rq->napi); 3099 } 3100 3101 static int virtnet_rx_resize(struct virtnet_info *vi, 3102 struct receive_queue *rq, u32 ring_num) 3103 { 3104 int err, qindex; 3105 3106 qindex = rq - vi->rq; 3107 3108 virtnet_rx_pause(vi, rq); 3109 3110 err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_unmap_free_buf); 3111 if (err) 3112 netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err); 3113 3114 virtnet_rx_resume(vi, rq); 3115 return err; 3116 } 3117 3118 static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq) 3119 { 3120 bool running = netif_running(vi->dev); 3121 struct netdev_queue *txq; 3122 int qindex; 3123 3124 qindex = sq - vi->sq; 3125 3126 if (running) 3127 virtnet_napi_tx_disable(&sq->napi); 3128 3129 txq = netdev_get_tx_queue(vi->dev, qindex); 3130 3131 /* 1. wait all ximt complete 3132 * 2. 
fix the race of netif_stop_subqueue() vs netif_start_subqueue() 3133 */ 3134 __netif_tx_lock_bh(txq); 3135 3136 /* Prevent rx poll from accessing sq. */ 3137 sq->reset = true; 3138 3139 /* Prevent the upper layer from trying to send packets. */ 3140 netif_stop_subqueue(vi->dev, qindex); 3141 3142 __netif_tx_unlock_bh(txq); 3143 } 3144 3145 static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq) 3146 { 3147 bool running = netif_running(vi->dev); 3148 struct netdev_queue *txq; 3149 int qindex; 3150 3151 qindex = sq - vi->sq; 3152 3153 txq = netdev_get_tx_queue(vi->dev, qindex); 3154 3155 __netif_tx_lock_bh(txq); 3156 sq->reset = false; 3157 netif_tx_wake_queue(txq); 3158 __netif_tx_unlock_bh(txq); 3159 3160 if (running) 3161 virtnet_napi_tx_enable(vi, sq->vq, &sq->napi); 3162 } 3163 3164 static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq, 3165 u32 ring_num) 3166 { 3167 int qindex, err; 3168 3169 qindex = sq - vi->sq; 3170 3171 virtnet_tx_pause(vi, sq); 3172 3173 err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf); 3174 if (err) 3175 netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err); 3176 3177 virtnet_tx_resume(vi, sq); 3178 3179 return err; 3180 } 3181 3182 /* 3183 * Send command via the control virtqueue and check status. Commands 3184 * supported by the hypervisor, as indicated by feature bits, should 3185 * never fail unless improperly formatted. 3186 */ 3187 static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd, 3188 struct scatterlist *out, 3189 struct scatterlist *in) 3190 { 3191 struct scatterlist *sgs[5], hdr, stat; 3192 u32 out_num = 0, tmp, in_num = 0; 3193 bool ok; 3194 int ret; 3195 3196 /* Caller should know better */ 3197 BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)); 3198 3199 mutex_lock(&vi->cvq_lock); 3200 vi->ctrl->status = ~0; 3201 vi->ctrl->hdr.class = class; 3202 vi->ctrl->hdr.cmd = cmd; 3203 /* Add header */ 3204 sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr)); 3205 sgs[out_num++] = &hdr; 3206 3207 if (out) 3208 sgs[out_num++] = out; 3209 3210 /* Add return status. */ 3211 sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status)); 3212 sgs[out_num + in_num++] = &stat; 3213 3214 if (in) 3215 sgs[out_num + in_num++] = in; 3216 3217 BUG_ON(out_num + in_num > ARRAY_SIZE(sgs)); 3218 ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC); 3219 if (ret < 0) { 3220 dev_warn(&vi->vdev->dev, 3221 "Failed to add sgs for command vq: %d\n.", ret); 3222 mutex_unlock(&vi->cvq_lock); 3223 return false; 3224 } 3225 3226 if (unlikely(!virtqueue_kick(vi->cvq))) 3227 goto unlock; 3228 3229 /* Spin for a response, the kick causes an ioport write, trapping 3230 * into the hypervisor, so the request should be handled immediately. 
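 * The loop below stays preemptible via cond_resched() and bails out if
 * the control virtqueue is marked broken (e.g. a dead or surprise-removed
 * device) instead of spinning forever.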
3231 */ 3232 while (!virtqueue_get_buf(vi->cvq, &tmp) && 3233 !virtqueue_is_broken(vi->cvq)) { 3234 cond_resched(); 3235 cpu_relax(); 3236 } 3237 3238 unlock: 3239 ok = vi->ctrl->status == VIRTIO_NET_OK; 3240 mutex_unlock(&vi->cvq_lock); 3241 return ok; 3242 } 3243 3244 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, 3245 struct scatterlist *out) 3246 { 3247 return virtnet_send_command_reply(vi, class, cmd, out, NULL); 3248 } 3249 3250 static int virtnet_set_mac_address(struct net_device *dev, void *p) 3251 { 3252 struct virtnet_info *vi = netdev_priv(dev); 3253 struct virtio_device *vdev = vi->vdev; 3254 int ret; 3255 struct sockaddr *addr; 3256 struct scatterlist sg; 3257 3258 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 3259 return -EOPNOTSUPP; 3260 3261 addr = kmemdup(p, sizeof(*addr), GFP_KERNEL); 3262 if (!addr) 3263 return -ENOMEM; 3264 3265 ret = eth_prepare_mac_addr_change(dev, addr); 3266 if (ret) 3267 goto out; 3268 3269 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 3270 sg_init_one(&sg, addr->sa_data, dev->addr_len); 3271 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 3272 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 3273 dev_warn(&vdev->dev, 3274 "Failed to set mac address by vq command.\n"); 3275 ret = -EINVAL; 3276 goto out; 3277 } 3278 } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 3279 !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { 3280 unsigned int i; 3281 3282 /* Naturally, this has an atomicity problem. */ 3283 for (i = 0; i < dev->addr_len; i++) 3284 virtio_cwrite8(vdev, 3285 offsetof(struct virtio_net_config, mac) + 3286 i, addr->sa_data[i]); 3287 } 3288 3289 eth_commit_mac_addr_change(dev, p); 3290 ret = 0; 3291 3292 out: 3293 kfree(addr); 3294 return ret; 3295 } 3296 3297 static void virtnet_stats(struct net_device *dev, 3298 struct rtnl_link_stats64 *tot) 3299 { 3300 struct virtnet_info *vi = netdev_priv(dev); 3301 unsigned int start; 3302 int i; 3303 3304 for (i = 0; i < vi->max_queue_pairs; i++) { 3305 u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops; 3306 struct receive_queue *rq = &vi->rq[i]; 3307 struct send_queue *sq = &vi->sq[i]; 3308 3309 do { 3310 start = u64_stats_fetch_begin(&sq->stats.syncp); 3311 tpackets = u64_stats_read(&sq->stats.packets); 3312 tbytes = u64_stats_read(&sq->stats.bytes); 3313 terrors = u64_stats_read(&sq->stats.tx_timeouts); 3314 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 3315 3316 do { 3317 start = u64_stats_fetch_begin(&rq->stats.syncp); 3318 rpackets = u64_stats_read(&rq->stats.packets); 3319 rbytes = u64_stats_read(&rq->stats.bytes); 3320 rdrops = u64_stats_read(&rq->stats.drops); 3321 } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); 3322 3323 tot->rx_packets += rpackets; 3324 tot->tx_packets += tpackets; 3325 tot->rx_bytes += rbytes; 3326 tot->tx_bytes += tbytes; 3327 tot->rx_dropped += rdrops; 3328 tot->tx_errors += terrors; 3329 } 3330 3331 tot->tx_dropped = DEV_STATS_READ(dev, tx_dropped); 3332 tot->tx_fifo_errors = DEV_STATS_READ(dev, tx_fifo_errors); 3333 tot->rx_length_errors = DEV_STATS_READ(dev, rx_length_errors); 3334 tot->rx_frame_errors = DEV_STATS_READ(dev, rx_frame_errors); 3335 } 3336 3337 static void virtnet_ack_link_announce(struct virtnet_info *vi) 3338 { 3339 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE, 3340 VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL)) 3341 dev_warn(&vi->dev->dev, "Failed to ack link announce.\n"); 3342 } 3343 3344 static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) 3345 { 3346 
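	/* For illustration, the control request built below reaches the device
	 * roughly as three scatterlist entries (see virtnet_send_command_reply()
	 * above):
	 *   sg[0]: struct virtio_net_ctrl_hdr { .class = VIRTIO_NET_CTRL_MQ,
	 *                                        .cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET }
	 *   sg[1]: struct virtio_net_ctrl_mq with virtqueue_pairs = cpu_to_virtio16(vdev, queue_pairs)
	 *   sg[2]: the ack byte the device writes, checked against VIRTIO_NET_OK
	 */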
struct virtio_net_ctrl_mq *mq __free(kfree) = NULL; 3347 struct scatterlist sg; 3348 struct net_device *dev = vi->dev; 3349 3350 if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ)) 3351 return 0; 3352 3353 mq = kzalloc(sizeof(*mq), GFP_KERNEL); 3354 if (!mq) 3355 return -ENOMEM; 3356 3357 mq->virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs); 3358 sg_init_one(&sg, mq, sizeof(*mq)); 3359 3360 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 3361 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) { 3362 dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n", 3363 queue_pairs); 3364 return -EINVAL; 3365 } else { 3366 vi->curr_queue_pairs = queue_pairs; 3367 /* virtnet_open() will refill when device is going to up. */ 3368 if (dev->flags & IFF_UP) 3369 schedule_delayed_work(&vi->refill, 0); 3370 } 3371 3372 return 0; 3373 } 3374 3375 static int virtnet_close(struct net_device *dev) 3376 { 3377 struct virtnet_info *vi = netdev_priv(dev); 3378 int i; 3379 3380 /* Make sure NAPI doesn't schedule refill work */ 3381 disable_delayed_refill(vi); 3382 /* Make sure refill_work doesn't re-enable napi! */ 3383 cancel_delayed_work_sync(&vi->refill); 3384 3385 for (i = 0; i < vi->max_queue_pairs; i++) { 3386 virtnet_disable_queue_pair(vi, i); 3387 virtnet_cancel_dim(vi, &vi->rq[i].dim); 3388 } 3389 3390 return 0; 3391 } 3392 3393 static void virtnet_rx_mode_work(struct work_struct *work) 3394 { 3395 struct virtnet_info *vi = 3396 container_of(work, struct virtnet_info, rx_mode_work); 3397 u8 *promisc_allmulti __free(kfree) = NULL; 3398 struct net_device *dev = vi->dev; 3399 struct scatterlist sg[2]; 3400 struct virtio_net_ctrl_mac *mac_data; 3401 struct netdev_hw_addr *ha; 3402 int uc_count; 3403 int mc_count; 3404 void *buf; 3405 int i; 3406 3407 /* We can't dynamically set ndo_set_rx_mode, so return gracefully */ 3408 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX)) 3409 return; 3410 3411 promisc_allmulti = kzalloc(sizeof(*promisc_allmulti), GFP_KERNEL); 3412 if (!promisc_allmulti) { 3413 dev_warn(&dev->dev, "Failed to set RX mode, no memory.\n"); 3414 return; 3415 } 3416 3417 rtnl_lock(); 3418 3419 *promisc_allmulti = !!(dev->flags & IFF_PROMISC); 3420 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); 3421 3422 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 3423 VIRTIO_NET_CTRL_RX_PROMISC, sg)) 3424 dev_warn(&dev->dev, "Failed to %sable promisc mode.\n", 3425 *promisc_allmulti ? "en" : "dis"); 3426 3427 *promisc_allmulti = !!(dev->flags & IFF_ALLMULTI); 3428 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); 3429 3430 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 3431 VIRTIO_NET_CTRL_RX_ALLMULTI, sg)) 3432 dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n", 3433 *promisc_allmulti ? 
"en" : "dis"); 3434 3435 netif_addr_lock_bh(dev); 3436 3437 uc_count = netdev_uc_count(dev); 3438 mc_count = netdev_mc_count(dev); 3439 /* MAC filter - use one buffer for both lists */ 3440 buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) + 3441 (2 * sizeof(mac_data->entries)), GFP_ATOMIC); 3442 mac_data = buf; 3443 if (!buf) { 3444 netif_addr_unlock_bh(dev); 3445 rtnl_unlock(); 3446 return; 3447 } 3448 3449 sg_init_table(sg, 2); 3450 3451 /* Store the unicast list and count in the front of the buffer */ 3452 mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count); 3453 i = 0; 3454 netdev_for_each_uc_addr(ha, dev) 3455 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 3456 3457 sg_set_buf(&sg[0], mac_data, 3458 sizeof(mac_data->entries) + (uc_count * ETH_ALEN)); 3459 3460 /* multicast list and count fill the end */ 3461 mac_data = (void *)&mac_data->macs[uc_count][0]; 3462 3463 mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count); 3464 i = 0; 3465 netdev_for_each_mc_addr(ha, dev) 3466 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 3467 3468 netif_addr_unlock_bh(dev); 3469 3470 sg_set_buf(&sg[1], mac_data, 3471 sizeof(mac_data->entries) + (mc_count * ETH_ALEN)); 3472 3473 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 3474 VIRTIO_NET_CTRL_MAC_TABLE_SET, sg)) 3475 dev_warn(&dev->dev, "Failed to set MAC filter table.\n"); 3476 3477 rtnl_unlock(); 3478 3479 kfree(buf); 3480 } 3481 3482 static void virtnet_set_rx_mode(struct net_device *dev) 3483 { 3484 struct virtnet_info *vi = netdev_priv(dev); 3485 3486 if (vi->rx_mode_work_enabled) 3487 schedule_work(&vi->rx_mode_work); 3488 } 3489 3490 static int virtnet_vlan_rx_add_vid(struct net_device *dev, 3491 __be16 proto, u16 vid) 3492 { 3493 struct virtnet_info *vi = netdev_priv(dev); 3494 __virtio16 *_vid __free(kfree) = NULL; 3495 struct scatterlist sg; 3496 3497 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL); 3498 if (!_vid) 3499 return -ENOMEM; 3500 3501 *_vid = cpu_to_virtio16(vi->vdev, vid); 3502 sg_init_one(&sg, _vid, sizeof(*_vid)); 3503 3504 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 3505 VIRTIO_NET_CTRL_VLAN_ADD, &sg)) 3506 dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid); 3507 return 0; 3508 } 3509 3510 static int virtnet_vlan_rx_kill_vid(struct net_device *dev, 3511 __be16 proto, u16 vid) 3512 { 3513 struct virtnet_info *vi = netdev_priv(dev); 3514 __virtio16 *_vid __free(kfree) = NULL; 3515 struct scatterlist sg; 3516 3517 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL); 3518 if (!_vid) 3519 return -ENOMEM; 3520 3521 *_vid = cpu_to_virtio16(vi->vdev, vid); 3522 sg_init_one(&sg, _vid, sizeof(*_vid)); 3523 3524 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 3525 VIRTIO_NET_CTRL_VLAN_DEL, &sg)) 3526 dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid); 3527 return 0; 3528 } 3529 3530 static void virtnet_clean_affinity(struct virtnet_info *vi) 3531 { 3532 int i; 3533 3534 if (vi->affinity_hint_set) { 3535 for (i = 0; i < vi->max_queue_pairs; i++) { 3536 virtqueue_set_affinity(vi->rq[i].vq, NULL); 3537 virtqueue_set_affinity(vi->sq[i].vq, NULL); 3538 } 3539 3540 vi->affinity_hint_set = false; 3541 } 3542 } 3543 3544 static void virtnet_set_affinity(struct virtnet_info *vi) 3545 { 3546 cpumask_var_t mask; 3547 int stragglers; 3548 int group_size; 3549 int i, j, cpu; 3550 int num_cpu; 3551 int stride; 3552 3553 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { 3554 virtnet_clean_affinity(vi); 3555 return; 3556 } 3557 3558 num_cpu = num_online_cpus(); 3559 stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1); 3560 
stragglers = num_cpu >= vi->curr_queue_pairs ? 3561 num_cpu % vi->curr_queue_pairs : 3562 0; 3563 cpu = cpumask_first(cpu_online_mask); 3564 3565 for (i = 0; i < vi->curr_queue_pairs; i++) { 3566 group_size = stride + (i < stragglers ? 1 : 0); 3567 3568 for (j = 0; j < group_size; j++) { 3569 cpumask_set_cpu(cpu, mask); 3570 cpu = cpumask_next_wrap(cpu, cpu_online_mask, 3571 nr_cpu_ids, false); 3572 } 3573 virtqueue_set_affinity(vi->rq[i].vq, mask); 3574 virtqueue_set_affinity(vi->sq[i].vq, mask); 3575 __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS); 3576 cpumask_clear(mask); 3577 } 3578 3579 vi->affinity_hint_set = true; 3580 free_cpumask_var(mask); 3581 } 3582 3583 static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node) 3584 { 3585 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 3586 node); 3587 virtnet_set_affinity(vi); 3588 return 0; 3589 } 3590 3591 static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node) 3592 { 3593 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 3594 node_dead); 3595 virtnet_set_affinity(vi); 3596 return 0; 3597 } 3598 3599 static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node) 3600 { 3601 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 3602 node); 3603 3604 virtnet_clean_affinity(vi); 3605 return 0; 3606 } 3607 3608 static enum cpuhp_state virtionet_online; 3609 3610 static int virtnet_cpu_notif_add(struct virtnet_info *vi) 3611 { 3612 int ret; 3613 3614 ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node); 3615 if (ret) 3616 return ret; 3617 ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD, 3618 &vi->node_dead); 3619 if (!ret) 3620 return ret; 3621 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 3622 return ret; 3623 } 3624 3625 static void virtnet_cpu_notif_remove(struct virtnet_info *vi) 3626 { 3627 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 3628 cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD, 3629 &vi->node_dead); 3630 } 3631 3632 static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi, 3633 u16 vqn, u32 max_usecs, u32 max_packets) 3634 { 3635 struct virtio_net_ctrl_coal_vq *coal_vq __free(kfree) = NULL; 3636 struct scatterlist sgs; 3637 3638 coal_vq = kzalloc(sizeof(*coal_vq), GFP_KERNEL); 3639 if (!coal_vq) 3640 return -ENOMEM; 3641 3642 coal_vq->vqn = cpu_to_le16(vqn); 3643 coal_vq->coal.max_usecs = cpu_to_le32(max_usecs); 3644 coal_vq->coal.max_packets = cpu_to_le32(max_packets); 3645 sg_init_one(&sgs, coal_vq, sizeof(*coal_vq)); 3646 3647 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 3648 VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET, 3649 &sgs)) 3650 return -EINVAL; 3651 3652 return 0; 3653 } 3654 3655 static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info *vi, 3656 u16 queue, u32 max_usecs, 3657 u32 max_packets) 3658 { 3659 int err; 3660 3661 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 3662 return -EOPNOTSUPP; 3663 3664 err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue), 3665 max_usecs, max_packets); 3666 if (err) 3667 return err; 3668 3669 vi->rq[queue].intr_coal.max_usecs = max_usecs; 3670 vi->rq[queue].intr_coal.max_packets = max_packets; 3671 3672 return 0; 3673 } 3674 3675 static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info *vi, 3676 u16 queue, u32 max_usecs, 3677 u32 max_packets) 3678 { 3679 int err; 3680 3681 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 3682 return -EOPNOTSUPP; 
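	/* For example (illustrative values): a request of 64 usecs / 32 frames
	 * for tx queue 1 reaches the device as
	 *   { .vqn = cpu_to_le16(txq2vq(1)),
	 *     .coal = { .max_usecs = cpu_to_le32(64), .max_packets = cpu_to_le32(32) } }
	 * and, on success, is cached in sq[1].intr_coal below so it can be
	 * reported back later.
	 */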
3683 3684 err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue), 3685 max_usecs, max_packets); 3686 if (err) 3687 return err; 3688 3689 vi->sq[queue].intr_coal.max_usecs = max_usecs; 3690 vi->sq[queue].intr_coal.max_packets = max_packets; 3691 3692 return 0; 3693 } 3694 3695 static void virtnet_get_ringparam(struct net_device *dev, 3696 struct ethtool_ringparam *ring, 3697 struct kernel_ethtool_ringparam *kernel_ring, 3698 struct netlink_ext_ack *extack) 3699 { 3700 struct virtnet_info *vi = netdev_priv(dev); 3701 3702 ring->rx_max_pending = vi->rq[0].vq->num_max; 3703 ring->tx_max_pending = vi->sq[0].vq->num_max; 3704 ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 3705 ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 3706 } 3707 3708 static int virtnet_set_ringparam(struct net_device *dev, 3709 struct ethtool_ringparam *ring, 3710 struct kernel_ethtool_ringparam *kernel_ring, 3711 struct netlink_ext_ack *extack) 3712 { 3713 struct virtnet_info *vi = netdev_priv(dev); 3714 u32 rx_pending, tx_pending; 3715 struct receive_queue *rq; 3716 struct send_queue *sq; 3717 int i, err; 3718 3719 if (ring->rx_mini_pending || ring->rx_jumbo_pending) 3720 return -EINVAL; 3721 3722 rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 3723 tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 3724 3725 if (ring->rx_pending == rx_pending && 3726 ring->tx_pending == tx_pending) 3727 return 0; 3728 3729 if (ring->rx_pending > vi->rq[0].vq->num_max) 3730 return -EINVAL; 3731 3732 if (ring->tx_pending > vi->sq[0].vq->num_max) 3733 return -EINVAL; 3734 3735 for (i = 0; i < vi->max_queue_pairs; i++) { 3736 rq = vi->rq + i; 3737 sq = vi->sq + i; 3738 3739 if (ring->tx_pending != tx_pending) { 3740 err = virtnet_tx_resize(vi, sq, ring->tx_pending); 3741 if (err) 3742 return err; 3743 3744 /* Upon disabling and re-enabling a transmit virtqueue, the device must 3745 * set the coalescing parameters of the virtqueue to those configured 3746 * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver 3747 * did not set any TX coalescing parameters, to 0. 3748 */ 3749 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, i, 3750 vi->intr_coal_tx.max_usecs, 3751 vi->intr_coal_tx.max_packets); 3752 3753 /* Don't break the tx resize action if the vq coalescing is not 3754 * supported. The same is true for rx resize below. 
3755 */ 3756 if (err && err != -EOPNOTSUPP) 3757 return err; 3758 } 3759 3760 if (ring->rx_pending != rx_pending) { 3761 err = virtnet_rx_resize(vi, rq, ring->rx_pending); 3762 if (err) 3763 return err; 3764 3765 /* The reason is same as the transmit virtqueue reset */ 3766 mutex_lock(&vi->rq[i].dim_lock); 3767 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, i, 3768 vi->intr_coal_rx.max_usecs, 3769 vi->intr_coal_rx.max_packets); 3770 mutex_unlock(&vi->rq[i].dim_lock); 3771 if (err && err != -EOPNOTSUPP) 3772 return err; 3773 } 3774 } 3775 3776 return 0; 3777 } 3778 3779 static bool virtnet_commit_rss_command(struct virtnet_info *vi) 3780 { 3781 struct net_device *dev = vi->dev; 3782 struct scatterlist sgs[4]; 3783 unsigned int sg_buf_size; 3784 3785 /* prepare sgs */ 3786 sg_init_table(sgs, 4); 3787 3788 sg_buf_size = offsetof(struct virtio_net_ctrl_rss, indirection_table); 3789 sg_set_buf(&sgs[0], &vi->rss, sg_buf_size); 3790 3791 sg_buf_size = sizeof(uint16_t) * (vi->rss.indirection_table_mask + 1); 3792 sg_set_buf(&sgs[1], vi->rss.indirection_table, sg_buf_size); 3793 3794 sg_buf_size = offsetof(struct virtio_net_ctrl_rss, key) 3795 - offsetof(struct virtio_net_ctrl_rss, max_tx_vq); 3796 sg_set_buf(&sgs[2], &vi->rss.max_tx_vq, sg_buf_size); 3797 3798 sg_buf_size = vi->rss_key_size; 3799 sg_set_buf(&sgs[3], vi->rss.key, sg_buf_size); 3800 3801 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 3802 vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG 3803 : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs)) 3804 goto err; 3805 3806 return true; 3807 3808 err: 3809 dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n"); 3810 return false; 3811 3812 } 3813 3814 static void virtnet_init_default_rss(struct virtnet_info *vi) 3815 { 3816 u32 indir_val = 0; 3817 int i = 0; 3818 3819 vi->rss.hash_types = vi->rss_hash_types_supported; 3820 vi->rss_hash_types_saved = vi->rss_hash_types_supported; 3821 vi->rss.indirection_table_mask = vi->rss_indir_table_size 3822 ? vi->rss_indir_table_size - 1 : 0; 3823 vi->rss.unclassified_queue = 0; 3824 3825 for (; i < vi->rss_indir_table_size; ++i) { 3826 indir_val = ethtool_rxfh_indir_default(i, vi->curr_queue_pairs); 3827 vi->rss.indirection_table[i] = indir_val; 3828 } 3829 3830 vi->rss.max_tx_vq = vi->has_rss ? 
vi->curr_queue_pairs : 0; 3831 vi->rss.hash_key_length = vi->rss_key_size; 3832 3833 netdev_rss_key_fill(vi->rss.key, vi->rss_key_size); 3834 } 3835 3836 static void virtnet_get_hashflow(const struct virtnet_info *vi, struct ethtool_rxnfc *info) 3837 { 3838 info->data = 0; 3839 switch (info->flow_type) { 3840 case TCP_V4_FLOW: 3841 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) { 3842 info->data = RXH_IP_SRC | RXH_IP_DST | 3843 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3844 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 3845 info->data = RXH_IP_SRC | RXH_IP_DST; 3846 } 3847 break; 3848 case TCP_V6_FLOW: 3849 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) { 3850 info->data = RXH_IP_SRC | RXH_IP_DST | 3851 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3852 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 3853 info->data = RXH_IP_SRC | RXH_IP_DST; 3854 } 3855 break; 3856 case UDP_V4_FLOW: 3857 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) { 3858 info->data = RXH_IP_SRC | RXH_IP_DST | 3859 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3860 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 3861 info->data = RXH_IP_SRC | RXH_IP_DST; 3862 } 3863 break; 3864 case UDP_V6_FLOW: 3865 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) { 3866 info->data = RXH_IP_SRC | RXH_IP_DST | 3867 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3868 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 3869 info->data = RXH_IP_SRC | RXH_IP_DST; 3870 } 3871 break; 3872 case IPV4_FLOW: 3873 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) 3874 info->data = RXH_IP_SRC | RXH_IP_DST; 3875 3876 break; 3877 case IPV6_FLOW: 3878 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) 3879 info->data = RXH_IP_SRC | RXH_IP_DST; 3880 3881 break; 3882 default: 3883 info->data = 0; 3884 break; 3885 } 3886 } 3887 3888 static bool virtnet_set_hashflow(struct virtnet_info *vi, struct ethtool_rxnfc *info) 3889 { 3890 u32 new_hashtypes = vi->rss_hash_types_saved; 3891 bool is_disable = info->data & RXH_DISCARD; 3892 bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3); 3893 3894 /* supports only 'sd', 'sdfn' and 'r' */ 3895 if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable)) 3896 return false; 3897 3898 switch (info->flow_type) { 3899 case TCP_V4_FLOW: 3900 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4); 3901 if (!is_disable) 3902 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 3903 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0); 3904 break; 3905 case UDP_V4_FLOW: 3906 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4); 3907 if (!is_disable) 3908 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 3909 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0); 3910 break; 3911 case IPV4_FLOW: 3912 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4; 3913 if (!is_disable) 3914 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4; 3915 break; 3916 case TCP_V6_FLOW: 3917 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6); 3918 if (!is_disable) 3919 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 3920 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0); 3921 break; 3922 case UDP_V6_FLOW: 3923 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6); 3924 if (!is_disable) 3925 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 3926 | (is_l4 ? 
VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0); 3927 break; 3928 case IPV6_FLOW: 3929 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6; 3930 if (!is_disable) 3931 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6; 3932 break; 3933 default: 3934 /* unsupported flow */ 3935 return false; 3936 } 3937 3938 /* if an unsupported hashtype was set */ 3939 if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported)) 3940 return false; 3941 3942 if (new_hashtypes != vi->rss_hash_types_saved) { 3943 vi->rss_hash_types_saved = new_hashtypes; 3944 vi->rss.hash_types = vi->rss_hash_types_saved; 3945 if (vi->dev->features & NETIF_F_RXHASH) 3946 return virtnet_commit_rss_command(vi); 3947 } 3948 3949 return true; 3950 } 3951 3952 static void virtnet_get_drvinfo(struct net_device *dev, 3953 struct ethtool_drvinfo *info) 3954 { 3955 struct virtnet_info *vi = netdev_priv(dev); 3956 struct virtio_device *vdev = vi->vdev; 3957 3958 strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); 3959 strscpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version)); 3960 strscpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info)); 3961 3962 } 3963 3964 /* TODO: Eliminate OOO packets during switching */ 3965 static int virtnet_set_channels(struct net_device *dev, 3966 struct ethtool_channels *channels) 3967 { 3968 struct virtnet_info *vi = netdev_priv(dev); 3969 u16 queue_pairs = channels->combined_count; 3970 int err; 3971 3972 /* We don't support separate rx/tx channels. 3973 * We don't allow setting 'other' channels. 3974 */ 3975 if (channels->rx_count || channels->tx_count || channels->other_count) 3976 return -EINVAL; 3977 3978 if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0) 3979 return -EINVAL; 3980 3981 /* For now we don't support modifying channels while XDP is loaded; 3982 * also, when XDP is loaded all RX queues have XDP programs, so we only 3983 * need to check a single RX queue. 
3984 */ 3985 if (vi->rq[0].xdp_prog) 3986 return -EINVAL; 3987 3988 cpus_read_lock(); 3989 err = virtnet_set_queues(vi, queue_pairs); 3990 if (err) { 3991 cpus_read_unlock(); 3992 goto err; 3993 } 3994 virtnet_set_affinity(vi); 3995 cpus_read_unlock(); 3996 3997 netif_set_real_num_tx_queues(dev, queue_pairs); 3998 netif_set_real_num_rx_queues(dev, queue_pairs); 3999 err: 4000 return err; 4001 } 4002 4003 static void virtnet_stats_sprintf(u8 **p, const char *fmt, const char *noq_fmt, 4004 int num, int qid, const struct virtnet_stat_desc *desc) 4005 { 4006 int i; 4007 4008 if (qid < 0) { 4009 for (i = 0; i < num; ++i) 4010 ethtool_sprintf(p, noq_fmt, desc[i].desc); 4011 } else { 4012 for (i = 0; i < num; ++i) 4013 ethtool_sprintf(p, fmt, qid, desc[i].desc); 4014 } 4015 } 4016 4017 /* qid == -1: for rx/tx queue total field */ 4018 static void virtnet_get_stats_string(struct virtnet_info *vi, int type, int qid, u8 **data) 4019 { 4020 const struct virtnet_stat_desc *desc; 4021 const char *fmt, *noq_fmt; 4022 u8 *p = *data; 4023 u32 num; 4024 4025 if (type == VIRTNET_Q_TYPE_CQ && qid >= 0) { 4026 noq_fmt = "cq_hw_%s"; 4027 4028 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { 4029 desc = &virtnet_stats_cvq_desc[0]; 4030 num = ARRAY_SIZE(virtnet_stats_cvq_desc); 4031 4032 virtnet_stats_sprintf(&p, NULL, noq_fmt, num, -1, desc); 4033 } 4034 } 4035 4036 if (type == VIRTNET_Q_TYPE_RX) { 4037 fmt = "rx%u_%s"; 4038 noq_fmt = "rx_%s"; 4039 4040 desc = &virtnet_rq_stats_desc[0]; 4041 num = ARRAY_SIZE(virtnet_rq_stats_desc); 4042 4043 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4044 4045 fmt = "rx%u_hw_%s"; 4046 noq_fmt = "rx_hw_%s"; 4047 4048 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4049 desc = &virtnet_stats_rx_basic_desc[0]; 4050 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4051 4052 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4053 } 4054 4055 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4056 desc = &virtnet_stats_rx_csum_desc[0]; 4057 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4058 4059 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4060 } 4061 4062 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4063 desc = &virtnet_stats_rx_speed_desc[0]; 4064 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4065 4066 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4067 } 4068 } 4069 4070 if (type == VIRTNET_Q_TYPE_TX) { 4071 fmt = "tx%u_%s"; 4072 noq_fmt = "tx_%s"; 4073 4074 desc = &virtnet_sq_stats_desc[0]; 4075 num = ARRAY_SIZE(virtnet_sq_stats_desc); 4076 4077 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4078 4079 fmt = "tx%u_hw_%s"; 4080 noq_fmt = "tx_hw_%s"; 4081 4082 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4083 desc = &virtnet_stats_tx_basic_desc[0]; 4084 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4085 4086 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4087 } 4088 4089 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4090 desc = &virtnet_stats_tx_gso_desc[0]; 4091 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4092 4093 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4094 } 4095 4096 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4097 desc = &virtnet_stats_tx_speed_desc[0]; 4098 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4099 4100 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4101 } 4102 } 4103 4104 *data = p; 4105 } 4106 4107 struct virtnet_stats_ctx { 4108 /* The stats are write to qstats or ethtool -S */ 4109 
bool to_qstat; 4110 4111 /* Used to calculate the offset inside the output buffer. */ 4112 u32 desc_num[3]; 4113 4114 /* The actual supported stat types. */ 4115 u32 bitmap[3]; 4116 4117 /* Used to calculate the reply buffer size. */ 4118 u32 size[3]; 4119 4120 /* Record the output buffer. */ 4121 u64 *data; 4122 }; 4123 4124 static void virtnet_stats_ctx_init(struct virtnet_info *vi, 4125 struct virtnet_stats_ctx *ctx, 4126 u64 *data, bool to_qstat) 4127 { 4128 u32 queue_type; 4129 4130 ctx->data = data; 4131 ctx->to_qstat = to_qstat; 4132 4133 if (to_qstat) { 4134 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); 4135 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); 4136 4137 queue_type = VIRTNET_Q_TYPE_RX; 4138 4139 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4140 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; 4141 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); 4142 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); 4143 } 4144 4145 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4146 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; 4147 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); 4148 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); 4149 } 4150 4151 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 4152 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_GSO; 4153 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); 4154 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_gso); 4155 } 4156 4157 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4158 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; 4159 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); 4160 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); 4161 } 4162 4163 queue_type = VIRTNET_Q_TYPE_TX; 4164 4165 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4166 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; 4167 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); 4168 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); 4169 } 4170 4171 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 4172 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_CSUM; 4173 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); 4174 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_csum); 4175 } 4176 4177 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4178 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; 4179 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); 4180 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); 4181 } 4182 4183 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4184 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; 4185 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); 4186 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); 4187 } 4188 4189 return; 4190 } 4191 4192 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc); 4193 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc); 4194 4195 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { 4196 queue_type = VIRTNET_Q_TYPE_CQ; 4197 4198 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_CVQ; 4199 ctx->desc_num[queue_type] += 
ARRAY_SIZE(virtnet_stats_cvq_desc); 4200 ctx->size[queue_type] += sizeof(struct virtio_net_stats_cvq); 4201 } 4202 4203 queue_type = VIRTNET_Q_TYPE_RX; 4204 4205 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4206 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; 4207 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4208 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); 4209 } 4210 4211 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4212 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; 4213 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4214 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); 4215 } 4216 4217 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4218 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; 4219 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4220 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); 4221 } 4222 4223 queue_type = VIRTNET_Q_TYPE_TX; 4224 4225 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4226 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; 4227 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4228 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); 4229 } 4230 4231 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4232 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; 4233 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4234 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); 4235 } 4236 4237 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4238 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; 4239 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4240 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); 4241 } 4242 } 4243 4244 /* stats_sum_queue - Calculate the sum of the same fields in sq or rq. 
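 * Used by virtnet_fill_total_fields() to build the aggregate rx_/tx_ rows from the per-queue counters.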
4245 * @sum: the position to store the sum values 4246 * @num: field num 4247 * @q_value: the first queue fields 4248 * @q_num: number of the queues 4249 */ 4250 static void stats_sum_queue(u64 *sum, u32 num, u64 *q_value, u32 q_num) 4251 { 4252 u32 step = num; 4253 int i, j; 4254 u64 *p; 4255 4256 for (i = 0; i < num; ++i) { 4257 p = sum + i; 4258 *p = 0; 4259 4260 for (j = 0; j < q_num; ++j) 4261 *p += *(q_value + i + j * step); 4262 } 4263 } 4264 4265 static void virtnet_fill_total_fields(struct virtnet_info *vi, 4266 struct virtnet_stats_ctx *ctx) 4267 { 4268 u64 *data, *first_rx_q, *first_tx_q; 4269 u32 num_cq, num_rx, num_tx; 4270 4271 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; 4272 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; 4273 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; 4274 4275 first_rx_q = ctx->data + num_rx + num_tx + num_cq; 4276 first_tx_q = first_rx_q + vi->curr_queue_pairs * num_rx; 4277 4278 data = ctx->data; 4279 4280 stats_sum_queue(data, num_rx, first_rx_q, vi->curr_queue_pairs); 4281 4282 data = ctx->data + num_rx; 4283 4284 stats_sum_queue(data, num_tx, first_tx_q, vi->curr_queue_pairs); 4285 } 4286 4287 static void virtnet_fill_stats_qstat(struct virtnet_info *vi, u32 qid, 4288 struct virtnet_stats_ctx *ctx, 4289 const u8 *base, bool drv_stats, u8 reply_type) 4290 { 4291 const struct virtnet_stat_desc *desc; 4292 const u64_stats_t *v_stat; 4293 u64 offset, bitmap; 4294 const __le64 *v; 4295 u32 queue_type; 4296 int i, num; 4297 4298 queue_type = vq_type(vi, qid); 4299 bitmap = ctx->bitmap[queue_type]; 4300 4301 if (drv_stats) { 4302 if (queue_type == VIRTNET_Q_TYPE_RX) { 4303 desc = &virtnet_rq_stats_desc_qstat[0]; 4304 num = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); 4305 } else { 4306 desc = &virtnet_sq_stats_desc_qstat[0]; 4307 num = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); 4308 } 4309 4310 for (i = 0; i < num; ++i) { 4311 offset = desc[i].qstat_offset / sizeof(*ctx->data); 4312 v_stat = (const u64_stats_t *)(base + desc[i].offset); 4313 ctx->data[offset] = u64_stats_read(v_stat); 4314 } 4315 return; 4316 } 4317 4318 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4319 desc = &virtnet_stats_rx_basic_desc_qstat[0]; 4320 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); 4321 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) 4322 goto found; 4323 } 4324 4325 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4326 desc = &virtnet_stats_rx_csum_desc_qstat[0]; 4327 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); 4328 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) 4329 goto found; 4330 } 4331 4332 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 4333 desc = &virtnet_stats_rx_gso_desc_qstat[0]; 4334 num = ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); 4335 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_GSO) 4336 goto found; 4337 } 4338 4339 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4340 desc = &virtnet_stats_rx_speed_desc_qstat[0]; 4341 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); 4342 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) 4343 goto found; 4344 } 4345 4346 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4347 desc = &virtnet_stats_tx_basic_desc_qstat[0]; 4348 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); 4349 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) 4350 goto found; 4351 } 4352 4353 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 4354 desc = &virtnet_stats_tx_csum_desc_qstat[0]; 4355 num = ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); 4356 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_CSUM) 4357 goto found; 4358 
} 4359 4360 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4361 desc = &virtnet_stats_tx_gso_desc_qstat[0]; 4362 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); 4363 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) 4364 goto found; 4365 } 4366 4367 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4368 desc = &virtnet_stats_tx_speed_desc_qstat[0]; 4369 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); 4370 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) 4371 goto found; 4372 } 4373 4374 return; 4375 4376 found: 4377 for (i = 0; i < num; ++i) { 4378 offset = desc[i].qstat_offset / sizeof(*ctx->data); 4379 v = (const __le64 *)(base + desc[i].offset); 4380 ctx->data[offset] = le64_to_cpu(*v); 4381 } 4382 } 4383 4384 /* virtnet_fill_stats - copy the stats to qstats or ethtool -S 4385 * The stats source is the device or the driver. 4386 * 4387 * @vi: virtio net info 4388 * @qid: the vq id 4389 * @ctx: stats ctx (initiated by virtnet_stats_ctx_init()) 4390 * @base: pointer to the device reply or the driver stats structure. 4391 * @drv_stats: designate the base type (device reply, driver stats) 4392 * @type: the type of the device reply (if drv_stats is true, this must be zero) 4393 */ 4394 static void virtnet_fill_stats(struct virtnet_info *vi, u32 qid, 4395 struct virtnet_stats_ctx *ctx, 4396 const u8 *base, bool drv_stats, u8 reply_type) 4397 { 4398 u32 queue_type, num_rx, num_tx, num_cq; 4399 const struct virtnet_stat_desc *desc; 4400 const u64_stats_t *v_stat; 4401 u64 offset, bitmap; 4402 const __le64 *v; 4403 int i, num; 4404 4405 if (ctx->to_qstat) 4406 return virtnet_fill_stats_qstat(vi, qid, ctx, base, drv_stats, reply_type); 4407 4408 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; 4409 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; 4410 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; 4411 4412 queue_type = vq_type(vi, qid); 4413 bitmap = ctx->bitmap[queue_type]; 4414 4415 /* skip the total fields of pairs */ 4416 offset = num_rx + num_tx; 4417 4418 if (queue_type == VIRTNET_Q_TYPE_TX) { 4419 offset += num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2); 4420 4421 num = ARRAY_SIZE(virtnet_sq_stats_desc); 4422 if (drv_stats) { 4423 desc = &virtnet_sq_stats_desc[0]; 4424 goto drv_stats; 4425 } 4426 4427 offset += num; 4428 4429 } else if (queue_type == VIRTNET_Q_TYPE_RX) { 4430 offset += num_cq + num_rx * (qid / 2); 4431 4432 num = ARRAY_SIZE(virtnet_rq_stats_desc); 4433 if (drv_stats) { 4434 desc = &virtnet_rq_stats_desc[0]; 4435 goto drv_stats; 4436 } 4437 4438 offset += num; 4439 } 4440 4441 if (bitmap & VIRTIO_NET_STATS_TYPE_CVQ) { 4442 desc = &virtnet_stats_cvq_desc[0]; 4443 num = ARRAY_SIZE(virtnet_stats_cvq_desc); 4444 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_CVQ) 4445 goto found; 4446 4447 offset += num; 4448 } 4449 4450 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4451 desc = &virtnet_stats_rx_basic_desc[0]; 4452 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4453 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) 4454 goto found; 4455 4456 offset += num; 4457 } 4458 4459 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4460 desc = &virtnet_stats_rx_csum_desc[0]; 4461 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4462 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) 4463 goto found; 4464 4465 offset += num; 4466 } 4467 4468 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4469 desc = &virtnet_stats_rx_speed_desc[0]; 4470 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4471 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) 4472 goto found; 4473 4474 
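/* Not the reply type being filled; account for this block's entries in the output layout and keep scanning. */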
offset += num; 4475 } 4476 4477 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4478 desc = &virtnet_stats_tx_basic_desc[0]; 4479 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4480 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) 4481 goto found; 4482 4483 offset += num; 4484 } 4485 4486 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4487 desc = &virtnet_stats_tx_gso_desc[0]; 4488 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4489 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) 4490 goto found; 4491 4492 offset += num; 4493 } 4494 4495 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4496 desc = &virtnet_stats_tx_speed_desc[0]; 4497 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4498 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) 4499 goto found; 4500 4501 offset += num; 4502 } 4503 4504 return; 4505 4506 found: 4507 for (i = 0; i < num; ++i) { 4508 v = (const __le64 *)(base + desc[i].offset); 4509 ctx->data[offset + i] = le64_to_cpu(*v); 4510 } 4511 4512 return; 4513 4514 drv_stats: 4515 for (i = 0; i < num; ++i) { 4516 v_stat = (const u64_stats_t *)(base + desc[i].offset); 4517 ctx->data[offset + i] = u64_stats_read(v_stat); 4518 } 4519 } 4520 4521 static int __virtnet_get_hw_stats(struct virtnet_info *vi, 4522 struct virtnet_stats_ctx *ctx, 4523 struct virtio_net_ctrl_queue_stats *req, 4524 int req_size, void *reply, int res_size) 4525 { 4526 struct virtio_net_stats_reply_hdr *hdr; 4527 struct scatterlist sgs_in, sgs_out; 4528 void *p; 4529 u32 qid; 4530 int ok; 4531 4532 sg_init_one(&sgs_out, req, req_size); 4533 sg_init_one(&sgs_in, reply, res_size); 4534 4535 ok = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, 4536 VIRTIO_NET_CTRL_STATS_GET, 4537 &sgs_out, &sgs_in); 4538 4539 if (!ok) 4540 return ok; 4541 4542 for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) { 4543 hdr = p; 4544 qid = le16_to_cpu(hdr->vq_index); 4545 virtnet_fill_stats(vi, qid, ctx, p, false, hdr->type); 4546 } 4547 4548 return 0; 4549 } 4550 4551 static void virtnet_make_stat_req(struct virtnet_info *vi, 4552 struct virtnet_stats_ctx *ctx, 4553 struct virtio_net_ctrl_queue_stats *req, 4554 int qid, int *idx) 4555 { 4556 int qtype = vq_type(vi, qid); 4557 u64 bitmap = ctx->bitmap[qtype]; 4558 4559 if (!bitmap) 4560 return; 4561 4562 req->stats[*idx].vq_index = cpu_to_le16(qid); 4563 req->stats[*idx].types_bitmap[0] = cpu_to_le64(bitmap); 4564 *idx += 1; 4565 } 4566 4567 /* qid: -1: get stats of all vqs. 4568 * >= 0: get the stats for the specified vq. This must not be the cvq. 
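 * When qid == -1 the cvq stats, if supported, are requested as well, using vq index vi->max_queue_pairs * 2.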
4569 */ 4570 static int virtnet_get_hw_stats(struct virtnet_info *vi, 4571 struct virtnet_stats_ctx *ctx, int qid) 4572 { 4573 int qnum, i, j, res_size, qtype, last_vq, first_vq; 4574 struct virtio_net_ctrl_queue_stats *req; 4575 bool enable_cvq; 4576 void *reply; 4577 int ok; 4578 4579 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) 4580 return 0; 4581 4582 if (qid == -1) { 4583 last_vq = vi->curr_queue_pairs * 2 - 1; 4584 first_vq = 0; 4585 enable_cvq = true; 4586 } else { 4587 last_vq = qid; 4588 first_vq = qid; 4589 enable_cvq = false; 4590 } 4591 4592 qnum = 0; 4593 res_size = 0; 4594 for (i = first_vq; i <= last_vq ; ++i) { 4595 qtype = vq_type(vi, i); 4596 if (ctx->bitmap[qtype]) { 4597 ++qnum; 4598 res_size += ctx->size[qtype]; 4599 } 4600 } 4601 4602 if (enable_cvq && ctx->bitmap[VIRTNET_Q_TYPE_CQ]) { 4603 res_size += ctx->size[VIRTNET_Q_TYPE_CQ]; 4604 qnum += 1; 4605 } 4606 4607 req = kcalloc(qnum, sizeof(*req), GFP_KERNEL); 4608 if (!req) 4609 return -ENOMEM; 4610 4611 reply = kmalloc(res_size, GFP_KERNEL); 4612 if (!reply) { 4613 kfree(req); 4614 return -ENOMEM; 4615 } 4616 4617 j = 0; 4618 for (i = first_vq; i <= last_vq ; ++i) 4619 virtnet_make_stat_req(vi, ctx, req, i, &j); 4620 4621 if (enable_cvq) 4622 virtnet_make_stat_req(vi, ctx, req, vi->max_queue_pairs * 2, &j); 4623 4624 ok = __virtnet_get_hw_stats(vi, ctx, req, sizeof(*req) * j, reply, res_size); 4625 4626 kfree(req); 4627 kfree(reply); 4628 4629 return ok; 4630 } 4631 4632 static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) 4633 { 4634 struct virtnet_info *vi = netdev_priv(dev); 4635 unsigned int i; 4636 u8 *p = data; 4637 4638 switch (stringset) { 4639 case ETH_SS_STATS: 4640 /* Generate the total field names. */ 4641 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, -1, &p); 4642 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, -1, &p); 4643 4644 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_CQ, 0, &p); 4645 4646 for (i = 0; i < vi->curr_queue_pairs; ++i) 4647 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, i, &p); 4648 4649 for (i = 0; i < vi->curr_queue_pairs; ++i) 4650 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, i, &p); 4651 break; 4652 } 4653 } 4654 4655 static int virtnet_get_sset_count(struct net_device *dev, int sset) 4656 { 4657 struct virtnet_info *vi = netdev_priv(dev); 4658 struct virtnet_stats_ctx ctx = {0}; 4659 u32 pair_count; 4660 4661 switch (sset) { 4662 case ETH_SS_STATS: 4663 virtnet_stats_ctx_init(vi, &ctx, NULL, false); 4664 4665 pair_count = ctx.desc_num[VIRTNET_Q_TYPE_RX] + ctx.desc_num[VIRTNET_Q_TYPE_TX]; 4666 4667 return pair_count + ctx.desc_num[VIRTNET_Q_TYPE_CQ] + 4668 vi->curr_queue_pairs * pair_count; 4669 default: 4670 return -EOPNOTSUPP; 4671 } 4672 } 4673 4674 static void virtnet_get_ethtool_stats(struct net_device *dev, 4675 struct ethtool_stats *stats, u64 *data) 4676 { 4677 struct virtnet_info *vi = netdev_priv(dev); 4678 struct virtnet_stats_ctx ctx = {0}; 4679 unsigned int start, i; 4680 const u8 *stats_base; 4681 4682 virtnet_stats_ctx_init(vi, &ctx, data, false); 4683 if (virtnet_get_hw_stats(vi, &ctx, -1)) 4684 dev_warn(&vi->dev->dev, "Failed to get hw stats.\n"); 4685 4686 for (i = 0; i < vi->curr_queue_pairs; i++) { 4687 struct receive_queue *rq = &vi->rq[i]; 4688 struct send_queue *sq = &vi->sq[i]; 4689 4690 stats_base = (const u8 *)&rq->stats; 4691 do { 4692 start = u64_stats_fetch_begin(&rq->stats.syncp); 4693 virtnet_fill_stats(vi, i * 2, &ctx, stats_base, true, 0); 4694 } while (u64_stats_fetch_retry(&rq->stats.syncp, 
start)); 4695 4696 stats_base = (const u8 *)&sq->stats; 4697 do { 4698 start = u64_stats_fetch_begin(&sq->stats.syncp); 4699 virtnet_fill_stats(vi, i * 2 + 1, &ctx, stats_base, true, 0); 4700 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 4701 } 4702 4703 virtnet_fill_total_fields(vi, &ctx); 4704 } 4705 4706 static void virtnet_get_channels(struct net_device *dev, 4707 struct ethtool_channels *channels) 4708 { 4709 struct virtnet_info *vi = netdev_priv(dev); 4710 4711 channels->combined_count = vi->curr_queue_pairs; 4712 channels->max_combined = vi->max_queue_pairs; 4713 channels->max_other = 0; 4714 channels->rx_count = 0; 4715 channels->tx_count = 0; 4716 channels->other_count = 0; 4717 } 4718 4719 static int virtnet_set_link_ksettings(struct net_device *dev, 4720 const struct ethtool_link_ksettings *cmd) 4721 { 4722 struct virtnet_info *vi = netdev_priv(dev); 4723 4724 return ethtool_virtdev_set_link_ksettings(dev, cmd, 4725 &vi->speed, &vi->duplex); 4726 } 4727 4728 static int virtnet_get_link_ksettings(struct net_device *dev, 4729 struct ethtool_link_ksettings *cmd) 4730 { 4731 struct virtnet_info *vi = netdev_priv(dev); 4732 4733 cmd->base.speed = vi->speed; 4734 cmd->base.duplex = vi->duplex; 4735 cmd->base.port = PORT_OTHER; 4736 4737 return 0; 4738 } 4739 4740 static int virtnet_send_tx_notf_coal_cmds(struct virtnet_info *vi, 4741 struct ethtool_coalesce *ec) 4742 { 4743 struct virtio_net_ctrl_coal_tx *coal_tx __free(kfree) = NULL; 4744 struct scatterlist sgs_tx; 4745 int i; 4746 4747 coal_tx = kzalloc(sizeof(*coal_tx), GFP_KERNEL); 4748 if (!coal_tx) 4749 return -ENOMEM; 4750 4751 coal_tx->tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); 4752 coal_tx->tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); 4753 sg_init_one(&sgs_tx, coal_tx, sizeof(*coal_tx)); 4754 4755 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 4756 VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, 4757 &sgs_tx)) 4758 return -EINVAL; 4759 4760 vi->intr_coal_tx.max_usecs = ec->tx_coalesce_usecs; 4761 vi->intr_coal_tx.max_packets = ec->tx_max_coalesced_frames; 4762 for (i = 0; i < vi->max_queue_pairs; i++) { 4763 vi->sq[i].intr_coal.max_usecs = ec->tx_coalesce_usecs; 4764 vi->sq[i].intr_coal.max_packets = ec->tx_max_coalesced_frames; 4765 } 4766 4767 return 0; 4768 } 4769 4770 static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi, 4771 struct ethtool_coalesce *ec) 4772 { 4773 struct virtio_net_ctrl_coal_rx *coal_rx __free(kfree) = NULL; 4774 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; 4775 struct scatterlist sgs_rx; 4776 int i; 4777 4778 if (rx_ctrl_dim_on && !virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 4779 return -EOPNOTSUPP; 4780 4781 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != vi->intr_coal_rx.max_usecs || 4782 ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets)) 4783 return -EINVAL; 4784 4785 if (rx_ctrl_dim_on && !vi->rx_dim_enabled) { 4786 vi->rx_dim_enabled = true; 4787 for (i = 0; i < vi->max_queue_pairs; i++) { 4788 mutex_lock(&vi->rq[i].dim_lock); 4789 vi->rq[i].dim_enabled = true; 4790 mutex_unlock(&vi->rq[i].dim_lock); 4791 } 4792 return 0; 4793 } 4794 4795 coal_rx = kzalloc(sizeof(*coal_rx), GFP_KERNEL); 4796 if (!coal_rx) 4797 return -ENOMEM; 4798 4799 if (!rx_ctrl_dim_on && vi->rx_dim_enabled) { 4800 vi->rx_dim_enabled = false; 4801 for (i = 0; i < vi->max_queue_pairs; i++) { 4802 mutex_lock(&vi->rq[i].dim_lock); 4803 vi->rq[i].dim_enabled = false; 4804 mutex_unlock(&vi->rq[i].dim_lock); 4805 } 4806 } 4807 4808 /* Since the per-queue 
coalescing params can be set, 4809 * we need to apply the new global params even if they 4810 * are not updated. 4811 */ 4812 coal_rx->rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); 4813 coal_rx->rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); 4814 sg_init_one(&sgs_rx, coal_rx, sizeof(*coal_rx)); 4815 4816 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 4817 VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, 4818 &sgs_rx)) 4819 return -EINVAL; 4820 4821 vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs; 4822 vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames; 4823 for (i = 0; i < vi->max_queue_pairs; i++) { 4824 mutex_lock(&vi->rq[i].dim_lock); 4825 vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs; 4826 vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames; 4827 mutex_unlock(&vi->rq[i].dim_lock); 4828 } 4829 4830 return 0; 4831 } 4832 4833 static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, 4834 struct ethtool_coalesce *ec) 4835 { 4836 int err; 4837 4838 err = virtnet_send_tx_notf_coal_cmds(vi, ec); 4839 if (err) 4840 return err; 4841 4842 err = virtnet_send_rx_notf_coal_cmds(vi, ec); 4843 if (err) 4844 return err; 4845 4846 return 0; 4847 } 4848 4849 static int virtnet_send_rx_notf_coal_vq_cmds(struct virtnet_info *vi, 4850 struct ethtool_coalesce *ec, 4851 u16 queue) 4852 { 4853 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; 4854 u32 max_usecs, max_packets; 4855 bool cur_rx_dim; 4856 int err; 4857 4858 mutex_lock(&vi->rq[queue].dim_lock); 4859 cur_rx_dim = vi->rq[queue].dim_enabled; 4860 max_usecs = vi->rq[queue].intr_coal.max_usecs; 4861 max_packets = vi->rq[queue].intr_coal.max_packets; 4862 4863 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != max_usecs || 4864 ec->rx_max_coalesced_frames != max_packets)) { 4865 mutex_unlock(&vi->rq[queue].dim_lock); 4866 return -EINVAL; 4867 } 4868 4869 if (rx_ctrl_dim_on && !cur_rx_dim) { 4870 vi->rq[queue].dim_enabled = true; 4871 mutex_unlock(&vi->rq[queue].dim_lock); 4872 return 0; 4873 } 4874 4875 if (!rx_ctrl_dim_on && cur_rx_dim) 4876 vi->rq[queue].dim_enabled = false; 4877 4878 /* If no params are updated, userspace ethtool will 4879 * reject the modification. 
4880 */ 4881 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue, 4882 ec->rx_coalesce_usecs, 4883 ec->rx_max_coalesced_frames); 4884 mutex_unlock(&vi->rq[queue].dim_lock); 4885 return err; 4886 } 4887 4888 static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi, 4889 struct ethtool_coalesce *ec, 4890 u16 queue) 4891 { 4892 int err; 4893 4894 err = virtnet_send_rx_notf_coal_vq_cmds(vi, ec, queue); 4895 if (err) 4896 return err; 4897 4898 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, queue, 4899 ec->tx_coalesce_usecs, 4900 ec->tx_max_coalesced_frames); 4901 if (err) 4902 return err; 4903 4904 return 0; 4905 } 4906 4907 static void virtnet_rx_dim_work(struct work_struct *work) 4908 { 4909 struct dim *dim = container_of(work, struct dim, work); 4910 struct receive_queue *rq = container_of(dim, 4911 struct receive_queue, dim); 4912 struct virtnet_info *vi = rq->vq->vdev->priv; 4913 struct net_device *dev = vi->dev; 4914 struct dim_cq_moder update_moder; 4915 int qnum, err; 4916 4917 qnum = rq - vi->rq; 4918 4919 mutex_lock(&rq->dim_lock); 4920 if (!rq->dim_enabled) 4921 goto out; 4922 4923 update_moder = net_dim_get_rx_irq_moder(dev, dim); 4924 if (update_moder.usec != rq->intr_coal.max_usecs || 4925 update_moder.pkts != rq->intr_coal.max_packets) { 4926 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum, 4927 update_moder.usec, 4928 update_moder.pkts); 4929 if (err) 4930 pr_debug("%s: Failed to send dim parameters on rxq%d\n", 4931 dev->name, qnum); 4932 } 4933 out: 4934 dim->state = DIM_START_MEASURE; 4935 mutex_unlock(&rq->dim_lock); 4936 } 4937 4938 static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) 4939 { 4940 /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL 4941 * or VIRTIO_NET_F_VQ_NOTF_COAL feature is negotiated. 4942 */ 4943 if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) 4944 return -EOPNOTSUPP; 4945 4946 if (ec->tx_max_coalesced_frames > 1 || 4947 ec->rx_max_coalesced_frames != 1) 4948 return -EINVAL; 4949 4950 return 0; 4951 } 4952 4953 static int virtnet_should_update_vq_weight(int dev_flags, int weight, 4954 int vq_weight, bool *should_update) 4955 { 4956 if (weight ^ vq_weight) { 4957 if (dev_flags & IFF_UP) 4958 return -EBUSY; 4959 *should_update = true; 4960 } 4961 4962 return 0; 4963 } 4964 4965 static int virtnet_set_coalesce(struct net_device *dev, 4966 struct ethtool_coalesce *ec, 4967 struct kernel_ethtool_coalesce *kernel_coal, 4968 struct netlink_ext_ack *extack) 4969 { 4970 struct virtnet_info *vi = netdev_priv(dev); 4971 int ret, queue_number, napi_weight; 4972 bool update_napi = false; 4973 4974 /* Can't change NAPI weight if the link is up */ 4975 napi_weight = ec->tx_max_coalesced_frames ? 
NAPI_POLL_WEIGHT : 0; 4976 for (queue_number = 0; queue_number < vi->max_queue_pairs; queue_number++) { 4977 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 4978 vi->sq[queue_number].napi.weight, 4979 &update_napi); 4980 if (ret) 4981 return ret; 4982 4983 if (update_napi) { 4984 /* All queues that belong to [queue_number, vi->max_queue_pairs] will be 4985 * updated for the sake of simplicity, which might not be necessary 4986 */ 4987 break; 4988 } 4989 } 4990 4991 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) 4992 ret = virtnet_send_notf_coal_cmds(vi, ec); 4993 else 4994 ret = virtnet_coal_params_supported(ec); 4995 4996 if (ret) 4997 return ret; 4998 4999 if (update_napi) { 5000 for (; queue_number < vi->max_queue_pairs; queue_number++) 5001 vi->sq[queue_number].napi.weight = napi_weight; 5002 } 5003 5004 return ret; 5005 } 5006 5007 static int virtnet_get_coalesce(struct net_device *dev, 5008 struct ethtool_coalesce *ec, 5009 struct kernel_ethtool_coalesce *kernel_coal, 5010 struct netlink_ext_ack *extack) 5011 { 5012 struct virtnet_info *vi = netdev_priv(dev); 5013 5014 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 5015 ec->rx_coalesce_usecs = vi->intr_coal_rx.max_usecs; 5016 ec->tx_coalesce_usecs = vi->intr_coal_tx.max_usecs; 5017 ec->tx_max_coalesced_frames = vi->intr_coal_tx.max_packets; 5018 ec->rx_max_coalesced_frames = vi->intr_coal_rx.max_packets; 5019 ec->use_adaptive_rx_coalesce = vi->rx_dim_enabled; 5020 } else { 5021 ec->rx_max_coalesced_frames = 1; 5022 5023 if (vi->sq[0].napi.weight) 5024 ec->tx_max_coalesced_frames = 1; 5025 } 5026 5027 return 0; 5028 } 5029 5030 static int virtnet_set_per_queue_coalesce(struct net_device *dev, 5031 u32 queue, 5032 struct ethtool_coalesce *ec) 5033 { 5034 struct virtnet_info *vi = netdev_priv(dev); 5035 int ret, napi_weight; 5036 bool update_napi = false; 5037 5038 if (queue >= vi->max_queue_pairs) 5039 return -EINVAL; 5040 5041 /* Can't change NAPI weight if the link is up */ 5042 napi_weight = ec->tx_max_coalesced_frames ? 
NAPI_POLL_WEIGHT : 0; 5043 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 5044 vi->sq[queue].napi.weight, 5045 &update_napi); 5046 if (ret) 5047 return ret; 5048 5049 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 5050 ret = virtnet_send_notf_coal_vq_cmds(vi, ec, queue); 5051 else 5052 ret = virtnet_coal_params_supported(ec); 5053 5054 if (ret) 5055 return ret; 5056 5057 if (update_napi) 5058 vi->sq[queue].napi.weight = napi_weight; 5059 5060 return 0; 5061 } 5062 5063 static int virtnet_get_per_queue_coalesce(struct net_device *dev, 5064 u32 queue, 5065 struct ethtool_coalesce *ec) 5066 { 5067 struct virtnet_info *vi = netdev_priv(dev); 5068 5069 if (queue >= vi->max_queue_pairs) 5070 return -EINVAL; 5071 5072 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 5073 mutex_lock(&vi->rq[queue].dim_lock); 5074 ec->rx_coalesce_usecs = vi->rq[queue].intr_coal.max_usecs; 5075 ec->tx_coalesce_usecs = vi->sq[queue].intr_coal.max_usecs; 5076 ec->tx_max_coalesced_frames = vi->sq[queue].intr_coal.max_packets; 5077 ec->rx_max_coalesced_frames = vi->rq[queue].intr_coal.max_packets; 5078 ec->use_adaptive_rx_coalesce = vi->rq[queue].dim_enabled; 5079 mutex_unlock(&vi->rq[queue].dim_lock); 5080 } else { 5081 ec->rx_max_coalesced_frames = 1; 5082 5083 if (vi->sq[queue].napi.weight) 5084 ec->tx_max_coalesced_frames = 1; 5085 } 5086 5087 return 0; 5088 } 5089 5090 static void virtnet_init_settings(struct net_device *dev) 5091 { 5092 struct virtnet_info *vi = netdev_priv(dev); 5093 5094 vi->speed = SPEED_UNKNOWN; 5095 vi->duplex = DUPLEX_UNKNOWN; 5096 } 5097 5098 static void virtnet_update_settings(struct virtnet_info *vi) 5099 { 5100 u32 speed; 5101 u8 duplex; 5102 5103 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX)) 5104 return; 5105 5106 virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed); 5107 5108 if (ethtool_validate_speed(speed)) 5109 vi->speed = speed; 5110 5111 virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex); 5112 5113 if (ethtool_validate_duplex(duplex)) 5114 vi->duplex = duplex; 5115 } 5116 5117 static u32 virtnet_get_rxfh_key_size(struct net_device *dev) 5118 { 5119 return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size; 5120 } 5121 5122 static u32 virtnet_get_rxfh_indir_size(struct net_device *dev) 5123 { 5124 return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size; 5125 } 5126 5127 static int virtnet_get_rxfh(struct net_device *dev, 5128 struct ethtool_rxfh_param *rxfh) 5129 { 5130 struct virtnet_info *vi = netdev_priv(dev); 5131 int i; 5132 5133 if (rxfh->indir) { 5134 for (i = 0; i < vi->rss_indir_table_size; ++i) 5135 rxfh->indir[i] = vi->rss.indirection_table[i]; 5136 } 5137 5138 if (rxfh->key) 5139 memcpy(rxfh->key, vi->rss.key, vi->rss_key_size); 5140 5141 rxfh->hfunc = ETH_RSS_HASH_TOP; 5142 5143 return 0; 5144 } 5145 5146 static int virtnet_set_rxfh(struct net_device *dev, 5147 struct ethtool_rxfh_param *rxfh, 5148 struct netlink_ext_ack *extack) 5149 { 5150 struct virtnet_info *vi = netdev_priv(dev); 5151 bool update = false; 5152 int i; 5153 5154 if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && 5155 rxfh->hfunc != ETH_RSS_HASH_TOP) 5156 return -EOPNOTSUPP; 5157 5158 if (rxfh->indir) { 5159 if (!vi->has_rss) 5160 return -EOPNOTSUPP; 5161 5162 for (i = 0; i < vi->rss_indir_table_size; ++i) 5163 vi->rss.indirection_table[i] = rxfh->indir[i]; 5164 update = true; 5165 } 5166 5167 if (rxfh->key) { 5168 /* If either _F_HASH_REPORT or _F_RSS are negotiated, the 5169 * device provides 
hash calculation capabilities, that is, 5170 * hash_key is configured. 5171 */ 5172 if (!vi->has_rss && !vi->has_rss_hash_report) 5173 return -EOPNOTSUPP; 5174 5175 memcpy(vi->rss.key, rxfh->key, vi->rss_key_size); 5176 update = true; 5177 } 5178 5179 if (update) 5180 virtnet_commit_rss_command(vi); 5181 5182 return 0; 5183 } 5184 5185 static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) 5186 { 5187 struct virtnet_info *vi = netdev_priv(dev); 5188 int rc = 0; 5189 5190 switch (info->cmd) { 5191 case ETHTOOL_GRXRINGS: 5192 info->data = vi->curr_queue_pairs; 5193 break; 5194 case ETHTOOL_GRXFH: 5195 virtnet_get_hashflow(vi, info); 5196 break; 5197 default: 5198 rc = -EOPNOTSUPP; 5199 } 5200 5201 return rc; 5202 } 5203 5204 static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) 5205 { 5206 struct virtnet_info *vi = netdev_priv(dev); 5207 int rc = 0; 5208 5209 switch (info->cmd) { 5210 case ETHTOOL_SRXFH: 5211 if (!virtnet_set_hashflow(vi, info)) 5212 rc = -EINVAL; 5213 5214 break; 5215 default: 5216 rc = -EOPNOTSUPP; 5217 } 5218 5219 return rc; 5220 } 5221 5222 static const struct ethtool_ops virtnet_ethtool_ops = { 5223 .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | 5224 ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX, 5225 .get_drvinfo = virtnet_get_drvinfo, 5226 .get_link = ethtool_op_get_link, 5227 .get_ringparam = virtnet_get_ringparam, 5228 .set_ringparam = virtnet_set_ringparam, 5229 .get_strings = virtnet_get_strings, 5230 .get_sset_count = virtnet_get_sset_count, 5231 .get_ethtool_stats = virtnet_get_ethtool_stats, 5232 .set_channels = virtnet_set_channels, 5233 .get_channels = virtnet_get_channels, 5234 .get_ts_info = ethtool_op_get_ts_info, 5235 .get_link_ksettings = virtnet_get_link_ksettings, 5236 .set_link_ksettings = virtnet_set_link_ksettings, 5237 .set_coalesce = virtnet_set_coalesce, 5238 .get_coalesce = virtnet_get_coalesce, 5239 .set_per_queue_coalesce = virtnet_set_per_queue_coalesce, 5240 .get_per_queue_coalesce = virtnet_get_per_queue_coalesce, 5241 .get_rxfh_key_size = virtnet_get_rxfh_key_size, 5242 .get_rxfh_indir_size = virtnet_get_rxfh_indir_size, 5243 .get_rxfh = virtnet_get_rxfh, 5244 .set_rxfh = virtnet_set_rxfh, 5245 .get_rxnfc = virtnet_get_rxnfc, 5246 .set_rxnfc = virtnet_set_rxnfc, 5247 }; 5248 5249 static void virtnet_get_queue_stats_rx(struct net_device *dev, int i, 5250 struct netdev_queue_stats_rx *stats) 5251 { 5252 struct virtnet_info *vi = netdev_priv(dev); 5253 struct receive_queue *rq = &vi->rq[i]; 5254 struct virtnet_stats_ctx ctx = {0}; 5255 5256 virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); 5257 5258 virtnet_get_hw_stats(vi, &ctx, i * 2); 5259 virtnet_fill_stats(vi, i * 2, &ctx, (void *)&rq->stats, true, 0); 5260 } 5261 5262 static void virtnet_get_queue_stats_tx(struct net_device *dev, int i, 5263 struct netdev_queue_stats_tx *stats) 5264 { 5265 struct virtnet_info *vi = netdev_priv(dev); 5266 struct send_queue *sq = &vi->sq[i]; 5267 struct virtnet_stats_ctx ctx = {0}; 5268 5269 virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); 5270 5271 virtnet_get_hw_stats(vi, &ctx, i * 2 + 1); 5272 virtnet_fill_stats(vi, i * 2 + 1, &ctx, (void *)&sq->stats, true, 0); 5273 } 5274 5275 static void virtnet_get_base_stats(struct net_device *dev, 5276 struct netdev_queue_stats_rx *rx, 5277 struct netdev_queue_stats_tx *tx) 5278 { 5279 struct virtnet_info *vi = netdev_priv(dev); 5280 5281 /* The queue stats of the virtio-net will not be reset. 
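 * They are cumulative for the lifetime of the device.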
So here we 5282 * return 0. 5283 */ 5284 rx->bytes = 0; 5285 rx->packets = 0; 5286 5287 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 5288 rx->hw_drops = 0; 5289 rx->hw_drop_overruns = 0; 5290 } 5291 5292 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 5293 rx->csum_unnecessary = 0; 5294 rx->csum_none = 0; 5295 rx->csum_bad = 0; 5296 } 5297 5298 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 5299 rx->hw_gro_packets = 0; 5300 rx->hw_gro_bytes = 0; 5301 rx->hw_gro_wire_packets = 0; 5302 rx->hw_gro_wire_bytes = 0; 5303 } 5304 5305 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) 5306 rx->hw_drop_ratelimits = 0; 5307 5308 tx->bytes = 0; 5309 tx->packets = 0; 5310 tx->stop = 0; 5311 tx->wake = 0; 5312 5313 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 5314 tx->hw_drops = 0; 5315 tx->hw_drop_errors = 0; 5316 } 5317 5318 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 5319 tx->csum_none = 0; 5320 tx->needs_csum = 0; 5321 } 5322 5323 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 5324 tx->hw_gso_packets = 0; 5325 tx->hw_gso_bytes = 0; 5326 tx->hw_gso_wire_packets = 0; 5327 tx->hw_gso_wire_bytes = 0; 5328 } 5329 5330 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) 5331 tx->hw_drop_ratelimits = 0; 5332 } 5333 5334 static const struct netdev_stat_ops virtnet_stat_ops = { 5335 .get_queue_stats_rx = virtnet_get_queue_stats_rx, 5336 .get_queue_stats_tx = virtnet_get_queue_stats_tx, 5337 .get_base_stats = virtnet_get_base_stats, 5338 }; 5339 5340 static void virtnet_freeze_down(struct virtio_device *vdev) 5341 { 5342 struct virtnet_info *vi = vdev->priv; 5343 5344 /* Make sure no work handler is accessing the device */ 5345 flush_work(&vi->config_work); 5346 disable_rx_mode_work(vi); 5347 flush_work(&vi->rx_mode_work); 5348 5349 netif_tx_lock_bh(vi->dev); 5350 netif_device_detach(vi->dev); 5351 netif_tx_unlock_bh(vi->dev); 5352 if (netif_running(vi->dev)) 5353 virtnet_close(vi->dev); 5354 } 5355 5356 static int init_vqs(struct virtnet_info *vi); 5357 5358 static int virtnet_restore_up(struct virtio_device *vdev) 5359 { 5360 struct virtnet_info *vi = vdev->priv; 5361 int err; 5362 5363 err = init_vqs(vi); 5364 if (err) 5365 return err; 5366 5367 virtio_device_ready(vdev); 5368 5369 enable_delayed_refill(vi); 5370 enable_rx_mode_work(vi); 5371 5372 if (netif_running(vi->dev)) { 5373 err = virtnet_open(vi->dev); 5374 if (err) 5375 return err; 5376 } 5377 5378 netif_tx_lock_bh(vi->dev); 5379 netif_device_attach(vi->dev); 5380 netif_tx_unlock_bh(vi->dev); 5381 return err; 5382 } 5383 5384 static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads) 5385 { 5386 __virtio64 *_offloads __free(kfree) = NULL; 5387 struct scatterlist sg; 5388 5389 _offloads = kzalloc(sizeof(*_offloads), GFP_KERNEL); 5390 if (!_offloads) 5391 return -ENOMEM; 5392 5393 *_offloads = cpu_to_virtio64(vi->vdev, offloads); 5394 5395 sg_init_one(&sg, _offloads, sizeof(*_offloads)); 5396 5397 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS, 5398 VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) { 5399 dev_warn(&vi->dev->dev, "Fail to set guest offload.\n"); 5400 return -EINVAL; 5401 } 5402 5403 return 0; 5404 } 5405 5406 static int virtnet_clear_guest_offloads(struct virtnet_info *vi) 5407 { 5408 u64 offloads = 0; 5409 5410 if (!vi->guest_offloads) 5411 return 0; 5412 5413 return virtnet_set_guest_offloads(vi, offloads); 5414 } 5415 5416 static int virtnet_restore_guest_offloads(struct virtnet_info *vi) 5417 { 5418 u64 
offloads = vi->guest_offloads; 5419 5420 if (!vi->guest_offloads) 5421 return 0; 5422 5423 return virtnet_set_guest_offloads(vi, offloads); 5424 } 5425 5426 static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct receive_queue *rq, 5427 struct xsk_buff_pool *pool) 5428 { 5429 int err, qindex; 5430 5431 qindex = rq - vi->rq; 5432 5433 if (pool) { 5434 err = xdp_rxq_info_reg(&rq->xsk_rxq_info, vi->dev, qindex, rq->napi.napi_id); 5435 if (err < 0) 5436 return err; 5437 5438 err = xdp_rxq_info_reg_mem_model(&rq->xsk_rxq_info, 5439 MEM_TYPE_XSK_BUFF_POOL, NULL); 5440 if (err < 0) 5441 goto unreg; 5442 5443 xsk_pool_set_rxq_info(pool, &rq->xsk_rxq_info); 5444 } 5445 5446 virtnet_rx_pause(vi, rq); 5447 5448 err = virtqueue_reset(rq->vq, virtnet_rq_unmap_free_buf); 5449 if (err) { 5450 netdev_err(vi->dev, "reset rx fail: rx queue index: %d err: %d\n", qindex, err); 5451 5452 pool = NULL; 5453 } 5454 5455 rq->xsk_pool = pool; 5456 5457 virtnet_rx_resume(vi, rq); 5458 5459 if (pool) 5460 return 0; 5461 5462 unreg: 5463 xdp_rxq_info_unreg(&rq->xsk_rxq_info); 5464 return err; 5465 } 5466 5467 static int virtnet_xsk_pool_enable(struct net_device *dev, 5468 struct xsk_buff_pool *pool, 5469 u16 qid) 5470 { 5471 struct virtnet_info *vi = netdev_priv(dev); 5472 struct receive_queue *rq; 5473 struct device *dma_dev; 5474 struct send_queue *sq; 5475 int err, size; 5476 5477 if (vi->hdr_len > xsk_pool_get_headroom(pool)) 5478 return -EINVAL; 5479 5480 /* In big_packets mode, xdp cannot work, so there is no need to 5481 * initialize the xsk state of the rq. 5482 */ 5483 if (vi->big_packets && !vi->mergeable_rx_bufs) 5484 return -ENOENT; 5485 5486 if (qid >= vi->curr_queue_pairs) 5487 return -EINVAL; 5488 5489 sq = &vi->sq[qid]; 5490 rq = &vi->rq[qid]; 5491 5492 /* xsk assumes that tx and rx must use the same dma device. AF_XDP 5493 * may use one buffer to receive from the rx and then reuse that buffer 5494 * to send on the tx, so the dma dev of the sq and rq must be the same one. 5495 * 5496 * But vq->dma_dev allows each vq to have its own dma dev, so check 5497 * that the dma devs of the rq's vq and the sq's vq are the same. 
5498 */ 5499 if (virtqueue_dma_dev(rq->vq) != virtqueue_dma_dev(sq->vq)) 5500 return -EINVAL; 5501 5502 dma_dev = virtqueue_dma_dev(rq->vq); 5503 if (!dma_dev) 5504 return -EINVAL; 5505 5506 size = virtqueue_get_vring_size(rq->vq); 5507 5508 rq->xsk_buffs = kvcalloc(size, sizeof(*rq->xsk_buffs), GFP_KERNEL); 5509 if (!rq->xsk_buffs) 5510 return -ENOMEM; 5511 5512 err = xsk_pool_dma_map(pool, dma_dev, 0); 5513 if (err) 5514 goto err_xsk_map; 5515 5516 err = virtnet_rq_bind_xsk_pool(vi, rq, pool); 5517 if (err) 5518 goto err_rq; 5519 5520 return 0; 5521 5522 err_rq: 5523 xsk_pool_dma_unmap(pool, 0); 5524 err_xsk_map: 5525 return err; 5526 } 5527 5528 static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid) 5529 { 5530 struct virtnet_info *vi = netdev_priv(dev); 5531 struct xsk_buff_pool *pool; 5532 struct receive_queue *rq; 5533 int err; 5534 5535 if (qid >= vi->curr_queue_pairs) 5536 return -EINVAL; 5537 5538 rq = &vi->rq[qid]; 5539 5540 pool = rq->xsk_pool; 5541 5542 err = virtnet_rq_bind_xsk_pool(vi, rq, NULL); 5543 5544 xsk_pool_dma_unmap(pool, 0); 5545 5546 kvfree(rq->xsk_buffs); 5547 5548 return err; 5549 } 5550 5551 static int virtnet_xsk_pool_setup(struct net_device *dev, struct netdev_bpf *xdp) 5552 { 5553 if (xdp->xsk.pool) 5554 return virtnet_xsk_pool_enable(dev, xdp->xsk.pool, 5555 xdp->xsk.queue_id); 5556 else 5557 return virtnet_xsk_pool_disable(dev, xdp->xsk.queue_id); 5558 } 5559 5560 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, 5561 struct netlink_ext_ack *extack) 5562 { 5563 unsigned int room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM + 5564 sizeof(struct skb_shared_info)); 5565 unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN; 5566 struct virtnet_info *vi = netdev_priv(dev); 5567 struct bpf_prog *old_prog; 5568 u16 xdp_qp = 0, curr_qp; 5569 int i, err; 5570 5571 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) 5572 && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 5573 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 5574 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 5575 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 5576 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) || 5577 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) || 5578 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) { 5579 NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first"); 5580 return -EOPNOTSUPP; 5581 } 5582 5583 if (vi->mergeable_rx_bufs && !vi->any_header_sg) { 5584 NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required"); 5585 return -EINVAL; 5586 } 5587 5588 if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) { 5589 NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags"); 5590 netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz); 5591 return -EINVAL; 5592 } 5593 5594 curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs; 5595 if (prog) 5596 xdp_qp = nr_cpu_ids; 5597 5598 /* XDP requires extra queues for XDP_TX */ 5599 if (curr_qp + xdp_qp > vi->max_queue_pairs) { 5600 netdev_warn_once(dev, "XDP request %i queues but max is %i. 
XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n", 5601 curr_qp + xdp_qp, vi->max_queue_pairs); 5602 xdp_qp = 0; 5603 } 5604 5605 old_prog = rtnl_dereference(vi->rq[0].xdp_prog); 5606 if (!prog && !old_prog) 5607 return 0; 5608 5609 if (prog) 5610 bpf_prog_add(prog, vi->max_queue_pairs - 1); 5611 5612 /* Make sure NAPI is not using any XDP TX queues for RX. */ 5613 if (netif_running(dev)) { 5614 for (i = 0; i < vi->max_queue_pairs; i++) { 5615 napi_disable(&vi->rq[i].napi); 5616 virtnet_napi_tx_disable(&vi->sq[i].napi); 5617 } 5618 } 5619 5620 if (!prog) { 5621 for (i = 0; i < vi->max_queue_pairs; i++) { 5622 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 5623 if (i == 0) 5624 virtnet_restore_guest_offloads(vi); 5625 } 5626 synchronize_net(); 5627 } 5628 5629 err = virtnet_set_queues(vi, curr_qp + xdp_qp); 5630 if (err) 5631 goto err; 5632 netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); 5633 vi->xdp_queue_pairs = xdp_qp; 5634 5635 if (prog) { 5636 vi->xdp_enabled = true; 5637 for (i = 0; i < vi->max_queue_pairs; i++) { 5638 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 5639 if (i == 0 && !old_prog) 5640 virtnet_clear_guest_offloads(vi); 5641 } 5642 if (!old_prog) 5643 xdp_features_set_redirect_target(dev, true); 5644 } else { 5645 xdp_features_clear_redirect_target(dev); 5646 vi->xdp_enabled = false; 5647 } 5648 5649 for (i = 0; i < vi->max_queue_pairs; i++) { 5650 if (old_prog) 5651 bpf_prog_put(old_prog); 5652 if (netif_running(dev)) { 5653 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); 5654 virtnet_napi_tx_enable(vi, vi->sq[i].vq, 5655 &vi->sq[i].napi); 5656 } 5657 } 5658 5659 return 0; 5660 5661 err: 5662 if (!prog) { 5663 virtnet_clear_guest_offloads(vi); 5664 for (i = 0; i < vi->max_queue_pairs; i++) 5665 rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); 5666 } 5667 5668 if (netif_running(dev)) { 5669 for (i = 0; i < vi->max_queue_pairs; i++) { 5670 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); 5671 virtnet_napi_tx_enable(vi, vi->sq[i].vq, 5672 &vi->sq[i].napi); 5673 } 5674 } 5675 if (prog) 5676 bpf_prog_sub(prog, vi->max_queue_pairs - 1); 5677 return err; 5678 } 5679 5680 static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) 5681 { 5682 switch (xdp->command) { 5683 case XDP_SETUP_PROG: 5684 return virtnet_xdp_set(dev, xdp->prog, xdp->extack); 5685 case XDP_SETUP_XSK_POOL: 5686 return virtnet_xsk_pool_setup(dev, xdp); 5687 default: 5688 return -EINVAL; 5689 } 5690 } 5691 5692 static int virtnet_get_phys_port_name(struct net_device *dev, char *buf, 5693 size_t len) 5694 { 5695 struct virtnet_info *vi = netdev_priv(dev); 5696 int ret; 5697 5698 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 5699 return -EOPNOTSUPP; 5700 5701 ret = snprintf(buf, len, "sby"); 5702 if (ret >= len) 5703 return -EOPNOTSUPP; 5704 5705 return 0; 5706 } 5707 5708 static int virtnet_set_features(struct net_device *dev, 5709 netdev_features_t features) 5710 { 5711 struct virtnet_info *vi = netdev_priv(dev); 5712 u64 offloads; 5713 int err; 5714 5715 if ((dev->features ^ features) & NETIF_F_GRO_HW) { 5716 if (vi->xdp_enabled) 5717 return -EBUSY; 5718 5719 if (features & NETIF_F_GRO_HW) 5720 offloads = vi->guest_offloads_capable; 5721 else 5722 offloads = vi->guest_offloads_capable & 5723 ~GUEST_OFFLOAD_GRO_HW_MASK; 5724 5725 err = virtnet_set_guest_offloads(vi, offloads); 5726 if (err) 5727 return err; 5728 vi->guest_offloads = offloads; 5729 } 5730 5731 if ((dev->features ^ features) & NETIF_F_RXHASH) { 5732 if (features & NETIF_F_RXHASH) 5733 
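/* re-enable the previously saved hash types */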
vi->rss.hash_types = vi->rss_hash_types_saved; 5734 else 5735 vi->rss.hash_types = VIRTIO_NET_HASH_REPORT_NONE; 5736 5737 if (!virtnet_commit_rss_command(vi)) 5738 return -EINVAL; 5739 } 5740 5741 return 0; 5742 } 5743 5744 static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue) 5745 { 5746 struct virtnet_info *priv = netdev_priv(dev); 5747 struct send_queue *sq = &priv->sq[txqueue]; 5748 struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue); 5749 5750 u64_stats_update_begin(&sq->stats.syncp); 5751 u64_stats_inc(&sq->stats.tx_timeouts); 5752 u64_stats_update_end(&sq->stats.syncp); 5753 5754 netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n", 5755 txqueue, sq->name, sq->vq->index, sq->vq->name, 5756 jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start))); 5757 } 5758 5759 static int virtnet_init_irq_moder(struct virtnet_info *vi) 5760 { 5761 u8 profile_flags = 0, coal_flags = 0; 5762 int ret, i; 5763 5764 profile_flags |= DIM_PROFILE_RX; 5765 coal_flags |= DIM_COALESCE_USEC | DIM_COALESCE_PKTS; 5766 ret = net_dim_init_irq_moder(vi->dev, profile_flags, coal_flags, 5767 DIM_CQ_PERIOD_MODE_START_FROM_EQE, 5768 0, virtnet_rx_dim_work, NULL); 5769 5770 if (ret) 5771 return ret; 5772 5773 for (i = 0; i < vi->max_queue_pairs; i++) 5774 net_dim_setting(vi->dev, &vi->rq[i].dim, false); 5775 5776 return 0; 5777 } 5778 5779 static void virtnet_free_irq_moder(struct virtnet_info *vi) 5780 { 5781 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 5782 return; 5783 5784 rtnl_lock(); 5785 net_dim_free_irq_moder(vi->dev); 5786 rtnl_unlock(); 5787 } 5788 5789 static const struct net_device_ops virtnet_netdev = { 5790 .ndo_open = virtnet_open, 5791 .ndo_stop = virtnet_close, 5792 .ndo_start_xmit = start_xmit, 5793 .ndo_validate_addr = eth_validate_addr, 5794 .ndo_set_mac_address = virtnet_set_mac_address, 5795 .ndo_set_rx_mode = virtnet_set_rx_mode, 5796 .ndo_get_stats64 = virtnet_stats, 5797 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, 5798 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, 5799 .ndo_bpf = virtnet_xdp, 5800 .ndo_xdp_xmit = virtnet_xdp_xmit, 5801 .ndo_xsk_wakeup = virtnet_xsk_wakeup, 5802 .ndo_features_check = passthru_features_check, 5803 .ndo_get_phys_port_name = virtnet_get_phys_port_name, 5804 .ndo_set_features = virtnet_set_features, 5805 .ndo_tx_timeout = virtnet_tx_timeout, 5806 }; 5807 5808 static void virtnet_config_changed_work(struct work_struct *work) 5809 { 5810 struct virtnet_info *vi = 5811 container_of(work, struct virtnet_info, config_work); 5812 u16 v; 5813 5814 if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS, 5815 struct virtio_net_config, status, &v) < 0) 5816 return; 5817 5818 if (v & VIRTIO_NET_S_ANNOUNCE) { 5819 netdev_notify_peers(vi->dev); 5820 virtnet_ack_link_announce(vi); 5821 } 5822 5823 /* Ignore unknown (future) status bits */ 5824 v &= VIRTIO_NET_S_LINK_UP; 5825 5826 if (vi->status == v) 5827 return; 5828 5829 vi->status = v; 5830 5831 if (vi->status & VIRTIO_NET_S_LINK_UP) { 5832 virtnet_update_settings(vi); 5833 netif_carrier_on(vi->dev); 5834 netif_tx_wake_all_queues(vi->dev); 5835 } else { 5836 netif_carrier_off(vi->dev); 5837 netif_tx_stop_all_queues(vi->dev); 5838 } 5839 } 5840 5841 static void virtnet_config_changed(struct virtio_device *vdev) 5842 { 5843 struct virtnet_info *vi = vdev->priv; 5844 5845 schedule_work(&vi->config_work); 5846 } 5847 5848 static void virtnet_free_queues(struct virtnet_info *vi) 5849 { 5850 int i; 5851 5852 for (i = 0; i < 
vi->max_queue_pairs; i++) { 5853 __netif_napi_del(&vi->rq[i].napi); 5854 __netif_napi_del(&vi->sq[i].napi); 5855 } 5856 5857 /* We called __netif_napi_del(), 5858 * we need to respect an RCU grace period before freeing vi->rq 5859 */ 5860 synchronize_net(); 5861 5862 kfree(vi->rq); 5863 kfree(vi->sq); 5864 kfree(vi->ctrl); 5865 } 5866 5867 static void _free_receive_bufs(struct virtnet_info *vi) 5868 { 5869 struct bpf_prog *old_prog; 5870 int i; 5871 5872 for (i = 0; i < vi->max_queue_pairs; i++) { 5873 while (vi->rq[i].pages) 5874 __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0); 5875 5876 old_prog = rtnl_dereference(vi->rq[i].xdp_prog); 5877 RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL); 5878 if (old_prog) 5879 bpf_prog_put(old_prog); 5880 } 5881 } 5882 5883 static void free_receive_bufs(struct virtnet_info *vi) 5884 { 5885 rtnl_lock(); 5886 _free_receive_bufs(vi); 5887 rtnl_unlock(); 5888 } 5889 5890 static void free_receive_page_frags(struct virtnet_info *vi) 5891 { 5892 int i; 5893 for (i = 0; i < vi->max_queue_pairs; i++) 5894 if (vi->rq[i].alloc_frag.page) { 5895 if (vi->rq[i].last_dma) 5896 virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0); 5897 put_page(vi->rq[i].alloc_frag.page); 5898 } 5899 } 5900 5901 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf) 5902 { 5903 if (!is_xdp_frame(buf)) 5904 dev_kfree_skb(buf); 5905 else 5906 xdp_return_frame(ptr_to_xdp(buf)); 5907 } 5908 5909 static void free_unused_bufs(struct virtnet_info *vi) 5910 { 5911 void *buf; 5912 int i; 5913 5914 for (i = 0; i < vi->max_queue_pairs; i++) { 5915 struct virtqueue *vq = vi->sq[i].vq; 5916 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 5917 virtnet_sq_free_unused_buf(vq, buf); 5918 cond_resched(); 5919 } 5920 5921 for (i = 0; i < vi->max_queue_pairs; i++) { 5922 struct virtqueue *vq = vi->rq[i].vq; 5923 5924 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 5925 virtnet_rq_unmap_free_buf(vq, buf); 5926 cond_resched(); 5927 } 5928 } 5929 5930 static void virtnet_del_vqs(struct virtnet_info *vi) 5931 { 5932 struct virtio_device *vdev = vi->vdev; 5933 5934 virtnet_clean_affinity(vi); 5935 5936 vdev->config->del_vqs(vdev); 5937 5938 virtnet_free_queues(vi); 5939 } 5940 5941 /* How large should a single buffer be so a queue full of these can fit at 5942 * least one full packet? 5943 * Logic below assumes the mergeable buffer header is used. 5944 */ 5945 static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq) 5946 { 5947 const unsigned int hdr_len = vi->hdr_len; 5948 unsigned int rq_size = virtqueue_get_vring_size(vq); 5949 unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu; 5950 unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len; 5951 unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size); 5952 5953 return max(max(min_buf_len, hdr_len) - hdr_len, 5954 (unsigned int)GOOD_PACKET_LEN); 5955 } 5956 5957 static int virtnet_find_vqs(struct virtnet_info *vi) 5958 { 5959 struct virtqueue_info *vqs_info; 5960 struct virtqueue **vqs; 5961 int ret = -ENOMEM; 5962 int total_vqs; 5963 bool *ctx; 5964 u16 i; 5965 5966 /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by 5967 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by 5968 * possible control vq. 
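 * The resulting layout is rx0, tx0, rx1, tx1, ..., with the control vq last when present.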
5969 */ 5970 total_vqs = vi->max_queue_pairs * 2 + 5971 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ); 5972 5973 /* Allocate space for find_vqs parameters */ 5974 vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL); 5975 if (!vqs) 5976 goto err_vq; 5977 vqs_info = kcalloc(total_vqs, sizeof(*vqs_info), GFP_KERNEL); 5978 if (!vqs_info) 5979 goto err_vqs_info; 5980 if (!vi->big_packets || vi->mergeable_rx_bufs) { 5981 ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL); 5982 if (!ctx) 5983 goto err_ctx; 5984 } else { 5985 ctx = NULL; 5986 } 5987 5988 /* Parameters for control virtqueue, if any */ 5989 if (vi->has_cvq) { 5990 vqs_info[total_vqs - 1].name = "control"; 5991 } 5992 5993 /* Allocate/initialize parameters for send/receive virtqueues */ 5994 for (i = 0; i < vi->max_queue_pairs; i++) { 5995 vqs_info[rxq2vq(i)].callback = skb_recv_done; 5996 vqs_info[txq2vq(i)].callback = skb_xmit_done; 5997 sprintf(vi->rq[i].name, "input.%u", i); 5998 sprintf(vi->sq[i].name, "output.%u", i); 5999 vqs_info[rxq2vq(i)].name = vi->rq[i].name; 6000 vqs_info[txq2vq(i)].name = vi->sq[i].name; 6001 if (ctx) 6002 vqs_info[rxq2vq(i)].ctx = true; 6003 } 6004 6005 ret = virtio_find_vqs(vi->vdev, total_vqs, vqs, vqs_info, NULL); 6006 if (ret) 6007 goto err_find; 6008 6009 if (vi->has_cvq) { 6010 vi->cvq = vqs[total_vqs - 1]; 6011 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) 6012 vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; 6013 } 6014 6015 for (i = 0; i < vi->max_queue_pairs; i++) { 6016 vi->rq[i].vq = vqs[rxq2vq(i)]; 6017 vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq); 6018 vi->sq[i].vq = vqs[txq2vq(i)]; 6019 } 6020 6021 /* Success: ret is 0 here; fall through to free the temporary find_vqs parameter arrays. */ 6022 6023 6024 err_find: 6025 kfree(ctx); 6026 err_ctx: 6027 kfree(vqs_info); 6028 err_vqs_info: 6029 kfree(vqs); 6030 err_vq: 6031 return ret; 6032 } 6033 6034 static int virtnet_alloc_queues(struct virtnet_info *vi) 6035 { 6036 int i; 6037 6038 if (vi->has_cvq) { 6039 vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL); 6040 if (!vi->ctrl) 6041 goto err_ctrl; 6042 } else { 6043 vi->ctrl = NULL; 6044 } 6045 vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL); 6046 if (!vi->sq) 6047 goto err_sq; 6048 vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL); 6049 if (!vi->rq) 6050 goto err_rq; 6051 6052 INIT_DELAYED_WORK(&vi->refill, refill_work); 6053 for (i = 0; i < vi->max_queue_pairs; i++) { 6054 vi->rq[i].pages = NULL; 6055 netif_napi_add_weight(vi->dev, &vi->rq[i].napi, virtnet_poll, 6056 napi_weight); 6057 netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi, 6058 virtnet_poll_tx, 6059 napi_tx ?
napi_weight : 0); 6060 6061 sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); 6062 ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len); 6063 sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg)); 6064 6065 u64_stats_init(&vi->rq[i].stats.syncp); 6066 u64_stats_init(&vi->sq[i].stats.syncp); 6067 mutex_init(&vi->rq[i].dim_lock); 6068 } 6069 6070 return 0; 6071 6072 err_rq: 6073 kfree(vi->sq); 6074 err_sq: 6075 kfree(vi->ctrl); 6076 err_ctrl: 6077 return -ENOMEM; 6078 } 6079 6080 static int init_vqs(struct virtnet_info *vi) 6081 { 6082 int ret; 6083 6084 /* Allocate send & receive queues */ 6085 ret = virtnet_alloc_queues(vi); 6086 if (ret) 6087 goto err; 6088 6089 ret = virtnet_find_vqs(vi); 6090 if (ret) 6091 goto err_free; 6092 6093 virtnet_rq_set_premapped(vi); 6094 6095 cpus_read_lock(); 6096 virtnet_set_affinity(vi); 6097 cpus_read_unlock(); 6098 6099 return 0; 6100 6101 err_free: 6102 virtnet_free_queues(vi); 6103 err: 6104 return ret; 6105 } 6106 6107 #ifdef CONFIG_SYSFS 6108 static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue, 6109 char *buf) 6110 { 6111 struct virtnet_info *vi = netdev_priv(queue->dev); 6112 unsigned int queue_index = get_netdev_rx_queue_index(queue); 6113 unsigned int headroom = virtnet_get_headroom(vi); 6114 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 6115 struct ewma_pkt_len *avg; 6116 6117 BUG_ON(queue_index >= vi->max_queue_pairs); 6118 avg = &vi->rq[queue_index].mrg_avg_pkt_len; 6119 return sprintf(buf, "%u\n", 6120 get_mergeable_buf_len(&vi->rq[queue_index], avg, 6121 SKB_DATA_ALIGN(headroom + tailroom))); 6122 } 6123 6124 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute = 6125 __ATTR_RO(mergeable_rx_buffer_size); 6126 6127 static struct attribute *virtio_net_mrg_rx_attrs[] = { 6128 &mergeable_rx_buffer_size_attribute.attr, 6129 NULL 6130 }; 6131 6132 static const struct attribute_group virtio_net_mrg_rx_group = { 6133 .name = "virtio_net", 6134 .attrs = virtio_net_mrg_rx_attrs 6135 }; 6136 #endif 6137 6138 static bool virtnet_fail_on_feature(struct virtio_device *vdev, 6139 unsigned int fbit, 6140 const char *fname, const char *dname) 6141 { 6142 if (!virtio_has_feature(vdev, fbit)) 6143 return false; 6144 6145 dev_err(&vdev->dev, "device advertises feature %s but not %s", 6146 fname, dname); 6147 6148 return true; 6149 } 6150 6151 #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \ 6152 virtnet_fail_on_feature(vdev, fbit, #fbit, dbit) 6153 6154 static bool virtnet_validate_features(struct virtio_device *vdev) 6155 { 6156 if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) && 6157 (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX, 6158 "VIRTIO_NET_F_CTRL_VQ") || 6159 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN, 6160 "VIRTIO_NET_F_CTRL_VQ") || 6161 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE, 6162 "VIRTIO_NET_F_CTRL_VQ") || 6163 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") || 6164 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR, 6165 "VIRTIO_NET_F_CTRL_VQ") || 6166 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, 6167 "VIRTIO_NET_F_CTRL_VQ") || 6168 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, 6169 "VIRTIO_NET_F_CTRL_VQ") || 6170 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, 6171 "VIRTIO_NET_F_CTRL_VQ") || 6172 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_VQ_NOTF_COAL, 6173 "VIRTIO_NET_F_CTRL_VQ"))) { 6174 return false; 6175 } 6176 6177 return true; 6178 } 6179 6180 #define MIN_MTU ETH_MIN_MTU 6181 #define MAX_MTU ETH_MAX_MTU 6182 6183 static int virtnet_validate(struct virtio_device *vdev) 6184 
{ 6185 if (!vdev->config->get) { 6186 dev_err(&vdev->dev, "%s failure: config access disabled\n", 6187 __func__); 6188 return -EINVAL; 6189 } 6190 6191 if (!virtnet_validate_features(vdev)) 6192 return -EINVAL; 6193 6194 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 6195 int mtu = virtio_cread16(vdev, 6196 offsetof(struct virtio_net_config, 6197 mtu)); 6198 if (mtu < MIN_MTU) 6199 __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU); 6200 } 6201 6202 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) && 6203 !virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 6204 dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby"); 6205 __virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY); 6206 } 6207 6208 return 0; 6209 } 6210 6211 static bool virtnet_check_guest_gso(const struct virtnet_info *vi) 6212 { 6213 return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 6214 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 6215 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 6216 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 6217 (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) && 6218 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6)); 6219 } 6220 6221 static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu) 6222 { 6223 bool guest_gso = virtnet_check_guest_gso(vi); 6224 6225 /* If the device can receive ANY guest GSO packets, regardless of mtu, 6226 * allocate maximum-size packets; otherwise limit them to mtu-sized 6227 * packets only. See the sizing sketch appended at the end of this file. 6228 */ 6229 if (mtu > ETH_DATA_LEN || guest_gso) { 6230 vi->big_packets = true; 6231 vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE); 6232 } 6233 } 6234 6235 #define VIRTIO_NET_HASH_REPORT_MAX_TABLE 10 6236 static enum xdp_rss_hash_type 6237 virtnet_xdp_rss_type[VIRTIO_NET_HASH_REPORT_MAX_TABLE] = { 6238 [VIRTIO_NET_HASH_REPORT_NONE] = XDP_RSS_TYPE_NONE, 6239 [VIRTIO_NET_HASH_REPORT_IPv4] = XDP_RSS_TYPE_L3_IPV4, 6240 [VIRTIO_NET_HASH_REPORT_TCPv4] = XDP_RSS_TYPE_L4_IPV4_TCP, 6241 [VIRTIO_NET_HASH_REPORT_UDPv4] = XDP_RSS_TYPE_L4_IPV4_UDP, 6242 [VIRTIO_NET_HASH_REPORT_IPv6] = XDP_RSS_TYPE_L3_IPV6, 6243 [VIRTIO_NET_HASH_REPORT_TCPv6] = XDP_RSS_TYPE_L4_IPV6_TCP, 6244 [VIRTIO_NET_HASH_REPORT_UDPv6] = XDP_RSS_TYPE_L4_IPV6_UDP, 6245 [VIRTIO_NET_HASH_REPORT_IPv6_EX] = XDP_RSS_TYPE_L3_IPV6_EX, 6246 [VIRTIO_NET_HASH_REPORT_TCPv6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX, 6247 [VIRTIO_NET_HASH_REPORT_UDPv6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX 6248 }; 6249 6250 static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, 6251 enum xdp_rss_hash_type *rss_type) 6252 { 6253 const struct xdp_buff *xdp = (void *)_ctx; 6254 struct virtio_net_hdr_v1_hash *hdr_hash; 6255 struct virtnet_info *vi; 6256 u16 hash_report; 6257 6258 if (!(xdp->rxq->dev->features & NETIF_F_RXHASH)) 6259 return -ENODATA; 6260 6261 vi = netdev_priv(xdp->rxq->dev); 6262 hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len); 6263 hash_report = __le16_to_cpu(hdr_hash->hash_report); 6264 6265 if (hash_report >= VIRTIO_NET_HASH_REPORT_MAX_TABLE) 6266 hash_report = VIRTIO_NET_HASH_REPORT_NONE; 6267 6268 *rss_type = virtnet_xdp_rss_type[hash_report]; 6269 *hash = __le32_to_cpu(hdr_hash->hash_value); 6270 return 0; 6271 } 6272 6273 static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = { 6274 .xmo_rx_hash = virtnet_xdp_rx_hash, 6275 }; 6276 6277 static int virtnet_probe(struct virtio_device *vdev) 6278 { 6279 int i, err = -ENOMEM; 6280 struct net_device *dev;
6281 struct virtnet_info *vi; 6282 u16 max_queue_pairs; 6283 int mtu = 0; 6284 6285 /* Find out whether the host supports a multiqueue/RSS virtio_net device */ 6286 max_queue_pairs = 1; 6287 if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) 6288 max_queue_pairs = 6289 virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); 6290 6291 /* Multiqueue needs at least 2 queue pairs and a control virtqueue; otherwise fall back to a single pair */ 6292 if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 6293 max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || 6294 !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 6295 max_queue_pairs = 1; 6296 6297 /* Allocate ourselves a network device with room for our info */ 6298 dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs); 6299 if (!dev) 6300 return -ENOMEM; 6301 6302 /* Set up network device as normal. */ 6303 dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE | 6304 IFF_TX_SKB_NO_LINEAR; 6305 dev->netdev_ops = &virtnet_netdev; 6306 dev->stat_ops = &virtnet_stat_ops; 6307 dev->features = NETIF_F_HIGHDMA; 6308 6309 dev->ethtool_ops = &virtnet_ethtool_ops; 6310 SET_NETDEV_DEV(dev, &vdev->dev); 6311 6312 /* Do we support "hardware" checksums? */ 6313 if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { 6314 /* This opens up the world of extra features. */ 6315 dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG; 6316 if (csum) 6317 dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; 6318 6319 if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { 6320 dev->hw_features |= NETIF_F_TSO 6321 | NETIF_F_TSO_ECN | NETIF_F_TSO6; 6322 } 6323 /* Individual feature bits: what can host handle? */ 6324 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4)) 6325 dev->hw_features |= NETIF_F_TSO; 6326 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6)) 6327 dev->hw_features |= NETIF_F_TSO6; 6328 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) 6329 dev->hw_features |= NETIF_F_TSO_ECN; 6330 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO)) 6331 dev->hw_features |= NETIF_F_GSO_UDP_L4; 6332 6333 dev->features |= NETIF_F_GSO_ROBUST; 6334 6335 if (gso) 6336 dev->features |= dev->hw_features & NETIF_F_ALL_TSO; 6337 /* (!csum && gso) case will be fixed by register_netdev() */ 6338 } 6339 6340 /* 1. With VIRTIO_NET_F_GUEST_CSUM negotiation, the driver doesn't 6341 * need to calculate checksums for partially checksummed packets, 6342 * as they're considered valid by the upper layer. 6343 * 2. Without VIRTIO_NET_F_GUEST_CSUM negotiation, the driver only 6344 * receives fully checksummed packets. The device may assist in 6345 * validating these packets' checksums, so the driver won't have to. 6346 */ 6347 dev->features |= NETIF_F_RXCSUM; 6348 6349 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || 6350 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) 6351 dev->features |= NETIF_F_GRO_HW; 6352 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) 6353 dev->hw_features |= NETIF_F_GRO_HW; 6354 6355 dev->vlan_features = dev->features; 6356 dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; 6357 6358 /* MTU range: 68 - 65535 */ 6359 dev->min_mtu = MIN_MTU; 6360 dev->max_mtu = MAX_MTU; 6361 6362 /* Configuration may specify what MAC to use. Otherwise random.
*/ 6363 if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 6364 u8 addr[ETH_ALEN]; 6365 6366 virtio_cread_bytes(vdev, 6367 offsetof(struct virtio_net_config, mac), 6368 addr, ETH_ALEN); 6369 eth_hw_addr_set(dev, addr); 6370 } else { 6371 eth_hw_addr_random(dev); 6372 dev_info(&vdev->dev, "Assigned random MAC address %pM\n", 6373 dev->dev_addr); 6374 } 6375 6376 /* Set up our device-specific information */ 6377 vi = netdev_priv(dev); 6378 vi->dev = dev; 6379 vi->vdev = vdev; 6380 vdev->priv = vi; 6381 6382 INIT_WORK(&vi->config_work, virtnet_config_changed_work); 6383 INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work); 6384 spin_lock_init(&vi->refill_lock); 6385 6386 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) { 6387 vi->mergeable_rx_bufs = true; 6388 dev->xdp_features |= NETDEV_XDP_ACT_RX_SG; 6389 } 6390 6391 if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) 6392 vi->has_rss_hash_report = true; 6393 6394 if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) { 6395 vi->has_rss = true; 6396 6397 vi->rss_indir_table_size = 6398 virtio_cread16(vdev, offsetof(struct virtio_net_config, 6399 rss_max_indirection_table_length)); 6400 } 6401 6402 if (vi->has_rss || vi->has_rss_hash_report) { 6403 vi->rss_key_size = 6404 virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); 6405 6406 vi->rss_hash_types_supported = 6407 virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types)); 6408 vi->rss_hash_types_supported &= 6409 ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX | 6410 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | 6411 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX); 6412 6413 dev->hw_features |= NETIF_F_RXHASH; 6414 dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops; 6415 } 6416 6417 if (vi->has_rss_hash_report) 6418 vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); 6419 else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) || 6420 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 6421 vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); 6422 else 6423 vi->hdr_len = sizeof(struct virtio_net_hdr); 6424 6425 if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || 6426 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 6427 vi->any_header_sg = true; 6428 6429 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 6430 vi->has_cvq = true; 6431 6432 mutex_init(&vi->cvq_lock); 6433 6434 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 6435 mtu = virtio_cread16(vdev, 6436 offsetof(struct virtio_net_config, 6437 mtu)); 6438 if (mtu < dev->min_mtu) { 6439 /* Should never trigger: MTU was previously validated 6440 * in virtnet_validate. 
6441 */ 6442 dev_err(&vdev->dev, 6443 "device MTU appears to have changed, it is now %d < %d", 6444 mtu, dev->min_mtu); 6445 err = -EINVAL; 6446 goto free; 6447 } 6448 6449 dev->mtu = mtu; 6450 dev->max_mtu = mtu; 6451 } 6452 6453 virtnet_set_big_packets(vi, mtu); 6454 6455 if (vi->any_header_sg) 6456 dev->needed_headroom = vi->hdr_len; 6457 6458 /* Enable multiqueue by default */ 6459 if (num_online_cpus() >= max_queue_pairs) 6460 vi->curr_queue_pairs = max_queue_pairs; 6461 else 6462 vi->curr_queue_pairs = num_online_cpus(); 6463 vi->max_queue_pairs = max_queue_pairs; 6464 6465 /* Allocate/initialize the rx/tx queues, and invoke find_vqs */ 6466 err = init_vqs(vi); 6467 if (err) 6468 goto free; 6469 6470 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 6471 vi->intr_coal_rx.max_usecs = 0; 6472 vi->intr_coal_tx.max_usecs = 0; 6473 vi->intr_coal_rx.max_packets = 0; 6474 6475 /* Keep the default values of the coalescing parameters 6476 * aligned with the default napi_tx state. 6477 */ 6478 if (vi->sq[0].napi.weight) 6479 vi->intr_coal_tx.max_packets = 1; 6480 else 6481 vi->intr_coal_tx.max_packets = 0; 6482 } 6483 6484 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 6485 /* Same reasoning as for VIRTIO_NET_F_NOTF_COAL above. */ 6486 for (i = 0; i < vi->max_queue_pairs; i++) 6487 if (vi->sq[i].napi.weight) 6488 vi->sq[i].intr_coal.max_packets = 1; 6489 6490 err = virtnet_init_irq_moder(vi); 6491 if (err) 6492 goto free; 6493 } 6494 6495 #ifdef CONFIG_SYSFS 6496 if (vi->mergeable_rx_bufs) 6497 dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group; 6498 #endif 6499 netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs); 6500 netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs); 6501 6502 virtnet_init_settings(dev); 6503 6504 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 6505 vi->failover = net_failover_create(vi->dev); 6506 if (IS_ERR(vi->failover)) { 6507 err = PTR_ERR(vi->failover); 6508 goto free_vqs; 6509 } 6510 } 6511 6512 if (vi->has_rss || vi->has_rss_hash_report) 6513 virtnet_init_default_rss(vi); 6514 6515 enable_rx_mode_work(vi); 6516 6517 /* serialize netdev register + virtio_device_ready() with ndo_open() */ 6518 rtnl_lock(); 6519 6520 err = register_netdevice(dev); 6521 if (err) { 6522 pr_debug("virtio_net: registering device failed\n"); 6523 rtnl_unlock(); 6524 goto free_failover; 6525 } 6526 6527 virtio_device_ready(vdev); 6528 6529 virtnet_set_queues(vi, vi->curr_queue_pairs); 6530 6531 /* A random MAC address has been assigned; notify the device.
6532 * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there 6533 * because many devices work fine without the MAC being set explicitly. 6534 */ 6535 if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 6536 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 6537 struct scatterlist sg; 6538 6539 sg_init_one(&sg, dev->dev_addr, dev->addr_len); 6540 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 6541 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 6542 pr_debug("virtio_net: setting MAC address failed\n"); 6543 rtnl_unlock(); 6544 err = -EINVAL; 6545 goto free_unregister_netdev; 6546 } 6547 } 6548 6549 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) { 6550 struct virtio_net_stats_capabilities *stats_cap __free(kfree) = NULL; 6551 struct scatterlist sg; 6552 __le64 v; 6553 6554 stats_cap = kzalloc(sizeof(*stats_cap), GFP_KERNEL); 6555 if (!stats_cap) { 6556 rtnl_unlock(); 6557 err = -ENOMEM; 6558 goto free_unregister_netdev; 6559 } 6560 6561 sg_init_one(&sg, stats_cap, sizeof(*stats_cap)); 6562 6563 if (!virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, 6564 VIRTIO_NET_CTRL_STATS_QUERY, 6565 NULL, &sg)) { 6566 pr_debug("virtio_net: failed to get stats capability\n"); 6567 rtnl_unlock(); 6568 err = -EINVAL; 6569 goto free_unregister_netdev; 6570 } 6571 6572 v = stats_cap->supported_stats_types[0]; 6573 vi->device_stats_cap = le64_to_cpu(v); 6574 } 6575 6576 rtnl_unlock(); 6577 6578 err = virtnet_cpu_notif_add(vi); 6579 if (err) { 6580 pr_debug("virtio_net: registering cpu notifier failed\n"); 6581 goto free_unregister_netdev; 6582 } 6583 6584 /* Assume link up if the device can't report link status; 6585 * otherwise get link status from config. */ 6586 netif_carrier_off(dev); 6587 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { 6588 schedule_work(&vi->config_work); 6589 } else { 6590 vi->status = VIRTIO_NET_S_LINK_UP; 6591 virtnet_update_settings(vi); 6592 netif_carrier_on(dev); 6593 } 6594 6595 for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) 6596 if (virtio_has_feature(vi->vdev, guest_offloads[i])) 6597 set_bit(guest_offloads[i], &vi->guest_offloads); 6598 vi->guest_offloads_capable = vi->guest_offloads; 6599 6600 pr_debug("virtnet: registered device %s with %d RX and TX vqs\n", 6601 dev->name, max_queue_pairs); 6602 6603 return 0; 6604 6605 free_unregister_netdev: 6606 unregister_netdev(dev); 6607 free_failover: 6608 net_failover_destroy(vi->failover); 6609 free_vqs: 6610 virtio_reset_device(vdev); 6611 cancel_delayed_work_sync(&vi->refill); 6612 free_receive_page_frags(vi); 6613 virtnet_del_vqs(vi); 6614 free: 6615 free_netdev(dev); 6616 return err; 6617 } 6618 6619 static void remove_vq_common(struct virtnet_info *vi) 6620 { 6621 virtio_reset_device(vi->vdev); 6622 6623 /* Free unused buffers in both send and recv, if any. */ 6624 free_unused_bufs(vi); 6625 6626 free_receive_bufs(vi); 6627 6628 free_receive_page_frags(vi); 6629 6630 virtnet_del_vqs(vi); 6631 } 6632 6633 static void virtnet_remove(struct virtio_device *vdev) 6634 { 6635 struct virtnet_info *vi = vdev->priv; 6636 6637 virtnet_cpu_notif_remove(vi); 6638 6639 /* Make sure no work handler is accessing the device.
*/ 6640 flush_work(&vi->config_work); 6641 disable_rx_mode_work(vi); 6642 flush_work(&vi->rx_mode_work); 6643 6644 virtnet_free_irq_moder(vi); 6645 6646 unregister_netdev(vi->dev); 6647 6648 net_failover_destroy(vi->failover); 6649 6650 remove_vq_common(vi); 6651 6652 free_netdev(vi->dev); 6653 } 6654 6655 static __maybe_unused int virtnet_freeze(struct virtio_device *vdev) 6656 { 6657 struct virtnet_info *vi = vdev->priv; 6658 6659 virtnet_cpu_notif_remove(vi); 6660 virtnet_freeze_down(vdev); 6661 remove_vq_common(vi); 6662 6663 return 0; 6664 } 6665 6666 static __maybe_unused int virtnet_restore(struct virtio_device *vdev) 6667 { 6668 struct virtnet_info *vi = vdev->priv; 6669 int err; 6670 6671 err = virtnet_restore_up(vdev); 6672 if (err) 6673 return err; 6674 virtnet_set_queues(vi, vi->curr_queue_pairs); 6675 6676 err = virtnet_cpu_notif_add(vi); 6677 if (err) { 6678 virtnet_freeze_down(vdev); 6679 remove_vq_common(vi); 6680 return err; 6681 } 6682 6683 return 0; 6684 } 6685 6686 static struct virtio_device_id id_table[] = { 6687 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, 6688 { 0 }, 6689 }; 6690 6691 #define VIRTNET_FEATURES \ 6692 VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \ 6693 VIRTIO_NET_F_MAC, \ 6694 VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \ 6695 VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \ 6696 VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \ 6697 VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \ 6698 VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \ 6699 VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \ 6700 VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ 6701 VIRTIO_NET_F_CTRL_MAC_ADDR, \ 6702 VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ 6703 VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ 6704 VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \ 6705 VIRTIO_NET_F_VQ_NOTF_COAL, \ 6706 VIRTIO_NET_F_GUEST_HDRLEN, VIRTIO_NET_F_DEVICE_STATS 6707 6708 static unsigned int features[] = { 6709 VIRTNET_FEATURES, 6710 }; 6711 6712 static unsigned int features_legacy[] = { 6713 VIRTNET_FEATURES, 6714 VIRTIO_NET_F_GSO, 6715 VIRTIO_F_ANY_LAYOUT, 6716 }; 6717 6718 static struct virtio_driver virtio_net_driver = { 6719 .feature_table = features, 6720 .feature_table_size = ARRAY_SIZE(features), 6721 .feature_table_legacy = features_legacy, 6722 .feature_table_size_legacy = ARRAY_SIZE(features_legacy), 6723 .driver.name = KBUILD_MODNAME, 6724 .id_table = id_table, 6725 .validate = virtnet_validate, 6726 .probe = virtnet_probe, 6727 .remove = virtnet_remove, 6728 .config_changed = virtnet_config_changed, 6729 #ifdef CONFIG_PM_SLEEP 6730 .freeze = virtnet_freeze, 6731 .restore = virtnet_restore, 6732 #endif 6733 }; 6734 6735 static __init int virtio_net_driver_init(void) 6736 { 6737 int ret; 6738 6739 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online", 6740 virtnet_cpu_online, 6741 virtnet_cpu_down_prep); 6742 if (ret < 0) 6743 goto out; 6744 virtionet_online = ret; 6745 ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead", 6746 NULL, virtnet_cpu_dead); 6747 if (ret) 6748 goto err_dead; 6749 ret = register_virtio_driver(&virtio_net_driver); 6750 if (ret) 6751 goto err_virtio; 6752 return 0; 6753 err_virtio: 6754 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 6755 err_dead: 6756 cpuhp_remove_multi_state(virtionet_online); 6757 out: 6758 return ret; 6759 } 6760 module_init(virtio_net_driver_init); 6761 6762 static __exit void 
virtio_net_driver_exit(void) 6763 { 6764 unregister_virtio_driver(&virtio_net_driver); 6765 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 6766 cpuhp_remove_multi_state(virtionet_online); 6767 } 6768 module_exit(virtio_net_driver_exit); 6769 6770 MODULE_DEVICE_TABLE(virtio, id_table); 6771 MODULE_DESCRIPTION("Virtio network driver"); 6772 MODULE_LICENSE("GPL"); 6773
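/*
 * Editor's note: the sketch below is illustrative only and is not built with
 * the driver (hence the #if 0 guard). It restates the virtqueue layout that
 * virtnet_find_vqs() relies on: RX and TX virtqueues interleaved per queue
 * pair, with the control virtqueue, if negotiated, occupying the last slot.
 * The local helpers are stand-ins assumed to mirror the driver's rxq2vq()/
 * txq2vq(); their real definitions earlier in this file are authoritative.
 * Copy the body out of the guard and build it with any C compiler to print
 * the layout for an example 4-pair device.
 */
#if 0
#include <stdio.h>

/* Assumed mapping: queue pair i uses vq 2*i for RX and 2*i + 1 for TX. */
static int sketch_rxq2vq(int qp) { return 2 * qp; }
static int sketch_txq2vq(int qp) { return 2 * qp + 1; }

int main(void)
{
	int max_queue_pairs = 4;	/* example device */
	int has_cvq = 1;		/* VIRTIO_NET_F_CTRL_VQ negotiated */
	int total_vqs = max_queue_pairs * 2 + has_cvq;
	int i;

	for (i = 0; i < max_queue_pairs; i++)
		printf("pair %d: rx vq %d, tx vq %d\n",
		       i, sketch_rxq2vq(i), sketch_txq2vq(i));
	if (has_cvq)
		printf("control vq: %d (total_vqs = %d)\n",
		       total_vqs - 1, total_vqs);
	return 0;
}
#endif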
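/*
 * Editor's note: a worked example of the arithmetic in
 * mergeable_min_buf_len() above, again kept out of the build. The constants
 * below (12-byte virtio header, 1518-byte GOOD_PACKET_LEN, 65535-byte
 * maximum packet) are plausible example values, not read from a device; the
 * point is how the per-buffer length shrinks with the ring size and is then
 * clamped up to GOOD_PACKET_LEN.
 */
#if 0
#include <stdio.h>

#define SK_DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int hdr_len = 12;		/* e.g. virtio_net_hdr_mrg_rxbuf */
	unsigned int good_packet_len = 1518;	/* ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN */
	unsigned int packet_len = 65535;	/* assumed max_mtu / IP_MAX_MTU */
	unsigned int rq_sizes[] = { 16, 256, 1024 };
	unsigned int i;

	for (i = 0; i < sizeof(rq_sizes) / sizeof(rq_sizes[0]); i++) {
		/* Mirrors: buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len */
		unsigned int buf_len = hdr_len + 14 + 4 + packet_len;
		unsigned int min_buf_len = SK_DIV_ROUND_UP(buf_len, rq_sizes[i]);
		unsigned int payload = (min_buf_len > hdr_len ? min_buf_len : hdr_len) - hdr_len;

		/* Mirrors the final max(..., GOOD_PACKET_LEN) clamp. */
		if (payload < good_packet_len)
			payload = good_packet_len;
		printf("ring %4u: per-buffer length %u\n", rq_sizes[i], payload);
	}
	return 0;
}
#endif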
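/*
 * Editor's note: a minimal sketch of the sizing decision in
 * virtnet_set_big_packets() above (the sizing sketch referenced there),
 * also kept out of the build. The 4096-byte page size, the 17-frag
 * MAX_SKB_FRAGS and the example MTUs are assumptions for illustration; the
 * branch structure mirrors the driver's "big packets if jumbo MTU or any
 * guest GSO" rule.
 */
#if 0
#include <stdio.h>

int main(void)
{
	unsigned int page_size = 4096;	 /* assumed PAGE_SIZE */
	unsigned int max_skb_frags = 17; /* typical MAX_SKB_FRAGS */
	int guest_gso;

	for (guest_gso = 0; guest_gso <= 1; guest_gso++) {
		int mtus[] = { 1500, 9000 };
		unsigned int i;

		for (i = 0; i < sizeof(mtus) / sizeof(mtus[0]); i++) {
			int mtu = mtus[i];
			int big = (mtu > 1500) || guest_gso; /* 1500 == ETH_DATA_LEN */
			unsigned int frags = guest_gso ?
				max_skb_frags : (mtu + page_size - 1) / page_size;

			printf("gso=%d mtu=%d -> big_packets=%d, skb frags=%u\n",
			       guest_gso, mtu, big, big ? frags : 0);
		}
	}
	return 0;
}
#endif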