1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* A network driver using virtio. 3 * 4 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation 5 */ 6 //#define DEBUG 7 #include <linux/netdevice.h> 8 #include <linux/etherdevice.h> 9 #include <linux/ethtool.h> 10 #include <linux/module.h> 11 #include <linux/virtio.h> 12 #include <linux/virtio_net.h> 13 #include <linux/bpf.h> 14 #include <linux/bpf_trace.h> 15 #include <linux/scatterlist.h> 16 #include <linux/if_vlan.h> 17 #include <linux/slab.h> 18 #include <linux/cpu.h> 19 #include <linux/average.h> 20 #include <linux/filter.h> 21 #include <linux/kernel.h> 22 #include <linux/dim.h> 23 #include <net/route.h> 24 #include <net/xdp.h> 25 #include <net/net_failover.h> 26 #include <net/netdev_rx_queue.h> 27 #include <net/netdev_queues.h> 28 #include <net/xdp_sock_drv.h> 29 30 static int napi_weight = NAPI_POLL_WEIGHT; 31 module_param(napi_weight, int, 0444); 32 33 static bool csum = true, gso = true, napi_tx = true; 34 module_param(csum, bool, 0444); 35 module_param(gso, bool, 0444); 36 module_param(napi_tx, bool, 0644); 37 38 /* FIXME: MTU in config. */ 39 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN) 40 #define GOOD_COPY_LEN 128 41 42 #define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) 43 44 /* Separating two types of XDP xmit */ 45 #define VIRTIO_XDP_TX BIT(0) 46 #define VIRTIO_XDP_REDIR BIT(1) 47 48 #define VIRTIO_XDP_FLAG BIT(0) 49 #define VIRTIO_ORPHAN_FLAG BIT(1) 50 51 /* RX packet size EWMA. The average packet size is used to determine the packet 52 * buffer size when refilling RX rings. As the entire RX ring may be refilled 53 * at once, the weight is chosen so that the EWMA will be insensitive to short- 54 * term, transient changes in packet size. 55 */ 56 DECLARE_EWMA(pkt_len, 0, 64) 57 58 #define VIRTNET_DRIVER_VERSION "1.0.0" 59 60 static const unsigned long guest_offloads[] = { 61 VIRTIO_NET_F_GUEST_TSO4, 62 VIRTIO_NET_F_GUEST_TSO6, 63 VIRTIO_NET_F_GUEST_ECN, 64 VIRTIO_NET_F_GUEST_UFO, 65 VIRTIO_NET_F_GUEST_CSUM, 66 VIRTIO_NET_F_GUEST_USO4, 67 VIRTIO_NET_F_GUEST_USO6, 68 VIRTIO_NET_F_GUEST_HDRLEN 69 }; 70 71 #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ 72 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ 73 (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ 74 (1ULL << VIRTIO_NET_F_GUEST_UFO) | \ 75 (1ULL << VIRTIO_NET_F_GUEST_USO4) | \ 76 (1ULL << VIRTIO_NET_F_GUEST_USO6)) 77 78 struct virtnet_stat_desc { 79 char desc[ETH_GSTRING_LEN]; 80 size_t offset; 81 size_t qstat_offset; 82 }; 83 84 struct virtnet_sq_free_stats { 85 u64 packets; 86 u64 bytes; 87 u64 napi_packets; 88 u64 napi_bytes; 89 }; 90 91 struct virtnet_sq_stats { 92 struct u64_stats_sync syncp; 93 u64_stats_t packets; 94 u64_stats_t bytes; 95 u64_stats_t xdp_tx; 96 u64_stats_t xdp_tx_drops; 97 u64_stats_t kicks; 98 u64_stats_t tx_timeouts; 99 u64_stats_t stop; 100 u64_stats_t wake; 101 }; 102 103 struct virtnet_rq_stats { 104 struct u64_stats_sync syncp; 105 u64_stats_t packets; 106 u64_stats_t bytes; 107 u64_stats_t drops; 108 u64_stats_t xdp_packets; 109 u64_stats_t xdp_tx; 110 u64_stats_t xdp_redirects; 111 u64_stats_t xdp_drops; 112 u64_stats_t kicks; 113 }; 114 115 #define VIRTNET_SQ_STAT(name, m) {name, offsetof(struct virtnet_sq_stats, m), -1} 116 #define VIRTNET_RQ_STAT(name, m) {name, offsetof(struct virtnet_rq_stats, m), -1} 117 118 #define VIRTNET_SQ_STAT_QSTAT(name, m) \ 119 { \ 120 name, \ 121 offsetof(struct virtnet_sq_stats, m), \ 122 offsetof(struct netdev_queue_stats_tx, m), \ 123 } 124 125 #define 
VIRTNET_RQ_STAT_QSTAT(name, m) \ 126 { \ 127 name, \ 128 offsetof(struct virtnet_rq_stats, m), \ 129 offsetof(struct netdev_queue_stats_rx, m), \ 130 } 131 132 static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = { 133 VIRTNET_SQ_STAT("xdp_tx", xdp_tx), 134 VIRTNET_SQ_STAT("xdp_tx_drops", xdp_tx_drops), 135 VIRTNET_SQ_STAT("kicks", kicks), 136 VIRTNET_SQ_STAT("tx_timeouts", tx_timeouts), 137 }; 138 139 static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = { 140 VIRTNET_RQ_STAT("drops", drops), 141 VIRTNET_RQ_STAT("xdp_packets", xdp_packets), 142 VIRTNET_RQ_STAT("xdp_tx", xdp_tx), 143 VIRTNET_RQ_STAT("xdp_redirects", xdp_redirects), 144 VIRTNET_RQ_STAT("xdp_drops", xdp_drops), 145 VIRTNET_RQ_STAT("kicks", kicks), 146 }; 147 148 static const struct virtnet_stat_desc virtnet_sq_stats_desc_qstat[] = { 149 VIRTNET_SQ_STAT_QSTAT("packets", packets), 150 VIRTNET_SQ_STAT_QSTAT("bytes", bytes), 151 VIRTNET_SQ_STAT_QSTAT("stop", stop), 152 VIRTNET_SQ_STAT_QSTAT("wake", wake), 153 }; 154 155 static const struct virtnet_stat_desc virtnet_rq_stats_desc_qstat[] = { 156 VIRTNET_RQ_STAT_QSTAT("packets", packets), 157 VIRTNET_RQ_STAT_QSTAT("bytes", bytes), 158 }; 159 160 #define VIRTNET_STATS_DESC_CQ(name) \ 161 {#name, offsetof(struct virtio_net_stats_cvq, name), -1} 162 163 #define VIRTNET_STATS_DESC_RX(class, name) \ 164 {#name, offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), -1} 165 166 #define VIRTNET_STATS_DESC_TX(class, name) \ 167 {#name, offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), -1} 168 169 170 static const struct virtnet_stat_desc virtnet_stats_cvq_desc[] = { 171 VIRTNET_STATS_DESC_CQ(command_num), 172 VIRTNET_STATS_DESC_CQ(ok_num), 173 }; 174 175 static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = { 176 VIRTNET_STATS_DESC_RX(basic, packets), 177 VIRTNET_STATS_DESC_RX(basic, bytes), 178 179 VIRTNET_STATS_DESC_RX(basic, notifications), 180 VIRTNET_STATS_DESC_RX(basic, interrupts), 181 }; 182 183 static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = { 184 VIRTNET_STATS_DESC_TX(basic, packets), 185 VIRTNET_STATS_DESC_TX(basic, bytes), 186 187 VIRTNET_STATS_DESC_TX(basic, notifications), 188 VIRTNET_STATS_DESC_TX(basic, interrupts), 189 }; 190 191 static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = { 192 VIRTNET_STATS_DESC_RX(csum, needs_csum), 193 }; 194 195 static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = { 196 VIRTNET_STATS_DESC_TX(gso, gso_packets_noseg), 197 VIRTNET_STATS_DESC_TX(gso, gso_bytes_noseg), 198 }; 199 200 static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = { 201 VIRTNET_STATS_DESC_RX(speed, ratelimit_bytes), 202 }; 203 204 static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = { 205 VIRTNET_STATS_DESC_TX(speed, ratelimit_bytes), 206 }; 207 208 #define VIRTNET_STATS_DESC_RX_QSTAT(class, name, qstat_field) \ 209 { \ 210 #name, \ 211 offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), \ 212 offsetof(struct netdev_queue_stats_rx, qstat_field), \ 213 } 214 215 #define VIRTNET_STATS_DESC_TX_QSTAT(class, name, qstat_field) \ 216 { \ 217 #name, \ 218 offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), \ 219 offsetof(struct netdev_queue_stats_tx, qstat_field), \ 220 } 221 222 static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc_qstat[] = { 223 VIRTNET_STATS_DESC_RX_QSTAT(basic, drops, hw_drops), 224 VIRTNET_STATS_DESC_RX_QSTAT(basic, drop_overruns, hw_drop_overruns), 225 }; 226 227 static const struct 
virtnet_stat_desc virtnet_stats_tx_basic_desc_qstat[] = { 228 VIRTNET_STATS_DESC_TX_QSTAT(basic, drops, hw_drops), 229 VIRTNET_STATS_DESC_TX_QSTAT(basic, drop_malformed, hw_drop_errors), 230 }; 231 232 static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc_qstat[] = { 233 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_valid, csum_unnecessary), 234 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_none, csum_none), 235 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_bad, csum_bad), 236 }; 237 238 static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc_qstat[] = { 239 VIRTNET_STATS_DESC_TX_QSTAT(csum, csum_none, csum_none), 240 VIRTNET_STATS_DESC_TX_QSTAT(csum, needs_csum, needs_csum), 241 }; 242 243 static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc_qstat[] = { 244 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets, hw_gro_packets), 245 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes, hw_gro_bytes), 246 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets_coalesced, hw_gro_wire_packets), 247 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes_coalesced, hw_gro_wire_bytes), 248 }; 249 250 static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc_qstat[] = { 251 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_packets, hw_gso_packets), 252 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_bytes, hw_gso_bytes), 253 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments, hw_gso_wire_packets), 254 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments_bytes, hw_gso_wire_bytes), 255 }; 256 257 static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc_qstat[] = { 258 VIRTNET_STATS_DESC_RX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits), 259 }; 260 261 static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc_qstat[] = { 262 VIRTNET_STATS_DESC_TX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits), 263 }; 264 265 #define VIRTNET_Q_TYPE_RX 0 266 #define VIRTNET_Q_TYPE_TX 1 267 #define VIRTNET_Q_TYPE_CQ 2 268 269 struct virtnet_interrupt_coalesce { 270 u32 max_packets; 271 u32 max_usecs; 272 }; 273 274 /* The dma information of pages allocated at a time. */ 275 struct virtnet_rq_dma { 276 dma_addr_t addr; 277 u32 ref; 278 u16 len; 279 u16 need_sync; 280 }; 281 282 /* Internal representation of a send virtqueue */ 283 struct send_queue { 284 /* Virtqueue associated with this send _queue */ 285 struct virtqueue *vq; 286 287 /* TX: fragments + linear part + virtio header */ 288 struct scatterlist sg[MAX_SKB_FRAGS + 2]; 289 290 /* Name of the send queue: output.$index */ 291 char name[16]; 292 293 struct virtnet_sq_stats stats; 294 295 struct virtnet_interrupt_coalesce intr_coal; 296 297 struct napi_struct napi; 298 299 /* Record whether sq is in reset state. */ 300 bool reset; 301 }; 302 303 /* Internal representation of a receive virtqueue */ 304 struct receive_queue { 305 /* Virtqueue associated with this receive_queue */ 306 struct virtqueue *vq; 307 308 struct napi_struct napi; 309 310 struct bpf_prog __rcu *xdp_prog; 311 312 struct virtnet_rq_stats stats; 313 314 /* The number of rx notifications */ 315 u16 calls; 316 317 /* Is dynamic interrupt moderation enabled? */ 318 bool dim_enabled; 319 320 /* Used to protect dim_enabled and inter_coal */ 321 struct mutex dim_lock; 322 323 /* Dynamic Interrupt Moderation */ 324 struct dim dim; 325 326 u32 packets_in_napi; 327 328 struct virtnet_interrupt_coalesce intr_coal; 329 330 /* Chain pages by the private ptr. */ 331 struct page *pages; 332 333 /* Average packet length for mergeable receive buffers. */ 334 struct ewma_pkt_len mrg_avg_pkt_len; 335 336 /* Page frag for packet buffer allocation. 
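	 * The head of each page handed out by this allocator also holds the
	 * struct virtnet_rq_dma bookkeeping used for premapped receive DMA
	 * (see virtnet_rq_alloc()); big-packet mode does not use it.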
	 */
	struct page_frag alloc_frag;

	/* RX: fragments + linear part + virtio header */
	struct scatterlist sg[MAX_SKB_FRAGS + 2];

	/* Min single buffer size for the mergeable buffers case. */
	unsigned int min_buf_len;

	/* Name of this receive queue: input.$index */
	char name[16];

	struct xdp_rxq_info xdp_rxq;

	/* Record the last dma info so it can be freed once a new page has
	 * been allocated.
	 */
	struct virtnet_rq_dma *last_dma;

	struct xsk_buff_pool *xsk_pool;

	/* xdp rxq used by xsk */
	struct xdp_rxq_info xsk_rxq_info;

	struct xdp_buff **xsk_buffs;
};

/* This structure can hold an RSS message with the maximum supported settings
 * for the indirection table and key size.
 * Note that the default structure describing the RSS configuration,
 * virtio_net_rss_config, carries the same information but cannot hold the
 * table values.
 * In any case, the structure is passed to the virtio device through sg_buf,
 * split into parts, because the table sizes may differ according to the
 * device configuration.
 */
#define VIRTIO_NET_RSS_MAX_KEY_SIZE 40
#define VIRTIO_NET_RSS_MAX_TABLE_LEN 128
struct virtio_net_ctrl_rss {
	u32 hash_types;
	u16 indirection_table_mask;
	u16 unclassified_queue;
	u16 indirection_table[VIRTIO_NET_RSS_MAX_TABLE_LEN];
	u16 max_tx_vq;
	u8 hash_key_length;
	u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE];
};

/* Control VQ buffers: protected by the rtnl lock */
struct control_buf {
	struct virtio_net_ctrl_hdr hdr;
	virtio_net_ctrl_ack status;
};

struct virtnet_info {
	struct virtio_device *vdev;
	struct virtqueue *cvq;
	struct net_device *dev;
	struct send_queue *sq;
	struct receive_queue *rq;
	unsigned int status;

	/* Max # of queue pairs supported by the device */
	u16 max_queue_pairs;

	/* # of queue pairs currently used by the driver */
	u16 curr_queue_pairs;

	/* # of XDP queue pairs currently used by the driver */
	u16 xdp_queue_pairs;

	/* xdp_queue_pairs may be 0 even while an XDP program is loaded,
	 * so track XDP enablement explicitly.
	 */
	bool xdp_enabled;

	/* I like... big packets and I cannot lie! */
	bool big_packets;

	/* number of sg entries allocated for big packets */
	unsigned int big_packets_num_skbfrags;

	/* Host will merge rx buffers for big packets (shake it! shake it!) */
	bool mergeable_rx_bufs;

	/* Host supports rss and/or hash report */
	bool has_rss;
	bool has_rss_hash_report;
	u8 rss_key_size;
	u16 rss_indir_table_size;
	u32 rss_hash_types_supported;
	u32 rss_hash_types_saved;
	struct virtio_net_ctrl_rss rss;

	/* Has control virtqueue */
	bool has_cvq;

	/* Lock to protect the control VQ */
	struct mutex cvq_lock;

	/* Host can handle any s/g split between our header and packet data */
	bool any_header_sg;

	/* Packet virtio header size */
	u8 hdr_len;

	/* Work struct for delayed refilling if we run low on memory. */
	struct delayed_work refill;

	/* Is delayed refill enabled? */
	bool refill_enabled;

	/* The lock to synchronize the access to refill_enabled */
	spinlock_t refill_lock;

	/* Work struct for config space updates */
	struct work_struct config_work;

	/* Work struct for setting rx mode */
	struct work_struct rx_mode_work;

	/* OK to queue work setting RX mode? */
	bool rx_mode_work_enabled;

	/* Is the affinity hint set for virtqueues?
*/ 454 bool affinity_hint_set; 455 456 /* CPU hotplug instances for online & dead */ 457 struct hlist_node node; 458 struct hlist_node node_dead; 459 460 struct control_buf *ctrl; 461 462 /* Ethtool settings */ 463 u8 duplex; 464 u32 speed; 465 466 /* Is rx dynamic interrupt moderation enabled? */ 467 bool rx_dim_enabled; 468 469 /* Interrupt coalescing settings */ 470 struct virtnet_interrupt_coalesce intr_coal_tx; 471 struct virtnet_interrupt_coalesce intr_coal_rx; 472 473 unsigned long guest_offloads; 474 unsigned long guest_offloads_capable; 475 476 /* failover when STANDBY feature enabled */ 477 struct failover *failover; 478 479 u64 device_stats_cap; 480 }; 481 482 struct padded_vnet_hdr { 483 struct virtio_net_hdr_v1_hash hdr; 484 /* 485 * hdr is in a separate sg buffer, and data sg buffer shares same page 486 * with this header sg. This padding makes next sg 16 byte aligned 487 * after the header. 488 */ 489 char padding[12]; 490 }; 491 492 struct virtio_net_common_hdr { 493 union { 494 struct virtio_net_hdr hdr; 495 struct virtio_net_hdr_mrg_rxbuf mrg_hdr; 496 struct virtio_net_hdr_v1_hash hash_v1_hdr; 497 }; 498 }; 499 500 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf); 501 static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp, 502 struct net_device *dev, 503 unsigned int *xdp_xmit, 504 struct virtnet_rq_stats *stats); 505 static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, 506 struct sk_buff *skb, u8 flags); 507 static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb, 508 struct sk_buff *curr_skb, 509 struct page *page, void *buf, 510 int len, int truesize); 511 512 static bool is_xdp_frame(void *ptr) 513 { 514 return (unsigned long)ptr & VIRTIO_XDP_FLAG; 515 } 516 517 static void *xdp_to_ptr(struct xdp_frame *ptr) 518 { 519 return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG); 520 } 521 522 static struct xdp_frame *ptr_to_xdp(void *ptr) 523 { 524 return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG); 525 } 526 527 static bool is_orphan_skb(void *ptr) 528 { 529 return (unsigned long)ptr & VIRTIO_ORPHAN_FLAG; 530 } 531 532 static void *skb_to_ptr(struct sk_buff *skb, bool orphan) 533 { 534 return (void *)((unsigned long)skb | (orphan ? VIRTIO_ORPHAN_FLAG : 0)); 535 } 536 537 static struct sk_buff *ptr_to_skb(void *ptr) 538 { 539 return (struct sk_buff *)((unsigned long)ptr & ~VIRTIO_ORPHAN_FLAG); 540 } 541 542 static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq, 543 bool in_napi, struct virtnet_sq_free_stats *stats) 544 { 545 unsigned int len; 546 void *ptr; 547 548 while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { 549 if (!is_xdp_frame(ptr)) { 550 struct sk_buff *skb = ptr_to_skb(ptr); 551 552 pr_debug("Sent skb %p\n", skb); 553 554 if (is_orphan_skb(ptr)) { 555 stats->packets++; 556 stats->bytes += skb->len; 557 } else { 558 stats->napi_packets++; 559 stats->napi_bytes += skb->len; 560 } 561 napi_consume_skb(skb, in_napi); 562 } else { 563 struct xdp_frame *frame = ptr_to_xdp(ptr); 564 565 stats->packets++; 566 stats->bytes += xdp_get_frame_len(frame); 567 xdp_return_frame(frame); 568 } 569 } 570 netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes); 571 } 572 573 /* Converting between virtqueue no. and kernel tx/rx queue no. 574 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 
2N:rxN 2N+1:txN 2N+2:cvq 575 */ 576 static int vq2txq(struct virtqueue *vq) 577 { 578 return (vq->index - 1) / 2; 579 } 580 581 static int txq2vq(int txq) 582 { 583 return txq * 2 + 1; 584 } 585 586 static int vq2rxq(struct virtqueue *vq) 587 { 588 return vq->index / 2; 589 } 590 591 static int rxq2vq(int rxq) 592 { 593 return rxq * 2; 594 } 595 596 static int vq_type(struct virtnet_info *vi, int qid) 597 { 598 if (qid == vi->max_queue_pairs * 2) 599 return VIRTNET_Q_TYPE_CQ; 600 601 if (qid % 2) 602 return VIRTNET_Q_TYPE_TX; 603 604 return VIRTNET_Q_TYPE_RX; 605 } 606 607 static inline struct virtio_net_common_hdr * 608 skb_vnet_common_hdr(struct sk_buff *skb) 609 { 610 return (struct virtio_net_common_hdr *)skb->cb; 611 } 612 613 /* 614 * private is used to chain pages for big packets, put the whole 615 * most recent used list in the beginning for reuse 616 */ 617 static void give_pages(struct receive_queue *rq, struct page *page) 618 { 619 struct page *end; 620 621 /* Find end of list, sew whole thing into vi->rq.pages. */ 622 for (end = page; end->private; end = (struct page *)end->private); 623 end->private = (unsigned long)rq->pages; 624 rq->pages = page; 625 } 626 627 static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask) 628 { 629 struct page *p = rq->pages; 630 631 if (p) { 632 rq->pages = (struct page *)p->private; 633 /* clear private here, it is used to chain pages */ 634 p->private = 0; 635 } else 636 p = alloc_page(gfp_mask); 637 return p; 638 } 639 640 static void virtnet_rq_free_buf(struct virtnet_info *vi, 641 struct receive_queue *rq, void *buf) 642 { 643 if (vi->mergeable_rx_bufs) 644 put_page(virt_to_head_page(buf)); 645 else if (vi->big_packets) 646 give_pages(rq, buf); 647 else 648 put_page(virt_to_head_page(buf)); 649 } 650 651 static void enable_delayed_refill(struct virtnet_info *vi) 652 { 653 spin_lock_bh(&vi->refill_lock); 654 vi->refill_enabled = true; 655 spin_unlock_bh(&vi->refill_lock); 656 } 657 658 static void disable_delayed_refill(struct virtnet_info *vi) 659 { 660 spin_lock_bh(&vi->refill_lock); 661 vi->refill_enabled = false; 662 spin_unlock_bh(&vi->refill_lock); 663 } 664 665 static void enable_rx_mode_work(struct virtnet_info *vi) 666 { 667 rtnl_lock(); 668 vi->rx_mode_work_enabled = true; 669 rtnl_unlock(); 670 } 671 672 static void disable_rx_mode_work(struct virtnet_info *vi) 673 { 674 rtnl_lock(); 675 vi->rx_mode_work_enabled = false; 676 rtnl_unlock(); 677 } 678 679 static void virtqueue_napi_schedule(struct napi_struct *napi, 680 struct virtqueue *vq) 681 { 682 if (napi_schedule_prep(napi)) { 683 virtqueue_disable_cb(vq); 684 __napi_schedule(napi); 685 } 686 } 687 688 static bool virtqueue_napi_complete(struct napi_struct *napi, 689 struct virtqueue *vq, int processed) 690 { 691 int opaque; 692 693 opaque = virtqueue_enable_cb_prepare(vq); 694 if (napi_complete_done(napi, processed)) { 695 if (unlikely(virtqueue_poll(vq, opaque))) 696 virtqueue_napi_schedule(napi, vq); 697 else 698 return true; 699 } else { 700 virtqueue_disable_cb(vq); 701 } 702 703 return false; 704 } 705 706 static void skb_xmit_done(struct virtqueue *vq) 707 { 708 struct virtnet_info *vi = vq->vdev->priv; 709 struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi; 710 711 /* Suppress further interrupts. */ 712 virtqueue_disable_cb(vq); 713 714 if (napi->weight) 715 virtqueue_napi_schedule(napi, vq); 716 else 717 /* We were probably waiting for more output buffers. 
*/ 718 netif_wake_subqueue(vi->dev, vq2txq(vq)); 719 } 720 721 #define MRG_CTX_HEADER_SHIFT 22 722 static void *mergeable_len_to_ctx(unsigned int truesize, 723 unsigned int headroom) 724 { 725 return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize); 726 } 727 728 static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx) 729 { 730 return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT; 731 } 732 733 static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx) 734 { 735 return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1); 736 } 737 738 static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen, 739 unsigned int headroom, 740 unsigned int len) 741 { 742 struct sk_buff *skb; 743 744 skb = build_skb(buf, buflen); 745 if (unlikely(!skb)) 746 return NULL; 747 748 skb_reserve(skb, headroom); 749 skb_put(skb, len); 750 751 return skb; 752 } 753 754 /* Called from bottom half context */ 755 static struct sk_buff *page_to_skb(struct virtnet_info *vi, 756 struct receive_queue *rq, 757 struct page *page, unsigned int offset, 758 unsigned int len, unsigned int truesize, 759 unsigned int headroom) 760 { 761 struct sk_buff *skb; 762 struct virtio_net_common_hdr *hdr; 763 unsigned int copy, hdr_len, hdr_padded_len; 764 struct page *page_to_free = NULL; 765 int tailroom, shinfo_size; 766 char *p, *hdr_p, *buf; 767 768 p = page_address(page) + offset; 769 hdr_p = p; 770 771 hdr_len = vi->hdr_len; 772 if (vi->mergeable_rx_bufs) 773 hdr_padded_len = hdr_len; 774 else 775 hdr_padded_len = sizeof(struct padded_vnet_hdr); 776 777 buf = p - headroom; 778 len -= hdr_len; 779 offset += hdr_padded_len; 780 p += hdr_padded_len; 781 tailroom = truesize - headroom - hdr_padded_len - len; 782 783 shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 784 785 if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) { 786 skb = virtnet_build_skb(buf, truesize, p - buf, len); 787 if (unlikely(!skb)) 788 return NULL; 789 790 page = (struct page *)page->private; 791 if (page) 792 give_pages(rq, page); 793 goto ok; 794 } 795 796 /* copy small packet so we can reuse these pages for small data */ 797 skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN); 798 if (unlikely(!skb)) 799 return NULL; 800 801 /* Copy all frame if it fits skb->head, otherwise 802 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed. 803 */ 804 if (len <= skb_tailroom(skb)) 805 copy = len; 806 else 807 copy = ETH_HLEN; 808 skb_put_data(skb, p, copy); 809 810 len -= copy; 811 offset += copy; 812 813 if (vi->mergeable_rx_bufs) { 814 if (len) 815 skb_add_rx_frag(skb, 0, page, offset, len, truesize); 816 else 817 page_to_free = page; 818 goto ok; 819 } 820 821 /* 822 * Verify that we can indeed put this data into a skb. 823 * This is here to handle cases when the device erroneously 824 * tries to receive more than is possible. This is usually 825 * the case of a broken device. 
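	 * Each remaining chunk is attached below as a page fragment, so a
	 * frame may carry at most MAX_SKB_FRAGS pages of data beyond the
	 * part copied into the linear area.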
826 */ 827 if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) { 828 net_dbg_ratelimited("%s: too much data\n", skb->dev->name); 829 dev_kfree_skb(skb); 830 return NULL; 831 } 832 BUG_ON(offset >= PAGE_SIZE); 833 while (len) { 834 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len); 835 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset, 836 frag_size, truesize); 837 len -= frag_size; 838 page = (struct page *)page->private; 839 offset = 0; 840 } 841 842 if (page) 843 give_pages(rq, page); 844 845 ok: 846 hdr = skb_vnet_common_hdr(skb); 847 memcpy(hdr, hdr_p, hdr_len); 848 if (page_to_free) 849 put_page(page_to_free); 850 851 return skb; 852 } 853 854 static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len) 855 { 856 struct page *page = virt_to_head_page(buf); 857 struct virtnet_rq_dma *dma; 858 void *head; 859 int offset; 860 861 head = page_address(page); 862 863 dma = head; 864 865 --dma->ref; 866 867 if (dma->need_sync && len) { 868 offset = buf - (head + sizeof(*dma)); 869 870 virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr, 871 offset, len, 872 DMA_FROM_DEVICE); 873 } 874 875 if (dma->ref) 876 return; 877 878 virtqueue_dma_unmap_single_attrs(rq->vq, dma->addr, dma->len, 879 DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); 880 put_page(page); 881 } 882 883 static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx) 884 { 885 void *buf; 886 887 buf = virtqueue_get_buf_ctx(rq->vq, len, ctx); 888 if (buf) 889 virtnet_rq_unmap(rq, buf, *len); 890 891 return buf; 892 } 893 894 static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len) 895 { 896 struct virtnet_rq_dma *dma; 897 dma_addr_t addr; 898 u32 offset; 899 void *head; 900 901 head = page_address(rq->alloc_frag.page); 902 903 offset = buf - head; 904 905 dma = head; 906 907 addr = dma->addr - sizeof(*dma) + offset; 908 909 sg_init_table(rq->sg, 1); 910 rq->sg[0].dma_address = addr; 911 rq->sg[0].length = len; 912 } 913 914 static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp) 915 { 916 struct page_frag *alloc_frag = &rq->alloc_frag; 917 struct virtnet_rq_dma *dma; 918 void *buf, *head; 919 dma_addr_t addr; 920 921 if (unlikely(!skb_page_frag_refill(size, alloc_frag, gfp))) 922 return NULL; 923 924 head = page_address(alloc_frag->page); 925 926 dma = head; 927 928 /* new pages */ 929 if (!alloc_frag->offset) { 930 if (rq->last_dma) { 931 /* Now, the new page is allocated, the last dma 932 * will not be used. So the dma can be unmapped 933 * if the ref is 0. 934 */ 935 virtnet_rq_unmap(rq, rq->last_dma, 0); 936 rq->last_dma = NULL; 937 } 938 939 dma->len = alloc_frag->size - sizeof(*dma); 940 941 addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1, 942 dma->len, DMA_FROM_DEVICE, 0); 943 if (virtqueue_dma_mapping_error(rq->vq, addr)) 944 return NULL; 945 946 dma->addr = addr; 947 dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr); 948 949 /* Add a reference to dma to prevent the entire dma from 950 * being released during error handling. This reference 951 * will be freed after the pages are no longer used. 
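		 *
		 * The struct virtnet_rq_dma itself lives at the head of the
		 * page, and dma->ref counts this long-lived reference plus
		 * every buffer handed to the ring; virtnet_rq_unmap() drops
		 * the mapping once the count reaches zero.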
952 */ 953 get_page(alloc_frag->page); 954 dma->ref = 1; 955 alloc_frag->offset = sizeof(*dma); 956 957 rq->last_dma = dma; 958 } 959 960 ++dma->ref; 961 962 buf = head + alloc_frag->offset; 963 964 get_page(alloc_frag->page); 965 alloc_frag->offset += size; 966 967 return buf; 968 } 969 970 static void virtnet_rq_set_premapped(struct virtnet_info *vi) 971 { 972 int i; 973 974 /* disable for big mode */ 975 if (!vi->mergeable_rx_bufs && vi->big_packets) 976 return; 977 978 for (i = 0; i < vi->max_queue_pairs; i++) 979 /* error should never happen */ 980 BUG_ON(virtqueue_set_dma_premapped(vi->rq[i].vq)); 981 } 982 983 static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf) 984 { 985 struct virtnet_info *vi = vq->vdev->priv; 986 struct receive_queue *rq; 987 int i = vq2rxq(vq); 988 989 rq = &vi->rq[i]; 990 991 if (rq->xsk_pool) { 992 xsk_buff_free((struct xdp_buff *)buf); 993 return; 994 } 995 996 if (!vi->big_packets || vi->mergeable_rx_bufs) 997 virtnet_rq_unmap(rq, buf, 0); 998 999 virtnet_rq_free_buf(vi, rq, buf); 1000 } 1001 1002 static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq, 1003 bool in_napi) 1004 { 1005 struct virtnet_sq_free_stats stats = {0}; 1006 1007 __free_old_xmit(sq, txq, in_napi, &stats); 1008 1009 /* Avoid overhead when no packets have been processed 1010 * happens when called speculatively from start_xmit. 1011 */ 1012 if (!stats.packets && !stats.napi_packets) 1013 return; 1014 1015 u64_stats_update_begin(&sq->stats.syncp); 1016 u64_stats_add(&sq->stats.bytes, stats.bytes + stats.napi_bytes); 1017 u64_stats_add(&sq->stats.packets, stats.packets + stats.napi_packets); 1018 u64_stats_update_end(&sq->stats.syncp); 1019 } 1020 1021 static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) 1022 { 1023 if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) 1024 return false; 1025 else if (q < vi->curr_queue_pairs) 1026 return true; 1027 else 1028 return false; 1029 } 1030 1031 static void check_sq_full_and_disable(struct virtnet_info *vi, 1032 struct net_device *dev, 1033 struct send_queue *sq) 1034 { 1035 bool use_napi = sq->napi.weight; 1036 int qnum; 1037 1038 qnum = sq - vi->sq; 1039 1040 /* If running out of space, stop queue to avoid getting packets that we 1041 * are then unable to transmit. 1042 * An alternative would be to force queuing layer to requeue the skb by 1043 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be 1044 * returned in a normal path of operation: it means that driver is not 1045 * maintaining the TX queue stop/start state properly, and causes 1046 * the stack to do a non-trivial amount of useless work. 1047 * Since most packets only take 1 or 2 ring slots, stopping the queue 1048 * early means 16 slots are typically wasted. 1049 */ 1050 if (sq->vq->num_free < 2+MAX_SKB_FRAGS) { 1051 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 1052 1053 netif_tx_stop_queue(txq); 1054 u64_stats_update_begin(&sq->stats.syncp); 1055 u64_stats_inc(&sq->stats.stop); 1056 u64_stats_update_end(&sq->stats.syncp); 1057 if (use_napi) { 1058 if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) 1059 virtqueue_napi_schedule(&sq->napi, sq->vq); 1060 } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { 1061 /* More just got used, free them then recheck. 
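			 * If enough descriptors were reclaimed, restart the
			 * subqueue and suppress further TX interrupts.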
*/ 1062 free_old_xmit(sq, txq, false); 1063 if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { 1064 netif_start_subqueue(dev, qnum); 1065 u64_stats_update_begin(&sq->stats.syncp); 1066 u64_stats_inc(&sq->stats.wake); 1067 u64_stats_update_end(&sq->stats.syncp); 1068 virtqueue_disable_cb(sq->vq); 1069 } 1070 } 1071 } 1072 } 1073 1074 static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len) 1075 { 1076 sg->dma_address = addr; 1077 sg->length = len; 1078 } 1079 1080 static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi, 1081 struct receive_queue *rq, void *buf, u32 len) 1082 { 1083 struct xdp_buff *xdp; 1084 u32 bufsize; 1085 1086 xdp = (struct xdp_buff *)buf; 1087 1088 bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool) + vi->hdr_len; 1089 1090 if (unlikely(len > bufsize)) { 1091 pr_debug("%s: rx error: len %u exceeds truesize %u\n", 1092 vi->dev->name, len, bufsize); 1093 DEV_STATS_INC(vi->dev, rx_length_errors); 1094 xsk_buff_free(xdp); 1095 return NULL; 1096 } 1097 1098 xsk_buff_set_size(xdp, len); 1099 xsk_buff_dma_sync_for_cpu(xdp); 1100 1101 return xdp; 1102 } 1103 1104 static struct sk_buff *xsk_construct_skb(struct receive_queue *rq, 1105 struct xdp_buff *xdp) 1106 { 1107 unsigned int metasize = xdp->data - xdp->data_meta; 1108 struct sk_buff *skb; 1109 unsigned int size; 1110 1111 size = xdp->data_end - xdp->data_hard_start; 1112 skb = napi_alloc_skb(&rq->napi, size); 1113 if (unlikely(!skb)) { 1114 xsk_buff_free(xdp); 1115 return NULL; 1116 } 1117 1118 skb_reserve(skb, xdp->data_meta - xdp->data_hard_start); 1119 1120 size = xdp->data_end - xdp->data_meta; 1121 memcpy(__skb_put(skb, size), xdp->data_meta, size); 1122 1123 if (metasize) { 1124 __skb_pull(skb, metasize); 1125 skb_metadata_set(skb, metasize); 1126 } 1127 1128 xsk_buff_free(xdp); 1129 1130 return skb; 1131 } 1132 1133 static struct sk_buff *virtnet_receive_xsk_small(struct net_device *dev, struct virtnet_info *vi, 1134 struct receive_queue *rq, struct xdp_buff *xdp, 1135 unsigned int *xdp_xmit, 1136 struct virtnet_rq_stats *stats) 1137 { 1138 struct bpf_prog *prog; 1139 u32 ret; 1140 1141 ret = XDP_PASS; 1142 rcu_read_lock(); 1143 prog = rcu_dereference(rq->xdp_prog); 1144 if (prog) 1145 ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats); 1146 rcu_read_unlock(); 1147 1148 switch (ret) { 1149 case XDP_PASS: 1150 return xsk_construct_skb(rq, xdp); 1151 1152 case XDP_TX: 1153 case XDP_REDIRECT: 1154 return NULL; 1155 1156 default: 1157 /* drop packet */ 1158 xsk_buff_free(xdp); 1159 u64_stats_inc(&stats->drops); 1160 return NULL; 1161 } 1162 } 1163 1164 static void xsk_drop_follow_bufs(struct net_device *dev, 1165 struct receive_queue *rq, 1166 u32 num_buf, 1167 struct virtnet_rq_stats *stats) 1168 { 1169 struct xdp_buff *xdp; 1170 u32 len; 1171 1172 while (num_buf-- > 1) { 1173 xdp = virtqueue_get_buf(rq->vq, &len); 1174 if (unlikely(!xdp)) { 1175 pr_debug("%s: rx error: %d buffers missing\n", 1176 dev->name, num_buf); 1177 DEV_STATS_INC(dev, rx_length_errors); 1178 break; 1179 } 1180 u64_stats_add(&stats->bytes, len); 1181 xsk_buff_free(xdp); 1182 } 1183 } 1184 1185 static int xsk_append_merge_buffer(struct virtnet_info *vi, 1186 struct receive_queue *rq, 1187 struct sk_buff *head_skb, 1188 u32 num_buf, 1189 struct virtio_net_hdr_mrg_rxbuf *hdr, 1190 struct virtnet_rq_stats *stats) 1191 { 1192 struct sk_buff *curr_skb; 1193 struct xdp_buff *xdp; 1194 u32 len, truesize; 1195 struct page *page; 1196 void *buf; 1197 1198 curr_skb = head_skb; 1199 1200 while (--num_buf) { 1201 buf = 
virtqueue_get_buf(rq->vq, &len); 1202 if (unlikely(!buf)) { 1203 pr_debug("%s: rx error: %d buffers out of %d missing\n", 1204 vi->dev->name, num_buf, 1205 virtio16_to_cpu(vi->vdev, 1206 hdr->num_buffers)); 1207 DEV_STATS_INC(vi->dev, rx_length_errors); 1208 return -EINVAL; 1209 } 1210 1211 u64_stats_add(&stats->bytes, len); 1212 1213 xdp = buf_to_xdp(vi, rq, buf, len); 1214 if (!xdp) 1215 goto err; 1216 1217 buf = napi_alloc_frag(len); 1218 if (!buf) { 1219 xsk_buff_free(xdp); 1220 goto err; 1221 } 1222 1223 memcpy(buf, xdp->data - vi->hdr_len, len); 1224 1225 xsk_buff_free(xdp); 1226 1227 page = virt_to_page(buf); 1228 1229 truesize = len; 1230 1231 curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, 1232 buf, len, truesize); 1233 if (!curr_skb) { 1234 put_page(page); 1235 goto err; 1236 } 1237 } 1238 1239 return 0; 1240 1241 err: 1242 xsk_drop_follow_bufs(vi->dev, rq, num_buf, stats); 1243 return -EINVAL; 1244 } 1245 1246 static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct virtnet_info *vi, 1247 struct receive_queue *rq, struct xdp_buff *xdp, 1248 unsigned int *xdp_xmit, 1249 struct virtnet_rq_stats *stats) 1250 { 1251 struct virtio_net_hdr_mrg_rxbuf *hdr; 1252 struct bpf_prog *prog; 1253 struct sk_buff *skb; 1254 u32 ret, num_buf; 1255 1256 hdr = xdp->data - vi->hdr_len; 1257 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 1258 1259 ret = XDP_PASS; 1260 rcu_read_lock(); 1261 prog = rcu_dereference(rq->xdp_prog); 1262 /* TODO: support multi buffer. */ 1263 if (prog && num_buf == 1) 1264 ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats); 1265 rcu_read_unlock(); 1266 1267 switch (ret) { 1268 case XDP_PASS: 1269 skb = xsk_construct_skb(rq, xdp); 1270 if (!skb) 1271 goto drop_bufs; 1272 1273 if (xsk_append_merge_buffer(vi, rq, skb, num_buf, hdr, stats)) { 1274 dev_kfree_skb(skb); 1275 goto drop; 1276 } 1277 1278 return skb; 1279 1280 case XDP_TX: 1281 case XDP_REDIRECT: 1282 return NULL; 1283 1284 default: 1285 /* drop packet */ 1286 xsk_buff_free(xdp); 1287 } 1288 1289 drop_bufs: 1290 xsk_drop_follow_bufs(dev, rq, num_buf, stats); 1291 1292 drop: 1293 u64_stats_inc(&stats->drops); 1294 return NULL; 1295 } 1296 1297 static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queue *rq, 1298 void *buf, u32 len, 1299 unsigned int *xdp_xmit, 1300 struct virtnet_rq_stats *stats) 1301 { 1302 struct net_device *dev = vi->dev; 1303 struct sk_buff *skb = NULL; 1304 struct xdp_buff *xdp; 1305 u8 flags; 1306 1307 len -= vi->hdr_len; 1308 1309 u64_stats_add(&stats->bytes, len); 1310 1311 xdp = buf_to_xdp(vi, rq, buf, len); 1312 if (!xdp) 1313 return; 1314 1315 if (unlikely(len < ETH_HLEN)) { 1316 pr_debug("%s: short packet %i\n", dev->name, len); 1317 DEV_STATS_INC(dev, rx_length_errors); 1318 xsk_buff_free(xdp); 1319 return; 1320 } 1321 1322 flags = ((struct virtio_net_common_hdr *)(xdp->data - vi->hdr_len))->hdr.flags; 1323 1324 if (!vi->mergeable_rx_bufs) 1325 skb = virtnet_receive_xsk_small(dev, vi, rq, xdp, xdp_xmit, stats); 1326 else 1327 skb = virtnet_receive_xsk_merge(dev, vi, rq, xdp, xdp_xmit, stats); 1328 1329 if (skb) 1330 virtnet_receive_done(vi, rq, skb, flags); 1331 } 1332 1333 static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue *rq, 1334 struct xsk_buff_pool *pool, gfp_t gfp) 1335 { 1336 struct xdp_buff **xsk_buffs; 1337 dma_addr_t addr; 1338 int err = 0; 1339 u32 len, i; 1340 int num; 1341 1342 xsk_buffs = rq->xsk_buffs; 1343 1344 num = xsk_buff_alloc_batch(pool, xsk_buffs, 
rq->vq->num_free);
	if (!num)
		return -ENOMEM;

	len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len;

	for (i = 0; i < num; ++i) {
		/* Use part of XDP_PACKET_HEADROOM as the virtnet header space.
		 * We assume XDP_PACKET_HEADROOM is larger than hdr->len.
		 * (see function virtnet_xsk_pool_enable)
		 */
		addr = xsk_buff_xdp_get_dma(xsk_buffs[i]) - vi->hdr_len;

		sg_init_table(rq->sg, 1);
		sg_fill_dma(rq->sg, addr, len);

		err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, xsk_buffs[i], gfp);
		if (err)
			goto err;
	}

	return num;

err:
	for (; i < num; ++i)
		xsk_buff_free(xsk_buffs[i]);

	return err;
}

static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct send_queue *sq;

	if (!netif_running(dev))
		return -ENETDOWN;

	if (qid >= vi->curr_queue_pairs)
		return -EINVAL;

	sq = &vi->sq[qid];

	if (napi_if_scheduled_mark_missed(&sq->napi))
		return 0;

	local_bh_disable();
	virtqueue_napi_schedule(&sq->napi, sq->vq);
	local_bh_enable();

	return 0;
}

static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
				  struct send_queue *sq,
				  struct xdp_frame *xdpf)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr;
	struct skb_shared_info *shinfo;
	u8 nr_frags = 0;
	int err, i;

	if (unlikely(xdpf->headroom < vi->hdr_len))
		return -EOVERFLOW;

	if (unlikely(xdp_frame_has_frags(xdpf))) {
		shinfo = xdp_get_shared_info_from_frame(xdpf);
		nr_frags = shinfo->nr_frags;
	}

	/* The wrapping function virtnet_xdp_xmit() frees up the pending old
	 * buffers, and xdp_get_frame_len() and xdp_return_frame() locate
	 * skb_shared_info from xdpf->data and xdpf->headroom. Therefore,
	 * keep headroom in sync with the adjusted data pointer here.
	 */
	xdpf->headroom -= vi->hdr_len;
	xdpf->data -= vi->hdr_len;
	/* Zero header and leave csum up to XDP layers */
	hdr = xdpf->data;
	memset(hdr, 0, vi->hdr_len);
	xdpf->len += vi->hdr_len;

	sg_init_table(sq->sg, nr_frags + 1);
	sg_set_buf(sq->sg, xdpf->data, xdpf->len);
	for (i = 0; i < nr_frags; i++) {
		skb_frag_t *frag = &shinfo->frags[i];

		sg_set_page(&sq->sg[i + 1], skb_frag_page(frag),
			    skb_frag_size(frag), skb_frag_off(frag));
	}

	err = virtqueue_add_outbuf(sq->vq, sq->sg, nr_frags + 1,
				   xdp_to_ptr(xdpf), GFP_ATOMIC);
	if (unlikely(err))
		return -ENOSPC; /* Caller handles free/refcnt */

	return 0;
}

/* When vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is used only for XDP TX
 * on the current cpu, so it does not need to be locked.
 *
 * Here we use a macro instead of inline functions because we have to deal
 * with three issues at the same time: 1. the choice of sq; 2. deciding
 * whether and how to lock/unlock the txq; 3. keeping sparse happy. It is
 * difficult for two inline functions to solve these three problems at the
 * same time.
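 *
 * A typical caller pairs the two, mirroring virtnet_xdp_xmit():
 *
 *	sq = virtnet_xdp_get_sq(vi);
 *	... queue XDP frames on sq->vq ...
 *	virtnet_xdp_put_sq(vi, sq);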
1452 */ 1453 #define virtnet_xdp_get_sq(vi) ({ \ 1454 int cpu = smp_processor_id(); \ 1455 struct netdev_queue *txq; \ 1456 typeof(vi) v = (vi); \ 1457 unsigned int qp; \ 1458 \ 1459 if (v->curr_queue_pairs > nr_cpu_ids) { \ 1460 qp = v->curr_queue_pairs - v->xdp_queue_pairs; \ 1461 qp += cpu; \ 1462 txq = netdev_get_tx_queue(v->dev, qp); \ 1463 __netif_tx_acquire(txq); \ 1464 } else { \ 1465 qp = cpu % v->curr_queue_pairs; \ 1466 txq = netdev_get_tx_queue(v->dev, qp); \ 1467 __netif_tx_lock(txq, cpu); \ 1468 } \ 1469 v->sq + qp; \ 1470 }) 1471 1472 #define virtnet_xdp_put_sq(vi, q) { \ 1473 struct netdev_queue *txq; \ 1474 typeof(vi) v = (vi); \ 1475 \ 1476 txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \ 1477 if (v->curr_queue_pairs > nr_cpu_ids) \ 1478 __netif_tx_release(txq); \ 1479 else \ 1480 __netif_tx_unlock(txq); \ 1481 } 1482 1483 static int virtnet_xdp_xmit(struct net_device *dev, 1484 int n, struct xdp_frame **frames, u32 flags) 1485 { 1486 struct virtnet_info *vi = netdev_priv(dev); 1487 struct virtnet_sq_free_stats stats = {0}; 1488 struct receive_queue *rq = vi->rq; 1489 struct bpf_prog *xdp_prog; 1490 struct send_queue *sq; 1491 int nxmit = 0; 1492 int kicks = 0; 1493 int ret; 1494 int i; 1495 1496 /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this 1497 * indicate XDP resources have been successfully allocated. 1498 */ 1499 xdp_prog = rcu_access_pointer(rq->xdp_prog); 1500 if (!xdp_prog) 1501 return -ENXIO; 1502 1503 sq = virtnet_xdp_get_sq(vi); 1504 1505 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { 1506 ret = -EINVAL; 1507 goto out; 1508 } 1509 1510 /* Free up any pending old buffers before queueing new ones. */ 1511 __free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), 1512 false, &stats); 1513 1514 for (i = 0; i < n; i++) { 1515 struct xdp_frame *xdpf = frames[i]; 1516 1517 if (__virtnet_xdp_xmit_one(vi, sq, xdpf)) 1518 break; 1519 nxmit++; 1520 } 1521 ret = nxmit; 1522 1523 if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq)) 1524 check_sq_full_and_disable(vi, dev, sq); 1525 1526 if (flags & XDP_XMIT_FLUSH) { 1527 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) 1528 kicks = 1; 1529 } 1530 out: 1531 u64_stats_update_begin(&sq->stats.syncp); 1532 u64_stats_add(&sq->stats.bytes, stats.bytes); 1533 u64_stats_add(&sq->stats.packets, stats.packets); 1534 u64_stats_add(&sq->stats.xdp_tx, n); 1535 u64_stats_add(&sq->stats.xdp_tx_drops, n - nxmit); 1536 u64_stats_add(&sq->stats.kicks, kicks); 1537 u64_stats_update_end(&sq->stats.syncp); 1538 1539 virtnet_xdp_put_sq(vi, sq); 1540 return ret; 1541 } 1542 1543 static void put_xdp_frags(struct xdp_buff *xdp) 1544 { 1545 struct skb_shared_info *shinfo; 1546 struct page *xdp_page; 1547 int i; 1548 1549 if (xdp_buff_has_frags(xdp)) { 1550 shinfo = xdp_get_shared_info_from_buff(xdp); 1551 for (i = 0; i < shinfo->nr_frags; i++) { 1552 xdp_page = skb_frag_page(&shinfo->frags[i]); 1553 put_page(xdp_page); 1554 } 1555 } 1556 } 1557 1558 static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp, 1559 struct net_device *dev, 1560 unsigned int *xdp_xmit, 1561 struct virtnet_rq_stats *stats) 1562 { 1563 struct xdp_frame *xdpf; 1564 int err; 1565 u32 act; 1566 1567 act = bpf_prog_run_xdp(xdp_prog, xdp); 1568 u64_stats_inc(&stats->xdp_packets); 1569 1570 switch (act) { 1571 case XDP_PASS: 1572 return act; 1573 1574 case XDP_TX: 1575 u64_stats_inc(&stats->xdp_tx); 1576 xdpf = xdp_convert_buff_to_frame(xdp); 1577 if (unlikely(!xdpf)) { 1578 netdev_dbg(dev, "convert buff to frame failed for xdp\n"); 
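			/* No frame to transmit; the caller frees the buffer
			 * and accounts the drop, as with any other XDP_DROP.
			 */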
1579 return XDP_DROP; 1580 } 1581 1582 err = virtnet_xdp_xmit(dev, 1, &xdpf, 0); 1583 if (unlikely(!err)) { 1584 xdp_return_frame_rx_napi(xdpf); 1585 } else if (unlikely(err < 0)) { 1586 trace_xdp_exception(dev, xdp_prog, act); 1587 return XDP_DROP; 1588 } 1589 *xdp_xmit |= VIRTIO_XDP_TX; 1590 return act; 1591 1592 case XDP_REDIRECT: 1593 u64_stats_inc(&stats->xdp_redirects); 1594 err = xdp_do_redirect(dev, xdp, xdp_prog); 1595 if (err) 1596 return XDP_DROP; 1597 1598 *xdp_xmit |= VIRTIO_XDP_REDIR; 1599 return act; 1600 1601 default: 1602 bpf_warn_invalid_xdp_action(dev, xdp_prog, act); 1603 fallthrough; 1604 case XDP_ABORTED: 1605 trace_xdp_exception(dev, xdp_prog, act); 1606 fallthrough; 1607 case XDP_DROP: 1608 return XDP_DROP; 1609 } 1610 } 1611 1612 static unsigned int virtnet_get_headroom(struct virtnet_info *vi) 1613 { 1614 return vi->xdp_enabled ? XDP_PACKET_HEADROOM : 0; 1615 } 1616 1617 /* We copy the packet for XDP in the following cases: 1618 * 1619 * 1) Packet is scattered across multiple rx buffers. 1620 * 2) Headroom space is insufficient. 1621 * 1622 * This is inefficient but it's a temporary condition that 1623 * we hit right after XDP is enabled and until queue is refilled 1624 * with large buffers with sufficient headroom - so it should affect 1625 * at most queue size packets. 1626 * Afterwards, the conditions to enable 1627 * XDP should preclude the underlying device from sending packets 1628 * across multiple buffers (num_buf > 1), and we make sure buffers 1629 * have enough headroom. 1630 */ 1631 static struct page *xdp_linearize_page(struct receive_queue *rq, 1632 int *num_buf, 1633 struct page *p, 1634 int offset, 1635 int page_off, 1636 unsigned int *len) 1637 { 1638 int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1639 struct page *page; 1640 1641 if (page_off + *len + tailroom > PAGE_SIZE) 1642 return NULL; 1643 1644 page = alloc_page(GFP_ATOMIC); 1645 if (!page) 1646 return NULL; 1647 1648 memcpy(page_address(page) + page_off, page_address(p) + offset, *len); 1649 page_off += *len; 1650 1651 while (--*num_buf) { 1652 unsigned int buflen; 1653 void *buf; 1654 int off; 1655 1656 buf = virtnet_rq_get_buf(rq, &buflen, NULL); 1657 if (unlikely(!buf)) 1658 goto err_buf; 1659 1660 p = virt_to_head_page(buf); 1661 off = buf - page_address(p); 1662 1663 /* guard against a misconfigured or uncooperative backend that 1664 * is sending packet larger than the MTU. 
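		 * The linearized copy must fit in one page together with the
		 * headroom and the skb_shared_info tailroom, so give up once
		 * the next buffer would overflow it.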
1665 */ 1666 if ((page_off + buflen + tailroom) > PAGE_SIZE) { 1667 put_page(p); 1668 goto err_buf; 1669 } 1670 1671 memcpy(page_address(page) + page_off, 1672 page_address(p) + off, buflen); 1673 page_off += buflen; 1674 put_page(p); 1675 } 1676 1677 /* Headroom does not contribute to packet length */ 1678 *len = page_off - XDP_PACKET_HEADROOM; 1679 return page; 1680 err_buf: 1681 __free_pages(page, 0); 1682 return NULL; 1683 } 1684 1685 static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi, 1686 unsigned int xdp_headroom, 1687 void *buf, 1688 unsigned int len) 1689 { 1690 unsigned int header_offset; 1691 unsigned int headroom; 1692 unsigned int buflen; 1693 struct sk_buff *skb; 1694 1695 header_offset = VIRTNET_RX_PAD + xdp_headroom; 1696 headroom = vi->hdr_len + header_offset; 1697 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1698 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1699 1700 skb = virtnet_build_skb(buf, buflen, headroom, len); 1701 if (unlikely(!skb)) 1702 return NULL; 1703 1704 buf += header_offset; 1705 memcpy(skb_vnet_common_hdr(skb), buf, vi->hdr_len); 1706 1707 return skb; 1708 } 1709 1710 static struct sk_buff *receive_small_xdp(struct net_device *dev, 1711 struct virtnet_info *vi, 1712 struct receive_queue *rq, 1713 struct bpf_prog *xdp_prog, 1714 void *buf, 1715 unsigned int xdp_headroom, 1716 unsigned int len, 1717 unsigned int *xdp_xmit, 1718 struct virtnet_rq_stats *stats) 1719 { 1720 unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom; 1721 unsigned int headroom = vi->hdr_len + header_offset; 1722 struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset; 1723 struct page *page = virt_to_head_page(buf); 1724 struct page *xdp_page; 1725 unsigned int buflen; 1726 struct xdp_buff xdp; 1727 struct sk_buff *skb; 1728 unsigned int metasize = 0; 1729 u32 act; 1730 1731 if (unlikely(hdr->hdr.gso_type)) 1732 goto err_xdp; 1733 1734 /* Partially checksummed packets must be dropped. 
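	 * VIRTIO_NET_HDR_F_NEEDS_CSUM means the device delivered the packet
	 * with an unfinished checksum that the stack would normally complete;
	 * an XDP program cannot be expected to do that.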
*/ 1735 if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) 1736 goto err_xdp; 1737 1738 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1739 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1740 1741 if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) { 1742 int offset = buf - page_address(page) + header_offset; 1743 unsigned int tlen = len + vi->hdr_len; 1744 int num_buf = 1; 1745 1746 xdp_headroom = virtnet_get_headroom(vi); 1747 header_offset = VIRTNET_RX_PAD + xdp_headroom; 1748 headroom = vi->hdr_len + header_offset; 1749 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1750 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1751 xdp_page = xdp_linearize_page(rq, &num_buf, page, 1752 offset, header_offset, 1753 &tlen); 1754 if (!xdp_page) 1755 goto err_xdp; 1756 1757 buf = page_address(xdp_page); 1758 put_page(page); 1759 page = xdp_page; 1760 } 1761 1762 xdp_init_buff(&xdp, buflen, &rq->xdp_rxq); 1763 xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len, 1764 xdp_headroom, len, true); 1765 1766 act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); 1767 1768 switch (act) { 1769 case XDP_PASS: 1770 /* Recalculate length in case bpf program changed it */ 1771 len = xdp.data_end - xdp.data; 1772 metasize = xdp.data - xdp.data_meta; 1773 break; 1774 1775 case XDP_TX: 1776 case XDP_REDIRECT: 1777 goto xdp_xmit; 1778 1779 default: 1780 goto err_xdp; 1781 } 1782 1783 skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len); 1784 if (unlikely(!skb)) 1785 goto err; 1786 1787 if (metasize) 1788 skb_metadata_set(skb, metasize); 1789 1790 return skb; 1791 1792 err_xdp: 1793 u64_stats_inc(&stats->xdp_drops); 1794 err: 1795 u64_stats_inc(&stats->drops); 1796 put_page(page); 1797 xdp_xmit: 1798 return NULL; 1799 } 1800 1801 static struct sk_buff *receive_small(struct net_device *dev, 1802 struct virtnet_info *vi, 1803 struct receive_queue *rq, 1804 void *buf, void *ctx, 1805 unsigned int len, 1806 unsigned int *xdp_xmit, 1807 struct virtnet_rq_stats *stats) 1808 { 1809 unsigned int xdp_headroom = (unsigned long)ctx; 1810 struct page *page = virt_to_head_page(buf); 1811 struct sk_buff *skb; 1812 1813 len -= vi->hdr_len; 1814 u64_stats_add(&stats->bytes, len); 1815 1816 if (unlikely(len > GOOD_PACKET_LEN)) { 1817 pr_debug("%s: rx error: len %u exceeds max size %d\n", 1818 dev->name, len, GOOD_PACKET_LEN); 1819 DEV_STATS_INC(dev, rx_length_errors); 1820 goto err; 1821 } 1822 1823 if (unlikely(vi->xdp_enabled)) { 1824 struct bpf_prog *xdp_prog; 1825 1826 rcu_read_lock(); 1827 xdp_prog = rcu_dereference(rq->xdp_prog); 1828 if (xdp_prog) { 1829 skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf, 1830 xdp_headroom, len, xdp_xmit, 1831 stats); 1832 rcu_read_unlock(); 1833 return skb; 1834 } 1835 rcu_read_unlock(); 1836 } 1837 1838 skb = receive_small_build_skb(vi, xdp_headroom, buf, len); 1839 if (likely(skb)) 1840 return skb; 1841 1842 err: 1843 u64_stats_inc(&stats->drops); 1844 put_page(page); 1845 return NULL; 1846 } 1847 1848 static struct sk_buff *receive_big(struct net_device *dev, 1849 struct virtnet_info *vi, 1850 struct receive_queue *rq, 1851 void *buf, 1852 unsigned int len, 1853 struct virtnet_rq_stats *stats) 1854 { 1855 struct page *page = buf; 1856 struct sk_buff *skb = 1857 page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0); 1858 1859 u64_stats_add(&stats->bytes, len - vi->hdr_len); 1860 if (unlikely(!skb)) 1861 goto err; 1862 1863 return skb; 1864 1865 err: 1866 u64_stats_inc(&stats->drops); 1867 give_pages(rq, page); 1868 return NULL; 1869 } 1870 1871 
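
/* Drop the remaining buffers of a partially received mergeable frame,
 * still accounting their bytes so the rx statistics stay consistent.
 */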
static void mergeable_buf_free(struct receive_queue *rq, int num_buf, 1872 struct net_device *dev, 1873 struct virtnet_rq_stats *stats) 1874 { 1875 struct page *page; 1876 void *buf; 1877 int len; 1878 1879 while (num_buf-- > 1) { 1880 buf = virtnet_rq_get_buf(rq, &len, NULL); 1881 if (unlikely(!buf)) { 1882 pr_debug("%s: rx error: %d buffers missing\n", 1883 dev->name, num_buf); 1884 DEV_STATS_INC(dev, rx_length_errors); 1885 break; 1886 } 1887 u64_stats_add(&stats->bytes, len); 1888 page = virt_to_head_page(buf); 1889 put_page(page); 1890 } 1891 } 1892 1893 /* Why not use xdp_build_skb_from_frame() ? 1894 * XDP core assumes that xdp frags are PAGE_SIZE in length, while in 1895 * virtio-net there are 2 points that do not match its requirements: 1896 * 1. The size of the prefilled buffer is not fixed before xdp is set. 1897 * 2. xdp_build_skb_from_frame() does more checks that we don't need, 1898 * like eth_type_trans() (which virtio-net does in receive_buf()). 1899 */ 1900 static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev, 1901 struct virtnet_info *vi, 1902 struct xdp_buff *xdp, 1903 unsigned int xdp_frags_truesz) 1904 { 1905 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 1906 unsigned int headroom, data_len; 1907 struct sk_buff *skb; 1908 int metasize; 1909 u8 nr_frags; 1910 1911 if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) { 1912 pr_debug("Error building skb as missing reserved tailroom for xdp"); 1913 return NULL; 1914 } 1915 1916 if (unlikely(xdp_buff_has_frags(xdp))) 1917 nr_frags = sinfo->nr_frags; 1918 1919 skb = build_skb(xdp->data_hard_start, xdp->frame_sz); 1920 if (unlikely(!skb)) 1921 return NULL; 1922 1923 headroom = xdp->data - xdp->data_hard_start; 1924 data_len = xdp->data_end - xdp->data; 1925 skb_reserve(skb, headroom); 1926 __skb_put(skb, data_len); 1927 1928 metasize = xdp->data - xdp->data_meta; 1929 metasize = metasize > 0 ? metasize : 0; 1930 if (metasize) 1931 skb_metadata_set(skb, metasize); 1932 1933 if (unlikely(xdp_buff_has_frags(xdp))) 1934 xdp_update_skb_shared_info(skb, nr_frags, 1935 sinfo->xdp_frags_size, 1936 xdp_frags_truesz, 1937 xdp_buff_is_frag_pfmemalloc(xdp)); 1938 1939 return skb; 1940 } 1941 1942 /* TODO: build xdp in big mode */ 1943 static int virtnet_build_xdp_buff_mrg(struct net_device *dev, 1944 struct virtnet_info *vi, 1945 struct receive_queue *rq, 1946 struct xdp_buff *xdp, 1947 void *buf, 1948 unsigned int len, 1949 unsigned int frame_sz, 1950 int *num_buf, 1951 unsigned int *xdp_frags_truesize, 1952 struct virtnet_rq_stats *stats) 1953 { 1954 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 1955 unsigned int headroom, tailroom, room; 1956 unsigned int truesize, cur_frag_size; 1957 struct skb_shared_info *shinfo; 1958 unsigned int xdp_frags_truesz = 0; 1959 struct page *page; 1960 skb_frag_t *frag; 1961 int offset; 1962 void *ctx; 1963 1964 xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq); 1965 xdp_prepare_buff(xdp, buf - XDP_PACKET_HEADROOM, 1966 XDP_PACKET_HEADROOM + vi->hdr_len, len - vi->hdr_len, true); 1967 1968 if (!*num_buf) 1969 return 0; 1970 1971 if (*num_buf > 1) { 1972 /* If we want to build multi-buffer xdp, we need 1973 * to specify that the flags of xdp_buff have the 1974 * XDP_FLAGS_HAS_FRAG bit. 
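		 * The skb_shared_info area at the tail of the first buffer
		 * then describes the remaining buffers as frags.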
1975 */ 1976 if (!xdp_buff_has_frags(xdp)) 1977 xdp_buff_set_frags_flag(xdp); 1978 1979 shinfo = xdp_get_shared_info_from_buff(xdp); 1980 shinfo->nr_frags = 0; 1981 shinfo->xdp_frags_size = 0; 1982 } 1983 1984 if (*num_buf > MAX_SKB_FRAGS + 1) 1985 return -EINVAL; 1986 1987 while (--*num_buf > 0) { 1988 buf = virtnet_rq_get_buf(rq, &len, &ctx); 1989 if (unlikely(!buf)) { 1990 pr_debug("%s: rx error: %d buffers out of %d missing\n", 1991 dev->name, *num_buf, 1992 virtio16_to_cpu(vi->vdev, hdr->num_buffers)); 1993 DEV_STATS_INC(dev, rx_length_errors); 1994 goto err; 1995 } 1996 1997 u64_stats_add(&stats->bytes, len); 1998 page = virt_to_head_page(buf); 1999 offset = buf - page_address(page); 2000 2001 truesize = mergeable_ctx_to_truesize(ctx); 2002 headroom = mergeable_ctx_to_headroom(ctx); 2003 tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 2004 room = SKB_DATA_ALIGN(headroom + tailroom); 2005 2006 cur_frag_size = truesize; 2007 xdp_frags_truesz += cur_frag_size; 2008 if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) { 2009 put_page(page); 2010 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 2011 dev->name, len, (unsigned long)(truesize - room)); 2012 DEV_STATS_INC(dev, rx_length_errors); 2013 goto err; 2014 } 2015 2016 frag = &shinfo->frags[shinfo->nr_frags++]; 2017 skb_frag_fill_page_desc(frag, page, offset, len); 2018 if (page_is_pfmemalloc(page)) 2019 xdp_buff_set_frag_pfmemalloc(xdp); 2020 2021 shinfo->xdp_frags_size += len; 2022 } 2023 2024 *xdp_frags_truesize = xdp_frags_truesz; 2025 return 0; 2026 2027 err: 2028 put_xdp_frags(xdp); 2029 return -EINVAL; 2030 } 2031 2032 static void *mergeable_xdp_get_buf(struct virtnet_info *vi, 2033 struct receive_queue *rq, 2034 struct bpf_prog *xdp_prog, 2035 void *ctx, 2036 unsigned int *frame_sz, 2037 int *num_buf, 2038 struct page **page, 2039 int offset, 2040 unsigned int *len, 2041 struct virtio_net_hdr_mrg_rxbuf *hdr) 2042 { 2043 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 2044 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 2045 struct page *xdp_page; 2046 unsigned int xdp_room; 2047 2048 /* Transient failure which in theory could occur if 2049 * in-flight packets from before XDP was enabled reach 2050 * the receive path after XDP is loaded. 2051 */ 2052 if (unlikely(hdr->hdr.gso_type)) 2053 return NULL; 2054 2055 /* Partially checksummed packets must be dropped. */ 2056 if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) 2057 return NULL; 2058 2059 /* Now XDP core assumes frag size is PAGE_SIZE, but buffers 2060 * with headroom may add hole in truesize, which 2061 * make their length exceed PAGE_SIZE. So we disabled the 2062 * hole mechanism for xdp. See add_recvbuf_mergeable(). 2063 */ 2064 *frame_sz = truesize; 2065 2066 if (likely(headroom >= virtnet_get_headroom(vi) && 2067 (*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) { 2068 return page_address(*page) + offset; 2069 } 2070 2071 /* This happens when headroom is not enough because 2072 * of the buffer was prefilled before XDP is set. 2073 * This should only happen for the first several packets. 2074 * In fact, vq reset can be used here to help us clean up 2075 * the prefilled buffers, but many existing devices do not 2076 * support it, and we don't want to bother users who are 2077 * using xdp normally. 
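	 * Instead, fall back to copying: linearize the whole frame into a
	 * fresh page when the program cannot handle frags, or copy just this
	 * first buffer into a page that provides the full XDP_PACKET_HEADROOM.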
2078 */ 2079 if (!xdp_prog->aux->xdp_has_frags) { 2080 /* linearize data for XDP */ 2081 xdp_page = xdp_linearize_page(rq, num_buf, 2082 *page, offset, 2083 XDP_PACKET_HEADROOM, 2084 len); 2085 if (!xdp_page) 2086 return NULL; 2087 } else { 2088 xdp_room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM + 2089 sizeof(struct skb_shared_info)); 2090 if (*len + xdp_room > PAGE_SIZE) 2091 return NULL; 2092 2093 xdp_page = alloc_page(GFP_ATOMIC); 2094 if (!xdp_page) 2095 return NULL; 2096 2097 memcpy(page_address(xdp_page) + XDP_PACKET_HEADROOM, 2098 page_address(*page) + offset, *len); 2099 } 2100 2101 *frame_sz = PAGE_SIZE; 2102 2103 put_page(*page); 2104 2105 *page = xdp_page; 2106 2107 return page_address(*page) + XDP_PACKET_HEADROOM; 2108 } 2109 2110 static struct sk_buff *receive_mergeable_xdp(struct net_device *dev, 2111 struct virtnet_info *vi, 2112 struct receive_queue *rq, 2113 struct bpf_prog *xdp_prog, 2114 void *buf, 2115 void *ctx, 2116 unsigned int len, 2117 unsigned int *xdp_xmit, 2118 struct virtnet_rq_stats *stats) 2119 { 2120 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2121 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 2122 struct page *page = virt_to_head_page(buf); 2123 int offset = buf - page_address(page); 2124 unsigned int xdp_frags_truesz = 0; 2125 struct sk_buff *head_skb; 2126 unsigned int frame_sz; 2127 struct xdp_buff xdp; 2128 void *data; 2129 u32 act; 2130 int err; 2131 2132 data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, &frame_sz, &num_buf, &page, 2133 offset, &len, hdr); 2134 if (unlikely(!data)) 2135 goto err_xdp; 2136 2137 err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz, 2138 &num_buf, &xdp_frags_truesz, stats); 2139 if (unlikely(err)) 2140 goto err_xdp; 2141 2142 act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); 2143 2144 switch (act) { 2145 case XDP_PASS: 2146 head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz); 2147 if (unlikely(!head_skb)) 2148 break; 2149 return head_skb; 2150 2151 case XDP_TX: 2152 case XDP_REDIRECT: 2153 return NULL; 2154 2155 default: 2156 break; 2157 } 2158 2159 put_xdp_frags(&xdp); 2160 2161 err_xdp: 2162 put_page(page); 2163 mergeable_buf_free(rq, num_buf, dev, stats); 2164 2165 u64_stats_inc(&stats->xdp_drops); 2166 u64_stats_inc(&stats->drops); 2167 return NULL; 2168 } 2169 2170 static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb, 2171 struct sk_buff *curr_skb, 2172 struct page *page, void *buf, 2173 int len, int truesize) 2174 { 2175 int num_skb_frags; 2176 int offset; 2177 2178 num_skb_frags = skb_shinfo(curr_skb)->nr_frags; 2179 if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) { 2180 struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC); 2181 2182 if (unlikely(!nskb)) 2183 return NULL; 2184 2185 if (curr_skb == head_skb) 2186 skb_shinfo(curr_skb)->frag_list = nskb; 2187 else 2188 curr_skb->next = nskb; 2189 curr_skb = nskb; 2190 head_skb->truesize += nskb->truesize; 2191 num_skb_frags = 0; 2192 } 2193 2194 if (curr_skb != head_skb) { 2195 head_skb->data_len += len; 2196 head_skb->len += len; 2197 head_skb->truesize += truesize; 2198 } 2199 2200 offset = buf - page_address(page); 2201 if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) { 2202 put_page(page); 2203 skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1, 2204 len, truesize); 2205 } else { 2206 skb_add_rx_frag(curr_skb, num_skb_frags, page, 2207 offset, len, truesize); 2208 } 2209 2210 return curr_skb; 2211 } 2212 2213 static struct sk_buff *receive_mergeable(struct net_device *dev, 2214 
struct virtnet_info *vi, 2215 struct receive_queue *rq, 2216 void *buf, 2217 void *ctx, 2218 unsigned int len, 2219 unsigned int *xdp_xmit, 2220 struct virtnet_rq_stats *stats) 2221 { 2222 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2223 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 2224 struct page *page = virt_to_head_page(buf); 2225 int offset = buf - page_address(page); 2226 struct sk_buff *head_skb, *curr_skb; 2227 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 2228 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 2229 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 2230 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); 2231 2232 head_skb = NULL; 2233 u64_stats_add(&stats->bytes, len - vi->hdr_len); 2234 2235 if (unlikely(len > truesize - room)) { 2236 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 2237 dev->name, len, (unsigned long)(truesize - room)); 2238 DEV_STATS_INC(dev, rx_length_errors); 2239 goto err_skb; 2240 } 2241 2242 if (unlikely(vi->xdp_enabled)) { 2243 struct bpf_prog *xdp_prog; 2244 2245 rcu_read_lock(); 2246 xdp_prog = rcu_dereference(rq->xdp_prog); 2247 if (xdp_prog) { 2248 head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx, 2249 len, xdp_xmit, stats); 2250 rcu_read_unlock(); 2251 return head_skb; 2252 } 2253 rcu_read_unlock(); 2254 } 2255 2256 head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom); 2257 curr_skb = head_skb; 2258 2259 if (unlikely(!curr_skb)) 2260 goto err_skb; 2261 while (--num_buf) { 2262 buf = virtnet_rq_get_buf(rq, &len, &ctx); 2263 if (unlikely(!buf)) { 2264 pr_debug("%s: rx error: %d buffers out of %d missing\n", 2265 dev->name, num_buf, 2266 virtio16_to_cpu(vi->vdev, 2267 hdr->num_buffers)); 2268 DEV_STATS_INC(dev, rx_length_errors); 2269 goto err_buf; 2270 } 2271 2272 u64_stats_add(&stats->bytes, len); 2273 page = virt_to_head_page(buf); 2274 2275 truesize = mergeable_ctx_to_truesize(ctx); 2276 headroom = mergeable_ctx_to_headroom(ctx); 2277 tailroom = headroom ? 
sizeof(struct skb_shared_info) : 0; 2278 room = SKB_DATA_ALIGN(headroom + tailroom); 2279 if (unlikely(len > truesize - room)) { 2280 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 2281 dev->name, len, (unsigned long)(truesize - room)); 2282 DEV_STATS_INC(dev, rx_length_errors); 2283 goto err_skb; 2284 } 2285 2286 curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, 2287 buf, len, truesize); 2288 if (!curr_skb) 2289 goto err_skb; 2290 } 2291 2292 ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len); 2293 return head_skb; 2294 2295 err_skb: 2296 put_page(page); 2297 mergeable_buf_free(rq, num_buf, dev, stats); 2298 2299 err_buf: 2300 u64_stats_inc(&stats->drops); 2301 dev_kfree_skb(head_skb); 2302 return NULL; 2303 } 2304 2305 static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, 2306 struct sk_buff *skb) 2307 { 2308 enum pkt_hash_types rss_hash_type; 2309 2310 if (!hdr_hash || !skb) 2311 return; 2312 2313 switch (__le16_to_cpu(hdr_hash->hash_report)) { 2314 case VIRTIO_NET_HASH_REPORT_TCPv4: 2315 case VIRTIO_NET_HASH_REPORT_UDPv4: 2316 case VIRTIO_NET_HASH_REPORT_TCPv6: 2317 case VIRTIO_NET_HASH_REPORT_UDPv6: 2318 case VIRTIO_NET_HASH_REPORT_TCPv6_EX: 2319 case VIRTIO_NET_HASH_REPORT_UDPv6_EX: 2320 rss_hash_type = PKT_HASH_TYPE_L4; 2321 break; 2322 case VIRTIO_NET_HASH_REPORT_IPv4: 2323 case VIRTIO_NET_HASH_REPORT_IPv6: 2324 case VIRTIO_NET_HASH_REPORT_IPv6_EX: 2325 rss_hash_type = PKT_HASH_TYPE_L3; 2326 break; 2327 case VIRTIO_NET_HASH_REPORT_NONE: 2328 default: 2329 rss_hash_type = PKT_HASH_TYPE_NONE; 2330 } 2331 skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type); 2332 } 2333 2334 static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, 2335 struct sk_buff *skb, u8 flags) 2336 { 2337 struct virtio_net_common_hdr *hdr; 2338 struct net_device *dev = vi->dev; 2339 2340 hdr = skb_vnet_common_hdr(skb); 2341 if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report) 2342 virtio_skb_set_hash(&hdr->hash_v1_hdr, skb); 2343 2344 if (flags & VIRTIO_NET_HDR_F_DATA_VALID) 2345 skb->ip_summed = CHECKSUM_UNNECESSARY; 2346 2347 if (virtio_net_hdr_to_skb(skb, &hdr->hdr, 2348 virtio_is_little_endian(vi->vdev))) { 2349 net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n", 2350 dev->name, hdr->hdr.gso_type, 2351 hdr->hdr.gso_size); 2352 goto frame_err; 2353 } 2354 2355 skb_record_rx_queue(skb, vq2rxq(rq->vq)); 2356 skb->protocol = eth_type_trans(skb, dev); 2357 pr_debug("Receiving skb proto 0x%04x len %i type %i\n", 2358 ntohs(skb->protocol), skb->len, skb->pkt_type); 2359 2360 napi_gro_receive(&rq->napi, skb); 2361 return; 2362 2363 frame_err: 2364 DEV_STATS_INC(dev, rx_frame_errors); 2365 dev_kfree_skb(skb); 2366 } 2367 2368 static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, 2369 void *buf, unsigned int len, void **ctx, 2370 unsigned int *xdp_xmit, 2371 struct virtnet_rq_stats *stats) 2372 { 2373 struct net_device *dev = vi->dev; 2374 struct sk_buff *skb; 2375 u8 flags; 2376 2377 if (unlikely(len < vi->hdr_len + ETH_HLEN)) { 2378 pr_debug("%s: short packet %i\n", dev->name, len); 2379 DEV_STATS_INC(dev, rx_length_errors); 2380 virtnet_rq_free_buf(vi, rq, buf); 2381 return; 2382 } 2383 2384 /* 1. Save the flags early, as the XDP program might overwrite them. 2385 * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID 2386 * stay valid after XDP processing. 2387 * 2. 
XDP doesn't work with partially checksummed packets (refer to 2388 * virtnet_xdp_set()), so packets marked as 2389 * VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing. 2390 */ 2391 flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; 2392 2393 if (vi->mergeable_rx_bufs) 2394 skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, 2395 stats); 2396 else if (vi->big_packets) 2397 skb = receive_big(dev, vi, rq, buf, len, stats); 2398 else 2399 skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); 2400 2401 if (unlikely(!skb)) 2402 return; 2403 2404 virtnet_receive_done(vi, rq, skb, flags); 2405 } 2406 2407 /* Unlike mergeable buffers, all buffers are allocated to the 2408 * same size, except for the headroom. For this reason we do 2409 * not need to use mergeable_len_to_ctx here - it is enough 2410 * to store the headroom as the context ignoring the truesize. 2411 */ 2412 static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, 2413 gfp_t gfp) 2414 { 2415 char *buf; 2416 unsigned int xdp_headroom = virtnet_get_headroom(vi); 2417 void *ctx = (void *)(unsigned long)xdp_headroom; 2418 int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom; 2419 int err; 2420 2421 len = SKB_DATA_ALIGN(len) + 2422 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 2423 2424 buf = virtnet_rq_alloc(rq, len, gfp); 2425 if (unlikely(!buf)) 2426 return -ENOMEM; 2427 2428 virtnet_rq_init_one_sg(rq, buf + VIRTNET_RX_PAD + xdp_headroom, 2429 vi->hdr_len + GOOD_PACKET_LEN); 2430 2431 err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); 2432 if (err < 0) { 2433 virtnet_rq_unmap(rq, buf, 0); 2434 put_page(virt_to_head_page(buf)); 2435 } 2436 2437 return err; 2438 } 2439 2440 static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, 2441 gfp_t gfp) 2442 { 2443 struct page *first, *list = NULL; 2444 char *p; 2445 int i, err, offset; 2446 2447 sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2); 2448 2449 /* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */ 2450 for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) { 2451 first = get_a_page(rq, gfp); 2452 if (!first) { 2453 if (list) 2454 give_pages(rq, list); 2455 return -ENOMEM; 2456 } 2457 sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE); 2458 2459 /* chain new page in list head to match sg */ 2460 first->private = (unsigned long)list; 2461 list = first; 2462 } 2463 2464 first = get_a_page(rq, gfp); 2465 if (!first) { 2466 give_pages(rq, list); 2467 return -ENOMEM; 2468 } 2469 p = page_address(first); 2470 2471 /* rq->sg[0], rq->sg[1] share the same page */ 2472 /* a separated rq->sg[0] for header - required in case !any_header_sg */ 2473 sg_set_buf(&rq->sg[0], p, vi->hdr_len); 2474 2475 /* rq->sg[1] for data packet, from offset */ 2476 offset = sizeof(struct padded_vnet_hdr); 2477 sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset); 2478 2479 /* chain first in list head */ 2480 first->private = (unsigned long)list; 2481 err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2, 2482 first, gfp); 2483 if (err < 0) 2484 give_pages(rq, first); 2485 2486 return err; 2487 } 2488 2489 static unsigned int get_mergeable_buf_len(struct receive_queue *rq, 2490 struct ewma_pkt_len *avg_pkt_len, 2491 unsigned int room) 2492 { 2493 struct virtnet_info *vi = rq->vq->vdev->priv; 2494 const size_t hdr_len = vi->hdr_len; 2495 unsigned int len; 2496 2497 if (room) 2498 return PAGE_SIZE - room; 2499 2500 len = hdr_len + clamp_t(unsigned int, 
ewma_pkt_len_read(avg_pkt_len), 2501 rq->min_buf_len, PAGE_SIZE - hdr_len); 2502 2503 return ALIGN(len, L1_CACHE_BYTES); 2504 } 2505 2506 static int add_recvbuf_mergeable(struct virtnet_info *vi, 2507 struct receive_queue *rq, gfp_t gfp) 2508 { 2509 struct page_frag *alloc_frag = &rq->alloc_frag; 2510 unsigned int headroom = virtnet_get_headroom(vi); 2511 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 2512 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); 2513 unsigned int len, hole; 2514 void *ctx; 2515 char *buf; 2516 int err; 2517 2518 /* Extra tailroom is needed to satisfy XDP's assumption. This 2519 * means rx frags coalescing won't work, but consider we've 2520 * disabled GSO for XDP, it won't be a big issue. 2521 */ 2522 len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); 2523 2524 buf = virtnet_rq_alloc(rq, len + room, gfp); 2525 if (unlikely(!buf)) 2526 return -ENOMEM; 2527 2528 buf += headroom; /* advance address leaving hole at front of pkt */ 2529 hole = alloc_frag->size - alloc_frag->offset; 2530 if (hole < len + room) { 2531 /* To avoid internal fragmentation, if there is very likely not 2532 * enough space for another buffer, add the remaining space to 2533 * the current buffer. 2534 * XDP core assumes that frame_size of xdp_buff and the length 2535 * of the frag are PAGE_SIZE, so we disable the hole mechanism. 2536 */ 2537 if (!headroom) 2538 len += hole; 2539 alloc_frag->offset += hole; 2540 } 2541 2542 virtnet_rq_init_one_sg(rq, buf, len); 2543 2544 ctx = mergeable_len_to_ctx(len + room, headroom); 2545 err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); 2546 if (err < 0) { 2547 virtnet_rq_unmap(rq, buf, 0); 2548 put_page(virt_to_head_page(buf)); 2549 } 2550 2551 return err; 2552 } 2553 2554 /* 2555 * Returns false if we couldn't fill entirely (OOM). 2556 * 2557 * Normally run in the receive path, but can also be run from ndo_open 2558 * before we're receiving packets, or from refill_work which is 2559 * careful to disable receiving (using napi_disable). 2560 */ 2561 static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq, 2562 gfp_t gfp) 2563 { 2564 int err; 2565 2566 if (rq->xsk_pool) { 2567 err = virtnet_add_recvbuf_xsk(vi, rq, rq->xsk_pool, gfp); 2568 goto kick; 2569 } 2570 2571 do { 2572 if (vi->mergeable_rx_bufs) 2573 err = add_recvbuf_mergeable(vi, rq, gfp); 2574 else if (vi->big_packets) 2575 err = add_recvbuf_big(vi, rq, gfp); 2576 else 2577 err = add_recvbuf_small(vi, rq, gfp); 2578 2579 if (err) 2580 break; 2581 } while (rq->vq->num_free); 2582 2583 kick: 2584 if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) { 2585 unsigned long flags; 2586 2587 flags = u64_stats_update_begin_irqsave(&rq->stats.syncp); 2588 u64_stats_inc(&rq->stats.kicks); 2589 u64_stats_update_end_irqrestore(&rq->stats.syncp, flags); 2590 } 2591 2592 return err != -ENOMEM; 2593 } 2594 2595 static void skb_recv_done(struct virtqueue *rvq) 2596 { 2597 struct virtnet_info *vi = rvq->vdev->priv; 2598 struct receive_queue *rq = &vi->rq[vq2rxq(rvq)]; 2599 2600 rq->calls++; 2601 virtqueue_napi_schedule(&rq->napi, rvq); 2602 } 2603 2604 static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi) 2605 { 2606 napi_enable(napi); 2607 2608 /* If all buffers were filled by other side before we napi_enabled, we 2609 * won't get another interrupt, so process any outstanding packets now. 2610 * Call local_bh_enable after to trigger softIRQ processing. 
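 * Scheduling NAPI between local_bh_disable() and local_bh_enable()
 * means the pending softirq runs on this CPU as soon as bottom
 * halves are re-enabled, so the ring is drained without relying on
 * a further device interrupt.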
2611 */ 2612 local_bh_disable(); 2613 virtqueue_napi_schedule(napi, vq); 2614 local_bh_enable(); 2615 } 2616 2617 static void virtnet_napi_tx_enable(struct virtnet_info *vi, 2618 struct virtqueue *vq, 2619 struct napi_struct *napi) 2620 { 2621 if (!napi->weight) 2622 return; 2623 2624 /* Tx napi touches cachelines on the cpu handling tx interrupts. Only 2625 * enable the feature if this is likely affine with the transmit path. 2626 */ 2627 if (!vi->affinity_hint_set) { 2628 napi->weight = 0; 2629 return; 2630 } 2631 2632 return virtnet_napi_enable(vq, napi); 2633 } 2634 2635 static void virtnet_napi_tx_disable(struct napi_struct *napi) 2636 { 2637 if (napi->weight) 2638 napi_disable(napi); 2639 } 2640 2641 static void refill_work(struct work_struct *work) 2642 { 2643 struct virtnet_info *vi = 2644 container_of(work, struct virtnet_info, refill.work); 2645 bool still_empty; 2646 int i; 2647 2648 for (i = 0; i < vi->curr_queue_pairs; i++) { 2649 struct receive_queue *rq = &vi->rq[i]; 2650 2651 napi_disable(&rq->napi); 2652 still_empty = !try_fill_recv(vi, rq, GFP_KERNEL); 2653 virtnet_napi_enable(rq->vq, &rq->napi); 2654 2655 /* In theory, this can happen: if we don't get any buffers in 2656 * we will *never* try to fill again. 2657 */ 2658 if (still_empty) 2659 schedule_delayed_work(&vi->refill, HZ/2); 2660 } 2661 } 2662 2663 static int virtnet_receive_xsk_bufs(struct virtnet_info *vi, 2664 struct receive_queue *rq, 2665 int budget, 2666 unsigned int *xdp_xmit, 2667 struct virtnet_rq_stats *stats) 2668 { 2669 unsigned int len; 2670 int packets = 0; 2671 void *buf; 2672 2673 while (packets < budget) { 2674 buf = virtqueue_get_buf(rq->vq, &len); 2675 if (!buf) 2676 break; 2677 2678 virtnet_receive_xsk_buf(vi, rq, buf, len, xdp_xmit, stats); 2679 packets++; 2680 } 2681 2682 return packets; 2683 } 2684 2685 static int virtnet_receive_packets(struct virtnet_info *vi, 2686 struct receive_queue *rq, 2687 int budget, 2688 unsigned int *xdp_xmit, 2689 struct virtnet_rq_stats *stats) 2690 { 2691 unsigned int len; 2692 int packets = 0; 2693 void *buf; 2694 2695 if (!vi->big_packets || vi->mergeable_rx_bufs) { 2696 void *ctx; 2697 while (packets < budget && 2698 (buf = virtnet_rq_get_buf(rq, &len, &ctx))) { 2699 receive_buf(vi, rq, buf, len, ctx, xdp_xmit, stats); 2700 packets++; 2701 } 2702 } else { 2703 while (packets < budget && 2704 (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) { 2705 receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats); 2706 packets++; 2707 } 2708 } 2709 2710 return packets; 2711 } 2712 2713 static int virtnet_receive(struct receive_queue *rq, int budget, 2714 unsigned int *xdp_xmit) 2715 { 2716 struct virtnet_info *vi = rq->vq->vdev->priv; 2717 struct virtnet_rq_stats stats = {}; 2718 int i, packets; 2719 2720 if (rq->xsk_pool) 2721 packets = virtnet_receive_xsk_bufs(vi, rq, budget, xdp_xmit, &stats); 2722 else 2723 packets = virtnet_receive_packets(vi, rq, budget, xdp_xmit, &stats); 2724 2725 if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { 2726 if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { 2727 spin_lock(&vi->refill_lock); 2728 if (vi->refill_enabled) 2729 schedule_delayed_work(&vi->refill, 0); 2730 spin_unlock(&vi->refill_lock); 2731 } 2732 } 2733 2734 u64_stats_set(&stats.packets, packets); 2735 u64_stats_update_begin(&rq->stats.syncp); 2736 for (i = 0; i < ARRAY_SIZE(virtnet_rq_stats_desc); i++) { 2737 size_t offset = virtnet_rq_stats_desc[i].offset; 2738 u64_stats_t *item, *src; 2739 2740 item = (u64_stats_t *)((u8 *)&rq->stats + 
offset); 2741 src = (u64_stats_t *)((u8 *)&stats + offset); 2742 u64_stats_add(item, u64_stats_read(src)); 2743 } 2744 2745 u64_stats_add(&rq->stats.packets, u64_stats_read(&stats.packets)); 2746 u64_stats_add(&rq->stats.bytes, u64_stats_read(&stats.bytes)); 2747 2748 u64_stats_update_end(&rq->stats.syncp); 2749 2750 return packets; 2751 } 2752 2753 static void virtnet_poll_cleantx(struct receive_queue *rq, int budget) 2754 { 2755 struct virtnet_info *vi = rq->vq->vdev->priv; 2756 unsigned int index = vq2rxq(rq->vq); 2757 struct send_queue *sq = &vi->sq[index]; 2758 struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index); 2759 2760 if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index)) 2761 return; 2762 2763 if (__netif_tx_trylock(txq)) { 2764 if (sq->reset) { 2765 __netif_tx_unlock(txq); 2766 return; 2767 } 2768 2769 do { 2770 virtqueue_disable_cb(sq->vq); 2771 free_old_xmit(sq, txq, !!budget); 2772 } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 2773 2774 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) { 2775 if (netif_tx_queue_stopped(txq)) { 2776 u64_stats_update_begin(&sq->stats.syncp); 2777 u64_stats_inc(&sq->stats.wake); 2778 u64_stats_update_end(&sq->stats.syncp); 2779 } 2780 netif_tx_wake_queue(txq); 2781 } 2782 2783 __netif_tx_unlock(txq); 2784 } 2785 } 2786 2787 static void virtnet_rx_dim_update(struct virtnet_info *vi, struct receive_queue *rq) 2788 { 2789 struct dim_sample cur_sample = {}; 2790 2791 if (!rq->packets_in_napi) 2792 return; 2793 2794 /* Don't need protection when fetching stats, since fetcher and 2795 * updater of the stats are in same context 2796 */ 2797 dim_update_sample(rq->calls, 2798 u64_stats_read(&rq->stats.packets), 2799 u64_stats_read(&rq->stats.bytes), 2800 &cur_sample); 2801 2802 net_dim(&rq->dim, cur_sample); 2803 rq->packets_in_napi = 0; 2804 } 2805 2806 static int virtnet_poll(struct napi_struct *napi, int budget) 2807 { 2808 struct receive_queue *rq = 2809 container_of(napi, struct receive_queue, napi); 2810 struct virtnet_info *vi = rq->vq->vdev->priv; 2811 struct send_queue *sq; 2812 unsigned int received; 2813 unsigned int xdp_xmit = 0; 2814 bool napi_complete; 2815 2816 virtnet_poll_cleantx(rq, budget); 2817 2818 received = virtnet_receive(rq, budget, &xdp_xmit); 2819 rq->packets_in_napi += received; 2820 2821 if (xdp_xmit & VIRTIO_XDP_REDIR) 2822 xdp_do_flush(); 2823 2824 /* Out of packets? */ 2825 if (received < budget) { 2826 napi_complete = virtqueue_napi_complete(napi, rq->vq, received); 2827 /* Intentionally not taking dim_lock here. This may result in a 2828 * spurious net_dim call. But if that happens virtnet_rx_dim_work 2829 * will not act on the scheduled work. 
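 * virtnet_rx_dim_update() below feeds the per-queue call, packet and
 * byte counters into net_dim(), which may schedule the dim work to
 * re-tune this queue's interrupt coalescing.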
2830 */ 2831 if (napi_complete && rq->dim_enabled) 2832 virtnet_rx_dim_update(vi, rq); 2833 } 2834 2835 if (xdp_xmit & VIRTIO_XDP_TX) { 2836 sq = virtnet_xdp_get_sq(vi); 2837 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 2838 u64_stats_update_begin(&sq->stats.syncp); 2839 u64_stats_inc(&sq->stats.kicks); 2840 u64_stats_update_end(&sq->stats.syncp); 2841 } 2842 virtnet_xdp_put_sq(vi, sq); 2843 } 2844 2845 return received; 2846 } 2847 2848 static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index) 2849 { 2850 virtnet_napi_tx_disable(&vi->sq[qp_index].napi); 2851 napi_disable(&vi->rq[qp_index].napi); 2852 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 2853 } 2854 2855 static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index) 2856 { 2857 struct net_device *dev = vi->dev; 2858 int err; 2859 2860 err = xdp_rxq_info_reg(&vi->rq[qp_index].xdp_rxq, dev, qp_index, 2861 vi->rq[qp_index].napi.napi_id); 2862 if (err < 0) 2863 return err; 2864 2865 err = xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq, 2866 MEM_TYPE_PAGE_SHARED, NULL); 2867 if (err < 0) 2868 goto err_xdp_reg_mem_model; 2869 2870 netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, qp_index)); 2871 virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi); 2872 virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi); 2873 2874 return 0; 2875 2876 err_xdp_reg_mem_model: 2877 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 2878 return err; 2879 } 2880 2881 static void virtnet_cancel_dim(struct virtnet_info *vi, struct dim *dim) 2882 { 2883 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 2884 return; 2885 net_dim_work_cancel(dim); 2886 } 2887 2888 static void virtnet_update_settings(struct virtnet_info *vi) 2889 { 2890 u32 speed; 2891 u8 duplex; 2892 2893 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX)) 2894 return; 2895 2896 virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed); 2897 2898 if (ethtool_validate_speed(speed)) 2899 vi->speed = speed; 2900 2901 virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex); 2902 2903 if (ethtool_validate_duplex(duplex)) 2904 vi->duplex = duplex; 2905 } 2906 2907 static int virtnet_open(struct net_device *dev) 2908 { 2909 struct virtnet_info *vi = netdev_priv(dev); 2910 int i, err; 2911 2912 enable_delayed_refill(vi); 2913 2914 for (i = 0; i < vi->max_queue_pairs; i++) { 2915 if (i < vi->curr_queue_pairs) 2916 /* Make sure we have some buffers: if oom use wq. 
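 * try_fill_recv() returns false only when allocation failed, in
 * which case the delayed refill work retries from process context.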
*/ 2917 if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) 2918 schedule_delayed_work(&vi->refill, 0); 2919 2920 err = virtnet_enable_queue_pair(vi, i); 2921 if (err < 0) 2922 goto err_enable_qp; 2923 } 2924 2925 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { 2926 if (vi->status & VIRTIO_NET_S_LINK_UP) 2927 netif_carrier_on(vi->dev); 2928 virtio_config_driver_enable(vi->vdev); 2929 } else { 2930 vi->status = VIRTIO_NET_S_LINK_UP; 2931 netif_carrier_on(dev); 2932 } 2933 2934 return 0; 2935 2936 err_enable_qp: 2937 disable_delayed_refill(vi); 2938 cancel_delayed_work_sync(&vi->refill); 2939 2940 for (i--; i >= 0; i--) { 2941 virtnet_disable_queue_pair(vi, i); 2942 virtnet_cancel_dim(vi, &vi->rq[i].dim); 2943 } 2944 2945 return err; 2946 } 2947 2948 static int virtnet_poll_tx(struct napi_struct *napi, int budget) 2949 { 2950 struct send_queue *sq = container_of(napi, struct send_queue, napi); 2951 struct virtnet_info *vi = sq->vq->vdev->priv; 2952 unsigned int index = vq2txq(sq->vq); 2953 struct netdev_queue *txq; 2954 int opaque; 2955 bool done; 2956 2957 if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { 2958 /* We don't need to enable cb for XDP */ 2959 napi_complete_done(napi, 0); 2960 return 0; 2961 } 2962 2963 txq = netdev_get_tx_queue(vi->dev, index); 2964 __netif_tx_lock(txq, raw_smp_processor_id()); 2965 virtqueue_disable_cb(sq->vq); 2966 free_old_xmit(sq, txq, !!budget); 2967 2968 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) { 2969 if (netif_tx_queue_stopped(txq)) { 2970 u64_stats_update_begin(&sq->stats.syncp); 2971 u64_stats_inc(&sq->stats.wake); 2972 u64_stats_update_end(&sq->stats.syncp); 2973 } 2974 netif_tx_wake_queue(txq); 2975 } 2976 2977 opaque = virtqueue_enable_cb_prepare(sq->vq); 2978 2979 done = napi_complete_done(napi, 0); 2980 2981 if (!done) 2982 virtqueue_disable_cb(sq->vq); 2983 2984 __netif_tx_unlock(txq); 2985 2986 if (done) { 2987 if (unlikely(virtqueue_poll(sq->vq, opaque))) { 2988 if (napi_schedule_prep(napi)) { 2989 __netif_tx_lock(txq, raw_smp_processor_id()); 2990 virtqueue_disable_cb(sq->vq); 2991 __netif_tx_unlock(txq); 2992 __napi_schedule(napi); 2993 } 2994 } 2995 } 2996 2997 return 0; 2998 } 2999 3000 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) 3001 { 3002 struct virtio_net_hdr_mrg_rxbuf *hdr; 3003 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; 3004 struct virtnet_info *vi = sq->vq->vdev->priv; 3005 int num_sg; 3006 unsigned hdr_len = vi->hdr_len; 3007 bool can_push; 3008 3009 pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); 3010 3011 can_push = vi->any_header_sg && 3012 !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) && 3013 !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len; 3014 /* Even if we can, don't push here yet as this would skew 3015 * csum_start offset below. */ 3016 if (can_push) 3017 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len); 3018 else 3019 hdr = &skb_vnet_common_hdr(skb)->mrg_hdr; 3020 3021 if (virtio_net_hdr_from_skb(skb, &hdr->hdr, 3022 virtio_is_little_endian(vi->vdev), false, 3023 0)) 3024 return -EPROTO; 3025 3026 if (vi->mergeable_rx_bufs) 3027 hdr->num_buffers = 0; 3028 3029 sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2)); 3030 if (can_push) { 3031 __skb_push(skb, hdr_len); 3032 num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len); 3033 if (unlikely(num_sg < 0)) 3034 return num_sg; 3035 /* Pull header back to avoid skew in tx bytes calculations. 
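 * The header was pushed only so skb_to_sgvec() would map it as part
 * of the linear data; pulling it back keeps skb->len at the packet
 * length seen by BQL and the byte counters.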
*/ 3036 __skb_pull(skb, hdr_len); 3037 } else { 3038 sg_set_buf(sq->sg, hdr, hdr_len); 3039 num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len); 3040 if (unlikely(num_sg < 0)) 3041 return num_sg; 3042 num_sg++; 3043 } 3044 return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, 3045 skb_to_ptr(skb, orphan), GFP_ATOMIC); 3046 } 3047 3048 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) 3049 { 3050 struct virtnet_info *vi = netdev_priv(dev); 3051 int qnum = skb_get_queue_mapping(skb); 3052 struct send_queue *sq = &vi->sq[qnum]; 3053 int err; 3054 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 3055 bool xmit_more = netdev_xmit_more(); 3056 bool use_napi = sq->napi.weight; 3057 bool kick; 3058 3059 /* Free up any pending old buffers before queueing new ones. */ 3060 do { 3061 if (use_napi) 3062 virtqueue_disable_cb(sq->vq); 3063 3064 free_old_xmit(sq, txq, false); 3065 3066 } while (use_napi && !xmit_more && 3067 unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 3068 3069 /* timestamp packet in software */ 3070 skb_tx_timestamp(skb); 3071 3072 /* Try to transmit */ 3073 err = xmit_skb(sq, skb, !use_napi); 3074 3075 /* This should not happen! */ 3076 if (unlikely(err)) { 3077 DEV_STATS_INC(dev, tx_fifo_errors); 3078 if (net_ratelimit()) 3079 dev_warn(&dev->dev, 3080 "Unexpected TXQ (%d) queue failure: %d\n", 3081 qnum, err); 3082 DEV_STATS_INC(dev, tx_dropped); 3083 dev_kfree_skb_any(skb); 3084 return NETDEV_TX_OK; 3085 } 3086 3087 /* Don't wait up for transmitted skbs to be freed. */ 3088 if (!use_napi) { 3089 skb_orphan(skb); 3090 nf_reset_ct(skb); 3091 } 3092 3093 check_sq_full_and_disable(vi, dev, sq); 3094 3095 kick = use_napi ? __netdev_tx_sent_queue(txq, skb->len, xmit_more) : 3096 !xmit_more || netif_xmit_stopped(txq); 3097 if (kick) { 3098 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 3099 u64_stats_update_begin(&sq->stats.syncp); 3100 u64_stats_inc(&sq->stats.kicks); 3101 u64_stats_update_end(&sq->stats.syncp); 3102 } 3103 } 3104 3105 return NETDEV_TX_OK; 3106 } 3107 3108 static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) 3109 { 3110 bool running = netif_running(vi->dev); 3111 3112 if (running) { 3113 napi_disable(&rq->napi); 3114 virtnet_cancel_dim(vi, &rq->dim); 3115 } 3116 } 3117 3118 static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq) 3119 { 3120 bool running = netif_running(vi->dev); 3121 3122 if (!try_fill_recv(vi, rq, GFP_KERNEL)) 3123 schedule_delayed_work(&vi->refill, 0); 3124 3125 if (running) 3126 virtnet_napi_enable(rq->vq, &rq->napi); 3127 } 3128 3129 static int virtnet_rx_resize(struct virtnet_info *vi, 3130 struct receive_queue *rq, u32 ring_num) 3131 { 3132 int err, qindex; 3133 3134 qindex = rq - vi->rq; 3135 3136 virtnet_rx_pause(vi, rq); 3137 3138 err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_unmap_free_buf); 3139 if (err) 3140 netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err); 3141 3142 virtnet_rx_resume(vi, rq); 3143 return err; 3144 } 3145 3146 static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq) 3147 { 3148 bool running = netif_running(vi->dev); 3149 struct netdev_queue *txq; 3150 int qindex; 3151 3152 qindex = sq - vi->sq; 3153 3154 if (running) 3155 virtnet_napi_tx_disable(&sq->napi); 3156 3157 txq = netdev_get_tx_queue(vi->dev, qindex); 3158 3159 /* 1. wait all ximt complete 3160 * 2. 
fix the race of netif_stop_subqueue() vs netif_start_subqueue() 3161 */ 3162 __netif_tx_lock_bh(txq); 3163 3164 /* Prevent rx poll from accessing sq. */ 3165 sq->reset = true; 3166 3167 /* Prevent the upper layer from trying to send packets. */ 3168 netif_stop_subqueue(vi->dev, qindex); 3169 3170 __netif_tx_unlock_bh(txq); 3171 } 3172 3173 static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq) 3174 { 3175 bool running = netif_running(vi->dev); 3176 struct netdev_queue *txq; 3177 int qindex; 3178 3179 qindex = sq - vi->sq; 3180 3181 txq = netdev_get_tx_queue(vi->dev, qindex); 3182 3183 __netif_tx_lock_bh(txq); 3184 sq->reset = false; 3185 netif_tx_wake_queue(txq); 3186 __netif_tx_unlock_bh(txq); 3187 3188 if (running) 3189 virtnet_napi_tx_enable(vi, sq->vq, &sq->napi); 3190 } 3191 3192 static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq, 3193 u32 ring_num) 3194 { 3195 int qindex, err; 3196 3197 qindex = sq - vi->sq; 3198 3199 virtnet_tx_pause(vi, sq); 3200 3201 err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf); 3202 if (err) 3203 netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err); 3204 3205 virtnet_tx_resume(vi, sq); 3206 3207 return err; 3208 } 3209 3210 /* 3211 * Send command via the control virtqueue and check status. Commands 3212 * supported by the hypervisor, as indicated by feature bits, should 3213 * never fail unless improperly formatted. 3214 */ 3215 static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd, 3216 struct scatterlist *out, 3217 struct scatterlist *in) 3218 { 3219 struct scatterlist *sgs[5], hdr, stat; 3220 u32 out_num = 0, tmp, in_num = 0; 3221 bool ok; 3222 int ret; 3223 3224 /* Caller should know better */ 3225 BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)); 3226 3227 mutex_lock(&vi->cvq_lock); 3228 vi->ctrl->status = ~0; 3229 vi->ctrl->hdr.class = class; 3230 vi->ctrl->hdr.cmd = cmd; 3231 /* Add header */ 3232 sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr)); 3233 sgs[out_num++] = &hdr; 3234 3235 if (out) 3236 sgs[out_num++] = out; 3237 3238 /* Add return status. */ 3239 sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status)); 3240 sgs[out_num + in_num++] = &stat; 3241 3242 if (in) 3243 sgs[out_num + in_num++] = in; 3244 3245 BUG_ON(out_num + in_num > ARRAY_SIZE(sgs)); 3246 ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC); 3247 if (ret < 0) { 3248 dev_warn(&vi->vdev->dev, 3249 "Failed to add sgs for command vq: %d\n.", ret); 3250 mutex_unlock(&vi->cvq_lock); 3251 return false; 3252 } 3253 3254 if (unlikely(!virtqueue_kick(vi->cvq))) 3255 goto unlock; 3256 3257 /* Spin for a response, the kick causes an ioport write, trapping 3258 * into the hypervisor, so the request should be handled immediately. 
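 * The loop below polls virtqueue_get_buf() with cond_resched() and
 * cpu_relax() until the device posts the status byte or the
 * virtqueue is found broken, with cvq_lock held throughout.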
3259 */ 3260 while (!virtqueue_get_buf(vi->cvq, &tmp) && 3261 !virtqueue_is_broken(vi->cvq)) { 3262 cond_resched(); 3263 cpu_relax(); 3264 } 3265 3266 unlock: 3267 ok = vi->ctrl->status == VIRTIO_NET_OK; 3268 mutex_unlock(&vi->cvq_lock); 3269 return ok; 3270 } 3271 3272 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, 3273 struct scatterlist *out) 3274 { 3275 return virtnet_send_command_reply(vi, class, cmd, out, NULL); 3276 } 3277 3278 static int virtnet_set_mac_address(struct net_device *dev, void *p) 3279 { 3280 struct virtnet_info *vi = netdev_priv(dev); 3281 struct virtio_device *vdev = vi->vdev; 3282 int ret; 3283 struct sockaddr *addr; 3284 struct scatterlist sg; 3285 3286 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 3287 return -EOPNOTSUPP; 3288 3289 addr = kmemdup(p, sizeof(*addr), GFP_KERNEL); 3290 if (!addr) 3291 return -ENOMEM; 3292 3293 ret = eth_prepare_mac_addr_change(dev, addr); 3294 if (ret) 3295 goto out; 3296 3297 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 3298 sg_init_one(&sg, addr->sa_data, dev->addr_len); 3299 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 3300 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 3301 dev_warn(&vdev->dev, 3302 "Failed to set mac address by vq command.\n"); 3303 ret = -EINVAL; 3304 goto out; 3305 } 3306 } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 3307 !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { 3308 unsigned int i; 3309 3310 /* Naturally, this has an atomicity problem. */ 3311 for (i = 0; i < dev->addr_len; i++) 3312 virtio_cwrite8(vdev, 3313 offsetof(struct virtio_net_config, mac) + 3314 i, addr->sa_data[i]); 3315 } 3316 3317 eth_commit_mac_addr_change(dev, p); 3318 ret = 0; 3319 3320 out: 3321 kfree(addr); 3322 return ret; 3323 } 3324 3325 static void virtnet_stats(struct net_device *dev, 3326 struct rtnl_link_stats64 *tot) 3327 { 3328 struct virtnet_info *vi = netdev_priv(dev); 3329 unsigned int start; 3330 int i; 3331 3332 for (i = 0; i < vi->max_queue_pairs; i++) { 3333 u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops; 3334 struct receive_queue *rq = &vi->rq[i]; 3335 struct send_queue *sq = &vi->sq[i]; 3336 3337 do { 3338 start = u64_stats_fetch_begin(&sq->stats.syncp); 3339 tpackets = u64_stats_read(&sq->stats.packets); 3340 tbytes = u64_stats_read(&sq->stats.bytes); 3341 terrors = u64_stats_read(&sq->stats.tx_timeouts); 3342 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 3343 3344 do { 3345 start = u64_stats_fetch_begin(&rq->stats.syncp); 3346 rpackets = u64_stats_read(&rq->stats.packets); 3347 rbytes = u64_stats_read(&rq->stats.bytes); 3348 rdrops = u64_stats_read(&rq->stats.drops); 3349 } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); 3350 3351 tot->rx_packets += rpackets; 3352 tot->tx_packets += tpackets; 3353 tot->rx_bytes += rbytes; 3354 tot->tx_bytes += tbytes; 3355 tot->rx_dropped += rdrops; 3356 tot->tx_errors += terrors; 3357 } 3358 3359 tot->tx_dropped = DEV_STATS_READ(dev, tx_dropped); 3360 tot->tx_fifo_errors = DEV_STATS_READ(dev, tx_fifo_errors); 3361 tot->rx_length_errors = DEV_STATS_READ(dev, rx_length_errors); 3362 tot->rx_frame_errors = DEV_STATS_READ(dev, rx_frame_errors); 3363 } 3364 3365 static void virtnet_ack_link_announce(struct virtnet_info *vi) 3366 { 3367 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE, 3368 VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL)) 3369 dev_warn(&vi->dev->dev, "Failed to ack link announce.\n"); 3370 } 3371 3372 static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) 3373 { 3374 
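/* mq is annotated __free(kfree), so it is released automatically
 * when it goes out of scope and none of the early returns below
 * can leak it.
 */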
struct virtio_net_ctrl_mq *mq __free(kfree) = NULL; 3375 struct scatterlist sg; 3376 struct net_device *dev = vi->dev; 3377 3378 if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ)) 3379 return 0; 3380 3381 mq = kzalloc(sizeof(*mq), GFP_KERNEL); 3382 if (!mq) 3383 return -ENOMEM; 3384 3385 mq->virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs); 3386 sg_init_one(&sg, mq, sizeof(*mq)); 3387 3388 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 3389 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) { 3390 dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n", 3391 queue_pairs); 3392 return -EINVAL; 3393 } else { 3394 vi->curr_queue_pairs = queue_pairs; 3395 /* virtnet_open() will refill when device is going to up. */ 3396 if (dev->flags & IFF_UP) 3397 schedule_delayed_work(&vi->refill, 0); 3398 } 3399 3400 return 0; 3401 } 3402 3403 static int virtnet_close(struct net_device *dev) 3404 { 3405 struct virtnet_info *vi = netdev_priv(dev); 3406 int i; 3407 3408 /* Make sure NAPI doesn't schedule refill work */ 3409 disable_delayed_refill(vi); 3410 /* Make sure refill_work doesn't re-enable napi! */ 3411 cancel_delayed_work_sync(&vi->refill); 3412 /* Prevent the config change callback from changing carrier 3413 * after close 3414 */ 3415 virtio_config_driver_disable(vi->vdev); 3416 /* Stop getting status/speed updates: we don't care until next 3417 * open 3418 */ 3419 cancel_work_sync(&vi->config_work); 3420 3421 for (i = 0; i < vi->max_queue_pairs; i++) { 3422 virtnet_disable_queue_pair(vi, i); 3423 virtnet_cancel_dim(vi, &vi->rq[i].dim); 3424 } 3425 3426 netif_carrier_off(dev); 3427 3428 return 0; 3429 } 3430 3431 static void virtnet_rx_mode_work(struct work_struct *work) 3432 { 3433 struct virtnet_info *vi = 3434 container_of(work, struct virtnet_info, rx_mode_work); 3435 u8 *promisc_allmulti __free(kfree) = NULL; 3436 struct net_device *dev = vi->dev; 3437 struct scatterlist sg[2]; 3438 struct virtio_net_ctrl_mac *mac_data; 3439 struct netdev_hw_addr *ha; 3440 int uc_count; 3441 int mc_count; 3442 void *buf; 3443 int i; 3444 3445 /* We can't dynamically set ndo_set_rx_mode, so return gracefully */ 3446 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX)) 3447 return; 3448 3449 promisc_allmulti = kzalloc(sizeof(*promisc_allmulti), GFP_KERNEL); 3450 if (!promisc_allmulti) { 3451 dev_warn(&dev->dev, "Failed to set RX mode, no memory.\n"); 3452 return; 3453 } 3454 3455 rtnl_lock(); 3456 3457 *promisc_allmulti = !!(dev->flags & IFF_PROMISC); 3458 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); 3459 3460 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 3461 VIRTIO_NET_CTRL_RX_PROMISC, sg)) 3462 dev_warn(&dev->dev, "Failed to %sable promisc mode.\n", 3463 *promisc_allmulti ? "en" : "dis"); 3464 3465 *promisc_allmulti = !!(dev->flags & IFF_ALLMULTI); 3466 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); 3467 3468 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 3469 VIRTIO_NET_CTRL_RX_ALLMULTI, sg)) 3470 dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n", 3471 *promisc_allmulti ? 
"en" : "dis"); 3472 3473 netif_addr_lock_bh(dev); 3474 3475 uc_count = netdev_uc_count(dev); 3476 mc_count = netdev_mc_count(dev); 3477 /* MAC filter - use one buffer for both lists */ 3478 buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) + 3479 (2 * sizeof(mac_data->entries)), GFP_ATOMIC); 3480 mac_data = buf; 3481 if (!buf) { 3482 netif_addr_unlock_bh(dev); 3483 rtnl_unlock(); 3484 return; 3485 } 3486 3487 sg_init_table(sg, 2); 3488 3489 /* Store the unicast list and count in the front of the buffer */ 3490 mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count); 3491 i = 0; 3492 netdev_for_each_uc_addr(ha, dev) 3493 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 3494 3495 sg_set_buf(&sg[0], mac_data, 3496 sizeof(mac_data->entries) + (uc_count * ETH_ALEN)); 3497 3498 /* multicast list and count fill the end */ 3499 mac_data = (void *)&mac_data->macs[uc_count][0]; 3500 3501 mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count); 3502 i = 0; 3503 netdev_for_each_mc_addr(ha, dev) 3504 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 3505 3506 netif_addr_unlock_bh(dev); 3507 3508 sg_set_buf(&sg[1], mac_data, 3509 sizeof(mac_data->entries) + (mc_count * ETH_ALEN)); 3510 3511 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 3512 VIRTIO_NET_CTRL_MAC_TABLE_SET, sg)) 3513 dev_warn(&dev->dev, "Failed to set MAC filter table.\n"); 3514 3515 rtnl_unlock(); 3516 3517 kfree(buf); 3518 } 3519 3520 static void virtnet_set_rx_mode(struct net_device *dev) 3521 { 3522 struct virtnet_info *vi = netdev_priv(dev); 3523 3524 if (vi->rx_mode_work_enabled) 3525 schedule_work(&vi->rx_mode_work); 3526 } 3527 3528 static int virtnet_vlan_rx_add_vid(struct net_device *dev, 3529 __be16 proto, u16 vid) 3530 { 3531 struct virtnet_info *vi = netdev_priv(dev); 3532 __virtio16 *_vid __free(kfree) = NULL; 3533 struct scatterlist sg; 3534 3535 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL); 3536 if (!_vid) 3537 return -ENOMEM; 3538 3539 *_vid = cpu_to_virtio16(vi->vdev, vid); 3540 sg_init_one(&sg, _vid, sizeof(*_vid)); 3541 3542 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 3543 VIRTIO_NET_CTRL_VLAN_ADD, &sg)) 3544 dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid); 3545 return 0; 3546 } 3547 3548 static int virtnet_vlan_rx_kill_vid(struct net_device *dev, 3549 __be16 proto, u16 vid) 3550 { 3551 struct virtnet_info *vi = netdev_priv(dev); 3552 __virtio16 *_vid __free(kfree) = NULL; 3553 struct scatterlist sg; 3554 3555 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL); 3556 if (!_vid) 3557 return -ENOMEM; 3558 3559 *_vid = cpu_to_virtio16(vi->vdev, vid); 3560 sg_init_one(&sg, _vid, sizeof(*_vid)); 3561 3562 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 3563 VIRTIO_NET_CTRL_VLAN_DEL, &sg)) 3564 dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid); 3565 return 0; 3566 } 3567 3568 static void virtnet_clean_affinity(struct virtnet_info *vi) 3569 { 3570 int i; 3571 3572 if (vi->affinity_hint_set) { 3573 for (i = 0; i < vi->max_queue_pairs; i++) { 3574 virtqueue_set_affinity(vi->rq[i].vq, NULL); 3575 virtqueue_set_affinity(vi->sq[i].vq, NULL); 3576 } 3577 3578 vi->affinity_hint_set = false; 3579 } 3580 } 3581 3582 static void virtnet_set_affinity(struct virtnet_info *vi) 3583 { 3584 cpumask_var_t mask; 3585 int stragglers; 3586 int group_size; 3587 int i, j, cpu; 3588 int num_cpu; 3589 int stride; 3590 3591 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { 3592 virtnet_clean_affinity(vi); 3593 return; 3594 } 3595 3596 num_cpu = num_online_cpus(); 3597 stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1); 3598 
stragglers = num_cpu >= vi->curr_queue_pairs ? 3599 num_cpu % vi->curr_queue_pairs : 3600 0; 3601 cpu = cpumask_first(cpu_online_mask); 3602 3603 for (i = 0; i < vi->curr_queue_pairs; i++) { 3604 group_size = stride + (i < stragglers ? 1 : 0); 3605 3606 for (j = 0; j < group_size; j++) { 3607 cpumask_set_cpu(cpu, mask); 3608 cpu = cpumask_next_wrap(cpu, cpu_online_mask, 3609 nr_cpu_ids, false); 3610 } 3611 virtqueue_set_affinity(vi->rq[i].vq, mask); 3612 virtqueue_set_affinity(vi->sq[i].vq, mask); 3613 __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS); 3614 cpumask_clear(mask); 3615 } 3616 3617 vi->affinity_hint_set = true; 3618 free_cpumask_var(mask); 3619 } 3620 3621 static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node) 3622 { 3623 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 3624 node); 3625 virtnet_set_affinity(vi); 3626 return 0; 3627 } 3628 3629 static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node) 3630 { 3631 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 3632 node_dead); 3633 virtnet_set_affinity(vi); 3634 return 0; 3635 } 3636 3637 static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node) 3638 { 3639 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 3640 node); 3641 3642 virtnet_clean_affinity(vi); 3643 return 0; 3644 } 3645 3646 static enum cpuhp_state virtionet_online; 3647 3648 static int virtnet_cpu_notif_add(struct virtnet_info *vi) 3649 { 3650 int ret; 3651 3652 ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node); 3653 if (ret) 3654 return ret; 3655 ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD, 3656 &vi->node_dead); 3657 if (!ret) 3658 return ret; 3659 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 3660 return ret; 3661 } 3662 3663 static void virtnet_cpu_notif_remove(struct virtnet_info *vi) 3664 { 3665 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 3666 cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD, 3667 &vi->node_dead); 3668 } 3669 3670 static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi, 3671 u16 vqn, u32 max_usecs, u32 max_packets) 3672 { 3673 struct virtio_net_ctrl_coal_vq *coal_vq __free(kfree) = NULL; 3674 struct scatterlist sgs; 3675 3676 coal_vq = kzalloc(sizeof(*coal_vq), GFP_KERNEL); 3677 if (!coal_vq) 3678 return -ENOMEM; 3679 3680 coal_vq->vqn = cpu_to_le16(vqn); 3681 coal_vq->coal.max_usecs = cpu_to_le32(max_usecs); 3682 coal_vq->coal.max_packets = cpu_to_le32(max_packets); 3683 sg_init_one(&sgs, coal_vq, sizeof(*coal_vq)); 3684 3685 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 3686 VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET, 3687 &sgs)) 3688 return -EINVAL; 3689 3690 return 0; 3691 } 3692 3693 static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info *vi, 3694 u16 queue, u32 max_usecs, 3695 u32 max_packets) 3696 { 3697 int err; 3698 3699 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 3700 return -EOPNOTSUPP; 3701 3702 err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue), 3703 max_usecs, max_packets); 3704 if (err) 3705 return err; 3706 3707 vi->rq[queue].intr_coal.max_usecs = max_usecs; 3708 vi->rq[queue].intr_coal.max_packets = max_packets; 3709 3710 return 0; 3711 } 3712 3713 static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info *vi, 3714 u16 queue, u32 max_usecs, 3715 u32 max_packets) 3716 { 3717 int err; 3718 3719 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 3720 return -EOPNOTSUPP; 
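/* txq2vq() translates the queue index into the virtqueue number
 * carried in the VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET command (for
 * virtio-net, receive and transmit virtqueues are interleaved, so
 * queue N maps to vq 2N and 2N + 1 respectively); the cached
 * intr_coal values are only updated once the command succeeds.
 */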
3721 3722 err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue), 3723 max_usecs, max_packets); 3724 if (err) 3725 return err; 3726 3727 vi->sq[queue].intr_coal.max_usecs = max_usecs; 3728 vi->sq[queue].intr_coal.max_packets = max_packets; 3729 3730 return 0; 3731 } 3732 3733 static void virtnet_get_ringparam(struct net_device *dev, 3734 struct ethtool_ringparam *ring, 3735 struct kernel_ethtool_ringparam *kernel_ring, 3736 struct netlink_ext_ack *extack) 3737 { 3738 struct virtnet_info *vi = netdev_priv(dev); 3739 3740 ring->rx_max_pending = vi->rq[0].vq->num_max; 3741 ring->tx_max_pending = vi->sq[0].vq->num_max; 3742 ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 3743 ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 3744 } 3745 3746 static int virtnet_set_ringparam(struct net_device *dev, 3747 struct ethtool_ringparam *ring, 3748 struct kernel_ethtool_ringparam *kernel_ring, 3749 struct netlink_ext_ack *extack) 3750 { 3751 struct virtnet_info *vi = netdev_priv(dev); 3752 u32 rx_pending, tx_pending; 3753 struct receive_queue *rq; 3754 struct send_queue *sq; 3755 int i, err; 3756 3757 if (ring->rx_mini_pending || ring->rx_jumbo_pending) 3758 return -EINVAL; 3759 3760 rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 3761 tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 3762 3763 if (ring->rx_pending == rx_pending && 3764 ring->tx_pending == tx_pending) 3765 return 0; 3766 3767 if (ring->rx_pending > vi->rq[0].vq->num_max) 3768 return -EINVAL; 3769 3770 if (ring->tx_pending > vi->sq[0].vq->num_max) 3771 return -EINVAL; 3772 3773 for (i = 0; i < vi->max_queue_pairs; i++) { 3774 rq = vi->rq + i; 3775 sq = vi->sq + i; 3776 3777 if (ring->tx_pending != tx_pending) { 3778 err = virtnet_tx_resize(vi, sq, ring->tx_pending); 3779 if (err) 3780 return err; 3781 3782 /* Upon disabling and re-enabling a transmit virtqueue, the device must 3783 * set the coalescing parameters of the virtqueue to those configured 3784 * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver 3785 * did not set any TX coalescing parameters, to 0. 3786 */ 3787 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, i, 3788 vi->intr_coal_tx.max_usecs, 3789 vi->intr_coal_tx.max_packets); 3790 3791 /* Don't break the tx resize action if the vq coalescing is not 3792 * supported. The same is true for rx resize below. 
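 * If the command is not supported the resize still succeeds; only
 * restoring the per-queue coalescing parameters is skipped.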
3793 */ 3794 if (err && err != -EOPNOTSUPP) 3795 return err; 3796 } 3797 3798 if (ring->rx_pending != rx_pending) { 3799 err = virtnet_rx_resize(vi, rq, ring->rx_pending); 3800 if (err) 3801 return err; 3802 3803 /* The reason is same as the transmit virtqueue reset */ 3804 mutex_lock(&vi->rq[i].dim_lock); 3805 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, i, 3806 vi->intr_coal_rx.max_usecs, 3807 vi->intr_coal_rx.max_packets); 3808 mutex_unlock(&vi->rq[i].dim_lock); 3809 if (err && err != -EOPNOTSUPP) 3810 return err; 3811 } 3812 } 3813 3814 return 0; 3815 } 3816 3817 static bool virtnet_commit_rss_command(struct virtnet_info *vi) 3818 { 3819 struct net_device *dev = vi->dev; 3820 struct scatterlist sgs[4]; 3821 unsigned int sg_buf_size; 3822 3823 /* prepare sgs */ 3824 sg_init_table(sgs, 4); 3825 3826 sg_buf_size = offsetof(struct virtio_net_ctrl_rss, indirection_table); 3827 sg_set_buf(&sgs[0], &vi->rss, sg_buf_size); 3828 3829 sg_buf_size = sizeof(uint16_t) * (vi->rss.indirection_table_mask + 1); 3830 sg_set_buf(&sgs[1], vi->rss.indirection_table, sg_buf_size); 3831 3832 sg_buf_size = offsetof(struct virtio_net_ctrl_rss, key) 3833 - offsetof(struct virtio_net_ctrl_rss, max_tx_vq); 3834 sg_set_buf(&sgs[2], &vi->rss.max_tx_vq, sg_buf_size); 3835 3836 sg_buf_size = vi->rss_key_size; 3837 sg_set_buf(&sgs[3], vi->rss.key, sg_buf_size); 3838 3839 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 3840 vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG 3841 : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs)) 3842 goto err; 3843 3844 return true; 3845 3846 err: 3847 dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n"); 3848 return false; 3849 3850 } 3851 3852 static void virtnet_init_default_rss(struct virtnet_info *vi) 3853 { 3854 u32 indir_val = 0; 3855 int i = 0; 3856 3857 vi->rss.hash_types = vi->rss_hash_types_supported; 3858 vi->rss_hash_types_saved = vi->rss_hash_types_supported; 3859 vi->rss.indirection_table_mask = vi->rss_indir_table_size 3860 ? vi->rss_indir_table_size - 1 : 0; 3861 vi->rss.unclassified_queue = 0; 3862 3863 for (; i < vi->rss_indir_table_size; ++i) { 3864 indir_val = ethtool_rxfh_indir_default(i, vi->curr_queue_pairs); 3865 vi->rss.indirection_table[i] = indir_val; 3866 } 3867 3868 vi->rss.max_tx_vq = vi->has_rss ? 
vi->curr_queue_pairs : 0; 3869 vi->rss.hash_key_length = vi->rss_key_size; 3870 3871 netdev_rss_key_fill(vi->rss.key, vi->rss_key_size); 3872 } 3873 3874 static void virtnet_get_hashflow(const struct virtnet_info *vi, struct ethtool_rxnfc *info) 3875 { 3876 info->data = 0; 3877 switch (info->flow_type) { 3878 case TCP_V4_FLOW: 3879 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) { 3880 info->data = RXH_IP_SRC | RXH_IP_DST | 3881 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3882 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 3883 info->data = RXH_IP_SRC | RXH_IP_DST; 3884 } 3885 break; 3886 case TCP_V6_FLOW: 3887 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) { 3888 info->data = RXH_IP_SRC | RXH_IP_DST | 3889 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3890 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 3891 info->data = RXH_IP_SRC | RXH_IP_DST; 3892 } 3893 break; 3894 case UDP_V4_FLOW: 3895 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) { 3896 info->data = RXH_IP_SRC | RXH_IP_DST | 3897 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3898 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 3899 info->data = RXH_IP_SRC | RXH_IP_DST; 3900 } 3901 break; 3902 case UDP_V6_FLOW: 3903 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) { 3904 info->data = RXH_IP_SRC | RXH_IP_DST | 3905 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3906 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 3907 info->data = RXH_IP_SRC | RXH_IP_DST; 3908 } 3909 break; 3910 case IPV4_FLOW: 3911 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) 3912 info->data = RXH_IP_SRC | RXH_IP_DST; 3913 3914 break; 3915 case IPV6_FLOW: 3916 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) 3917 info->data = RXH_IP_SRC | RXH_IP_DST; 3918 3919 break; 3920 default: 3921 info->data = 0; 3922 break; 3923 } 3924 } 3925 3926 static bool virtnet_set_hashflow(struct virtnet_info *vi, struct ethtool_rxnfc *info) 3927 { 3928 u32 new_hashtypes = vi->rss_hash_types_saved; 3929 bool is_disable = info->data & RXH_DISCARD; 3930 bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3); 3931 3932 /* supports only 'sd', 'sdfn' and 'r' */ 3933 if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable)) 3934 return false; 3935 3936 switch (info->flow_type) { 3937 case TCP_V4_FLOW: 3938 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4); 3939 if (!is_disable) 3940 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 3941 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0); 3942 break; 3943 case UDP_V4_FLOW: 3944 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4); 3945 if (!is_disable) 3946 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 3947 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0); 3948 break; 3949 case IPV4_FLOW: 3950 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4; 3951 if (!is_disable) 3952 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4; 3953 break; 3954 case TCP_V6_FLOW: 3955 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6); 3956 if (!is_disable) 3957 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 3958 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0); 3959 break; 3960 case UDP_V6_FLOW: 3961 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6); 3962 if (!is_disable) 3963 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 3964 | (is_l4 ? 
VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0); 3965 break; 3966 case IPV6_FLOW: 3967 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6; 3968 if (!is_disable) 3969 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6; 3970 break; 3971 default: 3972 /* unsupported flow */ 3973 return false; 3974 } 3975 3976 /* if unsupported hashtype was set */ 3977 if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported)) 3978 return false; 3979 3980 if (new_hashtypes != vi->rss_hash_types_saved) { 3981 vi->rss_hash_types_saved = new_hashtypes; 3982 vi->rss.hash_types = vi->rss_hash_types_saved; 3983 if (vi->dev->features & NETIF_F_RXHASH) 3984 return virtnet_commit_rss_command(vi); 3985 } 3986 3987 return true; 3988 } 3989 3990 static void virtnet_get_drvinfo(struct net_device *dev, 3991 struct ethtool_drvinfo *info) 3992 { 3993 struct virtnet_info *vi = netdev_priv(dev); 3994 struct virtio_device *vdev = vi->vdev; 3995 3996 strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); 3997 strscpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version)); 3998 strscpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info)); 3999 4000 } 4001 4002 /* TODO: Eliminate OOO packets during switching */ 4003 static int virtnet_set_channels(struct net_device *dev, 4004 struct ethtool_channels *channels) 4005 { 4006 struct virtnet_info *vi = netdev_priv(dev); 4007 u16 queue_pairs = channels->combined_count; 4008 int err; 4009 4010 /* We don't support separate rx/tx channels. 4011 * We don't allow setting 'other' channels. 4012 */ 4013 if (channels->rx_count || channels->tx_count || channels->other_count) 4014 return -EINVAL; 4015 4016 if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0) 4017 return -EINVAL; 4018 4019 /* For now we don't support modifying channels while XDP is loaded 4020 * also when XDP is loaded all RX queues have XDP programs so we only 4021 * need to check a single RX queue. 
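 * The change itself is issued through the control virtqueue by
 * virtnet_set_queues() below and is followed by re-spreading the
 * virtqueue affinity and XPS maps across the new queue count.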
4022 */ 4023 if (vi->rq[0].xdp_prog) 4024 return -EINVAL; 4025 4026 cpus_read_lock(); 4027 err = virtnet_set_queues(vi, queue_pairs); 4028 if (err) { 4029 cpus_read_unlock(); 4030 goto err; 4031 } 4032 virtnet_set_affinity(vi); 4033 cpus_read_unlock(); 4034 4035 netif_set_real_num_tx_queues(dev, queue_pairs); 4036 netif_set_real_num_rx_queues(dev, queue_pairs); 4037 err: 4038 return err; 4039 } 4040 4041 static void virtnet_stats_sprintf(u8 **p, const char *fmt, const char *noq_fmt, 4042 int num, int qid, const struct virtnet_stat_desc *desc) 4043 { 4044 int i; 4045 4046 if (qid < 0) { 4047 for (i = 0; i < num; ++i) 4048 ethtool_sprintf(p, noq_fmt, desc[i].desc); 4049 } else { 4050 for (i = 0; i < num; ++i) 4051 ethtool_sprintf(p, fmt, qid, desc[i].desc); 4052 } 4053 } 4054 4055 /* qid == -1: for rx/tx queue total field */ 4056 static void virtnet_get_stats_string(struct virtnet_info *vi, int type, int qid, u8 **data) 4057 { 4058 const struct virtnet_stat_desc *desc; 4059 const char *fmt, *noq_fmt; 4060 u8 *p = *data; 4061 u32 num; 4062 4063 if (type == VIRTNET_Q_TYPE_CQ && qid >= 0) { 4064 noq_fmt = "cq_hw_%s"; 4065 4066 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { 4067 desc = &virtnet_stats_cvq_desc[0]; 4068 num = ARRAY_SIZE(virtnet_stats_cvq_desc); 4069 4070 virtnet_stats_sprintf(&p, NULL, noq_fmt, num, -1, desc); 4071 } 4072 } 4073 4074 if (type == VIRTNET_Q_TYPE_RX) { 4075 fmt = "rx%u_%s"; 4076 noq_fmt = "rx_%s"; 4077 4078 desc = &virtnet_rq_stats_desc[0]; 4079 num = ARRAY_SIZE(virtnet_rq_stats_desc); 4080 4081 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4082 4083 fmt = "rx%u_hw_%s"; 4084 noq_fmt = "rx_hw_%s"; 4085 4086 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4087 desc = &virtnet_stats_rx_basic_desc[0]; 4088 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4089 4090 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4091 } 4092 4093 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4094 desc = &virtnet_stats_rx_csum_desc[0]; 4095 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4096 4097 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4098 } 4099 4100 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4101 desc = &virtnet_stats_rx_speed_desc[0]; 4102 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4103 4104 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4105 } 4106 } 4107 4108 if (type == VIRTNET_Q_TYPE_TX) { 4109 fmt = "tx%u_%s"; 4110 noq_fmt = "tx_%s"; 4111 4112 desc = &virtnet_sq_stats_desc[0]; 4113 num = ARRAY_SIZE(virtnet_sq_stats_desc); 4114 4115 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4116 4117 fmt = "tx%u_hw_%s"; 4118 noq_fmt = "tx_hw_%s"; 4119 4120 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4121 desc = &virtnet_stats_tx_basic_desc[0]; 4122 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4123 4124 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4125 } 4126 4127 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4128 desc = &virtnet_stats_tx_gso_desc[0]; 4129 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4130 4131 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4132 } 4133 4134 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4135 desc = &virtnet_stats_tx_speed_desc[0]; 4136 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4137 4138 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4139 } 4140 } 4141 4142 *data = p; 4143 } 4144 4145 struct virtnet_stats_ctx { 4146 /* The stats are write to qstats or ethtool -S */ 4147 
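/* Rough layout of the ethtool -S output buffer that @data points at
 * when to_qstat is false (see virtnet_fill_total_fields() and
 * virtnet_fill_stats()):
 *
 *   [rx totals][tx totals][cq hw stats, if supported][rx0][rx1]...[tx0][tx1]...
 *
 * Each per-queue block starts with the driver counters and is followed
 * by whichever hw (device) counter groups the device supports.
 */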
bool to_qstat; 4148 4149 /* Used to calculate the offset inside the output buffer. */ 4150 u32 desc_num[3]; 4151 4152 /* The actual supported stat types. */ 4153 u32 bitmap[3]; 4154 4155 /* Used to calculate the reply buffer size. */ 4156 u32 size[3]; 4157 4158 /* Record the output buffer. */ 4159 u64 *data; 4160 }; 4161 4162 static void virtnet_stats_ctx_init(struct virtnet_info *vi, 4163 struct virtnet_stats_ctx *ctx, 4164 u64 *data, bool to_qstat) 4165 { 4166 u32 queue_type; 4167 4168 ctx->data = data; 4169 ctx->to_qstat = to_qstat; 4170 4171 if (to_qstat) { 4172 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); 4173 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); 4174 4175 queue_type = VIRTNET_Q_TYPE_RX; 4176 4177 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4178 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; 4179 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); 4180 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); 4181 } 4182 4183 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4184 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; 4185 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); 4186 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); 4187 } 4188 4189 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 4190 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_GSO; 4191 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); 4192 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_gso); 4193 } 4194 4195 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4196 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; 4197 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); 4198 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); 4199 } 4200 4201 queue_type = VIRTNET_Q_TYPE_TX; 4202 4203 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4204 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; 4205 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); 4206 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); 4207 } 4208 4209 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 4210 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_CSUM; 4211 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); 4212 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_csum); 4213 } 4214 4215 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4216 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; 4217 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); 4218 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); 4219 } 4220 4221 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4222 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; 4223 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); 4224 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); 4225 } 4226 4227 return; 4228 } 4229 4230 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc); 4231 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc); 4232 4233 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { 4234 queue_type = VIRTNET_Q_TYPE_CQ; 4235 4236 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_CVQ; 4237 ctx->desc_num[queue_type] += 
ARRAY_SIZE(virtnet_stats_cvq_desc); 4238 ctx->size[queue_type] += sizeof(struct virtio_net_stats_cvq); 4239 } 4240 4241 queue_type = VIRTNET_Q_TYPE_RX; 4242 4243 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4244 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; 4245 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4246 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); 4247 } 4248 4249 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4250 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; 4251 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4252 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); 4253 } 4254 4255 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4256 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; 4257 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4258 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); 4259 } 4260 4261 queue_type = VIRTNET_Q_TYPE_TX; 4262 4263 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4264 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; 4265 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4266 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); 4267 } 4268 4269 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4270 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; 4271 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4272 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); 4273 } 4274 4275 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4276 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; 4277 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4278 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); 4279 } 4280 } 4281 4282 /* stats_sum_queue - Calculate the sum of the same fields in sq or rq. 
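 *
 * For example, with num == 2 fields (say packets and bytes) and
 * q_num == 2 queues, q_value is laid out as
 * [q0.packets, q0.bytes, q1.packets, q1.bytes] and the result is
 * sum[0] = q0.packets + q1.packets, sum[1] = q0.bytes + q1.bytes.
 *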
4283 * @sum: the position to store the sum values 4284 * @num: field num 4285 * @q_value: the first queue fields 4286 * @q_num: number of the queues 4287 */ 4288 static void stats_sum_queue(u64 *sum, u32 num, u64 *q_value, u32 q_num) 4289 { 4290 u32 step = num; 4291 int i, j; 4292 u64 *p; 4293 4294 for (i = 0; i < num; ++i) { 4295 p = sum + i; 4296 *p = 0; 4297 4298 for (j = 0; j < q_num; ++j) 4299 *p += *(q_value + i + j * step); 4300 } 4301 } 4302 4303 static void virtnet_fill_total_fields(struct virtnet_info *vi, 4304 struct virtnet_stats_ctx *ctx) 4305 { 4306 u64 *data, *first_rx_q, *first_tx_q; 4307 u32 num_cq, num_rx, num_tx; 4308 4309 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; 4310 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; 4311 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; 4312 4313 first_rx_q = ctx->data + num_rx + num_tx + num_cq; 4314 first_tx_q = first_rx_q + vi->curr_queue_pairs * num_rx; 4315 4316 data = ctx->data; 4317 4318 stats_sum_queue(data, num_rx, first_rx_q, vi->curr_queue_pairs); 4319 4320 data = ctx->data + num_rx; 4321 4322 stats_sum_queue(data, num_tx, first_tx_q, vi->curr_queue_pairs); 4323 } 4324 4325 static void virtnet_fill_stats_qstat(struct virtnet_info *vi, u32 qid, 4326 struct virtnet_stats_ctx *ctx, 4327 const u8 *base, bool drv_stats, u8 reply_type) 4328 { 4329 const struct virtnet_stat_desc *desc; 4330 const u64_stats_t *v_stat; 4331 u64 offset, bitmap; 4332 const __le64 *v; 4333 u32 queue_type; 4334 int i, num; 4335 4336 queue_type = vq_type(vi, qid); 4337 bitmap = ctx->bitmap[queue_type]; 4338 4339 if (drv_stats) { 4340 if (queue_type == VIRTNET_Q_TYPE_RX) { 4341 desc = &virtnet_rq_stats_desc_qstat[0]; 4342 num = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); 4343 } else { 4344 desc = &virtnet_sq_stats_desc_qstat[0]; 4345 num = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); 4346 } 4347 4348 for (i = 0; i < num; ++i) { 4349 offset = desc[i].qstat_offset / sizeof(*ctx->data); 4350 v_stat = (const u64_stats_t *)(base + desc[i].offset); 4351 ctx->data[offset] = u64_stats_read(v_stat); 4352 } 4353 return; 4354 } 4355 4356 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4357 desc = &virtnet_stats_rx_basic_desc_qstat[0]; 4358 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); 4359 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) 4360 goto found; 4361 } 4362 4363 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4364 desc = &virtnet_stats_rx_csum_desc_qstat[0]; 4365 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); 4366 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) 4367 goto found; 4368 } 4369 4370 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 4371 desc = &virtnet_stats_rx_gso_desc_qstat[0]; 4372 num = ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); 4373 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_GSO) 4374 goto found; 4375 } 4376 4377 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4378 desc = &virtnet_stats_rx_speed_desc_qstat[0]; 4379 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); 4380 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) 4381 goto found; 4382 } 4383 4384 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4385 desc = &virtnet_stats_tx_basic_desc_qstat[0]; 4386 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); 4387 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) 4388 goto found; 4389 } 4390 4391 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 4392 desc = &virtnet_stats_tx_csum_desc_qstat[0]; 4393 num = ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); 4394 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_CSUM) 4395 goto found; 4396 
} 4397 4398 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4399 desc = &virtnet_stats_tx_gso_desc_qstat[0]; 4400 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); 4401 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) 4402 goto found; 4403 } 4404 4405 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4406 desc = &virtnet_stats_tx_speed_desc_qstat[0]; 4407 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); 4408 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) 4409 goto found; 4410 } 4411 4412 return; 4413 4414 found: 4415 for (i = 0; i < num; ++i) { 4416 offset = desc[i].qstat_offset / sizeof(*ctx->data); 4417 v = (const __le64 *)(base + desc[i].offset); 4418 ctx->data[offset] = le64_to_cpu(*v); 4419 } 4420 } 4421 4422 /* virtnet_fill_stats - copy the stats to qstats or ethtool -S 4423 * The stats source is the device or the driver. 4424 * 4425 * @vi: virtio net info 4426 * @qid: the vq id 4427 * @ctx: stats ctx (initiated by virtnet_stats_ctx_init()) 4428 * @base: pointer to the device reply or the driver stats structure. 4429 * @drv_stats: designate the base type (device reply, driver stats) 4430 * @type: the type of the device reply (if drv_stats is true, this must be zero) 4431 */ 4432 static void virtnet_fill_stats(struct virtnet_info *vi, u32 qid, 4433 struct virtnet_stats_ctx *ctx, 4434 const u8 *base, bool drv_stats, u8 reply_type) 4435 { 4436 u32 queue_type, num_rx, num_tx, num_cq; 4437 const struct virtnet_stat_desc *desc; 4438 const u64_stats_t *v_stat; 4439 u64 offset, bitmap; 4440 const __le64 *v; 4441 int i, num; 4442 4443 if (ctx->to_qstat) 4444 return virtnet_fill_stats_qstat(vi, qid, ctx, base, drv_stats, reply_type); 4445 4446 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; 4447 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; 4448 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; 4449 4450 queue_type = vq_type(vi, qid); 4451 bitmap = ctx->bitmap[queue_type]; 4452 4453 /* skip the total fields of pairs */ 4454 offset = num_rx + num_tx; 4455 4456 if (queue_type == VIRTNET_Q_TYPE_TX) { 4457 offset += num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2); 4458 4459 num = ARRAY_SIZE(virtnet_sq_stats_desc); 4460 if (drv_stats) { 4461 desc = &virtnet_sq_stats_desc[0]; 4462 goto drv_stats; 4463 } 4464 4465 offset += num; 4466 4467 } else if (queue_type == VIRTNET_Q_TYPE_RX) { 4468 offset += num_cq + num_rx * (qid / 2); 4469 4470 num = ARRAY_SIZE(virtnet_rq_stats_desc); 4471 if (drv_stats) { 4472 desc = &virtnet_rq_stats_desc[0]; 4473 goto drv_stats; 4474 } 4475 4476 offset += num; 4477 } 4478 4479 if (bitmap & VIRTIO_NET_STATS_TYPE_CVQ) { 4480 desc = &virtnet_stats_cvq_desc[0]; 4481 num = ARRAY_SIZE(virtnet_stats_cvq_desc); 4482 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_CVQ) 4483 goto found; 4484 4485 offset += num; 4486 } 4487 4488 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4489 desc = &virtnet_stats_rx_basic_desc[0]; 4490 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4491 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) 4492 goto found; 4493 4494 offset += num; 4495 } 4496 4497 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4498 desc = &virtnet_stats_rx_csum_desc[0]; 4499 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4500 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) 4501 goto found; 4502 4503 offset += num; 4504 } 4505 4506 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4507 desc = &virtnet_stats_rx_speed_desc[0]; 4508 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4509 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) 4510 goto found; 4511 4512 
offset += num; 4513 } 4514 4515 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4516 desc = &virtnet_stats_tx_basic_desc[0]; 4517 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4518 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) 4519 goto found; 4520 4521 offset += num; 4522 } 4523 4524 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4525 desc = &virtnet_stats_tx_gso_desc[0]; 4526 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4527 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) 4528 goto found; 4529 4530 offset += num; 4531 } 4532 4533 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4534 desc = &virtnet_stats_tx_speed_desc[0]; 4535 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4536 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) 4537 goto found; 4538 4539 offset += num; 4540 } 4541 4542 return; 4543 4544 found: 4545 for (i = 0; i < num; ++i) { 4546 v = (const __le64 *)(base + desc[i].offset); 4547 ctx->data[offset + i] = le64_to_cpu(*v); 4548 } 4549 4550 return; 4551 4552 drv_stats: 4553 for (i = 0; i < num; ++i) { 4554 v_stat = (const u64_stats_t *)(base + desc[i].offset); 4555 ctx->data[offset + i] = u64_stats_read(v_stat); 4556 } 4557 } 4558 4559 static int __virtnet_get_hw_stats(struct virtnet_info *vi, 4560 struct virtnet_stats_ctx *ctx, 4561 struct virtio_net_ctrl_queue_stats *req, 4562 int req_size, void *reply, int res_size) 4563 { 4564 struct virtio_net_stats_reply_hdr *hdr; 4565 struct scatterlist sgs_in, sgs_out; 4566 void *p; 4567 u32 qid; 4568 int ok; 4569 4570 sg_init_one(&sgs_out, req, req_size); 4571 sg_init_one(&sgs_in, reply, res_size); 4572 4573 ok = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, 4574 VIRTIO_NET_CTRL_STATS_GET, 4575 &sgs_out, &sgs_in); 4576 4577 if (!ok) 4578 return ok; 4579 4580 for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) { 4581 hdr = p; 4582 qid = le16_to_cpu(hdr->vq_index); 4583 virtnet_fill_stats(vi, qid, ctx, p, false, hdr->type); 4584 } 4585 4586 return 0; 4587 } 4588 4589 static void virtnet_make_stat_req(struct virtnet_info *vi, 4590 struct virtnet_stats_ctx *ctx, 4591 struct virtio_net_ctrl_queue_stats *req, 4592 int qid, int *idx) 4593 { 4594 int qtype = vq_type(vi, qid); 4595 u64 bitmap = ctx->bitmap[qtype]; 4596 4597 if (!bitmap) 4598 return; 4599 4600 req->stats[*idx].vq_index = cpu_to_le16(qid); 4601 req->stats[*idx].types_bitmap[0] = cpu_to_le64(bitmap); 4602 *idx += 1; 4603 } 4604 4605 /* qid: -1: get stats of all vqs. 4606 * >= 0: get the stats for the specific vq. This must not be cvq.
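 *
 * The device reply is a sequence of blocks, each prefixed by a
 * virtio_net_stats_reply_hdr; __virtnet_get_hw_stats() walks these
 * blocks and routes each one to virtnet_fill_stats() based on
 * hdr->vq_index and hdr->type.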
4607 */ 4608 static int virtnet_get_hw_stats(struct virtnet_info *vi, 4609 struct virtnet_stats_ctx *ctx, int qid) 4610 { 4611 int qnum, i, j, res_size, qtype, last_vq, first_vq; 4612 struct virtio_net_ctrl_queue_stats *req; 4613 bool enable_cvq; 4614 void *reply; 4615 int ok; 4616 4617 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) 4618 return 0; 4619 4620 if (qid == -1) { 4621 last_vq = vi->curr_queue_pairs * 2 - 1; 4622 first_vq = 0; 4623 enable_cvq = true; 4624 } else { 4625 last_vq = qid; 4626 first_vq = qid; 4627 enable_cvq = false; 4628 } 4629 4630 qnum = 0; 4631 res_size = 0; 4632 for (i = first_vq; i <= last_vq ; ++i) { 4633 qtype = vq_type(vi, i); 4634 if (ctx->bitmap[qtype]) { 4635 ++qnum; 4636 res_size += ctx->size[qtype]; 4637 } 4638 } 4639 4640 if (enable_cvq && ctx->bitmap[VIRTNET_Q_TYPE_CQ]) { 4641 res_size += ctx->size[VIRTNET_Q_TYPE_CQ]; 4642 qnum += 1; 4643 } 4644 4645 req = kcalloc(qnum, sizeof(*req), GFP_KERNEL); 4646 if (!req) 4647 return -ENOMEM; 4648 4649 reply = kmalloc(res_size, GFP_KERNEL); 4650 if (!reply) { 4651 kfree(req); 4652 return -ENOMEM; 4653 } 4654 4655 j = 0; 4656 for (i = first_vq; i <= last_vq ; ++i) 4657 virtnet_make_stat_req(vi, ctx, req, i, &j); 4658 4659 if (enable_cvq) 4660 virtnet_make_stat_req(vi, ctx, req, vi->max_queue_pairs * 2, &j); 4661 4662 ok = __virtnet_get_hw_stats(vi, ctx, req, sizeof(*req) * j, reply, res_size); 4663 4664 kfree(req); 4665 kfree(reply); 4666 4667 return ok; 4668 } 4669 4670 static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) 4671 { 4672 struct virtnet_info *vi = netdev_priv(dev); 4673 unsigned int i; 4674 u8 *p = data; 4675 4676 switch (stringset) { 4677 case ETH_SS_STATS: 4678 /* Generate the total field names. */ 4679 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, -1, &p); 4680 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, -1, &p); 4681 4682 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_CQ, 0, &p); 4683 4684 for (i = 0; i < vi->curr_queue_pairs; ++i) 4685 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, i, &p); 4686 4687 for (i = 0; i < vi->curr_queue_pairs; ++i) 4688 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, i, &p); 4689 break; 4690 } 4691 } 4692 4693 static int virtnet_get_sset_count(struct net_device *dev, int sset) 4694 { 4695 struct virtnet_info *vi = netdev_priv(dev); 4696 struct virtnet_stats_ctx ctx = {0}; 4697 u32 pair_count; 4698 4699 switch (sset) { 4700 case ETH_SS_STATS: 4701 virtnet_stats_ctx_init(vi, &ctx, NULL, false); 4702 4703 pair_count = ctx.desc_num[VIRTNET_Q_TYPE_RX] + ctx.desc_num[VIRTNET_Q_TYPE_TX]; 4704 4705 return pair_count + ctx.desc_num[VIRTNET_Q_TYPE_CQ] + 4706 vi->curr_queue_pairs * pair_count; 4707 default: 4708 return -EOPNOTSUPP; 4709 } 4710 } 4711 4712 static void virtnet_get_ethtool_stats(struct net_device *dev, 4713 struct ethtool_stats *stats, u64 *data) 4714 { 4715 struct virtnet_info *vi = netdev_priv(dev); 4716 struct virtnet_stats_ctx ctx = {0}; 4717 unsigned int start, i; 4718 const u8 *stats_base; 4719 4720 virtnet_stats_ctx_init(vi, &ctx, data, false); 4721 if (virtnet_get_hw_stats(vi, &ctx, -1)) 4722 dev_warn(&vi->dev->dev, "Failed to get hw stats.\n"); 4723 4724 for (i = 0; i < vi->curr_queue_pairs; i++) { 4725 struct receive_queue *rq = &vi->rq[i]; 4726 struct send_queue *sq = &vi->sq[i]; 4727 4728 stats_base = (const u8 *)&rq->stats; 4729 do { 4730 start = u64_stats_fetch_begin(&rq->stats.syncp); 4731 virtnet_fill_stats(vi, i * 2, &ctx, stats_base, true, 0); 4732 } while (u64_stats_fetch_retry(&rq->stats.syncp, 
start)); 4733 4734 stats_base = (const u8 *)&sq->stats; 4735 do { 4736 start = u64_stats_fetch_begin(&sq->stats.syncp); 4737 virtnet_fill_stats(vi, i * 2 + 1, &ctx, stats_base, true, 0); 4738 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 4739 } 4740 4741 virtnet_fill_total_fields(vi, &ctx); 4742 } 4743 4744 static void virtnet_get_channels(struct net_device *dev, 4745 struct ethtool_channels *channels) 4746 { 4747 struct virtnet_info *vi = netdev_priv(dev); 4748 4749 channels->combined_count = vi->curr_queue_pairs; 4750 channels->max_combined = vi->max_queue_pairs; 4751 channels->max_other = 0; 4752 channels->rx_count = 0; 4753 channels->tx_count = 0; 4754 channels->other_count = 0; 4755 } 4756 4757 static int virtnet_set_link_ksettings(struct net_device *dev, 4758 const struct ethtool_link_ksettings *cmd) 4759 { 4760 struct virtnet_info *vi = netdev_priv(dev); 4761 4762 return ethtool_virtdev_set_link_ksettings(dev, cmd, 4763 &vi->speed, &vi->duplex); 4764 } 4765 4766 static int virtnet_get_link_ksettings(struct net_device *dev, 4767 struct ethtool_link_ksettings *cmd) 4768 { 4769 struct virtnet_info *vi = netdev_priv(dev); 4770 4771 cmd->base.speed = vi->speed; 4772 cmd->base.duplex = vi->duplex; 4773 cmd->base.port = PORT_OTHER; 4774 4775 return 0; 4776 } 4777 4778 static int virtnet_send_tx_notf_coal_cmds(struct virtnet_info *vi, 4779 struct ethtool_coalesce *ec) 4780 { 4781 struct virtio_net_ctrl_coal_tx *coal_tx __free(kfree) = NULL; 4782 struct scatterlist sgs_tx; 4783 int i; 4784 4785 coal_tx = kzalloc(sizeof(*coal_tx), GFP_KERNEL); 4786 if (!coal_tx) 4787 return -ENOMEM; 4788 4789 coal_tx->tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); 4790 coal_tx->tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); 4791 sg_init_one(&sgs_tx, coal_tx, sizeof(*coal_tx)); 4792 4793 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 4794 VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, 4795 &sgs_tx)) 4796 return -EINVAL; 4797 4798 vi->intr_coal_tx.max_usecs = ec->tx_coalesce_usecs; 4799 vi->intr_coal_tx.max_packets = ec->tx_max_coalesced_frames; 4800 for (i = 0; i < vi->max_queue_pairs; i++) { 4801 vi->sq[i].intr_coal.max_usecs = ec->tx_coalesce_usecs; 4802 vi->sq[i].intr_coal.max_packets = ec->tx_max_coalesced_frames; 4803 } 4804 4805 return 0; 4806 } 4807 4808 static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi, 4809 struct ethtool_coalesce *ec) 4810 { 4811 struct virtio_net_ctrl_coal_rx *coal_rx __free(kfree) = NULL; 4812 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; 4813 struct scatterlist sgs_rx; 4814 int i; 4815 4816 if (rx_ctrl_dim_on && !virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 4817 return -EOPNOTSUPP; 4818 4819 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != vi->intr_coal_rx.max_usecs || 4820 ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets)) 4821 return -EINVAL; 4822 4823 if (rx_ctrl_dim_on && !vi->rx_dim_enabled) { 4824 vi->rx_dim_enabled = true; 4825 for (i = 0; i < vi->max_queue_pairs; i++) { 4826 mutex_lock(&vi->rq[i].dim_lock); 4827 vi->rq[i].dim_enabled = true; 4828 mutex_unlock(&vi->rq[i].dim_lock); 4829 } 4830 return 0; 4831 } 4832 4833 coal_rx = kzalloc(sizeof(*coal_rx), GFP_KERNEL); 4834 if (!coal_rx) 4835 return -ENOMEM; 4836 4837 if (!rx_ctrl_dim_on && vi->rx_dim_enabled) { 4838 vi->rx_dim_enabled = false; 4839 for (i = 0; i < vi->max_queue_pairs; i++) { 4840 mutex_lock(&vi->rq[i].dim_lock); 4841 vi->rq[i].dim_enabled = false; 4842 mutex_unlock(&vi->rq[i].dim_lock); 4843 } 4844 } 4845 4846 /* Since the per-queue 
coalescing params can be set, 4847 * we need to apply the new global params even if they 4848 * are not updated. 4849 */ 4850 coal_rx->rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); 4851 coal_rx->rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); 4852 sg_init_one(&sgs_rx, coal_rx, sizeof(*coal_rx)); 4853 4854 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 4855 VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, 4856 &sgs_rx)) 4857 return -EINVAL; 4858 4859 vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs; 4860 vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames; 4861 for (i = 0; i < vi->max_queue_pairs; i++) { 4862 mutex_lock(&vi->rq[i].dim_lock); 4863 vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs; 4864 vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames; 4865 mutex_unlock(&vi->rq[i].dim_lock); 4866 } 4867 4868 return 0; 4869 } 4870 4871 static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, 4872 struct ethtool_coalesce *ec) 4873 { 4874 int err; 4875 4876 err = virtnet_send_tx_notf_coal_cmds(vi, ec); 4877 if (err) 4878 return err; 4879 4880 err = virtnet_send_rx_notf_coal_cmds(vi, ec); 4881 if (err) 4882 return err; 4883 4884 return 0; 4885 } 4886 4887 static int virtnet_send_rx_notf_coal_vq_cmds(struct virtnet_info *vi, 4888 struct ethtool_coalesce *ec, 4889 u16 queue) 4890 { 4891 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; 4892 u32 max_usecs, max_packets; 4893 bool cur_rx_dim; 4894 int err; 4895 4896 mutex_lock(&vi->rq[queue].dim_lock); 4897 cur_rx_dim = vi->rq[queue].dim_enabled; 4898 max_usecs = vi->rq[queue].intr_coal.max_usecs; 4899 max_packets = vi->rq[queue].intr_coal.max_packets; 4900 4901 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != max_usecs || 4902 ec->rx_max_coalesced_frames != max_packets)) { 4903 mutex_unlock(&vi->rq[queue].dim_lock); 4904 return -EINVAL; 4905 } 4906 4907 if (rx_ctrl_dim_on && !cur_rx_dim) { 4908 vi->rq[queue].dim_enabled = true; 4909 mutex_unlock(&vi->rq[queue].dim_lock); 4910 return 0; 4911 } 4912 4913 if (!rx_ctrl_dim_on && cur_rx_dim) 4914 vi->rq[queue].dim_enabled = false; 4915 4916 /* If no params are updated, userspace ethtool will 4917 * reject the modification.
4918 */ 4919 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue, 4920 ec->rx_coalesce_usecs, 4921 ec->rx_max_coalesced_frames); 4922 mutex_unlock(&vi->rq[queue].dim_lock); 4923 return err; 4924 } 4925 4926 static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi, 4927 struct ethtool_coalesce *ec, 4928 u16 queue) 4929 { 4930 int err; 4931 4932 err = virtnet_send_rx_notf_coal_vq_cmds(vi, ec, queue); 4933 if (err) 4934 return err; 4935 4936 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, queue, 4937 ec->tx_coalesce_usecs, 4938 ec->tx_max_coalesced_frames); 4939 if (err) 4940 return err; 4941 4942 return 0; 4943 } 4944 4945 static void virtnet_rx_dim_work(struct work_struct *work) 4946 { 4947 struct dim *dim = container_of(work, struct dim, work); 4948 struct receive_queue *rq = container_of(dim, 4949 struct receive_queue, dim); 4950 struct virtnet_info *vi = rq->vq->vdev->priv; 4951 struct net_device *dev = vi->dev; 4952 struct dim_cq_moder update_moder; 4953 int qnum, err; 4954 4955 qnum = rq - vi->rq; 4956 4957 mutex_lock(&rq->dim_lock); 4958 if (!rq->dim_enabled) 4959 goto out; 4960 4961 update_moder = net_dim_get_rx_irq_moder(dev, dim); 4962 if (update_moder.usec != rq->intr_coal.max_usecs || 4963 update_moder.pkts != rq->intr_coal.max_packets) { 4964 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum, 4965 update_moder.usec, 4966 update_moder.pkts); 4967 if (err) 4968 pr_debug("%s: Failed to send dim parameters on rxq%d\n", 4969 dev->name, qnum); 4970 } 4971 out: 4972 dim->state = DIM_START_MEASURE; 4973 mutex_unlock(&rq->dim_lock); 4974 } 4975 4976 static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) 4977 { 4978 /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL 4979 * or VIRTIO_NET_F_VQ_NOTF_COAL feature is negotiated. 4980 */ 4981 if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) 4982 return -EOPNOTSUPP; 4983 4984 if (ec->tx_max_coalesced_frames > 1 || 4985 ec->rx_max_coalesced_frames != 1) 4986 return -EINVAL; 4987 4988 return 0; 4989 } 4990 4991 static int virtnet_should_update_vq_weight(int dev_flags, int weight, 4992 int vq_weight, bool *should_update) 4993 { 4994 if (weight ^ vq_weight) { 4995 if (dev_flags & IFF_UP) 4996 return -EBUSY; 4997 *should_update = true; 4998 } 4999 5000 return 0; 5001 } 5002 5003 static int virtnet_set_coalesce(struct net_device *dev, 5004 struct ethtool_coalesce *ec, 5005 struct kernel_ethtool_coalesce *kernel_coal, 5006 struct netlink_ext_ack *extack) 5007 { 5008 struct virtnet_info *vi = netdev_priv(dev); 5009 int ret, queue_number, napi_weight; 5010 bool update_napi = false; 5011 5012 /* Can't change NAPI weight if the link is up */ 5013 napi_weight = ec->tx_max_coalesced_frames ? 
NAPI_POLL_WEIGHT : 0; 5014 for (queue_number = 0; queue_number < vi->max_queue_pairs; queue_number++) { 5015 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 5016 vi->sq[queue_number].napi.weight, 5017 &update_napi); 5018 if (ret) 5019 return ret; 5020 5021 if (update_napi) { 5022 /* All queues that belong to [queue_number, vi->max_queue_pairs] will be 5023 * updated for the sake of simplicity, which might not be necessary 5024 */ 5025 break; 5026 } 5027 } 5028 5029 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) 5030 ret = virtnet_send_notf_coal_cmds(vi, ec); 5031 else 5032 ret = virtnet_coal_params_supported(ec); 5033 5034 if (ret) 5035 return ret; 5036 5037 if (update_napi) { 5038 for (; queue_number < vi->max_queue_pairs; queue_number++) 5039 vi->sq[queue_number].napi.weight = napi_weight; 5040 } 5041 5042 return ret; 5043 } 5044 5045 static int virtnet_get_coalesce(struct net_device *dev, 5046 struct ethtool_coalesce *ec, 5047 struct kernel_ethtool_coalesce *kernel_coal, 5048 struct netlink_ext_ack *extack) 5049 { 5050 struct virtnet_info *vi = netdev_priv(dev); 5051 5052 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 5053 ec->rx_coalesce_usecs = vi->intr_coal_rx.max_usecs; 5054 ec->tx_coalesce_usecs = vi->intr_coal_tx.max_usecs; 5055 ec->tx_max_coalesced_frames = vi->intr_coal_tx.max_packets; 5056 ec->rx_max_coalesced_frames = vi->intr_coal_rx.max_packets; 5057 ec->use_adaptive_rx_coalesce = vi->rx_dim_enabled; 5058 } else { 5059 ec->rx_max_coalesced_frames = 1; 5060 5061 if (vi->sq[0].napi.weight) 5062 ec->tx_max_coalesced_frames = 1; 5063 } 5064 5065 return 0; 5066 } 5067 5068 static int virtnet_set_per_queue_coalesce(struct net_device *dev, 5069 u32 queue, 5070 struct ethtool_coalesce *ec) 5071 { 5072 struct virtnet_info *vi = netdev_priv(dev); 5073 int ret, napi_weight; 5074 bool update_napi = false; 5075 5076 if (queue >= vi->max_queue_pairs) 5077 return -EINVAL; 5078 5079 /* Can't change NAPI weight if the link is up */ 5080 napi_weight = ec->tx_max_coalesced_frames ? 
NAPI_POLL_WEIGHT : 0; 5081 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 5082 vi->sq[queue].napi.weight, 5083 &update_napi); 5084 if (ret) 5085 return ret; 5086 5087 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 5088 ret = virtnet_send_notf_coal_vq_cmds(vi, ec, queue); 5089 else 5090 ret = virtnet_coal_params_supported(ec); 5091 5092 if (ret) 5093 return ret; 5094 5095 if (update_napi) 5096 vi->sq[queue].napi.weight = napi_weight; 5097 5098 return 0; 5099 } 5100 5101 static int virtnet_get_per_queue_coalesce(struct net_device *dev, 5102 u32 queue, 5103 struct ethtool_coalesce *ec) 5104 { 5105 struct virtnet_info *vi = netdev_priv(dev); 5106 5107 if (queue >= vi->max_queue_pairs) 5108 return -EINVAL; 5109 5110 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 5111 mutex_lock(&vi->rq[queue].dim_lock); 5112 ec->rx_coalesce_usecs = vi->rq[queue].intr_coal.max_usecs; 5113 ec->tx_coalesce_usecs = vi->sq[queue].intr_coal.max_usecs; 5114 ec->tx_max_coalesced_frames = vi->sq[queue].intr_coal.max_packets; 5115 ec->rx_max_coalesced_frames = vi->rq[queue].intr_coal.max_packets; 5116 ec->use_adaptive_rx_coalesce = vi->rq[queue].dim_enabled; 5117 mutex_unlock(&vi->rq[queue].dim_lock); 5118 } else { 5119 ec->rx_max_coalesced_frames = 1; 5120 5121 if (vi->sq[queue].napi.weight) 5122 ec->tx_max_coalesced_frames = 1; 5123 } 5124 5125 return 0; 5126 } 5127 5128 static void virtnet_init_settings(struct net_device *dev) 5129 { 5130 struct virtnet_info *vi = netdev_priv(dev); 5131 5132 vi->speed = SPEED_UNKNOWN; 5133 vi->duplex = DUPLEX_UNKNOWN; 5134 } 5135 5136 static u32 virtnet_get_rxfh_key_size(struct net_device *dev) 5137 { 5138 return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size; 5139 } 5140 5141 static u32 virtnet_get_rxfh_indir_size(struct net_device *dev) 5142 { 5143 return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size; 5144 } 5145 5146 static int virtnet_get_rxfh(struct net_device *dev, 5147 struct ethtool_rxfh_param *rxfh) 5148 { 5149 struct virtnet_info *vi = netdev_priv(dev); 5150 int i; 5151 5152 if (rxfh->indir) { 5153 for (i = 0; i < vi->rss_indir_table_size; ++i) 5154 rxfh->indir[i] = vi->rss.indirection_table[i]; 5155 } 5156 5157 if (rxfh->key) 5158 memcpy(rxfh->key, vi->rss.key, vi->rss_key_size); 5159 5160 rxfh->hfunc = ETH_RSS_HASH_TOP; 5161 5162 return 0; 5163 } 5164 5165 static int virtnet_set_rxfh(struct net_device *dev, 5166 struct ethtool_rxfh_param *rxfh, 5167 struct netlink_ext_ack *extack) 5168 { 5169 struct virtnet_info *vi = netdev_priv(dev); 5170 bool update = false; 5171 int i; 5172 5173 if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && 5174 rxfh->hfunc != ETH_RSS_HASH_TOP) 5175 return -EOPNOTSUPP; 5176 5177 if (rxfh->indir) { 5178 if (!vi->has_rss) 5179 return -EOPNOTSUPP; 5180 5181 for (i = 0; i < vi->rss_indir_table_size; ++i) 5182 vi->rss.indirection_table[i] = rxfh->indir[i]; 5183 update = true; 5184 } 5185 5186 if (rxfh->key) { 5187 /* If either _F_HASH_REPORT or _F_RSS are negotiated, the 5188 * device provides hash calculation capabilities, that is, 5189 * hash_key is configured. 
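 * Note that updating the indirection table above additionally requires
 * VIRTIO_NET_F_RSS (vi->has_rss), while the key alone only needs hash
 * reporting or RSS to be negotiated.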
5190 */ 5191 if (!vi->has_rss && !vi->has_rss_hash_report) 5192 return -EOPNOTSUPP; 5193 5194 memcpy(vi->rss.key, rxfh->key, vi->rss_key_size); 5195 update = true; 5196 } 5197 5198 if (update) 5199 virtnet_commit_rss_command(vi); 5200 5201 return 0; 5202 } 5203 5204 static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) 5205 { 5206 struct virtnet_info *vi = netdev_priv(dev); 5207 int rc = 0; 5208 5209 switch (info->cmd) { 5210 case ETHTOOL_GRXRINGS: 5211 info->data = vi->curr_queue_pairs; 5212 break; 5213 case ETHTOOL_GRXFH: 5214 virtnet_get_hashflow(vi, info); 5215 break; 5216 default: 5217 rc = -EOPNOTSUPP; 5218 } 5219 5220 return rc; 5221 } 5222 5223 static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) 5224 { 5225 struct virtnet_info *vi = netdev_priv(dev); 5226 int rc = 0; 5227 5228 switch (info->cmd) { 5229 case ETHTOOL_SRXFH: 5230 if (!virtnet_set_hashflow(vi, info)) 5231 rc = -EINVAL; 5232 5233 break; 5234 default: 5235 rc = -EOPNOTSUPP; 5236 } 5237 5238 return rc; 5239 } 5240 5241 static const struct ethtool_ops virtnet_ethtool_ops = { 5242 .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | 5243 ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX, 5244 .get_drvinfo = virtnet_get_drvinfo, 5245 .get_link = ethtool_op_get_link, 5246 .get_ringparam = virtnet_get_ringparam, 5247 .set_ringparam = virtnet_set_ringparam, 5248 .get_strings = virtnet_get_strings, 5249 .get_sset_count = virtnet_get_sset_count, 5250 .get_ethtool_stats = virtnet_get_ethtool_stats, 5251 .set_channels = virtnet_set_channels, 5252 .get_channels = virtnet_get_channels, 5253 .get_ts_info = ethtool_op_get_ts_info, 5254 .get_link_ksettings = virtnet_get_link_ksettings, 5255 .set_link_ksettings = virtnet_set_link_ksettings, 5256 .set_coalesce = virtnet_set_coalesce, 5257 .get_coalesce = virtnet_get_coalesce, 5258 .set_per_queue_coalesce = virtnet_set_per_queue_coalesce, 5259 .get_per_queue_coalesce = virtnet_get_per_queue_coalesce, 5260 .get_rxfh_key_size = virtnet_get_rxfh_key_size, 5261 .get_rxfh_indir_size = virtnet_get_rxfh_indir_size, 5262 .get_rxfh = virtnet_get_rxfh, 5263 .set_rxfh = virtnet_set_rxfh, 5264 .get_rxnfc = virtnet_get_rxnfc, 5265 .set_rxnfc = virtnet_set_rxnfc, 5266 }; 5267 5268 static void virtnet_get_queue_stats_rx(struct net_device *dev, int i, 5269 struct netdev_queue_stats_rx *stats) 5270 { 5271 struct virtnet_info *vi = netdev_priv(dev); 5272 struct receive_queue *rq = &vi->rq[i]; 5273 struct virtnet_stats_ctx ctx = {0}; 5274 5275 virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); 5276 5277 virtnet_get_hw_stats(vi, &ctx, i * 2); 5278 virtnet_fill_stats(vi, i * 2, &ctx, (void *)&rq->stats, true, 0); 5279 } 5280 5281 static void virtnet_get_queue_stats_tx(struct net_device *dev, int i, 5282 struct netdev_queue_stats_tx *stats) 5283 { 5284 struct virtnet_info *vi = netdev_priv(dev); 5285 struct send_queue *sq = &vi->sq[i]; 5286 struct virtnet_stats_ctx ctx = {0}; 5287 5288 virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); 5289 5290 virtnet_get_hw_stats(vi, &ctx, i * 2 + 1); 5291 virtnet_fill_stats(vi, i * 2 + 1, &ctx, (void *)&sq->stats, true, 0); 5292 } 5293 5294 static void virtnet_get_base_stats(struct net_device *dev, 5295 struct netdev_queue_stats_rx *rx, 5296 struct netdev_queue_stats_tx *tx) 5297 { 5298 struct virtnet_info *vi = netdev_priv(dev); 5299 5300 /* The queue stats of the virtio-net will not be reset. So here we 5301 * return 0. 
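 * (The netdev qstats core adds these base values on top of the
 * per-queue counters, so reporting zeroes here leaves the per-queue
 * values unchanged in the totals.)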
5302 */ 5303 rx->bytes = 0; 5304 rx->packets = 0; 5305 5306 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 5307 rx->hw_drops = 0; 5308 rx->hw_drop_overruns = 0; 5309 } 5310 5311 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 5312 rx->csum_unnecessary = 0; 5313 rx->csum_none = 0; 5314 rx->csum_bad = 0; 5315 } 5316 5317 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 5318 rx->hw_gro_packets = 0; 5319 rx->hw_gro_bytes = 0; 5320 rx->hw_gro_wire_packets = 0; 5321 rx->hw_gro_wire_bytes = 0; 5322 } 5323 5324 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) 5325 rx->hw_drop_ratelimits = 0; 5326 5327 tx->bytes = 0; 5328 tx->packets = 0; 5329 tx->stop = 0; 5330 tx->wake = 0; 5331 5332 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 5333 tx->hw_drops = 0; 5334 tx->hw_drop_errors = 0; 5335 } 5336 5337 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 5338 tx->csum_none = 0; 5339 tx->needs_csum = 0; 5340 } 5341 5342 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 5343 tx->hw_gso_packets = 0; 5344 tx->hw_gso_bytes = 0; 5345 tx->hw_gso_wire_packets = 0; 5346 tx->hw_gso_wire_bytes = 0; 5347 } 5348 5349 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) 5350 tx->hw_drop_ratelimits = 0; 5351 } 5352 5353 static const struct netdev_stat_ops virtnet_stat_ops = { 5354 .get_queue_stats_rx = virtnet_get_queue_stats_rx, 5355 .get_queue_stats_tx = virtnet_get_queue_stats_tx, 5356 .get_base_stats = virtnet_get_base_stats, 5357 }; 5358 5359 static void virtnet_freeze_down(struct virtio_device *vdev) 5360 { 5361 struct virtnet_info *vi = vdev->priv; 5362 5363 /* Make sure no work handler is accessing the device */ 5364 flush_work(&vi->config_work); 5365 disable_rx_mode_work(vi); 5366 flush_work(&vi->rx_mode_work); 5367 5368 netif_tx_lock_bh(vi->dev); 5369 netif_device_detach(vi->dev); 5370 netif_tx_unlock_bh(vi->dev); 5371 if (netif_running(vi->dev)) 5372 virtnet_close(vi->dev); 5373 } 5374 5375 static int init_vqs(struct virtnet_info *vi); 5376 5377 static int virtnet_restore_up(struct virtio_device *vdev) 5378 { 5379 struct virtnet_info *vi = vdev->priv; 5380 int err; 5381 5382 err = init_vqs(vi); 5383 if (err) 5384 return err; 5385 5386 virtio_device_ready(vdev); 5387 5388 enable_delayed_refill(vi); 5389 enable_rx_mode_work(vi); 5390 5391 if (netif_running(vi->dev)) { 5392 err = virtnet_open(vi->dev); 5393 if (err) 5394 return err; 5395 } 5396 5397 netif_tx_lock_bh(vi->dev); 5398 netif_device_attach(vi->dev); 5399 netif_tx_unlock_bh(vi->dev); 5400 return err; 5401 } 5402 5403 static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads) 5404 { 5405 __virtio64 *_offloads __free(kfree) = NULL; 5406 struct scatterlist sg; 5407 5408 _offloads = kzalloc(sizeof(*_offloads), GFP_KERNEL); 5409 if (!_offloads) 5410 return -ENOMEM; 5411 5412 *_offloads = cpu_to_virtio64(vi->vdev, offloads); 5413 5414 sg_init_one(&sg, _offloads, sizeof(*_offloads)); 5415 5416 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS, 5417 VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) { 5418 dev_warn(&vi->dev->dev, "Fail to set guest offload.\n"); 5419 return -EINVAL; 5420 } 5421 5422 return 0; 5423 } 5424 5425 static int virtnet_clear_guest_offloads(struct virtnet_info *vi) 5426 { 5427 u64 offloads = 0; 5428 5429 if (!vi->guest_offloads) 5430 return 0; 5431 5432 return virtnet_set_guest_offloads(vi, offloads); 5433 } 5434 5435 static int virtnet_restore_guest_offloads(struct virtnet_info *vi) 5436 { 5437 u64 offloads = vi->guest_offloads; 
5438 5439 if (!vi->guest_offloads) 5440 return 0; 5441 5442 return virtnet_set_guest_offloads(vi, offloads); 5443 } 5444 5445 static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct receive_queue *rq, 5446 struct xsk_buff_pool *pool) 5447 { 5448 int err, qindex; 5449 5450 qindex = rq - vi->rq; 5451 5452 if (pool) { 5453 err = xdp_rxq_info_reg(&rq->xsk_rxq_info, vi->dev, qindex, rq->napi.napi_id); 5454 if (err < 0) 5455 return err; 5456 5457 err = xdp_rxq_info_reg_mem_model(&rq->xsk_rxq_info, 5458 MEM_TYPE_XSK_BUFF_POOL, NULL); 5459 if (err < 0) 5460 goto unreg; 5461 5462 xsk_pool_set_rxq_info(pool, &rq->xsk_rxq_info); 5463 } 5464 5465 virtnet_rx_pause(vi, rq); 5466 5467 err = virtqueue_reset(rq->vq, virtnet_rq_unmap_free_buf); 5468 if (err) { 5469 netdev_err(vi->dev, "reset rx fail: rx queue index: %d err: %d\n", qindex, err); 5470 5471 pool = NULL; 5472 } 5473 5474 rq->xsk_pool = pool; 5475 5476 virtnet_rx_resume(vi, rq); 5477 5478 if (pool) 5479 return 0; 5480 5481 unreg: 5482 xdp_rxq_info_unreg(&rq->xsk_rxq_info); 5483 return err; 5484 } 5485 5486 static int virtnet_xsk_pool_enable(struct net_device *dev, 5487 struct xsk_buff_pool *pool, 5488 u16 qid) 5489 { 5490 struct virtnet_info *vi = netdev_priv(dev); 5491 struct receive_queue *rq; 5492 struct device *dma_dev; 5493 struct send_queue *sq; 5494 int err, size; 5495 5496 if (vi->hdr_len > xsk_pool_get_headroom(pool)) 5497 return -EINVAL; 5498 5499 /* In big_packets mode, XDP cannot work, so there is no need to 5500 * initialize xsk for the rq. 5501 */ 5502 if (vi->big_packets && !vi->mergeable_rx_bufs) 5503 return -ENOENT; 5504 5505 if (qid >= vi->curr_queue_pairs) 5506 return -EINVAL; 5507 5508 sq = &vi->sq[qid]; 5509 rq = &vi->rq[qid]; 5510 5511 /* xsk assumes that tx and rx must share the same dma device. AF_XDP 5512 * may use one buffer to receive from the rx and then reuse that buffer 5513 * to send via the tx, so the dma dev of the sq and rq must be the same one. 5514 * 5515 * But vq->dma_dev allows each vq to have its own dma dev, so here we 5516 * check that the dma devs of the rq and sq are the same dev.
5517 */ 5518 if (virtqueue_dma_dev(rq->vq) != virtqueue_dma_dev(sq->vq)) 5519 return -EINVAL; 5520 5521 dma_dev = virtqueue_dma_dev(rq->vq); 5522 if (!dma_dev) 5523 return -EINVAL; 5524 5525 size = virtqueue_get_vring_size(rq->vq); 5526 5527 rq->xsk_buffs = kvcalloc(size, sizeof(*rq->xsk_buffs), GFP_KERNEL); 5528 if (!rq->xsk_buffs) 5529 return -ENOMEM; 5530 5531 err = xsk_pool_dma_map(pool, dma_dev, 0); 5532 if (err) 5533 goto err_xsk_map; 5534 5535 err = virtnet_rq_bind_xsk_pool(vi, rq, pool); 5536 if (err) 5537 goto err_rq; 5538 5539 return 0; 5540 5541 err_rq: 5542 xsk_pool_dma_unmap(pool, 0); 5543 err_xsk_map: 5544 return err; 5545 } 5546 5547 static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid) 5548 { 5549 struct virtnet_info *vi = netdev_priv(dev); 5550 struct xsk_buff_pool *pool; 5551 struct receive_queue *rq; 5552 int err; 5553 5554 if (qid >= vi->curr_queue_pairs) 5555 return -EINVAL; 5556 5557 rq = &vi->rq[qid]; 5558 5559 pool = rq->xsk_pool; 5560 5561 err = virtnet_rq_bind_xsk_pool(vi, rq, NULL); 5562 5563 xsk_pool_dma_unmap(pool, 0); 5564 5565 kvfree(rq->xsk_buffs); 5566 5567 return err; 5568 } 5569 5570 static int virtnet_xsk_pool_setup(struct net_device *dev, struct netdev_bpf *xdp) 5571 { 5572 if (xdp->xsk.pool) 5573 return virtnet_xsk_pool_enable(dev, xdp->xsk.pool, 5574 xdp->xsk.queue_id); 5575 else 5576 return virtnet_xsk_pool_disable(dev, xdp->xsk.queue_id); 5577 } 5578 5579 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, 5580 struct netlink_ext_ack *extack) 5581 { 5582 unsigned int room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM + 5583 sizeof(struct skb_shared_info)); 5584 unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN; 5585 struct virtnet_info *vi = netdev_priv(dev); 5586 struct bpf_prog *old_prog; 5587 u16 xdp_qp = 0, curr_qp; 5588 int i, err; 5589 5590 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) 5591 && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 5592 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 5593 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 5594 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 5595 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) || 5596 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) || 5597 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) { 5598 NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first"); 5599 return -EOPNOTSUPP; 5600 } 5601 5602 if (vi->mergeable_rx_bufs && !vi->any_header_sg) { 5603 NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required"); 5604 return -EINVAL; 5605 } 5606 5607 if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) { 5608 NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags"); 5609 netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz); 5610 return -EINVAL; 5611 } 5612 5613 curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs; 5614 if (prog) 5615 xdp_qp = nr_cpu_ids; 5616 5617 /* XDP requires extra queues for XDP_TX */ 5618 if (curr_qp + xdp_qp > vi->max_queue_pairs) { 5619 netdev_warn_once(dev, "XDP request %i queues but max is %i. 
XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n", 5620 curr_qp + xdp_qp, vi->max_queue_pairs); 5621 xdp_qp = 0; 5622 } 5623 5624 old_prog = rtnl_dereference(vi->rq[0].xdp_prog); 5625 if (!prog && !old_prog) 5626 return 0; 5627 5628 if (prog) 5629 bpf_prog_add(prog, vi->max_queue_pairs - 1); 5630 5631 /* Make sure NAPI is not using any XDP TX queues for RX. */ 5632 if (netif_running(dev)) { 5633 for (i = 0; i < vi->max_queue_pairs; i++) { 5634 napi_disable(&vi->rq[i].napi); 5635 virtnet_napi_tx_disable(&vi->sq[i].napi); 5636 } 5637 } 5638 5639 if (!prog) { 5640 for (i = 0; i < vi->max_queue_pairs; i++) { 5641 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 5642 if (i == 0) 5643 virtnet_restore_guest_offloads(vi); 5644 } 5645 synchronize_net(); 5646 } 5647 5648 err = virtnet_set_queues(vi, curr_qp + xdp_qp); 5649 if (err) 5650 goto err; 5651 netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); 5652 vi->xdp_queue_pairs = xdp_qp; 5653 5654 if (prog) { 5655 vi->xdp_enabled = true; 5656 for (i = 0; i < vi->max_queue_pairs; i++) { 5657 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 5658 if (i == 0 && !old_prog) 5659 virtnet_clear_guest_offloads(vi); 5660 } 5661 if (!old_prog) 5662 xdp_features_set_redirect_target(dev, true); 5663 } else { 5664 xdp_features_clear_redirect_target(dev); 5665 vi->xdp_enabled = false; 5666 } 5667 5668 for (i = 0; i < vi->max_queue_pairs; i++) { 5669 if (old_prog) 5670 bpf_prog_put(old_prog); 5671 if (netif_running(dev)) { 5672 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); 5673 virtnet_napi_tx_enable(vi, vi->sq[i].vq, 5674 &vi->sq[i].napi); 5675 } 5676 } 5677 5678 return 0; 5679 5680 err: 5681 if (!prog) { 5682 virtnet_clear_guest_offloads(vi); 5683 for (i = 0; i < vi->max_queue_pairs; i++) 5684 rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); 5685 } 5686 5687 if (netif_running(dev)) { 5688 for (i = 0; i < vi->max_queue_pairs; i++) { 5689 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); 5690 virtnet_napi_tx_enable(vi, vi->sq[i].vq, 5691 &vi->sq[i].napi); 5692 } 5693 } 5694 if (prog) 5695 bpf_prog_sub(prog, vi->max_queue_pairs - 1); 5696 return err; 5697 } 5698 5699 static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) 5700 { 5701 switch (xdp->command) { 5702 case XDP_SETUP_PROG: 5703 return virtnet_xdp_set(dev, xdp->prog, xdp->extack); 5704 case XDP_SETUP_XSK_POOL: 5705 return virtnet_xsk_pool_setup(dev, xdp); 5706 default: 5707 return -EINVAL; 5708 } 5709 } 5710 5711 static int virtnet_get_phys_port_name(struct net_device *dev, char *buf, 5712 size_t len) 5713 { 5714 struct virtnet_info *vi = netdev_priv(dev); 5715 int ret; 5716 5717 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 5718 return -EOPNOTSUPP; 5719 5720 ret = snprintf(buf, len, "sby"); 5721 if (ret >= len) 5722 return -EOPNOTSUPP; 5723 5724 return 0; 5725 } 5726 5727 static int virtnet_set_features(struct net_device *dev, 5728 netdev_features_t features) 5729 { 5730 struct virtnet_info *vi = netdev_priv(dev); 5731 u64 offloads; 5732 int err; 5733 5734 if ((dev->features ^ features) & NETIF_F_GRO_HW) { 5735 if (vi->xdp_enabled) 5736 return -EBUSY; 5737 5738 if (features & NETIF_F_GRO_HW) 5739 offloads = vi->guest_offloads_capable; 5740 else 5741 offloads = vi->guest_offloads_capable & 5742 ~GUEST_OFFLOAD_GRO_HW_MASK; 5743 5744 err = virtnet_set_guest_offloads(vi, offloads); 5745 if (err) 5746 return err; 5747 vi->guest_offloads = offloads; 5748 } 5749 5750 if ((dev->features ^ features) & NETIF_F_RXHASH) { 5751 if (features & NETIF_F_RXHASH) 5752 
vi->rss.hash_types = vi->rss_hash_types_saved; 5753 else 5754 vi->rss.hash_types = VIRTIO_NET_HASH_REPORT_NONE; 5755 5756 if (!virtnet_commit_rss_command(vi)) 5757 return -EINVAL; 5758 } 5759 5760 return 0; 5761 } 5762 5763 static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue) 5764 { 5765 struct virtnet_info *priv = netdev_priv(dev); 5766 struct send_queue *sq = &priv->sq[txqueue]; 5767 struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue); 5768 5769 u64_stats_update_begin(&sq->stats.syncp); 5770 u64_stats_inc(&sq->stats.tx_timeouts); 5771 u64_stats_update_end(&sq->stats.syncp); 5772 5773 netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n", 5774 txqueue, sq->name, sq->vq->index, sq->vq->name, 5775 jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start))); 5776 } 5777 5778 static int virtnet_init_irq_moder(struct virtnet_info *vi) 5779 { 5780 u8 profile_flags = 0, coal_flags = 0; 5781 int ret, i; 5782 5783 profile_flags |= DIM_PROFILE_RX; 5784 coal_flags |= DIM_COALESCE_USEC | DIM_COALESCE_PKTS; 5785 ret = net_dim_init_irq_moder(vi->dev, profile_flags, coal_flags, 5786 DIM_CQ_PERIOD_MODE_START_FROM_EQE, 5787 0, virtnet_rx_dim_work, NULL); 5788 5789 if (ret) 5790 return ret; 5791 5792 for (i = 0; i < vi->max_queue_pairs; i++) 5793 net_dim_setting(vi->dev, &vi->rq[i].dim, false); 5794 5795 return 0; 5796 } 5797 5798 static void virtnet_free_irq_moder(struct virtnet_info *vi) 5799 { 5800 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 5801 return; 5802 5803 rtnl_lock(); 5804 net_dim_free_irq_moder(vi->dev); 5805 rtnl_unlock(); 5806 } 5807 5808 static const struct net_device_ops virtnet_netdev = { 5809 .ndo_open = virtnet_open, 5810 .ndo_stop = virtnet_close, 5811 .ndo_start_xmit = start_xmit, 5812 .ndo_validate_addr = eth_validate_addr, 5813 .ndo_set_mac_address = virtnet_set_mac_address, 5814 .ndo_set_rx_mode = virtnet_set_rx_mode, 5815 .ndo_get_stats64 = virtnet_stats, 5816 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, 5817 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, 5818 .ndo_bpf = virtnet_xdp, 5819 .ndo_xdp_xmit = virtnet_xdp_xmit, 5820 .ndo_xsk_wakeup = virtnet_xsk_wakeup, 5821 .ndo_features_check = passthru_features_check, 5822 .ndo_get_phys_port_name = virtnet_get_phys_port_name, 5823 .ndo_set_features = virtnet_set_features, 5824 .ndo_tx_timeout = virtnet_tx_timeout, 5825 }; 5826 5827 static void virtnet_config_changed_work(struct work_struct *work) 5828 { 5829 struct virtnet_info *vi = 5830 container_of(work, struct virtnet_info, config_work); 5831 u16 v; 5832 5833 if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS, 5834 struct virtio_net_config, status, &v) < 0) 5835 return; 5836 5837 if (v & VIRTIO_NET_S_ANNOUNCE) { 5838 netdev_notify_peers(vi->dev); 5839 virtnet_ack_link_announce(vi); 5840 } 5841 5842 /* Ignore unknown (future) status bits */ 5843 v &= VIRTIO_NET_S_LINK_UP; 5844 5845 if (vi->status == v) 5846 return; 5847 5848 vi->status = v; 5849 5850 if (vi->status & VIRTIO_NET_S_LINK_UP) { 5851 virtnet_update_settings(vi); 5852 netif_carrier_on(vi->dev); 5853 netif_tx_wake_all_queues(vi->dev); 5854 } else { 5855 netif_carrier_off(vi->dev); 5856 netif_tx_stop_all_queues(vi->dev); 5857 } 5858 } 5859 5860 static void virtnet_config_changed(struct virtio_device *vdev) 5861 { 5862 struct virtnet_info *vi = vdev->priv; 5863 5864 schedule_work(&vi->config_work); 5865 } 5866 5867 static void virtnet_free_queues(struct virtnet_info *vi) 5868 { 5869 int i; 5870 5871 for (i = 0; i < 
vi->max_queue_pairs; i++) { 5872 __netif_napi_del(&vi->rq[i].napi); 5873 __netif_napi_del(&vi->sq[i].napi); 5874 } 5875 5876 /* We called __netif_napi_del(), 5877 * we need to respect an RCU grace period before freeing vi->rq 5878 */ 5879 synchronize_net(); 5880 5881 kfree(vi->rq); 5882 kfree(vi->sq); 5883 kfree(vi->ctrl); 5884 } 5885 5886 static void _free_receive_bufs(struct virtnet_info *vi) 5887 { 5888 struct bpf_prog *old_prog; 5889 int i; 5890 5891 for (i = 0; i < vi->max_queue_pairs; i++) { 5892 while (vi->rq[i].pages) 5893 __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0); 5894 5895 old_prog = rtnl_dereference(vi->rq[i].xdp_prog); 5896 RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL); 5897 if (old_prog) 5898 bpf_prog_put(old_prog); 5899 } 5900 } 5901 5902 static void free_receive_bufs(struct virtnet_info *vi) 5903 { 5904 rtnl_lock(); 5905 _free_receive_bufs(vi); 5906 rtnl_unlock(); 5907 } 5908 5909 static void free_receive_page_frags(struct virtnet_info *vi) 5910 { 5911 int i; 5912 for (i = 0; i < vi->max_queue_pairs; i++) 5913 if (vi->rq[i].alloc_frag.page) { 5914 if (vi->rq[i].last_dma) 5915 virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0); 5916 put_page(vi->rq[i].alloc_frag.page); 5917 } 5918 } 5919 5920 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf) 5921 { 5922 if (!is_xdp_frame(buf)) 5923 dev_kfree_skb(buf); 5924 else 5925 xdp_return_frame(ptr_to_xdp(buf)); 5926 } 5927 5928 static void free_unused_bufs(struct virtnet_info *vi) 5929 { 5930 void *buf; 5931 int i; 5932 5933 for (i = 0; i < vi->max_queue_pairs; i++) { 5934 struct virtqueue *vq = vi->sq[i].vq; 5935 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 5936 virtnet_sq_free_unused_buf(vq, buf); 5937 cond_resched(); 5938 } 5939 5940 for (i = 0; i < vi->max_queue_pairs; i++) { 5941 struct virtqueue *vq = vi->rq[i].vq; 5942 5943 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 5944 virtnet_rq_unmap_free_buf(vq, buf); 5945 cond_resched(); 5946 } 5947 } 5948 5949 static void virtnet_del_vqs(struct virtnet_info *vi) 5950 { 5951 struct virtio_device *vdev = vi->vdev; 5952 5953 virtnet_clean_affinity(vi); 5954 5955 vdev->config->del_vqs(vdev); 5956 5957 virtnet_free_queues(vi); 5958 } 5959 5960 /* How large should a single buffer be so a queue full of these can fit at 5961 * least one full packet? 5962 * Logic below assumes the mergeable buffer header is used. 5963 */ 5964 static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq) 5965 { 5966 const unsigned int hdr_len = vi->hdr_len; 5967 unsigned int rq_size = virtqueue_get_vring_size(vq); 5968 unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu; 5969 unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len; 5970 unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size); 5971 5972 return max(max(min_buf_len, hdr_len) - hdr_len, 5973 (unsigned int)GOOD_PACKET_LEN); 5974 } 5975 5976 static int virtnet_find_vqs(struct virtnet_info *vi) 5977 { 5978 struct virtqueue_info *vqs_info; 5979 struct virtqueue **vqs; 5980 int ret = -ENOMEM; 5981 int total_vqs; 5982 bool *ctx; 5983 u16 i; 5984 5985 /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by 5986 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by 5987 * possible control vq. 
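 *
 * E.g. with two queue pairs and a control vq the order is
 * rx0, tx0, rx1, tx1, cvq (rxq2vq(i) == 2 * i, txq2vq(i) == 2 * i + 1,
 * and the control vq is the last entry in the vqs array).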
5988 */ 5989 total_vqs = vi->max_queue_pairs * 2 + 5990 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ); 5991 5992 /* Allocate space for find_vqs parameters */ 5993 vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL); 5994 if (!vqs) 5995 goto err_vq; 5996 vqs_info = kcalloc(total_vqs, sizeof(*vqs_info), GFP_KERNEL); 5997 if (!vqs_info) 5998 goto err_vqs_info; 5999 if (!vi->big_packets || vi->mergeable_rx_bufs) { 6000 ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL); 6001 if (!ctx) 6002 goto err_ctx; 6003 } else { 6004 ctx = NULL; 6005 } 6006 6007 /* Parameters for control virtqueue, if any */ 6008 if (vi->has_cvq) { 6009 vqs_info[total_vqs - 1].name = "control"; 6010 } 6011 6012 /* Allocate/initialize parameters for send/receive virtqueues */ 6013 for (i = 0; i < vi->max_queue_pairs; i++) { 6014 vqs_info[rxq2vq(i)].callback = skb_recv_done; 6015 vqs_info[txq2vq(i)].callback = skb_xmit_done; 6016 sprintf(vi->rq[i].name, "input.%u", i); 6017 sprintf(vi->sq[i].name, "output.%u", i); 6018 vqs_info[rxq2vq(i)].name = vi->rq[i].name; 6019 vqs_info[txq2vq(i)].name = vi->sq[i].name; 6020 if (ctx) 6021 vqs_info[rxq2vq(i)].ctx = true; 6022 } 6023 6024 ret = virtio_find_vqs(vi->vdev, total_vqs, vqs, vqs_info, NULL); 6025 if (ret) 6026 goto err_find; 6027 6028 if (vi->has_cvq) { 6029 vi->cvq = vqs[total_vqs - 1]; 6030 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) 6031 vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; 6032 } 6033 6034 for (i = 0; i < vi->max_queue_pairs; i++) { 6035 vi->rq[i].vq = vqs[rxq2vq(i)]; 6036 vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq); 6037 vi->sq[i].vq = vqs[txq2vq(i)]; 6038 } 6039 6040 /* run here: ret == 0. */ 6041 6042 6043 err_find: 6044 kfree(ctx); 6045 err_ctx: 6046 kfree(vqs_info); 6047 err_vqs_info: 6048 kfree(vqs); 6049 err_vq: 6050 return ret; 6051 } 6052 6053 static int virtnet_alloc_queues(struct virtnet_info *vi) 6054 { 6055 int i; 6056 6057 if (vi->has_cvq) { 6058 vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL); 6059 if (!vi->ctrl) 6060 goto err_ctrl; 6061 } else { 6062 vi->ctrl = NULL; 6063 } 6064 vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL); 6065 if (!vi->sq) 6066 goto err_sq; 6067 vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL); 6068 if (!vi->rq) 6069 goto err_rq; 6070 6071 INIT_DELAYED_WORK(&vi->refill, refill_work); 6072 for (i = 0; i < vi->max_queue_pairs; i++) { 6073 vi->rq[i].pages = NULL; 6074 netif_napi_add_weight(vi->dev, &vi->rq[i].napi, virtnet_poll, 6075 napi_weight); 6076 netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi, 6077 virtnet_poll_tx, 6078 napi_tx ? 
napi_weight : 0); 6079 6080 sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); 6081 ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len); 6082 sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg)); 6083 6084 u64_stats_init(&vi->rq[i].stats.syncp); 6085 u64_stats_init(&vi->sq[i].stats.syncp); 6086 mutex_init(&vi->rq[i].dim_lock); 6087 } 6088 6089 return 0; 6090 6091 err_rq: 6092 kfree(vi->sq); 6093 err_sq: 6094 kfree(vi->ctrl); 6095 err_ctrl: 6096 return -ENOMEM; 6097 } 6098 6099 static int init_vqs(struct virtnet_info *vi) 6100 { 6101 int ret; 6102 6103 /* Allocate send & receive queues */ 6104 ret = virtnet_alloc_queues(vi); 6105 if (ret) 6106 goto err; 6107 6108 ret = virtnet_find_vqs(vi); 6109 if (ret) 6110 goto err_free; 6111 6112 virtnet_rq_set_premapped(vi); 6113 6114 cpus_read_lock(); 6115 virtnet_set_affinity(vi); 6116 cpus_read_unlock(); 6117 6118 return 0; 6119 6120 err_free: 6121 virtnet_free_queues(vi); 6122 err: 6123 return ret; 6124 } 6125 6126 #ifdef CONFIG_SYSFS 6127 static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue, 6128 char *buf) 6129 { 6130 struct virtnet_info *vi = netdev_priv(queue->dev); 6131 unsigned int queue_index = get_netdev_rx_queue_index(queue); 6132 unsigned int headroom = virtnet_get_headroom(vi); 6133 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 6134 struct ewma_pkt_len *avg; 6135 6136 BUG_ON(queue_index >= vi->max_queue_pairs); 6137 avg = &vi->rq[queue_index].mrg_avg_pkt_len; 6138 return sprintf(buf, "%u\n", 6139 get_mergeable_buf_len(&vi->rq[queue_index], avg, 6140 SKB_DATA_ALIGN(headroom + tailroom))); 6141 } 6142 6143 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute = 6144 __ATTR_RO(mergeable_rx_buffer_size); 6145 6146 static struct attribute *virtio_net_mrg_rx_attrs[] = { 6147 &mergeable_rx_buffer_size_attribute.attr, 6148 NULL 6149 }; 6150 6151 static const struct attribute_group virtio_net_mrg_rx_group = { 6152 .name = "virtio_net", 6153 .attrs = virtio_net_mrg_rx_attrs 6154 }; 6155 #endif 6156 6157 static bool virtnet_fail_on_feature(struct virtio_device *vdev, 6158 unsigned int fbit, 6159 const char *fname, const char *dname) 6160 { 6161 if (!virtio_has_feature(vdev, fbit)) 6162 return false; 6163 6164 dev_err(&vdev->dev, "device advertises feature %s but not %s", 6165 fname, dname); 6166 6167 return true; 6168 } 6169 6170 #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \ 6171 virtnet_fail_on_feature(vdev, fbit, #fbit, dbit) 6172 6173 static bool virtnet_validate_features(struct virtio_device *vdev) 6174 { 6175 if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) && 6176 (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX, 6177 "VIRTIO_NET_F_CTRL_VQ") || 6178 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN, 6179 "VIRTIO_NET_F_CTRL_VQ") || 6180 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE, 6181 "VIRTIO_NET_F_CTRL_VQ") || 6182 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") || 6183 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR, 6184 "VIRTIO_NET_F_CTRL_VQ") || 6185 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, 6186 "VIRTIO_NET_F_CTRL_VQ") || 6187 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, 6188 "VIRTIO_NET_F_CTRL_VQ") || 6189 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, 6190 "VIRTIO_NET_F_CTRL_VQ") || 6191 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_VQ_NOTF_COAL, 6192 "VIRTIO_NET_F_CTRL_VQ"))) { 6193 return false; 6194 } 6195 6196 return true; 6197 } 6198 6199 #define MIN_MTU ETH_MIN_MTU 6200 #define MAX_MTU ETH_MAX_MTU 6201 6202 static int virtnet_validate(struct virtio_device *vdev) 6203 
{ 6204 if (!vdev->config->get) { 6205 dev_err(&vdev->dev, "%s failure: config access disabled\n", 6206 __func__); 6207 return -EINVAL; 6208 } 6209 6210 if (!virtnet_validate_features(vdev)) 6211 return -EINVAL; 6212 6213 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 6214 int mtu = virtio_cread16(vdev, 6215 offsetof(struct virtio_net_config, 6216 mtu)); 6217 if (mtu < MIN_MTU) 6218 __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU); 6219 } 6220 6221 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) && 6222 !virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 6223 dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby"); 6224 __virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY); 6225 } 6226 6227 return 0; 6228 } 6229 6230 static bool virtnet_check_guest_gso(const struct virtnet_info *vi) 6231 { 6232 return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 6233 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 6234 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 6235 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 6236 (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) && 6237 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6)); 6238 } 6239 6240 static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu) 6241 { 6242 bool guest_gso = virtnet_check_guest_gso(vi); 6243 6244 /* If device can receive ANY guest GSO packets, regardless of mtu, 6245 * allocate packets of maximum size, otherwise limit it to only 6246 * mtu size worth only. 6247 */ 6248 if (mtu > ETH_DATA_LEN || guest_gso) { 6249 vi->big_packets = true; 6250 vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE); 6251 } 6252 } 6253 6254 #define VIRTIO_NET_HASH_REPORT_MAX_TABLE 10 6255 static enum xdp_rss_hash_type 6256 virtnet_xdp_rss_type[VIRTIO_NET_HASH_REPORT_MAX_TABLE] = { 6257 [VIRTIO_NET_HASH_REPORT_NONE] = XDP_RSS_TYPE_NONE, 6258 [VIRTIO_NET_HASH_REPORT_IPv4] = XDP_RSS_TYPE_L3_IPV4, 6259 [VIRTIO_NET_HASH_REPORT_TCPv4] = XDP_RSS_TYPE_L4_IPV4_TCP, 6260 [VIRTIO_NET_HASH_REPORT_UDPv4] = XDP_RSS_TYPE_L4_IPV4_UDP, 6261 [VIRTIO_NET_HASH_REPORT_IPv6] = XDP_RSS_TYPE_L3_IPV6, 6262 [VIRTIO_NET_HASH_REPORT_TCPv6] = XDP_RSS_TYPE_L4_IPV6_TCP, 6263 [VIRTIO_NET_HASH_REPORT_UDPv6] = XDP_RSS_TYPE_L4_IPV6_UDP, 6264 [VIRTIO_NET_HASH_REPORT_IPv6_EX] = XDP_RSS_TYPE_L3_IPV6_EX, 6265 [VIRTIO_NET_HASH_REPORT_TCPv6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX, 6266 [VIRTIO_NET_HASH_REPORT_UDPv6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX 6267 }; 6268 6269 static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, 6270 enum xdp_rss_hash_type *rss_type) 6271 { 6272 const struct xdp_buff *xdp = (void *)_ctx; 6273 struct virtio_net_hdr_v1_hash *hdr_hash; 6274 struct virtnet_info *vi; 6275 u16 hash_report; 6276 6277 if (!(xdp->rxq->dev->features & NETIF_F_RXHASH)) 6278 return -ENODATA; 6279 6280 vi = netdev_priv(xdp->rxq->dev); 6281 hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len); 6282 hash_report = __le16_to_cpu(hdr_hash->hash_report); 6283 6284 if (hash_report >= VIRTIO_NET_HASH_REPORT_MAX_TABLE) 6285 hash_report = VIRTIO_NET_HASH_REPORT_NONE; 6286 6287 *rss_type = virtnet_xdp_rss_type[hash_report]; 6288 *hash = __le32_to_cpu(hdr_hash->hash_value); 6289 return 0; 6290 } 6291 6292 static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = { 6293 .xmo_rx_hash = virtnet_xdp_rx_hash, 6294 }; 6295 6296 static int virtnet_probe(struct virtio_device *vdev) 6297 { 6298 int i, err = -ENOMEM; 6299 struct net_device *dev; 
6300 struct virtnet_info *vi; 6301 u16 max_queue_pairs; 6302 int mtu = 0; 6303 6304 /* Find if host supports multiqueue/rss virtio_net device */ 6305 max_queue_pairs = 1; 6306 if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) 6307 max_queue_pairs = 6308 virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); 6309 6310 /* We need at least 2 queue's */ 6311 if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 6312 max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || 6313 !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 6314 max_queue_pairs = 1; 6315 6316 /* Allocate ourselves a network device with room for our info */ 6317 dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs); 6318 if (!dev) 6319 return -ENOMEM; 6320 6321 /* Set up network device as normal. */ 6322 dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE | 6323 IFF_TX_SKB_NO_LINEAR; 6324 dev->netdev_ops = &virtnet_netdev; 6325 dev->stat_ops = &virtnet_stat_ops; 6326 dev->features = NETIF_F_HIGHDMA; 6327 6328 dev->ethtool_ops = &virtnet_ethtool_ops; 6329 SET_NETDEV_DEV(dev, &vdev->dev); 6330 6331 /* Do we support "hardware" checksums? */ 6332 if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { 6333 /* This opens up the world of extra features. */ 6334 dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG; 6335 if (csum) 6336 dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; 6337 6338 if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { 6339 dev->hw_features |= NETIF_F_TSO 6340 | NETIF_F_TSO_ECN | NETIF_F_TSO6; 6341 } 6342 /* Individual feature bits: what can host handle? */ 6343 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4)) 6344 dev->hw_features |= NETIF_F_TSO; 6345 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6)) 6346 dev->hw_features |= NETIF_F_TSO6; 6347 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) 6348 dev->hw_features |= NETIF_F_TSO_ECN; 6349 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO)) 6350 dev->hw_features |= NETIF_F_GSO_UDP_L4; 6351 6352 dev->features |= NETIF_F_GSO_ROBUST; 6353 6354 if (gso) 6355 dev->features |= dev->hw_features & NETIF_F_ALL_TSO; 6356 /* (!csum && gso) case will be fixed by register_netdev() */ 6357 } 6358 6359 /* 1. With VIRTIO_NET_F_GUEST_CSUM negotiation, the driver doesn't 6360 * need to calculate checksums for partially checksummed packets, 6361 * as they're considered valid by the upper layer. 6362 * 2. Without VIRTIO_NET_F_GUEST_CSUM negotiation, the driver only 6363 * receives fully checksummed packets. The device may assist in 6364 * validating these packets' checksums, so the driver won't have to. 6365 */ 6366 dev->features |= NETIF_F_RXCSUM; 6367 6368 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || 6369 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) 6370 dev->features |= NETIF_F_GRO_HW; 6371 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) 6372 dev->hw_features |= NETIF_F_GRO_HW; 6373 6374 dev->vlan_features = dev->features; 6375 dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; 6376 6377 /* MTU range: 68 - 65535 */ 6378 dev->min_mtu = MIN_MTU; 6379 dev->max_mtu = MAX_MTU; 6380 6381 /* Configuration may specify what MAC to use. Otherwise random. 
*/ 6382 if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 6383 u8 addr[ETH_ALEN]; 6384 6385 virtio_cread_bytes(vdev, 6386 offsetof(struct virtio_net_config, mac), 6387 addr, ETH_ALEN); 6388 eth_hw_addr_set(dev, addr); 6389 } else { 6390 eth_hw_addr_random(dev); 6391 dev_info(&vdev->dev, "Assigned random MAC address %pM\n", 6392 dev->dev_addr); 6393 } 6394 6395 /* Set up our device-specific information */ 6396 vi = netdev_priv(dev); 6397 vi->dev = dev; 6398 vi->vdev = vdev; 6399 vdev->priv = vi; 6400 6401 INIT_WORK(&vi->config_work, virtnet_config_changed_work); 6402 INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work); 6403 spin_lock_init(&vi->refill_lock); 6404 6405 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) { 6406 vi->mergeable_rx_bufs = true; 6407 dev->xdp_features |= NETDEV_XDP_ACT_RX_SG; 6408 } 6409 6410 if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) 6411 vi->has_rss_hash_report = true; 6412 6413 if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) { 6414 vi->has_rss = true; 6415 6416 vi->rss_indir_table_size = 6417 virtio_cread16(vdev, offsetof(struct virtio_net_config, 6418 rss_max_indirection_table_length)); 6419 } 6420 6421 if (vi->has_rss || vi->has_rss_hash_report) { 6422 vi->rss_key_size = 6423 virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); 6424 6425 vi->rss_hash_types_supported = 6426 virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types)); 6427 vi->rss_hash_types_supported &= 6428 ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX | 6429 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | 6430 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX); 6431 6432 dev->hw_features |= NETIF_F_RXHASH; 6433 dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops; 6434 } 6435 6436 if (vi->has_rss_hash_report) 6437 vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); 6438 else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) || 6439 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 6440 vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); 6441 else 6442 vi->hdr_len = sizeof(struct virtio_net_hdr); 6443 6444 if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || 6445 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 6446 vi->any_header_sg = true; 6447 6448 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 6449 vi->has_cvq = true; 6450 6451 mutex_init(&vi->cvq_lock); 6452 6453 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 6454 mtu = virtio_cread16(vdev, 6455 offsetof(struct virtio_net_config, 6456 mtu)); 6457 if (mtu < dev->min_mtu) { 6458 /* Should never trigger: MTU was previously validated 6459 * in virtnet_validate. 
6460 */ 6461 dev_err(&vdev->dev, 6462 "device MTU appears to have changed it is now %d < %d", 6463 mtu, dev->min_mtu); 6464 err = -EINVAL; 6465 goto free; 6466 } 6467 6468 dev->mtu = mtu; 6469 dev->max_mtu = mtu; 6470 } 6471 6472 virtnet_set_big_packets(vi, mtu); 6473 6474 if (vi->any_header_sg) 6475 dev->needed_headroom = vi->hdr_len; 6476 6477 /* Enable multiqueue by default */ 6478 if (num_online_cpus() >= max_queue_pairs) 6479 vi->curr_queue_pairs = max_queue_pairs; 6480 else 6481 vi->curr_queue_pairs = num_online_cpus(); 6482 vi->max_queue_pairs = max_queue_pairs; 6483 6484 /* Allocate/initialize the rx/tx queues, and invoke find_vqs */ 6485 err = init_vqs(vi); 6486 if (err) 6487 goto free; 6488 6489 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 6490 vi->intr_coal_rx.max_usecs = 0; 6491 vi->intr_coal_tx.max_usecs = 0; 6492 vi->intr_coal_rx.max_packets = 0; 6493 6494 /* Keep the default values of the coalescing parameters 6495 * aligned with the default napi_tx state. 6496 */ 6497 if (vi->sq[0].napi.weight) 6498 vi->intr_coal_tx.max_packets = 1; 6499 else 6500 vi->intr_coal_tx.max_packets = 0; 6501 } 6502 6503 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 6504 /* The reason is the same as VIRTIO_NET_F_NOTF_COAL. */ 6505 for (i = 0; i < vi->max_queue_pairs; i++) 6506 if (vi->sq[i].napi.weight) 6507 vi->sq[i].intr_coal.max_packets = 1; 6508 6509 err = virtnet_init_irq_moder(vi); 6510 if (err) 6511 goto free; 6512 } 6513 6514 #ifdef CONFIG_SYSFS 6515 if (vi->mergeable_rx_bufs) 6516 dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group; 6517 #endif 6518 netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs); 6519 netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs); 6520 6521 virtnet_init_settings(dev); 6522 6523 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 6524 vi->failover = net_failover_create(vi->dev); 6525 if (IS_ERR(vi->failover)) { 6526 err = PTR_ERR(vi->failover); 6527 goto free_vqs; 6528 } 6529 } 6530 6531 if (vi->has_rss || vi->has_rss_hash_report) 6532 virtnet_init_default_rss(vi); 6533 6534 enable_rx_mode_work(vi); 6535 6536 /* serialize netdev register + virtio_device_ready() with ndo_open() */ 6537 rtnl_lock(); 6538 6539 err = register_netdevice(dev); 6540 if (err) { 6541 pr_debug("virtio_net: registering device failed\n"); 6542 rtnl_unlock(); 6543 goto free_failover; 6544 } 6545 6546 /* Disable config change notification until ndo_open. */ 6547 virtio_config_driver_disable(vi->vdev); 6548 6549 virtio_device_ready(vdev); 6550 6551 virtnet_set_queues(vi, vi->curr_queue_pairs); 6552 6553 /* a random MAC address has been assigned, notify the device. 
6554 * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there 6555 * because many devices work fine without getting MAC explicitly 6556 */ 6557 if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 6558 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 6559 struct scatterlist sg; 6560 6561 sg_init_one(&sg, dev->dev_addr, dev->addr_len); 6562 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 6563 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 6564 pr_debug("virtio_net: setting MAC address failed\n"); 6565 rtnl_unlock(); 6566 err = -EINVAL; 6567 goto free_unregister_netdev; 6568 } 6569 } 6570 6571 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) { 6572 struct virtio_net_stats_capabilities *stats_cap __free(kfree) = NULL; 6573 struct scatterlist sg; 6574 __le64 v; 6575 6576 stats_cap = kzalloc(sizeof(*stats_cap), GFP_KERNEL); 6577 if (!stats_cap) { 6578 rtnl_unlock(); 6579 err = -ENOMEM; 6580 goto free_unregister_netdev; 6581 } 6582 6583 sg_init_one(&sg, stats_cap, sizeof(*stats_cap)); 6584 6585 if (!virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, 6586 VIRTIO_NET_CTRL_STATS_QUERY, 6587 NULL, &sg)) { 6588 pr_debug("virtio_net: fail to get stats capability\n"); 6589 rtnl_unlock(); 6590 err = -EINVAL; 6591 goto free_unregister_netdev; 6592 } 6593 6594 v = stats_cap->supported_stats_types[0]; 6595 vi->device_stats_cap = le64_to_cpu(v); 6596 } 6597 6598 /* Assume link up if device can't report link status, 6599 otherwise get link status from config. */ 6600 netif_carrier_off(dev); 6601 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { 6602 virtnet_config_changed_work(&vi->config_work); 6603 } else { 6604 vi->status = VIRTIO_NET_S_LINK_UP; 6605 virtnet_update_settings(vi); 6606 netif_carrier_on(dev); 6607 } 6608 6609 for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) 6610 if (virtio_has_feature(vi->vdev, guest_offloads[i])) 6611 set_bit(guest_offloads[i], &vi->guest_offloads); 6612 vi->guest_offloads_capable = vi->guest_offloads; 6613 6614 rtnl_unlock(); 6615 6616 err = virtnet_cpu_notif_add(vi); 6617 if (err) { 6618 pr_debug("virtio_net: registering cpu notifier failed\n"); 6619 goto free_unregister_netdev; 6620 } 6621 6622 pr_debug("virtnet: registered device %s with %d RX and TX vq's\n", 6623 dev->name, max_queue_pairs); 6624 6625 return 0; 6626 6627 free_unregister_netdev: 6628 unregister_netdev(dev); 6629 free_failover: 6630 net_failover_destroy(vi->failover); 6631 free_vqs: 6632 virtio_reset_device(vdev); 6633 cancel_delayed_work_sync(&vi->refill); 6634 free_receive_page_frags(vi); 6635 virtnet_del_vqs(vi); 6636 free: 6637 free_netdev(dev); 6638 return err; 6639 } 6640 6641 static void remove_vq_common(struct virtnet_info *vi) 6642 { 6643 virtio_reset_device(vi->vdev); 6644 6645 /* Free unused buffers in both send and recv, if any. */ 6646 free_unused_bufs(vi); 6647 6648 free_receive_bufs(vi); 6649 6650 free_receive_page_frags(vi); 6651 6652 virtnet_del_vqs(vi); 6653 } 6654 6655 static void virtnet_remove(struct virtio_device *vdev) 6656 { 6657 struct virtnet_info *vi = vdev->priv; 6658 6659 virtnet_cpu_notif_remove(vi); 6660 6661 /* Make sure no work handler is accessing the device. 
*/ 6662 flush_work(&vi->config_work); 6663 disable_rx_mode_work(vi); 6664 flush_work(&vi->rx_mode_work); 6665 6666 virtnet_free_irq_moder(vi); 6667 6668 unregister_netdev(vi->dev); 6669 6670 net_failover_destroy(vi->failover); 6671 6672 remove_vq_common(vi); 6673 6674 free_netdev(vi->dev); 6675 } 6676 6677 static __maybe_unused int virtnet_freeze(struct virtio_device *vdev) 6678 { 6679 struct virtnet_info *vi = vdev->priv; 6680 6681 virtnet_cpu_notif_remove(vi); 6682 virtnet_freeze_down(vdev); 6683 remove_vq_common(vi); 6684 6685 return 0; 6686 } 6687 6688 static __maybe_unused int virtnet_restore(struct virtio_device *vdev) 6689 { 6690 struct virtnet_info *vi = vdev->priv; 6691 int err; 6692 6693 err = virtnet_restore_up(vdev); 6694 if (err) 6695 return err; 6696 virtnet_set_queues(vi, vi->curr_queue_pairs); 6697 6698 err = virtnet_cpu_notif_add(vi); 6699 if (err) { 6700 virtnet_freeze_down(vdev); 6701 remove_vq_common(vi); 6702 return err; 6703 } 6704 6705 return 0; 6706 } 6707 6708 static struct virtio_device_id id_table[] = { 6709 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, 6710 { 0 }, 6711 }; 6712 6713 #define VIRTNET_FEATURES \ 6714 VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \ 6715 VIRTIO_NET_F_MAC, \ 6716 VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \ 6717 VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \ 6718 VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \ 6719 VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \ 6720 VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \ 6721 VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \ 6722 VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ 6723 VIRTIO_NET_F_CTRL_MAC_ADDR, \ 6724 VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ 6725 VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ 6726 VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \ 6727 VIRTIO_NET_F_VQ_NOTF_COAL, \ 6728 VIRTIO_NET_F_GUEST_HDRLEN, VIRTIO_NET_F_DEVICE_STATS 6729 6730 static unsigned int features[] = { 6731 VIRTNET_FEATURES, 6732 }; 6733 6734 static unsigned int features_legacy[] = { 6735 VIRTNET_FEATURES, 6736 VIRTIO_NET_F_GSO, 6737 VIRTIO_F_ANY_LAYOUT, 6738 }; 6739 6740 static struct virtio_driver virtio_net_driver = { 6741 .feature_table = features, 6742 .feature_table_size = ARRAY_SIZE(features), 6743 .feature_table_legacy = features_legacy, 6744 .feature_table_size_legacy = ARRAY_SIZE(features_legacy), 6745 .driver.name = KBUILD_MODNAME, 6746 .id_table = id_table, 6747 .validate = virtnet_validate, 6748 .probe = virtnet_probe, 6749 .remove = virtnet_remove, 6750 .config_changed = virtnet_config_changed, 6751 #ifdef CONFIG_PM_SLEEP 6752 .freeze = virtnet_freeze, 6753 .restore = virtnet_restore, 6754 #endif 6755 }; 6756 6757 static __init int virtio_net_driver_init(void) 6758 { 6759 int ret; 6760 6761 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online", 6762 virtnet_cpu_online, 6763 virtnet_cpu_down_prep); 6764 if (ret < 0) 6765 goto out; 6766 virtionet_online = ret; 6767 ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead", 6768 NULL, virtnet_cpu_dead); 6769 if (ret) 6770 goto err_dead; 6771 ret = register_virtio_driver(&virtio_net_driver); 6772 if (ret) 6773 goto err_virtio; 6774 return 0; 6775 err_virtio: 6776 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 6777 err_dead: 6778 cpuhp_remove_multi_state(virtionet_online); 6779 out: 6780 return ret; 6781 } 6782 module_init(virtio_net_driver_init); 6783 6784 static __exit void 
virtio_net_driver_exit(void) 6785 { 6786 unregister_virtio_driver(&virtio_net_driver); 6787 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 6788 cpuhp_remove_multi_state(virtionet_online); 6789 } 6790 module_exit(virtio_net_driver_exit); 6791 6792 MODULE_DEVICE_TABLE(virtio, id_table); 6793 MODULE_DESCRIPTION("Virtio network driver"); 6794 MODULE_LICENSE("GPL"); 6795
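
/*
 * Illustrative sketch, not part of the driver: the virtqueue index layout
 * that virtnet_find_vqs() relies on.  As the comment in that function says,
 * the device exposes one RX and one TX virtqueue per queue pair, laid out as
 * RX0, TX0, RX1, TX1, ..., with the control virtqueue (when
 * VIRTIO_NET_F_CTRL_VQ is negotiated) occupying the last slot.  The helpers
 * below only restate that mapping to make the index arithmetic explicit;
 * they are assumed to mirror the driver's own rxq2vq()/txq2vq() helpers
 * defined earlier in this file and are not used anywhere.
 */
static inline int virtnet_example_rxq2vq(int rxq)
{
	/* RX virtqueue of queue pair 'rxq' sits at the even index. */
	return rxq * 2;
}

static inline int virtnet_example_txq2vq(int txq)
{
	/* TX virtqueue of queue pair 'txq' immediately follows its RX partner. */
	return txq * 2 + 1;
}

static inline int virtnet_example_total_vqs(int max_queue_pairs, bool has_cvq)
{
	/* Two virtqueues per pair plus an optional trailing control vq,
	 * matching the total_vqs computation in virtnet_find_vqs().
	 */
	return max_queue_pairs * 2 + (has_cvq ? 1 : 0);
}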
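
/*
 * Worked example (illustrative only) for mergeable_min_buf_len(), assuming
 * the 12-byte mergeable RX header, a 1500-byte max_mtu and a 256-entry ring:
 *
 *   buf_len     = 12 + ETH_HLEN (14) + VLAN_HLEN (4) + 1500 = 1530
 *   min_buf_len = DIV_ROUND_UP(1530, 256)                   = 6
 *
 * so the return value is clamped up to GOOD_PACKET_LEN (1518).  For typical
 * ring sizes the per-buffer minimum therefore stays at GOOD_PACKET_LEN and
 * the EWMA-driven sizing dominates; it only exceeds GOOD_PACKET_LEN when
 * buf_len divided by the ring size does, e.g. an IP_MAX_MTU-sized buf_len
 * (big_packets) spread over a very small ring.
 */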