1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* A network driver using virtio. 3 * 4 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation 5 */ 6 //#define DEBUG 7 #include <linux/netdevice.h> 8 #include <linux/etherdevice.h> 9 #include <linux/ethtool.h> 10 #include <linux/module.h> 11 #include <linux/virtio.h> 12 #include <linux/virtio_net.h> 13 #include <linux/bpf.h> 14 #include <linux/bpf_trace.h> 15 #include <linux/scatterlist.h> 16 #include <linux/if_vlan.h> 17 #include <linux/slab.h> 18 #include <linux/cpu.h> 19 #include <linux/average.h> 20 #include <linux/filter.h> 21 #include <linux/kernel.h> 22 #include <linux/dim.h> 23 #include <net/route.h> 24 #include <net/xdp.h> 25 #include <net/net_failover.h> 26 #include <net/netdev_rx_queue.h> 27 #include <net/netdev_queues.h> 28 #include <net/xdp_sock_drv.h> 29 30 static int napi_weight = NAPI_POLL_WEIGHT; 31 module_param(napi_weight, int, 0444); 32 33 static bool csum = true, gso = true, napi_tx = true; 34 module_param(csum, bool, 0444); 35 module_param(gso, bool, 0444); 36 module_param(napi_tx, bool, 0644); 37 38 /* FIXME: MTU in config. */ 39 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN) 40 #define GOOD_COPY_LEN 128 41 42 #define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) 43 44 /* Separating two types of XDP xmit */ 45 #define VIRTIO_XDP_TX BIT(0) 46 #define VIRTIO_XDP_REDIR BIT(1) 47 48 #define VIRTIO_XDP_FLAG BIT(0) 49 #define VIRTIO_ORPHAN_FLAG BIT(1) 50 51 /* RX packet size EWMA. The average packet size is used to determine the packet 52 * buffer size when refilling RX rings. As the entire RX ring may be refilled 53 * at once, the weight is chosen so that the EWMA will be insensitive to short- 54 * term, transient changes in packet size. 55 */ 56 DECLARE_EWMA(pkt_len, 0, 64) 57 58 #define VIRTNET_DRIVER_VERSION "1.0.0" 59 60 static const unsigned long guest_offloads[] = { 61 VIRTIO_NET_F_GUEST_TSO4, 62 VIRTIO_NET_F_GUEST_TSO6, 63 VIRTIO_NET_F_GUEST_ECN, 64 VIRTIO_NET_F_GUEST_UFO, 65 VIRTIO_NET_F_GUEST_CSUM, 66 VIRTIO_NET_F_GUEST_USO4, 67 VIRTIO_NET_F_GUEST_USO6, 68 VIRTIO_NET_F_GUEST_HDRLEN 69 }; 70 71 #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ 72 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ 73 (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ 74 (1ULL << VIRTIO_NET_F_GUEST_UFO) | \ 75 (1ULL << VIRTIO_NET_F_GUEST_USO4) | \ 76 (1ULL << VIRTIO_NET_F_GUEST_USO6)) 77 78 struct virtnet_stat_desc { 79 char desc[ETH_GSTRING_LEN]; 80 size_t offset; 81 size_t qstat_offset; 82 }; 83 84 struct virtnet_sq_free_stats { 85 u64 packets; 86 u64 bytes; 87 u64 napi_packets; 88 u64 napi_bytes; 89 }; 90 91 struct virtnet_sq_stats { 92 struct u64_stats_sync syncp; 93 u64_stats_t packets; 94 u64_stats_t bytes; 95 u64_stats_t xdp_tx; 96 u64_stats_t xdp_tx_drops; 97 u64_stats_t kicks; 98 u64_stats_t tx_timeouts; 99 u64_stats_t stop; 100 u64_stats_t wake; 101 }; 102 103 struct virtnet_rq_stats { 104 struct u64_stats_sync syncp; 105 u64_stats_t packets; 106 u64_stats_t bytes; 107 u64_stats_t drops; 108 u64_stats_t xdp_packets; 109 u64_stats_t xdp_tx; 110 u64_stats_t xdp_redirects; 111 u64_stats_t xdp_drops; 112 u64_stats_t kicks; 113 }; 114 115 #define VIRTNET_SQ_STAT(name, m) {name, offsetof(struct virtnet_sq_stats, m), -1} 116 #define VIRTNET_RQ_STAT(name, m) {name, offsetof(struct virtnet_rq_stats, m), -1} 117 118 #define VIRTNET_SQ_STAT_QSTAT(name, m) \ 119 { \ 120 name, \ 121 offsetof(struct virtnet_sq_stats, m), \ 122 offsetof(struct netdev_queue_stats_tx, m), \ 123 } 124 125 #define VIRTNET_RQ_STAT_QSTAT(name, m) \ 126 { \ 127 name, \ 128 offsetof(struct virtnet_rq_stats, m), \ 129 offsetof(struct netdev_queue_stats_rx, m), \ 130 } 131 132 static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = { 133 VIRTNET_SQ_STAT("xdp_tx", xdp_tx), 134 VIRTNET_SQ_STAT("xdp_tx_drops", xdp_tx_drops), 135 VIRTNET_SQ_STAT("kicks", kicks), 136 VIRTNET_SQ_STAT("tx_timeouts", tx_timeouts), 137 }; 138 139 static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = { 140 VIRTNET_RQ_STAT("drops", drops), 141 VIRTNET_RQ_STAT("xdp_packets", xdp_packets), 142 VIRTNET_RQ_STAT("xdp_tx", xdp_tx), 143 VIRTNET_RQ_STAT("xdp_redirects", xdp_redirects), 144 VIRTNET_RQ_STAT("xdp_drops", xdp_drops), 145 VIRTNET_RQ_STAT("kicks", kicks), 146 }; 147 148 static const struct virtnet_stat_desc virtnet_sq_stats_desc_qstat[] = { 149 VIRTNET_SQ_STAT_QSTAT("packets", packets), 150 VIRTNET_SQ_STAT_QSTAT("bytes", bytes), 151 VIRTNET_SQ_STAT_QSTAT("stop", stop), 152 VIRTNET_SQ_STAT_QSTAT("wake", wake), 153 }; 154 155 static const struct virtnet_stat_desc virtnet_rq_stats_desc_qstat[] = { 156 VIRTNET_RQ_STAT_QSTAT("packets", packets), 157 VIRTNET_RQ_STAT_QSTAT("bytes", bytes), 158 }; 159 160 #define VIRTNET_STATS_DESC_CQ(name) \ 161 {#name, offsetof(struct virtio_net_stats_cvq, name), -1} 162 163 #define VIRTNET_STATS_DESC_RX(class, name) \ 164 {#name, offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), -1} 165 166 #define VIRTNET_STATS_DESC_TX(class, name) \ 167 {#name, offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), -1} 168 169 170 static const struct virtnet_stat_desc virtnet_stats_cvq_desc[] = { 171 VIRTNET_STATS_DESC_CQ(command_num), 172 VIRTNET_STATS_DESC_CQ(ok_num), 173 }; 174 175 static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = { 176 VIRTNET_STATS_DESC_RX(basic, packets), 177 VIRTNET_STATS_DESC_RX(basic, bytes), 178 179 VIRTNET_STATS_DESC_RX(basic, notifications), 180 VIRTNET_STATS_DESC_RX(basic, interrupts), 181 }; 182 183 static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = { 184 VIRTNET_STATS_DESC_TX(basic, packets), 185 VIRTNET_STATS_DESC_TX(basic, bytes), 186 187 VIRTNET_STATS_DESC_TX(basic, notifications), 188 VIRTNET_STATS_DESC_TX(basic, interrupts), 189 }; 190 191 static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = { 192 VIRTNET_STATS_DESC_RX(csum, needs_csum), 193 }; 194 195 static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = { 196 VIRTNET_STATS_DESC_TX(gso, gso_packets_noseg), 197 VIRTNET_STATS_DESC_TX(gso, gso_bytes_noseg), 198 }; 199 200 static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = { 201 VIRTNET_STATS_DESC_RX(speed, ratelimit_bytes), 202 }; 203 204 static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = { 205 VIRTNET_STATS_DESC_TX(speed, ratelimit_bytes), 206 }; 207 208 #define VIRTNET_STATS_DESC_RX_QSTAT(class, name, qstat_field) \ 209 { \ 210 #name, \ 211 offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), \ 212 offsetof(struct netdev_queue_stats_rx, qstat_field), \ 213 } 214 215 #define VIRTNET_STATS_DESC_TX_QSTAT(class, name, qstat_field) \ 216 { \ 217 #name, \ 218 offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), \ 219 offsetof(struct netdev_queue_stats_tx, qstat_field), \ 220 } 221 222 static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc_qstat[] = { 223 VIRTNET_STATS_DESC_RX_QSTAT(basic, drops, hw_drops), 224 VIRTNET_STATS_DESC_RX_QSTAT(basic, drop_overruns, hw_drop_overruns), 225 }; 226 227 static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc_qstat[] = { 228 VIRTNET_STATS_DESC_TX_QSTAT(basic, drops, hw_drops), 229 VIRTNET_STATS_DESC_TX_QSTAT(basic, drop_malformed, hw_drop_errors), 230 }; 231 232 static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc_qstat[] = { 233 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_valid, csum_unnecessary), 234 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_none, csum_none), 235 VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_bad, csum_bad), 236 }; 237 238 static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc_qstat[] = { 239 VIRTNET_STATS_DESC_TX_QSTAT(csum, csum_none, csum_none), 240 VIRTNET_STATS_DESC_TX_QSTAT(csum, needs_csum, needs_csum), 241 }; 242 243 static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc_qstat[] = { 244 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets, hw_gro_packets), 245 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes, hw_gro_bytes), 246 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets_coalesced, hw_gro_wire_packets), 247 VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes_coalesced, hw_gro_wire_bytes), 248 }; 249 250 static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc_qstat[] = { 251 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_packets, hw_gso_packets), 252 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_bytes, hw_gso_bytes), 253 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments, hw_gso_wire_packets), 254 VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments_bytes, hw_gso_wire_bytes), 255 }; 256 257 static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc_qstat[] = { 258 VIRTNET_STATS_DESC_RX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits), 259 }; 260 261 static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc_qstat[] = { 262 VIRTNET_STATS_DESC_TX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits), 263 }; 264 265 #define VIRTNET_Q_TYPE_RX 0 266 #define VIRTNET_Q_TYPE_TX 1 267 #define VIRTNET_Q_TYPE_CQ 2 268 269 struct virtnet_interrupt_coalesce { 270 u32 max_packets; 271 u32 max_usecs; 272 }; 273 274 /* The dma information of pages allocated at a time. */ 275 struct virtnet_rq_dma { 276 dma_addr_t addr; 277 u32 ref; 278 u16 len; 279 u16 need_sync; 280 }; 281 282 /* Internal representation of a send virtqueue */ 283 struct send_queue { 284 /* Virtqueue associated with this send _queue */ 285 struct virtqueue *vq; 286 287 /* TX: fragments + linear part + virtio header */ 288 struct scatterlist sg[MAX_SKB_FRAGS + 2]; 289 290 /* Name of the send queue: output.$index */ 291 char name[16]; 292 293 struct virtnet_sq_stats stats; 294 295 struct virtnet_interrupt_coalesce intr_coal; 296 297 struct napi_struct napi; 298 299 /* Record whether sq is in reset state. */ 300 bool reset; 301 }; 302 303 /* Internal representation of a receive virtqueue */ 304 struct receive_queue { 305 /* Virtqueue associated with this receive_queue */ 306 struct virtqueue *vq; 307 308 struct napi_struct napi; 309 310 struct bpf_prog __rcu *xdp_prog; 311 312 struct virtnet_rq_stats stats; 313 314 /* The number of rx notifications */ 315 u16 calls; 316 317 /* Is dynamic interrupt moderation enabled? */ 318 bool dim_enabled; 319 320 /* Used to protect dim_enabled and inter_coal */ 321 struct mutex dim_lock; 322 323 /* Dynamic Interrupt Moderation */ 324 struct dim dim; 325 326 u32 packets_in_napi; 327 328 struct virtnet_interrupt_coalesce intr_coal; 329 330 /* Chain pages by the private ptr. */ 331 struct page *pages; 332 333 /* Average packet length for mergeable receive buffers. */ 334 struct ewma_pkt_len mrg_avg_pkt_len; 335 336 /* Page frag for packet buffer allocation. */ 337 struct page_frag alloc_frag; 338 339 /* RX: fragments + linear part + virtio header */ 340 struct scatterlist sg[MAX_SKB_FRAGS + 2]; 341 342 /* Min single buffer size for mergeable buffers case. */ 343 unsigned int min_buf_len; 344 345 /* Name of this receive queue: input.$index */ 346 char name[16]; 347 348 struct xdp_rxq_info xdp_rxq; 349 350 /* Record the last dma info to free after new pages is allocated. */ 351 struct virtnet_rq_dma *last_dma; 352 353 struct xsk_buff_pool *xsk_pool; 354 355 /* xdp rxq used by xsk */ 356 struct xdp_rxq_info xsk_rxq_info; 357 358 struct xdp_buff **xsk_buffs; 359 }; 360 361 /* This structure can contain rss message with maximum settings for indirection table and keysize 362 * Note, that default structure that describes RSS configuration virtio_net_rss_config 363 * contains same info but can't handle table values. 364 * In any case, structure would be passed to virtio hw through sg_buf split by parts 365 * because table sizes may be differ according to the device configuration. 366 */ 367 #define VIRTIO_NET_RSS_MAX_KEY_SIZE 40 368 #define VIRTIO_NET_RSS_MAX_TABLE_LEN 128 369 struct virtio_net_ctrl_rss { 370 u32 hash_types; 371 u16 indirection_table_mask; 372 u16 unclassified_queue; 373 u16 indirection_table[VIRTIO_NET_RSS_MAX_TABLE_LEN]; 374 u16 max_tx_vq; 375 u8 hash_key_length; 376 u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE]; 377 }; 378 379 /* Control VQ buffers: protected by the rtnl lock */ 380 struct control_buf { 381 struct virtio_net_ctrl_hdr hdr; 382 virtio_net_ctrl_ack status; 383 }; 384 385 struct virtnet_info { 386 struct virtio_device *vdev; 387 struct virtqueue *cvq; 388 struct net_device *dev; 389 struct send_queue *sq; 390 struct receive_queue *rq; 391 unsigned int status; 392 393 /* Max # of queue pairs supported by the device */ 394 u16 max_queue_pairs; 395 396 /* # of queue pairs currently used by the driver */ 397 u16 curr_queue_pairs; 398 399 /* # of XDP queue pairs currently used by the driver */ 400 u16 xdp_queue_pairs; 401 402 /* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */ 403 bool xdp_enabled; 404 405 /* I like... big packets and I cannot lie! */ 406 bool big_packets; 407 408 /* number of sg entries allocated for big packets */ 409 unsigned int big_packets_num_skbfrags; 410 411 /* Host will merge rx buffers for big packets (shake it! shake it!) */ 412 bool mergeable_rx_bufs; 413 414 /* Host supports rss and/or hash report */ 415 bool has_rss; 416 bool has_rss_hash_report; 417 u8 rss_key_size; 418 u16 rss_indir_table_size; 419 u32 rss_hash_types_supported; 420 u32 rss_hash_types_saved; 421 struct virtio_net_ctrl_rss rss; 422 423 /* Has control virtqueue */ 424 bool has_cvq; 425 426 /* Lock to protect the control VQ */ 427 struct mutex cvq_lock; 428 429 /* Host can handle any s/g split between our header and packet data */ 430 bool any_header_sg; 431 432 /* Packet virtio header size */ 433 u8 hdr_len; 434 435 /* Work struct for delayed refilling if we run low on memory. */ 436 struct delayed_work refill; 437 438 /* Is delayed refill enabled? */ 439 bool refill_enabled; 440 441 /* The lock to synchronize the access to refill_enabled */ 442 spinlock_t refill_lock; 443 444 /* Work struct for config space updates */ 445 struct work_struct config_work; 446 447 /* Work struct for setting rx mode */ 448 struct work_struct rx_mode_work; 449 450 /* OK to queue work setting RX mode? */ 451 bool rx_mode_work_enabled; 452 453 /* Does the affinity hint is set for virtqueues? */ 454 bool affinity_hint_set; 455 456 /* CPU hotplug instances for online & dead */ 457 struct hlist_node node; 458 struct hlist_node node_dead; 459 460 struct control_buf *ctrl; 461 462 /* Ethtool settings */ 463 u8 duplex; 464 u32 speed; 465 466 /* Is rx dynamic interrupt moderation enabled? */ 467 bool rx_dim_enabled; 468 469 /* Interrupt coalescing settings */ 470 struct virtnet_interrupt_coalesce intr_coal_tx; 471 struct virtnet_interrupt_coalesce intr_coal_rx; 472 473 unsigned long guest_offloads; 474 unsigned long guest_offloads_capable; 475 476 /* failover when STANDBY feature enabled */ 477 struct failover *failover; 478 479 u64 device_stats_cap; 480 }; 481 482 struct padded_vnet_hdr { 483 struct virtio_net_hdr_v1_hash hdr; 484 /* 485 * hdr is in a separate sg buffer, and data sg buffer shares same page 486 * with this header sg. This padding makes next sg 16 byte aligned 487 * after the header. 488 */ 489 char padding[12]; 490 }; 491 492 struct virtio_net_common_hdr { 493 union { 494 struct virtio_net_hdr hdr; 495 struct virtio_net_hdr_mrg_rxbuf mrg_hdr; 496 struct virtio_net_hdr_v1_hash hash_v1_hdr; 497 }; 498 }; 499 500 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf); 501 static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp, 502 struct net_device *dev, 503 unsigned int *xdp_xmit, 504 struct virtnet_rq_stats *stats); 505 static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, 506 struct sk_buff *skb, u8 flags); 507 static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb, 508 struct sk_buff *curr_skb, 509 struct page *page, void *buf, 510 int len, int truesize); 511 512 static bool is_xdp_frame(void *ptr) 513 { 514 return (unsigned long)ptr & VIRTIO_XDP_FLAG; 515 } 516 517 static void *xdp_to_ptr(struct xdp_frame *ptr) 518 { 519 return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG); 520 } 521 522 static struct xdp_frame *ptr_to_xdp(void *ptr) 523 { 524 return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG); 525 } 526 527 static bool is_orphan_skb(void *ptr) 528 { 529 return (unsigned long)ptr & VIRTIO_ORPHAN_FLAG; 530 } 531 532 static void *skb_to_ptr(struct sk_buff *skb, bool orphan) 533 { 534 return (void *)((unsigned long)skb | (orphan ? VIRTIO_ORPHAN_FLAG : 0)); 535 } 536 537 static struct sk_buff *ptr_to_skb(void *ptr) 538 { 539 return (struct sk_buff *)((unsigned long)ptr & ~VIRTIO_ORPHAN_FLAG); 540 } 541 542 static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq, 543 bool in_napi, struct virtnet_sq_free_stats *stats) 544 { 545 unsigned int len; 546 void *ptr; 547 548 while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { 549 if (!is_xdp_frame(ptr)) { 550 struct sk_buff *skb = ptr_to_skb(ptr); 551 552 pr_debug("Sent skb %p\n", skb); 553 554 if (is_orphan_skb(ptr)) { 555 stats->packets++; 556 stats->bytes += skb->len; 557 } else { 558 stats->napi_packets++; 559 stats->napi_bytes += skb->len; 560 } 561 napi_consume_skb(skb, in_napi); 562 } else { 563 struct xdp_frame *frame = ptr_to_xdp(ptr); 564 565 stats->packets++; 566 stats->bytes += xdp_get_frame_len(frame); 567 xdp_return_frame(frame); 568 } 569 } 570 netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes); 571 } 572 573 /* Converting between virtqueue no. and kernel tx/rx queue no. 574 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq 575 */ 576 static int vq2txq(struct virtqueue *vq) 577 { 578 return (vq->index - 1) / 2; 579 } 580 581 static int txq2vq(int txq) 582 { 583 return txq * 2 + 1; 584 } 585 586 static int vq2rxq(struct virtqueue *vq) 587 { 588 return vq->index / 2; 589 } 590 591 static int rxq2vq(int rxq) 592 { 593 return rxq * 2; 594 } 595 596 static int vq_type(struct virtnet_info *vi, int qid) 597 { 598 if (qid == vi->max_queue_pairs * 2) 599 return VIRTNET_Q_TYPE_CQ; 600 601 if (qid % 2) 602 return VIRTNET_Q_TYPE_TX; 603 604 return VIRTNET_Q_TYPE_RX; 605 } 606 607 static inline struct virtio_net_common_hdr * 608 skb_vnet_common_hdr(struct sk_buff *skb) 609 { 610 return (struct virtio_net_common_hdr *)skb->cb; 611 } 612 613 /* 614 * private is used to chain pages for big packets, put the whole 615 * most recent used list in the beginning for reuse 616 */ 617 static void give_pages(struct receive_queue *rq, struct page *page) 618 { 619 struct page *end; 620 621 /* Find end of list, sew whole thing into vi->rq.pages. */ 622 for (end = page; end->private; end = (struct page *)end->private); 623 end->private = (unsigned long)rq->pages; 624 rq->pages = page; 625 } 626 627 static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask) 628 { 629 struct page *p = rq->pages; 630 631 if (p) { 632 rq->pages = (struct page *)p->private; 633 /* clear private here, it is used to chain pages */ 634 p->private = 0; 635 } else 636 p = alloc_page(gfp_mask); 637 return p; 638 } 639 640 static void virtnet_rq_free_buf(struct virtnet_info *vi, 641 struct receive_queue *rq, void *buf) 642 { 643 if (vi->mergeable_rx_bufs) 644 put_page(virt_to_head_page(buf)); 645 else if (vi->big_packets) 646 give_pages(rq, buf); 647 else 648 put_page(virt_to_head_page(buf)); 649 } 650 651 static void enable_delayed_refill(struct virtnet_info *vi) 652 { 653 spin_lock_bh(&vi->refill_lock); 654 vi->refill_enabled = true; 655 spin_unlock_bh(&vi->refill_lock); 656 } 657 658 static void disable_delayed_refill(struct virtnet_info *vi) 659 { 660 spin_lock_bh(&vi->refill_lock); 661 vi->refill_enabled = false; 662 spin_unlock_bh(&vi->refill_lock); 663 } 664 665 static void enable_rx_mode_work(struct virtnet_info *vi) 666 { 667 rtnl_lock(); 668 vi->rx_mode_work_enabled = true; 669 rtnl_unlock(); 670 } 671 672 static void disable_rx_mode_work(struct virtnet_info *vi) 673 { 674 rtnl_lock(); 675 vi->rx_mode_work_enabled = false; 676 rtnl_unlock(); 677 } 678 679 static void virtqueue_napi_schedule(struct napi_struct *napi, 680 struct virtqueue *vq) 681 { 682 if (napi_schedule_prep(napi)) { 683 virtqueue_disable_cb(vq); 684 __napi_schedule(napi); 685 } 686 } 687 688 static bool virtqueue_napi_complete(struct napi_struct *napi, 689 struct virtqueue *vq, int processed) 690 { 691 int opaque; 692 693 opaque = virtqueue_enable_cb_prepare(vq); 694 if (napi_complete_done(napi, processed)) { 695 if (unlikely(virtqueue_poll(vq, opaque))) 696 virtqueue_napi_schedule(napi, vq); 697 else 698 return true; 699 } else { 700 virtqueue_disable_cb(vq); 701 } 702 703 return false; 704 } 705 706 static void skb_xmit_done(struct virtqueue *vq) 707 { 708 struct virtnet_info *vi = vq->vdev->priv; 709 struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi; 710 711 /* Suppress further interrupts. */ 712 virtqueue_disable_cb(vq); 713 714 if (napi->weight) 715 virtqueue_napi_schedule(napi, vq); 716 else 717 /* We were probably waiting for more output buffers. */ 718 netif_wake_subqueue(vi->dev, vq2txq(vq)); 719 } 720 721 #define MRG_CTX_HEADER_SHIFT 22 722 static void *mergeable_len_to_ctx(unsigned int truesize, 723 unsigned int headroom) 724 { 725 return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize); 726 } 727 728 static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx) 729 { 730 return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT; 731 } 732 733 static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx) 734 { 735 return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1); 736 } 737 738 static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen, 739 unsigned int headroom, 740 unsigned int len) 741 { 742 struct sk_buff *skb; 743 744 skb = build_skb(buf, buflen); 745 if (unlikely(!skb)) 746 return NULL; 747 748 skb_reserve(skb, headroom); 749 skb_put(skb, len); 750 751 return skb; 752 } 753 754 /* Called from bottom half context */ 755 static struct sk_buff *page_to_skb(struct virtnet_info *vi, 756 struct receive_queue *rq, 757 struct page *page, unsigned int offset, 758 unsigned int len, unsigned int truesize, 759 unsigned int headroom) 760 { 761 struct sk_buff *skb; 762 struct virtio_net_common_hdr *hdr; 763 unsigned int copy, hdr_len, hdr_padded_len; 764 struct page *page_to_free = NULL; 765 int tailroom, shinfo_size; 766 char *p, *hdr_p, *buf; 767 768 p = page_address(page) + offset; 769 hdr_p = p; 770 771 hdr_len = vi->hdr_len; 772 if (vi->mergeable_rx_bufs) 773 hdr_padded_len = hdr_len; 774 else 775 hdr_padded_len = sizeof(struct padded_vnet_hdr); 776 777 buf = p - headroom; 778 len -= hdr_len; 779 offset += hdr_padded_len; 780 p += hdr_padded_len; 781 tailroom = truesize - headroom - hdr_padded_len - len; 782 783 shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 784 785 if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) { 786 skb = virtnet_build_skb(buf, truesize, p - buf, len); 787 if (unlikely(!skb)) 788 return NULL; 789 790 page = (struct page *)page->private; 791 if (page) 792 give_pages(rq, page); 793 goto ok; 794 } 795 796 /* copy small packet so we can reuse these pages for small data */ 797 skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN); 798 if (unlikely(!skb)) 799 return NULL; 800 801 /* Copy all frame if it fits skb->head, otherwise 802 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed. 803 */ 804 if (len <= skb_tailroom(skb)) 805 copy = len; 806 else 807 copy = ETH_HLEN; 808 skb_put_data(skb, p, copy); 809 810 len -= copy; 811 offset += copy; 812 813 if (vi->mergeable_rx_bufs) { 814 if (len) 815 skb_add_rx_frag(skb, 0, page, offset, len, truesize); 816 else 817 page_to_free = page; 818 goto ok; 819 } 820 821 /* 822 * Verify that we can indeed put this data into a skb. 823 * This is here to handle cases when the device erroneously 824 * tries to receive more than is possible. This is usually 825 * the case of a broken device. 826 */ 827 if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) { 828 net_dbg_ratelimited("%s: too much data\n", skb->dev->name); 829 dev_kfree_skb(skb); 830 return NULL; 831 } 832 BUG_ON(offset >= PAGE_SIZE); 833 while (len) { 834 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len); 835 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset, 836 frag_size, truesize); 837 len -= frag_size; 838 page = (struct page *)page->private; 839 offset = 0; 840 } 841 842 if (page) 843 give_pages(rq, page); 844 845 ok: 846 hdr = skb_vnet_common_hdr(skb); 847 memcpy(hdr, hdr_p, hdr_len); 848 if (page_to_free) 849 put_page(page_to_free); 850 851 return skb; 852 } 853 854 static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len) 855 { 856 struct virtnet_info *vi = rq->vq->vdev->priv; 857 struct page *page = virt_to_head_page(buf); 858 struct virtnet_rq_dma *dma; 859 void *head; 860 int offset; 861 862 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); 863 864 head = page_address(page); 865 866 dma = head; 867 868 --dma->ref; 869 870 if (dma->need_sync && len) { 871 offset = buf - (head + sizeof(*dma)); 872 873 virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr, 874 offset, len, 875 DMA_FROM_DEVICE); 876 } 877 878 if (dma->ref) 879 return; 880 881 virtqueue_dma_unmap_single_attrs(rq->vq, dma->addr, dma->len, 882 DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); 883 put_page(page); 884 } 885 886 static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx) 887 { 888 struct virtnet_info *vi = rq->vq->vdev->priv; 889 void *buf; 890 891 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); 892 893 buf = virtqueue_get_buf_ctx(rq->vq, len, ctx); 894 if (buf) 895 virtnet_rq_unmap(rq, buf, *len); 896 897 return buf; 898 } 899 900 static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len) 901 { 902 struct virtnet_info *vi = rq->vq->vdev->priv; 903 struct virtnet_rq_dma *dma; 904 dma_addr_t addr; 905 u32 offset; 906 void *head; 907 908 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); 909 910 head = page_address(rq->alloc_frag.page); 911 912 offset = buf - head; 913 914 dma = head; 915 916 addr = dma->addr - sizeof(*dma) + offset; 917 918 sg_init_table(rq->sg, 1); 919 rq->sg[0].dma_address = addr; 920 rq->sg[0].length = len; 921 } 922 923 static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp) 924 { 925 struct page_frag *alloc_frag = &rq->alloc_frag; 926 struct virtnet_info *vi = rq->vq->vdev->priv; 927 struct virtnet_rq_dma *dma; 928 void *buf, *head; 929 dma_addr_t addr; 930 931 BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); 932 933 head = page_address(alloc_frag->page); 934 935 dma = head; 936 937 /* new pages */ 938 if (!alloc_frag->offset) { 939 if (rq->last_dma) { 940 /* Now, the new page is allocated, the last dma 941 * will not be used. So the dma can be unmapped 942 * if the ref is 0. 943 */ 944 virtnet_rq_unmap(rq, rq->last_dma, 0); 945 rq->last_dma = NULL; 946 } 947 948 dma->len = alloc_frag->size - sizeof(*dma); 949 950 addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1, 951 dma->len, DMA_FROM_DEVICE, 0); 952 if (virtqueue_dma_mapping_error(rq->vq, addr)) 953 return NULL; 954 955 dma->addr = addr; 956 dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr); 957 958 /* Add a reference to dma to prevent the entire dma from 959 * being released during error handling. This reference 960 * will be freed after the pages are no longer used. 961 */ 962 get_page(alloc_frag->page); 963 dma->ref = 1; 964 alloc_frag->offset = sizeof(*dma); 965 966 rq->last_dma = dma; 967 } 968 969 ++dma->ref; 970 971 buf = head + alloc_frag->offset; 972 973 get_page(alloc_frag->page); 974 alloc_frag->offset += size; 975 976 return buf; 977 } 978 979 static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf) 980 { 981 struct virtnet_info *vi = vq->vdev->priv; 982 struct receive_queue *rq; 983 int i = vq2rxq(vq); 984 985 rq = &vi->rq[i]; 986 987 if (rq->xsk_pool) { 988 xsk_buff_free((struct xdp_buff *)buf); 989 return; 990 } 991 992 if (!vi->big_packets || vi->mergeable_rx_bufs) 993 virtnet_rq_unmap(rq, buf, 0); 994 995 virtnet_rq_free_buf(vi, rq, buf); 996 } 997 998 static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq, 999 bool in_napi) 1000 { 1001 struct virtnet_sq_free_stats stats = {0}; 1002 1003 __free_old_xmit(sq, txq, in_napi, &stats); 1004 1005 /* Avoid overhead when no packets have been processed 1006 * happens when called speculatively from start_xmit. 1007 */ 1008 if (!stats.packets && !stats.napi_packets) 1009 return; 1010 1011 u64_stats_update_begin(&sq->stats.syncp); 1012 u64_stats_add(&sq->stats.bytes, stats.bytes + stats.napi_bytes); 1013 u64_stats_add(&sq->stats.packets, stats.packets + stats.napi_packets); 1014 u64_stats_update_end(&sq->stats.syncp); 1015 } 1016 1017 static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) 1018 { 1019 if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) 1020 return false; 1021 else if (q < vi->curr_queue_pairs) 1022 return true; 1023 else 1024 return false; 1025 } 1026 1027 static void check_sq_full_and_disable(struct virtnet_info *vi, 1028 struct net_device *dev, 1029 struct send_queue *sq) 1030 { 1031 bool use_napi = sq->napi.weight; 1032 int qnum; 1033 1034 qnum = sq - vi->sq; 1035 1036 /* If running out of space, stop queue to avoid getting packets that we 1037 * are then unable to transmit. 1038 * An alternative would be to force queuing layer to requeue the skb by 1039 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be 1040 * returned in a normal path of operation: it means that driver is not 1041 * maintaining the TX queue stop/start state properly, and causes 1042 * the stack to do a non-trivial amount of useless work. 1043 * Since most packets only take 1 or 2 ring slots, stopping the queue 1044 * early means 16 slots are typically wasted. 1045 */ 1046 if (sq->vq->num_free < 2+MAX_SKB_FRAGS) { 1047 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 1048 1049 netif_tx_stop_queue(txq); 1050 u64_stats_update_begin(&sq->stats.syncp); 1051 u64_stats_inc(&sq->stats.stop); 1052 u64_stats_update_end(&sq->stats.syncp); 1053 if (use_napi) { 1054 if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) 1055 virtqueue_napi_schedule(&sq->napi, sq->vq); 1056 } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { 1057 /* More just got used, free them then recheck. */ 1058 free_old_xmit(sq, txq, false); 1059 if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { 1060 netif_start_subqueue(dev, qnum); 1061 u64_stats_update_begin(&sq->stats.syncp); 1062 u64_stats_inc(&sq->stats.wake); 1063 u64_stats_update_end(&sq->stats.syncp); 1064 virtqueue_disable_cb(sq->vq); 1065 } 1066 } 1067 } 1068 } 1069 1070 static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len) 1071 { 1072 sg->dma_address = addr; 1073 sg->length = len; 1074 } 1075 1076 static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi, 1077 struct receive_queue *rq, void *buf, u32 len) 1078 { 1079 struct xdp_buff *xdp; 1080 u32 bufsize; 1081 1082 xdp = (struct xdp_buff *)buf; 1083 1084 bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool) + vi->hdr_len; 1085 1086 if (unlikely(len > bufsize)) { 1087 pr_debug("%s: rx error: len %u exceeds truesize %u\n", 1088 vi->dev->name, len, bufsize); 1089 DEV_STATS_INC(vi->dev, rx_length_errors); 1090 xsk_buff_free(xdp); 1091 return NULL; 1092 } 1093 1094 xsk_buff_set_size(xdp, len); 1095 xsk_buff_dma_sync_for_cpu(xdp); 1096 1097 return xdp; 1098 } 1099 1100 static struct sk_buff *xsk_construct_skb(struct receive_queue *rq, 1101 struct xdp_buff *xdp) 1102 { 1103 unsigned int metasize = xdp->data - xdp->data_meta; 1104 struct sk_buff *skb; 1105 unsigned int size; 1106 1107 size = xdp->data_end - xdp->data_hard_start; 1108 skb = napi_alloc_skb(&rq->napi, size); 1109 if (unlikely(!skb)) { 1110 xsk_buff_free(xdp); 1111 return NULL; 1112 } 1113 1114 skb_reserve(skb, xdp->data_meta - xdp->data_hard_start); 1115 1116 size = xdp->data_end - xdp->data_meta; 1117 memcpy(__skb_put(skb, size), xdp->data_meta, size); 1118 1119 if (metasize) { 1120 __skb_pull(skb, metasize); 1121 skb_metadata_set(skb, metasize); 1122 } 1123 1124 xsk_buff_free(xdp); 1125 1126 return skb; 1127 } 1128 1129 static struct sk_buff *virtnet_receive_xsk_small(struct net_device *dev, struct virtnet_info *vi, 1130 struct receive_queue *rq, struct xdp_buff *xdp, 1131 unsigned int *xdp_xmit, 1132 struct virtnet_rq_stats *stats) 1133 { 1134 struct bpf_prog *prog; 1135 u32 ret; 1136 1137 ret = XDP_PASS; 1138 rcu_read_lock(); 1139 prog = rcu_dereference(rq->xdp_prog); 1140 if (prog) 1141 ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats); 1142 rcu_read_unlock(); 1143 1144 switch (ret) { 1145 case XDP_PASS: 1146 return xsk_construct_skb(rq, xdp); 1147 1148 case XDP_TX: 1149 case XDP_REDIRECT: 1150 return NULL; 1151 1152 default: 1153 /* drop packet */ 1154 xsk_buff_free(xdp); 1155 u64_stats_inc(&stats->drops); 1156 return NULL; 1157 } 1158 } 1159 1160 static void xsk_drop_follow_bufs(struct net_device *dev, 1161 struct receive_queue *rq, 1162 u32 num_buf, 1163 struct virtnet_rq_stats *stats) 1164 { 1165 struct xdp_buff *xdp; 1166 u32 len; 1167 1168 while (num_buf-- > 1) { 1169 xdp = virtqueue_get_buf(rq->vq, &len); 1170 if (unlikely(!xdp)) { 1171 pr_debug("%s: rx error: %d buffers missing\n", 1172 dev->name, num_buf); 1173 DEV_STATS_INC(dev, rx_length_errors); 1174 break; 1175 } 1176 u64_stats_add(&stats->bytes, len); 1177 xsk_buff_free(xdp); 1178 } 1179 } 1180 1181 static int xsk_append_merge_buffer(struct virtnet_info *vi, 1182 struct receive_queue *rq, 1183 struct sk_buff *head_skb, 1184 u32 num_buf, 1185 struct virtio_net_hdr_mrg_rxbuf *hdr, 1186 struct virtnet_rq_stats *stats) 1187 { 1188 struct sk_buff *curr_skb; 1189 struct xdp_buff *xdp; 1190 u32 len, truesize; 1191 struct page *page; 1192 void *buf; 1193 1194 curr_skb = head_skb; 1195 1196 while (--num_buf) { 1197 buf = virtqueue_get_buf(rq->vq, &len); 1198 if (unlikely(!buf)) { 1199 pr_debug("%s: rx error: %d buffers out of %d missing\n", 1200 vi->dev->name, num_buf, 1201 virtio16_to_cpu(vi->vdev, 1202 hdr->num_buffers)); 1203 DEV_STATS_INC(vi->dev, rx_length_errors); 1204 return -EINVAL; 1205 } 1206 1207 u64_stats_add(&stats->bytes, len); 1208 1209 xdp = buf_to_xdp(vi, rq, buf, len); 1210 if (!xdp) 1211 goto err; 1212 1213 buf = napi_alloc_frag(len); 1214 if (!buf) { 1215 xsk_buff_free(xdp); 1216 goto err; 1217 } 1218 1219 memcpy(buf, xdp->data - vi->hdr_len, len); 1220 1221 xsk_buff_free(xdp); 1222 1223 page = virt_to_page(buf); 1224 1225 truesize = len; 1226 1227 curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, 1228 buf, len, truesize); 1229 if (!curr_skb) { 1230 put_page(page); 1231 goto err; 1232 } 1233 } 1234 1235 return 0; 1236 1237 err: 1238 xsk_drop_follow_bufs(vi->dev, rq, num_buf, stats); 1239 return -EINVAL; 1240 } 1241 1242 static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct virtnet_info *vi, 1243 struct receive_queue *rq, struct xdp_buff *xdp, 1244 unsigned int *xdp_xmit, 1245 struct virtnet_rq_stats *stats) 1246 { 1247 struct virtio_net_hdr_mrg_rxbuf *hdr; 1248 struct bpf_prog *prog; 1249 struct sk_buff *skb; 1250 u32 ret, num_buf; 1251 1252 hdr = xdp->data - vi->hdr_len; 1253 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 1254 1255 ret = XDP_PASS; 1256 rcu_read_lock(); 1257 prog = rcu_dereference(rq->xdp_prog); 1258 /* TODO: support multi buffer. */ 1259 if (prog && num_buf == 1) 1260 ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats); 1261 rcu_read_unlock(); 1262 1263 switch (ret) { 1264 case XDP_PASS: 1265 skb = xsk_construct_skb(rq, xdp); 1266 if (!skb) 1267 goto drop_bufs; 1268 1269 if (xsk_append_merge_buffer(vi, rq, skb, num_buf, hdr, stats)) { 1270 dev_kfree_skb(skb); 1271 goto drop; 1272 } 1273 1274 return skb; 1275 1276 case XDP_TX: 1277 case XDP_REDIRECT: 1278 return NULL; 1279 1280 default: 1281 /* drop packet */ 1282 xsk_buff_free(xdp); 1283 } 1284 1285 drop_bufs: 1286 xsk_drop_follow_bufs(dev, rq, num_buf, stats); 1287 1288 drop: 1289 u64_stats_inc(&stats->drops); 1290 return NULL; 1291 } 1292 1293 static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queue *rq, 1294 void *buf, u32 len, 1295 unsigned int *xdp_xmit, 1296 struct virtnet_rq_stats *stats) 1297 { 1298 struct net_device *dev = vi->dev; 1299 struct sk_buff *skb = NULL; 1300 struct xdp_buff *xdp; 1301 u8 flags; 1302 1303 len -= vi->hdr_len; 1304 1305 u64_stats_add(&stats->bytes, len); 1306 1307 xdp = buf_to_xdp(vi, rq, buf, len); 1308 if (!xdp) 1309 return; 1310 1311 if (unlikely(len < ETH_HLEN)) { 1312 pr_debug("%s: short packet %i\n", dev->name, len); 1313 DEV_STATS_INC(dev, rx_length_errors); 1314 xsk_buff_free(xdp); 1315 return; 1316 } 1317 1318 flags = ((struct virtio_net_common_hdr *)(xdp->data - vi->hdr_len))->hdr.flags; 1319 1320 if (!vi->mergeable_rx_bufs) 1321 skb = virtnet_receive_xsk_small(dev, vi, rq, xdp, xdp_xmit, stats); 1322 else 1323 skb = virtnet_receive_xsk_merge(dev, vi, rq, xdp, xdp_xmit, stats); 1324 1325 if (skb) 1326 virtnet_receive_done(vi, rq, skb, flags); 1327 } 1328 1329 static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue *rq, 1330 struct xsk_buff_pool *pool, gfp_t gfp) 1331 { 1332 struct xdp_buff **xsk_buffs; 1333 dma_addr_t addr; 1334 int err = 0; 1335 u32 len, i; 1336 int num; 1337 1338 xsk_buffs = rq->xsk_buffs; 1339 1340 num = xsk_buff_alloc_batch(pool, xsk_buffs, rq->vq->num_free); 1341 if (!num) 1342 return -ENOMEM; 1343 1344 len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len; 1345 1346 for (i = 0; i < num; ++i) { 1347 /* Use the part of XDP_PACKET_HEADROOM as the virtnet hdr space. 1348 * We assume XDP_PACKET_HEADROOM is larger than hdr->len. 1349 * (see function virtnet_xsk_pool_enable) 1350 */ 1351 addr = xsk_buff_xdp_get_dma(xsk_buffs[i]) - vi->hdr_len; 1352 1353 sg_init_table(rq->sg, 1); 1354 sg_fill_dma(rq->sg, addr, len); 1355 1356 err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, xsk_buffs[i], gfp); 1357 if (err) 1358 goto err; 1359 } 1360 1361 return num; 1362 1363 err: 1364 for (; i < num; ++i) 1365 xsk_buff_free(xsk_buffs[i]); 1366 1367 return err; 1368 } 1369 1370 static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag) 1371 { 1372 struct virtnet_info *vi = netdev_priv(dev); 1373 struct send_queue *sq; 1374 1375 if (!netif_running(dev)) 1376 return -ENETDOWN; 1377 1378 if (qid >= vi->curr_queue_pairs) 1379 return -EINVAL; 1380 1381 sq = &vi->sq[qid]; 1382 1383 if (napi_if_scheduled_mark_missed(&sq->napi)) 1384 return 0; 1385 1386 local_bh_disable(); 1387 virtqueue_napi_schedule(&sq->napi, sq->vq); 1388 local_bh_enable(); 1389 1390 return 0; 1391 } 1392 1393 static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, 1394 struct send_queue *sq, 1395 struct xdp_frame *xdpf) 1396 { 1397 struct virtio_net_hdr_mrg_rxbuf *hdr; 1398 struct skb_shared_info *shinfo; 1399 u8 nr_frags = 0; 1400 int err, i; 1401 1402 if (unlikely(xdpf->headroom < vi->hdr_len)) 1403 return -EOVERFLOW; 1404 1405 if (unlikely(xdp_frame_has_frags(xdpf))) { 1406 shinfo = xdp_get_shared_info_from_frame(xdpf); 1407 nr_frags = shinfo->nr_frags; 1408 } 1409 1410 /* In wrapping function virtnet_xdp_xmit(), we need to free 1411 * up the pending old buffers, where we need to calculate the 1412 * position of skb_shared_info in xdp_get_frame_len() and 1413 * xdp_return_frame(), which will involve to xdpf->data and 1414 * xdpf->headroom. Therefore, we need to update the value of 1415 * headroom synchronously here. 1416 */ 1417 xdpf->headroom -= vi->hdr_len; 1418 xdpf->data -= vi->hdr_len; 1419 /* Zero header and leave csum up to XDP layers */ 1420 hdr = xdpf->data; 1421 memset(hdr, 0, vi->hdr_len); 1422 xdpf->len += vi->hdr_len; 1423 1424 sg_init_table(sq->sg, nr_frags + 1); 1425 sg_set_buf(sq->sg, xdpf->data, xdpf->len); 1426 for (i = 0; i < nr_frags; i++) { 1427 skb_frag_t *frag = &shinfo->frags[i]; 1428 1429 sg_set_page(&sq->sg[i + 1], skb_frag_page(frag), 1430 skb_frag_size(frag), skb_frag_off(frag)); 1431 } 1432 1433 err = virtqueue_add_outbuf(sq->vq, sq->sg, nr_frags + 1, 1434 xdp_to_ptr(xdpf), GFP_ATOMIC); 1435 if (unlikely(err)) 1436 return -ENOSPC; /* Caller handle free/refcnt */ 1437 1438 return 0; 1439 } 1440 1441 /* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on 1442 * the current cpu, so it does not need to be locked. 1443 * 1444 * Here we use marco instead of inline functions because we have to deal with 1445 * three issues at the same time: 1. the choice of sq. 2. judge and execute the 1446 * lock/unlock of txq 3. make sparse happy. It is difficult for two inline 1447 * functions to perfectly solve these three problems at the same time. 1448 */ 1449 #define virtnet_xdp_get_sq(vi) ({ \ 1450 int cpu = smp_processor_id(); \ 1451 struct netdev_queue *txq; \ 1452 typeof(vi) v = (vi); \ 1453 unsigned int qp; \ 1454 \ 1455 if (v->curr_queue_pairs > nr_cpu_ids) { \ 1456 qp = v->curr_queue_pairs - v->xdp_queue_pairs; \ 1457 qp += cpu; \ 1458 txq = netdev_get_tx_queue(v->dev, qp); \ 1459 __netif_tx_acquire(txq); \ 1460 } else { \ 1461 qp = cpu % v->curr_queue_pairs; \ 1462 txq = netdev_get_tx_queue(v->dev, qp); \ 1463 __netif_tx_lock(txq, cpu); \ 1464 } \ 1465 v->sq + qp; \ 1466 }) 1467 1468 #define virtnet_xdp_put_sq(vi, q) { \ 1469 struct netdev_queue *txq; \ 1470 typeof(vi) v = (vi); \ 1471 \ 1472 txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \ 1473 if (v->curr_queue_pairs > nr_cpu_ids) \ 1474 __netif_tx_release(txq); \ 1475 else \ 1476 __netif_tx_unlock(txq); \ 1477 } 1478 1479 static int virtnet_xdp_xmit(struct net_device *dev, 1480 int n, struct xdp_frame **frames, u32 flags) 1481 { 1482 struct virtnet_info *vi = netdev_priv(dev); 1483 struct virtnet_sq_free_stats stats = {0}; 1484 struct receive_queue *rq = vi->rq; 1485 struct bpf_prog *xdp_prog; 1486 struct send_queue *sq; 1487 int nxmit = 0; 1488 int kicks = 0; 1489 int ret; 1490 int i; 1491 1492 /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this 1493 * indicate XDP resources have been successfully allocated. 1494 */ 1495 xdp_prog = rcu_access_pointer(rq->xdp_prog); 1496 if (!xdp_prog) 1497 return -ENXIO; 1498 1499 sq = virtnet_xdp_get_sq(vi); 1500 1501 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { 1502 ret = -EINVAL; 1503 goto out; 1504 } 1505 1506 /* Free up any pending old buffers before queueing new ones. */ 1507 __free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), 1508 false, &stats); 1509 1510 for (i = 0; i < n; i++) { 1511 struct xdp_frame *xdpf = frames[i]; 1512 1513 if (__virtnet_xdp_xmit_one(vi, sq, xdpf)) 1514 break; 1515 nxmit++; 1516 } 1517 ret = nxmit; 1518 1519 if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq)) 1520 check_sq_full_and_disable(vi, dev, sq); 1521 1522 if (flags & XDP_XMIT_FLUSH) { 1523 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) 1524 kicks = 1; 1525 } 1526 out: 1527 u64_stats_update_begin(&sq->stats.syncp); 1528 u64_stats_add(&sq->stats.bytes, stats.bytes); 1529 u64_stats_add(&sq->stats.packets, stats.packets); 1530 u64_stats_add(&sq->stats.xdp_tx, n); 1531 u64_stats_add(&sq->stats.xdp_tx_drops, n - nxmit); 1532 u64_stats_add(&sq->stats.kicks, kicks); 1533 u64_stats_update_end(&sq->stats.syncp); 1534 1535 virtnet_xdp_put_sq(vi, sq); 1536 return ret; 1537 } 1538 1539 static void put_xdp_frags(struct xdp_buff *xdp) 1540 { 1541 struct skb_shared_info *shinfo; 1542 struct page *xdp_page; 1543 int i; 1544 1545 if (xdp_buff_has_frags(xdp)) { 1546 shinfo = xdp_get_shared_info_from_buff(xdp); 1547 for (i = 0; i < shinfo->nr_frags; i++) { 1548 xdp_page = skb_frag_page(&shinfo->frags[i]); 1549 put_page(xdp_page); 1550 } 1551 } 1552 } 1553 1554 static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp, 1555 struct net_device *dev, 1556 unsigned int *xdp_xmit, 1557 struct virtnet_rq_stats *stats) 1558 { 1559 struct xdp_frame *xdpf; 1560 int err; 1561 u32 act; 1562 1563 act = bpf_prog_run_xdp(xdp_prog, xdp); 1564 u64_stats_inc(&stats->xdp_packets); 1565 1566 switch (act) { 1567 case XDP_PASS: 1568 return act; 1569 1570 case XDP_TX: 1571 u64_stats_inc(&stats->xdp_tx); 1572 xdpf = xdp_convert_buff_to_frame(xdp); 1573 if (unlikely(!xdpf)) { 1574 netdev_dbg(dev, "convert buff to frame failed for xdp\n"); 1575 return XDP_DROP; 1576 } 1577 1578 err = virtnet_xdp_xmit(dev, 1, &xdpf, 0); 1579 if (unlikely(!err)) { 1580 xdp_return_frame_rx_napi(xdpf); 1581 } else if (unlikely(err < 0)) { 1582 trace_xdp_exception(dev, xdp_prog, act); 1583 return XDP_DROP; 1584 } 1585 *xdp_xmit |= VIRTIO_XDP_TX; 1586 return act; 1587 1588 case XDP_REDIRECT: 1589 u64_stats_inc(&stats->xdp_redirects); 1590 err = xdp_do_redirect(dev, xdp, xdp_prog); 1591 if (err) 1592 return XDP_DROP; 1593 1594 *xdp_xmit |= VIRTIO_XDP_REDIR; 1595 return act; 1596 1597 default: 1598 bpf_warn_invalid_xdp_action(dev, xdp_prog, act); 1599 fallthrough; 1600 case XDP_ABORTED: 1601 trace_xdp_exception(dev, xdp_prog, act); 1602 fallthrough; 1603 case XDP_DROP: 1604 return XDP_DROP; 1605 } 1606 } 1607 1608 static unsigned int virtnet_get_headroom(struct virtnet_info *vi) 1609 { 1610 return vi->xdp_enabled ? XDP_PACKET_HEADROOM : 0; 1611 } 1612 1613 /* We copy the packet for XDP in the following cases: 1614 * 1615 * 1) Packet is scattered across multiple rx buffers. 1616 * 2) Headroom space is insufficient. 1617 * 1618 * This is inefficient but it's a temporary condition that 1619 * we hit right after XDP is enabled and until queue is refilled 1620 * with large buffers with sufficient headroom - so it should affect 1621 * at most queue size packets. 1622 * Afterwards, the conditions to enable 1623 * XDP should preclude the underlying device from sending packets 1624 * across multiple buffers (num_buf > 1), and we make sure buffers 1625 * have enough headroom. 1626 */ 1627 static struct page *xdp_linearize_page(struct receive_queue *rq, 1628 int *num_buf, 1629 struct page *p, 1630 int offset, 1631 int page_off, 1632 unsigned int *len) 1633 { 1634 int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1635 struct page *page; 1636 1637 if (page_off + *len + tailroom > PAGE_SIZE) 1638 return NULL; 1639 1640 page = alloc_page(GFP_ATOMIC); 1641 if (!page) 1642 return NULL; 1643 1644 memcpy(page_address(page) + page_off, page_address(p) + offset, *len); 1645 page_off += *len; 1646 1647 while (--*num_buf) { 1648 unsigned int buflen; 1649 void *buf; 1650 int off; 1651 1652 buf = virtnet_rq_get_buf(rq, &buflen, NULL); 1653 if (unlikely(!buf)) 1654 goto err_buf; 1655 1656 p = virt_to_head_page(buf); 1657 off = buf - page_address(p); 1658 1659 /* guard against a misconfigured or uncooperative backend that 1660 * is sending packet larger than the MTU. 1661 */ 1662 if ((page_off + buflen + tailroom) > PAGE_SIZE) { 1663 put_page(p); 1664 goto err_buf; 1665 } 1666 1667 memcpy(page_address(page) + page_off, 1668 page_address(p) + off, buflen); 1669 page_off += buflen; 1670 put_page(p); 1671 } 1672 1673 /* Headroom does not contribute to packet length */ 1674 *len = page_off - XDP_PACKET_HEADROOM; 1675 return page; 1676 err_buf: 1677 __free_pages(page, 0); 1678 return NULL; 1679 } 1680 1681 static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi, 1682 unsigned int xdp_headroom, 1683 void *buf, 1684 unsigned int len) 1685 { 1686 unsigned int header_offset; 1687 unsigned int headroom; 1688 unsigned int buflen; 1689 struct sk_buff *skb; 1690 1691 header_offset = VIRTNET_RX_PAD + xdp_headroom; 1692 headroom = vi->hdr_len + header_offset; 1693 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1694 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1695 1696 skb = virtnet_build_skb(buf, buflen, headroom, len); 1697 if (unlikely(!skb)) 1698 return NULL; 1699 1700 buf += header_offset; 1701 memcpy(skb_vnet_common_hdr(skb), buf, vi->hdr_len); 1702 1703 return skb; 1704 } 1705 1706 static struct sk_buff *receive_small_xdp(struct net_device *dev, 1707 struct virtnet_info *vi, 1708 struct receive_queue *rq, 1709 struct bpf_prog *xdp_prog, 1710 void *buf, 1711 unsigned int xdp_headroom, 1712 unsigned int len, 1713 unsigned int *xdp_xmit, 1714 struct virtnet_rq_stats *stats) 1715 { 1716 unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom; 1717 unsigned int headroom = vi->hdr_len + header_offset; 1718 struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset; 1719 struct page *page = virt_to_head_page(buf); 1720 struct page *xdp_page; 1721 unsigned int buflen; 1722 struct xdp_buff xdp; 1723 struct sk_buff *skb; 1724 unsigned int metasize = 0; 1725 u32 act; 1726 1727 if (unlikely(hdr->hdr.gso_type)) 1728 goto err_xdp; 1729 1730 /* Partially checksummed packets must be dropped. */ 1731 if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) 1732 goto err_xdp; 1733 1734 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1735 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1736 1737 if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) { 1738 int offset = buf - page_address(page) + header_offset; 1739 unsigned int tlen = len + vi->hdr_len; 1740 int num_buf = 1; 1741 1742 xdp_headroom = virtnet_get_headroom(vi); 1743 header_offset = VIRTNET_RX_PAD + xdp_headroom; 1744 headroom = vi->hdr_len + header_offset; 1745 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + 1746 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1747 xdp_page = xdp_linearize_page(rq, &num_buf, page, 1748 offset, header_offset, 1749 &tlen); 1750 if (!xdp_page) 1751 goto err_xdp; 1752 1753 buf = page_address(xdp_page); 1754 put_page(page); 1755 page = xdp_page; 1756 } 1757 1758 xdp_init_buff(&xdp, buflen, &rq->xdp_rxq); 1759 xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len, 1760 xdp_headroom, len, true); 1761 1762 act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); 1763 1764 switch (act) { 1765 case XDP_PASS: 1766 /* Recalculate length in case bpf program changed it */ 1767 len = xdp.data_end - xdp.data; 1768 metasize = xdp.data - xdp.data_meta; 1769 break; 1770 1771 case XDP_TX: 1772 case XDP_REDIRECT: 1773 goto xdp_xmit; 1774 1775 default: 1776 goto err_xdp; 1777 } 1778 1779 skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len); 1780 if (unlikely(!skb)) 1781 goto err; 1782 1783 if (metasize) 1784 skb_metadata_set(skb, metasize); 1785 1786 return skb; 1787 1788 err_xdp: 1789 u64_stats_inc(&stats->xdp_drops); 1790 err: 1791 u64_stats_inc(&stats->drops); 1792 put_page(page); 1793 xdp_xmit: 1794 return NULL; 1795 } 1796 1797 static struct sk_buff *receive_small(struct net_device *dev, 1798 struct virtnet_info *vi, 1799 struct receive_queue *rq, 1800 void *buf, void *ctx, 1801 unsigned int len, 1802 unsigned int *xdp_xmit, 1803 struct virtnet_rq_stats *stats) 1804 { 1805 unsigned int xdp_headroom = (unsigned long)ctx; 1806 struct page *page = virt_to_head_page(buf); 1807 struct sk_buff *skb; 1808 1809 /* We passed the address of virtnet header to virtio-core, 1810 * so truncate the padding. 1811 */ 1812 buf -= VIRTNET_RX_PAD + xdp_headroom; 1813 1814 len -= vi->hdr_len; 1815 u64_stats_add(&stats->bytes, len); 1816 1817 if (unlikely(len > GOOD_PACKET_LEN)) { 1818 pr_debug("%s: rx error: len %u exceeds max size %d\n", 1819 dev->name, len, GOOD_PACKET_LEN); 1820 DEV_STATS_INC(dev, rx_length_errors); 1821 goto err; 1822 } 1823 1824 if (unlikely(vi->xdp_enabled)) { 1825 struct bpf_prog *xdp_prog; 1826 1827 rcu_read_lock(); 1828 xdp_prog = rcu_dereference(rq->xdp_prog); 1829 if (xdp_prog) { 1830 skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf, 1831 xdp_headroom, len, xdp_xmit, 1832 stats); 1833 rcu_read_unlock(); 1834 return skb; 1835 } 1836 rcu_read_unlock(); 1837 } 1838 1839 skb = receive_small_build_skb(vi, xdp_headroom, buf, len); 1840 if (likely(skb)) 1841 return skb; 1842 1843 err: 1844 u64_stats_inc(&stats->drops); 1845 put_page(page); 1846 return NULL; 1847 } 1848 1849 static struct sk_buff *receive_big(struct net_device *dev, 1850 struct virtnet_info *vi, 1851 struct receive_queue *rq, 1852 void *buf, 1853 unsigned int len, 1854 struct virtnet_rq_stats *stats) 1855 { 1856 struct page *page = buf; 1857 struct sk_buff *skb = 1858 page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0); 1859 1860 u64_stats_add(&stats->bytes, len - vi->hdr_len); 1861 if (unlikely(!skb)) 1862 goto err; 1863 1864 return skb; 1865 1866 err: 1867 u64_stats_inc(&stats->drops); 1868 give_pages(rq, page); 1869 return NULL; 1870 } 1871 1872 static void mergeable_buf_free(struct receive_queue *rq, int num_buf, 1873 struct net_device *dev, 1874 struct virtnet_rq_stats *stats) 1875 { 1876 struct page *page; 1877 void *buf; 1878 int len; 1879 1880 while (num_buf-- > 1) { 1881 buf = virtnet_rq_get_buf(rq, &len, NULL); 1882 if (unlikely(!buf)) { 1883 pr_debug("%s: rx error: %d buffers missing\n", 1884 dev->name, num_buf); 1885 DEV_STATS_INC(dev, rx_length_errors); 1886 break; 1887 } 1888 u64_stats_add(&stats->bytes, len); 1889 page = virt_to_head_page(buf); 1890 put_page(page); 1891 } 1892 } 1893 1894 /* Why not use xdp_build_skb_from_frame() ? 1895 * XDP core assumes that xdp frags are PAGE_SIZE in length, while in 1896 * virtio-net there are 2 points that do not match its requirements: 1897 * 1. The size of the prefilled buffer is not fixed before xdp is set. 1898 * 2. xdp_build_skb_from_frame() does more checks that we don't need, 1899 * like eth_type_trans() (which virtio-net does in receive_buf()). 1900 */ 1901 static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev, 1902 struct virtnet_info *vi, 1903 struct xdp_buff *xdp, 1904 unsigned int xdp_frags_truesz) 1905 { 1906 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 1907 unsigned int headroom, data_len; 1908 struct sk_buff *skb; 1909 int metasize; 1910 u8 nr_frags; 1911 1912 if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) { 1913 pr_debug("Error building skb as missing reserved tailroom for xdp"); 1914 return NULL; 1915 } 1916 1917 if (unlikely(xdp_buff_has_frags(xdp))) 1918 nr_frags = sinfo->nr_frags; 1919 1920 skb = build_skb(xdp->data_hard_start, xdp->frame_sz); 1921 if (unlikely(!skb)) 1922 return NULL; 1923 1924 headroom = xdp->data - xdp->data_hard_start; 1925 data_len = xdp->data_end - xdp->data; 1926 skb_reserve(skb, headroom); 1927 __skb_put(skb, data_len); 1928 1929 metasize = xdp->data - xdp->data_meta; 1930 metasize = metasize > 0 ? metasize : 0; 1931 if (metasize) 1932 skb_metadata_set(skb, metasize); 1933 1934 if (unlikely(xdp_buff_has_frags(xdp))) 1935 xdp_update_skb_shared_info(skb, nr_frags, 1936 sinfo->xdp_frags_size, 1937 xdp_frags_truesz, 1938 xdp_buff_is_frag_pfmemalloc(xdp)); 1939 1940 return skb; 1941 } 1942 1943 /* TODO: build xdp in big mode */ 1944 static int virtnet_build_xdp_buff_mrg(struct net_device *dev, 1945 struct virtnet_info *vi, 1946 struct receive_queue *rq, 1947 struct xdp_buff *xdp, 1948 void *buf, 1949 unsigned int len, 1950 unsigned int frame_sz, 1951 int *num_buf, 1952 unsigned int *xdp_frags_truesize, 1953 struct virtnet_rq_stats *stats) 1954 { 1955 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 1956 unsigned int headroom, tailroom, room; 1957 unsigned int truesize, cur_frag_size; 1958 struct skb_shared_info *shinfo; 1959 unsigned int xdp_frags_truesz = 0; 1960 struct page *page; 1961 skb_frag_t *frag; 1962 int offset; 1963 void *ctx; 1964 1965 xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq); 1966 xdp_prepare_buff(xdp, buf - XDP_PACKET_HEADROOM, 1967 XDP_PACKET_HEADROOM + vi->hdr_len, len - vi->hdr_len, true); 1968 1969 if (!*num_buf) 1970 return 0; 1971 1972 if (*num_buf > 1) { 1973 /* If we want to build multi-buffer xdp, we need 1974 * to specify that the flags of xdp_buff have the 1975 * XDP_FLAGS_HAS_FRAG bit. 1976 */ 1977 if (!xdp_buff_has_frags(xdp)) 1978 xdp_buff_set_frags_flag(xdp); 1979 1980 shinfo = xdp_get_shared_info_from_buff(xdp); 1981 shinfo->nr_frags = 0; 1982 shinfo->xdp_frags_size = 0; 1983 } 1984 1985 if (*num_buf > MAX_SKB_FRAGS + 1) 1986 return -EINVAL; 1987 1988 while (--*num_buf > 0) { 1989 buf = virtnet_rq_get_buf(rq, &len, &ctx); 1990 if (unlikely(!buf)) { 1991 pr_debug("%s: rx error: %d buffers out of %d missing\n", 1992 dev->name, *num_buf, 1993 virtio16_to_cpu(vi->vdev, hdr->num_buffers)); 1994 DEV_STATS_INC(dev, rx_length_errors); 1995 goto err; 1996 } 1997 1998 u64_stats_add(&stats->bytes, len); 1999 page = virt_to_head_page(buf); 2000 offset = buf - page_address(page); 2001 2002 truesize = mergeable_ctx_to_truesize(ctx); 2003 headroom = mergeable_ctx_to_headroom(ctx); 2004 tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 2005 room = SKB_DATA_ALIGN(headroom + tailroom); 2006 2007 cur_frag_size = truesize; 2008 xdp_frags_truesz += cur_frag_size; 2009 if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) { 2010 put_page(page); 2011 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 2012 dev->name, len, (unsigned long)(truesize - room)); 2013 DEV_STATS_INC(dev, rx_length_errors); 2014 goto err; 2015 } 2016 2017 frag = &shinfo->frags[shinfo->nr_frags++]; 2018 skb_frag_fill_page_desc(frag, page, offset, len); 2019 if (page_is_pfmemalloc(page)) 2020 xdp_buff_set_frag_pfmemalloc(xdp); 2021 2022 shinfo->xdp_frags_size += len; 2023 } 2024 2025 *xdp_frags_truesize = xdp_frags_truesz; 2026 return 0; 2027 2028 err: 2029 put_xdp_frags(xdp); 2030 return -EINVAL; 2031 } 2032 2033 static void *mergeable_xdp_get_buf(struct virtnet_info *vi, 2034 struct receive_queue *rq, 2035 struct bpf_prog *xdp_prog, 2036 void *ctx, 2037 unsigned int *frame_sz, 2038 int *num_buf, 2039 struct page **page, 2040 int offset, 2041 unsigned int *len, 2042 struct virtio_net_hdr_mrg_rxbuf *hdr) 2043 { 2044 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 2045 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 2046 struct page *xdp_page; 2047 unsigned int xdp_room; 2048 2049 /* Transient failure which in theory could occur if 2050 * in-flight packets from before XDP was enabled reach 2051 * the receive path after XDP is loaded. 2052 */ 2053 if (unlikely(hdr->hdr.gso_type)) 2054 return NULL; 2055 2056 /* Partially checksummed packets must be dropped. */ 2057 if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) 2058 return NULL; 2059 2060 /* Now XDP core assumes frag size is PAGE_SIZE, but buffers 2061 * with headroom may add hole in truesize, which 2062 * make their length exceed PAGE_SIZE. So we disabled the 2063 * hole mechanism for xdp. See add_recvbuf_mergeable(). 2064 */ 2065 *frame_sz = truesize; 2066 2067 if (likely(headroom >= virtnet_get_headroom(vi) && 2068 (*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) { 2069 return page_address(*page) + offset; 2070 } 2071 2072 /* This happens when headroom is not enough because 2073 * of the buffer was prefilled before XDP is set. 2074 * This should only happen for the first several packets. 2075 * In fact, vq reset can be used here to help us clean up 2076 * the prefilled buffers, but many existing devices do not 2077 * support it, and we don't want to bother users who are 2078 * using xdp normally. 2079 */ 2080 if (!xdp_prog->aux->xdp_has_frags) { 2081 /* linearize data for XDP */ 2082 xdp_page = xdp_linearize_page(rq, num_buf, 2083 *page, offset, 2084 XDP_PACKET_HEADROOM, 2085 len); 2086 if (!xdp_page) 2087 return NULL; 2088 } else { 2089 xdp_room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM + 2090 sizeof(struct skb_shared_info)); 2091 if (*len + xdp_room > PAGE_SIZE) 2092 return NULL; 2093 2094 xdp_page = alloc_page(GFP_ATOMIC); 2095 if (!xdp_page) 2096 return NULL; 2097 2098 memcpy(page_address(xdp_page) + XDP_PACKET_HEADROOM, 2099 page_address(*page) + offset, *len); 2100 } 2101 2102 *frame_sz = PAGE_SIZE; 2103 2104 put_page(*page); 2105 2106 *page = xdp_page; 2107 2108 return page_address(*page) + XDP_PACKET_HEADROOM; 2109 } 2110 2111 static struct sk_buff *receive_mergeable_xdp(struct net_device *dev, 2112 struct virtnet_info *vi, 2113 struct receive_queue *rq, 2114 struct bpf_prog *xdp_prog, 2115 void *buf, 2116 void *ctx, 2117 unsigned int len, 2118 unsigned int *xdp_xmit, 2119 struct virtnet_rq_stats *stats) 2120 { 2121 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2122 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 2123 struct page *page = virt_to_head_page(buf); 2124 int offset = buf - page_address(page); 2125 unsigned int xdp_frags_truesz = 0; 2126 struct sk_buff *head_skb; 2127 unsigned int frame_sz; 2128 struct xdp_buff xdp; 2129 void *data; 2130 u32 act; 2131 int err; 2132 2133 data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, &frame_sz, &num_buf, &page, 2134 offset, &len, hdr); 2135 if (unlikely(!data)) 2136 goto err_xdp; 2137 2138 err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz, 2139 &num_buf, &xdp_frags_truesz, stats); 2140 if (unlikely(err)) 2141 goto err_xdp; 2142 2143 act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); 2144 2145 switch (act) { 2146 case XDP_PASS: 2147 head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz); 2148 if (unlikely(!head_skb)) 2149 break; 2150 return head_skb; 2151 2152 case XDP_TX: 2153 case XDP_REDIRECT: 2154 return NULL; 2155 2156 default: 2157 break; 2158 } 2159 2160 put_xdp_frags(&xdp); 2161 2162 err_xdp: 2163 put_page(page); 2164 mergeable_buf_free(rq, num_buf, dev, stats); 2165 2166 u64_stats_inc(&stats->xdp_drops); 2167 u64_stats_inc(&stats->drops); 2168 return NULL; 2169 } 2170 2171 static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb, 2172 struct sk_buff *curr_skb, 2173 struct page *page, void *buf, 2174 int len, int truesize) 2175 { 2176 int num_skb_frags; 2177 int offset; 2178 2179 num_skb_frags = skb_shinfo(curr_skb)->nr_frags; 2180 if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) { 2181 struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC); 2182 2183 if (unlikely(!nskb)) 2184 return NULL; 2185 2186 if (curr_skb == head_skb) 2187 skb_shinfo(curr_skb)->frag_list = nskb; 2188 else 2189 curr_skb->next = nskb; 2190 curr_skb = nskb; 2191 head_skb->truesize += nskb->truesize; 2192 num_skb_frags = 0; 2193 } 2194 2195 if (curr_skb != head_skb) { 2196 head_skb->data_len += len; 2197 head_skb->len += len; 2198 head_skb->truesize += truesize; 2199 } 2200 2201 offset = buf - page_address(page); 2202 if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) { 2203 put_page(page); 2204 skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1, 2205 len, truesize); 2206 } else { 2207 skb_add_rx_frag(curr_skb, num_skb_frags, page, 2208 offset, len, truesize); 2209 } 2210 2211 return curr_skb; 2212 } 2213 2214 static struct sk_buff *receive_mergeable(struct net_device *dev, 2215 struct virtnet_info *vi, 2216 struct receive_queue *rq, 2217 void *buf, 2218 void *ctx, 2219 unsigned int len, 2220 unsigned int *xdp_xmit, 2221 struct virtnet_rq_stats *stats) 2222 { 2223 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2224 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 2225 struct page *page = virt_to_head_page(buf); 2226 int offset = buf - page_address(page); 2227 struct sk_buff *head_skb, *curr_skb; 2228 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 2229 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 2230 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 2231 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); 2232 2233 head_skb = NULL; 2234 u64_stats_add(&stats->bytes, len - vi->hdr_len); 2235 2236 if (unlikely(len > truesize - room)) { 2237 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 2238 dev->name, len, (unsigned long)(truesize - room)); 2239 DEV_STATS_INC(dev, rx_length_errors); 2240 goto err_skb; 2241 } 2242 2243 if (unlikely(vi->xdp_enabled)) { 2244 struct bpf_prog *xdp_prog; 2245 2246 rcu_read_lock(); 2247 xdp_prog = rcu_dereference(rq->xdp_prog); 2248 if (xdp_prog) { 2249 head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx, 2250 len, xdp_xmit, stats); 2251 rcu_read_unlock(); 2252 return head_skb; 2253 } 2254 rcu_read_unlock(); 2255 } 2256 2257 head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom); 2258 curr_skb = head_skb; 2259 2260 if (unlikely(!curr_skb)) 2261 goto err_skb; 2262 while (--num_buf) { 2263 buf = virtnet_rq_get_buf(rq, &len, &ctx); 2264 if (unlikely(!buf)) { 2265 pr_debug("%s: rx error: %d buffers out of %d missing\n", 2266 dev->name, num_buf, 2267 virtio16_to_cpu(vi->vdev, 2268 hdr->num_buffers)); 2269 DEV_STATS_INC(dev, rx_length_errors); 2270 goto err_buf; 2271 } 2272 2273 u64_stats_add(&stats->bytes, len); 2274 page = virt_to_head_page(buf); 2275 2276 truesize = mergeable_ctx_to_truesize(ctx); 2277 headroom = mergeable_ctx_to_headroom(ctx); 2278 tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 2279 room = SKB_DATA_ALIGN(headroom + tailroom); 2280 if (unlikely(len > truesize - room)) { 2281 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 2282 dev->name, len, (unsigned long)(truesize - room)); 2283 DEV_STATS_INC(dev, rx_length_errors); 2284 goto err_skb; 2285 } 2286 2287 curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, 2288 buf, len, truesize); 2289 if (!curr_skb) 2290 goto err_skb; 2291 } 2292 2293 ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len); 2294 return head_skb; 2295 2296 err_skb: 2297 put_page(page); 2298 mergeable_buf_free(rq, num_buf, dev, stats); 2299 2300 err_buf: 2301 u64_stats_inc(&stats->drops); 2302 dev_kfree_skb(head_skb); 2303 return NULL; 2304 } 2305 2306 static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, 2307 struct sk_buff *skb) 2308 { 2309 enum pkt_hash_types rss_hash_type; 2310 2311 if (!hdr_hash || !skb) 2312 return; 2313 2314 switch (__le16_to_cpu(hdr_hash->hash_report)) { 2315 case VIRTIO_NET_HASH_REPORT_TCPv4: 2316 case VIRTIO_NET_HASH_REPORT_UDPv4: 2317 case VIRTIO_NET_HASH_REPORT_TCPv6: 2318 case VIRTIO_NET_HASH_REPORT_UDPv6: 2319 case VIRTIO_NET_HASH_REPORT_TCPv6_EX: 2320 case VIRTIO_NET_HASH_REPORT_UDPv6_EX: 2321 rss_hash_type = PKT_HASH_TYPE_L4; 2322 break; 2323 case VIRTIO_NET_HASH_REPORT_IPv4: 2324 case VIRTIO_NET_HASH_REPORT_IPv6: 2325 case VIRTIO_NET_HASH_REPORT_IPv6_EX: 2326 rss_hash_type = PKT_HASH_TYPE_L3; 2327 break; 2328 case VIRTIO_NET_HASH_REPORT_NONE: 2329 default: 2330 rss_hash_type = PKT_HASH_TYPE_NONE; 2331 } 2332 skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type); 2333 } 2334 2335 static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, 2336 struct sk_buff *skb, u8 flags) 2337 { 2338 struct virtio_net_common_hdr *hdr; 2339 struct net_device *dev = vi->dev; 2340 2341 hdr = skb_vnet_common_hdr(skb); 2342 if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report) 2343 virtio_skb_set_hash(&hdr->hash_v1_hdr, skb); 2344 2345 if (flags & VIRTIO_NET_HDR_F_DATA_VALID) 2346 skb->ip_summed = CHECKSUM_UNNECESSARY; 2347 2348 if (virtio_net_hdr_to_skb(skb, &hdr->hdr, 2349 virtio_is_little_endian(vi->vdev))) { 2350 net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n", 2351 dev->name, hdr->hdr.gso_type, 2352 hdr->hdr.gso_size); 2353 goto frame_err; 2354 } 2355 2356 skb_record_rx_queue(skb, vq2rxq(rq->vq)); 2357 skb->protocol = eth_type_trans(skb, dev); 2358 pr_debug("Receiving skb proto 0x%04x len %i type %i\n", 2359 ntohs(skb->protocol), skb->len, skb->pkt_type); 2360 2361 napi_gro_receive(&rq->napi, skb); 2362 return; 2363 2364 frame_err: 2365 DEV_STATS_INC(dev, rx_frame_errors); 2366 dev_kfree_skb(skb); 2367 } 2368 2369 static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, 2370 void *buf, unsigned int len, void **ctx, 2371 unsigned int *xdp_xmit, 2372 struct virtnet_rq_stats *stats) 2373 { 2374 struct net_device *dev = vi->dev; 2375 struct sk_buff *skb; 2376 u8 flags; 2377 2378 if (unlikely(len < vi->hdr_len + ETH_HLEN)) { 2379 pr_debug("%s: short packet %i\n", dev->name, len); 2380 DEV_STATS_INC(dev, rx_length_errors); 2381 virtnet_rq_free_buf(vi, rq, buf); 2382 return; 2383 } 2384 2385 /* 1. Save the flags early, as the XDP program might overwrite them. 2386 * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID 2387 * stay valid after XDP processing. 2388 * 2. XDP doesn't work with partially checksummed packets (refer to 2389 * virtnet_xdp_set()), so packets marked as 2390 * VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing. 2391 */ 2392 flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; 2393 2394 if (vi->mergeable_rx_bufs) 2395 skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, 2396 stats); 2397 else if (vi->big_packets) 2398 skb = receive_big(dev, vi, rq, buf, len, stats); 2399 else 2400 skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); 2401 2402 if (unlikely(!skb)) 2403 return; 2404 2405 virtnet_receive_done(vi, rq, skb, flags); 2406 } 2407 2408 /* Unlike mergeable buffers, all buffers are allocated to the 2409 * same size, except for the headroom. For this reason we do 2410 * not need to use mergeable_len_to_ctx here - it is enough 2411 * to store the headroom as the context ignoring the truesize. 2412 */ 2413 static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, 2414 gfp_t gfp) 2415 { 2416 char *buf; 2417 unsigned int xdp_headroom = virtnet_get_headroom(vi); 2418 void *ctx = (void *)(unsigned long)xdp_headroom; 2419 int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom; 2420 int err; 2421 2422 len = SKB_DATA_ALIGN(len) + 2423 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 2424 2425 if (unlikely(!skb_page_frag_refill(len, &rq->alloc_frag, gfp))) 2426 return -ENOMEM; 2427 2428 buf = virtnet_rq_alloc(rq, len, gfp); 2429 if (unlikely(!buf)) 2430 return -ENOMEM; 2431 2432 buf += VIRTNET_RX_PAD + xdp_headroom; 2433 2434 virtnet_rq_init_one_sg(rq, buf, vi->hdr_len + GOOD_PACKET_LEN); 2435 2436 err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); 2437 if (err < 0) { 2438 virtnet_rq_unmap(rq, buf, 0); 2439 put_page(virt_to_head_page(buf)); 2440 } 2441 2442 return err; 2443 } 2444 2445 static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, 2446 gfp_t gfp) 2447 { 2448 struct page *first, *list = NULL; 2449 char *p; 2450 int i, err, offset; 2451 2452 sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2); 2453 2454 /* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */ 2455 for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) { 2456 first = get_a_page(rq, gfp); 2457 if (!first) { 2458 if (list) 2459 give_pages(rq, list); 2460 return -ENOMEM; 2461 } 2462 sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE); 2463 2464 /* chain new page in list head to match sg */ 2465 first->private = (unsigned long)list; 2466 list = first; 2467 } 2468 2469 first = get_a_page(rq, gfp); 2470 if (!first) { 2471 give_pages(rq, list); 2472 return -ENOMEM; 2473 } 2474 p = page_address(first); 2475 2476 /* rq->sg[0], rq->sg[1] share the same page */ 2477 /* a separated rq->sg[0] for header - required in case !any_header_sg */ 2478 sg_set_buf(&rq->sg[0], p, vi->hdr_len); 2479 2480 /* rq->sg[1] for data packet, from offset */ 2481 offset = sizeof(struct padded_vnet_hdr); 2482 sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset); 2483 2484 /* chain first in list head */ 2485 first->private = (unsigned long)list; 2486 err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2, 2487 first, gfp); 2488 if (err < 0) 2489 give_pages(rq, first); 2490 2491 return err; 2492 } 2493 2494 static unsigned int get_mergeable_buf_len(struct receive_queue *rq, 2495 struct ewma_pkt_len *avg_pkt_len, 2496 unsigned int room) 2497 { 2498 struct virtnet_info *vi = rq->vq->vdev->priv; 2499 const size_t hdr_len = vi->hdr_len; 2500 unsigned int len; 2501 2502 if (room) 2503 return PAGE_SIZE - room; 2504 2505 len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len), 2506 rq->min_buf_len, PAGE_SIZE - hdr_len); 2507 2508 return ALIGN(len, L1_CACHE_BYTES); 2509 } 2510 2511 static int add_recvbuf_mergeable(struct virtnet_info *vi, 2512 struct receive_queue *rq, gfp_t gfp) 2513 { 2514 struct page_frag *alloc_frag = &rq->alloc_frag; 2515 unsigned int headroom = virtnet_get_headroom(vi); 2516 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 2517 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); 2518 unsigned int len, hole; 2519 void *ctx; 2520 char *buf; 2521 int err; 2522 2523 /* Extra tailroom is needed to satisfy XDP's assumption. This 2524 * means rx frags coalescing won't work, but consider we've 2525 * disabled GSO for XDP, it won't be a big issue. 2526 */ 2527 len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); 2528 2529 if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp))) 2530 return -ENOMEM; 2531 2532 if (!alloc_frag->offset && len + room + sizeof(struct virtnet_rq_dma) > alloc_frag->size) 2533 len -= sizeof(struct virtnet_rq_dma); 2534 2535 buf = virtnet_rq_alloc(rq, len + room, gfp); 2536 if (unlikely(!buf)) 2537 return -ENOMEM; 2538 2539 buf += headroom; /* advance address leaving hole at front of pkt */ 2540 hole = alloc_frag->size - alloc_frag->offset; 2541 if (hole < len + room) { 2542 /* To avoid internal fragmentation, if there is very likely not 2543 * enough space for another buffer, add the remaining space to 2544 * the current buffer. 2545 * XDP core assumes that frame_size of xdp_buff and the length 2546 * of the frag are PAGE_SIZE, so we disable the hole mechanism. 2547 */ 2548 if (!headroom) 2549 len += hole; 2550 alloc_frag->offset += hole; 2551 } 2552 2553 virtnet_rq_init_one_sg(rq, buf, len); 2554 2555 ctx = mergeable_len_to_ctx(len + room, headroom); 2556 err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); 2557 if (err < 0) { 2558 virtnet_rq_unmap(rq, buf, 0); 2559 put_page(virt_to_head_page(buf)); 2560 } 2561 2562 return err; 2563 } 2564 2565 /* 2566 * Returns false if we couldn't fill entirely (OOM). 2567 * 2568 * Normally run in the receive path, but can also be run from ndo_open 2569 * before we're receiving packets, or from refill_work which is 2570 * careful to disable receiving (using napi_disable). 2571 */ 2572 static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq, 2573 gfp_t gfp) 2574 { 2575 int err; 2576 2577 if (rq->xsk_pool) { 2578 err = virtnet_add_recvbuf_xsk(vi, rq, rq->xsk_pool, gfp); 2579 goto kick; 2580 } 2581 2582 do { 2583 if (vi->mergeable_rx_bufs) 2584 err = add_recvbuf_mergeable(vi, rq, gfp); 2585 else if (vi->big_packets) 2586 err = add_recvbuf_big(vi, rq, gfp); 2587 else 2588 err = add_recvbuf_small(vi, rq, gfp); 2589 2590 if (err) 2591 break; 2592 } while (rq->vq->num_free); 2593 2594 kick: 2595 if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) { 2596 unsigned long flags; 2597 2598 flags = u64_stats_update_begin_irqsave(&rq->stats.syncp); 2599 u64_stats_inc(&rq->stats.kicks); 2600 u64_stats_update_end_irqrestore(&rq->stats.syncp, flags); 2601 } 2602 2603 return err != -ENOMEM; 2604 } 2605 2606 static void skb_recv_done(struct virtqueue *rvq) 2607 { 2608 struct virtnet_info *vi = rvq->vdev->priv; 2609 struct receive_queue *rq = &vi->rq[vq2rxq(rvq)]; 2610 2611 rq->calls++; 2612 virtqueue_napi_schedule(&rq->napi, rvq); 2613 } 2614 2615 static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi) 2616 { 2617 napi_enable(napi); 2618 2619 /* If all buffers were filled by other side before we napi_enabled, we 2620 * won't get another interrupt, so process any outstanding packets now. 2621 * Call local_bh_enable after to trigger softIRQ processing. 2622 */ 2623 local_bh_disable(); 2624 virtqueue_napi_schedule(napi, vq); 2625 local_bh_enable(); 2626 } 2627 2628 static void virtnet_napi_tx_enable(struct virtnet_info *vi, 2629 struct virtqueue *vq, 2630 struct napi_struct *napi) 2631 { 2632 if (!napi->weight) 2633 return; 2634 2635 /* Tx napi touches cachelines on the cpu handling tx interrupts. Only 2636 * enable the feature if this is likely affine with the transmit path. 2637 */ 2638 if (!vi->affinity_hint_set) { 2639 napi->weight = 0; 2640 return; 2641 } 2642 2643 return virtnet_napi_enable(vq, napi); 2644 } 2645 2646 static void virtnet_napi_tx_disable(struct napi_struct *napi) 2647 { 2648 if (napi->weight) 2649 napi_disable(napi); 2650 } 2651 2652 static void refill_work(struct work_struct *work) 2653 { 2654 struct virtnet_info *vi = 2655 container_of(work, struct virtnet_info, refill.work); 2656 bool still_empty; 2657 int i; 2658 2659 for (i = 0; i < vi->curr_queue_pairs; i++) { 2660 struct receive_queue *rq = &vi->rq[i]; 2661 2662 napi_disable(&rq->napi); 2663 still_empty = !try_fill_recv(vi, rq, GFP_KERNEL); 2664 virtnet_napi_enable(rq->vq, &rq->napi); 2665 2666 /* In theory, this can happen: if we don't get any buffers in 2667 * we will *never* try to fill again. 2668 */ 2669 if (still_empty) 2670 schedule_delayed_work(&vi->refill, HZ/2); 2671 } 2672 } 2673 2674 static int virtnet_receive_xsk_bufs(struct virtnet_info *vi, 2675 struct receive_queue *rq, 2676 int budget, 2677 unsigned int *xdp_xmit, 2678 struct virtnet_rq_stats *stats) 2679 { 2680 unsigned int len; 2681 int packets = 0; 2682 void *buf; 2683 2684 while (packets < budget) { 2685 buf = virtqueue_get_buf(rq->vq, &len); 2686 if (!buf) 2687 break; 2688 2689 virtnet_receive_xsk_buf(vi, rq, buf, len, xdp_xmit, stats); 2690 packets++; 2691 } 2692 2693 return packets; 2694 } 2695 2696 static int virtnet_receive_packets(struct virtnet_info *vi, 2697 struct receive_queue *rq, 2698 int budget, 2699 unsigned int *xdp_xmit, 2700 struct virtnet_rq_stats *stats) 2701 { 2702 unsigned int len; 2703 int packets = 0; 2704 void *buf; 2705 2706 if (!vi->big_packets || vi->mergeable_rx_bufs) { 2707 void *ctx; 2708 while (packets < budget && 2709 (buf = virtnet_rq_get_buf(rq, &len, &ctx))) { 2710 receive_buf(vi, rq, buf, len, ctx, xdp_xmit, stats); 2711 packets++; 2712 } 2713 } else { 2714 while (packets < budget && 2715 (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) { 2716 receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats); 2717 packets++; 2718 } 2719 } 2720 2721 return packets; 2722 } 2723 2724 static int virtnet_receive(struct receive_queue *rq, int budget, 2725 unsigned int *xdp_xmit) 2726 { 2727 struct virtnet_info *vi = rq->vq->vdev->priv; 2728 struct virtnet_rq_stats stats = {}; 2729 int i, packets; 2730 2731 if (rq->xsk_pool) 2732 packets = virtnet_receive_xsk_bufs(vi, rq, budget, xdp_xmit, &stats); 2733 else 2734 packets = virtnet_receive_packets(vi, rq, budget, xdp_xmit, &stats); 2735 2736 if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { 2737 if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { 2738 spin_lock(&vi->refill_lock); 2739 if (vi->refill_enabled) 2740 schedule_delayed_work(&vi->refill, 0); 2741 spin_unlock(&vi->refill_lock); 2742 } 2743 } 2744 2745 u64_stats_set(&stats.packets, packets); 2746 u64_stats_update_begin(&rq->stats.syncp); 2747 for (i = 0; i < ARRAY_SIZE(virtnet_rq_stats_desc); i++) { 2748 size_t offset = virtnet_rq_stats_desc[i].offset; 2749 u64_stats_t *item, *src; 2750 2751 item = (u64_stats_t *)((u8 *)&rq->stats + offset); 2752 src = (u64_stats_t *)((u8 *)&stats + offset); 2753 u64_stats_add(item, u64_stats_read(src)); 2754 } 2755 2756 u64_stats_add(&rq->stats.packets, u64_stats_read(&stats.packets)); 2757 u64_stats_add(&rq->stats.bytes, u64_stats_read(&stats.bytes)); 2758 2759 u64_stats_update_end(&rq->stats.syncp); 2760 2761 return packets; 2762 } 2763 2764 static void virtnet_poll_cleantx(struct receive_queue *rq, int budget) 2765 { 2766 struct virtnet_info *vi = rq->vq->vdev->priv; 2767 unsigned int index = vq2rxq(rq->vq); 2768 struct send_queue *sq = &vi->sq[index]; 2769 struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index); 2770 2771 if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index)) 2772 return; 2773 2774 if (__netif_tx_trylock(txq)) { 2775 if (sq->reset) { 2776 __netif_tx_unlock(txq); 2777 return; 2778 } 2779 2780 do { 2781 virtqueue_disable_cb(sq->vq); 2782 free_old_xmit(sq, txq, !!budget); 2783 } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 2784 2785 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) { 2786 if (netif_tx_queue_stopped(txq)) { 2787 u64_stats_update_begin(&sq->stats.syncp); 2788 u64_stats_inc(&sq->stats.wake); 2789 u64_stats_update_end(&sq->stats.syncp); 2790 } 2791 netif_tx_wake_queue(txq); 2792 } 2793 2794 __netif_tx_unlock(txq); 2795 } 2796 } 2797 2798 static void virtnet_rx_dim_update(struct virtnet_info *vi, struct receive_queue *rq) 2799 { 2800 struct dim_sample cur_sample = {}; 2801 2802 if (!rq->packets_in_napi) 2803 return; 2804 2805 /* Don't need protection when fetching stats, since fetcher and 2806 * updater of the stats are in same context 2807 */ 2808 dim_update_sample(rq->calls, 2809 u64_stats_read(&rq->stats.packets), 2810 u64_stats_read(&rq->stats.bytes), 2811 &cur_sample); 2812 2813 net_dim(&rq->dim, &cur_sample); 2814 rq->packets_in_napi = 0; 2815 } 2816 2817 static int virtnet_poll(struct napi_struct *napi, int budget) 2818 { 2819 struct receive_queue *rq = 2820 container_of(napi, struct receive_queue, napi); 2821 struct virtnet_info *vi = rq->vq->vdev->priv; 2822 struct send_queue *sq; 2823 unsigned int received; 2824 unsigned int xdp_xmit = 0; 2825 bool napi_complete; 2826 2827 virtnet_poll_cleantx(rq, budget); 2828 2829 received = virtnet_receive(rq, budget, &xdp_xmit); 2830 rq->packets_in_napi += received; 2831 2832 if (xdp_xmit & VIRTIO_XDP_REDIR) 2833 xdp_do_flush(); 2834 2835 /* Out of packets? */ 2836 if (received < budget) { 2837 napi_complete = virtqueue_napi_complete(napi, rq->vq, received); 2838 /* Intentionally not taking dim_lock here. This may result in a 2839 * spurious net_dim call. But if that happens virtnet_rx_dim_work 2840 * will not act on the scheduled work. 2841 */ 2842 if (napi_complete && rq->dim_enabled) 2843 virtnet_rx_dim_update(vi, rq); 2844 } 2845 2846 if (xdp_xmit & VIRTIO_XDP_TX) { 2847 sq = virtnet_xdp_get_sq(vi); 2848 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 2849 u64_stats_update_begin(&sq->stats.syncp); 2850 u64_stats_inc(&sq->stats.kicks); 2851 u64_stats_update_end(&sq->stats.syncp); 2852 } 2853 virtnet_xdp_put_sq(vi, sq); 2854 } 2855 2856 return received; 2857 } 2858 2859 static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index) 2860 { 2861 virtnet_napi_tx_disable(&vi->sq[qp_index].napi); 2862 napi_disable(&vi->rq[qp_index].napi); 2863 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 2864 } 2865 2866 static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index) 2867 { 2868 struct net_device *dev = vi->dev; 2869 int err; 2870 2871 err = xdp_rxq_info_reg(&vi->rq[qp_index].xdp_rxq, dev, qp_index, 2872 vi->rq[qp_index].napi.napi_id); 2873 if (err < 0) 2874 return err; 2875 2876 err = xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq, 2877 MEM_TYPE_PAGE_SHARED, NULL); 2878 if (err < 0) 2879 goto err_xdp_reg_mem_model; 2880 2881 netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, qp_index)); 2882 virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi); 2883 virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi); 2884 2885 return 0; 2886 2887 err_xdp_reg_mem_model: 2888 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 2889 return err; 2890 } 2891 2892 static void virtnet_cancel_dim(struct virtnet_info *vi, struct dim *dim) 2893 { 2894 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 2895 return; 2896 net_dim_work_cancel(dim); 2897 } 2898 2899 static void virtnet_update_settings(struct virtnet_info *vi) 2900 { 2901 u32 speed; 2902 u8 duplex; 2903 2904 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX)) 2905 return; 2906 2907 virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed); 2908 2909 if (ethtool_validate_speed(speed)) 2910 vi->speed = speed; 2911 2912 virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex); 2913 2914 if (ethtool_validate_duplex(duplex)) 2915 vi->duplex = duplex; 2916 } 2917 2918 static int virtnet_open(struct net_device *dev) 2919 { 2920 struct virtnet_info *vi = netdev_priv(dev); 2921 int i, err; 2922 2923 enable_delayed_refill(vi); 2924 2925 for (i = 0; i < vi->max_queue_pairs; i++) { 2926 if (i < vi->curr_queue_pairs) 2927 /* Make sure we have some buffers: if oom use wq. */ 2928 if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) 2929 schedule_delayed_work(&vi->refill, 0); 2930 2931 err = virtnet_enable_queue_pair(vi, i); 2932 if (err < 0) 2933 goto err_enable_qp; 2934 } 2935 2936 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { 2937 if (vi->status & VIRTIO_NET_S_LINK_UP) 2938 netif_carrier_on(vi->dev); 2939 virtio_config_driver_enable(vi->vdev); 2940 } else { 2941 vi->status = VIRTIO_NET_S_LINK_UP; 2942 netif_carrier_on(dev); 2943 } 2944 2945 return 0; 2946 2947 err_enable_qp: 2948 disable_delayed_refill(vi); 2949 cancel_delayed_work_sync(&vi->refill); 2950 2951 for (i--; i >= 0; i--) { 2952 virtnet_disable_queue_pair(vi, i); 2953 virtnet_cancel_dim(vi, &vi->rq[i].dim); 2954 } 2955 2956 return err; 2957 } 2958 2959 static int virtnet_poll_tx(struct napi_struct *napi, int budget) 2960 { 2961 struct send_queue *sq = container_of(napi, struct send_queue, napi); 2962 struct virtnet_info *vi = sq->vq->vdev->priv; 2963 unsigned int index = vq2txq(sq->vq); 2964 struct netdev_queue *txq; 2965 int opaque; 2966 bool done; 2967 2968 if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { 2969 /* We don't need to enable cb for XDP */ 2970 napi_complete_done(napi, 0); 2971 return 0; 2972 } 2973 2974 txq = netdev_get_tx_queue(vi->dev, index); 2975 __netif_tx_lock(txq, raw_smp_processor_id()); 2976 virtqueue_disable_cb(sq->vq); 2977 free_old_xmit(sq, txq, !!budget); 2978 2979 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) { 2980 if (netif_tx_queue_stopped(txq)) { 2981 u64_stats_update_begin(&sq->stats.syncp); 2982 u64_stats_inc(&sq->stats.wake); 2983 u64_stats_update_end(&sq->stats.syncp); 2984 } 2985 netif_tx_wake_queue(txq); 2986 } 2987 2988 opaque = virtqueue_enable_cb_prepare(sq->vq); 2989 2990 done = napi_complete_done(napi, 0); 2991 2992 if (!done) 2993 virtqueue_disable_cb(sq->vq); 2994 2995 __netif_tx_unlock(txq); 2996 2997 if (done) { 2998 if (unlikely(virtqueue_poll(sq->vq, opaque))) { 2999 if (napi_schedule_prep(napi)) { 3000 __netif_tx_lock(txq, raw_smp_processor_id()); 3001 virtqueue_disable_cb(sq->vq); 3002 __netif_tx_unlock(txq); 3003 __napi_schedule(napi); 3004 } 3005 } 3006 } 3007 3008 return 0; 3009 } 3010 3011 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) 3012 { 3013 struct virtio_net_hdr_mrg_rxbuf *hdr; 3014 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; 3015 struct virtnet_info *vi = sq->vq->vdev->priv; 3016 int num_sg; 3017 unsigned hdr_len = vi->hdr_len; 3018 bool can_push; 3019 3020 pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); 3021 3022 can_push = vi->any_header_sg && 3023 !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) && 3024 !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len; 3025 /* Even if we can, don't push here yet as this would skew 3026 * csum_start offset below. */ 3027 if (can_push) 3028 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len); 3029 else 3030 hdr = &skb_vnet_common_hdr(skb)->mrg_hdr; 3031 3032 if (virtio_net_hdr_from_skb(skb, &hdr->hdr, 3033 virtio_is_little_endian(vi->vdev), false, 3034 0)) 3035 return -EPROTO; 3036 3037 if (vi->mergeable_rx_bufs) 3038 hdr->num_buffers = 0; 3039 3040 sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2)); 3041 if (can_push) { 3042 __skb_push(skb, hdr_len); 3043 num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len); 3044 if (unlikely(num_sg < 0)) 3045 return num_sg; 3046 /* Pull header back to avoid skew in tx bytes calculations. */ 3047 __skb_pull(skb, hdr_len); 3048 } else { 3049 sg_set_buf(sq->sg, hdr, hdr_len); 3050 num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len); 3051 if (unlikely(num_sg < 0)) 3052 return num_sg; 3053 num_sg++; 3054 } 3055 return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, 3056 skb_to_ptr(skb, orphan), GFP_ATOMIC); 3057 } 3058 3059 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) 3060 { 3061 struct virtnet_info *vi = netdev_priv(dev); 3062 int qnum = skb_get_queue_mapping(skb); 3063 struct send_queue *sq = &vi->sq[qnum]; 3064 int err; 3065 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 3066 bool xmit_more = netdev_xmit_more(); 3067 bool use_napi = sq->napi.weight; 3068 bool kick; 3069 3070 /* Free up any pending old buffers before queueing new ones. */ 3071 do { 3072 if (use_napi) 3073 virtqueue_disable_cb(sq->vq); 3074 3075 free_old_xmit(sq, txq, false); 3076 3077 } while (use_napi && !xmit_more && 3078 unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 3079 3080 /* timestamp packet in software */ 3081 skb_tx_timestamp(skb); 3082 3083 /* Try to transmit */ 3084 err = xmit_skb(sq, skb, !use_napi); 3085 3086 /* This should not happen! */ 3087 if (unlikely(err)) { 3088 DEV_STATS_INC(dev, tx_fifo_errors); 3089 if (net_ratelimit()) 3090 dev_warn(&dev->dev, 3091 "Unexpected TXQ (%d) queue failure: %d\n", 3092 qnum, err); 3093 DEV_STATS_INC(dev, tx_dropped); 3094 dev_kfree_skb_any(skb); 3095 return NETDEV_TX_OK; 3096 } 3097 3098 /* Don't wait up for transmitted skbs to be freed. */ 3099 if (!use_napi) { 3100 skb_orphan(skb); 3101 nf_reset_ct(skb); 3102 } 3103 3104 check_sq_full_and_disable(vi, dev, sq); 3105 3106 kick = use_napi ? __netdev_tx_sent_queue(txq, skb->len, xmit_more) : 3107 !xmit_more || netif_xmit_stopped(txq); 3108 if (kick) { 3109 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 3110 u64_stats_update_begin(&sq->stats.syncp); 3111 u64_stats_inc(&sq->stats.kicks); 3112 u64_stats_update_end(&sq->stats.syncp); 3113 } 3114 } 3115 3116 return NETDEV_TX_OK; 3117 } 3118 3119 static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) 3120 { 3121 bool running = netif_running(vi->dev); 3122 3123 if (running) { 3124 napi_disable(&rq->napi); 3125 virtnet_cancel_dim(vi, &rq->dim); 3126 } 3127 } 3128 3129 static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq) 3130 { 3131 bool running = netif_running(vi->dev); 3132 3133 if (!try_fill_recv(vi, rq, GFP_KERNEL)) 3134 schedule_delayed_work(&vi->refill, 0); 3135 3136 if (running) 3137 virtnet_napi_enable(rq->vq, &rq->napi); 3138 } 3139 3140 static int virtnet_rx_resize(struct virtnet_info *vi, 3141 struct receive_queue *rq, u32 ring_num) 3142 { 3143 int err, qindex; 3144 3145 qindex = rq - vi->rq; 3146 3147 virtnet_rx_pause(vi, rq); 3148 3149 err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_unmap_free_buf); 3150 if (err) 3151 netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err); 3152 3153 virtnet_rx_resume(vi, rq); 3154 return err; 3155 } 3156 3157 static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq) 3158 { 3159 bool running = netif_running(vi->dev); 3160 struct netdev_queue *txq; 3161 int qindex; 3162 3163 qindex = sq - vi->sq; 3164 3165 if (running) 3166 virtnet_napi_tx_disable(&sq->napi); 3167 3168 txq = netdev_get_tx_queue(vi->dev, qindex); 3169 3170 /* 1. wait all ximt complete 3171 * 2. fix the race of netif_stop_subqueue() vs netif_start_subqueue() 3172 */ 3173 __netif_tx_lock_bh(txq); 3174 3175 /* Prevent rx poll from accessing sq. */ 3176 sq->reset = true; 3177 3178 /* Prevent the upper layer from trying to send packets. */ 3179 netif_stop_subqueue(vi->dev, qindex); 3180 3181 __netif_tx_unlock_bh(txq); 3182 } 3183 3184 static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq) 3185 { 3186 bool running = netif_running(vi->dev); 3187 struct netdev_queue *txq; 3188 int qindex; 3189 3190 qindex = sq - vi->sq; 3191 3192 txq = netdev_get_tx_queue(vi->dev, qindex); 3193 3194 __netif_tx_lock_bh(txq); 3195 sq->reset = false; 3196 netif_tx_wake_queue(txq); 3197 __netif_tx_unlock_bh(txq); 3198 3199 if (running) 3200 virtnet_napi_tx_enable(vi, sq->vq, &sq->napi); 3201 } 3202 3203 static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq, 3204 u32 ring_num) 3205 { 3206 int qindex, err; 3207 3208 qindex = sq - vi->sq; 3209 3210 virtnet_tx_pause(vi, sq); 3211 3212 err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf); 3213 if (err) 3214 netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err); 3215 3216 virtnet_tx_resume(vi, sq); 3217 3218 return err; 3219 } 3220 3221 /* 3222 * Send command via the control virtqueue and check status. Commands 3223 * supported by the hypervisor, as indicated by feature bits, should 3224 * never fail unless improperly formatted. 3225 */ 3226 static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd, 3227 struct scatterlist *out, 3228 struct scatterlist *in) 3229 { 3230 struct scatterlist *sgs[5], hdr, stat; 3231 u32 out_num = 0, tmp, in_num = 0; 3232 bool ok; 3233 int ret; 3234 3235 /* Caller should know better */ 3236 BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)); 3237 3238 mutex_lock(&vi->cvq_lock); 3239 vi->ctrl->status = ~0; 3240 vi->ctrl->hdr.class = class; 3241 vi->ctrl->hdr.cmd = cmd; 3242 /* Add header */ 3243 sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr)); 3244 sgs[out_num++] = &hdr; 3245 3246 if (out) 3247 sgs[out_num++] = out; 3248 3249 /* Add return status. */ 3250 sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status)); 3251 sgs[out_num + in_num++] = &stat; 3252 3253 if (in) 3254 sgs[out_num + in_num++] = in; 3255 3256 BUG_ON(out_num + in_num > ARRAY_SIZE(sgs)); 3257 ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC); 3258 if (ret < 0) { 3259 dev_warn(&vi->vdev->dev, 3260 "Failed to add sgs for command vq: %d\n.", ret); 3261 mutex_unlock(&vi->cvq_lock); 3262 return false; 3263 } 3264 3265 if (unlikely(!virtqueue_kick(vi->cvq))) 3266 goto unlock; 3267 3268 /* Spin for a response, the kick causes an ioport write, trapping 3269 * into the hypervisor, so the request should be handled immediately. 3270 */ 3271 while (!virtqueue_get_buf(vi->cvq, &tmp) && 3272 !virtqueue_is_broken(vi->cvq)) { 3273 cond_resched(); 3274 cpu_relax(); 3275 } 3276 3277 unlock: 3278 ok = vi->ctrl->status == VIRTIO_NET_OK; 3279 mutex_unlock(&vi->cvq_lock); 3280 return ok; 3281 } 3282 3283 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, 3284 struct scatterlist *out) 3285 { 3286 return virtnet_send_command_reply(vi, class, cmd, out, NULL); 3287 } 3288 3289 static int virtnet_set_mac_address(struct net_device *dev, void *p) 3290 { 3291 struct virtnet_info *vi = netdev_priv(dev); 3292 struct virtio_device *vdev = vi->vdev; 3293 int ret; 3294 struct sockaddr *addr; 3295 struct scatterlist sg; 3296 3297 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 3298 return -EOPNOTSUPP; 3299 3300 addr = kmemdup(p, sizeof(*addr), GFP_KERNEL); 3301 if (!addr) 3302 return -ENOMEM; 3303 3304 ret = eth_prepare_mac_addr_change(dev, addr); 3305 if (ret) 3306 goto out; 3307 3308 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 3309 sg_init_one(&sg, addr->sa_data, dev->addr_len); 3310 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 3311 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 3312 dev_warn(&vdev->dev, 3313 "Failed to set mac address by vq command.\n"); 3314 ret = -EINVAL; 3315 goto out; 3316 } 3317 } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 3318 !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { 3319 unsigned int i; 3320 3321 /* Naturally, this has an atomicity problem. */ 3322 for (i = 0; i < dev->addr_len; i++) 3323 virtio_cwrite8(vdev, 3324 offsetof(struct virtio_net_config, mac) + 3325 i, addr->sa_data[i]); 3326 } 3327 3328 eth_commit_mac_addr_change(dev, p); 3329 ret = 0; 3330 3331 out: 3332 kfree(addr); 3333 return ret; 3334 } 3335 3336 static void virtnet_stats(struct net_device *dev, 3337 struct rtnl_link_stats64 *tot) 3338 { 3339 struct virtnet_info *vi = netdev_priv(dev); 3340 unsigned int start; 3341 int i; 3342 3343 for (i = 0; i < vi->max_queue_pairs; i++) { 3344 u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops; 3345 struct receive_queue *rq = &vi->rq[i]; 3346 struct send_queue *sq = &vi->sq[i]; 3347 3348 do { 3349 start = u64_stats_fetch_begin(&sq->stats.syncp); 3350 tpackets = u64_stats_read(&sq->stats.packets); 3351 tbytes = u64_stats_read(&sq->stats.bytes); 3352 terrors = u64_stats_read(&sq->stats.tx_timeouts); 3353 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 3354 3355 do { 3356 start = u64_stats_fetch_begin(&rq->stats.syncp); 3357 rpackets = u64_stats_read(&rq->stats.packets); 3358 rbytes = u64_stats_read(&rq->stats.bytes); 3359 rdrops = u64_stats_read(&rq->stats.drops); 3360 } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); 3361 3362 tot->rx_packets += rpackets; 3363 tot->tx_packets += tpackets; 3364 tot->rx_bytes += rbytes; 3365 tot->tx_bytes += tbytes; 3366 tot->rx_dropped += rdrops; 3367 tot->tx_errors += terrors; 3368 } 3369 3370 tot->tx_dropped = DEV_STATS_READ(dev, tx_dropped); 3371 tot->tx_fifo_errors = DEV_STATS_READ(dev, tx_fifo_errors); 3372 tot->rx_length_errors = DEV_STATS_READ(dev, rx_length_errors); 3373 tot->rx_frame_errors = DEV_STATS_READ(dev, rx_frame_errors); 3374 } 3375 3376 static void virtnet_ack_link_announce(struct virtnet_info *vi) 3377 { 3378 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE, 3379 VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL)) 3380 dev_warn(&vi->dev->dev, "Failed to ack link announce.\n"); 3381 } 3382 3383 static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) 3384 { 3385 struct virtio_net_ctrl_mq *mq __free(kfree) = NULL; 3386 struct scatterlist sg; 3387 struct net_device *dev = vi->dev; 3388 3389 if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ)) 3390 return 0; 3391 3392 mq = kzalloc(sizeof(*mq), GFP_KERNEL); 3393 if (!mq) 3394 return -ENOMEM; 3395 3396 mq->virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs); 3397 sg_init_one(&sg, mq, sizeof(*mq)); 3398 3399 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 3400 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) { 3401 dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n", 3402 queue_pairs); 3403 return -EINVAL; 3404 } else { 3405 vi->curr_queue_pairs = queue_pairs; 3406 /* virtnet_open() will refill when device is going to up. */ 3407 if (dev->flags & IFF_UP) 3408 schedule_delayed_work(&vi->refill, 0); 3409 } 3410 3411 return 0; 3412 } 3413 3414 static int virtnet_close(struct net_device *dev) 3415 { 3416 struct virtnet_info *vi = netdev_priv(dev); 3417 int i; 3418 3419 /* Make sure NAPI doesn't schedule refill work */ 3420 disable_delayed_refill(vi); 3421 /* Make sure refill_work doesn't re-enable napi! */ 3422 cancel_delayed_work_sync(&vi->refill); 3423 /* Prevent the config change callback from changing carrier 3424 * after close 3425 */ 3426 virtio_config_driver_disable(vi->vdev); 3427 /* Stop getting status/speed updates: we don't care until next 3428 * open 3429 */ 3430 cancel_work_sync(&vi->config_work); 3431 3432 for (i = 0; i < vi->max_queue_pairs; i++) { 3433 virtnet_disable_queue_pair(vi, i); 3434 virtnet_cancel_dim(vi, &vi->rq[i].dim); 3435 } 3436 3437 netif_carrier_off(dev); 3438 3439 return 0; 3440 } 3441 3442 static void virtnet_rx_mode_work(struct work_struct *work) 3443 { 3444 struct virtnet_info *vi = 3445 container_of(work, struct virtnet_info, rx_mode_work); 3446 u8 *promisc_allmulti __free(kfree) = NULL; 3447 struct net_device *dev = vi->dev; 3448 struct scatterlist sg[2]; 3449 struct virtio_net_ctrl_mac *mac_data; 3450 struct netdev_hw_addr *ha; 3451 int uc_count; 3452 int mc_count; 3453 void *buf; 3454 int i; 3455 3456 /* We can't dynamically set ndo_set_rx_mode, so return gracefully */ 3457 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX)) 3458 return; 3459 3460 promisc_allmulti = kzalloc(sizeof(*promisc_allmulti), GFP_KERNEL); 3461 if (!promisc_allmulti) { 3462 dev_warn(&dev->dev, "Failed to set RX mode, no memory.\n"); 3463 return; 3464 } 3465 3466 rtnl_lock(); 3467 3468 *promisc_allmulti = !!(dev->flags & IFF_PROMISC); 3469 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); 3470 3471 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 3472 VIRTIO_NET_CTRL_RX_PROMISC, sg)) 3473 dev_warn(&dev->dev, "Failed to %sable promisc mode.\n", 3474 *promisc_allmulti ? "en" : "dis"); 3475 3476 *promisc_allmulti = !!(dev->flags & IFF_ALLMULTI); 3477 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); 3478 3479 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 3480 VIRTIO_NET_CTRL_RX_ALLMULTI, sg)) 3481 dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n", 3482 *promisc_allmulti ? "en" : "dis"); 3483 3484 netif_addr_lock_bh(dev); 3485 3486 uc_count = netdev_uc_count(dev); 3487 mc_count = netdev_mc_count(dev); 3488 /* MAC filter - use one buffer for both lists */ 3489 buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) + 3490 (2 * sizeof(mac_data->entries)), GFP_ATOMIC); 3491 mac_data = buf; 3492 if (!buf) { 3493 netif_addr_unlock_bh(dev); 3494 rtnl_unlock(); 3495 return; 3496 } 3497 3498 sg_init_table(sg, 2); 3499 3500 /* Store the unicast list and count in the front of the buffer */ 3501 mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count); 3502 i = 0; 3503 netdev_for_each_uc_addr(ha, dev) 3504 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 3505 3506 sg_set_buf(&sg[0], mac_data, 3507 sizeof(mac_data->entries) + (uc_count * ETH_ALEN)); 3508 3509 /* multicast list and count fill the end */ 3510 mac_data = (void *)&mac_data->macs[uc_count][0]; 3511 3512 mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count); 3513 i = 0; 3514 netdev_for_each_mc_addr(ha, dev) 3515 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 3516 3517 netif_addr_unlock_bh(dev); 3518 3519 sg_set_buf(&sg[1], mac_data, 3520 sizeof(mac_data->entries) + (mc_count * ETH_ALEN)); 3521 3522 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 3523 VIRTIO_NET_CTRL_MAC_TABLE_SET, sg)) 3524 dev_warn(&dev->dev, "Failed to set MAC filter table.\n"); 3525 3526 rtnl_unlock(); 3527 3528 kfree(buf); 3529 } 3530 3531 static void virtnet_set_rx_mode(struct net_device *dev) 3532 { 3533 struct virtnet_info *vi = netdev_priv(dev); 3534 3535 if (vi->rx_mode_work_enabled) 3536 schedule_work(&vi->rx_mode_work); 3537 } 3538 3539 static int virtnet_vlan_rx_add_vid(struct net_device *dev, 3540 __be16 proto, u16 vid) 3541 { 3542 struct virtnet_info *vi = netdev_priv(dev); 3543 __virtio16 *_vid __free(kfree) = NULL; 3544 struct scatterlist sg; 3545 3546 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL); 3547 if (!_vid) 3548 return -ENOMEM; 3549 3550 *_vid = cpu_to_virtio16(vi->vdev, vid); 3551 sg_init_one(&sg, _vid, sizeof(*_vid)); 3552 3553 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 3554 VIRTIO_NET_CTRL_VLAN_ADD, &sg)) 3555 dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid); 3556 return 0; 3557 } 3558 3559 static int virtnet_vlan_rx_kill_vid(struct net_device *dev, 3560 __be16 proto, u16 vid) 3561 { 3562 struct virtnet_info *vi = netdev_priv(dev); 3563 __virtio16 *_vid __free(kfree) = NULL; 3564 struct scatterlist sg; 3565 3566 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL); 3567 if (!_vid) 3568 return -ENOMEM; 3569 3570 *_vid = cpu_to_virtio16(vi->vdev, vid); 3571 sg_init_one(&sg, _vid, sizeof(*_vid)); 3572 3573 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 3574 VIRTIO_NET_CTRL_VLAN_DEL, &sg)) 3575 dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid); 3576 return 0; 3577 } 3578 3579 static void virtnet_clean_affinity(struct virtnet_info *vi) 3580 { 3581 int i; 3582 3583 if (vi->affinity_hint_set) { 3584 for (i = 0; i < vi->max_queue_pairs; i++) { 3585 virtqueue_set_affinity(vi->rq[i].vq, NULL); 3586 virtqueue_set_affinity(vi->sq[i].vq, NULL); 3587 } 3588 3589 vi->affinity_hint_set = false; 3590 } 3591 } 3592 3593 static void virtnet_set_affinity(struct virtnet_info *vi) 3594 { 3595 cpumask_var_t mask; 3596 int stragglers; 3597 int group_size; 3598 int i, j, cpu; 3599 int num_cpu; 3600 int stride; 3601 3602 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { 3603 virtnet_clean_affinity(vi); 3604 return; 3605 } 3606 3607 num_cpu = num_online_cpus(); 3608 stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1); 3609 stragglers = num_cpu >= vi->curr_queue_pairs ? 3610 num_cpu % vi->curr_queue_pairs : 3611 0; 3612 cpu = cpumask_first(cpu_online_mask); 3613 3614 for (i = 0; i < vi->curr_queue_pairs; i++) { 3615 group_size = stride + (i < stragglers ? 1 : 0); 3616 3617 for (j = 0; j < group_size; j++) { 3618 cpumask_set_cpu(cpu, mask); 3619 cpu = cpumask_next_wrap(cpu, cpu_online_mask, 3620 nr_cpu_ids, false); 3621 } 3622 virtqueue_set_affinity(vi->rq[i].vq, mask); 3623 virtqueue_set_affinity(vi->sq[i].vq, mask); 3624 __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS); 3625 cpumask_clear(mask); 3626 } 3627 3628 vi->affinity_hint_set = true; 3629 free_cpumask_var(mask); 3630 } 3631 3632 static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node) 3633 { 3634 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 3635 node); 3636 virtnet_set_affinity(vi); 3637 return 0; 3638 } 3639 3640 static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node) 3641 { 3642 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 3643 node_dead); 3644 virtnet_set_affinity(vi); 3645 return 0; 3646 } 3647 3648 static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node) 3649 { 3650 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 3651 node); 3652 3653 virtnet_clean_affinity(vi); 3654 return 0; 3655 } 3656 3657 static enum cpuhp_state virtionet_online; 3658 3659 static int virtnet_cpu_notif_add(struct virtnet_info *vi) 3660 { 3661 int ret; 3662 3663 ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node); 3664 if (ret) 3665 return ret; 3666 ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD, 3667 &vi->node_dead); 3668 if (!ret) 3669 return ret; 3670 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 3671 return ret; 3672 } 3673 3674 static void virtnet_cpu_notif_remove(struct virtnet_info *vi) 3675 { 3676 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 3677 cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD, 3678 &vi->node_dead); 3679 } 3680 3681 static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi, 3682 u16 vqn, u32 max_usecs, u32 max_packets) 3683 { 3684 struct virtio_net_ctrl_coal_vq *coal_vq __free(kfree) = NULL; 3685 struct scatterlist sgs; 3686 3687 coal_vq = kzalloc(sizeof(*coal_vq), GFP_KERNEL); 3688 if (!coal_vq) 3689 return -ENOMEM; 3690 3691 coal_vq->vqn = cpu_to_le16(vqn); 3692 coal_vq->coal.max_usecs = cpu_to_le32(max_usecs); 3693 coal_vq->coal.max_packets = cpu_to_le32(max_packets); 3694 sg_init_one(&sgs, coal_vq, sizeof(*coal_vq)); 3695 3696 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 3697 VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET, 3698 &sgs)) 3699 return -EINVAL; 3700 3701 return 0; 3702 } 3703 3704 static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info *vi, 3705 u16 queue, u32 max_usecs, 3706 u32 max_packets) 3707 { 3708 int err; 3709 3710 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 3711 return -EOPNOTSUPP; 3712 3713 err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue), 3714 max_usecs, max_packets); 3715 if (err) 3716 return err; 3717 3718 vi->rq[queue].intr_coal.max_usecs = max_usecs; 3719 vi->rq[queue].intr_coal.max_packets = max_packets; 3720 3721 return 0; 3722 } 3723 3724 static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info *vi, 3725 u16 queue, u32 max_usecs, 3726 u32 max_packets) 3727 { 3728 int err; 3729 3730 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 3731 return -EOPNOTSUPP; 3732 3733 err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue), 3734 max_usecs, max_packets); 3735 if (err) 3736 return err; 3737 3738 vi->sq[queue].intr_coal.max_usecs = max_usecs; 3739 vi->sq[queue].intr_coal.max_packets = max_packets; 3740 3741 return 0; 3742 } 3743 3744 static void virtnet_get_ringparam(struct net_device *dev, 3745 struct ethtool_ringparam *ring, 3746 struct kernel_ethtool_ringparam *kernel_ring, 3747 struct netlink_ext_ack *extack) 3748 { 3749 struct virtnet_info *vi = netdev_priv(dev); 3750 3751 ring->rx_max_pending = vi->rq[0].vq->num_max; 3752 ring->tx_max_pending = vi->sq[0].vq->num_max; 3753 ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 3754 ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 3755 } 3756 3757 static int virtnet_set_ringparam(struct net_device *dev, 3758 struct ethtool_ringparam *ring, 3759 struct kernel_ethtool_ringparam *kernel_ring, 3760 struct netlink_ext_ack *extack) 3761 { 3762 struct virtnet_info *vi = netdev_priv(dev); 3763 u32 rx_pending, tx_pending; 3764 struct receive_queue *rq; 3765 struct send_queue *sq; 3766 int i, err; 3767 3768 if (ring->rx_mini_pending || ring->rx_jumbo_pending) 3769 return -EINVAL; 3770 3771 rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 3772 tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 3773 3774 if (ring->rx_pending == rx_pending && 3775 ring->tx_pending == tx_pending) 3776 return 0; 3777 3778 if (ring->rx_pending > vi->rq[0].vq->num_max) 3779 return -EINVAL; 3780 3781 if (ring->tx_pending > vi->sq[0].vq->num_max) 3782 return -EINVAL; 3783 3784 for (i = 0; i < vi->max_queue_pairs; i++) { 3785 rq = vi->rq + i; 3786 sq = vi->sq + i; 3787 3788 if (ring->tx_pending != tx_pending) { 3789 err = virtnet_tx_resize(vi, sq, ring->tx_pending); 3790 if (err) 3791 return err; 3792 3793 /* Upon disabling and re-enabling a transmit virtqueue, the device must 3794 * set the coalescing parameters of the virtqueue to those configured 3795 * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver 3796 * did not set any TX coalescing parameters, to 0. 3797 */ 3798 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, i, 3799 vi->intr_coal_tx.max_usecs, 3800 vi->intr_coal_tx.max_packets); 3801 3802 /* Don't break the tx resize action if the vq coalescing is not 3803 * supported. The same is true for rx resize below. 3804 */ 3805 if (err && err != -EOPNOTSUPP) 3806 return err; 3807 } 3808 3809 if (ring->rx_pending != rx_pending) { 3810 err = virtnet_rx_resize(vi, rq, ring->rx_pending); 3811 if (err) 3812 return err; 3813 3814 /* The reason is same as the transmit virtqueue reset */ 3815 mutex_lock(&vi->rq[i].dim_lock); 3816 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, i, 3817 vi->intr_coal_rx.max_usecs, 3818 vi->intr_coal_rx.max_packets); 3819 mutex_unlock(&vi->rq[i].dim_lock); 3820 if (err && err != -EOPNOTSUPP) 3821 return err; 3822 } 3823 } 3824 3825 return 0; 3826 } 3827 3828 static bool virtnet_commit_rss_command(struct virtnet_info *vi) 3829 { 3830 struct net_device *dev = vi->dev; 3831 struct scatterlist sgs[4]; 3832 unsigned int sg_buf_size; 3833 3834 /* prepare sgs */ 3835 sg_init_table(sgs, 4); 3836 3837 sg_buf_size = offsetof(struct virtio_net_ctrl_rss, indirection_table); 3838 sg_set_buf(&sgs[0], &vi->rss, sg_buf_size); 3839 3840 sg_buf_size = sizeof(uint16_t) * (vi->rss.indirection_table_mask + 1); 3841 sg_set_buf(&sgs[1], vi->rss.indirection_table, sg_buf_size); 3842 3843 sg_buf_size = offsetof(struct virtio_net_ctrl_rss, key) 3844 - offsetof(struct virtio_net_ctrl_rss, max_tx_vq); 3845 sg_set_buf(&sgs[2], &vi->rss.max_tx_vq, sg_buf_size); 3846 3847 sg_buf_size = vi->rss_key_size; 3848 sg_set_buf(&sgs[3], vi->rss.key, sg_buf_size); 3849 3850 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 3851 vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG 3852 : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs)) 3853 goto err; 3854 3855 return true; 3856 3857 err: 3858 dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n"); 3859 return false; 3860 3861 } 3862 3863 static void virtnet_init_default_rss(struct virtnet_info *vi) 3864 { 3865 u32 indir_val = 0; 3866 int i = 0; 3867 3868 vi->rss.hash_types = vi->rss_hash_types_supported; 3869 vi->rss_hash_types_saved = vi->rss_hash_types_supported; 3870 vi->rss.indirection_table_mask = vi->rss_indir_table_size 3871 ? vi->rss_indir_table_size - 1 : 0; 3872 vi->rss.unclassified_queue = 0; 3873 3874 for (; i < vi->rss_indir_table_size; ++i) { 3875 indir_val = ethtool_rxfh_indir_default(i, vi->curr_queue_pairs); 3876 vi->rss.indirection_table[i] = indir_val; 3877 } 3878 3879 vi->rss.max_tx_vq = vi->has_rss ? vi->curr_queue_pairs : 0; 3880 vi->rss.hash_key_length = vi->rss_key_size; 3881 3882 netdev_rss_key_fill(vi->rss.key, vi->rss_key_size); 3883 } 3884 3885 static void virtnet_get_hashflow(const struct virtnet_info *vi, struct ethtool_rxnfc *info) 3886 { 3887 info->data = 0; 3888 switch (info->flow_type) { 3889 case TCP_V4_FLOW: 3890 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) { 3891 info->data = RXH_IP_SRC | RXH_IP_DST | 3892 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3893 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 3894 info->data = RXH_IP_SRC | RXH_IP_DST; 3895 } 3896 break; 3897 case TCP_V6_FLOW: 3898 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) { 3899 info->data = RXH_IP_SRC | RXH_IP_DST | 3900 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3901 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 3902 info->data = RXH_IP_SRC | RXH_IP_DST; 3903 } 3904 break; 3905 case UDP_V4_FLOW: 3906 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) { 3907 info->data = RXH_IP_SRC | RXH_IP_DST | 3908 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3909 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 3910 info->data = RXH_IP_SRC | RXH_IP_DST; 3911 } 3912 break; 3913 case UDP_V6_FLOW: 3914 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) { 3915 info->data = RXH_IP_SRC | RXH_IP_DST | 3916 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3917 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 3918 info->data = RXH_IP_SRC | RXH_IP_DST; 3919 } 3920 break; 3921 case IPV4_FLOW: 3922 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) 3923 info->data = RXH_IP_SRC | RXH_IP_DST; 3924 3925 break; 3926 case IPV6_FLOW: 3927 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) 3928 info->data = RXH_IP_SRC | RXH_IP_DST; 3929 3930 break; 3931 default: 3932 info->data = 0; 3933 break; 3934 } 3935 } 3936 3937 static bool virtnet_set_hashflow(struct virtnet_info *vi, struct ethtool_rxnfc *info) 3938 { 3939 u32 new_hashtypes = vi->rss_hash_types_saved; 3940 bool is_disable = info->data & RXH_DISCARD; 3941 bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3); 3942 3943 /* supports only 'sd', 'sdfn' and 'r' */ 3944 if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable)) 3945 return false; 3946 3947 switch (info->flow_type) { 3948 case TCP_V4_FLOW: 3949 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4); 3950 if (!is_disable) 3951 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 3952 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0); 3953 break; 3954 case UDP_V4_FLOW: 3955 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4); 3956 if (!is_disable) 3957 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 3958 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0); 3959 break; 3960 case IPV4_FLOW: 3961 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4; 3962 if (!is_disable) 3963 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4; 3964 break; 3965 case TCP_V6_FLOW: 3966 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6); 3967 if (!is_disable) 3968 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 3969 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0); 3970 break; 3971 case UDP_V6_FLOW: 3972 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6); 3973 if (!is_disable) 3974 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 3975 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0); 3976 break; 3977 case IPV6_FLOW: 3978 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6; 3979 if (!is_disable) 3980 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6; 3981 break; 3982 default: 3983 /* unsupported flow */ 3984 return false; 3985 } 3986 3987 /* if unsupported hashtype was set */ 3988 if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported)) 3989 return false; 3990 3991 if (new_hashtypes != vi->rss_hash_types_saved) { 3992 vi->rss_hash_types_saved = new_hashtypes; 3993 vi->rss.hash_types = vi->rss_hash_types_saved; 3994 if (vi->dev->features & NETIF_F_RXHASH) 3995 return virtnet_commit_rss_command(vi); 3996 } 3997 3998 return true; 3999 } 4000 4001 static void virtnet_get_drvinfo(struct net_device *dev, 4002 struct ethtool_drvinfo *info) 4003 { 4004 struct virtnet_info *vi = netdev_priv(dev); 4005 struct virtio_device *vdev = vi->vdev; 4006 4007 strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); 4008 strscpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version)); 4009 strscpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info)); 4010 4011 } 4012 4013 /* TODO: Eliminate OOO packets during switching */ 4014 static int virtnet_set_channels(struct net_device *dev, 4015 struct ethtool_channels *channels) 4016 { 4017 struct virtnet_info *vi = netdev_priv(dev); 4018 u16 queue_pairs = channels->combined_count; 4019 int err; 4020 4021 /* We don't support separate rx/tx channels. 4022 * We don't allow setting 'other' channels. 4023 */ 4024 if (channels->rx_count || channels->tx_count || channels->other_count) 4025 return -EINVAL; 4026 4027 if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0) 4028 return -EINVAL; 4029 4030 /* For now we don't support modifying channels while XDP is loaded 4031 * also when XDP is loaded all RX queues have XDP programs so we only 4032 * need to check a single RX queue. 4033 */ 4034 if (vi->rq[0].xdp_prog) 4035 return -EINVAL; 4036 4037 cpus_read_lock(); 4038 err = virtnet_set_queues(vi, queue_pairs); 4039 if (err) { 4040 cpus_read_unlock(); 4041 goto err; 4042 } 4043 virtnet_set_affinity(vi); 4044 cpus_read_unlock(); 4045 4046 netif_set_real_num_tx_queues(dev, queue_pairs); 4047 netif_set_real_num_rx_queues(dev, queue_pairs); 4048 err: 4049 return err; 4050 } 4051 4052 static void virtnet_stats_sprintf(u8 **p, const char *fmt, const char *noq_fmt, 4053 int num, int qid, const struct virtnet_stat_desc *desc) 4054 { 4055 int i; 4056 4057 if (qid < 0) { 4058 for (i = 0; i < num; ++i) 4059 ethtool_sprintf(p, noq_fmt, desc[i].desc); 4060 } else { 4061 for (i = 0; i < num; ++i) 4062 ethtool_sprintf(p, fmt, qid, desc[i].desc); 4063 } 4064 } 4065 4066 /* qid == -1: for rx/tx queue total field */ 4067 static void virtnet_get_stats_string(struct virtnet_info *vi, int type, int qid, u8 **data) 4068 { 4069 const struct virtnet_stat_desc *desc; 4070 const char *fmt, *noq_fmt; 4071 u8 *p = *data; 4072 u32 num; 4073 4074 if (type == VIRTNET_Q_TYPE_CQ && qid >= 0) { 4075 noq_fmt = "cq_hw_%s"; 4076 4077 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { 4078 desc = &virtnet_stats_cvq_desc[0]; 4079 num = ARRAY_SIZE(virtnet_stats_cvq_desc); 4080 4081 virtnet_stats_sprintf(&p, NULL, noq_fmt, num, -1, desc); 4082 } 4083 } 4084 4085 if (type == VIRTNET_Q_TYPE_RX) { 4086 fmt = "rx%u_%s"; 4087 noq_fmt = "rx_%s"; 4088 4089 desc = &virtnet_rq_stats_desc[0]; 4090 num = ARRAY_SIZE(virtnet_rq_stats_desc); 4091 4092 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4093 4094 fmt = "rx%u_hw_%s"; 4095 noq_fmt = "rx_hw_%s"; 4096 4097 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4098 desc = &virtnet_stats_rx_basic_desc[0]; 4099 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4100 4101 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4102 } 4103 4104 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4105 desc = &virtnet_stats_rx_csum_desc[0]; 4106 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4107 4108 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4109 } 4110 4111 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4112 desc = &virtnet_stats_rx_speed_desc[0]; 4113 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4114 4115 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4116 } 4117 } 4118 4119 if (type == VIRTNET_Q_TYPE_TX) { 4120 fmt = "tx%u_%s"; 4121 noq_fmt = "tx_%s"; 4122 4123 desc = &virtnet_sq_stats_desc[0]; 4124 num = ARRAY_SIZE(virtnet_sq_stats_desc); 4125 4126 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4127 4128 fmt = "tx%u_hw_%s"; 4129 noq_fmt = "tx_hw_%s"; 4130 4131 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4132 desc = &virtnet_stats_tx_basic_desc[0]; 4133 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4134 4135 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4136 } 4137 4138 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4139 desc = &virtnet_stats_tx_gso_desc[0]; 4140 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4141 4142 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4143 } 4144 4145 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4146 desc = &virtnet_stats_tx_speed_desc[0]; 4147 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4148 4149 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4150 } 4151 } 4152 4153 *data = p; 4154 } 4155 4156 struct virtnet_stats_ctx { 4157 /* The stats are write to qstats or ethtool -S */ 4158 bool to_qstat; 4159 4160 /* Used to calculate the offset inside the output buffer. */ 4161 u32 desc_num[3]; 4162 4163 /* The actual supported stat types. */ 4164 u64 bitmap[3]; 4165 4166 /* Used to calculate the reply buffer size. */ 4167 u32 size[3]; 4168 4169 /* Record the output buffer. */ 4170 u64 *data; 4171 }; 4172 4173 static void virtnet_stats_ctx_init(struct virtnet_info *vi, 4174 struct virtnet_stats_ctx *ctx, 4175 u64 *data, bool to_qstat) 4176 { 4177 u32 queue_type; 4178 4179 ctx->data = data; 4180 ctx->to_qstat = to_qstat; 4181 4182 if (to_qstat) { 4183 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); 4184 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); 4185 4186 queue_type = VIRTNET_Q_TYPE_RX; 4187 4188 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4189 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; 4190 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); 4191 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); 4192 } 4193 4194 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4195 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; 4196 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); 4197 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); 4198 } 4199 4200 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 4201 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_GSO; 4202 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); 4203 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_gso); 4204 } 4205 4206 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4207 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; 4208 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); 4209 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); 4210 } 4211 4212 queue_type = VIRTNET_Q_TYPE_TX; 4213 4214 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4215 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; 4216 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); 4217 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); 4218 } 4219 4220 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 4221 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_CSUM; 4222 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); 4223 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_csum); 4224 } 4225 4226 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4227 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; 4228 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); 4229 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); 4230 } 4231 4232 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4233 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; 4234 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); 4235 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); 4236 } 4237 4238 return; 4239 } 4240 4241 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc); 4242 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc); 4243 4244 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { 4245 queue_type = VIRTNET_Q_TYPE_CQ; 4246 4247 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_CVQ; 4248 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_cvq_desc); 4249 ctx->size[queue_type] += sizeof(struct virtio_net_stats_cvq); 4250 } 4251 4252 queue_type = VIRTNET_Q_TYPE_RX; 4253 4254 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4255 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; 4256 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4257 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); 4258 } 4259 4260 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4261 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; 4262 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4263 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); 4264 } 4265 4266 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4267 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; 4268 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4269 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); 4270 } 4271 4272 queue_type = VIRTNET_Q_TYPE_TX; 4273 4274 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4275 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; 4276 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4277 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); 4278 } 4279 4280 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4281 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; 4282 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4283 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); 4284 } 4285 4286 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4287 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; 4288 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4289 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); 4290 } 4291 } 4292 4293 /* stats_sum_queue - Calculate the sum of the same fields in sq or rq. 4294 * @sum: the position to store the sum values 4295 * @num: field num 4296 * @q_value: the first queue fields 4297 * @q_num: number of the queues 4298 */ 4299 static void stats_sum_queue(u64 *sum, u32 num, u64 *q_value, u32 q_num) 4300 { 4301 u32 step = num; 4302 int i, j; 4303 u64 *p; 4304 4305 for (i = 0; i < num; ++i) { 4306 p = sum + i; 4307 *p = 0; 4308 4309 for (j = 0; j < q_num; ++j) 4310 *p += *(q_value + i + j * step); 4311 } 4312 } 4313 4314 static void virtnet_fill_total_fields(struct virtnet_info *vi, 4315 struct virtnet_stats_ctx *ctx) 4316 { 4317 u64 *data, *first_rx_q, *first_tx_q; 4318 u32 num_cq, num_rx, num_tx; 4319 4320 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; 4321 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; 4322 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; 4323 4324 first_rx_q = ctx->data + num_rx + num_tx + num_cq; 4325 first_tx_q = first_rx_q + vi->curr_queue_pairs * num_rx; 4326 4327 data = ctx->data; 4328 4329 stats_sum_queue(data, num_rx, first_rx_q, vi->curr_queue_pairs); 4330 4331 data = ctx->data + num_rx; 4332 4333 stats_sum_queue(data, num_tx, first_tx_q, vi->curr_queue_pairs); 4334 } 4335 4336 static void virtnet_fill_stats_qstat(struct virtnet_info *vi, u32 qid, 4337 struct virtnet_stats_ctx *ctx, 4338 const u8 *base, bool drv_stats, u8 reply_type) 4339 { 4340 const struct virtnet_stat_desc *desc; 4341 const u64_stats_t *v_stat; 4342 u64 offset, bitmap; 4343 const __le64 *v; 4344 u32 queue_type; 4345 int i, num; 4346 4347 queue_type = vq_type(vi, qid); 4348 bitmap = ctx->bitmap[queue_type]; 4349 4350 if (drv_stats) { 4351 if (queue_type == VIRTNET_Q_TYPE_RX) { 4352 desc = &virtnet_rq_stats_desc_qstat[0]; 4353 num = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); 4354 } else { 4355 desc = &virtnet_sq_stats_desc_qstat[0]; 4356 num = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); 4357 } 4358 4359 for (i = 0; i < num; ++i) { 4360 offset = desc[i].qstat_offset / sizeof(*ctx->data); 4361 v_stat = (const u64_stats_t *)(base + desc[i].offset); 4362 ctx->data[offset] = u64_stats_read(v_stat); 4363 } 4364 return; 4365 } 4366 4367 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4368 desc = &virtnet_stats_rx_basic_desc_qstat[0]; 4369 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); 4370 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) 4371 goto found; 4372 } 4373 4374 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4375 desc = &virtnet_stats_rx_csum_desc_qstat[0]; 4376 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); 4377 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) 4378 goto found; 4379 } 4380 4381 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 4382 desc = &virtnet_stats_rx_gso_desc_qstat[0]; 4383 num = ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); 4384 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_GSO) 4385 goto found; 4386 } 4387 4388 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4389 desc = &virtnet_stats_rx_speed_desc_qstat[0]; 4390 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); 4391 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) 4392 goto found; 4393 } 4394 4395 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4396 desc = &virtnet_stats_tx_basic_desc_qstat[0]; 4397 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); 4398 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) 4399 goto found; 4400 } 4401 4402 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 4403 desc = &virtnet_stats_tx_csum_desc_qstat[0]; 4404 num = ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); 4405 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_CSUM) 4406 goto found; 4407 } 4408 4409 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4410 desc = &virtnet_stats_tx_gso_desc_qstat[0]; 4411 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); 4412 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) 4413 goto found; 4414 } 4415 4416 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4417 desc = &virtnet_stats_tx_speed_desc_qstat[0]; 4418 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); 4419 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) 4420 goto found; 4421 } 4422 4423 return; 4424 4425 found: 4426 for (i = 0; i < num; ++i) { 4427 offset = desc[i].qstat_offset / sizeof(*ctx->data); 4428 v = (const __le64 *)(base + desc[i].offset); 4429 ctx->data[offset] = le64_to_cpu(*v); 4430 } 4431 } 4432 4433 /* virtnet_fill_stats - copy the stats to qstats or ethtool -S 4434 * The stats source is the device or the driver. 4435 * 4436 * @vi: virtio net info 4437 * @qid: the vq id 4438 * @ctx: stats ctx (initiated by virtnet_stats_ctx_init()) 4439 * @base: pointer to the device reply or the driver stats structure. 4440 * @drv_stats: designate the base type (device reply, driver stats) 4441 * @type: the type of the device reply (if drv_stats is true, this must be zero) 4442 */ 4443 static void virtnet_fill_stats(struct virtnet_info *vi, u32 qid, 4444 struct virtnet_stats_ctx *ctx, 4445 const u8 *base, bool drv_stats, u8 reply_type) 4446 { 4447 u32 queue_type, num_rx, num_tx, num_cq; 4448 const struct virtnet_stat_desc *desc; 4449 const u64_stats_t *v_stat; 4450 u64 offset, bitmap; 4451 const __le64 *v; 4452 int i, num; 4453 4454 if (ctx->to_qstat) 4455 return virtnet_fill_stats_qstat(vi, qid, ctx, base, drv_stats, reply_type); 4456 4457 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; 4458 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; 4459 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; 4460 4461 queue_type = vq_type(vi, qid); 4462 bitmap = ctx->bitmap[queue_type]; 4463 4464 /* skip the total fields of pairs */ 4465 offset = num_rx + num_tx; 4466 4467 if (queue_type == VIRTNET_Q_TYPE_TX) { 4468 offset += num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2); 4469 4470 num = ARRAY_SIZE(virtnet_sq_stats_desc); 4471 if (drv_stats) { 4472 desc = &virtnet_sq_stats_desc[0]; 4473 goto drv_stats; 4474 } 4475 4476 offset += num; 4477 4478 } else if (queue_type == VIRTNET_Q_TYPE_RX) { 4479 offset += num_cq + num_rx * (qid / 2); 4480 4481 num = ARRAY_SIZE(virtnet_rq_stats_desc); 4482 if (drv_stats) { 4483 desc = &virtnet_rq_stats_desc[0]; 4484 goto drv_stats; 4485 } 4486 4487 offset += num; 4488 } 4489 4490 if (bitmap & VIRTIO_NET_STATS_TYPE_CVQ) { 4491 desc = &virtnet_stats_cvq_desc[0]; 4492 num = ARRAY_SIZE(virtnet_stats_cvq_desc); 4493 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_CVQ) 4494 goto found; 4495 4496 offset += num; 4497 } 4498 4499 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4500 desc = &virtnet_stats_rx_basic_desc[0]; 4501 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4502 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) 4503 goto found; 4504 4505 offset += num; 4506 } 4507 4508 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4509 desc = &virtnet_stats_rx_csum_desc[0]; 4510 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4511 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) 4512 goto found; 4513 4514 offset += num; 4515 } 4516 4517 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4518 desc = &virtnet_stats_rx_speed_desc[0]; 4519 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4520 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) 4521 goto found; 4522 4523 offset += num; 4524 } 4525 4526 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4527 desc = &virtnet_stats_tx_basic_desc[0]; 4528 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4529 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) 4530 goto found; 4531 4532 offset += num; 4533 } 4534 4535 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4536 desc = &virtnet_stats_tx_gso_desc[0]; 4537 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4538 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) 4539 goto found; 4540 4541 offset += num; 4542 } 4543 4544 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4545 desc = &virtnet_stats_tx_speed_desc[0]; 4546 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4547 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) 4548 goto found; 4549 4550 offset += num; 4551 } 4552 4553 return; 4554 4555 found: 4556 for (i = 0; i < num; ++i) { 4557 v = (const __le64 *)(base + desc[i].offset); 4558 ctx->data[offset + i] = le64_to_cpu(*v); 4559 } 4560 4561 return; 4562 4563 drv_stats: 4564 for (i = 0; i < num; ++i) { 4565 v_stat = (const u64_stats_t *)(base + desc[i].offset); 4566 ctx->data[offset + i] = u64_stats_read(v_stat); 4567 } 4568 } 4569 4570 static int __virtnet_get_hw_stats(struct virtnet_info *vi, 4571 struct virtnet_stats_ctx *ctx, 4572 struct virtio_net_ctrl_queue_stats *req, 4573 int req_size, void *reply, int res_size) 4574 { 4575 struct virtio_net_stats_reply_hdr *hdr; 4576 struct scatterlist sgs_in, sgs_out; 4577 void *p; 4578 u32 qid; 4579 int ok; 4580 4581 sg_init_one(&sgs_out, req, req_size); 4582 sg_init_one(&sgs_in, reply, res_size); 4583 4584 ok = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, 4585 VIRTIO_NET_CTRL_STATS_GET, 4586 &sgs_out, &sgs_in); 4587 4588 if (!ok) 4589 return ok; 4590 4591 for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) { 4592 hdr = p; 4593 qid = le16_to_cpu(hdr->vq_index); 4594 virtnet_fill_stats(vi, qid, ctx, p, false, hdr->type); 4595 } 4596 4597 return 0; 4598 } 4599 4600 static void virtnet_make_stat_req(struct virtnet_info *vi, 4601 struct virtnet_stats_ctx *ctx, 4602 struct virtio_net_ctrl_queue_stats *req, 4603 int qid, int *idx) 4604 { 4605 int qtype = vq_type(vi, qid); 4606 u64 bitmap = ctx->bitmap[qtype]; 4607 4608 if (!bitmap) 4609 return; 4610 4611 req->stats[*idx].vq_index = cpu_to_le16(qid); 4612 req->stats[*idx].types_bitmap[0] = cpu_to_le64(bitmap); 4613 *idx += 1; 4614 } 4615 4616 /* qid: -1: get stats of all vq. 4617 * > 0: get the stats for the special vq. This must not be cvq. 4618 */ 4619 static int virtnet_get_hw_stats(struct virtnet_info *vi, 4620 struct virtnet_stats_ctx *ctx, int qid) 4621 { 4622 int qnum, i, j, res_size, qtype, last_vq, first_vq; 4623 struct virtio_net_ctrl_queue_stats *req; 4624 bool enable_cvq; 4625 void *reply; 4626 int ok; 4627 4628 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) 4629 return 0; 4630 4631 if (qid == -1) { 4632 last_vq = vi->curr_queue_pairs * 2 - 1; 4633 first_vq = 0; 4634 enable_cvq = true; 4635 } else { 4636 last_vq = qid; 4637 first_vq = qid; 4638 enable_cvq = false; 4639 } 4640 4641 qnum = 0; 4642 res_size = 0; 4643 for (i = first_vq; i <= last_vq ; ++i) { 4644 qtype = vq_type(vi, i); 4645 if (ctx->bitmap[qtype]) { 4646 ++qnum; 4647 res_size += ctx->size[qtype]; 4648 } 4649 } 4650 4651 if (enable_cvq && ctx->bitmap[VIRTNET_Q_TYPE_CQ]) { 4652 res_size += ctx->size[VIRTNET_Q_TYPE_CQ]; 4653 qnum += 1; 4654 } 4655 4656 req = kcalloc(qnum, sizeof(*req), GFP_KERNEL); 4657 if (!req) 4658 return -ENOMEM; 4659 4660 reply = kmalloc(res_size, GFP_KERNEL); 4661 if (!reply) { 4662 kfree(req); 4663 return -ENOMEM; 4664 } 4665 4666 j = 0; 4667 for (i = first_vq; i <= last_vq ; ++i) 4668 virtnet_make_stat_req(vi, ctx, req, i, &j); 4669 4670 if (enable_cvq) 4671 virtnet_make_stat_req(vi, ctx, req, vi->max_queue_pairs * 2, &j); 4672 4673 ok = __virtnet_get_hw_stats(vi, ctx, req, sizeof(*req) * j, reply, res_size); 4674 4675 kfree(req); 4676 kfree(reply); 4677 4678 return ok; 4679 } 4680 4681 static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) 4682 { 4683 struct virtnet_info *vi = netdev_priv(dev); 4684 unsigned int i; 4685 u8 *p = data; 4686 4687 switch (stringset) { 4688 case ETH_SS_STATS: 4689 /* Generate the total field names. */ 4690 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, -1, &p); 4691 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, -1, &p); 4692 4693 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_CQ, 0, &p); 4694 4695 for (i = 0; i < vi->curr_queue_pairs; ++i) 4696 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, i, &p); 4697 4698 for (i = 0; i < vi->curr_queue_pairs; ++i) 4699 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, i, &p); 4700 break; 4701 } 4702 } 4703 4704 static int virtnet_get_sset_count(struct net_device *dev, int sset) 4705 { 4706 struct virtnet_info *vi = netdev_priv(dev); 4707 struct virtnet_stats_ctx ctx = {0}; 4708 u32 pair_count; 4709 4710 switch (sset) { 4711 case ETH_SS_STATS: 4712 virtnet_stats_ctx_init(vi, &ctx, NULL, false); 4713 4714 pair_count = ctx.desc_num[VIRTNET_Q_TYPE_RX] + ctx.desc_num[VIRTNET_Q_TYPE_TX]; 4715 4716 return pair_count + ctx.desc_num[VIRTNET_Q_TYPE_CQ] + 4717 vi->curr_queue_pairs * pair_count; 4718 default: 4719 return -EOPNOTSUPP; 4720 } 4721 } 4722 4723 static void virtnet_get_ethtool_stats(struct net_device *dev, 4724 struct ethtool_stats *stats, u64 *data) 4725 { 4726 struct virtnet_info *vi = netdev_priv(dev); 4727 struct virtnet_stats_ctx ctx = {0}; 4728 unsigned int start, i; 4729 const u8 *stats_base; 4730 4731 virtnet_stats_ctx_init(vi, &ctx, data, false); 4732 if (virtnet_get_hw_stats(vi, &ctx, -1)) 4733 dev_warn(&vi->dev->dev, "Failed to get hw stats.\n"); 4734 4735 for (i = 0; i < vi->curr_queue_pairs; i++) { 4736 struct receive_queue *rq = &vi->rq[i]; 4737 struct send_queue *sq = &vi->sq[i]; 4738 4739 stats_base = (const u8 *)&rq->stats; 4740 do { 4741 start = u64_stats_fetch_begin(&rq->stats.syncp); 4742 virtnet_fill_stats(vi, i * 2, &ctx, stats_base, true, 0); 4743 } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); 4744 4745 stats_base = (const u8 *)&sq->stats; 4746 do { 4747 start = u64_stats_fetch_begin(&sq->stats.syncp); 4748 virtnet_fill_stats(vi, i * 2 + 1, &ctx, stats_base, true, 0); 4749 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 4750 } 4751 4752 virtnet_fill_total_fields(vi, &ctx); 4753 } 4754 4755 static void virtnet_get_channels(struct net_device *dev, 4756 struct ethtool_channels *channels) 4757 { 4758 struct virtnet_info *vi = netdev_priv(dev); 4759 4760 channels->combined_count = vi->curr_queue_pairs; 4761 channels->max_combined = vi->max_queue_pairs; 4762 channels->max_other = 0; 4763 channels->rx_count = 0; 4764 channels->tx_count = 0; 4765 channels->other_count = 0; 4766 } 4767 4768 static int virtnet_set_link_ksettings(struct net_device *dev, 4769 const struct ethtool_link_ksettings *cmd) 4770 { 4771 struct virtnet_info *vi = netdev_priv(dev); 4772 4773 return ethtool_virtdev_set_link_ksettings(dev, cmd, 4774 &vi->speed, &vi->duplex); 4775 } 4776 4777 static int virtnet_get_link_ksettings(struct net_device *dev, 4778 struct ethtool_link_ksettings *cmd) 4779 { 4780 struct virtnet_info *vi = netdev_priv(dev); 4781 4782 cmd->base.speed = vi->speed; 4783 cmd->base.duplex = vi->duplex; 4784 cmd->base.port = PORT_OTHER; 4785 4786 return 0; 4787 } 4788 4789 static int virtnet_send_tx_notf_coal_cmds(struct virtnet_info *vi, 4790 struct ethtool_coalesce *ec) 4791 { 4792 struct virtio_net_ctrl_coal_tx *coal_tx __free(kfree) = NULL; 4793 struct scatterlist sgs_tx; 4794 int i; 4795 4796 coal_tx = kzalloc(sizeof(*coal_tx), GFP_KERNEL); 4797 if (!coal_tx) 4798 return -ENOMEM; 4799 4800 coal_tx->tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); 4801 coal_tx->tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); 4802 sg_init_one(&sgs_tx, coal_tx, sizeof(*coal_tx)); 4803 4804 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 4805 VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, 4806 &sgs_tx)) 4807 return -EINVAL; 4808 4809 vi->intr_coal_tx.max_usecs = ec->tx_coalesce_usecs; 4810 vi->intr_coal_tx.max_packets = ec->tx_max_coalesced_frames; 4811 for (i = 0; i < vi->max_queue_pairs; i++) { 4812 vi->sq[i].intr_coal.max_usecs = ec->tx_coalesce_usecs; 4813 vi->sq[i].intr_coal.max_packets = ec->tx_max_coalesced_frames; 4814 } 4815 4816 return 0; 4817 } 4818 4819 static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi, 4820 struct ethtool_coalesce *ec) 4821 { 4822 struct virtio_net_ctrl_coal_rx *coal_rx __free(kfree) = NULL; 4823 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; 4824 struct scatterlist sgs_rx; 4825 int i; 4826 4827 if (rx_ctrl_dim_on && !virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 4828 return -EOPNOTSUPP; 4829 4830 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != vi->intr_coal_rx.max_usecs || 4831 ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets)) 4832 return -EINVAL; 4833 4834 if (rx_ctrl_dim_on && !vi->rx_dim_enabled) { 4835 vi->rx_dim_enabled = true; 4836 for (i = 0; i < vi->max_queue_pairs; i++) { 4837 mutex_lock(&vi->rq[i].dim_lock); 4838 vi->rq[i].dim_enabled = true; 4839 mutex_unlock(&vi->rq[i].dim_lock); 4840 } 4841 return 0; 4842 } 4843 4844 coal_rx = kzalloc(sizeof(*coal_rx), GFP_KERNEL); 4845 if (!coal_rx) 4846 return -ENOMEM; 4847 4848 if (!rx_ctrl_dim_on && vi->rx_dim_enabled) { 4849 vi->rx_dim_enabled = false; 4850 for (i = 0; i < vi->max_queue_pairs; i++) { 4851 mutex_lock(&vi->rq[i].dim_lock); 4852 vi->rq[i].dim_enabled = false; 4853 mutex_unlock(&vi->rq[i].dim_lock); 4854 } 4855 } 4856 4857 /* Since the per-queue coalescing params can be set, 4858 * we need apply the global new params even if they 4859 * are not updated. 4860 */ 4861 coal_rx->rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); 4862 coal_rx->rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); 4863 sg_init_one(&sgs_rx, coal_rx, sizeof(*coal_rx)); 4864 4865 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 4866 VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, 4867 &sgs_rx)) 4868 return -EINVAL; 4869 4870 vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs; 4871 vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames; 4872 for (i = 0; i < vi->max_queue_pairs; i++) { 4873 mutex_lock(&vi->rq[i].dim_lock); 4874 vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs; 4875 vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames; 4876 mutex_unlock(&vi->rq[i].dim_lock); 4877 } 4878 4879 return 0; 4880 } 4881 4882 static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, 4883 struct ethtool_coalesce *ec) 4884 { 4885 int err; 4886 4887 err = virtnet_send_tx_notf_coal_cmds(vi, ec); 4888 if (err) 4889 return err; 4890 4891 err = virtnet_send_rx_notf_coal_cmds(vi, ec); 4892 if (err) 4893 return err; 4894 4895 return 0; 4896 } 4897 4898 static int virtnet_send_rx_notf_coal_vq_cmds(struct virtnet_info *vi, 4899 struct ethtool_coalesce *ec, 4900 u16 queue) 4901 { 4902 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; 4903 u32 max_usecs, max_packets; 4904 bool cur_rx_dim; 4905 int err; 4906 4907 mutex_lock(&vi->rq[queue].dim_lock); 4908 cur_rx_dim = vi->rq[queue].dim_enabled; 4909 max_usecs = vi->rq[queue].intr_coal.max_usecs; 4910 max_packets = vi->rq[queue].intr_coal.max_packets; 4911 4912 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != max_usecs || 4913 ec->rx_max_coalesced_frames != max_packets)) { 4914 mutex_unlock(&vi->rq[queue].dim_lock); 4915 return -EINVAL; 4916 } 4917 4918 if (rx_ctrl_dim_on && !cur_rx_dim) { 4919 vi->rq[queue].dim_enabled = true; 4920 mutex_unlock(&vi->rq[queue].dim_lock); 4921 return 0; 4922 } 4923 4924 if (!rx_ctrl_dim_on && cur_rx_dim) 4925 vi->rq[queue].dim_enabled = false; 4926 4927 /* If no params are updated, userspace ethtool will 4928 * reject the modification. 4929 */ 4930 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue, 4931 ec->rx_coalesce_usecs, 4932 ec->rx_max_coalesced_frames); 4933 mutex_unlock(&vi->rq[queue].dim_lock); 4934 return err; 4935 } 4936 4937 static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi, 4938 struct ethtool_coalesce *ec, 4939 u16 queue) 4940 { 4941 int err; 4942 4943 err = virtnet_send_rx_notf_coal_vq_cmds(vi, ec, queue); 4944 if (err) 4945 return err; 4946 4947 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, queue, 4948 ec->tx_coalesce_usecs, 4949 ec->tx_max_coalesced_frames); 4950 if (err) 4951 return err; 4952 4953 return 0; 4954 } 4955 4956 static void virtnet_rx_dim_work(struct work_struct *work) 4957 { 4958 struct dim *dim = container_of(work, struct dim, work); 4959 struct receive_queue *rq = container_of(dim, 4960 struct receive_queue, dim); 4961 struct virtnet_info *vi = rq->vq->vdev->priv; 4962 struct net_device *dev = vi->dev; 4963 struct dim_cq_moder update_moder; 4964 int qnum, err; 4965 4966 qnum = rq - vi->rq; 4967 4968 mutex_lock(&rq->dim_lock); 4969 if (!rq->dim_enabled) 4970 goto out; 4971 4972 update_moder = net_dim_get_rx_irq_moder(dev, dim); 4973 if (update_moder.usec != rq->intr_coal.max_usecs || 4974 update_moder.pkts != rq->intr_coal.max_packets) { 4975 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum, 4976 update_moder.usec, 4977 update_moder.pkts); 4978 if (err) 4979 pr_debug("%s: Failed to send dim parameters on rxq%d\n", 4980 dev->name, qnum); 4981 } 4982 out: 4983 dim->state = DIM_START_MEASURE; 4984 mutex_unlock(&rq->dim_lock); 4985 } 4986 4987 static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) 4988 { 4989 /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL 4990 * or VIRTIO_NET_F_VQ_NOTF_COAL feature is negotiated. 4991 */ 4992 if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) 4993 return -EOPNOTSUPP; 4994 4995 if (ec->tx_max_coalesced_frames > 1 || 4996 ec->rx_max_coalesced_frames != 1) 4997 return -EINVAL; 4998 4999 return 0; 5000 } 5001 5002 static int virtnet_should_update_vq_weight(int dev_flags, int weight, 5003 int vq_weight, bool *should_update) 5004 { 5005 if (weight ^ vq_weight) { 5006 if (dev_flags & IFF_UP) 5007 return -EBUSY; 5008 *should_update = true; 5009 } 5010 5011 return 0; 5012 } 5013 5014 static int virtnet_set_coalesce(struct net_device *dev, 5015 struct ethtool_coalesce *ec, 5016 struct kernel_ethtool_coalesce *kernel_coal, 5017 struct netlink_ext_ack *extack) 5018 { 5019 struct virtnet_info *vi = netdev_priv(dev); 5020 int ret, queue_number, napi_weight; 5021 bool update_napi = false; 5022 5023 /* Can't change NAPI weight if the link is up */ 5024 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; 5025 for (queue_number = 0; queue_number < vi->max_queue_pairs; queue_number++) { 5026 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 5027 vi->sq[queue_number].napi.weight, 5028 &update_napi); 5029 if (ret) 5030 return ret; 5031 5032 if (update_napi) { 5033 /* All queues that belong to [queue_number, vi->max_queue_pairs] will be 5034 * updated for the sake of simplicity, which might not be necessary 5035 */ 5036 break; 5037 } 5038 } 5039 5040 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) 5041 ret = virtnet_send_notf_coal_cmds(vi, ec); 5042 else 5043 ret = virtnet_coal_params_supported(ec); 5044 5045 if (ret) 5046 return ret; 5047 5048 if (update_napi) { 5049 for (; queue_number < vi->max_queue_pairs; queue_number++) 5050 vi->sq[queue_number].napi.weight = napi_weight; 5051 } 5052 5053 return ret; 5054 } 5055 5056 static int virtnet_get_coalesce(struct net_device *dev, 5057 struct ethtool_coalesce *ec, 5058 struct kernel_ethtool_coalesce *kernel_coal, 5059 struct netlink_ext_ack *extack) 5060 { 5061 struct virtnet_info *vi = netdev_priv(dev); 5062 5063 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 5064 ec->rx_coalesce_usecs = vi->intr_coal_rx.max_usecs; 5065 ec->tx_coalesce_usecs = vi->intr_coal_tx.max_usecs; 5066 ec->tx_max_coalesced_frames = vi->intr_coal_tx.max_packets; 5067 ec->rx_max_coalesced_frames = vi->intr_coal_rx.max_packets; 5068 ec->use_adaptive_rx_coalesce = vi->rx_dim_enabled; 5069 } else { 5070 ec->rx_max_coalesced_frames = 1; 5071 5072 if (vi->sq[0].napi.weight) 5073 ec->tx_max_coalesced_frames = 1; 5074 } 5075 5076 return 0; 5077 } 5078 5079 static int virtnet_set_per_queue_coalesce(struct net_device *dev, 5080 u32 queue, 5081 struct ethtool_coalesce *ec) 5082 { 5083 struct virtnet_info *vi = netdev_priv(dev); 5084 int ret, napi_weight; 5085 bool update_napi = false; 5086 5087 if (queue >= vi->max_queue_pairs) 5088 return -EINVAL; 5089 5090 /* Can't change NAPI weight if the link is up */ 5091 napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; 5092 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 5093 vi->sq[queue].napi.weight, 5094 &update_napi); 5095 if (ret) 5096 return ret; 5097 5098 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 5099 ret = virtnet_send_notf_coal_vq_cmds(vi, ec, queue); 5100 else 5101 ret = virtnet_coal_params_supported(ec); 5102 5103 if (ret) 5104 return ret; 5105 5106 if (update_napi) 5107 vi->sq[queue].napi.weight = napi_weight; 5108 5109 return 0; 5110 } 5111 5112 static int virtnet_get_per_queue_coalesce(struct net_device *dev, 5113 u32 queue, 5114 struct ethtool_coalesce *ec) 5115 { 5116 struct virtnet_info *vi = netdev_priv(dev); 5117 5118 if (queue >= vi->max_queue_pairs) 5119 return -EINVAL; 5120 5121 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 5122 mutex_lock(&vi->rq[queue].dim_lock); 5123 ec->rx_coalesce_usecs = vi->rq[queue].intr_coal.max_usecs; 5124 ec->tx_coalesce_usecs = vi->sq[queue].intr_coal.max_usecs; 5125 ec->tx_max_coalesced_frames = vi->sq[queue].intr_coal.max_packets; 5126 ec->rx_max_coalesced_frames = vi->rq[queue].intr_coal.max_packets; 5127 ec->use_adaptive_rx_coalesce = vi->rq[queue].dim_enabled; 5128 mutex_unlock(&vi->rq[queue].dim_lock); 5129 } else { 5130 ec->rx_max_coalesced_frames = 1; 5131 5132 if (vi->sq[queue].napi.weight) 5133 ec->tx_max_coalesced_frames = 1; 5134 } 5135 5136 return 0; 5137 } 5138 5139 static void virtnet_init_settings(struct net_device *dev) 5140 { 5141 struct virtnet_info *vi = netdev_priv(dev); 5142 5143 vi->speed = SPEED_UNKNOWN; 5144 vi->duplex = DUPLEX_UNKNOWN; 5145 } 5146 5147 static u32 virtnet_get_rxfh_key_size(struct net_device *dev) 5148 { 5149 return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size; 5150 } 5151 5152 static u32 virtnet_get_rxfh_indir_size(struct net_device *dev) 5153 { 5154 return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size; 5155 } 5156 5157 static int virtnet_get_rxfh(struct net_device *dev, 5158 struct ethtool_rxfh_param *rxfh) 5159 { 5160 struct virtnet_info *vi = netdev_priv(dev); 5161 int i; 5162 5163 if (rxfh->indir) { 5164 for (i = 0; i < vi->rss_indir_table_size; ++i) 5165 rxfh->indir[i] = vi->rss.indirection_table[i]; 5166 } 5167 5168 if (rxfh->key) 5169 memcpy(rxfh->key, vi->rss.key, vi->rss_key_size); 5170 5171 rxfh->hfunc = ETH_RSS_HASH_TOP; 5172 5173 return 0; 5174 } 5175 5176 static int virtnet_set_rxfh(struct net_device *dev, 5177 struct ethtool_rxfh_param *rxfh, 5178 struct netlink_ext_ack *extack) 5179 { 5180 struct virtnet_info *vi = netdev_priv(dev); 5181 bool update = false; 5182 int i; 5183 5184 if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && 5185 rxfh->hfunc != ETH_RSS_HASH_TOP) 5186 return -EOPNOTSUPP; 5187 5188 if (rxfh->indir) { 5189 if (!vi->has_rss) 5190 return -EOPNOTSUPP; 5191 5192 for (i = 0; i < vi->rss_indir_table_size; ++i) 5193 vi->rss.indirection_table[i] = rxfh->indir[i]; 5194 update = true; 5195 } 5196 5197 if (rxfh->key) { 5198 /* If either _F_HASH_REPORT or _F_RSS are negotiated, the 5199 * device provides hash calculation capabilities, that is, 5200 * hash_key is configured. 5201 */ 5202 if (!vi->has_rss && !vi->has_rss_hash_report) 5203 return -EOPNOTSUPP; 5204 5205 memcpy(vi->rss.key, rxfh->key, vi->rss_key_size); 5206 update = true; 5207 } 5208 5209 if (update) 5210 virtnet_commit_rss_command(vi); 5211 5212 return 0; 5213 } 5214 5215 static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) 5216 { 5217 struct virtnet_info *vi = netdev_priv(dev); 5218 int rc = 0; 5219 5220 switch (info->cmd) { 5221 case ETHTOOL_GRXRINGS: 5222 info->data = vi->curr_queue_pairs; 5223 break; 5224 case ETHTOOL_GRXFH: 5225 virtnet_get_hashflow(vi, info); 5226 break; 5227 default: 5228 rc = -EOPNOTSUPP; 5229 } 5230 5231 return rc; 5232 } 5233 5234 static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) 5235 { 5236 struct virtnet_info *vi = netdev_priv(dev); 5237 int rc = 0; 5238 5239 switch (info->cmd) { 5240 case ETHTOOL_SRXFH: 5241 if (!virtnet_set_hashflow(vi, info)) 5242 rc = -EINVAL; 5243 5244 break; 5245 default: 5246 rc = -EOPNOTSUPP; 5247 } 5248 5249 return rc; 5250 } 5251 5252 static const struct ethtool_ops virtnet_ethtool_ops = { 5253 .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | 5254 ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX, 5255 .get_drvinfo = virtnet_get_drvinfo, 5256 .get_link = ethtool_op_get_link, 5257 .get_ringparam = virtnet_get_ringparam, 5258 .set_ringparam = virtnet_set_ringparam, 5259 .get_strings = virtnet_get_strings, 5260 .get_sset_count = virtnet_get_sset_count, 5261 .get_ethtool_stats = virtnet_get_ethtool_stats, 5262 .set_channels = virtnet_set_channels, 5263 .get_channels = virtnet_get_channels, 5264 .get_ts_info = ethtool_op_get_ts_info, 5265 .get_link_ksettings = virtnet_get_link_ksettings, 5266 .set_link_ksettings = virtnet_set_link_ksettings, 5267 .set_coalesce = virtnet_set_coalesce, 5268 .get_coalesce = virtnet_get_coalesce, 5269 .set_per_queue_coalesce = virtnet_set_per_queue_coalesce, 5270 .get_per_queue_coalesce = virtnet_get_per_queue_coalesce, 5271 .get_rxfh_key_size = virtnet_get_rxfh_key_size, 5272 .get_rxfh_indir_size = virtnet_get_rxfh_indir_size, 5273 .get_rxfh = virtnet_get_rxfh, 5274 .set_rxfh = virtnet_set_rxfh, 5275 .get_rxnfc = virtnet_get_rxnfc, 5276 .set_rxnfc = virtnet_set_rxnfc, 5277 }; 5278 5279 static void virtnet_get_queue_stats_rx(struct net_device *dev, int i, 5280 struct netdev_queue_stats_rx *stats) 5281 { 5282 struct virtnet_info *vi = netdev_priv(dev); 5283 struct receive_queue *rq = &vi->rq[i]; 5284 struct virtnet_stats_ctx ctx = {0}; 5285 5286 virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); 5287 5288 virtnet_get_hw_stats(vi, &ctx, i * 2); 5289 virtnet_fill_stats(vi, i * 2, &ctx, (void *)&rq->stats, true, 0); 5290 } 5291 5292 static void virtnet_get_queue_stats_tx(struct net_device *dev, int i, 5293 struct netdev_queue_stats_tx *stats) 5294 { 5295 struct virtnet_info *vi = netdev_priv(dev); 5296 struct send_queue *sq = &vi->sq[i]; 5297 struct virtnet_stats_ctx ctx = {0}; 5298 5299 virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); 5300 5301 virtnet_get_hw_stats(vi, &ctx, i * 2 + 1); 5302 virtnet_fill_stats(vi, i * 2 + 1, &ctx, (void *)&sq->stats, true, 0); 5303 } 5304 5305 static void virtnet_get_base_stats(struct net_device *dev, 5306 struct netdev_queue_stats_rx *rx, 5307 struct netdev_queue_stats_tx *tx) 5308 { 5309 struct virtnet_info *vi = netdev_priv(dev); 5310 5311 /* The queue stats of the virtio-net will not be reset. So here we 5312 * return 0. 5313 */ 5314 rx->bytes = 0; 5315 rx->packets = 0; 5316 5317 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 5318 rx->hw_drops = 0; 5319 rx->hw_drop_overruns = 0; 5320 } 5321 5322 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 5323 rx->csum_unnecessary = 0; 5324 rx->csum_none = 0; 5325 rx->csum_bad = 0; 5326 } 5327 5328 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 5329 rx->hw_gro_packets = 0; 5330 rx->hw_gro_bytes = 0; 5331 rx->hw_gro_wire_packets = 0; 5332 rx->hw_gro_wire_bytes = 0; 5333 } 5334 5335 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) 5336 rx->hw_drop_ratelimits = 0; 5337 5338 tx->bytes = 0; 5339 tx->packets = 0; 5340 tx->stop = 0; 5341 tx->wake = 0; 5342 5343 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 5344 tx->hw_drops = 0; 5345 tx->hw_drop_errors = 0; 5346 } 5347 5348 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 5349 tx->csum_none = 0; 5350 tx->needs_csum = 0; 5351 } 5352 5353 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 5354 tx->hw_gso_packets = 0; 5355 tx->hw_gso_bytes = 0; 5356 tx->hw_gso_wire_packets = 0; 5357 tx->hw_gso_wire_bytes = 0; 5358 } 5359 5360 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) 5361 tx->hw_drop_ratelimits = 0; 5362 } 5363 5364 static const struct netdev_stat_ops virtnet_stat_ops = { 5365 .get_queue_stats_rx = virtnet_get_queue_stats_rx, 5366 .get_queue_stats_tx = virtnet_get_queue_stats_tx, 5367 .get_base_stats = virtnet_get_base_stats, 5368 }; 5369 5370 static void virtnet_freeze_down(struct virtio_device *vdev) 5371 { 5372 struct virtnet_info *vi = vdev->priv; 5373 5374 /* Make sure no work handler is accessing the device */ 5375 flush_work(&vi->config_work); 5376 disable_rx_mode_work(vi); 5377 flush_work(&vi->rx_mode_work); 5378 5379 netif_tx_lock_bh(vi->dev); 5380 netif_device_detach(vi->dev); 5381 netif_tx_unlock_bh(vi->dev); 5382 if (netif_running(vi->dev)) 5383 virtnet_close(vi->dev); 5384 } 5385 5386 static int init_vqs(struct virtnet_info *vi); 5387 5388 static int virtnet_restore_up(struct virtio_device *vdev) 5389 { 5390 struct virtnet_info *vi = vdev->priv; 5391 int err; 5392 5393 err = init_vqs(vi); 5394 if (err) 5395 return err; 5396 5397 virtio_device_ready(vdev); 5398 5399 enable_delayed_refill(vi); 5400 enable_rx_mode_work(vi); 5401 5402 if (netif_running(vi->dev)) { 5403 err = virtnet_open(vi->dev); 5404 if (err) 5405 return err; 5406 } 5407 5408 netif_tx_lock_bh(vi->dev); 5409 netif_device_attach(vi->dev); 5410 netif_tx_unlock_bh(vi->dev); 5411 return err; 5412 } 5413 5414 static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads) 5415 { 5416 __virtio64 *_offloads __free(kfree) = NULL; 5417 struct scatterlist sg; 5418 5419 _offloads = kzalloc(sizeof(*_offloads), GFP_KERNEL); 5420 if (!_offloads) 5421 return -ENOMEM; 5422 5423 *_offloads = cpu_to_virtio64(vi->vdev, offloads); 5424 5425 sg_init_one(&sg, _offloads, sizeof(*_offloads)); 5426 5427 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS, 5428 VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) { 5429 dev_warn(&vi->dev->dev, "Fail to set guest offload.\n"); 5430 return -EINVAL; 5431 } 5432 5433 return 0; 5434 } 5435 5436 static int virtnet_clear_guest_offloads(struct virtnet_info *vi) 5437 { 5438 u64 offloads = 0; 5439 5440 if (!vi->guest_offloads) 5441 return 0; 5442 5443 return virtnet_set_guest_offloads(vi, offloads); 5444 } 5445 5446 static int virtnet_restore_guest_offloads(struct virtnet_info *vi) 5447 { 5448 u64 offloads = vi->guest_offloads; 5449 5450 if (!vi->guest_offloads) 5451 return 0; 5452 5453 return virtnet_set_guest_offloads(vi, offloads); 5454 } 5455 5456 static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct receive_queue *rq, 5457 struct xsk_buff_pool *pool) 5458 { 5459 int err, qindex; 5460 5461 qindex = rq - vi->rq; 5462 5463 if (pool) { 5464 err = xdp_rxq_info_reg(&rq->xsk_rxq_info, vi->dev, qindex, rq->napi.napi_id); 5465 if (err < 0) 5466 return err; 5467 5468 err = xdp_rxq_info_reg_mem_model(&rq->xsk_rxq_info, 5469 MEM_TYPE_XSK_BUFF_POOL, NULL); 5470 if (err < 0) 5471 goto unreg; 5472 5473 xsk_pool_set_rxq_info(pool, &rq->xsk_rxq_info); 5474 } 5475 5476 virtnet_rx_pause(vi, rq); 5477 5478 err = virtqueue_reset(rq->vq, virtnet_rq_unmap_free_buf); 5479 if (err) { 5480 netdev_err(vi->dev, "reset rx fail: rx queue index: %d err: %d\n", qindex, err); 5481 5482 pool = NULL; 5483 } 5484 5485 rq->xsk_pool = pool; 5486 5487 virtnet_rx_resume(vi, rq); 5488 5489 if (pool) 5490 return 0; 5491 5492 unreg: 5493 xdp_rxq_info_unreg(&rq->xsk_rxq_info); 5494 return err; 5495 } 5496 5497 static int virtnet_xsk_pool_enable(struct net_device *dev, 5498 struct xsk_buff_pool *pool, 5499 u16 qid) 5500 { 5501 struct virtnet_info *vi = netdev_priv(dev); 5502 struct receive_queue *rq; 5503 struct device *dma_dev; 5504 struct send_queue *sq; 5505 int err, size; 5506 5507 if (vi->hdr_len > xsk_pool_get_headroom(pool)) 5508 return -EINVAL; 5509 5510 /* In big_packets mode, xdp cannot work, so there is no need to 5511 * initialize xsk of rq. 5512 */ 5513 if (vi->big_packets && !vi->mergeable_rx_bufs) 5514 return -ENOENT; 5515 5516 if (qid >= vi->curr_queue_pairs) 5517 return -EINVAL; 5518 5519 sq = &vi->sq[qid]; 5520 rq = &vi->rq[qid]; 5521 5522 /* xsk assumes that tx and rx must have the same dma device. The af-xdp 5523 * may use one buffer to receive from the rx and reuse this buffer to 5524 * send by the tx. So the dma dev of sq and rq must be the same one. 5525 * 5526 * But vq->dma_dev allows every vq has the respective dma dev. So I 5527 * check the dma dev of vq and sq is the same dev. 5528 */ 5529 if (virtqueue_dma_dev(rq->vq) != virtqueue_dma_dev(sq->vq)) 5530 return -EINVAL; 5531 5532 dma_dev = virtqueue_dma_dev(rq->vq); 5533 if (!dma_dev) 5534 return -EINVAL; 5535 5536 size = virtqueue_get_vring_size(rq->vq); 5537 5538 rq->xsk_buffs = kvcalloc(size, sizeof(*rq->xsk_buffs), GFP_KERNEL); 5539 if (!rq->xsk_buffs) 5540 return -ENOMEM; 5541 5542 err = xsk_pool_dma_map(pool, dma_dev, 0); 5543 if (err) 5544 goto err_xsk_map; 5545 5546 err = virtnet_rq_bind_xsk_pool(vi, rq, pool); 5547 if (err) 5548 goto err_rq; 5549 5550 return 0; 5551 5552 err_rq: 5553 xsk_pool_dma_unmap(pool, 0); 5554 err_xsk_map: 5555 return err; 5556 } 5557 5558 static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid) 5559 { 5560 struct virtnet_info *vi = netdev_priv(dev); 5561 struct xsk_buff_pool *pool; 5562 struct receive_queue *rq; 5563 int err; 5564 5565 if (qid >= vi->curr_queue_pairs) 5566 return -EINVAL; 5567 5568 rq = &vi->rq[qid]; 5569 5570 pool = rq->xsk_pool; 5571 5572 err = virtnet_rq_bind_xsk_pool(vi, rq, NULL); 5573 5574 xsk_pool_dma_unmap(pool, 0); 5575 5576 kvfree(rq->xsk_buffs); 5577 5578 return err; 5579 } 5580 5581 static int virtnet_xsk_pool_setup(struct net_device *dev, struct netdev_bpf *xdp) 5582 { 5583 if (xdp->xsk.pool) 5584 return virtnet_xsk_pool_enable(dev, xdp->xsk.pool, 5585 xdp->xsk.queue_id); 5586 else 5587 return virtnet_xsk_pool_disable(dev, xdp->xsk.queue_id); 5588 } 5589 5590 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, 5591 struct netlink_ext_ack *extack) 5592 { 5593 unsigned int room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM + 5594 sizeof(struct skb_shared_info)); 5595 unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN; 5596 struct virtnet_info *vi = netdev_priv(dev); 5597 struct bpf_prog *old_prog; 5598 u16 xdp_qp = 0, curr_qp; 5599 int i, err; 5600 5601 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) 5602 && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 5603 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 5604 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 5605 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 5606 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) || 5607 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) || 5608 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) { 5609 NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first"); 5610 return -EOPNOTSUPP; 5611 } 5612 5613 if (vi->mergeable_rx_bufs && !vi->any_header_sg) { 5614 NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required"); 5615 return -EINVAL; 5616 } 5617 5618 if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) { 5619 NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags"); 5620 netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz); 5621 return -EINVAL; 5622 } 5623 5624 curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs; 5625 if (prog) 5626 xdp_qp = nr_cpu_ids; 5627 5628 /* XDP requires extra queues for XDP_TX */ 5629 if (curr_qp + xdp_qp > vi->max_queue_pairs) { 5630 netdev_warn_once(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n", 5631 curr_qp + xdp_qp, vi->max_queue_pairs); 5632 xdp_qp = 0; 5633 } 5634 5635 old_prog = rtnl_dereference(vi->rq[0].xdp_prog); 5636 if (!prog && !old_prog) 5637 return 0; 5638 5639 if (prog) 5640 bpf_prog_add(prog, vi->max_queue_pairs - 1); 5641 5642 /* Make sure NAPI is not using any XDP TX queues for RX. */ 5643 if (netif_running(dev)) { 5644 for (i = 0; i < vi->max_queue_pairs; i++) { 5645 napi_disable(&vi->rq[i].napi); 5646 virtnet_napi_tx_disable(&vi->sq[i].napi); 5647 } 5648 } 5649 5650 if (!prog) { 5651 for (i = 0; i < vi->max_queue_pairs; i++) { 5652 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 5653 if (i == 0) 5654 virtnet_restore_guest_offloads(vi); 5655 } 5656 synchronize_net(); 5657 } 5658 5659 err = virtnet_set_queues(vi, curr_qp + xdp_qp); 5660 if (err) 5661 goto err; 5662 netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); 5663 vi->xdp_queue_pairs = xdp_qp; 5664 5665 if (prog) { 5666 vi->xdp_enabled = true; 5667 for (i = 0; i < vi->max_queue_pairs; i++) { 5668 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 5669 if (i == 0 && !old_prog) 5670 virtnet_clear_guest_offloads(vi); 5671 } 5672 if (!old_prog) 5673 xdp_features_set_redirect_target(dev, true); 5674 } else { 5675 xdp_features_clear_redirect_target(dev); 5676 vi->xdp_enabled = false; 5677 } 5678 5679 for (i = 0; i < vi->max_queue_pairs; i++) { 5680 if (old_prog) 5681 bpf_prog_put(old_prog); 5682 if (netif_running(dev)) { 5683 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); 5684 virtnet_napi_tx_enable(vi, vi->sq[i].vq, 5685 &vi->sq[i].napi); 5686 } 5687 } 5688 5689 return 0; 5690 5691 err: 5692 if (!prog) { 5693 virtnet_clear_guest_offloads(vi); 5694 for (i = 0; i < vi->max_queue_pairs; i++) 5695 rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); 5696 } 5697 5698 if (netif_running(dev)) { 5699 for (i = 0; i < vi->max_queue_pairs; i++) { 5700 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); 5701 virtnet_napi_tx_enable(vi, vi->sq[i].vq, 5702 &vi->sq[i].napi); 5703 } 5704 } 5705 if (prog) 5706 bpf_prog_sub(prog, vi->max_queue_pairs - 1); 5707 return err; 5708 } 5709 5710 static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) 5711 { 5712 switch (xdp->command) { 5713 case XDP_SETUP_PROG: 5714 return virtnet_xdp_set(dev, xdp->prog, xdp->extack); 5715 case XDP_SETUP_XSK_POOL: 5716 return virtnet_xsk_pool_setup(dev, xdp); 5717 default: 5718 return -EINVAL; 5719 } 5720 } 5721 5722 static int virtnet_get_phys_port_name(struct net_device *dev, char *buf, 5723 size_t len) 5724 { 5725 struct virtnet_info *vi = netdev_priv(dev); 5726 int ret; 5727 5728 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 5729 return -EOPNOTSUPP; 5730 5731 ret = snprintf(buf, len, "sby"); 5732 if (ret >= len) 5733 return -EOPNOTSUPP; 5734 5735 return 0; 5736 } 5737 5738 static int virtnet_set_features(struct net_device *dev, 5739 netdev_features_t features) 5740 { 5741 struct virtnet_info *vi = netdev_priv(dev); 5742 u64 offloads; 5743 int err; 5744 5745 if ((dev->features ^ features) & NETIF_F_GRO_HW) { 5746 if (vi->xdp_enabled) 5747 return -EBUSY; 5748 5749 if (features & NETIF_F_GRO_HW) 5750 offloads = vi->guest_offloads_capable; 5751 else 5752 offloads = vi->guest_offloads_capable & 5753 ~GUEST_OFFLOAD_GRO_HW_MASK; 5754 5755 err = virtnet_set_guest_offloads(vi, offloads); 5756 if (err) 5757 return err; 5758 vi->guest_offloads = offloads; 5759 } 5760 5761 if ((dev->features ^ features) & NETIF_F_RXHASH) { 5762 if (features & NETIF_F_RXHASH) 5763 vi->rss.hash_types = vi->rss_hash_types_saved; 5764 else 5765 vi->rss.hash_types = VIRTIO_NET_HASH_REPORT_NONE; 5766 5767 if (!virtnet_commit_rss_command(vi)) 5768 return -EINVAL; 5769 } 5770 5771 return 0; 5772 } 5773 5774 static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue) 5775 { 5776 struct virtnet_info *priv = netdev_priv(dev); 5777 struct send_queue *sq = &priv->sq[txqueue]; 5778 struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue); 5779 5780 u64_stats_update_begin(&sq->stats.syncp); 5781 u64_stats_inc(&sq->stats.tx_timeouts); 5782 u64_stats_update_end(&sq->stats.syncp); 5783 5784 netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n", 5785 txqueue, sq->name, sq->vq->index, sq->vq->name, 5786 jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start))); 5787 } 5788 5789 static int virtnet_init_irq_moder(struct virtnet_info *vi) 5790 { 5791 u8 profile_flags = 0, coal_flags = 0; 5792 int ret, i; 5793 5794 profile_flags |= DIM_PROFILE_RX; 5795 coal_flags |= DIM_COALESCE_USEC | DIM_COALESCE_PKTS; 5796 ret = net_dim_init_irq_moder(vi->dev, profile_flags, coal_flags, 5797 DIM_CQ_PERIOD_MODE_START_FROM_EQE, 5798 0, virtnet_rx_dim_work, NULL); 5799 5800 if (ret) 5801 return ret; 5802 5803 for (i = 0; i < vi->max_queue_pairs; i++) 5804 net_dim_setting(vi->dev, &vi->rq[i].dim, false); 5805 5806 return 0; 5807 } 5808 5809 static void virtnet_free_irq_moder(struct virtnet_info *vi) 5810 { 5811 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 5812 return; 5813 5814 rtnl_lock(); 5815 net_dim_free_irq_moder(vi->dev); 5816 rtnl_unlock(); 5817 } 5818 5819 static const struct net_device_ops virtnet_netdev = { 5820 .ndo_open = virtnet_open, 5821 .ndo_stop = virtnet_close, 5822 .ndo_start_xmit = start_xmit, 5823 .ndo_validate_addr = eth_validate_addr, 5824 .ndo_set_mac_address = virtnet_set_mac_address, 5825 .ndo_set_rx_mode = virtnet_set_rx_mode, 5826 .ndo_get_stats64 = virtnet_stats, 5827 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, 5828 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, 5829 .ndo_bpf = virtnet_xdp, 5830 .ndo_xdp_xmit = virtnet_xdp_xmit, 5831 .ndo_xsk_wakeup = virtnet_xsk_wakeup, 5832 .ndo_features_check = passthru_features_check, 5833 .ndo_get_phys_port_name = virtnet_get_phys_port_name, 5834 .ndo_set_features = virtnet_set_features, 5835 .ndo_tx_timeout = virtnet_tx_timeout, 5836 }; 5837 5838 static void virtnet_config_changed_work(struct work_struct *work) 5839 { 5840 struct virtnet_info *vi = 5841 container_of(work, struct virtnet_info, config_work); 5842 u16 v; 5843 5844 if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS, 5845 struct virtio_net_config, status, &v) < 0) 5846 return; 5847 5848 if (v & VIRTIO_NET_S_ANNOUNCE) { 5849 netdev_notify_peers(vi->dev); 5850 virtnet_ack_link_announce(vi); 5851 } 5852 5853 /* Ignore unknown (future) status bits */ 5854 v &= VIRTIO_NET_S_LINK_UP; 5855 5856 if (vi->status == v) 5857 return; 5858 5859 vi->status = v; 5860 5861 if (vi->status & VIRTIO_NET_S_LINK_UP) { 5862 virtnet_update_settings(vi); 5863 netif_carrier_on(vi->dev); 5864 netif_tx_wake_all_queues(vi->dev); 5865 } else { 5866 netif_carrier_off(vi->dev); 5867 netif_tx_stop_all_queues(vi->dev); 5868 } 5869 } 5870 5871 static void virtnet_config_changed(struct virtio_device *vdev) 5872 { 5873 struct virtnet_info *vi = vdev->priv; 5874 5875 schedule_work(&vi->config_work); 5876 } 5877 5878 static void virtnet_free_queues(struct virtnet_info *vi) 5879 { 5880 int i; 5881 5882 for (i = 0; i < vi->max_queue_pairs; i++) { 5883 __netif_napi_del(&vi->rq[i].napi); 5884 __netif_napi_del(&vi->sq[i].napi); 5885 } 5886 5887 /* We called __netif_napi_del(), 5888 * we need to respect an RCU grace period before freeing vi->rq 5889 */ 5890 synchronize_net(); 5891 5892 kfree(vi->rq); 5893 kfree(vi->sq); 5894 kfree(vi->ctrl); 5895 } 5896 5897 static void _free_receive_bufs(struct virtnet_info *vi) 5898 { 5899 struct bpf_prog *old_prog; 5900 int i; 5901 5902 for (i = 0; i < vi->max_queue_pairs; i++) { 5903 while (vi->rq[i].pages) 5904 __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0); 5905 5906 old_prog = rtnl_dereference(vi->rq[i].xdp_prog); 5907 RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL); 5908 if (old_prog) 5909 bpf_prog_put(old_prog); 5910 } 5911 } 5912 5913 static void free_receive_bufs(struct virtnet_info *vi) 5914 { 5915 rtnl_lock(); 5916 _free_receive_bufs(vi); 5917 rtnl_unlock(); 5918 } 5919 5920 static void free_receive_page_frags(struct virtnet_info *vi) 5921 { 5922 int i; 5923 for (i = 0; i < vi->max_queue_pairs; i++) 5924 if (vi->rq[i].alloc_frag.page) { 5925 if (vi->rq[i].last_dma) 5926 virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0); 5927 put_page(vi->rq[i].alloc_frag.page); 5928 } 5929 } 5930 5931 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf) 5932 { 5933 if (!is_xdp_frame(buf)) 5934 dev_kfree_skb(buf); 5935 else 5936 xdp_return_frame(ptr_to_xdp(buf)); 5937 } 5938 5939 static void free_unused_bufs(struct virtnet_info *vi) 5940 { 5941 void *buf; 5942 int i; 5943 5944 for (i = 0; i < vi->max_queue_pairs; i++) { 5945 struct virtqueue *vq = vi->sq[i].vq; 5946 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 5947 virtnet_sq_free_unused_buf(vq, buf); 5948 cond_resched(); 5949 } 5950 5951 for (i = 0; i < vi->max_queue_pairs; i++) { 5952 struct virtqueue *vq = vi->rq[i].vq; 5953 5954 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 5955 virtnet_rq_unmap_free_buf(vq, buf); 5956 cond_resched(); 5957 } 5958 } 5959 5960 static void virtnet_del_vqs(struct virtnet_info *vi) 5961 { 5962 struct virtio_device *vdev = vi->vdev; 5963 5964 virtnet_clean_affinity(vi); 5965 5966 vdev->config->del_vqs(vdev); 5967 5968 virtnet_free_queues(vi); 5969 } 5970 5971 /* How large should a single buffer be so a queue full of these can fit at 5972 * least one full packet? 5973 * Logic below assumes the mergeable buffer header is used. 5974 */ 5975 static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq) 5976 { 5977 const unsigned int hdr_len = vi->hdr_len; 5978 unsigned int rq_size = virtqueue_get_vring_size(vq); 5979 unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu; 5980 unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len; 5981 unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size); 5982 5983 return max(max(min_buf_len, hdr_len) - hdr_len, 5984 (unsigned int)GOOD_PACKET_LEN); 5985 } 5986 5987 static int virtnet_find_vqs(struct virtnet_info *vi) 5988 { 5989 struct virtqueue_info *vqs_info; 5990 struct virtqueue **vqs; 5991 int ret = -ENOMEM; 5992 int total_vqs; 5993 bool *ctx; 5994 u16 i; 5995 5996 /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by 5997 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by 5998 * possible control vq. 5999 */ 6000 total_vqs = vi->max_queue_pairs * 2 + 6001 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ); 6002 6003 /* Allocate space for find_vqs parameters */ 6004 vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL); 6005 if (!vqs) 6006 goto err_vq; 6007 vqs_info = kcalloc(total_vqs, sizeof(*vqs_info), GFP_KERNEL); 6008 if (!vqs_info) 6009 goto err_vqs_info; 6010 if (!vi->big_packets || vi->mergeable_rx_bufs) { 6011 ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL); 6012 if (!ctx) 6013 goto err_ctx; 6014 } else { 6015 ctx = NULL; 6016 } 6017 6018 /* Parameters for control virtqueue, if any */ 6019 if (vi->has_cvq) { 6020 vqs_info[total_vqs - 1].name = "control"; 6021 } 6022 6023 /* Allocate/initialize parameters for send/receive virtqueues */ 6024 for (i = 0; i < vi->max_queue_pairs; i++) { 6025 vqs_info[rxq2vq(i)].callback = skb_recv_done; 6026 vqs_info[txq2vq(i)].callback = skb_xmit_done; 6027 sprintf(vi->rq[i].name, "input.%u", i); 6028 sprintf(vi->sq[i].name, "output.%u", i); 6029 vqs_info[rxq2vq(i)].name = vi->rq[i].name; 6030 vqs_info[txq2vq(i)].name = vi->sq[i].name; 6031 if (ctx) 6032 vqs_info[rxq2vq(i)].ctx = true; 6033 } 6034 6035 ret = virtio_find_vqs(vi->vdev, total_vqs, vqs, vqs_info, NULL); 6036 if (ret) 6037 goto err_find; 6038 6039 if (vi->has_cvq) { 6040 vi->cvq = vqs[total_vqs - 1]; 6041 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) 6042 vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; 6043 } 6044 6045 for (i = 0; i < vi->max_queue_pairs; i++) { 6046 vi->rq[i].vq = vqs[rxq2vq(i)]; 6047 vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq); 6048 vi->sq[i].vq = vqs[txq2vq(i)]; 6049 } 6050 6051 /* run here: ret == 0. */ 6052 6053 6054 err_find: 6055 kfree(ctx); 6056 err_ctx: 6057 kfree(vqs_info); 6058 err_vqs_info: 6059 kfree(vqs); 6060 err_vq: 6061 return ret; 6062 } 6063 6064 static int virtnet_alloc_queues(struct virtnet_info *vi) 6065 { 6066 int i; 6067 6068 if (vi->has_cvq) { 6069 vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL); 6070 if (!vi->ctrl) 6071 goto err_ctrl; 6072 } else { 6073 vi->ctrl = NULL; 6074 } 6075 vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL); 6076 if (!vi->sq) 6077 goto err_sq; 6078 vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL); 6079 if (!vi->rq) 6080 goto err_rq; 6081 6082 INIT_DELAYED_WORK(&vi->refill, refill_work); 6083 for (i = 0; i < vi->max_queue_pairs; i++) { 6084 vi->rq[i].pages = NULL; 6085 netif_napi_add_weight(vi->dev, &vi->rq[i].napi, virtnet_poll, 6086 napi_weight); 6087 netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi, 6088 virtnet_poll_tx, 6089 napi_tx ? napi_weight : 0); 6090 6091 sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); 6092 ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len); 6093 sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg)); 6094 6095 u64_stats_init(&vi->rq[i].stats.syncp); 6096 u64_stats_init(&vi->sq[i].stats.syncp); 6097 mutex_init(&vi->rq[i].dim_lock); 6098 } 6099 6100 return 0; 6101 6102 err_rq: 6103 kfree(vi->sq); 6104 err_sq: 6105 kfree(vi->ctrl); 6106 err_ctrl: 6107 return -ENOMEM; 6108 } 6109 6110 static void virtnet_rq_set_premapped(struct virtnet_info *vi) 6111 { 6112 int i; 6113 6114 for (i = 0; i < vi->max_queue_pairs; i++) 6115 /* error should never happen */ 6116 BUG_ON(virtqueue_set_dma_premapped(vi->rq[i].vq)); 6117 } 6118 6119 static int init_vqs(struct virtnet_info *vi) 6120 { 6121 int ret; 6122 6123 /* Allocate send & receive queues */ 6124 ret = virtnet_alloc_queues(vi); 6125 if (ret) 6126 goto err; 6127 6128 ret = virtnet_find_vqs(vi); 6129 if (ret) 6130 goto err_free; 6131 6132 /* disable for big mode */ 6133 if (!vi->big_packets || vi->mergeable_rx_bufs) 6134 virtnet_rq_set_premapped(vi); 6135 6136 cpus_read_lock(); 6137 virtnet_set_affinity(vi); 6138 cpus_read_unlock(); 6139 6140 return 0; 6141 6142 err_free: 6143 virtnet_free_queues(vi); 6144 err: 6145 return ret; 6146 } 6147 6148 #ifdef CONFIG_SYSFS 6149 static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue, 6150 char *buf) 6151 { 6152 struct virtnet_info *vi = netdev_priv(queue->dev); 6153 unsigned int queue_index = get_netdev_rx_queue_index(queue); 6154 unsigned int headroom = virtnet_get_headroom(vi); 6155 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 6156 struct ewma_pkt_len *avg; 6157 6158 BUG_ON(queue_index >= vi->max_queue_pairs); 6159 avg = &vi->rq[queue_index].mrg_avg_pkt_len; 6160 return sprintf(buf, "%u\n", 6161 get_mergeable_buf_len(&vi->rq[queue_index], avg, 6162 SKB_DATA_ALIGN(headroom + tailroom))); 6163 } 6164 6165 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute = 6166 __ATTR_RO(mergeable_rx_buffer_size); 6167 6168 static struct attribute *virtio_net_mrg_rx_attrs[] = { 6169 &mergeable_rx_buffer_size_attribute.attr, 6170 NULL 6171 }; 6172 6173 static const struct attribute_group virtio_net_mrg_rx_group = { 6174 .name = "virtio_net", 6175 .attrs = virtio_net_mrg_rx_attrs 6176 }; 6177 #endif 6178 6179 static bool virtnet_fail_on_feature(struct virtio_device *vdev, 6180 unsigned int fbit, 6181 const char *fname, const char *dname) 6182 { 6183 if (!virtio_has_feature(vdev, fbit)) 6184 return false; 6185 6186 dev_err(&vdev->dev, "device advertises feature %s but not %s", 6187 fname, dname); 6188 6189 return true; 6190 } 6191 6192 #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \ 6193 virtnet_fail_on_feature(vdev, fbit, #fbit, dbit) 6194 6195 static bool virtnet_validate_features(struct virtio_device *vdev) 6196 { 6197 if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) && 6198 (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX, 6199 "VIRTIO_NET_F_CTRL_VQ") || 6200 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN, 6201 "VIRTIO_NET_F_CTRL_VQ") || 6202 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE, 6203 "VIRTIO_NET_F_CTRL_VQ") || 6204 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") || 6205 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR, 6206 "VIRTIO_NET_F_CTRL_VQ") || 6207 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, 6208 "VIRTIO_NET_F_CTRL_VQ") || 6209 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, 6210 "VIRTIO_NET_F_CTRL_VQ") || 6211 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, 6212 "VIRTIO_NET_F_CTRL_VQ") || 6213 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_VQ_NOTF_COAL, 6214 "VIRTIO_NET_F_CTRL_VQ"))) { 6215 return false; 6216 } 6217 6218 return true; 6219 } 6220 6221 #define MIN_MTU ETH_MIN_MTU 6222 #define MAX_MTU ETH_MAX_MTU 6223 6224 static int virtnet_validate(struct virtio_device *vdev) 6225 { 6226 if (!vdev->config->get) { 6227 dev_err(&vdev->dev, "%s failure: config access disabled\n", 6228 __func__); 6229 return -EINVAL; 6230 } 6231 6232 if (!virtnet_validate_features(vdev)) 6233 return -EINVAL; 6234 6235 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 6236 int mtu = virtio_cread16(vdev, 6237 offsetof(struct virtio_net_config, 6238 mtu)); 6239 if (mtu < MIN_MTU) 6240 __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU); 6241 } 6242 6243 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) && 6244 !virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 6245 dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby"); 6246 __virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY); 6247 } 6248 6249 return 0; 6250 } 6251 6252 static bool virtnet_check_guest_gso(const struct virtnet_info *vi) 6253 { 6254 return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 6255 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 6256 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 6257 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 6258 (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) && 6259 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6)); 6260 } 6261 6262 static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu) 6263 { 6264 bool guest_gso = virtnet_check_guest_gso(vi); 6265 6266 /* If device can receive ANY guest GSO packets, regardless of mtu, 6267 * allocate packets of maximum size, otherwise limit it to only 6268 * mtu size worth only. 6269 */ 6270 if (mtu > ETH_DATA_LEN || guest_gso) { 6271 vi->big_packets = true; 6272 vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE); 6273 } 6274 } 6275 6276 #define VIRTIO_NET_HASH_REPORT_MAX_TABLE 10 6277 static enum xdp_rss_hash_type 6278 virtnet_xdp_rss_type[VIRTIO_NET_HASH_REPORT_MAX_TABLE] = { 6279 [VIRTIO_NET_HASH_REPORT_NONE] = XDP_RSS_TYPE_NONE, 6280 [VIRTIO_NET_HASH_REPORT_IPv4] = XDP_RSS_TYPE_L3_IPV4, 6281 [VIRTIO_NET_HASH_REPORT_TCPv4] = XDP_RSS_TYPE_L4_IPV4_TCP, 6282 [VIRTIO_NET_HASH_REPORT_UDPv4] = XDP_RSS_TYPE_L4_IPV4_UDP, 6283 [VIRTIO_NET_HASH_REPORT_IPv6] = XDP_RSS_TYPE_L3_IPV6, 6284 [VIRTIO_NET_HASH_REPORT_TCPv6] = XDP_RSS_TYPE_L4_IPV6_TCP, 6285 [VIRTIO_NET_HASH_REPORT_UDPv6] = XDP_RSS_TYPE_L4_IPV6_UDP, 6286 [VIRTIO_NET_HASH_REPORT_IPv6_EX] = XDP_RSS_TYPE_L3_IPV6_EX, 6287 [VIRTIO_NET_HASH_REPORT_TCPv6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX, 6288 [VIRTIO_NET_HASH_REPORT_UDPv6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX 6289 }; 6290 6291 static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, 6292 enum xdp_rss_hash_type *rss_type) 6293 { 6294 const struct xdp_buff *xdp = (void *)_ctx; 6295 struct virtio_net_hdr_v1_hash *hdr_hash; 6296 struct virtnet_info *vi; 6297 u16 hash_report; 6298 6299 if (!(xdp->rxq->dev->features & NETIF_F_RXHASH)) 6300 return -ENODATA; 6301 6302 vi = netdev_priv(xdp->rxq->dev); 6303 hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len); 6304 hash_report = __le16_to_cpu(hdr_hash->hash_report); 6305 6306 if (hash_report >= VIRTIO_NET_HASH_REPORT_MAX_TABLE) 6307 hash_report = VIRTIO_NET_HASH_REPORT_NONE; 6308 6309 *rss_type = virtnet_xdp_rss_type[hash_report]; 6310 *hash = __le32_to_cpu(hdr_hash->hash_value); 6311 return 0; 6312 } 6313 6314 static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = { 6315 .xmo_rx_hash = virtnet_xdp_rx_hash, 6316 }; 6317 6318 static int virtnet_probe(struct virtio_device *vdev) 6319 { 6320 int i, err = -ENOMEM; 6321 struct net_device *dev; 6322 struct virtnet_info *vi; 6323 u16 max_queue_pairs; 6324 int mtu = 0; 6325 6326 /* Find if host supports multiqueue/rss virtio_net device */ 6327 max_queue_pairs = 1; 6328 if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) 6329 max_queue_pairs = 6330 virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); 6331 6332 /* We need at least 2 queue's */ 6333 if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 6334 max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || 6335 !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 6336 max_queue_pairs = 1; 6337 6338 /* Allocate ourselves a network device with room for our info */ 6339 dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs); 6340 if (!dev) 6341 return -ENOMEM; 6342 6343 /* Set up network device as normal. */ 6344 dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE | 6345 IFF_TX_SKB_NO_LINEAR; 6346 dev->netdev_ops = &virtnet_netdev; 6347 dev->stat_ops = &virtnet_stat_ops; 6348 dev->features = NETIF_F_HIGHDMA; 6349 6350 dev->ethtool_ops = &virtnet_ethtool_ops; 6351 SET_NETDEV_DEV(dev, &vdev->dev); 6352 6353 /* Do we support "hardware" checksums? */ 6354 if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { 6355 /* This opens up the world of extra features. */ 6356 dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG; 6357 if (csum) 6358 dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; 6359 6360 if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { 6361 dev->hw_features |= NETIF_F_TSO 6362 | NETIF_F_TSO_ECN | NETIF_F_TSO6; 6363 } 6364 /* Individual feature bits: what can host handle? */ 6365 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4)) 6366 dev->hw_features |= NETIF_F_TSO; 6367 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6)) 6368 dev->hw_features |= NETIF_F_TSO6; 6369 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) 6370 dev->hw_features |= NETIF_F_TSO_ECN; 6371 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO)) 6372 dev->hw_features |= NETIF_F_GSO_UDP_L4; 6373 6374 dev->features |= NETIF_F_GSO_ROBUST; 6375 6376 if (gso) 6377 dev->features |= dev->hw_features & NETIF_F_ALL_TSO; 6378 /* (!csum && gso) case will be fixed by register_netdev() */ 6379 } 6380 6381 /* 1. With VIRTIO_NET_F_GUEST_CSUM negotiation, the driver doesn't 6382 * need to calculate checksums for partially checksummed packets, 6383 * as they're considered valid by the upper layer. 6384 * 2. Without VIRTIO_NET_F_GUEST_CSUM negotiation, the driver only 6385 * receives fully checksummed packets. The device may assist in 6386 * validating these packets' checksums, so the driver won't have to. 6387 */ 6388 dev->features |= NETIF_F_RXCSUM; 6389 6390 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || 6391 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) 6392 dev->features |= NETIF_F_GRO_HW; 6393 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) 6394 dev->hw_features |= NETIF_F_GRO_HW; 6395 6396 dev->vlan_features = dev->features; 6397 dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; 6398 6399 /* MTU range: 68 - 65535 */ 6400 dev->min_mtu = MIN_MTU; 6401 dev->max_mtu = MAX_MTU; 6402 6403 /* Configuration may specify what MAC to use. Otherwise random. */ 6404 if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 6405 u8 addr[ETH_ALEN]; 6406 6407 virtio_cread_bytes(vdev, 6408 offsetof(struct virtio_net_config, mac), 6409 addr, ETH_ALEN); 6410 eth_hw_addr_set(dev, addr); 6411 } else { 6412 eth_hw_addr_random(dev); 6413 dev_info(&vdev->dev, "Assigned random MAC address %pM\n", 6414 dev->dev_addr); 6415 } 6416 6417 /* Set up our device-specific information */ 6418 vi = netdev_priv(dev); 6419 vi->dev = dev; 6420 vi->vdev = vdev; 6421 vdev->priv = vi; 6422 6423 INIT_WORK(&vi->config_work, virtnet_config_changed_work); 6424 INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work); 6425 spin_lock_init(&vi->refill_lock); 6426 6427 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) { 6428 vi->mergeable_rx_bufs = true; 6429 dev->xdp_features |= NETDEV_XDP_ACT_RX_SG; 6430 } 6431 6432 if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) 6433 vi->has_rss_hash_report = true; 6434 6435 if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) { 6436 vi->has_rss = true; 6437 6438 vi->rss_indir_table_size = 6439 virtio_cread16(vdev, offsetof(struct virtio_net_config, 6440 rss_max_indirection_table_length)); 6441 } 6442 6443 if (vi->has_rss || vi->has_rss_hash_report) { 6444 vi->rss_key_size = 6445 virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); 6446 6447 vi->rss_hash_types_supported = 6448 virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types)); 6449 vi->rss_hash_types_supported &= 6450 ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX | 6451 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | 6452 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX); 6453 6454 dev->hw_features |= NETIF_F_RXHASH; 6455 dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops; 6456 } 6457 6458 if (vi->has_rss_hash_report) 6459 vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); 6460 else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) || 6461 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 6462 vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); 6463 else 6464 vi->hdr_len = sizeof(struct virtio_net_hdr); 6465 6466 if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || 6467 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 6468 vi->any_header_sg = true; 6469 6470 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 6471 vi->has_cvq = true; 6472 6473 mutex_init(&vi->cvq_lock); 6474 6475 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 6476 mtu = virtio_cread16(vdev, 6477 offsetof(struct virtio_net_config, 6478 mtu)); 6479 if (mtu < dev->min_mtu) { 6480 /* Should never trigger: MTU was previously validated 6481 * in virtnet_validate. 6482 */ 6483 dev_err(&vdev->dev, 6484 "device MTU appears to have changed it is now %d < %d", 6485 mtu, dev->min_mtu); 6486 err = -EINVAL; 6487 goto free; 6488 } 6489 6490 dev->mtu = mtu; 6491 dev->max_mtu = mtu; 6492 } 6493 6494 virtnet_set_big_packets(vi, mtu); 6495 6496 if (vi->any_header_sg) 6497 dev->needed_headroom = vi->hdr_len; 6498 6499 /* Enable multiqueue by default */ 6500 if (num_online_cpus() >= max_queue_pairs) 6501 vi->curr_queue_pairs = max_queue_pairs; 6502 else 6503 vi->curr_queue_pairs = num_online_cpus(); 6504 vi->max_queue_pairs = max_queue_pairs; 6505 6506 /* Allocate/initialize the rx/tx queues, and invoke find_vqs */ 6507 err = init_vqs(vi); 6508 if (err) 6509 goto free; 6510 6511 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 6512 vi->intr_coal_rx.max_usecs = 0; 6513 vi->intr_coal_tx.max_usecs = 0; 6514 vi->intr_coal_rx.max_packets = 0; 6515 6516 /* Keep the default values of the coalescing parameters 6517 * aligned with the default napi_tx state. 6518 */ 6519 if (vi->sq[0].napi.weight) 6520 vi->intr_coal_tx.max_packets = 1; 6521 else 6522 vi->intr_coal_tx.max_packets = 0; 6523 } 6524 6525 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 6526 /* The reason is the same as VIRTIO_NET_F_NOTF_COAL. */ 6527 for (i = 0; i < vi->max_queue_pairs; i++) 6528 if (vi->sq[i].napi.weight) 6529 vi->sq[i].intr_coal.max_packets = 1; 6530 6531 err = virtnet_init_irq_moder(vi); 6532 if (err) 6533 goto free; 6534 } 6535 6536 #ifdef CONFIG_SYSFS 6537 if (vi->mergeable_rx_bufs) 6538 dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group; 6539 #endif 6540 netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs); 6541 netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs); 6542 6543 virtnet_init_settings(dev); 6544 6545 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 6546 vi->failover = net_failover_create(vi->dev); 6547 if (IS_ERR(vi->failover)) { 6548 err = PTR_ERR(vi->failover); 6549 goto free_vqs; 6550 } 6551 } 6552 6553 if (vi->has_rss || vi->has_rss_hash_report) 6554 virtnet_init_default_rss(vi); 6555 6556 enable_rx_mode_work(vi); 6557 6558 /* serialize netdev register + virtio_device_ready() with ndo_open() */ 6559 rtnl_lock(); 6560 6561 err = register_netdevice(dev); 6562 if (err) { 6563 pr_debug("virtio_net: registering device failed\n"); 6564 rtnl_unlock(); 6565 goto free_failover; 6566 } 6567 6568 /* Disable config change notification until ndo_open. */ 6569 virtio_config_driver_disable(vi->vdev); 6570 6571 virtio_device_ready(vdev); 6572 6573 virtnet_set_queues(vi, vi->curr_queue_pairs); 6574 6575 /* a random MAC address has been assigned, notify the device. 6576 * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there 6577 * because many devices work fine without getting MAC explicitly 6578 */ 6579 if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 6580 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 6581 struct scatterlist sg; 6582 6583 sg_init_one(&sg, dev->dev_addr, dev->addr_len); 6584 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 6585 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 6586 pr_debug("virtio_net: setting MAC address failed\n"); 6587 rtnl_unlock(); 6588 err = -EINVAL; 6589 goto free_unregister_netdev; 6590 } 6591 } 6592 6593 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) { 6594 struct virtio_net_stats_capabilities *stats_cap __free(kfree) = NULL; 6595 struct scatterlist sg; 6596 __le64 v; 6597 6598 stats_cap = kzalloc(sizeof(*stats_cap), GFP_KERNEL); 6599 if (!stats_cap) { 6600 rtnl_unlock(); 6601 err = -ENOMEM; 6602 goto free_unregister_netdev; 6603 } 6604 6605 sg_init_one(&sg, stats_cap, sizeof(*stats_cap)); 6606 6607 if (!virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, 6608 VIRTIO_NET_CTRL_STATS_QUERY, 6609 NULL, &sg)) { 6610 pr_debug("virtio_net: fail to get stats capability\n"); 6611 rtnl_unlock(); 6612 err = -EINVAL; 6613 goto free_unregister_netdev; 6614 } 6615 6616 v = stats_cap->supported_stats_types[0]; 6617 vi->device_stats_cap = le64_to_cpu(v); 6618 } 6619 6620 /* Assume link up if device can't report link status, 6621 otherwise get link status from config. */ 6622 netif_carrier_off(dev); 6623 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { 6624 virtnet_config_changed_work(&vi->config_work); 6625 } else { 6626 vi->status = VIRTIO_NET_S_LINK_UP; 6627 virtnet_update_settings(vi); 6628 netif_carrier_on(dev); 6629 } 6630 6631 for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) 6632 if (virtio_has_feature(vi->vdev, guest_offloads[i])) 6633 set_bit(guest_offloads[i], &vi->guest_offloads); 6634 vi->guest_offloads_capable = vi->guest_offloads; 6635 6636 rtnl_unlock(); 6637 6638 err = virtnet_cpu_notif_add(vi); 6639 if (err) { 6640 pr_debug("virtio_net: registering cpu notifier failed\n"); 6641 goto free_unregister_netdev; 6642 } 6643 6644 pr_debug("virtnet: registered device %s with %d RX and TX vq's\n", 6645 dev->name, max_queue_pairs); 6646 6647 return 0; 6648 6649 free_unregister_netdev: 6650 unregister_netdev(dev); 6651 free_failover: 6652 net_failover_destroy(vi->failover); 6653 free_vqs: 6654 virtio_reset_device(vdev); 6655 cancel_delayed_work_sync(&vi->refill); 6656 free_receive_page_frags(vi); 6657 virtnet_del_vqs(vi); 6658 free: 6659 free_netdev(dev); 6660 return err; 6661 } 6662 6663 static void remove_vq_common(struct virtnet_info *vi) 6664 { 6665 virtio_reset_device(vi->vdev); 6666 6667 /* Free unused buffers in both send and recv, if any. */ 6668 free_unused_bufs(vi); 6669 6670 free_receive_bufs(vi); 6671 6672 free_receive_page_frags(vi); 6673 6674 virtnet_del_vqs(vi); 6675 } 6676 6677 static void virtnet_remove(struct virtio_device *vdev) 6678 { 6679 struct virtnet_info *vi = vdev->priv; 6680 6681 virtnet_cpu_notif_remove(vi); 6682 6683 /* Make sure no work handler is accessing the device. */ 6684 flush_work(&vi->config_work); 6685 disable_rx_mode_work(vi); 6686 flush_work(&vi->rx_mode_work); 6687 6688 virtnet_free_irq_moder(vi); 6689 6690 unregister_netdev(vi->dev); 6691 6692 net_failover_destroy(vi->failover); 6693 6694 remove_vq_common(vi); 6695 6696 free_netdev(vi->dev); 6697 } 6698 6699 static __maybe_unused int virtnet_freeze(struct virtio_device *vdev) 6700 { 6701 struct virtnet_info *vi = vdev->priv; 6702 6703 virtnet_cpu_notif_remove(vi); 6704 virtnet_freeze_down(vdev); 6705 remove_vq_common(vi); 6706 6707 return 0; 6708 } 6709 6710 static __maybe_unused int virtnet_restore(struct virtio_device *vdev) 6711 { 6712 struct virtnet_info *vi = vdev->priv; 6713 int err; 6714 6715 err = virtnet_restore_up(vdev); 6716 if (err) 6717 return err; 6718 virtnet_set_queues(vi, vi->curr_queue_pairs); 6719 6720 err = virtnet_cpu_notif_add(vi); 6721 if (err) { 6722 virtnet_freeze_down(vdev); 6723 remove_vq_common(vi); 6724 return err; 6725 } 6726 6727 return 0; 6728 } 6729 6730 static struct virtio_device_id id_table[] = { 6731 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, 6732 { 0 }, 6733 }; 6734 6735 #define VIRTNET_FEATURES \ 6736 VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \ 6737 VIRTIO_NET_F_MAC, \ 6738 VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \ 6739 VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \ 6740 VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \ 6741 VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \ 6742 VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \ 6743 VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \ 6744 VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ 6745 VIRTIO_NET_F_CTRL_MAC_ADDR, \ 6746 VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ 6747 VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ 6748 VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \ 6749 VIRTIO_NET_F_VQ_NOTF_COAL, \ 6750 VIRTIO_NET_F_GUEST_HDRLEN, VIRTIO_NET_F_DEVICE_STATS 6751 6752 static unsigned int features[] = { 6753 VIRTNET_FEATURES, 6754 }; 6755 6756 static unsigned int features_legacy[] = { 6757 VIRTNET_FEATURES, 6758 VIRTIO_NET_F_GSO, 6759 VIRTIO_F_ANY_LAYOUT, 6760 }; 6761 6762 static struct virtio_driver virtio_net_driver = { 6763 .feature_table = features, 6764 .feature_table_size = ARRAY_SIZE(features), 6765 .feature_table_legacy = features_legacy, 6766 .feature_table_size_legacy = ARRAY_SIZE(features_legacy), 6767 .driver.name = KBUILD_MODNAME, 6768 .id_table = id_table, 6769 .validate = virtnet_validate, 6770 .probe = virtnet_probe, 6771 .remove = virtnet_remove, 6772 .config_changed = virtnet_config_changed, 6773 #ifdef CONFIG_PM_SLEEP 6774 .freeze = virtnet_freeze, 6775 .restore = virtnet_restore, 6776 #endif 6777 }; 6778 6779 static __init int virtio_net_driver_init(void) 6780 { 6781 int ret; 6782 6783 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online", 6784 virtnet_cpu_online, 6785 virtnet_cpu_down_prep); 6786 if (ret < 0) 6787 goto out; 6788 virtionet_online = ret; 6789 ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead", 6790 NULL, virtnet_cpu_dead); 6791 if (ret) 6792 goto err_dead; 6793 ret = register_virtio_driver(&virtio_net_driver); 6794 if (ret) 6795 goto err_virtio; 6796 return 0; 6797 err_virtio: 6798 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 6799 err_dead: 6800 cpuhp_remove_multi_state(virtionet_online); 6801 out: 6802 return ret; 6803 } 6804 module_init(virtio_net_driver_init); 6805 6806 static __exit void virtio_net_driver_exit(void) 6807 { 6808 unregister_virtio_driver(&virtio_net_driver); 6809 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 6810 cpuhp_remove_multi_state(virtionet_online); 6811 } 6812 module_exit(virtio_net_driver_exit); 6813 6814 MODULE_DEVICE_TABLE(virtio, id_table); 6815 MODULE_DESCRIPTION("Virtio network driver"); 6816 MODULE_LICENSE("GPL"); 6817