// SPDX-License-Identifier: GPL-2.0-or-later
/* A network driver using virtio.
 *
 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
 */
//#define DEBUG
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_net.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/scatterlist.h>
#include <linux/if_vlan.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/average.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/dim.h>
#include <net/route.h>
#include <net/xdp.h>
#include <net/net_failover.h>
#include <net/netdev_rx_queue.h>
#include <net/netdev_queues.h>
#include <net/xdp_sock_drv.h>

static int napi_weight = NAPI_POLL_WEIGHT;
module_param(napi_weight, int, 0444);

static bool csum = true, gso = true, napi_tx = true;
module_param(csum, bool, 0444);
module_param(gso, bool, 0444);
module_param(napi_tx, bool, 0644);

/* FIXME: MTU in config. */
#define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
#define GOOD_COPY_LEN	128

#define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)

/* Separating two types of XDP xmit */
#define VIRTIO_XDP_TX		BIT(0)
#define VIRTIO_XDP_REDIR	BIT(1)

#define VIRTIO_XDP_FLAG		BIT(0)
#define VIRTIO_ORPHAN_FLAG	BIT(1)

/* RX packet size EWMA. The average packet size is used to determine the packet
 * buffer size when refilling RX rings. As the entire RX ring may be refilled
 * at once, the weight is chosen so that the EWMA will be insensitive to short-
 * term, transient changes in packet size.
 */
DECLARE_EWMA(pkt_len, 0, 64)

#define VIRTNET_DRIVER_VERSION "1.0.0"

static const unsigned long guest_offloads[] = {
	VIRTIO_NET_F_GUEST_TSO4,
	VIRTIO_NET_F_GUEST_TSO6,
	VIRTIO_NET_F_GUEST_ECN,
	VIRTIO_NET_F_GUEST_UFO,
	VIRTIO_NET_F_GUEST_CSUM,
	VIRTIO_NET_F_GUEST_USO4,
	VIRTIO_NET_F_GUEST_USO6,
	VIRTIO_NET_F_GUEST_HDRLEN
};

#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
				   (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
				   (1ULL << VIRTIO_NET_F_GUEST_ECN)  | \
				   (1ULL << VIRTIO_NET_F_GUEST_UFO)  | \
				   (1ULL << VIRTIO_NET_F_GUEST_USO4) | \
				   (1ULL << VIRTIO_NET_F_GUEST_USO6))

struct virtnet_stat_desc {
	char desc[ETH_GSTRING_LEN];
	size_t offset;
	size_t qstat_offset;
};

struct virtnet_sq_free_stats {
	u64 packets;
	u64 bytes;
	u64 napi_packets;
	u64 napi_bytes;
};

struct virtnet_sq_stats {
	struct u64_stats_sync syncp;
	u64_stats_t packets;
	u64_stats_t bytes;
	u64_stats_t xdp_tx;
	u64_stats_t xdp_tx_drops;
	u64_stats_t kicks;
	u64_stats_t tx_timeouts;
	u64_stats_t stop;
	u64_stats_t wake;
};

struct virtnet_rq_stats {
	struct u64_stats_sync syncp;
	u64_stats_t packets;
	u64_stats_t bytes;
	u64_stats_t drops;
	u64_stats_t xdp_packets;
	u64_stats_t xdp_tx;
	u64_stats_t xdp_redirects;
	u64_stats_t xdp_drops;
	u64_stats_t kicks;
};

#define VIRTNET_SQ_STAT(name, m) {name, offsetof(struct virtnet_sq_stats, m), -1}
#define VIRTNET_RQ_STAT(name, m) {name, offsetof(struct virtnet_rq_stats, m), -1}

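/* Descriptors with a qstat_offset of -1 have no counterpart in the netdev
 * per-queue stats API; the *_QSTAT variants below additionally record the
 * matching field offset in struct netdev_queue_stats_rx/tx.
 */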
#define VIRTNET_SQ_STAT_QSTAT(name, m)				\
	{							\
		name,						\
		offsetof(struct virtnet_sq_stats, m),		\
		offsetof(struct netdev_queue_stats_tx, m),	\
	}

#define VIRTNET_RQ_STAT_QSTAT(name, m)				\
	{							\
		name,						\
		offsetof(struct virtnet_rq_stats, m),		\
		offsetof(struct netdev_queue_stats_rx, m),	\
	}

static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = {
	VIRTNET_SQ_STAT("xdp_tx", xdp_tx),
	VIRTNET_SQ_STAT("xdp_tx_drops", xdp_tx_drops),
	VIRTNET_SQ_STAT("kicks", kicks),
	VIRTNET_SQ_STAT("tx_timeouts", tx_timeouts),
};

static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
	VIRTNET_RQ_STAT("drops", drops),
	VIRTNET_RQ_STAT("xdp_packets", xdp_packets),
	VIRTNET_RQ_STAT("xdp_tx", xdp_tx),
	VIRTNET_RQ_STAT("xdp_redirects", xdp_redirects),
	VIRTNET_RQ_STAT("xdp_drops", xdp_drops),
	VIRTNET_RQ_STAT("kicks", kicks),
};

static const struct virtnet_stat_desc virtnet_sq_stats_desc_qstat[] = {
	VIRTNET_SQ_STAT_QSTAT("packets", packets),
	VIRTNET_SQ_STAT_QSTAT("bytes", bytes),
	VIRTNET_SQ_STAT_QSTAT("stop", stop),
	VIRTNET_SQ_STAT_QSTAT("wake", wake),
};

static const struct virtnet_stat_desc virtnet_rq_stats_desc_qstat[] = {
	VIRTNET_RQ_STAT_QSTAT("packets", packets),
	VIRTNET_RQ_STAT_QSTAT("bytes", bytes),
};

#define VIRTNET_STATS_DESC_CQ(name) \
	{#name, offsetof(struct virtio_net_stats_cvq, name), -1}

#define VIRTNET_STATS_DESC_RX(class, name) \
	{#name, offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name), -1}

#define VIRTNET_STATS_DESC_TX(class, name) \
	{#name, offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name), -1}


static const struct virtnet_stat_desc virtnet_stats_cvq_desc[] = {
	VIRTNET_STATS_DESC_CQ(command_num),
	VIRTNET_STATS_DESC_CQ(ok_num),
};

static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc[] = {
	VIRTNET_STATS_DESC_RX(basic, packets),
	VIRTNET_STATS_DESC_RX(basic, bytes),

	VIRTNET_STATS_DESC_RX(basic, notifications),
	VIRTNET_STATS_DESC_RX(basic, interrupts),
};

static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc[] = {
	VIRTNET_STATS_DESC_TX(basic, packets),
	VIRTNET_STATS_DESC_TX(basic, bytes),

	VIRTNET_STATS_DESC_TX(basic, notifications),
	VIRTNET_STATS_DESC_TX(basic, interrupts),
};

static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc[] = {
	VIRTNET_STATS_DESC_RX(csum, needs_csum),
};

static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc[] = {
	VIRTNET_STATS_DESC_TX(gso, gso_packets_noseg),
	VIRTNET_STATS_DESC_TX(gso, gso_bytes_noseg),
};

static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc[] = {
	VIRTNET_STATS_DESC_RX(speed, ratelimit_bytes),
};

static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc[] = {
	VIRTNET_STATS_DESC_TX(speed, ratelimit_bytes),
};

#define VIRTNET_STATS_DESC_RX_QSTAT(class, name, qstat_field)			\
	{									\
		#name,								\
		offsetof(struct virtio_net_stats_rx_ ## class, rx_ ## name),	\
		offsetof(struct netdev_queue_stats_rx, qstat_field),		\
	}

#define VIRTNET_STATS_DESC_TX_QSTAT(class, name, qstat_field)			\
	{									\
		#name,								\
		offsetof(struct virtio_net_stats_tx_ ## class, tx_ ## name),	\
		offsetof(struct netdev_queue_stats_tx, qstat_field),		\
	}

static const struct virtnet_stat_desc virtnet_stats_rx_basic_desc_qstat[] = {
	VIRTNET_STATS_DESC_RX_QSTAT(basic, drops, hw_drops),
	VIRTNET_STATS_DESC_RX_QSTAT(basic, drop_overruns, hw_drop_overruns),
};

static const struct virtnet_stat_desc virtnet_stats_tx_basic_desc_qstat[] = {
	VIRTNET_STATS_DESC_TX_QSTAT(basic, drops, hw_drops),
	VIRTNET_STATS_DESC_TX_QSTAT(basic, drop_malformed, hw_drop_errors),
};

static const struct virtnet_stat_desc virtnet_stats_rx_csum_desc_qstat[] = {
	VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_valid, csum_unnecessary),
	VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_none, csum_none),
	VIRTNET_STATS_DESC_RX_QSTAT(csum, csum_bad, csum_bad),
};

static const struct virtnet_stat_desc virtnet_stats_tx_csum_desc_qstat[] = {
	VIRTNET_STATS_DESC_TX_QSTAT(csum, csum_none, csum_none),
	VIRTNET_STATS_DESC_TX_QSTAT(csum, needs_csum, needs_csum),
};

static const struct virtnet_stat_desc virtnet_stats_rx_gso_desc_qstat[] = {
	VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets, hw_gro_packets),
	VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes, hw_gro_bytes),
	VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_packets_coalesced, hw_gro_wire_packets),
	VIRTNET_STATS_DESC_RX_QSTAT(gso, gso_bytes_coalesced, hw_gro_wire_bytes),
};

static const struct virtnet_stat_desc virtnet_stats_tx_gso_desc_qstat[] = {
	VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_packets, hw_gso_packets),
	VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_bytes, hw_gso_bytes),
	VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments, hw_gso_wire_packets),
	VIRTNET_STATS_DESC_TX_QSTAT(gso, gso_segments_bytes, hw_gso_wire_bytes),
};

static const struct virtnet_stat_desc virtnet_stats_rx_speed_desc_qstat[] = {
	VIRTNET_STATS_DESC_RX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits),
};

static const struct virtnet_stat_desc virtnet_stats_tx_speed_desc_qstat[] = {
	VIRTNET_STATS_DESC_TX_QSTAT(speed, ratelimit_packets, hw_drop_ratelimits),
};

#define VIRTNET_Q_TYPE_RX 0
#define VIRTNET_Q_TYPE_TX 1
#define VIRTNET_Q_TYPE_CQ 2

struct virtnet_interrupt_coalesce {
	u32 max_packets;
	u32 max_usecs;
};

/* The dma information of pages allocated at a time. */
struct virtnet_rq_dma {
	dma_addr_t addr;
	u32 ref;
	u16 len;
	u16 need_sync;
};

/* Internal representation of a send virtqueue */
struct send_queue {
	/* Virtqueue associated with this send_queue */
	struct virtqueue *vq;

	/* TX: fragments + linear part + virtio header */
	struct scatterlist sg[MAX_SKB_FRAGS + 2];

	/* Name of the send queue: output.$index */
	char name[16];

	struct virtnet_sq_stats stats;

	struct virtnet_interrupt_coalesce intr_coal;

	struct napi_struct napi;

	/* Record whether sq is in reset state. */
	bool reset;
};

/* Internal representation of a receive virtqueue */
struct receive_queue {
	/* Virtqueue associated with this receive_queue */
	struct virtqueue *vq;

	struct napi_struct napi;

	struct bpf_prog __rcu *xdp_prog;

	struct virtnet_rq_stats stats;

	/* The number of rx notifications */
	u16 calls;

	/* Is dynamic interrupt moderation enabled? */
	bool dim_enabled;

	/* Used to protect dim_enabled and intr_coal */
	struct mutex dim_lock;

	/* Dynamic Interrupt Moderation */
	struct dim dim;

	u32 packets_in_napi;

	struct virtnet_interrupt_coalesce intr_coal;

	/* Chain pages by the private ptr. */
	struct page *pages;

	/* Average packet length for mergeable receive buffers. */
	struct ewma_pkt_len mrg_avg_pkt_len;

	/* Page frag for packet buffer allocation. */
	struct page_frag alloc_frag;

	/* RX: fragments + linear part + virtio header */
	struct scatterlist sg[MAX_SKB_FRAGS + 2];

	/* Min single buffer size for mergeable buffers case. */
	unsigned int min_buf_len;

	/* Name of this receive queue: input.$index */
	char name[16];

	struct xdp_rxq_info xdp_rxq;

	/* Record the last dma info to free after a new page is allocated. */
	struct virtnet_rq_dma *last_dma;

	struct xsk_buff_pool *xsk_pool;

	/* xdp rxq used by xsk */
	struct xdp_rxq_info xsk_rxq_info;

	struct xdp_buff **xsk_buffs;

	/* Do DMA mapping ourselves */
	bool do_dma;
};

/* This structure can contain the RSS message with maximum settings for the
 * indirection table and key size.
 * Note that the default structure describing the RSS configuration,
 * virtio_net_rss_config, contains the same info but cannot hold the table
 * values.
 * In any case, the structure is passed to the virtio hw through sg_buf,
 * split into parts, because the table size may differ according to the
 * device configuration.
 */
#define VIRTIO_NET_RSS_MAX_KEY_SIZE 40
struct virtio_net_ctrl_rss {
	u32 hash_types;
	u16 indirection_table_mask;
	u16 unclassified_queue;
	u16 hash_cfg_reserved; /* for HASH_CONFIG (see virtio_net_hash_config for details) */
	u16 max_tx_vq;
	u8 hash_key_length;
	u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE];

	u16 *indirection_table;
};

/* Control VQ buffers: protected by the rtnl lock */
struct control_buf {
	struct virtio_net_ctrl_hdr hdr;
	virtio_net_ctrl_ack status;
};

struct virtnet_info {
	struct virtio_device *vdev;
	struct virtqueue *cvq;
	struct net_device *dev;
	struct send_queue *sq;
	struct receive_queue *rq;
	unsigned int status;

	/* Max # of queue pairs supported by the device */
	u16 max_queue_pairs;

	/* # of queue pairs currently used by the driver */
	u16 curr_queue_pairs;

	/* # of XDP queue pairs currently used by the driver */
	u16 xdp_queue_pairs;

	/* xdp_queue_pairs may be 0 while an XDP program is loaded, so track
	 * XDP enablement separately.
	 */
	bool xdp_enabled;

	/* I like... big packets and I cannot lie! */
	bool big_packets;

	/* number of sg entries allocated for big packets */
	unsigned int big_packets_num_skbfrags;

	/* Host will merge rx buffers for big packets (shake it! shake it!) */
	bool mergeable_rx_bufs;

	/* Host supports rss and/or hash report */
	bool has_rss;
	bool has_rss_hash_report;
	u8 rss_key_size;
	u16 rss_indir_table_size;
	u32 rss_hash_types_supported;
	u32 rss_hash_types_saved;
	struct virtio_net_ctrl_rss rss;

	/* Has control virtqueue */
	bool has_cvq;

	/* Lock to protect the control VQ */
	struct mutex cvq_lock;

	/* Host can handle any s/g split between our header and packet data */
	bool any_header_sg;

	/* Packet virtio header size */
	u8 hdr_len;

	/* Work struct for delayed refilling if we run low on memory. */
	struct delayed_work refill;

	/* Is delayed refill enabled? */
	bool refill_enabled;

	/* The lock to synchronize the access to refill_enabled */
	spinlock_t refill_lock;

	/* Work struct for config space updates */
	struct work_struct config_work;

	/* Work struct for setting rx mode */
	struct work_struct rx_mode_work;

	/* OK to queue work setting RX mode? */
	bool rx_mode_work_enabled;

	/* Is the affinity hint set for the virtqueues? */
	bool affinity_hint_set;

	/* CPU hotplug instances for online & dead */
	struct hlist_node node;
	struct hlist_node node_dead;

	struct control_buf *ctrl;

	/* Ethtool settings */
	u8 duplex;
	u32 speed;

	/* Is rx dynamic interrupt moderation enabled? */
	bool rx_dim_enabled;

	/* Interrupt coalescing settings */
	struct virtnet_interrupt_coalesce intr_coal_tx;
	struct virtnet_interrupt_coalesce intr_coal_rx;

	unsigned long guest_offloads;
	unsigned long guest_offloads_capable;

	/* failover when STANDBY feature enabled */
	struct failover *failover;

	u64 device_stats_cap;
};

struct padded_vnet_hdr {
	struct virtio_net_hdr_v1_hash hdr;
	/*
	 * hdr is in a separate sg buffer, and the data sg buffer shares the
	 * same page with this header sg. This padding makes the next sg
	 * 16 byte aligned after the header.
	 */
	char padding[12];
};

struct virtio_net_common_hdr {
	union {
		struct virtio_net_hdr hdr;
		struct virtio_net_hdr_mrg_rxbuf mrg_hdr;
		struct virtio_net_hdr_v1_hash hash_v1_hdr;
	};
};

static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
			       struct net_device *dev,
			       unsigned int *xdp_xmit,
			       struct virtnet_rq_stats *stats);
static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq,
				 struct sk_buff *skb, u8 flags);
static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb,
					       struct sk_buff *curr_skb,
					       struct page *page, void *buf,
					       int len, int truesize);

static int rss_indirection_table_alloc(struct virtio_net_ctrl_rss *rss, u16 indir_table_size)
{
	if (!indir_table_size) {
		rss->indirection_table = NULL;
		return 0;
	}

	rss->indirection_table = kmalloc_array(indir_table_size, sizeof(u16), GFP_KERNEL);
	if (!rss->indirection_table)
		return -ENOMEM;

	return 0;
}

static void rss_indirection_table_free(struct virtio_net_ctrl_rss *rss)
{
	kfree(rss->indirection_table);
}

static bool is_xdp_frame(void *ptr)
{
	return (unsigned long)ptr & VIRTIO_XDP_FLAG;
}

static void *xdp_to_ptr(struct xdp_frame *ptr)
{
	return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG);
}

static struct xdp_frame *ptr_to_xdp(void *ptr)
{
	return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
}

static bool is_orphan_skb(void *ptr)
{
	return (unsigned long)ptr & VIRTIO_ORPHAN_FLAG;
}

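/* The token handed to the TX virtqueue is either an sk_buff or an xdp_frame.
 * Both are at least 4-byte aligned, so the two low pointer bits are free to
 * carry VIRTIO_XDP_FLAG / VIRTIO_ORPHAN_FLAG; the helpers above and below
 * pack and unpack those tag bits.
 */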
static void *skb_to_ptr(struct sk_buff *skb, bool orphan)
{
	return (void *)((unsigned long)skb | (orphan ? VIRTIO_ORPHAN_FLAG : 0));
}

static struct sk_buff *ptr_to_skb(void *ptr)
{
	return (struct sk_buff *)((unsigned long)ptr & ~VIRTIO_ORPHAN_FLAG);
}

static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
			    bool in_napi, struct virtnet_sq_free_stats *stats)
{
	unsigned int len;
	void *ptr;

	while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
		if (!is_xdp_frame(ptr)) {
			struct sk_buff *skb = ptr_to_skb(ptr);

			pr_debug("Sent skb %p\n", skb);

			if (is_orphan_skb(ptr)) {
				stats->packets++;
				stats->bytes += skb->len;
			} else {
				stats->napi_packets++;
				stats->napi_bytes += skb->len;
			}
			napi_consume_skb(skb, in_napi);
		} else {
			struct xdp_frame *frame = ptr_to_xdp(ptr);

			stats->packets++;
			stats->bytes += xdp_get_frame_len(frame);
			xdp_return_frame(frame);
		}
	}
	netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes);
}

/* Converting between virtqueue no. and kernel tx/rx queue no.
 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
 */
static int vq2txq(struct virtqueue *vq)
{
	return (vq->index - 1) / 2;
}

static int txq2vq(int txq)
{
	return txq * 2 + 1;
}

static int vq2rxq(struct virtqueue *vq)
{
	return vq->index / 2;
}

static int rxq2vq(int rxq)
{
	return rxq * 2;
}

static int vq_type(struct virtnet_info *vi, int qid)
{
	if (qid == vi->max_queue_pairs * 2)
		return VIRTNET_Q_TYPE_CQ;

	if (qid % 2)
		return VIRTNET_Q_TYPE_TX;

	return VIRTNET_Q_TYPE_RX;
}

static inline struct virtio_net_common_hdr *
skb_vnet_common_hdr(struct sk_buff *skb)
{
	return (struct virtio_net_common_hdr *)skb->cb;
}

/*
 * private is used to chain pages for big packets, put the whole
 * most recently used list at the beginning for reuse
 */
static void give_pages(struct receive_queue *rq, struct page *page)
{
	struct page *end;

	/* Find end of list, sew whole thing into vi->rq.pages. */
	for (end = page; end->private; end = (struct page *)end->private);
	end->private = (unsigned long)rq->pages;
	rq->pages = page;
}

static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
{
	struct page *p = rq->pages;

	if (p) {
		rq->pages = (struct page *)p->private;
		/* clear private here, it is used to chain pages */
		p->private = 0;
	} else
		p = alloc_page(gfp_mask);
	return p;
}

static void virtnet_rq_free_buf(struct virtnet_info *vi,
				struct receive_queue *rq, void *buf)
{
	if (vi->mergeable_rx_bufs)
		put_page(virt_to_head_page(buf));
	else if (vi->big_packets)
		give_pages(rq, buf);
	else
		put_page(virt_to_head_page(buf));
}

static void enable_delayed_refill(struct virtnet_info *vi)
{
	spin_lock_bh(&vi->refill_lock);
	vi->refill_enabled = true;
	spin_unlock_bh(&vi->refill_lock);
}

static void disable_delayed_refill(struct virtnet_info *vi)
{
	spin_lock_bh(&vi->refill_lock);
	vi->refill_enabled = false;
	spin_unlock_bh(&vi->refill_lock);
}

static void enable_rx_mode_work(struct virtnet_info *vi)
{
	rtnl_lock();
	vi->rx_mode_work_enabled = true;
	rtnl_unlock();
}

static void disable_rx_mode_work(struct virtnet_info *vi)
{
	rtnl_lock();
	vi->rx_mode_work_enabled = false;
	rtnl_unlock();
}

static void virtqueue_napi_schedule(struct napi_struct *napi,
				    struct virtqueue *vq)
{
	if (napi_schedule_prep(napi)) {
		virtqueue_disable_cb(vq);
		__napi_schedule(napi);
	}
}

static bool virtqueue_napi_complete(struct napi_struct *napi,
				    struct virtqueue *vq, int processed)
{
	int opaque;

	opaque = virtqueue_enable_cb_prepare(vq);
	if (napi_complete_done(napi, processed)) {
		if (unlikely(virtqueue_poll(vq, opaque)))
			virtqueue_napi_schedule(napi, vq);
		else
			return true;
	} else {
		virtqueue_disable_cb(vq);
	}

	return false;
}

static void skb_xmit_done(struct virtqueue *vq)
{
	struct virtnet_info *vi = vq->vdev->priv;
	struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi;

	/* Suppress further interrupts. */
	virtqueue_disable_cb(vq);

	if (napi->weight)
		virtqueue_napi_schedule(napi, vq);
	else
		/* We were probably waiting for more output buffers. */
		netif_wake_subqueue(vi->dev, vq2txq(vq));
}

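/* Mergeable receive buffers pass a per-buffer context to the virtqueue as a
 * tagged value: the buffer truesize lives in the low MRG_CTX_HEADER_SHIFT
 * bits and the headroom in the bits above. For example,
 * mergeable_len_to_ctx(1536, 256) encodes (256 << 22) | 1536, and the two
 * helpers below recover 256 and 1536 from it.
 */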
#define MRG_CTX_HEADER_SHIFT 22
static void *mergeable_len_to_ctx(unsigned int truesize,
				  unsigned int headroom)
{
	return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize);
}

static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx)
{
	return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT;
}

static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
{
	return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
}

static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen,
					 unsigned int headroom,
					 unsigned int len)
{
	struct sk_buff *skb;

	skb = build_skb(buf, buflen);
	if (unlikely(!skb))
		return NULL;

	skb_reserve(skb, headroom);
	skb_put(skb, len);

	return skb;
}

/* Called from bottom half context */
static struct sk_buff *page_to_skb(struct virtnet_info *vi,
				   struct receive_queue *rq,
				   struct page *page, unsigned int offset,
				   unsigned int len, unsigned int truesize,
				   unsigned int headroom)
{
	struct sk_buff *skb;
	struct virtio_net_common_hdr *hdr;
	unsigned int copy, hdr_len, hdr_padded_len;
	struct page *page_to_free = NULL;
	int tailroom, shinfo_size;
	char *p, *hdr_p, *buf;

	p = page_address(page) + offset;
	hdr_p = p;

	hdr_len = vi->hdr_len;
	if (vi->mergeable_rx_bufs)
		hdr_padded_len = hdr_len;
	else
		hdr_padded_len = sizeof(struct padded_vnet_hdr);

	buf = p - headroom;
	len -= hdr_len;
	offset += hdr_padded_len;
	p += hdr_padded_len;
	tailroom = truesize - headroom - hdr_padded_len - len;

	shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
		skb = virtnet_build_skb(buf, truesize, p - buf, len);
		if (unlikely(!skb))
			return NULL;

		page = (struct page *)page->private;
		if (page)
			give_pages(rq, page);
		goto ok;
	}

	/* copy small packet so we can reuse these pages for small data */
	skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
	if (unlikely(!skb))
		return NULL;

	/* Copy the whole frame if it fits in skb->head, otherwise
	 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed.
	 */
	if (len <= skb_tailroom(skb))
		copy = len;
	else
		copy = ETH_HLEN;
	skb_put_data(skb, p, copy);

	len -= copy;
	offset += copy;

	if (vi->mergeable_rx_bufs) {
		if (len)
			skb_add_rx_frag(skb, 0, page, offset, len, truesize);
		else
			page_to_free = page;
		goto ok;
	}

	/*
	 * Verify that we can indeed put this data into a skb.
	 * This is here to handle cases when the device erroneously
	 * tries to receive more than is possible. This is usually
	 * the case of a broken device.
	 */
	if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
		net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
		dev_kfree_skb(skb);
		return NULL;
	}
	BUG_ON(offset >= PAGE_SIZE);
	while (len) {
		unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
				frag_size, truesize);
		len -= frag_size;
		page = (struct page *)page->private;
		offset = 0;
	}

	if (page)
		give_pages(rq, page);

ok:
	hdr = skb_vnet_common_hdr(skb);
	memcpy(hdr, hdr_p, hdr_len);
	if (page_to_free)
		put_page(page_to_free);

	return skb;
}

static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len)
{
	struct page *page = virt_to_head_page(buf);
	struct virtnet_rq_dma *dma;
	void *head;
	int offset;

	head = page_address(page);

	dma = head;

	--dma->ref;

	if (dma->need_sync && len) {
		offset = buf - (head + sizeof(*dma));

		virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr,
							offset, len,
							DMA_FROM_DEVICE);
	}

	if (dma->ref)
		return;

	virtqueue_dma_unmap_single_attrs(rq->vq, dma->addr, dma->len,
					 DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
	put_page(page);
}

static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx)
{
	void *buf;

	buf = virtqueue_get_buf_ctx(rq->vq, len, ctx);
	if (buf && rq->do_dma)
		virtnet_rq_unmap(rq, buf, *len);

	return buf;
}

static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len)
{
	struct virtnet_rq_dma *dma;
	dma_addr_t addr;
	u32 offset;
	void *head;

	if (!rq->do_dma) {
		sg_init_one(rq->sg, buf, len);
		return;
	}

	head = page_address(rq->alloc_frag.page);

	offset = buf - head;

	dma = head;

	addr = dma->addr - sizeof(*dma) + offset;

	sg_init_table(rq->sg, 1);
	rq->sg[0].dma_address = addr;
	rq->sg[0].length = len;
}

static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp)
{
	struct page_frag *alloc_frag = &rq->alloc_frag;
	struct virtnet_rq_dma *dma;
	void *buf, *head;
	dma_addr_t addr;

	if (unlikely(!skb_page_frag_refill(size, alloc_frag, gfp)))
		return NULL;

	head = page_address(alloc_frag->page);

	if (rq->do_dma) {
		dma = head;

		/* new pages */
		if (!alloc_frag->offset) {
			if (rq->last_dma) {
				/* Now that a new page is allocated, the last
				 * dma will not be used any more, so it can be
				 * unmapped if its ref is 0.
				 */
				virtnet_rq_unmap(rq, rq->last_dma, 0);
				rq->last_dma = NULL;
			}

			dma->len = alloc_frag->size - sizeof(*dma);

			addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1,
							      dma->len, DMA_FROM_DEVICE, 0);
			if (virtqueue_dma_mapping_error(rq->vq, addr))
				return NULL;

			dma->addr = addr;
			dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr);

			/* Add a reference to dma to prevent the entire dma from
			 * being released during error handling. This reference
			 * will be freed after the pages are no longer used.
			 */
			get_page(alloc_frag->page);
			dma->ref = 1;
			alloc_frag->offset = sizeof(*dma);

			rq->last_dma = dma;
		}

		++dma->ref;
	}

	buf = head + alloc_frag->offset;

	get_page(alloc_frag->page);
	alloc_frag->offset += size;

	return buf;
}

static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
{
	struct virtnet_info *vi = vq->vdev->priv;
	struct receive_queue *rq;
	int i = vq2rxq(vq);

	rq = &vi->rq[i];

	if (rq->xsk_pool) {
		xsk_buff_free((struct xdp_buff *)buf);
		return;
	}

	if (rq->do_dma)
		virtnet_rq_unmap(rq, buf, 0);

	virtnet_rq_free_buf(vi, rq, buf);
}

static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
			  bool in_napi)
{
	struct virtnet_sq_free_stats stats = {0};

	__free_old_xmit(sq, txq, in_napi, &stats);

	/* Avoid the overhead when no packets have been processed; this
	 * happens when we are called speculatively from start_xmit.
	 */
	if (!stats.packets && !stats.napi_packets)
		return;

	u64_stats_update_begin(&sq->stats.syncp);
	u64_stats_add(&sq->stats.bytes, stats.bytes + stats.napi_bytes);
	u64_stats_add(&sq->stats.packets, stats.packets + stats.napi_packets);
	u64_stats_update_end(&sq->stats.syncp);
}

static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
{
	if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
		return false;
	else if (q < vi->curr_queue_pairs)
		return true;
	else
		return false;
}

static void check_sq_full_and_disable(struct virtnet_info *vi,
				      struct net_device *dev,
				      struct send_queue *sq)
{
	bool use_napi = sq->napi.weight;
	int qnum;

	qnum = sq - vi->sq;

	/* If running out of space, stop queue to avoid getting packets that we
	 * are then unable to transmit.
	 * An alternative would be to force queuing layer to requeue the skb by
	 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
	 * returned in a normal path of operation: it means that driver is not
	 * maintaining the TX queue stop/start state properly, and causes
	 * the stack to do a non-trivial amount of useless work.
	 * Since most packets only take 1 or 2 ring slots, stopping the queue
	 * early means 16 slots are typically wasted.
	 */
	if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);

		netif_tx_stop_queue(txq);
		u64_stats_update_begin(&sq->stats.syncp);
		u64_stats_inc(&sq->stats.stop);
		u64_stats_update_end(&sq->stats.syncp);
		if (use_napi) {
			if (unlikely(!virtqueue_enable_cb_delayed(sq->vq)))
				virtqueue_napi_schedule(&sq->napi, sq->vq);
		} else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
			/* More just got used, free them then recheck. */
			free_old_xmit(sq, txq, false);
			if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
				netif_start_subqueue(dev, qnum);
				u64_stats_update_begin(&sq->stats.syncp);
				u64_stats_inc(&sq->stats.wake);
				u64_stats_update_end(&sq->stats.syncp);
				virtqueue_disable_cb(sq->vq);
			}
		}
	}
}

static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
{
	sg->dma_address = addr;
	sg->length = len;
}

static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi,
				   struct receive_queue *rq, void *buf, u32 len)
{
	struct xdp_buff *xdp;
	u32 bufsize;

	xdp = (struct xdp_buff *)buf;

	bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool) + vi->hdr_len;

	if (unlikely(len > bufsize)) {
		pr_debug("%s: rx error: len %u exceeds truesize %u\n",
			 vi->dev->name, len, bufsize);
		DEV_STATS_INC(vi->dev, rx_length_errors);
		xsk_buff_free(xdp);
		return NULL;
	}

	xsk_buff_set_size(xdp, len);
	xsk_buff_dma_sync_for_cpu(xdp);

	return xdp;
}

static struct sk_buff *xsk_construct_skb(struct receive_queue *rq,
					 struct xdp_buff *xdp)
{
	unsigned int metasize = xdp->data - xdp->data_meta;
	struct sk_buff *skb;
	unsigned int size;

	size = xdp->data_end - xdp->data_hard_start;
	skb = napi_alloc_skb(&rq->napi, size);
	if (unlikely(!skb)) {
		xsk_buff_free(xdp);
		return NULL;
	}

	skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);

	size = xdp->data_end - xdp->data_meta;
	memcpy(__skb_put(skb, size), xdp->data_meta, size);

	if (metasize) {
		__skb_pull(skb, metasize);
		skb_metadata_set(skb, metasize);
	}

	xsk_buff_free(xdp);

	return skb;
}

static struct sk_buff *virtnet_receive_xsk_small(struct net_device *dev, struct virtnet_info *vi,
						 struct receive_queue *rq, struct xdp_buff *xdp,
						 unsigned int *xdp_xmit,
						 struct virtnet_rq_stats *stats)
{
	struct bpf_prog *prog;
	u32 ret;

	ret = XDP_PASS;
	rcu_read_lock();
	prog = rcu_dereference(rq->xdp_prog);
	if (prog)
		ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
	rcu_read_unlock();

	switch (ret) {
	case XDP_PASS:
		return xsk_construct_skb(rq, xdp);

	case XDP_TX:
	case XDP_REDIRECT:
		return NULL;

	default:
		/* drop packet */
		xsk_buff_free(xdp);
		u64_stats_inc(&stats->drops);
		return NULL;
	}
}

static void xsk_drop_follow_bufs(struct net_device *dev,
				 struct receive_queue *rq,
				 u32 num_buf,
				 struct virtnet_rq_stats *stats)
{
	struct xdp_buff *xdp;
	u32 len;

	while (num_buf-- > 1) {
		xdp = virtqueue_get_buf(rq->vq, &len);
		if (unlikely(!xdp)) {
			pr_debug("%s: rx error: %d buffers missing\n",
				 dev->name, num_buf);
			DEV_STATS_INC(dev, rx_length_errors);
			break;
		}
		u64_stats_add(&stats->bytes, len);
		xsk_buff_free(xdp);
	}
}

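/* For mergeable buffers in XSK mode the follow-on buffers are copied out of
 * the XSK frames into regular page frags and appended to the head skb, so
 * that the XSK buffers themselves can be returned to the pool right away.
 */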
static int xsk_append_merge_buffer(struct virtnet_info *vi,
				   struct receive_queue *rq,
				   struct sk_buff *head_skb,
				   u32 num_buf,
				   struct virtio_net_hdr_mrg_rxbuf *hdr,
				   struct virtnet_rq_stats *stats)
{
	struct sk_buff *curr_skb;
	struct xdp_buff *xdp;
	u32 len, truesize;
	struct page *page;
	void *buf;

	curr_skb = head_skb;

	while (--num_buf) {
		buf = virtqueue_get_buf(rq->vq, &len);
		if (unlikely(!buf)) {
			pr_debug("%s: rx error: %d buffers out of %d missing\n",
				 vi->dev->name, num_buf,
				 virtio16_to_cpu(vi->vdev,
						 hdr->num_buffers));
			DEV_STATS_INC(vi->dev, rx_length_errors);
			return -EINVAL;
		}

		u64_stats_add(&stats->bytes, len);

		xdp = buf_to_xdp(vi, rq, buf, len);
		if (!xdp)
			goto err;

		buf = napi_alloc_frag(len);
		if (!buf) {
			xsk_buff_free(xdp);
			goto err;
		}

		memcpy(buf, xdp->data - vi->hdr_len, len);

		xsk_buff_free(xdp);

		page = virt_to_page(buf);

		truesize = len;

		curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page,
						   buf, len, truesize);
		if (!curr_skb) {
			put_page(page);
			goto err;
		}
	}

	return 0;

err:
	xsk_drop_follow_bufs(vi->dev, rq, num_buf, stats);
	return -EINVAL;
}

static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct virtnet_info *vi,
						 struct receive_queue *rq, struct xdp_buff *xdp,
						 unsigned int *xdp_xmit,
						 struct virtnet_rq_stats *stats)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr;
	struct bpf_prog *prog;
	struct sk_buff *skb;
	u32 ret, num_buf;

	hdr = xdp->data - vi->hdr_len;
	num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);

	ret = XDP_PASS;
	rcu_read_lock();
	prog = rcu_dereference(rq->xdp_prog);
	/* TODO: support multi buffer. */
	if (prog && num_buf == 1)
		ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
	rcu_read_unlock();

	switch (ret) {
	case XDP_PASS:
		skb = xsk_construct_skb(rq, xdp);
		if (!skb)
			goto drop_bufs;

		if (xsk_append_merge_buffer(vi, rq, skb, num_buf, hdr, stats)) {
			dev_kfree_skb(skb);
			goto drop;
		}

		return skb;

	case XDP_TX:
	case XDP_REDIRECT:
		return NULL;

	default:
		/* drop packet */
		xsk_buff_free(xdp);
	}

drop_bufs:
	xsk_drop_follow_bufs(dev, rq, num_buf, stats);

drop:
	u64_stats_inc(&stats->drops);
	return NULL;
}

static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queue *rq,
				    void *buf, u32 len,
				    unsigned int *xdp_xmit,
				    struct virtnet_rq_stats *stats)
{
	struct net_device *dev = vi->dev;
	struct sk_buff *skb = NULL;
	struct xdp_buff *xdp;
	u8 flags;

	len -= vi->hdr_len;

	u64_stats_add(&stats->bytes, len);

	xdp = buf_to_xdp(vi, rq, buf, len);
	if (!xdp)
		return;

	if (unlikely(len < ETH_HLEN)) {
		pr_debug("%s: short packet %i\n", dev->name, len);
		DEV_STATS_INC(dev, rx_length_errors);
		xsk_buff_free(xdp);
		return;
	}

	flags = ((struct virtio_net_common_hdr *)(xdp->data - vi->hdr_len))->hdr.flags;

	if (!vi->mergeable_rx_bufs)
		skb = virtnet_receive_xsk_small(dev, vi, rq, xdp, xdp_xmit, stats);
	else
		skb = virtnet_receive_xsk_merge(dev, vi, rq, xdp, xdp_xmit, stats);

	if (skb)
		virtnet_receive_done(vi, rq, skb, flags);
}

static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue *rq,
				   struct xsk_buff_pool *pool, gfp_t gfp)
{
	struct xdp_buff **xsk_buffs;
	dma_addr_t addr;
	int err = 0;
	u32 len, i;
	int num;

	xsk_buffs = rq->xsk_buffs;

	num = xsk_buff_alloc_batch(pool, xsk_buffs, rq->vq->num_free);
	if (!num)
		return -ENOMEM;

	len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len;

	for (i = 0; i < num; ++i) {
		/* Use part of XDP_PACKET_HEADROOM as the space for the virtnet
		 * header. We assume XDP_PACKET_HEADROOM is larger than hdr->len.
		 * (see function virtnet_xsk_pool_enable)
		 */
		addr = xsk_buff_xdp_get_dma(xsk_buffs[i]) - vi->hdr_len;

		sg_init_table(rq->sg, 1);
		sg_fill_dma(rq->sg, addr, len);

		err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, xsk_buffs[i], gfp);
		if (err)
			goto err;
	}

	return num;

err:
	for (; i < num; ++i)
		xsk_buff_free(xsk_buffs[i]);

	return err;
}

static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct send_queue *sq;

	if (!netif_running(dev))
		return -ENETDOWN;

	if (qid >= vi->curr_queue_pairs)
		return -EINVAL;

	sq = &vi->sq[qid];

	if (napi_if_scheduled_mark_missed(&sq->napi))
		return 0;

	local_bh_disable();
	virtqueue_napi_schedule(&sq->napi, sq->vq);
	local_bh_enable();

	return 0;
}

static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
				  struct send_queue *sq,
				  struct xdp_frame *xdpf)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr;
	struct skb_shared_info *shinfo;
	u8 nr_frags = 0;
	int err, i;

	if (unlikely(xdpf->headroom < vi->hdr_len))
		return -EOVERFLOW;

	if (unlikely(xdp_frame_has_frags(xdpf))) {
		shinfo = xdp_get_shared_info_from_frame(xdpf);
		nr_frags = shinfo->nr_frags;
	}

	/* In wrapping function virtnet_xdp_xmit(), we need to free
	 * up the pending old buffers, where we need to calculate the
	 * position of skb_shared_info in xdp_get_frame_len() and
	 * xdp_return_frame(), which will involve xdpf->data and
	 * xdpf->headroom. Therefore, we need to update the value of
	 * headroom synchronously here.
	 */
	xdpf->headroom -= vi->hdr_len;
	xdpf->data -= vi->hdr_len;
	/* Zero header and leave csum up to XDP layers */
	hdr = xdpf->data;
	memset(hdr, 0, vi->hdr_len);
	xdpf->len += vi->hdr_len;

	sg_init_table(sq->sg, nr_frags + 1);
	sg_set_buf(sq->sg, xdpf->data, xdpf->len);
	for (i = 0; i < nr_frags; i++) {
		skb_frag_t *frag = &shinfo->frags[i];

		sg_set_page(&sq->sg[i + 1], skb_frag_page(frag),
			    skb_frag_size(frag), skb_frag_off(frag));
	}

	err = virtqueue_add_outbuf(sq->vq, sq->sg, nr_frags + 1,
				   xdp_to_ptr(xdpf), GFP_ATOMIC);
	if (unlikely(err))
		return -ENOSPC; /* Caller handle free/refcnt */

	return 0;
}

/* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on
 * the current cpu, so it does not need to be locked.
 *
 * Here we use a macro instead of inline functions because we have to deal with
 * three issues at the same time: 1. the choice of sq, 2. deciding whether to
 * lock/unlock the txq, and 3. keeping sparse happy. It is difficult for two
 * inline functions to perfectly solve these three problems at the same time.
 */
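/* For example, with 4 online CPUs and curr_queue_pairs == 2, CPUs 0/2 share
 * sq[0] and CPUs 1/3 share sq[1], so the txq lock must be taken; when every
 * CPU has a dedicated XDP queue (curr_queue_pairs > nr_cpu_ids), the lock is
 * skipped and __netif_tx_acquire() only satisfies sparse.
 */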
#define virtnet_xdp_get_sq(vi) ({                                       \
	int cpu = smp_processor_id();                                   \
	struct netdev_queue *txq;                                       \
	typeof(vi) v = (vi);                                            \
	unsigned int qp;                                                \
									\
	if (v->curr_queue_pairs > nr_cpu_ids) {                         \
		qp = v->curr_queue_pairs - v->xdp_queue_pairs;          \
		qp += cpu;                                              \
		txq = netdev_get_tx_queue(v->dev, qp);                  \
		__netif_tx_acquire(txq);                                \
	} else {                                                        \
		qp = cpu % v->curr_queue_pairs;                         \
		txq = netdev_get_tx_queue(v->dev, qp);                  \
		__netif_tx_lock(txq, cpu);                              \
	}                                                               \
	v->sq + qp;                                                     \
})

#define virtnet_xdp_put_sq(vi, q) {                                     \
	struct netdev_queue *txq;                                       \
	typeof(vi) v = (vi);                                            \
									\
	txq = netdev_get_tx_queue(v->dev, (q) - v->sq);                 \
	if (v->curr_queue_pairs > nr_cpu_ids)                           \
		__netif_tx_release(txq);                                \
	else                                                            \
		__netif_tx_unlock(txq);                                 \
}

static int virtnet_xdp_xmit(struct net_device *dev,
			    int n, struct xdp_frame **frames, u32 flags)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct virtnet_sq_free_stats stats = {0};
	struct receive_queue *rq = vi->rq;
	struct bpf_prog *xdp_prog;
	struct send_queue *sq;
	int nxmit = 0;
	int kicks = 0;
	int ret;
	int i;

	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
	 * indicates XDP resources have been successfully allocated.
	 */
	xdp_prog = rcu_access_pointer(rq->xdp_prog);
	if (!xdp_prog)
		return -ENXIO;

	sq = virtnet_xdp_get_sq(vi);

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
		ret = -EINVAL;
		goto out;
	}

	/* Free up any pending old buffers before queueing new ones. */
	__free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq),
			false, &stats);

	for (i = 0; i < n; i++) {
		struct xdp_frame *xdpf = frames[i];

		if (__virtnet_xdp_xmit_one(vi, sq, xdpf))
			break;
		nxmit++;
	}
	ret = nxmit;

	if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq))
		check_sq_full_and_disable(vi, dev, sq);

	if (flags & XDP_XMIT_FLUSH) {
		if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
			kicks = 1;
	}
out:
	u64_stats_update_begin(&sq->stats.syncp);
	u64_stats_add(&sq->stats.bytes, stats.bytes);
	u64_stats_add(&sq->stats.packets, stats.packets);
	u64_stats_add(&sq->stats.xdp_tx, n);
	u64_stats_add(&sq->stats.xdp_tx_drops, n - nxmit);
	u64_stats_add(&sq->stats.kicks, kicks);
	u64_stats_update_end(&sq->stats.syncp);

	virtnet_xdp_put_sq(vi, sq);
	return ret;
}

static void put_xdp_frags(struct xdp_buff *xdp)
{
	struct skb_shared_info *shinfo;
	struct page *xdp_page;
	int i;

	if (xdp_buff_has_frags(xdp)) {
		shinfo = xdp_get_shared_info_from_buff(xdp);
		for (i = 0; i < shinfo->nr_frags; i++) {
			xdp_page = skb_frag_page(&shinfo->frags[i]);
			put_page(xdp_page);
		}
	}
}

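/* Run the XDP program and translate its verdict. XDP_PASS hands the buffer
 * back to the caller untouched; XDP_TX and XDP_REDIRECT consume the buffer
 * and record a pending kick/flush in *xdp_xmit; anything else (including
 * transmit or redirect errors) is reported as XDP_DROP and the caller is
 * responsible for freeing the buffer.
 */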
static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
			       struct net_device *dev,
			       unsigned int *xdp_xmit,
			       struct virtnet_rq_stats *stats)
{
	struct xdp_frame *xdpf;
	int err;
	u32 act;

	act = bpf_prog_run_xdp(xdp_prog, xdp);
	u64_stats_inc(&stats->xdp_packets);

	switch (act) {
	case XDP_PASS:
		return act;

	case XDP_TX:
		u64_stats_inc(&stats->xdp_tx);
		xdpf = xdp_convert_buff_to_frame(xdp);
		if (unlikely(!xdpf)) {
			netdev_dbg(dev, "convert buff to frame failed for xdp\n");
			return XDP_DROP;
		}

		err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
		if (unlikely(!err)) {
			xdp_return_frame_rx_napi(xdpf);
		} else if (unlikely(err < 0)) {
			trace_xdp_exception(dev, xdp_prog, act);
			return XDP_DROP;
		}
		*xdp_xmit |= VIRTIO_XDP_TX;
		return act;

	case XDP_REDIRECT:
		u64_stats_inc(&stats->xdp_redirects);
		err = xdp_do_redirect(dev, xdp, xdp_prog);
		if (err)
			return XDP_DROP;

		*xdp_xmit |= VIRTIO_XDP_REDIR;
		return act;

	default:
		bpf_warn_invalid_xdp_action(dev, xdp_prog, act);
		fallthrough;
	case XDP_ABORTED:
		trace_xdp_exception(dev, xdp_prog, act);
		fallthrough;
	case XDP_DROP:
		return XDP_DROP;
	}
}

static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
{
	return vi->xdp_enabled ? XDP_PACKET_HEADROOM : 0;
}

/* We copy the packet for XDP in the following cases:
 *
 * 1) Packet is scattered across multiple rx buffers.
 * 2) Headroom space is insufficient.
 *
 * This is inefficient but it's a temporary condition that
 * we hit right after XDP is enabled and until the queue is refilled
 * with large buffers with sufficient headroom - so it should affect
 * at most a queue's worth of packets.
 * Afterwards, the conditions to enable
 * XDP should preclude the underlying device from sending packets
 * across multiple buffers (num_buf > 1), and we make sure buffers
 * have enough headroom.
 */
static struct page *xdp_linearize_page(struct receive_queue *rq,
				       int *num_buf,
				       struct page *p,
				       int offset,
				       int page_off,
				       unsigned int *len)
{
	int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	struct page *page;

	if (page_off + *len + tailroom > PAGE_SIZE)
		return NULL;

	page = alloc_page(GFP_ATOMIC);
	if (!page)
		return NULL;

	memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
	page_off += *len;

	while (--*num_buf) {
		unsigned int buflen;
		void *buf;
		int off;

		buf = virtnet_rq_get_buf(rq, &buflen, NULL);
		if (unlikely(!buf))
			goto err_buf;

		p = virt_to_head_page(buf);
		off = buf - page_address(p);

		/* guard against a misconfigured or uncooperative backend that
		 * is sending packets larger than the MTU.
		 */
		if ((page_off + buflen + tailroom) > PAGE_SIZE) {
			put_page(p);
			goto err_buf;
		}

		memcpy(page_address(page) + page_off,
		       page_address(p) + off, buflen);
		page_off += buflen;
		put_page(p);
	}

	/* Headroom does not contribute to packet length */
	*len = page_off - XDP_PACKET_HEADROOM;
	return page;
err_buf:
	__free_pages(page, 0);
	return NULL;
}

static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi,
					       unsigned int xdp_headroom,
					       void *buf,
					       unsigned int len)
{
	unsigned int header_offset;
	unsigned int headroom;
	unsigned int buflen;
	struct sk_buff *skb;

	header_offset = VIRTNET_RX_PAD + xdp_headroom;
	headroom = vi->hdr_len + header_offset;
	buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
		SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	skb = virtnet_build_skb(buf, buflen, headroom, len);
	if (unlikely(!skb))
		return NULL;

	buf += header_offset;
	memcpy(skb_vnet_common_hdr(skb), buf, vi->hdr_len);

	return skb;
}

static struct sk_buff *receive_small_xdp(struct net_device *dev,
					 struct virtnet_info *vi,
					 struct receive_queue *rq,
					 struct bpf_prog *xdp_prog,
					 void *buf,
					 unsigned int xdp_headroom,
					 unsigned int len,
					 unsigned int *xdp_xmit,
					 struct virtnet_rq_stats *stats)
{
	unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
	unsigned int headroom = vi->hdr_len + header_offset;
	struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
	struct page *page = virt_to_head_page(buf);
	struct page *xdp_page;
	unsigned int buflen;
	struct xdp_buff xdp;
	struct sk_buff *skb;
	unsigned int metasize = 0;
	u32 act;

	if (unlikely(hdr->hdr.gso_type))
		goto err_xdp;

	/* Partially checksummed packets must be dropped. */
	if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))
		goto err_xdp;

	buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
		SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
		int offset = buf - page_address(page) + header_offset;
		unsigned int tlen = len + vi->hdr_len;
		int num_buf = 1;

		xdp_headroom = virtnet_get_headroom(vi);
		header_offset = VIRTNET_RX_PAD + xdp_headroom;
		headroom = vi->hdr_len + header_offset;
		buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
			SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
		xdp_page = xdp_linearize_page(rq, &num_buf, page,
					      offset, header_offset,
					      &tlen);
		if (!xdp_page)
			goto err_xdp;

		buf = page_address(xdp_page);
		put_page(page);
		page = xdp_page;
	}

	xdp_init_buff(&xdp, buflen, &rq->xdp_rxq);
	xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len,
			 xdp_headroom, len, true);

	act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);

	switch (act) {
	case XDP_PASS:
		/* Recalculate length in case bpf program changed it */
		len = xdp.data_end - xdp.data;
		metasize = xdp.data - xdp.data_meta;
		break;

	case XDP_TX:
	case XDP_REDIRECT:
		goto xdp_xmit;

	default:
		goto err_xdp;
	}

	skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len);
	if (unlikely(!skb))
		goto err;

	if (metasize)
		skb_metadata_set(skb, metasize);

	return skb;

err_xdp:
	u64_stats_inc(&stats->xdp_drops);
err:
	u64_stats_inc(&stats->drops);
	put_page(page);
xdp_xmit:
	return NULL;
}

static struct sk_buff *receive_small(struct net_device *dev,
				     struct virtnet_info *vi,
				     struct receive_queue *rq,
				     void *buf, void *ctx,
				     unsigned int len,
				     unsigned int *xdp_xmit,
				     struct virtnet_rq_stats *stats)
{
	unsigned int xdp_headroom = (unsigned long)ctx;
	struct page *page = virt_to_head_page(buf);
	struct sk_buff *skb;

	/* We passed the address of virtnet header to virtio-core,
	 * so truncate the padding.
	 */
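	/* The refill path points virtio at the virtio-net header, which lives
	 * VIRTNET_RX_PAD + xdp_headroom bytes into the fragment and is
	 * immediately followed by the packet data, so walk buf back to the
	 * start of the fragment before rebuilding the skb around it.
	 */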
	buf -= VIRTNET_RX_PAD + xdp_headroom;

	len -= vi->hdr_len;
	u64_stats_add(&stats->bytes, len);

	if (unlikely(len > GOOD_PACKET_LEN)) {
		pr_debug("%s: rx error: len %u exceeds max size %d\n",
			 dev->name, len, GOOD_PACKET_LEN);
		DEV_STATS_INC(dev, rx_length_errors);
		goto err;
	}

	if (unlikely(vi->xdp_enabled)) {
		struct bpf_prog *xdp_prog;

		rcu_read_lock();
		xdp_prog = rcu_dereference(rq->xdp_prog);
		if (xdp_prog) {
			skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf,
						xdp_headroom, len, xdp_xmit,
						stats);
			rcu_read_unlock();
			return skb;
		}
		rcu_read_unlock();
	}

	skb = receive_small_build_skb(vi, xdp_headroom, buf, len);
	if (likely(skb))
		return skb;

err:
	u64_stats_inc(&stats->drops);
	put_page(page);
	return NULL;
}

static struct sk_buff *receive_big(struct net_device *dev,
				   struct virtnet_info *vi,
				   struct receive_queue *rq,
				   void *buf,
				   unsigned int len,
				   struct virtnet_rq_stats *stats)
{
	struct page *page = buf;
	struct sk_buff *skb =
		page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0);

	u64_stats_add(&stats->bytes, len - vi->hdr_len);
	if (unlikely(!skb))
		goto err;

	return skb;

err:
	u64_stats_inc(&stats->drops);
	give_pages(rq, page);
	return NULL;
}

static void mergeable_buf_free(struct receive_queue *rq, int num_buf,
			       struct net_device *dev,
			       struct virtnet_rq_stats *stats)
{
	struct page *page;
	void *buf;
	int len;

	while (num_buf-- > 1) {
		buf = virtnet_rq_get_buf(rq, &len, NULL);
		if (unlikely(!buf)) {
			pr_debug("%s: rx error: %d buffers missing\n",
				 dev->name, num_buf);
			DEV_STATS_INC(dev, rx_length_errors);
			break;
		}
		u64_stats_add(&stats->bytes, len);
		page = virt_to_head_page(buf);
		put_page(page);
	}
}

/* Why not use xdp_build_skb_from_frame()?
 * XDP core assumes that xdp frags are PAGE_SIZE in length, while in
 * virtio-net there are 2 points that do not match its requirements:
 * 1. The size of the prefilled buffer is not fixed before xdp is set.
 * 2. xdp_build_skb_from_frame() does more checks than we need, like
 *    eth_type_trans() (which virtio-net does in receive_buf()).
 */
static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev,
					       struct virtnet_info *vi,
					       struct xdp_buff *xdp,
					       unsigned int xdp_frags_truesz)
{
	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
	unsigned int headroom, data_len;
	struct sk_buff *skb;
	int metasize;
	u8 nr_frags;

	if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) {
		pr_debug("Error building skb as missing reserved tailroom for xdp");
		return NULL;
	}

	if (unlikely(xdp_buff_has_frags(xdp)))
		nr_frags = sinfo->nr_frags;

	skb = build_skb(xdp->data_hard_start, xdp->frame_sz);
	if (unlikely(!skb))
		return NULL;

	headroom = xdp->data - xdp->data_hard_start;
	data_len = xdp->data_end - xdp->data;
	skb_reserve(skb, headroom);
	__skb_put(skb, data_len);

	metasize = xdp->data - xdp->data_meta;
	metasize = metasize > 0 ? metasize : 0;
	if (metasize)
		skb_metadata_set(skb, metasize);

	if (unlikely(xdp_buff_has_frags(xdp)))
		xdp_update_skb_shared_info(skb, nr_frags,
					   sinfo->xdp_frags_size,
					   xdp_frags_truesz,
					   xdp_buff_is_frag_pfmemalloc(xdp));

	return skb;
}

/* TODO: build xdp in big mode */
static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
				      struct virtnet_info *vi,
				      struct receive_queue *rq,
				      struct xdp_buff *xdp,
				      void *buf,
				      unsigned int len,
				      unsigned int frame_sz,
				      int *num_buf,
				      unsigned int *xdp_frags_truesize,
				      struct virtnet_rq_stats *stats)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
	unsigned int headroom, tailroom, room;
	unsigned int truesize, cur_frag_size;
	struct skb_shared_info *shinfo;
	unsigned int xdp_frags_truesz = 0;
	struct page *page;
	skb_frag_t *frag;
	int offset;
	void *ctx;

	xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq);
	xdp_prepare_buff(xdp, buf - XDP_PACKET_HEADROOM,
			 XDP_PACKET_HEADROOM + vi->hdr_len, len - vi->hdr_len, true);

	if (!*num_buf)
		return 0;

	if (*num_buf > 1) {
		/* If we want to build multi-buffer xdp, we need to specify
		 * that the xdp_buff flags have the XDP_FLAGS_HAS_FRAGS bit.
		 */
		if (!xdp_buff_has_frags(xdp))
			xdp_buff_set_frags_flag(xdp);

		shinfo = xdp_get_shared_info_from_buff(xdp);
		shinfo->nr_frags = 0;
		shinfo->xdp_frags_size = 0;
	}

	if (*num_buf > MAX_SKB_FRAGS + 1)
		return -EINVAL;

	while (--*num_buf > 0) {
		buf = virtnet_rq_get_buf(rq, &len, &ctx);
		if (unlikely(!buf)) {
			pr_debug("%s: rx error: %d buffers out of %d missing\n",
				 dev->name, *num_buf,
				 virtio16_to_cpu(vi->vdev, hdr->num_buffers));
			DEV_STATS_INC(dev, rx_length_errors);
			goto err;
		}

		u64_stats_add(&stats->bytes, len);
		page = virt_to_head_page(buf);
		offset = buf - page_address(page);

		truesize = mergeable_ctx_to_truesize(ctx);
		headroom = mergeable_ctx_to_headroom(ctx);
		tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
		room = SKB_DATA_ALIGN(headroom + tailroom);

		cur_frag_size = truesize;
		xdp_frags_truesz += cur_frag_size;
		if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) {
			put_page(page);
			pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
				 dev->name, len, (unsigned long)(truesize - room));
			DEV_STATS_INC(dev, rx_length_errors);
			goto err;
		}

		frag = &shinfo->frags[shinfo->nr_frags++];
		skb_frag_fill_page_desc(frag, page, offset, len);
		if (page_is_pfmemalloc(page))
			xdp_buff_set_frag_pfmemalloc(xdp);

		shinfo->xdp_frags_size += len;
	}

	*xdp_frags_truesize = xdp_frags_truesz;
	return 0;

err:
	put_xdp_frags(xdp);
	return -EINVAL;
}

static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
				   struct receive_queue *rq,
				   struct bpf_prog *xdp_prog,
				   void *ctx,
				   unsigned int *frame_sz,
				   int *num_buf,
				   struct page **page,
				   int offset,
				   unsigned int *len,
				   struct virtio_net_hdr_mrg_rxbuf *hdr)
{
	unsigned int truesize = mergeable_ctx_to_truesize(ctx);
	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
	struct page *xdp_page;
	unsigned int xdp_room;

	/* Transient failure which in theory could occur if
	 * in-flight packets from before XDP was enabled reach
	 * the receive path after XDP is loaded.
	 */
	if (unlikely(hdr->hdr.gso_type))
		return NULL;

	/* Partially checksummed packets must be dropped. */
	if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))
		return NULL;

	/* The XDP core assumes the frag size is PAGE_SIZE, but buffers
	 * with headroom may add a hole to the truesize, which
	 * makes their length exceed PAGE_SIZE. So we disable the
	 * hole mechanism for xdp. See add_recvbuf_mergeable().
	 */
	*frame_sz = truesize;

	if (likely(headroom >= virtnet_get_headroom(vi) &&
		   (*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) {
		return page_address(*page) + offset;
	}

	/* This happens when the headroom is not enough because
	 * the buffer was prefilled before XDP was set.
	 * This should only happen for the first several packets.
	 * In fact, vq reset could be used here to clean up
	 * the prefilled buffers, but many existing devices do not
	 * support it, and we don't want to bother users who are
	 * using XDP normally.
	 */
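	/* Two fallback strategies: a program without frag support gets the
	 * whole packet linearized into one fresh page, while a frag-aware
	 * program only needs the head buffer copied into a page that has
	 * XDP_PACKET_HEADROOM available in front of the data.
	 */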
2100 */ 2101 if (!xdp_prog->aux->xdp_has_frags) { 2102 /* linearize data for XDP */ 2103 xdp_page = xdp_linearize_page(rq, num_buf, 2104 *page, offset, 2105 XDP_PACKET_HEADROOM, 2106 len); 2107 if (!xdp_page) 2108 return NULL; 2109 } else { 2110 xdp_room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM + 2111 sizeof(struct skb_shared_info)); 2112 if (*len + xdp_room > PAGE_SIZE) 2113 return NULL; 2114 2115 xdp_page = alloc_page(GFP_ATOMIC); 2116 if (!xdp_page) 2117 return NULL; 2118 2119 memcpy(page_address(xdp_page) + XDP_PACKET_HEADROOM, 2120 page_address(*page) + offset, *len); 2121 } 2122 2123 *frame_sz = PAGE_SIZE; 2124 2125 put_page(*page); 2126 2127 *page = xdp_page; 2128 2129 return page_address(*page) + XDP_PACKET_HEADROOM; 2130 } 2131 2132 static struct sk_buff *receive_mergeable_xdp(struct net_device *dev, 2133 struct virtnet_info *vi, 2134 struct receive_queue *rq, 2135 struct bpf_prog *xdp_prog, 2136 void *buf, 2137 void *ctx, 2138 unsigned int len, 2139 unsigned int *xdp_xmit, 2140 struct virtnet_rq_stats *stats) 2141 { 2142 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2143 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 2144 struct page *page = virt_to_head_page(buf); 2145 int offset = buf - page_address(page); 2146 unsigned int xdp_frags_truesz = 0; 2147 struct sk_buff *head_skb; 2148 unsigned int frame_sz; 2149 struct xdp_buff xdp; 2150 void *data; 2151 u32 act; 2152 int err; 2153 2154 data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, &frame_sz, &num_buf, &page, 2155 offset, &len, hdr); 2156 if (unlikely(!data)) 2157 goto err_xdp; 2158 2159 err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz, 2160 &num_buf, &xdp_frags_truesz, stats); 2161 if (unlikely(err)) 2162 goto err_xdp; 2163 2164 act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); 2165 2166 switch (act) { 2167 case XDP_PASS: 2168 head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz); 2169 if (unlikely(!head_skb)) 2170 break; 2171 return head_skb; 2172 2173 case XDP_TX: 2174 case XDP_REDIRECT: 2175 return NULL; 2176 2177 default: 2178 break; 2179 } 2180 2181 put_xdp_frags(&xdp); 2182 2183 err_xdp: 2184 put_page(page); 2185 mergeable_buf_free(rq, num_buf, dev, stats); 2186 2187 u64_stats_inc(&stats->xdp_drops); 2188 u64_stats_inc(&stats->drops); 2189 return NULL; 2190 } 2191 2192 static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb, 2193 struct sk_buff *curr_skb, 2194 struct page *page, void *buf, 2195 int len, int truesize) 2196 { 2197 int num_skb_frags; 2198 int offset; 2199 2200 num_skb_frags = skb_shinfo(curr_skb)->nr_frags; 2201 if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) { 2202 struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC); 2203 2204 if (unlikely(!nskb)) 2205 return NULL; 2206 2207 if (curr_skb == head_skb) 2208 skb_shinfo(curr_skb)->frag_list = nskb; 2209 else 2210 curr_skb->next = nskb; 2211 curr_skb = nskb; 2212 head_skb->truesize += nskb->truesize; 2213 num_skb_frags = 0; 2214 } 2215 2216 if (curr_skb != head_skb) { 2217 head_skb->data_len += len; 2218 head_skb->len += len; 2219 head_skb->truesize += truesize; 2220 } 2221 2222 offset = buf - page_address(page); 2223 if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) { 2224 put_page(page); 2225 skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1, 2226 len, truesize); 2227 } else { 2228 skb_add_rx_frag(curr_skb, num_skb_frags, page, 2229 offset, len, truesize); 2230 } 2231 2232 return curr_skb; 2233 } 2234 2235 static struct sk_buff *receive_mergeable(struct net_device *dev, 2236 
struct virtnet_info *vi, 2237 struct receive_queue *rq, 2238 void *buf, 2239 void *ctx, 2240 unsigned int len, 2241 unsigned int *xdp_xmit, 2242 struct virtnet_rq_stats *stats) 2243 { 2244 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 2245 int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 2246 struct page *page = virt_to_head_page(buf); 2247 int offset = buf - page_address(page); 2248 struct sk_buff *head_skb, *curr_skb; 2249 unsigned int truesize = mergeable_ctx_to_truesize(ctx); 2250 unsigned int headroom = mergeable_ctx_to_headroom(ctx); 2251 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 2252 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); 2253 2254 head_skb = NULL; 2255 u64_stats_add(&stats->bytes, len - vi->hdr_len); 2256 2257 if (unlikely(len > truesize - room)) { 2258 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 2259 dev->name, len, (unsigned long)(truesize - room)); 2260 DEV_STATS_INC(dev, rx_length_errors); 2261 goto err_skb; 2262 } 2263 2264 if (unlikely(vi->xdp_enabled)) { 2265 struct bpf_prog *xdp_prog; 2266 2267 rcu_read_lock(); 2268 xdp_prog = rcu_dereference(rq->xdp_prog); 2269 if (xdp_prog) { 2270 head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx, 2271 len, xdp_xmit, stats); 2272 rcu_read_unlock(); 2273 return head_skb; 2274 } 2275 rcu_read_unlock(); 2276 } 2277 2278 head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom); 2279 curr_skb = head_skb; 2280 2281 if (unlikely(!curr_skb)) 2282 goto err_skb; 2283 while (--num_buf) { 2284 buf = virtnet_rq_get_buf(rq, &len, &ctx); 2285 if (unlikely(!buf)) { 2286 pr_debug("%s: rx error: %d buffers out of %d missing\n", 2287 dev->name, num_buf, 2288 virtio16_to_cpu(vi->vdev, 2289 hdr->num_buffers)); 2290 DEV_STATS_INC(dev, rx_length_errors); 2291 goto err_buf; 2292 } 2293 2294 u64_stats_add(&stats->bytes, len); 2295 page = virt_to_head_page(buf); 2296 2297 truesize = mergeable_ctx_to_truesize(ctx); 2298 headroom = mergeable_ctx_to_headroom(ctx); 2299 tailroom = headroom ? 
sizeof(struct skb_shared_info) : 0; 2300 room = SKB_DATA_ALIGN(headroom + tailroom); 2301 if (unlikely(len > truesize - room)) { 2302 pr_debug("%s: rx error: len %u exceeds truesize %lu\n", 2303 dev->name, len, (unsigned long)(truesize - room)); 2304 DEV_STATS_INC(dev, rx_length_errors); 2305 goto err_skb; 2306 } 2307 2308 curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, 2309 buf, len, truesize); 2310 if (!curr_skb) 2311 goto err_skb; 2312 } 2313 2314 ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len); 2315 return head_skb; 2316 2317 err_skb: 2318 put_page(page); 2319 mergeable_buf_free(rq, num_buf, dev, stats); 2320 2321 err_buf: 2322 u64_stats_inc(&stats->drops); 2323 dev_kfree_skb(head_skb); 2324 return NULL; 2325 } 2326 2327 static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, 2328 struct sk_buff *skb) 2329 { 2330 enum pkt_hash_types rss_hash_type; 2331 2332 if (!hdr_hash || !skb) 2333 return; 2334 2335 switch (__le16_to_cpu(hdr_hash->hash_report)) { 2336 case VIRTIO_NET_HASH_REPORT_TCPv4: 2337 case VIRTIO_NET_HASH_REPORT_UDPv4: 2338 case VIRTIO_NET_HASH_REPORT_TCPv6: 2339 case VIRTIO_NET_HASH_REPORT_UDPv6: 2340 case VIRTIO_NET_HASH_REPORT_TCPv6_EX: 2341 case VIRTIO_NET_HASH_REPORT_UDPv6_EX: 2342 rss_hash_type = PKT_HASH_TYPE_L4; 2343 break; 2344 case VIRTIO_NET_HASH_REPORT_IPv4: 2345 case VIRTIO_NET_HASH_REPORT_IPv6: 2346 case VIRTIO_NET_HASH_REPORT_IPv6_EX: 2347 rss_hash_type = PKT_HASH_TYPE_L3; 2348 break; 2349 case VIRTIO_NET_HASH_REPORT_NONE: 2350 default: 2351 rss_hash_type = PKT_HASH_TYPE_NONE; 2352 } 2353 skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type); 2354 } 2355 2356 static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, 2357 struct sk_buff *skb, u8 flags) 2358 { 2359 struct virtio_net_common_hdr *hdr; 2360 struct net_device *dev = vi->dev; 2361 2362 hdr = skb_vnet_common_hdr(skb); 2363 if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report) 2364 virtio_skb_set_hash(&hdr->hash_v1_hdr, skb); 2365 2366 if (flags & VIRTIO_NET_HDR_F_DATA_VALID) 2367 skb->ip_summed = CHECKSUM_UNNECESSARY; 2368 2369 if (virtio_net_hdr_to_skb(skb, &hdr->hdr, 2370 virtio_is_little_endian(vi->vdev))) { 2371 net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n", 2372 dev->name, hdr->hdr.gso_type, 2373 hdr->hdr.gso_size); 2374 goto frame_err; 2375 } 2376 2377 skb_record_rx_queue(skb, vq2rxq(rq->vq)); 2378 skb->protocol = eth_type_trans(skb, dev); 2379 pr_debug("Receiving skb proto 0x%04x len %i type %i\n", 2380 ntohs(skb->protocol), skb->len, skb->pkt_type); 2381 2382 napi_gro_receive(&rq->napi, skb); 2383 return; 2384 2385 frame_err: 2386 DEV_STATS_INC(dev, rx_frame_errors); 2387 dev_kfree_skb(skb); 2388 } 2389 2390 static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, 2391 void *buf, unsigned int len, void **ctx, 2392 unsigned int *xdp_xmit, 2393 struct virtnet_rq_stats *stats) 2394 { 2395 struct net_device *dev = vi->dev; 2396 struct sk_buff *skb; 2397 u8 flags; 2398 2399 if (unlikely(len < vi->hdr_len + ETH_HLEN)) { 2400 pr_debug("%s: short packet %i\n", dev->name, len); 2401 DEV_STATS_INC(dev, rx_length_errors); 2402 virtnet_rq_free_buf(vi, rq, buf); 2403 return; 2404 } 2405 2406 /* 1. Save the flags early, as the XDP program might overwrite them. 2407 * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID 2408 * stay valid after XDP processing. 2409 * 2. 
XDP doesn't work with partially checksummed packets (refer to 2410 * virtnet_xdp_set()), so packets marked as 2411 * VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing. 2412 */ 2413 flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; 2414 2415 if (vi->mergeable_rx_bufs) 2416 skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, 2417 stats); 2418 else if (vi->big_packets) 2419 skb = receive_big(dev, vi, rq, buf, len, stats); 2420 else 2421 skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); 2422 2423 if (unlikely(!skb)) 2424 return; 2425 2426 virtnet_receive_done(vi, rq, skb, flags); 2427 } 2428 2429 /* Unlike mergeable buffers, all buffers are allocated to the 2430 * same size, except for the headroom. For this reason we do 2431 * not need to use mergeable_len_to_ctx here - it is enough 2432 * to store the headroom as the context ignoring the truesize. 2433 */ 2434 static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, 2435 gfp_t gfp) 2436 { 2437 char *buf; 2438 unsigned int xdp_headroom = virtnet_get_headroom(vi); 2439 void *ctx = (void *)(unsigned long)xdp_headroom; 2440 int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom; 2441 int err; 2442 2443 len = SKB_DATA_ALIGN(len) + 2444 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 2445 2446 buf = virtnet_rq_alloc(rq, len, gfp); 2447 if (unlikely(!buf)) 2448 return -ENOMEM; 2449 2450 buf += VIRTNET_RX_PAD + xdp_headroom; 2451 2452 virtnet_rq_init_one_sg(rq, buf, vi->hdr_len + GOOD_PACKET_LEN); 2453 2454 err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); 2455 if (err < 0) { 2456 if (rq->do_dma) 2457 virtnet_rq_unmap(rq, buf, 0); 2458 put_page(virt_to_head_page(buf)); 2459 } 2460 2461 return err; 2462 } 2463 2464 static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, 2465 gfp_t gfp) 2466 { 2467 struct page *first, *list = NULL; 2468 char *p; 2469 int i, err, offset; 2470 2471 sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2); 2472 2473 /* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */ 2474 for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) { 2475 first = get_a_page(rq, gfp); 2476 if (!first) { 2477 if (list) 2478 give_pages(rq, list); 2479 return -ENOMEM; 2480 } 2481 sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE); 2482 2483 /* chain new page in list head to match sg */ 2484 first->private = (unsigned long)list; 2485 list = first; 2486 } 2487 2488 first = get_a_page(rq, gfp); 2489 if (!first) { 2490 give_pages(rq, list); 2491 return -ENOMEM; 2492 } 2493 p = page_address(first); 2494 2495 /* rq->sg[0], rq->sg[1] share the same page */ 2496 /* a separated rq->sg[0] for header - required in case !any_header_sg */ 2497 sg_set_buf(&rq->sg[0], p, vi->hdr_len); 2498 2499 /* rq->sg[1] for data packet, from offset */ 2500 offset = sizeof(struct padded_vnet_hdr); 2501 sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset); 2502 2503 /* chain first in list head */ 2504 first->private = (unsigned long)list; 2505 err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2, 2506 first, gfp); 2507 if (err < 0) 2508 give_pages(rq, first); 2509 2510 return err; 2511 } 2512 2513 static unsigned int get_mergeable_buf_len(struct receive_queue *rq, 2514 struct ewma_pkt_len *avg_pkt_len, 2515 unsigned int room) 2516 { 2517 struct virtnet_info *vi = rq->vq->vdev->priv; 2518 const size_t hdr_len = vi->hdr_len; 2519 unsigned int len; 2520 2521 if (room) 2522 return PAGE_SIZE - room; 2523 2524 len = 
hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len), 2525 rq->min_buf_len, PAGE_SIZE - hdr_len); 2526 2527 return ALIGN(len, L1_CACHE_BYTES); 2528 } 2529 2530 static int add_recvbuf_mergeable(struct virtnet_info *vi, 2531 struct receive_queue *rq, gfp_t gfp) 2532 { 2533 struct page_frag *alloc_frag = &rq->alloc_frag; 2534 unsigned int headroom = virtnet_get_headroom(vi); 2535 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 2536 unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); 2537 unsigned int len, hole; 2538 void *ctx; 2539 char *buf; 2540 int err; 2541 2542 /* Extra tailroom is needed to satisfy XDP's assumption. This 2543 * means rx frags coalescing won't work, but consider we've 2544 * disabled GSO for XDP, it won't be a big issue. 2545 */ 2546 len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); 2547 2548 buf = virtnet_rq_alloc(rq, len + room, gfp); 2549 if (unlikely(!buf)) 2550 return -ENOMEM; 2551 2552 buf += headroom; /* advance address leaving hole at front of pkt */ 2553 hole = alloc_frag->size - alloc_frag->offset; 2554 if (hole < len + room) { 2555 /* To avoid internal fragmentation, if there is very likely not 2556 * enough space for another buffer, add the remaining space to 2557 * the current buffer. 2558 * XDP core assumes that frame_size of xdp_buff and the length 2559 * of the frag are PAGE_SIZE, so we disable the hole mechanism. 2560 */ 2561 if (!headroom) 2562 len += hole; 2563 alloc_frag->offset += hole; 2564 } 2565 2566 virtnet_rq_init_one_sg(rq, buf, len); 2567 2568 ctx = mergeable_len_to_ctx(len + room, headroom); 2569 err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); 2570 if (err < 0) { 2571 if (rq->do_dma) 2572 virtnet_rq_unmap(rq, buf, 0); 2573 put_page(virt_to_head_page(buf)); 2574 } 2575 2576 return err; 2577 } 2578 2579 /* 2580 * Returns false if we couldn't fill entirely (OOM). 2581 * 2582 * Normally run in the receive path, but can also be run from ndo_open 2583 * before we're receiving packets, or from refill_work which is 2584 * careful to disable receiving (using napi_disable). 2585 */ 2586 static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq, 2587 gfp_t gfp) 2588 { 2589 int err; 2590 2591 if (rq->xsk_pool) { 2592 err = virtnet_add_recvbuf_xsk(vi, rq, rq->xsk_pool, gfp); 2593 goto kick; 2594 } 2595 2596 do { 2597 if (vi->mergeable_rx_bufs) 2598 err = add_recvbuf_mergeable(vi, rq, gfp); 2599 else if (vi->big_packets) 2600 err = add_recvbuf_big(vi, rq, gfp); 2601 else 2602 err = add_recvbuf_small(vi, rq, gfp); 2603 2604 if (err) 2605 break; 2606 } while (rq->vq->num_free); 2607 2608 kick: 2609 if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) { 2610 unsigned long flags; 2611 2612 flags = u64_stats_update_begin_irqsave(&rq->stats.syncp); 2613 u64_stats_inc(&rq->stats.kicks); 2614 u64_stats_update_end_irqrestore(&rq->stats.syncp, flags); 2615 } 2616 2617 return err != -ENOMEM; 2618 } 2619 2620 static void skb_recv_done(struct virtqueue *rvq) 2621 { 2622 struct virtnet_info *vi = rvq->vdev->priv; 2623 struct receive_queue *rq = &vi->rq[vq2rxq(rvq)]; 2624 2625 rq->calls++; 2626 virtqueue_napi_schedule(&rq->napi, rvq); 2627 } 2628 2629 static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi) 2630 { 2631 napi_enable(napi); 2632 2633 /* If all buffers were filled by other side before we napi_enabled, we 2634 * won't get another interrupt, so process any outstanding packets now. 2635 * Call local_bh_enable after to trigger softIRQ processing. 
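	 * Scheduling NAPI with bottom halves disabled only raises the softirq;
	 * it is the local_bh_enable() below that actually runs it.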
2636 */ 2637 local_bh_disable(); 2638 virtqueue_napi_schedule(napi, vq); 2639 local_bh_enable(); 2640 } 2641 2642 static void virtnet_napi_tx_enable(struct virtnet_info *vi, 2643 struct virtqueue *vq, 2644 struct napi_struct *napi) 2645 { 2646 if (!napi->weight) 2647 return; 2648 2649 /* Tx napi touches cachelines on the cpu handling tx interrupts. Only 2650 * enable the feature if this is likely affine with the transmit path. 2651 */ 2652 if (!vi->affinity_hint_set) { 2653 napi->weight = 0; 2654 return; 2655 } 2656 2657 return virtnet_napi_enable(vq, napi); 2658 } 2659 2660 static void virtnet_napi_tx_disable(struct napi_struct *napi) 2661 { 2662 if (napi->weight) 2663 napi_disable(napi); 2664 } 2665 2666 static void refill_work(struct work_struct *work) 2667 { 2668 struct virtnet_info *vi = 2669 container_of(work, struct virtnet_info, refill.work); 2670 bool still_empty; 2671 int i; 2672 2673 for (i = 0; i < vi->curr_queue_pairs; i++) { 2674 struct receive_queue *rq = &vi->rq[i]; 2675 2676 napi_disable(&rq->napi); 2677 still_empty = !try_fill_recv(vi, rq, GFP_KERNEL); 2678 virtnet_napi_enable(rq->vq, &rq->napi); 2679 2680 /* In theory, this can happen: if we don't get any buffers in 2681 * we will *never* try to fill again. 2682 */ 2683 if (still_empty) 2684 schedule_delayed_work(&vi->refill, HZ/2); 2685 } 2686 } 2687 2688 static int virtnet_receive_xsk_bufs(struct virtnet_info *vi, 2689 struct receive_queue *rq, 2690 int budget, 2691 unsigned int *xdp_xmit, 2692 struct virtnet_rq_stats *stats) 2693 { 2694 unsigned int len; 2695 int packets = 0; 2696 void *buf; 2697 2698 while (packets < budget) { 2699 buf = virtqueue_get_buf(rq->vq, &len); 2700 if (!buf) 2701 break; 2702 2703 virtnet_receive_xsk_buf(vi, rq, buf, len, xdp_xmit, stats); 2704 packets++; 2705 } 2706 2707 return packets; 2708 } 2709 2710 static int virtnet_receive_packets(struct virtnet_info *vi, 2711 struct receive_queue *rq, 2712 int budget, 2713 unsigned int *xdp_xmit, 2714 struct virtnet_rq_stats *stats) 2715 { 2716 unsigned int len; 2717 int packets = 0; 2718 void *buf; 2719 2720 if (!vi->big_packets || vi->mergeable_rx_bufs) { 2721 void *ctx; 2722 while (packets < budget && 2723 (buf = virtnet_rq_get_buf(rq, &len, &ctx))) { 2724 receive_buf(vi, rq, buf, len, ctx, xdp_xmit, stats); 2725 packets++; 2726 } 2727 } else { 2728 while (packets < budget && 2729 (buf = virtnet_rq_get_buf(rq, &len, NULL)) != NULL) { 2730 receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats); 2731 packets++; 2732 } 2733 } 2734 2735 return packets; 2736 } 2737 2738 static int virtnet_receive(struct receive_queue *rq, int budget, 2739 unsigned int *xdp_xmit) 2740 { 2741 struct virtnet_info *vi = rq->vq->vdev->priv; 2742 struct virtnet_rq_stats stats = {}; 2743 int i, packets; 2744 2745 if (rq->xsk_pool) 2746 packets = virtnet_receive_xsk_bufs(vi, rq, budget, xdp_xmit, &stats); 2747 else 2748 packets = virtnet_receive_packets(vi, rq, budget, xdp_xmit, &stats); 2749 2750 if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { 2751 if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { 2752 spin_lock(&vi->refill_lock); 2753 if (vi->refill_enabled) 2754 schedule_delayed_work(&vi->refill, 0); 2755 spin_unlock(&vi->refill_lock); 2756 } 2757 } 2758 2759 u64_stats_set(&stats.packets, packets); 2760 u64_stats_update_begin(&rq->stats.syncp); 2761 for (i = 0; i < ARRAY_SIZE(virtnet_rq_stats_desc); i++) { 2762 size_t offset = virtnet_rq_stats_desc[i].offset; 2763 u64_stats_t *item, *src; 2764 2765 item = (u64_stats_t *)((u8 *)&rq->stats + 
offset); 2766 src = (u64_stats_t *)((u8 *)&stats + offset); 2767 u64_stats_add(item, u64_stats_read(src)); 2768 } 2769 2770 u64_stats_add(&rq->stats.packets, u64_stats_read(&stats.packets)); 2771 u64_stats_add(&rq->stats.bytes, u64_stats_read(&stats.bytes)); 2772 2773 u64_stats_update_end(&rq->stats.syncp); 2774 2775 return packets; 2776 } 2777 2778 static void virtnet_poll_cleantx(struct receive_queue *rq, int budget) 2779 { 2780 struct virtnet_info *vi = rq->vq->vdev->priv; 2781 unsigned int index = vq2rxq(rq->vq); 2782 struct send_queue *sq = &vi->sq[index]; 2783 struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index); 2784 2785 if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index)) 2786 return; 2787 2788 if (__netif_tx_trylock(txq)) { 2789 if (sq->reset) { 2790 __netif_tx_unlock(txq); 2791 return; 2792 } 2793 2794 do { 2795 virtqueue_disable_cb(sq->vq); 2796 free_old_xmit(sq, txq, !!budget); 2797 } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 2798 2799 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) { 2800 if (netif_tx_queue_stopped(txq)) { 2801 u64_stats_update_begin(&sq->stats.syncp); 2802 u64_stats_inc(&sq->stats.wake); 2803 u64_stats_update_end(&sq->stats.syncp); 2804 } 2805 netif_tx_wake_queue(txq); 2806 } 2807 2808 __netif_tx_unlock(txq); 2809 } 2810 } 2811 2812 static void virtnet_rx_dim_update(struct virtnet_info *vi, struct receive_queue *rq) 2813 { 2814 struct dim_sample cur_sample = {}; 2815 2816 if (!rq->packets_in_napi) 2817 return; 2818 2819 /* Don't need protection when fetching stats, since fetcher and 2820 * updater of the stats are in same context 2821 */ 2822 dim_update_sample(rq->calls, 2823 u64_stats_read(&rq->stats.packets), 2824 u64_stats_read(&rq->stats.bytes), 2825 &cur_sample); 2826 2827 net_dim(&rq->dim, cur_sample); 2828 rq->packets_in_napi = 0; 2829 } 2830 2831 static int virtnet_poll(struct napi_struct *napi, int budget) 2832 { 2833 struct receive_queue *rq = 2834 container_of(napi, struct receive_queue, napi); 2835 struct virtnet_info *vi = rq->vq->vdev->priv; 2836 struct send_queue *sq; 2837 unsigned int received; 2838 unsigned int xdp_xmit = 0; 2839 bool napi_complete; 2840 2841 virtnet_poll_cleantx(rq, budget); 2842 2843 received = virtnet_receive(rq, budget, &xdp_xmit); 2844 rq->packets_in_napi += received; 2845 2846 if (xdp_xmit & VIRTIO_XDP_REDIR) 2847 xdp_do_flush(); 2848 2849 /* Out of packets? */ 2850 if (received < budget) { 2851 napi_complete = virtqueue_napi_complete(napi, rq->vq, received); 2852 /* Intentionally not taking dim_lock here. This may result in a 2853 * spurious net_dim call. But if that happens virtnet_rx_dim_work 2854 * will not act on the scheduled work. 
2855 */ 2856 if (napi_complete && rq->dim_enabled) 2857 virtnet_rx_dim_update(vi, rq); 2858 } 2859 2860 if (xdp_xmit & VIRTIO_XDP_TX) { 2861 sq = virtnet_xdp_get_sq(vi); 2862 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 2863 u64_stats_update_begin(&sq->stats.syncp); 2864 u64_stats_inc(&sq->stats.kicks); 2865 u64_stats_update_end(&sq->stats.syncp); 2866 } 2867 virtnet_xdp_put_sq(vi, sq); 2868 } 2869 2870 return received; 2871 } 2872 2873 static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index) 2874 { 2875 virtnet_napi_tx_disable(&vi->sq[qp_index].napi); 2876 napi_disable(&vi->rq[qp_index].napi); 2877 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 2878 } 2879 2880 static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index) 2881 { 2882 struct net_device *dev = vi->dev; 2883 int err; 2884 2885 err = xdp_rxq_info_reg(&vi->rq[qp_index].xdp_rxq, dev, qp_index, 2886 vi->rq[qp_index].napi.napi_id); 2887 if (err < 0) 2888 return err; 2889 2890 err = xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq, 2891 MEM_TYPE_PAGE_SHARED, NULL); 2892 if (err < 0) 2893 goto err_xdp_reg_mem_model; 2894 2895 netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, qp_index)); 2896 virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi); 2897 virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi); 2898 2899 return 0; 2900 2901 err_xdp_reg_mem_model: 2902 xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); 2903 return err; 2904 } 2905 2906 static void virtnet_cancel_dim(struct virtnet_info *vi, struct dim *dim) 2907 { 2908 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 2909 return; 2910 net_dim_work_cancel(dim); 2911 } 2912 2913 static void virtnet_update_settings(struct virtnet_info *vi) 2914 { 2915 u32 speed; 2916 u8 duplex; 2917 2918 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX)) 2919 return; 2920 2921 virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed); 2922 2923 if (ethtool_validate_speed(speed)) 2924 vi->speed = speed; 2925 2926 virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex); 2927 2928 if (ethtool_validate_duplex(duplex)) 2929 vi->duplex = duplex; 2930 } 2931 2932 static int virtnet_open(struct net_device *dev) 2933 { 2934 struct virtnet_info *vi = netdev_priv(dev); 2935 int i, err; 2936 2937 enable_delayed_refill(vi); 2938 2939 for (i = 0; i < vi->max_queue_pairs; i++) { 2940 if (i < vi->curr_queue_pairs) 2941 /* Make sure we have some buffers: if oom use wq. 
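			 * try_fill_recv() can fail under memory pressure;
			 * refill_work will then retry the fill later from the
			 * workqueue.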
*/ 2942 if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) 2943 schedule_delayed_work(&vi->refill, 0); 2944 2945 err = virtnet_enable_queue_pair(vi, i); 2946 if (err < 0) 2947 goto err_enable_qp; 2948 } 2949 2950 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { 2951 if (vi->status & VIRTIO_NET_S_LINK_UP) 2952 netif_carrier_on(vi->dev); 2953 virtio_config_driver_enable(vi->vdev); 2954 } else { 2955 vi->status = VIRTIO_NET_S_LINK_UP; 2956 netif_carrier_on(dev); 2957 } 2958 2959 return 0; 2960 2961 err_enable_qp: 2962 disable_delayed_refill(vi); 2963 cancel_delayed_work_sync(&vi->refill); 2964 2965 for (i--; i >= 0; i--) { 2966 virtnet_disable_queue_pair(vi, i); 2967 virtnet_cancel_dim(vi, &vi->rq[i].dim); 2968 } 2969 2970 return err; 2971 } 2972 2973 static int virtnet_poll_tx(struct napi_struct *napi, int budget) 2974 { 2975 struct send_queue *sq = container_of(napi, struct send_queue, napi); 2976 struct virtnet_info *vi = sq->vq->vdev->priv; 2977 unsigned int index = vq2txq(sq->vq); 2978 struct netdev_queue *txq; 2979 int opaque; 2980 bool done; 2981 2982 if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { 2983 /* We don't need to enable cb for XDP */ 2984 napi_complete_done(napi, 0); 2985 return 0; 2986 } 2987 2988 txq = netdev_get_tx_queue(vi->dev, index); 2989 __netif_tx_lock(txq, raw_smp_processor_id()); 2990 virtqueue_disable_cb(sq->vq); 2991 free_old_xmit(sq, txq, !!budget); 2992 2993 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) { 2994 if (netif_tx_queue_stopped(txq)) { 2995 u64_stats_update_begin(&sq->stats.syncp); 2996 u64_stats_inc(&sq->stats.wake); 2997 u64_stats_update_end(&sq->stats.syncp); 2998 } 2999 netif_tx_wake_queue(txq); 3000 } 3001 3002 opaque = virtqueue_enable_cb_prepare(sq->vq); 3003 3004 done = napi_complete_done(napi, 0); 3005 3006 if (!done) 3007 virtqueue_disable_cb(sq->vq); 3008 3009 __netif_tx_unlock(txq); 3010 3011 if (done) { 3012 if (unlikely(virtqueue_poll(sq->vq, opaque))) { 3013 if (napi_schedule_prep(napi)) { 3014 __netif_tx_lock(txq, raw_smp_processor_id()); 3015 virtqueue_disable_cb(sq->vq); 3016 __netif_tx_unlock(txq); 3017 __napi_schedule(napi); 3018 } 3019 } 3020 } 3021 3022 return 0; 3023 } 3024 3025 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) 3026 { 3027 struct virtio_net_hdr_mrg_rxbuf *hdr; 3028 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; 3029 struct virtnet_info *vi = sq->vq->vdev->priv; 3030 int num_sg; 3031 unsigned hdr_len = vi->hdr_len; 3032 bool can_push; 3033 3034 pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); 3035 3036 can_push = vi->any_header_sg && 3037 !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) && 3038 !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len; 3039 /* Even if we can, don't push here yet as this would skew 3040 * csum_start offset below. */ 3041 if (can_push) 3042 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len); 3043 else 3044 hdr = &skb_vnet_common_hdr(skb)->mrg_hdr; 3045 3046 if (virtio_net_hdr_from_skb(skb, &hdr->hdr, 3047 virtio_is_little_endian(vi->vdev), false, 3048 0)) 3049 return -EPROTO; 3050 3051 if (vi->mergeable_rx_bufs) 3052 hdr->num_buffers = 0; 3053 3054 sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2)); 3055 if (can_push) { 3056 __skb_push(skb, hdr_len); 3057 num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len); 3058 if (unlikely(num_sg < 0)) 3059 return num_sg; 3060 /* Pull header back to avoid skew in tx bytes calculations. 
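	 * The header was pushed only so skb_to_sgvec() maps it together with
	 * the linear data; skb->len must again cover just the packet itself.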
*/ 3061 __skb_pull(skb, hdr_len); 3062 } else { 3063 sg_set_buf(sq->sg, hdr, hdr_len); 3064 num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len); 3065 if (unlikely(num_sg < 0)) 3066 return num_sg; 3067 num_sg++; 3068 } 3069 return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, 3070 skb_to_ptr(skb, orphan), GFP_ATOMIC); 3071 } 3072 3073 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) 3074 { 3075 struct virtnet_info *vi = netdev_priv(dev); 3076 int qnum = skb_get_queue_mapping(skb); 3077 struct send_queue *sq = &vi->sq[qnum]; 3078 int err; 3079 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); 3080 bool xmit_more = netdev_xmit_more(); 3081 bool use_napi = sq->napi.weight; 3082 bool kick; 3083 3084 /* Free up any pending old buffers before queueing new ones. */ 3085 do { 3086 if (use_napi) 3087 virtqueue_disable_cb(sq->vq); 3088 3089 free_old_xmit(sq, txq, false); 3090 3091 } while (use_napi && !xmit_more && 3092 unlikely(!virtqueue_enable_cb_delayed(sq->vq))); 3093 3094 /* timestamp packet in software */ 3095 skb_tx_timestamp(skb); 3096 3097 /* Try to transmit */ 3098 err = xmit_skb(sq, skb, !use_napi); 3099 3100 /* This should not happen! */ 3101 if (unlikely(err)) { 3102 DEV_STATS_INC(dev, tx_fifo_errors); 3103 if (net_ratelimit()) 3104 dev_warn(&dev->dev, 3105 "Unexpected TXQ (%d) queue failure: %d\n", 3106 qnum, err); 3107 DEV_STATS_INC(dev, tx_dropped); 3108 dev_kfree_skb_any(skb); 3109 return NETDEV_TX_OK; 3110 } 3111 3112 /* Don't wait up for transmitted skbs to be freed. */ 3113 if (!use_napi) { 3114 skb_orphan(skb); 3115 nf_reset_ct(skb); 3116 } 3117 3118 check_sq_full_and_disable(vi, dev, sq); 3119 3120 kick = use_napi ? __netdev_tx_sent_queue(txq, skb->len, xmit_more) : 3121 !xmit_more || netif_xmit_stopped(txq); 3122 if (kick) { 3123 if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { 3124 u64_stats_update_begin(&sq->stats.syncp); 3125 u64_stats_inc(&sq->stats.kicks); 3126 u64_stats_update_end(&sq->stats.syncp); 3127 } 3128 } 3129 3130 return NETDEV_TX_OK; 3131 } 3132 3133 static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) 3134 { 3135 bool running = netif_running(vi->dev); 3136 3137 if (running) { 3138 napi_disable(&rq->napi); 3139 virtnet_cancel_dim(vi, &rq->dim); 3140 } 3141 } 3142 3143 static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq) 3144 { 3145 bool running = netif_running(vi->dev); 3146 3147 if (!try_fill_recv(vi, rq, GFP_KERNEL)) 3148 schedule_delayed_work(&vi->refill, 0); 3149 3150 if (running) 3151 virtnet_napi_enable(rq->vq, &rq->napi); 3152 } 3153 3154 static int virtnet_rx_resize(struct virtnet_info *vi, 3155 struct receive_queue *rq, u32 ring_num) 3156 { 3157 int err, qindex; 3158 3159 qindex = rq - vi->rq; 3160 3161 virtnet_rx_pause(vi, rq); 3162 3163 err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_unmap_free_buf); 3164 if (err) 3165 netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err); 3166 3167 virtnet_rx_resume(vi, rq); 3168 return err; 3169 } 3170 3171 static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq) 3172 { 3173 bool running = netif_running(vi->dev); 3174 struct netdev_queue *txq; 3175 int qindex; 3176 3177 qindex = sq - vi->sq; 3178 3179 if (running) 3180 virtnet_napi_tx_disable(&sq->napi); 3181 3182 txq = netdev_get_tx_queue(vi->dev, qindex); 3183 3184 /* 1. wait all ximt complete 3185 * 2. 
fix the race of netif_stop_subqueue() vs netif_start_subqueue() 3186 */ 3187 __netif_tx_lock_bh(txq); 3188 3189 /* Prevent rx poll from accessing sq. */ 3190 sq->reset = true; 3191 3192 /* Prevent the upper layer from trying to send packets. */ 3193 netif_stop_subqueue(vi->dev, qindex); 3194 3195 __netif_tx_unlock_bh(txq); 3196 } 3197 3198 static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq) 3199 { 3200 bool running = netif_running(vi->dev); 3201 struct netdev_queue *txq; 3202 int qindex; 3203 3204 qindex = sq - vi->sq; 3205 3206 txq = netdev_get_tx_queue(vi->dev, qindex); 3207 3208 __netif_tx_lock_bh(txq); 3209 sq->reset = false; 3210 netif_tx_wake_queue(txq); 3211 __netif_tx_unlock_bh(txq); 3212 3213 if (running) 3214 virtnet_napi_tx_enable(vi, sq->vq, &sq->napi); 3215 } 3216 3217 static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq, 3218 u32 ring_num) 3219 { 3220 int qindex, err; 3221 3222 qindex = sq - vi->sq; 3223 3224 virtnet_tx_pause(vi, sq); 3225 3226 err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf); 3227 if (err) 3228 netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err); 3229 3230 virtnet_tx_resume(vi, sq); 3231 3232 return err; 3233 } 3234 3235 /* 3236 * Send command via the control virtqueue and check status. Commands 3237 * supported by the hypervisor, as indicated by feature bits, should 3238 * never fail unless improperly formatted. 3239 */ 3240 static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd, 3241 struct scatterlist *out, 3242 struct scatterlist *in) 3243 { 3244 struct scatterlist *sgs[5], hdr, stat; 3245 u32 out_num = 0, tmp, in_num = 0; 3246 bool ok; 3247 int ret; 3248 3249 /* Caller should know better */ 3250 BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)); 3251 3252 mutex_lock(&vi->cvq_lock); 3253 vi->ctrl->status = ~0; 3254 vi->ctrl->hdr.class = class; 3255 vi->ctrl->hdr.cmd = cmd; 3256 /* Add header */ 3257 sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr)); 3258 sgs[out_num++] = &hdr; 3259 3260 if (out) 3261 sgs[out_num++] = out; 3262 3263 /* Add return status. */ 3264 sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status)); 3265 sgs[out_num + in_num++] = &stat; 3266 3267 if (in) 3268 sgs[out_num + in_num++] = in; 3269 3270 BUG_ON(out_num + in_num > ARRAY_SIZE(sgs)); 3271 ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC); 3272 if (ret < 0) { 3273 dev_warn(&vi->vdev->dev, 3274 "Failed to add sgs for command vq: %d\n.", ret); 3275 mutex_unlock(&vi->cvq_lock); 3276 return false; 3277 } 3278 3279 if (unlikely(!virtqueue_kick(vi->cvq))) 3280 goto unlock; 3281 3282 /* Spin for a response, the kick causes an ioport write, trapping 3283 * into the hypervisor, so the request should be handled immediately. 
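	 * Poll with cond_resched()/cpu_relax() until the device returns the
	 * status buffer, or give up if the virtqueue is broken.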
3284 */ 3285 while (!virtqueue_get_buf(vi->cvq, &tmp) && 3286 !virtqueue_is_broken(vi->cvq)) { 3287 cond_resched(); 3288 cpu_relax(); 3289 } 3290 3291 unlock: 3292 ok = vi->ctrl->status == VIRTIO_NET_OK; 3293 mutex_unlock(&vi->cvq_lock); 3294 return ok; 3295 } 3296 3297 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, 3298 struct scatterlist *out) 3299 { 3300 return virtnet_send_command_reply(vi, class, cmd, out, NULL); 3301 } 3302 3303 static int virtnet_set_mac_address(struct net_device *dev, void *p) 3304 { 3305 struct virtnet_info *vi = netdev_priv(dev); 3306 struct virtio_device *vdev = vi->vdev; 3307 int ret; 3308 struct sockaddr *addr; 3309 struct scatterlist sg; 3310 3311 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 3312 return -EOPNOTSUPP; 3313 3314 addr = kmemdup(p, sizeof(*addr), GFP_KERNEL); 3315 if (!addr) 3316 return -ENOMEM; 3317 3318 ret = eth_prepare_mac_addr_change(dev, addr); 3319 if (ret) 3320 goto out; 3321 3322 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 3323 sg_init_one(&sg, addr->sa_data, dev->addr_len); 3324 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 3325 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 3326 dev_warn(&vdev->dev, 3327 "Failed to set mac address by vq command.\n"); 3328 ret = -EINVAL; 3329 goto out; 3330 } 3331 } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 3332 !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { 3333 unsigned int i; 3334 3335 /* Naturally, this has an atomicity problem. */ 3336 for (i = 0; i < dev->addr_len; i++) 3337 virtio_cwrite8(vdev, 3338 offsetof(struct virtio_net_config, mac) + 3339 i, addr->sa_data[i]); 3340 } 3341 3342 eth_commit_mac_addr_change(dev, p); 3343 ret = 0; 3344 3345 out: 3346 kfree(addr); 3347 return ret; 3348 } 3349 3350 static void virtnet_stats(struct net_device *dev, 3351 struct rtnl_link_stats64 *tot) 3352 { 3353 struct virtnet_info *vi = netdev_priv(dev); 3354 unsigned int start; 3355 int i; 3356 3357 for (i = 0; i < vi->max_queue_pairs; i++) { 3358 u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops; 3359 struct receive_queue *rq = &vi->rq[i]; 3360 struct send_queue *sq = &vi->sq[i]; 3361 3362 do { 3363 start = u64_stats_fetch_begin(&sq->stats.syncp); 3364 tpackets = u64_stats_read(&sq->stats.packets); 3365 tbytes = u64_stats_read(&sq->stats.bytes); 3366 terrors = u64_stats_read(&sq->stats.tx_timeouts); 3367 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 3368 3369 do { 3370 start = u64_stats_fetch_begin(&rq->stats.syncp); 3371 rpackets = u64_stats_read(&rq->stats.packets); 3372 rbytes = u64_stats_read(&rq->stats.bytes); 3373 rdrops = u64_stats_read(&rq->stats.drops); 3374 } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); 3375 3376 tot->rx_packets += rpackets; 3377 tot->tx_packets += tpackets; 3378 tot->rx_bytes += rbytes; 3379 tot->tx_bytes += tbytes; 3380 tot->rx_dropped += rdrops; 3381 tot->tx_errors += terrors; 3382 } 3383 3384 tot->tx_dropped = DEV_STATS_READ(dev, tx_dropped); 3385 tot->tx_fifo_errors = DEV_STATS_READ(dev, tx_fifo_errors); 3386 tot->rx_length_errors = DEV_STATS_READ(dev, rx_length_errors); 3387 tot->rx_frame_errors = DEV_STATS_READ(dev, rx_frame_errors); 3388 } 3389 3390 static void virtnet_ack_link_announce(struct virtnet_info *vi) 3391 { 3392 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE, 3393 VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL)) 3394 dev_warn(&vi->dev->dev, "Failed to ack link announce.\n"); 3395 } 3396 3397 static bool virtnet_commit_rss_command(struct virtnet_info *vi); 3398 3399 static void 
virtnet_rss_update_by_qpairs(struct virtnet_info *vi, u16 queue_pairs)
{
	u32 indir_val = 0;
	int i = 0;

	for (; i < vi->rss_indir_table_size; ++i) {
		indir_val = ethtool_rxfh_indir_default(i, queue_pairs);
		vi->rss.indirection_table[i] = indir_val;
	}
	vi->rss.max_tx_vq = queue_pairs;
}

static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
{
	struct virtio_net_ctrl_mq *mq __free(kfree) = NULL;
	struct virtio_net_ctrl_rss old_rss;
	struct net_device *dev = vi->dev;
	struct scatterlist sg;

	if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
		return 0;

	/* First check whether we need to update RSS. Do so only if (1) RSS is
	 * enabled and (2) there is no user configuration.
	 *
	 * During RSS command processing, the device updates queue_pairs using
	 * rss.max_tx_vq. That is, the device updates queue_pairs together with
	 * RSS, so we can skip the separate queue_pairs update
	 * (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET below) and return directly.
	 */
	if (vi->has_rss && !netif_is_rxfh_configured(dev)) {
		memcpy(&old_rss, &vi->rss, sizeof(old_rss));
		if (rss_indirection_table_alloc(&vi->rss, vi->rss_indir_table_size)) {
			vi->rss.indirection_table = old_rss.indirection_table;
			return -ENOMEM;
		}

		virtnet_rss_update_by_qpairs(vi, queue_pairs);

		if (!virtnet_commit_rss_command(vi)) {
			/* restore ctrl_rss if commit_rss_command failed */
			rss_indirection_table_free(&vi->rss);
			memcpy(&vi->rss, &old_rss, sizeof(old_rss));

			dev_warn(&dev->dev, "Failed to set num of queue pairs to %d, because committing RSS failed\n",
				 queue_pairs);
			return -EINVAL;
		}
		rss_indirection_table_free(&old_rss);
		goto succ;
	}

	mq = kzalloc(sizeof(*mq), GFP_KERNEL);
	if (!mq)
		return -ENOMEM;

	mq->virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs);
	sg_init_one(&sg, mq, sizeof(*mq));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
				  VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) {
		dev_warn(&dev->dev, "Failed to set num of queue pairs to %d\n",
			 queue_pairs);
		return -EINVAL;
	}
succ:
	vi->curr_queue_pairs = queue_pairs;
	/* virtnet_open() will refill the queues when the device is brought up. */
	if (dev->flags & IFF_UP)
		schedule_delayed_work(&vi->refill, 0);

	return 0;
}

static int virtnet_close(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int i;

	/* Make sure NAPI doesn't schedule refill work */
	disable_delayed_refill(vi);
	/* Make sure refill_work doesn't re-enable napi!
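	 * (it toggles NAPI around try_fill_recv(), so flush it before the
	 * queue pairs are disabled below)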
*/ 3480 cancel_delayed_work_sync(&vi->refill); 3481 /* Prevent the config change callback from changing carrier 3482 * after close 3483 */ 3484 virtio_config_driver_disable(vi->vdev); 3485 /* Stop getting status/speed updates: we don't care until next 3486 * open 3487 */ 3488 cancel_work_sync(&vi->config_work); 3489 3490 for (i = 0; i < vi->max_queue_pairs; i++) { 3491 virtnet_disable_queue_pair(vi, i); 3492 virtnet_cancel_dim(vi, &vi->rq[i].dim); 3493 } 3494 3495 netif_carrier_off(dev); 3496 3497 return 0; 3498 } 3499 3500 static void virtnet_rx_mode_work(struct work_struct *work) 3501 { 3502 struct virtnet_info *vi = 3503 container_of(work, struct virtnet_info, rx_mode_work); 3504 u8 *promisc_allmulti __free(kfree) = NULL; 3505 struct net_device *dev = vi->dev; 3506 struct scatterlist sg[2]; 3507 struct virtio_net_ctrl_mac *mac_data; 3508 struct netdev_hw_addr *ha; 3509 int uc_count; 3510 int mc_count; 3511 void *buf; 3512 int i; 3513 3514 /* We can't dynamically set ndo_set_rx_mode, so return gracefully */ 3515 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX)) 3516 return; 3517 3518 promisc_allmulti = kzalloc(sizeof(*promisc_allmulti), GFP_KERNEL); 3519 if (!promisc_allmulti) { 3520 dev_warn(&dev->dev, "Failed to set RX mode, no memory.\n"); 3521 return; 3522 } 3523 3524 rtnl_lock(); 3525 3526 *promisc_allmulti = !!(dev->flags & IFF_PROMISC); 3527 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); 3528 3529 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 3530 VIRTIO_NET_CTRL_RX_PROMISC, sg)) 3531 dev_warn(&dev->dev, "Failed to %sable promisc mode.\n", 3532 *promisc_allmulti ? "en" : "dis"); 3533 3534 *promisc_allmulti = !!(dev->flags & IFF_ALLMULTI); 3535 sg_init_one(sg, promisc_allmulti, sizeof(*promisc_allmulti)); 3536 3537 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, 3538 VIRTIO_NET_CTRL_RX_ALLMULTI, sg)) 3539 dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n", 3540 *promisc_allmulti ? 
"en" : "dis"); 3541 3542 netif_addr_lock_bh(dev); 3543 3544 uc_count = netdev_uc_count(dev); 3545 mc_count = netdev_mc_count(dev); 3546 /* MAC filter - use one buffer for both lists */ 3547 buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) + 3548 (2 * sizeof(mac_data->entries)), GFP_ATOMIC); 3549 mac_data = buf; 3550 if (!buf) { 3551 netif_addr_unlock_bh(dev); 3552 rtnl_unlock(); 3553 return; 3554 } 3555 3556 sg_init_table(sg, 2); 3557 3558 /* Store the unicast list and count in the front of the buffer */ 3559 mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count); 3560 i = 0; 3561 netdev_for_each_uc_addr(ha, dev) 3562 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 3563 3564 sg_set_buf(&sg[0], mac_data, 3565 sizeof(mac_data->entries) + (uc_count * ETH_ALEN)); 3566 3567 /* multicast list and count fill the end */ 3568 mac_data = (void *)&mac_data->macs[uc_count][0]; 3569 3570 mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count); 3571 i = 0; 3572 netdev_for_each_mc_addr(ha, dev) 3573 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); 3574 3575 netif_addr_unlock_bh(dev); 3576 3577 sg_set_buf(&sg[1], mac_data, 3578 sizeof(mac_data->entries) + (mc_count * ETH_ALEN)); 3579 3580 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 3581 VIRTIO_NET_CTRL_MAC_TABLE_SET, sg)) 3582 dev_warn(&dev->dev, "Failed to set MAC filter table.\n"); 3583 3584 rtnl_unlock(); 3585 3586 kfree(buf); 3587 } 3588 3589 static void virtnet_set_rx_mode(struct net_device *dev) 3590 { 3591 struct virtnet_info *vi = netdev_priv(dev); 3592 3593 if (vi->rx_mode_work_enabled) 3594 schedule_work(&vi->rx_mode_work); 3595 } 3596 3597 static int virtnet_vlan_rx_add_vid(struct net_device *dev, 3598 __be16 proto, u16 vid) 3599 { 3600 struct virtnet_info *vi = netdev_priv(dev); 3601 __virtio16 *_vid __free(kfree) = NULL; 3602 struct scatterlist sg; 3603 3604 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL); 3605 if (!_vid) 3606 return -ENOMEM; 3607 3608 *_vid = cpu_to_virtio16(vi->vdev, vid); 3609 sg_init_one(&sg, _vid, sizeof(*_vid)); 3610 3611 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 3612 VIRTIO_NET_CTRL_VLAN_ADD, &sg)) 3613 dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid); 3614 return 0; 3615 } 3616 3617 static int virtnet_vlan_rx_kill_vid(struct net_device *dev, 3618 __be16 proto, u16 vid) 3619 { 3620 struct virtnet_info *vi = netdev_priv(dev); 3621 __virtio16 *_vid __free(kfree) = NULL; 3622 struct scatterlist sg; 3623 3624 _vid = kzalloc(sizeof(*_vid), GFP_KERNEL); 3625 if (!_vid) 3626 return -ENOMEM; 3627 3628 *_vid = cpu_to_virtio16(vi->vdev, vid); 3629 sg_init_one(&sg, _vid, sizeof(*_vid)); 3630 3631 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, 3632 VIRTIO_NET_CTRL_VLAN_DEL, &sg)) 3633 dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid); 3634 return 0; 3635 } 3636 3637 static void virtnet_clean_affinity(struct virtnet_info *vi) 3638 { 3639 int i; 3640 3641 if (vi->affinity_hint_set) { 3642 for (i = 0; i < vi->max_queue_pairs; i++) { 3643 virtqueue_set_affinity(vi->rq[i].vq, NULL); 3644 virtqueue_set_affinity(vi->sq[i].vq, NULL); 3645 } 3646 3647 vi->affinity_hint_set = false; 3648 } 3649 } 3650 3651 static void virtnet_set_affinity(struct virtnet_info *vi) 3652 { 3653 cpumask_var_t mask; 3654 int stragglers; 3655 int group_size; 3656 int i, j, cpu; 3657 int num_cpu; 3658 int stride; 3659 3660 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { 3661 virtnet_clean_affinity(vi); 3662 return; 3663 } 3664 3665 num_cpu = num_online_cpus(); 3666 stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1); 3667 
stragglers = num_cpu >= vi->curr_queue_pairs ? 3668 num_cpu % vi->curr_queue_pairs : 3669 0; 3670 cpu = cpumask_first(cpu_online_mask); 3671 3672 for (i = 0; i < vi->curr_queue_pairs; i++) { 3673 group_size = stride + (i < stragglers ? 1 : 0); 3674 3675 for (j = 0; j < group_size; j++) { 3676 cpumask_set_cpu(cpu, mask); 3677 cpu = cpumask_next_wrap(cpu, cpu_online_mask, 3678 nr_cpu_ids, false); 3679 } 3680 virtqueue_set_affinity(vi->rq[i].vq, mask); 3681 virtqueue_set_affinity(vi->sq[i].vq, mask); 3682 __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS); 3683 cpumask_clear(mask); 3684 } 3685 3686 vi->affinity_hint_set = true; 3687 free_cpumask_var(mask); 3688 } 3689 3690 static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node) 3691 { 3692 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 3693 node); 3694 virtnet_set_affinity(vi); 3695 return 0; 3696 } 3697 3698 static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node) 3699 { 3700 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 3701 node_dead); 3702 virtnet_set_affinity(vi); 3703 return 0; 3704 } 3705 3706 static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node) 3707 { 3708 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, 3709 node); 3710 3711 virtnet_clean_affinity(vi); 3712 return 0; 3713 } 3714 3715 static enum cpuhp_state virtionet_online; 3716 3717 static int virtnet_cpu_notif_add(struct virtnet_info *vi) 3718 { 3719 int ret; 3720 3721 ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node); 3722 if (ret) 3723 return ret; 3724 ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD, 3725 &vi->node_dead); 3726 if (!ret) 3727 return ret; 3728 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 3729 return ret; 3730 } 3731 3732 static void virtnet_cpu_notif_remove(struct virtnet_info *vi) 3733 { 3734 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); 3735 cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD, 3736 &vi->node_dead); 3737 } 3738 3739 static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi, 3740 u16 vqn, u32 max_usecs, u32 max_packets) 3741 { 3742 struct virtio_net_ctrl_coal_vq *coal_vq __free(kfree) = NULL; 3743 struct scatterlist sgs; 3744 3745 coal_vq = kzalloc(sizeof(*coal_vq), GFP_KERNEL); 3746 if (!coal_vq) 3747 return -ENOMEM; 3748 3749 coal_vq->vqn = cpu_to_le16(vqn); 3750 coal_vq->coal.max_usecs = cpu_to_le32(max_usecs); 3751 coal_vq->coal.max_packets = cpu_to_le32(max_packets); 3752 sg_init_one(&sgs, coal_vq, sizeof(*coal_vq)); 3753 3754 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 3755 VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET, 3756 &sgs)) 3757 return -EINVAL; 3758 3759 return 0; 3760 } 3761 3762 static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info *vi, 3763 u16 queue, u32 max_usecs, 3764 u32 max_packets) 3765 { 3766 int err; 3767 3768 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 3769 return -EOPNOTSUPP; 3770 3771 err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue), 3772 max_usecs, max_packets); 3773 if (err) 3774 return err; 3775 3776 vi->rq[queue].intr_coal.max_usecs = max_usecs; 3777 vi->rq[queue].intr_coal.max_packets = max_packets; 3778 3779 return 0; 3780 } 3781 3782 static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info *vi, 3783 u16 queue, u32 max_usecs, 3784 u32 max_packets) 3785 { 3786 int err; 3787 3788 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 3789 return -EOPNOTSUPP; 
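	/* Only cache the new parameters once the device has accepted them,
	 * so the saved per-queue state matches what the device is using.
	 */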
3790 3791 err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue), 3792 max_usecs, max_packets); 3793 if (err) 3794 return err; 3795 3796 vi->sq[queue].intr_coal.max_usecs = max_usecs; 3797 vi->sq[queue].intr_coal.max_packets = max_packets; 3798 3799 return 0; 3800 } 3801 3802 static void virtnet_get_ringparam(struct net_device *dev, 3803 struct ethtool_ringparam *ring, 3804 struct kernel_ethtool_ringparam *kernel_ring, 3805 struct netlink_ext_ack *extack) 3806 { 3807 struct virtnet_info *vi = netdev_priv(dev); 3808 3809 ring->rx_max_pending = vi->rq[0].vq->num_max; 3810 ring->tx_max_pending = vi->sq[0].vq->num_max; 3811 ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 3812 ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 3813 } 3814 3815 static int virtnet_set_ringparam(struct net_device *dev, 3816 struct ethtool_ringparam *ring, 3817 struct kernel_ethtool_ringparam *kernel_ring, 3818 struct netlink_ext_ack *extack) 3819 { 3820 struct virtnet_info *vi = netdev_priv(dev); 3821 u32 rx_pending, tx_pending; 3822 struct receive_queue *rq; 3823 struct send_queue *sq; 3824 int i, err; 3825 3826 if (ring->rx_mini_pending || ring->rx_jumbo_pending) 3827 return -EINVAL; 3828 3829 rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); 3830 tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); 3831 3832 if (ring->rx_pending == rx_pending && 3833 ring->tx_pending == tx_pending) 3834 return 0; 3835 3836 if (ring->rx_pending > vi->rq[0].vq->num_max) 3837 return -EINVAL; 3838 3839 if (ring->tx_pending > vi->sq[0].vq->num_max) 3840 return -EINVAL; 3841 3842 for (i = 0; i < vi->max_queue_pairs; i++) { 3843 rq = vi->rq + i; 3844 sq = vi->sq + i; 3845 3846 if (ring->tx_pending != tx_pending) { 3847 err = virtnet_tx_resize(vi, sq, ring->tx_pending); 3848 if (err) 3849 return err; 3850 3851 /* Upon disabling and re-enabling a transmit virtqueue, the device must 3852 * set the coalescing parameters of the virtqueue to those configured 3853 * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver 3854 * did not set any TX coalescing parameters, to 0. 3855 */ 3856 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, i, 3857 vi->intr_coal_tx.max_usecs, 3858 vi->intr_coal_tx.max_packets); 3859 3860 /* Don't break the tx resize action if the vq coalescing is not 3861 * supported. The same is true for rx resize below. 
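			 * virtnet_send_tx_ctrl_coal_vq_cmd() returns -EOPNOTSUPP
			 * when VIRTIO_NET_F_VQ_NOTF_COAL is absent; only other
			 * errors abort the resize.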
3862 */ 3863 if (err && err != -EOPNOTSUPP) 3864 return err; 3865 } 3866 3867 if (ring->rx_pending != rx_pending) { 3868 err = virtnet_rx_resize(vi, rq, ring->rx_pending); 3869 if (err) 3870 return err; 3871 3872 /* The reason is same as the transmit virtqueue reset */ 3873 mutex_lock(&vi->rq[i].dim_lock); 3874 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, i, 3875 vi->intr_coal_rx.max_usecs, 3876 vi->intr_coal_rx.max_packets); 3877 mutex_unlock(&vi->rq[i].dim_lock); 3878 if (err && err != -EOPNOTSUPP) 3879 return err; 3880 } 3881 } 3882 3883 return 0; 3884 } 3885 3886 static bool virtnet_commit_rss_command(struct virtnet_info *vi) 3887 { 3888 struct net_device *dev = vi->dev; 3889 struct scatterlist sgs[4]; 3890 unsigned int sg_buf_size; 3891 3892 /* prepare sgs */ 3893 sg_init_table(sgs, 4); 3894 3895 sg_buf_size = offsetof(struct virtio_net_ctrl_rss, hash_cfg_reserved); 3896 sg_set_buf(&sgs[0], &vi->rss, sg_buf_size); 3897 3898 if (vi->has_rss) { 3899 sg_buf_size = sizeof(uint16_t) * vi->rss_indir_table_size; 3900 sg_set_buf(&sgs[1], vi->rss.indirection_table, sg_buf_size); 3901 } else { 3902 sg_set_buf(&sgs[1], &vi->rss.hash_cfg_reserved, sizeof(uint16_t)); 3903 } 3904 3905 sg_buf_size = offsetof(struct virtio_net_ctrl_rss, key) 3906 - offsetof(struct virtio_net_ctrl_rss, max_tx_vq); 3907 sg_set_buf(&sgs[2], &vi->rss.max_tx_vq, sg_buf_size); 3908 3909 sg_buf_size = vi->rss_key_size; 3910 sg_set_buf(&sgs[3], vi->rss.key, sg_buf_size); 3911 3912 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, 3913 vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG 3914 : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs)) 3915 goto err; 3916 3917 return true; 3918 3919 err: 3920 dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n"); 3921 return false; 3922 3923 } 3924 3925 static void virtnet_init_default_rss(struct virtnet_info *vi) 3926 { 3927 vi->rss.hash_types = vi->rss_hash_types_supported; 3928 vi->rss_hash_types_saved = vi->rss_hash_types_supported; 3929 vi->rss.indirection_table_mask = vi->rss_indir_table_size 3930 ? 
vi->rss_indir_table_size - 1 : 0; 3931 vi->rss.unclassified_queue = 0; 3932 3933 virtnet_rss_update_by_qpairs(vi, vi->curr_queue_pairs); 3934 3935 vi->rss.hash_key_length = vi->rss_key_size; 3936 3937 netdev_rss_key_fill(vi->rss.key, vi->rss_key_size); 3938 } 3939 3940 static void virtnet_get_hashflow(const struct virtnet_info *vi, struct ethtool_rxnfc *info) 3941 { 3942 info->data = 0; 3943 switch (info->flow_type) { 3944 case TCP_V4_FLOW: 3945 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) { 3946 info->data = RXH_IP_SRC | RXH_IP_DST | 3947 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3948 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 3949 info->data = RXH_IP_SRC | RXH_IP_DST; 3950 } 3951 break; 3952 case TCP_V6_FLOW: 3953 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) { 3954 info->data = RXH_IP_SRC | RXH_IP_DST | 3955 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3956 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 3957 info->data = RXH_IP_SRC | RXH_IP_DST; 3958 } 3959 break; 3960 case UDP_V4_FLOW: 3961 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) { 3962 info->data = RXH_IP_SRC | RXH_IP_DST | 3963 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3964 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 3965 info->data = RXH_IP_SRC | RXH_IP_DST; 3966 } 3967 break; 3968 case UDP_V6_FLOW: 3969 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) { 3970 info->data = RXH_IP_SRC | RXH_IP_DST | 3971 RXH_L4_B_0_1 | RXH_L4_B_2_3; 3972 } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 3973 info->data = RXH_IP_SRC | RXH_IP_DST; 3974 } 3975 break; 3976 case IPV4_FLOW: 3977 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) 3978 info->data = RXH_IP_SRC | RXH_IP_DST; 3979 3980 break; 3981 case IPV6_FLOW: 3982 if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) 3983 info->data = RXH_IP_SRC | RXH_IP_DST; 3984 3985 break; 3986 default: 3987 info->data = 0; 3988 break; 3989 } 3990 } 3991 3992 static bool virtnet_set_hashflow(struct virtnet_info *vi, struct ethtool_rxnfc *info) 3993 { 3994 u32 new_hashtypes = vi->rss_hash_types_saved; 3995 bool is_disable = info->data & RXH_DISCARD; 3996 bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3); 3997 3998 /* supports only 'sd', 'sdfn' and 'r' */ 3999 if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable)) 4000 return false; 4001 4002 switch (info->flow_type) { 4003 case TCP_V4_FLOW: 4004 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4); 4005 if (!is_disable) 4006 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 4007 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0); 4008 break; 4009 case UDP_V4_FLOW: 4010 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4); 4011 if (!is_disable) 4012 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 4013 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0); 4014 break; 4015 case IPV4_FLOW: 4016 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4; 4017 if (!is_disable) 4018 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4; 4019 break; 4020 case TCP_V6_FLOW: 4021 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6); 4022 if (!is_disable) 4023 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 4024 | (is_l4 ? 
VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0); 4025 break; 4026 case UDP_V6_FLOW: 4027 new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6); 4028 if (!is_disable) 4029 new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 4030 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0); 4031 break; 4032 case IPV6_FLOW: 4033 new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6; 4034 if (!is_disable) 4035 new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6; 4036 break; 4037 default: 4038 /* unsupported flow */ 4039 return false; 4040 } 4041 4042 /* reject if an unsupported hash type was requested */ 4043 if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported)) 4044 return false; 4045 4046 if (new_hashtypes != vi->rss_hash_types_saved) { 4047 vi->rss_hash_types_saved = new_hashtypes; 4048 vi->rss.hash_types = vi->rss_hash_types_saved; 4049 if (vi->dev->features & NETIF_F_RXHASH) 4050 return virtnet_commit_rss_command(vi); 4051 } 4052 4053 return true; 4054 } 4055 4056 static void virtnet_get_drvinfo(struct net_device *dev, 4057 struct ethtool_drvinfo *info) 4058 { 4059 struct virtnet_info *vi = netdev_priv(dev); 4060 struct virtio_device *vdev = vi->vdev; 4061 4062 strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); 4063 strscpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version)); 4064 strscpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info)); 4065 4066 } 4067 4068 /* TODO: Eliminate OOO packets during switching */ 4069 static int virtnet_set_channels(struct net_device *dev, 4070 struct ethtool_channels *channels) 4071 { 4072 struct virtnet_info *vi = netdev_priv(dev); 4073 u16 queue_pairs = channels->combined_count; 4074 int err; 4075 4076 /* We don't support separate rx/tx channels. 4077 * We don't allow setting 'other' channels. 4078 */ 4079 if (channels->rx_count || channels->tx_count || channels->other_count) 4080 return -EINVAL; 4081 4082 if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0) 4083 return -EINVAL; 4084 4085 /* For now we don't support modifying channels while XDP is loaded. 4086 * Also, when XDP is loaded all RX queues have XDP programs, so we only 4087 * need to check a single RX queue. 
4088 */ 4089 if (vi->rq[0].xdp_prog) 4090 return -EINVAL; 4091 4092 cpus_read_lock(); 4093 err = virtnet_set_queues(vi, queue_pairs); 4094 if (err) { 4095 cpus_read_unlock(); 4096 goto err; 4097 } 4098 virtnet_set_affinity(vi); 4099 cpus_read_unlock(); 4100 4101 netif_set_real_num_tx_queues(dev, queue_pairs); 4102 netif_set_real_num_rx_queues(dev, queue_pairs); 4103 err: 4104 return err; 4105 } 4106 4107 static void virtnet_stats_sprintf(u8 **p, const char *fmt, const char *noq_fmt, 4108 int num, int qid, const struct virtnet_stat_desc *desc) 4109 { 4110 int i; 4111 4112 if (qid < 0) { 4113 for (i = 0; i < num; ++i) 4114 ethtool_sprintf(p, noq_fmt, desc[i].desc); 4115 } else { 4116 for (i = 0; i < num; ++i) 4117 ethtool_sprintf(p, fmt, qid, desc[i].desc); 4118 } 4119 } 4120 4121 /* qid == -1: for rx/tx queue total field */ 4122 static void virtnet_get_stats_string(struct virtnet_info *vi, int type, int qid, u8 **data) 4123 { 4124 const struct virtnet_stat_desc *desc; 4125 const char *fmt, *noq_fmt; 4126 u8 *p = *data; 4127 u32 num; 4128 4129 if (type == VIRTNET_Q_TYPE_CQ && qid >= 0) { 4130 noq_fmt = "cq_hw_%s"; 4131 4132 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { 4133 desc = &virtnet_stats_cvq_desc[0]; 4134 num = ARRAY_SIZE(virtnet_stats_cvq_desc); 4135 4136 virtnet_stats_sprintf(&p, NULL, noq_fmt, num, -1, desc); 4137 } 4138 } 4139 4140 if (type == VIRTNET_Q_TYPE_RX) { 4141 fmt = "rx%u_%s"; 4142 noq_fmt = "rx_%s"; 4143 4144 desc = &virtnet_rq_stats_desc[0]; 4145 num = ARRAY_SIZE(virtnet_rq_stats_desc); 4146 4147 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4148 4149 fmt = "rx%u_hw_%s"; 4150 noq_fmt = "rx_hw_%s"; 4151 4152 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4153 desc = &virtnet_stats_rx_basic_desc[0]; 4154 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4155 4156 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4157 } 4158 4159 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4160 desc = &virtnet_stats_rx_csum_desc[0]; 4161 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4162 4163 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4164 } 4165 4166 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4167 desc = &virtnet_stats_rx_speed_desc[0]; 4168 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4169 4170 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4171 } 4172 } 4173 4174 if (type == VIRTNET_Q_TYPE_TX) { 4175 fmt = "tx%u_%s"; 4176 noq_fmt = "tx_%s"; 4177 4178 desc = &virtnet_sq_stats_desc[0]; 4179 num = ARRAY_SIZE(virtnet_sq_stats_desc); 4180 4181 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4182 4183 fmt = "tx%u_hw_%s"; 4184 noq_fmt = "tx_hw_%s"; 4185 4186 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4187 desc = &virtnet_stats_tx_basic_desc[0]; 4188 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4189 4190 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4191 } 4192 4193 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4194 desc = &virtnet_stats_tx_gso_desc[0]; 4195 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4196 4197 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4198 } 4199 4200 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4201 desc = &virtnet_stats_tx_speed_desc[0]; 4202 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4203 4204 virtnet_stats_sprintf(&p, fmt, noq_fmt, num, qid, desc); 4205 } 4206 } 4207 4208 *data = p; 4209 } 4210 4211 struct virtnet_stats_ctx { 4212 /* The stats are write to qstats or ethtool -S */ 4213 
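/* The desc_num/bitmap/size arrays below are indexed by queue type: VIRTNET_Q_TYPE_RX, VIRTNET_Q_TYPE_TX and VIRTNET_Q_TYPE_CQ. */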
bool to_qstat; 4214 4215 /* Used to calculate the offset inside the output buffer. */ 4216 u32 desc_num[3]; 4217 4218 /* The actual supported stat types. */ 4219 u64 bitmap[3]; 4220 4221 /* Used to calculate the reply buffer size. */ 4222 u32 size[3]; 4223 4224 /* Record the output buffer. */ 4225 u64 *data; 4226 }; 4227 4228 static void virtnet_stats_ctx_init(struct virtnet_info *vi, 4229 struct virtnet_stats_ctx *ctx, 4230 u64 *data, bool to_qstat) 4231 { 4232 u32 queue_type; 4233 4234 ctx->data = data; 4235 ctx->to_qstat = to_qstat; 4236 4237 if (to_qstat) { 4238 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); 4239 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); 4240 4241 queue_type = VIRTNET_Q_TYPE_RX; 4242 4243 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4244 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; 4245 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); 4246 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); 4247 } 4248 4249 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4250 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; 4251 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); 4252 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); 4253 } 4254 4255 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 4256 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_GSO; 4257 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); 4258 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_gso); 4259 } 4260 4261 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4262 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; 4263 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); 4264 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); 4265 } 4266 4267 queue_type = VIRTNET_Q_TYPE_TX; 4268 4269 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4270 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; 4271 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); 4272 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); 4273 } 4274 4275 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 4276 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_CSUM; 4277 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); 4278 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_csum); 4279 } 4280 4281 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4282 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; 4283 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); 4284 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); 4285 } 4286 4287 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4288 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; 4289 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); 4290 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); 4291 } 4292 4293 return; 4294 } 4295 4296 ctx->desc_num[VIRTNET_Q_TYPE_RX] = ARRAY_SIZE(virtnet_rq_stats_desc); 4297 ctx->desc_num[VIRTNET_Q_TYPE_TX] = ARRAY_SIZE(virtnet_sq_stats_desc); 4298 4299 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_CVQ) { 4300 queue_type = VIRTNET_Q_TYPE_CQ; 4301 4302 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_CVQ; 4303 ctx->desc_num[queue_type] += 
ARRAY_SIZE(virtnet_stats_cvq_desc); 4304 ctx->size[queue_type] += sizeof(struct virtio_net_stats_cvq); 4305 } 4306 4307 queue_type = VIRTNET_Q_TYPE_RX; 4308 4309 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4310 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_BASIC; 4311 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4312 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_basic); 4313 } 4314 4315 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4316 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_CSUM; 4317 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4318 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_csum); 4319 } 4320 4321 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4322 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_RX_SPEED; 4323 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4324 ctx->size[queue_type] += sizeof(struct virtio_net_stats_rx_speed); 4325 } 4326 4327 queue_type = VIRTNET_Q_TYPE_TX; 4328 4329 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4330 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_BASIC; 4331 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4332 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_basic); 4333 } 4334 4335 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4336 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_GSO; 4337 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4338 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_gso); 4339 } 4340 4341 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4342 ctx->bitmap[queue_type] |= VIRTIO_NET_STATS_TYPE_TX_SPEED; 4343 ctx->desc_num[queue_type] += ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4344 ctx->size[queue_type] += sizeof(struct virtio_net_stats_tx_speed); 4345 } 4346 } 4347 4348 /* stats_sum_queue - Calculate the sum of the same fields in sq or rq. 
4349 * @sum: the position to store the sum values 4350 * @num: field num 4351 * @q_value: the first queue fields 4352 * @q_num: number of the queues 4353 */ 4354 static void stats_sum_queue(u64 *sum, u32 num, u64 *q_value, u32 q_num) 4355 { 4356 u32 step = num; 4357 int i, j; 4358 u64 *p; 4359 4360 for (i = 0; i < num; ++i) { 4361 p = sum + i; 4362 *p = 0; 4363 4364 for (j = 0; j < q_num; ++j) 4365 *p += *(q_value + i + j * step); 4366 } 4367 } 4368 4369 static void virtnet_fill_total_fields(struct virtnet_info *vi, 4370 struct virtnet_stats_ctx *ctx) 4371 { 4372 u64 *data, *first_rx_q, *first_tx_q; 4373 u32 num_cq, num_rx, num_tx; 4374 4375 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; 4376 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; 4377 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; 4378 4379 first_rx_q = ctx->data + num_rx + num_tx + num_cq; 4380 first_tx_q = first_rx_q + vi->curr_queue_pairs * num_rx; 4381 4382 data = ctx->data; 4383 4384 stats_sum_queue(data, num_rx, first_rx_q, vi->curr_queue_pairs); 4385 4386 data = ctx->data + num_rx; 4387 4388 stats_sum_queue(data, num_tx, first_tx_q, vi->curr_queue_pairs); 4389 } 4390 4391 static void virtnet_fill_stats_qstat(struct virtnet_info *vi, u32 qid, 4392 struct virtnet_stats_ctx *ctx, 4393 const u8 *base, bool drv_stats, u8 reply_type) 4394 { 4395 const struct virtnet_stat_desc *desc; 4396 const u64_stats_t *v_stat; 4397 u64 offset, bitmap; 4398 const __le64 *v; 4399 u32 queue_type; 4400 int i, num; 4401 4402 queue_type = vq_type(vi, qid); 4403 bitmap = ctx->bitmap[queue_type]; 4404 4405 if (drv_stats) { 4406 if (queue_type == VIRTNET_Q_TYPE_RX) { 4407 desc = &virtnet_rq_stats_desc_qstat[0]; 4408 num = ARRAY_SIZE(virtnet_rq_stats_desc_qstat); 4409 } else { 4410 desc = &virtnet_sq_stats_desc_qstat[0]; 4411 num = ARRAY_SIZE(virtnet_sq_stats_desc_qstat); 4412 } 4413 4414 for (i = 0; i < num; ++i) { 4415 offset = desc[i].qstat_offset / sizeof(*ctx->data); 4416 v_stat = (const u64_stats_t *)(base + desc[i].offset); 4417 ctx->data[offset] = u64_stats_read(v_stat); 4418 } 4419 return; 4420 } 4421 4422 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4423 desc = &virtnet_stats_rx_basic_desc_qstat[0]; 4424 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc_qstat); 4425 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) 4426 goto found; 4427 } 4428 4429 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4430 desc = &virtnet_stats_rx_csum_desc_qstat[0]; 4431 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc_qstat); 4432 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) 4433 goto found; 4434 } 4435 4436 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 4437 desc = &virtnet_stats_rx_gso_desc_qstat[0]; 4438 num = ARRAY_SIZE(virtnet_stats_rx_gso_desc_qstat); 4439 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_GSO) 4440 goto found; 4441 } 4442 4443 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4444 desc = &virtnet_stats_rx_speed_desc_qstat[0]; 4445 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc_qstat); 4446 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) 4447 goto found; 4448 } 4449 4450 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4451 desc = &virtnet_stats_tx_basic_desc_qstat[0]; 4452 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc_qstat); 4453 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) 4454 goto found; 4455 } 4456 4457 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 4458 desc = &virtnet_stats_tx_csum_desc_qstat[0]; 4459 num = ARRAY_SIZE(virtnet_stats_tx_csum_desc_qstat); 4460 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_CSUM) 4461 goto found; 4462 
} 4463 4464 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4465 desc = &virtnet_stats_tx_gso_desc_qstat[0]; 4466 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc_qstat); 4467 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) 4468 goto found; 4469 } 4470 4471 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4472 desc = &virtnet_stats_tx_speed_desc_qstat[0]; 4473 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc_qstat); 4474 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) 4475 goto found; 4476 } 4477 4478 return; 4479 4480 found: 4481 for (i = 0; i < num; ++i) { 4482 offset = desc[i].qstat_offset / sizeof(*ctx->data); 4483 v = (const __le64 *)(base + desc[i].offset); 4484 ctx->data[offset] = le64_to_cpu(*v); 4485 } 4486 } 4487 4488 /* virtnet_fill_stats - copy the stats to qstats or ethtool -S 4489 * The stats source is the device or the driver. 4490 * 4491 * @vi: virtio net info 4492 * @qid: the vq id 4493 * @ctx: stats ctx (initiated by virtnet_stats_ctx_init()) 4494 * @base: pointer to the device reply or the driver stats structure. 4495 * @drv_stats: designate the base type (device reply, driver stats) 4496 * @type: the type of the device reply (if drv_stats is true, this must be zero) 4497 */ 4498 static void virtnet_fill_stats(struct virtnet_info *vi, u32 qid, 4499 struct virtnet_stats_ctx *ctx, 4500 const u8 *base, bool drv_stats, u8 reply_type) 4501 { 4502 u32 queue_type, num_rx, num_tx, num_cq; 4503 const struct virtnet_stat_desc *desc; 4504 const u64_stats_t *v_stat; 4505 u64 offset, bitmap; 4506 const __le64 *v; 4507 int i, num; 4508 4509 if (ctx->to_qstat) 4510 return virtnet_fill_stats_qstat(vi, qid, ctx, base, drv_stats, reply_type); 4511 4512 num_cq = ctx->desc_num[VIRTNET_Q_TYPE_CQ]; 4513 num_rx = ctx->desc_num[VIRTNET_Q_TYPE_RX]; 4514 num_tx = ctx->desc_num[VIRTNET_Q_TYPE_TX]; 4515 4516 queue_type = vq_type(vi, qid); 4517 bitmap = ctx->bitmap[queue_type]; 4518 4519 /* skip the total fields of pairs */ 4520 offset = num_rx + num_tx; 4521 4522 if (queue_type == VIRTNET_Q_TYPE_TX) { 4523 offset += num_cq + num_rx * vi->curr_queue_pairs + num_tx * (qid / 2); 4524 4525 num = ARRAY_SIZE(virtnet_sq_stats_desc); 4526 if (drv_stats) { 4527 desc = &virtnet_sq_stats_desc[0]; 4528 goto drv_stats; 4529 } 4530 4531 offset += num; 4532 4533 } else if (queue_type == VIRTNET_Q_TYPE_RX) { 4534 offset += num_cq + num_rx * (qid / 2); 4535 4536 num = ARRAY_SIZE(virtnet_rq_stats_desc); 4537 if (drv_stats) { 4538 desc = &virtnet_rq_stats_desc[0]; 4539 goto drv_stats; 4540 } 4541 4542 offset += num; 4543 } 4544 4545 if (bitmap & VIRTIO_NET_STATS_TYPE_CVQ) { 4546 desc = &virtnet_stats_cvq_desc[0]; 4547 num = ARRAY_SIZE(virtnet_stats_cvq_desc); 4548 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_CVQ) 4549 goto found; 4550 4551 offset += num; 4552 } 4553 4554 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 4555 desc = &virtnet_stats_rx_basic_desc[0]; 4556 num = ARRAY_SIZE(virtnet_stats_rx_basic_desc); 4557 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC) 4558 goto found; 4559 4560 offset += num; 4561 } 4562 4563 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 4564 desc = &virtnet_stats_rx_csum_desc[0]; 4565 num = ARRAY_SIZE(virtnet_stats_rx_csum_desc); 4566 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM) 4567 goto found; 4568 4569 offset += num; 4570 } 4571 4572 if (bitmap & VIRTIO_NET_STATS_TYPE_RX_SPEED) { 4573 desc = &virtnet_stats_rx_speed_desc[0]; 4574 num = ARRAY_SIZE(virtnet_stats_rx_speed_desc); 4575 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED) 4576 goto found; 4577 4578 
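/* Not the reply type we were handed: advance offset past the columns this block owns in the ethtool output buffer and keep scanning. */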
offset += num; 4579 } 4580 4581 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 4582 desc = &virtnet_stats_tx_basic_desc[0]; 4583 num = ARRAY_SIZE(virtnet_stats_tx_basic_desc); 4584 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC) 4585 goto found; 4586 4587 offset += num; 4588 } 4589 4590 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 4591 desc = &virtnet_stats_tx_gso_desc[0]; 4592 num = ARRAY_SIZE(virtnet_stats_tx_gso_desc); 4593 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO) 4594 goto found; 4595 4596 offset += num; 4597 } 4598 4599 if (bitmap & VIRTIO_NET_STATS_TYPE_TX_SPEED) { 4600 desc = &virtnet_stats_tx_speed_desc[0]; 4601 num = ARRAY_SIZE(virtnet_stats_tx_speed_desc); 4602 if (reply_type == VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED) 4603 goto found; 4604 4605 offset += num; 4606 } 4607 4608 return; 4609 4610 found: 4611 for (i = 0; i < num; ++i) { 4612 v = (const __le64 *)(base + desc[i].offset); 4613 ctx->data[offset + i] = le64_to_cpu(*v); 4614 } 4615 4616 return; 4617 4618 drv_stats: 4619 for (i = 0; i < num; ++i) { 4620 v_stat = (const u64_stats_t *)(base + desc[i].offset); 4621 ctx->data[offset + i] = u64_stats_read(v_stat); 4622 } 4623 } 4624 4625 static int __virtnet_get_hw_stats(struct virtnet_info *vi, 4626 struct virtnet_stats_ctx *ctx, 4627 struct virtio_net_ctrl_queue_stats *req, 4628 int req_size, void *reply, int res_size) 4629 { 4630 struct virtio_net_stats_reply_hdr *hdr; 4631 struct scatterlist sgs_in, sgs_out; 4632 void *p; 4633 u32 qid; 4634 int ok; 4635 4636 sg_init_one(&sgs_out, req, req_size); 4637 sg_init_one(&sgs_in, reply, res_size); 4638 4639 ok = virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, 4640 VIRTIO_NET_CTRL_STATS_GET, 4641 &sgs_out, &sgs_in); 4642 4643 if (!ok) 4644 return ok; 4645 4646 for (p = reply; p - reply < res_size; p += le16_to_cpu(hdr->size)) { 4647 hdr = p; 4648 qid = le16_to_cpu(hdr->vq_index); 4649 virtnet_fill_stats(vi, qid, ctx, p, false, hdr->type); 4650 } 4651 4652 return 0; 4653 } 4654 4655 static void virtnet_make_stat_req(struct virtnet_info *vi, 4656 struct virtnet_stats_ctx *ctx, 4657 struct virtio_net_ctrl_queue_stats *req, 4658 int qid, int *idx) 4659 { 4660 int qtype = vq_type(vi, qid); 4661 u64 bitmap = ctx->bitmap[qtype]; 4662 4663 if (!bitmap) 4664 return; 4665 4666 req->stats[*idx].vq_index = cpu_to_le16(qid); 4667 req->stats[*idx].types_bitmap[0] = cpu_to_le64(bitmap); 4668 *idx += 1; 4669 } 4670 4671 /* qid: -1: get stats of all vq. 4672 * > 0: get the stats for the special vq. This must not be cvq. 
4673 */ 4674 static int virtnet_get_hw_stats(struct virtnet_info *vi, 4675 struct virtnet_stats_ctx *ctx, int qid) 4676 { 4677 int qnum, i, j, res_size, qtype, last_vq, first_vq; 4678 struct virtio_net_ctrl_queue_stats *req; 4679 bool enable_cvq; 4680 void *reply; 4681 int ok; 4682 4683 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) 4684 return 0; 4685 4686 if (qid == -1) { 4687 last_vq = vi->curr_queue_pairs * 2 - 1; 4688 first_vq = 0; 4689 enable_cvq = true; 4690 } else { 4691 last_vq = qid; 4692 first_vq = qid; 4693 enable_cvq = false; 4694 } 4695 4696 qnum = 0; 4697 res_size = 0; 4698 for (i = first_vq; i <= last_vq ; ++i) { 4699 qtype = vq_type(vi, i); 4700 if (ctx->bitmap[qtype]) { 4701 ++qnum; 4702 res_size += ctx->size[qtype]; 4703 } 4704 } 4705 4706 if (enable_cvq && ctx->bitmap[VIRTNET_Q_TYPE_CQ]) { 4707 res_size += ctx->size[VIRTNET_Q_TYPE_CQ]; 4708 qnum += 1; 4709 } 4710 4711 req = kcalloc(qnum, sizeof(*req), GFP_KERNEL); 4712 if (!req) 4713 return -ENOMEM; 4714 4715 reply = kmalloc(res_size, GFP_KERNEL); 4716 if (!reply) { 4717 kfree(req); 4718 return -ENOMEM; 4719 } 4720 4721 j = 0; 4722 for (i = first_vq; i <= last_vq ; ++i) 4723 virtnet_make_stat_req(vi, ctx, req, i, &j); 4724 4725 if (enable_cvq) 4726 virtnet_make_stat_req(vi, ctx, req, vi->max_queue_pairs * 2, &j); 4727 4728 ok = __virtnet_get_hw_stats(vi, ctx, req, sizeof(*req) * j, reply, res_size); 4729 4730 kfree(req); 4731 kfree(reply); 4732 4733 return ok; 4734 } 4735 4736 static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) 4737 { 4738 struct virtnet_info *vi = netdev_priv(dev); 4739 unsigned int i; 4740 u8 *p = data; 4741 4742 switch (stringset) { 4743 case ETH_SS_STATS: 4744 /* Generate the total field names. */ 4745 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, -1, &p); 4746 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, -1, &p); 4747 4748 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_CQ, 0, &p); 4749 4750 for (i = 0; i < vi->curr_queue_pairs; ++i) 4751 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_RX, i, &p); 4752 4753 for (i = 0; i < vi->curr_queue_pairs; ++i) 4754 virtnet_get_stats_string(vi, VIRTNET_Q_TYPE_TX, i, &p); 4755 break; 4756 } 4757 } 4758 4759 static int virtnet_get_sset_count(struct net_device *dev, int sset) 4760 { 4761 struct virtnet_info *vi = netdev_priv(dev); 4762 struct virtnet_stats_ctx ctx = {0}; 4763 u32 pair_count; 4764 4765 switch (sset) { 4766 case ETH_SS_STATS: 4767 virtnet_stats_ctx_init(vi, &ctx, NULL, false); 4768 4769 pair_count = ctx.desc_num[VIRTNET_Q_TYPE_RX] + ctx.desc_num[VIRTNET_Q_TYPE_TX]; 4770 4771 return pair_count + ctx.desc_num[VIRTNET_Q_TYPE_CQ] + 4772 vi->curr_queue_pairs * pair_count; 4773 default: 4774 return -EOPNOTSUPP; 4775 } 4776 } 4777 4778 static void virtnet_get_ethtool_stats(struct net_device *dev, 4779 struct ethtool_stats *stats, u64 *data) 4780 { 4781 struct virtnet_info *vi = netdev_priv(dev); 4782 struct virtnet_stats_ctx ctx = {0}; 4783 unsigned int start, i; 4784 const u8 *stats_base; 4785 4786 virtnet_stats_ctx_init(vi, &ctx, data, false); 4787 if (virtnet_get_hw_stats(vi, &ctx, -1)) 4788 dev_warn(&vi->dev->dev, "Failed to get hw stats.\n"); 4789 4790 for (i = 0; i < vi->curr_queue_pairs; i++) { 4791 struct receive_queue *rq = &vi->rq[i]; 4792 struct send_queue *sq = &vi->sq[i]; 4793 4794 stats_base = (const u8 *)&rq->stats; 4795 do { 4796 start = u64_stats_fetch_begin(&rq->stats.syncp); 4797 virtnet_fill_stats(vi, i * 2, &ctx, stats_base, true, 0); 4798 } while (u64_stats_fetch_retry(&rq->stats.syncp, 
start)); 4799 4800 stats_base = (const u8 *)&sq->stats; 4801 do { 4802 start = u64_stats_fetch_begin(&sq->stats.syncp); 4803 virtnet_fill_stats(vi, i * 2 + 1, &ctx, stats_base, true, 0); 4804 } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); 4805 } 4806 4807 virtnet_fill_total_fields(vi, &ctx); 4808 } 4809 4810 static void virtnet_get_channels(struct net_device *dev, 4811 struct ethtool_channels *channels) 4812 { 4813 struct virtnet_info *vi = netdev_priv(dev); 4814 4815 channels->combined_count = vi->curr_queue_pairs; 4816 channels->max_combined = vi->max_queue_pairs; 4817 channels->max_other = 0; 4818 channels->rx_count = 0; 4819 channels->tx_count = 0; 4820 channels->other_count = 0; 4821 } 4822 4823 static int virtnet_set_link_ksettings(struct net_device *dev, 4824 const struct ethtool_link_ksettings *cmd) 4825 { 4826 struct virtnet_info *vi = netdev_priv(dev); 4827 4828 return ethtool_virtdev_set_link_ksettings(dev, cmd, 4829 &vi->speed, &vi->duplex); 4830 } 4831 4832 static int virtnet_get_link_ksettings(struct net_device *dev, 4833 struct ethtool_link_ksettings *cmd) 4834 { 4835 struct virtnet_info *vi = netdev_priv(dev); 4836 4837 cmd->base.speed = vi->speed; 4838 cmd->base.duplex = vi->duplex; 4839 cmd->base.port = PORT_OTHER; 4840 4841 return 0; 4842 } 4843 4844 static int virtnet_send_tx_notf_coal_cmds(struct virtnet_info *vi, 4845 struct ethtool_coalesce *ec) 4846 { 4847 struct virtio_net_ctrl_coal_tx *coal_tx __free(kfree) = NULL; 4848 struct scatterlist sgs_tx; 4849 int i; 4850 4851 coal_tx = kzalloc(sizeof(*coal_tx), GFP_KERNEL); 4852 if (!coal_tx) 4853 return -ENOMEM; 4854 4855 coal_tx->tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); 4856 coal_tx->tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); 4857 sg_init_one(&sgs_tx, coal_tx, sizeof(*coal_tx)); 4858 4859 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 4860 VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, 4861 &sgs_tx)) 4862 return -EINVAL; 4863 4864 vi->intr_coal_tx.max_usecs = ec->tx_coalesce_usecs; 4865 vi->intr_coal_tx.max_packets = ec->tx_max_coalesced_frames; 4866 for (i = 0; i < vi->max_queue_pairs; i++) { 4867 vi->sq[i].intr_coal.max_usecs = ec->tx_coalesce_usecs; 4868 vi->sq[i].intr_coal.max_packets = ec->tx_max_coalesced_frames; 4869 } 4870 4871 return 0; 4872 } 4873 4874 static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi, 4875 struct ethtool_coalesce *ec) 4876 { 4877 struct virtio_net_ctrl_coal_rx *coal_rx __free(kfree) = NULL; 4878 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; 4879 struct scatterlist sgs_rx; 4880 int i; 4881 4882 if (rx_ctrl_dim_on && !virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 4883 return -EOPNOTSUPP; 4884 4885 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != vi->intr_coal_rx.max_usecs || 4886 ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets)) 4887 return -EINVAL; 4888 4889 if (rx_ctrl_dim_on && !vi->rx_dim_enabled) { 4890 vi->rx_dim_enabled = true; 4891 for (i = 0; i < vi->max_queue_pairs; i++) { 4892 mutex_lock(&vi->rq[i].dim_lock); 4893 vi->rq[i].dim_enabled = true; 4894 mutex_unlock(&vi->rq[i].dim_lock); 4895 } 4896 return 0; 4897 } 4898 4899 coal_rx = kzalloc(sizeof(*coal_rx), GFP_KERNEL); 4900 if (!coal_rx) 4901 return -ENOMEM; 4902 4903 if (!rx_ctrl_dim_on && vi->rx_dim_enabled) { 4904 vi->rx_dim_enabled = false; 4905 for (i = 0; i < vi->max_queue_pairs; i++) { 4906 mutex_lock(&vi->rq[i].dim_lock); 4907 vi->rq[i].dim_enabled = false; 4908 mutex_unlock(&vi->rq[i].dim_lock); 4909 } 4910 } 4911 4912 /* Since the per-queue 
coalescing params can be set, 4913 * we need apply the global new params even if they 4914 * are not updated. 4915 */ 4916 coal_rx->rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); 4917 coal_rx->rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); 4918 sg_init_one(&sgs_rx, coal_rx, sizeof(*coal_rx)); 4919 4920 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, 4921 VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, 4922 &sgs_rx)) 4923 return -EINVAL; 4924 4925 vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs; 4926 vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames; 4927 for (i = 0; i < vi->max_queue_pairs; i++) { 4928 mutex_lock(&vi->rq[i].dim_lock); 4929 vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs; 4930 vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames; 4931 mutex_unlock(&vi->rq[i].dim_lock); 4932 } 4933 4934 return 0; 4935 } 4936 4937 static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, 4938 struct ethtool_coalesce *ec) 4939 { 4940 int err; 4941 4942 err = virtnet_send_tx_notf_coal_cmds(vi, ec); 4943 if (err) 4944 return err; 4945 4946 err = virtnet_send_rx_notf_coal_cmds(vi, ec); 4947 if (err) 4948 return err; 4949 4950 return 0; 4951 } 4952 4953 static int virtnet_send_rx_notf_coal_vq_cmds(struct virtnet_info *vi, 4954 struct ethtool_coalesce *ec, 4955 u16 queue) 4956 { 4957 bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; 4958 u32 max_usecs, max_packets; 4959 bool cur_rx_dim; 4960 int err; 4961 4962 mutex_lock(&vi->rq[queue].dim_lock); 4963 cur_rx_dim = vi->rq[queue].dim_enabled; 4964 max_usecs = vi->rq[queue].intr_coal.max_usecs; 4965 max_packets = vi->rq[queue].intr_coal.max_packets; 4966 4967 if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != max_usecs || 4968 ec->rx_max_coalesced_frames != max_packets)) { 4969 mutex_unlock(&vi->rq[queue].dim_lock); 4970 return -EINVAL; 4971 } 4972 4973 if (rx_ctrl_dim_on && !cur_rx_dim) { 4974 vi->rq[queue].dim_enabled = true; 4975 mutex_unlock(&vi->rq[queue].dim_lock); 4976 return 0; 4977 } 4978 4979 if (!rx_ctrl_dim_on && cur_rx_dim) 4980 vi->rq[queue].dim_enabled = false; 4981 4982 /* If no params are updated, userspace ethtool will 4983 * reject the modification. 
4984 */ 4985 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue, 4986 ec->rx_coalesce_usecs, 4987 ec->rx_max_coalesced_frames); 4988 mutex_unlock(&vi->rq[queue].dim_lock); 4989 return err; 4990 } 4991 4992 static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi, 4993 struct ethtool_coalesce *ec, 4994 u16 queue) 4995 { 4996 int err; 4997 4998 err = virtnet_send_rx_notf_coal_vq_cmds(vi, ec, queue); 4999 if (err) 5000 return err; 5001 5002 err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, queue, 5003 ec->tx_coalesce_usecs, 5004 ec->tx_max_coalesced_frames); 5005 if (err) 5006 return err; 5007 5008 return 0; 5009 } 5010 5011 static void virtnet_rx_dim_work(struct work_struct *work) 5012 { 5013 struct dim *dim = container_of(work, struct dim, work); 5014 struct receive_queue *rq = container_of(dim, 5015 struct receive_queue, dim); 5016 struct virtnet_info *vi = rq->vq->vdev->priv; 5017 struct net_device *dev = vi->dev; 5018 struct dim_cq_moder update_moder; 5019 int qnum, err; 5020 5021 qnum = rq - vi->rq; 5022 5023 mutex_lock(&rq->dim_lock); 5024 if (!rq->dim_enabled) 5025 goto out; 5026 5027 update_moder = net_dim_get_rx_irq_moder(dev, dim); 5028 if (update_moder.usec != rq->intr_coal.max_usecs || 5029 update_moder.pkts != rq->intr_coal.max_packets) { 5030 err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum, 5031 update_moder.usec, 5032 update_moder.pkts); 5033 if (err) 5034 pr_debug("%s: Failed to send dim parameters on rxq%d\n", 5035 dev->name, qnum); 5036 } 5037 out: 5038 dim->state = DIM_START_MEASURE; 5039 mutex_unlock(&rq->dim_lock); 5040 } 5041 5042 static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) 5043 { 5044 /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL 5045 * or VIRTIO_NET_F_VQ_NOTF_COAL feature is negotiated. 5046 */ 5047 if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) 5048 return -EOPNOTSUPP; 5049 5050 if (ec->tx_max_coalesced_frames > 1 || 5051 ec->rx_max_coalesced_frames != 1) 5052 return -EINVAL; 5053 5054 return 0; 5055 } 5056 5057 static int virtnet_should_update_vq_weight(int dev_flags, int weight, 5058 int vq_weight, bool *should_update) 5059 { 5060 if (weight ^ vq_weight) { 5061 if (dev_flags & IFF_UP) 5062 return -EBUSY; 5063 *should_update = true; 5064 } 5065 5066 return 0; 5067 } 5068 5069 static int virtnet_set_coalesce(struct net_device *dev, 5070 struct ethtool_coalesce *ec, 5071 struct kernel_ethtool_coalesce *kernel_coal, 5072 struct netlink_ext_ack *extack) 5073 { 5074 struct virtnet_info *vi = netdev_priv(dev); 5075 int ret, queue_number, napi_weight; 5076 bool update_napi = false; 5077 5078 /* Can't change NAPI weight if the link is up */ 5079 napi_weight = ec->tx_max_coalesced_frames ? 
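/* ethtool tx-frames doubles as the tx NAPI switch: a non-zero value selects the default NAPI weight, zero disables tx NAPI. */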
NAPI_POLL_WEIGHT : 0; 5080 for (queue_number = 0; queue_number < vi->max_queue_pairs; queue_number++) { 5081 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 5082 vi->sq[queue_number].napi.weight, 5083 &update_napi); 5084 if (ret) 5085 return ret; 5086 5087 if (update_napi) { 5088 /* All queues that belong to [queue_number, vi->max_queue_pairs] will be 5089 * updated for the sake of simplicity, which might not be necessary 5090 */ 5091 break; 5092 } 5093 } 5094 5095 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) 5096 ret = virtnet_send_notf_coal_cmds(vi, ec); 5097 else 5098 ret = virtnet_coal_params_supported(ec); 5099 5100 if (ret) 5101 return ret; 5102 5103 if (update_napi) { 5104 for (; queue_number < vi->max_queue_pairs; queue_number++) 5105 vi->sq[queue_number].napi.weight = napi_weight; 5106 } 5107 5108 return ret; 5109 } 5110 5111 static int virtnet_get_coalesce(struct net_device *dev, 5112 struct ethtool_coalesce *ec, 5113 struct kernel_ethtool_coalesce *kernel_coal, 5114 struct netlink_ext_ack *extack) 5115 { 5116 struct virtnet_info *vi = netdev_priv(dev); 5117 5118 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 5119 ec->rx_coalesce_usecs = vi->intr_coal_rx.max_usecs; 5120 ec->tx_coalesce_usecs = vi->intr_coal_tx.max_usecs; 5121 ec->tx_max_coalesced_frames = vi->intr_coal_tx.max_packets; 5122 ec->rx_max_coalesced_frames = vi->intr_coal_rx.max_packets; 5123 ec->use_adaptive_rx_coalesce = vi->rx_dim_enabled; 5124 } else { 5125 ec->rx_max_coalesced_frames = 1; 5126 5127 if (vi->sq[0].napi.weight) 5128 ec->tx_max_coalesced_frames = 1; 5129 } 5130 5131 return 0; 5132 } 5133 5134 static int virtnet_set_per_queue_coalesce(struct net_device *dev, 5135 u32 queue, 5136 struct ethtool_coalesce *ec) 5137 { 5138 struct virtnet_info *vi = netdev_priv(dev); 5139 int ret, napi_weight; 5140 bool update_napi = false; 5141 5142 if (queue >= vi->max_queue_pairs) 5143 return -EINVAL; 5144 5145 /* Can't change NAPI weight if the link is up */ 5146 napi_weight = ec->tx_max_coalesced_frames ? 
NAPI_POLL_WEIGHT : 0; 5147 ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, 5148 vi->sq[queue].napi.weight, 5149 &update_napi); 5150 if (ret) 5151 return ret; 5152 5153 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 5154 ret = virtnet_send_notf_coal_vq_cmds(vi, ec, queue); 5155 else 5156 ret = virtnet_coal_params_supported(ec); 5157 5158 if (ret) 5159 return ret; 5160 5161 if (update_napi) 5162 vi->sq[queue].napi.weight = napi_weight; 5163 5164 return 0; 5165 } 5166 5167 static int virtnet_get_per_queue_coalesce(struct net_device *dev, 5168 u32 queue, 5169 struct ethtool_coalesce *ec) 5170 { 5171 struct virtnet_info *vi = netdev_priv(dev); 5172 5173 if (queue >= vi->max_queue_pairs) 5174 return -EINVAL; 5175 5176 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 5177 mutex_lock(&vi->rq[queue].dim_lock); 5178 ec->rx_coalesce_usecs = vi->rq[queue].intr_coal.max_usecs; 5179 ec->tx_coalesce_usecs = vi->sq[queue].intr_coal.max_usecs; 5180 ec->tx_max_coalesced_frames = vi->sq[queue].intr_coal.max_packets; 5181 ec->rx_max_coalesced_frames = vi->rq[queue].intr_coal.max_packets; 5182 ec->use_adaptive_rx_coalesce = vi->rq[queue].dim_enabled; 5183 mutex_unlock(&vi->rq[queue].dim_lock); 5184 } else { 5185 ec->rx_max_coalesced_frames = 1; 5186 5187 if (vi->sq[queue].napi.weight) 5188 ec->tx_max_coalesced_frames = 1; 5189 } 5190 5191 return 0; 5192 } 5193 5194 static void virtnet_init_settings(struct net_device *dev) 5195 { 5196 struct virtnet_info *vi = netdev_priv(dev); 5197 5198 vi->speed = SPEED_UNKNOWN; 5199 vi->duplex = DUPLEX_UNKNOWN; 5200 } 5201 5202 static u32 virtnet_get_rxfh_key_size(struct net_device *dev) 5203 { 5204 return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size; 5205 } 5206 5207 static u32 virtnet_get_rxfh_indir_size(struct net_device *dev) 5208 { 5209 return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size; 5210 } 5211 5212 static int virtnet_get_rxfh(struct net_device *dev, 5213 struct ethtool_rxfh_param *rxfh) 5214 { 5215 struct virtnet_info *vi = netdev_priv(dev); 5216 int i; 5217 5218 if (rxfh->indir) { 5219 for (i = 0; i < vi->rss_indir_table_size; ++i) 5220 rxfh->indir[i] = vi->rss.indirection_table[i]; 5221 } 5222 5223 if (rxfh->key) 5224 memcpy(rxfh->key, vi->rss.key, vi->rss_key_size); 5225 5226 rxfh->hfunc = ETH_RSS_HASH_TOP; 5227 5228 return 0; 5229 } 5230 5231 static int virtnet_set_rxfh(struct net_device *dev, 5232 struct ethtool_rxfh_param *rxfh, 5233 struct netlink_ext_ack *extack) 5234 { 5235 struct virtnet_info *vi = netdev_priv(dev); 5236 bool update = false; 5237 int i; 5238 5239 if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && 5240 rxfh->hfunc != ETH_RSS_HASH_TOP) 5241 return -EOPNOTSUPP; 5242 5243 if (rxfh->indir) { 5244 if (!vi->has_rss) 5245 return -EOPNOTSUPP; 5246 5247 for (i = 0; i < vi->rss_indir_table_size; ++i) 5248 vi->rss.indirection_table[i] = rxfh->indir[i]; 5249 update = true; 5250 } 5251 5252 if (rxfh->key) { 5253 /* If either _F_HASH_REPORT or _F_RSS are negotiated, the 5254 * device provides hash calculation capabilities, that is, 5255 * hash_key is configured. 
5256 */ 5257 if (!vi->has_rss && !vi->has_rss_hash_report) 5258 return -EOPNOTSUPP; 5259 5260 memcpy(vi->rss.key, rxfh->key, vi->rss_key_size); 5261 update = true; 5262 } 5263 5264 if (update) 5265 virtnet_commit_rss_command(vi); 5266 5267 return 0; 5268 } 5269 5270 static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) 5271 { 5272 struct virtnet_info *vi = netdev_priv(dev); 5273 int rc = 0; 5274 5275 switch (info->cmd) { 5276 case ETHTOOL_GRXRINGS: 5277 info->data = vi->curr_queue_pairs; 5278 break; 5279 case ETHTOOL_GRXFH: 5280 virtnet_get_hashflow(vi, info); 5281 break; 5282 default: 5283 rc = -EOPNOTSUPP; 5284 } 5285 5286 return rc; 5287 } 5288 5289 static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) 5290 { 5291 struct virtnet_info *vi = netdev_priv(dev); 5292 int rc = 0; 5293 5294 switch (info->cmd) { 5295 case ETHTOOL_SRXFH: 5296 if (!virtnet_set_hashflow(vi, info)) 5297 rc = -EINVAL; 5298 5299 break; 5300 default: 5301 rc = -EOPNOTSUPP; 5302 } 5303 5304 return rc; 5305 } 5306 5307 static const struct ethtool_ops virtnet_ethtool_ops = { 5308 .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | 5309 ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX, 5310 .get_drvinfo = virtnet_get_drvinfo, 5311 .get_link = ethtool_op_get_link, 5312 .get_ringparam = virtnet_get_ringparam, 5313 .set_ringparam = virtnet_set_ringparam, 5314 .get_strings = virtnet_get_strings, 5315 .get_sset_count = virtnet_get_sset_count, 5316 .get_ethtool_stats = virtnet_get_ethtool_stats, 5317 .set_channels = virtnet_set_channels, 5318 .get_channels = virtnet_get_channels, 5319 .get_ts_info = ethtool_op_get_ts_info, 5320 .get_link_ksettings = virtnet_get_link_ksettings, 5321 .set_link_ksettings = virtnet_set_link_ksettings, 5322 .set_coalesce = virtnet_set_coalesce, 5323 .get_coalesce = virtnet_get_coalesce, 5324 .set_per_queue_coalesce = virtnet_set_per_queue_coalesce, 5325 .get_per_queue_coalesce = virtnet_get_per_queue_coalesce, 5326 .get_rxfh_key_size = virtnet_get_rxfh_key_size, 5327 .get_rxfh_indir_size = virtnet_get_rxfh_indir_size, 5328 .get_rxfh = virtnet_get_rxfh, 5329 .set_rxfh = virtnet_set_rxfh, 5330 .get_rxnfc = virtnet_get_rxnfc, 5331 .set_rxnfc = virtnet_set_rxnfc, 5332 }; 5333 5334 static void virtnet_get_queue_stats_rx(struct net_device *dev, int i, 5335 struct netdev_queue_stats_rx *stats) 5336 { 5337 struct virtnet_info *vi = netdev_priv(dev); 5338 struct receive_queue *rq = &vi->rq[i]; 5339 struct virtnet_stats_ctx ctx = {0}; 5340 5341 virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); 5342 5343 virtnet_get_hw_stats(vi, &ctx, i * 2); 5344 virtnet_fill_stats(vi, i * 2, &ctx, (void *)&rq->stats, true, 0); 5345 } 5346 5347 static void virtnet_get_queue_stats_tx(struct net_device *dev, int i, 5348 struct netdev_queue_stats_tx *stats) 5349 { 5350 struct virtnet_info *vi = netdev_priv(dev); 5351 struct send_queue *sq = &vi->sq[i]; 5352 struct virtnet_stats_ctx ctx = {0}; 5353 5354 virtnet_stats_ctx_init(vi, &ctx, (void *)stats, true); 5355 5356 virtnet_get_hw_stats(vi, &ctx, i * 2 + 1); 5357 virtnet_fill_stats(vi, i * 2 + 1, &ctx, (void *)&sq->stats, true, 0); 5358 } 5359 5360 static void virtnet_get_base_stats(struct net_device *dev, 5361 struct netdev_queue_stats_rx *rx, 5362 struct netdev_queue_stats_tx *tx) 5363 { 5364 struct virtnet_info *vi = netdev_priv(dev); 5365 5366 /* The queue stats of the virtio-net will not be reset. So here we 5367 * return 0. 
5368 */ 5369 rx->bytes = 0; 5370 rx->packets = 0; 5371 5372 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_BASIC) { 5373 rx->hw_drops = 0; 5374 rx->hw_drop_overruns = 0; 5375 } 5376 5377 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_CSUM) { 5378 rx->csum_unnecessary = 0; 5379 rx->csum_none = 0; 5380 rx->csum_bad = 0; 5381 } 5382 5383 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_GSO) { 5384 rx->hw_gro_packets = 0; 5385 rx->hw_gro_bytes = 0; 5386 rx->hw_gro_wire_packets = 0; 5387 rx->hw_gro_wire_bytes = 0; 5388 } 5389 5390 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_RX_SPEED) 5391 rx->hw_drop_ratelimits = 0; 5392 5393 tx->bytes = 0; 5394 tx->packets = 0; 5395 tx->stop = 0; 5396 tx->wake = 0; 5397 5398 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_BASIC) { 5399 tx->hw_drops = 0; 5400 tx->hw_drop_errors = 0; 5401 } 5402 5403 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_CSUM) { 5404 tx->csum_none = 0; 5405 tx->needs_csum = 0; 5406 } 5407 5408 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_GSO) { 5409 tx->hw_gso_packets = 0; 5410 tx->hw_gso_bytes = 0; 5411 tx->hw_gso_wire_packets = 0; 5412 tx->hw_gso_wire_bytes = 0; 5413 } 5414 5415 if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) 5416 tx->hw_drop_ratelimits = 0; 5417 } 5418 5419 static const struct netdev_stat_ops virtnet_stat_ops = { 5420 .get_queue_stats_rx = virtnet_get_queue_stats_rx, 5421 .get_queue_stats_tx = virtnet_get_queue_stats_tx, 5422 .get_base_stats = virtnet_get_base_stats, 5423 }; 5424 5425 static void virtnet_freeze_down(struct virtio_device *vdev) 5426 { 5427 struct virtnet_info *vi = vdev->priv; 5428 5429 /* Make sure no work handler is accessing the device */ 5430 flush_work(&vi->config_work); 5431 disable_rx_mode_work(vi); 5432 flush_work(&vi->rx_mode_work); 5433 5434 netif_tx_lock_bh(vi->dev); 5435 netif_device_detach(vi->dev); 5436 netif_tx_unlock_bh(vi->dev); 5437 if (netif_running(vi->dev)) 5438 virtnet_close(vi->dev); 5439 } 5440 5441 static int init_vqs(struct virtnet_info *vi); 5442 5443 static int virtnet_restore_up(struct virtio_device *vdev) 5444 { 5445 struct virtnet_info *vi = vdev->priv; 5446 int err; 5447 5448 err = init_vqs(vi); 5449 if (err) 5450 return err; 5451 5452 virtio_device_ready(vdev); 5453 5454 enable_delayed_refill(vi); 5455 enable_rx_mode_work(vi); 5456 5457 if (netif_running(vi->dev)) { 5458 err = virtnet_open(vi->dev); 5459 if (err) 5460 return err; 5461 } 5462 5463 netif_tx_lock_bh(vi->dev); 5464 netif_device_attach(vi->dev); 5465 netif_tx_unlock_bh(vi->dev); 5466 return err; 5467 } 5468 5469 static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads) 5470 { 5471 __virtio64 *_offloads __free(kfree) = NULL; 5472 struct scatterlist sg; 5473 5474 _offloads = kzalloc(sizeof(*_offloads), GFP_KERNEL); 5475 if (!_offloads) 5476 return -ENOMEM; 5477 5478 *_offloads = cpu_to_virtio64(vi->vdev, offloads); 5479 5480 sg_init_one(&sg, _offloads, sizeof(*_offloads)); 5481 5482 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS, 5483 VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) { 5484 dev_warn(&vi->dev->dev, "Fail to set guest offload.\n"); 5485 return -EINVAL; 5486 } 5487 5488 return 0; 5489 } 5490 5491 static int virtnet_clear_guest_offloads(struct virtnet_info *vi) 5492 { 5493 u64 offloads = 0; 5494 5495 if (!vi->guest_offloads) 5496 return 0; 5497 5498 return virtnet_set_guest_offloads(vi, offloads); 5499 } 5500 5501 static int virtnet_restore_guest_offloads(struct virtnet_info *vi) 5502 { 5503 u64 offloads = vi->guest_offloads; 
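/* Re-enable the guest offloads that virtnet_clear_guest_offloads() turned off while an XDP program was attached. */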
5504 5505 if (!vi->guest_offloads) 5506 return 0; 5507 5508 return virtnet_set_guest_offloads(vi, offloads); 5509 } 5510 5511 static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct receive_queue *rq, 5512 struct xsk_buff_pool *pool) 5513 { 5514 int err, qindex; 5515 5516 qindex = rq - vi->rq; 5517 5518 if (pool) { 5519 err = xdp_rxq_info_reg(&rq->xsk_rxq_info, vi->dev, qindex, rq->napi.napi_id); 5520 if (err < 0) 5521 return err; 5522 5523 err = xdp_rxq_info_reg_mem_model(&rq->xsk_rxq_info, 5524 MEM_TYPE_XSK_BUFF_POOL, NULL); 5525 if (err < 0) 5526 goto unreg; 5527 5528 xsk_pool_set_rxq_info(pool, &rq->xsk_rxq_info); 5529 } 5530 5531 virtnet_rx_pause(vi, rq); 5532 5533 err = virtqueue_reset(rq->vq, virtnet_rq_unmap_free_buf); 5534 if (err) { 5535 netdev_err(vi->dev, "reset rx fail: rx queue index: %d err: %d\n", qindex, err); 5536 5537 pool = NULL; 5538 } 5539 5540 rq->xsk_pool = pool; 5541 5542 virtnet_rx_resume(vi, rq); 5543 5544 if (pool) 5545 return 0; 5546 5547 unreg: 5548 xdp_rxq_info_unreg(&rq->xsk_rxq_info); 5549 return err; 5550 } 5551 5552 static int virtnet_xsk_pool_enable(struct net_device *dev, 5553 struct xsk_buff_pool *pool, 5554 u16 qid) 5555 { 5556 struct virtnet_info *vi = netdev_priv(dev); 5557 struct receive_queue *rq; 5558 struct device *dma_dev; 5559 struct send_queue *sq; 5560 int err, size; 5561 5562 if (vi->hdr_len > xsk_pool_get_headroom(pool)) 5563 return -EINVAL; 5564 5565 /* In big_packets mode, XDP cannot work, so there is no need to 5566 * initialize the xsk state of the rq. 5567 */ 5568 if (vi->big_packets && !vi->mergeable_rx_bufs) 5569 return -ENOENT; 5570 5571 if (qid >= vi->curr_queue_pairs) 5572 return -EINVAL; 5573 5574 sq = &vi->sq[qid]; 5575 rq = &vi->rq[qid]; 5576 5577 /* xsk assumes that tx and rx share the same dma device. AF_XDP 5578 * may use one buffer to receive from the rx and reuse this buffer to 5579 * send via the tx, so the dma dev of sq and rq must be the same one. 5580 * 5581 * But vq->dma_dev allows every vq to have its own dma dev, so 5582 * check that the dma devs of rq and sq are the same dev. 
5583 */ 5584 if (virtqueue_dma_dev(rq->vq) != virtqueue_dma_dev(sq->vq)) 5585 return -EINVAL; 5586 5587 dma_dev = virtqueue_dma_dev(rq->vq); 5588 if (!dma_dev) 5589 return -EINVAL; 5590 5591 size = virtqueue_get_vring_size(rq->vq); 5592 5593 rq->xsk_buffs = kvcalloc(size, sizeof(*rq->xsk_buffs), GFP_KERNEL); 5594 if (!rq->xsk_buffs) 5595 return -ENOMEM; 5596 5597 err = xsk_pool_dma_map(pool, dma_dev, 0); 5598 if (err) 5599 goto err_xsk_map; 5600 5601 err = virtnet_rq_bind_xsk_pool(vi, rq, pool); 5602 if (err) 5603 goto err_rq; 5604 5605 return 0; 5606 5607 err_rq: 5608 xsk_pool_dma_unmap(pool, 0); 5609 err_xsk_map: 5610 return err; 5611 } 5612 5613 static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid) 5614 { 5615 struct virtnet_info *vi = netdev_priv(dev); 5616 struct xsk_buff_pool *pool; 5617 struct receive_queue *rq; 5618 int err; 5619 5620 if (qid >= vi->curr_queue_pairs) 5621 return -EINVAL; 5622 5623 rq = &vi->rq[qid]; 5624 5625 pool = rq->xsk_pool; 5626 5627 err = virtnet_rq_bind_xsk_pool(vi, rq, NULL); 5628 5629 xsk_pool_dma_unmap(pool, 0); 5630 5631 kvfree(rq->xsk_buffs); 5632 5633 return err; 5634 } 5635 5636 static int virtnet_xsk_pool_setup(struct net_device *dev, struct netdev_bpf *xdp) 5637 { 5638 if (xdp->xsk.pool) 5639 return virtnet_xsk_pool_enable(dev, xdp->xsk.pool, 5640 xdp->xsk.queue_id); 5641 else 5642 return virtnet_xsk_pool_disable(dev, xdp->xsk.queue_id); 5643 } 5644 5645 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, 5646 struct netlink_ext_ack *extack) 5647 { 5648 unsigned int room = SKB_DATA_ALIGN(XDP_PACKET_HEADROOM + 5649 sizeof(struct skb_shared_info)); 5650 unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN; 5651 struct virtnet_info *vi = netdev_priv(dev); 5652 struct bpf_prog *old_prog; 5653 u16 xdp_qp = 0, curr_qp; 5654 int i, err; 5655 5656 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) 5657 && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 5658 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 5659 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 5660 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 5661 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) || 5662 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) || 5663 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) { 5664 NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first"); 5665 return -EOPNOTSUPP; 5666 } 5667 5668 if (vi->mergeable_rx_bufs && !vi->any_header_sg) { 5669 NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required"); 5670 return -EINVAL; 5671 } 5672 5673 if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) { 5674 NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags"); 5675 netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz); 5676 return -EINVAL; 5677 } 5678 5679 curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs; 5680 if (prog) 5681 xdp_qp = nr_cpu_ids; 5682 5683 /* XDP requires extra queues for XDP_TX */ 5684 if (curr_qp + xdp_qp > vi->max_queue_pairs) { 5685 netdev_warn_once(dev, "XDP request %i queues but max is %i. 
XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n", 5686 curr_qp + xdp_qp, vi->max_queue_pairs); 5687 xdp_qp = 0; 5688 } 5689 5690 old_prog = rtnl_dereference(vi->rq[0].xdp_prog); 5691 if (!prog && !old_prog) 5692 return 0; 5693 5694 if (prog) 5695 bpf_prog_add(prog, vi->max_queue_pairs - 1); 5696 5697 /* Make sure NAPI is not using any XDP TX queues for RX. */ 5698 if (netif_running(dev)) { 5699 for (i = 0; i < vi->max_queue_pairs; i++) { 5700 napi_disable(&vi->rq[i].napi); 5701 virtnet_napi_tx_disable(&vi->sq[i].napi); 5702 } 5703 } 5704 5705 if (!prog) { 5706 for (i = 0; i < vi->max_queue_pairs; i++) { 5707 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 5708 if (i == 0) 5709 virtnet_restore_guest_offloads(vi); 5710 } 5711 synchronize_net(); 5712 } 5713 5714 err = virtnet_set_queues(vi, curr_qp + xdp_qp); 5715 if (err) 5716 goto err; 5717 netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); 5718 vi->xdp_queue_pairs = xdp_qp; 5719 5720 if (prog) { 5721 vi->xdp_enabled = true; 5722 for (i = 0; i < vi->max_queue_pairs; i++) { 5723 rcu_assign_pointer(vi->rq[i].xdp_prog, prog); 5724 if (i == 0 && !old_prog) 5725 virtnet_clear_guest_offloads(vi); 5726 } 5727 if (!old_prog) 5728 xdp_features_set_redirect_target(dev, true); 5729 } else { 5730 xdp_features_clear_redirect_target(dev); 5731 vi->xdp_enabled = false; 5732 } 5733 5734 for (i = 0; i < vi->max_queue_pairs; i++) { 5735 if (old_prog) 5736 bpf_prog_put(old_prog); 5737 if (netif_running(dev)) { 5738 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); 5739 virtnet_napi_tx_enable(vi, vi->sq[i].vq, 5740 &vi->sq[i].napi); 5741 } 5742 } 5743 5744 return 0; 5745 5746 err: 5747 if (!prog) { 5748 virtnet_clear_guest_offloads(vi); 5749 for (i = 0; i < vi->max_queue_pairs; i++) 5750 rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); 5751 } 5752 5753 if (netif_running(dev)) { 5754 for (i = 0; i < vi->max_queue_pairs; i++) { 5755 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); 5756 virtnet_napi_tx_enable(vi, vi->sq[i].vq, 5757 &vi->sq[i].napi); 5758 } 5759 } 5760 if (prog) 5761 bpf_prog_sub(prog, vi->max_queue_pairs - 1); 5762 return err; 5763 } 5764 5765 static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) 5766 { 5767 switch (xdp->command) { 5768 case XDP_SETUP_PROG: 5769 return virtnet_xdp_set(dev, xdp->prog, xdp->extack); 5770 case XDP_SETUP_XSK_POOL: 5771 return virtnet_xsk_pool_setup(dev, xdp); 5772 default: 5773 return -EINVAL; 5774 } 5775 } 5776 5777 static int virtnet_get_phys_port_name(struct net_device *dev, char *buf, 5778 size_t len) 5779 { 5780 struct virtnet_info *vi = netdev_priv(dev); 5781 int ret; 5782 5783 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) 5784 return -EOPNOTSUPP; 5785 5786 ret = snprintf(buf, len, "sby"); 5787 if (ret >= len) 5788 return -EOPNOTSUPP; 5789 5790 return 0; 5791 } 5792 5793 static int virtnet_set_features(struct net_device *dev, 5794 netdev_features_t features) 5795 { 5796 struct virtnet_info *vi = netdev_priv(dev); 5797 u64 offloads; 5798 int err; 5799 5800 if ((dev->features ^ features) & NETIF_F_GRO_HW) { 5801 if (vi->xdp_enabled) 5802 return -EBUSY; 5803 5804 if (features & NETIF_F_GRO_HW) 5805 offloads = vi->guest_offloads_capable; 5806 else 5807 offloads = vi->guest_offloads_capable & 5808 ~GUEST_OFFLOAD_GRO_HW_MASK; 5809 5810 err = virtnet_set_guest_offloads(vi, offloads); 5811 if (err) 5812 return err; 5813 vi->guest_offloads = offloads; 5814 } 5815 5816 if ((dev->features ^ features) & NETIF_F_RXHASH) { 5817 if (features & NETIF_F_RXHASH) 5818 
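/* Toggling NETIF_F_RXHASH switches between the saved hash types and VIRTIO_NET_HASH_REPORT_NONE; the new setting is pushed to the device below. */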
vi->rss.hash_types = vi->rss_hash_types_saved; 5819 else 5820 vi->rss.hash_types = VIRTIO_NET_HASH_REPORT_NONE; 5821 5822 if (!virtnet_commit_rss_command(vi)) 5823 return -EINVAL; 5824 } 5825 5826 return 0; 5827 } 5828 5829 static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue) 5830 { 5831 struct virtnet_info *priv = netdev_priv(dev); 5832 struct send_queue *sq = &priv->sq[txqueue]; 5833 struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue); 5834 5835 u64_stats_update_begin(&sq->stats.syncp); 5836 u64_stats_inc(&sq->stats.tx_timeouts); 5837 u64_stats_update_end(&sq->stats.syncp); 5838 5839 netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n", 5840 txqueue, sq->name, sq->vq->index, sq->vq->name, 5841 jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start))); 5842 } 5843 5844 static int virtnet_init_irq_moder(struct virtnet_info *vi) 5845 { 5846 u8 profile_flags = 0, coal_flags = 0; 5847 int ret, i; 5848 5849 profile_flags |= DIM_PROFILE_RX; 5850 coal_flags |= DIM_COALESCE_USEC | DIM_COALESCE_PKTS; 5851 ret = net_dim_init_irq_moder(vi->dev, profile_flags, coal_flags, 5852 DIM_CQ_PERIOD_MODE_START_FROM_EQE, 5853 0, virtnet_rx_dim_work, NULL); 5854 5855 if (ret) 5856 return ret; 5857 5858 for (i = 0; i < vi->max_queue_pairs; i++) 5859 net_dim_setting(vi->dev, &vi->rq[i].dim, false); 5860 5861 return 0; 5862 } 5863 5864 static void virtnet_free_irq_moder(struct virtnet_info *vi) 5865 { 5866 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) 5867 return; 5868 5869 rtnl_lock(); 5870 net_dim_free_irq_moder(vi->dev); 5871 rtnl_unlock(); 5872 } 5873 5874 static const struct net_device_ops virtnet_netdev = { 5875 .ndo_open = virtnet_open, 5876 .ndo_stop = virtnet_close, 5877 .ndo_start_xmit = start_xmit, 5878 .ndo_validate_addr = eth_validate_addr, 5879 .ndo_set_mac_address = virtnet_set_mac_address, 5880 .ndo_set_rx_mode = virtnet_set_rx_mode, 5881 .ndo_get_stats64 = virtnet_stats, 5882 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, 5883 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, 5884 .ndo_bpf = virtnet_xdp, 5885 .ndo_xdp_xmit = virtnet_xdp_xmit, 5886 .ndo_xsk_wakeup = virtnet_xsk_wakeup, 5887 .ndo_features_check = passthru_features_check, 5888 .ndo_get_phys_port_name = virtnet_get_phys_port_name, 5889 .ndo_set_features = virtnet_set_features, 5890 .ndo_tx_timeout = virtnet_tx_timeout, 5891 }; 5892 5893 static void virtnet_config_changed_work(struct work_struct *work) 5894 { 5895 struct virtnet_info *vi = 5896 container_of(work, struct virtnet_info, config_work); 5897 u16 v; 5898 5899 if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS, 5900 struct virtio_net_config, status, &v) < 0) 5901 return; 5902 5903 if (v & VIRTIO_NET_S_ANNOUNCE) { 5904 netdev_notify_peers(vi->dev); 5905 virtnet_ack_link_announce(vi); 5906 } 5907 5908 /* Ignore unknown (future) status bits */ 5909 v &= VIRTIO_NET_S_LINK_UP; 5910 5911 if (vi->status == v) 5912 return; 5913 5914 vi->status = v; 5915 5916 if (vi->status & VIRTIO_NET_S_LINK_UP) { 5917 virtnet_update_settings(vi); 5918 netif_carrier_on(vi->dev); 5919 netif_tx_wake_all_queues(vi->dev); 5920 } else { 5921 netif_carrier_off(vi->dev); 5922 netif_tx_stop_all_queues(vi->dev); 5923 } 5924 } 5925 5926 static void virtnet_config_changed(struct virtio_device *vdev) 5927 { 5928 struct virtnet_info *vi = vdev->priv; 5929 5930 schedule_work(&vi->config_work); 5931 } 5932 5933 static void virtnet_free_queues(struct virtnet_info *vi) 5934 { 5935 int i; 5936 5937 for (i = 0; i < 
vi->max_queue_pairs; i++) { 5938 __netif_napi_del(&vi->rq[i].napi); 5939 __netif_napi_del(&vi->sq[i].napi); 5940 } 5941 5942 /* We called __netif_napi_del(), 5943 * we need to respect an RCU grace period before freeing vi->rq 5944 */ 5945 synchronize_net(); 5946 5947 kfree(vi->rq); 5948 kfree(vi->sq); 5949 kfree(vi->ctrl); 5950 } 5951 5952 static void _free_receive_bufs(struct virtnet_info *vi) 5953 { 5954 struct bpf_prog *old_prog; 5955 int i; 5956 5957 for (i = 0; i < vi->max_queue_pairs; i++) { 5958 while (vi->rq[i].pages) 5959 __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0); 5960 5961 old_prog = rtnl_dereference(vi->rq[i].xdp_prog); 5962 RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL); 5963 if (old_prog) 5964 bpf_prog_put(old_prog); 5965 } 5966 } 5967 5968 static void free_receive_bufs(struct virtnet_info *vi) 5969 { 5970 rtnl_lock(); 5971 _free_receive_bufs(vi); 5972 rtnl_unlock(); 5973 } 5974 5975 static void free_receive_page_frags(struct virtnet_info *vi) 5976 { 5977 int i; 5978 for (i = 0; i < vi->max_queue_pairs; i++) 5979 if (vi->rq[i].alloc_frag.page) { 5980 if (vi->rq[i].do_dma && vi->rq[i].last_dma) 5981 virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0); 5982 put_page(vi->rq[i].alloc_frag.page); 5983 } 5984 } 5985 5986 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf) 5987 { 5988 if (!is_xdp_frame(buf)) 5989 dev_kfree_skb(buf); 5990 else 5991 xdp_return_frame(ptr_to_xdp(buf)); 5992 } 5993 5994 static void free_unused_bufs(struct virtnet_info *vi) 5995 { 5996 void *buf; 5997 int i; 5998 5999 for (i = 0; i < vi->max_queue_pairs; i++) { 6000 struct virtqueue *vq = vi->sq[i].vq; 6001 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 6002 virtnet_sq_free_unused_buf(vq, buf); 6003 cond_resched(); 6004 } 6005 6006 for (i = 0; i < vi->max_queue_pairs; i++) { 6007 struct virtqueue *vq = vi->rq[i].vq; 6008 6009 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) 6010 virtnet_rq_unmap_free_buf(vq, buf); 6011 cond_resched(); 6012 } 6013 } 6014 6015 static void virtnet_del_vqs(struct virtnet_info *vi) 6016 { 6017 struct virtio_device *vdev = vi->vdev; 6018 6019 virtnet_clean_affinity(vi); 6020 6021 vdev->config->del_vqs(vdev); 6022 6023 virtnet_free_queues(vi); 6024 } 6025 6026 /* How large should a single buffer be so a queue full of these can fit at 6027 * least one full packet? 6028 * Logic below assumes the mergeable buffer header is used. 6029 */ 6030 static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq) 6031 { 6032 const unsigned int hdr_len = vi->hdr_len; 6033 unsigned int rq_size = virtqueue_get_vring_size(vq); 6034 unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu; 6035 unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len; 6036 unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size); 6037 6038 return max(max(min_buf_len, hdr_len) - hdr_len, 6039 (unsigned int)GOOD_PACKET_LEN); 6040 } 6041 6042 static int virtnet_find_vqs(struct virtnet_info *vi) 6043 { 6044 struct virtqueue_info *vqs_info; 6045 struct virtqueue **vqs; 6046 int ret = -ENOMEM; 6047 int total_vqs; 6048 bool *ctx; 6049 u16 i; 6050 6051 /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by 6052 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by 6053 * possible control vq. 
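 * With this layout, queue pair i maps to vring index rxq2vq(i) == 2 * i for
 * RX and txq2vq(i) == 2 * i + 1 for TX (see the helpers defined earlier in
 * this file), and the control vq, when negotiated, occupies the last slot,
 * vqs[total_vqs - 1], as used below.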
6054 */ 6055 total_vqs = vi->max_queue_pairs * 2 + 6056 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ); 6057 6058 /* Allocate space for find_vqs parameters */ 6059 vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL); 6060 if (!vqs) 6061 goto err_vq; 6062 vqs_info = kcalloc(total_vqs, sizeof(*vqs_info), GFP_KERNEL); 6063 if (!vqs_info) 6064 goto err_vqs_info; 6065 if (!vi->big_packets || vi->mergeable_rx_bufs) { 6066 ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL); 6067 if (!ctx) 6068 goto err_ctx; 6069 } else { 6070 ctx = NULL; 6071 } 6072 6073 /* Parameters for control virtqueue, if any */ 6074 if (vi->has_cvq) { 6075 vqs_info[total_vqs - 1].name = "control"; 6076 } 6077 6078 /* Allocate/initialize parameters for send/receive virtqueues */ 6079 for (i = 0; i < vi->max_queue_pairs; i++) { 6080 vqs_info[rxq2vq(i)].callback = skb_recv_done; 6081 vqs_info[txq2vq(i)].callback = skb_xmit_done; 6082 sprintf(vi->rq[i].name, "input.%u", i); 6083 sprintf(vi->sq[i].name, "output.%u", i); 6084 vqs_info[rxq2vq(i)].name = vi->rq[i].name; 6085 vqs_info[txq2vq(i)].name = vi->sq[i].name; 6086 if (ctx) 6087 vqs_info[rxq2vq(i)].ctx = true; 6088 } 6089 6090 ret = virtio_find_vqs(vi->vdev, total_vqs, vqs, vqs_info, NULL); 6091 if (ret) 6092 goto err_find; 6093 6094 if (vi->has_cvq) { 6095 vi->cvq = vqs[total_vqs - 1]; 6096 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) 6097 vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; 6098 } 6099 6100 for (i = 0; i < vi->max_queue_pairs; i++) { 6101 vi->rq[i].vq = vqs[rxq2vq(i)]; 6102 vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq); 6103 vi->sq[i].vq = vqs[txq2vq(i)]; 6104 } 6105 6106 /* Success path: ret == 0. Fall through so the temporary vqs/vqs_info/ctx arrays, which are only needed to call virtio_find_vqs(), are freed on success as well as on error. */ 6107 6108 6109 err_find: 6110 kfree(ctx); 6111 err_ctx: 6112 kfree(vqs_info); 6113 err_vqs_info: 6114 kfree(vqs); 6115 err_vq: 6116 return ret; 6117 } 6118 6119 static int virtnet_alloc_queues(struct virtnet_info *vi) 6120 { 6121 int i; 6122 6123 if (vi->has_cvq) { 6124 vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL); 6125 if (!vi->ctrl) 6126 goto err_ctrl; 6127 } else { 6128 vi->ctrl = NULL; 6129 } 6130 vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL); 6131 if (!vi->sq) 6132 goto err_sq; 6133 vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL); 6134 if (!vi->rq) 6135 goto err_rq; 6136 6137 INIT_DELAYED_WORK(&vi->refill, refill_work); 6138 for (i = 0; i < vi->max_queue_pairs; i++) { 6139 vi->rq[i].pages = NULL; 6140 netif_napi_add_weight(vi->dev, &vi->rq[i].napi, virtnet_poll, 6141 napi_weight); 6142 netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi, 6143 virtnet_poll_tx, 6144 napi_tx ? 
napi_weight : 0); 6145 6146 sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); 6147 ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len); 6148 sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg)); 6149 6150 u64_stats_init(&vi->rq[i].stats.syncp); 6151 u64_stats_init(&vi->sq[i].stats.syncp); 6152 mutex_init(&vi->rq[i].dim_lock); 6153 } 6154 6155 return 0; 6156 6157 err_rq: 6158 kfree(vi->sq); 6159 err_sq: 6160 kfree(vi->ctrl); 6161 err_ctrl: 6162 return -ENOMEM; 6163 } 6164 6165 static int init_vqs(struct virtnet_info *vi) 6166 { 6167 int ret; 6168 6169 /* Allocate send & receive queues */ 6170 ret = virtnet_alloc_queues(vi); 6171 if (ret) 6172 goto err; 6173 6174 ret = virtnet_find_vqs(vi); 6175 if (ret) 6176 goto err_free; 6177 6178 cpus_read_lock(); 6179 virtnet_set_affinity(vi); 6180 cpus_read_unlock(); 6181 6182 return 0; 6183 6184 err_free: 6185 virtnet_free_queues(vi); 6186 err: 6187 return ret; 6188 } 6189 6190 #ifdef CONFIG_SYSFS 6191 static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue, 6192 char *buf) 6193 { 6194 struct virtnet_info *vi = netdev_priv(queue->dev); 6195 unsigned int queue_index = get_netdev_rx_queue_index(queue); 6196 unsigned int headroom = virtnet_get_headroom(vi); 6197 unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; 6198 struct ewma_pkt_len *avg; 6199 6200 BUG_ON(queue_index >= vi->max_queue_pairs); 6201 avg = &vi->rq[queue_index].mrg_avg_pkt_len; 6202 return sprintf(buf, "%u\n", 6203 get_mergeable_buf_len(&vi->rq[queue_index], avg, 6204 SKB_DATA_ALIGN(headroom + tailroom))); 6205 } 6206 6207 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute = 6208 __ATTR_RO(mergeable_rx_buffer_size); 6209 6210 static struct attribute *virtio_net_mrg_rx_attrs[] = { 6211 &mergeable_rx_buffer_size_attribute.attr, 6212 NULL 6213 }; 6214 6215 static const struct attribute_group virtio_net_mrg_rx_group = { 6216 .name = "virtio_net", 6217 .attrs = virtio_net_mrg_rx_attrs 6218 }; 6219 #endif 6220 6221 static bool virtnet_fail_on_feature(struct virtio_device *vdev, 6222 unsigned int fbit, 6223 const char *fname, const char *dname) 6224 { 6225 if (!virtio_has_feature(vdev, fbit)) 6226 return false; 6227 6228 dev_err(&vdev->dev, "device advertises feature %s but not %s", 6229 fname, dname); 6230 6231 return true; 6232 } 6233 6234 #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \ 6235 virtnet_fail_on_feature(vdev, fbit, #fbit, dbit) 6236 6237 static bool virtnet_validate_features(struct virtio_device *vdev) 6238 { 6239 if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) && 6240 (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX, 6241 "VIRTIO_NET_F_CTRL_VQ") || 6242 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN, 6243 "VIRTIO_NET_F_CTRL_VQ") || 6244 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE, 6245 "VIRTIO_NET_F_CTRL_VQ") || 6246 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") || 6247 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR, 6248 "VIRTIO_NET_F_CTRL_VQ") || 6249 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, 6250 "VIRTIO_NET_F_CTRL_VQ") || 6251 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, 6252 "VIRTIO_NET_F_CTRL_VQ") || 6253 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, 6254 "VIRTIO_NET_F_CTRL_VQ") || 6255 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_VQ_NOTF_COAL, 6256 "VIRTIO_NET_F_CTRL_VQ"))) { 6257 return false; 6258 } 6259 6260 return true; 6261 } 6262 6263 #define MIN_MTU ETH_MIN_MTU 6264 #define MAX_MTU ETH_MAX_MTU 6265 6266 static int virtnet_validate(struct virtio_device *vdev) 6267 { 6268 if (!vdev->config->get) { 6269 
dev_err(&vdev->dev, "%s failure: config access disabled\n", 6270 __func__); 6271 return -EINVAL; 6272 } 6273 6274 if (!virtnet_validate_features(vdev)) 6275 return -EINVAL; 6276 6277 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 6278 int mtu = virtio_cread16(vdev, 6279 offsetof(struct virtio_net_config, 6280 mtu)); 6281 if (mtu < MIN_MTU) 6282 __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU); 6283 } 6284 6285 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) && 6286 !virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 6287 dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby"); 6288 __virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY); 6289 } 6290 6291 return 0; 6292 } 6293 6294 static bool virtnet_check_guest_gso(const struct virtnet_info *vi) 6295 { 6296 return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || 6297 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || 6298 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || 6299 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || 6300 (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) && 6301 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6)); 6302 } 6303 6304 static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu) 6305 { 6306 bool guest_gso = virtnet_check_guest_gso(vi); 6307 6308 /* If device can receive ANY guest GSO packets, regardless of mtu, 6309 * allocate packets of maximum size, otherwise limit it to only 6310 * mtu size worth only. 6311 */ 6312 if (mtu > ETH_DATA_LEN || guest_gso) { 6313 vi->big_packets = true; 6314 vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE); 6315 } 6316 } 6317 6318 #define VIRTIO_NET_HASH_REPORT_MAX_TABLE 10 6319 static enum xdp_rss_hash_type 6320 virtnet_xdp_rss_type[VIRTIO_NET_HASH_REPORT_MAX_TABLE] = { 6321 [VIRTIO_NET_HASH_REPORT_NONE] = XDP_RSS_TYPE_NONE, 6322 [VIRTIO_NET_HASH_REPORT_IPv4] = XDP_RSS_TYPE_L3_IPV4, 6323 [VIRTIO_NET_HASH_REPORT_TCPv4] = XDP_RSS_TYPE_L4_IPV4_TCP, 6324 [VIRTIO_NET_HASH_REPORT_UDPv4] = XDP_RSS_TYPE_L4_IPV4_UDP, 6325 [VIRTIO_NET_HASH_REPORT_IPv6] = XDP_RSS_TYPE_L3_IPV6, 6326 [VIRTIO_NET_HASH_REPORT_TCPv6] = XDP_RSS_TYPE_L4_IPV6_TCP, 6327 [VIRTIO_NET_HASH_REPORT_UDPv6] = XDP_RSS_TYPE_L4_IPV6_UDP, 6328 [VIRTIO_NET_HASH_REPORT_IPv6_EX] = XDP_RSS_TYPE_L3_IPV6_EX, 6329 [VIRTIO_NET_HASH_REPORT_TCPv6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX, 6330 [VIRTIO_NET_HASH_REPORT_UDPv6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX 6331 }; 6332 6333 static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, 6334 enum xdp_rss_hash_type *rss_type) 6335 { 6336 const struct xdp_buff *xdp = (void *)_ctx; 6337 struct virtio_net_hdr_v1_hash *hdr_hash; 6338 struct virtnet_info *vi; 6339 u16 hash_report; 6340 6341 if (!(xdp->rxq->dev->features & NETIF_F_RXHASH)) 6342 return -ENODATA; 6343 6344 vi = netdev_priv(xdp->rxq->dev); 6345 hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len); 6346 hash_report = __le16_to_cpu(hdr_hash->hash_report); 6347 6348 if (hash_report >= VIRTIO_NET_HASH_REPORT_MAX_TABLE) 6349 hash_report = VIRTIO_NET_HASH_REPORT_NONE; 6350 6351 *rss_type = virtnet_xdp_rss_type[hash_report]; 6352 *hash = __le32_to_cpu(hdr_hash->hash_value); 6353 return 0; 6354 } 6355 6356 static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = { 6357 .xmo_rx_hash = virtnet_xdp_rx_hash, 6358 }; 6359 6360 static int virtnet_probe(struct virtio_device *vdev) 6361 { 6362 int i, err = -ENOMEM; 6363 struct net_device *dev; 6364 struct virtnet_info *vi; 6365 u16 
max_queue_pairs; 6366 int mtu = 0; 6367 6368 /* Find if host supports multiqueue/rss virtio_net device */ 6369 max_queue_pairs = 1; 6370 if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) 6371 max_queue_pairs = 6372 virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); 6373 6374 /* We need at least 2 queues */ 6375 if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 6376 max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || 6377 !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 6378 max_queue_pairs = 1; 6379 6380 /* Allocate ourselves a network device with room for our info */ 6381 dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs); 6382 if (!dev) 6383 return -ENOMEM; 6384 6385 /* Set up network device as normal. */ 6386 dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE | 6387 IFF_TX_SKB_NO_LINEAR; 6388 dev->netdev_ops = &virtnet_netdev; 6389 dev->stat_ops = &virtnet_stat_ops; 6390 dev->features = NETIF_F_HIGHDMA; 6391 6392 dev->ethtool_ops = &virtnet_ethtool_ops; 6393 SET_NETDEV_DEV(dev, &vdev->dev); 6394 6395 /* Do we support "hardware" checksums? */ 6396 if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { 6397 /* This opens up the world of extra features. */ 6398 dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG; 6399 if (csum) 6400 dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; 6401 6402 if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { 6403 dev->hw_features |= NETIF_F_TSO 6404 | NETIF_F_TSO_ECN | NETIF_F_TSO6; 6405 } 6406 /* Individual feature bits: what can host handle? */ 6407 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4)) 6408 dev->hw_features |= NETIF_F_TSO; 6409 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6)) 6410 dev->hw_features |= NETIF_F_TSO6; 6411 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) 6412 dev->hw_features |= NETIF_F_TSO_ECN; 6413 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO)) 6414 dev->hw_features |= NETIF_F_GSO_UDP_L4; 6415 6416 dev->features |= NETIF_F_GSO_ROBUST; 6417 6418 if (gso) 6419 dev->features |= dev->hw_features & NETIF_F_ALL_TSO; 6420 /* (!csum && gso) case will be fixed by register_netdev() */ 6421 } 6422 6423 /* 1. With VIRTIO_NET_F_GUEST_CSUM negotiation, the driver doesn't 6424 * need to calculate checksums for partially checksummed packets, 6425 * as they're considered valid by the upper layer. 6426 * 2. Without VIRTIO_NET_F_GUEST_CSUM negotiation, the driver only 6427 * receives fully checksummed packets. The device may assist in 6428 * validating these packets' checksums, so the driver won't have to. 6429 */ 6430 dev->features |= NETIF_F_RXCSUM; 6431 6432 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || 6433 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) 6434 dev->features |= NETIF_F_GRO_HW; 6435 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) 6436 dev->hw_features |= NETIF_F_GRO_HW; 6437 6438 dev->vlan_features = dev->features; 6439 dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; 6440 6441 /* MTU range: 68 - 65535 */ 6442 dev->min_mtu = MIN_MTU; 6443 dev->max_mtu = MAX_MTU; 6444 6445 /* Configuration may specify what MAC to use. Otherwise random. 
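 * When no MAC is provided and VIRTIO_NET_F_CTRL_MAC_ADDR was negotiated, the
 * randomly generated address is pushed back to the device later in probe via
 * VIRTIO_NET_CTRL_MAC_ADDR_SET.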
*/ 6446 if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 6447 u8 addr[ETH_ALEN]; 6448 6449 virtio_cread_bytes(vdev, 6450 offsetof(struct virtio_net_config, mac), 6451 addr, ETH_ALEN); 6452 eth_hw_addr_set(dev, addr); 6453 } else { 6454 eth_hw_addr_random(dev); 6455 dev_info(&vdev->dev, "Assigned random MAC address %pM\n", 6456 dev->dev_addr); 6457 } 6458 6459 /* Set up our device-specific information */ 6460 vi = netdev_priv(dev); 6461 vi->dev = dev; 6462 vi->vdev = vdev; 6463 vdev->priv = vi; 6464 6465 INIT_WORK(&vi->config_work, virtnet_config_changed_work); 6466 INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work); 6467 spin_lock_init(&vi->refill_lock); 6468 6469 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) { 6470 vi->mergeable_rx_bufs = true; 6471 dev->xdp_features |= NETDEV_XDP_ACT_RX_SG; 6472 } 6473 6474 if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) 6475 vi->has_rss_hash_report = true; 6476 6477 if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) { 6478 vi->has_rss = true; 6479 6480 vi->rss_indir_table_size = 6481 virtio_cread16(vdev, offsetof(struct virtio_net_config, 6482 rss_max_indirection_table_length)); 6483 } 6484 err = rss_indirection_table_alloc(&vi->rss, vi->rss_indir_table_size); 6485 if (err) 6486 goto free; 6487 6488 if (vi->has_rss || vi->has_rss_hash_report) { 6489 vi->rss_key_size = 6490 virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); 6491 if (vi->rss_key_size > VIRTIO_NET_RSS_MAX_KEY_SIZE) { 6492 dev_err(&vdev->dev, "rss_max_key_size=%u exceeds the limit %u.\n", 6493 vi->rss_key_size, VIRTIO_NET_RSS_MAX_KEY_SIZE); 6494 err = -EINVAL; 6495 goto free; 6496 } 6497 6498 vi->rss_hash_types_supported = 6499 virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types)); 6500 vi->rss_hash_types_supported &= 6501 ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX | 6502 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | 6503 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX); 6504 6505 dev->hw_features |= NETIF_F_RXHASH; 6506 dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops; 6507 } 6508 6509 if (vi->has_rss_hash_report) 6510 vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); 6511 else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) || 6512 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 6513 vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); 6514 else 6515 vi->hdr_len = sizeof(struct virtio_net_hdr); 6516 6517 if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || 6518 virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) 6519 vi->any_header_sg = true; 6520 6521 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) 6522 vi->has_cvq = true; 6523 6524 mutex_init(&vi->cvq_lock); 6525 6526 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { 6527 mtu = virtio_cread16(vdev, 6528 offsetof(struct virtio_net_config, 6529 mtu)); 6530 if (mtu < dev->min_mtu) { 6531 /* Should never trigger: MTU was previously validated 6532 * in virtnet_validate. 
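 * virtnet_validate() clears VIRTIO_NET_F_MTU when the device reports an MTU
 * below MIN_MTU, so reaching this branch would mean the value changed
 * between validation and probe.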
6533 */ 6534 dev_err(&vdev->dev, 6535 "device MTU appears to have changed it is now %d < %d", 6536 mtu, dev->min_mtu); 6537 err = -EINVAL; 6538 goto free; 6539 } 6540 6541 dev->mtu = mtu; 6542 dev->max_mtu = mtu; 6543 } 6544 6545 virtnet_set_big_packets(vi, mtu); 6546 6547 if (vi->any_header_sg) 6548 dev->needed_headroom = vi->hdr_len; 6549 6550 /* Enable multiqueue by default */ 6551 if (num_online_cpus() >= max_queue_pairs) 6552 vi->curr_queue_pairs = max_queue_pairs; 6553 else 6554 vi->curr_queue_pairs = num_online_cpus(); 6555 vi->max_queue_pairs = max_queue_pairs; 6556 6557 /* Allocate/initialize the rx/tx queues, and invoke find_vqs */ 6558 err = init_vqs(vi); 6559 if (err) 6560 goto free; 6561 6562 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { 6563 vi->intr_coal_rx.max_usecs = 0; 6564 vi->intr_coal_tx.max_usecs = 0; 6565 vi->intr_coal_rx.max_packets = 0; 6566 6567 /* Keep the default values of the coalescing parameters 6568 * aligned with the default napi_tx state. 6569 */ 6570 if (vi->sq[0].napi.weight) 6571 vi->intr_coal_tx.max_packets = 1; 6572 else 6573 vi->intr_coal_tx.max_packets = 0; 6574 } 6575 6576 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { 6577 /* The reason is the same as VIRTIO_NET_F_NOTF_COAL. */ 6578 for (i = 0; i < vi->max_queue_pairs; i++) 6579 if (vi->sq[i].napi.weight) 6580 vi->sq[i].intr_coal.max_packets = 1; 6581 6582 err = virtnet_init_irq_moder(vi); 6583 if (err) 6584 goto free; 6585 } 6586 6587 #ifdef CONFIG_SYSFS 6588 if (vi->mergeable_rx_bufs) 6589 dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group; 6590 #endif 6591 netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs); 6592 netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs); 6593 6594 virtnet_init_settings(dev); 6595 6596 if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 6597 vi->failover = net_failover_create(vi->dev); 6598 if (IS_ERR(vi->failover)) { 6599 err = PTR_ERR(vi->failover); 6600 goto free_vqs; 6601 } 6602 } 6603 6604 if (vi->has_rss || vi->has_rss_hash_report) 6605 virtnet_init_default_rss(vi); 6606 6607 enable_rx_mode_work(vi); 6608 6609 /* serialize netdev register + virtio_device_ready() with ndo_open() */ 6610 rtnl_lock(); 6611 6612 err = register_netdevice(dev); 6613 if (err) { 6614 pr_debug("virtio_net: registering device failed\n"); 6615 rtnl_unlock(); 6616 goto free_failover; 6617 } 6618 6619 /* Disable config change notification until ndo_open. */ 6620 virtio_config_driver_disable(vi->vdev); 6621 6622 virtio_device_ready(vdev); 6623 6624 if (vi->has_rss || vi->has_rss_hash_report) { 6625 if (!virtnet_commit_rss_command(vi)) { 6626 dev_warn(&vdev->dev, "RSS disabled because committing failed.\n"); 6627 dev->hw_features &= ~NETIF_F_RXHASH; 6628 vi->has_rss_hash_report = false; 6629 vi->has_rss = false; 6630 } 6631 } 6632 6633 virtnet_set_queues(vi, vi->curr_queue_pairs); 6634 6635 /* a random MAC address has been assigned, notify the device. 
6636 * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there 6637 * because many devices work fine without getting MAC explicitly 6638 */ 6639 if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && 6640 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 6641 struct scatterlist sg; 6642 6643 sg_init_one(&sg, dev->dev_addr, dev->addr_len); 6644 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, 6645 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { 6646 pr_debug("virtio_net: setting MAC address failed\n"); 6647 rtnl_unlock(); 6648 err = -EINVAL; 6649 goto free_unregister_netdev; 6650 } 6651 } 6652 6653 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_DEVICE_STATS)) { 6654 struct virtio_net_stats_capabilities *stats_cap __free(kfree) = NULL; 6655 struct scatterlist sg; 6656 __le64 v; 6657 6658 stats_cap = kzalloc(sizeof(*stats_cap), GFP_KERNEL); 6659 if (!stats_cap) { 6660 rtnl_unlock(); 6661 err = -ENOMEM; 6662 goto free_unregister_netdev; 6663 } 6664 6665 sg_init_one(&sg, stats_cap, sizeof(*stats_cap)); 6666 6667 if (!virtnet_send_command_reply(vi, VIRTIO_NET_CTRL_STATS, 6668 VIRTIO_NET_CTRL_STATS_QUERY, 6669 NULL, &sg)) { 6670 pr_debug("virtio_net: fail to get stats capability\n"); 6671 rtnl_unlock(); 6672 err = -EINVAL; 6673 goto free_unregister_netdev; 6674 } 6675 6676 v = stats_cap->supported_stats_types[0]; 6677 vi->device_stats_cap = le64_to_cpu(v); 6678 } 6679 6680 /* Assume link up if device can't report link status, 6681 otherwise get link status from config. */ 6682 netif_carrier_off(dev); 6683 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { 6684 virtnet_config_changed_work(&vi->config_work); 6685 } else { 6686 vi->status = VIRTIO_NET_S_LINK_UP; 6687 virtnet_update_settings(vi); 6688 netif_carrier_on(dev); 6689 } 6690 6691 for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) 6692 if (virtio_has_feature(vi->vdev, guest_offloads[i])) 6693 set_bit(guest_offloads[i], &vi->guest_offloads); 6694 vi->guest_offloads_capable = vi->guest_offloads; 6695 6696 rtnl_unlock(); 6697 6698 err = virtnet_cpu_notif_add(vi); 6699 if (err) { 6700 pr_debug("virtio_net: registering cpu notifier failed\n"); 6701 goto free_unregister_netdev; 6702 } 6703 6704 pr_debug("virtnet: registered device %s with %d RX and TX vq's\n", 6705 dev->name, max_queue_pairs); 6706 6707 return 0; 6708 6709 free_unregister_netdev: 6710 unregister_netdev(dev); 6711 free_failover: 6712 net_failover_destroy(vi->failover); 6713 free_vqs: 6714 virtio_reset_device(vdev); 6715 cancel_delayed_work_sync(&vi->refill); 6716 free_receive_page_frags(vi); 6717 virtnet_del_vqs(vi); 6718 free: 6719 free_netdev(dev); 6720 return err; 6721 } 6722 6723 static void remove_vq_common(struct virtnet_info *vi) 6724 { 6725 virtio_reset_device(vi->vdev); 6726 6727 /* Free unused buffers in both send and recv, if any. */ 6728 free_unused_bufs(vi); 6729 6730 free_receive_bufs(vi); 6731 6732 free_receive_page_frags(vi); 6733 6734 virtnet_del_vqs(vi); 6735 } 6736 6737 static void virtnet_remove(struct virtio_device *vdev) 6738 { 6739 struct virtnet_info *vi = vdev->priv; 6740 6741 virtnet_cpu_notif_remove(vi); 6742 6743 /* Make sure no work handler is accessing the device. 
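 * The config-change worker is flushed first; rx-mode work is disabled so it
 * cannot be re-queued and is then flushed as well, before the netdev is
 * unregistered below.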
*/ 6744 flush_work(&vi->config_work); 6745 disable_rx_mode_work(vi); 6746 flush_work(&vi->rx_mode_work); 6747 6748 virtnet_free_irq_moder(vi); 6749 6750 unregister_netdev(vi->dev); 6751 6752 net_failover_destroy(vi->failover); 6753 6754 remove_vq_common(vi); 6755 6756 rss_indirection_table_free(&vi->rss); 6757 6758 free_netdev(vi->dev); 6759 } 6760 6761 static __maybe_unused int virtnet_freeze(struct virtio_device *vdev) 6762 { 6763 struct virtnet_info *vi = vdev->priv; 6764 6765 virtnet_cpu_notif_remove(vi); 6766 virtnet_freeze_down(vdev); 6767 remove_vq_common(vi); 6768 6769 return 0; 6770 } 6771 6772 static __maybe_unused int virtnet_restore(struct virtio_device *vdev) 6773 { 6774 struct virtnet_info *vi = vdev->priv; 6775 int err; 6776 6777 err = virtnet_restore_up(vdev); 6778 if (err) 6779 return err; 6780 virtnet_set_queues(vi, vi->curr_queue_pairs); 6781 6782 err = virtnet_cpu_notif_add(vi); 6783 if (err) { 6784 virtnet_freeze_down(vdev); 6785 remove_vq_common(vi); 6786 return err; 6787 } 6788 6789 return 0; 6790 } 6791 6792 static struct virtio_device_id id_table[] = { 6793 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, 6794 { 0 }, 6795 }; 6796 6797 #define VIRTNET_FEATURES \ 6798 VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \ 6799 VIRTIO_NET_F_MAC, \ 6800 VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \ 6801 VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \ 6802 VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \ 6803 VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \ 6804 VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \ 6805 VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \ 6806 VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ 6807 VIRTIO_NET_F_CTRL_MAC_ADDR, \ 6808 VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ 6809 VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ 6810 VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \ 6811 VIRTIO_NET_F_VQ_NOTF_COAL, \ 6812 VIRTIO_NET_F_GUEST_HDRLEN, VIRTIO_NET_F_DEVICE_STATS 6813 6814 static unsigned int features[] = { 6815 VIRTNET_FEATURES, 6816 }; 6817 6818 static unsigned int features_legacy[] = { 6819 VIRTNET_FEATURES, 6820 VIRTIO_NET_F_GSO, 6821 VIRTIO_F_ANY_LAYOUT, 6822 }; 6823 6824 static struct virtio_driver virtio_net_driver = { 6825 .feature_table = features, 6826 .feature_table_size = ARRAY_SIZE(features), 6827 .feature_table_legacy = features_legacy, 6828 .feature_table_size_legacy = ARRAY_SIZE(features_legacy), 6829 .driver.name = KBUILD_MODNAME, 6830 .id_table = id_table, 6831 .validate = virtnet_validate, 6832 .probe = virtnet_probe, 6833 .remove = virtnet_remove, 6834 .config_changed = virtnet_config_changed, 6835 #ifdef CONFIG_PM_SLEEP 6836 .freeze = virtnet_freeze, 6837 .restore = virtnet_restore, 6838 #endif 6839 }; 6840 6841 static __init int virtio_net_driver_init(void) 6842 { 6843 int ret; 6844 6845 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online", 6846 virtnet_cpu_online, 6847 virtnet_cpu_down_prep); 6848 if (ret < 0) 6849 goto out; 6850 virtionet_online = ret; 6851 ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead", 6852 NULL, virtnet_cpu_dead); 6853 if (ret) 6854 goto err_dead; 6855 ret = register_virtio_driver(&virtio_net_driver); 6856 if (ret) 6857 goto err_virtio; 6858 return 0; 6859 err_virtio: 6860 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 6861 err_dead: 6862 cpuhp_remove_multi_state(virtionet_online); 6863 out: 6864 return ret; 6865 } 6866 
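/* The exit path below mirrors the registrations made in
 * virtio_net_driver_init(): the virtio driver is unregistered first, then
 * the CPUHP_VIRT_NET_DEAD state and the dynamically allocated online state
 * (virtionet_online) are removed.
 */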
module_init(virtio_net_driver_init); 6867 6868 static __exit void virtio_net_driver_exit(void) 6869 { 6870 unregister_virtio_driver(&virtio_net_driver); 6871 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); 6872 cpuhp_remove_multi_state(virtionet_online); 6873 } 6874 module_exit(virtio_net_driver_exit); 6875 6876 MODULE_DEVICE_TABLE(virtio, id_table); 6877 MODULE_DESCRIPTION("Virtio network driver"); 6878 MODULE_LICENSE("GPL"); 6879